@aigne/core 1.70.1 → 1.71.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +22 -0
- package/lib/cjs/agents/agent.d.ts +4 -0
- package/lib/cjs/agents/agent.js +3 -0
- package/lib/cjs/agents/image-agent.js +2 -2
- package/lib/cjs/agents/types.d.ts +9 -1
- package/lib/cjs/loader/agent-js.d.ts +1 -2
- package/lib/cjs/loader/agent-js.js +2 -2
- package/lib/cjs/loader/agent-yaml.d.ts +19 -3
- package/lib/cjs/loader/agent-yaml.js +151 -110
- package/lib/cjs/loader/index.d.ts +11 -1
- package/lib/cjs/loader/index.js +43 -17
- package/lib/cjs/prompt/prompt-builder.d.ts +3 -3
- package/lib/cjs/prompt/prompt-builder.js +8 -2
- package/lib/cjs/prompt/skills/afs.js +41 -4
- package/lib/cjs/prompt/template.d.ts +1 -0
- package/lib/cjs/prompt/template.js +5 -3
- package/lib/cjs/utils/agent-utils.d.ts +3 -2
- package/lib/cjs/utils/agent-utils.js +7 -0
- package/lib/cjs/utils/token-estimator.d.ts +9 -0
- package/lib/cjs/utils/token-estimator.js +66 -0
- package/lib/dts/agents/agent.d.ts +4 -0
- package/lib/dts/agents/types.d.ts +9 -1
- package/lib/dts/loader/agent-js.d.ts +1 -2
- package/lib/dts/loader/agent-yaml.d.ts +19 -3
- package/lib/dts/loader/index.d.ts +11 -1
- package/lib/dts/prompt/prompt-builder.d.ts +3 -3
- package/lib/dts/prompt/template.d.ts +1 -0
- package/lib/dts/utils/agent-utils.d.ts +3 -2
- package/lib/dts/utils/token-estimator.d.ts +9 -0
- package/lib/esm/agents/agent.d.ts +4 -0
- package/lib/esm/agents/agent.js +3 -0
- package/lib/esm/agents/image-agent.js +2 -2
- package/lib/esm/agents/types.d.ts +9 -1
- package/lib/esm/loader/agent-js.d.ts +1 -2
- package/lib/esm/loader/agent-js.js +2 -2
- package/lib/esm/loader/agent-yaml.d.ts +19 -3
- package/lib/esm/loader/agent-yaml.js +147 -110
- package/lib/esm/loader/index.d.ts +11 -1
- package/lib/esm/loader/index.js +42 -20
- package/lib/esm/prompt/prompt-builder.d.ts +3 -3
- package/lib/esm/prompt/prompt-builder.js +8 -2
- package/lib/esm/prompt/skills/afs.js +41 -4
- package/lib/esm/prompt/template.d.ts +1 -0
- package/lib/esm/prompt/template.js +5 -3
- package/lib/esm/utils/agent-utils.d.ts +3 -2
- package/lib/esm/utils/agent-utils.js +6 -0
- package/lib/esm/utils/token-estimator.d.ts +9 -0
- package/lib/esm/utils/token-estimator.js +63 -0
- package/package.json +4 -4
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
 * Token estimation ratios for the character classes recognised by
 * `estimateTokens`. Based on empirical data from various tokenizers.
 *
 * The entries do NOT share a unit:
 *  - `chinese` and `other` are "characters per token" and are used as divisors.
 *  - `word` is "tokens per word" and is used as a multiplier.
 *
 * NOTE(review): a widely quoted rule of thumb is ~0.75 *words per token*
 * (i.e. ~1.33 tokens per word). If that is what `word` was meant to encode,
 * multiplying by it under-estimates English text — confirm the intent before
 * changing it, since callers depend on the current numbers.
 */
const CHAR_TYPE_RATIOS = Object.freeze({
    chinese: 1.5, // CJK characters: ~1.5 characters per token (divisor)
    word: 0.75, // English words: ~0.75 tokens per word (multiplier)
    other: 4, // Everything else (punctuation, digits, whitespace, ...): ~4 characters per token (divisor)
});
|
|
10
|
+
/**
 * Regular expressions used to classify characters for token estimation.
 *
 * Both patterns carry the global flag so `String.prototype.match` returns
 * every occurrence rather than only the first one.
 */
const CHAR_PATTERNS = {
    // CJK characters, one match per character:
    //   U+4E00-U+9FFF  CJK Unified Ideographs
    //   U+3400-U+4DBF  CJK Unified Ideographs Extension A
    //   U+3040-U+309F  Hiragana
    //   U+30A0-U+30FF  Katakana
    chinese: /[\u4e00-\u9fff\u3400-\u4dbf\u3040-\u309f\u30a0-\u30ff]/g,
    // English words: maximal runs of ASCII letters, one match per run
    word: /[a-zA-Z]+/g,
};
|
|
19
|
+
/**
 * Estimate the number of tokens in a piece of text by character class.
 *
 * The text is split into three disjoint classes, each weighted by its own
 * ratio from `CHAR_TYPE_RATIOS`:
 *  1. CJK characters (`CHAR_PATTERNS.chinese`): count / ratio.chinese
 *  2. English words (`CHAR_PATTERNS.word`): word count * ratio.word
 *  3. Every remaining UTF-16 code unit (punctuation, digits, whitespace,
 *     ...): count / ratio.other
 *
 * The fractional contributions are summed and rounded up once at the end.
 *
 * @param text - The text to estimate
 * @returns Estimated token count; 0 for empty or otherwise falsy input
 */
export function estimateTokens(text) {
    if (!text)
        return 0;
    let tokens = 0;
    // Number of code units already attributed to the CJK or word class. The
    // two patterns can never match the same position, so simple counts are
    // enough; no per-index bookkeeping is required.
    let classifiedChars = 0;
    // CJK characters: every match is exactly one character
    const cjkMatches = text.match(CHAR_PATTERNS.chinese);
    if (cjkMatches) {
        tokens += cjkMatches.length / CHAR_TYPE_RATIOS.chinese;
        classifiedChars += cjkMatches.length;
    }
    // English words: weighted per word, but each letter counts as classified
    const wordMatches = text.match(CHAR_PATTERNS.word);
    if (wordMatches) {
        tokens += wordMatches.length * CHAR_TYPE_RATIOS.word;
        for (const word of wordMatches) {
            classifiedChars += word.length;
        }
    }
    // Whatever is left (punctuation, numbers, whitespace, etc.)
    const remainingChars = text.length - classifiedChars;
    if (remainingChars > 0) {
        tokens += remainingChars / CHAR_TYPE_RATIOS.other;
    }
    return Math.ceil(tokens);
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aigne/core",
|
|
3
|
-
"version": "1.70.1",
|
|
3
|
+
"version": "1.71.0-beta.1",
|
|
4
4
|
"description": "The functional core of agentic AI",
|
|
5
5
|
"publishConfig": {
|
|
6
6
|
"access": "public"
|
|
@@ -92,10 +92,10 @@
|
|
|
92
92
|
"zod": "^3.25.67",
|
|
93
93
|
"zod-from-json-schema": "^0.0.5",
|
|
94
94
|
"zod-to-json-schema": "^3.24.6",
|
|
95
|
-
"@aigne/afs": "^1.
|
|
95
|
+
"@aigne/afs-history": "^1.1.3-beta",
|
|
96
96
|
"@aigne/observability-api": "^0.11.12",
|
|
97
|
-
"@aigne/
|
|
98
|
-
"@aigne/
|
|
97
|
+
"@aigne/platform-helpers": "^0.6.5",
|
|
98
|
+
"@aigne/afs": "^1.3.0-beta"
|
|
99
99
|
},
|
|
100
100
|
"devDependencies": {
|
|
101
101
|
"@types/bun": "^1.2.22",
|