@nanocollective/nanocoder 1.15.1 → 1.16.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -1
- package/dist/ai-sdk-client.d.ts +5 -0
- package/dist/ai-sdk-client.d.ts.map +1 -1
- package/dist/ai-sdk-client.js +21 -3
- package/dist/ai-sdk-client.js.map +1 -1
- package/dist/app/utils/appUtils.d.ts.map +1 -1
- package/dist/app/utils/appUtils.js +5 -3
- package/dist/app/utils/appUtils.js.map +1 -1
- package/dist/commands/index.d.ts +1 -0
- package/dist/commands/index.d.ts.map +1 -1
- package/dist/commands/index.js +1 -0
- package/dist/commands/index.js.map +1 -1
- package/dist/commands/usage.d.ts +7 -0
- package/dist/commands/usage.d.ts.map +1 -0
- package/dist/commands/usage.js +69 -0
- package/dist/commands/usage.js.map +1 -0
- package/dist/commands.d.ts +3 -2
- package/dist/commands.d.ts.map +1 -1
- package/dist/commands.js.map +1 -1
- package/dist/components/usage/progress-bar.d.ts +14 -0
- package/dist/components/usage/progress-bar.d.ts.map +1 -0
- package/dist/components/usage/progress-bar.js +14 -0
- package/dist/components/usage/progress-bar.js.map +1 -0
- package/dist/components/usage/usage-display.d.ts +18 -0
- package/dist/components/usage/usage-display.d.ts.map +1 -0
- package/dist/components/usage/usage-display.js +50 -0
- package/dist/components/usage/usage-display.js.map +1 -0
- package/dist/config/paths.d.ts +0 -4
- package/dist/config/paths.d.ts.map +1 -1
- package/dist/config/paths.js +2 -18
- package/dist/config/paths.js.map +1 -1
- package/dist/hooks/useAppInitialization.d.ts.map +1 -1
- package/dist/hooks/useAppInitialization.js +2 -1
- package/dist/hooks/useAppInitialization.js.map +1 -1
- package/dist/hooks/useAppState.d.ts +2 -0
- package/dist/hooks/useAppState.d.ts.map +1 -1
- package/dist/hooks/useAppState.js +27 -5
- package/dist/hooks/useAppState.js.map +1 -1
- package/dist/hooks/useChatHandler.d.ts.map +1 -1
- package/dist/hooks/useChatHandler.js +45 -9
- package/dist/hooks/useChatHandler.js.map +1 -1
- package/dist/models/index.d.ts +2 -0
- package/dist/models/index.d.ts.map +1 -0
- package/dist/models/index.js +2 -0
- package/dist/models/index.js.map +1 -0
- package/dist/models/models-cache.d.ts +8 -0
- package/dist/models/models-cache.d.ts.map +1 -0
- package/dist/models/models-cache.js +63 -0
- package/dist/models/models-cache.js.map +1 -0
- package/dist/models/models-dev-client.d.ts +10 -0
- package/dist/models/models-dev-client.d.ts.map +1 -0
- package/dist/models/models-dev-client.js +268 -0
- package/dist/models/models-dev-client.js.map +1 -0
- package/dist/models/models-types.d.ts +66 -0
- package/dist/models/models-types.d.ts.map +1 -0
- package/dist/models/models-types.js +5 -0
- package/dist/models/models-types.js.map +1 -0
- package/dist/recommendations/model-database.d.ts.map +1 -1
- package/dist/recommendations/model-database.js +45 -0
- package/dist/recommendations/model-database.js.map +1 -1
- package/dist/tokenization/index.d.ts +2 -0
- package/dist/tokenization/index.d.ts.map +1 -0
- package/dist/tokenization/index.js +2 -0
- package/dist/tokenization/index.js.map +1 -0
- package/dist/tokenization/tokenizer-factory.d.ts +14 -0
- package/dist/tokenization/tokenizer-factory.d.ts.map +1 -0
- package/dist/tokenization/tokenizer-factory.js +90 -0
- package/dist/tokenization/tokenizer-factory.js.map +1 -0
- package/dist/tokenization/tokenizer-factory.spec.d.ts +5 -0
- package/dist/tokenization/tokenizer-factory.spec.d.ts.map +1 -0
- package/dist/tokenization/tokenizer-factory.spec.js +137 -0
- package/dist/tokenization/tokenizer-factory.spec.js.map +1 -0
- package/dist/tokenization/tokenizers/anthropic-tokenizer.d.ts +17 -0
- package/dist/tokenization/tokenizers/anthropic-tokenizer.d.ts.map +1 -0
- package/dist/tokenization/tokenizers/anthropic-tokenizer.js +35 -0
- package/dist/tokenization/tokenizers/anthropic-tokenizer.js.map +1 -0
- package/dist/tokenization/tokenizers/anthropic-tokenizer.spec.d.ts +5 -0
- package/dist/tokenization/tokenizers/anthropic-tokenizer.spec.d.ts.map +1 -0
- package/dist/tokenization/tokenizers/anthropic-tokenizer.spec.js +152 -0
- package/dist/tokenization/tokenizers/anthropic-tokenizer.spec.js.map +1 -0
- package/dist/tokenization/tokenizers/fallback-tokenizer.d.ts +13 -0
- package/dist/tokenization/tokenizers/fallback-tokenizer.d.ts.map +1 -0
- package/dist/tokenization/tokenizers/fallback-tokenizer.js +20 -0
- package/dist/tokenization/tokenizers/fallback-tokenizer.js.map +1 -0
- package/dist/tokenization/tokenizers/fallback-tokenizer.spec.d.ts +5 -0
- package/dist/tokenization/tokenizers/fallback-tokenizer.spec.d.ts.map +1 -0
- package/dist/tokenization/tokenizers/fallback-tokenizer.spec.js +183 -0
- package/dist/tokenization/tokenizers/fallback-tokenizer.spec.js.map +1 -0
- package/dist/tokenization/tokenizers/llama-tokenizer.d.ts +14 -0
- package/dist/tokenization/tokenizers/llama-tokenizer.d.ts.map +1 -0
- package/dist/tokenization/tokenizers/llama-tokenizer.js +33 -0
- package/dist/tokenization/tokenizers/llama-tokenizer.js.map +1 -0
- package/dist/tokenization/tokenizers/llama-tokenizer.spec.d.ts +5 -0
- package/dist/tokenization/tokenizers/llama-tokenizer.spec.d.ts.map +1 -0
- package/dist/tokenization/tokenizers/llama-tokenizer.spec.js +170 -0
- package/dist/tokenization/tokenizers/llama-tokenizer.spec.js.map +1 -0
- package/dist/tokenization/tokenizers/openai-tokenizer.d.ts +22 -0
- package/dist/tokenization/tokenizers/openai-tokenizer.d.ts.map +1 -0
- package/dist/tokenization/tokenizers/openai-tokenizer.js +48 -0
- package/dist/tokenization/tokenizers/openai-tokenizer.js.map +1 -0
- package/dist/tokenization/tokenizers/openai-tokenizer.spec.d.ts +5 -0
- package/dist/tokenization/tokenizers/openai-tokenizer.spec.d.ts.map +1 -0
- package/dist/tokenization/tokenizers/openai-tokenizer.spec.js +140 -0
- package/dist/tokenization/tokenizers/openai-tokenizer.spec.js.map +1 -0
- package/dist/types/commands.d.ts +1 -0
- package/dist/types/commands.d.ts.map +1 -1
- package/dist/types/tokenization.d.ts +31 -0
- package/dist/types/tokenization.d.ts.map +1 -0
- package/dist/types/tokenization.js +5 -0
- package/dist/types/tokenization.js.map +1 -0
- package/dist/types/usage.d.ts +55 -0
- package/dist/types/usage.d.ts.map +1 -0
- package/dist/types/usage.js +2 -0
- package/dist/types/usage.js.map +1 -0
- package/dist/usage/calculator.d.ts +28 -0
- package/dist/usage/calculator.d.ts.map +1 -0
- package/dist/usage/calculator.js +81 -0
- package/dist/usage/calculator.js.map +1 -0
- package/dist/usage/calculator.spec.d.ts +2 -0
- package/dist/usage/calculator.spec.d.ts.map +1 -0
- package/dist/usage/calculator.spec.js +303 -0
- package/dist/usage/calculator.spec.js.map +1 -0
- package/dist/usage/storage.d.ts +19 -0
- package/dist/usage/storage.d.ts.map +1 -0
- package/dist/usage/storage.js +134 -0
- package/dist/usage/storage.js.map +1 -0
- package/dist/usage/storage.spec.d.ts +2 -0
- package/dist/usage/storage.spec.d.ts.map +1 -0
- package/dist/usage/storage.spec.js +417 -0
- package/dist/usage/storage.spec.js.map +1 -0
- package/dist/usage/tracker.d.ts +28 -0
- package/dist/usage/tracker.d.ts.map +1 -0
- package/dist/usage/tracker.js +75 -0
- package/dist/usage/tracker.js.map +1 -0
- package/dist/usage/tracker.spec.d.ts +2 -0
- package/dist/usage/tracker.spec.d.ts.map +1 -0
- package/dist/usage/tracker.spec.js +347 -0
- package/dist/usage/tracker.spec.js.map +1 -0
- package/dist/utils/paste-roundtrip.spec.d.ts.map +1 -0
- package/dist/utils/paste-roundtrip.spec.js.map +1 -0
- package/package.json +6 -2
- package/dist/integration/paste-roundtrip.spec.d.ts.map +0 -1
- package/dist/integration/paste-roundtrip.spec.js.map +0 -1
- /package/dist/{integration → utils}/paste-roundtrip.spec.d.ts +0 -0
- /package/dist/{integration → utils}/paste-roundtrip.spec.js +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llama-tokenizer.d.ts","sourceRoot":"","sources":["../../../source/tokenization/tokenizers/llama-tokenizer.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,KAAK,EAAC,SAAS,EAAC,MAAM,0BAA0B,CAAC;AACxD,OAAO,KAAK,EAAC,OAAO,EAAC,MAAM,cAAc,CAAC;AAE1C,qBAAa,cAAe,YAAW,SAAS;IAC/C,OAAO,CAAC,SAAS,CAAS;gBAEd,OAAO,CAAC,EAAE,MAAM;IAI5B,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM;IAU5B,WAAW,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAWrC,OAAO,IAAI,MAAM;CAGjB"}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Llama tokenizer for local models
|
|
3
|
+
* Uses llama-tokenizer-js package
|
|
4
|
+
*/
|
|
5
|
+
import llamaTokenizer from 'llama-tokenizer-js';
|
|
6
|
+
export class LlamaTokenizer {
    modelName;
    /**
     * @param modelId Model identifier used only for naming; defaults to 'llama'.
     */
    constructor(modelId) {
        this.modelName = modelId ? modelId : 'llama';
    }
    /**
     * Count tokens in a raw string via llama-tokenizer-js.
     * Falls back to a ~4-characters-per-token estimate if tokenization throws.
     */
    encode(text) {
        try {
            return llamaTokenizer.encode(text).length;
        }
        catch {
            return Math.ceil(text.length / 4);
        }
    }
    /**
     * Count tokens for one chat message: content + role + formatting overhead.
     * Llama format: <|start_header_id|>role<|end_header_id|>content<|eot_id|>
     */
    countTokens(message) {
        // Approximate overhead for the per-message format markers above.
        const formatOverhead = 6;
        return (this.encode(message.content || '') +
            this.encode(message.role || '') +
            formatOverhead);
    }
    /** Identifier of this tokenizer, e.g. 'llama-llama-3-8b'. */
    getName() {
        return 'llama-' + this.modelName;
    }
}
|
|
33
|
+
//# sourceMappingURL=llama-tokenizer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llama-tokenizer.js","sourceRoot":"","sources":["../../../source/tokenization/tokenizers/llama-tokenizer.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,cAAc,MAAM,oBAAoB,CAAC;AAIhD,MAAM,OAAO,cAAc;IAClB,SAAS,CAAS;IAE1B,YAAY,OAAgB;QAC3B,IAAI,CAAC,SAAS,GAAG,OAAO,IAAI,OAAO,CAAC;IACrC,CAAC;IAED,MAAM,CAAC,IAAY;QAClB,IAAI,CAAC;YACJ,MAAM,MAAM,GAAG,cAAc,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YAC3C,OAAO,MAAM,CAAC,MAAM,CAAC;QACtB,CAAC;QAAC,MAAM,CAAC;YACR,+DAA+D;YAC/D,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACnC,CAAC;IACF,CAAC;IAED,WAAW,CAAC,OAAgB;QAC3B,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,IAAI,EAAE,CAAC;QAEhC,0EAA0E;QAC1E,8CAA8C;QAC9C,MAAM,eAAe,GAAG,CAAC,CAAC;QAE1B,OAAO,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,eAAe,CAAC;IACnE,CAAC;IAED,OAAO;QACN,OAAO,SAAS,IAAI,CAAC,SAAS,EAAE,CAAC;IAClC,CAAC;CACD"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llama-tokenizer.spec.d.ts","sourceRoot":"","sources":["../../../source/tokenization/tokenizers/llama-tokenizer.spec.ts"],"names":[],"mappings":"AAAA;;GAEG"}
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for llama-tokenizer.ts
|
|
3
|
+
*/
|
|
4
|
+
import test from 'ava';
|
|
5
|
+
import { LlamaTokenizer } from './llama-tokenizer.js';
|
|
6
|
+
console.log(`\nllama-tokenizer.spec.ts`);
// Factory for the tokenizer most cases exercise.
const makeTokenizer = (model = 'llama-3-8b') => new LlamaTokenizer(model);
test('LlamaTokenizer encodes simple text', t => {
    const n = makeTokenizer().encode('Hello, world!');
    // A short greeting should produce only a handful of tokens.
    t.true(n > 0);
    t.true(n < 10);
});
test('LlamaTokenizer encodes empty string', t => {
    t.is(makeTokenizer().encode(''), 0);
});
test('LlamaTokenizer encodes longer text', t => {
    const sample = 'This is a longer piece of text that should have more tokens than a simple hello world.';
    const n = makeTokenizer().encode(sample);
    // Noticeably more tokens than the short greeting, but still bounded.
    t.true(n > 10);
    t.true(n < 50);
});
test('LlamaTokenizer defaults to llama when no model specified', t => {
    t.is(new LlamaTokenizer().getName(), 'llama-llama');
});
test('LlamaTokenizer getName returns correct format', t => {
    t.is(makeTokenizer('llama-3-70b').getName(), 'llama-llama-3-70b');
});
test('LlamaTokenizer countTokens for user message', t => {
    const n = makeTokenizer().countTokens({
        role: 'user',
        content: 'Hello, how are you?',
    });
    // Content tokens + role tokens + formatting overhead.
    t.true(n > 5);
    t.true(n < 25);
});
test('LlamaTokenizer countTokens for assistant message', t => {
    const n = makeTokenizer().countTokens({
        role: 'assistant',
        content: 'I am doing well, thank you!',
    });
    t.true(n > 5);
});
test('LlamaTokenizer countTokens for system message', t => {
    const n = makeTokenizer().countTokens({
        role: 'system',
        content: 'You are a helpful assistant.',
    });
    t.true(n > 5);
});
test('LlamaTokenizer countTokens handles empty content', t => {
    const n = makeTokenizer().countTokens({role: 'user', content: ''});
    // Even an empty message carries role + structural overhead.
    t.true(n >= 6);
});
test('LlamaTokenizer countTokens handles missing content', t => {
    const n = makeTokenizer().countTokens({role: 'user'});
    // Missing content must be handled gracefully, not crash.
    t.true(n >= 0);
});
test('LlamaTokenizer countTokens includes message overhead', t => {
    const tk = makeTokenizer();
    const total = tk.countTokens({role: 'user', content: 'Hi'});
    // The message total must exceed the sum of its bare parts.
    t.true(total > tk.encode('Hi') + tk.encode('user'));
});
test('LlamaTokenizer handles special characters', t => {
    t.true(makeTokenizer().encode('你好世界 🌍 Привет мир') > 0);
});
test('LlamaTokenizer handles code snippets', t => {
    const code = `
function hello() {
  console.log("Hello, world!");
}
`;
    t.true(makeTokenizer().encode(code) > 10);
});
test('LlamaTokenizer works with mistral model', t => {
    const tk = makeTokenizer('mistral-7b');
    t.true(tk.encode('Hello, world!') > 0);
    t.is(tk.getName(), 'llama-mistral-7b');
});
test('LlamaTokenizer works with qwen model', t => {
    const tk = makeTokenizer('qwen-2.5');
    t.true(tk.encode('Hello, world!') > 0);
    t.is(tk.getName(), 'llama-qwen-2.5');
});
test('LlamaTokenizer works with codellama model', t => {
    const tk = makeTokenizer('codellama-7b');
    t.true(tk.encode('Hello, world!') > 0);
    t.is(tk.getName(), 'llama-codellama-7b');
});
test('LlamaTokenizer handles long messages', t => {
    const n = makeTokenizer().countTokens({
        role: 'user',
        content: 'Hello '.repeat(1000),
    });
    // Long input should be counted without crashing.
    t.true(n > 1000);
});
test('LlamaTokenizer uses fallback on encoding error', t => {
    // Normal text exercises the happy path; the chars/4 fallback covers edge cases.
    t.true(makeTokenizer().encode('Normal text') > 0);
});
test('LlamaTokenizer countTokens with tool message', t => {
    const n = makeTokenizer().countTokens({
        role: 'tool',
        content: 'Tool result here',
        tool_call_id: '123',
    });
    t.true(n > 0);
});
test('LlamaTokenizer handles deepseek model', t => {
    const tk = makeTokenizer('deepseek-coder-33b');
    t.true(tk.encode('const x = 42;') > 0);
    t.is(tk.getName(), 'llama-deepseek-coder-33b');
});
test('LlamaTokenizer handles mixtral model', t => {
    const tk = makeTokenizer('mixtral-8x7b');
    t.true(tk.encode('Hello, world!') > 0);
    t.is(tk.getName(), 'llama-mixtral-8x7b');
});
|
|
170
|
+
//# sourceMappingURL=llama-tokenizer.spec.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llama-tokenizer.spec.js","sourceRoot":"","sources":["../../../source/tokenization/tokenizers/llama-tokenizer.spec.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,IAAI,MAAM,KAAK,CAAC;AACvB,OAAO,EAAC,cAAc,EAAC,MAAM,sBAAsB,CAAC;AAGpD,OAAO,CAAC,GAAG,CAAC,2BAA2B,CAAC,CAAC;AAEzC,IAAI,CAAC,oCAAoC,EAAE,CAAC,CAAC,EAAE;IAC9C,MAAM,SAAS,GAAG,IAAI,cAAc,CAAC,YAAY,CAAC,CAAC;IACnD,MAAM,KAAK,GAAG,SAAS,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC;IAEhD,uCAAuC;IACvC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;IAClB,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,EAAE,CAAC,CAAC;AACpB,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,qCAAqC,EAAE,CAAC,CAAC,EAAE;IAC/C,MAAM,SAAS,GAAG,IAAI,cAAc,CAAC,YAAY,CAAC,CAAC;IACnD,MAAM,KAAK,GAAG,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IAEnC,CAAC,CAAC,EAAE,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;AAChB,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,oCAAoC,EAAE,CAAC,CAAC,EAAE;IAC9C,MAAM,SAAS,GAAG,IAAI,cAAc,CAAC,YAAY,CAAC,CAAC;IACnD,MAAM,IAAI,GACT,wFAAwF,CAAC;IAC1F,MAAM,KAAK,GAAG,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAErC,wCAAwC;IACxC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,EAAE,CAAC,CAAC;IACnB,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,EAAE,CAAC,CAAC;AACpB,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,0DAA0D,EAAE,CAAC,CAAC,EAAE;IACpE,MAAM,SAAS,GAAG,IAAI,cAAc,EAAE,CAAC;IAEvC,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,OAAO,EAAE,EAAE,aAAa,CAAC,CAAC;AAC1C,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,+CAA+C,EAAE,CAAC,CAAC,EAAE;IACzD,MAAM,SAAS,GAAG,IAAI,cAAc,CAAC,aAAa,CAAC,CAAC;IAEpD,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,OAAO,EAAE,EAAE,mBAAmB,CAAC,CAAC;AAChD,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,6CAA6C,EAAE,CAAC,CAAC,EAAE;IACvD,MAAM,SAAS,GAAG,IAAI,cAAc,CAAC,YAAY,CAAC,CAAC;IACnD,MAAM,OAAO,GAAY;QACxB,IAAI,EAAE,MAAM;QACZ,OAAO,EAAE,qBAAqB;KAC9B,CAAC;IAEF,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;IAE7C,yDAAyD;IACzD,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;IAClB,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,EAAE,CAAC,CAAC;AACpB,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,kDAAkD,EAAE,CAAC,CAAC,EAAE;IAC5D,MAAM,SAAS,GAAG,IAAI,cAAc,CAAC,YAAY,CAAC,CAAC;IACnD,MAAM,OAAO,GAAY;QACxB,IAAI,EAAE,WAAW;QACjB,OAAO,EAAE,6BAA6B;KACtC,CAAC;IAEF,
MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;IAE7C,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;AACnB,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,+CAA+C,EAAE,CAAC,CAAC,EAAE;IACzD,MAAM,SAAS,GAAG,IAAI,cAAc,CAAC,YAAY,CAAC,CAAC;IACnD,MAAM,OAAO,GAAY;QACxB,IAAI,EAAE,QAAQ;QACd,OAAO,EAAE,8BAA8B;KACvC,CAAC;IAEF,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;IAE7C,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;AACnB,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,kDAAkD,EAAE,CAAC,CAAC,EAAE;IAC5D,MAAM,SAAS,GAAG,IAAI,cAAc,CAAC,YAAY,CAAC,CAAC;IACnD,MAAM,OAAO,GAAY;QACxB,IAAI,EAAE,MAAM;QACZ,OAAO,EAAE,EAAE;KACX,CAAC;IAEF,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;IAE7C,4DAA4D;IAC5D,CAAC,CAAC,IAAI,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC;AACpB,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,oDAAoD,EAAE,CAAC,CAAC,EAAE;IAC9D,MAAM,SAAS,GAAG,IAAI,cAAc,CAAC,YAAY,CAAC,CAAC;IACnD,MAAM,OAAO,GAAY;QACxB,IAAI,EAAE,MAAM;KACD,CAAC;IAEb,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;IAE7C,2BAA2B;IAC3B,CAAC,CAAC,IAAI,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC;AACpB,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,sDAAsD,EAAE,CAAC,CAAC,EAAE;IAChE,MAAM,SAAS,GAAG,IAAI,cAAc,CAAC,YAAY,CAAC,CAAC;IACnD,MAAM,YAAY,GAAY;QAC7B,IAAI,EAAE,MAAM;QACZ,OAAO,EAAE,IAAI;KACb,CAAC;IAEF,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC,YAAY,CAAC,CAAC;IAClD,MAAM,WAAW,GAAG,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAC3C,MAAM,QAAQ,GAAG,SAAS,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IAE1C,gEAAgE;IAChE,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,WAAW,GAAG,QAAQ,CAAC,CAAC;AACxC,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,2CAA2C,EAAE,CAAC,CAAC,EAAE;IACrD,MAAM,SAAS,GAAG,IAAI,cAAc,CAAC,YAAY,CAAC,CAAC;IACnD,MAAM,IAAI,GAAG,oBAAoB,CAAC;IAClC,MAAM,KAAK,GAAG,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAErC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;AACnB,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,sCAAsC,EAAE,CAAC,CAAC,EAAE;IAChD,MAAM,SAAS,GAAG,IAAI,cAAc,CAAC,YAAY,CAAC,CAAC;IACnD,MAAM,IAAI,GAAG;;;;EAIZ,CAAC;IACF,MAAM,KAAK,GAAG,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAErC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,EAAE,CAAC,CAAC;AACpB,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,yCAAyC,EAAE,CAAC,CAAC,EAAE;IACnD,MAAM,SAAS,
GAAG,IAAI,cAAc,CAAC,YAAY,CAAC,CAAC;IACnD,MAAM,KAAK,GAAG,SAAS,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC;IAEhD,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;IAClB,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,OAAO,EAAE,EAAE,kBAAkB,CAAC,CAAC;AAC/C,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,sCAAsC,EAAE,CAAC,CAAC,EAAE;IAChD,MAAM,SAAS,GAAG,IAAI,cAAc,CAAC,UAAU,CAAC,CAAC;IACjD,MAAM,KAAK,GAAG,SAAS,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC;IAEhD,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;IAClB,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,OAAO,EAAE,EAAE,gBAAgB,CAAC,CAAC;AAC7C,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,2CAA2C,EAAE,CAAC,CAAC,EAAE;IACrD,MAAM,SAAS,GAAG,IAAI,cAAc,CAAC,cAAc,CAAC,CAAC;IACrD,MAAM,KAAK,GAAG,SAAS,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC;IAEhD,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;IAClB,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,OAAO,EAAE,EAAE,oBAAoB,CAAC,CAAC;AACjD,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,sCAAsC,EAAE,CAAC,CAAC,EAAE;IAChD,MAAM,SAAS,GAAG,IAAI,cAAc,CAAC,YAAY,CAAC,CAAC;IACnD,MAAM,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IACvC,MAAM,OAAO,GAAY;QACxB,IAAI,EAAE,MAAM;QACZ,OAAO,EAAE,QAAQ;KACjB,CAAC;IAEF,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;IAE7C,2CAA2C;IAC3C,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,CAAC;AACtB,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,gDAAgD,EAAE,CAAC,CAAC,EAAE;IAC1D,MAAM,SAAS,GAAG,IAAI,cAAc,CAAC,YAAY,CAAC,CAAC;IAEnD,iDAAiD;IACjD,6CAA6C;IAC7C,MAAM,KAAK,GAAG,SAAS,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC;IAE9C,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;AACnB,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,8CAA8C,EAAE,CAAC,CAAC,EAAE;IACxD,MAAM,SAAS,GAAG,IAAI,cAAc,CAAC,YAAY,CAAC,CAAC;IACnD,MAAM,OAAO,GAAY;QACxB,IAAI,EAAE,MAAM;QACZ,OAAO,EAAE,kBAAkB;QAC3B,YAAY,EAAE,KAAK;KACnB,CAAC;IAEF,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;IAE7C,8BAA8B;IAC9B,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;AACnB,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,uCAAuC,EAAE,CAAC,CAAC,EAAE;IACjD,MAAM,SAAS,GAAG,IAAI,cAAc,CAAC,oBAAoB,CAAC,CAAC;IAC3D,MAAM,KAAK,GAAG,SAAS,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC;IAEhD,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;IAClB,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,OAAO,EAAE,EAAE,
0BAA0B,CAAC,CAAC;AACvD,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,sCAAsC,EAAE,CAAC,CAAC,EAAE;IAChD,MAAM,SAAS,GAAG,IAAI,cAAc,CAAC,cAAc,CAAC,CAAC;IACrD,MAAM,KAAK,GAAG,SAAS,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC;IAEhD,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;IAClB,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,OAAO,EAAE,EAAE,oBAAoB,CAAC,CAAC;AACjD,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI tokenizer using tiktoken
|
|
3
|
+
* Supports GPT-3.5, GPT-4, and other OpenAI models
|
|
4
|
+
*/
|
|
5
|
+
import type { Tokenizer } from '../../types/tokenization.js';
|
|
6
|
+
import type { Message } from '../../types/core.js';
|
|
7
|
+
/**
|
|
8
|
+
* OpenAI tokenizer using tiktoken for accurate token counting
|
|
9
|
+
*/
|
|
10
|
+
export declare class OpenAITokenizer {
    /** Underlying tiktoken encoding (model-specific, or cl100k_base fallback). */
    private encoding;
    /** Effective model name; defaults to 'gpt-4' when none is given. */
    private modelName;
    constructor(modelId?: string);
    /**
     * Count tokens in a raw string.
     * Falls back to a ~4-characters-per-token estimate if tiktoken throws.
     */
    encode(text: string): number;
    /**
     * Count tokens for one chat message: content + role + a fixed
     * per-message overhead for the chat format markers.
     */
    countTokens(message: Message): number;
    /** Identifier of this tokenizer, e.g. 'openai-gpt-4'. */
    getName(): string;
    /**
     * Clean up encoding resources
     */
    free(): void;
}
|
|
22
|
+
//# sourceMappingURL=openai-tokenizer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"openai-tokenizer.d.ts","sourceRoot":"","sources":["../../../source/tokenization/tokenizers/openai-tokenizer.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,KAAK,EAAC,SAAS,EAAC,MAAM,0BAA0B,CAAC;AACxD,OAAO,KAAK,EAAC,OAAO,EAAC,MAAM,cAAc,CAAC;AAE1C;;GAEG;AACH,qBAAa,eAAgB,YAAW,SAAS;IAChD,OAAO,CAAC,QAAQ,CAAkC;IAClD,OAAO,CAAC,SAAS,CAAS;gBAEd,OAAO,CAAC,EAAE,MAAM;IAU5B,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM;IAS5B,WAAW,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAWrC,OAAO,IAAI,MAAM;IAIjB;;OAEG;IACH,IAAI,IAAI,IAAI;CAGZ"}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI tokenizer using tiktoken
|
|
3
|
+
* Supports GPT-3.5, GPT-4, and other OpenAI models
|
|
4
|
+
*/
|
|
5
|
+
import { encoding_for_model, get_encoding } from 'tiktoken';
|
|
6
|
+
/**
|
|
7
|
+
* OpenAI tokenizer using tiktoken for accurate token counting
|
|
8
|
+
*/
|
|
9
|
+
export class OpenAITokenizer {
    encoding;
    modelName;
    /**
     * @param modelId OpenAI model identifier; defaults to 'gpt-4'.
     */
    constructor(modelId) {
        this.modelName = modelId || 'gpt-4';
        try {
            // Resolve the encoding from the *effective* model name so the
            // 'gpt-4' default goes through the normal lookup instead of
            // relying on encoding_for_model(undefined) throwing into the catch.
            this.encoding = encoding_for_model(this.modelName);
        }
        catch {
            // Unknown model id: fall back to cl100k_base (GPT-3.5/GPT-4 family).
            this.encoding = get_encoding('cl100k_base');
        }
    }
    /**
     * Count tokens in a raw string.
     * Falls back to a ~4-characters-per-token estimate if tiktoken throws.
     */
    encode(text) {
        try {
            const tokens = this.encoding.encode(text);
            return tokens.length;
        }
        catch {
            return Math.ceil(text.length / 4);
        }
    }
    /**
     * Count tokens for one chat message: content + role + per-message overhead.
     */
    countTokens(message) {
        const content = message.content || '';
        const role = message.role || '';
        // OpenAI format: each message has overhead for role markers
        // <|im_start|>role\ncontent<|im_end|>
        const messageOverhead = 4; // Approximate overhead per message
        return this.encode(content) + this.encode(role) + messageOverhead;
    }
    /** Identifier of this tokenizer, e.g. 'openai-gpt-4'. */
    getName() {
        return `openai-${this.modelName}`;
    }
    /**
     * Clean up encoding resources
     * Releases the underlying tiktoken encoding.
     */
    free() {
        this.encoding.free();
    }
}
|
|
48
|
+
//# sourceMappingURL=openai-tokenizer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"openai-tokenizer.js","sourceRoot":"","sources":["../../../source/tokenization/tokenizers/openai-tokenizer.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAC,kBAAkB,EAAE,YAAY,EAAqB,MAAM,UAAU,CAAC;AAI9E;;GAEG;AACH,MAAM,OAAO,eAAe;IACnB,QAAQ,CAAkC;IAC1C,SAAS,CAAS;IAE1B,YAAY,OAAgB;QAC3B,IAAI,CAAC,SAAS,GAAG,OAAO,IAAI,OAAO,CAAC;QAEpC,IAAI,CAAC;YACJ,IAAI,CAAC,QAAQ,GAAG,kBAAkB,CAAC,OAAwB,CAAC,CAAC;QAC9D,CAAC;QAAC,MAAM,CAAC;YACR,IAAI,CAAC,QAAQ,GAAG,YAAY,CAAC,aAAa,CAAC,CAAC;QAC7C,CAAC;IACF,CAAC;IAED,MAAM,CAAC,IAAY;QAClB,IAAI,CAAC;YACJ,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YAC1C,OAAO,MAAM,CAAC,MAAM,CAAC;QACtB,CAAC;QAAC,MAAM,CAAC;YACR,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACnC,CAAC;IACF,CAAC;IAED,WAAW,CAAC,OAAgB;QAC3B,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,IAAI,EAAE,CAAC;QAEhC,4DAA4D;QAC5D,sCAAsC;QACtC,MAAM,eAAe,GAAG,CAAC,CAAC,CAAC,mCAAmC;QAE9D,OAAO,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,eAAe,CAAC;IACnE,CAAC;IAED,OAAO;QACN,OAAO,UAAU,IAAI,CAAC,SAAS,EAAE,CAAC;IACnC,CAAC;IAED;;OAEG;IACH,IAAI;QACH,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;IACtB,CAAC;CACD"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"openai-tokenizer.spec.d.ts","sourceRoot":"","sources":["../../../source/tokenization/tokenizers/openai-tokenizer.spec.ts"],"names":[],"mappings":"AAAA;;GAEG"}
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for openai-tokenizer.ts
|
|
3
|
+
*/
|
|
4
|
+
import test from 'ava';
|
|
5
|
+
import { OpenAITokenizer } from './openai-tokenizer.js';
|
|
6
|
+
console.log(`\nopenai-tokenizer.spec.ts`);
// Factory for the tokenizer most cases exercise.
const makeTokenizer = (model = 'gpt-4') => new OpenAITokenizer(model);
test('OpenAITokenizer encodes simple text', t => {
    const n = makeTokenizer().encode('Hello, world!');
    // "Hello, world!" tokenizes to around 4 tokens.
    t.true(n > 0);
    t.true(n < 10);
});
test('OpenAITokenizer encodes empty string', t => {
    t.is(makeTokenizer().encode(''), 0);
});
test('OpenAITokenizer encodes longer text', t => {
    const sample = 'This is a longer piece of text that should have more tokens than a simple hello world.';
    const n = makeTokenizer().encode(sample);
    // Noticeably more tokens than the short greeting, but still bounded.
    t.true(n > 10);
    t.true(n < 50);
});
test('OpenAITokenizer uses fallback encoding for unsupported model', t => {
    // Unknown models fall back to the default encoding and still produce counts.
    t.true(makeTokenizer('unknown-model-xyz').encode('Hello, world!') > 0);
});
test('OpenAITokenizer defaults to gpt-4 when no model specified', t => {
    t.is(new OpenAITokenizer().getName(), 'openai-gpt-4');
});
test('OpenAITokenizer getName returns correct format', t => {
    t.is(makeTokenizer('gpt-3.5-turbo').getName(), 'openai-gpt-3.5-turbo');
});
test('OpenAITokenizer countTokens for user message', t => {
    const n = makeTokenizer().countTokens({
        role: 'user',
        content: 'Hello, how are you?',
    });
    // Content tokens + role tokens + formatting overhead.
    t.true(n > 5);
    t.true(n < 20);
});
test('OpenAITokenizer countTokens for assistant message', t => {
    const n = makeTokenizer().countTokens({
        role: 'assistant',
        content: 'I am doing well, thank you!',
    });
    t.true(n > 5);
});
test('OpenAITokenizer countTokens for system message', t => {
    const n = makeTokenizer().countTokens({
        role: 'system',
        content: 'You are a helpful assistant.',
    });
    t.true(n > 5);
});
test('OpenAITokenizer countTokens handles empty content', t => {
    const n = makeTokenizer().countTokens({role: 'user', content: ''});
    // Even an empty message carries role + structural overhead.
    t.true(n >= 4);
});
test('OpenAITokenizer countTokens handles missing content', t => {
    const n = makeTokenizer().countTokens({role: 'user'});
    // Missing content must be handled gracefully, not crash.
    t.true(n >= 0);
});
test('OpenAITokenizer countTokens includes message overhead', t => {
    const tk = makeTokenizer();
    const total = tk.countTokens({role: 'user', content: 'Hi'});
    // The message total must exceed the sum of its bare parts.
    t.true(total > tk.encode('Hi') + tk.encode('user'));
});
test('OpenAITokenizer free method exists', t => {
    t.notThrows(() => {
        makeTokenizer().free();
    });
});
|
|
107
|
+
test('OpenAITokenizer handles special characters', t => {
|
|
108
|
+
const tokenizer = new OpenAITokenizer('gpt-4');
|
|
109
|
+
const text = '你好世界 🌍 Привет мир';
|
|
110
|
+
const count = tokenizer.encode(text);
|
|
111
|
+
t.true(count > 0);
|
|
112
|
+
});
|
|
113
|
+
test('OpenAITokenizer handles code snippets', t => {
|
|
114
|
+
const tokenizer = new OpenAITokenizer('gpt-4');
|
|
115
|
+
const code = `
|
|
116
|
+
function hello() {
|
|
117
|
+
console.log("Hello, world!");
|
|
118
|
+
}
|
|
119
|
+
`;
|
|
120
|
+
const count = tokenizer.encode(code);
|
|
121
|
+
t.true(count > 10);
|
|
122
|
+
});
|
|
123
|
+
test('OpenAITokenizer works with gpt-3.5-turbo model', t => {
|
|
124
|
+
const tokenizer = new OpenAITokenizer('gpt-3.5-turbo');
|
|
125
|
+
const count = tokenizer.encode('Hello, world!');
|
|
126
|
+
t.true(count > 0);
|
|
127
|
+
t.is(tokenizer.getName(), 'openai-gpt-3.5-turbo');
|
|
128
|
+
});
|
|
129
|
+
test('OpenAITokenizer handles long messages', t => {
|
|
130
|
+
const tokenizer = new OpenAITokenizer('gpt-4');
|
|
131
|
+
const longText = 'Hello '.repeat(1000);
|
|
132
|
+
const message = {
|
|
133
|
+
role: 'user',
|
|
134
|
+
content: longText,
|
|
135
|
+
};
|
|
136
|
+
const count = tokenizer.countTokens(message);
|
|
137
|
+
// Should handle long text without crashing
|
|
138
|
+
t.true(count > 1000);
|
|
139
|
+
});
|
|
140
|
+
//# sourceMappingURL=openai-tokenizer.spec.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"openai-tokenizer.spec.js","sourceRoot":"","sources":["../../../source/tokenization/tokenizers/openai-tokenizer.spec.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,IAAI,MAAM,KAAK,CAAC;AACvB,OAAO,EAAC,eAAe,EAAC,MAAM,uBAAuB,CAAC;AAGtD,OAAO,CAAC,GAAG,CAAC,4BAA4B,CAAC,CAAC;AAE1C,IAAI,CAAC,qCAAqC,EAAE,CAAC,CAAC,EAAE;IAC/C,MAAM,SAAS,GAAG,IAAI,eAAe,CAAC,OAAO,CAAC,CAAC;IAC/C,MAAM,KAAK,GAAG,SAAS,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC;IAEhD,qDAAqD;IACrD,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;IAClB,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,EAAE,CAAC,CAAC;AACpB,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,sCAAsC,EAAE,CAAC,CAAC,EAAE;IAChD,MAAM,SAAS,GAAG,IAAI,eAAe,CAAC,OAAO,CAAC,CAAC;IAC/C,MAAM,KAAK,GAAG,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IAEnC,CAAC,CAAC,EAAE,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;AAChB,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,qCAAqC,EAAE,CAAC,CAAC,EAAE;IAC/C,MAAM,SAAS,GAAG,IAAI,eAAe,CAAC,OAAO,CAAC,CAAC;IAC/C,MAAM,IAAI,GACT,wFAAwF,CAAC;IAC1F,MAAM,KAAK,GAAG,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAErC,wCAAwC;IACxC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,EAAE,CAAC,CAAC;IACnB,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,EAAE,CAAC,CAAC;AACpB,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,8DAA8D,EAAE,CAAC,CAAC,EAAE;IACxE,MAAM,SAAS,GAAG,IAAI,eAAe,CAAC,mBAAmB,CAAC,CAAC;IAC3D,MAAM,KAAK,GAAG,SAAS,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC;IAEhD,6CAA6C;IAC7C,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;AACnB,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,2DAA2D,EAAE,CAAC,CAAC,EAAE;IACrE,MAAM,SAAS,GAAG,IAAI,eAAe,EAAE,CAAC;IAExC,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,OAAO,EAAE,EAAE,cAAc,CAAC,CAAC;AAC3C,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,gDAAgD,EAAE,CAAC,CAAC,EAAE;IAC1D,MAAM,SAAS,GAAG,IAAI,eAAe,CAAC,eAAe,CAAC,CAAC;IAEvD,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,OAAO,EAAE,EAAE,sBAAsB,CAAC,CAAC;AACnD,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,8CAA8C,EAAE,CAAC,CAAC,EAAE;IACxD,MAAM,SAAS,GAAG,IAAI,eAAe,CAAC,OAAO,CAAC,CAAC;IAC/C,MAAM,OAAO,GAAY;QACxB,IAAI,EAAE,MAAM;QACZ,OAAO,EAAE,qBAAqB;KAC9B,CAAC;IAEF,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;IAE7C,yDAAyD;IACzD,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;IAClB,CA
AC,CAAC,IAAI,CAAC,KAAK,GAAG,EAAE,CAAC,CAAC;AACpB,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,mDAAmD,EAAE,CAAC,CAAC,EAAE;IAC7D,MAAM,SAAS,GAAG,IAAI,eAAe,CAAC,OAAO,CAAC,CAAC;IAC/C,MAAM,OAAO,GAAY;QACxB,IAAI,EAAE,WAAW;QACjB,OAAO,EAAE,6BAA6B;KACtC,CAAC;IAEF,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;IAE7C,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;AACnB,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,gDAAgD,EAAE,CAAC,CAAC,EAAE;IAC1D,MAAM,SAAS,GAAG,IAAI,eAAe,CAAC,OAAO,CAAC,CAAC;IAC/C,MAAM,OAAO,GAAY;QACxB,IAAI,EAAE,QAAQ;QACd,OAAO,EAAE,8BAA8B;KACvC,CAAC;IAEF,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;IAE7C,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;AACnB,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,mDAAmD,EAAE,CAAC,CAAC,EAAE;IAC7D,MAAM,SAAS,GAAG,IAAI,eAAe,CAAC,OAAO,CAAC,CAAC;IAC/C,MAAM,OAAO,GAAY;QACxB,IAAI,EAAE,MAAM;QACZ,OAAO,EAAE,EAAE;KACX,CAAC;IAEF,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;IAE7C,4DAA4D;IAC5D,CAAC,CAAC,IAAI,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC;AACpB,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,qDAAqD,EAAE,CAAC,CAAC,EAAE;IAC/D,MAAM,SAAS,GAAG,IAAI,eAAe,CAAC,OAAO,CAAC,CAAC;IAC/C,MAAM,OAAO,GAAY;QACxB,IAAI,EAAE,MAAM;KACD,CAAC;IAEb,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;IAE7C,2BAA2B;IAC3B,CAAC,CAAC,IAAI,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC;AACpB,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,uDAAuD,EAAE,CAAC,CAAC,EAAE;IACjE,MAAM,SAAS,GAAG,IAAI,eAAe,CAAC,OAAO,CAAC,CAAC;IAC/C,MAAM,YAAY,GAAY;QAC7B,IAAI,EAAE,MAAM;QACZ,OAAO,EAAE,IAAI;KACb,CAAC;IAEF,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC,YAAY,CAAC,CAAC;IAClD,MAAM,WAAW,GAAG,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAC3C,MAAM,QAAQ,GAAG,SAAS,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IAE1C,gEAAgE;IAChE,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,WAAW,GAAG,QAAQ,CAAC,CAAC;AACxC,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,oCAAoC,EAAE,CAAC,CAAC,EAAE;IAC9C,MAAM,SAAS,GAAG,IAAI,eAAe,CAAC,OAAO,CAAC,CAAC;IAE/C,CAAC,CAAC,SAAS,CAAC,GAAG,EAAE;QAChB,SAAS,CAAC,IAAI,EAAE,CAAC;IAClB,CAAC,CAAC,CAAC;AACJ,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,4CAA4C,EAAE,CAAC,CAAC,EAAE;IACtD,MAAM,SAAS,GAAG,IAAI,eAAe,CAAC,OAAO,CAAC,CAAC;IAC/C,MAAM,IAAI,GAAG,oBAAoB,CAAC;I
AClC,MAAM,KAAK,GAAG,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAErC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;AACnB,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,uCAAuC,EAAE,CAAC,CAAC,EAAE;IACjD,MAAM,SAAS,GAAG,IAAI,eAAe,CAAC,OAAO,CAAC,CAAC;IAC/C,MAAM,IAAI,GAAG;;;;EAIZ,CAAC;IACF,MAAM,KAAK,GAAG,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAErC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,EAAE,CAAC,CAAC;AACpB,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,gDAAgD,EAAE,CAAC,CAAC,EAAE;IAC1D,MAAM,SAAS,GAAG,IAAI,eAAe,CAAC,eAAe,CAAC,CAAC;IACvD,MAAM,KAAK,GAAG,SAAS,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC;IAEhD,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;IAClB,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,OAAO,EAAE,EAAE,sBAAsB,CAAC,CAAC;AACnD,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,uCAAuC,EAAE,CAAC,CAAC,EAAE;IACjD,MAAM,SAAS,GAAG,IAAI,eAAe,CAAC,OAAO,CAAC,CAAC;IAC/C,MAAM,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IACvC,MAAM,OAAO,GAAY;QACxB,IAAI,EAAE,MAAM;QACZ,OAAO,EAAE,QAAQ;KACjB,CAAC;IAEF,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;IAE7C,2CAA2C;IAC3C,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,CAAC;AACtB,CAAC,CAAC,CAAC"}
|
package/dist/types/commands.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"commands.d.ts","sourceRoot":"","sources":["../../source/types/commands.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,OAAO,EAAC,MAAM,cAAc,CAAC;AAErC,MAAM,WAAW,OAAO,CAAC,CAAC,GAAG,KAAK,CAAC,YAAY;IAC9C,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,CACR,IAAI,EAAE,MAAM,EAAE,EACd,QAAQ,EAAE,OAAO,EAAE,EACnB,QAAQ,EAAE;
|
|
1
|
+
{"version":3,"file":"commands.d.ts","sourceRoot":"","sources":["../../source/types/commands.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,OAAO,EAAC,MAAM,cAAc,CAAC;AAErC,MAAM,WAAW,OAAO,CAAC,CAAC,GAAG,KAAK,CAAC,YAAY;IAC9C,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,CACR,IAAI,EAAE,MAAM,EAAE,EACd,QAAQ,EAAE,OAAO,EAAE,EACnB,QAAQ,EAAE;QACT,QAAQ,EAAE,MAAM,CAAC;QACjB,KAAK,EAAE,MAAM,CAAC;QACd,MAAM,EAAE,MAAM,CAAC;QACf,gBAAgB,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,MAAM,CAAC;KAC/C,KACG,OAAO,CAAC,CAAC,CAAC,CAAC;CAChB;AAED,MAAM,WAAW,aAAa;IAC7B,SAAS,EAAE,OAAO,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IAErB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,qBAAqB;IACrC,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;CACtB;AAED,MAAM,WAAW,aAAa;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,qBAAqB,CAAC;IAChC,OAAO,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,mBAAmB;IACnC,QAAQ,EAAE,qBAAqB,CAAC;IAChC,OAAO,EAAE,MAAM,CAAC;CAChB"}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tokenizer interface and types
|
|
3
|
+
*/
|
|
4
|
+
import type { Message } from '../types/core.js';
|
|
5
|
+
/**
|
|
6
|
+
* Tokenizer interface for encoding text and counting tokens
|
|
7
|
+
*/
|
|
8
|
+
export interface Tokenizer {
|
|
9
|
+
/**
|
|
10
|
+
* Encode text and return token count
|
|
11
|
+
*/
|
|
12
|
+
encode(text: string): number;
|
|
13
|
+
/**
|
|
14
|
+
* Count tokens in a message (content + role)
|
|
15
|
+
*/
|
|
16
|
+
countTokens(message: Message): number;
|
|
17
|
+
/**
|
|
18
|
+
* Get the tokenizer name/type
|
|
19
|
+
*/
|
|
20
|
+
getName(): string;
|
|
21
|
+
/**
|
|
22
|
+
* Optional cleanup method for releasing resources
|
|
23
|
+
* Should be called when the tokenizer is no longer needed
|
|
24
|
+
*/
|
|
25
|
+
free?(): void;
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* Provider types for tokenizer selection
|
|
29
|
+
*/
|
|
30
|
+
export type TokenizerProvider = 'openai' | 'anthropic' | 'llama' | 'fallback' | 'auto';
|
|
31
|
+
//# sourceMappingURL=tokenization.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tokenization.d.ts","sourceRoot":"","sources":["../../source/types/tokenization.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAC,OAAO,EAAC,MAAM,cAAc,CAAC;AAE1C;;GAEG;AACH,MAAM,WAAW,SAAS;IACzB;;OAEG;IACH,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAAC;IAE7B;;OAEG;IACH,WAAW,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM,CAAC;IAEtC;;OAEG;IACH,OAAO,IAAI,MAAM,CAAC;IAElB;;;OAGG;IACH,IAAI,CAAC,IAAI,IAAI,CAAC;CACd;AAED;;GAEG;AACH,MAAM,MAAM,iBAAiB,GAC1B,QAAQ,GACR,WAAW,GACX,OAAO,GACP,UAAU,GACV,MAAM,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tokenization.js","sourceRoot":"","sources":["../../source/types/tokenization.ts"],"names":[],"mappings":"AAAA;;GAEG"}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token breakdown by category
|
|
3
|
+
*/
|
|
4
|
+
export interface TokenBreakdown {
|
|
5
|
+
system: number;
|
|
6
|
+
userMessages: number;
|
|
7
|
+
assistantMessages: number;
|
|
8
|
+
toolDefinitions: number;
|
|
9
|
+
toolResults: number;
|
|
10
|
+
total: number;
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* Session usage data
|
|
14
|
+
*/
|
|
15
|
+
export interface SessionUsage {
|
|
16
|
+
id: string;
|
|
17
|
+
timestamp: number;
|
|
18
|
+
provider: string;
|
|
19
|
+
model: string;
|
|
20
|
+
tokens: TokenBreakdown;
|
|
21
|
+
messageCount: number;
|
|
22
|
+
duration?: number;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Daily aggregate usage
|
|
26
|
+
*/
|
|
27
|
+
export interface DailyAggregate {
|
|
28
|
+
date: string;
|
|
29
|
+
sessions: number;
|
|
30
|
+
totalTokens: number;
|
|
31
|
+
providers: Record<string, number>;
|
|
32
|
+
models: Record<string, number>;
|
|
33
|
+
}
|
|
34
|
+
/**
|
|
35
|
+
* Persistent usage data structure
|
|
36
|
+
*/
|
|
37
|
+
export interface UsageData {
|
|
38
|
+
sessions: SessionUsage[];
|
|
39
|
+
dailyAggregates: DailyAggregate[];
|
|
40
|
+
totalLifetime: number;
|
|
41
|
+
lastUpdated: number;
|
|
42
|
+
}
|
|
43
|
+
/**
|
|
44
|
+
* Current session statistics
|
|
45
|
+
*/
|
|
46
|
+
export interface CurrentSessionStats {
|
|
47
|
+
tokens: TokenBreakdown;
|
|
48
|
+
messageCount: number;
|
|
49
|
+
startTime: number;
|
|
50
|
+
provider: string;
|
|
51
|
+
model: string;
|
|
52
|
+
contextLimit: number | null;
|
|
53
|
+
percentUsed: number;
|
|
54
|
+
}
|
|
55
|
+
//# sourceMappingURL=usage.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"usage.d.ts","sourceRoot":"","sources":["../../source/types/usage.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,MAAM,WAAW,cAAc;IAC9B,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,MAAM,CAAC;IACrB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,eAAe,EAAE,MAAM,CAAC;IACxB,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,MAAM,CAAC;CACd;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC5B,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,cAAc,CAAC;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAClC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAC/B;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACzB,QAAQ,EAAE,YAAY,EAAE,CAAC;IACzB,eAAe,EAAE,cAAc,EAAE,CAAC;IAClC,aAAa,EAAE,MAAM,CAAC;IACtB,WAAW,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IACnC,MAAM,EAAE,cAAc,CAAC;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;CACpB"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"usage.js","sourceRoot":"","sources":["../../source/types/usage.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Usage calculator
|
|
3
|
+
* Calculates token breakdown by category
|
|
4
|
+
*/
|
|
5
|
+
import type { Message } from '../types/core.js';
|
|
6
|
+
import type { Tokenizer } from '../types/tokenization.js';
|
|
7
|
+
import type { TokenBreakdown } from '../types/usage.js';
|
|
8
|
+
/**
|
|
9
|
+
* Calculate token breakdown from messages
|
|
10
|
+
* @param messages - Messages to calculate breakdown for
|
|
11
|
+
* @param tokenizer - Tokenizer instance (used as fallback if getTokens not provided)
|
|
12
|
+
* @param getTokens - Optional cached token counting function for performance
|
|
13
|
+
*/
|
|
14
|
+
export declare function calculateTokenBreakdown(messages: Message[], tokenizer: Tokenizer, getTokens?: (message: Message) => number): TokenBreakdown;
|
|
15
|
+
/**
|
|
16
|
+
* Calculate tool definitions token count
|
|
17
|
+
* This estimates the tokens used by tool definitions sent to the model
|
|
18
|
+
*/
|
|
19
|
+
export declare function calculateToolDefinitionsTokens(toolCount: number): number;
|
|
20
|
+
/**
|
|
21
|
+
* Get status color based on percentage used
|
|
22
|
+
*/
|
|
23
|
+
export declare function getUsageStatusColor(percentUsed: number): 'success' | 'warning' | 'error';
|
|
24
|
+
/**
|
|
25
|
+
* Format token count with thousands separator
|
|
26
|
+
*/
|
|
27
|
+
export declare function formatTokenCount(tokens: number): string;
|
|
28
|
+
//# sourceMappingURL=calculator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"calculator.d.ts","sourceRoot":"","sources":["../../source/usage/calculator.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAC,OAAO,EAAC,MAAM,cAAc,CAAC;AAC1C,OAAO,KAAK,EAAC,SAAS,EAAC,MAAM,sBAAsB,CAAC;AACpD,OAAO,KAAK,EAAC,cAAc,EAAC,MAAM,gBAAgB,CAAC;AAEnD;;;;;GAKG;AACH,wBAAgB,uBAAuB,CACtC,QAAQ,EAAE,OAAO,EAAE,EACnB,SAAS,EAAE,SAAS,EACpB,SAAS,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,MAAM,GACtC,cAAc,CA+ChB;AAED;;;GAGG;AACH,wBAAgB,8BAA8B,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CAMxE;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAClC,WAAW,EAAE,MAAM,GACjB,SAAS,GAAG,SAAS,GAAG,OAAO,CAQjC;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAEvD"}
|