@ryukin-dev/pi-featherless-kali 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/README.md +136 -0
- package/bin/kaliai.js +146 -0
- package/extensions/featherless.ts +16 -0
- package/package.json +71 -0
- package/skills/kali-admin/SKILL.md +30 -0
- package/skills/websearch/SKILL.md +43 -0
- package/skills/websearch/extract.js +65 -0
- package/skills/websearch/package.json +16 -0
- package/skills/websearch/search.js +110 -0
- package/src/handlers/compaction.ts +66 -0
- package/src/handlers/concurrency.ts +70 -0
- package/src/handlers/context.test.ts +260 -0
- package/src/handlers/context.ts +211 -0
- package/src/handlers/provider.ts +14 -0
- package/src/handlers/shared.ts +10 -0
- package/src/handlers/update-check.ts +202 -0
- package/src/models/fetch.ts +31 -0
- package/src/models.ts +262 -0
- package/src/test-api.ts +157 -0
- package/src/tokenize.ts +198 -0
package/src/tokenize.ts
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
4
|
+
|
|
5
|
+
/** Endpoint that `tokenize` sends its `{ model, text }` POST request to. */
const TOKENIZE_URL = "https://api.featherless.ai/v1/tokenize";
|
|
6
|
+
|
|
7
|
+
/** Module-level cache of token counts, keyed by the string `cacheKey(model, text)` returns. */
const tokenCache = new Map<string, number>();
|
|
8
|
+
|
|
9
|
+
/**
 * Entry count at which `tokenize` evicts before inserting a new count: it
 * removes the first `floor(MAX_CACHE_SIZE * 0.1)` keys in Map iteration order.
 */
const MAX_CACHE_SIZE = 10000;
|
|
10
|
+
|
|
11
|
+
/**
 * Builds the cache key for a (model, text) pair.
 *
 * The model name is length-prefixed so the key is unambiguous: with a plain
 * `model:text` join, ("a:b", "c") and ("a", "b:c") would map to the same key
 * and one pair could be served the other's cached token count.
 *
 * @param model - Model identifier the text is tokenized for.
 * @param text - Text whose token count is cached.
 * @returns A string that is distinct for every distinct (model, text) pair.
 */
function cacheKey(model: string, text: string): string {
  return `${model.length}:${model}:${text}`;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Counts the tokens of `text` for `model` via the tokenize endpoint, caching
 * successful results in `tokenCache`.
 *
 * This function is best-effort: whenever the remote count cannot be obtained
 * (transport failure, non-2xx status, unparseable body, or a body without a
 * usable count) it logs a warning and returns `estimateTokens(text)` instead.
 * Estimates are never cached, so a later call can still get the real count.
 *
 * @param model - Model identifier sent to the endpoint.
 * @param text - Text to tokenize.
 * @param apiKey - Optional bearer token; the Authorization header is omitted when absent.
 * @returns The token count reported by the API, or a local estimate on failure.
 */
export async function tokenize(
  model: string,
  text: string,
  apiKey?: string,
): Promise<number> {
  const key = cacheKey(model, text);
  const cached = tokenCache.get(key);
  if (cached !== undefined) {
    return cached;
  }

  let count: number | undefined;
  try {
    const response = await fetch(TOKENIZE_URL, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}),
      },
      body: JSON.stringify({ model, text }),
    });

    if (!response.ok) {
      const body = await response.text().catch(() => "unknown");
      console.warn(
        `Tokenize API error: ${response.status} ${response.statusText}`,
        body,
      );
      return estimateTokens(text);
    }

    const data = (await response.json()) as {
      count?: unknown;
      tokens?: unknown;
    } | null;

    // Prefer the explicit count; otherwise derive it from the token list.
    if (typeof data?.count === "number") {
      count = data.count;
    } else if (Array.isArray(data?.tokens)) {
      count = data.tokens.length;
    }
  } catch (error: unknown) {
    // fetch() rejects on transport failures and json() on malformed bodies;
    // treat both like an HTTP error instead of failing the caller.
    console.warn(
      "Tokenize request failed:",
      error instanceof Error ? error.message : error,
    );
    return estimateTokens(text);
  }

  // A missing or nonsensical count must not be cached as if it were real.
  if (count === undefined || !Number.isFinite(count) || count < 0) {
    console.warn("Tokenize API returned no usable token count");
    return estimateTokens(text);
  }

  if (tokenCache.size >= MAX_CACHE_SIZE) {
    // Drop the oldest tenth of the cache (Map iterates in insertion order).
    let toEvict = Math.floor(MAX_CACHE_SIZE * 0.1);
    for (const staleKey of tokenCache.keys()) {
      if (toEvict <= 0) break;
      tokenCache.delete(staleKey);
      toEvict--;
    }
  }
  tokenCache.set(key, count);

  return count;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Counts tokens for several texts at once.
 *
 * Cached counts are used directly. The remaining texts are grouped so that
 * each distinct text is sent to `tokenize` only once, even when it appears
 * several times in `texts`; the requests run concurrently. If `tokenize`
 * rejects for a text, its count falls back to `estimateTokens(text)`.
 *
 * @param model - Model identifier the texts are tokenized for.
 * @param texts - Texts to count.
 * @param apiKey - Optional API key forwarded to `tokenize`.
 * @returns Token counts aligned index-for-index with `texts`.
 */
export async function tokenizeBatch(
  model: string,
  texts: string[],
  apiKey?: string,
): Promise<number[]> {
  const results = new Array<number>(texts.length);
  // Distinct uncached text -> every position in `texts` where it occurs.
  const pending = new Map<string, number[]>();

  texts.forEach((text, index) => {
    const cached = tokenCache.get(cacheKey(model, text));
    if (cached !== undefined) {
      results[index] = cached;
      return;
    }
    const positions = pending.get(text);
    if (positions) {
      positions.push(index);
    } else {
      pending.set(text, [index]);
    }
  });

  if (pending.size === 0) {
    return results;
  }

  await Promise.all(
    Array.from(pending, async ([text, positions]) => {
      let count: number;
      try {
        count = await tokenize(model, text, apiKey);
      } catch {
        // Best effort: one failing request must not fail the whole batch.
        count = estimateTokens(text);
      }
      for (const index of positions) {
        results[index] = count;
      }
    }),
  );

  return results;
}
|
|
99
|
+
|
|
100
|
+
/**
 * Estimates the token count of `text` locally, without calling the API.
 *
 * Text that looks like an `ls -l` listing (at least half of its non-blank
 * lines start with a file-mode string such as `-rw-r--r--`) is estimated at
 * 1.8 characters per token; everything else uses `defaultCharsPerToken`.
 *
 * @param text - Text to estimate.
 * @param defaultCharsPerToken - Characters per token for ordinary text. Values
 *   that are not finite and positive are replaced by 3.2 so the result is
 *   always a finite, non-negative integer.
 * @returns The estimated token count; 0 for empty text.
 */
export function estimateTokens(
  text: string,
  defaultCharsPerToken = 3.2,
): number {
  const chars = text.length;
  if (chars === 0) return 0;

  // Guard against division by zero / negative or NaN ratios.
  const charsPerToken =
    Number.isFinite(defaultCharsPerToken) && defaultCharsPerToken > 0
      ? defaultCharsPerToken
      : 3.2;

  // File type char, nine permission chars (including setuid/setgid/sticky
  // variants), then an optional ACL / security-context marker.
  const permissionPattern = /^[bcdlps-][rwxsStT-]{9}[.+@]?\s/;

  let contentLines = 0;
  let listingLines = 0;
  for (const line of text.split("\n")) {
    // Blank lines (notably the one produced by a trailing newline) say
    // nothing about the content type, so they are left out of the ratio.
    if (line.trim() === "") continue;
    contentLines++;
    if (permissionPattern.test(line)) listingLines++;
  }

  if (listingLines > 0 && listingLines >= contentLines * 0.5) {
    return Math.ceil(chars / 1.8);
  }

  return Math.ceil(chars / charsPerToken);
}
|
|
120
|
+
|
|
121
|
+
/**
 * Concatenates the text of a content value that is either a plain string or
 * an array of blocks; only blocks with `type === "text"` and a string `text`
 * contribute. Anything else yields "".
 */
function joinTextBlocks(content: unknown): string {
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";

  const parts: string[] = [];
  for (const block of content as unknown[]) {
    if (typeof block !== "object" || block === null) continue;
    const { type, text } = block as { type?: unknown; text?: unknown };
    if (type === "text" && typeof text === "string") {
      parts.push(text);
    }
  }
  return parts.join("");
}

/**
 * Flattens a chat message into the text used for token counting.
 *
 * - `user` / `toolResult`: string content as-is, or the joined `text` blocks.
 * - `assistant`: joined `text` blocks plus, for every `toolCall` block, its
 *   name followed by its JSON-serialised arguments. String content is
 *   returned as-is.
 * - any other role, or a missing/non-object message: "".
 *
 * Parts are joined without a separator.
 *
 * @param message - Message object with `role` and `content`; left untyped
 *   because the message type is declared outside this module.
 * @returns The extracted text, or "" when the message carries no text.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- signature kept for existing callers
export function extractText(message: any): string {
  if (typeof message !== "object" || message === null) return "";
  const { role, content } = message as { role?: unknown; content?: unknown };

  switch (role) {
    case "user":
    case "toolResult":
      return joinTextBlocks(content);

    case "assistant": {
      // Iterating a string would walk its characters and drop all of them.
      if (typeof content === "string") return content;
      if (!Array.isArray(content)) return "";

      const parts: string[] = [];
      for (const block of content as unknown[]) {
        if (typeof block !== "object" || block === null) continue;
        const { type, text, name, arguments: args } = block as {
          type?: unknown;
          text?: unknown;
          name?: unknown;
          arguments?: unknown;
        };
        if (type === "text") {
          parts.push(typeof text === "string" ? text : "");
        } else if (type === "toolCall") {
          parts.push(typeof name === "string" ? name : "");
          parts.push(JSON.stringify(args || {}));
        }
      }
      return parts.join("");
    }

    default:
      return "";
  }
}
|
|
165
|
+
|
|
166
|
+
/**
 * Counts the tokens of a single message.
 *
 * The message is flattened with `extractText`; a message that yields no text
 * counts as 0 and `tokenize` is not called for it.
 *
 * @param model - Model identifier passed to `tokenize`.
 * @param message - Message to count.
 * @param apiKey - Optional API key passed to `tokenize`.
 * @returns The token count of the message's text.
 */
export async function countMessageTokens(
  model: string,
  message: any,
  apiKey?: string,
): Promise<number> {
  const extracted = extractText(message);
  return extracted ? tokenize(model, extracted, apiKey) : 0;
}
|
|
175
|
+
|
|
176
|
+
/**
 * Counts the total tokens of a list of messages.
 *
 * Each message is flattened with `extractText`. Messages that yield no text
 * contribute 0 and are not sent to the tokenizer, matching how
 * `countMessageTokens` treats a single empty message. The remaining texts
 * are counted with `tokenizeBatch` and summed.
 *
 * @param model - Model identifier passed to `tokenizeBatch`.
 * @param messages - Messages to count.
 * @param apiKey - Optional API key passed to `tokenizeBatch`.
 * @returns The sum of the per-message token counts; 0 for an empty list.
 */
export async function countMessagesTokens(
  model: string,
  messages: any[],
  apiKey?: string,
): Promise<number> {
  const texts = messages
    .map((message) => extractText(message))
    .filter((text) => text !== "");

  if (texts.length === 0) return 0;

  const counts = await tokenizeBatch(model, texts, apiKey);
  return counts.reduce((sum, n) => sum + n, 0);
}
|
|
188
|
+
|
|
189
|
+
/** Removes every entry from the module-level token-count cache. */
export function clearTokenCache(): void {
  tokenCache.clear();
}
|
|
192
|
+
|
|
193
|
+
/**
 * Reports the current state of the token-count cache.
 *
 * @returns `size`, the number of cached entries right now, and `maxSize`,
 *   the value of `MAX_CACHE_SIZE`.
 */
export function getCacheStats(): { size: number; maxSize: number } {
  const size = tokenCache.size;
  const maxSize = MAX_CACHE_SIZE;
  return { size, maxSize };
}
|