reasonix 0.33.1 → 0.33.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/{chat-Q5ZCVIOO.js → chat-ZMSAXE77.js} +5 -4
- package/dist/cli/chunk-DAEAAVDF.js +199 -0
- package/dist/cli/chunk-DAEAAVDF.js.map +1 -0
- package/dist/cli/{chunk-Q6YFXW7H.js → chunk-G7M3QWEN.js} +27 -213
- package/dist/cli/chunk-G7M3QWEN.js.map +1 -0
- package/dist/cli/{chunk-MDHVWCJ4.js → chunk-OW7IHE6M.js} +10 -14
- package/dist/cli/chunk-OW7IHE6M.js.map +1 -0
- package/dist/cli/{chunk-D5DKXIP5.js → chunk-WVJL7ZO2.js} +15 -24
- package/dist/cli/chunk-WVJL7ZO2.js.map +1 -0
- package/dist/cli/{code-DLR77NPZ.js → code-R4TXQQEE.js} +5 -4
- package/dist/cli/{code-DLR77NPZ.js.map → code-R4TXQQEE.js.map} +1 -1
- package/dist/cli/{doctor-3TGB2NZN.js → doctor-V5HLCMSQ.js} +3 -2
- package/dist/cli/index.js +9 -8
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/{run-JMEOTQCG.js → run-HK3FP266.js} +3 -2
- package/dist/cli/{run-JMEOTQCG.js.map → run-HK3FP266.js.map} +1 -1
- package/dist/cli/{sessions-MOJAALJI.js → sessions-3XU2GGHX.js} +3 -2
- package/dist/cli/{sessions-MOJAALJI.js.map → sessions-3XU2GGHX.js.map} +1 -1
- package/dist/cli/{version-3MYFE4G6.js → version-5SGI2SEE.js} +3 -2
- package/dist/cli/{version-3MYFE4G6.js.map → version-5SGI2SEE.js.map} +1 -1
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/cli/chunk-D5DKXIP5.js.map +0 -1
- package/dist/cli/chunk-MDHVWCJ4.js.map +0 -1
- package/dist/cli/chunk-Q6YFXW7H.js.map +0 -1
- /package/dist/cli/{chat-Q5ZCVIOO.js.map → chat-ZMSAXE77.js.map} +0 -0
- /package/dist/cli/{doctor-3TGB2NZN.js.map → doctor-V5HLCMSQ.js.map} +0 -0

package/dist/cli/{chat-Q5ZCVIOO.js → chat-ZMSAXE77.js}
@@ -1,12 +1,12 @@
 #!/usr/bin/env node
 import {
   chatCommand
-} from "./chunk-MDHVWCJ4.js";
+} from "./chunk-OW7IHE6M.js";
 import "./chunk-BQNUJJN7.js";
 import "./chunk-RFX7TYVV.js";
 import "./chunk-63KAV5DX.js";
 import "./chunk-CPOV2O73.js";
-import "./chunk-Q6YFXW7H.js";
+import "./chunk-G7M3QWEN.js";
 import "./chunk-I6YIAK6C.js";
 import "./chunk-XJLZ4HKU.js";
 import "./chunk-XHQIK7B6.js";
@@ -14,7 +14,8 @@ import "./chunk-6TMHAK5D.js";
 import "./chunk-SDE5U32Z.js";
 import "./chunk-ZPTSJGX5.js";
 import "./chunk-MHDNZXJJ.js";
-import "./chunk-D5DKXIP5.js";
+import "./chunk-WVJL7ZO2.js";
+import "./chunk-DAEAAVDF.js";
 import "./chunk-KMWKGPFZ.js";
 import "./chunk-3Q3C4W66.js";
 import "./chunk-4DCHFFEY.js";
@@ -36,4 +37,4 @@ import "./chunk-ORM6PK57.js";
 export {
   chatCommand
 };
-//# sourceMappingURL=chat-Q5ZCVIOO.js.map
+//# sourceMappingURL=chat-ZMSAXE77.js.map
package/dist/cli/chunk-DAEAAVDF.js
@@ -0,0 +1,199 @@
+#!/usr/bin/env node
+
+// src/tokenizer.ts
+import { existsSync, readFileSync } from "fs";
+import { createRequire } from "module";
+import { dirname, join } from "path";
+import { fileURLToPath } from "url";
+import { gunzipSync } from "zlib";
+function buildByteToChar() {
+  const result = new Array(256);
+  const bs = [];
+  for (let b = 33; b <= 126; b++) bs.push(b);
+  for (let b = 161; b <= 172; b++) bs.push(b);
+  for (let b = 174; b <= 255; b++) bs.push(b);
+  const cs = bs.slice();
+  let n = 0;
+  for (let b = 0; b < 256; b++) {
+    if (!bs.includes(b)) {
+      bs.push(b);
+      cs.push(256 + n);
+      n++;
+    }
+  }
+  for (let i = 0; i < bs.length; i++) {
+    result[bs[i]] = String.fromCodePoint(cs[i]);
+  }
+  return result;
+}
+var cached = null;
+function resolveDataPath() {
+  if (process.env.REASONIX_TOKENIZER_PATH) return process.env.REASONIX_TOKENIZER_PATH;
+  const candidates = [];
+  try {
+    const here = dirname(fileURLToPath(import.meta.url));
+    candidates.push(join(here, "..", "data", "deepseek-tokenizer.json.gz"));
+    candidates.push(join(here, "..", "..", "data", "deepseek-tokenizer.json.gz"));
+  } catch {
+  }
+  try {
+    const req = createRequire(import.meta.url);
+    candidates.push(
+      join(dirname(req.resolve("reasonix/package.json")), "data", "deepseek-tokenizer.json.gz")
+    );
+  } catch {
+  }
+  for (const p of candidates) {
+    if (existsSync(p)) return p;
+  }
+  return candidates[0] ?? join(process.cwd(), "data", "deepseek-tokenizer.json.gz");
+}
+function loadTokenizer() {
+  if (cached) return cached;
+  const buf = readFileSync(resolveDataPath());
+  const json = gunzipSync(buf).toString("utf8");
+  const data = JSON.parse(json);
+  const mergeRank = /* @__PURE__ */ new Map();
+  for (let i = 0; i < data.model.merges.length; i++) {
+    mergeRank.set(data.model.merges[i], i);
+  }
+  const splitRegexes = [];
+  for (const p of data.pre_tokenizer.pretokenizers) {
+    if (p.type === "Split") {
+      splitRegexes.push(new RegExp(p.pattern.Regex, "gu"));
+    }
+  }
+  const addedMap = /* @__PURE__ */ new Map();
+  const addedContents = [];
+  for (const t of data.added_tokens) {
+    if (!t.special) {
+      addedMap.set(t.content, t.id);
+      addedContents.push(t.content);
+    }
+  }
+  addedContents.sort((a, b) => b.length - a.length);
+  const addedPattern = addedContents.length ? new RegExp(addedContents.map(escapeRegex).join("|"), "g") : null;
+  cached = {
+    vocab: data.model.vocab,
+    mergeRank,
+    splitRegexes,
+    byteToChar: buildByteToChar(),
+    addedPattern,
+    addedMap
+  };
+  return cached;
+}
+function escapeRegex(s) {
+  return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+}
+function applySplit(chunks, re) {
+  const out = [];
+  for (const chunk of chunks) {
+    if (!chunk) continue;
+    re.lastIndex = 0;
+    let last = 0;
+    for (const m of chunk.matchAll(re)) {
+      const idx = m.index ?? 0;
+      if (idx > last) out.push(chunk.slice(last, idx));
+      if (m[0].length > 0) out.push(m[0]);
+      last = idx + m[0].length;
+    }
+    if (last < chunk.length) out.push(chunk.slice(last));
+  }
+  return out;
+}
+function byteLevelEncode(s, byteToChar) {
+  const bytes = new TextEncoder().encode(s);
+  let out = "";
+  for (let i = 0; i < bytes.length; i++) out += byteToChar[bytes[i]];
+  return out;
+}
+function bpeEncode(piece, mergeRank) {
+  if (piece.length <= 1) return piece ? [piece] : [];
+  let word = Array.from(piece);
+  while (true) {
+    let bestIdx = -1;
+    let bestRank = Number.POSITIVE_INFINITY;
+    for (let i = 0; i < word.length - 1; i++) {
+      const pair = `${word[i]} ${word[i + 1]}`;
+      const rank = mergeRank.get(pair);
+      if (rank !== void 0 && rank < bestRank) {
+        bestRank = rank;
+        bestIdx = i;
+        if (rank === 0) break;
+      }
+    }
+    if (bestIdx < 0) break;
+    word = [
+      ...word.slice(0, bestIdx),
+      word[bestIdx] + word[bestIdx + 1],
+      ...word.slice(bestIdx + 2)
+    ];
+    if (word.length === 1) break;
+  }
+  return word;
+}
+function encode(text) {
+  if (!text) return [];
+  const t = loadTokenizer();
+  const ids = [];
+  const process2 = (segment) => {
+    if (!segment) return;
+    let chunks = [segment];
+    for (const re of t.splitRegexes) chunks = applySplit(chunks, re);
+    for (const chunk of chunks) {
+      if (!chunk) continue;
+      const byteLevel = byteLevelEncode(chunk, t.byteToChar);
+      const pieces = bpeEncode(byteLevel, t.mergeRank);
+      for (const p of pieces) {
+        const id = t.vocab[p];
+        if (id !== void 0) ids.push(id);
+      }
+    }
+  };
+  if (t.addedPattern) {
+    t.addedPattern.lastIndex = 0;
+    let last = 0;
+    for (const m of text.matchAll(t.addedPattern)) {
+      const idx = m.index ?? 0;
+      if (idx > last) process2(text.slice(last, idx));
+      const id = t.addedMap.get(m[0]);
+      if (id !== void 0) ids.push(id);
+      last = idx + m[0].length;
+    }
+    if (last < text.length) process2(text.slice(last));
+  } else {
+    process2(text);
+  }
+  return ids;
+}
+function countTokens(text) {
+  return encode(text).length;
+}
+function estimateConversationTokens(messages) {
+  let total = 0;
+  for (const m of messages) {
+    if (typeof m.content === "string" && m.content) {
+      total += countTokens(m.content);
+    }
+    if (m.tool_calls && Array.isArray(m.tool_calls) && m.tool_calls.length > 0) {
+      total += countTokens(JSON.stringify(m.tool_calls));
+    }
+  }
+  return total;
+}
+function estimateRequestTokens(messages, toolSpecs) {
+  let total = estimateConversationTokens(messages);
+  if (toolSpecs && toolSpecs.length > 0) {
+    total += countTokens(JSON.stringify(toolSpecs));
+  }
+  return total;
+}
+
+export {
+  resolveDataPath,
+  countTokens,
+  estimateConversationTokens,
+  estimateRequestTokens
+};
+//# sourceMappingURL=chunk-DAEAAVDF.js.map
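
Note: the new chunk above pulls the DeepSeek tokenizer out into a shared module, so the CLI entries and chunk-G7M3QWEN.js import it instead of inlining a copy. A minimal consumer sketch of the exported helpers, assuming they are re-exported from reasonix's public entry point (this diff only shows the internal chunk exports, so the import path is an assumption):

// Hypothetical usage; the import path "reasonix" is assumed, not confirmed by this diff.
import { countTokens, estimateRequestTokens } from "reasonix";

// Message shapes follow the signatures visible above: content is an optional
// string, and tool_calls (when present) is counted via JSON.stringify.
const messages = [
  { content: "Summarize the design doc." },
  { content: null, tool_calls: [{ name: "read_file", args: {} }] } // hypothetical tool call
];
const toolSpecs = [{ name: "read_file", description: "Read a file" }]; // hypothetical spec

console.log(countTokens("hello world"));                 // number of BPE token ids
console.log(estimateRequestTokens(messages, toolSpecs)); // conversation + tool-spec tokens

Per the source comments embedded in the sourcemap below, these counts under-report by roughly 3-6% versus the API's prompt_tokens, since chat-template framing is not replayed.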

package/dist/cli/chunk-DAEAAVDF.js.map
@@ -0,0 +1 @@
+{"version":3,"sources":["../../src/tokenizer.ts"],"sourcesContent":["/** Encode-only DeepSeek V3 tokenizer port; ~3% drift vs API (chat-template framing not replayed). */\n\nimport { existsSync, readFileSync } from \"node:fs\";\nimport { createRequire } from \"node:module\";\nimport { dirname, join } from \"node:path\";\nimport { fileURLToPath } from \"node:url\";\nimport { gunzipSync } from \"node:zlib\";\n\ninterface AddedToken {\n  id: number;\n  content: string;\n  special: boolean;\n  normalized: boolean;\n}\n\ninterface SplitPretokenizer {\n  type: \"Split\";\n  pattern: { Regex: string };\n  behavior: \"Isolated\" | \"Removed\" | string;\n  invert: boolean;\n}\n\ninterface ByteLevelPretokenizer {\n  type: \"ByteLevel\";\n  add_prefix_space: boolean;\n  trim_offsets: boolean;\n  use_regex: boolean;\n}\n\ntype Pretokenizer = SplitPretokenizer | ByteLevelPretokenizer;\n\ninterface TokenizerData {\n  added_tokens: AddedToken[];\n  pre_tokenizer: {\n    type: \"Sequence\";\n    pretokenizers: Pretokenizer[];\n  };\n  model: {\n    type: \"BPE\";\n    vocab: Record<string, number>;\n    merges: string[];\n  };\n}\n\ninterface LoadedTokenizer {\n  vocab: Record<string, number>;\n  mergeRank: Map<string, number>;\n  splitRegexes: RegExp[];\n  byteToChar: string[];\n  /** Non-special added tokens only — special tokens in user text tokenize byte-by-byte (HF default). */\n  addedPattern: RegExp | null;\n  addedMap: Map<string, number>;\n}\n\n/** GPT-2 byte→unicode map; lets byte-level BPE vocab serialize as readable JSON strings. */\nfunction buildByteToChar(): string[] {\n  const result: string[] = new Array(256);\n  const bs: number[] = [];\n  for (let b = 33; b <= 126; b++) bs.push(b);\n  for (let b = 161; b <= 172; b++) bs.push(b);\n  for (let b = 174; b <= 255; b++) bs.push(b);\n  const cs = bs.slice();\n  let n = 0;\n  for (let b = 0; b < 256; b++) {\n    if (!bs.includes(b)) {\n      bs.push(b);\n      cs.push(256 + n);\n      n++;\n    }\n  }\n  for (let i = 0; i < bs.length; i++) {\n    result[bs[i]!] = String.fromCodePoint(cs[i]!);\n  }\n  return result;\n}\n\nlet cached: LoadedTokenizer | null = null;\n\n/** Two ../data candidates needed: dist/index.js AND dist/cli/index.js resolve to different roots. */\nexport function resolveDataPath(): string {\n  if (process.env.REASONIX_TOKENIZER_PATH) return process.env.REASONIX_TOKENIZER_PATH;\n  const candidates: string[] = [];\n  try {\n    const here = dirname(fileURLToPath(import.meta.url));\n    candidates.push(join(here, \"..\", \"data\", \"deepseek-tokenizer.json.gz\"));\n    candidates.push(join(here, \"..\", \"..\", \"data\", \"deepseek-tokenizer.json.gz\"));\n  } catch {\n    /* import.meta.url unavailable — skip to the package resolution step. */\n  }\n  try {\n    const req = createRequire(import.meta.url);\n    candidates.push(\n      join(dirname(req.resolve(\"reasonix/package.json\")), \"data\", \"deepseek-tokenizer.json.gz\"),\n    );\n  } catch {\n    /* Not installed as `reasonix/` — the earlier candidates still may hit. */\n  }\n  for (const p of candidates) {\n    if (existsSync(p)) return p;\n  }\n  // Nothing exists — return the first candidate anyway so readFileSync\n  // surfaces a concrete path in the ENOENT message (better than silent miss).\n  return candidates[0] ?? join(process.cwd(), \"data\", \"deepseek-tokenizer.json.gz\");\n}\n\nfunction loadTokenizer(): LoadedTokenizer {\n  if (cached) return cached;\n  const buf = readFileSync(resolveDataPath());\n  const json = gunzipSync(buf).toString(\"utf8\");\n  const data = JSON.parse(json) as TokenizerData;\n\n  const mergeRank = new Map<string, number>();\n  for (let i = 0; i < data.model.merges.length; i++) {\n    mergeRank.set(data.model.merges[i]!, i);\n  }\n\n  const splitRegexes: RegExp[] = [];\n  for (const p of data.pre_tokenizer.pretokenizers) {\n    if (p.type === \"Split\") {\n      // All three Split rules use Isolated — matches become their own\n      // pre-tokens and so do the in-between stretches. The ByteLevel\n      // stage in the Sequence does no extra splitting here\n      // (use_regex:false), so our 3 Split regexes are the whole story.\n      splitRegexes.push(new RegExp(p.pattern.Regex, \"gu\"));\n    }\n  }\n\n  const addedMap = new Map<string, number>();\n  const addedContents: string[] = [];\n  for (const t of data.added_tokens) {\n    if (!t.special) {\n      addedMap.set(t.content, t.id);\n      addedContents.push(t.content);\n    }\n  }\n  // Longest-first ensures greedy matching doesn't lose a longer token\n  // to a shorter prefix (e.g. `<think>` before `<`).\n  addedContents.sort((a, b) => b.length - a.length);\n  const addedPattern = addedContents.length\n    ? new RegExp(addedContents.map(escapeRegex).join(\"|\"), \"g\")\n    : null;\n\n  cached = {\n    vocab: data.model.vocab,\n    mergeRank,\n    splitRegexes,\n    byteToChar: buildByteToChar(),\n    addedPattern,\n    addedMap,\n  };\n  return cached;\n}\n\nfunction escapeRegex(s: string): string {\n  return s.replace(/[.*+?^${}()|[\\]\\\\]/g, \"\\\\$&\");\n}\n\nfunction applySplit(chunks: string[], re: RegExp): string[] {\n  const out: string[] = [];\n  for (const chunk of chunks) {\n    if (!chunk) continue;\n    // Reset lastIndex — reusing a /g regex across matchAll iterations\n    // is safe (matchAll internally advances), but across different\n    // input strings we want a clean start.\n    re.lastIndex = 0;\n    let last = 0;\n    for (const m of chunk.matchAll(re)) {\n      const idx = m.index ?? 0;\n      if (idx > last) out.push(chunk.slice(last, idx));\n      if (m[0].length > 0) out.push(m[0]);\n      last = idx + m[0].length;\n    }\n    if (last < chunk.length) out.push(chunk.slice(last));\n  }\n  return out;\n}\n\n/** UTF-8 bytes of `s`, each mapped to its byte-level visible char. */\nfunction byteLevelEncode(s: string, byteToChar: string[]): string {\n  const bytes = new TextEncoder().encode(s);\n  let out = \"\";\n  for (let i = 0; i < bytes.length; i++) out += byteToChar[bytes[i]!];\n  return out;\n}\n\nfunction bpeEncode(piece: string, mergeRank: Map<string, number>): string[] {\n  if (piece.length <= 1) return piece ? [piece] : [];\n  let word: string[] = Array.from(piece);\n  while (true) {\n    let bestIdx = -1;\n    let bestRank = Number.POSITIVE_INFINITY;\n    for (let i = 0; i < word.length - 1; i++) {\n      const pair = `${word[i]} ${word[i + 1]}`;\n      const rank = mergeRank.get(pair);\n      if (rank !== undefined && rank < bestRank) {\n        bestRank = rank;\n        bestIdx = i;\n        if (rank === 0) break; // 0 is already the best possible\n      }\n    }\n    if (bestIdx < 0) break;\n    word = [\n      ...word.slice(0, bestIdx),\n      word[bestIdx]! + word[bestIdx + 1]!,\n      ...word.slice(bestIdx + 2),\n    ];\n    if (word.length === 1) break;\n  }\n  return word;\n}\n\nexport function encode(text: string): number[] {\n  if (!text) return [];\n  const t = loadTokenizer();\n  const ids: number[] = [];\n\n  const process = (segment: string) => {\n    if (!segment) return;\n    let chunks: string[] = [segment];\n    for (const re of t.splitRegexes) chunks = applySplit(chunks, re);\n    for (const chunk of chunks) {\n      if (!chunk) continue;\n      const byteLevel = byteLevelEncode(chunk, t.byteToChar);\n      const pieces = bpeEncode(byteLevel, t.mergeRank);\n      for (const p of pieces) {\n        const id = t.vocab[p];\n        // If not in vocab we silently skip: shouldn't happen for\n        // byte-level BPE (every single byte has its own vocab entry),\n        // but if a future tokenizer update breaks that invariant we'd\n        // rather under-count than throw from a UI gauge.\n        if (id !== undefined) ids.push(id);\n      }\n    }\n  };\n\n  if (t.addedPattern) {\n    t.addedPattern.lastIndex = 0;\n    let last = 0;\n    for (const m of text.matchAll(t.addedPattern)) {\n      const idx = m.index ?? 0;\n      if (idx > last) process(text.slice(last, idx));\n      const id = t.addedMap.get(m[0]);\n      if (id !== undefined) ids.push(id);\n      last = idx + m[0].length;\n    }\n    if (last < text.length) process(text.slice(last));\n  } else {\n    process(text);\n  }\n  return ids;\n}\n\nexport function countTokens(text: string): number {\n  return encode(text).length;\n}\n\n/** Doesn't add chat-template framing overhead; under-counts ~3-6% vs real `prompt_tokens`. */\nexport function estimateConversationTokens(\n  messages: Array<{ content?: string | null; tool_calls?: unknown }>,\n): number {\n  let total = 0;\n  for (const m of messages) {\n    if (typeof m.content === \"string\" && m.content) {\n      total += countTokens(m.content);\n    }\n    // Tool-call arguments are serialized as JSON in the prompt by the\n    // chat template; their bytes WILL count upstream, so we count\n    // them too. Stringify-once is cheap relative to the tokenize.\n    if (m.tool_calls && Array.isArray(m.tool_calls) && m.tool_calls.length > 0) {\n      total += countTokens(JSON.stringify(m.tool_calls));\n    }\n  }\n  return total;\n}\n\n/** Tool specs ride in a separate request blob; must be counted separately for an accurate preflight. */\nexport function estimateRequestTokens(\n  messages: Array<{ content?: string | null; tool_calls?: unknown }>,\n  toolSpecs?: ReadonlyArray<unknown> | null,\n): number {\n  let total = estimateConversationTokens(messages);\n  if (toolSpecs && toolSpecs.length > 0) {\n    total += countTokens(JSON.stringify(toolSpecs));\n  }\n  return total;\n}\n\n/** Exposed for tests — resets the lazy-load singleton. */\nexport function _resetForTests(): void {\n  cached = null;\n}\n"],"mappings":";;;AAEA,SAAS,YAAY,oBAAoB;AACzC,SAAS,qBAAqB;AAC9B,SAAS,SAAS,YAAY;AAC9B,SAAS,qBAAqB;AAC9B,SAAS,kBAAkB;AAiD3B,SAAS,kBAA4B;AACnC,QAAM,SAAmB,IAAI,MAAM,GAAG;AACtC,QAAM,KAAe,CAAC;AACtB,WAAS,IAAI,IAAI,KAAK,KAAK,IAAK,IAAG,KAAK,CAAC;AACzC,WAAS,IAAI,KAAK,KAAK,KAAK,IAAK,IAAG,KAAK,CAAC;AAC1C,WAAS,IAAI,KAAK,KAAK,KAAK,IAAK,IAAG,KAAK,CAAC;AAC1C,QAAM,KAAK,GAAG,MAAM;AACpB,MAAI,IAAI;AACR,WAAS,IAAI,GAAG,IAAI,KAAK,KAAK;AAC5B,QAAI,CAAC,GAAG,SAAS,CAAC,GAAG;AACnB,SAAG,KAAK,CAAC;AACT,SAAG,KAAK,MAAM,CAAC;AACf;AAAA,IACF;AAAA,EACF;AACA,WAAS,IAAI,GAAG,IAAI,GAAG,QAAQ,KAAK;AAClC,WAAO,GAAG,CAAC,CAAE,IAAI,OAAO,cAAc,GAAG,CAAC,CAAE;AAAA,EAC9C;AACA,SAAO;AACT;AAEA,IAAI,SAAiC;AAG9B,SAAS,kBAA0B;AACxC,MAAI,QAAQ,IAAI,wBAAyB,QAAO,QAAQ,IAAI;AAC5D,QAAM,aAAuB,CAAC;AAC9B,MAAI;AACF,UAAM,OAAO,QAAQ,cAAc,YAAY,GAAG,CAAC;AACnD,eAAW,KAAK,KAAK,MAAM,MAAM,QAAQ,4BAA4B,CAAC;AACtE,eAAW,KAAK,KAAK,MAAM,MAAM,MAAM,QAAQ,4BAA4B,CAAC;AAAA,EAC9E,QAAQ;AAAA,EAER;AACA,MAAI;AACF,UAAM,MAAM,cAAc,YAAY,GAAG;AACzC,eAAW;AAAA,MACT,KAAK,QAAQ,IAAI,QAAQ,uBAAuB,CAAC,GAAG,QAAQ,4BAA4B;AAAA,IAC1F;AAAA,EACF,QAAQ;AAAA,EAER;AACA,aAAW,KAAK,YAAY;AAC1B,QAAI,WAAW,CAAC,EAAG,QAAO;AAAA,EAC5B;AAGA,SAAO,WAAW,CAAC,KAAK,KAAK,QAAQ,IAAI,GAAG,QAAQ,4BAA4B;AAClF;AAEA,SAAS,gBAAiC;AACxC,MAAI,OAAQ,QAAO;AACnB,QAAM,MAAM,aAAa,gBAAgB,CAAC;AAC1C,QAAM,OAAO,WAAW,GAAG,EAAE,SAAS,MAAM;AAC5C,QAAM,OAAO,KAAK,MAAM,IAAI;AAE5B,QAAM,YAAY,oBAAI,IAAoB;AAC1C,WAAS,IAAI,GAAG,IAAI,KAAK,MAAM,OAAO,QAAQ,KAAK;AACjD,cAAU,IAAI,KAAK,MAAM,OAAO,CAAC,GAAI,CAAC;AAAA,EACxC;AAEA,QAAM,eAAyB,CAAC;AAChC,aAAW,KAAK,KAAK,cAAc,eAAe;AAChD,QAAI,EAAE,SAAS,SAAS;AAKtB,mBAAa,KAAK,IAAI,OAAO,EAAE,QAAQ,OAAO,IAAI,CAAC;AAAA,IACrD;AAAA,EACF;AAEA,QAAM,WAAW,oBAAI,IAAoB;AACzC,QAAM,gBAA0B,CAAC;AACjC,aAAW,KAAK,KAAK,cAAc;AACjC,QAAI,CAAC,EAAE,SAAS;AACd,eAAS,IAAI,EAAE,SAAS,EAAE,EAAE;AAC5B,oBAAc,KAAK,EAAE,OAAO;AAAA,IAC9B;AAAA,EACF;AAGA,gBAAc,KAAK,CAAC,GAAG,MAAM,EAAE,SAAS,EAAE,MAAM;AAChD,QAAM,eAAe,cAAc,SAC/B,IAAI,OAAO,cAAc,IAAI,WAAW,EAAE,KAAK,GAAG,GAAG,GAAG,IACxD;AAEJ,WAAS;AAAA,IACP,OAAO,KAAK,MAAM;AAAA,IAClB;AAAA,IACA;AAAA,IACA,YAAY,gBAAgB;AAAA,IAC5B;AAAA,IACA;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,YAAY,GAAmB;AACtC,SAAO,EAAE,QAAQ,uBAAuB,MAAM;AAChD;AAEA,SAAS,WAAW,QAAkB,IAAsB;AAC1D,QAAM,MAAgB,CAAC;AACvB,aAAW,SAAS,QAAQ;AAC1B,QAAI,CAAC,MAAO;AAIZ,OAAG,YAAY;AACf,QAAI,OAAO;AACX,eAAW,KAAK,MAAM,SAAS,EAAE,GAAG;AAClC,YAAM,MAAM,EAAE,SAAS;AACvB,UAAI,MAAM,KAAM,KAAI,KAAK,MAAM,MAAM,MAAM,GAAG,CAAC;AAC/C,UAAI,EAAE,CAAC,EAAE,SAAS,EAAG,KAAI,KAAK,EAAE,CAAC,CAAC;AAClC,aAAO,MAAM,EAAE,CAAC,EAAE;AAAA,IACpB;AACA,QAAI,OAAO,MAAM,OAAQ,KAAI,KAAK,MAAM,MAAM,IAAI,CAAC;AAAA,EACrD;AACA,SAAO;AACT;AAGA,SAAS,gBAAgB,GAAW,YAA8B;AAChE,QAAM,QAAQ,IAAI,YAAY,EAAE,OAAO,CAAC;AACxC,MAAI,MAAM;AACV,WAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,IAAK,QAAO,WAAW,MAAM,CAAC,CAAE;AAClE,SAAO;AACT;AAEA,SAAS,UAAU,OAAe,WAA0C;AAC1E,MAAI,MAAM,UAAU,EAAG,QAAO,QAAQ,CAAC,KAAK,IAAI,CAAC;AACjD,MAAI,OAAiB,MAAM,KAAK,KAAK;AACrC,SAAO,MAAM;AACX,QAAI,UAAU;AACd,QAAI,WAAW,OAAO;AACtB,aAAS,IAAI,GAAG,IAAI,KAAK,SAAS,GAAG,KAAK;AACxC,YAAM,OAAO,GAAG,KAAK,CAAC,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;AACtC,YAAM,OAAO,UAAU,IAAI,IAAI;AAC/B,UAAI,SAAS,UAAa,OAAO,UAAU;AACzC,mBAAW;AACX,kBAAU;AACV,YAAI,SAAS,EAAG;AAAA,MAClB;AAAA,IACF;AACA,QAAI,UAAU,EAAG;AACjB,WAAO;AAAA,MACL,GAAG,KAAK,MAAM,GAAG,OAAO;AAAA,MACxB,KAAK,OAAO,IAAK,KAAK,UAAU,CAAC;AAAA,MACjC,GAAG,KAAK,MAAM,UAAU,CAAC;AAAA,IAC3B;AACA,QAAI,KAAK,WAAW,EAAG;AAAA,EACzB;AACA,SAAO;AACT;AAEO,SAAS,OAAO,MAAwB;AAC7C,MAAI,CAAC,KAAM,QAAO,CAAC;AACnB,QAAM,IAAI,cAAc;AACxB,QAAM,MAAgB,CAAC;AAEvB,QAAMA,WAAU,CAAC,YAAoB;AACnC,QAAI,CAAC,QAAS;AACd,QAAI,SAAmB,CAAC,OAAO;AAC/B,eAAW,MAAM,EAAE,aAAc,UAAS,WAAW,QAAQ,EAAE;AAC/D,eAAW,SAAS,QAAQ;AAC1B,UAAI,CAAC,MAAO;AACZ,YAAM,YAAY,gBAAgB,OAAO,EAAE,UAAU;AACrD,YAAM,SAAS,UAAU,WAAW,EAAE,SAAS;AAC/C,iBAAW,KAAK,QAAQ;AACtB,cAAM,KAAK,EAAE,MAAM,CAAC;AAKpB,YAAI,OAAO,OAAW,KAAI,KAAK,EAAE;AAAA,MACnC;AAAA,IACF;AAAA,EACF;AAEA,MAAI,EAAE,cAAc;AAClB,MAAE,aAAa,YAAY;AAC3B,QAAI,OAAO;AACX,eAAW,KAAK,KAAK,SAAS,EAAE,YAAY,GAAG;AAC7C,YAAM,MAAM,EAAE,SAAS;AACvB,UAAI,MAAM,KAAM,CAAAA,SAAQ,KAAK,MAAM,MAAM,GAAG,CAAC;AAC7C,YAAM,KAAK,EAAE,SAAS,IAAI,EAAE,CAAC,CAAC;AAC9B,UAAI,OAAO,OAAW,KAAI,KAAK,EAAE;AACjC,aAAO,MAAM,EAAE,CAAC,EAAE;AAAA,IACpB;AACA,QAAI,OAAO,KAAK,OAAQ,CAAAA,SAAQ,KAAK,MAAM,IAAI,CAAC;AAAA,EAClD,OAAO;AACL,IAAAA,SAAQ,IAAI;AAAA,EACd;AACA,SAAO;AACT;AAEO,SAAS,YAAY,MAAsB;AAChD,SAAO,OAAO,IAAI,EAAE;AACtB;AAGO,SAAS,2BACd,UACQ;AACR,MAAI,QAAQ;AACZ,aAAW,KAAK,UAAU;AACxB,QAAI,OAAO,EAAE,YAAY,YAAY,EAAE,SAAS;AAC9C,eAAS,YAAY,EAAE,OAAO;AAAA,IAChC;AAIA,QAAI,EAAE,cAAc,MAAM,QAAQ,EAAE,UAAU,KAAK,EAAE,WAAW,SAAS,GAAG;AAC1E,eAAS,YAAY,KAAK,UAAU,EAAE,UAAU,CAAC;AAAA,IACnD;AAAA,EACF;AACA,SAAO;AACT;AAGA,SAAS,sBACd,UACA,WACQ;AACR,MAAI,QAAQ,2BAA2B,QAAQ;AAC/C,MAAI,aAAa,UAAU,SAAS,GAAG;AACrC,aAAS,YAAY,KAAK,UAAU,SAAS,CAAC;AAAA,EAChD;AACA,SAAO;AACT;","names":["process"]}
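
The TypeScript source embedded in this sourcemap documents the otherwise opaque buildByteToChar table: it is the GPT-2 byte-to-unicode map, which keeps every raw byte representable as a printable character inside the JSON vocab. A quick sketch of the property, assuming direct access to buildByteToChar (the chunk does not export it, so this is illustration only):

const byteToChar = buildByteToChar();
console.log(byteToChar[0x41]); // "A": printable bytes 33..126 map to themselves
console.log(byteToChar[0x20]); // "Ġ" (U+0120): space is the 33rd excluded byte, so 256 + 32

This is why leading-space tokens in byte-level BPE vocabs show up as "Ġ"-prefixed strings.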

package/dist/cli/{chunk-Q6YFXW7H.js → chunk-G7M3QWEN.js}
@@ -3,6 +3,11 @@ import {
   MemoryStore,
   sanitizeMemoryName
 } from "./chunk-6TMHAK5D.js";
+import {
+  countTokens,
+  estimateConversationTokens,
+  estimateRequestTokens
+} from "./chunk-DAEAAVDF.js";
 import {
   Usage
 } from "./chunk-KMWKGPFZ.js";
@@ -43,196 +48,6 @@ import {
   SessionStats
 } from "./chunk-ORM6PK57.js";
 
-// src/tokenizer.ts
-import { existsSync, readFileSync } from "fs";
-import { createRequire } from "module";
-import { dirname, join } from "path";
-import { fileURLToPath } from "url";
-import { gunzipSync } from "zlib";
-function buildByteToChar() {
-  const result = new Array(256);
-  const bs = [];
-  for (let b = 33; b <= 126; b++) bs.push(b);
-  for (let b = 161; b <= 172; b++) bs.push(b);
-  for (let b = 174; b <= 255; b++) bs.push(b);
-  const cs = bs.slice();
-  let n = 0;
-  for (let b = 0; b < 256; b++) {
-    if (!bs.includes(b)) {
-      bs.push(b);
-      cs.push(256 + n);
-      n++;
-    }
-  }
-  for (let i = 0; i < bs.length; i++) {
-    result[bs[i]] = String.fromCodePoint(cs[i]);
-  }
-  return result;
-}
-var cached = null;
-function resolveDataPath() {
-  if (process.env.REASONIX_TOKENIZER_PATH) return process.env.REASONIX_TOKENIZER_PATH;
-  const candidates = [];
-  try {
-    const here = dirname(fileURLToPath(import.meta.url));
-    candidates.push(join(here, "..", "data", "deepseek-tokenizer.json.gz"));
-    candidates.push(join(here, "..", "..", "data", "deepseek-tokenizer.json.gz"));
-  } catch {
-  }
-  try {
-    const req = createRequire(import.meta.url);
-    candidates.push(
-      join(dirname(req.resolve("reasonix/package.json")), "data", "deepseek-tokenizer.json.gz")
-    );
-  } catch {
-  }
-  for (const p of candidates) {
-    if (existsSync(p)) return p;
-  }
-  return candidates[0] ?? join(process.cwd(), "data", "deepseek-tokenizer.json.gz");
-}
-function loadTokenizer() {
-  if (cached) return cached;
-  const buf = readFileSync(resolveDataPath());
-  const json = gunzipSync(buf).toString("utf8");
-  const data = JSON.parse(json);
-  const mergeRank = /* @__PURE__ */ new Map();
-  for (let i = 0; i < data.model.merges.length; i++) {
-    mergeRank.set(data.model.merges[i], i);
-  }
-  const splitRegexes = [];
-  for (const p of data.pre_tokenizer.pretokenizers) {
-    if (p.type === "Split") {
-      splitRegexes.push(new RegExp(p.pattern.Regex, "gu"));
-    }
-  }
-  const addedMap = /* @__PURE__ */ new Map();
-  const addedContents = [];
-  for (const t2 of data.added_tokens) {
-    if (!t2.special) {
-      addedMap.set(t2.content, t2.id);
-      addedContents.push(t2.content);
-    }
-  }
-  addedContents.sort((a, b) => b.length - a.length);
-  const addedPattern = addedContents.length ? new RegExp(addedContents.map(escapeRegex).join("|"), "g") : null;
-  cached = {
-    vocab: data.model.vocab,
-    mergeRank,
-    splitRegexes,
-    byteToChar: buildByteToChar(),
-    addedPattern,
-    addedMap
-  };
-  return cached;
-}
-function escapeRegex(s) {
-  return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
-}
-function applySplit(chunks, re) {
-  const out = [];
-  for (const chunk of chunks) {
-    if (!chunk) continue;
-    re.lastIndex = 0;
-    let last = 0;
-    for (const m of chunk.matchAll(re)) {
-      const idx = m.index ?? 0;
-      if (idx > last) out.push(chunk.slice(last, idx));
-      if (m[0].length > 0) out.push(m[0]);
-      last = idx + m[0].length;
-    }
-    if (last < chunk.length) out.push(chunk.slice(last));
-  }
-  return out;
-}
-function byteLevelEncode(s, byteToChar) {
-  const bytes = new TextEncoder().encode(s);
-  let out = "";
-  for (let i = 0; i < bytes.length; i++) out += byteToChar[bytes[i]];
-  return out;
-}
-function bpeEncode(piece, mergeRank) {
-  if (piece.length <= 1) return piece ? [piece] : [];
-  let word = Array.from(piece);
-  while (true) {
-    let bestIdx = -1;
-    let bestRank = Number.POSITIVE_INFINITY;
-    for (let i = 0; i < word.length - 1; i++) {
-      const pair = `${word[i]} ${word[i + 1]}`;
-      const rank = mergeRank.get(pair);
-      if (rank !== void 0 && rank < bestRank) {
-        bestRank = rank;
-        bestIdx = i;
-        if (rank === 0) break;
-      }
-    }
-    if (bestIdx < 0) break;
-    word = [
-      ...word.slice(0, bestIdx),
-      word[bestIdx] + word[bestIdx + 1],
-      ...word.slice(bestIdx + 2)
-    ];
-    if (word.length === 1) break;
-  }
-  return word;
-}
-function encode(text) {
-  if (!text) return [];
-  const t2 = loadTokenizer();
-  const ids = [];
-  const process2 = (segment) => {
-    if (!segment) return;
-    let chunks = [segment];
-    for (const re of t2.splitRegexes) chunks = applySplit(chunks, re);
-    for (const chunk of chunks) {
-      if (!chunk) continue;
-      const byteLevel = byteLevelEncode(chunk, t2.byteToChar);
-      const pieces = bpeEncode(byteLevel, t2.mergeRank);
-      for (const p of pieces) {
-        const id = t2.vocab[p];
-        if (id !== void 0) ids.push(id);
-      }
-    }
-  };
-  if (t2.addedPattern) {
-    t2.addedPattern.lastIndex = 0;
-    let last = 0;
-    for (const m of text.matchAll(t2.addedPattern)) {
-      const idx = m.index ?? 0;
-      if (idx > last) process2(text.slice(last, idx));
-      const id = t2.addedMap.get(m[0]);
-      if (id !== void 0) ids.push(id);
-      last = idx + m[0].length;
-    }
-    if (last < text.length) process2(text.slice(last));
-  } else {
-    process2(text);
-  }
-  return ids;
-}
-function countTokens(text) {
-  return encode(text).length;
-}
-function estimateConversationTokens(messages) {
-  let total = 0;
-  for (const m of messages) {
-    if (typeof m.content === "string" && m.content) {
-      total += countTokens(m.content);
-    }
-    if (m.tool_calls && Array.isArray(m.tool_calls) && m.tool_calls.length > 0) {
-      total += countTokens(JSON.stringify(m.tool_calls));
-    }
-  }
-  return total;
-}
-function estimateRequestTokens(messages, toolSpecs) {
-  let total = estimateConversationTokens(messages);
-  if (toolSpecs && toolSpecs.length > 0) {
-    total += countTokens(JSON.stringify(toolSpecs));
-  }
-  return total;
-}
-
 // src/mcp/latency.ts
 var SAMPLE_SIZE = 5;
 var DEFAULT_THRESHOLD_MS = 4e3;
@@ -4021,9 +3836,9 @@ ${i + 1}. ${r.title}`);
 }
 
 // src/at-mentions.ts
-import { existsSync
+import { existsSync, readFileSync, readdirSync, statSync } from "fs";
 import { readdir, stat } from "fs/promises";
-import { isAbsolute as isAbsolute2, join as
+import { isAbsolute as isAbsolute2, join as join4, relative as relative5, resolve as resolve2 } from "path";
 var DEFAULT_AT_MENTION_MAX_BYTES = 64 * 1024;
 var DEFAULT_AT_DIR_MAX_ENTRIES = 200;
 var DEFAULT_PICKER_IGNORE_DIRS = [
@@ -4068,7 +3883,7 @@ function listFilesWithStatsSync(root, opts = {}) {
     for (const ent of entries) {
       if (out.length >= maxResults) return;
      const relPath = dirRel ? `${dirRel}/${ent.name}` : ent.name;
-      const absPath =
+      const absPath = join4(dirAbs, ent.name);
      if (ent.isDirectory()) {
        if (ent.name.startsWith(".") || ignoreDirs.has(ent.name)) continue;
        if (ignoredByLayers(effectiveLayers, absPath, true)) continue;
@@ -4121,7 +3936,7 @@ async function listFilesWithStatsAsync(root, opts = {}) {
     for (const ent of entries) {
      if (out.length >= maxResults) break;
      const relPath = dirRel ? `${dirRel}/${ent.name}` : ent.name;
-      const absPath =
+      const absPath = join4(dirAbs, ent.name);
      if (ent.isDirectory()) {
        if (ent.name.startsWith(".") || ignoreDirs.has(ent.name)) continue;
        if (ignoredByLayers(effectiveLayers, absPath, true)) continue;
@@ -4146,12 +3961,12 @@ async function statBatch(ents, dirAbs, dirRel, out, maxResults, layers) {
   const accepted = [];
   for (const e of ents) {
     if (out.length + accepted.length >= maxResults) break;
-    if (ignoredByLayers(layers,
+    if (ignoredByLayers(layers, join4(dirAbs, e.name), false)) continue;
     accepted.push(e);
   }
   const stats = await Promise.all(
     accepted.map(
-      (e) => stat(
+      (e) => stat(join4(dirAbs, e.name)).then((s) => ({ mtimeMs: s.mtimeMs, isFile: s.isFile() })).catch(() => null)
     )
   );
   for (let i = 0; i < accepted.length; i++) {
@@ -4342,7 +4157,7 @@ function readSafe(root, rawPath, fs5) {
   }
 }
 var defaultFs = {
-  exists: (p) =>
+  exists: (p) => existsSync(p),
   isFile: (p) => {
     try {
       return statSync(p).isFile();
@@ -4375,7 +4190,7 @@ var defaultFs = {
       return 0;
     }
   },
-  read: (p) =>
+  read: (p) => readFileSync(p, "utf8")
 };
 var AT_URL_PATTERN = /(?<=^|\s)@(https?:\/\/\S+)/g;
 var DEFAULT_AT_URL_MAX_CHARS = 32e3;
@@ -4393,10 +4208,10 @@ async function expandAtUrls(text, opts = {}) {
     const url = stripUrlTail(rawUrl);
     if (!url) continue;
     if (seen.has(url)) continue;
-    const
-    if (
-    seen.set(url,
-    if (
+    const cached = opts.cache?.get(url);
+    if (cached) {
+      seen.set(url, cached);
+      if (cached.body) bodies.set(url, cached.body);
      order.push(url);
      continue;
    }
@@ -4770,18 +4585,18 @@ function forkRegistryWithAllowList(parent, allow, alsoExclude) {
 // src/code/edit-blocks.ts
 import {
   closeSync,
-  existsSync as
+  existsSync as existsSync2,
   fstatSync,
   ftruncateSync,
   mkdirSync,
   openSync,
-  readFileSync as
+  readFileSync as readFileSync2,
   readSync,
   unlinkSync,
   writeFileSync,
   writeSync
 } from "fs";
-import { dirname as
+import { dirname as dirname2, resolve as resolve3 } from "path";
 var BLOCK_RE = /^(\S[^\n]*)\n<{7} SEARCH\n([\s\S]*?)\n?={7}\n([\s\S]*?)\n?>{7} REPLACE/gm;
 function parseEditBlocks(text) {
   const out = [];
@@ -4811,7 +4626,7 @@ function applyEditBlock(block, rootDir) {
   const searchEmpty = block.search.length === 0;
   if (searchEmpty) {
     try {
-      mkdirSync(
+      mkdirSync(dirname2(absTarget), { recursive: true });
       const fd = openSync(absTarget, "wx");
       try {
         writeSync(fd, block.replace);
@@ -4889,9 +4704,9 @@ function applyEditBlocks(blocks, rootDir) {
 function toWholeFileEditBlock(path, content, rootDir) {
   const abs = resolve3(rootDir, path);
   let search = "";
-  if (
+  if (existsSync2(abs)) {
     try {
-      search =
+      search = readFileSync2(abs, "utf8");
     } catch {
       search = "";
     }
@@ -4906,12 +4721,12 @@ function snapshotBeforeEdits(blocks, rootDir) {
     if (seen.has(b.path)) continue;
     seen.add(b.path);
     const abs = resolve3(absRoot, b.path);
-    if (!
+    if (!existsSync2(abs)) {
       snapshots.push({ path: b.path, prevContent: null });
       continue;
     }
     try {
-      snapshots.push({ path: b.path, prevContent:
+      snapshots.push({ path: b.path, prevContent: readFileSync2(abs, "utf8") });
     } catch {
       snapshots.push({ path: b.path, prevContent: null });
     }
@@ -4931,7 +4746,7 @@ function restoreSnapshots(snapshots, rootDir) {
     }
     try {
       if (snap.prevContent === null) {
-        if (
+        if (existsSync2(abs)) unlinkSync(abs);
        return {
          path: snap.path,
          status: "applied",
@@ -4957,7 +4772,6 @@ function lineEndingOf(text) {
 }
 
 export {
-  countTokens,
   ToolRegistry,
   registerSingleMcpTool,
   bridgeMcpTools,
@@ -4983,4 +4797,4 @@ export {
   snapshotBeforeEdits,
   restoreSnapshots
 };
-//# sourceMappingURL=chunk-Q6YFXW7H.js.map
+//# sourceMappingURL=chunk-G7M3QWEN.js.map
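
For context on the edit-blocks code that survives in this chunk: BLOCK_RE (unchanged by this release) parses conflict-marker-style SEARCH/REPLACE blocks. A small sketch of the format it matches, using a hypothetical target path and body:

const BLOCK_RE = /^(\S[^\n]*)\n<{7} SEARCH\n([\s\S]*?)\n?={7}\n([\s\S]*?)\n?>{7} REPLACE/gm;

const sample = [
  "src/example.ts", // hypothetical path line
  "<<<<<<< SEARCH",
  "const x = 1;",
  "=======",
  "const x = 2;",
  ">>>>>>> REPLACE"
].join("\n");

for (const m of sample.matchAll(BLOCK_RE)) {
  // m[1] = "src/example.ts", m[2] = "const x = 1;", m[3] = "const x = 2;"
  console.log({ path: m[1], search: m[2], replace: m[3] });
}

An empty SEARCH body is the file-creation case, which applyEditBlock handles above via mkdirSync(dirname2(absTarget), { recursive: true }) plus an exclusive "wx" open.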