reasonix 0.43.0 → 0.44.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. package/README.md +49 -11
  2. package/README.zh-CN.md +35 -7
  3. package/dashboard/app.css +225 -4
  4. package/dashboard/dist/app.js +6441 -6080
  5. package/dashboard/dist/app.js.map +1 -1
  6. package/data/deepseek-tokenizer.json.gz +0 -0
  7. package/dist/cli/{acp-DAGPCVFZ.js → acp-TYZ2CTDL.js} +28 -30
  8. package/dist/cli/acp-TYZ2CTDL.js.map +1 -0
  9. package/dist/cli/chat-TH7VNNCJ.js +51 -0
  10. package/dist/cli/chunk-2425HK6U.js +0 -0
  11. package/dist/cli/chunk-25T6CVUP.js +0 -0
  12. package/dist/cli/chunk-2UQP6H6T.js +0 -0
  13. package/dist/cli/{chunk-3Z6IBU3D.js → chunk-2V6EAEUW.js} +95 -31
  14. package/dist/cli/chunk-2V6EAEUW.js.map +1 -0
  15. package/dist/cli/{chunk-XCGGEJTI.js → chunk-4CTDEJUF.js} +2 -2
  16. package/dist/cli/chunk-4QUNBQQ2.js +0 -0
  17. package/dist/cli/{chunk-74EX7SUH.js → chunk-5QCB62C4.js} +33 -7
  18. package/dist/cli/{chunk-74EX7SUH.js.map → chunk-5QCB62C4.js.map} +1 -1
  19. package/dist/cli/chunk-6OWJV3YW.js +390 -0
  20. package/dist/cli/chunk-6OWJV3YW.js.map +1 -0
  21. package/dist/cli/chunk-6PBZN4VI.js +0 -0
  22. package/dist/cli/{chunk-7O5ALB4C.js → chunk-7CIGMZT3.js} +2 -2
  23. package/dist/cli/{chunk-H6PS7IUE.js → chunk-7UCMM425.js} +7 -3
  24. package/dist/cli/chunk-7UCMM425.js.map +1 -0
  25. package/dist/cli/{chunk-TJX6BFZZ.js → chunk-AB2RED3C.js} +3 -3
  26. package/dist/cli/{chunk-XPDVG52A.js → chunk-AVFXO2EZ.js} +361 -13
  27. package/dist/cli/chunk-AVFXO2EZ.js.map +1 -0
  28. package/dist/cli/{chunk-FHOGSSCH.js → chunk-C53JQES5.js} +3 -3
  29. package/dist/cli/{chunk-RE4RAVFF.js → chunk-CGDR2ELH.js} +92 -30
  30. package/dist/cli/chunk-CGDR2ELH.js.map +1 -0
  31. package/dist/cli/{chunk-OSZC7C6F.js → chunk-CWZKQ5FE.js} +7 -4
  32. package/dist/cli/chunk-CWZKQ5FE.js.map +1 -0
  33. package/dist/cli/{devtools-YECO25QO.js → chunk-FEZK652I.js} +10 -85
  34. package/dist/cli/chunk-FEZK652I.js.map +1 -0
  35. package/dist/cli/{chunk-45U62RI3.js → chunk-HNXDZGC6.js} +104 -2
  36. package/dist/cli/chunk-HNXDZGC6.js.map +1 -0
  37. package/dist/cli/chunk-J5XJHLWM.js +0 -0
  38. package/dist/cli/chunk-JMBMLOBP.js +0 -0
  39. package/dist/cli/{chunk-5JJRUIPA.js → chunk-JNAQYELD.js} +16 -8
  40. package/dist/cli/{chunk-5JJRUIPA.js.map → chunk-JNAQYELD.js.map} +1 -1
  41. package/dist/cli/{chunk-YFGF5NKA.js → chunk-KGBG6M2X.js} +19 -15
  42. package/dist/cli/chunk-KGBG6M2X.js.map +1 -0
  43. package/dist/cli/{chunk-3BXRZFWS.js → chunk-KLQTAZIY.js} +12 -4
  44. package/dist/cli/chunk-KLQTAZIY.js.map +1 -0
  45. package/dist/cli/{chunk-VK5HG73G.js → chunk-KM465GST.js} +9 -9
  46. package/dist/cli/{chunk-DOYHN4KB.js → chunk-LIR2HBQH.js} +2 -2
  47. package/dist/cli/{chunk-YYQAUTTN.js → chunk-MJ6W5UN3.js} +2 -2
  48. package/dist/cli/{chunk-6PZ3CXBP.js → chunk-MRHHQJAQ.js} +5 -4
  49. package/dist/cli/chunk-MRHHQJAQ.js.map +1 -0
  50. package/dist/cli/{chunk-PQXPXJBJ.js → chunk-NVURFF27.js} +16 -5
  51. package/dist/cli/chunk-NVURFF27.js.map +1 -0
  52. package/dist/cli/{chunk-2R4QCDOZ.js → chunk-OPFUUYHL.js} +540 -287
  53. package/dist/cli/chunk-OPFUUYHL.js.map +1 -0
  54. package/dist/cli/chunk-PLHAZOLZ.js +0 -0
  55. package/dist/cli/{chunk-HFEAY5DT.js → chunk-R3CTO2HM.js} +2 -2
  56. package/dist/cli/{chunk-O52OLQL3.js → chunk-RDRC3XDT.js} +136 -38
  57. package/dist/cli/chunk-RDRC3XDT.js.map +1 -0
  58. package/dist/cli/chunk-S4XVGLRW.js +0 -0
  59. package/dist/cli/chunk-SZ5XES2N.js +0 -0
  60. package/dist/cli/{chunk-2K65GZBT.js → chunk-TEUDEGX2.js} +64 -19
  61. package/dist/cli/chunk-TEUDEGX2.js.map +1 -0
  62. package/dist/cli/{chunk-2Z35JOA4.js → chunk-TKVXTQ3T.js} +4 -4
  63. package/dist/cli/{chunk-2Z35JOA4.js.map → chunk-TKVXTQ3T.js.map} +1 -1
  64. package/dist/cli/chunk-TUK7OWJA.js +0 -0
  65. package/dist/cli/{chunk-32TIKD5U.js → chunk-TXJMRPIL.js} +3 -3
  66. package/dist/cli/{chunk-2KDUS647.js → chunk-V26WPN3J.js} +7 -4
  67. package/dist/cli/chunk-V26WPN3J.js.map +1 -0
  68. package/dist/cli/{chunk-F3PXYSNN.js → chunk-WK3UFQY3.js} +2 -2
  69. package/dist/cli/{chunk-6G3CUUFG.js → chunk-X53B3JIX.js} +3 -3
  70. package/dist/cli/{chunk-6G3CUUFG.js.map → chunk-X53B3JIX.js.map} +1 -1
  71. package/dist/cli/chunk-XJXDHAES.js +0 -0
  72. package/dist/cli/{chunk-6AK4EY3D.js → chunk-XSU4QVFW.js} +1 -81
  73. package/dist/cli/chunk-XSU4QVFW.js.map +1 -0
  74. package/dist/cli/chunk-XXC2BYTV.js +0 -0
  75. package/dist/cli/{chunk-P7EKE5ZQ.js → chunk-Z4S7EYXG.js} +4482 -1310
  76. package/dist/cli/chunk-Z4S7EYXG.js.map +1 -0
  77. package/dist/cli/chunk-ZZM6QJ4W.js +0 -0
  78. package/dist/cli/{chunk-YQ6NTIIE.js → chunk-ZZYBBX5N.js} +13 -5
  79. package/dist/cli/chunk-ZZYBBX5N.js.map +1 -0
  80. package/dist/cli/{code-SMKEW6CD.js → code-PSVJ3KEN.js} +48 -36
  81. package/dist/cli/code-PSVJ3KEN.js.map +1 -0
  82. package/dist/cli/{commands-FVVB5FZF.js → commands-OCU42XG4.js} +4 -4
  83. package/dist/cli/{commit-HE4VSPZ7.js → commit-XCQIQCYG.js} +3 -3
  84. package/dist/cli/{desktop-Q7NDXCON.js → desktop-KWGR4BNE.js} +210 -69
  85. package/dist/cli/desktop-KWGR4BNE.js.map +1 -0
  86. package/dist/cli/devtools-HW3WDT3Q.js +91 -0
  87. package/dist/cli/devtools-HW3WDT3Q.js.map +1 -0
  88. package/dist/cli/{diff-435UTPC5.js → diff-NHANTNC3.js} +9 -9
  89. package/dist/cli/{doctor-OT7KH75K.js → doctor-CC5CLOGG.js} +10 -10
  90. package/dist/cli/events-XEFAD5VX.js +0 -0
  91. package/dist/cli/index.js +132 -94
  92. package/dist/cli/index.js.map +1 -1
  93. package/dist/cli/{mcp-WUL2WO75.js → mcp-MPVGBBJF.js} +2 -2
  94. package/dist/cli/{mcp-browse-RR7R4XET.js → mcp-browse-4XOTC3FJ.js} +3 -3
  95. package/dist/cli/{mcp-inspect-REGLYBWT.js → mcp-inspect-CEMGKKAH.js} +14 -9
  96. package/dist/cli/mcp-inspect-CEMGKKAH.js.map +1 -0
  97. package/dist/cli/{prompt-UW6EFLVR.js → prompt-2D7ID24X.js} +4 -4
  98. package/dist/cli/prune-sessions-3RWUBYRS.js +0 -0
  99. package/dist/cli/{replay-YOURXV4C.js → replay-SR44E6RS.js} +10 -10
  100. package/dist/cli/{run-Q6BUXV66.js → run-MDGL27WL.js} +35 -36
  101. package/dist/cli/run-MDGL27WL.js.map +1 -0
  102. package/dist/cli/{server-XGDBRWMB.js → server-27ARQXIZ.js} +67 -24
  103. package/dist/cli/server-27ARQXIZ.js.map +1 -0
  104. package/dist/cli/{sessions-FH7QVYSY.js → sessions-CKQXCYGP.js} +18 -18
  105. package/dist/cli/sessions-CKQXCYGP.js.map +1 -0
  106. package/dist/cli/{setup-VDS6SVEP.js → setup-TPAGSVXO.js} +6 -6
  107. package/dist/cli/{stats-MQVI2XQH.js → stats-DPUBZNVX.js} +6 -4
  108. package/dist/cli/update-6ITLPRDV.js +0 -0
  109. package/dist/cli/{version-DAHGZY5N.js → version-2X3BHVVK.js} +15 -15
  110. package/dist/index.d.ts +181 -53
  111. package/dist/index.js +1322 -533
  112. package/dist/index.js.map +1 -1
  113. package/package.json +21 -8
  114. package/dist/cli/.-3G6VX5S7.js +0 -327
  115. package/dist/cli/.-6YRPB2C7.js +0 -329
  116. package/dist/cli/.-EYSVINK3.js +0 -317
  117. package/dist/cli/acp-DAGPCVFZ.js.map +0 -1
  118. package/dist/cli/chat-7ES4IBNH.js +0 -50
  119. package/dist/cli/chunk-2K65GZBT.js.map +0 -1
  120. package/dist/cli/chunk-2KDUS647.js.map +0 -1
  121. package/dist/cli/chunk-2R4QCDOZ.js.map +0 -1
  122. package/dist/cli/chunk-3BXRZFWS.js.map +0 -1
  123. package/dist/cli/chunk-3Z6IBU3D.js.map +0 -1
  124. package/dist/cli/chunk-45U62RI3.js.map +0 -1
  125. package/dist/cli/chunk-6AK4EY3D.js.map +0 -1
  126. package/dist/cli/chunk-6PZ3CXBP.js.map +0 -1
  127. package/dist/cli/chunk-H6PS7IUE.js.map +0 -1
  128. package/dist/cli/chunk-O52OLQL3.js.map +0 -1
  129. package/dist/cli/chunk-OSZC7C6F.js.map +0 -1
  130. package/dist/cli/chunk-P7EKE5ZQ.js.map +0 -1
  131. package/dist/cli/chunk-PQXPXJBJ.js.map +0 -1
  132. package/dist/cli/chunk-PV55UMTO.js +0 -200
  133. package/dist/cli/chunk-PV55UMTO.js.map +0 -1
  134. package/dist/cli/chunk-RE4RAVFF.js.map +0 -1
  135. package/dist/cli/chunk-XPDVG52A.js.map +0 -1
  136. package/dist/cli/chunk-YFGF5NKA.js.map +0 -1
  137. package/dist/cli/chunk-YQ6NTIIE.js.map +0 -1
  138. package/dist/cli/code-SMKEW6CD.js.map +0 -1
  139. package/dist/cli/desktop-Q7NDXCON.js.map +0 -1
  140. package/dist/cli/devtools-YECO25QO.js.map +0 -1
  141. package/dist/cli/doctor-OT7KH75K.js.map +0 -1
  142. package/dist/cli/mcp-inspect-REGLYBWT.js.map +0 -1
  143. package/dist/cli/prompt-UW6EFLVR.js.map +0 -1
  144. package/dist/cli/run-Q6BUXV66.js.map +0 -1
  145. package/dist/cli/server-XGDBRWMB.js.map +0 -1
  146. package/dist/cli/sessions-FH7QVYSY.js.map +0 -1
  147. package/dist/cli/stats-MQVI2XQH.js.map +0 -1
  148. /package/dist/cli/{.-3G6VX5S7.js.map → chat-TH7VNNCJ.js.map} +0 -0
  149. /package/dist/cli/{chunk-XCGGEJTI.js.map → chunk-4CTDEJUF.js.map} +0 -0
  150. /package/dist/cli/{chunk-7O5ALB4C.js.map → chunk-7CIGMZT3.js.map} +0 -0
  151. /package/dist/cli/{chunk-TJX6BFZZ.js.map → chunk-AB2RED3C.js.map} +0 -0
  152. /package/dist/cli/{chunk-FHOGSSCH.js.map → chunk-C53JQES5.js.map} +0 -0
  153. /package/dist/cli/{chunk-VK5HG73G.js.map → chunk-KM465GST.js.map} +0 -0
  154. /package/dist/cli/{chunk-DOYHN4KB.js.map → chunk-LIR2HBQH.js.map} +0 -0
  155. /package/dist/cli/{chunk-YYQAUTTN.js.map → chunk-MJ6W5UN3.js.map} +0 -0
  156. /package/dist/cli/{chunk-HFEAY5DT.js.map → chunk-R3CTO2HM.js.map} +0 -0
  157. /package/dist/cli/{chunk-32TIKD5U.js.map → chunk-TXJMRPIL.js.map} +0 -0
  158. /package/dist/cli/{chunk-F3PXYSNN.js.map → chunk-WK3UFQY3.js.map} +0 -0
  159. /package/dist/cli/{commands-FVVB5FZF.js.map → commands-OCU42XG4.js.map} +0 -0
  160. /package/dist/cli/{commit-HE4VSPZ7.js.map → commit-XCQIQCYG.js.map} +0 -0
  161. /package/dist/cli/{diff-435UTPC5.js.map → diff-NHANTNC3.js.map} +0 -0
  162. /package/dist/cli/{.-6YRPB2C7.js.map → doctor-CC5CLOGG.js.map} +0 -0
  163. /package/dist/cli/{mcp-WUL2WO75.js.map → mcp-MPVGBBJF.js.map} +0 -0
  164. /package/dist/cli/{mcp-browse-RR7R4XET.js.map → mcp-browse-4XOTC3FJ.js.map} +0 -0
  165. /package/dist/cli/{.-EYSVINK3.js.map → prompt-2D7ID24X.js.map} +0 -0
  166. /package/dist/cli/{replay-YOURXV4C.js.map → replay-SR44E6RS.js.map} +0 -0
  167. /package/dist/cli/{setup-VDS6SVEP.js.map → setup-TPAGSVXO.js.map} +0 -0
  168. /package/dist/cli/{chat-7ES4IBNH.js.map → stats-DPUBZNVX.js.map} +0 -0
  169. /package/dist/cli/{version-DAHGZY5N.js.map → version-2X3BHVVK.js.map} +0 -0
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../../src/mcp/preflight.ts","../../src/mcp/transport-from-spec.ts"],"sourcesContent":["import { type Stats, statSync } from \"node:fs\";\nimport type { StdioMcpSpec } from \"./spec.js\";\n\nconst FILESYSTEM_PKG = \"@modelcontextprotocol/server-filesystem\";\n\nexport function preflightStdioSpec(spec: StdioMcpSpec): void {\n const pkgIndex = spec.args.indexOf(FILESYSTEM_PKG);\n if (pkgIndex < 0) return;\n const positional = spec.args.slice(pkgIndex + 1).filter((a) => !a.startsWith(\"-\"));\n for (const dir of positional) {\n let stat: Stats;\n try {\n stat = statSync(dir);\n } catch {\n throw new Error(\n `MCP filesystem sandbox '${dir}' does not exist — create it with: mkdir -p '${dir}'`,\n );\n }\n if (!stat.isDirectory()) {\n throw new Error(`MCP filesystem sandbox '${dir}' exists but is not a directory`);\n }\n }\n}\n","import type { McpSpec } from \"./spec.js\";\nimport { SseTransport } from \"./sse.js\";\nimport { type McpTransport, StdioTransport } from \"./stdio.js\";\nimport { StreamableHttpTransport } from \"./streamable-http.js\";\n\nexport interface BuildTransportOptions {\n /** Stdio-only env overlay — merged over process.env. SSE/Streamable-HTTP ignore it. */\n env?: Record<string, string>;\n}\n\nexport function buildTransportFromSpec(\n spec: McpSpec,\n opts: BuildTransportOptions = {},\n): McpTransport {\n if (spec.transport === \"sse\") return new SseTransport({ url: spec.url });\n if (spec.transport === \"streamable-http\") return new StreamableHttpTransport({ url: spec.url });\n return new StdioTransport({ command: spec.command, args: spec.args, env: opts.env });\n}\n"],"mappings":";;;;;;;;;AAAA,SAAqB,gBAAgB;AAGrC,IAAM,iBAAiB;AAEhB,SAAS,mBAAmB,MAA0B;AAC3D,QAAM,WAAW,KAAK,KAAK,QAAQ,cAAc;AACjD,MAAI,WAAW,EAAG;AAClB,QAAM,aAAa,KAAK,KAAK,MAAM,WAAW,CAAC,EAAE,OAAO,CAAC,MAAM,CAAC,EAAE,WAAW,GAAG,CAAC;AACjF,aAAW,OAAO,YAAY;AAC5B,QAAI;AACJ,QAAI;AACF,aAAO,SAAS,GAAG;AAAA,IACrB,QAAQ;AACN,YAAM,IAAI;AAAA,QACR,2BAA2B,GAAG,qDAAgD,GAAG;AAAA,MACnF;AAAA,IACF;AACA,QAAI,CAAC,KAAK,YAAY,GAAG;AACvB,YAAM,IAAI,MAAM,2BAA2B,GAAG,iCAAiC;AAAA,IACjF;AAAA,EACF;AACF;;;ACZO,SAAS,uBACd,MACA,OAA8B,CAAC,GACjB;AACd,MAAI,KAAK,cAAc,MAAO,QAAO,IAAI,aAAa,EAAE,KAAK,KAAK,IAAI,CAAC;AACvE,MAAI,KAAK,cAAc,kBAAmB,QAAO,IAAI,wBAAwB,EAAE,KAAK,KAAK,IAAI,CAAC;AAC9F,SAAO,IAAI,eAAe,EAAE,SAAS,KAAK,SAAS,MAAM,KAAK,MAAM,KAAK,KAAK,IAAI,CAAC;AACrF;","names":[]}
@@ -1,200 +0,0 @@
1
- #!/usr/bin/env node
2
- import { createRequire as __cr } from 'node:module'; if (typeof globalThis.require === 'undefined') { globalThis.require = __cr(import.meta.url); }
3
-
4
- // src/tokenizer.ts
5
- import { existsSync, readFileSync } from "fs";
6
- import { createRequire } from "module";
7
- import { dirname, join } from "path";
8
- import { fileURLToPath } from "url";
9
- import { gunzipSync } from "zlib";
10
- function buildByteToChar() {
11
- const result = new Array(256);
12
- const bs = [];
13
- for (let b = 33; b <= 126; b++) bs.push(b);
14
- for (let b = 161; b <= 172; b++) bs.push(b);
15
- for (let b = 174; b <= 255; b++) bs.push(b);
16
- const cs = bs.slice();
17
- let n = 0;
18
- for (let b = 0; b < 256; b++) {
19
- if (!bs.includes(b)) {
20
- bs.push(b);
21
- cs.push(256 + n);
22
- n++;
23
- }
24
- }
25
- for (let i = 0; i < bs.length; i++) {
26
- result[bs[i]] = String.fromCodePoint(cs[i]);
27
- }
28
- return result;
29
- }
30
- var cached = null;
31
- function resolveDataPath() {
32
- if (process.env.REASONIX_TOKENIZER_PATH) return process.env.REASONIX_TOKENIZER_PATH;
33
- const candidates = [];
34
- try {
35
- const here = dirname(fileURLToPath(import.meta.url));
36
- candidates.push(join(here, "..", "data", "deepseek-tokenizer.json.gz"));
37
- candidates.push(join(here, "..", "..", "data", "deepseek-tokenizer.json.gz"));
38
- } catch {
39
- }
40
- try {
41
- const req = createRequire(import.meta.url);
42
- candidates.push(
43
- join(dirname(req.resolve("reasonix/package.json")), "data", "deepseek-tokenizer.json.gz")
44
- );
45
- } catch {
46
- }
47
- for (const p of candidates) {
48
- if (existsSync(p)) return p;
49
- }
50
- return candidates[0] ?? join(process.cwd(), "data", "deepseek-tokenizer.json.gz");
51
- }
52
- function loadTokenizer() {
53
- if (cached) return cached;
54
- const buf = readFileSync(resolveDataPath());
55
- const json = gunzipSync(buf).toString("utf8");
56
- const data = JSON.parse(json);
57
- const mergeRank = /* @__PURE__ */ new Map();
58
- for (let i = 0; i < data.model.merges.length; i++) {
59
- mergeRank.set(data.model.merges[i], i);
60
- }
61
- const splitRegexes = [];
62
- for (const p of data.pre_tokenizer.pretokenizers) {
63
- if (p.type === "Split") {
64
- splitRegexes.push(new RegExp(p.pattern.Regex, "gu"));
65
- }
66
- }
67
- const addedMap = /* @__PURE__ */ new Map();
68
- const addedContents = [];
69
- for (const t of data.added_tokens) {
70
- if (!t.special) {
71
- addedMap.set(t.content, t.id);
72
- addedContents.push(t.content);
73
- }
74
- }
75
- addedContents.sort((a, b) => b.length - a.length);
76
- const addedPattern = addedContents.length ? new RegExp(addedContents.map(escapeRegex).join("|"), "g") : null;
77
- cached = {
78
- vocab: data.model.vocab,
79
- mergeRank,
80
- splitRegexes,
81
- byteToChar: buildByteToChar(),
82
- addedPattern,
83
- addedMap
84
- };
85
- return cached;
86
- }
87
- function escapeRegex(s) {
88
- return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
89
- }
90
- function applySplit(chunks, re) {
91
- const out = [];
92
- for (const chunk of chunks) {
93
- if (!chunk) continue;
94
- re.lastIndex = 0;
95
- let last = 0;
96
- for (const m of chunk.matchAll(re)) {
97
- const idx = m.index ?? 0;
98
- if (idx > last) out.push(chunk.slice(last, idx));
99
- if (m[0].length > 0) out.push(m[0]);
100
- last = idx + m[0].length;
101
- }
102
- if (last < chunk.length) out.push(chunk.slice(last));
103
- }
104
- return out;
105
- }
106
- function byteLevelEncode(s, byteToChar) {
107
- const bytes = new TextEncoder().encode(s);
108
- let out = "";
109
- for (let i = 0; i < bytes.length; i++) out += byteToChar[bytes[i]];
110
- return out;
111
- }
112
- function bpeEncode(piece, mergeRank) {
113
- if (piece.length <= 1) return piece ? [piece] : [];
114
- let word = Array.from(piece);
115
- while (true) {
116
- let bestIdx = -1;
117
- let bestRank = Number.POSITIVE_INFINITY;
118
- for (let i = 0; i < word.length - 1; i++) {
119
- const pair = `${word[i]} ${word[i + 1]}`;
120
- const rank = mergeRank.get(pair);
121
- if (rank !== void 0 && rank < bestRank) {
122
- bestRank = rank;
123
- bestIdx = i;
124
- if (rank === 0) break;
125
- }
126
- }
127
- if (bestIdx < 0) break;
128
- word = [
129
- ...word.slice(0, bestIdx),
130
- word[bestIdx] + word[bestIdx + 1],
131
- ...word.slice(bestIdx + 2)
132
- ];
133
- if (word.length === 1) break;
134
- }
135
- return word;
136
- }
137
- function encode(text) {
138
- if (!text) return [];
139
- const t = loadTokenizer();
140
- const ids = [];
141
- const process2 = (segment) => {
142
- if (!segment) return;
143
- let chunks = [segment];
144
- for (const re of t.splitRegexes) chunks = applySplit(chunks, re);
145
- for (const chunk of chunks) {
146
- if (!chunk) continue;
147
- const byteLevel = byteLevelEncode(chunk, t.byteToChar);
148
- const pieces = bpeEncode(byteLevel, t.mergeRank);
149
- for (const p of pieces) {
150
- const id = t.vocab[p];
151
- if (id !== void 0) ids.push(id);
152
- }
153
- }
154
- };
155
- if (t.addedPattern) {
156
- t.addedPattern.lastIndex = 0;
157
- let last = 0;
158
- for (const m of text.matchAll(t.addedPattern)) {
159
- const idx = m.index ?? 0;
160
- if (idx > last) process2(text.slice(last, idx));
161
- const id = t.addedMap.get(m[0]);
162
- if (id !== void 0) ids.push(id);
163
- last = idx + m[0].length;
164
- }
165
- if (last < text.length) process2(text.slice(last));
166
- } else {
167
- process2(text);
168
- }
169
- return ids;
170
- }
171
- function countTokens(text) {
172
- return encode(text).length;
173
- }
174
- function estimateConversationTokens(messages) {
175
- let total = 0;
176
- for (const m of messages) {
177
- if (typeof m.content === "string" && m.content) {
178
- total += countTokens(m.content);
179
- }
180
- if (m.tool_calls && Array.isArray(m.tool_calls) && m.tool_calls.length > 0) {
181
- total += countTokens(JSON.stringify(m.tool_calls));
182
- }
183
- }
184
- return total;
185
- }
186
- function estimateRequestTokens(messages, toolSpecs) {
187
- let total = estimateConversationTokens(messages);
188
- if (toolSpecs && toolSpecs.length > 0) {
189
- total += countTokens(JSON.stringify(toolSpecs));
190
- }
191
- return total;
192
- }
193
-
194
- export {
195
- resolveDataPath,
196
- countTokens,
197
- estimateConversationTokens,
198
- estimateRequestTokens
199
- };
200
- //# sourceMappingURL=chunk-PV55UMTO.js.map
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../../src/tokenizer.ts"],"sourcesContent":["/** Encode-only DeepSeek V3 tokenizer port; ~3% drift vs API (chat-template framing not replayed). */\n\nimport { existsSync, readFileSync } from \"node:fs\";\nimport { createRequire } from \"node:module\";\nimport { dirname, join } from \"node:path\";\nimport { fileURLToPath } from \"node:url\";\nimport { gunzipSync } from \"node:zlib\";\n\ninterface AddedToken {\n id: number;\n content: string;\n special: boolean;\n normalized: boolean;\n}\n\ninterface SplitPretokenizer {\n type: \"Split\";\n pattern: { Regex: string };\n behavior: \"Isolated\" | \"Removed\" | string;\n invert: boolean;\n}\n\ninterface ByteLevelPretokenizer {\n type: \"ByteLevel\";\n add_prefix_space: boolean;\n trim_offsets: boolean;\n use_regex: boolean;\n}\n\ntype Pretokenizer = SplitPretokenizer | ByteLevelPretokenizer;\n\ninterface TokenizerData {\n added_tokens: AddedToken[];\n pre_tokenizer: {\n type: \"Sequence\";\n pretokenizers: Pretokenizer[];\n };\n model: {\n type: \"BPE\";\n vocab: Record<string, number>;\n merges: string[];\n };\n}\n\ninterface LoadedTokenizer {\n vocab: Record<string, number>;\n mergeRank: Map<string, number>;\n splitRegexes: RegExp[];\n byteToChar: string[];\n /** Non-special added tokens only — special tokens in user text tokenize byte-by-byte (HF default). */\n addedPattern: RegExp | null;\n addedMap: Map<string, number>;\n}\n\n/** GPT-2 byte→unicode map; lets byte-level BPE vocab serialize as readable JSON strings. */\nfunction buildByteToChar(): string[] {\n const result: string[] = new Array(256);\n const bs: number[] = [];\n for (let b = 33; b <= 126; b++) bs.push(b);\n for (let b = 161; b <= 172; b++) bs.push(b);\n for (let b = 174; b <= 255; b++) bs.push(b);\n const cs = bs.slice();\n let n = 0;\n for (let b = 0; b < 256; b++) {\n if (!bs.includes(b)) {\n bs.push(b);\n cs.push(256 + n);\n n++;\n }\n }\n for (let i = 0; i < bs.length; i++) {\n result[bs[i]!] = String.fromCodePoint(cs[i]!);\n }\n return result;\n}\n\nlet cached: LoadedTokenizer | null = null;\n\n/** Two ../data candidates needed: dist/index.js AND dist/cli/index.js resolve to different roots. */\nexport function resolveDataPath(): string {\n if (process.env.REASONIX_TOKENIZER_PATH) return process.env.REASONIX_TOKENIZER_PATH;\n const candidates: string[] = [];\n try {\n const here = dirname(fileURLToPath(import.meta.url));\n candidates.push(join(here, \"..\", \"data\", \"deepseek-tokenizer.json.gz\"));\n candidates.push(join(here, \"..\", \"..\", \"data\", \"deepseek-tokenizer.json.gz\"));\n } catch {\n /* import.meta.url unavailable — skip to the package resolution step. */\n }\n try {\n const req = createRequire(import.meta.url);\n candidates.push(\n join(dirname(req.resolve(\"reasonix/package.json\")), \"data\", \"deepseek-tokenizer.json.gz\"),\n );\n } catch {\n /* Not installed as `reasonix/` — the earlier candidates still may hit. */\n }\n for (const p of candidates) {\n if (existsSync(p)) return p;\n }\n // Nothing exists — return the first candidate anyway so readFileSync\n // surfaces a concrete path in the ENOENT message (better than silent miss).\n return candidates[0] ?? join(process.cwd(), \"data\", \"deepseek-tokenizer.json.gz\");\n}\n\nfunction loadTokenizer(): LoadedTokenizer {\n if (cached) return cached;\n const buf = readFileSync(resolveDataPath());\n const json = gunzipSync(buf).toString(\"utf8\");\n const data = JSON.parse(json) as TokenizerData;\n\n const mergeRank = new Map<string, number>();\n for (let i = 0; i < data.model.merges.length; i++) {\n mergeRank.set(data.model.merges[i]!, i);\n }\n\n const splitRegexes: RegExp[] = [];\n for (const p of data.pre_tokenizer.pretokenizers) {\n if (p.type === \"Split\") {\n // All three Split rules use Isolated — matches become their own\n // pre-tokens and so do the in-between stretches. The ByteLevel\n // stage in the Sequence does no extra splitting here\n // (use_regex:false), so our 3 Split regexes are the whole story.\n splitRegexes.push(new RegExp(p.pattern.Regex, \"gu\"));\n }\n }\n\n const addedMap = new Map<string, number>();\n const addedContents: string[] = [];\n for (const t of data.added_tokens) {\n if (!t.special) {\n addedMap.set(t.content, t.id);\n addedContents.push(t.content);\n }\n }\n // Longest-first ensures greedy matching doesn't lose a longer token\n // to a shorter prefix (e.g. `<think>` before `<`).\n addedContents.sort((a, b) => b.length - a.length);\n const addedPattern = addedContents.length\n ? new RegExp(addedContents.map(escapeRegex).join(\"|\"), \"g\")\n : null;\n\n cached = {\n vocab: data.model.vocab,\n mergeRank,\n splitRegexes,\n byteToChar: buildByteToChar(),\n addedPattern,\n addedMap,\n };\n return cached;\n}\n\nfunction escapeRegex(s: string): string {\n return s.replace(/[.*+?^${}()|[\\]\\\\]/g, \"\\\\$&\");\n}\n\nfunction applySplit(chunks: string[], re: RegExp): string[] {\n const out: string[] = [];\n for (const chunk of chunks) {\n if (!chunk) continue;\n // Reset lastIndex — reusing a /g regex across matchAll iterations\n // is safe (matchAll internally advances), but across different\n // input strings we want a clean start.\n re.lastIndex = 0;\n let last = 0;\n for (const m of chunk.matchAll(re)) {\n const idx = m.index ?? 0;\n if (idx > last) out.push(chunk.slice(last, idx));\n if (m[0].length > 0) out.push(m[0]);\n last = idx + m[0].length;\n }\n if (last < chunk.length) out.push(chunk.slice(last));\n }\n return out;\n}\n\n/** UTF-8 bytes of `s`, each mapped to its byte-level visible char. */\nfunction byteLevelEncode(s: string, byteToChar: string[]): string {\n const bytes = new TextEncoder().encode(s);\n let out = \"\";\n for (let i = 0; i < bytes.length; i++) out += byteToChar[bytes[i]!];\n return out;\n}\n\nfunction bpeEncode(piece: string, mergeRank: Map<string, number>): string[] {\n if (piece.length <= 1) return piece ? [piece] : [];\n let word: string[] = Array.from(piece);\n while (true) {\n let bestIdx = -1;\n let bestRank = Number.POSITIVE_INFINITY;\n for (let i = 0; i < word.length - 1; i++) {\n const pair = `${word[i]} ${word[i + 1]}`;\n const rank = mergeRank.get(pair);\n if (rank !== undefined && rank < bestRank) {\n bestRank = rank;\n bestIdx = i;\n if (rank === 0) break; // 0 is already the best possible\n }\n }\n if (bestIdx < 0) break;\n word = [\n ...word.slice(0, bestIdx),\n word[bestIdx]! + word[bestIdx + 1]!,\n ...word.slice(bestIdx + 2),\n ];\n if (word.length === 1) break;\n }\n return word;\n}\n\nexport function encode(text: string): number[] {\n if (!text) return [];\n const t = loadTokenizer();\n const ids: number[] = [];\n\n const process = (segment: string) => {\n if (!segment) return;\n let chunks: string[] = [segment];\n for (const re of t.splitRegexes) chunks = applySplit(chunks, re);\n for (const chunk of chunks) {\n if (!chunk) continue;\n const byteLevel = byteLevelEncode(chunk, t.byteToChar);\n const pieces = bpeEncode(byteLevel, t.mergeRank);\n for (const p of pieces) {\n const id = t.vocab[p];\n // If not in vocab we silently skip: shouldn't happen for\n // byte-level BPE (every single byte has its own vocab entry),\n // but if a future tokenizer update breaks that invariant we'd\n // rather under-count than throw from a UI gauge.\n if (id !== undefined) ids.push(id);\n }\n }\n };\n\n if (t.addedPattern) {\n t.addedPattern.lastIndex = 0;\n let last = 0;\n for (const m of text.matchAll(t.addedPattern)) {\n const idx = m.index ?? 0;\n if (idx > last) process(text.slice(last, idx));\n const id = t.addedMap.get(m[0]);\n if (id !== undefined) ids.push(id);\n last = idx + m[0].length;\n }\n if (last < text.length) process(text.slice(last));\n } else {\n process(text);\n }\n return ids;\n}\n\nexport function countTokens(text: string): number {\n return encode(text).length;\n}\n\n/** Doesn't add chat-template framing overhead; under-counts ~3-6% vs real `prompt_tokens`. */\nexport function estimateConversationTokens(\n messages: Array<{ content?: string | null; tool_calls?: unknown }>,\n): number {\n let total = 0;\n for (const m of messages) {\n if (typeof m.content === \"string\" && m.content) {\n total += countTokens(m.content);\n }\n // Tool-call arguments are serialized as JSON in the prompt by the\n // chat template; their bytes WILL count upstream, so we count\n // them too. Stringify-once is cheap relative to the tokenize.\n if (m.tool_calls && Array.isArray(m.tool_calls) && m.tool_calls.length > 0) {\n total += countTokens(JSON.stringify(m.tool_calls));\n }\n }\n return total;\n}\n\n/** Tool specs ride in a separate request blob; must be counted separately for an accurate preflight. */\nexport function estimateRequestTokens(\n messages: Array<{ content?: string | null; tool_calls?: unknown }>,\n toolSpecs?: ReadonlyArray<unknown> | null,\n): number {\n let total = estimateConversationTokens(messages);\n if (toolSpecs && toolSpecs.length > 0) {\n total += countTokens(JSON.stringify(toolSpecs));\n }\n return total;\n}\n\n/** Exposed for tests — resets the lazy-load singleton. */\nexport function _resetForTests(): void {\n cached = null;\n}\n"],"mappings":";;;;AAEA,SAAS,YAAY,oBAAoB;AACzC,SAAS,qBAAqB;AAC9B,SAAS,SAAS,YAAY;AAC9B,SAAS,qBAAqB;AAC9B,SAAS,kBAAkB;AAiD3B,SAAS,kBAA4B;AACnC,QAAM,SAAmB,IAAI,MAAM,GAAG;AACtC,QAAM,KAAe,CAAC;AACtB,WAAS,IAAI,IAAI,KAAK,KAAK,IAAK,IAAG,KAAK,CAAC;AACzC,WAAS,IAAI,KAAK,KAAK,KAAK,IAAK,IAAG,KAAK,CAAC;AAC1C,WAAS,IAAI,KAAK,KAAK,KAAK,IAAK,IAAG,KAAK,CAAC;AAC1C,QAAM,KAAK,GAAG,MAAM;AACpB,MAAI,IAAI;AACR,WAAS,IAAI,GAAG,IAAI,KAAK,KAAK;AAC5B,QAAI,CAAC,GAAG,SAAS,CAAC,GAAG;AACnB,SAAG,KAAK,CAAC;AACT,SAAG,KAAK,MAAM,CAAC;AACf;AAAA,IACF;AAAA,EACF;AACA,WAAS,IAAI,GAAG,IAAI,GAAG,QAAQ,KAAK;AAClC,WAAO,GAAG,CAAC,CAAE,IAAI,OAAO,cAAc,GAAG,CAAC,CAAE;AAAA,EAC9C;AACA,SAAO;AACT;AAEA,IAAI,SAAiC;AAG9B,SAAS,kBAA0B;AACxC,MAAI,QAAQ,IAAI,wBAAyB,QAAO,QAAQ,IAAI;AAC5D,QAAM,aAAuB,CAAC;AAC9B,MAAI;AACF,UAAM,OAAO,QAAQ,cAAc,YAAY,GAAG,CAAC;AACnD,eAAW,KAAK,KAAK,MAAM,MAAM,QAAQ,4BAA4B,CAAC;AACtE,eAAW,KAAK,KAAK,MAAM,MAAM,MAAM,QAAQ,4BAA4B,CAAC;AAAA,EAC9E,QAAQ;AAAA,EAER;AACA,MAAI;AACF,UAAM,MAAM,cAAc,YAAY,GAAG;AACzC,eAAW;AAAA,MACT,KAAK,QAAQ,IAAI,QAAQ,uBAAuB,CAAC,GAAG,QAAQ,4BAA4B;AAAA,IAC1F;AAAA,EACF,QAAQ;AAAA,EAER;AACA,aAAW,KAAK,YAAY;AAC1B,QAAI,WAAW,CAAC,EAAG,QAAO;AAAA,EAC5B;AAGA,SAAO,WAAW,CAAC,KAAK,KAAK,QAAQ,IAAI,GAAG,QAAQ,4BAA4B;AAClF;AAEA,SAAS,gBAAiC;AACxC,MAAI,OAAQ,QAAO;AACnB,QAAM,MAAM,aAAa,gBAAgB,CAAC;AAC1C,QAAM,OAAO,WAAW,GAAG,EAAE,SAAS,MAAM;AAC5C,QAAM,OAAO,KAAK,MAAM,IAAI;AAE5B,QAAM,YAAY,oBAAI,IAAoB;AAC1C,WAAS,IAAI,GAAG,IAAI,KAAK,MAAM,OAAO,QAAQ,KAAK;AACjD,cAAU,IAAI,KAAK,MAAM,OAAO,CAAC,GAAI,CAAC;AAAA,EACxC;AAEA,QAAM,eAAyB,CAAC;AAChC,aAAW,KAAK,KAAK,cAAc,eAAe;AAChD,QAAI,EAAE,SAAS,SAAS;AAKtB,mBAAa,KAAK,IAAI,OAAO,EAAE,QAAQ,OAAO,IAAI,CAAC;AAAA,IACrD;AAAA,EACF;AAEA,QAAM,WAAW,oBAAI,IAAoB;AACzC,QAAM,gBAA0B,CAAC;AACjC,aAAW,KAAK,KAAK,cAAc;AACjC,QAAI,CAAC,EAAE,SAAS;AACd,eAAS,IAAI,EAAE,SAAS,EAAE,EAAE;AAC5B,oBAAc,KAAK,EAAE,OAAO;AAAA,IAC9B;AAAA,EACF;AAGA,gBAAc,KAAK,CAAC,GAAG,MAAM,EAAE,SAAS,EAAE,MAAM;AAChD,QAAM,eAAe,cAAc,SAC/B,IAAI,OAAO,cAAc,IAAI,WAAW,EAAE,KAAK,GAAG,GAAG,GAAG,IACxD;AAEJ,WAAS;AAAA,IACP,OAAO,KAAK,MAAM;AAAA,IAClB;AAAA,IACA;AAAA,IACA,YAAY,gBAAgB;AAAA,IAC5B;AAAA,IACA;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,YAAY,GAAmB;AACtC,SAAO,EAAE,QAAQ,uBAAuB,MAAM;AAChD;AAEA,SAAS,WAAW,QAAkB,IAAsB;AAC1D,QAAM,MAAgB,CAAC;AACvB,aAAW,SAAS,QAAQ;AAC1B,QAAI,CAAC,MAAO;AAIZ,OAAG,YAAY;AACf,QAAI,OAAO;AACX,eAAW,KAAK,MAAM,SAAS,EAAE,GAAG;AAClC,YAAM,MAAM,EAAE,SAAS;AACvB,UAAI,MAAM,KAAM,KAAI,KAAK,MAAM,MAAM,MAAM,GAAG,CAAC;AAC/C,UAAI,EAAE,CAAC,EAAE,SAAS,EAAG,KAAI,KAAK,EAAE,CAAC,CAAC;AAClC,aAAO,MAAM,EAAE,CAAC,EAAE;AAAA,IACpB;AACA,QAAI,OAAO,MAAM,OAAQ,KAAI,KAAK,MAAM,MAAM,IAAI,CAAC;AAAA,EACrD;AACA,SAAO;AACT;AAGA,SAAS,gBAAgB,GAAW,YAA8B;AAChE,QAAM,QAAQ,IAAI,YAAY,EAAE,OAAO,CAAC;AACxC,MAAI,MAAM;AACV,WAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,IAAK,QAAO,WAAW,MAAM,CAAC,CAAE;AAClE,SAAO;AACT;AAEA,SAAS,UAAU,OAAe,WAA0C;AAC1E,MAAI,MAAM,UAAU,EAAG,QAAO,QAAQ,CAAC,KAAK,IAAI,CAAC;AACjD,MAAI,OAAiB,MAAM,KAAK,KAAK;AACrC,SAAO,MAAM;AACX,QAAI,UAAU;AACd,QAAI,WAAW,OAAO;AACtB,aAAS,IAAI,GAAG,IAAI,KAAK,SAAS,GAAG,KAAK;AACxC,YAAM,OAAO,GAAG,KAAK,CAAC,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;AACtC,YAAM,OAAO,UAAU,IAAI,IAAI;AAC/B,UAAI,SAAS,UAAa,OAAO,UAAU;AACzC,mBAAW;AACX,kBAAU;AACV,YAAI,SAAS,EAAG;AAAA,MAClB;AAAA,IACF;AACA,QAAI,UAAU,EAAG;AACjB,WAAO;AAAA,MACL,GAAG,KAAK,MAAM,GAAG,OAAO;AAAA,MACxB,KAAK,OAAO,IAAK,KAAK,UAAU,CAAC;AAAA,MACjC,GAAG,KAAK,MAAM,UAAU,CAAC;AAAA,IAC3B;AACA,QAAI,KAAK,WAAW,EAAG;AAAA,EACzB;AACA,SAAO;AACT;AAEO,SAAS,OAAO,MAAwB;AAC7C,MAAI,CAAC,KAAM,QAAO,CAAC;AACnB,QAAM,IAAI,cAAc;AACxB,QAAM,MAAgB,CAAC;AAEvB,QAAMA,WAAU,CAAC,YAAoB;AACnC,QAAI,CAAC,QAAS;AACd,QAAI,SAAmB,CAAC,OAAO;AAC/B,eAAW,MAAM,EAAE,aAAc,UAAS,WAAW,QAAQ,EAAE;AAC/D,eAAW,SAAS,QAAQ;AAC1B,UAAI,CAAC,MAAO;AACZ,YAAM,YAAY,gBAAgB,OAAO,EAAE,UAAU;AACrD,YAAM,SAAS,UAAU,WAAW,EAAE,SAAS;AAC/C,iBAAW,KAAK,QAAQ;AACtB,cAAM,KAAK,EAAE,MAAM,CAAC;AAKpB,YAAI,OAAO,OAAW,KAAI,KAAK,EAAE;AAAA,MACnC;AAAA,IACF;AAAA,EACF;AAEA,MAAI,EAAE,cAAc;AAClB,MAAE,aAAa,YAAY;AAC3B,QAAI,OAAO;AACX,eAAW,KAAK,KAAK,SAAS,EAAE,YAAY,GAAG;AAC7C,YAAM,MAAM,EAAE,SAAS;AACvB,UAAI,MAAM,KAAM,CAAAA,SAAQ,KAAK,MAAM,MAAM,GAAG,CAAC;AAC7C,YAAM,KAAK,EAAE,SAAS,IAAI,EAAE,CAAC,CAAC;AAC9B,UAAI,OAAO,OAAW,KAAI,KAAK,EAAE;AACjC,aAAO,MAAM,EAAE,CAAC,EAAE;AAAA,IACpB;AACA,QAAI,OAAO,KAAK,OAAQ,CAAAA,SAAQ,KAAK,MAAM,IAAI,CAAC;AAAA,EAClD,OAAO;AACL,IAAAA,SAAQ,IAAI;AAAA,EACd;AACA,SAAO;AACT;AAEO,SAAS,YAAY,MAAsB;AAChD,SAAO,OAAO,IAAI,EAAE;AACtB;AAGO,SAAS,2BACd,UACQ;AACR,MAAI,QAAQ;AACZ,aAAW,KAAK,UAAU;AACxB,QAAI,OAAO,EAAE,YAAY,YAAY,EAAE,SAAS;AAC9C,eAAS,YAAY,EAAE,OAAO;AAAA,IAChC;AAIA,QAAI,EAAE,cAAc,MAAM,QAAQ,EAAE,UAAU,KAAK,EAAE,WAAW,SAAS,GAAG;AAC1E,eAAS,YAAY,KAAK,UAAU,EAAE,UAAU,CAAC;AAAA,IACnD;AAAA,EACF;AACA,SAAO;AACT;AAGO,SAAS,sBACd,UACA,WACQ;AACR,MAAI,QAAQ,2BAA2B,QAAQ;AAC/C,MAAI,aAAa,UAAU,SAAS,GAAG;AACrC,aAAS,YAAY,KAAK,UAAU,SAAS,CAAC;AAAA,EAChD;AACA,SAAO;AACT;","names":["process"]}