@bike4mind/cli 0.2.62 → 0.2.63-fix-chunk-token-limit-overflow.21805
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-PJFESKK6.js → chunk-5LZS5CVJ.js} +1 -1
- package/dist/{chunk-VJLPCIK2.js → chunk-C7N7VYXQ.js} +7 -7
- package/dist/{chunk-EO2Y5GFY.js → chunk-EPIYC3LA.js} +127 -7
- package/dist/{chunk-Y7K4HI6L.js → chunk-UZUHPHZC.js} +1 -1
- package/dist/{chunk-2J4HB7EB.js → chunk-WBE7SQUB.js} +1 -1
- package/dist/{chunk-MNBT2VFX.js → chunk-YHHCU4PX.js} +2 -2
- package/dist/commands/doctorCommand.js +1 -1
- package/dist/commands/headlessCommand.js +5 -5
- package/dist/commands/updateCommand.js +1 -1
- package/dist/{create-4Q7IAG7H.js → create-C4VEEEYR.js} +2 -2
- package/dist/index.js +6 -6
- package/dist/{mementoService-5VAXTB4G.js → mementoService-N4IM6QAC.js} +2 -2
- package/dist/{src-NYW3P73W.js → src-F4KZCAA2.js} +1 -1
- package/dist/{subtractCredits-ZDMCQ6R5.js → subtractCredits-D4KEM6VU.js} +2 -2
- package/package.json +7 -7
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
// package.json
|
|
4
4
|
var package_default = {
|
|
5
5
|
name: "@bike4mind/cli",
|
|
6
|
-
version: "0.2.62",
|
|
6
|
+
version: "0.2.63-fix-chunk-token-limit-overflow.21805+c654aa55d",
|
|
7
7
|
type: "module",
|
|
8
8
|
description: "Interactive CLI tool for Bike4Mind with ReAct agents",
|
|
9
9
|
license: "UNLICENSED",
|
|
@@ -118,11 +118,11 @@ var package_default = {
|
|
|
118
118
|
zustand: "^4.5.4"
|
|
119
119
|
},
|
|
120
120
|
devDependencies: {
|
|
121
|
-
"@bike4mind/agents": "0.4.
|
|
122
|
-
"@bike4mind/common": "2.75.
|
|
123
|
-
"@bike4mind/mcp": "1.33.
|
|
124
|
-
"@bike4mind/services": "2.68.
|
|
125
|
-
"@bike4mind/utils": "2.16.
|
|
121
|
+
"@bike4mind/agents": "0.4.1-fix-chunk-token-limit-overflow.21805+c654aa55d",
|
|
122
|
+
"@bike4mind/common": "2.75.1-fix-chunk-token-limit-overflow.21805+c654aa55d",
|
|
123
|
+
"@bike4mind/mcp": "1.33.21-fix-chunk-token-limit-overflow.21805+c654aa55d",
|
|
124
|
+
"@bike4mind/services": "2.68.3-fix-chunk-token-limit-overflow.21805+c654aa55d",
|
|
125
|
+
"@bike4mind/utils": "2.16.2-fix-chunk-token-limit-overflow.21805+c654aa55d",
|
|
126
126
|
"@types/better-sqlite3": "^7.6.13",
|
|
127
127
|
"@types/jsonwebtoken": "^9.0.4",
|
|
128
128
|
"@types/node": "^22.9.0",
|
|
@@ -139,7 +139,7 @@ var package_default = {
|
|
|
139
139
|
optionalDependencies: {
|
|
140
140
|
"@vscode/ripgrep": "^1.17.1"
|
|
141
141
|
},
|
|
142
|
-
gitHead: "
|
|
142
|
+
gitHead: "c654aa55d9c000f9b4c8a1cd92c6dbd5e0095ccc"
|
|
143
143
|
};
|
|
144
144
|
|
|
145
145
|
// src/utils/updateChecker.ts
|
|
@@ -9747,18 +9747,24 @@ var SmartChunker = class {
|
|
|
9747
9747
|
}
|
|
9748
9748
|
this.logger.updateMetadata({ mimeType });
|
|
9749
9749
|
this.logger.log(`Chunking file with type: ${mimeType}`);
|
|
9750
|
+
let chunks;
|
|
9750
9751
|
switch (mimeType) {
|
|
9751
9752
|
case SupportedFabFileMimeTypes.CSV:
|
|
9752
|
-
|
|
9753
|
+
chunks = await this.chunkCSV(content);
|
|
9754
|
+
break;
|
|
9753
9755
|
case SupportedFabFileMimeTypes.PDF:
|
|
9754
|
-
|
|
9756
|
+
chunks = await this.chunkPDF(content);
|
|
9757
|
+
break;
|
|
9755
9758
|
case SupportedFabFileMimeTypes.JSON:
|
|
9756
|
-
|
|
9759
|
+
chunks = await this.chunkJSON(content);
|
|
9760
|
+
break;
|
|
9757
9761
|
case SupportedFabFileMimeTypes.DOCX:
|
|
9758
|
-
|
|
9762
|
+
chunks = await this.chunkDOCX(content);
|
|
9763
|
+
break;
|
|
9759
9764
|
case SupportedFabFileMimeTypes.XLS:
|
|
9760
9765
|
case SupportedFabFileMimeTypes.XLSX:
|
|
9761
|
-
|
|
9766
|
+
chunks = await this.chunkExcel(content);
|
|
9767
|
+
break;
|
|
9762
9768
|
case SupportedFabFileMimeTypes.PNG:
|
|
9763
9769
|
case SupportedFabFileMimeTypes.JPG:
|
|
9764
9770
|
case SupportedFabFileMimeTypes.WEBP:
|
|
@@ -9774,14 +9780,17 @@ var SmartChunker = class {
|
|
|
9774
9780
|
case SupportedFabFileMimeTypes.RUBY:
|
|
9775
9781
|
case SupportedFabFileMimeTypes.SH:
|
|
9776
9782
|
case SupportedFabFileMimeTypes.BASH:
|
|
9777
|
-
|
|
9783
|
+
chunks = await this.chunkText(content.toString());
|
|
9784
|
+
break;
|
|
9778
9785
|
default:
|
|
9779
9786
|
if (mimeType && mimeType.startsWith("text/")) {
|
|
9780
|
-
|
|
9787
|
+
chunks = await this.chunkText(content.toString());
|
|
9788
|
+
break;
|
|
9781
9789
|
}
|
|
9782
9790
|
this.logger.error(`Unsupported file type: ${mimeType}`);
|
|
9783
9791
|
return [];
|
|
9784
9792
|
}
|
|
9793
|
+
return this.validateAndResplitChunks(chunks);
|
|
9785
9794
|
}
|
|
9786
9795
|
// Fetches the content of a file from storage as a Buffer
|
|
9787
9796
|
async fetchFileContent(file) {
|
|
@@ -9995,6 +10004,19 @@ var SmartChunker = class {
|
|
|
9995
10004
|
for (const word of words) {
|
|
9996
10005
|
const wordWithSpace = word + " ";
|
|
9997
10006
|
const wordTokens = await this.countTokens(wordWithSpace);
|
|
10007
|
+
if (wordTokens > this.chunkTokenLimit) {
|
|
10008
|
+
if (subChunk.trim().length > 0) {
|
|
10009
|
+
chunks.push({
|
|
10010
|
+
text: subChunk.trim(),
|
|
10011
|
+
tokenCount: subChunkTokens
|
|
10012
|
+
});
|
|
10013
|
+
subChunk = "";
|
|
10014
|
+
subChunkTokens = 0;
|
|
10015
|
+
}
|
|
10016
|
+
const wordChunks = await this.splitOversizedSegment(word);
|
|
10017
|
+
chunks.push(...wordChunks);
|
|
10018
|
+
continue;
|
|
10019
|
+
}
|
|
9998
10020
|
if (subChunkTokens + wordTokens > this.chunkTokenLimit) {
|
|
9999
10021
|
if (subChunk.trim().length > 0) {
|
|
10000
10022
|
chunks.push({
|
|
@@ -10142,6 +10164,104 @@ var SmartChunker = class {
|
|
|
10142
10164
|
console.log("Skipping image chunking as AI models can accept file image urls");
|
|
10143
10165
|
return chunks;
|
|
10144
10166
|
}
|
|
10167
|
+
/**
|
|
10168
|
+
* Encode text into token IDs. Uses tiktoken for OpenAI models,
|
|
10169
|
+
* falls back to character-based splitting for VoyageAI/Bedrock.
|
|
10170
|
+
*/
|
|
10171
|
+
async encodeTokens(text) {
|
|
10172
|
+
if (isEmbeddingModel(this.model, OpenAIEmbeddingModel)) {
|
|
10173
|
+
await this.initializeEncoder();
|
|
10174
|
+
return Array.from(this.encoder.encode(text));
|
|
10175
|
+
}
|
|
10176
|
+
const charsPerToken = isEmbeddingModel(this.model, VoyageAIEmbeddingModel) ? 3.7 : 4;
|
|
10177
|
+
const groupSize = Math.max(1, Math.round(charsPerToken));
|
|
10178
|
+
const tokens = [];
|
|
10179
|
+
for (let i = 0; i < text.length; i += groupSize) {
|
|
10180
|
+
tokens.push(i);
|
|
10181
|
+
}
|
|
10182
|
+
return tokens;
|
|
10183
|
+
}
|
|
10184
|
+
/**
|
|
10185
|
+
* Decode token IDs back to text. Uses tiktoken for OpenAI models,
|
|
10186
|
+
* falls back to character-based reconstruction for VoyageAI/Bedrock.
|
|
10187
|
+
*
|
|
10188
|
+
* For non-OpenAI models, originalText is REQUIRED — the pseudo-token IDs from
|
|
10189
|
+
* encodeTokens() are character offsets, so decoding reconstructs by slicing
|
|
10190
|
+
* the original string. Returns '' if originalText is omitted for non-OpenAI models.
|
|
10191
|
+
*
|
|
10192
|
+
* Note: splitOversizedSegment() uses character slicing directly for non-OpenAI
|
|
10193
|
+
* models and does not call this method, so this is only used in the OpenAI path today.
|
|
10194
|
+
*/
|
|
10195
|
+
async decodeTokens(tokens, originalText) {
|
|
10196
|
+
if (isEmbeddingModel(this.model, OpenAIEmbeddingModel)) {
|
|
10197
|
+
await this.initializeEncoder();
|
|
10198
|
+
const decoded = this.encoder.decode(new Uint32Array(tokens));
|
|
10199
|
+
if (typeof decoded === "string")
|
|
10200
|
+
return decoded;
|
|
10201
|
+
return new TextDecoder().decode(decoded);
|
|
10202
|
+
}
|
|
10203
|
+
if (!originalText)
|
|
10204
|
+
return "";
|
|
10205
|
+
const charsPerToken = isEmbeddingModel(this.model, VoyageAIEmbeddingModel) ? 3.7 : 4;
|
|
10206
|
+
const groupSize = Math.max(1, Math.round(charsPerToken));
|
|
10207
|
+
const startIdx = tokens[0] ?? 0;
|
|
10208
|
+
const endIdx = (tokens[tokens.length - 1] ?? 0) + groupSize;
|
|
10209
|
+
return originalText.slice(startIdx, Math.min(endIdx, originalText.length));
|
|
10210
|
+
}
|
|
10211
|
+
/**
|
|
10212
|
+
* Split an oversized text segment using encode-slice-decode for guaranteed correct splitting.
|
|
10213
|
+
* Works with any model: tiktoken for OpenAI, character-based for others.
|
|
10214
|
+
*/
|
|
10215
|
+
async splitOversizedSegment(text) {
|
|
10216
|
+
const chunks = [];
|
|
10217
|
+
if (isEmbeddingModel(this.model, OpenAIEmbeddingModel)) {
|
|
10218
|
+
const encoded = await this.encodeTokens(text);
|
|
10219
|
+
for (let j = 0; j < encoded.length; j += this.chunkTokenLimit) {
|
|
10220
|
+
const segmentTokens = encoded.slice(j, j + this.chunkTokenLimit);
|
|
10221
|
+
const segment = await this.decodeTokens(segmentTokens);
|
|
10222
|
+
if (segment.trim().length > 0) {
|
|
10223
|
+
chunks.push({ text: segment, tokenCount: segmentTokens.length });
|
|
10224
|
+
}
|
|
10225
|
+
}
|
|
10226
|
+
} else {
|
|
10227
|
+
const charsPerToken = isEmbeddingModel(this.model, VoyageAIEmbeddingModel) ? 3.7 : 4;
|
|
10228
|
+
const charsPerChunk = Math.floor(this.chunkTokenLimit * charsPerToken);
|
|
10229
|
+
for (let j = 0; j < text.length; j += charsPerChunk) {
|
|
10230
|
+
const segment = text.slice(j, j + charsPerChunk);
|
|
10231
|
+
if (segment.trim().length > 0) {
|
|
10232
|
+
const tokenCount = await this.countTokens(segment);
|
|
10233
|
+
chunks.push({ text: segment, tokenCount });
|
|
10234
|
+
}
|
|
10235
|
+
}
|
|
10236
|
+
}
|
|
10237
|
+
return chunks;
|
|
10238
|
+
}
|
|
10239
|
+
/**
|
|
10240
|
+
* Post-chunking validation: re-split any chunks that still exceed the token limit.
|
|
10241
|
+
* Bounded to max 3 passes to prevent infinite loops.
|
|
10242
|
+
*/
|
|
10243
|
+
async validateAndResplitChunks(chunks) {
|
|
10244
|
+
let result = chunks;
|
|
10245
|
+
for (let pass = 0; pass < 3; pass++) {
|
|
10246
|
+
let allValid = true;
|
|
10247
|
+
const validated = [];
|
|
10248
|
+
for (const chunk of result) {
|
|
10249
|
+
const actualTokens = await this.countTokens(chunk.text);
|
|
10250
|
+
if (actualTokens > this.chunkTokenLimit) {
|
|
10251
|
+
allValid = false;
|
|
10252
|
+
this.logger.warn(`Chunk exceeds limit (${actualTokens} > ${this.chunkTokenLimit}), re-splitting (pass ${pass + 1})`);
|
|
10253
|
+
const resplit = await this.splitOversizedSegment(chunk.text);
|
|
10254
|
+
validated.push(...resplit);
|
|
10255
|
+
} else {
|
|
10256
|
+
validated.push({ ...chunk, tokenCount: actualTokens });
|
|
10257
|
+
}
|
|
10258
|
+
}
|
|
10259
|
+
result = validated;
|
|
10260
|
+
if (allValid)
|
|
10261
|
+
break;
|
|
10262
|
+
}
|
|
10263
|
+
return result.filter((c) => c.text.trim().length > 0);
|
|
10264
|
+
}
|
|
10145
10265
|
// Counts the number of tokens in the given text using the appropriate tokenization method
|
|
10146
10266
|
async countTokens(text) {
|
|
10147
10267
|
if (isEmbeddingModel(this.model, OpenAIEmbeddingModel)) {
|
|
@@ -4,7 +4,7 @@ import {
|
|
|
4
4
|
getOpenWeatherKey,
|
|
5
5
|
getSerperKey,
|
|
6
6
|
getWolframAlphaKey
|
|
7
|
-
} from "./chunk-2J4HB7EB.js";
|
|
7
|
+
} from "./chunk-WBE7SQUB.js";
|
|
8
8
|
import {
|
|
9
9
|
assertPathAllowed,
|
|
10
10
|
isPathAllowed
|
|
@@ -20,7 +20,7 @@ import {
|
|
|
20
20
|
OpenAIBackend,
|
|
21
21
|
OpenAIImageService,
|
|
22
22
|
XAIImageService
|
|
23
|
-
} from "./chunk-EO2Y5GFY.js";
|
|
23
|
+
} from "./chunk-EPIYC3LA.js";
|
|
24
24
|
import {
|
|
25
25
|
Logger
|
|
26
26
|
} from "./chunk-PFBYGCOW.js";
|
|
@@ -36,14 +36,14 @@ import {
|
|
|
36
36
|
isReadOnlyTool,
|
|
37
37
|
loadContextFiles,
|
|
38
38
|
setWebSocketToolExecutor
|
|
39
|
-
} from "../chunk-MNBT2VFX.js";
|
|
39
|
+
} from "../chunk-YHHCU4PX.js";
|
|
40
40
|
import "../chunk-BDQBOLYG.js";
|
|
41
|
-
import "../chunk-2J4HB7EB.js";
|
|
41
|
+
import "../chunk-WBE7SQUB.js";
|
|
42
42
|
import "../chunk-GQGOWACU.js";
|
|
43
43
|
import "../chunk-LTLJRF6I.js";
|
|
44
|
-
import "../chunk-PJFESKK6.js";
|
|
45
|
-
import "../chunk-Y7K4HI6L.js";
|
|
46
|
-
import "../chunk-EO2Y5GFY.js";
|
|
44
|
+
import "../chunk-5LZS5CVJ.js";
|
|
45
|
+
import "../chunk-UZUHPHZC.js";
|
|
46
|
+
import "../chunk-EPIYC3LA.js";
|
|
47
47
|
import "../chunk-PFBYGCOW.js";
|
|
48
48
|
import "../chunk-BPFEGDC7.js";
|
|
49
49
|
import {
|
package/dist/index.js
CHANGED
|
@@ -48,16 +48,16 @@ import {
|
|
|
48
48
|
setWebSocketToolExecutor,
|
|
49
49
|
substituteArguments,
|
|
50
50
|
warmFileCache
|
|
51
|
-
} from "./chunk-MNBT2VFX.js";
|
|
51
|
+
} from "./chunk-YHHCU4PX.js";
|
|
52
52
|
import "./chunk-BDQBOLYG.js";
|
|
53
|
-
import "./chunk-2J4HB7EB.js";
|
|
53
|
+
import "./chunk-WBE7SQUB.js";
|
|
54
54
|
import "./chunk-GQGOWACU.js";
|
|
55
55
|
import "./chunk-LTLJRF6I.js";
|
|
56
|
-
import "./chunk-PJFESKK6.js";
|
|
57
|
-
import "./chunk-Y7K4HI6L.js";
|
|
56
|
+
import "./chunk-5LZS5CVJ.js";
|
|
57
|
+
import "./chunk-UZUHPHZC.js";
|
|
58
58
|
import {
|
|
59
59
|
OllamaBackend
|
|
60
|
-
} from "./chunk-EO2Y5GFY.js";
|
|
60
|
+
} from "./chunk-EPIYC3LA.js";
|
|
61
61
|
import "./chunk-PFBYGCOW.js";
|
|
62
62
|
import "./chunk-BPFEGDC7.js";
|
|
63
63
|
import {
|
|
@@ -67,7 +67,7 @@ import {
|
|
|
67
67
|
import {
|
|
68
68
|
checkForUpdate,
|
|
69
69
|
package_default
|
|
70
|
-
} from "./chunk-VJLPCIK2.js";
|
|
70
|
+
} from "./chunk-C7N7VYXQ.js";
|
|
71
71
|
import {
|
|
72
72
|
selectActiveBackgroundAgents,
|
|
73
73
|
useCliStore
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bike4mind/cli",
|
|
3
|
-
"version": "0.2.62",
|
|
3
|
+
"version": "0.2.63-fix-chunk-token-limit-overflow.21805+c654aa55d",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Interactive CLI tool for Bike4Mind with ReAct agents",
|
|
6
6
|
"license": "UNLICENSED",
|
|
@@ -115,11 +115,11 @@
|
|
|
115
115
|
"zustand": "^4.5.4"
|
|
116
116
|
},
|
|
117
117
|
"devDependencies": {
|
|
118
|
-
"@bike4mind/agents": "0.4.
|
|
119
|
-
"@bike4mind/common": "2.75.
|
|
120
|
-
"@bike4mind/mcp": "1.33.
|
|
121
|
-
"@bike4mind/services": "2.68.
|
|
122
|
-
"@bike4mind/utils": "2.16.
|
|
118
|
+
"@bike4mind/agents": "0.4.1-fix-chunk-token-limit-overflow.21805+c654aa55d",
|
|
119
|
+
"@bike4mind/common": "2.75.1-fix-chunk-token-limit-overflow.21805+c654aa55d",
|
|
120
|
+
"@bike4mind/mcp": "1.33.21-fix-chunk-token-limit-overflow.21805+c654aa55d",
|
|
121
|
+
"@bike4mind/services": "2.68.3-fix-chunk-token-limit-overflow.21805+c654aa55d",
|
|
122
|
+
"@bike4mind/utils": "2.16.2-fix-chunk-token-limit-overflow.21805+c654aa55d",
|
|
123
123
|
"@types/better-sqlite3": "^7.6.13",
|
|
124
124
|
"@types/jsonwebtoken": "^9.0.4",
|
|
125
125
|
"@types/node": "^22.9.0",
|
|
@@ -136,5 +136,5 @@
|
|
|
136
136
|
"optionalDependencies": {
|
|
137
137
|
"@vscode/ripgrep": "^1.17.1"
|
|
138
138
|
},
|
|
139
|
-
"gitHead": "
|
|
139
|
+
"gitHead": "c654aa55d9c000f9b4c8a1cd92c6dbd5e0095ccc"
|
|
140
140
|
}
|