@bike4mind/cli 0.2.62 → 0.2.63-fix-chunk-token-limit-overflow.21805

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
  import {
3
3
  BadRequestError,
4
4
  secureParameters
5
- } from "./chunk-EO2Y5GFY.js";
5
+ } from "./chunk-EPIYC3LA.js";
6
6
  import {
7
7
  CompletionApiUsageTransaction,
8
8
  GenericCreditDeductTransaction,
@@ -3,7 +3,7 @@
3
3
  // package.json
4
4
  var package_default = {
5
5
  name: "@bike4mind/cli",
6
- version: "0.2.62",
6
+ version: "0.2.63-fix-chunk-token-limit-overflow.21805+c654aa55d",
7
7
  type: "module",
8
8
  description: "Interactive CLI tool for Bike4Mind with ReAct agents",
9
9
  license: "UNLICENSED",
@@ -118,11 +118,11 @@ var package_default = {
118
118
  zustand: "^4.5.4"
119
119
  },
120
120
  devDependencies: {
121
- "@bike4mind/agents": "0.4.0",
122
- "@bike4mind/common": "2.75.0",
123
- "@bike4mind/mcp": "1.33.20",
124
- "@bike4mind/services": "2.68.2",
125
- "@bike4mind/utils": "2.16.1",
121
+ "@bike4mind/agents": "0.4.1-fix-chunk-token-limit-overflow.21805+c654aa55d",
122
+ "@bike4mind/common": "2.75.1-fix-chunk-token-limit-overflow.21805+c654aa55d",
123
+ "@bike4mind/mcp": "1.33.21-fix-chunk-token-limit-overflow.21805+c654aa55d",
124
+ "@bike4mind/services": "2.68.3-fix-chunk-token-limit-overflow.21805+c654aa55d",
125
+ "@bike4mind/utils": "2.16.2-fix-chunk-token-limit-overflow.21805+c654aa55d",
126
126
  "@types/better-sqlite3": "^7.6.13",
127
127
  "@types/jsonwebtoken": "^9.0.4",
128
128
  "@types/node": "^22.9.0",
@@ -139,7 +139,7 @@ var package_default = {
139
139
  optionalDependencies: {
140
140
  "@vscode/ripgrep": "^1.17.1"
141
141
  },
142
- gitHead: "faae6cba90cc104961ee4dbef74dbe51d599b9dc"
142
+ gitHead: "c654aa55d9c000f9b4c8a1cd92c6dbd5e0095ccc"
143
143
  };
144
144
 
145
145
  // src/utils/updateChecker.ts
@@ -9747,18 +9747,24 @@ var SmartChunker = class {
9747
9747
  }
9748
9748
  this.logger.updateMetadata({ mimeType });
9749
9749
  this.logger.log(`Chunking file with type: ${mimeType}`);
9750
+ let chunks;
9750
9751
  switch (mimeType) {
9751
9752
  case SupportedFabFileMimeTypes.CSV:
9752
- return this.chunkCSV(content);
9753
+ chunks = await this.chunkCSV(content);
9754
+ break;
9753
9755
  case SupportedFabFileMimeTypes.PDF:
9754
- return this.chunkPDF(content);
9756
+ chunks = await this.chunkPDF(content);
9757
+ break;
9755
9758
  case SupportedFabFileMimeTypes.JSON:
9756
- return this.chunkJSON(content);
9759
+ chunks = await this.chunkJSON(content);
9760
+ break;
9757
9761
  case SupportedFabFileMimeTypes.DOCX:
9758
- return this.chunkDOCX(content);
9762
+ chunks = await this.chunkDOCX(content);
9763
+ break;
9759
9764
  case SupportedFabFileMimeTypes.XLS:
9760
9765
  case SupportedFabFileMimeTypes.XLSX:
9761
- return this.chunkExcel(content);
9766
+ chunks = await this.chunkExcel(content);
9767
+ break;
9762
9768
  case SupportedFabFileMimeTypes.PNG:
9763
9769
  case SupportedFabFileMimeTypes.JPG:
9764
9770
  case SupportedFabFileMimeTypes.WEBP:
@@ -9774,14 +9780,17 @@ var SmartChunker = class {
9774
9780
  case SupportedFabFileMimeTypes.RUBY:
9775
9781
  case SupportedFabFileMimeTypes.SH:
9776
9782
  case SupportedFabFileMimeTypes.BASH:
9777
- return this.chunkText(content.toString());
9783
+ chunks = await this.chunkText(content.toString());
9784
+ break;
9778
9785
  default:
9779
9786
  if (mimeType && mimeType.startsWith("text/")) {
9780
- return this.chunkText(content.toString());
9787
+ chunks = await this.chunkText(content.toString());
9788
+ break;
9781
9789
  }
9782
9790
  this.logger.error(`Unsupported file type: ${mimeType}`);
9783
9791
  return [];
9784
9792
  }
9793
+ return this.validateAndResplitChunks(chunks);
9785
9794
  }
9786
9795
  // Fetches the content of a file from storage as a Buffer
9787
9796
  async fetchFileContent(file) {
@@ -9995,6 +10004,19 @@ var SmartChunker = class {
9995
10004
  for (const word of words) {
9996
10005
  const wordWithSpace = word + " ";
9997
10006
  const wordTokens = await this.countTokens(wordWithSpace);
10007
+ if (wordTokens > this.chunkTokenLimit) {
10008
+ if (subChunk.trim().length > 0) {
10009
+ chunks.push({
10010
+ text: subChunk.trim(),
10011
+ tokenCount: subChunkTokens
10012
+ });
10013
+ subChunk = "";
10014
+ subChunkTokens = 0;
10015
+ }
10016
+ const wordChunks = await this.splitOversizedSegment(word);
10017
+ chunks.push(...wordChunks);
10018
+ continue;
10019
+ }
9998
10020
  if (subChunkTokens + wordTokens > this.chunkTokenLimit) {
9999
10021
  if (subChunk.trim().length > 0) {
10000
10022
  chunks.push({
@@ -10142,6 +10164,104 @@ var SmartChunker = class {
10142
10164
  console.log("Skipping image chunking as AI models can accept file image urls");
10143
10165
  return chunks;
10144
10166
  }
10167
+ /**
10168
+ * Encode text into token IDs. Uses tiktoken for OpenAI models,
10169
+ * falls back to character-based splitting for VoyageAI/Bedrock.
10170
+ */
10171
+ async encodeTokens(text) {
10172
+ if (isEmbeddingModel(this.model, OpenAIEmbeddingModel)) {
10173
+ await this.initializeEncoder();
10174
+ return Array.from(this.encoder.encode(text));
10175
+ }
10176
+ const charsPerToken = isEmbeddingModel(this.model, VoyageAIEmbeddingModel) ? 3.7 : 4;
10177
+ const groupSize = Math.max(1, Math.round(charsPerToken));
10178
+ const tokens = [];
10179
+ for (let i = 0; i < text.length; i += groupSize) {
10180
+ tokens.push(i);
10181
+ }
10182
+ return tokens;
10183
+ }
10184
+ /**
10185
+ * Decode token IDs back to text. Uses tiktoken for OpenAI models,
10186
+ * falls back to character-based reconstruction for VoyageAI/Bedrock.
10187
+ *
10188
+ * For non-OpenAI models, originalText is REQUIRED — the pseudo-token IDs from
10189
+ * encodeTokens() are character offsets, so decoding reconstructs by slicing
10190
+ * the original string. Returns '' if originalText is omitted for non-OpenAI models.
10191
+ *
10192
+ * Note: splitOversizedSegment() uses character slicing directly for non-OpenAI
10193
+ * models and does not call this method, so this is only used in the OpenAI path today.
10194
+ */
10195
+ async decodeTokens(tokens, originalText) {
10196
+ if (isEmbeddingModel(this.model, OpenAIEmbeddingModel)) {
10197
+ await this.initializeEncoder();
10198
+ const decoded = this.encoder.decode(new Uint32Array(tokens));
10199
+ if (typeof decoded === "string")
10200
+ return decoded;
10201
+ return new TextDecoder().decode(decoded);
10202
+ }
10203
+ if (!originalText)
10204
+ return "";
10205
+ const charsPerToken = isEmbeddingModel(this.model, VoyageAIEmbeddingModel) ? 3.7 : 4;
10206
+ const groupSize = Math.max(1, Math.round(charsPerToken));
10207
+ const startIdx = tokens[0] ?? 0;
10208
+ const endIdx = (tokens[tokens.length - 1] ?? 0) + groupSize;
10209
+ return originalText.slice(startIdx, Math.min(endIdx, originalText.length));
10210
+ }
10211
+ /**
10212
+ * Split an oversized text segment using encode-slice-decode for guaranteed correct splitting.
10213
+ * Works with any model: tiktoken for OpenAI, character-based for others.
10214
+ */
10215
+ async splitOversizedSegment(text) {
10216
+ const chunks = [];
10217
+ if (isEmbeddingModel(this.model, OpenAIEmbeddingModel)) {
10218
+ const encoded = await this.encodeTokens(text);
10219
+ for (let j = 0; j < encoded.length; j += this.chunkTokenLimit) {
10220
+ const segmentTokens = encoded.slice(j, j + this.chunkTokenLimit);
10221
+ const segment = await this.decodeTokens(segmentTokens);
10222
+ if (segment.trim().length > 0) {
10223
+ chunks.push({ text: segment, tokenCount: segmentTokens.length });
10224
+ }
10225
+ }
10226
+ } else {
10227
+ const charsPerToken = isEmbeddingModel(this.model, VoyageAIEmbeddingModel) ? 3.7 : 4;
10228
+ const charsPerChunk = Math.floor(this.chunkTokenLimit * charsPerToken);
10229
+ for (let j = 0; j < text.length; j += charsPerChunk) {
10230
+ const segment = text.slice(j, j + charsPerChunk);
10231
+ if (segment.trim().length > 0) {
10232
+ const tokenCount = await this.countTokens(segment);
10233
+ chunks.push({ text: segment, tokenCount });
10234
+ }
10235
+ }
10236
+ }
10237
+ return chunks;
10238
+ }
10239
+ /**
10240
+ * Post-chunking validation: re-split any chunks that still exceed the token limit.
10241
+ * Bounded to max 3 passes to prevent infinite loops.
10242
+ */
10243
+ async validateAndResplitChunks(chunks) {
10244
+ let result = chunks;
10245
+ for (let pass = 0; pass < 3; pass++) {
10246
+ let allValid = true;
10247
+ const validated = [];
10248
+ for (const chunk of result) {
10249
+ const actualTokens = await this.countTokens(chunk.text);
10250
+ if (actualTokens > this.chunkTokenLimit) {
10251
+ allValid = false;
10252
+ this.logger.warn(`Chunk exceeds limit (${actualTokens} > ${this.chunkTokenLimit}), re-splitting (pass ${pass + 1})`);
10253
+ const resplit = await this.splitOversizedSegment(chunk.text);
10254
+ validated.push(...resplit);
10255
+ } else {
10256
+ validated.push({ ...chunk, tokenCount: actualTokens });
10257
+ }
10258
+ }
10259
+ result = validated;
10260
+ if (allValid)
10261
+ break;
10262
+ }
10263
+ return result.filter((c) => c.text.trim().length > 0);
10264
+ }
10145
10265
  // Counts the number of tokens in the given text using the appropriate tokenization method
10146
10266
  async countTokens(text) {
10147
10267
  if (isEmbeddingModel(this.model, OpenAIEmbeddingModel)) {
@@ -7,7 +7,7 @@ import {
7
7
  getSettingsMap,
8
8
  getSettingsValue,
9
9
  secureParameters
10
- } from "./chunk-EO2Y5GFY.js";
10
+ } from "./chunk-EPIYC3LA.js";
11
11
  import {
12
12
  KnowledgeType,
13
13
  SupportedFabFileMimeTypes
@@ -6,7 +6,7 @@ import {
6
6
  getSettingsByNames,
7
7
  obfuscateApiKey,
8
8
  secureParameters
9
- } from "./chunk-EO2Y5GFY.js";
9
+ } from "./chunk-EPIYC3LA.js";
10
10
  import {
11
11
  ApiKeyType,
12
12
  MementoTier,
@@ -4,7 +4,7 @@ import {
4
4
  getOpenWeatherKey,
5
5
  getSerperKey,
6
6
  getWolframAlphaKey
7
- } from "./chunk-2J4HB7EB.js";
7
+ } from "./chunk-WBE7SQUB.js";
8
8
  import {
9
9
  assertPathAllowed,
10
10
  isPathAllowed
@@ -20,7 +20,7 @@ import {
20
20
  OpenAIBackend,
21
21
  OpenAIImageService,
22
22
  XAIImageService
23
- } from "./chunk-EO2Y5GFY.js";
23
+ } from "./chunk-EPIYC3LA.js";
24
24
  import {
25
25
  Logger
26
26
  } from "./chunk-PFBYGCOW.js";
@@ -3,7 +3,7 @@ import {
3
3
  fetchLatestVersion,
4
4
  forceCheckForUpdate,
5
5
  package_default
6
- } from "../chunk-VJLPCIK2.js";
6
+ } from "../chunk-C7N7VYXQ.js";
7
7
 
8
8
  // src/commands/doctorCommand.ts
9
9
  import { execSync } from "child_process";
@@ -36,14 +36,14 @@ import {
36
36
  isReadOnlyTool,
37
37
  loadContextFiles,
38
38
  setWebSocketToolExecutor
39
- } from "../chunk-MNBT2VFX.js";
39
+ } from "../chunk-YHHCU4PX.js";
40
40
  import "../chunk-BDQBOLYG.js";
41
- import "../chunk-2J4HB7EB.js";
41
+ import "../chunk-WBE7SQUB.js";
42
42
  import "../chunk-GQGOWACU.js";
43
43
  import "../chunk-LTLJRF6I.js";
44
- import "../chunk-PJFESKK6.js";
45
- import "../chunk-Y7K4HI6L.js";
46
- import "../chunk-EO2Y5GFY.js";
44
+ import "../chunk-5LZS5CVJ.js";
45
+ import "../chunk-UZUHPHZC.js";
46
+ import "../chunk-EPIYC3LA.js";
47
47
  import "../chunk-PFBYGCOW.js";
48
48
  import "../chunk-BPFEGDC7.js";
49
49
  import {
@@ -2,7 +2,7 @@
2
2
  import {
3
3
  forceCheckForUpdate,
4
4
  package_default
5
- } from "../chunk-VJLPCIK2.js";
5
+ } from "../chunk-C7N7VYXQ.js";
6
6
 
7
7
  // src/commands/updateCommand.ts
8
8
  import { execSync } from "child_process";
@@ -2,8 +2,8 @@
2
2
  import {
3
3
  createFabFile,
4
4
  createFabFileSchema
5
- } from "./chunk-Y7K4HI6L.js";
6
- import "./chunk-EO2Y5GFY.js";
5
+ } from "./chunk-UZUHPHZC.js";
6
+ import "./chunk-EPIYC3LA.js";
7
7
  import "./chunk-PFBYGCOW.js";
8
8
  import "./chunk-JW3JRHH7.js";
9
9
  export {
package/dist/index.js CHANGED
@@ -48,16 +48,16 @@ import {
48
48
  setWebSocketToolExecutor,
49
49
  substituteArguments,
50
50
  warmFileCache
51
- } from "./chunk-MNBT2VFX.js";
51
+ } from "./chunk-YHHCU4PX.js";
52
52
  import "./chunk-BDQBOLYG.js";
53
- import "./chunk-2J4HB7EB.js";
53
+ import "./chunk-WBE7SQUB.js";
54
54
  import "./chunk-GQGOWACU.js";
55
55
  import "./chunk-LTLJRF6I.js";
56
- import "./chunk-PJFESKK6.js";
57
- import "./chunk-Y7K4HI6L.js";
56
+ import "./chunk-5LZS5CVJ.js";
57
+ import "./chunk-UZUHPHZC.js";
58
58
  import {
59
59
  OllamaBackend
60
- } from "./chunk-EO2Y5GFY.js";
60
+ } from "./chunk-EPIYC3LA.js";
61
61
  import "./chunk-PFBYGCOW.js";
62
62
  import "./chunk-BPFEGDC7.js";
63
63
  import {
@@ -67,7 +67,7 @@ import {
67
67
  import {
68
68
  checkForUpdate,
69
69
  package_default
70
- } from "./chunk-VJLPCIK2.js";
70
+ } from "./chunk-C7N7VYXQ.js";
71
71
  import {
72
72
  selectActiveBackgroundAgents,
73
73
  useCliStore
@@ -2,8 +2,8 @@
2
2
  import {
3
3
  findMostSimilarMemento,
4
4
  getRelevantMementos
5
- } from "./chunk-2J4HB7EB.js";
6
- import "./chunk-EO2Y5GFY.js";
5
+ } from "./chunk-WBE7SQUB.js";
6
+ import "./chunk-EPIYC3LA.js";
7
7
  import "./chunk-PFBYGCOW.js";
8
8
  import "./chunk-JW3JRHH7.js";
9
9
  export {
@@ -146,7 +146,7 @@ import {
146
146
  validateUrlForFetch,
147
147
  warmUpSettingsCache,
148
148
  withRetry
149
- } from "./chunk-EO2Y5GFY.js";
149
+ } from "./chunk-EPIYC3LA.js";
150
150
  import {
151
151
  Logger,
152
152
  NotificationDeduplicator,
@@ -2,8 +2,8 @@
2
2
  import {
3
3
  SubtractCreditsSchema,
4
4
  subtractCredits
5
- } from "./chunk-PJFESKK6.js";
6
- import "./chunk-EO2Y5GFY.js";
5
+ } from "./chunk-5LZS5CVJ.js";
6
+ import "./chunk-EPIYC3LA.js";
7
7
  import "./chunk-PFBYGCOW.js";
8
8
  import "./chunk-JW3JRHH7.js";
9
9
  export {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bike4mind/cli",
3
- "version": "0.2.62",
3
+ "version": "0.2.63-fix-chunk-token-limit-overflow.21805+c654aa55d",
4
4
  "type": "module",
5
5
  "description": "Interactive CLI tool for Bike4Mind with ReAct agents",
6
6
  "license": "UNLICENSED",
@@ -115,11 +115,11 @@
115
115
  "zustand": "^4.5.4"
116
116
  },
117
117
  "devDependencies": {
118
- "@bike4mind/agents": "0.4.0",
119
- "@bike4mind/common": "2.75.0",
120
- "@bike4mind/mcp": "1.33.20",
121
- "@bike4mind/services": "2.68.2",
122
- "@bike4mind/utils": "2.16.1",
118
+ "@bike4mind/agents": "0.4.1-fix-chunk-token-limit-overflow.21805+c654aa55d",
119
+ "@bike4mind/common": "2.75.1-fix-chunk-token-limit-overflow.21805+c654aa55d",
120
+ "@bike4mind/mcp": "1.33.21-fix-chunk-token-limit-overflow.21805+c654aa55d",
121
+ "@bike4mind/services": "2.68.3-fix-chunk-token-limit-overflow.21805+c654aa55d",
122
+ "@bike4mind/utils": "2.16.2-fix-chunk-token-limit-overflow.21805+c654aa55d",
123
123
  "@types/better-sqlite3": "^7.6.13",
124
124
  "@types/jsonwebtoken": "^9.0.4",
125
125
  "@types/node": "^22.9.0",
@@ -136,5 +136,5 @@
136
136
  "optionalDependencies": {
137
137
  "@vscode/ripgrep": "^1.17.1"
138
138
  },
139
- "gitHead": "faae6cba90cc104961ee4dbef74dbe51d599b9dc"
139
+ "gitHead": "c654aa55d9c000f9b4c8a1cd92c6dbd5e0095ccc"
140
140
  }