@vivantel/virage-core 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"markdown-headers.d.ts","sourceRoot":"","sources":["../../../src/strategies/chunk/markdown-headers.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAS,MAAM,2BAA2B,CAAC;AAIjE,MAAM,WAAW,sBAAsB;IACrC,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,wBAAgB,uBAAuB,CACrC,OAAO,GAAE,sBAA2B,GACnC,aAAa,CAoGf"}
1
+ {"version":3,"file":"markdown-headers.d.ts","sourceRoot":"","sources":["../../../src/strategies/chunk/markdown-headers.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAS,MAAM,2BAA2B,CAAC;AAIjE,MAAM,WAAW,sBAAsB;IACrC,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,wBAAgB,uBAAuB,CACrC,OAAO,GAAE,sBAA2B,GACnC,aAAa,CAuGf"}
@@ -6,7 +6,8 @@ export function markdownHeadersStrategy(options = {}) {
6
6
  name: "markdown-headers",
7
7
  async chunk(text, filePath) {
8
8
  const chunks = [];
9
- const lines = text.split("\n");
9
+ // Normalise line endings so CRLF files don't leave \r on header text/content.
10
+ const lines = text.split("\n").map((l) => l.replace(/\r$/, ""));
10
11
  let currentChunk = [];
11
12
  let currentHeader = "";
12
13
  let currentHeaderLevel = 0;
@@ -48,12 +49,14 @@ export function markdownHeadersStrategy(options = {}) {
48
49
  strategy: this.name,
49
50
  header: currentHeader,
50
51
  header_level: currentHeaderLevel,
52
+ source_file: filePath,
51
53
  truncated: true,
52
54
  },
53
55
  sourceFile: filePath || "unknown",
54
56
  commitHash: "",
55
57
  });
56
- currentChunk = [];
58
+ // Keep the header line so the continuation chunk is self-contained.
59
+ currentChunk = currentChunk.length > 0 ? [currentChunk[0]] : [];
57
60
  }
58
61
  }
59
62
  // Last chunk
@@ -1 +1 @@
1
- {"version":3,"file":"markdown-headers.js","sourceRoot":"","sources":["../../../src/strategies/chunk/markdown-headers.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,0BAA0B,EAAE,MAAM,sBAAsB,CAAC;AAOlE,MAAM,UAAU,uBAAuB,CACrC,UAAkC,EAAE;IAEpC,MAAM,YAAY,GAAG,OAAO,CAAC,YAAY,IAAI,GAAG,CAAC;IACjD,MAAM,YAAY,GAAG,OAAO,CAAC,YAAY,IAAI,IAAI,CAAC;IAElD,OAAO;QACL,IAAI,EAAE,kBAAkB;QAExB,KAAK,CAAC,KAAK,CAAC,IAAY,EAAE,QAAiB;YACzC,MAAM,MAAM,GAAY,EAAE,CAAC;YAC3B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAE/B,IAAI,YAAY,GAAa,EAAE,CAAC;YAChC,IAAI,aAAa,GAAG,EAAE,CAAC;YACvB,IAAI,kBAAkB,GAAG,CAAC,CAAC;YAE3B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBACzB,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,mBAAmB,CAAC,CAAC;gBAEpD,IAAI,WAAW,EAAE,CAAC;oBAChB,mCAAmC;oBACnC,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;wBAC5B,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;wBAC/C,IAAI,OAAO,CAAC,MAAM,IAAI,YAAY,EAAE,CAAC;4BACnC,MAAM,CAAC,IAAI,CAAC;gCACV,OAAO;gCACP,QAAQ,EAAE;oCACR,QAAQ,EAAE,IAAI,CAAC,IAAI;oCACnB,MAAM,EAAE,aAAa;oCACrB,YAAY,EAAE,kBAAkB;oCAChC,WAAW,EAAE,QAAQ;iCACtB;gCACD,UAAU,EAAE,QAAQ,IAAI,SAAS;gCACjC,UAAU,EAAE,EAAE;6BACf,CAAC,CAAC;wBACL,CAAC;oBACH,CAAC;oBAED,kBAAkB;oBAClB,kBAAkB,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;oBAC3C,aAAa,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;oBAC/B,YAAY,GAAG,CAAC,IAAI,CAAC,CAAC;gBACxB,CAAC;qBAAM,CAAC;oBACN,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBAC1B,CAAC;gBAED,wCAAwC;gBACxC,MAAM,WAAW,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;gBACnD,IAAI,WAAW,GAAG,YAAY,IAAI,YAAY,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;oBAC3D,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;oBAC/C,MAAM,CAAC,IAAI,CAAC;wBACV,OAAO;wBACP,QAAQ,EAAE;4BACR,QAAQ,EAAE,IAAI,CAAC,IAAI;4BACnB,MAAM,EAAE,aAAa;4BACrB,YAAY,EAAE,kBAAkB;4BAChC,SAAS,EAAE,IAAI;yBAChB;wBACD,UAAU,EAAE,QAAQ,IAAI,SAAS;wBACjC,UAAU,EAAE,EAAE;qBACf,CAAC,CAAC;oBACH,YAAY,GAAG,EAAE,CAAC;gBACpB,CAAC;YACH,CAAC;YAED,aAAa;YACb,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC5B,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC/C,IAAI,OAAO,CAAC,MAAM,IAAI,YAAY,EAAE,CAAC;oBACnC,MAAM,CAAC,IAAI,CAAC;wBACV,OAAO;wBACP,QAAQ,EAAE;4BACR,QAAQ,EAAE,IAAI,CAAC,IAAI;4BACnB,MAAM,EAAE,aAAa;4BACrB,YAAY,EAAE,kBAAkB;4BAChC,WAAW,EAAE,QAAQ;4BACrB,OAAO,EAAE,IAAI;yBACd;wBACD,UAAU,EAAE,QAAQ,IAAI,SAAS;wBACjC,UAAU,EAAE,EAAE;qBACf,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YAED,OAAO,MAAM,CAAC;QAChB,CAAC;QAED,eAAe,CAAC,IAAY,EAAE,SAAkB;YAC9C,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,oBAAoB,CAAC,CAAC;YACrD,OAAO;gBACL,QAAQ,EAAE,IAAI,CAAC,IAAI;gBACnB,WAAW,EAAE,CAAC,CAAC,WAAW;gBAC1B,YAAY,EAAE,WAAW,EAAE,CAAC,CAAC,CAAC;gBAC9B,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM;aACpC,CAAC;QACJ,CAAC;QAED,iBAAiB,CAAC,MAAe;YAC/B,OAAO,0BAA0B,CAAC,MAAM,CAAC,CAAC;QAC5C,CAAC;KACF,CAAC;AACJ,CAAC"}
1
+ {"version":3,"file":"markdown-headers.js","sourceRoot":"","sources":["../../../src/strategies/chunk/markdown-headers.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,0BAA0B,EAAE,MAAM,sBAAsB,CAAC;AAOlE,MAAM,UAAU,uBAAuB,CACrC,UAAkC,EAAE;IAEpC,MAAM,YAAY,GAAG,OAAO,CAAC,YAAY,IAAI,GAAG,CAAC;IACjD,MAAM,YAAY,GAAG,OAAO,CAAC,YAAY,IAAI,IAAI,CAAC;IAElD,OAAO;QACL,IAAI,EAAE,kBAAkB;QAExB,KAAK,CAAC,KAAK,CAAC,IAAY,EAAE,QAAiB;YACzC,MAAM,MAAM,GAAY,EAAE,CAAC;YAC3B,8EAA8E;YAC9E,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,CAAC;YAEhE,IAAI,YAAY,GAAa,EAAE,CAAC;YAChC,IAAI,aAAa,GAAG,EAAE,CAAC;YACvB,IAAI,kBAAkB,GAAG,CAAC,CAAC;YAE3B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBACzB,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,mBAAmB,CAAC,CAAC;gBAEpD,IAAI,WAAW,EAAE,CAAC;oBAChB,mCAAmC;oBACnC,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;wBAC5B,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;wBAC/C,IAAI,OAAO,CAAC,MAAM,IAAI,YAAY,EAAE,CAAC;4BACnC,MAAM,CAAC,IAAI,CAAC;gCACV,OAAO;gCACP,QAAQ,EAAE;oCACR,QAAQ,EAAE,IAAI,CAAC,IAAI;oCACnB,MAAM,EAAE,aAAa;oCACrB,YAAY,EAAE,kBAAkB;oCAChC,WAAW,EAAE,QAAQ;iCACtB;gCACD,UAAU,EAAE,QAAQ,IAAI,SAAS;gCACjC,UAAU,EAAE,EAAE;6BACf,CAAC,CAAC;wBACL,CAAC;oBACH,CAAC;oBAED,kBAAkB;oBAClB,kBAAkB,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;oBAC3C,aAAa,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;oBAC/B,YAAY,GAAG,CAAC,IAAI,CAAC,CAAC;gBACxB,CAAC;qBAAM,CAAC;oBACN,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBAC1B,CAAC;gBAED,wCAAwC;gBACxC,MAAM,WAAW,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;gBACnD,IAAI,WAAW,GAAG,YAAY,IAAI,YAAY,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;oBAC3D,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;oBAC/C,MAAM,CAAC,IAAI,CAAC;wBACV,OAAO;wBACP,QAAQ,EAAE;4BACR,QAAQ,EAAE,IAAI,CAAC,IAAI;4BACnB,MAAM,EAAE,aAAa;4BACrB,YAAY,EAAE,kBAAkB;4BAChC,WAAW,EAAE,QAAQ;4BACrB,SAAS,EAAE,IAAI;yBAChB;wBACD,UAAU,EAAE,QAAQ,IAAI,SAAS;wBACjC,UAAU,EAAE,EAAE;qBACf,CAAC,CAAC;oBACH,oEAAoE;oBACpE,YAAY,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBAClE,CAAC;YACH,CAAC;YAED,aAAa;YACb,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC5B,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC/C,IAAI,OAAO,CAAC,MAAM,IAAI,YAAY,EAAE,CAAC;oBACnC,MAAM,CAAC,IAAI,CAAC;wBACV,OAAO;wBACP,QAAQ,EAAE;4BACR,QAAQ,EAAE,IAAI,CAAC,IAAI;4BACnB,MAAM,EAAE,aAAa;4BACrB,YAAY,EAAE,kBAAkB;4BAChC,WAAW,EAAE,QAAQ;4BACrB,OAAO,EAAE,IAAI;yBACd;wBACD,UAAU,EAAE,QAAQ,IAAI,SAAS;wBACjC,UAAU,EAAE,EAAE;qBACf,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YAED,OAAO,MAAM,CAAC;QAChB,CAAC;QAED,eAAe,CAAC,IAAY,EAAE,SAAkB;YAC9C,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,oBAAoB,CAAC,CAAC;YACrD,OAAO;gBACL,QAAQ,EAAE,IAAI,CAAC,IAAI;gBACnB,WAAW,EAAE,CAAC,CAAC,WAAW;gBAC1B,YAAY,EAAE,WAAW,EAAE,CAAC,CAAC,CAAC;gBAC9B,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM;aACpC,CAAC;QACJ,CAAC;QAED,iBAAiB,CAAC,MAAe;YAC/B,OAAO,0BAA0B,CAAC,MAAM,CAAC,CAAC;QAC5C,CAAC;KACF,CAAC;AACJ,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"semantic.d.ts","sourceRoot":"","sources":["../../../src/strategies/chunk/semantic.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAS,MAAM,2BAA2B,CAAC;AAIjE,MAAM,WAAW,uBAAuB;IACtC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,wBAAgB,gBAAgB,CAC9B,OAAO,GAAE,uBAA4B,GACpC,aAAa,CA0Ef"}
1
+ {"version":3,"file":"semantic.d.ts","sourceRoot":"","sources":["../../../src/strategies/chunk/semantic.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAS,MAAM,2BAA2B,CAAC;AAIjE,MAAM,WAAW,uBAAuB;IACtC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,wBAAgB,gBAAgB,CAC9B,OAAO,GAAE,uBAA4B,GACpC,aAAa,CA+Ef"}
@@ -32,7 +32,7 @@ export function semanticStrategy(options = {}) {
32
32
  currentChunk.push(sentence);
33
33
  currentSize += sentenceSize;
34
34
  }
35
- // Last chunk
35
+ // Last chunk: append to previous if too small, otherwise save as its own chunk.
36
36
  if (currentChunk.length > 0) {
37
37
  const content = currentChunk.join(" ").trim();
38
38
  if (content.length >= minChars) {
@@ -48,6 +48,12 @@ export function semanticStrategy(options = {}) {
48
48
  commitHash: "",
49
49
  });
50
50
  }
51
+ else if (chunks.length > 0) {
52
+ // Merge into the previous chunk rather than silently dropping it.
53
+ const prev = chunks[chunks.length - 1];
54
+ prev.content = `${prev.content} ${content}`.trim();
55
+ prev.metadata.is_last = true;
56
+ }
51
57
  }
52
58
  return chunks;
53
59
  },
@@ -1 +1 @@
1
- {"version":3,"file":"semantic.js","sourceRoot":"","sources":["../../../src/strategies/chunk/semantic.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,0BAA0B,EAAE,MAAM,sBAAsB,CAAC;AAOlE,MAAM,UAAU,gBAAgB,CAC9B,UAAmC,EAAE;IAErC,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,IAAI,CAAC;IAC1C,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,GAAG,CAAC;IAEzC,OAAO;QACL,IAAI,EAAE,UAAU;QAEhB,KAAK,CAAC,KAAK,CAAC,IAAY,EAAE,QAAiB;YACzC,MAAM,MAAM,GAAY,EAAE,CAAC;YAE3B,uCAAuC;YACvC,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;YAE9C,IAAI,YAAY,GAAa,EAAE,CAAC;YAChC,IAAI,WAAW,GAAG,CAAC,CAAC;YAEpB,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;gBACjC,MAAM,YAAY,GAAG,QAAQ,CAAC,MAAM,CAAC;gBAErC,IAAI,WAAW,GAAG,YAAY,GAAG,QAAQ,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACrE,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;oBAC9C,IAAI,OAAO,CAAC,MAAM,IAAI,QAAQ,EAAE,CAAC;wBAC/B,MAAM,CAAC,IAAI,CAAC;4BACV,OAAO;4BACP,QAAQ,EAAE;gCACR,QAAQ,EAAE,IAAI,CAAC,IAAI;gCACnB,cAAc,EAAE,YAAY,CAAC,MAAM;gCACnC,WAAW,EAAE,QAAQ;6BACtB;4BACD,UAAU,EAAE,QAAQ,IAAI,SAAS;4BACjC,UAAU,EAAE,EAAE;yBACf,CAAC,CAAC;oBACL,CAAC;oBACD,YAAY,GAAG,EAAE,CAAC;oBAClB,WAAW,GAAG,CAAC,CAAC;gBAClB,CAAC;gBAED,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAC5B,WAAW,IAAI,YAAY,CAAC;YAC9B,CAAC;YAED,aAAa;YACb,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC5B,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC9C,IAAI,OAAO,CAAC,MAAM,IAAI,QAAQ,EAAE,CAAC;oBAC/B,MAAM,CAAC,IAAI,CAAC;wBACV,OAAO;wBACP,QAAQ,EAAE;4BACR,QAAQ,EAAE,IAAI,CAAC,IAAI;4BACnB,cAAc,EAAE,YAAY,CAAC,MAAM;4BACnC,WAAW,EAAE,QAAQ;4BACrB,OAAO,EAAE,IAAI;yBACd;wBACD,UAAU,EAAE,QAAQ,IAAI,SAAS;wBACjC,UAAU,EAAE,EAAE;qBACf,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YAED,OAAO,MAAM,CAAC;QAChB,CAAC;QAED,eAAe,CAAC,IAAY,EAAE,SAAkB;YAC9C,OAAO;gBACL,QAAQ,EAAE,IAAI,CAAC,IAAI;gBACnB,cAAc,EAAE,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,MAAM;gBAC3C,UAAU,EAAE,IAAI,CAAC,MAAM;aACxB,CAAC;QACJ,CAAC;QAED,iBAAiB,CAAC,MAAe;YAC/B,OAAO,0BAA0B,CAAC,MAAM,CAAC,CAAC;QAC5C,CAAC;KACF,CAAC;AACJ,CAAC"}
1
+ {"version":3,"file":"semantic.js","sourceRoot":"","sources":["../../../src/strategies/chunk/semantic.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,0BAA0B,EAAE,MAAM,sBAAsB,CAAC;AAOlE,MAAM,UAAU,gBAAgB,CAC9B,UAAmC,EAAE;IAErC,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,IAAI,CAAC;IAC1C,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,GAAG,CAAC;IAEzC,OAAO;QACL,IAAI,EAAE,UAAU;QAEhB,KAAK,CAAC,KAAK,CAAC,IAAY,EAAE,QAAiB;YACzC,MAAM,MAAM,GAAY,EAAE,CAAC;YAE3B,uCAAuC;YACvC,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;YAE9C,IAAI,YAAY,GAAa,EAAE,CAAC;YAChC,IAAI,WAAW,GAAG,CAAC,CAAC;YAEpB,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;gBACjC,MAAM,YAAY,GAAG,QAAQ,CAAC,MAAM,CAAC;gBAErC,IAAI,WAAW,GAAG,YAAY,GAAG,QAAQ,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACrE,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;oBAC9C,IAAI,OAAO,CAAC,MAAM,IAAI,QAAQ,EAAE,CAAC;wBAC/B,MAAM,CAAC,IAAI,CAAC;4BACV,OAAO;4BACP,QAAQ,EAAE;gCACR,QAAQ,EAAE,IAAI,CAAC,IAAI;gCACnB,cAAc,EAAE,YAAY,CAAC,MAAM;gCACnC,WAAW,EAAE,QAAQ;6BACtB;4BACD,UAAU,EAAE,QAAQ,IAAI,SAAS;4BACjC,UAAU,EAAE,EAAE;yBACf,CAAC,CAAC;oBACL,CAAC;oBACD,YAAY,GAAG,EAAE,CAAC;oBAClB,WAAW,GAAG,CAAC,CAAC;gBAClB,CAAC;gBAED,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAC5B,WAAW,IAAI,YAAY,CAAC;YAC9B,CAAC;YAED,gFAAgF;YAChF,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC5B,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC9C,IAAI,OAAO,CAAC,MAAM,IAAI,QAAQ,EAAE,CAAC;oBAC/B,MAAM,CAAC,IAAI,CAAC;wBACV,OAAO;wBACP,QAAQ,EAAE;4BACR,QAAQ,EAAE,IAAI,CAAC,IAAI;4BACnB,cAAc,EAAE,YAAY,CAAC,MAAM;4BACnC,WAAW,EAAE,QAAQ;4BACrB,OAAO,EAAE,IAAI;yBACd;wBACD,UAAU,EAAE,QAAQ,IAAI,SAAS;wBACjC,UAAU,EAAE,EAAE;qBACf,CAAC,CAAC;gBACL,CAAC;qBAAM,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC7B,kEAAkE;oBAClE,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;oBACvC,IAAI,CAAC,OAAO,GAAG,GAAG,IAAI,CAAC,OAAO,IAAI,OAAO,EAAE,CAAC,IAAI,EAAE,CAAC;oBACnD,IAAI,CAAC,QAAQ,CAAC,OAAO,GAAG,IAAI,CAAC;gBAC/B,CAAC;YACH,CAAC;YAED,OAAO,MAAM,CAAC;QAChB,CAAC;QAED,eAAe,CAAC,IAAY,EAAE,SAAkB;YAC9C,OAAO;gBACL,QAAQ,EAAE,IAAI,CAAC,IAAI;gBACnB,cAAc,EAAE,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,MAAM;gBAC3C,UAAU,EAAE,IAAI,CAAC,MAAM;aACxB,CAAC;QACJ,CAAC;QAED,iBAAiB,CAAC,MAAe;YAC/B,OAAO,0BAA0B,CAAC,MAAM,CAAC,CAAC;QAC5C,CAAC;KACF,CAAC;AACJ,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"token.d.ts","sourceRoot":"","sources":["../../../src/strategies/chunk/token.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAS,MAAM,2BAA2B,CAAC;AAIjE,MAAM,WAAW,oBAAoB;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;;;GAIG;AACH,wBAAgB,aAAa,CAC3B,OAAO,GAAE,oBAAyB,GACjC,aAAa,CA6Df"}
1
+ {"version":3,"file":"token.d.ts","sourceRoot":"","sources":["../../../src/strategies/chunk/token.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAS,MAAM,2BAA2B,CAAC;AAIjE,MAAM,WAAW,oBAAoB;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;;;GAIG;AACH,wBAAgB,aAAa,CAC3B,OAAO,GAAE,oBAAyB,GACjC,aAAa,CAuEf"}
@@ -9,7 +9,10 @@ export function tokenStrategy(options = {}) {
9
9
  const overlap = options.overlap ?? 50;
10
10
  const charsPerToken = 4;
11
11
  const maxChars = maxTokens * charsPerToken;
12
- const overlapChars = overlap * charsPerToken;
12
+ // Clamp overlap so it never equals or exceeds maxChars, guaranteeing forward progress.
13
+ const safeOverlap = Math.min(overlap, maxTokens - 1);
14
+ const overlapChars = safeOverlap * charsPerToken;
15
+ const stepChars = maxChars - overlapChars; // minimum advance per iteration
13
16
  return {
14
17
  name: `token-${maxTokens}`,
15
18
  async chunk(text, filePath) {
@@ -17,12 +20,16 @@ export function tokenStrategy(options = {}) {
17
20
  let start = 0;
18
21
  while (start < text.length) {
19
22
  let end = Math.min(start + maxChars, text.length);
20
- // Try to break at sentence boundary
23
+ // Only snap to a sentence/line boundary when it falls in the second half
24
+ // of the window. Searching from the full end position can land on a break
25
+ // point just past `start` (e.g. first newline inside a template literal),
26
+ // which produces tiny chunks and triggers the one-char crawl loop.
21
27
  if (end < text.length) {
28
+ const minBreak = start + Math.floor(maxChars / 2);
22
29
  const lastPeriod = text.lastIndexOf(".", end);
23
30
  const lastNewline = text.lastIndexOf("\n", end);
24
31
  const breakPoint = Math.max(lastPeriod, lastNewline);
25
- if (breakPoint > start) {
32
+ if (breakPoint >= minBreak) {
26
33
  end = breakPoint + 1;
27
34
  }
28
35
  }
@@ -41,7 +48,10 @@ export function tokenStrategy(options = {}) {
41
48
  commitHash: "", // Will be filled by caller
42
49
  });
43
50
  }
44
- start = Math.max(end - overlapChars, start + 1);
51
+ // Advance by overlap window, but never less than stepChars to prevent
52
+ // the one-char crawl when the boundary lands too close to start.
53
+ const nextStart = end - overlapChars;
54
+ start = nextStart > start ? nextStart : start + stepChars;
45
55
  }
46
56
  return chunks;
47
57
  },
@@ -1 +1 @@
1
- {"version":3,"file":"token.js","sourceRoot":"","sources":["../../../src/strategies/chunk/token.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,0BAA0B,EAAE,MAAM,sBAAsB,CAAC;AAOlE;;;;GAIG;AACH,MAAM,UAAU,aAAa,CAC3B,UAAgC,EAAE;IAElC,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,GAAG,CAAC;IAC3C,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC;IACtC,MAAM,aAAa,GAAG,CAAC,CAAC;IACxB,MAAM,QAAQ,GAAG,SAAS,GAAG,aAAa,CAAC;IAC3C,MAAM,YAAY,GAAG,OAAO,GAAG,aAAa,CAAC;IAE7C,OAAO;QACL,IAAI,EAAE,SAAS,SAAS,EAAE;QAE1B,KAAK,CAAC,KAAK,CAAC,IAAY,EAAE,QAAiB;YACzC,MAAM,MAAM,GAAY,EAAE,CAAC;YAC3B,IAAI,KAAK,GAAG,CAAC,CAAC;YAEd,OAAO,KAAK,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;gBAC3B,IAAI,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;gBAElD,oCAAoC;gBACpC,IAAI,GAAG,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;oBACtB,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;oBAC9C,MAAM,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;oBAChD,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,WAAW,CAAC,CAAC;oBACrD,IAAI,UAAU,GAAG,KAAK,EAAE,CAAC;wBACvB,GAAG,GAAG,UAAU,GAAG,CAAC,CAAC;oBACvB,CAAC;gBACH,CAAC;gBAED,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC9C,IAAI,OAAO,EAAE,CAAC;oBACZ,MAAM,CAAC,IAAI,CAAC;wBACV,OAAO;wBACP,QAAQ,EAAE;4BACR,QAAQ,EAAE,IAAI,CAAC,IAAI;4BACnB,WAAW,EAAE,MAAM,CAAC,MAAM;4BAC1B,WAAW,EAAE,QAAQ;4BACrB,UAAU,EAAE,KAAK;4BACjB,QAAQ,EAAE,GAAG;yBACd;wBACD,UAAU,EAAE,QAAQ,IAAI,SAAS;wBACjC,UAAU,EAAE,EAAE,EAAE,2BAA2B;qBAC5C,CAAC,CAAC;gBACL,CAAC;gBAED,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,YAAY,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC;YAClD,CAAC;YAED,OAAO,MAAM,CAAC;QAChB,CAAC;QAED,eAAe,CAAC,IAAY,EAAE,SAAkB;YAC9C,OAAO;gBACL,QAAQ,EAAE,IAAI,CAAC,IAAI;gBACnB,UAAU,EAAE,IAAI,CAAC,MAAM;gBACvB,gBAAgB,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,aAAa,CAAC;aACzD,CAAC;QACJ,CAAC;QAED,iBAAiB,CAAC,MAAe;YAC/B,OAAO,0BAA0B,CAAC,MAAM,CAAC,CAAC;QAC5C,CAAC;KACF,CAAC;AACJ,CAAC"}
1
+ {"version":3,"file":"token.js","sourceRoot":"","sources":["../../../src/strategies/chunk/token.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,0BAA0B,EAAE,MAAM,sBAAsB,CAAC;AAOlE;;;;GAIG;AACH,MAAM,UAAU,aAAa,CAC3B,UAAgC,EAAE;IAElC,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,GAAG,CAAC;IAC3C,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC;IACtC,MAAM,aAAa,GAAG,CAAC,CAAC;IACxB,MAAM,QAAQ,GAAG,SAAS,GAAG,aAAa,CAAC;IAC3C,uFAAuF;IACvF,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,SAAS,GAAG,CAAC,CAAC,CAAC;IACrD,MAAM,YAAY,GAAG,WAAW,GAAG,aAAa,CAAC;IACjD,MAAM,SAAS,GAAG,QAAQ,GAAG,YAAY,CAAC,CAAC,gCAAgC;IAE3E,OAAO;QACL,IAAI,EAAE,SAAS,SAAS,EAAE;QAE1B,KAAK,CAAC,KAAK,CAAC,IAAY,EAAE,QAAiB;YACzC,MAAM,MAAM,GAAY,EAAE,CAAC;YAC3B,IAAI,KAAK,GAAG,CAAC,CAAC;YAEd,OAAO,KAAK,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;gBAC3B,IAAI,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;gBAElD,yEAAyE;gBACzE,0EAA0E;gBAC1E,0EAA0E;gBAC1E,mEAAmE;gBACnE,IAAI,GAAG,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;oBACtB,MAAM,QAAQ,GAAG,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC;oBAClD,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;oBAC9C,MAAM,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;oBAChD,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,WAAW,CAAC,CAAC;oBACrD,IAAI,UAAU,IAAI,QAAQ,EAAE,CAAC;wBAC3B,GAAG,GAAG,UAAU,GAAG,CAAC,CAAC;oBACvB,CAAC;gBACH,CAAC;gBAED,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC9C,IAAI,OAAO,EAAE,CAAC;oBACZ,MAAM,CAAC,IAAI,CAAC;wBACV,OAAO;wBACP,QAAQ,EAAE;4BACR,QAAQ,EAAE,IAAI,CAAC,IAAI;4BACnB,WAAW,EAAE,MAAM,CAAC,MAAM;4BAC1B,WAAW,EAAE,QAAQ;4BACrB,UAAU,EAAE,KAAK;4BACjB,QAAQ,EAAE,GAAG;yBACd;wBACD,UAAU,EAAE,QAAQ,IAAI,SAAS;wBACjC,UAAU,EAAE,EAAE,EAAE,2BAA2B;qBAC5C,CAAC,CAAC;gBACL,CAAC;gBAED,sEAAsE;gBACtE,iEAAiE;gBACjE,MAAM,SAAS,GAAG,GAAG,GAAG,YAAY,CAAC;gBACrC,KAAK,GAAG,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,GAAG,SAAS,CAAC;YAC5D,CAAC;YAED,OAAO,MAAM,CAAC;QAChB,CAAC;QAED,eAAe,CAAC,IAAY,EAAE,SAAkB;YAC9C,OAAO;gBACL,QAAQ,EAAE,IAAI,CAAC,IAAI;gBACnB,UAAU,EAAE,IAAI,CAAC,MAAM;gBACvB,gBAAgB,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,aAAa,CAAC;aACzD,CAAC;QACJ,CAAC;QAED,iBAAiB,CAAC,MAAe;YAC/B,OAAO,0BAA0B,CAAC,MAAM,CAAC,CAAC;QAC5C,CAAC;KACF,CAAC;AACJ,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vivantel/virage-core",
3
- "version": "0.2.5",
3
+ "version": "0.2.6",
4
4
  "type": "module",
5
5
  "description": "Core RAG pipeline tools - universal chunking, embedding, vector store interfaces",
6
6
  "repository": {