voctar 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"fixed.d.ts","sourceRoot":"","sources":["../../../../src/chunking/strategies/fixed.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,KAAK,EAAE,eAAe,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAGzE,qBAAa,yBAA0B,YAAW,gBAAgB;IAChE,OAAO,IAAI,MAAM;IAIjB,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,eAAe,GAAG,KAAK,EAAE;IA4D1E;;OAEG;IACH,OAAO,CAAC,qBAAqB;IA0B7B;;OAEG;IACH,OAAO,CAAC,cAAc;CAuBvB"}
1
+ {"version":3,"file":"fixed.d.ts","sourceRoot":"","sources":["../../../../src/chunking/strategies/fixed.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,KAAK,EAAE,eAAe,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAGzE,qBAAa,yBAA0B,YAAW,gBAAgB;IAChE,OAAO,IAAI,MAAM;IAIjB,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,eAAe,GAAG,KAAK,EAAE;IAqE1E;;OAEG;IACH,OAAO,CAAC,qBAAqB;IA0B7B;;OAEG;IACH,OAAO,CAAC,cAAc;CAuBvB"}
@@ -42,9 +42,16 @@ class FixedSizeChunkingStrategy {
42
42
  ...options.metadata,
43
43
  },
44
44
  });
45
+ if (endChar >= normalizedText.length) {
46
+ break;
47
+ }
45
48
  // Calculate overlap position using token count
46
49
  const overlapText = this.getOverlapText(chunkText, overlap);
47
- startChar = endChar - overlapText.length;
50
+ const nextStartChar = endChar - overlapText.length;
51
+ if (nextStartChar <= startChar) {
52
+ break;
53
+ }
54
+ startChar = nextStartChar;
48
55
  chunkIndex++;
49
56
  // Avoid creating tiny overlapping chunks at the end
50
57
  if (normalizedText.length - startChar < overlapText.length) {
@@ -1 +1 @@
1
- {"version":3,"file":"fixed.js","sourceRoot":"","sources":["../../../../src/chunking/strategies/fixed.ts"],"names":[],"mappings":";;;AAAA,+BAA+B;AAC/B,+BAAoC;AAEpC,kDAAiD;AAEjD,MAAa,yBAAyB;IACpC,OAAO;QACL,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,KAAK,CAAC,IAAY,EAAE,UAAkB,EAAE,OAAwB;QAC9D,uDAAuD;QACvD,MAAM,UAAU,GAAI,OAAe,CAAC,UAAU,IAAI,IAAI,CAAC;QACvD,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,YAAY,IAAI,IAAI,EAAE,UAAU,CAAC,CAAC;QACnE,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,OAAO,IAAI,GAAG,EAAE,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,GAAG,CAAC,CAAC,CAAC;QAC5E,MAAM,kBAAkB,GAAG,OAAO,CAAC,kBAAkB,IAAI,KAAK,CAAC;QAE/D,8CAA8C;QAC9C,MAAM,cAAc,GAAG,kBAAkB;YACvC,CAAC,CAAC,IAAI;YACN,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QAErC,MAAM,MAAM,GAAY,EAAE,CAAC;QAC3B,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,IAAI,UAAU,GAAG,CAAC,CAAC;QAEnB,OAAO,SAAS,GAAG,cAAc,CAAC,MAAM,EAAE,CAAC;YACzC,6BAA6B;YAC7B,MAAM,aAAa,GAAG,cAAc,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;YACtD,MAAM,SAAS,GAAG,IAAI,CAAC,qBAAqB,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;YAErE,IAAI,CAAC,SAAS,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACzC,MAAM;YACR,CAAC;YAED,MAAM,OAAO,GAAG,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC;YAE7C,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,IAAA,SAAM,GAAE;gBACZ,IAAI,EAAE,SAAS;gBACf,QAAQ,EAAE;oBACR,UAAU;oBACV,UAAU;oBACV,WAAW,EAAE,CAAC,EAAE,+CAA+C;oBAC/D,SAAS;oBACT,OAAO;oBACP,MAAM,EAAE,IAAA,uBAAW,EAAC,SAAS,CAAC;oBAC9B,GAAG,OAAO,CAAC,QAAQ;iBACpB;aACF,CAAC,CAAC;YAEH,+CAA+C;YAC/C,MAAM,WAAW,GAAG,IAAI,CAAC,cAAc,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;YAC5D,SAAS,GAAG,OAAO,GAAG,WAAW,CAAC,MAAM,CAAC;YACzC,UAAU,EAAE,CAAC;YAEb,oDAAoD;YACpD,IAAI,cAAc,CAAC,MAAM,GAAG,SAAS,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC;gBAC3D,MAAM;YACR,CAAC;QACH,CAAC;QAED,oCAAoC;QACpC,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;YACrB,KAAK,CAAC,QAAQ,CAAC,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC;QAC7C,CAAC,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,qBAAqB,CAAC,IAAY,EAAE,SAAiB;QAC3D,IAAI,IAAA,uBAAW,EAAC,IAAI,CAAC,IAAI,SAAS,EAAE,CAAC;YACnC,OAAO,IAAI,CAAC;QACd,CAAC;QAED,iDAAiD;QACjD,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC;QACtB,IAAI,SAAS,GAAG,EAAE,CAAC;QAEnB,OAAO,KAAK,GAAG,GAAG,EAAE,CAAC;YACnB,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;YAC1C,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;YACrC,MAAM,MAAM,GAAG,IAAA,uBAAW,EAAC,SAAS,CAAC,CAAC;YAEtC,IAAI,MAAM,IAAI,SAAS,EAAE,CAAC;gBACxB,SAAS,GAAG,SAAS,CAAC;gBACtB,KAAK,GAAG,GAAG,GAAG,CAAC,CAAC;YAClB,CAAC;iBAAM,CAAC;gBACN,GAAG,GAAG,GAAG,CAAC;YACZ,CAAC;QACH,CAAC;QAED,OAAO,SAAS,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,kBAAkB;IACtF,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,IAAY,EAAE,aAAqB;QACxD,IAAI,aAAa,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAEnC,6CAA6C;QAC7C,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC;QACtB,IAAI,SAAS,GAAG,EAAE,CAAC;QAEnB,OAAO,KAAK,GAAG,GAAG,EAAE,CAAC;YACnB,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;YAC1C,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YAClC,MAAM,MAAM,GAAG,IAAA,uBAAW,EAAC,SAAS,CAAC,CAAC;YAEtC,IAAI,MAAM,IAAI,aAAa,EAAE,CAAC;gBAC5B,SAAS,GAAG,SAAS,CAAC;gBACtB,GAAG,GAAG,GAAG,CAAC;YACZ,CAAC;iBAAM,CAAC;gBACN,KAAK,GAAG,GAAG,GAAG,CAAC,CAAC;YAClB,CAAC;QACH,CAAC;QAED,OAAO,SAAS,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,uBAAuB;IACzF,CAAC;CACF;AAxHD,8DAwHC"}
1
+ {"version":3,"file":"fixed.js","sourceRoot":"","sources":["../../../../src/chunking/strategies/fixed.ts"],"names":[],"mappings":";;;AAAA,+BAA+B;AAC/B,+BAAoC;AAEpC,kDAAiD;AAEjD,MAAa,yBAAyB;IACpC,OAAO;QACL,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,KAAK,CAAC,IAAY,EAAE,UAAkB,EAAE,OAAwB;QAC9D,uDAAuD;QACvD,MAAM,UAAU,GAAI,OAAe,CAAC,UAAU,IAAI,IAAI,CAAC;QACvD,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,YAAY,IAAI,IAAI,EAAE,UAAU,CAAC,CAAC;QACnE,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,OAAO,IAAI,GAAG,EAAE,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,GAAG,CAAC,CAAC,CAAC;QAC5E,MAAM,kBAAkB,GAAG,OAAO,CAAC,kBAAkB,IAAI,KAAK,CAAC;QAE/D,8CAA8C;QAC9C,MAAM,cAAc,GAAG,kBAAkB;YACvC,CAAC,CAAC,IAAI;YACN,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QAErC,MAAM,MAAM,GAAY,EAAE,CAAC;QAC3B,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,IAAI,UAAU,GAAG,CAAC,CAAC;QAEnB,OAAO,SAAS,GAAG,cAAc,CAAC,MAAM,EAAE,CAAC;YACzC,6BAA6B;YAC7B,MAAM,aAAa,GAAG,cAAc,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;YACtD,MAAM,SAAS,GAAG,IAAI,CAAC,qBAAqB,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;YAErE,IAAI,CAAC,SAAS,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACzC,MAAM;YACR,CAAC;YAED,MAAM,OAAO,GAAG,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC;YAE7C,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,IAAA,SAAM,GAAE;gBACZ,IAAI,EAAE,SAAS;gBACf,QAAQ,EAAE;oBACR,UAAU;oBACV,UAAU;oBACV,WAAW,EAAE,CAAC,EAAE,+CAA+C;oBAC/D,SAAS;oBACT,OAAO;oBACP,MAAM,EAAE,IAAA,uBAAW,EAAC,SAAS,CAAC;oBAC9B,GAAG,OAAO,CAAC,QAAQ;iBACpB;aACF,CAAC,CAAC;YAEH,IAAI,OAAO,IAAI,cAAc,CAAC,MAAM,EAAE,CAAC;gBACrC,MAAM;YACR,CAAC;YAED,+CAA+C;YAC/C,MAAM,WAAW,GAAG,IAAI,CAAC,cAAc,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;YAC5D,MAAM,aAAa,GAAG,OAAO,GAAG,WAAW,CAAC,MAAM,CAAC;YACnD,IAAI,aAAa,IAAI,SAAS,EAAE,CAAC;gBAC/B,MAAM;YACR,CAAC;YAED,SAAS,GAAG,aAAa,CAAC;YAC1B,UAAU,EAAE,CAAC;YAEb,oDAAoD;YACpD,IAAI,cAAc,CAAC,MAAM,GAAG,SAAS,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC;gBAC3D,MAAM;YACR,CAAC;QACH,CAAC;QAED,oCAAoC;QACpC,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;YACrB,KAAK,CAAC,QAAQ,CAAC,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC;QAC7C,CAAC,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,qBAAqB,CAAC,IAAY,EAAE,SAAiB;QAC3D,IAAI,IAAA,uBAAW,EAAC,IAAI,CAAC,IAAI,SAAS,EAAE,CAAC;YACnC,OAAO,IAAI,CAAC;QACd,CAAC;QAED,iDAAiD;QACjD,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC;QACtB,IAAI,SAAS,GAAG,EAAE,CAAC;QAEnB,OAAO,KAAK,GAAG,GAAG,EAAE,CAAC;YACnB,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;YAC1C,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;YACrC,MAAM,MAAM,GAAG,IAAA,uBAAW,EAAC,SAAS,CAAC,CAAC;YAEtC,IAAI,MAAM,IAAI,SAAS,EAAE,CAAC;gBACxB,SAAS,GAAG,SAAS,CAAC;gBACtB,KAAK,GAAG,GAAG,GAAG,CAAC,CAAC;YAClB,CAAC;iBAAM,CAAC;gBACN,GAAG,GAAG,GAAG,CAAC;YACZ,CAAC;QACH,CAAC;QAED,OAAO,SAAS,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,kBAAkB;IACtF,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,IAAY,EAAE,aAAqB;QACxD,IAAI,aAAa,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAEnC,6CAA6C;QAC7C,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC;QACtB,IAAI,SAAS,GAAG,EAAE,CAAC;QAEnB,OAAO,KAAK,GAAG,GAAG,EAAE,CAAC;YACnB,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;YAC1C,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YAClC,MAAM,MAAM,GAAG,IAAA,uBAAW,EAAC,SAAS,CAAC,CAAC;YAEtC,IAAI,MAAM,IAAI,aAAa,EAAE,CAAC;gBAC5B,SAAS,GAAG,SAAS,CAAC;gBACtB,GAAG,GAAG,GAAG,CAAC;YACZ,CAAC;iBAAM,CAAC;gBACN,KAAK,GAAG,GAAG,GAAG,CAAC,CAAC;YAClB,CAAC;QACH,CAAC;QAED,OAAO,SAAS,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,uBAAuB;IACzF,CAAC;CACF;AAjID,8DAiIC"}
@@ -43,8 +43,8 @@ class ParagraphChunkingStrategy {
43
43
  ...options.metadata,
44
44
  },
45
45
  });
46
- // Keep last N paragraphs for overlap
47
- const overlapParagraphs = currentChunk.slice(-overlap);
46
+ // Keep last N paragraphs for overlap. slice(-0) equals slice(0), so handle zero explicitly.
47
+ const overlapParagraphs = overlap > 0 ? currentChunk.slice(-overlap) : [];
48
48
  currentChunk = [...overlapParagraphs, paragraph];
49
49
  currentTokens = (0, tokenizer_1.countTokens)(overlapParagraphs.join('\n\n')) + paragraphTokens;
50
50
  startChar = endChar - (overlapParagraphs.join('\n\n').length);
@@ -1 +1 @@
1
- {"version":3,"file":"paragraph.js","sourceRoot":"","sources":["../../../../src/chunking/strategies/paragraph.ts"],"names":[],"mappings":";;;AAAA,oCAAoC;AACpC,+BAAoC;AAEpC,kDAAiD;AAEjD,MAAa,yBAAyB;IACpC,OAAO;QACL,OAAO,WAAW,CAAC;IACrB,CAAC;IAED,KAAK,CAAC,IAAY,EAAE,UAAkB,EAAE,OAAwB;QAC9D,uDAAuD;QACvD,MAAM,UAAU,GAAI,OAAe,CAAC,UAAU,IAAI,IAAI,CAAC;QACvD,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,YAAY,IAAI,IAAI,EAAE,UAAU,CAAC,CAAC;QACnE,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,CAAC,CAAC,CAAC,kCAAkC;QAExE,6BAA6B;QAC7B,MAAM,UAAU,GAAG,IAAI;aACpB,KAAK,CAAC,SAAS,CAAC;aAChB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;aAClB,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAE7B,MAAM,MAAM,GAAY,EAAE,CAAC;QAC3B,IAAI,YAAY,GAAa,EAAE,CAAC;QAChC,IAAI,aAAa,GAAG,CAAC,CAAC;QACtB,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,SAAS,GAAG,CAAC,CAAC;QAElB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC3C,MAAM,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;YAChC,MAAM,eAAe,GAAG,IAAA,uBAAW,EAAC,SAAS,CAAC,CAAC;YAE/C,gGAAgG;YAChG,IAAI,aAAa,GAAG,eAAe,GAAG,OAAO,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACzE,MAAM,SAAS,GAAG,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;gBACnD,MAAM,OAAO,GAAG,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC;gBAE7C,MAAM,CAAC,IAAI,CAAC;oBACV,EAAE,EAAE,IAAA,SAAM,GAAE;oBACZ,IAAI,EAAE,SAAS;oBACf,QAAQ,EAAE;wBACR,UAAU;wBACV,UAAU;wBACV,WAAW,EAAE,CAAC,EAAE,wBAAwB;wBACxC,SAAS;wBACT,OAAO;wBACP,UAAU,EAAE,YAAY,CAAC,MAAM;wBAC/B,GAAG,OAAO,CAAC,QAAQ;qBACpB;iBACF,CAAC,CAAC;gBAEH,qCAAqC;gBACrC,MAAM,iBAAiB,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC,CAAC;gBACvD,YAAY,GAAG,CAAC,GAAG,iBAAiB,EAAE,SAAS,CAAC,CAAC;gBACjD,aAAa,GAAG,IAAA,uBAAW,EAAC,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,eAAe,CAAC;gBAC9E,SAAS,GAAG,OAAO,GAAG,CAAC,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,CAAC;gBAC9D,UAAU,EAAE,CAAC;YACf,CAAC;iBAAM,CAAC;gBACN,YAAY,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBAC7B,aAAa,IAAI,eAAe,CAAC;YACnC,CAAC;QACH,CAAC;QAED,uCAAuC;QACvC,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5B,MAAM,SAAS,GAAG,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;YACnD,MAAM,OAAO,GAAG,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC;YAE7C,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,IAAA,SAAM,GAAE;gBACZ,IAAI,EAAE,SAAS;gBACf,QAAQ,EAAE;oBACR,UAAU;oBACV,UAAU;oBACV,WAAW,EAAE,CAAC;oBACd,SAAS;oBACT,OAAO;oBACP,UAAU,EAAE,YAAY,CAAC,MAAM;oBAC/B,GAAG,OAAO,CAAC,QAAQ;iBACpB;aACF,CAAC,CAAC;QACL,CAAC;QAED,qBAAqB;QACrB,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;YACrB,KAAK,CAAC,QAAQ,CAAC,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC;QAC7C,CAAC,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAChB,CAAC;CACF;AArFD,8DAqFC"}
1
+ {"version":3,"file":"paragraph.js","sourceRoot":"","sources":["../../../../src/chunking/strategies/paragraph.ts"],"names":[],"mappings":";;;AAAA,oCAAoC;AACpC,+BAAoC;AAEpC,kDAAiD;AAEjD,MAAa,yBAAyB;IACpC,OAAO;QACL,OAAO,WAAW,CAAC;IACrB,CAAC;IAED,KAAK,CAAC,IAAY,EAAE,UAAkB,EAAE,OAAwB;QAC9D,uDAAuD;QACvD,MAAM,UAAU,GAAI,OAAe,CAAC,UAAU,IAAI,IAAI,CAAC;QACvD,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,YAAY,IAAI,IAAI,EAAE,UAAU,CAAC,CAAC;QACnE,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,CAAC,CAAC,CAAC,kCAAkC;QAExE,6BAA6B;QAC7B,MAAM,UAAU,GAAG,IAAI;aACpB,KAAK,CAAC,SAAS,CAAC;aAChB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;aAClB,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAE7B,MAAM,MAAM,GAAY,EAAE,CAAC;QAC3B,IAAI,YAAY,GAAa,EAAE,CAAC;QAChC,IAAI,aAAa,GAAG,CAAC,CAAC;QACtB,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,SAAS,GAAG,CAAC,CAAC;QAElB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC3C,MAAM,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;YAChC,MAAM,eAAe,GAAG,IAAA,uBAAW,EAAC,SAAS,CAAC,CAAC;YAE/C,gGAAgG;YAChG,IAAI,aAAa,GAAG,eAAe,GAAG,OAAO,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACzE,MAAM,SAAS,GAAG,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;gBACnD,MAAM,OAAO,GAAG,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC;gBAE7C,MAAM,CAAC,IAAI,CAAC;oBACV,EAAE,EAAE,IAAA,SAAM,GAAE;oBACZ,IAAI,EAAE,SAAS;oBACf,QAAQ,EAAE;wBACR,UAAU;wBACV,UAAU;wBACV,WAAW,EAAE,CAAC,EAAE,wBAAwB;wBACxC,SAAS;wBACT,OAAO;wBACP,UAAU,EAAE,YAAY,CAAC,MAAM;wBAC/B,GAAG,OAAO,CAAC,QAAQ;qBACpB;iBACF,CAAC,CAAC;gBAEH,4FAA4F;gBAC5F,MAAM,iBAAiB,GAAG,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC1E,YAAY,GAAG,CAAC,GAAG,iBAAiB,EAAE,SAAS,CAAC,CAAC;gBACjD,aAAa,GAAG,IAAA,uBAAW,EAAC,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,eAAe,CAAC;gBAC9E,SAAS,GAAG,OAAO,GAAG,CAAC,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,CAAC;gBAC9D,UAAU,EAAE,CAAC;YACf,CAAC;iBAAM,CAAC;gBACN,YAAY,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBAC7B,aAAa,IAAI,eAAe,CAAC;YACnC,CAAC;QACH,CAAC;QAED,uCAAuC;QACvC,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5B,MAAM,SAAS,GAAG,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;YACnD,MAAM,OAAO,GAAG,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC;YAE7C,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,IAAA,SAAM,GAAE;gBACZ,IAAI,EAAE,SAAS;gBACf,QAAQ,EAAE;oBACR,UAAU;oBACV,UAAU;oBACV,WAAW,EAAE,CAAC;oBACd,SAAS;oBACT,OAAO;oBACP,UAAU,EAAE,YAAY,CAAC,MAAM;oBAC/B,GAAG,OAAO,CAAC,QAAQ;iBACpB;aACF,CAAC,CAAC;QACL,CAAC;QAED,qBAAqB;QACrB,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;YACrB,KAAK,CAAC,QAAQ,CAAC,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC;QAC7C,CAAC,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAChB,CAAC;CACF;AArFD,8DAqFC"}
@@ -1 +1 @@
1
- {"version":3,"file":"recursive.d.ts","sourceRoot":"","sources":["../../../../src/chunking/strategies/recursive.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,KAAK,EAAE,eAAe,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAGzE,qBAAa,yBAA0B,YAAW,gBAAgB;IAChE,OAAO,IAAI,MAAM;IAIjB,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,eAAe,GAAG,KAAK,EAAE;IAoD1E,OAAO,CAAC,cAAc;IA8CtB,OAAO,CAAC,WAAW;IAsCnB;;OAEG;IACH,OAAO,CAAC,cAAc;IAsBtB,OAAO,CAAC,iBAAiB;IAyBzB;;OAEG;IACH,OAAO,CAAC,qBAAqB;CAyB9B"}
1
+ {"version":3,"file":"recursive.d.ts","sourceRoot":"","sources":["../../../../src/chunking/strategies/recursive.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,KAAK,EAAE,eAAe,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAGzE,qBAAa,yBAA0B,YAAW,gBAAgB;IAChE,OAAO,IAAI,MAAM;IAIjB,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,eAAe,GAAG,KAAK,EAAE;IAoD1E,OAAO,CAAC,cAAc;IA8CtB,OAAO,CAAC,WAAW;IAsCnB;;OAEG;IACH,OAAO,CAAC,cAAc;IAwBtB,OAAO,CAAC,iBAAiB;IAkCzB;;OAEG;IACH,OAAO,CAAC,qBAAqB;CAyB9B"}
@@ -124,6 +124,8 @@ class RecursiveChunkingStrategy {
124
124
  * Get overlap text that is approximately 'overlapTokens' tokens
125
125
  */
126
126
  getOverlapText(text, overlapTokens) {
127
+ if (overlapTokens === 0)
128
+ return '';
127
129
  // Binary search for the right amount of text
128
130
  let start = 0;
129
131
  let end = text.length;
@@ -153,9 +155,16 @@ class RecursiveChunkingStrategy {
153
155
  break;
154
156
  chunks.push(chunkText);
155
157
  const chunkLength = chunkText.length;
158
+ if (start + chunkLength >= text.length) {
159
+ break;
160
+ }
156
161
  // Calculate overlap start position
157
162
  const overlapText = this.getOverlapText(chunkText, overlap);
158
- start += chunkLength - overlapText.length;
163
+ const nextStart = start + chunkLength - overlapText.length;
164
+ if (nextStart <= start) {
165
+ break;
166
+ }
167
+ start = nextStart;
159
168
  if (text.length - start < overlapText.length) {
160
169
  break;
161
170
  }
@@ -1 +1 @@
1
- {"version":3,"file":"recursive.js","sourceRoot":"","sources":["../../../../src/chunking/strategies/recursive.ts"],"names":[],"mappings":";;;AAAA,2EAA2E;AAC3E,+BAAoC;AAEpC,kDAAiD;AAEjD,MAAa,yBAAyB;IACpC,OAAO;QACL,OAAO,WAAW,CAAC;IACrB,CAAC;IAED,KAAK,CAAC,IAAY,EAAE,UAAkB,EAAE,OAAwB;QAC9D,uDAAuD;QACvD,MAAM,UAAU,GAAI,OAAe,CAAC,UAAU,IAAI,IAAI,CAAC;QACvD,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,YAAY,IAAI,IAAI,EAAE,UAAU,CAAC,CAAC;QACnE,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,OAAO,IAAI,GAAG,EAAE,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,0CAA0C;QAEvH,iFAAiF;QACjF,MAAM,iBAAiB,GAAG;YACxB,MAAM,EAAG,YAAY;YACrB,IAAI,EAAK,aAAa;YACtB,IAAI,EAAK,WAAW;YACpB,IAAI,EAAK,WAAW;YACpB,IAAI,EAAK,WAAW;YACpB,IAAI,EAAK,SAAS;YAClB,IAAI,EAAK,SAAS;YAClB,GAAG,EAAM,OAAO;YAChB,EAAE,EAAO,YAAY;SACtB,CAAC;QAEF,MAAM,UAAU,GAAG,OAAO,CAAC,SAAS;YAClC,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;YAC9E,CAAC,CAAC,iBAAiB,CAAC;QAEtB,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,UAAU,CAAC,CAAC;QAEvE,yCAAyC;QACzC,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,MAAM,MAAM,GAAY,EAAE,CAAC;QAE3B,MAAM,CAAC,OAAO,CAAC,CAAC,SAAS,EAAE,KAAK,EAAE,EAAE;YAClC,MAAM,OAAO,GAAG,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC;YAE7C,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,IAAA,SAAM,GAAE;gBACZ,IAAI,EAAE,SAAS;gBACf,QAAQ,EAAE;oBACR,UAAU;oBACV,UAAU,EAAE,KAAK;oBACjB,WAAW,EAAE,MAAM,CAAC,MAAM;oBAC1B,SAAS;oBACT,OAAO;oBACP,GAAG,OAAO,CAAC,QAAQ;iBACpB;aACF,CAAC,CAAC;YAEH,wCAAwC;YACxC,SAAS,GAAG,OAAO,GAAG,OAAO,CAAC;QAChC,CAAC,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAChB,CAAC;IAEO,cAAc,CACpB,IAAY,EACZ,OAAe,EAAE,2BAA2B;IAC5C,OAAe,EAAE,2BAA2B;IAC5C,UAAoB;QAEpB,MAAM,WAAW,GAAa,EAAE,CAAC;QAEjC,4DAA4D;QAC5D,MAAM,UAAU,GAAG,IAAA,uBAAW,EAAC,IAAI,CAAC,CAAC;QACrC,IAAI,UAAU,IAAI,OAAO,EAAE,CAAC;YAC1B,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAC1C,CAAC;QAED,8BAA8B;QAC9B,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;YACnC,IAAI,SAAS,KAAK,EAAE,EAAE,CAAC;gBACrB,uCAAuC;gBACvC,OAAO,IAAI,CAAC,iBAAiB,CAAC,IAAI,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;YACxD,CAAC;YAED,IAAI,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;gBAC7B,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;gBACrC,MAAM,MAAM,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;gBAErE,yEAAyE;gBACzE,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;oBAC3B,MAAM,WAAW,GAAG,IAAA,uBAAW,EAAC,KAAK,CAAC,CAAC;oBACvC,IAAI,WAAW,GAAG,OAAO,EAAE,CAAC;wBAC1B,kCAAkC;wBAClC,MAAM,kBAAkB,GAAG,UAAU,CAAC,OAAO,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;wBAC7D,MAAM,mBAAmB,GAAG,UAAU,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;wBACjE,WAAW,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,cAAc,CAAC,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,mBAAmB,CAAC,CAAC,CAAC;oBACzF,CAAC;yBAAM,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,CAAC;wBACxB,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;oBACjC,CAAC;gBACH,CAAC;gBAED,OAAO,WAAW,CAAC;YACrB,CAAC;QACH,CAAC;QAED,8BAA8B;QAC9B,OAAO,IAAI,CAAC,iBAAiB,CAAC,IAAI,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;IACxD,CAAC;IAEO,WAAW,CACjB,MAAgB,EAChB,SAAiB,EACjB,OAAe,EAAE,2BAA2B;IAC5C,OAAe,CAAC,2BAA2B;;QAE3C,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,IAAI,YAAY,GAAG,EAAE,CAAC;QAEtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;YACxB,MAAM,KAAK,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC;YAChE,MAAM,QAAQ,GAAG,YAAY,GAAG,KAAK,CAAC;YACtC,MAAM,cAAc,GAAG,IAAA,uBAAW,EAAC,QAAQ,CAAC,CAAC;YAE7C,IAAI,cAAc,IAAI,OAAO,EAAE,CAAC;gBAC9B,YAAY,GAAG,QAAQ,CAAC;YAC1B,CAAC;iBAAM,CAAC;gBACN,IAAI,YAAY,EAAE,CAAC;oBACjB,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;oBAC1B,2CAA2C;oBAC3C,8DAA8D;oBAC9D,MAAM,WAAW,GAAG,IAAI,CAAC,cAAc,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC;oBAC/D,YAAY,GAAG,WAAW,GAAG,KAAK,CAAC;gBACrC,CAAC;qBAAM,CAAC;oBACN,iFAAiF;oBACjF,YAAY,GAAG,KAAK,CAAC;gBACvB,CAAC;YACH,CAAC;QACH,CAAC;QAED,IAAI,YAAY,EAAE,CAAC;YACjB,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC5B,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,IAAY,EAAE,aAAqB;QACxD,6CAA6C;QAC7C,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC;QACtB,IAAI,SAAS,GAAG,EAAE,CAAC;QAEnB,OAAO,KAAK,GAAG,GAAG,EAAE,CAAC;YACnB,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;YAC1C,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YAClC,MAAM,MAAM,GAAG,IAAA,uBAAW,EAAC,SAAS,CAAC,CAAC;YAEtC,IAAI,MAAM,IAAI,aAAa,EAAE,CAAC;gBAC5B,SAAS,GAAG,SAAS,CAAC;gBACtB,GAAG,GAAG,GAAG,CAAC;YACZ,CAAC;iBAAM,CAAC;gBACN,KAAK,GAAG,GAAG,GAAG,CAAC,CAAC;YAClB,CAAC;QACH,CAAC;QAED,OAAO,SAAS,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,uBAAuB;IACzF,CAAC;IAEO,iBAAiB,CAAC,IAAY,EAAE,OAAe,EAAE,OAAe;QACtE,wEAAwE;QACxE,wEAAwE;QACxE,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,IAAI,KAAK,GAAG,CAAC,CAAC;QAEd,OAAO,KAAK,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;YAC3B,MAAM,SAAS,GAAG,IAAI,CAAC,qBAAqB,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,OAAO,CAAC,CAAC;YACzE,IAAI,CAAC,SAAS;gBAAE,MAAM;YAEtB,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACvB,MAAM,WAAW,GAAG,SAAS,CAAC,MAAM,CAAC;YAErC,mCAAmC;YACnC,MAAM,WAAW,GAAG,IAAI,CAAC,cAAc,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;YAC5D,KAAK,IAAI,WAAW,GAAG,WAAW,CAAC,MAAM,CAAC;YAE1C,IAAI,IAAI,CAAC,MAAM,GAAG,KAAK,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC;gBAC7C,MAAM;YACR,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,qBAAqB,CAAC,IAAY,EAAE,SAAiB;QAC3D,IAAI,IAAA,uBAAW,EAAC,IAAI,CAAC,IAAI,SAAS,EAAE,CAAC;YACnC,OAAO,IAAI,CAAC;QACd,CAAC;QAED,iDAAiD;QACjD,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC;QACtB,IAAI,SAAS,GAAG,EAAE,CAAC;QAEnB,OAAO,KAAK,GAAG,GAAG,EAAE,CAAC;YACnB,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;YAC1C,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;YACrC,MAAM,MAAM,GAAG,IAAA,uBAAW,EAAC,SAAS,CAAC,CAAC;YAEtC,IAAI,MAAM,IAAI,SAAS,EAAE,CAAC;gBACxB,SAAS,GAAG,SAAS,CAAC;gBACtB,KAAK,GAAG,GAAG,GAAG,CAAC,CAAC;YAClB,CAAC;iBAAM,CAAC;gBACN,GAAG,GAAG,GAAG,CAAC;YACZ,CAAC;QACH,CAAC;QAED,OAAO,SAAS,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,kBAAkB;IACtF,CAAC;CACF;AA3ND,8DA2NC"}
1
+ {"version":3,"file":"recursive.js","sourceRoot":"","sources":["../../../../src/chunking/strategies/recursive.ts"],"names":[],"mappings":";;;AAAA,2EAA2E;AAC3E,+BAAoC;AAEpC,kDAAiD;AAEjD,MAAa,yBAAyB;IACpC,OAAO;QACL,OAAO,WAAW,CAAC;IACrB,CAAC;IAED,KAAK,CAAC,IAAY,EAAE,UAAkB,EAAE,OAAwB;QAC9D,uDAAuD;QACvD,MAAM,UAAU,GAAI,OAAe,CAAC,UAAU,IAAI,IAAI,CAAC;QACvD,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,YAAY,IAAI,IAAI,EAAE,UAAU,CAAC,CAAC;QACnE,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,OAAO,IAAI,GAAG,EAAE,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,0CAA0C;QAEvH,iFAAiF;QACjF,MAAM,iBAAiB,GAAG;YACxB,MAAM,EAAG,YAAY;YACrB,IAAI,EAAK,aAAa;YACtB,IAAI,EAAK,WAAW;YACpB,IAAI,EAAK,WAAW;YACpB,IAAI,EAAK,WAAW;YACpB,IAAI,EAAK,SAAS;YAClB,IAAI,EAAK,SAAS;YAClB,GAAG,EAAM,OAAO;YAChB,EAAE,EAAO,YAAY;SACtB,CAAC;QAEF,MAAM,UAAU,GAAG,OAAO,CAAC,SAAS;YAClC,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;YAC9E,CAAC,CAAC,iBAAiB,CAAC;QAEtB,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,UAAU,CAAC,CAAC;QAEvE,yCAAyC;QACzC,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,MAAM,MAAM,GAAY,EAAE,CAAC;QAE3B,MAAM,CAAC,OAAO,CAAC,CAAC,SAAS,EAAE,KAAK,EAAE,EAAE;YAClC,MAAM,OAAO,GAAG,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC;YAE7C,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,IAAA,SAAM,GAAE;gBACZ,IAAI,EAAE,SAAS;gBACf,QAAQ,EAAE;oBACR,UAAU;oBACV,UAAU,EAAE,KAAK;oBACjB,WAAW,EAAE,MAAM,CAAC,MAAM;oBAC1B,SAAS;oBACT,OAAO;oBACP,GAAG,OAAO,CAAC,QAAQ;iBACpB;aACF,CAAC,CAAC;YAEH,wCAAwC;YACxC,SAAS,GAAG,OAAO,GAAG,OAAO,CAAC;QAChC,CAAC,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAChB,CAAC;IAEO,cAAc,CACpB,IAAY,EACZ,OAAe,EAAE,2BAA2B;IAC5C,OAAe,EAAE,2BAA2B;IAC5C,UAAoB;QAEpB,MAAM,WAAW,GAAa,EAAE,CAAC;QAEjC,4DAA4D;QAC5D,MAAM,UAAU,GAAG,IAAA,uBAAW,EAAC,IAAI,CAAC,CAAC;QACrC,IAAI,UAAU,IAAI,OAAO,EAAE,CAAC;YAC1B,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAC1C,CAAC;QAED,8BAA8B;QAC9B,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;YACnC,IAAI,SAAS,KAAK,EAAE,EAAE,CAAC;gBACrB,uCAAuC;gBACvC,OAAO,IAAI,CAAC,iBAAiB,CAAC,IAAI,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;YACxD,CAAC;YAED,IAAI,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;gBAC7B,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;gBACrC,MAAM,MAAM,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;gBAErE,yEAAyE;gBACzE,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;oBAC3B,MAAM,WAAW,GAAG,IAAA,uBAAW,EAAC,KAAK,CAAC,CAAC;oBACvC,IAAI,WAAW,GAAG,OAAO,EAAE,CAAC;wBAC1B,kCAAkC;wBAClC,MAAM,kBAAkB,GAAG,UAAU,CAAC,OAAO,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;wBAC7D,MAAM,mBAAmB,GAAG,UAAU,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;wBACjE,WAAW,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,cAAc,CAAC,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,mBAAmB,CAAC,CAAC,CAAC;oBACzF,CAAC;yBAAM,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,CAAC;wBACxB,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;oBACjC,CAAC;gBACH,CAAC;gBAED,OAAO,WAAW,CAAC;YACrB,CAAC;QACH,CAAC;QAED,8BAA8B;QAC9B,OAAO,IAAI,CAAC,iBAAiB,CAAC,IAAI,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;IACxD,CAAC;IAEO,WAAW,CACjB,MAAgB,EAChB,SAAiB,EACjB,OAAe,EAAE,2BAA2B;IAC5C,OAAe,CAAC,2BAA2B;;QAE3C,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,IAAI,YAAY,GAAG,EAAE,CAAC;QAEtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;YACxB,MAAM,KAAK,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC;YAChE,MAAM,QAAQ,GAAG,YAAY,GAAG,KAAK,CAAC;YACtC,MAAM,cAAc,GAAG,IAAA,uBAAW,EAAC,QAAQ,CAAC,CAAC;YAE7C,IAAI,cAAc,IAAI,OAAO,EAAE,CAAC;gBAC9B,YAAY,GAAG,QAAQ,CAAC;YAC1B,CAAC;iBAAM,CAAC;gBACN,IAAI,YAAY,EAAE,CAAC;oBACjB,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;oBAC1B,2CAA2C;oBAC3C,8DAA8D;oBAC9D,MAAM,WAAW,GAAG,IAAI,CAAC,cAAc,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC;oBAC/D,YAAY,GAAG,WAAW,GAAG,KAAK,CAAC;gBACrC,CAAC;qBAAM,CAAC;oBACN,iFAAiF;oBACjF,YAAY,GAAG,KAAK,CAAC;gBACvB,CAAC;YACH,CAAC;QACH,CAAC;QAED,IAAI,YAAY,EAAE,CAAC;YACjB,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC5B,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,IAAY,EAAE,aAAqB;QACxD,IAAI,aAAa,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAEnC,6CAA6C;QAC7C,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC;QACtB,IAAI,SAAS,GAAG,EAAE,CAAC;QAEnB,OAAO,KAAK,GAAG,GAAG,EAAE,CAAC;YACnB,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;YAC1C,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YAClC,MAAM,MAAM,GAAG,IAAA,uBAAW,EAAC,SAAS,CAAC,CAAC;YAEtC,IAAI,MAAM,IAAI,aAAa,EAAE,CAAC;gBAC5B,SAAS,GAAG,SAAS,CAAC;gBACtB,GAAG,GAAG,GAAG,CAAC;YACZ,CAAC;iBAAM,CAAC;gBACN,KAAK,GAAG,GAAG,GAAG,CAAC,CAAC;YAClB,CAAC;QACH,CAAC;QAED,OAAO,SAAS,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,uBAAuB;IACzF,CAAC;IAEO,iBAAiB,CAAC,IAAY,EAAE,OAAe,EAAE,OAAe;QACtE,wEAAwE;QACxE,wEAAwE;QACxE,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,IAAI,KAAK,GAAG,CAAC,CAAC;QAEd,OAAO,KAAK,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;YAC3B,MAAM,SAAS,GAAG,IAAI,CAAC,qBAAqB,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,OAAO,CAAC,CAAC;YACzE,IAAI,CAAC,SAAS;gBAAE,MAAM;YAEtB,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACvB,MAAM,WAAW,GAAG,SAAS,CAAC,MAAM,CAAC;YAErC,IAAI,KAAK,GAAG,WAAW,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;gBACvC,MAAM;YACR,CAAC;YAED,mCAAmC;YACnC,MAAM,WAAW,GAAG,IAAI,CAAC,cAAc,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;YAC5D,MAAM,SAAS,GAAG,KAAK,GAAG,WAAW,GAAG,WAAW,CAAC,MAAM,CAAC;YAC3D,IAAI,SAAS,IAAI,KAAK,EAAE,CAAC;gBACvB,MAAM;YACR,CAAC;YAED,KAAK,GAAG,SAAS,CAAC;YAElB,IAAI,IAAI,CAAC,MAAM,GAAG,KAAK,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC;gBAC7C,MAAM;YACR,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,qBAAqB,CAAC,IAAY,EAAE,SAAiB;QAC3D,IAAI,IAAA,uBAAW,EAAC,IAAI,CAAC,IAAI,SAAS,EAAE,CAAC;YACnC,OAAO,IAAI,CAAC;QACd,CAAC;QAED,iDAAiD;QACjD,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC;QACtB,IAAI,SAAS,GAAG,EAAE,CAAC;QAEnB,OAAO,KAAK,GAAG,GAAG,EAAE,CAAC;YACnB,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;YAC1C,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;YACrC,MAAM,MAAM,GAAG,IAAA,uBAAW,EAAC,SAAS,CAAC,CAAC;YAEtC,IAAI,MAAM,IAAI,SAAS,EAAE,CAAC;gBACxB,SAAS,GAAG,SAAS,CAAC;gBACtB,KAAK,GAAG,GAAG,GAAG,CAAC,CAAC;YAClB,CAAC;iBAAM,CAAC;gBACN,GAAG,GAAG,GAAG,CAAC;YACZ,CAAC;QACH,CAAC;QAED,OAAO,SAAS,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,kBAAkB;IACtF,CAAC;CACF;AAtOD,8DAsOC"}
@@ -40,8 +40,8 @@ class SentenceChunkingStrategy {
40
40
  ...options.metadata,
41
41
  },
42
42
  });
43
- // Keep last N sentences for overlap
44
- const overlapSentences = currentChunk.slice(-overlap);
43
+ // Keep last N sentences for overlap. slice(-0) equals slice(0), so handle zero explicitly.
44
+ const overlapSentences = overlap > 0 ? currentChunk.slice(-overlap) : [];
45
45
  currentChunk = [...overlapSentences, sentence];
46
46
  currentTokens = (0, tokenizer_1.countTokens)(overlapSentences.join(' ')) + sentenceTokens;
47
47
  startChar = endChar - (overlapSentences.join(' ').length);
@@ -1 +1 @@
1
- {"version":3,"file":"sentence.js","sourceRoot":"","sources":["../../../../src/chunking/strategies/sentence.ts"],"names":[],"mappings":";;;AAAA,mCAAmC;AACnC,+BAAoC;AAEpC,kDAAiD;AAEjD,MAAa,wBAAwB;IACnC,OAAO;QACL,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,KAAK,CAAC,IAAY,EAAE,UAAkB,EAAE,OAAwB;QAC9D,uDAAuD;QACvD,MAAM,UAAU,GAAI,OAAe,CAAC,UAAU,IAAI,IAAI,CAAC;QACvD,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,YAAY,IAAI,IAAI,EAAE,UAAU,CAAC,CAAC;QACnE,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,CAAC,CAAC,CAAC,iCAAiC;QAEvE,4BAA4B;QAC5B,MAAM,SAAS,GAAG,IAAI,CAAC,kBAAkB,CAAC,IAAI,CAAC,CAAC;QAEhD,MAAM,MAAM,GAAY,EAAE,CAAC;QAC3B,IAAI,YAAY,GAAa,EAAE,CAAC;QAChC,IAAI,aAAa,GAAG,CAAC,CAAC;QACtB,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,SAAS,GAAG,CAAC,CAAC;QAElB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,MAAM,QAAQ,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;YAC9B,MAAM,cAAc,GAAG,IAAA,uBAAW,EAAC,QAAQ,CAAC,CAAC;YAE7C,+FAA+F;YAC/F,IAAI,aAAa,GAAG,cAAc,GAAG,OAAO,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxE,MAAM,SAAS,GAAG,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;gBAChD,MAAM,OAAO,GAAG,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC;gBAE7C,MAAM,CAAC,IAAI,CAAC;oBACV,EAAE,EAAE,IAAA,SAAM,GAAE;oBACZ,IAAI,EAAE,SAAS;oBACf,QAAQ,EAAE;wBACR,UAAU;wBACV,UAAU;wBACV,WAAW,EAAE,CAAC,EAAE,wBAAwB;wBACxC,SAAS;wBACT,OAAO;wBACP,SAAS,EAAE,YAAY,CAAC,MAAM;wBAC9B,GAAG,OAAO,CAAC,QAAQ;qBACpB;iBACF,CAAC,CAAC;gBAEH,oCAAoC;gBACpC,MAAM,gBAAgB,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC,CAAC;gBACtD,YAAY,GAAG,CAAC,GAAG,gBAAgB,EAAE,QAAQ,CAAC,CAAC;gBAC/C,aAAa,GAAG,IAAA,uBAAW,EAAC,gBAAgB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,cAAc,CAAC;gBACzE,SAAS,GAAG,OAAO,GAAG,CAAC,gBAAgB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC;gBAC1D,UAAU,EAAE,CAAC;YACf,CAAC;iBAAM,CAAC;gBACN,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAC5B,aAAa,IAAI,cAAc,CAAC;YAClC,CAAC;QACH,CAAC;QAED,uCAAuC;QACvC,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5B,MAAM,SAAS,GAAG,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;YAChD,MAAM,OAAO,GAAG,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC;YAE7C,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,IAAA,SAAM,GAAE;gBACZ,IAAI,EAAE,SAAS;gBACf,QAAQ,EAAE;oBACR,UAAU;oBACV,UAAU;oBACV,WAAW,EAAE,CAAC;oBACd,SAAS;oBACT,OAAO;oBACP,SAAS,EAAE,YAAY,CAAC,MAAM;oBAC9B,GAAG,OAAO,CAAC,QAAQ;iBACpB;aACF,CAAC,CAAC;QACL,CAAC;QAED,qBAAqB;QACrB,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;YACrB,KAAK,CAAC,QAAQ,CAAC,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC;QAC7C,CAAC,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAChB,CAAC;IAEO,kBAAkB,CAAC,IAAY;QACrC,gEAAgE;QAChE,+BAA+B;QAC/B,MAAM,SAAS,GAAa,EAAE,CAAC;QAE/B,qDAAqD;QACrD,IAAI,UAAU,GAAG,IAAI;aAClB,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC;aACtB,OAAO,CAAC,QAAQ,EAAE,KAAK,CAAC;aACxB,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC;aACtB,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC;aACtB,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC;aACtB,OAAO,CAAC,QAAQ,EAAE,KAAK,CAAC;aACxB,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC;aACxB,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;QAE5B,+BAA+B;QAC/B,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;QAElD,IAAI,eAAe,GAAG,EAAE,CAAC;QAEzB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,IAAI,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC/B,eAAe,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;gBAC/B,IAAI,eAAe,CAAC,IAAI,EAAE,EAAE,CAAC;oBAC3B,SAAS,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC,CAAC;gBACzC,CAAC;gBACD,eAAe,GAAG,EAAE,CAAC;YACvB,CAAC;iBAAM,CAAC;gBACN,eAAe,IAAI,IAAI,CAAC;YAC1B,CAAC;QACH,CAAC;QAED,4BAA4B;QAC5B,IAAI,eAAe,CAAC,IAAI,EAAE,EAAE,CAAC;YAC3B,SAAS,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC,CAAC;QACzC,CAAC;QAED,OAAO,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC7C,CAAC;CACF;AA3HD,4DA2HC"}
1
+ {"version":3,"file":"sentence.js","sourceRoot":"","sources":["../../../../src/chunking/strategies/sentence.ts"],"names":[],"mappings":";;;AAAA,mCAAmC;AACnC,+BAAoC;AAEpC,kDAAiD;AAEjD,MAAa,wBAAwB;IACnC,OAAO;QACL,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,KAAK,CAAC,IAAY,EAAE,UAAkB,EAAE,OAAwB;QAC9D,uDAAuD;QACvD,MAAM,UAAU,GAAI,OAAe,CAAC,UAAU,IAAI,IAAI,CAAC;QACvD,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,YAAY,IAAI,IAAI,EAAE,UAAU,CAAC,CAAC;QACnE,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,CAAC,CAAC,CAAC,iCAAiC;QAEvE,4BAA4B;QAC5B,MAAM,SAAS,GAAG,IAAI,CAAC,kBAAkB,CAAC,IAAI,CAAC,CAAC;QAEhD,MAAM,MAAM,GAAY,EAAE,CAAC;QAC3B,IAAI,YAAY,GAAa,EAAE,CAAC;QAChC,IAAI,aAAa,GAAG,CAAC,CAAC;QACtB,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,SAAS,GAAG,CAAC,CAAC;QAElB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,MAAM,QAAQ,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;YAC9B,MAAM,cAAc,GAAG,IAAA,uBAAW,EAAC,QAAQ,CAAC,CAAC;YAE7C,+FAA+F;YAC/F,IAAI,aAAa,GAAG,cAAc,GAAG,OAAO,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxE,MAAM,SAAS,GAAG,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;gBAChD,MAAM,OAAO,GAAG,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC;gBAE7C,MAAM,CAAC,IAAI,CAAC;oBACV,EAAE,EAAE,IAAA,SAAM,GAAE;oBACZ,IAAI,EAAE,SAAS;oBACf,QAAQ,EAAE;wBACR,UAAU;wBACV,UAAU;wBACV,WAAW,EAAE,CAAC,EAAE,wBAAwB;wBACxC,SAAS;wBACT,OAAO;wBACP,SAAS,EAAE,YAAY,CAAC,MAAM;wBAC9B,GAAG,OAAO,CAAC,QAAQ;qBACpB;iBACF,CAAC,CAAC;gBAEH,2FAA2F;gBAC3F,MAAM,gBAAgB,GAAG,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBACzE,YAAY,GAAG,CAAC,GAAG,gBAAgB,EAAE,QAAQ,CAAC,CAAC;gBAC/C,aAAa,GAAG,IAAA,uBAAW,EAAC,gBAAgB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,cAAc,CAAC;gBACzE,SAAS,GAAG,OAAO,GAAG,CAAC,gBAAgB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC;gBAC1D,UAAU,EAAE,CAAC;YACf,CAAC;iBAAM,CAAC;gBACN,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAC5B,aAAa,IAAI,cAAc,CAAC;YAClC,CAAC;QACH,CAAC;QAED,uCAAuC;QACvC,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5B,MAAM,SAAS,GAAG,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;YAChD,MAAM,OAAO,GAAG,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC;YAE7C,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,IAAA,SAAM,GAAE;gBACZ,IAAI,EAAE,SAAS;gBACf,QAAQ,EAAE;oBACR,UAAU;oBACV,UAAU;oBACV,WAAW,EAAE,CAAC;oBACd,SAAS;oBACT,OAAO;oBACP,SAAS,EAAE,YAAY,CAAC,MAAM;oBAC9B,GAAG,OAAO,CAAC,QAAQ;iBACpB;aACF,CAAC,CAAC;QACL,CAAC;QAED,qBAAqB;QACrB,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;YACrB,KAAK,CAAC,QAAQ,CAAC,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC;QAC7C,CAAC,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAChB,CAAC;IAEO,kBAAkB,CAAC,IAAY;QACrC,gEAAgE;QAChE,+BAA+B;QAC/B,MAAM,SAAS,GAAa,EAAE,CAAC;QAE/B,qDAAqD;QACrD,IAAI,UAAU,GAAG,IAAI;aAClB,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC;aACtB,OAAO,CAAC,QAAQ,EAAE,KAAK,CAAC;aACxB,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC;aACtB,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC;aACtB,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC;aACtB,OAAO,CAAC,QAAQ,EAAE,KAAK,CAAC;aACxB,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC;aACxB,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;QAE5B,+BAA+B;QAC/B,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;QAElD,IAAI,eAAe,GAAG,EAAE,CAAC;QAEzB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,IAAI,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC/B,eAAe,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;gBAC/B,IAAI,eAAe,CAAC,IAAI,EAAE,EAAE,CAAC;oBAC3B,SAAS,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC,CAAC;gBACzC,CAAC;gBACD,eAAe,GAAG,EAAE,CAAC;YACvB,CAAC;iBAAM,CAAC;gBACN,eAAe,IAAI,IAAI,CAAC;YAC1B,CAAC;QACH,CAAC;QAED,4BAA4B;QAC5B,IAAI,eAAe,CAAC,IAAI,EAAE,EAAE,CAAC;YAC3B,SAAS,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC,CAAC;QACzC,CAAC;QAED,OAAO,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC7C,CAAC;CACF;AA3HD,4DA2HC"}
@@ -1 +1 @@
1
- {"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../../../../src/chunking/utils/tokenizer.ts"],"names":[],"mappings":"AAoBA;;GAEG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAahD;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAMnD"}
1
+ {"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../../../../src/chunking/utils/tokenizer.ts"],"names":[],"mappings":"AAoBA;;GAEG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAchD;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAMnD"}
@@ -15,7 +15,7 @@ function getEmbeddingEncoding() {
15
15
  if (!cachedEncoding) {
16
16
  // Use cl100k_base encoding which is used by text-embedding-3 models
17
17
  // This is compatible with GPT-4 and text-embedding-3 models
18
- cachedEncoding = (0, tiktoken_1.encoding_for_model)('gpt-4');
18
+ cachedEncoding = (0, tiktoken_1.encoding_for_model)('text-embedding-3-small');
19
19
  }
20
20
  return cachedEncoding;
21
21
  }
@@ -31,6 +31,7 @@ function countTokens(text) {
31
31
  return encoding.encode(text).length;
32
32
  }
33
33
  catch (error) {
34
+ console.error("using fallback tokenizer, error:", error);
34
35
  // Fallback to approximation if tiktoken fails
35
36
  // Rough approximation: 1 token ≈ 4 characters for English text
36
37
  return Math.ceil(text.length / 4);
@@ -1 +1 @@
1
- {"version":3,"file":"tokenizer.js","sourceRoot":"","sources":["../../../../src/chunking/utils/tokenizer.ts"],"names":[],"mappings":";;AAuBA,kCAaC;AAMD,wCAMC;AAhDD,iDAAiD;AACjD,4CAA4C;AAC5C,uCAA8C;AAE9C,wCAAwC;AACxC,IAAI,cAAc,GAAiD,IAAI,CAAC;AAExE;;;GAGG;AACH,SAAS,oBAAoB;IAC3B,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,oEAAoE;QACpE,4DAA4D;QAC5D,cAAc,GAAG,IAAA,6BAAkB,EAAC,OAAO,CAAC,CAAC;IAC/C,CAAC;IACD,OAAO,cAAc,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,SAAgB,WAAW,CAAC,IAAY;IACtC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC/B,OAAO,CAAC,CAAC;IACX,CAAC;IAED,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,oBAAoB,EAAE,CAAC;QACxC,OAAO,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;IACtC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,8CAA8C;QAC9C,+DAA+D;QAC/D,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACpC,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,SAAgB,cAAc,CAAC,IAAY;IACzC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC/B,OAAO,CAAC,CAAC;IACX,CAAC;IACD,+DAA+D;IAC/D,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACpC,CAAC"}
1
+ {"version":3,"file":"tokenizer.js","sourceRoot":"","sources":["../../../../src/chunking/utils/tokenizer.ts"],"names":[],"mappings":";;AAuBA,kCAcC;AAMD,wCAMC;AAjDD,iDAAiD;AACjD,4CAA4C;AAC5C,uCAA8C;AAE9C,wCAAwC;AACxC,IAAI,cAAc,GAAiD,IAAI,CAAC;AAExE;;;GAGG;AACH,SAAS,oBAAoB;IAC3B,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,oEAAoE;QACpE,4DAA4D;QAC5D,cAAc,GAAG,IAAA,6BAAkB,EAAC,wBAAwB,CAAC,CAAC;IAChE,CAAC;IACD,OAAO,cAAc,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,SAAgB,WAAW,CAAC,IAAY;IACtC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC/B,OAAO,CAAC,CAAC;IACX,CAAC;IAED,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,oBAAoB,EAAE,CAAC;QACxC,OAAO,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;IACtC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,kCAAkC,EAAE,KAAK,CAAC,CAAC;QACzD,8CAA8C;QAC9C,+DAA+D;QAC/D,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACpC,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,SAAgB,cAAc,CAAC,IAAY;IACzC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC/B,OAAO,CAAC,CAAC;IACX,CAAC;IACD,+DAA+D;IAC/D,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACpC,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "voctar",
3
- "version": "0.1.2",
3
+ "version": "0.1.3",
4
4
  "description": "TypeScript library with RAG primitives for vector embeddings, chunking, storing and retrieval.",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",