@vivantel/virage-core 0.2.5 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/strategies/chunk/markdown-headers.d.ts.map +1 -1
- package/dist/strategies/chunk/markdown-headers.js +5 -2
- package/dist/strategies/chunk/markdown-headers.js.map +1 -1
- package/dist/strategies/chunk/semantic.d.ts.map +1 -1
- package/dist/strategies/chunk/semantic.js +7 -1
- package/dist/strategies/chunk/semantic.js.map +1 -1
- package/dist/strategies/chunk/token.d.ts.map +1 -1
- package/dist/strategies/chunk/token.js +14 -4
- package/dist/strategies/chunk/token.js.map +1 -1
- package/package.json +1 -1
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"markdown-headers.d.ts","sourceRoot":"","sources":["../../../src/strategies/chunk/markdown-headers.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAS,MAAM,2BAA2B,CAAC;AAIjE,MAAM,WAAW,sBAAsB;IACrC,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,wBAAgB,uBAAuB,CACrC,OAAO,GAAE,sBAA2B,GACnC,aAAa,
|
|
1
|
+
{"version":3,"file":"markdown-headers.d.ts","sourceRoot":"","sources":["../../../src/strategies/chunk/markdown-headers.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAS,MAAM,2BAA2B,CAAC;AAIjE,MAAM,WAAW,sBAAsB;IACrC,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,wBAAgB,uBAAuB,CACrC,OAAO,GAAE,sBAA2B,GACnC,aAAa,CAuGf"}
|
|
@@ -6,7 +6,8 @@ export function markdownHeadersStrategy(options = {}) {
|
|
|
6
6
|
name: "markdown-headers",
|
|
7
7
|
async chunk(text, filePath) {
|
|
8
8
|
const chunks = [];
|
|
9
|
-
|
|
9
|
+
// Normalise line endings so CRLF files don't leave \r on header text/content.
|
|
10
|
+
const lines = text.split("\n").map((l) => l.replace(/\r$/, ""));
|
|
10
11
|
let currentChunk = [];
|
|
11
12
|
let currentHeader = "";
|
|
12
13
|
let currentHeaderLevel = 0;
|
|
@@ -48,12 +49,14 @@ export function markdownHeadersStrategy(options = {}) {
|
|
|
48
49
|
strategy: this.name,
|
|
49
50
|
header: currentHeader,
|
|
50
51
|
header_level: currentHeaderLevel,
|
|
52
|
+
source_file: filePath,
|
|
51
53
|
truncated: true,
|
|
52
54
|
},
|
|
53
55
|
sourceFile: filePath || "unknown",
|
|
54
56
|
commitHash: "",
|
|
55
57
|
});
|
|
56
|
-
|
|
58
|
+
// Keep the header line so the continuation chunk is self-contained.
|
|
59
|
+
currentChunk = currentChunk.length > 0 ? [currentChunk[0]] : [];
|
|
57
60
|
}
|
|
58
61
|
}
|
|
59
62
|
// Last chunk
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"markdown-headers.js","sourceRoot":"","sources":["../../../src/strategies/chunk/markdown-headers.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,0BAA0B,EAAE,MAAM,sBAAsB,CAAC;AAOlE,MAAM,UAAU,uBAAuB,CACrC,UAAkC,EAAE;IAEpC,MAAM,YAAY,GAAG,OAAO,CAAC,YAAY,IAAI,GAAG,CAAC;IACjD,MAAM,YAAY,GAAG,OAAO,CAAC,YAAY,IAAI,IAAI,CAAC;IAElD,OAAO;QACL,IAAI,EAAE,kBAAkB;QAExB,KAAK,CAAC,KAAK,CAAC,IAAY,EAAE,QAAiB;YACzC,MAAM,MAAM,GAAY,EAAE,CAAC;YAC3B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;
|
|
1
|
+
{"version":3,"file":"markdown-headers.js","sourceRoot":"","sources":["../../../src/strategies/chunk/markdown-headers.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,0BAA0B,EAAE,MAAM,sBAAsB,CAAC;AAOlE,MAAM,UAAU,uBAAuB,CACrC,UAAkC,EAAE;IAEpC,MAAM,YAAY,GAAG,OAAO,CAAC,YAAY,IAAI,GAAG,CAAC;IACjD,MAAM,YAAY,GAAG,OAAO,CAAC,YAAY,IAAI,IAAI,CAAC;IAElD,OAAO;QACL,IAAI,EAAE,kBAAkB;QAExB,KAAK,CAAC,KAAK,CAAC,IAAY,EAAE,QAAiB;YACzC,MAAM,MAAM,GAAY,EAAE,CAAC;YAC3B,8EAA8E;YAC9E,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,CAAC;YAEhE,IAAI,YAAY,GAAa,EAAE,CAAC;YAChC,IAAI,aAAa,GAAG,EAAE,CAAC;YACvB,IAAI,kBAAkB,GAAG,CAAC,CAAC;YAE3B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBACzB,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,mBAAmB,CAAC,CAAC;gBAEpD,IAAI,WAAW,EAAE,CAAC;oBAChB,mCAAmC;oBACnC,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;wBAC5B,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;wBAC/C,IAAI,OAAO,CAAC,MAAM,IAAI,YAAY,EAAE,CAAC;4BACnC,MAAM,CAAC,IAAI,CAAC;gCACV,OAAO;gCACP,QAAQ,EAAE;oCACR,QAAQ,EAAE,IAAI,CAAC,IAAI;oCACnB,MAAM,EAAE,aAAa;oCACrB,YAAY,EAAE,kBAAkB;oCAChC,WAAW,EAAE,QAAQ;iCACtB;gCACD,UAAU,EAAE,QAAQ,IAAI,SAAS;gCACjC,UAAU,EAAE,EAAE;6BACf,CAAC,CAAC;wBACL,CAAC;oBACH,CAAC;oBAED,kBAAkB;oBAClB,kBAAkB,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;oBAC3C,aAAa,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;oBAC/B,YAAY,GAAG,CAAC,IAAI,CAAC,CAAC;gBACxB,CAAC;qBAAM,CAAC;oBACN,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBAC1B,CAAC;gBAED,wCAAwC;gBACxC,MAAM,WAAW,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;gBACnD,IAAI,WAAW,GAAG,YAAY,IAAI,YAAY,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;oBAC3D,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;oBAC/C,MAAM,CAAC,IAAI,CAAC;wBACV,OAAO;wBACP,QAAQ,EAAE;4BACR,QAAQ,EAAE,IAAI,CAAC,IAAI;4BACnB,MAAM,EAAE,aAAa;4BACrB,YAAY,EAAE,kBAAkB;4BAChC,WAAW,EAAE,QAAQ;4BACrB,SAAS,EAAE,IAAI;yBAChB;wBACD,UAAU,EAAE,QAAQ,IAAI,SAAS;wBACjC,UAAU,EAAE,EAAE;qBACf,CAAC,CAAC;oBACH,oEAAoE;oBACpE,YAAY,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBAClE,CAAC;YACH,CAAC;YAED,aAAa;YACb,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC5B,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC/C,IAAI,OAAO,CAAC,MAAM,IAAI,YAAY,EAAE,CAAC;oBACnC,MAAM,CAAC,IAAI,CAAC;wBACV,OAAO;wBACP,QAAQ,EAAE;4BACR,QAAQ,EAAE,IAAI,CAAC,IAAI;4BACnB,MAAM,EAAE,aAAa;4BACrB,YAAY,EAAE,kBAAkB;4BAChC,WAAW,EAAE,QAAQ;4BACrB,OAAO,EAAE,IAAI;yBACd;wBACD,UAAU,EAAE,QAAQ,IAAI,SAAS;wBACjC,UAAU,EAAE,EAAE;qBACf,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YAED,OAAO,MAAM,CAAC;QAChB,CAAC;QAED,eAAe,CAAC,IAAY,EAAE,SAAkB;YAC9C,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,oBAAoB,CAAC,CAAC;YACrD,OAAO;gBACL,QAAQ,EAAE,IAAI,CAAC,IAAI;gBACnB,WAAW,EAAE,CAAC,CAAC,WAAW;gBAC1B,YAAY,EAAE,WAAW,EAAE,CAAC,CAAC,CAAC;gBAC9B,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM;aACpC,CAAC;QACJ,CAAC;QAED,iBAAiB,CAAC,MAAe;YAC/B,OAAO,0BAA0B,CAAC,MAAM,CAAC,CAAC;QAC5C,CAAC;KACF,CAAC;AACJ,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"semantic.d.ts","sourceRoot":"","sources":["../../../src/strategies/chunk/semantic.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAS,MAAM,2BAA2B,CAAC;AAIjE,MAAM,WAAW,uBAAuB;IACtC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,wBAAgB,gBAAgB,CAC9B,OAAO,GAAE,uBAA4B,GACpC,aAAa,
|
|
1
|
+
{"version":3,"file":"semantic.d.ts","sourceRoot":"","sources":["../../../src/strategies/chunk/semantic.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAS,MAAM,2BAA2B,CAAC;AAIjE,MAAM,WAAW,uBAAuB;IACtC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,wBAAgB,gBAAgB,CAC9B,OAAO,GAAE,uBAA4B,GACpC,aAAa,CA+Ef"}
|
|
@@ -32,7 +32,7 @@ export function semanticStrategy(options = {}) {
|
|
|
32
32
|
currentChunk.push(sentence);
|
|
33
33
|
currentSize += sentenceSize;
|
|
34
34
|
}
|
|
35
|
-
// Last chunk
|
|
35
|
+
// Last chunk: append to previous if too small, otherwise save as its own chunk.
|
|
36
36
|
if (currentChunk.length > 0) {
|
|
37
37
|
const content = currentChunk.join(" ").trim();
|
|
38
38
|
if (content.length >= minChars) {
|
|
@@ -48,6 +48,12 @@ export function semanticStrategy(options = {}) {
|
|
|
48
48
|
commitHash: "",
|
|
49
49
|
});
|
|
50
50
|
}
|
|
51
|
+
else if (chunks.length > 0) {
|
|
52
|
+
// Merge into the previous chunk rather than silently dropping it.
|
|
53
|
+
const prev = chunks[chunks.length - 1];
|
|
54
|
+
prev.content = `${prev.content} ${content}`.trim();
|
|
55
|
+
prev.metadata.is_last = true;
|
|
56
|
+
}
|
|
51
57
|
}
|
|
52
58
|
return chunks;
|
|
53
59
|
},
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"semantic.js","sourceRoot":"","sources":["../../../src/strategies/chunk/semantic.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,0BAA0B,EAAE,MAAM,sBAAsB,CAAC;AAOlE,MAAM,UAAU,gBAAgB,CAC9B,UAAmC,EAAE;IAErC,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,IAAI,CAAC;IAC1C,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,GAAG,CAAC;IAEzC,OAAO;QACL,IAAI,EAAE,UAAU;QAEhB,KAAK,CAAC,KAAK,CAAC,IAAY,EAAE,QAAiB;YACzC,MAAM,MAAM,GAAY,EAAE,CAAC;YAE3B,uCAAuC;YACvC,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;YAE9C,IAAI,YAAY,GAAa,EAAE,CAAC;YAChC,IAAI,WAAW,GAAG,CAAC,CAAC;YAEpB,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;gBACjC,MAAM,YAAY,GAAG,QAAQ,CAAC,MAAM,CAAC;gBAErC,IAAI,WAAW,GAAG,YAAY,GAAG,QAAQ,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACrE,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;oBAC9C,IAAI,OAAO,CAAC,MAAM,IAAI,QAAQ,EAAE,CAAC;wBAC/B,MAAM,CAAC,IAAI,CAAC;4BACV,OAAO;4BACP,QAAQ,EAAE;gCACR,QAAQ,EAAE,IAAI,CAAC,IAAI;gCACnB,cAAc,EAAE,YAAY,CAAC,MAAM;gCACnC,WAAW,EAAE,QAAQ;6BACtB;4BACD,UAAU,EAAE,QAAQ,IAAI,SAAS;4BACjC,UAAU,EAAE,EAAE;yBACf,CAAC,CAAC;oBACL,CAAC;oBACD,YAAY,GAAG,EAAE,CAAC;oBAClB,WAAW,GAAG,CAAC,CAAC;gBAClB,CAAC;gBAED,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAC5B,WAAW,IAAI,YAAY,CAAC;YAC9B,CAAC;YAED,
|
|
1
|
+
{"version":3,"file":"semantic.js","sourceRoot":"","sources":["../../../src/strategies/chunk/semantic.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,0BAA0B,EAAE,MAAM,sBAAsB,CAAC;AAOlE,MAAM,UAAU,gBAAgB,CAC9B,UAAmC,EAAE;IAErC,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,IAAI,CAAC;IAC1C,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,GAAG,CAAC;IAEzC,OAAO;QACL,IAAI,EAAE,UAAU;QAEhB,KAAK,CAAC,KAAK,CAAC,IAAY,EAAE,QAAiB;YACzC,MAAM,MAAM,GAAY,EAAE,CAAC;YAE3B,uCAAuC;YACvC,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;YAE9C,IAAI,YAAY,GAAa,EAAE,CAAC;YAChC,IAAI,WAAW,GAAG,CAAC,CAAC;YAEpB,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;gBACjC,MAAM,YAAY,GAAG,QAAQ,CAAC,MAAM,CAAC;gBAErC,IAAI,WAAW,GAAG,YAAY,GAAG,QAAQ,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACrE,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;oBAC9C,IAAI,OAAO,CAAC,MAAM,IAAI,QAAQ,EAAE,CAAC;wBAC/B,MAAM,CAAC,IAAI,CAAC;4BACV,OAAO;4BACP,QAAQ,EAAE;gCACR,QAAQ,EAAE,IAAI,CAAC,IAAI;gCACnB,cAAc,EAAE,YAAY,CAAC,MAAM;gCACnC,WAAW,EAAE,QAAQ;6BACtB;4BACD,UAAU,EAAE,QAAQ,IAAI,SAAS;4BACjC,UAAU,EAAE,EAAE;yBACf,CAAC,CAAC;oBACL,CAAC;oBACD,YAAY,GAAG,EAAE,CAAC;oBAClB,WAAW,GAAG,CAAC,CAAC;gBAClB,CAAC;gBAED,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAC5B,WAAW,IAAI,YAAY,CAAC;YAC9B,CAAC;YAED,gFAAgF;YAChF,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC5B,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC9C,IAAI,OAAO,CAAC,MAAM,IAAI,QAAQ,EAAE,CAAC;oBAC/B,MAAM,CAAC,IAAI,CAAC;wBACV,OAAO;wBACP,QAAQ,EAAE;4BACR,QAAQ,EAAE,IAAI,CAAC,IAAI;4BACnB,cAAc,EAAE,YAAY,CAAC,MAAM;4BACnC,WAAW,EAAE,QAAQ;4BACrB,OAAO,EAAE,IAAI;yBACd;wBACD,UAAU,EAAE,QAAQ,IAAI,SAAS;wBACjC,UAAU,EAAE,EAAE;qBACf,CAAC,CAAC;gBACL,CAAC;qBAAM,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC7B,kEAAkE;oBAClE,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;oBACvC,IAAI,CAAC,OAAO,GAAG,GAAG,IAAI,CAAC,OAAO,IAAI,OAAO,EAAE,CAAC,IAAI,EAAE,CAAC;oBACnD,IAAI,CAAC,QAAQ,CAAC,OAAO,GAAG,IAAI,CAAC;gBAC/B,CAAC;YACH,CAAC;YAED,OAAO,MAAM,CAAC;QAChB,CAAC;QAED,eAAe,CAAC,IAAY,EAAE,SAAkB;YAC9C,OAAO;gBACL,QAAQ,EAAE,IAAI,CAAC,IAAI;gBACnB,cAAc,EAAE,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,MAAM;gBAC3C,UAAU,EAAE,IAAI,CAAC,MAAM;aACxB,CAAC;QACJ,CAAC;QAED,iBAAiB,CAAC,MAAe;YAC/B,OAAO,0BAA0B,CAAC,MAAM,CAAC,CAAC;QAC5C,CAAC;KACF,CAAC;AACJ,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"token.d.ts","sourceRoot":"","sources":["../../../src/strategies/chunk/token.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAS,MAAM,2BAA2B,CAAC;AAIjE,MAAM,WAAW,oBAAoB;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;;;GAIG;AACH,wBAAgB,aAAa,CAC3B,OAAO,GAAE,oBAAyB,GACjC,aAAa,
|
|
1
|
+
{"version":3,"file":"token.d.ts","sourceRoot":"","sources":["../../../src/strategies/chunk/token.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAS,MAAM,2BAA2B,CAAC;AAIjE,MAAM,WAAW,oBAAoB;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;;;GAIG;AACH,wBAAgB,aAAa,CAC3B,OAAO,GAAE,oBAAyB,GACjC,aAAa,CAuEf"}
|
|
@@ -9,7 +9,10 @@ export function tokenStrategy(options = {}) {
|
|
|
9
9
|
const overlap = options.overlap ?? 50;
|
|
10
10
|
const charsPerToken = 4;
|
|
11
11
|
const maxChars = maxTokens * charsPerToken;
|
|
12
|
-
|
|
12
|
+
// Clamp overlap so it never equals or exceeds maxChars, guaranteeing forward progress.
|
|
13
|
+
const safeOverlap = Math.min(overlap, maxTokens - 1);
|
|
14
|
+
const overlapChars = safeOverlap * charsPerToken;
|
|
15
|
+
const stepChars = maxChars - overlapChars; // minimum advance per iteration
|
|
13
16
|
return {
|
|
14
17
|
name: `token-${maxTokens}`,
|
|
15
18
|
async chunk(text, filePath) {
|
|
@@ -17,12 +20,16 @@ export function tokenStrategy(options = {}) {
|
|
|
17
20
|
let start = 0;
|
|
18
21
|
while (start < text.length) {
|
|
19
22
|
let end = Math.min(start + maxChars, text.length);
|
|
20
|
-
//
|
|
23
|
+
// Only snap to a sentence/line boundary when it falls in the second half
|
|
24
|
+
// of the window. Searching from the full end position can land on a break
|
|
25
|
+
// point just past `start` (e.g. first newline inside a template literal),
|
|
26
|
+
// which produces tiny chunks and triggers the one-char crawl loop.
|
|
21
27
|
if (end < text.length) {
|
|
28
|
+
const minBreak = start + Math.floor(maxChars / 2);
|
|
22
29
|
const lastPeriod = text.lastIndexOf(".", end);
|
|
23
30
|
const lastNewline = text.lastIndexOf("\n", end);
|
|
24
31
|
const breakPoint = Math.max(lastPeriod, lastNewline);
|
|
25
|
-
if (breakPoint
|
|
32
|
+
if (breakPoint >= minBreak) {
|
|
26
33
|
end = breakPoint + 1;
|
|
27
34
|
}
|
|
28
35
|
}
|
|
@@ -41,7 +48,10 @@ export function tokenStrategy(options = {}) {
|
|
|
41
48
|
commitHash: "", // Will be filled by caller
|
|
42
49
|
});
|
|
43
50
|
}
|
|
44
|
-
|
|
51
|
+
// Advance by overlap window, but never less than stepChars to prevent
|
|
52
|
+
// the one-char crawl when the boundary lands too close to start.
|
|
53
|
+
const nextStart = end - overlapChars;
|
|
54
|
+
start = nextStart > start ? nextStart : start + stepChars;
|
|
45
55
|
}
|
|
46
56
|
return chunks;
|
|
47
57
|
},
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"token.js","sourceRoot":"","sources":["../../../src/strategies/chunk/token.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,0BAA0B,EAAE,MAAM,sBAAsB,CAAC;AAOlE;;;;GAIG;AACH,MAAM,UAAU,aAAa,CAC3B,UAAgC,EAAE;IAElC,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,GAAG,CAAC;IAC3C,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC;IACtC,MAAM,aAAa,GAAG,CAAC,CAAC;IACxB,MAAM,QAAQ,GAAG,SAAS,GAAG,aAAa,CAAC;IAC3C,MAAM,
|
|
1
|
+
{"version":3,"file":"token.js","sourceRoot":"","sources":["../../../src/strategies/chunk/token.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,0BAA0B,EAAE,MAAM,sBAAsB,CAAC;AAOlE;;;;GAIG;AACH,MAAM,UAAU,aAAa,CAC3B,UAAgC,EAAE;IAElC,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,GAAG,CAAC;IAC3C,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC;IACtC,MAAM,aAAa,GAAG,CAAC,CAAC;IACxB,MAAM,QAAQ,GAAG,SAAS,GAAG,aAAa,CAAC;IAC3C,uFAAuF;IACvF,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,SAAS,GAAG,CAAC,CAAC,CAAC;IACrD,MAAM,YAAY,GAAG,WAAW,GAAG,aAAa,CAAC;IACjD,MAAM,SAAS,GAAG,QAAQ,GAAG,YAAY,CAAC,CAAC,gCAAgC;IAE3E,OAAO;QACL,IAAI,EAAE,SAAS,SAAS,EAAE;QAE1B,KAAK,CAAC,KAAK,CAAC,IAAY,EAAE,QAAiB;YACzC,MAAM,MAAM,GAAY,EAAE,CAAC;YAC3B,IAAI,KAAK,GAAG,CAAC,CAAC;YAEd,OAAO,KAAK,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;gBAC3B,IAAI,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;gBAElD,yEAAyE;gBACzE,0EAA0E;gBAC1E,0EAA0E;gBAC1E,mEAAmE;gBACnE,IAAI,GAAG,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;oBACtB,MAAM,QAAQ,GAAG,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC;oBAClD,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;oBAC9C,MAAM,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;oBAChD,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,WAAW,CAAC,CAAC;oBACrD,IAAI,UAAU,IAAI,QAAQ,EAAE,CAAC;wBAC3B,GAAG,GAAG,UAAU,GAAG,CAAC,CAAC;oBACvB,CAAC;gBACH,CAAC;gBAED,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC9C,IAAI,OAAO,EAAE,CAAC;oBACZ,MAAM,CAAC,IAAI,CAAC;wBACV,OAAO;wBACP,QAAQ,EAAE;4BACR,QAAQ,EAAE,IAAI,CAAC,IAAI;4BACnB,WAAW,EAAE,MAAM,CAAC,MAAM;4BAC1B,WAAW,EAAE,QAAQ;4BACrB,UAAU,EAAE,KAAK;4BACjB,QAAQ,EAAE,GAAG;yBACd;wBACD,UAAU,EAAE,QAAQ,IAAI,SAAS;wBACjC,UAAU,EAAE,EAAE,EAAE,2BAA2B;qBAC5C,CAAC,CAAC;gBACL,CAAC;gBAED,sEAAsE;gBACtE,iEAAiE;gBACjE,MAAM,SAAS,GAAG,GAAG,GAAG,YAAY,CAAC;gBACrC,KAAK,GAAG,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,GAAG,SAAS,CAAC;YAC5D,CAAC;YAED,OAAO,MAAM,CAAC;QAChB,CAAC;QAED,eAAe,CAAC,IAAY,EAAE,SAAkB;YAC9C,OAAO;gBACL,QAAQ,EAAE,IAAI,CAAC,IAAI;gBACnB,UAAU,EAAE,IAAI,CAAC,MAAM;gBACvB,gBAAgB,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,aAAa,CAAC;aACzD,CAAC;QACJ,CAAC;QAED,iBAAiB,CAAC,MAAe;YAC/B,OAAO,0BAA0B,CAAC,MAAM,CAAC,CAAC;QAC5C,CAAC;KACF,CAAC;AACJ,CAAC"}
|