@vivantel/virage-chunker-ce-ast 0.1.0 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ast-walker.d.ts +2 -1
- package/dist/ast-walker.d.ts.map +1 -1
- package/dist/ast-walker.js +1 -0
- package/dist/ast-walker.js.map +1 -1
- package/dist/chunker.d.ts +36 -12
- package/dist/chunker.d.ts.map +1 -1
- package/dist/chunker.js +220 -78
- package/dist/chunker.js.map +1 -1
- package/dist/factory.d.ts +56 -0
- package/dist/factory.d.ts.map +1 -0
- package/dist/factory.js +69 -0
- package/dist/factory.js.map +1 -0
- package/dist/index.d.ts +4 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/types.d.ts +93 -9
- package/dist/types.d.ts.map +1 -1
- package/package.json +10 -5
package/dist/ast-walker.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
import type { DocNode, DocNodeAttrs } from "./types.js";
|
|
1
|
+
import type { DocNode, DocNodeAttrs, DocNodeType } from "./types.js";
|
|
2
2
|
export interface TextSegment {
|
|
3
3
|
text: string;
|
|
4
|
+
nodeType: DocNodeType;
|
|
4
5
|
attrs: DocNodeAttrs;
|
|
5
6
|
/** Ancestor heading texts at the time this segment was emitted. */
|
|
6
7
|
breadcrumb: string[];
|
package/dist/ast-walker.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ast-walker.d.ts","sourceRoot":"","sources":["../src/ast-walker.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"ast-walker.d.ts","sourceRoot":"","sources":["../src/ast-walker.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AAErE,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,WAAW,CAAC;IACtB,KAAK,EAAE,YAAY,CAAC;IACpB,mEAAmE;IACnE,UAAU,EAAE,MAAM,EAAE,CAAC;CACtB;AAED;;;GAGG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,OAAO,GAAG,WAAW,EAAE,CAkCxD"}
|
package/dist/ast-walker.js
CHANGED
package/dist/ast-walker.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ast-walker.js","sourceRoot":"","sources":["../src/ast-walker.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"ast-walker.js","sourceRoot":"","sources":["../src/ast-walker.ts"],"names":[],"mappings":"AAUA;;;GAGG;AACH,MAAM,UAAU,WAAW,CAAC,IAAa;IACvC,MAAM,QAAQ,GAAkB,EAAE,CAAC;IACnC,MAAM,UAAU,GAAa,EAAE,CAAC;IAEhC,SAAS,KAAK,CAAC,IAAa;QAC1B,IAAI,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YACzC,MAAM,KAAK,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,YAAY,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;YACjD,sEAAsE;YACtE,UAAU,CAAC,MAAM,CAAC,KAAK,EAAE,UAAU,CAAC,MAAM,GAAG,KAAK,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC;QACjE,CAAC;QAED,iEAAiE;QACjE,MAAM,UAAU,GACd,IAAI,CAAC,IAAI,IAAI,IAAI;YACjB,IAAI,CAAC,IAAI,KAAK,SAAS;YACvB,IAAI,CAAC,IAAI,KAAK,OAAO;YACrB,IAAI,CAAC,IAAI,KAAK,MAAM,CAAC;QAEvB,IAAI,UAAU,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YAC5B,QAAQ,CAAC,IAAI,CAAC;gBACZ,IAAI,EAAE,IAAI,CAAC,IAAI;gBACf,QAAQ,EAAE,IAAI,CAAC,IAAI;gBACnB,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,UAAU,EAAE,CAAC,GAAG,UAAU,CAAC;aAC5B,CAAC,CAAC;QACL,CAAC;QAED,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ,IAAI,EAAE,EAAE,CAAC;YACxC,KAAK,CAAC,KAAK,CAAC,CAAC;QACf,CAAC;IACH,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,CAAC;IACZ,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
package/dist/chunker.d.ts
CHANGED
|
@@ -1,27 +1,51 @@
|
|
|
1
|
-
import type { DocNode,
|
|
1
|
+
import type { DocNode, ArtifactSet } from "./types.js";
|
|
2
2
|
/**
 * Options accepted by walkToChunks(): source-file identity, window sizing,
 * and the optional "level 2" modifiers (overlap, boundaryPadding, recursive,
 * adaptiveSize).
 */
export interface WalkOptions {
    /**
     * Path of the source document. Recorded in every chunk's metadata and used
     * as the prefix of each chunk id (`<sourceFile>:<chunkIndex>`).
     */
    sourceFile: string;
    /** Source format label, copied verbatim into every chunk's filter metadata. */
    sourceFormat: string;
    /** Commit hash, copied verbatim onto every produced ArtifactSet. */
    commitHash: string;
    /** Chunking strategy label, copied verbatim into every chunk's filter metadata. */
    strategy: string;
    /** Maximum tokens per chunk window (default: 512). Tokens are estimated as ceil(chars / 4). */
    maxTokens?: number;
    /**
     * Minimum tokens for the final window (default: floor(maxTokens / 4)).
     * A shorter final window is merged into its predecessor, but only when
     * both windows belong to the same section (identical breadcrumb).
     */
    minTokens?: number;
    /**
     * Sliding-window overlap as a fraction of the previous window (default: 0).
     * An overlap of 0.2 means each new window reuses roughly the last 20 % of
     * the previous window's content, producing overlapping
     * SearchRepresentations that share context.
     *
     * The value is clamped to the range [0, 0.9]: negative values behave like 0
     * and anything above 0.9 behaves like 0.9.
     */
    overlap?: number;
    /**
     * Paragraphs (text segments) to prepend/append from adjacent windows into
     * FinalAnswerChunk.paddedContent only. Does NOT affect anchorText or preview.
     * Both counts default to 0 (no padding).
     */
    boundaryPadding?: {
        /** Number of trailing segments of the previous window to prepend. */
        before?: number;
        /** Number of leading segments of the next window to append. */
        after?: number;
    };
    /**
     * When true, segments that exceed maxTokens are pre-split into
     * maxTokens-sized pieces on character boundaries before windowing, rather
     * than hard-cut with content loss (default: false).
     */
    recursive?: boolean;
    /**
     * When true, the effective flush threshold for `code` and `table-cell`
     * segments is halved (ceil(maxTokens / 2)), producing smaller chunks for
     * dense technical content (default: false).
     */
    adaptiveSize?: boolean;
    /** Hash of the source file; copied into chunk metadata when provided. */
    fileHash?: string;
    /** Size of the source file in bytes; copied into chunk metadata when provided. */
    fileSizeBytes?: number;
    /** Modification timestamp of the source file; copied into chunk metadata when provided. */
    fileModifiedAt?: string;
}
|
|
13
|
-
export interface ChunkResult {
|
|
14
|
-
content: string;
|
|
15
|
-
metadata: ChunkMeta;
|
|
16
|
-
sourceFile: string;
|
|
17
|
-
commitHash: string;
|
|
18
|
-
contentHash?: string;
|
|
19
|
-
}
|
|
20
39
|
/**
 * Convert a ViDoc AST into an ordered list of ArtifactSets, one per logical
 * window of text.
 *
 * Windows are cut at paragraph/segment boundaries: a window is closed as soon
 * as the accumulated token count would exceed maxTokens, and also whenever the
 * section changes (the breadcrumb of the next segment differs). The level 2
 * modifiers in WalkOptions (overlap, boundaryPadding, recursive, adaptiveSize)
 * refine that basic behaviour.
 *
 * Every finalAnswerChunk.content starts with a heading prefix built from the
 * window's breadcrumb, so the LLM always sees the heading context.
 *
 * @param root Root node of the document AST.
 * @param opts Source identity, sizing limits and modifiers.
 * @returns The ArtifactSets in document order.
 */
export declare function walkToChunks(root: DocNode, opts: WalkOptions): ArtifactSet[];
|
|
27
51
|
//# sourceMappingURL=chunker.d.ts.map
|
package/dist/chunker.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../src/chunker.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../src/chunker.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,WAAW,EAAyB,MAAM,YAAY,CAAC;AAK9E,MAAM,WAAW,WAAW;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,sDAAsD;IACtD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,qGAAqG;IACrG,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;OAIG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;;OAGG;IACH,eAAe,CAAC,EAAE;QAChB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,KAAK,CAAC,EAAE,MAAM,CAAC;KAChB,CAAC;IACF;;;OAGG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB;;;OAGG;IACH,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AA0ED;;;;;;;;;;GAUG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,WAAW,GAAG,WAAW,EAAE,CAiO5E"}
|
package/dist/chunker.js
CHANGED
|
@@ -1,127 +1,269 @@
|
|
|
1
1
|
import { walkDocNode } from "./ast-walker.js";
|
|
2
2
|
import { extractOutline } from "./outline.js";
|
|
3
3
|
/** Rough characters-per-token ratio used for every token estimate in this module. */
const CHARS_PER_TOKEN = 4;
/** Upper bound, in characters, for a candidate chunk preview. */
const PREVIEW_CHARS = 250;
/**
 * Estimate the token count of a string from its length alone.
 *
 * @param text Text to measure.
 * @returns The character count divided by CHARS_PER_TOKEN, rounded up.
 */
function estimateTokens(text) {
    const charCount = text.length;
    return Math.ceil(charCount / CHARS_PER_TOKEN);
}
|
|
8
|
+
/**
 * Build a short preview of a chunk.
 *
 * Content that already fits within PREVIEW_CHARS is returned unchanged.
 * Longer content is cut to PREVIEW_CHARS characters and then trimmed back to
 * the last space inside that cut so the preview does not end mid-word; when
 * the cut contains no space past index 0 the raw cut is returned.
 *
 * @param content Full chunk content.
 * @returns The preview text.
 */
function makePreview(content) {
    const fitsAsIs = content.length <= PREVIEW_CHARS;
    if (fitsAsIs) {
        return content;
    }
    const head = content.substring(0, PREVIEW_CHARS);
    const breakAt = head.lastIndexOf(" ");
    if (breakAt > 0) {
        return head.substring(0, breakAt);
    }
    return head;
}
|
|
15
|
+
/**
 * Build the anchor text for a window: the breadcrumb trail followed by the
 * first sentence of the content, capped at 250 characters.
 *
 * The first sentence ends at the first '.', '!' or '?' that is followed by
 * whitespace. When no such terminator exists past index 0, the first 150
 * characters of the content are used instead.
 *
 * @param breadcrumb Ancestor heading texts, outermost first.
 * @param rawContent Window content without the heading prefix.
 * @returns Anchor text of at most 250 characters.
 */
function makeAnchorText(breadcrumb, rawContent) {
    let anchor = "";
    if (breadcrumb.length > 0) {
        anchor = `${breadcrumb.join(" › ")}. `;
    }
    const terminatorAt = rawContent.search(/[.!?]\s/);
    if (terminatorAt > 0) {
        // Keep the terminator itself as part of the sentence.
        anchor += rawContent.substring(0, terminatorAt + 1);
    }
    else {
        anchor += rawContent.substring(0, 150);
    }
    return anchor.substring(0, 250);
}
|
|
21
|
+
/**
 * Tokenise text into lower-cased sparse (keyword) terms.
 *
 * The text is split on runs of characters that are not letters, combining
 * marks, digits or underscores, and terms of two characters or fewer are
 * dropped. Unicode property escapes are used instead of `\W` because `\W`
 * only recognises ASCII word characters: it would cut "café" into "caf" and
 * discard non-Latin words entirely. Pure-ASCII input yields exactly the same
 * terms as a `\W+` split.
 *
 * @param text Text to tokenise.
 * @returns Terms in order of appearance (duplicates are kept).
 */
function makeSparseTerms(text) {
    return text
        .toLowerCase()
        .split(/[^\p{L}\p{M}\p{N}_]+/u)
        .filter((term) => term.length > 2);
}
|
|
27
|
+
/**
 * Tell whether two breadcrumbs describe the same section.
 *
 * @param a First breadcrumb (heading texts, outermost first).
 * @param b Second breadcrumb.
 * @returns true when both have the same length and strictly equal entries at every position.
 */
function sameBreadcrumb(a, b) {
    if (a.length !== b.length) {
        return false;
    }
    const hasMismatch = a.some((heading, position) => heading !== b[position]);
    return !hasMismatch;
}
|
|
7
30
|
/**
 * Pre-split a text segment that exceeds maxTokens into consecutive pieces of
 * at most maxTokens * CHARS_PER_TOKEN characters. Used when recursive=true to
 * avoid content loss on hard-cut.
 *
 * Each piece is a shallow copy of the segment with its own `text` and with
 * `attrs.byteStart` / `attrs.byteEnd` shifted by the piece's character offset
 * (character offsets stand in for byte offsets). The last piece keeps the
 * segment's original `byteEnd` so the pieces together cover the original range.
 *
 * @param seg Segment to split; it is not mutated.
 * @param maxTokens Token budget per piece.
 * @returns `[seg]` itself when the segment already fits or the budget is not
 *   a positive number; otherwise the pieces in order, whose texts concatenate
 *   back to `seg.text`.
 */
function splitOversized(seg, maxTokens) {
    const maxChars = Math.floor(maxTokens * CHARS_PER_TOKEN);
    // A zero, negative or NaN budget cannot make progress through the text
    // (the cursor would never advance), so leave the segment untouched.
    if (!(maxChars >= 1) || seg.text.length <= maxChars)
        return [seg];
    const source = seg.text;
    const total = source.length;
    const parts = [];
    let pos = 0;
    while (pos < total) {
        let end = Math.min(pos + maxChars, total);
        if (end < total) {
            const before = source.charCodeAt(end - 1);
            const after = source.charCodeAt(end);
            const splitsPair = before >= 0xd800 && before <= 0xdbff &&
                after >= 0xdc00 && after <= 0xdfff;
            if (splitsPair) {
                // Never cut between the halves of a surrogate pair: back off by
                // one, or take the whole pair if backing off would leave nothing.
                end = end - 1 > pos ? end - 1 : end + 1;
            }
        }
        const text = source.slice(pos, end);
        const isLast = end >= total;
        parts.push({
            ...seg,
            text,
            attrs: {
                ...seg.attrs,
                byteStart: seg.attrs.byteStart + pos,
                byteEnd: isLast && seg.attrs.byteEnd != null
                    ? seg.attrs.byteEnd
                    : seg.attrs.byteStart + end,
            },
        });
        pos = end;
    }
    return parts;
}
|
|
55
|
+
/**
 * Walk a ViDoc AST and produce one ArtifactSet per logical window.
 *
 * Segments returned by walkDocNode() are packed greedily into windows. A
 * window is closed when the next segment belongs to a different section
 * (breadcrumb change) or would push the accumulated token estimate past the
 * limit. A single segment larger than maxTokens becomes a window of its own,
 * hard-cut to maxTokens and flagged `truncated` (only reached when
 * recursive=false, because recursive mode pre-splits such segments).
 *
 * Level 2 modifiers:
 * - overlap: after a window is emitted, the next window starts far enough back
 *   to reuse roughly `overlap` of the previous window's tokens. A window that
 *   would contain nothing but already-emitted segments (end of document,
 *   section change, or a next segment that cannot fit) is not emitted.
 * - boundaryPadding: neighbouring segments are added to
 *   finalAnswerChunk.paddedContent only; segments the window already contains
 *   through overlap are not repeated.
 * - recursive: oversized segments are pre-split instead of hard-cut.
 * - adaptiveSize: the flush threshold is halved for `code` / `table-cell`
 *   segments.
 *
 * A final window shorter than minTokens is merged into its predecessor when
 * both belong to the same section.
 *
 * finalAnswerChunk.content includes a heading prefix derived from the window's
 * breadcrumb so the LLM always has heading context.
 *
 * @param root Root node of the document AST.
 * @param opts Chunking options and source-file metadata.
 * @returns One ArtifactSet per window in document order; an empty array when
 *   the walker yields no segments.
 */
export function walkToChunks(root, opts) {
    const maxTokens = opts.maxTokens ?? 512;
    const minTokens = opts.minTokens ?? Math.floor(maxTokens / 4);
    // Clamp to [0, 0.9]: a full overlap would never advance through the document.
    const overlap = Math.min(Math.max(opts.overlap ?? 0, 0), 0.9);
    const padBefore = opts.boundaryPadding?.before ?? 0;
    const padAfter = opts.boundaryPadding?.after ?? 0;
    const recursive = opts.recursive ?? false;
    const adaptiveSize = opts.adaptiveSize ?? false;
    const documentOutline = extractOutline(root);
    const rawSegments = walkDocNode(root);
    if (rawSegments.length === 0)
        return [];
    // Pre-split oversized segments in recursive mode to avoid content loss.
    const segments = recursive
        ? rawSegments.flatMap((s) => splitOversized(s, maxTokens))
        : rawSegments;
    // Fold one segment's position/language attributes into a window. Shared by
    // the regular and the hard-cut path so truncated windows carry them too.
    const absorbAttrs = (win, seg) => {
        win.byteEnd = seg.attrs.byteEnd;
        if (win.lineStart == null && seg.attrs.lineStart != null)
            win.lineStart = seg.attrs.lineStart;
        if (seg.attrs.lineEnd != null)
            win.lineEnd = seg.attrs.lineEnd;
        if (win.pageStart == null && seg.attrs.pageNumber != null)
            win.pageStart = seg.attrs.pageNumber;
        if (seg.attrs.pageNumber != null)
            win.pageEnd = seg.attrs.pageNumber;
        if (!win.lang && seg.attrs.lang)
            win.lang = seg.attrs.lang;
        if (!win.codeLanguage && seg.attrs.codeLanguage)
            win.codeLanguage = seg.attrs.codeLanguage;
    };
    // ── Build windows ──────────────────────────────────────────────────────────
    const windows = [];
    // Half-open segment index range [start, end) covered by each window; used
    // to detect redundant overlap windows and to keep merge/padding duplicate-free.
    const windowSegBounds = [];
    let startIdx = 0;
    while (startIdx < segments.length) {
        const firstSeg = segments[startIdx];
        const win = {
            texts: [],
            byteStart: firstSeg.attrs.byteStart,
            byteEnd: firstSeg.attrs.byteEnd,
            breadcrumb: firstSeg.breadcrumb,
            truncated: false,
        };
        let currentTokens = 0;
        let idx = startIdx;
        while (idx < segments.length) {
            const seg = segments[idx];
            const isCompact = adaptiveSize &&
                (seg.nodeType === "code" || seg.nodeType === "table-cell");
            const effectiveMax = isCompact ? Math.ceil(maxTokens / 2) : maxTokens;
            const segTokens = estimateTokens(seg.text);
            // Flush if entering a new section (different breadcrumb).
            if (currentTokens > 0 && !sameBreadcrumb(seg.breadcrumb, win.breadcrumb)) {
                break;
            }
            // Flush if adding this segment would overflow an already-populated window.
            if (currentTokens > 0 && currentTokens + segTokens > effectiveMax) {
                break;
            }
            // Hard-cut a single oversized segment (only reached when recursive=false).
            if (segTokens > maxTokens) {
                const maxChars = maxTokens * CHARS_PER_TOKEN;
                win.texts.push(seg.text.slice(0, maxChars));
                absorbAttrs(win, seg);
                win.truncated = true;
                idx++;
                break;
            }
            win.texts.push(seg.text);
            absorbAttrs(win, seg);
            currentTokens += segTokens;
            idx++;
        }
        if (win.texts.length === 0) {
            // Safety: always advance to avoid infinite loop.
            startIdx++;
            continue;
        }
        const prevBounds = windowSegBounds[windowSegBounds.length - 1];
        if (prevBounds !== undefined && idx <= prevBounds.end) {
            // This window started inside the previous one (overlap) but could not
            // take in any new segment, so it is a pure suffix of what was already
            // emitted. Drop it and resume right after the previous window.
            startIdx = idx;
            continue;
        }
        windows.push(win);
        windowSegBounds.push({ start: startIdx, end: idx });
        // Compute the next start index, accounting for overlap.
        if (overlap > 0 && idx > startIdx + 1) {
            // Walk backwards from idx until we've accumulated overlap * currentTokens.
            const targetOverlap = currentTokens * overlap;
            let accumulated = 0;
            let back = idx - 1;
            while (back > startIdx && accumulated < targetOverlap) {
                accumulated += estimateTokens(segments[back].text);
                back--;
            }
            // Always move forward by at least one segment.
            startIdx = Math.max(startIdx + 1, back + 1);
        }
        else {
            startIdx = idx;
        }
    }
    // ── Merge trailing short window into predecessor (same section only) ───────
    if (windows.length > 1) {
        const last = windows[windows.length - 1];
        const prev = windows[windows.length - 2];
        const lastBounds = windowSegBounds[windowSegBounds.length - 1];
        const prevBounds = windowSegBounds[windowSegBounds.length - 2];
        const lastTokens = estimateTokens(last.texts.join("\n\n"));
        if (lastTokens < minTokens && sameBreadcrumb(last.breadcrumb, prev.breadcrumb)) {
            // With overlap, the first texts of `last` are already the final texts
            // of `prev`; append only the segments `prev` does not contain yet.
            const shared = Math.max(0, prevBounds.end - lastBounds.start);
            prev.texts.push(...last.texts.slice(shared));
            prev.byteEnd = last.byteEnd;
            // Keep the predecessor's value when the merged window has none.
            prev.lineEnd = last.lineEnd ?? prev.lineEnd;
            prev.pageEnd = last.pageEnd ?? prev.pageEnd;
            prev.truncated = prev.truncated || last.truncated;
            prevBounds.end = lastBounds.end;
            windowSegBounds.pop();
            windows.pop();
        }
    }
    const totalChunks = windows.length;
    // ── Build ArtifactSet[] ───────────────────────────────────────────────────
    const artifacts = windows.map((win, i) => {
        const rawContent = win.texts.join("\n\n");
        // Markdown-style heading lines, one per breadcrumb level ("# A", "## B", …).
        const headingPrefix = win.breadcrumb.length > 0
            ? win.breadcrumb.map((h, k) => "#".repeat(k + 1) + " " + h).join("\n") +
                "\n\n"
            : "";
        const content = headingPrefix + rawContent;
        const id = `${opts.sourceFile}:${i}`;
        const filterMeta = {
            sourceFile: opts.sourceFile,
            sourceFormat: opts.sourceFormat,
            breadcrumb: win.breadcrumb,
            byteStart: win.byteStart,
            byteEnd: win.byteEnd,
            lineStart: win.lineStart,
            lineEnd: win.lineEnd,
            pageStart: win.pageStart,
            pageEnd: win.pageEnd,
            lang: win.lang,
            codeLanguage: win.codeLanguage,
            chunkIndex: i,
            totalChunks,
            strategy: opts.strategy,
            // Estimated on the raw window text, without the heading prefix.
            estimatedTokens: estimateTokens(rawContent),
            ...(opts.fileHash != null ? { fileHash: opts.fileHash } : {}),
            ...(opts.fileModifiedAt != null
                ? { fileModifiedAt: opts.fileModifiedAt }
                : {}),
            ...(opts.fileSizeBytes != null
                ? { fileSizeBytes: opts.fileSizeBytes }
                : {}),
        };
        const fullMeta = {
            ...filterMeta,
            sectionTitle: win.breadcrumb.at(-1),
            headingLevel: win.breadcrumb.length > 0 ? win.breadcrumb.length : undefined,
            documentOutline,
            // Present only when the window was hard-cut.
            truncated: win.truncated || undefined,
        };
        return {
            sourceFile: opts.sourceFile,
            commitHash: opts.commitHash,
            searchRepresentation: {
                id,
                anchorText: makeAnchorText(win.breadcrumb, rawContent),
                sparseTerms: makeSparseTerms(rawContent),
                filterMetadata: filterMeta,
            },
            candidateChunk: {
                id,
                preview: makePreview(content),
                fullMeta,
            },
            finalAnswerChunk: {
                id,
                content,
            },
        };
    });
    // ── Assign sibling IDs ────────────────────────────────────────────────────
    for (let i = 0; i < artifacts.length; i++) {
        const a = artifacts[i];
        if (i > 0)
            a.candidateChunk.fullMeta.siblingPrev =
                artifacts[i - 1].searchRepresentation.id;
        if (i < artifacts.length - 1)
            a.candidateChunk.fullMeta.siblingNext =
                artifacts[i + 1].searchRepresentation.id;
    }
    // ── Apply boundary padding to FinalAnswerChunk ────────────────────────────
    if (padBefore > 0 || padAfter > 0) {
        for (let i = 0; i < artifacts.length; i++) {
            const bounds = windowSegBounds[i];
            const parts = [];
            if (padBefore > 0 && i > 0) {
                // Only the previous window's texts that precede this window's first
                // segment; texts shared through overlap are already in `content`.
                const prevStart = windowSegBounds[i - 1].start;
                const prevOnly = windows[i - 1].texts.slice(0, Math.max(0, bounds.start - prevStart));
                parts.push(prevOnly.slice(-padBefore).join("\n\n"));
            }
            parts.push(artifacts[i].finalAnswerChunk.content);
            if (padAfter > 0 && i < artifacts.length - 1) {
                // Only the next window's texts that follow this window's last segment.
                const nextStart = windowSegBounds[i + 1].start;
                const nextOnly = windows[i + 1].texts.slice(Math.max(0, bounds.end - nextStart));
                parts.push(nextOnly.slice(0, padAfter).join("\n\n"));
            }
            const padded = parts.filter(Boolean).join("\n\n");
            // paddedContent is set only when padding actually added something.
            if (padded !== artifacts[i].finalAnswerChunk.content) {
                artifacts[i].finalAnswerChunk.paddedContent = padded;
            }
        }
    }
    return artifacts;
}
|
|
127
269
|
//# sourceMappingURL=chunker.js.map
|
package/dist/chunker.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"chunker.js","sourceRoot":"","sources":["../src/chunker.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAC9C,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAsB9C,MAAM,eAAe,GAAG,CAAC,CAAC;AAE1B,SAAS,cAAc,CAAC,IAAY;IAClC,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,eAAe,CAAC,CAAC;AAClD,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,YAAY,CAAC,IAAa,EAAE,IAAiB;IAC3D,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,GAAG,CAAC;IACxC,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;IAC9D,MAAM,eAAe,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;IAC7C,MAAM,QAAQ,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC;IAEnC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAiBrC,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,IAAI,OAAO,GAAW;QACpB,KAAK,EAAE,EAAE;QACT,SAAS,EAAE,QAAQ,CAAC,CAAC,CAAE,CAAC,KAAK,CAAC,SAAS;QACvC,OAAO,EAAE,QAAQ,CAAC,CAAC,CAAE,CAAC,KAAK,CAAC,OAAO;QACnC,UAAU,EAAE,QAAQ,CAAC,CAAC,CAAE,CAAC,UAAU;QACnC,SAAS,EAAE,KAAK;KACjB,CAAC;IACF,IAAI,aAAa,GAAG,CAAC,CAAC;IAEtB,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;QAC3B,MAAM,SAAS,GAAG,cAAc,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAE3C,4EAA4E;QAC5E,IAAI,aAAa,GAAG,CAAC,IAAI,aAAa,GAAG,SAAS,GAAG,SAAS,EAAE,CAAC;YAC/D,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACtB,OAAO,GAAG;gBACR,KAAK,EAAE,EAAE;gBACT,SAAS,EAAE,GAAG,CAAC,KAAK,CAAC,SAAS;gBAC9B,OAAO,EAAE,GAAG,CAAC,KAAK,CAAC,OAAO;gBAC1B,UAAU,EAAE,GAAG,CAAC,UAAU;gBAC1B,SAAS,EAAE,KAAK;aACjB,CAAC;YACF,aAAa,GAAG,CAAC,CAAC;QACpB,CAAC;QAED,sDAAsD;QACtD,IAAI,SAAS,GAAG,SAAS,EAAE,CAAC;YAC1B,MAAM,QAAQ,GAAG,SAAS,GAAG,eAAe,CAAC;YAC7C,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC,CAAC;YAChD,OAAO,CAAC,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC;YACpC,OAAO,CAAC,SAAS,GAAG,IAAI,CAAC;YACzB,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACtB,OAAO,GAAG;gBACR,KAAK,EAAE,EAAE;gBACT,SAAS,EAAE,GAAG,CAAC,KAAK,CAAC,OAAO;gBAC5B,OAAO,EAAE,GAAG,CAAC,KAAK,CAAC,OAAO;gBAC1B,UAAU,EAAE,GAAG,CAAC,UAAU;gBAC1B,SAAS,EAAE,KAAK;aACjB,CAAC;YACF,aAAa,GAAG,CAAC,CAAC;YAClB,SAAS;QACX,CAAC;QAED,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAA
C;QAC7B,OAAO,CAAC,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC;QACpC,IAAI,OAAO,CAAC,SAAS,IAAI,IAAI,IAAI,GAAG,CAAC,KAAK,CAAC,SAAS,IAAI,IAAI;YAAE,OAAO,CAAC,SAAS,GAAG,GAAG,CAAC,KAAK,CAAC,SAAS,CAAC;QACtG,IAAI,GAAG,CAAC,KAAK,CAAC,OAAO,IAAI,IAAI;YAAE,OAAO,CAAC,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC;QACnE,IAAI,OAAO,CAAC,SAAS,IAAI,IAAI,IAAI,GAAG,CAAC,KAAK,CAAC,UAAU,IAAI,IAAI;YAAE,OAAO,CAAC,SAAS,GAAG,GAAG,CAAC,KAAK,CAAC,UAAU,CAAC;QACxG,IAAI,GAAG,CAAC,KAAK,CAAC,UAAU,IAAI,IAAI;YAAE,OAAO,CAAC,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,UAAU,CAAC;QACzE,IAAI,CAAC,OAAO,CAAC,IAAI,IAAI,GAAG,CAAC,KAAK,CAAC,IAAI;YAAE,OAAO,CAAC,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC;QACnE,IAAI,CAAC,OAAO,CAAC,YAAY,IAAI,GAAG,CAAC,KAAK,CAAC,YAAY;YAAE,OAAO,CAAC,YAAY,GAAG,GAAG,CAAC,KAAK,CAAC,YAAY,CAAC;QACnG,aAAa,IAAI,SAAS,CAAC;IAC7B,CAAC;IAED,IAAI,OAAO,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7B,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACxB,CAAC;IAED,mEAAmE;IACnE,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,MAAM,IAAI,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC;QAC1C,MAAM,UAAU,GAAG,cAAc,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;QAC3D,IAAI,UAAU,GAAG,SAAS,EAAE,CAAC;YAC3B,MAAM,IAAI,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC;YAC1C,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC;YAC/B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC;YAC5B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC;YAC5B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC;YAC5B,OAAO,CAAC,GAAG,EAAE,CAAC;QAChB,CAAC;IACH,CAAC;IAED,MAAM,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC;IAEnC,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE;QAC5B,MAAM,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,MAAM,IAAI,GAAc;YACtB,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,YAAY,EAAE,IAAI,CAAC,YAAY;YAC/B,SAAS,EAAE,GAAG,CAAC,SAAS;YACxB,OAAO,EAAE,GAAG,CAAC,OAAO;YACpB,SAAS,EAAE,GAAG,CAAC,SAAS;YACxB,OAAO,EAAE,GAAG,CAAC,OAAO;YACpB,SAAS,EAAE,GAAG,CAAC,SAAS;YACxB,OAAO,EAAE,GAAG,CAAC,OAAO;YACpB,UAAU,EAAE,GAAG,CAAC,UAAU;YAC1B,YAAY,EAAE,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YACnC,YAAY,
EAAE,GAAG,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS;YAC3E,eAAe;YACf,IAAI,EAAE,GAAG,CAAC,IAAI;YACd,YAAY,EAAE,GAAG,CAAC,YAAY;YAC9B,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,UAAU,EAAE,CAAC;YACb,WAAW;YACX,eAAe,EAAE,cAAc,CAAC,OAAO,CAAC;YACxC,SAAS,EAAE,GAAG,CAAC,SAAS;YACxB,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACrD,GAAG,CAAC,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC,CAAC,CAAC,EAAE,aAAa,EAAE,IAAI,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAC5E,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC,EAAE,cAAc,EAAE,IAAI,CAAC,cAAc,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SACxE,CAAC;QAEF,OAAO;YACL,OAAO;YACP,QAAQ,EAAE,IAAI;YACd,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,UAAU,EAAE,IAAI,CAAC,UAAU;SAC5B,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC"}
|
|
1
|
+
{"version":3,"file":"chunker.js","sourceRoot":"","sources":["../src/chunker.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAC9C,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAyC9C,MAAM,eAAe,GAAG,CAAC,CAAC;AAC1B,MAAM,aAAa,GAAG,GAAG,CAAC;AAE1B,SAAS,cAAc,CAAC,IAAY;IAClC,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,eAAe,CAAC,CAAC;AAClD,CAAC;AAED,SAAS,WAAW,CAAC,OAAe;IAClC,IAAI,OAAO,CAAC,MAAM,IAAI,aAAa;QAAE,OAAO,OAAO,CAAC;IACpD,MAAM,GAAG,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC;IAC5C,MAAM,SAAS,GAAG,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IACvC,OAAO,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;AACvD,CAAC;AAED,SAAS,cAAc,CAAC,UAAoB,EAAE,UAAkB;IAC9D,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;IAC1E,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;IAC5C,MAAM,aAAa,GACjB,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IAC1E,OAAO,CAAC,MAAM,GAAG,aAAa,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;AAChD,CAAC;AAED,SAAS,eAAe,CAAC,IAAY;IACnC,OAAO,IAAI;SACR,WAAW,EAAE;SACb,KAAK,CAAC,KAAK,CAAC;SACZ,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACjC,CAAC;AAED,SAAS,cAAc,CAAC,CAAW,EAAE,CAAW;IAC9C,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AAChE,CAAC;AAED;;;GAGG;AACH,SAAS,cAAc,CAAC,GAAgB,EAAE,SAAiB;IACzD,MAAM,QAAQ,GAAG,SAAS,GAAG,eAAe,CAAC;IAC7C,IAAI,GAAG,CAAC,IAAI,CAAC,MAAM,IAAI,QAAQ;QAAE,OAAO,CAAC,GAAG,CAAC,CAAC;IAC9C,MAAM,KAAK,GAAkB,EAAE,CAAC;IAChC,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,OAAO,GAAG,GAAG,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;QAC7B,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,GAAG,GAAG,QAAQ,CAAC,CAAC;QACjD,KAAK,CAAC,IAAI,CAAC;YACT,GAAG,GAAG;YACN,IAAI;YACJ,KAAK,EAAE;gBACL,GAAG,GAAG,CAAC,KAAK;gBACZ,SAAS,EAAE,GAAG,CAAC,KAAK,CAAC,SA
AS,GAAG,GAAG;gBACpC,OAAO,EAAE,GAAG,CAAC,KAAK,CAAC,SAAS,GAAG,GAAG,GAAG,IAAI,CAAC,MAAM;aACjD;SACF,CAAC,CAAC;QACH,GAAG,IAAI,QAAQ,CAAC;IAClB,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAgBD;;;;;;;;;;GAUG;AACH,MAAM,UAAU,YAAY,CAAC,IAAa,EAAE,IAAiB;IAC3D,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,GAAG,CAAC;IACxC,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;IAC9D,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,IAAI,CAAC,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IAC9D,MAAM,SAAS,GAAG,IAAI,CAAC,eAAe,EAAE,MAAM,IAAI,CAAC,CAAC;IACpD,MAAM,QAAQ,GAAG,IAAI,CAAC,eAAe,EAAE,KAAK,IAAI,CAAC,CAAC;IAClD,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,KAAK,CAAC;IAC1C,MAAM,YAAY,GAAG,IAAI,CAAC,YAAY,IAAI,KAAK,CAAC;IAEhD,MAAM,eAAe,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;IAC7C,MAAM,WAAW,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC;IAEtC,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAExC,wEAAwE;IACxE,MAAM,QAAQ,GAAkB,SAAS;QACvC,CAAC,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,cAAc,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;QAC1D,CAAC,CAAC,WAAW,CAAC;IAEhB,8EAA8E;IAE9E,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,8EAA8E;IAC9E,MAAM,eAAe,GAA0C,EAAE,CAAC;IAClE,IAAI,QAAQ,GAAG,CAAC,CAAC;IAEjB,OAAO,QAAQ,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC;QAClC,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAE,CAAC;QACrC,MAAM,GAAG,GAAW;YAClB,KAAK,EAAE,EAAE;YACT,SAAS,EAAE,QAAQ,CAAC,KAAK,CAAC,SAAS;YACnC,OAAO,EAAE,QAAQ,CAAC,KAAK,CAAC,OAAO;YAC/B,UAAU,EAAE,QAAQ,CAAC,UAAU;YAC/B,SAAS,EAAE,KAAK;SACjB,CAAC;QACF,IAAI,aAAa,GAAG,CAAC,CAAC;QACtB,IAAI,GAAG,GAAG,QAAQ,CAAC;QAEnB,OAAO,GAAG,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC;YAC7B,MAAM,GAAG,GAAG,QAAQ,CAAC,GAAG,CAAE,CAAC;YAC3B,MAAM,SAAS,GACb,YAAY;gBACZ,CAAC,GAAG,CAAC,QAAQ,KAAK,MAAM,IAAI,GAAG,CAAC,QAAQ,KAAK,YAAY,CAAC,CAAC;YAC7D,MAAM,YAAY,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YACtE,MAAM,SAAS,GAAG,cAAc,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YAE3C,0DAA0D;YAC1D,IAAI,aAAa,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;gBACzE,MAAM;YACR,CAAC;YAED,2EAA2E;YAC3E,IAAI,aAAa,G
AAG,CAAC,IAAI,aAAa,GAAG,SAAS,GAAG,YAAY,EAAE,CAAC;gBAClE,MAAM;YACR,CAAC;YAED,2EAA2E;YAC3E,IAAI,SAAS,GAAG,SAAS,EAAE,CAAC;gBAC1B,MAAM,QAAQ,GAAG,SAAS,GAAG,eAAe,CAAC;gBAC7C,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC,CAAC;gBAC5C,GAAG,CAAC,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC;gBAChC,GAAG,CAAC,SAAS,GAAG,IAAI,CAAC;gBACrB,GAAG,EAAE,CAAC;gBACN,MAAM;YACR,CAAC;YAED,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YACzB,GAAG,CAAC,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC;YAChC,IAAI,GAAG,CAAC,SAAS,IAAI,IAAI,IAAI,GAAG,CAAC,KAAK,CAAC,SAAS,IAAI,IAAI;gBACtD,GAAG,CAAC,SAAS,GAAG,GAAG,CAAC,KAAK,CAAC,SAAS,CAAC;YACtC,IAAI,GAAG,CAAC,KAAK,CAAC,OAAO,IAAI,IAAI;gBAAE,GAAG,CAAC,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC;YAC/D,IAAI,GAAG,CAAC,SAAS,IAAI,IAAI,IAAI,GAAG,CAAC,KAAK,CAAC,UAAU,IAAI,IAAI;gBACvD,GAAG,CAAC,SAAS,GAAG,GAAG,CAAC,KAAK,CAAC,UAAU,CAAC;YACvC,IAAI,GAAG,CAAC,KAAK,CAAC,UAAU,IAAI,IAAI;gBAAE,GAAG,CAAC,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,UAAU,CAAC;YACrE,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,GAAG,CAAC,KAAK,CAAC,IAAI;gBAAE,GAAG,CAAC,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC;YAC3D,IAAI,CAAC,GAAG,CAAC,YAAY,IAAI,GAAG,CAAC,KAAK,CAAC,YAAY;gBAC7C,GAAG,CAAC,YAAY,GAAG,GAAG,CAAC,KAAK,CAAC,YAAY,CAAC;YAC5C,aAAa,IAAI,SAAS,CAAC;YAC3B,GAAG,EAAE,CAAC;QACR,CAAC;QAED,IAAI,GAAG,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAClB,eAAe,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC;YAEpD,wDAAwD;YACxD,IAAI,OAAO,GAAG,CAAC,IAAI,GAAG,GAAG,QAAQ,GAAG,CAAC,EAAE,CAAC;gBACtC,2EAA2E;gBAC3E,MAAM,aAAa,GAAG,aAAa,GAAG,OAAO,CAAC;gBAC9C,IAAI,WAAW,GAAG,CAAC,CAAC;gBACpB,IAAI,IAAI,GAAG,GAAG,GAAG,CAAC,CAAC;gBACnB,OAAO,IAAI,GAAG,QAAQ,IAAI,WAAW,GAAG,aAAa,EAAE,CAAC;oBACtD,WAAW,IAAI,cAAc,CAAC,QAAQ,CAAC,IAAI,CAAE,CAAC,IAAI,CAAC,CAAC;oBACpD,IAAI,EAAE,CAAC;gBACT,CAAC;gBACD,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,GAAG,CAAC,EAAE,IAAI,GAAG,CAAC,CAAC,CAAC;YAC9C,CAAC;iBAAM,CAAC;gBACN,QAAQ,GAAG,GAAG,CAAC;YACjB,CAAC;QACH,CAAC;aAAM,CAAC;YACN,iDAAiD;YACjD,QAAQ,E
AAE,CAAC;QACb,CAAC;IACH,CAAC;IAED,8EAA8E;IAC9E,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,MAAM,IAAI,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC;QAC1C,MAAM,IAAI,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC;QAC1C,MAAM,UAAU,GAAG,cAAc,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;QAC3D,IAAI,UAAU,GAAG,SAAS,IAAI,cAAc,CAAC,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC;YAC/E,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC;YAC/B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC;YAC5B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC;YAC5B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC;YAC5B,MAAM,UAAU,GAAG,eAAe,CAAC,GAAG,EAAG,CAAC;YAC1C,eAAe,CAAC,eAAe,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC,GAAG,GAAG,UAAU,CAAC,GAAG,CAAC;YAClE,OAAO,CAAC,GAAG,EAAE,CAAC;QAChB,CAAC;IACH,CAAC;IAED,MAAM,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC;IAEnC,6EAA6E;IAC7E,MAAM,SAAS,GAAkB,OAAO,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE;QACtD,MAAM,UAAU,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC1C,MAAM,aAAa,GACjB,GAAG,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC;YACvB,CAAC,CAAC,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;gBACpE,MAAM;YACR,CAAC,CAAC,EAAE,CAAC;QACT,MAAM,OAAO,GAAG,aAAa,GAAG,UAAU,CAAC;QAC3C,MAAM,EAAE,GAAG,GAAG,IAAI,CAAC,UAAU,IAAI,CAAC,EAAE,CAAC;QAErC,MAAM,UAAU,GAAe;YAC7B,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,YAAY,EAAE,IAAI,CAAC,YAAY;YAC/B,UAAU,EAAE,GAAG,CAAC,UAAU;YAC1B,SAAS,EAAE,GAAG,CAAC,SAAS;YACxB,OAAO,EAAE,GAAG,CAAC,OAAO;YACpB,SAAS,EAAE,GAAG,CAAC,SAAS;YACxB,OAAO,EAAE,GAAG,CAAC,OAAO;YACpB,SAAS,EAAE,GAAG,CAAC,SAAS;YACxB,OAAO,EAAE,GAAG,CAAC,OAAO;YACpB,IAAI,EAAE,GAAG,CAAC,IAAI;YACd,YAAY,EAAE,GAAG,CAAC,YAAY;YAC9B,UAAU,EAAE,CAAC;YACb,WAAW;YACX,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,eAAe,EAAE,cAAc,CAAC,UAAU,CAAC;YAC3C,GAAG,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAC7D,GAAG,CAAC,IAAI,CAAC,cAAc,IAAI,IAAI;gBAC7B,CAAC,CAAC,EAAE,cAAc,EAAE,IAAI,CAAC,cAAc,EAAE;gB
ACzC,CAAC,CAAC,EAAE,CAAC;YACP,GAAG,CAAC,IAAI,CAAC,aAAa,IAAI,IAAI;gBAC5B,CAAC,CAAC,EAAE,aAAa,EAAE,IAAI,CAAC,aAAa,EAAE;gBACvC,CAAC,CAAC,EAAE,CAAC;SACR,CAAC;QAEF,MAAM,QAAQ,GAAc;YAC1B,GAAG,UAAU;YACb,YAAY,EAAE,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YACnC,YAAY,EACV,GAAG,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS;YAC/D,eAAe;YACf,SAAS,EAAE,GAAG,CAAC,SAAS,IAAI,SAAS;SACtC,CAAC;QAEF,OAAO;YACL,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,oBAAoB,EAAE;gBACpB,EAAE;gBACF,UAAU,EAAE,cAAc,CAAC,GAAG,CAAC,UAAU,EAAE,UAAU,CAAC;gBACtD,WAAW,EAAE,eAAe,CAAC,UAAU,CAAC;gBACxC,cAAc,EAAE,UAAU;aAC3B;YACD,cAAc,EAAE;gBACd,EAAE;gBACF,OAAO,EAAE,WAAW,CAAC,OAAO,CAAC;gBAC7B,QAAQ;aACT;YACD,gBAAgB,EAAE;gBAChB,EAAE;gBACF,OAAO;aACR;SACF,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,6EAA6E;IAC7E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC1C,MAAM,CAAC,GAAG,SAAS,CAAC,CAAC,CAAE,CAAC;QACxB,IAAI,CAAC,GAAG,CAAC;YACP,CAAC,CAAC,cAAc,CAAC,QAAQ,CAAC,WAAW;gBACnC,SAAS,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC,oBAAoB,CAAC,EAAE,CAAC;QAC9C,IAAI,CAAC,GAAG,SAAS,CAAC,MAAM,GAAG,CAAC;YAC1B,CAAC,CAAC,cAAc,CAAC,QAAQ,CAAC,WAAW;gBACnC,SAAS,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC,oBAAoB,CAAC,EAAE,CAAC;IAChD,CAAC;IAED,6EAA6E;IAC7E,IAAI,SAAS,GAAG,CAAC,IAAI,QAAQ,GAAG,CAAC,EAAE,CAAC;QAClC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,MAAM,KAAK,GAAa,EAAE,CAAC;YAE3B,IAAI,SAAS,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC3B,MAAM,SAAS,GAAG,OAAO,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC,KAAK,CAAC;gBACxC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;YACvD,CAAC;YAED,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAE,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC;YAEnD,IAAI,QAAQ,GAAG,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC7C,MAAM,SAAS,GAAG,OAAO,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC,KAAK,CAAC;gBACxC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;YACxD,CAAC;YAED,MAAM,MAAM,G
AAG,KAAK,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAClD,IAAI,MAAM,KAAK,SAAS,CAAC,CAAC,CAAE,CAAC,gBAAgB,CAAC,OAAO,EAAE,CAAC;gBACtD,SAAS,CAAC,CAAC,CAAE,CAAC,gBAAgB,CAAC,aAAa,GAAG,MAAM,CAAC;YACxD,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC"}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import type { ArtifactSet, ArtifactChunker, DocNode } from "./types.js";
|
|
2
|
+
import type { WalkOptions } from "./chunker.js";
|
|
3
|
+
export interface BaseOptions {
|
|
4
|
+
maxTokens?: number;
|
|
5
|
+
minTokens?: number;
|
|
6
|
+
overlap?: number;
|
|
7
|
+
boundaryPadding?: {
|
|
8
|
+
before?: number;
|
|
9
|
+
after?: number;
|
|
10
|
+
};
|
|
11
|
+
adaptiveSize?: boolean;
|
|
12
|
+
recursive?: boolean;
|
|
13
|
+
ignore?: string[];
|
|
14
|
+
}
|
|
15
|
+
/** Returned by every native chunker binding. Rust reads the file, computes
|
|
16
|
+
* the SHA-256 hash, and returns all four fields so JS never holds the file
|
|
17
|
+
* bytes. */
|
|
18
|
+
export interface ParseResult {
|
|
19
|
+
tree: string;
|
|
20
|
+
hash: string;
|
|
21
|
+
size: number;
|
|
22
|
+
modifiedMs: number;
|
|
23
|
+
}
|
|
24
|
+
export interface NativeChunkerDef<TOptions extends BaseOptions> {
|
|
25
|
+
/** npm package name, used as the `strategy` field in ArtifactSet. */
|
|
26
|
+
name: string;
|
|
27
|
+
/** "pdf" | "md" | "docx" | "xlsx" etc. — becomes `sourceFormat`. */
|
|
28
|
+
sourceFormat: string;
|
|
29
|
+
/** Glob patterns this chunker accepts, e.g. ["**\/*.pdf"]. */
|
|
30
|
+
patterns: string[];
|
|
31
|
+
/**
|
|
32
|
+
* Return the native napi binding object. Called at most once per chunker
|
|
33
|
+
* instance (lazily, on the first `chunk()` call).
|
|
34
|
+
*/
|
|
35
|
+
loadBinding: () => Record<string, (...args: unknown[]) => unknown>;
|
|
36
|
+
/**
|
|
37
|
+
* Invoke the correct function on the already-loaded binding, passing the
|
|
38
|
+
* file path. Rust reads the file and returns a ParseResult — no file data
|
|
39
|
+
* crosses the JS/Rust boundary.
|
|
40
|
+
*/
|
|
41
|
+
callNative: (binding: ReturnType<NativeChunkerDef<TOptions>["loadBinding"]>, filePath: string, opts: TOptions) => ParseResult;
|
|
42
|
+
/** Format-specific WalkOptions defaults. Spread before user opts so user opts win. */
|
|
43
|
+
extraWalkOpts?: (opts: TOptions) => Partial<WalkOptions>;
|
|
44
|
+
/** Optional post-walk hook for format-specific enrichment (XLSX cell refs, etc.). */
|
|
45
|
+
enrich?: (sets: ArtifactSet[], docNode: DocNode, opts: TOptions) => ArtifactSet[];
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Factory that eliminates boilerplate common to every native-binary chunker:
|
|
49
|
+
* lazy binding load, walkToChunks, and optional enrich hook.
|
|
50
|
+
* File I/O and hashing happen inside Rust via callNative.
|
|
51
|
+
*
|
|
52
|
+
* Usage:
|
|
53
|
+
* export const createChunker = createNativeChunker<MyOptions>({ ... });
|
|
54
|
+
*/
|
|
55
|
+
export declare function createNativeChunker<TOptions extends BaseOptions>(def: NativeChunkerDef<TOptions>): (opts?: TOptions) => ArtifactChunker;
|
|
56
|
+
//# sourceMappingURL=factory.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"factory.d.ts","sourceRoot":"","sources":["../src/factory.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,WAAW,EAAE,eAAe,EAAE,OAAO,EAAE,MAAM,YAAY,CAAC;AAExE,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAEhD,MAAM,WAAW,WAAW;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,eAAe,CAAC,EAAE;QAAE,MAAM,CAAC,EAAE,MAAM,CAAC;QAAC,KAAK,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IACtD,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;CACnB;AAED;;aAEa;AACb,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,gBAAgB,CAAC,QAAQ,SAAS,WAAW;IAC5D,qEAAqE;IACrE,IAAI,EAAE,MAAM,CAAC;IACb,oEAAoE;IACpE,YAAY,EAAE,MAAM,CAAC;IACrB,8DAA8D;IAC9D,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB;;;OAGG;IACH,WAAW,EAAE,MAAM,MAAM,CAAC,MAAM,EAAE,CAAC,GAAG,IAAI,EAAE,OAAO,EAAE,KAAK,OAAO,CAAC,CAAC;IACnE;;;;OAIG;IACH,UAAU,EAAE,CACV,OAAO,EAAE,UAAU,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,aAAa,CAAC,CAAC,EAC9D,QAAQ,EAAE,MAAM,EAChB,IAAI,EAAE,QAAQ,KACX,WAAW,CAAC;IACjB,sFAAsF;IACtF,aAAa,CAAC,EAAE,CAAC,IAAI,EAAE,QAAQ,KAAK,OAAO,CAAC,WAAW,CAAC,CAAC;IACzD,qFAAqF;IACrF,MAAM,CAAC,EAAE,CAAC,IAAI,EAAE,WAAW,EAAE,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,KAAK,WAAW,EAAE,CAAC;CACnF;AAED;;;;;;;GAOG;AACH,wBAAgB,mBAAmB,CAAC,QAAQ,SAAS,WAAW,EAC9D,GAAG,EAAE,gBAAgB,CAAC,QAAQ,CAAC,GAC9B,CAAC,IAAI,CAAC,EAAE,QAAQ,KAAK,eAAe,CAiEtC"}
|
package/dist/factory.js
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import { minimatch } from "minimatch";
|
|
2
|
+
import { walkToChunks } from "./chunker.js";
|
|
3
|
+
/**
|
|
4
|
+
* Factory that eliminates boilerplate common to every native-binary chunker:
|
|
5
|
+
* lazy binding load, walkToChunks, and optional enrich hook.
|
|
6
|
+
* File I/O and hashing happen inside Rust via callNative.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* export const createChunker = createNativeChunker<MyOptions>({ ... });
|
|
10
|
+
*/
|
|
11
|
+
export function createNativeChunker(def) {
|
|
12
|
+
return (opts) => {
|
|
13
|
+
const resolvedOpts = (opts ?? {});
|
|
14
|
+
const ignore = resolvedOpts.ignore ?? [];
|
|
15
|
+
let _binding = null;
|
|
16
|
+
function getBinding() {
|
|
17
|
+
if (_binding == null) {
|
|
18
|
+
_binding = def.loadBinding();
|
|
19
|
+
}
|
|
20
|
+
return _binding;
|
|
21
|
+
}
|
|
22
|
+
return {
|
|
23
|
+
name: def.name,
|
|
24
|
+
patterns: def.patterns,
|
|
25
|
+
async chunk(filePath, commitHash) {
|
|
26
|
+
const binding = getBinding();
|
|
27
|
+
const result = def.callNative(binding, filePath, resolvedOpts);
|
|
28
|
+
const docNode = JSON.parse(result.tree);
|
|
29
|
+
const extra = def.extraWalkOpts ? def.extraWalkOpts(resolvedOpts) : {};
|
|
30
|
+
const sets = walkToChunks(docNode, {
|
|
31
|
+
...extra,
|
|
32
|
+
...(resolvedOpts.maxTokens != null
|
|
33
|
+
? { maxTokens: resolvedOpts.maxTokens }
|
|
34
|
+
: {}),
|
|
35
|
+
...(resolvedOpts.minTokens != null
|
|
36
|
+
? { minTokens: resolvedOpts.minTokens }
|
|
37
|
+
: {}),
|
|
38
|
+
...(resolvedOpts.overlap != null
|
|
39
|
+
? { overlap: resolvedOpts.overlap }
|
|
40
|
+
: {}),
|
|
41
|
+
...(resolvedOpts.boundaryPadding != null
|
|
42
|
+
? { boundaryPadding: resolvedOpts.boundaryPadding }
|
|
43
|
+
: {}),
|
|
44
|
+
...(resolvedOpts.adaptiveSize != null
|
|
45
|
+
? { adaptiveSize: resolvedOpts.adaptiveSize }
|
|
46
|
+
: {}),
|
|
47
|
+
...(resolvedOpts.recursive != null
|
|
48
|
+
? { recursive: resolvedOpts.recursive }
|
|
49
|
+
: {}),
|
|
50
|
+
sourceFile: filePath,
|
|
51
|
+
sourceFormat: def.sourceFormat,
|
|
52
|
+
commitHash,
|
|
53
|
+
strategy: def.name,
|
|
54
|
+
fileHash: result.hash,
|
|
55
|
+
fileSizeBytes: result.size,
|
|
56
|
+
fileModifiedAt: new Date(result.modifiedMs).toISOString(),
|
|
57
|
+
});
|
|
58
|
+
return def.enrich ? def.enrich(sets, docNode, resolvedOpts) : sets;
|
|
59
|
+
},
|
|
60
|
+
async canProcess(filePath) {
|
|
61
|
+
if (ignore.some((p) => minimatch(filePath, p, { matchBase: true }))) {
|
|
62
|
+
return false;
|
|
63
|
+
}
|
|
64
|
+
return def.patterns.some((p) => minimatch(filePath, p, { matchBase: true }));
|
|
65
|
+
},
|
|
66
|
+
};
|
|
67
|
+
};
|
|
68
|
+
}
|
|
69
|
+
//# sourceMappingURL=factory.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"factory.js","sourceRoot":"","sources":["../src/factory.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAEtC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAmD5C;;;;;;;GAOG;AACH,MAAM,UAAU,mBAAmB,CACjC,GAA+B;IAE/B,OAAO,CAAC,IAAe,EAAmB,EAAE;QAC1C,MAAM,YAAY,GAAG,CAAC,IAAI,IAAI,EAAE,CAAa,CAAC;QAC9C,MAAM,MAAM,GAAG,YAAY,CAAC,MAAM,IAAI,EAAE,CAAC;QACzC,IAAI,QAAQ,GAAiE,IAAI,CAAC;QAElF,SAAS,UAAU;YACjB,IAAI,QAAQ,IAAI,IAAI,EAAE,CAAC;gBACrB,QAAQ,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC;YAC/B,CAAC;YACD,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,OAAO;YACL,IAAI,EAAE,GAAG,CAAC,IAAI;YACd,QAAQ,EAAE,GAAG,CAAC,QAAQ;YAEtB,KAAK,CAAC,KAAK,CAAC,QAAgB,EAAE,UAAkB;gBAC9C,MAAM,OAAO,GAAG,UAAU,EAAE,CAAC;gBAC7B,MAAM,MAAM,GAAG,GAAG,CAAC,UAAU,CAAC,OAAO,EAAE,QAAQ,EAAE,YAAY,CAAC,CAAC;gBAC/D,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,CAAY,CAAC;gBAEnD,MAAM,KAAK,GAAG,GAAG,CAAC,aAAa,CAAC,CAAC,CAAC,GAAG,CAAC,aAAa,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBACvE,MAAM,IAAI,GAAG,YAAY,CAAC,OAAO,EAAE;oBACjC,GAAG,KAAK;oBACR,GAAG,CAAC,YAAY,CAAC,SAAS,IAAI,IAAI;wBAChC,CAAC,CAAC,EAAE,SAAS,EAAE,YAAY,CAAC,SAAS,EAAE;wBACvC,CAAC,CAAC,EAAE,CAAC;oBACP,GAAG,CAAC,YAAY,CAAC,SAAS,IAAI,IAAI;wBAChC,CAAC,CAAC,EAAE,SAAS,EAAE,YAAY,CAAC,SAAS,EAAE;wBACvC,CAAC,CAAC,EAAE,CAAC;oBACP,GAAG,CAAC,YAAY,CAAC,OAAO,IAAI,IAAI;wBAC9B,CAAC,CAAC,EAAE,OAAO,EAAE,YAAY,CAAC,OAAO,EAAE;wBACnC,CAAC,CAAC,EAAE,CAAC;oBACP,GAAG,CAAC,YAAY,CAAC,eAAe,IAAI,IAAI;wBACtC,CAAC,CAAC,EAAE,eAAe,EAAE,YAAY,CAAC,eAAe,EAAE;wBACnD,CAAC,CAAC,EAAE,CAAC;oBACP,GAAG,CAAC,YAAY,CAAC,YAAY,IAAI,IAAI;wBACnC,CAAC,CAAC,EAAE,YAAY,EAAE,YAAY,CAAC,YAAY,EAAE;wBAC7C,CAAC,CAAC,EAAE,CAAC;oBACP,GAAG,CAAC,YAAY,CAAC,SAAS,IAAI,IAAI;wBAChC,CAAC,CAAC,EAAE,SAAS,EAAE,YAAY,CAAC,SAAS,EAAE;wBACvC,CAAC,CAAC,EAAE,CAAC;oBACP,UAAU,EAAE,QAAQ;oBACpB,YAAY,EAAE,GAAG,CAAC,YAAY;oBAC9B,UAAU;oBACV,QAAQ,EAAE,GAAG,CAAC,IAAI;oBAClB,QAAQ,EAAE,MAAM,CAAC,IAAI;oBACrB,aAAa,EAAE,MAAM,CAAC,IAAI;oBAC1B,cAAc,EAAE,IAAI,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,WAAW,EAAE;iBAC1D,CAAC,CAAC;gBAEH,OAAO,GAAG,CAAC,MAAM,CAAC,CAAC,C
AAC,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,EAAE,YAAY,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YACrE,CAAC;YAED,KAAK,CAAC,UAAU,CAAC,QAAgB;gBAC/B,IAAI,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,QAAQ,EAAE,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;oBACpE,OAAO,KAAK,CAAC;gBACf,CAAC;gBACD,OAAO,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAC7B,SAAS,CAAC,QAAQ,EAAE,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAC5C,CAAC;YACJ,CAAC;SACF,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
|
-
export type { DocNode, DocNodeType, DocNodeAttrs, ChunkMeta } from "./types.js";
|
|
1
|
+
export type { DocNode, DocNodeType, DocNodeAttrs, FilterMeta, ChunkMeta, InjectedContext, SearchRepresentation, CandidateChunk, FinalAnswerChunk, ArtifactSet, ArtifactChunker, } from "./types.js";
|
|
2
2
|
export { walkDocNode } from "./ast-walker.js";
|
|
3
3
|
export type { TextSegment } from "./ast-walker.js";
|
|
4
4
|
export { extractOutline } from "./outline.js";
|
|
5
5
|
export { walkToChunks } from "./chunker.js";
|
|
6
|
-
export type { WalkOptions
|
|
6
|
+
export type { WalkOptions } from "./chunker.js";
|
|
7
|
+
export { createNativeChunker } from "./factory.js";
|
|
8
|
+
export type { BaseOptions, NativeChunkerDef, ParseResult } from "./factory.js";
|
|
7
9
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,YAAY,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,YAAY,EACV,OAAO,EACP,WAAW,EACX,YAAY,EACZ,UAAU,EACV,SAAS,EACT,eAAe,EACf,oBAAoB,EACpB,cAAc,EACd,gBAAgB,EAChB,WAAW,EACX,eAAe,GAChB,MAAM,YAAY,CAAC;AACpB,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAC9C,YAAY,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AACnD,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC5C,YAAY,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAChD,OAAO,EAAE,mBAAmB,EAAE,MAAM,cAAc,CAAC;AACnD,YAAY,EAAE,WAAW,EAAE,gBAAgB,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC"}
|
package/dist/index.js
CHANGED
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAaA,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAE9C,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE5C,OAAO,EAAE,mBAAmB,EAAE,MAAM,cAAc,CAAC"}
|
package/dist/types.d.ts
CHANGED
|
@@ -23,29 +23,113 @@ export interface DocNode {
|
|
|
23
23
|
text?: string;
|
|
24
24
|
attrs: DocNodeAttrs;
|
|
25
25
|
}
|
|
26
|
-
|
|
26
|
+
/**
|
|
27
|
+
* Filterable metadata stored alongside the Search Representation in the vector
|
|
28
|
+
* index. Contains only fields that are useful for pre-retrieval filtering.
|
|
29
|
+
*/
|
|
30
|
+
export interface FilterMeta {
|
|
27
31
|
sourceFile: string;
|
|
28
32
|
sourceFormat: string;
|
|
33
|
+
breadcrumb: string[];
|
|
29
34
|
byteStart: number;
|
|
30
35
|
byteEnd: number;
|
|
31
36
|
lineStart?: number;
|
|
32
37
|
lineEnd?: number;
|
|
33
38
|
pageStart?: number;
|
|
34
39
|
pageEnd?: number;
|
|
35
|
-
fileSizeBytes?: number;
|
|
36
|
-
fileModifiedAt?: string;
|
|
37
|
-
fileHash?: string;
|
|
38
|
-
breadcrumb: string[];
|
|
39
|
-
sectionTitle?: string;
|
|
40
|
-
headingLevel?: number;
|
|
41
|
-
documentOutline?: string[];
|
|
42
40
|
lang?: string;
|
|
43
41
|
codeLanguage?: string;
|
|
44
|
-
strategy: string;
|
|
45
42
|
chunkIndex: number;
|
|
46
43
|
totalChunks: number;
|
|
44
|
+
strategy: string;
|
|
47
45
|
estimatedTokens: number;
|
|
46
|
+
fileHash?: string;
|
|
47
|
+
fileModifiedAt?: string;
|
|
48
|
+
fileSizeBytes?: number;
|
|
49
|
+
}
|
|
50
|
+
/**
|
|
51
|
+
* Full enrichment payload carried by CandidateChunk. A superset of FilterMeta
|
|
52
|
+
* that includes hierarchy details, sibling links, and format-specific fields
|
|
53
|
+
* used during cross-encoder re-ranking.
|
|
54
|
+
*/
|
|
55
|
+
export interface ChunkMeta extends FilterMeta {
|
|
56
|
+
sectionTitle?: string;
|
|
57
|
+
headingLevel?: number;
|
|
58
|
+
documentOutline?: string[];
|
|
59
|
+
siblingPrev?: string;
|
|
60
|
+
siblingNext?: string;
|
|
48
61
|
qualityScore?: number;
|
|
49
62
|
truncated?: boolean;
|
|
63
|
+
fqn?: string;
|
|
64
|
+
imports?: string[];
|
|
65
|
+
inheritanceChain?: string[];
|
|
66
|
+
sheetName?: string;
|
|
67
|
+
columnHeaders?: string[];
|
|
68
|
+
cellReference?: string;
|
|
69
|
+
formulaDependencies?: string[];
|
|
70
|
+
keywords?: string[];
|
|
71
|
+
summary?: string;
|
|
72
|
+
nerEntities?: Array<{
|
|
73
|
+
text: string;
|
|
74
|
+
label: string;
|
|
75
|
+
}>;
|
|
76
|
+
}
|
|
77
|
+
/** Extra context injected into FinalAnswerChunk that was NOT used for search. */
|
|
78
|
+
export interface InjectedContext {
|
|
79
|
+
parentSectionText?: string;
|
|
80
|
+
imports?: string[];
|
|
81
|
+
fqnDeclarations?: string[];
|
|
82
|
+
neighborPrev?: string;
|
|
83
|
+
neighborNext?: string;
|
|
84
|
+
}
|
|
85
|
+
/**
|
|
86
|
+
* Stored in the vector index. Contains sanitized anchor text for dense/sparse
|
|
87
|
+
* retrieval and filterable metadata only — no raw content dump.
|
|
88
|
+
*/
|
|
89
|
+
export interface SearchRepresentation {
|
|
90
|
+
id: string;
|
|
91
|
+
anchorText: string;
|
|
92
|
+
sparseTerms?: string[];
|
|
93
|
+
filterMetadata: FilterMeta;
|
|
94
|
+
}
|
|
95
|
+
/**
|
|
96
|
+
* Returned by ANN retrieval. Contains a short preview and full metadata
|
|
97
|
+
* for cross-encoder re-ranking. Fetched from the vector store payload.
|
|
98
|
+
*/
|
|
99
|
+
export interface CandidateChunk {
|
|
100
|
+
id: string;
|
|
101
|
+
preview: string;
|
|
102
|
+
fullMeta: ChunkMeta;
|
|
103
|
+
}
|
|
104
|
+
/**
|
|
105
|
+
* Fetched after re-ranking. Contains the full raw text (with optional boundary
|
|
106
|
+
* padding and injected context) that is passed to the LLM prompt.
|
|
107
|
+
*/
|
|
108
|
+
export interface FinalAnswerChunk {
|
|
109
|
+
id: string;
|
|
110
|
+
content: string;
|
|
111
|
+
paddedContent?: string;
|
|
112
|
+
injectedContext?: InjectedContext;
|
|
113
|
+
}
|
|
114
|
+
/**
|
|
115
|
+
* The atomic unit produced by walkToChunks — one per logical segment.
|
|
116
|
+
* Encapsulates all three artifact tiers derived from the same source window.
|
|
117
|
+
*/
|
|
118
|
+
export interface ArtifactSet {
|
|
119
|
+
sourceFile: string;
|
|
120
|
+
commitHash: string;
|
|
121
|
+
searchRepresentation: SearchRepresentation;
|
|
122
|
+
candidateChunk: CandidateChunk;
|
|
123
|
+
finalAnswerChunk: FinalAnswerChunk;
|
|
124
|
+
}
|
|
125
|
+
/**
|
|
126
|
+
* Upgraded chunker contract that returns ArtifactSet[].
|
|
127
|
+
* Replaces the virage-core FileChunker once Phase 5 lands in the main repo.
|
|
128
|
+
*/
|
|
129
|
+
export interface ArtifactChunker {
|
|
130
|
+
name: string;
|
|
131
|
+
patterns: string[];
|
|
132
|
+
chunk(filePath: string, commitHash: string): Promise<ArtifactSet[]>;
|
|
133
|
+
canProcess?(filePath: string, content?: string): Promise<boolean>;
|
|
50
134
|
}
|
|
51
135
|
//# sourceMappingURL=types.d.ts.map
|
package/dist/types.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAIA,MAAM,MAAM,WAAW,GACnB,UAAU,GACV,SAAS,GACT,SAAS,GACT,WAAW,GACX,OAAO,GACP,WAAW,GACX,YAAY,GACZ,MAAM,GACN,WAAW,GACX,MAAM,GACN,SAAS,GACT,OAAO,GACP,MAAM,GACN,UAAU,GACV,SAAS,GACT,UAAU,GACV,UAAU,CAAC;AAEf,MAAM,WAAW,YAAY;IAC3B,YAAY,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IACrC,IAAI,CAAC,EAAE,SAAS,GAAG,UAAU,GAAG,UAAU,GAAG,WAAW,GAAG,QAAQ,GAAG,QAAQ,CAAC;IAC/E,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,OAAO;IACtB,IAAI,EAAE,WAAW,CAAC;IAClB,QAAQ,CAAC,EAAE,OAAO,EAAE,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,YAAY,CAAC;CACrB;AAID,MAAM,WAAW,
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAIA,MAAM,MAAM,WAAW,GACnB,UAAU,GACV,SAAS,GACT,SAAS,GACT,WAAW,GACX,OAAO,GACP,WAAW,GACX,YAAY,GACZ,MAAM,GACN,WAAW,GACX,MAAM,GACN,SAAS,GACT,OAAO,GACP,MAAM,GACN,UAAU,GACV,SAAS,GACT,UAAU,GACV,UAAU,CAAC;AAEf,MAAM,WAAW,YAAY;IAC3B,YAAY,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IACrC,IAAI,CAAC,EAAE,SAAS,GAAG,UAAU,GAAG,UAAU,GAAG,WAAW,GAAG,QAAQ,GAAG,QAAQ,CAAC;IAC/E,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,OAAO;IACtB,IAAI,EAAE,WAAW,CAAC;IAClB,QAAQ,CAAC,EAAE,OAAO,EAAE,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,YAAY,CAAC;CACrB;AAID;;;GAGG;AACH,MAAM,WAAW,UAAU;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,CAAC;IACjB,eAAe,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED;;;;GAIG;AACH,MAAM,WAAW,SAAU,SAAQ,UAAU;IAC3C,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,CAAC,EAAE,OAAO,CAAC;IAGpB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;IAG5B,SAAS,CAAC,EA
AE,MAAM,CAAC;IACnB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,mBAAmB,CAAC,EAAE,MAAM,EAAE,CAAC;IAG/B,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IACpB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CACtD;AAED,iFAAiF;AACjF,MAAM,WAAW,eAAe;IAC9B,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAID;;;GAGG;AACH,MAAM,WAAW,oBAAoB;IACnC,EAAE,EAAE,MAAM,CAAC;IACX,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IACvB,cAAc,EAAE,UAAU,CAAC;CAC5B;AAED;;;GAGG;AACH,MAAM,WAAW,cAAc;IAC7B,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,SAAS,CAAC;CACrB;AAED;;;GAGG;AACH,MAAM,WAAW,gBAAgB;IAC/B,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,eAAe,CAAC,EAAE,eAAe,CAAC;CACnC;AAED;;;GAGG;AACH,MAAM,WAAW,WAAW;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,oBAAoB,EAAE,oBAAoB,CAAC;IAC3C,cAAc,EAAE,cAAc,CAAC;IAC/B,gBAAgB,EAAE,gBAAgB,CAAC;CACpC;AAED;;;GAGG;AACH,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,KAAK,CAAC,QAAQ,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC;IACpE,UAAU,CAAC,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;CACnE"}
|
package/package.json
CHANGED
|
@@ -1,20 +1,25 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@vivantel/virage-chunker-ce-ast",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.3",
|
|
4
4
|
"description": "Generalized ViDoc AST walker — shared chunking strategy for all structured document formats",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
7
7
|
"exports": {
|
|
8
8
|
".": {
|
|
9
9
|
"import": "./dist/index.js",
|
|
10
|
-
"types":
|
|
10
|
+
"types": "./dist/index.d.ts"
|
|
11
11
|
}
|
|
12
12
|
},
|
|
13
|
-
"files": [
|
|
13
|
+
"files": [
|
|
14
|
+
"dist"
|
|
15
|
+
],
|
|
14
16
|
"scripts": {
|
|
15
|
-
"build":
|
|
17
|
+
"build": "tsc --build",
|
|
16
18
|
"type-check": "tsc --build --noEmit",
|
|
17
|
-
"test":
|
|
19
|
+
"test": "vitest run"
|
|
20
|
+
},
|
|
21
|
+
"dependencies": {
|
|
22
|
+
"minimatch": "^10.0.0"
|
|
18
23
|
},
|
|
19
24
|
"peerDependencies": {
|
|
20
25
|
"@vivantel/virage-core": ">=0.2"
|