@vivantel/virage-chunker-ce-ast 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
- import type { DocNode, DocNodeAttrs } from "./types.js";
1
+ import type { DocNode, DocNodeAttrs, DocNodeType } from "./types.js";
2
2
  export interface TextSegment {
3
3
  text: string;
4
+ nodeType: DocNodeType;
4
5
  attrs: DocNodeAttrs;
5
6
  /** Ancestor heading texts at the time this segment was emitted. */
6
7
  breadcrumb: string[];
@@ -1 +1 @@
1
- {"version":3,"file":"ast-walker.d.ts","sourceRoot":"","sources":["../src/ast-walker.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAExD,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,YAAY,CAAC;IACpB,mEAAmE;IACnE,UAAU,EAAE,MAAM,EAAE,CAAC;CACtB;AAED;;;GAGG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,OAAO,GAAG,WAAW,EAAE,CAiCxD"}
1
+ {"version":3,"file":"ast-walker.d.ts","sourceRoot":"","sources":["../src/ast-walker.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AAErE,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,WAAW,CAAC;IACtB,KAAK,EAAE,YAAY,CAAC;IACpB,mEAAmE;IACnE,UAAU,EAAE,MAAM,EAAE,CAAC;CACtB;AAED;;;GAGG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,OAAO,GAAG,WAAW,EAAE,CAkCxD"}
@@ -19,6 +19,7 @@ export function walkDocNode(root) {
19
19
  if (isLeafText && node.text) {
20
20
  segments.push({
21
21
  text: node.text,
22
+ nodeType: node.type,
22
23
  attrs: node.attrs,
23
24
  breadcrumb: [...breadcrumb],
24
25
  });
@@ -1 +1 @@
1
- {"version":3,"file":"ast-walker.js","sourceRoot":"","sources":["../src/ast-walker.ts"],"names":[],"mappings":"AASA;;;GAGG;AACH,MAAM,UAAU,WAAW,CAAC,IAAa;IACvC,MAAM,QAAQ,GAAkB,EAAE,CAAC;IACnC,MAAM,UAAU,GAAa,EAAE,CAAC;IAEhC,SAAS,KAAK,CAAC,IAAa;QAC1B,IAAI,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YACzC,MAAM,KAAK,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,YAAY,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;YACjD,sEAAsE;YACtE,UAAU,CAAC,MAAM,CAAC,KAAK,EAAE,UAAU,CAAC,MAAM,GAAG,KAAK,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC;QACjE,CAAC;QAED,iEAAiE;QACjE,MAAM,UAAU,GACd,IAAI,CAAC,IAAI,IAAI,IAAI;YACjB,IAAI,CAAC,IAAI,KAAK,SAAS;YACvB,IAAI,CAAC,IAAI,KAAK,OAAO;YACrB,IAAI,CAAC,IAAI,KAAK,MAAM,CAAC;QAEvB,IAAI,UAAU,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YAC5B,QAAQ,CAAC,IAAI,CAAC;gBACZ,IAAI,EAAE,IAAI,CAAC,IAAI;gBACf,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,UAAU,EAAE,CAAC,GAAG,UAAU,CAAC;aAC5B,CAAC,CAAC;QACL,CAAC;QAED,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ,IAAI,EAAE,EAAE,CAAC;YACxC,KAAK,CAAC,KAAK,CAAC,CAAC;QACf,CAAC;IACH,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,CAAC;IACZ,OAAO,QAAQ,CAAC;AAClB,CAAC"}
1
+ {"version":3,"file":"ast-walker.js","sourceRoot":"","sources":["../src/ast-walker.ts"],"names":[],"mappings":"AAUA;;;GAGG;AACH,MAAM,UAAU,WAAW,CAAC,IAAa;IACvC,MAAM,QAAQ,GAAkB,EAAE,CAAC;IACnC,MAAM,UAAU,GAAa,EAAE,CAAC;IAEhC,SAAS,KAAK,CAAC,IAAa;QAC1B,IAAI,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YACzC,MAAM,KAAK,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,YAAY,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;YACjD,sEAAsE;YACtE,UAAU,CAAC,MAAM,CAAC,KAAK,EAAE,UAAU,CAAC,MAAM,GAAG,KAAK,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC;QACjE,CAAC;QAED,iEAAiE;QACjE,MAAM,UAAU,GACd,IAAI,CAAC,IAAI,IAAI,IAAI;YACjB,IAAI,CAAC,IAAI,KAAK,SAAS;YACvB,IAAI,CAAC,IAAI,KAAK,OAAO;YACrB,IAAI,CAAC,IAAI,KAAK,MAAM,CAAC;QAEvB,IAAI,UAAU,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YAC5B,QAAQ,CAAC,IAAI,CAAC;gBACZ,IAAI,EAAE,IAAI,CAAC,IAAI;gBACf,QAAQ,EAAE,IAAI,CAAC,IAAI;gBACnB,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,UAAU,EAAE,CAAC,GAAG,UAAU,CAAC;aAC5B,CAAC,CAAC;QACL,CAAC;QAED,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ,IAAI,EAAE,EAAE,CAAC;YACxC,KAAK,CAAC,KAAK,CAAC,CAAC;QACf,CAAC;IACH,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,CAAC;IACZ,OAAO,QAAQ,CAAC;AAClB,CAAC"}
package/dist/chunker.d.ts CHANGED
@@ -1,27 +1,51 @@
1
- import type { DocNode, ChunkMeta } from "./types.js";
1
+ import type { DocNode, ArtifactSet } from "./types.js";
2
2
  export interface WalkOptions {
3
3
  sourceFile: string;
4
4
  sourceFormat: string;
5
5
  commitHash: string;
6
6
  strategy: string;
7
+ /** Maximum tokens per chunk window (default: 512). */
7
8
  maxTokens?: number;
9
+ /** Minimum tokens before merging a trailing window into its predecessor (default: maxTokens / 4). */
8
10
  minTokens?: number;
11
+ /**
12
+ * Sliding-window overlap as a fraction 0–1 (default: 0).
13
+ * An overlap of 0.2 means each new window reuses the last 20 % of the previous
14
+ * window's content, producing overlapping SearchRepresentations that share context.
15
+ */
16
+ overlap?: number;
17
+ /**
18
+ * Paragraphs (text segments) to prepend/append from adjacent windows into
19
+ * FinalAnswerChunk.paddedContent only. Does NOT affect anchorText or preview.
20
+ */
21
+ boundaryPadding?: {
22
+ before?: number;
23
+ after?: number;
24
+ };
25
+ /**
26
+ * When true, segments that exceed maxTokens are recursively split on character
27
+ * boundaries before windowing, rather than hard-cut with content loss (default: false).
28
+ */
29
+ recursive?: boolean;
30
+ /**
31
+ * When true, the effective flush threshold for `code` and `table-cell` segments
32
+ * is halved, producing smaller chunks for dense technical content (default: false).
33
+ */
34
+ adaptiveSize?: boolean;
9
35
  fileHash?: string;
10
36
  fileSizeBytes?: number;
11
37
  fileModifiedAt?: string;
12
38
  }
13
- export interface ChunkResult {
14
- content: string;
15
- metadata: ChunkMeta;
16
- sourceFile: string;
17
- commitHash: string;
18
- contentHash?: string;
19
- }
20
39
  /**
21
- * Walk a ViDoc AST and produce Chunk[] with full ChunkMeta.
40
+ * Walk a ViDoc AST and produce one ArtifactSet per logical window.
41
+ *
42
+ * Each window is split at paragraph/segment boundaries when the accumulated
43
+ * token count reaches maxTokens. A section boundary (breadcrumb change) also
44
+ * flushes the current window. Level 2 modifiers (overlap, boundaryPadding,
45
+ * recursive, adaptiveSize) refine this behaviour.
22
46
  *
23
- * Splits at paragraph boundaries when the buffer reaches maxTokens.
24
- * Merges trailing windows shorter than minTokens into the predecessor.
47
+ * finalAnswerChunk.content includes a heading prefix derived from the window's
48
+ * breadcrumb so the LLM always has heading context.
25
49
  */
26
- export declare function walkToChunks(root: DocNode, opts: WalkOptions): ChunkResult[];
50
+ export declare function walkToChunks(root: DocNode, opts: WalkOptions): ArtifactSet[];
27
51
  //# sourceMappingURL=chunker.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../src/chunker.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAIrD,MAAM,WAAW,WAAW;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,SAAS,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAQD;;;;;GAKG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,WAAW,GAAG,WAAW,EAAE,CAoI5E"}
1
+ {"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../src/chunker.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,WAAW,EAAyB,MAAM,YAAY,CAAC;AAK9E,MAAM,WAAW,WAAW;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,sDAAsD;IACtD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,qGAAqG;IACrG,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;OAIG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;;OAGG;IACH,eAAe,CAAC,EAAE;QAChB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,KAAK,CAAC,EAAE,MAAM,CAAC;KAChB,CAAC;IACF;;;OAGG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB;;;OAGG;IACH,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AA0ED;;;;;;;;;;GAUG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,WAAW,GAAG,WAAW,EAAE,CAiO5E"}
package/dist/chunker.js CHANGED
@@ -1,127 +1,269 @@
1
1
  import { walkDocNode } from "./ast-walker.js";
2
2
  import { extractOutline } from "./outline.js";
3
3
  const CHARS_PER_TOKEN = 4;
4
+ const PREVIEW_CHARS = 250;
4
5
  function estimateTokens(text) {
5
6
  return Math.ceil(text.length / CHARS_PER_TOKEN);
6
7
  }
8
+ function makePreview(content) {
9
+ if (content.length <= PREVIEW_CHARS)
10
+ return content;
11
+ const cut = content.slice(0, PREVIEW_CHARS);
12
+ const lastSpace = cut.lastIndexOf(" ");
13
+ return lastSpace > 0 ? cut.slice(0, lastSpace) : cut;
14
+ }
15
+ function makeAnchorText(breadcrumb, rawContent) {
16
+ const prefix = breadcrumb.length > 0 ? breadcrumb.join(" › ") + ". " : "";
17
+ const dotIdx = rawContent.search(/[.!?]\s/);
18
+ const firstSentence = dotIdx > 0 ? rawContent.slice(0, dotIdx + 1) : rawContent.slice(0, 150);
19
+ return (prefix + firstSentence).slice(0, 250);
20
+ }
21
+ function makeSparseTerms(text) {
22
+ return text
23
+ .toLowerCase()
24
+ .split(/\W+/)
25
+ .filter((t) => t.length > 2);
26
+ }
27
+ function sameBreadcrumb(a, b) {
28
+ return a.length === b.length && a.every((v, i) => v === b[i]);
29
+ }
7
30
  /**
8
- * Walk a ViDoc AST and produce Chunk[] with full ChunkMeta.
31
+ * Pre-split text segments that individually exceed maxTokens into maxTokens-sized
32
+ * pieces. Used when recursive=true to avoid content loss on hard-cut.
33
+ */
34
+ function splitOversized(seg, maxTokens) {
35
+ const maxChars = maxTokens * CHARS_PER_TOKEN;
36
+ if (seg.text.length <= maxChars)
37
+ return [seg];
38
+ const parts = [];
39
+ let pos = 0;
40
+ while (pos < seg.text.length) {
41
+ const text = seg.text.slice(pos, pos + maxChars);
42
+ parts.push({
43
+ ...seg,
44
+ text,
45
+ attrs: {
46
+ ...seg.attrs,
47
+ byteStart: seg.attrs.byteStart + pos,
48
+ byteEnd: seg.attrs.byteStart + pos + text.length,
49
+ },
50
+ });
51
+ pos += maxChars;
52
+ }
53
+ return parts;
54
+ }
55
+ /**
56
+ * Walk a ViDoc AST and produce one ArtifactSet per logical window.
57
+ *
58
+ * Each window is split at paragraph/segment boundaries when the accumulated
59
+ * token count reaches maxTokens. A section boundary (breadcrumb change) also
60
+ * flushes the current window. Level 2 modifiers (overlap, boundaryPadding,
61
+ * recursive, adaptiveSize) refine this behaviour.
9
62
  *
10
- * Splits at paragraph boundaries when the buffer reaches maxTokens.
11
- * Merges trailing windows shorter than minTokens into the predecessor.
63
+ * finalAnswerChunk.content includes a heading prefix derived from the window's
64
+ * breadcrumb so the LLM always has heading context.
12
65
  */
13
66
  export function walkToChunks(root, opts) {
14
67
  const maxTokens = opts.maxTokens ?? 512;
15
68
  const minTokens = opts.minTokens ?? Math.floor(maxTokens / 4);
69
+ const overlap = Math.min(Math.max(opts.overlap ?? 0, 0), 0.9);
70
+ const padBefore = opts.boundaryPadding?.before ?? 0;
71
+ const padAfter = opts.boundaryPadding?.after ?? 0;
72
+ const recursive = opts.recursive ?? false;
73
+ const adaptiveSize = opts.adaptiveSize ?? false;
16
74
  const documentOutline = extractOutline(root);
17
- const segments = walkDocNode(root);
18
- if (segments.length === 0)
75
+ const rawSegments = walkDocNode(root);
76
+ if (rawSegments.length === 0)
19
77
  return [];
78
+ // Pre-split oversized segments in recursive mode to avoid content loss.
79
+ const segments = recursive
80
+ ? rawSegments.flatMap((s) => splitOversized(s, maxTokens))
81
+ : rawSegments;
82
+ // ── Build windows ──────────────────────────────────────────────────────────
20
83
  const windows = [];
21
- let current = {
22
- texts: [],
23
- byteStart: segments[0].attrs.byteStart,
24
- byteEnd: segments[0].attrs.byteEnd,
25
- breadcrumb: segments[0].breadcrumb,
26
- truncated: false,
27
- };
28
- let currentTokens = 0;
29
- for (const seg of segments) {
30
- const segTokens = estimateTokens(seg.text);
31
- // If adding this segment would overflow and we already have content, flush.
32
- if (currentTokens > 0 && currentTokens + segTokens > maxTokens) {
33
- windows.push(current);
34
- current = {
35
- texts: [],
36
- byteStart: seg.attrs.byteStart,
37
- byteEnd: seg.attrs.byteEnd,
38
- breadcrumb: seg.breadcrumb,
39
- truncated: false,
40
- };
41
- currentTokens = 0;
84
+ // Track which segment index each window starts/ends at (for boundaryPadding).
85
+ const windowSegBounds = [];
86
+ let startIdx = 0;
87
+ while (startIdx < segments.length) {
88
+ const firstSeg = segments[startIdx];
89
+ const win = {
90
+ texts: [],
91
+ byteStart: firstSeg.attrs.byteStart,
92
+ byteEnd: firstSeg.attrs.byteEnd,
93
+ breadcrumb: firstSeg.breadcrumb,
94
+ truncated: false,
95
+ };
96
+ let currentTokens = 0;
97
+ let idx = startIdx;
98
+ while (idx < segments.length) {
99
+ const seg = segments[idx];
100
+ const isCompact = adaptiveSize &&
101
+ (seg.nodeType === "code" || seg.nodeType === "table-cell");
102
+ const effectiveMax = isCompact ? Math.ceil(maxTokens / 2) : maxTokens;
103
+ const segTokens = estimateTokens(seg.text);
104
+ // Flush if entering a new section (different breadcrumb).
105
+ if (currentTokens > 0 && !sameBreadcrumb(seg.breadcrumb, win.breadcrumb)) {
106
+ break;
107
+ }
108
+ // Flush if adding this segment would overflow an already-populated window.
109
+ if (currentTokens > 0 && currentTokens + segTokens > effectiveMax) {
110
+ break;
111
+ }
112
+ // Hard-cut a single oversized segment (only reached when recursive=false).
113
+ if (segTokens > maxTokens) {
114
+ const maxChars = maxTokens * CHARS_PER_TOKEN;
115
+ win.texts.push(seg.text.slice(0, maxChars));
116
+ win.byteEnd = seg.attrs.byteEnd;
117
+ win.truncated = true;
118
+ idx++;
119
+ break;
120
+ }
121
+ win.texts.push(seg.text);
122
+ win.byteEnd = seg.attrs.byteEnd;
123
+ if (win.lineStart == null && seg.attrs.lineStart != null)
124
+ win.lineStart = seg.attrs.lineStart;
125
+ if (seg.attrs.lineEnd != null)
126
+ win.lineEnd = seg.attrs.lineEnd;
127
+ if (win.pageStart == null && seg.attrs.pageNumber != null)
128
+ win.pageStart = seg.attrs.pageNumber;
129
+ if (seg.attrs.pageNumber != null)
130
+ win.pageEnd = seg.attrs.pageNumber;
131
+ if (!win.lang && seg.attrs.lang)
132
+ win.lang = seg.attrs.lang;
133
+ if (!win.codeLanguage && seg.attrs.codeLanguage)
134
+ win.codeLanguage = seg.attrs.codeLanguage;
135
+ currentTokens += segTokens;
136
+ idx++;
42
137
  }
43
- // If a single segment exceeds maxTokens, hard-cut it.
44
- if (segTokens > maxTokens) {
45
- const maxChars = maxTokens * CHARS_PER_TOKEN;
46
- current.texts.push(seg.text.slice(0, maxChars));
47
- current.byteEnd = seg.attrs.byteEnd;
48
- current.truncated = true;
49
- windows.push(current);
50
- current = {
51
- texts: [],
52
- byteStart: seg.attrs.byteEnd,
53
- byteEnd: seg.attrs.byteEnd,
54
- breadcrumb: seg.breadcrumb,
55
- truncated: false,
56
- };
57
- currentTokens = 0;
58
- continue;
138
+ if (win.texts.length > 0) {
139
+ windows.push(win);
140
+ windowSegBounds.push({ start: startIdx, end: idx });
141
+ // Compute the next start index, accounting for overlap.
142
+ if (overlap > 0 && idx > startIdx + 1) {
143
+ // Walk backwards from idx until we've accumulated overlap * currentTokens.
144
+ const targetOverlap = currentTokens * overlap;
145
+ let accumulated = 0;
146
+ let back = idx - 1;
147
+ while (back > startIdx && accumulated < targetOverlap) {
148
+ accumulated += estimateTokens(segments[back].text);
149
+ back--;
150
+ }
151
+ startIdx = Math.max(startIdx + 1, back + 1);
152
+ }
153
+ else {
154
+ startIdx = idx;
155
+ }
156
+ }
157
+ else {
158
+ // Safety: always advance to avoid infinite loop.
159
+ startIdx++;
59
160
  }
60
- current.texts.push(seg.text);
61
- current.byteEnd = seg.attrs.byteEnd;
62
- if (current.lineStart == null && seg.attrs.lineStart != null)
63
- current.lineStart = seg.attrs.lineStart;
64
- if (seg.attrs.lineEnd != null)
65
- current.lineEnd = seg.attrs.lineEnd;
66
- if (current.pageStart == null && seg.attrs.pageNumber != null)
67
- current.pageStart = seg.attrs.pageNumber;
68
- if (seg.attrs.pageNumber != null)
69
- current.pageEnd = seg.attrs.pageNumber;
70
- if (!current.lang && seg.attrs.lang)
71
- current.lang = seg.attrs.lang;
72
- if (!current.codeLanguage && seg.attrs.codeLanguage)
73
- current.codeLanguage = seg.attrs.codeLanguage;
74
- currentTokens += segTokens;
75
- }
76
- if (current.texts.length > 0) {
77
- windows.push(current);
78
161
  }
79
- // Merge trailing window into predecessor if it is below minTokens.
162
+ // ── Merge trailing short window into predecessor (same section only) ───────
80
163
  if (windows.length > 1) {
81
164
  const last = windows[windows.length - 1];
165
+ const prev = windows[windows.length - 2];
82
166
  const lastTokens = estimateTokens(last.texts.join("\n\n"));
83
- if (lastTokens < minTokens) {
84
- const prev = windows[windows.length - 2];
167
+ if (lastTokens < minTokens && sameBreadcrumb(last.breadcrumb, prev.breadcrumb)) {
85
168
  prev.texts.push(...last.texts);
86
169
  prev.byteEnd = last.byteEnd;
87
170
  prev.lineEnd = last.lineEnd;
88
171
  prev.pageEnd = last.pageEnd;
172
+ const lastBounds = windowSegBounds.pop();
173
+ windowSegBounds[windowSegBounds.length - 1].end = lastBounds.end;
89
174
  windows.pop();
90
175
  }
91
176
  }
92
177
  const totalChunks = windows.length;
93
- return windows.map((win, i) => {
94
- const content = win.texts.join("\n\n");
95
- const meta = {
178
+ // ── Build ArtifactSet[] ───────────────────────────────────────────────────
179
+ const artifacts = windows.map((win, i) => {
180
+ const rawContent = win.texts.join("\n\n");
181
+ const headingPrefix = win.breadcrumb.length > 0
182
+ ? win.breadcrumb.map((h, k) => "#".repeat(k + 1) + " " + h).join("\n") +
183
+ "\n\n"
184
+ : "";
185
+ const content = headingPrefix + rawContent;
186
+ const id = `${opts.sourceFile}:${i}`;
187
+ const filterMeta = {
96
188
  sourceFile: opts.sourceFile,
97
189
  sourceFormat: opts.sourceFormat,
190
+ breadcrumb: win.breadcrumb,
98
191
  byteStart: win.byteStart,
99
192
  byteEnd: win.byteEnd,
100
193
  lineStart: win.lineStart,
101
194
  lineEnd: win.lineEnd,
102
195
  pageStart: win.pageStart,
103
196
  pageEnd: win.pageEnd,
104
- breadcrumb: win.breadcrumb,
105
- sectionTitle: win.breadcrumb.at(-1),
106
- headingLevel: win.breadcrumb.length > 0 ? win.breadcrumb.length : undefined,
107
- documentOutline,
108
197
  lang: win.lang,
109
198
  codeLanguage: win.codeLanguage,
110
- strategy: opts.strategy,
111
199
  chunkIndex: i,
112
200
  totalChunks,
113
- estimatedTokens: estimateTokens(content),
114
- truncated: win.truncated,
115
- ...(opts.fileHash ? { fileHash: opts.fileHash } : {}),
116
- ...(opts.fileSizeBytes != null ? { fileSizeBytes: opts.fileSizeBytes } : {}),
117
- ...(opts.fileModifiedAt ? { fileModifiedAt: opts.fileModifiedAt } : {}),
201
+ strategy: opts.strategy,
202
+ estimatedTokens: estimateTokens(rawContent),
203
+ ...(opts.fileHash != null ? { fileHash: opts.fileHash } : {}),
204
+ ...(opts.fileModifiedAt != null
205
+ ? { fileModifiedAt: opts.fileModifiedAt }
206
+ : {}),
207
+ ...(opts.fileSizeBytes != null
208
+ ? { fileSizeBytes: opts.fileSizeBytes }
209
+ : {}),
210
+ };
211
+ const fullMeta = {
212
+ ...filterMeta,
213
+ sectionTitle: win.breadcrumb.at(-1),
214
+ headingLevel: win.breadcrumb.length > 0 ? win.breadcrumb.length : undefined,
215
+ documentOutline,
216
+ truncated: win.truncated || undefined,
118
217
  };
119
218
  return {
120
- content,
121
- metadata: meta,
122
219
  sourceFile: opts.sourceFile,
123
220
  commitHash: opts.commitHash,
221
+ searchRepresentation: {
222
+ id,
223
+ anchorText: makeAnchorText(win.breadcrumb, rawContent),
224
+ sparseTerms: makeSparseTerms(rawContent),
225
+ filterMetadata: filterMeta,
226
+ },
227
+ candidateChunk: {
228
+ id,
229
+ preview: makePreview(content),
230
+ fullMeta,
231
+ },
232
+ finalAnswerChunk: {
233
+ id,
234
+ content,
235
+ },
124
236
  };
125
237
  });
238
+ // ── Assign sibling IDs ────────────────────────────────────────────────────
239
+ for (let i = 0; i < artifacts.length; i++) {
240
+ const a = artifacts[i];
241
+ if (i > 0)
242
+ a.candidateChunk.fullMeta.siblingPrev =
243
+ artifacts[i - 1].searchRepresentation.id;
244
+ if (i < artifacts.length - 1)
245
+ a.candidateChunk.fullMeta.siblingNext =
246
+ artifacts[i + 1].searchRepresentation.id;
247
+ }
248
+ // ── Apply boundary padding to FinalAnswerChunk ────────────────────────────
249
+ if (padBefore > 0 || padAfter > 0) {
250
+ for (let i = 0; i < artifacts.length; i++) {
251
+ const parts = [];
252
+ if (padBefore > 0 && i > 0) {
253
+ const prevTexts = windows[i - 1].texts;
254
+ parts.push(prevTexts.slice(-padBefore).join("\n\n"));
255
+ }
256
+ parts.push(artifacts[i].finalAnswerChunk.content);
257
+ if (padAfter > 0 && i < artifacts.length - 1) {
258
+ const nextTexts = windows[i + 1].texts;
259
+ parts.push(nextTexts.slice(0, padAfter).join("\n\n"));
260
+ }
261
+ const padded = parts.filter(Boolean).join("\n\n");
262
+ if (padded !== artifacts[i].finalAnswerChunk.content) {
263
+ artifacts[i].finalAnswerChunk.paddedContent = padded;
264
+ }
265
+ }
266
+ }
267
+ return artifacts;
126
268
  }
127
269
  //# sourceMappingURL=chunker.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"chunker.js","sourceRoot":"","sources":["../src/chunker.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAC9C,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAsB9C,MAAM,eAAe,GAAG,CAAC,CAAC;AAE1B,SAAS,cAAc,CAAC,IAAY;IAClC,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,eAAe,CAAC,CAAC;AAClD,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,YAAY,CAAC,IAAa,EAAE,IAAiB;IAC3D,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,GAAG,CAAC;IACxC,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;IAC9D,MAAM,eAAe,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;IAC7C,MAAM,QAAQ,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC;IAEnC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAiBrC,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,IAAI,OAAO,GAAW;QACpB,KAAK,EAAE,EAAE;QACT,SAAS,EAAE,QAAQ,CAAC,CAAC,CAAE,CAAC,KAAK,CAAC,SAAS;QACvC,OAAO,EAAE,QAAQ,CAAC,CAAC,CAAE,CAAC,KAAK,CAAC,OAAO;QACnC,UAAU,EAAE,QAAQ,CAAC,CAAC,CAAE,CAAC,UAAU;QACnC,SAAS,EAAE,KAAK;KACjB,CAAC;IACF,IAAI,aAAa,GAAG,CAAC,CAAC;IAEtB,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;QAC3B,MAAM,SAAS,GAAG,cAAc,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAE3C,4EAA4E;QAC5E,IAAI,aAAa,GAAG,CAAC,IAAI,aAAa,GAAG,SAAS,GAAG,SAAS,EAAE,CAAC;YAC/D,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACtB,OAAO,GAAG;gBACR,KAAK,EAAE,EAAE;gBACT,SAAS,EAAE,GAAG,CAAC,KAAK,CAAC,SAAS;gBAC9B,OAAO,EAAE,GAAG,CAAC,KAAK,CAAC,OAAO;gBAC1B,UAAU,EAAE,GAAG,CAAC,UAAU;gBAC1B,SAAS,EAAE,KAAK;aACjB,CAAC;YACF,aAAa,GAAG,CAAC,CAAC;QACpB,CAAC;QAED,sDAAsD;QACtD,IAAI,SAAS,GAAG,SAAS,EAAE,CAAC;YAC1B,MAAM,QAAQ,GAAG,SAAS,GAAG,eAAe,CAAC;YAC7C,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC,CAAC;YAChD,OAAO,CAAC,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC;YACpC,OAAO,CAAC,SAAS,GAAG,IAAI,CAAC;YACzB,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACtB,OAAO,GAAG;gBACR,KAAK,EAAE,EAAE;gBACT,SAAS,EAAE,GAAG,CAAC,KAAK,CAAC,OAAO;gBAC5B,OAAO,EAAE,GAAG,CAAC,KAAK,CAAC,OAAO;gBAC1B,UAAU,EAAE,GAAG,CAAC,UAAU;gBAC1B,SAAS,EAAE,KAAK;aACjB,CAAC;YACF,aAAa,GAAG,CAAC,CAAC;YAClB,SAAS;QACX,CAAC;QAED,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,C
AAC;QAC7B,OAAO,CAAC,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC;QACpC,IAAI,OAAO,CAAC,SAAS,IAAI,IAAI,IAAI,GAAG,CAAC,KAAK,CAAC,SAAS,IAAI,IAAI;YAAE,OAAO,CAAC,SAAS,GAAG,GAAG,CAAC,KAAK,CAAC,SAAS,CAAC;QACtG,IAAI,GAAG,CAAC,KAAK,CAAC,OAAO,IAAI,IAAI;YAAE,OAAO,CAAC,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC;QACnE,IAAI,OAAO,CAAC,SAAS,IAAI,IAAI,IAAI,GAAG,CAAC,KAAK,CAAC,UAAU,IAAI,IAAI;YAAE,OAAO,CAAC,SAAS,GAAG,GAAG,CAAC,KAAK,CAAC,UAAU,CAAC;QACxG,IAAI,GAAG,CAAC,KAAK,CAAC,UAAU,IAAI,IAAI;YAAE,OAAO,CAAC,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,UAAU,CAAC;QACzE,IAAI,CAAC,OAAO,CAAC,IAAI,IAAI,GAAG,CAAC,KAAK,CAAC,IAAI;YAAE,OAAO,CAAC,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC;QACnE,IAAI,CAAC,OAAO,CAAC,YAAY,IAAI,GAAG,CAAC,KAAK,CAAC,YAAY;YAAE,OAAO,CAAC,YAAY,GAAG,GAAG,CAAC,KAAK,CAAC,YAAY,CAAC;QACnG,aAAa,IAAI,SAAS,CAAC;IAC7B,CAAC;IAED,IAAI,OAAO,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7B,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACxB,CAAC;IAED,mEAAmE;IACnE,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,MAAM,IAAI,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC;QAC1C,MAAM,UAAU,GAAG,cAAc,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;QAC3D,IAAI,UAAU,GAAG,SAAS,EAAE,CAAC;YAC3B,MAAM,IAAI,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC;YAC1C,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC;YAC/B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC;YAC5B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC;YAC5B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC;YAC5B,OAAO,CAAC,GAAG,EAAE,CAAC;QAChB,CAAC;IACH,CAAC;IAED,MAAM,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC;IAEnC,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE;QAC5B,MAAM,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,MAAM,IAAI,GAAc;YACtB,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,YAAY,EAAE,IAAI,CAAC,YAAY;YAC/B,SAAS,EAAE,GAAG,CAAC,SAAS;YACxB,OAAO,EAAE,GAAG,CAAC,OAAO;YACpB,SAAS,EAAE,GAAG,CAAC,SAAS;YACxB,OAAO,EAAE,GAAG,CAAC,OAAO;YACpB,SAAS,EAAE,GAAG,CAAC,SAAS;YACxB,OAAO,EAAE,GAAG,CAAC,OAAO;YACpB,UAAU,EAAE,GAAG,CAAC,UAAU;YAC1B,YAAY,EAAE,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YACnC,YAA
Y,EAAE,GAAG,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS;YAC3E,eAAe;YACf,IAAI,EAAE,GAAG,CAAC,IAAI;YACd,YAAY,EAAE,GAAG,CAAC,YAAY;YAC9B,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,UAAU,EAAE,CAAC;YACb,WAAW;YACX,eAAe,EAAE,cAAc,CAAC,OAAO,CAAC;YACxC,SAAS,EAAE,GAAG,CAAC,SAAS;YACxB,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACrD,GAAG,CAAC,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC,CAAC,CAAC,EAAE,aAAa,EAAE,IAAI,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAC5E,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC,EAAE,cAAc,EAAE,IAAI,CAAC,cAAc,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SACxE,CAAC;QAEF,OAAO;YACL,OAAO;YACP,QAAQ,EAAE,IAAI;YACd,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,UAAU,EAAE,IAAI,CAAC,UAAU;SAC5B,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC"}
1
+ {"version":3,"file":"chunker.js","sourceRoot":"","sources":["../src/chunker.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAC9C,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAyC9C,MAAM,eAAe,GAAG,CAAC,CAAC;AAC1B,MAAM,aAAa,GAAG,GAAG,CAAC;AAE1B,SAAS,cAAc,CAAC,IAAY;IAClC,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,eAAe,CAAC,CAAC;AAClD,CAAC;AAED,SAAS,WAAW,CAAC,OAAe;IAClC,IAAI,OAAO,CAAC,MAAM,IAAI,aAAa;QAAE,OAAO,OAAO,CAAC;IACpD,MAAM,GAAG,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC;IAC5C,MAAM,SAAS,GAAG,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IACvC,OAAO,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;AACvD,CAAC;AAED,SAAS,cAAc,CAAC,UAAoB,EAAE,UAAkB;IAC9D,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;IAC1E,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;IAC5C,MAAM,aAAa,GACjB,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IAC1E,OAAO,CAAC,MAAM,GAAG,aAAa,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;AAChD,CAAC;AAED,SAAS,eAAe,CAAC,IAAY;IACnC,OAAO,IAAI;SACR,WAAW,EAAE;SACb,KAAK,CAAC,KAAK,CAAC;SACZ,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACjC,CAAC;AAED,SAAS,cAAc,CAAC,CAAW,EAAE,CAAW;IAC9C,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AAChE,CAAC;AAED;;;GAGG;AACH,SAAS,cAAc,CAAC,GAAgB,EAAE,SAAiB;IACzD,MAAM,QAAQ,GAAG,SAAS,GAAG,eAAe,CAAC;IAC7C,IAAI,GAAG,CAAC,IAAI,CAAC,MAAM,IAAI,QAAQ;QAAE,OAAO,CAAC,GAAG,CAAC,CAAC;IAC9C,MAAM,KAAK,GAAkB,EAAE,CAAC;IAChC,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,OAAO,GAAG,GAAG,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;QAC7B,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,GAAG,GAAG,QAAQ,CAAC,CAAC;QACjD,KAAK,CAAC,IAAI,CAAC;YACT,GAAG,GAAG;YACN,IAAI;YACJ,KAAK,EAAE;gBACL,GAAG,GAAG,CAAC,KAAK;gBACZ,SAAS,EAAE,GAAG,CAAC,KAAK,CAAC,
SAAS,GAAG,GAAG;gBACpC,OAAO,EAAE,GAAG,CAAC,KAAK,CAAC,SAAS,GAAG,GAAG,GAAG,IAAI,CAAC,MAAM;aACjD;SACF,CAAC,CAAC;QACH,GAAG,IAAI,QAAQ,CAAC;IAClB,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAgBD;;;;;;;;;;GAUG;AACH,MAAM,UAAU,YAAY,CAAC,IAAa,EAAE,IAAiB;IAC3D,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,GAAG,CAAC;IACxC,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;IAC9D,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,IAAI,CAAC,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IAC9D,MAAM,SAAS,GAAG,IAAI,CAAC,eAAe,EAAE,MAAM,IAAI,CAAC,CAAC;IACpD,MAAM,QAAQ,GAAG,IAAI,CAAC,eAAe,EAAE,KAAK,IAAI,CAAC,CAAC;IAClD,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,KAAK,CAAC;IAC1C,MAAM,YAAY,GAAG,IAAI,CAAC,YAAY,IAAI,KAAK,CAAC;IAEhD,MAAM,eAAe,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;IAC7C,MAAM,WAAW,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC;IAEtC,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAExC,wEAAwE;IACxE,MAAM,QAAQ,GAAkB,SAAS;QACvC,CAAC,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,cAAc,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;QAC1D,CAAC,CAAC,WAAW,CAAC;IAEhB,8EAA8E;IAE9E,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,8EAA8E;IAC9E,MAAM,eAAe,GAA0C,EAAE,CAAC;IAClE,IAAI,QAAQ,GAAG,CAAC,CAAC;IAEjB,OAAO,QAAQ,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC;QAClC,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAE,CAAC;QACrC,MAAM,GAAG,GAAW;YAClB,KAAK,EAAE,EAAE;YACT,SAAS,EAAE,QAAQ,CAAC,KAAK,CAAC,SAAS;YACnC,OAAO,EAAE,QAAQ,CAAC,KAAK,CAAC,OAAO;YAC/B,UAAU,EAAE,QAAQ,CAAC,UAAU;YAC/B,SAAS,EAAE,KAAK;SACjB,CAAC;QACF,IAAI,aAAa,GAAG,CAAC,CAAC;QACtB,IAAI,GAAG,GAAG,QAAQ,CAAC;QAEnB,OAAO,GAAG,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC;YAC7B,MAAM,GAAG,GAAG,QAAQ,CAAC,GAAG,CAAE,CAAC;YAC3B,MAAM,SAAS,GACb,YAAY;gBACZ,CAAC,GAAG,CAAC,QAAQ,KAAK,MAAM,IAAI,GAAG,CAAC,QAAQ,KAAK,YAAY,CAAC,CAAC;YAC7D,MAAM,YAAY,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YACtE,MAAM,SAAS,GAAG,cAAc,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YAE3C,0DAA0D;YAC1D,IAAI,aAAa,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;gBACzE,MAAM;YACR,CAAC;YAED,2EAA2E;YAC3E,IAAI,aAAa
,GAAG,CAAC,IAAI,aAAa,GAAG,SAAS,GAAG,YAAY,EAAE,CAAC;gBAClE,MAAM;YACR,CAAC;YAED,2EAA2E;YAC3E,IAAI,SAAS,GAAG,SAAS,EAAE,CAAC;gBAC1B,MAAM,QAAQ,GAAG,SAAS,GAAG,eAAe,CAAC;gBAC7C,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC,CAAC;gBAC5C,GAAG,CAAC,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC;gBAChC,GAAG,CAAC,SAAS,GAAG,IAAI,CAAC;gBACrB,GAAG,EAAE,CAAC;gBACN,MAAM;YACR,CAAC;YAED,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YACzB,GAAG,CAAC,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC;YAChC,IAAI,GAAG,CAAC,SAAS,IAAI,IAAI,IAAI,GAAG,CAAC,KAAK,CAAC,SAAS,IAAI,IAAI;gBACtD,GAAG,CAAC,SAAS,GAAG,GAAG,CAAC,KAAK,CAAC,SAAS,CAAC;YACtC,IAAI,GAAG,CAAC,KAAK,CAAC,OAAO,IAAI,IAAI;gBAAE,GAAG,CAAC,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC;YAC/D,IAAI,GAAG,CAAC,SAAS,IAAI,IAAI,IAAI,GAAG,CAAC,KAAK,CAAC,UAAU,IAAI,IAAI;gBACvD,GAAG,CAAC,SAAS,GAAG,GAAG,CAAC,KAAK,CAAC,UAAU,CAAC;YACvC,IAAI,GAAG,CAAC,KAAK,CAAC,UAAU,IAAI,IAAI;gBAAE,GAAG,CAAC,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,UAAU,CAAC;YACrE,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,GAAG,CAAC,KAAK,CAAC,IAAI;gBAAE,GAAG,CAAC,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC;YAC3D,IAAI,CAAC,GAAG,CAAC,YAAY,IAAI,GAAG,CAAC,KAAK,CAAC,YAAY;gBAC7C,GAAG,CAAC,YAAY,GAAG,GAAG,CAAC,KAAK,CAAC,YAAY,CAAC;YAC5C,aAAa,IAAI,SAAS,CAAC;YAC3B,GAAG,EAAE,CAAC;QACR,CAAC;QAED,IAAI,GAAG,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAClB,eAAe,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC;YAEpD,wDAAwD;YACxD,IAAI,OAAO,GAAG,CAAC,IAAI,GAAG,GAAG,QAAQ,GAAG,CAAC,EAAE,CAAC;gBACtC,2EAA2E;gBAC3E,MAAM,aAAa,GAAG,aAAa,GAAG,OAAO,CAAC;gBAC9C,IAAI,WAAW,GAAG,CAAC,CAAC;gBACpB,IAAI,IAAI,GAAG,GAAG,GAAG,CAAC,CAAC;gBACnB,OAAO,IAAI,GAAG,QAAQ,IAAI,WAAW,GAAG,aAAa,EAAE,CAAC;oBACtD,WAAW,IAAI,cAAc,CAAC,QAAQ,CAAC,IAAI,CAAE,CAAC,IAAI,CAAC,CAAC;oBACpD,IAAI,EAAE,CAAC;gBACT,CAAC;gBACD,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,GAAG,CAAC,EAAE,IAAI,GAAG,CAAC,CAAC,CAAC;YAC9C,CAAC;iBAAM,CAAC;gBACN,QAAQ,GAAG,GAAG,CAAC;YACjB,CAAC;QACH,CAAC;aAAM,CAAC;YACN,iDAAiD;YACjD,QAAQ
,EAAE,CAAC;QACb,CAAC;IACH,CAAC;IAED,8EAA8E;IAC9E,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,MAAM,IAAI,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC;QAC1C,MAAM,IAAI,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC;QAC1C,MAAM,UAAU,GAAG,cAAc,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;QAC3D,IAAI,UAAU,GAAG,SAAS,IAAI,cAAc,CAAC,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC;YAC/E,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC;YAC/B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC;YAC5B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC;YAC5B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC;YAC5B,MAAM,UAAU,GAAG,eAAe,CAAC,GAAG,EAAG,CAAC;YAC1C,eAAe,CAAC,eAAe,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC,GAAG,GAAG,UAAU,CAAC,GAAG,CAAC;YAClE,OAAO,CAAC,GAAG,EAAE,CAAC;QAChB,CAAC;IACH,CAAC;IAED,MAAM,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC;IAEnC,6EAA6E;IAC7E,MAAM,SAAS,GAAkB,OAAO,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE;QACtD,MAAM,UAAU,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC1C,MAAM,aAAa,GACjB,GAAG,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC;YACvB,CAAC,CAAC,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;gBACpE,MAAM;YACR,CAAC,CAAC,EAAE,CAAC;QACT,MAAM,OAAO,GAAG,aAAa,GAAG,UAAU,CAAC;QAC3C,MAAM,EAAE,GAAG,GAAG,IAAI,CAAC,UAAU,IAAI,CAAC,EAAE,CAAC;QAErC,MAAM,UAAU,GAAe;YAC7B,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,YAAY,EAAE,IAAI,CAAC,YAAY;YAC/B,UAAU,EAAE,GAAG,CAAC,UAAU;YAC1B,SAAS,EAAE,GAAG,CAAC,SAAS;YACxB,OAAO,EAAE,GAAG,CAAC,OAAO;YACpB,SAAS,EAAE,GAAG,CAAC,SAAS;YACxB,OAAO,EAAE,GAAG,CAAC,OAAO;YACpB,SAAS,EAAE,GAAG,CAAC,SAAS;YACxB,OAAO,EAAE,GAAG,CAAC,OAAO;YACpB,IAAI,EAAE,GAAG,CAAC,IAAI;YACd,YAAY,EAAE,GAAG,CAAC,YAAY;YAC9B,UAAU,EAAE,CAAC;YACb,WAAW;YACX,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,eAAe,EAAE,cAAc,CAAC,UAAU,CAAC;YAC3C,GAAG,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAC7D,GAAG,CAAC,IAAI,CAAC,cAAc,IAAI,IAAI;gBAC7B,CAAC,CAAC,EAAE,cAAc,EAAE,IAAI,CAAC,cAAc,EAAE;
gBACzC,CAAC,CAAC,EAAE,CAAC;YACP,GAAG,CAAC,IAAI,CAAC,aAAa,IAAI,IAAI;gBAC5B,CAAC,CAAC,EAAE,aAAa,EAAE,IAAI,CAAC,aAAa,EAAE;gBACvC,CAAC,CAAC,EAAE,CAAC;SACR,CAAC;QAEF,MAAM,QAAQ,GAAc;YAC1B,GAAG,UAAU;YACb,YAAY,EAAE,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YACnC,YAAY,EACV,GAAG,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS;YAC/D,eAAe;YACf,SAAS,EAAE,GAAG,CAAC,SAAS,IAAI,SAAS;SACtC,CAAC;QAEF,OAAO;YACL,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,oBAAoB,EAAE;gBACpB,EAAE;gBACF,UAAU,EAAE,cAAc,CAAC,GAAG,CAAC,UAAU,EAAE,UAAU,CAAC;gBACtD,WAAW,EAAE,eAAe,CAAC,UAAU,CAAC;gBACxC,cAAc,EAAE,UAAU;aAC3B;YACD,cAAc,EAAE;gBACd,EAAE;gBACF,OAAO,EAAE,WAAW,CAAC,OAAO,CAAC;gBAC7B,QAAQ;aACT;YACD,gBAAgB,EAAE;gBAChB,EAAE;gBACF,OAAO;aACR;SACF,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,6EAA6E;IAC7E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC1C,MAAM,CAAC,GAAG,SAAS,CAAC,CAAC,CAAE,CAAC;QACxB,IAAI,CAAC,GAAG,CAAC;YACP,CAAC,CAAC,cAAc,CAAC,QAAQ,CAAC,WAAW;gBACnC,SAAS,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC,oBAAoB,CAAC,EAAE,CAAC;QAC9C,IAAI,CAAC,GAAG,SAAS,CAAC,MAAM,GAAG,CAAC;YAC1B,CAAC,CAAC,cAAc,CAAC,QAAQ,CAAC,WAAW;gBACnC,SAAS,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC,oBAAoB,CAAC,EAAE,CAAC;IAChD,CAAC;IAED,6EAA6E;IAC7E,IAAI,SAAS,GAAG,CAAC,IAAI,QAAQ,GAAG,CAAC,EAAE,CAAC;QAClC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,MAAM,KAAK,GAAa,EAAE,CAAC;YAE3B,IAAI,SAAS,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC3B,MAAM,SAAS,GAAG,OAAO,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC,KAAK,CAAC;gBACxC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;YACvD,CAAC;YAED,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAE,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC;YAEnD,IAAI,QAAQ,GAAG,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC7C,MAAM,SAAS,GAAG,OAAO,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC,KAAK,CAAC;gBACxC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;YACxD,CAAC;YAED,MAAM,MAAM
,GAAG,KAAK,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAClD,IAAI,MAAM,KAAK,SAAS,CAAC,CAAC,CAAE,CAAC,gBAAgB,CAAC,OAAO,EAAE,CAAC;gBACtD,SAAS,CAAC,CAAC,CAAE,CAAC,gBAAgB,CAAC,aAAa,GAAG,MAAM,CAAC;YACxD,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC"}
@@ -0,0 +1,46 @@
1
+ import type { ArtifactSet, ArtifactChunker, DocNode } from "./types.js";
2
+ import type { WalkOptions } from "./chunker.js";
3
+ export interface BaseOptions { // Options accepted by every chunker built with createNativeChunker; all fields are optional.
4
+ maxTokens?: number; // Forwarded to walkToChunks only when not null/undefined. NOTE(review): presumably the per-chunk token ceiling — confirm in chunker.ts.
5
+ minTokens?: number; // Forwarded to walkToChunks only when not null/undefined. NOTE(review): presumably the per-chunk token floor — confirm in chunker.ts.
6
+ overlap?: number; // Forwarded to walkToChunks only when not null/undefined; the unit is not visible from the factory — TODO confirm.
7
+ boundaryPadding?: { // Forwarded as a whole object to walkToChunks only when not null/undefined.
8
+ before?: number;
9
+ after?: number;
10
+ };
11
+ adaptiveSize?: boolean; // Forwarded to walkToChunks only when not null/undefined (an explicit `false` is forwarded).
12
+ recursive?: boolean; // Forwarded to walkToChunks only when not null/undefined (an explicit `false` is forwarded).
13
+ ignore?: string[]; // Glob patterns; canProcess() returns false for a path matching any of them. The factory does not forward this to walkToChunks.
14
+ }
15
+ export interface NativeChunkerDef<TOptions extends BaseOptions> {
16
+ /** npm package name. Exposed as the chunker's `name` and passed to walkToChunks as `strategy` (ArtifactSet itself declares no `strategy` field; FilterMeta does). */
17
+ name: string;
18
+ /** "pdf" | "md" | "docx" | "xlsx" etc. — passed to walkToChunks as `sourceFormat`. */
19
+ sourceFormat: string;
20
+ /** Glob patterns this chunker accepts, e.g. ["**\/*.pdf"]. Exposed as the chunker's `patterns` and matched with minimatch (`matchBase: true`) in `canProcess()`. */
21
+ patterns: string[];
22
+ /**
23
+ * Return the native napi binding object. Called at most once per chunker
24
+ * instance (lazily, on the first `chunk()` call).
+ * The result is cached only when it is non-null: a loader that throws or
+ * returns null/undefined is invoked again on the next `chunk()` call.
25
+ */
26
+ loadBinding: () => Record<string, (...args: unknown[]) => string>;
27
+ /**
28
+ * Invoke the correct function on the already-loaded binding.
29
+ * Must return a JSON-encoded DocNode string.
+ * Called on every `chunk()` with the raw file bytes and the resolved options
+ * (`{}` when the chunker was created without options). The returned string is
+ * passed to JSON.parse without further validation.
30
+ */
31
+ callNative: (binding: ReturnType<NativeChunkerDef<TOptions>["loadBinding"]>, buf: Buffer, opts: TOptions) => string;
32
+ /** Format-specific WalkOptions defaults, computed on every `chunk()` call. Spread before user opts so user opts win; the factory-set fields (sourceFile, sourceFormat, commitHash, strategy, fileHash, fileSizeBytes, fileModifiedAt) are written last and cannot be overridden here. */
33
+ extraWalkOpts?: (opts: TOptions) => Partial<WalkOptions>;
34
+ /** Optional post-walk hook for format-specific enrichment (XLSX cell refs, etc.). Receives the walkToChunks result, the parsed DocNode and the resolved options; its return value is what `chunk()` resolves to. */
35
+ enrich?: (sets: ArtifactSet[], docNode: DocNode, opts: TOptions) => ArtifactSet[];
36
+ }
37
+ /**
38
+ * Factory that eliminates boilerplate common to every native-binary chunker:
39
+ * file read, sha256 hash, lazy native binding load, walkToChunks call, and
40
+ * optional enrich hook.
41
+ *
42
+ * Usage:
43
+ * export const createChunker = createNativeChunker<MyOptions>({ ... });
+ *
+ * @param def Static definition of the chunker: name, source format, glob
+ * patterns, binding loader, native call and optional hooks.
+ * @returns A function that takes optional user options and returns an
+ * ArtifactChunker. Each returned chunker keeps its own lazily loaded binding.
44
+ */
45
+ export declare function createNativeChunker<TOptions extends BaseOptions>(def: NativeChunkerDef<TOptions>): (opts?: TOptions) => ArtifactChunker;
46
+ //# sourceMappingURL=factory.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"factory.d.ts","sourceRoot":"","sources":["../src/factory.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,WAAW,EAAE,eAAe,EAAE,OAAO,EAAE,MAAM,YAAY,CAAC;AAExE,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAEhD,MAAM,WAAW,WAAW;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,eAAe,CAAC,EAAE;QAAE,MAAM,CAAC,EAAE,MAAM,CAAC;QAAC,KAAK,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IACtD,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;CACnB;AAED,MAAM,WAAW,gBAAgB,CAAC,QAAQ,SAAS,WAAW;IAC5D,qEAAqE;IACrE,IAAI,EAAE,MAAM,CAAC;IACb,oEAAoE;IACpE,YAAY,EAAE,MAAM,CAAC;IACrB,8DAA8D;IAC9D,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB;;;OAGG;IACH,WAAW,EAAE,MAAM,MAAM,CAAC,MAAM,EAAE,CAAC,GAAG,IAAI,EAAE,OAAO,EAAE,KAAK,MAAM,CAAC,CAAC;IAClE;;;OAGG;IACH,UAAU,EAAE,CACV,OAAO,EAAE,UAAU,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,aAAa,CAAC,CAAC,EAC9D,GAAG,EAAE,MAAM,EACX,IAAI,EAAE,QAAQ,KACX,MAAM,CAAC;IACZ,sFAAsF;IACtF,aAAa,CAAC,EAAE,CAAC,IAAI,EAAE,QAAQ,KAAK,OAAO,CAAC,WAAW,CAAC,CAAC;IACzD,qFAAqF;IACrF,MAAM,CAAC,EAAE,CAAC,IAAI,EAAE,WAAW,EAAE,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,KAAK,WAAW,EAAE,CAAC;CACnF;AAED;;;;;;;GAOG;AACH,wBAAgB,mBAAmB,CAAC,QAAQ,SAAS,WAAW,EAC9D,GAAG,EAAE,gBAAgB,CAAC,QAAQ,CAAC,GAC9B,CAAC,IAAI,CAAC,EAAE,QAAQ,KAAK,eAAe,CAqEtC"}
@@ -0,0 +1,75 @@
1
+ import { readFile, stat } from "node:fs/promises";
2
+ import { createHash } from "node:crypto";
3
+ import { minimatch } from "minimatch";
4
+ import { walkToChunks } from "./chunker.js";
5
+ /**
6
+ * Factory that eliminates boilerplate common to every native-binary chunker:
7
+ * file read, sha256 hash, lazy native binding load, walkToChunks call, and
8
+ * optional enrich hook.
9
+ *
10
+ * Usage:
11
+ * export const createChunker = createNativeChunker<MyOptions>({ ... });
+ *
+ * @param def Static chunker definition (name, sourceFormat, patterns,
+ * loadBinding, callNative and the optional extraWalkOpts / enrich hooks).
+ * @returns A function taking optional user options and returning an object
+ * with `name`, `patterns`, `chunk(filePath, commitHash)` and
+ * `canProcess(filePath)`.
12
+ */
13
+ export function createNativeChunker(def) {
14
+ return (opts) => {
15
+ const resolvedOpts = (opts ?? {}); // a missing options argument is treated as {}
16
+ const ignore = resolvedOpts.ignore ?? []; // glob patterns that make canProcess() return false
17
+ let _binding = null; // cached native binding, private to this chunker instance
18
+ function getBinding() {
19
+ if (_binding == null) { // load on first use; a null/undefined result is not cached and triggers another load next time
20
+ _binding = def.loadBinding();
21
+ }
22
+ return _binding;
23
+ }
24
+ return {
25
+ name: def.name,
26
+ patterns: def.patterns,
27
+ async chunk(filePath, commitHash) {
28
+ const [buf, stats] = await Promise.all([readFile(filePath), stat(filePath)]); // contents and metadata are fetched concurrently; rejects if either call fails
29
+ const fileHash = createHash("sha256").update(buf).digest("hex"); // hex SHA-256 of the raw file bytes
30
+ const binding = getBinding();
31
+ const raw = def.callNative(binding, buf, resolvedOpts);
32
+ const docNode = JSON.parse(raw); // throws if raw is not valid JSON; the parsed value is not validated here
33
+ // extraWalkOpts provides format-specific defaults; explicit user opts
34
+ // override them; required fields are always set last.
35
+ const extra = def.extraWalkOpts ? def.extraWalkOpts(resolvedOpts) : {};
36
+ const sets = walkToChunks(docNode, {
37
+ ...extra, // format defaults go first so the spreads below can override them
38
+ ...(resolvedOpts.maxTokens != null // each user option is copied only when it is not null/undefined
39
+ ? { maxTokens: resolvedOpts.maxTokens }
40
+ : {}),
41
+ ...(resolvedOpts.minTokens != null
42
+ ? { minTokens: resolvedOpts.minTokens }
43
+ : {}),
44
+ ...(resolvedOpts.overlap != null
45
+ ? { overlap: resolvedOpts.overlap }
46
+ : {}),
47
+ ...(resolvedOpts.boundaryPadding != null
48
+ ? { boundaryPadding: resolvedOpts.boundaryPadding }
49
+ : {}),
50
+ ...(resolvedOpts.adaptiveSize != null
51
+ ? { adaptiveSize: resolvedOpts.adaptiveSize }
52
+ : {}),
53
+ ...(resolvedOpts.recursive != null
54
+ ? { recursive: resolvedOpts.recursive }
55
+ : {}),
56
+ sourceFile: filePath, // from here on: values set by the factory that neither extra nor user opts can override
57
+ sourceFormat: def.sourceFormat,
58
+ commitHash,
59
+ strategy: def.name,
60
+ fileHash,
61
+ fileSizeBytes: stats.size,
62
+ fileModifiedAt: stats.mtime.toISOString(), // modification time serialized with toISOString()
63
+ });
64
+ return def.enrich ? def.enrich(sets, docNode, resolvedOpts) : sets; // enrich's return value, when the hook exists, replaces the walkToChunks result
65
+ },
66
+ async canProcess(filePath) {
67
+ if (ignore.some((p) => minimatch(filePath, p, { matchBase: true }))) { // ignore patterns are checked first and win over def.patterns
68
+ return false;
69
+ }
70
+ return def.patterns.some((p) => minimatch(filePath, p, { matchBase: true })); // otherwise accept when at least one declared pattern matches
71
+ },
72
+ };
73
+ };
74
+ }
75
+ //# sourceMappingURL=factory.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"factory.js","sourceRoot":"","sources":["../src/factory.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,kBAAkB,CAAC;AAClD,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAEtC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAwC5C;;;;;;;GAOG;AACH,MAAM,UAAU,mBAAmB,CACjC,GAA+B;IAE/B,OAAO,CAAC,IAAe,EAAmB,EAAE;QAC1C,MAAM,YAAY,GAAG,CAAC,IAAI,IAAI,EAAE,CAAa,CAAC;QAC9C,MAAM,MAAM,GAAG,YAAY,CAAC,MAAM,IAAI,EAAE,CAAC;QACzC,IAAI,QAAQ,GAAiE,IAAI,CAAC;QAElF,SAAS,UAAU;YACjB,IAAI,QAAQ,IAAI,IAAI,EAAE,CAAC;gBACrB,QAAQ,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC;YAC/B,CAAC;YACD,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,OAAO;YACL,IAAI,EAAE,GAAG,CAAC,IAAI;YACd,QAAQ,EAAE,GAAG,CAAC,QAAQ;YAEtB,KAAK,CAAC,KAAK,CAAC,QAAgB,EAAE,UAAkB;gBAC9C,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;gBAC7E,MAAM,QAAQ,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBAChE,MAAM,OAAO,GAAG,UAAU,EAAE,CAAC;gBAC7B,MAAM,GAAG,GAAG,GAAG,CAAC,UAAU,CAAC,OAAO,EAAE,GAAG,EAAE,YAAY,CAAC,CAAC;gBACvD,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAY,CAAC;gBAE3C,sEAAsE;gBACtE,sDAAsD;gBACtD,MAAM,KAAK,GAAG,GAAG,CAAC,aAAa,CAAC,CAAC,CAAC,GAAG,CAAC,aAAa,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBACvE,MAAM,IAAI,GAAG,YAAY,CAAC,OAAO,EAAE;oBACjC,GAAG,KAAK;oBACR,GAAG,CAAC,YAAY,CAAC,SAAS,IAAI,IAAI;wBAChC,CAAC,CAAC,EAAE,SAAS,EAAE,YAAY,CAAC,SAAS,EAAE;wBACvC,CAAC,CAAC,EAAE,CAAC;oBACP,GAAG,CAAC,YAAY,CAAC,SAAS,IAAI,IAAI;wBAChC,CAAC,CAAC,EAAE,SAAS,EAAE,YAAY,CAAC,SAAS,EAAE;wBACvC,CAAC,CAAC,EAAE,CAAC;oBACP,GAAG,CAAC,YAAY,CAAC,OAAO,IAAI,IAAI;wBAC9B,CAAC,CAAC,EAAE,OAAO,EAAE,YAAY,CAAC,OAAO,EAAE;wBACnC,CAAC,CAAC,EAAE,CAAC;oBACP,GAAG,CAAC,YAAY,CAAC,eAAe,IAAI,IAAI;wBACtC,CAAC,CAAC,EAAE,eAAe,EAAE,YAAY,CAAC,eAAe,EAAE;wBACnD,CAAC,CAAC,EAAE,CAAC;oBACP,GAAG,CAAC,YAAY,CAAC,YAAY,IAAI,IAAI;wBACnC,CAAC,CAAC,EAAE,YAAY,EAAE,YAAY,CAAC,YAAY,EAAE;wBAC7C,CAAC,CAAC,EAAE,CAAC;oBACP,GAAG,CAAC,YAAY,CAAC,SAAS,IAAI,IAAI;wBAChC,CAAC,CAAC,
EAAE,SAAS,EAAE,YAAY,CAAC,SAAS,EAAE;wBACvC,CAAC,CAAC,EAAE,CAAC;oBACP,UAAU,EAAE,QAAQ;oBACpB,YAAY,EAAE,GAAG,CAAC,YAAY;oBAC9B,UAAU;oBACV,QAAQ,EAAE,GAAG,CAAC,IAAI;oBAClB,QAAQ;oBACR,aAAa,EAAE,KAAK,CAAC,IAAI;oBACzB,cAAc,EAAE,KAAK,CAAC,KAAK,CAAC,WAAW,EAAE;iBAC1C,CAAC,CAAC;gBAEH,OAAO,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,EAAE,YAAY,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YACrE,CAAC;YAED,KAAK,CAAC,UAAU,CAAC,QAAgB;gBAC/B,IAAI,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,QAAQ,EAAE,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;oBACpE,OAAO,KAAK,CAAC;gBACf,CAAC;gBACD,OAAO,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAC7B,SAAS,CAAC,QAAQ,EAAE,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAC5C,CAAC;YACJ,CAAC;SACF,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC"}
package/dist/index.d.ts CHANGED
@@ -1,7 +1,9 @@
1
- export type { DocNode, DocNodeType, DocNodeAttrs, ChunkMeta } from "./types.js";
1
+ export type { DocNode, DocNodeType, DocNodeAttrs, FilterMeta, ChunkMeta, InjectedContext, SearchRepresentation, CandidateChunk, FinalAnswerChunk, ArtifactSet, ArtifactChunker, } from "./types.js";
2
2
  export { walkDocNode } from "./ast-walker.js";
3
3
  export type { TextSegment } from "./ast-walker.js";
4
4
  export { extractOutline } from "./outline.js";
5
5
  export { walkToChunks } from "./chunker.js";
6
- export type { WalkOptions, ChunkResult } from "./chunker.js";
6
+ export type { WalkOptions } from "./chunker.js";
7
+ export { createNativeChunker } from "./factory.js";
8
+ export type { BaseOptions, NativeChunkerDef } from "./factory.js";
7
9
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,YAAY,EAAE,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAChF,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAC9C,YAAY,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AACnD,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC5C,YAAY,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,YAAY,EACV,OAAO,EACP,WAAW,EACX,YAAY,EACZ,UAAU,EACV,SAAS,EACT,eAAe,EACf,oBAAoB,EACpB,cAAc,EACd,gBAAgB,EAChB,WAAW,EACX,eAAe,GAChB,MAAM,YAAY,CAAC;AACpB,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAC9C,YAAY,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AACnD,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC5C,YAAY,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAChD,OAAO,EAAE,mBAAmB,EAAE,MAAM,cAAc,CAAC;AACnD,YAAY,EAAE,WAAW,EAAE,gBAAgB,EAAE,MAAM,cAAc,CAAC"}
package/dist/index.js CHANGED
@@ -1,4 +1,5 @@
1
1
  export { walkDocNode } from "./ast-walker.js";
2
2
  export { extractOutline } from "./outline.js";
3
3
  export { walkToChunks } from "./chunker.js";
4
+ export { createNativeChunker } from "./factory.js";
4
5
  //# sourceMappingURL=index.js.map
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAE9C,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAaA,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAE9C,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE5C,OAAO,EAAE,mBAAmB,EAAE,MAAM,cAAc,CAAC"}
package/dist/types.d.ts CHANGED
@@ -23,29 +23,113 @@ export interface DocNode {
23
23
  text?: string;
24
24
  attrs: DocNodeAttrs;
25
25
  }
26
- export interface ChunkMeta extends Record<string, unknown> {
26
+ /**
27
+ * Filterable metadata stored alongside the Search Representation in the vector
28
+ * index. Contains only fields that are useful for pre-retrieval filtering.
29
+ */
30
+ export interface FilterMeta {
27
31
  sourceFile: string;
28
32
  sourceFormat: string;
33
+ breadcrumb: string[];
29
34
  byteStart: number;
30
35
  byteEnd: number;
31
36
  lineStart?: number;
32
37
  lineEnd?: number;
33
38
  pageStart?: number;
34
39
  pageEnd?: number;
35
- fileSizeBytes?: number;
36
- fileModifiedAt?: string;
37
- fileHash?: string;
38
- breadcrumb: string[];
39
- sectionTitle?: string;
40
- headingLevel?: number;
41
- documentOutline?: string[];
42
40
  lang?: string;
43
41
  codeLanguage?: string;
44
- strategy: string;
45
42
  chunkIndex: number;
46
43
  totalChunks: number;
44
+ strategy: string;
47
45
  estimatedTokens: number;
46
+ fileHash?: string;
47
+ fileModifiedAt?: string;
48
+ fileSizeBytes?: number;
49
+ }
50
+ /**
51
+ * Full enrichment payload carried by CandidateChunk. A superset of FilterMeta
52
+ * that includes hierarchy details, sibling links, and format-specific fields
53
+ * used during cross-encoder re-ranking.
54
+ */
55
+ export interface ChunkMeta extends FilterMeta {
56
+ sectionTitle?: string;
57
+ headingLevel?: number;
58
+ documentOutline?: string[];
59
+ siblingPrev?: string;
60
+ siblingNext?: string;
48
61
  qualityScore?: number;
49
62
  truncated?: boolean;
63
+ fqn?: string;
64
+ imports?: string[];
65
+ inheritanceChain?: string[];
66
+ sheetName?: string;
67
+ columnHeaders?: string[];
68
+ cellReference?: string;
69
+ formulaDependencies?: string[];
70
+ keywords?: string[];
71
+ summary?: string;
72
+ nerEntities?: Array<{
73
+ text: string;
74
+ label: string;
75
+ }>;
76
+ }
77
+ /** Extra context injected into FinalAnswerChunk that was NOT used for search. */
78
+ export interface InjectedContext {
79
+ parentSectionText?: string;
80
+ imports?: string[];
81
+ fqnDeclarations?: string[];
82
+ neighborPrev?: string;
83
+ neighborNext?: string;
84
+ }
85
+ /**
86
+ * Stored in the vector index. Contains sanitized anchor text for dense/sparse
87
+ * retrieval and filterable metadata only — no raw content dump.
88
+ */
89
+ export interface SearchRepresentation {
90
+ id: string;
91
+ anchorText: string;
92
+ sparseTerms?: string[];
93
+ filterMetadata: FilterMeta;
94
+ }
95
+ /**
96
+ * Returned by ANN retrieval. Contains a short preview and full metadata
97
+ * for cross-encoder re-ranking. Fetched from the vector store payload.
98
+ */
99
+ export interface CandidateChunk {
100
+ id: string;
101
+ preview: string;
102
+ fullMeta: ChunkMeta;
103
+ }
104
+ /**
105
+ * Fetched after re-ranking. Contains the full raw text (with optional boundary
106
+ * padding and injected context) that is passed to the LLM prompt.
107
+ */
108
+ export interface FinalAnswerChunk {
109
+ id: string;
110
+ content: string;
111
+ paddedContent?: string;
112
+ injectedContext?: InjectedContext;
113
+ }
114
+ /**
115
+ * The atomic unit produced by walkToChunks — one per logical segment.
116
+ * Encapsulates all three artifact tiers derived from the same source window.
117
+ */
118
+ export interface ArtifactSet {
119
+ sourceFile: string;
120
+ commitHash: string;
121
+ searchRepresentation: SearchRepresentation;
122
+ candidateChunk: CandidateChunk;
123
+ finalAnswerChunk: FinalAnswerChunk;
124
+ }
125
+ /**
126
+ * Upgraded chunker contract that returns ArtifactSet[].
127
+ * Replaces the virage-core FileChunker once Phase 5 lands in the main repo.
128
+ */
129
+ export interface ArtifactChunker {
130
+ name: string;
131
+ patterns: string[];
132
+ chunk(filePath: string, commitHash: string): Promise<ArtifactSet[]>;
133
+ canProcess?(filePath: string, content?: string): Promise<boolean>;
50
134
  }
51
135
  //# sourceMappingURL=types.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAIA,MAAM,MAAM,WAAW,GACnB,UAAU,GACV,SAAS,GACT,SAAS,GACT,WAAW,GACX,OAAO,GACP,WAAW,GACX,YAAY,GACZ,MAAM,GACN,WAAW,GACX,MAAM,GACN,SAAS,GACT,OAAO,GACP,MAAM,GACN,UAAU,GACV,SAAS,GACT,UAAU,GACV,UAAU,CAAC;AAEf,MAAM,WAAW,YAAY;IAC3B,YAAY,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IACrC,IAAI,CAAC,EAAE,SAAS,GAAG,UAAU,GAAG,UAAU,GAAG,WAAW,GAAG,QAAQ,GAAG,QAAQ,CAAC;IAC/E,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,OAAO;IACtB,IAAI,EAAE,WAAW,CAAC;IAClB,QAAQ,CAAC,EAAE,OAAO,EAAE,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,YAAY,CAAC;CACrB;AAID,MAAM,WAAW,SAAU,SAAQ,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC;IAExD,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IAGjB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAGlB,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAG3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,YAAY,CAAC,EAAE,MAAM,CAAC;IAGtB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IAGpB,eAAe,EAAE,MAAM,CAAC;IACxB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB"}
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAIA,MAAM,MAAM,WAAW,GACnB,UAAU,GACV,SAAS,GACT,SAAS,GACT,WAAW,GACX,OAAO,GACP,WAAW,GACX,YAAY,GACZ,MAAM,GACN,WAAW,GACX,MAAM,GACN,SAAS,GACT,OAAO,GACP,MAAM,GACN,UAAU,GACV,SAAS,GACT,UAAU,GACV,UAAU,CAAC;AAEf,MAAM,WAAW,YAAY;IAC3B,YAAY,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IACrC,IAAI,CAAC,EAAE,SAAS,GAAG,UAAU,GAAG,UAAU,GAAG,WAAW,GAAG,QAAQ,GAAG,QAAQ,CAAC;IAC/E,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,OAAO;IACtB,IAAI,EAAE,WAAW,CAAC;IAClB,QAAQ,CAAC,EAAE,OAAO,EAAE,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,YAAY,CAAC;CACrB;AAID;;;GAGG;AACH,MAAM,WAAW,UAAU;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,CAAC;IACjB,eAAe,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED;;;;GAIG;AACH,MAAM,WAAW,SAAU,SAAQ,UAAU;IAC3C,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,CAAC,EAAE,OAAO,CAAC;IAGpB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;IAG5B,SAAS,CAAC,
EAAE,MAAM,CAAC;IACnB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,mBAAmB,CAAC,EAAE,MAAM,EAAE,CAAC;IAG/B,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IACpB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CACtD;AAED,iFAAiF;AACjF,MAAM,WAAW,eAAe;IAC9B,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAID;;;GAGG;AACH,MAAM,WAAW,oBAAoB;IACnC,EAAE,EAAE,MAAM,CAAC;IACX,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IACvB,cAAc,EAAE,UAAU,CAAC;CAC5B;AAED;;;GAGG;AACH,MAAM,WAAW,cAAc;IAC7B,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,SAAS,CAAC;CACrB;AAED;;;GAGG;AACH,MAAM,WAAW,gBAAgB;IAC/B,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,eAAe,CAAC,EAAE,eAAe,CAAC;CACnC;AAED;;;GAGG;AACH,MAAM,WAAW,WAAW;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,oBAAoB,EAAE,oBAAoB,CAAC;IAC3C,cAAc,EAAE,cAAc,CAAC;IAC/B,gBAAgB,EAAE,gBAAgB,CAAC;CACpC;AAED;;;GAGG;AACH,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,KAAK,CAAC,QAAQ,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC;IACpE,UAAU,CAAC,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;CACnE"}
package/package.json CHANGED
@@ -1,20 +1,25 @@
1
1
  {
2
2
  "name": "@vivantel/virage-chunker-ce-ast",
3
- "version": "0.1.0",
3
+ "version": "0.1.2",
4
4
  "description": "Generalized ViDoc AST walker — shared chunking strategy for all structured document formats",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
7
7
  "exports": {
8
8
  ".": {
9
9
  "import": "./dist/index.js",
10
- "types": "./dist/index.d.ts"
10
+ "types": "./dist/index.d.ts"
11
11
  }
12
12
  },
13
- "files": ["dist"],
13
+ "files": [
14
+ "dist"
15
+ ],
14
16
  "scripts": {
15
- "build": "tsc --build",
17
+ "build": "tsc --build",
16
18
  "type-check": "tsc --build --noEmit",
17
- "test": "vitest run"
19
+ "test": "vitest run"
20
+ },
21
+ "dependencies": {
22
+ "minimatch": "^10.0.0"
18
23
  },
19
24
  "peerDependencies": {
20
25
  "@vivantel/virage-core": ">=0.2"