@fragments-sdk/context 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-3FEHRHFQ.js +103 -0
- package/dist/chunk-3VPR67FN.js +76 -0
- package/dist/chunk-HINI3FCI.js +42 -0
- package/dist/chunk-JFV27WLV.js +168 -0
- package/dist/chunk-KKABP4K4.js +228 -0
- package/dist/chunk-KQIRG24U.js +260 -0
- package/dist/chunk-ZMBYQK43.js +91 -0
- package/dist/chunking/index.d.ts +53 -0
- package/dist/chunking/index.js +11 -0
- package/dist/citations/index.d.ts +94 -0
- package/dist/citations/index.js +10 -0
- package/dist/embeddings/voyage.d.ts +44 -0
- package/dist/embeddings/voyage.js +8 -0
- package/dist/generate/index.d.ts +43 -0
- package/dist/generate/index.js +10 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.js +56 -0
- package/dist/indexing/index.d.ts +52 -0
- package/dist/indexing/index.js +20 -0
- package/dist/search/index.d.ts +29 -0
- package/dist/search/index.js +8 -0
- package/dist/types/index.d.ts +170 -0
- package/dist/types/index.js +0 -0
- package/dist/types-B7duBj6U.d.ts +39 -0
- package/package.json +13 -1
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
import {
|
|
2
|
+
AST_SUPPORTED_LANGUAGES,
|
|
3
|
+
getTreeSitterGrammar
|
|
4
|
+
} from "./chunk-JFV27WLV.js";
|
|
5
|
+
|
|
6
|
+
// src/chunking/line-chunker.ts
/**
 * Line-based chunker: splits file content into fixed-size line windows with
 * overlap between consecutive chunks. Used as the fallback when AST parsing
 * is unavailable for a language.
 *
 * @param {string} content  file text to split
 * @param {string} filePath recorded on each chunk and prepended as a "// File:" header
 * @param {string} language language tag copied onto each chunk
 * @param {object} [options] supports maxChunkLines (default 50) and overlapLines (default 10)
 * @returns {Array<object>} chunks with 1-indexed, inclusive startLine/endLine;
 *   empty array for empty content
 */
function chunkByLines(content, filePath, language, options) {
  const chunkSize = options?.maxChunkLines ?? 50;
  const overlap = options?.overlapLines ?? 10;
  // Guard against overlapLines >= maxChunkLines: the unclamped step
  // (chunkSize - overlap) would be <= 0 and the window would never advance,
  // hanging the loop below. Clamp to at least one line of progress.
  const step = Math.max(1, chunkSize - overlap);
  const lines = content.split("\n");
  if (lines.length === 0 || lines.length === 1 && lines[0].trim() === "") {
    return [];
  }
  const header = `// File: ${filePath}\n`;
  const chunks = [];
  if (lines.length <= chunkSize) {
    // Whole file fits in a single chunk.
    chunks.push({
      content: header + lines.join("\n"),
      filePath,
      startLine: 1,
      endLine: lines.length,
      language,
      chunkType: "module"
    });
    return chunks;
  }
  let start = 0;
  while (start < lines.length) {
    const end = Math.min(start + chunkSize, lines.length);
    chunks.push({
      content: header + lines.slice(start, end).join("\n"),
      filePath,
      startLine: start + 1,
      endLine: end,
      language,
      chunkType: "block"
    });
    if (end >= lines.length) break;
    start += step;
  }
  return chunks;
}
|
|
44
|
+
|
|
45
|
+
// src/chunking/ast-chunker.ts
// Tree-sitter node types treated as chunkable top-level symbols: when a node
// of one of these types fits the size budget it becomes its own chunk.
// Several names repeat across the per-language sections below (e.g.
// "function_declaration" for both JS and Go); the Set simply dedupes them.
var SYMBOL_NODE_TYPES = /* @__PURE__ */ new Set([
  // TypeScript / JavaScript
  "function_declaration",
  "method_definition",
  "arrow_function",
  "class_declaration",
  "interface_declaration",
  "type_alias_declaration",
  "enum_declaration",
  "export_statement",
  "lexical_declaration",
  "variable_declaration",
  // Python
  "function_definition",
  "class_definition",
  "decorated_definition",
  // Go
  "function_declaration",
  "method_declaration",
  "type_declaration",
  // Rust
  "function_item",
  "impl_item",
  "struct_item",
  "enum_item",
  "trait_item",
  // Java
  "method_declaration",
  "class_declaration",
  "interface_declaration",
  "enum_declaration",
  "constructor_declaration"
]);
|
79
|
+
// Node types whose tree-sitter grammar exposes a "name" field; getNodeName()
// only queries childForFieldName("name") for these types.
var NAME_FIELD_NODES = /* @__PURE__ */ new Set([
  "function_declaration",
  "method_definition",
  "class_declaration",
  "interface_declaration",
  "type_alias_declaration",
  "enum_declaration",
  "function_definition",
  "class_definition",
  "function_item",
  "impl_item",
  "struct_item",
  "enum_item",
  "trait_item",
  "method_declaration",
  "constructor_declaration"
]);
|
|
96
|
+
// Module-level cache for the lazily imported web-tree-sitter namespace and
// its one-time WASM initialization flag.
var tsModule = null;
var initialized = false;
/**
 * Lazily imports web-tree-sitter and runs its one-time Parser.init()
 * bootstrap. Subsequent calls return the cached module immediately.
 * @returns {Promise<object>} the web-tree-sitter module namespace
 */
async function getTreeSitter() {
  if (tsModule === null) {
    tsModule = await import("web-tree-sitter");
  }
  if (initialized === false) {
    await tsModule.Parser.init();
    initialized = true;
  }
  return tsModule;
}
|
|
109
|
+
// Maps a grammar key (as returned by getTreeSitterGrammar) to the .wasm
// grammar file path inside its npm package; resolved either against an
// explicit wasmBasePath or via require.resolve in loadLanguage().
var GRAMMAR_PACKAGES = {
  typescript: "tree-sitter-typescript/tree-sitter-typescript.wasm",
  tsx: "tree-sitter-typescript/tree-sitter-tsx.wasm",
  javascript: "tree-sitter-javascript/tree-sitter-javascript.wasm",
  python: "tree-sitter-python/tree-sitter-python.wasm",
  go: "tree-sitter-go/tree-sitter-go.wasm",
  rust: "tree-sitter-rust/tree-sitter-rust.wasm",
  java: "tree-sitter-java/tree-sitter-java.wasm"
};
// Memoizes loaded Language objects per grammar key so each .wasm file is
// loaded at most once per process.
var languageCache = /* @__PURE__ */ new Map();
|
|
119
|
+
/**
 * Loads (and caches) the tree-sitter Language for a grammar key.
 *
 * The .wasm path is either `${wasmBasePath}/<grammarFile>` when a base path
 * is supplied, or resolved from node_modules via createRequire otherwise.
 * Throws when the grammar key has no registered package.
 */
async function loadLanguage(grammarName, wasmBasePath) {
  const hit = languageCache.get(grammarName);
  if (hit) return hit;
  const { Language } = await getTreeSitter();
  const grammarFile = GRAMMAR_PACKAGES[grammarName];
  if (!grammarFile) {
    throw new Error(`No grammar available for: ${grammarName}`);
  }
  const resolveFromNodeModules = async () => {
    const { createRequire } = await import("module");
    return createRequire(import.meta.url).resolve(grammarFile);
  };
  const wasmPath = wasmBasePath
    ? `${wasmBasePath}/${grammarFile}`
    : await resolveFromNodeModules();
  const lang = await Language.load(wasmPath);
  languageCache.set(grammarName, lang);
  return lang;
}
|
|
139
|
+
/**
 * Returns the declared identifier text for a tree-sitter node, when its type
 * is known to carry a "name" field; undefined otherwise (including when the
 * field is absent on a matching node).
 */
function getNodeName(node) {
  if (NAME_FIELD_NODES.has(node.type)) {
    return node.childForFieldName("name")?.text;
  }
  return void 0;
}
|
|
144
|
+
/**
 * Maps a tree-sitter node type to a coarse chunk category:
 * function-like nodes -> "function", type-definition nodes -> "class",
 * everything else -> "block".
 */
function getChunkType(nodeType) {
  // Note: the original also special-cased nodeType === "arrow_function",
  // which is unreachable because includes("function") already matches it.
  if (nodeType.includes("function") || nodeType.includes("method") || nodeType === "constructor_declaration") {
    return "function";
  }
  if (nodeType.includes("class") || nodeType.includes("impl") || nodeType.includes("struct") || nodeType.includes("trait") || nodeType.includes("interface") || nodeType.includes("enum")) {
    return "class";
  }
  return "block";
}
|
|
153
|
+
/**
 * Recursively walks a tree-sitter syntax node and extracts chunk candidates.
 *
 * Three cases, in order:
 *  1. Node fits the character budget AND is a recognized symbol type
 *     (SYMBOL_NODE_TYPES): emit it as a single named chunk with its scope chain.
 *  2. Node fits the budget AND is a leaf: emit it as an anonymous "block" chunk.
 *  3. Otherwise recurse into named children, extending the dot-separated scope
 *     chain with this node's name (if any), then greedily merge undersized
 *     adjacent siblings via mergeSmallChunks().
 *
 * Line numbers are converted from tree-sitter's 0-based rows to 1-based.
 */
function extractChunks(node, parentScope, maxChars, minChars) {
  const text = node.text;
  const charCount = text.length;
  if (charCount <= maxChars && SYMBOL_NODE_TYPES.has(node.type)) {
    const name2 = getNodeName(node);
    // Append the node's own name to the inherited scope when both exist.
    const scopeChain = parentScope ? name2 ? `${parentScope}.${name2}` : parentScope : name2;
    return [{
      text,
      startLine: node.startPosition.row + 1,
      endLine: node.endPosition.row + 1,
      symbolName: name2,
      scopeChain,
      chunkType: getChunkType(node.type)
    }];
  }
  if (charCount <= maxChars && node.childCount === 0) {
    // Leaf node that fits the budget but isn't a recognized symbol.
    return [{
      text,
      startLine: node.startPosition.row + 1,
      endLine: node.endPosition.row + 1,
      chunkType: "block"
    }];
  }
  // Too large (or an unnamed container): descend into named children,
  // carrying the extended scope chain down the tree.
  const name = getNodeName(node);
  const currentScope = parentScope ? name ? `${parentScope}.${name}` : parentScope : name ?? "";
  const childChunks = [];
  for (const child of node.namedChildren) {
    const chunks = extractChunks(child, currentScope, maxChars, minChars);
    childChunks.push(...chunks);
  }
  return mergeSmallChunks(childChunks, maxChars, minChars);
}
|
|
185
|
+
/**
 * Greedily merges adjacent sibling chunks: while the accumulated chunk is
 * still under minChars and the pair would fit within maxChars, the next
 * chunk is folded into it. Name/scope metadata comes from the first chunk
 * that has it; chunkType falls back to "mixed" when neither side is named.
 */
function mergeSmallChunks(chunks, maxChars, minChars) {
  if (chunks.length === 0) return chunks;
  const merged = [];
  let acc = chunks[0];
  for (const next of chunks.slice(1)) {
    const fitsBudget = acc.text.length + next.text.length + 1 <= maxChars;
    if (fitsBudget && acc.text.length < minChars) {
      acc = {
        text: `${acc.text}\n${next.text}`,
        startLine: acc.startLine,
        endLine: next.endLine,
        symbolName: acc.symbolName ?? next.symbolName,
        scopeChain: acc.scopeChain ?? next.scopeChain,
        chunkType: acc.symbolName ? acc.chunkType : next.symbolName ? next.chunkType : "mixed"
      };
    } else {
      merged.push(acc);
      acc = next;
    }
  }
  merged.push(acc);
  return merged;
}
|
|
209
|
+
/**
 * AST-aware chunker: parses `content` with the tree-sitter grammar named by
 * `grammarName`, extracts symbol-aligned chunks via extractChunks(), and
 * annotates each with a "// File:" header (plus a "// Scope:" header when a
 * scope chain is known). Falls back to one whole-file "module" chunk when
 * extraction yields nothing.
 */
async function chunkByAST(content, filePath, language, grammarName, options) {
  const maxChars = options?.maxChunkChars ?? 1500;
  const minChars = options?.minChunkChars ?? 100;
  const lang = await loadLanguage(grammarName, options?.wasmBasePath);
  const { Parser } = await getTreeSitter();
  const parser = new Parser();
  parser.setLanguage(lang);
  const tree = parser.parse(content);
  const rawChunks = extractChunks(tree.rootNode, "", maxChars, minChars);
  const fileHeader = `// File: ${filePath}\n`;
  if (rawChunks.length === 0) {
    // Nothing extracted: return the whole file as a single module chunk.
    return [{
      content: fileHeader + content,
      filePath,
      startLine: 1,
      endLine: content.split("\n").length,
      language,
      chunkType: "module"
    }];
  }
  return rawChunks.map((chunk) => {
    const scopeHeader = chunk.scopeChain ? `// Scope: ${chunk.scopeChain}\n` : "";
    return {
      content: fileHeader + scopeHeader + chunk.text,
      filePath,
      startLine: chunk.startLine,
      endLine: chunk.endLine,
      language,
      symbolName: chunk.symbolName,
      scopeChain: chunk.scopeChain,
      chunkType: chunk.chunkType
    };
  });
}
|
|
242
|
+
|
|
243
|
+
// src/chunking/index.ts
/**
 * Main chunking entry point. Prefers AST-aware chunking when the file's
 * extension maps to a supported tree-sitter grammar; on any AST failure it
 * logs a warning and falls back to line-based chunking.
 */
async function chunkFile(content, filePath, language, options) {
  const grammar = getTreeSitterGrammar(filePath);
  const astEligible = Boolean(grammar) && AST_SUPPORTED_LANGUAGES.has(grammar);
  if (astEligible) {
    try {
      return await chunkByAST(content, filePath, language, grammar, options);
    } catch (err) {
      // Best-effort: AST errors must never prevent indexing the file.
      console.warn(`AST chunking failed for ${filePath}, falling back to line-based:`, err);
    }
  }
  return chunkByLines(content, filePath, language, options);
}
|
|
255
|
+
|
|
256
|
+
// Public surface of this bundle chunk, re-exported by dist/chunking/index.js.
export {
  chunkByLines,
  chunkByAST,
  chunkFile
};
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
// src/search/fusion.ts
/**
 * Reciprocal Rank Fusion: combines several ranked result lists into one.
 * Each appearance of an id contributes 1 / (k + rank + 1) to its fused score
 * (rank is 0-based within its list), and the contributing list's label is
 * recorded in `sources`. Results are returned sorted by descending score.
 */
function reciprocalRankFusion(resultSets, k = 60) {
  const accum = /* @__PURE__ */ new Map();
  for (const { label, results } of resultSets) {
    results.forEach((result, rank) => {
      const contribution = 1 / (k + rank + 1);
      const entry = accum.get(result.id);
      if (entry === undefined) {
        accum.set(result.id, { score: contribution, sources: [label] });
      } else {
        entry.score += contribution;
        entry.sources.push(label);
      }
    });
  }
  return [...accum.entries()]
    .map(([id, { score, sources }]) => ({ id, fusedScore: score, sources }))
    .sort((a, b) => b.fusedScore - a.fusedScore);
}
|
|
27
|
+
|
|
28
|
+
// src/search/deduplicator.ts
/**
 * Collapses overlapping search hits that point at the same file.
 *
 * Chunks are grouped by filePath; groups with more than one chunk are sorted
 * by startLine and merged via mergeOverlapping(). The combined list is
 * returned sorted by descending score. Inputs of length 0 or 1 are returned
 * unchanged.
 */
function deduplicateChunks(chunks) {
  if (chunks.length <= 1) return chunks;
  const byFile = /* @__PURE__ */ new Map();
  for (const chunk of chunks) {
    const bucket = byFile.get(chunk.filePath);
    if (bucket === undefined) {
      byFile.set(chunk.filePath, [chunk]);
    } else {
      bucket.push(chunk);
    }
  }
  const deduped = [];
  for (const fileChunks of byFile.values()) {
    if (fileChunks.length > 1) {
      fileChunks.sort((a, b) => a.startLine - b.startLine);
      deduped.push(...mergeOverlapping(fileChunks));
    } else {
      deduped.push(fileChunks[0]);
    }
  }
  deduped.sort((a, b) => b.score - a.score);
  return deduped;
}
|
|
53
|
+
/**
 * Merges same-file chunks whose line ranges overlap or are adjacent.
 *
 * Expects a non-empty array pre-sorted by startLine (deduplicateChunks sorts
 * before calling). When the next chunk starts at or before current.endLine + 1,
 * only its lines beyond the current end are appended; a fully contained chunk
 * contributes no text but can still raise the score. The merged chunk keeps
 * the higher score and the symbolName of the higher-scored side.
 *
 * Fix vs. original: removed the unused local `overlapStart`.
 *
 * NOTE(review): the slice math assumes each chunk's `content` holds exactly
 * the source lines startLine..endLine — confirm callers strip any injected
 * "// File:" header lines before deduplication.
 */
function mergeOverlapping(chunks) {
  const merged = [];
  let current = { ...chunks[0] };
  for (let i = 1; i < chunks.length; i++) {
    const next = chunks[i];
    if (next.startLine <= current.endLine + 1) {
      const currentLines = current.content.split("\n");
      // Lines of `next` that extend past the current chunk's end.
      const newLines = next.content.split("\n").slice(current.endLine - next.startLine + 1);
      if (newLines.length > 0) {
        current = {
          ...current,
          content: currentLines.join("\n") + "\n" + newLines.join("\n"),
          endLine: Math.max(current.endLine, next.endLine),
          score: Math.max(current.score, next.score),
          // Keep symbolName from higher-scored chunk
          symbolName: current.score >= next.score ? current.symbolName : next.symbolName
        };
      } else {
        // `next` is fully contained: no new text, just fold in its score.
        current = {
          ...current,
          endLine: Math.max(current.endLine, next.endLine),
          score: Math.max(current.score, next.score)
        };
      }
    } else {
      merged.push(current);
      current = { ...next };
    }
  }
  merged.push(current);
  return merged;
}
|
|
87
|
+
|
|
88
|
+
// Public surface of this bundle chunk, re-exported by dist/search/index.js.
export {
  reciprocalRankFusion,
  deduplicateChunks
};
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/** One chunk of a source file produced by the chunkers, with provenance metadata. */
interface CodeChunk {
    /** Chunk text; chunkers prefix a "// File: <path>" header line (and a "// Scope:" line when known). */
    content: string;
    /** Path of the source file this chunk came from. */
    filePath: string;
    /** 1-indexed first source line of the chunk. */
    startLine: number;
    /** 1-indexed last source line (inclusive). */
    endLine: number;
    /** Language tag recorded for the source file. */
    language: string;
    /** Name of the top-level symbol (function, class, etc.) this chunk represents. */
    symbolName?: string;
    /** Dot-separated scope chain, e.g. "UserService.login". */
    scopeChain?: string;
    /** What kind of AST node this chunk represents. */
    chunkType?: "function" | "class" | "module" | "block" | "mixed";
}
/** Tuning knobs shared by the AST and line-based chunkers. */
interface ChunkOptions {
    /** Max characters per chunk (default: 1500). */
    maxChunkChars?: number;
    /** Min characters to avoid tiny chunks (default: 100). */
    minChunkChars?: number;
    /** Number of overlap lines for line-based fallback (default: 10). */
    overlapLines?: number;
    /** Max lines per chunk for line-based fallback (default: 50). */
    maxChunkLines?: number;
}

interface ASTChunkerOptions extends ChunkOptions {
    /** Base path for WASM grammar files. If not set, resolves from node_modules. */
    wasmBasePath?: string;
}
/**
 * AST-aware chunker using tree-sitter.
 *
 * Algorithm: parse AST → if node fits budget, keep as one chunk →
 * if too large, recurse into children → greedily merge adjacent small siblings.
 *
 * Each chunk is enriched with file path, scope chain, and symbol name.
 */
declare function chunkByAST(content: string, filePath: string, language: string, grammarName: string, options?: ASTChunkerOptions): Promise<CodeChunk[]>;

/**
 * Line-based chunker — splits file content into fixed-size chunks with overlap.
 * Used as fallback when AST parsing is unavailable for a language.
 */
declare function chunkByLines(content: string, filePath: string, language: string, options?: ChunkOptions): CodeChunk[];

/**
 * Main entry point for chunking a file.
 *
 * Tries AST-aware chunking if tree-sitter supports the language,
 * falls back to line-based chunking otherwise.
 */
declare function chunkFile(content: string, filePath: string, language: string, options?: ASTChunkerOptions): Promise<CodeChunk[]>;

export { type ASTChunkerOptions, type ChunkOptions, type CodeChunk, chunkByAST, chunkByLines, chunkFile };
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import { a as ScoredChunk } from '../types-B7duBj6U.js';

/**
 * An Anthropic-compatible document content block with citations enabled.
 * The source is either a list of text sub-blocks (code chunks) or a single
 * plain-text payload.
 */
interface CitationDocumentBlock {
    type: "document";
    source: {
        type: "content";
        content: Array<{
            type: "text";
            text: string;
        }>;
    } | {
        type: "text";
        media_type: "text/plain";
        data: string;
    };
    title?: string | null;
    context?: string | null;
    citations: {
        enabled: true;
    };
}
/** Maps one text sub-block of a document back to its source chunk. */
interface ChunkMapping {
    /** Position in the content array of the document block. */
    blockIndex: number;
    /** 1-indexed first source line of the chunk. */
    startLine: number;
    /** 1-indexed last source line (inclusive). */
    endLine: number;
    language: string;
    symbolName?: string;
    /** Relevance score carried over from search/dedup. */
    score: number;
}
/** Maps one document sent to the API back to its source file and chunks. */
interface DocumentMapping {
    /** Index of this document in the documents array sent to Anthropic. */
    documentIndex: number;
    filePath: string;
    sourceType: "code" | "plaintext";
    chunks: ChunkMapping[];
}
/** A raw API citation enriched with file/line metadata from the document map. */
interface ResolvedCitation {
    /** 1-indexed display number. */
    index: number;
    citedText: string;
    documentTitle: string | null;
    filePath: string;
    sourceType: "code" | "plaintext";
    startLine?: number;
    endLine?: number;
    language?: string;
    symbolName?: string;
    /** Character position in the response text where this citation appears. */
    textOffset: number;
}
interface CitationDocumentOptions {
    /** Extra plain-text documents to include (e.g. spec content, commit history). */
    additionalDocuments?: Array<{
        title: string;
        content: string;
    }>;
}
/** Raw citation payload as returned by Anthropic (snake_case field names mirror the API). */
interface RawCitation {
    type: "content_block_location" | "char_location";
    cited_text: string;
    document_index: number;
    document_title: string | null;
    start_block_index?: number;
    end_block_index?: number;
    start_char_index?: number;
    end_char_index?: number;
}

/** Output of buildCitationDocuments: API-ready documents plus the reverse mapping. */
interface BuildResult {
    documents: CitationDocumentBlock[];
    documentMap: DocumentMapping[];
}
/**
 * Converts deduplicated ScoredChunk[] into Anthropic-compatible citation
 * document blocks with a mapping table for resolving raw citations back
 * to source file/line metadata.
 *
 * Pipeline position: after deduplicateChunks() → before Anthropic API call.
 */
declare function buildCitationDocuments(chunks: ScoredChunk[], options?: CitationDocumentOptions): BuildResult;

/**
 * Resolves a single raw Anthropic citation against the document map,
 * producing a fully enriched ResolvedCitation with file/line metadata.
 */
declare function resolveCitation(raw: RawCitation, documentMap: DocumentMapping[], citationIndex: number, textOffset: number): ResolvedCitation;
/**
 * Batch-resolves an array of raw citations. Auto-increments citation index
 * starting from 1.
 */
declare function resolveCitations(raws: RawCitation[], documentMap: DocumentMapping[], textOffsets: number[]): ResolvedCitation[];

export { type ChunkMapping, type CitationDocumentBlock, type CitationDocumentOptions, type DocumentMapping, type RawCitation, type ResolvedCitation, buildCitationDocuments, resolveCitation, resolveCitations };
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
interface EmbeddingOptions {
    /** API key for the embedding provider. */
    apiKey: string;
    /** Whether the input is a document (for indexing) or a query (for search). */
    inputType: "document" | "query";
    /** Number of dimensions for the embedding vector (default: 1024). */
    dimensions?: number;
    /** Max characters per text before truncation (default: 32000). */
    maxChars?: number;
    /** Number of texts to embed per API call (default: 128). */
    batchSize?: number;
}
interface RerankOptions {
    /** API key for the reranking provider. */
    apiKey: string;
    /** Number of top results to return (default: 15). */
    topK?: number;
    /** Model to use for reranking (default: "rerank-2.5"). */
    model?: string;
}
/** One embedding vector paired with the index of its input text. */
interface EmbeddingResult {
    embedding: number[];
    /** Position of the corresponding text in the input array. */
    index: number;
}
/** One reranked candidate: input position plus its cross-encoder relevance score. */
interface RerankResult {
    /** Position of the document in the input array. */
    index: number;
    relevanceScore: number;
}

/**
 * Generate embeddings using Voyage AI's code-optimized model.
 *
 * Supports asymmetric embedding: use inputType "document" for indexing
 * and "query" for search queries.
 */
declare function generateEmbeddings(texts: string[], options: EmbeddingOptions): Promise<number[][]>;
/**
 * Rerank documents using Voyage AI's cross-encoder model.
 *
 * Takes a query and candidate documents, returns them reranked by relevance.
 */
declare function rerankResults(query: string, documents: string[], options: RerankOptions): Promise<RerankResult[]>;

export { type EmbeddingOptions, type EmbeddingResult, type RerankOptions, type RerankResult, generateEmbeddings, rerankResults };
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import { CompiledSegment, CompiledBlock } from '../types/index.js';

/**
 * Context generation for AI agents.
 *
 * Generates AI-ready context documents from compiled fragment data.
 */

/**
 * Placeholder patterns to filter out from usage text.
 */
declare const PLACEHOLDER_PATTERNS: RegExp[];
/**
 * Filter out placeholder text from usage arrays
 */
declare function filterPlaceholders(items: string[] | undefined): string[];
/**
 * Options for context generation
 */
interface ContextOptions {
    /** Output format of the generated context document. */
    format?: "markdown" | "json";
    /** Toggles for which sections appear in the output. */
    include?: {
        props?: boolean;
        variants?: boolean;
        usage?: boolean;
        relations?: boolean;
        code?: boolean;
    };
    /** Compact output mode — presumably trims verbosity; confirm against generateContext. */
    compact?: boolean;
}
/**
 * Result of context generation
 */
interface ContextResult {
    /** The generated context document. */
    content: string;
    /** Estimated token count of `content`. */
    tokenEstimate: number;
}
/**
 * Generate AI-ready context from compiled segments and optional blocks
 */
declare function generateContext(segments: CompiledSegment[], options?: ContextOptions, blocks?: CompiledBlock[]): ContextResult;

export { type ContextOptions, type ContextResult, PLACEHOLDER_PATTERNS, filterPlaceholders, generateContext };
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
// Type-level barrel for @fragments-sdk/context: re-exports the public types
// and function signatures from each sub-module's declaration file.
export { AIMetadata, CompiledBlock, CompiledSegment, CompiledSegmentsFile, CompiledTokenData, CompiledTokenEntry, ComponentRelation, PropDefinition, SegmentContract, SegmentGenerated, SegmentMeta, SegmentUsage, Theme, VerifyResult } from './types/index.js';
export { ContextOptions, ContextResult, PLACEHOLDER_PATTERNS, filterPlaceholders, generateContext } from './generate/index.js';
export { ASTChunkerOptions, ChunkOptions, CodeChunk, chunkByAST, chunkByLines, chunkFile } from './chunking/index.js';
export { EmbeddingOptions, EmbeddingResult, RerankOptions, RerankResult, generateEmbeddings, rerankResults } from './embeddings/voyage.js';
export { deduplicateChunks, reciprocalRankFusion } from './search/index.js';
export { F as FusedResult, R as RankedResult, a as ScoredChunk, S as SearchResult } from './types-B7duBj6U.js';
export { AST_SUPPORTED_LANGUAGES, ChangedFiles, FileEntry, GrammarMapping, INDEXABLE_EXTENSIONS, detectLanguage, getTreeSitterGrammar, hashContent, resolveChanges, shouldIndexFile } from './indexing/index.js';
export { ChunkMapping, CitationDocumentBlock, CitationDocumentOptions, DocumentMapping, RawCitation, ResolvedCitation, buildCitationDocuments, resolveCitation, resolveCitations } from './citations/index.js';
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
// Runtime barrel for @fragments-sdk/context: pulls the public API out of the
// bundler-generated chunk files and re-exports it under one entry point.
import {
  chunkByAST,
  chunkByLines,
  chunkFile
} from "./chunk-KQIRG24U.js";
import {
  generateEmbeddings,
  rerankResults
} from "./chunk-3VPR67FN.js";
import {
  deduplicateChunks,
  reciprocalRankFusion
} from "./chunk-ZMBYQK43.js";
import {
  hashContent,
  resolveChanges
} from "./chunk-HINI3FCI.js";
import {
  AST_SUPPORTED_LANGUAGES,
  INDEXABLE_EXTENSIONS,
  detectLanguage,
  getTreeSitterGrammar,
  shouldIndexFile
} from "./chunk-JFV27WLV.js";
import {
  PLACEHOLDER_PATTERNS,
  filterPlaceholders,
  generateContext
} from "./chunk-KKABP4K4.js";
import {
  buildCitationDocuments,
  resolveCitation,
  resolveCitations
} from "./chunk-3FEHRHFQ.js";
export {
  AST_SUPPORTED_LANGUAGES,
  INDEXABLE_EXTENSIONS,
  PLACEHOLDER_PATTERNS,
  buildCitationDocuments,
  chunkByAST,
  chunkByLines,
  chunkFile,
  deduplicateChunks,
  detectLanguage,
  filterPlaceholders,
  generateContext,
  generateEmbeddings,
  getTreeSitterGrammar,
  hashContent,
  reciprocalRankFusion,
  rerankResults,
  resolveChanges,
  resolveCitation,
  resolveCitations,
  shouldIndexFile
};
|