@fragments-sdk/context 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +62 -0
- package/package.json +10 -10
- package/dist/chunk-3VPR67FN.js +0 -76
- package/dist/chunk-HINI3FCI.js +0 -42
- package/dist/chunk-JFV27WLV.js +0 -168
- package/dist/chunk-KQIRG24U.js +0 -260
- package/dist/chunk-ZMBYQK43.js +0 -91
- package/dist/chunking/index.d.ts +0 -53
- package/dist/chunking/index.js +0 -11
- package/dist/embeddings/voyage.d.ts +0 -44
- package/dist/embeddings/voyage.js +0 -8
- package/dist/index.d.ts +0 -4
- package/dist/index.js +0 -40
- package/dist/indexing/index.d.ts +0 -52
- package/dist/indexing/index.js +0 -20
- package/dist/search/index.d.ts +0 -64
- package/dist/search/index.js +0 -8
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Conan McNicholl
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# @fragments-sdk/context
|
|
2
|
+
|
|
3
|
+
Code intelligence and RAG primitives for codebase indexing and search.
|
|
4
|
+
|
|
5
|
+
## Modules
|
|
6
|
+
|
|
7
|
+
### Chunking (`@fragments-sdk/context/chunking`)
|
|
8
|
+
|
|
9
|
+
Split source files into semantically meaningful chunks for embedding.
|
|
10
|
+
|
|
11
|
+
- **`chunkFile(content, filePath, language)`** — Auto-selects AST or line-based chunking
|
|
12
|
+
- **`chunkByAST(content, filePath, language, grammar)`** — Tree-sitter AST-aware chunking (preserves functions, classes, etc.)
|
|
13
|
+
- **`chunkByLines(content, filePath, language)`** — Sliding-window fallback
|
|
14
|
+
|
|
15
|
+
AST chunking supports: TypeScript, TSX, JavaScript, Python, Go, Rust, Java.
|
|
16
|
+
|
|
17
|
+
```ts
|
|
18
|
+
import { chunkFile } from "@fragments-sdk/context/chunking";
|
|
19
|
+
|
|
20
|
+
const chunks = await chunkFile(code, "src/auth.ts", "typescript");
|
|
21
|
+
// Each chunk has: content, filePath, startLine, endLine, language, symbolName?, scopeChain?
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
### Embeddings (`@fragments-sdk/context/embeddings`)
|
|
25
|
+
|
|
26
|
+
Generate code-optimized embeddings via Voyage AI.
|
|
27
|
+
|
|
28
|
+
- **`generateEmbeddings(texts, options)`** — Batch embed with `voyage-code-3`
|
|
29
|
+
- **`rerankResults(query, documents, options)`** — Cross-encoder reranking with `rerank-2.5`
|
|
30
|
+
|
|
31
|
+
```ts
|
|
32
|
+
import { generateEmbeddings } from "@fragments-sdk/context/embeddings";
|
|
33
|
+
|
|
34
|
+
const vectors = await generateEmbeddings(chunks.map(c => c.content), {
|
|
35
|
+
apiKey: process.env.VOYAGE_API_KEY!,
|
|
36
|
+
inputType: "document",
|
|
37
|
+
});
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### Search (`@fragments-sdk/context/search`)
|
|
41
|
+
|
|
42
|
+
Post-retrieval utilities for combining and deduplicating results.
|
|
43
|
+
|
|
44
|
+
- **`reciprocalRankFusion(resultSets)`** — Merge multiple ranked lists (e.g. vector + keyword)
|
|
45
|
+
- **`deduplicateChunks(chunks)`** — Remove near-duplicate results
|
|
46
|
+
|
|
47
|
+
### Indexing (`@fragments-sdk/context/indexing`)
|
|
48
|
+
|
|
49
|
+
File filtering, change detection, and language detection.
|
|
50
|
+
|
|
51
|
+
- **`shouldIndexFile(path, size)`** — Filter by extension, size, ignored dirs
|
|
52
|
+
- **`detectLanguage(path)`** — Map file extension to language name
|
|
53
|
+
- **`resolveChanges(oldFiles, newFiles)`** — Diff two file trees for incremental re-indexing
|
|
54
|
+
- **`hashContent(content)`** — Content hashing for change detection
|
|
55
|
+
|
|
56
|
+
## Install
|
|
57
|
+
|
|
58
|
+
```sh
|
|
59
|
+
pnpm add @fragments-sdk/context
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Requires Node.js 18+. Tree-sitter WASM grammars are included as dependencies.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fragments-sdk/context",
|
|
3
|
-
"version": "0.1.2",
|
|
3
|
+
"version": "0.1.3",
|
|
4
4
|
"description": "Code intelligence and RAG primitives for design system and codebase indexing",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -34,14 +34,6 @@
|
|
|
34
34
|
"files": [
|
|
35
35
|
"dist"
|
|
36
36
|
],
|
|
37
|
-
"scripts": {
|
|
38
|
-
"build": "tsup",
|
|
39
|
-
"dev": "tsup --watch",
|
|
40
|
-
"lint": "eslint src",
|
|
41
|
-
"test": "vitest run",
|
|
42
|
-
"typecheck": "tsc --noEmit",
|
|
43
|
-
"clean": "rm -rf dist"
|
|
44
|
-
},
|
|
45
37
|
"dependencies": {
|
|
46
38
|
"tree-sitter-go": "^0.25.0",
|
|
47
39
|
"tree-sitter-java": "^0.23.5",
|
|
@@ -56,5 +48,13 @@
|
|
|
56
48
|
"tsup": "^8.3.5",
|
|
57
49
|
"typescript": "^5.7.2",
|
|
58
50
|
"vitest": "^2.1.8"
|
|
51
|
+
},
|
|
52
|
+
"scripts": {
|
|
53
|
+
"build": "tsup",
|
|
54
|
+
"dev": "tsup --watch",
|
|
55
|
+
"lint": "eslint src",
|
|
56
|
+
"test": "vitest run --passWithNoTests",
|
|
57
|
+
"typecheck": "tsc --noEmit",
|
|
58
|
+
"clean": "rm -rf dist"
|
|
59
59
|
}
|
|
60
|
-
}
|
|
60
|
+
}
|
package/dist/chunk-3VPR67FN.js
DELETED
|
@@ -1,76 +0,0 @@
|
|
|
1
|
-
// src/embeddings/voyage.ts
|
|
2
|
-
var VOYAGE_EMBEDDINGS_URL = "https://api.voyageai.com/v1/embeddings";
|
|
3
|
-
var VOYAGE_RERANK_URL = "https://api.voyageai.com/v1/rerank";
|
|
4
|
-
var DEFAULT_MODEL = "voyage-code-3";
|
|
5
|
-
var DEFAULT_RERANK_MODEL = "rerank-2.5";
|
|
6
|
-
var DEFAULT_DIMENSIONS = 1024;
|
|
7
|
-
var DEFAULT_MAX_CHARS = 32e3;
|
|
8
|
-
var DEFAULT_BATCH_SIZE = 128;
|
|
9
|
-
async function generateEmbeddings(texts, options) {
|
|
10
|
-
if (texts.length === 0) return [];
|
|
11
|
-
const maxChars = options.maxChars ?? DEFAULT_MAX_CHARS;
|
|
12
|
-
const batchSize = options.batchSize ?? DEFAULT_BATCH_SIZE;
|
|
13
|
-
const dimensions = options.dimensions ?? DEFAULT_DIMENSIONS;
|
|
14
|
-
const truncated = texts.map(
|
|
15
|
-
(t) => t.length > maxChars ? t.slice(0, maxChars) : t
|
|
16
|
-
);
|
|
17
|
-
const results = [];
|
|
18
|
-
for (let i = 0; i < truncated.length; i += batchSize) {
|
|
19
|
-
const batch = truncated.slice(i, i + batchSize);
|
|
20
|
-
const response = await fetch(VOYAGE_EMBEDDINGS_URL, {
|
|
21
|
-
method: "POST",
|
|
22
|
-
headers: {
|
|
23
|
-
"Content-Type": "application/json",
|
|
24
|
-
Authorization: `Bearer ${options.apiKey}`
|
|
25
|
-
},
|
|
26
|
-
body: JSON.stringify({
|
|
27
|
-
model: DEFAULT_MODEL,
|
|
28
|
-
input: batch,
|
|
29
|
-
input_type: options.inputType,
|
|
30
|
-
output_dimension: dimensions
|
|
31
|
-
})
|
|
32
|
-
});
|
|
33
|
-
if (!response.ok) {
|
|
34
|
-
const body = await response.text();
|
|
35
|
-
throw new Error(`Voyage API error ${response.status}: ${body}`);
|
|
36
|
-
}
|
|
37
|
-
const data = await response.json();
|
|
38
|
-
const sorted = data.data.sort((a, b) => a.index - b.index);
|
|
39
|
-
for (const item of sorted) {
|
|
40
|
-
results.push(item.embedding);
|
|
41
|
-
}
|
|
42
|
-
}
|
|
43
|
-
return results;
|
|
44
|
-
}
|
|
45
|
-
async function rerankResults(query, documents, options) {
|
|
46
|
-
if (documents.length === 0) return [];
|
|
47
|
-
const topK = options.topK ?? 15;
|
|
48
|
-
const model = options.model ?? DEFAULT_RERANK_MODEL;
|
|
49
|
-
const response = await fetch(VOYAGE_RERANK_URL, {
|
|
50
|
-
method: "POST",
|
|
51
|
-
headers: {
|
|
52
|
-
"Content-Type": "application/json",
|
|
53
|
-
Authorization: `Bearer ${options.apiKey}`
|
|
54
|
-
},
|
|
55
|
-
body: JSON.stringify({
|
|
56
|
-
model,
|
|
57
|
-
query,
|
|
58
|
-
documents,
|
|
59
|
-
top_k: topK
|
|
60
|
-
})
|
|
61
|
-
});
|
|
62
|
-
if (!response.ok) {
|
|
63
|
-
const body = await response.text();
|
|
64
|
-
throw new Error(`Voyage Rerank API error ${response.status}: ${body}`);
|
|
65
|
-
}
|
|
66
|
-
const data = await response.json();
|
|
67
|
-
return data.data.map((item) => ({
|
|
68
|
-
index: item.index,
|
|
69
|
-
relevanceScore: item.relevance_score
|
|
70
|
-
}));
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
export {
|
|
74
|
-
generateEmbeddings,
|
|
75
|
-
rerankResults
|
|
76
|
-
};
|
package/dist/chunk-HINI3FCI.js
DELETED
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
// src/indexing/hasher.ts
|
|
2
|
-
import { createHash } from "crypto";
|
|
3
|
-
function hashContent(content) {
|
|
4
|
-
return createHash("sha256").update(content, "utf-8").digest("hex");
|
|
5
|
-
}
|
|
6
|
-
|
|
7
|
-
// src/indexing/diff-resolver.ts
|
|
8
|
-
function resolveChanges(oldEntries, newEntries) {
|
|
9
|
-
const oldMap = /* @__PURE__ */ new Map();
|
|
10
|
-
for (const entry of oldEntries) {
|
|
11
|
-
oldMap.set(entry.filePath, entry);
|
|
12
|
-
}
|
|
13
|
-
const newMap = /* @__PURE__ */ new Map();
|
|
14
|
-
for (const entry of newEntries) {
|
|
15
|
-
newMap.set(entry.filePath, entry);
|
|
16
|
-
}
|
|
17
|
-
const added = [];
|
|
18
|
-
const modified = [];
|
|
19
|
-
const unchanged = [];
|
|
20
|
-
const deleted = [];
|
|
21
|
-
for (const [filePath, newEntry] of newMap) {
|
|
22
|
-
const oldEntry = oldMap.get(filePath);
|
|
23
|
-
if (!oldEntry) {
|
|
24
|
-
added.push(filePath);
|
|
25
|
-
} else if (oldEntry.contentHash !== newEntry.contentHash) {
|
|
26
|
-
modified.push(filePath);
|
|
27
|
-
} else {
|
|
28
|
-
unchanged.push(filePath);
|
|
29
|
-
}
|
|
30
|
-
}
|
|
31
|
-
for (const filePath of oldMap.keys()) {
|
|
32
|
-
if (!newMap.has(filePath)) {
|
|
33
|
-
deleted.push(filePath);
|
|
34
|
-
}
|
|
35
|
-
}
|
|
36
|
-
return { added, modified, deleted, unchanged };
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
export {
|
|
40
|
-
hashContent,
|
|
41
|
-
resolveChanges
|
|
42
|
-
};
|
package/dist/chunk-JFV27WLV.js
DELETED
|
@@ -1,168 +0,0 @@
|
|
|
1
|
-
// src/indexing/file-filter.ts
|
|
2
|
-
var INDEXABLE_EXTENSIONS = /* @__PURE__ */ new Set([
|
|
3
|
-
".ts",
|
|
4
|
-
".tsx",
|
|
5
|
-
".js",
|
|
6
|
-
".jsx",
|
|
7
|
-
".mjs",
|
|
8
|
-
".cjs",
|
|
9
|
-
".py",
|
|
10
|
-
".go",
|
|
11
|
-
".rs",
|
|
12
|
-
".java",
|
|
13
|
-
".kt",
|
|
14
|
-
".swift",
|
|
15
|
-
".c",
|
|
16
|
-
".cpp",
|
|
17
|
-
".h",
|
|
18
|
-
".hpp",
|
|
19
|
-
".cs",
|
|
20
|
-
".rb",
|
|
21
|
-
".php",
|
|
22
|
-
".scala",
|
|
23
|
-
".sh",
|
|
24
|
-
".bash",
|
|
25
|
-
".zsh",
|
|
26
|
-
".sql",
|
|
27
|
-
".graphql",
|
|
28
|
-
".gql",
|
|
29
|
-
".css",
|
|
30
|
-
".scss",
|
|
31
|
-
".less",
|
|
32
|
-
".html",
|
|
33
|
-
".svelte",
|
|
34
|
-
".vue",
|
|
35
|
-
".md",
|
|
36
|
-
".mdx",
|
|
37
|
-
".json",
|
|
38
|
-
".yaml",
|
|
39
|
-
".yml",
|
|
40
|
-
".toml",
|
|
41
|
-
".xml",
|
|
42
|
-
".proto",
|
|
43
|
-
".tf",
|
|
44
|
-
".hcl",
|
|
45
|
-
".dockerfile",
|
|
46
|
-
".prisma"
|
|
47
|
-
]);
|
|
48
|
-
var IGNORED_DIRS = [
|
|
49
|
-
"node_modules/",
|
|
50
|
-
"dist/",
|
|
51
|
-
".git/",
|
|
52
|
-
".next/",
|
|
53
|
-
"__pycache__/",
|
|
54
|
-
".cache/",
|
|
55
|
-
"coverage/",
|
|
56
|
-
"build/",
|
|
57
|
-
".turbo/",
|
|
58
|
-
"vendor/",
|
|
59
|
-
".venv/"
|
|
60
|
-
];
|
|
61
|
-
var IGNORED_FILES = [
|
|
62
|
-
"pnpm-lock.yaml",
|
|
63
|
-
"package-lock.json",
|
|
64
|
-
"yarn.lock",
|
|
65
|
-
"bun.lockb",
|
|
66
|
-
"Cargo.lock",
|
|
67
|
-
"go.sum",
|
|
68
|
-
"poetry.lock",
|
|
69
|
-
"Pipfile.lock",
|
|
70
|
-
"composer.lock",
|
|
71
|
-
"Gemfile.lock"
|
|
72
|
-
];
|
|
73
|
-
var MAX_FILE_SIZE = 100 * 1024;
|
|
74
|
-
function shouldIndexFile(path, size) {
|
|
75
|
-
if (size > MAX_FILE_SIZE) return false;
|
|
76
|
-
if (IGNORED_FILES.includes(path.split("/").pop() ?? "")) return false;
|
|
77
|
-
for (const dir of IGNORED_DIRS) {
|
|
78
|
-
if (path.includes(dir)) return false;
|
|
79
|
-
}
|
|
80
|
-
const ext = getExtension(path);
|
|
81
|
-
if (!ext) {
|
|
82
|
-
const basename = path.split("/").pop() ?? "";
|
|
83
|
-
return basename === "Dockerfile" || basename === "Makefile";
|
|
84
|
-
}
|
|
85
|
-
return INDEXABLE_EXTENSIONS.has(ext);
|
|
86
|
-
}
|
|
87
|
-
var LANGUAGE_MAP = {
|
|
88
|
-
".ts": { language: "typescript", grammar: "typescript" },
|
|
89
|
-
".tsx": { language: "typescript", grammar: "tsx" },
|
|
90
|
-
".js": { language: "javascript", grammar: "javascript" },
|
|
91
|
-
".jsx": { language: "javascript", grammar: "javascript" },
|
|
92
|
-
".mjs": { language: "javascript", grammar: "javascript" },
|
|
93
|
-
".cjs": { language: "javascript", grammar: "javascript" },
|
|
94
|
-
".py": { language: "python", grammar: "python" },
|
|
95
|
-
".go": { language: "go", grammar: "go" },
|
|
96
|
-
".rs": { language: "rust", grammar: "rust" },
|
|
97
|
-
".java": { language: "java", grammar: "java" },
|
|
98
|
-
".kt": { language: "kotlin", grammar: "" },
|
|
99
|
-
".swift": { language: "swift", grammar: "" },
|
|
100
|
-
".c": { language: "c", grammar: "" },
|
|
101
|
-
".cpp": { language: "cpp", grammar: "" },
|
|
102
|
-
".h": { language: "c", grammar: "" },
|
|
103
|
-
".hpp": { language: "cpp", grammar: "" },
|
|
104
|
-
".cs": { language: "csharp", grammar: "" },
|
|
105
|
-
".rb": { language: "ruby", grammar: "" },
|
|
106
|
-
".php": { language: "php", grammar: "" },
|
|
107
|
-
".scala": { language: "scala", grammar: "" },
|
|
108
|
-
".sh": { language: "shell", grammar: "" },
|
|
109
|
-
".bash": { language: "shell", grammar: "" },
|
|
110
|
-
".zsh": { language: "shell", grammar: "" },
|
|
111
|
-
".sql": { language: "sql", grammar: "" },
|
|
112
|
-
".graphql": { language: "graphql", grammar: "" },
|
|
113
|
-
".gql": { language: "graphql", grammar: "" },
|
|
114
|
-
".css": { language: "css", grammar: "" },
|
|
115
|
-
".scss": { language: "scss", grammar: "" },
|
|
116
|
-
".less": { language: "less", grammar: "" },
|
|
117
|
-
".html": { language: "html", grammar: "" },
|
|
118
|
-
".svelte": { language: "svelte", grammar: "" },
|
|
119
|
-
".vue": { language: "vue", grammar: "" },
|
|
120
|
-
".md": { language: "markdown", grammar: "" },
|
|
121
|
-
".mdx": { language: "markdown", grammar: "" },
|
|
122
|
-
".json": { language: "json", grammar: "" },
|
|
123
|
-
".yaml": { language: "yaml", grammar: "" },
|
|
124
|
-
".yml": { language: "yaml", grammar: "" },
|
|
125
|
-
".toml": { language: "toml", grammar: "" },
|
|
126
|
-
".xml": { language: "xml", grammar: "" },
|
|
127
|
-
".proto": { language: "protobuf", grammar: "" },
|
|
128
|
-
".tf": { language: "terraform", grammar: "" },
|
|
129
|
-
".hcl": { language: "hcl", grammar: "" },
|
|
130
|
-
".dockerfile": { language: "dockerfile", grammar: "" },
|
|
131
|
-
".prisma": { language: "prisma", grammar: "" }
|
|
132
|
-
};
|
|
133
|
-
var AST_SUPPORTED_LANGUAGES = /* @__PURE__ */ new Set([
|
|
134
|
-
"typescript",
|
|
135
|
-
"tsx",
|
|
136
|
-
"javascript",
|
|
137
|
-
"python",
|
|
138
|
-
"go",
|
|
139
|
-
"rust",
|
|
140
|
-
"java"
|
|
141
|
-
]);
|
|
142
|
-
function detectLanguage(path) {
|
|
143
|
-
const ext = getExtension(path);
|
|
144
|
-
if (ext && ext in LANGUAGE_MAP) return LANGUAGE_MAP[ext].language;
|
|
145
|
-
const basename = path.split("/").pop() ?? "";
|
|
146
|
-
if (basename === "Dockerfile") return "dockerfile";
|
|
147
|
-
if (basename === "Makefile") return "makefile";
|
|
148
|
-
return "unknown";
|
|
149
|
-
}
|
|
150
|
-
function getTreeSitterGrammar(path) {
|
|
151
|
-
const ext = getExtension(path);
|
|
152
|
-
if (ext && ext in LANGUAGE_MAP) return LANGUAGE_MAP[ext].grammar;
|
|
153
|
-
return "";
|
|
154
|
-
}
|
|
155
|
-
function getExtension(path) {
|
|
156
|
-
const basename = path.split("/").pop() ?? "";
|
|
157
|
-
const dotIdx = basename.lastIndexOf(".");
|
|
158
|
-
if (dotIdx <= 0) return "";
|
|
159
|
-
return basename.slice(dotIdx).toLowerCase();
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
export {
|
|
163
|
-
INDEXABLE_EXTENSIONS,
|
|
164
|
-
shouldIndexFile,
|
|
165
|
-
AST_SUPPORTED_LANGUAGES,
|
|
166
|
-
detectLanguage,
|
|
167
|
-
getTreeSitterGrammar
|
|
168
|
-
};
|
package/dist/chunk-KQIRG24U.js
DELETED
|
@@ -1,260 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
AST_SUPPORTED_LANGUAGES,
|
|
3
|
-
getTreeSitterGrammar
|
|
4
|
-
} from "./chunk-JFV27WLV.js";
|
|
5
|
-
|
|
6
|
-
// src/chunking/line-chunker.ts
|
|
7
|
-
function chunkByLines(content, filePath, language, options) {
|
|
8
|
-
const chunkSize = options?.maxChunkLines ?? 50;
|
|
9
|
-
const overlap = options?.overlapLines ?? 10;
|
|
10
|
-
const lines = content.split("\n");
|
|
11
|
-
if (lines.length === 0 || lines.length === 1 && lines[0].trim() === "") {
|
|
12
|
-
return [];
|
|
13
|
-
}
|
|
14
|
-
const header = `// File: ${filePath}
|
|
15
|
-
`;
|
|
16
|
-
const chunks = [];
|
|
17
|
-
if (lines.length <= chunkSize) {
|
|
18
|
-
chunks.push({
|
|
19
|
-
content: header + lines.join("\n"),
|
|
20
|
-
filePath,
|
|
21
|
-
startLine: 1,
|
|
22
|
-
endLine: lines.length,
|
|
23
|
-
language,
|
|
24
|
-
chunkType: "module"
|
|
25
|
-
});
|
|
26
|
-
return chunks;
|
|
27
|
-
}
|
|
28
|
-
let start = 0;
|
|
29
|
-
while (start < lines.length) {
|
|
30
|
-
const end = Math.min(start + chunkSize, lines.length);
|
|
31
|
-
chunks.push({
|
|
32
|
-
content: header + lines.slice(start, end).join("\n"),
|
|
33
|
-
filePath,
|
|
34
|
-
startLine: start + 1,
|
|
35
|
-
endLine: end,
|
|
36
|
-
language,
|
|
37
|
-
chunkType: "block"
|
|
38
|
-
});
|
|
39
|
-
if (end >= lines.length) break;
|
|
40
|
-
start += chunkSize - overlap;
|
|
41
|
-
}
|
|
42
|
-
return chunks;
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
// src/chunking/ast-chunker.ts
|
|
46
|
-
var SYMBOL_NODE_TYPES = /* @__PURE__ */ new Set([
|
|
47
|
-
// TypeScript / JavaScript
|
|
48
|
-
"function_declaration",
|
|
49
|
-
"method_definition",
|
|
50
|
-
"arrow_function",
|
|
51
|
-
"class_declaration",
|
|
52
|
-
"interface_declaration",
|
|
53
|
-
"type_alias_declaration",
|
|
54
|
-
"enum_declaration",
|
|
55
|
-
"export_statement",
|
|
56
|
-
"lexical_declaration",
|
|
57
|
-
"variable_declaration",
|
|
58
|
-
// Python
|
|
59
|
-
"function_definition",
|
|
60
|
-
"class_definition",
|
|
61
|
-
"decorated_definition",
|
|
62
|
-
// Go
|
|
63
|
-
"function_declaration",
|
|
64
|
-
"method_declaration",
|
|
65
|
-
"type_declaration",
|
|
66
|
-
// Rust
|
|
67
|
-
"function_item",
|
|
68
|
-
"impl_item",
|
|
69
|
-
"struct_item",
|
|
70
|
-
"enum_item",
|
|
71
|
-
"trait_item",
|
|
72
|
-
// Java
|
|
73
|
-
"method_declaration",
|
|
74
|
-
"class_declaration",
|
|
75
|
-
"interface_declaration",
|
|
76
|
-
"enum_declaration",
|
|
77
|
-
"constructor_declaration"
|
|
78
|
-
]);
|
|
79
|
-
var NAME_FIELD_NODES = /* @__PURE__ */ new Set([
|
|
80
|
-
"function_declaration",
|
|
81
|
-
"method_definition",
|
|
82
|
-
"class_declaration",
|
|
83
|
-
"interface_declaration",
|
|
84
|
-
"type_alias_declaration",
|
|
85
|
-
"enum_declaration",
|
|
86
|
-
"function_definition",
|
|
87
|
-
"class_definition",
|
|
88
|
-
"function_item",
|
|
89
|
-
"impl_item",
|
|
90
|
-
"struct_item",
|
|
91
|
-
"enum_item",
|
|
92
|
-
"trait_item",
|
|
93
|
-
"method_declaration",
|
|
94
|
-
"constructor_declaration"
|
|
95
|
-
]);
|
|
96
|
-
var tsModule = null;
|
|
97
|
-
var initialized = false;
|
|
98
|
-
async function getTreeSitter() {
|
|
99
|
-
if (!tsModule) {
|
|
100
|
-
const mod = await import("web-tree-sitter");
|
|
101
|
-
tsModule = mod;
|
|
102
|
-
}
|
|
103
|
-
if (!initialized) {
|
|
104
|
-
await tsModule.Parser.init();
|
|
105
|
-
initialized = true;
|
|
106
|
-
}
|
|
107
|
-
return tsModule;
|
|
108
|
-
}
|
|
109
|
-
var GRAMMAR_PACKAGES = {
|
|
110
|
-
typescript: "tree-sitter-typescript/tree-sitter-typescript.wasm",
|
|
111
|
-
tsx: "tree-sitter-typescript/tree-sitter-tsx.wasm",
|
|
112
|
-
javascript: "tree-sitter-javascript/tree-sitter-javascript.wasm",
|
|
113
|
-
python: "tree-sitter-python/tree-sitter-python.wasm",
|
|
114
|
-
go: "tree-sitter-go/tree-sitter-go.wasm",
|
|
115
|
-
rust: "tree-sitter-rust/tree-sitter-rust.wasm",
|
|
116
|
-
java: "tree-sitter-java/tree-sitter-java.wasm"
|
|
117
|
-
};
|
|
118
|
-
var languageCache = /* @__PURE__ */ new Map();
|
|
119
|
-
async function loadLanguage(grammarName, wasmBasePath) {
|
|
120
|
-
const cached = languageCache.get(grammarName);
|
|
121
|
-
if (cached) return cached;
|
|
122
|
-
const { Language } = await getTreeSitter();
|
|
123
|
-
const grammarFile = GRAMMAR_PACKAGES[grammarName];
|
|
124
|
-
if (!grammarFile) {
|
|
125
|
-
throw new Error(`No grammar available for: ${grammarName}`);
|
|
126
|
-
}
|
|
127
|
-
let wasmPath;
|
|
128
|
-
if (wasmBasePath) {
|
|
129
|
-
wasmPath = `${wasmBasePath}/${grammarFile}`;
|
|
130
|
-
} else {
|
|
131
|
-
const { createRequire } = await import("module");
|
|
132
|
-
const require2 = createRequire(import.meta.url);
|
|
133
|
-
wasmPath = require2.resolve(grammarFile);
|
|
134
|
-
}
|
|
135
|
-
const lang = await Language.load(wasmPath);
|
|
136
|
-
languageCache.set(grammarName, lang);
|
|
137
|
-
return lang;
|
|
138
|
-
}
|
|
139
|
-
function getNodeName(node) {
|
|
140
|
-
if (!NAME_FIELD_NODES.has(node.type)) return void 0;
|
|
141
|
-
const nameNode = node.childForFieldName("name");
|
|
142
|
-
return nameNode?.text;
|
|
143
|
-
}
|
|
144
|
-
function getChunkType(nodeType) {
|
|
145
|
-
if (nodeType.includes("function") || nodeType.includes("method") || nodeType === "arrow_function" || nodeType === "constructor_declaration") {
|
|
146
|
-
return "function";
|
|
147
|
-
}
|
|
148
|
-
if (nodeType.includes("class") || nodeType.includes("impl") || nodeType.includes("struct") || nodeType.includes("trait") || nodeType.includes("interface") || nodeType.includes("enum")) {
|
|
149
|
-
return "class";
|
|
150
|
-
}
|
|
151
|
-
return "block";
|
|
152
|
-
}
|
|
153
|
-
function extractChunks(node, parentScope, maxChars, minChars) {
|
|
154
|
-
const text = node.text;
|
|
155
|
-
const charCount = text.length;
|
|
156
|
-
if (charCount <= maxChars && SYMBOL_NODE_TYPES.has(node.type)) {
|
|
157
|
-
const name2 = getNodeName(node);
|
|
158
|
-
const scopeChain = parentScope ? name2 ? `${parentScope}.${name2}` : parentScope : name2;
|
|
159
|
-
return [{
|
|
160
|
-
text,
|
|
161
|
-
startLine: node.startPosition.row + 1,
|
|
162
|
-
endLine: node.endPosition.row + 1,
|
|
163
|
-
symbolName: name2,
|
|
164
|
-
scopeChain,
|
|
165
|
-
chunkType: getChunkType(node.type)
|
|
166
|
-
}];
|
|
167
|
-
}
|
|
168
|
-
if (charCount <= maxChars && node.childCount === 0) {
|
|
169
|
-
return [{
|
|
170
|
-
text,
|
|
171
|
-
startLine: node.startPosition.row + 1,
|
|
172
|
-
endLine: node.endPosition.row + 1,
|
|
173
|
-
chunkType: "block"
|
|
174
|
-
}];
|
|
175
|
-
}
|
|
176
|
-
const name = getNodeName(node);
|
|
177
|
-
const currentScope = parentScope ? name ? `${parentScope}.${name}` : parentScope : name ?? "";
|
|
178
|
-
const childChunks = [];
|
|
179
|
-
for (const child of node.namedChildren) {
|
|
180
|
-
const chunks = extractChunks(child, currentScope, maxChars, minChars);
|
|
181
|
-
childChunks.push(...chunks);
|
|
182
|
-
}
|
|
183
|
-
return mergeSmallChunks(childChunks, maxChars, minChars);
|
|
184
|
-
}
|
|
185
|
-
function mergeSmallChunks(chunks, maxChars, minChars) {
|
|
186
|
-
if (chunks.length === 0) return chunks;
|
|
187
|
-
const merged = [];
|
|
188
|
-
let current = chunks[0];
|
|
189
|
-
for (let i = 1; i < chunks.length; i++) {
|
|
190
|
-
const next = chunks[i];
|
|
191
|
-
const combinedLength = current.text.length + next.text.length + 1;
|
|
192
|
-
if (combinedLength <= maxChars && current.text.length < minChars) {
|
|
193
|
-
current = {
|
|
194
|
-
text: current.text + "\n" + next.text,
|
|
195
|
-
startLine: current.startLine,
|
|
196
|
-
endLine: next.endLine,
|
|
197
|
-
symbolName: current.symbolName ?? next.symbolName,
|
|
198
|
-
scopeChain: current.scopeChain ?? next.scopeChain,
|
|
199
|
-
chunkType: current.symbolName ? current.chunkType : next.symbolName ? next.chunkType : "mixed"
|
|
200
|
-
};
|
|
201
|
-
} else {
|
|
202
|
-
merged.push(current);
|
|
203
|
-
current = next;
|
|
204
|
-
}
|
|
205
|
-
}
|
|
206
|
-
merged.push(current);
|
|
207
|
-
return merged;
|
|
208
|
-
}
|
|
209
|
-
async function chunkByAST(content, filePath, language, grammarName, options) {
|
|
210
|
-
const maxChars = options?.maxChunkChars ?? 1500;
|
|
211
|
-
const minChars = options?.minChunkChars ?? 100;
|
|
212
|
-
const lang = await loadLanguage(grammarName, options?.wasmBasePath);
|
|
213
|
-
const { Parser } = await getTreeSitter();
|
|
214
|
-
const parser = new Parser();
|
|
215
|
-
parser.setLanguage(lang);
|
|
216
|
-
const tree = parser.parse(content);
|
|
217
|
-
const rawChunks = extractChunks(tree.rootNode, "", maxChars, minChars);
|
|
218
|
-
if (rawChunks.length === 0) {
|
|
219
|
-
return [{
|
|
220
|
-
content: `// File: ${filePath}
|
|
221
|
-
${content}`,
|
|
222
|
-
filePath,
|
|
223
|
-
startLine: 1,
|
|
224
|
-
endLine: content.split("\n").length,
|
|
225
|
-
language,
|
|
226
|
-
chunkType: "module"
|
|
227
|
-
}];
|
|
228
|
-
}
|
|
229
|
-
return rawChunks.map((chunk) => ({
|
|
230
|
-
content: `// File: ${filePath}
|
|
231
|
-
${chunk.scopeChain ? `// Scope: ${chunk.scopeChain}
|
|
232
|
-
` : ""}${chunk.text}`,
|
|
233
|
-
filePath,
|
|
234
|
-
startLine: chunk.startLine,
|
|
235
|
-
endLine: chunk.endLine,
|
|
236
|
-
language,
|
|
237
|
-
symbolName: chunk.symbolName,
|
|
238
|
-
scopeChain: chunk.scopeChain,
|
|
239
|
-
chunkType: chunk.chunkType
|
|
240
|
-
}));
|
|
241
|
-
}
|
|
242
|
-
|
|
243
|
-
// src/chunking/index.ts
|
|
244
|
-
async function chunkFile(content, filePath, language, options) {
|
|
245
|
-
const grammarName = getTreeSitterGrammar(filePath);
|
|
246
|
-
if (grammarName && AST_SUPPORTED_LANGUAGES.has(grammarName)) {
|
|
247
|
-
try {
|
|
248
|
-
return await chunkByAST(content, filePath, language, grammarName, options);
|
|
249
|
-
} catch (err) {
|
|
250
|
-
console.warn(`AST chunking failed for ${filePath}, falling back to line-based:`, err);
|
|
251
|
-
}
|
|
252
|
-
}
|
|
253
|
-
return chunkByLines(content, filePath, language, options);
|
|
254
|
-
}
|
|
255
|
-
|
|
256
|
-
export {
|
|
257
|
-
chunkByLines,
|
|
258
|
-
chunkByAST,
|
|
259
|
-
chunkFile
|
|
260
|
-
};
|
package/dist/chunk-ZMBYQK43.js
DELETED
|
@@ -1,91 +0,0 @@
|
|
|
1
|
-
// src/search/fusion.ts
|
|
2
|
-
function reciprocalRankFusion(resultSets, k = 60) {
|
|
3
|
-
const scoreMap = /* @__PURE__ */ new Map();
|
|
4
|
-
for (const { label, results } of resultSets) {
|
|
5
|
-
for (let rank = 0; rank < results.length; rank++) {
|
|
6
|
-
const result = results[rank];
|
|
7
|
-
const rrfScore = 1 / (k + rank + 1);
|
|
8
|
-
const existing = scoreMap.get(result.id);
|
|
9
|
-
if (existing) {
|
|
10
|
-
existing.score += rrfScore;
|
|
11
|
-
existing.sources.push(label);
|
|
12
|
-
} else {
|
|
13
|
-
scoreMap.set(result.id, {
|
|
14
|
-
score: rrfScore,
|
|
15
|
-
sources: [label]
|
|
16
|
-
});
|
|
17
|
-
}
|
|
18
|
-
}
|
|
19
|
-
}
|
|
20
|
-
const fused = [];
|
|
21
|
-
for (const [id, { score, sources }] of scoreMap) {
|
|
22
|
-
fused.push({ id, fusedScore: score, sources });
|
|
23
|
-
}
|
|
24
|
-
fused.sort((a, b) => b.fusedScore - a.fusedScore);
|
|
25
|
-
return fused;
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
// src/search/deduplicator.ts
|
|
29
|
-
function deduplicateChunks(chunks) {
|
|
30
|
-
if (chunks.length <= 1) return chunks;
|
|
31
|
-
const byFile = /* @__PURE__ */ new Map();
|
|
32
|
-
for (const chunk of chunks) {
|
|
33
|
-
const group = byFile.get(chunk.filePath);
|
|
34
|
-
if (group) {
|
|
35
|
-
group.push(chunk);
|
|
36
|
-
} else {
|
|
37
|
-
byFile.set(chunk.filePath, [chunk]);
|
|
38
|
-
}
|
|
39
|
-
}
|
|
40
|
-
const result = [];
|
|
41
|
-
for (const [, fileChunks] of byFile) {
|
|
42
|
-
if (fileChunks.length === 1) {
|
|
43
|
-
result.push(fileChunks[0]);
|
|
44
|
-
continue;
|
|
45
|
-
}
|
|
46
|
-
fileChunks.sort((a, b) => a.startLine - b.startLine);
|
|
47
|
-
const merged = mergeOverlapping(fileChunks);
|
|
48
|
-
result.push(...merged);
|
|
49
|
-
}
|
|
50
|
-
result.sort((a, b) => b.score - a.score);
|
|
51
|
-
return result;
|
|
52
|
-
}
|
|
53
|
-
function mergeOverlapping(chunks) {
|
|
54
|
-
const merged = [];
|
|
55
|
-
let current = { ...chunks[0] };
|
|
56
|
-
for (let i = 1; i < chunks.length; i++) {
|
|
57
|
-
const next = chunks[i];
|
|
58
|
-
if (next.startLine <= current.endLine + 1) {
|
|
59
|
-
const currentLines = current.content.split("\n");
|
|
60
|
-
const nextLines = next.content.split("\n");
|
|
61
|
-
const overlapStart = next.startLine - current.startLine;
|
|
62
|
-
const newLines = nextLines.slice(current.endLine - next.startLine + 1);
|
|
63
|
-
if (newLines.length > 0) {
|
|
64
|
-
current = {
|
|
65
|
-
...current,
|
|
66
|
-
content: currentLines.join("\n") + "\n" + newLines.join("\n"),
|
|
67
|
-
endLine: Math.max(current.endLine, next.endLine),
|
|
68
|
-
score: Math.max(current.score, next.score),
|
|
69
|
-
// Keep symbolName from higher-scored chunk
|
|
70
|
-
symbolName: current.score >= next.score ? current.symbolName : next.symbolName
|
|
71
|
-
};
|
|
72
|
-
} else {
|
|
73
|
-
current = {
|
|
74
|
-
...current,
|
|
75
|
-
endLine: Math.max(current.endLine, next.endLine),
|
|
76
|
-
score: Math.max(current.score, next.score)
|
|
77
|
-
};
|
|
78
|
-
}
|
|
79
|
-
} else {
|
|
80
|
-
merged.push(current);
|
|
81
|
-
current = { ...next };
|
|
82
|
-
}
|
|
83
|
-
}
|
|
84
|
-
merged.push(current);
|
|
85
|
-
return merged;
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
export {
|
|
89
|
-
reciprocalRankFusion,
|
|
90
|
-
deduplicateChunks
|
|
91
|
-
};
|
package/dist/chunking/index.d.ts
DELETED
|
@@ -1,53 +0,0 @@
|
|
|
1
|
-
interface CodeChunk {
|
|
2
|
-
content: string;
|
|
3
|
-
filePath: string;
|
|
4
|
-
startLine: number;
|
|
5
|
-
endLine: number;
|
|
6
|
-
language: string;
|
|
7
|
-
/** Name of the top-level symbol (function, class, etc.) this chunk represents. */
|
|
8
|
-
symbolName?: string;
|
|
9
|
-
/** Dot-separated scope chain, e.g. "UserService.login". */
|
|
10
|
-
scopeChain?: string;
|
|
11
|
-
/** What kind of AST node this chunk represents. */
|
|
12
|
-
chunkType?: "function" | "class" | "module" | "block" | "mixed";
|
|
13
|
-
}
|
|
14
|
-
interface ChunkOptions {
|
|
15
|
-
/** Max characters per chunk (default: 1500). */
|
|
16
|
-
maxChunkChars?: number;
|
|
17
|
-
/** Min characters to avoid tiny chunks (default: 100). */
|
|
18
|
-
minChunkChars?: number;
|
|
19
|
-
/** Number of overlap lines for line-based fallback (default: 10). */
|
|
20
|
-
overlapLines?: number;
|
|
21
|
-
/** Max lines per chunk for line-based fallback (default: 50). */
|
|
22
|
-
maxChunkLines?: number;
|
|
23
|
-
}
|
|
24
|
-
|
|
25
|
-
interface ASTChunkerOptions extends ChunkOptions {
|
|
26
|
-
/** Base path for WASM grammar files. If not set, resolves from node_modules. */
|
|
27
|
-
wasmBasePath?: string;
|
|
28
|
-
}
|
|
29
|
-
/**
|
|
30
|
-
* AST-aware chunker using tree-sitter.
|
|
31
|
-
*
|
|
32
|
-
* Algorithm: parse AST → if node fits budget, keep as one chunk →
|
|
33
|
-
* if too large, recurse into children → greedily merge adjacent small siblings.
|
|
34
|
-
*
|
|
35
|
-
* Each chunk is enriched with file path, scope chain, and symbol name.
|
|
36
|
-
*/
|
|
37
|
-
declare function chunkByAST(content: string, filePath: string, language: string, grammarName: string, options?: ASTChunkerOptions): Promise<CodeChunk[]>;
|
|
38
|
-
|
|
39
|
-
/**
|
|
40
|
-
* Line-based chunker — splits file content into fixed-size chunks with overlap.
|
|
41
|
-
* Used as fallback when AST parsing is unavailable for a language.
|
|
42
|
-
*/
|
|
43
|
-
declare function chunkByLines(content: string, filePath: string, language: string, options?: ChunkOptions): CodeChunk[];
|
|
44
|
-
|
|
45
|
-
/**
|
|
46
|
-
* Main entry point for chunking a file.
|
|
47
|
-
*
|
|
48
|
-
* Tries AST-aware chunking if tree-sitter supports the language,
|
|
49
|
-
* falls back to line-based chunking otherwise.
|
|
50
|
-
*/
|
|
51
|
-
declare function chunkFile(content: string, filePath: string, language: string, options?: ASTChunkerOptions): Promise<CodeChunk[]>;
|
|
52
|
-
|
|
53
|
-
export { type ASTChunkerOptions, type ChunkOptions, type CodeChunk, chunkByAST, chunkByLines, chunkFile };
|
package/dist/chunking/index.js
DELETED
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
interface EmbeddingOptions {
|
|
2
|
-
/** API key for the embedding provider. */
|
|
3
|
-
apiKey: string;
|
|
4
|
-
/** Whether the input is a document (for indexing) or a query (for search). */
|
|
5
|
-
inputType: "document" | "query";
|
|
6
|
-
/** Number of dimensions for the embedding vector (default: 1024). */
|
|
7
|
-
dimensions?: number;
|
|
8
|
-
/** Max characters per text before truncation (default: 32000). */
|
|
9
|
-
maxChars?: number;
|
|
10
|
-
/** Number of texts to embed per API call (default: 128). */
|
|
11
|
-
batchSize?: number;
|
|
12
|
-
}
|
|
13
|
-
interface RerankOptions {
|
|
14
|
-
/** API key for the reranking provider. */
|
|
15
|
-
apiKey: string;
|
|
16
|
-
/** Number of top results to return (default: 15). */
|
|
17
|
-
topK?: number;
|
|
18
|
-
/** Model to use for reranking (default: "rerank-2.5"). */
|
|
19
|
-
model?: string;
|
|
20
|
-
}
|
|
21
|
-
interface EmbeddingResult {
|
|
22
|
-
embedding: number[];
|
|
23
|
-
index: number;
|
|
24
|
-
}
|
|
25
|
-
interface RerankResult {
|
|
26
|
-
index: number;
|
|
27
|
-
relevanceScore: number;
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
/**
|
|
31
|
-
* Generate embeddings using Voyage AI's code-optimized model.
|
|
32
|
-
*
|
|
33
|
-
* Supports asymmetric embedding: use inputType "document" for indexing
|
|
34
|
-
* and "query" for search queries.
|
|
35
|
-
*/
|
|
36
|
-
declare function generateEmbeddings(texts: string[], options: EmbeddingOptions): Promise<number[][]>;
|
|
37
|
-
/**
|
|
38
|
-
* Rerank documents using Voyage AI's cross-encoder model.
|
|
39
|
-
*
|
|
40
|
-
* Takes a query and candidate documents, returns them reranked by relevance.
|
|
41
|
-
*/
|
|
42
|
-
declare function rerankResults(query: string, documents: string[], options: RerankOptions): Promise<RerankResult[]>;
|
|
43
|
-
|
|
44
|
-
export { type EmbeddingOptions, type EmbeddingResult, type RerankOptions, type RerankResult, generateEmbeddings, rerankResults };
|
package/dist/index.d.ts
DELETED
|
@@ -1,4 +0,0 @@
|
|
|
1
|
-
export { ASTChunkerOptions, ChunkOptions, CodeChunk, chunkByAST, chunkByLines, chunkFile } from './chunking/index.js';
|
|
2
|
-
export { EmbeddingOptions, EmbeddingResult, RerankOptions, RerankResult, generateEmbeddings, rerankResults } from './embeddings/voyage.js';
|
|
3
|
-
export { FusedResult, RankedResult, ScoredChunk, SearchResult, deduplicateChunks, reciprocalRankFusion } from './search/index.js';
|
|
4
|
-
export { AST_SUPPORTED_LANGUAGES, ChangedFiles, FileEntry, GrammarMapping, INDEXABLE_EXTENSIONS, detectLanguage, getTreeSitterGrammar, hashContent, resolveChanges, shouldIndexFile } from './indexing/index.js';
|
package/dist/index.js
DELETED
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
chunkByAST,
|
|
3
|
-
chunkByLines,
|
|
4
|
-
chunkFile
|
|
5
|
-
} from "./chunk-KQIRG24U.js";
|
|
6
|
-
import {
|
|
7
|
-
generateEmbeddings,
|
|
8
|
-
rerankResults
|
|
9
|
-
} from "./chunk-3VPR67FN.js";
|
|
10
|
-
import {
|
|
11
|
-
deduplicateChunks,
|
|
12
|
-
reciprocalRankFusion
|
|
13
|
-
} from "./chunk-ZMBYQK43.js";
|
|
14
|
-
import {
|
|
15
|
-
hashContent,
|
|
16
|
-
resolveChanges
|
|
17
|
-
} from "./chunk-HINI3FCI.js";
|
|
18
|
-
import {
|
|
19
|
-
AST_SUPPORTED_LANGUAGES,
|
|
20
|
-
INDEXABLE_EXTENSIONS,
|
|
21
|
-
detectLanguage,
|
|
22
|
-
getTreeSitterGrammar,
|
|
23
|
-
shouldIndexFile
|
|
24
|
-
} from "./chunk-JFV27WLV.js";
|
|
25
|
-
export {
|
|
26
|
-
AST_SUPPORTED_LANGUAGES,
|
|
27
|
-
INDEXABLE_EXTENSIONS,
|
|
28
|
-
chunkByAST,
|
|
29
|
-
chunkByLines,
|
|
30
|
-
chunkFile,
|
|
31
|
-
deduplicateChunks,
|
|
32
|
-
detectLanguage,
|
|
33
|
-
generateEmbeddings,
|
|
34
|
-
getTreeSitterGrammar,
|
|
35
|
-
hashContent,
|
|
36
|
-
reciprocalRankFusion,
|
|
37
|
-
rerankResults,
|
|
38
|
-
resolveChanges,
|
|
39
|
-
shouldIndexFile
|
|
40
|
-
};
|
package/dist/indexing/index.d.ts
DELETED
|
@@ -1,52 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Compute a SHA-256 hash of the given content.
|
|
3
|
-
* Used for incremental indexing — only re-embed files whose hash has changed.
|
|
4
|
-
*/
|
|
5
|
-
declare function hashContent(content: string): string;
|
|
6
|
-
|
|
7
|
-
interface FileEntry {
|
|
8
|
-
/** File path relative to repo root. */
|
|
9
|
-
filePath: string;
|
|
10
|
-
/** SHA-256 content hash. */
|
|
11
|
-
contentHash: string;
|
|
12
|
-
/** Optional: IDs of chunks generated from this file. */
|
|
13
|
-
chunkIds?: string[];
|
|
14
|
-
}
|
|
15
|
-
interface ChangedFiles {
|
|
16
|
-
/** Files that are new (not in old set). */
|
|
17
|
-
added: string[];
|
|
18
|
-
/** Files that exist in both but have different content hashes. */
|
|
19
|
-
modified: string[];
|
|
20
|
-
/** Files that were in old set but not in new set. */
|
|
21
|
-
deleted: string[];
|
|
22
|
-
/** Files that are unchanged. */
|
|
23
|
-
unchanged: string[];
|
|
24
|
-
}
|
|
25
|
-
/** Mapping from file extension to tree-sitter grammar name. */
|
|
26
|
-
interface GrammarMapping {
|
|
27
|
-
extension: string;
|
|
28
|
-
language: string;
|
|
29
|
-
treeSitterGrammar: string;
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
/**
|
|
33
|
-
* Compare old and new file entry lists to determine which files were
|
|
34
|
-
* added, modified, deleted, or unchanged.
|
|
35
|
-
*
|
|
36
|
-
* Uses content hashes for comparison — two files with identical content
|
|
37
|
-
* are considered unchanged even if their metadata differs.
|
|
38
|
-
*/
|
|
39
|
-
declare function resolveChanges(oldEntries: FileEntry[], newEntries: FileEntry[]): ChangedFiles;
|
|
40
|
-
|
|
41
|
-
declare const INDEXABLE_EXTENSIONS: Set<string>;
|
|
42
|
-
declare function shouldIndexFile(path: string, size: number): boolean;
|
|
43
|
-
/** Languages that have tree-sitter grammar support for AST chunking. */
|
|
44
|
-
declare const AST_SUPPORTED_LANGUAGES: Set<string>;
|
|
45
|
-
declare function detectLanguage(path: string): string;
|
|
46
|
-
/**
|
|
47
|
-
* Get the tree-sitter grammar name for a file extension.
|
|
48
|
-
* Returns empty string if no grammar is available (falls back to line chunker).
|
|
49
|
-
*/
|
|
50
|
-
declare function getTreeSitterGrammar(path: string): string;
|
|
51
|
-
|
|
52
|
-
export { AST_SUPPORTED_LANGUAGES, type ChangedFiles, type FileEntry, type GrammarMapping, INDEXABLE_EXTENSIONS, detectLanguage, getTreeSitterGrammar, hashContent, resolveChanges, shouldIndexFile };
|
package/dist/indexing/index.js
DELETED
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
hashContent,
|
|
3
|
-
resolveChanges
|
|
4
|
-
} from "../chunk-HINI3FCI.js";
|
|
5
|
-
import {
|
|
6
|
-
AST_SUPPORTED_LANGUAGES,
|
|
7
|
-
INDEXABLE_EXTENSIONS,
|
|
8
|
-
detectLanguage,
|
|
9
|
-
getTreeSitterGrammar,
|
|
10
|
-
shouldIndexFile
|
|
11
|
-
} from "../chunk-JFV27WLV.js";
|
|
12
|
-
export {
|
|
13
|
-
AST_SUPPORTED_LANGUAGES,
|
|
14
|
-
INDEXABLE_EXTENSIONS,
|
|
15
|
-
detectLanguage,
|
|
16
|
-
getTreeSitterGrammar,
|
|
17
|
-
hashContent,
|
|
18
|
-
resolveChanges,
|
|
19
|
-
shouldIndexFile
|
|
20
|
-
};
|
package/dist/search/index.d.ts
DELETED
|
@@ -1,64 +0,0 @@
|
|
|
1
|
-
interface SearchResult {
|
|
2
|
-
/** Unique identifier for the result. */
|
|
3
|
-
id: string;
|
|
4
|
-
/** Rank position in the result list (0-indexed). */
|
|
5
|
-
rank: number;
|
|
6
|
-
/** Optional score from the search system. */
|
|
7
|
-
score?: number;
|
|
8
|
-
}
|
|
9
|
-
interface RankedResult extends SearchResult {
|
|
10
|
-
/** The content or document associated with this result. */
|
|
11
|
-
content: string;
|
|
12
|
-
/** File path of the source. */
|
|
13
|
-
filePath: string;
|
|
14
|
-
/** Start line in the source file. */
|
|
15
|
-
startLine: number;
|
|
16
|
-
/** End line in the source file. */
|
|
17
|
-
endLine: number;
|
|
18
|
-
}
|
|
19
|
-
interface FusedResult {
|
|
20
|
-
/** Unique identifier for the result. */
|
|
21
|
-
id: string;
|
|
22
|
-
/** Fused RRF score across all result sets. */
|
|
23
|
-
fusedScore: number;
|
|
24
|
-
/** Which result sets contributed to this result. */
|
|
25
|
-
sources: string[];
|
|
26
|
-
}
|
|
27
|
-
interface ScoredChunk {
|
|
28
|
-
id: string;
|
|
29
|
-
content: string;
|
|
30
|
-
filePath: string;
|
|
31
|
-
startLine: number;
|
|
32
|
-
endLine: number;
|
|
33
|
-
language: string;
|
|
34
|
-
score: number;
|
|
35
|
-
symbolName?: string;
|
|
36
|
-
chunkType?: string;
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
/**
|
|
40
|
-
* Reciprocal Rank Fusion (RRF) — merges multiple ranked result lists
|
|
41
|
-
* into a single ranking using the formula: score = sum(1 / (k + rank))
|
|
42
|
-
*
|
|
43
|
-
* This is a standard technique for combining results from different
|
|
44
|
-
* search systems (e.g., vector search + BM25 full-text search).
|
|
45
|
-
*
|
|
46
|
-
* @param resultSets - Array of result sets, each with a label and ranked results
|
|
47
|
-
* @param k - Smoothing constant (default: 60, as per the original RRF paper)
|
|
48
|
-
* @returns Fused results sorted by combined score (highest first)
|
|
49
|
-
*/
|
|
50
|
-
declare function reciprocalRankFusion(resultSets: Array<{
|
|
51
|
-
label: string;
|
|
52
|
-
results: SearchResult[];
|
|
53
|
-
}>, k?: number): FusedResult[];
|
|
54
|
-
|
|
55
|
-
/**
|
|
56
|
-
* Deduplicate and merge overlapping chunks from the same file.
|
|
57
|
-
*
|
|
58
|
-
* When hybrid search + reranking returns multiple chunks from the same file
|
|
59
|
-
* with overlapping line ranges, merge them into a single contiguous block
|
|
60
|
-
* to avoid redundancy in the LLM context window.
|
|
61
|
-
*/
|
|
62
|
-
declare function deduplicateChunks(chunks: ScoredChunk[]): ScoredChunk[];
|
|
63
|
-
|
|
64
|
-
export { type FusedResult, type RankedResult, type ScoredChunk, type SearchResult, deduplicateChunks, reciprocalRankFusion };
|