@cue-dev/retrieval-core 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +27 -0
- package/dist/.tsbuildinfo +1 -0
- package/dist/chunking.d.ts +64 -0
- package/dist/chunking.js +983 -0
- package/dist/index.d.ts +673 -0
- package/dist/index.js +6605 -0
- package/dist/indexing-ignore.d.ts +9 -0
- package/dist/indexing-ignore.js +151 -0
- package/dist/remote-sync.d.ts +193 -0
- package/dist/remote-sync.js +816 -0
- package/package.json +37 -0
- package/scripts/poc-node-parser-host.cjs +105 -0
- package/scripts/poc-parser-availability-benchmark.ts +338 -0
- package/src/chunking.ts +1187 -0
- package/src/index.ts +8338 -0
- package/src/indexing-ignore.ts +179 -0
- package/src/remote-sync.ts +1119 -0
- package/test/benchmark.thresholds.test.ts +815 -0
- package/test/chunking.config.test.ts +84 -0
- package/test/chunking.language-aware.test.ts +1248 -0
- package/test/chunking.parser-availability.poc.test.ts +86 -0
- package/test/claude-agent-provider.test.ts +209 -0
- package/test/embedding-context-prefix.test.ts +101 -0
- package/test/embedding-provider.test.ts +570 -0
- package/test/enhance-confidence.test.ts +752 -0
- package/test/index-prep.concurrency.regression.test.ts +142 -0
- package/test/integration.test.ts +508 -0
- package/test/local-sqlite.integration.test.ts +258 -0
- package/test/mcp-search-quality.regression.test.ts +1358 -0
- package/test/remote-sync.integration.test.ts +350 -0
- package/test/smart-cutoff.config.test.ts +86 -0
- package/test/snippet-integrity.config.test.ts +59 -0
- package/tsconfig.build.json +17 -0
- package/tsconfig.json +4 -0
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
import { readFile } from "node:fs/promises";
|
|
2
|
+
import { join, resolve } from "node:path";
|
|
3
|
+
|
|
4
|
+
/**
 * Names of the ignore files that are looked up directly inside the project root.
 * They are read in the order listed here and their patterns are concatenated.
 */
export const INDEXING_IGNORE_FILENAMES = [".contextignore", ".cueignore"] as const;
|
|
5
|
+
|
|
6
|
+
// Lower-cased ignore-file names. Any file whose basename (compared case-insensitively)
// is in this set is always excluded from indexing, regardless of the configured patterns.
const INDEXING_CONTROL_FILENAMES = new Set(INDEXING_IGNORE_FILENAMES.map((name) => name.toLowerCase()));
|
|
7
|
+
|
|
8
|
+
/** Tells the matcher whether the path being checked names a file or a directory. */
type IndexingPathKind = "file" | "dir";
|
|
9
|
+
|
|
10
|
+
/** One ignore pattern after it has been normalized and turned into a regular expression. */
interface CompiledIgnoreRule {
  /** Anchored expression compiled from the normalized glob pattern. */
  regex: RegExp;
  /**
   * True when the normalized pattern contains a "/". Such rules are tested against
   * repo-relative paths; all other rules are tested against a single path segment.
   */
  has_slash: boolean;
  /** True when the pattern was written with a trailing "/", i.e. it targets directories only. */
  directory_only: boolean;
}
|
|
15
|
+
|
|
16
|
+
/** Matcher built from a project's ignore files. */
export interface IndexingIgnoreMatcher {
  /**
   * Reports whether a path must be left out of indexing.
   *
   * @param path - Path relative to the repository root.
   * @param kind - Whether `path` names a file or a directory.
   * @returns `true` when the path is excluded.
   */
  shouldIgnorePath(path: string, kind: IndexingPathKind): boolean;
  /** Parsed ignore patterns collected from the ignore files, in the order they were read. */
  patterns: string[];
}
|
|
20
|
+
|
|
21
|
+
/**
 * Escapes a character for literal use inside a regular expression.
 *
 * `*` is intentionally not covered by the check below; the glob compiler handles
 * `*` and `?` itself before delegating here.
 *
 * @param char - The character to emit literally.
 * @returns The input prefixed with a backslash when it contains a regex metacharacter,
 *          otherwise the input unchanged.
 */
function escapeRegexChar(char: string): string {
  const needsEscape = /[\\^$+?.()|[\]{}]/.test(char);
  if (needsEscape) {
    return "\\" + char;
  }
  return char;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Translates a normalized glob pattern into a regular expression anchored at both ends.
 *
 * Supported wildcards:
 * - a double star followed by "/" matches zero or more leading directories,
 * - any other double star matches any run of characters, including "/",
 * - a single star matches any run of characters within one path segment,
 * - `?` matches exactly one character other than "/".
 * Every other character is matched literally.
 *
 * @param pattern - Glob pattern using "/" as the separator.
 * @returns The compiled, case-sensitive expression.
 */
function compileGlobPattern(pattern: string): RegExp {
  const fragments: string[] = [];
  let cursor = 0;

  while (cursor < pattern.length) {
    // Longest wildcard first so that "**/" is not mistaken for "**" or "*".
    if (pattern.startsWith("**/", cursor)) {
      fragments.push("(?:.*/)?");
      cursor += 3;
      continue;
    }
    if (pattern.startsWith("**", cursor)) {
      fragments.push(".*");
      cursor += 2;
      continue;
    }

    const current = pattern.charAt(cursor);
    switch (current) {
      case "*":
        fragments.push("[^/]*");
        break;
      case "?":
        fragments.push("[^/]");
        break;
      default:
        fragments.push(escapeRegexChar(current));
        break;
    }
    cursor += 1;
  }

  return new RegExp(`^${fragments.join("")}$`);
}
|
|
56
|
+
|
|
57
|
+
/**
 * Canonicalizes a repository-relative path so it can be compared against ignore rules.
 *
 * Backslashes become "/", runs of slashes collapse to one, every leading "/" or "./"
 * (in any combination, e.g. "/./a" or "././a") is removed, and a trailing slash is
 * dropped. The result is idempotent: normalizing an already normalized path is a no-op.
 * "." and ".." segments elsewhere in the path are left untouched.
 *
 * @param path - Raw path, possibly using Windows separators or redundant slashes.
 * @returns The normalized path; the empty string when the input denotes the root.
 */
export function normalizeRepoRelativePath(path: string): string {
  return (
    path
      .replace(/\\/g, "/")
      // Collapse first so the prefix strip below only has to deal with single separators.
      .replace(/\/+/g, "/")
      // Strip the whole leading run of "/" and "./"; "../" and dot-files are not affected.
      .replace(/^(?:\.?\/)+/, "")
      .replace(/\/$/, "")
  );
}
|
|
66
|
+
|
|
67
|
+
/**
 * Returns the last segment of a path after normalization.
 *
 * @param path - Any repo-relative path.
 * @returns The text after the final "/", the whole normalized path when it has no "/",
 *          or the empty string when the path normalizes to nothing.
 */
function basename(path: string): string {
  const cleaned = normalizeRepoRelativePath(path);
  // lastIndexOf yields -1 when there is no slash, so the slice starts at 0 and keeps everything.
  return cleaned.slice(cleaned.lastIndexOf("/") + 1);
}
|
|
75
|
+
|
|
76
|
+
/**
 * Lists the directories on the way to a path, outermost first.
 *
 * For a file the list holds every containing directory; for a directory the
 * directory itself is included as the last entry.
 *
 * @param path - Repo-relative path; it is normalized before being split.
 * @param kind - Whether `path` names a file or a directory.
 * @returns Cumulative directory paths such as `["a", "a/b"]`; empty for the root.
 */
function listAncestorDirectories(path: string, kind: IndexingPathKind): string[] {
  const cleaned = normalizeRepoRelativePath(path);
  if (cleaned === "") {
    return [];
  }

  const parts = cleaned.split("/");
  // A file's own name is not a directory, so it is left out of the walk.
  const directoryCount = kind === "dir" ? parts.length : Math.max(parts.length - 1, 0);

  let prefix = "";
  return parts.slice(0, directoryCount).map((part) => {
    prefix = prefix === "" ? part : `${prefix}/${part}`;
    return prefix;
  });
}
|
|
92
|
+
|
|
93
|
+
/**
 * Extracts the usable patterns from the text of an ignore file.
 *
 * Blank lines, `#` comments and `!` negations are skipped. Each remaining line is
 * trimmed and normalized as a path (so a leading "/" or "./" is dropped), and a
 * trailing "/" is kept as the marker for a directory-only pattern.
 *
 * @param content - Raw file content with LF or CRLF line endings.
 * @returns The patterns in file order.
 */
function parseIgnorePatterns(content: string): string[] {
  const collected: string[] = [];

  content.split(/\r?\n/).forEach((rawLine) => {
    const entry = rawLine.trim();
    const isBlankOrComment = entry === "" || entry.startsWith("#");
    // v1 is exclude-only; negation directives are dropped for deterministic behavior.
    const isNegation = entry.startsWith("!");
    if (isBlankOrComment || isNegation) {
      return;
    }

    const cleaned = normalizeRepoRelativePath(entry);
    if (cleaned === "") {
      return;
    }

    // Normalization strips the trailing slash, so re-append it for directory-only patterns.
    collected.push(entry.endsWith("/") ? `${cleaned}/` : cleaned);
  });

  return collected;
}
|
|
113
|
+
|
|
114
|
+
/**
 * Compiles parsed ignore patterns into matchable rules.
 *
 * Patterns that normalize to the empty string are dropped; the remaining rules
 * keep the order of the input.
 *
 * @param patterns - Patterns where a trailing "/" marks a directory-only pattern.
 * @returns One compiled rule per usable pattern.
 */
function compileIgnoreRules(patterns: string[]): CompiledIgnoreRule[] {
  const compiled: CompiledIgnoreRule[] = [];

  for (const rawPattern of patterns) {
    const isDirectoryRule = rawPattern.endsWith("/");
    const withoutMarker = isDirectoryRule ? rawPattern.slice(0, -1) : rawPattern;
    const glob = normalizeRepoRelativePath(withoutMarker);
    if (glob === "") {
      continue;
    }

    const rule: CompiledIgnoreRule = {
      directory_only: isDirectoryRule,
      has_slash: glob.includes("/"),
      regex: compileGlobPattern(glob)
    };
    compiled.push(rule);
  }

  return compiled;
}
|
|
131
|
+
|
|
132
|
+
/**
 * Decides whether a single compiled rule excludes a path.
 *
 * Rules containing a "/" are tested against repo-relative paths; all other rules
 * are tested against one path segment (the basename of each candidate).
 *
 * A rule excludes the path when it matches any directory on the way to it, so
 * everything below an ignored directory is ignored too, even if the caller never
 * asked about that directory. This holds for rules written with or without a
 * trailing "/", which keeps `dist` at least as broad as `dist/`. Rules without a
 * trailing "/" may additionally match the path itself, whether file or directory.
 *
 * @param rule - Compiled rule to evaluate.
 * @param path - Repo-relative path; it is normalized again here.
 * @param kind - Whether `path` names a file or a directory.
 * @returns `true` when the rule excludes the path; always `false` for the root path.
 */
function matchesRule(rule: CompiledIgnoreRule, path: string, kind: IndexingPathKind): boolean {
  const normalized = normalizeRepoRelativePath(path);
  if (normalized.length === 0) {
    return false;
  }

  const matchesCandidate = (candidate: string): boolean =>
    rule.regex.test(rule.has_slash ? candidate : basename(candidate));

  // Only rules without a trailing slash are allowed to hit a file path directly.
  if (!rule.directory_only && matchesCandidate(normalized)) {
    return true;
  }

  // For kind "dir" this list ends with the path itself, so directory-only rules
  // still match the directory they name.
  return listAncestorDirectories(normalized, kind).some(matchesCandidate);
}
|
|
151
|
+
|
|
152
|
+
/**
 * Builds the ignore matcher for a project from the ignore files in its root directory.
 *
 * Each file named in `INDEXING_IGNORE_FILENAMES` is read in turn and the patterns are
 * concatenated. A file that cannot be read or processed for any reason is skipped
 * silently, so a project without ignore files yields a matcher with no patterns.
 *
 * The returned matcher never ignores the root (empty) path, always ignores files whose
 * name is one of the ignore-file names (case-insensitive), and otherwise ignores a path
 * when any compiled rule matches it.
 *
 * @param projectRootPath - Project root; resolved to an absolute path before use.
 * @returns A matcher exposing the collected patterns and the `shouldIgnorePath` check.
 */
export async function loadIndexingIgnoreMatcher(projectRootPath: string): Promise<IndexingIgnoreMatcher> {
  const rootDir = resolve(projectRootPath);
  const patterns: string[] = [];

  for (const ignoreFile of INDEXING_IGNORE_FILENAMES) {
    try {
      const text = await readFile(join(rootDir, ignoreFile), "utf8");
      patterns.push(...parseIgnorePatterns(text));
    } catch {
      // Best effort: a missing or unreadable ignore file contributes no patterns.
    }
  }

  const compiledRules = compileIgnoreRules(patterns);

  const shouldIgnorePath = (path: string, kind: IndexingPathKind): boolean => {
    const cleaned = normalizeRepoRelativePath(path);
    if (cleaned === "") {
      return false;
    }

    // The ignore files themselves are never indexed.
    const isControlFile = kind === "file" && INDEXING_CONTROL_FILENAMES.has(basename(cleaned).toLowerCase());
    return isControlFile || compiledRules.some((rule) => matchesRule(rule, cleaned, kind));
  };

  return { patterns, shouldIgnorePath };
}
|