@nomos-arc/arc 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +10 -0
- package/.nomos-config.json +5 -0
- package/CLAUDE.md +108 -0
- package/LICENSE +190 -0
- package/README.md +569 -0
- package/dist/cli.js +21120 -0
- package/docs/auth/googel_plan.yaml +1093 -0
- package/docs/auth/google_task.md +235 -0
- package/docs/auth/hardened_blueprint.yaml +1658 -0
- package/docs/auth/red_team_report.yaml +336 -0
- package/docs/auth/session_state.yaml +162 -0
- package/docs/certificate/cer_enhance_plan.md +605 -0
- package/docs/certificate/certificate_report.md +338 -0
- package/docs/dev_overview.md +419 -0
- package/docs/feature_assessment.md +156 -0
- package/docs/how_it_works.md +78 -0
- package/docs/infrastructure/map.md +867 -0
- package/docs/init/master_plan.md +3581 -0
- package/docs/init/red_team_report.md +215 -0
- package/docs/init/report_phase_1a.md +304 -0
- package/docs/integrity-gate/enhance_drift.md +703 -0
- package/docs/integrity-gate/overview.md +108 -0
- package/docs/management/manger-task.md +99 -0
- package/docs/management/scafffold.md +76 -0
- package/docs/map/ATOMIC_BLUEPRINT.md +1349 -0
- package/docs/map/RED_TEAM_REPORT.md +159 -0
- package/docs/map/map_task.md +147 -0
- package/docs/map/semantic_graph_task.md +792 -0
- package/docs/map/semantic_master_plan.md +705 -0
- package/docs/phase7/TEAM_RED.md +249 -0
- package/docs/phase7/plan.md +1682 -0
- package/docs/phase7/task.md +275 -0
- package/docs/prompts/USAGE.md +312 -0
- package/docs/prompts/architect.md +165 -0
- package/docs/prompts/executer.md +190 -0
- package/docs/prompts/hardener.md +190 -0
- package/docs/prompts/red_team.md +146 -0
- package/docs/verification/goveranance-overview.md +396 -0
- package/docs/verification/governance-overview.md +245 -0
- package/docs/verification/verification-arc-ar.md +560 -0
- package/docs/verification/verification-architecture.md +560 -0
- package/docs/very_next.md +52 -0
- package/docs/whitepaper.md +89 -0
- package/overview.md +1469 -0
- package/package.json +63 -0
- package/src/adapters/__tests__/git.test.ts +296 -0
- package/src/adapters/__tests__/stdio.test.ts +70 -0
- package/src/adapters/git.ts +226 -0
- package/src/adapters/pty.ts +159 -0
- package/src/adapters/stdio.ts +113 -0
- package/src/cli.ts +83 -0
- package/src/commands/apply.ts +47 -0
- package/src/commands/auth.ts +301 -0
- package/src/commands/certificate.ts +89 -0
- package/src/commands/discard.ts +24 -0
- package/src/commands/drift.ts +116 -0
- package/src/commands/index.ts +78 -0
- package/src/commands/init.ts +121 -0
- package/src/commands/list.ts +75 -0
- package/src/commands/map.ts +55 -0
- package/src/commands/plan.ts +30 -0
- package/src/commands/review.ts +58 -0
- package/src/commands/run.ts +63 -0
- package/src/commands/search.ts +147 -0
- package/src/commands/show.ts +63 -0
- package/src/commands/status.ts +59 -0
- package/src/core/__tests__/budget.test.ts +213 -0
- package/src/core/__tests__/certificate.test.ts +385 -0
- package/src/core/__tests__/config.test.ts +191 -0
- package/src/core/__tests__/preflight.test.ts +24 -0
- package/src/core/__tests__/prompt.test.ts +358 -0
- package/src/core/__tests__/review.test.ts +161 -0
- package/src/core/__tests__/state.test.ts +362 -0
- package/src/core/auth/__tests__/manager.test.ts +166 -0
- package/src/core/auth/__tests__/server.test.ts +220 -0
- package/src/core/auth/gcp-projects.ts +160 -0
- package/src/core/auth/manager.ts +114 -0
- package/src/core/auth/server.ts +141 -0
- package/src/core/budget.ts +119 -0
- package/src/core/certificate.ts +502 -0
- package/src/core/config.ts +212 -0
- package/src/core/errors.ts +54 -0
- package/src/core/factory.ts +49 -0
- package/src/core/graph/__tests__/builder.test.ts +272 -0
- package/src/core/graph/__tests__/contract-writer.test.ts +175 -0
- package/src/core/graph/__tests__/enricher.test.ts +299 -0
- package/src/core/graph/__tests__/parser.test.ts +200 -0
- package/src/core/graph/__tests__/pipeline.test.ts +202 -0
- package/src/core/graph/__tests__/renderer.test.ts +128 -0
- package/src/core/graph/__tests__/resolver.test.ts +185 -0
- package/src/core/graph/__tests__/scanner.test.ts +231 -0
- package/src/core/graph/__tests__/show.test.ts +134 -0
- package/src/core/graph/builder.ts +303 -0
- package/src/core/graph/constraints.ts +94 -0
- package/src/core/graph/contract-writer.ts +93 -0
- package/src/core/graph/drift/__tests__/classifier.test.ts +215 -0
- package/src/core/graph/drift/__tests__/comparator.test.ts +335 -0
- package/src/core/graph/drift/__tests__/drift.test.ts +453 -0
- package/src/core/graph/drift/__tests__/reporter.test.ts +203 -0
- package/src/core/graph/drift/classifier.ts +165 -0
- package/src/core/graph/drift/comparator.ts +205 -0
- package/src/core/graph/drift/reporter.ts +77 -0
- package/src/core/graph/enricher.ts +251 -0
- package/src/core/graph/grammar-paths.ts +30 -0
- package/src/core/graph/html-template.ts +493 -0
- package/src/core/graph/map-schema.ts +137 -0
- package/src/core/graph/parser.ts +336 -0
- package/src/core/graph/pipeline.ts +209 -0
- package/src/core/graph/renderer.ts +92 -0
- package/src/core/graph/resolver.ts +195 -0
- package/src/core/graph/scanner.ts +145 -0
- package/src/core/logger.ts +46 -0
- package/src/core/orchestrator.ts +792 -0
- package/src/core/plan-file-manager.ts +66 -0
- package/src/core/preflight.ts +64 -0
- package/src/core/prompt.ts +173 -0
- package/src/core/review.ts +95 -0
- package/src/core/state.ts +294 -0
- package/src/core/worktree-coordinator.ts +77 -0
- package/src/search/__tests__/chunk-extractor.test.ts +339 -0
- package/src/search/__tests__/embedder-auth.test.ts +124 -0
- package/src/search/__tests__/embedder.test.ts +267 -0
- package/src/search/__tests__/graph-enricher.test.ts +178 -0
- package/src/search/__tests__/indexer.test.ts +518 -0
- package/src/search/__tests__/integration.test.ts +649 -0
- package/src/search/__tests__/query-engine.test.ts +334 -0
- package/src/search/__tests__/similarity.test.ts +78 -0
- package/src/search/__tests__/vector-store.test.ts +281 -0
- package/src/search/chunk-extractor.ts +167 -0
- package/src/search/embedder.ts +209 -0
- package/src/search/graph-enricher.ts +95 -0
- package/src/search/indexer.ts +483 -0
- package/src/search/lexical-searcher.ts +190 -0
- package/src/search/query-engine.ts +225 -0
- package/src/search/vector-store.ts +311 -0
- package/src/types/index.ts +572 -0
- package/src/utils/__tests__/ansi.test.ts +54 -0
- package/src/utils/__tests__/frontmatter.test.ts +79 -0
- package/src/utils/__tests__/sanitize.test.ts +229 -0
- package/src/utils/ansi.ts +19 -0
- package/src/utils/context.ts +44 -0
- package/src/utils/frontmatter.ts +27 -0
- package/src/utils/sanitize.ts +78 -0
- package/test/e2e/lifecycle.test.ts +330 -0
- package/test/fixtures/mock-planner-hang.ts +5 -0
- package/test/fixtures/mock-planner.ts +26 -0
- package/test/fixtures/mock-reviewer-bad.ts +8 -0
- package/test/fixtures/mock-reviewer-retry.ts +34 -0
- package/test/fixtures/mock-reviewer.ts +18 -0
- package/test/fixtures/sample-project/src/circular-a.ts +6 -0
- package/test/fixtures/sample-project/src/circular-b.ts +6 -0
- package/test/fixtures/sample-project/src/config.ts +15 -0
- package/test/fixtures/sample-project/src/main.ts +19 -0
- package/test/fixtures/sample-project/src/services/product-service.ts +20 -0
- package/test/fixtures/sample-project/src/services/user-service.ts +18 -0
- package/test/fixtures/sample-project/src/types.ts +14 -0
- package/test/fixtures/sample-project/src/utils/index.ts +14 -0
- package/test/fixtures/sample-project/src/utils/validate.ts +12 -0
- package/tsconfig.json +20 -0
- package/vitest.config.ts +12 -0
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
import fs from 'node:fs/promises';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import type { Logger } from 'winston';
|
|
4
|
+
import { NomosError } from '../core/errors.js';
|
|
5
|
+
import type { IndexMetadata, NomosConfig, ProjectMap, SearchResult } from '../types/index.js';
|
|
6
|
+
import { Embedder } from './embedder.js';
|
|
7
|
+
import { GraphEnricher } from './graph-enricher.js';
|
|
8
|
+
import { LexicalSearcher } from './lexical-searcher.js';
|
|
9
|
+
import { VectorStore } from './vector-store.js';
|
|
10
|
+
import { AuthManager } from '../core/auth/manager.js';
|
|
11
|
+
|
|
12
|
+
// ─── QueryEngine ──────────────────────────────────────────────────────────────
|
|
13
|
+
|
|
14
|
+
export class QueryEngine {
  private _embedder: Embedder | null = null; // lazy — not created until first query
  private readonly store: VectorStore;
  private readonly enricher: GraphEnricher;
  private readonly lexical: LexicalSearcher;
  private readonly metaPath: string;
  private readonly projectMapPath: string;

  /**
   * Wires up the vector store, graph enricher and lexical searcher.
   * The embedder is intentionally NOT created here; see getEmbedder().
   *
   * @param projectRoot  Root directory used to resolve the project map path.
   * @param config       Supplies `search.*` and `graph.output_dir` settings.
   * @param logger       Receives the warn/info messages emitted by search().
   * @param authManager  Optional; forwarded untouched to Embedder.create().
   */
  constructor(
    private readonly projectRoot: string,
    private readonly config: NomosConfig,
    private readonly logger: Logger,
    private readonly authManager?: AuthManager | null,
  ) {
    this.store = new VectorStore(
      config.search.vector_store_path,
      logger,
      config.search.embedding_dimensions,
    );
    this.metaPath = path.join(config.search.vector_store_path, 'index-meta.json');
    this.projectMapPath = path.join(projectRoot, config.graph.output_dir, 'project_map.json');
    this.enricher = new GraphEnricher(this.projectMapPath, logger);
    this.lexical = new LexicalSearcher();
  }

  /**
   * Execute a hybrid search query.
   *
   * Pipeline:
   *   1. If vector index metadata is readable → embed query → vector search → graph enrich
   *   2. Find project-map files NOT covered by the vector index → lexical search
   *   3. Merge → de-duplicate → sort by similarity_score descending → return top-K
   *
   * If no usable vector index exists, or the vector step throws, every file in
   * the project map is searched lexically instead.
   *
   * @param query    Free-text query; must contain a non-whitespace character.
   * @param options  Optional overrides for `topK` and `threshold`; the defaults
   *                 come from `config.search.default_top_k` / `default_threshold`.
   * @returns At most `topK` results, best first.
   * @throws NomosError `search_query_failed` when the query is blank.
   * @throws NomosError `search_index_not_found` when the project map cannot be loaded.
   */
  async search(
    query: string,
    options?: { topK?: number; threshold?: number },
  ): Promise<SearchResult[]> {
    const topK = options?.topK ?? this.config.search.default_top_k;
    const threshold = options?.threshold ?? this.config.search.default_threshold;

    // Step 1: Validate query
    if (!query.trim()) {
      throw new NomosError('search_query_failed', 'Query must be a non-empty string.');
    }

    // Step 2: Try to load vector index metadata (null → lexical-only search)
    const meta = await this.loadIndexMetadata();

    // Step 3: Load project map (required for both lexical and graph enrichment)
    const projectMap = await this.loadProjectMap();

    let vectorResults: SearchResult[] = [];
    let indexedFilePaths = new Set<string>();

    // Step 4: Vector search (if index available)
    if (meta !== null) {
      if (meta.status === 'in_progress') {
        this.logger.warn(
          '[nomos:search:warn] Index is incomplete. Results may be partial.',
        );
      }
      await this.warnIfIndexStale(meta);

      try {
        await this.store.init();
        const embedder = await this.getEmbedder();
        const queryVector = await embedder.embedOne(query.trim());
        const rawResults = await this.store.query(queryVector, topK, threshold);
        vectorResults = await this.enricher.enrich(rawResults);
        // Assigned last on purpose: if anything above throws, the set stays
        // empty and the lexical step below covers every file in the project map.
        // `?? {}` because the metadata is unvalidated JSON and `files` may be absent.
        indexedFilePaths = new Set(Object.keys(meta.files ?? {}));
      } catch (err) {
        // Vector search failed — log and continue with lexical-only
        this.logger.warn(
          `[nomos:search:warn] Vector search failed, falling back to lexical search: ${
            err instanceof Error ? err.message : String(err)
          }`,
        );
      }
    } else {
      this.logger.info(
        '[nomos:search:info] No vector index found. Using lexical search only.',
      );
    }

    // Step 5: Lexical search for files NOT in the vector index
    const nonIndexedFiles = new Set<string>(
      Object.keys(projectMap.files).filter((fp) => !indexedFilePaths.has(fp)),
    );

    let lexicalResults: SearchResult[] = [];
    if (nonIndexedFiles.size > 0) {
      lexicalResults = this.lexical.search(
        query, projectMap, nonIndexedFiles, topK, threshold,
      );
      if (lexicalResults.length > 0) {
        this.logger.info(
          `[nomos:search:info] Lexical fallback found ${lexicalResults.length} results from ${nonIndexedFiles.size} non-indexed files.`,
        );
      }
    }

    // Steps 6-7: Merge both result sets, then de-duplicate
    const deduped = this.deduplicate([...vectorResults, ...lexicalResults]);

    // Step 8: Sort by similarity_score descending
    deduped.sort((a, b) => b.similarity_score - a.similarity_score);

    // Step 9: Return top-K
    return deduped.slice(0, topK);
  }

  /** Lazy Embedder accessor — Embedder.create() is only invoked on the first vector query. */
  private async getEmbedder(): Promise<Embedder> {
    if (!this._embedder) {
      this._embedder = await Embedder.create(
        this.config.search, this.logger, this.authManager,
      );
    }
    return this._embedder;
  }

  // ─── Private helpers ────────────────────────────────────────────────────────

  /**
   * Read and parse `index-meta.json`.
   *
   * A missing file is the normal "no index yet" case and returns null silently.
   * Any other failure (unreadable file, invalid JSON, non-object payload) also
   * returns null so the search can continue lexically, but is logged as a
   * warning instead of being swallowed.
   */
  private async loadIndexMetadata(): Promise<IndexMetadata | null> {
    try {
      const raw = await fs.readFile(this.metaPath, 'utf-8');
      const parsed: unknown = JSON.parse(raw);
      if (parsed === null || typeof parsed !== 'object') {
        throw new Error('metadata file does not contain a JSON object');
      }
      return parsed as IndexMetadata;
    } catch (err) {
      const isMissingFile =
        typeof err === 'object' &&
        err !== null &&
        (err as { code?: unknown }).code === 'ENOENT';
      if (!isMissingFile) {
        this.logger.warn(
          `[nomos:search:warn] Could not read vector index metadata at ${this.metaPath}, ignoring vector index: ${
            err instanceof Error ? err.message : String(err)
          }`,
        );
      }
      return null;
    }
  }

  /**
   * Read and parse `project_map.json`.
   *
   * @throws NomosError `search_index_not_found` on any read or parse failure.
   */
  private async loadProjectMap(): Promise<ProjectMap> {
    try {
      const raw = await fs.readFile(this.projectMapPath, 'utf-8');
      return JSON.parse(raw) as ProjectMap;
    } catch {
      throw new NomosError(
        'search_index_not_found',
        'No project map found. Run: arc map',
      );
    }
  }

  /**
   * De-duplication [TRAP-3]:
   * Results are grouped by file_path. When a path has BOTH a 'file' result and
   * one or more non-'file' (symbol) results:
   *   - gap = abs(file score - best symbol score)
   *   - gap <= 0.05: drop the file-level result (keep only the symbol results)
   *   - gap >  0.05: keep both (sufficiently distinct in relevance)
   *
   * When several 'file' results share a path, only the highest-scoring one is
   * retained (the first one wins on a tie).
   */
  private deduplicate(results: SearchResult[]): SearchResult[] {
    const maxGapForRemoval = 0.05;
    const byFile = new Map<string, { file: SearchResult | null; symbols: SearchResult[] }>();

    for (const r of results) {
      let group = byFile.get(r.file_path);
      if (group === undefined) {
        group = { file: null, symbols: [] };
        byFile.set(r.file_path, group);
      }

      if (r.type !== 'file') {
        group.symbols.push(r);
      } else if (group.file === null || r.similarity_score > group.file.similarity_score) {
        // Never let a weaker duplicate replace a stronger file-level hit.
        group.file = r;
      }
    }

    const output: SearchResult[] = [];

    for (const { file, symbols } of byFile.values()) {
      if (file !== null && symbols.length > 0) {
        const bestSymbolScore = symbols.reduce(
          (best, s) => Math.max(best, s.similarity_score),
          -Infinity,
        );
        const gap = Math.abs(file.similarity_score - bestSymbolScore);
        // TRAP-3: only keep the file-level result when it is clearly distinct.
        if (gap > maxGapForRemoval) {
          output.push(file);
        }
      } else if (file !== null) {
        output.push(file);
      }
      output.push(...symbols);
    }

    return output;
  }

  /**
   * Warn if the vector index is older than the project map [STALE INDEX WARNING].
   *
   * Compares `meta.last_full_index` with the project map's `generated_at`.
   * Any read/parse failure is ignored: this check is advisory only.
   */
  private async warnIfIndexStale(meta: IndexMetadata): Promise<void> {
    try {
      const raw = await fs.readFile(this.projectMapPath, 'utf-8');
      const projectMap = JSON.parse(raw) as { generated_at: string };
      const indexTime = new Date(meta.last_full_index).getTime();
      const mapTime = new Date(projectMap.generated_at).getTime();
      // An unparsable date yields NaN, which makes the comparison false (no warning).
      if (indexTime < mapTime) {
        this.logger.warn(
          '[nomos:search:warn] Index is older than project map. Consider running: arc index --incremental',
        );
      }
    } catch {
      // project_map.json may not exist — non-fatal, skip warning
    }
  }
}
|
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
import * as lancedb from '@lancedb/lancedb';
|
|
2
|
+
import { makeArrowTable } from '@lancedb/lancedb';
|
|
3
|
+
import { Field, FixedSizeList, Float32, Int32, Schema, Utf8 } from 'apache-arrow';
|
|
4
|
+
import type { Connection } from '@lancedb/lancedb';
|
|
5
|
+
import type { Logger } from 'winston';
|
|
6
|
+
import { NomosError } from '../core/errors.js';
|
|
7
|
+
import type { VectorRecord } from '../types/index.js';
|
|
8
|
+
|
|
9
|
+
// Name of the table that query(), count(), upsert() and deleteByFilePaths() operate on.
const LIVE_TABLE = 'nomos_vectors';
|
|
10
|
+
// Name of the scratch table written by upsertToStaging() and consumed by promoteStagingToLive().
const STAGING_TABLE = 'nomos_vectors_staging';
|
|
11
|
+
|
|
12
|
+
/**
 * Build the explicit Arrow schema used for VectorRecord rows.
 *
 * An explicit schema is needed because LanceDB cannot infer a column type from
 * null values. The width of the `vector` column is a parameter and has to
 * agree with `embedding_dimensions` in the config.
 *
 * @param vectorDims  Fixed length of the `vector` list column.
 * @returns Schema with the columns in the order LanceDB stores them.
 */
function makeVectorSchema(vectorDims: number): Schema {
  // Small builders so each column reads as "kind(name, nullable?)".
  const text = (name: string, nullable = false): Field => new Field(name, new Utf8(), nullable);
  const int32 = (name: string, nullable = false): Field => new Field(name, new Int32(), nullable);

  const embeddingType = new FixedSizeList(vectorDims, new Field('item', new Float32(), true));

  const columns: Field[] = [
    text('id'),
    text('type'),
    new Field('vector', embeddingType, false),
    text('file_path'),
    text('module'),
    text('purpose'),
    text('symbol_name', true), // nullable
    text('symbol_type', true), // nullable
    int32('line_start', true), // nullable
    int32('line_end', true), // nullable
    text('parent_file_id', true), // nullable
    int32('graph_depth'),
    int32('dependents_count'),
    text('last_indexed'),
    text('content_hash'),
  ];

  return new Schema(columns);
}
|
|
36
|
+
|
|
37
|
+
// Untyped row object as handed to / read back from LanceDB.
type PlainRow = Record<string, unknown>;
|
|
38
|
+
// Row shape of a nearest-neighbour query result: the stored columns plus the `_distance` field read in query().
type RawQueryRow = PlainRow & { _distance: number };
|
|
39
|
+
|
|
40
|
+
export class VectorStore {
  private db: Connection | null = null;
  private hasMergeInsert: boolean = false; // [TRAP-1] detected at init
  private vectorDims: number;

  /**
   * @param storePath   Location passed to `lancedb.connect()`.
   * @param logger      Receives info/warn messages from cleanupStaging().
   * @param vectorDims  Width of the `vector` column; defaults to 768.
   */
  constructor(
    private readonly storePath: string,
    private readonly logger: Logger,
    vectorDims: number = 768,
  ) {
    this.vectorDims = vectorDims;
  }

  /**
   * Initialize the DB connection and detect mergeInsert capability. [GAP-2]
   *
   * @throws NomosError `search_index_corrupted` when the store cannot be opened.
   */
  async init(): Promise<void> {
    let db: Connection;
    try {
      db = await lancedb.connect(this.storePath);
    } catch (err) {
      throw new NomosError(
        'search_index_corrupted',
        `Failed to open vector store at ${this.storePath}. ` +
          `The index may be corrupted. Run: arc index --force\n` +
          `Original error: ${err instanceof Error ? err.message : String(err)}`,
      );
    }
    this.db = db;

    // [TRAP-1] Detect mergeInsert capability; any failure here means "not available".
    try {
      const names = await db.tableNames();
      if (names.includes(LIVE_TABLE)) {
        this.hasMergeInsert = await this.supportsMergeInsert(db);
      }
    } catch {
      this.hasMergeInsert = false;
    }
  }

  /**
   * Upsert a batch of vector records into the LIVE table.
   * Creates the table on first use. Afterwards uses mergeInsert keyed on `id`
   * if available; otherwise rewrites the table with the batch merged in by id.
   * Called per-batch during incremental indexing [BLOCKER-4].
   */
  async upsert(records: VectorRecord[]): Promise<void> {
    const db = this.connection();
    const schema = makeVectorSchema(this.vectorDims);
    const names = await db.tableNames();

    if (!names.includes(LIVE_TABLE)) {
      // Table doesn't exist yet — create with explicit schema
      const initial = makeArrowTable(this.toPlainRows(records), { schema });
      await db.createTable(LIVE_TABLE, initial, { mode: 'overwrite' });
      this.hasMergeInsert = await this.supportsMergeInsert(db);
      return;
    }

    const table = await db.openTable(LIVE_TABLE);

    if (this.hasMergeInsert) {
      const batch = makeArrowTable(this.toPlainRows(records), { schema });
      await table
        .mergeInsert('id')
        .whenMatchedUpdateAll()
        .whenNotMatchedInsertAll()
        .execute(batch);
      return;
    }

    // [TRAP-1] Fallback: read existing rows, merge by id, overwrite in one createTable call.
    // No delete-then-add: avoids a window where records are missing.
    const existing = (await table.query().toArray()) as PlainRow[];
    const merged = new Map(existing.map((r) => [r['id'] as string, r]));
    for (const rec of this.toPlainRows(records)) {
      merged.set(rec['id'] as string, rec);
    }
    const rewritten = makeArrowTable(Array.from(merged.values()), { schema });
    await db.createTable(LIVE_TABLE, rewritten, { mode: 'overwrite' });
  }

  /**
   * Write records to the STAGING table for full re-index [BLOCKER-1].
   * Creates the staging table on the first batch and appends afterwards.
   * Does NOT touch the live table. Called per-batch.
   */
  async upsertToStaging(records: VectorRecord[]): Promise<void> {
    const db = this.connection();
    const schema = makeVectorSchema(this.vectorDims);
    const names = await db.tableNames();
    const batch = makeArrowTable(this.toPlainRows(records), { schema });

    if (names.includes(STAGING_TABLE)) {
      const staging = await db.openTable(STAGING_TABLE);
      await staging.add(batch);
    } else {
      await db.createTable(STAGING_TABLE, batch, { mode: 'overwrite' });
    }
  }

  /**
   * Replace the live table with the contents of the staging table [BLOCKER-1].
   *
   * There is no rename step: staging is read as an Arrow table, the live table
   * is (re)created from it with `mode: 'overwrite'`, then staging is dropped.
   *
   * The live table is deliberately NOT dropped up front. The overwrite replaces
   * it in the same call that writes the new data, so a failure while reading
   * staging leaves the previous live index untouched.
   *
   * @throws NomosError `search_index_failed` when no staging table exists;
   *         the live table is never touched in that case.
   */
  async promoteStagingToLive(): Promise<void> {
    const db = this.connection();
    const names = await db.tableNames();

    if (!names.includes(STAGING_TABLE)) {
      throw new NomosError(
        'search_index_failed',
        'Staging table does not exist. Index may have failed before any data was written.',
      );
    }

    // Read staging FIRST. toArrow() preserves the exact Arrow schema from the
    // staging table, so vector column types are not re-inferred.
    const staging = await db.openTable(STAGING_TABLE);
    const stagingArrow = await staging.query().toArrow();

    // 'overwrite' replaces an existing live table (or creates it if absent).
    await db.createTable(LIVE_TABLE, stagingArrow, { mode: 'overwrite' });
    await db.dropTable(STAGING_TABLE);

    // Re-detect mergeInsert capability on new live table
    try {
      this.hasMergeInsert = await this.supportsMergeInsert(db);
    } catch {
      this.hasMergeInsert = false;
    }
  }

  /**
   * Drop the staging table if it exists (cleanup after failed/interrupted index). [GAP-3]
   * Failures are logged as warnings and never thrown.
   */
  async cleanupStaging(): Promise<void> {
    const db = this.connection();
    try {
      const names = await db.tableNames();
      if (names.includes(STAGING_TABLE)) {
        await db.dropTable(STAGING_TABLE);
        this.logger.info('[nomos:search] Cleaned up orphaned staging table from prior run.');
      }
    } catch (err) {
      // Non-fatal: log and continue
      this.logger.warn(
        `[nomos:search:warn] Failed to clean up staging table: ${err instanceof Error ? err.message : String(err)}`,
      );
    }
  }

  /**
   * Query the LIVE table with a vector. Returns top-K results above threshold.
   *
   * Distance type: `dot` (mathematically equivalent to cosine for L2-normalized vectors,
   * which is what Gemini embeddings produce). Cosine SIMD path requires AVX2 which is not
   * universally available; dot product path is hardware-portable. [S-3]
   *
   * Similarity conversion: similarity = 1 - _distance, then clamped to [0, 1].
   * Result records do NOT contain the raw vector field. [S-5]
   *
   * @param vector     Query embedding.
   * @param topK       Maximum number of rows returned; twice as many are fetched
   *                   so that threshold filtering can still fill the result.
   * @param threshold  Minimum similarity_score (inclusive) for a row to be kept.
   * @throws NomosError `search_index_not_found` when the live table does not exist.
   */
  async query(
    vector: Float32Array,
    topK: number,
    threshold: number,
  ): Promise<Array<Omit<VectorRecord, 'vector'> & { similarity_score: number }>> {
    const db = this.connection();
    const names = await db.tableNames();
    if (!names.includes(LIVE_TABLE)) {
      throw new NomosError(
        'search_index_not_found',
        `Vector index not found at ${this.storePath}. Run: arc index`,
      );
    }

    const table = await db.openTable(LIVE_TABLE);
    const raw = (await table
      .query()
      .nearestTo(Array.from(vector))
      .distanceType('dot') // equivalent to cosine for normalized vectors; hardware-portable
      .limit(topK * 2) // over-fetch to allow post-filter
      .toArray()) as RawQueryRow[];

    return raw
      .map((row) => {
        // Strip _distance and vector from output [S-5]
        const { _distance, vector: _vec, ...rest } = row as RawQueryRow & { vector?: unknown };
        void _vec;

        // [S-3] Clamp similarity ∈ [0, 1]
        const similarity_score = Math.max(0, Math.min(1, 1 - _distance));
        return { ...(rest as Omit<VectorRecord, 'vector'>), similarity_score };
      })
      .filter((r) => r.similarity_score >= threshold)
      .slice(0, topK);
  }

  /**
   * Delete all records whose file_path matches any of the given paths.
   * Used for incremental re-indexing: delete stale → upsert fresh.
   * A no-op when the list is empty or the live table does not exist.
   */
  async deleteByFilePaths(filePaths: string[]): Promise<void> {
    const db = this.connection();
    if (filePaths.length === 0) return; // nothing requested — skip the table lookup

    const names = await db.tableNames();
    if (!names.includes(LIVE_TABLE)) {
      return; // Nothing to delete
    }

    const table = await db.openTable(LIVE_TABLE);
    // Build SQL-safe IN clause — escape single quotes in paths by doubling them
    const escaped = filePaths.map((p) => `'${p.replace(/'/g, "''")}'`).join(', ');
    await table.delete(`file_path IN (${escaped})`);
  }

  /**
   * Return total record count in live table (0 when the table does not exist).
   */
  async count(): Promise<number> {
    const db = this.connection();
    const names = await db.tableNames();
    if (!names.includes(LIVE_TABLE)) {
      return 0;
    }
    const table = await db.openTable(LIVE_TABLE);
    return await table.countRows();
  }

  // ─── Private helpers ────────────────────────────────────────────────────────

  /**
   * Guard used by every public operation.
   *
   * @throws NomosError `search_index_corrupted` when init() has not connected yet.
   */
  private assertConnected(): void {
    if (!this.db) {
      throw new NomosError(
        'search_index_corrupted',
        'VectorStore.init() must be called before any operation.',
      );
    }
  }

  /** Return the open connection, throwing via assertConnected() when there is none. */
  private connection(): Connection {
    this.assertConnected();
    return this.db as Connection;
  }

  /** [TRAP-1] Open the live table and report whether it exposes a `mergeInsert` function. */
  private async supportsMergeInsert(db: Connection): Promise<boolean> {
    const live = await db.openTable(LIVE_TABLE);
    return typeof live.mergeInsert === 'function';
  }

  /**
   * Convert VectorRecord[] to plain row objects for LanceDB.
   * vector: Float32Array → number[] (LanceDB handles typed arrays but plain arrays are safer).
   * Null fields preserved as-is — schema declares them nullable.
   */
  private toPlainRows(records: VectorRecord[]): PlainRow[] {
    return records.map((r) => ({
      id: r.id,
      type: r.type,
      vector: Array.from(r.vector),
      file_path: r.file_path,
      module: r.module,
      purpose: r.purpose,
      symbol_name: r.symbol_name,
      symbol_type: r.symbol_type,
      line_start: r.line_start,
      line_end: r.line_end,
      parent_file_id: r.parent_file_id,
      graph_depth: r.graph_depth,
      dependents_count: r.dependents_count,
      last_indexed: r.last_indexed,
      content_hash: r.content_hash,
    }));
  }
}
|