@nomos-arc/arc 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. package/.claude/settings.local.json +10 -0
  2. package/.nomos-config.json +5 -0
  3. package/CLAUDE.md +108 -0
  4. package/LICENSE +190 -0
  5. package/README.md +569 -0
  6. package/dist/cli.js +21120 -0
  7. package/docs/auth/googel_plan.yaml +1093 -0
  8. package/docs/auth/google_task.md +235 -0
  9. package/docs/auth/hardened_blueprint.yaml +1658 -0
  10. package/docs/auth/red_team_report.yaml +336 -0
  11. package/docs/auth/session_state.yaml +162 -0
  12. package/docs/certificate/cer_enhance_plan.md +605 -0
  13. package/docs/certificate/certificate_report.md +338 -0
  14. package/docs/dev_overview.md +419 -0
  15. package/docs/feature_assessment.md +156 -0
  16. package/docs/how_it_works.md +78 -0
  17. package/docs/infrastructure/map.md +867 -0
  18. package/docs/init/master_plan.md +3581 -0
  19. package/docs/init/red_team_report.md +215 -0
  20. package/docs/init/report_phase_1a.md +304 -0
  21. package/docs/integrity-gate/enhance_drift.md +703 -0
  22. package/docs/integrity-gate/overview.md +108 -0
  23. package/docs/management/manger-task.md +99 -0
  24. package/docs/management/scafffold.md +76 -0
  25. package/docs/map/ATOMIC_BLUEPRINT.md +1349 -0
  26. package/docs/map/RED_TEAM_REPORT.md +159 -0
  27. package/docs/map/map_task.md +147 -0
  28. package/docs/map/semantic_graph_task.md +792 -0
  29. package/docs/map/semantic_master_plan.md +705 -0
  30. package/docs/phase7/TEAM_RED.md +249 -0
  31. package/docs/phase7/plan.md +1682 -0
  32. package/docs/phase7/task.md +275 -0
  33. package/docs/prompts/USAGE.md +312 -0
  34. package/docs/prompts/architect.md +165 -0
  35. package/docs/prompts/executer.md +190 -0
  36. package/docs/prompts/hardener.md +190 -0
  37. package/docs/prompts/red_team.md +146 -0
  38. package/docs/verification/goveranance-overview.md +396 -0
  39. package/docs/verification/governance-overview.md +245 -0
  40. package/docs/verification/verification-arc-ar.md +560 -0
  41. package/docs/verification/verification-architecture.md +560 -0
  42. package/docs/very_next.md +52 -0
  43. package/docs/whitepaper.md +89 -0
  44. package/overview.md +1469 -0
  45. package/package.json +63 -0
  46. package/src/adapters/__tests__/git.test.ts +296 -0
  47. package/src/adapters/__tests__/stdio.test.ts +70 -0
  48. package/src/adapters/git.ts +226 -0
  49. package/src/adapters/pty.ts +159 -0
  50. package/src/adapters/stdio.ts +113 -0
  51. package/src/cli.ts +83 -0
  52. package/src/commands/apply.ts +47 -0
  53. package/src/commands/auth.ts +301 -0
  54. package/src/commands/certificate.ts +89 -0
  55. package/src/commands/discard.ts +24 -0
  56. package/src/commands/drift.ts +116 -0
  57. package/src/commands/index.ts +78 -0
  58. package/src/commands/init.ts +121 -0
  59. package/src/commands/list.ts +75 -0
  60. package/src/commands/map.ts +55 -0
  61. package/src/commands/plan.ts +30 -0
  62. package/src/commands/review.ts +58 -0
  63. package/src/commands/run.ts +63 -0
  64. package/src/commands/search.ts +147 -0
  65. package/src/commands/show.ts +63 -0
  66. package/src/commands/status.ts +59 -0
  67. package/src/core/__tests__/budget.test.ts +213 -0
  68. package/src/core/__tests__/certificate.test.ts +385 -0
  69. package/src/core/__tests__/config.test.ts +191 -0
  70. package/src/core/__tests__/preflight.test.ts +24 -0
  71. package/src/core/__tests__/prompt.test.ts +358 -0
  72. package/src/core/__tests__/review.test.ts +161 -0
  73. package/src/core/__tests__/state.test.ts +362 -0
  74. package/src/core/auth/__tests__/manager.test.ts +166 -0
  75. package/src/core/auth/__tests__/server.test.ts +220 -0
  76. package/src/core/auth/gcp-projects.ts +160 -0
  77. package/src/core/auth/manager.ts +114 -0
  78. package/src/core/auth/server.ts +141 -0
  79. package/src/core/budget.ts +119 -0
  80. package/src/core/certificate.ts +502 -0
  81. package/src/core/config.ts +212 -0
  82. package/src/core/errors.ts +54 -0
  83. package/src/core/factory.ts +49 -0
  84. package/src/core/graph/__tests__/builder.test.ts +272 -0
  85. package/src/core/graph/__tests__/contract-writer.test.ts +175 -0
  86. package/src/core/graph/__tests__/enricher.test.ts +299 -0
  87. package/src/core/graph/__tests__/parser.test.ts +200 -0
  88. package/src/core/graph/__tests__/pipeline.test.ts +202 -0
  89. package/src/core/graph/__tests__/renderer.test.ts +128 -0
  90. package/src/core/graph/__tests__/resolver.test.ts +185 -0
  91. package/src/core/graph/__tests__/scanner.test.ts +231 -0
  92. package/src/core/graph/__tests__/show.test.ts +134 -0
  93. package/src/core/graph/builder.ts +303 -0
  94. package/src/core/graph/constraints.ts +94 -0
  95. package/src/core/graph/contract-writer.ts +93 -0
  96. package/src/core/graph/drift/__tests__/classifier.test.ts +215 -0
  97. package/src/core/graph/drift/__tests__/comparator.test.ts +335 -0
  98. package/src/core/graph/drift/__tests__/drift.test.ts +453 -0
  99. package/src/core/graph/drift/__tests__/reporter.test.ts +203 -0
  100. package/src/core/graph/drift/classifier.ts +165 -0
  101. package/src/core/graph/drift/comparator.ts +205 -0
  102. package/src/core/graph/drift/reporter.ts +77 -0
  103. package/src/core/graph/enricher.ts +251 -0
  104. package/src/core/graph/grammar-paths.ts +30 -0
  105. package/src/core/graph/html-template.ts +493 -0
  106. package/src/core/graph/map-schema.ts +137 -0
  107. package/src/core/graph/parser.ts +336 -0
  108. package/src/core/graph/pipeline.ts +209 -0
  109. package/src/core/graph/renderer.ts +92 -0
  110. package/src/core/graph/resolver.ts +195 -0
  111. package/src/core/graph/scanner.ts +145 -0
  112. package/src/core/logger.ts +46 -0
  113. package/src/core/orchestrator.ts +792 -0
  114. package/src/core/plan-file-manager.ts +66 -0
  115. package/src/core/preflight.ts +64 -0
  116. package/src/core/prompt.ts +173 -0
  117. package/src/core/review.ts +95 -0
  118. package/src/core/state.ts +294 -0
  119. package/src/core/worktree-coordinator.ts +77 -0
  120. package/src/search/__tests__/chunk-extractor.test.ts +339 -0
  121. package/src/search/__tests__/embedder-auth.test.ts +124 -0
  122. package/src/search/__tests__/embedder.test.ts +267 -0
  123. package/src/search/__tests__/graph-enricher.test.ts +178 -0
  124. package/src/search/__tests__/indexer.test.ts +518 -0
  125. package/src/search/__tests__/integration.test.ts +649 -0
  126. package/src/search/__tests__/query-engine.test.ts +334 -0
  127. package/src/search/__tests__/similarity.test.ts +78 -0
  128. package/src/search/__tests__/vector-store.test.ts +281 -0
  129. package/src/search/chunk-extractor.ts +167 -0
  130. package/src/search/embedder.ts +209 -0
  131. package/src/search/graph-enricher.ts +95 -0
  132. package/src/search/indexer.ts +483 -0
  133. package/src/search/lexical-searcher.ts +190 -0
  134. package/src/search/query-engine.ts +225 -0
  135. package/src/search/vector-store.ts +311 -0
  136. package/src/types/index.ts +572 -0
  137. package/src/utils/__tests__/ansi.test.ts +54 -0
  138. package/src/utils/__tests__/frontmatter.test.ts +79 -0
  139. package/src/utils/__tests__/sanitize.test.ts +229 -0
  140. package/src/utils/ansi.ts +19 -0
  141. package/src/utils/context.ts +44 -0
  142. package/src/utils/frontmatter.ts +27 -0
  143. package/src/utils/sanitize.ts +78 -0
  144. package/test/e2e/lifecycle.test.ts +330 -0
  145. package/test/fixtures/mock-planner-hang.ts +5 -0
  146. package/test/fixtures/mock-planner.ts +26 -0
  147. package/test/fixtures/mock-reviewer-bad.ts +8 -0
  148. package/test/fixtures/mock-reviewer-retry.ts +34 -0
  149. package/test/fixtures/mock-reviewer.ts +18 -0
  150. package/test/fixtures/sample-project/src/circular-a.ts +6 -0
  151. package/test/fixtures/sample-project/src/circular-b.ts +6 -0
  152. package/test/fixtures/sample-project/src/config.ts +15 -0
  153. package/test/fixtures/sample-project/src/main.ts +19 -0
  154. package/test/fixtures/sample-project/src/services/product-service.ts +20 -0
  155. package/test/fixtures/sample-project/src/services/user-service.ts +18 -0
  156. package/test/fixtures/sample-project/src/types.ts +14 -0
  157. package/test/fixtures/sample-project/src/utils/index.ts +14 -0
  158. package/test/fixtures/sample-project/src/utils/validate.ts +12 -0
  159. package/tsconfig.json +20 -0
  160. package/vitest.config.ts +12 -0
@@ -0,0 +1,334 @@
1
+ import { describe, it, expect, vi, beforeEach } from 'vitest';
2
+ import { QueryEngine } from '../query-engine.js';
3
+ import { NomosError } from '../../core/errors.js';
4
+ import type { Logger } from 'winston';
5
+ import type { IndexMetadata, NomosConfig, SearchResult } from '../../types/index.js';
6
+
7
+ // ─── Mocks ────────────────────────────────────────────────────────────────────
8
+
9
+ // Hoist mock references so vi.mock factories can close over them
10
+ const {
11
+ mockEmbedderEmbedOne,
12
+ mockVectorStoreInit,
13
+ mockVectorStoreQuery,
14
+ mockGraphEnricherEnrich,
15
+ } = vi.hoisted(() => ({
16
+ mockEmbedderEmbedOne: vi.fn(),
17
+ mockVectorStoreInit: vi.fn(),
18
+ mockVectorStoreQuery: vi.fn(),
19
+ mockGraphEnricherEnrich: vi.fn(),
20
+ }));
21
+
22
+ vi.mock('../embedder.js', () => {
23
+ const EmbedderMock = Object.assign(
24
+ vi.fn().mockImplementation(function (this: Record<string, unknown>) {
25
+ this['embedOne'] = mockEmbedderEmbedOne;
26
+ }),
27
+ {
28
+ create: vi.fn().mockImplementation(async () => ({
29
+ embedOne: mockEmbedderEmbedOne,
30
+ })),
31
+ },
32
+ );
33
+ return { Embedder: EmbedderMock };
34
+ });
35
+
36
+ vi.mock('../vector-store.js', () => ({
37
+ VectorStore: vi.fn().mockImplementation(function (this: Record<string, unknown>) {
38
+ this['init'] = mockVectorStoreInit;
39
+ this['query'] = mockVectorStoreQuery;
40
+ }),
41
+ }));
42
+
43
+ vi.mock('../graph-enricher.js', () => ({
44
+ GraphEnricher: vi.fn().mockImplementation(function (this: Record<string, unknown>) {
45
+ this['enrich'] = mockGraphEnricherEnrich;
46
+ }),
47
+ }));
48
+
49
+ vi.mock('node:fs/promises', () => ({
50
+ default: { readFile: vi.fn() },
51
+ }));
52
+
53
+ import fs from 'node:fs/promises';
54
+ const mockReadFile = vi.mocked(fs.readFile);
55
+
56
+ // ─── Fixtures ─────────────────────────────────────────────────────────────────
57
+
58
/**
 * Builds a stand-in for a winston `Logger` whose four level methods
 * (`info`, `warn`, `error`, `debug`) are independent vitest spies, so tests
 * can assert on what was logged.
 */
function makeLogger(): Logger {
  const spies = {
    info: vi.fn(),
    warn: vi.fn(),
    error: vi.fn(),
    debug: vi.fn(),
  };
  // Only the four methods above exist; the cast papers over the rest of Logger.
  return spies as unknown as Logger;
}
66
+
67
/**
 * Builds a complete `NomosConfig` fixture for the QueryEngine tests.
 *
 * @param overrides - Fields of the `search` section to replace; every other
 *   section always uses the fixed values below.
 * @returns A fresh config object on every call.
 */
function makeConfig(overrides: Partial<NomosConfig['search']> = {}): NomosConfig {
  // The search section is the only part tests customise, so build it first.
  const search: NomosConfig['search'] = {
    embedding_model: 'gemini-embedding-001',
    embedding_dimensions: 768,
    vector_store_path: '.nomos/vectors',
    default_top_k: 10,
    default_threshold: 0.7,
    batch_size: 50,
    embedding_requests_per_minute: 60,
    request_timeout_ms: 30_000,
    ...overrides,
  };

  const config: NomosConfig = {
    execution: {
      default_mode: 'supervised',
      shadow_branch_prefix: 'nomos/',
      worktree_base: '.nomos/worktrees',
      supervised_heartbeat_timeout_ms: 30_000,
    },
    binaries: {
      planner: {
        cmd: 'claude',
        args: [],
        pty: true,
        total_timeout_ms: 60000,
        heartbeat_timeout_ms: 15000,
        max_output_bytes: 1048576,
        usage_pattern: null,
      },
      reviewer: {
        cmd: 'openai',
        args: [],
        pty: false,
        total_timeout_ms: 60000,
        heartbeat_timeout_ms: 15000,
        max_output_bytes: 1048576,
        usage_pattern: null,
      },
    },
    convergence: { score_threshold: 8, max_iterations: 3 },
    budget: { max_tokens_per_task: 100000, warn_at_percent: 80, cost_per_1k_tokens: {} },
    security: {
      sanitize_patterns: [],
      entropy_threshold: 4.5,
      sanitize_on: [],
      safe_commands: [],
      redaction_label: '[REDACTED]',
    },
    git: { auto_commit: false, include_logs: false, commit_prefix: 'nomos:', sign_commits: false },
    review: { max_context_files: 5 },
    graph: {
      exclude_patterns: [],
      ai_enrichment: false,
      ai_model: 'claude-3-5-sonnet',
      ai_concurrency: 2,
      ai_requests_per_minute: 20,
      max_file_chars: 10000,
      core_modules_count: 10,
      output_dir: '.nomos/graph',
    },
    logging: { level: 'info', retain_days: 7 },
    search,
    auth: {
      credentials_path: '~/.nomos/credentials.json',
      redirect_port: 3000,
    },
  };
  return config;
}
112
+
113
/**
 * Builds an `IndexMetadata` fixture describing a completed full index.
 *
 * @param overrides - Fields to replace on top of the defaults.
 * @returns A fresh metadata object on every call.
 */
function makeIndexMeta(overrides: Partial<IndexMetadata> = {}): IndexMetadata {
  const defaults: IndexMetadata = {
    status: 'complete',
    last_full_index: '2026-04-01T00:00:00.000Z',
    last_incremental_index: null,
    total_files_indexed: 10,
    total_symbols_indexed: 50,
    total_chunks: 60,
    embedding_model: 'gemini-embedding-001',
    vector_dimensions: 768,
    failed_files: [],
    files: {},
  };
  return { ...defaults, ...overrides };
}
128
+
129
/**
 * Builds a file-level `SearchResult` fixture for `src/foo.ts`.
 *
 * @param overrides - Fields to replace on top of the defaults (e.g. `id`,
 *   `type`, `similarity_score`).
 * @returns A fresh result object on every call.
 */
function makeSearchResult(overrides: Partial<SearchResult> = {}): SearchResult {
  const defaults: SearchResult = {
    id: 'src/foo.ts',
    type: 'file',
    file_path: 'src/foo.ts',
    symbol_name: null,
    symbol_type: null,
    line_start: null,
    line_end: null,
    purpose: 'A module',
    similarity_score: 0.9,
    graph_depth: 2,
    dependents_count: 3,
    is_core_module: false,
    is_stale: false,
  };
  return { ...defaults, ...overrides };
}
147
+
148
+ // ─── Tests ────────────────────────────────────────────────────────────────────
149
+
150
+ describe('QueryEngine', () => {
151
+ let logger: Logger;
152
+ let engine: QueryEngine;
153
+ const QUERY_VECTOR = new Float32Array(768).fill(0.1);
154
+
155
// Runs before every test: wipes all stub state, rebuilds the logger and the
// engine under test, then re-installs the happy-path stubs.
beforeEach(() => {
  vi.clearAllMocks();
  // clearAllMocks() only forgets recorded calls/results; values queued with
  // mockResolvedValueOnce / mockRejectedValueOnce survive it. Reset every stub
  // explicitly so a one-shot value a test queued but never consumed cannot
  // leak into the next test. (The constructor mocks from vi.mock() are left
  // alone on purpose: resetting them would drop their implementations.)
  mockReadFile.mockReset();
  mockVectorStoreInit.mockReset();
  mockEmbedderEmbedOne.mockReset();
  mockVectorStoreQuery.mockReset();
  mockGraphEnricherEnrich.mockReset();

  logger = makeLogger();
  engine = new QueryEngine('/project', makeConfig(), logger);

  // Default happy-path stubs
  mockReadFile.mockResolvedValue(JSON.stringify(makeIndexMeta()));
  mockVectorStoreInit.mockResolvedValue(undefined);
  mockEmbedderEmbedOne.mockResolvedValue(QUERY_VECTOR);
  mockVectorStoreQuery.mockResolvedValue([]);
  mockGraphEnricherEnrich.mockResolvedValue([]);
});
167
+
168
+ // ── Test 1: Happy path ─────────────────────────────────────────────────────
169
+
170
+ it('embeds query, queries store, enriches, and returns ranked results', async () => {
171
+ const result1 = makeSearchResult({ id: 'src/a.ts', file_path: 'src/a.ts', similarity_score: 0.85 });
172
+ const result2 = makeSearchResult({ id: 'src/b.ts', file_path: 'src/b.ts', similarity_score: 0.95 });
173
+ mockGraphEnricherEnrich.mockResolvedValueOnce([result1, result2]);
174
+
175
+ const results = await engine.search('find the auth module');
176
+
177
+ expect(mockEmbedderEmbedOne).toHaveBeenCalledWith('find the auth module');
178
+ expect(mockVectorStoreInit).toHaveBeenCalledOnce();
179
+ expect(mockVectorStoreQuery).toHaveBeenCalledWith(QUERY_VECTOR, 10, 0.7);
180
+ expect(mockGraphEnricherEnrich).toHaveBeenCalledOnce();
181
+
182
+ // Results must be sorted descending by similarity_score
183
+ expect(results[0]!.similarity_score).toBeGreaterThanOrEqual(results[1]!.similarity_score);
184
+ });
185
+
186
+ // ── Test 2: No index AND no project map ─────────────────────────────────
187
+
188
+ it('throws NomosError(search_index_not_found) when neither index nor project map exists', async () => {
189
+ // Both readFile calls fail: index-meta.json then project_map.json
190
+ mockReadFile
191
+ .mockRejectedValueOnce(new Error('ENOENT')) // index-meta.json
192
+ .mockRejectedValueOnce(new Error('ENOENT')); // project_map.json
193
+
194
+ await expect(engine.search('something')).rejects.toMatchObject({
195
+ code: 'search_index_not_found',
196
+ });
197
+ });
198
+
199
// Lexical fallback: index-meta.json is missing but project_map.json is
// readable, so search() must resolve instead of throwing.
it('returns lexical results when no vector index exists but project map is available', async () => {
  const projectMap = {
    schema_version: 1, generated_at: '2026-04-01T00:00:00.000Z', root: '/project',
    files: {
      'src/auth.ts': {
        file: 'src/auth.ts', hash: 'h1', language: 'typescript',
        symbols: [{ name: 'login', kind: 'function', line: 1, end_line: 10, signature: 'login()', exported: true }],
        imports: [], dependents: [], dependencies: [], depth: 0,
        last_parsed_at: null, semantic: null, enrichment_status: 'structural',
      },
    },
    stats: { total_files: 1, total_symbols: 1, total_edges: 0, core_modules: [], structural_only: 1, semantically_enriched: 0, indexed: 0 },
  };
  mockReadFile
    .mockRejectedValueOnce(new Error('ENOENT')) // index-meta.json — not found
    .mockResolvedValueOnce(JSON.stringify(projectMap)); // project_map.json

  const results = await engine.search('login');

  // Should get lexical results without error. `length >= 0` holds for every
  // array, so assert things that can actually fail: the call resolved to an
  // array, and any hit points at the only file the project map contains.
  expect(Array.isArray(results)).toBe(true);
  for (const hit of results) {
    expect(hit.file_path).toContain('src/auth.ts');
  }
});
220
+
221
+ // ── Test 3: Empty query ────────────────────────────────────────────────────
222
+
223
+ it('throws NomosError(search_query_failed) on empty query string', async () => {
224
+ await expect(engine.search('')).rejects.toMatchObject({
225
+ code: 'search_query_failed',
226
+ });
227
+ await expect(engine.search(' ')).rejects.toMatchObject({
228
+ code: 'search_query_failed',
229
+ });
230
+ });
231
+
232
+ // ── Test 4: topK and threshold overrides ──────────────────────────────────
233
+
234
+ it('respects topK and threshold overrides', async () => {
235
+ await engine.search('test query', { topK: 5, threshold: 0.85 });
236
+ expect(mockVectorStoreQuery).toHaveBeenCalledWith(QUERY_VECTOR, 5, 0.85);
237
+ });
238
+
239
+ // ── Test 5: Stale index warning ───────────────────────────────────────────
240
+
241
+ it('logs stale index warning when index is older than project map', async () => {
242
+ const meta = makeIndexMeta({ last_full_index: '2026-01-01T00:00:00.000Z' });
243
+ const projectMap = {
244
+ schema_version: 1, generated_at: '2026-04-01T00:00:00.000Z', root: '/project',
245
+ files: {}, stats: { total_files: 0, total_symbols: 0, total_edges: 0, core_modules: [], structural_only: 0, semantically_enriched: 0, indexed: 0 },
246
+ };
247
+
248
+ // Call order: index-meta.json → project_map.json → project_map.json (stale check)
249
+ mockReadFile
250
+ .mockResolvedValueOnce(JSON.stringify(meta))
251
+ .mockResolvedValueOnce(JSON.stringify(projectMap))
252
+ .mockResolvedValueOnce(JSON.stringify(projectMap));
253
+
254
+ await engine.search('find module');
255
+
256
+ expect(logger.warn).toHaveBeenCalledWith(
257
+ expect.stringContaining('Index is older than project map'),
258
+ );
259
+ });
260
+
261
+ // ── Test 6: De-duplication — symbol within 0.05, file removed [TRAP-3] ───
262
+
263
+ it('removes file-level result when symbol result is within 0.05 [TRAP-3]', async () => {
264
+ const fileResult = makeSearchResult({
265
+ id: 'src/auth.ts',
266
+ type: 'file',
267
+ file_path: 'src/auth.ts',
268
+ similarity_score: 0.90,
269
+ });
270
+ const symbolResult = makeSearchResult({
271
+ id: 'src/auth.ts::login',
272
+ type: 'symbol',
273
+ file_path: 'src/auth.ts',
274
+ symbol_name: 'login',
275
+ similarity_score: 0.88, // gap = 0.02 ≤ 0.05 → file result must be removed
276
+ });
277
+
278
+ mockGraphEnricherEnrich.mockResolvedValueOnce([fileResult, symbolResult]);
279
+
280
+ const results = await engine.search('login function');
281
+
282
+ const resultIds = results.map((r) => r.id);
283
+ expect(resultIds).not.toContain('src/auth.ts'); // file-level removed
284
+ expect(resultIds).toContain('src/auth.ts::login'); // symbol kept
285
+ });
286
+
287
+ // ── Test 7: De-duplication — gap > 0.05, both kept [TRAP-3] ─────────────
288
+
289
+ it('keeps both file and symbol results when gap > 0.05 [TRAP-3]', async () => {
290
+ const fileResult = makeSearchResult({
291
+ id: 'src/auth.ts',
292
+ type: 'file',
293
+ file_path: 'src/auth.ts',
294
+ similarity_score: 0.95,
295
+ });
296
+ const symbolResult = makeSearchResult({
297
+ id: 'src/auth.ts::logout',
298
+ type: 'symbol',
299
+ file_path: 'src/auth.ts',
300
+ symbol_name: 'logout',
301
+ similarity_score: 0.80, // gap = 0.15 > 0.05 → both kept
302
+ });
303
+
304
+ mockGraphEnricherEnrich.mockResolvedValueOnce([fileResult, symbolResult]);
305
+
306
+ const results = await engine.search('auth module');
307
+
308
+ const resultIds = results.map((r) => r.id);
309
+ expect(resultIds).toContain('src/auth.ts');
310
+ expect(resultIds).toContain('src/auth.ts::logout');
311
+ });
312
+
313
+ // ── Test 8: VectorStore.init() failure → falls back to lexical [GAP-2] ───
314
+
315
+ it('falls back to lexical search when VectorStore.init() fails [GAP-2]', async () => {
316
+ const projectMap = {
317
+ schema_version: 1, generated_at: '2026-04-01T00:00:00.000Z', root: '/project',
318
+ files: {}, stats: { total_files: 0, total_symbols: 0, total_edges: 0, core_modules: [], structural_only: 0, semantically_enriched: 0, indexed: 0 },
319
+ };
320
+ mockReadFile
321
+ .mockResolvedValueOnce(JSON.stringify(makeIndexMeta())) // index-meta.json
322
+ .mockResolvedValueOnce(JSON.stringify(projectMap)); // project_map.json
323
+
324
+ mockVectorStoreInit.mockRejectedValueOnce(new Error('LanceDB corrupted'));
325
+
326
+ const results = await engine.search('anything');
327
+
328
+ // Should not throw — falls back to lexical
329
+ expect(results).toEqual([]);
330
+ expect(logger.warn).toHaveBeenCalledWith(
331
+ expect.stringContaining('Vector search failed, falling back to lexical search'),
332
+ );
333
+ });
334
+ });
@@ -0,0 +1,78 @@
1
+ /**
2
+ * Task 7.7.3 — Verify cosine similarity correctness.
3
+ *
4
+ * Guards: all tests require GEMINI_API_KEY. Skipped in CI without key.
5
+ */
6
+
7
+ import { describe, it, expect } from 'vitest';
8
+ import type { Logger } from 'winston';
9
+ import { vi } from 'vitest';
10
+ import { Embedder } from '../embedder.js';
11
+ import type { NomosConfig } from '../../types/index.js';
12
+
13
/**
 * Builds a stand-in for a winston `Logger` exposing only `info`, `warn`,
 * `error` and `debug`, each backed by its own vitest spy.
 */
function makeLogger(): Logger {
  const spies = {
    info: vi.fn(),
    warn: vi.fn(),
    error: vi.fn(),
    debug: vi.fn(),
  };
  // The cast is deliberate: nothing beyond these four methods is provided.
  return spies as unknown as Logger;
}
21
+
22
/**
 * Pure cosine similarity between two Float32Arrays.
 *
 * @param a - First vector.
 * @param b - Second vector; must have the same length as `a`.
 * @returns The cosine of the angle between `a` and `b`, clamped to [-1, 1].
 *   Returns 0 when either vector has zero magnitude, where the angle is
 *   undefined and the raw formula would yield NaN.
 * @throws RangeError if the two vectors differ in length.
 */
function cosineSimilarity(a: Float32Array, b: Float32Array): number {
  // A length mismatch used to surface as NaN (b shorter) or as a silently
  // truncated comparison (b longer); fail loudly instead.
  if (a.length !== b.length) {
    throw new RangeError(`cosineSimilarity: vector length mismatch (${a.length} vs ${b.length})`);
  }

  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    const x = a[i] ?? 0;
    const y = b[i] ?? 0;
    dot += x * y;
    normA += x * x;
    normB += y * y;
  }

  const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
  if (magnitude === 0) {
    return 0;
  }
  // Rounding can push the quotient a hair outside [-1, 1] for (anti)parallel
  // vectors; clamp so range assertions on the result stay reliable.
  return Math.max(-1, Math.min(1, dot / magnitude));
}
34
+
35
/** Value supplied as `embedding_dimensions` in the search config below. */
const EMBEDDING_DIMS = 768;

/**
 * Builds the `search` config section passed to the `Embedder` in these tests.
 *
 * @returns A fresh config object on every call.
 */
function makeSearchConfig(): NomosConfig['search'] {
  const searchConfig: NomosConfig['search'] = {
    embedding_model: 'gemini-embedding-001',
    embedding_dimensions: EMBEDDING_DIMS,
    vector_store_path: '/tmp/nomos-similarity-test',
    default_top_k: 10,
    default_threshold: 0.7,
    batch_size: 5,
    embedding_requests_per_minute: 60,
    request_timeout_ms: 30_000,
  };
  return searchConfig;
}
49
+
50
// The whole suite is skipped unless GEMINI_API_KEY is set in the environment.
describe.skipIf(!process.env['GEMINI_API_KEY'])(
  'cosine similarity correctness (requires GEMINI_API_KEY)',
  () => {
    // Per-test timeout passed to each `it` below.
    const TIMEOUT_MS = 30_000;

    // Embeds both texts, first then second, with a fresh Embedder and returns
    // the cosine similarity of the two resulting vectors.
    const similarityBetween = async (first: string, second: string): Promise<number> => {
      const embedder = new Embedder(makeSearchConfig(), makeLogger());
      const firstVec = await embedder.embedOne(first);
      const secondVec = await embedder.embedOne(second);
      return cosineSimilarity(firstVec, secondVec);
    };

    it('similar texts produce similarity > 0.8', async () => {
      const similarity = await similarityBetween(
        'process payment refund',
        'handle refund for customer payment',
      );
      expect(similarity).toBeGreaterThan(0.8);
    }, TIMEOUT_MS);

    it('unrelated texts produce similarity < 0.5', async () => {
      const similarity = await similarityBetween(
        'process payment refund',
        'configure webpack build optimization',
      );
      expect(similarity).toBeLessThan(0.5);
    }, TIMEOUT_MS);

    it('similarity is always in [0, 1]', async () => {
      const similarity = await similarityBetween(
        'any arbitrary text',
        'completely different content',
      );
      expect(similarity).toBeGreaterThanOrEqual(0);
      expect(similarity).toBeLessThanOrEqual(1);
    }, TIMEOUT_MS);
  },
);
@@ -0,0 +1,281 @@
1
+ import { describe, it, expect, beforeEach, afterEach } from 'vitest';
2
+ import * as os from 'node:os';
3
+ import * as fs from 'node:fs/promises';
4
+ import * as path from 'node:path';
5
+ import { VectorStore } from '../vector-store.js';
6
+ import { NomosError } from '../../core/errors.js';
7
+ import type { VectorRecord } from '../../types/index.js';
8
+ import type { Logger } from 'winston';
9
+
10
+ // ── Fixtures ──────────────────────────────────────────────────────────────────
11
+
12
+ const VECTOR_DIMS = 3;
13
+
14
/**
 * Builds a silent stand-in for a winston `Logger`: `info`, `warn`, `error`
 * and `debug` all accept a call and return `undefined`.
 */
function makeLogger(): Logger {
  const noop = (): undefined => undefined;
  // Only these four methods exist; the cast papers over the rest of Logger.
  return { info: noop, warn: noop, error: noop, debug: noop } as unknown as Logger;
}
22
+
23
// Sequence number baked into each generated record's id, path, purpose, hash
// and vector so that records built without overrides are distinct.
let recordCounter = 0;

/**
 * Builds a file-level `VectorRecord` fixture.
 *
 * Each call bumps `recordCounter` and derives the default id, file path,
 * purpose, content hash and vector components from the new value.
 *
 * @param overrides - Fields to replace on top of the generated defaults.
 * @returns A fresh record on every call.
 */
function makeRecord(overrides: Partial<VectorRecord> = {}): VectorRecord {
  recordCounter += 1;
  const seq = recordCounter;

  // Component i is 0.1 * seq * (i + 1), wrapped into [0, 1).
  const vector = new Float32Array(VECTOR_DIMS).map((_, i) => (0.1 * seq * (i + 1)) % 1.0);

  const defaults: VectorRecord = {
    id: `rec-${seq}`,
    type: 'file',
    vector,
    file_path: `src/foo${seq}.ts`,
    module: 'src',
    purpose: `Purpose ${seq}`,
    symbol_name: null,
    symbol_type: null,
    line_start: null,
    line_end: null,
    parent_file_id: null,
    graph_depth: 1,
    dependents_count: 0,
    last_indexed: new Date().toISOString(),
    content_hash: `hash${seq}`,
  };
  return { ...defaults, ...overrides };
}
50
+
51
/**
 * Return an L2-normalized unit vector of given values.
 *
 * @param values - Components of the vector to normalize.
 * @returns A new Float32Array with every component divided by the vector's
 *   Euclidean length. Empty input yields an empty array.
 * @throws RangeError if the vector is non-empty and its length is zero or not
 *   finite; normalizing it would otherwise silently fill the result with NaN
 *   (or zeros) and corrupt whatever similarity check uses it.
 */
function unitVec(values: number[]): Float32Array {
  const len = Math.sqrt(values.reduce((s, v) => s + v * v, 0));
  if (values.length > 0 && !(len > 0 && Number.isFinite(len))) {
    throw new RangeError('unitVec: cannot normalize a vector with zero or non-finite length');
  }
  return new Float32Array(values.map((v) => v / len));
}
56
+
57
+ // ── Test setup ────────────────────────────────────────────────────────────────
58
+
59
+ let tmpDir: string;
60
+ let store: VectorStore;
61
+
62
+ beforeEach(async () => {
63
+ recordCounter = 0;
64
+ tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'nomos-vs-test-'));
65
+ store = new VectorStore(tmpDir, makeLogger(), VECTOR_DIMS);
66
+ });
67
+
68
+ afterEach(async () => {
69
+ await fs.rm(tmpDir, { recursive: true, force: true });
70
+ });
71
+
72
+ // ── Tests ─────────────────────────────────────────────────────────────────────
73
+
74
+ describe('VectorStore', () => {
75
+ // Test 1: init() creates the DB directory if it does not exist
76
+ it('init() creates the DB directory if it does not exist', async () => {
77
+ const nonExistentDir = path.join(tmpDir, 'new-subdir', 'vector_index');
78
+ const freshStore = new VectorStore(nonExistentDir, makeLogger(), VECTOR_DIMS);
79
+ await expect(freshStore.init()).resolves.toBeUndefined();
80
+ const stat = await fs.stat(nonExistentDir).catch(() => null);
81
+ expect(stat).not.toBeNull();
82
+ });
83
+
84
// Test 2: init() wraps connection failure in NomosError('search_index_corrupted')
// Trigger: create a read-only parent so LanceDB cannot create the database directory.
// Skipped where that trigger cannot work: root bypasses the missing write bit,
// and Node ignores the mkdir `mode` option on Windows, so init() would succeed
// and the test would fail for reasons unrelated to VectorStore.
it.skipIf(process.platform === 'win32' || process.getuid?.() === 0)(
  'init() wraps connection failure in NomosError(search_index_corrupted)',
  async () => {
    const roParent = path.join(tmpDir, 'readonly-parent');
    await fs.mkdir(roParent, { mode: 0o555 }); // read + execute, no write
    const blockedPath = path.join(roParent, 'vector_index');
    const brokenStore = new VectorStore(blockedPath, makeLogger(), VECTOR_DIMS);

    await expect(brokenStore.init()).rejects.toSatisfy(
      (e: unknown) =>
        e instanceof NomosError && (e as NomosError).code === 'search_index_corrupted',
    );
  },
);
97
+
98
+ // Test 3: upsert() inserts records; count() returns correct total
99
+ it('upsert() inserts records and count() returns the correct total', async () => {
100
+ await store.init();
101
+ const records = [makeRecord(), makeRecord(), makeRecord()];
102
+ await store.upsert(records);
103
+ expect(await store.count()).toBe(3);
104
+ });
105
+
106
+ // Test 4: upsert() with duplicate id overwrites the existing record
107
+ it('upsert() with duplicate id overwrites the existing record', async () => {
108
+ await store.init();
109
+ const original = makeRecord({ id: 'dup-id', purpose: 'original' });
110
+ await store.upsert([original]);
111
+ expect(await store.count()).toBe(1);
112
+
113
+ const updated = makeRecord({ id: 'dup-id', purpose: 'updated' });
114
+ await store.upsert([updated]);
115
+ expect(await store.count()).toBe(1); // still 1, not 2
116
+ });
117
+
118
+ // Test 5: query() returns results ranked by similarity (closest first)
119
+ // Uses unit vectors so dot distance ≡ cosine distance.
120
+ it('query() returns results ranked by similarity, closest first', async () => {
121
+ await store.init();
122
+ const recA = makeRecord({ id: 'a', vector: unitVec([1, 0, 0]), file_path: 'src/a.ts' });
123
+ const recB = makeRecord({ id: 'b', vector: unitVec([0, 1, 0]), file_path: 'src/b.ts' });
124
+ const recC = makeRecord({ id: 'c', vector: unitVec([0.9, 0.1, 0]), file_path: 'src/c.ts' });
125
+ await store.upsert([recA, recB, recC]);
126
+
127
+ const queryVec = unitVec([1, 0, 0]); // identical to recA
128
+ const results = await store.query(queryVec, 3, 0);
129
+
130
+ expect(results.length).toBeGreaterThan(0);
131
+ expect(results[0].id).toBe('a'); // closest match first
132
+ // Scores must be non-increasing
133
+ for (let i = 1; i < results.length; i++) {
134
+ expect(results[i - 1].similarity_score).toBeGreaterThanOrEqual(results[i].similarity_score);
135
+ }
136
+ });
137
+
138
+ // Test 6: query() with threshold filters out low-similarity results
139
+ it('query() with threshold filters out low-similarity results', async () => {
140
+ await store.init();
141
+ const recA = makeRecord({ id: 'a', vector: unitVec([1, 0, 0]), file_path: 'src/a.ts' });
142
+ const recB = makeRecord({ id: 'b', vector: unitVec([0, 0, 1]), file_path: 'src/b.ts' }); // orthogonal
143
+ await store.upsert([recA, recB]);
144
+
145
+ const queryVec = unitVec([1, 0, 0]);
146
+ const results = await store.query(queryVec, 5, 0.9); // high threshold
147
+
148
+ expect(results.every((r) => r.similarity_score >= 0.9)).toBe(true);
149
+ });
150
+
151
+ // Test 7: query() returns similarity_score ∈ [0, 1] [S-3]
152
+ it('query() similarity_score is always in [0, 1]', async () => {
153
+ await store.init();
154
+ const records = Array.from({ length: 5 }, (_, i) =>
155
+ makeRecord({
156
+ id: `r${i}`,
157
+ vector: unitVec([i + 1, i, 0.5]),
158
+ file_path: `src/r${i}.ts`,
159
+ }),
160
+ );
161
+ await store.upsert(records);
162
+
163
+ const results = await store.query(unitVec([1, 1, 1]), 10, 0);
164
+ expect(results.length).toBeGreaterThan(0);
165
+ for (const r of results) {
166
+ expect(r.similarity_score).toBeGreaterThanOrEqual(0);
167
+ expect(r.similarity_score).toBeLessThanOrEqual(1);
168
+ }
169
+ });
170
+
171
+ // Test 8: query() results do NOT contain vector field [S-5]
172
+ it('query() results do not contain the vector field', async () => {
173
+ await store.init();
174
+ const rec = makeRecord({
175
+ id: 'v-check',
176
+ vector: unitVec([1, 2, 3]),
177
+ file_path: 'src/v.ts',
178
+ });
179
+ await store.upsert([rec]);
180
+
181
+ const results = await store.query(unitVec([1, 2, 3]), 5, 0);
182
+ expect(results.length).toBeGreaterThan(0);
183
+ for (const r of results) {
184
+ expect(Object.prototype.hasOwnProperty.call(r, 'vector')).toBe(false);
185
+ // Also verify JSON serialization does not leak vector data
186
+ const json = JSON.parse(JSON.stringify(r)) as Record<string, unknown>;
187
+ expect(json).not.toHaveProperty('vector');
188
+ }
189
+ });
190
+
191
+ // Test 9: deleteByFilePaths() removes only matching records
192
+ it('deleteByFilePaths() removes only records matching given file paths', async () => {
193
+ await store.init();
194
+ const recA = makeRecord({ id: 'da', file_path: 'src/delete-me.ts' });
195
+ const recB = makeRecord({ id: 'db', file_path: 'src/keep-me.ts' });
196
+ const recC = makeRecord({ id: 'dc', file_path: 'src/delete-me.ts' }); // same path
197
+ await store.upsert([recA, recB, recC]);
198
+ expect(await store.count()).toBe(3);
199
+
200
+ await store.deleteByFilePaths(['src/delete-me.ts']);
201
+ expect(await store.count()).toBe(1);
202
+ });
203
+
204
+ // Test 10: Table-swap full cycle [BLOCKER-1]
205
+ describe('table-swap full cycle (BLOCKER-1)', () => {
206
// Live is seeded with TWO rows and staging with ONE, so the live count differs
// before and after promotion. With one row on each side the count would be 1
// throughout, and a promotion that did nothing would still pass.
it('upsertToStaging does not affect live; promoteStagingToLive swaps', async () => {
  await store.init();

  // Seed live table with "old" data
  const oldRecs = [
    makeRecord({ id: 'old-1', purpose: 'old data', file_path: 'src/old-1.ts' }),
    makeRecord({ id: 'old-2', purpose: 'old data', file_path: 'src/old-2.ts' }),
  ];
  await store.upsert(oldRecs);
  expect(await store.count()).toBe(2);

  // Write new data to staging — does NOT touch live
  const newRec = makeRecord({ id: 'new', purpose: 'new data', file_path: 'src/new.ts' });
  await store.upsertToStaging([newRec]);

  // Live still has old data
  expect(await store.count()).toBe(2);

  // Promote staging → live
  await store.promoteStagingToLive();

  // After promotion, live contains new data only (a merge would give 3, a no-op 2)
  expect(await store.count()).toBe(1);
});
227
+
228
+ it('promoteStagingToLive() throws search_index_failed if no staging table', async () => {
229
+ await store.init();
230
+ await expect(store.promoteStagingToLive()).rejects.toSatisfy(
231
+ (e: unknown) =>
232
+ e instanceof NomosError && (e as NomosError).code === 'search_index_failed',
233
+ );
234
+ });
235
+ });
236
+
237
+ // Test 11: cleanupStaging() removes orphaned staging without affecting live [GAP-3]
238
+ it('cleanupStaging() removes orphaned staging table without affecting live', async () => {
239
+ await store.init();
240
+
241
+ // Seed live table
242
+ const liveRec = makeRecord({ file_path: 'src/live.ts' });
243
+ await store.upsert([liveRec]);
244
+
245
+ // Create orphaned staging table
246
+ const stagingRec = makeRecord({ file_path: 'src/staging.ts' });
247
+ await store.upsertToStaging([stagingRec]);
248
+
249
+ await store.cleanupStaging();
250
+ expect(await store.count()).toBe(1); // live untouched
251
+
252
+ // Idempotent: calling again with no staging table is a no-op
253
+ await expect(store.cleanupStaging()).resolves.toBeUndefined();
254
+ });
255
+
256
+ // Test 12: Concurrent upsert() calls do not corrupt the store
257
+ // Pre-seed the table so concurrent upserts all use the mergeInsert path (not the
258
+ // initial-creation path which serializes on overwrite mode by design).
259
+ it('concurrent upsert() calls do not corrupt the store', async () => {
260
+ await store.init();
261
+
262
+ // Seed the table first so the live table exists before concurrent ops begin
263
+ const seed = makeRecord({ id: 'seed', file_path: 'src/seed.ts' });
264
+ await store.upsert([seed]);
265
+
266
+ const batches = Array.from({ length: 4 }, (_, i) =>
267
+ [0, 1, 2].map((j) =>
268
+ makeRecord({
269
+ id: `concurrent-${i}-${j}`,
270
+ file_path: `src/concurrent-${i}-${j}.ts`,
271
+ }),
272
+ ),
273
+ );
274
+
275
+ await Promise.all(batches.map((batch) => store.upsert(batch)));
276
+
277
+ // All 12 new records + 1 seed = 13, but seed has a unique id so no conflict
278
+ // With mergeInsert: all upserts are independent non-overlapping ids → 13 total
279
+ expect(await store.count()).toBe(13);
280
+ });
281
+ });