@nomos-arc/arc 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. package/.claude/settings.local.json +10 -0
  2. package/.nomos-config.json +5 -0
  3. package/CLAUDE.md +108 -0
  4. package/LICENSE +190 -0
  5. package/README.md +569 -0
  6. package/dist/cli.js +21120 -0
  7. package/docs/auth/googel_plan.yaml +1093 -0
  8. package/docs/auth/google_task.md +235 -0
  9. package/docs/auth/hardened_blueprint.yaml +1658 -0
  10. package/docs/auth/red_team_report.yaml +336 -0
  11. package/docs/auth/session_state.yaml +162 -0
  12. package/docs/certificate/cer_enhance_plan.md +605 -0
  13. package/docs/certificate/certificate_report.md +338 -0
  14. package/docs/dev_overview.md +419 -0
  15. package/docs/feature_assessment.md +156 -0
  16. package/docs/how_it_works.md +78 -0
  17. package/docs/infrastructure/map.md +867 -0
  18. package/docs/init/master_plan.md +3581 -0
  19. package/docs/init/red_team_report.md +215 -0
  20. package/docs/init/report_phase_1a.md +304 -0
  21. package/docs/integrity-gate/enhance_drift.md +703 -0
  22. package/docs/integrity-gate/overview.md +108 -0
  23. package/docs/management/manger-task.md +99 -0
  24. package/docs/management/scafffold.md +76 -0
  25. package/docs/map/ATOMIC_BLUEPRINT.md +1349 -0
  26. package/docs/map/RED_TEAM_REPORT.md +159 -0
  27. package/docs/map/map_task.md +147 -0
  28. package/docs/map/semantic_graph_task.md +792 -0
  29. package/docs/map/semantic_master_plan.md +705 -0
  30. package/docs/phase7/TEAM_RED.md +249 -0
  31. package/docs/phase7/plan.md +1682 -0
  32. package/docs/phase7/task.md +275 -0
  33. package/docs/prompts/USAGE.md +312 -0
  34. package/docs/prompts/architect.md +165 -0
  35. package/docs/prompts/executer.md +190 -0
  36. package/docs/prompts/hardener.md +190 -0
  37. package/docs/prompts/red_team.md +146 -0
  38. package/docs/verification/goveranance-overview.md +396 -0
  39. package/docs/verification/governance-overview.md +245 -0
  40. package/docs/verification/verification-arc-ar.md +560 -0
  41. package/docs/verification/verification-architecture.md +560 -0
  42. package/docs/very_next.md +52 -0
  43. package/docs/whitepaper.md +89 -0
  44. package/overview.md +1469 -0
  45. package/package.json +63 -0
  46. package/src/adapters/__tests__/git.test.ts +296 -0
  47. package/src/adapters/__tests__/stdio.test.ts +70 -0
  48. package/src/adapters/git.ts +226 -0
  49. package/src/adapters/pty.ts +159 -0
  50. package/src/adapters/stdio.ts +113 -0
  51. package/src/cli.ts +83 -0
  52. package/src/commands/apply.ts +47 -0
  53. package/src/commands/auth.ts +301 -0
  54. package/src/commands/certificate.ts +89 -0
  55. package/src/commands/discard.ts +24 -0
  56. package/src/commands/drift.ts +116 -0
  57. package/src/commands/index.ts +78 -0
  58. package/src/commands/init.ts +121 -0
  59. package/src/commands/list.ts +75 -0
  60. package/src/commands/map.ts +55 -0
  61. package/src/commands/plan.ts +30 -0
  62. package/src/commands/review.ts +58 -0
  63. package/src/commands/run.ts +63 -0
  64. package/src/commands/search.ts +147 -0
  65. package/src/commands/show.ts +63 -0
  66. package/src/commands/status.ts +59 -0
  67. package/src/core/__tests__/budget.test.ts +213 -0
  68. package/src/core/__tests__/certificate.test.ts +385 -0
  69. package/src/core/__tests__/config.test.ts +191 -0
  70. package/src/core/__tests__/preflight.test.ts +24 -0
  71. package/src/core/__tests__/prompt.test.ts +358 -0
  72. package/src/core/__tests__/review.test.ts +161 -0
  73. package/src/core/__tests__/state.test.ts +362 -0
  74. package/src/core/auth/__tests__/manager.test.ts +166 -0
  75. package/src/core/auth/__tests__/server.test.ts +220 -0
  76. package/src/core/auth/gcp-projects.ts +160 -0
  77. package/src/core/auth/manager.ts +114 -0
  78. package/src/core/auth/server.ts +141 -0
  79. package/src/core/budget.ts +119 -0
  80. package/src/core/certificate.ts +502 -0
  81. package/src/core/config.ts +212 -0
  82. package/src/core/errors.ts +54 -0
  83. package/src/core/factory.ts +49 -0
  84. package/src/core/graph/__tests__/builder.test.ts +272 -0
  85. package/src/core/graph/__tests__/contract-writer.test.ts +175 -0
  86. package/src/core/graph/__tests__/enricher.test.ts +299 -0
  87. package/src/core/graph/__tests__/parser.test.ts +200 -0
  88. package/src/core/graph/__tests__/pipeline.test.ts +202 -0
  89. package/src/core/graph/__tests__/renderer.test.ts +128 -0
  90. package/src/core/graph/__tests__/resolver.test.ts +185 -0
  91. package/src/core/graph/__tests__/scanner.test.ts +231 -0
  92. package/src/core/graph/__tests__/show.test.ts +134 -0
  93. package/src/core/graph/builder.ts +303 -0
  94. package/src/core/graph/constraints.ts +94 -0
  95. package/src/core/graph/contract-writer.ts +93 -0
  96. package/src/core/graph/drift/__tests__/classifier.test.ts +215 -0
  97. package/src/core/graph/drift/__tests__/comparator.test.ts +335 -0
  98. package/src/core/graph/drift/__tests__/drift.test.ts +453 -0
  99. package/src/core/graph/drift/__tests__/reporter.test.ts +203 -0
  100. package/src/core/graph/drift/classifier.ts +165 -0
  101. package/src/core/graph/drift/comparator.ts +205 -0
  102. package/src/core/graph/drift/reporter.ts +77 -0
  103. package/src/core/graph/enricher.ts +251 -0
  104. package/src/core/graph/grammar-paths.ts +30 -0
  105. package/src/core/graph/html-template.ts +493 -0
  106. package/src/core/graph/map-schema.ts +137 -0
  107. package/src/core/graph/parser.ts +336 -0
  108. package/src/core/graph/pipeline.ts +209 -0
  109. package/src/core/graph/renderer.ts +92 -0
  110. package/src/core/graph/resolver.ts +195 -0
  111. package/src/core/graph/scanner.ts +145 -0
  112. package/src/core/logger.ts +46 -0
  113. package/src/core/orchestrator.ts +792 -0
  114. package/src/core/plan-file-manager.ts +66 -0
  115. package/src/core/preflight.ts +64 -0
  116. package/src/core/prompt.ts +173 -0
  117. package/src/core/review.ts +95 -0
  118. package/src/core/state.ts +294 -0
  119. package/src/core/worktree-coordinator.ts +77 -0
  120. package/src/search/__tests__/chunk-extractor.test.ts +339 -0
  121. package/src/search/__tests__/embedder-auth.test.ts +124 -0
  122. package/src/search/__tests__/embedder.test.ts +267 -0
  123. package/src/search/__tests__/graph-enricher.test.ts +178 -0
  124. package/src/search/__tests__/indexer.test.ts +518 -0
  125. package/src/search/__tests__/integration.test.ts +649 -0
  126. package/src/search/__tests__/query-engine.test.ts +334 -0
  127. package/src/search/__tests__/similarity.test.ts +78 -0
  128. package/src/search/__tests__/vector-store.test.ts +281 -0
  129. package/src/search/chunk-extractor.ts +167 -0
  130. package/src/search/embedder.ts +209 -0
  131. package/src/search/graph-enricher.ts +95 -0
  132. package/src/search/indexer.ts +483 -0
  133. package/src/search/lexical-searcher.ts +190 -0
  134. package/src/search/query-engine.ts +225 -0
  135. package/src/search/vector-store.ts +311 -0
  136. package/src/types/index.ts +572 -0
  137. package/src/utils/__tests__/ansi.test.ts +54 -0
  138. package/src/utils/__tests__/frontmatter.test.ts +79 -0
  139. package/src/utils/__tests__/sanitize.test.ts +229 -0
  140. package/src/utils/ansi.ts +19 -0
  141. package/src/utils/context.ts +44 -0
  142. package/src/utils/frontmatter.ts +27 -0
  143. package/src/utils/sanitize.ts +78 -0
  144. package/test/e2e/lifecycle.test.ts +330 -0
  145. package/test/fixtures/mock-planner-hang.ts +5 -0
  146. package/test/fixtures/mock-planner.ts +26 -0
  147. package/test/fixtures/mock-reviewer-bad.ts +8 -0
  148. package/test/fixtures/mock-reviewer-retry.ts +34 -0
  149. package/test/fixtures/mock-reviewer.ts +18 -0
  150. package/test/fixtures/sample-project/src/circular-a.ts +6 -0
  151. package/test/fixtures/sample-project/src/circular-b.ts +6 -0
  152. package/test/fixtures/sample-project/src/config.ts +15 -0
  153. package/test/fixtures/sample-project/src/main.ts +19 -0
  154. package/test/fixtures/sample-project/src/services/product-service.ts +20 -0
  155. package/test/fixtures/sample-project/src/services/user-service.ts +18 -0
  156. package/test/fixtures/sample-project/src/types.ts +14 -0
  157. package/test/fixtures/sample-project/src/utils/index.ts +14 -0
  158. package/test/fixtures/sample-project/src/utils/validate.ts +12 -0
  159. package/tsconfig.json +20 -0
  160. package/vitest.config.ts +12 -0
@@ -0,0 +1,518 @@
1
+ import * as os from 'node:os';
2
+ import * as fs from 'node:fs/promises';
3
+ import * as path from 'node:path';
4
+ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
5
+ import type { Logger } from 'winston';
6
+ import { SearchIndexer } from '../indexer.js';
7
+ import { NomosError } from '../../core/errors.js';
8
+ import type { IndexMetadata, NomosConfig, ProjectMap } from '../../types/index.js';
9
+
10
+ // ─── Mocks ────────────────────────────────────────────────────────────────────
11
+
12
+ // Mock Embedder: deterministic fake vectors, never calls Gemini
13
// Shared spies that stand in for the embedder's two methods. Tests install
// their own implementations on these to control what "embedding" returns.
const mockEmbedBatch = vi.fn<(texts: string[]) => Promise<Float32Array[]>>();
const mockEmbedOne = vi.fn<(text: string) => Promise<Float32Array>>();

// Swap the embedder module for a fake that supports both construction styles:
// `new Embedder()` and the async `Embedder.create()` factory.
vi.mock('../embedder.js', () => {
  // Evaluated only when an embedder is actually requested, so the spies above
  // are read lazily (inside a closure) rather than when this factory runs.
  const fakeMembers = () => ({
    embedBatch: mockEmbedBatch,
    embedOne: mockEmbedOne,
    dimensions: 4,
  });

  const construct = vi.fn().mockImplementation(function (this: Record<string, unknown>) {
    Object.assign(this, fakeMembers());
  });
  const create = vi.fn().mockImplementation(async () => fakeMembers());

  return { Embedder: Object.assign(construct, { create }) };
});
33
+
34
+ // ─── Helpers ──────────────────────────────────────────────────────────────────
35
+
36
+ const VECTOR_DIMS = 4;
37
+
38
/**
 * Builds a stand-in logger whose `info`, `warn`, `error` and `debug` methods
 * are independent spies.
 *
 * Only those four members exist on the returned object; the cast lets it be
 * passed wherever a winston `Logger` is expected.
 */
function makeLogger(): Logger {
  const stub: Record<string, unknown> = {};
  for (const level of ['info', 'warn', 'error', 'debug']) {
    stub[level] = vi.fn();
  }
  return stub as unknown as Logger;
}
46
+
47
/**
 * Builds the configuration fixture used by every test in this file.
 *
 * @param vectorStorePath - Stored as `search.vector_store_path`.
 * @param graphOutputDir - Stored as `graph.output_dir`.
 * @returns A literal object cast to `NomosConfig`; the cast means the shape
 *          is not checked against the real type.
 */
function makeConfig(vectorStorePath: string, graphOutputDir: string): NomosConfig {
  // Planner and reviewer differ only in command name and PTY usage.
  const binary = (cmd: string, pty: boolean) => ({
    cmd,
    args: [],
    pty,
    total_timeout_ms: 60000,
    heartbeat_timeout_ms: 30000,
    max_output_bytes: 1_000_000,
    usage_pattern: null,
  });

  const execution = {
    default_mode: 'supervised',
    shadow_branch_prefix: 'nomos/',
    worktree_base: '/tmp',
    supervised_heartbeat_timeout_ms: 30000,
  };

  const security = {
    sanitize_patterns: [],
    entropy_threshold: 4.5,
    sanitize_on: ['input', 'output'],
    safe_commands: [],
    redaction_label: '[REDACTED]',
  };

  const graph = {
    exclude_patterns: [],
    ai_enrichment: false,
    ai_model: 'gemini-pro',
    ai_concurrency: 1,
    ai_requests_per_minute: 60,
    max_file_chars: 50_000,
    core_modules_count: 5,
    output_dir: graphOutputDir,
  };

  const search = {
    embedding_model: 'text-embedding-004',
    embedding_dimensions: VECTOR_DIMS,
    vector_store_path: vectorStorePath,
    default_top_k: 10,
    default_threshold: 0.7,
    // Deliberately tiny so a two-file project map spans several batches.
    batch_size: 2,
    embedding_requests_per_minute: 60,
    request_timeout_ms: 30_000,
  };

  const fixture = {
    execution,
    binaries: {
      planner: binary('claude', true),
      reviewer: binary('openai', false),
    },
    convergence: { score_threshold: 8, max_iterations: 3 },
    budget: { max_tokens_per_task: 100_000, warn_at_percent: 80, cost_per_1k_tokens: {} },
    security,
    git: { auto_commit: false, include_logs: true, commit_prefix: 'arc:', sign_commits: false },
    review: { max_context_files: 10 },
    graph,
    logging: { level: 'info', retain_days: 7 },
    search,
  };

  return fixture as unknown as NomosConfig;
}
82
+
83
/**
 * Builds a fresh two-file project map fixture on every call, so callers may
 * mutate the result freely.
 *
 * `src/foo.ts` carries semantic enrichment and is depended on by `src/bar.ts`;
 * `src/bar.ts` is structural-only and imports `Foo`.
 *
 * @param graphDir - Not read by this function; kept so call sites stay unchanged.
 */
function makeProjectMap(graphDir: string): ProjectMap {
  type FileEntry = ProjectMap['files'][string];

  const fooEntry: FileEntry = {
    file: 'src/foo.ts',
    hash: 'abc',
    language: 'typescript',
    symbols: [
      { name: 'Foo', kind: 'class', line: 1, end_line: 10, signature: 'class Foo', exported: true },
    ],
    imports: [],
    dependents: ['src/bar.ts'],
    dependencies: [],
    depth: 1,
    last_parsed_at: null,
    semantic: {
      overview: 'Foo module',
      purpose: 'Does foo things',
      key_logic: ['key logic'],
      usage_context: ['used everywhere'],
      source_hash: 'h1',
      enriched_at: '2024-01-01T00:00:00.000Z',
      model: 'gemini-pro',
    },
    enrichment_status: 'semantic',
  };

  const barEntry: FileEntry = {
    file: 'src/bar.ts',
    hash: 'def',
    language: 'typescript',
    symbols: [
      { name: 'Bar', kind: 'function', line: 1, end_line: 5, signature: 'function Bar()', exported: true },
    ],
    imports: [{ source: 'src/foo.ts', resolved: 'src/foo.ts', symbols: ['Foo'], is_external: false }],
    dependents: [],
    dependencies: ['src/foo.ts'],
    depth: 2,
    last_parsed_at: null,
    semantic: null,
    enrichment_status: 'structural',
  };

  return {
    schema_version: 1,
    generated_at: '2024-01-01T00:00:00.000Z',
    root: '/project',
    files: {
      'src/foo.ts': fooEntry,
      'src/bar.ts': barEntry,
    },
    stats: {
      total_files: 2,
      total_symbols: 2,
      total_edges: 1,
      core_modules: [],
      structural_only: 1,
      semantically_enriched: 1,
      indexed: 0,
    },
  };
}
131
+
132
/**
 * Produces `count` deterministic fake embeddings.
 *
 * Each vector has VECTOR_DIMS components, and every component of the i-th
 * vector (0-based) holds `(i + 1) * 0.1`.
 */
function makeFakeVectors(count: number): Float32Array[] {
  return Array.from({ length: count }, (_unused, index) =>
    new Float32Array(VECTOR_DIMS).fill((index + 1) * 0.1),
  );
}
140
+
141
+ // ─── Test setup ───────────────────────────────────────────────────────────────
142
+
143
+ let tmpDir: string;
144
+ let vectorStorePath: string;
145
+ let graphDir: string;
146
+ let logger: Logger;
147
+ let config: NomosConfig;
148
+
149
beforeEach(async () => {
  // Fresh sandbox per test: the graph directory is created here, the vector
  // store directory is only named.
  tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'nomos-idx-'));
  vectorStorePath = path.join(tmpDir, 'vectors');
  graphDir = path.join(tmpDir, 'graph');
  await fs.mkdir(graphDir, { recursive: true });

  // Seed the graph directory with the project map fixture.
  const mapFile = path.join(graphDir, 'project_map.json');
  const serializedMap = JSON.stringify(makeProjectMap(graphDir));
  await fs.writeFile(mapFile, serializedMap, 'utf-8');

  logger = makeLogger();
  config = makeConfig(vectorStorePath, graphDir);

  // Default embedder behaviour: one fake vector per requested text.
  mockEmbedBatch.mockImplementation(async (texts: string[]) => makeFakeVectors(texts.length));
  mockEmbedOne.mockImplementation(async () => {
    const [only] = makeFakeVectors(1);
    return only!;
  });
});

afterEach(async () => {
  // Clear recorded mock calls, then remove the sandbox.
  vi.clearAllMocks();
  const removal = { recursive: true, force: true };
  await fs.rm(tmpDir, removal);
});
175
+
176
+ // ─── Tests ────────────────────────────────────────────────────────────────────
177
+
178
+ describe('SearchIndexer', () => {
179
+
180
+ // ── Test 1: fullIndex() runs the full pipeline ────────────────────────────
181
it('fullIndex() loads project map, extracts chunks, embeds, and writes metadata', async () => {
  const returned = await new SearchIndexer(tmpDir, config, logger).fullIndex();

  // The returned summary describes a finished run with the configured model.
  expect(returned.status).toBe('complete');
  expect(returned.embedding_model).toBe(config.search.embedding_model);
  expect(returned.total_chunks).toBeGreaterThan(0);
  expect(mockEmbedBatch).toHaveBeenCalled();

  // The same status must also have been persisted inside the vector store directory.
  const metaFile = path.join(vectorStorePath, 'index-meta.json');
  const persisted = JSON.parse(await fs.readFile(metaFile, 'utf-8')) as IndexMetadata;
  expect(persisted.status).toBe('complete');
});
199
+
200
+ // ── Test 2: fullIndex() uses table-swap NOT reset()+upsert() [BLOCKER-1] ──
201
it('fullIndex() writes to staging then promotes — never calls upsert() directly [BLOCKER-1]', async () => {
  const { VectorStore } = await import('../vector-store.js');
  const proto = VectorStore.prototype;
  const directUpsert = vi.spyOn(proto, 'upsert');
  const stagedUpsert = vi.spyOn(proto, 'upsertToStaging');
  const promotion = vi.spyOn(proto, 'promoteStagingToLive');

  await new SearchIndexer(tmpDir, config, logger).fullIndex();

  // Records go through staging and are promoted; the direct upsert() path stays unused.
  expect(stagedUpsert).toHaveBeenCalled();
  expect(promotion).toHaveBeenCalled();
  expect(directUpsert).not.toHaveBeenCalled();
});
215
+
216
+ // ── Test 3: fullIndex() writes "in_progress" BEFORE embedding [BLOCKER-2] ──
217
it('fullIndex() writes status: "in_progress" before embedding, "complete" after [BLOCKER-2]', async () => {
  const metaFile = path.join(vectorStorePath, 'index-meta.json');
  const observed: string[] = [];

  // Reads the status currently on disk; an unreadable or absent file is
  // reported as 'missing'.
  const snapshotStatus = async (): Promise<string> => {
    try {
      const parsed = JSON.parse(await fs.readFile(metaFile, 'utf-8')) as IndexMetadata;
      return parsed.status;
    } catch {
      return 'missing';
    }
  };

  // Record the on-disk status every time a batch is embedded.
  mockEmbedBatch.mockImplementation(async (texts: string[]) => {
    observed.push(await snapshotStatus());
    return makeFakeVectors(texts.length);
  });

  const result = await new SearchIndexer(tmpDir, config, logger).fullIndex();

  // At least one embedding call saw "in_progress" on disk...
  expect(observed.some((status) => status === 'in_progress')).toBe(true);
  // ...and the run itself finished as "complete".
  expect(result.status).toBe('complete');
});
241
+
242
+ // ── Test 4: fullIndex() upserts per-batch, not all-at-once [BLOCKER-4] ────
243
it('fullIndex() calls upsertToStaging once per batch, not once for all chunks [BLOCKER-4]', async () => {
  const { VectorStore } = await import('../vector-store.js');
  const stagedUpsert = vi.spyOn(VectorStore.prototype, 'upsertToStaging');

  await new SearchIndexer(tmpDir, config, logger).fullIndex();

  // The fixture config uses batch_size = 2, so more than one staging write is expected.
  const calls = stagedUpsert.mock.calls;
  expect(calls.length).toBeGreaterThan(1);

  // No single staging write may carry more records than one batch.
  const maxPerCall = config.search.batch_size;
  calls.forEach(([records]) => {
    expect((records as unknown[]).length).toBeLessThanOrEqual(maxPerCall);
  });
});
257
+
258
+ // ── Test 5: incrementalIndex() only re-embeds changed files ───────────────
259
it('incrementalIndex() only re-embeds changed files (changed hash)', async () => {
  const indexer = new SearchIndexer(tmpDir, config, logger);
  await indexer.fullIndex();

  const fullIndexCalls = mockEmbedBatch.mock.calls.length;
  mockEmbedBatch.mockClear();

  // Change one file's hash (and its semantic overview) in project_map.json so
  // the incremental run has exactly one changed file to pick up.
  const projectMap = makeProjectMap(graphDir);
  const changed = projectMap.files['src/foo.ts']!;
  changed.hash = 'CHANGED-HASH';
  if (changed.semantic) {
    changed.semantic.overview = 'Changed overview';
  }
  await fs.writeFile(
    path.join(graphDir, 'project_map.json'),
    JSON.stringify(projectMap),
    'utf-8',
  );

  const meta = await indexer.incrementalIndex();
  expect(meta.status).toBe('complete');

  // The changed file must actually be re-embedded. Without this check the
  // "fewer calls" assertion below would also pass if nothing were embedded.
  expect(mockEmbedBatch).toHaveBeenCalled();
  const embeddedTexts = mockEmbedBatch.mock.calls.flatMap(([texts]) => texts);
  expect(embeddedTexts.some((text: string) => text.includes('src/foo.ts'))).toBe(true);

  // Fewer embedding calls than the full index needed.
  expect(mockEmbedBatch.mock.calls.length).toBeLessThan(fullIndexCalls);
});
284
+
285
+ // ── Test 6: incrementalIndex() re-embeds files in failed_files [GAP-1] ────
286
it('incrementalIndex() re-embeds files in failed_files even if hash unchanged [GAP-1]', async () => {
  const indexer = new SearchIndexer(tmpDir, config, logger);
  await indexer.fullIndex();

  // Rewrite the persisted metadata so src/foo.ts is listed as failed.
  const metaFile = path.join(vectorStorePath, 'index-meta.json');
  const onDisk = JSON.parse(await fs.readFile(metaFile, 'utf-8')) as IndexMetadata;
  const withFailure: IndexMetadata = { ...onDisk, failed_files: ['src/foo.ts'] };
  await fs.writeFile(metaFile, JSON.stringify(withFailure), 'utf-8');

  mockEmbedBatch.mockClear();

  const outcome = await indexer.incrementalIndex();
  expect(outcome.status).toBe('complete');

  // At least one embedded text must mention the previously failed file.
  expect(mockEmbedBatch).toHaveBeenCalled();
  const mentionsFoo = mockEmbedBatch.mock.calls
    .flatMap(([texts]) => texts)
    .some((text: string) => text.includes('src/foo.ts'));
  expect(mentionsFoo).toBe(true);
});
309
+
310
+ // ── Test 7: incrementalIndex() deletes records for removed files ──────────
311
it('incrementalIndex() deletes records for files removed from project map', async () => {
  const { VectorStore } = await import('../vector-store.js');
  const deletions = vi.spyOn(VectorStore.prototype, 'deleteByFilePaths');

  const indexer = new SearchIndexer(tmpDir, config, logger);
  await indexer.fullIndex();

  // Persist a project map that no longer contains src/bar.ts.
  const shrunkMap = makeProjectMap(graphDir);
  delete shrunkMap.files['src/bar.ts'];
  shrunkMap.stats.total_files = 1;
  const mapFile = path.join(graphDir, 'project_map.json');
  await fs.writeFile(mapFile, JSON.stringify(shrunkMap), 'utf-8');

  // Only deletions issued by the incremental run are of interest.
  deletions.mockClear();
  await indexer.incrementalIndex();

  const removed = deletions.mock.calls.flatMap(([paths]) => paths);
  expect(removed).toContain('src/bar.ts');
});
335
+
336
+ // ── Test 8: incrementalIndex() falls back to fullIndex() when no metadata ──
337
it('incrementalIndex() falls back to fullIndex() when no index-meta.json exists', async () => {
  // Nothing has been indexed yet, so there is no metadata file to read.
  const { VectorStore } = await import('../vector-store.js');
  const promotion = vi.spyOn(VectorStore.prototype, 'promoteStagingToLive');

  const outcome = await new SearchIndexer(tmpDir, config, logger).incrementalIndex();

  // A staging-to-live promotion is taken as the sign that a full index ran.
  expect(promotion).toHaveBeenCalled();
  expect(outcome.status).toBe('complete');
});
349
+
350
+ // ── Test 9: incrementalIndex() falls back when status === "in_progress" [BLOCKER-2]
351
it('incrementalIndex() falls back to fullIndex() when metadata status is "in_progress" [BLOCKER-2]', async () => {
  // Plant metadata that looks like an interrupted earlier run.
  await fs.mkdir(vectorStorePath, { recursive: true });
  const { embedding_model, embedding_dimensions } = config.search;
  const interrupted: IndexMetadata = {
    status: 'in_progress',
    last_full_index: new Date().toISOString(),
    last_incremental_index: null,
    total_files_indexed: 0,
    total_symbols_indexed: 0,
    total_chunks: 0,
    embedding_model,
    vector_dimensions: embedding_dimensions,
    failed_files: [],
    files: {},
  };
  await fs.writeFile(
    path.join(vectorStorePath, 'index-meta.json'),
    JSON.stringify(interrupted),
    'utf-8',
  );

  const { VectorStore } = await import('../vector-store.js');
  const promotion = vi.spyOn(VectorStore.prototype, 'promoteStagingToLive');

  await new SearchIndexer(tmpDir, config, logger).incrementalIndex();

  // The fallback is announced with a warning and ends in a promotion.
  expect(logger.warn).toHaveBeenCalledWith(expect.stringContaining('Previous index incomplete'));
  expect(promotion).toHaveBeenCalled();
});
379
+
380
+ // ── Test 10: incrementalIndex() falls back on embedding model mismatch [BLOCKER-3]
381
it('incrementalIndex() falls back to fullIndex() on embedding model mismatch [BLOCKER-3]', async () => {
  const indexer = new SearchIndexer(tmpDir, config, logger);
  await indexer.fullIndex();

  // Rewrite the persisted metadata so it names a different embedding model.
  const metaFile = path.join(vectorStorePath, 'index-meta.json');
  const recorded = JSON.parse(await fs.readFile(metaFile, 'utf-8')) as IndexMetadata;
  const tampered = { ...recorded, embedding_model: 'old-model-v1' };
  await fs.writeFile(metaFile, JSON.stringify(tampered), 'utf-8');

  // Discard any promotion calls recorded so far; only the incremental run counts.
  const { VectorStore } = await import('../vector-store.js');
  const promotion = vi.spyOn(VectorStore.prototype, 'promoteStagingToLive');
  promotion.mockClear();

  await indexer.incrementalIndex();

  expect(logger.warn).toHaveBeenCalledWith(
    expect.stringContaining('Embedding model/dimensions changed'),
  );
  expect(promotion).toHaveBeenCalled();
});
405
+
406
+ // ── Test 11: incrementalIndex() falls back on vector dimension mismatch [BLOCKER-3]
407
it('incrementalIndex() falls back to fullIndex() on vector dimension mismatch [BLOCKER-3]', async () => {
  const indexer = new SearchIndexer(tmpDir, config, logger);
  await indexer.fullIndex();

  // Rewrite the persisted metadata so its vector size disagrees with the config.
  const metaFile = path.join(vectorStorePath, 'index-meta.json');
  const recorded = JSON.parse(await fs.readFile(metaFile, 'utf-8')) as IndexMetadata;
  const tampered = { ...recorded, vector_dimensions: 9999 };
  await fs.writeFile(metaFile, JSON.stringify(tampered), 'utf-8');

  // Discard any promotion calls recorded so far; only the incremental run counts.
  const { VectorStore } = await import('../vector-store.js');
  const promotion = vi.spyOn(VectorStore.prototype, 'promoteStagingToLive');
  promotion.mockClear();

  await indexer.incrementalIndex();

  expect(logger.warn).toHaveBeenCalledWith(
    expect.stringContaining('Embedding model/dimensions changed'),
  );
  expect(promotion).toHaveBeenCalled();
});
430
+
431
+ // ── Test 12: metadata contains correct totals, hashes, and failed_files ───
432
it('fullIndex() metadata contains correct totals, per-file hashes, and failed_files', async () => {
  const summary = await new SearchIndexer(tmpDir, config, logger).fullIndex();

  expect(summary.status).toBe('complete');
  // The chunk total is the sum of the file count and the symbol count.
  expect(summary.total_chunks).toBe(summary.total_files_indexed + summary.total_symbols_indexed);
  expect(summary.failed_files).toEqual([]);
  expect(typeof summary.last_full_index).toBe('string');

  // Every per-file record must carry a non-empty content hash.
  for (const entry of Object.values(summary.files)) {
    expect(typeof entry.content_hash).toBe('string');
    expect(entry.content_hash.length).toBeGreaterThan(0);
  }
});
447
+
448
+ // ── Test 13: Cancellation flag stops processing, writes "in_progress" ─────
449
it('cancellation flag stops processing between batches; metadata written as "in_progress"', async () => {
  const cancellationFlag = { cancelled: false };

  // Raise the flag during every embed call, i.e. from the first batch onward.
  mockEmbedBatch.mockImplementation(async (texts: string[]) => {
    cancellationFlag.cancelled = true;
    return makeFakeVectors(texts.length);
  });

  const outcome = await new SearchIndexer(tmpDir, config, logger).fullIndex(cancellationFlag);

  expect(outcome.status).toBe('in_progress');
});
467
+
468
+ // ── Test 14: Missing project_map.json throws NomosError ───────────────────
469
it('fullIndex() throws NomosError("search_index_failed") when project_map.json is missing', async () => {
  // Remove the project map that the setup hook wrote.
  await fs.rm(path.join(graphDir, 'project_map.json'));

  const indexer = new SearchIndexer(tmpDir, config, logger);

  // Run the pipeline once and check both properties on the same rejection.
  // Calling fullIndex() twice would compare two separate failures and run
  // the failing pipeline a second time.
  const attempt = indexer.fullIndex();

  await expect(attempt).rejects.toThrow(NomosError);
  await expect(attempt).rejects.toMatchObject({ code: 'search_index_failed' });
});
478
+
479
+ // ── Test 15: Partial embedding failure — failed files recorded [GAP-1] ────
480
it('partial embedding failure: failed files recorded in failed_files, rest indexed [GAP-1]', async () => {
  // Reject only the first embedding call; every later call succeeds.
  let isFirstBatch = true;
  mockEmbedBatch.mockImplementation(async (texts: string[]) => {
    if (isFirstBatch) {
      isFirstBatch = false;
      throw new Error('Simulated embedding API failure');
    }
    return makeFakeVectors(texts.length);
  });

  const summary = await new SearchIndexer(tmpDir, config, logger).fullIndex();

  // The failure is recorded...
  expect(summary.failed_files.length).toBeGreaterThan(0);
  // ...and the run is still reported as complete.
  expect(summary.status).toBe('complete');
  // Sanity bound only: the chunk total is never negative.
  expect(summary.total_chunks).toBeGreaterThanOrEqual(0);
});
501
+
502
+ // ── Test 16: dryRun() returns chunk counts without calling Embedder or VectorStore [S-2]
503
it('dryRun() returns chunk counts without calling Embedder or VectorStore [S-2]', async () => {
  const { VectorStore } = await import('../vector-store.js');
  const storeInit = vi.spyOn(VectorStore.prototype, 'init');

  const { totalChunks, fileChunks, symbolChunks } =
    await new SearchIndexer(tmpDir, config, logger).dryRun();

  // The total is the sum of its two parts and is non-zero for the fixture map.
  expect(totalChunks).toBe(fileChunks + symbolChunks);
  expect(totalChunks).toBeGreaterThan(0);

  // A dry run must not embed anything or initialise the vector store.
  expect(mockEmbedBatch).not.toHaveBeenCalled();
  expect(mockEmbedOne).not.toHaveBeenCalled();
  expect(storeInit).not.toHaveBeenCalled();
});
518
+ });