@nomos-arc/arc 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +10 -0
- package/.nomos-config.json +5 -0
- package/CLAUDE.md +108 -0
- package/LICENSE +190 -0
- package/README.md +569 -0
- package/dist/cli.js +21120 -0
- package/docs/auth/googel_plan.yaml +1093 -0
- package/docs/auth/google_task.md +235 -0
- package/docs/auth/hardened_blueprint.yaml +1658 -0
- package/docs/auth/red_team_report.yaml +336 -0
- package/docs/auth/session_state.yaml +162 -0
- package/docs/certificate/cer_enhance_plan.md +605 -0
- package/docs/certificate/certificate_report.md +338 -0
- package/docs/dev_overview.md +419 -0
- package/docs/feature_assessment.md +156 -0
- package/docs/how_it_works.md +78 -0
- package/docs/infrastructure/map.md +867 -0
- package/docs/init/master_plan.md +3581 -0
- package/docs/init/red_team_report.md +215 -0
- package/docs/init/report_phase_1a.md +304 -0
- package/docs/integrity-gate/enhance_drift.md +703 -0
- package/docs/integrity-gate/overview.md +108 -0
- package/docs/management/manger-task.md +99 -0
- package/docs/management/scafffold.md +76 -0
- package/docs/map/ATOMIC_BLUEPRINT.md +1349 -0
- package/docs/map/RED_TEAM_REPORT.md +159 -0
- package/docs/map/map_task.md +147 -0
- package/docs/map/semantic_graph_task.md +792 -0
- package/docs/map/semantic_master_plan.md +705 -0
- package/docs/phase7/TEAM_RED.md +249 -0
- package/docs/phase7/plan.md +1682 -0
- package/docs/phase7/task.md +275 -0
- package/docs/prompts/USAGE.md +312 -0
- package/docs/prompts/architect.md +165 -0
- package/docs/prompts/executer.md +190 -0
- package/docs/prompts/hardener.md +190 -0
- package/docs/prompts/red_team.md +146 -0
- package/docs/verification/goveranance-overview.md +396 -0
- package/docs/verification/governance-overview.md +245 -0
- package/docs/verification/verification-arc-ar.md +560 -0
- package/docs/verification/verification-architecture.md +560 -0
- package/docs/very_next.md +52 -0
- package/docs/whitepaper.md +89 -0
- package/overview.md +1469 -0
- package/package.json +63 -0
- package/src/adapters/__tests__/git.test.ts +296 -0
- package/src/adapters/__tests__/stdio.test.ts +70 -0
- package/src/adapters/git.ts +226 -0
- package/src/adapters/pty.ts +159 -0
- package/src/adapters/stdio.ts +113 -0
- package/src/cli.ts +83 -0
- package/src/commands/apply.ts +47 -0
- package/src/commands/auth.ts +301 -0
- package/src/commands/certificate.ts +89 -0
- package/src/commands/discard.ts +24 -0
- package/src/commands/drift.ts +116 -0
- package/src/commands/index.ts +78 -0
- package/src/commands/init.ts +121 -0
- package/src/commands/list.ts +75 -0
- package/src/commands/map.ts +55 -0
- package/src/commands/plan.ts +30 -0
- package/src/commands/review.ts +58 -0
- package/src/commands/run.ts +63 -0
- package/src/commands/search.ts +147 -0
- package/src/commands/show.ts +63 -0
- package/src/commands/status.ts +59 -0
- package/src/core/__tests__/budget.test.ts +213 -0
- package/src/core/__tests__/certificate.test.ts +385 -0
- package/src/core/__tests__/config.test.ts +191 -0
- package/src/core/__tests__/preflight.test.ts +24 -0
- package/src/core/__tests__/prompt.test.ts +358 -0
- package/src/core/__tests__/review.test.ts +161 -0
- package/src/core/__tests__/state.test.ts +362 -0
- package/src/core/auth/__tests__/manager.test.ts +166 -0
- package/src/core/auth/__tests__/server.test.ts +220 -0
- package/src/core/auth/gcp-projects.ts +160 -0
- package/src/core/auth/manager.ts +114 -0
- package/src/core/auth/server.ts +141 -0
- package/src/core/budget.ts +119 -0
- package/src/core/certificate.ts +502 -0
- package/src/core/config.ts +212 -0
- package/src/core/errors.ts +54 -0
- package/src/core/factory.ts +49 -0
- package/src/core/graph/__tests__/builder.test.ts +272 -0
- package/src/core/graph/__tests__/contract-writer.test.ts +175 -0
- package/src/core/graph/__tests__/enricher.test.ts +299 -0
- package/src/core/graph/__tests__/parser.test.ts +200 -0
- package/src/core/graph/__tests__/pipeline.test.ts +202 -0
- package/src/core/graph/__tests__/renderer.test.ts +128 -0
- package/src/core/graph/__tests__/resolver.test.ts +185 -0
- package/src/core/graph/__tests__/scanner.test.ts +231 -0
- package/src/core/graph/__tests__/show.test.ts +134 -0
- package/src/core/graph/builder.ts +303 -0
- package/src/core/graph/constraints.ts +94 -0
- package/src/core/graph/contract-writer.ts +93 -0
- package/src/core/graph/drift/__tests__/classifier.test.ts +215 -0
- package/src/core/graph/drift/__tests__/comparator.test.ts +335 -0
- package/src/core/graph/drift/__tests__/drift.test.ts +453 -0
- package/src/core/graph/drift/__tests__/reporter.test.ts +203 -0
- package/src/core/graph/drift/classifier.ts +165 -0
- package/src/core/graph/drift/comparator.ts +205 -0
- package/src/core/graph/drift/reporter.ts +77 -0
- package/src/core/graph/enricher.ts +251 -0
- package/src/core/graph/grammar-paths.ts +30 -0
- package/src/core/graph/html-template.ts +493 -0
- package/src/core/graph/map-schema.ts +137 -0
- package/src/core/graph/parser.ts +336 -0
- package/src/core/graph/pipeline.ts +209 -0
- package/src/core/graph/renderer.ts +92 -0
- package/src/core/graph/resolver.ts +195 -0
- package/src/core/graph/scanner.ts +145 -0
- package/src/core/logger.ts +46 -0
- package/src/core/orchestrator.ts +792 -0
- package/src/core/plan-file-manager.ts +66 -0
- package/src/core/preflight.ts +64 -0
- package/src/core/prompt.ts +173 -0
- package/src/core/review.ts +95 -0
- package/src/core/state.ts +294 -0
- package/src/core/worktree-coordinator.ts +77 -0
- package/src/search/__tests__/chunk-extractor.test.ts +339 -0
- package/src/search/__tests__/embedder-auth.test.ts +124 -0
- package/src/search/__tests__/embedder.test.ts +267 -0
- package/src/search/__tests__/graph-enricher.test.ts +178 -0
- package/src/search/__tests__/indexer.test.ts +518 -0
- package/src/search/__tests__/integration.test.ts +649 -0
- package/src/search/__tests__/query-engine.test.ts +334 -0
- package/src/search/__tests__/similarity.test.ts +78 -0
- package/src/search/__tests__/vector-store.test.ts +281 -0
- package/src/search/chunk-extractor.ts +167 -0
- package/src/search/embedder.ts +209 -0
- package/src/search/graph-enricher.ts +95 -0
- package/src/search/indexer.ts +483 -0
- package/src/search/lexical-searcher.ts +190 -0
- package/src/search/query-engine.ts +225 -0
- package/src/search/vector-store.ts +311 -0
- package/src/types/index.ts +572 -0
- package/src/utils/__tests__/ansi.test.ts +54 -0
- package/src/utils/__tests__/frontmatter.test.ts +79 -0
- package/src/utils/__tests__/sanitize.test.ts +229 -0
- package/src/utils/ansi.ts +19 -0
- package/src/utils/context.ts +44 -0
- package/src/utils/frontmatter.ts +27 -0
- package/src/utils/sanitize.ts +78 -0
- package/test/e2e/lifecycle.test.ts +330 -0
- package/test/fixtures/mock-planner-hang.ts +5 -0
- package/test/fixtures/mock-planner.ts +26 -0
- package/test/fixtures/mock-reviewer-bad.ts +8 -0
- package/test/fixtures/mock-reviewer-retry.ts +34 -0
- package/test/fixtures/mock-reviewer.ts +18 -0
- package/test/fixtures/sample-project/src/circular-a.ts +6 -0
- package/test/fixtures/sample-project/src/circular-b.ts +6 -0
- package/test/fixtures/sample-project/src/config.ts +15 -0
- package/test/fixtures/sample-project/src/main.ts +19 -0
- package/test/fixtures/sample-project/src/services/product-service.ts +20 -0
- package/test/fixtures/sample-project/src/services/user-service.ts +18 -0
- package/test/fixtures/sample-project/src/types.ts +14 -0
- package/test/fixtures/sample-project/src/utils/index.ts +14 -0
- package/test/fixtures/sample-project/src/utils/validate.ts +12 -0
- package/tsconfig.json +20 -0
- package/vitest.config.ts +12 -0
|
@@ -0,0 +1,518 @@
|
|
|
1
|
+
import * as os from 'node:os';
|
|
2
|
+
import * as fs from 'node:fs/promises';
|
|
3
|
+
import * as path from 'node:path';
|
|
4
|
+
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
|
5
|
+
import type { Logger } from 'winston';
|
|
6
|
+
import { SearchIndexer } from '../indexer.js';
|
|
7
|
+
import { NomosError } from '../../core/errors.js';
|
|
8
|
+
import type { IndexMetadata, NomosConfig, ProjectMap } from '../../types/index.js';
|
|
9
|
+
|
|
10
|
+
// ─── Mocks ────────────────────────────────────────────────────────────────────
|
|
11
|
+
|
|
12
|
+
// Mock Embedder: deterministic fake vectors, never calls Gemini
|
|
13
|
+
const mockEmbedBatch = vi.fn<(texts: string[]) => Promise<Float32Array[]>>();
const mockEmbedOne = vi.fn<(text: string) => Promise<Float32Array>>();

/**
 * Swap the real embedder module for a stand-in whose instances expose the two
 * shared mock functions above and a fixed `dimensions` of 4.
 *
 * The shared mocks are only dereferenced inside function bodies, never while
 * the factory itself runs, so the factory does not touch them at load time.
 */
vi.mock('../embedder.js', () => {
  // `new Embedder(...)` path: attach the shared mocks to the constructed object.
  const construct = vi.fn().mockImplementation(function (this: Record<string, unknown>) {
    Object.assign(this, {
      embedBatch: mockEmbedBatch,
      embedOne: mockEmbedOne,
      dimensions: 4,
    });
  });

  // `Embedder.create(...)` path: resolve to a plain object with the same surface.
  const create = vi.fn().mockImplementation(async () => {
    return {
      embedBatch: mockEmbedBatch,
      embedOne: mockEmbedOne,
      dimensions: 4,
    };
  });

  return { Embedder: Object.assign(construct, { create }) };
});
|
|
33
|
+
|
|
34
|
+
// ─── Helpers ──────────────────────────────────────────────────────────────────
|
|
35
|
+
|
|
36
|
+
// Length of every fake embedding vector; also used as `search.embedding_dimensions` in makeConfig().
const VECTOR_DIMS = 4;
|
|
37
|
+
|
|
38
|
+
/**
 * Creates a stand-in logger exposing only `info`, `warn`, `error` and `debug`,
 * each backed by its own `vi.fn()` so tests can assert on log calls.
 */
function makeLogger(): Logger {
  const stub = {
    info: vi.fn(),
    warn: vi.fn(),
    error: vi.fn(),
    debug: vi.fn(),
  };
  // Only the four methods above exist, hence the double cast.
  return stub as unknown as Logger;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Builds the configuration object handed to `SearchIndexer` in these tests.
 *
 * @param vectorStorePath - stored as `search.vector_store_path`
 * @param graphOutputDir - stored as `graph.output_dir`
 * @returns a literal config object cast to `NomosConfig`
 */
function makeConfig(vectorStorePath: string, graphOutputDir: string): NomosConfig {
  // The planner and reviewer entries differ only in command and PTY usage.
  const binary = (cmd: string, pty: boolean) => ({
    cmd,
    args: [],
    pty,
    total_timeout_ms: 60000,
    heartbeat_timeout_ms: 30000,
    max_output_bytes: 1_000_000,
    usage_pattern: null,
  });

  const graph = {
    exclude_patterns: [],
    ai_enrichment: false,
    ai_model: 'gemini-pro',
    ai_concurrency: 1,
    ai_requests_per_minute: 60,
    max_file_chars: 50_000,
    core_modules_count: 5,
    output_dir: graphOutputDir,
  };

  const search = {
    embedding_model: 'text-embedding-004',
    embedding_dimensions: VECTOR_DIMS,
    vector_store_path: vectorStorePath,
    default_top_k: 10,
    default_threshold: 0.7,
    batch_size: 2, // kept small so a tiny project map still spans several batches
    embedding_requests_per_minute: 60,
    request_timeout_ms: 30_000,
  };

  const settings = {
    execution: {
      default_mode: 'supervised',
      shadow_branch_prefix: 'nomos/',
      worktree_base: '/tmp',
      supervised_heartbeat_timeout_ms: 30000,
    },
    binaries: {
      planner: binary('claude', true),
      reviewer: binary('openai', false),
    },
    convergence: { score_threshold: 8, max_iterations: 3 },
    budget: { max_tokens_per_task: 100_000, warn_at_percent: 80, cost_per_1k_tokens: {} },
    security: {
      sanitize_patterns: [],
      entropy_threshold: 4.5,
      sanitize_on: ['input', 'output'],
      safe_commands: [],
      redaction_label: '[REDACTED]',
    },
    git: { auto_commit: false, include_logs: true, commit_prefix: 'arc:', sign_commits: false },
    review: { max_context_files: 10 },
    graph,
    logging: { level: 'info', retain_days: 7 },
    search,
  };

  return settings as unknown as NomosConfig;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Builds a two-file project map fixture: `src/foo.ts` (one class, semantically
 * enriched) and `src/bar.ts` (one function, structural only, importing foo).
 *
 * @param graphDir - accepted for call-site symmetry; not read by this function
 * @returns a fresh object on every call, so callers may mutate it freely
 */
function makeProjectMap(graphDir: string): ProjectMap {
  const files: ProjectMap['files'] = {
    'src/foo.ts': {
      file: 'src/foo.ts',
      hash: 'abc',
      language: 'typescript',
      symbols: [
        {
          name: 'Foo',
          kind: 'class',
          line: 1,
          end_line: 10,
          signature: 'class Foo',
          exported: true,
        },
      ],
      imports: [],
      dependents: ['src/bar.ts'],
      dependencies: [],
      depth: 1,
      last_parsed_at: null,
      semantic: {
        overview: 'Foo module',
        purpose: 'Does foo things',
        key_logic: ['key logic'],
        usage_context: ['used everywhere'],
        source_hash: 'h1',
        enriched_at: '2024-01-01T00:00:00.000Z',
        model: 'gemini-pro',
      },
      enrichment_status: 'semantic',
    },
    'src/bar.ts': {
      file: 'src/bar.ts',
      hash: 'def',
      language: 'typescript',
      symbols: [
        {
          name: 'Bar',
          kind: 'function',
          line: 1,
          end_line: 5,
          signature: 'function Bar()',
          exported: true,
        },
      ],
      imports: [
        { source: 'src/foo.ts', resolved: 'src/foo.ts', symbols: ['Foo'], is_external: false },
      ],
      dependents: [],
      dependencies: ['src/foo.ts'],
      depth: 2,
      last_parsed_at: null,
      semantic: null,
      enrichment_status: 'structural',
    },
  };

  const stats: ProjectMap['stats'] = {
    total_files: 2,
    total_symbols: 2,
    total_edges: 1,
    core_modules: [],
    structural_only: 1,
    semantically_enriched: 1,
    indexed: 0,
  };

  return {
    schema_version: 1,
    generated_at: '2024-01-01T00:00:00.000Z',
    root: '/project',
    files,
    stats,
  };
}
|
|
131
|
+
|
|
132
|
+
/**
 * Produces `count` deterministic stand-in embeddings.
 *
 * Vector `i` (zero-based) has VECTOR_DIMS components, all equal to `(i + 1) * 0.1`.
 */
function makeFakeVectors(count: number): Float32Array[] {
  const slots = Array.from({ length: count });
  return slots.map((_unused, position) => {
    const component = (position + 1) * 0.1;
    return new Float32Array(VECTOR_DIMS).fill(component);
  });
}
|
|
140
|
+
|
|
141
|
+
// ─── Test setup ───────────────────────────────────────────────────────────────
|
|
142
|
+
|
|
143
|
+
// Per-test fixtures; each one is reassigned in beforeEach().
let tmpDir: string;
let vectorStorePath: string;
let graphDir: string;
let logger: Logger;
let config: NomosConfig;

beforeEach(async () => {
  // Fresh sandbox under the OS temp directory for every test.
  const sandboxPrefix = path.join(os.tmpdir(), 'nomos-idx-');
  tmpDir = await fs.mkdtemp(sandboxPrefix);
  vectorStorePath = path.join(tmpDir, 'vectors');
  graphDir = path.join(tmpDir, 'graph');
  await fs.mkdir(graphDir, { recursive: true });

  // Seed the graph directory with the fixture project map.
  const mapFile = path.join(graphDir, 'project_map.json');
  const serializedMap = JSON.stringify(makeProjectMap(graphDir));
  await fs.writeFile(mapFile, serializedMap, 'utf-8');

  logger = makeLogger();
  config = makeConfig(vectorStorePath, graphDir);

  // Default embedder behaviour: one fake vector per input text.
  mockEmbedBatch.mockImplementation(async (texts: string[]) => {
    return makeFakeVectors(texts.length);
  });
  mockEmbedOne.mockImplementation(async () => {
    const [only] = makeFakeVectors(1);
    return only!;
  });
});
|
|
170
|
+
|
|
171
|
+
afterEach(async () => {
  // Drop recorded calls on every mock so counts never leak into the next test.
  vi.clearAllMocks();

  // Remove the sandbox; `force` keeps this quiet if it is already gone.
  const removal = { recursive: true, force: true };
  await fs.rm(tmpDir, removal);
});
|
|
175
|
+
|
|
176
|
+
// ─── Tests ────────────────────────────────────────────────────────────────────
|
|
177
|
+
|
|
178
|
+
describe('SearchIndexer', () => {
|
|
179
|
+
|
|
180
|
+
// ── Test 1: fullIndex() runs the full pipeline ────────────────────────────
|
|
181
|
+
it('fullIndex() loads project map, extracts chunks, embeds, and writes metadata', async () => {
  const result = await new SearchIndexer(tmpDir, config, logger).fullIndex();

  // The returned metadata describes a finished run for the configured model.
  expect(result.status).toBe('complete');
  expect(result.embedding_model).toBe(config.search.embedding_model);
  expect(result.total_chunks).toBeGreaterThan(0);
  expect(mockEmbedBatch).toHaveBeenCalled();

  // The same final status must also be on disk.
  const metaFile = path.join(vectorStorePath, 'index-meta.json');
  const persisted = JSON.parse(await fs.readFile(metaFile, 'utf-8')) as IndexMetadata;
  expect(persisted.status).toBe('complete');
});
|
|
199
|
+
|
|
200
|
+
// ── Test 2: fullIndex() uses table-swap NOT reset()+upsert() [BLOCKER-1] ──
|
|
201
|
+
it('fullIndex() writes to staging then promotes — never calls upsert() directly [BLOCKER-1]', async () => {
  const { VectorStore } = await import('../vector-store.js');
  const storeProto = VectorStore.prototype;
  const directUpsert = vi.spyOn(storeProto, 'upsert');
  const stagedUpsert = vi.spyOn(storeProto, 'upsertToStaging');
  const promotion = vi.spyOn(storeProto, 'promoteStagingToLive');

  await new SearchIndexer(tmpDir, config, logger).fullIndex();

  // Records go through the staging table and are then promoted;
  // the direct upsert path must stay untouched.
  expect(stagedUpsert).toHaveBeenCalled();
  expect(promotion).toHaveBeenCalled();
  expect(directUpsert).not.toHaveBeenCalled();
});
|
|
215
|
+
|
|
216
|
+
// ── Test 3: fullIndex() writes "in_progress" BEFORE embedding [BLOCKER-2] ──
|
|
217
|
+
it('fullIndex() writes status: "in_progress" before embedding, "complete" after [BLOCKER-2]', async () => {
  const metaFile = path.join(vectorStorePath, 'index-meta.json');
  const observed: string[] = [];

  // Reads the persisted status, or reports 'missing' when the file cannot be read or parsed.
  const probeStatus = async (): Promise<string> => {
    try {
      const parsed = JSON.parse(await fs.readFile(metaFile, 'utf-8')) as IndexMetadata;
      return parsed.status;
    } catch {
      return 'missing';
    }
  };

  // Sample the metadata file from inside every embed call.
  mockEmbedBatch.mockImplementation(async (texts: string[]) => {
    observed.push(await probeStatus());
    return makeFakeVectors(texts.length);
  });

  const outcome = await new SearchIndexer(tmpDir, config, logger).fullIndex();

  // At least one sample taken mid-embedding saw "in_progress"...
  expect(observed.includes('in_progress')).toBe(true);
  // ...and the run itself finished as "complete".
  expect(outcome.status).toBe('complete');
});
|
|
241
|
+
|
|
242
|
+
// ── Test 4: fullIndex() upserts per-batch, not all-at-once [BLOCKER-4] ────
|
|
243
|
+
it('fullIndex() calls upsertToStaging once per batch, not once for all chunks [BLOCKER-4]', async () => {
  const { VectorStore } = await import('../vector-store.js');
  const stagedUpsert = vi.spyOn(VectorStore.prototype, 'upsertToStaging');

  await new SearchIndexer(tmpDir, config, logger).fullIndex();

  // With batch_size = 2 the fixture map is expected to need more than one batch.
  const stagingCalls = stagedUpsert.mock.calls;
  expect(stagingCalls.length).toBeGreaterThan(1);

  // No single call may carry more records than one batch allows.
  const batchLimit = config.search.batch_size;
  stagingCalls.forEach(([records]) => {
    expect((records as unknown[]).length).toBeLessThanOrEqual(batchLimit);
  });
});
|
|
257
|
+
|
|
258
|
+
// ── Test 5: incrementalIndex() only re-embeds changed files ───────────────
|
|
259
|
+
it('incrementalIndex() only re-embeds changed files (changed hash)', async () => {
  const indexer = new SearchIndexer(tmpDir, config, logger);
  await indexer.fullIndex();

  // Remember how many embed calls the full run needed, then start counting afresh.
  const fullRunCalls = mockEmbedBatch.mock.calls.length;
  mockEmbedBatch.mockClear();

  // Rewrite project_map.json with a different hash (and overview) for src/foo.ts only.
  const updatedMap = makeProjectMap(graphDir);
  const fooEntry = updatedMap.files['src/foo.ts']!;
  fooEntry.hash = 'CHANGED-HASH';
  if (fooEntry.semantic) {
    fooEntry.semantic.overview = 'Changed overview';
  }
  const mapFile = path.join(graphDir, 'project_map.json');
  await fs.writeFile(mapFile, JSON.stringify(updatedMap), 'utf-8');

  const meta = await indexer.incrementalIndex();

  expect(meta.status).toBe('complete');
  // The incremental pass must need fewer embed calls than the full pass did.
  expect(mockEmbedBatch.mock.calls.length).toBeLessThan(fullRunCalls);
});
|
|
284
|
+
|
|
285
|
+
// ── Test 6: incrementalIndex() re-embeds files in failed_files [GAP-1] ────
|
|
286
|
+
it('incrementalIndex() re-embeds files in failed_files even if hash unchanged [GAP-1]', async () => {
  const indexer = new SearchIndexer(tmpDir, config, logger);
  await indexer.fullIndex();

  // Rewrite the persisted metadata so src/foo.ts is listed as failed.
  const metaFile = path.join(vectorStorePath, 'index-meta.json');
  const current = JSON.parse(await fs.readFile(metaFile, 'utf-8')) as IndexMetadata;
  const withFailure: IndexMetadata = { ...current, failed_files: ['src/foo.ts'] };
  await fs.writeFile(metaFile, JSON.stringify(withFailure), 'utf-8');

  mockEmbedBatch.mockClear();

  const meta = await indexer.incrementalIndex();
  expect(meta.status).toBe('complete');

  // Some text handed to the embedder must mention src/foo.ts.
  expect(mockEmbedBatch).toHaveBeenCalled();
  const embeddedTexts: string[] = [];
  for (const [texts] of mockEmbedBatch.mock.calls) {
    embeddedTexts.push(...texts);
  }
  const mentionsFoo = embeddedTexts.some((text) => text.includes('src/foo.ts'));
  expect(mentionsFoo).toBe(true);
});
|
|
309
|
+
|
|
310
|
+
// ── Test 7: incrementalIndex() deletes records for removed files ──────────
|
|
311
|
+
it('incrementalIndex() deletes records for files removed from project map', async () => {
  const { VectorStore } = await import('../vector-store.js');
  const deletions = vi.spyOn(VectorStore.prototype, 'deleteByFilePaths');

  const indexer = new SearchIndexer(tmpDir, config, logger);
  await indexer.fullIndex();

  // Persist a project map that no longer lists src/bar.ts.
  const shrunkMap = makeProjectMap(graphDir);
  delete shrunkMap.files['src/bar.ts'];
  shrunkMap.stats.total_files = 1;
  const mapFile = path.join(graphDir, 'project_map.json');
  await fs.writeFile(mapFile, JSON.stringify(shrunkMap), 'utf-8');

  // Only deletions triggered by the incremental pass are of interest.
  deletions.mockClear();
  await indexer.incrementalIndex();

  const removedPaths = deletions.mock.calls.flatMap(([paths]) => paths);
  expect(removedPaths).toContain('src/bar.ts');
});
|
|
335
|
+
|
|
336
|
+
// ── Test 8: incrementalIndex() falls back to fullIndex() when no metadata ──
|
|
337
|
+
it('incrementalIndex() falls back to fullIndex() when no index-meta.json exists', async () => {
  // fullIndex() has not run, so there is no metadata file yet.
  const { VectorStore } = await import('../vector-store.js');
  const promotion = vi.spyOn(VectorStore.prototype, 'promoteStagingToLive');

  const outcome = await new SearchIndexer(tmpDir, config, logger).incrementalIndex();

  // A staging-to-live promotion is the signature of a full index run.
  expect(promotion).toHaveBeenCalled();
  expect(outcome.status).toBe('complete');
});
|
|
349
|
+
|
|
350
|
+
// ── Test 9: incrementalIndex() falls back when status === "in_progress" [BLOCKER-2]
|
|
351
|
+
it('incrementalIndex() falls back to fullIndex() when metadata status is "in_progress" [BLOCKER-2]', async () => {
  // Plant metadata that looks like an index run which never finished.
  const interrupted: IndexMetadata = {
    status: 'in_progress',
    last_full_index: new Date().toISOString(),
    last_incremental_index: null,
    total_files_indexed: 0,
    total_symbols_indexed: 0,
    total_chunks: 0,
    embedding_model: config.search.embedding_model,
    vector_dimensions: config.search.embedding_dimensions,
    failed_files: [],
    files: {},
  };
  await fs.mkdir(vectorStorePath, { recursive: true });
  const metaFile = path.join(vectorStorePath, 'index-meta.json');
  await fs.writeFile(metaFile, JSON.stringify(interrupted), 'utf-8');

  const { VectorStore } = await import('../vector-store.js');
  const promotion = vi.spyOn(VectorStore.prototype, 'promoteStagingToLive');

  await new SearchIndexer(tmpDir, config, logger).incrementalIndex();

  // The fallback is announced via a warning and ends in a staging-to-live promotion.
  const expectedWarning = expect.stringContaining('Previous index incomplete');
  expect(logger.warn).toHaveBeenCalledWith(expectedWarning);
  expect(promotion).toHaveBeenCalled();
});
|
|
379
|
+
|
|
380
|
+
// ── Test 10: incrementalIndex() falls back on embedding model mismatch [BLOCKER-3]
|
|
381
|
+
it('incrementalIndex() falls back to fullIndex() on embedding model mismatch [BLOCKER-3]', async () => {
  const indexer = new SearchIndexer(tmpDir, config, logger);
  await indexer.fullIndex();

  // Make the persisted metadata claim it was built with another embedding model.
  const metaFile = path.join(vectorStorePath, 'index-meta.json');
  const current = JSON.parse(await fs.readFile(metaFile, 'utf-8')) as IndexMetadata;
  const tampered = { ...current, embedding_model: 'old-model-v1' };
  await fs.writeFile(metaFile, JSON.stringify(tampered), 'utf-8');

  const { VectorStore } = await import('../vector-store.js');
  const promotion = vi.spyOn(VectorStore.prototype, 'promoteStagingToLive');
  // Discard any calls already recorded on the spy before the run under test.
  promotion.mockClear();

  await indexer.incrementalIndex();

  const expectedWarning = expect.stringContaining('Embedding model/dimensions changed');
  expect(logger.warn).toHaveBeenCalledWith(expectedWarning);
  expect(promotion).toHaveBeenCalled();
});
|
|
405
|
+
|
|
406
|
+
// ── Test 11: incrementalIndex() falls back on vector dimension mismatch [BLOCKER-3]
|
|
407
|
+
it('incrementalIndex() falls back to fullIndex() on vector dimension mismatch [BLOCKER-3]', async () => {
  const indexer = new SearchIndexer(tmpDir, config, logger);
  await indexer.fullIndex();

  // Make the persisted metadata report a vector size that differs from the config.
  const metaFile = path.join(vectorStorePath, 'index-meta.json');
  const current = JSON.parse(await fs.readFile(metaFile, 'utf-8')) as IndexMetadata;
  const tampered = { ...current, vector_dimensions: 9999 };
  await fs.writeFile(metaFile, JSON.stringify(tampered), 'utf-8');

  const { VectorStore } = await import('../vector-store.js');
  const promotion = vi.spyOn(VectorStore.prototype, 'promoteStagingToLive');
  // Discard any calls already recorded on the spy before the run under test.
  promotion.mockClear();

  await indexer.incrementalIndex();

  const expectedWarning = expect.stringContaining('Embedding model/dimensions changed');
  expect(logger.warn).toHaveBeenCalledWith(expectedWarning);
  expect(promotion).toHaveBeenCalled();
});
|
|
430
|
+
|
|
431
|
+
// ── Test 12: metadata contains correct totals, hashes, and failed_files ───
|
|
432
|
+
it('fullIndex() metadata contains correct totals, per-file hashes, and failed_files', async () => {
  const summary = await new SearchIndexer(tmpDir, config, logger).fullIndex();

  expect(summary.status).toBe('complete');
  // Chunk total is the sum of the file and symbol counters.
  const expectedChunks = summary.total_files_indexed + summary.total_symbols_indexed;
  expect(summary.total_chunks).toBe(expectedChunks);
  expect(summary.failed_files).toEqual([]);
  expect(typeof summary.last_full_index).toBe('string');

  // Every per-file record carries a non-empty string hash.
  Object.values(summary.files).forEach((record) => {
    expect(typeof record.content_hash).toBe('string');
    expect(record.content_hash.length).toBeGreaterThan(0);
  });
});
|
|
447
|
+
|
|
448
|
+
// ── Test 13: Cancellation flag stops processing, writes "in_progress" ─────
|
|
449
|
+
it('cancellation flag stops processing between batches; metadata written as "in_progress"', async () => {
  const cancellationFlag = { cancelled: false };

  // Raise the flag from inside the embedder, so cancellation is requested
  // while the first batch is being embedded. (The former call counter and its
  // `>= 1` guard were always true after the increment, so they are dropped.)
  mockEmbedBatch.mockImplementation(async (texts: string[]) => {
    cancellationFlag.cancelled = true;
    return makeFakeVectors(texts.length);
  });

  const indexer = new SearchIndexer(tmpDir, config, logger);
  const meta = await indexer.fullIndex(cancellationFlag);

  // A cancelled run must not report itself as complete.
  expect(meta.status).toBe('in_progress');
});
|
|
467
|
+
|
|
468
|
+
// ── Test 14: Missing project_map.json throws NomosError ───────────────────
|
|
469
|
+
it('fullIndex() throws NomosError("search_index_failed") when project_map.json is missing', async () => {
  // Take away the project map that beforeEach() wrote.
  const mapFile = path.join(graphDir, 'project_map.json');
  await fs.rm(mapFile);

  const indexer = new SearchIndexer(tmpDir, config, logger);

  // Two independent runs: one checks the error class, the other its code.
  const expectedShape = { code: 'search_index_failed' };
  await expect(indexer.fullIndex()).rejects.toThrow(NomosError);
  await expect(indexer.fullIndex()).rejects.toMatchObject(expectedShape);
});
|
|
478
|
+
|
|
479
|
+
// ── Test 15: Partial embedding failure — failed files recorded [GAP-1] ────
|
|
480
|
+
it('partial embedding failure: failed files recorded in failed_files, rest indexed [GAP-1]', async () => {
  // Only the very first embed call rejects; every later call succeeds.
  let firstCallPending = true;
  mockEmbedBatch.mockImplementation(async (texts: string[]) => {
    if (firstCallPending) {
      firstCallPending = false;
      throw new Error('Simulated embedding API failure');
    }
    return makeFakeVectors(texts.length);
  });

  const outcome = await new SearchIndexer(tmpDir, config, logger).fullIndex();

  // The failing batch leaves at least one entry in failed_files...
  expect(outcome.failed_files.length).toBeGreaterThan(0);
  // ...yet the run as a whole still reports "complete".
  expect(outcome.status).toBe('complete');
  // Lower bound only: the chunk total must not be negative.
  expect(outcome.total_chunks).toBeGreaterThanOrEqual(0);
});
|
|
501
|
+
|
|
502
|
+
// ── Test 16: dryRun() returns chunk counts without calling Embedder or VectorStore [S-2]
|
|
503
|
+
it('dryRun() returns chunk counts without calling Embedder or VectorStore [S-2]', async () => {
  const { VectorStore } = await import('../vector-store.js');
  const storeInit = vi.spyOn(VectorStore.prototype, 'init');

  const preview = await new SearchIndexer(tmpDir, config, logger).dryRun();

  // Counts are internally consistent and non-empty for the fixture map.
  const summed = preview.fileChunks + preview.symbolChunks;
  expect(preview.totalChunks).toBe(summed);
  expect(preview.totalChunks).toBeGreaterThan(0);

  // Neither embedder entry point nor the store's init() may have been called.
  expect(mockEmbedBatch).not.toHaveBeenCalled();
  expect(mockEmbedOne).not.toHaveBeenCalled();
  expect(storeInit).not.toHaveBeenCalled();
});
|
|
518
|
+
});
|