@kaelio/ktx 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. package/assets/python/{kaelio_ktx-0.2.0-py3-none-any.whl → kaelio_ktx-0.4.0-py3-none-any.whl} +0 -0
  2. package/assets/python/manifest.json +4 -4
  3. package/dist/admin-reindex.js +10 -17
  4. package/dist/admin-reindex.test.js +1 -1
  5. package/dist/cli-program.test.js +0 -2
  6. package/dist/cli-project.d.ts +18 -0
  7. package/dist/cli-project.js +52 -0
  8. package/dist/cli-project.test.js +149 -0
  9. package/dist/cli-runtime.d.ts +0 -2
  10. package/dist/cli-runtime.js +2 -8
  11. package/dist/commands/runtime-commands.js +2 -2
  12. package/dist/context-build-view.js +1 -1
  13. package/dist/index.test.js +21 -25
  14. package/dist/ingest.js +9 -2
  15. package/dist/ingest.test.js +27 -3
  16. package/dist/managed-local-embeddings.d.ts +0 -2
  17. package/dist/managed-local-embeddings.js +2 -5
  18. package/dist/managed-local-embeddings.test.js +5 -8
  19. package/dist/managed-python-daemon.js +2 -2
  20. package/dist/managed-python-daemon.test.js +1 -1
  21. package/dist/managed-python-http.js +3 -3
  22. package/dist/managed-python-http.test.js +6 -6
  23. package/dist/print-command-tree.js +0 -2
  24. package/dist/public-ingest.d.ts +4 -2
  25. package/dist/public-ingest.js +9 -3
  26. package/dist/release-version.d.ts +1 -5
  27. package/dist/release-version.js +2 -39
  28. package/dist/runtime-requirements.js +1 -1
  29. package/dist/runtime.js +6 -6
  30. package/dist/runtime.test.js +7 -7
  31. package/dist/scan.js +7 -2
  32. package/dist/scan.test.js +1 -1
  33. package/dist/setup-embeddings.js +1 -1
  34. package/dist/setup-embeddings.test.js +2 -2
  35. package/dist/setup-runtime.test.js +1 -1
  36. package/node_modules/@ktx/context/dist/core/git.service.d.ts +1 -0
  37. package/node_modules/@ktx/context/dist/core/git.service.js +12 -0
  38. package/node_modules/@ktx/context/dist/ingest/adapters/historic-sql/historic-sql.adapter.d.ts +2 -1
  39. package/node_modules/@ktx/context/dist/ingest/adapters/historic-sql/historic-sql.adapter.js +18 -0
  40. package/node_modules/@ktx/context/dist/ingest/adapters/historic-sql/local-ingest-acceptance.test.js +6 -6
  41. package/node_modules/@ktx/context/dist/ingest/adapters/historic-sql/projection.d.ts +5 -0
  42. package/node_modules/@ktx/context/dist/ingest/adapters/historic-sql/projection.js +48 -0
  43. package/node_modules/@ktx/context/dist/ingest/adapters/historic-sql/projection.test.js +83 -0
  44. package/node_modules/@ktx/context/dist/ingest/adapters/live-database/daemon-introspection.js +4 -1
  45. package/node_modules/@ktx/context/dist/ingest/adapters/live-database/daemon-introspection.test.js +32 -0
  46. package/node_modules/@ktx/context/dist/ingest/finalization-scope.d.ts +22 -0
  47. package/node_modules/@ktx/context/dist/ingest/finalization-scope.js +95 -0
  48. package/node_modules/@ktx/context/dist/ingest/finalization-scope.test.d.ts +1 -0
  49. package/node_modules/@ktx/context/dist/ingest/finalization-scope.test.js +114 -0
  50. package/node_modules/@ktx/context/dist/ingest/index.d.ts +1 -2
  51. package/node_modules/@ktx/context/dist/ingest/index.js +0 -1
  52. package/node_modules/@ktx/context/dist/ingest/ingest-bundle.runner.d.ts +2 -0
  53. package/node_modules/@ktx/context/dist/ingest/ingest-bundle.runner.isolated-diff.test.js +166 -0
  54. package/node_modules/@ktx/context/dist/ingest/ingest-bundle.runner.js +235 -45
  55. package/node_modules/@ktx/context/dist/ingest/ingest-bundle.runner.test.js +193 -38
  56. package/node_modules/@ktx/context/dist/ingest/local-bundle-ingest.test.js +22 -3
  57. package/node_modules/@ktx/context/dist/ingest/local-bundle-runtime.js +0 -4
  58. package/node_modules/@ktx/context/dist/ingest/local-ingest.js +0 -7
  59. package/node_modules/@ktx/context/dist/ingest/local-stage-ingest.js +15 -5
  60. package/node_modules/@ktx/context/dist/ingest/local-stage-ingest.test.js +29 -0
  61. package/node_modules/@ktx/context/dist/ingest/memory-flow/schema.d.ts +2 -2
  62. package/node_modules/@ktx/context/dist/ingest/memory-flow/schema.js +1 -1
  63. package/node_modules/@ktx/context/dist/ingest/memory-flow/types.d.ts +1 -1
  64. package/node_modules/@ktx/context/dist/ingest/ports.d.ts +1 -20
  65. package/node_modules/@ktx/context/dist/ingest/report-snapshot.d.ts +71 -0
  66. package/node_modules/@ktx/context/dist/ingest/report-snapshot.js +27 -0
  67. package/node_modules/@ktx/context/dist/ingest/reports.d.ts +23 -5
  68. package/node_modules/@ktx/context/dist/ingest/reports.js +7 -24
  69. package/node_modules/@ktx/context/dist/ingest/types.d.ts +33 -0
  70. package/node_modules/@ktx/context/dist/llm/index.d.ts +1 -1
  71. package/node_modules/@ktx/context/dist/llm/index.js +1 -1
  72. package/node_modules/@ktx/context/dist/llm/local-config.d.ts +0 -1
  73. package/node_modules/@ktx/context/dist/llm/local-config.js +2 -12
  74. package/node_modules/@ktx/context/dist/llm/local-config.test.js +2 -23
  75. package/node_modules/@ktx/context/dist/package-exports.test.js +2 -2
  76. package/node_modules/@ktx/context/dist/project/config.d.ts +16 -0
  77. package/node_modules/@ktx/context/dist/project/driver-schemas.d.ts +8 -0
  78. package/node_modules/@ktx/context/dist/project/driver-schemas.js +4 -0
  79. package/node_modules/@ktx/context/dist/scan/enabled-tables.d.ts +3 -0
  80. package/node_modules/@ktx/context/dist/scan/enabled-tables.js +15 -0
  81. package/node_modules/@ktx/context/dist/scan/local-scan.d.ts +2 -4
  82. package/node_modules/@ktx/context/dist/scan/local-scan.js +2 -15
  83. package/package.json +1 -1
  84. package/node_modules/@ktx/context/dist/ingest/adapters/historic-sql/post-processor.d.ts +0 -4
  85. package/node_modules/@ktx/context/dist/ingest/adapters/historic-sql/post-processor.js +0 -38
  86. package/node_modules/@ktx/context/dist/ingest/adapters/historic-sql/post-processor.test.js +0 -63
  87. /package/{node_modules/@ktx/context/dist/ingest/adapters/historic-sql/post-processor.test.d.ts → dist/cli-project.test.d.ts} +0 -0
@@ -85,6 +85,7 @@ const makeDeps = () => {
85
85
  triageSupported: undefined,
86
86
  detect: vi.fn().mockResolvedValue(true),
87
87
  listTargetConnectionIds: undefined,
88
+ finalize: undefined,
88
89
  chunk: vi.fn().mockResolvedValue({
89
90
  workUnits: [{ unitKey: 'u1', rawFiles: ['a.yml'], peerFileIndex: [], dependencyPaths: [] }],
90
91
  }),
@@ -121,6 +122,7 @@ const makeDeps = () => {
121
122
  }),
122
123
  applyPatchFile3WayIndex: vi.fn(),
123
124
  diffNameStatus: vi.fn().mockResolvedValue([]),
125
+ changedPaths: vi.fn().mockResolvedValue([]),
124
126
  };
125
127
  const sessionWorktreeService = {
126
128
  create: vi.fn().mockResolvedValue({
@@ -1344,25 +1346,65 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
1344
1346
  }),
1345
1347
  }));
1346
1348
  });
1347
- it('runs a registered post-processor before squash, records the outcome, and reindexes touched sources after squash', async () => {
1349
+ it('runs adapter finalization before squash, records the outcome, and reindexes touched sources', async () => {
1348
1350
  const deps = makeDeps();
1349
1351
  deps.adapter.source = 'metricflow';
1350
1352
  deps.registry.get.mockReturnValue(deps.adapter);
1351
1353
  deps.adapter.chunk.mockResolvedValue({
1352
- workUnits: [{ unitKey: 'u1', rawFiles: ['semantic_models.yml'], peerFileIndex: [], dependencyPaths: [] }],
1354
+ workUnits: [],
1353
1355
  parseArtifacts: { semanticModels: [{ name: 'orders' }] },
1354
1356
  });
1355
1357
  deps.adapter.listTargetConnectionIds = vi.fn().mockResolvedValue(['warehouse-2']);
1358
+ deps.adapter.finalize = vi.fn().mockResolvedValue({
1359
+ result: { sourcesTouched: 1 },
1360
+ warnings: ['kept going'],
1361
+ errors: [],
1362
+ touchedSources: [{ connectionId: 'warehouse-2', sourceName: 'orders' }],
1363
+ changedWikiPageKeys: [],
1364
+ actions: [
1365
+ {
1366
+ target: 'sl',
1367
+ type: 'updated',
1368
+ key: 'orders',
1369
+ targetConnectionId: 'warehouse-2',
1370
+ detail: 'Finalized orders usage',
1371
+ rawPaths: ['semantic_models.yml'],
1372
+ },
1373
+ ],
1374
+ });
1356
1375
  deps.semanticLayerService.loadAllSources.mockImplementation((connectionId) => Promise.resolve({ sources: [{ name: `${connectionId}_source` }], loadErrors: [] }));
1357
- const postProcessor = {
1358
- run: vi.fn().mockResolvedValue({
1359
- result: { sourcesCreated: 1 },
1360
- warnings: ['kept going'],
1361
- errors: [],
1362
- touchedSources: [{ connectionId: 'warehouse-2', sourceName: 'orders' }],
1376
+ let head = 'pre-finalization';
1377
+ const git = {
1378
+ revParseHead: vi.fn(async () => head),
1379
+ commitFiles: vi.fn().mockImplementation(async (paths) => {
1380
+ if (paths.includes('semantic-layer/warehouse-2/orders.yaml')) {
1381
+ head = 'post-finalization';
1382
+ return { created: true, commitHash: 'finalization-sha' };
1383
+ }
1384
+ return { created: true, commitHash: head };
1385
+ }),
1386
+ commitStaged: vi.fn().mockResolvedValue({ created: false, commitHash: 'post-finalization' }),
1387
+ resetHardTo: vi.fn(),
1388
+ assertWorktreeClean: vi.fn().mockResolvedValue(undefined),
1389
+ writeBinaryNoRenamePatch: vi.fn(async (_base, _head, patchPath) => {
1390
+ await writeFile(patchPath, '', 'utf-8');
1363
1391
  }),
1392
+ applyPatchFile3WayIndex: vi.fn(),
1393
+ diffNameStatus: vi.fn().mockImplementation(async (from, to) => from === 'pre-finalization' && to === 'post-finalization'
1394
+ ? [{ status: 'M', path: 'semantic-layer/warehouse-2/orders.yaml' }]
1395
+ : []),
1396
+ changedPaths: vi.fn().mockResolvedValue(['semantic-layer/warehouse-2/orders.yaml']),
1364
1397
  };
1365
- const runner = buildRunner(deps, { postProcessors: { metricflow: postProcessor } });
1398
+ deps.sessionWorktreeService.create.mockResolvedValue({
1399
+ chatId: 'j1',
1400
+ workdir: '/tmp/wt',
1401
+ branch: 'session/j1',
1402
+ baseSha: 'b',
1403
+ createdAt: new Date(),
1404
+ git,
1405
+ config: {},
1406
+ });
1407
+ const runner = buildRunner(deps);
1366
1408
  runner.stageRawFilesStage1 = vi.fn().mockResolvedValue({
1367
1409
  currentHashes: new Map([['semantic_models.yml', 'h1']]),
1368
1410
  rawDirInWorktree: 'raw-sources/c1/metricflow/s',
@@ -1375,7 +1417,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
1375
1417
  trigger: 'upload',
1376
1418
  bundleRef: { kind: 'upload', uploadId: 'upload-x' },
1377
1419
  });
1378
- expect(postProcessor.run).toHaveBeenCalledWith({
1420
+ expect(deps.adapter.finalize).toHaveBeenCalledWith(expect.objectContaining({
1379
1421
  connectionId: 'c1',
1380
1422
  sourceKey: 'metricflow',
1381
1423
  syncId: expect.any(String),
@@ -1383,24 +1425,25 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
1383
1425
  runId: 'run-1',
1384
1426
  workdir: '/tmp/wt',
1385
1427
  parseArtifacts: { semanticModels: [{ name: 'orders' }] },
1386
- });
1428
+ }));
1387
1429
  expect(deps.reportsRepo.create).toHaveBeenCalledWith(expect.objectContaining({
1388
1430
  body: expect.objectContaining({
1389
- postProcessor: {
1431
+ finalization: expect.objectContaining({
1390
1432
  sourceKey: 'metricflow',
1391
1433
  status: 'success',
1392
- result: { sourcesCreated: 1 },
1393
- warnings: ['kept going'],
1394
- errors: [],
1395
- touchedSources: [{ connectionId: 'warehouse-2', sourceName: 'orders' }],
1396
- },
1434
+ commitSha: 'finalization-sha',
1435
+ touchedPaths: ['semantic-layer/warehouse-2/orders.yaml'],
1436
+ derivedTouchedSources: [{ connectionId: 'warehouse-2', sourceName: 'orders' }],
1437
+ declaredTouchedSources: [{ connectionId: 'warehouse-2', sourceName: 'orders' }],
1438
+ actions: [expect.objectContaining({ key: 'orders' })],
1439
+ }),
1397
1440
  }),
1398
1441
  }));
1399
1442
  expect(deps.semanticLayerService.loadAllSources).toHaveBeenCalledWith('warehouse-2');
1400
1443
  expect(deps.slSearchService.indexSources).toHaveBeenCalledWith('warehouse-2', [{ name: 'warehouse-2_source' }]);
1401
1444
  expect(deps.sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'success');
1402
1445
  });
1403
- it('includes historic-sql post-processor output in memory-flow saved counts', async () => {
1446
+ it('includes finalization actions in memory-flow saved counts', async () => {
1404
1447
  const deps = makeDeps();
1405
1448
  deps.adapter.source = 'historic-sql';
1406
1449
  deps.registry.get.mockReturnValue(deps.adapter);
@@ -1414,21 +1457,19 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
1414
1457
  },
1415
1458
  ],
1416
1459
  });
1417
- const postProcessor = {
1418
- run: vi.fn().mockResolvedValue({
1419
- result: {
1420
- tableUsageMerged: 2,
1421
- staleTablesMarked: 1,
1422
- patternPagesWritten: 3,
1423
- stalePatternPagesMarked: 1,
1424
- archivedPatternPages: 1,
1425
- },
1426
- warnings: [],
1427
- errors: [],
1428
- touchedSources: [{ connectionId: 'c1', sourceName: 'orders' }],
1429
- }),
1430
- };
1431
- const runner = buildRunner(deps, { postProcessors: { 'historic-sql': postProcessor } });
1460
+ deps.adapter.finalize = vi.fn().mockResolvedValue({
1461
+ warnings: [],
1462
+ errors: [],
1463
+ touchedSources: [],
1464
+ changedWikiPageKeys: [],
1465
+ actions: [
1466
+ { target: 'sl', type: 'updated', key: 'orders', detail: 'Merged usage' },
1467
+ { target: 'sl', type: 'updated', key: 'customers', detail: 'Merged usage' },
1468
+ { target: 'wiki', type: 'created', key: 'historic-sql-orders', detail: 'Projected pattern' },
1469
+ { target: 'wiki', type: 'updated', key: 'historic-sql-customers', detail: 'Projected pattern' },
1470
+ ],
1471
+ });
1472
+ const runner = buildRunner(deps);
1432
1473
  runner.stageRawFilesStage1 = vi.fn().mockResolvedValue({
1433
1474
  currentHashes: new Map([['tables/public/orders.json', 'h1']]),
1434
1475
  rawDirInWorktree: 'raw-sources/c1/historic-sql/s',
@@ -1448,11 +1489,11 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
1448
1489
  });
1449
1490
  expect(memoryFlow.snapshot().events).toContainEqual(expect.objectContaining({
1450
1491
  type: 'saved',
1451
- wikiCount: 5,
1452
- slCount: 3,
1492
+ wikiCount: 2,
1493
+ slCount: 2,
1453
1494
  }));
1454
1495
  });
1455
- it('marks post-processor infrastructure failure as failed and preserves worktree cleanup state', async () => {
1496
+ it('marks finalization infrastructure failure as failed and preserves worktree cleanup state', async () => {
1456
1497
  const deps = makeDeps();
1457
1498
  deps.adapter.source = 'metricflow';
1458
1499
  deps.registry.get.mockReturnValue(deps.adapter);
@@ -1460,8 +1501,8 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
1460
1501
  workUnits: [{ unitKey: 'u1', rawFiles: ['semantic_models.yml'], peerFileIndex: [], dependencyPaths: [] }],
1461
1502
  parseArtifacts: { semanticModels: [{ name: 'orders' }] },
1462
1503
  });
1463
- const postProcessor = { run: vi.fn().mockRejectedValue(new Error('worktree write failed')) };
1464
- const runner = buildRunner(deps, { postProcessors: { metricflow: postProcessor } });
1504
+ deps.adapter.finalize = vi.fn().mockRejectedValue(new Error('worktree write failed'));
1505
+ const runner = buildRunner(deps);
1465
1506
  runner.stageRawFilesStage1 = vi.fn().mockResolvedValue({
1466
1507
  currentHashes: new Map([['semantic_models.yml', 'h1']]),
1467
1508
  rawDirInWorktree: 'raw-sources/c1/metricflow/s',
@@ -1478,6 +1519,120 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
1478
1519
  expect(deps.gitService.squashMergeIntoMain).not.toHaveBeenCalled();
1479
1520
  expect(deps.sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'crash');
1480
1521
  });
1522
+ it('reports finalization actions excluded from provenance when raw paths are not defensible', async () => {
1523
+ const deps = makeDeps();
1524
+ deps.adapter.finalize = vi.fn().mockResolvedValue({
1525
+ warnings: [],
1526
+ errors: [],
1527
+ touchedSources: [],
1528
+ changedWikiPageKeys: [],
1529
+ actions: [
1530
+ { target: 'wiki', type: 'updated', key: 'historic-sql-pattern', detail: 'No raw path' },
1531
+ { target: 'sl', type: 'updated', key: 'orders', detail: 'Invalid raw path', rawPaths: ['missing.json'] },
1532
+ ],
1533
+ });
1534
+ const runner = buildRunner(deps);
1535
+ runner.stageRawFilesStage1 = vi.fn().mockResolvedValue({
1536
+ currentHashes: new Map([['current.json', 'h1']]),
1537
+ rawDirInWorktree: 'raw-sources/c1/fake/s',
1538
+ });
1539
+ runner.resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
1540
+ await runner.run({
1541
+ jobId: 'j1',
1542
+ connectionId: 'c1',
1543
+ sourceKey: 'fake',
1544
+ trigger: 'upload',
1545
+ bundleRef: { kind: 'upload', uploadId: 'upload-x' },
1546
+ });
1547
+ expect(deps.reportsRepo.create).toHaveBeenCalledWith(expect.objectContaining({
1548
+ body: expect.objectContaining({
1549
+ finalization: expect.objectContaining({
1550
+ provenanceExclusions: [
1551
+ expect.objectContaining({ reason: 'missing_raw_paths' }),
1552
+ expect.objectContaining({ reason: 'raw_path_not_defensible', invalidRawPaths: ['missing.json'] }),
1553
+ ],
1554
+ }),
1555
+ }),
1556
+ }));
1557
+ expect(deps.provenanceRepo.insertMany).not.toHaveBeenCalledWith(expect.arrayContaining([expect.objectContaining({ rawPath: 'missing.json' })]));
1558
+ });
1559
+ it('passes explicit override replay metadata and no current work unit outcomes', async () => {
1560
+ const deps = makeDeps();
1561
+ deps.reportsRepo.findByJobId.mockResolvedValue({
1562
+ id: 'prior-report',
1563
+ runId: 'prior-run',
1564
+ jobId: 'prior-job',
1565
+ connectionId: 'c1',
1566
+ sourceKey: 'fake',
1567
+ createdAt: '2026-05-18T00:00:00.000Z',
1568
+ body: {
1569
+ status: 'completed',
1570
+ syncId: 'prior-sync',
1571
+ diffSummary: { added: 0, modified: 0, deleted: 0, unchanged: 0 },
1572
+ commitSha: 'prior-sha',
1573
+ workUnits: [
1574
+ {
1575
+ unitKey: 'prior-unit',
1576
+ rawFiles: ['prior.json'],
1577
+ status: 'success',
1578
+ actions: [{ target: 'wiki', type: 'created', key: 'prior', detail: 'prior' }],
1579
+ touchedSlSources: [],
1580
+ },
1581
+ ],
1582
+ failedWorkUnits: [],
1583
+ reconciliationSkipped: false,
1584
+ conflictsResolved: [],
1585
+ evictionsApplied: [
1586
+ {
1587
+ rawPath: 'do-not-replay.json',
1588
+ artifactKind: 'wiki',
1589
+ artifactKey: 'old',
1590
+ action: 'removed',
1591
+ reason: 'prior',
1592
+ },
1593
+ ],
1594
+ unmappedFallbacks: [],
1595
+ artifactResolutions: [],
1596
+ evictionInputs: ['evicted-from-prior-report.json'],
1597
+ unresolvedCards: [],
1598
+ supersededBy: null,
1599
+ overrideOf: null,
1600
+ provenanceRows: [],
1601
+ toolTranscripts: [],
1602
+ },
1603
+ });
1604
+ deps.adapter.finalize = vi.fn().mockResolvedValue({
1605
+ warnings: [],
1606
+ errors: [],
1607
+ touchedSources: [],
1608
+ changedWikiPageKeys: [],
1609
+ actions: [],
1610
+ });
1611
+ deps.gitService.listFilesAtHead.mockResolvedValue(['raw-sources/c1/fake/prior-sync/prior.json']);
1612
+ deps.gitService.getFileAtCommit.mockResolvedValue('{"id":1}\n');
1613
+ const runner = buildRunner(deps);
1614
+ runner.stageRawFilesStage1 = vi.fn().mockResolvedValue({
1615
+ currentHashes: new Map([['prior.json', 'h1']]),
1616
+ rawDirInWorktree: 'raw-sources/c1/fake/prior-sync',
1617
+ });
1618
+ runner.resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/prior');
1619
+ await runner.run({
1620
+ jobId: 'override-job',
1621
+ connectionId: 'c1',
1622
+ sourceKey: 'fake',
1623
+ trigger: 'manual_override',
1624
+ bundleRef: { kind: 'override', priorJobId: 'prior-job' },
1625
+ });
1626
+ expect(deps.adapter.finalize).toHaveBeenCalledWith(expect.objectContaining({
1627
+ workUnitOutcomes: [],
1628
+ overrideReplay: {
1629
+ priorJobId: 'prior-job',
1630
+ priorRunId: 'prior-run',
1631
+ priorSyncId: 'prior-sync',
1632
+ evictionRawPaths: ['evicted-from-prior-report.json'],
1633
+ },
1634
+ }));
1635
+ });
1481
1636
  it('includes existing global wiki pages in WorkUnit prompts', async () => {
1482
1637
  const deps = makeDeps();
1483
1638
  deps.knowledgeIndex.listPagesForUser.mockResolvedValue([
@@ -7,6 +7,7 @@ import { initKtxProject, loadKtxProject } from '../project/index.js';
7
7
  import { makeLocalGitRepo } from '../test/make-local-git-repo.js';
8
8
  import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
9
9
  import { FakeSourceAdapter } from './adapters/fake/fake.adapter.js';
10
+ import { projectHistoricSqlEvidence } from './adapters/historic-sql/projection.js';
10
11
  import { LocalLookerRuntimeStore } from './adapters/looker/local-runtime-store.js';
11
12
  import { createDefaultLocalIngestAdapters, localPullConfigForAdapter } from './local-adapters.js';
12
13
  import { getLocalIngestStatus, runLocalIngest } from './local-ingest.js';
@@ -159,6 +160,23 @@ class HistoricSqlEvidenceTestAdapter {
159
160
  ],
160
161
  });
161
162
  }
163
+ async finalize(ctx) {
164
+ const projection = await projectHistoricSqlEvidence({
165
+ workdir: ctx.workdir,
166
+ connectionId: ctx.connectionId,
167
+ syncId: ctx.syncId,
168
+ runId: ctx.runId,
169
+ overrideReplay: ctx.overrideReplay,
170
+ });
171
+ return {
172
+ result: projection,
173
+ warnings: projection.warnings,
174
+ errors: [],
175
+ touchedSources: projection.touchedSources,
176
+ changedWikiPageKeys: projection.changedWikiPageKeys,
177
+ actions: projection.actions,
178
+ };
179
+ }
162
180
  }
163
181
  function makeLookerRuntimeClient() {
164
182
  const lookerModels = {
@@ -426,7 +444,7 @@ describe('canonical local ingest', () => {
426
444
  },
427
445
  ]);
428
446
  });
429
- it('runs historic-SQL evidence projection through the local bundle post-processor', async () => {
447
+ it('runs historic-SQL evidence projection through local bundle finalization', async () => {
430
448
  const projectDir = join(tempDir, 'historic-sql-project');
431
449
  await initKtxProject({ projectDir });
432
450
  await writeFile(join(projectDir, 'ktx.yaml'), [
@@ -477,11 +495,12 @@ describe('canonical local ingest', () => {
477
495
  agentRunner,
478
496
  });
479
497
  expect(result.result.failedWorkUnits).toEqual([]);
480
- expect(result.report.body.postProcessor).toMatchObject({
498
+ expect(result.report.body.finalization).toMatchObject({
481
499
  sourceKey: 'historic-sql',
482
500
  status: 'success',
483
501
  result: { tableUsageMerged: 1 },
484
- touchedSources: [{ connectionId: 'warehouse', sourceName: 'orders' }],
502
+ declaredTouchedSources: [{ connectionId: 'warehouse', sourceName: 'orders' }],
503
+ derivedTouchedSources: [{ connectionId: 'warehouse', sourceName: 'orders' }],
485
504
  });
486
505
  await expect(readFile(join(projectDir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8')).resolves.toContain('Orders are repeatedly queried by lifecycle status.');
487
506
  });
@@ -13,7 +13,6 @@ import { ContextCandidateMarkTool, ContextCandidateWriteTool, ContextEvidenceNei
13
13
  import { buildKnowledgeSearchText, KnowledgeWikiService, searchLocalKnowledgePages, SqliteKnowledgeIndex, WikiListTagsTool, WikiReadTool, WikiRemoveTool, WikiSearchTool, WikiWriteTool, } from '../wiki/index.js';
14
14
  import { CandidateDedupService, ContextCandidateCarryforwardService, CuratorPaginationService, } from './context-candidates/index.js';
15
15
  import { createEmitHistoricSqlEvidenceTool } from './adapters/historic-sql/evidence-tool.js';
16
- import { HistoricSqlProjectionPostProcessor } from './adapters/historic-sql/post-processor.js';
17
16
  import { ContextEvidenceIndexService, SqliteContextEvidenceStore } from './context-evidence/index.js';
18
17
  import { DiffSetService } from './diff-set.service.js';
19
18
  import { ingestTracePathForJob } from './ingest-trace.js';
@@ -589,9 +588,6 @@ export function createLocalBundleIngestRuntime(options) {
589
588
  settings: { batchSize: 8, maxPasses: 8, stepBudgetPerPass: 60 },
590
589
  logger,
591
590
  }),
592
- postProcessors: {
593
- 'historic-sql': new HistoricSqlProjectionPostProcessor(),
594
- },
595
591
  logger,
596
592
  };
597
593
  return {
@@ -178,13 +178,6 @@ async function recordLocalMetabaseChildFailure(options) {
178
178
  overrideOf: null,
179
179
  provenanceRows: [],
180
180
  toolTranscripts: [],
181
- postProcessor: {
182
- sourceKey: 'metabase',
183
- status: 'failed',
184
- errors: [reason],
185
- warnings: [],
186
- touchedSources: [],
187
- },
188
181
  };
189
182
  const report = await store.create({
190
183
  runId: run.id,
@@ -117,6 +117,11 @@ async function pruneStaleRawFiles(input) {
117
117
  }
118
118
  return staleRawPaths;
119
119
  }
120
+ async function rawSnapshotContainsFiles(project, rawPrefix, relativeFiles) {
121
+ const existing = await project.fileStore.listFiles(rawPrefix);
122
+ const existingFiles = new Set(existing.files);
123
+ return relativeFiles.every((file) => existingFiles.has(`${rawPrefix}/${file}`));
124
+ }
120
125
  async function prepareLocalStagedDir(project, adapter, stagedDir, sourceDir, connectionId) {
121
126
  await rm(stagedDir, { recursive: true, force: true });
122
127
  await mkdir(stagedDir, { recursive: true });
@@ -184,12 +189,17 @@ async function runLocalStageOnlyIngestInner(options) {
184
189
  const scopeDescriptor = adapter.describeScope ? await adapter.describeScope(stagedDir) : null;
185
190
  options.memoryFlow?.emit({ type: 'scope_detected', fingerprint: scopeDescriptor?.fingerprint ?? null });
186
191
  const diffSet = computeDiffSetFromHashes(hashes, priorHashes, scopeDescriptor ? scopeDescriptor.isPathInScope.bind(scopeDescriptor) : undefined);
187
- const unchangedFromLatestCompletedRun = !existingRun &&
192
+ const matchesLatestCompletedRun = !existingRun &&
188
193
  !!latestReport &&
189
194
  diffSet.added.length === 0 &&
190
195
  diffSet.modified.length === 0 &&
191
196
  diffSet.deleted.length === 0;
192
- const syncId = existingRun?.syncId ?? (unchangedFromLatestCompletedRun ? latestReport.syncId : buildSyncId(started, jobId));
197
+ const reusableLatestSyncId = matchesLatestCompletedRun ? latestReport.syncId : null;
198
+ const latestRawPrefix = reusableLatestSyncId
199
+ ? `raw-sources/${connectionId}/${adapter.source}/${reusableLatestSyncId}`
200
+ : null;
201
+ const canReuseLatestCompletedRun = latestRawPrefix !== null && (await rawSnapshotContainsFiles(options.project, latestRawPrefix, relativeFiles));
202
+ const syncId = existingRun?.syncId ?? (canReuseLatestCompletedRun && reusableLatestSyncId ? reusableLatestSyncId : buildSyncId(started, jobId));
193
203
  options.memoryFlow?.update({ syncId });
194
204
  options.memoryFlow?.emit({ type: 'raw_snapshot_written', syncId, rawFileCount: relativeFiles.length });
195
205
  options.memoryFlow?.emit({
@@ -209,7 +219,7 @@ async function runLocalStageOnlyIngestInner(options) {
209
219
  });
210
220
  const rawPrefix = `raw-sources/${connectionId}/${adapter.source}/${syncId}`;
211
221
  const rawPaths = relativeFiles.map((file) => `${rawPrefix}/${file}`);
212
- const staleRawPaths = options.dryRun || unchangedFromLatestCompletedRun
222
+ const staleRawPaths = options.dryRun || canReuseLatestCompletedRun
213
223
  ? []
214
224
  : await pruneStaleRawFiles({
215
225
  project: options.project,
@@ -220,7 +230,7 @@ async function runLocalStageOnlyIngestInner(options) {
220
230
  for (const file of relativeFiles) {
221
231
  const absolutePath = assertInside(stagedDir, join(stagedDir, file));
222
232
  const rawPath = `${rawPrefix}/${file}`;
223
- if (!options.dryRun && !unchangedFromLatestCompletedRun) {
233
+ if (!options.dryRun && !canReuseLatestCompletedRun) {
224
234
  await options.project.fileStore.writeFile(rawPath, await readFile(absolutePath, 'utf-8'), LOCAL_AUTHOR, LOCAL_AUTHOR_EMAIL, `Stage ${adapter.source} raw file: ${file}`, { skipLock: true });
225
235
  }
226
236
  }
@@ -266,7 +276,7 @@ async function runLocalStageOnlyIngestInner(options) {
266
276
  record,
267
277
  rawContentHashes: Object.fromEntries(hashes),
268
278
  });
269
- const commitPaths = unchangedFromLatestCompletedRun ? [] : [...rawPaths, ...staleRawPaths].sort();
279
+ const commitPaths = canReuseLatestCompletedRun ? [] : [...rawPaths, ...staleRawPaths].sort();
270
280
  if (commitPaths.length > 0) {
271
281
  await options.project.git.commitFiles(commitPaths, `ingest(${adapter.source}): ${jobId} syncId=${syncId}`, LOCAL_AUTHOR, LOCAL_AUTHOR_EMAIL);
272
282
  }
@@ -277,6 +277,35 @@ describe('local ingest', () => {
277
277
  await expect(readFile(join(project.projectDir, '.ktx', 'ingest-runs', 'local-job-3.json'), 'utf-8')).rejects.toThrow();
278
278
  await expect(readFile(join(project.projectDir, '.ktx', 'ingest-reports', 'local-job-3.json'), 'utf-8')).rejects.toThrow();
279
279
  });
280
+ it('writes a new raw snapshot when an unchanged latest snapshot is missing from disk', async () => {
281
+ const sourceDir = join(tempDir, 'missing-snapshot-source');
282
+ await mkdir(join(sourceDir, 'orders'), { recursive: true });
283
+ await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders","version":1}\n', 'utf-8');
284
+ const first = await runLocalStageOnlyIngest({
285
+ project,
286
+ adapters: [new FakeSourceAdapter()],
287
+ adapter: 'fake',
288
+ connectionId: 'warehouse',
289
+ sourceDir,
290
+ jobId: 'local-missing-snapshot-1',
291
+ now: () => new Date('2026-04-27T12:20:00.000Z'),
292
+ });
293
+ await rm(join(project.projectDir, 'raw-sources/warehouse/fake', first.syncId), { recursive: true, force: true });
294
+ const rerun = await runLocalStageOnlyIngest({
295
+ project,
296
+ adapters: [new FakeSourceAdapter()],
297
+ adapter: 'fake',
298
+ connectionId: 'warehouse',
299
+ sourceDir,
300
+ jobId: 'local-missing-snapshot-2',
301
+ now: () => new Date('2026-04-27T12:25:00.000Z'),
302
+ });
303
+ expect(rerun.previousRunId).toBe(first.runId);
304
+ expect(rerun.syncId).toBe('2026-04-27-122500-local-missing-snapshot-2');
305
+ expect(rerun.diffSummary).toEqual({ added: 0, modified: 0, deleted: 0, unchanged: 1 });
306
+ expect(rerun.workUnitCount).toBe(0);
307
+ await expect(readFile(join(project.projectDir, 'raw-sources/warehouse/fake', rerun.syncId, 'orders/orders.json'), 'utf-8')).resolves.toBe('{"name":"orders","version":1}\n');
308
+ });
280
309
  it('reuses the existing sync id when the same local run id is retried', async () => {
281
310
  const sourceDir = join(tempDir, 'idempotent-source');
282
311
  await mkdir(join(sourceDir, 'orders'), { recursive: true });
@@ -43,8 +43,8 @@ export declare const memoryFlowEventSchema: z.ZodDiscriminatedUnion<[z.ZodObject
43
43
  stage: z.ZodEnum<{
44
44
  source: "source";
45
45
  workUnits: "workUnits";
46
- actions: "actions";
47
46
  chunks: "chunks";
47
+ actions: "actions";
48
48
  gates: "gates";
49
49
  saved: "saved";
50
50
  }>;
@@ -58,7 +58,7 @@ export declare const memoryFlowEventSchema: z.ZodDiscriminatedUnion<[z.ZodObject
58
58
  report: "report";
59
59
  integration: "integration";
60
60
  reconciliation: "reconciliation";
61
- post_processor: "post_processor";
61
+ finalization: "finalization";
62
62
  wiki_sl_ref_repair: "wiki_sl_ref_repair";
63
63
  final_gates: "final_gates";
64
64
  save: "save";
@@ -53,7 +53,7 @@ export const memoryFlowEventSchema = z.discriminatedUnion('type', [
53
53
  'source',
54
54
  'integration',
55
55
  'reconciliation',
56
- 'post_processor',
56
+ 'finalization',
57
57
  'wiki_sl_ref_repair',
58
58
  'final_gates',
59
59
  'save',
@@ -40,7 +40,7 @@ type MemoryFlowEventPayload = {
40
40
  reason: string;
41
41
  } | {
42
42
  type: 'stage_progress';
43
- stage: 'source' | 'integration' | 'reconciliation' | 'post_processor' | 'wiki_sl_ref_repair' | 'final_gates' | 'save' | 'provenance' | 'report';
43
+ stage: 'source' | 'integration' | 'reconciliation' | 'finalization' | 'wiki_sl_ref_repair' | 'final_gates' | 'save' | 'provenance' | 'report';
44
44
  percent: number;
45
45
  message: string;
46
46
  transient?: boolean;
@@ -6,7 +6,7 @@ import type { CaptureSession, MemoryAction, MemoryKnowledgeSlRefsPort } from '..
6
6
  import type { PromptService } from '../prompts/index.js';
7
7
  import type { SkillsRegistryService } from '../skills/index.js';
8
8
  import type { SemanticLayerService, SlConnectionCatalogPort, SlSearchService, SlSourcesIndexPort, SlValidationDeps, SlValidatorPort } from '../sl/index.js';
9
- import type { ToolContext, ToolSession, TouchedSlSource } from '../tools/index.js';
9
+ import type { ToolContext, ToolSession } from '../tools/index.js';
10
10
  import type { KnowledgeIndexPort, KnowledgeWikiService } from '../wiki/index.js';
11
11
  import type { CanonicalPin } from './canonical-pins.js';
12
12
  import type { IngestTraceLevel } from './ingest-trace.js';
@@ -281,24 +281,6 @@ export interface CuratorPaginationPort {
281
281
  warnings: string[];
282
282
  }>;
283
283
  }
284
- export interface IngestBundlePostProcessorInput {
285
- connectionId: string;
286
- sourceKey: string;
287
- syncId: string;
288
- jobId: string;
289
- runId: string;
290
- workdir: string;
291
- parseArtifacts: unknown;
292
- }
293
- export interface IngestBundlePostProcessorResult {
294
- result?: unknown;
295
- warnings: string[];
296
- errors: string[];
297
- touchedSources: TouchedSlSource[];
298
- }
299
- export interface IngestBundlePostProcessorPort {
300
- run(input: IngestBundlePostProcessorInput): Promise<IngestBundlePostProcessorResult>;
301
- }
302
284
  export interface IngestBundleRunnerDeps {
303
285
  runs: IngestRunsPort;
304
286
  provenance: IngestProvenancePort;
@@ -332,7 +314,6 @@ export interface IngestBundleRunnerDeps {
332
314
  candidateDedup?: CandidateDedupPort;
333
315
  contextCandidateCarryforward?: ContextCandidateCarryforwardPort;
334
316
  curatorPagination?: CuratorPaginationPort;
335
- postProcessors?: Record<string, IngestBundlePostProcessorPort>;
336
317
  logger?: KtxLogger;
337
318
  }
338
319
  export interface IngestCaptureState {