@kaelio/ktx 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. package/assets/python/{kaelio_ktx-0.1.0-py3-none-any.whl → kaelio_ktx-0.2.0-py3-none-any.whl} +0 -0
  2. package/assets/python/manifest.json +4 -4
  3. package/dist/admin-reindex.d.ts +15 -0
  4. package/dist/admin-reindex.js +168 -0
  5. package/dist/admin-reindex.test.js +116 -0
  6. package/dist/{dev.d.ts → admin.d.ts} +1 -1
  7. package/dist/{dev.js → admin.js} +14 -12
  8. package/dist/admin.test.d.ts +1 -0
  9. package/dist/{dev.test.js → admin.test.js} +36 -31
  10. package/dist/cli-program.js +7 -7
  11. package/dist/cli-program.test.js +1 -1
  12. package/dist/cli-runtime.d.ts +2 -0
  13. package/dist/commands/connection-commands.js +11 -10
  14. package/dist/commands/connection-selection.d.ts +11 -0
  15. package/dist/commands/connection-selection.js +9 -0
  16. package/dist/commands/ingest-commands.js +32 -26
  17. package/dist/commands/knowledge-commands.js +17 -28
  18. package/dist/commands/mcp-commands.js +17 -11
  19. package/dist/commands/setup-commands.js +14 -26
  20. package/dist/commands/sl-commands.js +27 -32
  21. package/dist/doctor.test.js +7 -8
  22. package/dist/example-smoke.test.js +3 -3
  23. package/dist/index.test.js +102 -70
  24. package/dist/ingest-depth.js +0 -1
  25. package/dist/ingest.test-utils.js +2 -2
  26. package/dist/ingest.test.js +4 -4
  27. package/dist/io/print-list.test.js +4 -4
  28. package/dist/knowledge.js +1 -1
  29. package/dist/managed-local-embeddings.d.ts +2 -0
  30. package/dist/managed-local-embeddings.js +2 -0
  31. package/dist/managed-local-embeddings.test.js +2 -0
  32. package/dist/managed-mcp-daemon.js +3 -2
  33. package/dist/managed-mcp-daemon.test.js +25 -0
  34. package/dist/managed-python-command.js +2 -2
  35. package/dist/managed-python-command.test.js +4 -3
  36. package/dist/managed-python-daemon.js +3 -2
  37. package/dist/managed-python-daemon.test.js +20 -0
  38. package/dist/managed-python-runtime.d.ts +5 -1
  39. package/dist/managed-python-runtime.js +50 -6
  40. package/dist/managed-python-runtime.test.js +53 -23
  41. package/dist/memory-flow-tui.test.js +2 -2
  42. package/dist/next-steps.d.ts +6 -6
  43. package/dist/next-steps.js +4 -4
  44. package/dist/next-steps.test.js +5 -5
  45. package/dist/print-command-tree.test.js +1 -1
  46. package/dist/proxy-env.d.ts +1 -0
  47. package/dist/proxy-env.js +23 -0
  48. package/dist/proxy-env.test.d.ts +1 -0
  49. package/dist/proxy-env.test.js +17 -0
  50. package/dist/public-ingest.js +3 -5
  51. package/dist/public-ingest.test.js +7 -3
  52. package/dist/runtime.test.js +2 -1
  53. package/dist/scan.test.js +2 -2
  54. package/dist/setup-agents.js +6 -4
  55. package/dist/setup-agents.test.js +35 -1
  56. package/dist/setup-embeddings.d.ts +1 -0
  57. package/dist/setup-embeddings.js +29 -7
  58. package/dist/setup-embeddings.test.js +49 -7
  59. package/dist/setup-models.d.ts +0 -1
  60. package/dist/setup-models.js +2 -3
  61. package/dist/setup-models.test.js +8 -10
  62. package/dist/setup-project.d.ts +9 -1
  63. package/dist/setup-project.js +52 -25
  64. package/dist/setup-project.test.js +8 -8
  65. package/dist/setup-runtime.test.js +4 -2
  66. package/dist/setup.d.ts +1 -2
  67. package/dist/setup.js +21 -5
  68. package/dist/setup.test.js +160 -43
  69. package/dist/sl.js +1 -1
  70. package/dist/sl.test.js +2 -1
  71. package/dist/standalone-smoke.test.js +8 -5
  72. package/dist/status-project.js +1 -10
  73. package/node_modules/@ktx/context/dist/index-sync/index.d.ts +2 -0
  74. package/node_modules/@ktx/context/dist/index-sync/index.js +1 -0
  75. package/node_modules/@ktx/context/dist/index-sync/reindex.d.ts +20 -0
  76. package/node_modules/@ktx/context/dist/index-sync/reindex.js +141 -0
  77. package/node_modules/@ktx/context/dist/index-sync/reindex.test.d.ts +1 -0
  78. package/node_modules/@ktx/context/dist/index-sync/reindex.test.js +139 -0
  79. package/node_modules/@ktx/context/dist/index-sync/types.d.ts +29 -0
  80. package/node_modules/@ktx/context/dist/index-sync/types.js +1 -0
  81. package/node_modules/@ktx/context/dist/index.d.ts +1 -0
  82. package/node_modules/@ktx/context/dist/index.js +1 -0
  83. package/node_modules/@ktx/context/dist/ingest/adapters/historic-sql/local-ingest-acceptance.test.js +1 -1
  84. package/node_modules/@ktx/context/dist/ingest/local-bundle-ingest.test.js +8 -8
  85. package/node_modules/@ktx/context/dist/ingest/local-bundle-runtime.js +4 -1
  86. package/node_modules/@ktx/context/dist/ingest/local-bundle-runtime.test.js +3 -3
  87. package/node_modules/@ktx/context/dist/ingest/local-embedding-provider.integration.test.js +9 -10
  88. package/node_modules/@ktx/context/dist/ingest/memory-flow/schema.d.ts +2 -2
  89. package/node_modules/@ktx/context/dist/ingest/report-snapshot.d.ts +2 -2
  90. package/node_modules/@ktx/context/dist/llm/local-config.js +2 -15
  91. package/node_modules/@ktx/context/dist/llm/local-config.test.js +3 -7
  92. package/node_modules/@ktx/context/dist/memory/local-memory.js +9 -3
  93. package/node_modules/@ktx/context/dist/project/config.d.ts +0 -5
  94. package/node_modules/@ktx/context/dist/project/config.js +5 -5
  95. package/node_modules/@ktx/context/dist/project/config.test.js +4 -7
  96. package/node_modules/@ktx/context/dist/scan/enrichment-state.test.js +4 -4
  97. package/node_modules/@ktx/context/dist/scan/index.d.ts +1 -1
  98. package/node_modules/@ktx/context/dist/scan/local-enrichment.d.ts +2 -6
  99. package/node_modules/@ktx/context/dist/scan/local-enrichment.js +31 -47
  100. package/node_modules/@ktx/context/dist/scan/local-enrichment.test.js +35 -18
  101. package/node_modules/@ktx/context/dist/scan/local-scan.test.js +2 -3
  102. package/node_modules/@ktx/context/dist/sl/ports.d.ts +3 -3
  103. package/node_modules/@ktx/context/dist/sl/sl-search.service.d.ts +3 -2
  104. package/node_modules/@ktx/context/dist/sl/sl-search.service.js +47 -45
  105. package/node_modules/@ktx/context/dist/sl/sl-search.service.test.js +61 -0
  106. package/node_modules/@ktx/context/dist/sl/sqlite-sl-sources-index.d.ts +4 -3
  107. package/node_modules/@ktx/context/dist/sl/sqlite-sl-sources-index.js +15 -5
  108. package/node_modules/@ktx/context/dist/sl/sqlite-sl-sources-index.test.js +24 -0
  109. package/node_modules/@ktx/context/dist/wiki/knowledge-wiki.service.d.ts +3 -2
  110. package/node_modules/@ktx/context/dist/wiki/knowledge-wiki.service.js +62 -51
  111. package/node_modules/@ktx/context/dist/wiki/knowledge-wiki.service.test.js +59 -3
  112. package/node_modules/@ktx/context/dist/wiki/ports.d.ts +3 -3
  113. package/node_modules/@ktx/context/dist/wiki/sqlite-knowledge-index.d.ts +33 -0
  114. package/node_modules/@ktx/context/dist/wiki/sqlite-knowledge-index.js +155 -2
  115. package/node_modules/@ktx/context/dist/wiki/sqlite-knowledge-index.test.js +26 -0
  116. package/node_modules/@ktx/context/package.json +5 -0
  117. package/node_modules/@ktx/llm/dist/embedding-provider.d.ts +0 -7
  118. package/node_modules/@ktx/llm/dist/embedding-provider.js +12 -138
  119. package/node_modules/@ktx/llm/dist/embedding-provider.test.js +10 -25
  120. package/node_modules/@ktx/llm/dist/types.d.ts +1 -1
  121. package/package.json +1 -1
  122. /package/dist/{dev.test.d.ts → admin-reindex.test.d.ts} +0 -0
@@ -69,7 +69,6 @@ declare const llmSchema: z.ZodObject<{
69
69
  }, z.core.$strict>;
70
70
  declare const embeddingSchema: z.ZodObject<{
71
71
  backend: z.ZodDefault<z.ZodEnum<{
72
- deterministic: "deterministic";
73
72
  none: "none";
74
73
  openai: "openai";
75
74
  "sentence-transformers": "sentence-transformers";
@@ -102,7 +101,6 @@ declare const scanEnrichmentSchema: z.ZodObject<{
102
101
  }>>;
103
102
  embeddings: z.ZodOptional<z.ZodObject<{
104
103
  backend: z.ZodDefault<z.ZodEnum<{
105
- deterministic: "deterministic";
106
104
  none: "none";
107
105
  openai: "openai";
108
106
  "sentence-transformers": "sentence-transformers";
@@ -141,7 +139,6 @@ declare const scanSchema: z.ZodObject<{
141
139
  }>>;
142
140
  embeddings: z.ZodOptional<z.ZodObject<{
143
141
  backend: z.ZodDefault<z.ZodEnum<{
144
- deterministic: "deterministic";
145
142
  none: "none";
146
143
  openai: "openai";
147
144
  "sentence-transformers": "sentence-transformers";
@@ -462,7 +459,6 @@ declare const ktxProjectConfigSchema: z.ZodObject<{
462
459
  adapters: z.ZodDefault<z.ZodArray<z.ZodString>>;
463
460
  embeddings: z.ZodPrefault<z.ZodObject<{
464
461
  backend: z.ZodDefault<z.ZodEnum<{
465
- deterministic: "deterministic";
466
462
  none: "none";
467
463
  openai: "openai";
468
464
  "sentence-transformers": "sentence-transformers";
@@ -507,7 +503,6 @@ declare const ktxProjectConfigSchema: z.ZodObject<{
507
503
  }>>;
508
504
  embeddings: z.ZodOptional<z.ZodObject<{
509
505
  backend: z.ZodDefault<z.ZodEnum<{
510
- deterministic: "deterministic";
511
506
  none: "none";
512
507
  openai: "openai";
513
508
  "sentence-transformers": "sentence-transformers";
@@ -3,7 +3,7 @@ import YAML from 'yaml';
3
3
  import * as z from 'zod';
4
4
  import { connectionConfigSchema } from './driver-schemas.js';
5
5
  const KTX_LLM_BACKENDS = ['none', 'anthropic', 'vertex', 'gateway', 'claude-code'];
6
- const KTX_EMBEDDING_BACKENDS = ['none', 'deterministic', 'openai', 'sentence-transformers'];
6
+ const KTX_EMBEDDING_BACKENDS = ['none', 'openai', 'sentence-transformers'];
7
7
  const KTX_PROMPT_CACHE_TTLS = ['5m', '1h'];
8
8
  const KTX_ENRICHMENT_MODES = ['none', 'deterministic', 'llm'];
9
9
  const KTX_WORK_UNIT_FAILURE_MODES = ['abort', 'continue'];
@@ -69,9 +69,9 @@ const embeddingSchema = z
69
69
  .strictObject({
70
70
  backend: z
71
71
  .enum(KTX_EMBEDDING_BACKENDS)
72
- .default('deterministic')
73
- .describe('Embedding backend. "deterministic" is a built-in hash-based vector for offline use; "openai" and "sentence-transformers" call out to those providers; "none" disables embeddings.'),
74
- model: z.string().min(1).optional().describe('Provider-specific embedding model identifier (e.g. "text-embedding-3-small"). Ignored by the "deterministic" backend.'),
72
+ .default('none')
73
+ .describe('Embedding backend. "openai" and "sentence-transformers" call out to those providers; "none" disables embeddings.'),
74
+ model: z.string().min(1).optional().describe('Provider-specific embedding model identifier (e.g. "text-embedding-3-small").'),
75
75
  dimensions: z.int().positive().default(8).describe('Embedding vector dimensionality. Must match the chosen model when using a real provider.'),
76
76
  openai: apiCredentialsSchema.optional().describe('OpenAI credentials, used when backend is "openai".'),
77
77
  sentenceTransformers: sentenceTransformersSchema.optional().describe('Sentence-transformers server config, used when backend is "sentence-transformers".'),
@@ -95,7 +95,7 @@ const ingestSchema = z
95
95
  .default([])
96
96
  .describe('Ingest adapter identifiers to run (e.g. "metabase", "looker", "historic-sql"). Empty array means no adapters are run.'),
97
97
  embeddings: embeddingSchema
98
- .prefault({ backend: 'deterministic', model: 'deterministic' })
98
+ .prefault({ backend: 'none' })
99
99
  .describe('Embedding configuration used when ingest adapters need to embed documents.'),
100
100
  workUnits: workUnitsSchema.prefault({}).describe('Concurrency and failure handling for ingest work units.'),
101
101
  })
@@ -32,8 +32,7 @@ connections:
32
32
  ingest: {
33
33
  adapters: [],
34
34
  embeddings: {
35
- backend: 'deterministic',
36
- model: 'deterministic',
35
+ backend: 'none',
37
36
  dimensions: 8,
38
37
  },
39
38
  workUnits: {
@@ -75,11 +74,10 @@ connections:
75
74
  const parsed = parseKtxProjectConfig(serialized);
76
75
  expect(serialized).not.toContain('project:');
77
76
  expect(serialized).not.toContain('live-database');
78
- expect(serialized).toContain(' embeddings:\n backend: deterministic\n model: deterministic\n dimensions: 8');
77
+ expect(serialized).toContain(' embeddings:\n backend: none\n dimensions: 8');
79
78
  expect(parsed.ingest.adapters).toEqual([]);
80
79
  expect(parsed.ingest.embeddings).toEqual({
81
- backend: 'deterministic',
82
- model: 'deterministic',
80
+ backend: 'none',
83
81
  dimensions: 8,
84
82
  });
85
83
  });
@@ -350,8 +348,7 @@ scan:
350
348
  const config = parseKtxProjectConfig('{}\n');
351
349
  expect(config).toEqual(buildDefaultKtxProjectConfig());
352
350
  expect(config.ingest.embeddings).toEqual({
353
- backend: 'deterministic',
354
- model: 'deterministic',
351
+ backend: 'none',
355
352
  dimensions: 8,
356
353
  });
357
354
  });
@@ -48,13 +48,13 @@ describe('scan enrichment state', () => {
48
48
  snapshot,
49
49
  mode: 'enriched',
50
50
  detectRelationships: true,
51
- providerIdentity: { provider: 'deterministic', embeddingDimensions: 8, llmModel: 'a' },
51
+ providerIdentity: { provider: 'local-heuristic', llmModel: 'a' },
52
52
  });
53
53
  const second = computeKtxScanEnrichmentInputHash({
54
54
  snapshot: { ...snapshot, metadata: {} },
55
55
  mode: 'enriched',
56
56
  detectRelationships: true,
57
- providerIdentity: { llmModel: 'a', embeddingDimensions: 8, provider: 'deterministic' },
57
+ providerIdentity: { llmModel: 'a', provider: 'local-heuristic' },
58
58
  });
59
59
  const firstTable = snapshot.tables[0];
60
60
  if (!firstTable) {
@@ -64,7 +64,7 @@ describe('scan enrichment state', () => {
64
64
  snapshot: { ...snapshot, tables: [{ ...firstTable, name: 'orders_v2' }] },
65
65
  mode: 'enriched',
66
66
  detectRelationships: true,
67
- providerIdentity: { provider: 'deterministic', embeddingDimensions: 8, llmModel: 'a' },
67
+ providerIdentity: { provider: 'local-heuristic', llmModel: 'a' },
68
68
  });
69
69
  expect(first).toMatch(/^[a-f0-9]{64}$/);
70
70
  expect(second).toBe(first);
@@ -75,7 +75,7 @@ describe('scan enrichment state', () => {
75
75
  snapshot,
76
76
  mode: 'enriched',
77
77
  detectRelationships: true,
78
- providerIdentity: { provider: 'deterministic', embeddingDimensions: 8 },
78
+ providerIdentity: { provider: 'local-heuristic' },
79
79
  });
80
80
  await store.saveCompletedStage({
81
81
  runId: 'scan-run-1',
@@ -13,7 +13,7 @@ export { createKtxEntityDetailsService } from './entity-details.js';
13
13
  export type { DisplayTargetResolution, RawSchemaHit, TableDetail, WarehouseCatalogServiceDeps, } from './warehouse-catalog.js';
14
14
  export { WarehouseCatalogService } from './warehouse-catalog.js';
15
15
  export type { KtxColumnSampleUpdate, KtxDescriptionSource, KtxDescriptionUpdate, KtxEmbeddingUpdate, KtxEnrichedColumn, KtxEnrichedRelationship, KtxEnrichedSchema, KtxEnrichedTable, KtxRelationshipEndpoint, KtxRelationshipSource, KtxRelationshipType, KtxRelationshipUpdate, KtxScanMetadataStore, KtxSkippedRelationship, KtxStructuralSyncPlan, } from './enrichment-types.js';
16
- export type { DeterministicLocalScanEnrichmentProviderOptions, KtxLocalScanEnrichmentInput, KtxLocalScanEnrichmentProviders, KtxLocalScanEnrichmentResult, } from './local-enrichment.js';
16
+ export type { KtxLocalScanEnrichmentInput, KtxLocalScanEnrichmentProviders, KtxLocalScanEnrichmentResult, } from './local-enrichment.js';
17
17
  export { createDeterministicLocalScanEnrichmentProviders, runLocalScanEnrichment, snapshotToKtxEnrichedSchema, } from './local-enrichment.js';
18
18
  export type { WriteLocalScanEnrichmentArtifactsInput, WriteLocalScanEnrichmentArtifactsResult, WriteLocalScanManifestShardsInput, WriteLocalScanManifestShardsResult, } from './local-enrichment-artifacts.js';
19
19
  export { writeLocalScanEnrichmentArtifacts, writeLocalScanManifestShards, } from './local-enrichment-artifacts.js';
@@ -6,13 +6,9 @@ import type { KtxCompositeRelationshipCandidate } from './relationship-composite
6
6
  import type { KtxResolvedRelationshipDiscoveryCandidate } from './relationship-graph-resolver.js';
7
7
  import type { KtxRelationshipProfileArtifact } from './relationship-profiling.js';
8
8
  import type { KtxEmbeddingPort, KtxScanConnector, KtxScanContext, KtxScanEnrichmentStateSummary, KtxScanEnrichmentSummary, KtxScanMode, KtxScanRelationshipSummary, KtxScanWarning, KtxSchemaSnapshot, KtxTableRef } from './types.js';
9
- export interface DeterministicLocalScanEnrichmentProviderOptions {
10
- embeddingDimensions?: number;
11
- maxBatchSize?: number;
12
- }
13
9
  export interface KtxLocalScanEnrichmentProviders {
14
10
  llmRuntime: KtxLlmRuntimePort;
15
- embedding: KtxEmbeddingPort;
11
+ embedding?: KtxEmbeddingPort | null;
16
12
  }
17
13
  export interface KtxLocalScanEnrichmentInput {
18
14
  connectionId: string;
@@ -44,6 +40,6 @@ export interface KtxLocalScanEnrichmentResult {
44
40
  resolvedRelationships: KtxResolvedRelationshipDiscoveryCandidate[] | null;
45
41
  compositeRelationships: KtxCompositeRelationshipCandidate[] | null;
46
42
  }
47
- export declare function createDeterministicLocalScanEnrichmentProviders(options?: DeterministicLocalScanEnrichmentProviderOptions): KtxLocalScanEnrichmentProviders;
43
+ export declare function createDeterministicLocalScanEnrichmentProviders(): KtxLocalScanEnrichmentProviders;
48
44
  export declare function snapshotToKtxEnrichedSchema(snapshot: KtxSchemaSnapshot, embeddingsByColumnId?: ReadonlyMap<string, number[]>): KtxEnrichedSchema;
49
45
  export declare function runLocalScanEnrichment(input: KtxLocalScanEnrichmentInput): Promise<KtxLocalScanEnrichmentResult>;
@@ -76,28 +76,9 @@ function providerlessEnrichedWarning(relationshipDetection) {
76
76
  },
77
77
  };
78
78
  }
79
- function hashEmbedding(text, dimensions) {
80
- const values = Array.from({ length: dimensions }, (_, index) => {
81
- let hash = index + 17;
82
- for (const char of text) {
83
- hash = (hash * 31 + char.charCodeAt(0) + index) % 1009;
84
- }
85
- return Number(((hash % 200) / 100 - 1).toFixed(4));
86
- });
87
- return values;
88
- }
89
- export function createDeterministicLocalScanEnrichmentProviders(options = {}) {
90
- const dimensions = options.embeddingDimensions ?? 8;
91
- const maxBatchSize = options.maxBatchSize ?? 64;
79
+ export function createDeterministicLocalScanEnrichmentProviders() {
92
80
  return {
93
81
  llmRuntime: deterministicLlmRuntime(),
94
- embedding: {
95
- dimensions,
96
- maxBatchSize,
97
- async embedBatch(texts) {
98
- return texts.map((text) => hashEmbedding(text, dimensions));
99
- },
100
- },
101
82
  };
102
83
  }
103
84
  function deterministicLlmRuntime() {
@@ -262,7 +243,7 @@ async function buildEmbeddings(input) {
262
243
  }
263
244
  }
264
245
  const embeddings = [];
265
- const maxBatchSize = embeddingBatchSize(input.providers.embedding.maxBatchSize);
246
+ const maxBatchSize = embeddingBatchSize(input.embedding.maxBatchSize);
266
247
  const embeddingTexts = texts.map((item) => item.text);
267
248
  const batchCount = Math.ceil(embeddingTexts.length / maxBatchSize);
268
249
  if (batchCount === 0) {
@@ -274,7 +255,7 @@ async function buildEmbeddings(input) {
274
255
  transient: true,
275
256
  });
276
257
  const batch = embeddingTexts.slice(offset, offset + maxBatchSize);
277
- const batchEmbeddings = await input.providers.embedding.embedBatch(batch);
258
+ const batchEmbeddings = await input.embedding.embedBatch(batch);
278
259
  if (batchEmbeddings.length !== batch.length) {
279
260
  throw new Error(`expected ${batch.length} embeddings, received ${batchEmbeddings.length}`);
280
261
  }
@@ -395,34 +376,37 @@ export async function runLocalScanEnrichment(input) {
395
376
  warnings,
396
377
  }),
397
378
  });
398
- const embeddingProgress = progress?.startPhase(0.2);
399
- embeddingUpdates = await runEnrichmentStage({
400
- stateStore: input.stateStore,
401
- runId: input.context.runId,
402
- connectionId: input.connectionId,
403
- syncId,
404
- mode: input.mode,
405
- stage: 'embeddings',
406
- inputHash,
407
- now,
408
- resumedStages: state.resumedStages,
409
- completedStages: state.completedStages,
410
- failedStages: state.failedStages,
411
- compute: async () => {
412
- const embeddings = await buildEmbeddings({
413
- snapshot,
414
- providers,
415
- descriptions,
416
- progress: embeddingProgress,
417
- });
418
- return embeddings.updates;
419
- },
420
- });
421
- schema = snapshotToKtxEnrichedSchema(snapshot, embeddingsByColumnId(embeddingUpdates));
422
379
  summary.dataDictionary = input.connector.sampleColumn ? 'completed' : 'skipped';
423
380
  summary.tableDescriptions = 'completed';
424
381
  summary.columnDescriptions = 'completed';
425
- summary.embeddings = 'completed';
382
+ const embeddingProgress = progress?.startPhase(0.2);
383
+ const embedding = providers.embedding;
384
+ if (embedding) {
385
+ embeddingUpdates = await runEnrichmentStage({
386
+ stateStore: input.stateStore,
387
+ runId: input.context.runId,
388
+ connectionId: input.connectionId,
389
+ syncId,
390
+ mode: input.mode,
391
+ stage: 'embeddings',
392
+ inputHash,
393
+ now,
394
+ resumedStages: state.resumedStages,
395
+ completedStages: state.completedStages,
396
+ failedStages: state.failedStages,
397
+ compute: async () => {
398
+ const embeddings = await buildEmbeddings({
399
+ snapshot,
400
+ embedding,
401
+ descriptions,
402
+ progress: embeddingProgress,
403
+ });
404
+ return embeddings.updates;
405
+ },
406
+ });
407
+ schema = snapshotToKtxEnrichedSchema(snapshot, embeddingsByColumnId(embeddingUpdates));
408
+ summary.embeddings = 'completed';
409
+ }
426
410
  }
427
411
  let relationshipUpdate = null;
428
412
  let relationshipProfile = null;
@@ -4,6 +4,15 @@ import { buildDefaultKtxProjectConfig } from '../project/config.js';
4
4
  import { createDeterministicLocalScanEnrichmentProviders, runLocalScanEnrichment, snapshotToKtxEnrichedSchema, } from './local-enrichment.js';
5
5
  import { createLocalScanEnrichmentProvidersFromConfig } from './local-scan.js';
6
6
  import { createKtxConnectorCapabilities, } from './types.js';
7
+ function fakeScanEmbedding(options) {
8
+ return {
9
+ dimensions: options.dimensions,
10
+ maxBatchSize: options.maxBatchSize ?? 64,
11
+ async embedBatch(texts) {
12
+ return texts.map((_, textIndex) => Array.from({ length: options.dimensions }, (__, dimensionIndex) => textIndex + dimensionIndex));
13
+ },
14
+ };
15
+ }
7
16
  const snapshot = {
8
17
  connectionId: 'warehouse',
9
18
  driver: 'postgres',
@@ -317,7 +326,7 @@ describe('local scan enrichment', () => {
317
326
  }
318
327
  });
319
328
  it('honors scan relationship config when LLM proposals are disabled', async () => {
320
- const providers = createDeterministicLocalScanEnrichmentProviders({ embeddingDimensions: 3 });
329
+ const providers = createDeterministicLocalScanEnrichmentProviders();
321
330
  const generateObject = vi.fn();
322
331
  const result = await runLocalScanEnrichment({
323
332
  connectionId: 'warehouse',
@@ -381,7 +390,7 @@ describe('local scan enrichment', () => {
381
390
  detectRelationships: false,
382
391
  connector: failingConnector,
383
392
  context: { runId: 'scan-run-warnings', logger },
384
- providers: createDeterministicLocalScanEnrichmentProviders({ embeddingDimensions: 6 }),
393
+ providers: createDeterministicLocalScanEnrichmentProviders(),
385
394
  });
386
395
  const codes = result.warnings.map((warning) => warning.code);
387
396
  expect(codes).toContain('sampling_failed');
@@ -394,24 +403,23 @@ describe('local scan enrichment', () => {
394
403
  // Sampling was retried 3× for each of the 2 tables = 6 calls
395
404
  expect(failingConnector.sampleTable).toHaveBeenCalledTimes(6);
396
405
  });
397
- it('runs configured deterministic enrichment with descriptions and embeddings', async () => {
406
+ it('runs configured deterministic enrichment with descriptions and no embeddings', async () => {
398
407
  const result = await runLocalScanEnrichment({
399
408
  connectionId: 'warehouse',
400
409
  mode: 'enriched',
401
410
  detectRelationships: true,
402
411
  connector: connector(),
403
412
  context: { runId: 'scan-run-2' },
404
- providers: createDeterministicLocalScanEnrichmentProviders({ embeddingDimensions: 6 }),
413
+ providers: createDeterministicLocalScanEnrichmentProviders(),
405
414
  });
406
415
  expect(result.summary).toMatchObject({
407
416
  dataDictionary: 'completed',
408
417
  tableDescriptions: 'completed',
409
418
  columnDescriptions: 'completed',
410
- embeddings: 'completed',
419
+ embeddings: 'skipped',
411
420
  deterministicRelationships: 'completed',
412
421
  });
413
- expect(result.embeddingUpdates).toHaveLength(3);
414
- expect(result.embeddingUpdates[0]?.embedding).toHaveLength(6);
422
+ expect(result.embeddingUpdates).toEqual([]);
415
423
  expect(result.snapshot).toEqual(snapshot);
416
424
  expect(result.relationships).toEqual({ accepted: 0, review: 1, rejected: 0, skipped: 0 });
417
425
  });
@@ -470,7 +478,7 @@ describe('local scan enrichment', () => {
470
478
  mode: 'enriched',
471
479
  connector: scanConnector,
472
480
  context: { runId: 'scan-run-concurrent-descriptions' },
473
- providers: createDeterministicLocalScanEnrichmentProviders({ embeddingDimensions: 3 }),
481
+ providers: createDeterministicLocalScanEnrichmentProviders(),
474
482
  relationshipSettings: settings,
475
483
  });
476
484
  expect(maxActiveColumnSamples).toBe(6);
@@ -491,7 +499,10 @@ describe('local scan enrichment', () => {
491
499
  detectRelationships: true,
492
500
  connector: connector(),
493
501
  context: { runId: 'scan-run-progress', progress },
494
- providers: createDeterministicLocalScanEnrichmentProviders({ embeddingDimensions: 6 }),
502
+ providers: {
503
+ ...createDeterministicLocalScanEnrichmentProviders(),
504
+ embedding: fakeScanEmbedding({ dimensions: 6 }),
505
+ },
495
506
  });
496
507
  expect(events).toEqual(expect.arrayContaining([
497
508
  expect.objectContaining({ message: 'Generating descriptions 1/2 tables', transient: true }),
@@ -555,7 +566,7 @@ describe('local scan enrichment', () => {
555
566
  ...connector(),
556
567
  introspect: vi.fn(async () => manyColumnSnapshot),
557
568
  };
558
- const deterministicProviders = createDeterministicLocalScanEnrichmentProviders({ embeddingDimensions: 3 });
569
+ const deterministicProviders = createDeterministicLocalScanEnrichmentProviders();
559
570
  const embedBatch = vi.fn(async (texts) => {
560
571
  if (texts.length > 2) {
561
572
  throw new Error(`Embedding batch size ${texts.length} exceeds maximum 2`);
@@ -583,7 +594,10 @@ describe('local scan enrichment', () => {
583
594
  it('reuses completed description and embedding stages for the same run id and snapshot hash', async () => {
584
595
  const stateStore = memoryEnrichmentStateStore();
585
596
  const scanConnector = connector();
586
- const providers = createDeterministicLocalScanEnrichmentProviders({ embeddingDimensions: 6 });
597
+ const providers = {
598
+ ...createDeterministicLocalScanEnrichmentProviders(),
599
+ embedding: fakeScanEmbedding({ dimensions: 6 }),
600
+ };
587
601
  const first = await runLocalScanEnrichment({
588
602
  connectionId: 'warehouse',
589
603
  mode: 'enriched',
@@ -593,7 +607,7 @@ describe('local scan enrichment', () => {
593
607
  providers,
594
608
  stateStore,
595
609
  syncId: 'sync-resume-1',
596
- providerIdentity: { provider: 'deterministic', embeddingDimensions: 6 },
610
+ providerIdentity: { provider: 'fake', embeddingDimensions: 6 },
597
611
  });
598
612
  const generateText = vi.spyOn(providers.llmRuntime, 'generateText');
599
613
  const embedBatch = vi.spyOn(providers.embedding, 'embedBatch');
@@ -606,7 +620,7 @@ describe('local scan enrichment', () => {
606
620
  providers,
607
621
  stateStore,
608
622
  syncId: 'sync-resume-1',
609
- providerIdentity: { provider: 'deterministic', embeddingDimensions: 6 },
623
+ providerIdentity: { provider: 'fake', embeddingDimensions: 6 },
610
624
  });
611
625
  expect(first.state.completedStages).toEqual(['descriptions', 'embeddings', 'relationships']);
612
626
  expect(first.state.resumedStages).toEqual([]);
@@ -620,7 +634,10 @@ describe('local scan enrichment', () => {
620
634
  });
621
635
  it('does not reuse completed stages when the snapshot changes', async () => {
622
636
  const stateStore = memoryEnrichmentStateStore();
623
- const providers = createDeterministicLocalScanEnrichmentProviders({ embeddingDimensions: 6 });
637
+ const providers = {
638
+ ...createDeterministicLocalScanEnrichmentProviders(),
639
+ embedding: fakeScanEmbedding({ dimensions: 6 }),
640
+ };
624
641
  const scanConnector = connector();
625
642
  await runLocalScanEnrichment({
626
643
  connectionId: 'warehouse',
@@ -631,7 +648,7 @@ describe('local scan enrichment', () => {
631
648
  providers,
632
649
  stateStore,
633
650
  syncId: 'sync-resume-hash',
634
- providerIdentity: { provider: 'deterministic', embeddingDimensions: 6 },
651
+ providerIdentity: { provider: 'fake', embeddingDimensions: 6 },
635
652
  });
636
653
  const firstTable = snapshot.tables[0];
637
654
  if (!firstTable) {
@@ -654,7 +671,7 @@ describe('local scan enrichment', () => {
654
671
  providers,
655
672
  stateStore,
656
673
  syncId: 'sync-resume-hash',
657
- providerIdentity: { provider: 'deterministic', embeddingDimensions: 6 },
674
+ providerIdentity: { provider: 'fake', embeddingDimensions: 6 },
658
675
  });
659
676
  expect(result.state.resumedStages).toEqual([]);
660
677
  expect(result.state.completedStages).toEqual(['descriptions', 'embeddings', 'relationships']);
@@ -749,8 +766,8 @@ describe('local scan enrichment', () => {
749
766
  createKtxEmbeddingProvider: createKtxEmbeddingProvider,
750
767
  env: { OPENAI_API_KEY: 'openai-key' }, // pragma: allowlist secret
751
768
  });
752
- expect(providers?.embedding.dimensions).toBe(1536);
753
- expect(providers?.embedding.maxBatchSize).toBe(8);
769
+ expect(providers?.embedding?.dimensions).toBe(1536);
770
+ expect(providers?.embedding?.maxBatchSize).toBe(8);
754
771
  expect(createKtxLlmProvider).toHaveBeenCalledWith(expect.objectContaining({ backend: 'gateway', modelSlots: { default: 'provider/language-model' } }));
755
772
  expect(createKtxEmbeddingProvider).toHaveBeenCalledWith(expect.objectContaining({ backend: 'openai', model: 'provider/embedding-model' }));
756
773
  });
@@ -916,7 +916,7 @@ describe('local scan', () => {
916
916
  expect(persistedReport).toContain('postgres://reader:<redacted>@example.test/db');
917
917
  expect(persistedReport).not.toContain('postgres://reader:secret@example.test/db'); // pragma: allowlist secret
918
918
  });
919
- it('runs enriched scans when deterministic standalone enrichment is configured', async () => {
919
+ it('runs enriched scans when deterministic standalone enrichment is configured without embeddings', async () => {
920
920
  await writeFile(join(project.projectDir, 'ktx.yaml'), [
921
921
  'connections:',
922
922
  ' warehouse:',
@@ -995,10 +995,9 @@ describe('local scan', () => {
995
995
  expect(result.report.mode).toBe('enriched');
996
996
  expect(result.report.enrichment.tableDescriptions).toBe('completed');
997
997
  expect(result.report.enrichment.columnDescriptions).toBe('completed');
998
- expect(result.report.enrichment.embeddings).toBe('completed');
998
+ expect(result.report.enrichment.embeddings).toBe('skipped');
999
999
  expect(result.report.artifactPaths.enrichmentArtifacts).toEqual([
1000
1000
  'raw-sources/warehouse/live-database/2026-04-29-091500-scan-enriched-1/enrichment/descriptions.json',
1001
- 'raw-sources/warehouse/live-database/2026-04-29-091500-scan-enriched-1/enrichment/embeddings.json',
1002
1001
  'raw-sources/warehouse/live-database/2026-04-29-091500-scan-enriched-1/enrichment/relationships.json',
1003
1002
  'raw-sources/warehouse/live-database/2026-04-29-091500-scan-enriched-1/enrichment/relationship-profile.json',
1004
1003
  'raw-sources/warehouse/live-database/2026-04-29-091500-scan-enriched-1/enrichment/relationship-diagnostics.json',
@@ -50,9 +50,9 @@ export interface SlSourcesIndexPort {
50
50
  searchText: string;
51
51
  hasEmbedding: boolean;
52
52
  }>>;
53
- deleteStale(connectionId: string, keepNames: string[]): Promise<void>;
54
- deleteByConnection(connectionId: string): Promise<void>;
55
- deleteByConnectionAndName(connectionId: string, sourceName: string): Promise<void>;
53
+ deleteStale(connectionId: string, keepNames: string[]): Promise<number>;
54
+ deleteByConnection(connectionId: string): Promise<number>;
55
+ deleteByConnectionAndName(connectionId: string, sourceName: string): Promise<number>;
56
56
  search(connectionId: string, queryEmbedding: number[] | null, queryText: string, limit: number, minRrfScore?: number): Promise<Array<{
57
57
  sourceName: string;
58
58
  rrfScore: number;
@@ -1,4 +1,5 @@
1
1
  import type { KtxEmbeddingPort, KtxLogger } from '../core/index.js';
2
+ import type { ReindexWorkResult } from '../index-sync/types.js';
2
3
  import type { SlSourcesIndexPort } from './ports.js';
3
4
  import type { SemanticLayerSource } from './types.js';
4
5
  export declare function buildSemanticLayerSourceSearchText(source: SemanticLayerSource, priority?: string[]): string;
@@ -6,8 +7,8 @@ export declare class SlSearchService {
6
7
  private readonly embeddingService;
7
8
  private readonly slSourcesRepository;
8
9
  private readonly logger;
9
- constructor(embeddingService: KtxEmbeddingPort, slSourcesRepository: SlSourcesIndexPort, logger?: KtxLogger);
10
- indexSources(connectionId: string, sources: SemanticLayerSource[]): Promise<void>;
10
+ constructor(embeddingService: KtxEmbeddingPort | null, slSourcesRepository: SlSourcesIndexPort, logger?: KtxLogger);
11
+ indexSources(connectionId: string, sources: SemanticLayerSource[]): Promise<ReindexWorkResult>;
11
12
  search(connectionId: string, query: string, limit?: number, minRrfScore?: number): Promise<Array<{
12
13
  sourceName: string;
13
14
  score: number;
@@ -84,64 +84,66 @@ export class SlSearchService {
84
84
  this.logger = logger;
85
85
  }
86
86
  async indexSources(connectionId, sources) {
87
+ const existing = await this.slSourcesRepository.getExistingSearchTexts(connectionId);
87
88
  if (sources.length === 0) {
88
- await this.slSourcesRepository.deleteByConnection(connectionId);
89
- return;
89
+ const deleted = await this.slSourcesRepository.deleteByConnection(connectionId);
90
+ return { scanned: 0, updated: 0, deleted, embeddingsRecomputed: 0, embeddingsFailed: 0 };
90
91
  }
91
- // Detect which sources actually changed by comparing search_text
92
- const existing = await this.slSourcesRepository.getExistingSearchTexts(connectionId);
93
92
  const searchTexts = sources.map((s) => this.buildSearchText(s));
93
+ const embeddingService = this.embeddingService;
94
94
  const changedIndices = [];
95
- for (let i = 0; i < sources.length; i++) {
96
- const prev = existing.get(sources[i].name);
97
- if (!prev || prev.searchText !== searchTexts[i] || !prev.hasEmbedding) {
95
+ for (let i = 0; i < sources.length; i += 1) {
96
+ const previous = existing.get(sources[i].name);
97
+ if (!previous ||
98
+ previous.searchText !== searchTexts[i] ||
99
+ (embeddingService !== null && !previous.hasEmbedding)) {
98
100
  changedIndices.push(i);
99
101
  }
100
102
  }
101
- if (changedIndices.length === 0) {
102
- // Still clean up stale sources even if nothing changed
103
- const keepNames = sources.map((s) => s.name);
104
- await this.slSourcesRepository.deleteStale(connectionId, keepNames);
105
- this.logger.log(`SL sources for connection ${connectionId}: all ${sources.length} up to date, 0 reindexed`);
106
- return;
107
- }
108
- // Compute embeddings only for changed sources
109
- const changedTexts = changedIndices.map((i) => searchTexts[i]);
110
- let changedEmbeddings;
111
- try {
112
- const batchSize = this.embeddingService.maxBatchSize;
113
- const allEmbeddings = [];
114
- for (let i = 0; i < changedTexts.length; i += batchSize) {
115
- const batch = changedTexts.slice(i, i + batchSize);
116
- const batchEmbeddings = await this.embeddingService.computeEmbeddingsBulk(batch);
117
- allEmbeddings.push(...batchEmbeddings);
103
+ let changedEmbeddings = changedIndices.map(() => null);
104
+ let embeddingsRecomputed = 0;
105
+ let embeddingsFailed = 0;
106
+ if (embeddingService && changedIndices.length > 0) {
107
+ try {
108
+ const changedTexts = changedIndices.map((index) => searchTexts[index]);
109
+ const allEmbeddings = [];
110
+ for (let i = 0; i < changedTexts.length; i += embeddingService.maxBatchSize) {
111
+ const batch = changedTexts.slice(i, i + embeddingService.maxBatchSize);
112
+ allEmbeddings.push(...(await embeddingService.computeEmbeddingsBulk(batch)));
113
+ }
114
+ changedEmbeddings = allEmbeddings;
115
+ embeddingsRecomputed = allEmbeddings.length;
116
+ }
117
+ catch (error) {
118
+ this.logger.warn(`Failed to compute SL source embeddings: ${error instanceof Error ? error.message : String(error)}`);
119
+ embeddingsFailed = changedIndices.length;
118
120
  }
119
- changedEmbeddings = allEmbeddings;
120
- }
121
- catch (error) {
122
- this.logger.warn(`Failed to compute SL source embeddings: ${error instanceof Error ? error.message : String(error)}`);
123
- changedEmbeddings = changedIndices.map(() => null);
124
121
  }
125
- const rows = changedIndices.map((srcIdx, i) => {
126
- return {
127
- sourceName: sources[srcIdx].name,
128
- searchText: searchTexts[srcIdx],
129
- embedding: changedEmbeddings[i],
130
- };
131
- });
122
+ const rows = changedIndices.map((sourceIndex, embeddingIndex) => ({
123
+ sourceName: sources[sourceIndex].name,
124
+ searchText: searchTexts[sourceIndex],
125
+ embedding: changedEmbeddings[embeddingIndex] ?? null,
126
+ }));
132
127
  await this.slSourcesRepository.upsertSources(connectionId, rows);
133
- // Remove sources that no longer exist in YAML
134
- const keepNames = sources.map((s) => s.name);
135
- await this.slSourcesRepository.deleteStale(connectionId, keepNames);
136
- this.logger.log(`SL sources for connection ${connectionId}: ${changedIndices.length}/${sources.length} reindexed, ${sources.length - changedIndices.length} unchanged`);
128
+ const keepNames = sources.map((source) => source.name);
129
+ const deleted = await this.slSourcesRepository.deleteStale(connectionId, keepNames);
130
+ return {
131
+ scanned: sources.length,
132
+ updated: changedIndices.length,
133
+ deleted,
134
+ embeddingsRecomputed,
135
+ embeddingsFailed,
136
+ };
137
137
  }
138
138
  async search(connectionId, query, limit = 15, minRrfScore = 0) {
139
139
  let queryEmbedding = null;
140
- try {
141
- queryEmbedding = await this.embeddingService.computeEmbedding(query);
142
- }
143
- catch (error) {
144
- this.logger.warn(`Failed to compute query embedding, falling back to FTS + trigram: ${error instanceof Error ? error.message : String(error)}`);
140
+ if (this.embeddingService) {
141
+ try {
142
+ queryEmbedding = await this.embeddingService.computeEmbedding(query);
143
+ }
144
+ catch (error) {
145
+ this.logger.warn(`Failed to compute query embedding, falling back to FTS + trigram: ${error instanceof Error ? error.message : String(error)}`);
146
+ }
145
147
  }
146
148
  const results = await this.slSourcesRepository.search(connectionId, queryEmbedding, query, limit, minRrfScore);
147
149
  return results.map((result) => ({