@memberjunction/db-auto-doc 5.13.0 → 5.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/README.md +169 -29
  2. package/bin/run.js +1 -1
  3. package/dist/commands/analyze.d.ts +3 -0
  4. package/dist/commands/analyze.d.ts.map +1 -1
  5. package/dist/commands/analyze.js +33 -3
  6. package/dist/commands/analyze.js.map +1 -1
  7. package/dist/commands/prune.d.ts +17 -0
  8. package/dist/commands/prune.d.ts.map +1 -0
  9. package/dist/commands/prune.js +153 -0
  10. package/dist/commands/prune.js.map +1 -0
  11. package/dist/core/AnalysisEngine.d.ts +44 -0
  12. package/dist/core/AnalysisEngine.d.ts.map +1 -1
  13. package/dist/core/AnalysisEngine.js +427 -1
  14. package/dist/core/AnalysisEngine.js.map +1 -1
  15. package/dist/core/AnalysisOrchestrator.d.ts.map +1 -1
  16. package/dist/core/AnalysisOrchestrator.js +33 -10
  17. package/dist/core/AnalysisOrchestrator.js.map +1 -1
  18. package/dist/discovery/FKDetector.d.ts +6 -0
  19. package/dist/discovery/FKDetector.d.ts.map +1 -1
  20. package/dist/discovery/FKDetector.js +101 -4
  21. package/dist/discovery/FKDetector.js.map +1 -1
  22. package/dist/discovery/PKDetector.d.ts +7 -0
  23. package/dist/discovery/PKDetector.d.ts.map +1 -1
  24. package/dist/discovery/PKDetector.js +121 -6
  25. package/dist/discovery/PKDetector.js.map +1 -1
  26. package/dist/drivers/MySQLDriver.d.ts.map +1 -1
  27. package/dist/drivers/MySQLDriver.js +2 -0
  28. package/dist/drivers/MySQLDriver.js.map +1 -1
  29. package/dist/drivers/PostgreSQLDriver.d.ts.map +1 -1
  30. package/dist/drivers/PostgreSQLDriver.js +2 -0
  31. package/dist/drivers/PostgreSQLDriver.js.map +1 -1
  32. package/dist/drivers/SQLServerDriver.d.ts.map +1 -1
  33. package/dist/drivers/SQLServerDriver.js +2 -0
  34. package/dist/drivers/SQLServerDriver.js.map +1 -1
  35. package/dist/prompts/PromptEngine.d.ts +19 -0
  36. package/dist/prompts/PromptEngine.d.ts.map +1 -1
  37. package/dist/prompts/PromptEngine.js +91 -7
  38. package/dist/prompts/PromptEngine.js.map +1 -1
  39. package/dist/types/analysis.d.ts +10 -0
  40. package/dist/types/analysis.d.ts.map +1 -1
  41. package/dist/types/config.d.ts +47 -0
  42. package/dist/types/config.d.ts.map +1 -1
  43. package/dist/types/config.js.map +1 -1
  44. package/dist/types/prompts.d.ts +26 -0
  45. package/dist/types/prompts.d.ts.map +1 -1
  46. package/dist/utils/config-loader.js +2 -2
  47. package/dist/utils/config-loader.js.map +1 -1
  48. package/dist/utils/ensureArray.d.ts +13 -0
  49. package/dist/utils/ensureArray.d.ts.map +1 -0
  50. package/dist/utils/ensureArray.js +39 -0
  51. package/dist/utils/ensureArray.js.map +1 -0
  52. package/package.json +5 -5
  53. package/prompts/fk-evaluation.md +94 -0
  54. package/prompts/fk-pruning-holistic.md +57 -0
  55. package/prompts/fk-pruning-table.md +51 -0
  56. package/prompts/pk-pruning-holistic.md +26 -0
  57. package/prompts/pk-pruning-table.md +35 -0
  58. package/prompts/table-analysis.md +28 -3
@@ -24,6 +24,38 @@ export declare class AnalysisEngine {
24
24
  * Initialize timing for guardrails and set current run
25
25
  */
26
26
  startAnalysis(run: AnalysisRun): void;
27
+ /**
28
+ * Lock interim ground truth: FKs with confidence ≥ threshold become immutable.
29
+ * Call this AFTER the iterative analysis completes but BEFORE the pruning pass.
30
+ */
31
+ lockInterimGroundTruth(state: DatabaseDocumentation, confidenceThreshold?: number): {
32
+ locked: number;
33
+ unlocked: number;
34
+ };
35
+ /**
36
+ * Lock high-confidence PK candidates as interim ground truth.
37
+ */
38
+ lockInterimPKGroundTruth(state: DatabaseDocumentation, confidenceThreshold?: number): {
39
+ locked: number;
40
+ unlocked: number;
41
+ };
42
+ /**
43
+ * Two-pass PK pruning using a potentially stronger model.
44
+ */
45
+ prunePrimaryKeys(state: DatabaseDocumentation, run: AnalysisRun): Promise<{
46
+ removed: number;
47
+ kept: number;
48
+ }>;
49
+ /**
50
+ * Two-pass FK pruning using a potentially stronger model.
51
+ * Pass 1: Per-table — evaluate each table's unlocked FKs, propose removals.
52
+ * Pass 2: Holistic — review all proposed removals at once for final decision.
53
+ * Locked FKs (interim ground truth) are never touched.
54
+ */
55
+ pruneForeignKeys(state: DatabaseDocumentation, run: AnalysisRun): Promise<{
56
+ removed: number;
57
+ kept: number;
58
+ }>;
27
59
  /**
28
60
  * Process a single dependency level
29
61
  */
@@ -39,6 +71,12 @@ export declare class AnalysisEngine {
39
71
  * Build context for table analysis
40
72
  */
41
73
  private buildTableContext;
74
+ /**
75
+ * Build FK candidate stats from the discovery phase for this table.
76
+ * Provides the LLM with cross-table relationship evidence (value overlap,
77
+ * cardinality ratio) to make better FK decisions.
78
+ */
79
+ private buildFKCandidateStats;
42
80
  /**
43
81
  * Build ground truth context for a table from config
44
82
  */
@@ -91,6 +129,12 @@ export declare class AnalysisEngine {
91
129
  * For now, this method is disabled to prevent brittle regex-based FK detection.
92
130
  */
93
131
  private extractAndFeedbackFKInsights;
132
+ /**
133
+ * Process PK proposal from LLM. The LLM can propose a PK, but ALL proposed columns
134
+ * must pass deterministic eligibility: zero nulls, zero blanks, 100% unique values.
135
+ * If any column fails, the entire proposal is rejected.
136
+ */
137
+ private processPKInsightFromLLM;
94
138
  /**
95
139
  * Process structured FK insights from LLM and create feedback to discovery phase
96
140
  *
@@ -1 +1 @@
1
- {"version":3,"file":"AnalysisEngine.d.ts","sourceRoot":"","sources":["../../src/core/AnalysisEngine.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,qBAAqB,EAAE,WAAW,EAAE,gBAAgB,EAAqC,MAAM,mBAAmB,CAAC;AAC5H,OAAO,EAAE,SAAS,EAAE,sBAAsB,EAAiD,MAAM,sBAAsB,CAAC;AAUxH,OAAO,EAAE,eAAe,EAAuC,MAAM,oBAAoB,CAAC;AAC1F,OAAO,EAAE,YAAY,EAAE,MAAM,4BAA4B,CAAC;AAC1D,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAKhE,qBAAa,cAAc;IAUvB,OAAO,CAAC,MAAM;IACd,OAAO,CAAC,YAAY;IACpB,OAAO,CAAC,YAAY;IACpB,OAAO,CAAC,gBAAgB;IAZ1B,OAAO,CAAC,qBAAqB,CAAwB;IACrD,OAAO,CAAC,mBAAmB,CAAsB;IACjD,OAAO,CAAC,iBAAiB,CAAoB;IAC7C,OAAO,CAAC,SAAS,CAAa;IAC9B,OAAO,CAAC,UAAU,CAAC,CAAc;IAEjC,OAAO,CAAC,UAAU,CAA4D;gBAGpE,MAAM,EAAE,eAAe,EACvB,YAAY,EAAE,YAAY,EAC1B,YAAY,EAAE,YAAY,EAC1B,gBAAgB,EAAE,gBAAgB,EAC1C,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,KAAK,IAAI;IA8BxE;;OAEG;IACI,aAAa,CAAC,GAAG,EAAE,WAAW,GAAG,IAAI;IAM5C;;OAEG;IACU,YAAY,CACvB,KAAK,EAAE,qBAAqB,EAC5B,GAAG,EAAE,WAAW,EAChB,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,SAAS,EAAE,GAClB,OAAO,CAAC;QAAE,QAAQ,EAAE,sBAAsB,EAAE,CAAC;QAAC,iBAAiB,EAAE,OAAO,CAAA;KAAE,CAAC;IAgC9E;;OAEG;YACW,YAAY;IAqK1B;;OAEG;IACH,OAAO,CAAC,iBAAiB;IA2DzB;;OAEG;IACH,OAAO,CAAC,uBAAuB;IA+B/B;;OAEG;YACW,mBAAmB;IAuEjC;;;OAGG;IACU,iCAAiC,CAC5C,KAAK,EAAE,qBAAqB,EAC5B,GAAG,EAAE,WAAW,EAChB,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,SAAS,EAAE,GAClB,OAAO,CAAC,OAAO,CAAC;IAwEnB;;;OAGG;IACU,6BAA6B,CACxC,KAAK,EAAE,qBAAqB,EAC5B,GAAG,EAAE,WAAW,EAChB,MAAM,EAAE,gBAAgB,GACvB,OAAO,CAAC,OAAO,CAAC;IA6FnB;;;OAGG;IACU,6BAA6B,CACxC,KAAK,EAAE,qBAAqB,EAC5B,GAAG,EAAE,WAAW,GACf,OAAO,CAAC,OAAO,CAAC;IA+EnB;;OAEG;IACI,gBAAgB,CAAC,KAAK,EAAE,qBAAqB,EAAE,GAAG,EAAE,WAAW,GAAG,OAAO;IAWhF;;OAEG;IACU,sBAAsB,CACjC,KAAK,EAAE,qBAAqB,EAC5B,GAAG,EAAE,WAAW,EAChB,QAAQ,EAAE,sBAAsB,EAAE,GACjC,OAAO,CAAC,IAAI,CAAC;IAahB;;;;;;;;;;;;;;;;;;;OAmBG;IACH,OAAO,CAAC,4BAA4B;IAWpC;;;;;OAKG;IACH,OAAO,CAAC,wBAAwB;IAgJhC;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAezB;;;OAGG;IACH,OAAO,CAAC,uBAAuB;IAoE/B;;;OAGG;IACH,OAAO,CAAC,kBAAkB;CAiE3B"}
1
+ {"version":3,"file":"AnalysisEngine.d.ts","sourceRoot":"","sources":["../../src/core/AnalysisEngine.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,qBAAqB,EAAE,WAAW,EAAE,gBAAgB,EAAqC,MAAM,mBAAmB,CAAC;AAE5H,OAAO,EAAE,SAAS,EAAE,sBAAsB,EAAiD,MAAM,sBAAsB,CAAC;AAUxH,OAAO,EAAE,eAAe,EAAuC,MAAM,oBAAoB,CAAC;AAC1F,OAAO,EAAE,YAAY,EAAE,MAAM,4BAA4B,CAAC;AAC1D,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAKhE,qBAAa,cAAc;IAUvB,OAAO,CAAC,MAAM;IACd,OAAO,CAAC,YAAY;IACpB,OAAO,CAAC,YAAY;IACpB,OAAO,CAAC,gBAAgB;IAZ1B,OAAO,CAAC,qBAAqB,CAAwB;IACrD,OAAO,CAAC,mBAAmB,CAAsB;IACjD,OAAO,CAAC,iBAAiB,CAAoB;IAC7C,OAAO,CAAC,SAAS,CAAa;IAC9B,OAAO,CAAC,UAAU,CAAC,CAAc;IAEjC,OAAO,CAAC,UAAU,CAA4D;gBAGpE,MAAM,EAAE,eAAe,EACvB,YAAY,EAAE,YAAY,EAC1B,YAAY,EAAE,YAAY,EAC1B,gBAAgB,EAAE,gBAAgB,EAC1C,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,KAAK,IAAI;IA8BxE;;OAEG;IACI,aAAa,CAAC,GAAG,EAAE,WAAW,GAAG,IAAI;IAM5C;;;OAGG;IACI,sBAAsB,CAC3B,KAAK,EAAE,qBAAqB,EAC5B,mBAAmB,GAAE,MAAW,GAC/B;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE;IAqBvC;;OAEG;IACI,wBAAwB,CAC7B,KAAK,EAAE,qBAAqB,EAC5B,mBAAmB,GAAE,MAAW,GAC/B;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE;IAqBvC;;OAEG;IACU,gBAAgB,CAC3B,KAAK,EAAE,qBAAqB,EAC5B,GAAG,EAAE,WAAW,GACf,OAAO,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC;IAyF3C;;;;;KAKC;IACU,gBAAgB,CAC3B,KAAK,EAAE,qBAAqB,EAC5B,GAAG,EAAE,WAAW,GACf,OAAO,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC;IA4L7C;;OAEG;IACU,YAAY,CACvB,KAAK,EAAE,qBAAqB,EAC5B,GAAG,EAAE,WAAW,EAChB,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,SAAS,EAAE,GAClB,OAAO,CAAC;QAAE,QAAQ,EAAE,sBAAsB,EAAE,CAAC;QAAC,iBAAiB,EAAE,OAAO,CAAA;KAAE,CAAC;IAgC9E;;OAEG;YACW,YAAY;IA+K1B;;OAEG;IACH,OAAO,CAAC,iBAAiB;IA+DzB;;;;OAIG;IACH,OAAO,CAAC,qBAAqB;IAyB7B;;OAEG;IACH,OAAO,CAAC,uBAAuB;IA+B/B;;OAEG;YACW,mBAAmB;IAuEjC;;;OAGG;IACU,iCAAiC,CAC5C,KAAK,EAAE,qBAAqB,EAC5B,GAAG,EAAE,WAAW,EAChB,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,SAAS,EAAE,GAClB,OAAO,CAAC,OAAO,CAAC;IAwEnB;;;OAGG;IACU,6BAA6B,CACxC,KAAK,EAAE,qBAAqB,EAC5B,GAAG,EAAE,WAAW,EAChB,MAAM,EAAE,gBAAgB,GACvB,OAAO,CAAC,OAAO,CAAC;IA6FnB;;;OAGG;IACU,6BAA6B,CACxC,KAAK,EAAE,qBAAqB,EAC5B,GAAG,EAAE,WAAW,GACf,OAAO,CAAC,OAAO,CAAC;IA+EnB;;OAEG;IACI,gBAAgB,CAAC,KAAK,EAAE,qBAAqB,EAAE,GAAG,EAAE,WAAW,GAAG,OAAO;IAWhF;;OAEG;IACU,sBAAsB,CACjC,KAAK,EAAE,qBAAqB,EAC5B,GAAG,EAAE,WAAW,EAChB,QAAQ,EAAE,sBAAsB,EAAE,GACjC,OAAO,CAAC,IAAI,CAAC;IAahB;;;;;;;;;;;;;;;;;;;OAmBG;IACH,OAAO,CAAC,4BAA4B;IAWpC;;;;OAIG;IACH,OAAO,CAAC,uBAAuB;IA6H/B;;;;;OAKG;IACH,OAAO,CAAC,wBAAwB;IAgJhC;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAezB;;;OAGG;IACH,OAAO,CAAC,uBAAuB;IAoE/B;;;OAGG;IACH,OAAO,CAAC,kBAAkB;CAiE3B"}
@@ -2,6 +2,7 @@
2
2
  * Main analysis orchestrator
3
3
  * Coordinates the entire documentation generation workflow
4
4
  */
5
+ import { ensureArray } from "../utils/ensureArray.js";
5
6
  import { BackpropagationEngine } from './BackpropagationEngine.js';
6
7
  import { ConvergenceDetector } from './ConvergenceDetector.js';
7
8
  import { GuardrailsManager } from './GuardrailsManager.js';
@@ -34,6 +35,293 @@ export class AnalysisEngine {
34
35
  this.currentRun = run;
35
36
  this.guardrailsManager.startPhase('analysis');
36
37
  }
38
+ /**
39
+ * Lock interim ground truth: FKs with confidence ≥ threshold become immutable.
40
+ * Call this AFTER the iterative analysis completes but BEFORE the pruning pass.
41
+ */
42
+ lockInterimGroundTruth(state, confidenceThreshold = 90) {
43
+ const discoveryPhase = state.phases.keyDetection;
44
+ if (!discoveryPhase)
45
+ return { locked: 0, unlocked: 0 };
46
+ let locked = 0;
47
+ let unlocked = 0;
48
+ for (const fk of discoveryPhase.discovered.foreignKeys) {
49
+ if (fk.status === 'rejected')
50
+ continue;
51
+ if (fk.confidence >= confidenceThreshold) {
52
+ fk.status = 'confirmed';
53
+ locked++;
54
+ }
55
+ else {
56
+ unlocked++;
57
+ }
58
+ }
59
+ console.log(`[AnalysisEngine] Interim ground truth locked: ${locked} FKs at ≥${confidenceThreshold}% confidence, ${unlocked} unlocked for pruning`);
60
+ this.onProgress('Interim ground truth locked', { locked, unlocked, threshold: confidenceThreshold });
61
+ return { locked, unlocked };
62
+ }
63
+ /**
64
+ * Lock high-confidence PK candidates as interim ground truth.
65
+ */
66
+ lockInterimPKGroundTruth(state, confidenceThreshold = 90) {
67
+ const discoveryPhase = state.phases.keyDetection;
68
+ if (!discoveryPhase)
69
+ return { locked: 0, unlocked: 0 };
70
+ let locked = 0;
71
+ let unlocked = 0;
72
+ for (const pk of discoveryPhase.discovered.primaryKeys) {
73
+ if (pk.status === 'rejected')
74
+ continue;
75
+ if (pk.confidence >= confidenceThreshold) {
76
+ pk.status = 'confirmed';
77
+ locked++;
78
+ }
79
+ else {
80
+ unlocked++;
81
+ }
82
+ }
83
+ console.log(`[AnalysisEngine] Interim PK ground truth locked: ${locked} PKs at >=${confidenceThreshold}% confidence, ${unlocked} unlocked for pruning`);
84
+ this.onProgress('Interim PK ground truth locked', { locked, unlocked, threshold: confidenceThreshold });
85
+ return { locked, unlocked };
86
+ }
87
+ /**
88
+ * Two-pass PK pruning using a potentially stronger model.
89
+ */
90
+ async prunePrimaryKeys(state, run) {
91
+ const discoveryPhase = state.phases.keyDetection;
92
+ if (!discoveryPhase)
93
+ return { removed: 0, kept: 0 };
94
+ const override = this.config.ai.modelOverrides?.['pkPruning'] ?? this.config.ai.modelOverrides?.['fkPruning'];
95
+ const effectiveModel = override?.model ?? this.config.ai.model;
96
+ const allTables = state.schemas.flatMap(s => s.tables.map(t => {
97
+ const pk = discoveryPhase.discovered.primaryKeys.find(p => p.schemaName === s.name && p.tableName === t.name && p.status === 'confirmed');
98
+ return { schema: s.name, name: t.name, description: t.description || '', pk: pk ? pk.columnNames.join(', ') : '' };
99
+ }));
100
+ const allPKs = discoveryPhase.discovered.primaryKeys.filter(pk => pk.status !== 'rejected');
101
+ const pksByTable = new Map();
102
+ for (const pk of allPKs) {
103
+ const key = `${pk.schemaName}.${pk.tableName}`;
104
+ if (!pksByTable.has(key))
105
+ pksByTable.set(key, []);
106
+ pksByTable.get(key).push(pk);
107
+ }
108
+ this.onProgress('PK pruning pass 1: per-table analysis', { tables: pksByTable.size, model: effectiveModel });
109
+ const allProposals = [];
110
+ for (const [tableKey, tablePKs] of pksByTable.entries()) {
111
+ const hasUnlocked = tablePKs.some(pk => pk.status !== 'confirmed');
112
+ if (!hasUnlocked)
113
+ continue;
114
+ const [schemaName, tableName] = tableKey.split('.');
115
+ const table = this.stateManager.findTable(state, schemaName, tableName);
116
+ const candidates = tablePKs.map(pk => ({ columns: pk.columnNames, confidence: pk.confidence, locked: pk.status === 'confirmed' }));
117
+ const context = { sourceSchema: schemaName, sourceTable: tableName, tableDescription: table?.description || '', allTables, candidates, seedContext: state.seedContext ?? this.config.seedContext };
118
+ const result = await this.promptEngine.executePrompt('pk-pruning-table', context, { responseFormat: 'JSON', temperature: override?.temperature ?? 0.05, maxTokens: override?.maxTokens ?? this.config.ai.maxTokens, modelOverride: override?.model, effortLevelOverride: override?.effortLevel });
119
+ if (!result.success || !result.result) {
120
+ console.log(`[AnalysisEngine] PK pruning failed for ${tableKey}: ${result.errorMessage}`);
121
+ continue;
122
+ }
123
+ try {
124
+ for (const proposal of ensureArray(result.result, "PK pruning per-table")) {
125
+ if (proposal.action === 'remove' && proposal.index >= 1 && proposal.index <= tablePKs.length) {
126
+ const pk = tablePKs[proposal.index - 1];
127
+ if (pk.status === 'confirmed') {
128
+ console.log(`[AnalysisEngine] BLOCKED removal of locked PK: ${tableKey} [${pk.columnNames.join(', ')}]`);
129
+ continue;
130
+ }
131
+ allProposals.push({ pk, reasoning: proposal.reasoning, sourceSchema: pk.schemaName, sourceTable: pk.tableName, columns: pk.columnNames, confidence: pk.confidence });
132
+ }
133
+ }
134
+ }
135
+ catch (pruneErr) {
136
+ console.log(`[AnalysisEngine] PK pruning error for ${tableKey}: ${pruneErr.message}`);
137
+ }
138
+ }
139
+ this.onProgress('PK pruning pass 1 complete', { proposals: allProposals.length });
140
+ if (allProposals.length === 0)
141
+ return { removed: 0, kept: allPKs.length };
142
+ this.onProgress('PK pruning pass 2: holistic review', { proposals: allProposals.length, model: effectiveModel });
143
+ const holisticContext = { allTables, proposals: allProposals.map(p => ({ sourceSchema: p.sourceSchema, sourceTable: p.sourceTable, columns: p.columns, confidence: p.confidence, reasoning: p.reasoning })), seedContext: state.seedContext ?? this.config.seedContext };
144
+ const holisticResult = await this.promptEngine.executePrompt('pk-pruning-holistic', holisticContext, { responseFormat: 'JSON', temperature: override?.temperature ?? 0.05, maxTokens: override?.maxTokens ?? this.config.ai.maxTokens, modelOverride: override?.model, effortLevelOverride: override?.effortLevel });
145
+ let removed = 0;
146
+ if (holisticResult.success && holisticResult.result) {
147
+ for (const decision of ensureArray(holisticResult.result, "holistic pruning")) {
148
+ if (decision.action === 'remove' && decision.index >= 1 && decision.index <= allProposals.length) {
149
+ const proposal = allProposals[decision.index - 1];
150
+ proposal.pk.status = 'rejected';
151
+ removed++;
152
+ console.log(`[AnalysisEngine] Pruned PK: ${proposal.sourceSchema}.${proposal.sourceTable} [${proposal.columns.join(', ')}] - ${decision.reasoning}`);
153
+ }
154
+ }
155
+ }
156
+ else {
157
+ console.log(`[AnalysisEngine] Holistic PK pruning failed: ${holisticResult.errorMessage}. Applying pass 1 proposals.`);
158
+ for (const proposal of allProposals) {
159
+ proposal.pk.status = 'rejected';
160
+ removed++;
161
+ }
162
+ }
163
+ const kept = allPKs.length - removed;
164
+ console.log(`[AnalysisEngine] PK pruning complete: ${removed} removed, ${kept} kept (model: ${effectiveModel})`);
165
+ this.onProgress('PK pruning complete', { removed, kept, model: effectiveModel });
166
+ this.stateManager.updateSummary(state);
167
+ await this.stateManager.save(state);
168
+ return { removed, kept };
169
+ }
170
+ /**
171
+ * Two-pass FK pruning using a potentially stronger model.
172
+ * Pass 1: Per-table — evaluate each table's unlocked FKs, propose removals.
173
+ * Pass 2: Holistic — review all proposed removals at once for final decision.
174
+ * Locked FKs (interim ground truth) are never touched.
175
+ */
176
+ async pruneForeignKeys(state, run) {
177
+ const discoveryPhase = state.phases.keyDetection;
178
+ if (!discoveryPhase)
179
+ return { removed: 0, kept: 0 };
180
+ const override = this.config.ai.modelOverrides?.['fkPruning'];
181
+ const effectiveModel = override?.model ?? this.config.ai.model;
182
+ // Build table info for context
183
+ const allTables = state.schemas.flatMap(s => s.tables.map(t => {
184
+ const pk = discoveryPhase.discovered.primaryKeys.find(p => p.schemaName === s.name && p.tableName === t.name);
185
+ return {
186
+ schema: s.name,
187
+ name: t.name,
188
+ description: t.description || '',
189
+ pk: pk ? pk.columnNames.join(', ') : ''
190
+ };
191
+ }));
192
+ // Group non-rejected FKs by source table
193
+ const allFKs = discoveryPhase.discovered.foreignKeys.filter(fk => fk.status !== 'rejected');
194
+ const fksByTable = new Map();
195
+ for (const fk of allFKs) {
196
+ const key = `${fk.schemaName}.${fk.sourceTable}`;
197
+ if (!fksByTable.has(key))
198
+ fksByTable.set(key, []);
199
+ fksByTable.get(key).push(fk);
200
+ }
201
+ // ==================== PASS 1: Per-table pruning proposals ====================
202
+ this.onProgress('FK pruning pass 1: per-table analysis', { tables: fksByTable.size, model: effectiveModel });
203
+ const allProposals = [];
204
+ let tableIdx = 0;
205
+ for (const [tableKey, tableFKs] of fksByTable.entries()) {
206
+ tableIdx++;
207
+ // Skip tables where ALL FKs are locked
208
+ const hasUnlocked = tableFKs.some(fk => fk.status !== 'confirmed');
209
+ if (!hasUnlocked)
210
+ continue;
211
+ if (tableIdx % 10 === 1) {
212
+ this.onProgress(`FK pruning: table ${tableIdx}/${fksByTable.size}`);
213
+ }
214
+ const [schemaName, tableName] = tableKey.split('.');
215
+ const table = this.stateManager.findTable(state, schemaName, tableName);
216
+ const candidates = tableFKs.map(fk => ({
217
+ sourceColumn: fk.sourceColumn,
218
+ targetSchema: fk.targetSchema,
219
+ targetTable: fk.targetTable,
220
+ targetColumn: fk.targetColumn,
221
+ confidence: fk.confidence,
222
+ locked: fk.status === 'confirmed'
223
+ }));
224
+ const context = {
225
+ sourceSchema: schemaName,
226
+ sourceTable: tableName,
227
+ tableDescription: table?.description || '',
228
+ allTables,
229
+ candidates,
230
+ seedContext: state.seedContext ?? this.config.seedContext
231
+ };
232
+ const result = await this.promptEngine.executePrompt('fk-pruning-table', context, {
233
+ responseFormat: 'JSON',
234
+ temperature: override?.temperature ?? 0.05,
235
+ maxTokens: override?.maxTokens ?? this.config.ai.maxTokens,
236
+ modelOverride: override?.model,
237
+ effortLevelOverride: override?.effortLevel
238
+ });
239
+ if (!result.success || !result.result) {
240
+ console.log(`[AnalysisEngine] FK pruning failed for ${tableKey}: ${result.errorMessage}`);
241
+ continue;
242
+ }
243
+ try {
244
+ for (const proposal of ensureArray(result.result, "FK pruning per-table")) {
245
+ if (proposal.action === 'remove' && proposal.index >= 1 && proposal.index <= tableFKs.length) {
246
+ const fk = tableFKs[proposal.index - 1];
247
+ if (fk.status === 'confirmed') {
248
+ console.log(`[AnalysisEngine] BLOCKED removal of locked FK: ${tableKey}.${fk.sourceColumn} -> ${fk.targetTable}.${fk.targetColumn}`);
249
+ continue;
250
+ }
251
+ allProposals.push({
252
+ fk,
253
+ reasoning: proposal.reasoning,
254
+ sourceSchema: fk.schemaName,
255
+ sourceTable: fk.sourceTable,
256
+ sourceColumn: fk.sourceColumn,
257
+ targetSchema: fk.targetSchema,
258
+ targetTable: fk.targetTable,
259
+ targetColumn: fk.targetColumn,
260
+ confidence: fk.confidence
261
+ });
262
+ }
263
+ }
264
+ }
265
+ catch (pruneErr) {
266
+ console.log(`[AnalysisEngine] FK pruning error for ${tableKey}: ${pruneErr.message}`);
267
+ }
268
+ console.log(`[AnalysisEngine] FK pruning ${tableKey}: ${result.result.length} removals proposed`);
269
+ }
270
+ console.log(`[AnalysisEngine] Pass 1 complete: ${allProposals.length} total removals proposed`);
271
+ this.onProgress('FK pruning pass 1 complete', { proposals: allProposals.length });
272
+ if (allProposals.length === 0) {
273
+ return { removed: 0, kept: allFKs.length };
274
+ }
275
+ // ==================== PASS 2: Holistic review of all proposals ====================
276
+ this.onProgress('FK pruning pass 2: holistic review', { proposals: allProposals.length, model: effectiveModel });
277
+ const holisticContext = {
278
+ allTables,
279
+ proposals: allProposals.map(p => ({
280
+ sourceSchema: p.sourceSchema,
281
+ sourceTable: p.sourceTable,
282
+ sourceColumn: p.sourceColumn,
283
+ targetSchema: p.targetSchema,
284
+ targetTable: p.targetTable,
285
+ targetColumn: p.targetColumn,
286
+ confidence: p.confidence,
287
+ reasoning: p.reasoning
288
+ })),
289
+ seedContext: state.seedContext ?? this.config.seedContext
290
+ };
291
+ const holisticResult = await this.promptEngine.executePrompt('fk-pruning-holistic', holisticContext, {
292
+ responseFormat: 'JSON',
293
+ temperature: override?.temperature ?? 0.05,
294
+ maxTokens: override?.maxTokens ?? this.config.ai.maxTokens,
295
+ modelOverride: override?.model,
296
+ effortLevelOverride: override?.effortLevel
297
+ });
298
+ let removed = 0;
299
+ if (holisticResult.success && holisticResult.result) {
300
+ for (const decision of ensureArray(holisticResult.result, "holistic pruning")) {
301
+ if (decision.action === 'remove' && decision.index >= 1 && decision.index <= allProposals.length) {
302
+ const proposal = allProposals[decision.index - 1];
303
+ proposal.fk.status = 'rejected';
304
+ removed++;
305
+ console.log(`[AnalysisEngine] Pruned FK: ${proposal.sourceSchema}.${proposal.sourceTable}.${proposal.sourceColumn} -> ${proposal.targetSchema}.${proposal.targetTable}.${proposal.targetColumn} — ${decision.reasoning}`);
306
+ }
307
+ }
308
+ }
309
+ else {
310
+ console.log(`[AnalysisEngine] Holistic pruning failed: ${holisticResult.errorMessage}. Falling back to pass 1 proposals.`);
311
+ // Fallback: apply all pass 1 proposals directly
312
+ for (const proposal of allProposals) {
313
+ proposal.fk.status = 'rejected';
314
+ removed++;
315
+ }
316
+ }
317
+ const kept = allFKs.length - removed;
318
+ console.log(`[AnalysisEngine] FK pruning complete: ${removed} removed, ${kept} kept (model: ${effectiveModel})`);
319
+ this.onProgress('FK pruning complete', { removed, kept, model: effectiveModel });
320
+ // Save state
321
+ this.stateManager.updateSummary(state);
322
+ await this.stateManager.save(state);
323
+ return { removed, kept };
324
+ }
37
325
  /**
38
326
  * Process a single dependency level
39
327
  */
@@ -124,6 +412,10 @@ export class AnalysisEngine {
124
412
  if (state.phases.keyDetection && result.result.foreignKeys) {
125
413
  this.processFKInsightsFromLLM(state, tableNode.schema, tableNode.table, result.result.foreignKeys);
126
414
  }
415
+ // Process PK proposal from LLM — verify eligibility deterministically
416
+ if (state.phases.keyDetection && result.result.primaryKey) {
417
+ this.processPKInsightFromLLM(state, tableNode.schema, tableNode.table, result.result.primaryKey);
418
+ }
127
419
  // Update inferred business domain
128
420
  if (result.result.inferredBusinessDomain) {
129
421
  // Could store this in table metadata if needed
@@ -184,6 +476,8 @@ export class AnalysisEngine {
184
476
  }
185
477
  // Build ground truth context if available
186
478
  const groundTruthContext = this.buildGroundTruthContext(tableNode.schema, tableNode.table);
479
+ // Build FK candidate stats from discovery phase for LLM context
480
+ const fkCandidateStats = this.buildFKCandidateStats(state, tableNode.schema, tableNode.table);
187
481
  return {
188
482
  schema: tableNode.schema,
189
483
  table: tableNode.table,
@@ -207,9 +501,33 @@ export class AnalysisEngine {
207
501
  userNotes: table.userNotes,
208
502
  seedContext: state.seedContext ?? this.config.seedContext,
209
503
  allTables,
210
- groundTruth: groundTruthContext
504
+ groundTruth: groundTruthContext,
505
+ fkCandidateStats
211
506
  };
212
507
  }
508
+ /**
509
+ * Build FK candidate stats from the discovery phase for this table.
510
+ * Provides the LLM with cross-table relationship evidence (value overlap,
511
+ * cardinality ratio) to make better FK decisions.
512
+ */
513
+ buildFKCandidateStats(state, schemaName, tableName) {
514
+ const discoveryPhase = state.phases.keyDetection;
515
+ if (!discoveryPhase)
516
+ return [];
517
+ return discoveryPhase.discovered.foreignKeys
518
+ .filter(fk => fk.schemaName === schemaName &&
519
+ fk.sourceTable === tableName &&
520
+ fk.status !== 'rejected')
521
+ .map(fk => ({
522
+ sourceColumn: fk.sourceColumn,
523
+ targetSchema: fk.targetSchema,
524
+ targetTable: fk.targetTable,
525
+ targetColumn: fk.targetColumn,
526
+ valueOverlap: fk.evidence.valueOverlap,
527
+ cardinalityRatio: fk.evidence.cardinalityRatio,
528
+ confidence: fk.confidence
529
+ }));
530
+ }
213
531
  /**
214
532
  * Build ground truth context for a table from config
215
533
  */
@@ -539,6 +857,114 @@ export class AnalysisEngine {
539
857
  console.log(`[AnalysisEngine] extractAndFeedbackFKInsights disabled - awaiting structured FK output from LLM`);
540
858
  return;
541
859
  }
860
+ /**
861
+ * Process PK proposal from LLM. The LLM can propose a PK, but ALL proposed columns
862
+ * must pass deterministic eligibility: zero nulls, zero blanks, 100% unique values.
863
+ * If any column fails, the entire proposal is rejected.
864
+ */
865
+ processPKInsightFromLLM(state, schemaName, tableName, pkProposal) {
866
+ const discoveryPhase = state.phases.keyDetection;
867
+ if (!discoveryPhase || !pkProposal || !pkProposal.columns || pkProposal.columns.length === 0)
868
+ return;
869
+ const columns = pkProposal.columns;
870
+ const confidence = Math.round(pkProposal.confidence * 100);
871
+ // Check if we already have a confirmed PK for this table
872
+ const existingConfirmedPK = discoveryPhase.discovered.primaryKeys.find(pk => pk.schemaName === schemaName &&
873
+ pk.tableName === tableName &&
874
+ pk.status === 'confirmed');
875
+ if (existingConfirmedPK) {
876
+ // Already have a confirmed PK — check if LLM agrees
877
+ const sameColumns = existingConfirmedPK.columnNames.length === columns.length &&
878
+ existingConfirmedPK.columnNames.every(c => columns.some(pc => pc.toLowerCase() === c.toLowerCase()));
879
+ if (sameColumns) {
880
+ // LLM agrees with existing PK — boost confidence
881
+ existingConfirmedPK.confidence = Math.min(existingConfirmedPK.confidence + 10, 100);
882
+ existingConfirmedPK.validatedByLLM = true;
883
+ console.log(`[AnalysisEngine] LLM confirmed existing PK: ${schemaName}.${tableName} (${columns.join(', ')}), confidence: ${existingConfirmedPK.confidence}`);
884
+ }
885
+ else {
886
+ // LLM disagrees — log but don't override a confirmed PK
887
+ console.log(`[AnalysisEngine] LLM proposed different PK for ${schemaName}.${tableName}: [${columns.join(', ')}] vs confirmed [${existingConfirmedPK.columnNames.join(', ')}] — keeping confirmed`);
888
+ }
889
+ return;
890
+ }
891
+ // Check if an existing candidate matches
892
+ const existingCandidate = discoveryPhase.discovered.primaryKeys.find(pk => pk.schemaName === schemaName &&
893
+ pk.tableName === tableName &&
894
+ pk.columnNames.length === columns.length &&
895
+ pk.columnNames.every(c => columns.some(pc => pc.toLowerCase() === c.toLowerCase())));
896
+ if (existingCandidate) {
897
+ // LLM confirms a stats candidate — promote and boost
898
+ existingCandidate.validatedByLLM = true;
899
+ existingCandidate.status = 'confirmed';
900
+ existingCandidate.confidence = Math.min(existingCandidate.confidence + 20, 100);
901
+ console.log(`[AnalysisEngine] LLM confirmed PK candidate: ${schemaName}.${tableName} (${columns.join(', ')}), confidence: ${existingCandidate.confidence}`);
902
+ // Update column flags
903
+ for (const colName of columns) {
904
+ const column = this.findColumnInState(state, schemaName, tableName, colName);
905
+ if (column)
906
+ column.isPrimaryKey = true;
907
+ }
908
+ return;
909
+ }
910
+ // New PK proposal — verify ALL columns are PK-eligible deterministically
911
+ const table = this.stateManager.findTable(state, schemaName, tableName);
912
+ if (!table)
913
+ return;
914
+ for (const colName of columns) {
915
+ const column = table.columns.find(c => c.name.toLowerCase() === colName.toLowerCase());
916
+ if (!column) {
917
+ console.log(`[AnalysisEngine] LLM PK rejected: ${schemaName}.${tableName} — column "${colName}" not found`);
918
+ return;
919
+ }
920
+ // Check PK eligibility from stats
921
+ const stats = column.statistics;
922
+ if (!stats) {
923
+ console.log(`[AnalysisEngine] LLM PK rejected: ${schemaName}.${tableName}.${colName} — no statistics available`);
924
+ return;
925
+ }
926
+ // Safety: prefer pre-computed uniquenessRatio if available (totalRows can be 0 due to field naming bug)
927
+ const uniqueness = stats.uniquenessRatio != null && stats.uniquenessRatio > 0
928
+ ? stats.uniquenessRatio
929
+ : (stats.totalRows > 0 ? stats.distinctCount / stats.totalRows : 0);
930
+ const hasNulls = (stats.nullCount || 0) > 0;
931
+ if (hasNulls) {
932
+ console.log(`[AnalysisEngine] LLM PK rejected: ${schemaName}.${tableName}.${colName} — has ${stats.nullCount} nulls`);
933
+ return;
934
+ }
935
+ if (uniqueness < 1.0) {
936
+ console.log(`[AnalysisEngine] LLM PK rejected: ${schemaName}.${tableName}.${colName} — uniqueness ${(uniqueness * 100).toFixed(1)}% (must be 100%)`);
937
+ return;
938
+ }
939
+ }
940
+ // All columns pass — create new PK candidate
941
+ const newPK = {
942
+ schemaName,
943
+ tableName,
944
+ columnNames: columns,
945
+ confidence,
946
+ evidence: {
947
+ uniqueness: 1.0,
948
+ nullCount: 0,
949
+ totalRows: table.rowCount || 0,
950
+ dataPattern: columns.length > 1 ? 'composite' : 'unknown',
951
+ namingScore: 0.5,
952
+ dataTypeScore: 0.8,
953
+ warnings: ['Created from LLM proposal — passed deterministic eligibility']
954
+ },
955
+ discoveredInIteration: 1,
956
+ validatedByLLM: true,
957
+ status: 'confirmed'
958
+ };
959
+ discoveryPhase.discovered.primaryKeys.push(newPK);
960
+ // Update column flags
961
+ for (const colName of columns) {
962
+ const column = this.findColumnInState(state, schemaName, tableName, colName);
963
+ if (column)
964
+ column.isPrimaryKey = true;
965
+ }
966
+ console.log(`[AnalysisEngine] Created PK from LLM: ${schemaName}.${tableName} (${columns.join(', ')}) confidence: ${confidence}`);
967
+ }
542
968
  /**
543
969
  * Process structured FK insights from LLM and create feedback to discovery phase
544
970
  *