@memberjunction/db-auto-doc 5.13.0 → 5.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +169 -29
- package/bin/run.js +1 -1
- package/dist/commands/analyze.d.ts +3 -0
- package/dist/commands/analyze.d.ts.map +1 -1
- package/dist/commands/analyze.js +33 -3
- package/dist/commands/analyze.js.map +1 -1
- package/dist/commands/prune.d.ts +17 -0
- package/dist/commands/prune.d.ts.map +1 -0
- package/dist/commands/prune.js +153 -0
- package/dist/commands/prune.js.map +1 -0
- package/dist/core/AnalysisEngine.d.ts +44 -0
- package/dist/core/AnalysisEngine.d.ts.map +1 -1
- package/dist/core/AnalysisEngine.js +427 -1
- package/dist/core/AnalysisEngine.js.map +1 -1
- package/dist/core/AnalysisOrchestrator.d.ts.map +1 -1
- package/dist/core/AnalysisOrchestrator.js +33 -10
- package/dist/core/AnalysisOrchestrator.js.map +1 -1
- package/dist/discovery/FKDetector.d.ts +6 -0
- package/dist/discovery/FKDetector.d.ts.map +1 -1
- package/dist/discovery/FKDetector.js +101 -4
- package/dist/discovery/FKDetector.js.map +1 -1
- package/dist/discovery/PKDetector.d.ts +7 -0
- package/dist/discovery/PKDetector.d.ts.map +1 -1
- package/dist/discovery/PKDetector.js +121 -6
- package/dist/discovery/PKDetector.js.map +1 -1
- package/dist/drivers/MySQLDriver.d.ts.map +1 -1
- package/dist/drivers/MySQLDriver.js +2 -0
- package/dist/drivers/MySQLDriver.js.map +1 -1
- package/dist/drivers/PostgreSQLDriver.d.ts.map +1 -1
- package/dist/drivers/PostgreSQLDriver.js +2 -0
- package/dist/drivers/PostgreSQLDriver.js.map +1 -1
- package/dist/drivers/SQLServerDriver.d.ts.map +1 -1
- package/dist/drivers/SQLServerDriver.js +2 -0
- package/dist/drivers/SQLServerDriver.js.map +1 -1
- package/dist/prompts/PromptEngine.d.ts +19 -0
- package/dist/prompts/PromptEngine.d.ts.map +1 -1
- package/dist/prompts/PromptEngine.js +91 -7
- package/dist/prompts/PromptEngine.js.map +1 -1
- package/dist/types/analysis.d.ts +10 -0
- package/dist/types/analysis.d.ts.map +1 -1
- package/dist/types/config.d.ts +47 -0
- package/dist/types/config.d.ts.map +1 -1
- package/dist/types/config.js.map +1 -1
- package/dist/types/prompts.d.ts +26 -0
- package/dist/types/prompts.d.ts.map +1 -1
- package/dist/utils/config-loader.js +2 -2
- package/dist/utils/config-loader.js.map +1 -1
- package/dist/utils/ensureArray.d.ts +13 -0
- package/dist/utils/ensureArray.d.ts.map +1 -0
- package/dist/utils/ensureArray.js +39 -0
- package/dist/utils/ensureArray.js.map +1 -0
- package/package.json +5 -5
- package/prompts/fk-evaluation.md +94 -0
- package/prompts/fk-pruning-holistic.md +57 -0
- package/prompts/fk-pruning-table.md +51 -0
- package/prompts/pk-pruning-holistic.md +26 -0
- package/prompts/pk-pruning-table.md +35 -0
- package/prompts/table-analysis.md +28 -3
|
@@ -24,6 +24,38 @@ export declare class AnalysisEngine {
|
|
|
24
24
|
* Initialize timing for guardrails and set current run
|
|
25
25
|
*/
|
|
26
26
|
startAnalysis(run: AnalysisRun): void;
|
|
27
|
+
/**
|
|
28
|
+
* Lock interim ground truth: FKs with confidence ≥ threshold become immutable.
|
|
29
|
+
* Call this AFTER the iterative analysis completes but BEFORE the pruning pass.
|
|
30
|
+
*/
|
|
31
|
+
lockInterimGroundTruth(state: DatabaseDocumentation, confidenceThreshold?: number): {
|
|
32
|
+
locked: number;
|
|
33
|
+
unlocked: number;
|
|
34
|
+
};
|
|
35
|
+
/**
|
|
36
|
+
* Lock high-confidence PK candidates as interim ground truth.
|
|
37
|
+
*/
|
|
38
|
+
lockInterimPKGroundTruth(state: DatabaseDocumentation, confidenceThreshold?: number): {
|
|
39
|
+
locked: number;
|
|
40
|
+
unlocked: number;
|
|
41
|
+
};
|
|
42
|
+
/**
|
|
43
|
+
* Two-pass PK pruning using a potentially stronger model.
|
|
44
|
+
*/
|
|
45
|
+
prunePrimaryKeys(state: DatabaseDocumentation, run: AnalysisRun): Promise<{
|
|
46
|
+
removed: number;
|
|
47
|
+
kept: number;
|
|
48
|
+
}>;
|
|
49
|
+
/**
|
|
50
|
+
* Two-pass FK pruning using a potentially stronger model.
|
|
51
|
+
* Pass 1: Per-table — evaluate each table's unlocked FKs, propose removals.
|
|
52
|
+
* Pass 2: Holistic — review all proposed removals at once for final decision.
|
|
53
|
+
* Locked FKs (interim ground truth) are never touched.
|
|
54
|
+
*/
|
|
55
|
+
pruneForeignKeys(state: DatabaseDocumentation, run: AnalysisRun): Promise<{
|
|
56
|
+
removed: number;
|
|
57
|
+
kept: number;
|
|
58
|
+
}>;
|
|
27
59
|
/**
|
|
28
60
|
* Process a single dependency level
|
|
29
61
|
*/
|
|
@@ -39,6 +71,12 @@ export declare class AnalysisEngine {
|
|
|
39
71
|
* Build context for table analysis
|
|
40
72
|
*/
|
|
41
73
|
private buildTableContext;
|
|
74
|
+
/**
|
|
75
|
+
* Build FK candidate stats from the discovery phase for this table.
|
|
76
|
+
* Provides the LLM with cross-table relationship evidence (value overlap,
|
|
77
|
+
* cardinality ratio) to make better FK decisions.
|
|
78
|
+
*/
|
|
79
|
+
private buildFKCandidateStats;
|
|
42
80
|
/**
|
|
43
81
|
* Build ground truth context for a table from config
|
|
44
82
|
*/
|
|
@@ -91,6 +129,12 @@ export declare class AnalysisEngine {
|
|
|
91
129
|
* For now, this method is disabled to prevent brittle regex-based FK detection.
|
|
92
130
|
*/
|
|
93
131
|
private extractAndFeedbackFKInsights;
|
|
132
|
+
/**
|
|
133
|
+
* Process PK proposal from LLM. The LLM can propose a PK, but ALL proposed columns
|
|
134
|
+
* must pass deterministic eligibility: zero nulls, zero blanks, 100% unique values.
|
|
135
|
+
* If any column fails, the entire proposal is rejected.
|
|
136
|
+
*/
|
|
137
|
+
private processPKInsightFromLLM;
|
|
94
138
|
/**
|
|
95
139
|
* Process structured FK insights from LLM and create feedback to discovery phase
|
|
96
140
|
*
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"AnalysisEngine.d.ts","sourceRoot":"","sources":["../../src/core/AnalysisEngine.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,qBAAqB,EAAE,WAAW,EAAE,gBAAgB,EAAqC,MAAM,mBAAmB,CAAC;
|
|
1
|
+
{"version":3,"file":"AnalysisEngine.d.ts","sourceRoot":"","sources":["../../src/core/AnalysisEngine.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,qBAAqB,EAAE,WAAW,EAAE,gBAAgB,EAAqC,MAAM,mBAAmB,CAAC;AAE5H,OAAO,EAAE,SAAS,EAAE,sBAAsB,EAAiD,MAAM,sBAAsB,CAAC;AAUxH,OAAO,EAAE,eAAe,EAAuC,MAAM,oBAAoB,CAAC;AAC1F,OAAO,EAAE,YAAY,EAAE,MAAM,4BAA4B,CAAC;AAC1D,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAKhE,qBAAa,cAAc;IAUvB,OAAO,CAAC,MAAM;IACd,OAAO,CAAC,YAAY;IACpB,OAAO,CAAC,YAAY;IACpB,OAAO,CAAC,gBAAgB;IAZ1B,OAAO,CAAC,qBAAqB,CAAwB;IACrD,OAAO,CAAC,mBAAmB,CAAsB;IACjD,OAAO,CAAC,iBAAiB,CAAoB;IAC7C,OAAO,CAAC,SAAS,CAAa;IAC9B,OAAO,CAAC,UAAU,CAAC,CAAc;IAEjC,OAAO,CAAC,UAAU,CAA4D;gBAGpE,MAAM,EAAE,eAAe,EACvB,YAAY,EAAE,YAAY,EAC1B,YAAY,EAAE,YAAY,EAC1B,gBAAgB,EAAE,gBAAgB,EAC1C,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,KAAK,IAAI;IA8BxE;;OAEG;IACI,aAAa,CAAC,GAAG,EAAE,WAAW,GAAG,IAAI;IAM5C;;;OAGG;IACI,sBAAsB,CAC3B,KAAK,EAAE,qBAAqB,EAC5B,mBAAmB,GAAE,MAAW,GAC/B;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE;IAqBvC;;OAEG;IACI,wBAAwB,CAC7B,KAAK,EAAE,qBAAqB,EAC5B,mBAAmB,GAAE,MAAW,GAC/B;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE;IAqBvC;;OAEG;IACU,gBAAgB,CAC3B,KAAK,EAAE,qBAAqB,EAC5B,GAAG,EAAE,WAAW,GACf,OAAO,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC;IAyF3C;;;;;KAKC;IACU,gBAAgB,CAC3B,KAAK,EAAE,qBAAqB,EAC5B,GAAG,EAAE,WAAW,GACf,OAAO,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC;IA4L7C;;OAEG;IACU,YAAY,CACvB,KAAK,EAAE,qBAAqB,EAC5B,GAAG,EAAE,WAAW,EAChB,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,SAAS,EAAE,GAClB,OAAO,CAAC;QAAE,QAAQ,EAAE,sBAAsB,EAAE,CAAC;QAAC,iBAAiB,EAAE,OAAO,CAAA;KAAE,CAAC;IAgC9E;;OAEG;YACW,YAAY;IA+K1B;;OAEG;IACH,OAAO,CAAC,iBAAiB;IA+DzB;;;;OAIG;IACH,OAAO,CAAC,qBAAqB;IAyB7B;;OAEG;IACH,OAAO,CAAC,uBAAuB;IA+B/B;;OAEG;YACW,mBAAmB;IAuEjC;;;OAGG;IACU,iCAAiC,CAC5C,KAAK,EAAE,qBAAqB,EAC5B,GAAG,EAAE,WAAW,EAChB,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,SAAS,EAAE,GAClB,OAAO,CAAC,OAAO,CAAC;IAwEnB;;;OAGG;IACU,6BAA6B,CACxC,KAAK,EAAE,qBAAqB,EAC5B,GAAG,EAAE,WAAW,EAChB,MAAM,EAAE,gBAAgB,GACvB,OAAO,CAAC,OAAO,CAAC;IA6FnB;;;OAGG;IACU,6BAA6B,CACxC,KAAK,EAAE,qBAAqB,EAC5B,GAAG,EAAE,WAAW,GACf,OAAO,CAAC,OAAO,CAAC;IA+EnB;;OAEG;IACI,gBAAgB,CAAC,KAAK,EAAE,qBAAqB,EAAE,GAAG,EAAE,WAAW,GAAG,OAAO;IAWhF;;OAEG;IACU,sBAAsB,CACjC,KAAK,EAAE,qBAAqB,EAC5B,GAAG,EAAE,WAAW,EAChB,QAAQ,EAAE,sBAAsB,EAAE,GACjC,OAAO,CAAC,IAAI,CAAC;IAahB;;;;;;;;;;;;;;;;;;;OAmBG;IACH,OAAO,CAAC,4BAA4B;IAWpC;;;;OAIG;IACH,OAAO,CAAC,uBAAuB;IA6H/B;;;;;OAKG;IACH,OAAO,CAAC,wBAAwB;IAgJhC;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAezB;;;OAGG;IACH,OAAO,CAAC,uBAAuB;IAoE/B;;;OAGG;IACH,OAAO,CAAC,kBAAkB;CAiE3B"}
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
* Main analysis orchestrator
|
|
3
3
|
* Coordinates the entire documentation generation workflow
|
|
4
4
|
*/
|
|
5
|
+
import { ensureArray } from "../utils/ensureArray.js";
|
|
5
6
|
import { BackpropagationEngine } from './BackpropagationEngine.js';
|
|
6
7
|
import { ConvergenceDetector } from './ConvergenceDetector.js';
|
|
7
8
|
import { GuardrailsManager } from './GuardrailsManager.js';
|
|
@@ -34,6 +35,293 @@ export class AnalysisEngine {
|
|
|
34
35
|
this.currentRun = run;
|
|
35
36
|
this.guardrailsManager.startPhase('analysis');
|
|
36
37
|
}
|
|
38
|
+
/**
|
|
39
|
+
* Lock interim ground truth: FKs with confidence ≥ threshold become immutable.
|
|
40
|
+
* Call this AFTER the iterative analysis completes but BEFORE the pruning pass.
|
|
41
|
+
*/
|
|
42
|
+
lockInterimGroundTruth(state, confidenceThreshold = 90) {
|
|
43
|
+
const discoveryPhase = state.phases.keyDetection;
|
|
44
|
+
if (!discoveryPhase)
|
|
45
|
+
return { locked: 0, unlocked: 0 };
|
|
46
|
+
let locked = 0;
|
|
47
|
+
let unlocked = 0;
|
|
48
|
+
for (const fk of discoveryPhase.discovered.foreignKeys) {
|
|
49
|
+
if (fk.status === 'rejected')
|
|
50
|
+
continue;
|
|
51
|
+
if (fk.confidence >= confidenceThreshold) {
|
|
52
|
+
fk.status = 'confirmed';
|
|
53
|
+
locked++;
|
|
54
|
+
}
|
|
55
|
+
else {
|
|
56
|
+
unlocked++;
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
console.log(`[AnalysisEngine] Interim ground truth locked: ${locked} FKs at ≥${confidenceThreshold}% confidence, ${unlocked} unlocked for pruning`);
|
|
60
|
+
this.onProgress('Interim ground truth locked', { locked, unlocked, threshold: confidenceThreshold });
|
|
61
|
+
return { locked, unlocked };
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* Lock high-confidence PK candidates as interim ground truth.
|
|
65
|
+
*/
|
|
66
|
+
lockInterimPKGroundTruth(state, confidenceThreshold = 90) {
|
|
67
|
+
const discoveryPhase = state.phases.keyDetection;
|
|
68
|
+
if (!discoveryPhase)
|
|
69
|
+
return { locked: 0, unlocked: 0 };
|
|
70
|
+
let locked = 0;
|
|
71
|
+
let unlocked = 0;
|
|
72
|
+
for (const pk of discoveryPhase.discovered.primaryKeys) {
|
|
73
|
+
if (pk.status === 'rejected')
|
|
74
|
+
continue;
|
|
75
|
+
if (pk.confidence >= confidenceThreshold) {
|
|
76
|
+
pk.status = 'confirmed';
|
|
77
|
+
locked++;
|
|
78
|
+
}
|
|
79
|
+
else {
|
|
80
|
+
unlocked++;
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
console.log(`[AnalysisEngine] Interim PK ground truth locked: ${locked} PKs at >=${confidenceThreshold}% confidence, ${unlocked} unlocked for pruning`);
|
|
84
|
+
this.onProgress('Interim PK ground truth locked', { locked, unlocked, threshold: confidenceThreshold });
|
|
85
|
+
return { locked, unlocked };
|
|
86
|
+
}
|
|
87
|
+
/**
|
|
88
|
+
* Two-pass PK pruning using a potentially stronger model.
|
|
89
|
+
*/
|
|
90
|
+
async prunePrimaryKeys(state, run) {
|
|
91
|
+
const discoveryPhase = state.phases.keyDetection;
|
|
92
|
+
if (!discoveryPhase)
|
|
93
|
+
return { removed: 0, kept: 0 };
|
|
94
|
+
const override = this.config.ai.modelOverrides?.['pkPruning'] ?? this.config.ai.modelOverrides?.['fkPruning'];
|
|
95
|
+
const effectiveModel = override?.model ?? this.config.ai.model;
|
|
96
|
+
const allTables = state.schemas.flatMap(s => s.tables.map(t => {
|
|
97
|
+
const pk = discoveryPhase.discovered.primaryKeys.find(p => p.schemaName === s.name && p.tableName === t.name && p.status === 'confirmed');
|
|
98
|
+
return { schema: s.name, name: t.name, description: t.description || '', pk: pk ? pk.columnNames.join(', ') : '' };
|
|
99
|
+
}));
|
|
100
|
+
const allPKs = discoveryPhase.discovered.primaryKeys.filter(pk => pk.status !== 'rejected');
|
|
101
|
+
const pksByTable = new Map();
|
|
102
|
+
for (const pk of allPKs) {
|
|
103
|
+
const key = `${pk.schemaName}.${pk.tableName}`;
|
|
104
|
+
if (!pksByTable.has(key))
|
|
105
|
+
pksByTable.set(key, []);
|
|
106
|
+
pksByTable.get(key).push(pk);
|
|
107
|
+
}
|
|
108
|
+
this.onProgress('PK pruning pass 1: per-table analysis', { tables: pksByTable.size, model: effectiveModel });
|
|
109
|
+
const allProposals = [];
|
|
110
|
+
for (const [tableKey, tablePKs] of pksByTable.entries()) {
|
|
111
|
+
const hasUnlocked = tablePKs.some(pk => pk.status !== 'confirmed');
|
|
112
|
+
if (!hasUnlocked)
|
|
113
|
+
continue;
|
|
114
|
+
const [schemaName, tableName] = tableKey.split('.');
|
|
115
|
+
const table = this.stateManager.findTable(state, schemaName, tableName);
|
|
116
|
+
const candidates = tablePKs.map(pk => ({ columns: pk.columnNames, confidence: pk.confidence, locked: pk.status === 'confirmed' }));
|
|
117
|
+
const context = { sourceSchema: schemaName, sourceTable: tableName, tableDescription: table?.description || '', allTables, candidates, seedContext: state.seedContext ?? this.config.seedContext };
|
|
118
|
+
const result = await this.promptEngine.executePrompt('pk-pruning-table', context, { responseFormat: 'JSON', temperature: override?.temperature ?? 0.05, maxTokens: override?.maxTokens ?? this.config.ai.maxTokens, modelOverride: override?.model, effortLevelOverride: override?.effortLevel });
|
|
119
|
+
if (!result.success || !result.result) {
|
|
120
|
+
console.log(`[AnalysisEngine] PK pruning failed for ${tableKey}: ${result.errorMessage}`);
|
|
121
|
+
continue;
|
|
122
|
+
}
|
|
123
|
+
try {
|
|
124
|
+
for (const proposal of ensureArray(result.result, "PK pruning per-table")) {
|
|
125
|
+
if (proposal.action === 'remove' && proposal.index >= 1 && proposal.index <= tablePKs.length) {
|
|
126
|
+
const pk = tablePKs[proposal.index - 1];
|
|
127
|
+
if (pk.status === 'confirmed') {
|
|
128
|
+
console.log(`[AnalysisEngine] BLOCKED removal of locked PK: ${tableKey} [${pk.columnNames.join(', ')}]`);
|
|
129
|
+
continue;
|
|
130
|
+
}
|
|
131
|
+
allProposals.push({ pk, reasoning: proposal.reasoning, sourceSchema: pk.schemaName, sourceTable: pk.tableName, columns: pk.columnNames, confidence: pk.confidence });
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
catch (pruneErr) {
|
|
136
|
+
console.log(`[AnalysisEngine] PK pruning error for ${tableKey}: ${pruneErr.message}`);
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
this.onProgress('PK pruning pass 1 complete', { proposals: allProposals.length });
|
|
140
|
+
if (allProposals.length === 0)
|
|
141
|
+
return { removed: 0, kept: allPKs.length };
|
|
142
|
+
this.onProgress('PK pruning pass 2: holistic review', { proposals: allProposals.length, model: effectiveModel });
|
|
143
|
+
const holisticContext = { allTables, proposals: allProposals.map(p => ({ sourceSchema: p.sourceSchema, sourceTable: p.sourceTable, columns: p.columns, confidence: p.confidence, reasoning: p.reasoning })), seedContext: state.seedContext ?? this.config.seedContext };
|
|
144
|
+
const holisticResult = await this.promptEngine.executePrompt('pk-pruning-holistic', holisticContext, { responseFormat: 'JSON', temperature: override?.temperature ?? 0.05, maxTokens: override?.maxTokens ?? this.config.ai.maxTokens, modelOverride: override?.model, effortLevelOverride: override?.effortLevel });
|
|
145
|
+
let removed = 0;
|
|
146
|
+
if (holisticResult.success && holisticResult.result) {
|
|
147
|
+
for (const decision of ensureArray(holisticResult.result, "holistic pruning")) {
|
|
148
|
+
if (decision.action === 'remove' && decision.index >= 1 && decision.index <= allProposals.length) {
|
|
149
|
+
const proposal = allProposals[decision.index - 1];
|
|
150
|
+
proposal.pk.status = 'rejected';
|
|
151
|
+
removed++;
|
|
152
|
+
console.log(`[AnalysisEngine] Pruned PK: ${proposal.sourceSchema}.${proposal.sourceTable} [${proposal.columns.join(', ')}] - ${decision.reasoning}`);
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
else {
|
|
157
|
+
console.log(`[AnalysisEngine] Holistic PK pruning failed: ${holisticResult.errorMessage}. Applying pass 1 proposals.`);
|
|
158
|
+
for (const proposal of allProposals) {
|
|
159
|
+
proposal.pk.status = 'rejected';
|
|
160
|
+
removed++;
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
const kept = allPKs.length - removed;
|
|
164
|
+
console.log(`[AnalysisEngine] PK pruning complete: ${removed} removed, ${kept} kept (model: ${effectiveModel})`);
|
|
165
|
+
this.onProgress('PK pruning complete', { removed, kept, model: effectiveModel });
|
|
166
|
+
this.stateManager.updateSummary(state);
|
|
167
|
+
await this.stateManager.save(state);
|
|
168
|
+
return { removed, kept };
|
|
169
|
+
}
|
|
170
|
+
/**
|
|
171
|
+
* Two-pass FK pruning using a potentially stronger model.
|
|
172
|
+
* Pass 1: Per-table — evaluate each table's unlocked FKs, propose removals.
|
|
173
|
+
* Pass 2: Holistic — review all proposed removals at once for final decision.
|
|
174
|
+
* Locked FKs (interim ground truth) are never touched.
|
|
175
|
+
*/
|
|
176
|
+
async pruneForeignKeys(state, run) {
|
|
177
|
+
const discoveryPhase = state.phases.keyDetection;
|
|
178
|
+
if (!discoveryPhase)
|
|
179
|
+
return { removed: 0, kept: 0 };
|
|
180
|
+
const override = this.config.ai.modelOverrides?.['fkPruning'];
|
|
181
|
+
const effectiveModel = override?.model ?? this.config.ai.model;
|
|
182
|
+
// Build table info for context
|
|
183
|
+
const allTables = state.schemas.flatMap(s => s.tables.map(t => {
|
|
184
|
+
const pk = discoveryPhase.discovered.primaryKeys.find(p => p.schemaName === s.name && p.tableName === t.name);
|
|
185
|
+
return {
|
|
186
|
+
schema: s.name,
|
|
187
|
+
name: t.name,
|
|
188
|
+
description: t.description || '',
|
|
189
|
+
pk: pk ? pk.columnNames.join(', ') : ''
|
|
190
|
+
};
|
|
191
|
+
}));
|
|
192
|
+
// Group non-rejected FKs by source table
|
|
193
|
+
const allFKs = discoveryPhase.discovered.foreignKeys.filter(fk => fk.status !== 'rejected');
|
|
194
|
+
const fksByTable = new Map();
|
|
195
|
+
for (const fk of allFKs) {
|
|
196
|
+
const key = `${fk.schemaName}.${fk.sourceTable}`;
|
|
197
|
+
if (!fksByTable.has(key))
|
|
198
|
+
fksByTable.set(key, []);
|
|
199
|
+
fksByTable.get(key).push(fk);
|
|
200
|
+
}
|
|
201
|
+
// ==================== PASS 1: Per-table pruning proposals ====================
|
|
202
|
+
this.onProgress('FK pruning pass 1: per-table analysis', { tables: fksByTable.size, model: effectiveModel });
|
|
203
|
+
const allProposals = [];
|
|
204
|
+
let tableIdx = 0;
|
|
205
|
+
for (const [tableKey, tableFKs] of fksByTable.entries()) {
|
|
206
|
+
tableIdx++;
|
|
207
|
+
// Skip tables where ALL FKs are locked
|
|
208
|
+
const hasUnlocked = tableFKs.some(fk => fk.status !== 'confirmed');
|
|
209
|
+
if (!hasUnlocked)
|
|
210
|
+
continue;
|
|
211
|
+
if (tableIdx % 10 === 1) {
|
|
212
|
+
this.onProgress(`FK pruning: table ${tableIdx}/${fksByTable.size}`);
|
|
213
|
+
}
|
|
214
|
+
const [schemaName, tableName] = tableKey.split('.');
|
|
215
|
+
const table = this.stateManager.findTable(state, schemaName, tableName);
|
|
216
|
+
const candidates = tableFKs.map(fk => ({
|
|
217
|
+
sourceColumn: fk.sourceColumn,
|
|
218
|
+
targetSchema: fk.targetSchema,
|
|
219
|
+
targetTable: fk.targetTable,
|
|
220
|
+
targetColumn: fk.targetColumn,
|
|
221
|
+
confidence: fk.confidence,
|
|
222
|
+
locked: fk.status === 'confirmed'
|
|
223
|
+
}));
|
|
224
|
+
const context = {
|
|
225
|
+
sourceSchema: schemaName,
|
|
226
|
+
sourceTable: tableName,
|
|
227
|
+
tableDescription: table?.description || '',
|
|
228
|
+
allTables,
|
|
229
|
+
candidates,
|
|
230
|
+
seedContext: state.seedContext ?? this.config.seedContext
|
|
231
|
+
};
|
|
232
|
+
const result = await this.promptEngine.executePrompt('fk-pruning-table', context, {
|
|
233
|
+
responseFormat: 'JSON',
|
|
234
|
+
temperature: override?.temperature ?? 0.05,
|
|
235
|
+
maxTokens: override?.maxTokens ?? this.config.ai.maxTokens,
|
|
236
|
+
modelOverride: override?.model,
|
|
237
|
+
effortLevelOverride: override?.effortLevel
|
|
238
|
+
});
|
|
239
|
+
if (!result.success || !result.result) {
|
|
240
|
+
console.log(`[AnalysisEngine] FK pruning failed for ${tableKey}: ${result.errorMessage}`);
|
|
241
|
+
continue;
|
|
242
|
+
}
|
|
243
|
+
try {
|
|
244
|
+
for (const proposal of ensureArray(result.result, "FK pruning per-table")) {
|
|
245
|
+
if (proposal.action === 'remove' && proposal.index >= 1 && proposal.index <= tableFKs.length) {
|
|
246
|
+
const fk = tableFKs[proposal.index - 1];
|
|
247
|
+
if (fk.status === 'confirmed') {
|
|
248
|
+
console.log(`[AnalysisEngine] BLOCKED removal of locked FK: ${tableKey}.${fk.sourceColumn} -> ${fk.targetTable}.${fk.targetColumn}`);
|
|
249
|
+
continue;
|
|
250
|
+
}
|
|
251
|
+
allProposals.push({
|
|
252
|
+
fk,
|
|
253
|
+
reasoning: proposal.reasoning,
|
|
254
|
+
sourceSchema: fk.schemaName,
|
|
255
|
+
sourceTable: fk.sourceTable,
|
|
256
|
+
sourceColumn: fk.sourceColumn,
|
|
257
|
+
targetSchema: fk.targetSchema,
|
|
258
|
+
targetTable: fk.targetTable,
|
|
259
|
+
targetColumn: fk.targetColumn,
|
|
260
|
+
confidence: fk.confidence
|
|
261
|
+
});
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
catch (pruneErr) {
|
|
266
|
+
console.log(`[AnalysisEngine] FK pruning error for ${tableKey}: ${pruneErr.message}`);
|
|
267
|
+
}
|
|
268
|
+
console.log(`[AnalysisEngine] FK pruning ${tableKey}: ${result.result.length} removals proposed`);
|
|
269
|
+
}
|
|
270
|
+
console.log(`[AnalysisEngine] Pass 1 complete: ${allProposals.length} total removals proposed`);
|
|
271
|
+
this.onProgress('FK pruning pass 1 complete', { proposals: allProposals.length });
|
|
272
|
+
if (allProposals.length === 0) {
|
|
273
|
+
return { removed: 0, kept: allFKs.length };
|
|
274
|
+
}
|
|
275
|
+
// ==================== PASS 2: Holistic review of all proposals ====================
|
|
276
|
+
this.onProgress('FK pruning pass 2: holistic review', { proposals: allProposals.length, model: effectiveModel });
|
|
277
|
+
const holisticContext = {
|
|
278
|
+
allTables,
|
|
279
|
+
proposals: allProposals.map(p => ({
|
|
280
|
+
sourceSchema: p.sourceSchema,
|
|
281
|
+
sourceTable: p.sourceTable,
|
|
282
|
+
sourceColumn: p.sourceColumn,
|
|
283
|
+
targetSchema: p.targetSchema,
|
|
284
|
+
targetTable: p.targetTable,
|
|
285
|
+
targetColumn: p.targetColumn,
|
|
286
|
+
confidence: p.confidence,
|
|
287
|
+
reasoning: p.reasoning
|
|
288
|
+
})),
|
|
289
|
+
seedContext: state.seedContext ?? this.config.seedContext
|
|
290
|
+
};
|
|
291
|
+
const holisticResult = await this.promptEngine.executePrompt('fk-pruning-holistic', holisticContext, {
|
|
292
|
+
responseFormat: 'JSON',
|
|
293
|
+
temperature: override?.temperature ?? 0.05,
|
|
294
|
+
maxTokens: override?.maxTokens ?? this.config.ai.maxTokens,
|
|
295
|
+
modelOverride: override?.model,
|
|
296
|
+
effortLevelOverride: override?.effortLevel
|
|
297
|
+
});
|
|
298
|
+
let removed = 0;
|
|
299
|
+
if (holisticResult.success && holisticResult.result) {
|
|
300
|
+
for (const decision of ensureArray(holisticResult.result, "holistic pruning")) {
|
|
301
|
+
if (decision.action === 'remove' && decision.index >= 1 && decision.index <= allProposals.length) {
|
|
302
|
+
const proposal = allProposals[decision.index - 1];
|
|
303
|
+
proposal.fk.status = 'rejected';
|
|
304
|
+
removed++;
|
|
305
|
+
console.log(`[AnalysisEngine] Pruned FK: ${proposal.sourceSchema}.${proposal.sourceTable}.${proposal.sourceColumn} -> ${proposal.targetSchema}.${proposal.targetTable}.${proposal.targetColumn} — ${decision.reasoning}`);
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
else {
|
|
310
|
+
console.log(`[AnalysisEngine] Holistic pruning failed: ${holisticResult.errorMessage}. Falling back to pass 1 proposals.`);
|
|
311
|
+
// Fallback: apply all pass 1 proposals directly
|
|
312
|
+
for (const proposal of allProposals) {
|
|
313
|
+
proposal.fk.status = 'rejected';
|
|
314
|
+
removed++;
|
|
315
|
+
}
|
|
316
|
+
}
|
|
317
|
+
const kept = allFKs.length - removed;
|
|
318
|
+
console.log(`[AnalysisEngine] FK pruning complete: ${removed} removed, ${kept} kept (model: ${effectiveModel})`);
|
|
319
|
+
this.onProgress('FK pruning complete', { removed, kept, model: effectiveModel });
|
|
320
|
+
// Save state
|
|
321
|
+
this.stateManager.updateSummary(state);
|
|
322
|
+
await this.stateManager.save(state);
|
|
323
|
+
return { removed, kept };
|
|
324
|
+
}
|
|
37
325
|
/**
|
|
38
326
|
* Process a single dependency level
|
|
39
327
|
*/
|
|
@@ -124,6 +412,10 @@ export class AnalysisEngine {
|
|
|
124
412
|
if (state.phases.keyDetection && result.result.foreignKeys) {
|
|
125
413
|
this.processFKInsightsFromLLM(state, tableNode.schema, tableNode.table, result.result.foreignKeys);
|
|
126
414
|
}
|
|
415
|
+
// Process PK proposal from LLM — verify eligibility deterministically
|
|
416
|
+
if (state.phases.keyDetection && result.result.primaryKey) {
|
|
417
|
+
this.processPKInsightFromLLM(state, tableNode.schema, tableNode.table, result.result.primaryKey);
|
|
418
|
+
}
|
|
127
419
|
// Update inferred business domain
|
|
128
420
|
if (result.result.inferredBusinessDomain) {
|
|
129
421
|
// Could store this in table metadata if needed
|
|
@@ -184,6 +476,8 @@ export class AnalysisEngine {
|
|
|
184
476
|
}
|
|
185
477
|
// Build ground truth context if available
|
|
186
478
|
const groundTruthContext = this.buildGroundTruthContext(tableNode.schema, tableNode.table);
|
|
479
|
+
// Build FK candidate stats from discovery phase for LLM context
|
|
480
|
+
const fkCandidateStats = this.buildFKCandidateStats(state, tableNode.schema, tableNode.table);
|
|
187
481
|
return {
|
|
188
482
|
schema: tableNode.schema,
|
|
189
483
|
table: tableNode.table,
|
|
@@ -207,9 +501,33 @@ export class AnalysisEngine {
|
|
|
207
501
|
userNotes: table.userNotes,
|
|
208
502
|
seedContext: state.seedContext ?? this.config.seedContext,
|
|
209
503
|
allTables,
|
|
210
|
-
groundTruth: groundTruthContext
|
|
504
|
+
groundTruth: groundTruthContext,
|
|
505
|
+
fkCandidateStats
|
|
211
506
|
};
|
|
212
507
|
}
|
|
508
|
+
/**
|
|
509
|
+
* Build FK candidate stats from the discovery phase for this table.
|
|
510
|
+
* Provides the LLM with cross-table relationship evidence (value overlap,
|
|
511
|
+
* cardinality ratio) to make better FK decisions.
|
|
512
|
+
*/
|
|
513
|
+
buildFKCandidateStats(state, schemaName, tableName) {
|
|
514
|
+
const discoveryPhase = state.phases.keyDetection;
|
|
515
|
+
if (!discoveryPhase)
|
|
516
|
+
return [];
|
|
517
|
+
return discoveryPhase.discovered.foreignKeys
|
|
518
|
+
.filter(fk => fk.schemaName === schemaName &&
|
|
519
|
+
fk.sourceTable === tableName &&
|
|
520
|
+
fk.status !== 'rejected')
|
|
521
|
+
.map(fk => ({
|
|
522
|
+
sourceColumn: fk.sourceColumn,
|
|
523
|
+
targetSchema: fk.targetSchema,
|
|
524
|
+
targetTable: fk.targetTable,
|
|
525
|
+
targetColumn: fk.targetColumn,
|
|
526
|
+
valueOverlap: fk.evidence.valueOverlap,
|
|
527
|
+
cardinalityRatio: fk.evidence.cardinalityRatio,
|
|
528
|
+
confidence: fk.confidence
|
|
529
|
+
}));
|
|
530
|
+
}
|
|
213
531
|
/**
|
|
214
532
|
* Build ground truth context for a table from config
|
|
215
533
|
*/
|
|
@@ -539,6 +857,114 @@ export class AnalysisEngine {
|
|
|
539
857
|
console.log(`[AnalysisEngine] extractAndFeedbackFKInsights disabled - awaiting structured FK output from LLM`);
|
|
540
858
|
return;
|
|
541
859
|
}
|
|
860
|
+
/**
|
|
861
|
+
* Process PK proposal from LLM. The LLM can propose a PK, but ALL proposed columns
|
|
862
|
+
* must pass deterministic eligibility: zero nulls, zero blanks, 100% unique values.
|
|
863
|
+
* If any column fails, the entire proposal is rejected.
|
|
864
|
+
*/
|
|
865
|
+
processPKInsightFromLLM(state, schemaName, tableName, pkProposal) {
|
|
866
|
+
const discoveryPhase = state.phases.keyDetection;
|
|
867
|
+
if (!discoveryPhase || !pkProposal || !pkProposal.columns || pkProposal.columns.length === 0)
|
|
868
|
+
return;
|
|
869
|
+
const columns = pkProposal.columns;
|
|
870
|
+
const confidence = Math.round(pkProposal.confidence * 100);
|
|
871
|
+
// Check if we already have a confirmed PK for this table
|
|
872
|
+
const existingConfirmedPK = discoveryPhase.discovered.primaryKeys.find(pk => pk.schemaName === schemaName &&
|
|
873
|
+
pk.tableName === tableName &&
|
|
874
|
+
pk.status === 'confirmed');
|
|
875
|
+
if (existingConfirmedPK) {
|
|
876
|
+
// Already have a confirmed PK — check if LLM agrees
|
|
877
|
+
const sameColumns = existingConfirmedPK.columnNames.length === columns.length &&
|
|
878
|
+
existingConfirmedPK.columnNames.every(c => columns.some(pc => pc.toLowerCase() === c.toLowerCase()));
|
|
879
|
+
if (sameColumns) {
|
|
880
|
+
// LLM agrees with existing PK — boost confidence
|
|
881
|
+
existingConfirmedPK.confidence = Math.min(existingConfirmedPK.confidence + 10, 100);
|
|
882
|
+
existingConfirmedPK.validatedByLLM = true;
|
|
883
|
+
console.log(`[AnalysisEngine] LLM confirmed existing PK: ${schemaName}.${tableName} (${columns.join(', ')}), confidence: ${existingConfirmedPK.confidence}`);
|
|
884
|
+
}
|
|
885
|
+
else {
|
|
886
|
+
// LLM disagrees — log but don't override a confirmed PK
|
|
887
|
+
console.log(`[AnalysisEngine] LLM proposed different PK for ${schemaName}.${tableName}: [${columns.join(', ')}] vs confirmed [${existingConfirmedPK.columnNames.join(', ')}] — keeping confirmed`);
|
|
888
|
+
}
|
|
889
|
+
return;
|
|
890
|
+
}
|
|
891
|
+
// Check if an existing candidate matches
|
|
892
|
+
const existingCandidate = discoveryPhase.discovered.primaryKeys.find(pk => pk.schemaName === schemaName &&
|
|
893
|
+
pk.tableName === tableName &&
|
|
894
|
+
pk.columnNames.length === columns.length &&
|
|
895
|
+
pk.columnNames.every(c => columns.some(pc => pc.toLowerCase() === c.toLowerCase())));
|
|
896
|
+
if (existingCandidate) {
|
|
897
|
+
// LLM confirms a stats candidate — promote and boost
|
|
898
|
+
existingCandidate.validatedByLLM = true;
|
|
899
|
+
existingCandidate.status = 'confirmed';
|
|
900
|
+
existingCandidate.confidence = Math.min(existingCandidate.confidence + 20, 100);
|
|
901
|
+
console.log(`[AnalysisEngine] LLM confirmed PK candidate: ${schemaName}.${tableName} (${columns.join(', ')}), confidence: ${existingCandidate.confidence}`);
|
|
902
|
+
// Update column flags
|
|
903
|
+
for (const colName of columns) {
|
|
904
|
+
const column = this.findColumnInState(state, schemaName, tableName, colName);
|
|
905
|
+
if (column)
|
|
906
|
+
column.isPrimaryKey = true;
|
|
907
|
+
}
|
|
908
|
+
return;
|
|
909
|
+
}
|
|
910
|
+
// New PK proposal — verify ALL columns are PK-eligible deterministically
|
|
911
|
+
const table = this.stateManager.findTable(state, schemaName, tableName);
|
|
912
|
+
if (!table)
|
|
913
|
+
return;
|
|
914
|
+
for (const colName of columns) {
|
|
915
|
+
const column = table.columns.find(c => c.name.toLowerCase() === colName.toLowerCase());
|
|
916
|
+
if (!column) {
|
|
917
|
+
console.log(`[AnalysisEngine] LLM PK rejected: ${schemaName}.${tableName} — column "${colName}" not found`);
|
|
918
|
+
return;
|
|
919
|
+
}
|
|
920
|
+
// Check PK eligibility from stats
|
|
921
|
+
const stats = column.statistics;
|
|
922
|
+
if (!stats) {
|
|
923
|
+
console.log(`[AnalysisEngine] LLM PK rejected: ${schemaName}.${tableName}.${colName} — no statistics available`);
|
|
924
|
+
return;
|
|
925
|
+
}
|
|
926
|
+
// Safety: prefer pre-computed uniquenessRatio if available (totalRows can be 0 due to field naming bug)
|
|
927
|
+
const uniqueness = stats.uniquenessRatio != null && stats.uniquenessRatio > 0
|
|
928
|
+
? stats.uniquenessRatio
|
|
929
|
+
: (stats.totalRows > 0 ? stats.distinctCount / stats.totalRows : 0);
|
|
930
|
+
const hasNulls = (stats.nullCount || 0) > 0;
|
|
931
|
+
if (hasNulls) {
|
|
932
|
+
console.log(`[AnalysisEngine] LLM PK rejected: ${schemaName}.${tableName}.${colName} — has ${stats.nullCount} nulls`);
|
|
933
|
+
return;
|
|
934
|
+
}
|
|
935
|
+
if (uniqueness < 1.0) {
|
|
936
|
+
console.log(`[AnalysisEngine] LLM PK rejected: ${schemaName}.${tableName}.${colName} — uniqueness ${(uniqueness * 100).toFixed(1)}% (must be 100%)`);
|
|
937
|
+
return;
|
|
938
|
+
}
|
|
939
|
+
}
|
|
940
|
+
// All columns pass — create new PK candidate
|
|
941
|
+
const newPK = {
|
|
942
|
+
schemaName,
|
|
943
|
+
tableName,
|
|
944
|
+
columnNames: columns,
|
|
945
|
+
confidence,
|
|
946
|
+
evidence: {
|
|
947
|
+
uniqueness: 1.0,
|
|
948
|
+
nullCount: 0,
|
|
949
|
+
totalRows: table.rowCount || 0,
|
|
950
|
+
dataPattern: columns.length > 1 ? 'composite' : 'unknown',
|
|
951
|
+
namingScore: 0.5,
|
|
952
|
+
dataTypeScore: 0.8,
|
|
953
|
+
warnings: ['Created from LLM proposal — passed deterministic eligibility']
|
|
954
|
+
},
|
|
955
|
+
discoveredInIteration: 1,
|
|
956
|
+
validatedByLLM: true,
|
|
957
|
+
status: 'confirmed'
|
|
958
|
+
};
|
|
959
|
+
discoveryPhase.discovered.primaryKeys.push(newPK);
|
|
960
|
+
// Update column flags
|
|
961
|
+
for (const colName of columns) {
|
|
962
|
+
const column = this.findColumnInState(state, schemaName, tableName, colName);
|
|
963
|
+
if (column)
|
|
964
|
+
column.isPrimaryKey = true;
|
|
965
|
+
}
|
|
966
|
+
console.log(`[AnalysisEngine] Created PK from LLM: ${schemaName}.${tableName} (${columns.join(', ')}) confidence: ${confidence}`);
|
|
967
|
+
}
|
|
542
968
|
/**
|
|
543
969
|
* Process structured FK insights from LLM and create feedback to discovery phase
|
|
544
970
|
*
|