@memberjunction/db-auto-doc 5.37.0 → 5.39.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/README.md +31 -0
  2. package/dist/core/AnalysisOrchestrator.d.ts.map +1 -1
  3. package/dist/core/AnalysisOrchestrator.js +32 -2
  4. package/dist/core/AnalysisOrchestrator.js.map +1 -1
  5. package/dist/discovery/BridgeViewSQLGenerator.d.ts +67 -0
  6. package/dist/discovery/BridgeViewSQLGenerator.d.ts.map +1 -0
  7. package/dist/discovery/BridgeViewSQLGenerator.js +99 -0
  8. package/dist/discovery/BridgeViewSQLGenerator.js.map +1 -0
  9. package/dist/discovery/ColumnClusterer.d.ts +63 -0
  10. package/dist/discovery/ColumnClusterer.d.ts.map +1 -0
  11. package/dist/discovery/ColumnClusterer.js +205 -0
  12. package/dist/discovery/ColumnClusterer.js.map +1 -0
  13. package/dist/discovery/ColumnNormalizer.d.ts +106 -0
  14. package/dist/discovery/ColumnNormalizer.d.ts.map +1 -0
  15. package/dist/discovery/ColumnNormalizer.js +376 -0
  16. package/dist/discovery/ColumnNormalizer.js.map +1 -0
  17. package/dist/discovery/Composer.d.ts +59 -0
  18. package/dist/discovery/Composer.d.ts.map +1 -0
  19. package/dist/discovery/Composer.js +95 -0
  20. package/dist/discovery/Composer.js.map +1 -0
  21. package/dist/discovery/EmbeddingProvider.d.ts +27 -0
  22. package/dist/discovery/EmbeddingProvider.d.ts.map +1 -0
  23. package/dist/discovery/EmbeddingProvider.js +87 -0
  24. package/dist/discovery/EmbeddingProvider.js.map +1 -0
  25. package/dist/discovery/FKGraphWalker.d.ts +108 -0
  26. package/dist/discovery/FKGraphWalker.d.ts.map +1 -0
  27. package/dist/discovery/FKGraphWalker.js +169 -0
  28. package/dist/discovery/FKGraphWalker.js.map +1 -0
  29. package/dist/discovery/OrganicKeyDetector.d.ts +51 -0
  30. package/dist/discovery/OrganicKeyDetector.d.ts.map +1 -0
  31. package/dist/discovery/OrganicKeyDetector.js +78 -0
  32. package/dist/discovery/OrganicKeyDetector.js.map +1 -0
  33. package/dist/discovery/OrganicKeyTranslator.d.ts +78 -0
  34. package/dist/discovery/OrganicKeyTranslator.d.ts.map +1 -0
  35. package/dist/discovery/OrganicKeyTranslator.js +166 -0
  36. package/dist/discovery/OrganicKeyTranslator.js.map +1 -0
  37. package/dist/discovery/SemanticPhase.d.ts +70 -0
  38. package/dist/discovery/SemanticPhase.d.ts.map +1 -0
  39. package/dist/discovery/SemanticPhase.js +423 -0
  40. package/dist/discovery/SemanticPhase.js.map +1 -0
  41. package/dist/discovery/StructuralPhase.d.ts +24 -0
  42. package/dist/discovery/StructuralPhase.d.ts.map +1 -0
  43. package/dist/discovery/StructuralPhase.js +23 -0
  44. package/dist/discovery/StructuralPhase.js.map +1 -0
  45. package/dist/discovery/TransitiveBridgeDetector.d.ts +65 -0
  46. package/dist/discovery/TransitiveBridgeDetector.d.ts.map +1 -0
  47. package/dist/discovery/TransitiveBridgeDetector.js +244 -0
  48. package/dist/discovery/TransitiveBridgeDetector.js.map +1 -0
  49. package/dist/generators/AdditionalSchemaInfoGenerator.d.ts +12 -0
  50. package/dist/generators/AdditionalSchemaInfoGenerator.d.ts.map +1 -1
  51. package/dist/generators/AdditionalSchemaInfoGenerator.js +31 -0
  52. package/dist/generators/AdditionalSchemaInfoGenerator.js.map +1 -1
  53. package/dist/types/config.d.ts +71 -0
  54. package/dist/types/config.d.ts.map +1 -1
  55. package/dist/types/config.js.map +1 -1
  56. package/dist/types/organic-keys.d.ts +141 -0
  57. package/dist/types/organic-keys.d.ts.map +1 -0
  58. package/dist/types/organic-keys.js +27 -0
  59. package/dist/types/organic-keys.js.map +1 -0
  60. package/dist/types/state.d.ts +7 -0
  61. package/dist/types/state.d.ts.map +1 -1
  62. package/dist/utils/json.d.ts +40 -0
  63. package/dist/utils/json.d.ts.map +1 -0
  64. package/dist/utils/json.js +141 -0
  65. package/dist/utils/json.js.map +1 -0
  66. package/package.json +5 -5
@@ -0,0 +1,376 @@
1
+ /**
2
+ * TableNormalizer — one LLM call per TABLE (not per column), upstream of embedding.
3
+ *
4
+ * For each in-scope table, a single LLM call sees:
5
+ * - The table's name + description + sibling columns
6
+ * - Every column's identity + description + sample values + FK/PK status
7
+ *
8
+ * The call returns one normalized entry per column with:
9
+ * - conceptName : canonical snake_case (`email_address`, `customer_id`, ...)
10
+ * - normalizationStrategy : how values should be compared
11
+ * - normalizedDescription : business-concept-focused, system-agnostic sentence
12
+ * - isUsefulOrganicKey : false for audit/system/free-form (filtered out)
13
+ * - confidence + reasoning
14
+ *
15
+ * Why per-table instead of per-column:
16
+ * - Fewer calls (5K cols across 500 tables → 500 calls instead of 5K). At
17
+ * Gemini Flash pricing, that's ~$0.04 instead of ~$0.20 for APTIFY-scale.
18
+ * - System prompt amortizes across all columns in the table.
19
+ * - The LLM sees siblings as context — knowing the table has FirstName + LastName
20
+ * next to an Email column reveals it's a person email, not a server hostname.
21
+ * - More token-efficient: one JSON array out instead of N independent objects.
22
+ *
23
+ * The single most important constraint: same-concept columns from DIFFERENT
24
+ * TABLES (across systems) must produce the same conceptName and a similar
25
+ * normalizedDescription so the embedding step naturally clusters them. The
26
+ * prompt steers this with example concept names and a fixed description template.
27
+ */
28
+ import { createLLMInstance } from '../utils/llm-factory.js';
29
+ import { cleanAndParseJSON } from '../utils/json.js';
30
/**
 * Sends one chat-completion request per table and maps the returned JSON back
 * onto the table's input columns.
 */
export class TableNormalizer {
    /**
     * @param aiConfig AI settings. This class reads `provider`, `apiKey`, `model`,
     *   `temperature` and `maxTokens` from it.
     */
    constructor(aiConfig) {
        this.aiConfig = aiConfig;
        this.llm = createLLMInstance(aiConfig.provider, aiConfig.apiKey);
    }
    /**
     * Normalize one table — one LLM call returning per-column entries.
     *
     * @param input Table descriptor: `schema`, `table`, optional descriptions and `columns`.
     * @param maxRetries Extra attempts after the first one (total attempts = maxRetries + 1).
     * @returns `{ normalized, tokens }` on success, or `{ normalized: [], tokens, errorMessage }`
     *   once every attempt has failed. `tokens` only accumulates usage from calls that
     *   reported success, including ones whose content later failed to parse.
     */
    async normalizeTable(input, maxRetries = 2) {
        if (input.columns.length === 0) {
            return { normalized: [], tokens: { total: 0, input: 0, output: 0 } };
        }
        const params = {
            model: this.aiConfig.model,
            messages: [
                { role: 'system', content: SYSTEM_PROMPT },
                { role: 'user', content: buildUserPrompt(input) },
            ],
            temperature: this.aiConfig.temperature ?? 0,
            maxOutputTokens: this.aiConfig.maxTokens,
            responseFormat: 'JSON',
        };
        // A thrown value is not guaranteed to be an Error (it may even be null/undefined),
        // so never dereference `.message` unguarded inside a catch handler.
        const describe = (err) => err?.message ?? String(err);
        // LLM output is untrusted: keep strings, replace anything else with the fallback.
        const text = (value, fallback) => (typeof value === 'string' ? value : fallback);
        let lastError = '';
        let cumTokens = { total: 0, input: 0, output: 0 };
        for (let attempt = 0; attempt <= maxRetries; attempt++) {
            let result;
            try {
                result = await this.llm.ChatCompletion(params);
            }
            catch (err) {
                lastError = `LLM call threw: ${describe(err)}`;
                continue;
            }
            if (!result?.success) {
                lastError = `LLM call failed: ${result?.errorMessage ?? 'unknown'}`;
                continue;
            }
            const content = result.data?.choices?.[0]?.message?.content ?? '';
            const usage = result.data?.usage;
            cumTokens = {
                total: cumTokens.total + (usage?.totalTokens ?? 0),
                input: cumTokens.input + (usage?.promptTokens ?? 0),
                output: cumTokens.output + (usage?.completionTokens ?? 0),
            };
            let parsed = null;
            try {
                parsed = cleanAndParseJSON(content);
            }
            catch (err) {
                // On the final attempt the loop ends and the trailing return reports lastError.
                lastError = `JSON parse threw: ${describe(err)}. Content prefix: ${content.slice(0, 200)}`;
                continue;
            }
            if (!parsed || !Array.isArray(parsed.columns)) {
                lastError = `JSON parse returned bad shape. Content prefix: ${content.slice(0, 200)}`;
                continue;
            }
            // Match the LLM's response entries back to input columns by name: exact match
            // first (so columns differing only by case stay distinct), then case-insensitive.
            const byExactName = new Map(input.columns.map((c) => [c.column, c]));
            const byFoldedName = new Map(input.columns.map((c) => [c.column.toLowerCase(), c]));
            const alreadyMapped = new Set();
            const normalized = [];
            for (const entry of parsed.columns) {
                if (!entry || typeof entry.column !== 'string')
                    continue;
                const inputCol = byExactName.get(entry.column) ?? byFoldedName.get(entry.column.toLowerCase());
                if (!inputCol)
                    continue; // LLM hallucinated a column name; skip
                if (alreadyMapped.has(inputCol))
                    continue; // LLM repeated a column; the first entry wins
                alreadyMapped.add(inputCol);
                const usefulFlag = entry.isUsefulOrganicKey;
                normalized.push({
                    ...inputCol,
                    conceptName: text(entry.conceptName, ''),
                    normalizationStrategy: text(entry.normalizationStrategy, '') || 'LowerCaseTrim',
                    // Only hand strings to sanitizePlaceholder; anything else means "no expression".
                    customNormalizationExpression: sanitizePlaceholder(text(entry.customNormalizationExpression, undefined)),
                    normalizedDescription: text(entry.normalizedDescription, ''),
                    // A quoted "false" must not be coerced to true by plain truthiness.
                    isUsefulOrganicKey: typeof usefulFlag === 'string'
                        ? usefulFlag.trim().toLowerCase() === 'true'
                        : !!usefulFlag,
                    confidence: clamp01(entry.confidence),
                    reasoning: text(entry.reasoning, ''),
                });
            }
            return { normalized, tokens: cumTokens };
        }
        return { normalized: [], tokens: cumTokens, errorMessage: lastError || 'unknown failure after retries' };
    }
    /**
     * Batch normalize many tables with bounded concurrency.
     *
     * @param tables Table descriptors, each passed to `normalizeTable`.
     * @param opts Optional `concurrency` (default 8, minimum 1), `maxRetries` (default 2,
     *   minimum 0) and `onProgress(completed, total)`, invoked after every table.
     * @returns `normalized` holds only entries flagged `isUsefulOrganicKey`, in completion
     *   order rather than input order; `rejected` counts the other entries; `errors` counts
     *   tables whose normalization returned an `errorMessage`; `tokens` sums usage over all tables.
     */
    async normalizeAll(tables, opts = {}) {
        // NaN would otherwise yield zero workers (a silent empty result) and Infinity an
        // unbounded worker array; there is never a reason for more workers than tables.
        const requestedWorkers = Number(opts.concurrency ?? 8);
        const wantedWorkers = Number.isNaN(requestedWorkers) ? 8 : Math.max(1, Math.floor(requestedWorkers));
        const concurrency = Math.min(wantedWorkers, Math.max(1, tables.length));
        const requestedRetries = Number(opts.maxRetries ?? 2);
        const maxRetries = Number.isNaN(requestedRetries) ? 2 : Math.max(0, requestedRetries);
        const allNormalized = [];
        let rejected = 0;
        let errors = 0;
        let total = 0;
        let input = 0;
        let output = 0;
        let completed = 0;
        // Shared work-queue index: each worker claims the next unprocessed table.
        let cursor = 0;
        const runners = Array.from({ length: concurrency }, async () => {
            while (true) {
                const idx = cursor++;
                if (idx >= tables.length)
                    return;
                const r = await this.normalizeTable(tables[idx], maxRetries);
                total += r.tokens.total;
                input += r.tokens.input;
                output += r.tokens.output;
                if (r.errorMessage) {
                    errors++;
                }
                else {
                    for (const n of r.normalized) {
                        if (n.isUsefulOrganicKey)
                            allNormalized.push(n);
                        else
                            rejected++;
                    }
                }
                completed++;
                opts.onProgress?.(completed, tables.length);
            }
        });
        await Promise.all(runners);
        return { normalized: allNormalized, rejected, errors, tokens: { total, input, output } };
    }
}
153
+ // ─── Prompt ─────────────────────────────────────────────────────────────────
154
+ const SYSTEM_PROMPT = `You are translating database columns into a NORMALIZED BUSINESS-CONCEPT REPRESENTATION
155
+ for organic-key detection per MemberJunction PR #2193.
156
+
157
+ ═══ PR #2193 — WHAT AN ORGANIC KEY IS ═══
158
+
159
+ An organic key is a column whose value can be used to MATCH two rows that
160
+ refer to the SAME real-world entity, WITHOUT going through a declared foreign
161
+ key. PR #2193 lets the framework "join by value" wherever the schema lacks an
162
+ explicit FK link (cross-system data, late-bound integrations, denormalized
163
+ warehouses, partial schemas).
164
+
165
+ KEEP a column as an organic-key candidate when this test passes:
166
+
167
+ "If I take two rows that have the same value in this column, do they
168
+ refer to the SAME real-world entity (the same customer, the same person,
169
+ the same order, the same product, the same location, the same legal
170
+ entity, the same communication endpoint)?"
171
+
172
+ This test is SATISFIED by:
173
+ - Customer / member / employee / person / company / product / order IDs —
174
+ natural OR surrogate. Within a database an EmployeeID of 42 in one table
175
+ DOES refer to the same employee as EmployeeID 42 in another table. That
176
+ is the WHOLE POINT of PR #2193 — these are the matches it makes navigable.
177
+ - Email addresses, phone numbers, fax numbers, URLs.
178
+ - Tax IDs, social security numbers, account numbers, license numbers.
179
+ - ISBNs, SKUs, product codes, part numbers.
180
+ - Postal codes, street addresses, geocodes (they identify a delivery point
181
+ or location entity).
182
+ - Full names, first names, last names, organization names — identifiers of
183
+ persons or organizations even when fuzzy.
184
+
185
+ FK columns are EXPLICITLY ALLOWED. PR #2193 organic keys often overlap with
186
+ FK columns by design — the same EmployeeID that's a declared FK in one table
187
+ is the organic match key for tables where the FK isn't declared. Do not
188
+ disqualify a column just because it participates in a FK.
189
+
190
+ REJECT (isUsefulOrganicKey=false) ONLY in these cases:
191
+
192
+ - Categorical / enum-like values with a small fixed vocabulary (status =
193
+ 'Active'/'Pending'/'Closed'; type = 'A'/'B'/'C'; region code = 'NA'/'EMEA';
194
+ country code = 'US'/'UK'/'FR'). Two rows sharing status='Active' do NOT
195
+ refer to the same real-world entity — they're just both active.
196
+
197
+ - Booleans / flags (IsActive, HasDiscount).
198
+
199
+ - Measurements, quantities, prices, percentages, aggregates (price, qty,
200
+ discount, count, score, weight).
201
+
202
+ - Audit metadata (created_at, modified_by, version, row_version, rowguid,
203
+ last_login_at).
204
+
205
+ - Free-form descriptive text (notes, comments, description paragraphs,
206
+ long-text fields).
207
+
208
+ - System paths / blob references (photo_path, file_url, attachment_uri)
209
+ when they're pointers to assets rather than the asset's identity.
210
+
211
+ DO NOT REJECT a column just because:
212
+ - It is auto-increment (auto-increment IDs are still valid organic keys
213
+ across tables in the same system).
214
+ - It is a foreign key (FKs are valid organic keys, see above).
215
+ - It "identifies a location, not the parent row's entity" (a postal code
216
+ DOES identify a delivery location entity, which is a valid organic-key
217
+ use; clustering will decide whether it's useful).
218
+ - Names "could collide" (low uniqueness lowers confidence but does NOT
219
+ disqualify — names are valid organic-key candidates per PR #2193).
220
+
221
+ ═══ YOUR TASK ═══
222
+
223
+ For each column you receive, produce a JSON object with these fields. Look at
224
+ the sibling columns and the table's purpose for context — they often clarify
225
+ whether a value identifies or categorizes.
226
+
227
+ 1. normalizedDescription — A STRUCTURED BUSINESS-FOCUSED SENTENCE that any
228
+ reader (or embedding model) can use to recognize this kind of value. Use
229
+ this exact structural template:
230
+
231
+ "<value-kind> identifying <entity-kind>; <normalization rule>."
232
+
233
+ Example shapes:
234
+ "RFC-5322 email address identifying a natural person; case-insensitive
235
+ whitespace-trimmed equality."
236
+ "E.164 phone number identifying a person or organization; digits-only
237
+ equality after stripping formatting."
238
+ "Customer identifier (auto-increment or business code) identifying a
239
+ customer entity across tables; exact equality after trimming."
240
+ "Employee identifier identifying an employee across HR tables; exact
241
+ equality."
242
+ "Postal / ZIP code identifying a delivery area; exact equality after
243
+ trimming."
244
+ "Family name (last name) identifying a natural person; case-insensitive
245
+ whitespace-trimmed equality."
246
+
247
+ For REJECTED columns, the template flips:
248
+ "ISO-3166 country code (categorical, 250 buckets); not an entity
249
+ identifier; not applicable."
250
+ "Order line quantity (measurement); not an entity identifier; not
251
+ applicable."
252
+ "Modification timestamp (audit metadata); not applicable."
253
+
254
+ Same-concept columns from different tables MUST produce highly similar
255
+ normalizedDescription strings so they cluster geometrically. Generic prose
256
+ ("a code", "an identifier") is NOT acceptable — name the value kind
257
+ (email address, phone number, customer id, postal code, family name, etc.)
258
+ and the entity kind explicitly.
259
+
260
+ 2. conceptName — A canonical snake_case label for this value kind
261
+ (e.g. "email_address", "phone_number", "postal_code", "customer_id",
262
+ "employee_id", "person_family_name"). Use the same name for the same
263
+ concept across tables. Used as a cluster label hint downstream.
264
+
265
+ 3. normalizationStrategy — How equality should be tested at match time:
266
+ "LowerCaseTrim" (default for case-insensitive text)
267
+ "Trim" (whitespace only)
268
+ "ExactMatch" (codes, IDs that are case-sensitive)
269
+ "Custom" (provide customNormalizationExpression — a SQL expression
270
+ that MUST use the literal placeholder {{FieldName}} where the
271
+ column reference goes, e.g.
272
+ REPLACE(REPLACE({{FieldName}}, '-', ''), ' ', ''). Do NOT use
273
+ 'value', 'x', or a column name — only {{FieldName}}.)
274
+
275
+ 4. isUsefulOrganicKey — Apply the test at the top of the prompt:
276
+ "If I take two rows with the same value in this column, do they refer to
277
+ the SAME real-world entity?" True for IDs, emails, phones, names,
278
+ addresses, codes that are entity-level. False ONLY for categorical
279
+ enums, booleans, measurements, audit metadata, free-form text, and
280
+ asset paths.
281
+
282
+ 5. confidence — 0.0 to 1.0. Reflect uncertainty honestly; sample values that
283
+ contradict the column name should lower confidence even if you commit to
284
+ a judgment.
285
+
286
+ 6. reasoning — One short sentence stating why this kind of value satisfies
287
+ or fails the test in field 4.
288
+
289
+ Sample values are the strongest signal. If the column is named "Status" but
290
+ samples are all distinct uuid-like tokens, trust the data over the name.
291
+
292
+ Output STRICT JSON only, no markdown fences:
293
+ {
294
+ "columns": [
295
+ {
296
+ "column": "<exact column name from input>",
297
+ "conceptName": "snake_case_name",
298
+ "normalizationStrategy": "LowerCaseTrim" | "Trim" | "ExactMatch" | "Custom",
299
+ "customNormalizationExpression": "...",
300
+ "normalizedDescription": "<structured sentence per template above>",
301
+ "isUsefulOrganicKey": true,
302
+ "confidence": 0.95,
303
+ "reasoning": "One short sentence."
304
+ }
305
+ ]
306
+ }
307
+
308
+ Include EVERY column from the input, in the same order. Match the "column" field
309
+ exactly to the input column name.`;
310
/**
 * Normalize whatever column placeholder the LLM used in a Custom expression to the
 * canonical {{FieldName}} token that CodeGen + the PR #2193 runtime substitute. The
 * prompt asks for {{FieldName}}, but models frequently emit `value`, `x`, `col`, or
 * `column` instead — this guards against the resulting silent runtime breakage where
 * the literal placeholder would survive into the executed SQL.
 *
 * Stand-in identifiers are detected and rewritten only OUTSIDE single-quoted SQL string
 * literals, so `REPLACE(x, 'x', '')` keeps its `'x'` literal intact.
 *
 * @param expr Expression as returned by the LLM; may be missing or off-spec.
 * @returns The expression with a canonical placeholder; falsy input is returned
 *   unchanged; truthy non-string input yields `undefined`.
 */
function sanitizePlaceholder(expr) {
    if (!expr)
        return expr;
    // Off-spec LLM output (number, object, ...) cannot be an expression; do not throw on it.
    if (typeof expr !== 'string')
        return undefined;
    // Already correct — only normalize inner whitespace such as "{{ FieldName }}".
    if (/\{\{\s*FieldName\s*\}\}/.test(expr)) {
        return expr.replace(/\{\{\s*FieldName\s*\}\}/g, '{{FieldName}}');
    }
    // Splitting on a capturing group keeps the delimiters: even indexes are bare SQL,
    // odd indexes are single-quoted literals ('' is an escaped quote inside a literal).
    const parts = expr.split(/('(?:[^']|'')*')/);
    // Replace common standalone placeholder identifiers (word-boundary, not inside quotes).
    // Order matters: longer tokens first.
    for (const token of ['column', 'value', 'col', 'x']) {
        const probe = new RegExp(`\\b${token}\\b`);
        const usedOutsideQuotes = parts.some((part, i) => i % 2 === 0 && probe.test(part));
        if (!usedOutsideQuotes)
            continue;
        // Only the first matching convention is the placeholder.
        const everywhere = new RegExp(probe.source, 'g');
        return parts
            .map((part, i) => (i % 2 === 0 ? part.replace(everywhere, '{{FieldName}}') : part))
            .join('');
    }
    return expr;
}
336
+ // ─── Helpers ────────────────────────────────────────────────────────────────
337
/**
 * Renders the user message for one table: a header (qualified name plus optional
 * schema/table purpose, each cut to 240 chars), one bullet per column with its data
 * type and PK/FK markers, an optional description line, up to five JSON-quoted sample
 * values (each cut to 80 chars), and a closing instruction line.
 */
function buildUserPrompt(input) {
    const header = [`Table: ${input.schema}.${input.table}`];
    if (input.schemaDescription) {
        header.push(`Schema purpose: ${truncate(input.schemaDescription, 240)}`);
    }
    if (input.tableDescription) {
        header.push(`Table purpose: ${truncate(input.tableDescription, 240)}`);
    }
    const columnLines = input.columns.flatMap((col) => {
        const pkTag = col.isPrimaryKey ? ' PK' : '';
        let fkTag = '';
        if (col.participatesInFK) {
            const target = col.fkTarget;
            // The arrow and target are appended only when the FK target is known.
            fkTag = target ? ` FK→${target.schema}.${target.table}.${target.column}` : ' FK';
        }
        const rendered = [` - ${col.column} [${col.dataType}]${pkTag}${fkTag}`];
        if (col.originalDescription) {
            rendered.push(` description: ${truncate(col.originalDescription, 240)}`);
        }
        if (col.sampleValues && col.sampleValues.length > 0) {
            const quoted = [];
            for (const raw of col.sampleValues.slice(0, 5)) {
                quoted.push(JSON.stringify(truncate(String(raw), 80)));
            }
            rendered.push(` samples: [${quoted.join(', ')}]`);
        }
        return rendered;
    });
    return [
        ...header,
        '',
        `Columns (${input.columns.length}):`,
        ...columnLines,
        '',
        'Output the normalized JSON per the system prompt — one entry per column, in order.',
    ].join('\n');
}
362
/**
 * Restricts a value to the closed interval [0, 1]. Anything that is not a finite
 * number (NaN, ±Infinity, undefined, strings, ...) collapses to 0.
 */
function clamp01(x) {
    if (!Number.isFinite(x)) {
        return 0;
    }
    return x < 0 ? 0 : x > 1 ? 1 : x;
}
371
/**
 * Shortens `s` to at most `n` characters, replacing the tail with a single '…' when
 * it had to be cut. Falsy input yields the empty string.
 */
function truncate(s, n) {
    if (!s) {
        return '';
    }
    const overflows = s.length > n;
    if (!overflows) {
        return s;
    }
    return `${s.slice(0, n - 1)}…`;
}
376
+ //# sourceMappingURL=ColumnNormalizer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ColumnNormalizer.js","sourceRoot":"","sources":["../../src/discovery/ColumnNormalizer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAGH,OAAO,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAG5D,OAAO,EAAE,iBAAiB,EAAE,MAAM,kBAAkB,CAAC;AA0DrD,MAAM,OAAO,eAAe;IAGxB,YAA6B,QAAkB;QAAlB,aAAQ,GAAR,QAAQ,CAAU;QAC3C,IAAI,CAAC,GAAG,GAAG,iBAAiB,CAAC,QAAQ,CAAC,QAAQ,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;IACrE,CAAC;IAED,uEAAuE;IAChE,KAAK,CAAC,cAAc,CACvB,KAA8B,EAC9B,UAAU,GAAG,CAAC;QAMd,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC7B,OAAO,EAAE,UAAU,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACzE,CAAC;QAED,MAAM,UAAU,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC1C,MAAM,MAAM,GAAe;YACvB,KAAK,EAAE,IAAI,CAAC,QAAQ,CAAC,KAAK;YAC1B,QAAQ,EAAE;gBACN,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,aAAa,EAAE;gBAC1C,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE;aACxC;YACD,WAAW,EAAE,IAAI,CAAC,QAAQ,CAAC,WAAW,IAAI,CAAC;YAC3C,eAAe,EAAE,IAAI,CAAC,QAAQ,CAAC,SAAS;YACxC,cAAc,EAAE,MAAM;SACzB,CAAC;QAEF,IAAI,SAAS,GAAG,EAAE,CAAC;QACnB,IAAI,SAAS,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC;QAClD,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,UAAU,EAAE,OAAO,EAAE,EAAE,CAAC;YACrD,IAAI,MAA8B,CAAC;YACnC,IAAI,CAAC;gBACD,MAAM,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC;YACnD,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACX,SAAS,GAAG,mBAAoB,GAAa,CAAC,OAAO,EAAE,CAAC;gBACxD,SAAS;YACb,CAAC;YACD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;gBAClB,SAAS,GAAG,oBAAoB,MAAM,CAAC,YAAY,IAAI,SAAS,EAAE,CAAC;gBACnE,SAAS;YACb,CAAC;YAED,MAAM,OAAO,GAAG,MAAM,CAAC,IAAI,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,IAAI,EAAE,CAAC;YAClE,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC;YACjC,SAAS,GAAG;gBACR,KAAK,EAAE,SAAS,CAAC,KAAK,GAAG,CAAC,KAAK,EAAE,WAAW,IAAI,CAAC,CAAC;gBAClD,KAAK,EAAE,SAAS,CAAC,KAAK,GAAG,CAAC,KAAK,EAAE,YAAY,IAAI,CAAC,CAAC;gBACnD,MAAM,EAAE,SAAS,CAAC,MAAM,GAAG,CAAC,KAAK,EAAE,gBAAgB,IAAI,CAAC,CAAC;aAC5D,CAAC;YAEF,IAAI,MAAM,GAA4B,IAAI,CAAC;YAC3C,IAAI,CAAC;
gBACD,MAAM,GAAG,iBAAiB,CAAmB,OAAO,CAAC,CAAC;YAC1D,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACX,SAAS,GAAG,qBAAsB,GAAa,CAAC,OAAO,qBAAqB,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;gBACpG,IAAI,OAAO,GAAG,UAAU;oBAAE,SAAS;gBACnC,OAAO,EAAE,UAAU,EAAE,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,YAAY,EAAE,SAAS,EAAE,CAAC;YAC1E,CAAC;YACD,IAAI,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC;gBAC5C,SAAS,GAAG,kDAAkD,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;gBACtF,IAAI,OAAO,GAAG,UAAU;oBAAE,SAAS;gBACnC,OAAO,EAAE,UAAU,EAAE,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,YAAY,EAAE,SAAS,EAAE,CAAC;YAC1E,CAAC;YAED,kEAAkE;YAClE,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9E,MAAM,UAAU,GAAuB,EAAE,CAAC;YAC1C,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;gBACjC,IAAI,CAAC,KAAK,IAAI,OAAO,KAAK,CAAC,MAAM,KAAK,QAAQ;oBAAE,SAAS;gBACzD,MAAM,QAAQ,GAAG,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC,CAAC;gBACxD,IAAI,CAAC,QAAQ;oBAAE,SAAS,CAAC,uCAAuC;gBAChE,UAAU,CAAC,IAAI,CAAC;oBACZ,GAAG,QAAQ;oBACX,WAAW,EAAE,KAAK,CAAC,WAAW,IAAI,EAAE;oBACpC,qBAAqB,EAAE,KAAK,CAAC,qBAAqB,IAAI,eAAe;oBACrE,6BAA6B,EAAE,mBAAmB,CAAC,KAAK,CAAC,6BAA6B,CAAC;oBACvF,qBAAqB,EAAE,KAAK,CAAC,qBAAqB,IAAI,EAAE;oBACxD,kBAAkB,EAAE,CAAC,CAAC,KAAK,CAAC,kBAAkB;oBAC9C,UAAU,EAAE,OAAO,CAAC,KAAK,CAAC,UAAU,CAAC;oBACrC,SAAS,EAAE,KAAK,CAAC,SAAS,IAAI,EAAE;iBACnC,CAAC,CAAC;YACP,CAAC;YACD,OAAO,EAAE,UAAU,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;QAC7C,CAAC;QACD,OAAO,EAAE,UAAU,EAAE,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,YAAY,EAAE,SAAS,IAAI,+BAA+B,EAAE,CAAC;IAC7G,CAAC;IAED,4DAA4D;IACrD,KAAK,CAAC,YAAY,CACrB,MAAiC,EACjC,OAA0B,EAAE;QAE5B,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,IAAI,CAAC,CAAC,CAAC;QACvD,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,UAAU,IAAI,CAAC,CAAC,CAAC;QAErD,MAAM,aAAa,GAAuB,EAAE,CAAC;QAC7C,IAAI,QAAQ,GAAG,CAAC,CAAC;QACjB,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,MAAM,GAAG,
CAAC,CAAC;QACf,IAAI,SAAS,GAAG,CAAC,CAAC;QAElB,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,WAAW,EAAE,EAAE,KAAK,IAAI,EAAE;YAC3D,OAAO,IAAI,EAAE,CAAC;gBACV,MAAM,GAAG,GAAG,MAAM,EAAE,CAAC;gBACrB,IAAI,GAAG,IAAI,MAAM,CAAC,MAAM;oBAAE,OAAO;gBACjC,MAAM,CAAC,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,UAAU,CAAC,CAAC;gBAC7D,KAAK,IAAI,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;gBACxB,KAAK,IAAI,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;gBACxB,MAAM,IAAI,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC;gBAE1B,IAAI,CAAC,CAAC,YAAY,EAAE,CAAC;oBACjB,MAAM,EAAE,CAAC;gBACb,CAAC;qBAAM,CAAC;oBACJ,KAAK,MAAM,CAAC,IAAI,CAAC,CAAC,UAAU,EAAE,CAAC;wBAC3B,IAAI,CAAC,CAAC,kBAAkB;4BAAE,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;;4BAC3C,QAAQ,EAAE,CAAC;oBACpB,CAAC;gBACL,CAAC;gBACD,SAAS,EAAE,CAAC;gBACZ,IAAI,CAAC,UAAU,EAAE,CAAC,SAAS,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;YAChD,CAAC;QACL,CAAC,CAAC,CAAC;QACH,MAAM,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QAC3B,OAAO,EAAE,UAAU,EAAE,aAAa,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE,CAAC;IAC7F,CAAC;CACJ;AAED,+EAA+E;AAE/E,MAAM,aAAa,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;kCA2JY,CAAC;AAEnC;;;;;;GAMG;AACH,SAAS,mBAAmB,CAAC,IAAwB;IACjD,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IACvB,IAAI,GAAG,GAAG,IAAI,CAAC;IACf,iCAAiC;IACjC,IAAI,yBAAyB,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;QACtC,OAAO,GAAG,CAAC,OAAO,CAAC,0BAA0B,EAAE,eAAe,CAAC,CAAC;IACpE,CAAC;IACD,wFAAwF;IACxF,sCAAsC;IACtC,KAAK,MAAM,KAAK,IAAI,CAAC,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,GAAG,CAAC,EAAE,CAAC;QAClD,MAAM,EAAE,GAAG,IAAI,MAAM,CAAC,MAAM,KAAK,KAAK,EAAE,GAAG,CAAC,CAAC;QAC7C,IAAI,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YACf,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,EAAE,EAAE,eAAe,CAAC,CAAC;YACvC,MAAM,CAAC,wDAAwD;QACnE,CAAC;IACL,CAAC;IACD,OAAO,GAAG,CAAC;AACf,CAAC;AAqBD,+EAA+E;AAE/E,SAAS,eAAe,CAAC,KAA8B;IACnD,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,CAAC,IAAI,CAAC,UAAU,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC;IACpD
,IAAI,KAAK,CAAC,iBAAiB;QAAE,KAAK,CAAC,IAAI,CAAC,mBAAmB,QAAQ,CAAC,KAAK,CAAC,iBAAiB,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;IACrG,IAAI,KAAK,CAAC,gBAAgB;QAAE,KAAK,CAAC,IAAI,CAAC,mBAAmB,QAAQ,CAAC,KAAK,CAAC,gBAAgB,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;IACnG,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACf,KAAK,CAAC,IAAI,CAAC,YAAY,KAAK,CAAC,OAAO,CAAC,MAAM,IAAI,CAAC,CAAC;IACjD,KAAK,MAAM,CAAC,IAAI,KAAK,CAAC,OAAO,EAAE,CAAC;QAC5B,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,MAAM,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,gBAAgB,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,MAAM,IAAI,CAAC,CAAC,QAAQ,CAAC,KAAK,IAAI,CAAC,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACtM,IAAI,CAAC,CAAC,mBAAmB;YAAE,KAAK,CAAC,IAAI,CAAC,sBAAsB,QAAQ,CAAC,CAAC,CAAC,mBAAmB,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;QACpG,IAAI,CAAC,CAAC,YAAY,IAAI,CAAC,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9C,MAAM,OAAO,GAAG,CAAC,CAAC,YAAY;iBACzB,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;iBACX,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;iBACnD,IAAI,CAAC,IAAI,CAAC,CAAC;YAChB,KAAK,CAAC,IAAI,CAAC,mBAAmB,OAAO,GAAG,CAAC,CAAC;QAC9C,CAAC;IACL,CAAC;IACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACf,KAAK,CAAC,IAAI,CAAC,oFAAoF,CAAC,CAAC;IACjG,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC5B,CAAC;AAED,SAAS,OAAO,CAAC,CAAS;IACtB,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC;QAAE,OAAO,CAAC,CAAC;IAClC,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,CAAC,CAAC;IACpB,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,CAAC,CAAC;IACpB,OAAO,CAAC,CAAC;AACb,CAAC;AAED,SAAS,QAAQ,CAAC,CAAS,EAAE,CAAS;IAClC,IAAI,CAAC,CAAC;QAAE,OAAO,EAAE,CAAC;IAClB,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;AACtD,CAAC"}
@@ -0,0 +1,59 @@
1
+ /**
2
+ * Composer — flag-and-emit (no destructive filtering).
3
+ *
4
+ * Computes `isFKRedundant` for each cluster (PR #2193 organic keys are meant
5
+ * to be used "in place of a foreign-key reference" — when every non-PK member
6
+ * is a declared FK pointing to the PK member, the cluster adds no new
7
+ * navigation). Sets the flag on the cluster but does NOT drop — well-modeled
8
+ * OLTP schemas would lose 30-50% of valid organic-key candidates if we
9
+ * dropped, and the discovery value (cross-system extension, naming
10
+ * consistency checks) survives the redundancy.
11
+ *
12
+ * The dashboard surfaces a "hide FK-redundant" filter so users get the lookup-
13
+ * table-PK noise out of view without losing the underlying candidates.
14
+ */
15
+ import { OrganicKeyCluster, OrganicKeyClusterMember } from '../types/organic-keys.js';
16
+ import { DetectedOrganicKeysOutput } from './OrganicKeyTranslator.js';
17
+ import { TransitiveBridgeFinding } from './TransitiveBridgeDetector.js';
18
+ /** Output of the compose step: the PR #2193 JSON, the FK-redundancy-annotated clusters, and emit counts. */
19
+ export interface ComposerResult {
20
+ output: DetectedOrganicKeysOutput;
21
+ /** Clusters with isFKRedundant filled in — callers that persist the cluster list
22
+ * (e.g. detector → state.json → dashboard) should use THIS, not the pre-compose
23
+ * input, otherwise the flag is silently lost. */
24
+ annotatedClusters: OrganicKeyCluster[];
25
+ emitted: number;
26
+ flaggedFKRedundant: number;
27
+ summary: {
28
+ outputSchemas: number;
29
+ outputTables: number;
30
+ outputKeys: number;
31
+ outputSpokes: number;
32
+ };
33
+ }
34
+ /**
35
+ * Compose detected clusters + transitive bridges into the PR #2193 emit JSON.
36
+ *
37
+ * Each cluster is annotated with `isFKRedundant` (true when it's already navigable via a
38
+ * declared foreign key — kept but flagged, not dropped). Matching transitive bridges are
39
+ * attached as spokes. Returns the JSON plus the annotated clusters and emit counts.
40
+ */
41
+ export declare function compose(clusters: OrganicKeyCluster[], bridges: TransitiveBridgeFinding[]): ComposerResult;
42
+ /**
43
+ * A cluster is FK-redundant when ALL non-PK members are declared FKs pointing
44
+ * at the same target column (typically the PK member of the cluster). PR #2193
45
+ * organic keys are "used in place of a foreign-key reference" — if the FK is
46
+ * already declared, the cluster doesn't add navigability.
47
+ *
48
+ * Requires at least one PK and at least one FK in the cluster to apply.
49
+ * Returns false for clusters that are entirely PKs, entirely non-FKs, or that
50
+ * have mixed FK targets (the latter is a genuine value-based correlation that
51
+ * no single FK covers).
52
+ */
53
+ declare function isFKRedundant(cluster: OrganicKeyCluster): boolean;
54
+ /** Re-export for tests / observability. */
55
+ export declare const __test__: {
56
+ isFKRedundant: typeof isFKRedundant;
57
+ };
58
+ export type { OrganicKeyClusterMember };
59
+ //# sourceMappingURL=Composer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"Composer.d.ts","sourceRoot":"","sources":["../../src/discovery/Composer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,iBAAiB,EAAE,uBAAuB,EAAE,MAAM,0BAA0B,CAAC;AACtF,OAAO,EACH,yBAAyB,EAI5B,MAAM,2BAA2B,CAAC;AACnC,OAAO,EAAE,uBAAuB,EAAE,MAAM,+BAA+B,CAAC;AAExE,4GAA4G;AAC5G,MAAM,WAAW,cAAc;IAC3B,MAAM,EAAE,yBAAyB,CAAC;IAClC;;sDAEkD;IAClD,iBAAiB,EAAE,iBAAiB,EAAE,CAAC;IACvC,OAAO,EAAE,MAAM,CAAC;IAChB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,OAAO,EAAE;QAAE,aAAa,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAA;KAAE,CAAC;CACtG;AAED;;;;;;GAMG;AACH,wBAAgB,OAAO,CACnB,QAAQ,EAAE,iBAAiB,EAAE,EAC7B,OAAO,EAAE,uBAAuB,EAAE,GACnC,cAAc,CA0ChB;AAED;;;;;;;;;;GAUG;AACH,iBAAS,aAAa,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAiB1D;AAED,2CAA2C;AAC3C,eAAO,MAAM,QAAQ;;CAAoB,CAAC;AAC1C,YAAY,EAAE,uBAAuB,EAAE,CAAC"}
@@ -0,0 +1,95 @@
1
+ /**
2
+ * Composer — flag-and-emit (no destructive filtering).
3
+ *
4
+ * Computes `isFKRedundant` for each cluster (PR #2193 organic keys are meant
5
+ * to be used "in place of a foreign-key reference" — when every non-PK member
6
+ * is a declared FK pointing to the PK member, the cluster adds no new
7
+ * navigation). Sets the flag on the cluster but does NOT drop — well-modeled
8
+ * OLTP schemas would lose 30-50% of valid organic-key candidates if we
9
+ * dropped, and the discovery value (cross-system extension, naming
10
+ * consistency checks) survives the redundancy.
11
+ *
12
+ * The dashboard surfaces a "hide FK-redundant" filter so users get the lookup-
13
+ * table-PK noise out of view without losing the underlying candidates.
14
+ */
15
+ import { translateClusters, countOutputEntries, } from './OrganicKeyTranslator.js';
16
/**
 * Assemble the PR #2193 emit JSON from organic-key clusters and transitive bridges.
 *
 * Nothing is filtered out: each cluster is shallow-copied with an added
 * `isFKRedundant` flag, so consumers can hide FK-redundant entries without
 * losing them. A bridge becomes a spoke only when
 * `hubSchema.hubTable.<first hub key field>` exactly (case-sensitively) equals
 * the `schema.table.column` of some cluster member.
 *
 * @param clusters Detected organic-key clusters.
 * @param bridges Transitive bridge findings that may attach to cluster members.
 * @returns The translated output, the annotated clusters, the number of clusters
 *          emitted, how many were flagged FK-redundant, and the output entry counts.
 */
export function compose(clusters, bridges) {
    // Tag every cluster with its redundancy verdict, then tally the flagged ones.
    const annotated = clusters.map((cluster) => ({
        ...cluster,
        isFKRedundant: isFKRedundant(cluster),
    }));
    const flaggedFKRedundant = annotated.filter((cluster) => cluster.isFKRedundant).length;

    // Every member column of every cluster is a potential hub for a bridge.
    const memberKeys = new Set(
        annotated.flatMap((cluster) =>
            cluster.members.map((member) => `${member.schema}.${member.table}.${member.column}`),
        ),
    );

    // Only the first hub key field takes part in the match.
    const attachesToCluster = (bridge) =>
        memberKeys.has(`${bridge.hubSchema}.${bridge.hubTable}.${bridge.hubKeyFields[0]}`);

    // Property order is kept as-is because it determines the emitted JSON layout.
    const toSpoke = (bridge) => ({
        hubSchema: bridge.hubSchema,
        hubTable: bridge.hubTable,
        hubKeyFields: bridge.hubKeyFields,
        spokeSchema: bridge.spokeSchema,
        spokeTable: bridge.spokeTable,
        transitiveView: {
            Name: bridge.view.viewName,
            SchemaName: bridge.view.schemaName,
            SQL: bridge.view.sql,
        },
        transitiveMatchFieldNames: [bridge.view.hubKeyField],
        transitiveOutputFieldName: bridge.view.spokeOutputField,
        relatedEntityJoinFieldName: bridge.view.spokeJoinField,
        hubConcept: bridge.hubConcept,
    });

    const spokes = bridges.filter(attachesToCluster).map(toSpoke);
    const output = translateClusters(annotated, spokes);
    const tally = countOutputEntries(output);

    return {
        output,
        annotatedClusters: annotated,
        emitted: annotated.length,
        flaggedFKRedundant,
        summary: {
            outputSchemas: tally.schemas,
            outputTables: tally.tables,
            outputKeys: tally.keys,
            outputSpokes: tally.spokes,
        },
    };
}
65
+ /**
66
+ * A cluster is FK-redundant when ALL non-PK members are declared FKs pointing
67
+ * at the same target column (typically the PK member of the cluster). PR #2193
68
+ * organic keys are "used in place of a foreign-key reference" — if the FK is
69
+ * already declared, the cluster doesn't add navigability.
70
+ *
71
+ * Requires at least one PK and at least one FK in the cluster to apply.
72
+ * Returns false for clusters that are entirely PKs, entirely non-FKs, or that
73
+ * have mixed FK targets (the latter is a genuine value-based correlation that
74
+ * no single FK covers).
75
+ */
76
+ function isFKRedundant(cluster) {
77
+ const pkMembers = cluster.members.filter((m) => m.isPrimaryKey);
78
+ const nonPK = cluster.members.filter((m) => !m.isPrimaryKey);
79
+ if (pkMembers.length === 0 || nonPK.length === 0)
80
+ return false;
81
+ // Build the set of plausible "target" identifiers from the PK members.
82
+ const pkTargets = new Set(pkMembers.map((m) => `${m.schema}.${m.table}.${m.column}`.toLowerCase()));
83
+ // Every non-PK member must be a FK pointing into one of the PK targets.
84
+ for (const m of nonPK) {
85
+ if (!m.participatesInFK || !m.fkTarget)
86
+ return false;
87
+ const key = `${m.fkTarget.schema}.${m.fkTarget.table}.${m.fkTarget.column}`.toLowerCase();
88
+ if (!pkTargets.has(key))
89
+ return false;
90
+ }
91
+ return true;
92
+ }
93
+ /** Re-export for tests / observability. */
94
+ export const __test__ = { isFKRedundant };
95
+ //# sourceMappingURL=Composer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"Composer.js","sourceRoot":"","sources":["../../src/discovery/Composer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAGH,OAAO,EAGH,iBAAiB,EACjB,kBAAkB,GACrB,MAAM,2BAA2B,CAAC;AAenC;;;;;;GAMG;AACH,MAAM,UAAU,OAAO,CACnB,QAA6B,EAC7B,OAAkC;IAElC,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,MAAM,SAAS,GAAwB,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QACtD,MAAM,SAAS,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;QACnC,IAAI,SAAS;YAAE,YAAY,IAAI,CAAC,CAAC;QACjC,OAAO,EAAE,GAAG,CAAC,EAAE,aAAa,EAAE,SAAS,EAAE,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAClC,KAAK,MAAM,CAAC,IAAI,SAAS,EAAE,CAAC;QACxB,KAAK,MAAM,CAAC,IAAI,CAAC,CAAC,OAAO;YAAE,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;IACjF,CAAC;IACD,MAAM,MAAM,GAA2B,OAAO;SACzC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,SAAS,IAAI,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;SAC/E,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACT,SAAS,EAAE,CAAC,CAAC,SAAS;QACtB,QAAQ,EAAE,CAAC,CAAC,QAAQ;QACpB,YAAY,EAAE,CAAC,CAAC,YAAY;QAC5B,WAAW,EAAE,CAAC,CAAC,WAAW;QAC1B,UAAU,EAAE,CAAC,CAAC,UAAU;QACxB,cAAc,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,QAAQ,EAAE,UAAU,EAAE,CAAC,CAAC,IAAI,CAAC,UAAU,EAAE,GAAG,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE;QACzF,yBAAyB,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC;QAC/C,yBAAyB,EAAE,CAAC,CAAC,IAAI,CAAC,gBAAgB;QAClD,0BAA0B,EAAE,CAAC,CAAC,IAAI,CAAC,cAAc;QACjD,UAAU,EAAE,CAAC,CAAC,UAAU;KAC3B,CAAC,CAAC,CAAC;IAER,MAAM,MAAM,GAAG,iBAAiB,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IACpD,MAAM,MAAM,GAAG,kBAAkB,CAAC,MAAM,CAAC,CAAC;IAE1C,OAAO;QACH,MAAM;QACN,iBAAiB,EAAE,SAAS;QAC5B,OAAO,EAAE,SAAS,CAAC,MAAM;QACzB,kBAAkB,EAAE,YAAY;QAChC,OAAO,EAAE;YACL,aAAa,EAAE,MAAM,CAAC,OAAO;YAC7B,YAAY,EAAE,MAAM,CAAC,MAAM;YAC3B,UAAU,EAAE,MAAM,CAAC,IAAI;YACvB,YAAY,EAAE,MAAM,CAAC,MAAM;SAC9B;KACJ,CAAC;AACN,CAAC;AAED;;;;;;;;;;GAUG;AACH,SAAS,aAAa,CAAC,OAA0B;IAC7C,MAAM,SAAS,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC;IAChE,MAAM,KAAK,GAAG,OAAO
,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC;IAC7D,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IAE/D,uEAAuE;IACvE,MAAM,SAAS,GAAG,IAAI,GAAG,CACrB,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC,WAAW,EAAE,CAAC,CAC3E,CAAC;IAEF,wEAAwE;IACxE,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACpB,IAAI,CAAC,CAAC,CAAC,gBAAgB,IAAI,CAAC,CAAC,CAAC,QAAQ;YAAE,OAAO,KAAK,CAAC;QACrD,MAAM,GAAG,GAAG,GAAG,CAAC,CAAC,QAAQ,CAAC,MAAM,IAAI,CAAC,CAAC,QAAQ,CAAC,KAAK,IAAI,CAAC,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,WAAW,EAAE,CAAC;QAC1F,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,OAAO,KAAK,CAAC;IAC1C,CAAC;IACD,OAAO,IAAI,CAAC;AAChB,CAAC;AAED,2CAA2C;AAC3C,MAAM,CAAC,MAAM,QAAQ,GAAG,EAAE,aAAa,EAAE,CAAC"}
@@ -0,0 +1,27 @@
1
+ /**
2
+ * EmbeddingProvider — thin wrapper over MemberJunction's `BaseEmbeddings` infrastructure.
3
+ *
4
+ * Embeddings are produced through the same MJ ClassFactory + driver pattern that
5
+ * `llm-factory` uses for LLMs (so DBAutoDoc stays coupled to MJ's AI stack rather
6
+ * than talking to provider REST endpoints directly). The concrete driver class is
7
+ * resolved from the provider name and instantiated with the supplied API key.
8
+ *
9
+ * Vectors are unit-normalized so the clustering step can use cosine distance
10
+ * directly regardless of whether the underlying model returns normalized output.
11
+ */
12
+ /** Provider names that map to a registered `BaseEmbeddings` driver class. */
13
+ export type EmbeddingProviderName = 'openai' | 'mistral' | 'azure' | 'bedrock' | 'ollama' | 'local';
14
/** Configuration object accepted by `createEmbeddingProvider`. */
+ export interface EmbeddingProviderConfig {
15
/** Provider name; per this file's header, the concrete driver class is resolved from it. */
+ provider: EmbeddingProviderName;
16
/** API key; per this file's header, the driver is instantiated with it. */
+ apiKey: string;
17
/** Optional. NOTE(review): presumably the embedding model identifier; default not visible in this declaration — confirm in the implementation. */
+ model?: string;
18
/** Optional. NOTE(review): presumably the requested vector dimensionality — confirm in the implementation. */
+ dimensions?: number;
19
/** Optional. NOTE(review): presumably how many texts are sent per request — confirm in the implementation. */
+ batchSize?: number;
20
/** Optional. NOTE(review): presumably a custom endpoint/base URL for the provider — confirm in the implementation. */
+ endpoint?: string;
21
+ }
22
/** Object returned by `createEmbeddingProvider`: an embedding source bound to one provider. */
+ export interface EmbeddingProvider {
23
/** Name of the provider this instance is bound to. */
+ readonly provider: EmbeddingProviderName;
24
/**
 * Embeds the given texts and resolves with an array of `Float32Array` vectors.
 * The file header states vectors are unit-normalized.
 * NOTE(review): presumably one vector per input text, in input order — confirm in the implementation.
 */
+ embed(texts: string[]): Promise<Float32Array[]>;
25
+ }
26
/**
 * Builds an `EmbeddingProvider` from `config`. Per this file's header, the concrete
 * driver class is resolved from the provider name and instantiated with the supplied API key.
 * NOTE(review): behavior for an unknown provider or missing driver is not visible in this declaration.
 */
+ export declare function createEmbeddingProvider(config: EmbeddingProviderConfig): EmbeddingProvider;
27
+ //# sourceMappingURL=EmbeddingProvider.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"EmbeddingProvider.d.ts","sourceRoot":"","sources":["../../src/discovery/EmbeddingProvider.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAKH,6EAA6E;AAC7E,MAAM,MAAM,qBAAqB,GAAG,QAAQ,GAAG,SAAS,GAAG,OAAO,GAAG,SAAS,GAAG,QAAQ,GAAG,OAAO,CAAC;AAEpG,MAAM,WAAW,uBAAuB;IACpC,QAAQ,EAAE,qBAAqB,CAAC;IAChC,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,iBAAiB;IAC9B,QAAQ,CAAC,QAAQ,EAAE,qBAAqB,CAAC;IACzC,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;CACnD;AAyBD,wBAAgB,uBAAuB,CAAC,MAAM,EAAE,uBAAuB,GAAG,iBAAiB,CAO1F"}