@meshxdata/fops 0.1.35 → 0.1.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- id,customer_id,price,ccy,date,order_status,geo_region
1
+ order_id,customer_id,price,ccy,order_date,status,region
2
2
  4001,8001,99.00,USD,2024-03-01,completed,north_america
3
3
  4002,8002,145.75,EUR,2024-03-02,pending,europe
4
4
  4003,8003,280.00,USD,2024-03-03,completed,asia_pacific
@@ -447,20 +447,89 @@ export function register(api) {
447
447
  }
448
448
 
449
449
  // ── 9. Show column mapping (if available) ──────────────────────
450
- if (selectedRef.columnMappings?.length > 0) {
450
+ let currentMappings = [...(selectedRef.columnMappings || [])];
451
+ if (currentMappings.length > 0) {
451
452
  const refData2 = references.find((r) => r.id === selectedRef.id);
452
453
  if (refData2) {
453
454
  const refTag = `${selectedRef.name} (${selectedRef.id.slice(0, 8)})`;
454
- const colMapOutput = formatColumnMapping(
455
- selectedRef.columnMappings,
456
- candResult.headers,
457
- refData2.headers,
458
- pathMod.basename(candidate),
459
- refTag,
460
- chalk,
461
- );
462
- console.log("");
463
- console.log(colMapOutput);
455
+
456
+ const showMappingTable = () => {
457
+ console.log("");
458
+ console.log(formatColumnMapping(
459
+ currentMappings,
460
+ candResult.headers,
461
+ refData2.headers,
462
+ pathMod.basename(candidate),
463
+ refTag,
464
+ chalk,
465
+ ));
466
+ };
467
+ showMappingTable();
468
+
469
+ // ── 9b. Interactive mapping adjustment (TTY only) ──────────
470
+ if (isTTY && !opts.json) {
471
+ let _selectOpt;
472
+ const _cpPath = api.getCliPath?.("src", "ui", "confirm.js");
473
+ if (_cpPath) {
474
+ const { pathToFileURL: _pu } = await import("node:url");
475
+ ({ selectOption: _selectOpt } = await import(_pu(_cpPath).href));
476
+ } else {
477
+ ({ selectOption: _selectOpt } = await import("../../../ui/confirm.js"));
478
+ }
479
+
480
+ let adjusting = true;
481
+ while (adjusting) {
482
+ const choice = await _selectOpt("Column mappings:", [
483
+ { label: "Continue to validation", value: "continue" },
484
+ { label: "Adjust a mapping", value: "adjust" },
485
+ ]);
486
+ if (choice !== "adjust") { adjusting = false; break; }
487
+
488
+ // Step 1: pick candidate column to remap
489
+ const mappedCandSet = new Set(currentMappings.map((m) => m.candidateCol));
490
+ const candOptions = [
491
+ ...currentMappings.map((m) => {
492
+ const sim = m.similarity == null ? "manual"
493
+ : m.candidateCol.toLowerCase() === m.referenceCol.toLowerCase() ? "exact"
494
+ : m.similarity.toFixed(2);
495
+ return { label: `${m.candidateCol.padEnd(22)} → ${m.referenceCol} (${sim})`, value: m.candidateCol };
496
+ }),
497
+ ...candResult.headers
498
+ .filter((h) => !mappedCandSet.has(h))
499
+ .map((h) => ({ label: `${h.padEnd(22)} → (unmapped)`, value: h })),
500
+ ];
501
+
502
+ const chosenCand = await _selectOpt("Which column to remap:", candOptions);
503
+ if (chosenCand == null) continue;
504
+
505
+ // Step 2: pick reference field — show all, marking already-mapped ones
506
+ const refFieldOwner = new Map(
507
+ currentMappings.filter((m) => m.candidateCol !== chosenCand).map((m) => [m.referenceCol, m.candidateCol]),
508
+ );
509
+ const refFieldOptions = [
510
+ ...refData2.headers.map((h) => {
511
+ const owner = refFieldOwner.get(h);
512
+ return owner
513
+ ? { label: `${h} (currently: ${owner})`, value: h }
514
+ : { label: h, value: h };
515
+ }),
516
+ { label: "— leave unmapped —", value: "__none__" },
517
+ ];
518
+
519
+ const chosenRef = await _selectOpt(`Map "${chosenCand}" to:`, refFieldOptions);
520
+ if (chosenRef == null) continue;
521
+
522
+ // Apply — remove chosen candidate's old mapping and displace any
523
+ // existing owner of the target reference field
524
+ currentMappings = currentMappings.filter(
525
+ (m) => m.candidateCol !== chosenCand && m.referenceCol !== chosenRef,
526
+ );
527
+ if (chosenRef !== "__none__") {
528
+ currentMappings.push({ candidateCol: chosenCand, referenceCol: chosenRef, similarity: null });
529
+ }
530
+ showMappingTable();
531
+ }
532
+ }
464
533
  }
465
534
  }
466
535
 
@@ -481,7 +550,7 @@ export function register(api) {
481
550
  // names to their matched reference names so schema/type checks are meaningful.
482
551
  let candHeadersForValidation = candResult.headers;
483
552
  let candTypesForValidation = candResult.types;
484
- const colMap = selectedRef.columnMappings;
553
+ const colMap = currentMappings;
485
554
  if (colMap?.length > 0) {
486
555
  const translatedHeaders = [];
487
556
  const translatedTypes = {};
@@ -44,6 +44,69 @@ export function buildFingerprint({ headers, types, name }) {
44
44
  return { columnText, typeText, nameText };
45
45
  }
46
46
 
47
+ // ── Levenshtein ─────────────────────────────────────────────────────────────
48
+
49
+ function _levenshteinDist(a, b) {
50
+ const m = a.length, n = b.length;
51
+ const dp = Array.from({ length: m + 1 }, () => new Array(n + 1).fill(0));
52
+ for (let i = 0; i <= m; i++) dp[i][0] = i;
53
+ for (let j = 0; j <= n; j++) dp[0][j] = j;
54
+ for (let i = 1; i <= m; i++) {
55
+ for (let j = 1; j <= n; j++) {
56
+ const cost = a[i - 1] === b[j - 1] ? 0 : 1;
57
+ dp[i][j] = Math.min(dp[i - 1][j] + 1, dp[i][j - 1] + 1, dp[i - 1][j - 1] + cost);
58
+ }
59
+ }
60
+ return dp[m][n];
61
+ }
62
+
63
+ function _levenshteinRatio(a, b) {
64
+ if (a === b) return 1;
65
+ if (!a || !b) return 0;
66
+ return 1 - _levenshteinDist(a, b) / Math.max(a.length, b.length);
67
+ }
68
+
69
+ /**
70
+ * Greedy bipartite column matching using Levenshtein ratio + containment boost.
71
+ * Same output shape as greedyColumnMatch so callers are interchangeable.
72
+ *
73
+ * Containment boost: single-token candidate = last segment of a compound reference
74
+ * (e.g. "id" → "order_id", "date" → "order_date") gets +0.30 so it beats
75
+ * weaker multi-token candidates claiming the same field.
76
+ *
77
+ * @param {string[]} candHeaders
78
+ * @param {string[]} refHeaders
79
+ * @param {object} [opts]
80
+ * @param {number} [opts.minSim=0.45]
81
+ * @returns {{score: number, mappings: Array<{candIdx: number, refIdx: number, sim: number}>}}
82
+ */
83
+ export function levenshteinColumnMatch(candHeaders, refHeaders, opts = {}) {
84
+ const minSim = opts.minSim ?? 0.45;
85
+ const M = candHeaders.length, N = refHeaders.length;
86
+ if (M === 0 || N === 0) return { score: 0, mappings: [] };
87
+
88
+ const norm = (s) => s.toLowerCase().replace(/[_\-\s]+/g, "_");
89
+
90
+ const simMatrix = [];
91
+ for (let i = 0; i < M; i++) {
92
+ simMatrix[i] = [];
93
+ const cn = norm(candHeaders[i]);
94
+ const cParts = cn.split("_").filter(Boolean);
95
+ for (let j = 0; j < N; j++) {
96
+ const rn = norm(refHeaders[j]);
97
+ const rParts = rn.split("_").filter(Boolean);
98
+ let sim = _levenshteinRatio(cn, rn);
99
+ // Containment: single-token candidate IS the last segment of the reference
100
+ if (cParts.length === 1 && rParts.length > 1 && rParts[rParts.length - 1] === cParts[0]) {
101
+ sim = Math.min(1, sim + 0.30);
102
+ }
103
+ simMatrix[i][j] = sim;
104
+ }
105
+ }
106
+
107
+ return greedyColumnMatch(simMatrix, { minSim });
108
+ }
109
+
47
110
  // ── Jaccard Overlap ─────────────────────────────────────────────────────────
48
111
 
49
112
  /**
@@ -167,8 +230,11 @@ function normalizeColumnName(name) {
167
230
  .map((p) => p.toLowerCase());
168
231
  if (parts.length === 0) return name || "";
169
232
  if (parts.length === 1) return parts[0];
170
- // Repeat the last token to emphasise the semantic role over the entity prefix
171
- return parts.join(" ") + " " + parts[parts.length - 1];
233
+ // Repeat the last token 3 extra times so the semantic role (suffix) dominates
234
+ // over the entity prefix. e.g. "order_status" → "order status status status status"
235
+ // vs "order_id" → "order id id id id" — "status" and "id" are now discriminating.
236
+ const last = parts[parts.length - 1];
237
+ return parts.join(" ") + " " + last + " " + last + " " + last;
172
238
  }
173
239
 
174
240
  /**
@@ -194,21 +260,27 @@ export function buildSimilarityMatrix(candVecs, refVecs) {
194
260
  /**
195
261
  * Greedy bipartite matching on a similarity matrix.
196
262
  * Sort all (i,j,sim) triples descending, greedily assign unmatched pairs.
263
+ * Pairs below minSim are left unmatched (shown as unmapped in the display).
197
264
  *
198
265
  * @param {number[][]} simMatrix - M×N cosine similarity matrix
266
+ * @param {object} [opts]
267
+ * @param {number} [opts.minSim=0.45] - Minimum similarity to accept a pairing
199
268
  * @returns {{score: number, mappings: Array<{candIdx: number, refIdx: number, sim: number}>}}
200
269
  */
201
- export function greedyColumnMatch(simMatrix) {
270
+ export function greedyColumnMatch(simMatrix, opts = {}) {
271
+ const minSim = opts.minSim ?? 0.45;
202
272
  const M = simMatrix.length;
203
273
  if (M === 0) return { score: 0, mappings: [] };
204
274
  const N = simMatrix[0].length;
205
275
  if (N === 0) return { score: 0, mappings: [] };
206
276
 
207
- // Collect all pairs
277
+ // Collect all pairs above the minimum threshold
208
278
  const pairs = [];
209
279
  for (let i = 0; i < M; i++) {
210
280
  for (let j = 0; j < N; j++) {
211
- pairs.push({ candIdx: i, refIdx: j, sim: simMatrix[i][j] });
281
+ if (simMatrix[i][j] >= minSim) {
282
+ pairs.push({ candIdx: i, refIdx: j, sim: simMatrix[i][j] });
283
+ }
212
284
  }
213
285
  }
214
286
  pairs.sort((a, b) => b.sim - a.sim);
@@ -337,16 +409,46 @@ export function rankMatches(candidate, references, opts = {}) {
337
409
  .sort((a, b) => b.jaccardScore - a.jaccardScore);
338
410
 
339
411
  if (!hasEmb && !hasColEmb) {
340
- // Jaccard-only mode: normalize score to 0–100
341
- return jaccardList.slice(0, topK).map((r) => ({
342
- id: r.id,
343
- name: r.name,
344
- entityType: r.entityType,
345
- score: Math.round(r.jaccardScore * 100),
346
- overlapCount: r.overlap.count,
347
- overlapTotal: r.overlap.total,
348
- signals: { jaccard: r.jaccardScore },
349
- }));
412
+ // Jaccard + Levenshtein column match — no ONNX needed.
413
+ // Levenshtein catches abbreviations (cust_id ↔ customer_id) and containment
414
+ // (id ↔ order_id) that pure Jaccard misses.
415
+ const levList = references.map((ref) => {
416
+ const { score, mappings } = levenshteinColumnMatch(candidate.headers, ref.headers);
417
+ const columnMappings = mappings.map((m) => ({
418
+ candidateCol: candidate.headers[m.candIdx],
419
+ referenceCol: ref.headers[m.refIdx],
420
+ similarity: m.sim,
421
+ }));
422
+ return { id: ref.id, levScore: score, columnMappings };
423
+ }).sort((a, b) => b.levScore - a.levScore);
424
+
425
+ const fused = mergeRRF([
426
+ jaccardList.map((r) => ({ id: r.id })),
427
+ levList.map((r) => ({ id: r.id })),
428
+ ], topK);
429
+
430
+ const jaccardMap = new Map(jaccardList.map((r) => [r.id, r]));
431
+ const levMap = new Map(levList.map((r) => [r.id, r]));
432
+ const refById = new Map(references.map((r) => [r.id, r]));
433
+
434
+ const W_JAC = 0.40, W_LEV = 0.60;
435
+
436
+ return fused.map((r) => {
437
+ const j = jaccardMap.get(r.id) || { overlap: { count: 0, total: 0 }, jaccardScore: 0 };
438
+ const l = levMap.get(r.id) || { levScore: 0, columnMappings: [] };
439
+ const ref = refById.get(r.id);
440
+ const quality = W_JAC * j.jaccardScore + W_LEV * l.levScore;
441
+ return {
442
+ id: r.id,
443
+ name: ref?.name || r.id,
444
+ entityType: ref?.entityType,
445
+ score: Math.round(quality * 100),
446
+ overlapCount: j.overlap.count,
447
+ overlapTotal: j.overlap.total,
448
+ columnMappings: l.columnMappings,
449
+ signals: { jaccard: j.jaccardScore, levenshtein: l.levScore },
450
+ };
451
+ });
350
452
  }
351
453
 
352
454
  // ── Column-level pipeline (v3) ──────────────────────────────────────────
@@ -373,6 +475,22 @@ function _rankWithColumnEmbeddings(candidate, references, jaccardList, topK) {
373
475
 
374
476
  if (candColVecs.length > 0 && refColVecs.length > 0) {
375
477
  const simMatrix = buildSimilarityMatrix(candColVecs, refColVecs);
478
+
479
+ // Containment boost: a single-token candidate that equals the last segment of a
480
+ // compound reference name is almost certainly the right match (e.g. "id" → "order_id",
481
+ // "date" → "order_date"). Boost its score so the greedy assigns it before a weaker
482
+ // multi-token candidate (e.g. "order_status") can claim the reference field.
483
+ for (let i = 0; i < candidate.headers.length; i++) {
484
+ const cParts = candidate.headers[i].toLowerCase().split(/[_\-\s]+/).filter(Boolean);
485
+ if (cParts.length !== 1) continue; // only single-token names
486
+ for (let j = 0; j < ref.headers.length; j++) {
487
+ const rParts = ref.headers[j].toLowerCase().split(/[_\-\s]+/).filter(Boolean);
488
+ if (rParts.length > 1 && rParts[rParts.length - 1] === cParts[0]) {
489
+ simMatrix[i][j] = Math.min(1, simMatrix[i][j] + 0.15);
490
+ }
491
+ }
492
+ }
493
+
376
494
  colMatch = greedyColumnMatch(simMatrix);
377
495
  typeCompat = typeCompatibilityScore(
378
496
  candidate.types, ref.types,
@@ -136,6 +136,9 @@ export function formatColumnMapping(mappings, candHeaders, refHeaders, candLabel
136
136
  if (m.candidateCol.toLowerCase() === m.referenceCol.toLowerCase()) {
137
137
  matchLabel = "exact";
138
138
  color = chalk.green;
139
+ } else if (m.similarity == null) {
140
+ matchLabel = "manual";
141
+ color = chalk.cyan;
139
142
  } else if (m.similarity >= 0.85) {
140
143
  matchLabel = m.similarity.toFixed(2);
141
144
  color = chalk.green;
@@ -1527,8 +1527,19 @@ class AppDelegate: NSObject, NSApplicationDelegate, NSMenuDelegate {
1527
1527
  }
1528
1528
 
1529
1529
  @objc func runUpdate() {
1530
+ updateItem.title = "⬆ Updating…"
1531
+ updateItem.isEnabled = false
1530
1532
  let npm = "/opt/homebrew/bin/npm"
1531
- openTerminal(command: npm + " install -g @meshxdata/fops")
1533
+ let p = Process()
1534
+ p.executableURL = URL(fileURLWithPath: npm)
1535
+ p.arguments = ["install", "-g", "@meshxdata/fops"]
1536
+ p.terminationHandler = { _ in
1537
+ DispatchQueue.main.async {
1538
+ updateItem.title = "✓ Updated — restart tray to apply"
1539
+ updateItem.isEnabled = false
1540
+ }
1541
+ }
1542
+ try? p.run()
1532
1543
  }
1533
1544
 
1534
1545
  // Rebuild Compose submenu each time it opens
@@ -90,17 +90,21 @@ function sleep(ms) {
90
90
  return new Promise((resolve) => setTimeout(resolve, ms));
91
91
  }
92
92
 
93
- /** Like request() but retries on network errors and 5xx responses. */
93
+ /** Like request() but retries on network errors and 5xx responses.
94
+ * Only GET, HEAD and OPTIONS are retried; every other method (POST, PUT, PATCH, DELETE, …) gets a single attempt —
95
+ * no retry on network errors or 5xx — to avoid duplicate writes or zombie state (e.g. partial Iceberg table creation). */
94
96
  async function requestWithRetries(method, url, headers, body, timeoutMs, maxRetries = RETRY_COUNT, delayMs = RETRY_DELAY_MS) {
95
- for (let attempt = 1; attempt <= maxRetries; attempt++) {
97
+ const idempotent = method === "GET" || method === "HEAD" || method === "OPTIONS";
98
+ const effectiveMaxRetries = idempotent ? maxRetries : 1;
99
+ for (let attempt = 1; attempt <= effectiveMaxRetries; attempt++) {
96
100
  let res;
97
101
  try {
98
102
  res = await request(method, url, headers, body, timeoutMs);
99
103
  } catch (e) {
100
- if (attempt < maxRetries) { await sleep(delayMs); continue; }
101
- throw new Error(`Request failed after ${maxRetries} attempts: ${e.message}`);
104
+ if (attempt < effectiveMaxRetries) { await sleep(delayMs); continue; }
105
+ throw new Error(`Request failed after ${attempt} attempts: ${e.message}`);
102
106
  }
103
- if (res.status >= 500 && attempt < maxRetries) {
107
+ if (res.status >= 500 && attempt < effectiveMaxRetries) {
104
108
  await sleep(delayMs);
105
109
  continue;
106
110
  }
@@ -1,6 +1,38 @@
1
1
  import { parseListResponse } from "../../../../fops-plugin-foundation/lib/api-spec.js";
2
2
 
3
3
  export const dataProductResolvers = {
4
+ Mutation: {
5
+ async updateDataProductSchema(_root, { identifier, input }, { client }) {
6
+ const body = {
7
+ details: {
8
+ ...(input.dataProductType != null && { data_product_type: input.dataProductType }),
9
+ fields: input.fields.map((f) => ({
10
+ name: f.name,
11
+ description: f.description ?? null,
12
+ primary: f.primary ?? false,
13
+ optional: f.optional ?? false,
14
+ data_type: {
15
+ meta: {},
16
+ column_type: f.dataType ?? "VARCHAR",
17
+ },
18
+ classification: f.classification ?? "internal",
19
+ sensitivity: f.sensitivity ?? null,
20
+ tags: f.tags ?? [],
21
+ })),
22
+ },
23
+ };
24
+ const res = await client.put(`/data/data_product/schema?identifier=${identifier}`, body);
25
+ const details = res?.details ?? res?.schema ?? res ?? null;
26
+ const columns = (res?.columns || res?.fields || input.fields).map((f) => ({
27
+ name: f.name || f.column_name,
28
+ dataType: f.data_type?.column_type ?? f.dataType ?? null,
29
+ primary: f.primary ?? false,
30
+ nullable: !(f.optional ?? false),
31
+ }));
32
+ return { details, columns };
33
+ },
34
+ },
35
+
4
36
  Query: {
5
37
  async dataProducts(_root, _args, { client }) {
6
38
  const res = await client.get("/data/data_product/list?per_page=200");
@@ -1,11 +1,30 @@
1
1
  /**
2
2
  * GraphQL SDL for the Foundation data mesh API.
3
- * Read-only in v1 — no mutations.
4
3
  */
5
4
 
6
5
  export const typeDefs = /* GraphQL */ `
7
6
  scalar JSON
8
7
 
8
+ type Mutation {
9
+ updateDataProductSchema(identifier: ID!, input: DataProductSchemaInput!): DataProductSchema
10
+ }
11
+
12
+ input DataProductSchemaInput {
13
+ dataProductType: String
14
+ fields: [SchemaFieldInput!]!
15
+ }
16
+
17
+ input SchemaFieldInput {
18
+ name: String!
19
+ description: String
20
+ primary: Boolean
21
+ optional: Boolean
22
+ dataType: String
23
+ classification: String
24
+ sensitivity: String
25
+ tags: [String!]
26
+ }
27
+
9
28
  type Query {
10
29
  meshes: [Mesh!]!
11
30
  mesh(identifier: ID!): Mesh