npm - @meshxdata/fops - Versions diffs - 0.1.35 → 0.1.37 - Mend

@meshxdata/fops 0.1.35 → 0.1.37

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (21) hide show

package/src/plugins/bundled/fops-plugin-file/demo/orders_renamed.aligned.csv CHANGED Viewed

@@ -1,4 +1,4 @@
-id,customer_id,price,ccy,date,order_status,geo_region
+order_id,customer_id,price,ccy,order_date,status,region
 4001,8001,99.00,USD,2024-03-01,completed,north_america
 4002,8002,145.75,EUR,2024-03-02,pending,europe
 4003,8003,280.00,USD,2024-03-03,completed,asia_pacific

package/src/plugins/bundled/fops-plugin-file/index.js CHANGED Viewed

@@ -447,20 +447,89 @@ export function register(api) {
           }
           // ── 9. Show column mapping (if available) ──────────────────────
-          if (selectedRef.columnMappings?.length > 0) {
+          let currentMappings = [...(selectedRef.columnMappings || [])];
+          if (currentMappings.length > 0) {
             const refData2 = references.find((r) => r.id === selectedRef.id);
             if (refData2) {
               const refTag = `${selectedRef.name} (${selectedRef.id.slice(0, 8)})`;
-              const colMapOutput = formatColumnMapping(
-                selectedRef.columnMappings,
-                candResult.headers,
-                refData2.headers,
-                pathMod.basename(candidate),
-                refTag,
-                chalk,
-              );
-              console.log("");
-              console.log(colMapOutput);
+              const showMappingTable = () => {
+                console.log("");
+                console.log(formatColumnMapping(
+                  currentMappings,
+                  candResult.headers,
+                  refData2.headers,
+                  pathMod.basename(candidate),
+                  refTag,
+                  chalk,
+                ));
+              };
+              showMappingTable();
+              // ── 9b. Interactive mapping adjustment (TTY only) ──────────
+              if (isTTY && !opts.json) {
+                let _selectOpt;
+                const _cpPath = api.getCliPath?.("src", "ui", "confirm.js");
+                if (_cpPath) {
+                  const { pathToFileURL: _pu } = await import("node:url");
+                  ({ selectOption: _selectOpt } = await import(_pu(_cpPath).href));
+                } else {
+                  ({ selectOption: _selectOpt } = await import("../../../ui/confirm.js"));
+                }
+                let adjusting = true;
+                while (adjusting) {
+                  const choice = await _selectOpt("Column mappings:", [
+                    { label: "Continue to validation", value: "continue" },
+                    { label: "Adjust a mapping", value: "adjust" },
+                  ]);
+                  if (choice !== "adjust") { adjusting = false; break; }
+                  // Step 1: pick candidate column to remap
+                  const mappedCandSet = new Set(currentMappings.map((m) => m.candidateCol));
+                  const candOptions = [
+                    ...currentMappings.map((m) => {
+                      const sim = m.similarity == null ? "manual"
+                        : m.candidateCol.toLowerCase() === m.referenceCol.toLowerCase() ? "exact"
+                        : m.similarity.toFixed(2);
+                      return { label: `${m.candidateCol.padEnd(22)} → ${m.referenceCol}  (${sim})`, value: m.candidateCol };
+                    }),
+                    ...candResult.headers
+                      .filter((h) => !mappedCandSet.has(h))
+                      .map((h) => ({ label: `${h.padEnd(22)} → (unmapped)`, value: h })),
+                  ];
+                  const chosenCand = await _selectOpt("Which column to remap:", candOptions);
+                  if (chosenCand == null) continue;
+                  // Step 2: pick reference field — show all, marking already-mapped ones
+                  const refFieldOwner = new Map(
+                    currentMappings.filter((m) => m.candidateCol !== chosenCand).map((m) => [m.referenceCol, m.candidateCol]),
+                  );
+                  const refFieldOptions = [
+                    ...refData2.headers.map((h) => {
+                      const owner = refFieldOwner.get(h);
+                      return owner
+                        ? { label: `${h}  (currently: ${owner})`, value: h }
+                        : { label: h, value: h };
+                    }),
+                    { label: "— leave unmapped —", value: "__none__" },
+                  ];
+                  const chosenRef = await _selectOpt(`Map "${chosenCand}" to:`, refFieldOptions);
+                  if (chosenRef == null) continue;
+                  // Apply — remove chosen candidate's old mapping and displace any
+                  // existing owner of the target reference field
+                  currentMappings = currentMappings.filter(
+                    (m) => m.candidateCol !== chosenCand && m.referenceCol !== chosenRef,
+                  );
+                  if (chosenRef !== "__none__") {
+                    currentMappings.push({ candidateCol: chosenCand, referenceCol: chosenRef, similarity: null });
+                  }
+                  showMappingTable();
+                }
+              }
             }
           }
@@ -481,7 +550,7 @@ export function register(api) {
           // names to their matched reference names so schema/type checks are meaningful.
           let candHeadersForValidation = candResult.headers;
           let candTypesForValidation = candResult.types;
-          const colMap = selectedRef.columnMappings;
+          const colMap = currentMappings;
           if (colMap?.length > 0) {
             const translatedHeaders = [];
             const translatedTypes = {};

package/src/plugins/bundled/fops-plugin-file/lib/match.js CHANGED Viewed

@@ -44,6 +44,69 @@ export function buildFingerprint({ headers, types, name }) {
   return { columnText, typeText, nameText };
 }
+// ── Levenshtein ─────────────────────────────────────────────────────────────
+function _levenshteinDist(a, b) {
+  const m = a.length, n = b.length;
+  const dp = Array.from({ length: m + 1 }, () => new Array(n + 1).fill(0));
+  for (let i = 0; i <= m; i++) dp[i][0] = i;
+  for (let j = 0; j <= n; j++) dp[0][j] = j;
+  for (let i = 1; i <= m; i++) {
+    for (let j = 1; j <= n; j++) {
+      const cost = a[i - 1] === b[j - 1] ? 0 : 1;
+      dp[i][j] = Math.min(dp[i - 1][j] + 1, dp[i][j - 1] + 1, dp[i - 1][j - 1] + cost);
+    }
+  }
+  return dp[m][n];
+}
+function _levenshteinRatio(a, b) {
+  if (a === b) return 1;
+  if (!a || !b) return 0;
+  return 1 - _levenshteinDist(a, b) / Math.max(a.length, b.length);
+}
+/**
+ * Greedy bipartite column matching using Levenshtein ratio + containment boost.
+ * Same output shape as greedyColumnMatch so callers are interchangeable.
+ *
+ * Containment boost: single-token candidate = last segment of a compound reference
+ * (e.g. "id" → "order_id", "date" → "order_date") gets +0.30 so it beats
+ * weaker multi-token candidates claiming the same field.
+ *
+ * @param {string[]} candHeaders
+ * @param {string[]} refHeaders
+ * @param {object} [opts]
+ * @param {number} [opts.minSim=0.45]
+ * @returns {{score: number, mappings: Array<{candIdx: number, refIdx: number, sim: number}>}}
+ */
+export function levenshteinColumnMatch(candHeaders, refHeaders, opts = {}) {
+  const minSim = opts.minSim ?? 0.45;
+  const M = candHeaders.length, N = refHeaders.length;
+  if (M === 0 || N === 0) return { score: 0, mappings: [] };
+  const norm = (s) => s.toLowerCase().replace(/[_\-\s]+/g, "_");
+  const simMatrix = [];
+  for (let i = 0; i < M; i++) {
+    simMatrix[i] = [];
+    const cn = norm(candHeaders[i]);
+    const cParts = cn.split("_").filter(Boolean);
+    for (let j = 0; j < N; j++) {
+      const rn = norm(refHeaders[j]);
+      const rParts = rn.split("_").filter(Boolean);
+      let sim = _levenshteinRatio(cn, rn);
+      // Containment: single-token candidate IS the last segment of the reference
+      if (cParts.length === 1 && rParts.length > 1 && rParts[rParts.length - 1] === cParts[0]) {
+        sim = Math.min(1, sim + 0.30);
+      }
+      simMatrix[i][j] = sim;
+    }
+  }
+  return greedyColumnMatch(simMatrix, { minSim });
+}
 // ── Jaccard Overlap ─────────────────────────────────────────────────────────
 /**
@@ -167,8 +230,11 @@ function normalizeColumnName(name) {
     .map((p) => p.toLowerCase());
   if (parts.length === 0) return name || "";
   if (parts.length === 1) return parts[0];
-  // Repeat the last token to emphasise the semantic role over the entity prefix
-  return parts.join(" ") + " " + parts[parts.length - 1];
+  // Repeat the last token 3 extra times so the semantic role (suffix) dominates
+  // over the entity prefix. e.g. "order_status" → "order status status status status"
+  // vs "order_id" → "order id id id id" — "status" and "id" are now discriminating.
+  const last = parts[parts.length - 1];
+  return parts.join(" ") + " " + last + " " + last + " " + last;
 }
 /**
@@ -194,21 +260,27 @@ export function buildSimilarityMatrix(candVecs, refVecs) {
 /**
  * Greedy bipartite matching on a similarity matrix.
  * Sort all (i,j,sim) triples descending, greedily assign unmatched pairs.
+ * Pairs below minSim are left unmatched (shown as unmapped in the display).
  *
  * @param {number[][]} simMatrix - M×N cosine similarity matrix
+ * @param {object} [opts]
+ * @param {number} [opts.minSim=0.45] - Minimum similarity to accept a pairing
  * @returns {{score: number, mappings: Array<{candIdx: number, refIdx: number, sim: number}>}}
  */
-export function greedyColumnMatch(simMatrix) {
+export function greedyColumnMatch(simMatrix, opts = {}) {
+  const minSim = opts.minSim ?? 0.45;
   const M = simMatrix.length;
   if (M === 0) return { score: 0, mappings: [] };
   const N = simMatrix[0].length;
   if (N === 0) return { score: 0, mappings: [] };
-  // Collect all pairs
+  // Collect all pairs above the minimum threshold
   const pairs = [];
   for (let i = 0; i < M; i++) {
     for (let j = 0; j < N; j++) {
-      pairs.push({ candIdx: i, refIdx: j, sim: simMatrix[i][j] });
+      if (simMatrix[i][j] >= minSim) {
+        pairs.push({ candIdx: i, refIdx: j, sim: simMatrix[i][j] });
+      }
     }
   }
   pairs.sort((a, b) => b.sim - a.sim);
@@ -337,16 +409,46 @@ export function rankMatches(candidate, references, opts = {}) {
     .sort((a, b) => b.jaccardScore - a.jaccardScore);
   if (!hasEmb && !hasColEmb) {
-    // Jaccard-only mode: normalize score to 0–100
-    return jaccardList.slice(0, topK).map((r) => ({
-      id: r.id,
-      name: r.name,
-      entityType: r.entityType,
-      score: Math.round(r.jaccardScore * 100),
-      overlapCount: r.overlap.count,
-      overlapTotal: r.overlap.total,
-      signals: { jaccard: r.jaccardScore },
-    }));
+    // Jaccard + Levenshtein column match — no ONNX needed.
+    // Levenshtein catches abbreviations (cust_id ↔ customer_id) and containment
+    // (id ↔ order_id) that pure Jaccard misses.
+    const levList = references.map((ref) => {
+      const { score, mappings } = levenshteinColumnMatch(candidate.headers, ref.headers);
+      const columnMappings = mappings.map((m) => ({
+        candidateCol: candidate.headers[m.candIdx],
+        referenceCol: ref.headers[m.refIdx],
+        similarity: m.sim,
+      }));
+      return { id: ref.id, levScore: score, columnMappings };
+    }).sort((a, b) => b.levScore - a.levScore);
+    const fused = mergeRRF([
+      jaccardList.map((r) => ({ id: r.id })),
+      levList.map((r) => ({ id: r.id })),
+    ], topK);
+    const jaccardMap = new Map(jaccardList.map((r) => [r.id, r]));
+    const levMap = new Map(levList.map((r) => [r.id, r]));
+    const refById = new Map(references.map((r) => [r.id, r]));
+    const W_JAC = 0.40, W_LEV = 0.60;
+    return fused.map((r) => {
+      const j = jaccardMap.get(r.id) || { overlap: { count: 0, total: 0 }, jaccardScore: 0 };
+      const l = levMap.get(r.id) || { levScore: 0, columnMappings: [] };
+      const ref = refById.get(r.id);
+      const quality = W_JAC * j.jaccardScore + W_LEV * l.levScore;
+      return {
+        id: r.id,
+        name: ref?.name || r.id,
+        entityType: ref?.entityType,
+        score: Math.round(quality * 100),
+        overlapCount: j.overlap.count,
+        overlapTotal: j.overlap.total,
+        columnMappings: l.columnMappings,
+        signals: { jaccard: j.jaccardScore, levenshtein: l.levScore },
+      };
+    });
   }
   // ── Column-level pipeline (v3) ──────────────────────────────────────────
@@ -373,6 +475,22 @@ function _rankWithColumnEmbeddings(candidate, references, jaccardList, topK) {
     if (candColVecs.length > 0 && refColVecs.length > 0) {
       const simMatrix = buildSimilarityMatrix(candColVecs, refColVecs);
+      // Containment boost: a single-token candidate that equals the last segment of a
+      // compound reference name is almost certainly the right match (e.g. "id" → "order_id",
+      // "date" → "order_date"). Boost its score so the greedy assigns it before a weaker
+      // multi-token candidate (e.g. "order_status") can claim the reference field.
+      for (let i = 0; i < candidate.headers.length; i++) {
+        const cParts = candidate.headers[i].toLowerCase().split(/[_\-\s]+/).filter(Boolean);
+        if (cParts.length !== 1) continue; // only single-token names
+        for (let j = 0; j < ref.headers.length; j++) {
+          const rParts = ref.headers[j].toLowerCase().split(/[_\-\s]+/).filter(Boolean);
+          if (rParts.length > 1 && rParts[rParts.length - 1] === cParts[0]) {
+            simMatrix[i][j] = Math.min(1, simMatrix[i][j] + 0.15);
+          }
+        }
+      }
       colMatch = greedyColumnMatch(simMatrix);
       typeCompat = typeCompatibilityScore(
         candidate.types, ref.types,

package/src/plugins/bundled/fops-plugin-file/lib/report.js CHANGED Viewed

@@ -136,6 +136,9 @@ export function formatColumnMapping(mappings, candHeaders, refHeaders, candLabel
     if (m.candidateCol.toLowerCase() === m.referenceCol.toLowerCase()) {
       matchLabel = "exact";
       color = chalk.green;
+    } else if (m.similarity == null) {
+      matchLabel = "manual";
+      color = chalk.cyan;
     } else if (m.similarity >= 0.85) {
       matchLabel = m.similarity.toFixed(2);
       color = chalk.green;

package/src/plugins/bundled/fops-plugin-foundation/index.js CHANGED Viewed

@@ -1527,8 +1527,19 @@ class AppDelegate: NSObject, NSApplicationDelegate, NSMenuDelegate {
     }
     @objc func runUpdate() {
+        updateItem.title = "⬆ Updating…"
+        updateItem.isEnabled = false
         let npm = "/opt/homebrew/bin/npm"
-        openTerminal(command: npm + " install -g @meshxdata/fops")
+        let p = Process()
+        p.executableURL = URL(fileURLWithPath: npm)
+        p.arguments = ["install", "-g", "@meshxdata/fops"]
+        p.terminationHandler = { _ in
+            DispatchQueue.main.async {
+                updateItem.title = "✓ Updated — restart tray to apply"
+                updateItem.isEnabled = false
+            }
+        }
+        try? p.run()
     }
     // Rebuild Compose submenu each time it opens

package/src/plugins/bundled/fops-plugin-foundation/lib/client.js CHANGED Viewed

@@ -90,17 +90,21 @@ function sleep(ms) {
   return new Promise((resolve) => setTimeout(resolve, ms));
 }
-/** Like request() but retries on network errors and 5xx responses. */
+/** Like request() but retries on network errors and 5xx responses — for GET, HEAD and OPTIONS only.
+ *  Every other method (POST, PUT, PATCH, DELETE) gets exactly one attempt, with no retry on network
+ *  errors or 5xx, to avoid duplicate writes or zombie state (e.g. partial Iceberg table creation). */
 async function requestWithRetries(method, url, headers, body, timeoutMs, maxRetries = RETRY_COUNT, delayMs = RETRY_DELAY_MS) {
-  for (let attempt = 1; attempt <= maxRetries; attempt++) {
+  const idempotent = method === "GET" || method === "HEAD" || method === "OPTIONS";
+  const effectiveMaxRetries = idempotent ? maxRetries : 1;
+  for (let attempt = 1; attempt <= effectiveMaxRetries; attempt++) {
     let res;
     try {
       res = await request(method, url, headers, body, timeoutMs);
     } catch (e) {
-      if (attempt < maxRetries) { await sleep(delayMs); continue; }
-      throw new Error(`Request failed after ${maxRetries} attempts: ${e.message}`);
+      if (attempt < effectiveMaxRetries) { await sleep(delayMs); continue; }
+      throw new Error(`Request failed after ${attempt} attempts: ${e.message}`);
     }
-    if (res.status >= 500 && attempt < maxRetries) {
+    if (res.status >= 500 && attempt < effectiveMaxRetries) {
       await sleep(delayMs);
       continue;
     }

package/src/plugins/bundled/fops-plugin-foundation-graphql/lib/graphql/resolvers/data-product.js CHANGED Viewed

@@ -1,6 +1,38 @@
 import { parseListResponse } from "../../../../fops-plugin-foundation/lib/api-spec.js";
 export const dataProductResolvers = {
+  Mutation: {
+    async updateDataProductSchema(_root, { identifier, input }, { client }) {
+      const body = {
+        details: {
+          ...(input.dataProductType != null && { data_product_type: input.dataProductType }),
+          fields: input.fields.map((f) => ({
+            name: f.name,
+            description: f.description ?? null,
+            primary: f.primary ?? false,
+            optional: f.optional ?? false,
+            data_type: {
+              meta: {},
+              column_type: f.dataType ?? "VARCHAR",
+            },
+            classification: f.classification ?? "internal",
+            sensitivity: f.sensitivity ?? null,
+            tags: f.tags ?? [],
+          })),
+        },
+      };
+      const res = await client.put(`/data/data_product/schema?identifier=${identifier}`, body);
+      const details = res?.details ?? res?.schema ?? res ?? null;
+      const columns = (res?.columns || res?.fields || input.fields).map((f) => ({
+        name: f.name || f.column_name,
+        dataType: f.data_type?.column_type ?? f.dataType ?? null,
+        primary: f.primary ?? false,
+        nullable: !(f.optional ?? false),
+      }));
+      return { details, columns };
+    },
+  },
   Query: {
     async dataProducts(_root, _args, { client }) {
       const res = await client.get("/data/data_product/list?per_page=200");

package/src/plugins/bundled/fops-plugin-foundation-graphql/lib/graphql/schema.js CHANGED Viewed

@@ -1,11 +1,30 @@
 /**
  * GraphQL SDL for the Foundation data mesh API.
- * Read-only in v1 — no mutations.
  */
 export const typeDefs = /* GraphQL */ `
   scalar JSON
+  type Mutation {
+    updateDataProductSchema(identifier: ID!, input: DataProductSchemaInput!): DataProductSchema
+  }
+  input DataProductSchemaInput {
+    dataProductType: String
+    fields: [SchemaFieldInput!]!
+  }
+  input SchemaFieldInput {
+    name: String!
+    description: String
+    primary: Boolean
+    optional: Boolean
+    dataType: String
+    classification: String
+    sensitivity: String
+    tags: [String!]
+  }
   type Query {
     meshes: [Mesh!]!
     mesh(identifier: ID!): Mesh