@meshxdata/fops 0.1.32 → 0.1.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of @meshxdata/fops might be problematic. Click here for more details.

Files changed (23)
  1. package/CHANGELOG.md +184 -0
  2. package/package.json +1 -2
  3. package/src/commands/lifecycle.js +16 -0
  4. package/src/plugins/bundled/fops-plugin-embeddings/index.js +3 -1
  5. package/src/plugins/bundled/fops-plugin-embeddings/lib/indexer.js +1 -1
  6. package/src/plugins/bundled/fops-plugin-file/demo/landscape.yaml +67 -0
  7. package/src/plugins/bundled/fops-plugin-file/demo/orders_bad.csv +6 -0
  8. package/src/plugins/bundled/fops-plugin-file/demo/orders_good.csv +7 -0
  9. package/src/plugins/bundled/fops-plugin-file/demo/orders_reference.csv +6 -0
  10. package/src/plugins/bundled/fops-plugin-file/demo/orders_renamed.aligned.csv +6 -0
  11. package/src/plugins/bundled/fops-plugin-file/demo/orders_renamed.csv +6 -0
  12. package/src/plugins/bundled/fops-plugin-file/demo/rules.json +8 -0
  13. package/src/plugins/bundled/fops-plugin-file/demo/run.sh +110 -0
  14. package/src/plugins/bundled/fops-plugin-file/index.js +140 -24
  15. package/src/plugins/bundled/fops-plugin-file/lib/embed-index.js +7 -0
  16. package/src/plugins/bundled/fops-plugin-file/lib/match.js +11 -4
  17. package/src/plugins/bundled/fops-plugin-foundation/index.js +1574 -101
  18. package/src/plugins/bundled/fops-plugin-foundation/lib/align.js +42 -4
  19. package/src/plugins/bundled/fops-plugin-foundation/lib/apply.js +83 -41
  20. package/src/plugins/bundled/fops-plugin-foundation/lib/stack-apply.js +4 -1
  21. package/src/plugins/bundled/fops-plugin-foundation-graphql/index.js +39 -1
  22. package/src/plugins/bundled/fops-plugin-foundation-graphql/lib/graphql/resolvers/data-object.js +9 -6
  23. package/src/plugins/bundled/fops-plugin-foundation-graphql/lib/graphql/resolvers/data-product.js +9 -6
@@ -21,7 +21,7 @@ function cosineSimilarity(a, b) {
21
21
  return denom === 0 ? 0 : dot / denom;
22
22
  }
23
23
 
24
- function levenshteinRatio(a, b) {
24
+ export function levenshteinRatio(a, b) {
25
25
  const x = String(a || "");
26
26
  const y = String(b || "");
27
27
  if (x === y) return 1;
@@ -65,12 +65,50 @@ export async function alignColumns(sourceColumns, expectedColumns, { embedTexts,
65
65
  }
66
66
  }
67
67
 
68
+ // Helpers shared by embedding and Levenshtein passes
69
+ // Expand common opaque abbreviations so the embedding sees real words
70
+ const ABBREV = {
71
+ ccy: "currency code", cust: "customer", amt: "amount", qty: "quantity",
72
+ dt: "date", desc: "description", num: "number", addr: "address",
73
+ prod: "product", yr: "year", mo: "month", cnt: "count",
74
+ avg: "average", pct: "percent", dept: "department",
75
+ };
76
+ // Suffixes that strongly constrain a column's semantic role
77
+ const SEMANTIC_SUFFIXES = new Set([
78
+ "id", "date", "status", "name", "type", "code", "key",
79
+ "region", "currency", "amount", "count", "number",
80
+ ]);
81
+ const toHuman = (s) => {
82
+ const parts = normalizeColName(s).split("_").filter(Boolean);
83
+ return parts.map((p) => ABBREV[p] || p).join(" ");
84
+ };
85
+ const getSuffix = (s) => {
86
+ const parts = normalizeColName(s).split("_").filter(Boolean);
87
+ const last = parts[parts.length - 1];
88
+ return SEMANTIC_SUFFIXES.has(last) ? last : null;
89
+ };
90
+ // Adjust a raw similarity score by structural compatibility:
91
+ // containment: source name IS the last token of expected → +0.30 (e.g. "id" → "order_id")
92
+ // same suffix: both end with same known suffix → +0.10 (confirms alignment)
93
+ // suffix mismatch: both have different known suffixes → -0.15 (strong signal of mismatch)
94
+ const adjustScore = (raw, src, exp) => {
95
+ let score = raw;
96
+ const ss = getSuffix(src), es = getSuffix(exp);
97
+ if (ss && es) score = ss === es ? Math.min(1, score + 0.1) : Math.max(0, score - 0.15);
98
+ // Containment: "id" IS the last token of "order_id" → strong structural match
99
+ const normSrc = normalizeColName(src);
100
+ const expParts = normalizeColName(exp).split("_").filter(Boolean);
101
+ if (expParts.length > 1 && expParts[expParts.length - 1] === normSrc) {
102
+ score = Math.min(1, score + 0.3);
103
+ }
104
+ return score;
105
+ };
106
+
68
107
  // Pass 2 — Embedding (if embedTexts provided)
69
108
  if (typeof embedTexts === "function" && unmappedSrc.size > 0 && unmappedExp.size > 0) {
70
109
  try {
71
110
  const srcList = [...unmappedSrc];
72
111
  const expList = [...unmappedExp];
73
- const toHuman = (s) => normalizeColName(s).replace(/_/g, " ");
74
112
  const srcTexts = srcList.map(toHuman);
75
113
  const expTexts = expList.map(toHuman);
76
114
 
@@ -83,7 +121,7 @@ export async function alignColumns(sourceColumns, expectedColumns, { embedTexts,
83
121
  const candidates = [];
84
122
  for (let i = 0; i < srcList.length; i++) {
85
123
  for (let j = 0; j < expList.length; j++) {
86
- const score = cosineSimilarity(srcVecs[i], expVecs[j]);
124
+ const score = adjustScore(cosineSimilarity(srcVecs[i], expVecs[j]), srcList[i], expList[j]);
87
125
  if (score >= threshold) candidates.push({ i, j, score });
88
126
  }
89
127
  }
@@ -109,7 +147,7 @@ export async function alignColumns(sourceColumns, expectedColumns, { embedTexts,
109
147
  const candidates = [];
110
148
  for (const src of srcList) {
111
149
  for (const exp of expList) {
112
- const score = levenshteinRatio(normalizeColName(src), normalizeColName(exp));
150
+ const score = adjustScore(levenshteinRatio(normalizeColName(src), normalizeColName(exp)), src, exp);
113
151
  if (score >= threshold) candidates.push({ src, exp, score });
114
152
  }
115
153
  }
@@ -324,8 +324,9 @@ export function parseYamlContent(content) {
324
324
  throw err;
325
325
  }
326
326
  }
327
- if (!parsed || !parsed.mesh) {
328
- throw new Error("No mesh block found in YAML content");
327
+ const hasRootEntities = ENTITY_ORDER.slice(1).some((t) => parsed?.[t]);
328
+ if (!parsed || (!parsed.mesh && !hasRootEntities)) {
329
+ throw new Error("No mesh block or entity blocks found in YAML content");
329
330
  }
330
331
  return { operations: parseYamlOperations(parsed), fileName: "inline.yaml" };
331
332
  }
@@ -348,8 +349,9 @@ export function parseYamlLandscape(filePath) {
348
349
  }
349
350
  }
350
351
 
351
- if (!parsed || !parsed.mesh) {
352
- throw new Error("No mesh block found in YAML file");
352
+ const hasRootEntities = ENTITY_ORDER.slice(1).some((t) => parsed?.[t]);
353
+ if (!parsed || (!parsed.mesh && !hasRootEntities)) {
354
+ throw new Error("No mesh block or entity blocks found in YAML file");
353
355
  }
354
356
 
355
357
  return { operations: parseYamlOperations(parsed), fileName: path.basename(filePath) };
@@ -377,6 +379,43 @@ function parseYamlOperations(parsed) {
377
379
  ENTITY_ORDER.slice(1).map((type) => [type, parsed[type]]),
378
380
  );
379
381
 
382
+ function parseEntityBlock(entityType, entityName, eBlock, meshName) {
383
+ const props = {};
384
+ const refs = {};
385
+ const op = { type: entityType, name: entityName, props, mesh: meshName || null };
386
+
387
+ for (const [k, v] of Object.entries(eBlock)) {
388
+ if (k === "connection" && typeof v === "object" && !Array.isArray(v)) {
389
+ op.connection = v;
390
+ } else if (k === "config" && typeof v === "object" && !Array.isArray(v)) {
391
+ op.config = v;
392
+ } else if (k === "template") {
393
+ op.template = v;
394
+ } else if (k === "select_columns") {
395
+ op.selectColumns = v;
396
+ } else if (k === "cast_changes") {
397
+ op.castChanges = Array.isArray(v) ? v : [v];
398
+ } else if (k === "schema" && Array.isArray(v)) {
399
+ op.schema = v;
400
+ } else if (k === "pipeline" && typeof v === "object" && !Array.isArray(v)) {
401
+ op.pipeline = parseYamlPipeline(v);
402
+ } else if (k === "secret" && typeof v === "object" && !Array.isArray(v)) {
403
+ op.secret = v;
404
+ } else if (REF_TO_TYPE[k]) {
405
+ const ref = parseYamlRef(v);
406
+ if (ref) refs[k] = ref;
407
+ } else if (!SPECIAL_KEYS.has(k)) {
408
+ props[k] = v;
409
+ }
410
+ }
411
+
412
+ if (refs.system) op.parent = refs.system;
413
+ else if (refs.source) op.parent = refs.source;
414
+ else if (refs.object) op.parent = refs.object;
415
+
416
+ return op;
417
+ }
418
+
380
419
  for (const [meshName, meshBlock] of meshEntries) {
381
420
  if (!meshBlock || typeof meshBlock !== "object" || Array.isArray(meshBlock)) continue;
382
421
  const meshProps = {};
@@ -397,40 +436,19 @@ function parseYamlOperations(parsed) {
397
436
 
398
437
  for (const [entityName, eBlock] of Object.entries(group)) {
399
438
  if (!eBlock || typeof eBlock !== "object" || Array.isArray(eBlock)) continue;
400
- const props = {};
401
- const refs = {};
402
- const op = { type: entityType, name: entityName, props, mesh: meshName };
403
-
404
- for (const [k, v] of Object.entries(eBlock)) {
405
- if (k === "connection" && typeof v === "object" && !Array.isArray(v)) {
406
- op.connection = v;
407
- } else if (k === "config" && typeof v === "object" && !Array.isArray(v)) {
408
- op.config = v;
409
- } else if (k === "template") {
410
- op.template = v;
411
- } else if (k === "select_columns") {
412
- op.selectColumns = v;
413
- } else if (k === "cast_changes") {
414
- op.castChanges = Array.isArray(v) ? v : [v];
415
- } else if (k === "schema" && Array.isArray(v)) {
416
- op.schema = v;
417
- } else if (k === "pipeline" && typeof v === "object" && !Array.isArray(v)) {
418
- op.pipeline = parseYamlPipeline(v);
419
- } else if (k === "secret" && typeof v === "object" && !Array.isArray(v)) {
420
- op.secret = v;
421
- } else if (REF_TO_TYPE[k]) {
422
- const ref = parseYamlRef(v);
423
- if (ref) refs[k] = ref;
424
- } else if (!SPECIAL_KEYS.has(k)) {
425
- props[k] = v;
426
- }
427
- }
428
-
429
- if (refs.system) op.parent = refs.system;
430
- else if (refs.source) op.parent = refs.source;
431
- else if (refs.object) op.parent = refs.object;
439
+ operations.push(parseEntityBlock(entityType, entityName, eBlock, meshName));
440
+ }
441
+ }
442
+ }
432
443
 
433
- operations.push(op);
444
+ // Handle root-level entity blocks with no mesh context (e.g. UUID-keyed updates)
445
+ if (meshEntries.length === 0) {
446
+ for (const entityType of ENTITY_ORDER.slice(1)) {
447
+ const group = rootEntityGroups[entityType];
448
+ if (!group || typeof group !== "object") continue;
449
+ for (const [entityName, eBlock] of Object.entries(group)) {
450
+ if (!eBlock || typeof eBlock !== "object" || Array.isArray(eBlock)) continue;
451
+ operations.push(parseEntityBlock(entityType, entityName, eBlock, null));
434
452
  }
435
453
  }
436
454
  }
@@ -591,12 +609,21 @@ async function deleteEntity(client, type, identifier) {
591
609
  /**
592
610
  * Search for an existing entity by type and name. Returns its identifier or null.
593
611
  */
612
+ const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
613
+
594
614
  async function findEntity(client, type, name) {
615
+ // If name is a UUID, look up directly by identifier
616
+ if (UUID_RE.test(name)) {
617
+ try {
618
+ const res = await client.get(`/data/${type}/${name}`);
619
+ if (res?.identifier) return res.identifier;
620
+ } catch { /* fall through */ }
621
+ }
595
622
  try {
596
623
  // Use type-specific list endpoint for reliable lookup
597
624
  const res = await client.get(`/data/${type}/list?per_page=100`);
598
625
  const entities = parseListResponse(res);
599
- const match = entities.find((e) => e.name === name);
626
+ const match = entities.find((e) => e.name === name || (UUID_RE.test(name) && e.identifier === name));
600
627
  if (match) return match.identifier;
601
628
  } catch { /* fall through */ }
602
629
  try {
@@ -891,7 +918,8 @@ async function runSubActions(client, op, uuid, refs) {
891
918
  */
892
919
  async function applyPipelineOp(client, op, uuid, refs) {
893
920
  const pipelineInputs = op.pipeline.inputs.map((inp) => {
894
- const inputUuid = refs[inp.ref];
921
+ // Allow raw UUIDs as refs (e.g. from FCL patch) — no lookup needed
922
+ const inputUuid = refs[inp.ref] ?? (UUID_RE.test(inp.ref) ? inp.ref : null);
895
923
  if (!inputUuid) throw new Error(`unresolved pipeline input ref: ${inp.ref}`);
896
924
  return { input_type: inp.type, identifier: inputUuid };
897
925
  });
@@ -899,7 +927,7 @@ async function applyPipelineOp(client, op, uuid, refs) {
899
927
  // Build refToKey map for resolving transform "other" refs
900
928
  const refToKey = {};
901
929
  for (const inp of op.pipeline.inputs) {
902
- const inputUuid = refs[inp.ref];
930
+ const inputUuid = refs[inp.ref] ?? (UUID_RE.test(inp.ref) ? inp.ref : null);
903
931
  if (inputUuid) refToKey[inp.ref] = `input_${inputUuid.replace(/-/g, "_")}`;
904
932
  }
905
933
 
@@ -931,6 +959,18 @@ async function applyPipelineOp(client, op, uuid, refs) {
931
959
  if (cols && cols.length > 0) slotCols[slot] = cols;
932
960
  }
933
961
  if (Object.keys(slotCols).length > 0) {
962
+ // Prune rename_column steps: remove entries whose source column doesn't exist
963
+ // in any slot (data is already named correctly — mapping was generated from a
964
+ // raw file whose column names differ from the ingested data object schema).
965
+ const allSlotCols = new Set(Object.values(slotCols).flat());
966
+ builderBody.transformations = (builderBody.transformations || []).flatMap((t) => {
967
+ if (t.transform !== "rename_column") return [t];
968
+ const changes = Object.fromEntries(
969
+ Object.entries(t.changes || {}).filter(([from]) => allSlotCols.has(from))
970
+ );
971
+ if (Object.keys(changes).length === 0) return []; // step is a no-op — drop it
972
+ return [{ ...t, changes }];
973
+ });
934
974
  const flowErr = validateTransformColumnFlow(slotCols, builderBody.transformations || []);
935
975
  if (flowErr) throw new Error(flowErr);
936
976
  }
@@ -998,7 +1038,9 @@ export async function applyLandscape(client, operations, opts = {}) {
998
1038
  const existing = await findEntity(client, op.type, op.name);
999
1039
  if (existing) {
1000
1040
  let recreateFallThrough = false;
1001
- if (recreateBroken && (op.type === "data_product" || op.type === "data_object" || op.type === "data_source")) {
1041
+ // When op.name is a UUID, we're targeting an existing entity by identifier — never delete/recreate.
1042
+ const opNameIsUuid = UUID_RE.test(op.name);
1043
+ if (!opNameIsUuid && recreateBroken && (op.type === "data_product" || op.type === "data_object" || op.type === "data_source")) {
1002
1044
  const broken = await isEntityBroken(client, op.type, existing, op);
1003
1045
  if (broken) {
1004
1046
  const deleted = await deleteEntity(client, op.type, existing);
@@ -70,10 +70,13 @@ export function parseStackFile(filePath) {
70
70
  const parsed = yaml.load(content);
71
71
  if (!parsed) return { stack: null, hasStack: false, hasMesh: false };
72
72
 
73
+ const ENTITY_TYPES = ["data_system", "data_source", "data_object", "data_product", "application"];
74
+ const hasMesh = !!parsed.mesh || ENTITY_TYPES.some((t) => !!parsed[t]);
75
+
73
76
  return {
74
77
  stack: parsed.stack ? parseStackBlock(parsed.stack) : null,
75
78
  hasStack: !!parsed.stack,
76
- hasMesh: !!parsed.mesh,
79
+ hasMesh,
77
80
  };
78
81
  }
79
82
 
@@ -81,7 +81,7 @@ export function register(api) {
81
81
  },
82
82
  });
83
83
 
84
- // ─── CLI Command: fops foundation query ──────────────────────────────────
84
+ // ─── CLI Commands: fops foundation graphql / query ───────────────────────
85
85
  api.registerCommand((program) => {
86
86
  // Find or create the 'foundation' parent command
87
87
  let foundation = program.commands.find((c) => c.name() === "foundation");
@@ -89,6 +89,44 @@ export function register(api) {
89
89
  foundation = program.command("foundation").description("Foundation platform operations");
90
90
  }
91
91
 
92
+ foundation
93
+ .command("graphql")
94
+ .description("Start a local GraphiQL explorer for the Foundation GraphQL API")
95
+ .option("--port <port>", "Port to listen on (default: random)", "0")
96
+ .action(async (opts) => {
97
+ try {
98
+ const facade = await getFacade();
99
+ const { createGraphQLRoute } = await import("./lib/graphql/hono-route.js");
100
+ const { serve } = await import("@hono/node-server");
101
+ const { exec } = await import("node:child_process");
102
+
103
+ const route = createGraphQLRoute(facade);
104
+ const port = parseInt(opts.port, 10) || 0;
105
+
106
+ const server = serve({ fetch: route.fetch, port }, (info) => {
107
+ const url = `http://127.0.0.1:${info.port}`;
108
+ console.log(chalk.cyan(`\n ── Foundation GraphQL Explorer ${"─".repeat(18)}`));
109
+ console.log(` ${chalk.green("✓")} Listening on ${chalk.bold(url)}`);
110
+ console.log(chalk.dim(" Press Ctrl+C to stop\n"));
111
+
112
+ // Open browser with platform-specific command
113
+ const opener =
114
+ process.platform === "darwin" ? "open" :
115
+ process.platform === "win32" ? "start" : "xdg-open";
116
+ exec(`${opener} ${url}`);
117
+ });
118
+
119
+ // Keep alive until Ctrl+C
120
+ await new Promise((resolve) => {
121
+ process.on("SIGINT", () => { server.close(); resolve(); });
122
+ process.on("SIGTERM", () => { server.close(); resolve(); });
123
+ });
124
+ } catch (err) {
125
+ console.error(chalk.red(` ${err.message}`));
126
+ process.exitCode = 1;
127
+ }
128
+ });
129
+
92
130
  foundation
93
131
  .command("query <graphql>")
94
132
  .description("Execute an ad-hoc GraphQL query against the Foundation API")
@@ -28,12 +28,15 @@ export const dataObjectResolvers = {
28
28
  // Normalize: the API may return { fields: [...] } or { schema: { fields: [...] } }
29
29
  const fields = res?.fields || res?.schema?.fields || [];
30
30
  return {
31
- fields: fields.map((f) => ({
32
- name: f.name || f.column_name,
33
- dataType: f.data_type || f.dataType || f.type,
34
- primary: f.primary ?? f.is_primary ?? false,
35
- nullable: f.nullable ?? f.is_nullable ?? true,
36
- })),
31
+ fields: fields.map((f) => {
32
+ const raw = f.data_type || f.dataType || f.type;
33
+ return {
34
+ name: f.name || f.column_name,
35
+ dataType: raw && typeof raw === "object" ? raw.column_type ?? null : raw ?? null,
36
+ primary: f.primary ?? f.is_primary ?? false,
37
+ nullable: f.nullable ?? f.is_nullable ?? true,
38
+ };
39
+ }),
37
40
  };
38
41
  } catch { return null; }
39
42
  },
@@ -29,12 +29,15 @@ export const dataProductResolvers = {
29
29
  const res = await loaders.dataProductSchema.load(id);
30
30
  if (res instanceof Error) return null;
31
31
  const details = res?.details || res?.schema || res;
32
- const columns = (res?.columns || res?.fields || []).map((f) => ({
33
- name: f.name || f.column_name,
34
- dataType: f.data_type || f.dataType || f.type,
35
- primary: f.primary ?? f.is_primary ?? false,
36
- nullable: f.nullable ?? f.is_nullable ?? true,
37
- }));
32
+ const columns = (res?.columns || res?.fields || []).map((f) => {
33
+ const raw = f.data_type || f.dataType || f.type;
34
+ return {
35
+ name: f.name || f.column_name,
36
+ dataType: raw && typeof raw === "object" ? raw.column_type ?? null : raw ?? null,
37
+ primary: f.primary ?? f.is_primary ?? false,
38
+ nullable: f.nullable ?? f.is_nullable ?? true,
39
+ };
40
+ });
38
41
  return { details, columns };
39
42
  } catch { return null; }
40
43
  },