@meshxdata/fops 0.1.32 → 0.1.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. package/CHANGELOG.md +372 -0
  2. package/package.json +1 -2
  3. package/src/agent/llm.js +3 -3
  4. package/src/commands/lifecycle.js +16 -0
  5. package/src/plugins/bundled/fops-plugin-dai-ttyd/fops.plugin.json +6 -0
  6. package/src/plugins/bundled/fops-plugin-dai-ttyd/index.js +182 -0
  7. package/src/plugins/bundled/fops-plugin-dai-ttyd/lib/client.js +164 -0
  8. package/src/plugins/bundled/fops-plugin-dai-ttyd/package.json +1 -0
  9. package/src/plugins/bundled/fops-plugin-embeddings/index.js +3 -1
  10. package/src/plugins/bundled/fops-plugin-embeddings/lib/indexer.js +1 -1
  11. package/src/plugins/bundled/fops-plugin-file/demo/landscape.yaml +67 -0
  12. package/src/plugins/bundled/fops-plugin-file/demo/orders_bad.csv +6 -0
  13. package/src/plugins/bundled/fops-plugin-file/demo/orders_good.csv +7 -0
  14. package/src/plugins/bundled/fops-plugin-file/demo/orders_reference.csv +6 -0
  15. package/src/plugins/bundled/fops-plugin-file/demo/orders_renamed.aligned.csv +6 -0
  16. package/src/plugins/bundled/fops-plugin-file/demo/orders_renamed.csv +6 -0
  17. package/src/plugins/bundled/fops-plugin-file/demo/rules.json +8 -0
  18. package/src/plugins/bundled/fops-plugin-file/demo/run.sh +110 -0
  19. package/src/plugins/bundled/fops-plugin-file/index.js +140 -24
  20. package/src/plugins/bundled/fops-plugin-file/lib/embed-index.js +7 -0
  21. package/src/plugins/bundled/fops-plugin-file/lib/match.js +11 -4
  22. package/src/plugins/bundled/fops-plugin-foundation/index.js +1574 -101
  23. package/src/plugins/bundled/fops-plugin-foundation/lib/align.js +42 -4
  24. package/src/plugins/bundled/fops-plugin-foundation/lib/apply.js +83 -41
  25. package/src/plugins/bundled/fops-plugin-foundation/lib/stack-apply.js +4 -1
  26. package/src/plugins/bundled/fops-plugin-foundation-graphql/index.js +39 -1
  27. package/src/plugins/bundled/fops-plugin-foundation-graphql/lib/graphql/resolvers/data-object.js +9 -6
  28. package/src/plugins/bundled/fops-plugin-foundation-graphql/lib/graphql/resolvers/data-product.js +9 -6
  29. package/src/ui/tui/App.js +1 -1
@@ -0,0 +1,164 @@
1
+ import http from "node:http";
2
+ import https from "node:https";
3
+
4
+ const DEFAULT_BASE_URL = "https://api.dashboards.ai";
5
+
6
// Resolve the API base URL: explicit config wins, then the DAI_TTYD_URL
// environment variable, then the built-in default. Trailing slashes are
// stripped so callers can safely append paths.
function resolveBaseUrl(config) {
  const fromConfig = config?.baseUrl?.trim();
  const fromEnv = process.env.DAI_TTYD_URL?.trim();
  const base = fromConfig || fromEnv || DEFAULT_BASE_URL;
  return base.replace(/\/+$/, "");
}
13
+
14
// Resolve the bearer token from config, falling back to the DAI_AUTH_TOKEN
// environment variable. All whitespace (including newlines introduced by
// shell wrapping or copy-paste) is removed from the token.
function resolveAuthToken(config) {
  const source = config?.authToken || process.env.DAI_AUTH_TOKEN || "";
  return source.split(/\s+/).join("");
}
18
+
19
/**
 * Minimal HTTP client for the dashboards.ai "talk to your data" (TTYD) API.
 *
 * The base URL and bearer token are resolved at construction time from the
 * supplied config, falling back to the DAI_TTYD_URL / DAI_AUTH_TOKEN
 * environment variables (see resolveBaseUrl / resolveAuthToken).
 */
export class DaiTTYDClient {
  /**
   * @param {{baseUrl?: string, authToken?: string}} [config] - Optional
   *   overrides for the API endpoint and bearer token.
   */
  constructor(config = {}) {
    this.baseUrl = resolveBaseUrl(config);
    this.authToken = resolveAuthToken(config);
  }

  /** Standard JSON + bearer-auth headers sent with every request. */
  _headers() {
    return {
      "Content-Type": "application/json",
      Accept: "application/json",
      Authorization: `Bearer ${this.authToken}`,
    };
  }

  /**
   * POST /ttyd/threads → thread id (number)
   *
   * @param {string} [title="fops session"] - Human-readable thread title.
   * @returns {Promise<number|string>} The new thread id (`id` or `thread_id`
   *   from the response body, whichever the backend returns).
   * @throws {Error} If the backend still answers >= 400 after the retry, or
   *   the success response carries no thread id.
   */
  async createThread(title = "fops session") {
    const url = `${this.baseUrl}/ttyd/threads`;
    let { status, data } = await request("POST", url, this._headers(), { title });
    // Backend may return 400 "User with email already exists" on the first call
    // when auto-provisioning the user — retry once, it should succeed
    if (status === 400 && data.includes("already exists")) {
      ({ status, data } = await request("POST", url, this._headers(), { title }));
    }
    if (status >= 400) throw new Error(`Failed to create thread (HTTP ${status}): ${data}`);
    // NOTE(review): JSON.parse throws a bare SyntaxError on a non-JSON 2xx
    // body — confirm the backend always returns JSON on success.
    const body = JSON.parse(data);
    const id = body.id ?? body.thread_id;
    if (id == null) throw new Error(`No thread id in response: ${data}`);
    return id;
  }

  /**
   * POST /ttyd/answer/stream — async generator yielding SSE event objects.
   * Each event: { type: "sql"|"answer"|"done"|"error", content?: string, message?: string }
   *
   * @param {string} question - Natural-language question to answer.
   * @param {number|string} threadId - Conversation thread to post into.
   * @param {{dashboardId?: number|string, tileId?: number|string}} [scope]
   *   Optional dashboard/tile context; values are coerced with Number().
   */
  async *askStream(question, threadId, { dashboardId, tileId } = {}) {
    const url = `${this.baseUrl}/ttyd/answer/stream`;
    const payload = { question, thread_id: threadId };
    if (dashboardId != null) payload.dashboard_id = Number(dashboardId);
    if (tileId != null) payload.tile_id = Number(tileId);
    yield* streamSSE("POST", url, this._headers(), payload);
  }

  /**
   * Convenience: create a thread, ask one question, collect full sql + summary.
   *
   * @param {string} question - Natural-language question.
   * @param {{threadId?: number|string, dashboardId?: number|string, tileId?: number|string}} [opts]
   *   Pass threadId to reuse an existing thread; otherwise one is created.
   * @returns {Promise<{threadId: number|string, sql: string, summary: string}>}
   *   Content of the last "sql" event plus all "answer" chunks concatenated.
   * @throws {Error} When the stream emits an "error" event.
   */
  async ask(question, { threadId, dashboardId, tileId } = {}) {
    const id = threadId ?? (await this.createThread());
    let sql = "";
    const summaryParts = [];
    for await (const event of this.askStream(question, id, { dashboardId, tileId })) {
      if (event.type === "sql") sql = event.content ?? "";
      else if (event.type === "answer") summaryParts.push(event.content ?? "");
      else if (event.type === "error") throw new Error(event.message ?? "TTYD error");
      else if (event.type === "done") break;
    }
    return { threadId: id, sql, summary: summaryParts.join("") };
  }

  /**
   * Verify connectivity — returns true/false, never throws.
   * NOTE(review): this POSTs to /ttyd/threads, so a successful ping creates a
   * real thread titled "ping" server-side — confirm that is acceptable.
   */
  async ping() {
    try {
      const url = `${this.baseUrl}/ttyd/threads`;
      const { status } = await request("POST", url, this._headers(), { title: "ping" });
      return status < 500;
    } catch {
      return false;
    }
  }
}
86
+
87
+ // ─── Helpers ────────────────────────────────────────────────────────────────
88
+
89
/**
 * Perform a single JSON HTTP(S) request and buffer the full response body.
 *
 * @param {string} method - HTTP verb, e.g. "POST".
 * @param {string} url - Absolute URL (http or https).
 * @param {object} headers - Headers to send (Content-Length is added).
 * @param {*} body - JSON-serialisable request payload.
 * @param {number} [timeoutMs=15000] - Socket inactivity timeout.
 * @returns {Promise<{status: number, data: string}>} Status code + raw body text.
 */
function request(method, url, headers, body, timeoutMs = 15_000) {
  return new Promise((resolve, reject) => {
    // URL parsing stays inside the executor so a malformed URL rejects the
    // promise instead of throwing synchronously.
    const target = new URL(url);
    const transport = target.protocol === "https:" ? https : http;
    const payload = JSON.stringify(body);
    const requestOptions = {
      hostname: target.hostname,
      port: target.port || (target.protocol === "https:" ? 443 : 80),
      path: target.pathname + target.search,
      method,
      headers: { ...headers, "Content-Length": Buffer.byteLength(payload) },
      timeout: timeoutMs,
    };
    const outgoing = transport.request(requestOptions, (response) => {
      const chunks = [];
      response.on("data", (chunk) => chunks.push(chunk));
      response.on("end", () => resolve({ status: response.statusCode, data: chunks.join("") }));
    });
    outgoing.on("error", reject);
    outgoing.on("timeout", () => {
      outgoing.destroy();
      reject(new Error("Request timeout"));
    });
    outgoing.write(payload);
    outgoing.end();
  });
}
113
+
114
/**
 * Async generator that streams Server-Sent Events from an HTTP endpoint.
 *
 * Sends `body` as JSON via `method` and yields one parsed object per
 * "data: {...}" line in the response. Iteration stops early once an event
 * with type "done" or "error" has been yielded.
 *
 * @param {string} method - HTTP verb, e.g. "POST".
 * @param {string} url - Absolute URL (http or https).
 * @param {object} headers - Base headers (Accept is forced to text/event-stream).
 * @param {*} body - JSON-serialisable request payload.
 * @param {number} [timeoutMs=300000] - Socket inactivity timeout.
 * @yields {object} Parsed JSON payload of each SSE data line.
 * @throws {Error} On HTTP >= 400, connection error, or stream timeout.
 */
async function* streamSSE(method, url, headers, body, timeoutMs = 300_000) {
  const parsed = new URL(url);
  const lib = parsed.protocol === "https:" ? https : http;
  const encoded = JSON.stringify(body);
  const options = {
    hostname: parsed.hostname,
    port: parsed.port || (parsed.protocol === "https:" ? 443 : 80),
    path: parsed.pathname + parsed.search,
    method,
    headers: {
      ...headers,
      "Content-Length": Buffer.byteLength(encoded),
      Accept: "text/event-stream",
    },
    timeout: timeoutMs,
  };

  const res = await new Promise((resolve, reject) => {
    const req = lib.request(options, resolve);
    req.on("error", reject);
    req.on("timeout", () => { req.destroy(); reject(new Error("Stream timeout")); });
    req.write(encoded);
    req.end();
  });

  if (res.statusCode >= 400) {
    let errData = "";
    for await (const chunk of res) errData += chunk;
    throw new Error(`HTTP ${res.statusCode}: ${errData}`);
  }

  // Parse one "data: {...}" line into an event object; returns null for
  // non-data lines, empty payloads, and invalid JSON.
  const parseDataLine = (line) => {
    if (!line.startsWith("data:")) return null;
    const raw = line.slice("data:".length).trim();
    if (!raw) return null;
    try { return JSON.parse(raw); } catch { return null; }
  };

  let buffer = "";
  for await (const chunk of res) {
    buffer += chunk.toString();
    const lines = buffer.split("\n");
    buffer = lines.pop(); // keep any incomplete line
    for (const line of lines) {
      const data = parseDataLine(line);
      if (data == null) continue;
      yield data;
      if (data?.type === "done" || data?.type === "error") return;
    }
  }

  // Fix: the final event may arrive without a trailing newline, in which case
  // it is still sitting in `buffer` when the stream ends — previously it was
  // silently dropped. Flush it here.
  const tail = parseDataLine(buffer);
  if (tail != null) yield tail;
}
@@ -0,0 +1 @@
1
+ { "type": "module" }
@@ -264,7 +264,9 @@ export default {
264
264
  if (!texts || texts.length === 0) return [];
265
265
  if (!embeddingClient.isModelCached()) return [];
266
266
  try {
267
- return await embeddingClient.embed(texts);
267
+ await embeddingClient.isReady();
268
+ markOnnxLoaded();
269
+ return await embeddingClient.embedBatch(texts);
268
270
  } catch { return []; }
269
271
  },
270
272
  });
@@ -1047,7 +1047,7 @@ export async function runIndex({ source, sources: explicitSources, force, onProg
1047
1047
  // If the stored model differs from current NL_MODEL, force full re-index
1048
1048
  const currentStoreModel = nlStore.load().model;
1049
1049
  if (currentStoreModel && currentStoreModel !== NL_MODEL) {
1050
- log(WARN(` Model changed (${currentStoreModel} → ${NL_MODEL}), forcing full re-index`));
1050
+ log(WARN(` Model changed (${currentStoreModel} → ${NL_MODEL}) one-time re-index to rebuild vectors with the new model`));
1051
1051
  force = true;
1052
1052
  nlStore.clear();
1053
1053
  }
@@ -0,0 +1,67 @@
1
+ mesh:
2
+ ecommerce:
3
+ label: EC
4
+ description: E-commerce analytics mesh (file plugin demo)
5
+ purpose: Demonstrate fops embed file smart matching
6
+
7
+ data_system:
8
+ datalake:
9
+ label: DL
10
+ description: Central data lake
11
+
12
+ data_source:
13
+ orders_bucket:
14
+ label: ORD
15
+ description: Orders data in S3
16
+ system: data_system.datalake
17
+ connection:
18
+ type: s3
19
+ url: http://foundation-storage-engine:8080
20
+ access_key: S3_ACCESS_KEY
21
+ access_secret: S3_SECRET_KEY
22
+ secret:
23
+ S3_ACCESS_KEY: minio
24
+ S3_SECRET_KEY: minio123
25
+
26
+ data_object:
27
+ orders:
28
+ label: ORD
29
+ description: Raw orders dataset
30
+ source: data_source.orders_bucket
31
+ config:
32
+ data_object_type: csv
33
+ path: /raw/orders/orders.csv
34
+ has_header: true
35
+ delimiter: ","
36
+
37
+ data_product:
38
+ orders_sadp:
39
+ label: ORD
40
+ description: Source-aligned orders data product
41
+ object: data_object.orders
42
+ template: sadp_passthrough
43
+ select_columns: [order_id, customer_id, amount, currency, order_date, status, region]
44
+ cast_changes:
45
+ - column: amount
46
+ data_type: decimal
47
+ kwargs:
48
+ precision: 10
49
+ scale: 2
50
+ schema:
51
+ - name: order_id
52
+ type: integer
53
+ primary: true
54
+ - name: customer_id
55
+ type: integer
56
+ - name: amount
57
+ type: decimal
58
+ precision: 10
59
+ scale: 2
60
+ - name: currency
61
+ type: varchar
62
+ - name: order_date
63
+ type: date
64
+ - name: status
65
+ type: varchar
66
+ - name: region
67
+ type: varchar
@@ -0,0 +1,6 @@
1
+ order_id,customer_id,total_amount,currency,order_date,region
2
+ 3001,7001,120.00,USD,2024-03-01,north_america
3
+ 3002,7002,not_a_number,EUR,2024-03-02,europe
4
+ 3003,7003,88.50,USD,bad-date,asia_pacific
5
+ 3004,7004,-50.00,GBP,2024-03-04,europe
6
+ 3005,7005,405.00,USD,2024-03-05,north_america
@@ -0,0 +1,7 @@
1
+ order_id,customer_id,amount,currency,order_date,status,region
2
+ 2001,6001,99.00,USD,2024-03-01,completed,north_america
3
+ 2002,6002,175.50,EUR,2024-03-02,pending,europe
4
+ 2003,6003,420.00,USD,2024-03-03,completed,asia_pacific
5
+ 2004,6004,58.25,GBP,2024-03-04,completed,europe
6
+ 2005,6005,200.00,USD,2024-03-05,pending,north_america
7
+ 2006,6006,315.00,USD,2024-03-06,completed,north_america
@@ -0,0 +1,6 @@
1
+ order_id,customer_id,amount,currency,order_date,status,region
2
+ 1001,5001,149.99,USD,2024-01-03,completed,north_america
3
+ 1002,5002,89.50,EUR,2024-01-04,pending,europe
4
+ 1003,5003,230.00,USD,2024-01-05,completed,north_america
5
+ 1004,5004,45.00,GBP,2024-01-06,cancelled,europe
6
+ 1005,5005,310.75,USD,2024-01-07,completed,asia_pacific
@@ -0,0 +1,6 @@
1
+ id,customer_id,price,ccy,date,order_status,geo_region
2
+ 4001,8001,99.00,USD,2024-03-01,completed,north_america
3
+ 4002,8002,145.75,EUR,2024-03-02,pending,europe
4
+ 4003,8003,280.00,USD,2024-03-03,completed,asia_pacific
5
+ 4004,8004,62.50,GBP,2024-03-04,completed,europe
6
+ 4005,8005,195.00,USD,2024-03-05,pending,north_america
@@ -0,0 +1,6 @@
1
+ id,cust_id,price,ccy,date,order_status,geo_region
2
+ 4001,8001,99.00,USD,2024-03-01,completed,north_america
3
+ 4002,8002,145.75,EUR,2024-03-02,pending,europe
4
+ 4003,8003,280.00,USD,2024-03-03,completed,asia_pacific
5
+ 4004,8004,62.50,GBP,2024-03-04,completed,europe
6
+ 4005,8005,195.00,USD,2024-03-05,pending,north_america
@@ -0,0 +1,8 @@
1
+ {
2
+ "columns": {
3
+ "amount": { "min": 0, "type": "decimal" },
4
+ "order_id": { "type": "integer", "unique": true },
5
+ "currency": { "allowed": ["USD", "EUR", "GBP"] },
6
+ "status": { "allowed": ["pending", "completed", "cancelled"] }
7
+ }
8
+ }
@@ -0,0 +1,110 @@
1
+ #!/usr/bin/env bash
2
+ # fops embed file — demo script
3
+ #
4
+ # Showcases the full pipeline:
5
+ # Steps 1-5: pure schema / type comparison (no index needed)
6
+ # Step 6: MiniLM + RRF smart matching (requires Foundation + `fops embed file index`)
7
+ # Step 7: Fix CSV — rename columns to match reference (fops foundation align)
8
+ # In interactive mode, step 6 prompts to fix CSV or update the Data Product automatically.
9
+ #
10
+ # Files:
11
+ # orders_reference.csv — expected schema (7 cols: order_id, customer_id, amount, currency, order_date, status, region)
12
+ # orders_good.csv — clean file, matches reference exactly → PASS
13
+ # orders_bad.csv — renamed column (total_amount), missing status → FAIL
14
+ # orders_renamed.csv — same data, all column names abbreviated (id, cust_id, price...) → no Jaccard match → MiniLM matches
15
+
16
+ set -e
17
+ DEMO_DIR="$(cd "$(dirname "$0")" && pwd)"
18
+
19
+ section() {
20
+ echo
21
+ echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
22
+ echo " $1"
23
+ echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
24
+ }
25
+
26
+ # ── 1. Preview: inspect schema + inferred types ───────────────────────────────
27
+ section "1 · Preview — inspect schema + inferred types"
28
+ fops embed file preview "$DEMO_DIR/orders_good.csv"
29
+
30
+ # ── 2. Validate: clean file against reference → PASS ─────────────────────────
31
+ section "2 · Validate — clean file (expect PASS)"
32
+ fops embed file validate "$DEMO_DIR/orders_good.csv" \
33
+ --reference "$DEMO_DIR/orders_reference.csv"
34
+
35
+ # ── 3. Validate: bad file → FAIL (missing column, renamed column) ─────────────
36
+ section "3 · Validate — bad file (expect FAIL)"
37
+ fops embed file validate "$DEMO_DIR/orders_bad.csv" \
38
+ --reference "$DEMO_DIR/orders_reference.csv" || true
39
+
40
+ # ── 4. Preview with reference diff ────────────────────────────────────────────
41
+ section "4 · Preview with diff — visualise column drift vs reference"
42
+ fops embed file preview "$DEMO_DIR/orders_bad.csv" \
43
+ --reference "$DEMO_DIR/orders_reference.csv"
44
+
45
+ # ── 5. Smart match (Jaccard only, no index) ───────────────────────────────────
46
+ # orders_renamed.csv has fully abbreviated column names — zero Jaccard overlap.
47
+ # --no-semantic forces Jaccard-only mode (skips MiniLM, no ONNX load).
48
+ section "5 · Smart match — Jaccard only (no index): expect no strong match"
49
+ fops embed file "$DEMO_DIR/orders_renamed.csv" --local --no-semantic || true
50
+
51
+ # ── 6. Smart match (MiniLM + RRF, requires index) ────────────────────────────
52
+ # When the SQLite schema index exists (built by `fops embed file index`), the
53
+ # matching pipeline upgrades to:
54
+ # - Signal 1: column-level MiniLM cosine + greedy bipartite matching
55
+ # - Signal 2: Jaccard overlap on exact column names
56
+ # - Signal 3: type compatibility score on matched column pairs
57
+ # - Signal 4: MiniLM cosine on table/entity names
58
+ # - Signal 5: fraction of candidate columns with sim > 0.75
59
+ # All fused via Reciprocal Rank Fusion (RRF k=60).
60
+ #
61
+ # With MiniLM, "id"→"order_id", "price"→"amount", "ccy"→"currency" etc. all
62
+ # match semantically even with zero Jaccard overlap.
63
+ section "6 · Smart match — MiniLM + RRF (requires Foundation + index)"
64
+
65
+ SCHEMA_DB="$HOME/.fops/file/schema-index.db"
66
+ if [ -f "$SCHEMA_DB" ]; then
67
+ echo " Index found — running full semantic match..."
68
+ fops embed file "$DEMO_DIR/orders_renamed.csv"
69
+ else
70
+ echo " No schema index found. To enable MiniLM + RRF matching, run:"
71
+ echo
72
+ echo " # 1. Start Foundation"
73
+ echo " fops up"
74
+ echo
75
+ echo " # 2. Register your data objects (or use the demo landscape)"
76
+ echo " fops apply $DEMO_DIR/landscape.yaml"
77
+ echo
78
+ echo " # 3. Build the SQLite schema index (embeds column names with MiniLM)"
79
+ echo " fops embed file index"
80
+ echo
81
+ echo " # 4. Re-run this step — orders_renamed.csv will now match via"
82
+ echo " # semantic similarity (id→order_id, price→amount, ccy→currency)"
83
+ echo " fops embed file $DEMO_DIR/orders_renamed.csv"
84
+ echo
85
+ echo " Matching signals when index is present:"
86
+ echo " [1] MiniLM column cosine — greedy bipartite match per column pair"
87
+ echo " [2] Jaccard overlap — exact column name set overlap"
88
+ echo " [3] Type compatibility — integer/decimal/date match on mapped cols"
89
+ echo " [4] Name semantic — MiniLM cosine on table/entity names"
90
+ echo " [5] Mapping confidence — fraction of cols with sim > 0.75"
91
+ echo " ↳ all fused via Reciprocal Rank Fusion (RRF k=60)"
92
+ fi
93
+
94
+ # ── 7. Fix CSV — rename abbreviated columns to reference names ────────────────
95
+ # After a smart match, fops embed file (interactive) prompts:
96
+ # ▸ Fix CSV — rename columns to match reference (→ orders_renamed.aligned.csv)
97
+ # Fix Data Product — update schema to match this file
98
+ # Nothing — done
99
+ #
100
+ # This step demonstrates the "Fix CSV" path explicitly via fops foundation align,
101
+ # which uses the same MiniLM + Levenshtein column alignment under the hood.
102
+ section "7 · Fix CSV — rename columns to match reference"
103
+ # Uses MiniLM semantic matching to map abbreviated column names to reference schema.
104
+ # The interactive Fix CSV prompt inside `fops embed file` (step 6) runs this automatically.
105
+ # || true: ONNX native handles trigger a SIGKILL on exit — this is expected and harmless.
106
+ fops foundation align "$DEMO_DIR/orders_renamed.csv" \
107
+ "order_id,customer_id,amount,currency,order_date,status,region" \
108
+ --output "$DEMO_DIR/orders_renamed.aligned.csv" || true
109
+
110
+ echo
@@ -35,6 +35,7 @@ export function register(api) {
35
35
  .option("--json", "Output matches as JSON (for scripting)")
36
36
  .option("--no-auto", "Always show picker, never auto-select")
37
37
  .option("--local", "Match against local directory files instead of Foundation landscape")
38
+ .option("--no-semantic", "Jaccard-only matching — skip MiniLM embeddings (useful for testing without the model)")
38
39
  .option("--sheet <name>", "Sheet name or index for XLSX files")
39
40
  .option("--data-source <id>", "Data source identifier for attempt logging")
40
41
  .option("--file-family <name>", "Logical file family name for logging")
@@ -71,7 +72,7 @@ export function register(api) {
71
72
  }
72
73
 
73
74
  let embeddingClient = null;
74
- if (typeof api.getService === "function") {
75
+ if (!opts.noSemantic && typeof api.getService === "function") {
75
76
  try { embeddingClient = api.getService("embeddings"); } catch { /* not loaded */ }
76
77
  }
77
78
 
@@ -338,17 +339,24 @@ export function register(api) {
338
339
  columnEmbeddings: candResult.columnEmbeddings,
339
340
  nameEmbedding: candResult.nameEmbedding,
340
341
  };
341
- const ranked = rankMatches(candidateData, references, rankOpts)
342
- .filter((r) => r.score >= threshold)
343
- .filter((r) => r.overlapCount >= 2 || r.overlapCount >= r.overlapTotal);
342
+ const allRanked = rankMatches(candidateData, references, rankOpts)
343
+ .filter((r) => r.score >= threshold);
344
+
345
+ // In Jaccard-only mode, require at least 2 overlapping column names to avoid false positives.
346
+ // When embeddings are active (v2 or v3), semantic matching works without exact name overlap.
347
+ const ranked = (hasEmbeddings || hasColumnEmbeddings)
348
+ ? allRanked
349
+ : allRanked.filter((r) => r.overlapCount >= 2 || r.overlapCount >= r.overlapTotal);
344
350
 
345
351
  spinner.stop();
346
352
 
347
353
  if (ranked.length === 0) {
348
- console.log(chalk.yellow(" No strong matches found (need at least 2 overlapping columns)."));
354
+ console.log(chalk.yellow(" No strong matches found."));
349
355
  console.log(chalk.dim(" Use `fops embed file validate <file> --reference <ref>` for explicit validation."));
350
- if (!hasEmbeddings && foundationAvailable) {
351
- console.log(chalk.dim(" Tip: run `fops embed index` to enable semantic matching."));
356
+ if (!hasEmbeddings && !hasColumnEmbeddings && foundationAvailable) {
357
+ console.log(chalk.dim(" Tip: run `fops embed index` then `fops embed file index` to enable semantic matching."));
358
+ } else if (hasEmbeddings && !hasColumnEmbeddings && foundationAvailable) {
359
+ console.log(chalk.dim(" Tip: run `fops embed file index` (model cached) for column-level semantic matching."));
352
360
  }
353
361
  return;
354
362
  }
@@ -469,24 +477,52 @@ export function register(api) {
469
477
  rows: null,
470
478
  };
471
479
 
480
+ // When semantic column mappings are available, translate candidate column
481
+ // names to their matched reference names so schema/type checks are meaningful.
482
+ let candHeadersForValidation = candResult.headers;
483
+ let candTypesForValidation = candResult.types;
484
+ const colMap = selectedRef.columnMappings;
485
+ if (colMap?.length > 0) {
486
+ const translatedHeaders = [];
487
+ const translatedTypes = {};
488
+ for (const m of colMap) {
489
+ translatedHeaders.push(m.referenceCol);
490
+ if (candResult.types?.[m.candidateCol] !== undefined) {
491
+ translatedTypes[m.referenceCol] = candResult.types[m.candidateCol];
492
+ }
493
+ }
494
+ candHeadersForValidation = translatedHeaders;
495
+ candTypesForValidation = translatedTypes;
496
+ }
497
+
472
498
  const schemaResult = compareSchemas(
473
499
  { headers: refResult.headers },
474
- { headers: candResult.headers },
500
+ { headers: candHeadersForValidation },
475
501
  { candidateCentric: true },
476
502
  );
477
503
 
478
504
  let typeResult = null;
479
- const commonCols = candResult.headers.filter((h) => refResult.headers.includes(h));
480
- if (commonCols.length > 0 && refResult.types && candResult.types) {
481
- typeResult = compareTypes(refResult.types, candResult.types, commonCols);
505
+ const commonCols = candHeadersForValidation.filter((h) => refResult.headers.includes(h));
506
+ if (commonCols.length > 0 && refResult.types && candTypesForValidation) {
507
+ typeResult = compareTypes(refResult.types, candTypesForValidation, commonCols);
482
508
  }
483
509
 
484
510
  // Auto-derive value rules from schema metadata (sample up to 10K rows)
485
511
  const autoRules = deriveRules(refResult.types, commonCols);
486
512
  let valuesResult = null;
487
513
  if (autoRules && candResult.rows?.length > 0) {
514
+ // Map row values to reference column names for value checks
488
515
  const sampleRows = candResult.rows.length > 10000 ? candResult.rows.slice(0, 10000) : candResult.rows;
489
- valuesResult = checkValues(sampleRows, candResult.headers, autoRules);
516
+ if (colMap?.length > 0) {
517
+ // Re-key rows from candidate names to reference names
518
+ const keyMap = Object.fromEntries(colMap.map((m) => [m.candidateCol, m.referenceCol]));
519
+ const remappedRows = sampleRows.map((row) =>
520
+ Object.fromEntries(Object.entries(row).map(([k, v]) => [keyMap[k] ?? k, v])),
521
+ );
522
+ valuesResult = checkValues(remappedRows, candHeadersForValidation, autoRules);
523
+ } else {
524
+ valuesResult = checkValues(sampleRows, candResult.headers, autoRules);
525
+ }
490
526
  }
491
527
 
492
528
  const report = buildReport(schemaResult, typeResult, valuesResult, null);
@@ -503,6 +539,82 @@ export function register(api) {
503
539
  if (report.verdict === "FAIL") {
504
540
  process.exitCode = 1;
505
541
  }
542
+
543
+ // ── 11. Post-match action prompt ────────────────────────────────
544
+ if (isTTY && !opts.json) {
545
+ const ext = pathMod.extname(candidate).toLowerCase();
546
+ const canFixCsv = ext === ".csv" && colMap?.length > 0;
547
+ const canFixDp = selectedRef.entityType === "data_product" && !!foundationClient;
548
+
549
+ if (canFixCsv || canFixDp) {
550
+ const baseName = pathMod.basename(candidate, ext);
551
+ const alignedPath = pathMod.join(pathMod.dirname(candidate), `${baseName}.aligned${ext}`);
552
+
553
+ const actionOptions = [];
554
+ if (canFixCsv) {
555
+ actionOptions.push({
556
+ label: `Fix CSV — rename columns to match reference (→ ${pathMod.basename(alignedPath)})`,
557
+ value: "fix-csv",
558
+ });
559
+ }
560
+ if (canFixDp) {
561
+ actionOptions.push({
562
+ label: `Fix Data Product — update "${selectedRef.name}" schema to match this file`,
563
+ value: "fix-dp",
564
+ });
565
+ }
566
+ actionOptions.push({ label: "Nothing — done", value: null });
567
+
568
+ let selectActionFn;
569
+ const confirmPath2 = api.getCliPath?.("src", "ui", "confirm.js");
570
+ if (confirmPath2) {
571
+ const { pathToFileURL: p2u } = await import("node:url");
572
+ ({ selectOption: selectActionFn } = await import(p2u(confirmPath2).href));
573
+ } else {
574
+ ({ selectOption: selectActionFn } = await import("../../../ui/confirm.js"));
575
+ }
576
+
577
+ console.log("");
578
+ const action = await selectActionFn("Next step:", actionOptions);
579
+
580
+ if (action === "fix-csv") {
581
+ const Papa = (await import("papaparse")).default;
582
+ const renameMap = Object.fromEntries(colMap.map((m) => [m.candidateCol, m.referenceCol]));
583
+ const newHeaders = candResult.headers.map((h) => renameMap[h] ?? h);
584
+ const newRows = (candResult.rows || []).map((row) =>
585
+ Object.fromEntries(Object.entries(row).map(([k, v]) => [renameMap[k] ?? k, v])),
586
+ );
587
+ const csvText = Papa.unparse({ fields: newHeaders, data: newRows });
588
+ (await import("node:fs")).writeFileSync(alignedPath, csvText, "utf8");
589
+ console.log(chalk.green(`\n ✓ Written: ${alignedPath}`));
590
+ const renamed = colMap.filter((m) => m.candidateCol !== m.referenceCol);
591
+ if (renamed.length > 0) {
592
+ console.log(chalk.dim(`\n Renamed ${renamed.length} column(s):`));
593
+ for (const m of renamed) {
594
+ console.log(chalk.dim(` ${m.candidateCol.padEnd(22)} → ${m.referenceCol}`));
595
+ }
596
+ }
597
+ } else if (action === "fix-dp") {
598
+ const typeToApiCol = (t) => ({ integer: "INTEGER", decimal: "DECIMAL", date: "DATE", timestamp: "TIMESTAMP", boolean: "BOOLEAN" })[t] || "VARCHAR";
599
+ const fields = candResult.headers.map((h) => {
600
+ const ti = candResult.types?.[h];
601
+ const col = { name: h, data_type: { column_type: typeToApiCol(ti?.type || "string") } };
602
+ if (ti?.type === "decimal" && ti.precision != null) {
603
+ col.data_type.meta = { precision: String(ti.precision), scale: String(ti.scale ?? 0) };
604
+ }
605
+ return col;
606
+ });
607
+ const schemaBody = { details: { data_product_type: "user", fields } };
608
+ try {
609
+ await foundationClient.put(`/data/data_product/schema?identifier=${selectedRef.id}`, schemaBody);
610
+ console.log(chalk.green(`\n ✓ Schema updated for "${selectedRef.name}"`));
611
+ console.log(chalk.dim(` ${fields.length} columns from ${pathMod.basename(candidate)}`));
612
+ } catch (err) {
613
+ console.error(chalk.red(`\n ✗ Schema update failed: ${err.message}`));
614
+ }
615
+ }
616
+ }
617
+ }
506
618
  } catch (err) {
507
619
  spinner.stop();
508
620
  console.error(chalk.red(`\n ✗ ${err.message}\n`));
@@ -777,21 +889,23 @@ export function register(api) {
777
889
  let refTypes = null; // { colName: { type } }
778
890
  let refLabel = null;
779
891
 
780
- if (opts.reference) {
892
+ // opts.reference may be consumed by the parent fileCmd (Commander option inheritance)
893
+ const referenceArg = opts.reference || fileCmd.opts().reference;
894
+ if (referenceArg) {
781
895
  const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
782
- if (UUID_RE.test(opts.reference)) {
896
+ if (UUID_RE.test(referenceArg)) {
783
897
  spinner.text = "Fetching reference schema...";
784
- const refResult = await resolveSchemaFromApi(opts.reference, foundationClient, mapApiTypeToInferred);
898
+ const refResult = await resolveSchemaFromApi(referenceArg, foundationClient, mapApiTypeToInferred);
785
899
  refHeaders = new Set(refResult.headers);
786
900
  refTypes = refResult.types;
787
- refLabel = `data_object:${opts.reference.slice(0, 8)}`;
901
+ refLabel = `data_object:${referenceArg.slice(0, 8)}`;
788
902
  } else {
789
903
  spinner.text = "Parsing reference...";
790
- const refInput = await resolveInput(opts.reference, storage, fs, pathMod);
904
+ const refInput = await resolveInput(referenceArg, storage, fs, pathMod);
791
905
  const refResult = await parseFile(refInput.buffer, { format: refInput.format, maxRows: 1 });
792
906
  refHeaders = new Set(refResult.headers);
793
907
  refTypes = refResult.types;
794
- refLabel = pathMod.basename(opts.reference);
908
+ refLabel = pathMod.basename(referenceArg);
795
909
  }
796
910
  }
797
911
 
@@ -1003,16 +1117,18 @@ export function register(api) {
1003
1117
  let refResult = null;
1004
1118
  let refLabel = "(none)";
1005
1119
 
1006
- if (opts.reference) {
1007
- if (isDataObjectUuid(opts.reference)) {
1120
+ // opts.reference may be consumed by the parent fileCmd (Commander option inheritance)
1121
+ const referenceArg = opts.reference || fileCmd.opts().reference;
1122
+ if (referenceArg) {
1123
+ if (isDataObjectUuid(referenceArg)) {
1008
1124
  spinner.text = "Fetching reference schema from Foundation API...";
1009
- refResult = await resolveSchemaFromApi(opts.reference, foundationClient, mapApiTypeToInferred);
1010
- refLabel = `data_object:${opts.reference.slice(0, 8)}`;
1125
+ refResult = await resolveSchemaFromApi(referenceArg, foundationClient, mapApiTypeToInferred);
1126
+ refLabel = `data_object:${referenceArg.slice(0, 8)}`;
1011
1127
  } else {
1012
1128
  spinner.text = "Parsing reference file...";
1013
- const refInput = await resolveInput(opts.reference, storage, fs, path);
1129
+ const refInput = await resolveInput(referenceArg, storage, fs, path);
1014
1130
  refResult = await parseFile(refInput.buffer, { format: refInput.format, sheet: opts.sheet });
1015
- refLabel = path.basename(opts.reference);
1131
+ refLabel = path.basename(referenceArg);
1016
1132
  }
1017
1133
  }
1018
1134