@meshxdata/fops 0.1.32 → 0.1.34
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +184 -0
- package/package.json +1 -2
- package/src/commands/lifecycle.js +16 -0
- package/src/plugins/bundled/fops-plugin-embeddings/index.js +3 -1
- package/src/plugins/bundled/fops-plugin-embeddings/lib/indexer.js +1 -1
- package/src/plugins/bundled/fops-plugin-file/demo/landscape.yaml +67 -0
- package/src/plugins/bundled/fops-plugin-file/demo/orders_bad.csv +6 -0
- package/src/plugins/bundled/fops-plugin-file/demo/orders_good.csv +7 -0
- package/src/plugins/bundled/fops-plugin-file/demo/orders_reference.csv +6 -0
- package/src/plugins/bundled/fops-plugin-file/demo/orders_renamed.aligned.csv +6 -0
- package/src/plugins/bundled/fops-plugin-file/demo/orders_renamed.csv +6 -0
- package/src/plugins/bundled/fops-plugin-file/demo/rules.json +8 -0
- package/src/plugins/bundled/fops-plugin-file/demo/run.sh +110 -0
- package/src/plugins/bundled/fops-plugin-file/index.js +140 -24
- package/src/plugins/bundled/fops-plugin-file/lib/embed-index.js +7 -0
- package/src/plugins/bundled/fops-plugin-file/lib/match.js +11 -4
- package/src/plugins/bundled/fops-plugin-foundation/index.js +1574 -101
- package/src/plugins/bundled/fops-plugin-foundation/lib/align.js +42 -4
- package/src/plugins/bundled/fops-plugin-foundation/lib/apply.js +83 -41
- package/src/plugins/bundled/fops-plugin-foundation/lib/stack-apply.js +4 -1
- package/src/plugins/bundled/fops-plugin-foundation-graphql/index.js +39 -1
- package/src/plugins/bundled/fops-plugin-foundation-graphql/lib/graphql/resolvers/data-object.js +9 -6
- package/src/plugins/bundled/fops-plugin-foundation-graphql/lib/graphql/resolvers/data-product.js +9 -6
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,187 @@
|
|
|
1
|
+
## [0.1.34] - 2026-03-05
|
|
2
|
+
|
|
3
|
+
- electron app (59ad0bb)
|
|
4
|
+
- compose and fops file plugin (1cf0e81)
|
|
5
|
+
- bump (346ffc1)
|
|
6
|
+
- localhost replaced by 127.0.0.1 (82b9f30)
|
|
7
|
+
- .29 (587b0e1)
|
|
8
|
+
- improve up down and bootstrap script (b79ebaf)
|
|
9
|
+
- checksum (22c8086)
|
|
10
|
+
- checksum (96b434f)
|
|
11
|
+
- checksum (15ed3c0)
|
|
12
|
+
- checksum (8a6543a)
|
|
13
|
+
- bump embed trino linksg (8440504)
|
|
14
|
+
- bump data (765ffd9)
|
|
15
|
+
- bump (cb8b232)
|
|
16
|
+
- broken tests (c532229)
|
|
17
|
+
- release 0.1.18, preflight checks (d902249)
|
|
18
|
+
- fix compute display bug (d10f5d9)
|
|
19
|
+
- cleanup packer files (6330f18)
|
|
20
|
+
- plan mode (cb36a8a)
|
|
21
|
+
- bump to 0.1.16 - agent ui (41ac1a2)
|
|
22
|
+
- bump to 0.1.15 - agent ui (4ebe2e1)
|
|
23
|
+
- bump to 0.1.14 (6c3a7fa)
|
|
24
|
+
- bump to 0.1.13 (8db570f)
|
|
25
|
+
- release 0.1.12 (c1c79e5)
|
|
26
|
+
- bump (11aa3b0)
|
|
27
|
+
- git keep and bump tui (be1678e)
|
|
28
|
+
- skills, index, rrf, compacted context (100k > 10k) (7b2fffd)
|
|
29
|
+
- cloudflare and token consumption, graphs indexing (0ad9eec)
|
|
30
|
+
- bump storage default (22c83ba)
|
|
31
|
+
- storage fix (68a22a0)
|
|
32
|
+
- skills update (7f56500)
|
|
33
|
+
- v9 bump (3864446)
|
|
34
|
+
- bump (c95eedc)
|
|
35
|
+
- rrf (dbf8c95)
|
|
36
|
+
- feat: warning when running predictions (95e8c52)
|
|
37
|
+
- feat: support for local predictions (45cf26b)
|
|
38
|
+
- feat: wip support for predictions + mlflow (3457052)
|
|
39
|
+
- add Reciprocal Rank Fusion (RRF) to knowledge and skill retrieval (61549bc)
|
|
40
|
+
- validate CSV headers in compute_run readiness check (a8c7a43)
|
|
41
|
+
- fix corrupted Iceberg metadata: probe tables + force cleanup on re-apply (50578af)
|
|
42
|
+
- enforce: never use foundation_apply to fix broken products (2e049bf)
|
|
43
|
+
- update SKILL.md with complete tool reference for knowledge retrieval (30b1924)
|
|
44
|
+
- add storage read, input DP table probe, and compute_run improvements (34e6c4c)
|
|
45
|
+
- skills update (1220385)
|
|
46
|
+
- skills update (bb66958)
|
|
47
|
+
- some tui improvement and tools apply overwrite (e90c35c)
|
|
48
|
+
- skills update (e9227a1)
|
|
49
|
+
- skills update (669c4b3)
|
|
50
|
+
- fix plugin pre-flight checks (f741743)
|
|
51
|
+
- increase agent context (6479aaa)
|
|
52
|
+
- skills and init sql fixes (5fce35e)
|
|
53
|
+
- checksum (3518b56)
|
|
54
|
+
- pending job limit (a139861)
|
|
55
|
+
- checksum (575d28c)
|
|
56
|
+
- bump (92049ba)
|
|
57
|
+
- fix bug per tab status (0a33657)
|
|
58
|
+
- fix bug per tab status (50457c6)
|
|
59
|
+
- checksumming (0ad842e)
|
|
60
|
+
- shot af mardkwon overlapping (51f63b9)
|
|
61
|
+
- add spark dockerfile for multiarch builds (95abbd1)
|
|
62
|
+
- fix plugin initialization (16b9782)
|
|
63
|
+
- split index.js (50902a2)
|
|
64
|
+
- cloudflare cidr (cc4e021)
|
|
65
|
+
- cloudflare restrictions (2f6ba2d)
|
|
66
|
+
- sequential start (86b496e)
|
|
67
|
+
- sequential start (4930fe1)
|
|
68
|
+
- sequential start (353f014)
|
|
69
|
+
- qa tests (2dc6a1a)
|
|
70
|
+
- bump sha for .85 (dc2edfe)
|
|
71
|
+
- preserve env on sudo (7831227)
|
|
72
|
+
- bump sha for .84 (6c052f9)
|
|
73
|
+
- non interactive for azure vms (0aa8a2f)
|
|
74
|
+
- keep .env if present (d072450)
|
|
75
|
+
- bump (7a8e732)
|
|
76
|
+
- ensure opa is on compose if not set (f4a5228)
|
|
77
|
+
- checksum bump (a2ccc20)
|
|
78
|
+
- netrc defensive checks (a0b0ccc)
|
|
79
|
+
- netrc defensive checks (ae37403)
|
|
80
|
+
- checksum (ec45d11)
|
|
81
|
+
- update sync and fix up (7f9af72)
|
|
82
|
+
- expand test for azure and add new per app tag support (388a168)
|
|
83
|
+
- checksum on update (44005fc)
|
|
84
|
+
- cleanup for later (15e5313)
|
|
85
|
+
- cleanup for later (11c9597)
|
|
86
|
+
- switch branch feature (822fecc)
|
|
87
|
+
- add pull (d1c19ab)
|
|
88
|
+
- Bump hono from 4.11.9 to 4.12.0 in /operator-cli (ad25144)
|
|
89
|
+
- tests (f180a9a)
|
|
90
|
+
- cleanup (39c49a3)
|
|
91
|
+
- registry (7b7126a)
|
|
92
|
+
- reconcile kafka (832d0db)
|
|
93
|
+
- gh login bug (025886c)
|
|
94
|
+
- cleanup (bb96cab)
|
|
95
|
+
- strip envs from process (2421180)
|
|
96
|
+
- force use of gh creds not tokens in envs var (fff7787)
|
|
97
|
+
- resolve import between npm installs and npm link (79522e1)
|
|
98
|
+
- fix gh scope and azure states (afd846c)
|
|
99
|
+
- refactoring (da50352)
|
|
100
|
+
- split fops repo (d447638)
|
|
101
|
+
- aks (b791f8f)
|
|
102
|
+
- refactor azure (67d3bad)
|
|
103
|
+
- wildcard (391f023)
|
|
104
|
+
- azure plugin (c074074)
|
|
105
|
+
- zap (d7e6e7f)
|
|
106
|
+
- fix knock (cf89c05)
|
|
107
|
+
- azure (4adec98)
|
|
108
|
+
- Bump tar from 7.5.7 to 7.5.9 in /operator-cli (e41e98e)
|
|
109
|
+
- azure stack index.js split (de12272)
|
|
110
|
+
- Bump ajv from 8.17.1 to 8.18.0 in /operator-cli (76da21f)
|
|
111
|
+
- packer (9665fbc)
|
|
112
|
+
- remove stack api (db0fd4d)
|
|
113
|
+
- packer cleanup (fe1bf14)
|
|
114
|
+
- force refresh token (3a3d7e2)
|
|
115
|
+
- provision shell (2ad505f)
|
|
116
|
+
- azure vm management (91dcb31)
|
|
117
|
+
- azure specific (2b0cca8)
|
|
118
|
+
- azure packer (12175b8)
|
|
119
|
+
- init hashed pwd (db8523c)
|
|
120
|
+
- packer (5b5c7c4)
|
|
121
|
+
- doctor for azure vm (ed524fa)
|
|
122
|
+
- packer and 1pwd (c6d053e)
|
|
123
|
+
- split big index.js (dc85a1b)
|
|
124
|
+
- kafka volume update (21815ec)
|
|
125
|
+
- fix openai azure tools confirmation and flow (0118cd1)
|
|
126
|
+
- nightly fix, test fix (5e0d04f)
|
|
127
|
+
- open ai training (cdc494a)
|
|
128
|
+
- openai integration in azure (1ca1475)
|
|
129
|
+
- ci (672cea9)
|
|
130
|
+
- refresh ghcr creds (4220c48)
|
|
131
|
+
- cleaned up version (1a0074f)
|
|
132
|
+
- traefik on ghcr and templates (8e31a05)
|
|
133
|
+
- apply fcl (e78911f)
|
|
134
|
+
- demo landscape (dd205fe)
|
|
135
|
+
- smarter login and schema (1af514f)
|
|
136
|
+
- no down before up unless something broke (56b1132)
|
|
137
|
+
- dai, reconcile failed containers (12907fa)
|
|
138
|
+
- reconcile dead container (7da75e4)
|
|
139
|
+
- defensive around storage buckets dir (b98871d)
|
|
140
|
+
- defensive around storage buckets dir (e86e132)
|
|
141
|
+
- gear in for multiarch (bf3fa3e)
|
|
142
|
+
- up autofix (99c7f89)
|
|
143
|
+
- autofix stale containers on up (43c7d0f)
|
|
144
|
+
- shared sessions fix (5de1359)
|
|
145
|
+
- share sessions between ui and tui (8321391)
|
|
146
|
+
- fix chat view display details (e263996)
|
|
147
|
+
- fix chat view display details (9babdda)
|
|
148
|
+
- tui up fixes (86e9f17)
|
|
149
|
+
- fix commands init (442538b)
|
|
150
|
+
- enable k3s profile (b2dcfc8)
|
|
151
|
+
- test up till job creation (656d388)
|
|
152
|
+
- tui fixes (0599779)
|
|
153
|
+
- cleanup (27731f0)
|
|
154
|
+
- train (90bf559)
|
|
155
|
+
- training (f809bf6)
|
|
156
|
+
- training (ba2b836)
|
|
157
|
+
- training (6fc5267)
|
|
158
|
+
- training (4af8ac9)
|
|
159
|
+
- fix build script (bd82836)
|
|
160
|
+
- infra test (5b79815)
|
|
161
|
+
- infra test (3a0ac05)
|
|
162
|
+
- infra test (e5c67b5)
|
|
163
|
+
- tests (ae7b621)
|
|
164
|
+
- tests (c09ae6a)
|
|
165
|
+
- update tui (4784153)
|
|
166
|
+
- training (0a5a330)
|
|
167
|
+
- tui (df4dd4a)
|
|
168
|
+
- pkg builds (4dc9993)
|
|
169
|
+
- also source env for creds (9a17d8f)
|
|
170
|
+
- fcl support (e8a5743)
|
|
171
|
+
- fcl support (8d6b6cd)
|
|
172
|
+
- fcl support (cb76a4a)
|
|
173
|
+
- bump package (df2ee85)
|
|
174
|
+
- add iam mgmt (2d3c294)
|
|
175
|
+
- fix k3s (976ae77)
|
|
176
|
+
- fix trino, add storage plugin (75cb1f4)
|
|
177
|
+
- add project root as config (a2863c6)
|
|
178
|
+
- failure learnings (637ef5c)
|
|
179
|
+
- Apple signed binaries (63a610e)
|
|
180
|
+
- send build info to apple for notary service (300c220)
|
|
181
|
+
- migration failure fixes (c7f0b2f)
|
|
182
|
+
- release to wipe clean pg on duplicate key error (a38bf4d)
|
|
183
|
+
- small fix (a26a674)
|
|
184
|
+
|
|
1
185
|
# Changelog
|
|
2
186
|
|
|
3
187
|
All notable changes to @meshxdata/fops (Foundation Operator CLI) are documented here.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@meshxdata/fops",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.34",
|
|
4
4
|
"description": "CLI to install and manage data mesh platforms",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"fops",
|
|
@@ -46,7 +46,6 @@
|
|
|
46
46
|
"chalk": "^5.3.0",
|
|
47
47
|
"commander": "^12.0.0",
|
|
48
48
|
"dataloader": "^2.2.3",
|
|
49
|
-
"electron": "^34.5.8",
|
|
50
49
|
"execa": "^9.5.2",
|
|
51
50
|
"graphql": "^16.13.0",
|
|
52
51
|
"hcl2-parser": "^1.0.3",
|
|
@@ -1316,6 +1316,11 @@ async function runUp(program, registry, opts) {
|
|
|
1316
1316
|
const hasTraefik = profileArgs.includes("traefik");
|
|
1317
1317
|
const needsSequentialUp = hasK3s && hasTraefik && !componentOnlyUp;
|
|
1318
1318
|
|
|
1319
|
+
// Stop the outer pre-flight spinner before runUpInner starts its own ticker.
|
|
1320
|
+
// Without this, two setInterval calls both write \r\x1b[K to stderr simultaneously,
|
|
1321
|
+
// causing the status line to flicker between the outer and inner spinner text.
|
|
1322
|
+
clearSpinner();
|
|
1323
|
+
|
|
1319
1324
|
let result;
|
|
1320
1325
|
if (componentOnlyUp) {
|
|
1321
1326
|
const serviceList = opts.frontendDev && opts.component === "frontend"
|
|
@@ -1521,6 +1526,17 @@ async function runUp(program, registry, opts) {
|
|
|
1521
1526
|
console.log(chalk.dim(`\n Foundation is running at ${browseUrl}`));
|
|
1522
1527
|
console.log(chalk.dim(" Run `fops agent` to start the AI assistant.\n"));
|
|
1523
1528
|
}
|
|
1529
|
+
|
|
1530
|
+
// Launch the system tray (macOS menu bar or Windows taskbar, full-stack up only)
|
|
1531
|
+
if ((process.platform === "darwin" || process.platform === "win32") && !opts.component) {
|
|
1532
|
+
const { spawn } = await import("node:child_process");
|
|
1533
|
+
const tray = spawn(process.argv[0], [process.argv[1], "foundation", "tray"], {
|
|
1534
|
+
stdio: "ignore",
|
|
1535
|
+
detached: true,
|
|
1536
|
+
env: { ...process.env },
|
|
1537
|
+
});
|
|
1538
|
+
tray.unref();
|
|
1539
|
+
}
|
|
1524
1540
|
}
|
|
1525
1541
|
|
|
1526
1542
|
// ── Restart backend after grant-admin ────────────────────────────────────────
|
|
@@ -264,7 +264,9 @@ export default {
|
|
|
264
264
|
if (!texts || texts.length === 0) return [];
|
|
265
265
|
if (!embeddingClient.isModelCached()) return [];
|
|
266
266
|
try {
|
|
267
|
-
|
|
267
|
+
await embeddingClient.isReady();
|
|
268
|
+
markOnnxLoaded();
|
|
269
|
+
return await embeddingClient.embedBatch(texts);
|
|
268
270
|
} catch { return []; }
|
|
269
271
|
},
|
|
270
272
|
});
|
|
@@ -1047,7 +1047,7 @@ export async function runIndex({ source, sources: explicitSources, force, onProg
|
|
|
1047
1047
|
// If the stored model differs from current NL_MODEL, force full re-index
|
|
1048
1048
|
const currentStoreModel = nlStore.load().model;
|
|
1049
1049
|
if (currentStoreModel && currentStoreModel !== NL_MODEL) {
|
|
1050
|
-
log(WARN(` Model changed (${currentStoreModel} → ${NL_MODEL})
|
|
1050
|
+
log(WARN(` Model changed (${currentStoreModel} → ${NL_MODEL}) — one-time re-index to rebuild vectors with the new model`));
|
|
1051
1051
|
force = true;
|
|
1052
1052
|
nlStore.clear();
|
|
1053
1053
|
}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
mesh:
|
|
2
|
+
ecommerce:
|
|
3
|
+
label: EC
|
|
4
|
+
description: E-commerce analytics mesh (file plugin demo)
|
|
5
|
+
purpose: Demonstrate fops embed file smart matching
|
|
6
|
+
|
|
7
|
+
data_system:
|
|
8
|
+
datalake:
|
|
9
|
+
label: DL
|
|
10
|
+
description: Central data lake
|
|
11
|
+
|
|
12
|
+
data_source:
|
|
13
|
+
orders_bucket:
|
|
14
|
+
label: ORD
|
|
15
|
+
description: Orders data in S3
|
|
16
|
+
system: data_system.datalake
|
|
17
|
+
connection:
|
|
18
|
+
type: s3
|
|
19
|
+
url: http://foundation-storage-engine:8080
|
|
20
|
+
access_key: S3_ACCESS_KEY
|
|
21
|
+
access_secret: S3_SECRET_KEY
|
|
22
|
+
secret:
|
|
23
|
+
S3_ACCESS_KEY: minio
|
|
24
|
+
S3_SECRET_KEY: minio123
|
|
25
|
+
|
|
26
|
+
data_object:
|
|
27
|
+
orders:
|
|
28
|
+
label: ORD
|
|
29
|
+
description: Raw orders dataset
|
|
30
|
+
source: data_source.orders_bucket
|
|
31
|
+
config:
|
|
32
|
+
data_object_type: csv
|
|
33
|
+
path: /raw/orders/orders.csv
|
|
34
|
+
has_header: true
|
|
35
|
+
delimiter: ","
|
|
36
|
+
|
|
37
|
+
data_product:
|
|
38
|
+
orders_sadp:
|
|
39
|
+
label: ORD
|
|
40
|
+
description: Source-aligned orders data product
|
|
41
|
+
object: data_object.orders
|
|
42
|
+
template: sadp_passthrough
|
|
43
|
+
select_columns: [order_id, customer_id, amount, currency, order_date, status, region]
|
|
44
|
+
cast_changes:
|
|
45
|
+
- column: amount
|
|
46
|
+
data_type: decimal
|
|
47
|
+
kwargs:
|
|
48
|
+
precision: 10
|
|
49
|
+
scale: 2
|
|
50
|
+
schema:
|
|
51
|
+
- name: order_id
|
|
52
|
+
type: integer
|
|
53
|
+
primary: true
|
|
54
|
+
- name: customer_id
|
|
55
|
+
type: integer
|
|
56
|
+
- name: amount
|
|
57
|
+
type: decimal
|
|
58
|
+
precision: 10
|
|
59
|
+
scale: 2
|
|
60
|
+
- name: currency
|
|
61
|
+
type: varchar
|
|
62
|
+
- name: order_date
|
|
63
|
+
type: date
|
|
64
|
+
- name: status
|
|
65
|
+
type: varchar
|
|
66
|
+
- name: region
|
|
67
|
+
type: varchar
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
order_id,customer_id,total_amount,currency,order_date,region
|
|
2
|
+
3001,7001,120.00,USD,2024-03-01,north_america
|
|
3
|
+
3002,7002,not_a_number,EUR,2024-03-02,europe
|
|
4
|
+
3003,7003,88.50,USD,bad-date,asia_pacific
|
|
5
|
+
3004,7004,-50.00,GBP,2024-03-04,europe
|
|
6
|
+
3005,7005,405.00,USD,2024-03-05,north_america
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
order_id,customer_id,amount,currency,order_date,status,region
|
|
2
|
+
2001,6001,99.00,USD,2024-03-01,completed,north_america
|
|
3
|
+
2002,6002,175.50,EUR,2024-03-02,pending,europe
|
|
4
|
+
2003,6003,420.00,USD,2024-03-03,completed,asia_pacific
|
|
5
|
+
2004,6004,58.25,GBP,2024-03-04,completed,europe
|
|
6
|
+
2005,6005,200.00,USD,2024-03-05,pending,north_america
|
|
7
|
+
2006,6006,315.00,USD,2024-03-06,completed,north_america
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
order_id,customer_id,amount,currency,order_date,status,region
|
|
2
|
+
1001,5001,149.99,USD,2024-01-03,completed,north_america
|
|
3
|
+
1002,5002,89.50,EUR,2024-01-04,pending,europe
|
|
4
|
+
1003,5003,230.00,USD,2024-01-05,completed,north_america
|
|
5
|
+
1004,5004,45.00,GBP,2024-01-06,cancelled,europe
|
|
6
|
+
1005,5005,310.75,USD,2024-01-07,completed,asia_pacific
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
id,customer_id,price,ccy,date,order_status,geo_region
|
|
2
|
+
4001,8001,99.00,USD,2024-03-01,completed,north_america
|
|
3
|
+
4002,8002,145.75,EUR,2024-03-02,pending,europe
|
|
4
|
+
4003,8003,280.00,USD,2024-03-03,completed,asia_pacific
|
|
5
|
+
4004,8004,62.50,GBP,2024-03-04,completed,europe
|
|
6
|
+
4005,8005,195.00,USD,2024-03-05,pending,north_america
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
id,cust_id,price,ccy,date,order_status,geo_region
|
|
2
|
+
4001,8001,99.00,USD,2024-03-01,completed,north_america
|
|
3
|
+
4002,8002,145.75,EUR,2024-03-02,pending,europe
|
|
4
|
+
4003,8003,280.00,USD,2024-03-03,completed,asia_pacific
|
|
5
|
+
4004,8004,62.50,GBP,2024-03-04,completed,europe
|
|
6
|
+
4005,8005,195.00,USD,2024-03-05,pending,north_america
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# fops embed file — demo script
|
|
3
|
+
#
|
|
4
|
+
# Showcases the full pipeline:
|
|
5
|
+
# Steps 1-5: pure schema / type comparison (no index needed)
|
|
6
|
+
# Step 6: MiniLM + RRF smart matching (requires Foundation + `fops embed file index`)
|
|
7
|
+
# Step 7: Fix CSV — rename columns to match reference (fops foundation align)
|
|
8
|
+
# In interactive mode, step 6 prompts to fix CSV or update the Data Product automatically.
|
|
9
|
+
#
|
|
10
|
+
# Files:
|
|
11
|
+
# orders_reference.csv — expected schema (7 cols: order_id, customer_id, amount, currency, order_date, status, region)
|
|
12
|
+
# orders_good.csv — clean file, matches reference exactly → PASS
|
|
13
|
+
# orders_bad.csv — renamed column (total_amount), missing status → FAIL
|
|
14
|
+
# orders_renamed.csv — same data, all column names abbreviated (id, cust_id, price...) → no Jaccard match → MiniLM matches
|
|
15
|
+
|
|
16
|
+
set -e
|
|
17
|
+
DEMO_DIR="$(cd "$(dirname "$0")" && pwd)"
|
|
18
|
+
|
|
19
|
+
section() {
|
|
20
|
+
echo
|
|
21
|
+
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
|
22
|
+
echo " $1"
|
|
23
|
+
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
# ── 1. Preview: inspect schema + inferred types ───────────────────────────────
|
|
27
|
+
section "1 · Preview — inspect schema + inferred types"
|
|
28
|
+
fops embed file preview "$DEMO_DIR/orders_good.csv"
|
|
29
|
+
|
|
30
|
+
# ── 2. Validate: clean file against reference → PASS ─────────────────────────
|
|
31
|
+
section "2 · Validate — clean file (expect PASS)"
|
|
32
|
+
fops embed file validate "$DEMO_DIR/orders_good.csv" \
|
|
33
|
+
--reference "$DEMO_DIR/orders_reference.csv"
|
|
34
|
+
|
|
35
|
+
# ── 3. Validate: bad file → FAIL (missing column, renamed column) ─────────────
|
|
36
|
+
section "3 · Validate — bad file (expect FAIL)"
|
|
37
|
+
fops embed file validate "$DEMO_DIR/orders_bad.csv" \
|
|
38
|
+
--reference "$DEMO_DIR/orders_reference.csv" || true
|
|
39
|
+
|
|
40
|
+
# ── 4. Preview with reference diff ────────────────────────────────────────────
|
|
41
|
+
section "4 · Preview with diff — visualise column drift vs reference"
|
|
42
|
+
fops embed file preview "$DEMO_DIR/orders_bad.csv" \
|
|
43
|
+
--reference "$DEMO_DIR/orders_reference.csv"
|
|
44
|
+
|
|
45
|
+
# ── 5. Smart match (Jaccard only, no index) ───────────────────────────────────
|
|
46
|
+
# orders_renamed.csv has fully abbreviated column names — zero Jaccard overlap.
|
|
47
|
+
# --no-semantic forces Jaccard-only mode (skips MiniLM, no ONNX load).
|
|
48
|
+
section "5 · Smart match — Jaccard only (no index): expect no strong match"
|
|
49
|
+
fops embed file "$DEMO_DIR/orders_renamed.csv" --local --no-semantic || true
|
|
50
|
+
|
|
51
|
+
# ── 6. Smart match (MiniLM + RRF, requires index) ────────────────────────────
|
|
52
|
+
# When the SQLite schema index exists (built by `fops embed file index`), the
|
|
53
|
+
# matching pipeline upgrades to:
|
|
54
|
+
# - Signal 1: column-level MiniLM cosine + greedy bipartite matching
|
|
55
|
+
# - Signal 2: Jaccard overlap on exact column names
|
|
56
|
+
# - Signal 3: type compatibility score on matched column pairs
|
|
57
|
+
# - Signal 4: MiniLM cosine on table/entity names
|
|
58
|
+
# - Signal 5: fraction of candidate columns with sim > 0.75
|
|
59
|
+
# All fused via Reciprocal Rank Fusion (RRF k=60).
|
|
60
|
+
#
|
|
61
|
+
# With MiniLM, "id"→"order_id", "price"→"amount", "ccy"→"currency" etc. all
|
|
62
|
+
# match semantically even with zero Jaccard overlap.
|
|
63
|
+
section "6 · Smart match — MiniLM + RRF (requires Foundation + index)"
|
|
64
|
+
|
|
65
|
+
SCHEMA_DB="$HOME/.fops/file/schema-index.db"
|
|
66
|
+
if [ -f "$SCHEMA_DB" ]; then
|
|
67
|
+
echo " Index found — running full semantic match..."
|
|
68
|
+
fops embed file "$DEMO_DIR/orders_renamed.csv"
|
|
69
|
+
else
|
|
70
|
+
echo " No schema index found. To enable MiniLM + RRF matching, run:"
|
|
71
|
+
echo
|
|
72
|
+
echo " # 1. Start Foundation"
|
|
73
|
+
echo " fops up"
|
|
74
|
+
echo
|
|
75
|
+
echo " # 2. Register your data objects (or use the demo landscape)"
|
|
76
|
+
echo " fops apply $DEMO_DIR/landscape.yaml"
|
|
77
|
+
echo
|
|
78
|
+
echo " # 3. Build the SQLite schema index (embeds column names with MiniLM)"
|
|
79
|
+
echo " fops embed file index"
|
|
80
|
+
echo
|
|
81
|
+
echo " # 4. Re-run this step — orders_renamed.csv will now match via"
|
|
82
|
+
echo " # semantic similarity (id→order_id, price→amount, ccy→currency)"
|
|
83
|
+
echo " fops embed file $DEMO_DIR/orders_renamed.csv"
|
|
84
|
+
echo
|
|
85
|
+
echo " Matching signals when index is present:"
|
|
86
|
+
echo " [1] MiniLM column cosine — greedy bipartite match per column pair"
|
|
87
|
+
echo " [2] Jaccard overlap — exact column name set overlap"
|
|
88
|
+
echo " [3] Type compatibility — integer/decimal/date match on mapped cols"
|
|
89
|
+
echo " [4] Name semantic — MiniLM cosine on table/entity names"
|
|
90
|
+
echo " [5] Mapping confidence — fraction of cols with sim > 0.75"
|
|
91
|
+
echo " ↳ all fused via Reciprocal Rank Fusion (RRF k=60)"
|
|
92
|
+
fi
|
|
93
|
+
|
|
94
|
+
# ── 7. Fix CSV — rename abbreviated columns to reference names ────────────────
|
|
95
|
+
# After a smart match, fops embed file (interactive) prompts:
|
|
96
|
+
# ▸ Fix CSV — rename columns to match reference (→ orders_renamed.aligned.csv)
|
|
97
|
+
# Fix Data Product — update schema to match this file
|
|
98
|
+
# Nothing — done
|
|
99
|
+
#
|
|
100
|
+
# This step demonstrates the "Fix CSV" path explicitly via fops foundation align,
|
|
101
|
+
# which uses the same MiniLM + Levenshtein column alignment under the hood.
|
|
102
|
+
section "7 · Fix CSV — rename columns to match reference"
|
|
103
|
+
# Uses MiniLM semantic matching to map abbreviated column names to reference schema.
|
|
104
|
+
# The interactive Fix CSV prompt inside `fops embed file` (step 6) runs this automatically.
|
|
105
|
+
# || true: ONNX native handles trigger a SIGKILL on exit — this is expected and harmless.
|
|
106
|
+
fops foundation align "$DEMO_DIR/orders_renamed.csv" \
|
|
107
|
+
"order_id,customer_id,amount,currency,order_date,status,region" \
|
|
108
|
+
--output "$DEMO_DIR/orders_renamed.aligned.csv" || true
|
|
109
|
+
|
|
110
|
+
echo
|