@meshxdata/fops 0.1.58 → 0.1.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,376 @@
2
2
 
3
3
  All notable changes to @meshxdata/fops (Foundation Operator CLI) are documented here.
4
4
 
5
+ ## [0.1.60] - 2026-03-26
6
+
7
+ - fix(provision): detect and repair broken submodule mounts from Docker (13269c5)
8
+ - fix(provision): recheck Docker after install failure instead of blocking (d7bc11c)
9
+ - bump cli vm lifecycle (a3521ac)
10
+ - fix(k3s): remove minio123 fallback from sync-secrets (e185cb9)
11
+ - fix: add frontend-prod profile to fopsUpCmd, FOUNDATION_ROOT always wins in rootDir (a412709)
12
+ - bump storage (a1a5761)
13
+ - restore all missing services (pgpool, exporters, grafana, etc), add loki to k3s profile, always activate loki profile in fops up (4e2744a)
14
+ - fix: grafana alert-rules provisioning, ENVIRONMENT_NAME from --url, k3s secret sync, vm-sizes endpoint, project root resolution (9839052)
15
+ - feat(azure): add 'fops azure reconcile <name>' command for VM drift fix (79ba6e2)
16
+ - fix(otel,loki): remove duplicate spanmetrics dimensions, use .env for loki S3 creds (e3d1def)
17
+ - fix(loki): pass S3 credentials from .env so loki works without vault-init (c57906d)
18
+ - fix(azure): improve VM provisioning reliability (2ddd669)
19
+ - cluster discovery (009257d)
20
+ - feat(storage): add loki container to provisioning (898c544)
21
+ - feat(azure): add ping command to check backend health (8336825)
22
+ - operator cli bump 0.1.52 (f052cb5)
23
+ - fix(doctor): set KUBECONFIG for k3s kubectl commands (db9359b)
24
+ - fix(azure): move --landscape to test run command, not separate subcommand (4b9b089)
25
+ - feat(azure): add test integration command with landscape support (b2990a0)
26
+ - fix(fleet): skip VMs without public IPs in fleet exec (39acbaa)
27
+ - feat(azure): detect and fix External Secrets identity issues (f907d11)
28
+ - operator cli bump 0.1.51 (db55bdc)
29
+ - feat: add postgres-exporter and Azure tray menu improvements (2a337ac)
30
+ - operator cli plugin fix (4dae908)
31
+ - operator cli plugin fix (25620cc)
32
+ - operator cli test fixes (1d1c18f)
33
+ - feat(test): add setup-users command for QA test user creation (b929507)
34
+ - feat(aks): show HA standby clusters with visual grouping (8fb640c)
35
+ - refactor(provision): extract VM provisioning to dedicated module (af321a7)
36
+ - refactor(provision): extract post-start health checks to dedicated module (6ed5f2d)
37
+ - fix: ping timeout 15s, fix prometheus sed escaping (d11ac14)
38
+ - refactor(vm): extract terraform HCL generation to dedicated module (896a64b)
39
+ - refactor(keyvault): extract key operations to dedicated module (716bbe4)
40
+ - refactor(azure): extract swarm functions to azure-fleet-swarm.js (4690e34)
41
+ - refactor(azure): extract SSH/remote functions to azure-ops-ssh.js (e62b8f0)
42
+ - refactor(azure): split azure-ops.js into smaller modules (4515425)
43
+ - feat(aks): add --ha flag for full cross-region HA setup (ece68c5)
44
+ - feat(fops): inject ENVIRONMENT_NAME on VM provisioning (6ef2a27)
45
+ - fix(postgres): disable SSL mode to fix connection issues (c789ae9)
46
+ - feat(trino): add caching configuration for docker-compose (3668224)
47
+ - fix(fops-azure): run pytest directly instead of missing scripts (29f8410)
48
+ - add -d detach option for local frontend dev, remove hive cpu limits (3306667)
49
+ - release 0.1.49 (dcca32b)
50
+ - release 0.1.48 (9b195e5)
51
+ - stash on updates (2916c01)
52
+ - stash on updates (b5c14df)
53
+ - stash on updates (d0453d1)
54
+ - frontend dev fixes (0ca7b00)
55
+ - fix: update azure test commands (77c81da)
56
+ - default locust to CLI mode, add --web for UI (ca35bff)
57
+ - add locust command for load testing AKS clusters (1278722)
58
+ - update spot node pool default autoscaling to 1-20 (617c182)
59
+ - module for aks (3dd1a61)
60
+ - add hive to PG_SERVICE_DBS for fops pg-setup (afccb16)
61
+ - feat(azure): enhance aks doctor with ExternalSecrets and PGSSLMODE checks (8b14861)
62
+ - add foundation-postgres ExternalName service to reconciler (ea88e11)
63
+ - new flux templates (0e2e372)
64
+ - feat(azure): add storage-engine secrets to Key Vault (a4f488e)
65
+ - feat(azure-aks): add AUTH0_DOMAIN to template rendering variables (216c37e)
66
+ - feat(azure): add storage account creation per cluster (aa1b138)
67
+ - bump watcher (ab24473)
68
+ - fix: concurrent compute calls (#66) (03e2edf)
69
+ - bump backend version (5058ff5)
70
+ - bump fops to 0.1.44 (8c0ef5d)
71
+ - Mlflow and azure plugin fix (176881f)
72
+ - fix lifecycle (a2cb9e7)
73
+ - callback url for localhost (821fb94)
74
+ - disable 4 scaffolding plugin by default. (bfb2b76)
75
+ - jaccard improvements (b7494a0)
76
+ - refactor azure plugin (68dfef4)
77
+ - refactor azure plugin (b24a008)
78
+ - fix trino catalog missing (4928a55)
79
+ - v36 bump and changelog generation on openai (37a0440)
80
+ - v36 bump and changelog generation on openai (a3b02d9)
81
+ - bump (a990058)
82
+ - status bar fix and new plugin for ttyd (27dde1e)
83
+ - file demo and tray (1a3e704)
84
+ - electron app (59ad0bb)
85
+ - compose and fops file plugin (1cf0e81)
86
+ - bump (346ffc1)
87
+ - localhost replaced by 127.0.0.1 (82b9f30)
88
+ - .29 (587b0e1)
89
+ - improve up down and bootstrap script (b79ebaf)
90
+ - checksum (22c8086)
91
+ - checksum (96b434f)
92
+ - checksum (15ed3c0)
93
+ - checksum (8a6543a)
94
+ - bump embed trino linksg (8440504)
95
+ - bump data (765ffd9)
96
+ - bump (cb8b232)
97
+ - broken tests (c532229)
98
+ - release 0.1.18, preflight checks (d902249)
99
+ - fix compute display bug (d10f5d9)
100
+ - cleanup packer files (6330f18)
101
+ - plan mode (cb36a8a)
102
+ - bump to 0.1.16 - agent ui (41ac1a2)
103
+ - bump to 0.1.15 - agent ui (4ebe2e1)
104
+ - bump to 0.1.14 (6c3a7fa)
105
+ - bump to 0.1.13 (8db570f)
106
+ - release 0.1.12 (c1c79e5)
107
+ - bump (11aa3b0)
108
+ - git keep and bump tui (be1678e)
109
+ - skills, index, rrf, compacted context (100k > 10k) (7b2fffd)
110
+ - cloudflare and token consumption, graphs indexing (0ad9eec)
111
+ - bump storage default (22c83ba)
112
+ - storage fix (68a22a0)
113
+ - skills update (7f56500)
114
+ - v9 bump (3864446)
115
+ - bump (c95eedc)
116
+ - rrf (dbf8c95)
117
+ - feat: warning when running predictions (95e8c52)
118
+ - feat: support for local predictions (45cf26b)
119
+ - feat: wip support for predictions + mlflow (3457052)
120
+ - add Reciprocal Rank Fusion (RRF) to knowledge and skill retrieval (61549bc)
121
+ - validate CSV headers in compute_run readiness check (a8c7a43)
122
+ - fix corrupted Iceberg metadata: probe tables + force cleanup on re-apply (50578af)
123
+ - enforce: never use foundation_apply to fix broken products (2e049bf)
124
+ - update SKILL.md with complete tool reference for knowledge retrieval (30b1924)
125
+ - add storage read, input DP table probe, and compute_run improvements (34e6c4c)
126
+ - skills update (1220385)
127
+ - skills update (bb66958)
128
+ - some tui improvement and tools apply overwrite (e90c35c)
129
+ - skills update (e9227a1)
130
+ - skills update (669c4b3)
131
+ - fix plugin pre-flight checks (f741743)
132
+ - increase agent context (6479aaa)
133
+ - skills and init sql fixes (5fce35e)
134
+ - checksum (3518b56)
135
+ - pending job limit (a139861)
136
+ - checksum (575d28c)
137
+ - bump (92049ba)
138
+ - fix bug per tab status (0a33657)
139
+ - fix bug per tab status (50457c6)
140
+ - checksumming (0ad842e)
141
+ - shot af markdown overlapping (51f63b9)
142
+ - add spark dockerfile for multiarch builds (95abbd1)
143
+ - fix plugin initialization (16b9782)
144
+ - split index.js (50902a2)
145
+ - cloudflare cidr (cc4e021)
146
+ - cloudflare restrictions (2f6ba2d)
147
+ - sequential start (86b496e)
148
+ - sequential start (4930fe1)
149
+ - sequential start (353f014)
150
+ - qa tests (2dc6a1a)
151
+ - bump sha for .85 (dc2edfe)
152
+ - preserve env on sudo (7831227)
153
+ - bump sha for .84 (6c052f9)
154
+ - non interactive for azure vms (0aa8a2f)
155
+ - keep .env if present (d072450)
156
+ - bump (7a8e732)
157
+ - ensure opa is on compose if not set (f4a5228)
158
+ - checksum bump (a2ccc20)
159
+ - netrc defensive checks (a0b0ccc)
160
+ - netrc defensive checks (ae37403)
161
+ - checksum (ec45d11)
162
+ - update sync and fix up (7f9af72)
163
+ - expand test for azure and add new per app tag support (388a168)
164
+ - checksum on update (44005fc)
165
+ - cleanup for later (15e5313)
166
+ - cleanup for later (11c9597)
167
+ - switch branch feature (822fecc)
168
+ - add pull (d1c19ab)
169
+ - Bump hono from 4.11.9 to 4.12.0 in /operator-cli (ad25144)
170
+ - tests (f180a9a)
171
+ - cleanup (39c49a3)
172
+ - registry (7b7126a)
173
+ - reconcile kafka (832d0db)
174
+ - gh login bug (025886c)
175
+ - cleanup (bb96cab)
176
+ - strip envs from process (2421180)
177
+ - force use of gh creds not tokens in envs var (fff7787)
178
+ - resolve import between npm installs and npm link (79522e1)
179
+ - fix gh scope and azure states (afd846c)
180
+ - refactoring (da50352)
181
+ - split fops repo (d447638)
182
+ - aks (b791f8f)
183
+ - refactor azure (67d3bad)
184
+ - wildcard (391f023)
185
+ - azure plugin (c074074)
186
+ - zap (d7e6e7f)
187
+
188
+ ## [0.1.59] - 2026-03-26
189
+
190
+ - fix(k3s): remove minio123 fallback from sync-secrets (e185cb9)
191
+ - fix: add frontend-prod profile to fopsUpCmd, FOUNDATION_ROOT always wins in rootDir (a412709)
192
+ - bump storage (a1a5761)
193
+ - restore all missing services (pgpool, exporters, grafana, etc), add loki to k3s profile, always activate loki profile in fops up (4e2744a)
194
+ - fix: grafana alert-rules provisioning, ENVIRONMENT_NAME from --url, k3s secret sync, vm-sizes endpoint, project root resolution (9839052)
195
+ - feat(azure): add 'fops azure reconcile <name>' command for VM drift fix (79ba6e2)
196
+ - fix(otel,loki): remove duplicate spanmetrics dimensions, use .env for loki S3 creds (e3d1def)
197
+ - fix(loki): pass S3 credentials from .env so loki works without vault-init (c57906d)
198
+ - fix(azure): improve VM provisioning reliability (2ddd669)
199
+ - cluster discovery (009257d)
200
+ - feat(storage): add loki container to provisioning (898c544)
201
+ - feat(azure): add ping command to check backend health (8336825)
202
+ - operator cli bump 0.1.52 (f052cb5)
203
+ - fix(doctor): set KUBECONFIG for k3s kubectl commands (db9359b)
204
+ - fix(azure): move --landscape to test run command, not separate subcommand (4b9b089)
205
+ - feat(azure): add test integration command with landscape support (b2990a0)
206
+ - fix(fleet): skip VMs without public IPs in fleet exec (39acbaa)
207
+ - feat(azure): detect and fix External Secrets identity issues (f907d11)
208
+ - operator cli bump 0.1.51 (db55bdc)
209
+ - feat: add postgres-exporter and Azure tray menu improvements (2a337ac)
210
+ - operator cli plugin fix (4dae908)
211
+ - operator cli plugin fix (25620cc)
212
+ - operator cli test fixes (1d1c18f)
213
+ - feat(test): add setup-users command for QA test user creation (b929507)
214
+ - feat(aks): show HA standby clusters with visual grouping (8fb640c)
215
+ - refactor(provision): extract VM provisioning to dedicated module (af321a7)
216
+ - refactor(provision): extract post-start health checks to dedicated module (6ed5f2d)
217
+ - fix: ping timeout 15s, fix prometheus sed escaping (d11ac14)
218
+ - refactor(vm): extract terraform HCL generation to dedicated module (896a64b)
219
+ - refactor(keyvault): extract key operations to dedicated module (716bbe4)
220
+ - refactor(azure): extract swarm functions to azure-fleet-swarm.js (4690e34)
221
+ - refactor(azure): extract SSH/remote functions to azure-ops-ssh.js (e62b8f0)
222
+ - refactor(azure): split azure-ops.js into smaller modules (4515425)
223
+ - feat(aks): add --ha flag for full cross-region HA setup (ece68c5)
224
+ - feat(fops): inject ENVIRONMENT_NAME on VM provisioning (6ef2a27)
225
+ - fix(postgres): disable SSL mode to fix connection issues (c789ae9)
226
+ - feat(trino): add caching configuration for docker-compose (3668224)
227
+ - fix(fops-azure): run pytest directly instead of missing scripts (29f8410)
228
+ - add -d detach option for local frontend dev, remove hive cpu limits (3306667)
229
+ - release 0.1.49 (dcca32b)
230
+ - release 0.1.48 (9b195e5)
231
+ - stash on updates (2916c01)
232
+ - stash on updates (b5c14df)
233
+ - stash on updates (d0453d1)
234
+ - frontend dev fixes (0ca7b00)
235
+ - fix: update azure test commands (77c81da)
236
+ - default locust to CLI mode, add --web for UI (ca35bff)
237
+ - add locust command for load testing AKS clusters (1278722)
238
+ - update spot node pool default autoscaling to 1-20 (617c182)
239
+ - module for aks (3dd1a61)
240
+ - add hive to PG_SERVICE_DBS for fops pg-setup (afccb16)
241
+ - feat(azure): enhance aks doctor with ExternalSecrets and PGSSLMODE checks (8b14861)
242
+ - add foundation-postgres ExternalName service to reconciler (ea88e11)
243
+ - new flux templates (0e2e372)
244
+ - feat(azure): add storage-engine secrets to Key Vault (a4f488e)
245
+ - feat(azure-aks): add AUTH0_DOMAIN to template rendering variables (216c37e)
246
+ - feat(azure): add storage account creation per cluster (aa1b138)
247
+ - bump watcher (ab24473)
248
+ - fix: concurrent compute calls (#66) (03e2edf)
249
+ - bump backend version (5058ff5)
250
+ - bump fops to 0.1.44 (8c0ef5d)
251
+ - Mlflow and azure plugin fix (176881f)
252
+ - fix lifecycle (a2cb9e7)
253
+ - callback url for localhost (821fb94)
254
+ - disable 4 scaffolding plugin by default. (bfb2b76)
255
+ - jaccard improvements (b7494a0)
256
+ - refactor azure plugin (68dfef4)
257
+ - refactor azure plugin (b24a008)
258
+ - fix trino catalog missing (4928a55)
259
+ - v36 bump and changelog generation on openai (37a0440)
260
+ - v36 bump and changelog generation on openai (a3b02d9)
261
+ - bump (a990058)
262
+ - status bar fix and new plugin for ttyd (27dde1e)
263
+ - file demo and tray (1a3e704)
264
+ - electron app (59ad0bb)
265
+ - compose and fops file plugin (1cf0e81)
266
+ - bump (346ffc1)
267
+ - localhost replaced by 127.0.0.1 (82b9f30)
268
+ - .29 (587b0e1)
269
+ - improve up down and bootstrap script (b79ebaf)
270
+ - checksum (22c8086)
271
+ - checksum (96b434f)
272
+ - checksum (15ed3c0)
273
+ - checksum (8a6543a)
274
+ - bump embed trino linksg (8440504)
275
+ - bump data (765ffd9)
276
+ - bump (cb8b232)
277
+ - broken tests (c532229)
278
+ - release 0.1.18, preflight checks (d902249)
279
+ - fix compute display bug (d10f5d9)
280
+ - cleanup packer files (6330f18)
281
+ - plan mode (cb36a8a)
282
+ - bump to 0.1.16 - agent ui (41ac1a2)
283
+ - bump to 0.1.15 - agent ui (4ebe2e1)
284
+ - bump to 0.1.14 (6c3a7fa)
285
+ - bump to 0.1.13 (8db570f)
286
+ - release 0.1.12 (c1c79e5)
287
+ - bump (11aa3b0)
288
+ - git keep and bump tui (be1678e)
289
+ - skills, index, rrf, compacted context (100k > 10k) (7b2fffd)
290
+ - cloudflare and token consumption, graphs indexing (0ad9eec)
291
+ - bump storage default (22c83ba)
292
+ - storage fix (68a22a0)
293
+ - skills update (7f56500)
294
+ - v9 bump (3864446)
295
+ - bump (c95eedc)
296
+ - rrf (dbf8c95)
297
+ - feat: warning when running predictions (95e8c52)
298
+ - feat: support for local predictions (45cf26b)
299
+ - feat: wip support for predictions + mlflow (3457052)
300
+ - add Reciprocal Rank Fusion (RRF) to knowledge and skill retrieval (61549bc)
301
+ - validate CSV headers in compute_run readiness check (a8c7a43)
302
+ - fix corrupted Iceberg metadata: probe tables + force cleanup on re-apply (50578af)
303
+ - enforce: never use foundation_apply to fix broken products (2e049bf)
304
+ - update SKILL.md with complete tool reference for knowledge retrieval (30b1924)
305
+ - add storage read, input DP table probe, and compute_run improvements (34e6c4c)
306
+ - skills update (1220385)
307
+ - skills update (bb66958)
308
+ - some tui improvement and tools apply overwrite (e90c35c)
309
+ - skills update (e9227a1)
310
+ - skills update (669c4b3)
311
+ - fix plugin pre-flight checks (f741743)
312
+ - increase agent context (6479aaa)
313
+ - skills and init sql fixes (5fce35e)
314
+ - checksum (3518b56)
315
+ - pending job limit (a139861)
316
+ - checksum (575d28c)
317
+ - bump (92049ba)
318
+ - fix bug per tab status (0a33657)
319
+ - fix bug per tab status (50457c6)
320
+ - checksumming (0ad842e)
321
+ - shot af markdown overlapping (51f63b9)
322
+ - add spark dockerfile for multiarch builds (95abbd1)
323
+ - fix plugin initialization (16b9782)
324
+ - split index.js (50902a2)
325
+ - cloudflare cidr (cc4e021)
326
+ - cloudflare restrictions (2f6ba2d)
327
+ - sequential start (86b496e)
328
+ - sequential start (4930fe1)
329
+ - sequential start (353f014)
330
+ - qa tests (2dc6a1a)
331
+ - bump sha for .85 (dc2edfe)
332
+ - preserve env on sudo (7831227)
333
+ - bump sha for .84 (6c052f9)
334
+ - non interactive for azure vms (0aa8a2f)
335
+ - keep .env if present (d072450)
336
+ - bump (7a8e732)
337
+ - ensure opa is on compose if not set (f4a5228)
338
+ - checksum bump (a2ccc20)
339
+ - netrc defensive checks (a0b0ccc)
340
+ - netrc defensive checks (ae37403)
341
+ - checksum (ec45d11)
342
+ - update sync and fix up (7f9af72)
343
+ - expand test for azure and add new per app tag support (388a168)
344
+ - checksum on update (44005fc)
345
+ - cleanup for later (15e5313)
346
+ - cleanup for later (11c9597)
347
+ - switch branch feature (822fecc)
348
+ - add pull (d1c19ab)
349
+ - Bump hono from 4.11.9 to 4.12.0 in /operator-cli (ad25144)
350
+ - tests (f180a9a)
351
+ - cleanup (39c49a3)
352
+ - registry (7b7126a)
353
+ - reconcile kafka (832d0db)
354
+ - gh login bug (025886c)
355
+ - cleanup (bb96cab)
356
+ - strip envs from process (2421180)
357
+ - force use of gh creds not tokens in envs var (fff7787)
358
+ - resolve import between npm installs and npm link (79522e1)
359
+ - fix gh scope and azure states (afd846c)
360
+ - refactoring (da50352)
361
+ - split fops repo (d447638)
362
+ - aks (b791f8f)
363
+ - refactor azure (67d3bad)
364
+ - wildcard (391f023)
365
+ - azure plugin (c074074)
366
+ - zap (d7e6e7f)
367
+ - fix knock (cf89c05)
368
+ - azure (4adec98)
369
+ - Bump tar from 7.5.7 to 7.5.9 in /operator-cli (e41e98e)
370
+
371
+ # Changelog
372
+
373
+ All notable changes to @meshxdata/fops (Foundation Operator CLI) are documented here.
374
+
5
375
  ## [0.1.58] - 2026-03-26
6
376
 
7
377
  - bump storage (a1a5761)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@meshxdata/fops",
3
- "version": "0.1.58",
3
+ "version": "0.1.60",
4
4
  "description": "CLI to install and manage data mesh platforms",
5
5
  "keywords": [
6
6
  "fops",
@@ -21,7 +21,7 @@ async function kubectlApply(execa, args) {
21
21
  if (apply.exitCode !== 0) throw new Error(`kubectl apply failed: ${apply.stderr}`);
22
22
  }
23
23
 
24
- async function syncSecrets(root) {
24
+ export async function syncSecrets(root) {
25
25
  const { execa } = await import("execa");
26
26
  const { loadEnvFromFile } = await import("../utils/load-env.js");
27
27
 
@@ -38,7 +38,7 @@ async function syncSecrets(root) {
38
38
  // Load credentials from .env (same resolution as setup-kubernetes.sh)
39
39
  const env = loadEnvFromFile(path.join(root, ".env"));
40
40
  const s3Id = env.BOOTSTRAP_STORAGE_ACCESS_KEY || env.AUTH_IDENTITY || "minio";
41
- const s3Pw = env.BOOTSTRAP_STORAGE_SECRET_KEY || env.AUTH_CREDENTIAL || "minio123";
41
+ const s3Pw = env.BOOTSTRAP_STORAGE_SECRET_KEY || env.AUTH_CREDENTIAL || "";
42
42
 
43
43
  console.log(DIM(` Storage credentials: ${s3Id} / ${"*".repeat(Math.min(s3Pw.length, 8))}`));
44
44
 
@@ -84,7 +84,25 @@ async function syncSecrets(root) {
84
84
  ], { timeout: 10000, reject: false });
85
85
  console.log(OK(` ✓ ${sparkSecretName} (with label)`));
86
86
 
87
- console.log(OK("\n ✓ All storage secrets synced to k3s"));
87
+ // 4. Update stale credentials in the database (public.secret table)
88
+ console.log(DIM(" Updating storage credentials in database..."));
89
+ const secretKeys = ["AWS_SECRET_ACCESS_KEY", "MY_S3_SECRET", "S3_SECRET", "S3_SECRET_KEY"];
90
+ const accessKeys = ["AWS_ACCESS_KEY_ID", "MY_S3_ACCESS", "S3_ACCESS", "S3_ACCESS_KEY"];
91
+ const updateSql = [
92
+ ...secretKeys.map(k => `UPDATE public.secret SET value = '${s3Pw.replace(/'/g, "''")}' WHERE key = '${k}' AND value != '${s3Pw.replace(/'/g, "''")}';`),
93
+ ...accessKeys.map(k => `UPDATE public.secret SET value = '${s3Id.replace(/'/g, "''")}' WHERE key = '${k}' AND value != '${s3Id.replace(/'/g, "''")}';`),
94
+ ].join("\n");
95
+ const dbResult = await execa("docker", [
96
+ "compose", "exec", "-T", "postgres",
97
+ "psql", "-U", "foundation", "-d", "foundation", "-c", updateSql,
98
+ ], { cwd: root, timeout: 15000, reject: false });
99
+ if (dbResult.exitCode === 0) {
100
+ console.log(OK(" ✓ Database secrets updated"));
101
+ } else {
102
+ console.log(WARN(" ⚠ Database secret update failed (postgres may not be running)"));
103
+ }
104
+
105
+ console.log(OK("\n ✓ All storage secrets synced"));
88
106
  }
89
107
 
90
108
  export function registerK3sCommands(program) {
@@ -1473,6 +1473,18 @@ async function runUp(program, registry, opts) {
1473
1473
  }
1474
1474
  console.log(chalk.green("\n ✓ Foundation services started successfully!"));
1475
1475
 
1476
+ // Sync storage secrets into k3s so Spark jobs use current credentials
1477
+ if (hasK3s) {
1478
+ try {
1479
+ const { syncSecrets } = await import("./k3s-cmd.js");
1480
+ console.log(chalk.dim(" Syncing storage secrets to k3s..."));
1481
+ await syncSecrets(root);
1482
+ } catch (err) {
1483
+ console.log(chalk.yellow(` ⚠ k3s secret sync failed: ${err.message}`));
1484
+ console.log(chalk.dim(" Run manually: fops k3s sync-secrets"));
1485
+ }
1486
+ }
1487
+
1476
1488
  // Start DAI services if requested
1477
1489
  if (opts.dai) {
1478
1490
  const daiRoot = path.join(root, "dai-compose");
@@ -830,7 +830,7 @@ export async function ensureOpenAiNetworkAccess(execa, publicIp, sub) {
830
830
  // ── Remote fops up command builder ──────────────────────────────────────────
831
831
 
832
832
  export function fopsUpCmd(publicUrl, { k3s, traefik, dai } = {}) {
833
- const profiles = ["k3s", "traefik", "loki"];
833
+ const profiles = ["k3s", "traefik", "loki", "frontend-prod"];
834
834
  if (dai) profiles.push("dai");
835
835
 
836
836
  const profileEnv = `COMPOSE_PROFILES=${profiles.join(",")} `;
@@ -158,22 +158,24 @@ export async function postStartChecks(execa, ip, adminUser, { maxWait = POSTGRES
158
158
  console.log(OK(" ✓ Postgres ready"));
159
159
 
160
160
  hint("Waiting for backend migrations…");
161
+ const MIGRATION_MAX_WAIT_MS = 300000; // 5 min — migrations can be slow after recovery or image pulls
162
+ const MIGRATION_POLL_INTERVAL_MS = 10000;
163
+ const MIGRATION_PROGRESS_INTERVAL_MS = 60000;
161
164
  const migStart = Date.now();
162
165
  let migReady = false;
163
- while (Date.now() - migStart < 120000) {
166
+ let lastMigProgressAt = 0;
167
+ while (Date.now() - migStart < MIGRATION_MAX_WAIT_MS) {
164
168
  const { exitCode: migCheck } = await ssh(
165
169
  `cd /opt/foundation-compose && sudo docker compose exec -T postgres psql -U foundation -d foundation -c "SELECT 1 FROM \\"user\\" LIMIT 1" >/dev/null 2>&1`
166
170
  );
167
171
  if (migCheck === 0) { migReady = true; break; }
168
- await new Promise((r) => setTimeout(r, 5000));
169
- }
170
- if (!migReady) {
171
- hint("Retrying migration check in 30s…");
172
- await new Promise((r) => setTimeout(r, 30000));
173
- const { exitCode: retryCheck } = await ssh(
174
- `cd /opt/foundation-compose && sudo docker compose exec -T postgres psql -U foundation -d foundation -c "SELECT 1 FROM \\"user\\" LIMIT 1" >/dev/null 2>&1`
175
- );
176
- if (retryCheck === 0) migReady = true;
172
+ const now = Date.now();
173
+ if (now - lastMigProgressAt >= MIGRATION_PROGRESS_INTERVAL_MS) {
174
+ const elapsed = Math.round((now - migStart) / 1000);
175
+ hint(`Still waiting for migrations… [${elapsed}s]`);
176
+ lastMigProgressAt = now;
177
+ }
178
+ await new Promise((r) => setTimeout(r, MIGRATION_POLL_INTERVAL_MS));
177
179
  }
178
180
  if (!migReady) {
179
181
  console.log(WARN(" ⚠ Migrations not complete — skipping grant-admin (schema not ready)"));
@@ -95,8 +95,15 @@ export async function configureVm(execa, ip, user, publicUrl, { githubToken, k3s
95
95
  if (installExit === 0) {
96
96
  if (!quiet) console.log(chalk.green(" ✓ Docker installed"));
97
97
  } else {
98
- console.log(chalk.red(" ✗ Docker installation failed container operations will not work"));
99
- console.log(chalk.dim(` SSH in and check: ssh ${user}@${ip} "sudo apt-get install -y docker-ce"`));
98
+ // apt can fail due to lock conflicts with cloud-init but Docker may have installed anyway
99
+ const { exitCode: recheck } = await ssh("sudo docker info >/dev/null 2>&1");
100
+ if (recheck === 0) {
101
+ if (!quiet) console.log(chalk.yellow(" ⚠ Docker install had warnings but Docker is working"));
102
+ } else {
103
+ console.log(chalk.red(" ✗ Docker installation failed — cannot continue provisioning"));
104
+ console.log(chalk.dim(` SSH in and check: ssh ${user}@${ip} "sudo apt-get install -y docker-ce"`));
105
+ throw new Error("Docker installation failed");
106
+ }
100
107
  }
101
108
  }
102
109
 
@@ -1281,6 +1288,18 @@ async function vmReconcileRepo(ctx) {
1281
1288
 
1282
1289
  const { stdout: exists } = await ssh("[ -d /opt/foundation-compose/.git ] && echo yes || echo no");
1283
1290
  if (exists?.trim() === "yes") {
1291
+ // Fix broken submodule mounts: Docker creates empty dirs when bind-mounting missing files.
1292
+ // Detect and repair: if a submodule dir exists but is empty or has no real files, re-init it.
1293
+ const checkSubs = "cd /opt/foundation-compose && for d in foundation-backend foundation-frontend foundation-watcher foundation-processor foundation-scheduler foundation-storage-engine; do [ -d $d ] && [ ! -f $d/pyproject.toml ] && [ ! -f $d/package.json ] && [ ! -f $d/Makefile ] && echo $d; done";
1294
+ const { stdout: brokenSubs } = await ssh(checkSubs);
1295
+ const broken = (brokenSubs || "").trim().split("\n").filter(Boolean);
1296
+ if (broken.length > 0) {
1297
+ console.log(chalk.yellow(` ↻ Fixing ${broken.length} broken submodule(s): ${broken.join(", ")}`));
1298
+ for (const sub of broken) {
1299
+ await ssh(`cd /opt/foundation-compose && sudo rm -rf ${sub} && git checkout ${sub} && git submodule update --init --recursive --depth 1 ${sub}`, 60000);
1300
+ }
1301
+ await ssh("sudo chown -R azureuser:azureuser /opt/foundation-compose");
1302
+ }
1284
1303
  reconcileOk("Repository", "/opt/foundation-compose");
1285
1304
  return;
1286
1305
  }
@@ -47,6 +47,8 @@ export class AzureService {
47
47
  publicIp: vm.publicIp,
48
48
  publicUrl: vm.publicUrl,
49
49
  subscriptionId: vm.subscriptionId,
50
+ vmSize: vm.vmSize || null,
51
+ image: vm.image || null,
50
52
  active: name === activeVm,
51
53
  createdAt: vm.createdAt || vm.discoveredAt || null,
52
54
  }));
@@ -263,7 +263,7 @@ export async function azureUp(opts = {}) {
263
263
 
264
264
  // Persist IP immediately so it's never lost if later steps fail or user Ctrl+C's
265
265
  const publicUrl = opts.url || defaultUrl;
266
- writeVmState(vmName, { resourceGroup: rg, location, publicIp, publicUrl, subscriptionId: subId, createdAt: new Date().toISOString() });
266
+ writeVmState(vmName, { resourceGroup: rg, location, publicIp, publicUrl, subscriptionId: subId, vmSize, image, createdAt: new Date().toISOString() });
267
267
 
268
268
  hint("Enabling accelerated networking…");
269
269
  const nicName = `${vmName}VMNic`;
@@ -337,13 +337,13 @@ export function createCloudApi(registry) {
337
337
 
338
338
  app.get("/vm-sizes", (c) => {
339
339
  return c.json([
340
- "Standard_D4s_v5",
341
- "Standard_D8s_v5",
342
- "Standard_D16s_v5",
343
- "Standard_D32s_v5",
344
- "Standard_D48s_v5",
345
- "Standard_D64s_v5",
346
- "Standard_D96s_v5",
340
+ "Standard_D2s_v3",
341
+ "Standard_D4s_v3",
342
+ "Standard_D8s_v3",
343
+ "Standard_D16s_v3",
344
+ "Standard_D32s_v3",
345
+ "Standard_D48s_v3",
346
+ "Standard_D64s_v3",
347
347
  ]);
348
348
  });
349
349
 
package/src/project.js CHANGED
@@ -41,14 +41,19 @@ function saveProjectRoot(root) {
41
41
  }
42
42
 
43
43
  export function rootDir(cwd = process.cwd()) {
44
-
45
- // Check FOUNDATION_ROOT env var first (explicit override)
44
+ // FOUNDATION_ROOT always wins — explicit override
46
45
  const envRoot = process.env.FOUNDATION_ROOT;
47
46
  if (envRoot && isFoundationRoot(envRoot)) {
48
47
  return path.resolve(envRoot);
49
48
  }
50
49
 
51
- // Check ~/.fops.json for saved project root (projectRoot or foundationRoot)
50
+ const dir = path.resolve(cwd);
51
+ if (isFoundationRoot(dir)) {
52
+ saveProjectRoot(dir);
53
+ return dir;
54
+ }
55
+
56
+ // Fallback: ~/.fops.json saved project root
52
57
  try {
53
58
  const fopsConfig = JSON.parse(fs.readFileSync(path.join(os.homedir(), ".fops.json"), "utf8"));
54
59
  const configRoot = fopsConfig.foundationRoot || fopsConfig.projectRoot;
@@ -57,12 +62,6 @@ export function rootDir(cwd = process.cwd()) {
57
62
  }
58
63
  } catch {}
59
64
 
60
- const dir = path.resolve(cwd);
61
- if (isFoundationRoot(dir)) {
62
- saveProjectRoot(dir);
63
- return dir;
64
- }
65
-
66
65
  // Look one level down — e.g. cwd is ~/code/foundation, find foundation-compose/ inside
67
66
  try {
68
67
  const entries = fs.readdirSync(dir, { withFileTypes: true });