@meshxdata/fops 0.1.41 → 0.1.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,377 @@
1
+ ## [0.1.43] - 2026-03-11
2
+
3
+ - Mlflow and azure plugin fix (176881f)
4
+ - fix lifecycle (a2cb9e7)
5
+ - callback url for localhost (821fb94)
6
+ - disable 4 scaffolding plugin by default. (bfb2b76)
7
+ - jaccard improvements (b7494a0)
8
+ - refactor azure plugin (68dfef4)
9
+ - refactor azure plugin (b24a008)
10
+ - fix trino catalog missing (4928a55)
11
+ - v36 bump and changelog generation on openai (37a0440)
12
+ - v36 bump and changelog generation on openai (a3b02d9)
13
+ - bump (a990058)
14
+ - status bar fix and new plugin for ttyd (27dde1e)
15
+ - file demo and tray (1a3e704)
16
+ - electron app (59ad0bb)
17
+ - compose and fops file plugin (1cf0e81)
18
+ - bump (346ffc1)
19
+ - localhost replaced by 127.0.0.1 (82b9f30)
20
+ - .29 (587b0e1)
21
+ - improve up down and bootstrap script (b79ebaf)
22
+ - checksum (22c8086)
23
+ - checksum (96b434f)
24
+ - checksum (15ed3c0)
25
+ - checksum (8a6543a)
26
+ - bump embed trino links (8440504)
27
+ - bump data (765ffd9)
28
+ - bump (cb8b232)
29
+ - broken tests (c532229)
30
+ - release 0.1.18, preflight checks (d902249)
31
+ - fix compute display bug (d10f5d9)
32
+ - cleanup packer files (6330f18)
33
+ - plan mode (cb36a8a)
34
+ - bump to 0.1.16 - agent ui (41ac1a2)
35
+ - bump to 0.1.15 - agent ui (4ebe2e1)
36
+ - bump to 0.1.14 (6c3a7fa)
37
+ - bump to 0.1.13 (8db570f)
38
+ - release 0.1.12 (c1c79e5)
39
+ - bump (11aa3b0)
40
+ - git keep and bump tui (be1678e)
41
+ - skills, index, rrf, compacted context (100k > 10k) (7b2fffd)
42
+ - cloudflare and token consumption, graphs indexing (0ad9eec)
43
+ - bump storage default (22c83ba)
44
+ - storage fix (68a22a0)
45
+ - skills update (7f56500)
46
+ - v9 bump (3864446)
47
+ - bump (c95eedc)
48
+ - rrf (dbf8c95)
49
+ - feat: warning when running predictions (95e8c52)
50
+ - feat: support for local predictions (45cf26b)
51
+ - feat: wip support for predictions + mlflow (3457052)
52
+ - add Reciprocal Rank Fusion (RRF) to knowledge and skill retrieval (61549bc)
53
+ - validate CSV headers in compute_run readiness check (a8c7a43)
54
+ - fix corrupted Iceberg metadata: probe tables + force cleanup on re-apply (50578af)
55
+ - enforce: never use foundation_apply to fix broken products (2e049bf)
56
+ - update SKILL.md with complete tool reference for knowledge retrieval (30b1924)
57
+ - add storage read, input DP table probe, and compute_run improvements (34e6c4c)
58
+ - skills update (1220385)
59
+ - skills update (bb66958)
60
+ - some tui improvements and tools apply overwrite (e90c35c)
61
+ - skills update (e9227a1)
62
+ - skills update (669c4b3)
63
+ - fix plugin pre-flight checks (f741743)
64
+ - increase agent context (6479aaa)
65
+ - skills and init sql fixes (5fce35e)
66
+ - checksum (3518b56)
67
+ - pending job limit (a139861)
68
+ - checksum (575d28c)
69
+ - bump (92049ba)
70
+ - fix bug per tab status (0a33657)
71
+ - fix bug per tab status (50457c6)
72
+ - checksumming (0ad842e)
73
+ - shot af markdown overlapping (51f63b9)
74
+ - add spark dockerfile for multiarch builds (95abbd1)
75
+ - fix plugin initialization (16b9782)
76
+ - split index.js (50902a2)
77
+ - cloudflare cidr (cc4e021)
78
+ - cloudflare restrictions (2f6ba2d)
79
+ - sequential start (86b496e)
80
+ - sequential start (4930fe1)
81
+ - sequential start (353f014)
82
+ - qa tests (2dc6a1a)
83
+ - bump sha for .85 (dc2edfe)
84
+ - preserve env on sudo (7831227)
85
+ - bump sha for .84 (6c052f9)
86
+ - non interactive for azure vms (0aa8a2f)
87
+ - keep .env if present (d072450)
88
+ - bump (7a8e732)
89
+ - ensure opa is on compose if not set (f4a5228)
90
+ - checksum bump (a2ccc20)
91
+ - netrc defensive checks (a0b0ccc)
92
+ - netrc defensive checks (ae37403)
93
+ - checksum (ec45d11)
94
+ - update sync and fix up (7f9af72)
95
+ - expand test for azure and add new per app tag support (388a168)
96
+ - checksum on update (44005fc)
97
+ - cleanup for later (15e5313)
98
+ - cleanup for later (11c9597)
99
+ - switch branch feature (822fecc)
100
+ - add pull (d1c19ab)
101
+ - Bump hono from 4.11.9 to 4.12.0 in /operator-cli (ad25144)
102
+ - tests (f180a9a)
103
+ - cleanup (39c49a3)
104
+ - registry (7b7126a)
105
+ - reconcile kafka (832d0db)
106
+ - gh login bug (025886c)
107
+ - cleanup (bb96cab)
108
+ - strip envs from process (2421180)
109
+ - force use of gh creds not tokens in envs var (fff7787)
110
+ - resolve import between npm installs and npm link (79522e1)
111
+ - fix gh scope and azure states (afd846c)
112
+ - refactoring (da50352)
113
+ - split fops repo (d447638)
114
+ - aks (b791f8f)
115
+ - refactor azure (67d3bad)
116
+ - wildcard (391f023)
117
+ - azure plugin (c074074)
118
+ - zap (d7e6e7f)
119
+ - fix knock (cf89c05)
120
+ - azure (4adec98)
121
+ - Bump tar from 7.5.7 to 7.5.9 in /operator-cli (e41e98e)
122
+ - azure stack index.js split (de12272)
123
+ - Bump ajv from 8.17.1 to 8.18.0 in /operator-cli (76da21f)
124
+ - packer (9665fbc)
125
+ - remove stack api (db0fd4d)
126
+ - packer cleanup (fe1bf14)
127
+ - force refresh token (3a3d7e2)
128
+ - provision shell (2ad505f)
129
+ - azure vm management (91dcb31)
130
+ - azure specific (2b0cca8)
131
+ - azure packer (12175b8)
132
+ - init hashed pwd (db8523c)
133
+ - packer (5b5c7c4)
134
+ - doctor for azure vm (ed524fa)
135
+ - packer and 1pwd (c6d053e)
136
+ - split big index.js (dc85a1b)
137
+ - kafka volume update (21815ec)
138
+ - fix openai azure tools confirmation and flow (0118cd1)
139
+ - nightly fix, test fix (5e0d04f)
140
+ - open ai training (cdc494a)
141
+ - openai integration in azure (1ca1475)
142
+ - ci (672cea9)
143
+ - refresh ghcr creds (4220c48)
144
+ - cleaned up version (1a0074f)
145
+ - traefik on ghcr and templates (8e31a05)
146
+ - apply fcl (e78911f)
147
+ - demo landscape (dd205fe)
148
+ - smarter login and schema (1af514f)
149
+ - no down before up unless something broke (56b1132)
150
+ - dai, reconcile failed containers (12907fa)
151
+ - reconcile dead container (7da75e4)
152
+ - defensive around storage buckets dir (b98871d)
153
+ - defensive around storage buckets dir (e86e132)
154
+ - gear in for multiarch (bf3fa3e)
155
+ - up autofix (99c7f89)
156
+ - autofix stale containers on up (43c7d0f)
157
+ - shared sessions fix (5de1359)
158
+ - share sessions between ui and tui (8321391)
159
+ - fix chat view display details (e263996)
160
+ - fix chat view display details (9babdda)
161
+ - tui up fixes (86e9f17)
162
+ - fix commands init (442538b)
163
+ - enable k3s profile (b2dcfc8)
164
+ - test up till job creation (656d388)
165
+ - tui fixes (0599779)
166
+ - cleanup (27731f0)
167
+ - train (90bf559)
168
+ - training (f809bf6)
169
+ - training (ba2b836)
170
+ - training (6fc5267)
171
+ - training (4af8ac9)
172
+ - fix build script (bd82836)
173
+ - infra test (5b79815)
174
+ - infra test (3a0ac05)
175
+ - infra test (e5c67b5)
176
+ - tests (ae7b621)
177
+ - tests (c09ae6a)
178
+ - update tui (4784153)
179
+ - training (0a5a330)
180
+ - tui (df4dd4a)
181
+ - pkg builds (4dc9993)
182
+ - also source env for creds (9a17d8f)
183
+ - fcl support (e8a5743)
184
+ - fcl support (8d6b6cd)
185
+
186
+ # Changelog
187
+
188
+ All notable changes to @meshxdata/fops (Foundation Operator CLI) are documented here.
189
+
190
+ ## [0.1.42] - 2026-03-11
191
+
192
+ - Mlflow and azure plugin fix (176881f)
193
+ - fix lifecycle (a2cb9e7)
194
+ - callback url for localhost (821fb94)
195
+ - disable 4 scaffolding plugin by default. (bfb2b76)
196
+ - jaccard improvements (b7494a0)
197
+ - refactor azure plugin (68dfef4)
198
+ - refactor azure plugin (b24a008)
199
+ - fix trino catalog missing (4928a55)
200
+ - v36 bump and changelog generation on openai (37a0440)
201
+ - v36 bump and changelog generation on openai (a3b02d9)
202
+ - bump (a990058)
203
+ - status bar fix and new plugin for ttyd (27dde1e)
204
+ - file demo and tray (1a3e704)
205
+ - electron app (59ad0bb)
206
+ - compose and fops file plugin (1cf0e81)
207
+ - bump (346ffc1)
208
+ - localhost replaced by 127.0.0.1 (82b9f30)
209
+ - .29 (587b0e1)
210
+ - improve up down and bootstrap script (b79ebaf)
211
+ - checksum (22c8086)
212
+ - checksum (96b434f)
213
+ - checksum (15ed3c0)
214
+ - checksum (8a6543a)
215
+ - bump embed trino links (8440504)
216
+ - bump data (765ffd9)
217
+ - bump (cb8b232)
218
+ - broken tests (c532229)
219
+ - release 0.1.18, preflight checks (d902249)
220
+ - fix compute display bug (d10f5d9)
221
+ - cleanup packer files (6330f18)
222
+ - plan mode (cb36a8a)
223
+ - bump to 0.1.16 - agent ui (41ac1a2)
224
+ - bump to 0.1.15 - agent ui (4ebe2e1)
225
+ - bump to 0.1.14 (6c3a7fa)
226
+ - bump to 0.1.13 (8db570f)
227
+ - release 0.1.12 (c1c79e5)
228
+ - bump (11aa3b0)
229
+ - git keep and bump tui (be1678e)
230
+ - skills, index, rrf, compacted context (100k > 10k) (7b2fffd)
231
+ - cloudflare and token consumption, graphs indexing (0ad9eec)
232
+ - bump storage default (22c83ba)
233
+ - storage fix (68a22a0)
234
+ - skills update (7f56500)
235
+ - v9 bump (3864446)
236
+ - bump (c95eedc)
237
+ - rrf (dbf8c95)
238
+ - feat: warning when running predictions (95e8c52)
239
+ - feat: support for local predictions (45cf26b)
240
+ - feat: wip support for predictions + mlflow (3457052)
241
+ - add Reciprocal Rank Fusion (RRF) to knowledge and skill retrieval (61549bc)
242
+ - validate CSV headers in compute_run readiness check (a8c7a43)
243
+ - fix corrupted Iceberg metadata: probe tables + force cleanup on re-apply (50578af)
244
+ - enforce: never use foundation_apply to fix broken products (2e049bf)
245
+ - update SKILL.md with complete tool reference for knowledge retrieval (30b1924)
246
+ - add storage read, input DP table probe, and compute_run improvements (34e6c4c)
247
+ - skills update (1220385)
248
+ - skills update (bb66958)
249
+ - some tui improvements and tools apply overwrite (e90c35c)
250
+ - skills update (e9227a1)
251
+ - skills update (669c4b3)
252
+ - fix plugin pre-flight checks (f741743)
253
+ - increase agent context (6479aaa)
254
+ - skills and init sql fixes (5fce35e)
255
+ - checksum (3518b56)
256
+ - pending job limit (a139861)
257
+ - checksum (575d28c)
258
+ - bump (92049ba)
259
+ - fix bug per tab status (0a33657)
260
+ - fix bug per tab status (50457c6)
261
+ - checksumming (0ad842e)
262
+ - shot af markdown overlapping (51f63b9)
263
+ - add spark dockerfile for multiarch builds (95abbd1)
264
+ - fix plugin initialization (16b9782)
265
+ - split index.js (50902a2)
266
+ - cloudflare cidr (cc4e021)
267
+ - cloudflare restrictions (2f6ba2d)
268
+ - sequential start (86b496e)
269
+ - sequential start (4930fe1)
270
+ - sequential start (353f014)
271
+ - qa tests (2dc6a1a)
272
+ - bump sha for .85 (dc2edfe)
273
+ - preserve env on sudo (7831227)
274
+ - bump sha for .84 (6c052f9)
275
+ - non interactive for azure vms (0aa8a2f)
276
+ - keep .env if present (d072450)
277
+ - bump (7a8e732)
278
+ - ensure opa is on compose if not set (f4a5228)
279
+ - checksum bump (a2ccc20)
280
+ - netrc defensive checks (a0b0ccc)
281
+ - netrc defensive checks (ae37403)
282
+ - checksum (ec45d11)
283
+ - update sync and fix up (7f9af72)
284
+ - expand test for azure and add new per app tag support (388a168)
285
+ - checksum on update (44005fc)
286
+ - cleanup for later (15e5313)
287
+ - cleanup for later (11c9597)
288
+ - switch branch feature (822fecc)
289
+ - add pull (d1c19ab)
290
+ - Bump hono from 4.11.9 to 4.12.0 in /operator-cli (ad25144)
291
+ - tests (f180a9a)
292
+ - cleanup (39c49a3)
293
+ - registry (7b7126a)
294
+ - reconcile kafka (832d0db)
295
+ - gh login bug (025886c)
296
+ - cleanup (bb96cab)
297
+ - strip envs from process (2421180)
298
+ - force use of gh creds not tokens in envs var (fff7787)
299
+ - resolve import between npm installs and npm link (79522e1)
300
+ - fix gh scope and azure states (afd846c)
301
+ - refactoring (da50352)
302
+ - split fops repo (d447638)
303
+ - aks (b791f8f)
304
+ - refactor azure (67d3bad)
305
+ - wildcard (391f023)
306
+ - azure plugin (c074074)
307
+ - zap (d7e6e7f)
308
+ - fix knock (cf89c05)
309
+ - azure (4adec98)
310
+ - Bump tar from 7.5.7 to 7.5.9 in /operator-cli (e41e98e)
311
+ - azure stack index.js split (de12272)
312
+ - Bump ajv from 8.17.1 to 8.18.0 in /operator-cli (76da21f)
313
+ - packer (9665fbc)
314
+ - remove stack api (db0fd4d)
315
+ - packer cleanup (fe1bf14)
316
+ - force refresh token (3a3d7e2)
317
+ - provision shell (2ad505f)
318
+ - azure vm management (91dcb31)
319
+ - azure specific (2b0cca8)
320
+ - azure packer (12175b8)
321
+ - init hashed pwd (db8523c)
322
+ - packer (5b5c7c4)
323
+ - doctor for azure vm (ed524fa)
324
+ - packer and 1pwd (c6d053e)
325
+ - split big index.js (dc85a1b)
326
+ - kafka volume update (21815ec)
327
+ - fix openai azure tools confirmation and flow (0118cd1)
328
+ - nightly fix, test fix (5e0d04f)
329
+ - open ai training (cdc494a)
330
+ - openai integration in azure (1ca1475)
331
+ - ci (672cea9)
332
+ - refresh ghcr creds (4220c48)
333
+ - cleaned up version (1a0074f)
334
+ - traefik on ghcr and templates (8e31a05)
335
+ - apply fcl (e78911f)
336
+ - demo landscape (dd205fe)
337
+ - smarter login and schema (1af514f)
338
+ - no down before up unless something broke (56b1132)
339
+ - dai, reconcile failed containers (12907fa)
340
+ - reconcile dead container (7da75e4)
341
+ - defensive around storage buckets dir (b98871d)
342
+ - defensive around storage buckets dir (e86e132)
343
+ - gear in for multiarch (bf3fa3e)
344
+ - up autofix (99c7f89)
345
+ - autofix stale containers on up (43c7d0f)
346
+ - shared sessions fix (5de1359)
347
+ - share sessions between ui and tui (8321391)
348
+ - fix chat view display details (e263996)
349
+ - fix chat view display details (9babdda)
350
+ - tui up fixes (86e9f17)
351
+ - fix commands init (442538b)
352
+ - enable k3s profile (b2dcfc8)
353
+ - test up till job creation (656d388)
354
+ - tui fixes (0599779)
355
+ - cleanup (27731f0)
356
+ - train (90bf559)
357
+ - training (f809bf6)
358
+ - training (ba2b836)
359
+ - training (6fc5267)
360
+ - training (4af8ac9)
361
+ - fix build script (bd82836)
362
+ - infra test (5b79815)
363
+ - infra test (3a0ac05)
364
+ - infra test (e5c67b5)
365
+ - tests (ae7b621)
366
+ - tests (c09ae6a)
367
+ - update tui (4784153)
368
+ - training (0a5a330)
369
+ - tui (df4dd4a)
370
+ - pkg builds (4dc9993)
371
+ - also source env for creds (9a17d8f)
372
+ - fcl support (e8a5743)
373
+ - fcl support (8d6b6cd)
374
+
1
375
  ## [0.1.41] - 2026-03-11
2
376
 
3
377
  - fix lifecycle (a2cb9e7)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@meshxdata/fops",
3
- "version": "0.1.41",
3
+ "version": "0.1.43",
4
4
  "description": "CLI to install and manage data mesh platforms",
5
5
  "keywords": [
6
6
  "fops",
@@ -92,7 +92,7 @@ export function resolveAuth0Config() {
92
92
  * Tries the backend /iam/login first, then falls back to Auth0 ROPC.
93
93
  */
94
94
  export async function authenticateVm(vmUrl, ip, creds) {
95
- if (creds.bearerToken) return creds.bearerToken;
95
+ if (creds.bearerToken && !isJwtExpired(creds.bearerToken)) return creds.bearerToken;
96
96
 
97
97
  const hasDomain = vmUrl && !vmUrl.match(/^https?:\/\/\d+\.\d+\.\d+\.\d+/);
98
98
  const apiUrls = hasDomain
@@ -148,6 +148,28 @@ export function isJwt(token) {
148
148
  return token && token.split(".").length === 3;
149
149
  }
150
150
 
151
+ /**
152
+ * Decode a JWT payload without verification (for expiry checks only).
153
+ * Returns the parsed payload or null on failure.
154
+ */
155
+ function decodeJwtPayload(token) {
156
+ try {
157
+ const parts = token.split(".");
158
+ if (parts.length !== 3) return null;
159
+ const payload = Buffer.from(parts[1], "base64url").toString("utf8");
160
+ return JSON.parse(payload);
161
+ } catch { return null; }
162
+ }
163
+
164
+ /**
165
+ * Check if a JWT is expired or expires within the next 60s; tokens without a readable exp claim are treated as not expired.
166
+ */
167
+ export function isJwtExpired(token) {
168
+ const payload = decodeJwtPayload(token);
169
+ if (!payload?.exp) return false; // no exp claim → assume valid
170
+ return Date.now() / 1000 > payload.exp - 60;
171
+ }
172
+
151
173
  /**
152
174
  * Resolve a valid JWT bearer token for a remote VM/cluster.
153
175
  * Auth chain: local bearer → pre-auth /iam/login → Auth0 ROPC → SSH fetch from VM.
@@ -165,9 +187,12 @@ export async function resolveRemoteAuth(opts = {}) {
165
187
  let qaPass = creds?.password || process.env.QA_PASSWORD || "";
166
188
  let bearerToken = creds?.bearerToken || "";
167
189
 
168
- // 1) Use local bearer if it's a valid JWT
190
+ // 1) Use local bearer if it's a valid, non-expired JWT
169
191
  if (bearerToken && isJwt(bearerToken)) {
170
- return { bearerToken, qaUser, qaPass, useTokenMode: true };
192
+ if (!isJwtExpired(bearerToken)) {
193
+ return { bearerToken, qaUser, qaPass, useTokenMode: true };
194
+ }
195
+ log(chalk.dim(" Local bearer token expired — refreshing…"));
171
196
  }
172
197
  bearerToken = "";
173
198
 
@@ -224,32 +249,8 @@ export async function resolveRemoteAuth(opts = {}) {
224
249
  if (resp.ok) {
225
250
  const data = await resp.json();
226
251
  if (data.access_token) {
227
- // Validate the token against the target API before committing to it.
228
- // Local Auth0 config may have a different audience than the remote VM expects.
229
- let tokenValid = true;
230
- if (apiUrl) {
231
- try {
232
- const prev = process.env.NODE_TLS_REJECT_UNAUTHORIZED;
233
- process.env.NODE_TLS_REJECT_UNAUTHORIZED = "0";
234
- try {
235
- const check = await fetch(`${apiUrl}/iam/me`, {
236
- headers: { Authorization: `Bearer ${data.access_token}` },
237
- signal: AbortSignal.timeout(8_000),
238
- });
239
- if (check.status === 401 || check.status === 403) {
240
- tokenValid = false;
241
- log(chalk.dim(` Auth0 token rejected by API (wrong audience) — trying SSH fallback…`));
242
- }
243
- } finally {
244
- if (prev === undefined) delete process.env.NODE_TLS_REJECT_UNAUTHORIZED;
245
- else process.env.NODE_TLS_REJECT_UNAUTHORIZED = prev;
246
- }
247
- } catch { /* network error — assume token is OK */ }
248
- }
249
- if (tokenValid) {
250
- log(chalk.green(` ✓ Authenticated as ${qaUser} via Auth0`));
251
- return { bearerToken: data.access_token, qaUser, qaPass, useTokenMode: true };
252
- }
252
+ log(chalk.green(` ✓ Authenticated as ${qaUser} via Auth0`));
253
+ return { bearerToken: data.access_token, qaUser, qaPass, useTokenMode: true };
253
254
  }
254
255
  } else {
255
256
  log(chalk.dim(` Auth0 rejected: HTTP ${resp.status}`));
@@ -279,8 +280,11 @@ export async function resolveRemoteAuth(opts = {}) {
279
280
 
280
281
  const remoteToken = remoteEnv.BEARER_TOKEN;
281
282
  if (remoteToken && isJwt(remoteToken)) {
282
- log(chalk.green(" ✓ Got JWT bearer token from VM"));
283
- return { bearerToken: remoteToken, qaUser, qaPass, useTokenMode: true };
283
+ if (!isJwtExpired(remoteToken)) {
284
+ log(chalk.green(" ✓ Got JWT bearer token from VM"));
285
+ return { bearerToken: remoteToken, qaUser, qaPass, useTokenMode: true };
286
+ }
287
+ log(chalk.dim(" Remote bearer token expired — trying VM Auth0…"));
284
288
  }
285
289
 
286
290
  if (remoteEnv.MX_AUTH0_DOMAIN && remoteEnv.MX_AUTH0_CLIENT_ID) {
@@ -369,10 +369,33 @@ export async function resolvePublicIp(execa, rg, vmName, fallback) {
369
369
  "vm", "list-ip-addresses", "-g", rg, "-n", vmName, "--output", "json",
370
370
  ], { reject: false, timeout: 15000 });
371
371
  const ips = JSON.parse(stdout);
372
- return ips?.[0]?.virtualMachine?.network?.publicIpAddresses?.[0]?.ipAddress || fallback || "";
373
- } catch {
374
- return fallback || "";
375
- }
372
+ const ip = ips?.[0]?.virtualMachine?.network?.publicIpAddresses?.[0]?.ipAddress;
373
+ if (ip) return ip;
374
+ } catch {}
375
+ // Fallback: query the VM's NIC directly (az vm list-ip-addresses can miss attached IPs)
376
+ try {
377
+ const { stdout: nicOut } = await execa("az", [
378
+ "vm", "show", "-g", rg, "-n", vmName,
379
+ "--query", "networkProfile.networkInterfaces[0].id", "--output", "tsv",
380
+ ], { reject: false, timeout: 10000 });
381
+ const nicId = nicOut?.trim();
382
+ if (nicId) {
383
+ const { stdout: ipOut } = await execa("az", [
384
+ "network", "nic", "show", "--ids", nicId,
385
+ "--query", "ipConfigurations[0].publicIPAddress.id", "--output", "tsv",
386
+ ], { reject: false, timeout: 10000 });
387
+ const pubId = ipOut?.trim();
388
+ if (pubId) {
389
+ const { stdout: addrOut } = await execa("az", [
390
+ "network", "public-ip", "show", "--ids", pubId,
391
+ "--query", "ipAddress", "--output", "tsv",
392
+ ], { reject: false, timeout: 10000 });
393
+ const addr = addrOut?.trim();
394
+ if (addr) return addr;
395
+ }
396
+ }
397
+ } catch {}
398
+ return fallback || "";
376
399
  }
377
400
 
378
401
  // ── GitHub token helpers ────────────────────────────────────────────────────
@@ -2394,6 +2394,28 @@ export async function azureList(opts = {}) {
2394
2394
  return;
2395
2395
  }
2396
2396
 
2397
+ // Resolve missing public IPs from Azure before sync (so sync can SSH into VMs)
2398
+ const needIp = vmNames.filter((n) => !vms[n]?.publicIp && vms[n]?.resourceGroup);
2399
+ if (needIp.length > 0) {
2400
+ try {
2401
+ const execa = await lazyExeca();
2402
+ await ensureAzCli(execa);
2403
+ const resolved = await Promise.all(
2404
+ needIp.map(async (name) => {
2405
+ const ip = await resolvePublicIp(execa, vms[name].resourceGroup, name, null);
2406
+ return { name, ip };
2407
+ }),
2408
+ );
2409
+ for (const { name, ip } of resolved) {
2410
+ if (ip) writeVmState(name, { publicIp: ip });
2411
+ }
2412
+ const updated = resolved.filter((r) => r.ip).length;
2413
+ if (updated > 0) {
2414
+ ({ vms } = listVms());
2415
+ }
2416
+ } catch { /* ignore */ }
2417
+ }
2418
+
2397
2419
  // Use cache if fresh, otherwise try shared tags, then fall back to full sync
2398
2420
  const forceLive = opts.live;
2399
2421
  let cache = readCache();
@@ -2429,31 +2451,6 @@ export async function azureList(opts = {}) {
2429
2451
  ? DIM(` (synced ${timeSince(cacheTime)} ago${sourceLabel})`)
2430
2452
  : "";
2431
2453
 
2432
- // Resolve missing public IPs from Azure (e.g. VM added by discover but IP not in state yet)
2433
- const needIp = vmNames.filter((n) => !vms[n]?.publicIp && !cachedVms[n]?.publicIp && vms[n]?.resourceGroup);
2434
- if (needIp.length > 0) {
2435
- try {
2436
- const execa = await lazyExeca();
2437
- await ensureAzCli(execa);
2438
- const resolved = await Promise.all(
2439
- needIp.map(async (name) => {
2440
- const ip = await resolvePublicIp(execa, vms[name].resourceGroup, name, null);
2441
- return { name, ip };
2442
- }),
2443
- );
2444
- for (const { name, ip } of resolved) {
2445
- if (ip) {
2446
- writeVmState(name, { publicIp: ip });
2447
- if (cachedVms[name]) cachedVms[name].publicIp = ip;
2448
- }
2449
- }
2450
- const updated = resolved.filter((r) => r.ip).length;
2451
- if (updated > 0) {
2452
- ({ vms } = listVms());
2453
- }
2454
- } catch { /* ignore */ }
2455
- }
2456
-
2457
2454
  // ── VM sizes (one az call for all fops-managed VMs) ───────────────────────
2458
2455
  let vmSizes = {};
2459
2456
  if (vmNames.length > 0) {
@@ -118,6 +118,10 @@ function _tryRecover() {
118
118
  if (current[k] !== undefined) recovered.state[k] = current[k];
119
119
  }
120
120
  } catch { /* current file unreadable or invalid JSON, skip merge */ }
121
+ // Recompute checksum after merge so next readState() doesn't see a mismatch
122
+ if (recovered.state._meta) {
123
+ recovered.state._meta.checksum = _checksum(JSON.stringify(_stateBody(recovered.state)));
124
+ }
121
125
  try { _atomicWrite(STATE_PATH, JSON.stringify(recovered.state, null, 2) + "\n"); } catch {}
122
126
  return recovered.state;
123
127
  }
@@ -15,14 +15,17 @@ export function registerTestCommands(azure) {
15
15
  .description("Run QA automation tests locally against a remote VM")
16
16
  .option("--vm-name <name>", "Target VM (default: active)")
17
17
  .action(async (name, opts) => {
18
- const { requireVmState, knockForVm } = await import("../azure.js");
19
- const { resolveCliSrc } = await import("../azure-helpers.js");
18
+ const { resolveCliSrc, lazyExeca, ensureAzCli, ensureAzAuth, resolvePublicIp } = await import("../azure-helpers.js");
19
+ const { requireVmState, knockForVm, sshCmd, MUX_OPTS } = await import("../azure.js");
20
20
  const { rootDir } = await import(resolveCliSrc("project.js"));
21
21
  const fsp = await import("node:fs/promises");
22
22
  const path = await import("node:path");
23
23
 
24
24
  const state = requireVmState(opts.vmName || name);
25
- const ip = state.publicIp;
25
+ const execa = await lazyExeca();
26
+ await ensureAzCli(execa);
27
+ await ensureAzAuth(execa);
28
+ const ip = await resolvePublicIp(execa, state.resourceGroup, state.vmName, state.publicIp);
26
29
  if (!ip) {
27
30
  console.error(chalk.red("\n No IP address. Is the VM running? Try: fops azure start\n"));
28
31
  process.exit(1);
@@ -45,13 +48,11 @@ export function registerTestCommands(azure) {
45
48
 
46
49
  const vmUrl = state.publicUrl || `https://${ip}`;
47
50
  const apiUrl = `${vmUrl}/api`;
48
- const { execa: execaFn } = await import("execa");
49
- const { sshCmd, MUX_OPTS } = await import("../azure.js");
50
51
 
51
52
  console.log(chalk.dim(` Authenticating against ${vmUrl}…`));
52
53
  const auth = await resolveRemoteAuth({
53
54
  apiUrl, ip, vmState: state,
54
- execaFn, sshCmd, knockForVm, suppressTlsWarning,
55
+ execaFn: execa, sshCmd, knockForVm, suppressTlsWarning,
55
56
  });
56
57
  let { bearerToken, qaUser, qaPass, useTokenMode } = auth;
57
58
 
@@ -102,8 +103,8 @@ export function registerTestCommands(azure) {
102
103
  await fsp.access(path.join(qaDir, "venv"));
103
104
  } catch {
104
105
  console.log(chalk.cyan(" Setting up QA automation environment…"));
105
- await execaFn("python3", ["-m", "venv", "venv"], { cwd: qaDir, stdio: "inherit" });
106
- await execaFn("bash", ["-c", "source venv/bin/activate && pip install -r requirements.txt && playwright install"], { cwd: qaDir, stdio: "inherit" });
106
+ await execa("python3", ["-m", "venv", "venv"], { cwd: qaDir, stdio: "inherit" });
107
+ await execa("bash", ["-c", "source venv/bin/activate && pip install -r requirements.txt && playwright install"], { cwd: qaDir, stdio: "inherit" });
107
108
  }
108
109
 
109
110
  // Knock to ensure VM is reachable
@@ -156,7 +157,7 @@ export function registerTestCommands(azure) {
156
157
  }
157
158
 
158
159
  const startMs = Date.now();
159
- const proc = execaFn(
160
+ const proc = execa(
160
161
  "bash",
161
162
  ["-c", `source venv/bin/activate && pytest ${pytestArgs}`],
162
163
  { cwd: qaDir, timeout: 600_000, reject: false, env: testEnv },