robot-resources 1.9.4 → 1.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/service.js CHANGED
@@ -216,6 +216,28 @@ WantedBy=default.target
216
216
  `;
217
217
  }
218
218
 
219
/**
 * Check whether systemd "linger" is enabled for a user.
 *
 * Without linger, systemd-user services are torn down when the user logs
 * out (SSH disconnect, login manager logout). On the Finland signup
 * (2026-04-23) this was the root cause: 3 heartbeats, then the session
 * ended, then the router died with the session.
 *
 * This is a best-effort probe: a missing user name, a non-zero exit, a
 * timeout, a thrown error, or unexpected output all yield `false`.
 * Nothing is ever thrown to the caller.
 *
 * @param {object} [options]
 * @param {string} [options.user] - Account to query. Defaults to $USER,
 *   then $LOGNAME.
 * @param {Function} [options.run] - spawnSync-compatible runner; exists so
 *   the probe can be exercised without a real `loginctl`.
 * @returns {boolean} true only when the output contains a `Linger=yes` line.
 */
function isLingerEnabled({
  user = process.env.USER || process.env.LOGNAME,
  run = spawnSync,
} = {}) {
  if (!user) return false;
  try {
    const res = run('loginctl', ['show-user', user, '--property=Linger'], {
      stdio: 'pipe',
      encoding: 'utf-8',
      // Bound the probe so a wedged logind / D-Bus cannot hang the installer.
      // On timeout spawnSync reports a null status, which is rejected below.
      timeout: 5000,
    });
    if (res.status !== 0) return false;
    // Multiline match: tolerate extra properties or trailing whitespace.
    return /^Linger=yes\s*$/m.test(res.stdout || '');
  } catch {
    return false;
  }
}
240
+
219
241
  function installSystemdUser(venvPythonPath) {
220
242
  const unitPath = getUserUnitPath();
221
243
  const logsDir = join(homedir(), '.robot-resources', 'logs');
@@ -232,12 +254,17 @@ function installSystemdUser(venvPythonPath) {
232
254
  execSync('systemctl --user enable robot-resources-router.service', { stdio: 'pipe' });
233
255
  execSync('systemctl --user start robot-resources-router.service', { stdio: 'pipe' });
234
256
 
235
- // Enable linger so the service survives SSH disconnects (critical for VMs)
257
+ // Enable linger so the service survives SSH disconnects (critical for VMs).
258
+ // On many distros this needs polkit auth and silently no-ops from a
259
+ // non-interactive shell — we attempt it then VERIFY the result.
260
+ let lingerEnabled = false;
236
261
  try {
237
262
  execSync('loginctl enable-linger', { stdio: 'pipe' });
238
263
  } catch {
239
- // Non-fatal linger may not be available (e.g. no loginctl)
264
+ // fall through to verification
240
265
  }
266
+ lingerEnabled = isLingerEnabled();
267
+ return { lingerEnabled };
241
268
  }
242
269
 
243
270
  function uninstallSystemdUser() {
@@ -603,8 +630,29 @@ export function installService(venvPythonPath) {
603
630
  }
604
631
 
605
632
  // mode === 'user'
606
- installSystemdUser(venvPythonPath);
607
- return { type: 'systemd-user', path: getUserUnitPath() };
633
+ const { lingerEnabled } = installSystemdUser(venvPythonPath);
634
+
635
+ // Belt-and-suspenders: ALSO install crontab @reboot so the router comes
636
+ // back on reboot even if linger isn't taking effect (polkit denied,
637
+ // container restrictions, etc.). Idempotent — removes any existing
638
+ // RR crontab entry before adding the fresh one. Safe to call even
639
+ // when crontab is absent (we skip silently).
640
+ let crontabFallback = false;
641
+ if (hasCrontab()) {
642
+ try {
643
+ installCrontab(venvPythonPath);
644
+ crontabFallback = true;
645
+ } catch {
646
+ // Non-fatal — systemd-user still works while user is logged in.
647
+ }
648
+ }
649
+
650
+ return {
651
+ type: 'systemd-user',
652
+ path: getUserUnitPath(),
653
+ lingerEnabled,
654
+ crontabFallback,
655
+ };
608
656
  }
609
657
 
610
658
  if (process.platform === 'win32') {
@@ -624,10 +672,12 @@ export function installService(venvPythonPath) {
624
672
  export function uninstallService() {
625
673
  if (process.platform === 'darwin') return uninstallLaunchd();
626
674
  if (process.platform === 'linux') {
627
- // Clean up whichever variant is installed.
628
- if (existsSync(SYSTEM_UNIT_PATH)) return uninstallSystemdSystem();
629
- if (existsSync(getUserUnitPath())) return uninstallSystemdUser();
630
- if (isCrontabInstalled()) return uninstallCrontab();
675
+ // Clean up whatever variants are installed. systemd-user users may
676
+ // also have a crontab belt installed alongside — remove both.
677
+ if (existsSync(SYSTEM_UNIT_PATH)) uninstallSystemdSystem();
678
+ if (existsSync(getUserUnitPath())) uninstallSystemdUser();
679
+ if (isCrontabInstalled()) uninstallCrontab();
680
+ return;
631
681
  }
632
682
  if (process.platform === 'win32') return uninstallTaskScheduler();
633
683
  }
package/lib/wizard.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import { readFileSync, writeFileSync, mkdirSync } from 'node:fs';
2
2
  import { join } from 'node:path';
3
- import { homedir, hostname } from 'node:os';
3
+ import { homedir, hostname, release as osRelease } from 'node:os';
4
4
  import { readConfig, writeConfig } from '@robot-resources/cli-core/config.mjs';
5
5
  import { findPython, isPortAvailable, isHeadless, isOpenClawInstalled } from './detect.js';
6
6
  import { getOrCreateMachineId } from './machine-id.js';
@@ -69,6 +69,8 @@ function classifyRouterError(err) {
69
69
  export async function runWizard({ nonInteractive = false } = {}) {
70
70
  header();
71
71
 
72
+ const wizardStartMs = Date.now();
73
+
72
74
  const results = {
73
75
  auth: false,
74
76
  authMethod: null, // 'config' | 'apikey' | 'github'
@@ -76,6 +78,18 @@ export async function runWizard({ nonInteractive = false } = {}) {
76
78
  routerError: null,
77
79
  providerKeys: false,
78
80
  service: false,
81
+ // Diagnostic fields populated as the wizard progresses. All are sent
82
+ // in install_complete (success AND failure) so we can distinguish
83
+ // "pip installed but router never served a request" from a real
84
+ // working setup in post-hoc telemetry.
85
+ serviceType: null,
86
+ lingerEnabled: null,
87
+ crontabFallback: null,
88
+ pluginInstalled: false,
89
+ openclawDetected: false,
90
+ openclawConfigPatched: false,
91
+ scraperMcpRegistered: false,
92
+ healthCheck: { attempted: false },
79
93
  };
80
94
 
81
95
  // ── Step 0: Provision API key (before anything else) ────────────────────
@@ -222,12 +236,25 @@ export async function runWizard({ nonInteractive = false } = {}) {
222
236
 
223
237
  try {
224
238
  const svc = installService(getVenvPythonPath());
239
+ results.serviceType = svc.type || null;
240
+ // systemd-user only survives user sessions with linger enabled; the
241
+ // installer now verifies the bit actually flipped and installs a
242
+ // crontab @reboot belt when it didn't. Capture both signals so we
243
+ // can tell which users land on a live-forever setup vs one that
244
+ // dies on logout.
245
+ results.lingerEnabled = svc.lingerEnabled ?? null;
246
+ results.crontabFallback = svc.crontabFallback ?? null;
225
247
  if (svc.type === 'skipped') {
226
248
  warn(svc.reason);
227
249
  results.service = false;
228
250
  } else {
229
251
  success(`Router registered as ${svc.type} service`);
230
252
  info(`Config: ${svc.path}`);
253
+ if (svc.type === 'systemd-user') {
254
+ if (svc.lingerEnabled) info('Linger enabled — router survives logout');
255
+ else warn('Linger not enabled — router may stop when you log out');
256
+ if (svc.crontabFallback) info('Crontab @reboot installed as fallback');
257
+ }
231
258
  info('Router will start automatically and restart on crash');
232
259
  results.service = true;
233
260
  }
@@ -247,6 +274,15 @@ export async function runWizard({ nonInteractive = false } = {}) {
247
274
  const toolResults = configureToolRouting();
248
275
  results.tools = toolResults;
249
276
 
277
+ // Surface OC-specific signals for install_complete diagnostics.
278
+ results.openclawDetected = isOpenClawInstalled();
279
+ const ocResult = toolResults.find((r) => r.name === 'OpenClaw');
280
+ if (ocResult) {
281
+ results.pluginInstalled =
282
+ ocResult.action === 'installed' || ocResult.action === 'already_configured';
283
+ results.openclawConfigPatched = Boolean(ocResult.configActivated);
284
+ }
285
+
250
286
  if (toolResults.length === 0) {
251
287
  info('No supported AI tools detected');
252
288
  info('Point your tool at http://localhost:3838 to enable cost optimization');
@@ -289,6 +325,7 @@ export async function runWizard({ nonInteractive = false } = {}) {
289
325
  if (scraperRegistered) {
290
326
  success('Scraper MCP registered in OpenClaw — scraper_compress_url(url) available');
291
327
  results.scraper = true;
328
+ results.scraperMcpRegistered = true;
292
329
  } else {
293
330
  // Either already registered, or no openclaw.json
294
331
  try {
@@ -296,6 +333,7 @@ export async function runWizard({ nonInteractive = false } = {}) {
296
333
  if (ocConfig?.mcp?.servers?.['robot-resources-scraper']) {
297
334
  success('Scraper MCP already registered in OpenClaw');
298
335
  results.scraper = true;
336
+ results.scraperMcpRegistered = true;
299
337
  }
300
338
  } catch {
301
339
  // No openclaw.json — not on OC, skip
@@ -303,13 +341,25 @@ export async function runWizard({ nonInteractive = false } = {}) {
303
341
  }
304
342
 
305
343
  // ── Step 4.5: Router Healthcheck ──────────────────────────────────────
344
+ //
345
+ // Verify the router is actually serving /health — not just that pip
346
+ // exited 0. Runs regardless of whether service registration succeeded:
347
+ // a router started by the wizard's spawn (or by a running OC) still
348
+ // deserves to be probed, and a router that pip-installed but fails to
349
+ // respond means the install is NOT actually complete.
350
+ //
351
+ // If we declared router=true from Step 1 (pip success) but /health
352
+ // won't answer, downgrade router→false with a dedicated error reason.
353
+ // This closes the "install looks green but nothing works" gap that
354
+ // produced 34 silent-after-install real users with no diagnostics.
306
355
 
307
- // Router: verify it's responding on localhost:3838
308
- if (results.service) {
356
+ if (results.router) {
309
357
  blank();
310
358
  step('Verifying Router is responding...');
311
359
 
312
- let healthy = false;
360
+ const checkStart = Date.now();
361
+ let healthData = null;
362
+ let lastErr = null;
313
363
  // Retry a few times — the service may need a moment to start
314
364
  for (let attempt = 0; attempt < 3; attempt++) {
315
365
  try {
@@ -319,20 +369,38 @@ export async function runWizard({ nonInteractive = false } = {}) {
319
369
  if (res.ok) {
320
370
  const data = await res.json();
321
371
  if (data.status === 'healthy' || data.status === 'degraded') {
322
- success(`Router healthy (v${data.version || 'unknown'})`);
323
- healthy = true;
372
+ healthData = data;
324
373
  break;
325
374
  }
326
375
  }
327
- } catch {
328
- // Wait before retrying
329
- await new Promise((r) => setTimeout(r, 2000));
376
+ } catch (err) {
377
+ lastErr = err?.message || String(err);
330
378
  }
379
+ if (attempt < 2) await new Promise((r) => setTimeout(r, 2000));
331
380
  }
332
381
 
333
- if (!healthy) {
334
- warn('Router not responding yet — it may need a few more seconds to start');
382
+ results.healthCheck = {
383
+ attempted: true,
384
+ passed: Boolean(healthData),
385
+ version: healthData?.version ?? null,
386
+ status: healthData?.status ?? null,
387
+ latencyMs: Date.now() - checkStart,
388
+ error: healthData ? null : lastErr,
389
+ };
390
+
391
+ if (healthData) {
392
+ success(`Router healthy (v${healthData.version || 'unknown'})`);
393
+ } else {
394
+ warn('Router not responding — marking install as failed.');
335
395
  info('Check manually: curl http://localhost:3838/health');
396
+ // Bug fix: previously we left router=true here. Now we downgrade
397
+ // so install_complete reflects reality and the error is classified.
398
+ results.router = false;
399
+ results.routerError = {
400
+ reason: 'health_check_failed',
401
+ detail: (lastErr || 'no response').slice(-500),
402
+ exitCode: null,
403
+ };
336
404
  }
337
405
  }
338
406
 
@@ -349,22 +417,33 @@ export async function runWizard({ nonInteractive = false } = {}) {
349
417
  try {
350
418
  const config = readConfig();
351
419
  const platformUrl = process.env.RR_PLATFORM_URL || 'https://api.robotresources.ai';
420
+ // Everything populated unconditionally so success installs carry
421
+ // the same diagnostic weight as failures. Prior versions only
422
+ // captured routerError+platform on failure, leaving 34 "successful"
423
+ // installs with no post-hoc signal to explain why they never emit
424
+ // another event.
352
425
  const installPayload = {
426
+ source: 'wizard',
353
427
  router: results.router || false,
354
428
  service: results.service || false,
355
429
  scraper: results.scraper || false,
356
- source: 'wizard',
430
+ platform: process.platform,
431
+ os_release: osRelease(),
432
+ node_version: process.version,
433
+ install_duration_ms: Date.now() - wizardStartMs,
434
+ python_source: results.pythonSource ?? null,
435
+ service_type: results.serviceType ?? null,
436
+ linger_enabled: results.lingerEnabled,
437
+ crontab_fallback: results.crontabFallback,
438
+ health_check: results.healthCheck,
439
+ plugin_installed: results.pluginInstalled,
440
+ openclaw_detected: results.openclawDetected,
441
+ openclaw_config_patched: results.openclawConfigPatched,
442
+ scraper_mcp_registered: results.scraperMcpRegistered,
357
443
  };
358
- if (results.pythonSource) {
359
- // 'system' when the user had Python installed, 'uv' when we
360
- // auto-bootstrapped one. Lets us measure how many installs were
361
- // rescued by the uv fallback.
362
- installPayload.pythonSource = results.pythonSource;
363
- }
364
444
  if (results.routerError && typeof results.routerError === 'object') {
365
445
  installPayload.routerError = results.routerError.reason;
366
446
  installPayload.routerErrorDetail = results.routerError.detail;
367
- installPayload.platform = process.platform;
368
447
  }
369
448
  const body = JSON.stringify({
370
449
  product: 'cli',
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "robot-resources",
3
- "version": "1.9.4",
3
+ "version": "1.9.6",
4
4
  "description": "Robot Resources — AI agent tools. One command to install everything.",
5
5
  "type": "module",
6
6
  "bin": {