@rubytech/create-maxy 1.0.655 → 1.0.656

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. package/package.json +1 -1
  2. package/payload/platform/plugins/admin/PLUGIN.md +2 -1
  3. package/payload/platform/plugins/admin/mcp/dist/index.js +56 -0
  4. package/payload/platform/plugins/admin/mcp/dist/index.js.map +1 -1
  5. package/payload/platform/plugins/cloudflare/scripts/list-cf-domains.ts +97 -32
  6. package/payload/platform/plugins/docs/references/adherence.md +98 -0
  7. package/payload/platform/plugins/docs/references/cloudflare.md +1 -1
  8. package/payload/platform/plugins/docs/references/platform.md +1 -1
  9. package/payload/platform/plugins/docs/references/troubleshooting.md +14 -0
  10. package/payload/platform/templates/agents/admin/IDENTITY.md +2 -0
  11. package/payload/server/package.json +2 -1
  12. package/payload/server/public/assets/{admin-CVZaji3A.js → admin-CcPqY5ao.js} +4 -4
  13. package/payload/server/public/assets/{data-DgI19qYm.js → data-Jicczbp2.js} +1 -1
  14. package/payload/server/public/assets/{file-J1JpJF4E.js → file-CpoHig3h.js} +1 -1
  15. package/payload/server/public/assets/{graph-CFwxUVS0.js → graph-DP93PA2D.js} +1 -1
  16. package/payload/server/public/assets/{house-Dche6_m0.js → house-BJnnbtXo.js} +1 -1
  17. package/payload/server/public/assets/jsx-runtime-POVQm-te.css +1 -0
  18. package/payload/server/public/assets/{public-LhnMTdDE.js → public-CQlgNVSl.js} +1 -1
  19. package/payload/server/public/assets/{share-2-6hJtFYgM.js → share-2-Cb1yEAij.js} +1 -1
  20. package/payload/server/public/assets/{useVoiceRecorder-PUde6itK.js → useVoiceRecorder-C_zUudei.js} +1 -1
  21. package/payload/server/public/assets/x-CdsUXLpH.js +1 -0
  22. package/payload/server/public/data.html +6 -6
  23. package/payload/server/public/graph.html +6 -6
  24. package/payload/server/public/index.html +7 -7
  25. package/payload/server/public/public.html +4 -4
  26. package/payload/server/server.js +1265 -745
  27. package/payload/server/public/assets/jsx-runtime-C7zbe_Pq.css +0 -1
  28. package/payload/server/public/assets/x-DmqRGGHj.js +0 -1
  29. /package/payload/server/public/assets/{jsx-runtime-BE1CBORz.js → jsx-runtime-BkM8jsiV.js} +0 -0
@@ -371,6 +371,79 @@ export type CdpEvaluator = (expression: string) => Promise<unknown>;
371
371
  // without proportional robustness.
372
372
  const STABLE_POLL_THRESHOLD = 2;
373
373
 
374
+ // Cap the `domains=[…]` payload on per-poll and complete phase lines. The
375
+ // extractor already implicitly bounds the list via the FQDN regex + Set
376
+ // dedupe, but a pathological CF redesign could emit thousands of href
377
+ // matches; the cap keeps the stream log readable without masking the real
378
+ // capture size (the `count=` field carries the exact n).
379
+ const DOMAINS_PAYLOAD_MAX_CHARS = 1000;
380
+
381
+ function formatDomains(domains: string[]): string {
382
+ const joined = domains.join(",");
383
+ return joined.length > DOMAINS_PAYLOAD_MAX_CHARS
384
+ ? joined.slice(0, DOMAINS_PAYLOAD_MAX_CHARS - 3) + "..."
385
+ : joined;
386
+ }
387
+
388
+ type DumpMode = "stable" | "unstable" | "empty-or-drift";
389
+ type DumpResult = { path: string } | { err: string };
390
+
391
+ // Snapshot the operator's current dashboard HTML so post-hoc diagnosis of a
392
+ // partial, unstable, or empty scrape has the exact DOM the scrape observed
393
+ // at exit. Called on all three `scrapeDomains` exit paths — the filename's
394
+ // `mode` field names which path fired, and the `pid<pid>` suffix prevents
395
+ // collision if two invocations land in the same millisecond. Loud-fail
396
+ // preserved: on any throw (evaluator rejected, CONFIG_DIR unset, writeFile
397
+ // ENOENT), return `{err}` so the caller emits a separate
398
+ // `phase=dump-write-failed` line and the complete line carries
399
+ // `dump=failed`. The scrape's real signal must never be masked.
400
+ async function dumpHtml(
401
+ evaluator: CdpEvaluator,
402
+ count: number,
403
+ mode: DumpMode,
404
+ ): Promise<DumpResult> {
405
+ try {
406
+ const html = (await evaluator(
407
+ "document.documentElement.outerHTML.slice(0, 100000)",
408
+ )) as string;
409
+ // CONFIG_DIR is set by list-cf-domains.sh before the spawn. A silent
410
+ // fallback would dump logs into the wrong brand's directory on a Real
411
+ // Agent install — a silent-miswrite masking the wrapper-side break it
412
+ // came from. Per Task 473 doctrine: loud-fail on absent runtime-derived
413
+ // values.
414
+ const configDir = process.env.CONFIG_DIR;
415
+ if (!configDir) {
416
+ throw new Error(
417
+ "CONFIG_DIR env var not set by wrapper — refusing to guess brand log directory",
418
+ );
419
+ }
420
+ const logDir = resolve(homedir(), configDir, "logs");
421
+ const ts = new Date().toISOString().replace(/[:.]/g, "-");
422
+ const dumpPath = resolve(
423
+ logDir,
424
+ `list-cf-domains-${ts}-count${count}-${mode}-pid${process.pid}.html`,
425
+ );
426
+ await writeFile(dumpPath, typeof html === "string" ? html : String(html), "utf-8");
427
+ return { path: dumpPath };
428
+ } catch (err) {
429
+ return {
430
+ err: (err instanceof Error ? err.message : String(err)).slice(0, 120),
431
+ };
432
+ }
433
+ }
434
+
435
+ // Render the `dump=<path>` field for a complete-line, and emit a separate
436
+ // loud-fail `phase=dump-write-failed` line on failure. Keeps the on-success
437
+ // complete line terse and routes every failure through the same observation
438
+ // primitive so investigators have one grep pattern for all dump write errors.
439
+ function dumpField(result: DumpResult, mode: DumpMode): string {
440
+ if ("path" in result) return `dump=${result.path}`;
441
+ logPhase(
442
+ `phase=dump-write-failed mode=${mode} detail="${result.err.replace(/"/g, "'")}"`,
443
+ );
444
+ return "dump=failed";
445
+ }
446
+
374
447
  export async function scrapeDomains(evaluator: CdpEvaluator): Promise<string[]> {
375
448
  const deadline = Date.now() + SCRAPE_POLL_MS;
376
449
  let lastOutcome: ScrapeOutcome | null = null;
@@ -388,13 +461,25 @@ export async function scrapeDomains(evaluator: CdpEvaluator): Promise<string[]>
388
461
  const outcome = (await evaluator(SCRAPE_EXPRESSION)) as ScrapeOutcome;
389
462
  lastOutcome = outcome;
390
463
 
464
+ // Per-poll trajectory line (Task 608). Names the exact list captured at
465
+ // this iteration so a partial-capture scenario (e.g. 2-zone account
466
+ // rendering count=1 on poll K because zone B has not hydrated yet) is
467
+ // legible from the stream log alone. Emitted only on successful
468
+ // evaluator returns — the catch branch emits `phase=scrape-retry`
469
+ // instead, because "zero observed" and "failed to observe" are
470
+ // semantically distinct signals for a downstream reader.
471
+ logPhase(
472
+ `phase=dom-scrape-poll n=${polls} count=${outcome.domains.length} domains=[${formatDomains(outcome.domains)}]`,
473
+ );
474
+
391
475
  if (outcome.reason === "ok" && outcome.domains.length > 0) {
392
476
  lastNonEmptyDomains = outcome.domains;
393
477
  if (outcome.domains.length === stableCount) {
394
478
  stableIterations += 1;
395
479
  if (stableIterations >= STABLE_POLL_THRESHOLD) {
480
+ const dump = await dumpHtml(evaluator, outcome.domains.length, "stable");
396
481
  logPhase(
397
- `phase=dom-scrape-complete result=ok count=${outcome.domains.length} polls=${polls} stable_polls=${stableIterations} unstable=false`,
482
+ `phase=dom-scrape-complete result=ok count=${outcome.domains.length} polls=${polls} stable_polls=${stableIterations} unstable=false domains=[${formatDomains(outcome.domains)}] ${dumpField(dump, "stable")}`,
398
483
  );
399
484
  return outcome.domains;
400
485
  }
@@ -412,7 +497,7 @@ export async function scrapeDomains(evaluator: CdpEvaluator): Promise<string[]>
412
497
  }
413
498
  } catch (err) {
414
499
  logPhase(
415
- `phase=scrape-retry err="${(err instanceof Error ? err.message : String(err)).slice(0, 120)}"`,
500
+ `phase=scrape-retry n=${polls} err="${(err instanceof Error ? err.message : String(err)).slice(0, 120)}"`,
416
501
  );
417
502
  }
418
503
  await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS));
@@ -422,10 +507,10 @@ export async function scrapeDomains(evaluator: CdpEvaluator): Promise<string[]>
422
507
  //
423
508
  // (i) We saw non-empty results but they never stabilised across two
424
509
  // consecutive polls — the page is either paginating or re-rendering
425
- // the zone list. Return the last non-empty observation and flag
426
- // `unstable=true` on the phase line so the operator can see the race
427
- // in the stream log. This is NOT drift the HTML dump would be
428
- // misleading noise, so we suppress it.
510
+ // the zone list. Return the last non-empty observation, flag
511
+ // `unstable=true` on the phase line, and snapshot the DOM so
512
+ // partial-capture-that-also-races is diagnosable (Task 608 extends
513
+ // the dump to this path — pre-task, only empty-or-drift dumped).
429
514
  //
430
515
  // (ii) We never saw non-empty results — this is either a genuinely empty
431
516
  // account OR CF has drifted the href URL shape so Source A yields
@@ -433,37 +518,17 @@ export async function scrapeDomains(evaluator: CdpEvaluator): Promise<string[]>
433
518
  // HTML so the operator can distinguish the two by inspecting the
434
519
  // dump file.
435
520
  if (lastNonEmptyDomains.length > 0) {
521
+ const dump = await dumpHtml(evaluator, lastNonEmptyDomains.length, "unstable");
436
522
  logPhase(
437
- `phase=dom-scrape-complete result=ok count=${lastNonEmptyDomains.length} polls=${polls} stable_polls=${stableIterations} unstable=true`,
523
+ `phase=dom-scrape-complete result=ok count=${lastNonEmptyDomains.length} polls=${polls} stable_polls=${stableIterations} unstable=true domains=[${formatDomains(lastNonEmptyDomains)}] ${dumpField(dump, "unstable")}`,
438
524
  );
439
525
  return lastNonEmptyDomains;
440
526
  }
441
527
 
442
- try {
443
- const html = (await evaluator("document.documentElement.outerHTML.slice(0, 100000)")) as string;
444
- // CONFIG_DIR is set by list-cf-domains.sh before the spawn. A silent
445
- // fallback would dump logs into the wrong brand's directory on a Real
446
- // Agent install — a silent-miswrite masking the wrapper-side break it
447
- // came from. Per Task 473 doctrine: loud-fail on absent runtime-derived
448
- // values.
449
- const configDir = process.env.CONFIG_DIR;
450
- if (!configDir) {
451
- throw new Error(
452
- "CONFIG_DIR env var not set by wrapper — refusing to guess brand log directory",
453
- );
454
- }
455
- const logDir = resolve(homedir(), configDir, "logs");
456
- const ts = new Date().toISOString().replace(/[:.]/g, "-");
457
- const dumpPath = resolve(logDir, `list-cf-domains-${ts}.html`);
458
- await writeFile(dumpPath, typeof html === "string" ? html : String(html), "utf-8");
459
- logPhase(
460
- `phase=dom-scrape-complete result=empty-or-drift dump=${dumpPath} lastReason=${lastOutcome?.reason ?? "unknown"} polls=${polls}`,
461
- );
462
- } catch (err) {
463
- logPhase(
464
- `phase=dom-scrape-complete result=empty-or-drift dump=failed lastReason=${lastOutcome?.reason ?? "unknown"} polls=${polls} err="${(err instanceof Error ? err.message : String(err)).slice(0, 120)}"`,
465
- );
466
- }
528
+ const dump = await dumpHtml(evaluator, 0, "empty-or-drift");
529
+ logPhase(
530
+ `phase=dom-scrape-complete result=empty-or-drift count=0 polls=${polls} lastReason=${lastOutcome?.reason ?? "unknown"} ${dumpField(dump, "empty-or-drift")}`,
531
+ );
467
532
  return [];
468
533
  }
469
534
 
@@ -0,0 +1,98 @@
1
+ # Adherence Fidelity
2
+
3
+ User-facing reference for the attention-weighted correction ledger that makes agent adherence compound. Canonical platform documentation lives at [`.docs/agents.md`](../../../../.docs/agents.md) § Adherence Fidelity — this reference mirrors the same behaviour for operators reading plugin docs.
4
+
5
+ ---
6
+
7
+ ## What it solves
8
+
9
+ The agent's prerogatives (PRECISE, CONCISE, EVIDENCE-BASED) are prose at the top of every system prompt. Without a compounding mechanism, a rule corrected 50 times has the same attention weight as a rule corrected once. Adherence Fidelity adds a per-agent ledger whose rendered summary is inserted into the system prompt every turn, so the agent sees its own recidivism with counts, samples, and recency.
10
+
11
+ ## How an operator sees it
12
+
13
+ **In chat:** ask the agent *"what is my adherence score?"* or *"what are my top rule violations?"* The admin agent answers via the `adherence-read` tool, which reads the ledger file on disk — the number is authoritative.
14
+
15
+ **Via API:** `GET /api/admin/adherence?agent=admin` returns the ledger JSON, plus `constraints` (whether capability routing is active for the next turn) and an optional `rendered` block when called with `?block=1`.
16
+
17
+ **On the filesystem:** `{accountDir}/agents/{agentName}/adherence-ledger.json` is the source of truth. `jq` queries work directly:
18
+
19
+ ```bash
20
+ jq '{score, top: (.rules | sort_by(-.count) | .[0])}' \
21
+ ~/.maxy/<accountId>/agents/admin/adherence-ledger.json
22
+ ```
23
+
24
+ ## Score
25
+
26
+ ```
27
+ score = 100 × (1 − rules_violating_in_rolling_7d / n_rules)
28
+ ```
29
+
30
+ An agent with three rules, none of which has a violation in the last 7 days, scores 100%. If one of the three rules has a violation in that window, the score is 67% (100 × (1 − 1/3), rounded). If all three do, the score is 0%.
31
+
32
+ ## Top offenders
33
+
34
+ The rendered ledger block bolds the top-3 rules by `count` (all-time, not just the rolling window) and quotes their most recent `last_sample`. Rules 4 through 10 appear as one-liners. Zero-recidivism rules (`count = 0` and `rolling_7d = 0`) are omitted entirely.
35
+
36
+ Sort order: `count DESC, last_violated_at DESC`.
37
+
38
+ ## Capability routing at threshold
39
+
40
+ When any rule's `rolling_7d` reaches `5`, the next turn's spawn is clamped:
41
+
42
+ - `--max-turns` drops to `5` — the agent has fewer turns to sprawl.
43
+ - Non-core tools drop from the allowed set (currently the specialist roles).
44
+ - The offending rule renders with a `BLOCKING:` prefix in the ledger block so it dominates the prompt.
45
+
46
+ The constraint is computed once per turn at the top of `invokeAgent` and frozen for that spawn — one-turn granularity. The next turn reads the updated ledger and re-evaluates, so a single clean turn begins to lift the constraint as `rolling_7d` decays.
47
+
48
+ ## Data flow per turn
49
+
50
+ ```
51
+ ┌─ Pre-turn ────────────────────────────────────┐
52
+ │ loadAdherenceLedger(accountDir, accountId) │
53
+ │ renderAdherenceLedger(ledger, blockingRules) │
54
+ │ → inject at <!-- ADHERENCE-LEDGER-INSERT --> │
55
+ │ computeConstraints(ledger) │
56
+ │ → clamp max-turns, drop tools │
57
+ └────────────────────────────────────────────────┘
58
+
59
+
60
+ Assistant stream
61
+
62
+
63
+ ┌─ Post-turn ───────────────────────────────────┐
64
+ │ criticAndRecord(responseText) — Haiku │
65
+ │ → verdict=pass → recordPass() │
66
+ │ → verdict=violation → recordViolation() │
67
+ │ Fire-and-forget. Non-blocking. │
68
+ └────────────────────────────────────────────────┘
69
+ ```
70
+
71
+ ## What the ledger file looks like
72
+
73
+ ```json
74
+ {
75
+ "agent_id": "admin",
76
+ "account_id": "abc123...",
77
+ "rules": [
78
+ {
79
+ "rule_id": "PRECISE",
80
+ "canonical_text": "Use exact names...",
81
+ "rule_family": "prerogative",
82
+ "count": 7,
83
+ "violations": ["2026-04-19T10:12:00Z", "..."],
84
+ "last_violated_at": "2026-04-21T09:30:12Z",
85
+ "last_sample": "Something that paraphrased tool output…",
86
+ "current_streak": 2,
87
+ "rolling_7d": 5
88
+ }
89
+ ],
90
+ "updated_at": "2026-04-21T09:30:13Z"
91
+ }
92
+ ```
93
+
94
+ ## Limits and deferrals
95
+
96
+ v1 covers the admin agent only. Specialist subagents (`personal-assistant`, `project-manager`, `research-assistant`, `content-producer`) do not receive their own ledger injection yet — their `.md` templates load via `--plugin-dir` and have no TS-side assembly site. Follow-up task filed.
97
+
98
+ No cross-agent rule inheritance, no user-visible correction-ack signal, no blocking-critic retry loop in v1 — each is a separate follow-up task. See [`.docs/agents.md`](../../../../.docs/agents.md) § Adherence Fidelity for the full deferral list with task numbers.
@@ -8,7 +8,7 @@ Each installation has its own Cloudflare account. Sign-in is OAuth in the device
8
8
  |------|--------|
9
9
  | **Product identity** (Maxy vs Real Agent) | `brand.json` (`productName`, `configDir`) — known at install. |
10
10
  | **Cloudflare account identity** | `cert.pem` from OAuth. One account per brand per device. |
11
- | **Domain scope** (which zones the operator can route) | Live Cloudflare dashboard at form-render time via `list-cf-domains.sh`, not `brand.json`. Brand identity has no authority over which domains the operator's CF account holds. |
11
+ | **Domain scope** (which zones the operator can route) | Live Cloudflare dashboard at form-render time via `list-cf-domains.sh`, not `brand.json`. Brand identity has no authority over which domains the operator's CF account holds. When the scrape returns an unexpected count (e.g. 1 on a two-zone account), the stream log's per-poll `phase=dom-scrape-poll n=<k> count=<n> domains=[…]` trajectory and the on-disk HTML dump at `~/{configDir}/logs/list-cf-domains-<ts>-count<n>-<mode>-pid<pid>.html` (Task 608 — written on every scrape outcome, not just empty ones) give the operator everything they need to triage the cause without re-running. |
12
12
  | **Local tunnel state** | `~/{configDir}/cloudflared/` — `cert.pem`, `<UUID>.json`, `config.yml`, `tunnel.state`, `alias-domains.json`. |
13
13
 
14
14
  There is no token-based auth for the operator-owned path (Mode A). To switch Cloudflare accounts, run `reset-tunnel.sh` (which deletes the cert and every tunnel on the current account), then run `setup-tunnel.sh` again — `cloudflared tunnel login` inside the setup script will pick a fresh account when you sign in.
@@ -68,7 +68,7 @@ The admin UI includes a live terminal surface that opens a real shell on your Pi
68
68
 
69
69
  The tmux session outlives admin-server restarts — running an upgrade inside this terminal means you see the live shell output continuously, even through the admin server's own restart mid-upgrade. Closing the browser tab does not kill the running work; re-opening the Software Update window reattaches to the same session during an active upgrade and scrollback shows everything that happened in the meantime. Password-protected `sudo` prompts appear natively inside the terminal, and the password you type never leaves the Pi — the admin-server proxy is a raw byte pipe that never inspects frame payloads.
70
70
 
71
- The Software Update window mounts the terminal lazily: the WebSocket is opened on the first Upgrade click, not when the window opens. Until you click Upgrade, the terminal area shows "Ready to upgrade." and no network traffic flows. If the admin server cannot reach `ttyd`, the window renders an inline "Admin terminal not available" message with the exact re-install command and a Try again button — no silent reconnect loops, no empty black rectangle. The scrollback-across-reopen behaviour above still applies during an active upgrade (a sessionStorage flag remembers that an upgrade is in flight so reopening the window re-mounts the terminal and reattaches).
71
+ The Software Update window mounts the terminal lazily: the terminal, its WebSocket, and its black-backgrounded container are not created until you click Upgrade. Pre-click, the window shows a small "Ready to upgrade — click Upgrade to begin." line, no network traffic flows, and the upgrade UI is the lifecycle indicator — not the terminal. After you click, the window adds a status row above the terminal ("Upgrading to v… · elapsed: Ns · Downloading installer…" flipping to "Running installer…" on the first byte of installer output) so the 5–30 second npx cold-start window is never silent. The upgrade command is dispatched the moment the WebSocket opens — you won't see "terminal not ready" warnings on a healthy device. If the admin server cannot reach `ttyd`, the window renders an inline "Admin terminal not available" message with the exact re-install command and a Try again button. The scrollback-across-reopen behaviour above still applies during an active upgrade (a sessionStorage flag remembers that an upgrade is in flight so reopening the window re-mounts the terminal and reattaches; the elapsed counter keeps ticking from the original start time).
72
72
 
73
73
  ## AI Content Provenance
74
74
 
@@ -125,6 +125,20 @@ npx -y @rubytech/create-maxy@latest
125
125
 
126
126
  Then return to the upgrade window and click **Try again**. The window re-probes `/api/health` and, once ttyd is listening, the terminal area mounts as normal. If the problem persists, check the boot log for `[ttyd] upstream NOT reachable on 127.0.0.1:7681` and follow the `maxy-ttyd` restart steps above.
127
127
 
128
+ ## Upgrade spinner turns but terminal stays blank
129
+
130
+ **Symptom:** You clicked **Upgrade**, the progress row is showing with an elapsed counter ticking, but the terminal area below stays empty for more than about a minute with no output.
131
+
132
+ **What it means:** The upgrade command was dispatched successfully (`onReady` fired), but `ttyd` is not relaying any bytes back from the installer — the `npx` process may have crashed before it printed anything, or `ttyd` itself has lost its PTY.
133
+
134
+ **Fix:** SSH to the device and check the ttyd unit:
135
+
136
+ ```bash
137
+ sudo systemctl --user status maxy-ttyd
138
+ ```
139
+
140
+ If it's not running, restart it with `sudo systemctl --user restart maxy-ttyd`. Then close and reopen the Software Update window. If `ttyd` is healthy and the spinner keeps turning with no output, the installer process itself has died — re-run `npx -y @rubytech/create-maxy@latest` from an SSH shell directly.
141
+
128
142
  ## Orphan Account Directory Archived to `.trash/`
129
143
 
130
144
  **What happened:** During upgrade, the installer detected multiple account directories under `~/maxy/data/accounts/` and identified one as live (its `admins` list matches the device's `users.json`). Non-matching siblings are archived — not deleted — under `~/maxy/data/accounts/.trash/<uuid>-<ISO8601-ts>/`.
@@ -16,6 +16,8 @@ Three rules govern every turn. They are load-bearing — when they conflict with
16
16
 
17
17
  A landfill graph defeats EVIDENCE-BASED: search returns noise, the agent re-writes the noise, the noise compounds. Compress on write; filter on read.
18
18
 
19
+ <!-- ADHERENCE-LEDGER-INSERT -->
20
+
19
21
  ---
20
22
 
21
23
  ## Intent Gate — First Principle
@@ -6,6 +6,7 @@
6
6
  "neo4j-driver": "^6.0.1",
7
7
  "@anthropic-ai/sdk": "^0.55.0",
8
8
  "@whiskeysockets/baileys": "7.0.0-rc.9",
9
- "zod": "^4.3.6"
9
+ "zod": "^4.3.6",
10
+ "proper-lockfile": "^4.1.2"
10
11
  }
11
12
  }