@rubytech/create-maxy 1.0.633 → 1.0.635

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -203,27 +203,58 @@ fi
203
203
 
204
204
  # --------------------------------------------------------------------------
205
205
  # Step 2+3: Create tunnel if absent; otherwise reuse. Capture UUID.
206
+ # Emit phase_line step=tunnel-resolve with action=reused|created so the
207
+ # stream log tailer shows which tunnel identity Steps 4+5 are writing
208
+ # against (Task 559 — Bug B: previously a bare `echo` that only surfaced
209
+ # in the Bash tool_result after subprocess exit).
206
210
  # --------------------------------------------------------------------------
207
211
 
208
212
  TUNNEL_NAME="${BRAND}-$(hostname -s)"
209
213
  TUNNEL_ID="$(cloudflared --origincert "${CFG_DIR}/cert.pem" tunnel list --output json 2>/dev/null \
210
214
  | jq -r --arg N "${TUNNEL_NAME}" '.[]? | select(.name == $N) | .id' | head -1)"
215
+ TUNNEL_ACTION="reused"
211
216
  if [ -z "${TUNNEL_ID}" ] || [ "${TUNNEL_ID}" = "null" ]; then
212
217
  cloudflared --origincert "${CFG_DIR}/cert.pem" tunnel create "${TUNNEL_NAME}"
213
218
  TUNNEL_ID="$(cloudflared --origincert "${CFG_DIR}/cert.pem" tunnel list --output json \
214
219
  | jq -r --arg N "${TUNNEL_NAME}" '.[]? | select(.name == $N) | .id' | head -1)"
220
+ TUNNEL_ACTION="created"
215
221
  fi
216
222
  if [ -z "${TUNNEL_ID}" ] || [ "${TUNNEL_ID}" = "null" ]; then
223
+ phase_line setup-tunnel step=tunnel-resolve result=error \
224
+ reason=uuid-missing tunnel_name="${TUNNEL_NAME}"
217
225
  echo "ERROR: failed to create or find tunnel ${TUNNEL_NAME}" >&2
218
226
  exit 1
219
227
  fi
228
+ phase_line setup-tunnel step=tunnel-resolve tunnel_name="${TUNNEL_NAME}" \
229
+ tunnel_id="${TUNNEL_ID}" action="${TUNNEL_ACTION}"
220
230
  echo "tunnel: ${TUNNEL_NAME} (${TUNNEL_ID})"
221
231
 
222
232
  # --------------------------------------------------------------------------
223
- # Step 4: Route DNS. Apex hostnames (exactly two DNS labels) cannot be
224
- # routed via `cloudflared tunnel route dns` — it misroutes them into
225
- # another zone on the account. Skip CLI routing for apex; collect for the
226
- # ACTION REQUIRED summary at the end.
233
+ # Step 3b: Zone pre-flight. Before routing DNS, verify every non-apex
234
+ # hostname's registrable parent (last two labels, e.g. rogerblack.maxy.bot
235
+ # → maxy.bot) has NS records pointing at Cloudflare. If any hostname's
236
+ # parent zone is not on Cloudflare, refuse the whole run before calling
237
+ # `cloudflared tunnel route dns`.
238
+ #
239
+ # DESIGN NOTE — what this catches and what it does NOT catch (Task 559):
240
+ # CATCHES: parent zone does not exist, or its NS records do not point
241
+ # at Cloudflare's nameservers. Pre-529 the shell relied on a post-
242
+ # flight sed of cloudflared's stdout for this defence; Task 559
243
+ # deletes that parser because it rejects the idempotent no-op output
244
+ # shape (session 25674fe3) and replaces it with this inline NS probe.
245
+ # Same primitive the MCP path uses in
246
+ # cloudflared.ts::checkZoneParentOnCloudflare.
247
+ # DOES NOT CATCH: the zone is on Cloudflare but on a DIFFERENT account
248
+ # than the one cert.pem is bound to. A true account-zone-list check
249
+ # requires either a cloudflared CLI zone-list subcommand (does not
250
+ # exist as of 2026-04) or persisting the bound account's zones at
251
+ # tunnel-login time (deferred — separate task). The wrong-account
252
+ # case is detected post-hoc by tunnel-status's hostname probe, not
253
+ # here. This is an explicitly accepted gap per Task 559's scope.
254
+ #
255
+ # Probe uses 1.1.1.1 directly to bypass the device's local resolver
256
+ # (matching Resolver.setServers in the MCP path) — avoids cache /
257
+ # split-horizon issues on the Pi.
227
258
  # --------------------------------------------------------------------------
228
259
 
229
260
  is_apex() {
@@ -233,39 +264,110 @@ is_apex() {
233
264
  [ "$(echo -n "$h" | tr -cd '.' | wc -c)" = "1" ]
234
265
  }
235
266
 
267
+ registrable_parent() {
268
+ local h="$1"
269
+ local labels n
270
+ IFS='.' read -ra labels <<< "${h}"
271
+ n=${#labels[@]}
272
+ if [ "${n}" -le 2 ]; then
273
+ printf '%s' "${h}"
274
+ else
275
+ printf '%s.%s' "${labels[$((n-2))]}" "${labels[$((n-1))]}"
276
+ fi
277
+ }
278
+
279
+ if ! command -v dig >/dev/null 2>&1; then
280
+ phase_line setup-tunnel step=zone-preflight result=error \
281
+ reason=dig-missing
282
+ echo "ERROR: dig is not in PATH — required for the zone pre-flight check." >&2
283
+ echo " Install DNS tooling: sudo apt-get install -y bind9-dnsutils" >&2
284
+ exit 1
285
+ fi
286
+
287
+ ZONES_SEEN=""
288
+ MISSING_PARENT=""
289
+ for H in "${HOSTNAMES[@]}"; do
290
+ if is_apex "$H"; then continue; fi
291
+ ZONE="$(registrable_parent "$H")"
292
+ NS_OUT="$(dig +short +time=3 +tries=1 NS "${ZONE}" @1.1.1.1 2>/dev/null || true)"
293
+ if printf '%s' "${NS_OUT}" | grep -qiE '\.ns\.cloudflare\.com\.?$'; then
294
+ case ",${ZONES_SEEN}," in
295
+ *",${ZONE},"*) ;;
296
+ *) ZONES_SEEN="${ZONES_SEEN:+${ZONES_SEEN},}${ZONE}" ;;
297
+ esac
298
+ else
299
+ MISSING_PARENT="${H}"
300
+ break
301
+ fi
302
+ done
303
+
304
+ if [ -n "${MISSING_PARENT}" ]; then
305
+ MISSING_ZONE="$(registrable_parent "${MISSING_PARENT}")"
306
+ phase_line setup-tunnel step=zone-preflight result=error \
307
+ missing_parent_for="${MISSING_PARENT}" \
308
+ zones_on_account="${ZONES_SEEN}"
309
+ echo "" >&2
310
+ echo "ERROR: cannot route ${MISSING_PARENT} — its parent zone ${MISSING_ZONE}" >&2
311
+ echo " is not on Cloudflare (NS records do not point at *.ns.cloudflare.com)." >&2
312
+ echo " Zones confirmed on Cloudflare so far: ${ZONES_SEEN:-none}" >&2
313
+ echo "" >&2
314
+ echo " Fix: sign into the Cloudflare account that owns ${MISSING_ZONE}" >&2
315
+ echo " 1. ~/reset-tunnel.sh # clear cert.pem and tunnel state" >&2
316
+ echo " 2. ~/setup-tunnel.sh ... # re-run while signed into the correct account" >&2
317
+ exit 1
318
+ fi
319
+
320
+ phase_line setup-tunnel step=zone-preflight result=ok \
321
+ zones_on_account="${ZONES_SEEN}"
322
+
323
+ # --------------------------------------------------------------------------
324
+ # Step 4: Route DNS. Apex hostnames (exactly two DNS labels) cannot be
325
+ # routed via `cloudflared tunnel route dns` — it misroutes them into
326
+ # another zone on the account. Skip CLI routing for apex; collect for the
327
+ # ACTION REQUIRED summary at the end.
328
+ #
329
+ # Control flow (Task 559): cloudflared's exit code is the sole decision
330
+ # signal. No stdout parsing. `cloudflared tunnel route dns --overwrite-dns`
331
+ # exits 0 on every legitimate outcome (create, overwrite, already-correct
332
+ # no-op) and non-zero on every legitimate failure. The pre-flight above
333
+ # already refused if the parent zone is not on Cloudflare; the post-flight
334
+ # parser the shell historically carried (deleted in 559) rejected the
335
+ # idempotent no-op output shape `INF <h> is already configured to
336
+ # route...` and caused session 25674fe3 to die after cloudflared exited 0.
337
+ # --------------------------------------------------------------------------
338
+
236
339
  APEX_HOSTNAMES=()
237
340
  for H in "${HOSTNAMES[@]}"; do
238
341
  if is_apex "$H"; then
239
342
  APEX_HOSTNAMES+=("$H")
343
+ phase_line setup-tunnel step=route-dns hostname="${H}" result=apex-skip
240
344
  echo "apex ${H} — skipping CLI DNS routing (manual dashboard step required)"
241
345
  continue
242
346
  fi
243
- ROUTE_OUT=$(cloudflared --origincert "${CFG_DIR}/cert.pem" \
244
- tunnel route dns --overwrite-dns "${TUNNEL_ID}" "${H}" 2>&1)
245
- echo "${ROUTE_OUT}"
246
- # Post-flight FQDN validation: cert.pem is zone-scoped for DNS routing;
247
- # if the requested hostname is not under cert's zone, cloudflared silently
248
- # prepends it as a sub-label (e.g. admin.maxy.bot → admin.maxy.bot.maxy.chat
249
- # when cert is for maxy.chat zone). Parse the output and fail loudly.
250
- ACTUAL_FQDN=$(echo "${ROUTE_OUT}" | sed -n 's|.*Added CNAME \([^ ]*\) which will route.*|\1|p')
251
- if [ -z "${ACTUAL_FQDN}" ]; then
252
- echo "ERROR: could not parse CNAME FQDN from cloudflared output for ${H}" >&2
253
- exit 1
254
- fi
255
- if [ "${ACTUAL_FQDN}" != "${H}" ]; then
256
- echo "" >&2
257
- echo "ERROR: cloudflared misrouted ${H} → ${ACTUAL_FQDN}" >&2
258
- echo " The cert.pem at ${CFG_DIR}/cert.pem is scoped to a zone that does not own ${H}." >&2
259
- echo " Fix:" >&2
260
- echo " 1. Delete the stray CNAME ${ACTUAL_FQDN} in the CF dashboard." >&2
261
- echo " 2. Re-authorize cloudflared against the zone that owns ${H}:" >&2
262
- echo " rm ${CFG_DIR}/cert.pem" >&2
263
- echo " DISPLAY=:99 cloudflared --origincert ${CFG_DIR}/cert.pem tunnel login" >&2
264
- echo " (then pick the correct zone in the dashboard consent screen)" >&2
265
- echo " mv ~/.cloudflared/cert.pem ${CFG_DIR}/cert.pem" >&2
266
- echo " 3. Re-run this script." >&2
347
+
348
+ phase_line setup-tunnel step=route-dns hostname="${H}" tunnel_id="${TUNNEL_ID}"
349
+ ROUTE_LOG="$(mktemp -t maxy-route-dns.XXXXXX)"
350
+ # tee_subprocess_capture streams cloudflared's combined stdout+stderr
351
+ # into STREAM_LOG_PATH line-by-line with the [setup-tunnel:cloudflared]
352
+ # tag (live-tailable) AND passes the same output through this shell's
353
+ # stdout so the `> "${ROUTE_LOG}"` redirection can capture it for the
354
+ # failure-path phase_line. Exit code is cloudflared's PIPESTATUS[0].
355
+ if tee_subprocess_capture setup-tunnel:cloudflared -- \
356
+ cloudflared --origincert "${CFG_DIR}/cert.pem" \
357
+ tunnel route dns --overwrite-dns "${TUNNEL_ID}" "${H}" \
358
+ > "${ROUTE_LOG}"; then
359
+ phase_line setup-tunnel step=route-dns hostname="${H}" result=ok
360
+ else
361
+ ROUTE_RC=$?
362
+ STDERR_BOUNDED="$(tr '\n' ' ' < "${ROUTE_LOG}" | head -c 400)"
363
+ phase_line setup-tunnel step=route-dns hostname="${H}" result=error \
364
+ exit="${ROUTE_RC}" stderr="${STDERR_BOUNDED}"
365
+ echo "ERROR: cloudflared tunnel route dns failed for ${H} (exit=${ROUTE_RC})" >&2
366
+ echo " stderr: ${STDERR_BOUNDED}" >&2
367
+ rm -f "${ROUTE_LOG}"
267
368
  exit 1
268
369
  fi
370
+ rm -f "${ROUTE_LOG}"
269
371
  done
270
372
 
271
373
  # --------------------------------------------------------------------------
@@ -24,8 +24,9 @@ Ask the agent to set up Cloudflare. The agent collects four things before acting
24
24
  The agent then invokes `setup-tunnel.sh` on the device with your inputs. The script runs end-to-end:
25
25
 
26
26
  - `cloudflared tunnel login` — OAuth browser sign-in. The VNC browser opens the Cloudflare authorize page; pick the account that owns your domain, click Authorize. `cert.pem` lands.
27
- - Tunnel creation under the naming convention `{brand}-{hostname}` (e.g. `maxy-neo`).
28
- - `cloudflared tunnel route dns` for each subdomain hostname. Apex hostnames cannot be routed this way — the script prints an **ACTION REQUIRED** block naming the exact dashboard record to add or edit.
27
+ - Tunnel creation under the naming convention `{brand}-{hostname}` (e.g. `maxy-neo`). Stream log emits `step=tunnel-resolve action=reused|created` once the UUID is known so the admin agent can see which tunnel the later steps will write against.
28
+ - **Zone pre-flight** for every non-apex hostname — the script queries `1.1.1.1` for the registrable parent's NS records and refuses the whole run if they don't point at Cloudflare. Stream log: `step=zone-preflight result=ok|error zones_on_account=… missing_parent_for=…`. Catches "domain not on Cloudflare"; does not catch "domain on a different Cloudflare account than `cert.pem` is bound to" — that case surfaces later via `tunnel-status`.
29
+ - `cloudflared tunnel route dns` for each subdomain hostname. Apex hostnames cannot be routed this way — the script prints an **ACTION REQUIRED** block naming the exact dashboard record to add or edit. Stream log emits `step=route-dns hostname=… tunnel_id=…` before the call and `step=route-dns hostname=… result=ok|apex-skip|error` after; on error the bounded cloudflared stderr (≤400 chars) rides in the same phase line. **The script does not parse cloudflared's stdout** — exit code is the sole decision signal, so all three legitimate cloudflared output shapes (new record, overwrite, idempotent "already configured") are treated as success.
29
30
  - `config.yml` and `tunnel.state` written under `${CFG_DIR}`.
30
31
  - `systemctl --user restart ${BRAND}.service` — restarts the platform service so the new tunnel spawns via the service's `ExecStartPre=resume-tunnel.sh`.
31
32
  - Post-restart verification — `ps -ef | grep '[c]loudflared'` confirms the connector is alive, then `curl -I https://<hostname>` against each subdomain (up to 60 s per host) confirms a non-530 response.
@@ -113,3 +113,11 @@ After this, every `console.error("[your-tool] ...")` from any tool in the plugin
113
113
  **Tee-state markers** land in the stream log: `[platform] [mcp-tee-attach] server=<name> streamLogPath=...` when the tee wires up, `[platform] [mcp-tee-skip] server=<name> destination=... reason=...` when a destination fails (missing `LOG_DIR`, unwritable path, `STREAM_LOG_PATH` not set, etc.), `[platform] [mcp-tee-detach] server=<name>` on graceful shutdown. If a server invoked tools but no `[mcp:<name>]` lines appear in the conversation's log, look for the skip marker first.
114
114
 
115
115
  **Main-subprocess stderr (Task 535).** The same teeing pattern applies to the main Claude Code subprocess's stderr — every line lands in the per-conversation stream log as `[subproc-stderr] …`, with lifecycle markers `[subproc-stderr-tee-attached] pid=…` and `[subproc-stderr-tee-detached] pid=… bytes=N lines=N`. A `bytes=0 lines=0` detach means the tee was attached but the subprocess emitted nothing on stderr — which is the normal state today, because the Claude Code CLI is a bundled Bun runtime binary that does not honour Node's `NODE_DEBUG` env var. The platform records this explicitly with one line per spawn: `[subproc-debug-unavailable] reason=bundled-bun-binary-ignores-node-debug pid=… cli=claude`. A reader who finds a `[spawn]` without these markers should treat that as a regression of the tee infrastructure, not as silence.
116
+
117
+ ## Failure-path observability contract (Task 560)
118
+
119
+ The `initStderrTee` wrapper writes to the per-conversation stream log and per-server raw file via `createWriteStream` — async, buffered. Any diagnostic `console.error(…)` followed by an immediate `process.exit(…)` is lost: the event loop never drains the WriteStream before the process terminates. Plugins that call `process.exit()` during module load (rare — `graph-mcp` is the only in-tree example today; it spawns a child at boot to proxy upstream stdio) MUST use `fs.appendFileSync` at every exit path to guarantee the cause lands in both log destinations before exit. Lines should follow the `[mcp:<name>] [<plugin-prefix>] <cause>` format so existing `grep '[mcp:<name>]'` investigator paths work. Each destination must be wrapped in its own try/catch — an unwritable log must not mask the primary failure.
120
+
121
+ A second observability layer closes the same gap from the platform side: when `claude-agent.ts` observes an `init` event with any MCP server reporting `status:"failed"`, it reads the last 512 bytes of `${LOG_DIR}/mcp-<name>-stderr-<date>.log` and emits `[mcp-init-error] server=<name> tail=<quoted>` into the stream log. Absent file → `tail="(no stderr file)"`; empty file → `tail="(empty)"`. This works for every plugin regardless of whether it adopted the sync-write discipline — the tail of whatever landed in the raw stderr file (from whichever destination made it out of the async buffer) is always captured.
122
+
123
+ Signal inventory after a failed session: `[init] FAILED MCP servers: <names>` (names), `[mcp-init-error] server=<name> tail=…` (cause for each, from platform), optionally `[mcp:<name>] [<plugin>] …` (cause for each, from plugin's own sync-writes when the plugin is disciplined). Their union gives the investigator two independent sources for the same failure.
@@ -15,6 +15,21 @@ The connected Neo4j instance contains only this brand's data (per-brand
15
15
  instance architecture — see `.docs/neo4j.md`). You never need an account
16
16
  filter in the query.
17
17
 
18
+ ## When the graph tools are absent
19
+
20
+ If neither `maxy-graph_read_neo4j_cypher` nor `maxy-graph_get_neo4j_schema`
21
+ appears in your tool list, the graph MCP server failed to start on this
22
+ device. Reply once with exactly:
23
+
24
+ > The graph MCP server failed to start on this device. Run the admin
25
+ > system-status check to diagnose — do not retry by other routes.
26
+
27
+ Then stop. Do not search for a similarly-named tool via `ToolSearch`, do
28
+ not fall back to `cypher-shell` via `Bash`, do not paraphrase — the
29
+ deterministic path through the shim is the only supported way to read
30
+ the graph, and any substitute path loses the read-only + namespace +
31
+ token-limit discipline the upstream server enforces.
32
+
18
33
  ## Non-negotiable: never return raw nodes
19
34
 
20
35
  `RETURN n` dumps every property, including the 768-dim `embedding` float