npm - @rubytech/create-maxy - Versions diffs - 1.0.800 → 1.0.801 - Mend

@rubytech/create-maxy 1.0.800 → 1.0.801

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/payload/platform/plugins/whatsapp-import/bin/whatsapp-ingest.sh ADDED Viewed

@@ -0,0 +1,102 @@
+#!/usr/bin/env bash
+# =============================================================================
+# whatsapp-ingest.sh — single deterministic Bash entry for WhatsApp archive
+# ingestion (Task 855). Thin wrapper: validate args, resolve env, then exec
+# node on ingest.mjs, which runs the pipeline in-process. The script is the
+# database-operator subagent's only operator-facing handle on the
+# parse → archive-write → insight pipeline.
+#
+# Usage:
+#   bash whatsapp-ingest.sh <archive.zip|dir|_chat.txt>
+#         --owner-element-id <id>
+#         --scope <admin|public>
+#         [--account-id <accountId>]
+#         [--timezone <iana-zone>]
+#         [--date-format <DD/MM/YY|MM/DD/YY|DD/MM/YYYY|MM/DD/YYYY>]
+#         [--no-insight]
+#
+# Exit 0  + JSON summary on stdout on success.
+# Exit !0 + one [whatsapp-ingest] FAIL line on stderr on failure.
+# =============================================================================
+set -euo pipefail
+# Pre-flight arg check — emit the same single-line FAIL shape ingest.mjs uses
+# so the operator's grep pattern catches both layers uniformly.
+arg_fail() {
+  local reason="$1"
+  echo "[whatsapp-ingest] FAIL phase=argv reason=\"${reason}\"" >&2
+  exit 1
+}
+# Locate platform root from this script's location:
+#   platform/plugins/whatsapp-import/bin/ → ../../.. = platform/
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PLATFORM_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
+INGEST_MJS="$SCRIPT_DIR/ingest.mjs"
+if [ ! -f "$INGEST_MJS" ]; then
+  arg_fail "ingest.mjs not found at $INGEST_MJS — run from a built install"
+fi
+# Validate required flags up-front — gives a cleaner pre-flight error than
+# the node-side throw and keeps the failure surface single-line. argv
+# validation runs before env resolution so that a malformed invocation is
+# reported as an argv error rather than as a missing password.
+ARCHIVE=""
+OWNER_VAL=""
+SCOPE_VAL=""
+HAS_OWNER=0
+HAS_SCOPE=0
+ARGS=("$@")
+i=0
+while [ $i -lt ${#ARGS[@]} ]; do
+  a="${ARGS[$i]}"
+  case "$a" in
+    --owner-element-id) HAS_OWNER=1; OWNER_VAL="${ARGS[$((i + 1))]:-}"; i=$((i + 2)); continue ;;
+    --scope)            HAS_SCOPE=1; SCOPE_VAL="${ARGS[$((i + 1))]:-}"; i=$((i + 2)); continue ;;
+    --account-id|--timezone|--date-format) i=$((i + 2)); continue ;;
+    --no-insight)       i=$((i + 1)); continue ;;
+    --*)                i=$((i + 2)); continue ;;
+    *)
+      if [ -z "$ARCHIVE" ]; then ARCHIVE="$a"; fi
+      i=$((i + 1))
+      continue
+      ;;
+  esac
+done
+[ -n "$ARCHIVE" ]                          || arg_fail "missing positional <archive>"
+[ "$HAS_OWNER" -eq 1 ] && [ -n "$OWNER_VAL" ] || arg_fail "missing --owner-element-id (or empty value)"
+[ "$HAS_SCOPE" -eq 1 ] && [ -n "$SCOPE_VAL" ] || arg_fail "missing --scope (or empty value)"
+case "$SCOPE_VAL" in
+  admin|public) : ;;
+  *) arg_fail "invalid --scope \"$SCOPE_VAL\" (admin|public)" ;;
+esac
+# Lift NEO4J_PASSWORD from the install's config file when env doesn't carry it
+# (e.g. operator running directly from a shell rather than via the platform
+# server). Mirror seed-neo4j.sh precedence: env var wins, then the file.
+if [ -z "${NEO4J_PASSWORD:-}" ]; then
+  NEO4J_PASSWORD_FILE="$PLATFORM_ROOT/config/.neo4j-password"
+  if [ -f "$NEO4J_PASSWORD_FILE" ]; then
+    NEO4J_PASSWORD="$(cat "$NEO4J_PASSWORD_FILE")"
+    export NEO4J_PASSWORD
+  else
+    arg_fail "NEO4J_PASSWORD not in env and $NEO4J_PASSWORD_FILE not found"
+  fi
+fi
+# NEO4J_URI is hard-required (Task 787). The wrapper does not fabricate a
+# default — the platform server sets this at spawn time and the operator can
+# pass it explicitly when running standalone.
+if [ -z "${NEO4J_URI:-}" ]; then
+  arg_fail "NEO4J_URI not set (no default — set in env)"
+fi
+export NEO4J_USER="${NEO4J_USER:-neo4j}"
+export MAXY_PLATFORM_ROOT="$PLATFORM_ROOT"
+# Hand off to node. ingest.mjs handles all observability; the wrapper does
+# not double-log. Forward exit code verbatim.
+exec node "$INGEST_MJS" "$@"

package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/SKILL.md CHANGED Viewed

@@ -1,122 +1,74 @@
 ---
 name: whatsapp-import
-description: Import a WhatsApp `_chat.txt` export into a {{productName}} Neo4j graph as a Conversation with chronologically-chained Messages, then derive typed insights (mentions, preferences, commitments, observed relationships) as first-class graph entities. Triggers when the user asks to import a WhatsApp chat, ingest a `_chat.txt` file, or drops the contents of an "Export Chat" folder into chat. Distinct from the live `whatsapp` plugin (Baileys); this is import-from-export only.
+description: Import a WhatsApp `_chat.txt` export into a {{productName}} Neo4j graph as a Conversation with chronologically-chained Messages, then derive typed insights (mentions, tasks, preferences, observed relationships) as first-class graph entities. Triggers when the user asks to import a WhatsApp chat, ingest a `_chat.txt` file, or drops the contents of an "Export Chat" folder into chat. Distinct from the live `whatsapp` plugin (Baileys); this is import-from-export only.
 ---
 # WhatsApp Import
-Ingests a WhatsApp "Export Chat" archive — `_chat.txt` plus media attachments — into a {{productName}} Neo4j graph. Two passes:
+Ingests a WhatsApp "Export Chat" archive — `_chat.txt` plus media attachments — into a {{productName}} Neo4j graph in one in-process pipeline: parse → archive-write → insight (Haiku via OAuth). The deterministic ingest is delivered as a single Bash entry; the database-operator subagent invokes it once and yields with the operator-facing summary.
-1. **Deterministic ingest** — Conversation + Messages + chronology + sender edges, written via the fixed Cypher inside `memory-archive-write`.
-2. **Insight extraction** — analysis-derived nodes and edges (mentions, topics, preferences, commitments, observed relationships) written via existing `memory-write` / `memory-update` tools after pass 1 completes.
+## Owner confirmation (mandatory first step)
-Every node and edge carries `source='whatsapp'`, `createdByAgent='whatsapp-import'`, `createdBySession=<this-skill-run-uuid>`, and `archiveSourceFile=<sha256-prefix>` so the operator can grep this ingest's footprint at any time.
+A WhatsApp export belongs to exactly one operator (the person whose phone produced the export). The owner is metadata stamped on the `:Conversation` node — the row-level participants are auto-created by the script and promoted in a later semantic-enrichment pass.
-## Owner + participant confirmation (mandatory first step)
+1. List every `:AdminUser` in the graph via `mcp__graph__maxy-graph-read_neo4j_cypher`:
+   `MATCH (u:AdminUser) RETURN elementId(u) AS elementId, u.name AS name, u.userId AS userId, u.accountId AS accountId`
+2. Ask the operator: "Who exported this `_chat.txt`?" — accept only an existing `:AdminUser` elementId; if the operator names someone not in the graph, surface that as a blocker (auto-creating an unknown owner is refused).
+3. Echo the chosen owner back verbatim. Require explicit yes/no confirmation.
+4. Persist the resolved owner's `elementId` for the script invocation as `--owner-element-id`.
-A WhatsApp export belongs to exactly one operator (the person whose phone produced the export) and contains messages from a known set of senders. Both must be confirmed before any line is written. The flow:
+## Invoke the ingest
-### Step 1 — Owner
-The owner is metadata: who exported this chat. Stamped on the `:Conversation` node as `createdBySession` provenance. The owner is **not** a row-level subject — every message has its own sender.
-1. List every `:AdminUser` in the graph: `{userId, name, accountId(s)}`.
-2. Ask the operator: "Who exported this `_chat.txt`?" Accept either an existing `:AdminUser` userId or a new external `:Person` (with `givenName`+`familyName`+ at least one of `email`/`telephone`).
-3. Echo the chosen owner back verbatim. Require explicit yes/no confirmation before proceeding.
-4. Persist the resolved owner's `elementId` as `$ownerNodeId`.
-### Step 2 — Participants
-Parse the `_chat.txt` by invoking `mcp__memory__whatsapp-export-parse(filePath: <path>, timezone: <iana-zone>)` (Task 805). The tool returns `{conversationId, archiveSourceFile, parsedLines[], counters}` — the deterministic Node parser in [platform/plugins/whatsapp-import/lib/](../../../lib/) walks the line grammar; the agent does not tokenise lines itself. See [export-parse.md](references/export-parse.md) for the parser's behaviour reference.
-For each distinct sender name in `parsedLines[]`, capture: `{senderName, firstSeen, lastSeen, messageCount}`. Display the list in chat with these counts; the operator sees who they're about to ingest before any write.
-For each distinct sender, ask the operator to choose:
-- **Existing `:AdminUser`** — typically themselves (when their own messages are in the export). Resolve via `memory-search` by `userId` or `name`. Persist the elementId.
-- **Existing `:Person`** — match by `givenName`+`familyName`, `email`, or `telephone`. Use `memory-search` to find candidates; if multiple match, surface them and require operator pick. Persist the elementId.
-- **New external `:Person`** — mint via `memory-write` with `givenName`+`familyName`+ at least one of `email`/`telephone`. Provenance: `source='whatsapp'`, `createdByAgent='whatsapp-import'`, `createdBySession=$sessionId`. Capture the resulting elementId.
-- **Skip** — exclude this sender's messages from the import. Operator may pick this for noisy auto-replies, bots, etc.
-**Refusing to invent identity is load-bearing.** The skill never silently mints a `:Person` from a WhatsApp display name alone (which is often just a phone number or "Mum"). A new `:Person` requires confirmation of `givenName`+`familyName`+ contact information. This is the first contract `feedback_archives_are_not_documents.md` enforces.
-### Step 3 — Persist the participant map
-Build `$participantNodeIds = {senderName → senderElementId}`. Echo back to operator one final time (`Confirm: 5 senders, 4 :AdminUser/:Person, 1 skipped — proceed?`). On yes, the participant map flows into every row of the `memory-archive-write` call.
-### Step 4 — Same-person, multiple display-names heuristic
-WhatsApp displays a sender by their phone-saved name when known, by phone number otherwise. If the operator's contact list changed mid-conversation, the same person may appear under two distinct senderNames (`+44 7...` and `Joel Smalley`). Detect this heuristically: surface any senderName that is digit-only (a phone number) and ask `Is "+44 7..." the same person as "Joel Smalley"?`. On yes, both senderNames map to the same elementId. On no, keep them distinct.
-## Selective-ingest threshold (bulk archives)
-WhatsApp 1:1 chats commonly contain 1,000–10,000 messages; group chats 10,000+. Writing all of them in one shot defeats compression-on-write and produces a landfill graph. The skill compresses by interrogating the operator before the bulk write.
-**Threshold:** when the parsed `rows[]` count exceeds **100 messages**, pause and ask the operator to filter along the natural axes:
-- **Date range** — "messages between 2026-01-01 and 2026-04-01"
-- **Sender** — "only messages from Joel and Sarah"
-- **Keyword** — body contains "Q3 report" / "office hours" / etc.
-- **All** — accept the full archive (rare; only for small chats or when the operator explicitly wants every message)
-Apply the chosen filter to `rows[]` before invoking `memory-archive-write`. Compress on write, never after — a 5,000-message blanket import is noise; a 200-message filtered import is signal.
-When the threshold trips, emit one log line BEFORE the prompt:
+Single Bash call:
+```bash
+bash platform/plugins/whatsapp-import/bin/whatsapp-ingest.sh <archive.zip|dir|_chat.txt> \
+  --owner-element-id <id> \
+  --scope <admin|public>
 ```
-[whatsapp-import] selective-ingest-gate count=<n> threshold=100 axes=date,sender,keyword
-```
-## Stable IDs
-- `conversationId = whatsapp-export:<sha256(_chat.txt bytes)>:<accountId>` — same archive, same operator account → idempotent re-import. Different archive (even for the same conversation) → different conversationId.
-- `messageId = whatsapp-export:<conversationId>:<lineHash>` where `lineHash = sha256(<original-line-text>)`. Re-imports of the same archive are zero-write idempotent; re-exports with appended messages add the delta cleanly.
-## Timezone
+Optional flags:
+- `--account-id <id>` — explicit account id when more than one exists under `data/accounts/` (Phase 0 has one).
+- `--timezone <iana>` — IANA zone for timestamps (default `Europe/London`).
+- `--date-format <DD/MM/YY|MM/DD/YY|DD/MM/YYYY|MM/DD/YYYY>` — override auto-detect for ambiguous locales.
+- `--no-insight` — skip the Haiku insight pass (parse + archive-write only).
-WhatsApp's `[DD/MM/YY, HH:MM:SS]` line prefix lacks a timezone offset. The skill **must not silently assume UTC.** When the timezone is non-obvious (the operator hasn't said where they were when the messages were sent), ask:
+The script:
+- Unzips the archive if needed; locates `_chat.txt`.
+- Parses the file deterministically (year shape, sender/body grammar, timezone offset).
+- Auto-creates one `:Person {participantStatus:'auto-created'}` per distinct senderName, scoped to the account, MERGEd on `(accountId, source, name)`.
+- Writes the Conversation + Messages + edges + NEXT chronology via `memoryArchiveWrite` directly (no MCP envelope between steps).
+- Runs the insight pass: chunked Haiku calls (1500 messages per chunk), in-process Cypher writes of `:Observation` nodes connected `:OBSERVED_IN`→`:Conversation`. Insight failures log and proceed; archive-write success is what determines exit code.
-> The export uses `[DD/MM/YY, HH:MM:SS]` but doesn't include a timezone. Which timezone should I tag these messages with? (e.g., Europe/London, America/New_York, UTC)
+## Outcome
-Convert each parsed timestamp to ISO 8601 with the supplied offset before passing to `memory-archive-write`. The Cypher's `datetime()` then preserves the exact instant.
+- **Exit 0** + JSON summary on stdout:
+  ```json
+  {"conversationId": "whatsapp-export:<sha>:<accountId>",
+   "parsed": 1707, "mediaSkipped": 0, "systemSkipped": 0,
+   "createdMessages": 1707,
+   "insightCounters": {"chunks": 2, "mentions": 12, "tasks": 3, "preferences": 1, "observedRelationships": 0},
+   "ms": 67000}
+  ```
+  Surface this to the operator as one chat message: `Imported 1707 messages from <archive> into conversation <conversationId>; insights: 12 mentions, 3 tasks, 1 preference, 0 relationships.`
-## Execution model
+- **Exit non-zero** + one stderr line: `[whatsapp-ingest] FAIL phase=<parse|archive-write|argv|import|uncaught> reason="<sanitised first 80c>" ...`. Surface this verbatim to the operator and yield. **Do not retry. Do not edit parser source.** The archive-ingest-surface-gate denies parser-source edits, JS test runners, and the legacy `whatsapp-export-parse` / `whatsapp-export-insight-write` / `memory-archive-write{archiveType:whatsapp-export}` MCP tools — none of those are escape hatches in your surface.
-1. **Parse** — Invoke `mcp__memory__whatsapp-export-parse(filePath, timezone, dateFormat?)`. The deterministic parser walks the line grammar, returns `{conversationId, archiveSourceFile, parsedLines[], counters}`. LOUD-FAIL on encoding error / empty file / malformed timestamp surfaces as the tool's `isError` content; the skill aborts the import without further work. The `archiveSourceFile` is `whatsapp-export:<sha256-of-file-bytes>` — keep this exact value; `memory-archive-write` will recompute and assert it matches in Step 6.
-2. **Owner+participant confirmation** — Steps 1–3 above. Persist `$ownerNodeId` + `$participantNodeIds`.
-3. **Selective-ingest gate** — If `parsedLines.length > 100`, pause for filter selection. Apply filter.
-4. **Build rows[]** — Map each parsed line to `{messageId, conversationId, senderNodeId, senderName, dateSent (ISO 8601), body, sequenceIndex}`. Compute `messageId` per line.
-5. **Build conversation block** — `{conversationId, archiveSourceFile, firstMessageAt, lastMessageAt, participantCount, messageCount}` from the rows[]. `conversationId` and `archiveSourceFile` come straight from the parser's return value.
-6. **Dispatch** `mcp__memory__memory-archive-write` once with `archiveType='whatsapp-export'`, `ownerNodeId`, `conversation`, `participantNodeIds` (the distinct elementIds from the map), `rows`, `sessionId`, **and `archiveFilePath: <same path you passed to whatsapp-export-parse>`**. The server re-computes `sha256(file)` and asserts it matches `conversation.archiveSourceFile` before any write — mismatch is a hard reject (Task 805 silent-substitution gate). The tool MERGEs the Conversation, MERGEs Messages, links PART_OF + SENT + PARTICIPANT_IN edges per row, and runs the `finalize` hook to MERGE the NEXT chronology by dateSent ordering.
-7. **Emit per-export log line:**
-   ```
-   [whatsapp-import] file=<chat.txt> conversationId=<cid> participants=<n> messages-parsed=<n> media-skipped=<n> system-skipped=<n> ms=<elapsed>
-   ```
-8. **Insight pass** — Run pass 2 per [insight-extraction.md](references/insight-extraction.md). Read the just-written messages via `memory-search`, classify within the specialist's own LLM turn, and write typed observations through `memory-write` / `memory-update`. **`:MENTIONS` and `:RELATED_TO` edges route through `mcp__memory__whatsapp-export-insight-write` (Task 805) — that tool re-runs `memory-search` server-side and asserts the agent's claimed candidate elementIds appear in the live result; rejects single-first-name names without `disambiguatorOk=true`; refuses `:RELATED_TO` writes without `operatorConfirmed=true`. The agent never authors `:MENTIONS` / `:RELATED_TO` Cypher directly.** Emit:
-   ```
-   [whatsapp-import] insight-pass model=sonnet chunks=<n> mentions=<n> preferences=<n> tasks=<n> observed-relationships=<n> novel-insights=<n> ms=<elapsed>
-   ```
+## Idempotency
-## Doctrine — raw Cypher and `cypher-shell` are forbidden in this skill
+Re-running the script against the same archive is a no-op: `createdMessages: 0`, NEXT chain unchanged, conversation scalars refreshed via `lastImportedAt` / `lastImportedBySession`. Re-exports with appended messages add only the delta and extend the NEXT chain to cover the new tail.
-All writes route through `mcp__memory__memory-archive-write` (bulk Conversation+Messages) or `mcp__memory__memory-write` / `mcp__memory__memory-update` (second-pass typed observations). The agent never authors Cypher. If the operator hits a write shape these tools do not express, **do not improvise** — surface the gap as a structured task per the database-operator's LOUD-FAIL prerogative. See [database-operator.md](../../../../templates/specialists/agents/database-operator.md#prerogatives).
-## LOUD-FAIL on parse errors
-`mcp__memory__whatsapp-export-parse` is the LOUD-FAIL surface (Task 805). When the grammar can't classify a line, the tool throws with `parse-error file=<...> line=<n> reason=<r>` and the MCP layer returns `isError: true` with that message. The skill MUST abort the import on a parse-error response — do not retry, do not "best effort" the rest of the file. The operator gets a named error and re-exports if necessary.
-The deterministic parser also LOUD-FAILs on UTF-8 decode failure (`reason=encoding-error`), zero parsed lines (`reason=not-a-_chat.txt`), and missing required arguments (`reason=accountId|timezone`). All of these surface through the same tool error path; the agent does not need to detect them itself.
-**Structurally enforced (Task 846).** The harness-level `platform/plugins/admin/hooks/archive-ingest-gate.sh` watches every `mcp__memory__whatsapp-export-parse` PostToolUse event; on `isError: true` it sets a parse-error flag and blocks every subsequent tool call this turn until the operator submits the next prompt. Editing parser source under `platform/plugins/*/lib/*` and running `vitest`/`bun test`/`npm test`/`npx jest` is denied unconditionally. The skill's behavioural rule above is the contract; the hook is the enforcement. See [database-operator's LOUD-FAIL prerogative](../../../../templates/specialists/agents/database-operator.md#prerogatives) and [.docs/hooks.md](../../../../../.docs/hooks.md) for the gate's full surface.
+## Verification (post-write)
-## Idempotency contract
+Run via `mcp__graph__maxy-graph-read_neo4j_cypher`:
-Re-importing the same `_chat.txt` is a no-op (`createdMessages=0`, `mergedMessages=N`, NEXT chain unchanged). Re-importing a re-exported file with appended messages adds only the delta and extends the NEXT chain to cover the new tail. Both paths are server-enforced via MERGE on `messageId` and the finalize hook's idempotent NEXT-MERGE.
+- `MATCH (c:Conversation:WhatsAppConversation {conversationId: $cid}) RETURN c.messageCount, c.participantCount, c.firstMessageAt, c.lastMessageAt` — agrees with the JSON summary.
+- `MATCH (m:Message)-[:PART_OF]->(c {conversationId: $cid}) RETURN count(m)` — equals `parsed`.
+- `MATCH p=(:Message {conversationId: $cid})-[:NEXT*]->() WITH max(length(p)) AS chain RETURN chain` — equals the Conversation's `messageCount - 1` (which matches `createdMessages - 1` only on a first import; an idempotent re-run reports `createdMessages: 0`).
+- `MATCH (o:Observation {createdBySession: $sessionId})-[:OBSERVED_IN]->(:Conversation {conversationId: $cid}) RETURN o.kind, count(*)` — agrees with `insightCounters`.
-## Verification (post-write)
+## What this is not
-- `MATCH (c:Conversation:WhatsAppConversation {conversationId: $cid}) RETURN c.messageCount, c.participantCount, c.firstMessageAt, c.lastMessageAt` — agrees with the per-export log line counts.
-- `MATCH (m:Message:WhatsAppMessage)-[:PART_OF]->(c {conversationId: $cid}) RETURN count(m)` — equals post-filter line count.
-- `MATCH p=(m:Message {conversationId: $cid})-[:NEXT*]->(end) WITH max(length(p)) AS chain RETURN chain` — equals `messageCount - 1`.
-- `MATCH (m:Message {conversationId: $cid}) RETURN min(m.dateSent), max(m.dateSent)` — matches the file's first/last lines (modulo the operator-confirmed timezone).
-- `MATCH (n) WHERE n.createdBySession = $sessionId RETURN labels(n) AS l, count(*) ORDER BY count(*) DESC` — the full graph footprint of this ingest, sortable by label.
+- **Not** the live `whatsapp` plugin. That plugin (Baileys QR pairing) holds messages in an in-memory store cleared on restart. This plugin imports historical exports into Neo4j as persistent graph nodes.
+- **Not** a media-transcription pipeline. Voice notes, photos, PDFs are skipped at parse with a counter logged.
+- **Not** the operator-level semantic enrichment pass. The auto-created participants and `:Observation` nodes are deliberately raw — disambiguation, edge wiring to specific `:Person` / `:Task` / `:Service` entities, and operator confirmation of `:RELATED_TO`-style relationships are the responsibility of a later pass (Task 859).

package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/references/export-parse.md CHANGED Viewed

@@ -1,6 +1,6 @@
 # Reference: `_chat.txt` parsing — implementation reference
-> **Task 805 — this is no longer operator instruction.** The agent does NOT walk this grammar in its own LLM turn. Parsing runs deterministically in [`platform/plugins/whatsapp-import/lib/src/parse-export.ts`](../../../lib/src/parse-export.ts), invoked via `mcp__memory__whatsapp-export-parse`. The vitest grid in [`lib/src/__tests__/parse-export.test.ts`](../../../lib/src/__tests__/parse-export.test.ts) is the executable contract; this prose is the human-readable companion. Extend the grammar by adding a failing test first.
+> **Task 855 — this is no longer operator instruction.** The agent does NOT walk this grammar in its own LLM turn. Parsing runs deterministically in [`platform/plugins/whatsapp-import/lib/src/parse-export.ts`](../../../lib/src/parse-export.ts), invoked in-process by [`bin/ingest.mjs`](../../../bin/ingest.mjs) (which the operator calls via [`bin/whatsapp-ingest.sh`](../../../bin/whatsapp-ingest.sh) — the single deterministic Bash entry). The legacy MCP wrapper is blocked at the harness gate. The vitest grid in [`lib/src/__tests__/parse-export.test.ts`](../../../lib/src/__tests__/parse-export.test.ts) is the executable contract; this prose is the human-readable companion. Extend the grammar by adding a failing test first.
 WhatsApp's "Export Chat" produces a UTF-8 text file with a deterministic line grammar. This reference describes what the parser library does when it converts that file into the `{senderName, dateSent, body, sequenceIndex}[]` structure the SKILL.md consumes.

package/payload/platform/scripts/seed-neo4j.sh CHANGED Viewed

@@ -98,15 +98,24 @@ fi
 #                webfetch-preflight.mjs: detects JS-SPA shells before WebFetch's
 #                  60s extraction timeout (Task 536). Fail-open on any error;
 #                  on positive SPA detection exits 2 with WEBFETCH_CANNOT_READ_JS_SPA.
-#                archive-ingest-gate.sh (Task 846): blocks plugin-source edits, JS
-#                  test runners, and any tool call after a *-export-parse /
-#                  *-import-parse tool returns isError=true (cleared on next user
-#                  prompt). Wired at three points: explicit Edit/Write/NotebookEdit/
-#                  Bash matchers (path + test-runner enforcement), a no-matcher
-#                  PreToolUse entry (post-parse-error gate fires on every tool),
+#                archive-ingest-surface-gate.sh (Task 855; supersedes Task 846):
+#                  narrows the database-operator subagent's effective surface
+#                  during WhatsApp archive ingestion to exactly one Bash entry
+#                  (whatsapp-import/bin/whatsapp-ingest.sh) plus read-only
+#                  neighbours, by blocking the legacy MCP deviation tools
+#                  (mcp__memory__whatsapp-export-parse,
+#                  mcp__memory__whatsapp-export-insight-write, and
+#                  mcp__memory__memory-archive-write when archiveType is
+#                  whatsapp-export). Preserves Task 846's plugin-source edit,
+#                  JS test-runner, and post-parse-error blocks (LinkedIn and
+#                  future per-source archive parsers still use the legacy MCP
+#                  path until they migrate to deterministic Bash entries).
+#                  Wired at three points: explicit Edit/Write/NotebookEdit/Bash
+#                  matchers, a no-matcher PreToolUse entry (catches the legacy
+#                  MCP tools, and the post-parse-error gate fires on every tool),
 #                  and a PostToolUse regex matcher for parse tools.
-#                UserPromptSubmit — archive-ingest-gate.sh clears the parse-error
-#                  flag when the operator's next prompt arrives.
+#                UserPromptSubmit — archive-ingest-surface-gate.sh clears the
+#                  parse-error flag when the operator's next prompt arrives.
 HOOKS_PATH="\$PLATFORM_ROOT/plugins/admin/hooks"
 cat > "$ACCOUNT_SETTINGS" << SETTINGS_EOF
 {
@@ -119,32 +128,32 @@ cat > "$ACCOUNT_SETTINGS" << SETTINGS_EOF
         "matcher": "Write",
         "hooks": [
           { "type": "command", "command": "bash $HOOKS_PATH/pre-tool-use.sh admin" },
-          { "type": "command", "command": "bash $HOOKS_PATH/archive-ingest-gate.sh" }
+          { "type": "command", "command": "bash $HOOKS_PATH/archive-ingest-surface-gate.sh" }
         ]
       },
       {
         "matcher": "Edit",
         "hooks": [
           { "type": "command", "command": "bash $HOOKS_PATH/pre-tool-use.sh admin" },
-          { "type": "command", "command": "bash $HOOKS_PATH/archive-ingest-gate.sh" }
+          { "type": "command", "command": "bash $HOOKS_PATH/archive-ingest-surface-gate.sh" }
         ]
       },
       {
         "matcher": "NotebookEdit",
         "hooks": [
-          { "type": "command", "command": "bash $HOOKS_PATH/archive-ingest-gate.sh" }
+          { "type": "command", "command": "bash $HOOKS_PATH/archive-ingest-surface-gate.sh" }
         ]
       },
       {
         "matcher": "Bash",
         "hooks": [
           { "type": "command", "command": "bash $HOOKS_PATH/pre-tool-use.sh admin" },
-          { "type": "command", "command": "bash $HOOKS_PATH/archive-ingest-gate.sh" }
+          { "type": "command", "command": "bash $HOOKS_PATH/archive-ingest-surface-gate.sh" }
         ]
       },
       {
         "hooks": [
-          { "type": "command", "command": "bash $HOOKS_PATH/archive-ingest-gate.sh" }
+          { "type": "command", "command": "bash $HOOKS_PATH/archive-ingest-surface-gate.sh" }
         ]
       },
       {
@@ -164,14 +173,14 @@ cat > "$ACCOUNT_SETTINGS" << SETTINGS_EOF
       {
         "matcher": "mcp__.*__.*-(export|import)-parse$",
         "hooks": [
-          { "type": "command", "command": "bash $HOOKS_PATH/archive-ingest-gate.sh" }
+          { "type": "command", "command": "bash $HOOKS_PATH/archive-ingest-surface-gate.sh" }
         ]
       }
     ],
     "UserPromptSubmit": [
       {
         "hooks": [
-          { "type": "command", "command": "bash $HOOKS_PATH/archive-ingest-gate.sh" }
+          { "type": "command", "command": "bash $HOOKS_PATH/archive-ingest-surface-gate.sh" }
         ]
       }
     ]

package/payload/platform/templates/specialists/agents/database-operator.md CHANGED Viewed

@@ -3,7 +3,7 @@ name: database-operator
 description: "Document and archive ingestion and ad-hoc graph operations — running the universal `document-ingest` skill for any unstructured document (PDF, text, transcript, web page, audio, video) and per-source archive-import skills (LinkedIn Basic Data Export today; CRM-type seed archives as each plugin ships), plus operator-driven graph hygiene (prune orphans, deduplicate entities, add edges, normalise labels). Delegate when the operator uploads any document, drops an archive directory into chat, or asks for any graph operation that is not a routine per-turn write."
 summary: "Ingests every unstructured document and external archive into your graph (LinkedIn today; other CRM sources in future) and handles ad-hoc graph tidy-ups on request. For example, when you upload a CV, a pricing guide, or a contract; when you drop a LinkedIn export folder into chat; or when you ask to prune orphan nodes, merge duplicate people, or add edges between entities."
 model: claude-sonnet-4-6
-tools: Read, Bash, Glob, Grep, mcp__graph__maxy-graph-read_neo4j_cypher, mcp__graph__maxy-graph-write_neo4j_cypher, mcp__graph__maxy-graph-get_neo4j_schema, mcp__memory__memory-write, mcp__memory__memory-update, mcp__memory__memory-delete, mcp__memory__memory-search, mcp__memory__memory-rank, mcp__memory__memory-reindex, mcp__memory__memory-find-candidates, mcp__memory__memory-ingest, mcp__memory__memory-ingest-extract, mcp__memory__memory-ingest-web, mcp__memory__memory-classify, mcp__memory__memory-archive-write, mcp__memory__whatsapp-export-parse, mcp__memory__whatsapp-export-insight-write, mcp__memory__graph-prune-denylist-list, mcp__memory__graph-prune-denylist-add, mcp__memory__graph-prune-denylist-remove, mcp__contacts__contact-create, mcp__contacts__contact-update, mcp__contacts__contact-lookup, mcp__contacts__contact-list, mcp__admin__file-attach, mcp__admin__plugin-read
+tools: Read, Bash, Glob, Grep, mcp__graph__maxy-graph-read_neo4j_cypher, mcp__graph__maxy-graph-write_neo4j_cypher, mcp__graph__maxy-graph-get_neo4j_schema, mcp__memory__memory-write, mcp__memory__memory-update, mcp__memory__memory-delete, mcp__memory__memory-search, mcp__memory__memory-rank, mcp__memory__memory-reindex, mcp__memory__memory-find-candidates, mcp__memory__memory-ingest, mcp__memory__memory-ingest-extract, mcp__memory__memory-ingest-web, mcp__memory__memory-classify, mcp__memory__memory-archive-write, mcp__memory__graph-prune-denylist-list, mcp__memory__graph-prune-denylist-add, mcp__memory__graph-prune-denylist-remove, mcp__contacts__contact-create, mcp__contacts__contact-update, mcp__contacts__contact-lookup, mcp__contacts__contact-list, mcp__admin__file-attach, mcp__admin__plugin-read
 ---
 # Database Operator
@@ -30,7 +30,17 @@ A landfill graph defeats EVIDENCE-BASED: search returns noise, the agent re-writ
 The pre-publish gate (`platform/scripts/verify-skill-tool-surface.sh`) statically asserts every shipped skill's prescribed `mcp__*` tokens resolve against your frontmatter `tools:` list, so a missing tool is a build error, not a production discovery. LOUD-FAIL is the runtime backstop when that gate is bypassed (e.g. operator-edited skill).
-**Archive-ingest gate (Task 846).** During archive imports the LOUD-FAIL contract is structurally enforced by `platform/plugins/admin/hooks/archive-ingest-gate.sh`. When any `mcp__*__*-export-parse` / `mcp__*__*-import-parse` tool returns `isError: true`, the hook records a parse-error flag — every subsequent tool call in this turn is blocked at the harness, and you must yield back to the admin agent with a user-facing message naming the parse-error. The flag clears on the operator's next prompt. The same hook denies `Edit`/`Write`/`NotebookEdit` against `platform/plugins/*/lib/*` and Bash invocations of `vitest`/`bun test`/`npm test`/`npx jest` regardless of state — the operator does not own plugin source. *Failure symptoms (now harness-blocked):* appending vitest cases to a parser test file after `whatsapp-export-parse` returned isError, running `npx vitest` to "diagnose" a parser, editing `parse-export.ts` to "fix" a malformed input. Treat these blocks as confirmation that the gate is doing its job — surface the parse-error and yield, do not retry around the block.
+**Archive-ingest surface gate (Task 855; supersedes Task 846).** Each per-source archive importer ships a single deterministic Bash entry under `platform/plugins/<name>/bin/<name>-ingest.sh`. The harness-level gate at `platform/plugins/admin/hooks/archive-ingest-surface-gate.sh` enforces the surface filter that makes the LLM mechanically incapable of deviating mid-ingest:
+- **Legacy WhatsApp MCP tools blocked unconditionally:** `mcp__memory__whatsapp-export-parse`, `mcp__memory__whatsapp-export-insight-write`, and `mcp__memory__memory-archive-write` when `archiveType=whatsapp-export`. The deterministic Bash entry (`platform/plugins/whatsapp-import/bin/whatsapp-ingest.sh`) is the only supported path; parse, archive-write, and insight all run in-process.
+- **Non-WhatsApp archiveTypes flow unchanged:** `memory-archive-write` with `archiveType=linkedin-connections` (and future per-source archiveTypes) is allowed. Each per-source plugin migrates to its own `bin/<name>-ingest.sh` on its own schedule.
+- **Plugin-source edits blocked** (preserved from Task 846): `Edit`/`Write`/`NotebookEdit` against `platform/plugins/*/lib/*` is denied. The operator does not own plugin source.
+- **JS test runners blocked** (preserved): `vitest` / `bun test` / `npm test` / `npx jest` Bash commands are denied. The operator does not run plugin tests.
+- **Post-parse-error flag** (preserved for non-migrated parsers): when any `mcp__*__*-export-parse` / `mcp__*__*-import-parse` tool returns `isError: true`, every subsequent tool call this turn is blocked until the operator submits a new prompt.
+Every PreToolUse decision emits `[archive-ingest-gate] decision=<allow|block> tool=<n> reason=<r> ...` to server.log so the full trail of one ingest is greppable alongside the `[whatsapp-ingest]` script lines.
+*Failure symptoms (now harness-blocked):* invoking `mcp__memory__whatsapp-export-parse` instead of the Bash script, calling `mcp__memory__memory-archive-write` with `archiveType:'whatsapp-export'` directly, editing `parse-export.ts` to "fix" a malformed input, running `npx vitest` to "diagnose" a parser. Treat these blocks as confirmation that the gate is doing its job — invoke the script, surface its FAIL line if it fails, and yield. There is no supported way around the block.
 ---
@@ -109,7 +119,7 @@ The classifier maps document sections to typed ontology labels. It does not inve
 Per-source archive imports keep their own skill because their CSVs already encode entity types deterministically and need no LLM classifier. Currently shipped:
 - **linkedin-import** — LinkedIn Basic Data Export. Ships with references for `Profile.csv` and `Connections.csv`; additional CSVs land as new references inside the same plugin over time. Path: `platform/plugins/linkedin-import/skills/linkedin-import/SKILL.md`. Load via `plugin-read` before any ingestion.
-- **whatsapp-import** — WhatsApp `_chat.txt` export ingestion. Imports historical Conversation + Messages with chronological NEXT chain via `memory-archive-write` (archiveType=`whatsapp-export`), then derives typed insights (mentions, preferences, commitments, observed relationships) inline through existing memory tools. Distinct from the live `whatsapp` plugin (Baileys QR pairing, in-memory store). Path: `platform/plugins/whatsapp-import/skills/whatsapp-import/SKILL.md`. Load via `plugin-read` before any ingestion.
+- **whatsapp-import** — WhatsApp `_chat.txt` export ingestion. Imports historical Conversation + Messages with chronological NEXT chain plus typed insights (mentions, tasks, preferences, observed relationships) via the single deterministic Bash entry at `platform/plugins/whatsapp-import/bin/whatsapp-ingest.sh` — parse, archive-write, and Haiku insight all run in-process; no MCP envelope between steps (Task 855). The legacy MCP tools `mcp__memory__whatsapp-export-parse`, `mcp__memory__whatsapp-export-insight-write`, and `mcp__memory__memory-archive-write` (with archiveType=`whatsapp-export`) are blocked at the harness; the Bash script is the only supported path. Distinct from the live `whatsapp` plugin (Baileys QR pairing, in-memory store). Path: `platform/plugins/whatsapp-import/skills/whatsapp-import/SKILL.md`. Load via `plugin-read` before any ingestion.
 Future CRM-type seed plugins (HubSpot, Salesforce, Pipedrive, iCloud contacts, Gmail CSV, etc.) will ship under the same pattern — each as its own opt-in plugin, each with its own `SKILL.md` path under `platform/plugins/<name>/skills/`. When the admin adds a new archive-import skill, its PLUGIN.md will name itself here and in the admin's `<plugin-manifest>`. No prompt change required.

package/payload/platform/plugins/admin/hooks/__tests__/archive-ingest-gate.test.sh DELETED Viewed

@@ -1,166 +0,0 @@
-#!/usr/bin/env bash
-# Regression test for archive-ingest-gate.sh (Task 846).
-#
-# Six core cases cover the contract (variants 3b–3d and follow-up cases 7–10 below extend them):
-#   1. Edit on /platform/plugins/<x>/lib/* is BLOCKED (exit 2).
-#   2. Edit on a benign path is ALLOWED (exit 0).
-#   3. Bash with `npx vitest` is BLOCKED.
-#   4. PostToolUse on whatsapp-export-parse with isError:true sets the flag.
-#   5. Subsequent PreToolUse on ANY tool is BLOCKED (post-parse-error gate).
-#   6. UserPromptSubmit clears the flag, restoring normal allow behavior.
-#
-# Tests use ARCHIVE_INGEST_GATE_STATE_DIR to point at a tmp dir so they run
-# without a real account layout.
-set -u
-HOOK="$(cd "$(dirname "$0")/.." && pwd)/archive-ingest-gate.sh"
-if [[ ! -x "$HOOK" ]]; then
-  echo "FAIL: $HOOK not executable" >&2
-  exit 1
-fi
-# Per-run isolated state dir
-STATE_DIR=$(mktemp -d)
-export ARCHIVE_INGEST_GATE_STATE_DIR="$STATE_DIR"
-FLAG_FILE="$STATE_DIR/archive-ingest-parse-error.flag"
-cleanup() { rm -rf "$STATE_DIR"; }
-trap cleanup EXIT
-PASS=0
-FAIL=0
-run_case() {
-  local name="$1" stdin="$2" expected_exit="$3"
-  local actual_exit
-  printf '%s' "$stdin" | bash "$HOOK" >/dev/null 2>/dev/null
-  actual_exit=$?
-  if [[ "$actual_exit" -eq "$expected_exit" ]]; then
-    echo "PASS: $name (exit=$actual_exit)"
-    PASS=$((PASS + 1))
-  else
-    echo "FAIL: $name (expected exit=$expected_exit, got=$actual_exit)" >&2
-    FAIL=$((FAIL + 1))
-  fi
-}
-# Case 1 — Edit on plugin lib path: BLOCKED
-run_case "Edit on platform/plugins/whatsapp-import/lib/src/parse-export.ts → BLOCKED" \
-  '{"hook_event_name":"PreToolUse","tool_name":"Edit","tool_input":{"file_path":"/Users/x/repo/platform/plugins/whatsapp-import/lib/src/parse-export.ts","old_string":"a","new_string":"b"}}' \
-  2
-# Case 2 — Edit on a benign path: ALLOWED
-run_case "Edit on README.md → ALLOWED" \
-  '{"hook_event_name":"PreToolUse","tool_name":"Edit","tool_input":{"file_path":"/Users/x/repo/README.md","old_string":"a","new_string":"b"}}' \
-  0
-# Case 3 — Bash with `npx vitest`: BLOCKED
-run_case "Bash 'npx vitest run parse-export.test.ts' → BLOCKED" \
-  '{"hook_event_name":"PreToolUse","tool_name":"Bash","tool_input":{"command":"npx vitest run parse-export.test.ts"}}' \
-  2
-# Case 3b — Bash with benign command: ALLOWED
-run_case "Bash 'ls -la' → ALLOWED" \
-  '{"hook_event_name":"PreToolUse","tool_name":"Bash","tool_input":{"command":"ls -la"}}' \
-  0
-# Case 3c — Bash with `bun test`: BLOCKED
-run_case "Bash 'bun test' → BLOCKED" \
-  '{"hook_event_name":"PreToolUse","tool_name":"Bash","tool_input":{"command":"bun test"}}' \
-  2
-# Case 3d — Bash with `npm test`: BLOCKED
-run_case "Bash 'npm test' → BLOCKED" \
-  '{"hook_event_name":"PreToolUse","tool_name":"Bash","tool_input":{"command":"npm test"}}' \
-  2
-# Make sure flag is absent before parse-error simulation
-rm -f "$FLAG_FILE"
-# Case 4 — PostToolUse on whatsapp-export-parse with isError:true sets flag
-run_case "PostToolUse parse-error sets flag (exit 0, flag side-effect)" \
-  '{"hook_event_name":"PostToolUse","tool_name":"mcp__memory__whatsapp-export-parse","tool_input":{"filePath":"_chat.txt"},"tool_response":{"isError":true,"content":[{"type":"text","text":"parse-error file=_chat.txt line=1 reason=not-a-_chat.txt"}]}}' \
-  0
-if [[ -f "$FLAG_FILE" ]]; then
-  echo "PASS: parse-error flag created at $FLAG_FILE"
-  PASS=$((PASS + 1))
-else
-  echo "FAIL: parse-error flag NOT created at $FLAG_FILE" >&2
-  FAIL=$((FAIL + 1))
-fi
-# Case 5 — Subsequent PreToolUse on ANY tool BLOCKED while flag is fresh
-run_case "PreToolUse Read after parse-error → BLOCKED" \
-  '{"hook_event_name":"PreToolUse","tool_name":"Read","tool_input":{"file_path":"/tmp/foo"}}' \
-  2
-run_case "PreToolUse Bash after parse-error → BLOCKED" \
-  '{"hook_event_name":"PreToolUse","tool_name":"Bash","tool_input":{"command":"echo hi"}}' \
-  2
-# Case 6 — UserPromptSubmit clears flag
-run_case "UserPromptSubmit clears flag (exit 0)" \
-  '{"hook_event_name":"UserPromptSubmit","prompt":"retry"}' \
-  0
-if [[ ! -f "$FLAG_FILE" ]]; then
-  echo "PASS: UserPromptSubmit cleared flag"
-  PASS=$((PASS + 1))
-else
-  echo "FAIL: UserPromptSubmit did NOT clear flag" >&2
-  FAIL=$((FAIL + 1))
-fi
-# Case 7 — After clearance, normal allow resumes
-run_case "PreToolUse Read after clearance → ALLOWED" \
-  '{"hook_event_name":"PreToolUse","tool_name":"Read","tool_input":{"file_path":"/tmp/foo"}}' \
-  0
-# Case 8 — PostToolUse with isError:false does NOT set flag
-rm -f "$FLAG_FILE"
-run_case "PostToolUse parse-success (isError:false) does NOT set flag" \
-  '{"hook_event_name":"PostToolUse","tool_name":"mcp__memory__whatsapp-export-parse","tool_input":{"filePath":"_chat.txt"},"tool_response":{"isError":false,"content":[{"type":"text","text":"{\"parsedLines\":[]}"}]}}' \
-  0
-if [[ ! -f "$FLAG_FILE" ]]; then
-  echo "PASS: parse-success leaves flag absent"
-  PASS=$((PASS + 1))
-else
-  echo "FAIL: parse-success incorrectly created flag" >&2
-  FAIL=$((FAIL + 1))
-fi
-# Case 9 — Stale flag (>600s) auto-clears + allows
-PAST=$(( $(date -u +%s) - 700 ))
-echo "$PAST" > "$FLAG_FILE"
-run_case "Stale flag auto-clears, PreToolUse Read → ALLOWED" \
-  '{"hook_event_name":"PreToolUse","tool_name":"Read","tool_input":{"file_path":"/tmp/foo"}}' \
-  0
-# Case 10 — Terminal-stdin fail-closed guard is present (static check only; see note below)
-echo "Probing fail-closed behaviour (no stdin)..."
-bash "$HOOK" </dev/null >/dev/null 2>/dev/null
-ACTUAL=$?
-# /dev/null IS a stdin — the `[ -t 0 ]` check tests for terminal, not file.
-# A file/pipe stdin reads as empty, which produces empty hook_event_name and
-# falls through to default `exit 0` (allow). The terminal-only fail-closed
-# branch can't be tested non-interactively; verify the script reads `[ -t 0 ]`.
-if grep -q '\[ -t 0 \]' "$HOOK"; then
-  echo "PASS: fail-closed terminal check is present"
-  PASS=$((PASS + 1))
-else
-  echo "FAIL: fail-closed terminal check missing" >&2
-  FAIL=$((FAIL + 1))
-fi
-echo
-echo "──────── archive-ingest-gate test summary ────────"
-echo "PASS: $PASS"
-echo "FAIL: $FAIL"
-if [[ "$FAIL" -gt 0 ]]; then
-  exit 1
-fi
-exit 0