@rubytech/create-maxy 1.0.800 → 1.0.801

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,102 @@
1
+ #!/usr/bin/env bash
2
+ # =============================================================================
3
+ # whatsapp-ingest.sh — single deterministic Bash entry for WhatsApp archive
4
+ # ingestion (Task 855). Thin wrapper: arg-validate, resolve env, invoke
5
+ # ingest.mjs in-process. The script is the database-operator subagent's only
6
+ # operator-facing handle on the parse → archive-write → insight pipeline.
7
+ #
8
+ # Usage:
9
+ # bash whatsapp-ingest.sh <archive.zip|dir|_chat.txt>
10
+ # --owner-element-id <id>
11
+ # --scope <admin|public>
12
+ # [--account-id <accountId>]
13
+ # [--timezone <iana-zone>]
14
+ # [--date-format <DD/MM/YY|MM/DD/YY|DD/MM/YYYY|MM/DD/YYYY>]
15
+ # [--no-insight]
16
+ #
17
+ # Exit 0 + JSON summary on stdout on success.
18
+ # Exit !0 + one [whatsapp-ingest] FAIL line on stderr on failure.
19
+ # =============================================================================
20
+
21
+ set -euo pipefail
22
+
23
# Pre-flight failure helper — emits the same single-line FAIL shape ingest.mjs
# uses so the operator's grep pattern catches both layers uniformly.
#
# Arguments: $1 - human-readable failure reason
# Outputs:   one "[whatsapp-ingest] FAIL phase=argv reason=\"...\"" line on
#            stderr; nothing on stdout
# Returns:   never — exits the script with status 1
arg_fail() {
  local reason="$1"
  # Keep the record on one line and keep the reason="..." field well-formed:
  # CR/LF become spaces and embedded double quotes become single quotes
  # (callers interpolate user-supplied values into the reason).
  reason="$(printf '%s' "$reason" | tr '\r\n"' "  '")"
  printf '[whatsapp-ingest] FAIL phase=argv reason="%s"\n' "$reason" >&2
  exit 1
}
30
+
31
# Resolve absolute paths relative to where this script lives. The script sits
# in platform/plugins/whatsapp-import/bin/, so three levels up is platform/.
# ingest.mjs is expected to sit in the same directory as this wrapper.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
INGEST_MJS="${SCRIPT_DIR}/ingest.mjs"
PLATFORM_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"

# Bail out early when the node entry point is absent.
[ -f "$INGEST_MJS" ] || arg_fail "ingest.mjs not found at $INGEST_MJS — run from a built install"
40
+
41
# Validate required flags up-front — gives a cleaner pre-flight error than
# the node-side throw and keeps the failure surface single-line. argv
# validation runs before env resolution so a wrong invocation fails on the
# wrong invocation, not on a missing password.
#
# The scan walks a copy of "$@" (ARGS) by index; the positional parameters
# themselves are never shifted, so they remain intact for the final hand-off.
ARCHIVE=""
OWNER_VAL=""
SCOPE_VAL=""
HAS_OWNER=0
HAS_SCOPE=0

ARGS=("$@")
i=0
while [ "$i" -lt "${#ARGS[@]}" ]; do
  a="${ARGS[$i]}"
  case "$a" in
    --owner-element-id)
      HAS_OWNER=1
      # ":-" keeps `set -u` quiet when the flag is the last token.
      OWNER_VAL="${ARGS[$((i + 1))]:-}"
      # A following "--flag" means the value was omitted; do not swallow the
      # next flag as if it were the owner id.
      case "$OWNER_VAL" in
        --*) arg_fail "missing --owner-element-id (or empty value)" ;;
      esac
      i=$((i + 2))
      ;;
    --scope)
      HAS_SCOPE=1
      SCOPE_VAL="${ARGS[$((i + 1))]:-}"
      case "$SCOPE_VAL" in
        --*) arg_fail "missing --scope (or empty value)" ;;
      esac
      i=$((i + 2))
      ;;
    --account-id|--timezone|--date-format)
      # Optional value-taking flags: skip flag + value, not validated here.
      i=$((i + 2))
      ;;
    --no-insight)
      # Boolean flag: no value to skip.
      i=$((i + 1))
      ;;
    --*)
      # Unknown flags are assumed to carry one value and are skipped as a pair.
      i=$((i + 2))
      ;;
    *)
      # First bare token is the archive path; later bare tokens are ignored.
      if [ -z "$ARCHIVE" ]; then ARCHIVE="$a"; fi
      i=$((i + 1))
      ;;
  esac
done

if [ -z "$ARCHIVE" ]; then
  arg_fail "missing positional <archive>"
fi
if [ "$HAS_OWNER" -ne 1 ] || [ -z "$OWNER_VAL" ]; then
  arg_fail "missing --owner-element-id (or empty value)"
fi
if [ "$HAS_SCOPE" -ne 1 ] || [ -z "$SCOPE_VAL" ]; then
  arg_fail "missing --scope (or empty value)"
fi
case "$SCOPE_VAL" in
  admin|public) : ;;
  *) arg_fail "invalid --scope \"$SCOPE_VAL\" (admin|public)" ;;
esac
76
+
77
# Lift NEO4J_PASSWORD from the install's config file when env doesn't carry it
# (e.g. operator running directly from a shell rather than via the platform
# server). Mirror seed-neo4j.sh precedence: env var wins, then the file.
if [ -z "${NEO4J_PASSWORD:-}" ]; then
  NEO4J_PASSWORD_FILE="$PLATFORM_ROOT/config/.neo4j-password"
  if [ ! -f "$NEO4J_PASSWORD_FILE" ]; then
    arg_fail "NEO4J_PASSWORD not in env and $NEO4J_PASSWORD_FILE not found"
  fi
  # Route a read failure through arg_fail; otherwise `set -e` would abort on
  # the failed substitution with a raw cat error instead of the FAIL line.
  NEO4J_PASSWORD="$(cat "$NEO4J_PASSWORD_FILE" 2>/dev/null)" \
    || arg_fail "cannot read $NEO4J_PASSWORD_FILE"
  # An empty file is as good as no password — the check above already treats
  # an empty NEO4J_PASSWORD as absent — so fail here rather than export it.
  [ -n "$NEO4J_PASSWORD" ] || arg_fail "$NEO4J_PASSWORD_FILE is empty"
  export NEO4J_PASSWORD
fi

# NEO4J_URI is hard-required (Task 787). The wrapper does not fabricate a
# default — the platform server sets this at spawn time and the operator can
# pass it explicitly when running standalone.
if [ -z "${NEO4J_URI:-}" ]; then
  arg_fail "NEO4J_URI not set (no default — set in env)"
fi

# NEO4J_USER falls back to "neo4j" when unset or empty; the resolved platform
# root is exported as MAXY_PLATFORM_ROOT for the child process.
export NEO4J_USER="${NEO4J_USER:-neo4j}"
export MAXY_PLATFORM_ROOT="$PLATFORM_ROOT"

# Hand off to node. ingest.mjs handles all observability; the wrapper does
# not double-log. exec replaces this shell, so node's exit code is the
# script's exit code, and the original arguments are forwarded untouched.
exec node "$INGEST_MJS" "$@"
@@ -1,122 +1,74 @@
1
1
  ---
2
2
  name: whatsapp-import
3
- description: Import a WhatsApp `_chat.txt` export into a {{productName}} Neo4j graph as a Conversation with chronologically-chained Messages, then derive typed insights (mentions, preferences, commitments, observed relationships) as first-class graph entities. Triggers when the user asks to import a WhatsApp chat, ingest a `_chat.txt` file, or drops the contents of an "Export Chat" folder into chat. Distinct from the live `whatsapp` plugin (Baileys); this is import-from-export only.
3
+ description: Import a WhatsApp `_chat.txt` export into a {{productName}} Neo4j graph as a Conversation with chronologically-chained Messages, then derive typed insights (mentions, tasks, preferences, observed relationships) as first-class graph entities. Triggers when the user asks to import a WhatsApp chat, ingest a `_chat.txt` file, or drops the contents of an "Export Chat" folder into chat. Distinct from the live `whatsapp` plugin (Baileys); this is import-from-export only.
4
4
  ---
5
5
 
6
6
  # WhatsApp Import
7
7
 
8
- Ingests a WhatsApp "Export Chat" archive — `_chat.txt` plus media attachments — into a {{productName}} Neo4j graph. Two passes:
8
+ Ingests a WhatsApp "Export Chat" archive — `_chat.txt` plus media attachments — into a {{productName}} Neo4j graph in one in-process pipeline: parse → archive-write → insight (Haiku via OAuth). The deterministic ingest is delivered as a single Bash entry; the database-operator subagent invokes it once and yields with the operator-facing summary.
9
9
 
10
- 1. **Deterministic ingest** Conversation + Messages + chronology + sender edges, written via the fixed Cypher inside `memory-archive-write`.
11
- 2. **Insight extraction** — analysis-derived nodes and edges (mentions, topics, preferences, commitments, observed relationships) written via existing `memory-write` / `memory-update` tools after pass 1 completes.
10
+ ## Owner confirmation (mandatory first step)
12
11
 
13
- Every node and edge carries `source='whatsapp'`, `createdByAgent='whatsapp-import'`, `createdBySession=<this-skill-run-uuid>`, and `archiveSourceFile=<sha256-prefix>` so the operator can grep this ingest's footprint at any time.
12
+ A WhatsApp export belongs to exactly one operator (the person whose phone produced the export). The owner is metadata stamped on the `:Conversation` node — the row-level participants are auto-created by the script and promoted in a later semantic-enrichment pass.
14
13
 
15
- ## Owner + participant confirmation (mandatory first step)
14
+ 1. List every `:AdminUser` in the graph via `mcp__graph__maxy-graph-read_neo4j_cypher`:
15
+ `MATCH (u:AdminUser) RETURN elementId(u) AS elementId, u.name AS name, u.userId AS userId, u.accountId AS accountId`
16
+ 2. Ask the operator: "Who exported this `_chat.txt`?" Accept an existing `:AdminUser` elementId. If the operator names someone not in the graph, surface that as a blocker — auto-creating an unknown owner is refused.
17
+ 3. Echo the chosen owner back verbatim. Require explicit yes/no confirmation.
18
+ 4. Persist the resolved owner's `elementId` for the script invocation as `--owner-element-id`.
16
19
 
17
- A WhatsApp export belongs to exactly one operator (the person whose phone produced the export) and contains messages from a known set of senders. Both must be confirmed before any line is written. The flow:
20
+ ## Invoke the ingest
18
21
 
19
- ### Step 1 — Owner
20
-
21
- The owner is metadata: who exported this chat. Stamped on the `:Conversation` node as `createdBySession` provenance. The owner is **not** a row-level subject — every message has its own sender.
22
-
23
- 1. List every `:AdminUser` in the graph: `{userId, name, accountId(s)}`.
24
- 2. Ask the operator: "Who exported this `_chat.txt`?" Accept either an existing `:AdminUser` userId or a new external `:Person` (with `givenName`+`familyName`+ at least one of `email`/`telephone`).
25
- 3. Echo the chosen owner back verbatim. Require explicit yes/no confirmation before proceeding.
26
- 4. Persist the resolved owner's `elementId` as `$ownerNodeId`.
27
-
28
- ### Step 2 — Participants
29
-
30
- Parse the `_chat.txt` by invoking `mcp__memory__whatsapp-export-parse(filePath: <path>, timezone: <iana-zone>)` (Task 805). The tool returns `{conversationId, archiveSourceFile, parsedLines[], counters}` — the deterministic Node parser in [platform/plugins/whatsapp-import/lib/](../../../lib/) walks the line grammar; the agent does not tokenise lines itself. See [export-parse.md](references/export-parse.md) for the parser's behaviour reference.
31
-
32
- For each distinct sender name in `parsedLines[]`, capture: `{senderName, firstSeen, lastSeen, messageCount}`. Display the list in chat with these counts; the operator sees who they're about to ingest before any write.
33
-
34
- For each distinct sender, ask the operator to choose:
35
-
36
- - **Existing `:AdminUser`** — typically themselves (when their own messages are in the export). Resolve via `memory-search` by `userId` or `name`. Persist the elementId.
37
- - **Existing `:Person`** — match by `givenName`+`familyName`, `email`, or `telephone`. Use `memory-search` to find candidates; if multiple match, surface them and require operator pick. Persist the elementId.
38
- - **New external `:Person`** — mint via `memory-write` with `givenName`+`familyName`+ at least one of `email`/`telephone`. Provenance: `source='whatsapp'`, `createdByAgent='whatsapp-import'`, `createdBySession=$sessionId`. Capture the resulting elementId.
39
- - **Skip** — exclude this sender's messages from the import. Operator may pick this for noisy auto-replies, bots, etc.
40
-
41
- **Refusing to invent identity is load-bearing.** The skill never silently mints a `:Person` from a WhatsApp display name alone (which is often just a phone number or "Mum"). A new `:Person` requires confirmation of `givenName`+`familyName`+ contact information. This is the first contract `feedback_archives_are_not_documents.md` enforces.
42
-
43
- ### Step 3 — Persist the participant map
44
-
45
- Build `$participantNodeIds = {senderName → senderElementId}`. Echo back to operator one final time (`Confirm: 5 senders, 4 :AdminUser/:Person, 1 skipped — proceed?`). On yes, the participant map flows into every row of the `memory-archive-write` call.
46
-
47
- ### Step 4 — Same-person, multiple display-names heuristic
48
-
49
- WhatsApp displays a sender by their phone-saved name when known, by phone number otherwise. If the operator's contact list changed mid-conversation, the same person may appear under two distinct senderNames (`+44 7...` and `Joel Smalley`). Detect this heuristically: surface any senderName that is digit-only (a phone number) and ask `Is "+44 7..." the same person as "Joel Smalley"?`. On yes, both senderNames map to the same elementId. On no, keep them distinct.
50
-
51
- ## Selective-ingest threshold (bulk archives)
52
-
53
- WhatsApp 1:1 chats commonly contain 1,000–10,000 messages; group chats 10,000+. Writing all of them in one shot defeats compression-on-write and produces a landfill graph. The skill compresses by interrogating the operator before the bulk write.
54
-
55
- **Threshold:** when the parsed `rows[]` count exceeds **100 messages**, pause and ask the operator to filter along the natural axes:
56
-
57
- - **Date range** — "messages between 2026-01-01 and 2026-04-01"
58
- - **Sender** — "only messages from Joel and Sarah"
59
- - **Keyword** — body contains "Q3 report" / "office hours" / etc.
60
- - **All** — accept the full archive (rare; only for small chats or when the operator explicitly wants every message)
61
-
62
- Apply the chosen filter to `rows[]` before invoking `memory-archive-write`. Compress on write, never after — a 5,000-message blanket import is noise; a 200-message filtered import is signal.
63
-
64
- When the threshold trips, emit one log line BEFORE the prompt:
22
+ Single Bash call:
65
23
 
24
+ ```bash
25
+ bash platform/plugins/whatsapp-import/bin/whatsapp-ingest.sh <archive.zip|dir|_chat.txt> \
26
+ --owner-element-id <id> \
27
+ --scope <admin|public>
66
28
  ```
67
- [whatsapp-import] selective-ingest-gate count=<n> threshold=100 axes=date,sender,keyword
68
- ```
69
-
70
- ## Stable IDs
71
-
72
- - `conversationId = whatsapp-export:<sha256(_chat.txt bytes)>:<accountId>` — same archive, same operator account → idempotent re-import. Different archive (even for the same conversation) → different conversationId.
73
- - `messageId = whatsapp-export:<conversationId>:<lineHash>` where `lineHash = sha256(<original-line-text>)`. Re-imports of the same archive are zero-write idempotent; re-exports with appended messages add the delta cleanly.
74
29
 
75
- ## Timezone
30
+ Optional flags:
31
+ - `--account-id <id>` — explicit account id when more than one exists under `data/accounts/` (Phase 0 has one).
32
+ - `--timezone <iana>` — IANA zone for timestamps (default `Europe/London`).
33
+ - `--date-format <DD/MM/YY|MM/DD/YY|DD/MM/YYYY|MM/DD/YYYY>` — override auto-detect for ambiguous locales.
34
+ - `--no-insight` — skip the Haiku insight pass (parse + archive-write only).
76
35
 
77
- WhatsApp's `[DD/MM/YY, HH:MM:SS]` line prefix lacks a timezone offset. The skill **must not silently assume UTC.** When the timezone is non-obvious (the operator hasn't said where they were when the messages were sent), ask:
36
+ The script:
37
+ - Unzips the archive if needed; locates `_chat.txt`.
38
+ - Parses the file deterministically (year shape, sender/body grammar, timezone offset).
39
+ - Auto-creates one `:Person {participantStatus:'auto-created'}` per distinct senderName, scoped to the account, MERGEd on `(accountId, source, name)`.
40
+ - Writes the Conversation + Messages + edges + NEXT chronology via `memoryArchiveWrite` directly (no MCP envelope between steps).
41
+ - Runs the insight pass: chunked Haiku calls (1500 messages per chunk), in-process Cypher writes of `:Observation` nodes connected `:OBSERVED_IN`→`:Conversation`. Insight failures log and proceed; archive-write success is what determines exit code.
78
42
 
79
- > The export uses `[DD/MM/YY, HH:MM:SS]` but doesn't include a timezone. Which timezone should I tag these messages with? (e.g., Europe/London, America/New_York, UTC)
43
+ ## Outcome
80
44
 
81
- Convert each parsed timestamp to ISO 8601 with the supplied offset before passing to `memory-archive-write`. The Cypher's `datetime()` then preserves the exact instant.
45
+ - **Exit 0** + JSON summary on stdout:
46
+ ```json
47
+ {"conversationId": "whatsapp-export:<sha>:<accountId>",
48
+ "parsed": 1707, "mediaSkipped": 0, "systemSkipped": 0,
49
+ "createdMessages": 1707,
50
+ "insightCounters": {"chunks": 2, "mentions": 12, "tasks": 3, "preferences": 1, "observedRelationships": 0},
51
+ "ms": 67000}
52
+ ```
53
+ Surface this to the operator as one chat message: `Imported 1707 messages from <archive> into conversation <conversationId>; insights: 12 mentions, 3 tasks, 1 preference, 0 relationships.`
82
54
 
83
- ## Execution model
55
+ - **Exit non-zero** + one stderr line: `[whatsapp-ingest] FAIL phase=<parse|archive-write|argv|import|uncaught> reason="<sanitised first 80c>" ...`. Surface this verbatim to the operator and yield. **Do not retry. Do not edit parser source.** The archive-ingest-surface-gate denies parser-source edits, JS test runners, and the legacy `whatsapp-export-parse` / `whatsapp-export-insight-write` / `memory-archive-write{archiveType:whatsapp-export}` MCP tools — none of those are escape hatches in your surface.
84
56
 
85
- 1. **Parse** — Invoke `mcp__memory__whatsapp-export-parse(filePath, timezone, dateFormat?)`. The deterministic parser walks the line grammar, returns `{conversationId, archiveSourceFile, parsedLines[], counters}`. LOUD-FAIL on encoding error / empty file / malformed timestamp surfaces as the tool's `isError` content; the skill aborts the import without further work. The `archiveSourceFile` is `whatsapp-export:<sha256-of-file-bytes>` — keep this exact value; `memory-archive-write` will recompute and assert it matches in Step 6.
86
- 2. **Owner+participant confirmation** — Steps 1–3 above. Persist `$ownerNodeId` + `$participantNodeIds`.
87
- 3. **Selective-ingest gate** — If `parsedLines.length > 100`, pause for filter selection. Apply filter.
88
- 4. **Build rows[]** — Map each parsed line to `{messageId, conversationId, senderNodeId, senderName, dateSent (ISO 8601), body, sequenceIndex}`. Compute `messageId` per line.
89
- 5. **Build conversation block** — `{conversationId, archiveSourceFile, firstMessageAt, lastMessageAt, participantCount, messageCount}` from the rows[]. `conversationId` and `archiveSourceFile` come straight from the parser's return value.
90
- 6. **Dispatch** `mcp__memory__memory-archive-write` once with `archiveType='whatsapp-export'`, `ownerNodeId`, `conversation`, `participantNodeIds` (the distinct elementIds from the map), `rows`, `sessionId`, **and `archiveFilePath: <same path you passed to whatsapp-export-parse>`**. The server re-computes `sha256(file)` and asserts it matches `conversation.archiveSourceFile` before any write — mismatch is a hard reject (Task 805 silent-substitution gate). The tool MERGEs the Conversation, MERGEs Messages, links PART_OF + SENT + PARTICIPANT_IN edges per row, and runs the `finalize` hook to MERGE the NEXT chronology by dateSent ordering.
91
- 7. **Emit per-export log line:**
92
- ```
93
- [whatsapp-import] file=<chat.txt> conversationId=<cid> participants=<n> messages-parsed=<n> media-skipped=<n> system-skipped=<n> ms=<elapsed>
94
- ```
95
- 8. **Insight pass** — Run pass 2 per [insight-extraction.md](references/insight-extraction.md). Read the just-written messages via `memory-search`, classify within the specialist's own LLM turn, and write typed observations through `memory-write` / `memory-update`. **`:MENTIONS` and `:RELATED_TO` edges route through `mcp__memory__whatsapp-export-insight-write` (Task 805) — that tool re-runs `memory-search` server-side and asserts the agent's claimed candidate elementIds appear in the live result; rejects single-first-name names without `disambiguatorOk=true`; refuses `:RELATED_TO` writes without `operatorConfirmed=true`. The agent never authors `:MENTIONS` / `:RELATED_TO` Cypher directly.** Emit:
96
- ```
97
- [whatsapp-import] insight-pass model=sonnet chunks=<n> mentions=<n> preferences=<n> tasks=<n> observed-relationships=<n> novel-insights=<n> ms=<elapsed>
98
- ```
57
+ ## Idempotency
99
58
 
100
- ## Doctrine raw Cypher and `cypher-shell` are forbidden in this skill
59
+ Re-running the script against the same archive is a no-op: `createdMessages: 0`, NEXT chain unchanged, conversation scalars refreshed via `lastImportedAt` / `lastImportedBySession`. Re-exports with appended messages add only the delta and extend the NEXT chain to cover the new tail.
101
60
 
102
- All writes route through `mcp__memory__memory-archive-write` (bulk Conversation+Messages) or `mcp__memory__memory-write` / `mcp__memory__memory-update` (second-pass typed observations). The agent never authors Cypher. If the operator hits a write shape these tools do not express, **do not improvise** — surface the gap as a structured task per the database-operator's LOUD-FAIL prerogative. See [database-operator.md](../../../../templates/specialists/agents/database-operator.md#prerogatives).
103
-
104
- ## LOUD-FAIL on parse errors
105
-
106
- `mcp__memory__whatsapp-export-parse` is the LOUD-FAIL surface (Task 805). When the grammar can't classify a line, the tool throws with `parse-error file=<...> line=<n> reason=<r>` and the MCP layer returns `isError: true` with that message. The skill MUST abort the import on a parse-error response — do not retry, do not "best effort" the rest of the file. The operator gets a named error and re-exports if necessary.
107
-
108
- The deterministic parser also LOUD-FAILs on UTF-8 decode failure (`reason=encoding-error`), zero parsed lines (`reason=not-a-_chat.txt`), and missing required arguments (`reason=accountId|timezone`). All of these surface through the same tool error path; the agent does not need to detect them itself.
109
-
110
- **Structurally enforced (Task 846).** The harness-level `platform/plugins/admin/hooks/archive-ingest-gate.sh` watches every `mcp__memory__whatsapp-export-parse` PostToolUse event; on `isError: true` it sets a parse-error flag and blocks every subsequent tool call this turn until the operator submits the next prompt. Editing parser source under `platform/plugins/*/lib/*` and running `vitest`/`bun test`/`npm test`/`npx jest` is denied unconditionally. The skill's behavioural rule above is the contract; the hook is the enforcement. See [database-operator's LOUD-FAIL prerogative](../../../../templates/specialists/agents/database-operator.md#prerogatives) and [.docs/hooks.md](../../../../../.docs/hooks.md) for the gate's full surface.
61
+ ## Verification (post-write)
111
62
 
112
- ## Idempotency contract
63
+ Run via `mcp__graph__maxy-graph-read_neo4j_cypher`:
113
64
 
114
- Re-importing the same `_chat.txt` is a no-op (`createdMessages=0`, `mergedMessages=N`, NEXT chain unchanged). Re-importing a re-exported file with appended messages adds only the delta and extends the NEXT chain to cover the new tail. Both paths are server-enforced via MERGE on `messageId` and the finalize hook's idempotent NEXT-MERGE.
65
+ - `MATCH (c:Conversation:WhatsAppConversation {conversationId: $cid}) RETURN c.messageCount, c.participantCount, c.firstMessageAt, c.lastMessageAt` — agrees with the JSON summary.
66
+ - `MATCH (m:Message)-[:PART_OF]->(c {conversationId: $cid}) RETURN count(m)` — equals `parsed`.
67
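+ - `MATCH p=(:Message {conversationId: $cid})-[:NEXT*]->() WITH max(length(p)) AS chain RETURN chain` — equals the Conversation's `messageCount - 1`. On a first import of a fresh archive this is also `createdMessages - 1`; on an idempotent re-run `createdMessages` is 0, so compare against `messageCount`.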
68
+ - `MATCH (o:Observation {createdBySession: $sessionId})-[:OBSERVED_IN]->(:Conversation {conversationId: $cid}) RETURN o.kind, count(*)` — agrees with `insightCounters`.
115
69
 
116
- ## Verification (post-write)
70
+ ## What this is not
117
71
 
118
- - `MATCH (c:Conversation:WhatsAppConversation {conversationId: $cid}) RETURN c.messageCount, c.participantCount, c.firstMessageAt, c.lastMessageAt` agrees with the per-export log line counts.
119
- - `MATCH (m:Message:WhatsAppMessage)-[:PART_OF]->(c {conversationId: $cid}) RETURN count(m)` equals post-filter line count.
120
- - `MATCH p=(m:Message {conversationId: $cid})-[:NEXT*]->(end) WITH max(length(p)) AS chain RETURN chain` — equals `messageCount - 1`.
121
- - `MATCH (m:Message {conversationId: $cid}) RETURN min(m.dateSent), max(m.dateSent)` — matches the file's first/last lines (modulo the operator-confirmed timezone).
122
- - `MATCH (n) WHERE n.createdBySession = $sessionId RETURN labels(n) AS l, count(*) ORDER BY count(*) DESC` — the full graph footprint of this ingest, sortable by label.
72
+ - **Not** the live `whatsapp` plugin. That plugin (Baileys QR pairing) holds messages in an in-memory store cleared on restart. This plugin imports historical exports into Neo4j as persistent graph nodes.
73
+ - **Not** a media-transcription pipeline. Voice notes, photos, PDFs are skipped at parse with a counter logged.
74
+ - **Not** the operator-level semantic enrichment pass. The auto-created participants and `:Observation` nodes are deliberately raw — disambiguation, edge wiring to specific `:Person` / `:Task` / `:Service` entities, and operator confirmation of `:RELATED_TO`-style relationships are the responsibility of a later pass (Task 859).
@@ -1,6 +1,6 @@
1
1
  # Reference: `_chat.txt` parsing — implementation reference
2
2
 
3
- > **Task 805 — this is no longer operator instruction.** The agent does NOT walk this grammar in its own LLM turn. Parsing runs deterministically in [`platform/plugins/whatsapp-import/lib/src/parse-export.ts`](../../../lib/src/parse-export.ts), invoked via `mcp__memory__whatsapp-export-parse`. The vitest grid in [`lib/src/__tests__/parse-export.test.ts`](../../../lib/src/__tests__/parse-export.test.ts) is the executable contract; this prose is the human-readable companion. Extend the grammar by adding a failing test first.
3
+ > **Task 855 — this is no longer operator instruction.** The agent does NOT walk this grammar in its own LLM turn. Parsing runs deterministically in [`platform/plugins/whatsapp-import/lib/src/parse-export.ts`](../../../lib/src/parse-export.ts), invoked in-process by [`bin/ingest.mjs`](../../../bin/ingest.mjs) (which the operator calls via [`bin/whatsapp-ingest.sh`](../../../bin/whatsapp-ingest.sh) — the single deterministic Bash entry). The legacy MCP wrapper is blocked at the harness gate. The vitest grid in [`lib/src/__tests__/parse-export.test.ts`](../../../lib/src/__tests__/parse-export.test.ts) is the executable contract; this prose is the human-readable companion. Extend the grammar by adding a failing test first.
4
4
 
5
5
  WhatsApp's "Export Chat" produces a UTF-8 text file with a deterministic line grammar. This reference describes what the parser library does when it converts that file into the `{senderName, dateSent, body, sequenceIndex}[]` structure the SKILL.md consumes.
6
6
 
@@ -98,15 +98,24 @@ fi
98
98
  # webfetch-preflight.mjs: detects JS-SPA shells before WebFetch's
99
99
  # 60s extraction timeout (Task 536). Fail-open on any error;
100
100
  # on positive SPA detection exits 2 with WEBFETCH_CANNOT_READ_JS_SPA.
101
- # archive-ingest-gate.sh (Task 846): blocks plugin-source edits, JS
102
- # test runners, and any tool call after a *-export-parse /
103
- # *-import-parse tool returns isError=true (cleared on next user
104
- # prompt). Wired at three points: explicit Edit/Write/NotebookEdit/
105
- # Bash matchers (path + test-runner enforcement), a no-matcher
106
- # PreToolUse entry (post-parse-error gate fires on every tool),
101
+ # archive-ingest-surface-gate.sh (Task 855; supersedes Task 846):
102
+ # narrows the database-operator subagent's effective surface
103
+ # during WhatsApp archive ingestion to exactly one Bash entry
104
+ # (whatsapp-import/bin/whatsapp-ingest.sh) plus read-only
105
+ # neighbours, by blocking the legacy MCP deviation tools
106
+ # (mcp__memory__whatsapp-export-parse,
107
+ # mcp__memory__whatsapp-export-insight-write, and
108
+ # mcp__memory__memory-archive-write when archiveType is
109
+ # whatsapp-export). Preserves Task 846's plugin-source edit,
110
+ # JS test-runner, and post-parse-error blocks (LinkedIn and
111
+ # future per-source archive parsers still use the legacy MCP
112
+ # path until they migrate to deterministic Bash entries).
113
+ # Wired at three points: explicit Edit/Write/NotebookEdit/Bash
114
+ # matchers, a no-matcher PreToolUse entry (catches the legacy
115
+ # MCP tools; the post-parse-error gate fires on every tool),
107
116
  # and a PostToolUse regex matcher for parse tools.
108
- # UserPromptSubmit — archive-ingest-gate.sh clears the parse-error
109
- # flag when the operator's next prompt arrives.
117
+ # UserPromptSubmit — archive-ingest-surface-gate.sh clears the
118
+ # parse-error flag when the operator's next prompt arrives.
110
119
  HOOKS_PATH="\$PLATFORM_ROOT/plugins/admin/hooks"
111
120
  cat > "$ACCOUNT_SETTINGS" << SETTINGS_EOF
112
121
  {
@@ -119,32 +128,32 @@ cat > "$ACCOUNT_SETTINGS" << SETTINGS_EOF
119
128
  "matcher": "Write",
120
129
  "hooks": [
121
130
  { "type": "command", "command": "bash $HOOKS_PATH/pre-tool-use.sh admin" },
122
- { "type": "command", "command": "bash $HOOKS_PATH/archive-ingest-gate.sh" }
131
+ { "type": "command", "command": "bash $HOOKS_PATH/archive-ingest-surface-gate.sh" }
123
132
  ]
124
133
  },
125
134
  {
126
135
  "matcher": "Edit",
127
136
  "hooks": [
128
137
  { "type": "command", "command": "bash $HOOKS_PATH/pre-tool-use.sh admin" },
129
- { "type": "command", "command": "bash $HOOKS_PATH/archive-ingest-gate.sh" }
138
+ { "type": "command", "command": "bash $HOOKS_PATH/archive-ingest-surface-gate.sh" }
130
139
  ]
131
140
  },
132
141
  {
133
142
  "matcher": "NotebookEdit",
134
143
  "hooks": [
135
- { "type": "command", "command": "bash $HOOKS_PATH/archive-ingest-gate.sh" }
144
+ { "type": "command", "command": "bash $HOOKS_PATH/archive-ingest-surface-gate.sh" }
136
145
  ]
137
146
  },
138
147
  {
139
148
  "matcher": "Bash",
140
149
  "hooks": [
141
150
  { "type": "command", "command": "bash $HOOKS_PATH/pre-tool-use.sh admin" },
142
- { "type": "command", "command": "bash $HOOKS_PATH/archive-ingest-gate.sh" }
151
+ { "type": "command", "command": "bash $HOOKS_PATH/archive-ingest-surface-gate.sh" }
143
152
  ]
144
153
  },
145
154
  {
146
155
  "hooks": [
147
- { "type": "command", "command": "bash $HOOKS_PATH/archive-ingest-gate.sh" }
156
+ { "type": "command", "command": "bash $HOOKS_PATH/archive-ingest-surface-gate.sh" }
148
157
  ]
149
158
  },
150
159
  {
@@ -164,14 +173,14 @@ cat > "$ACCOUNT_SETTINGS" << SETTINGS_EOF
164
173
  {
165
174
  "matcher": "mcp__.*__.*-(export|import)-parse$",
166
175
  "hooks": [
167
- { "type": "command", "command": "bash $HOOKS_PATH/archive-ingest-gate.sh" }
176
+ { "type": "command", "command": "bash $HOOKS_PATH/archive-ingest-surface-gate.sh" }
168
177
  ]
169
178
  }
170
179
  ],
171
180
  "UserPromptSubmit": [
172
181
  {
173
182
  "hooks": [
174
- { "type": "command", "command": "bash $HOOKS_PATH/archive-ingest-gate.sh" }
183
+ { "type": "command", "command": "bash $HOOKS_PATH/archive-ingest-surface-gate.sh" }
175
184
  ]
176
185
  }
177
186
  ]
@@ -3,7 +3,7 @@ name: database-operator
3
3
  description: "Document and archive ingestion and ad-hoc graph operations — running the universal `document-ingest` skill for any unstructured document (PDF, text, transcript, web page, audio, video) and per-source archive-import skills (LinkedIn Basic Data Export today; CRM-type seed archives as each plugin ships), plus operator-driven graph hygiene (prune orphans, deduplicate entities, add edges, normalise labels). Delegate when the operator uploads any document, drops an archive directory into chat, or asks for any graph operation that is not a routine per-turn write."
4
4
  summary: "Ingests every unstructured document and external archive into your graph (LinkedIn today; other CRM sources in future) and handles ad-hoc graph tidy-ups on request. For example, when you upload a CV, a pricing guide, or a contract; when you drop a LinkedIn export folder into chat; or when you ask to prune orphan nodes, merge duplicate people, or add edges between entities."
5
5
  model: claude-sonnet-4-6
6
- tools: Read, Bash, Glob, Grep, mcp__graph__maxy-graph-read_neo4j_cypher, mcp__graph__maxy-graph-write_neo4j_cypher, mcp__graph__maxy-graph-get_neo4j_schema, mcp__memory__memory-write, mcp__memory__memory-update, mcp__memory__memory-delete, mcp__memory__memory-search, mcp__memory__memory-rank, mcp__memory__memory-reindex, mcp__memory__memory-find-candidates, mcp__memory__memory-ingest, mcp__memory__memory-ingest-extract, mcp__memory__memory-ingest-web, mcp__memory__memory-classify, mcp__memory__memory-archive-write, mcp__memory__whatsapp-export-parse, mcp__memory__whatsapp-export-insight-write, mcp__memory__graph-prune-denylist-list, mcp__memory__graph-prune-denylist-add, mcp__memory__graph-prune-denylist-remove, mcp__contacts__contact-create, mcp__contacts__contact-update, mcp__contacts__contact-lookup, mcp__contacts__contact-list, mcp__admin__file-attach, mcp__admin__plugin-read
6
+ tools: Read, Bash, Glob, Grep, mcp__graph__maxy-graph-read_neo4j_cypher, mcp__graph__maxy-graph-write_neo4j_cypher, mcp__graph__maxy-graph-get_neo4j_schema, mcp__memory__memory-write, mcp__memory__memory-update, mcp__memory__memory-delete, mcp__memory__memory-search, mcp__memory__memory-rank, mcp__memory__memory-reindex, mcp__memory__memory-find-candidates, mcp__memory__memory-ingest, mcp__memory__memory-ingest-extract, mcp__memory__memory-ingest-web, mcp__memory__memory-classify, mcp__memory__memory-archive-write, mcp__memory__graph-prune-denylist-list, mcp__memory__graph-prune-denylist-add, mcp__memory__graph-prune-denylist-remove, mcp__contacts__contact-create, mcp__contacts__contact-update, mcp__contacts__contact-lookup, mcp__contacts__contact-list, mcp__admin__file-attach, mcp__admin__plugin-read
7
7
  ---
8
8
 
9
9
  # Database Operator
@@ -30,7 +30,17 @@ A landfill graph defeats EVIDENCE-BASED: search returns noise, the agent re-writ
30
30
 
31
31
  The pre-publish gate (`platform/scripts/verify-skill-tool-surface.sh`) statically asserts every shipped skill's prescribed `mcp__*` tokens resolve against your frontmatter `tools:` list, so a missing tool is a build error, not a production discovery. LOUD-FAIL is the runtime backstop when that gate is bypassed (e.g. operator-edited skill).
32
32
 
33
- **Archive-ingest gate (Task 846).** During archive imports the LOUD-FAIL contract is structurally enforced by `platform/plugins/admin/hooks/archive-ingest-gate.sh`. When any `mcp__*__*-export-parse` / `mcp__*__*-import-parse` tool returns `isError: true`, the hook records a parse-error flag — every subsequent tool call in this turn is blocked at the harness, and you must yield back to the admin agent with a user-facing message naming the parse-error. The flag clears on the operator's next prompt. The same hook denies `Edit`/`Write`/`NotebookEdit` against `platform/plugins/*/lib/*` and Bash invocations of `vitest`/`bun test`/`npm test`/`npx jest` regardless of state — the operator does not own plugin source. *Failure symptoms (now harness-blocked):* appending vitest cases to a parser test file after `whatsapp-export-parse` returned isError, running `npx vitest` to "diagnose" a parser, editing `parse-export.ts` to "fix" a malformed input. Treat these blocks as confirmation that the gate is doing its job — surface the parse-error and yield, do not retry around the block.
33
+ **Archive-ingest surface gate (Task 855; supersedes Task 846).** Each per-source archive importer ships a single deterministic Bash entry under `platform/plugins/<name>/bin/<name>-ingest.sh`. The harness-level gate at `platform/plugins/admin/hooks/archive-ingest-surface-gate.sh` enforces the surface filter that makes the LLM mechanically incapable of deviating mid-ingest:
34
+
35
+ - **Legacy WhatsApp MCP tools blocked:** `mcp__memory__whatsapp-export-parse` and `mcp__memory__whatsapp-export-insight-write` are blocked unconditionally; `mcp__memory__memory-archive-write` is blocked only when `archiveType=whatsapp-export`. The deterministic Bash entry (`platform/plugins/whatsapp-import/bin/whatsapp-ingest.sh`) is the only supported path; parse, archive-write, and insight all run in-process.
36
+ - **Non-WhatsApp archiveTypes flow unchanged:** `memory-archive-write` with `archiveType=linkedin-connections` (and future per-source archiveTypes) is allowed. Each per-source plugin migrates to its own `bin/<name>-ingest.sh` on its own schedule.
37
+ - **Plugin-source edits blocked** (preserved from Task 846): `Edit`/`Write`/`NotebookEdit` against `platform/plugins/*/lib/*` is denied. The operator does not own plugin source.
38
+ - **JS test runners blocked** (preserved): `vitest` / `bun test` / `npm test` / `npx jest` Bash commands are denied. The operator does not run plugin tests.
39
+ - **Post-parse-error flag** (preserved for non-migrated parsers): when any `mcp__*__*-export-parse` / `mcp__*__*-import-parse` tool returns `isError: true`, every subsequent tool call this turn is blocked until the operator submits a new prompt.
40
+
41
+ Every PreToolUse decision emits `[archive-ingest-gate] decision=<allow|block> tool=<n> reason=<r> ...` to server.log so the full trail of one ingest is greppable alongside the `[whatsapp-ingest]` script lines.
42
+
43
+ *Failure symptoms (now harness-blocked):* invoking `mcp__memory__whatsapp-export-parse` instead of the Bash script, calling `mcp__memory__memory-archive-write` with `archiveType:'whatsapp-export'` directly, editing `parse-export.ts` to "fix" a malformed input, running `npx vitest` to "diagnose" a parser. Treat these blocks as confirmation that the gate is doing its job — invoke the script, surface its FAIL line if it fails, and yield. There is no path around the block.
34
44
 
35
45
  ---
36
46
 
@@ -109,7 +119,7 @@ The classifier maps document sections to typed ontology labels. It does not inve
109
119
  Per-source archive imports keep their own skill because their CSVs already encode entity types deterministically and need no LLM classifier. Currently shipped:
110
120
 
111
121
  - **linkedin-import** — LinkedIn Basic Data Export. Ships with references for `Profile.csv` and `Connections.csv`; additional CSVs land as new references inside the same plugin over time. Path: `platform/plugins/linkedin-import/skills/linkedin-import/SKILL.md`. Load via `plugin-read` before any ingestion.
112
- - **whatsapp-import** — WhatsApp `_chat.txt` export ingestion. Imports historical Conversation + Messages with chronological NEXT chain via `memory-archive-write` (archiveType=`whatsapp-export`), then derives typed insights (mentions, preferences, commitments, observed relationships) inline through existing memory tools. Distinct from the live `whatsapp` plugin (Baileys QR pairing, in-memory store). Path: `platform/plugins/whatsapp-import/skills/whatsapp-import/SKILL.md`. Load via `plugin-read` before any ingestion.
122
+ - **whatsapp-import** — WhatsApp `_chat.txt` export ingestion. Imports historical Conversation + Messages with chronological NEXT chain plus typed insights (mentions, tasks, preferences, observed relationships) via the single deterministic Bash entry at `platform/plugins/whatsapp-import/bin/whatsapp-ingest.sh` — parse, archive-write, and Haiku insight all run in-process; no MCP envelope between steps (Task 855). The legacy `mcp__memory__whatsapp-export-parse` / `whatsapp-export-insight-write` / `memory-archive-write{archiveType:whatsapp-export}` MCP tools are blocked at the harness; the Bash script is the only supported path. Distinct from the live `whatsapp` plugin (Baileys QR pairing, in-memory store). SKILL: `platform/plugins/whatsapp-import/skills/whatsapp-import/SKILL.md`. Load via `plugin-read` before any ingestion.
113
123
 
114
124
  Future CRM-type seed plugins (HubSpot, Salesforce, Pipedrive, iCloud contacts, Gmail CSV, etc.) will ship under the same pattern — each as its own opt-in plugin, each with its own `SKILL.md` path under `platform/plugins/<name>/skills/`. When the admin adds a new archive-import skill, its PLUGIN.md will name itself here and in the admin's `<plugin-manifest>`. No prompt change required.
115
125
 
@@ -1,166 +0,0 @@
1
- #!/usr/bin/env bash
2
- # Regression test for archive-ingest-gate.sh (Task 846).
3
- #
4
- # Six core cases cover the contract (variants 3b–3d and follow-up cases 7–10 below extend them):
5
- # 1. Edit on /platform/plugins/<x>/lib/* is BLOCKED (exit 2).
6
- # 2. Edit on a benign path is ALLOWED (exit 0).
7
- # 3. Bash with `npx vitest` is BLOCKED.
8
- # 4. PostToolUse on whatsapp-export-parse with isError:true sets the flag.
9
- # 5. Subsequent PreToolUse on ANY tool is BLOCKED (post-parse-error gate).
10
- # 6. UserPromptSubmit clears the flag, restoring normal allow behavior.
11
- #
12
- # Tests use ARCHIVE_INGEST_GATE_STATE_DIR to point at a tmp dir so they run
13
- # without a real account layout.
14
-
15
- set -u
16
-
17
- HOOK="$(cd "$(dirname "$0")/.." && pwd)/archive-ingest-gate.sh"
18
- if [[ ! -x "$HOOK" ]]; then
19
- echo "FAIL: $HOOK not executable" >&2
20
- exit 1
21
- fi
22
-
23
- # Per-run isolated state dir
24
- STATE_DIR=$(mktemp -d)
25
- export ARCHIVE_INGEST_GATE_STATE_DIR="$STATE_DIR"
26
- FLAG_FILE="$STATE_DIR/archive-ingest-parse-error.flag"
27
-
28
- cleanup() { rm -rf "$STATE_DIR"; }
29
- trap cleanup EXIT
30
-
31
- PASS=0
32
- FAIL=0
33
-
34
- run_case() {
35
- local name="$1" stdin="$2" expected_exit="$3"
36
- local actual_exit
37
- printf '%s' "$stdin" | bash "$HOOK" >/dev/null 2>/dev/null
38
- actual_exit=$?
39
- if [[ "$actual_exit" -eq "$expected_exit" ]]; then
40
- echo "PASS: $name (exit=$actual_exit)"
41
- PASS=$((PASS + 1))
42
- else
43
- echo "FAIL: $name (expected exit=$expected_exit, got=$actual_exit)" >&2
44
- FAIL=$((FAIL + 1))
45
- fi
46
- }
47
-
48
- # Case 1 — Edit on plugin lib path: BLOCKED
49
- run_case "Edit on platform/plugins/whatsapp-import/lib/src/parse-export.ts → BLOCKED" \
50
- '{"hook_event_name":"PreToolUse","tool_name":"Edit","tool_input":{"file_path":"/Users/x/repo/platform/plugins/whatsapp-import/lib/src/parse-export.ts","old_string":"a","new_string":"b"}}' \
51
- 2
52
-
53
- # Case 2 — Edit on a benign path: ALLOWED
54
- run_case "Edit on README.md → ALLOWED" \
55
- '{"hook_event_name":"PreToolUse","tool_name":"Edit","tool_input":{"file_path":"/Users/x/repo/README.md","old_string":"a","new_string":"b"}}' \
56
- 0
57
-
58
- # Case 3 — Bash with `npx vitest`: BLOCKED
59
- run_case "Bash 'npx vitest run parse-export.test.ts' → BLOCKED" \
60
- '{"hook_event_name":"PreToolUse","tool_name":"Bash","tool_input":{"command":"npx vitest run parse-export.test.ts"}}' \
61
- 2
62
-
63
- # Case 3b — Bash with benign command: ALLOWED
64
- run_case "Bash 'ls -la' → ALLOWED" \
65
- '{"hook_event_name":"PreToolUse","tool_name":"Bash","tool_input":{"command":"ls -la"}}' \
66
- 0
67
-
68
- # Case 3c — Bash with `bun test`: BLOCKED
69
- run_case "Bash 'bun test' → BLOCKED" \
70
- '{"hook_event_name":"PreToolUse","tool_name":"Bash","tool_input":{"command":"bun test"}}' \
71
- 2
72
-
73
- # Case 3d — Bash with `npm test`: BLOCKED
74
- run_case "Bash 'npm test' → BLOCKED" \
75
- '{"hook_event_name":"PreToolUse","tool_name":"Bash","tool_input":{"command":"npm test"}}' \
76
- 2
77
-
78
- # Make sure flag is absent before parse-error simulation
79
- rm -f "$FLAG_FILE"
80
-
81
- # Case 4 — PostToolUse on whatsapp-export-parse with isError:true sets flag
82
- run_case "PostToolUse parse-error sets flag (exit 0, flag side-effect)" \
83
- '{"hook_event_name":"PostToolUse","tool_name":"mcp__memory__whatsapp-export-parse","tool_input":{"filePath":"_chat.txt"},"tool_response":{"isError":true,"content":[{"type":"text","text":"parse-error file=_chat.txt line=1 reason=not-a-_chat.txt"}]}}' \
84
- 0
85
-
86
- if [[ -f "$FLAG_FILE" ]]; then
87
- echo "PASS: parse-error flag created at $FLAG_FILE"
88
- PASS=$((PASS + 1))
89
- else
90
- echo "FAIL: parse-error flag NOT created at $FLAG_FILE" >&2
91
- FAIL=$((FAIL + 1))
92
- fi
93
-
94
- # Case 5 — Subsequent PreToolUse on ANY tool BLOCKED while flag is fresh
95
- run_case "PreToolUse Read after parse-error → BLOCKED" \
96
- '{"hook_event_name":"PreToolUse","tool_name":"Read","tool_input":{"file_path":"/tmp/foo"}}' \
97
- 2
98
-
99
- run_case "PreToolUse Bash after parse-error → BLOCKED" \
100
- '{"hook_event_name":"PreToolUse","tool_name":"Bash","tool_input":{"command":"echo hi"}}' \
101
- 2
102
-
103
- # Case 6 — UserPromptSubmit clears flag
104
- run_case "UserPromptSubmit clears flag (exit 0)" \
105
- '{"hook_event_name":"UserPromptSubmit","prompt":"retry"}' \
106
- 0
107
-
108
- if [[ ! -f "$FLAG_FILE" ]]; then
109
- echo "PASS: UserPromptSubmit cleared flag"
110
- PASS=$((PASS + 1))
111
- else
112
- echo "FAIL: UserPromptSubmit did NOT clear flag" >&2
113
- FAIL=$((FAIL + 1))
114
- fi
115
-
116
- # Case 7 — After clearance, normal allow resumes
117
- run_case "PreToolUse Read after clearance → ALLOWED" \
118
- '{"hook_event_name":"PreToolUse","tool_name":"Read","tool_input":{"file_path":"/tmp/foo"}}' \
119
- 0
120
-
121
- # Case 8 — PostToolUse with isError:false does NOT set flag
122
- rm -f "$FLAG_FILE"
123
- run_case "PostToolUse parse-success (isError:false) does NOT set flag" \
124
- '{"hook_event_name":"PostToolUse","tool_name":"mcp__memory__whatsapp-export-parse","tool_input":{"filePath":"_chat.txt"},"tool_response":{"isError":false,"content":[{"type":"text","text":"{\"parsedLines\":[]}"}]}}' \
125
- 0
126
-
127
- if [[ ! -f "$FLAG_FILE" ]]; then
128
- echo "PASS: parse-success leaves flag absent"
129
- PASS=$((PASS + 1))
130
- else
131
- echo "FAIL: parse-success incorrectly created flag" >&2
132
- FAIL=$((FAIL + 1))
133
- fi
134
-
135
- # Case 9 — Stale flag (>600s) auto-clears + allows
136
- PAST=$(( $(date -u +%s) - 700 ))
137
- echo "$PAST" > "$FLAG_FILE"
138
- run_case "Stale flag auto-clears, PreToolUse Read → ALLOWED" \
139
- '{"hook_event_name":"PreToolUse","tool_name":"Read","tool_input":{"file_path":"/tmp/foo"}}' \
140
- 0
141
-
142
- # Case 10 — No stdin (terminal) fails closed
143
- echo "Probing fail-closed behaviour (no stdin)..."
144
- bash "$HOOK" </dev/null >/dev/null 2>/dev/null
145
- ACTUAL=$?
146
- # /dev/null IS a stdin — the `[ -t 0 ]` check tests for terminal, not file.
147
- # A file/pipe stdin reads as empty, which produces empty hook_event_name and
148
- # falls through to default `exit 0` (allow). The terminal-only fail-closed
149
- # branch can't be tested non-interactively; verify the script reads `[ -t 0 ]`.
150
- if grep -q '\[ -t 0 \]' "$HOOK"; then
151
- echo "PASS: fail-closed terminal check is present"
152
- PASS=$((PASS + 1))
153
- else
154
- echo "FAIL: fail-closed terminal check missing" >&2
155
- FAIL=$((FAIL + 1))
156
- fi
157
-
158
- echo
159
- echo "──────── archive-ingest-gate test summary ────────"
160
- echo "PASS: $PASS"
161
- echo "FAIL: $FAIL"
162
-
163
- if [[ "$FAIL" -gt 0 ]]; then
164
- exit 1
165
- fi
166
- exit 0