@rubytech/create-realagent 1.0.825 → 1.0.828
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/payload/platform/lib/task-secrets/dist/index.d.ts +40 -0
- package/payload/platform/lib/task-secrets/dist/index.d.ts.map +1 -0
- package/payload/platform/lib/task-secrets/dist/index.js +44 -0
- package/payload/platform/lib/task-secrets/dist/index.js.map +1 -0
- package/payload/platform/lib/task-secrets/src/__tests__/redact-secrets.test.ts +127 -0
- package/payload/platform/lib/task-secrets/src/index.ts +77 -0
- package/payload/platform/lib/task-secrets/tsconfig.json +9 -0
- package/payload/platform/lib/task-secrets/vitest.config.ts +9 -0
- package/payload/platform/neo4j/schema.cypher +34 -2
- package/payload/platform/package.json +2 -2
- package/payload/platform/plugins/admin/hooks/archive-ingest-surface-gate.sh +19 -13
- package/payload/platform/plugins/admin/skills/business-profile/SKILL.md +2 -2
- package/payload/platform/plugins/admin/skills/onboarding/SKILL.md +13 -12
- package/payload/platform/plugins/admin/skills/plugin-management/SKILL.md +4 -4
- package/payload/platform/plugins/admin/skills/public-agent-manager/SKILL.md +2 -2
- package/payload/platform/plugins/admin/skills/stream-log-review/SKILL.md +6 -6
- package/payload/platform/plugins/admin/skills/unzip-attachment/references/safety.md +1 -1
- package/payload/platform/plugins/cloudflare/references/manual-setup.md +3 -3
- package/payload/platform/plugins/cloudflare/skills/setup-tunnel/SKILL.md +4 -4
- package/payload/platform/plugins/docs/references/cloudflare.md +2 -2
- package/payload/platform/plugins/docs/references/internals.md +2 -2
- package/payload/platform/plugins/docs/references/plugins-guide.md +1 -1
- package/payload/platform/plugins/docs/references/troubleshooting.md +2 -1
- package/payload/platform/plugins/linkedin-import/skills/linkedin-import/SKILL.md +2 -2
- package/payload/platform/plugins/linkedin-import/skills/linkedin-import/references/connections.md +1 -1
- package/payload/platform/plugins/memory/PLUGIN.md +1 -1
- package/payload/platform/plugins/memory/mcp/dist/index.js +6 -41
- package/payload/platform/plugins/memory/mcp/dist/index.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/llm-classifier.test.js +51 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/llm-classifier.test.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts +19 -4
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js +139 -56
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.d.ts +2 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.js +61 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts +34 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js +241 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js.map +1 -1
- package/payload/platform/plugins/memory/references/graph-primitives.md +5 -5
- package/payload/platform/plugins/memory/references/schema-base.md +6 -3
- package/payload/platform/plugins/memory/skills/document-ingest/SKILL.md +6 -6
- package/payload/platform/plugins/tasks/PLUGIN.md +1 -1
- package/payload/platform/plugins/tasks/mcp/dist/index.js +11 -2
- package/payload/platform/plugins/tasks/mcp/dist/index.js.map +1 -1
- package/payload/platform/plugins/tasks/mcp/dist/tools/task-create.d.ts +19 -2
- package/payload/platform/plugins/tasks/mcp/dist/tools/task-create.d.ts.map +1 -1
- package/payload/platform/plugins/tasks/mcp/dist/tools/task-create.js +17 -1
- package/payload/platform/plugins/tasks/mcp/dist/tools/task-create.js.map +1 -1
- package/payload/platform/plugins/whatsapp-import/PLUGIN.md +17 -15
- package/payload/platform/plugins/whatsapp-import/bin/ingest.mjs +313 -366
- package/payload/platform/plugins/whatsapp-import/bin/whatsapp-ingest.sh +27 -60
- package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.d.ts +18 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.d.ts.map +1 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.js +31 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.js.map +1 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.d.ts +27 -12
- package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.d.ts.map +1 -1
- package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.js +40 -20
- package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.js.map +1 -1
- package/payload/platform/plugins/whatsapp-import/lib/dist/index.d.ts +7 -4
- package/payload/platform/plugins/whatsapp-import/lib/dist/index.d.ts.map +1 -1
- package/payload/platform/plugins/whatsapp-import/lib/dist/index.js +9 -6
- package/payload/platform/plugins/whatsapp-import/lib/dist/index.js.map +1 -1
- package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.d.ts +25 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.d.ts.map +1 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.js +48 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.js.map +1 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.d.ts +3 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.d.ts.map +1 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.js +47 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.js.map +1 -0
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/delta-append.test.ts +163 -0
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/sessionize.test.ts +91 -0
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/to-classifier-input.test.ts +59 -0
- package/payload/platform/plugins/whatsapp-import/lib/src/delta-cursor.ts +54 -0
- package/payload/platform/plugins/whatsapp-import/lib/src/derive-keys.ts +55 -32
- package/payload/platform/plugins/whatsapp-import/lib/src/index.ts +9 -6
- package/payload/platform/plugins/whatsapp-import/lib/src/sessionize.ts +81 -0
- package/payload/platform/plugins/whatsapp-import/lib/src/to-classifier-input.ts +48 -0
- package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/SKILL.md +66 -73
- package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/references/conversation-archive-shape.md +143 -0
- package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/references/export-parse.md +2 -2
- package/payload/platform/templates/specialists/agents/database-operator.md +17 -18
- package/payload/server/chunk-T2OPNP3L.js +654 -0
- package/payload/server/chunk-ZTBTX3IO.js +642 -0
- package/payload/server/cloudflare-task-tracker-BAMJY4MH.js +17 -0
- package/payload/server/cloudflare-task-tracker-CR6TL4VL.js +19 -0
- package/payload/server/public/assets/{admin-DOkUspG1.js → admin-BNwPsMhJ.js} +2 -2
- package/payload/server/public/assets/{graph-LLMJa4Ch.js → graph-N_Bw-8oT.js} +1 -1
- package/payload/server/public/assets/{page-DoaF3DB0.js → page-BKLGP-th.js} +1 -1
- package/payload/server/public/graph.html +2 -2
- package/payload/server/public/index.html +2 -2
- package/payload/server/server.js +291 -172
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/filter-gate.test.ts +0 -172
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/ingest-idempotence.test.ts +0 -141
- package/payload/platform/plugins/whatsapp-import/lib/src/filter.ts +0 -136
- package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import-enrich/SKILL.md +0 -333
|
@@ -1,131 +1,124 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: whatsapp-import
|
|
3
|
-
description:
|
|
3
|
+
description: Single-phase WhatsApp `_chat.txt` ingest contract (Task 891). Confirms the owner + every distinct sender against existing `:AdminUser` / `:Person` nodes (no auto-creation), then invokes the deterministic Bash entry `whatsapp-ingest.sh`. The script parses the export, sessionizes at gap-hours boundaries, classifies each session via Haiku (mode='chat') into `:Section:Conversation` chunks with summary + topic keywords, and writes them under a parent `:ConversationArchive` MERGEd on `conversationIdentity = sha256(accountId + ":" + sortedParticipantElementIds)`. Re-imports are delta-append: prior chunks are never touched; only messages after `lastIngestedMessageHash` flow through the pipeline. Triggers when the operator drops a `_chat.txt` file or its containing export folder into chat. Distinct from the live `whatsapp` plugin (Baileys QR pairing).
|
|
4
4
|
---
|
|
5
5
|
|
|
6
|
-
# WhatsApp Import —
|
|
6
|
+
# WhatsApp Import — Conversation Archive
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
Single-phase ingest. The deterministic Bash entry parses the export, splits it into sessions, classifies each session into topic-bounded `:Section:Conversation` chunks via Haiku, and writes everything under a parent `:ConversationArchive`. Insight derivation (`:Observation` / `:Task` / `:Preference` / `:MENTIONS`) is deferred to a separate follow-up task that operates on chunks rather than per-message rows.
|
|
9
9
|
|
|
10
|
-
## Owner +
|
|
10
|
+
## Owner + all-participants confirmation (mandatory first step)
|
|
11
11
|
|
|
12
|
-
A
|
|
12
|
+
A `_chat.txt` carries N distinct senders (1 owner + N-1 others). Every distinct senderName must resolve to an existing `:AdminUser` or `:Person` elementId before the script runs — the writer LOUD-FAILs on any unresolved sender.
|
|
13
13
|
|
|
14
|
-
|
|
15
|
-
`MATCH (u:AdminUser) RETURN elementId(u) AS elementId, u.name AS name, u.userId AS userId, u.accountId AS accountId`
|
|
16
|
-
2. Ask the operator: "Who exported this `_chat.txt`?" — accept either an existing `:AdminUser` elementId or, if the operator names someone not in the graph, surface that as a blocker (auto-creating an unknown owner is refused).
|
|
17
|
-
3. Identify the third party from the preview's sender histogram. Look up the matching `:Person` (by name); if no match, ask the operator to confirm a `:Person` elementId or block until one exists. **Auto-creating the third-party `:Person` is forbidden** — the operator must confirm the canonical node.
|
|
18
|
-
4. Echo both back verbatim and require explicit yes/no confirmation.
|
|
19
|
-
5. Persist the owner's `elementId` as `--owner-element-id` and the subject's as `--subject-person-id`.
|
|
14
|
+
The flow:
|
|
20
15
|
|
|
21
|
-
|
|
16
|
+
1. **Preview the senders.** Call `mcp__memory__whatsapp-export-preview` with the operator-supplied path (read-only, no Cypher writes):
|
|
17
|
+
```json
|
|
18
|
+
{
|
|
19
|
+
"filePath": "/abs/path/to/_chat.txt",
|
|
20
|
+
"timezone": "Europe/London"
|
|
21
|
+
}
|
|
22
|
+
```
|
|
23
|
+
Returns counters + the sender histogram. Surface to the operator as one chat message — counters and the histogram, no prose.
|
|
22
24
|
|
|
23
|
-
|
|
25
|
+
2. **List candidate `:AdminUser` and existing `:Person`** rows for the senders via `mcp__graph__maxy-graph-read_neo4j_cypher`.
|
|
24
26
|
|
|
25
|
-
|
|
26
|
-
{
|
|
27
|
-
"filePath": "/abs/path/to/_chat.txt",
|
|
28
|
-
"timezone": "Europe/London"
|
|
29
|
-
}
|
|
30
|
-
```
|
|
31
|
-
|
|
32
|
-
Returns: `{conversationSha256, archiveSourceFile, archiveBytes, parsed, mediaSkipped, systemSkipped, totalMessages, dateRange:{first,last}, senders:[{name,messageCount}, …]}`. No Cypher writes; the call is read-only and does NOT touch Neo4j.
|
|
27
|
+
3. **Iterate the histogram, one operator question per distinct senderName.** For each sender: `"Sender '<name>' (<count> messages) — pick existing :AdminUser/:Person or block?"`. The operator either picks an existing elementId or names "block" (refuses to map to a node). **Never auto-create a `:Person`** — the operator must confirm a canonical node, mirroring `feedback_archives_are_not_documents.md`'s closed-set discipline.
|
|
33
28
|
|
|
34
|
-
|
|
29
|
+
4. **Identify the owner** from the resolved set — the operator who exported the chat. Echo back: `"Owner = :AdminUser <name> (<elementId>); other participants = <list>. Confirm yes/no."`
|
|
35
30
|
|
|
36
|
-
|
|
31
|
+
5. **Persist the resolved IDs** as `--owner-element-id` + `--participant-person-ids <csv>` for the script call.
|
|
37
32
|
|
|
38
|
-
|
|
33
|
+
DM and group follow the identical flow. A 1:1 chat resolves 2 senders; a group resolves N senders. Identity (`conversationIdentity = sha256(accountId + ":" + sortedParticipantElementIds.join(","))`) is identical regardless of group size — DM and group use the same MERGE key.
|
|
39
34
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
| Filter | Effect |
|
|
43
|
-
|--------|--------|
|
|
44
|
-
| `all` | Write every parsed row. Operator's explicit "I want the full archive" choice. |
|
|
45
|
-
| `senders=Alice,Bob Carter` | Keep only rows whose senderName matches one of the comma-separated names exactly (whitespace trimmed). |
|
|
46
|
-
| `date-range=2024-01-01..2024-06-30` | Keep only rows whose `dateSent` falls inside the inclusive range (date-only or full ISO 8601 endpoints both accepted). |
|
|
47
|
-
|
|
48
|
-
Echo the chosen filter back; require explicit yes/no confirmation before the write.
|
|
49
|
-
|
|
50
|
-
## Step 3 — archive-write
|
|
35
|
+
## Step 2 — invoke the script
|
|
51
36
|
|
|
52
37
|
Single Bash call:
|
|
53
38
|
|
|
54
39
|
```bash
|
|
55
40
|
bash platform/plugins/whatsapp-import/bin/whatsapp-ingest.sh <archive.zip|dir|_chat.txt> \
|
|
56
41
|
--owner-element-id <id> \
|
|
57
|
-
--
|
|
58
|
-
--scope <admin|public>
|
|
59
|
-
--filter <all|senders=<csv>|date-range=<isoFrom>..<isoTo>>
|
|
42
|
+
--participant-person-ids <id1>,<id2>,... \
|
|
43
|
+
--scope <admin|public>
|
|
60
44
|
```
|
|
61
45
|
|
|
62
46
|
Optional flags:
|
|
63
47
|
- `--account-id <id>` — explicit account id when more than one exists under `data/accounts/` (Phase 0 has one).
|
|
64
48
|
- `--timezone <iana>` — IANA zone for timestamps (default `Europe/London`).
|
|
65
49
|
- `--date-format <DD/MM/YY|MM/DD/YY|DD/MM/YYYY|MM/DD/YYYY>` — override auto-detect for ambiguous locales.
|
|
50
|
+
- `--session-gap-hours <N>` — gap threshold (in hours) used to split parsed messages into sessions for chunking (default `12`). Smaller values produce more sessions, more Haiku calls, finer chunks; larger values group more messages per session.
|
|
66
51
|
|
|
67
52
|
The script:
|
|
68
53
|
- Unzips the archive if needed; locates `_chat.txt`.
|
|
69
|
-
- Parses the file deterministically (year shape, sender/body grammar, timezone offset, U+200E/U+200F
|
|
70
|
-
-
|
|
71
|
-
- Validates every distinct parsed senderName against the
|
|
72
|
-
-
|
|
54
|
+
- Parses the file deterministically (year shape, sender/body grammar, timezone offset, U+200E/U+200F bidi-strip).
|
|
55
|
+
- Computes the source file's `archiveSha256` (provenance + cleanup discriminator).
|
|
56
|
+
- Validates every distinct parsed senderName against the closed set of `{owner, participants...}` candidate names. Any miss LOUD-FAILs `parser-miss reason="senderName=<verbatim> not in confirmed participant set ..."`.
|
|
57
|
+
- Computes `conversationIdentity` from accountId + sorted participant elementIds.
|
|
58
|
+
- Looks up any prior `:ConversationArchive` carrying that identity → reads `lastIngestedMessageHash`. If found, slices parsed lines after the cursor (delta-append). Cursor not found → `FAIL delta-cursor-missing`. Cursor at last line → empty-delta noop (exit 0, no writes).
|
|
59
|
+
- Sessionizes the delta lines at the operator-supplied gap-hours boundary.
|
|
60
|
+
- For each session: renders as turn-attributed text (`[ts] Sender: body\n…`) and calls `memory-classify` with `mode='chat'`. Returns one or more `:Section:Conversation` chunk specs with `summary`, `keywords[]`, `firstMessageAt`, `lastMessageAt`, `participantNames[]`, `messageCount`, `body` (verbatim turn-attributed text).
|
|
61
|
+
- Calls `memory-ingest` with `parentLabel='ConversationArchive'`. Server MERGEs the parent on `conversationIdentity`, MERGEs `:PARTICIPANT_IN` edges from each confirmed participant, drops any prior chunks stamped with this `archiveSha256` (idempotency for re-running the same export bytes), CREATEs new chunks, extends the existing `:NEXT` chain from its tail, advances `lastIngestedMessageHash` + `lastIngestedMessageAt`.
|
|
62
|
+
|
|
63
|
+
NO insight pass runs. Phase 2 (operator-driven `:Observation` / `:Task` / `:Preference` derivation against chunks) is its own follow-up task with its own skill.
|
|
73
64
|
|
|
74
|
-
|
|
65
|
+
## Three operator messages per ingest
|
|
75
66
|
|
|
76
|
-
|
|
67
|
+
After the script succeeds, formulate the three operator-facing messages from the JSON summary on stdout (one operator message per surfaceable phase):
|
|
77
68
|
|
|
78
|
-
|
|
69
|
+
1. **Parse summary.** `Parsed <archiveSourceFile>: <parsed> messages across <sessions> sessions, date range <dateRange.first> → <dateRange.last>. Participants: <senderHistogram[i].name (count), …>.`
|
|
70
|
+
2. **Classify summary.** `Classified into <chunks> chunks, covering: <topicKeywords[0], topicKeywords[1], …>.`
|
|
71
|
+
3. **Write summary.** `Created :ConversationArchive <archiveElementId> with <chunks> :Section:Conversation chunks (NEXT chain length <chunks - 1>). Participants linked via :PARTICIPANT_IN: <participantsLinked>.`
|
|
72
|
+
|
|
73
|
+
For an empty-delta re-import (`delta.kind === "empty-delta"`): emit only message 1 + a noop line `noop reason="no new messages since <priorLastIngestedMessageAt>"`.
|
|
74
|
+
|
|
75
|
+
## Stdout JSON shape (success)
|
|
79
76
|
|
|
80
77
|
```json
|
|
81
78
|
{
|
|
82
|
-
"
|
|
83
|
-
"
|
|
79
|
+
"archiveElementId": "4:abcd…:42",
|
|
80
|
+
"conversationIdentity": "<sha256-hex>",
|
|
81
|
+
"archiveSha256": "<sha256-hex>",
|
|
82
|
+
"archiveSourceFile": "_chat.txt",
|
|
84
83
|
"parsed": 1707,
|
|
85
84
|
"mediaSkipped": 0,
|
|
86
85
|
"systemSkipped": 0,
|
|
87
|
-
"
|
|
88
|
-
"
|
|
89
|
-
"
|
|
90
|
-
"
|
|
91
|
-
"
|
|
92
|
-
"
|
|
86
|
+
"delta": { "kind": "first-ingest|delta|empty-delta", "deltaStart": 0, "deltaMessages": 1707 },
|
|
87
|
+
"sessions": 38,
|
|
88
|
+
"chunks": 142,
|
|
89
|
+
"nextEdgesCreated": 141,
|
|
90
|
+
"participantsLinked": 2,
|
|
91
|
+
"dateRange": { "first": "2024-01-15T09:30:00+00:00", "last": "2026-04-30T18:42:00+01:00" },
|
|
92
|
+
"senderHistogram": [{ "name": "Joel", "count": 812 }, { "name": "Adam", "count": 895 }],
|
|
93
|
+
"topicKeywords": ["pricing", "scheduling", "introductions", "..."],
|
|
93
94
|
"ms": 6800
|
|
94
95
|
}
|
|
95
96
|
```
|
|
96
97
|
|
|
97
|
-
Surface to the admin agent as exactly one message (the agent-return shape per Task 871 success criterion 6 — counters first, one sentence pointing at the Phase 2 surface):
|
|
98
|
-
|
|
99
|
-
> Imported `<written>` messages from `<archive>` into conversation `<conversationElementId>` (`<conversationId>`); already existed: `<messagesAlreadyExisted>`; NEXT edges created: `<nextEdgesCreated>`. Use `mcp__memory__whatsapp-export-preview` for any future re-import preview; trigger semantic enrichment via the `whatsapp-import-enrich` skill ("enrich the `<chat-name>` chat") when ready.
|
|
100
|
-
|
|
101
|
-
NO inline enumeration of mention/task/preference/relationship counts. NO multi-paragraph "ask to enrich" prose. The above shape is load-bearing — the brief's `feedback_concision_over_completeness.md` and the Task-871 root incident (one ingest blew the operator's context with the count enumeration) require this discipline.
|
|
102
|
-
|
|
103
|
-
### Re-import signal
|
|
104
|
-
|
|
105
|
-
A second invocation against the same archive should report `messagesAlreadyExisted > 0 AND written > 0` (after Task 870's stable-messageId contract lands; pre-Task 870 the messageId is unstable and re-imports double). The subagent asserts both counters appear non-trivially before claiming a re-import landed cleanly.
|
|
106
|
-
|
|
107
98
|
## Failure path — single FAIL line
|
|
108
99
|
|
|
109
|
-
- **Exit non-zero** + one stderr line: `[whatsapp-
|
|
110
|
-
|
|
111
|
-
|
|
100
|
+
- **Exit non-zero** + one stderr line: `[whatsapp-import] FAIL phase=<argv|parse|classify|delta-cursor-missing|memory-ingest|uncaught> reason="..."`. Surface this verbatim to the operator and yield. **Do not retry.** The archive-ingest-surface-gate denies parser-source edits, JS test runners, and the legacy `whatsapp-export-parse` / `whatsapp-export-insight-write` MCP tools — none of those are escape hatches in your surface.
|
|
101
|
+
- `parser-miss` LOUD-FAIL: an unconfirmed senderName slipped through. Either re-run with the missing :Person elementId added to `--participant-person-ids`, or report a parser bug.
|
|
102
|
+
- `delta-cursor-missing` LOUD-FAIL: the prior `lastIngestedMessageHash` is not present in the re-export. Either the operator deleted prior messages from the archive, or this is a different chat. Investigation required — never re-run blindly.
|
|
112
103
|
|
|
113
104
|
## Idempotency
|
|
114
105
|
|
|
115
|
-
Re-running
|
|
106
|
+
- **Re-running the same export bytes** is a no-op: the cleanup-by-`archiveSha256` step drops THIS export's prior chunks and re-creates them with identical content. `:NEXT` chain length unchanged. Counters: `chunks` non-zero, `delta.kind` either `first-ingest` (if no prior archive) or `delta` (cursor advanced past previous run).
|
|
107
|
+
- **Re-running with appended messages** (a fresh export from the same chat with new messages at the tail): cursor lookup finds the prior `lastIngestedMessageHash`, slices new messages, sessionizes only those, and appends new chunks at the tail of the existing `:NEXT` chain. Pre-existing chunks are never touched (their `archiveSha256` differs from this run's).
|
|
116
108
|
|
|
117
109
|
## Verification (post-write)
|
|
118
110
|
|
|
119
111
|
Run via `mcp__graph__maxy-graph-read_neo4j_cypher`:
|
|
120
112
|
|
|
121
|
-
- `MATCH (
|
|
122
|
-
- `MATCH (
|
|
123
|
-
- `MATCH p=(:
|
|
124
|
-
-
|
|
113
|
+
- `MATCH (a:ConversationArchive { conversationIdentity: $cid }) RETURN elementId(a), a.lastIngestedMessageAt, a.lastIngestedMessageHash` — agrees with the JSON summary.
|
|
114
|
+
- `MATCH (a:ConversationArchive { conversationIdentity: $cid })-[:HAS_SECTION]->(c:Section:Conversation) RETURN count(c)` — equals `chunks`.
|
|
115
|
+
- `MATCH p=(:Section:Conversation)-[:NEXT*]->(:Section:Conversation) WHERE startNode(relationships(p)[0]).archiveSourceFile = $file WITH max(length(p)) AS chain RETURN chain` — equals `chunks - 1` for a fresh-only ingest, or longer for a delta-append (full chain since first ever ingest).
|
|
116
|
+
- `MATCH (p)-[:PARTICIPANT_IN]->(:ConversationArchive { conversationIdentity: $cid }) RETURN count(p)` — equals `participantsLinked` after a first-ingest, or the running total of all confirmed participants ever.
|
|
117
|
+
- Phase 1 wrote ZERO observations: `MATCH (o:Observation)-[:OBSERVED_IN]->(:ConversationArchive { conversationIdentity: $cid }) RETURN count(o)` — should be 0 today (Phase 2 deferred).
|
|
125
118
|
|
|
126
119
|
## What this is not
|
|
127
120
|
|
|
128
121
|
- **Not** the live `whatsapp` plugin. That plugin (Baileys QR pairing) holds messages in an in-memory store cleared on restart. This plugin imports historical exports into Neo4j as persistent graph nodes.
|
|
129
122
|
- **Not** a media-transcription pipeline. Voice notes, photos, PDFs are skipped at parse with a counter logged.
|
|
130
|
-
- **Not**
|
|
131
|
-
- **Not**
|
|
123
|
+
- **Not** an insight-extraction pass. Phase 2 (`:Observation` / `:Task` / `:Preference` / `:MENTIONS` derivation, anchored to chunks) ships in its own task.
|
|
124
|
+
- **Not** automatic. The owner + all-participants confirmation gate is mandatory before any line is written, mirroring `feedback_archives_are_not_documents.md`'s closed-set discipline.
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
# Conversation Archive — graph shape, identity, edges, delta protocol
|
|
2
|
+
|
|
3
|
+
The reference document for the `:ConversationArchive` shape introduced by Task 891. This is the source of truth for what the graph looks like after a `whatsapp-ingest.sh` run — the SKILL.md prescribes the workflow; this file specifies the schema.
|
|
4
|
+
|
|
5
|
+
## Labels
|
|
6
|
+
|
|
7
|
+
| Label | Role | MERGE key | Schema constraint |
|
|
8
|
+
|---|---|---|---|
|
|
9
|
+
| `:ConversationArchive` | Parent node — one per chat | `conversationIdentity` | `FOR (a:ConversationArchive) REQUIRE a.conversationIdentity IS UNIQUE` |
|
|
10
|
+
| `:Section:Conversation` | Topic-bounded chunk of messages | (no MERGE — CREATE only) | inherits `:Section` indices |
|
|
11
|
+
|
|
12
|
+
`:Section:Conversation` is the chat-mode counterpart of document-mode `:Section:Chapter` etc. Same `:Section` base label, same indexing, different secondary label.
|
|
13
|
+
|
|
14
|
+
## Identity formula
|
|
15
|
+
|
|
16
|
+
```
|
|
17
|
+
conversationIdentity = sha256(accountId + ":" + sortedParticipantElementIds.join(","))
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
- Stable across re-exports — same accountId + same operator-confirmed participant set always produces the same identity, regardless of the source file's bytes.
|
|
21
|
+
- DM and group are identical — the difference is array length.
|
|
22
|
+
- Participant order is sorted before joining, so the operator can supply elementIds in any order.
|
|
23
|
+
|
|
24
|
+
## Required indices
|
|
25
|
+
|
|
26
|
+
```
|
|
27
|
+
INDEX :ConversationArchive(accountId)
|
|
28
|
+
INDEX :ConversationArchive(createdBySession)
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Plus the constraint above (which doubles as a uniqueness index on `conversationIdentity`).
|
|
32
|
+
|
|
33
|
+
## Properties on `:ConversationArchive`
|
|
34
|
+
|
|
35
|
+
| Property | Type | Source | When set |
|
|
36
|
+
|---|---|---|---|
|
|
37
|
+
| `conversationIdentity` | string (sha256-hex) | derived | ON CREATE |
|
|
38
|
+
| `accountId` | string (UUID) | argv | ON CREATE |
|
|
39
|
+
| `scope` | string (`admin` / `public`) | argv | ON CREATE |
|
|
40
|
+
| `summary` | string | classifier (synthetic) | ON CREATE |
|
|
41
|
+
| `keywords` | string[] | classifier (aggregated across sessions) | ON CREATE |
|
|
42
|
+
| `embedding` | float[] | embed(summary) | ON CREATE |
|
|
43
|
+
| `archiveSourceFile` | string (basename) | this export | ON CREATE |
|
|
44
|
+
| `createdAt` | ISO 8601 | this run | ON CREATE |
|
|
45
|
+
| `createdByAgent` | string | constant `"whatsapp-import"` | ON CREATE |
|
|
46
|
+
| `createdBySession` | string (UUID) | env / argv | ON CREATE |
|
|
47
|
+
| `source` | string | constant `"whatsapp"` | ON CREATE |
|
|
48
|
+
| `updatedAt` | ISO 8601 | this run | ON CREATE / ON MATCH |
|
|
49
|
+
| `lastIngestedMessageHash` | string (sha256-hex) | derived from last delta line | ON CREATE / ON MATCH |
|
|
50
|
+
| `lastIngestedMessageAt` | ISO 8601 | last delta line's `dateSent` | ON CREATE / ON MATCH |
|
|
51
|
+
| `lastIngestedBySession` | string (UUID) | this run | ON CREATE / ON MATCH |
|
|
52
|
+
| `lastIngestedArchiveSha256` | string (sha256-hex) | this export's file bytes | ON CREATE / ON MATCH |
|
|
53
|
+
|
|
54
|
+
## Properties on `:Section:Conversation` (each chunk)
|
|
55
|
+
|
|
56
|
+
| Property | Type | Source |
|
|
57
|
+
|---|---|---|
|
|
58
|
+
| `accountId` | string | inherited |
|
|
59
|
+
| `title` | string | classifier |
|
|
60
|
+
| `body` | string | classifier (verbatim turn-attributed text — `[ts] Sender: body\n…`) |
|
|
61
|
+
| `bodyPreview` | string (≤150 chars) | first 150 chars of body |
|
|
62
|
+
| `position` | int | chunk index within this run |
|
|
63
|
+
| `scope` | string | inherited |
|
|
64
|
+
| `embedding` | float[] | embed(body) |
|
|
65
|
+
| `summary` | string | classifier (1–3 sentences) |
|
|
66
|
+
| `keywords` | string[] | classifier |
|
|
67
|
+
| `firstMessageAt` | ISO 8601 | first `[ts]` in chunk |
|
|
68
|
+
| `lastMessageAt` | ISO 8601 | last `[ts]` in chunk |
|
|
69
|
+
| `participantNames` | string[] | distinct senderNames in chunk |
|
|
70
|
+
| `messageCount` | int | message count in chunk |
|
|
71
|
+
| `archiveSha256` | string (sha256-hex) | this export's file bytes (cleanup discriminator) |
|
|
72
|
+
| `archiveSourceFile` | string (basename) | this export |
|
|
73
|
+
| `createdAt` | ISO 8601 | this run |
|
|
74
|
+
| `createdByAgent` / `createdBySession` / `source` | provenance | this run |
|
|
75
|
+
|
|
76
|
+
## Edges
|
|
77
|
+
|
|
78
|
+
| Edge | From | To | Cardinality | When written |
|
|
79
|
+
|---|---|---|---|---|
|
|
80
|
+
| `:HAS_SECTION` | `:ConversationArchive` | `:Section:Conversation` | one per chunk | every run |
|
|
81
|
+
| `:NEXT` | `:Section:Conversation` | `:Section:Conversation` | chunks − 1 (chronological chain across all sessions across all runs) | extending tail per run |
|
|
82
|
+
| `:PARTICIPANT_IN` | `:Person` / `:AdminUser` | `:ConversationArchive` | one per confirmed participant | MERGEd every run (idempotent) |
|
|
83
|
+
|
|
84
|
+
Every edge carries `createdAt`, `createdByAgent`, `createdBySession`, `source`, plus `archiveSha256` for HAS_SECTION + NEXT (so cleanup-by-archiveSha256 catches the right edges).
|
|
85
|
+
|
|
86
|
+
## Delta-append protocol
|
|
87
|
+
|
|
88
|
+
```
|
|
89
|
+
first ingest (or empty graph)
|
|
90
|
+
│
|
|
91
|
+
▼
|
|
92
|
+
parsed_lines ──── all of them ────► sessionize ──► classify ──► memory-ingest
|
|
93
|
+
│
|
|
94
|
+
▼
|
|
95
|
+
:ConversationArchive (NEW)
|
|
96
|
+
└── :HAS_SECTION ──► chunks (NEXT chain length K-1)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
re-import (delta)
|
|
100
|
+
│
|
|
101
|
+
▼
|
|
102
|
+
parsed_lines ──┐
|
|
103
|
+
│
|
|
104
|
+
▼
|
|
105
|
+
find cursor where deriveMessageContentHash(line) == archive.lastIngestedMessageHash
|
|
106
|
+
│
|
|
107
|
+
┌──────────┼──────────┬───────────────────┐
|
|
108
|
+
│ │ │ │
|
|
109
|
+
found missing empty (cursor at last line)
|
|
110
|
+
│ │ │
|
|
111
|
+
▼ ▼ ▼
|
|
112
|
+
slice from FAIL noop (exit 0,
|
|
113
|
+
cursor+1 non-zero no writes)
|
|
114
|
+
│
|
|
115
|
+
▼
|
|
116
|
+
delta_lines ──► sessionize ──► classify ──► memory-ingest (parentLabel='ConversationArchive')
|
|
117
|
+
│
|
|
118
|
+
▼
|
|
119
|
+
:ConversationArchive (MERGE on conversationIdentity)
|
|
120
|
+
└── :HAS_SECTION ──► NEW chunks
|
|
121
|
+
── :NEXT extends from prior tail
|
|
122
|
+
prior chunks unchanged
|
|
123
|
+
cursor advances
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## Why the cursor is content-only (not file-byte-based)
|
|
127
|
+
|
|
128
|
+
`lastIngestedMessageHash = sha256(dateSent + "|" + NFKC-trim-lower(senderName) + "|" + body)`. The hash deliberately excludes the source file's bytes — a fresh re-export of the same chat has different file bytes (different SHA-256 of the file) but the SAME message tuples. Without content-only hashing, every delta-import would `delta-cursor-missing` because the file SHA-256 always changes.
|
|
129
|
+
|
|
130
|
+
## Why DM and group are identical
|
|
131
|
+
|
|
132
|
+
The brief's "DM-only" Task 887 §A0 contract was a workaround for the per-message writer's auto-Person leak. Under the chunked archive shape, the writer never auto-creates anyone — every participant is operator-confirmed up front. The 2-vs-3-vs-N participant case is just the array length on the right of the identity formula. No special-casing.
|
|
133
|
+
|
|
134
|
+
## Provenance discipline (for cleanup correctness)
|
|
135
|
+
|
|
136
|
+
Every node and edge written by this pipeline is stamped with:
|
|
137
|
+
- `source = 'whatsapp'`
|
|
138
|
+
- `createdByAgent = 'whatsapp-import'`
|
|
139
|
+
- `createdBySession = <this run's session UUID>`
|
|
140
|
+
- `archiveSha256 = <this export's file SHA-256>` (chunks + HAS_SECTION + NEXT only — the parent records the LAST archiveSha256 separately)
|
|
141
|
+
- `archiveSourceFile = <this export's basename>` (parent + chunks)
|
|
142
|
+
|
|
143
|
+
The cleanup-by-`archiveSha256` step in memory-ingest's chat path drops only chunks whose `archiveSha256` matches THIS run's. Re-running the same export bytes is a no-op idempotently; re-running with a fresh delta export (different bytes, different SHA-256) leaves prior chunks untouched and appends new ones at the tail.
|
package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/references/export-parse.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Reference: `_chat.txt` parsing — implementation reference
|
|
2
2
|
|
|
3
|
-
> **
|
|
3
|
+
> **This is no longer operator instruction.** The agent does NOT walk this grammar in its own LLM turn. Parsing runs deterministically in [`platform/plugins/whatsapp-import/lib/src/parse-export.ts`](../../../lib/src/parse-export.ts), invoked in-process by [`bin/ingest.mjs`](../../../bin/ingest.mjs) (which the operator calls via [`bin/whatsapp-ingest.sh`](../../../bin/whatsapp-ingest.sh) — the single deterministic Bash entry). The legacy MCP wrapper is blocked at the harness gate. The vitest grid in [`lib/src/__tests__/parse-export.test.ts`](../../../lib/src/__tests__/parse-export.test.ts) is the executable contract; this prose is the human-readable companion. Extend the grammar by adding a failing test first.
|
|
4
4
|
|
|
5
5
|
WhatsApp's "Export Chat" produces a UTF-8 text file with a deterministic line grammar. This reference describes what the parser library does when it converts that file into the `{senderName, dateSent, body, sequenceIndex}[]` structure the SKILL.md consumes.
|
|
6
6
|
|
|
@@ -102,7 +102,7 @@ The skill consumes this directly. The `messageId` is computed by the skill (not
|
|
|
102
102
|
The parser throws (and `whatsapp-export-parse` returns `isError: true`) on:
|
|
103
103
|
|
|
104
104
|
- Encoding error at file open (UTF-8 decode fails — the parser uses `TextDecoder` with `fatal: true`, so any invalid byte sequence aborts loudly rather than silently substituting U+FFFD).
|
|
105
|
-
- Empty file or zero parsed lines after walking the file (the file isn't a `_chat.txt`). The thrown error and the `[whatsapp-import] parse-grammar-miss first-line="<sample>"` stderr line both carry a sanitised first-line sample (control chars stripped, truncated to 80 chars) so the operator can recognise the offending header shape without re-running with a debugger
|
|
105
|
+
- Empty file or zero parsed lines after walking the file (the file isn't a `_chat.txt`). The thrown error and the `[whatsapp-import] parse-grammar-miss first-line="<sample>"` stderr line both carry a sanitised first-line sample (control chars stripped, truncated to 80 chars) so the operator can recognise the offending header shape without re-running with a debugger.
|
|
106
106
|
- A timestamp prefix matches but the body parse fails (no `: ` separator after the closing `]` AND no system-pattern match) — emits `parse-error file=<...> line=<n> reason=no-sender-body-separator content="<...>"`.
|
|
107
107
|
- Missing required input (`accountId`, `timezone`).
|
|
108
108
|
|
|
@@ -3,7 +3,7 @@ name: database-operator
|
|
|
3
3
|
description: "Document and archive ingestion and ad-hoc graph operations — running the universal `document-ingest` skill for any unstructured document (PDF, text, transcript, web page, audio, video) and per-source archive-import skills (LinkedIn Basic Data Export today; CRM-type seed archives as each plugin ships), plus operator-driven graph hygiene (prune orphans, deduplicate entities, add edges, normalise labels). Delegate when the operator uploads any document, drops an archive directory into chat, or asks for any graph operation that is not a routine per-turn write."
|
|
4
4
|
summary: "Ingests every unstructured document and external archive into your graph (LinkedIn today; other CRM sources in future) and handles ad-hoc graph tidy-ups on request. For example, when you upload a CV, a pricing guide, or a contract; when you drop a LinkedIn export folder into chat; or when you ask to prune orphan nodes, merge duplicate people, or add edges between entities."
|
|
5
5
|
model: claude-sonnet-4-6
|
|
6
|
-
tools: Read, Bash, Glob, Grep, mcp__graph__maxy-graph-read_neo4j_cypher, mcp__graph__maxy-graph-write_neo4j_cypher, mcp__graph__maxy-graph-get_neo4j_schema, mcp__memory__memory-write, mcp__memory__memory-update, mcp__memory__memory-delete, mcp__memory__memory-search, mcp__memory__memory-rank, mcp__memory__memory-reindex, mcp__memory__memory-find-candidates, mcp__memory__memory-ingest, mcp__memory__memory-ingest-extract, mcp__memory__memory-ingest-web, mcp__memory__memory-classify, mcp__memory__memory-archive-write, mcp__memory__whatsapp-export-preview,
|
|
6
|
+
tools: Read, Bash, Glob, Grep, mcp__graph__maxy-graph-read_neo4j_cypher, mcp__graph__maxy-graph-write_neo4j_cypher, mcp__graph__maxy-graph-get_neo4j_schema, mcp__memory__memory-write, mcp__memory__memory-update, mcp__memory__memory-delete, mcp__memory__memory-search, mcp__memory__memory-rank, mcp__memory__memory-reindex, mcp__memory__memory-find-candidates, mcp__memory__memory-ingest, mcp__memory__memory-ingest-extract, mcp__memory__memory-ingest-web, mcp__memory__memory-classify, mcp__memory__memory-archive-write, mcp__memory__whatsapp-export-preview, mcp__memory__graph-prune-denylist-list, mcp__memory__graph-prune-denylist-add, mcp__memory__graph-prune-denylist-remove, mcp__contacts__contact-create, mcp__contacts__contact-update, mcp__contacts__contact-lookup, mcp__contacts__contact-list, mcp__tasks__task-create, mcp__admin__file-attach, mcp__admin__plugin-read
|
|
7
7
|
---
|
|
8
8
|
|
|
9
9
|
# Database Operator
|
|
@@ -26,15 +26,15 @@ Four rules govern every turn. They are load-bearing — when they conflict with
|
|
|
26
26
|
|
|
27
27
|
A landfill graph defeats EVIDENCE-BASED: search returns noise, the agent re-writes the noise, the noise compounds. Compress on write; filter on read.
|
|
28
28
|
|
|
29
|
-
**LOUD-FAIL.** If a dispatched skill prescribes a tool not present in your live tool surface, or a credential not provided in your tool input, terminate with a structured blocker — never improvise via Bash, never search the filesystem for credentials, never construct a parallel write path. Return: `Skill <name> prescribes <tool/credential>; not available. Cannot proceed. Operator must <remediation>.`
|
|
29
|
+
**LOUD-FAIL.** If a dispatched skill prescribes a tool not present in your live tool surface, or a credential not provided in your tool input, terminate with a structured blocker — never improvise via Bash, never search the filesystem for credentials, never construct a parallel write path. Return: `Skill <name> prescribes <tool/credential>; not available. Cannot proceed. Operator must <remediation>.` Same doctrine as classifier failure and graph-MCP loud-fail elsewhere in the platform. *Failure symptoms:* `cypher-shell` invocation, `find … neo4j` / `grep … NEO4J_PASSWORD` filesystem probes, `curl` against Neo4j HTTP endpoints, any Bash improvisation that recreates the missing tool's effect.
|
|
30
30
|
|
|
31
31
|
The pre-publish gate (`platform/scripts/verify-skill-tool-surface.sh`) statically asserts every shipped skill's prescribed `mcp__*` tokens resolve against your frontmatter `tools:` list, so a missing tool is a build error, not a production discovery. LOUD-FAIL is the runtime backstop when that gate is bypassed (e.g. operator-edited skill).
|
|
32
32
|
|
|
33
|
-
**Archive-ingest surface gate
|
|
33
|
+
**Archive-ingest surface gate.** Each per-source archive importer ships a single deterministic Bash entry under `platform/plugins/<name>/bin/<name>-ingest.sh`. The harness-level gate at `platform/plugins/admin/hooks/archive-ingest-surface-gate.sh` enforces the surface filter that makes the LLM mechanically incapable of deviating mid-ingest:
|
|
34
34
|
|
|
35
|
-
- **Legacy WhatsApp MCP tools blocked unconditionally:** `mcp__memory__whatsapp-export-parse`, `mcp__memory__whatsapp-export-insight-write`, and `mcp__memory__memory-archive-write` when `archiveType=whatsapp-export`. The deterministic Bash entry (`platform/plugins/whatsapp-import/bin/whatsapp-ingest.sh`) is the only supported path; parse,
|
|
35
|
+
- **Legacy WhatsApp MCP tools blocked unconditionally:** `mcp__memory__whatsapp-export-parse`, `mcp__memory__whatsapp-export-insight-write`, `mcp__memory__whatsapp-export-insight-pass` (deleted by Task 891), and `mcp__memory__memory-archive-write` when `archiveType=whatsapp-export`. The deterministic Bash entry (`platform/plugins/whatsapp-import/bin/whatsapp-ingest.sh`) is the only supported path; parse, sessionize, classify (mode='chat'), and memory-ingest (parentLabel='ConversationArchive') all run in-process.
|
|
36
36
|
- **Non-WhatsApp archiveTypes flow unchanged:** `memory-archive-write` with `archiveType=linkedin-connections` (and future per-source archiveTypes) is allowed. Each per-source plugin migrates to its own `bin/<name>-ingest.sh` on its own schedule.
|
|
37
|
-
- **Plugin-source edits blocked
|
|
37
|
+
- **Plugin-source edits blocked:** `Edit`/`Write`/`NotebookEdit` against `platform/plugins/*/lib/*` is denied. The operator does not own plugin source.
|
|
38
38
|
- **JS test runners blocked** (preserved): `vitest` / `bun test` / `npm test` / `npx jest` Bash commands are denied. The operator does not run plugin tests.
|
|
39
39
|
- **Post-parse-error flag** (preserved for non-migrated parsers): when any `mcp__*__*-export-parse` / `mcp__*__*-import-parse` tool returns `isError: true`, every subsequent tool call this turn is blocked until the operator submits a new prompt.
|
|
40
40
|
|
|
@@ -53,7 +53,7 @@ Return to the admin agent:
|
|
|
53
53
|
|
|
54
54
|
Do not return raw CSV rows, raw Cypher bodies, or raw tool-result dumps. Compression is the output discipline.
|
|
55
55
|
|
|
56
|
-
### Four-step operator narrative for document ingestion
|
|
56
|
+
### Four-step operator narrative for document ingestion
|
|
57
57
|
|
|
58
58
|
When the dispatch is a document ingestion (Branch A, the `document-ingest` skill), the operator sees up to four messages — one at each phase. You emit steps 2, 3, and 4 directly into chat at the moment each phase completes; admin emits step 1 before dispatching to you.
|
|
59
59
|
|
|
@@ -65,7 +65,7 @@ When the dispatch is a document ingestion (Branch A, the `document-ingest` skill
|
|
|
65
65
|
|
|
66
66
|
Use the actual numbers from the tool result, not approximations. Don't omit orphan candidates — they're the operator's primary debugging surface.
|
|
67
67
|
|
|
68
|
-
**Step 4 (after `wire-brief-entities` step completes
|
|
68
|
+
**Step 4 (after `wire-brief-entities` step completes).** When the dispatch brief named entities the document should connect to (Persons, Organizations, Services, Tasks, Events, KnowledgeDocuments, BrandingData), execute the brief-driven entity-wiring discipline (see "Brief-driven entity wiring" below) and emit one chat message:
|
|
69
69
|
|
|
70
70
|
> Wired `<N>` brief entities: `<K>` Persons via `<edge>`, `<M>` Organizations via `<edge>`, `<T>` Tasks via `REFERENCES`. `<P>` entities not found in graph: `<comma-separated names>`.
|
|
71
71
|
|
|
@@ -78,7 +78,7 @@ Drop the "not found" clause when every brief entity resolved. Suppress the chat
|
|
|
78
78
|
|
|
79
79
|
This is the operator's narrative — it must be truthful, specific, and complete. Never paraphrase the tool's structured output into a vague "ingested OK" — the verification cypher will catch the mismatch (`[memory-ingest] sections=… typed=… edges=… orphans=…` and `[document-ingest] wire-brief-entities …` log lines must agree with the chat numbers).
|
|
80
80
|
|
|
81
|
-
### Brief-driven entity wiring
|
|
81
|
+
### Brief-driven entity wiring
|
|
82
82
|
|
|
83
83
|
When the admin agent dispatches you with a document and the brief names "key entities to connect" (Persons, Organizations, Services, Tasks, Events, KnowledgeDocuments, BrandingData), those connections are deliverables. The brief is the operator's intent translated into structured input — landing the document as an island anchored to one node while the named Persons/Organizations/Tasks stay disconnected silently degrades the graph into KnowledgeDocuments unreachable from the entities they describe.
|
|
84
84
|
|
|
@@ -91,7 +91,7 @@ When the admin agent dispatches you with a document and the brief names "key ent
|
|
|
91
91
|
|
|
92
92
|
Skip entities the classifier already wired via `documentEdges` (common for emails and contracts where the document body itself names the parties). The classifier output's `edgeBreakdown` enumerates these — compare against your brief list before each `memory-write` to avoid duplicate edges.
|
|
93
93
|
|
|
94
|
-
The brief is the contract; the wiring outcome is in the four-step narrative's step 4. Returning *"meeting notes processed as a KnowledgeDocument anchored to <X>"* without listing wired/unresolved brief entities is a regression of the failure mode that produced this discipline (
|
|
94
|
+
The brief is the contract; the wiring outcome is in the four-step narrative's step 4. Returning *"meeting notes processed as a KnowledgeDocument anchored to <X>"* without listing wired/unresolved brief entities is a regression of the failure mode that produced this discipline (a meeting was once ingested with the anchor only, leaving three named Persons + four named Tasks disconnected until the operator surfaced the gap manually).
|
|
95
95
|
|
|
96
96
|
---
|
|
97
97
|
|
|
@@ -119,15 +119,14 @@ The classifier maps document sections to typed ontology labels. It does not inve
|
|
|
119
119
|
Per-source archive imports keep their own skill because their CSVs already encode entity types deterministically and need no LLM classifier. Currently shipped:
|
|
120
120
|
|
|
121
121
|
- **linkedin-import** — LinkedIn Basic Data Export. Ships with references for `Profile.csv` and `Connections.csv`; additional CSVs land as new references inside the same plugin over time. Path: `platform/plugins/linkedin-import/skills/linkedin-import/SKILL.md`. Load via `plugin-read` before any ingestion.
|
|
122
|
-
- **whatsapp-import** — WhatsApp `_chat.txt` export ingestion. **
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
3. **Archive-write** via `bash platform/plugins/whatsapp-import/bin/whatsapp-ingest.sh <archive> --owner-element-id <id> --subject-person-id <id> --scope <admin|public> --filter <chosen>`. Parses, applies the filter, writes Conversation + Messages with chronological NEXT chain. Writer is bound to the `{owner, subject}` pair from the preview histogram — any parsed senderName outside that closed set LOUD-FAILs (Task 887 §A0); the script does NOT auto-create participant `:Person` nodes. ZERO `:Observation` writes — the LLM insight pass moved to Phase 2.
|
|
122
|
+
- **whatsapp-import** — WhatsApp `_chat.txt` export ingestion. **Single-phase chunked-archive contract** (Task 891 — supersedes the prior two-phase load+enrich flow):
|
|
123
|
+
1. **Preview** via `mcp__memory__whatsapp-export-preview` — read-only parse that returns `{conversationSha256, parsed, mediaSkipped, systemSkipped, dateRange, senders:[{name,messageCount}], totalMessages, archiveBytes}`. No Cypher writes.
|
|
124
|
+
2. **Operator confirms owner + every distinct sender.** Iterate the preview's sender histogram one question at a time: for each sender, ask the operator to pick an existing `:AdminUser`/`:Person` elementId or block. Auto-creating participants is forbidden. Identify the owner from the resolved set, then echo back the `{owner, participants...}` pair for explicit yes/no confirmation.
|
|
125
|
+
3. **Archive-write** via `bash platform/plugins/whatsapp-import/bin/whatsapp-ingest.sh <archive> --owner-element-id <id> --participant-person-ids <id1>,<id2>,... --scope <admin|public>`. Parses, sessionizes the parsed messages at gap-hours boundaries (default 12h), classifies each session via Haiku (`memory-classify` with `mode='chat'`) into topic-bounded `:Section:Conversation` chunks, and writes them under a parent `:ConversationArchive` MERGEd on `conversationIdentity = sha256(accountId + ":" + sortedParticipantElementIds)`. Re-imports are delta-append: prior chunks never touched; only messages after `lastIngestedMessageHash` flow through the pipeline. Writer is bound to the operator-confirmed sender set — any parsed senderName outside that closed set LOUD-FAILs `parser-miss reason="..."`.
|
|
127
126
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
127
|
+
Surface to the operator as three chat messages built from the JSON summary on stdout: (a) parse summary with sender histogram + date range; (b) classify summary with chunk count + topic keywords; (c) write summary with `:ConversationArchive` elementId + chunk count + NEXT-chain length + participants linked. Empty-delta re-imports collapse to step (a) plus a no-op line. The legacy `mcp__memory__whatsapp-export-parse` / `whatsapp-export-insight-write` / `whatsapp-export-insight-pass` / `memory-archive-write{archiveType:whatsapp-export}` MCP tools are blocked at the harness; the Bash script is the only supported invocation surface. SKILL: `platform/plugins/whatsapp-import/skills/whatsapp-import/SKILL.md`. Phase 2 insight derivation (`:Observation` / `:Task` / `:Preference` / `:MENTIONS` against chunks) is deferred to a separate follow-up task with its own skill.
|
|
128
|
+
|
|
129
|
+
Distinct from the live `whatsapp` plugin (Baileys QR pairing, in-memory store).
|
|
131
130
|
|
|
132
131
|
Future CRM-type seed plugins (HubSpot, Salesforce, Pipedrive, iCloud contacts, Gmail CSV, etc.) will ship under the same pattern — each as its own opt-in plugin, each with its own `SKILL.md` path under `platform/plugins/<name>/skills/`. When the admin adds a new archive-import skill, its PLUGIN.md will name itself here and in the admin's `<plugin-manifest>`. No prompt change required.
|
|
133
132
|
|
|
@@ -155,7 +154,7 @@ Two rules govern every raw Cypher write you author. They require LLM judgement
|
|
|
155
154
|
|
|
156
155
|
**2. Every edge type is in the live ontology.** Inventing types fragments retrieval — `KNOWS` ≠ `knows` ≠ `HAS_KNOWN`. Call `mcp__graph__maxy-graph-get_neo4j_schema` before authoring any write whose edge type you are not certain about; if no fitting type exists, stop and ask the admin agent for ontology guidance — never coin a synonym. *Why:* edge typology compounds over time. A synonym today blocks every future query that expected the canonical type, and the only fix is a label-rewrite Cypher pass that touches the same edge from both sides.
|
|
157
156
|
|
|
158
|
-
**Structural enforcement
|
|
157
|
+
**Structural enforcement.** The shim auto-stamps `createdAt`, `createdByAgent`, `createdByTool`, `createdBySession` on every `CREATE`/`MERGE` alias before forwarding to Neo4j — you do not write these properties yourself. The shim runs the cypher inside a managed `executeWrite` and self-audits for unattached nodes before committing; if any node you created has zero edges in the same transaction, the entire transaction rolls back and you receive a structured error naming the orphan label(s). Treat the rollback as a hard failure (do not retry the same cypher); your job is to author atomic CREATE/MERGE-with-edge statements per Rule 1, not to write defensive WITH/MATCH/RETURN audits or hand-written SET clauses for `createdBy*` fields. The `[graph-cypher-write]` audit lines (`auto-stamp applied`, `accepted`, `orphan-rollback`, `orphan-warning`, `missing-provenance-warning`, `unknown-type-warning`) name what the structural enforcement saw — they are observation surfaces, not duties.
|
|
159
158
|
|
|
160
159
|
The two rules together replace the LOUD-FAIL improvisation pattern that prior versions of this prompt prescribed when a wrapped writer lacked an edge-between-existing-nodes path. You no longer loud-fail on missing graph-write tools — you have them. You loud-fail on credentials, on out-of-surface tools (a skill prescribing a non-graph MCP token you do not hold), and on dispatched skills whose prerequisites are unmet — exactly as the LOUD-FAIL prerogative names.
|
|
161
160
|
|