@rubytech/create-realagent 1.0.826 → 1.0.829
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/payload/platform/neo4j/schema.cypher +35 -2
- package/payload/platform/package.json +2 -2
- package/payload/platform/plugins/admin/hooks/__tests__/archive-ingest-surface-gate.test.sh +39 -54
- package/payload/platform/plugins/admin/hooks/archive-ingest-surface-gate.sh +26 -52
- package/payload/platform/plugins/admin/skills/onboarding/SKILL.md +7 -7
- package/payload/platform/plugins/docs/references/cloudflare.md +1 -1
- package/payload/platform/plugins/docs/references/plugins-guide.md +1 -1
- package/payload/platform/plugins/docs/references/troubleshooting.md +1 -0
- package/payload/platform/plugins/memory/PLUGIN.md +5 -5
- package/payload/platform/plugins/memory/mcp/dist/index.js +18 -253
- package/payload/platform/plugins/memory/mcp/dist/index.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/llm-classifier.test.js +51 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/llm-classifier.test.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-validator.test.js +103 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-validator.test.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts +19 -4
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js +149 -56
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.d.ts +16 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.js +12 -3
- package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-archive-write.test.js +2 -138
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-archive-write.test.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.d.ts +2 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.js +66 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/profile-update-personfields-open.test.d.ts +2 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/profile-update-personfields-open.test.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/profile-update-personfields-open.test.js +148 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/profile-update-personfields-open.test.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.d.ts +1 -64
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.js +6 -336
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts +30 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js +231 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/profile-update.d.ts +21 -17
- package/payload/platform/plugins/memory/mcp/dist/tools/profile-update.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/profile-update.js +77 -37
- package/payload/platform/plugins/memory/mcp/dist/tools/profile-update.js.map +1 -1
- package/payload/platform/plugins/memory/references/schema-base.md +7 -2
- package/payload/platform/plugins/memory/skills/document-ingest/SKILL.md +54 -4
- package/payload/platform/plugins/whatsapp/PLUGIN.md +1 -1
- package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.d.ts +18 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.d.ts.map +1 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.js +31 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.js.map +1 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.d.ts +27 -12
- package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.d.ts.map +1 -1
- package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.js +40 -20
- package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.js.map +1 -1
- package/payload/platform/plugins/whatsapp-import/lib/dist/index.d.ts +7 -4
- package/payload/platform/plugins/whatsapp-import/lib/dist/index.d.ts.map +1 -1
- package/payload/platform/plugins/whatsapp-import/lib/dist/index.js +9 -6
- package/payload/platform/plugins/whatsapp-import/lib/dist/index.js.map +1 -1
- package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.d.ts +25 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.d.ts.map +1 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.js +48 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.js.map +1 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.d.ts +3 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.d.ts.map +1 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.js +47 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.js.map +1 -0
- package/payload/platform/scripts/seed-neo4j.sh +15 -14
- package/payload/platform/templates/specialists/agents/database-operator.md +10 -17
- package/payload/server/chunk-CUSH3UXP.js +2305 -0
- package/payload/server/chunk-IWNDVGKT.js +10077 -0
- package/payload/server/chunk-KC7NUABI.js +654 -0
- package/payload/server/chunk-T2OPNP3L.js +654 -0
- package/payload/server/chunk-WUVXPZIV.js +1116 -0
- package/payload/server/client-pool-3TM3SRIA.js +32 -0
- package/payload/server/cloudflare-task-tracker-4NIODMGL.js +19 -0
- package/payload/server/cloudflare-task-tracker-CR6TL4VL.js +19 -0
- package/payload/server/maxy-edge.js +3 -3
- package/payload/server/neo4j-migrations-XTQ4WEV6.js +428 -0
- package/payload/server/public/assets/{admin-DOkUspG1.js → admin-BNwPsMhJ.js} +2 -2
- package/payload/server/public/assets/{graph-LLMJa4Ch.js → graph-N_Bw-8oT.js} +1 -1
- package/payload/server/public/assets/{page-DoaF3DB0.js → page-BKLGP-th.js} +1 -1
- package/payload/server/public/graph.html +2 -2
- package/payload/server/public/index.html +2 -2
- package/payload/server/server.js +281 -168
- package/payload/platform/plugins/whatsapp-import/PLUGIN.md +0 -46
- package/payload/platform/plugins/whatsapp-import/bin/ingest.mjs +0 -670
- package/payload/platform/plugins/whatsapp-import/bin/whatsapp-ingest.sh +0 -131
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/filter-gate.test.ts +0 -172
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/ingest-idempotence.test.ts +0 -141
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/parse-export-lrm.test.ts +0 -83
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/parse-export.test.ts +0 -678
- package/payload/platform/plugins/whatsapp-import/lib/src/derive-keys.ts +0 -59
- package/payload/platform/plugins/whatsapp-import/lib/src/filter.ts +0 -136
- package/payload/platform/plugins/whatsapp-import/lib/src/index.ts +0 -19
- package/payload/platform/plugins/whatsapp-import/lib/src/parse-export.ts +0 -471
- package/payload/platform/plugins/whatsapp-import/lib/tsconfig.json +0 -9
- package/payload/platform/plugins/whatsapp-import/lib/vitest.config.ts +0 -9
- package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/SKILL.md +0 -131
- package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/references/export-parse.md +0 -109
- package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import-enrich/SKILL.md +0 -333
|
@@ -37,13 +37,16 @@ When loading this reference, confirm which schema files were consulted by noting
|
|
|
37
37
|
| CloudflareTunnel | `CloudflareTunnel` | platform-native | — | `accountId`, `tunnelId`, `tunnelName` |
|
|
38
38
|
| CloudflareHostname | `CloudflareHostname` | platform-native | — | `accountId`, `hostnameValue`, `tunnelId` |
|
|
39
39
|
| Position | `Position` | platform-native (analogue of `schema:OrganizationRole`) | — | `accountId`, `title`, `startDate` |
|
|
40
|
-
| WhatsApp Conversation | `WhatsAppConversation` | extends `schema:Conversation` | — | `accountId`, `conversationId`, `archiveSourceFile`, `firstMessageAt`, `lastMessageAt`, `participantCount`, `messageCount`, `scope`, `createdByAgent`, `createdBySession`, `createdAt` |
|
|
41
|
-
| WhatsApp Message | `WhatsAppMessage` | extends `schema:Message` | — | `accountId`, `conversationId`, `messageId`, `dateSent`, `body`, `senderName`, `sequenceIndex`, `scope`, `createdByAgent`, `createdBySession`, `createdAt` |
|
|
40
|
+
| WhatsApp Conversation (legacy) | `WhatsAppConversation` | extends `schema:Conversation` | — | `accountId`, `conversationId`, `archiveSourceFile`, `firstMessageAt`, `lastMessageAt`, `participantCount`, `messageCount`, `scope`, `createdByAgent`, `createdBySession`, `createdAt` |
|
|
41
|
+
| WhatsApp Message (legacy) | `WhatsAppMessage` | extends `schema:Message` | — | `accountId`, `conversationId`, `messageId`, `dateSent`, `body`, `senderName`, `sequenceIndex`, `scope`, `createdByAgent`, `createdBySession`, `createdAt` |
|
|
42
|
+
| ConversationArchive (Task 891) | `ConversationArchive` | platform-native (chunked WhatsApp / messaging archive parent) | — | `accountId`, `conversationIdentity`, `archiveSourceFile`, `summary`, `keywords`, `lastIngestedMessageHash`, `lastIngestedMessageAt`, `lastIngestedArchiveSha256`, `scope`, `createdByAgent`, `createdBySession`, `createdAt` |
|
|
42
43
|
|
|
43
44
|
**Branding properties on LocalBusiness:** `primaryColor`, `accentColor`, `backgroundColor`, `tagline` — optional, used to brand the public chat endpoint. Written via `memory-update` on the LocalBusiness node. Hex color values must match `#[0-9a-fA-F]{3,8}`. Logo and icon are linked via `HAS_BRAND_ASSET → ImageObject` with `purpose: "logo"` or `"icon"`.
|
|
44
45
|
|
|
45
46
|
**Personal-profile role on Person (Task 704):** the optional `role` property on a `Person` node carries the value `"admin-personal"` when the node is the operator's personal-profile bootstrap (onboarding step 9 personal mode). The graph-write gate accepts `Person {role: "admin-personal"}` in lieu of a `LocalBusiness` so personal-mode users can complete onboarding without registering a business. Do not set `role: "admin-personal"` on customer or staff Person nodes — it's the personal-profile discriminator and only the bootstrap node should carry it.
|
|
46
47
|
|
|
48
|
+
**Personal-profile Person is open by default (Task 893).** Any Person property the agent judges useful for serving the operator is permitted on the personal-profile Person — identity, contact, context, anything that makes future assistance more useful. The set is open-ended, not enumerated. The only constraints are the Forbidden Properties table (Task 849; e.g. `Person.name` is rejected in favour of `givenName` + `familyName`) and the Schema.org synonym table (e.g. `phone` is rejected in favour of `telephone`). The schema validator is the central enforcement point; callers do not pre-rewrite or pre-filter. The `profile-update` tool's `personFields` parameter is the conversational surface that writes here, validating in `mode: "update"` so SET-only writes do not re-run the required-property check (givenName/familyName were established at PIN setup).
|
|
49
|
+
|
|
47
50
|
**CreativeWork** covers long-form textual records written by the platform: session summaries (`session-compact`), workflow digests (e.g. public chat review), and skill definitions saved by `skill-builder`. The `title` is a short identifier; the `abstract` carries the full content that drives the embedding.
|
|
48
51
|
|
|
49
52
|
**DefinedTerm** covers reusable concepts, learnings, and glossary entries — e.g. project retrospective learnings (`name` is a short handle, `description` is the full learning). Additional properties like `category` are encouraged but not required.
|
|
@@ -165,6 +168,8 @@ The closed enumeration:
|
|
|
165
168
|
|
|
166
169
|
`Preface`, `Abstract` (alias for Summary / ExecutiveSummary), `Introduction`, `TableOfContents`, `Chapter`, `Conclusion`, `Appendix`, `Bibliography` (alias for References), `Glossary`, `Acknowledgments` — each becomes `:Section:<Kind>` linked to the document via `(:KnowledgeDocument)-[:HAS_SECTION]->` and chained to siblings via `(:Section)-[:NEXT]->(:Section)` in reading order.
|
|
167
170
|
|
|
171
|
+
`Conversation` (Task 891) — chat-mode chunks emitted by `memory-classify` with `mode='chat'`. Becomes `:Section:Conversation` linked via `(:ConversationArchive)-[:HAS_SECTION]->` (a different parent than KnowledgeDocument). Properties beyond the base section shape: `summary`, `keywords`, `firstMessageAt`, `lastMessageAt`, `participantNames`, `messageCount`, `archiveSha256`. The `:Section` validator rules (embedding, body, position) apply uniformly.
|
|
172
|
+
|
|
168
173
|
**Contract-clause kinds (HAS_SECTION + NEXT, plus special-case extras for two kinds):**
|
|
169
174
|
|
|
170
175
|
`Parties`, `Recitals`, `Definitions`, `Scope`, `Term`, `Payment`, `Confidentiality`, `IntellectualProperty`, `Warranties`, `Indemnification`, `Liability`, `Termination`, `GoverningLaw`, `ForceMajeure`, `Notices`, `EntireAgreement`, `Amendment`, `Assignment`, `Severability`, `Signatures`.
|
|
@@ -1,11 +1,27 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: document-ingest
|
|
3
|
-
description: Universal document ingestion — maps any unstructured document (PDF, text, transcript, web page) to ontologically-grounded
|
|
3
|
+
description: Universal document ingestion — maps any unstructured document (PDF, text, transcript, web page) OR chat archive (WhatsApp `_chat.txt`) to ontologically-grounded graph nodes via Haiku-driven classification. Triggers when the operator uploads or fetches a document or chat export for ingestion. One skill for every input shape — no per-doctype, no per-channel branching.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# Document Ingest
|
|
7
7
|
|
|
8
|
-
Ingests any unstructured
|
|
8
|
+
Ingests any unstructured input — documents (PDF, text, transcript, web page) and chat archives (WhatsApp `_chat.txt`) — into the graph. Every classified section becomes one `:Section` node. **Two parent shapes, one pipeline:**
|
|
9
|
+
|
|
10
|
+
| Input shape | Parent label | Section secondary label | Identity property | mode |
|
|
11
|
+
|---|---|---|---|---|
|
|
12
|
+
| PDF / text / web (default) | `:KnowledgeDocument` | `:Section:<Kind>` from closed enumeration (`Position`, `Chapter`, `Parties`, …) | `attachmentId` | `document` |
|
|
13
|
+
| Chat archive (`_chat.txt`) | `:ConversationArchive` | `:Section:Conversation` | `conversationIdentity` | `chat` |
|
|
14
|
+
|
|
15
|
+
The classifier in `memory-classify` decides which section kind each section maps to (document mode) or chunks the archive into topic-bounded `:Section:Conversation` nodes (chat mode). The skill orchestrates the pipeline; the classifier reads the loaded ontology; the writer enforces schema validation. **Classifier failure is terminal — the ingest aborts entirely; nothing is written. Loud failures, never silent landfill.**
|
|
16
|
+
|
|
17
|
+
## Routing — chat vs document (mandatory first decision)
|
|
18
|
+
|
|
19
|
+
Before anchor confirmation, decide the parent shape from the input:
|
|
20
|
+
|
|
21
|
+
- **Chat archive** — input filename ends in `_chat.txt`, the dispatch brief names the input as a WhatsApp chat / messaging-channel export, or the operator labels it as such. Set `mode='chat'` and `parentLabel='ConversationArchive'`. Skip anchor confirmation; run participant confirmation instead (see § Participant confirmation). The classifier produces `:Section:Conversation` chunks; no anchor edges, no related entities.
|
|
22
|
+
- **Document** — everything else. Set `mode='document'` and `parentLabel='KnowledgeDocument'` (or omit — these are the defaults). Run the anchor confirmation flow below.
|
|
23
|
+
|
|
24
|
+
Both branches go through the same three tools (`memory-ingest-extract` → `memory-classify` → `memory-ingest`); only the parameters differ.
|
|
9
25
|
|
|
10
26
|
## Anchor confirmation (mandatory first step)
|
|
11
27
|
|
|
@@ -27,6 +43,26 @@ The confirmation flow:
|
|
|
27
43
|
3. Run a one-shot graph read to resolve the anchor's element ID. For UserProfile: `MATCH (u:UserProfile {accountId: $accountId}) RETURN elementId(u) AS anchorId, 'UserProfile' AS anchorLabel`. For LocalBusiness: `MATCH (b:LocalBusiness {accountId: $accountId}) RETURN elementId(b) AS anchorId, 'LocalBusiness' AS anchorLabel`. For a third party: search by name via `memory-search` and pick the matching node.
|
|
28
44
|
4. Persist `$anchorNodeId` and `$anchorLabel` for the rest of the run. These flow into both `memory-classify` (as part of the `anchorDescription`) and `memory-ingest` (as the `anchorNodeId` + `anchorLabel` parameters).
|
|
29
45
|
|
|
46
|
+
## Participant confirmation (chat mode only)
|
|
47
|
+
|
|
48
|
+
Chat archives are multi-party — no single subject anchor. Instead, every distinct sender name in the archive must resolve to an existing `:AdminUser` or `:Person` elementId before any classify or ingest call. No auto-creation; missing participants are blockers, not silent skips.
|
|
49
|
+
|
|
50
|
+
The confirmation flow:
|
|
51
|
+
|
|
52
|
+
1. Read the dispatch brief. Extract the archive path and any operator-stated participant identities.
|
|
53
|
+
2. Read a small sample of the archive (head ~50 lines) via the `Read` tool to discover the distinct sender names that appear at line starts after the bracketed-timestamp prefix.
|
|
54
|
+
3. For every distinct senderName, search the graph via `memory-search` (or a one-shot `MATCH (n) WHERE (n:Person OR n:AdminUser) AND n.accountId = $accountId AND (n.name = $name OR (n.givenName + ' ' + n.familyName) = $name) RETURN elementId(n)`).
|
|
55
|
+
4. **Resolved fully** — every senderName mapped to exactly one elementId. Capture the owner's elementId (the operator who exported the archive — usually the `:AdminUser` for this account; ask if ambiguous) and the comma-separated list of remaining participant elementIds.
|
|
56
|
+
5. **Unresolved** — at least one senderName has no matching node. Surface to the operator: *"Archive `<filename>` mentions sender `<name>` but no `:Person` / `:AdminUser` matches. Create the contact first or correct the name, then re-dispatch."* Do NOT proceed.
|
|
57
|
+
6. **Ambiguous** — a senderName matches multiple nodes. Ask the operator which one. Do NOT proceed until disambiguated.
|
|
58
|
+
|
|
59
|
+
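Persist `$ownerElementId` and `$participantElementIds` (array, owner excluded) for the run. Together they form the `participantElementIds` parameter passed to `memory-ingest` (owner + others, deduped) — note that the parameter includes the owner even though the persisted `$participantElementIds` variable does not.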
|
|
60
|
+
|
|
61
|
+
Compute archive metadata before classify:
|
|
62
|
+
- `archiveSha256` — run `sha256sum "<file>" | cut -d' ' -f1` in a shell. Stamped on the parent + every chunk.
|
|
63
|
+
- `archiveSourceFile` — the basename (e.g. `_chat.txt`).
|
|
64
|
+
- `conversationIdentity` — pass as the `attachmentId` parameter to `memory-ingest`. Format: `chat:<sha256(accountId + ":" + sortedParticipantElementIds)>` where `sortedParticipantElementIds` is the sorted-then-comma-joined list of `[owner, ...participants]`. Same conversation across re-exports → same identity → idempotent MERGE on the `:ConversationArchive`.
|
|
65
|
+
|
|
30
66
|
## Pipeline
|
|
31
67
|
|
|
32
68
|
Four steps in order. Steps 1–3 are deterministic tool calls (the agent does not classify; the agent calls the classifier tool). Step 4 is agent-driven graph writes against the existing graph, gated by the dispatch brief's named entity list. Hallucination defence and ontology validation stay server-side in `memory-classify` and the `memory-write` validator.
|
|
@@ -68,7 +104,13 @@ Form: "Classifying `<filename>` (`<N>` chars) — expect ~`<estimate>`."
|
|
|
68
104
|
|
|
69
105
|
### 2. `memory-classify`
|
|
70
106
|
|
|
71
|
-
Calls Haiku with the loaded ontology and the cached text.
|
|
107
|
+
Calls Haiku with the loaded ontology and the cached text.
|
|
108
|
+
|
|
109
|
+
**Document mode (default).** Inputs: `attachmentId` (same one), `anchorDescription` (a short sentence built from the confirmed anchor — e.g. `"subject = UserProfile (the account owner); edges from UserProfile."` or `"subject = LocalBusiness {name: 'Acme Roofing'} (the operator's business); edges from LocalBusiness."`).
|
|
110
|
+
|
|
111
|
+
**Chat mode.** Inputs: `attachmentId` (same one), `mode='chat'`, `anchorDescription` (a short sentence naming the conversation — e.g. `"WhatsApp conversation between Joel and Adam (2 participants)"`). The chat prompt drops the natural-edge map, the closed enumeration, and the orphan logic — Haiku produces topic-bounded `:Section:Conversation` chunks with `summary`, `keywords`, `firstMessageAt`, `lastMessageAt`, `participantNames`, `messageCount` per chunk. The whole archive may produce one chunk (short conversation) or many (long chat with topic transitions); chunks cover every message in chronological order with no gaps.
|
|
112
|
+
|
|
113
|
+
Returns:
|
|
72
114
|
|
|
73
115
|
- `documentSummary` — 1-3 sentences for the KnowledgeDocument node
|
|
74
116
|
- `documentKeywords` — 3-10 lowercase topic keywords
|
|
@@ -83,7 +125,13 @@ After step 2 succeeds, emit a chat message before step 3 naming what the classif
|
|
|
83
125
|
|
|
84
126
|
### 3. `memory-ingest`
|
|
85
127
|
|
|
86
|
-
Writes the classified document
|
|
128
|
+
Writes the classified document or chat archive.
|
|
129
|
+
|
|
130
|
+
**Document mode (default).** Inputs: `attachmentId`, `documentSummary`, `anchorNodeId`, `anchorLabel`, `sections`, `documentEdges` (pass through if present), `orphanCandidates` (pass through if present), `scope` (from the brief — confirm with the operator if absent), optional `documentKeywords`, `userKeywords`, `sourceUrl`, `sourceType`.
|
|
131
|
+
|
|
132
|
+
**Chat mode.** Inputs: `attachmentId` (set to `conversationIdentity`), `parentLabel='ConversationArchive'`, `documentSummary`, `sections` (the chunks from chat-mode classify), `scope`, plus the chat-archive metadata: `archiveSha256` (cleanup discriminator), `archiveSourceFile` (audit), `participantElementIds` (owner + others, for `:PARTICIPANT_IN` edges). Pass `anchorNodeId` and `anchorLabel` as any non-empty placeholder (e.g. the owner's elementId + `'AdminUser'`) — they are unused on the chat path, but both parameters are non-optional. The writer MERGEs `:ConversationArchive { conversationIdentity }`, drops any chunks stamped with this `archiveSha256` (idempotent re-ingest), CREATEs new chunks chained by `:NEXT`, and MERGEs `:PARTICIPANT_IN` edges from each participant.
|
|
133
|
+
|
|
134
|
+
Returns:
|
|
87
135
|
|
|
88
136
|
- `documentNodeId`, `sectionCount`
|
|
89
137
|
- `kindBreakdown` — per-kind count, e.g. `{"Position": 4, "Chapter": 12, "Other": 1}`
|
|
@@ -99,6 +147,8 @@ Re-ingesting the same `attachmentId` is safe — the writer drops prior `:Sectio
|
|
|
99
147
|
|
|
100
148
|
### 4. `wire-brief-entities`
|
|
101
149
|
|
|
150
|
+
**Skipped in chat mode** — `:ConversationArchive` does not carry KD-level brief-wired edges; participants are already attached via `:PARTICIPANT_IN` and message bodies stay verbatim inside chunk text (mention extraction is deferred to a separate insight-derivation task). Document mode only.
|
|
151
|
+
|
|
102
152
|
After `memory-ingest` returns the new KnowledgeDocument's `documentNodeId`, this step iterates the entities the dispatch brief named and connects each to the new document with the natural KD-level edge.
|
|
103
153
|
|
|
104
154
|
**Entity sources.** The dispatch brief's "key entities to connect" list. Brief shape: prose names of Persons, Organizations, Services, Tasks, Events, KnowledgeDocuments, BrandingData that the document describes or references. Example: *"Person nodes for Joel Smalley, Adam Mackay, Dan McLeod; LocalBusiness / Organization nodes for Real Agent / Real Agency; Any existing Task nodes related to Real Agent Lettings."* Extract every named entity from the brief before any `memory-write`.
|
|
@@ -51,7 +51,7 @@ When per-group activation is `mention`, the agent fires only if the inbound mess
|
|
|
51
51
|
|
|
52
52
|
## Live persistence
|
|
53
53
|
|
|
54
|
-
Every `messages.upsert` event (both `notify` and `append`, both `fromMe` directions) writes a `:Message:WhatsAppMessage` row to Neo4j attached to the sessionKey-keyed `:Conversation`. A single capture site at `platform/ui/app/lib/whatsapp/manager.ts` covers inbound, outbound (Baileys echoes agent-sent messages back through `messages.upsert` with `fromMe=true`), and owner-mirror — without touching `outbound/send.ts`. `messageId` namespace is `whatsapp-live:<waName>:<remoteJid>:<msg.key.id>` where `<waName>` is the Baileys credential dirname (e.g. `default`);
|
|
54
|
+
Every `messages.upsert` event (both `notify` and `append`, both `fromMe` directions) writes a `:Message:WhatsAppMessage` row to Neo4j attached to the sessionKey-keyed `:Conversation`. A single capture site at `platform/ui/app/lib/whatsapp/manager.ts` covers inbound, outbound (Baileys echoes agent-sent messages back through `messages.upsert` with `fromMe=true`), and owner-mirror — without touching `outbound/send.ts`. `messageId` namespace is `whatsapp-live:<waName>:<remoteJid>:<msg.key.id>` where `<waName>` is the Baileys credential dirname (e.g. `default`). The live plugin writes `:Message:WhatsAppMessage` rows; offline `_chat.txt` archives are document-shaped narrative and route through `document-ingest` with `parentLabel='ConversationArchive'` (unified-ingest pipeline migration) producing `:Section:Conversation` chunks — entity types differ, no namespace collision. Persist failures are loud (`[whatsapp-persist] FAIL …`) and never block dispatch — silent loss is the worse failure mode.
|
|
55
55
|
|
|
56
56
|
**`accountId` contract.** `n.accountId` on every `:Conversation`, `:Person`, and `:Message:WhatsAppMessage` row stamped by this plugin is the **platform-side UUID** resolved by [`resolvePlatformAccountId()`](../../ui/app/lib/whatsapp/platform-account-id.ts) from `data/accounts/<uuid>/account.json` — NOT the Baileys credential dirname (which is only used as the `messageId`/`sessionKey` namespace token). The boot-time line `[whatsapp-persist] resolved-account-id waname=<dir> uuid=<uuid>` records the resolution. Doctrine: see `.docs/neo4j.md` "Account isolation invariant" — migration 004 `pruneAlienAccounts` `DETACH DELETE`s any node whose `accountId` is not a UUID dir on every boot. The helper loud-throws on zero or multi accounts (Phase 0 single-account invariant), aborting the WhatsApp connection start before any write can occur.
|
|
57
57
|
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { ParsedLine } from "./parse-export.js";
|
|
2
|
+
export type CursorResult = {
|
|
3
|
+
kind: "found";
|
|
4
|
+
deltaStart: number;
|
|
5
|
+
} | {
|
|
6
|
+
kind: "empty";
|
|
7
|
+
} | {
|
|
8
|
+
kind: "missing";
|
|
9
|
+
};
|
|
10
|
+
/**
|
|
11
|
+
* Walk parsed lines forward and return the first index whose content hash
|
|
12
|
+
* matches `lastIngestedMessageHash`. The first match is correct because
|
|
13
|
+
* messages with identical (dateSent, normalisedSenderName, body) tuples
|
|
14
|
+
* are genuine duplicates — there is no way to disambiguate them and slicing
|
|
15
|
+
* after the first occurrence is the chronologically safe choice.
|
|
16
|
+
*/
|
|
17
|
+
export declare function findDeltaCursor(parsedLines: readonly ParsedLine[], lastIngestedMessageHash: string): CursorResult;
|
|
18
|
+
//# sourceMappingURL=delta-cursor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"delta-cursor.d.ts","sourceRoot":"","sources":["../src/delta-cursor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAqBpD,MAAM,MAAM,YAAY,GACpB;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,UAAU,EAAE,MAAM,CAAA;CAAE,GACrC;IAAE,IAAI,EAAE,OAAO,CAAA;CAAE,GACjB;IAAE,IAAI,EAAE,SAAS,CAAA;CAAE,CAAC;AAExB;;;;;;GAMG;AACH,wBAAgB,eAAe,CAC7B,WAAW,EAAE,SAAS,UAAU,EAAE,EAClC,uBAAuB,EAAE,MAAM,GAC9B,YAAY,CAiBd"}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.findDeltaCursor = findDeltaCursor;
|
|
4
|
+
const derive_keys_js_1 = require("./derive-keys.js");
|
|
5
|
+
/**
|
|
6
|
+
* Walk parsed lines forward and return the first index whose content hash
|
|
7
|
+
* matches `lastIngestedMessageHash`. The first match is correct because
|
|
8
|
+
* messages with identical (dateSent, normalisedSenderName, body) tuples
|
|
9
|
+
* are genuine duplicates — there is no way to disambiguate them and slicing
|
|
10
|
+
* after the first occurrence is the chronologically safe choice.
|
|
11
|
+
*/
|
|
12
|
+
function findDeltaCursor(parsedLines, lastIngestedMessageHash) {
|
|
13
|
+
if (!lastIngestedMessageHash || !lastIngestedMessageHash.trim()) {
|
|
14
|
+
throw new Error("findDeltaCursor: lastIngestedMessageHash must be non-empty");
|
|
15
|
+
}
|
|
16
|
+
for (let i = 0; i < parsedLines.length; i++) {
|
|
17
|
+
const line = parsedLines[i];
|
|
18
|
+
const hash = (0, derive_keys_js_1.deriveMessageContentHash)({
|
|
19
|
+
dateSent: line.dateSent,
|
|
20
|
+
senderName: line.senderName,
|
|
21
|
+
body: line.body,
|
|
22
|
+
});
|
|
23
|
+
if (hash === lastIngestedMessageHash) {
|
|
24
|
+
if (i === parsedLines.length - 1)
|
|
25
|
+
return { kind: "empty" };
|
|
26
|
+
return { kind: "found", deltaStart: i + 1 };
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
return { kind: "missing" };
|
|
30
|
+
}
|
|
31
|
+
//# sourceMappingURL=delta-cursor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"delta-cursor.js","sourceRoot":"","sources":["../src/delta-cursor.ts"],"names":[],"mappings":";;AAiCA,0CAoBC;AApDD,qDAA4D;AAyB5D;;;;;;GAMG;AACH,SAAgB,eAAe,CAC7B,WAAkC,EAClC,uBAA+B;IAE/B,IAAI,CAAC,uBAAuB,IAAI,CAAC,uBAAuB,CAAC,IAAI,EAAE,EAAE,CAAC;QAChE,MAAM,IAAI,KAAK,CAAC,4DAA4D,CAAC,CAAC;IAChF,CAAC;IACD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5C,MAAM,IAAI,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;QAC5B,MAAM,IAAI,GAAG,IAAA,yCAAwB,EAAC;YACpC,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,IAAI,EAAE,IAAI,CAAC,IAAI;SAChB,CAAC,CAAC;QACH,IAAI,IAAI,KAAK,uBAAuB,EAAE,CAAC;YACrC,IAAI,CAAC,KAAK,WAAW,CAAC,MAAM,GAAG,CAAC;gBAAE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC;YAC3D,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC;QAC9C,CAAC;IACH,CAAC;IACD,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;AAC7B,CAAC"}
|
|
@@ -1,21 +1,36 @@
|
|
|
1
1
|
export declare function normaliseSenderName(name: string): string;
|
|
2
2
|
export declare function sha256Hex(input: string): string;
|
|
3
|
-
export interface
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
export interface DeriveConversationIdentityInput {
|
|
4
|
+
accountId: string;
|
|
5
|
+
/**
|
|
6
|
+
* Element IDs of every confirmed participant (owner + others). Order is
|
|
7
|
+
* not significant; the function sorts internally so the same set always
|
|
8
|
+
* produces the same identity.
|
|
9
|
+
*/
|
|
10
|
+
participantElementIds: readonly string[];
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* Compute the stable identity for a conversation. Same accountId + same
|
|
14
|
+
* participant set ⇒ same identity, regardless of message content or export
|
|
15
|
+
* file bytes. DM and group chats use this identical formula.
|
|
16
|
+
*/
|
|
17
|
+
export declare function deriveConversationIdentity(input: DeriveConversationIdentityInput): string;
|
|
18
|
+
export interface DeriveMessageContentHashInput {
|
|
6
19
|
/** ISO 8601 with timezone offset, as emitted by parseExport. */
|
|
7
20
|
dateSent: string;
|
|
8
21
|
/** Raw senderName from the export line. Normalised internally. */
|
|
9
22
|
senderName: string;
|
|
10
|
-
/** Raw message body.
|
|
23
|
+
/** Raw message body. */
|
|
11
24
|
body: string;
|
|
12
25
|
}
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
export
|
|
26
|
+
/**
|
|
27
|
+
* Compute a content-only hash for a single message. Used as the delta-append
|
|
28
|
+
* cursor: `:ConversationArchive.lastIngestedMessageHash` records the hash of
|
|
29
|
+
* the last ingested message; on re-import, the orchestrator finds the line
|
|
30
|
+
* with the matching hash and slices everything after it.
|
|
31
|
+
*
|
|
32
|
+
* Excludes archive sha256 deliberately — the cursor must survive a fresh
|
|
33
|
+
* re-export of the same chat (different file bytes, same message tuples).
|
|
34
|
+
*/
|
|
35
|
+
export declare function deriveMessageContentHash(input: DeriveMessageContentHashInput): string;
|
|
21
36
|
//# sourceMappingURL=derive-keys.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"derive-keys.d.ts","sourceRoot":"","sources":["../src/derive-keys.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"derive-keys.d.ts","sourceRoot":"","sources":["../src/derive-keys.ts"],"names":[],"mappings":"AAsBA,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAExD;AAED,wBAAgB,SAAS,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAE/C;AAED,MAAM,WAAW,+BAA+B;IAC9C,SAAS,EAAE,MAAM,CAAC;IAClB;;;;OAIG;IACH,qBAAqB,EAAE,SAAS,MAAM,EAAE,CAAC;CAC1C;AAED;;;;GAIG;AACH,wBAAgB,0BAA0B,CACxC,KAAK,EAAE,+BAA+B,GACrC,MAAM,CASR;AAED,MAAM,WAAW,6BAA6B;IAC5C,gEAAgE;IAChE,QAAQ,EAAE,MAAM,CAAC;IACjB,kEAAkE;IAClE,UAAU,EAAE,MAAM,CAAC;IACnB,wBAAwB;IACxB,IAAI,EAAE,MAAM,CAAC;CACd;AAED;;;;;;;;GAQG;AACH,wBAAgB,wBAAwB,CACtC,KAAK,EAAE,6BAA6B,GACnC,MAAM,CAGR"}
|
|
@@ -2,26 +2,27 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.normaliseSenderName = normaliseSenderName;
|
|
4
4
|
exports.sha256Hex = sha256Hex;
|
|
5
|
-
exports.
|
|
6
|
-
exports.
|
|
5
|
+
exports.deriveConversationIdentity = deriveConversationIdentity;
|
|
6
|
+
exports.deriveMessageContentHash = deriveMessageContentHash;
|
|
7
7
|
const node_crypto_1 = require("node:crypto");
|
|
8
8
|
// ---------------------------------------------------------------------------
|
|
9
|
-
// derive-keys — natural-key derivation for whatsapp-import (Task
|
|
9
|
+
// derive-keys — natural-key derivation for whatsapp-import (Task 891,
|
|
10
|
+
// supersedes Task 870's per-message contract).
|
|
10
11
|
//
|
|
11
12
|
// Pure functions. No I/O. The whole point is that re-imports of the same
|
|
12
|
-
// archive collapse to the same
|
|
13
|
-
//
|
|
13
|
+
// archive collapse to the same identity regardless of release-level drift in
|
|
14
|
+
// chunk indices, hash widths, or arbitrary tiebreakers.
|
|
14
15
|
//
|
|
15
|
-
//
|
|
16
|
+
// Identity contracts (Task 891 brief):
|
|
16
17
|
//
|
|
17
|
-
//
|
|
18
|
-
//
|
|
19
|
-
// :<sha256-hex(body)>
|
|
18
|
+
// conversationIdentity = sha256(accountId + ":" + sortedParticipantElementIds.join(","))
|
|
19
|
+
// messageContentHash = sha256(dateSent + "|" + NFKC-trim-lower(senderName) + "|" + body)
|
|
20
20
|
//
|
|
21
|
-
//
|
|
22
|
-
//
|
|
23
|
-
//
|
|
24
|
-
//
|
|
21
|
+
// `conversationIdentity` is stable across re-exports — same operator + same
|
|
22
|
+
// participant set → same identity, regardless of file bytes. DM and group
|
|
23
|
+
// follow the same formula; the difference is the participant array length.
|
|
24
|
+
// `messageContentHash` is content-only (no archive sha256, no chunk index)
|
|
25
|
+
// so cursor lookup survives a fresh re-export of the same chat.
|
|
25
26
|
// ---------------------------------------------------------------------------
|
|
26
27
|
function normaliseSenderName(name) {
|
|
27
28
|
return name.normalize("NFKC").trim().toLowerCase();
|
|
@@ -29,13 +30,32 @@ function normaliseSenderName(name) {
|
|
|
29
30
|
/**
 * Hash `input` with SHA-256 and return the digest encoded as a hex string.
 */
function sha256Hex(input) {
    const hasher = (0, node_crypto_1.createHash)("sha256");
    hasher.update(input);
    return hasher.digest("hex");
}
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
33
|
+
/**
 * Compute the stable identity for a conversation.
 *
 * The identity is `sha256Hex(accountId + ":" + ids.join(","))`, where `ids` is
 * the participant element-ID list reduced to its unique values and sorted.
 * The same accountId plus the same participant SET therefore yields the same
 * identity, whatever order the IDs arrive in and however often one is repeated.
 * Message content and export file bytes are not inputs. DM and group chats
 * use this identical formula.
 *
 * @param {{ accountId: string, participantElementIds: readonly string[] }} input
 * @returns {string} The value returned by `sha256Hex` for the identity string.
 * @throws {Error} If `accountId` is empty or whitespace-only, or if
 *   `participantElementIds` is empty.
 */
function deriveConversationIdentity(input) {
    if (!input.accountId || !input.accountId.trim()) {
        throw new Error("deriveConversationIdentity: accountId is required");
    }
    if (input.participantElementIds.length === 0) {
        throw new Error("deriveConversationIdentity: participantElementIds must be non-empty");
    }
    // Treat the participants as a set: without de-duplication a repeated ID
    // would change the joined string and yield a different identity for the
    // same conversation. The copy also leaves the caller's array untouched.
    // Default sort() orders by UTF-16 code units, so it is locale-independent.
    const participants = [...new Set(input.participantElementIds)].sort();
    return sha256Hex(`${input.accountId}:${participants.join(",")}`);
}
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
48
|
+
/**
 * Fingerprint a single message from its content alone.
 *
 * The hashed string is `dateSent|normalisedSenderName|body`. It serves as the
 * delta-append cursor: `:ConversationArchive.lastIngestedMessageHash` stores
 * the hash of the last ingested message, and on re-import the orchestrator
 * looks for the line with the same hash and takes everything after it.
 *
 * The archive sha256 is deliberately not part of the input, so the cursor
 * still matches after a fresh re-export of the same chat (different file
 * bytes, same message tuples).
 */
function deriveMessageContentHash(input) {
    const sender = normaliseSenderName(input.senderName);
    const material = "".concat(input.dateSent, "|", sender, "|", input.body);
    return sha256Hex(material);
}
|
|
41
61
|
//# sourceMappingURL=derive-keys.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"derive-keys.js","sourceRoot":"","sources":["../src/derive-keys.ts"],"names":[],"mappings":";;
|
|
1
|
+
{"version":3,"file":"derive-keys.js","sourceRoot":"","sources":["../src/derive-keys.ts"],"names":[],"mappings":";;AAsBA,kDAEC;AAED,8BAEC;AAiBD,gEAWC;AAoBD,4DAKC;AAjFD,6CAAyC;AAEzC,8EAA8E;AAC9E,sEAAsE;AACtE,+CAA+C;AAC/C,EAAE;AACF,yEAAyE;AACzE,6EAA6E;AAC7E,wDAAwD;AACxD,EAAE;AACF,uCAAuC;AACvC,EAAE;AACF,6FAA6F;AAC7F,+FAA+F;AAC/F,EAAE;AACF,4EAA4E;AAC5E,0EAA0E;AAC1E,2EAA2E;AAC3E,2EAA2E;AAC3E,gEAAgE;AAChE,8EAA8E;AAE9E,SAAgB,mBAAmB,CAAC,IAAY;IAC9C,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;AACrD,CAAC;AAED,SAAgB,SAAS,CAAC,KAAa;IACrC,OAAO,IAAA,wBAAU,EAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AAC1D,CAAC;AAYD;;;;GAIG;AACH,SAAgB,0BAA0B,CACxC,KAAsC;IAEtC,IAAI,CAAC,KAAK,CAAC,SAAS,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,IAAI,EAAE,EAAE,CAAC;QAChD,MAAM,IAAI,KAAK,CAAC,mDAAmD,CAAC,CAAC;IACvE,CAAC;IACD,IAAI,KAAK,CAAC,qBAAqB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC7C,MAAM,IAAI,KAAK,CAAC,qEAAqE,CAAC,CAAC;IACzF,CAAC;IACD,MAAM,MAAM,GAAG,CAAC,GAAG,KAAK,CAAC,qBAAqB,CAAC,CAAC,IAAI,EAAE,CAAC;IACvD,OAAO,SAAS,CAAC,GAAG,KAAK,CAAC,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;AAC7D,CAAC;AAWD;;;;;;;;GAQG;AACH,SAAgB,wBAAwB,CACtC,KAAoC;IAEpC,MAAM,IAAI,GAAG,mBAAmB,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;IACnD,OAAO,SAAS,CAAC,GAAG,KAAK,CAAC,QAAQ,IAAI,IAAI,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;AAC9D,CAAC"}
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
export { parseExport } from "./parse-export.js";
|
|
2
2
|
export type { ParseExportInput, ParseExportResult, ParseExportCounters, ParsedLine, } from "./parse-export.js";
|
|
3
|
-
export {
|
|
4
|
-
export type {
|
|
5
|
-
export {
|
|
6
|
-
export type {
|
|
3
|
+
export { normaliseSenderName, sha256Hex, deriveConversationIdentity, deriveMessageContentHash, } from "./derive-keys.js";
|
|
4
|
+
export type { DeriveConversationIdentityInput, DeriveMessageContentHashInput, } from "./derive-keys.js";
|
|
5
|
+
export { sessionize } from "./sessionize.js";
|
|
6
|
+
export type { Session } from "./sessionize.js";
|
|
7
|
+
export { toClassifierInput } from "./to-classifier-input.js";
|
|
8
|
+
export { findDeltaCursor } from "./delta-cursor.js";
|
|
9
|
+
export type { CursorResult } from "./delta-cursor.js";
|
|
7
10
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,YAAY,EACV,gBAAgB,EAChB,iBAAiB,EACjB,mBAAmB,EACnB,UAAU,GACX,MAAM,mBAAmB,CAAC;AAC3B,OAAO,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,YAAY,EACV,gBAAgB,EAChB,iBAAiB,EACjB,mBAAmB,EACnB,UAAU,GACX,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EACL,mBAAmB,EACnB,SAAS,EACT,0BAA0B,EAC1B,wBAAwB,GACzB,MAAM,kBAAkB,CAAC;AAC1B,YAAY,EACV,+BAA+B,EAC/B,6BAA6B,GAC9B,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,YAAY,EAAE,OAAO,EAAE,MAAM,iBAAiB,CAAC;AAC/C,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AACpD,YAAY,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC"}
|
|
@@ -1,14 +1,17 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.
|
|
3
|
+
exports.findDeltaCursor = exports.toClassifierInput = exports.sessionize = exports.deriveMessageContentHash = exports.deriveConversationIdentity = exports.sha256Hex = exports.normaliseSenderName = exports.parseExport = void 0;
|
|
4
4
|
var parse_export_js_1 = require("./parse-export.js");
|
|
5
5
|
Object.defineProperty(exports, "parseExport", { enumerable: true, get: function () { return parse_export_js_1.parseExport; } });
|
|
6
|
-
var filter_js_1 = require("./filter.js");
|
|
7
|
-
Object.defineProperty(exports, "parseFilterArg", { enumerable: true, get: function () { return filter_js_1.parseFilterArg; } });
|
|
8
|
-
Object.defineProperty(exports, "applyFilter", { enumerable: true, get: function () { return filter_js_1.applyFilter; } });
|
|
9
6
|
var derive_keys_js_1 = require("./derive-keys.js");
|
|
10
7
|
Object.defineProperty(exports, "normaliseSenderName", { enumerable: true, get: function () { return derive_keys_js_1.normaliseSenderName; } });
|
|
11
8
|
Object.defineProperty(exports, "sha256Hex", { enumerable: true, get: function () { return derive_keys_js_1.sha256Hex; } });
|
|
12
|
-
Object.defineProperty(exports, "
|
|
13
|
-
Object.defineProperty(exports, "
|
|
9
|
+
Object.defineProperty(exports, "deriveConversationIdentity", { enumerable: true, get: function () { return derive_keys_js_1.deriveConversationIdentity; } });
|
|
10
|
+
Object.defineProperty(exports, "deriveMessageContentHash", { enumerable: true, get: function () { return derive_keys_js_1.deriveMessageContentHash; } });
|
|
11
|
+
var sessionize_js_1 = require("./sessionize.js");
|
|
12
|
+
Object.defineProperty(exports, "sessionize", { enumerable: true, get: function () { return sessionize_js_1.sessionize; } });
|
|
13
|
+
var to_classifier_input_js_1 = require("./to-classifier-input.js");
|
|
14
|
+
Object.defineProperty(exports, "toClassifierInput", { enumerable: true, get: function () { return to_classifier_input_js_1.toClassifierInput; } });
|
|
15
|
+
var delta_cursor_js_1 = require("./delta-cursor.js");
|
|
16
|
+
Object.defineProperty(exports, "findDeltaCursor", { enumerable: true, get: function () { return delta_cursor_js_1.findDeltaCursor; } });
|
|
14
17
|
//# sourceMappingURL=index.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;AAAA,qDAAgD;AAAvC,8GAAA,WAAW,OAAA;AAOpB,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;AAAA,qDAAgD;AAAvC,8GAAA,WAAW,OAAA;AAOpB,mDAK0B;AAJxB,qHAAA,mBAAmB,OAAA;AACnB,2GAAA,SAAS,OAAA;AACT,4HAAA,0BAA0B,OAAA;AAC1B,0HAAA,wBAAwB,OAAA;AAM1B,iDAA6C;AAApC,2GAAA,UAAU,OAAA;AAEnB,mEAA6D;AAApD,2HAAA,iBAAiB,OAAA;AAC1B,qDAAoD;AAA3C,kHAAA,eAAe,OAAA"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { ParsedLine } from "./parse-export.js";
|
|
2
|
+
export interface Session {
|
|
3
|
+
/** 0-based index across the archive's sessions. */
|
|
4
|
+
index: number;
|
|
5
|
+
/** ISO 8601 timestamp of the first message in the session. */
|
|
6
|
+
firstMessageAt: string;
|
|
7
|
+
/** ISO 8601 timestamp of the last message in the session. */
|
|
8
|
+
lastMessageAt: string;
|
|
9
|
+
/** Messages in the session, chronological. */
|
|
10
|
+
messages: ParsedLine[];
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* Split parsed messages into sessions on gaps ≥ `gapHours`. Input must be
|
|
14
|
+
* pre-sorted by `dateSent` (parse-export emits in file order, which IS
|
|
15
|
+
* chronological for any well-formed `_chat.txt`).
|
|
16
|
+
*
|
|
17
|
+
* Boundary semantics (exact-at-threshold):
|
|
18
|
+
* gap == gapHours → cut here (start a new session)
|
|
19
|
+
* gap < gapHours → same session
|
|
20
|
+
* gap > gapHours → cut here
|
|
21
|
+
*
|
|
22
|
+
* Empty input returns []; single-message input returns one one-message session.
|
|
23
|
+
*/
|
|
24
|
+
export declare function sessionize(messages: readonly ParsedLine[], gapHours: number): Session[];
|
|
25
|
+
//# sourceMappingURL=sessionize.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sessionize.d.ts","sourceRoot":"","sources":["../src/sessionize.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAsBpD,MAAM,WAAW,OAAO;IACtB,mDAAmD;IACnD,KAAK,EAAE,MAAM,CAAC;IACd,8DAA8D;IAC9D,cAAc,EAAE,MAAM,CAAC;IACvB,6DAA6D;IAC7D,aAAa,EAAE,MAAM,CAAC;IACtB,8CAA8C;IAC9C,QAAQ,EAAE,UAAU,EAAE,CAAC;CACxB;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,UAAU,CACxB,QAAQ,EAAE,SAAS,UAAU,EAAE,EAC/B,QAAQ,EAAE,MAAM,GACf,OAAO,EAAE,CAgCX"}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.sessionize = sessionize;
|
|
4
|
+
/**
 * Split parsed messages into sessions on gaps ≥ `gapHours`. Input must be
 * pre-sorted by `dateSent` (parse-export emits in file order, which IS
 * chronological for any well-formed `_chat.txt`).
 *
 * Boundary semantics (exact-at-threshold):
 *   gap == gapHours  → cut here (start a new session)
 *   gap <  gapHours  → same session
 *   gap >  gapHours  → cut here
 *
 * Empty input returns []; single-message input returns one one-message session.
 *
 * A `dateSent` that `Date.parse` cannot read produces a NaN gap, which never
 * satisfies the cut condition, so that message stays in the current session.
 *
 * @param messages Messages in chronological order; only `dateSent` is read.
 * @param gapHours Minimum silence, in hours, that starts a new session.
 * @returns Sessions in order, each with a 0-based `index`, the `dateSent` of
 *   its first and last message, and its messages.
 * @throws {Error} If `gapHours` is not a number greater than zero (this
 *   includes NaN and undefined).
 */
function sessionize(messages, gapHours) {
    // `!(x > 0)` rather than `x <= 0`: NaN and undefined compare false either
    // way, so the `<=` form let them through and the resulting NaN threshold
    // silently disabled every session cut.
    if (!(gapHours > 0)) {
        throw new Error(`sessionize: gapHours must be positive, got ${gapHours}`);
    }
    if (messages.length === 0)
        return [];
    const gapMs = gapHours * 60 * 60 * 1000;
    const sessions = [];
    let currentMessages = [messages[0]];
    // Close the session being accumulated and append it to the result.
    const flush = () => {
        sessions.push({
            index: sessions.length,
            firstMessageAt: currentMessages[0].dateSent,
            lastMessageAt: currentMessages[currentMessages.length - 1].dateSent,
            messages: currentMessages,
        });
    };
    // Carry the previous timestamp forward so each dateSent is parsed once.
    let prevMs = Date.parse(messages[0].dateSent);
    for (let i = 1; i < messages.length; i++) {
        const currMs = Date.parse(messages[i].dateSent);
        if (currMs - prevMs >= gapMs) {
            flush();
            currentMessages = [messages[i]];
        }
        else {
            currentMessages.push(messages[i]);
        }
        prevMs = currMs;
    }
    flush();
    return sessions;
}
|
|
48
|
+
//# sourceMappingURL=sessionize.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sessionize.js","sourceRoot":"","sources":["../src/sessionize.ts"],"names":[],"mappings":";;AA6CA,gCAmCC;AA/CD;;;;;;;;;;;GAWG;AACH,SAAgB,UAAU,CACxB,QAA+B,EAC/B,QAAgB;IAEhB,IAAI,QAAQ,IAAI,CAAC,EAAE,CAAC;QAClB,MAAM,IAAI,KAAK,CAAC,8CAA8C,QAAQ,EAAE,CAAC,CAAC;IAC5E,CAAC;IACD,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAErC,MAAM,KAAK,GAAG,QAAQ,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC;IACxC,MAAM,QAAQ,GAAc,EAAE,CAAC;IAC/B,IAAI,eAAe,GAAiB,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;IAElD,MAAM,KAAK,GAAG,GAAG,EAAE;QACjB,QAAQ,CAAC,IAAI,CAAC;YACZ,KAAK,EAAE,QAAQ,CAAC,MAAM;YACtB,cAAc,EAAE,eAAe,CAAC,CAAC,CAAC,CAAC,QAAQ;YAC3C,aAAa,EAAE,eAAe,CAAC,eAAe,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,QAAQ;YACnE,QAAQ,EAAE,eAAe;SAC1B,CAAC,CAAC;IACL,CAAC,CAAC;IAEF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACzC,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;QACpD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;QAChD,MAAM,GAAG,GAAG,MAAM,GAAG,MAAM,CAAC;QAC5B,IAAI,GAAG,IAAI,KAAK,EAAE,CAAC;YACjB,KAAK,EAAE,CAAC;YACR,eAAe,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QAClC,CAAC;aAAM,CAAC;YACN,eAAe,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QACpC,CAAC;IACH,CAAC;IACD,KAAK,EAAE,CAAC;IACR,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"to-classifier-input.d.ts","sourceRoot":"","sources":["../src/to-classifier-input.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,iBAAiB,CAAC;AAwB/C,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM,CAM1D"}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.toClassifierInput = toClassifierInput;
|
|
4
|
+
// ---------------------------------------------------------------------------
|
|
5
|
+
// to-classifier-input — Pass 2 input formatter (Task 891).
|
|
6
|
+
//
|
|
7
|
+
// Pure function. Renders one Session as a turn-attributed text block ready
|
|
8
|
+
// to hand to memory-classify (mode='chat'). Format:
|
|
9
|
+
//
|
|
10
|
+
// [YYYY-MM-DD HH:MM:SS TZ] <Sender>: <body>
|
|
11
|
+
// [YYYY-MM-DD HH:MM:SS TZ] <Sender>: <body>
|
|
12
|
+
// ...
|
|
13
|
+
//
|
|
14
|
+
// Multi-line message bodies are kept verbatim (with their internal newlines).
|
|
15
|
+
// The leading `[ts] <Sender>: ` prefix is the only structural addition; the
|
|
16
|
+
// classifier prompt instructs Haiku to preserve it in the chunk `body` so
|
|
17
|
+
// downstream Phase 2 work can recover per-message provenance via snippet
|
|
18
|
+
// matching against the conversation tail.
|
|
19
|
+
//
|
|
20
|
+
// Timezone: each `dateSent` ISO already carries an offset (set by parseExport
|
|
21
|
+
// from the operator's confirmed IANA zone). The renderer prints the
|
|
22
|
+
// human-readable wall-clock for that offset; the trailing "TZ" suffix is the
|
|
23
|
+
// offset itself, not a zone name.
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
function toClassifierInput(session) {
    // One "[wall-clock] sender: body" entry per message, in session order.
    // Bodies are emitted as-is, so any newlines inside a body are kept.
    const rendered = Array.from(session.messages, (message) => {
        const stamp = formatWallClock(message.dateSent);
        return "".concat("[", stamp, "] ", message.senderName, ": ", message.body);
    });
    return rendered.join("\n");
}
|
|
32
|
+
/**
 * Format an ISO 8601 instant with offset as `YYYY-MM-DD HH:MM:SS ±HH:MM`.
 *
 * The wall-clock components and the offset are read directly from the ISO
 * string — no Date construction (which would re-interpret in the local zone).
 * Fractional seconds are dropped. `Z` is rendered as `+00:00`, and a
 * colon-less offset such as `+0100` is rendered as `+01:00`, so the suffix
 * always has the `±HH:MM` shape.
 *
 * @param {string} iso Timestamp shaped "YYYY-MM-DDTHH:MM:SS[.fff]" followed by
 *   "Z", "±HH:MM" or "±HHMM".
 * @returns {string} The formatted wall-clock string, or `iso` unchanged when it
 *   does not match the expected shape.
 */
function formatWallClock(iso) {
    // ISO from parse-export is shaped: "YYYY-MM-DDTHH:MM:SS±HH:MM" (or "Z").
    const m = iso.match(/^(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})(?:\.\d+)?(Z|[+-]\d{2}:?\d{2})$/);
    if (!m)
        return iso; // surface the raw value if the shape drifted; pure function never throws on caller-supplied data
    const [, y, mo, d, h, mi, s, off] = m;
    let offsetLabel;
    if (off === "Z") {
        offsetLabel = "+00:00";
    }
    else if (off.includes(":")) {
        offsetLabel = off;
    }
    else {
        // The pattern also admits "±HHMM"; insert the colon so the output
        // keeps the `±HH:MM` shape whichever form arrived.
        offsetLabel = `${off.slice(0, 3)}:${off.slice(3)}`;
    }
    return `${y}-${mo}-${d} ${h}:${mi}:${s} ${offsetLabel}`;
}
|
|
47
|
+
//# sourceMappingURL=to-classifier-input.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"to-classifier-input.js","sourceRoot":"","sources":["../src/to-classifier-input.ts"],"names":[],"mappings":";;AAwBA,8CAMC;AA5BD,8EAA8E;AAC9E,2DAA2D;AAC3D,EAAE;AACF,2EAA2E;AAC3E,oDAAoD;AACpD,EAAE;AACF,8CAA8C;AAC9C,8CAA8C;AAC9C,QAAQ;AACR,EAAE;AACF,8EAA8E;AAC9E,4EAA4E;AAC5E,0EAA0E;AAC1E,yEAAyE;AACzE,0CAA0C;AAC1C,EAAE;AACF,8EAA8E;AAC9E,oEAAoE;AACpE,6EAA6E;AAC7E,kCAAkC;AAClC,8EAA8E;AAE9E,SAAgB,iBAAiB,CAAC,OAAgB;IAChD,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,MAAM,CAAC,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;QACjC,KAAK,CAAC,IAAI,CAAC,IAAI,eAAe,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,UAAU,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAC5E,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED;;;;;GAKG;AACH,SAAS,eAAe,CAAC,GAAW;IAClC,yEAAyE;IACzE,MAAM,CAAC,GAAG,GAAG,CAAC,KAAK,CACjB,iFAAiF,CAClF,CAAC;IACF,IAAI,CAAC,CAAC;QAAE,OAAO,GAAG,CAAC,CAAC,iGAAiG;IACrH,MAAM,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC;IACtC,MAAM,WAAW,GAAG,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC;IACjD,OAAO,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,WAAW,EAAE,CAAC;AAC1D,CAAC"}
|
|
@@ -98,22 +98,23 @@ fi
|
|
|
98
98
|
# webfetch-preflight.mjs: detects JS-SPA shells before WebFetch's
|
|
99
99
|
# 60s extraction timeout (Task 536). Fail-open on any error;
|
|
100
100
|
# on positive SPA detection exits 2 with WEBFETCH_CANNOT_READ_JS_SPA.
|
|
101
|
-
# archive-ingest-surface-gate.sh (Task 855;
|
|
101
|
+
# archive-ingest-surface-gate.sh (Task 855; trimmed by Task 894):
|
|
102
102
|
# narrows the database-operator subagent's effective surface
|
|
103
|
-
# during
|
|
104
|
-
#
|
|
105
|
-
#
|
|
106
|
-
#
|
|
107
|
-
#
|
|
108
|
-
#
|
|
109
|
-
#
|
|
110
|
-
#
|
|
111
|
-
#
|
|
112
|
-
#
|
|
103
|
+
# during flat-dataset archive ingestion (LinkedIn today;
|
|
104
|
+
# future CRM-type seeds) by enforcing three blocks:
|
|
105
|
+
# plugin-source edits under platform/plugins/*/lib/*, JS
|
|
106
|
+
# test-runner Bash commands, and a post-parse-error flag
|
|
107
|
+
# that blocks the rest of the turn after any
|
|
108
|
+
# *-export-parse / *-import-parse tool returns isError=true.
|
|
109
|
+
# Task 894 removed the WhatsApp-specific MCP-tool blocks
|
|
110
|
+
# (the three legacy whatsapp-export tools were deleted; chat
|
|
111
|
+
# archives now flow through the unified document-ingest
|
|
112
|
+
# pipeline with parentLabel='ConversationArchive' and
|
|
113
|
+
# mode='chat').
|
|
113
114
|
# Wired at three points: explicit Edit/Write/NotebookEdit/Bash
|
|
114
|
-
# matchers, a no-matcher PreToolUse entry (
|
|
115
|
-
#
|
|
116
|
-
#
|
|
115
|
+
# matchers, a no-matcher PreToolUse entry (post-parse-error
|
|
116
|
+
# gate fires on every tool), and a PostToolUse regex matcher
|
|
117
|
+
# for parse tools.
|
|
117
118
|
# UserPromptSubmit — archive-ingest-surface-gate.sh clears the
|
|
118
119
|
# parse-error flag when the operator's next prompt arrives.
|
|
119
120
|
HOOKS_PATH="\$PLATFORM_ROOT/plugins/admin/hooks"
|