@rubytech/create-realagent 1.0.825 → 1.0.828
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/payload/platform/lib/task-secrets/dist/index.d.ts +40 -0
- package/payload/platform/lib/task-secrets/dist/index.d.ts.map +1 -0
- package/payload/platform/lib/task-secrets/dist/index.js +44 -0
- package/payload/platform/lib/task-secrets/dist/index.js.map +1 -0
- package/payload/platform/lib/task-secrets/src/__tests__/redact-secrets.test.ts +127 -0
- package/payload/platform/lib/task-secrets/src/index.ts +77 -0
- package/payload/platform/lib/task-secrets/tsconfig.json +9 -0
- package/payload/platform/lib/task-secrets/vitest.config.ts +9 -0
- package/payload/platform/neo4j/schema.cypher +34 -2
- package/payload/platform/package.json +2 -2
- package/payload/platform/plugins/admin/hooks/archive-ingest-surface-gate.sh +19 -13
- package/payload/platform/plugins/admin/skills/business-profile/SKILL.md +2 -2
- package/payload/platform/plugins/admin/skills/onboarding/SKILL.md +13 -12
- package/payload/platform/plugins/admin/skills/plugin-management/SKILL.md +4 -4
- package/payload/platform/plugins/admin/skills/public-agent-manager/SKILL.md +2 -2
- package/payload/platform/plugins/admin/skills/stream-log-review/SKILL.md +6 -6
- package/payload/platform/plugins/admin/skills/unzip-attachment/references/safety.md +1 -1
- package/payload/platform/plugins/cloudflare/references/manual-setup.md +3 -3
- package/payload/platform/plugins/cloudflare/skills/setup-tunnel/SKILL.md +4 -4
- package/payload/platform/plugins/docs/references/cloudflare.md +2 -2
- package/payload/platform/plugins/docs/references/internals.md +2 -2
- package/payload/platform/plugins/docs/references/plugins-guide.md +1 -1
- package/payload/platform/plugins/docs/references/troubleshooting.md +2 -1
- package/payload/platform/plugins/linkedin-import/skills/linkedin-import/SKILL.md +2 -2
- package/payload/platform/plugins/linkedin-import/skills/linkedin-import/references/connections.md +1 -1
- package/payload/platform/plugins/memory/PLUGIN.md +1 -1
- package/payload/platform/plugins/memory/mcp/dist/index.js +6 -41
- package/payload/platform/plugins/memory/mcp/dist/index.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/llm-classifier.test.js +51 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/llm-classifier.test.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts +19 -4
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js +139 -56
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.d.ts +2 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.js +61 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts +34 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js +241 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js.map +1 -1
- package/payload/platform/plugins/memory/references/graph-primitives.md +5 -5
- package/payload/platform/plugins/memory/references/schema-base.md +6 -3
- package/payload/platform/plugins/memory/skills/document-ingest/SKILL.md +6 -6
- package/payload/platform/plugins/tasks/PLUGIN.md +1 -1
- package/payload/platform/plugins/tasks/mcp/dist/index.js +11 -2
- package/payload/platform/plugins/tasks/mcp/dist/index.js.map +1 -1
- package/payload/platform/plugins/tasks/mcp/dist/tools/task-create.d.ts +19 -2
- package/payload/platform/plugins/tasks/mcp/dist/tools/task-create.d.ts.map +1 -1
- package/payload/platform/plugins/tasks/mcp/dist/tools/task-create.js +17 -1
- package/payload/platform/plugins/tasks/mcp/dist/tools/task-create.js.map +1 -1
- package/payload/platform/plugins/whatsapp-import/PLUGIN.md +17 -15
- package/payload/platform/plugins/whatsapp-import/bin/ingest.mjs +313 -366
- package/payload/platform/plugins/whatsapp-import/bin/whatsapp-ingest.sh +27 -60
- package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.d.ts +18 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.d.ts.map +1 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.js +31 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.js.map +1 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.d.ts +27 -12
- package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.d.ts.map +1 -1
- package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.js +40 -20
- package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.js.map +1 -1
- package/payload/platform/plugins/whatsapp-import/lib/dist/index.d.ts +7 -4
- package/payload/platform/plugins/whatsapp-import/lib/dist/index.d.ts.map +1 -1
- package/payload/platform/plugins/whatsapp-import/lib/dist/index.js +9 -6
- package/payload/platform/plugins/whatsapp-import/lib/dist/index.js.map +1 -1
- package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.d.ts +25 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.d.ts.map +1 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.js +48 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.js.map +1 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.d.ts +3 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.d.ts.map +1 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.js +47 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.js.map +1 -0
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/delta-append.test.ts +163 -0
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/sessionize.test.ts +91 -0
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/to-classifier-input.test.ts +59 -0
- package/payload/platform/plugins/whatsapp-import/lib/src/delta-cursor.ts +54 -0
- package/payload/platform/plugins/whatsapp-import/lib/src/derive-keys.ts +55 -32
- package/payload/platform/plugins/whatsapp-import/lib/src/index.ts +9 -6
- package/payload/platform/plugins/whatsapp-import/lib/src/sessionize.ts +81 -0
- package/payload/platform/plugins/whatsapp-import/lib/src/to-classifier-input.ts +48 -0
- package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/SKILL.md +66 -73
- package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/references/conversation-archive-shape.md +143 -0
- package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/references/export-parse.md +2 -2
- package/payload/platform/templates/specialists/agents/database-operator.md +17 -18
- package/payload/server/chunk-T2OPNP3L.js +654 -0
- package/payload/server/chunk-ZTBTX3IO.js +642 -0
- package/payload/server/cloudflare-task-tracker-BAMJY4MH.js +17 -0
- package/payload/server/cloudflare-task-tracker-CR6TL4VL.js +19 -0
- package/payload/server/public/assets/{admin-DOkUspG1.js → admin-BNwPsMhJ.js} +2 -2
- package/payload/server/public/assets/{graph-LLMJa4Ch.js → graph-N_Bw-8oT.js} +1 -1
- package/payload/server/public/assets/{page-DoaF3DB0.js → page-BKLGP-th.js} +1 -1
- package/payload/server/public/graph.html +2 -2
- package/payload/server/public/index.html +2 -2
- package/payload/server/server.js +291 -172
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/filter-gate.test.ts +0 -172
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/ingest-idempotence.test.ts +0 -141
- package/payload/platform/plugins/whatsapp-import/lib/src/filter.ts +0 -136
- package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import-enrich/SKILL.md +0 -333
|
@@ -1,52 +1,38 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
// =============================================================================
|
|
3
|
-
// ingest.mjs — in-process orchestrator for whatsapp-ingest.sh.
|
|
3
|
+
// ingest.mjs — in-process orchestrator for whatsapp-ingest.sh (Task 891).
|
|
4
4
|
//
|
|
5
|
-
//
|
|
6
|
-
//
|
|
7
|
-
//
|
|
8
|
-
//
|
|
9
|
-
//
|
|
10
|
-
//
|
|
5
|
+
// Pipeline (single phase — Phase 2 insight derivation deferred to its own
|
|
6
|
+
// follow-up task):
|
|
7
|
+
//
|
|
8
|
+
// parse → bind canonical senders → derive conversationIdentity
|
|
9
|
+
// → look up prior :ConversationArchive (delta cursor)
|
|
10
|
+
// → sessionize delta at gapHours boundaries
|
|
11
|
+
// → for each session: classify (mode='chat') → collect chunks
|
|
12
|
+
// → memoryIngest(parentLabel='ConversationArchive')
|
|
11
13
|
//
|
|
12
14
|
// Argv (positional): <archive-path>
|
|
13
|
-
// Argv (flags): --owner-element-id <id>
|
|
14
|
-
// --
|
|
15
|
-
// --
|
|
16
|
-
// [--
|
|
15
|
+
// Argv (flags): --owner-element-id <id>
|
|
16
|
+
// --participant-person-ids <csv>
|
|
17
|
+
// --scope <admin|public>
|
|
18
|
+
// [--session-gap-hours <N>] (default 12)
|
|
19
|
+
// [--account-id <accountId>]
|
|
20
|
+
// [--timezone <iana>]
|
|
17
21
|
// [--date-format <DD/MM/YY|MM/DD/YY|DD/MM/YYYY|MM/DD/YYYY>]
|
|
18
22
|
//
|
|
19
|
-
//
|
|
20
|
-
//
|
|
21
|
-
//
|
|
22
|
-
//
|
|
23
|
-
// `parser-miss reason="senderName=<verbatim> not in preview histogram
|
|
24
|
-
// (parser failure — re-export or report)"`. Bounds writer cardinality to
|
|
25
|
-
// the deterministic preview output — closes the auto-Person leak structurally.
|
|
26
|
-
//
|
|
27
|
-
// Stdout (success): one JSON line — Honest counters per Task 871.5.
|
|
28
|
-
// {conversationElementId, conversationId,
|
|
29
|
-
// parsed, mediaSkipped, systemSkipped,
|
|
30
|
-
// filtered,
|
|
31
|
-
// written, messagesAlreadyExisted,
|
|
32
|
-
// nextEdgesProcessed, nextEdgesCreated,
|
|
33
|
-
// participantsAlreadyExisted,
|
|
34
|
-
// ms}
|
|
35
|
-
// The skill (`whatsapp-import` SKILL.md) maps this verbose-diagnostic shape
|
|
36
|
-
// to the agent-return short shape per Task 871.6 (`alreadyExisted` etc.)
|
|
37
|
-
// when surfacing the result to the admin agent.
|
|
23
|
+
// Owner + participants form the closed sender set; any parsed senderName
|
|
24
|
+
// outside that set LOUD-FAILs with `parser-miss` (preserves Task 887 §A0).
|
|
25
|
+
// `--subject-person-id` and `--filter` are gone (the chunked archive shape
|
|
26
|
+
// makes per-message filtering obsolete — chunking bounds operator surface).
|
|
38
27
|
//
|
|
39
|
-
//
|
|
28
|
+
// Stdout (success): one JSON line — all the counters the skill needs to
|
|
29
|
+
// formulate the three operator-facing messages. See SKILL.md for the shape.
|
|
30
|
+
// Stderr: one [whatsapp-import] FAIL line on failure, exit non-zero.
|
|
40
31
|
// =============================================================================
|
|
41
32
|
|
|
42
|
-
import {
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
readdirSync,
|
|
46
|
-
rmSync,
|
|
47
|
-
statSync,
|
|
48
|
-
} from "node:fs";
|
|
49
|
-
import { join, resolve, dirname } from "node:path";
|
|
33
|
+
import { existsSync, mkdtempSync, readdirSync, rmSync, statSync, createReadStream } from "node:fs";
|
|
34
|
+
import { createHash } from "node:crypto";
|
|
35
|
+
import { join, resolve, dirname, basename } from "node:path";
|
|
50
36
|
import { tmpdir } from "node:os";
|
|
51
37
|
import { spawnSync } from "node:child_process";
|
|
52
38
|
import { fileURLToPath } from "node:url";
|
|
@@ -54,13 +40,8 @@ import { fileURLToPath } from "node:url";
|
|
|
54
40
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
55
41
|
|
|
56
42
|
// ---------------------------------------------------------------------------
|
|
57
|
-
// 1. Resolve dist paths
|
|
43
|
+
// 1. Resolve dist paths.
|
|
58
44
|
// ---------------------------------------------------------------------------
|
|
59
|
-
// MAXY_PLATFORM_ROOT is set by the wrapper script (and by claude-agent.ts in
|
|
60
|
-
// production). Fall back to walking up from this file's location: the bin/
|
|
61
|
-
// directory sits at platform/plugins/whatsapp-import/bin/, so platform/ is
|
|
62
|
-
// three levels up.
|
|
63
|
-
|
|
64
45
|
const platformRoot =
|
|
65
46
|
process.env.MAXY_PLATFORM_ROOT?.trim() ||
|
|
66
47
|
resolve(__dirname, "..", "..", "..");
|
|
@@ -73,44 +54,41 @@ const PARSE_EXPORT_PATH = resolve(
|
|
|
73
54
|
"dist",
|
|
74
55
|
"index.js",
|
|
75
56
|
);
|
|
76
|
-
const
|
|
57
|
+
const NEO4J_LIB_PATH = resolve(
|
|
77
58
|
platformRoot,
|
|
78
59
|
"plugins",
|
|
79
60
|
"memory",
|
|
80
61
|
"mcp",
|
|
81
62
|
"dist",
|
|
82
|
-
"
|
|
83
|
-
"
|
|
63
|
+
"lib",
|
|
64
|
+
"neo4j.js",
|
|
84
65
|
);
|
|
85
|
-
const
|
|
66
|
+
const LLM_CLASSIFIER_PATH = resolve(
|
|
86
67
|
platformRoot,
|
|
87
68
|
"plugins",
|
|
88
69
|
"memory",
|
|
89
70
|
"mcp",
|
|
90
71
|
"dist",
|
|
91
72
|
"lib",
|
|
92
|
-
"
|
|
73
|
+
"llm-classifier.js",
|
|
93
74
|
);
|
|
94
|
-
|
|
95
|
-
const DERIVE_KEYS_PATH = resolve(
|
|
75
|
+
const MEMORY_INGEST_PATH = resolve(
|
|
96
76
|
platformRoot,
|
|
97
77
|
"plugins",
|
|
98
|
-
"
|
|
99
|
-
"
|
|
78
|
+
"memory",
|
|
79
|
+
"mcp",
|
|
100
80
|
"dist",
|
|
101
|
-
"
|
|
81
|
+
"tools",
|
|
82
|
+
"memory-ingest.js",
|
|
102
83
|
);
|
|
103
84
|
|
|
104
85
|
// ---------------------------------------------------------------------------
|
|
105
|
-
// 2. Logger
|
|
86
|
+
// 2. Logger
|
|
106
87
|
// ---------------------------------------------------------------------------
|
|
107
|
-
|
|
108
88
|
function log(line) {
|
|
109
|
-
process.stderr.write(`[whatsapp-
|
|
89
|
+
process.stderr.write(`[whatsapp-import] ${line}\n`);
|
|
110
90
|
}
|
|
111
|
-
|
|
112
91
|
function fail(phase, fields) {
|
|
113
|
-
// Single failure line. Used as the only stderr line on non-zero exit.
|
|
114
92
|
const fieldStr = Object.entries(fields)
|
|
115
93
|
.map(([k, v]) =>
|
|
116
94
|
typeof v === "string" && (v.includes(" ") || v.includes("="))
|
|
@@ -118,14 +96,13 @@ function fail(phase, fields) {
|
|
|
118
96
|
: `${k}=${v ?? "-"}`,
|
|
119
97
|
)
|
|
120
98
|
.join(" ");
|
|
121
|
-
process.stderr.write(`[whatsapp-
|
|
99
|
+
process.stderr.write(`[whatsapp-import] FAIL phase=${phase} ${fieldStr}\n`);
|
|
122
100
|
process.exit(1);
|
|
123
101
|
}
|
|
124
102
|
|
|
125
103
|
// ---------------------------------------------------------------------------
|
|
126
|
-
// 3. Argv parsing
|
|
104
|
+
// 3. Argv parsing
|
|
127
105
|
// ---------------------------------------------------------------------------
|
|
128
|
-
|
|
129
106
|
function parseArgv(argv) {
|
|
130
107
|
const args = argv.slice(2);
|
|
131
108
|
let archive = null;
|
|
@@ -144,52 +121,33 @@ function parseArgv(argv) {
|
|
|
144
121
|
}
|
|
145
122
|
if (!archive) fail("argv", { reason: "missing positional <archive>" });
|
|
146
123
|
if (!flags.ownerElementId) fail("argv", { reason: "missing --owner-element-id" });
|
|
124
|
+
if (!flags.participantPersonIds) {
|
|
125
|
+
fail("argv", {
|
|
126
|
+
reason: "missing --participant-person-ids (csv of operator-confirmed :Person/:AdminUser elementIds, owner excluded)",
|
|
127
|
+
});
|
|
128
|
+
}
|
|
147
129
|
if (!flags.scope) fail("argv", { reason: "missing --scope" });
|
|
148
130
|
if (flags.scope !== "admin" && flags.scope !== "public") {
|
|
149
131
|
fail("argv", { reason: `invalid --scope "${flags.scope}" (admin|public)` });
|
|
150
132
|
}
|
|
151
|
-
// Task 887 §A0 — DM scope: a single `--subject-person-id` identifies the
|
|
152
|
-
// third party in the conversation. The owner + subject pair is the
|
|
153
|
-
// canonical sender set; the writer rejects any other senderName as
|
|
154
|
-
// parser-miss. Group-chat ingest (>2 distinct senders) is a future task.
|
|
155
|
-
if (!flags.subjectPersonId) {
|
|
156
|
-
fail("argv", { reason: "missing --subject-person-id (Task 887: operator-confirmed third-party :Person elementId from preview histogram)" });
|
|
157
|
-
}
|
|
158
|
-
// Task 871: --filter is mandatory. The deterministic Bash entry refuses
|
|
159
|
-
// bulk archive writes without an operator-supplied filter — closes the
|
|
160
|
-
// doctrine gap named in feedback_compress_at_ingest_for_bulk_archives.md.
|
|
161
|
-
if (!flags.filter || !flags.filter.trim()) {
|
|
162
|
-
process.stderr.write(
|
|
163
|
-
`[whatsapp-ingest] FAIL filter-required reason="bulk-archive-gate (Task 871) — operator must specify --filter (one of all, senders=<csv>, date-range=<isoFrom>..<isoTo>)"\n`,
|
|
164
|
-
);
|
|
165
|
-
fail("argv", { reason: "--filter is required (one of all, senders=<csv>, date-range=<isoFrom>..<isoTo>)" });
|
|
166
|
-
}
|
|
167
133
|
return { archive, flags };
|
|
168
134
|
}
|
|
169
|
-
|
|
170
135
|
/**
 * Convert a dash-separated identifier to camelCase.
 *
 * Every "-" that is immediately followed by a lowercase ASCII letter is
 * dropped and that letter is upper-cased; all other characters (including
 * dashes followed by digits, uppercase letters, or nothing) are left as-is.
 *
 * @param {string} s - dash-separated text, e.g. "owner-element-id".
 * @returns {string} the camelCased text, e.g. "ownerElementId".
 */
function camelCase(s) {
  const upperCaseLetter = (_match, letter) => letter.toUpperCase();
  return s.replace(/-([a-z])/g, upperCaseLetter);
}
|
|
173
138
|
|
|
174
139
|
// ---------------------------------------------------------------------------
|
|
175
|
-
// 4. Archive resolution
|
|
176
|
-
// path to _chat.txt and a cleanup callback for any tmp dir we created.
|
|
140
|
+
// 4. Archive resolution
|
|
177
141
|
// ---------------------------------------------------------------------------
|
|
178
|
-
|
|
179
142
|
function resolveChatTxt(archivePath) {
|
|
180
143
|
const abs = resolve(archivePath);
|
|
181
|
-
if (!existsSync(abs)) {
|
|
182
|
-
fail("argv", { reason: `archive path not found: ${abs}` });
|
|
183
|
-
}
|
|
144
|
+
if (!existsSync(abs)) fail("argv", { reason: `archive path not found: ${abs}` });
|
|
184
145
|
const st = statSync(abs);
|
|
185
|
-
|
|
186
146
|
if (st.isFile() && abs.endsWith(".zip")) {
|
|
187
147
|
const tmp = mkdtempSync(join(tmpdir(), "whatsapp-ingest-"));
|
|
188
|
-
const unzip = spawnSync("unzip", ["-q", "-o", abs, "-d", tmp], {
|
|
189
|
-
encoding: "utf8",
|
|
190
|
-
});
|
|
148
|
+
const unzip = spawnSync("unzip", ["-q", "-o", abs, "-d", tmp], { encoding: "utf8" });
|
|
191
149
|
if (unzip.status !== 0) {
|
|
192
|
-
rmSync(tmp, { recursive: true
|
|
150
|
+
rmSync(tmp, { recursive: true });
|
|
193
151
|
fail("argv", {
|
|
194
152
|
reason: "unzip failed",
|
|
195
153
|
archive: abs,
|
|
@@ -198,27 +156,22 @@ function resolveChatTxt(archivePath) {
|
|
|
198
156
|
}
|
|
199
157
|
const chat = findChatTxt(tmp);
|
|
200
158
|
if (!chat) {
|
|
201
|
-
rmSync(tmp, { recursive: true
|
|
159
|
+
rmSync(tmp, { recursive: true });
|
|
202
160
|
fail("argv", { reason: "_chat.txt not found in zip", archive: abs });
|
|
203
161
|
}
|
|
204
|
-
return { chatTxt: chat, cleanup: () => rmSync(tmp, { recursive: true
|
|
162
|
+
return { chatTxt: chat, cleanup: () => rmSync(tmp, { recursive: true }) };
|
|
205
163
|
}
|
|
206
|
-
|
|
207
164
|
if (st.isDirectory()) {
|
|
208
165
|
const chat = findChatTxt(abs);
|
|
209
166
|
if (!chat) fail("argv", { reason: "_chat.txt not found in directory", archive: abs });
|
|
210
167
|
return { chatTxt: chat, cleanup: () => {} };
|
|
211
168
|
}
|
|
212
|
-
|
|
213
169
|
if (st.isFile()) {
|
|
214
170
|
return { chatTxt: abs, cleanup: () => {} };
|
|
215
171
|
}
|
|
216
|
-
|
|
217
172
|
fail("argv", { reason: `unsupported archive shape: ${abs}` });
|
|
218
|
-
// unreachable, but keeps tools happy
|
|
219
173
|
return { chatTxt: abs, cleanup: () => {} };
|
|
220
174
|
}
|
|
221
|
-
|
|
222
175
|
function findChatTxt(dir) {
|
|
223
176
|
const entries = readdirSync(dir, { withFileTypes: true });
|
|
224
177
|
for (const e of entries) {
|
|
@@ -234,26 +187,19 @@ function findChatTxt(dir) {
|
|
|
234
187
|
}
|
|
235
188
|
|
|
236
189
|
// ---------------------------------------------------------------------------
|
|
237
|
-
// 5. Account resolution
|
|
238
|
-
// --account-id when it is known; otherwise we pick the single dir under
|
|
239
|
-
// {install}/data/accounts/.
|
|
190
|
+
// 5. Account resolution (Phase 0 = single account)
|
|
240
191
|
// ---------------------------------------------------------------------------
|
|
241
|
-
|
|
242
192
|
function resolveAccountId(flags) {
|
|
243
193
|
if (flags.accountId && flags.accountId.trim()) return flags.accountId.trim();
|
|
244
194
|
const installDir = resolve(platformRoot, "..");
|
|
245
195
|
const accountsDir = join(installDir, "data", "accounts");
|
|
246
196
|
if (!existsSync(accountsDir)) {
|
|
247
|
-
fail("argv", {
|
|
248
|
-
reason: `accounts dir not found: ${accountsDir}; pass --account-id explicitly`,
|
|
249
|
-
});
|
|
197
|
+
fail("argv", { reason: `accounts dir not found: ${accountsDir}; pass --account-id explicitly` });
|
|
250
198
|
}
|
|
251
199
|
const dirs = readdirSync(accountsDir, { withFileTypes: true })
|
|
252
200
|
.filter((d) => d.isDirectory() && !d.name.startsWith("."))
|
|
253
201
|
.map((d) => d.name);
|
|
254
|
-
if (dirs.length === 0) {
|
|
255
|
-
fail("argv", { reason: `no accounts found under ${accountsDir}` });
|
|
256
|
-
}
|
|
202
|
+
if (dirs.length === 0) fail("argv", { reason: `no accounts found under ${accountsDir}` });
|
|
257
203
|
if (dirs.length > 1) {
|
|
258
204
|
fail("argv", {
|
|
259
205
|
reason: `multiple accounts under ${accountsDir} (${dirs.join(",")}); pass --account-id explicitly`,
|
|
@@ -263,26 +209,12 @@ function resolveAccountId(flags) {
|
|
|
263
209
|
}
|
|
264
210
|
|
|
265
211
|
// ---------------------------------------------------------------------------
|
|
266
|
-
// 6. Bind
|
|
212
|
+
// 6. Bind canonical sender set (Task 887 §A0 preserved).
|
|
267
213
|
//
|
|
268
|
-
//
|
|
269
|
-
//
|
|
270
|
-
// `:Person {participantStatus:'auto-created'}` node for any miss. That
|
|
271
|
-
// fallback path was the structural defect: any parser failure that produced
|
|
272
|
-
// a polluted senderName (Task 845's LRM-prefixed body glued onto the prior
|
|
273
|
-
// header → senderName="\"Adam Mackay:\\n[04/02/2026, 11:52:16] Adam Mackay\"")
|
|
274
|
-
// leaked one bogus :Person per distinct miss. 23 leaked from the Adam Mackay
|
|
275
|
-
// archive in a single ingest.
|
|
276
|
-
//
|
|
277
|
-
// 887 §A0 closes the leak by deleting the fallback. The operator now passes
|
|
278
|
-
// `--subject-person-id` (third-party Person elementId from preview), the
|
|
279
|
-
// dispatch passes `--owner-element-id`, and the writer accepts EXACTLY the
|
|
280
|
-
// name candidates of those two nodes. Any parsed senderName outside that
|
|
281
|
-
// closed set LOUD-FAILs the ingest with the verbatim string — surfacing a
|
|
282
|
-
// parser bug as a hard exit instead of as graph pollution.
|
|
214
|
+
// All distinct parsed senderNames must resolve to {owner, participants...}.
|
|
215
|
+
// Any miss LOUD-FAILs with `parser-miss reason="senderName=<...>"`.
|
|
283
216
|
// ---------------------------------------------------------------------------
|
|
284
|
-
|
|
285
|
-
const CANONICAL_PAIR_FETCH_CYPHER = `
|
|
217
|
+
const CANONICAL_FETCH_CYPHER = `
|
|
286
218
|
UNWIND $ids AS id
|
|
287
219
|
MATCH (n) WHERE elementId(n) = id
|
|
288
220
|
RETURN elementId(n) AS elemId,
|
|
@@ -293,12 +225,6 @@ RETURN elementId(n) AS elemId,
|
|
|
293
225
|
coalesce(n.accountId, '') AS accountId
|
|
294
226
|
`;
|
|
295
227
|
|
|
296
|
-
/**
|
|
297
|
-
* Sentinel error class so `main()`'s try/catch can recognise an operator
|
|
298
|
-
* LOUD-FAIL (already-emitted FAIL line) and exit cleanly with cleanup.
|
|
299
|
-
* Plain `process.exit(1)` from inside `bindCanonicalSenders` would skip
|
|
300
|
-
* `main()`'s `cleanup()` (unzip tmp dir) and `session.close()` paths.
|
|
301
|
-
*/
|
|
302
228
|
class IngestUserFacingError extends Error {
|
|
303
229
|
constructor(message) {
|
|
304
230
|
super(message);
|
|
@@ -311,61 +237,36 @@ async function bindCanonicalSenders({
|
|
|
311
237
|
session,
|
|
312
238
|
accountId,
|
|
313
239
|
ownerElementId,
|
|
314
|
-
|
|
240
|
+
participantElementIds,
|
|
315
241
|
senderNames,
|
|
316
242
|
normaliseSenderName,
|
|
317
243
|
}) {
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
// `participantsAlreadyExisted=1` would propagate to the JSON summary
|
|
322
|
-
// silently. Refuse early, name the cause.
|
|
323
|
-
if (ownerElementId === subjectPersonId) {
|
|
244
|
+
const allIds = [ownerElementId, ...participantElementIds];
|
|
245
|
+
const distinctIds = Array.from(new Set(allIds));
|
|
246
|
+
if (distinctIds.length !== allIds.length) {
|
|
324
247
|
throw new IngestUserFacingError(
|
|
325
|
-
|
|
248
|
+
`participant id list contains duplicates (owner appears in --participant-person-ids?)`,
|
|
326
249
|
);
|
|
327
250
|
}
|
|
328
|
-
|
|
329
|
-
const ids = [ownerElementId, subjectPersonId];
|
|
330
|
-
const res = await session.executeRead(async (tx) =>
|
|
331
|
-
tx.run(CANONICAL_PAIR_FETCH_CYPHER, { ids }),
|
|
332
|
-
);
|
|
333
|
-
|
|
334
|
-
// Build normalised-name → elementId index from owner + subject candidates.
|
|
335
|
-
// For each node we accept the full name, given name, family name, and
|
|
336
|
-
// "given family" composite as match candidates so an export header that
|
|
337
|
-
// says "Adam" or "Adam Mackay" both resolve to the same node.
|
|
338
|
-
const index = new Map();
|
|
251
|
+
const res = await session.executeRead(async (tx) => tx.run(CANONICAL_FETCH_CYPHER, { ids: distinctIds }));
|
|
339
252
|
const seenIds = new Set();
|
|
340
253
|
const labelByElemId = new Map();
|
|
254
|
+
const index = new Map();
|
|
341
255
|
for (const r of res.records) {
|
|
342
256
|
const elemId = r.get("elemId");
|
|
343
257
|
const labels = r.get("labels") || [];
|
|
344
258
|
const acct = r.get("accountId") || "";
|
|
345
|
-
// Empty accountId on a canonical Person/AdminUser is a graph-data
|
|
346
|
-
// defect (migration 004 normally prunes account-less nodes). Refuse
|
|
347
|
-
// rather than silently accept — bound-pair correctness depends on
|
|
348
|
-
// accountId being present and matching.
|
|
349
259
|
if (!acct) {
|
|
350
|
-
throw new IngestUserFacingError(
|
|
351
|
-
`node ${elemId} has no accountId — corrupt canonical Person/AdminUser; refusing ingest`,
|
|
352
|
-
);
|
|
260
|
+
throw new IngestUserFacingError(`node ${elemId} has no accountId — corrupt canonical Person/AdminUser`);
|
|
353
261
|
}
|
|
354
262
|
if (acct !== accountId) {
|
|
355
|
-
throw new IngestUserFacingError(
|
|
356
|
-
`node ${elemId} belongs to account ${acct}, not ${accountId} — refusing cross-account ingest`,
|
|
357
|
-
);
|
|
263
|
+
throw new IngestUserFacingError(`node ${elemId} belongs to account ${acct}, not ${accountId}`);
|
|
358
264
|
}
|
|
359
265
|
if (!labels.includes("Person") && !labels.includes("AdminUser")) {
|
|
360
|
-
throw new IngestUserFacingError(
|
|
361
|
-
`node ${elemId} has labels [${labels.join(",")}]; expected :Person or :AdminUser`,
|
|
362
|
-
);
|
|
266
|
+
throw new IngestUserFacingError(`node ${elemId} has labels [${labels.join(",")}]; expected :Person or :AdminUser`);
|
|
363
267
|
}
|
|
364
268
|
seenIds.add(elemId);
|
|
365
|
-
labelByElemId.set(
|
|
366
|
-
elemId,
|
|
367
|
-
labels.includes("Person") ? "Person" : "AdminUser",
|
|
368
|
-
);
|
|
269
|
+
labelByElemId.set(elemId, labels.includes("Person") ? "Person" : "AdminUser");
|
|
369
270
|
const candidates = [];
|
|
370
271
|
const name = r.get("name") || "";
|
|
371
272
|
const given = r.get("givenName") || "";
|
|
@@ -380,290 +281,336 @@ async function bindCanonicalSenders({
|
|
|
380
281
|
if (!index.has(norm)) index.set(norm, elemId);
|
|
381
282
|
}
|
|
382
283
|
}
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
throw new IngestUserFacingError(
|
|
388
|
-
`--owner-element-id ${ownerElementId} not found in graph`,
|
|
389
|
-
);
|
|
390
|
-
}
|
|
391
|
-
if (!seenIds.has(subjectPersonId)) {
|
|
392
|
-
throw new IngestUserFacingError(
|
|
393
|
-
`--subject-person-id ${subjectPersonId} not found in graph`,
|
|
394
|
-
);
|
|
395
|
-
}
|
|
396
|
-
// Subject must specifically be a `:Person` — the third party in a DM is
|
|
397
|
-
// never the operator's `:AdminUser`. (Owner can be either; both Adam and
|
|
398
|
-
// an external collaborator owning an export are operator-curated cases.)
|
|
399
|
-
if (labelByElemId.get(subjectPersonId) !== "Person") {
|
|
400
|
-
throw new IngestUserFacingError(
|
|
401
|
-
`--subject-person-id ${subjectPersonId} resolves to a :${labelByElemId.get(subjectPersonId)} — subject must be a :Person`,
|
|
402
|
-
);
|
|
403
|
-
}
|
|
404
|
-
|
|
405
|
-
// Group-chat early-detect: the singular `--subject-person-id` flag is
|
|
406
|
-
// DM-scoped. A `_chat.txt` carrying ≥3 distinct senders is an unsupported
|
|
407
|
-
// scope, NOT a parser bug. Emit a distinct reason so the operator does
|
|
408
|
-
// not chase a phantom parser regression. Group-chat support is the
|
|
409
|
-
// separate Task 889 lane.
|
|
410
|
-
if (senderNames.length > 2) {
|
|
411
|
-
throw new IngestUserFacingError(
|
|
412
|
-
`unsupported-scope reason="archive carries ${senderNames.length} distinct senders; --subject-person-id is DM-only (≤2 senders) — group-chat ingest is the separate Task 889 lane"`,
|
|
413
|
-
);
|
|
284
|
+
for (const id of distinctIds) {
|
|
285
|
+
if (!seenIds.has(id)) {
|
|
286
|
+
throw new IngestUserFacingError(`elementId ${id} not found in graph`);
|
|
287
|
+
}
|
|
414
288
|
}
|
|
415
|
-
|
|
416
|
-
// Validate every distinct parsed senderName against the closed candidate
|
|
417
|
-
// set. The first miss is the LOUD-FAIL — operators see one parser-miss
|
|
418
|
-
// line per bad import, not 23.
|
|
419
|
-
const idsByName = new Map();
|
|
289
|
+
// Validate every distinct parsed senderName against the closed candidate set.
|
|
420
290
|
for (const senderName of senderNames) {
|
|
421
291
|
const norm = normaliseSenderName(senderName);
|
|
422
292
|
const hit = index.get(norm);
|
|
423
293
|
if (!hit) {
|
|
424
294
|
throw new IngestUserFacingError(
|
|
425
|
-
`parser-miss reason="senderName=${senderName} not in
|
|
295
|
+
`parser-miss reason="senderName=${senderName} not in confirmed participant set (${distinctIds.length} confirmed elementIds; re-run with the missing :Person elementId in --participant-person-ids)"`,
|
|
426
296
|
);
|
|
427
297
|
}
|
|
428
|
-
idsByName.set(senderName, hit);
|
|
429
|
-
log(
|
|
430
|
-
`participant-resolved senderName="${senderName}" matched=canonical nodeId=${hit}`,
|
|
431
|
-
);
|
|
432
298
|
}
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
299
|
+
return { participantsResolved: seenIds.size };
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
// ---------------------------------------------------------------------------
|
|
303
|
+
// 7. SHA-256 of file bytes (for archiveSha256)
|
|
304
|
+
// ---------------------------------------------------------------------------
|
|
305
|
+
/**
 * Compute the SHA-256 digest of a file's bytes.
 *
 * The file is read as a stream and fed to the hash chunk by chunk, so the
 * whole file is never held in memory at once.
 *
 * @param {string} filePath - path of the file to hash.
 * @returns {Promise<string>} lowercase hex digest of the file contents;
 *   the promise rejects with the stream's error if the file cannot be read.
 */
async function fileSha256(filePath) {
  const digest = createHash("sha256");
  // Async iteration surfaces a stream "error" as a thrown exception, which
  // rejects the returned promise.
  for await (const piece of createReadStream(filePath)) {
    digest.update(piece);
  }
  return digest.digest("hex");
}
|
|
437
314
|
|
|
438
315
|
// ---------------------------------------------------------------------------
|
|
439
|
-
//
|
|
316
|
+
// 8. Build natural-edge map for chat-mode classifier.
|
|
317
|
+
// Chat mode doesn't use it (the chat prompt drops edge proposals), but
|
|
318
|
+
// classifyDocument's signature still takes the param. Pass the empty string.
|
|
440
319
|
// ---------------------------------------------------------------------------
|
|
441
320
|
|
|
321
|
+
// ---------------------------------------------------------------------------
|
|
322
|
+
// 9. Main
|
|
323
|
+
// ---------------------------------------------------------------------------
|
|
442
324
|
/**
 * Entry point: import a WhatsApp chat export as a :ConversationArchive.
 *
 * Pipeline (section labels match the inline comments below):
 *   9a resolve the chat text file and hash it,
 *   9b parse it into lines,
 *   9c validate every sender name against the confirmed participant set,
 *   9d look up a prior archive by conversationIdentity,
 *   9e compute the delta of not-yet-ingested lines,
 *   9f split the delta into sessions,
 *   9g classify each session in chat mode,
 *   9h-9j derive the new cursor and write via memoryIngest.
 *
 * On success (including the "no new messages" no-op) a single JSON summary
 * line is written to stdout and the process exits with code 0.
 *
 * NOTE(review): the code after each `fail(...)` call assumes `fail` never
 * returns (presumably it logs and exits the process) — confirm against its
 * definition elsewhere in this file.
 *
 * @returns {Promise<void>} Does not resolve on the normal paths; the process
 *   exits instead.
 */
async function main() {
  const startedMs = Date.now();
  const { archive, flags } = parseArgv(process.argv);
  const ownerElementId = flags.ownerElementId;
  const participantElementIds = flags.participantPersonIds
    .split(",")
    .map((s) => s.trim())
    .filter((s) => s.length > 0);
  if (participantElementIds.length === 0) {
    fail("argv", { reason: "--participant-person-ids must list at least one elementId" });
  }
  const scope = flags.scope;
  const accountId = resolveAccountId(flags);
  const timezone = flags.timezone || "Europe/London";
  const dateFormat = flags.dateFormat;
  // Default gap between sessions is 12 hours when the flag is absent.
  const sessionGapHours = flags.sessionGapHours
    ? parseFloat(flags.sessionGapHours)
    : 12;
  if (!Number.isFinite(sessionGapHours) || sessionGapHours <= 0) {
    fail("argv", { reason: `invalid --session-gap-hours "${flags.sessionGapHours}" (must be positive number)` });
  }
  const sessionId =
    flags.sessionId || `whatsapp-import:${Date.now()}:${Math.random().toString(36).slice(2, 10)}`;

  // Imports — fail loudly if any compiled dist missing
  let parseExport, sessionize, toClassifierInput, findDeltaCursor;
  let normaliseSenderName, deriveConversationIdentity, deriveMessageContentHash;
  let getSession, classifyDocument, memoryIngest;
  try {
    ({
      parseExport,
      sessionize,
      toClassifierInput,
      findDeltaCursor,
      normaliseSenderName,
      deriveConversationIdentity,
      deriveMessageContentHash,
    } = await import(PARSE_EXPORT_PATH));
    ({ getSession } = await import(NEO4J_LIB_PATH));
    ({ classifyDocument } = await import(LLM_CLASSIFIER_PATH));
    ({ memoryIngest } = await import(MEMORY_INGEST_PATH));
  } catch (err) {
    fail("import", {
      reason: "failed to import compiled dist",
      detail: err instanceof Error ? err.message : String(err),
    });
  }

  // 9a. Resolve _chat.txt + sha256
  const { chatTxt, cleanup } = resolveChatTxt(archive);
  let archiveSha256;
  try {
    archiveSha256 = await fileSha256(chatTxt);
  } catch (err) {
    // Run cleanup like every other failure path, then let the original error
    // propagate to the top-level handler unchanged.
    cleanup();
    throw err;
  }
  const archiveSourceFile = basename(chatTxt);
  log(
    `start file=${archiveSourceFile} owner=${ownerElementId} participants=${participantElementIds.length} scope=${scope} accountId=${accountId} archiveSha256=${archiveSha256.slice(0, 12)} session-gap-hours=${sessionGapHours}`,
  );

  // 9b. Parse
  let parseResult;
  try {
    parseResult = parseExport({ filePath: chatTxt, accountId, timezone, dateFormat });
  } catch (err) {
    cleanup();
    fail("parse", { reason: err instanceof Error ? err.message : String(err) });
  }
  const allLines = parseResult.parsedLines;
  log(
    `parsed lines=${parseResult.counters.parsed} media-skipped=${parseResult.counters.mediaSkipped} system-skipped=${parseResult.counters.systemSkipped}`,
  );
  if (allLines.length === 0) {
    cleanup();
    fail("parse", { reason: "zero parsed lines after walking archive" });
  }

  // 9c. Bind canonical senders against the confirmed set
  const distinctSenderNames = Array.from(new Set(allLines.map((l) => l.senderName)));
  const senderHistogram = computeSenderHistogram(allLines);
  const session = getSession();
  try {
    await bindCanonicalSenders({
      session,
      accountId,
      ownerElementId,
      participantElementIds,
      senderNames: distinctSenderNames,
      normaliseSenderName,
    });
  } catch (err) {
    await session.close().catch(() => {});
    cleanup();
    // Errors flagged userFacing already carry the full FAIL payload; emit the
    // message verbatim instead of wrapping it in another phase.
    if (err && err.userFacing) {
      process.stderr.write(`[whatsapp-import] FAIL ${err.message}\n`);
      process.exit(1);
    }
    fail("argv", { reason: err instanceof Error ? err.message : String(err) });
  }

  // 9d. Derive conversationIdentity and look up prior :ConversationArchive
  const conversationIdentity = deriveConversationIdentity({
    accountId,
    participantElementIds: [ownerElementId, ...participantElementIds],
  });
  let priorArchive = null;
  try {
    const r = await session.run(
      `MATCH (a:ConversationArchive { conversationIdentity: $cid })
       RETURN elementId(a) AS elemId,
              a.lastIngestedMessageHash AS lastHash,
              a.lastIngestedMessageAt AS lastAt LIMIT 1`,
      { cid: conversationIdentity },
    );
    if (r.records[0]) {
      priorArchive = {
        elemId: r.records[0].get("elemId"),
        lastHash: r.records[0].get("lastHash"),
        lastAt: r.records[0].get("lastAt"),
      };
    }
  } catch (err) {
    await session.close().catch(() => {});
    cleanup();
    fail("delta-cursor-missing", { reason: `conversationArchive lookup failed: ${err instanceof Error ? err.message : String(err)}` });
  }
  await session.close().catch(() => {});

  // 9e. Compute deltaStart
  let deltaStart = 0;
  let deltaKind = "first-ingest";
  if (priorArchive && priorArchive.lastHash) {
    const cursor = findDeltaCursor(allLines, priorArchive.lastHash);
    if (cursor.kind === "missing") {
      cleanup();
      fail("delta-cursor-missing", {
        reason: `prior cursor not found in re-export (operator deleted prior messages, or this is a different chat archive)`,
        priorArchive: priorArchive.elemId,
        lastIngestedMessageAt: priorArchive.lastAt,
      });
    }
    if (cursor.kind === "empty") {
      // Nothing new since the last ingest: report a no-op summary and exit 0.
      log(`noop reason="no new messages since ${priorArchive.lastAt}"`);
      cleanup();
      const totalMs = Date.now() - startedMs;
      process.stdout.write(JSON.stringify({
        archiveElementId: priorArchive.elemId,
        conversationIdentity,
        archiveSha256,
        archiveSourceFile,
        parsed: parseResult.counters.parsed,
        mediaSkipped: parseResult.counters.mediaSkipped,
        systemSkipped: parseResult.counters.systemSkipped,
        delta: { kind: "empty-delta", deltaStart: allLines.length, deltaMessages: 0 },
        sessions: 0,
        chunks: 0,
        nextEdgesCreated: 0,
        participantsLinked: 0,
        dateRange: { first: allLines[0].dateSent, last: allLines[allLines.length - 1].dateSent },
        senderHistogram,
        topicKeywords: [],
        ms: totalMs,
        priorLastIngestedMessageAt: priorArchive.lastAt,
      }) + "\n");
      process.exit(0);
    }
    deltaStart = cursor.deltaStart;
    deltaKind = "delta";
  }
  const deltaLines = allLines.slice(deltaStart);
  // A prior archive node may exist without a stored cursor hash; that case is
  // handled as a first ingest above, so it must not dereference lastHash here.
  const cursorLabel = priorArchive?.lastHash
    ? priorArchive.lastHash.slice(0, 12)
    : "(first-ingest)";
  log(
    `delta cursor=${cursorLabel} cursor-line=${deltaStart} delta-messages=${deltaLines.length}`,
  );

  // 9f. Sessionize delta
  const sessions = sessionize(deltaLines, sessionGapHours);
  log(
    `sessionize file=${archiveSourceFile} archiveSha256=${archiveSha256.slice(0, 12)} messages=${deltaLines.length} sessions=${sessions.length} gap-hours=${sessionGapHours}`,
  );

  // 9g. Classify each session via Haiku (mode='chat')
  const allChunks = [];
  const allKeywords = new Set();
  for (const s of sessions) {
    const sessionStart = Date.now();
    const text = toClassifierInput(s);
    let result;
    try {
      result = await classifyDocument({
        accountId,
        mode: "chat",
        anchorDescription: `WhatsApp conversation between ${participantElementIds.length + 1} participants (session ${s.index + 1} of ${sessions.length})`,
        ontologyLabels: new Set([]),
        naturalEdgeMap: "",
        documentText: text,
      });
    } catch (err) {
      // Same cleanup-then-rethrow handling as the hashing step above.
      cleanup();
      throw err;
    }
    if (result.kind === "fallback") {
      cleanup();
      fail("classify", { reason: `Haiku fallback on session ${s.index}: ${result.reason}` });
    }
    const chunkCount = result.output.sections.length;
    log(
      `classify-session sessionIndex=${s.index + 1}/${sessions.length} messages=${s.messages.length} chars=${text.length} chunks=${chunkCount} ms=${Date.now() - sessionStart}`,
    );
    if (chunkCount === 0 && s.messages.length > 0) {
      cleanup();
      fail("classify", {
        reason: `session ${s.index} of ${s.messages.length} messages produced zero chunks (classifier-prompt regression)`,
      });
    }
    for (const sec of result.output.sections) {
      allChunks.push(sec);
    }
    for (const kw of result.output.documentKeywords) {
      allKeywords.add(kw);
    }
  }

  // 9h. Compute lastIngestedMessageHash from the last delta line
  // NOTE(review): assumes deltaLines is non-empty here, i.e. findDeltaCursor
  // reports kind "empty" whenever no lines follow the cursor — confirm.
  const lastLine = deltaLines[deltaLines.length - 1];
  const lastIngestedMessageHash = deriveMessageContentHash({
    dateSent: lastLine.dateSent,
    senderName: lastLine.senderName,
    body: lastLine.body,
  });
  const lastIngestedMessageAt = lastLine.dateSent;

  // 9i. Document-level summary: message, session, and chunk counts only
  // (informational on the parent node).
  const documentSummary = sessions.length === 1
    ? `${deltaLines.length} messages in 1 session, ${allChunks.length} chunks.`
    : `${deltaLines.length} messages in ${sessions.length} sessions, ${allChunks.length} chunks.`;

  // 9j. Call memoryIngest with parentLabel='ConversationArchive'
  let ingestResult;
  const ingestStart = Date.now();
  try {
    ingestResult = await memoryIngest({
      accountId,
      attachmentId: conversationIdentity, // semantic rename per parentLabel
      parentLabel: "ConversationArchive",
      documentSummary,
      anchorNodeId: ownerElementId,
      anchorLabel: "AdminUser", // unused in chat path but required by IngestParams
      sections: allChunks,
      scope,
      sessionId,
      documentKeywords: Array.from(allKeywords),
      archiveSha256,
      archiveSourceFile,
      lastIngestedMessageHash,
      lastIngestedMessageAt,
      participantElementIds: [ownerElementId, ...participantElementIds],
    });
  } catch (err) {
    cleanup();
    fail("memory-ingest", { reason: err instanceof Error ? err.message : String(err) });
  }
  log(
    `file=${archiveSourceFile} conversationIdentity=${conversationIdentity.slice(0, 12)} archiveElementId=${ingestResult.documentNodeId} chunks-written=${ingestResult.sectionCount} next-edges=${ingestResult.edgeBreakdown.NEXT ?? 0} participants=${ingestResult.edgeBreakdown.PARTICIPANT_IN ?? 0} ms=${Date.now() - ingestStart}`,
  );

  cleanup();
  const totalMs = Date.now() - startedMs;
  log(`done conversationIdentity=${conversationIdentity.slice(0, 12)} total-ms=${totalMs} exit=0`);

  process.stdout.write(JSON.stringify({
    archiveElementId: ingestResult.documentNodeId,
    conversationIdentity,
    archiveSha256,
    archiveSourceFile,
    parsed: parseResult.counters.parsed,
    mediaSkipped: parseResult.counters.mediaSkipped,
    systemSkipped: parseResult.counters.systemSkipped,
    delta: { kind: deltaKind, deltaStart, deltaMessages: deltaLines.length },
    sessions: sessions.length,
    chunks: ingestResult.sectionCount,
    nextEdgesCreated: ingestResult.edgeBreakdown.NEXT ?? 0,
    participantsLinked: ingestResult.edgeBreakdown.PARTICIPANT_IN ?? 0,
    dateRange: { first: allLines[0].dateSent, last: allLines[allLines.length - 1].dateSent },
    senderHistogram,
    topicKeywords: Array.from(allKeywords),
    ms: totalMs,
  }) + "\n");
  process.exit(0);
}
|
|
666
604
|
|
|
605
|
+
/**
 * Count how many parsed lines each sender contributed.
 *
 * @param {Iterable<{senderName: string}>} lines - Parsed chat lines.
 * @returns {Array<{name: string, count: number}>} One entry per distinct
 *   senderName, ordered by descending count; senders with equal counts keep
 *   the order in which they first appear in `lines`.
 */
function computeSenderHistogram(lines) {
  const tally = new Map();
  for (const line of lines) {
    const sender = line.senderName;
    const soFar = tally.get(sender) ?? 0;
    tally.set(sender, soFar + 1);
  }

  const histogram = [];
  for (const [name, count] of tally) {
    histogram.push({ name, count });
  }
  // Array.prototype.sort is stable, so ties stay in first-appearance order.
  histogram.sort((left, right) => right.count - left.count);
  return histogram;
}
|
|
667
614
|
|
|
668
615
|
main().catch((err) => {
|
|
669
616
|
fail("uncaught", { reason: err instanceof Error ? err.message : String(err) });
|