@rubytech/create-realagent 1.0.828 → 1.0.829
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/payload/platform/neo4j/schema.cypher +2 -1
- package/payload/platform/package.json +2 -2
- package/payload/platform/plugins/admin/hooks/__tests__/archive-ingest-surface-gate.test.sh +39 -54
- package/payload/platform/plugins/admin/hooks/archive-ingest-surface-gate.sh +26 -58
- package/payload/platform/plugins/admin/skills/onboarding/SKILL.md +2 -2
- package/payload/platform/plugins/docs/references/plugins-guide.md +1 -1
- package/payload/platform/plugins/memory/PLUGIN.md +4 -4
- package/payload/platform/plugins/memory/mcp/dist/index.js +18 -218
- package/payload/platform/plugins/memory/mcp/dist/index.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-validator.test.js +103 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-validator.test.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js +30 -20
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.d.ts +16 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.js +12 -3
- package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-archive-write.test.js +2 -138
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-archive-write.test.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.js +10 -5
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/profile-update-personfields-open.test.d.ts +2 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/profile-update-personfields-open.test.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/profile-update-personfields-open.test.js +148 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/profile-update-personfields-open.test.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.d.ts +1 -64
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.js +6 -336
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts +7 -11
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js +1 -11
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/profile-update.d.ts +21 -17
- package/payload/platform/plugins/memory/mcp/dist/tools/profile-update.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/profile-update.js +77 -37
- package/payload/platform/plugins/memory/mcp/dist/tools/profile-update.js.map +1 -1
- package/payload/platform/plugins/memory/references/schema-base.md +2 -0
- package/payload/platform/plugins/memory/skills/document-ingest/SKILL.md +54 -4
- package/payload/platform/plugins/whatsapp/PLUGIN.md +1 -1
- package/payload/platform/scripts/seed-neo4j.sh +15 -14
- package/payload/platform/templates/specialists/agents/database-operator.md +9 -15
- package/payload/server/chunk-CUSH3UXP.js +2305 -0
- package/payload/server/chunk-IWNDVGKT.js +10077 -0
- package/payload/server/chunk-KC7NUABI.js +654 -0
- package/payload/server/chunk-WUVXPZIV.js +1116 -0
- package/payload/server/client-pool-3TM3SRIA.js +32 -0
- package/payload/server/cloudflare-task-tracker-4NIODMGL.js +19 -0
- package/payload/server/maxy-edge.js +3 -3
- package/payload/server/neo4j-migrations-XTQ4WEV6.js +428 -0
- package/payload/server/server.js +6 -6
- package/payload/platform/plugins/whatsapp-import/PLUGIN.md +0 -48
- package/payload/platform/plugins/whatsapp-import/bin/ingest.mjs +0 -617
- package/payload/platform/plugins/whatsapp-import/bin/whatsapp-ingest.sh +0 -98
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/delta-append.test.ts +0 -163
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/parse-export-lrm.test.ts +0 -83
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/parse-export.test.ts +0 -678
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/sessionize.test.ts +0 -91
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/to-classifier-input.test.ts +0 -59
- package/payload/platform/plugins/whatsapp-import/lib/src/delta-cursor.ts +0 -54
- package/payload/platform/plugins/whatsapp-import/lib/src/derive-keys.ts +0 -82
- package/payload/platform/plugins/whatsapp-import/lib/src/index.ts +0 -22
- package/payload/platform/plugins/whatsapp-import/lib/src/parse-export.ts +0 -471
- package/payload/platform/plugins/whatsapp-import/lib/src/sessionize.ts +0 -81
- package/payload/platform/plugins/whatsapp-import/lib/src/to-classifier-input.ts +0 -48
- package/payload/platform/plugins/whatsapp-import/lib/tsconfig.json +0 -9
- package/payload/platform/plugins/whatsapp-import/lib/vitest.config.ts +0 -9
- package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/SKILL.md +0 -124
- package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/references/conversation-archive-shape.md +0 -143
- package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/references/export-parse.md +0 -109
|
@@ -1,617 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
// =============================================================================
|
|
3
|
-
// ingest.mjs — in-process orchestrator for whatsapp-ingest.sh (Task 891).
|
|
4
|
-
//
|
|
5
|
-
// Pipeline (single phase — Phase 2 insight derivation deferred to its own
|
|
6
|
-
// follow-up task):
|
|
7
|
-
//
|
|
8
|
-
// parse → bind canonical senders → derive conversationIdentity
|
|
9
|
-
// → look up prior :ConversationArchive (delta cursor)
|
|
10
|
-
// → sessionize delta at gapHours boundaries
|
|
11
|
-
// → for each session: classify (mode='chat') → collect chunks
|
|
12
|
-
// → memoryIngest(parentLabel='ConversationArchive')
|
|
13
|
-
//
|
|
14
|
-
// Argv (positional): <archive-path>
|
|
15
|
-
// Argv (flags): --owner-element-id <id>
|
|
16
|
-
// --participant-person-ids <csv>
|
|
17
|
-
// --scope <admin|public>
|
|
18
|
-
// [--session-gap-hours <N>] (default 12)
|
|
19
|
-
// [--account-id <accountId>]
|
|
20
|
-
// [--timezone <iana>]
|
|
21
|
-
// [--date-format <DD/MM/YY|MM/DD/YY|DD/MM/YYYY|MM/DD/YYYY>]
|
|
22
|
-
//
|
|
23
|
-
// Owner + participants form the closed sender set; any parsed senderName
|
|
24
|
-
// outside that set LOUD-FAILs with `parser-miss` (preserves Task 887 §A0).
|
|
25
|
-
// `--subject-person-id` and `--filter` are gone (the chunked archive shape
|
|
26
|
-
// makes per-message filtering obsolete — chunking bounds operator surface).
|
|
27
|
-
//
|
|
28
|
-
// Stdout (success): one JSON line — all the counters the skill needs to
|
|
29
|
-
// formulate the three operator-facing messages. See SKILL.md for the shape.
|
|
30
|
-
// Stderr: one [whatsapp-import] FAIL line on failure, exit non-zero.
|
|
31
|
-
// =============================================================================
|
|
32
|
-
|
|
33
|
-
import { existsSync, mkdtempSync, readdirSync, rmSync, statSync, createReadStream } from "node:fs";
|
|
34
|
-
import { createHash } from "node:crypto";
|
|
35
|
-
import { join, resolve, dirname, basename } from "node:path";
|
|
36
|
-
import { tmpdir } from "node:os";
|
|
37
|
-
import { spawnSync } from "node:child_process";
|
|
38
|
-
import { fileURLToPath } from "node:url";
|
|
39
|
-
|
|
40
|
-
// Directory containing this script (ES modules provide no built-in __dirname).
const __dirname = dirname(fileURLToPath(import.meta.url));

// ---------------------------------------------------------------------------
// 1. Resolve dist paths.
// ---------------------------------------------------------------------------
// Platform root: MAXY_PLATFORM_ROOT when set to a non-blank value, otherwise
// three directories above this script.
const platformRoot =
  process.env.MAXY_PLATFORM_ROOT?.trim() || resolve(__dirname, "..", "..", "..");

// Entry point of the whatsapp-import library build (parse/sessionize helpers).
const PARSE_EXPORT_PATH = resolve(platformRoot, "plugins", "whatsapp-import", "lib", "dist", "index.js");

// Memory plugin modules loaded dynamically by main().
const NEO4J_LIB_PATH = resolve(platformRoot, "plugins", "memory", "mcp", "dist", "lib", "neo4j.js");
const LLM_CLASSIFIER_PATH = resolve(platformRoot, "plugins", "memory", "mcp", "dist", "lib", "llm-classifier.js");
const MEMORY_INGEST_PATH = resolve(platformRoot, "plugins", "memory", "mcp", "dist", "tools", "memory-ingest.js");
|
|
84
|
-
|
|
85
|
-
// ---------------------------------------------------------------------------
|
|
86
|
-
// 2. Logger
|
|
87
|
-
// ---------------------------------------------------------------------------
|
|
88
|
-
/**
 * Write one diagnostic line to stderr, tagged with the plugin prefix.
 *
 * @param {string} line - Message text; the trailing newline is appended here.
 */
function log(line) {
  const tagged = `[whatsapp-import] ${line}\n`;
  process.stderr.write(tagged);
}
|
|
91
|
-
/**
 * Report a fatal error on stderr as a single `FAIL phase=... k=v ...` line and
 * terminate the process with exit code 1.
 *
 * String values containing a space or `=` are wrapped in double quotes (with
 * embedded quotes backslash-escaped); null/undefined values render as `-`.
 *
 * @param {string} phase - Pipeline phase in which the failure occurred.
 * @param {Record<string, unknown>} fields - Key/value details to append.
 */
function fail(phase, fields) {
  const rendered = [];
  for (const [key, value] of Object.entries(fields)) {
    const needsQuoting =
      typeof value === "string" && (value.includes(" ") || value.includes("="));
    if (needsQuoting) {
      rendered.push(`${key}="${value.replace(/"/g, '\\"')}"`);
    } else {
      rendered.push(`${key}=${value ?? "-"}`);
    }
  }
  process.stderr.write(`[whatsapp-import] FAIL phase=${phase} ${rendered.join(" ")}\n`);
  process.exit(1);
}
|
|
102
|
-
|
|
103
|
-
// ---------------------------------------------------------------------------
|
|
104
|
-
// 3. Argv parsing
|
|
105
|
-
// ---------------------------------------------------------------------------
|
|
106
|
-
/**
 * Parse the process argv into one positional archive path plus `--flag value`
 * pairs, then enforce the required flags.
 *
 * Every flag consumes the following token as its value; flag names are
 * converted from kebab-case to camelCase. Any violation is reported via
 * `fail("argv", ...)`.
 *
 * @param {string[]} argv - Full argv array; the first two entries are skipped.
 * @returns {{ archive: string, flags: Record<string, string> }}
 */
function parseArgv(argv) {
  const flags = {};
  let archive = null;
  const rest = argv.slice(2);
  let pos = 0;
  while (pos < rest.length) {
    const token = rest[pos];
    pos += 1;
    if (!token.startsWith("--")) {
      // Only a single positional argument (the archive path) is accepted.
      if (archive != null) {
        fail("argv", { reason: `unexpected positional argument "${token}"` });
      } else {
        archive = token;
      }
      continue;
    }
    const flagName = token.slice(2);
    const value = rest[pos];
    pos += 1;
    if (value == null) fail("argv", { reason: `flag --${flagName} requires a value` });
    flags[camelCase(flagName)] = value;
  }

  if (!archive) fail("argv", { reason: "missing positional <archive>" });
  if (!flags.ownerElementId) fail("argv", { reason: "missing --owner-element-id" });
  if (!flags.participantPersonIds) {
    fail("argv", {
      reason: "missing --participant-person-ids (csv of operator-confirmed :Person/:AdminUser elementIds, owner excluded)",
    });
  }
  if (!flags.scope) fail("argv", { reason: "missing --scope" });
  if (!["admin", "public"].includes(flags.scope)) {
    fail("argv", { reason: `invalid --scope "${flags.scope}" (admin|public)` });
  }
  return { archive, flags };
}
|
|
135
|
-
/**
 * Convert a kebab-case flag name to camelCase by upper-casing each lowercase
 * ASCII letter that directly follows a hyphen and dropping that hyphen.
 *
 * @param {string} s - Flag name without the leading `--`.
 * @returns {string}
 */
function camelCase(s) {
  return s.replace(/-([a-z])/g, (match) => match.charAt(1).toUpperCase());
}
|
|
138
|
-
|
|
139
|
-
// ---------------------------------------------------------------------------
|
|
140
|
-
// 4. Archive resolution
|
|
141
|
-
// ---------------------------------------------------------------------------
|
|
142
|
-
/**
 * Locate the chat transcript for an archive path.
 *
 * Accepted shapes:
 *   - a `.zip` file (extension matched case-insensitively): extracted with the
 *     `unzip` command into a fresh temp directory, which is then searched for
 *     `_chat.txt`;
 *   - a directory: searched for `_chat.txt`;
 *   - any other regular file: used directly as the transcript.
 *
 * Failures are reported through `fail("argv", ...)`.
 *
 * @param {string} archivePath - Path to the zip, directory, or text file.
 * @returns {{ chatTxt: string, cleanup: () => void }} Transcript path plus a
 *   callback that removes the temp directory (a no-op unless a zip was
 *   extracted). The callback is safe to call more than once.
 */
function resolveChatTxt(archivePath) {
  const abs = resolve(archivePath);
  if (!existsSync(abs)) fail("argv", { reason: `archive path not found: ${abs}` });
  const st = statSync(abs);
  // Match the extension case-insensitively so "EXPORT.ZIP" is not mistaken
  // for a plain-text transcript.
  if (st.isFile() && abs.toLowerCase().endsWith(".zip")) {
    const tmp = mkdtempSync(join(tmpdir(), "whatsapp-ingest-"));
    // force: a repeated cleanup call must not throw once the dir is gone.
    const removeTmp = () => rmSync(tmp, { recursive: true, force: true });
    const unzip = spawnSync("unzip", ["-q", "-o", abs, "-d", tmp], { encoding: "utf8" });
    if (unzip.status !== 0) {
      removeTmp();
      fail("argv", {
        reason: "unzip failed",
        archive: abs,
        // When the command could not be spawned at all, stderr is empty and
        // the cause is only available on `unzip.error`.
        stderr: (unzip.stderr || unzip.error?.message || "").slice(0, 200),
      });
    }
    const chat = findChatTxt(tmp);
    if (!chat) {
      removeTmp();
      fail("argv", { reason: "_chat.txt not found in zip", archive: abs });
    }
    return { chatTxt: chat, cleanup: removeTmp };
  }
  if (st.isDirectory()) {
    const chat = findChatTxt(abs);
    if (!chat) fail("argv", { reason: "_chat.txt not found in directory", archive: abs });
    return { chatTxt: chat, cleanup: () => {} };
  }
  if (st.isFile()) {
    return { chatTxt: abs, cleanup: () => {} };
  }
  fail("argv", { reason: `unsupported archive shape: ${abs}` });
  // Not reached in production (fail() exits); kept so every path returns.
  return { chatTxt: abs, cleanup: () => {} };
}
|
|
175
|
-
/**
 * Search a directory tree for a file named `_chat.txt`.
 *
 * A match directly inside `dir` wins; otherwise subdirectories are searched
 * recursively in listing order and the first hit is returned.
 *
 * @param {string} dir - Directory to search.
 * @returns {string | null} Path to `_chat.txt`, or null when none exists.
 */
function findChatTxt(dir) {
  const entries = readdirSync(dir, { withFileTypes: true });
  const direct = entries.find((entry) => entry.isFile() && entry.name === "_chat.txt");
  if (direct) return join(dir, direct.name);
  for (const entry of entries) {
    if (!entry.isDirectory()) continue;
    const found = findChatTxt(join(dir, entry.name));
    if (found) return found;
  }
  return null;
}
|
|
188
|
-
|
|
189
|
-
// ---------------------------------------------------------------------------
|
|
190
|
-
// 5. Account resolution (Phase 0 = single account)
|
|
191
|
-
// ---------------------------------------------------------------------------
|
|
192
|
-
/**
 * Determine the account id for this run.
 *
 * An explicit non-blank `flags.accountId` wins. Otherwise the parent of the
 * platform root is expected to contain `data/accounts/`, and exactly one
 * non-hidden subdirectory there supplies the id. Zero or several candidates,
 * or a missing accounts directory, are reported via `fail("argv", ...)`.
 *
 * @param {{ accountId?: string }} flags - Parsed command-line flags.
 * @returns {string} The resolved account id.
 */
function resolveAccountId(flags) {
  const explicit = flags.accountId && flags.accountId.trim();
  if (explicit) return explicit;

  const accountsDir = join(resolve(platformRoot, ".."), "data", "accounts");
  if (!existsSync(accountsDir)) {
    fail("argv", { reason: `accounts dir not found: ${accountsDir}; pass --account-id explicitly` });
  }

  // Candidate accounts are the non-hidden subdirectories.
  const dirs = [];
  for (const entry of readdirSync(accountsDir, { withFileTypes: true })) {
    if (entry.isDirectory() && !entry.name.startsWith(".")) dirs.push(entry.name);
  }

  if (dirs.length === 0) fail("argv", { reason: `no accounts found under ${accountsDir}` });
  if (dirs.length > 1) {
    fail("argv", {
      reason: `multiple accounts under ${accountsDir} (${dirs.join(",")}); pass --account-id explicitly`,
    });
  }
  return dirs[0];
}
|
|
210
|
-
|
|
211
|
-
// ---------------------------------------------------------------------------
|
|
212
|
-
// 6. Bind canonical sender set (Task 887 §A0 preserved).
|
|
213
|
-
//
|
|
214
|
-
// All distinct parsed senderNames must resolve to {owner, participants...}.
|
|
215
|
-
// Any miss LOUD-FAILs with `parser-miss reason="senderName=<...>"`.
|
|
216
|
-
// ---------------------------------------------------------------------------
|
|
217
|
-
// Cypher query that fetches, for every elementId in $ids, the node's labels,
// name fields and accountId (missing properties coalesce to ''). Assembled
// from one clause per entry; the empty first/last entries keep the leading
// and trailing newline of the query text.
const CANONICAL_FETCH_CYPHER = [
  "",
  "UNWIND $ids AS id",
  "MATCH (n) WHERE elementId(n) = id",
  "RETURN elementId(n) AS elemId,",
  "labels(n) AS labels,",
  "coalesce(n.name, '') AS name,",
  "coalesce(n.givenName, '') AS givenName,",
  "coalesce(n.familyName, '') AS familyName,",
  "coalesce(n.accountId, '') AS accountId",
  "",
].join("\n");
|
|
227
|
-
|
|
228
|
-
/**
 * Error whose message is intended to be shown to the operator as-is.
 * Instances carry `userFacing === true` so callers can tell them apart from
 * unexpected errors.
 */
class IngestUserFacingError extends Error {
  /**
   * @param {string} message - Operator-facing description of the problem.
   */
  constructor(message) {
    super(message);
    Object.assign(this, { name: "IngestUserFacingError", userFacing: true });
  }
}
|
|
235
|
-
|
|
236
|
-
/**
 * Verify that every sender name parsed from the export belongs to the
 * confirmed participant set (owner + participants).
 *
 * The confirmed nodes are fetched by elementId. Each must carry the expected
 * accountId and a :Person or :AdminUser label. For every node the candidate
 * display names (name, "given family", given, family) are normalised and
 * indexed; the first node to claim a normalised name keeps it. Each parsed
 * sender name must then hit that index.
 *
 * @param {object} params
 * @param {object} params.session - Database session exposing `executeRead`.
 * @param {string} params.accountId - Account every node must belong to.
 * @param {string} params.ownerElementId - elementId of the archive owner.
 * @param {string[]} params.participantElementIds - Other participants' elementIds (owner excluded).
 * @param {Iterable<string>} params.senderNames - Distinct sender names from the export.
 * @param {(name: string) => string} params.normaliseSenderName - Name normaliser used for matching.
 * @returns {Promise<{ participantsResolved: number }>} Count of confirmed nodes found.
 * @throws {IngestUserFacingError} On duplicate ids, a missing or invalid node,
 *   or a sender name outside the confirmed set (`parser-miss`).
 */
async function bindCanonicalSenders({
  session,
  accountId,
  ownerElementId,
  participantElementIds,
  senderNames,
  normaliseSenderName,
}) {
  const allIds = [ownerElementId, ...participantElementIds];
  const distinctIds = Array.from(new Set(allIds));
  if (distinctIds.length !== allIds.length) {
    throw new IngestUserFacingError(
      `participant id list contains duplicates (owner appears in --participant-person-ids?)`,
    );
  }

  const res = await session.executeRead(async (tx) => tx.run(CANONICAL_FETCH_CYPHER, { ids: distinctIds }));
  const seenIds = new Set();
  // normalised display name -> elementId of the first node that claimed it
  const index = new Map();
  for (const r of res.records) {
    const elemId = r.get("elemId");
    const labels = r.get("labels") || [];
    const acct = r.get("accountId") || "";
    if (!acct) {
      throw new IngestUserFacingError(`node ${elemId} has no accountId — corrupt canonical Person/AdminUser`);
    }
    if (acct !== accountId) {
      throw new IngestUserFacingError(`node ${elemId} belongs to account ${acct}, not ${accountId}`);
    }
    if (!labels.includes("Person") && !labels.includes("AdminUser")) {
      throw new IngestUserFacingError(`node ${elemId} has labels [${labels.join(",")}]; expected :Person or :AdminUser`);
    }
    seenIds.add(elemId);

    const name = r.get("name") || "";
    const given = r.get("givenName") || "";
    const family = r.get("familyName") || "";
    const candidates = [];
    if (name) candidates.push(name);
    if (given && family) candidates.push(`${given} ${family}`);
    if (given) candidates.push(given);
    if (family) candidates.push(family);
    for (const c of candidates) {
      const norm = normaliseSenderName(c);
      if (!norm) continue;
      if (!index.has(norm)) index.set(norm, elemId);
    }
  }

  // Every requested id must have come back from the graph.
  for (const id of distinctIds) {
    if (!seenIds.has(id)) {
      throw new IngestUserFacingError(`elementId ${id} not found in graph`);
    }
  }

  // Validate every distinct parsed senderName against the closed candidate set.
  for (const senderName of senderNames) {
    const norm = normaliseSenderName(senderName);
    const hit = index.get(norm);
    if (!hit) {
      throw new IngestUserFacingError(
        `parser-miss reason="senderName=${senderName} not in confirmed participant set (${distinctIds.length} confirmed elementIds; re-run with the missing :Person elementId in --participant-person-ids)"`,
      );
    }
  }
  return { participantsResolved: seenIds.size };
}
|
|
301
|
-
|
|
302
|
-
// ---------------------------------------------------------------------------
|
|
303
|
-
// 7. SHA-256 of file bytes (for archiveSha256)
|
|
304
|
-
// ---------------------------------------------------------------------------
|
|
305
|
-
/**
 * Compute the SHA-256 digest of a file by streaming its bytes.
 *
 * @param {string} filePath - File to hash.
 * @returns {Promise<string>} Lowercase hex digest; rejects if the file
 *   cannot be read.
 */
async function fileSha256(filePath) {
  const digest = createHash("sha256");
  for await (const piece of createReadStream(filePath)) {
    digest.update(piece);
  }
  return digest.digest("hex");
}
|
|
314
|
-
|
|
315
|
-
// ---------------------------------------------------------------------------
|
|
316
|
-
// 8. Natural-edge map for the chat-mode classifier (intentionally not built).
|
|
317
|
-
// Chat mode doesn't use it (the chat prompt drops edge proposals), but
|
|
318
|
-
// classifyDocument's signature still takes the param. Pass the empty string.
|
|
319
|
-
// ---------------------------------------------------------------------------
|
|
320
|
-
|
|
321
|
-
// ---------------------------------------------------------------------------
|
|
322
|
-
// 9. Main
|
|
323
|
-
// ---------------------------------------------------------------------------
|
|
324
|
-
/**
 * Run the ingest pipeline end to end:
 * parse argv → load the dist modules → resolve and hash the transcript → parse
 * → bind senders to the confirmed participant set → look up the prior
 * :ConversationArchive → compute the delta → sessionize → classify each
 * session → memoryIngest → print one JSON summary line on stdout.
 *
 * Every failure path goes through `fail(...)` (stderr line, exit 1); both the
 * empty-delta path and the success path exit 0 after writing the JSON line.
 *
 * @returns {Promise<void>} In practice the process exits before this resolves.
 */
async function main() {
  const startedMs = Date.now();
  const { archive, flags } = parseArgv(process.argv);
  const ownerElementId = flags.ownerElementId;
  const participantElementIds = flags.participantPersonIds
    .split(",")
    .map((s) => s.trim())
    .filter((s) => s.length > 0);
  if (participantElementIds.length === 0) {
    fail("argv", { reason: "--participant-person-ids must list at least one elementId" });
  }
  const scope = flags.scope;
  const accountId = resolveAccountId(flags);
  const timezone = flags.timezone || "Europe/London";
  const dateFormat = flags.dateFormat;
  const sessionGapHours = flags.sessionGapHours
    ? parseFloat(flags.sessionGapHours)
    : 12;
  if (!Number.isFinite(sessionGapHours) || sessionGapHours <= 0) {
    fail("argv", { reason: `invalid --session-gap-hours "${flags.sessionGapHours}" (must be positive number)` });
  }
  const sessionId =
    flags.sessionId || `whatsapp-import:${Date.now()}:${Math.random().toString(36).slice(2, 10)}`;

  // Imports — fail loudly if any compiled dist missing
  let parseExport, sessionize, toClassifierInput, findDeltaCursor;
  let normaliseSenderName, deriveConversationIdentity, deriveMessageContentHash;
  let getSession, classifyDocument, memoryIngest;
  try {
    ({
      parseExport,
      sessionize,
      toClassifierInput,
      findDeltaCursor,
      normaliseSenderName,
      deriveConversationIdentity,
      deriveMessageContentHash,
    } = await import(PARSE_EXPORT_PATH));
    ({ getSession } = await import(NEO4J_LIB_PATH));
    ({ classifyDocument } = await import(LLM_CLASSIFIER_PATH));
    ({ memoryIngest } = await import(MEMORY_INGEST_PATH));
  } catch (err) {
    fail("import", {
      reason: "failed to import compiled dist",
      detail: err instanceof Error ? err.message : String(err),
    });
  }

  // 9a. Resolve _chat.txt + sha256
  const resolvedArchive = resolveChatTxt(archive);
  const chatTxt = resolvedArchive.chatTxt;
  // Run the archive cleanup at most once, and also on process exit, so the
  // extracted temp directory is removed even on paths that never reach an
  // explicit cleanup() call (e.g. a rejected hash or a throwing classifier).
  let cleanedUp = false;
  const cleanup = () => {
    if (cleanedUp) return;
    cleanedUp = true;
    resolvedArchive.cleanup();
  };
  process.once("exit", cleanup);

  const archiveSha256 = await fileSha256(chatTxt);
  const archiveSourceFile = basename(chatTxt);
  log(
    `start file=${archiveSourceFile} owner=${ownerElementId} participants=${participantElementIds.length} scope=${scope} accountId=${accountId} archiveSha256=${archiveSha256.slice(0, 12)} session-gap-hours=${sessionGapHours}`,
  );

  // 9b. Parse
  let parseResult;
  try {
    parseResult = parseExport({ filePath: chatTxt, accountId, timezone, dateFormat });
  } catch (err) {
    cleanup();
    fail("parse", { reason: err instanceof Error ? err.message : String(err) });
  }
  const allLines = parseResult.parsedLines;
  log(
    `parsed lines=${parseResult.counters.parsed} media-skipped=${parseResult.counters.mediaSkipped} system-skipped=${parseResult.counters.systemSkipped}`,
  );
  if (allLines.length === 0) {
    cleanup();
    fail("parse", { reason: "zero parsed lines after walking archive" });
  }

  // 9c. Bind canonical senders against the confirmed set
  const distinctSenderNames = Array.from(new Set(allLines.map((l) => l.senderName)));
  const senderHistogram = computeSenderHistogram(allLines);
  const session = getSession();
  try {
    await bindCanonicalSenders({
      session,
      accountId,
      ownerElementId,
      participantElementIds,
      senderNames: distinctSenderNames,
      normaliseSenderName,
    });
  } catch (err) {
    await session.close().catch(() => {});
    cleanup();
    if (err && err.userFacing) {
      // User-facing errors already carry the complete operator message.
      process.stderr.write(`[whatsapp-import] FAIL ${err.message}\n`);
      process.exit(1);
    }
    fail("argv", { reason: err instanceof Error ? err.message : String(err) });
  }

  // 9d. Derive conversationIdentity and look up prior :ConversationArchive
  const conversationIdentity = deriveConversationIdentity({
    accountId,
    participantElementIds: [ownerElementId, ...participantElementIds],
  });
  let priorArchive = null;
  try {
    const r = await session.run(
      `MATCH (a:ConversationArchive { conversationIdentity: $cid })
       RETURN elementId(a) AS elemId,
              a.lastIngestedMessageHash AS lastHash,
              a.lastIngestedMessageAt AS lastAt LIMIT 1`,
      { cid: conversationIdentity },
    );
    if (r.records[0]) {
      priorArchive = {
        elemId: r.records[0].get("elemId"),
        lastHash: r.records[0].get("lastHash"),
        lastAt: r.records[0].get("lastAt"),
      };
    }
  } catch (err) {
    await session.close().catch(() => {});
    cleanup();
    fail("delta-cursor-missing", { reason: `conversationArchive lookup failed: ${err instanceof Error ? err.message : String(err)}` });
  }
  await session.close().catch(() => {});

  // 9e. Compute deltaStart
  let deltaStart = 0;
  let deltaKind = "first-ingest";
  if (priorArchive && priorArchive.lastHash) {
    const cursor = findDeltaCursor(allLines, priorArchive.lastHash);
    if (cursor.kind === "missing") {
      cleanup();
      fail("delta-cursor-missing", {
        reason: `prior cursor not found in re-export (operator deleted prior messages, or this is a different chat archive)`,
        priorArchive: priorArchive.elemId,
        lastIngestedMessageAt: priorArchive.lastAt,
      });
    }
    if (cursor.kind === "empty") {
      // Nothing new since the last ingest: report the existing archive and stop.
      log(`noop reason="no new messages since ${priorArchive.lastAt}"`);
      cleanup();
      const totalMs = Date.now() - startedMs;
      process.stdout.write(JSON.stringify({
        archiveElementId: priorArchive.elemId,
        conversationIdentity,
        archiveSha256,
        archiveSourceFile,
        parsed: parseResult.counters.parsed,
        mediaSkipped: parseResult.counters.mediaSkipped,
        systemSkipped: parseResult.counters.systemSkipped,
        delta: { kind: "empty-delta", deltaStart: allLines.length, deltaMessages: 0 },
        sessions: 0,
        chunks: 0,
        nextEdgesCreated: 0,
        participantsLinked: 0,
        dateRange: { first: allLines[0].dateSent, last: allLines[allLines.length - 1].dateSent },
        senderHistogram,
        topicKeywords: [],
        ms: totalMs,
        priorLastIngestedMessageAt: priorArchive.lastAt,
      }) + "\n");
      process.exit(0);
    }
    deltaStart = cursor.deltaStart;
    deltaKind = "delta";
  }
  const deltaLines = allLines.slice(deltaStart);
  // A prior archive may exist without a stored cursor hash; that case is
  // handled as a first ingest above, so guard the slice accordingly.
  log(
    `delta cursor=${priorArchive?.lastHash ? priorArchive.lastHash.slice(0, 12) : "(first-ingest)"} cursor-line=${deltaStart} delta-messages=${deltaLines.length}`,
  );

  // 9f. Sessionize delta
  const sessions = sessionize(deltaLines, sessionGapHours);
  log(
    `sessionize file=${archiveSourceFile} archiveSha256=${archiveSha256.slice(0, 12)} messages=${deltaLines.length} sessions=${sessions.length} gap-hours=${sessionGapHours}`,
  );

  // 9g. Classify each session via Haiku (mode='chat')
  const allChunks = [];
  const allKeywords = new Set();
  for (const s of sessions) {
    const sessionStart = Date.now();
    const text = toClassifierInput(s);
    const result = await classifyDocument({
      accountId,
      mode: "chat",
      anchorDescription: `WhatsApp conversation between ${[ownerElementId, ...participantElementIds].length} participants (session ${s.index + 1} of ${sessions.length})`,
      ontologyLabels: new Set([]),
      naturalEdgeMap: "",
      documentText: text,
    });
    if (result.kind === "fallback") {
      cleanup();
      fail("classify", { reason: `Haiku fallback on session ${s.index}: ${result.reason}` });
    }
    const chunkCount = result.output.sections.length;
    log(
      `classify-session sessionIndex=${s.index + 1}/${sessions.length} messages=${s.messages.length} chars=${text.length} chunks=${chunkCount} ms=${Date.now() - sessionStart}`,
    );
    if (chunkCount === 0 && s.messages.length > 0) {
      cleanup();
      fail("classify", {
        reason: `session ${s.index} of ${s.messages.length} messages produced zero chunks (classifier-prompt regression)`,
      });
    }
    for (const sec of result.output.sections) {
      allChunks.push(sec);
    }
    for (const kw of result.output.documentKeywords) {
      allKeywords.add(kw);
    }
  }

  // 9h. Compute lastIngestedMessageHash from the last delta line
  const lastLine = deltaLines[deltaLines.length - 1];
  const lastIngestedMessageHash = deriveMessageContentHash({
    dateSent: lastLine.dateSent,
    senderName: lastLine.senderName,
    body: lastLine.body,
  });
  const lastIngestedMessageAt = lastLine.dateSent;

  // 9i. Document-level summary: message/session/chunk counts only (the
  // parent's summary is informational).
  const documentSummary = sessions.length === 1
    ? `${deltaLines.length} messages in 1 session, ${allChunks.length} chunks.`
    : `${deltaLines.length} messages in ${sessions.length} sessions, ${allChunks.length} chunks.`;

  // 9j. Call memoryIngest with parentLabel='ConversationArchive'
  let ingestResult;
  const ingestStart = Date.now();
  try {
    ingestResult = await memoryIngest({
      accountId,
      attachmentId: conversationIdentity, // semantic rename per parentLabel
      parentLabel: "ConversationArchive",
      documentSummary,
      anchorNodeId: ownerElementId,
      anchorLabel: "AdminUser", // unused in chat path but required by IngestParams
      sections: allChunks,
      scope,
      sessionId,
      documentKeywords: Array.from(allKeywords),
      archiveSha256,
      archiveSourceFile,
      lastIngestedMessageHash,
      lastIngestedMessageAt,
      participantElementIds: [ownerElementId, ...participantElementIds],
    });
  } catch (err) {
    cleanup();
    fail("memory-ingest", { reason: err instanceof Error ? err.message : String(err) });
  }
  log(
    `file=${archiveSourceFile} conversationIdentity=${conversationIdentity.slice(0, 12)} archiveElementId=${ingestResult.documentNodeId} chunks-written=${ingestResult.sectionCount} next-edges=${ingestResult.edgeBreakdown.NEXT ?? 0} participants=${ingestResult.edgeBreakdown.PARTICIPANT_IN ?? 0} ms=${Date.now() - ingestStart}`,
  );

  cleanup();
  const totalMs = Date.now() - startedMs;
  log(`done conversationIdentity=${conversationIdentity.slice(0, 12)} total-ms=${totalMs} exit=0`);

  process.stdout.write(JSON.stringify({
    archiveElementId: ingestResult.documentNodeId,
    conversationIdentity,
    archiveSha256,
    archiveSourceFile,
    parsed: parseResult.counters.parsed,
    mediaSkipped: parseResult.counters.mediaSkipped,
    systemSkipped: parseResult.counters.systemSkipped,
    delta: { kind: deltaKind, deltaStart, deltaMessages: deltaLines.length },
    sessions: sessions.length,
    chunks: ingestResult.sectionCount,
    nextEdgesCreated: ingestResult.edgeBreakdown.NEXT ?? 0,
    participantsLinked: ingestResult.edgeBreakdown.PARTICIPANT_IN ?? 0,
    dateRange: { first: allLines[0].dateSent, last: allLines[allLines.length - 1].dateSent },
    senderHistogram,
    topicKeywords: Array.from(allKeywords),
    ms: totalMs,
  }) + "\n");
  process.exit(0);
}
|
|
604
|
-
|
|
605
|
-
/**
 * Count messages per sender.
 *
 * @param {Array<{ senderName: string }>} lines - Parsed message lines.
 * @returns {Array<{ name: string, count: number }>} One entry per distinct
 *   sender, ordered by descending count; ties keep first-seen order.
 */
function computeSenderHistogram(lines) {
  const tally = new Map();
  lines.forEach(({ senderName }) => {
    tally.set(senderName, (tally.get(senderName) ?? 0) + 1);
  });
  const histogram = [];
  for (const [name, count] of tally) {
    histogram.push({ name, count });
  }
  histogram.sort((left, right) => right.count - left.count);
  return histogram;
}
|
|
614
|
-
|
|
615
|
-
// Entry point: any rejection escaping main() is reported as an "uncaught"
// failure through fail().
main().catch((err) => {
  const reason = err instanceof Error ? err.message : String(err);
  fail("uncaught", { reason });
});
|