@rubytech/create-maxy 1.0.831 → 1.0.832
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/payload/platform/neo4j/migrations/004-project-admin-agent.ts +36 -3
- package/payload/platform/plugins/memory/bin/conversation-archive-ingest.mjs +253 -110
- package/payload/platform/plugins/memory/bin/conversation-archive-ingest.sh +41 -14
- package/payload/platform/plugins/memory/mcp/dist/lib/uuid.d.ts +3 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/uuid.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/uuid.js +12 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/uuid.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts +11 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js +17 -9
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js.map +1 -1
- package/payload/platform/plugins/memory/skills/conversation-archive/SKILL.md +41 -9
- package/payload/platform/scripts/lib/resolve-account-dir.sh +19 -1
- package/payload/server/chunk-25QDCOE5.js +1116 -0
- package/payload/server/chunk-35YZS3KL.js +328 -0
- package/payload/server/chunk-7CBRZKZS.js +654 -0
- package/payload/server/chunk-IXOPV36P.js +2305 -0
- package/payload/server/chunk-ZKGAYLAK.js +10097 -0
- package/payload/server/client-pool-NBVGONQL.js +32 -0
- package/payload/server/cloudflare-task-tracker-R4FIORFL.js +19 -0
- package/payload/server/maxy-edge.js +4 -4
- package/payload/server/neo4j-migrations-3A6K2EB5.js +552 -0
- package/payload/server/server.js +19 -7
package/package.json
CHANGED
|
@@ -46,11 +46,45 @@
|
|
|
46
46
|
* final totals line. Both entry points emit identical log lines.
|
|
47
47
|
*/
|
|
48
48
|
|
|
49
|
-
import { existsSync, readdirSync } from "node:fs";
|
|
49
|
+
import { existsSync, readFileSync, readdirSync } from "node:fs";
|
|
50
50
|
import { resolve } from "node:path";
|
|
51
51
|
import { projectAgent, getSession } from "../../ui/app/lib/neo4j-store";
|
|
52
52
|
import { ACCOUNTS_DIR } from "../../ui/app/lib/claude-agent/account";
|
|
53
53
|
|
|
54
|
+
/**
|
|
55
|
+
* Account-json filter (Task 900 sub-scope F) — admits only directories whose
|
|
56
|
+
* `account.json` parses. Stub directories (e.g. `0dbf29ef-…/logs/` left
|
|
57
|
+
* behind after install 1's `account.json` was lost) are SKIPPED with a
|
|
58
|
+
* one-line `[platform] accounts-state STUB-DIR id=<dir>` log. Mirrors
|
|
59
|
+
* `listValidAccounts()` in `account.ts`; copied here because this migration
|
|
60
|
+
* runs at boot from a separate node_modules-resolution context (see the
|
|
61
|
+
* structural-typing rationale at the top of `004-prune-alien-accounts.ts`).
|
|
62
|
+
*/
|
|
63
|
+
function readValidAccountDirs(accountsDir: string): string[] {
|
|
64
|
+
const valid: string[] = [];
|
|
65
|
+
const entries = readdirSync(accountsDir, { withFileTypes: true })
|
|
66
|
+
.filter((e) => e.isDirectory())
|
|
67
|
+
.filter((e) => !e.name.startsWith("."));
|
|
68
|
+
for (const e of entries) {
|
|
69
|
+
const configPath = resolve(accountsDir, e.name, "account.json");
|
|
70
|
+
if (!existsSync(configPath)) {
|
|
71
|
+
console.error(
|
|
72
|
+
`[platform] accounts-state STUB-DIR id=${e.name} — onboarding leak; remove or repair`,
|
|
73
|
+
);
|
|
74
|
+
continue;
|
|
75
|
+
}
|
|
76
|
+
try {
|
|
77
|
+
JSON.parse(readFileSync(configPath, "utf-8"));
|
|
78
|
+
valid.push(e.name);
|
|
79
|
+
} catch {
|
|
80
|
+
console.error(
|
|
81
|
+
`[platform] accounts-state CORRUPT-JSON id=${e.name} — account.json failed to parse`,
|
|
82
|
+
);
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
return valid;
|
|
86
|
+
}
|
|
87
|
+
|
|
54
88
|
/**
|
|
55
89
|
* Structural alias for the `Driver` instance the runner passes in. Same
|
|
56
90
|
* rationale as `004-prune-alien-accounts.ts`: this file lives outside
|
|
@@ -209,8 +243,7 @@ export async function applyAdminAgentBackfill(
|
|
|
209
243
|
return;
|
|
210
244
|
}
|
|
211
245
|
|
|
212
|
-
const accountEntries =
|
|
213
|
-
.filter((e) => e.isDirectory());
|
|
246
|
+
const accountEntries = readValidAccountDirs(accountsDir).map((name) => ({ name }));
|
|
214
247
|
|
|
215
248
|
console.error(
|
|
216
249
|
`[admin-agent-graph-backfill] start accounts=${accountEntries.length}`,
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
// =============================================================================
|
|
3
3
|
// conversation-archive-ingest.mjs — in-process orchestrator for
|
|
4
|
-
// conversation-archive-ingest.sh (Task 894 — supersedes whatsapp-import bin
|
|
4
|
+
// conversation-archive-ingest.sh (Task 894 — supersedes whatsapp-import bin;
|
|
5
|
+
// Task 900 — env-only identity, fsync-per-line progress sink, per-session
|
|
6
|
+
// checkpointing).
|
|
5
7
|
//
|
|
6
8
|
// Source-agnostic. The same pipeline runs for every conversation source;
|
|
7
9
|
// `--source <enum>` selects the normaliser at the top of the pipeline:
|
|
@@ -9,26 +11,44 @@
|
|
|
9
11
|
// normalise → bind canonical senders → derive conversationIdentity
|
|
10
12
|
// → look up prior :ConversationArchive (delta cursor)
|
|
11
13
|
// → sessionize delta at gap-hours boundaries
|
|
12
|
-
// → for each session: classify (mode='chat') →
|
|
13
|
-
//
|
|
14
|
+
// → for each session: classify (mode='chat') → memoryIngest immediately
|
|
15
|
+
// → advance lastIngestedMessageHash + lastIngestedMessageAt
|
|
16
|
+
//
|
|
17
|
+
// Identity (Task 900): `process.env.ACCOUNT_ID` and `process.env.USER_ID` are
|
|
18
|
+
// the only inputs. Both are plumbed by `spawn-env.ts` into every Bash
|
|
19
|
+
// subprocess; missing/malformed → LOUD-FAIL at phase=argv. `--account-id` and
|
|
20
|
+
// `--owner-element-id` flags are GONE; the writer derives the AdminUser
|
|
21
|
+
// elementId from (accountId, userId) one Cypher hop away.
|
|
14
22
|
//
|
|
15
23
|
// Argv (positional): <archive-path>
|
|
16
|
-
// Argv (flags): --source <
|
|
17
|
-
// --owner-element-id <id>
|
|
24
|
+
// Argv (flags): --source <enum>
|
|
18
25
|
// --participant-person-ids <csv>
|
|
19
26
|
// --scope <admin|public>
|
|
20
27
|
// [--session-gap-hours <N>] (default 12)
|
|
21
|
-
// [--account-id <accountId>]
|
|
22
28
|
// [--timezone <iana>]
|
|
23
29
|
// [--date-format <DD/MM/YY|MM/DD/YY|DD/MM/YYYY|MM/DD/YYYY>]
|
|
24
30
|
// [--session-id <id>]
|
|
25
31
|
//
|
|
26
32
|
// Stdout (success): one JSON line — counters the skill needs to formulate
|
|
27
33
|
// the three operator-facing messages. See SKILL.md for the shape.
|
|
28
|
-
// Stderr:
|
|
34
|
+
// Stderr + progress file: `[conversation-archive] ...` lines, fsync per write.
|
|
35
|
+
// Progress path: data/accounts/<accountId>/logs/conversation-archive-<sessionId>.log
|
|
36
|
+
// Wrapper sh prints `[conversation-archive] progress-file=<absolute-path>` on
|
|
37
|
+
// stdout before exec node, so the agent computes nothing.
|
|
29
38
|
// =============================================================================
|
|
30
39
|
|
|
31
|
-
import {
|
|
40
|
+
import {
|
|
41
|
+
closeSync,
|
|
42
|
+
existsSync,
|
|
43
|
+
fsyncSync,
|
|
44
|
+
mkdirSync,
|
|
45
|
+
mkdtempSync,
|
|
46
|
+
openSync,
|
|
47
|
+
readdirSync,
|
|
48
|
+
rmSync,
|
|
49
|
+
statSync,
|
|
50
|
+
writeSync,
|
|
51
|
+
} from "node:fs";
|
|
32
52
|
import { join, resolve, dirname } from "node:path";
|
|
33
53
|
import { tmpdir } from "node:os";
|
|
34
54
|
import { spawnSync } from "node:child_process";
|
|
@@ -89,13 +109,60 @@ const MEMORY_INGEST_PATH = resolve(
|
|
|
89
109
|
"tools",
|
|
90
110
|
"memory-ingest.js",
|
|
91
111
|
);
|
|
112
|
+
const UUID_LIB_PATH = resolve(
|
|
113
|
+
platformRoot,
|
|
114
|
+
"plugins",
|
|
115
|
+
"memory",
|
|
116
|
+
"mcp",
|
|
117
|
+
"dist",
|
|
118
|
+
"lib",
|
|
119
|
+
"uuid.js",
|
|
120
|
+
);
|
|
92
121
|
|
|
93
122
|
// ---------------------------------------------------------------------------
|
|
94
|
-
// 2.
|
|
123
|
+
// 2. Progress sink + logger fan-out (Task 900 sub-scope A).
|
|
124
|
+
// `log()` and `fail()` write to stderr AND the progress file with fsync per
|
|
125
|
+
// write. Parent Bash returns nothing until child exits; the progress file
|
|
126
|
+
// is the only surface the agent's heartbeat poll can read mid-run.
|
|
95
127
|
// ---------------------------------------------------------------------------
|
|
128
|
+
let progressFd = -1;
|
|
129
|
+
const startedMs = Date.now();
|
|
130
|
+
|
|
131
|
+
function openProgressSink(absolutePath) {
|
|
132
|
+
mkdirSync(dirname(absolutePath), { recursive: true });
|
|
133
|
+
progressFd = openSync(absolutePath, "a");
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
function writeProgressLine(line) {
|
|
137
|
+
// stderr first — preserves existing observability for parents that read
|
|
138
|
+
// stderr after the child exits, AND makes the line visible via the
|
|
139
|
+
// mcp-spawn-tee'd device server.log without a separate sink.
|
|
140
|
+
process.stderr.write(line);
|
|
141
|
+
if (progressFd >= 0) {
|
|
142
|
+
try {
|
|
143
|
+
writeSync(progressFd, line);
|
|
144
|
+
fsyncSync(progressFd);
|
|
145
|
+
} catch {
|
|
146
|
+
// Disk pressure / EBADF: don't kill the run — stderr already carries
|
|
147
|
+
// the line.
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
|
|
96
152
|
function log(line) {
|
|
97
|
-
|
|
153
|
+
writeProgressLine(`[conversation-archive] ${line}\n`);
|
|
98
154
|
}
|
|
155
|
+
|
|
156
|
+
function endRun(exitCode) {
|
|
157
|
+
const totalMs = Date.now() - startedMs;
|
|
158
|
+
writeProgressLine(`[conversation-archive] end exit-code=${exitCode} ms=${totalMs}\n`);
|
|
159
|
+
if (progressFd >= 0) {
|
|
160
|
+
try { closeSync(progressFd); } catch { /* fd already closed */ }
|
|
161
|
+
progressFd = -1;
|
|
162
|
+
}
|
|
163
|
+
process.exit(exitCode);
|
|
164
|
+
}
|
|
165
|
+
|
|
99
166
|
function fail(phase, fields) {
|
|
100
167
|
const fieldStr = Object.entries(fields)
|
|
101
168
|
.map(([k, v]) =>
|
|
@@ -104,8 +171,8 @@ function fail(phase, fields) {
|
|
|
104
171
|
: `${k}=${v ?? "-"}`,
|
|
105
172
|
)
|
|
106
173
|
.join(" ");
|
|
107
|
-
|
|
108
|
-
|
|
174
|
+
writeProgressLine(`[conversation-archive] FAIL phase=${phase} ${fieldStr}\n`);
|
|
175
|
+
endRun(1);
|
|
109
176
|
}
|
|
110
177
|
|
|
111
178
|
// ---------------------------------------------------------------------------
|
|
@@ -144,7 +211,6 @@ function parseArgv(argv) {
|
|
|
144
211
|
if (!VALID_SOURCES.has(flags.source)) {
|
|
145
212
|
fail("argv", { reason: `invalid --source "${flags.source}" (whatsapp|telegram|signal|linkedin-messages|zoom-transcript|meeting-minutes|imessage|slack|other)` });
|
|
146
213
|
}
|
|
147
|
-
if (!flags.ownerElementId) fail("argv", { reason: "missing --owner-element-id" });
|
|
148
214
|
if (!flags.participantPersonIds) {
|
|
149
215
|
fail("argv", {
|
|
150
216
|
reason: "missing --participant-person-ids (csv of operator-confirmed :Person/:AdminUser elementIds, owner excluded)",
|
|
@@ -216,35 +282,41 @@ function findChatTxt(dir) {
|
|
|
216
282
|
}
|
|
217
283
|
|
|
218
284
|
// ---------------------------------------------------------------------------
|
|
219
|
-
// 5.
|
|
220
|
-
// ---------------------------------------------------------------------------
|
|
221
|
-
function resolveAccountId(flags) {
|
|
222
|
-
if (flags.accountId && flags.accountId.trim()) return flags.accountId.trim();
|
|
223
|
-
const installDir = resolve(platformRoot, "..");
|
|
224
|
-
const accountsDir = join(installDir, "data", "accounts");
|
|
225
|
-
if (!existsSync(accountsDir)) {
|
|
226
|
-
fail("argv", { reason: `accounts dir not found: ${accountsDir}; pass --account-id explicitly` });
|
|
227
|
-
}
|
|
228
|
-
const dirs = readdirSync(accountsDir, { withFileTypes: true })
|
|
229
|
-
.filter((d) => d.isDirectory() && !d.name.startsWith("."))
|
|
230
|
-
.map((d) => d.name);
|
|
231
|
-
if (dirs.length === 0) fail("argv", { reason: `no accounts found under ${accountsDir}` });
|
|
232
|
-
if (dirs.length > 1) {
|
|
233
|
-
fail("argv", {
|
|
234
|
-
reason: `multiple accounts under ${accountsDir} (${dirs.join(",")}); pass --account-id explicitly`,
|
|
235
|
-
});
|
|
236
|
-
}
|
|
237
|
-
return dirs[0];
|
|
238
|
-
}
|
|
239
|
-
|
|
240
|
-
// ---------------------------------------------------------------------------
|
|
241
|
-
// 6. Main
|
|
285
|
+
// 5. Main
|
|
242
286
|
// ---------------------------------------------------------------------------
|
|
243
287
|
async function main() {
|
|
244
|
-
const startedMs = Date.now();
|
|
245
288
|
const { archive, flags } = parseArgv(process.argv);
|
|
246
289
|
const source = flags.source;
|
|
247
|
-
|
|
290
|
+
|
|
291
|
+
// Identity is env-only (Task 900 sub-scope C). spawn-env.ts plumbs both
|
|
292
|
+
// ACCOUNT_ID and USER_ID into every Bash subprocess; the bin LOUD-FAILs if
|
|
293
|
+
// either is absent or malformed. The disk-scan resolver and the
|
|
294
|
+
// --account-id / --owner-element-id flags are gone.
|
|
295
|
+
let UUID_REGEX;
|
|
296
|
+
try {
|
|
297
|
+
({ UUID_REGEX } = await import(UUID_LIB_PATH));
|
|
298
|
+
} catch (err) {
|
|
299
|
+
fail("import", {
|
|
300
|
+
reason: "failed to import compiled uuid lib",
|
|
301
|
+
detail: err instanceof Error ? err.message : String(err),
|
|
302
|
+
});
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
const accountId = process.env.ACCOUNT_ID?.trim();
|
|
306
|
+
if (!accountId) {
|
|
307
|
+
fail("argv", { reason: "ACCOUNT_ID env missing — bin must be invoked under an authenticated agent context" });
|
|
308
|
+
}
|
|
309
|
+
if (!UUID_REGEX.test(accountId)) {
|
|
310
|
+
fail("argv", { reason: `ACCOUNT_ID env malformed (not a UUID): "${accountId}"` });
|
|
311
|
+
}
|
|
312
|
+
const userId = process.env.USER_ID?.trim();
|
|
313
|
+
if (!userId) {
|
|
314
|
+
fail("argv", { reason: "USER_ID env missing — bin must be invoked under an authenticated user session" });
|
|
315
|
+
}
|
|
316
|
+
if (!UUID_REGEX.test(userId)) {
|
|
317
|
+
fail("argv", { reason: `USER_ID env malformed (not a UUID): "${userId}"` });
|
|
318
|
+
}
|
|
319
|
+
|
|
248
320
|
const participantElementIds = flags.participantPersonIds
|
|
249
321
|
.split(",")
|
|
250
322
|
.map((s) => s.trim())
|
|
@@ -253,7 +325,6 @@ async function main() {
|
|
|
253
325
|
fail("argv", { reason: "--participant-person-ids must list at least one elementId" });
|
|
254
326
|
}
|
|
255
327
|
const scope = flags.scope;
|
|
256
|
-
const accountId = resolveAccountId(flags);
|
|
257
328
|
const timezone = flags.timezone || "Europe/London";
|
|
258
329
|
const dateFormat = flags.dateFormat;
|
|
259
330
|
const sessionGapHours = flags.sessionGapHours
|
|
@@ -266,6 +337,20 @@ async function main() {
|
|
|
266
337
|
flags.sessionId ||
|
|
267
338
|
`conversation-archive:${source}:${Date.now()}:${Math.random().toString(36).slice(2, 10)}`;
|
|
268
339
|
|
|
340
|
+
// ---------------------------------------------------------------------------
|
|
341
|
+
// Open progress sink as soon as accountId and sessionId are known (the uuid-lib import above has already been awaited by this point). Path is deterministic from
|
|
342
|
+
// (accountId, sessionId); the wrapper sh prints the same path on stdout so
|
|
343
|
+
// the agent's heartbeat polls the right file.
|
|
344
|
+
// ---------------------------------------------------------------------------
|
|
345
|
+
const accountsDir = resolve(platformRoot, "..", "data", "accounts");
|
|
346
|
+
const logsDir = resolve(accountsDir, accountId, "logs");
|
|
347
|
+
const progressFilePath = resolve(logsDir, `conversation-archive-${sessionId}.log`);
|
|
348
|
+
openProgressSink(progressFilePath);
|
|
349
|
+
|
|
350
|
+
log(
|
|
351
|
+
`start session=${sessionId} source=${source} archive=${archive} accountId=${accountId} userId=${userId} participants=${participantElementIds.length} scope=${scope} pid=${process.pid}`,
|
|
352
|
+
);
|
|
353
|
+
|
|
269
354
|
// Imports — fail loudly if any compiled dist missing
|
|
270
355
|
let getNormaliser;
|
|
271
356
|
let sessionize, toTurnText, findDeltaCursor;
|
|
@@ -290,6 +375,36 @@ async function main() {
|
|
|
290
375
|
});
|
|
291
376
|
}
|
|
292
377
|
|
|
378
|
+
// Resolve owner AdminUser elementId from (accountId, userId). One Cypher
|
|
379
|
+
// hop replaces the deleted --owner-element-id flag — owner identity is env
|
|
380
|
+
// input, never an out-of-band flag.
|
|
381
|
+
let ownerElementId;
|
|
382
|
+
{
|
|
383
|
+
const lookupSession = getSession();
|
|
384
|
+
try {
|
|
385
|
+
const r = await lookupSession.run(
|
|
386
|
+
`MATCH (au:AdminUser { accountId: $accountId, userId: $userId })
|
|
387
|
+
RETURN elementId(au) AS ownerElementId LIMIT 1`,
|
|
388
|
+
{ accountId, userId },
|
|
389
|
+
);
|
|
390
|
+
const row = r.records[0]?.get("ownerElementId");
|
|
391
|
+
if (typeof row !== "string" || !row) {
|
|
392
|
+
await lookupSession.close().catch(() => {});
|
|
393
|
+
fail("argv", {
|
|
394
|
+
reason: `USER_ID does not resolve to an AdminUser under ACCOUNT_ID (accountId=${accountId.slice(0, 8)}, userId=${userId.slice(0, 8)})`,
|
|
395
|
+
});
|
|
396
|
+
}
|
|
397
|
+
ownerElementId = row;
|
|
398
|
+
} catch (err) {
|
|
399
|
+
await lookupSession.close().catch(() => {});
|
|
400
|
+
fail("argv", {
|
|
401
|
+
reason: `AdminUser lookup failed: ${err instanceof Error ? err.message : String(err)}`,
|
|
402
|
+
});
|
|
403
|
+
}
|
|
404
|
+
await lookupSession.close().catch(() => {});
|
|
405
|
+
}
|
|
406
|
+
log(`owner-resolved ownerElementId=${ownerElementId.slice(0, 12)} userId=${userId.slice(0, 8)}`);
|
|
407
|
+
|
|
293
408
|
// 6a. Resolve source file
|
|
294
409
|
const { sourceFile, cleanup } = resolveSourceFile(archive, source);
|
|
295
410
|
|
|
@@ -342,8 +457,8 @@ async function main() {
|
|
|
342
457
|
await session.close().catch(() => {});
|
|
343
458
|
cleanup();
|
|
344
459
|
if (err && err.userFacing) {
|
|
345
|
-
|
|
346
|
-
|
|
460
|
+
writeProgressLine(`[conversation-archive] FAIL ${err.message}\n`);
|
|
461
|
+
endRun(1);
|
|
347
462
|
}
|
|
348
463
|
fail("argv", { reason: err instanceof Error ? err.message : String(err) });
|
|
349
464
|
}
|
|
@@ -414,7 +529,7 @@ async function main() {
|
|
|
414
529
|
ms: totalMs,
|
|
415
530
|
priorLastIngestedMessageAt: priorArchive.lastAt,
|
|
416
531
|
}) + "\n");
|
|
417
|
-
|
|
532
|
+
endRun(0);
|
|
418
533
|
}
|
|
419
534
|
deltaStart = cursor.deltaStart;
|
|
420
535
|
deltaKind = "delta";
|
|
@@ -430,15 +545,24 @@ async function main() {
|
|
|
430
545
|
`sessionize source=${source} archiveSha256=${archiveSha256.slice(0, 12)} messages=${deltaLines.length} sessions=${sessions.length} gap-hours=${sessionGapHours}`,
|
|
431
546
|
);
|
|
432
547
|
|
|
433
|
-
// 6g.
|
|
434
|
-
//
|
|
435
|
-
//
|
|
436
|
-
//
|
|
437
|
-
//
|
|
438
|
-
|
|
548
|
+
// 6g. Per-session classify + immediate memoryIngest (Task 900 sub-scope E).
|
|
549
|
+
// Each session commits atomically: chunks + cursor advance happen together
|
|
550
|
+
// via memoryIngest. A kill mid-loop leaves a partial archive whose cursor
|
|
551
|
+
// is at session N-1's last message; the next invocation slices from there
|
|
552
|
+
// and re-classifies only session N onward. degrade-on-error from Task 897
|
|
553
|
+
// is preserved: a Haiku error on session N still emits an unenriched chunk
|
|
554
|
+
// and advances the cursor, so resumption never re-attempts the same Haiku
|
|
555
|
+
// error.
|
|
439
556
|
const allKeywords = new Set();
|
|
440
557
|
let sessionsUnenriched = 0;
|
|
441
|
-
|
|
558
|
+
let totalChunksWritten = 0;
|
|
559
|
+
let totalNextEdges = 0;
|
|
560
|
+
let participantsLinked = 0;
|
|
561
|
+
let archiveElementIdLatest = priorArchive ? priorArchive.elemId : "";
|
|
562
|
+
|
|
563
|
+
for (let sIdx = 0; sIdx < sessions.length; sIdx++) {
|
|
564
|
+
const s = sessions[sIdx];
|
|
565
|
+
const isFirstSessionInRun = sIdx === 0;
|
|
442
566
|
const sessionStart = Date.now();
|
|
443
567
|
const text = toTurnText(s);
|
|
444
568
|
const result = await classifyDocument({
|
|
@@ -449,12 +573,15 @@ async function main() {
|
|
|
449
573
|
naturalEdgeMap: "",
|
|
450
574
|
documentText: text,
|
|
451
575
|
});
|
|
576
|
+
|
|
577
|
+
let sessionChunks;
|
|
578
|
+
let sessionKeywords = [];
|
|
452
579
|
if (result.kind === "error") {
|
|
453
580
|
sessionsUnenriched += 1;
|
|
454
581
|
log(`session-unenriched session=${s.index + 1}/${sessions.length} reason="${result.reason.replace(/"/g, '\\"')}"`);
|
|
455
582
|
// Emit one raw :Section:Conversation chunk covering the whole session.
|
|
456
583
|
// No summary, no topicKeywords — a follow-up pass can re-enrich.
|
|
457
|
-
|
|
584
|
+
sessionChunks = [{
|
|
458
585
|
kind: "Conversation",
|
|
459
586
|
title: `Unenriched session ${s.index + 1}`,
|
|
460
587
|
body: text,
|
|
@@ -469,75 +596,91 @@ async function main() {
|
|
|
469
596
|
messageCount: s.messages.length,
|
|
470
597
|
},
|
|
471
598
|
anchorEdge: null,
|
|
472
|
-
}
|
|
473
|
-
|
|
599
|
+
}];
|
|
600
|
+
} else {
|
|
601
|
+
const chunkCount = result.output.sections.length;
|
|
602
|
+
log(
|
|
603
|
+
`classify-session sessionIndex=${s.index + 1}/${sessions.length} messages=${s.messages.length} chars=${text.length} chunks=${chunkCount} ms=${Date.now() - sessionStart}`,
|
|
604
|
+
);
|
|
605
|
+
if (chunkCount === 0 && s.messages.length > 0) {
|
|
606
|
+
cleanup();
|
|
607
|
+
fail("classify", {
|
|
608
|
+
reason: `session ${s.index} of ${s.messages.length} messages produced zero chunks (classifier-prompt regression)`,
|
|
609
|
+
});
|
|
610
|
+
}
|
|
611
|
+
sessionChunks = result.output.sections;
|
|
612
|
+
sessionKeywords = result.output.documentKeywords ?? [];
|
|
613
|
+
for (const kw of sessionKeywords) allKeywords.add(kw);
|
|
474
614
|
}
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
615
|
+
|
|
616
|
+
// Per-session cursor: advance to THIS session's last message. Atomic with
|
|
617
|
+
// chunk writes inside memoryIngest's MERGE/CREATE Cypher transaction.
|
|
618
|
+
const lastSessionLine = s.messages[s.messages.length - 1];
|
|
619
|
+
const sessionLastHash = deriveMessageContentHash({
|
|
620
|
+
dateSent: lastSessionLine.dateSent,
|
|
621
|
+
body: lastSessionLine.body,
|
|
622
|
+
});
|
|
623
|
+
const sessionLastAt = lastSessionLine.dateSent;
|
|
624
|
+
const sessionDocumentSummary = `Session ${s.index + 1}/${sessions.length}: ${s.messages.length} messages, ${sessionChunks.length} chunks.`;
|
|
625
|
+
|
|
626
|
+
let ingestResult;
|
|
627
|
+
const ingestStart = Date.now();
|
|
628
|
+
try {
|
|
629
|
+
ingestResult = await memoryIngest({
|
|
630
|
+
accountId,
|
|
631
|
+
attachmentId: conversationIdentity,
|
|
632
|
+
parentLabel: "ConversationArchive",
|
|
633
|
+
source,
|
|
634
|
+
documentSummary: sessionDocumentSummary,
|
|
635
|
+
anchorNodeId: ownerElementId,
|
|
636
|
+
anchorLabel: "AdminUser",
|
|
637
|
+
sections: sessionChunks,
|
|
638
|
+
scope,
|
|
639
|
+
sessionId,
|
|
640
|
+
documentKeywords: Array.from(allKeywords),
|
|
641
|
+
archiveSha256,
|
|
642
|
+
archiveSourceFile,
|
|
643
|
+
lastIngestedMessageHash: sessionLastHash,
|
|
644
|
+
lastIngestedMessageAt: sessionLastAt,
|
|
645
|
+
participantElementIds: [ownerElementId, ...participantElementIds],
|
|
646
|
+
// First session of THIS run cleans prior chunks for matching
|
|
647
|
+
// archiveSha256 (re-run-with-same-bytes idempotency); subsequent
|
|
648
|
+
// sessions in the same run skip cleanup or they would delete the
|
|
649
|
+
// chunks just written by sessions above them.
|
|
650
|
+
cleanupPriorChunks: isFirstSessionInRun,
|
|
651
|
+
});
|
|
652
|
+
} catch (err) {
|
|
480
653
|
cleanup();
|
|
481
|
-
fail("
|
|
482
|
-
reason:
|
|
654
|
+
fail("memory-ingest", {
|
|
655
|
+
reason: err instanceof Error ? err.message : String(err),
|
|
656
|
+
sessionIndex: s.index + 1,
|
|
657
|
+
sessionsTotal: sessions.length,
|
|
483
658
|
});
|
|
484
659
|
}
|
|
485
|
-
|
|
486
|
-
|
|
660
|
+
|
|
661
|
+
archiveElementIdLatest = ingestResult.documentNodeId;
|
|
662
|
+
totalChunksWritten += ingestResult.sectionCount;
|
|
663
|
+
totalNextEdges += ingestResult.edgeBreakdown.NEXT ?? 0;
|
|
664
|
+
if (isFirstSessionInRun) {
|
|
665
|
+
participantsLinked = ingestResult.edgeBreakdown.PARTICIPANT_IN ?? 0;
|
|
666
|
+
}
|
|
667
|
+
log(
|
|
668
|
+
`session-committed sessionIndex=${s.index + 1}/${sessions.length} chunks-written=${ingestResult.sectionCount} cursor-at="${sessionLastAt}" ms=${Date.now() - ingestStart}`,
|
|
669
|
+
);
|
|
487
670
|
}
|
|
671
|
+
|
|
488
672
|
if (sessionsUnenriched > 0) {
|
|
489
673
|
log(`classify-summary sessions=${sessions.length} unenriched=${sessionsUnenriched} enriched=${sessions.length - sessionsUnenriched}`);
|
|
490
674
|
}
|
|
491
675
|
|
|
492
|
-
// 6h. Compute lastIngestedMessageHash from the last delta line
|
|
493
|
-
const lastLine = deltaLines[deltaLines.length - 1];
|
|
494
|
-
const lastIngestedMessageHash = deriveMessageContentHash({
|
|
495
|
-
dateSent: lastLine.dateSent,
|
|
496
|
-
body: lastLine.body,
|
|
497
|
-
});
|
|
498
|
-
const lastIngestedMessageAt = lastLine.dateSent;
|
|
499
|
-
|
|
500
|
-
// 6i. Aggregate document-level summary across sessions
|
|
501
|
-
const documentSummary = sessions.length === 1
|
|
502
|
-
? `${deltaLines.length} messages in 1 session, ${allChunks.length} chunks.`
|
|
503
|
-
: `${deltaLines.length} messages in ${sessions.length} sessions, ${allChunks.length} chunks.`;
|
|
504
|
-
|
|
505
|
-
// 6j. Call memoryIngest with parentLabel='ConversationArchive'
|
|
506
|
-
let ingestResult;
|
|
507
|
-
const ingestStart = Date.now();
|
|
508
|
-
try {
|
|
509
|
-
ingestResult = await memoryIngest({
|
|
510
|
-
accountId,
|
|
511
|
-
attachmentId: conversationIdentity,
|
|
512
|
-
parentLabel: "ConversationArchive",
|
|
513
|
-
source,
|
|
514
|
-
documentSummary,
|
|
515
|
-
anchorNodeId: ownerElementId,
|
|
516
|
-
anchorLabel: "AdminUser",
|
|
517
|
-
sections: allChunks,
|
|
518
|
-
scope,
|
|
519
|
-
sessionId,
|
|
520
|
-
documentKeywords: Array.from(allKeywords),
|
|
521
|
-
archiveSha256,
|
|
522
|
-
archiveSourceFile,
|
|
523
|
-
lastIngestedMessageHash,
|
|
524
|
-
lastIngestedMessageAt,
|
|
525
|
-
participantElementIds: [ownerElementId, ...participantElementIds],
|
|
526
|
-
});
|
|
527
|
-
} catch (err) {
|
|
528
|
-
cleanup();
|
|
529
|
-
fail("memory-ingest", { reason: err instanceof Error ? err.message : String(err) });
|
|
530
|
-
}
|
|
531
|
-
log(
|
|
532
|
-
`source=${source} file=${archiveSourceFile} conversationIdentity=${conversationIdentity.slice(0, 12)} archiveElementId=${ingestResult.documentNodeId} chunks-written=${ingestResult.sectionCount} next-edges=${ingestResult.edgeBreakdown.NEXT ?? 0} participants=${ingestResult.edgeBreakdown.PARTICIPANT_IN ?? 0} ms=${Date.now() - ingestStart}`,
|
|
533
|
-
);
|
|
534
|
-
|
|
535
676
|
cleanup();
|
|
536
677
|
const totalMs = Date.now() - startedMs;
|
|
537
|
-
log(
|
|
678
|
+
log(
|
|
679
|
+
`done source=${source} conversationIdentity=${conversationIdentity.slice(0, 12)} archiveElementId=${archiveElementIdLatest} chunks-written=${totalChunksWritten} sessions=${sessions.length} total-ms=${totalMs}`,
|
|
680
|
+
);
|
|
538
681
|
|
|
539
682
|
process.stdout.write(JSON.stringify({
|
|
540
|
-
archiveElementId:
|
|
683
|
+
archiveElementId: archiveElementIdLatest,
|
|
541
684
|
conversationIdentity,
|
|
542
685
|
archiveSha256,
|
|
543
686
|
archiveSourceFile,
|
|
@@ -547,16 +690,16 @@ async function main() {
|
|
|
547
690
|
systemSkipped: normaliserResult.counters.systemSkipped,
|
|
548
691
|
delta: { kind: deltaKind, deltaStart, deltaMessages: deltaLines.length },
|
|
549
692
|
sessions: sessions.length,
|
|
550
|
-
chunks:
|
|
551
|
-
nextEdgesCreated:
|
|
552
|
-
participantsLinked
|
|
693
|
+
chunks: totalChunksWritten,
|
|
694
|
+
nextEdgesCreated: totalNextEdges,
|
|
695
|
+
participantsLinked,
|
|
553
696
|
dateRange: { first: allLines[0].dateSent, last: allLines[allLines.length - 1].dateSent },
|
|
554
697
|
parsedLineCount: allLines.length,
|
|
555
698
|
sessionsUnenriched,
|
|
556
699
|
topicKeywords: Array.from(allKeywords),
|
|
557
700
|
ms: totalMs,
|
|
558
701
|
}) + "\n");
|
|
559
|
-
|
|
702
|
+
endRun(0);
|
|
560
703
|
}
|
|
561
704
|
|
|
562
705
|
main().catch((err) => {
|