@rubytech/create-realagent 1.0.828 → 1.0.830
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/payload/platform/config/brand.json +1 -1
- package/payload/platform/lib/oauth-llm/dist/index.d.ts +1 -1
- package/payload/platform/lib/oauth-llm/dist/index.d.ts.map +1 -1
- package/payload/platform/lib/oauth-llm/dist/index.js +21 -0
- package/payload/platform/lib/oauth-llm/dist/index.js.map +1 -1
- package/payload/platform/lib/oauth-llm/src/index.ts +24 -0
- package/payload/platform/neo4j/migrations/007-conversation-archive-source.ts +116 -0
- package/payload/platform/neo4j/schema.cypher +12 -2
- package/payload/platform/package.json +2 -2
- package/payload/platform/plugins/admin/hooks/__tests__/archive-ingest-surface-gate.test.sh +6 -6
- package/payload/platform/plugins/admin/hooks/archive-ingest-surface-gate.sh +14 -8
- package/payload/platform/plugins/admin/skills/onboarding/SKILL.md +2 -2
- package/payload/platform/plugins/contacts/mcp/dist/index.js +5 -5
- package/payload/platform/plugins/contacts/mcp/dist/index.js.map +1 -1
- package/payload/platform/plugins/contacts/mcp/dist/tools/contact-create.d.ts +1 -1
- package/payload/platform/plugins/contacts/mcp/dist/tools/contact-create.d.ts.map +1 -1
- package/payload/platform/plugins/contacts/mcp/dist/tools/contact-create.js +29 -23
- package/payload/platform/plugins/contacts/mcp/dist/tools/contact-create.js.map +1 -1
- package/payload/platform/plugins/docs/references/plugins-guide.md +1 -1
- package/payload/platform/plugins/memory/PLUGIN.md +6 -5
- package/payload/platform/plugins/{whatsapp-import/bin/ingest.mjs → memory/bin/conversation-archive-ingest.mjs} +136 -212
- package/payload/platform/plugins/{whatsapp-import/bin/whatsapp-ingest.sh → memory/bin/conversation-archive-ingest.sh} +27 -19
- package/payload/platform/plugins/memory/mcp/dist/index.js +26 -212
- package/payload/platform/plugins/memory/mcp/dist/index.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/llm-classifier.test.js +4 -3
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/llm-classifier.test.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-loader.test.js +11 -6
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-loader.test.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-validator.test.js +103 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-validator.test.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/index.d.ts +5 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/index.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/index.js +30 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/index.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/types.d.ts +48 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/types.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/types.js +23 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/types.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/whatsapp-text.d.ts +3 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/whatsapp-text.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/whatsapp-text.js +237 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/whatsapp-text.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/delta-cursor.d.ts +11 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/delta-cursor.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/delta-cursor.js +21 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/delta-cursor.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.d.ts +16 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.js +39 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.d.ts +17 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.js +90 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sessionize.d.ts +9 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sessionize.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sessionize.js +32 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sessionize.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/to-turn-text.d.ts +3 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/to-turn-text.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/to-turn-text.js +27 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/to-turn-text.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/document-chunker.d.ts +45 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/document-chunker.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/document-chunker.js +125 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/document-chunker.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts +24 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js +293 -33
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.js +9 -2
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.d.ts +16 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.js +12 -3
- package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-source-agnosticism.test.d.ts +2 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-source-agnosticism.test.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-source-agnosticism.test.js +75 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-source-agnosticism.test.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-whatsapp-text.test.d.ts +2 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-whatsapp-text.test.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-whatsapp-text.test.js +67 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-whatsapp-text.test.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-archive-write.test.js +2 -138
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-archive-write.test.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.js +39 -3
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/profile-update-personfields-open.test.d.ts +2 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/profile-update-personfields-open.test.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/profile-update-personfields-open.test.js +148 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/profile-update-personfields-open.test.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.d.ts +1 -47
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.js +9 -318
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts +7 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js +14 -8
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/profile-update.d.ts +21 -17
- package/payload/platform/plugins/memory/mcp/dist/tools/profile-update.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/profile-update.js +77 -37
- package/payload/platform/plugins/memory/mcp/dist/tools/profile-update.js.map +1 -1
- package/payload/platform/plugins/memory/references/schema-base.md +3 -1
- package/payload/platform/plugins/{whatsapp-import/skills/whatsapp-import → memory/skills/conversation-archive}/SKILL.md +45 -36
- package/payload/platform/plugins/memory/skills/document-ingest/SKILL.md +59 -6
- package/payload/platform/plugins/whatsapp/PLUGIN.md +1 -1
- package/payload/platform/scripts/seed-neo4j.sh +9 -8
- package/payload/platform/templates/specialists/agents/database-operator.md +7 -14
- package/payload/server/chunk-7BO5HDJC.js +10093 -0
- package/payload/server/chunk-CUSH3UXP.js +2305 -0
- package/payload/server/chunk-EL4DZ56X.js +1116 -0
- package/payload/server/chunk-IWNDVGKT.js +10077 -0
- package/payload/server/chunk-KC7NUABI.js +654 -0
- package/payload/server/chunk-QOJ2D26Z.js +654 -0
- package/payload/server/chunk-RC46ZYGT.js +2305 -0
- package/payload/server/chunk-WUVXPZIV.js +1116 -0
- package/payload/server/client-pool-3TM3SRIA.js +32 -0
- package/payload/server/client-pool-7NTEFNVQ.js +32 -0
- package/payload/server/cloudflare-task-tracker-4NIODMGL.js +19 -0
- package/payload/server/cloudflare-task-tracker-WE77WXSI.js +19 -0
- package/payload/server/maxy-edge.js +3 -3
- package/payload/server/neo4j-migrations-4XPNJNM6.js +490 -0
- package/payload/server/neo4j-migrations-XTQ4WEV6.js +428 -0
- package/payload/server/server.js +6 -6
- package/payload/platform/plugins/whatsapp-import/PLUGIN.md +0 -48
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/delta-append.test.ts +0 -163
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/parse-export-lrm.test.ts +0 -83
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/parse-export.test.ts +0 -678
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/sessionize.test.ts +0 -91
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/to-classifier-input.test.ts +0 -59
- package/payload/platform/plugins/whatsapp-import/lib/src/delta-cursor.ts +0 -54
- package/payload/platform/plugins/whatsapp-import/lib/src/derive-keys.ts +0 -82
- package/payload/platform/plugins/whatsapp-import/lib/src/index.ts +0 -22
- package/payload/platform/plugins/whatsapp-import/lib/src/parse-export.ts +0 -471
- package/payload/platform/plugins/whatsapp-import/lib/src/sessionize.ts +0 -81
- package/payload/platform/plugins/whatsapp-import/lib/src/to-classifier-input.ts +0 -48
- package/payload/platform/plugins/whatsapp-import/lib/tsconfig.json +0 -9
- package/payload/platform/plugins/whatsapp-import/lib/vitest.config.ts +0 -9
- package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/references/conversation-archive-shape.md +0 -143
- package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/references/export-parse.md +0 -109
|
@@ -1,38 +1,35 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
// =============================================================================
|
|
3
|
-
// ingest.mjs — in-process orchestrator for
|
|
3
|
+
// conversation-archive-ingest.mjs — in-process orchestrator for
|
|
4
|
+
// conversation-archive-ingest.sh (Task 894 — supersedes whatsapp-import bin).
|
|
4
5
|
//
|
|
5
|
-
//
|
|
6
|
-
//
|
|
6
|
+
// Source-agnostic. The same pipeline runs for every conversation source;
|
|
7
|
+
// `--source <enum>` selects the normaliser at the top of the pipeline:
|
|
7
8
|
//
|
|
8
|
-
//
|
|
9
|
+
// normalise → bind canonical senders → derive conversationIdentity
|
|
9
10
|
// → look up prior :ConversationArchive (delta cursor)
|
|
10
|
-
// → sessionize delta at
|
|
11
|
+
// → sessionize delta at gap-hours boundaries
|
|
11
12
|
// → for each session: classify (mode='chat') → collect chunks
|
|
12
|
-
// → memoryIngest(parentLabel='ConversationArchive')
|
|
13
|
+
// → memoryIngest(parentLabel='ConversationArchive', source=<enum>)
|
|
13
14
|
//
|
|
14
15
|
// Argv (positional): <archive-path>
|
|
15
|
-
// Argv (flags): --
|
|
16
|
+
// Argv (flags): --source <whatsapp|telegram|signal|linkedin-messages|zoom-transcript|meeting-minutes|imessage|slack|other>
|
|
17
|
+
// --owner-element-id <id>
|
|
16
18
|
// --participant-person-ids <csv>
|
|
17
19
|
// --scope <admin|public>
|
|
18
20
|
// [--session-gap-hours <N>] (default 12)
|
|
19
21
|
// [--account-id <accountId>]
|
|
20
22
|
// [--timezone <iana>]
|
|
21
23
|
// [--date-format <DD/MM/YY|MM/DD/YY|DD/MM/YYYY|MM/DD/YYYY>]
|
|
24
|
+
// [--session-id <id>]
|
|
22
25
|
//
|
|
23
|
-
//
|
|
24
|
-
//
|
|
25
|
-
//
|
|
26
|
-
// makes per-message filtering obsolete — chunking bounds operator surface).
|
|
27
|
-
//
|
|
28
|
-
// Stdout (success): one JSON line — all the counters the skill needs to
|
|
29
|
-
// formulate the three operator-facing messages. See SKILL.md for the shape.
|
|
30
|
-
// Stderr: one [whatsapp-import] FAIL line on failure, exit non-zero.
|
|
26
|
+
// Stdout (success): one JSON line — counters the skill needs to formulate
|
|
27
|
+
// the three operator-facing messages. See SKILL.md for the shape.
|
|
28
|
+
// Stderr: one [conversation-archive] FAIL line on failure, exit non-zero.
|
|
31
29
|
// =============================================================================
|
|
32
30
|
|
|
33
|
-
import { existsSync, mkdtempSync, readdirSync, rmSync, statSync
|
|
34
|
-
import {
|
|
35
|
-
import { join, resolve, dirname, basename } from "node:path";
|
|
31
|
+
import { existsSync, mkdtempSync, readdirSync, rmSync, statSync } from "node:fs";
|
|
32
|
+
import { join, resolve, dirname } from "node:path";
|
|
36
33
|
import { tmpdir } from "node:os";
|
|
37
34
|
import { spawnSync } from "node:child_process";
|
|
38
35
|
import { fileURLToPath } from "node:url";
|
|
@@ -46,14 +43,25 @@ const platformRoot =
|
|
|
46
43
|
process.env.MAXY_PLATFORM_ROOT?.trim() ||
|
|
47
44
|
resolve(__dirname, "..", "..", "..");
|
|
48
45
|
|
|
49
|
-
const
|
|
46
|
+
const NORMALISERS_PATH = resolve(
|
|
50
47
|
platformRoot,
|
|
51
48
|
"plugins",
|
|
52
|
-
"
|
|
53
|
-
"
|
|
49
|
+
"memory",
|
|
50
|
+
"mcp",
|
|
54
51
|
"dist",
|
|
52
|
+
"lib",
|
|
53
|
+
"conversation-normalisers",
|
|
55
54
|
"index.js",
|
|
56
55
|
);
|
|
56
|
+
const PIPELINE_PATH = resolve(
|
|
57
|
+
platformRoot,
|
|
58
|
+
"plugins",
|
|
59
|
+
"memory",
|
|
60
|
+
"mcp",
|
|
61
|
+
"dist",
|
|
62
|
+
"lib",
|
|
63
|
+
"conversation-pipeline",
|
|
64
|
+
);
|
|
57
65
|
const NEO4J_LIB_PATH = resolve(
|
|
58
66
|
platformRoot,
|
|
59
67
|
"plugins",
|
|
@@ -86,7 +94,7 @@ const MEMORY_INGEST_PATH = resolve(
|
|
|
86
94
|
// 2. Logger
|
|
87
95
|
// ---------------------------------------------------------------------------
|
|
88
96
|
function log(line) {
|
|
89
|
-
process.stderr.write(`[
|
|
97
|
+
process.stderr.write(`[conversation-archive] ${line}\n`);
|
|
90
98
|
}
|
|
91
99
|
function fail(phase, fields) {
|
|
92
100
|
const fieldStr = Object.entries(fields)
|
|
@@ -96,13 +104,25 @@ function fail(phase, fields) {
|
|
|
96
104
|
: `${k}=${v ?? "-"}`,
|
|
97
105
|
)
|
|
98
106
|
.join(" ");
|
|
99
|
-
process.stderr.write(`[
|
|
107
|
+
process.stderr.write(`[conversation-archive] FAIL phase=${phase} ${fieldStr}\n`);
|
|
100
108
|
process.exit(1);
|
|
101
109
|
}
|
|
102
110
|
|
|
103
111
|
// ---------------------------------------------------------------------------
|
|
104
112
|
// 3. Argv parsing
|
|
105
113
|
// ---------------------------------------------------------------------------
|
|
114
|
+
const VALID_SOURCES = new Set([
|
|
115
|
+
"whatsapp",
|
|
116
|
+
"telegram",
|
|
117
|
+
"signal",
|
|
118
|
+
"linkedin-messages",
|
|
119
|
+
"zoom-transcript",
|
|
120
|
+
"meeting-minutes",
|
|
121
|
+
"imessage",
|
|
122
|
+
"slack",
|
|
123
|
+
"other",
|
|
124
|
+
]);
|
|
125
|
+
|
|
106
126
|
function parseArgv(argv) {
|
|
107
127
|
const args = argv.slice(2);
|
|
108
128
|
let archive = null;
|
|
@@ -120,6 +140,10 @@ function parseArgv(argv) {
|
|
|
120
140
|
flags[camelCase(key)] = v;
|
|
121
141
|
}
|
|
122
142
|
if (!archive) fail("argv", { reason: "missing positional <archive>" });
|
|
143
|
+
if (!flags.source) fail("argv", { reason: "missing --source" });
|
|
144
|
+
if (!VALID_SOURCES.has(flags.source)) {
|
|
145
|
+
fail("argv", { reason: `invalid --source "${flags.source}" (whatsapp|telegram|signal|linkedin-messages|zoom-transcript|meeting-minutes|imessage|slack|other)` });
|
|
146
|
+
}
|
|
123
147
|
if (!flags.ownerElementId) fail("argv", { reason: "missing --owner-element-id" });
|
|
124
148
|
if (!flags.participantPersonIds) {
|
|
125
149
|
fail("argv", {
|
|
@@ -137,40 +161,45 @@ function camelCase(s) {
|
|
|
137
161
|
}
|
|
138
162
|
|
|
139
163
|
// ---------------------------------------------------------------------------
|
|
140
|
-
// 4. Archive resolution
|
|
164
|
+
// 4. Archive resolution. WhatsApp ships zip-or-dir-or-_chat.txt; other
|
|
165
|
+
// sources pass a single file path that the normaliser interprets directly.
|
|
141
166
|
// ---------------------------------------------------------------------------
|
|
142
|
-
function
|
|
167
|
+
function resolveSourceFile(archivePath, source) {
|
|
143
168
|
const abs = resolve(archivePath);
|
|
144
169
|
if (!existsSync(abs)) fail("argv", { reason: `archive path not found: ${abs}` });
|
|
145
170
|
const st = statSync(abs);
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
171
|
+
|
|
172
|
+
if (source === "whatsapp") {
|
|
173
|
+
if (st.isFile() && abs.endsWith(".zip")) {
|
|
174
|
+
const tmp = mkdtempSync(join(tmpdir(), "conversation-archive-"));
|
|
175
|
+
const unzip = spawnSync("unzip", ["-q", "-o", abs, "-d", tmp], { encoding: "utf8" });
|
|
176
|
+
if (unzip.status !== 0) {
|
|
177
|
+
rmSync(tmp, { recursive: true });
|
|
178
|
+
fail("argv", {
|
|
179
|
+
reason: "unzip failed",
|
|
180
|
+
archive: abs,
|
|
181
|
+
stderr: (unzip.stderr || "").slice(0, 200),
|
|
182
|
+
});
|
|
183
|
+
}
|
|
184
|
+
const chat = findChatTxt(tmp);
|
|
185
|
+
if (!chat) {
|
|
186
|
+
rmSync(tmp, { recursive: true });
|
|
187
|
+
fail("argv", { reason: "_chat.txt not found in zip", archive: abs });
|
|
188
|
+
}
|
|
189
|
+
return { sourceFile: chat, cleanup: () => rmSync(tmp, { recursive: true }) };
|
|
156
190
|
}
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
191
|
+
if (st.isDirectory()) {
|
|
192
|
+
const chat = findChatTxt(abs);
|
|
193
|
+
if (!chat) fail("argv", { reason: "_chat.txt not found in directory", archive: abs });
|
|
194
|
+
return { sourceFile: chat, cleanup: () => {} };
|
|
161
195
|
}
|
|
162
|
-
return { chatTxt: chat, cleanup: () => rmSync(tmp, { recursive: true }) };
|
|
163
|
-
}
|
|
164
|
-
if (st.isDirectory()) {
|
|
165
|
-
const chat = findChatTxt(abs);
|
|
166
|
-
if (!chat) fail("argv", { reason: "_chat.txt not found in directory", archive: abs });
|
|
167
|
-
return { chatTxt: chat, cleanup: () => {} };
|
|
168
196
|
}
|
|
197
|
+
|
|
169
198
|
if (st.isFile()) {
|
|
170
|
-
return {
|
|
199
|
+
return { sourceFile: abs, cleanup: () => {} };
|
|
171
200
|
}
|
|
172
|
-
fail("argv", { reason: `unsupported archive shape: ${abs}` });
|
|
173
|
-
return {
|
|
201
|
+
fail("argv", { reason: `unsupported archive shape for source=${source}: ${abs}` });
|
|
202
|
+
return { sourceFile: abs, cleanup: () => {} };
|
|
174
203
|
}
|
|
175
204
|
function findChatTxt(dir) {
|
|
176
205
|
const entries = readdirSync(dir, { withFileTypes: true });
|
|
@@ -209,121 +238,12 @@ function resolveAccountId(flags) {
|
|
|
209
238
|
}
|
|
210
239
|
|
|
211
240
|
// ---------------------------------------------------------------------------
|
|
212
|
-
// 6.
|
|
213
|
-
//
|
|
214
|
-
// All distinct parsed senderNames must resolve to {owner, participants...}.
|
|
215
|
-
// Any miss LOUD-FAILs with `parser-miss reason="senderName=<...>"`.
|
|
216
|
-
// ---------------------------------------------------------------------------
|
|
217
|
-
const CANONICAL_FETCH_CYPHER = `
|
|
218
|
-
UNWIND $ids AS id
|
|
219
|
-
MATCH (n) WHERE elementId(n) = id
|
|
220
|
-
RETURN elementId(n) AS elemId,
|
|
221
|
-
labels(n) AS labels,
|
|
222
|
-
coalesce(n.name, '') AS name,
|
|
223
|
-
coalesce(n.givenName, '') AS givenName,
|
|
224
|
-
coalesce(n.familyName, '') AS familyName,
|
|
225
|
-
coalesce(n.accountId, '') AS accountId
|
|
226
|
-
`;
|
|
227
|
-
|
|
228
|
-
class IngestUserFacingError extends Error {
|
|
229
|
-
constructor(message) {
|
|
230
|
-
super(message);
|
|
231
|
-
this.name = "IngestUserFacingError";
|
|
232
|
-
this.userFacing = true;
|
|
233
|
-
}
|
|
234
|
-
}
|
|
235
|
-
|
|
236
|
-
async function bindCanonicalSenders({
|
|
237
|
-
session,
|
|
238
|
-
accountId,
|
|
239
|
-
ownerElementId,
|
|
240
|
-
participantElementIds,
|
|
241
|
-
senderNames,
|
|
242
|
-
normaliseSenderName,
|
|
243
|
-
}) {
|
|
244
|
-
const allIds = [ownerElementId, ...participantElementIds];
|
|
245
|
-
const distinctIds = Array.from(new Set(allIds));
|
|
246
|
-
if (distinctIds.length !== allIds.length) {
|
|
247
|
-
throw new IngestUserFacingError(
|
|
248
|
-
`participant id list contains duplicates (owner appears in --participant-person-ids?)`,
|
|
249
|
-
);
|
|
250
|
-
}
|
|
251
|
-
const res = await session.executeRead(async (tx) => tx.run(CANONICAL_FETCH_CYPHER, { ids: distinctIds }));
|
|
252
|
-
const seenIds = new Set();
|
|
253
|
-
const labelByElemId = new Map();
|
|
254
|
-
const index = new Map();
|
|
255
|
-
for (const r of res.records) {
|
|
256
|
-
const elemId = r.get("elemId");
|
|
257
|
-
const labels = r.get("labels") || [];
|
|
258
|
-
const acct = r.get("accountId") || "";
|
|
259
|
-
if (!acct) {
|
|
260
|
-
throw new IngestUserFacingError(`node ${elemId} has no accountId — corrupt canonical Person/AdminUser`);
|
|
261
|
-
}
|
|
262
|
-
if (acct !== accountId) {
|
|
263
|
-
throw new IngestUserFacingError(`node ${elemId} belongs to account ${acct}, not ${accountId}`);
|
|
264
|
-
}
|
|
265
|
-
if (!labels.includes("Person") && !labels.includes("AdminUser")) {
|
|
266
|
-
throw new IngestUserFacingError(`node ${elemId} has labels [${labels.join(",")}]; expected :Person or :AdminUser`);
|
|
267
|
-
}
|
|
268
|
-
seenIds.add(elemId);
|
|
269
|
-
labelByElemId.set(elemId, labels.includes("Person") ? "Person" : "AdminUser");
|
|
270
|
-
const candidates = [];
|
|
271
|
-
const name = r.get("name") || "";
|
|
272
|
-
const given = r.get("givenName") || "";
|
|
273
|
-
const family = r.get("familyName") || "";
|
|
274
|
-
if (name) candidates.push(name);
|
|
275
|
-
if (given && family) candidates.push(`${given} ${family}`);
|
|
276
|
-
if (given) candidates.push(given);
|
|
277
|
-
if (family) candidates.push(family);
|
|
278
|
-
for (const c of candidates) {
|
|
279
|
-
const norm = normaliseSenderName(c);
|
|
280
|
-
if (!norm) continue;
|
|
281
|
-
if (!index.has(norm)) index.set(norm, elemId);
|
|
282
|
-
}
|
|
283
|
-
}
|
|
284
|
-
for (const id of distinctIds) {
|
|
285
|
-
if (!seenIds.has(id)) {
|
|
286
|
-
throw new IngestUserFacingError(`elementId ${id} not found in graph`);
|
|
287
|
-
}
|
|
288
|
-
}
|
|
289
|
-
// Validate every distinct parsed senderName against the closed candidate set.
|
|
290
|
-
for (const senderName of senderNames) {
|
|
291
|
-
const norm = normaliseSenderName(senderName);
|
|
292
|
-
const hit = index.get(norm);
|
|
293
|
-
if (!hit) {
|
|
294
|
-
throw new IngestUserFacingError(
|
|
295
|
-
`parser-miss reason="senderName=${senderName} not in confirmed participant set (${distinctIds.length} confirmed elementIds; re-run with the missing :Person elementId in --participant-person-ids)"`,
|
|
296
|
-
);
|
|
297
|
-
}
|
|
298
|
-
}
|
|
299
|
-
return { participantsResolved: seenIds.size };
|
|
300
|
-
}
|
|
301
|
-
|
|
302
|
-
// ---------------------------------------------------------------------------
|
|
303
|
-
// 7. SHA-256 of file bytes (for archiveSha256)
|
|
304
|
-
// ---------------------------------------------------------------------------
|
|
305
|
-
async function fileSha256(filePath) {
|
|
306
|
-
return new Promise((resolveProm, rejectProm) => {
|
|
307
|
-
const hash = createHash("sha256");
|
|
308
|
-
const stream = createReadStream(filePath);
|
|
309
|
-
stream.on("data", (chunk) => hash.update(chunk));
|
|
310
|
-
stream.on("end", () => resolveProm(hash.digest("hex")));
|
|
311
|
-
stream.on("error", rejectProm);
|
|
312
|
-
});
|
|
313
|
-
}
|
|
314
|
-
|
|
315
|
-
// ---------------------------------------------------------------------------
|
|
316
|
-
// 8. Build natural-edge map for chat-mode classifier.
|
|
317
|
-
// Chat mode doesn't use it (the chat prompt drops edge proposals), but
|
|
318
|
-
// classifyDocument's signature still takes the param. Pass the empty string.
|
|
319
|
-
// ---------------------------------------------------------------------------
|
|
320
|
-
|
|
321
|
-
// ---------------------------------------------------------------------------
|
|
322
|
-
// 9. Main
|
|
241
|
+
// 6. Main
|
|
323
242
|
// ---------------------------------------------------------------------------
|
|
324
243
|
async function main() {
|
|
325
244
|
const startedMs = Date.now();
|
|
326
245
|
const { archive, flags } = parseArgv(process.argv);
|
|
246
|
+
const source = flags.source;
|
|
327
247
|
const ownerElementId = flags.ownerElementId;
|
|
328
248
|
const participantElementIds = flags.participantPersonIds
|
|
329
249
|
.split(",")
|
|
@@ -343,22 +263,23 @@ async function main() {
|
|
|
343
263
|
fail("argv", { reason: `invalid --session-gap-hours "${flags.sessionGapHours}" (must be positive number)` });
|
|
344
264
|
}
|
|
345
265
|
const sessionId =
|
|
346
|
-
flags.sessionId ||
|
|
266
|
+
flags.sessionId ||
|
|
267
|
+
`conversation-archive:${source}:${Date.now()}:${Math.random().toString(36).slice(2, 10)}`;
|
|
347
268
|
|
|
348
269
|
// Imports — fail loudly if any compiled dist missing
|
|
349
|
-
let
|
|
270
|
+
let getNormaliser;
|
|
271
|
+
let sessionize, toTurnText, findDeltaCursor;
|
|
350
272
|
let normaliseSenderName, deriveConversationIdentity, deriveMessageContentHash;
|
|
273
|
+
let bindCanonicalSenders;
|
|
351
274
|
let getSession, classifyDocument, memoryIngest;
|
|
352
275
|
try {
|
|
353
|
-
({
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
deriveMessageContentHash,
|
|
361
|
-
} = await import(PARSE_EXPORT_PATH));
|
|
276
|
+
({ getNormaliser } = await import(NORMALISERS_PATH));
|
|
277
|
+
({ sessionize } = await import(join(PIPELINE_PATH, "sessionize.js")));
|
|
278
|
+
({ toTurnText } = await import(join(PIPELINE_PATH, "to-turn-text.js")));
|
|
279
|
+
({ findDeltaCursor } = await import(join(PIPELINE_PATH, "delta-cursor.js")));
|
|
280
|
+
({ normaliseSenderName, deriveConversationIdentity, deriveMessageContentHash } =
|
|
281
|
+
await import(join(PIPELINE_PATH, "derive-keys.js")));
|
|
282
|
+
({ bindCanonicalSenders } = await import(join(PIPELINE_PATH, "sender-bind.js")));
|
|
362
283
|
({ getSession } = await import(NEO4J_LIB_PATH));
|
|
363
284
|
({ classifyDocument } = await import(LLM_CLASSIFIER_PATH));
|
|
364
285
|
({ memoryIngest } = await import(MEMORY_INGEST_PATH));
|
|
@@ -369,32 +290,38 @@ async function main() {
|
|
|
369
290
|
});
|
|
370
291
|
}
|
|
371
292
|
|
|
372
|
-
//
|
|
373
|
-
const {
|
|
374
|
-
const archiveSha256 = await fileSha256(chatTxt);
|
|
375
|
-
const archiveSourceFile = basename(chatTxt);
|
|
376
|
-
log(
|
|
377
|
-
`start file=${archiveSourceFile} owner=${ownerElementId} participants=${participantElementIds.length} scope=${scope} accountId=${accountId} archiveSha256=${archiveSha256.slice(0, 12)} session-gap-hours=${sessionGapHours}`,
|
|
378
|
-
);
|
|
293
|
+
// 6a. Resolve source file
|
|
294
|
+
const { sourceFile, cleanup } = resolveSourceFile(archive, source);
|
|
379
295
|
|
|
380
|
-
//
|
|
381
|
-
let
|
|
296
|
+
// 6b. Run the source-specific normaliser
|
|
297
|
+
let normaliserResult;
|
|
382
298
|
try {
|
|
383
|
-
|
|
299
|
+
const normaliser = getNormaliser(source);
|
|
300
|
+
normaliserResult = await normaliser({
|
|
301
|
+
filePath: sourceFile,
|
|
302
|
+
accountId,
|
|
303
|
+
timezone,
|
|
304
|
+
opts: dateFormat ? { dateFormat } : undefined,
|
|
305
|
+
});
|
|
384
306
|
} catch (err) {
|
|
385
307
|
cleanup();
|
|
386
308
|
fail("parse", { reason: err instanceof Error ? err.message : String(err) });
|
|
387
309
|
}
|
|
388
|
-
const allLines =
|
|
310
|
+
const allLines = normaliserResult.parsedLines;
|
|
311
|
+
const archiveSha256 = normaliserResult.archiveSha256;
|
|
312
|
+
const archiveSourceFile = normaliserResult.archiveSourceFile;
|
|
313
|
+
log(
|
|
314
|
+
`source=${source} file=${archiveSourceFile} owner=${ownerElementId} participants=${participantElementIds.length} scope=${scope} accountId=${accountId} archiveSha256=${archiveSha256.slice(0, 12)} session-gap-hours=${sessionGapHours}`,
|
|
315
|
+
);
|
|
389
316
|
log(
|
|
390
|
-
`parsed lines=${
|
|
317
|
+
`parsed lines=${normaliserResult.counters.parsed} media-skipped=${normaliserResult.counters.mediaSkipped} system-skipped=${normaliserResult.counters.systemSkipped}`,
|
|
391
318
|
);
|
|
392
319
|
if (allLines.length === 0) {
|
|
393
320
|
cleanup();
|
|
394
321
|
fail("parse", { reason: "zero parsed lines after walking archive" });
|
|
395
322
|
}
|
|
396
323
|
|
|
397
|
-
//
|
|
324
|
+
// 6c. Bind canonical senders against the confirmed set
|
|
398
325
|
const distinctSenderNames = Array.from(new Set(allLines.map((l) => l.senderName)));
|
|
399
326
|
const senderHistogram = computeSenderHistogram(allLines);
|
|
400
327
|
let session = getSession();
|
|
@@ -405,19 +332,18 @@ async function main() {
|
|
|
405
332
|
ownerElementId,
|
|
406
333
|
participantElementIds,
|
|
407
334
|
senderNames: distinctSenderNames,
|
|
408
|
-
normaliseSenderName,
|
|
409
335
|
});
|
|
410
336
|
} catch (err) {
|
|
411
337
|
await session.close().catch(() => {});
|
|
412
338
|
cleanup();
|
|
413
339
|
if (err && err.userFacing) {
|
|
414
|
-
process.stderr.write(`[
|
|
340
|
+
process.stderr.write(`[conversation-archive] FAIL ${err.message}\n`);
|
|
415
341
|
process.exit(1);
|
|
416
342
|
}
|
|
417
343
|
fail("argv", { reason: err instanceof Error ? err.message : String(err) });
|
|
418
344
|
}
|
|
419
345
|
|
|
420
|
-
//
|
|
346
|
+
// 6d. Derive conversationIdentity and look up prior :ConversationArchive
|
|
421
347
|
const conversationIdentity = deriveConversationIdentity({
|
|
422
348
|
accountId,
|
|
423
349
|
participantElementIds: [ownerElementId, ...participantElementIds],
|
|
@@ -445,7 +371,7 @@ async function main() {
|
|
|
445
371
|
}
|
|
446
372
|
await session.close().catch(() => {});
|
|
447
373
|
|
|
448
|
-
//
|
|
374
|
+
// 6e. Compute deltaStart
|
|
449
375
|
let deltaStart = 0;
|
|
450
376
|
let deltaKind = "first-ingest";
|
|
451
377
|
if (priorArchive && priorArchive.lastHash) {
|
|
@@ -467,9 +393,10 @@ async function main() {
|
|
|
467
393
|
conversationIdentity,
|
|
468
394
|
archiveSha256,
|
|
469
395
|
archiveSourceFile,
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
396
|
+
source,
|
|
397
|
+
parsed: normaliserResult.counters.parsed,
|
|
398
|
+
mediaSkipped: normaliserResult.counters.mediaSkipped,
|
|
399
|
+
systemSkipped: normaliserResult.counters.systemSkipped,
|
|
473
400
|
delta: { kind: "empty-delta", deltaStart: allLines.length, deltaMessages: 0 },
|
|
474
401
|
sessions: 0,
|
|
475
402
|
chunks: 0,
|
|
@@ -491,22 +418,22 @@ async function main() {
|
|
|
491
418
|
`delta cursor=${priorArchive ? priorArchive.lastHash.slice(0, 12) : "(first-ingest)"} cursor-line=${deltaStart} delta-messages=${deltaLines.length}`,
|
|
492
419
|
);
|
|
493
420
|
|
|
494
|
-
//
|
|
421
|
+
// 6f. Sessionize delta
|
|
495
422
|
const sessions = sessionize(deltaLines, sessionGapHours);
|
|
496
423
|
log(
|
|
497
|
-
`sessionize
|
|
424
|
+
`sessionize source=${source} archiveSha256=${archiveSha256.slice(0, 12)} messages=${deltaLines.length} sessions=${sessions.length} gap-hours=${sessionGapHours}`,
|
|
498
425
|
);
|
|
499
426
|
|
|
500
|
-
//
|
|
427
|
+
// 6g. Classify each session via Haiku (mode='chat')
|
|
501
428
|
const allChunks = [];
|
|
502
429
|
const allKeywords = new Set();
|
|
503
430
|
for (const s of sessions) {
|
|
504
431
|
const sessionStart = Date.now();
|
|
505
|
-
const text =
|
|
432
|
+
const text = toTurnText(s);
|
|
506
433
|
const result = await classifyDocument({
|
|
507
434
|
accountId,
|
|
508
435
|
mode: "chat",
|
|
509
|
-
anchorDescription: `
|
|
436
|
+
anchorDescription: `Conversation transcript (${[ownerElementId, ...participantElementIds].length} participants, session ${s.index + 1} of ${sessions.length})`,
|
|
510
437
|
ontologyLabels: new Set([]),
|
|
511
438
|
naturalEdgeMap: "",
|
|
512
439
|
documentText: text,
|
|
@@ -525,15 +452,11 @@ async function main() {
|
|
|
525
452
|
reason: `session ${s.index} of ${s.messages.length} messages produced zero chunks (classifier-prompt regression)`,
|
|
526
453
|
});
|
|
527
454
|
}
|
|
528
|
-
for (const sec of result.output.sections)
|
|
529
|
-
|
|
530
|
-
}
|
|
531
|
-
for (const kw of result.output.documentKeywords) {
|
|
532
|
-
allKeywords.add(kw);
|
|
533
|
-
}
|
|
455
|
+
for (const sec of result.output.sections) allChunks.push(sec);
|
|
456
|
+
for (const kw of result.output.documentKeywords) allKeywords.add(kw);
|
|
534
457
|
}
|
|
535
458
|
|
|
536
|
-
//
|
|
459
|
+
// 6h. Compute lastIngestedMessageHash from the last delta line
|
|
537
460
|
const lastLine = deltaLines[deltaLines.length - 1];
|
|
538
461
|
const lastIngestedMessageHash = deriveMessageContentHash({
|
|
539
462
|
dateSent: lastLine.dateSent,
|
|
@@ -542,23 +465,23 @@ async function main() {
|
|
|
542
465
|
});
|
|
543
466
|
const lastIngestedMessageAt = lastLine.dateSent;
|
|
544
467
|
|
|
545
|
-
//
|
|
546
|
-
// two-three classifier summaries; the parent's summary is informational)
|
|
468
|
+
// 6i. Aggregate document-level summary across sessions
|
|
547
469
|
const documentSummary = sessions.length === 1
|
|
548
470
|
? `${deltaLines.length} messages in 1 session, ${allChunks.length} chunks.`
|
|
549
471
|
: `${deltaLines.length} messages in ${sessions.length} sessions, ${allChunks.length} chunks.`;
|
|
550
472
|
|
|
551
|
-
//
|
|
473
|
+
// 6j. Call memoryIngest with parentLabel='ConversationArchive'
|
|
552
474
|
let ingestResult;
|
|
553
475
|
const ingestStart = Date.now();
|
|
554
476
|
try {
|
|
555
477
|
ingestResult = await memoryIngest({
|
|
556
478
|
accountId,
|
|
557
|
-
attachmentId: conversationIdentity,
|
|
479
|
+
attachmentId: conversationIdentity,
|
|
558
480
|
parentLabel: "ConversationArchive",
|
|
481
|
+
source,
|
|
559
482
|
documentSummary,
|
|
560
483
|
anchorNodeId: ownerElementId,
|
|
561
|
-
anchorLabel: "AdminUser",
|
|
484
|
+
anchorLabel: "AdminUser",
|
|
562
485
|
sections: allChunks,
|
|
563
486
|
scope,
|
|
564
487
|
sessionId,
|
|
@@ -574,21 +497,22 @@ async function main() {
|
|
|
574
497
|
fail("memory-ingest", { reason: err instanceof Error ? err.message : String(err) });
|
|
575
498
|
}
|
|
576
499
|
log(
|
|
577
|
-
`file=${archiveSourceFile} conversationIdentity=${conversationIdentity.slice(0, 12)} archiveElementId=${ingestResult.documentNodeId} chunks-written=${ingestResult.sectionCount} next-edges=${ingestResult.edgeBreakdown.NEXT ?? 0} participants=${ingestResult.edgeBreakdown.PARTICIPANT_IN ?? 0} ms=${Date.now() - ingestStart}`,
|
|
500
|
+
`source=${source} file=${archiveSourceFile} conversationIdentity=${conversationIdentity.slice(0, 12)} archiveElementId=${ingestResult.documentNodeId} chunks-written=${ingestResult.sectionCount} next-edges=${ingestResult.edgeBreakdown.NEXT ?? 0} participants=${ingestResult.edgeBreakdown.PARTICIPANT_IN ?? 0} ms=${Date.now() - ingestStart}`,
|
|
578
501
|
);
|
|
579
502
|
|
|
580
503
|
cleanup();
|
|
581
504
|
const totalMs = Date.now() - startedMs;
|
|
582
|
-
log(`done conversationIdentity=${conversationIdentity.slice(0, 12)} total-ms=${totalMs} exit=0`);
|
|
505
|
+
log(`done source=${source} conversationIdentity=${conversationIdentity.slice(0, 12)} total-ms=${totalMs} exit=0`);
|
|
583
506
|
|
|
584
507
|
process.stdout.write(JSON.stringify({
|
|
585
508
|
archiveElementId: ingestResult.documentNodeId,
|
|
586
509
|
conversationIdentity,
|
|
587
510
|
archiveSha256,
|
|
588
511
|
archiveSourceFile,
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
512
|
+
source,
|
|
513
|
+
parsed: normaliserResult.counters.parsed,
|
|
514
|
+
mediaSkipped: normaliserResult.counters.mediaSkipped,
|
|
515
|
+
systemSkipped: normaliserResult.counters.systemSkipped,
|
|
592
516
|
delta: { kind: deltaKind, deltaStart, deltaMessages: deltaLines.length },
|
|
593
517
|
sessions: sessions.length,
|
|
594
518
|
chunks: ingestResult.sectionCount,
|
|
@@ -1,49 +1,55 @@
|
|
|
1
1
|
#!/usr/bin/env bash
|
|
2
2
|
# =============================================================================
|
|
3
|
-
#
|
|
4
|
-
# ingestion (Task
|
|
3
|
+
# conversation-archive-ingest.sh — single deterministic Bash entry for
|
|
4
|
+
# conversation-archive ingestion (Task 894 — supersedes whatsapp-ingest.sh).
|
|
5
5
|
#
|
|
6
|
-
#
|
|
7
|
-
#
|
|
8
|
-
#
|
|
9
|
-
#
|
|
6
|
+
# Source-agnostic: WhatsApp `_chat.txt`, Telegram, Signal, LinkedIn DMs, Zoom
|
|
7
|
+
# transcript, meeting minutes, iMessage, Slack — every source flows through
|
|
8
|
+
# the same pipeline with `--source` selecting the normaliser.
|
|
9
|
+
#
|
|
10
|
+
# Pipeline: normalise (per source) → bind canonical sender set →
|
|
11
|
+
# derive conversationIdentity → look up prior :ConversationArchive (delta
|
|
12
|
+
# cursor) → sessionize delta at gap-hours boundary → classify each session
|
|
13
|
+
# via Haiku (mode='chat') → memory-ingest with parentLabel='ConversationArchive'.
|
|
10
14
|
#
|
|
11
15
|
# Usage:
|
|
12
|
-
# bash
|
|
16
|
+
# bash conversation-archive-ingest.sh <archive-path>
|
|
17
|
+
# --source <whatsapp|telegram|signal|linkedin-messages|zoom-transcript|meeting-minutes|imessage|slack|other>
|
|
13
18
|
# --owner-element-id <id>
|
|
14
19
|
# --participant-person-ids <csv>
|
|
15
20
|
# --scope <admin|public>
|
|
16
21
|
# [--session-gap-hours <N>] (default 12)
|
|
17
22
|
# [--account-id <accountId>]
|
|
18
23
|
# [--timezone <iana-zone>]
|
|
19
|
-
# [--date-format <DD/MM/YY|MM/DD/YY|DD/MM/YYYY|MM/DD/YYYY>]
|
|
24
|
+
# [--date-format <DD/MM/YY|MM/DD/YY|DD/MM/YYYY|MM/DD/YYYY>] (whatsapp only)
|
|
20
25
|
#
|
|
21
26
|
# `--owner-element-id` + `--participant-person-ids` form the closed sender
|
|
22
27
|
# set; any parsed senderName outside that set LOUD-FAILs with `parser-miss`
|
|
23
|
-
# and exits non-zero.
|
|
24
|
-
# Section:Conversation rows bound the operator surface naturally.
|
|
28
|
+
# and exits non-zero.
|
|
25
29
|
#
|
|
26
30
|
# Exit 0 + JSON summary on stdout on success.
|
|
27
|
-
# Exit !0 + one [
|
|
31
|
+
# Exit !0 + one [conversation-archive] FAIL line on stderr on failure.
|
|
28
32
|
# =============================================================================
|
|
29
33
|
|
|
30
34
|
set -euo pipefail
|
|
31
35
|
|
|
32
36
|
arg_fail() {
|
|
33
37
|
local reason="$1"
|
|
34
|
-
echo "[
|
|
38
|
+
echo "[conversation-archive] FAIL phase=argv reason=\"${reason}\"" >&2
|
|
35
39
|
exit 1
|
|
36
40
|
}
|
|
37
41
|
|
|
38
42
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
39
43
|
PLATFORM_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
|
|
40
|
-
INGEST_MJS="$SCRIPT_DIR/ingest.mjs"
|
|
44
|
+
INGEST_MJS="$SCRIPT_DIR/conversation-archive-ingest.mjs"
|
|
41
45
|
|
|
42
46
|
if [ ! -f "$INGEST_MJS" ]; then
|
|
43
|
-
arg_fail "ingest.mjs not found at $INGEST_MJS — run from a built install"
|
|
47
|
+
arg_fail "conversation-archive-ingest.mjs not found at $INGEST_MJS — run from a built install"
|
|
44
48
|
fi
|
|
45
49
|
|
|
46
50
|
ARCHIVE=""
|
|
51
|
+
HAS_SOURCE=0
|
|
52
|
+
SOURCE_VAL=""
|
|
47
53
|
HAS_OWNER=0
|
|
48
54
|
OWNER_VAL=""
|
|
49
55
|
HAS_PARTICIPANTS=0
|
|
@@ -56,10 +62,11 @@ i=0
|
|
|
56
62
|
while [ $i -lt ${#ARGS[@]} ]; do
|
|
57
63
|
a="${ARGS[$i]}"
|
|
58
64
|
case "$a" in
|
|
65
|
+
--source) HAS_SOURCE=1; SOURCE_VAL="${ARGS[$((i + 1))]:-}"; i=$((i + 2)); continue ;;
|
|
59
66
|
--owner-element-id) HAS_OWNER=1; OWNER_VAL="${ARGS[$((i + 1))]:-}"; i=$((i + 2)); continue ;;
|
|
60
67
|
--participant-person-ids) HAS_PARTICIPANTS=1; PARTICIPANTS_VAL="${ARGS[$((i + 1))]:-}"; i=$((i + 2)); continue ;;
|
|
61
68
|
--scope) HAS_SCOPE=1; SCOPE_VAL="${ARGS[$((i + 1))]:-}"; i=$((i + 2)); continue ;;
|
|
62
|
-
--session-gap-hours|--account-id|--timezone|--date-format) i=$((i + 2)); continue ;;
|
|
69
|
+
--session-gap-hours|--account-id|--timezone|--date-format|--session-id) i=$((i + 2)); continue ;;
|
|
63
70
|
--*) i=$((i + 2)); continue ;;
|
|
64
71
|
*)
|
|
65
72
|
if [ -z "$ARCHIVE" ]; then ARCHIVE="$a"; fi
|
|
@@ -69,10 +76,11 @@ while [ $i -lt ${#ARGS[@]} ]; do
|
|
|
69
76
|
esac
|
|
70
77
|
done
|
|
71
78
|
|
|
72
|
-
[ -n "$ARCHIVE" ]
|
|
73
|
-
[ "$
|
|
74
|
-
[ "$
|
|
75
|
-
[ "$
|
|
79
|
+
[ -n "$ARCHIVE" ] || arg_fail "missing positional <archive>"
|
|
80
|
+
[ "$HAS_SOURCE" -eq 1 ] && [ -n "$SOURCE_VAL" ] || arg_fail "missing --source (whatsapp|telegram|signal|linkedin-messages|zoom-transcript|meeting-minutes|imessage|slack|other)"
|
|
81
|
+
[ "$HAS_OWNER" -eq 1 ] && [ -n "$OWNER_VAL" ] || arg_fail "missing --owner-element-id (or empty value)"
|
|
82
|
+
[ "$HAS_PARTICIPANTS" -eq 1 ] && [ -n "$PARTICIPANTS_VAL" ] || arg_fail "missing --participant-person-ids (csv of operator-confirmed :Person/:AdminUser elementIds, owner excluded)"
|
|
83
|
+
[ "$HAS_SCOPE" -eq 1 ] && [ -n "$SCOPE_VAL" ] || arg_fail "missing --scope (or empty value)"
|
|
76
84
|
case "$SCOPE_VAL" in
|
|
77
85
|
admin|public) : ;;
|
|
78
86
|
*) arg_fail "invalid --scope \"$SCOPE_VAL\" (admin|public)" ;;
|