@rubytech/create-realagent 1.0.826 → 1.0.829
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/payload/platform/neo4j/schema.cypher +35 -2
- package/payload/platform/package.json +2 -2
- package/payload/platform/plugins/admin/hooks/__tests__/archive-ingest-surface-gate.test.sh +39 -54
- package/payload/platform/plugins/admin/hooks/archive-ingest-surface-gate.sh +26 -52
- package/payload/platform/plugins/admin/skills/onboarding/SKILL.md +7 -7
- package/payload/platform/plugins/docs/references/cloudflare.md +1 -1
- package/payload/platform/plugins/docs/references/plugins-guide.md +1 -1
- package/payload/platform/plugins/docs/references/troubleshooting.md +1 -0
- package/payload/platform/plugins/memory/PLUGIN.md +5 -5
- package/payload/platform/plugins/memory/mcp/dist/index.js +18 -253
- package/payload/platform/plugins/memory/mcp/dist/index.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/llm-classifier.test.js +51 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/llm-classifier.test.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-validator.test.js +103 -0
- package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-validator.test.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts +19 -4
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js +149 -56
- package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.d.ts +16 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.js +12 -3
- package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-archive-write.test.js +2 -138
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-archive-write.test.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.d.ts +2 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.js +66 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/profile-update-personfields-open.test.d.ts +2 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/profile-update-personfields-open.test.d.ts.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/profile-update-personfields-open.test.js +148 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/profile-update-personfields-open.test.js.map +1 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.d.ts +1 -64
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.js +6 -336
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts +30 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js +231 -0
- package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/profile-update.d.ts +21 -17
- package/payload/platform/plugins/memory/mcp/dist/tools/profile-update.d.ts.map +1 -1
- package/payload/platform/plugins/memory/mcp/dist/tools/profile-update.js +77 -37
- package/payload/platform/plugins/memory/mcp/dist/tools/profile-update.js.map +1 -1
- package/payload/platform/plugins/memory/references/schema-base.md +7 -2
- package/payload/platform/plugins/memory/skills/document-ingest/SKILL.md +54 -4
- package/payload/platform/plugins/whatsapp/PLUGIN.md +1 -1
- package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.d.ts +18 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.d.ts.map +1 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.js +31 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.js.map +1 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.d.ts +27 -12
- package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.d.ts.map +1 -1
- package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.js +40 -20
- package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.js.map +1 -1
- package/payload/platform/plugins/whatsapp-import/lib/dist/index.d.ts +7 -4
- package/payload/platform/plugins/whatsapp-import/lib/dist/index.d.ts.map +1 -1
- package/payload/platform/plugins/whatsapp-import/lib/dist/index.js +9 -6
- package/payload/platform/plugins/whatsapp-import/lib/dist/index.js.map +1 -1
- package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.d.ts +25 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.d.ts.map +1 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.js +48 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.js.map +1 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.d.ts +3 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.d.ts.map +1 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.js +47 -0
- package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.js.map +1 -0
- package/payload/platform/scripts/seed-neo4j.sh +15 -14
- package/payload/platform/templates/specialists/agents/database-operator.md +10 -17
- package/payload/server/chunk-CUSH3UXP.js +2305 -0
- package/payload/server/chunk-IWNDVGKT.js +10077 -0
- package/payload/server/chunk-KC7NUABI.js +654 -0
- package/payload/server/chunk-T2OPNP3L.js +654 -0
- package/payload/server/chunk-WUVXPZIV.js +1116 -0
- package/payload/server/client-pool-3TM3SRIA.js +32 -0
- package/payload/server/cloudflare-task-tracker-4NIODMGL.js +19 -0
- package/payload/server/cloudflare-task-tracker-CR6TL4VL.js +19 -0
- package/payload/server/maxy-edge.js +3 -3
- package/payload/server/neo4j-migrations-XTQ4WEV6.js +428 -0
- package/payload/server/public/assets/{admin-DOkUspG1.js → admin-BNwPsMhJ.js} +2 -2
- package/payload/server/public/assets/{graph-LLMJa4Ch.js → graph-N_Bw-8oT.js} +1 -1
- package/payload/server/public/assets/{page-DoaF3DB0.js → page-BKLGP-th.js} +1 -1
- package/payload/server/public/graph.html +2 -2
- package/payload/server/public/index.html +2 -2
- package/payload/server/server.js +281 -168
- package/payload/platform/plugins/whatsapp-import/PLUGIN.md +0 -46
- package/payload/platform/plugins/whatsapp-import/bin/ingest.mjs +0 -670
- package/payload/platform/plugins/whatsapp-import/bin/whatsapp-ingest.sh +0 -131
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/filter-gate.test.ts +0 -172
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/ingest-idempotence.test.ts +0 -141
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/parse-export-lrm.test.ts +0 -83
- package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/parse-export.test.ts +0 -678
- package/payload/platform/plugins/whatsapp-import/lib/src/derive-keys.ts +0 -59
- package/payload/platform/plugins/whatsapp-import/lib/src/filter.ts +0 -136
- package/payload/platform/plugins/whatsapp-import/lib/src/index.ts +0 -19
- package/payload/platform/plugins/whatsapp-import/lib/src/parse-export.ts +0 -471
- package/payload/platform/plugins/whatsapp-import/lib/tsconfig.json +0 -9
- package/payload/platform/plugins/whatsapp-import/lib/vitest.config.ts +0 -9
- package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/SKILL.md +0 -131
- package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/references/export-parse.md +0 -109
- package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import-enrich/SKILL.md +0 -333
|
@@ -1,131 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env bash
|
|
2
|
-
# =============================================================================
|
|
3
|
-
# whatsapp-ingest.sh — single deterministic Bash entry for WhatsApp archive
|
|
4
|
-
# ingestion (Task 855 / Task 871). Thin wrapper: arg-validate, resolve env,
|
|
5
|
-
# exec ingest.mjs under node (replacing this shell). The script is the database-operator
|
|
6
|
-
# subagent's only operator-facing handle on the parse → filter → archive-write
|
|
7
|
-
# pipeline. Phase 1 has NO LLM. The Haiku insight pass is Phase 2 — invoked
|
|
8
|
-
# consciously via `mcp__memory__whatsapp-export-insight-pass`.
|
|
9
|
-
#
|
|
10
|
-
# Usage:
|
|
11
|
-
# bash whatsapp-ingest.sh <archive.zip|dir|_chat.txt>
|
|
12
|
-
# --owner-element-id <id>
|
|
13
|
-
# --subject-person-id <id>
|
|
14
|
-
# --scope <admin|public>
|
|
15
|
-
# --filter <all|senders=<csv>|date-range=<isoFrom>..<isoTo>>
|
|
16
|
-
# [--account-id <accountId>]
|
|
17
|
-
# [--timezone <iana-zone>]
|
|
18
|
-
# [--date-format <DD/MM/YY|MM/DD/YY|DD/MM/YYYY|MM/DD/YYYY>]
|
|
19
|
-
#
|
|
20
|
-
# `--subject-person-id` is required (Task 887 §A0). DM scope: the
|
|
21
|
-
# operator-confirmed third-party :Person elementId from the preview
|
|
22
|
-
# histogram. Owner + subject form the closed sender set; any parsed
|
|
23
|
-
# senderName outside that set LOUD-FAILs with `parser-miss` and exits
|
|
24
|
-
# non-zero — bounds writer cardinality to the deterministic preview output.
|
|
25
|
-
#
|
|
26
|
-
# `--filter` is mandatory (Task 871). Forms:
|
|
27
|
-
# all — write every parsed row
|
|
28
|
-
# senders=Alice,Bob Carter — keep rows whose senderName ∈ csv
|
|
29
|
-
# date-range=2024-01-01..2024-06-30
|
|
30
|
-
# — keep rows whose dateSent falls inside
|
|
31
|
-
# the inclusive range (date-only or full
|
|
32
|
-
# ISO 8601 endpoints both accepted)
|
|
33
|
-
#
|
|
34
|
-
# Exit 0 + JSON summary on stdout on success.
|
|
35
|
-
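# Exit !0 + a [whatsapp-ingest] FAIL line on stderr on failure (a missing --filter emits two from this wrapper: the pinned filter-required line, then the phase=argv line).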
|
|
36
|
-
# =============================================================================
|
|
37
|
-
|
|
38
|
-
set -euo pipefail
|
|
39
|
-
|
|
40
|
-
# Pre-flight arg check — emit the same single-line FAIL shape ingest.mjs uses
|
|
41
|
-
# so the operator's grep pattern catches both layers uniformly.
|
|
42
|
-
arg_fail() {
|
|
43
|
-
local reason="$1"
|
|
44
|
-
echo "[whatsapp-ingest] FAIL phase=argv reason=\"${reason}\"" >&2
|
|
45
|
-
exit 1
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
# Locate platform root from this script's location:
|
|
49
|
-
# platform/plugins/whatsapp-import/bin/ → ../../.. = platform/
|
|
50
|
-
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
51
|
-
PLATFORM_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
|
|
52
|
-
INGEST_MJS="$SCRIPT_DIR/ingest.mjs"
|
|
53
|
-
|
|
54
|
-
if [ ! -f "$INGEST_MJS" ]; then
|
|
55
|
-
arg_fail "ingest.mjs not found at $INGEST_MJS — run from a built install"
|
|
56
|
-
fi
|
|
57
|
-
|
|
58
|
-
# Validate required flags up-front — gives a cleaner pre-flight error than
|
|
59
|
-
# the node-side throw and keeps the failure surface single-line. argv
|
|
60
|
-
# validation runs before env resolution so a wrong invocation fails on the
|
|
61
|
-
# wrong invocation, not on a missing password.
|
|
62
|
-
ARCHIVE=""
|
|
63
|
-
OWNER_VAL=""
|
|
64
|
-
SUBJECT_VAL=""
|
|
65
|
-
SCOPE_VAL=""
|
|
66
|
-
FILTER_VAL=""
|
|
67
|
-
HAS_OWNER=0
|
|
68
|
-
HAS_SUBJECT=0
|
|
69
|
-
HAS_SCOPE=0
|
|
70
|
-
HAS_FILTER=0
|
|
71
|
-
|
|
72
|
-
ARGS=("$@")
|
|
73
|
-
i=0
|
|
74
|
-
while [ $i -lt ${#ARGS[@]} ]; do
|
|
75
|
-
a="${ARGS[$i]}"
|
|
76
|
-
case "$a" in
|
|
77
|
-
--owner-element-id) HAS_OWNER=1; OWNER_VAL="${ARGS[$((i + 1))]:-}"; i=$((i + 2)); continue ;;
|
|
78
|
-
--subject-person-id) HAS_SUBJECT=1; SUBJECT_VAL="${ARGS[$((i + 1))]:-}"; i=$((i + 2)); continue ;;
|
|
79
|
-
--scope) HAS_SCOPE=1; SCOPE_VAL="${ARGS[$((i + 1))]:-}"; i=$((i + 2)); continue ;;
|
|
80
|
-
--filter) HAS_FILTER=1; FILTER_VAL="${ARGS[$((i + 1))]:-}"; i=$((i + 2)); continue ;;
|
|
81
|
-
--account-id|--timezone|--date-format) i=$((i + 2)); continue ;;
|
|
82
|
-
--*) i=$((i + 2)); continue ;;
|
|
83
|
-
*)
|
|
84
|
-
if [ -z "$ARCHIVE" ]; then ARCHIVE="$a"; fi
|
|
85
|
-
i=$((i + 1))
|
|
86
|
-
continue
|
|
87
|
-
;;
|
|
88
|
-
esac
|
|
89
|
-
done
|
|
90
|
-
|
|
91
|
-
[ -n "$ARCHIVE" ] || arg_fail "missing positional <archive>"
|
|
92
|
-
[ "$HAS_OWNER" -eq 1 ] && [ -n "$OWNER_VAL" ] || arg_fail "missing --owner-element-id (or empty value)"
|
|
93
|
-
[ "$HAS_SUBJECT" -eq 1 ] && [ -n "$SUBJECT_VAL" ] || arg_fail "missing --subject-person-id (Task 887: operator-confirmed third-party :Person elementId from preview histogram)"
|
|
94
|
-
[ "$HAS_SCOPE" -eq 1 ] && [ -n "$SCOPE_VAL" ] || arg_fail "missing --scope (or empty value)"
|
|
95
|
-
case "$SCOPE_VAL" in
|
|
96
|
-
admin|public) : ;;
|
|
97
|
-
*) arg_fail "invalid --scope \"$SCOPE_VAL\" (admin|public)" ;;
|
|
98
|
-
esac
|
|
99
|
-
if [ "$HAS_FILTER" -ne 1 ] || [ -z "$FILTER_VAL" ]; then
|
|
100
|
-
# Mirror ingest.mjs's pinned LOUD-FAIL line so a single grep covers both
|
|
101
|
-
# layers — the operator's runbook recipe is `grep '\[whatsapp-ingest\] FAIL filter-required'`.
|
|
102
|
-
echo "[whatsapp-ingest] FAIL filter-required reason=\"bulk-archive-gate (Task 871) — operator must specify --filter (one of all, senders=<csv>, date-range=<isoFrom>..<isoTo>)\"" >&2
|
|
103
|
-
arg_fail "missing --filter (one of all, senders=<csv>, date-range=<isoFrom>..<isoTo>)"
|
|
104
|
-
fi
|
|
105
|
-
|
|
106
|
-
# Lift NEO4J_PASSWORD from the install's config file when env doesn't carry it
|
|
107
|
-
# (e.g. operator running directly from a shell rather than via the platform
|
|
108
|
-
# server). Mirror seed-neo4j.sh precedence: env var wins, then the file.
|
|
109
|
-
if [ -z "${NEO4J_PASSWORD:-}" ]; then
|
|
110
|
-
NEO4J_PASSWORD_FILE="$PLATFORM_ROOT/config/.neo4j-password"
|
|
111
|
-
if [ -f "$NEO4J_PASSWORD_FILE" ]; then
|
|
112
|
-
NEO4J_PASSWORD="$(cat "$NEO4J_PASSWORD_FILE")"
|
|
113
|
-
export NEO4J_PASSWORD
|
|
114
|
-
else
|
|
115
|
-
arg_fail "NEO4J_PASSWORD not in env and $NEO4J_PASSWORD_FILE not found"
|
|
116
|
-
fi
|
|
117
|
-
fi
|
|
118
|
-
|
|
119
|
-
# NEO4J_URI is hard-required (Task 787). The wrapper does not fabricate a
|
|
120
|
-
# default — the platform server sets this at spawn time and the operator can
|
|
121
|
-
# pass it explicitly when running standalone.
|
|
122
|
-
if [ -z "${NEO4J_URI:-}" ]; then
|
|
123
|
-
arg_fail "NEO4J_URI not set (no default — set in env)"
|
|
124
|
-
fi
|
|
125
|
-
|
|
126
|
-
export NEO4J_USER="${NEO4J_USER:-neo4j}"
|
|
127
|
-
export MAXY_PLATFORM_ROOT="$PLATFORM_ROOT"
|
|
128
|
-
|
|
129
|
-
# Hand off to node. ingest.mjs handles all observability; the wrapper does
|
|
130
|
-
# not double-log. Forward exit code verbatim.
|
|
131
|
-
exec node "$INGEST_MJS" "$@"
|
|
@@ -1,172 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect } from "vitest";
|
|
2
|
-
import { spawnSync } from "node:child_process";
|
|
3
|
-
import { mkdtempSync, writeFileSync, rmSync } from "node:fs";
|
|
4
|
-
import { tmpdir } from "node:os";
|
|
5
|
-
import { join, resolve, dirname } from "node:path";
|
|
6
|
-
import { fileURLToPath } from "node:url";
|
|
7
|
-
|
|
8
|
-
import { parseFilterArg, applyFilter } from "../filter.js";
|
|
9
|
-
import type { ParsedLine } from "../parse-export.js";
|
|
10
|
-
|
|
11
|
-
// ---------------------------------------------------------------------------
|
|
12
|
-
// filter-gate — Task 871 contract: --filter is mandatory, three forms.
|
|
13
|
-
//
|
|
14
|
-
// The unit tests cover the helper module's pure-function surface (parse +
|
|
15
|
-
// apply). The integration test spawns ingest.mjs as a subprocess and
|
|
16
|
-
// asserts the no-flag invocation exits non-zero with the LOUD-FAIL line
|
|
17
|
-
// (`[whatsapp-ingest] FAIL phase=argv reason="--filter is required …"`)
|
|
18
|
-
// before any Neo4j connection — argv parsing fires before imports/sessions.
|
|
19
|
-
// ---------------------------------------------------------------------------
|
|
20
|
-
|
|
21
|
-
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
22
|
-
// src/__tests__/ → ../../../bin/ingest.mjs (up to src/, up to lib/, up to plugin
|
|
23
|
-
// root, then bin/).
|
|
24
|
-
const INGEST_MJS = resolve(__dirname, "..", "..", "..", "bin", "ingest.mjs");
|
|
25
|
-
|
|
26
|
-
function lines(parsed: ParsedLine[]): string[] {
|
|
27
|
-
return parsed.map((p) => `${p.senderName}@${p.dateSent}:${p.body}`);
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
const SAMPLE: ParsedLine[] = [
|
|
31
|
-
{ senderName: "Alice", dateSent: "2024-01-15T09:00:00+00:00", body: "morning", sequenceIndex: 0 },
|
|
32
|
-
{ senderName: "Bob", dateSent: "2024-01-15T09:05:00+00:00", body: "hi", sequenceIndex: 1 },
|
|
33
|
-
{ senderName: "Alice", dateSent: "2024-03-20T11:00:00+00:00", body: "lunch?", sequenceIndex: 2 },
|
|
34
|
-
{ senderName: "Carol", dateSent: "2024-06-30T23:59:59+00:00: ", body: "bye", sequenceIndex: 3 },
|
|
35
|
-
{ senderName: "Bob", dateSent: "2024-07-01T00:00:01+00:00", body: "next", sequenceIndex: 4 },
|
|
36
|
-
];
|
|
37
|
-
|
|
38
|
-
// Repair the malformed dateSent fixture (extra ':' suffix would Date.parse to
|
|
39
|
-
// NaN). Keep this in code, not in the const above, so the fixture reads
|
|
40
|
-
// natural while we still hold a strict tuple.
|
|
41
|
-
SAMPLE[3] = { ...SAMPLE[3], dateSent: "2024-06-30T23:59:59+00:00" };
|
|
42
|
-
|
|
43
|
-
describe("parseFilterArg — happy path", () => {
|
|
44
|
-
it('parses --filter all', () => {
|
|
45
|
-
expect(parseFilterArg("all")).toEqual({ kind: "all" });
|
|
46
|
-
});
|
|
47
|
-
|
|
48
|
-
it('parses senders=<csv> with whitespace tolerance', () => {
|
|
49
|
-
expect(parseFilterArg("senders=Alice, Bob Carter , Carol")).toEqual({
|
|
50
|
-
kind: "senders",
|
|
51
|
-
senders: ["Alice", "Bob Carter", "Carol"],
|
|
52
|
-
});
|
|
53
|
-
});
|
|
54
|
-
|
|
55
|
-
it('parses date-range=<from>..<to>', () => {
|
|
56
|
-
expect(parseFilterArg("date-range=2024-01-01..2024-06-30")).toEqual({
|
|
57
|
-
kind: "date-range",
|
|
58
|
-
fromIso: "2024-01-01",
|
|
59
|
-
toIso: "2024-06-30",
|
|
60
|
-
});
|
|
61
|
-
});
|
|
62
|
-
});
|
|
63
|
-
|
|
64
|
-
describe("parseFilterArg — LOUD-FAIL on malformed input", () => {
|
|
65
|
-
it("rejects undefined / empty", () => {
|
|
66
|
-
expect(() => parseFilterArg(undefined)).toThrow(/--filter is required/);
|
|
67
|
-
expect(() => parseFilterArg("")).toThrow(/--filter is required/);
|
|
68
|
-
expect(() => parseFilterArg(" ")).toThrow(/--filter is required/);
|
|
69
|
-
});
|
|
70
|
-
|
|
71
|
-
it("rejects unknown form", () => {
|
|
72
|
-
expect(() => parseFilterArg("everyone")).toThrow(/unrecognised form/);
|
|
73
|
-
expect(() => parseFilterArg("nope=yes")).toThrow(/unrecognised form/);
|
|
74
|
-
});
|
|
75
|
-
|
|
76
|
-
it("rejects senders= with no names", () => {
|
|
77
|
-
expect(() => parseFilterArg("senders=")).toThrow(/at least one/);
|
|
78
|
-
expect(() => parseFilterArg("senders=,,,")).toThrow(/at least one/);
|
|
79
|
-
});
|
|
80
|
-
|
|
81
|
-
it("rejects date-range with malformed shape or unparseable ISO", () => {
|
|
82
|
-
expect(() => parseFilterArg("date-range=2024-01-01")).toThrow(/<isoFrom>\.\.<isoTo>/);
|
|
83
|
-
expect(() => parseFilterArg("date-range=..2024-06-30")).toThrow(/both endpoints/);
|
|
84
|
-
expect(() => parseFilterArg("date-range=2024-01-01..")).toThrow(/both endpoints/);
|
|
85
|
-
expect(() => parseFilterArg("date-range=not-a-date..2024-06-30")).toThrow(/not parseable/);
|
|
86
|
-
expect(() => parseFilterArg("date-range=2024-06-30..2024-01-01")).toThrow(/later than/);
|
|
87
|
-
});
|
|
88
|
-
});
|
|
89
|
-
|
|
90
|
-
describe("applyFilter — semantics", () => {
|
|
91
|
-
it("--filter all keeps every row, preserves the parser's original sequenceIndex", () => {
|
|
92
|
-
const out = applyFilter(SAMPLE, parseFilterArg("all"));
|
|
93
|
-
expect(out.length).toBe(SAMPLE.length);
|
|
94
|
-
expect(out.map((l) => l.sequenceIndex)).toEqual([0, 1, 2, 3, 4]);
|
|
95
|
-
});
|
|
96
|
-
|
|
97
|
-
it("--filter senders=Alice,Bob keeps only those senders, sequenceIndex stays as the parser-stamped value (Carol's index 3 drops out)", () => {
|
|
98
|
-
const out = applyFilter(SAMPLE, parseFilterArg("senders=Alice,Bob"));
|
|
99
|
-
expect(lines(out)).toEqual([
|
|
100
|
-
"Alice@2024-01-15T09:00:00+00:00:morning",
|
|
101
|
-
"Bob@2024-01-15T09:05:00+00:00:hi",
|
|
102
|
-
"Alice@2024-03-20T11:00:00+00:00:lunch?",
|
|
103
|
-
"Bob@2024-07-01T00:00:01+00:00:next",
|
|
104
|
-
]);
|
|
105
|
-
expect(out.map((l) => l.sequenceIndex)).toEqual([0, 1, 2, 4]);
|
|
106
|
-
});
|
|
107
|
-
|
|
108
|
-
it("--filter date-range clips inclusively on both ends", () => {
|
|
109
|
-
const out = applyFilter(SAMPLE, parseFilterArg("date-range=2024-01-01..2024-06-30"));
|
|
110
|
-
// 2024-06-30T23:59:59 IS inclusive; 2024-07-01 is NOT.
|
|
111
|
-
expect(out.map((l) => `${l.senderName}/${l.dateSent}`)).toEqual([
|
|
112
|
-
"Alice/2024-01-15T09:00:00+00:00",
|
|
113
|
-
"Bob/2024-01-15T09:05:00+00:00",
|
|
114
|
-
"Alice/2024-03-20T11:00:00+00:00",
|
|
115
|
-
"Carol/2024-06-30T23:59:59+00:00",
|
|
116
|
-
]);
|
|
117
|
-
});
|
|
118
|
-
|
|
119
|
-
it("--filter date-range with full ISO 8601 endpoints", () => {
|
|
120
|
-
const out = applyFilter(
|
|
121
|
-
SAMPLE,
|
|
122
|
-
parseFilterArg("date-range=2024-03-01T00:00:00Z..2024-06-15T00:00:00Z"),
|
|
123
|
-
);
|
|
124
|
-
expect(out).toHaveLength(1);
|
|
125
|
-
expect(out[0].senderName).toBe("Alice");
|
|
126
|
-
expect(out[0].body).toBe("lunch?");
|
|
127
|
-
});
|
|
128
|
-
|
|
129
|
-
it("an unmatched senders filter returns []", () => {
|
|
130
|
-
const out = applyFilter(SAMPLE, parseFilterArg("senders=Nobody"));
|
|
131
|
-
expect(out).toEqual([]);
|
|
132
|
-
});
|
|
133
|
-
});
|
|
134
|
-
|
|
135
|
-
describe("ingest.mjs — missing --filter exits non-zero with LOUD-FAIL line", () => {
|
|
136
|
-
it("emits [whatsapp-ingest] FAIL phase=argv reason=\"--filter required ...\" before touching Neo4j", () => {
|
|
137
|
-
// Write a stub _chat.txt; argv parse fires first and rejects the call
|
|
138
|
-
// before resolveChatTxt/Neo4j connection. We do NOT need a real archive
|
|
139
|
-
// to test the gate.
|
|
140
|
-
const work = mkdtempSync(join(tmpdir(), "filter-gate-"));
|
|
141
|
-
try {
|
|
142
|
-
const stub = join(work, "_chat.txt");
|
|
143
|
-
writeFileSync(stub, "[14/03/26, 10:15:23] Joel: stub\n");
|
|
144
|
-
|
|
145
|
-
const res = spawnSync(
|
|
146
|
-
"node",
|
|
147
|
-
[
|
|
148
|
-
INGEST_MJS,
|
|
149
|
-
stub,
|
|
150
|
-
"--owner-element-id",
|
|
151
|
-
"stub-owner",
|
|
152
|
-
"--subject-person-id",
|
|
153
|
-
"stub-subject",
|
|
154
|
-
"--scope",
|
|
155
|
-
"admin",
|
|
156
|
-
],
|
|
157
|
-
{ encoding: "utf8" },
|
|
158
|
-
);
|
|
159
|
-
|
|
160
|
-
expect(res.status).not.toBe(0);
|
|
161
|
-
const stderr = res.stderr ?? "";
|
|
162
|
-
expect(stderr).toMatch(/\[whatsapp-ingest\] FAIL phase=argv/);
|
|
163
|
-
expect(stderr).toMatch(/reason="--filter is required/);
|
|
164
|
-
// Confirm the gate fired BEFORE any Neo4j or import work — no
|
|
165
|
-
// [whatsapp-ingest] start line, no archive-write log line.
|
|
166
|
-
expect(stderr).not.toMatch(/\[whatsapp-ingest\] start /);
|
|
167
|
-
expect(stderr).not.toMatch(/\[memory-archive-write\]/);
|
|
168
|
-
} finally {
|
|
169
|
-
rmSync(work, { recursive: true, force: true });
|
|
170
|
-
}
|
|
171
|
-
});
|
|
172
|
-
});
|
package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/ingest-idempotence.test.ts
DELETED
|
@@ -1,141 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect } from "vitest";
|
|
2
|
-
import {
|
|
3
|
-
normaliseSenderName,
|
|
4
|
-
sha256Hex,
|
|
5
|
-
deriveMessageId,
|
|
6
|
-
observationContentHash,
|
|
7
|
-
} from "../derive-keys.js";
|
|
8
|
-
|
|
9
|
-
describe("normaliseSenderName", () => {
|
|
10
|
-
it("returns NFKC-trim-lower form", () => {
|
|
11
|
-
expect(normaliseSenderName(" Adam Mackay ")).toBe("adam mackay");
|
|
12
|
-
});
|
|
13
|
-
|
|
14
|
-
it("collapses NFKC equivalent forms (composed vs decomposed accents)", () => {
|
|
15
|
-
const composed = "Ad\u00e1m"; // precomposed U+00E1
|
|
16
|
-
const decomposed = "Ada\u0301m"; // "a" followed by combining acute U+0301
|
|
17
|
-
expect(normaliseSenderName(composed)).toBe(normaliseSenderName(decomposed));
|
|
18
|
-
});
|
|
19
|
-
|
|
20
|
-
it("collapses full-width characters to ASCII via NFKC", () => {
|
|
21
|
-
expect(normaliseSenderName("\uFF21\uFF44\uFF41\uFF4D")).toBe("adam"); // full-width "Ａｄａｍ"
|
|
22
|
-
});
|
|
23
|
-
|
|
24
|
-
it("returns empty string for empty input without throwing", () => {
|
|
25
|
-
expect(normaliseSenderName("")).toBe("");
|
|
26
|
-
});
|
|
27
|
-
});
|
|
28
|
-
|
|
29
|
-
describe("sha256Hex", () => {
|
|
30
|
-
it("matches the canonical sha256 of an empty string", () => {
|
|
31
|
-
expect(sha256Hex("")).toBe(
|
|
32
|
-
"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
|
|
33
|
-
);
|
|
34
|
-
});
|
|
35
|
-
|
|
36
|
-
it("produces a deterministic hex digest", () => {
|
|
37
|
-
expect(sha256Hex("hello")).toBe(sha256Hex("hello"));
|
|
38
|
-
expect(sha256Hex("hello")).not.toBe(sha256Hex("world"));
|
|
39
|
-
});
|
|
40
|
-
});
|
|
41
|
-
|
|
42
|
-
describe("deriveMessageId", () => {
|
|
43
|
-
const baseInputs = {
|
|
44
|
-
conversationSha256: "abc123",
|
|
45
|
-
dateSent: "2026-03-14T10:15:23+00:00",
|
|
46
|
-
senderName: "Adam Mackay",
|
|
47
|
-
body: "Hello there",
|
|
48
|
-
};
|
|
49
|
-
|
|
50
|
-
it("produces a stable id for identical inputs", () => {
|
|
51
|
-
const id1 = deriveMessageId(baseInputs);
|
|
52
|
-
const id2 = deriveMessageId({ ...baseInputs });
|
|
53
|
-
expect(id1).toBe(id2);
|
|
54
|
-
});
|
|
55
|
-
|
|
56
|
-
it("collapses identical (sender, dateSent, body) tuples to one id under NFKC-trim-lower (correct for export duplicates)", () => {
|
|
57
|
-
const id1 = deriveMessageId(baseInputs);
|
|
58
|
-
const id2 = deriveMessageId({ ...baseInputs, senderName: " ADAM Mackay " });
|
|
59
|
-
expect(id1).toBe(id2);
|
|
60
|
-
});
|
|
61
|
-
|
|
62
|
-
it("changes when the body differs", () => {
|
|
63
|
-
const id1 = deriveMessageId(baseInputs);
|
|
64
|
-
const id2 = deriveMessageId({ ...baseInputs, body: "Hello there!" });
|
|
65
|
-
expect(id1).not.toBe(id2);
|
|
66
|
-
});
|
|
67
|
-
|
|
68
|
-
it("changes when the sender differs (after normalisation)", () => {
|
|
69
|
-
const id1 = deriveMessageId(baseInputs);
|
|
70
|
-
const id2 = deriveMessageId({ ...baseInputs, senderName: "Joel" });
|
|
71
|
-
expect(id1).not.toBe(id2);
|
|
72
|
-
});
|
|
73
|
-
|
|
74
|
-
it("changes when the dateSent differs", () => {
|
|
75
|
-
const id1 = deriveMessageId(baseInputs);
|
|
76
|
-
const id2 = deriveMessageId({
|
|
77
|
-
...baseInputs,
|
|
78
|
-
dateSent: "2026-03-14T10:15:24+00:00",
|
|
79
|
-
});
|
|
80
|
-
expect(id1).not.toBe(id2);
|
|
81
|
-
});
|
|
82
|
-
|
|
83
|
-
it("changes when the conversation changes", () => {
|
|
84
|
-
const id1 = deriveMessageId(baseInputs);
|
|
85
|
-
const id2 = deriveMessageId({ ...baseInputs, conversationSha256: "def456" });
|
|
86
|
-
expect(id1).not.toBe(id2);
|
|
87
|
-
});
|
|
88
|
-
|
|
89
|
-
it("starts with the whatsapp-export:msg prefix and embeds normalised sender", () => {
|
|
90
|
-
const id = deriveMessageId(baseInputs);
|
|
91
|
-
expect(id.startsWith("whatsapp-export:msg:")).toBe(true);
|
|
92
|
-
expect(id).toContain(":adam mackay:");
|
|
93
|
-
});
|
|
94
|
-
|
|
95
|
-
it("does not embed array-position or FNV32 collapse (Task 870 contract)", () => {
|
|
96
|
-
const id = deriveMessageId(baseInputs);
|
|
97
|
-
expect(id).toContain(":msg:");
|
|
98
|
-
expect(id).toMatch(/:[a-f0-9]{64}$/);
|
|
99
|
-
expect(id).not.toMatch(/:\d+:[a-f0-9]{8}$/);
|
|
100
|
-
});
|
|
101
|
-
|
|
102
|
-
it("produces a stable id for empty body", () => {
|
|
103
|
-
const id = deriveMessageId({ ...baseInputs, body: "" });
|
|
104
|
-
expect(id).toBe(deriveMessageId({ ...baseInputs, body: "" }));
|
|
105
|
-
});
|
|
106
|
-
});
|
|
107
|
-
|
|
108
|
-
describe("observationContentHash", () => {
|
|
109
|
-
it("is deterministic for identical inputs", () => {
|
|
110
|
-
const fields = { summary: "Adam said hi", from: "Adam", to: "Joel", subject: null };
|
|
111
|
-
expect(observationContentHash(fields)).toBe(observationContentHash(fields));
|
|
112
|
-
});
|
|
113
|
-
|
|
114
|
-
it("normalises NFKC + trim + lowercase across all fields", () => {
|
|
115
|
-
const a = { summary: " Hello ", from: "ADAM", to: null, subject: null };
|
|
116
|
-
const b = { summary: "hello", from: "adam", to: null, subject: null };
|
|
117
|
-
expect(observationContentHash(a)).toBe(observationContentHash(b));
|
|
118
|
-
});
|
|
119
|
-
|
|
120
|
-
it("treats null and empty string equivalently", () => {
|
|
121
|
-
const withNull = { summary: "x", from: null, to: null, subject: null };
|
|
122
|
-
const withEmpty = { summary: "x", from: "", to: "", subject: "" };
|
|
123
|
-
expect(observationContentHash(withNull)).toBe(observationContentHash(withEmpty));
|
|
124
|
-
});
|
|
125
|
-
|
|
126
|
-
it("changes when any field changes", () => {
|
|
127
|
-
const base = { summary: "x", from: null, to: null, subject: null };
|
|
128
|
-
expect(observationContentHash(base)).not.toBe(
|
|
129
|
-
observationContentHash({ ...base, summary: "y" }),
|
|
130
|
-
);
|
|
131
|
-
expect(observationContentHash(base)).not.toBe(
|
|
132
|
-
observationContentHash({ ...base, from: "z" }),
|
|
133
|
-
);
|
|
134
|
-
});
|
|
135
|
-
|
|
136
|
-
it("collapses NFKC equivalent forms in summary", () => {
|
|
137
|
-
const composed = { summary: "Ad\u00e1m", from: null, to: null, subject: null }; // precomposed U+00E1
|
|
138
|
-
const decomposed = { summary: "Ada\u0301m", from: null, to: null, subject: null }; // "a" + combining acute U+0301
|
|
139
|
-
expect(observationContentHash(composed)).toBe(observationContentHash(decomposed));
|
|
140
|
-
});
|
|
141
|
-
});
|
|
@@ -1,83 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
|
2
|
-
import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
|
|
3
|
-
import { tmpdir } from "node:os";
|
|
4
|
-
import { join } from "node:path";
|
|
5
|
-
import { parseExport } from "../parse-export.js";
|
|
6
|
-
|
|
7
|
-
// Task 887 — bidi-strip regression. Some WhatsApp exports prefix every
|
|
8
|
-
// timestamp header with U+200E (LEFT-TO-RIGHT MARK) or U+200F (RTL MARK).
|
|
9
|
-
// Pre-fix `decodeAndNormalise` left those bytes in place; the timestamp
|
|
10
|
-
// regex (`^\[(\d{2})\/...`) failed; the LRM-prefixed line was glued onto
|
|
11
|
-
// the previous body as a continuation; the next clean header parsed its
|
|
12
|
-
// senderName off the polluted body — leaking 23 :Person nodes per import
|
|
13
|
-
// in the Adam Mackay archive. The fix strips U+200E/U+200F before
|
|
14
|
-
// tokenisation; this test reproduces the failure shape.
|
|
15
|
-
|
|
16
|
-
let workDir: string;
|
|
17
|
-
|
|
18
|
-
beforeEach(() => {
|
|
19
|
-
workDir = mkdtempSync(join(tmpdir(), "whatsapp-export-lrm-"));
|
|
20
|
-
});
|
|
21
|
-
|
|
22
|
-
afterEach(() => {
|
|
23
|
-
rmSync(workDir, { recursive: true, force: true });
|
|
24
|
-
});
|
|
25
|
-
|
|
26
|
-
function writeChat(name: string, content: string): string {
|
|
27
|
-
const filePath = join(workDir, name);
|
|
28
|
-
writeFileSync(filePath, content);
|
|
29
|
-
return filePath;
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
describe("parseExport — bidi-strip (Task 887)", () => {
|
|
33
|
-
it("strips U+200E from timestamp headers and parses each row independently", () => {
|
|
34
|
-
const LRM = "\u200E"; // LEFT-TO-RIGHT MARK, written as an escape so it stays visible
|
|
35
|
-
const filePath = writeChat(
|
|
36
|
-
"_chat.txt",
|
|
37
|
-
[
|
|
38
|
-
`${LRM}[04/02/26, 11:52:16] Adam Mackay: hi`,
|
|
39
|
-
`${LRM}[04/02/26, 11:52:30] Joel Smalley: hey`,
|
|
40
|
-
"",
|
|
41
|
-
].join("\n"),
|
|
42
|
-
);
|
|
43
|
-
|
|
44
|
-
const result = parseExport({
|
|
45
|
-
filePath,
|
|
46
|
-
accountId: "acct-887",
|
|
47
|
-
timezone: "Europe/London",
|
|
48
|
-
});
|
|
49
|
-
|
|
50
|
-
expect(result.parsedLines).toHaveLength(2);
|
|
51
|
-
expect(result.parsedLines.map((l) => l.senderName).sort()).toEqual([
|
|
52
|
-
"Adam Mackay",
|
|
53
|
-
"Joel Smalley",
|
|
54
|
-
]);
|
|
55
|
-
for (const line of result.parsedLines) {
|
|
56
|
-
expect(line.senderName).not.toContain("\n");
|
|
57
|
-
expect(line.senderName).not.toContain("[");
|
|
58
|
-
expect(line.senderName).not.toContain(LRM);
|
|
59
|
-
}
|
|
60
|
-
});
|
|
61
|
-
|
|
62
|
-
it("strips U+200F (RLM) on the timestamp line", () => {
|
|
63
|
-
const RLM = "\u200F"; // RIGHT-TO-LEFT MARK, written as an escape so it stays visible
|
|
64
|
-
const filePath = writeChat(
|
|
65
|
-
"_chat.txt",
|
|
66
|
-
[
|
|
67
|
-
`${RLM}[14/03/26, 10:15:23] Joel: Hello`,
|
|
68
|
-
`${RLM}[14/03/26, 10:16:01] Sarah: Hi back`,
|
|
69
|
-
"",
|
|
70
|
-
].join("\n"),
|
|
71
|
-
);
|
|
72
|
-
|
|
73
|
-
const result = parseExport({
|
|
74
|
-
filePath,
|
|
75
|
-
accountId: "acct-887",
|
|
76
|
-
timezone: "Europe/London",
|
|
77
|
-
});
|
|
78
|
-
|
|
79
|
-
expect(result.parsedLines).toHaveLength(2);
|
|
80
|
-
expect(result.parsedLines[0].senderName).toBe("Joel");
|
|
81
|
-
expect(result.parsedLines[1].senderName).toBe("Sarah");
|
|
82
|
-
});
|
|
83
|
-
});
|