@rubytech/create-realagent 1.0.826 → 1.0.829

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. package/package.json +1 -1
  2. package/payload/platform/neo4j/schema.cypher +35 -2
  3. package/payload/platform/package.json +2 -2
  4. package/payload/platform/plugins/admin/hooks/__tests__/archive-ingest-surface-gate.test.sh +39 -54
  5. package/payload/platform/plugins/admin/hooks/archive-ingest-surface-gate.sh +26 -52
  6. package/payload/platform/plugins/admin/skills/onboarding/SKILL.md +7 -7
  7. package/payload/platform/plugins/docs/references/cloudflare.md +1 -1
  8. package/payload/platform/plugins/docs/references/plugins-guide.md +1 -1
  9. package/payload/platform/plugins/docs/references/troubleshooting.md +1 -0
  10. package/payload/platform/plugins/memory/PLUGIN.md +5 -5
  11. package/payload/platform/plugins/memory/mcp/dist/index.js +18 -253
  12. package/payload/platform/plugins/memory/mcp/dist/index.js.map +1 -1
  13. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/llm-classifier.test.js +51 -0
  14. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/llm-classifier.test.js.map +1 -1
  15. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-validator.test.js +103 -0
  16. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-validator.test.js.map +1 -1
  17. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts +19 -4
  18. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts.map +1 -1
  19. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js +149 -56
  20. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js.map +1 -1
  21. package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.d.ts +16 -1
  22. package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.d.ts.map +1 -1
  23. package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.js +12 -3
  24. package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.js.map +1 -1
  25. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-archive-write.test.js +2 -138
  26. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-archive-write.test.js.map +1 -1
  27. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.d.ts +2 -0
  28. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.d.ts.map +1 -0
  29. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.js +66 -0
  30. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.js.map +1 -0
  31. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/profile-update-personfields-open.test.d.ts +2 -0
  32. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/profile-update-personfields-open.test.d.ts.map +1 -0
  33. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/profile-update-personfields-open.test.js +148 -0
  34. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/profile-update-personfields-open.test.js.map +1 -0
  35. package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.d.ts +1 -64
  36. package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.d.ts.map +1 -1
  37. package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.js +6 -336
  38. package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.js.map +1 -1
  39. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts +30 -0
  40. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts.map +1 -1
  41. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js +231 -0
  42. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js.map +1 -1
  43. package/payload/platform/plugins/memory/mcp/dist/tools/profile-update.d.ts +21 -17
  44. package/payload/platform/plugins/memory/mcp/dist/tools/profile-update.d.ts.map +1 -1
  45. package/payload/platform/plugins/memory/mcp/dist/tools/profile-update.js +77 -37
  46. package/payload/platform/plugins/memory/mcp/dist/tools/profile-update.js.map +1 -1
  47. package/payload/platform/plugins/memory/references/schema-base.md +7 -2
  48. package/payload/platform/plugins/memory/skills/document-ingest/SKILL.md +54 -4
  49. package/payload/platform/plugins/whatsapp/PLUGIN.md +1 -1
  50. package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.d.ts +18 -0
  51. package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.d.ts.map +1 -0
  52. package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.js +31 -0
  53. package/payload/platform/plugins/whatsapp-import/lib/dist/delta-cursor.js.map +1 -0
  54. package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.d.ts +27 -12
  55. package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.d.ts.map +1 -1
  56. package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.js +40 -20
  57. package/payload/platform/plugins/whatsapp-import/lib/dist/derive-keys.js.map +1 -1
  58. package/payload/platform/plugins/whatsapp-import/lib/dist/index.d.ts +7 -4
  59. package/payload/platform/plugins/whatsapp-import/lib/dist/index.d.ts.map +1 -1
  60. package/payload/platform/plugins/whatsapp-import/lib/dist/index.js +9 -6
  61. package/payload/platform/plugins/whatsapp-import/lib/dist/index.js.map +1 -1
  62. package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.d.ts +25 -0
  63. package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.d.ts.map +1 -0
  64. package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.js +48 -0
  65. package/payload/platform/plugins/whatsapp-import/lib/dist/sessionize.js.map +1 -0
  66. package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.d.ts +3 -0
  67. package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.d.ts.map +1 -0
  68. package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.js +47 -0
  69. package/payload/platform/plugins/whatsapp-import/lib/dist/to-classifier-input.js.map +1 -0
  70. package/payload/platform/scripts/seed-neo4j.sh +15 -14
  71. package/payload/platform/templates/specialists/agents/database-operator.md +10 -17
  72. package/payload/server/chunk-CUSH3UXP.js +2305 -0
  73. package/payload/server/chunk-IWNDVGKT.js +10077 -0
  74. package/payload/server/chunk-KC7NUABI.js +654 -0
  75. package/payload/server/chunk-T2OPNP3L.js +654 -0
  76. package/payload/server/chunk-WUVXPZIV.js +1116 -0
  77. package/payload/server/client-pool-3TM3SRIA.js +32 -0
  78. package/payload/server/cloudflare-task-tracker-4NIODMGL.js +19 -0
  79. package/payload/server/cloudflare-task-tracker-CR6TL4VL.js +19 -0
  80. package/payload/server/maxy-edge.js +3 -3
  81. package/payload/server/neo4j-migrations-XTQ4WEV6.js +428 -0
  82. package/payload/server/public/assets/{admin-DOkUspG1.js → admin-BNwPsMhJ.js} +2 -2
  83. package/payload/server/public/assets/{graph-LLMJa4Ch.js → graph-N_Bw-8oT.js} +1 -1
  84. package/payload/server/public/assets/{page-DoaF3DB0.js → page-BKLGP-th.js} +1 -1
  85. package/payload/server/public/graph.html +2 -2
  86. package/payload/server/public/index.html +2 -2
  87. package/payload/server/server.js +281 -168
  88. package/payload/platform/plugins/whatsapp-import/PLUGIN.md +0 -46
  89. package/payload/platform/plugins/whatsapp-import/bin/ingest.mjs +0 -670
  90. package/payload/platform/plugins/whatsapp-import/bin/whatsapp-ingest.sh +0 -131
  91. package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/filter-gate.test.ts +0 -172
  92. package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/ingest-idempotence.test.ts +0 -141
  93. package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/parse-export-lrm.test.ts +0 -83
  94. package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/parse-export.test.ts +0 -678
  95. package/payload/platform/plugins/whatsapp-import/lib/src/derive-keys.ts +0 -59
  96. package/payload/platform/plugins/whatsapp-import/lib/src/filter.ts +0 -136
  97. package/payload/platform/plugins/whatsapp-import/lib/src/index.ts +0 -19
  98. package/payload/platform/plugins/whatsapp-import/lib/src/parse-export.ts +0 -471
  99. package/payload/platform/plugins/whatsapp-import/lib/tsconfig.json +0 -9
  100. package/payload/platform/plugins/whatsapp-import/lib/vitest.config.ts +0 -9
  101. package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/SKILL.md +0 -131
  102. package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/references/export-parse.md +0 -109
  103. package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import-enrich/SKILL.md +0 -333
@@ -1,131 +0,0 @@
1
- #!/usr/bin/env bash
2
- # =============================================================================
3
- # whatsapp-ingest.sh — single deterministic Bash entry for WhatsApp archive
4
- # ingestion (Task 855 / Task 871). Thin wrapper: arg-validate, resolve env,
5
- # invoke ingest.mjs in-process. The script is the database-operator
6
- # subagent's only operator-facing handle on the parse → filter → archive-write
7
- # pipeline. Phase 1 has NO LLM. The Haiku insight pass is Phase 2 — invoked
8
- # consciously via `mcp__memory__whatsapp-export-insight-pass`.
9
- #
10
- # Usage:
11
- # bash whatsapp-ingest.sh <archive.zip|dir|_chat.txt>
12
- # --owner-element-id <id>
13
- # --subject-person-id <id>
14
- # --scope <admin|public>
15
- # --filter <all|senders=<csv>|date-range=<isoFrom>..<isoTo>>
16
- # [--account-id <accountId>]
17
- # [--timezone <iana-zone>]
18
- # [--date-format <DD/MM/YY|MM/DD/YY|DD/MM/YYYY|MM/DD/YYYY>]
19
- #
20
- # `--subject-person-id` is required (Task 887 §A0). DM scope: the
21
- # operator-confirmed third-party :Person elementId from the preview
22
- # histogram. Owner + subject form the closed sender set; any parsed
23
- # senderName outside that set LOUD-FAILs with `parser-miss` and exits
24
- # non-zero — bounds writer cardinality to the deterministic preview output.
25
- #
26
- # `--filter` is mandatory (Task 871). Forms:
27
- # all — write every parsed row
28
- # senders=Alice,Bob Carter — keep rows whose senderName ∈ csv
29
- # date-range=2024-01-01..2024-06-30
30
- # — keep rows whose dateSent falls inside
31
- # the inclusive range (date-only or full
32
- # ISO 8601 endpoints both accepted)
33
- #
34
- # Exit 0 + JSON summary on stdout on success.
35
- # Exit !0 + a [whatsapp-ingest] FAIL line on stderr on failure (a missing --filter emits two: filter-required, then phase=argv).
36
- # =============================================================================
37
-
38
- set -euo pipefail
39
-
40
- # Pre-flight arg check — emit the same single-line FAIL shape ingest.mjs uses
41
- # so the operator's grep pattern catches both layers uniformly.
42
- arg_fail() {
43
- local reason="$1"
44
- echo "[whatsapp-ingest] FAIL phase=argv reason=\"${reason}\"" >&2
45
- exit 1
46
- }
47
-
48
- # Locate platform root from this script's location:
49
- # platform/plugins/whatsapp-import/bin/ → ../../.. = platform/
50
- SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
51
- PLATFORM_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
52
- INGEST_MJS="$SCRIPT_DIR/ingest.mjs"
53
-
54
- if [ ! -f "$INGEST_MJS" ]; then
55
- arg_fail "ingest.mjs not found at $INGEST_MJS — run from a built install"
56
- fi
57
-
58
- # Validate required flags up-front — gives a cleaner pre-flight error than
59
- # the node-side throw and keeps the failure surface single-line. argv
60
- # validation runs before env resolution so a wrong invocation fails on the
61
- # wrong invocation, not on a missing password.
62
- ARCHIVE=""
63
- OWNER_VAL=""
64
- SUBJECT_VAL=""
65
- SCOPE_VAL=""
66
- FILTER_VAL=""
67
- HAS_OWNER=0
68
- HAS_SUBJECT=0
69
- HAS_SCOPE=0
70
- HAS_FILTER=0
71
-
72
- ARGS=("$@")
73
- i=0
74
- while [ $i -lt ${#ARGS[@]} ]; do
75
- a="${ARGS[$i]}"
76
- case "$a" in
77
- --owner-element-id) HAS_OWNER=1; OWNER_VAL="${ARGS[$((i + 1))]:-}"; i=$((i + 2)); continue ;;
78
- --subject-person-id) HAS_SUBJECT=1; SUBJECT_VAL="${ARGS[$((i + 1))]:-}"; i=$((i + 2)); continue ;;
79
- --scope) HAS_SCOPE=1; SCOPE_VAL="${ARGS[$((i + 1))]:-}"; i=$((i + 2)); continue ;;
80
- --filter) HAS_FILTER=1; FILTER_VAL="${ARGS[$((i + 1))]:-}"; i=$((i + 2)); continue ;;
81
- --account-id|--timezone|--date-format) i=$((i + 2)); continue ;;
82
- --*) i=$((i + 2)); continue ;;
83
- *)
84
- if [ -z "$ARCHIVE" ]; then ARCHIVE="$a"; fi
85
- i=$((i + 1))
86
- continue
87
- ;;
88
- esac
89
- done
90
-
91
- [ -n "$ARCHIVE" ] || arg_fail "missing positional <archive>"
92
- [ "$HAS_OWNER" -eq 1 ] && [ -n "$OWNER_VAL" ] || arg_fail "missing --owner-element-id (or empty value)"
93
- [ "$HAS_SUBJECT" -eq 1 ] && [ -n "$SUBJECT_VAL" ] || arg_fail "missing --subject-person-id (Task 887: operator-confirmed third-party :Person elementId from preview histogram)"
94
- [ "$HAS_SCOPE" -eq 1 ] && [ -n "$SCOPE_VAL" ] || arg_fail "missing --scope (or empty value)"
95
- case "$SCOPE_VAL" in
96
- admin|public) : ;;
97
- *) arg_fail "invalid --scope \"$SCOPE_VAL\" (admin|public)" ;;
98
- esac
99
- if [ "$HAS_FILTER" -ne 1 ] || [ -z "$FILTER_VAL" ]; then
100
- # Mirror ingest.mjs's pinned LOUD-FAIL line so a single grep covers both
101
- # layers — the operator's runbook recipe is `grep '\[whatsapp-ingest\] FAIL filter-required'`.
102
- echo "[whatsapp-ingest] FAIL filter-required reason=\"bulk-archive-gate (Task 871) — operator must specify --filter (one of all, senders=<csv>, date-range=<isoFrom>..<isoTo>)\"" >&2
103
- arg_fail "missing --filter (one of all, senders=<csv>, date-range=<isoFrom>..<isoTo>)"
104
- fi
105
-
106
- # Lift NEO4J_PASSWORD from the install's config file when env doesn't carry it
107
- # (e.g. operator running directly from a shell rather than via the platform
108
- # server). Mirror seed-neo4j.sh precedence: env var wins, then the file.
109
- if [ -z "${NEO4J_PASSWORD:-}" ]; then
110
- NEO4J_PASSWORD_FILE="$PLATFORM_ROOT/config/.neo4j-password"
111
- if [ -f "$NEO4J_PASSWORD_FILE" ]; then
112
- NEO4J_PASSWORD="$(cat "$NEO4J_PASSWORD_FILE")"
113
- export NEO4J_PASSWORD
114
- else
115
- arg_fail "NEO4J_PASSWORD not in env and $NEO4J_PASSWORD_FILE not found"
116
- fi
117
- fi
118
-
119
- # NEO4J_URI is hard-required (Task 787). The wrapper does not fabricate a
120
- # default — the platform server sets this at spawn time and the operator can
121
- # pass it explicitly when running standalone.
122
- if [ -z "${NEO4J_URI:-}" ]; then
123
- arg_fail "NEO4J_URI not set (no default — set in env)"
124
- fi
125
-
126
- export NEO4J_USER="${NEO4J_USER:-neo4j}"
127
- export MAXY_PLATFORM_ROOT="$PLATFORM_ROOT"
128
-
129
- # Hand off to node. ingest.mjs handles all observability; the wrapper does
130
- # not double-log. Forward exit code verbatim.
131
- exec node "$INGEST_MJS" "$@"
@@ -1,172 +0,0 @@
1
- import { describe, it, expect } from "vitest";
2
- import { spawnSync } from "node:child_process";
3
- import { mkdtempSync, writeFileSync, rmSync } from "node:fs";
4
- import { tmpdir } from "node:os";
5
- import { join, resolve, dirname } from "node:path";
6
- import { fileURLToPath } from "node:url";
7
-
8
- import { parseFilterArg, applyFilter } from "../filter.js";
9
- import type { ParsedLine } from "../parse-export.js";
10
-
11
- // ---------------------------------------------------------------------------
12
- // filter-gate — Task 871 contract: --filter is mandatory, three forms.
13
- //
14
- // The unit tests cover the helper module's pure-function surface (parse +
15
- // apply). The integration test spawns ingest.mjs as a subprocess and
16
- // asserts the no-flag invocation exits non-zero with the LOUD-FAIL line
17
- // (`[whatsapp-ingest] FAIL phase=argv reason="--filter is required …"`)
18
- // before any Neo4j connection — argv parsing fires before imports/sessions.
19
- // ---------------------------------------------------------------------------
20
-
21
- const __dirname = dirname(fileURLToPath(import.meta.url));
22
- // src/__tests__/ → ../../../bin/ingest.mjs (up to src/, up to lib/, up to plugin
23
- // root, then bin/).
24
- const INGEST_MJS = resolve(__dirname, "..", "..", "..", "bin", "ingest.mjs");
25
-
26
- function lines(parsed: ParsedLine[]): string[] {
27
- return parsed.map((p) => `${p.senderName}@${p.dateSent}:${p.body}`);
28
- }
29
-
30
- const SAMPLE: ParsedLine[] = [
31
- { senderName: "Alice", dateSent: "2024-01-15T09:00:00+00:00", body: "morning", sequenceIndex: 0 },
32
- { senderName: "Bob", dateSent: "2024-01-15T09:05:00+00:00", body: "hi", sequenceIndex: 1 },
33
- { senderName: "Alice", dateSent: "2024-03-20T11:00:00+00:00", body: "lunch?", sequenceIndex: 2 },
34
- { senderName: "Carol", dateSent: "2024-06-30T23:59:59+00:00: ", body: "bye", sequenceIndex: 3 },
35
- { senderName: "Bob", dateSent: "2024-07-01T00:00:01+00:00", body: "next", sequenceIndex: 4 },
36
- ];
37
-
38
- // Repair the malformed dateSent fixture (extra ':' suffix would Date.parse to
39
- // NaN). Keep this in code, not in the const above, so the fixture reads
40
- // naturally while SAMPLE stays typed as ParsedLine[].
41
- SAMPLE[3] = { ...SAMPLE[3], dateSent: "2024-06-30T23:59:59+00:00" };
42
-
43
- describe("parseFilterArg — happy path", () => {
44
- it('parses --filter all', () => {
45
- expect(parseFilterArg("all")).toEqual({ kind: "all" });
46
- });
47
-
48
- it('parses senders=<csv> with whitespace tolerance', () => {
49
- expect(parseFilterArg("senders=Alice, Bob Carter , Carol")).toEqual({
50
- kind: "senders",
51
- senders: ["Alice", "Bob Carter", "Carol"],
52
- });
53
- });
54
-
55
- it('parses date-range=<from>..<to>', () => {
56
- expect(parseFilterArg("date-range=2024-01-01..2024-06-30")).toEqual({
57
- kind: "date-range",
58
- fromIso: "2024-01-01",
59
- toIso: "2024-06-30",
60
- });
61
- });
62
- });
63
-
64
- describe("parseFilterArg — LOUD-FAIL on malformed input", () => {
65
- it("rejects undefined / empty", () => {
66
- expect(() => parseFilterArg(undefined)).toThrow(/--filter is required/);
67
- expect(() => parseFilterArg("")).toThrow(/--filter is required/);
68
- expect(() => parseFilterArg(" ")).toThrow(/--filter is required/);
69
- });
70
-
71
- it("rejects unknown form", () => {
72
- expect(() => parseFilterArg("everyone")).toThrow(/unrecognised form/);
73
- expect(() => parseFilterArg("nope=yes")).toThrow(/unrecognised form/);
74
- });
75
-
76
- it("rejects senders= with no names", () => {
77
- expect(() => parseFilterArg("senders=")).toThrow(/at least one/);
78
- expect(() => parseFilterArg("senders=,,,")).toThrow(/at least one/);
79
- });
80
-
81
- it("rejects date-range with malformed shape or unparseable ISO", () => {
82
- expect(() => parseFilterArg("date-range=2024-01-01")).toThrow(/<isoFrom>\.\.<isoTo>/);
83
- expect(() => parseFilterArg("date-range=..2024-06-30")).toThrow(/both endpoints/);
84
- expect(() => parseFilterArg("date-range=2024-01-01..")).toThrow(/both endpoints/);
85
- expect(() => parseFilterArg("date-range=not-a-date..2024-06-30")).toThrow(/not parseable/);
86
- expect(() => parseFilterArg("date-range=2024-06-30..2024-01-01")).toThrow(/later than/);
87
- });
88
- });
89
-
90
- describe("applyFilter — semantics", () => {
91
- it("--filter all keeps every row, preserves the parser's original sequenceIndex", () => {
92
- const out = applyFilter(SAMPLE, parseFilterArg("all"));
93
- expect(out.length).toBe(SAMPLE.length);
94
- expect(out.map((l) => l.sequenceIndex)).toEqual([0, 1, 2, 3, 4]);
95
- });
96
-
97
- it("--filter senders=Alice,Bob keeps only those senders, sequenceIndex stays as the parser-stamped value (Carol's index 3 drops out)", () => {
98
- const out = applyFilter(SAMPLE, parseFilterArg("senders=Alice,Bob"));
99
- expect(lines(out)).toEqual([
100
- "Alice@2024-01-15T09:00:00+00:00:morning",
101
- "Bob@2024-01-15T09:05:00+00:00:hi",
102
- "Alice@2024-03-20T11:00:00+00:00:lunch?",
103
- "Bob@2024-07-01T00:00:01+00:00:next",
104
- ]);
105
- expect(out.map((l) => l.sequenceIndex)).toEqual([0, 1, 2, 4]);
106
- });
107
-
108
- it("--filter date-range clips inclusively on both ends", () => {
109
- const out = applyFilter(SAMPLE, parseFilterArg("date-range=2024-01-01..2024-06-30"));
110
- // 2024-06-30T23:59:59 IS inclusive; 2024-07-01 is NOT.
111
- expect(out.map((l) => `${l.senderName}/${l.dateSent}`)).toEqual([
112
- "Alice/2024-01-15T09:00:00+00:00",
113
- "Bob/2024-01-15T09:05:00+00:00",
114
- "Alice/2024-03-20T11:00:00+00:00",
115
- "Carol/2024-06-30T23:59:59+00:00",
116
- ]);
117
- });
118
-
119
- it("--filter date-range with full ISO 8601 endpoints", () => {
120
- const out = applyFilter(
121
- SAMPLE,
122
- parseFilterArg("date-range=2024-03-01T00:00:00Z..2024-06-15T00:00:00Z"),
123
- );
124
- expect(out).toHaveLength(1);
125
- expect(out[0].senderName).toBe("Alice");
126
- expect(out[0].body).toBe("lunch?");
127
- });
128
-
129
- it("an unmatched senders filter returns []", () => {
130
- const out = applyFilter(SAMPLE, parseFilterArg("senders=Nobody"));
131
- expect(out).toEqual([]);
132
- });
133
- });
134
-
135
- describe("ingest.mjs — missing --filter exits non-zero with LOUD-FAIL line", () => {
136
- it("emits [whatsapp-ingest] FAIL phase=argv reason=\"--filter required ...\" before touching Neo4j", () => {
137
- // Write a stub _chat.txt; argv parse fires first and rejects the call
138
- // before resolveChatTxt/Neo4j connection. We do NOT need a real archive
139
- // to test the gate.
140
- const work = mkdtempSync(join(tmpdir(), "filter-gate-"));
141
- try {
142
- const stub = join(work, "_chat.txt");
143
- writeFileSync(stub, "[14/03/26, 10:15:23] Joel: stub\n");
144
-
145
- const res = spawnSync(
146
- "node",
147
- [
148
- INGEST_MJS,
149
- stub,
150
- "--owner-element-id",
151
- "stub-owner",
152
- "--subject-person-id",
153
- "stub-subject",
154
- "--scope",
155
- "admin",
156
- ],
157
- { encoding: "utf8" },
158
- );
159
-
160
- expect(res.status).not.toBe(0);
161
- const stderr = res.stderr ?? "";
162
- expect(stderr).toMatch(/\[whatsapp-ingest\] FAIL phase=argv/);
163
- expect(stderr).toMatch(/reason="--filter is required/);
164
- // Confirm the gate fired BEFORE any Neo4j or import work — no
165
- // [whatsapp-ingest] start line, no archive-write log line.
166
- expect(stderr).not.toMatch(/\[whatsapp-ingest\] start /);
167
- expect(stderr).not.toMatch(/\[memory-archive-write\]/);
168
- } finally {
169
- rmSync(work, { recursive: true, force: true });
170
- }
171
- });
172
- });
@@ -1,141 +0,0 @@
1
- import { describe, it, expect } from "vitest";
2
- import {
3
- normaliseSenderName,
4
- sha256Hex,
5
- deriveMessageId,
6
- observationContentHash,
7
- } from "../derive-keys.js";
8
-
9
- describe("normaliseSenderName", () => {
10
- it("returns NFKC-trim-lower form", () => {
11
- expect(normaliseSenderName(" Adam Mackay ")).toBe("adam mackay");
12
- });
13
-
14
- it("collapses NFKC equivalent forms (composed vs decomposed accents)", () => {
15
- const composed = "Adám";
16
- const decomposed = "Adám";
17
- expect(normaliseSenderName(composed)).toBe(normaliseSenderName(decomposed));
18
- });
19
-
20
- it("collapses full-width characters to ASCII via NFKC", () => {
21
- expect(normaliseSenderName("Adam")).toBe("adam");
22
- });
23
-
24
- it("returns empty string for empty input without throwing", () => {
25
- expect(normaliseSenderName("")).toBe("");
26
- });
27
- });
28
-
29
- describe("sha256Hex", () => {
30
- it("matches the canonical sha256 of an empty string", () => {
31
- expect(sha256Hex("")).toBe(
32
- "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
33
- );
34
- });
35
-
36
- it("produces a deterministic hex digest", () => {
37
- expect(sha256Hex("hello")).toBe(sha256Hex("hello"));
38
- expect(sha256Hex("hello")).not.toBe(sha256Hex("world"));
39
- });
40
- });
41
-
42
- describe("deriveMessageId", () => {
43
- const baseInputs = {
44
- conversationSha256: "abc123",
45
- dateSent: "2026-03-14T10:15:23+00:00",
46
- senderName: "Adam Mackay",
47
- body: "Hello there",
48
- };
49
-
50
- it("produces a stable id for identical inputs", () => {
51
- const id1 = deriveMessageId(baseInputs);
52
- const id2 = deriveMessageId({ ...baseInputs });
53
- expect(id1).toBe(id2);
54
- });
55
-
56
- it("collapses identical (sender, dateSent, body) tuples to one id under NFKC-trim-lower (correct for export duplicates)", () => {
57
- const id1 = deriveMessageId(baseInputs);
58
- const id2 = deriveMessageId({ ...baseInputs, senderName: " ADAM Mackay " });
59
- expect(id1).toBe(id2);
60
- });
61
-
62
- it("changes when the body differs", () => {
63
- const id1 = deriveMessageId(baseInputs);
64
- const id2 = deriveMessageId({ ...baseInputs, body: "Hello there!" });
65
- expect(id1).not.toBe(id2);
66
- });
67
-
68
- it("changes when the sender differs (after normalisation)", () => {
69
- const id1 = deriveMessageId(baseInputs);
70
- const id2 = deriveMessageId({ ...baseInputs, senderName: "Joel" });
71
- expect(id1).not.toBe(id2);
72
- });
73
-
74
- it("changes when the dateSent differs", () => {
75
- const id1 = deriveMessageId(baseInputs);
76
- const id2 = deriveMessageId({
77
- ...baseInputs,
78
- dateSent: "2026-03-14T10:15:24+00:00",
79
- });
80
- expect(id1).not.toBe(id2);
81
- });
82
-
83
- it("changes when the conversation changes", () => {
84
- const id1 = deriveMessageId(baseInputs);
85
- const id2 = deriveMessageId({ ...baseInputs, conversationSha256: "def456" });
86
- expect(id1).not.toBe(id2);
87
- });
88
-
89
- it("starts with the whatsapp-export:msg prefix and embeds normalised sender", () => {
90
- const id = deriveMessageId(baseInputs);
91
- expect(id.startsWith("whatsapp-export:msg:")).toBe(true);
92
- expect(id).toContain(":adam mackay:");
93
- });
94
-
95
- it("does not embed array-position or FNV32 collapse (Task 870 contract)", () => {
96
- const id = deriveMessageId(baseInputs);
97
- expect(id).toContain(":msg:");
98
- expect(id).toMatch(/:[a-f0-9]{64}$/);
99
- expect(id).not.toMatch(/:\d+:[a-f0-9]{8}$/);
100
- });
101
-
102
- it("produces a stable id for empty body", () => {
103
- const id = deriveMessageId({ ...baseInputs, body: "" });
104
- expect(id).toBe(deriveMessageId({ ...baseInputs, body: "" }));
105
- });
106
- });
107
-
108
- describe("observationContentHash", () => {
109
- it("is deterministic for identical inputs", () => {
110
- const fields = { summary: "Adam said hi", from: "Adam", to: "Joel", subject: null };
111
- expect(observationContentHash(fields)).toBe(observationContentHash(fields));
112
- });
113
-
114
- it("normalises NFKC + trim + lowercase across all fields", () => {
115
- const a = { summary: " Hello ", from: "ADAM", to: null, subject: null };
116
- const b = { summary: "hello", from: "adam", to: null, subject: null };
117
- expect(observationContentHash(a)).toBe(observationContentHash(b));
118
- });
119
-
120
- it("treats null and empty string equivalently", () => {
121
- const withNull = { summary: "x", from: null, to: null, subject: null };
122
- const withEmpty = { summary: "x", from: "", to: "", subject: "" };
123
- expect(observationContentHash(withNull)).toBe(observationContentHash(withEmpty));
124
- });
125
-
126
- it("changes when any field changes", () => {
127
- const base = { summary: "x", from: null, to: null, subject: null };
128
- expect(observationContentHash(base)).not.toBe(
129
- observationContentHash({ ...base, summary: "y" }),
130
- );
131
- expect(observationContentHash(base)).not.toBe(
132
- observationContentHash({ ...base, from: "z" }),
133
- );
134
- });
135
-
136
- it("collapses NFKC equivalent forms in summary", () => {
137
- const composed = { summary: "Adám", from: null, to: null, subject: null };
138
- const decomposed = { summary: "Adám", from: null, to: null, subject: null };
139
- expect(observationContentHash(composed)).toBe(observationContentHash(decomposed));
140
- });
141
- });
@@ -1,83 +0,0 @@
1
- import { describe, it, expect, beforeEach, afterEach } from "vitest";
2
- import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
3
- import { tmpdir } from "node:os";
4
- import { join } from "node:path";
5
- import { parseExport } from "../parse-export.js";
6
-
7
- // Task 887 — bidi-strip regression. Some WhatsApp exports prefix every
8
- // timestamp header with U+200E (LEFT-TO-RIGHT MARK) or U+200F (RTL MARK).
9
- // Pre-fix `decodeAndNormalise` left those bytes in place; the timestamp
10
- // regex (`^\[(\d{2})\/...`) failed; the LRM-prefixed line was glued onto
11
- // the previous body as a continuation; the next clean header parsed its
12
- // senderName off the polluted body — leaking 23 :Person nodes per import
13
- // in the Adam Mackay archive. The fix strips U+200E/U+200F before
14
- // tokenisation; this test reproduces the failure shape.
15
-
16
- let workDir: string;
17
-
18
- beforeEach(() => {
19
- workDir = mkdtempSync(join(tmpdir(), "whatsapp-export-lrm-"));
20
- });
21
-
22
- afterEach(() => {
23
- rmSync(workDir, { recursive: true, force: true });
24
- });
25
-
26
- function writeChat(name: string, content: string): string {
27
- const filePath = join(workDir, name);
28
- writeFileSync(filePath, content);
29
- return filePath;
30
- }
31
-
32
- describe("parseExport — bidi-strip (Task 887)", () => {
33
- it("strips U+200E from timestamp headers and parses each row independently", () => {
34
- const LRM = "‎";
35
- const filePath = writeChat(
36
- "_chat.txt",
37
- [
38
- `${LRM}[04/02/26, 11:52:16] Adam Mackay: hi`,
39
- `${LRM}[04/02/26, 11:52:30] Joel Smalley: hey`,
40
- "",
41
- ].join("\n"),
42
- );
43
-
44
- const result = parseExport({
45
- filePath,
46
- accountId: "acct-887",
47
- timezone: "Europe/London",
48
- });
49
-
50
- expect(result.parsedLines).toHaveLength(2);
51
- expect(result.parsedLines.map((l) => l.senderName).sort()).toEqual([
52
- "Adam Mackay",
53
- "Joel Smalley",
54
- ]);
55
- for (const line of result.parsedLines) {
56
- expect(line.senderName).not.toContain("\n");
57
- expect(line.senderName).not.toContain("[");
58
- expect(line.senderName).not.toContain(LRM);
59
- }
60
- });
61
-
62
- it("strips U+200F (RLM) on the timestamp line", () => {
63
- const RLM = "‏";
64
- const filePath = writeChat(
65
- "_chat.txt",
66
- [
67
- `${RLM}[14/03/26, 10:15:23] Joel: Hello`,
68
- `${RLM}[14/03/26, 10:16:01] Sarah: Hi back`,
69
- "",
70
- ].join("\n"),
71
- );
72
-
73
- const result = parseExport({
74
- filePath,
75
- accountId: "acct-887",
76
- timezone: "Europe/London",
77
- });
78
-
79
- expect(result.parsedLines).toHaveLength(2);
80
- expect(result.parsedLines[0].senderName).toBe("Joel");
81
- expect(result.parsedLines[1].senderName).toBe("Sarah");
82
- });
83
- });