@rubytech/create-realagent 1.0.828 → 1.0.830

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. package/package.json +1 -1
  2. package/payload/platform/config/brand.json +1 -1
  3. package/payload/platform/lib/oauth-llm/dist/index.d.ts +1 -1
  4. package/payload/platform/lib/oauth-llm/dist/index.d.ts.map +1 -1
  5. package/payload/platform/lib/oauth-llm/dist/index.js +21 -0
  6. package/payload/platform/lib/oauth-llm/dist/index.js.map +1 -1
  7. package/payload/platform/lib/oauth-llm/src/index.ts +24 -0
  8. package/payload/platform/neo4j/migrations/007-conversation-archive-source.ts +116 -0
  9. package/payload/platform/neo4j/schema.cypher +12 -2
  10. package/payload/platform/package.json +2 -2
  11. package/payload/platform/plugins/admin/hooks/__tests__/archive-ingest-surface-gate.test.sh +6 -6
  12. package/payload/platform/plugins/admin/hooks/archive-ingest-surface-gate.sh +14 -8
  13. package/payload/platform/plugins/admin/skills/onboarding/SKILL.md +2 -2
  14. package/payload/platform/plugins/contacts/mcp/dist/index.js +5 -5
  15. package/payload/platform/plugins/contacts/mcp/dist/index.js.map +1 -1
  16. package/payload/platform/plugins/contacts/mcp/dist/tools/contact-create.d.ts +1 -1
  17. package/payload/platform/plugins/contacts/mcp/dist/tools/contact-create.d.ts.map +1 -1
  18. package/payload/platform/plugins/contacts/mcp/dist/tools/contact-create.js +29 -23
  19. package/payload/platform/plugins/contacts/mcp/dist/tools/contact-create.js.map +1 -1
  20. package/payload/platform/plugins/docs/references/plugins-guide.md +1 -1
  21. package/payload/platform/plugins/memory/PLUGIN.md +6 -5
  22. package/payload/platform/plugins/{whatsapp-import/bin/ingest.mjs → memory/bin/conversation-archive-ingest.mjs} +136 -212
  23. package/payload/platform/plugins/{whatsapp-import/bin/whatsapp-ingest.sh → memory/bin/conversation-archive-ingest.sh} +27 -19
  24. package/payload/platform/plugins/memory/mcp/dist/index.js +26 -212
  25. package/payload/platform/plugins/memory/mcp/dist/index.js.map +1 -1
  26. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/llm-classifier.test.js +4 -3
  27. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/llm-classifier.test.js.map +1 -1
  28. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-loader.test.js +11 -6
  29. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-loader.test.js.map +1 -1
  30. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-validator.test.js +103 -0
  31. package/payload/platform/plugins/memory/mcp/dist/lib/__tests__/schema-validator.test.js.map +1 -1
  32. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/index.d.ts +5 -0
  33. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/index.d.ts.map +1 -0
  34. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/index.js +30 -0
  35. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/index.js.map +1 -0
  36. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/types.d.ts +48 -0
  37. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/types.d.ts.map +1 -0
  38. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/types.js +23 -0
  39. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/types.js.map +1 -0
  40. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/whatsapp-text.d.ts +3 -0
  41. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/whatsapp-text.d.ts.map +1 -0
  42. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/whatsapp-text.js +237 -0
  43. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-normalisers/whatsapp-text.js.map +1 -0
  44. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/delta-cursor.d.ts +11 -0
  45. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/delta-cursor.d.ts.map +1 -0
  46. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/delta-cursor.js +21 -0
  47. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/delta-cursor.js.map +1 -0
  48. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.d.ts +16 -0
  49. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.d.ts.map +1 -0
  50. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.js +39 -0
  51. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/derive-keys.js.map +1 -0
  52. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.d.ts +17 -0
  53. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.d.ts.map +1 -0
  54. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.js +90 -0
  55. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sender-bind.js.map +1 -0
  56. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sessionize.d.ts +9 -0
  57. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sessionize.d.ts.map +1 -0
  58. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sessionize.js +32 -0
  59. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/sessionize.js.map +1 -0
  60. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/to-turn-text.d.ts +3 -0
  61. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/to-turn-text.d.ts.map +1 -0
  62. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/to-turn-text.js +27 -0
  63. package/payload/platform/plugins/memory/mcp/dist/lib/conversation-pipeline/to-turn-text.js.map +1 -0
  64. package/payload/platform/plugins/memory/mcp/dist/lib/document-chunker.d.ts +45 -0
  65. package/payload/platform/plugins/memory/mcp/dist/lib/document-chunker.d.ts.map +1 -0
  66. package/payload/platform/plugins/memory/mcp/dist/lib/document-chunker.js +125 -0
  67. package/payload/platform/plugins/memory/mcp/dist/lib/document-chunker.js.map +1 -0
  68. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts +24 -1
  69. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.d.ts.map +1 -1
  70. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js +293 -33
  71. package/payload/platform/plugins/memory/mcp/dist/lib/llm-classifier.js.map +1 -1
  72. package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.d.ts.map +1 -1
  73. package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.js +9 -2
  74. package/payload/platform/plugins/memory/mcp/dist/lib/llm-ranker.js.map +1 -1
  75. package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.d.ts +16 -1
  76. package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.d.ts.map +1 -1
  77. package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.js +12 -3
  78. package/payload/platform/plugins/memory/mcp/dist/lib/schema-validator.js.map +1 -1
  79. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-source-agnosticism.test.d.ts +2 -0
  80. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-source-agnosticism.test.d.ts.map +1 -0
  81. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-source-agnosticism.test.js +75 -0
  82. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-source-agnosticism.test.js.map +1 -0
  83. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-whatsapp-text.test.d.ts +2 -0
  84. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-whatsapp-text.test.d.ts.map +1 -0
  85. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-whatsapp-text.test.js +67 -0
  86. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/conversation-normalisers-whatsapp-text.test.js.map +1 -0
  87. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-archive-write.test.js +2 -138
  88. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-archive-write.test.js.map +1 -1
  89. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.js +39 -3
  90. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/memory-ingest.test.js.map +1 -1
  91. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/profile-update-personfields-open.test.d.ts +2 -0
  92. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/profile-update-personfields-open.test.d.ts.map +1 -0
  93. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/profile-update-personfields-open.test.js +148 -0
  94. package/payload/platform/plugins/memory/mcp/dist/tools/__tests__/profile-update-personfields-open.test.js.map +1 -0
  95. package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.d.ts +1 -47
  96. package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.d.ts.map +1 -1
  97. package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.js +9 -318
  98. package/payload/platform/plugins/memory/mcp/dist/tools/memory-archive-write.js.map +1 -1
  99. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts +7 -0
  100. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.d.ts.map +1 -1
  101. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js +14 -8
  102. package/payload/platform/plugins/memory/mcp/dist/tools/memory-ingest.js.map +1 -1
  103. package/payload/platform/plugins/memory/mcp/dist/tools/profile-update.d.ts +21 -17
  104. package/payload/platform/plugins/memory/mcp/dist/tools/profile-update.d.ts.map +1 -1
  105. package/payload/platform/plugins/memory/mcp/dist/tools/profile-update.js +77 -37
  106. package/payload/platform/plugins/memory/mcp/dist/tools/profile-update.js.map +1 -1
  107. package/payload/platform/plugins/memory/references/schema-base.md +3 -1
  108. package/payload/platform/plugins/{whatsapp-import/skills/whatsapp-import → memory/skills/conversation-archive}/SKILL.md +45 -36
  109. package/payload/platform/plugins/memory/skills/document-ingest/SKILL.md +59 -6
  110. package/payload/platform/plugins/whatsapp/PLUGIN.md +1 -1
  111. package/payload/platform/scripts/seed-neo4j.sh +9 -8
  112. package/payload/platform/templates/specialists/agents/database-operator.md +7 -14
  113. package/payload/server/chunk-7BO5HDJC.js +10093 -0
  114. package/payload/server/chunk-CUSH3UXP.js +2305 -0
  115. package/payload/server/chunk-EL4DZ56X.js +1116 -0
  116. package/payload/server/chunk-IWNDVGKT.js +10077 -0
  117. package/payload/server/chunk-KC7NUABI.js +654 -0
  118. package/payload/server/chunk-QOJ2D26Z.js +654 -0
  119. package/payload/server/chunk-RC46ZYGT.js +2305 -0
  120. package/payload/server/chunk-WUVXPZIV.js +1116 -0
  121. package/payload/server/client-pool-3TM3SRIA.js +32 -0
  122. package/payload/server/client-pool-7NTEFNVQ.js +32 -0
  123. package/payload/server/cloudflare-task-tracker-4NIODMGL.js +19 -0
  124. package/payload/server/cloudflare-task-tracker-WE77WXSI.js +19 -0
  125. package/payload/server/maxy-edge.js +3 -3
  126. package/payload/server/neo4j-migrations-4XPNJNM6.js +490 -0
  127. package/payload/server/neo4j-migrations-XTQ4WEV6.js +428 -0
  128. package/payload/server/server.js +6 -6
  129. package/payload/platform/plugins/whatsapp-import/PLUGIN.md +0 -48
  130. package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/delta-append.test.ts +0 -163
  131. package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/parse-export-lrm.test.ts +0 -83
  132. package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/parse-export.test.ts +0 -678
  133. package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/sessionize.test.ts +0 -91
  134. package/payload/platform/plugins/whatsapp-import/lib/src/__tests__/to-classifier-input.test.ts +0 -59
  135. package/payload/platform/plugins/whatsapp-import/lib/src/delta-cursor.ts +0 -54
  136. package/payload/platform/plugins/whatsapp-import/lib/src/derive-keys.ts +0 -82
  137. package/payload/platform/plugins/whatsapp-import/lib/src/index.ts +0 -22
  138. package/payload/platform/plugins/whatsapp-import/lib/src/parse-export.ts +0 -471
  139. package/payload/platform/plugins/whatsapp-import/lib/src/sessionize.ts +0 -81
  140. package/payload/platform/plugins/whatsapp-import/lib/src/to-classifier-input.ts +0 -48
  141. package/payload/platform/plugins/whatsapp-import/lib/tsconfig.json +0 -9
  142. package/payload/platform/plugins/whatsapp-import/lib/vitest.config.ts +0 -9
  143. package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/references/conversation-archive-shape.md +0 -143
  144. package/payload/platform/plugins/whatsapp-import/skills/whatsapp-import/references/export-parse.md +0 -109
@@ -1,678 +0,0 @@
1
- import { describe, it, expect, beforeEach, afterEach } from "vitest";
2
- import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
3
- import { tmpdir } from "node:os";
4
- import { join } from "node:path";
5
- import { parseExport } from "../parse-export.js";
6
-
7
- let workDir: string;
8
-
9
- beforeEach(() => {
10
- workDir = mkdtempSync(join(tmpdir(), "whatsapp-export-parser-"));
11
- });
12
-
13
- afterEach(() => {
14
- rmSync(workDir, { recursive: true, force: true });
15
- });
16
-
17
- function writeChat(name: string, content: string | Buffer): string {
18
- const filePath = join(workDir, name);
19
- writeFileSync(filePath, content);
20
- return filePath;
21
- }
22
-
23
- describe("parseExport — happy path", () => {
24
- it("parses two single-line messages and emits ISO 8601 timestamps with the supplied timezone", () => {
25
- const filePath = writeChat(
26
- "_chat.txt",
27
- [
28
- "[14/03/26, 10:15:23] Joel: Hello",
29
- "[14/03/26, 10:16:01] Sarah: Hi back",
30
- "",
31
- ].join("\n"),
32
- );
33
-
34
- const result = parseExport({
35
- filePath,
36
- accountId: "acct-123",
37
- timezone: "Europe/London",
38
- });
39
-
40
- expect(result.parsedLines).toHaveLength(2);
41
- expect(result.parsedLines[0]).toMatchObject({
42
- senderName: "Joel",
43
- body: "Hello",
44
- sequenceIndex: 0,
45
- });
46
- expect(result.parsedLines[1]).toMatchObject({
47
- senderName: "Sarah",
48
- body: "Hi back",
49
- sequenceIndex: 1,
50
- });
51
- // ISO 8601 with timezone offset.
52
- expect(result.parsedLines[0].dateSent).toMatch(/^2026-03-14T10:15:23(\.\d+)?[+-]\d{2}:\d{2}$/);
53
- expect(result.counters.parsed).toBe(2);
54
- expect(result.counters.systemSkipped).toBe(0);
55
- expect(result.counters.mediaSkipped).toBe(0);
56
- expect(result.counters.parseErrors).toBe(0);
57
- });
58
-
59
- it("emits archiveSourceFile = sha256 of the raw file bytes (with whatsapp-export: prefix)", () => {
60
- const content = "[14/03/26, 10:15:23] Joel: Hello\n";
61
- const filePath = writeChat("_chat.txt", content);
62
-
63
- const result = parseExport({
64
- filePath,
65
- accountId: "acct-123",
66
- timezone: "Europe/London",
67
- });
68
-
69
- // Expected value: sha256 of the literal content above.
70
- // Compute deterministically rather than hard-coding so the assert reflects the spec, not a magic string.
71
- const { createHash } = require("node:crypto") as typeof import("node:crypto");
72
- const expected = createHash("sha256").update(content).digest("hex");
73
- expect(result.archiveSourceFile).toBe(`whatsapp-export:${expected}`);
74
- });
75
-
76
- it("derives conversationId = whatsapp-export:<sha256>:<accountId>", () => {
77
- const filePath = writeChat("_chat.txt", "[14/03/26, 10:15:23] Joel: Hello\n");
78
- const result = parseExport({
79
- filePath,
80
- accountId: "acct-abc",
81
- timezone: "Europe/London",
82
- });
83
- expect(result.conversationId).toMatch(/^whatsapp-export:[a-f0-9]{64}:acct-abc$/);
84
- });
85
-
86
- it("parsing the same file twice returns identical output (idempotent)", () => {
87
- const filePath = writeChat(
88
- "_chat.txt",
89
- "[14/03/26, 10:15:23] Joel: Hello\n[14/03/26, 10:16:01] Sarah: Hi\n",
90
- );
91
- const a = parseExport({
92
- filePath,
93
- accountId: "acct-123",
94
- timezone: "Europe/London",
95
- });
96
- const b = parseExport({
97
- filePath,
98
- accountId: "acct-123",
99
- timezone: "Europe/London",
100
- });
101
- expect(a).toEqual(b);
102
- });
103
- });
104
-
105
- describe("parseExport — multi-line bodies", () => {
106
- it("accumulates continuation lines into the previous message's body, joined with \\n", () => {
107
- const filePath = writeChat(
108
- "_chat.txt",
109
- [
110
- "[14/03/26, 10:15:23] Joel: Quick question about the deck —",
111
- "do you have the v3 PDF anywhere?",
112
- "I checked Drive and only see v2.",
113
- "[14/03/26, 10:16:01] Sarah: Sec, will dig it out",
114
- "",
115
- ].join("\n"),
116
- );
117
-
118
- const result = parseExport({
119
- filePath,
120
- accountId: "acct-123",
121
- timezone: "Europe/London",
122
- });
123
-
124
- expect(result.parsedLines).toHaveLength(2);
125
- expect(result.parsedLines[0].body).toBe(
126
- "Quick question about the deck —\ndo you have the v3 PDF anywhere?\nI checked Drive and only see v2.",
127
- );
128
- expect(result.parsedLines[1].body).toBe("Sec, will dig it out");
129
- });
130
- });
131
-
132
- describe("parseExport — system messages skipped with counter", () => {
133
- it("skips end-to-end-encrypted header and group-event lines, increments systemSkipped", () => {
134
- // Real WhatsApp exports emit system events WITHOUT a `: ` separator:
135
- // the sender's name flows straight into the verb phrase. Body-level
136
- // events (`You deleted this message.`) keep the colon and are caught by
137
- // a separate body-level filter.
138
- const filePath = writeChat(
139
- "_chat.txt",
140
- [
141
- "[14/03/26, 09:00:00] Messages and calls are end-to-end encrypted. No one outside of this chat, not even WhatsApp, can read or listen to them.",
142
- "[14/03/26, 10:00:00] Joel created group \"Q3 planning\"",
143
- "[14/03/26, 10:00:01] Joel added Sarah",
144
- "[14/03/26, 10:15:23] Joel: Hello",
145
- "[14/03/26, 11:00:00] Sarah left",
146
- "[14/03/26, 11:01:00] Sarah's security code changed.",
147
- "",
148
- ].join("\n"),
149
- );
150
-
151
- const result = parseExport({
152
- filePath,
153
- accountId: "acct-123",
154
- timezone: "Europe/London",
155
- });
156
-
157
- expect(result.parsedLines).toHaveLength(1);
158
- expect(result.parsedLines[0].body).toBe("Hello");
159
- expect(result.counters.systemSkipped).toBeGreaterThanOrEqual(5);
160
- });
161
-
162
- it("skips 'You deleted this message.' and 'This message was deleted.'", () => {
163
- const filePath = writeChat(
164
- "_chat.txt",
165
- [
166
- "[14/03/26, 10:15:23] Joel: You deleted this message.",
167
- "[14/03/26, 10:16:01] Sarah: This message was deleted.",
168
- "[14/03/26, 10:17:01] Joel: Real message",
169
- "",
170
- ].join("\n"),
171
- );
172
-
173
- const result = parseExport({
174
- filePath,
175
- accountId: "acct-123",
176
- timezone: "Europe/London",
177
- });
178
-
179
- expect(result.parsedLines).toHaveLength(1);
180
- expect(result.parsedLines[0].body).toBe("Real message");
181
- expect(result.counters.systemSkipped).toBe(2);
182
- });
183
- });
184
-
185
- describe("parseExport — media-only lines skipped", () => {
186
- it("skips <Media omitted>, IMG-/VID-/PTT-/AUD-/STK- attachments, and PDF attachments", () => {
187
- const filePath = writeChat(
188
- "_chat.txt",
189
- [
190
- "[14/03/26, 10:00:00] Joel: <Media omitted>",
191
- "[14/03/26, 10:01:00] Joel: IMG-20260314-WA0001.jpg (file attached)",
192
- "[14/03/26, 10:02:00] Joel: VID-20260314-WA0002.mp4 (file attached)",
193
- "[14/03/26, 10:03:00] Joel: PTT-20260314-WA0003.opus (file attached)",
194
- "[14/03/26, 10:04:00] Joel: AUD-20260314-WA0004.opus (file attached)",
195
- "[14/03/26, 10:05:00] Joel: STK-20260314-WA0005.webp (file attached)",
196
- "[14/03/26, 10:06:00] Joel: deck-v3.pdf (file attached)",
197
- "[14/03/26, 10:07:00] Joel: contract.docx (file attached)",
198
- "[14/03/26, 10:15:23] Joel: Hello",
199
- "",
200
- ].join("\n"),
201
- );
202
-
203
- const result = parseExport({
204
- filePath,
205
- accountId: "acct-123",
206
- timezone: "Europe/London",
207
- });
208
-
209
- expect(result.parsedLines).toHaveLength(1);
210
- expect(result.parsedLines[0].body).toBe("Hello");
211
- expect(result.counters.mediaSkipped).toBe(8);
212
- });
213
-
214
- it("keeps mixed messages where the body has text plus a media reference", () => {
215
- const filePath = writeChat(
216
- "_chat.txt",
217
- [
218
- "[14/03/26, 10:01:00] Joel: see attached IMG-20260314-WA0001.jpg",
219
- "",
220
- ].join("\n"),
221
- );
222
-
223
- const result = parseExport({
224
- filePath,
225
- accountId: "acct-123",
226
- timezone: "Europe/London",
227
- });
228
-
229
- expect(result.parsedLines).toHaveLength(1);
230
- expect(result.parsedLines[0].body).toBe("see attached IMG-20260314-WA0001.jpg");
231
- expect(result.counters.mediaSkipped).toBe(0);
232
- });
233
- });
234
-
235
- describe("parseExport — empty body skipped with counter", () => {
236
- it("treats an empty body (timestamp + sender + colon + nothing) as a system skip", () => {
237
- const filePath = writeChat(
238
- "_chat.txt",
239
- [
240
- "[14/03/26, 10:15:23] Joel: ",
241
- "[14/03/26, 10:16:01] Sarah: Hi",
242
- "",
243
- ].join("\n"),
244
- );
245
-
246
- const result = parseExport({
247
- filePath,
248
- accountId: "acct-123",
249
- timezone: "Europe/London",
250
- });
251
-
252
- expect(result.parsedLines).toHaveLength(1);
253
- expect(result.parsedLines[0].senderName).toBe("Sarah");
254
- expect(result.counters.systemSkipped).toBe(1);
255
- });
256
- });
257
-
258
- describe("parseExport — encoding + line ending invariants", () => {
259
- it("strips a leading UTF-8 BOM (U+FEFF) before parsing the first line", () => {
260
- const bom = Buffer.from([0xef, 0xbb, 0xbf]);
261
- const body = Buffer.from("[14/03/26, 10:15:23] Joel: Hello\n", "utf8");
262
- const filePath = writeChat("_chat.txt", Buffer.concat([bom, body]));
263
-
264
- const result = parseExport({
265
- filePath,
266
- accountId: "acct-123",
267
- timezone: "Europe/London",
268
- });
269
-
270
- expect(result.parsedLines).toHaveLength(1);
271
- expect(result.parsedLines[0].senderName).toBe("Joel");
272
- });
273
-
274
- it("normalises CRLF line endings to LF before tokenisation", () => {
275
- const filePath = writeChat(
276
- "_chat.txt",
277
- "[14/03/26, 10:15:23] Joel: Hello\r\n[14/03/26, 10:16:01] Sarah: Hi\r\n",
278
- );
279
-
280
- const result = parseExport({
281
- filePath,
282
- accountId: "acct-123",
283
- timezone: "Europe/London",
284
- });
285
-
286
- expect(result.parsedLines).toHaveLength(2);
287
- expect(result.parsedLines[0].body).toBe("Hello");
288
- expect(result.parsedLines[1].body).toBe("Hi");
289
- });
290
- });
291
-
292
- describe("parseExport — sender containing a colon (splits on the FIRST ': ')", () => {
293
- it("preserves the colon inside the sender display name", () => {
294
- const filePath = writeChat(
295
- "_chat.txt",
296
- "[14/03/26, 10:15:23] Joel: Work: today's update — see deck\n",
297
- );
298
- const result = parseExport({
299
- filePath,
300
- accountId: "acct-123",
301
- timezone: "Europe/London",
302
- });
303
- expect(result.parsedLines).toHaveLength(1);
304
- expect(result.parsedLines[0].senderName).toBe("Joel");
305
- expect(result.parsedLines[0].body).toBe("Work: today's update — see deck");
306
- });
307
- });
308
-
309
- describe("parseExport — forwarded messages preserve LRM character", () => {
310
- it("keeps the U+200E LEFT-TO-RIGHT MARK in the body verbatim", () => {
311
- const filePath = writeChat(
312
- "_chat.txt",
313
- "[14/03/26, 10:15:23] Joel: ‎Forwarded\nForwarded body\n",
314
- );
315
- const result = parseExport({
316
- filePath,
317
- accountId: "acct-123",
318
- timezone: "Europe/London",
319
- });
320
- expect(result.parsedLines).toHaveLength(1);
321
- expect(result.parsedLines[0].body).toContain("‎");
322
- });
323
- });
324
-
325
- describe("parseExport — older exports with HH:MM (no seconds)", () => {
326
- it("treats missing seconds as 0", () => {
327
- const filePath = writeChat(
328
- "_chat.txt",
329
- "[14/03/26, 10:15] Joel: Hello\n",
330
- );
331
- const result = parseExport({
332
- filePath,
333
- accountId: "acct-123",
334
- timezone: "Europe/London",
335
- });
336
- expect(result.parsedLines).toHaveLength(1);
337
- expect(result.parsedLines[0].dateSent).toMatch(/^2026-03-14T10:15:00(\.\d+)?[+-]\d{2}:\d{2}$/);
338
- });
339
- });
340
-
341
- describe("parseExport — date format toggle", () => {
342
- it("defaults to DD/MM/YY", () => {
343
- const filePath = writeChat(
344
- "_chat.txt",
345
- "[14/03/26, 10:15:23] Joel: Hello\n",
346
- );
347
- const result = parseExport({
348
- filePath,
349
- accountId: "acct-123",
350
- timezone: "Europe/London",
351
- });
352
- expect(result.parsedLines[0].dateSent).toMatch(/^2026-03-14T/);
353
- });
354
-
355
- it("accepts dateFormat='MM/DD/YY' and parses the alternate ordering", () => {
356
- const filePath = writeChat(
357
- "_chat.txt",
358
- "[03/14/26, 10:15:23] Joel: Hello\n",
359
- );
360
- const result = parseExport({
361
- filePath,
362
- accountId: "acct-123",
363
- timezone: "America/New_York",
364
- dateFormat: "MM/DD/YY",
365
- });
366
- expect(result.parsedLines[0].dateSent).toMatch(/^2026-03-14T/);
367
- });
368
-
369
- it("rejects out-of-range components (month/day) instead of silently rolling over", () => {
370
- // 14/03/26 parsed as MM/DD/YY would have month=14 → Date.UTC silently
371
- // rolls to year+1 / February. The range check turns it back into a
372
- // non-match so the parser falls into "zero parsed lines" rather than
373
- // emitting wrong timestamps.
374
- const filePath = writeChat(
375
- "_chat.txt",
376
- "[14/03/26, 10:15:23] Joel: Hello\n",
377
- );
378
- expect(() =>
379
- parseExport({
380
- filePath,
381
- accountId: "acct-123",
382
- timezone: "Europe/London",
383
- dateFormat: "MM/DD/YY",
384
- }),
385
- ).toThrow(/zero parsed lines/i);
386
- });
387
- });
388
-
389
- describe("parseExport — 4-digit year grammar (Task 845)", () => {
390
- it("parses DD/MM/YYYY with explicit 4-digit dateFormat", () => {
391
- // Joel→Adam Mackay export shape: WhatsApp's modern locale emits 4-digit years.
392
- const filePath = writeChat(
393
- "_chat.txt",
394
- "[20/06/2025, 16:35:12] Joel Smalley: Hello\n",
395
- );
396
- const result = parseExport({
397
- filePath,
398
- accountId: "acct-123",
399
- timezone: "Europe/London",
400
- dateFormat: "DD/MM/YYYY",
401
- });
402
- expect(result.parsedLines).toHaveLength(1);
403
- expect(result.parsedLines[0].senderName).toBe("Joel Smalley");
404
- expect(result.parsedLines[0].body).toBe("Hello");
405
- expect(result.parsedLines[0].dateSent).toMatch(/^2025-06-20T16:35:12/);
406
- });
407
-
408
- it("parses MM/DD/YYYY with explicit 4-digit dateFormat", () => {
409
- const filePath = writeChat(
410
- "_chat.txt",
411
- "[06/20/2025, 16:35:12] Joel: Hello\n",
412
- );
413
- const result = parseExport({
414
- filePath,
415
- accountId: "acct-123",
416
- timezone: "America/New_York",
417
- dateFormat: "MM/DD/YYYY",
418
- });
419
- expect(result.parsedLines).toHaveLength(1);
420
- expect(result.parsedLines[0].dateSent).toMatch(/^2025-06-20T16:35:12/);
421
- });
422
-
423
- it("auto-detects DD/MM/YYYY when dateFormat omitted (4-digit year, default WhatsApp locale)", () => {
424
- // No explicit dateFormat; first matched line `[20/06/2025, ...]` is range-valid
425
- // as DD/MM (day=20, month=6) → DD/MM lock.
426
- const filePath = writeChat(
427
- "_chat.txt",
428
- "[20/06/2025, 16:35:12] Joel: Hello\n",
429
- );
430
- const result = parseExport({
431
- filePath,
432
- accountId: "acct-123",
433
- timezone: "Europe/London",
434
- });
435
- expect(result.parsedLines).toHaveLength(1);
436
- expect(result.parsedLines[0].dateSent).toMatch(/^2025-06-20T/);
437
- });
438
-
439
- it("auto-detects MM/DD/YYYY when first line range-fails as DD/MM (US-locale export)", () => {
440
- // `[06/20/2025, ...]` interpreted DD/MM → day=6, month=20 → range-fail.
441
- // Auto-detect probes DD/MM first; falls back to MM/DD lock.
442
- const filePath = writeChat(
443
- "_chat.txt",
444
- "[06/20/2025, 16:35:12] Joel: Hello\n",
445
- );
446
- const result = parseExport({
447
- filePath,
448
- accountId: "acct-123",
449
- timezone: "America/New_York",
450
- });
451
- expect(result.parsedLines).toHaveLength(1);
452
- expect(result.parsedLines[0].dateSent).toMatch(/^2025-06-20T/);
453
- });
454
-
455
- it("accepts mixed 2-digit and 4-digit years in the same file (regex \\d{2,4})", () => {
456
- const filePath = writeChat(
457
- "_chat.txt",
458
- [
459
- "[20/06/25, 16:35:12] Joel: Two-digit year",
460
- "[21/06/2025, 09:00:00] Joel: Four-digit year",
461
- "",
462
- ].join("\n"),
463
- );
464
- const result = parseExport({
465
- filePath,
466
- accountId: "acct-123",
467
- timezone: "Europe/London",
468
- });
469
- expect(result.parsedLines).toHaveLength(2);
470
- expect(result.parsedLines[0].dateSent).toMatch(/^2025-06-20T16:35:12/);
471
- expect(result.parsedLines[1].dateSent).toMatch(/^2025-06-21T09:00:00/);
472
- });
473
-
474
- it("2-digit year still maps to 2000+yy after grammar widening (regression)", () => {
475
- const filePath = writeChat(
476
- "_chat.txt",
477
- "[14/03/26, 10:15:23] Joel: Hello\n",
478
- );
479
- const result = parseExport({
480
- filePath,
481
- accountId: "acct-123",
482
- timezone: "Europe/London",
483
- });
484
- expect(result.parsedLines[0].dateSent).toMatch(/^2026-03-14T/);
485
- });
486
-
487
- it("4-digit year passes through unchanged (no 2000+ shift)", () => {
488
- const filePath = writeChat(
489
- "_chat.txt",
490
- "[14/03/2099, 10:15:23] Joel: Hello\n",
491
- );
492
- const result = parseExport({
493
- filePath,
494
- accountId: "acct-123",
495
- timezone: "Europe/London",
496
- });
497
- expect(result.parsedLines[0].dateSent).toMatch(/^2099-03-14T/);
498
- });
499
-
500
- it("rejects 3-digit years (truncation typos, hand-edited files) — silent year-202-AD coercion is the failure mode this guards against", () => {
501
- // Without the explicit (\d{4}|\d{2}) alternation, the greedy `\d{2,4}`
502
- // would match "202" and the year-length branch would emit year 202 AD
503
- // timestamps that pass downstream validation but are clearly wrong.
504
- // Reject as not-a-prefix; the file falls into the parse-grammar-miss
505
- // diagnostic path so the operator sees the offending header shape.
506
- const filePath = writeChat(
507
- "_chat.txt",
508
- "[14/03/202, 10:15:23] Joel: Hello\n",
509
- );
510
- expect(() =>
511
- parseExport({
512
- filePath,
513
- accountId: "acct-123",
514
- timezone: "Europe/London",
515
- }),
516
- ).toThrow(/parse-grammar-miss first-line="\[14\/03\/202, 10:15:23\] Joel: Hello"/);
517
- });
518
- });
519
-
520
- describe("parseExport — parse-grammar-miss diagnostic (Task 845)", () => {
521
- it("includes a sanitised first-line sample in the thrown error when no prefix matches", () => {
522
- // Junk file with no timestamp prefixes → zero-parsed-lines path.
523
- // The thrown error must include the first non-blank line so the operator
524
- // knows WHY the file rejected, instead of guessing as in conversation
525
- // 47c6a590-0c2c-4006-9aca-6ee9ec93c95f.
526
- const filePath = writeChat(
527
- "_chat.txt",
528
- "this is some random text\nwith no timestamp prefixes\nat all.\n",
529
- );
530
- expect(() =>
531
- parseExport({
532
- filePath,
533
- accountId: "acct-123",
534
- timezone: "Europe/London",
535
- }),
536
- ).toThrow(/parse-grammar-miss first-line="this is some random text"/);
537
- });
538
-
539
- it("truncates the first-line sample to 80 characters", () => {
540
- const longLine = "x".repeat(200);
541
- const filePath = writeChat("_chat.txt", longLine + "\n");
542
- expect(() =>
543
- parseExport({
544
- filePath,
545
- accountId: "acct-123",
546
- timezone: "Europe/London",
547
- }),
548
- ).toThrow(/parse-grammar-miss first-line="x{80}"/);
549
- });
550
-
551
- it("strips control characters from the first-line sample", () => {
552
- // Tab, bell, and other control chars should not leak into the diagnostic.
553
- const filePath = writeChat("_chat.txt", "junk\twith\x07control\x01chars\n");
554
- expect(() =>
555
- parseExport({
556
- filePath,
557
- accountId: "acct-123",
558
- timezone: "Europe/London",
559
- }),
560
- ).toThrow(/parse-grammar-miss first-line="junkwithcontrolchars"/);
561
- });
562
- });
563
-
564
- describe("parseExport — LOUD-FAIL scenarios", () => {
565
- it("throws when the file is empty", () => {
566
- const filePath = writeChat("_chat.txt", "");
567
- expect(() =>
568
- parseExport({
569
- filePath,
570
- accountId: "acct-123",
571
- timezone: "Europe/London",
572
- }),
573
- ).toThrow(/empty|zero parsed lines|not.*_chat\.txt/i);
574
- });
575
-
576
- it("throws when no lines parse (file isn't a _chat.txt)", () => {
577
- const filePath = writeChat(
578
- "_chat.txt",
579
- "this is some random text\nwith no timestamp prefixes\nat all.\n",
580
- );
581
- expect(() =>
582
- parseExport({
583
- filePath,
584
- accountId: "acct-123",
585
- timezone: "Europe/London",
586
- }),
587
- ).toThrow(/zero parsed lines|not.*_chat\.txt/i);
588
- });
589
-
590
- it("throws when a timestamp prefix matches but the body parse fails (no ': ' separator)", () => {
591
- const filePath = writeChat(
592
- "_chat.txt",
593
- [
594
- "[14/03/26, 10:15:23] Joel: Hello",
595
- "[14/03/26, 10:16:01] BrokenLineWithoutColonSeparator",
596
- "[14/03/26, 10:17:01] Sarah: Hi",
597
- "",
598
- ].join("\n"),
599
- );
600
-
601
- expect(() =>
602
- parseExport({
603
- filePath,
604
- accountId: "acct-123",
605
- timezone: "Europe/London",
606
- }),
607
- ).toThrow(/parse-error.*line=2|line 2|malformed/i);
608
- });
609
-
610
- it("throws when accountId is missing or empty", () => {
611
- const filePath = writeChat(
612
- "_chat.txt",
613
- "[14/03/26, 10:15:23] Joel: Hello\n",
614
- );
615
- expect(() =>
616
- parseExport({
617
- filePath,
618
- accountId: "",
619
- timezone: "Europe/London",
620
- }),
621
- ).toThrow(/accountId/i);
622
- });
623
-
624
- it("throws when timezone is missing or empty", () => {
625
- const filePath = writeChat(
626
- "_chat.txt",
627
- "[14/03/26, 10:15:23] Joel: Hello\n",
628
- );
629
- expect(() =>
630
- parseExport({
631
- filePath,
632
- accountId: "acct-123",
633
- timezone: "",
634
- }),
635
- ).toThrow(/timezone/i);
636
- });
637
- });
638
-
639
- describe("parseExport — sequenceIndex monotonicity", () => {
640
- it("assigns sequenceIndex by parsed-message order, starting at 0", () => {
641
- const filePath = writeChat(
642
- "_chat.txt",
643
- [
644
- "[14/03/26, 10:15:23] Joel: A",
645
- "[14/03/26, 10:15:24] Joel: <Media omitted>",
646
- "[14/03/26, 10:15:25] Sarah: B",
647
- "[14/03/26, 10:15:26] Sarah: C",
648
- "",
649
- ].join("\n"),
650
- );
651
-
652
- const result = parseExport({
653
- filePath,
654
- accountId: "acct-123",
655
- timezone: "Europe/London",
656
- });
657
-
658
- expect(result.parsedLines.map((p) => p.sequenceIndex)).toEqual([0, 1, 2]);
659
- expect(result.parsedLines.map((p) => p.body)).toEqual(["A", "B", "C"]);
660
- expect(result.counters.mediaSkipped).toBe(1);
661
- });
662
- });
663
-
664
- describe("parseExport — phone-number senders", () => {
665
- it("accepts phone-number-style senderNames verbatim (no normalisation)", () => {
666
- const filePath = writeChat(
667
- "_chat.txt",
668
- "[14/03/26, 10:15:23] +44 7700 900123: Hello\n",
669
- );
670
- const result = parseExport({
671
- filePath,
672
- accountId: "acct-123",
673
- timezone: "Europe/London",
674
- });
675
- expect(result.parsedLines).toHaveLength(1);
676
- expect(result.parsedLines[0].senderName).toBe("+44 7700 900123");
677
- });
678
- });