@nathanvale/chatline 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,1729 @@
1
+ // src/config/generator.ts
2
+ import { constants } from "node:fs";
3
+ import { access, writeFile } from "node:fs/promises";
4
+ import yaml from "js-yaml";
5
+
6
+ // src/config/schema.ts
7
+ import { z } from "zod";
8
/**
 * Shape of the `gemini` config section.
 * Only `apiKey` is mandatory; every other field falls back to a default.
 */
var GeminiConfigSchema = z.object({
  // Must be a non-empty string.
  apiKey: z.string().min(1, "Gemini API key is required"),
  // Model identifier, passed through unchanged.
  model: z.string().default("gemini-1.5-pro"),
  // Non-negative number (the generated template documents it as milliseconds).
  rateLimitDelay: z.number().min(0).default(1000),
  // Bounded to the inclusive range 0-10.
  maxRetries: z.number().min(0).max(10).default(3)
});
14
/**
 * Shape of the optional `firecrawl` config section.
 * The whole section may be omitted; when present, `enabled` defaults to true.
 */
var FirecrawlConfigSchema = z
  .object({
    apiKey: z.string().optional(),
    enabled: z.boolean().default(true)
  })
  .optional();
18
/**
 * Shape of the `enrichment` config section: feature toggles plus cache and
 * checkpoint settings. Every field has a default.
 */
var EnrichmentConfigSchema = z.object({
  // Feature toggles, all on by default.
  enableVisionAnalysis: z.boolean().default(true),
  enableAudioTranscription: z.boolean().default(true),
  enableLinkEnrichment: z.boolean().default(true),
  // Directory used for cached images.
  imageCacheDir: z.string().default("./.cache/images"),
  // Bounded to the inclusive range 1-10000.
  checkpointInterval: z.number().min(1).max(1e4).default(100),
  forceRefresh: z.boolean().default(false)
});
26
/**
 * Shape of the `render` config section. Every field has a default.
 */
var RenderConfigSchema = z.object({
  groupByTimeOfDay: z.boolean().default(true),
  renderRepliesAsNested: z.boolean().default(true),
  renderTapbacksAsEmoji: z.boolean().default(true),
  // Bounded to the inclusive range 1-100.
  maxNestingDepth: z.number().min(1).max(100).default(10)
});
32
/**
 * Root configuration schema.
 *
 * `gemini` is the only section without a default, so a config that omits it
 * fails validation. `enrichment` and `render` fall back to complete default
 * objects when the section is missing.
 */
var ConfigSchema = z.object({
  version: z.string().default("1.0"),
  // At least one non-empty path is required.
  attachmentRoots: z
    .array(z.string().min(1, "Attachment root path cannot be empty"))
    .min(1, "At least one attachment root is required")
    .default(["~/Library/Messages/Attachments"]),
  gemini: GeminiConfigSchema,
  firecrawl: FirecrawlConfigSchema,
  enrichment: EnrichmentConfigSchema.default({
    enableVisionAnalysis: true,
    enableAudioTranscription: true,
    enableLinkEnrichment: true,
    imageCacheDir: "./.cache/images",
    checkpointInterval: 100,
    forceRefresh: false
  }),
  render: RenderConfigSchema.default({
    groupByTimeOfDay: true,
    renderRepliesAsNested: true,
    renderTapbacksAsEmoji: true,
    maxNestingDepth: 10
  })
});
52
/**
 * Validates `config` against ConfigSchema.
 *
 * @param config Raw, untrusted configuration value.
 * @returns The parsed configuration with schema defaults applied.
 * @throws Whatever `ConfigSchema.parse` throws when validation fails.
 */
function validateConfig(config) {
  const parsed = ConfigSchema.parse(config);
  return parsed;
}
55
/**
 * Non-throwing variant of validateConfig.
 *
 * @param config Raw, untrusted configuration value.
 * @returns `{ success: true, data }` when valid, otherwise
 *   `{ success: false, errors }` where each error has a dot-joined `path`
 *   and a `message`.
 */
function validateConfigSafe(config) {
  const result = ConfigSchema.safeParse(config);
  if (result.success) {
    return { success: true, data: result.data };
  }
  // Read `issues`, the canonical ZodError field, rather than the legacy
  // `errors` alias, so this does not depend on the alias being present.
  const errors = result.error.issues.map((issue) => ({
    path: issue.path.join("."),
    message: issue.message
  }));
  return { success: false, errors };
}
68
/**
 * Plain-object copy of the schema defaults.
 * There is no `gemini` entry here: the API key has no default.
 */
var DEFAULT_CONFIG = {
  version: "1.0",
  attachmentRoots: ["~/Library/Messages/Attachments"],
  // Same values as the `enrichment` default in ConfigSchema.
  enrichment: {
    enableVisionAnalysis: true,
    enableAudioTranscription: true,
    enableLinkEnrichment: true,
    imageCacheDir: "./.cache/images",
    checkpointInterval: 100,
    forceRefresh: false
  },
  // Same values as the `render` default in ConfigSchema.
  render: {
    groupByTimeOfDay: true,
    renderRepliesAsNested: true,
    renderTapbacksAsEmoji: true,
    maxNestingDepth: 10
  }
};
86
/** Config file locations relative to the working directory, listed YAML first. */
var CONFIG_FILE_PATTERNS = [
  "./imessage-config.yaml",
  "./imessage-config.yml",
  "./imessage-config.json"
];
91
/**
 * Determines the config format from a file path's extension.
 *
 * The match is case-insensitive, so `Config.JSON` and `settings.YML` are
 * accepted as well as their lowercase forms.
 *
 * @param filePath Path or file name of the config file.
 * @returns `"json"` for `.json`, `"yaml"` for `.yaml` / `.yml`.
 * @throws Error when the extension is not one of the supported ones.
 */
function detectConfigFormat(filePath) {
  const lowered = filePath.toLowerCase();
  if (lowered.endsWith(".json")) {
    return "json";
  }
  if (lowered.endsWith(".yaml") || lowered.endsWith(".yml")) {
    return "yaml";
  }
  // Report the path exactly as supplied, not the lowercased copy.
  throw new Error(`Unsupported config file format: ${filePath}. Supported formats: .json, .yaml, .yml`);
}
100
+
101
+ // src/config/generator.ts
102
/**
 * Commented YAML template returned by generateConfigContent for the "yaml" format.
 *
 * Nested keys are indented by two spaces so that `gemini`, `enrichment` and
 * `render` parse as mappings. `\${...}` is escaped so the placeholder reaches
 * the output file literally instead of being interpolated here.
 */
var CONFIG_TEMPLATE_WITH_DOCS = `# iMessage Timeline Configuration File
# Generated by: chatline init
#
# This file configures the iMessage message export and enrichment pipeline.
# Supports both YAML and JSON formats with environment variable substitution.

# Schema version (for future migrations)
version: "1.0"

# ============================================================================
# Attachment Resolution
# ============================================================================

# Directories to search for message attachments
# Multiple paths supported; first match wins
# Environment variable expansion supported: \${HOME}/Library/Messages
attachmentRoots:
  - "~/Library/Messages/Attachments"
  # Add additional search paths if needed:
  # - "/path/to/imazing/backup/attachments"

# ============================================================================
# Gemini AI Configuration
# ============================================================================

gemini:
  # Google Gemini API key (required for AI enrichment)
  # Get your key from: https://makersuite.google.com/app/apikey
  # Environment variable recommended for security: \${GEMINI_API_KEY}
  apiKey: "\${GEMINI_API_KEY}"

  # Gemini model to use for enrichment
  # Options: gemini-1.5-pro, gemini-1.5-flash
  # Default: gemini-1.5-pro (more accurate, slower)
  model: "gemini-1.5-pro"

  # Delay between API calls (milliseconds)
  # Prevents rate limiting; adjust based on your quota
  # Free tier: 60 requests/minute = 1000ms delay minimum
  # Default: 1000 (1 second)
  rateLimitDelay: 1000

  # Maximum retries for failed API calls
  # Applies exponential backoff with jitter
  # Range: 0-10
  # Default: 3
  maxRetries: 3

# ============================================================================
# Firecrawl Configuration (Optional)
# ============================================================================

# firecrawl:
#   # Firecrawl API key for enhanced link scraping (optional)
#   # Get your key from: https://firecrawl.dev
#   # Falls back to built-in scrapers if not provided
#   apiKey: "\${FIRECRAWL_API_KEY}"
#
#   # Enable/disable Firecrawl for link enrichment
#   # Default: true (if apiKey provided)
#   enabled: true

# ============================================================================
# Enrichment Pipeline Configuration
# ============================================================================

enrichment:
  # Enable image analysis (captions, descriptions via Gemini Vision)
  # HEIC/TIFF files converted to JPG for analysis
  # Default: true
  enableVisionAnalysis: true

  # Enable audio transcription (voice memos, audio messages)
  # Includes timestamps, speaker detection, and summaries
  # Default: true
  enableAudioTranscription: true

  # Enable link context extraction (titles, summaries, metadata)
  # Uses Firecrawl if configured, falls back to built-in scrapers
  # Default: true
  enableLinkEnrichment: true

  # Cache directory for image previews (HEIC/TIFF → JPG)
  # Prevents redundant conversions on re-runs
  # Default: ./.cache/images
  imageCacheDir: "./.cache/images"

  # Checkpoint interval (messages per checkpoint)
  # Lower = more frequent checkpoints = safer but slower
  # Higher = faster but more work lost on interruption
  # Range: 1-10000
  # Default: 100
  checkpointInterval: 100

  # Force refresh of existing enrichments
  # If true, re-enriches messages even if already enriched
  # Default: false (skip already-enriched messages)
  forceRefresh: false

# ============================================================================
# Markdown Rendering Configuration
# ============================================================================

render:
  # Group messages by time-of-day (Morning, Afternoon, Evening)
  # If false, renders as flat chronological list
  # Default: true
  groupByTimeOfDay: true

  # Render replies as nested blockquotes under parent message
  # If false, renders as flat list with reply indicators
  # Default: true
  renderRepliesAsNested: true

  # Render tapbacks (reactions) as emoji (❤️, \uD83D\uDE02, etc.)
  # If false, renders as text descriptions
  # Default: true
  renderTapbacksAsEmoji: true

  # Maximum nesting depth for nested replies
  # Prevents infinite recursion in circular reply chains
  # Range: 1-100
  # Default: 10
  maxNestingDepth: 10
`;
227
/**
 * Object serialised by generateConfigContent for the "json" format.
 * JSON has no comments, so the notes live in underscore-prefixed keys.
 */
var CONFIG_TEMPLATE_JSON = {
  // Informational keys only.
  _comment: "iMessage Timeline Configuration File",
  _generated: "Generated by: chatline init",
  _docs: "Full documentation: https://github.com/your-repo/chatline",
  version: "1.0",
  attachmentRoots: ["~/Library/Messages/Attachments"],
  gemini: {
    _comment: "Get API key from: https://makersuite.google.com/app/apikey",
    // Placeholder in the same ${NAME} form that substituteEnvVars expands.
    apiKey: "${GEMINI_API_KEY}",
    model: "gemini-1.5-pro",
    rateLimitDelay: 1000,
    maxRetries: 3
  },
  enrichment: {
    enableVisionAnalysis: true,
    enableAudioTranscription: true,
    enableLinkEnrichment: true,
    imageCacheDir: "./.cache/images",
    checkpointInterval: 100,
    forceRefresh: false
  },
  render: {
    groupByTimeOfDay: true,
    renderRepliesAsNested: true,
    renderTapbacksAsEmoji: true,
    maxNestingDepth: 10
  }
};
255
/**
 * Produces the text of a starter config file.
 *
 * @param format `"yaml"` selects the commented YAML template; any other value
 *   yields the JSON template.
 * @returns The YAML template verbatim, or the JSON template pretty-printed
 *   with two-space indentation and a trailing newline.
 */
function generateConfigContent(format) {
  switch (format) {
    case "yaml":
      return CONFIG_TEMPLATE_WITH_DOCS;
    default: {
      const serialized = JSON.stringify(CONFIG_TEMPLATE_JSON, null, 2);
      return serialized + "\n";
    }
  }
}
262
/**
 * Parses generated config text and validates it against the config schema.
 *
 * YAML is parsed with js-yaml's JSON_SCHEMA, the same schema loadConfigFile
 * uses, so this check sees the values the loader will later see.
 *
 * @param content Text of the config file.
 * @param format `"json"` parses with JSON.parse; anything else is parsed as YAML.
 * @returns `{ valid: true }` on success, otherwise `{ valid: false, errors }`
 *   holding a single message.
 */
function validateGeneratedConfig(content, format) {
  try {
    const parsed = format === "json"
      ? JSON.parse(content)
      : yaml.load(content, { schema: yaml.JSON_SCHEMA });
    validateConfig(parsed);
    return { valid: true };
  } catch (error) {
    // Parse and validation failures are reported, never rethrown.
    const message = error instanceof Error ? error.message : "Unknown validation error";
    return { valid: false, errors: [message] };
  }
}
285
/**
 * Default output path for a generated config file.
 *
 * @param format `"yaml"` selects the `.yaml` path; any other value selects `.json`.
 * @returns A path relative to the current directory.
 */
function getDefaultConfigPath(format) {
  if (format === "yaml") {
    return "./imessage-config.yaml";
  }
  return "./imessage-config.json";
}
288
+ // src/config/loader.ts
289
+ import { constants as constants2 } from "node:fs";
290
+ import { access as access2, readFile } from "node:fs/promises";
291
+ import yaml2 from "js-yaml";
292
// Most recently validated config; null until loadConfig succeeds or after clearConfigCache.
var configCache = null;
// The `configPath` option recorded alongside `configCache` (null when none was given).
var configCachePath = null;
294
/**
 * Looks for a readable config file directly inside `baseDir`.
 *
 * Candidates are tried in order: `.yaml`, `.yml`, `.json`. A candidate that
 * cannot be read is skipped without reporting why.
 *
 * @param baseDir Directory to search; defaults to the current working directory.
 * @returns The path of the first readable candidate, or null when none is readable.
 */
async function discoverConfigFile(baseDir = process.cwd()) {
  const isRooted = baseDir.startsWith("/");
  const candidateNames = ["imessage-config.yaml", "imessage-config.yml", "imessage-config.json"];
  for (const candidateName of candidateNames) {
    let candidatePath;
    if (isRooted) {
      candidatePath = `${baseDir}/${candidateName}`;
    } else {
      // Relative directories get a "./" prefix; any "/./" this creates is collapsed.
      candidatePath = `./${baseDir}/${candidateName}`.replace(/\/\.\//g, "/");
    }
    const readable = await access2(candidatePath, constants2.R_OK).then(
      () => true,
      () => false
    );
    if (readable) {
      return candidatePath;
    }
  }
  return null;
}
309
/**
 * Reads a config file and parses it according to its extension.
 *
 * @param filePath Path to a `.json`, `.yaml` or `.yml` file.
 * @returns The parsed, not yet validated, file contents.
 * @throws Error when the file cannot be read, the extension is unsupported,
 *   or the contents fail to parse (the message names the file).
 */
async function loadConfigFile(filePath) {
  const content = await readFile(filePath, "utf-8");
  const describe = (error) => (error instanceof Error ? error.message : String(error));
  switch (detectConfigFormat(filePath)) {
    case "json":
      try {
        return JSON.parse(content);
      } catch (error) {
        throw new Error(`Failed to parse JSON config file ${filePath}: ${describe(error)}`);
      }
    case "yaml":
      try {
        // JSON_SCHEMA restricts YAML scalars to JSON-compatible types.
        return yaml2.load(content, { schema: yaml2.JSON_SCHEMA });
      } catch (error) {
        throw new Error(`Failed to parse YAML config file ${filePath}: ${describe(error)}`);
      }
    default:
      throw new Error(`Unsupported config format: ${filePath}`);
  }
}
327
+ function substituteEnvVars(obj) {
328
+ if (typeof obj === "string") {
329
+ return obj.replace(/\$\{(\w+)\}/g, (_match, envVar) => {
330
+ const value = process.env[envVar];
331
+ if (value === undefined) {
332
+ throw new Error(`Environment variable ${envVar} is not set but referenced in config`);
333
+ }
334
+ return value;
335
+ });
336
+ }
337
+ if (Array.isArray(obj)) {
338
+ return obj.map(substituteEnvVars);
339
+ }
340
+ if (typeof obj === "object" && obj !== null) {
341
+ return Object.fromEntries(Object.entries(obj).map(([key, value]) => [
342
+ key,
343
+ substituteEnvVars(value)
344
+ ]));
345
+ }
346
+ return obj;
347
+ }
348
+ function mergeConfig(fileConfig, cliOptions = {}) {
349
+ const merged = {
350
+ ...fileConfig,
351
+ ...cliOptions
352
+ };
353
+ if (fileConfig.gemini || cliOptions.gemini) {
354
+ merged.gemini = {
355
+ ...fileConfig.gemini,
356
+ ...cliOptions.gemini
357
+ };
358
+ }
359
+ if (cliOptions.firecrawl !== undefined) {
360
+ merged.firecrawl = cliOptions.firecrawl;
361
+ } else if (fileConfig.firecrawl !== undefined) {
362
+ merged.firecrawl = fileConfig.firecrawl;
363
+ }
364
+ if (fileConfig.enrichment || cliOptions.enrichment) {
365
+ merged.enrichment = {
366
+ ...fileConfig.enrichment,
367
+ ...cliOptions.enrichment
368
+ };
369
+ }
370
+ if (fileConfig.render || cliOptions.render) {
371
+ merged.render = {
372
+ ...fileConfig.render,
373
+ ...cliOptions.render
374
+ };
375
+ }
376
+ return merged;
377
+ }
378
// Serialised `cliOptions` that produced `configCache`. Kept so a cached result
// is reused only for an identical set of overrides.
var configCacheCliKey = null;

/**
 * Loads, merges and validates the configuration.
 *
 * Steps: find the file (explicit `configPath` or auto-discovery), parse it,
 * expand `${VAR}` placeholders, overlay `cliOptions`, then validate.
 *
 * The validated result is cached. A later call reuses it only when both the
 * `configPath` option and the `cliOptions` match the call that filled the
 * cache; `skipCache` forces a reload, which then refreshes the cache.
 *
 * @param options `{ configPath?, cliOptions?, skipCache? }`.
 * @returns The validated configuration.
 * @throws Error when the file cannot be loaded or the merged config is invalid.
 */
async function loadConfig(options = {}) {
  const { configPath, cliOptions = {}, skipCache = false } = options;

  // Normalise the key the same way it is stored, so an omitted configPath
  // (stored as null) can still produce a cache hit.
  const pathKey = configPath || null;
  // Encode undefined explicitly: `{ a: undefined }` overrides a file value
  // during merging and must not share a key with `{}`.
  const cliKey = JSON.stringify(cliOptions, (_key, value) => (value === undefined ? "\u0000undefined" : value));

  if (!skipCache && configCache && configCachePath === pathKey && configCacheCliKey === cliKey) {
    return configCache;
  }

  const filePath = configPath || await discoverConfigFile();
  let fileConfig = {};
  if (filePath) {
    try {
      const rawConfig = await loadConfigFile(filePath);
      fileConfig = substituteEnvVars(rawConfig);
    } catch (error) {
      throw new Error(`Failed to load config from ${filePath}: ${error instanceof Error ? error.message : String(error)}`);
    }
  }

  const merged = mergeConfig(fileConfig, cliOptions);
  try {
    const validated = validateConfig(merged);
    configCache = validated;
    configCachePath = pathKey;
    configCacheCliKey = cliKey;
    return validated;
  } catch (error) {
    throw new Error(`Config validation failed: ${error instanceof Error ? error.message : String(error)}`);
  }
}
404
/** Drops the cached configuration so the next loadConfig call reloads it. */
function clearConfigCache() {
  configCachePath = null;
  configCache = null;
}
408
/**
 * Reports whether a validated configuration is currently cached.
 *
 * @returns true when the cache holds a value, false when it is null.
 */
function isConfigCached() {
  const hasCachedConfig = configCache !== null;
  return hasCachedConfig;
}
411
+ // src/enrich/rate-limiting.ts
412
+ class RateLimiter {
413
+ config;
414
+ state;
415
+ constructor(partialConfig) {
416
+ this.config = {
417
+ rateLimitDelay: partialConfig?.rateLimitDelay ?? 1000,
418
+ maxRetries: partialConfig?.maxRetries ?? 3,
419
+ circuitBreakerThreshold: partialConfig?.circuitBreakerThreshold ?? 5,
420
+ circuitBreakerResetMs: partialConfig?.circuitBreakerResetMs ?? 60000
421
+ };
422
+ this.validateConfig(this.config);
423
+ this.state = {
424
+ consecutiveFailures: 0,
425
+ circuitOpen: false,
426
+ circuitOpenedAt: null,
427
+ lastCallTime: null
428
+ };
429
+ }
430
+ validateConfig(config) {
431
+ if (config.rateLimitDelay < 0)
432
+ throw new Error("rateLimitDelay must be non-negative");
433
+ if (config.maxRetries < 0)
434
+ throw new Error("maxRetries must be non-negative");
435
+ if (config.circuitBreakerThreshold < 1)
436
+ throw new Error("circuitBreakerThreshold must be >= 1");
437
+ if (config.circuitBreakerResetMs < 0)
438
+ throw new Error("circuitBreakerResetMs must be non-negative");
439
+ }
440
+ shouldRateLimit() {
441
+ if (this.state.lastCallTime === null) {
442
+ return 0;
443
+ }
444
+ const timeSinceLastCall = Date.now() - this.state.lastCallTime;
445
+ const requiredDelay = this.config.rateLimitDelay;
446
+ if (timeSinceLastCall < requiredDelay) {
447
+ return requiredDelay - timeSinceLastCall;
448
+ }
449
+ return 0;
450
+ }
451
+ recordCall() {
452
+ this.state.lastCallTime = Date.now();
453
+ }
454
+ calculateExponentialBackoff(attemptNumber) {
455
+ const baseDelaySeconds = 2 ** attemptNumber;
456
+ const baseDelayMs = baseDelaySeconds * 1000;
457
+ const jitterAmount = baseDelayMs * 0.25;
458
+ const jitter = (Math.random() - 0.5) * 2 * jitterAmount;
459
+ return baseDelayMs + jitter;
460
+ }
461
+ parseRetryAfterHeader(retryAfterValue) {
462
+ if (retryAfterValue === undefined || retryAfterValue === null) {
463
+ return null;
464
+ }
465
+ if (typeof retryAfterValue === "number") {
466
+ return retryAfterValue * 1000;
467
+ }
468
+ const strValue = String(retryAfterValue).trim();
469
+ const seconds = Number.parseInt(strValue, 10);
470
+ if (!Number.isNaN(seconds) && seconds >= 0) {
471
+ return seconds * 1000;
472
+ }
473
+ try {
474
+ const date = new Date(strValue);
475
+ if (!Number.isNaN(date.getTime())) {
476
+ const delayMs = date.getTime() - Date.now();
477
+ return Math.max(0, delayMs);
478
+ }
479
+ } catch {}
480
+ return null;
481
+ }
482
+ getRetryStrategy(response, attemptNumber) {
483
+ const { status, headers } = response;
484
+ if (status >= 200 && status < 300) {
485
+ return { shouldRetry: false, delayMs: 0 };
486
+ }
487
+ const isRetryableStatus = status === 429 || status >= 500 && status < 600;
488
+ if (!isRetryableStatus) {
489
+ return { shouldRetry: false, delayMs: 0 };
490
+ }
491
+ const retryAfterMs = this.parseRetryAfterHeader(headers?.["Retry-After"]);
492
+ if (retryAfterMs !== null) {
493
+ return { shouldRetry: true, delayMs: retryAfterMs };
494
+ }
495
+ const backoffMs = this.calculateExponentialBackoff(attemptNumber);
496
+ return { shouldRetry: true, delayMs: backoffMs };
497
+ }
498
+ shouldRetryAttempt(attemptNumber) {
499
+ return attemptNumber <= this.config.maxRetries;
500
+ }
501
+ isCircuitOpen() {
502
+ if (!this.state.circuitOpen) {
503
+ return false;
504
+ }
505
+ const timeSinceOpened = Date.now() - (this.state.circuitOpenedAt ?? 0);
506
+ if (timeSinceOpened >= this.config.circuitBreakerResetMs) {
507
+ this.resetCircuitBreaker();
508
+ return false;
509
+ }
510
+ return true;
511
+ }
512
+ recordFailure() {
513
+ this.state.consecutiveFailures += 1;
514
+ if (this.state.consecutiveFailures >= this.config.circuitBreakerThreshold) {
515
+ this.state.circuitOpen = true;
516
+ this.state.circuitOpenedAt = Date.now();
517
+ }
518
+ }
519
+ recordSuccess() {
520
+ this.state.consecutiveFailures = 0;
521
+ this.state.circuitOpen = false;
522
+ this.state.circuitOpenedAt = null;
523
+ }
524
+ resetCircuitBreaker() {
525
+ this.state.consecutiveFailures = 0;
526
+ this.state.circuitOpen = false;
527
+ this.state.circuitOpenedAt = null;
528
+ }
529
+ getState() {
530
+ return { ...this.state };
531
+ }
532
+ getConfig() {
533
+ return { ...this.config };
534
+ }
535
+ reset() {
536
+ this.state = {
537
+ consecutiveFailures: 0,
538
+ circuitOpen: false,
539
+ circuitOpenedAt: null,
540
+ lastCallTime: null
541
+ };
542
+ }
543
+ }
544
/**
 * Factory wrapper around the RateLimiter constructor.
 *
 * @param config Optional partial configuration, passed straight through.
 * @returns A new RateLimiter instance.
 */
function createRateLimiter(config) {
  const limiter = new RateLimiter(config);
  return limiter;
}
547
/**
 * @param status Numeric HTTP status.
 * @returns true when `status` is at least 500 and below 600.
 */
function is5xx(status) {
  const atLeast500 = status >= 500;
  const below600 = status < 600;
  return atLeast500 && below600;
}
550
/**
 * @param status Numeric HTTP status.
 * @returns true for 429 and for any status accepted by is5xx.
 */
function isRetryableStatus(status) {
  if (status === 429) {
    return true;
  }
  return is5xx(status);
}
553
+ // src/ingest/dedup-merge.ts
554
/**
 * Merges CSV-sourced and DB-sourced messages into one de-duplicated list.
 *
 * Each CSV message is paired with a DB message by identical GUID, or failing
 * that by equivalent content (same handle plus the same normalised text, or
 * the same media id). Paired messages are merged with DB values taking
 * precedence. Unpaired CSV messages are kept as they are, and DB messages
 * that no CSV message claimed are appended at the end.
 *
 * DB rows that have a GUID twin in the CSV set are reserved before any
 * content matching runs. This stops an earlier CSV message from claiming such
 * a row by content, which would emit the same DB GUID twice.
 *
 * Inputs are not mutated. Both are processed in GUID order.
 *
 * @param csvMessages Messages parsed from the CSV export.
 * @param dbMessages Messages read from the database.
 * @returns `{ messages, stats }`. `stats.conflicts` is never incremented here
 *   and is always 0.
 */
function dedupAndMerge(csvMessages, dbMessages) {
  const byGuid = (a, b) => a.guid.localeCompare(b.guid);
  const sortedCsv = [...csvMessages].sort(byGuid);
  const sortedDb = [...dbMessages].sort(byGuid);
  const stats = {
    csvCount: csvMessages.length,
    dbCount: dbMessages.length,
    outputCount: 0,
    exactMatches: 0,
    contentMatches: 0,
    conflicts: 0,
    noMatches: 0
  };
  const outputMessages = [];
  const matchedDbGuids = new Set();
  const dbByGuid = new Map();
  const dbByNormalizedContent = new Map();

  // Index DB messages by GUID and, for text messages, by handle + normalised text.
  for (const dbMsg of sortedDb) {
    dbByGuid.set(dbMsg.guid, dbMsg);
    if (dbMsg.messageKind === "text" && dbMsg.text) {
      const normalizedKey = `${dbMsg.handle || ""}:${normalizeTextForIndex(dbMsg.text)}`;
      const bucket = dbByNormalizedContent.get(normalizedKey);
      if (bucket) {
        bucket.push(dbMsg);
      } else {
        dbByNormalizedContent.set(normalizedKey, [dbMsg]);
      }
    }
  }

  // Reserve every DB row that will be claimed by an exact GUID match so the
  // content matchers below cannot hand it to a different CSV message first.
  for (const csvMsg of sortedCsv) {
    if (dbByGuid.has(csvMsg.guid)) {
      matchedDbGuids.add(csvMsg.guid);
    }
  }

  for (const csvMsg of sortedCsv) {
    const exactMatch = dbByGuid.get(csvMsg.guid) || null;
    if (exactMatch) {
      outputMessages.push(applyDbAuthoritiveness(csvMsg, exactMatch));
      stats.exactMatches++;
      continue;
    }

    let contentMatch = detectContentEquivalenceIndexed(csvMsg, dbByNormalizedContent, matchedDbGuids);
    if (!contentMatch && csvMsg.messageKind !== "text") {
      // Non-text messages are not in the text index; scan the unclaimed DB rows.
      const unmatchedDbMessages = sortedDb.filter((dbMsg) => !matchedDbGuids.has(dbMsg.guid));
      contentMatch = detectContentEquivalence(csvMsg, unmatchedDbMessages);
    }

    if (contentMatch) {
      outputMessages.push(applyDbAuthoritiveness(csvMsg, contentMatch.message));
      matchedDbGuids.add(contentMatch.message.guid);
      stats.contentMatches++;
    } else {
      outputMessages.push(csvMsg);
      stats.noMatches++;
    }
  }

  // DB-only messages go last, in GUID order.
  for (const dbMsg of sortedDb) {
    if (!matchedDbGuids.has(dbMsg.guid)) {
      outputMessages.push(dbMsg);
    }
  }

  stats.outputCount = outputMessages.length;
  return {
    messages: outputMessages,
    stats
  };
}
617
+ function detectContentEquivalence(csvMsg, candidates, threshold = 0.9) {
618
+ for (const candidate of candidates) {
619
+ const reasons = [];
620
+ let confidence = 0;
621
+ if (csvMsg.messageKind !== candidate.messageKind) {
622
+ continue;
623
+ }
624
+ const csvHandle = csvMsg.handle || null;
625
+ const candidateHandle = candidate.handle || null;
626
+ if (csvHandle !== candidateHandle) {
627
+ continue;
628
+ }
629
+ if (csvMsg.messageKind === "text" && candidate.messageKind === "text") {
630
+ const csvText = normalizeText(csvMsg.text || "");
631
+ const candidateText = normalizeText(candidate.text || "");
632
+ if (csvText === candidateText) {
633
+ confidence = 1;
634
+ reasons.push("exact text match after normalization");
635
+ } else {
636
+ continue;
637
+ }
638
+ } else if (csvMsg.messageKind === "media" && candidate.messageKind === "media") {
639
+ const csvMediaId = csvMsg.media?.id;
640
+ const candidateMediaId = candidate.media?.id;
641
+ if (csvMediaId && candidateMediaId && csvMediaId === candidateMediaId) {
642
+ confidence = 1;
643
+ reasons.push("exact media ID match");
644
+ } else {
645
+ continue;
646
+ }
647
+ } else {
648
+ continue;
649
+ }
650
+ if (confidence >= threshold) {
651
+ return {
652
+ message: candidate,
653
+ confidence,
654
+ reasons
655
+ };
656
+ }
657
+ }
658
+ return null;
659
+ }
660
/**
 * Canonicalises message text for equality comparison.
 *
 * Lowercases, removes punctuation, collapses whitespace runs to one space and
 * trims. ASCII input is treated exactly as before: anything other than
 * `[A-Za-z0-9_]` and whitespace is removed. For non-ASCII input only Unicode
 * punctuation is removed, so letters in other scripts and emoji survive and
 * two different messages no longer collapse to the same (empty) key.
 *
 * @param text Raw message text.
 * @returns The normalised text.
 */
function normalizeText(text) {
  // First alternative: any Unicode punctuation except "_" (which \w keeps).
  // Second alternative: ASCII characters that are neither word characters nor whitespace.
  const stripped = text
    .toLowerCase()
    .replace(/(?!_)\p{P}|[^\w\s\u0080-\u{10FFFF}]/gu, "");
  return stripped.replace(/\s+/g, " ").trim();
}
663
/**
 * Normalisation used when building and querying the text content index.
 * It delegates to normalizeText so index keys agree with direct comparisons.
 *
 * @param text Raw message text.
 * @returns The normalised text.
 */
function normalizeTextForIndex(text) {
  const indexKeyText = normalizeText(text);
  return indexKeyText;
}
666
+ function detectContentEquivalenceIndexed(csvMsg, contentIndex, matchedGuids) {
667
+ if (csvMsg.messageKind !== "text" || !csvMsg.text) {
668
+ return null;
669
+ }
670
+ const normalizedKey = `${csvMsg.handle || ""}:${normalizeTextForIndex(csvMsg.text)}`;
671
+ const candidates = contentIndex.get(normalizedKey);
672
+ if (!candidates || candidates.length === 0) {
673
+ return null;
674
+ }
675
+ for (const candidate of candidates) {
676
+ if (!matchedGuids.has(candidate.guid)) {
677
+ return {
678
+ message: candidate,
679
+ confidence: 1,
680
+ reasons: ["exact text match after normalization (indexed)"]
681
+ };
682
+ }
683
+ }
684
+ return null;
685
+ }
686
+ function applyDbAuthoritiveness(csvMsg, dbMsg) {
687
+ const merged = { ...csvMsg };
688
+ merged.date = dbMsg.date;
689
+ if (dbMsg.dateRead !== undefined)
690
+ merged.dateRead = dbMsg.dateRead;
691
+ if (dbMsg.dateDelivered !== undefined)
692
+ merged.dateDelivered = dbMsg.dateDelivered;
693
+ if (dbMsg.dateEdited !== undefined)
694
+ merged.dateEdited = dbMsg.dateEdited;
695
+ if (dbMsg.handle !== undefined)
696
+ merged.handle = dbMsg.handle;
697
+ if (dbMsg.replyingTo?.targetMessageGuid !== undefined) {
698
+ merged.replyingTo = {
699
+ ...merged.replyingTo,
700
+ targetMessageGuid: dbMsg.replyingTo.targetMessageGuid
701
+ };
702
+ }
703
+ if (dbMsg.isRead !== undefined)
704
+ merged.isRead = dbMsg.isRead;
705
+ merged.guid = dbMsg.guid;
706
+ return merged;
707
+ }
708
+ // src/ingest/ingest-csv.ts
709
+ import { existsSync, readdirSync, readFileSync } from "node:fs";
710
+ import * as os from "node:os";
711
+ import * as path from "node:path";
712
+ import { parse } from "csv-parse/sync";
713
+
714
+ // src/schema/message.ts
715
+ import { z as z2 } from "zod";
716
/**
 * One enrichment record attached to a media item.
 *
 * `kind`, `createdAt`, `provider` and `version` are required; everything else
 * is optional. `createdAt` must also end in "Z".
 */
var MediaEnrichmentSchema = z2
  .object({
    kind: z2.enum([
      "image",
      "audio",
      "link",
      "video",
      "pdf",
      "unknown",
      "transcription",
      "pdf_summary",
      "video_metadata",
      "link_context",
      "image_analysis"
    ]),
    model: z2.string().optional(),
    createdAt: z2.string().datetime(),
    // Image fields.
    visionSummary: z2.string().optional(),
    shortDescription: z2.string().optional(),
    // Audio fields.
    transcription: z2.string().optional(),
    transcript: z2.string().optional(),
    speakers: z2.array(z2.string()).optional(),
    timestamps: z2
      .array(
        z2.object({
          time: z2.string(),
          speaker: z2.string(),
          content: z2.string()
        })
      )
      .optional(),
    // PDF and video fields.
    pdfSummary: z2.string().optional(),
    videoMetadata: z2
      .object({
        filename: z2.string().optional(),
        size: z2.number().optional(),
        duration: z2.number().optional(),
        analyzed: z2.boolean().optional(),
        note: z2.string().optional()
      })
      .optional(),
    // Failure and fallback fields.
    error: z2.string().optional(),
    usedFallback: z2.boolean().optional(),
    failedProviders: z2.array(z2.string()).optional(),
    // Link fields.
    url: z2.string().url().optional(),
    title: z2.string().optional(),
    summary: z2.string().optional(),
    provider: z2.enum([
      "gemini",
      "firecrawl",
      "local",
      "youtube",
      "spotify",
      "twitter",
      "instagram",
      "generic"
    ]),
    version: z2.string()
  })
  .superRefine((enrichment, ctx) => {
    const { createdAt } = enrichment;
    if (createdAt && !createdAt.endsWith("Z")) {
      ctx.addIssue({
        code: z2.ZodIssueCode.custom,
        message: "enrichment.createdAt must be ISO 8601 with Z suffix (UTC)"
      });
    }
  });
775
/**
 * Records which source a media item came from and when it was seen and resolved.
 * Both timestamps must be datetime strings ending in "Z".
 */
var MediaProvenanceSchema = z2
  .object({
    source: z2.enum(["csv", "db", "merged"]),
    lastSeen: z2.string().datetime(),
    resolvedAt: z2.string().datetime()
  })
  .superRefine((prov, ctx) => {
    // One issue per offending field, reported on that field's path.
    for (const field of ["lastSeen", "resolvedAt"]) {
      const stamp = prov[field];
      if (stamp && !stamp.endsWith("Z")) {
        ctx.addIssue({
          code: z2.ZodIssueCode.custom,
          message: `${field} must be ISO 8601 with Z suffix (UTC)`,
          path: [field]
        });
      }
    }
  });
795
/**
 * Metadata for a single attachment.
 * `id`, `filename` and `path` are required, and a non-empty `path` must start
 * with "/".
 */
var MediaMetaSchema = z2
  .object({
    id: z2.string(),
    filename: z2.string(),
    path: z2.string(),
    size: z2.number().optional(),
    mimeType: z2.string().optional(),
    uti: z2.string().nullable().optional(),
    isSticker: z2.boolean().optional(),
    hidden: z2.boolean().optional(),
    mediaKind: z2.enum(["image", "audio", "video", "pdf", "unknown"]).optional(),
    enrichment: z2.array(MediaEnrichmentSchema).optional(),
    provenance: MediaProvenanceSchema.optional()
  })
  .superRefine((media, ctx) => {
    const isRelative = Boolean(media.path) && !media.path.startsWith("/");
    if (isRelative) {
      ctx.addIssue({
        code: z2.ZodIssueCode.custom,
        message: "media.path must be an absolute path (starting with /)",
        path: ["path"]
      });
    }
  });
816
/**
 * Details about the message being replied to. All fields are optional.
 * When `date` is present it must end in "Z" and be parseable by Date.parse.
 */
var ReplyInfoSchema = z2
  .object({
    sender: z2.string().optional(),
    date: z2.string().datetime().optional(),
    text: z2.string().optional(),
    targetMessageGuid: z2.string().optional()
  })
  .superRefine((reply, ctx) => {
    const { date } = reply;
    if (!date) {
      return;
    }
    const reportDateIssue = (message) =>
      ctx.addIssue({ code: z2.ZodIssueCode.custom, message, path: ["date"] });

    if (!date.endsWith("Z")) {
      reportDateIssue("replyingTo.date must be ISO 8601 with Z suffix (UTC)");
    }
    if (Number.isNaN(Date.parse(date))) {
      reportDateIssue("replyingTo.date must be a valid ISO 8601 date");
    }
  });
839
/**
 * Payload describing a tapback (reaction).
 * `type` and `action` are required; the target details are optional.
 */
var TapbackInfoSchema = z2.object({
  type: z2.enum([
    "loved",
    "liked",
    "disliked",
    "laughed",
    "emphasized",
    "questioned",
    "emoji"
  ]),
  action: z2.enum(["added", "removed"]),
  // Which message, and optionally which part of it, the tapback refers to.
  targetMessageGuid: z2.string().optional(),
  targetMessagePart: z2.number().int().optional(),
  targetText: z2.string().optional(),
  isMedia: z2.boolean().optional(),
  emoji: z2.string().optional()
});
856
/**
 * Core message columns.
 * Only `guid`, `isFromMe` and `date` are required; every other field is
 * optional, and many also accept null.
 */
var MessageCoreSchema = z2.object({
  // Identity.
  guid: z2.string(),
  rowid: z2.number().optional(),
  chatId: z2.string().nullable().optional(),
  service: z2.string().nullable().optional(),
  subject: z2.string().nullable().optional(),
  // Participants.
  handleId: z2.number().nullable().optional(),
  handle: z2.string().nullable().optional(),
  destinationCallerId: z2.string().nullable().optional(),
  isFromMe: z2.boolean(),
  otherHandle: z2.number().nullable().optional(),
  // Timestamps.
  date: z2.string().datetime(),
  dateRead: z2.string().datetime().nullable().optional(),
  dateDelivered: z2.string().datetime().nullable().optional(),
  dateEdited: z2.string().datetime().nullable().optional(),
  isRead: z2.boolean().optional(),
  // Item, group and sharing attributes.
  itemType: z2.number().optional(),
  groupActionType: z2.number().optional(),
  groupTitle: z2.string().nullable().optional(),
  shareStatus: z2.boolean().optional(),
  shareDirection: z2.boolean().nullable().optional(),
  expressiveSendStyleId: z2.string().nullable().optional(),
  balloonBundleId: z2.string().nullable().optional(),
  // Threading.
  threadOriginatorGuid: z2.string().nullable().optional(),
  threadOriginatorPart: z2.number().nullable().optional(),
  numReplies: z2.number().optional(),
  deletedFrom: z2.number().nullable().optional()
});
884
/**
 * Zod schema for a single exported message.
 *
 * Beyond the per-field checks, the refinement enforces:
 *  - every populated date field ends with a "Z" suffix,
 *  - a "tapback" message carries a tapback payload,
 *  - a "media" message carries a media payload with id, filename and path,
 *  - a non-media message carries no media payload.
 */
var MessageSchema = z2.object({
  guid: z2.string(),
  rowid: z2.number().optional(),
  chatId: z2.string().nullable().optional(),
  service: z2.string().nullable().optional(),
  subject: z2.string().nullable().optional(),
  handleId: z2.number().nullable().optional(),
  handle: z2.string().nullable().optional(),
  destinationCallerId: z2.string().nullable().optional(),
  isFromMe: z2.boolean(),
  otherHandle: z2.number().nullable().optional(),
  date: z2.string().datetime(),
  dateRead: z2.string().datetime().nullable().optional(),
  dateDelivered: z2.string().datetime().nullable().optional(),
  dateEdited: z2.string().datetime().nullable().optional(),
  isRead: z2.boolean().optional(),
  itemType: z2.number().optional(),
  groupActionType: z2.number().optional(),
  groupTitle: z2.string().nullable().optional(),
  shareStatus: z2.boolean().optional(),
  shareDirection: z2.boolean().nullable().optional(),
  expressiveSendStyleId: z2.string().nullable().optional(),
  balloonBundleId: z2.string().nullable().optional(),
  threadOriginatorGuid: z2.string().nullable().optional(),
  threadOriginatorPart: z2.number().nullable().optional(),
  numReplies: z2.number().optional(),
  deletedFrom: z2.number().nullable().optional(),
  messageKind: z2.enum(["text", "media", "tapback", "notification"]),
  text: z2.string().nullable().optional(),
  tapback: TapbackInfoSchema.nullable().optional(),
  replyingTo: ReplyInfoSchema.nullable().optional(),
  replyingToRaw: z2.string().nullable().optional(),
  media: MediaMetaSchema.nullable().optional(),
  groupGuid: z2.string().nullable().optional(),
  exportTimestamp: z2.string().datetime().optional(),
  exportVersion: z2.string().optional(),
  isUnsent: z2.boolean().optional(),
  isEdited: z2.boolean().optional()
}).superRefine((message, ctx) => {
  // Small helper so each rule below reads as "where" + "what went wrong".
  const report = (path, text) => {
    ctx.addIssue({ code: z2.ZodIssueCode.custom, message: text, path });
  };

  // Rule 1: populated date fields must carry the UTC "Z" suffix.
  const utcFields = ["date", "dateRead", "dateDelivered", "dateEdited", "exportTimestamp"];
  utcFields.forEach((field) => {
    const value = message[field];
    const isPopulatedString = Boolean(value) && typeof value === "string";
    if (isPopulatedString && !/Z$/.test(value)) {
      report([field], `${field} must be ISO 8601 with Z suffix (UTC)`);
    }
  });

  const { messageKind, tapback, media } = message;
  const isMedia = messageKind === "media";

  // Rule 2: the payload must agree with the declared kind.
  if (messageKind === "tapback" && !tapback) {
    report(["tapback"], "tapback kind requires tapback payload");
  }
  if (isMedia && !media) {
    report(["media"], "media kind requires media payload");
  }
  if (!isMedia && media) {
    report(["media"], "media payload present on non-media message");
  }

  // Rule 3: a media payload needs its identifying fields.
  if (isMedia && media) {
    for (const key of ["id", "filename", "path"]) {
      if (!media[key]) {
        report(["media", key], `media.${key} is required when messageKind is media`);
      }
    }
  }
});
987
/**
 * Zod schema for the envelope that wraps a list of messages: a schema
 * version, the data source, a creation timestamp and optional free-form meta.
 * The refinement additionally requires `createdAt` to end with "Z".
 */
var ExportEnvelopeSchema = z2.object({
  schemaVersion: z2.string(),
  source: z2.enum(["csv", "db", "merged"]),
  createdAt: z2.string().datetime(),
  messages: z2.array(MessageSchema),
  meta: z2.record(z2.any()).optional()
}).superRefine((envelope, ctx) => {
  const { createdAt } = envelope;
  const hasUtcSuffix = Boolean(createdAt) && /Z$/.test(createdAt);
  if (!createdAt || hasUtcSuffix) {
    return;
  }
  ctx.addIssue({
    code: z2.ZodIssueCode.custom,
    message: "createdAt must be ISO 8601 with Z suffix (UTC)",
    path: ["createdAt"]
  });
});
1002
+
1003
+ // src/ingest/ingest-csv.ts
1004
/**
 * Reads a CSV export from disk and converts every record into messages.
 *
 * @param csvFilePath - Path of the CSV file, read synchronously as UTF-8.
 * @param options - Passed through unchanged to parseCSVRow for each record.
 * @returns All messages produced by the rows, in file order.
 */
function ingestCSV(csvFilePath, options) {
  const records = parse(readFileSync(csvFilePath, "utf-8"), { columns: true });
  const collected = [];
  // Records are numbered from 2: the first record is given line number 2.
  records.forEach((record, index) => {
    collected.push(...parseCSVRow(record, index + 2, options));
  });
  return collected;
}
1016
/**
 * Converts one parsed CSV record into zero or more message objects.
 *
 * A row yields, in this order:
 *  1. a text message, when Type is not "Notification" and Text is non-empty;
 *  2. a media message, when the Attachment column is non-blank and
 *     resolveAttachmentPath locates the file;
 *  3. a notification message, when Type is "Notification".
 *
 * Rows whose "Message Date" cannot be converted by convertToISO8601 yield an
 * empty array. A row with an attachment that cannot be resolved produces no
 * media message.
 *
 * The former trailing "fallback text message" branch was removed: it required
 * `messages.length === 0 && text`, which can never hold because a row with
 * text always produces either the text message or the notification message.
 *
 * @param row - Record keyed by CSV column header.
 * @param lineNumber - Number used to build the synthetic `csv:<line>:0` GUIDs.
 * @param options - Spread into the options given to resolveAttachmentPath,
 *   together with the converted message date.
 * @returns Messages derived from the row (possibly empty).
 */
function parseCSVRow(row, lineNumber, options) {
  const date = convertToISO8601(row["Message Date"] || "");
  if (!date) {
    return [];
  }

  const type = row.Type;
  const text = row.Text;
  const status = row.Status;
  const attachment = row.Attachment;
  const attachmentType = row["Attachment type"];
  const replyingTo = row["Replying to"];
  const isNotification = type === "Notification";

  // Fields shared by every message produced from this row. Optional fields
  // are only added when the CSV column is populated.
  const baseMessage = {
    isFromMe: type === "Outgoing" || type === "Sent",
    date
  };
  const handle = row["Sender Name"] || row["Sender ID"];
  if (handle) {
    baseMessage.handle = handle;
  }
  if (row.Service) {
    baseMessage.service = row.Service;
  }
  if (row.Subject) {
    baseMessage.subject = row.Subject;
  }
  if (row["Read Date"]) {
    baseMessage.dateRead = convertToISO8601(row["Read Date"]);
  }
  if (row["Delivered Date"]) {
    baseMessage.dateDelivered = convertToISO8601(row["Delivered Date"]);
  }
  if (row["Edited Date"]) {
    baseMessage.dateEdited = convertToISO8601(row["Edited Date"]);
    baseMessage.isEdited = true;
  }
  if (status === "Read") {
    baseMessage.isRead = true;
  } else if (status === "Unread") {
    baseMessage.isRead = false;
  }

  const rowGuid = `csv:${lineNumber}:0`;
  const baseExportMetadata = {
    source: "csv",
    lineNumber,
    csvGuid: rowGuid,
    ...replyingTo && { replyingTo }
  };

  const messages = [];

  if (!isNotification && text) {
    messages.push({
      ...baseMessage,
      guid: rowGuid,
      messageKind: "text",
      text,
      exportMetadata: baseExportMetadata
    });
  }

  if (attachment && attachment.trim() !== "") {
    const resolvedPath = resolveAttachmentPath({ filename: attachment }, {
      ...options,
      messageDate: date
    });
    if (resolvedPath) {
      messages.push({
        ...baseMessage,
        guid: `csv:${lineNumber}:0:media`,
        messageKind: "media",
        media: {
          id: `media:csv:${lineNumber}:0`,
          filename: attachment,
          path: resolvedPath,
          mimeType: attachmentType || undefined,
          mediaKind: inferMediaKind(attachmentType || "")
        },
        exportMetadata: {
          ...baseExportMetadata,
          attachmentIndex: 0
        }
      });
    }
  }

  if (isNotification) {
    messages.push({
      ...baseMessage,
      guid: rowGuid,
      messageKind: "notification",
      exportMetadata: baseExportMetadata
    });
  }

  return messages;
}
1125
/**
 * Normalizes a CSV date string to the `Date#toISOString` form.
 *
 * The first space is replaced by "T"; when the text contains neither "Z" nor
 * something that looks like a `+hh:` / `-hh:` offset, "Z" is appended so the
 * value is read as UTC.
 *
 * @param csvDate - Raw date text from the CSV.
 * @returns The ISO string, or null when the input is blank, contains neither
 *   "-" nor "/", or cannot be parsed.
 */
function convertToISO8601(csvDate) {
  const isBlank = !csvDate || csvDate.trim() === "";
  if (isBlank) {
    return null;
  }
  try {
    const candidate = csvDate.trim().replace(" ", "T");
    const looksLikeDate = candidate.includes("-") || candidate.includes("/");
    if (!looksLikeDate) {
      return null;
    }
    const hasZone = candidate.includes("Z") || /[+-]\d{2}:/.test(candidate);
    const parsed = new Date(hasZone ? candidate : `${candidate}Z`);
    return Number.isNaN(parsed.getTime()) ? null : parsed.toISOString();
  } catch {
    return null;
  }
}
1147
/**
 * Tries to locate an attachment on disk.
 *
 * Lookup order:
 *  1. `attachment.copied_path` when it is absolute ("/...") and exists;
 *  2. `attachment.copied_path` with a leading "~" expanded to the home
 *     directory, when that exists;
 *  3. for each attachment root, a file named
 *     `<date> - <senderName> - <filename>`; when no sender name is known, the
 *     first directory entry that contains the date and ends with the filename.
 *
 * Missing `options` or `options.attachmentRoots` is treated as "no roots"
 * instead of throwing, and the date-based lookup is skipped when the date
 * cannot be formatted (an empty date string would match every file).
 *
 * @param attachment - Object with optional `copied_path`, `filename`, `senderName`.
 * @param options - `{ attachmentRoots, messageDate }`.
 * @returns The resolved path, or null when nothing was found.
 */
function resolveAttachmentPath(attachment, options) {
  if (!attachment) {
    return null;
  }
  const { attachmentRoots: configuredRoots, messageDate } = options ?? {};
  const attachmentRoots = Array.isArray(configuredRoots) ? configuredRoots : [];

  if (attachment.copied_path?.startsWith("/")) {
    if (existsSync(attachment.copied_path)) {
      return attachment.copied_path;
    }
  }
  if (attachment.copied_path?.startsWith("~")) {
    const expanded = attachment.copied_path.replace("~", os.homedir());
    if (existsSync(expanded)) {
      return expanded;
    }
  }

  if (!messageDate || attachmentRoots.length === 0) {
    return null;
  }
  const dateStr = formatDateForAttachmentSearch(messageDate);
  if (!dateStr) {
    return null;
  }
  const filename = attachment.filename || "unknown";
  const senderName = attachment.senderName || "*";
  const exactName = `${dateStr} - ${senderName} - ${filename}`;

  for (const root of attachmentRoots) {
    const fullPath = path.join(root, exactName);
    if (existsSync(fullPath)) {
      return fullPath;
    }
    // Without a sender name, fall back to scanning the directory.
    if (senderName === "*" && existsSync(root)) {
      try {
        const match = readdirSync(root).find((entry) => {
          return entry.includes(dateStr) && entry.endsWith(filename);
        });
        if (match) {
          return path.join(root, match);
        }
      } catch {
        // Best effort: an unreadable root is skipped and the next one is tried.
      }
    }
  }
  return null;
}
1186
/**
 * Maps a MIME type to a coarse media kind.
 *
 * @param mimeType - MIME type text; may be empty.
 * @returns "image", "audio" or "video" by prefix, "pdf" when the text
 *   contains "pdf", otherwise "unknown".
 */
function inferMediaKind(mimeType) {
  if (!mimeType) {
    return "unknown";
  }
  const prefixKinds = [
    ["image/", "image"],
    ["audio/", "audio"],
    ["video/", "video"]
  ];
  const hit = prefixKinds.find(([prefix]) => mimeType.startsWith(prefix));
  if (hit) {
    return hit[1];
  }
  return mimeType.includes("pdf") ? "pdf" : "unknown";
}
1199
/**
 * Formats a date as `YYYY-MM-DD HH MM SS` using its UTC components, the form
 * used when searching attachment file names.
 *
 * `new Date(...)` does not throw on unparsable input; it yields an invalid
 * date whose components are NaN. That case is detected explicitly so the
 * function returns "" rather than "NaN-NaN-NaN NaN NaN NaN".
 *
 * @param isoDate - Date text (or any value accepted by the Date constructor).
 * @returns The formatted string, or "" when the date is invalid.
 */
function formatDateForAttachmentSearch(isoDate) {
  try {
    const date = new Date(isoDate);
    if (Number.isNaN(date.getTime())) {
      return "";
    }
    const two = (value) => String(value).padStart(2, "0");
    const day = `${date.getUTCFullYear()}-${two(date.getUTCMonth() + 1)}-${two(date.getUTCDate())}`;
    const time = `${two(date.getUTCHours())} ${two(date.getUTCMinutes())} ${two(date.getUTCSeconds())}`;
    return `${day} ${time}`;
  } catch {
    // The Date constructor can still throw for exotic inputs (e.g. a Symbol).
    return "";
  }
}
1213
/**
 * Wraps messages in a CSV export envelope stamped with the current time.
 *
 * @param messages - Messages to include; the same array is stored in the result.
 * @returns `{ schemaVersion: "2.0.0", source: "csv", createdAt, messages }`.
 */
function createExportEnvelope(messages) {
  const envelope = {
    schemaVersion: "2.0.0",
    source: "csv",
    createdAt: new Date().toISOString(),
    messages
  };
  return envelope;
}
1221
+ // src/utils/human.ts
1222
// Switch for human-readable console output; safeConsole is a no-op when false.
var humanEnabled = true;

/**
 * Calls `console[kind](...args)` if human output is enabled and that console
 * method exists; otherwise does nothing.
 */
function safeConsole(kind, ...args) {
  if (humanEnabled) {
    const target = globalThis.console;
    target?.[kind]?.(...args);
  }
}

/** Writes an informational line for the user via `console.info`. */
function humanInfo(...args) {
  safeConsole("info", ...args);
}
1232
+
1233
+ // src/utils/logger.ts
1234
+ import { AsyncLocalStorage } from "node:async_hooks";
1235
+ import fs from "node:fs";
1236
+ import path2 from "node:path";
1237
+ import pino from "pino";
1238
+ // package.json
1239
+ var package_default = {
1240
+ author: {
1241
+ name: "Nathan Vale",
1242
+ url: "https://github.com/nathanvale"
1243
+ },
1244
+ bin: {
1245
+ chatline: "./dist/bin/index.js"
1246
+ },
1247
+ bugs: {
1248
+ url: "https://github.com/nathanvale/chatline/issues"
1249
+ },
1250
+ dependencies: {
1251
+ "@google/generative-ai": "^0.21.0",
1252
+ "@mendable/firecrawl-js": "^4.3.7",
1253
+ "cli-progress": "^3.12.0",
1254
+ commander: "^14.0.1",
1255
+ "csv-parse": "^5.6.0",
1256
+ "csv-stringify": "^6.5.2",
1257
+ "js-yaml": "^4.1.0",
1258
+ pino: "^10.0.0",
1259
+ sharp: "^0.34.4",
1260
+ zod: "^3.23.8"
1261
+ },
1262
+ description: "Extract, transform, and analyze iMessage conversations with AI-powered enrichment and timeline rendering.",
1263
+ devDependencies: {
1264
+ "@arethetypeswrong/cli": "^0.18.2",
1265
+ "@biomejs/biome": "^2.3.7",
1266
+ "@changesets/changelog-github": "^0.5.1",
1267
+ "@changesets/cli": "^2.29.7",
1268
+ "@commitlint/cli": "^20.1.0",
1269
+ "@commitlint/config-conventional": "^20.0.0",
1270
+ "@testing-library/jest-dom": "^6.9.1",
1271
+ "@testing-library/react": "^16.3.0",
1272
+ "@types/cli-progress": "^3.11.6",
1273
+ "@types/js-yaml": "^4.0.9",
1274
+ "@types/node": "^24.8.1",
1275
+ "@vitest/coverage-v8": "^4.0.14",
1276
+ "better-sqlite3": "^12.4.1",
1277
+ "bun-types": "^1.3.3",
1278
+ bunup: "^0.16.10",
1279
+ changesets: "^1.0.2",
1280
+ husky: "^9.1.7",
1281
+ jsdom: "^25.0.1",
1282
+ "lint-staged": "^15.2.10",
1283
+ "pino-pretty": "^11.2.2",
1284
+ publint: "^0.3.15",
1285
+ rimraf: "^6.0.1",
1286
+ tsx: "^4.20.6",
1287
+ typescript: "^5.9.3",
1288
+ vitest: "^4.0.14"
1289
+ },
1290
+ engines: {
1291
+ node: ">=22.20"
1292
+ },
1293
+ exports: {
1294
+ ".": {
1295
+ types: "./dist/index.d.ts",
1296
+ import: "./dist/index.js"
1297
+ },
1298
+ "./package.json": "./package.json"
1299
+ },
1300
+ files: [
1301
+ "dist/**",
1302
+ "README.md",
1303
+ "LICENSE",
1304
+ "CHANGELOG.md"
1305
+ ],
1306
+ funding: [
1307
+ {
1308
+ type: "github",
1309
+ url: "https://github.com/sponsors/nathanvale"
1310
+ }
1311
+ ],
1312
+ homepage: "https://github.com/nathanvale/chatline#readme",
1313
+ imports: {
1314
+ "#enrich/*": "./src/enrich/*",
1315
+ "#ingest/*": "./src/ingest/*",
1316
+ "#normalize/*": "./src/normalize/*",
1317
+ "#render/*": "./src/render/*",
1318
+ "#schema/*": "./src/schema/*",
1319
+ "#utils/*": "./src/utils/*"
1320
+ },
1321
+ keywords: [
1322
+ "imessage",
1323
+ "typescript",
1324
+ "cli",
1325
+ "data-pipeline",
1326
+ "message-analysis",
1327
+ "timeline",
1328
+ "ai-enrichment"
1329
+ ],
1330
+ overrides: {
1331
+ express: "4.22.0"
1332
+ },
1333
+ license: "MIT",
1334
+ "lint-staged": {
1335
+ "*.{ts,tsx,js,jsx,mts,cts,json}": [
1336
+ "biome check --write"
1337
+ ]
1338
+ },
1339
+ main: "./dist/index.js",
1340
+ name: "@nathanvale/chatline",
1341
+ publishConfig: {
1342
+ access: "public",
1343
+ provenance: true
1344
+ },
1345
+ repository: {
1346
+ type: "git",
1347
+ url: "git+https://github.com/nathanvale/chatline.git"
1348
+ },
1349
+ scripts: {
1350
+ "research:actionlint": "bun scripts/firecrawl/actionlint-best-practices.ts",
1351
+ build: "bunx bunup",
1352
+ "check:publint": "publint",
1353
+ "check:types": "attw --pack",
1354
+ "check:updates": "bun pm ls || true",
1355
+ clean: "rimraf dist 2>/dev/null || true",
1356
+ cli: "node ./dist/bin/index.js",
1357
+ coverage: "bun test --coverage",
1358
+ dev: "bun src/cli.ts",
1359
+ "docs:build": "cd website && bun run build",
1360
+ "docs:dev": "cd website && bun run start",
1361
+ "docs:serve": "cd website && bun run serve",
1362
+ check: "biome check --write .",
1363
+ format: "biome format --write .",
1364
+ "format:check": "biome format .",
1365
+ hygiene: "bun run check:publint && bun run check:types",
1366
+ lint: "biome lint .",
1367
+ "lint:fix": "biome lint --write .",
1368
+ "lint:scripts": "shellcheck .github/scripts/*.sh",
1369
+ "lint:workflows": "actionlint -color -verbose",
1370
+ "pack:dry": "mkdir -p .pack && bun pm pack --destination .pack --ignore-scripts && ls -lah .pack && tar -tf .pack/*.tgz | sort | sed 's/^/ - /'",
1371
+ "pre:enter:beta": "changeset pre enter beta",
1372
+ "pre:enter:next": "changeset pre enter next",
1373
+ "pre:enter:rc": "changeset pre enter rc",
1374
+ "quality-check:ci": "biome check . && bun run typecheck",
1375
+ "pre:exit": "changeset pre exit",
1376
+ prepare: "husky",
1377
+ "publish:pre": "changeset publish --provenance",
1378
+ release: "changeset publish --provenance",
1379
+ "release:snapshot:canary": "changeset version --snapshot canary && changeset publish --tag canary",
1380
+ "security:audit": "npm audit",
1381
+ start: "node ./dist/bin/index.js",
1382
+ test: "bun test --recursive",
1383
+ "test:ci": "TF_BUILD=true bun test --recursive",
1384
+ "test:coverage": "bun test --coverage",
1385
+ "test:scripts": "bash tests/smoke-test-scripts.sh",
1386
+ "test:watch": "bun test --watch",
1387
+ typecheck: "tsc -p tsconfig.eslint.json --noEmit",
1388
+ "upgrade:minor": "bun update",
1389
+ validate: "bun run lint && bun run typecheck && bun run build && TF_BUILD=true bun run test",
1390
+ "validate:json": "bun scripts/validate-json.ts",
1391
+ "version:gen": "changeset",
1392
+ "version:pre": "changeset version",
1393
+ "watch:types": "tsc -p tsconfig.eslint.json --noEmit --watch"
1394
+ },
1395
+ sideEffects: false,
1396
+ type: "module",
1397
+ types: "./dist/index.d.ts",
1398
+ version: "0.3.0",
1399
+ workspaces: [
1400
+ ".",
1401
+ "website"
1402
+ ]
1403
+ };
1404
+
1405
+ // src/utils/logger.ts
1406
// Numeric rank of each log level; a higher number means more severe.
var LEVEL_ORDER = { debug: 10, info: 20, warn: 30, error: 40 };

// Incremented once per emitted entry and stored as its `seq` field.
var sequenceCounter = 0;

// Memoized result of loadVersion().
var cachedVersion;

// Fallback correlation id used when the async-local store holds none.
var currentCorrelationId;

// Async-local storage consulted first for the correlation id.
var correlationStore = new AsyncLocalStorage();
1416
/**
 * Returns the package version, reading it from the inlined package.json on
 * first use and caching it afterwards. Falls back to "0.0.0" when the
 * version field is empty.
 */
function loadVersion() {
  if (!cachedVersion) {
    cachedVersion = package_default.version || "0.0.0";
  }
  return cachedVersion;
}
1423
// True when running under Vitest (VITEST=true).
var isTestEnv = process.env.VITEST === "true";

// File logging: LOG_TO_FILE decides when set; otherwise on, except in tests.
var shouldWriteFile = process.env.LOG_TO_FILE === undefined
  ? !isTestEnv
  : process.env.LOG_TO_FILE === "true";

// Level handed to the pino stdout logger.
var baseLevel = process.env.LOG_LEVEL || "info";

// Pretty-printing transport, created only when LOG_FORMAT=pretty.
var pinoTransport = undefined;
if (process.env.LOG_FORMAT === "pretty") {
  pinoTransport = pino.transport({
    target: "pino-pretty",
    options: { colorize: true, singleLine: true }
  });
}

var pinoStdout = pino({ level: baseLevel, base: null }, pinoTransport);

// Current log file stream and the YYYY-MM-DD date it was opened for.
var fileStream;
var fileDate;
1433
/**
 * Makes sure `fileStream` points at today's log file,
 * `<cwd>/logs/<YYYY-MM-DD>.jsonl`, opened in append mode.
 *
 * Does nothing when file logging is disabled or the stream already belongs
 * to today's date. When the date has changed, the old stream is ended before
 * the new one is opened. Any failure clears both `fileStream` and `fileDate`.
 */
function ensureFileStream() {
  if (!shouldWriteFile) {
    return;
  }
  const today = new Date().toISOString().slice(0, 10);
  const streamIsCurrent = Boolean(fileStream) && fileDate === today;
  if (streamIsCurrent) {
    return;
  }
  try {
    const directory = path2.resolve(process.cwd(), "logs");
    fs.mkdirSync(directory, { recursive: true });
    const target = path2.join(directory, `${today}.jsonl`);
    fileStream?.end();
    fileStream = fs.createWriteStream(target, { flags: "a" });
    fileDate = today;
  } catch {
    fileStream = undefined;
    fileDate = undefined;
  }
}
1452
/**
 * Emits one log entry to stdout (via pino), to the daily log file when file
 * logging is enabled, and to every registered sink.
 *
 * Entries below the effective level (see dynamicShouldLog) are dropped
 * before any work is done. The correlation id comes from the async-local
 * store, falling back to `currentCorrelationId`.
 *
 * Each sink is invoked inside its own try/catch so that a throwing sink
 * cannot prevent the remaining sinks from receiving the entry.
 *
 * @param component - Name of the emitting component.
 * @param level - "debug" | "info" | "warn" | "error".
 * @param msg - Log message.
 * @param context - Optional structured context attached to the entry.
 */
function log(component, level, msg, context) {
  if (!dynamicShouldLog(level)) {
    return;
  }
  const effectiveCorrelationId = correlationStore.getStore() ?? currentCorrelationId;
  const entry = {
    ts: new Date().toISOString(),
    level,
    component,
    msg,
    pid: process.pid,
    ver: loadVersion(),
    seq: ++sequenceCounter,
    ...(effectiveCorrelationId ? { correlationId: effectiveCorrelationId } : {}),
    ...(context ? { context } : {})
  };

  // stdout: component/version/sequence travel as pino child bindings.
  const bindings = {
    component,
    ver: entry.ver,
    seq: entry.seq
  };
  if (entry.correlationId) {
    bindings.correlationId = entry.correlationId;
  }
  const logger = pinoStdout.child(bindings);
  logger[level](context ?? {}, msg);

  // File: one JSON document per line.
  if (shouldWriteFile) {
    ensureFileStream();
    if (fileStream) {
      fileStream.write(`${JSON.stringify(entry)}\n`);
    }
  }

  // Sinks: best effort, isolated from one another.
  for (const sink of sinks) {
    try {
      sink(entry);
    } catch {
      // A failing sink must not break logging or starve the other sinks.
    }
  }
}
1490
/**
 * Builds a logger bound to a component name.
 *
 * @param component - Component name recorded on every entry.
 * @returns An object with `debug`, `info`, `warn` and `error` methods, each
 *   taking `(msg, context)` and forwarding to `log` at that level.
 */
function createLogger(component) {
  const emitAt = (level) => (msg, context) => log(component, level, msg, context);
  return {
    debug: emitAt("debug"),
    info: emitAt("info"),
    warn: emitAt("warn"),
    error: emitAt("error")
  };
}
1498
// Extra receivers that are handed every emitted log entry.
var sinks = [];

// Runtime override of the minimum level; unset means "info".
var dynamicLevel;

/**
 * Returns the minimum level currently in force.
 * NOTE(review): this ignores LOG_LEVEL, which only configures the pino
 * stdout logger - confirm that gating on "info" by default is intended.
 */
function getEffectiveLevel() {
  return dynamicLevel ? dynamicLevel : "info";
}

/** True when `level` ranks at or above the effective minimum level. */
var dynamicShouldLog = (level) => LEVEL_ORDER[level] >= LEVEL_ORDER[getEffectiveLevel()];
1507
+
1508
+ // src/utils/incremental-state.ts
1509
+ import crypto from "node:crypto";
1510
+ import { promises as fs2 } from "node:fs";
1511
+ import path3 from "node:path";
1512
/**
 * Lists the GUIDs that have not been enriched yet.
 *
 * @param currentGuids - Iterable of GUIDs present in the current data.
 * @param state - Previous state; its `enrichedGuids` are treated as done.
 * @returns GUIDs from `currentGuids` absent from `state.enrichedGuids`, in
 *   iteration order.
 */
function detectNewMessages(currentGuids, state) {
  const alreadyEnriched = new Set(state.enrichedGuids);
  return [...currentGuids].filter((guid) => !alreadyEnriched.has(guid));
}
1522
/**
 * Loads the incremental-enrichment state file.
 *
 * Returns null (meaning "no usable previous state") when the file does not
 * exist, cannot be read or parsed, has a version other than "1.0", or lacks
 * an `enrichedGuids` array. A missing file is the normal first-run case and
 * stays silent; every other rejection is logged as a warning so that a
 * corrupt state file does not silently trigger a full re-run.
 *
 * @param filePath - Path of the JSON state file.
 * @returns The parsed state object, or null.
 */
async function loadIncrementalState(filePath) {
  const warn = (message, context) => {
    createLogger("utils:incremental-state").warn(message, context);
  };
  try {
    const content = await fs2.readFile(filePath, "utf-8");
    const parsed = JSON.parse(content);
    if (parsed === null || typeof parsed !== "object") {
      warn("State file is not a JSON object. Ignoring.", { filePath });
      return null;
    }
    if (parsed.version !== "1.0") {
      warn("Unknown state version. Ignoring.", {
        version: parsed.version
      });
      return null;
    }
    // Consumers read enrichedGuids.length, so it must be an array.
    if (!Array.isArray(parsed.enrichedGuids)) {
      warn("State file has no enrichedGuids array. Ignoring.", { filePath });
      return null;
    }
    return parsed;
  } catch (error) {
    const isMissingFile = error instanceof Error && ("code" in error && error.code === "ENOENT" || error.message.includes("ENOENT"));
    if (!isMissingFile) {
      warn("Failed to load incremental state. Ignoring.", {
        filePath,
        error: error instanceof Error ? error.message : String(error)
      });
    }
    return null;
  }
}
1541
+
1542
+ // src/utils/delta-detection.ts
1543
/**
 * Loads the previous enrichment state by delegating to loadIncrementalState.
 *
 * @param stateFilePath - Path of the state file.
 * @returns Whatever loadIncrementalState resolves to for that path.
 */
async function loadPreviousState(stateFilePath) {
  const previous = await loadIncrementalState(stateFilePath);
  return previous;
}
1546
/**
 * Collects the distinct GUIDs of the given messages.
 *
 * @param messages - Iterable of objects with a `guid` property.
 * @returns A Set of GUIDs in first-seen order.
 */
function extractGuidsFromMessages(messages) {
  return new Set(Array.from(messages, (message) => message.guid));
}
1553
/**
 * Determines which GUIDs still need enrichment.
 *
 * @param currentGuids - Iterable of GUIDs in the current data.
 * @param previousState - Previous state, or a falsy value when there is none.
 * @returns Every current GUID when there is no previous state; otherwise the
 *   result of detectNewMessages.
 */
function computeDelta(currentGuids, previousState) {
  return previousState
    ? detectNewMessages(currentGuids, previousState)
    : Array.from(currentGuids);
}
1559
/**
 * Reports a delta-detection result to the structured log and to the console.
 *
 * On a first run only the total is reported. Otherwise the number and
 * percentage of new messages and the previously enriched count are reported.
 *
 * @param result - Object with `newCount`, `totalMessages`,
 *   `previousEnrichedCount` and `isFirstRun`.
 */
function logDeltaSummary(result) {
  const logger = createLogger("utils:delta-detection");
  const {
    newCount,
    totalMessages: totalCount,
    previousEnrichedCount: previousCount
  } = result;

  if (result.isFirstRun) {
    logger.info("First enrichment run", { totalMessages: totalCount });
    humanInfo(`First enrichment run: ${totalCount} messages`);
    return;
  }

  // Guard against division by zero when there are no messages at all.
  let percentNew = 0;
  if (totalCount > 0) {
    percentNew = newCount / totalCount * 100;
  }
  logger.info("Delta detected", {
    newMessages: newCount,
    percentNew,
    totalMessages: totalCount,
    previouslyEnriched: previousCount
  });
  humanInfo(`Delta detected: ${newCount} new messages (${percentNew.toFixed(1)}%) of ${totalCount} total messages`);
  humanInfo(`Previously enriched: ${previousCount}`);
}
1579
/**
 * Compares the given messages with the stored enrichment state and reports
 * which GUIDs are new.
 *
 * @param messages - Current messages (each with a `guid`).
 * @param stateFilePath - Path of the previous state file.
 * @returns Result with `newGuids`, `totalMessages`, `previousEnrichedCount`,
 *   `newCount`, `isFirstRun` and `state` (the loaded state, or a fresh empty
 *   version "1.0" state when none was loaded). The summary is logged before
 *   returning.
 */
async function detectDelta(messages, stateFilePath) {
  const previousState = await loadPreviousState(stateFilePath);
  const newGuids = computeDelta(extractGuidsFromMessages(messages), previousState);

  // Placeholder state used when nothing could be loaded.
  const freshState = () => ({
    version: "1.0",
    lastEnrichedAt: new Date().toISOString(),
    totalMessages: messages.length,
    enrichedGuids: [],
    pipelineConfig: {
      configHash: ""
    },
    enrichmentStats: null
  });

  const result = {
    newGuids,
    totalMessages: messages.length,
    previousEnrichedCount: previousState?.enrichedGuids.length ?? 0,
    newCount: newGuids.length,
    isFirstRun: previousState === null,
    state: previousState ?? freshState()
  };
  logDeltaSummary(result);
  return result;
}
1604
/**
 * Summarizes a delta result as counts and percentages.
 *
 * @param result - Object with `totalMessages`, `newCount` and
 *   `previousEnrichedCount`.
 * @returns `{ total, new, previous, percentNew, percentPrevious }`; both
 *   percentages are 0 when `total` is not positive.
 */
function getDeltaStats(result) {
  const {
    totalMessages: total,
    newCount: added,
    previousEnrichedCount: previous
  } = result;
  const shareOfTotal = (count) => (total > 0 ? count / total * 100 : 0);
  return {
    total,
    new: added,
    previous,
    percentNew: shareOfTotal(added),
    percentPrevious: shareOfTotal(previous)
  };
}
1616
+ // src/utils/enrichment-merge.ts
1617
+ import { promises as fs3 } from "node:fs";
1618
/**
 * Merges freshly produced messages with previously stored ones.
 *
 * The result follows the order of `newMessages`; a GUID is processed only the
 * first time it appears there. A message whose GUID exists in
 * `existingMessages` is combined via mergeMessageEnrichments, otherwise it is
 * added as-is. `preservedCount` counts merged media messages that end up with
 * a non-empty enrichment list.
 *
 * @param existingMessages - Previously stored messages.
 * @param newMessages - Messages from the current run.
 * @param options - Passed to mergeMessageEnrichments (e.g. `forceRefresh`).
 * @returns `{ messages, statistics, mergedCount, addedCount, preservedCount }`.
 */
function mergeEnrichments(existingMessages, newMessages, options = {}) {
  const existingByGuid = new Map();
  for (const stored of existingMessages) {
    existingByGuid.set(stored.guid, stored);
  }

  const seenGuids = new Set();
  const resultMessages = [];
  let mergedCount = 0;
  let addedCount = 0;
  let preservedCount = 0;

  for (const incoming of newMessages) {
    const stored = existingByGuid.get(incoming.guid);
    if (seenGuids.has(incoming.guid)) {
      continue;
    }
    if (!stored) {
      resultMessages.push(incoming);
      addedCount += 1;
    } else {
      const merged = mergeMessageEnrichments(stored, incoming, options);
      resultMessages.push(merged);
      mergedCount += 1;
      const keptEnrichment = merged.messageKind === "media" && merged.media?.enrichment && merged.media.enrichment.length > 0;
      if (keptEnrichment) {
        preservedCount += 1;
      }
    }
    seenGuids.add(incoming.guid);
  }

  const totalMessages = resultMessages.length;
  const percentOfTotal = (count) => (totalMessages > 0 ? count / totalMessages * 100 : 0);
  return {
    messages: resultMessages,
    statistics: {
      mergedCount,
      addedCount,
      preservedCount,
      totalMessages,
      mergedPercentage: percentOfTotal(mergedCount),
      addedPercentage: percentOfTotal(addedCount)
    },
    mergedCount,
    addedCount,
    preservedCount
  };
}
1664
/**
 * Combines the enrichment list of a stored media message with a new one.
 *
 * The stored message is returned unchanged when it is not a media message
 * with a media payload, or when the new message brings no enrichment. With
 * `options.forceRefresh` the new enrichment replaces the stored one;
 * otherwise stored entries are kept and only new entries whose `kind` is not
 * yet present are appended. The stored message itself is never mutated.
 *
 * @param existing - Stored message.
 * @param newMsg - Message from the current run.
 * @param options - `{ forceRefresh }`.
 * @returns The stored message or a copy of it with the merged enrichment.
 */
function mergeMessageEnrichments(existing, newMsg, options) {
  if (existing.messageKind !== "media" || !existing.media) {
    return existing;
  }
  const incoming = newMsg.media?.enrichment;
  if (!incoming || incoming.length === 0) {
    return existing;
  }

  // Copy of the stored message carrying the given enrichment list.
  const withEnrichment = (enrichment) => ({
    ...existing,
    media: {
      ...existing.media,
      enrichment
    }
  });

  if (options.forceRefresh) {
    return withEnrichment(incoming);
  }

  const stored = existing.media.enrichment ?? [];
  const knownKinds = new Set(stored.map((item) => item.kind));
  const additions = incoming.filter((item) => !knownKinds.has(item.kind));
  return withEnrichment([...stored, ...additions]);
}
1695
+ export {
1696
+ validateGeneratedConfig,
1697
+ validateConfigSafe,
1698
+ validateConfig,
1699
+ substituteEnvVars,
1700
+ mergeEnrichments,
1701
+ mergeConfig,
1702
+ logDeltaSummary,
1703
+ loadConfigFile,
1704
+ loadConfig,
1705
+ isRetryableStatus,
1706
+ isConfigCached,
1707
+ is5xx,
1708
+ ingestCSV,
1709
+ getDeltaStats,
1710
+ getDefaultConfigPath,
1711
+ generateConfigContent,
1712
+ extractGuidsFromMessages,
1713
+ discoverConfigFile,
1714
+ detectDelta,
1715
+ detectConfigFormat,
1716
+ dedupAndMerge,
1717
+ createRateLimiter,
1718
+ createExportEnvelope,
1719
+ clearConfigCache,
1720
+ TapbackInfoSchema,
1721
+ ReplyInfoSchema,
1722
+ RateLimiter,
1723
+ MessageCoreSchema,
1724
+ MediaProvenanceSchema,
1725
+ MediaMetaSchema,
1726
+ MediaEnrichmentSchema,
1727
+ DEFAULT_CONFIG,
1728
+ CONFIG_FILE_PATTERNS
1729
+ };