@nathanvale/chatline 0.2.1 → 0.3.0-next.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dist/bin/index.js +5121 -0
- package/dist/index.d.ts +688 -0
- package/dist/index.js +1729 -0
- package/package.json +1 -1
package/dist/index.js
ADDED
|
@@ -0,0 +1,1729 @@
|
|
|
1
|
+
// src/config/generator.ts
|
|
2
|
+
import { constants } from "node:fs";
|
|
3
|
+
import { access, writeFile } from "node:fs/promises";
|
|
4
|
+
import yaml from "js-yaml";
|
|
5
|
+
|
|
6
|
+
// src/config/schema.ts
|
|
7
|
+
import { z } from "zod";
|
|
8
|
+
/**
 * Validation schema for the `gemini` section of the configuration.
 *
 * - `apiKey`: required, non-empty string.
 * - `model`: string, defaults to "gemini-1.5-pro".
 * - `rateLimitDelay`: non-negative number, defaults to 1000.
 * - `maxRetries`: whole number from 0 to 10, defaults to 3.
 */
var GeminiConfigSchema = z.object({
  apiKey: z.string().min(1, "Gemini API key is required"),
  model: z.string().default("gemini-1.5-pro"),
  rateLimitDelay: z.number().min(0).default(1000),
  // A retry count is discrete; reject fractional values such as 2.5 at validation time.
  maxRetries: z.number().int().min(0).max(10).default(3)
});
|
|
14
|
+
/**
 * Validation schema for the `firecrawl` section. The whole section may be
 * omitted; when present, `apiKey` is optional and `enabled` defaults to true.
 */
var FirecrawlConfigSchema = z.optional(
  z.object({
    apiKey: z.optional(z.string()),
    enabled: z.boolean().default(true)
  })
);
|
|
18
|
+
/**
 * Validation schema for the `enrichment` section.
 *
 * The three `enable*` switches and `forceRefresh` are booleans,
 * `imageCacheDir` is a string, and `checkpointInterval` is a whole number
 * from 1 to 10000. Every field has a default.
 */
var EnrichmentConfigSchema = z.object({
  enableVisionAnalysis: z.boolean().default(true),
  enableAudioTranscription: z.boolean().default(true),
  enableLinkEnrichment: z.boolean().default(true),
  imageCacheDir: z.string().default("./.cache/images"),
  // The interval is a count, so reject fractional values at validation time.
  checkpointInterval: z.number().int().min(1).max(1e4).default(100),
  forceRefresh: z.boolean().default(false)
});
|
|
26
|
+
/**
 * Validation schema for the `render` section.
 *
 * Three boolean rendering switches (all default to true) plus
 * `maxNestingDepth`, a whole number from 1 to 100 that defaults to 10.
 */
var RenderConfigSchema = z.object({
  groupByTimeOfDay: z.boolean().default(true),
  renderRepliesAsNested: z.boolean().default(true),
  renderTapbacksAsEmoji: z.boolean().default(true),
  // A nesting depth is a level count; fractional values are meaningless.
  maxNestingDepth: z.number().int().min(1).max(100).default(10)
});
|
|
32
|
+
/**
 * Root schema for the whole configuration document.
 *
 * `version` and `attachmentRoots` have defaults, `gemini` is validated by
 * GeminiConfigSchema, `firecrawl` by FirecrawlConfigSchema, and `enrichment`
 * and `render` fall back to fully populated default objects when absent.
 */
var ConfigSchema = z.object({
  version: z.string().default("1.0"),
  attachmentRoots: z
    .array(z.string().min(1, "Attachment root path cannot be empty"))
    .min(1, "At least one attachment root is required")
    .default(["~/Library/Messages/Attachments"]),
  gemini: GeminiConfigSchema,
  firecrawl: FirecrawlConfigSchema,
  // Section-level defaults used when the whole `enrichment` key is missing.
  enrichment: EnrichmentConfigSchema.default({
    enableVisionAnalysis: true,
    enableAudioTranscription: true,
    enableLinkEnrichment: true,
    imageCacheDir: "./.cache/images",
    checkpointInterval: 100,
    forceRefresh: false
  }),
  // Section-level defaults used when the whole `render` key is missing.
  render: RenderConfigSchema.default({
    groupByTimeOfDay: true,
    renderRepliesAsNested: true,
    renderTapbacksAsEmoji: true,
    maxNestingDepth: 10
  })
});
|
|
52
|
+
/**
 * Validates a raw config object against ConfigSchema.
 *
 * @param {unknown} config - Candidate configuration.
 * @returns The parsed configuration produced by `ConfigSchema.parse`.
 * @throws Whatever `ConfigSchema.parse` throws when validation fails.
 */
function validateConfig(config) {
  const validated = ConfigSchema.parse(config);
  return validated;
}
|
|
55
|
+
/**
 * Non-throwing variant of config validation.
 *
 * @param {unknown} config - Candidate configuration.
 * @returns {{success: true, data: object} | {success: false, errors: {path: string, message: string}[]}}
 *   On success the parsed config; on failure one entry per validation issue,
 *   with the issue path joined by ".".
 */
function validateConfigSafe(config) {
  const result = ConfigSchema.safeParse(config);
  if (result.success) {
    return { success: true, data: result.data };
  }
  // `issues` is the canonical issue list on ZodError; `errors` is only an
  // alias and is not guaranteed to exist, so fall back to it instead of
  // depending on it.
  const issues = result.error.issues ?? result.error.errors ?? [];
  return {
    success: false,
    errors: issues.map((issue) => ({
      path: issue.path.join("."),
      message: issue.message
    }))
  };
}
|
|
68
|
+
/**
 * Baseline configuration values. Holds only the sections that have complete
 * defaults (`version`, `attachmentRoots`, `enrichment`, `render`); there is
 * no `gemini` or `firecrawl` entry here.
 */
var DEFAULT_CONFIG = {
  version: "1.0",
  attachmentRoots: ["~/Library/Messages/Attachments"],

  // Enrichment pipeline switches and tuning.
  enrichment: {
    enableVisionAnalysis: true,
    enableAudioTranscription: true,
    enableLinkEnrichment: true,
    imageCacheDir: "./.cache/images",
    checkpointInterval: 100,
    forceRefresh: false
  },

  // Markdown rendering options.
  render: {
    groupByTimeOfDay: true,
    renderRepliesAsNested: true,
    renderTapbacksAsEmoji: true,
    maxNestingDepth: 10
  }
};
|
|
86
|
+
// Relative config file names, listed as YAML (.yaml, .yml) and then JSON.
var CONFIG_FILE_PATTERNS = ["yaml", "yml", "json"].map(
  (extension) => `./imessage-config.${extension}`
);
|
|
91
|
+
/**
 * Determines the config format from a file path's extension.
 *
 * Matching is case-insensitive so that names such as `Config.YAML` or
 * `settings.JSON` are accepted rather than rejected.
 *
 * @param {string} filePath - Path or file name of the config file.
 * @returns {"json" | "yaml"} The detected format.
 * @throws {Error} When the extension is not .json, .yaml or .yml.
 */
function detectConfigFormat(filePath) {
  const lowered = filePath.toLowerCase();
  if (lowered.endsWith(".json")) {
    return "json";
  }
  if (lowered.endsWith(".yaml") || lowered.endsWith(".yml")) {
    return "yaml";
  }
  // Report the path exactly as the caller supplied it.
  throw new Error(`Unsupported config file format: ${filePath}. Supported formats: .json, .yaml, .yml`);
}
|
|
100
|
+
|
|
101
|
+
// src/config/generator.ts
|
|
102
|
+
var CONFIG_TEMPLATE_WITH_DOCS = `# iMessage Timeline Configuration File
|
|
103
|
+
# Generated by: chatline init
|
|
104
|
+
#
|
|
105
|
+
# This file configures the iMessage message export and enrichment pipeline.
|
|
106
|
+
# Supports both YAML and JSON formats with environment variable substitution.
|
|
107
|
+
|
|
108
|
+
# Schema version (for future migrations)
|
|
109
|
+
version: "1.0"
|
|
110
|
+
|
|
111
|
+
# ============================================================================
|
|
112
|
+
# Attachment Resolution
|
|
113
|
+
# ============================================================================
|
|
114
|
+
|
|
115
|
+
# Directories to search for message attachments
|
|
116
|
+
# Multiple paths supported; first match wins
|
|
117
|
+
# Environment variable expansion supported: \${HOME}/Library/Messages
|
|
118
|
+
attachmentRoots:
|
|
119
|
+
- "~/Library/Messages/Attachments"
|
|
120
|
+
# Add additional search paths if needed:
|
|
121
|
+
# - "/path/to/imazing/backup/attachments"
|
|
122
|
+
|
|
123
|
+
# ============================================================================
|
|
124
|
+
# Gemini AI Configuration
|
|
125
|
+
# ============================================================================
|
|
126
|
+
|
|
127
|
+
gemini:
|
|
128
|
+
# Google Gemini API key (required for AI enrichment)
|
|
129
|
+
# Get your key from: https://makersuite.google.com/app/apikey
|
|
130
|
+
# Environment variable recommended for security: \${GEMINI_API_KEY}
|
|
131
|
+
apiKey: "\${GEMINI_API_KEY}"
|
|
132
|
+
|
|
133
|
+
# Gemini model to use for enrichment
|
|
134
|
+
# Options: gemini-1.5-pro, gemini-1.5-flash
|
|
135
|
+
# Default: gemini-1.5-pro (more accurate, slower)
|
|
136
|
+
model: "gemini-1.5-pro"
|
|
137
|
+
|
|
138
|
+
# Delay between API calls (milliseconds)
|
|
139
|
+
# Prevents rate limiting; adjust based on your quota
|
|
140
|
+
# Free tier: 60 requests/minute = 1000ms delay minimum
|
|
141
|
+
# Default: 1000 (1 second)
|
|
142
|
+
rateLimitDelay: 1000
|
|
143
|
+
|
|
144
|
+
# Maximum retries for failed API calls
|
|
145
|
+
# Applies exponential backoff with jitter
|
|
146
|
+
# Range: 0-10
|
|
147
|
+
# Default: 3
|
|
148
|
+
maxRetries: 3
|
|
149
|
+
|
|
150
|
+
# ============================================================================
|
|
151
|
+
# Firecrawl Configuration (Optional)
|
|
152
|
+
# ============================================================================
|
|
153
|
+
|
|
154
|
+
# firecrawl:
|
|
155
|
+
# # Firecrawl API key for enhanced link scraping (optional)
|
|
156
|
+
# # Get your key from: https://firecrawl.dev
|
|
157
|
+
# # Falls back to built-in scrapers if not provided
|
|
158
|
+
# apiKey: "\${FIRECRAWL_API_KEY}"
|
|
159
|
+
#
|
|
160
|
+
# # Enable/disable Firecrawl for link enrichment
|
|
161
|
+
# # Default: true (if apiKey provided)
|
|
162
|
+
# enabled: true
|
|
163
|
+
|
|
164
|
+
# ============================================================================
|
|
165
|
+
# Enrichment Pipeline Configuration
|
|
166
|
+
# ============================================================================
|
|
167
|
+
|
|
168
|
+
enrichment:
|
|
169
|
+
# Enable image analysis (captions, descriptions via Gemini Vision)
|
|
170
|
+
# HEIC/TIFF files converted to JPG for analysis
|
|
171
|
+
# Default: true
|
|
172
|
+
enableVisionAnalysis: true
|
|
173
|
+
|
|
174
|
+
# Enable audio transcription (voice memos, audio messages)
|
|
175
|
+
# Includes timestamps, speaker detection, and summaries
|
|
176
|
+
# Default: true
|
|
177
|
+
enableAudioTranscription: true
|
|
178
|
+
|
|
179
|
+
# Enable link context extraction (titles, summaries, metadata)
|
|
180
|
+
# Uses Firecrawl if configured, falls back to built-in scrapers
|
|
181
|
+
# Default: true
|
|
182
|
+
enableLinkEnrichment: true
|
|
183
|
+
|
|
184
|
+
# Cache directory for image previews (HEIC/TIFF → JPG)
|
|
185
|
+
# Prevents redundant conversions on re-runs
|
|
186
|
+
# Default: ./.cache/images
|
|
187
|
+
imageCacheDir: "./.cache/images"
|
|
188
|
+
|
|
189
|
+
# Checkpoint interval (messages per checkpoint)
|
|
190
|
+
# Lower = more frequent checkpoints = safer but slower
|
|
191
|
+
# Higher = faster but more work lost on interruption
|
|
192
|
+
# Range: 1-10000
|
|
193
|
+
# Default: 100
|
|
194
|
+
checkpointInterval: 100
|
|
195
|
+
|
|
196
|
+
# Force refresh of existing enrichments
|
|
197
|
+
# If true, re-enriches messages even if already enriched
|
|
198
|
+
# Default: false (skip already-enriched messages)
|
|
199
|
+
forceRefresh: false
|
|
200
|
+
|
|
201
|
+
# ============================================================================
|
|
202
|
+
# Markdown Rendering Configuration
|
|
203
|
+
# ============================================================================
|
|
204
|
+
|
|
205
|
+
render:
|
|
206
|
+
# Group messages by time-of-day (Morning, Afternoon, Evening)
|
|
207
|
+
# If false, renders as flat chronological list
|
|
208
|
+
# Default: true
|
|
209
|
+
groupByTimeOfDay: true
|
|
210
|
+
|
|
211
|
+
# Render replies as nested blockquotes under parent message
|
|
212
|
+
# If false, renders as flat list with reply indicators
|
|
213
|
+
# Default: true
|
|
214
|
+
renderRepliesAsNested: true
|
|
215
|
+
|
|
216
|
+
# Render tapbacks (reactions) as emoji (❤️, \uD83D\uDE02, etc.)
|
|
217
|
+
# If false, renders as text descriptions
|
|
218
|
+
# Default: true
|
|
219
|
+
renderTapbacksAsEmoji: true
|
|
220
|
+
|
|
221
|
+
# Maximum nesting depth for nested replies
|
|
222
|
+
# Prevents infinite recursion in circular reply chains
|
|
223
|
+
# Range: 1-100
|
|
224
|
+
# Default: 10
|
|
225
|
+
maxNestingDepth: 10
|
|
226
|
+
`;
|
|
227
|
+
/**
 * Object serialized to produce the JSON flavour of the starter config.
 * The underscore-prefixed keys carry human-readable notes inside the JSON
 * output. `apiKey` is the literal placeholder text "${GEMINI_API_KEY}"
 * (an ordinary string, not a template literal).
 */
var CONFIG_TEMPLATE_JSON = {
  _comment: "iMessage Timeline Configuration File",
  _generated: "Generated by: chatline init",
  _docs: "Full documentation: https://github.com/your-repo/chatline",

  version: "1.0",
  attachmentRoots: ["~/Library/Messages/Attachments"],

  gemini: {
    _comment: "Get API key from: https://makersuite.google.com/app/apikey",
    apiKey: "${GEMINI_API_KEY}",
    model: "gemini-1.5-pro",
    rateLimitDelay: 1000,
    maxRetries: 3
  },

  enrichment: {
    enableVisionAnalysis: true,
    enableAudioTranscription: true,
    enableLinkEnrichment: true,
    imageCacheDir: "./.cache/images",
    checkpointInterval: 100,
    forceRefresh: false
  },

  render: {
    groupByTimeOfDay: true,
    renderRepliesAsNested: true,
    renderTapbacksAsEmoji: true,
    maxNestingDepth: 10
  }
};
|
|
255
|
+
/**
 * Produces the text of a starter config file.
 *
 * @param {string} format - "yaml" selects the commented YAML template; any
 *   other value selects the JSON template.
 * @returns {string} The YAML template verbatim, or the JSON template
 *   pretty-printed with two-space indentation and a trailing newline.
 */
function generateConfigContent(format) {
  switch (format) {
    case "yaml":
      return CONFIG_TEMPLATE_WITH_DOCS;
    default: {
      const serialized = JSON.stringify(CONFIG_TEMPLATE_JSON, null, 2);
      return `${serialized}\n`;
    }
  }
}
|
|
262
|
+
/**
 * Parses generated config text and checks it against the config schema.
 *
 * YAML is parsed with the JSON-compatible schema, the same one the config
 * loader uses, so content accepted here is interpreted identically when the
 * file is later loaded.
 *
 * @param {string} content - Config file text.
 * @param {string} format - "json" parses as JSON; anything else as YAML.
 * @returns {{valid: true} | {valid: false, errors: string[]}} Never throws;
 *   parse and validation failures are reported through `errors`.
 */
function validateGeneratedConfig(content, format) {
  try {
    const parsed = format === "json"
      ? JSON.parse(content)
      : yaml.load(content, { schema: yaml.JSON_SCHEMA });
    validateConfig(parsed);
    return { valid: true };
  } catch (error) {
    const message = error instanceof Error ? error.message : "Unknown validation error";
    return { valid: false, errors: [message] };
  }
}
|
|
285
|
+
/**
 * Returns the default config file path for a format.
 *
 * @param {string} format - "yaml" selects the YAML file name.
 * @returns {string} "./imessage-config.yaml" for "yaml"; otherwise
 *   "./imessage-config.json".
 */
function getDefaultConfigPath(format) {
  if (format === "yaml") {
    return "./imessage-config.yaml";
  }
  return "./imessage-config.json";
}
|
|
288
|
+
// src/config/loader.ts
|
|
289
|
+
import { constants as constants2 } from "node:fs";
|
|
290
|
+
import { access as access2, readFile } from "node:fs/promises";
|
|
291
|
+
import yaml2 from "js-yaml";
|
|
292
|
+
// Module-level memo of the most recently validated config (null when empty).
var configCache = null;
// The explicit `configPath` the memo was stored under, or null when none was given.
var configCachePath = null;
|
|
294
|
+
/**
 * Looks for a readable config file in a directory.
 *
 * Candidates are tried in order: imessage-config.yaml, imessage-config.yml,
 * imessage-config.json. The first one readable by the current process wins.
 *
 * @param {string} [baseDir=process.cwd()] - Directory to search.
 * @returns {Promise<string | null>} Path of the first readable candidate, or
 *   null when none is readable.
 */
async function discoverConfigFile(baseDir = process.cwd()) {
  const fileNames = [
    "imessage-config.yaml",
    "imessage-config.yml",
    "imessage-config.json"
  ];
  // path.isAbsolute also recognises Windows drive and UNC roots; a plain
  // startsWith("/") check would treat those as relative and prefix them
  // with "./", producing an unusable path.
  const baseIsAbsolute = path.isAbsolute(baseDir);
  for (const fileName of fileNames) {
    const filePath = baseIsAbsolute
      ? `${baseDir}/${fileName}`
      : `./${baseDir}/${fileName}`.replace(/\/\.\//g, "/");
    try {
      await access2(filePath, constants2.R_OK);
      return filePath;
    } catch {
      // Candidate is missing or unreadable: intentionally ignored so the
      // next file name is tried.
    }
  }
  return null;
}
|
|
309
|
+
/**
 * Reads and parses a config file as JSON or YAML, based on its extension.
 *
 * The format is resolved before touching the file system, so an unsupported
 * extension is reported without reading the file. Parse failures are
 * re-thrown with the file path in the message and the original error
 * attached as `cause`.
 *
 * @param {string} filePath - Path to the config file.
 * @returns {Promise<unknown>} The parsed document, not yet validated.
 * @throws {Error} On unsupported extension, read failure, or parse failure.
 */
async function loadConfigFile(filePath) {
  const format = detectConfigFormat(filePath);
  const content = await readFile(filePath, "utf-8");
  const describe = (error) => (error instanceof Error ? error.message : String(error));

  if (format === "json") {
    try {
      return JSON.parse(content);
    } catch (error) {
      throw new Error(`Failed to parse JSON config file ${filePath}: ${describe(error)}`, { cause: error });
    }
  }
  if (format === "yaml") {
    try {
      return yaml2.load(content, { schema: yaml2.JSON_SCHEMA });
    } catch (error) {
      throw new Error(`Failed to parse YAML config file ${filePath}: ${describe(error)}`, { cause: error });
    }
  }
  // Defensive: only reachable if the format detector starts returning a
  // value this function does not handle.
  throw new Error(`Unsupported config format: ${filePath}`);
}
|
|
327
|
+
/**
 * Recursively replaces `${NAME}` placeholders in every string of a value
 * with the matching entry from `process.env`.
 *
 * Arrays and objects are rebuilt (the input is not mutated); values that are
 * not strings, arrays, or objects are returned unchanged.
 *
 * @param {unknown} obj - Value to expand.
 * @returns {unknown} A copy of the value with placeholders expanded.
 * @throws {Error} When a referenced environment variable is not set.
 */
function substituteEnvVars(obj) {
  if (Array.isArray(obj)) {
    return obj.map((item) => substituteEnvVars(item));
  }
  if (obj !== null && typeof obj === "object") {
    const expandedPairs = Object.entries(obj).map(([key, value]) => [key, substituteEnvVars(value)]);
    return Object.fromEntries(expandedPairs);
  }
  if (typeof obj !== "string") {
    return obj;
  }
  // A placeholder name consists of word characters only.
  return obj.replace(/\$\{(\w+)\}/g, (_match, envVar) => {
    const resolved = process.env[envVar];
    if (resolved === undefined) {
      throw new Error(`Environment variable ${envVar} is not set but referenced in config`);
    }
    return resolved;
  });
}
|
|
348
|
+
/**
 * Merges CLI overrides on top of a file-based config.
 *
 * Top-level keys from `cliOptions` win over `fileConfig`. The `gemini`,
 * `enrichment` and `render` sections are merged one level deep, so a CLI
 * override of a single field keeps the file's other fields. `firecrawl` is
 * replaced wholesale rather than merged.
 *
 * CLI entries whose value is `undefined` are treated as "not provided" and
 * never overwrite a value from the file. A nullish `fileConfig` (for example
 * from an empty config file) is treated as an empty object.
 *
 * @param {object} fileConfig - Config loaded from disk.
 * @param {object} [cliOptions={}] - Overrides from the command line.
 * @returns {object} A new merged object; the inputs are not mutated.
 */
function mergeConfig(fileConfig, cliOptions = {}) {
  // Drops entries whose value is undefined so they cannot clobber real values.
  const definedOnly = (source) =>
    Object.fromEntries(Object.entries(source ?? {}).filter(([, value]) => value !== undefined));

  const base = fileConfig ?? {};
  const overrides = definedOnly(cliOptions);
  // `firecrawl` needs no special handling: this spread already picks the CLI
  // value when it is defined and the file value otherwise.
  const merged = { ...base, ...overrides };

  for (const section of ["gemini", "enrichment", "render"]) {
    if (base[section] || overrides[section]) {
      merged[section] = {
        ...base[section],
        ...definedOnly(overrides[section])
      };
    }
  }
  return merged;
}
|
|
378
|
+
/**
 * Loads, expands, merges and validates the configuration.
 *
 * Steps: use `configPath` or discover a config file in the working
 * directory; parse it; expand `${VAR}` placeholders; merge `cliOptions` on
 * top; validate. The validated result is stored in the module-level cache.
 *
 * @param {object} [options]
 * @param {string} [options.configPath] - Explicit config file path.
 * @param {object} [options.cliOptions={}] - Overrides merged over the file.
 * @param {boolean} [options.skipCache=false] - Bypass the cached result.
 * @returns {Promise<object>} The validated configuration.
 * @throws {Error} "Failed to load config from …" on read, parse or
 *   substitution errors; "Config validation failed: …" on schema errors.
 *   The underlying error is attached as `cause`.
 */
async function loadConfig(options = {}) {
  const { configPath, cliOptions = {}, skipCache = false } = options;
  // NOTE(review): the cache is keyed on `configPath` alone. `cliOptions` are
  // not part of the key, and because the stored key is `configPath || null`
  // while the comparison uses the raw value, a call without `configPath`
  // (undefined) never matches the stored null and always reloads. Left
  // unchanged here; confirm the intended caching contract before altering it.
  if (!skipCache && configCache && configCachePath === configPath) {
    return configCache;
  }
  const filePath = configPath || await discoverConfigFile();
  let fileConfig = {};
  if (filePath) {
    try {
      const rawConfig = await loadConfigFile(filePath);
      // An empty YAML document or a JSON `null` parses to a nullish value;
      // treat it as an empty config so the merge step cannot crash and the
      // schema reports what is missing.
      fileConfig = substituteEnvVars(rawConfig) ?? {};
    } catch (error) {
      throw new Error(
        `Failed to load config from ${filePath}: ${error instanceof Error ? error.message : String(error)}`,
        { cause: error }
      );
    }
  }
  const merged = mergeConfig(fileConfig, cliOptions);
  try {
    const validated = validateConfig(merged);
    configCache = validated;
    configCachePath = configPath || null;
    return validated;
  } catch (error) {
    throw new Error(
      `Config validation failed: ${error instanceof Error ? error.message : String(error)}`,
      { cause: error }
    );
  }
}
|
|
404
|
+
/**
 * Empties the module-level config cache by resetting both the cached config
 * and the path it was stored under to null.
 */
function clearConfigCache() {
  configCachePath = null;
  configCache = null;
}
|
|
408
|
+
/**
 * Reports whether a validated config is currently held in the cache.
 *
 * @returns {boolean} True when the cached config is not null.
 */
function isConfigCached() {
  const hasCachedConfig = configCache !== null;
  return hasCachedConfig;
}
|
|
411
|
+
// src/enrich/rate-limiting.ts
|
|
412
|
+
/**
 * Tracks call pacing, retry decisions and a simple circuit breaker.
 *
 * The class does no I/O and starts no timers: it only answers "how long
 * should I wait", "should I retry" and "is the circuit open", based on the
 * events the caller records (`recordCall`, `recordFailure`, `recordSuccess`).
 * All delays are in milliseconds.
 */
class RateLimiter {
  config;
  state;

  /**
   * @param {object} [partialConfig] - Any subset of the settings.
   *   Defaults: rateLimitDelay 1000, maxRetries 3,
   *   circuitBreakerThreshold 5, circuitBreakerResetMs 60000.
   * @throws {Error} When a setting is out of range or NaN.
   */
  constructor(partialConfig) {
    this.config = {
      rateLimitDelay: partialConfig?.rateLimitDelay ?? 1000,
      maxRetries: partialConfig?.maxRetries ?? 3,
      circuitBreakerThreshold: partialConfig?.circuitBreakerThreshold ?? 5,
      circuitBreakerResetMs: partialConfig?.circuitBreakerResetMs ?? 60000
    };
    this.validateConfig(this.config);
    this.state = {
      consecutiveFailures: 0,
      circuitOpen: false,
      circuitOpenedAt: null,
      lastCallTime: null
    };
  }

  /**
   * Range-checks a full config object.
   *
   * Checks are written as `!(value >= bound)` so that NaN, which compares
   * false against everything, is rejected instead of slipping through.
   *
   * @throws {Error} Naming the first offending setting.
   */
  validateConfig(config) {
    if (!(config.rateLimitDelay >= 0)) {
      throw new Error("rateLimitDelay must be non-negative");
    }
    if (!(config.maxRetries >= 0)) {
      throw new Error("maxRetries must be non-negative");
    }
    if (!(config.circuitBreakerThreshold >= 1)) {
      throw new Error("circuitBreakerThreshold must be >= 1");
    }
    if (!(config.circuitBreakerResetMs >= 0)) {
      throw new Error("circuitBreakerResetMs must be non-negative");
    }
  }

  /**
   * @returns {number} Milliseconds still to wait before the next call so
   *   that calls are spaced by `rateLimitDelay`; 0 when no wait is needed or
   *   no call has been recorded yet.
   */
  shouldRateLimit() {
    if (this.state.lastCallTime === null) {
      return 0;
    }
    const timeSinceLastCall = Date.now() - this.state.lastCallTime;
    const requiredDelay = this.config.rateLimitDelay;
    if (timeSinceLastCall < requiredDelay) {
      return requiredDelay - timeSinceLastCall;
    }
    return 0;
  }

  /** Stamps the current time as the moment of the latest call. */
  recordCall() {
    this.state.lastCallTime = Date.now();
  }

  /**
   * @param {number} attemptNumber - Exponent of the backoff.
   * @returns {number} 2^attemptNumber seconds, in milliseconds, with random
   *   jitter of up to ±25% applied.
   */
  calculateExponentialBackoff(attemptNumber) {
    const baseDelaySeconds = 2 ** attemptNumber;
    const baseDelayMs = baseDelaySeconds * 1000;
    const jitterAmount = baseDelayMs * 0.25;
    const jitter = (Math.random() - 0.5) * 2 * jitterAmount;
    return baseDelayMs + jitter;
  }

  /**
   * Converts a Retry-After value into a delay.
   *
   * Accepted forms: a non-negative number of seconds, a numeric string of
   * seconds, or a date string (the delay is then the time remaining until
   * that date, floored at 0).
   *
   * @param {unknown} retryAfterValue - Raw header value.
   * @returns {number | null} Delay in milliseconds, or null when the value
   *   is absent or not understood.
   */
  parseRetryAfterHeader(retryAfterValue) {
    if (retryAfterValue === undefined || retryAfterValue === null) {
      return null;
    }
    if (typeof retryAfterValue === "number") {
      // A negative or non-finite delay is meaningless; report "unknown" so
      // the caller falls back to its own backoff.
      return Number.isFinite(retryAfterValue) && retryAfterValue >= 0
        ? retryAfterValue * 1000
        : null;
    }
    const strValue = String(retryAfterValue).trim();
    // Only treat the value as seconds when the whole string is numeric.
    // parseInt alone would read "2099-01-01T00:00:00Z" as 2099 seconds.
    if (/^\d+(?:\.\d+)?$/.test(strValue)) {
      return Number.parseInt(strValue, 10) * 1000;
    }
    const timestamp = Date.parse(strValue);
    if (!Number.isNaN(timestamp)) {
      return Math.max(0, timestamp - Date.now());
    }
    return null;
  }

  /**
   * Decides whether a response should be retried and after how long.
   *
   * Only 429 and 5xx responses are retryable. A usable Retry-After header
   * takes precedence over exponential backoff. The header is looked up by
   * its exact name first, then through a `get()` method when the container
   * has one, then by a case-insensitive key match.
   *
   * @param {{status: number, headers?: object}} response
   * @param {number} attemptNumber - Passed to the backoff calculation.
   * @returns {{shouldRetry: boolean, delayMs: number}}
   */
  getRetryStrategy(response, attemptNumber) {
    const { status, headers } = response;
    if (status >= 200 && status < 300) {
      return { shouldRetry: false, delayMs: 0 };
    }
    const isRetryableStatus = status === 429 || (status >= 500 && status < 600);
    if (!isRetryableStatus) {
      return { shouldRetry: false, delayMs: 0 };
    }

    let retryAfterValue = headers?.["Retry-After"];
    if (retryAfterValue === undefined || retryAfterValue === null) {
      if (typeof headers?.get === "function") {
        retryAfterValue = headers.get("Retry-After");
      } else if (headers !== null && typeof headers === "object") {
        const matchingKey = Object.keys(headers).find((name) => name.toLowerCase() === "retry-after");
        if (matchingKey !== undefined) {
          retryAfterValue = headers[matchingKey];
        }
      }
    }

    const retryAfterMs = this.parseRetryAfterHeader(retryAfterValue);
    if (retryAfterMs !== null) {
      return { shouldRetry: true, delayMs: retryAfterMs };
    }
    const backoffMs = this.calculateExponentialBackoff(attemptNumber);
    return { shouldRetry: true, delayMs: backoffMs };
  }

  /**
   * @param {number} attemptNumber
   * @returns {boolean} True while `attemptNumber` does not exceed `maxRetries`.
   */
  shouldRetryAttempt(attemptNumber) {
    return attemptNumber <= this.config.maxRetries;
  }

  /**
   * @returns {boolean} True while the circuit is open. Once
   *   `circuitBreakerResetMs` has elapsed since it opened, the breaker is
   *   reset as a side effect and false is returned.
   */
  isCircuitOpen() {
    if (!this.state.circuitOpen) {
      return false;
    }
    const timeSinceOpened = Date.now() - (this.state.circuitOpenedAt ?? 0);
    if (timeSinceOpened >= this.config.circuitBreakerResetMs) {
      this.resetCircuitBreaker();
      return false;
    }
    return true;
  }

  /** Counts a failure and opens the circuit once the threshold is reached. */
  recordFailure() {
    this.state.consecutiveFailures += 1;
    if (this.state.consecutiveFailures >= this.config.circuitBreakerThreshold) {
      this.state.circuitOpen = true;
      this.state.circuitOpenedAt = Date.now();
    }
  }

  /** Clears the failure streak and closes the circuit. */
  recordSuccess() {
    this.state.consecutiveFailures = 0;
    this.state.circuitOpen = false;
    this.state.circuitOpenedAt = null;
  }

  /** Closes the circuit and clears the failure streak; `lastCallTime` is kept. */
  resetCircuitBreaker() {
    this.state.consecutiveFailures = 0;
    this.state.circuitOpen = false;
    this.state.circuitOpenedAt = null;
  }

  /** @returns {object} A shallow copy of the runtime state. */
  getState() {
    return { ...this.state };
  }

  /** @returns {object} A shallow copy of the effective configuration. */
  getConfig() {
    return { ...this.config };
  }

  /** Restores the initial runtime state, including `lastCallTime`. */
  reset() {
    this.state = {
      consecutiveFailures: 0,
      circuitOpen: false,
      circuitOpenedAt: null,
      lastCallTime: null
    };
  }
}
|
|
544
|
+
/**
 * Factory wrapper around the RateLimiter constructor.
 *
 * @param {object} [config] - Passed straight to `new RateLimiter(...)`.
 * @returns {RateLimiter} A new limiter instance.
 */
function createRateLimiter(config) {
  const limiter = new RateLimiter(config);
  return limiter;
}
|
|
547
|
+
/**
 * @param {number} status - HTTP status code.
 * @returns {boolean} True when the status is in the range 500-599.
 */
function is5xx(status) {
  const atLeast500 = status >= 500;
  const below600 = status < 600;
  return atLeast500 && below600;
}
|
|
550
|
+
/**
 * @param {number} status - HTTP status code.
 * @returns {boolean} True for 429 and for any status accepted by `is5xx`.
 */
function isRetryableStatus(status) {
  if (status === 429) {
    return true;
  }
  return is5xx(status);
}
|
|
553
|
+
// src/ingest/dedup-merge.ts
|
|
554
|
+
/**
 * Combines CSV-sourced and DB-sourced messages into one de-duplicated list.
 *
 * Matching per CSV message, in GUID order:
 *   1. exact GUID match against the DB messages;
 *   2. otherwise, for text messages, a lookup in an index keyed by
 *      `handle:normalizedText`;
 *   3. otherwise, for non-text messages, a scan of the still-unmatched DB
 *      messages via `detectContentEquivalence`.
 * Matched pairs are merged with `applyDbAuthoritiveness`; unmatched CSV
 * messages are emitted as-is, followed by every DB message that was never
 * matched.
 *
 * DB messages that have an exact GUID counterpart in the CSV set are reserved
 * up front. Without this, an earlier CSV message could claim such a DB
 * message through content matching, and the exact match would then merge the
 * same DB message a second time, yielding two output messages with one GUID.
 *
 * @param {object[]} csvMessages - Messages with at least a string `guid`.
 * @param {object[]} dbMessages - Messages with at least a string `guid`.
 * @returns {{messages: object[], stats: object}} The merged list plus
 *   counters. `stats.conflicts` is reported but never incremented here.
 */
function dedupAndMerge(csvMessages, dbMessages) {
  const sortedCsv = [...csvMessages].sort((a, b) => a.guid.localeCompare(b.guid));
  const sortedDb = [...dbMessages].sort((a, b) => a.guid.localeCompare(b.guid));
  const stats = {
    csvCount: csvMessages.length,
    dbCount: dbMessages.length,
    outputCount: 0,
    exactMatches: 0,
    contentMatches: 0,
    conflicts: 0,
    noMatches: 0
  };
  const outputMessages = [];
  const matchedDbGuids = new Set();
  const dbByGuid = new Map();
  const dbByNormalizedContent = new Map();

  // Index DB messages by GUID and, for non-empty text messages, by content key.
  for (const dbMsg of sortedDb) {
    dbByGuid.set(dbMsg.guid, dbMsg);
    if (dbMsg.messageKind === "text" && dbMsg.text) {
      const normalizedKey = `${dbMsg.handle || ""}:${normalizeTextForIndex(dbMsg.text)}`;
      const existing = dbByNormalizedContent.get(normalizedKey);
      if (existing) {
        existing.push(dbMsg);
      } else {
        dbByNormalizedContent.set(normalizedKey, [dbMsg]);
      }
    }
  }

  // Reserve every DB message that some CSV message matches by GUID, so
  // content matching below can never take it first.
  for (const csvMsg of sortedCsv) {
    if (dbByGuid.has(csvMsg.guid)) {
      matchedDbGuids.add(csvMsg.guid);
    }
  }

  for (const csvMsg of sortedCsv) {
    const exactMatch = dbByGuid.get(csvMsg.guid) || null;
    if (exactMatch) {
      outputMessages.push(applyDbAuthoritiveness(csvMsg, exactMatch));
      matchedDbGuids.add(exactMatch.guid);
      stats.exactMatches++;
      continue;
    }

    let contentMatch = detectContentEquivalenceIndexed(csvMsg, dbByNormalizedContent, matchedDbGuids);
    if (!contentMatch && csvMsg.messageKind !== "text") {
      // Non-text messages are not in the content index; fall back to a scan.
      const unmatchedDbMessages = sortedDb.filter((dbMsg) => !matchedDbGuids.has(dbMsg.guid));
      contentMatch = detectContentEquivalence(csvMsg, unmatchedDbMessages);
    }

    if (contentMatch) {
      outputMessages.push(applyDbAuthoritiveness(csvMsg, contentMatch.message));
      matchedDbGuids.add(contentMatch.message.guid);
      stats.contentMatches++;
    } else {
      outputMessages.push(csvMsg);
      stats.noMatches++;
    }
  }

  // DB-only messages are appended after all CSV-derived output.
  for (const dbMsg of sortedDb) {
    if (!matchedDbGuids.has(dbMsg.guid)) {
      outputMessages.push(dbMsg);
    }
  }

  stats.outputCount = outputMessages.length;
  return {
    messages: outputMessages,
    stats
  };
}
|
|
617
|
+
/**
 * Scans candidates for the first one that is content-equivalent to a CSV
 * message.
 *
 * A candidate qualifies when it has the same `messageKind` and the same
 * handle (a missing handle is treated as null) and either: both are "text"
 * with identical normalized text, or both are "media" with identical,
 * non-empty `media.id`. Any other kind never matches. A qualifying candidate
 * has confidence 1 and is returned only if that meets `threshold`.
 *
 * @param {object} csvMsg - Message to match.
 * @param {Iterable<object>} candidates - Messages to compare against.
 * @param {number} [threshold=0.9] - Minimum confidence required.
 * @returns {{message: object, confidence: number, reasons: string[]} | null}
 */
function detectContentEquivalence(csvMsg, candidates, threshold = 0.9) {
  for (const candidate of candidates) {
    const kind = csvMsg.messageKind;
    if (kind !== candidate.messageKind) {
      continue;
    }
    if ((csvMsg.handle || null) !== (candidate.handle || null)) {
      continue;
    }

    let matchReason = null;
    switch (kind) {
      case "text": {
        const sameText = normalizeText(csvMsg.text || "") === normalizeText(candidate.text || "");
        if (sameText) {
          matchReason = "exact text match after normalization";
        }
        break;
      }
      case "media": {
        const csvMediaId = csvMsg.media?.id;
        const candidateMediaId = candidate.media?.id;
        if (csvMediaId && candidateMediaId && csvMediaId === candidateMediaId) {
          matchReason = "exact media ID match";
        }
        break;
      }
      default:
        break;
    }
    if (matchReason === null) {
      continue;
    }

    // Every recognised match is an exact one, hence full confidence.
    const confidence = 1;
    if (confidence >= threshold) {
      return { message: candidate, confidence, reasons: [matchReason] };
    }
  }
  return null;
}
|
|
660
|
+
/**
 * Canonicalises message text for equality comparison.
 *
 * Steps: Unicode NFC normalisation, lower-casing, removal of everything
 * except letters, combining marks, digits, pictographic symbols (emoji),
 * underscores and whitespace, then collapsing whitespace runs to a single
 * space and trimming.
 *
 * The character classes are Unicode-aware. The ASCII-only `\w` class would
 * delete every non-Latin letter, so that, for example, any two Japanese or
 * Cyrillic messages would normalise to the same empty string and be treated
 * as duplicates. Results for pure-ASCII input are unchanged.
 *
 * @param {string} text - Raw message text.
 * @returns {string} The normalised text.
 */
function normalizeText(text) {
  return text
    .normalize("NFC")
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}\p{Extended_Pictographic}_\s]/gu, "")
    .replace(/\s+/g, " ")
    .trim();
}
|
|
663
|
+
/**
 * Produces the text portion of a content-index key by delegating to
 * `normalizeText`.
 *
 * @param {string} text - Raw message text.
 * @returns {string} Exactly what `normalizeText(text)` returns.
 */
function normalizeTextForIndex(text) {
  const indexText = normalizeText(text);
  return indexText;
}
|
|
666
|
+
/**
 * Index-backed content matching for text messages.
 *
 * Builds the key `handle:normalizedText` for the CSV message, reads the
 * candidate list stored under that key, and returns the first candidate
 * whose GUID is not already in `matchedGuids`.
 *
 * @param {object} csvMsg - Message to match; only non-empty "text" messages
 *   are considered.
 * @param {Map<string, object[]>} contentIndex - Candidates grouped by key.
 * @param {Set<string>} matchedGuids - GUIDs that are already taken.
 * @returns {{message: object, confidence: number, reasons: string[]} | null}
 */
function detectContentEquivalenceIndexed(csvMsg, contentIndex, matchedGuids) {
  const isIndexable = csvMsg.messageKind === "text" && Boolean(csvMsg.text);
  if (!isIndexable) {
    return null;
  }
  const lookupKey = `${csvMsg.handle || ""}:${normalizeTextForIndex(csvMsg.text)}`;
  const bucket = contentIndex.get(lookupKey);
  if (!bucket || bucket.length === 0) {
    return null;
  }
  const available = bucket.find((candidate) => !matchedGuids.has(candidate.guid));
  if (available === undefined) {
    return null;
  }
  return {
    message: available,
    confidence: 1,
    reasons: ["exact text match after normalization (indexed)"]
  };
}
|
|
686
|
+
/**
 * Merges a matched CSV/DB message pair, letting the DB win on selected
 * fields.
 *
 * Starts from a shallow copy of the CSV message, then:
 * - always takes `date` and `guid` from the DB message;
 * - takes `dateRead`, `dateDelivered`, `dateEdited`, `handle` and `isRead`
 *   from the DB message when they are not `undefined`;
 * - when the DB message has `replyingTo.targetMessageGuid`, sets it on the
 *   merged `replyingTo` while keeping the CSV message's other reply fields.
 *
 * @param {object} csvMsg - CSV-sourced message (not mutated).
 * @param {object} dbMsg - DB-sourced message.
 * @returns {object} The merged message.
 */
function applyDbAuthoritiveness(csvMsg, dbMsg) {
  const merged = { ...csvMsg, date: dbMsg.date };

  for (const field of ["dateRead", "dateDelivered", "dateEdited", "handle"]) {
    if (dbMsg[field] !== undefined) {
      merged[field] = dbMsg[field];
    }
  }

  const targetMessageGuid = dbMsg.replyingTo?.targetMessageGuid;
  if (targetMessageGuid !== undefined) {
    merged.replyingTo = { ...merged.replyingTo, targetMessageGuid };
  }

  if (dbMsg.isRead !== undefined) {
    merged.isRead = dbMsg.isRead;
  }
  merged.guid = dbMsg.guid;
  return merged;
}
|
|
708
|
+
// src/ingest/ingest-csv.ts
|
|
709
|
+
import { existsSync, readdirSync, readFileSync } from "node:fs";
|
|
710
|
+
import * as os from "node:os";
|
|
711
|
+
import * as path from "node:path";
|
|
712
|
+
import { parse } from "csv-parse/sync";
|
|
713
|
+
|
|
714
|
+
// src/schema/message.ts
|
|
715
|
+
import { z as z2 } from "zod";
|
|
716
|
+
/**
 * Schema for one enrichment record attached to a media item.
 *
 * Required fields: `kind`, `createdAt` (datetime string), `provider` and
 * `version`. All other fields are optional payload for the various
 * enrichment kinds. `createdAt` must additionally end in "Z"; a violation is
 * reported on the `createdAt` path, matching how the sibling schemas report
 * their own timestamp issues.
 */
var MediaEnrichmentSchema = z2.object({
  kind: z2.enum([
    "image",
    "audio",
    "link",
    "video",
    "pdf",
    "unknown",
    "transcription",
    "pdf_summary",
    "video_metadata",
    "link_context",
    "image_analysis"
  ]),
  model: z2.string().optional(),
  createdAt: z2.string().datetime(),
  visionSummary: z2.string().optional(),
  shortDescription: z2.string().optional(),
  transcription: z2.string().optional(),
  transcript: z2.string().optional(),
  speakers: z2.array(z2.string()).optional(),
  timestamps: z2.array(z2.object({
    time: z2.string(),
    speaker: z2.string(),
    content: z2.string()
  })).optional(),
  pdfSummary: z2.string().optional(),
  videoMetadata: z2.object({
    filename: z2.string().optional(),
    size: z2.number().optional(),
    duration: z2.number().optional(),
    analyzed: z2.boolean().optional(),
    note: z2.string().optional()
  }).optional(),
  error: z2.string().optional(),
  usedFallback: z2.boolean().optional(),
  failedProviders: z2.array(z2.string()).optional(),
  url: z2.string().url().optional(),
  title: z2.string().optional(),
  summary: z2.string().optional(),
  provider: z2.enum([
    "gemini",
    "firecrawl",
    "local",
    "youtube",
    "spotify",
    "twitter",
    "instagram",
    "generic"
  ]),
  version: z2.string()
}).superRefine((enrichment, ctx) => {
  if (enrichment.createdAt && !enrichment.createdAt.match(/Z$/)) {
    ctx.addIssue({
      code: z2.ZodIssueCode.custom,
      message: "enrichment.createdAt must be ISO 8601 with Z suffix (UTC)",
      // Point the issue at the offending field instead of the object root.
      path: ["createdAt"]
    });
  }
});
|
|
775
|
+
/**
 * Schema for the provenance record attached to a media item: the ingest
 * source plus the `lastSeen` and `resolvedAt` timestamps.
 *
 * On top of the datetime check, each timestamp must end in "Z"; a custom
 * issue is reported on the offending field otherwise.
 */
var MediaProvenanceSchema = z2.object({
  source: z2.enum(["csv", "db", "merged"]),
  lastSeen: z2.string().datetime(),
  resolvedAt: z2.string().datetime()
}).superRefine((prov, ctx) => {
  // Same rule for both fields, checked in declaration order.
  for (const key of ["lastSeen", "resolvedAt"]) {
    const stamp = prov[key];
    if (stamp && !stamp.endsWith("Z")) {
      ctx.addIssue({
        code: z2.ZodIssueCode.custom,
        message: `${key} must be ISO 8601 with Z suffix (UTC)`,
        path: [key]
      });
    }
  }
});
|
|
795
|
+
/**
 * Schema for the metadata of a single media attachment.
 *
 * `id`, `filename` and `path` are required strings; everything else is
 * optional. A non-empty `path` must begin with "/" or a custom issue is
 * raised on the `path` field.
 */
var MediaMetaSchema = z2.object({
  id: z2.string(),
  filename: z2.string(),
  path: z2.string(),
  size: z2.number().optional(),
  mimeType: z2.string().optional(),
  uti: z2.string().nullable().optional(),
  isSticker: z2.boolean().optional(),
  hidden: z2.boolean().optional(),
  mediaKind: z2.enum(["image", "audio", "video", "pdf", "unknown"]).optional(),
  enrichment: z2.array(MediaEnrichmentSchema).optional(),
  provenance: MediaProvenanceSchema.optional()
}).superRefine((media, ctx) => {
  const location = media.path;
  // An empty path is left alone here; only a non-empty relative path is flagged.
  if (!location || location.startsWith("/")) {
    return;
  }
  ctx.addIssue({
    code: z2.ZodIssueCode.custom,
    message: "media.path must be an absolute path (starting with /)",
    path: ["path"]
  });
});
|
|
816
|
+
/**
 * Schema describing the message a reply points at. All fields are optional.
 *
 * When `date` is present it must end in "Z" and must be parseable by
 * `Date.parse`; each failed check adds its own issue on the `date` field.
 */
var ReplyInfoSchema = z2.object({
  sender: z2.string().optional(),
  date: z2.string().datetime().optional(),
  text: z2.string().optional(),
  targetMessageGuid: z2.string().optional()
}).superRefine((reply, ctx) => {
  const when = reply.date;
  if (!when) {
    return;
  }
  const flagDate = (message) => ctx.addIssue({
    code: z2.ZodIssueCode.custom,
    message,
    path: ["date"]
  });
  if (!when.endsWith("Z")) {
    flagDate("replyingTo.date must be ISO 8601 with Z suffix (UTC)");
  }
  if (Number.isNaN(Date.parse(when))) {
    flagDate("replyingTo.date must be a valid ISO 8601 date");
  }
});
|
|
839
|
+
/**
 * Schema for a tapback (reaction) payload.
 *
 * `type` and `action` are required enums; the target reference fields and
 * the `emoji` string are optional.
 */
var TapbackInfoSchema = z2.object({
  type: z2.enum(["loved", "liked", "disliked", "laughed", "emphasized", "questioned", "emoji"]),
  action: z2.enum(["added", "removed"]),
  targetMessageGuid: z2.string().optional(),
  targetMessagePart: z2.number().int().optional(),
  targetText: z2.string().optional(),
  isMedia: z2.boolean().optional(),
  emoji: z2.string().optional()
});
|
|
856
|
+
/**
 * Schema for the core fields of a message. Only `guid`, `isFromMe` and
 * `date` are required; every other field is optional and many also accept
 * `null`.
 */
var MessageCoreSchema = (() => {
  // Small factories for the field shapes that repeat throughout the schema.
  const nullableString = () => z2.string().nullable().optional();
  const nullableNumber = () => z2.number().nullable().optional();
  const nullableDatetime = () => z2.string().datetime().nullable().optional();
  const optionalNumber = () => z2.number().optional();
  const optionalBoolean = () => z2.boolean().optional();

  return z2.object({
    guid: z2.string(),
    rowid: optionalNumber(),
    chatId: nullableString(),
    service: nullableString(),
    subject: nullableString(),
    handleId: nullableNumber(),
    handle: nullableString(),
    destinationCallerId: nullableString(),
    isFromMe: z2.boolean(),
    otherHandle: nullableNumber(),
    date: z2.string().datetime(),
    dateRead: nullableDatetime(),
    dateDelivered: nullableDatetime(),
    dateEdited: nullableDatetime(),
    isRead: optionalBoolean(),
    itemType: optionalNumber(),
    groupActionType: optionalNumber(),
    groupTitle: nullableString(),
    shareStatus: optionalBoolean(),
    shareDirection: z2.boolean().nullable().optional(),
    expressiveSendStyleId: nullableString(),
    balloonBundleId: nullableString(),
    threadOriginatorGuid: nullableString(),
    threadOriginatorPart: nullableNumber(),
    numReplies: optionalNumber(),
    deletedFrom: nullableNumber()
  });
})();
|
|
884
|
+
/**
 * Schema for a full message: every field of `MessageCoreSchema` (same keys,
 * same order) followed by the kind discriminator and the kind-specific
 * payloads.
 *
 * Cross-field rules enforced by the refinement:
 * - each populated date field must end in "Z";
 * - a "tapback" message needs a `tapback` payload;
 * - a "media" message needs a `media` payload, and no other kind may carry one;
 * - a media payload must have non-empty `id`, `filename` and `path`.
 */
var MessageSchema = MessageCoreSchema.extend({
  messageKind: z2.enum(["text", "media", "tapback", "notification"]),
  text: z2.string().nullable().optional(),
  tapback: TapbackInfoSchema.nullable().optional(),
  replyingTo: ReplyInfoSchema.nullable().optional(),
  replyingToRaw: z2.string().nullable().optional(),
  media: MediaMetaSchema.nullable().optional(),
  groupGuid: z2.string().nullable().optional(),
  exportTimestamp: z2.string().datetime().optional(),
  exportVersion: z2.string().optional(),
  isUnsent: z2.boolean().optional(),
  isEdited: z2.boolean().optional()
}).superRefine((msg, ctx) => {
  const report = (message, path) => ctx.addIssue({
    code: z2.ZodIssueCode.custom,
    message,
    path
  });

  // Timestamp fields are checked in this fixed order so issues are stable.
  for (const field of ["date", "dateRead", "dateDelivered", "dateEdited", "exportTimestamp"]) {
    const value = msg[field];
    if (value && typeof value === "string" && !value.endsWith("Z")) {
      report(`${field} must be ISO 8601 with Z suffix (UTC)`, [field]);
    }
  }

  const isMediaKind = msg.messageKind === "media";
  if (msg.messageKind === "tapback" && !msg.tapback) {
    report("tapback kind requires tapback payload", ["tapback"]);
  }
  if (isMediaKind && !msg.media) {
    report("media kind requires media payload", ["media"]);
  }
  if (!isMediaKind && msg.media) {
    report("media payload present on non-media message", ["media"]);
  }
  if (isMediaKind && msg.media) {
    for (const key of ["id", "filename", "path"]) {
      if (!msg.media[key]) {
        report(`media.${key} is required when messageKind is media`, ["media", key]);
      }
    }
  }
});
|
|
987
|
+
/**
 * Schema for the top-level export envelope: schema version, ingest source,
 * creation timestamp, the message list and an optional free-form `meta`
 * record. `createdAt` must end in "Z".
 */
var ExportEnvelopeSchema = z2.object({
  schemaVersion: z2.string(),
  source: z2.enum(["csv", "db", "merged"]),
  createdAt: z2.string().datetime(),
  messages: z2.array(MessageSchema),
  meta: z2.record(z2.any()).optional()
}).superRefine((envelope, ctx) => {
  const stamp = envelope.createdAt;
  if (!stamp || stamp.endsWith("Z")) {
    return;
  }
  ctx.addIssue({
    code: z2.ZodIssueCode.custom,
    message: "createdAt must be ISO 8601 with Z suffix (UTC)",
    path: ["createdAt"]
  });
});
|
|
1002
|
+
|
|
1003
|
+
// src/ingest/ingest-csv.ts
|
|
1004
|
+
/**
 * Reads a CSV export from disk and converts every row into message objects.
 *
 * @param {string} csvFilePath - Path of the CSV file, read as UTF-8.
 * @param {object} options - Passed through unchanged to `parseCSVRow`.
 * @returns {object[]} All messages produced by all rows, in row order.
 */
function ingestCSV(csvFilePath, options) {
  const rows = parse(readFileSync(csvFilePath, "utf-8"), { columns: true });
  const messages = [];
  // Row numbering starts at 2: line 1 of the file is the header row.
  for (const [index, row] of rows.entries()) {
    messages.push(...parseCSVRow(row, index + 2, options));
  }
  return messages;
}
|
|
1016
|
+
/**
 * Converts one parsed CSV row into zero or more message objects.
 *
 * A row yields a text message (when it has text and is not a notification),
 * a media message (when it names an attachment whose path can be resolved),
 * and/or a notification message. Rows whose "Message Date" cannot be
 * converted produce no messages.
 *
 * @param {object} row - Row keyed by CSV column header.
 * @param {number} lineNumber - Source line number, embedded in generated ids.
 * @param {object} options - Forwarded to `resolveAttachmentPath`.
 * @returns {object[]} Messages derived from the row.
 */
function parseCSVRow(row, lineNumber, options) {
  const {
    "Message Date": messageDate,
    "Delivered Date": deliveredDate,
    "Read Date": readDate,
    "Edited Date": editedDate,
    Service: service,
    Type: type,
    "Sender Name": senderName,
    "Sender ID": senderID,
    Status: status,
    Text: text,
    Subject: subject,
    Attachment: attachment,
    "Attachment type": attachmentType,
    "Replying to": replyingTo
  } = row;

  const date = convertToISO8601(messageDate || "");
  if (!date) {
    return [];
  }

  const rowGuid = `csv:${lineNumber}:0`;
  const isNotification = type === "Notification";

  // Fields shared by every message emitted for this row. Properties are
  // added in a fixed order so serialized output stays stable.
  const baseMessage = {
    isFromMe: type === "Outgoing" || type === "Sent",
    date
  };
  const handle = senderName || senderID;
  if (handle) {
    baseMessage.handle = handle;
  }
  if (service) {
    baseMessage.service = service;
  }
  if (subject) {
    baseMessage.subject = subject;
  }
  if (readDate) {
    baseMessage.dateRead = convertToISO8601(readDate);
  }
  if (deliveredDate) {
    baseMessage.dateDelivered = convertToISO8601(deliveredDate);
  }
  if (editedDate) {
    baseMessage.dateEdited = convertToISO8601(editedDate);
    baseMessage.isEdited = true;
  }
  if (status === "Read" || status === "Unread") {
    baseMessage.isRead = status === "Read";
  }

  const baseExportMetadata = {
    source: "csv",
    lineNumber,
    csvGuid: rowGuid
  };
  if (replyingTo) {
    baseExportMetadata.replyingTo = replyingTo;
  }

  const messages = [];

  if (!isNotification && text) {
    messages.push({
      ...baseMessage,
      guid: rowGuid,
      messageKind: "text",
      text,
      exportMetadata: baseExportMetadata
    });
  }

  if (attachment && attachment.trim() !== "") {
    const resolvedPath = resolveAttachmentPath({ filename: attachment }, {
      ...options,
      messageDate: date
    });
    // Attachments whose file cannot be located are dropped.
    if (resolvedPath) {
      messages.push({
        ...baseMessage,
        guid: `csv:${lineNumber}:0:media`,
        messageKind: "media",
        media: {
          id: `media:csv:${lineNumber}:0`,
          filename: attachment,
          path: resolvedPath,
          mimeType: attachmentType || undefined,
          mediaKind: inferMediaKind(attachmentType || "")
        },
        exportMetadata: {
          ...baseExportMetadata,
          attachmentIndex: 0
        }
      });
    }
  }

  if (isNotification) {
    messages.push({
      ...baseMessage,
      guid: rowGuid,
      messageKind: "notification",
      exportMetadata: baseExportMetadata
    });
  }

  // Fallback kept from the original flow: emit the text if nothing else was produced.
  if (messages.length === 0 && text) {
    messages.push({
      ...baseMessage,
      guid: rowGuid,
      messageKind: "text",
      text,
      exportMetadata: baseExportMetadata
    });
  }

  return messages;
}
|
|
1125
|
+
/**
 * Converts a CSV date string into a normalized ISO 8601 string.
 *
 * The first space is replaced with "T", and a "Z" is appended when the value
 * carries neither a "Z" nor a "+hh:" / "-hh:" offset.
 *
 * @param {string} csvDate - Raw date text from the CSV.
 * @returns {string|null} `Date#toISOString()` output, or null when the input
 *   is empty, contains neither "-" nor "/", or does not parse.
 */
function convertToISO8601(csvDate) {
  if (!csvDate || csvDate.trim() === "") {
    return null;
  }
  try {
    const candidate = csvDate.trim().replace(" ", "T");
    const looksLikeDate = candidate.includes("-") || candidate.includes("/");
    if (!looksLikeDate) {
      return null;
    }
    const hasZone = candidate.includes("Z") || /[+-]\d{2}:/.test(candidate);
    const parsed = new Date(hasZone ? candidate : `${candidate}Z`);
    return Number.isNaN(parsed.getTime()) ? null : parsed.toISOString();
  } catch {
    return null;
  }
}
|
|
1147
|
+
/**
 * Locates an attachment file on disk.
 *
 * Resolution order:
 * 1. `attachment.copied_path` when it starts with "/" and exists;
 * 2. `attachment.copied_path` with its first "~" replaced by the home
 *    directory, when that exists;
 * 3. for each attachment root, a file named
 *    "<date> - <senderName> - <filename>"; when no sender name is known, the
 *    first directory entry that contains the date string and ends with the
 *    filename.
 *
 * @param {object|null|undefined} attachment - May carry `copied_path`,
 *   `filename` and `senderName`.
 * @param {object} [options] - `attachmentRoots` (directories to search) and
 *   `messageDate` (ISO timestamp used to build the expected file name).
 *   Missing options or missing roots are treated as "no roots to search"
 *   instead of raising a TypeError.
 * @returns {string|null} The resolved path, or null when nothing was found.
 */
function resolveAttachmentPath(attachment, options = {}) {
  if (!attachment) {
    return null;
  }
  const { attachmentRoots, messageDate } = options ?? {};
  const roots = attachmentRoots ?? [];
  const copiedPath = attachment.copied_path;

  if (copiedPath?.startsWith("/") && existsSync(copiedPath)) {
    return copiedPath;
  }
  if (copiedPath?.startsWith("~")) {
    const expanded = copiedPath.replace("~", os.homedir());
    if (existsSync(expanded)) {
      return expanded;
    }
  }

  if (!messageDate || roots.length === 0) {
    return null;
  }

  const dateStr = formatDateForAttachmentSearch(messageDate);
  const filename = attachment.filename || "unknown";
  const senderName = attachment.senderName || "*";
  for (const root of roots) {
    const exactPath = path.join(root, `${dateStr} - ${senderName} - ${filename}`);
    if (existsSync(exactPath)) {
      return exactPath;
    }
    // "*" means the sender is unknown: scan the directory instead.
    if (senderName === "*" && existsSync(root)) {
      try {
        const match = readdirSync(root).find((entry) => entry.includes(dateStr) && entry.endsWith(filename));
        if (match) {
          return path.join(root, match);
        }
      } catch {
        // Best effort: an unreadable root is skipped and the next one is tried.
      }
    }
  }
  return null;
}
|
|
1186
|
+
/**
 * Maps a MIME type string to a coarse media kind.
 *
 * @param {string} mimeType - MIME type; may be empty.
 * @returns {"image"|"audio"|"video"|"pdf"|"unknown"} Kind derived from the
 *   "image/", "audio/" or "video/" prefix, "pdf" when the string contains
 *   "pdf", otherwise "unknown".
 */
function inferMediaKind(mimeType) {
  if (!mimeType) {
    return "unknown";
  }
  const byPrefix = ["image", "audio", "video"].find((kind) => mimeType.startsWith(`${kind}/`));
  if (byPrefix) {
    return byPrefix;
  }
  return mimeType.includes("pdf") ? "pdf" : "unknown";
}
|
|
1199
|
+
/**
 * Formats a timestamp as "YYYY-MM-DD HH MM SS" using its UTC components,
 * the date layout used when searching attachment file names.
 *
 * @param {string} isoDate - Value accepted by the `Date` constructor.
 * @returns {string} The formatted string; "" only if building it throws.
 */
function formatDateForAttachmentSearch(isoDate) {
  try {
    const moment = new Date(isoDate);
    const twoDigits = (value) => String(value).padStart(2, "0");
    const datePart = [
      moment.getUTCFullYear(),
      twoDigits(moment.getUTCMonth() + 1), // getUTCMonth() is zero-based
      twoDigits(moment.getUTCDate())
    ].join("-");
    const timePart = [
      moment.getUTCHours(),
      moment.getUTCMinutes(),
      moment.getUTCSeconds()
    ].map(twoDigits).join(" ");
    return `${datePart} ${timePart}`;
  } catch {
    return "";
  }
}
|
|
1213
|
+
/**
 * Wraps a list of messages in an export envelope stamped with the current time.
 *
 * @param {object[]} messages - Messages to include (stored by reference).
 * @returns {{schemaVersion: string, source: string, createdAt: string, messages: object[]}}
 */
function createExportEnvelope(messages) {
  const createdAt = new Date().toISOString();
  return { schemaVersion: "2.0.0", source: "csv", createdAt, messages };
}
|
|
1221
|
+
// src/utils/human.ts
|
|
1222
|
+
// Switch for human-readable console output; nothing is printed while false.
var humanEnabled = true;

/**
 * Calls `console[kind](...args)` when human output is enabled and both the
 * global console and that method exist; otherwise does nothing.
 *
 * @param {string} kind - Console method name, e.g. "info".
 * @param {...*} args - Arguments forwarded to the console method.
 */
function safeConsole(kind, ...args) {
  if (humanEnabled) {
    const sink = globalThis.console;
    sink?.[kind]?.(...args);
  }
}
|
|
1229
|
+
/**
 * Prints an informational, human-facing message via `safeConsole("info", ...)`.
 *
 * @param {...*} args - Values to print.
 */
function humanInfo(...args) {
  const level = "info";
  safeConsole(level, ...args);
}
|
|
1232
|
+
|
|
1233
|
+
// src/utils/logger.ts
|
|
1234
|
+
import { AsyncLocalStorage } from "node:async_hooks";
|
|
1235
|
+
import fs from "node:fs";
|
|
1236
|
+
import path2 from "node:path";
|
|
1237
|
+
import pino from "pino";
|
|
1238
|
+
// package.json
|
|
1239
|
+
var package_default = {
|
|
1240
|
+
author: {
|
|
1241
|
+
name: "Nathan Vale",
|
|
1242
|
+
url: "https://github.com/nathanvale"
|
|
1243
|
+
},
|
|
1244
|
+
bin: {
|
|
1245
|
+
chatline: "./dist/bin/index.js"
|
|
1246
|
+
},
|
|
1247
|
+
bugs: {
|
|
1248
|
+
url: "https://github.com/nathanvale/chatline/issues"
|
|
1249
|
+
},
|
|
1250
|
+
dependencies: {
|
|
1251
|
+
"@google/generative-ai": "^0.21.0",
|
|
1252
|
+
"@mendable/firecrawl-js": "^4.3.7",
|
|
1253
|
+
"cli-progress": "^3.12.0",
|
|
1254
|
+
commander: "^14.0.1",
|
|
1255
|
+
"csv-parse": "^5.6.0",
|
|
1256
|
+
"csv-stringify": "^6.5.2",
|
|
1257
|
+
"js-yaml": "^4.1.0",
|
|
1258
|
+
pino: "^10.0.0",
|
|
1259
|
+
sharp: "^0.34.4",
|
|
1260
|
+
zod: "^3.23.8"
|
|
1261
|
+
},
|
|
1262
|
+
description: "Extract, transform, and analyze iMessage conversations with AI-powered enrichment and timeline rendering.",
|
|
1263
|
+
devDependencies: {
|
|
1264
|
+
"@arethetypeswrong/cli": "^0.18.2",
|
|
1265
|
+
"@biomejs/biome": "^2.3.7",
|
|
1266
|
+
"@changesets/changelog-github": "^0.5.1",
|
|
1267
|
+
"@changesets/cli": "^2.29.7",
|
|
1268
|
+
"@commitlint/cli": "^20.1.0",
|
|
1269
|
+
"@commitlint/config-conventional": "^20.0.0",
|
|
1270
|
+
"@testing-library/jest-dom": "^6.9.1",
|
|
1271
|
+
"@testing-library/react": "^16.3.0",
|
|
1272
|
+
"@types/cli-progress": "^3.11.6",
|
|
1273
|
+
"@types/js-yaml": "^4.0.9",
|
|
1274
|
+
"@types/node": "^24.8.1",
|
|
1275
|
+
"@vitest/coverage-v8": "^4.0.14",
|
|
1276
|
+
"better-sqlite3": "^12.4.1",
|
|
1277
|
+
"bun-types": "^1.3.3",
|
|
1278
|
+
bunup: "^0.16.10",
|
|
1279
|
+
changesets: "^1.0.2",
|
|
1280
|
+
husky: "^9.1.7",
|
|
1281
|
+
jsdom: "^25.0.1",
|
|
1282
|
+
"lint-staged": "^15.2.10",
|
|
1283
|
+
"pino-pretty": "^11.2.2",
|
|
1284
|
+
publint: "^0.3.15",
|
|
1285
|
+
rimraf: "^6.0.1",
|
|
1286
|
+
tsx: "^4.20.6",
|
|
1287
|
+
typescript: "^5.9.3",
|
|
1288
|
+
vitest: "^4.0.14"
|
|
1289
|
+
},
|
|
1290
|
+
engines: {
|
|
1291
|
+
node: ">=22.20"
|
|
1292
|
+
},
|
|
1293
|
+
exports: {
|
|
1294
|
+
".": {
|
|
1295
|
+
types: "./dist/index.d.ts",
|
|
1296
|
+
import: "./dist/index.js"
|
|
1297
|
+
},
|
|
1298
|
+
"./package.json": "./package.json"
|
|
1299
|
+
},
|
|
1300
|
+
files: [
|
|
1301
|
+
"dist/**",
|
|
1302
|
+
"README.md",
|
|
1303
|
+
"LICENSE",
|
|
1304
|
+
"CHANGELOG.md"
|
|
1305
|
+
],
|
|
1306
|
+
funding: [
|
|
1307
|
+
{
|
|
1308
|
+
type: "github",
|
|
1309
|
+
url: "https://github.com/sponsors/nathanvale"
|
|
1310
|
+
}
|
|
1311
|
+
],
|
|
1312
|
+
homepage: "https://github.com/nathanvale/chatline#readme",
|
|
1313
|
+
imports: {
|
|
1314
|
+
"#enrich/*": "./src/enrich/*",
|
|
1315
|
+
"#ingest/*": "./src/ingest/*",
|
|
1316
|
+
"#normalize/*": "./src/normalize/*",
|
|
1317
|
+
"#render/*": "./src/render/*",
|
|
1318
|
+
"#schema/*": "./src/schema/*",
|
|
1319
|
+
"#utils/*": "./src/utils/*"
|
|
1320
|
+
},
|
|
1321
|
+
keywords: [
|
|
1322
|
+
"imessage",
|
|
1323
|
+
"typescript",
|
|
1324
|
+
"cli",
|
|
1325
|
+
"data-pipeline",
|
|
1326
|
+
"message-analysis",
|
|
1327
|
+
"timeline",
|
|
1328
|
+
"ai-enrichment"
|
|
1329
|
+
],
|
|
1330
|
+
overrides: {
|
|
1331
|
+
express: "4.22.0"
|
|
1332
|
+
},
|
|
1333
|
+
license: "MIT",
|
|
1334
|
+
"lint-staged": {
|
|
1335
|
+
"*.{ts,tsx,js,jsx,mts,cts,json}": [
|
|
1336
|
+
"biome check --write"
|
|
1337
|
+
]
|
|
1338
|
+
},
|
|
1339
|
+
main: "./dist/index.js",
|
|
1340
|
+
name: "@nathanvale/chatline",
|
|
1341
|
+
publishConfig: {
|
|
1342
|
+
access: "public",
|
|
1343
|
+
provenance: true
|
|
1344
|
+
},
|
|
1345
|
+
repository: {
|
|
1346
|
+
type: "git",
|
|
1347
|
+
url: "git+https://github.com/nathanvale/chatline.git"
|
|
1348
|
+
},
|
|
1349
|
+
scripts: {
|
|
1350
|
+
"research:actionlint": "bun scripts/firecrawl/actionlint-best-practices.ts",
|
|
1351
|
+
build: "bunx bunup",
|
|
1352
|
+
"check:publint": "publint",
|
|
1353
|
+
"check:types": "attw --pack",
|
|
1354
|
+
"check:updates": "bun pm ls || true",
|
|
1355
|
+
clean: "rimraf dist 2>/dev/null || true",
|
|
1356
|
+
cli: "node ./dist/bin/index.js",
|
|
1357
|
+
coverage: "bun test --coverage",
|
|
1358
|
+
dev: "bun src/cli.ts",
|
|
1359
|
+
"docs:build": "cd website && bun run build",
|
|
1360
|
+
"docs:dev": "cd website && bun run start",
|
|
1361
|
+
"docs:serve": "cd website && bun run serve",
|
|
1362
|
+
check: "biome check --write .",
|
|
1363
|
+
format: "biome format --write .",
|
|
1364
|
+
"format:check": "biome format .",
|
|
1365
|
+
hygiene: "bun run check:publint && bun run check:types",
|
|
1366
|
+
lint: "biome lint .",
|
|
1367
|
+
"lint:fix": "biome lint --write .",
|
|
1368
|
+
"lint:scripts": "shellcheck .github/scripts/*.sh",
|
|
1369
|
+
"lint:workflows": "actionlint -color -verbose",
|
|
1370
|
+
"pack:dry": "mkdir -p .pack && bun pm pack --destination .pack --ignore-scripts && ls -lah .pack && tar -tf .pack/*.tgz | sort | sed 's/^/ - /'",
|
|
1371
|
+
"pre:enter:beta": "changeset pre enter beta",
|
|
1372
|
+
"pre:enter:next": "changeset pre enter next",
|
|
1373
|
+
"pre:enter:rc": "changeset pre enter rc",
|
|
1374
|
+
"quality-check:ci": "biome check . && bun run typecheck",
|
|
1375
|
+
"pre:exit": "changeset pre exit",
|
|
1376
|
+
prepare: "husky",
|
|
1377
|
+
"publish:pre": "changeset publish --provenance",
|
|
1378
|
+
release: "changeset publish --provenance",
|
|
1379
|
+
"release:snapshot:canary": "changeset version --snapshot canary && changeset publish --tag canary",
|
|
1380
|
+
"security:audit": "npm audit",
|
|
1381
|
+
start: "node ./dist/bin/index.js",
|
|
1382
|
+
test: "bun test --recursive",
|
|
1383
|
+
"test:ci": "TF_BUILD=true bun test --recursive",
|
|
1384
|
+
"test:coverage": "bun test --coverage",
|
|
1385
|
+
"test:scripts": "bash tests/smoke-test-scripts.sh",
|
|
1386
|
+
"test:watch": "bun test --watch",
|
|
1387
|
+
typecheck: "tsc -p tsconfig.eslint.json --noEmit",
|
|
1388
|
+
"upgrade:minor": "bun update",
|
|
1389
|
+
validate: "bun run lint && bun run typecheck && bun run build && TF_BUILD=true bun run test",
|
|
1390
|
+
"validate:json": "bun scripts/validate-json.ts",
|
|
1391
|
+
"version:gen": "changeset",
|
|
1392
|
+
"version:pre": "changeset version",
|
|
1393
|
+
"watch:types": "tsc -p tsconfig.eslint.json --noEmit --watch"
|
|
1394
|
+
},
|
|
1395
|
+
sideEffects: false,
|
|
1396
|
+
type: "module",
|
|
1397
|
+
types: "./dist/index.d.ts",
|
|
1398
|
+
version: "0.3.0-next.0",
|
|
1399
|
+
workspaces: [
|
|
1400
|
+
".",
|
|
1401
|
+
"website"
|
|
1402
|
+
]
|
|
1403
|
+
};
|
|
1404
|
+
|
|
1405
|
+
// src/utils/logger.ts
|
|
1406
|
+
// Numeric rank of each log level; a higher number is more severe.
var LEVEL_ORDER = { debug: 10, info: 20, warn: 30, error: 40 };

// Incremented once per emitted entry and written out as `seq`.
var sequenceCounter = 0;

// Memoized package version (filled in by loadVersion).
var cachedVersion;

// Correlation id used when the async-local store holds no value.
var currentCorrelationId;

// Async-local storage consulted first for the correlation id.
var correlationStore = new AsyncLocalStorage();
|
|
1416
|
+
/**
 * Returns the package version, computing it on first use and caching it.
 *
 * @returns {string} `package_default.version`, or "0.0.0" when that is empty.
 */
function loadVersion() {
  cachedVersion ||= package_default.version || "0.0.0";
  return cachedVersion;
}
|
|
1423
|
+
// True when running under Vitest (VITEST === "true").
var isTestEnv = process.env.VITEST === "true";

// File logging: LOG_TO_FILE decides when set (only the exact value "true"
// enables it); when unset it is on everywhere except the test environment.
var shouldWriteFile = process.env.LOG_TO_FILE != null ? process.env.LOG_TO_FILE === "true" : !isTestEnv;

// Level handed to the pino stdout logger.
var baseLevel = process.env.LOG_LEVEL || "info";

// Pretty-printing transport, created only when LOG_FORMAT is "pretty".
var pinoTransport = process.env.LOG_FORMAT === "pretty"
  ? pino.transport({
      target: "pino-pretty",
      options: { colorize: true, singleLine: true }
    })
  : undefined;

// Stdout logger; `base: null` drops pino's default bindings.
var pinoStdout = pino({ level: baseLevel, base: null }, pinoTransport);

// Current log-file stream and the date (YYYY-MM-DD) it was opened for.
var fileStream;
var fileDate;
|
|
1433
|
+
/**
 * Makes sure `fileStream` points at today's log file, `logs/<YYYY-MM-DD>.jsonl`
 * under the current working directory, opened in append mode.
 *
 * Does nothing when file logging is disabled or the stream for today's date
 * is already open. When the date has changed, the previous stream is ended
 * before the new one is created. Any failure leaves file logging switched
 * off (`fileStream` undefined) rather than throwing.
 */
function ensureFileStream() {
  if (!shouldWriteFile) {
    return;
  }
  const today = new Date().toISOString().slice(0, 10);
  if (fileStream && fileDate === today) {
    return;
  }
  try {
    const logsDir = path2.resolve(process.cwd(), "logs");
    fs.mkdirSync(logsDir, { recursive: true });
    const filePath = path2.join(logsDir, `${today}.jsonl`);
    if (fileStream) {
      fileStream.end();
    }
    const stream = fs.createWriteStream(filePath, { flags: "a" });
    // Open/write failures arrive asynchronously as an 'error' event, which the
    // surrounding try/catch cannot see; without a listener the event would
    // surface as an uncaught exception. Drop the stream instead.
    stream.on("error", () => {
      if (fileStream === stream) {
        fileStream = undefined;
        fileDate = undefined;
      }
    });
    fileStream = stream;
    fileDate = today;
  } catch {
    fileStream = undefined;
    fileDate = undefined;
  }
}
|
|
1452
|
+
/**
 * Emits one log entry to stdout (pino), to the daily log file when file
 * logging is enabled, and to every registered sink.
 *
 * @param {string} component - Name of the emitting component.
 * @param {"debug"|"info"|"warn"|"error"} level - Entry severity.
 * @param {string} msg - Log message.
 * @param {object} [context] - Optional structured context.
 */
function log(component, level, msg, context) {
  if (!dynamicShouldLog(level)) {
    return;
  }

  // The async-local correlation id wins over the module-level one.
  const correlationId = correlationStore.getStore() ?? currentCorrelationId;

  const entry = {
    ts: new Date().toISOString(),
    level,
    component,
    msg,
    pid: process.pid,
    ver: loadVersion(),
    seq: ++sequenceCounter
  };
  if (correlationId) {
    entry.correlationId = correlationId;
  }
  if (context) {
    entry.context = context;
  }

  const bindings = { component, ver: entry.ver, seq: entry.seq };
  if (entry.correlationId) {
    bindings.correlationId = entry.correlationId;
  }
  pinoStdout.child(bindings)[level](context ?? {}, msg);

  if (shouldWriteFile) {
    ensureFileStream();
    // One JSON document per line (JSONL).
    fileStream?.write(`${JSON.stringify(entry)}\n`);
  }

  try {
    for (const sink of sinks) {
      sink(entry);
    }
  } catch {}
}
|
|
1490
|
+
/**
 * Builds a logger bound to a component name.
 *
 * @param {string} component - Component name stamped on every entry.
 * @returns {{debug: Function, info: Function, warn: Function, error: Function}}
 *   One `(msg, context)` method per level, each delegating to `log`.
 */
function createLogger(component) {
  const emitAt = (level) => (msg, context) => log(component, level, msg, context);
  return {
    debug: emitAt("debug"),
    info: emitAt("info"),
    warn: emitAt("warn"),
    error: emitAt("error")
  };
}
|
|
1498
|
+
// Extra consumers that receive every emitted log entry.
var sinks = [];

// Runtime override of the minimum level; takes precedence when set.
var dynamicLevel;

/**
 * Resolves the minimum level an entry needs in order to be emitted.
 *
 * Precedence: the runtime override (`dynamicLevel`), then the LOG_LEVEL
 * environment variable when it names a level known to LEVEL_ORDER, then
 * "info". LOG_LEVEL is consulted here because the stdout logger is already
 * configured from it; ignoring it would make LOG_LEVEL=debug unable to emit
 * debug entries.
 *
 * @returns {string} A key of LEVEL_ORDER (or the override as given).
 */
function getEffectiveLevel() {
  if (dynamicLevel) {
    return dynamicLevel;
  }
  const envLevel = process.env.LOG_LEVEL;
  // Levels this module does not rank fall back to "info" so the comparison
  // in dynamicShouldLog never runs against `undefined`.
  return envLevel && Object.hasOwn(LEVEL_ORDER, envLevel) ? envLevel : "info";
}

/**
 * Tells whether an entry at `level` passes the current minimum level.
 *
 * @param {string} level - Level of the candidate entry.
 * @returns {boolean} True when the entry should be emitted.
 */
var dynamicShouldLog = (level) => {
  const minimumLevel = getEffectiveLevel();
  return LEVEL_ORDER[level] >= LEVEL_ORDER[minimumLevel];
};
|
|
1507
|
+
|
|
1508
|
+
// src/utils/incremental-state.ts
|
|
1509
|
+
import crypto from "node:crypto";
|
|
1510
|
+
import { promises as fs2 } from "node:fs";
|
|
1511
|
+
import path3 from "node:path";
|
|
1512
|
+
/**
 * Lists the guids that are not yet recorded as enriched.
 *
 * @param {Iterable<string>} currentGuids - Guids present in the current run.
 * @param {{enrichedGuids: string[]}} state - Previously saved state.
 * @returns {string[]} Guids from `currentGuids`, in iteration order, that are
 *   absent from `state.enrichedGuids`.
 */
function detectNewMessages(currentGuids, state) {
  const alreadyEnriched = new Set(state.enrichedGuids);
  return [...currentGuids].filter((guid) => !alreadyEnriched.has(guid));
}
|
|
1522
|
+
/**
 * Loads the incremental-enrichment state file.
 *
 * Never throws: every failure resolves to null, which callers treat as
 * "no previous state". A missing file is the normal first-run case and is
 * silent; an unreadable file, invalid JSON, an unknown version or a missing
 * `enrichedGuids` array is reported with a warning so a corrupt state file
 * is not mistaken for a first run without any trace.
 *
 * @param {string} filePath - Path of the JSON state file.
 * @returns {Promise<object|null>} The parsed state, or null.
 */
async function loadIncrementalState(filePath) {
  const warn = (message, context) => createLogger("utils:incremental-state").warn(message, context);
  const describe = (error) => (error instanceof Error ? error.message : String(error));

  let content;
  try {
    content = await fs2.readFile(filePath, "utf-8");
  } catch (error) {
    // Rely on the error code rather than matching the message text.
    if (error?.code !== "ENOENT") {
      warn("Failed to read state file. Ignoring.", { filePath, error: describe(error) });
    }
    return null;
  }

  let parsed;
  try {
    parsed = JSON.parse(content);
  } catch (error) {
    warn("State file is not valid JSON. Ignoring.", { filePath, error: describe(error) });
    return null;
  }

  if (parsed === null || typeof parsed !== "object" || parsed.version !== "1.0") {
    warn("Unknown state version. Ignoring.", { version: parsed?.version });
    return null;
  }
  // Callers read enrichedGuids.length; reject state that would crash them.
  if (!Array.isArray(parsed.enrichedGuids)) {
    warn("State file has no enrichedGuids list. Ignoring.", { filePath });
    return null;
  }
  return parsed;
}
|
|
1541
|
+
|
|
1542
|
+
// src/utils/delta-detection.ts
|
|
1543
|
+
/**
 * Loads the state saved by a previous enrichment run.
 *
 * @param {string} stateFilePath - Path of the state file.
 * @returns {Promise<object|null>} Whatever `loadIncrementalState` resolves to.
 */
async function loadPreviousState(stateFilePath) {
  const previousState = await loadIncrementalState(stateFilePath);
  return previousState;
}
|
|
1546
|
+
/**
 * Collects the distinct guids of a list of messages.
 *
 * @param {Iterable<{guid: string}>} messages - Messages to scan.
 * @returns {Set<string>} Guids in first-seen order.
 */
function extractGuidsFromMessages(messages) {
  return new Set(Array.from(messages, (message) => message.guid));
}
|
|
1553
|
+
/**
 * Determines which guids still need enrichment.
 *
 * @param {Iterable<string>} currentGuids - Guids present in the current run.
 * @param {object|null} previousState - Saved state, or a falsy value when
 *   there is none.
 * @returns {string[]} Every current guid when there is no previous state,
 *   otherwise the result of `detectNewMessages`.
 */
function computeDelta(currentGuids, previousState) {
  return previousState
    ? detectNewMessages(currentGuids, previousState)
    : Array.from(currentGuids);
}
|
|
1559
|
+
/**
 * Reports a delta-detection result to the structured log and to the
 * human-readable console output.
 *
 * @param {{newCount: number, totalMessages: number, previousEnrichedCount: number, isFirstRun: boolean}} result
 */
function logDeltaSummary(result) {
  const logger = createLogger("utils:delta-detection");
  const {
    newCount,
    totalMessages: totalCount,
    previousEnrichedCount: previousCount
  } = result;

  if (result.isFirstRun) {
    logger.info("First enrichment run", { totalMessages: totalCount });
    humanInfo(`First enrichment run: ${totalCount} messages`);
    return;
  }

  // Guard against dividing by zero when there are no messages at all.
  const percentNew = totalCount > 0 ? newCount / totalCount * 100 : 0;
  logger.info("Delta detected", {
    newMessages: newCount,
    percentNew,
    totalMessages: totalCount,
    previouslyEnriched: previousCount
  });
  humanInfo(`Delta detected: ${newCount} new messages (${percentNew.toFixed(1)}%) of ${totalCount} total messages`);
  humanInfo(`Previously enriched: ${previousCount}`);
}
|
|
1579
|
+
/**
 * Compare the current messages against the persisted state and describe
 * what is new.
 *
 * Loads the previous state, derives the current GUID set, computes the new
 * GUIDs, logs a summary, and returns the assembled result.
 *
 * @param {Array<{guid: *}>} messages - Messages in the current run.
 * @param {string} stateFilePath - Location of the persisted state file.
 * @returns {Promise<object>} Result with `newGuids`, `totalMessages`,
 *   `previousEnrichedCount`, `newCount`, `isFirstRun` and `state`. `state`
 *   is the loaded state, or a fresh empty state when none was loaded.
 */
async function detectDelta(messages, stateFilePath) {
  const previousState = await loadPreviousState(stateFilePath);
  const isFirstRun = previousState === null;
  const newGuids = computeDelta(extractGuidsFromMessages(messages), previousState);
  const totalMessages = messages.length;

  // Only synthesize an empty state when nothing was loaded.
  let state = previousState;
  if (state === null || state === undefined) {
    state = {
      version: "1.0",
      lastEnrichedAt: new Date().toISOString(),
      totalMessages,
      enrichedGuids: [],
      pipelineConfig: {
        configHash: "",
      },
      enrichmentStats: null,
    };
  }

  const result = {
    newGuids,
    totalMessages,
    previousEnrichedCount: previousState?.enrichedGuids.length ?? 0,
    newCount: newGuids.length,
    isFirstRun,
    state,
  };

  logDeltaSummary(result);
  return result;
}
|
|
1604
|
+
/**
 * Summarize a delta result as counts and percentages.
 *
 * Both percentages are relative to `totalMessages` and are 0 when the
 * total is not greater than zero.
 *
 * @param {{totalMessages: number, newCount: number, previousEnrichedCount: number}} result
 *   Delta result to summarize.
 * @returns {{total: number, new: number, previous: number, percentNew: number, percentPrevious: number}}
 *   The raw counts plus each count's share of the total.
 */
function getDeltaStats(result) {
  const { totalMessages: total, newCount, previousEnrichedCount } = result;
  const shareOfTotal = (count) => (total > 0 ? (count / total) * 100 : 0);

  return {
    total,
    new: newCount,
    previous: previousEnrichedCount,
    percentNew: shareOfTotal(newCount),
    percentPrevious: shareOfTotal(previousEnrichedCount),
  };
}
|
|
1616
|
+
// src/utils/enrichment-merge.ts
|
|
1617
|
+
import { promises as fs3 } from "node:fs";
|
|
1618
|
+
/**
 * Merge a list of new messages with previously stored messages, keyed by GUID.
 *
 * The result is driven by `newMessages`: each distinct GUID appears once,
 * in first-occurrence order. A new message whose GUID is already stored is
 * combined with the stored one via `mergeMessageEnrichments`; otherwise it
 * is added as-is. Stored messages whose GUID does not occur in
 * `newMessages` are not part of the result.
 *
 * @param {Array<object>} existingMessages - Previously stored messages.
 * @param {Array<object>} newMessages - Messages from the current run.
 * @param {object} [options={}] - Passed through to `mergeMessageEnrichments`.
 * @returns {object} `messages` plus a `statistics` object; `mergedCount`,
 *   `addedCount` and `preservedCount` are also repeated at the top level.
 */
function mergeEnrichments(existingMessages, newMessages, options = {}) {
  // When a GUID repeats in the stored list, the last entry wins.
  const existingByGuid = new Map();
  for (const stored of existingMessages) {
    existingByGuid.set(stored.guid, stored);
  }

  const resultMessages = [];
  const seenGuids = new Set();
  let mergedCount = 0;
  let addedCount = 0;
  let preservedCount = 0;

  for (const incoming of newMessages) {
    const guid = incoming.guid;
    // Only the first occurrence of each GUID in newMessages is considered.
    if (seenGuids.has(guid)) {
      continue;
    }

    const stored = existingByGuid.get(guid);
    if (!stored) {
      resultMessages.push(incoming);
      addedCount += 1;
      seenGuids.add(guid);
      continue;
    }

    const merged = mergeMessageEnrichments(stored, incoming, options);
    resultMessages.push(merged);
    mergedCount += 1;

    // A merged media message that carries at least one enrichment entry
    // counts as "preserved".
    const carriesEnrichment =
      merged.messageKind === "media" &&
      merged.media?.enrichment &&
      merged.media.enrichment.length > 0;
    if (carriesEnrichment) {
      preservedCount += 1;
    }
    seenGuids.add(guid);
  }

  const totalMessages = resultMessages.length;
  const shareOfTotal = (count) =>
    totalMessages > 0 ? (count / totalMessages) * 100 : 0;

  return {
    messages: resultMessages,
    statistics: {
      mergedCount,
      addedCount,
      preservedCount,
      totalMessages,
      mergedPercentage: shareOfTotal(mergedCount),
      addedPercentage: shareOfTotal(addedCount),
    },
    mergedCount,
    addedCount,
    preservedCount,
  };
}
|
|
1664
|
+
/**
 * Combine the enrichment entries of a stored message and its new counterpart.
 *
 * The stored message is returned untouched when it is not a media message
 * (or has no `media`), or when the new message brings no enrichment entries.
 * Otherwise a shallow copy of the stored message is returned whose
 * `media.enrichment` is either:
 *  - the new message's enrichment array, when `options.forceRefresh` is truthy, or
 *  - the stored entries followed by those new entries whose `kind` is not
 *    already present among the stored entries.
 *
 * @param {object} existing - Stored message.
 * @param {object} newMsg - Message from the current run.
 * @param {{forceRefresh?: boolean}} options - Merge options.
 * @returns {object} `existing` itself, or a copy with merged enrichment.
 */
function mergeMessageEnrichments(existing, newMsg, options) {
  const storedIsMedia = existing.messageKind === "media" && existing.media;
  if (!storedIsMedia) {
    return existing;
  }

  const incoming = newMsg.media?.enrichment;
  if (!incoming || incoming.length === 0) {
    return existing;
  }

  let enrichment;
  if (options.forceRefresh) {
    // Replace wholesale: the new array is used as-is.
    enrichment = incoming;
  } else {
    const stored = existing.media.enrichment ?? [];
    const storedKinds = new Set(stored.map((entry) => entry.kind));
    // Stored entries win; only kinds not seen before are appended.
    const additions = incoming.filter((entry) => !storedKinds.has(entry.kind));
    enrichment = [...stored, ...additions];
  }

  return {
    ...existing,
    media: {
      ...existing.media,
      enrichment,
    },
  };
}
|
|
1695
|
+
export {
|
|
1696
|
+
validateGeneratedConfig,
|
|
1697
|
+
validateConfigSafe,
|
|
1698
|
+
validateConfig,
|
|
1699
|
+
substituteEnvVars,
|
|
1700
|
+
mergeEnrichments,
|
|
1701
|
+
mergeConfig,
|
|
1702
|
+
logDeltaSummary,
|
|
1703
|
+
loadConfigFile,
|
|
1704
|
+
loadConfig,
|
|
1705
|
+
isRetryableStatus,
|
|
1706
|
+
isConfigCached,
|
|
1707
|
+
is5xx,
|
|
1708
|
+
ingestCSV,
|
|
1709
|
+
getDeltaStats,
|
|
1710
|
+
getDefaultConfigPath,
|
|
1711
|
+
generateConfigContent,
|
|
1712
|
+
extractGuidsFromMessages,
|
|
1713
|
+
discoverConfigFile,
|
|
1714
|
+
detectDelta,
|
|
1715
|
+
detectConfigFormat,
|
|
1716
|
+
dedupAndMerge,
|
|
1717
|
+
createRateLimiter,
|
|
1718
|
+
createExportEnvelope,
|
|
1719
|
+
clearConfigCache,
|
|
1720
|
+
TapbackInfoSchema,
|
|
1721
|
+
ReplyInfoSchema,
|
|
1722
|
+
RateLimiter,
|
|
1723
|
+
MessageCoreSchema,
|
|
1724
|
+
MediaProvenanceSchema,
|
|
1725
|
+
MediaMetaSchema,
|
|
1726
|
+
MediaEnrichmentSchema,
|
|
1727
|
+
DEFAULT_CONFIG,
|
|
1728
|
+
CONFIG_FILE_PATTERNS
|
|
1729
|
+
};
|