@nathanvale/chatline 0.0.1 → 0.0.2-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/bin/index.js +1 -1
- package/dist/index.js +1 -1
- package/package.json +1 -1
- package/dist/cli/commands/clean.d.ts +0 -17
- package/dist/cli/commands/clean.d.ts.map +0 -1
- package/dist/cli/commands/clean.js +0 -142
- package/dist/cli/commands/clean.js.map +0 -1
- package/dist/cli/commands/doctor.d.ts +0 -17
- package/dist/cli/commands/doctor.d.ts.map +0 -1
- package/dist/cli/commands/doctor.js +0 -202
- package/dist/cli/commands/doctor.js.map +0 -1
- package/dist/cli/commands/enrich-ai.d.ts +0 -17
- package/dist/cli/commands/enrich-ai.d.ts.map +0 -1
- package/dist/cli/commands/enrich-ai.js +0 -371
- package/dist/cli/commands/enrich-ai.js.map +0 -1
- package/dist/cli/commands/index.d.ts +0 -16
- package/dist/cli/commands/index.d.ts.map +0 -1
- package/dist/cli/commands/index.js +0 -16
- package/dist/cli/commands/index.js.map +0 -1
- package/dist/cli/commands/ingest-csv.d.ts +0 -17
- package/dist/cli/commands/ingest-csv.d.ts.map +0 -1
- package/dist/cli/commands/ingest-csv.js +0 -138
- package/dist/cli/commands/ingest-csv.js.map +0 -1
- package/dist/cli/commands/ingest-db.d.ts +0 -17
- package/dist/cli/commands/ingest-db.d.ts.map +0 -1
- package/dist/cli/commands/ingest-db.js +0 -159
- package/dist/cli/commands/ingest-db.js.map +0 -1
- package/dist/cli/commands/init.d.ts +0 -17
- package/dist/cli/commands/init.d.ts.map +0 -1
- package/dist/cli/commands/init.js +0 -110
- package/dist/cli/commands/init.js.map +0 -1
- package/dist/cli/commands/normalize-link.d.ts +0 -16
- package/dist/cli/commands/normalize-link.d.ts.map +0 -1
- package/dist/cli/commands/normalize-link.js +0 -144
- package/dist/cli/commands/normalize-link.js.map +0 -1
- package/dist/cli/commands/render-markdown.d.ts +0 -17
- package/dist/cli/commands/render-markdown.d.ts.map +0 -1
- package/dist/cli/commands/render-markdown.js +0 -218
- package/dist/cli/commands/render-markdown.js.map +0 -1
- package/dist/cli/commands/stats.d.ts +0 -17
- package/dist/cli/commands/stats.d.ts.map +0 -1
- package/dist/cli/commands/stats.js +0 -175
- package/dist/cli/commands/stats.js.map +0 -1
- package/dist/cli/commands/validate.d.ts +0 -17
- package/dist/cli/commands/validate.d.ts.map +0 -1
- package/dist/cli/commands/validate.js +0 -152
- package/dist/cli/commands/validate.js.map +0 -1
- package/dist/cli/index.d.ts +0 -13
- package/dist/cli/index.d.ts.map +0 -1
- package/dist/cli/index.js +0 -121
- package/dist/cli/index.js.map +0 -1
- package/dist/cli/types.d.ts +0 -93
- package/dist/cli/types.d.ts.map +0 -1
- package/dist/cli/types.js +0 -7
- package/dist/cli/types.js.map +0 -1
- package/dist/cli/utils.d.ts +0 -29
- package/dist/cli/utils.d.ts.map +0 -1
- package/dist/cli/utils.js +0 -53
- package/dist/cli/utils.js.map +0 -1
- package/dist/cli.d.ts +0 -9
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js +0 -1805
- package/dist/config/generator.d.ts +0 -90
- package/dist/config/generator.d.ts.map +0 -1
- package/dist/config/generator.js +0 -320
- package/dist/config/generator.js.map +0 -1
- package/dist/config/loader.d.ts +0 -107
- package/dist/config/loader.d.ts.map +0 -1
- package/dist/config/loader.js +0 -251
- package/dist/config/loader.js.map +0 -1
- package/dist/config/schema.d.ts +0 -107
- package/dist/config/schema.d.ts.map +0 -1
- package/dist/config/schema.js +0 -169
- package/dist/config/schema.js.map +0 -1
- package/dist/enrich/audio-transcription.d.ts +0 -77
- package/dist/enrich/audio-transcription.d.ts.map +0 -1
- package/dist/enrich/audio-transcription.js +0 -370
- package/dist/enrich/audio-transcription.js.map +0 -1
- package/dist/enrich/checkpoint.d.ts +0 -137
- package/dist/enrich/checkpoint.d.ts.map +0 -1
- package/dist/enrich/checkpoint.js +0 -205
- package/dist/enrich/checkpoint.js.map +0 -1
- package/dist/enrich/idempotency.d.ts +0 -90
- package/dist/enrich/idempotency.d.ts.map +0 -1
- package/dist/enrich/idempotency.js +0 -188
- package/dist/enrich/idempotency.js.map +0 -1
- package/dist/enrich/image-analysis.d.ts +0 -62
- package/dist/enrich/image-analysis.d.ts.map +0 -1
- package/dist/enrich/image-analysis.js +0 -264
- package/dist/enrich/image-analysis.js.map +0 -1
- package/dist/enrich/index.d.ts +0 -60
- package/dist/enrich/index.d.ts.map +0 -1
- package/dist/enrich/index.js +0 -74
- package/dist/enrich/index.js.map +0 -1
- package/dist/enrich/link-enrichment.d.ts +0 -37
- package/dist/enrich/link-enrichment.d.ts.map +0 -1
- package/dist/enrich/link-enrichment.js +0 -202
- package/dist/enrich/link-enrichment.js.map +0 -1
- package/dist/enrich/pdf-video-handling.d.ts +0 -49
- package/dist/enrich/pdf-video-handling.d.ts.map +0 -1
- package/dist/enrich/pdf-video-handling.js +0 -325
- package/dist/enrich/pdf-video-handling.js.map +0 -1
- package/dist/enrich/progress-tracker.d.ts +0 -120
- package/dist/enrich/progress-tracker.d.ts.map +0 -1
- package/dist/enrich/progress-tracker.js +0 -220
- package/dist/enrich/progress-tracker.js.map +0 -1
- package/dist/enrich/providers/firecrawl.d.ts +0 -18
- package/dist/enrich/providers/firecrawl.d.ts.map +0 -1
- package/dist/enrich/providers/firecrawl.js +0 -48
- package/dist/enrich/providers/firecrawl.js.map +0 -1
- package/dist/enrich/providers/generic.d.ts +0 -16
- package/dist/enrich/providers/generic.d.ts.map +0 -1
- package/dist/enrich/providers/generic.js +0 -36
- package/dist/enrich/providers/generic.js.map +0 -1
- package/dist/enrich/providers/index.d.ts +0 -14
- package/dist/enrich/providers/index.d.ts.map +0 -1
- package/dist/enrich/providers/index.js +0 -13
- package/dist/enrich/providers/index.js.map +0 -1
- package/dist/enrich/providers/instagram.d.ts +0 -16
- package/dist/enrich/providers/instagram.d.ts.map +0 -1
- package/dist/enrich/providers/instagram.js +0 -43
- package/dist/enrich/providers/instagram.js.map +0 -1
- package/dist/enrich/providers/spotify.d.ts +0 -16
- package/dist/enrich/providers/spotify.d.ts.map +0 -1
- package/dist/enrich/providers/spotify.js +0 -45
- package/dist/enrich/providers/spotify.js.map +0 -1
- package/dist/enrich/providers/twitter.d.ts +0 -16
- package/dist/enrich/providers/twitter.d.ts.map +0 -1
- package/dist/enrich/providers/twitter.js +0 -43
- package/dist/enrich/providers/twitter.js.map +0 -1
- package/dist/enrich/providers/types.d.ts +0 -47
- package/dist/enrich/providers/types.d.ts.map +0 -1
- package/dist/enrich/providers/types.js +0 -15
- package/dist/enrich/providers/types.js.map +0 -1
- package/dist/enrich/providers/youtube.d.ts +0 -16
- package/dist/enrich/providers/youtube.d.ts.map +0 -1
- package/dist/enrich/providers/youtube.js +0 -43
- package/dist/enrich/providers/youtube.js.map +0 -1
- package/dist/enrich/rate-limiting.d.ts +0 -118
- package/dist/enrich/rate-limiting.d.ts.map +0 -1
- package/dist/enrich/rate-limiting.js +0 -258
- package/dist/enrich/rate-limiting.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/ingest/dedup-merge.d.ts +0 -82
- package/dist/ingest/dedup-merge.d.ts.map +0 -1
- package/dist/ingest/dedup-merge.js +0 -262
- package/dist/ingest/dedup-merge.js.map +0 -1
- package/dist/ingest/ingest-csv.d.ts +0 -62
- package/dist/ingest/ingest-csv.d.ts.map +0 -1
- package/dist/ingest/ingest-csv.js +0 -300
- package/dist/ingest/ingest-csv.js.map +0 -1
- package/dist/ingest/ingest-db.d.ts +0 -64
- package/dist/ingest/ingest-db.d.ts.map +0 -1
- package/dist/ingest/ingest-db.js +0 -172
- package/dist/ingest/ingest-db.js.map +0 -1
- package/dist/ingest/link-replies-and-tapbacks.d.ts +0 -53
- package/dist/ingest/link-replies-and-tapbacks.d.ts.map +0 -1
- package/dist/ingest/link-replies-and-tapbacks.js +0 -381
- package/dist/ingest/link-replies-and-tapbacks.js.map +0 -1
- package/dist/normalize/date-converters.d.ts +0 -45
- package/dist/normalize/date-converters.d.ts.map +0 -1
- package/dist/normalize/date-converters.js +0 -166
- package/dist/normalize/date-converters.js.map +0 -1
- package/dist/normalize/path-validator.d.ts +0 -65
- package/dist/normalize/path-validator.d.ts.map +0 -1
- package/dist/normalize/path-validator.js +0 -221
- package/dist/normalize/path-validator.js.map +0 -1
- package/dist/normalize/validate-normalized.d.ts +0 -45
- package/dist/normalize/validate-normalized.d.ts.map +0 -1
- package/dist/normalize/validate-normalized.js +0 -144
- package/dist/normalize/validate-normalized.js.map +0 -1
- package/dist/render/embeds-blockquotes.d.ts +0 -84
- package/dist/render/embeds-blockquotes.d.ts.map +0 -1
- package/dist/render/embeds-blockquotes.js +0 -204
- package/dist/render/embeds-blockquotes.js.map +0 -1
- package/dist/render/grouping.d.ts +0 -78
- package/dist/render/grouping.d.ts.map +0 -1
- package/dist/render/grouping.js +0 -134
- package/dist/render/grouping.js.map +0 -1
- package/dist/render/index.d.ts +0 -47
- package/dist/render/index.d.ts.map +0 -1
- package/dist/render/index.js +0 -245
- package/dist/render/index.js.map +0 -1
- package/dist/render/reply-rendering.d.ts +0 -88
- package/dist/render/reply-rendering.d.ts.map +0 -1
- package/dist/render/reply-rendering.js +0 -196
- package/dist/render/reply-rendering.js.map +0 -1
- package/dist/schema/message.d.ts +0 -125
- package/dist/schema/message.d.ts.map +0 -1
- package/dist/schema/message.js +0 -331
- package/dist/schema/message.js.map +0 -1
- package/dist/utils/delta-detection.d.ts +0 -107
- package/dist/utils/delta-detection.d.ts.map +0 -1
- package/dist/utils/delta-detection.js +0 -199
- package/dist/utils/delta-detection.js.map +0 -1
- package/dist/utils/enrichment-merge.d.ts +0 -135
- package/dist/utils/enrichment-merge.d.ts.map +0 -1
- package/dist/utils/enrichment-merge.js +0 -280
- package/dist/utils/enrichment-merge.js.map +0 -1
- package/dist/utils/human.d.ts +0 -15
- package/dist/utils/human.d.ts.map +0 -1
- package/dist/utils/human.js +0 -27
- package/dist/utils/human.js.map +0 -1
- package/dist/utils/incremental-state.d.ts +0 -133
- package/dist/utils/incremental-state.d.ts.map +0 -1
- package/dist/utils/incremental-state.js +0 -237
- package/dist/utils/incremental-state.js.map +0 -1
- package/dist/utils/logger.d.ts +0 -40
- package/dist/utils/logger.d.ts.map +0 -1
- package/dist/utils/logger.js +0 -176
- package/dist/utils/logger.js.map +0 -1
|
@@ -1,64 +0,0 @@
|
|
|
1
|
-
import type { ExportEnvelope, Message } from '../schema/message.js';
|
|
2
|
-
export type DBMessage = {
|
|
3
|
-
guid: string;
|
|
4
|
-
rowid?: number;
|
|
5
|
-
text?: string | null;
|
|
6
|
-
is_from_me: number;
|
|
7
|
-
date: number;
|
|
8
|
-
chat_id?: string;
|
|
9
|
-
handle?: string;
|
|
10
|
-
service?: string;
|
|
11
|
-
subject?: string | null;
|
|
12
|
-
attachments?: DBAttachment[];
|
|
13
|
-
[key: string]: unknown;
|
|
14
|
-
};
|
|
15
|
-
export type DBAttachment = {
|
|
16
|
-
id: string;
|
|
17
|
-
filename: string;
|
|
18
|
-
mime_type?: string;
|
|
19
|
-
uti?: string | null;
|
|
20
|
-
copied_path?: string;
|
|
21
|
-
total_bytes?: number;
|
|
22
|
-
[key: string]: unknown;
|
|
23
|
-
};
|
|
24
|
-
export type IngestOptions = {
|
|
25
|
-
attachmentRoots: string[];
|
|
26
|
-
};
|
|
27
|
-
/**
|
|
28
|
-
* Split a single DB message into multiple Message objects
|
|
29
|
-
* - 1 text message (if text exists)
|
|
30
|
-
* - N media messages (one per attachment)
|
|
31
|
-
* All parts share same groupGuid (original DB guid) and timestamps
|
|
32
|
-
*/
|
|
33
|
-
export declare function splitDBMessage(dbMessage: DBMessage, lineNumber: number, _options: IngestOptions): Message[];
|
|
34
|
-
/**
|
|
35
|
-
* Generate stable part GUID using format: p:<index>/<original_guid>
|
|
36
|
-
* This ensures:
|
|
37
|
-
* - Deterministic generation (same input → same output)
|
|
38
|
-
* - Stable ordering (index reflects order in split)
|
|
39
|
-
* - Uniqueness within a message's parts
|
|
40
|
-
*/
|
|
41
|
-
export declare function generatePartGUID(originalGuid: string, index: number): string;
|
|
42
|
-
/**
|
|
43
|
-
* Convert Apple epoch timestamp to ISO 8601 UTC with Z suffix
|
|
44
|
-
* Apple epoch = seconds since 2001-01-01 00:00:00 UTC
|
|
45
|
-
*
|
|
46
|
-
* Handles:
|
|
47
|
-
* - Seconds precision: 718110777
|
|
48
|
-
* - Milliseconds precision: 718110777000
|
|
49
|
-
* - Nanoseconds precision: 718110777123456789 (truncate to seconds)
|
|
50
|
-
*/
|
|
51
|
-
export declare function convertAppleEpochToISO8601(appleEpoch: number): string | null;
|
|
52
|
-
/**
|
|
53
|
-
* Infer media kind from MIME type
|
|
54
|
-
*/
|
|
55
|
-
export declare function inferMediaKind(mimeType: string): 'image' | 'audio' | 'video' | 'pdf' | 'unknown';
|
|
56
|
-
/**
|
|
57
|
-
* Main entry point: Ingest DB messages and split into normalized schema
|
|
58
|
-
*/
|
|
59
|
-
export declare function ingestDBMessages(dbMessages: DBMessage[], options: IngestOptions): Message[];
|
|
60
|
-
/**
|
|
61
|
-
* Create export envelope for DB ingestion output
|
|
62
|
-
*/
|
|
63
|
-
export declare function createExportEnvelope(messages: Message[]): ExportEnvelope;
|
|
64
|
-
//# sourceMappingURL=ingest-db.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"ingest-db.d.ts","sourceRoot":"","sources":["../../src/ingest/ingest-db.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,OAAO,EAAE,MAAM,sBAAsB,CAAA;AAEnE,MAAM,MAAM,SAAS,GAAG;IACvB,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,IAAI,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;IACpB,UAAU,EAAE,MAAM,CAAA;IAClB,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,OAAO,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;IACvB,WAAW,CAAC,EAAE,YAAY,EAAE,CAAA;IAC5B,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAA;CACtB,CAAA;AAED,MAAM,MAAM,YAAY,GAAG;IAC1B,EAAE,EAAE,MAAM,CAAA;IACV,QAAQ,EAAE,MAAM,CAAA;IAChB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,GAAG,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;IACnB,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAA;CACtB,CAAA;AAED,MAAM,MAAM,aAAa,GAAG;IAC3B,eAAe,EAAE,MAAM,EAAE,CAAA;CACzB,CAAA;AASD;;;;;GAKG;AACH,wBAAgB,cAAc,CAC7B,SAAS,EAAE,SAAS,EACpB,UAAU,EAAE,MAAM,EAClB,QAAQ,EAAE,aAAa,GACrB,OAAO,EAAE,CAyEX;AAED;;;;;;GAMG;AACH,wBAAgB,gBAAgB,CAAC,YAAY,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,MAAM,CAE5E;AAED;;;;;;;;GAQG;AACH,wBAAgB,0BAA0B,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CA6B5E;AAED;;GAEG;AACH,wBAAgB,cAAc,CAC7B,QAAQ,EAAE,MAAM,GACd,OAAO,GAAG,OAAO,GAAG,OAAO,GAAG,KAAK,GAAG,SAAS,CASjD;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAC/B,UAAU,EAAE,SAAS,EAAE,EACvB,OAAO,EAAE,aAAa,GACpB,OAAO,EAAE,CAWX;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,QAAQ,EAAE,OAAO,EAAE,GAAG,cAAc,CAOxE"}
|
package/dist/ingest/ingest-db.js
DELETED
|
@@ -1,172 +0,0 @@
|
|
|
1
|
-
/**
 * Number of whole seconds separating the two reference instants used below:
 * - Unix epoch:  1970-01-01 00:00:00 UTC
 * - Apple epoch: 2001-01-01 00:00:00 UTC
 * Adding this to an Apple-epoch second count gives a Unix-epoch second count.
 * Evaluates to 978307200.
 */
const APPLE_EPOCH_OFFSET = Date.UTC(2001, 0, 1) / 1000;
|
|
7
|
-
/**
 * Split a single DB message into multiple Message objects
 * - 1 text message (if text exists)
 * - N media messages (one per attachment)
 * All parts share same groupGuid (original DB guid) and timestamps
 *
 * @param {object} dbMessage - Raw DB row. Reads guid, date, is_from_me, text,
 *   handle, chat_id, service, subject and attachments.
 * @param {number} lineNumber - Stored verbatim in each part's exportMetadata.
 * @param {object} _options - Ingest options; not used by this function.
 * @returns {object[]} Parts in emission order (text first, then attachments).
 *   Empty when the row's date cannot be converted.
 */
export function splitDBMessage(dbMessage, lineNumber, _options) {
  const originalGuid = dbMessage.guid;
  const date = convertAppleEpochToISO8601(dbMessage.date);
  if (!date) {
    return []; // Skip invalid dates
  }

  // Anything that is not an array (missing, null, malformed) means "no attachments".
  const attachments = Array.isArray(dbMessage.attachments) ? dbMessage.attachments : [];

  // Common fields for all parts
  const baseMessage = {
    isFromMe: dbMessage.is_from_me === 1,
    date,
    groupGuid: originalGuid,
  };
  // Conditionally add optional fields to satisfy exactOptionalPropertyTypes
  if (dbMessage.handle) {
    baseMessage.handle = dbMessage.handle;
  }
  if (dbMessage.chat_id) {
    baseMessage.chatId = dbMessage.chat_id;
  }
  if (dbMessage.service) {
    baseMessage.service = dbMessage.service;
  }
  if (dbMessage.subject) {
    baseMessage.subject = dbMessage.subject;
  }

  const messages = [];
  // Running index over the parts actually emitted. The text part, when present,
  // takes index 0 and attachments follow; without text the first attachment is 0.
  let partIndex = 0;

  // 1. Create text message if text exists
  if (dbMessage.text) {
    messages.push({
      ...baseMessage,
      guid: generatePartGUID(originalGuid, partIndex),
      messageKind: 'text',
      text: dbMessage.text,
      exportMetadata: {
        source: 'db',
        lineNumber,
        parentGUID: originalGuid,
        partIndex,
      },
    });
    partIndex++;
  }

  // 2. Create media messages for each attachment
  attachments.forEach((att, attachmentIndex) => {
    messages.push({
      ...baseMessage,
      guid: generatePartGUID(originalGuid, partIndex),
      messageKind: 'media',
      media: {
        id: att.id || `media:${originalGuid}:${attachmentIndex}`,
        filename: att.filename || 'unknown',
        path: att.copied_path || null,
        mimeType: att.mime_type || undefined,
        uti: att.uti || undefined,
        // `??` rather than `||`: a size of 0 (empty file) is a real value and must be kept.
        size: att.total_bytes ?? undefined,
        mediaKind: inferMediaKind(att.mime_type || ''),
      },
      exportMetadata: {
        source: 'db',
        lineNumber,
        parentGUID: originalGuid,
        partIndex,
        attachmentIndex,
      },
    });
    partIndex++;
  });

  return messages;
}
|
|
83
|
-
/**
 * Build the GUID of one part of a split message.
 *
 * Layout: p:<index>/<original_guid>
 *
 * The result is computed from the two arguments alone, so identical inputs
 * always give the identical GUID, and parts of the same message differ only
 * in the index segment.
 */
export function generatePartGUID(originalGuid, index) {
  const partTag = `p:${index}`;
  return `${partTag}/${originalGuid}`;
}
|
|
93
|
-
/**
 * Convert Apple epoch timestamp to ISO 8601 UTC with Z suffix
 * Apple epoch = seconds since 2001-01-01 00:00:00 UTC
 *
 * The unit is inferred from the magnitude of the value:
 * - greater than 1e15: nanoseconds (718110777123456789), truncated to seconds
 * - greater than 1e11: milliseconds (718110777000), truncated to seconds
 * - otherwise:         seconds (718110777)
 *
 * @param {number|bigint} appleEpoch - Timestamp relative to the Apple epoch.
 * @returns {string|null} ISO 8601 string, or null when the input is not a
 *   finite number (null, undefined, strings, NaN, ...) or the result falls
 *   outside the range a Date can represent.
 */
export function convertAppleEpochToISO8601(appleEpoch) {
  // BigInt cannot be mixed with Number arithmetic, so convert it up front.
  // Precision lost in the conversion is far below the one-second granularity kept.
  const value = typeof appleEpoch === 'bigint' ? Number(appleEpoch) : appleEpoch;

  // Reject everything that is not a real number. Without this, null coerces to 0
  // and is reported as 2001-01-01 instead of being flagged as invalid.
  if (typeof value !== 'number' || !Number.isFinite(value)) {
    return null;
  }

  let seconds;
  if (value > 1e15) {
    // Clearly nanoseconds (> 1 quadrillion)
    seconds = Math.floor(value / 1e9);
  } else if (value > 1e11) {
    // Likely milliseconds (> 100 billion, beyond realistic seconds range)
    seconds = Math.floor(value / 1000);
  } else {
    // Seconds (includes values up to ~5 billion, covering years 2001-2159)
    seconds = value;
  }

  // Shift from Apple epoch to Unix epoch, then to the milliseconds Date expects.
  const date = new Date((seconds + APPLE_EPOCH_OFFSET) * 1000);

  // An out-of-range instant yields an invalid Date; report it as null rather
  // than letting toISOString() throw.
  return Number.isNaN(date.getTime()) ? null : date.toISOString();
}
|
|
132
|
-
/**
 * Infer media kind from MIME type
 *
 * MIME type and subtype names are case-insensitive, so the value is trimmed
 * and lower-cased before matching.
 *
 * @param {string} mimeType - For example "image/png".
 * @returns {'image' | 'audio' | 'video' | 'pdf' | 'unknown'} 'unknown' for
 *   empty, non-string, or unrecognised values.
 */
export function inferMediaKind(mimeType) {
  // Non-string values (undefined, null, numbers from a loosely typed row)
  // have no string methods to call; treat them as unknown.
  if (typeof mimeType !== 'string') {
    return 'unknown';
  }

  const normalized = mimeType.trim().toLowerCase();
  if (normalized === '') {
    return 'unknown';
  }
  if (normalized.startsWith('image/')) {
    return 'image';
  }
  if (normalized.startsWith('audio/')) {
    return 'audio';
  }
  if (normalized.startsWith('video/')) {
    return 'video';
  }
  // Substring match so variants such as application/x-pdf are recognised too.
  if (normalized.includes('pdf')) {
    return 'pdf';
  }
  return 'unknown';
}
|
|
148
|
-
/**
 * Main entry point: run every DB row through splitDBMessage and collect the
 * resulting parts into one flat list, in input order.
 *
 * Rows are numbered from 1; that number is passed to splitDBMessage as the
 * row's lineNumber. `options` is forwarded unchanged.
 */
export function ingestDBMessages(dbMessages, options) {
  const collected = [];
  let position = 0;
  for (const row of dbMessages) {
    position += 1;
    const parts = splitDBMessage(row, position, options);
    collected.push(...parts);
  }
  return collected;
}
|
|
161
|
-
/**
 * Wrap ingested messages in the export envelope for DB ingestion output.
 *
 * The envelope carries schema version '2.0.0', source 'db', the current time
 * as an ISO 8601 string, and the given messages array (by reference).
 */
export function createExportEnvelope(messages) {
  const envelope = {
    schemaVersion: '2.0.0',
    source: 'db',
  };
  envelope.createdAt = new Date().toISOString();
  envelope.messages = messages;
  return envelope;
}
|
|
172
|
-
//# sourceMappingURL=ingest-db.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"ingest-db.js","sourceRoot":"","sources":["../../src/ingest/ingest-db.ts"],"names":[],"mappings":"AA8BA;;;;GAIG;AACH,MAAM,kBAAkB,GAAG,SAAS,CAAA;AAEpC;;;;;GAKG;AACH,MAAM,UAAU,cAAc,CAC7B,SAAoB,EACpB,UAAkB,EAClB,QAAuB;IAEvB,MAAM,QAAQ,GAAc,EAAE,CAAA;IAC9B,MAAM,YAAY,GAAG,SAAS,CAAC,IAAI,CAAA;IACnC,MAAM,WAAW,GAAG,SAAS,CAAC,WAAW,IAAI,EAAE,CAAA;IAC/C,MAAM,IAAI,GAAG,0BAA0B,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;IACvD,MAAM,QAAQ,GAAG,SAAS,CAAC,UAAU,KAAK,CAAC,CAAA;IAE3C,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAA,CAAC,qBAAqB;IAE1C,8BAA8B;IAC9B,MAAM,WAAW,GAAqB;QACrC,QAAQ;QACR,IAAI;QACJ,SAAS,EAAE,YAAY;KACvB,CAAA;IAED,0EAA0E;IAC1E,IAAI,SAAS,CAAC,MAAM;QAAE,WAAW,CAAC,MAAM,GAAG,SAAS,CAAC,MAAM,CAAA;IAC3D,IAAI,SAAS,CAAC,OAAO;QAAE,WAAW,CAAC,MAAM,GAAG,SAAS,CAAC,OAAO,CAAA;IAC7D,IAAI,SAAS,CAAC,OAAO;QAAE,WAAW,CAAC,OAAO,GAAG,SAAS,CAAC,OAAO,CAAA;IAC9D,IAAI,SAAS,CAAC,OAAO;QAAE,WAAW,CAAC,OAAO,GAAG,SAAS,CAAC,OAAO,CAAA;IAE9D,4CAA4C;IAC5C,IAAI,SAAS,GAAG,CAAC,CAAA;IAEjB,wCAAwC;IACxC,IAAI,SAAS,CAAC,IAAI,EAAE,CAAC;QACpB,MAAM,WAAW,GAAY;YAC5B,GAAG,WAAW;YACd,IAAI,EAAE,gBAAgB,CAAC,YAAY,EAAE,SAAS,CAAC;YAC/C,WAAW,EAAE,MAAM;YACnB,IAAI,EAAE,SAAS,CAAC,IAAI;YACpB,cAAc,EAAE;gBACf,MAAM,EAAE,IAAI;gBACZ,UAAU;gBACV,UAAU,EAAE,YAAY;gBACxB,SAAS;aACT;SACU,CAAA;QAEZ,QAAQ,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;QAC1B,SAAS,EAAE,CAAA;IACZ,CAAC;IAED,+CAA+C;IAC/C,WAAW,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,eAAe,EAAE,EAAE;QAC5C,MAAM,YAAY,GAAY;YAC7B,GAAG,WAAW;YACd,IAAI,EAAE,gBAAgB,CAAC,YAAY,EAAE,SAAS,CAAC;YAC/C,WAAW,EAAE,OAAO;YACpB,KAAK,EAAE;gBACN,EAAE,EAAE,GAAG,CAAC,EAAE,IAAI,SAAS,YAAY,IAAI,eAAe,EAAE;gBACxD,QAAQ,EAAE,GAAG,CAAC,QAAQ,IAAI,SAAS;gBACnC,IAAI,EAAE,GAAG,CAAC,WAAW,IAAI,IAAI;gBAC7B,QAAQ,EAAE,GAAG,CAAC,SAAS,IAAI,SAAS;gBACpC,GAAG,EAAE,GAAG,CAAC,GAAG,IAAI,SAAS;gBACzB,IAAI,EAAE,GAAG,CAAC,WAAW,IAAI,SAAS;gBAClC,SAAS,EAAE,cAAc,CAAC,GAAG,CAAC,SAAS,IAAI,EAAE,CAAC;aAC9C;YACD,cAAc,EAAE;gBACf,MAAM,EAAE,IAAI;gBACZ,UAAU;gBACV,UAAU,EAAE,YAAY;gBACxB,SAAS;gBACT,eAAe;aACf;SACU,CAAA;QAEZ,QAAQ,CAAC,IAAI,CAAC,YAAY,CAAC,CAAA;QAC3B,SAAS,EAAE,CAAA;IACZ,CAAC,C
AAC,CAAA;IAEF,OAAO,QAAQ,CAAA;AAChB,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,gBAAgB,CAAC,YAAoB,EAAE,KAAa;IACnE,OAAO,KAAK,KAAK,IAAI,YAAY,EAAE,CAAA;AACpC,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,0BAA0B,CAAC,UAAkB;IAC5D,IAAI,CAAC;QACJ,iEAAiE;QACjE,gCAAgC;QAChC,iDAAiD;QACjD,0DAA0D;QAC1D,iCAAiC;QACjC,IAAI,OAAe,CAAA;QAEnB,IAAI,UAAU,GAAG,gBAAgB,EAAE,CAAC;YACnC,wCAAwC;YACxC,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,GAAG,UAAU,CAAC,CAAA;QAC9C,CAAC;aAAM,IAAI,UAAU,GAAG,YAAY,EAAE,CAAC;YACtC,sEAAsE;YACtE,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,GAAG,IAAI,CAAC,CAAA;QACxC,CAAC;aAAM,CAAC;YACP,uEAAuE;YACvE,OAAO,GAAG,UAAU,CAAA;QACrB,CAAC;QAED,oCAAoC;QACpC,MAAM,WAAW,GAAG,OAAO,GAAG,kBAAkB,CAAA;QAEhD,sCAAsC;QACtC,MAAM,IAAI,GAAG,IAAI,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,CAAA;QACzC,OAAO,IAAI,CAAC,WAAW,EAAE,CAAA;IAC1B,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,IAAI,CAAA;IACZ,CAAC;AACF,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAC7B,QAAgB;IAEhB,IAAI,CAAC,QAAQ;QAAE,OAAO,SAAS,CAAA;IAE/B,IAAI,QAAQ,CAAC,UAAU,CAAC,QAAQ,CAAC;QAAE,OAAO,OAAO,CAAA;IACjD,IAAI,QAAQ,CAAC,UAAU,CAAC,QAAQ,CAAC;QAAE,OAAO,OAAO,CAAA;IACjD,IAAI,QAAQ,CAAC,UAAU,CAAC,QAAQ,CAAC;QAAE,OAAO,OAAO,CAAA;IACjD,IAAI,QAAQ,CAAC,QAAQ,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAA;IAE1C,OAAO,SAAS,CAAA;AACjB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAC/B,UAAuB,EACvB,OAAsB;IAEtB,MAAM,QAAQ,GAAc,EAAE,CAAA;IAC9B,IAAI,UAAU,GAAG,CAAC,CAAA;IAElB,KAAK,MAAM,KAAK,IAAI,UAAU,EAAE,CAAC;QAChC,MAAM,aAAa,GAAG,cAAc,CAAC,KAAK,EAAE,UAAU,EAAE,OAAO,CAAC,CAAA;QAChE,QAAQ,CAAC,IAAI,CAAC,GAAG,aAAa,CAAC,CAAA;QAC/B,UAAU,EAAE,CAAA;IACb,CAAC;IAED,OAAO,QAAQ,CAAA;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,oBAAoB,CAAC,QAAmB;IACvD,OAAO;QACN,aAAa,EAAE,OAAO;QACtB,MAAM,EAAE,IAAI;QACZ,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,QAAQ;KACR,CAAA;AACF,CAAC"}
|
|
@@ -1,53 +0,0 @@
|
|
|
1
|
-
import type { Message } from '#schema/message';
|
|
2
|
-
type ScoredCandidate = {
|
|
3
|
-
message: Message;
|
|
4
|
-
score: number;
|
|
5
|
-
reasons: string[];
|
|
6
|
-
};
|
|
7
|
-
type AmbiguousLink = {
|
|
8
|
-
messageGuid: string;
|
|
9
|
-
selectedTarget: string;
|
|
10
|
-
candidates: ScoredCandidate[];
|
|
11
|
-
tieCount: number;
|
|
12
|
-
confidenceScore: number;
|
|
13
|
-
};
|
|
14
|
-
type LinkingOptions = {
|
|
15
|
-
trackAmbiguous?: boolean;
|
|
16
|
-
minConfidenceThreshold?: number;
|
|
17
|
-
};
|
|
18
|
-
type LinkingResult = {
|
|
19
|
-
messages: Message[];
|
|
20
|
-
ambiguousLinks?: AmbiguousLink[];
|
|
21
|
-
};
|
|
22
|
-
/**
|
|
23
|
-
* AC01 + AC02: Link replies to their parent messages
|
|
24
|
-
*
|
|
25
|
-
* Primary: DB association_guid when present
|
|
26
|
-
* Fallback: Heuristics using timestamp and content matching
|
|
27
|
-
*/
|
|
28
|
-
export declare function linkRepliesToParents(messages: Message[], options?: LinkingOptions): Message[] | LinkingResult;
|
|
29
|
-
/**
|
|
30
|
-
* AC03: Link tapbacks to their parent messages
|
|
31
|
-
*
|
|
32
|
-
* Primary: DB association_guid when present
|
|
33
|
-
* Fallback: Heuristics preferring media messages
|
|
34
|
-
*/
|
|
35
|
-
export declare function linkTapbacksToParents(messages: Message[], options?: LinkingOptions): Message[] | LinkingResult;
|
|
36
|
-
/**
|
|
37
|
-
* AC04: Detect and report ambiguous links with confidence scores
|
|
38
|
-
*/
|
|
39
|
-
export declare function detectAmbiguousLinks(messages: Message[]): {
|
|
40
|
-
tieCount: number;
|
|
41
|
-
ambiguousMessages: {
|
|
42
|
-
messageGuid: string;
|
|
43
|
-
selectedTarget: string;
|
|
44
|
-
tieCount: number;
|
|
45
|
-
topCandidates: {
|
|
46
|
-
guid: string;
|
|
47
|
-
score: number;
|
|
48
|
-
reasons: string[];
|
|
49
|
-
}[];
|
|
50
|
-
}[];
|
|
51
|
-
};
|
|
52
|
-
export type { LinkingResult, AmbiguousLink };
|
|
53
|
-
//# sourceMappingURL=link-replies-and-tapbacks.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"link-replies-and-tapbacks.d.ts","sourceRoot":"","sources":["../../src/ingest/link-replies-and-tapbacks.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,iBAAiB,CAAA;AAiB9C,KAAK,eAAe,GAAG;IACtB,OAAO,EAAE,OAAO,CAAA;IAChB,KAAK,EAAE,MAAM,CAAA;IACb,OAAO,EAAE,MAAM,EAAE,CAAA;CACjB,CAAA;AAED,KAAK,aAAa,GAAG;IACpB,WAAW,EAAE,MAAM,CAAA;IACnB,cAAc,EAAE,MAAM,CAAA;IACtB,UAAU,EAAE,eAAe,EAAE,CAAA;IAC7B,QAAQ,EAAE,MAAM,CAAA;IAChB,eAAe,EAAE,MAAM,CAAA;CACvB,CAAA;AAED,KAAK,cAAc,GAAG;IACrB,cAAc,CAAC,EAAE,OAAO,CAAA;IACxB,sBAAsB,CAAC,EAAE,MAAM,CAAA;CAC/B,CAAA;AAED,KAAK,aAAa,GAAG;IACpB,QAAQ,EAAE,OAAO,EAAE,CAAA;IACnB,cAAc,CAAC,EAAE,aAAa,EAAE,CAAA;CAChC,CAAA;AAED;;;;;GAKG;AACH,wBAAgB,oBAAoB,CACnC,QAAQ,EAAE,OAAO,EAAE,EACnB,OAAO,GAAE,cAAmB,GAC1B,OAAO,EAAE,GAAG,aAAa,CAuF3B;AAED;;;;;GAKG;AACH,wBAAgB,qBAAqB,CACpC,QAAQ,EAAE,OAAO,EAAE,EACnB,OAAO,GAAE,cAAmB,GAC1B,OAAO,EAAE,GAAG,aAAa,CAkF3B;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,QAAQ,EAAE,OAAO,EAAE;;;;;;;;;;;;EA0BvD;AA2PD,YAAY,EAAE,aAAa,EAAE,aAAa,EAAE,CAAA"}
|
|
@@ -1,381 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Reply and tapback linking for NORMALIZE--T03
|
|
3
|
-
*
|
|
4
|
-
* Implements:
|
|
5
|
-
* AC01: Link replies using DB association_guid as primary method
|
|
6
|
-
* AC02: Apply heuristics for unlinked replies (timestamp proximity <30s, content patterns)
|
|
7
|
-
* AC03: Link tapbacks to parent message GUIDs (including part GUIDs)
|
|
8
|
-
* AC04: Handle ambiguous links with structured logging and tie counters
|
|
9
|
-
* AC05: Maintain parity with CSV linking rules from original analyzer
|
|
10
|
-
*/
|
|
11
|
-
const REPLY_WINDOW_SECONDS = 30; // AC02: <30s proximity threshold
|
|
12
|
-
const REPLY_SEARCH_WINDOW_MINUTES = 5; // Expand to ±5 minutes if needed
|
|
13
|
-
const TAPBACK_WINDOW_SECONDS = 30; // Tapbacks within 30s of parent
|
|
14
|
-
/**
 * AC01 + AC02: Link replies to their parent messages
 *
 * Messages that already carry replyingTo.targetMessageGuid are returned as-is.
 * The remaining text/media messages with non-blank text are handed to
 * findReplyParentCandidates, and the highest-scoring candidate becomes the
 * reply target on a copy of the message.
 *
 * Options:
 * - trackAmbiguous: when truthy, ties for the top score are recorded and the
 *   return value is { messages, ambiguousLinks } instead of a plain array.
 * - minConfidenceThreshold: read (default 0.7) but not applied in this function.
 */
export function linkRepliesToParents(messages, options = {}) {
  const {
    trackAmbiguous = false,
    minConfidenceThreshold: _minConfidenceThreshold = 0.7,
  } = options;

  // Lookup tables passed to the candidate finder: by GUID and by UTC minute.
  const guidIndex = new Map();
  const minuteIndex = new Map();
  messages.forEach((entry) => {
    guidIndex.set(entry.guid, entry);
    const minuteKey = new Date(entry.date).toISOString().slice(0, 16); // YYYY-MM-DDTHH:mm
    let bucket = minuteIndex.get(minuteKey);
    if (bucket === undefined) {
      bucket = [];
      minuteIndex.set(minuteKey, bucket);
    }
    bucket.push(entry);
  });

  const ambiguousLinks = [];

  // Returns the message untouched, or a copy with replyingTo.targetMessageGuid set.
  const linkOne = (msg) => {
    const isReplyKind = msg.messageKind === 'text' || msg.messageKind === 'media';
    // Wrong kind, or a target is already present: nothing to do.
    if (!isReplyKind || msg.replyingTo?.targetMessageGuid) {
      return msg;
    }
    // Messages without any non-whitespace text are not linked.
    if (!msg.text || msg.text.trim().length === 0) {
      return msg;
    }

    const ranked = findReplyParentCandidates(msg, messages, guidIndex, minuteIndex);
    if (ranked.length === 0) {
      return msg;
    }

    // Highest score first
    ranked.sort((left, right) => right.score - left.score);
    const best = ranked[0];
    if (!best) {
      return msg; // Defensive only: length was checked above
    }

    const bestScore = best.score;
    const tied = ranked.filter((candidate) => candidate.score === bestScore);
    if (tied.length > 1 && trackAmbiguous) {
      const chosen = tied[0];
      if (chosen) {
        ambiguousLinks.push({
          messageGuid: msg.guid,
          selectedTarget: chosen.message.guid,
          candidates: tied,
          tieCount: tied.length,
          confidenceScore: bestScore,
        });
      }
    }

    return {
      ...msg,
      replyingTo: {
        ...msg.replyingTo,
        targetMessageGuid: best.message.guid,
      },
    };
  };

  const result = messages.map((msg) => linkOne(msg));
  if (trackAmbiguous) {
    return { messages: result, ambiguousLinks };
  }
  return result;
}
|
|
86
|
-
/**
 * AC03: Link tapbacks to their parent messages
 *
 * Primary: DB association_guid when present
 * Fallback: Heuristics preferring media messages
 *
 * Messages whose `messageKind` is not 'tapback', tapbacks that already carry
 * `tapback.targetMessageGuid`, and tapbacks without a `tapback` payload are
 * returned untouched. Input messages are never mutated: a linked tapback is
 * returned as a shallow copy with `tapback.targetMessageGuid` filled in.
 *
 * @param {Array<object>} messages - Messages to process; `guid`, `date` and
 *   `messageKind` are read from each one.
 * @param {object} [options]
 * @param {boolean} [options.trackAmbiguous=false] - When true, score ties are
 *   collected and the return value becomes `{ messages, ambiguousLinks }`.
 * @returns {Array<object>|{messages: Array<object>, ambiguousLinks: Array<object>}}
 *   The processed messages, optionally wrapped together with the tie report.
 */
export function linkTapbacksToParents(messages, options = {}) {
    const { trackAmbiguous = false } = options;
    // Build indices
    const byGuid = new Map();
    const byTimestamp = new Map();
    messages.forEach((msg) => {
        byGuid.set(msg.guid, msg);
        // Use minute-based buckets for O(1) lookup in time window searches
        const minuteBucket = new Date(msg.date).toISOString().slice(0, 16); // YYYY-MM-DDTHH:mm
        if (!byTimestamp.has(minuteBucket)) {
            byTimestamp.set(minuteBucket, []);
        }
        byTimestamp.get(minuteBucket).push(msg);
    });
    const ambiguousLinks = [];
    const result = messages.map((msg) => {
        // Only process tapback messages
        if (msg.messageKind !== 'tapback') {
            return msg;
        }
        // A tapback without its payload can never be linked. Bail out before the
        // candidate search so that no ambiguity is reported for a message that
        // is returned unlinked.
        if (!msg.tapback) {
            return msg;
        }
        // Skip if already has DB association
        if (msg.tapback.targetMessageGuid) {
            return msg;
        }
        // Find parent for this tapback
        const candidates = findTapbackParentCandidates(msg, messages, byGuid, byTimestamp);
        if (candidates.length === 0) {
            return msg;
        }
        // Sort by score (descending); the sort is stable, so candidates with
        // equal scores keep the order the finder returned them in.
        candidates.sort((a, b) => b.score - a.score);
        const topCandidate = candidates[0];
        if (!topCandidate) {
            return msg; // Should never happen since we checked length > 0
        }
        const topScore = topCandidate.score;
        // Check for ties
        const tiedCandidates = candidates.filter((c) => c.score === topScore);
        if (tiedCandidates.length > 1 && trackAmbiguous) {
            ambiguousLinks.push({
                messageGuid: msg.guid,
                // Report the target that is actually written below.
                selectedTarget: topCandidate.message.guid,
                candidates: tiedCandidates,
                tieCount: tiedCandidates.length,
                confidenceScore: topScore,
            });
        }
        // Link to best candidate
        return {
            ...msg,
            tapback: {
                ...msg.tapback,
                targetMessageGuid: topCandidate.message.guid,
            },
        };
    });
    return trackAmbiguous ? { messages: result, ambiguousLinks } : result;
}
|
|
156
|
-
/**
 * AC04: Detect and report ambiguous links with confidence scores
 *
 * Runs the reply linker and then the tapback linker with ambiguity tracking
 * switched on, and merges both tie lists (replies first) into one summary.
 *
 * @param {Array<object>} messages - Messages handed to both linkers.
 * @returns {{tieCount: number, ambiguousMessages: Array<object>}} `tieCount`
 *   is the number of ambiguous messages; each entry lists its tied candidates
 *   as `{ guid, score, reasons }`.
 */
export function detectAmbiguousLinks(messages) {
    const replyOutcome = linkRepliesToParents(messages, { trackAmbiguous: true });
    const tapbackOutcome = linkTapbacksToParents(messages, { trackAmbiguous: true });
    const replyTies = replyOutcome.ambiguousLinks || [];
    const tapbackTies = tapbackOutcome.ambiguousLinks || [];
    const everyTie = [...replyTies, ...tapbackTies];

    // Reduce a scored candidate to the fields exposed in the report.
    const describeCandidate = ({ message, score, reasons }) => ({
        guid: message.guid,
        score,
        reasons,
    });
    // Reduce one tie record to its public shape.
    const describeTie = (tie) => ({
        messageGuid: tie.messageGuid,
        selectedTarget: tie.selectedTarget,
        tieCount: tie.tieCount,
        topCandidates: tie.candidates.map((entry) => describeCandidate(entry)),
    });

    return {
        tieCount: everyTie.length,
        ambiguousMessages: everyTie.map((tie) => describeTie(tie)),
    };
}
|
|
184
|
-
// ============================================================================
|
|
185
|
-
// Helper Functions
|
|
186
|
-
// ============================================================================
|
|
187
|
-
/**
 * List the minute-bucket keys for a look-back window ending at `date`.
 *
 * Keys are the first 16 characters of the ISO-8601 (UTC) timestamp,
 * i.e. `YYYY-MM-DDTHH:mm`. The first key belongs to `date` itself, then one
 * key per whole minute going backwards, `windowMinutes` inclusive.
 *
 * @param {Date} date - End of the window.
 * @param {number} windowMinutes - How many minutes to look back.
 * @returns {string[]} Bucket keys, newest first.
 */
function getTimeBucketKeys(date, windowMinutes) {
    const anchorMs = date.getTime();
    const bucketKeys = [];
    let minutesBack = 0;
    while (minutesBack <= windowMinutes) {
        const isoStamp = new Date(anchorMs - minutesBack * 60 * 1000).toISOString();
        bucketKeys.push(isoStamp.slice(0, 16)); // YYYY-MM-DDTHH:mm (minute bucket)
        minutesBack += 1;
    }
    return bucketKeys;
}
|
|
200
|
-
/**
 * Collect and score the messages a reply could be answering.
 *
 * Only messages found in the minute buckets of the search window are
 * considered; tapbacks, notifications, the reply itself, and messages that
 * have neither text nor media kind are ignored, as is anything dated after
 * the reply or older than the search window.
 *
 * Points awarded per candidate:
 *   +20   sent within REPLY_WINDOW_SECONDS before the reply
 *   +100  candidate text starts with the quoted snippet (case-insensitive)
 *   +50   candidate text contains the quoted snippet elsewhere
 *   +80   media candidate when the reply quotes nothing or mentions "photo"/"image",
 *         plus (10 - N) when the guid contains `p:N/`
 *   -Δ/100 when outside REPLY_WINDOW_SECONDS but backed by a content match
 *   +15   same handle as the reply
 *   +10   same groupGuid as the reply
 *
 * Candidates with a score of zero or less are dropped.
 *
 * @param {object} reply - The reply message (`date`, `text`, `guid`, `handle`, `groupGuid` are read).
 * @param {Array<object>} _allMessages - Unused.
 * @param {Map<string, object>} _byGuid - Unused.
 * @param {Map<string, Array<object>>} byTimestamp - Messages keyed by `YYYY-MM-DDTHH:mm`.
 * @returns {Array<{message: object, score: number, reasons: string[]}>}
 *   Sorted by score (highest first), ties by smallest time gap to the reply.
 */
function findReplyParentCandidates(reply, _allMessages, _byGuid, byTimestamp) {
    const replyDate = new Date(reply.date);
    const replyTime = replyDate.getTime();
    // Pull the quoted snippet out of the reply (CSV pattern: "➜ Replying to: \"<snippet>\"")
    const quoted = reply.text?.match(/(?:➜\s*Replying to:?\s+[«"]([^»"]+)[»"]|Replying to:?\s+[«"]([^»"]+)[»"])/);
    const snippet = quoted?.[1] || quoted?.[2];
    // Gather every eligible message from the buckets covering the search window
    const bucketKeys = getTimeBucketKeys(replyDate, REPLY_SEARCH_WINDOW_MINUTES);
    const visitedGuids = new Set();
    const pool = [];
    for (const bucketKey of bucketKeys) {
        for (const msg of byTimestamp.get(bucketKey) || []) {
            if (visitedGuids.has(msg.guid)) {
                continue;
            }
            if (msg.messageKind === 'tapback' || msg.messageKind === 'notification') {
                continue;
            }
            if (msg.guid === reply.guid) {
                continue;
            }
            visitedGuids.add(msg.guid);
            pool.push(msg);
        }
    }
    // The reply side of the media rule does not depend on the candidate
    const replyImpliesMedia = !snippet ||
        reply.text?.toLowerCase().includes('photo') ||
        reply.text?.toLowerCase().includes('image');
    const scored = [];
    for (const candidate of pool) {
        // Nothing to match against: no text and not a media message
        if (!candidate.text && candidate.messageKind !== 'media') {
            continue;
        }
        const timeDeltaSeconds = (replyTime - new Date(candidate.date).getTime()) / 1000;
        // Must precede the reply and sit inside the search window
        if (timeDeltaSeconds < 0 ||
            timeDeltaSeconds > REPLY_SEARCH_WINDOW_MINUTES * 60) {
            continue;
        }
        let score = 0;
        const reasons = [];
        const award = (points, reason) => {
            score += points;
            reasons.push(reason);
        };
        let hasContentMatch = false;
        // AC02: timestamp proximity
        if (timeDeltaSeconds <= REPLY_WINDOW_SECONDS) {
            award(20, `exact_second_match (Δ${timeDeltaSeconds.toFixed(1)}s)`);
        }
        // AC05 (CSV parity): quoted snippet found in the candidate text
        if (snippet && candidate.text) {
            const snippetAt = candidate.text.toLowerCase().indexOf(snippet.toLowerCase());
            if (snippetAt === 0) {
                award(100, 'snippet_startswith');
                hasContentMatch = true;
            }
            else if (snippetAt > 0) {
                award(50, 'snippet_includes');
                hasContentMatch = true;
            }
        }
        // AC05 (CSV parity): reply implicitly refers to a media message
        if (candidate.messageKind === 'media' && replyImpliesMedia) {
            award(80, 'media_candidate');
            hasContentMatch = true;
            // Earlier parts (lower timestamp_index) are preferred
            const partIndex = candidate.guid.match(/p:(\d+)\//);
            if (partIndex?.[1]) {
                award(10 - Number.parseInt(partIndex[1], 10), `index_preference(${partIndex[1]})`);
            }
        }
        // Beyond the short window a content match is mildly penalised for distance
        if (timeDeltaSeconds > REPLY_WINDOW_SECONDS && hasContentMatch) {
            score -= timeDeltaSeconds / 100;
            reasons.push(`extended_window (Δ${timeDeltaSeconds.toFixed(1)}s)`);
        }
        // Same sender preference
        if (reply.handle && candidate.handle === reply.handle) {
            award(15, 'same_sender');
        }
        // Same group/moment preference
        if (reply.groupGuid && candidate.groupGuid === reply.groupGuid) {
            award(10, 'same_group');
        }
        if (score > 0) {
            scored.push({ message: candidate, score, reasons });
        }
    }
    // Best score first; equal scores fall back to the nearest prior message
    const gapToReply = (entry) => replyTime - new Date(entry.message.date).getTime();
    scored.sort((left, right) => {
        if (left.score !== right.score) {
            return right.score - left.score;
        }
        return gapToReply(left) - gapToReply(right);
    });
    return scored;
}
|
|
315
|
-
/**
 * Find candidate parent messages for a tapback
 * Prefers media messages
 * Uses byTimestamp Map for O(1) bucket lookups instead of O(n) scan
 *
 * Tapbacks, notifications and the tapback itself are never candidates, nor is
 * anything dated after the tapback or older than the search window.
 *
 * Points awarded per candidate:
 *   +20  sent within TAPBACK_WINDOW_SECONDS before the tapback,
 *        otherwise minus the gap in seconds
 *   +80  media message, or +20 for a text message
 *   +10  same groupGuid as the tapback
 *
 * Candidates with a score of zero or less are dropped.
 *
 * @param {object} tapback - The tapback message (`date`, `guid`, `groupGuid` are read).
 * @param {Array<object>} _allMessages - Unused.
 * @param {Map<string, object>} _byGuid - Unused.
 * @param {Map<string, Array<object>>} byTimestamp - Messages keyed by `YYYY-MM-DDTHH:mm`.
 * @returns {Array<{message: object, score: number, reasons: string[]}>}
 *   Sorted by score (highest first), ties by smallest time gap to the tapback.
 */
function findTapbackParentCandidates(tapback, _allMessages, _byGuid, byTimestamp) {
    const tapbackDate = new Date(tapback.date);
    const tapbackTime = tapbackDate.getTime();
    const candidates = [];
    // Use time-bucketed lookup for O(1) average case per bucket
    const bucketKeys = getTimeBucketKeys(tapbackDate, REPLY_SEARCH_WINDOW_MINUTES);
    const seenGuids = new Set();
    const potentialParents = [];
    for (const key of bucketKeys) {
        const bucketMessages = byTimestamp.get(key);
        if (bucketMessages) {
            for (const msg of bucketMessages) {
                if (!seenGuids.has(msg.guid) &&
                    msg.messageKind !== 'tapback' &&
                    msg.messageKind !== 'notification' &&
                    msg.guid !== tapback.guid) {
                    seenGuids.add(msg.guid);
                    potentialParents.push(msg);
                }
            }
        }
    }
    // Score each candidate
    for (const candidate of potentialParents) {
        const candidateTime = new Date(candidate.date).getTime();
        const timeDeltaSeconds = (tapbackTime - candidateTime) / 1000;
        // Skip if too old or in future
        if (timeDeltaSeconds < 0 ||
            timeDeltaSeconds > REPLY_SEARCH_WINDOW_MINUTES * 60) {
            continue;
        }
        let score = 0;
        const reasons = [];
        // Timestamp proximity
        if (timeDeltaSeconds <= TAPBACK_WINDOW_SECONDS) {
            score += 20;
            reasons.push(`near_tap (Δ${timeDeltaSeconds.toFixed(1)}s)`);
        }
        else {
            // Outside the tapback window the gap itself is the penalty
            score -= timeDeltaSeconds;
        }
        // Media messages score higher (AC03: preferred targets)
        if (candidate.messageKind === 'media') {
            score += 80;
            reasons.push('is_media');
        }
        else if (candidate.messageKind === 'text') {
            score += 20;
            reasons.push('is_text');
        }
        // Same group preference
        if (tapback.groupGuid && candidate.groupGuid === tapback.groupGuid) {
            score += 10;
            reasons.push('same_group');
        }
        if (score > 0) {
            candidates.push({ message: candidate, score, reasons });
        }
    }
    // Sort all candidates: first by score (desc), then by time proximity (asc).
    // Without the tiebreaker, equal scores stay in bucket insertion order, which
    // would favour the oldest message of a minute over the nearest prior one.
    candidates.sort((a, b) => {
        // Primary: score (higher is better)
        if (a.score !== b.score) {
            return b.score - a.score;
        }
        // Tiebreaker: nearest prior message (lowest time delta)
        const aDelta = tapbackTime - new Date(a.message.date).getTime();
        const bDelta = tapbackTime - new Date(b.message.date).getTime();
        return aDelta - bDelta;
    });
    return candidates;
}
|
|
381
|
-
//# sourceMappingURL=link-replies-and-tapbacks.js.map
|