@nathanvale/chatline 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +1 -0
- package/LICENSE +21 -0
- package/README.md +1535 -0
- package/dist/bin/index.js +5121 -0
- package/dist/cli/commands/clean.d.ts +17 -0
- package/dist/cli/commands/clean.d.ts.map +1 -0
- package/dist/cli/commands/clean.js +142 -0
- package/dist/cli/commands/clean.js.map +1 -0
- package/dist/cli/commands/doctor.d.ts +17 -0
- package/dist/cli/commands/doctor.d.ts.map +1 -0
- package/dist/cli/commands/doctor.js +202 -0
- package/dist/cli/commands/doctor.js.map +1 -0
- package/dist/cli/commands/enrich-ai.d.ts +17 -0
- package/dist/cli/commands/enrich-ai.d.ts.map +1 -0
- package/dist/cli/commands/enrich-ai.js +371 -0
- package/dist/cli/commands/enrich-ai.js.map +1 -0
- package/dist/cli/commands/index.d.ts +16 -0
- package/dist/cli/commands/index.d.ts.map +1 -0
- package/dist/cli/commands/index.js +16 -0
- package/dist/cli/commands/index.js.map +1 -0
- package/dist/cli/commands/ingest-csv.d.ts +17 -0
- package/dist/cli/commands/ingest-csv.d.ts.map +1 -0
- package/dist/cli/commands/ingest-csv.js +138 -0
- package/dist/cli/commands/ingest-csv.js.map +1 -0
- package/dist/cli/commands/ingest-db.d.ts +17 -0
- package/dist/cli/commands/ingest-db.d.ts.map +1 -0
- package/dist/cli/commands/ingest-db.js +159 -0
- package/dist/cli/commands/ingest-db.js.map +1 -0
- package/dist/cli/commands/init.d.ts +17 -0
- package/dist/cli/commands/init.d.ts.map +1 -0
- package/dist/cli/commands/init.js +110 -0
- package/dist/cli/commands/init.js.map +1 -0
- package/dist/cli/commands/normalize-link.d.ts +16 -0
- package/dist/cli/commands/normalize-link.d.ts.map +1 -0
- package/dist/cli/commands/normalize-link.js +144 -0
- package/dist/cli/commands/normalize-link.js.map +1 -0
- package/dist/cli/commands/render-markdown.d.ts +17 -0
- package/dist/cli/commands/render-markdown.d.ts.map +1 -0
- package/dist/cli/commands/render-markdown.js +218 -0
- package/dist/cli/commands/render-markdown.js.map +1 -0
- package/dist/cli/commands/stats.d.ts +17 -0
- package/dist/cli/commands/stats.d.ts.map +1 -0
- package/dist/cli/commands/stats.js +175 -0
- package/dist/cli/commands/stats.js.map +1 -0
- package/dist/cli/commands/validate.d.ts +17 -0
- package/dist/cli/commands/validate.d.ts.map +1 -0
- package/dist/cli/commands/validate.js +152 -0
- package/dist/cli/commands/validate.js.map +1 -0
- package/dist/cli/index.d.ts +13 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +121 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/cli/types.d.ts +93 -0
- package/dist/cli/types.d.ts.map +1 -0
- package/dist/cli/types.js +7 -0
- package/dist/cli/types.js.map +1 -0
- package/dist/cli/utils.d.ts +29 -0
- package/dist/cli/utils.d.ts.map +1 -0
- package/dist/cli/utils.js +53 -0
- package/dist/cli/utils.js.map +1 -0
- package/dist/cli.d.ts +9 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +1805 -0
- package/dist/config/generator.d.ts +90 -0
- package/dist/config/generator.d.ts.map +1 -0
- package/dist/config/generator.js +320 -0
- package/dist/config/generator.js.map +1 -0
- package/dist/config/loader.d.ts +107 -0
- package/dist/config/loader.d.ts.map +1 -0
- package/dist/config/loader.js +251 -0
- package/dist/config/loader.js.map +1 -0
- package/dist/config/schema.d.ts +107 -0
- package/dist/config/schema.d.ts.map +1 -0
- package/dist/config/schema.js +169 -0
- package/dist/config/schema.js.map +1 -0
- package/dist/enrich/audio-transcription.d.ts +77 -0
- package/dist/enrich/audio-transcription.d.ts.map +1 -0
- package/dist/enrich/audio-transcription.js +370 -0
- package/dist/enrich/audio-transcription.js.map +1 -0
- package/dist/enrich/checkpoint.d.ts +137 -0
- package/dist/enrich/checkpoint.d.ts.map +1 -0
- package/dist/enrich/checkpoint.js +205 -0
- package/dist/enrich/checkpoint.js.map +1 -0
- package/dist/enrich/idempotency.d.ts +90 -0
- package/dist/enrich/idempotency.d.ts.map +1 -0
- package/dist/enrich/idempotency.js +188 -0
- package/dist/enrich/idempotency.js.map +1 -0
- package/dist/enrich/image-analysis.d.ts +62 -0
- package/dist/enrich/image-analysis.d.ts.map +1 -0
- package/dist/enrich/image-analysis.js +264 -0
- package/dist/enrich/image-analysis.js.map +1 -0
- package/dist/enrich/index.d.ts +60 -0
- package/dist/enrich/index.d.ts.map +1 -0
- package/dist/enrich/index.js +74 -0
- package/dist/enrich/index.js.map +1 -0
- package/dist/enrich/link-enrichment.d.ts +37 -0
- package/dist/enrich/link-enrichment.d.ts.map +1 -0
- package/dist/enrich/link-enrichment.js +202 -0
- package/dist/enrich/link-enrichment.js.map +1 -0
- package/dist/enrich/pdf-video-handling.d.ts +49 -0
- package/dist/enrich/pdf-video-handling.d.ts.map +1 -0
- package/dist/enrich/pdf-video-handling.js +325 -0
- package/dist/enrich/pdf-video-handling.js.map +1 -0
- package/dist/enrich/progress-tracker.d.ts +120 -0
- package/dist/enrich/progress-tracker.d.ts.map +1 -0
- package/dist/enrich/progress-tracker.js +220 -0
- package/dist/enrich/progress-tracker.js.map +1 -0
- package/dist/enrich/providers/firecrawl.d.ts +18 -0
- package/dist/enrich/providers/firecrawl.d.ts.map +1 -0
- package/dist/enrich/providers/firecrawl.js +48 -0
- package/dist/enrich/providers/firecrawl.js.map +1 -0
- package/dist/enrich/providers/generic.d.ts +16 -0
- package/dist/enrich/providers/generic.d.ts.map +1 -0
- package/dist/enrich/providers/generic.js +36 -0
- package/dist/enrich/providers/generic.js.map +1 -0
- package/dist/enrich/providers/index.d.ts +14 -0
- package/dist/enrich/providers/index.d.ts.map +1 -0
- package/dist/enrich/providers/index.js +13 -0
- package/dist/enrich/providers/index.js.map +1 -0
- package/dist/enrich/providers/instagram.d.ts +16 -0
- package/dist/enrich/providers/instagram.d.ts.map +1 -0
- package/dist/enrich/providers/instagram.js +43 -0
- package/dist/enrich/providers/instagram.js.map +1 -0
- package/dist/enrich/providers/spotify.d.ts +16 -0
- package/dist/enrich/providers/spotify.d.ts.map +1 -0
- package/dist/enrich/providers/spotify.js +45 -0
- package/dist/enrich/providers/spotify.js.map +1 -0
- package/dist/enrich/providers/twitter.d.ts +16 -0
- package/dist/enrich/providers/twitter.d.ts.map +1 -0
- package/dist/enrich/providers/twitter.js +43 -0
- package/dist/enrich/providers/twitter.js.map +1 -0
- package/dist/enrich/providers/types.d.ts +47 -0
- package/dist/enrich/providers/types.d.ts.map +1 -0
- package/dist/enrich/providers/types.js +15 -0
- package/dist/enrich/providers/types.js.map +1 -0
- package/dist/enrich/providers/youtube.d.ts +16 -0
- package/dist/enrich/providers/youtube.d.ts.map +1 -0
- package/dist/enrich/providers/youtube.js +43 -0
- package/dist/enrich/providers/youtube.js.map +1 -0
- package/dist/enrich/rate-limiting.d.ts +118 -0
- package/dist/enrich/rate-limiting.d.ts.map +1 -0
- package/dist/enrich/rate-limiting.js +258 -0
- package/dist/enrich/rate-limiting.js.map +1 -0
- package/dist/index.d.ts +688 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +1729 -0
- package/dist/index.js.map +1 -0
- package/dist/ingest/dedup-merge.d.ts +82 -0
- package/dist/ingest/dedup-merge.d.ts.map +1 -0
- package/dist/ingest/dedup-merge.js +262 -0
- package/dist/ingest/dedup-merge.js.map +1 -0
- package/dist/ingest/ingest-csv.d.ts +62 -0
- package/dist/ingest/ingest-csv.d.ts.map +1 -0
- package/dist/ingest/ingest-csv.js +300 -0
- package/dist/ingest/ingest-csv.js.map +1 -0
- package/dist/ingest/ingest-db.d.ts +64 -0
- package/dist/ingest/ingest-db.d.ts.map +1 -0
- package/dist/ingest/ingest-db.js +172 -0
- package/dist/ingest/ingest-db.js.map +1 -0
- package/dist/ingest/link-replies-and-tapbacks.d.ts +53 -0
- package/dist/ingest/link-replies-and-tapbacks.d.ts.map +1 -0
- package/dist/ingest/link-replies-and-tapbacks.js +381 -0
- package/dist/ingest/link-replies-and-tapbacks.js.map +1 -0
- package/dist/normalize/date-converters.d.ts +45 -0
- package/dist/normalize/date-converters.d.ts.map +1 -0
- package/dist/normalize/date-converters.js +166 -0
- package/dist/normalize/date-converters.js.map +1 -0
- package/dist/normalize/path-validator.d.ts +65 -0
- package/dist/normalize/path-validator.d.ts.map +1 -0
- package/dist/normalize/path-validator.js +221 -0
- package/dist/normalize/path-validator.js.map +1 -0
- package/dist/normalize/validate-normalized.d.ts +45 -0
- package/dist/normalize/validate-normalized.d.ts.map +1 -0
- package/dist/normalize/validate-normalized.js +144 -0
- package/dist/normalize/validate-normalized.js.map +1 -0
- package/dist/render/embeds-blockquotes.d.ts +84 -0
- package/dist/render/embeds-blockquotes.d.ts.map +1 -0
- package/dist/render/embeds-blockquotes.js +204 -0
- package/dist/render/embeds-blockquotes.js.map +1 -0
- package/dist/render/grouping.d.ts +78 -0
- package/dist/render/grouping.d.ts.map +1 -0
- package/dist/render/grouping.js +134 -0
- package/dist/render/grouping.js.map +1 -0
- package/dist/render/index.d.ts +47 -0
- package/dist/render/index.d.ts.map +1 -0
- package/dist/render/index.js +245 -0
- package/dist/render/index.js.map +1 -0
- package/dist/render/reply-rendering.d.ts +88 -0
- package/dist/render/reply-rendering.d.ts.map +1 -0
- package/dist/render/reply-rendering.js +196 -0
- package/dist/render/reply-rendering.js.map +1 -0
- package/dist/schema/message.d.ts +125 -0
- package/dist/schema/message.d.ts.map +1 -0
- package/dist/schema/message.js +331 -0
- package/dist/schema/message.js.map +1 -0
- package/dist/utils/delta-detection.d.ts +107 -0
- package/dist/utils/delta-detection.d.ts.map +1 -0
- package/dist/utils/delta-detection.js +199 -0
- package/dist/utils/delta-detection.js.map +1 -0
- package/dist/utils/enrichment-merge.d.ts +135 -0
- package/dist/utils/enrichment-merge.d.ts.map +1 -0
- package/dist/utils/enrichment-merge.js +280 -0
- package/dist/utils/enrichment-merge.js.map +1 -0
- package/dist/utils/human.d.ts +15 -0
- package/dist/utils/human.d.ts.map +1 -0
- package/dist/utils/human.js +27 -0
- package/dist/utils/human.js.map +1 -0
- package/dist/utils/incremental-state.d.ts +133 -0
- package/dist/utils/incremental-state.d.ts.map +1 -0
- package/dist/utils/incremental-state.js +237 -0
- package/dist/utils/incremental-state.js.map +1 -0
- package/dist/utils/logger.d.ts +40 -0
- package/dist/utils/logger.d.ts.map +1 -0
- package/dist/utils/logger.js +176 -0
- package/dist/utils/logger.js.map +1 -0
- package/package.json +165 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EACN,qBAAqB,EACrB,oBAAoB,EACpB,uBAAuB,GACvB,MAAM,uBAAuB,CAAA;AAC9B,gCAAgC;AAChC,OAAO,EACN,gBAAgB,EAChB,kBAAkB,EAClB,cAAc,EACd,UAAU,EACV,cAAc,EACd,WAAW,EACX,iBAAiB,GACjB,MAAM,oBAAoB,CAAA;AAE3B,OAAO,EACN,oBAAoB,EACpB,cAAc,EACd,kBAAkB,EAClB,cAAc,EACd,kBAAkB,GAClB,MAAM,oBAAoB,CAAA;AAM3B,4BAA4B;AAC5B,OAAO,EACN,iBAAiB,EACjB,KAAK,EACL,iBAAiB,EACjB,WAAW,GACX,MAAM,2BAA2B,CAAA;AAKlC,OAAO,EAAE,aAAa,EAAE,MAAM,yBAAyB,CAAA;AAEvD,+BAA+B;AAC/B,OAAO,EAAE,oBAAoB,EAAE,SAAS,EAAE,MAAM,wBAAwB,CAAA;AAexE,OAAO,EACN,qBAAqB,EACrB,eAAe,EACf,qBAAqB,EACrB,iBAAiB,EACjB,eAAe,EACf,iBAAiB,GACjB,MAAM,qBAAqB,CAAA;AAE5B,wBAAwB;AACxB,OAAO,EACN,WAAW,EACX,wBAAwB,EACxB,aAAa,EACb,eAAe,GACf,MAAM,4BAA4B,CAAA;AAMnC,OAAO,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAA"}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import type { Message } from '#schema/message';
|
|
2
|
+
/**
|
|
3
|
+
* Deduplication and merge logic for NORMALIZE--T04
|
|
4
|
+
*
|
|
5
|
+
* Merges CSV and DB message sources with:
|
|
6
|
+
* AC01: Exact GUID matching (primary)
|
|
7
|
+
* AC02: DB authoritativeness for conflicts
|
|
8
|
+
* AC03: Content equivalence detection
|
|
9
|
+
* AC04: Data loss verification
|
|
10
|
+
* AC05: Deterministic GUID assignment
|
|
11
|
+
*/
|
|
12
|
+
/**
 * Counters describing a single `dedupAndMerge` run.
 */
export type MergeStats = {
    /** Number of messages supplied from the CSV source. */
    csvCount: number;

    /** Number of messages supplied from the DB source. */
    dbCount: number;

    /** Number of messages in the merged output. */
    outputCount: number;

    /** CSV messages merged with a DB message that has the same GUID. */
    exactMatches: number;

    /** CSV messages merged with a DB message found by content equivalence. */
    contentMatches: number;

    /** Conflict counter; the visible merge implementation initialises it to 0 and never increments it. */
    conflicts: number;

    /** CSV messages kept unchanged because no DB message matched. */
    noMatches: number;
};
|
|
21
|
+
/**
 * Result of a successful content-equivalence lookup.
 */
export type ContentMatch = {
    /** The candidate message that was judged equivalent. */
    message: Message;

    /** Confidence score of the match (1.0 = exact match). */
    confidence: number;

    /** Human-readable explanations of why the candidate matched. */
    reasons: Array<string>;
};
|
|
26
|
+
/**
 * Value returned by `dedupAndMerge`.
 */
export type MergeResult = {
    /** Merged output messages. */
    messages: Message[];

    /** Counters for the run. */
    stats: MergeStats;

    /**
     * Optional list of CSV/DB pairs considered conflicting.
     * NOTE(review): the visible `dedupAndMerge` implementation never sets this field.
     */
    conflicts?: {
        csvMsg: Message;
        dbMsg: Message;
        confidence: number;
    }[];

    /**
     * Optional warning messages.
     * NOTE(review): the visible `dedupAndMerge` implementation never sets this field.
     */
    warnings?: Array<string>;
};
|
|
36
|
+
/**
|
|
37
|
+
* AC01 + AC02 + AC03 + AC04 + AC05: Main dedup and merge function
|
|
38
|
+
*
|
|
39
|
+
* Strategy:
|
|
40
|
+
* 1. Build GUID index for fast lookup
|
|
41
|
+
* 2. For each CSV message:
|
|
42
|
+
* a. Try exact GUID match (AC01)
|
|
43
|
+
* b. Try content equivalence (AC03)
|
|
44
|
+
* c. Apply DB authoritativeness if merging (AC02)
|
|
45
|
+
* d. Keep separate if no match
|
|
46
|
+
* 3. Add unmatched DB messages
|
|
47
|
+
* 4. Verify no data loss (AC04)
|
|
48
|
+
* 5. Ensure determinism (AC05)
|
|
49
|
+
*/
|
|
50
|
+
export declare function dedupAndMerge(csvMessages: Message[], dbMessages: Message[]): MergeResult;
|
|
51
|
+
/**
|
|
52
|
+
* AC01: Find exact GUID match in DB messages
|
|
53
|
+
*/
|
|
54
|
+
export declare function findExactMatch(message: Message, dbMessages: Message[]): Message | null;
|
|
55
|
+
/**
|
|
56
|
+
* AC03: Detect content equivalence
|
|
57
|
+
*
|
|
58
|
+
* Normalizes text and compares:
|
|
59
|
+
* - Normalized text content (lowercase, trimmed, punctuation removed)
|
|
60
|
+
* - messageKind must match
|
|
61
|
+
* - sender (handle) must match
|
|
62
|
+
*
|
|
63
|
+
* Returns match with confidence score (1.0 = exact match)
|
|
64
|
+
*/
|
|
65
|
+
export declare function detectContentEquivalence(csvMsg: Message, candidates: Message[], threshold?: number): ContentMatch | null;
|
|
66
|
+
/**
|
|
67
|
+
* AC02: Merge messages with DB authoritativeness
|
|
68
|
+
*
|
|
69
|
+
* DB is authoritative for:
|
|
70
|
+
* - All timestamps (date, dateRead, dateDelivered, dateEdited)
|
|
71
|
+
* - Associations (replyingTo.targetMessageGuid)
|
|
72
|
+
* - handle
|
|
73
|
+
*
|
|
74
|
+
* CSV fields are preserved when DB doesn't have them
|
|
75
|
+
*/
|
|
76
|
+
export declare function applyDbAuthoritiveness(csvMsg: Message, dbMsg: Message): Message;
|
|
77
|
+
/**
|
|
78
|
+
* AC04: Verify count invariants to prevent data loss
|
|
79
|
+
*/
|
|
80
|
+
export declare function verifyNoDataLoss(csvCount: number, dbCount: number, outputCount: number): boolean;
|
|
81
|
+
export type { Message };
|
|
82
|
+
//# sourceMappingURL=dedup-merge.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"dedup-merge.d.ts","sourceRoot":"","sources":["../../src/ingest/dedup-merge.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,iBAAiB,CAAA;AAE9C;;;;;;;;;GASG;AAEH,MAAM,MAAM,UAAU,GAAG;IACxB,QAAQ,EAAE,MAAM,CAAA;IAChB,OAAO,EAAE,MAAM,CAAA;IACf,WAAW,EAAE,MAAM,CAAA;IACnB,YAAY,EAAE,MAAM,CAAA;IACpB,cAAc,EAAE,MAAM,CAAA;IACtB,SAAS,EAAE,MAAM,CAAA;IACjB,SAAS,EAAE,MAAM,CAAA;CACjB,CAAA;AAED,MAAM,MAAM,YAAY,GAAG;IAC1B,OAAO,EAAE,OAAO,CAAA;IAChB,UAAU,EAAE,MAAM,CAAA;IAClB,OAAO,EAAE,MAAM,EAAE,CAAA;CACjB,CAAA;AAED,MAAM,MAAM,WAAW,GAAG;IACzB,QAAQ,EAAE,OAAO,EAAE,CAAA;IACnB,KAAK,EAAE,UAAU,CAAA;IACjB,SAAS,CAAC,EAAE,KAAK,CAAC;QAAE,MAAM,EAAE,OAAO,CAAC;QAAC,KAAK,EAAE,OAAO,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,CAAC,CAAA;IAC1E,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAA;CACnB,CAAA;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,aAAa,CAC5B,WAAW,EAAE,OAAO,EAAE,EACtB,UAAU,EAAE,OAAO,EAAE,GACnB,WAAW,CA8Fb;AAED;;GAEG;AACH,wBAAgB,cAAc,CAC7B,OAAO,EAAE,OAAO,EAChB,UAAU,EAAE,OAAO,EAAE,GACnB,OAAO,GAAG,IAAI,CAEhB;AAED;;;;;;;;;GASG;AACH,wBAAgB,wBAAwB,CACvC,MAAM,EAAE,OAAO,EACf,UAAU,EAAE,OAAO,EAAE,EACrB,SAAS,SAAM,GACb,YAAY,GAAG,IAAI,CA2DrB;AAiED;;;;;;;;;GASG;AACH,wBAAgB,sBAAsB,CACrC,MAAM,EAAE,OAAO,EACf,KAAK,EAAE,OAAO,GACZ,OAAO,CA6BT;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAC/B,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,WAAW,EAAE,MAAM,GACjB,OAAO,CAIT;AAED,YAAY,EAAE,OAAO,EAAE,CAAA"}
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
/**
 * AC01 + AC02 + AC03 + AC05: Deduplicate and merge CSV and DB messages.
 *
 * Strategy:
 * 1. Sort copies of both inputs by GUID using plain code-unit comparison, so
 *    the processing order does not depend on the runtime's locale (AC05).
 * 2. Index DB messages by GUID and, for text messages with non-empty text,
 *    by `handle:normalized text`.
 * 3. Reserve every DB message whose GUID also appears in the CSV input, so a
 *    content match can never claim a DB message that belongs to an exact
 *    GUID match (which would emit two messages with the same GUID).
 * 4. For each CSV message, in sorted order:
 *    a. exact GUID match (AC01) -> merge with DB authoritativeness (AC02)
 *    b. otherwise content equivalence (AC03) against still-unmatched DB
 *       messages -> merge with DB authoritativeness (AC02)
 *    c. otherwise keep the CSV message unchanged
 * 5. Append the DB messages that no CSV message matched.
 *
 * The AC04 count invariant is not checked here; callers can pass the counts
 * from the returned `stats` to `verifyNoDataLoss`.
 *
 * @param csvMessages - Messages from the CSV source (the array is not mutated)
 * @param dbMessages - Messages from the DB source (the array is not mutated)
 * @returns `messages` (merged / CSV-only messages in CSV GUID order, followed
 * by unmatched DB messages in GUID order) and `stats`. `stats.conflicts` is
 * never incremented by this function.
 */
export function dedupAndMerge(csvMessages, dbMessages) {
    // AC05: sort copies with a locale-independent comparator. localeCompare()
    // without an explicit locale follows the host's default collation.
    const sortedCsv = [...csvMessages].sort((a, b) => a.guid < b.guid ? -1 : a.guid > b.guid ? 1 : 0);
    const sortedDb = [...dbMessages].sort((a, b) => a.guid < b.guid ? -1 : a.guid > b.guid ? 1 : 0);
    const stats = {
        csvCount: csvMessages.length,
        dbCount: dbMessages.length,
        outputCount: 0,
        exactMatches: 0,
        contentMatches: 0,
        conflicts: 0,
        noMatches: 0,
    };
    const outputMessages = [];
    const matchedDbGuids = new Set();
    // Build O(1) lookup indices for DB messages
    const dbByGuid = new Map();
    const dbByNormalizedContent = new Map();
    // Non-text DB messages, collected once for the linear-scan fallback below.
    // Content equivalence requires the same messageKind, so text DB messages
    // can never match a non-text CSV message.
    const nonTextDb = [];
    for (const dbMsg of sortedDb) {
        dbByGuid.set(dbMsg.guid, dbMsg);
        if (dbMsg.messageKind !== 'text') {
            nonTextDb.push(dbMsg);
        }
        else if (dbMsg.text) {
            // Index by sender + normalized text (text messages only)
            const normalizedKey = `${dbMsg.handle || ''}:${normalizeTextForIndex(dbMsg.text)}`;
            const existing = dbByNormalizedContent.get(normalizedKey);
            if (existing) {
                existing.push(dbMsg);
            }
            else {
                dbByNormalizedContent.set(normalizedKey, [dbMsg]);
            }
        }
    }
    // Pass 1 (AC01): reserve DB messages that have an exact GUID match, before
    // any content matching runs. Without this, a CSV message processed earlier
    // could content-match a DB message that a later CSV message matches by
    // GUID, and both outputs would end up carrying the same DB GUID.
    for (const csvMsg of sortedCsv) {
        if (dbByGuid.has(csvMsg.guid)) {
            matchedDbGuids.add(csvMsg.guid);
        }
    }
    // Pass 2: produce output in sorted CSV order
    for (const csvMsg of sortedCsv) {
        const exactMatch = dbByGuid.get(csvMsg.guid);
        if (exactMatch) {
            // AC02: Merge with DB authoritativeness
            outputMessages.push(applyDbAuthoritiveness(csvMsg, exactMatch));
            stats.exactMatches++;
            continue;
        }
        // AC03: indexed lookup handles text messages
        let contentMatch = detectContentEquivalenceIndexed(csvMsg, dbByNormalizedContent, matchedDbGuids);
        // Fallback to a linear scan for non-text messages (media, tapback,
        // notification); the index only covers text messages.
        if (!contentMatch && csvMsg.messageKind !== 'text') {
            const unmatchedNonTextDb = nonTextDb.filter((dbMsg) => !matchedDbGuids.has(dbMsg.guid));
            contentMatch = detectContentEquivalence(csvMsg, unmatchedNonTextDb);
        }
        if (contentMatch) {
            // AC02: Merge with DB authoritativeness
            outputMessages.push(applyDbAuthoritiveness(csvMsg, contentMatch.message));
            matchedDbGuids.add(contentMatch.message.guid);
            stats.contentMatches++;
        }
        else {
            // No match found, keep CSV message as-is
            outputMessages.push(csvMsg);
            stats.noMatches++;
        }
    }
    // Add unmatched DB messages
    for (const dbMsg of sortedDb) {
        if (!matchedDbGuids.has(dbMsg.guid)) {
            outputMessages.push(dbMsg);
        }
    }
    stats.outputCount = outputMessages.length;
    return {
        messages: outputMessages,
        stats,
    };
}
|
|
93
|
+
/**
 * AC01: Look up the DB message whose GUID equals the given message's GUID.
 *
 * @param message - Message whose `guid` is searched for
 * @param dbMessages - DB messages to scan, in order
 * @returns The first DB message with a strictly equal `guid`, or `null` when
 * there is none
 */
export function findExactMatch(message, dbMessages) {
    for (const dbMsg of dbMessages) {
        if (dbMsg.guid === message.guid) {
            // `|| null` keeps the original's "falsy result becomes null" behavior
            return dbMsg || null;
        }
    }
    return null;
}
|
|
99
|
+
/**
 * AC03: Find the first candidate whose content is equivalent to `csvMsg`.
 *
 * A candidate qualifies only when:
 * - its messageKind equals the CSV message's messageKind,
 * - its handle equals the CSV message's handle (a missing/empty handle is
 *   treated as `null` on both sides), and
 * - for `text` messages the normalized texts are identical, or for `media`
 *   messages both `media.id` values are present and identical.
 *
 * Every other messageKind never matches.
 *
 * @param csvMsg - CSV message to find an equivalent for
 * @param candidates - DB messages to compare against, scanned in order
 * @param threshold - Minimum confidence required to accept a match
 * (a qualifying candidate always scores 1.0)
 * @returns The first accepted match with its confidence and reasons, or `null`
 */
export function detectContentEquivalence(csvMsg, candidates, threshold = 0.9) {
    for (const candidate of candidates) {
        const sameKind = csvMsg.messageKind === candidate.messageKind;
        if (!sameKind) {
            continue;
        }
        const sameSender = (csvMsg.handle || null) === (candidate.handle || null);
        if (!sameSender) {
            continue;
        }
        // Stays empty unless one of the kind-specific comparisons succeeds
        let reason = '';
        switch (csvMsg.messageKind) {
            case 'text': {
                const csvText = normalizeText(csvMsg.text || '');
                const candidateText = normalizeText(candidate.text || '');
                if (csvText === candidateText) {
                    reason = 'exact text match after normalization';
                }
                break;
            }
            case 'media': {
                const csvMediaId = csvMsg.media?.id;
                const candidateMediaId = candidate.media?.id;
                if (csvMediaId && candidateMediaId && csvMediaId === candidateMediaId) {
                    reason = 'exact media ID match';
                }
                break;
            }
            default:
                // Other message kinds are never considered equivalent
                break;
        }
        if (reason === '') {
            continue;
        }
        const confidence = 1.0;
        if (confidence >= threshold) {
            return {
                message: candidate,
                confidence,
                reasons: [reason],
            };
        }
    }
    return null;
}
|
|
164
|
+
/**
 * Normalize text for content equivalence detection.
 *
 * - Lowercases the text
 * - Removes punctuation:
 *   - ASCII: every character that is not a letter, digit, underscore or
 *     whitespace is removed
 *   - non-ASCII: only Unicode punctuation (curly quotes, dashes, fullwidth
 *     punctuation, ...) is removed; letters, digits, marks and emoji are kept
 * - Collapses whitespace runs to a single space and trims both ends
 *
 * Non-ASCII letters and emoji must be kept: stripping them makes every
 * message written in a non-Latin script (or consisting only of emoji)
 * normalize to the empty string, so unrelated messages from the same sender
 * would compare as equivalent.
 *
 * @param text - Raw message text
 * @returns Normalized comparison key
 */
function normalizeText(text) {
    return text
        .toLowerCase()
        // First alternative: any ASCII character that is not [A-Za-z0-9_] or whitespace.
        // Second alternative: Unicode punctuation, except the underscore.
        .replace(/(?![\w\s])[\x00-\x7f]|(?!_)\p{P}/gu, '')
        .replace(/\s+/g, ' ') // Normalize whitespace
        .trim();
}
|
|
178
|
+
/**
 * Produce the text portion of a content-index key.
 *
 * Delegates to `normalizeText`, so index keys and direct comparisons use the
 * same normalization.
 *
 * @param text - Raw message text
 * @returns The normalized text
 */
function normalizeTextForIndex(text) {
    const normalizedForKey = normalizeText(text);
    return normalizedForKey;
}
|
|
185
|
+
/**
 * AC03: Content equivalence lookup backed by a pre-built index.
 *
 * Only text messages with non-empty text can be looked up; anything else
 * returns `null` immediately.
 *
 * @param csvMsg - CSV message to find match for
 * @param contentIndex - Map from `handle:normalized text` to the DB messages
 * sharing that key
 * @param matchedGuids - GUIDs of DB messages that are already matched and
 * must be skipped
 * @returns A match for the first not-yet-matched DB message under the key,
 * or `null` when there is none
 */
function detectContentEquivalenceIndexed(csvMsg, contentIndex, matchedGuids) {
    const isIndexable = csvMsg.messageKind === 'text' && Boolean(csvMsg.text);
    if (!isIndexable) {
        return null;
    }
    // Same key layout as the one used when the index is built
    const sender = csvMsg.handle || '';
    const normalizedText = normalizeTextForIndex(csvMsg.text);
    const bucket = contentIndex.get(`${sender}:${normalizedText}`) || [];
    const firstUnmatched = bucket.find((candidate) => !matchedGuids.has(candidate.guid));
    if (firstUnmatched === undefined) {
        return null;
    }
    return {
        message: firstUnmatched,
        confidence: 1.0,
        reasons: ['exact text match after normalization (indexed)'],
    };
}
|
|
216
|
+
/**
 * AC02: Merge a CSV message with its matching DB message, DB winning.
 *
 * Starts from a shallow copy of the CSV message and overwrites, whenever the
 * DB message defines them:
 * - timestamps (date, dateRead, dateDelivered, dateEdited)
 * - handle
 * - replyingTo.targetMessageGuid (other replyingTo fields from CSV are kept)
 * - isRead
 *
 * The GUID of the result is always the DB GUID. Every other CSV field is
 * preserved, including any of the fields above that the DB message leaves
 * undefined.
 *
 * @param csvMsg - CSV message (not mutated)
 * @param dbMsg - Matching DB message (not mutated)
 * @returns A new merged message object
 */
export function applyDbAuthoritiveness(csvMsg, dbMsg) {
    // Start with CSV message
    const merged = { ...csvMsg };
    // DB authoritative: timestamps and handle. `date` is guarded like the
    // others so an undefined DB value cannot erase the CSV date.
    for (const field of ['date', 'dateRead', 'dateDelivered', 'dateEdited', 'handle']) {
        if (dbMsg[field] !== undefined) {
            merged[field] = dbMsg[field];
        }
    }
    // DB authoritative: associations (replyingTo)
    if (dbMsg.replyingTo?.targetMessageGuid !== undefined) {
        merged.replyingTo = {
            ...merged.replyingTo,
            targetMessageGuid: dbMsg.replyingTo.targetMessageGuid,
        };
    }
    // DB authoritative: isRead status
    if (dbMsg.isRead !== undefined) {
        merged.isRead = dbMsg.isRead;
    }
    // Prefer DB GUID (stable choice)
    merged.guid = dbMsg.guid;
    return merged;
}
|
|
254
|
+
/**
 * AC04: Verify count invariants to prevent data loss.
 *
 * A merge combines one CSV message and one DB message into one output
 * message, so the number of merges is at most min(csvCount, dbCount). The
 * output count must therefore satisfy:
 *
 *   max(csvCount, dbCount) <= outputCount <= csvCount + dbCount
 *
 * Falling below the lower bound means messages were dropped; exceeding the
 * upper bound means messages were duplicated.
 *
 * @param csvCount - Number of CSV input messages
 * @param dbCount - Number of DB input messages
 * @param outputCount - Number of messages in the merged output
 * @returns `true` when `outputCount` lies within both bounds
 */
export function verifyNoDataLoss(csvCount, dbCount, outputCount) {
    const fewestPossible = Math.max(csvCount, dbCount);
    const mostPossible = csvCount + dbCount;
    return outputCount >= fewestPossible && outputCount <= mostPossible;
}
|
|
262
|
+
//# sourceMappingURL=dedup-merge.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"dedup-merge.js","sourceRoot":"","sources":["../../src/ingest/dedup-merge.ts"],"names":[],"mappings":"AAoCA;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,aAAa,CAC5B,WAAsB,EACtB,UAAqB;IAErB,oCAAoC;IACpC,MAAM,SAAS,GAAG,CAAC,GAAG,WAAW,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAChD,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,CAC5B,CAAA;IACD,MAAM,QAAQ,GAAG,CAAC,GAAG,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAA;IAE7E,MAAM,KAAK,GAAe;QACzB,QAAQ,EAAE,WAAW,CAAC,MAAM;QAC5B,OAAO,EAAE,UAAU,CAAC,MAAM;QAC1B,WAAW,EAAE,CAAC;QACd,YAAY,EAAE,CAAC;QACf,cAAc,EAAE,CAAC;QACjB,SAAS,EAAE,CAAC;QACZ,SAAS,EAAE,CAAC;KACZ,CAAA;IAED,MAAM,cAAc,GAAc,EAAE,CAAA;IACpC,MAAM,cAAc,GAAG,IAAI,GAAG,EAAU,CAAA;IAExC,4CAA4C;IAC5C,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAmB,CAAA;IAC3C,MAAM,qBAAqB,GAAG,IAAI,GAAG,EAAqB,CAAA;IAE1D,KAAK,MAAM,KAAK,IAAI,QAAQ,EAAE,CAAC;QAC9B,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,CAAA;QAE/B,wEAAwE;QACxE,IAAI,KAAK,CAAC,WAAW,KAAK,MAAM,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC;YAChD,MAAM,aAAa,GAAG,GAAG,KAAK,CAAC,MAAM,IAAI,EAAE,IAAI,qBAAqB,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAA;YAClF,MAAM,QAAQ,GAAG,qBAAqB,CAAC,GAAG,CAAC,aAAa,CAAC,CAAA;YACzD,IAAI,QAAQ,EAAE,CAAC;gBACd,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;YACrB,CAAC;iBAAM,CAAC;gBACP,qBAAqB,CAAC,GAAG,CAAC,aAAa,EAAE,CAAC,KAAK,CAAC,CAAC,CAAA;YAClD,CAAC;QACF,CAAC;IACF,CAAC;IAED,2BAA2B;IAC3B,KAAK,MAAM,MAAM,IAAI,SAAS,EAAE,CAAC;QAChC,iDAAiD;QACjD,MAAM,UAAU,GAAG,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,IAAI,CAAA;QAEpD,IAAI,UAAU,EAAE,CAAC;YAChB,sCAAsC;YACtC,MAAM,MAAM,GAAG,sBAAsB,CAAC,MAAM,EAAE,UAAU,CAAC,CAAA;YACzD,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;YAC3B,cAAc,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,CAAA;YACnC,KAAK,CAAC,YAAY,EAAE,CAAA;QACrB,CAAC;aAAM,CAAC;YACP,kFAAkF;YAClF,IAAI,YAAY,GAAG,+BAA+B,CACjD,MAAM,EACN,qBAAqB,EACrB,cAAc,CACd,CAAA;YAED,+EAA+E;YAC/E,gFAAgF;YAChF,IAAI,CAAC,YAAY,IAAI,MAAM,CAAC,WAAW,KAAK,MAAM,EAAE,CAAC;gBACpD,MAAM,mBAAmB,GAAG,QAAQ,CAAC,MAAM,CAC1C,CAAC,KAAK,EA
AE,EAAE,CAAC,CAAC,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAC1C,CAAA;gBACD,YAAY,GAAG,wBAAwB,CAAC,MAAM,EAAE,mBAAmB,CAAC,CAAA;YACrE,CAAC;YAED,IAAI,YAAY,EAAE,CAAC;gBAClB,sCAAsC;gBACtC,MAAM,MAAM,GAAG,sBAAsB,CAAC,MAAM,EAAE,YAAY,CAAC,OAAO,CAAC,CAAA;gBACnE,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;gBAC3B,cAAc,CAAC,GAAG,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;gBAC7C,KAAK,CAAC,cAAc,EAAE,CAAA;YACvB,CAAC;iBAAM,CAAC;gBACP,yCAAyC;gBACzC,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;gBAC3B,KAAK,CAAC,SAAS,EAAE,CAAA;YAClB,CAAC;QACF,CAAC;IACF,CAAC;IAED,4BAA4B;IAC5B,KAAK,MAAM,KAAK,IAAI,QAAQ,EAAE,CAAC;QAC9B,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;YACrC,cAAc,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;QAC3B,CAAC;IACF,CAAC;IAED,KAAK,CAAC,WAAW,GAAG,cAAc,CAAC,MAAM,CAAA;IAEzC,OAAO;QACN,QAAQ,EAAE,cAAc;QACxB,KAAK;KACL,CAAA;AACF,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAC7B,OAAgB,EAChB,UAAqB;IAErB,OAAO,UAAU,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,KAAK,OAAO,CAAC,IAAI,CAAC,IAAI,IAAI,CAAA;AACvE,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,wBAAwB,CACvC,MAAe,EACf,UAAqB,EACrB,SAAS,GAAG,GAAG;IAEf,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACpC,MAAM,OAAO,GAAa,EAAE,CAAA;QAC5B,IAAI,UAAU,GAAG,CAAC,CAAA;QAElB,6BAA6B;QAC7B,IAAI,MAAM,CAAC,WAAW,KAAK,SAAS,CAAC,WAAW,EAAE,CAAC;YAClD,SAAQ;QACT,CAAC;QAED,iCAAiC;QACjC,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,IAAI,IAAI,CAAA;QACvC,MAAM,eAAe,GAAG,SAAS,CAAC,MAAM,IAAI,IAAI,CAAA;QAChD,IAAI,SAAS,KAAK,eAAe,EAAE,CAAC;YACnC,SAAQ;QACT,CAAC;QAED,6CAA6C;QAC7C,IAAI,MAAM,CAAC,WAAW,KAAK,MAAM,IAAI,SAAS,CAAC,WAAW,KAAK,MAAM,EAAE,CAAC;YACvE,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC,CAAA;YAChD,MAAM,aAAa,GAAG,aAAa,CAAC,SAAS,CAAC,IAAI,IAAI,EAAE,CAAC,CAAA;YAEzD,IAAI,OAAO,KAAK,aAAa,EAAE,CAAC;gBAC/B,UAAU,GAAG,GAAG,CAAA;gBAChB,OAAO,CAAC,IAAI,CAAC,sCAAsC,CAAC,CAAA;YACrD,CAAC;iBAAM,CAAC;gBACP,oDAAoD;gBACpD,SAAQ;YACT,CAAC;QACF,CAAC;aAAM,IACN,MAAM,CAAC,WAAW,KAAK,OAAO;YAC9B,SAAS,CAAC,WAAW,KAAK,OAAO,EAChC,CAAC;YACF,6CAA6C;YAC7C,MAAM,UAAU,GAAG,MAAM,CAAC,KAAK,EAAE,EAAE,CAAA;YACnC,MAAM,gBAAgB,GAAG,SAAS,CAAC,KAA
K,EAAE,EAAE,CAAA;YAE5C,IAAI,UAAU,IAAI,gBAAgB,IAAI,UAAU,KAAK,gBAAgB,EAAE,CAAC;gBACvE,UAAU,GAAG,GAAG,CAAA;gBAChB,OAAO,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAA;YACrC,CAAC;iBAAM,CAAC;gBACP,SAAQ;YACT,CAAC;QACF,CAAC;aAAM,CAAC;YACP,mDAAmD;YACnD,SAAQ;QACT,CAAC;QAED,4CAA4C;QAC5C,IAAI,UAAU,IAAI,SAAS,EAAE,CAAC;YAC7B,OAAO;gBACN,OAAO,EAAE,SAAS;gBAClB,UAAU;gBACV,OAAO;aACP,CAAA;QACF,CAAC;IACF,CAAC;IAED,OAAO,IAAI,CAAA;AACZ,CAAC;AAED;;;;;GAKG;AACH,SAAS,aAAa,CAAC,IAAY;IAClC,OAAO,IAAI;SACT,WAAW,EAAE;SACb,IAAI,EAAE;SACN,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,qBAAqB;SAC7C,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,uBAAuB;SAC5C,IAAI,EAAE,CAAA;AACT,CAAC;AAED;;;GAGG;AACH,SAAS,qBAAqB,CAAC,IAAY;IAC1C,OAAO,aAAa,CAAC,IAAI,CAAC,CAAA;AAC3B,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,+BAA+B,CACvC,MAAe,EACf,YAAoC,EACpC,YAAyB;IAEzB,iCAAiC;IACjC,IAAI,MAAM,CAAC,WAAW,KAAK,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;QACnD,OAAO,IAAI,CAAA;IACZ,CAAC;IAED,uBAAuB;IACvB,MAAM,aAAa,GAAG,GAAG,MAAM,CAAC,MAAM,IAAI,EAAE,IAAI,qBAAqB,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAA;IACpF,MAAM,UAAU,GAAG,YAAY,CAAC,GAAG,CAAC,aAAa,CAAC,CAAA;IAElD,IAAI,CAAC,UAAU,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC5C,OAAO,IAAI,CAAA;IACZ,CAAC;IAED,iCAAiC;IACjC,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACpC,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC;YACvC,OAAO;gBACN,OAAO,EAAE,SAAS;gBAClB,UAAU,EAAE,GAAG;gBACf,OAAO,EAAE,CAAC,gDAAgD,CAAC;aAC3D,CAAA;QACF,CAAC;IACF,CAAC;IAED,OAAO,IAAI,CAAA;AACZ,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,sBAAsB,CACrC,MAAe,EACf,KAAc;IAEd,yBAAyB;IACzB,MAAM,MAAM,GAAY,EAAE,GAAG,MAAM,EAAE,CAAA;IAErC,+BAA+B;IAC/B,MAAM,CAAC,IAAI,GAAG,KAAK,CAAC,IAAI,CAAA;IACxB,IAAI,KAAK,CAAC,QAAQ,KAAK,SAAS;QAAE,MAAM,CAAC,QAAQ,GAAG,KAAK,CAAC,QAAQ,CAAA;IAClE,IAAI,KAAK,CAAC,aAAa,KAAK,SAAS;QACpC,MAAM,CAAC,aAAa,GAAG,KAAK,CAAC,aAAa,CAAA;IAC3C,IAAI,KAAK,CAAC,UAAU,KAAK,SAAS;QAAE,MAAM,CAAC,UAAU,GAAG,KAAK,CAAC,UAAU,CAAA;IAExE,2BAA2B;IAC3B,IAAI,KAAK,CAAC,MAAM,KAAK,SAAS;QAAE,MAAM,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAA;IAE5D,8CAA8C;IAC9C,IAAI,KAAK,CAAC,UAAU,EAAE,iBAAiB,KAAK,SAAS,EAAE,CAAC;QA
CvD,MAAM,CAAC,UAAU,GAAG;YACnB,GAAG,MAAM,CAAC,UAAU;YACpB,iBAAiB,EAAE,KAAK,CAAC,UAAU,CAAC,iBAAiB;SACrD,CAAA;IACF,CAAC;IAED,kCAAkC;IAClC,IAAI,KAAK,CAAC,MAAM,KAAK,SAAS;QAAE,MAAM,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAA;IAE5D,iCAAiC;IACjC,MAAM,CAAC,IAAI,GAAG,KAAK,CAAC,IAAI,CAAA;IAExB,OAAO,MAAM,CAAA;AACd,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAC/B,QAAgB,EAChB,OAAe,EACf,WAAmB;IAEnB,qCAAqC;IACrC,iEAAiE;IACjE,OAAO,WAAW,IAAI,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAA;AAClD,CAAC"}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import type { ExportEnvelope, Message } from '../schema/message.js';
|
|
2
|
+
export type IngestOptions = {
|
|
3
|
+
attachmentRoots: string[];
|
|
4
|
+
messageDate?: string;
|
|
5
|
+
};
|
|
6
|
+
export type CSVRow = {
|
|
7
|
+
[key: string]: string | undefined;
|
|
8
|
+
};
|
|
9
|
+
/**
|
|
10
|
+
* Main entry point: Ingest CSV file and convert to unified Message schema
|
|
11
|
+
*/
|
|
12
|
+
export declare function ingestCSV(csvFilePath: string, options: IngestOptions): Message[];
|
|
13
|
+
/**
|
|
14
|
+
* Parse a single CSV row and produce 1-N messages
|
|
15
|
+
* Maps iMazing CSV format to unified Message schema
|
|
16
|
+
*
|
|
17
|
+
* AC01: Parse iMazing CSV rows with correct field mapping per CSV header
|
|
18
|
+
*/
|
|
19
|
+
export declare function parseCSVRow(row: CSVRow, lineNumber: number, options: IngestOptions): Message[];
|
|
20
|
+
/**
|
|
21
|
+
* Convert CSV date to ISO 8601 UTC with Z suffix
|
|
22
|
+
* Input format: "YYYY-MM-DD HH:MM:SS" (space-separated)
|
|
23
|
+
*/
|
|
24
|
+
export declare function convertToISO8601(csvDate: string): string | null;
|
|
25
|
+
type AttachmentRecord = {
|
|
26
|
+
copied_path?: string;
|
|
27
|
+
filename?: string;
|
|
28
|
+
senderName?: string;
|
|
29
|
+
};
|
|
30
|
+
/**
|
|
31
|
+
* Resolve attachment path to absolute path when file exists
|
|
32
|
+
*/
|
|
33
|
+
export declare function resolveAttachmentPath(attachment: AttachmentRecord | null | undefined, options: IngestOptions & {
|
|
34
|
+
messageDate?: string;
|
|
35
|
+
}): string | null;
|
|
36
|
+
/**
|
|
37
|
+
* Infer media kind from MIME type
|
|
38
|
+
*/
|
|
39
|
+
export declare function inferMediaKind(mimeType: string): 'image' | 'audio' | 'video' | 'pdf' | 'unknown';
|
|
40
|
+
/**
|
|
41
|
+
* Format ISO 8601 date for attachment search pattern
|
|
42
|
+
* Converts: 2023-10-23T06:52:57.000Z → 2023-10-23 06 52 57
|
|
43
|
+
*/
|
|
44
|
+
export declare function formatDateForAttachmentSearch(isoDate: string): string;
|
|
45
|
+
/**
|
|
46
|
+
* Export envelope wrapper for CSV ingestion output
|
|
47
|
+
*/
|
|
48
|
+
export declare function createExportEnvelope(messages: Message[]): ExportEnvelope;
|
|
49
|
+
type ValidationError = {
|
|
50
|
+
index: number;
|
|
51
|
+
message: Message;
|
|
52
|
+
issues: unknown[];
|
|
53
|
+
};
|
|
54
|
+
/**
|
|
55
|
+
* Validate all messages pass schema validation
|
|
56
|
+
*/
|
|
57
|
+
export declare function validateMessages(messages: Message[]): {
|
|
58
|
+
valid: boolean;
|
|
59
|
+
errors: ValidationError[];
|
|
60
|
+
};
|
|
61
|
+
export {};
|
|
62
|
+
//# sourceMappingURL=ingest-csv.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ingest-csv.d.ts","sourceRoot":"","sources":["../../src/ingest/ingest-csv.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,cAAc,EAAE,OAAO,EAAE,MAAM,sBAAsB,CAAA;AAGnE,MAAM,MAAM,aAAa,GAAG;IAC3B,eAAe,EAAE,MAAM,EAAE,CAAA;IACzB,WAAW,CAAC,EAAE,MAAM,CAAA;CACpB,CAAA;AAED,MAAM,MAAM,MAAM,GAAG;IACpB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAAA;CACjC,CAAA;AAED;;GAEG;AACH,wBAAgB,SAAS,CACxB,WAAW,EAAE,MAAM,EACnB,OAAO,EAAE,aAAa,GACpB,OAAO,EAAE,CAcX;AAED;;;;;GAKG;AACH,wBAAgB,WAAW,CAC1B,GAAG,EAAE,MAAM,EACX,UAAU,EAAE,MAAM,EAClB,OAAO,EAAE,aAAa,GACpB,OAAO,EAAE,CAoIX;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CA+B/D;AAED,KAAK,gBAAgB,GAAG;IACvB,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,UAAU,CAAC,EAAE,MAAM,CAAA;CACnB,CAAA;AAED;;GAEG;AACH,wBAAgB,qBAAqB,CACpC,UAAU,EAAE,gBAAgB,GAAG,IAAI,GAAG,SAAS,EAC/C,OAAO,EAAE,aAAa,GAAG;IAAE,WAAW,CAAC,EAAE,MAAM,CAAA;CAAE,GAC/C,MAAM,GAAG,IAAI,CAsDf;AAED;;GAEG;AACH,wBAAgB,cAAc,CAC7B,QAAQ,EAAE,MAAM,GACd,OAAO,GAAG,OAAO,GAAG,OAAO,GAAG,KAAK,GAAG,SAAS,CASjD;AAED;;;GAGG;AACH,wBAAgB,6BAA6B,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAcrE;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,QAAQ,EAAE,OAAO,EAAE,GAAG,cAAc,CAOxE;AAED,KAAK,eAAe,GAAG;IACtB,KAAK,EAAE,MAAM,CAAA;IACb,OAAO,EAAE,OAAO,CAAA;IAChB,MAAM,EAAE,OAAO,EAAE,CAAA;CACjB,CAAA;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,OAAO,EAAE,GAAG;IACtD,KAAK,EAAE,OAAO,CAAA;IACd,MAAM,EAAE,eAAe,EAAE,CAAA;CACzB,CAqBA"}
|