@chainlesschain/personal-data-hub 0.3.1 → 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/email-adapter-snapshot.test.js +237 -0
- package/__tests__/adapters/email-adapter.test.js +1 -1
- package/__tests__/adapters/email-pdf-extractor.test.js +1 -1
- package/__tests__/adapters/email-retry-progress.test.js +1 -1
- package/__tests__/adapters/email-templates.test.js +1 -1
- package/__tests__/adapters/social-bilibili-adb-api-client.test.js +721 -0
- package/__tests__/adapters/social-bilibili-adb-chromium-cookies-reader.test.js +346 -0
- package/__tests__/adapters/social-bilibili-adb-collector.test.js +284 -0
- package/__tests__/adapters/social-bilibili-adb-cookies-extension.test.js +343 -0
- package/__tests__/adapters/social-bilibili-adb-snapshot-builder.test.js +296 -0
- package/__tests__/adapters/social-douyin-adb-collector.test.js +254 -0
- package/__tests__/adapters/social-douyin-adb-im-db-parser.test.js +304 -0
- package/__tests__/adapters/social-douyin-adb-snapshot-builder.test.js +216 -0
- package/__tests__/adapters/social-weibo-adb-api-client.test.js +362 -0
- package/__tests__/adapters/social-weibo-adb-collector.test.js +201 -0
- package/__tests__/adapters/social-weibo-adb-snapshot-builder.test.js +189 -0
- package/__tests__/adapters/social-xiaohongshu-adb-collector.test.js +207 -0
- package/__tests__/adapters/social-xiaohongshu-adb-sign.test.js +130 -0
- package/__tests__/adapters/system-data-android.test.js +32 -1
- package/__tests__/longtail-adapters.test.js +15 -2
- package/__tests__/shopping-adapters.test.js +96 -0
- package/__tests__/sign-providers.test.js +62 -0
- package/__tests__/travel-adapters.test.js +66 -0
- package/__tests__/whatsapp-adapter.test.js +5 -2
- package/lib/adapters/browser-history-chrome/chrome-db-reader.js +11 -1
- package/lib/adapters/email-imap/email-adapter.js +224 -17
- package/lib/adapters/messaging-telegram/index.js +15 -12
- package/lib/adapters/messaging-whatsapp/index.js +15 -12
- package/lib/adapters/shopping-taobao/index.js +161 -21
- package/lib/adapters/social-bilibili-adb/api-client.js +555 -0
- package/lib/adapters/social-bilibili-adb/chromium-cookies-reader.js +296 -0
- package/lib/adapters/social-bilibili-adb/collector.js +190 -0
- package/lib/adapters/social-bilibili-adb/cookies-extension.js +250 -0
- package/lib/adapters/social-bilibili-adb/index.js +51 -0
- package/lib/adapters/social-bilibili-adb/snapshot-builder.js +197 -0
- package/lib/adapters/social-douyin/index.js +4 -0
- package/lib/adapters/social-douyin-adb/collector.js +165 -0
- package/lib/adapters/social-douyin-adb/db-extension.js +281 -0
- package/lib/adapters/social-douyin-adb/im-db-parser.js +287 -0
- package/lib/adapters/social-douyin-adb/index.js +57 -0
- package/lib/adapters/social-douyin-adb/snapshot-builder.js +174 -0
- package/lib/adapters/social-weibo-adb/api-client.js +281 -0
- package/lib/adapters/social-weibo-adb/collector.js +169 -0
- package/lib/adapters/social-weibo-adb/cookies-extension.js +251 -0
- package/lib/adapters/social-weibo-adb/index.js +55 -0
- package/lib/adapters/social-weibo-adb/snapshot-builder.js +145 -0
- package/lib/adapters/social-xiaohongshu-adb/api-client.js +278 -0
- package/lib/adapters/social-xiaohongshu-adb/collector.js +158 -0
- package/lib/adapters/social-xiaohongshu-adb/cookies-extension.js +211 -0
- package/lib/adapters/social-xiaohongshu-adb/index.js +50 -0
- package/lib/adapters/social-xiaohongshu-adb/sign.js +90 -0
- package/lib/adapters/social-xiaohongshu-adb/snapshot-builder.js +126 -0
- package/lib/adapters/system-data-android/adapter.js +77 -3
- package/lib/adapters/travel-amap/index.js +16 -10
- package/lib/adapters/travel-ctrip/index.js +25 -9
- package/lib/adapters/vscode/vscode-reader.js +7 -1
- package/lib/sign-providers/index.js +20 -0
- package/lib/sign-providers/interface.js +82 -0
- package/lib/sign-providers/null-sign-provider.js +30 -0
- package/package.json +6 -1
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
|
|
16
16
|
"use strict";
|
|
17
17
|
|
|
18
|
+
const fs = require("node:fs");
|
|
18
19
|
const {
|
|
19
20
|
EVENT_SUBTYPES,
|
|
20
21
|
PERSON_SUBTYPES,
|
|
@@ -34,26 +35,45 @@ const { extractPdfText, passwordsFromHints } = require("./pdf-extractor");
|
|
|
34
35
|
const { extractTransactions } = require("./transactions");
|
|
35
36
|
|
|
36
37
|
const NAME = "email-imap";
|
|
37
|
-
const VERSION = "0.
|
|
38
|
+
const VERSION = "0.7.0"; // Phase 5.8 — snapshot mode for Android in-APK IMAP fetch
|
|
39
|
+
const SNAPSHOT_SCHEMA_VERSION = 1;
|
|
38
40
|
|
|
39
41
|
class EmailAdapter {
|
|
40
42
|
constructor(opts) {
|
|
41
43
|
if (!opts || typeof opts !== "object") {
|
|
42
44
|
throw new Error("EmailAdapter: opts required");
|
|
43
45
|
}
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
46
|
+
|
|
47
|
+
// Phase 5.8 — snapshot mode: Android EmailLocalCollector ships pre-fetched
|
|
48
|
+
// {records:[]} JSON via ccRunner.syncAdapter("email-imap", path). When
|
|
49
|
+
// snapshotMode=true: skip IMAP-account validation (no IMAP login needed)
|
|
50
|
+
// and switch authenticate/sync to the snapshot path. The single registered
|
|
51
|
+
// instance handles every Android vendor; each snapshot file carries its
|
|
52
|
+
// own vendor + user, no per-account constructor needed (mirror of
|
|
53
|
+
// travel-12306 / travel-baidu-map / social-bilibili snapshot mode).
|
|
54
|
+
this._snapshotMode = !!opts.snapshotMode;
|
|
55
|
+
|
|
56
|
+
if (!this._snapshotMode) {
|
|
57
|
+
const account = opts.account;
|
|
58
|
+
if (!account || typeof account !== "object") {
|
|
59
|
+
throw new Error("EmailAdapter: opts.account required");
|
|
60
|
+
}
|
|
61
|
+
if (typeof account.email !== "string" || !account.email.includes("@")) {
|
|
62
|
+
throw new Error("EmailAdapter: account.email must be a full address");
|
|
63
|
+
}
|
|
64
|
+
if (typeof account.authCode !== "string" || account.authCode.length === 0) {
|
|
65
|
+
throw new Error("EmailAdapter: account.authCode required (provider authorization code)");
|
|
66
|
+
}
|
|
67
|
+
this.account = account;
|
|
68
|
+
this._provider = resolveProvider(account);
|
|
69
|
+
} else {
|
|
70
|
+
// Snapshot-mode stub: account fields used by _envelopeToRawEvent/
|
|
71
|
+
// normalize fall back to "(snapshot)" placeholders. Real per-record
|
|
72
|
+
// vendor + user surface in the snapshot envelope payload instead.
|
|
73
|
+
this.account = opts.account || { email: "(snapshot)", authCode: "(snapshot)" };
|
|
74
|
+
this._provider = null;
|
|
53
75
|
}
|
|
54
76
|
|
|
55
|
-
this.account = account;
|
|
56
|
-
this._provider = resolveProvider(account);
|
|
57
77
|
this._sessionFactory = typeof opts.sessionFactory === "function"
|
|
58
78
|
? opts.sessionFactory
|
|
59
79
|
: (cfg) => new ImapSession(cfg);
|
|
@@ -129,16 +149,15 @@ class EmailAdapter {
|
|
|
129
149
|
this.name = NAME;
|
|
130
150
|
this.version = VERSION;
|
|
131
151
|
this.capabilities = [
|
|
132
|
-
"sync:imap",
|
|
133
|
-
"auth:authcode",
|
|
152
|
+
...(this._snapshotMode ? ["sync:snapshot"] : ["sync:imap"]),
|
|
153
|
+
...(this._snapshotMode ? [] : ["auth:authcode"]),
|
|
134
154
|
"parse:mime-body",
|
|
135
155
|
"parse:attachment-metadata",
|
|
136
156
|
"classify:layer1-rules",
|
|
137
157
|
...(this._llm ? ["classify:layer2-llm"] : []),
|
|
138
158
|
"extract:6-templates",
|
|
139
159
|
...(this._disablePdfExtraction ? [] : ["decrypt:pdf-bills"]),
|
|
140
|
-
"sync:retry-backoff",
|
|
141
|
-
"sync:progress-stream",
|
|
160
|
+
...(this._snapshotMode ? [] : ["sync:retry-backoff", "sync:progress-stream"]),
|
|
142
161
|
];
|
|
143
162
|
this.rateLimits = { perMinute: 60 };
|
|
144
163
|
this.dataDisclosure = {
|
|
@@ -159,7 +178,30 @@ class EmailAdapter {
|
|
|
159
178
|
};
|
|
160
179
|
}
|
|
161
180
|
|
|
162
|
-
async authenticate(
|
|
181
|
+
async authenticate(ctx = {}) {
|
|
182
|
+
// Phase 5.8 — snapshot mode authenticate: validate ctx.inputPath is
|
|
183
|
+
// readable; no IMAP login. Snapshot mode WITHOUT inputPath in ctx
|
|
184
|
+
// returns NO_INPUT (parallel to travel-12306 / travel-baidu-map shape).
|
|
185
|
+
if (this._snapshotMode || (ctx && typeof ctx.inputPath === "string" && ctx.inputPath.length > 0)) {
|
|
186
|
+
if (!ctx || typeof ctx.inputPath !== "string" || ctx.inputPath.length === 0) {
|
|
187
|
+
return {
|
|
188
|
+
ok: false,
|
|
189
|
+
reason: "NO_INPUT",
|
|
190
|
+
message: "email-imap (snapshot mode): ctx.inputPath required",
|
|
191
|
+
};
|
|
192
|
+
}
|
|
193
|
+
try {
|
|
194
|
+
fs.accessSync(ctx.inputPath, fs.constants.R_OK);
|
|
195
|
+
} catch (err) {
|
|
196
|
+
return {
|
|
197
|
+
ok: false,
|
|
198
|
+
reason: "INPUT_PATH_UNREADABLE",
|
|
199
|
+
message: `snapshot not readable at ${ctx.inputPath}: ${err.message}`,
|
|
200
|
+
};
|
|
201
|
+
}
|
|
202
|
+
return { ok: true, mode: "snapshot-file" };
|
|
203
|
+
}
|
|
204
|
+
|
|
163
205
|
const session = this._sessionFactory(this._sessionConfig());
|
|
164
206
|
try {
|
|
165
207
|
await session.connect();
|
|
@@ -184,6 +226,15 @@ class EmailAdapter {
|
|
|
184
226
|
}
|
|
185
227
|
|
|
186
228
|
async *sync(opts = {}) {
|
|
229
|
+
// Phase 5.8 — snapshot mode: bypass IMAP session entirely, read Android
|
|
230
|
+
// EmailLocalCollector's staging JSON, yield one raw event per record.
|
|
231
|
+
// Classification + extraction reused on envelope-only data (bodyPreview
|
|
232
|
+
// is the only text we get; PDF decryption skipped since attachment
|
|
233
|
+
// buffers never crossed the Android → desktop boundary).
|
|
234
|
+
if (this._snapshotMode || (typeof opts.inputPath === "string" && opts.inputPath.length > 0)) {
|
|
235
|
+
yield* this._syncViaSnapshot(opts);
|
|
236
|
+
return;
|
|
237
|
+
}
|
|
187
238
|
const folders = Array.isArray(opts.folders) && opts.folders.length > 0
|
|
188
239
|
? opts.folders
|
|
189
240
|
: this._provider.folders;
|
|
@@ -329,6 +380,143 @@ class EmailAdapter {
|
|
|
329
380
|
}
|
|
330
381
|
}
|
|
331
382
|
|
|
383
|
+
/**
|
|
384
|
+
* Phase 5.8 — snapshot path: read Android EmailLocalCollector's staging
|
|
385
|
+
* JSON, convert each record to an IMAP-shaped envelope, run classifier +
|
|
386
|
+
* extractor (no PDF — Android only ships bodyPreview), yield raw events.
|
|
387
|
+
*
|
|
388
|
+
* Expected snapshot shape (matches EmailLocalCollector.kt:135-156):
|
|
389
|
+
* {vendor, user, fetchedAt, records: [{
|
|
390
|
+
* messageNumber, subject, from, to, sentDateMs, bodyPreview,
|
|
391
|
+
* hasAttachments
|
|
392
|
+
* }]}
|
|
393
|
+
*
|
|
394
|
+
* Lossy compared to IMAP path:
|
|
395
|
+
* - No HTML body (Android Jakarta Mail only ships text/plain or
|
|
396
|
+
* stripped-html as bodyPreview, capped 8KB).
|
|
397
|
+
* - No attachment buffers → no PDF decryption / transaction extraction
|
|
398
|
+
* even for bill-template matches. `hasAttachments` boolean only.
|
|
399
|
+
* - No real Message-ID → originalId synthesized from
|
|
400
|
+
* `android-snapshot:<vendor>:<user>:<messageNumber>` (stable per device).
|
|
401
|
+
* - No flags / cc / size; UID = Android messageNumber (per-folder).
|
|
402
|
+
*/
|
|
403
|
+
async *_syncViaSnapshot(opts) {
|
|
404
|
+
const raw = fs.readFileSync(opts.inputPath, "utf-8");
|
|
405
|
+
let snapshot;
|
|
406
|
+
try {
|
|
407
|
+
snapshot = JSON.parse(raw);
|
|
408
|
+
} catch (err) {
|
|
409
|
+
throw new Error(
|
|
410
|
+
`email-imap.sync (snapshot): bad JSON at ${opts.inputPath}: ${err.message}`,
|
|
411
|
+
);
|
|
412
|
+
}
|
|
413
|
+
if (!snapshot || typeof snapshot !== "object") {
|
|
414
|
+
throw new Error(
|
|
415
|
+
`email-imap.sync (snapshot): expected object, got ${typeof snapshot}`,
|
|
416
|
+
);
|
|
417
|
+
}
|
|
418
|
+
if (!Array.isArray(snapshot.records)) {
|
|
419
|
+
throw new Error(
|
|
420
|
+
"email-imap.sync (snapshot): expected {records: [...]} shape (Android EmailLocalCollector writes this)",
|
|
421
|
+
);
|
|
422
|
+
}
|
|
423
|
+
const vendor = typeof snapshot.vendor === "string" ? snapshot.vendor : "unknown";
|
|
424
|
+
const user = typeof snapshot.user === "string" ? snapshot.user : "unknown@snapshot";
|
|
425
|
+
const fallbackCapturedAt =
|
|
426
|
+
Number.isFinite(snapshot.fetchedAt) && snapshot.fetchedAt > 0
|
|
427
|
+
? Math.floor(snapshot.fetchedAt)
|
|
428
|
+
: Date.now();
|
|
429
|
+
|
|
430
|
+
const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
|
|
431
|
+
let emitted = 0;
|
|
432
|
+
|
|
433
|
+
for (const r of snapshot.records) {
|
|
434
|
+
if (emitted >= limit) return;
|
|
435
|
+
if (!r || typeof r !== "object") continue;
|
|
436
|
+
const env = this._androidRecordToEnvelope(r, vendor, user, fallbackCapturedAt);
|
|
437
|
+
// bodyPreview is the only text we have — wrap as a thin parsedBody so
|
|
438
|
+
// the classifier sees the same shape it does for IMAP-fetched mail.
|
|
439
|
+
const parsedBody = {
|
|
440
|
+
textBody: typeof r.bodyPreview === "string" ? r.bodyPreview : "",
|
|
441
|
+
htmlBody: "",
|
|
442
|
+
attachments: r.hasAttachments
|
|
443
|
+
? [{ filename: "(unknown)", contentType: "application/octet-stream", size: 0 }]
|
|
444
|
+
: [],
|
|
445
|
+
headers: {},
|
|
446
|
+
};
|
|
447
|
+
|
|
448
|
+
let classification = null;
|
|
449
|
+
if (!this._disableClassification) {
|
|
450
|
+
try {
|
|
451
|
+
classification = await this._classifier(
|
|
452
|
+
this._classifierInput(env, parsedBody),
|
|
453
|
+
{ llm: this._llm, minLayer1Confidence: this._minLayer1Confidence },
|
|
454
|
+
);
|
|
455
|
+
} catch (err) {
|
|
456
|
+
classification = {
|
|
457
|
+
category: CATEGORIES.OTHER,
|
|
458
|
+
confidence: 0,
|
|
459
|
+
layer: "error",
|
|
460
|
+
error: err && err.message ? err.message : String(err),
|
|
461
|
+
};
|
|
462
|
+
}
|
|
463
|
+
}
|
|
464
|
+
|
|
465
|
+
let extraction = null;
|
|
466
|
+
if (!this._disableExtraction) {
|
|
467
|
+
try {
|
|
468
|
+
extraction = await this._extractor(
|
|
469
|
+
this._classifierInput(env, parsedBody),
|
|
470
|
+
classification || { category: CATEGORIES.OTHER },
|
|
471
|
+
{ llm: this._llm },
|
|
472
|
+
);
|
|
473
|
+
} catch (err) {
|
|
474
|
+
extraction = {
|
|
475
|
+
template: null,
|
|
476
|
+
fields: null,
|
|
477
|
+
warnings: [`extractor threw: ${err && err.message ? err.message : err}`],
|
|
478
|
+
};
|
|
479
|
+
}
|
|
480
|
+
}
|
|
481
|
+
|
|
482
|
+
// PDF extraction intentionally skipped — attachment buffers never crossed
|
|
483
|
+
// the Android → desktop boundary. Bill-template extractions on snapshot
|
|
484
|
+
// records get extraction.fields but no transactions list.
|
|
485
|
+
yield this._envelopeToRawEvent(env, "INBOX", parsedBody, classification, extraction);
|
|
486
|
+
emitted += 1;
|
|
487
|
+
}
|
|
488
|
+
}
|
|
489
|
+
|
|
490
|
+
/**
|
|
491
|
+
* Convert Android EmailLocalCollector record → IMAP-shaped envelope.
|
|
492
|
+
* Address strings ("Name <addr@x>" / "addr@x" / "addr@x, addr2@y") parse
|
|
493
|
+
* into {address, name} objects matching mailparser's output. Multi-recipient
|
|
494
|
+
* `to` strings split on comma.
|
|
495
|
+
*/
|
|
496
|
+
_androidRecordToEnvelope(r, vendor, user, fallbackCapturedAt) {
|
|
497
|
+
const messageNumber = Number.isInteger(r.messageNumber) ? r.messageNumber : 0;
|
|
498
|
+
const sentDate = Number.isFinite(r.sentDateMs) && r.sentDateMs > 0
|
|
499
|
+
? new Date(r.sentDateMs)
|
|
500
|
+
: new Date(fallbackCapturedAt);
|
|
501
|
+
return {
|
|
502
|
+
uid: messageNumber,
|
|
503
|
+
messageId: `android-snapshot:${vendor}:${user}:${messageNumber}`,
|
|
504
|
+
folder: "INBOX",
|
|
505
|
+
subject: typeof r.subject === "string" ? r.subject : "(no subject)",
|
|
506
|
+
from: typeof r.from === "string" && r.from.length > 0
|
|
507
|
+
? [parseSnapshotAddress(r.from)]
|
|
508
|
+
: [],
|
|
509
|
+
to: typeof r.to === "string" && r.to.length > 0
|
|
510
|
+
? r.to.split(",").map((s) => parseSnapshotAddress(s.trim())).filter(Boolean)
|
|
511
|
+
: [],
|
|
512
|
+
cc: [],
|
|
513
|
+
flags: [],
|
|
514
|
+
size: 0,
|
|
515
|
+
internalDate: sentDate,
|
|
516
|
+
date: sentDate,
|
|
517
|
+
};
|
|
518
|
+
}
|
|
519
|
+
|
|
332
520
|
/**
|
|
333
521
|
* Phase 5.7: connect with retry on transient errors. Auth failures
|
|
334
522
|
* (AUTH_FAILED) and mailbox-not-found (MAILBOX_NOT_FOUND) bypass retry —
|
|
@@ -673,6 +861,25 @@ function formatAddr(a) {
|
|
|
673
861
|
return a.name ? `${a.name} <${a.address}>` : a.address;
|
|
674
862
|
}
|
|
675
863
|
|
|
864
|
+
/**
|
|
865
|
+
* Phase 5.8 — snapshot mode address parser. Android records ship address
|
|
866
|
+
* fields as strings like "Name <addr@x.com>" or "addr@x.com". Convert to
|
|
867
|
+
* mailparser-compatible {address, name} shape. Returns null for blank input.
|
|
868
|
+
*/
|
|
869
|
+
function parseSnapshotAddress(s) {
  // Turns one address string into { address, name }. Non-strings and
  // blank/whitespace-only strings yield null.
  const trimmed = typeof s === "string" ? s.trim() : "";
  if (trimmed === "") return null;

  // "Name <addr@x>" form: optional display name followed by <address>.
  const bracketed = /^(.*?)\s*<([^>]+)>\s*$/.exec(trimmed);
  if (bracketed === null) {
    // Anything else is taken verbatim as a bare address.
    return { address: trimmed, name: undefined };
  }

  const [, rawName, rawAddress] = bracketed;
  // Strip one leading and one trailing quote character from the display name.
  const displayName = rawName.trim().replace(/^["']|["']$/g, "");
  return {
    address: rawAddress.trim(),
    name: displayName.length > 0 ? displayName : undefined,
  };
}
|
|
882
|
+
|
|
676
883
|
function formatRecipients(list) {
|
|
677
884
|
if (!Array.isArray(list) || list.length === 0) return "?";
|
|
678
885
|
const head = list.slice(0, 3).map(formatAddr).join(", ");
|
|
@@ -18,20 +18,22 @@ const fs = require("node:fs");
|
|
|
18
18
|
const { newId } = require("../../ids");
|
|
19
19
|
|
|
20
20
|
const NAME = "messaging-telegram";
|
|
21
|
-
const VERSION = "0.
|
|
21
|
+
const VERSION = "0.6.0"; // 2026-05-25 — account.userId OPTIONAL + inputPath alias
|
|
22
22
|
|
|
23
23
|
class TelegramAdapter {
|
|
24
24
|
constructor(opts = {}) {
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
25
|
+
// 2026-05-25 — account.userId OPTIONAL (mirror Taobao/Ctrip dual-mode).
|
|
26
|
+
// sqlite-mode adapter still requires user to provide a decrypted
|
|
27
|
+
// cache4.db (Telegram cache db is unencrypted — easier than WeChat).
|
|
28
|
+
// Earlier strict ctor blocked auto-register at boot → silent "no adapter
|
|
29
|
+
// messaging-telegram" when Android collector ships extracted db.
|
|
30
|
+
this.account = opts.account || null;
|
|
31
|
+
this._dbPath = opts.dbPath || opts.inputPath || null;
|
|
30
32
|
this._dbDriverFactory = opts.dbDriverFactory || null;
|
|
31
33
|
|
|
32
34
|
this.name = NAME;
|
|
33
35
|
this.version = VERSION;
|
|
34
|
-
this.capabilities = ["sync:sqlite", "parse:telegram-messages"];
|
|
36
|
+
this.capabilities = ["sync:sqlite", "sync:snapshot", "parse:telegram-messages"];
|
|
35
37
|
this.extractMode = "device-pull";
|
|
36
38
|
this.rateLimits = {};
|
|
37
39
|
this.dataDisclosure = {
|
|
@@ -43,11 +45,12 @@ class TelegramAdapter {
|
|
|
43
45
|
};
|
|
44
46
|
}
|
|
45
47
|
|
|
46
|
-
async authenticate() {
|
|
47
|
-
|
|
48
|
-
|
|
48
|
+
async authenticate(ctx = {}) {
|
|
49
|
+
const dbPath = (ctx && (ctx.inputPath || ctx.dbPath)) || this._dbPath;
|
|
50
|
+
if (!dbPath || !fs.existsSync(dbPath)) {
|
|
51
|
+
return { ok: false, reason: "DB_NOT_PULLED", message: "needs ctx.inputPath / opts.dbPath pointing to extracted cache4.db" };
|
|
49
52
|
}
|
|
50
|
-
return { ok: true, account: this.account.userId };
|
|
53
|
+
return { ok: true, account: this.account ? this.account.userId : null, mode: "snapshot-file" };
|
|
51
54
|
}
|
|
52
55
|
|
|
53
56
|
async healthCheck() {
|
|
@@ -56,7 +59,7 @@ class TelegramAdapter {
|
|
|
56
59
|
}
|
|
57
60
|
|
|
58
61
|
async *sync(opts = {}) {
|
|
59
|
-
const dbPath = opts.dbPath || this._dbPath;
|
|
62
|
+
const dbPath = opts.inputPath || opts.dbPath || this._dbPath;
|
|
60
63
|
if (!dbPath || !fs.existsSync(dbPath)) return;
|
|
61
64
|
const Driver = this._dbDriverFactory
|
|
62
65
|
? this._dbDriverFactory()
|
|
@@ -22,21 +22,23 @@ const fs = require("node:fs");
|
|
|
22
22
|
const { newId } = require("../../ids");
|
|
23
23
|
|
|
24
24
|
const NAME = "messaging-whatsapp";
|
|
25
|
-
const VERSION = "0.
|
|
25
|
+
const VERSION = "0.6.0"; // 2026-05-25 — account.phone OPTIONAL + inputPath alias
|
|
26
26
|
|
|
27
27
|
class WhatsAppAdapter {
|
|
28
28
|
constructor(opts = {}) {
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
29
|
+
// 2026-05-25 — account.phone OPTIONAL (mirror Taobao/Ctrip/Telegram).
|
|
30
|
+
// sqlite-mode adapter still requires user to provide a decrypted
|
|
31
|
+
// msgstore.db (user pre-decrypts with WhatsApp Crypt key — out of band).
|
|
32
|
+
// Earlier strict ctor blocked auto-register at boot → silent "no adapter
|
|
33
|
+
// messaging-whatsapp" when Android collector ships extracted db.
|
|
34
|
+
this.account = opts.account || null;
|
|
35
|
+
this._dbPath = opts.dbPath || opts.inputPath || null;
|
|
34
36
|
this._keyProvider = opts.keyProvider || null;
|
|
35
37
|
this._dbDriverFactory = opts.dbDriverFactory || null;
|
|
36
38
|
|
|
37
39
|
this.name = NAME;
|
|
38
40
|
this.version = VERSION;
|
|
39
|
-
this.capabilities = ["sync:sqlite", "parse:whatsapp-messages"];
|
|
41
|
+
this.capabilities = ["sync:sqlite", "sync:snapshot", "parse:whatsapp-messages"];
|
|
40
42
|
this.extractMode = "device-pull";
|
|
41
43
|
this.rateLimits = {};
|
|
42
44
|
this.dataDisclosure = {
|
|
@@ -50,11 +52,12 @@ class WhatsAppAdapter {
|
|
|
50
52
|
};
|
|
51
53
|
}
|
|
52
54
|
|
|
53
|
-
async authenticate() {
|
|
54
|
-
|
|
55
|
-
|
|
55
|
+
async authenticate(ctx = {}) {
|
|
56
|
+
const dbPath = (ctx && (ctx.inputPath || ctx.dbPath)) || this._dbPath;
|
|
57
|
+
if (!dbPath || !fs.existsSync(dbPath)) {
|
|
58
|
+
return { ok: false, reason: "DB_NOT_PULLED", message: "needs ctx.inputPath / opts.dbPath pointing to decrypted msgstore.db" };
|
|
56
59
|
}
|
|
57
|
-
return { ok: true, account: this.account.phone };
|
|
60
|
+
return { ok: true, account: this.account ? this.account.phone : null, mode: "snapshot-file" };
|
|
58
61
|
}
|
|
59
62
|
|
|
60
63
|
async healthCheck() {
|
|
@@ -63,7 +66,7 @@ class WhatsAppAdapter {
|
|
|
63
66
|
}
|
|
64
67
|
|
|
65
68
|
async *sync(opts = {}) {
|
|
66
|
-
const dbPath = opts.dbPath || this._dbPath;
|
|
69
|
+
const dbPath = opts.inputPath || opts.dbPath || this._dbPath;
|
|
67
70
|
if (!dbPath || !fs.existsSync(dbPath)) return;
|
|
68
71
|
const Driver = this._dbDriverFactory
|
|
69
72
|
? this._dbDriverFactory()
|
|
@@ -18,28 +18,36 @@
|
|
|
18
18
|
|
|
19
19
|
"use strict";
|
|
20
20
|
|
|
21
|
+
const fs = require("node:fs");
|
|
21
22
|
const { normalizeOrderRecord, CookieAuth } = require("../shopping-base");
|
|
22
23
|
|
|
23
24
|
const NAME = "shopping-taobao";
|
|
24
|
-
const VERSION = "0.
|
|
25
|
+
const VERSION = "0.6.0"; // §2.4d snapshot mode for Android in-APK cc
|
|
26
|
+
const SNAPSHOT_SCHEMA_VERSION = 1;
|
|
27
|
+
|
|
28
|
+
const KIND_ORDER = "order";
|
|
29
|
+
const VALID_SNAPSHOT_KINDS = Object.freeze([KIND_ORDER]);
|
|
25
30
|
|
|
26
31
|
const TAOBAO_ORDERS_URL = "https://h5.m.taobao.com/mlapp/olist.html";
|
|
27
32
|
|
|
28
33
|
class TaobaoAdapter {
|
|
29
34
|
constructor(opts = {}) {
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
this.
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
35
|
+
// §2.4d v0.2 — account.userId OPTIONAL (mirror shopping-jd/meituan/pinduoduo
|
|
36
|
+
// dual-mode). Snapshot mode is stateless; cookie mode requires it; checked
|
|
37
|
+
// at sync time, not construction. Earlier strict ctor blocked auto-register
|
|
38
|
+
// at boot → user-driven HTML import worked but JSON snapshot path didn't.
|
|
39
|
+
this.account = opts.account || null;
|
|
40
|
+
this._cookieAuth = opts.account
|
|
41
|
+
? new CookieAuth({
|
|
42
|
+
platform: "taobao",
|
|
43
|
+
cookies: opts.account.cookies || "",
|
|
44
|
+
})
|
|
45
|
+
: null;
|
|
38
46
|
this._fetchFn = typeof opts.fetchFn === "function" ? opts.fetchFn : defaultFetch;
|
|
39
47
|
|
|
40
48
|
this.name = NAME;
|
|
41
49
|
this.version = VERSION;
|
|
42
|
-
this.capabilities = ["sync:cookie-api", "parse:taobao-orders"];
|
|
50
|
+
this.capabilities = ["sync:snapshot", "sync:cookie-api", "parse:taobao-orders"];
|
|
43
51
|
this.extractMode = "web-api";
|
|
44
52
|
this.rateLimits = { perMinute: 6, perDay: 200 }; // respect Taobao风控
|
|
45
53
|
this.dataDisclosure = {
|
|
@@ -48,23 +56,132 @@ class TaobaoAdapter {
|
|
|
48
56
|
],
|
|
49
57
|
sensitivity: "high",
|
|
50
58
|
legalGate: false,
|
|
59
|
+
defaultInclude: { order: true },
|
|
51
60
|
};
|
|
61
|
+
|
|
62
|
+
// _deps injection seam — vi.mock fs doesn't intercept inlined CJS require.
|
|
63
|
+
this._deps = { fs };
|
|
52
64
|
}
|
|
53
65
|
|
|
54
|
-
async authenticate() {
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
66
|
+
async authenticate(ctx = {}) {
|
|
67
|
+
if (ctx && typeof ctx.inputPath === "string" && ctx.inputPath.length > 0) {
|
|
68
|
+
try {
|
|
69
|
+
this._deps.fs.accessSync(ctx.inputPath, this._deps.fs.constants.R_OK);
|
|
70
|
+
} catch (err) {
|
|
71
|
+
return {
|
|
72
|
+
ok: false,
|
|
73
|
+
reason: "INPUT_PATH_UNREADABLE",
|
|
74
|
+
message: `snapshot not readable at ${ctx.inputPath}: ${err.message}`,
|
|
75
|
+
};
|
|
76
|
+
}
|
|
77
|
+
return { ok: true, mode: "snapshot-file" };
|
|
78
|
+
}
|
|
79
|
+
if (this._cookieAuth) {
|
|
80
|
+
const ok = await this._cookieAuth.validate();
|
|
81
|
+
if (!ok) return { ok: false, reason: "INVALID_COOKIE", error: "cookies missing or empty" };
|
|
82
|
+
if (!this.account || !this.account.userId) {
|
|
83
|
+
return { ok: false, reason: "NO_ACCOUNT_USERID", message: "cookie mode requires account.userId" };
|
|
84
|
+
}
|
|
85
|
+
return { ok: true, account: this.account.userId, mode: "cookie" };
|
|
86
|
+
}
|
|
87
|
+
return {
|
|
88
|
+
ok: false,
|
|
89
|
+
reason: "NO_INPUT",
|
|
90
|
+
message: "TaobaoAdapter.authenticate: needs opts.inputPath (snapshot mode) OR opts.account.cookies (cookie mode)",
|
|
91
|
+
};
|
|
58
92
|
}
|
|
59
93
|
|
|
60
94
|
async healthCheck() {
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
95
|
+
if (this._cookieAuth) {
|
|
96
|
+
const r = await this.authenticate();
|
|
97
|
+
return r.ok
|
|
98
|
+
? { ok: true, lastChecked: Date.now() }
|
|
99
|
+
: { ok: false, reason: r.reason, error: r.error };
|
|
100
|
+
}
|
|
101
|
+
return { ok: true, lastChecked: Date.now() };
|
|
65
102
|
}
|
|
66
103
|
|
|
67
104
|
async *sync(opts = {}) {
|
|
105
|
+
if (typeof opts.inputPath === "string" && opts.inputPath.length > 0) {
|
|
106
|
+
yield* this._syncViaSnapshot(opts);
|
|
107
|
+
return;
|
|
108
|
+
}
|
|
109
|
+
if (this._cookieAuth) {
|
|
110
|
+
yield* this._syncViaCookie(opts);
|
|
111
|
+
return;
|
|
112
|
+
}
|
|
113
|
+
throw new Error(
|
|
114
|
+
"TaobaoAdapter.sync: needs opts.inputPath (snapshot mode, Android in-APK cc) OR opts.account.cookies (cookie mode)",
|
|
115
|
+
);
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
async *_syncViaSnapshot(opts) {
|
|
119
|
+
const raw = this._deps.fs.readFileSync(opts.inputPath, "utf-8");
|
|
120
|
+
let snapshot;
|
|
121
|
+
try {
|
|
122
|
+
snapshot = JSON.parse(raw);
|
|
123
|
+
} catch (err) {
|
|
124
|
+
throw new Error(
|
|
125
|
+
`shopping-taobao.sync: snapshot must be JSON (v0.3 will add HTML parsing for SAF-exported pages). Got parse error: ${err.message}`,
|
|
126
|
+
);
|
|
127
|
+
}
|
|
128
|
+
if (
|
|
129
|
+
!snapshot ||
|
|
130
|
+
typeof snapshot !== "object" ||
|
|
131
|
+
snapshot.schemaVersion !== SNAPSHOT_SCHEMA_VERSION
|
|
132
|
+
) {
|
|
133
|
+
throw new Error(
|
|
134
|
+
`shopping-taobao.sync: snapshot schemaVersion mismatch (got ${snapshot && snapshot.schemaVersion}, expected ${SNAPSHOT_SCHEMA_VERSION})`,
|
|
135
|
+
);
|
|
136
|
+
}
|
|
137
|
+
const fallbackCapturedAt =
|
|
138
|
+
Number.isFinite(snapshot.snapshottedAt) && snapshot.snapshottedAt > 0
|
|
139
|
+
? Math.floor(snapshot.snapshottedAt)
|
|
140
|
+
: Date.now();
|
|
141
|
+
const account =
|
|
142
|
+
snapshot.account && typeof snapshot.account === "object"
|
|
143
|
+
? snapshot.account
|
|
144
|
+
: null;
|
|
145
|
+
const include = opts.include || {};
|
|
146
|
+
const limit =
|
|
147
|
+
Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
|
|
148
|
+
|
|
149
|
+
const events = Array.isArray(snapshot.events) ? snapshot.events : [];
|
|
150
|
+
let emitted = 0;
|
|
151
|
+
for (const ev of events) {
|
|
152
|
+
if (emitted >= limit) return;
|
|
153
|
+
if (!ev || typeof ev !== "object") continue;
|
|
154
|
+
const kind = ev.kind;
|
|
155
|
+
if (!VALID_SNAPSHOT_KINDS.includes(kind)) continue;
|
|
156
|
+
if (include[kind] === false) continue;
|
|
157
|
+
|
|
158
|
+
const capturedAt =
|
|
159
|
+
parseTime(ev.capturedAt) ||
|
|
160
|
+
parseTime(ev.placedAt) ||
|
|
161
|
+
parseTime(ev.paidAt) ||
|
|
162
|
+
fallbackCapturedAt;
|
|
163
|
+
const id =
|
|
164
|
+
(typeof ev.id === "string" && ev.id.length > 0 && ev.id) ||
|
|
165
|
+
ev.orderId ||
|
|
166
|
+
null;
|
|
167
|
+
|
|
168
|
+
yield {
|
|
169
|
+
adapter: NAME,
|
|
170
|
+
kind,
|
|
171
|
+
originalId: stableOriginalId(kind, id),
|
|
172
|
+
capturedAt,
|
|
173
|
+
payload: { ...ev, account },
|
|
174
|
+
};
|
|
175
|
+
emitted += 1;
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
async *_syncViaCookie(opts = {}) {
|
|
180
|
+
if (!this.account || !this.account.userId) {
|
|
181
|
+
throw new Error(
|
|
182
|
+
"TaobaoAdapter._syncViaCookie: account.userId required (set via new TaobaoAdapter({ account: { userId } }))",
|
|
183
|
+
);
|
|
184
|
+
}
|
|
68
185
|
if (!(await this._cookieAuth.validate())) return;
|
|
69
186
|
const sinceMs = opts.sinceWatermark != null
|
|
70
187
|
? parseWatermarkMs(opts.sinceWatermark)
|
|
@@ -97,16 +214,39 @@ class TaobaoAdapter {
|
|
|
97
214
|
}
|
|
98
215
|
|
|
99
216
|
normalize(raw) {
|
|
100
|
-
if (!raw || !raw.payload
|
|
101
|
-
throw new Error("TaobaoAdapter.normalize: raw.payload
|
|
217
|
+
if (!raw || !raw.payload) {
|
|
218
|
+
throw new Error("TaobaoAdapter.normalize: raw.payload missing");
|
|
219
|
+
}
|
|
220
|
+
// Snapshot mode payload is the raw event spread + account; cookie mode
|
|
221
|
+
// wraps a normalized record under payload.record. Dispatch on shape.
|
|
222
|
+
if (raw.payload.record) {
|
|
223
|
+
return normalizeOrderRecord(raw.payload.record, {
|
|
224
|
+
adapterName: NAME,
|
|
225
|
+
adapterVersion: VERSION,
|
|
226
|
+
});
|
|
102
227
|
}
|
|
103
|
-
|
|
228
|
+
// Snapshot path: the Android collector ships records that already match
|
|
229
|
+
// the OrderRecord shape (vendorId/orderId/placedAt/...). Pass through.
|
|
230
|
+
return normalizeOrderRecord(raw.payload, {
|
|
104
231
|
adapterName: NAME,
|
|
105
232
|
adapterVersion: VERSION,
|
|
106
233
|
});
|
|
107
234
|
}
|
|
108
235
|
}
|
|
109
236
|
|
|
237
|
+
function parseTime(v) {
  // Coerces a timestamp-like value to epoch milliseconds, or null when it
  // cannot be interpreted.
  if (typeof v === "string") {
    const parsed = Date.parse(v);
    return Number.isFinite(parsed) ? parsed : null;
  }
  // Only positive finite numbers are accepted.
  if (!Number.isFinite(v) || v <= 0) return null;
  // Values below 1e12 are treated as seconds and scaled to milliseconds.
  return v < 1e12 ? v * 1000 : v;
}
|
|
245
|
+
|
|
246
|
+
function stableOriginalId(kind, id) {
  // Builds the `taobao:<kind>:<id>` identifier for a snapshot event.
  // NOTE(review): when id is falsy the suffix is derived from the current
  // time, so it differs between runs and can repeat for events processed in
  // the same millisecond — confirm downstream dedup tolerates that.
  const suffix = id ? id : `unknown-${Date.now()}`;
  return `taobao:${kind}:${suffix}`;
}
|
|
249
|
+
|
|
110
250
|
function orderToRecord(o) {
|
|
111
251
|
if (!o || typeof o !== "object") return null;
|
|
112
252
|
const orderId = o.bizOrderId || o.orderId || o.id;
|