@chainlesschain/personal-data-hub 0.3.1 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/__tests__/adapters/email-adapter-snapshot.test.js +237 -0
  2. package/__tests__/adapters/email-adapter.test.js +1 -1
  3. package/__tests__/adapters/email-pdf-extractor.test.js +1 -1
  4. package/__tests__/adapters/email-retry-progress.test.js +1 -1
  5. package/__tests__/adapters/email-templates.test.js +1 -1
  6. package/__tests__/adapters/social-bilibili-adb-api-client.test.js +721 -0
  7. package/__tests__/adapters/social-bilibili-adb-chromium-cookies-reader.test.js +346 -0
  8. package/__tests__/adapters/social-bilibili-adb-collector.test.js +284 -0
  9. package/__tests__/adapters/social-bilibili-adb-cookies-extension.test.js +343 -0
  10. package/__tests__/adapters/social-bilibili-adb-snapshot-builder.test.js +296 -0
  11. package/__tests__/adapters/social-douyin-adb-collector.test.js +254 -0
  12. package/__tests__/adapters/social-douyin-adb-im-db-parser.test.js +304 -0
  13. package/__tests__/adapters/social-douyin-adb-snapshot-builder.test.js +216 -0
  14. package/__tests__/adapters/social-weibo-adb-api-client.test.js +362 -0
  15. package/__tests__/adapters/social-weibo-adb-collector.test.js +201 -0
  16. package/__tests__/adapters/social-weibo-adb-snapshot-builder.test.js +189 -0
  17. package/__tests__/adapters/social-xiaohongshu-adb-collector.test.js +207 -0
  18. package/__tests__/adapters/social-xiaohongshu-adb-sign.test.js +130 -0
  19. package/__tests__/adapters/system-data-android.test.js +32 -1
  20. package/__tests__/longtail-adapters.test.js +15 -2
  21. package/__tests__/shopping-adapters.test.js +96 -0
  22. package/__tests__/sign-providers.test.js +62 -0
  23. package/__tests__/travel-adapters.test.js +66 -0
  24. package/__tests__/whatsapp-adapter.test.js +5 -2
  25. package/lib/adapters/browser-history-chrome/chrome-db-reader.js +11 -1
  26. package/lib/adapters/email-imap/email-adapter.js +224 -17
  27. package/lib/adapters/messaging-telegram/index.js +15 -12
  28. package/lib/adapters/messaging-whatsapp/index.js +15 -12
  29. package/lib/adapters/shopping-taobao/index.js +161 -21
  30. package/lib/adapters/social-bilibili-adb/api-client.js +555 -0
  31. package/lib/adapters/social-bilibili-adb/chromium-cookies-reader.js +296 -0
  32. package/lib/adapters/social-bilibili-adb/collector.js +190 -0
  33. package/lib/adapters/social-bilibili-adb/cookies-extension.js +250 -0
  34. package/lib/adapters/social-bilibili-adb/index.js +51 -0
  35. package/lib/adapters/social-bilibili-adb/snapshot-builder.js +197 -0
  36. package/lib/adapters/social-douyin/index.js +4 -0
  37. package/lib/adapters/social-douyin-adb/collector.js +165 -0
  38. package/lib/adapters/social-douyin-adb/db-extension.js +281 -0
  39. package/lib/adapters/social-douyin-adb/im-db-parser.js +287 -0
  40. package/lib/adapters/social-douyin-adb/index.js +57 -0
  41. package/lib/adapters/social-douyin-adb/snapshot-builder.js +174 -0
  42. package/lib/adapters/social-weibo-adb/api-client.js +281 -0
  43. package/lib/adapters/social-weibo-adb/collector.js +169 -0
  44. package/lib/adapters/social-weibo-adb/cookies-extension.js +251 -0
  45. package/lib/adapters/social-weibo-adb/index.js +55 -0
  46. package/lib/adapters/social-weibo-adb/snapshot-builder.js +145 -0
  47. package/lib/adapters/social-xiaohongshu-adb/api-client.js +278 -0
  48. package/lib/adapters/social-xiaohongshu-adb/collector.js +158 -0
  49. package/lib/adapters/social-xiaohongshu-adb/cookies-extension.js +211 -0
  50. package/lib/adapters/social-xiaohongshu-adb/index.js +50 -0
  51. package/lib/adapters/social-xiaohongshu-adb/sign.js +90 -0
  52. package/lib/adapters/social-xiaohongshu-adb/snapshot-builder.js +126 -0
  53. package/lib/adapters/system-data-android/adapter.js +77 -3
  54. package/lib/adapters/travel-amap/index.js +16 -10
  55. package/lib/adapters/travel-ctrip/index.js +25 -9
  56. package/lib/adapters/vscode/vscode-reader.js +7 -1
  57. package/lib/sign-providers/index.js +20 -0
  58. package/lib/sign-providers/interface.js +82 -0
  59. package/lib/sign-providers/null-sign-provider.js +30 -0
  60. package/package.json +6 -1
@@ -15,6 +15,7 @@
15
15
 
16
16
  "use strict";
17
17
 
18
+ const fs = require("node:fs");
18
19
  const {
19
20
  EVENT_SUBTYPES,
20
21
  PERSON_SUBTYPES,
@@ -34,26 +35,45 @@ const { extractPdfText, passwordsFromHints } = require("./pdf-extractor");
34
35
  const { extractTransactions } = require("./transactions");
35
36
 
36
37
  const NAME = "email-imap";
37
- const VERSION = "0.6.0"; // Phase 5.7retry-with-backoff + progress streaming
38
+ const VERSION = "0.7.0"; // Phase 5.8snapshot mode for Android in-APK IMAP fetch
39
+ const SNAPSHOT_SCHEMA_VERSION = 1;
38
40
 
39
41
  class EmailAdapter {
40
42
  constructor(opts) {
41
43
  if (!opts || typeof opts !== "object") {
42
44
  throw new Error("EmailAdapter: opts required");
43
45
  }
44
- const account = opts.account;
45
- if (!account || typeof account !== "object") {
46
- throw new Error("EmailAdapter: opts.account required");
47
- }
48
- if (typeof account.email !== "string" || !account.email.includes("@")) {
49
- throw new Error("EmailAdapter: account.email must be a full address");
50
- }
51
- if (typeof account.authCode !== "string" || account.authCode.length === 0) {
52
- throw new Error("EmailAdapter: account.authCode required (provider authorization code)");
46
+
47
+ // Phase 5.8 snapshot mode: Android EmailLocalCollector ships pre-fetched
48
+ // {records:[]} JSON via ccRunner.syncAdapter("email-imap", path). When
49
+ // snapshotMode=true: skip IMAP-account validation (no IMAP login needed)
50
+ // and switch authenticate/sync to the snapshot path. The single registered
51
+ // instance handles every Android vendor; each snapshot file carries its
52
+ // own vendor + user, no per-account constructor needed (mirror of
53
+ // travel-12306 / travel-baidu-map / social-bilibili snapshot mode).
54
+ this._snapshotMode = !!opts.snapshotMode;
55
+
56
+ if (!this._snapshotMode) {
57
+ const account = opts.account;
58
+ if (!account || typeof account !== "object") {
59
+ throw new Error("EmailAdapter: opts.account required");
60
+ }
61
+ if (typeof account.email !== "string" || !account.email.includes("@")) {
62
+ throw new Error("EmailAdapter: account.email must be a full address");
63
+ }
64
+ if (typeof account.authCode !== "string" || account.authCode.length === 0) {
65
+ throw new Error("EmailAdapter: account.authCode required (provider authorization code)");
66
+ }
67
+ this.account = account;
68
+ this._provider = resolveProvider(account);
69
+ } else {
70
+ // Snapshot-mode stub: account fields used by _envelopeToRawEvent/
71
+ // normalize fall back to "(snapshot)" placeholders. Real per-record
72
+ // vendor + user surface in the snapshot envelope payload instead.
73
+ this.account = opts.account || { email: "(snapshot)", authCode: "(snapshot)" };
74
+ this._provider = null;
53
75
  }
54
76
 
55
- this.account = account;
56
- this._provider = resolveProvider(account);
57
77
  this._sessionFactory = typeof opts.sessionFactory === "function"
58
78
  ? opts.sessionFactory
59
79
  : (cfg) => new ImapSession(cfg);
@@ -129,16 +149,15 @@ class EmailAdapter {
129
149
  this.name = NAME;
130
150
  this.version = VERSION;
131
151
  this.capabilities = [
132
- "sync:imap",
133
- "auth:authcode",
152
+ ...(this._snapshotMode ? ["sync:snapshot"] : ["sync:imap"]),
153
+ ...(this._snapshotMode ? [] : ["auth:authcode"]),
134
154
  "parse:mime-body",
135
155
  "parse:attachment-metadata",
136
156
  "classify:layer1-rules",
137
157
  ...(this._llm ? ["classify:layer2-llm"] : []),
138
158
  "extract:6-templates",
139
159
  ...(this._disablePdfExtraction ? [] : ["decrypt:pdf-bills"]),
140
- "sync:retry-backoff",
141
- "sync:progress-stream",
160
+ ...(this._snapshotMode ? [] : ["sync:retry-backoff", "sync:progress-stream"]),
142
161
  ];
143
162
  this.rateLimits = { perMinute: 60 };
144
163
  this.dataDisclosure = {
@@ -159,7 +178,30 @@ class EmailAdapter {
159
178
  };
160
179
  }
161
180
 
162
- async authenticate(_ctx = {}) {
181
+ async authenticate(ctx = {}) {
182
+ // Phase 5.8 — snapshot mode authenticate: validate ctx.inputPath is
183
+ // readable; no IMAP login. Snapshot mode WITHOUT inputPath in ctx
184
+ // returns NO_INPUT (parallel to travel-12306 / travel-baidu-map shape).
185
+ if (this._snapshotMode || (ctx && typeof ctx.inputPath === "string" && ctx.inputPath.length > 0)) {
186
+ if (!ctx || typeof ctx.inputPath !== "string" || ctx.inputPath.length === 0) {
187
+ return {
188
+ ok: false,
189
+ reason: "NO_INPUT",
190
+ message: "email-imap (snapshot mode): ctx.inputPath required",
191
+ };
192
+ }
193
+ try {
194
+ fs.accessSync(ctx.inputPath, fs.constants.R_OK);
195
+ } catch (err) {
196
+ return {
197
+ ok: false,
198
+ reason: "INPUT_PATH_UNREADABLE",
199
+ message: `snapshot not readable at ${ctx.inputPath}: ${err.message}`,
200
+ };
201
+ }
202
+ return { ok: true, mode: "snapshot-file" };
203
+ }
204
+
163
205
  const session = this._sessionFactory(this._sessionConfig());
164
206
  try {
165
207
  await session.connect();
@@ -184,6 +226,15 @@ class EmailAdapter {
184
226
  }
185
227
 
186
228
  async *sync(opts = {}) {
229
+ // Phase 5.8 — snapshot mode: bypass IMAP session entirely, read Android
230
+ // EmailLocalCollector's staging JSON, yield one raw event per record.
231
+ // Classification + extraction reused on envelope-only data (bodyPreview
232
+ // is the only text we get; PDF decryption skipped since attachment
233
+ // buffers never crossed the Android → desktop boundary).
234
+ if (this._snapshotMode || (typeof opts.inputPath === "string" && opts.inputPath.length > 0)) {
235
+ yield* this._syncViaSnapshot(opts);
236
+ return;
237
+ }
187
238
  const folders = Array.isArray(opts.folders) && opts.folders.length > 0
188
239
  ? opts.folders
189
240
  : this._provider.folders;
@@ -329,6 +380,143 @@ class EmailAdapter {
329
380
  }
330
381
  }
331
382
 
383
+ /**
384
+ * Phase 5.8 — snapshot path: read Android EmailLocalCollector's staging
385
+ * JSON, convert each record to an IMAP-shaped envelope, run classifier +
386
+ * extractor (no PDF — Android only ships bodyPreview), yield raw events.
387
+ *
388
+ * Expected snapshot shape (matches EmailLocalCollector.kt:135-156):
389
+ * {vendor, user, fetchedAt, records: [{
390
+ * messageNumber, subject, from, to, sentDateMs, bodyPreview,
391
+ * hasAttachments
392
+ * }]}
393
+ *
394
+ * Lossy compared to IMAP path:
395
+ * - No HTML body (Android Jakarta Mail only ships text/plain or
396
+ * stripped-html as bodyPreview, capped 8KB).
397
+ * - No attachment buffers → no PDF decryption / transaction extraction
398
+ * even for bill-template matches. `hasAttachments` boolean only.
399
+ * - No real Message-ID → originalId synthesized from
400
+ * `android-snapshot:<vendor>:<user>:<messageNumber>` (stable per device).
401
+ * - No flags / cc / size; UID = Android messageNumber (per-folder).
402
+ */
403
+ async *_syncViaSnapshot(opts) {
404
+ const raw = fs.readFileSync(opts.inputPath, "utf-8");
405
+ let snapshot;
406
+ try {
407
+ snapshot = JSON.parse(raw);
408
+ } catch (err) {
409
+ throw new Error(
410
+ `email-imap.sync (snapshot): bad JSON at ${opts.inputPath}: ${err.message}`,
411
+ );
412
+ }
413
+ if (!snapshot || typeof snapshot !== "object") {
414
+ throw new Error(
415
+ `email-imap.sync (snapshot): expected object, got ${typeof snapshot}`,
416
+ );
417
+ }
418
+ if (!Array.isArray(snapshot.records)) {
419
+ throw new Error(
420
+ "email-imap.sync (snapshot): expected {records: [...]} shape (Android EmailLocalCollector writes this)",
421
+ );
422
+ }
423
+ const vendor = typeof snapshot.vendor === "string" ? snapshot.vendor : "unknown";
424
+ const user = typeof snapshot.user === "string" ? snapshot.user : "unknown@snapshot";
425
+ const fallbackCapturedAt =
426
+ Number.isFinite(snapshot.fetchedAt) && snapshot.fetchedAt > 0
427
+ ? Math.floor(snapshot.fetchedAt)
428
+ : Date.now();
429
+
430
+ const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
431
+ let emitted = 0;
432
+
433
+ for (const r of snapshot.records) {
434
+ if (emitted >= limit) return;
435
+ if (!r || typeof r !== "object") continue;
436
+ const env = this._androidRecordToEnvelope(r, vendor, user, fallbackCapturedAt);
437
+ // bodyPreview is the only text we have — wrap as a thin parsedBody so
438
+ // the classifier sees the same shape it does for IMAP-fetched mail.
439
+ const parsedBody = {
440
+ textBody: typeof r.bodyPreview === "string" ? r.bodyPreview : "",
441
+ htmlBody: "",
442
+ attachments: r.hasAttachments
443
+ ? [{ filename: "(unknown)", contentType: "application/octet-stream", size: 0 }]
444
+ : [],
445
+ headers: {},
446
+ };
447
+
448
+ let classification = null;
449
+ if (!this._disableClassification) {
450
+ try {
451
+ classification = await this._classifier(
452
+ this._classifierInput(env, parsedBody),
453
+ { llm: this._llm, minLayer1Confidence: this._minLayer1Confidence },
454
+ );
455
+ } catch (err) {
456
+ classification = {
457
+ category: CATEGORIES.OTHER,
458
+ confidence: 0,
459
+ layer: "error",
460
+ error: err && err.message ? err.message : String(err),
461
+ };
462
+ }
463
+ }
464
+
465
+ let extraction = null;
466
+ if (!this._disableExtraction) {
467
+ try {
468
+ extraction = await this._extractor(
469
+ this._classifierInput(env, parsedBody),
470
+ classification || { category: CATEGORIES.OTHER },
471
+ { llm: this._llm },
472
+ );
473
+ } catch (err) {
474
+ extraction = {
475
+ template: null,
476
+ fields: null,
477
+ warnings: [`extractor threw: ${err && err.message ? err.message : err}`],
478
+ };
479
+ }
480
+ }
481
+
482
+ // PDF extraction intentionally skipped — attachment buffers never crossed
483
+ // the Android → desktop boundary. Bill-template extractions on snapshot
484
+ // records get extraction.fields but no transactions list.
485
+ yield this._envelopeToRawEvent(env, "INBOX", parsedBody, classification, extraction);
486
+ emitted += 1;
487
+ }
488
+ }
489
+
490
+ /**
491
+ * Convert Android EmailLocalCollector record → IMAP-shaped envelope.
492
+ * Address strings ("Name <addr@x>" / "addr@x" / "addr@x, addr2@y") parse
493
+ * into {address, name} objects matching mailparser's output. Multi-recipient
494
+ * `to` strings split on comma.
495
+ */
496
+ _androidRecordToEnvelope(r, vendor, user, fallbackCapturedAt) {
497
+ const messageNumber = Number.isInteger(r.messageNumber) ? r.messageNumber : 0;
498
+ const sentDate = Number.isFinite(r.sentDateMs) && r.sentDateMs > 0
499
+ ? new Date(r.sentDateMs)
500
+ : new Date(fallbackCapturedAt);
501
+ return {
502
+ uid: messageNumber,
503
+ messageId: `android-snapshot:${vendor}:${user}:${messageNumber}`,
504
+ folder: "INBOX",
505
+ subject: typeof r.subject === "string" ? r.subject : "(no subject)",
506
+ from: typeof r.from === "string" && r.from.length > 0
507
+ ? [parseSnapshotAddress(r.from)]
508
+ : [],
509
+ to: typeof r.to === "string" && r.to.length > 0
510
+ ? r.to.split(",").map((s) => parseSnapshotAddress(s.trim())).filter(Boolean)
511
+ : [],
512
+ cc: [],
513
+ flags: [],
514
+ size: 0,
515
+ internalDate: sentDate,
516
+ date: sentDate,
517
+ };
518
+ }
519
+
332
520
  /**
333
521
  * Phase 5.7: connect with retry on transient errors. Auth failures
334
522
  * (AUTH_FAILED) and mailbox-not-found (MAILBOX_NOT_FOUND) bypass retry —
@@ -673,6 +861,25 @@ function formatAddr(a) {
673
861
  return a.name ? `${a.name} <${a.address}>` : a.address;
674
862
  }
675
863
 
864
+ /**
865
+ * Phase 5.8 — snapshot mode address parser. Android records ship address
866
+ * fields as strings like "Name <addr@x.com>" or "addr@x.com". Convert to
867
+ * mailparser-compatible {address, name} shape. Returns null for blank input.
868
+ */
869
+ function parseSnapshotAddress(s) {
870
+ if (typeof s !== "string") return null;
871
+ const t = s.trim();
872
+ if (t.length === 0) return null;
873
+ // "Name <addr@x>" form
874
+ const m = t.match(/^(.*?)\s*<([^>]+)>\s*$/);
875
+ if (m) {
876
+ const name = m[1].trim().replace(/^["']|["']$/g, "");
877
+ return { address: m[2].trim(), name: name.length > 0 ? name : undefined };
878
+ }
879
+ // Bare address form
880
+ return { address: t, name: undefined };
881
+ }
882
+
676
883
  function formatRecipients(list) {
677
884
  if (!Array.isArray(list) || list.length === 0) return "?";
678
885
  const head = list.slice(0, 3).map(formatAddr).join(", ");
@@ -18,20 +18,22 @@ const fs = require("node:fs");
18
18
  const { newId } = require("../../ids");
19
19
 
20
20
  const NAME = "messaging-telegram";
21
- const VERSION = "0.5.0";
21
+ const VERSION = "0.6.0"; // 2026-05-25 — account.userId OPTIONAL + inputPath alias
22
22
 
23
23
  class TelegramAdapter {
24
24
  constructor(opts = {}) {
25
- if (!opts.account || !opts.account.userId) {
26
- throw new Error("TelegramAdapter: opts.account.userId required");
27
- }
28
- this.account = opts.account;
29
- this._dbPath = opts.dbPath || null;
25
+ // 2026-05-25 account.userId OPTIONAL (mirror Taobao/Ctrip dual-mode).
26
+ // sqlite-mode adapter still requires user to provide a decrypted
27
+ // cache4.db (Telegram cache db is unencrypted — easier than WeChat).
28
+ // Earlier strict ctor blocked auto-register at boot → silent "no adapter
29
+ // messaging-telegram" when Android collector ships extracted db.
30
+ this.account = opts.account || null;
31
+ this._dbPath = opts.dbPath || opts.inputPath || null;
30
32
  this._dbDriverFactory = opts.dbDriverFactory || null;
31
33
 
32
34
  this.name = NAME;
33
35
  this.version = VERSION;
34
- this.capabilities = ["sync:sqlite", "parse:telegram-messages"];
36
+ this.capabilities = ["sync:sqlite", "sync:snapshot", "parse:telegram-messages"];
35
37
  this.extractMode = "device-pull";
36
38
  this.rateLimits = {};
37
39
  this.dataDisclosure = {
@@ -43,11 +45,12 @@ class TelegramAdapter {
43
45
  };
44
46
  }
45
47
 
46
- async authenticate() {
47
- if (!this._dbPath || !fs.existsSync(this._dbPath)) {
48
- return { ok: false, reason: "DB_NOT_PULLED" };
48
+ async authenticate(ctx = {}) {
49
+ const dbPath = (ctx && (ctx.inputPath || ctx.dbPath)) || this._dbPath;
50
+ if (!dbPath || !fs.existsSync(dbPath)) {
51
+ return { ok: false, reason: "DB_NOT_PULLED", message: "needs ctx.inputPath / opts.dbPath pointing to extracted cache4.db" };
49
52
  }
50
- return { ok: true, account: this.account.userId };
53
+ return { ok: true, account: this.account ? this.account.userId : null, mode: "snapshot-file" };
51
54
  }
52
55
 
53
56
  async healthCheck() {
@@ -56,7 +59,7 @@ class TelegramAdapter {
56
59
  }
57
60
 
58
61
  async *sync(opts = {}) {
59
- const dbPath = opts.dbPath || this._dbPath;
62
+ const dbPath = opts.inputPath || opts.dbPath || this._dbPath;
60
63
  if (!dbPath || !fs.existsSync(dbPath)) return;
61
64
  const Driver = this._dbDriverFactory
62
65
  ? this._dbDriverFactory()
@@ -22,21 +22,23 @@ const fs = require("node:fs");
22
22
  const { newId } = require("../../ids");
23
23
 
24
24
  const NAME = "messaging-whatsapp";
25
- const VERSION = "0.5.0";
25
+ const VERSION = "0.6.0"; // 2026-05-25 — account.phone OPTIONAL + inputPath alias
26
26
 
27
27
  class WhatsAppAdapter {
28
28
  constructor(opts = {}) {
29
- if (!opts.account || !opts.account.phone) {
30
- throw new Error("WhatsAppAdapter: opts.account.phone required");
31
- }
32
- this.account = opts.account;
33
- this._dbPath = opts.dbPath || null;
29
+ // 2026-05-25 account.phone OPTIONAL (mirror Taobao/Ctrip/Telegram).
30
+ // sqlite-mode adapter still requires user to provide a decrypted
31
+ // msgstore.db (user pre-decrypts with WhatsApp Crypt key — out of band).
32
+ // Earlier strict ctor blocked auto-register at boot → silent "no adapter
33
+ // messaging-whatsapp" when Android collector ships extracted db.
34
+ this.account = opts.account || null;
35
+ this._dbPath = opts.dbPath || opts.inputPath || null;
34
36
  this._keyProvider = opts.keyProvider || null;
35
37
  this._dbDriverFactory = opts.dbDriverFactory || null;
36
38
 
37
39
  this.name = NAME;
38
40
  this.version = VERSION;
39
- this.capabilities = ["sync:sqlite", "parse:whatsapp-messages"];
41
+ this.capabilities = ["sync:sqlite", "sync:snapshot", "parse:whatsapp-messages"];
40
42
  this.extractMode = "device-pull";
41
43
  this.rateLimits = {};
42
44
  this.dataDisclosure = {
@@ -50,11 +52,12 @@ class WhatsAppAdapter {
50
52
  };
51
53
  }
52
54
 
53
- async authenticate() {
54
- if (!this._dbPath || !fs.existsSync(this._dbPath)) {
55
- return { ok: false, reason: "DB_NOT_PULLED" };
55
+ async authenticate(ctx = {}) {
56
+ const dbPath = (ctx && (ctx.inputPath || ctx.dbPath)) || this._dbPath;
57
+ if (!dbPath || !fs.existsSync(dbPath)) {
58
+ return { ok: false, reason: "DB_NOT_PULLED", message: "needs ctx.inputPath / opts.dbPath pointing to decrypted msgstore.db" };
56
59
  }
57
- return { ok: true, account: this.account.phone };
60
+ return { ok: true, account: this.account ? this.account.phone : null, mode: "snapshot-file" };
58
61
  }
59
62
 
60
63
  async healthCheck() {
@@ -63,7 +66,7 @@ class WhatsAppAdapter {
63
66
  }
64
67
 
65
68
  async *sync(opts = {}) {
66
- const dbPath = opts.dbPath || this._dbPath;
69
+ const dbPath = opts.inputPath || opts.dbPath || this._dbPath;
67
70
  if (!dbPath || !fs.existsSync(dbPath)) return;
68
71
  const Driver = this._dbDriverFactory
69
72
  ? this._dbDriverFactory()
@@ -18,28 +18,36 @@
18
18
 
19
19
  "use strict";
20
20
 
21
+ const fs = require("node:fs");
21
22
  const { normalizeOrderRecord, CookieAuth } = require("../shopping-base");
22
23
 
23
24
  const NAME = "shopping-taobao";
24
- const VERSION = "0.5.0";
25
+ const VERSION = "0.6.0"; // §2.4d snapshot mode for Android in-APK cc
26
+ const SNAPSHOT_SCHEMA_VERSION = 1;
27
+
28
+ const KIND_ORDER = "order";
29
+ const VALID_SNAPSHOT_KINDS = Object.freeze([KIND_ORDER]);
25
30
 
26
31
  const TAOBAO_ORDERS_URL = "https://h5.m.taobao.com/mlapp/olist.html";
27
32
 
28
33
  class TaobaoAdapter {
29
34
  constructor(opts = {}) {
30
- if (!opts.account || !opts.account.userId) {
31
- throw new Error("TaobaoAdapter: opts.account.userId required");
32
- }
33
- this.account = opts.account;
34
- this._cookieAuth = new CookieAuth({
35
- platform: "taobao",
36
- cookies: opts.account.cookies || "",
37
- });
35
+ // §2.4d v0.2 account.userId OPTIONAL (mirror shopping-jd/meituan/pinduoduo
36
+ // dual-mode). Snapshot mode is stateless; cookie mode requires it; checked
37
+ // at sync time, not construction. Earlier strict ctor blocked auto-register
38
+ // at boot → user-driven HTML import worked but JSON snapshot path didn't.
39
+ this.account = opts.account || null;
40
+ this._cookieAuth = opts.account
41
+ ? new CookieAuth({
42
+ platform: "taobao",
43
+ cookies: opts.account.cookies || "",
44
+ })
45
+ : null;
38
46
  this._fetchFn = typeof opts.fetchFn === "function" ? opts.fetchFn : defaultFetch;
39
47
 
40
48
  this.name = NAME;
41
49
  this.version = VERSION;
42
- this.capabilities = ["sync:cookie-api", "parse:taobao-orders"];
50
+ this.capabilities = ["sync:snapshot", "sync:cookie-api", "parse:taobao-orders"];
43
51
  this.extractMode = "web-api";
44
52
  this.rateLimits = { perMinute: 6, perDay: 200 }; // respect Taobao风控
45
53
  this.dataDisclosure = {
@@ -48,23 +56,132 @@ class TaobaoAdapter {
48
56
  ],
49
57
  sensitivity: "high",
50
58
  legalGate: false,
59
+ defaultInclude: { order: true },
51
60
  };
61
+
62
+ // _deps injection seam — vi.mock fs doesn't intercept inlined CJS require.
63
+ this._deps = { fs };
52
64
  }
53
65
 
54
- async authenticate() {
55
- const ok = await this._cookieAuth.validate();
56
- if (!ok) return { ok: false, reason: "INVALID_COOKIE", error: "cookies missing or empty" };
57
- return { ok: true, account: this.account.userId };
66
+ async authenticate(ctx = {}) {
67
+ if (ctx && typeof ctx.inputPath === "string" && ctx.inputPath.length > 0) {
68
+ try {
69
+ this._deps.fs.accessSync(ctx.inputPath, this._deps.fs.constants.R_OK);
70
+ } catch (err) {
71
+ return {
72
+ ok: false,
73
+ reason: "INPUT_PATH_UNREADABLE",
74
+ message: `snapshot not readable at ${ctx.inputPath}: ${err.message}`,
75
+ };
76
+ }
77
+ return { ok: true, mode: "snapshot-file" };
78
+ }
79
+ if (this._cookieAuth) {
80
+ const ok = await this._cookieAuth.validate();
81
+ if (!ok) return { ok: false, reason: "INVALID_COOKIE", error: "cookies missing or empty" };
82
+ if (!this.account || !this.account.userId) {
83
+ return { ok: false, reason: "NO_ACCOUNT_USERID", message: "cookie mode requires account.userId" };
84
+ }
85
+ return { ok: true, account: this.account.userId, mode: "cookie" };
86
+ }
87
+ return {
88
+ ok: false,
89
+ reason: "NO_INPUT",
90
+ message: "TaobaoAdapter.authenticate: needs opts.inputPath (snapshot mode) OR opts.account.cookies (cookie mode)",
91
+ };
58
92
  }
59
93
 
60
94
  async healthCheck() {
61
- const r = await this.authenticate();
62
- return r.ok
63
- ? { ok: true, lastChecked: Date.now() }
64
- : { ok: false, reason: r.reason, error: r.error };
95
+ if (this._cookieAuth) {
96
+ const r = await this.authenticate();
97
+ return r.ok
98
+ ? { ok: true, lastChecked: Date.now() }
99
+ : { ok: false, reason: r.reason, error: r.error };
100
+ }
101
+ return { ok: true, lastChecked: Date.now() };
65
102
  }
66
103
 
67
104
  async *sync(opts = {}) {
105
+ if (typeof opts.inputPath === "string" && opts.inputPath.length > 0) {
106
+ yield* this._syncViaSnapshot(opts);
107
+ return;
108
+ }
109
+ if (this._cookieAuth) {
110
+ yield* this._syncViaCookie(opts);
111
+ return;
112
+ }
113
+ throw new Error(
114
+ "TaobaoAdapter.sync: needs opts.inputPath (snapshot mode, Android in-APK cc) OR opts.account.cookies (cookie mode)",
115
+ );
116
+ }
117
+
118
+ async *_syncViaSnapshot(opts) {
119
+ const raw = this._deps.fs.readFileSync(opts.inputPath, "utf-8");
120
+ let snapshot;
121
+ try {
122
+ snapshot = JSON.parse(raw);
123
+ } catch (err) {
124
+ throw new Error(
125
+ `shopping-taobao.sync: snapshot must be JSON (v0.3 will add HTML parsing for SAF-exported pages). Got parse error: ${err.message}`,
126
+ );
127
+ }
128
+ if (
129
+ !snapshot ||
130
+ typeof snapshot !== "object" ||
131
+ snapshot.schemaVersion !== SNAPSHOT_SCHEMA_VERSION
132
+ ) {
133
+ throw new Error(
134
+ `shopping-taobao.sync: snapshot schemaVersion mismatch (got ${snapshot && snapshot.schemaVersion}, expected ${SNAPSHOT_SCHEMA_VERSION})`,
135
+ );
136
+ }
137
+ const fallbackCapturedAt =
138
+ Number.isFinite(snapshot.snapshottedAt) && snapshot.snapshottedAt > 0
139
+ ? Math.floor(snapshot.snapshottedAt)
140
+ : Date.now();
141
+ const account =
142
+ snapshot.account && typeof snapshot.account === "object"
143
+ ? snapshot.account
144
+ : null;
145
+ const include = opts.include || {};
146
+ const limit =
147
+ Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
148
+
149
+ const events = Array.isArray(snapshot.events) ? snapshot.events : [];
150
+ let emitted = 0;
151
+ for (const ev of events) {
152
+ if (emitted >= limit) return;
153
+ if (!ev || typeof ev !== "object") continue;
154
+ const kind = ev.kind;
155
+ if (!VALID_SNAPSHOT_KINDS.includes(kind)) continue;
156
+ if (include[kind] === false) continue;
157
+
158
+ const capturedAt =
159
+ parseTime(ev.capturedAt) ||
160
+ parseTime(ev.placedAt) ||
161
+ parseTime(ev.paidAt) ||
162
+ fallbackCapturedAt;
163
+ const id =
164
+ (typeof ev.id === "string" && ev.id.length > 0 && ev.id) ||
165
+ ev.orderId ||
166
+ null;
167
+
168
+ yield {
169
+ adapter: NAME,
170
+ kind,
171
+ originalId: stableOriginalId(kind, id),
172
+ capturedAt,
173
+ payload: { ...ev, account },
174
+ };
175
+ emitted += 1;
176
+ }
177
+ }
178
+
179
+ async *_syncViaCookie(opts = {}) {
180
+ if (!this.account || !this.account.userId) {
181
+ throw new Error(
182
+ "TaobaoAdapter._syncViaCookie: account.userId required (set via new TaobaoAdapter({ account: { userId } }))",
183
+ );
184
+ }
68
185
  if (!(await this._cookieAuth.validate())) return;
69
186
  const sinceMs = opts.sinceWatermark != null
70
187
  ? parseWatermarkMs(opts.sinceWatermark)
@@ -97,16 +214,39 @@ class TaobaoAdapter {
97
214
  }
98
215
 
99
216
  normalize(raw) {
100
- if (!raw || !raw.payload || !raw.payload.record) {
101
- throw new Error("TaobaoAdapter.normalize: raw.payload.record missing");
217
+ if (!raw || !raw.payload) {
218
+ throw new Error("TaobaoAdapter.normalize: raw.payload missing");
219
+ }
220
+ // Snapshot mode payload is the raw event spread + account; cookie mode
221
+ // wraps a normalized record under payload.record. Dispatch on shape.
222
+ if (raw.payload.record) {
223
+ return normalizeOrderRecord(raw.payload.record, {
224
+ adapterName: NAME,
225
+ adapterVersion: VERSION,
226
+ });
102
227
  }
103
- return normalizeOrderRecord(raw.payload.record, {
228
+ // Snapshot path: the Android collector ships records that already match
229
+ // the OrderRecord shape (vendorId/orderId/placedAt/...). Pass through.
230
+ return normalizeOrderRecord(raw.payload, {
104
231
  adapterName: NAME,
105
232
  adapterVersion: VERSION,
106
233
  });
107
234
  }
108
235
  }
109
236
 
237
+ function parseTime(v) {
238
+ if (Number.isFinite(v) && v > 0) return v < 1e12 ? v * 1000 : v;
239
+ if (typeof v === "string") {
240
+ const t = Date.parse(v);
241
+ if (Number.isFinite(t)) return t;
242
+ }
243
+ return null;
244
+ }
245
+
246
+ function stableOriginalId(kind, id) {
247
+ return id ? `taobao:${kind}:${id}` : `taobao:${kind}:unknown-${Date.now()}`;
248
+ }
249
+
110
250
  function orderToRecord(o) {
111
251
  if (!o || typeof o !== "object") return null;
112
252
  const orderId = o.bizOrderId || o.orderId || o.id;