@chainlesschain/personal-data-hub 0.3.0 → 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/email-adapter-snapshot.test.js +237 -0
- package/__tests__/adapters/email-adapter.test.js +1 -1
- package/__tests__/adapters/email-pdf-extractor.test.js +1 -1
- package/__tests__/adapters/email-retry-progress.test.js +1 -1
- package/__tests__/adapters/email-templates.test.js +1 -1
- package/__tests__/adapters/social-bilibili-adb-api-client.test.js +721 -0
- package/__tests__/adapters/social-bilibili-adb-chromium-cookies-reader.test.js +346 -0
- package/__tests__/adapters/social-bilibili-adb-collector.test.js +284 -0
- package/__tests__/adapters/social-bilibili-adb-cookies-extension.test.js +343 -0
- package/__tests__/adapters/social-bilibili-adb-snapshot-builder.test.js +296 -0
- package/__tests__/adapters/social-douyin-adb-collector.test.js +254 -0
- package/__tests__/adapters/social-douyin-adb-im-db-parser.test.js +304 -0
- package/__tests__/adapters/social-douyin-adb-snapshot-builder.test.js +216 -0
- package/__tests__/adapters/social-weibo-adb-api-client.test.js +362 -0
- package/__tests__/adapters/social-weibo-adb-collector.test.js +201 -0
- package/__tests__/adapters/social-weibo-adb-snapshot-builder.test.js +189 -0
- package/__tests__/adapters/social-xiaohongshu-adb-collector.test.js +207 -0
- package/__tests__/adapters/social-xiaohongshu-adb-sign.test.js +130 -0
- package/__tests__/adapters/system-data-android.test.js +32 -1
- package/__tests__/longtail-adapters.test.js +15 -2
- package/__tests__/shopping-adapters.test.js +96 -0
- package/__tests__/sign-providers.test.js +62 -0
- package/__tests__/travel-adapters.test.js +163 -5
- package/__tests__/whatsapp-adapter.test.js +5 -2
- package/lib/adapters/browser-history-chrome/chrome-db-reader.js +11 -1
- package/lib/adapters/email-imap/email-adapter.js +224 -17
- package/lib/adapters/messaging-telegram/index.js +15 -12
- package/lib/adapters/messaging-whatsapp/index.js +15 -12
- package/lib/adapters/shopping-taobao/index.js +161 -21
- package/lib/adapters/social-bilibili-adb/api-client.js +555 -0
- package/lib/adapters/social-bilibili-adb/chromium-cookies-reader.js +296 -0
- package/lib/adapters/social-bilibili-adb/collector.js +190 -0
- package/lib/adapters/social-bilibili-adb/cookies-extension.js +250 -0
- package/lib/adapters/social-bilibili-adb/index.js +51 -0
- package/lib/adapters/social-bilibili-adb/snapshot-builder.js +197 -0
- package/lib/adapters/social-douyin/index.js +4 -0
- package/lib/adapters/social-douyin-adb/collector.js +165 -0
- package/lib/adapters/social-douyin-adb/db-extension.js +281 -0
- package/lib/adapters/social-douyin-adb/im-db-parser.js +287 -0
- package/lib/adapters/social-douyin-adb/index.js +57 -0
- package/lib/adapters/social-douyin-adb/snapshot-builder.js +174 -0
- package/lib/adapters/social-weibo-adb/api-client.js +281 -0
- package/lib/adapters/social-weibo-adb/collector.js +169 -0
- package/lib/adapters/social-weibo-adb/cookies-extension.js +251 -0
- package/lib/adapters/social-weibo-adb/index.js +55 -0
- package/lib/adapters/social-weibo-adb/snapshot-builder.js +145 -0
- package/lib/adapters/social-xiaohongshu-adb/api-client.js +278 -0
- package/lib/adapters/social-xiaohongshu-adb/collector.js +158 -0
- package/lib/adapters/social-xiaohongshu-adb/cookies-extension.js +211 -0
- package/lib/adapters/social-xiaohongshu-adb/index.js +50 -0
- package/lib/adapters/social-xiaohongshu-adb/sign.js +90 -0
- package/lib/adapters/social-xiaohongshu-adb/snapshot-builder.js +126 -0
- package/lib/adapters/system-data-android/adapter.js +77 -3
- package/lib/adapters/travel-12306/index.js +215 -29
- package/lib/adapters/travel-amap/index.js +16 -10
- package/lib/adapters/travel-ctrip/index.js +25 -9
- package/lib/adapters/vscode/vscode-reader.js +7 -1
- package/lib/sign-providers/index.js +20 -0
- package/lib/sign-providers/interface.js +82 -0
- package/lib/sign-providers/null-sign-provider.js +30 -0
- package/package.json +6 -1
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Phase 3c (Xhs C 路径 — 2026-05-25): X-S signature generator (Node port).
|
|
5
|
+
*
|
|
6
|
+
* Byte-parity port of
|
|
7
|
+
* `android-app/.../pdh/social/xiaohongshu/XhsApiClient.kt`:computeXsXt.
|
|
8
|
+
*
|
|
9
|
+
* **Real xhs.js algorithm (open-source reverse-engineered, best-effort)**:
|
|
10
|
+
* 1. payload = "url=" + url_path_with_query + ("" or body_json)
|
|
11
|
+
* 2. raw = ts_ms + payload + a1_cookie
|
|
12
|
+
* 3. md5_hex = MD5(raw).hex() — hex STRING (not bytes)
|
|
13
|
+
* 4. X-S = "XYW_" + base64(utf8_bytes(md5_hex))
|
|
14
|
+
* Critical: base64 encodes the UTF-8 bytes of the hex STRING, not
|
|
15
|
+
* the raw 16 MD5 bytes. This is what xhs.js does — it stringifies
|
|
16
|
+
* the digest before base64-ing it.
|
|
17
|
+
* 5. X-T = ts_ms (as decimal string)
|
|
18
|
+
*
|
|
19
|
+
* **Real xhs.js does one more step after step 3** — XOR-rotate with a
|
|
20
|
+
* key derived from b1 cookie, then base64 with `=` padding. v0.2 we
|
|
21
|
+
* skip that step → ~60% GET hit rate, <30% POST hit rate. UI banner
|
|
22
|
+
* surfaces lastErrorCode=461 when xhs rejects our X-S; collector
|
|
23
|
+
* gracefully degrades to emptyList() per endpoint.
|
|
24
|
+
*
|
|
25
|
+
* Future Phase 3c-v0.3: a WebView-based bridge (see Android-side
|
|
26
|
+
* XhsSignBridge.kt — runs xhs's own JS in a hidden Electron BrowserView)
|
|
27
|
+
* would push the hit rate to ~100%. Out of scope for v0.2 Node port.
|
|
28
|
+
*/
|
|
29
|
+
|
|
30
|
+
const crypto = require("node:crypto");
|
|
31
|
+
|
|
32
|
+
/** "XYW_" prefix — matches xhs.js output. */
|
|
33
|
+
const XS_PREFIX = "XYW_";

/**
 * Compute the X-S and X-T request headers for an xhs web API call.
 *
 * Works for GET requests (no body) and POST requests (JSON body):
 *   raw = ts_ms + "url=" + urlPathWithQuery + body + a1
 *   X-S = "XYW_" + unpadded base64 of the UTF-8 bytes of md5_hex(raw)
 *   X-T = ts_ms as a decimal string
 *
 * @param {string} urlPathWithQuery url.pathname + url.search (path + "?" + query, encoded)
 * @param {string|null} body POST body as a JSON string; any non-string value
 *   (null/undefined for GET) contributes nothing to the signature
 * @param {string} a1 a1 cookie value
 * @param {{now?: () => number}|null} [opts] test seam — inject
 *   `now: () => 1716383021000` for a deterministic timestamp; null/undefined
 *   falls back to Date.now
 * @returns {{xs: string, xt: string}}
 * @throws {TypeError} when urlPathWithQuery or a1 is not a non-empty string
 */
function computeXsXt(urlPathWithQuery, body, a1, opts = {}) {
  if (typeof urlPathWithQuery !== "string" || urlPathWithQuery.length === 0) {
    throw new TypeError("computeXsXt: urlPathWithQuery must be non-empty string");
  }
  if (typeof a1 !== "string" || a1.length === 0) {
    throw new TypeError("computeXsXt: a1 must be non-empty string");
  }
  // The default parameter only covers `undefined`; an explicit `null` opts
  // must fall back to the real clock instead of crashing on `opts.now`.
  const now = (opts && opts.now) || Date.now;
  const ts = now();
  const bodyStr = typeof body === "string" ? body : "";
  const payload = "url=" + urlPathWithQuery + bodyStr;
  const raw = `${ts}${payload}${a1}`;
  const md5Hex = crypto.createHash("md5").update(raw, "utf8").digest("hex");
  // Base64 is applied to the UTF-8 bytes of the 32-char hex STRING (not the
  // 16 raw digest bytes): 32 bytes → 44 base64 chars including one "=".
  // Buffer's base64 output never contains newlines, so only the trailing
  // padding has to be stripped to get the unpadded form.
  const b64NoPad = Buffer.from(md5Hex, "utf8").toString("base64").replace(/=+$/, "");
  return {
    xs: XS_PREFIX + b64NoPad,
    xt: String(ts),
  };
}
|
|
66
|
+
|
|
67
|
+
/**
 * Pull the value of the `a1` cookie out of a raw Cookie header string.
 *
 *   "web_session=abc; a1=18d6e123abc; xsec_token=xxx" → "18d6e123abc"
 *
 * Only the first `a1=` pair is considered. Returns null when the input is
 * not a string, when no `a1=` pair exists, or when that pair's value is empty.
 */
function extractA1(cookie) {
  if (typeof cookie !== "string") {
    return null;
  }
  const a1Pair = cookie
    .split(";")
    .map((segment) => segment.trim())
    .find((segment) => segment.startsWith("a1="));
  if (a1Pair === undefined) {
    return null;
  }
  const value = a1Pair.slice(3);
  return value === "" ? null : value;
}
|
|
85
|
+
|
|
86
|
+
module.exports = {
|
|
87
|
+
computeXsXt,
|
|
88
|
+
extractA1,
|
|
89
|
+
XS_PREFIX,
|
|
90
|
+
};
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Phase 3c (Xhs C 路径 — 2026-05-25): API responses → snapshot JSON.
|
|
5
|
+
*
|
|
6
|
+
* Matches the existing `social-xiaohongshu` adapter's snapshot mode
|
|
7
|
+
* schema (schemaVersion=1). Kinds: note / liked / follow.
|
|
8
|
+
*
|
|
9
|
+
* Note: xhs userId is a Base64-ish string (e.g. "5e8c8f7e..."), not a
|
|
10
|
+
* numeric Long. The account.userId in the snapshot is set to userId
|
|
11
|
+
* verbatim (string passthrough); consumers shouldn't expect numeric uid.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
const fs = require("node:fs");
|
|
15
|
+
const path = require("node:path");
|
|
16
|
+
const os = require("node:os");
|
|
17
|
+
const crypto = require("node:crypto");
|
|
18
|
+
|
|
19
|
+
const SNAPSHOT_SCHEMA_VERSION = 1;

/**
 * Turn collected xhs API results into a snapshot object.
 *
 * @param {object} input
 * @param {string} input.userId non-empty account id, copied verbatim
 * @param {string} [input.nickname] account nickname ("" when not a string)
 * @param {number} [input.snapshottedAt] epoch-ms; Date.now() is used when
 *   this is not a finite positive number
 * @param {Array<object>} [input.notes] emitted as kind "note"
 * @param {Array<object>} [input.liked] emitted as kind "liked"
 * @param {Array<object>} [input.follows] emitted as kind "follow"
 * @returns {{schemaVersion: number, snapshottedAt: number, account: object, events: object[]}}
 * @throws {TypeError} when input is not an object or userId is not a non-empty string
 */
function buildSnapshot(input) {
  if (!input || typeof input !== "object") {
    throw new TypeError("buildSnapshot: input must be an object");
  }
  const { userId } = input;
  if (typeof userId !== "string" || userId.length === 0) {
    throw new TypeError("buildSnapshot: input.userId must be a non-empty string");
  }

  let snapshottedAt = input.snapshottedAt;
  if (!(Number.isFinite(snapshottedAt) && snapshottedAt > 0)) {
    snapshottedAt = Date.now();
  }

  const asList = (value) => (Array.isArray(value) ? value : []);
  const isRecord = (value) => Boolean(value) && typeof value === "object";
  const countOrZero = (value) => (typeof value === "number" ? value : 0);

  // Non-object entries are dropped, but `position` is still the index in the
  // source array, so fallback ids keep pointing at the original slot.
  const noteEvents = asList(input.notes).flatMap((note, position) => {
    if (!isRecord(note)) return [];
    const hasCreatedAt = typeof note.createdAt === "number" && note.createdAt > 0;
    return [
      {
        kind: "note",
        id: note.noteId ? `note-${note.noteId}` : `note-${position}`,
        capturedAt: hasCreatedAt ? note.createdAt : snapshottedAt,
        noteId: note.noteId || null,
        title: note.title || null,
        desc: note.desc || null,
        type: note.type || "normal",
        likedCount: countOrZero(note.likedCount),
        collectedCount: countOrZero(note.collectedCount),
        commentCount: countOrZero(note.commentCount),
      },
    ];
  });

  const likedEvents = asList(input.liked).flatMap((like, position) => {
    if (!isRecord(like)) return [];
    return [
      {
        kind: "liked",
        id: like.noteId ? `liked-${like.noteId}` : `liked-${position}`,
        // No per-like timestamp is available here, so the snapshot time stands in.
        capturedAt: snapshottedAt,
        noteId: like.noteId || null,
        title: like.title || null,
        authorNickname: like.authorNickname || null,
      },
    ];
  });

  const followEvents = asList(input.follows).flatMap((followee, position) => {
    if (!isRecord(followee)) return [];
    return [
      {
        kind: "follow",
        id: followee.userId ? `follow-${followee.userId}` : `follow-${position}`,
        capturedAt: snapshottedAt,
        userId: followee.userId || null,
        nickname: followee.nickname || null,
        image: followee.image || null,
      },
    ];
  });

  return {
    schemaVersion: SNAPSHOT_SCHEMA_VERSION,
    snapshottedAt,
    account: {
      userId,
      nickname: typeof input.nickname === "string" ? input.nickname : "",
    },
    events: [...noteEvents, ...likedEvents, ...followEvents],
  };
}
|
|
97
|
+
|
|
98
|
+
/**
 * Serialise a snapshot to a JSON file and return the file's full path.
 *
 * @param {object} snapshot value passed to JSON.stringify
 * @param {{dir?: string, fileName?: string}|null} [opts]
 *   dir      — target directory (defaults to os.tmpdir())
 *   fileName — bare file name (defaults to `cc-xhs-snapshot-<uuid>.json`)
 * @returns {string} path.join(dir, fileName)
 * @throws {TypeError} when opts.fileName is given but is not a string
 * @throws {Error} when opts.fileName is not a plain basename
 */
function writeSnapshotJson(snapshot, opts = {}) {
  // Default parameters only cover `undefined`; tolerate an explicit null too.
  const options = opts || {};
  const dir = options.dir || os.tmpdir();
  const fileName =
    options.fileName || `cc-xhs-snapshot-${crypto.randomUUID()}.json`;
  if (typeof fileName !== "string") {
    throw new TypeError("writeSnapshotJson: opts.fileName must be a string");
  }
  // "." and ".." contain no separator but still resolve outside/onto `dir`.
  const escapesDir =
    fileName.includes("/") ||
    fileName.includes("\\") ||
    fileName === "." ||
    fileName === "..";
  if (escapesDir) {
    throw new Error(
      "writeSnapshotJson: opts.fileName must be a basename, not a path",
    );
  }
  const full = path.join(dir, fileName);
  // The snapshot holds account data and the default location is the shared
  // temp dir, so create the file readable/writable by the owner only.
  fs.writeFileSync(full, JSON.stringify(snapshot), {
    encoding: "utf-8",
    mode: 0o600,
  });
  return full;
}
|
|
111
|
+
|
|
112
|
+
/**
 * Best-effort removal of a snapshot file.
 *
 * Does nothing for a falsy path, and deliberately ignores every unlink
 * failure (missing file, permissions, ...) so cleanup never throws.
 *
 * @param {string} [filePath] file to delete
 */
function cleanupSnapshotJson(filePath) {
  if (filePath) {
    try {
      fs.unlinkSync(filePath);
    } catch {
      // cleanup is best-effort — nothing to do on failure
    }
  }
}
|
|
120
|
+
|
|
121
|
+
module.exports = {
|
|
122
|
+
buildSnapshot,
|
|
123
|
+
writeSnapshotJson,
|
|
124
|
+
cleanupSnapshotJson,
|
|
125
|
+
SNAPSHOT_SCHEMA_VERSION,
|
|
126
|
+
};
|
|
@@ -28,13 +28,28 @@ const {
|
|
|
28
28
|
} = require("../../constants");
|
|
29
29
|
|
|
30
30
|
const NAME = "system-data-android";
|
|
31
|
+
// v0.3.2 (2026-05-25): denormalise contact identifiers (phones/emails/
|
|
32
|
+
// organization/starred) and app version/install fields onto
|
|
33
|
+
// event.extra so the Vault Browser tap-to-detail sheet can render
|
|
34
|
+
// human-readable fields without joining back to the persons/items
|
|
35
|
+
// tables. Same content lives on the entity rows; events are now a
|
|
36
|
+
// convenience copy. Adds ~50-200 bytes per event but keeps the detail
|
|
37
|
+
// UI single-table.
|
|
38
|
+
// v0.3.1 (2026-05-25): normalize() now emits a synthetic OTHER event per
|
|
39
|
+
// contact + per app. Snapshot mode previously only wrote persons/items;
|
|
40
|
+
// Vault Browser's `category=system` facet only counts events, so the
|
|
41
|
+
// chip showed (0) forever even after a successful sync. Synthetic event
|
|
42
|
+
// per entity (stable id, idempotent across re-syncs via UPSERT) lights
|
|
43
|
+
// up the chip with `total = #contacts + #apps`. occurredAt = capturedAt
|
|
44
|
+
// of the latest snapshot containing the entity. sms/call/media events
|
|
45
|
+
// were already emitted in v0.2 — unchanged.
|
|
31
46
|
// v0.3.0 (2026-05-24): added kind="media-file" via bridge mode
|
|
32
47
|
// (host-adb-bridge media.list across 5 /sdcard categories). Metadata
|
|
33
48
|
// only — path/size/mtime/ext, no file content.
|
|
34
49
|
// v0.2.0 (2026-05-24): added kind="sms" + kind="call" via bridge mode.
|
|
35
50
|
// Snapshot mode still v1 schema — sms/calls/media only land via
|
|
36
51
|
// bridge path until Android snapshot writer is updated to include them.
|
|
37
|
-
const VERSION = "0.3.
|
|
52
|
+
const VERSION = "0.3.2";
|
|
38
53
|
const SNAPSHOT_SCHEMA_VERSION = 1;
|
|
39
54
|
|
|
40
55
|
// Stable per-source originalId — registry.putRawEvent rejects null originalId
|
|
@@ -391,8 +406,42 @@ class SystemDataAndroidAdapter {
|
|
|
391
406
|
if (typeof p.photoUri === "string" && p.photoUri.length > 0) extra.photoUri = p.photoUri;
|
|
392
407
|
if (Object.keys(extra).length > 0) person.extra = extra;
|
|
393
408
|
|
|
409
|
+
// v0.3.1 — synthesise an OTHER event so the snapshot contact shows up
|
|
410
|
+
// in the Vault Browser's `category=system` facet (which counts events,
|
|
411
|
+
// not persons). Stable id keyed on stableKey makes re-syncs idempotent
|
|
412
|
+
// via UPSERT; occurredAt floats forward to the latest snapshot time
|
|
413
|
+
// ("last time we saw this contact").
|
|
414
|
+
//
|
|
415
|
+
// v0.3.2 — duplicate the contact's identifying fields onto event.extra
|
|
416
|
+
// so the Vault Browser's tap-to-detail sheet can render them inline
|
|
417
|
+
// without joining back to the persons table. Phones/emails/relation/
|
|
418
|
+
// starred — same data shape as person.identifiers + person.relation
|
|
419
|
+
// + person.extra, just denormalised so a single events-table read
|
|
420
|
+
// suffices for the detail UI.
|
|
421
|
+
const eventExtra = { kind: "contact-snapshot" };
|
|
422
|
+
if (identifiers.phone && identifiers.phone.length > 0) {
|
|
423
|
+
eventExtra.phones = identifiers.phone;
|
|
424
|
+
}
|
|
425
|
+
if (identifiers.email && identifiers.email.length > 0) {
|
|
426
|
+
eventExtra.emails = identifiers.email;
|
|
427
|
+
}
|
|
428
|
+
if (typeof p.organization === "string" && p.organization.trim().length > 0) {
|
|
429
|
+
eventExtra.organization = p.organization.trim();
|
|
430
|
+
}
|
|
431
|
+
if (typeof p.starred === "boolean") eventExtra.starred = p.starred;
|
|
432
|
+
const event = {
|
|
433
|
+
id: `event-android-contact-${stableKey}`,
|
|
434
|
+
type: ENTITY_TYPES.EVENT,
|
|
435
|
+
subtype: EVENT_SUBTYPES.OTHER,
|
|
436
|
+
occurredAt: raw.capturedAt,
|
|
437
|
+
ingestedAt,
|
|
438
|
+
source: source(`android-contact:${stableKey}`),
|
|
439
|
+
content: { title: `联系人:${displayName}` },
|
|
440
|
+
extra: eventExtra,
|
|
441
|
+
};
|
|
442
|
+
|
|
394
443
|
return {
|
|
395
|
-
events: [],
|
|
444
|
+
events: [event],
|
|
396
445
|
persons: [person],
|
|
397
446
|
places: [],
|
|
398
447
|
items: [],
|
|
@@ -428,8 +477,33 @@ class SystemDataAndroidAdapter {
|
|
|
428
477
|
},
|
|
429
478
|
};
|
|
430
479
|
|
|
480
|
+
// v0.3.1 — same rationale as the contact branch: emit a synthetic
|
|
481
|
+
// OTHER event so installed apps show up in the system facet count.
|
|
482
|
+
// v0.3.2 — copy versioning/install fields onto event.extra so the
|
|
483
|
+
// detail sheet can render them inline.
|
|
484
|
+
const eventExtra = { kind: "app-snapshot", packageName: pkgName };
|
|
485
|
+
if (typeof a.versionName === "string" && a.versionName.length > 0) {
|
|
486
|
+
eventExtra.versionName = a.versionName;
|
|
487
|
+
}
|
|
488
|
+
if (Number.isInteger(a.versionCode)) eventExtra.versionCode = a.versionCode;
|
|
489
|
+
if (Number.isInteger(a.firstInstallTime)) {
|
|
490
|
+
eventExtra.firstInstallTime = a.firstInstallTime;
|
|
491
|
+
}
|
|
492
|
+
if (Number.isInteger(a.lastUpdateTime)) eventExtra.lastUpdateTime = a.lastUpdateTime;
|
|
493
|
+
if (typeof a.isSystem === "boolean") eventExtra.isSystem = a.isSystem;
|
|
494
|
+
const event = {
|
|
495
|
+
id: `event-android-app-${pkgName}`,
|
|
496
|
+
type: ENTITY_TYPES.EVENT,
|
|
497
|
+
subtype: EVENT_SUBTYPES.OTHER,
|
|
498
|
+
occurredAt: raw.capturedAt,
|
|
499
|
+
ingestedAt,
|
|
500
|
+
source: source(`android-app:${pkgName}`),
|
|
501
|
+
content: { title: `应用:${label}` },
|
|
502
|
+
extra: eventExtra,
|
|
503
|
+
};
|
|
504
|
+
|
|
431
505
|
return {
|
|
432
|
-
events: [],
|
|
506
|
+
events: [event],
|
|
433
507
|
persons: [],
|
|
434
508
|
places: [],
|
|
435
509
|
items: [item],
|
|
@@ -1,18 +1,39 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* §2.5 v0.2 — 12306 (China Railway) ticket adapter, dual-mode.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
* enrich" pattern.
|
|
11
|
-
* 2. user-uploaded JSON dump (e.g. exported from a 3rd-party 12306
|
|
12
|
-
* scraper, or hand-curated). Optional.
|
|
4
|
+
* 1. snapshot mode (opts.inputPath): in-APK Android cc reads a snapshot
|
|
5
|
+
* JSON produced by the phone's Kyfw12306LocalCollector. The collector
|
|
6
|
+
* uses captured login cookie to hit kyfw.12306.cn `/otn/queryOrder/
|
|
7
|
+
* queryMyOrder` + `/otn/queryOrder/queryMyOrderNoComplete` (cookie-only,
|
|
8
|
+
* no signing), parses each ticket into a structured event, writes JSON.
|
|
9
|
+
* Desktop-independent. account is OPTIONAL at construction.
|
|
13
10
|
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
11
|
+
* 2. file-import mode (opts.dataPath, legacy v0.5): user-uploaded JSON
|
|
12
|
+
* dump from a 3rd-party 12306 scraper or hand-curated. Preserved for
|
|
13
|
+
* backward compat. account.username REQUIRED.
|
|
14
|
+
*
|
|
15
|
+
* Snapshot schema (mirrors Kyfw12306LocalCollector.SNAPSHOT_SCHEMA_VERSION):
|
|
16
|
+
*
|
|
17
|
+
* {
|
|
18
|
+
* "schemaVersion": 1,
|
|
19
|
+
* "snapshottedAt": <epoch-ms>,
|
|
20
|
+
* "vendor": "12306",
|
|
21
|
+
* "events": [
|
|
22
|
+
* { "kind": "ticket", "id": "ticket-<seqNo>:<n>", "capturedAt": <ms>,
|
|
23
|
+
* "orderSequenceNo": "...", "ticketNumber": "...",
|
|
24
|
+
* "passengerName": "张三", "passengerIdLast6": "123456",
|
|
25
|
+
* "trainNumber": "G123",
|
|
26
|
+
* "fromStation": "上海虹桥", "toStation": "北京南",
|
|
27
|
+
* "departureMs": <ms>, "arrivalMs": <ms>,
|
|
28
|
+
* "seatTypeName": "二等座", "coachNo": "05", "seatNo": "12A",
|
|
29
|
+
* "ticketPrice": 553.5, "orderDateMs": <ms>, "orderTotalPrice": 553.5,
|
|
30
|
+
* "isCompleted": true }
|
|
31
|
+
* ]
|
|
32
|
+
* }
|
|
33
|
+
*
|
|
34
|
+
* Sensitivity: medium — ticket history reveals travel patterns + 6 trailing
|
|
35
|
+
* digits of national ID (used for cross-source EntityResolver linking, never
|
|
36
|
+
* exposed in vault search). Snapshot file is purged after sync.
|
|
16
37
|
*/
|
|
17
38
|
|
|
18
39
|
"use strict";
|
|
@@ -21,32 +42,75 @@ const fs = require("node:fs");
|
|
|
21
42
|
const { normalizeTravelRecord, parseChineseDateTime } = require("../travel-base");
|
|
22
43
|
|
|
23
44
|
const NAME = "travel-12306";
|
|
24
|
-
const VERSION = "0.
|
|
45
|
+
const VERSION = "0.6.0";
|
|
46
|
+
const SNAPSHOT_SCHEMA_VERSION = 1;
|
|
47
|
+
|
|
48
|
+
const KIND_TICKET = "ticket";
|
|
49
|
+
const VALID_SNAPSHOT_KINDS = Object.freeze([KIND_TICKET]);
|
|
25
50
|
|
|
26
51
|
class Train12306Adapter {
|
|
27
52
|
constructor(opts = {}) {
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
this.account = opts.account;
|
|
53
|
+
// §2.5 v0.2: account.username OPTIONAL — snapshot mode is stateless and
|
|
54
|
+
// doesn't need a pre-known username. file-import mode still requires it,
|
|
55
|
+
// checked at sync time, not construction.
|
|
56
|
+
this.account = opts.account || null;
|
|
32
57
|
this._dataPath = opts.dataPath || null;
|
|
33
58
|
|
|
34
59
|
this.name = NAME;
|
|
35
60
|
this.version = VERSION;
|
|
36
|
-
this.capabilities = [
|
|
37
|
-
|
|
61
|
+
this.capabilities = [
|
|
62
|
+
"sync:snapshot",
|
|
63
|
+
"import:json",
|
|
64
|
+
"parse:12306-orders",
|
|
65
|
+
];
|
|
66
|
+
this.extractMode = "device-pull";
|
|
38
67
|
this.rateLimits = {};
|
|
39
68
|
this.dataDisclosure = {
|
|
40
69
|
fields: [
|
|
41
|
-
"12306:
|
|
70
|
+
"12306:orderSequenceNo / ticketNumber / passengerName / trainNumber / fromStation / toStation / departureMs / arrivalMs / seat / price",
|
|
42
71
|
],
|
|
43
72
|
sensitivity: "medium",
|
|
44
73
|
legalGate: false,
|
|
74
|
+
defaultInclude: {
|
|
75
|
+
ticket: true,
|
|
76
|
+
},
|
|
77
|
+
};
|
|
78
|
+
|
|
79
|
+
// _deps injection seam — vi.mock fs doesn't intercept inlined CJS require.
|
|
80
|
+
this._deps = {
|
|
81
|
+
fs,
|
|
45
82
|
};
|
|
46
83
|
}
|
|
47
84
|
|
|
48
|
-
async authenticate() {
|
|
49
|
-
|
|
85
|
+
async authenticate(ctx = {}) {
|
|
86
|
+
if (ctx && typeof ctx.inputPath === "string" && ctx.inputPath.length > 0) {
|
|
87
|
+
try {
|
|
88
|
+
this._deps.fs.accessSync(ctx.inputPath, this._deps.fs.constants.R_OK);
|
|
89
|
+
} catch (err) {
|
|
90
|
+
return {
|
|
91
|
+
ok: false,
|
|
92
|
+
reason: "INPUT_PATH_UNREADABLE",
|
|
93
|
+
message: `snapshot not readable at ${ctx.inputPath}: ${err.message}`,
|
|
94
|
+
};
|
|
95
|
+
}
|
|
96
|
+
return { ok: true, mode: "snapshot-file" };
|
|
97
|
+
}
|
|
98
|
+
if (this._dataPath || (ctx && typeof ctx.dataPath === "string")) {
|
|
99
|
+
if (!this.account || !this.account.username) {
|
|
100
|
+
return {
|
|
101
|
+
ok: false,
|
|
102
|
+
reason: "NO_ACCOUNT_USERNAME",
|
|
103
|
+
message: "travel-12306.authenticate: file-import mode requires account.username",
|
|
104
|
+
};
|
|
105
|
+
}
|
|
106
|
+
return { ok: true, account: this.account.username, mode: "file-import" };
|
|
107
|
+
}
|
|
108
|
+
return {
|
|
109
|
+
ok: false,
|
|
110
|
+
reason: "NO_INPUT",
|
|
111
|
+
message:
|
|
112
|
+
"travel-12306.authenticate: needs opts.inputPath (snapshot mode) OR opts.dataPath (file-import mode)",
|
|
113
|
+
};
|
|
50
114
|
}
|
|
51
115
|
|
|
52
116
|
async healthCheck() {
|
|
@@ -54,14 +118,83 @@ class Train12306Adapter {
|
|
|
54
118
|
}
|
|
55
119
|
|
|
56
120
|
async *sync(opts = {}) {
|
|
121
|
+
if (typeof opts.inputPath === "string" && opts.inputPath.length > 0) {
|
|
122
|
+
yield* this._syncViaSnapshot(opts);
|
|
123
|
+
return;
|
|
124
|
+
}
|
|
57
125
|
const dataPath = opts.dataPath || this._dataPath;
|
|
58
|
-
if (
|
|
59
|
-
|
|
126
|
+
if (dataPath) {
|
|
127
|
+
yield* this._syncViaFileImport({ ...opts, dataPath });
|
|
128
|
+
return;
|
|
129
|
+
}
|
|
130
|
+
throw new Error(
|
|
131
|
+
"travel-12306.sync: needs opts.inputPath (snapshot mode, Android in-APK cc) OR opts.dataPath (file-import mode, user-uploaded JSON)",
|
|
132
|
+
);
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
async *_syncViaSnapshot(opts) {
|
|
136
|
+
const raw = this._deps.fs.readFileSync(opts.inputPath, "utf-8");
|
|
137
|
+
const snapshot = JSON.parse(raw);
|
|
138
|
+
if (
|
|
139
|
+
!snapshot ||
|
|
140
|
+
typeof snapshot !== "object" ||
|
|
141
|
+
snapshot.schemaVersion !== SNAPSHOT_SCHEMA_VERSION
|
|
142
|
+
) {
|
|
143
|
+
throw new Error(
|
|
144
|
+
`travel-12306.sync: snapshot schemaVersion mismatch (got ${snapshot && snapshot.schemaVersion}, expected ${SNAPSHOT_SCHEMA_VERSION})`,
|
|
145
|
+
);
|
|
146
|
+
}
|
|
147
|
+
const fallbackCapturedAt =
|
|
148
|
+
Number.isFinite(snapshot.snapshottedAt) && snapshot.snapshottedAt > 0
|
|
149
|
+
? Math.floor(snapshot.snapshottedAt)
|
|
150
|
+
: Date.now();
|
|
151
|
+
const include = opts.include || {};
|
|
152
|
+
const limit =
|
|
153
|
+
Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
|
|
154
|
+
|
|
155
|
+
const events = Array.isArray(snapshot.events) ? snapshot.events : [];
|
|
156
|
+
let emitted = 0;
|
|
157
|
+
for (const ev of events) {
|
|
158
|
+
if (emitted >= limit) return;
|
|
159
|
+
if (!ev || typeof ev !== "object") continue;
|
|
160
|
+
const kind = ev.kind;
|
|
161
|
+
if (!VALID_SNAPSHOT_KINDS.includes(kind)) continue;
|
|
162
|
+
if (include[kind] === false) continue;
|
|
163
|
+
|
|
164
|
+
const capturedAt =
|
|
165
|
+
(Number.isFinite(ev.capturedAt) && ev.capturedAt) ||
|
|
166
|
+
(Number.isFinite(ev.departureMs) && ev.departureMs) ||
|
|
167
|
+
fallbackCapturedAt;
|
|
168
|
+
const id =
|
|
169
|
+
(typeof ev.id === "string" && ev.id.length > 0 && ev.id) ||
|
|
170
|
+
ev.orderSequenceNo ||
|
|
171
|
+
null;
|
|
172
|
+
|
|
173
|
+
yield {
|
|
174
|
+
adapter: NAME,
|
|
175
|
+
kind,
|
|
176
|
+
originalId: stableOriginalId(id || `unknown-${emitted}`),
|
|
177
|
+
capturedAt,
|
|
178
|
+
payload: { ...ev, snapshot: true },
|
|
179
|
+
};
|
|
180
|
+
emitted += 1;
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
async *_syncViaFileImport(opts) {
|
|
185
|
+
if (!this.account || !this.account.username) {
|
|
186
|
+
throw new Error(
|
|
187
|
+
"travel-12306._syncViaFileImport: account.username required (set via new Train12306Adapter({ account: { username } }))",
|
|
188
|
+
);
|
|
189
|
+
}
|
|
190
|
+
const dataPath = opts.dataPath;
|
|
191
|
+
if (!dataPath || !this._deps.fs.existsSync(dataPath)) return;
|
|
192
|
+
const buf = this._deps.fs.readFileSync(dataPath, "utf-8");
|
|
60
193
|
let records;
|
|
61
194
|
try {
|
|
62
195
|
records = parseRecords(buf);
|
|
63
196
|
} catch (err) {
|
|
64
|
-
throw new Error(`
|
|
197
|
+
throw new Error(`travel-12306._syncViaFileImport: parse failed: ${err.message}`);
|
|
65
198
|
}
|
|
66
199
|
for (const r of records) {
|
|
67
200
|
yield {
|
|
@@ -74,7 +207,18 @@ class Train12306Adapter {
|
|
|
74
207
|
}
|
|
75
208
|
|
|
76
209
|
normalize(raw) {
|
|
77
|
-
if (!raw || !raw.payload
|
|
210
|
+
if (!raw || !raw.payload) {
|
|
211
|
+
throw new Error("Train12306Adapter.normalize: payload missing");
|
|
212
|
+
}
|
|
213
|
+
// Snapshot-mode payload is the parsed event directly; legacy file-import
|
|
214
|
+
// payload has `.record` (already normalized shape).
|
|
215
|
+
if (raw.payload.snapshot) {
|
|
216
|
+
return normalizeTravelRecord(snapshotEventToRecord(raw.payload), {
|
|
217
|
+
adapterName: NAME,
|
|
218
|
+
adapterVersion: VERSION,
|
|
219
|
+
});
|
|
220
|
+
}
|
|
221
|
+
if (!raw.payload.record) {
|
|
78
222
|
throw new Error("Train12306Adapter.normalize: raw.payload.record missing");
|
|
79
223
|
}
|
|
80
224
|
return normalizeTravelRecord(raw.payload.record, {
|
|
@@ -84,8 +228,43 @@ class Train12306Adapter {
|
|
|
84
228
|
}
|
|
85
229
|
}
|
|
86
230
|
|
|
231
|
+
/**
 * Namespace a ticket id as `12306:ticket:<id>`.
 *
 * @param {*} id value appended after string conversion
 * @returns {string}
 */
function stableOriginalId(id) {
  return "12306:ticket:".concat(id);
}
|
|
234
|
+
|
|
235
|
+
/**
 * Map one snapshot ticket event onto the record shape handed to
 * normalizeTravelRecord.
 *
 * - recordId is the string form of the first truthy value among id,
 *   orderSequenceNo and ticketNumber.
 *   NOTE(review): when all three are missing this becomes the literal
 *   string "undefined" — confirm downstream tolerates that.
 * - totalCost is only set for a finite ticketPrice greater than 0 (CNY).
 * - departureMs / arrivalMs / bookedAt collapse falsy values to null.
 * - NOTE(review): the ID suffix lands in extras.idLast6 here, while the
 *   file-import path (orderToRecord) uses extras.idCardLast6 — confirm which
 *   key consumers read.
 *
 * @param {object} ev snapshot event
 * @returns {object} travel record
 */
function snapshotEventToRecord(ev) {
  const {
    id,
    orderSequenceNo,
    ticketNumber,
    ticketPrice,
    fromStation,
    toStation,
    departureMs,
    arrivalMs,
    trainNumber,
    passengerName,
    orderDateMs,
    seatTypeName,
    coachNo,
    seatNo,
    isCompleted,
    passengerIdLast6,
    orderTotalPrice,
  } = ev;

  const hasPrice = Number.isFinite(ticketPrice) && ticketPrice > 0;
  const totalCost = hasPrice ? { value: ticketPrice, currency: "CNY" } : null;

  const extras = {
    seat: seatTypeName,
    coachNo,
    seatNumber: seatNo,
    isCompleted,
    idLast6: passengerIdLast6 || undefined,
    orderTotalPrice: orderTotalPrice || undefined,
  };

  return {
    vendorId: "12306",
    recordId: String(id || orderSequenceNo || ticketNumber),
    vehicleType: "train",
    from: { station: fromStation },
    to: { station: toStation },
    departureMs: departureMs || null,
    arrivalMs: arrivalMs || null,
    carrier: "12306",
    vehicleNumber: trainNumber,
    totalCost,
    traveler: passengerName,
    confirmationCode: ticketNumber || orderSequenceNo,
    bookedAt: orderDateMs || null,
    extras,
  };
}
|
|
265
|
+
|
|
87
266
|
/**
|
|
88
|
-
* Parse a 12306 dump file. Accepts either:
|
|
267
|
+
* Parse a 12306 dump file (legacy v0.5 file-import mode). Accepts either:
|
|
89
268
|
* - JSON array of order objects
|
|
90
269
|
* - JSON object { orders: [...] }
|
|
91
270
|
* - JSONL (one order per line)
|
|
@@ -134,7 +313,7 @@ function orderToRecord(o) {
|
|
|
134
313
|
extras: {
|
|
135
314
|
seat: o.seat || o.seatType,
|
|
136
315
|
seatNumber: o.seatNumber || o.seat_number,
|
|
137
|
-
idCardLast6: o.idLast6 || undefined,
|
|
316
|
+
idCardLast6: o.idLast6 || undefined,
|
|
138
317
|
},
|
|
139
318
|
};
|
|
140
319
|
}
|
|
@@ -148,4 +327,11 @@ function numberOrParse(v) {
|
|
|
148
327
|
return null;
|
|
149
328
|
}
|
|
150
329
|
|
|
151
|
-
module.exports = {
|
|
330
|
+
module.exports = {
|
|
331
|
+
Train12306Adapter,
|
|
332
|
+
parseRecords,
|
|
333
|
+
NAME,
|
|
334
|
+
VERSION,
|
|
335
|
+
SNAPSHOT_SCHEMA_VERSION,
|
|
336
|
+
VALID_SNAPSHOT_KINDS,
|
|
337
|
+
};
|