@chainlesschain/personal-data-hub 0.4.7 → 0.4.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. package/__tests__/adapters/doc-baidu-netdisk.test.js +102 -0
  2. package/__tests__/adapters/doc-platforms.test.js +177 -0
  3. package/__tests__/adapters/music-kugou.test.js +187 -0
  4. package/__tests__/adapters/recruit-boss.test.js +180 -0
  5. package/__tests__/adapters/shopping-dianping.test.js +239 -0
  6. package/__tests__/adapters/social-csdn.test.js +175 -0
  7. package/__tests__/adapters/social-zhihu.test.js +246 -0
  8. package/__tests__/adapters/travel-ctrip.test.js +175 -1
  9. package/__tests__/adapters/travel-didi.test.js +204 -0
  10. package/__tests__/adapters/travel-tongcheng.test.js +289 -0
  11. package/__tests__/adapters/video-platforms.test.js +152 -0
  12. package/lib/adapter-guide.js +13 -1
  13. package/lib/adapters/_document-base.js +370 -0
  14. package/lib/adapters/_video-base.js +331 -0
  15. package/lib/adapters/doc-baidu-netdisk/index.js +91 -0
  16. package/lib/adapters/doc-tencent-docs/index.js +94 -0
  17. package/lib/adapters/doc-wps/index.js +77 -0
  18. package/lib/adapters/music-kugou/index.js +418 -0
  19. package/lib/adapters/recruit-boss/index.js +442 -0
  20. package/lib/adapters/shopping-dianping/index.js +473 -0
  21. package/lib/adapters/social-csdn/index.js +444 -0
  22. package/lib/adapters/social-zhihu/index.js +488 -0
  23. package/lib/adapters/travel-ctrip/index.js +255 -40
  24. package/lib/adapters/travel-didi/index.js +327 -0
  25. package/lib/adapters/travel-tongcheng/index.js +393 -0
  26. package/lib/adapters/video-iqiyi/index.js +75 -0
  27. package/lib/adapters/video-tencent/index.js +78 -0
  28. package/lib/index.js +24 -0
  29. package/package.json +1 -1
@@ -1,22 +1,55 @@
1
1
  /**
2
- * Phase 9.3 — Ctrip (携程) order adapter.
3
- *
4
- * Ctrip has no official user export. Two input paths:
5
- * 1. JSON dump from a 3rd-party scraper or user-curated file
6
- * 2. Email order-confirmation events from Phase 5 (vault-side derive)
2
+ * Phase 9.3 — Ctrip (携程) order adapter, tri-mode.
7
3
  *
8
4
  * Ctrip orders cover 4 sub-types: flight / hotel / train / cruise.
9
5
  * We map each to the appropriate `vehicleType` in TravelRecord:
10
- * flight → "flight", hotel → "hotel", train → "train", cruise → "cruise"
6
+ * flight → "flight", hotel → "hotel", train → "train", cruise → "cruise".
7
+ *
8
+ * 1. snapshot / file-import mode (opts.inputPath | opts.dataPath): ingest a
9
+ * JSON dump from a 3rd-party scraper, a user-curated file, or an on-device
10
+ * Android collector. account is OPTIONAL (file-import is stateless).
11
+ *
12
+ * 2. cookie-api mode (opts.account.cookies, v0.7): fetch the Ctrip order
13
+ * centre directly from the hub, so collection no longer requires a manual
14
+ * export. After login on accounts.ctrip.com the order list is reachable
15
+ * under the `.ctrip.com` cookie domain (confirmed by the Android
16
+ * TravelVendor.kt CTRIP entry — "cookie scrape 完整链路 (有 web API)").
17
+ * As with the shopping adapters the actual HTTP call is delegated to an
18
+ * injected `fetchFn` (Android in-APK cc → OkHttp; desktop hub → Electron
19
+ * WebView net request) so this module stays a pure-Node parser +
20
+ * orchestrator. account OPTIONAL — the cookie carries identity.
21
+ *
22
+ * ── sign seam ──────────────────────────────────────────────────────────
23
+ * Ctrip's SOA order endpoints usually require a request `sign` token
24
+ * computed by client-side JS (analogous to 拼多多 anti_token / 抖音
25
+ * X-Bogus). No pure-Node implementation survives the rotation, so signing
26
+ * is injected via `opts.signProvider` (or constructor `signProvider`).
27
+ * When absent the request is still issued unsigned — best-effort, the
28
+ * endpoint may reject it, which surfaces as zero events rather than a
29
+ * crash. The endpoint constant is best-effort and overridable via
30
+ * `opts.ordersUrl`; Ctrip rotates SOA service numbers, so adjust the
31
+ * constant / pass opts.ordersUrl if it drifts (same playbook as the
32
+ * FAMILY-23 live fetchers — endpoints are not field-verified here).
33
+ *
34
+ * 3. (legacy) Email order-confirmation events from Phase 5 (vault-side derive).
11
35
  */
12
36
 
13
37
  "use strict";
14
38
 
15
39
  const fs = require("node:fs");
16
40
  const { normalizeTravelRecord, parseChineseDateTime } = require("../travel-base");
41
+ const { CookieAuth } = require("../shopping-base");
17
42
 
18
43
  const NAME = "travel-ctrip";
19
- const VERSION = "0.6.0"; // §9.3b account.email OPTIONAL + inputPath snapshot alias
44
+ const VERSION = "0.7.0"; // §9.3c cookie-api live fetch path (signProvider seam)
45
+
46
+ // Best-effort Ctrip order-centre list endpoint. Overridable via opts.ordersUrl
47
+ // (Ctrip rotates SOA service numbers; the injected fetchFn host may also point
48
+ // at whichever order API the captured cookie is currently scoped to).
49
+ const CTRIP_ORDERS_URL =
50
+ "https://m.ctrip.com/restapi/soa2/24690/getOrderList";
51
+ const DEFAULT_PAGE_SIZE = 20;
52
+ const DEFAULT_MAX_PAGES = 10;
20
53
 
21
54
  class CtripAdapter {
22
55
  constructor(opts = {}) {
@@ -28,9 +61,32 @@ class CtripAdapter {
28
61
  this.account = opts.account || null;
29
62
  this._dataPath = opts.dataPath || null;
30
63
 
64
+ // §9.3c cookie-api mode — activates when account.cookies is supplied.
65
+ this._cookieAuth =
66
+ opts.account && opts.account.cookies
67
+ ? new CookieAuth({ platform: "ctrip", cookies: opts.account.cookies })
68
+ : null;
69
+ // The actual HTTP call is delegated to an injected fetchFn so this module
70
+ // stays a pure-Node parser/orchestrator (same seam as the shopping +
71
+ // travel-12306 adapters). fetchFn({ url, cookies, query, sign }) → JSON.
72
+ this._fetchFn = typeof opts.fetchFn === "function" ? opts.fetchFn : defaultFetch;
73
+ // sign seam — async fn({ url, query, cookies }) → string|null. When absent,
74
+ // requests carry sign: null (best-effort, the endpoint may reject).
75
+ this._signProvider =
76
+ typeof opts.signProvider === "function" ? opts.signProvider : null;
77
+ this._ordersUrl =
78
+ typeof opts.ordersUrl === "string" && opts.ordersUrl.length > 0
79
+ ? opts.ordersUrl
80
+ : CTRIP_ORDERS_URL;
81
+
31
82
  this.name = NAME;
32
83
  this.version = VERSION;
33
- this.capabilities = ["import:json", "sync:snapshot", "parse:ctrip-orders"];
84
+ this.capabilities = [
85
+ "import:json",
86
+ "sync:snapshot",
87
+ "sync:cookie-api",
88
+ "parse:ctrip-orders",
89
+ ];
34
90
  this.extractMode = "file-import";
35
91
  this.rateLimits = {};
36
92
  this.dataDisclosure = {
@@ -40,24 +96,45 @@ class CtripAdapter {
40
96
  sensitivity: "medium",
41
97
  legalGate: false,
42
98
  };
99
+
100
+ // _deps injection seam — vi.mock fs doesn't intercept inlined CJS require
101
+ // (see .claude/rules/testing.md).
102
+ this._deps = { fs };
43
103
  }
44
104
 
45
105
  async authenticate(ctx = {}) {
46
106
  // Snapshot / file-import path: validate file readable when an inputPath
47
- // / dataPath is provided. Otherwise return ok with whatever account
48
- // bookkeeping we have (file path can be supplied later via sync(opts)).
107
+ // / dataPath is provided. Takes priority over cookie mode when both given.
49
108
  const filePath = (ctx && ctx.inputPath) || ctx.dataPath || this._dataPath;
50
109
  if (filePath) {
51
- try { fs.accessSync(filePath, fs.constants.R_OK); }
110
+ try { this._deps.fs.accessSync(filePath, this._deps.fs.constants.R_OK); }
52
111
  catch (err) {
53
112
  return { ok: false, reason: "INPUT_PATH_UNREADABLE", message: `not readable at ${filePath}: ${err.message}` };
54
113
  }
55
114
  return { ok: true, mode: "snapshot-file" };
56
115
  }
116
+ if (this._cookieAuth) {
117
+ const ok = await this._cookieAuth.validate();
118
+ if (!ok) {
119
+ return { ok: false, reason: "INVALID_COOKIE", error: "cookies missing" };
120
+ }
121
+ // account is OPTIONAL in cookie mode — the .ctrip.com cookie carries identity.
122
+ return {
123
+ ok: true,
124
+ account: (this.account && this.account.email) || null,
125
+ mode: "cookie",
126
+ };
127
+ }
57
128
  return { ok: true, account: this.account ? this.account.email : null, mode: "ready" };
58
129
  }
59
130
 
60
131
  async healthCheck() {
132
+ if (this._cookieAuth) {
133
+ const r = await this.authenticate();
134
+ return r.ok
135
+ ? { ok: true, lastChecked: Date.now() }
136
+ : { ok: false, reason: r.reason, error: r.error };
137
+ }
61
138
  return { ok: true, lastChecked: Date.now() };
62
139
  }
63
140
 
@@ -66,21 +143,86 @@ class CtripAdapter {
66
143
  // call syncAdapter("travel-ctrip", path) with the same shape it uses
67
144
  // for the other snapshot-mode adapters (shopping-jd / travel-12306).
68
145
  const dataPath = opts.inputPath || opts.dataPath || this._dataPath;
69
- if (!dataPath || !fs.existsSync(dataPath)) return;
70
- const text = fs.readFileSync(dataPath, "utf-8");
71
- let records;
72
- try {
73
- records = parseRecords(text);
74
- } catch (err) {
75
- throw new Error(`CtripAdapter: parse failed: ${err.message}`);
146
+ if (dataPath) {
147
+ if (!this._deps.fs.existsSync(dataPath)) return;
148
+ const text = this._deps.fs.readFileSync(dataPath, "utf-8");
149
+ let records;
150
+ try {
151
+ records = parseRecords(text);
152
+ } catch (err) {
153
+ throw new Error(`CtripAdapter: parse failed: ${err.message}`);
154
+ }
155
+ for (const r of records) {
156
+ yield {
157
+ adapter: NAME,
158
+ originalId: r.recordId,
159
+ capturedAt: r.bookedAt || r.departureMs || Date.now(),
160
+ payload: { record: r },
161
+ };
162
+ }
163
+ return;
76
164
  }
77
- for (const r of records) {
78
- yield {
79
- adapter: NAME,
80
- originalId: r.recordId,
81
- capturedAt: r.bookedAt || r.departureMs || Date.now(),
82
- payload: { record: r },
83
- };
165
+ if (this._cookieAuth) {
166
+ yield* this._syncViaCookie(opts);
167
+ }
168
+ }
169
+
170
+ /**
171
+ * §9.3c — cookie-api live fetch. Hits the Ctrip order-centre list endpoint
172
+ * via the injected fetchFn, paginates with a pageIndex cursor, stops at the
173
+ * sinceWatermark / maxPages, maps each order through orderToRecord (so the
174
+ * existing normalize path applies unchanged) and yields it.
175
+ */
176
+ async *_syncViaCookie(opts = {}) {
177
+ if (!(await this._cookieAuth.validate())) return;
178
+ const cookies = this._cookieAuth.toHeader();
179
+ const sinceMs =
180
+ opts.sinceWatermark != null
181
+ ? parseInt(String(opts.sinceWatermark), 10) || 0
182
+ : Date.now() - 365 * 24 * 3600_000; // default last year
183
+ const pageSize = Number.isFinite(opts.pageSize) ? opts.pageSize : DEFAULT_PAGE_SIZE;
184
+ const maxPages =
185
+ Number.isInteger(opts.maxPages) && opts.maxPages > 0
186
+ ? opts.maxPages
187
+ : DEFAULT_MAX_PAGES;
188
+ const limit =
189
+ Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
190
+
191
+ let emitted = 0;
192
+ let pageIndex = 1;
193
+ while (pageIndex <= maxPages) {
194
+ const query = { pageIndex, pageSize, ts: Date.now() };
195
+ // sign seam — best-effort. null when no signProvider.
196
+ let sign = null;
197
+ if (this._signProvider) {
198
+ sign = await this._signProvider({ url: this._ordersUrl, query, cookies });
199
+ }
200
+ const resp = await this._fetchFn({ url: this._ordersUrl, cookies, query, sign });
201
+ const orders = extractOrders(resp);
202
+ if (!orders.length) break;
203
+
204
+ let pageHasNew = false;
205
+ let reachedWatermark = false;
206
+ for (const raw of orders) {
207
+ const rec = orderToRecord(raw, { capturedVia: "cookie-api" });
208
+ if (!rec) continue;
209
+ const ts = rec.bookedAt || rec.departureMs || null;
210
+ if (ts && ts < sinceMs) {
211
+ reachedWatermark = true; // remaining orders are older
212
+ break;
213
+ }
214
+ pageHasNew = true;
215
+ if (emitted >= limit) return;
216
+ yield {
217
+ adapter: NAME,
218
+ originalId: rec.recordId,
219
+ capturedAt: ts || Date.now(),
220
+ payload: { record: rec },
221
+ };
222
+ emitted += 1;
223
+ }
224
+ if (reachedWatermark || !pageHasNew || orders.length < pageSize) break;
225
+ pageIndex += 1;
84
226
  }
85
227
  }
86
228
 
@@ -120,41 +262,106 @@ function parseRecords(text) {
120
262
  return orders.map(orderToRecord).filter(Boolean);
121
263
  }
122
264
 
123
- function orderToRecord(o) {
265
+ function orderToRecord(o, opts = {}) {
124
266
  if (!o || typeof o !== "object") return null;
125
- const recordId = o.orderId || o.id || o.order_no;
267
+ // Web-API order objects (cookie-api mode) use additional id/field names;
268
+ // file-import / snapshot rows keep priority so existing parsing is unchanged.
269
+ const recordId = o.orderId || o.id || o.order_no || o.orderID || o.orderId_;
126
270
  if (!recordId) return null;
127
- const type = (o.type || o.orderType || "").toLowerCase();
271
+ const type = (
272
+ o.type ||
273
+ o.orderType ||
274
+ o.bizType ||
275
+ o.businessType ||
276
+ o.productType ||
277
+ ""
278
+ )
279
+ .toString()
280
+ .toLowerCase();
128
281
  const vehicleType = TYPE_MAP[type] || "trip";
129
282
 
283
+ const priceRaw =
284
+ o.price != null
285
+ ? o.price
286
+ : o.amount != null
287
+ ? o.amount
288
+ : o.orderAmount != null
289
+ ? o.orderAmount
290
+ : o.totalAmount != null
291
+ ? o.totalAmount
292
+ : o.totalPrice != null
293
+ ? o.totalPrice
294
+ : null;
295
+
130
296
  return {
131
297
  vendorId: "ctrip",
132
298
  recordId: String(recordId),
133
299
  vehicleType,
134
- from: o.fromCity || o.from_city || o.depCity
135
- ? { city: o.fromCity || o.from_city || o.depCity }
300
+ from: o.fromCity || o.from_city || o.depCity || o.departCity
301
+ ? { city: o.fromCity || o.from_city || o.depCity || o.departCity }
136
302
  : null,
137
- to: o.toCity || o.to_city || o.arrCity || o.hotelCity
138
- ? { city: o.toCity || o.to_city || o.arrCity || o.hotelCity }
303
+ to: o.toCity || o.to_city || o.arrCity || o.arriveCity || o.hotelCity
304
+ ? { city: o.toCity || o.to_city || o.arrCity || o.arriveCity || o.hotelCity }
139
305
  : null,
140
- departureMs: numberOrParse(o.departureTime || o.dep_time || o.checkIn || o.check_in),
141
- arrivalMs: numberOrParse(o.arrivalTime || o.arr_time || o.checkOut || o.check_out),
142
- carrier: o.carrier || o.airline || o.hotelName || o.hotel_name || "携程",
306
+ departureMs: numberOrParse(
307
+ o.departureTime || o.dep_time || o.departureDate || o.checkIn || o.check_in || o.startDate,
308
+ ),
309
+ arrivalMs: numberOrParse(
310
+ o.arrivalTime || o.arr_time || o.arrivalDate || o.checkOut || o.check_out || o.endDate,
311
+ ),
312
+ carrier:
313
+ o.carrier || o.airline || o.hotelName || o.hotel_name || o.orderTitle || o.title || "携程",
143
314
  vehicleNumber: o.flightNumber || o.flight_no || o.trainNumber || o.train_no,
144
- totalCost: o.price != null
145
- ? { value: parseFloat(o.price), currency: o.currency || "CNY" }
315
+ totalCost: priceRaw != null
316
+ ? { value: parseFloat(priceRaw), currency: o.currency || "CNY" }
146
317
  : null,
147
- traveler: o.passengerName || o.passenger || o.guestName || o.guest_name,
318
+ traveler:
319
+ o.passengerName || o.passenger || o.guestName || o.guest_name || o.contactName,
148
320
  confirmationCode: o.confirmationCode || o.pnr || o.confirmation_no,
149
- bookedAt: numberOrParse(o.bookedAt || o.order_time),
321
+ bookedAt: numberOrParse(
322
+ o.bookedAt || o.order_time || o.orderDate || o.createTime || o.orderTime,
323
+ ),
150
324
  extras: {
151
325
  type,
152
326
  ...(o.hotel ? { hotel: o.hotel } : {}),
153
327
  ...(o.nights != null ? { nights: o.nights } : {}),
328
+ ...(opts.capturedVia ? { capturedVia: opts.capturedVia } : {}),
154
329
  },
155
330
  };
156
331
  }
157
332
 
333
+ /**
334
+ * Pull the order array out of a Ctrip order-centre response. Ctrip nests the
335
+ * list under different keys across SOA versions; the injected fetchFn may also
336
+ * pre-flatten to `{ orders }`. Tolerant of all common shapes.
337
+ */
338
+ function extractOrders(resp) {
339
+ if (!resp || typeof resp !== "object") return [];
340
+ if (Array.isArray(resp.orders)) return resp.orders;
341
+ if (Array.isArray(resp.orderList)) return resp.orderList;
342
+ if (Array.isArray(resp.list)) return resp.list;
343
+ const data = resp.data && typeof resp.data === "object" ? resp.data : null;
344
+ if (data) {
345
+ if (Array.isArray(data.orders)) return data.orders;
346
+ if (Array.isArray(data.orderList)) return data.orderList;
347
+ if (Array.isArray(data.list)) return data.list;
348
+ }
349
+ const result = resp.result && typeof resp.result === "object" ? resp.result : null;
350
+ if (result) {
351
+ if (Array.isArray(result.orderList)) return result.orderList;
352
+ if (Array.isArray(result.list)) return result.list;
353
+ }
354
+ return [];
355
+ }
356
+
357
+ async function defaultFetch(_opts) {
358
+ // Pure-Node has no HTTP layer; the host (Android cc → OkHttp; desktop hub →
359
+ // Electron WebView net) injects a real fetchFn. A missing fetchFn is a wiring
360
+ // bug, not a runtime data condition, so it throws loudly rather than silently
361
+ // emitting 0 (mirrors travel-12306 / the shopping adapters).
362
+ throw new Error("travel-ctrip: no fetchFn configured for cookie-api mode");
363
+ }
364
+
158
365
  function numberOrParse(v) {
159
366
  if (Number.isFinite(v)) return v;
160
367
  if (typeof v === "string") {
@@ -164,4 +371,12 @@ function numberOrParse(v) {
164
371
  return null;
165
372
  }
166
373
 
167
- module.exports = { CtripAdapter, parseRecords, TYPE_MAP, NAME, VERSION };
374
+ module.exports = {
375
+ CtripAdapter,
376
+ parseRecords,
377
+ orderToRecord,
378
+ extractOrders,
379
+ TYPE_MAP,
380
+ NAME,
381
+ VERSION,
382
+ };