@chainlesschain/personal-data-hub 0.4.6 → 0.4.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/doc-baidu-netdisk.test.js +102 -0
- package/__tests__/adapters/doc-platforms.test.js +177 -0
- package/__tests__/adapters/music-kugou.test.js +187 -0
- package/__tests__/adapters/recruit-boss.test.js +180 -0
- package/__tests__/adapters/shopping-dianping.test.js +239 -0
- package/__tests__/adapters/social-csdn.test.js +175 -0
- package/__tests__/adapters/social-zhihu.test.js +246 -0
- package/__tests__/adapters/travel-12306.test.js +234 -1
- package/__tests__/adapters/travel-ctrip.test.js +175 -1
- package/__tests__/adapters/travel-didi.test.js +204 -0
- package/__tests__/adapters/travel-tongcheng.test.js +289 -0
- package/__tests__/adapters/video-platforms.test.js +152 -0
- package/lib/adapter-guide.js +13 -1
- package/lib/adapters/_document-base.js +370 -0
- package/lib/adapters/_video-base.js +331 -0
- package/lib/adapters/doc-baidu-netdisk/index.js +91 -0
- package/lib/adapters/doc-tencent-docs/index.js +94 -0
- package/lib/adapters/doc-wps/index.js +77 -0
- package/lib/adapters/music-kugou/index.js +418 -0
- package/lib/adapters/recruit-boss/index.js +442 -0
- package/lib/adapters/shopping-dianping/index.js +473 -0
- package/lib/adapters/social-csdn/index.js +444 -0
- package/lib/adapters/social-zhihu/index.js +488 -0
- package/lib/adapters/travel-12306/index.js +279 -5
- package/lib/adapters/travel-ctrip/index.js +255 -40
- package/lib/adapters/travel-didi/index.js +327 -0
- package/lib/adapters/travel-tongcheng/index.js +393 -0
- package/lib/adapters/video-iqiyi/index.js +75 -0
- package/lib/adapters/video-tencent/index.js +78 -0
- package/lib/index.js +24 -0
- package/package.json +1 -1
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* _document-base — shared infrastructure for "own-document list" adapters
|
|
3
|
+
* (WPS 云文档 / 腾讯文档 / etc.), Phase 13+ §12.1 "自创文档列表".
|
|
4
|
+
*
|
|
5
|
+
* These platforms all expose the same shape of personal data: a paginated list
|
|
6
|
+
* of documents the user created/owns (title, type, create/modify time, url).
|
|
7
|
+
* Rather than copy ~300 lines per platform (mirroring shopping-base /
|
|
8
|
+
* travel-base / _local-im-pc-adapter), `createDocumentAdapter(config)` returns
|
|
9
|
+
* a fully-formed adapter class with snapshot + cookie-api modes; each platform
|
|
10
|
+
* supplies only its endpoint + field mapping.
|
|
11
|
+
*
|
|
12
|
+
* 1. snapshot mode (opts.inputPath): JSON schemaVersion 1, stateless.
|
|
13
|
+
* 2. cookie-api mode (opts.account.cookies): fetch the owner's document list
|
|
14
|
+
* via the injected `fetchFn` (Android in-APK cc → OkHttp; desktop hub →
|
|
15
|
+
* Electron WebView net request), paginate, map each doc → a DocumentRecord.
|
|
16
|
+
* A sign seam (opts.signProvider) covers any anti-bot token; best-effort
|
|
17
|
+
* unsigned when absent. Endpoint overridable via opts.listUrl (best-effort,
|
|
18
|
+
* not field-verified — FAMILY-23 playbook).
|
|
19
|
+
*
|
|
20
|
+
* normalize() emits, per document: an authoring EVENT (subtype POST) + an ITEM
|
|
21
|
+
* (subtype DOCUMENT), mirroring netease-music's event+item dual-emit so the
|
|
22
|
+
* vault can both timeline "我创建了 X" and list the document entity.
|
|
23
|
+
*
|
|
24
|
+
* Snapshot schema (schemaVersion 1):
|
|
25
|
+
* {
|
|
26
|
+
* "schemaVersion": 1, "snapshottedAt": <ms>,
|
|
27
|
+
* "account": { "userId": "...", "name": "..." },
|
|
28
|
+
* "events": [
|
|
29
|
+
* { "kind": "document", "id": "doc-<id>", "docId": "...", "title": "...",
|
|
30
|
+
* "docType": "doc|sheet|slide|pdf|form|...", "url": "...",
|
|
31
|
+
* "createdTime": <s|ms>, "updatedTime": <s|ms> }
|
|
32
|
+
* ]
|
|
33
|
+
* }
|
|
34
|
+
*/
|
|
35
|
+
|
|
36
|
+
"use strict";
|
|
37
|
+
|
|
38
|
+
const fs = require("node:fs");
|
|
39
|
+
const { newId } = require("../ids");
|
|
40
|
+
const {
|
|
41
|
+
ENTITY_TYPES,
|
|
42
|
+
EVENT_SUBTYPES,
|
|
43
|
+
ITEM_SUBTYPES,
|
|
44
|
+
CAPTURED_BY,
|
|
45
|
+
} = require("../constants");
|
|
46
|
+
|
|
47
|
+
const SNAPSHOT_SCHEMA_VERSION = 1;
|
|
48
|
+
const KIND_DOCUMENT = "document";
|
|
49
|
+
const VALID_SNAPSHOT_KINDS = Object.freeze([KIND_DOCUMENT]);
|
|
50
|
+
const PAGE_SIZE = 20;
|
|
51
|
+
|
|
52
|
+
/**
 * Coerce a loosely-typed timestamp into epoch milliseconds.
 *
 * Accepted inputs:
 *  - finite numbers: values above 1e12 are taken as milliseconds, everything
 *    else as seconds (multiplied by 1000);
 *  - digit-only strings (surrounding whitespace ignored): same rule as numbers;
 *  - any other string: handed to Date.parse();
 *  - Date instances: their epoch-millisecond value.
 *
 * @param {*} v candidate timestamp
 * @returns {number|null} epoch milliseconds, or null when `v` cannot be interpreted
 */
function parseTime(v) {
  if (v instanceof Date) {
    const ms = v.getTime();
    return Number.isFinite(ms) ? ms : null;
  }
  if (Number.isFinite(v)) return v > 1e12 ? v : v * 1000;
  if (typeof v !== "string") return null;

  // Trim first: " 1700000000\n" must hit the epoch branch rather than fall
  // through to Date.parse, whose handling of non-ISO text is engine-specific.
  const text = v.trim();
  if (/^\d+$/.test(text)) {
    const n = parseInt(text, 10);
    return n > 1e12 ? n : n * 1000;
  }
  const parsed = Date.parse(text);
  return Number.isFinite(parsed) ? parsed : null;
}
|
|
64
|
+
|
|
65
|
+
/**
|
|
66
|
+
* Build a document-list adapter class.
|
|
67
|
+
*
|
|
68
|
+
* @param {object} config
|
|
69
|
+
* @param {string} config.NAME adapter name, e.g. "doc-wps"
|
|
70
|
+
* @param {string} config.VERSION semver string
|
|
71
|
+
* @param {string} config.platform short platform id, e.g. "wps"
|
|
72
|
+
* @param {string} config.defaultListUrl best-effort list endpoint
|
|
73
|
+
* @param {(resp:any)=>any[]} config.extractDocs pull the doc array from a response
|
|
74
|
+
* @param {(raw:any)=>object|null} config.mapDoc map a raw doc → DocumentRecord
|
|
75
|
+
* DocumentRecord = { docId, title, docType, url, createdMs, updatedMs, extra? }
|
|
76
|
+
*/
|
|
77
|
+
function createDocumentAdapter(config) {
|
|
78
|
+
const {
|
|
79
|
+
NAME,
|
|
80
|
+
VERSION,
|
|
81
|
+
platform,
|
|
82
|
+
defaultListUrl,
|
|
83
|
+
extractDocs,
|
|
84
|
+
mapDoc,
|
|
85
|
+
} = config;
|
|
86
|
+
|
|
87
|
+
const { CookieAuth } = require("./shopping-base");
|
|
88
|
+
|
|
89
|
+
function stableOriginalId(id) {
|
|
90
|
+
const safe =
|
|
91
|
+
(typeof id === "string" && id.length > 0 && id) ||
|
|
92
|
+
(typeof id === "number" && Number.isFinite(id) && String(id)) ||
|
|
93
|
+
`unknown-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
|
|
94
|
+
return `${platform}:document:${safe}`;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
class DocumentAdapter {
|
|
98
|
+
constructor(opts = {}) {
|
|
99
|
+
this.account = opts.account || null;
|
|
100
|
+
this._cookieAuth =
|
|
101
|
+
opts.account && opts.account.cookies
|
|
102
|
+
? new CookieAuth({ platform, cookies: opts.account.cookies })
|
|
103
|
+
: null;
|
|
104
|
+
this._fetchFn = typeof opts.fetchFn === "function" ? opts.fetchFn : defaultFetch;
|
|
105
|
+
this._signProvider =
|
|
106
|
+
typeof opts.signProvider === "function" ? opts.signProvider : null;
|
|
107
|
+
this._listUrl =
|
|
108
|
+
typeof opts.listUrl === "string" && opts.listUrl.length > 0
|
|
109
|
+
? opts.listUrl
|
|
110
|
+
: defaultListUrl;
|
|
111
|
+
|
|
112
|
+
this.name = NAME;
|
|
113
|
+
this.version = VERSION;
|
|
114
|
+
this.capabilities = ["sync:snapshot", "sync:cookie-api", `parse:${platform}-documents`];
|
|
115
|
+
this.extractMode = "web-api";
|
|
116
|
+
this.rateLimits = { perMinute: 8, perDay: 200 };
|
|
117
|
+
this.dataDisclosure = {
|
|
118
|
+
fields: [`${platform}:document (title / docType / createdTime / url)`],
|
|
119
|
+
sensitivity: "medium",
|
|
120
|
+
legalGate: false,
|
|
121
|
+
defaultInclude: { document: true },
|
|
122
|
+
};
|
|
123
|
+
|
|
124
|
+
this._deps = { fs };
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
async authenticate(ctx = {}) {
|
|
128
|
+
if (ctx && typeof ctx.inputPath === "string" && ctx.inputPath.length > 0) {
|
|
129
|
+
try {
|
|
130
|
+
this._deps.fs.accessSync(ctx.inputPath, this._deps.fs.constants.R_OK);
|
|
131
|
+
} catch (err) {
|
|
132
|
+
return {
|
|
133
|
+
ok: false,
|
|
134
|
+
reason: "INPUT_PATH_UNREADABLE",
|
|
135
|
+
message: `snapshot not readable at ${ctx.inputPath}: ${err.message}`,
|
|
136
|
+
};
|
|
137
|
+
}
|
|
138
|
+
return { ok: true, mode: "snapshot-file" };
|
|
139
|
+
}
|
|
140
|
+
if (this._cookieAuth) {
|
|
141
|
+
const ok = await this._cookieAuth.validate();
|
|
142
|
+
if (!ok) return { ok: false, reason: "INVALID_COOKIE", error: "cookies missing" };
|
|
143
|
+
return {
|
|
144
|
+
ok: true,
|
|
145
|
+
account: (this.account && this.account.userId) || null,
|
|
146
|
+
mode: "cookie",
|
|
147
|
+
};
|
|
148
|
+
}
|
|
149
|
+
return {
|
|
150
|
+
ok: false,
|
|
151
|
+
reason: "NO_INPUT",
|
|
152
|
+
message: `${NAME}.authenticate: needs opts.inputPath (snapshot mode) OR opts.account.cookies (cookie-api mode)`,
|
|
153
|
+
};
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
async healthCheck() {
|
|
157
|
+
if (this._cookieAuth) {
|
|
158
|
+
const r = await this.authenticate();
|
|
159
|
+
return r.ok
|
|
160
|
+
? { ok: true, lastChecked: Date.now() }
|
|
161
|
+
: { ok: false, reason: r.reason, error: r.error };
|
|
162
|
+
}
|
|
163
|
+
return { ok: true, lastChecked: Date.now() };
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
async *sync(opts = {}) {
|
|
167
|
+
if (typeof opts.inputPath === "string" && opts.inputPath.length > 0) {
|
|
168
|
+
yield* this._syncViaSnapshot(opts);
|
|
169
|
+
return;
|
|
170
|
+
}
|
|
171
|
+
if (this._cookieAuth) {
|
|
172
|
+
yield* this._syncViaCookie(opts);
|
|
173
|
+
return;
|
|
174
|
+
}
|
|
175
|
+
throw new Error(
|
|
176
|
+
`${NAME}.sync: needs opts.inputPath (snapshot mode) OR opts.account.cookies (cookie-api mode)`,
|
|
177
|
+
);
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
async *_syncViaSnapshot(opts) {
|
|
181
|
+
const raw = this._deps.fs.readFileSync(opts.inputPath, "utf-8");
|
|
182
|
+
const snapshot = JSON.parse(raw);
|
|
183
|
+
if (
|
|
184
|
+
!snapshot ||
|
|
185
|
+
typeof snapshot !== "object" ||
|
|
186
|
+
snapshot.schemaVersion !== SNAPSHOT_SCHEMA_VERSION
|
|
187
|
+
) {
|
|
188
|
+
throw new Error(
|
|
189
|
+
`${NAME}.sync: snapshot schemaVersion mismatch (got ${snapshot && snapshot.schemaVersion}, expected ${SNAPSHOT_SCHEMA_VERSION})`,
|
|
190
|
+
);
|
|
191
|
+
}
|
|
192
|
+
const fallbackCapturedAt =
|
|
193
|
+
Number.isFinite(snapshot.snapshottedAt) && snapshot.snapshottedAt > 0
|
|
194
|
+
? Math.floor(snapshot.snapshottedAt)
|
|
195
|
+
: Date.now();
|
|
196
|
+
const account =
|
|
197
|
+
snapshot.account && typeof snapshot.account === "object" ? snapshot.account : null;
|
|
198
|
+
const include = opts.include || {};
|
|
199
|
+
const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
|
|
200
|
+
|
|
201
|
+
const events = Array.isArray(snapshot.events) ? snapshot.events : [];
|
|
202
|
+
let emitted = 0;
|
|
203
|
+
for (const ev of events) {
|
|
204
|
+
if (emitted >= limit) return;
|
|
205
|
+
if (!ev || typeof ev !== "object") continue;
|
|
206
|
+
if (!VALID_SNAPSHOT_KINDS.includes(ev.kind)) continue;
|
|
207
|
+
if (include[ev.kind] === false) continue;
|
|
208
|
+
|
|
209
|
+
const capturedAt =
|
|
210
|
+
parseTime(ev.capturedAt) ||
|
|
211
|
+
parseTime(ev.updatedTime) ||
|
|
212
|
+
parseTime(ev.createdTime) ||
|
|
213
|
+
fallbackCapturedAt;
|
|
214
|
+
const id =
|
|
215
|
+
(typeof ev.id === "string" && ev.id.length > 0 && ev.id) || ev.docId || null;
|
|
216
|
+
|
|
217
|
+
yield {
|
|
218
|
+
adapter: NAME,
|
|
219
|
+
kind: KIND_DOCUMENT,
|
|
220
|
+
originalId: stableOriginalId(id),
|
|
221
|
+
capturedAt,
|
|
222
|
+
payload: { record: snapshotEventToRecord(ev), account },
|
|
223
|
+
};
|
|
224
|
+
emitted += 1;
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
async *_syncViaCookie(opts = {}) {
|
|
229
|
+
if (!(await this._cookieAuth.validate())) return;
|
|
230
|
+
const cookies = this._cookieAuth.toHeader();
|
|
231
|
+
const include = opts.include || {};
|
|
232
|
+
if (include[KIND_DOCUMENT] === false) return;
|
|
233
|
+
const sinceMs =
|
|
234
|
+
opts.sinceWatermark != null
|
|
235
|
+
? parseInt(String(opts.sinceWatermark), 10) || 0
|
|
236
|
+
: 0;
|
|
237
|
+
const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
|
|
238
|
+
const maxPages =
|
|
239
|
+
Number.isInteger(opts.maxPages) && opts.maxPages > 0 ? opts.maxPages : 20;
|
|
240
|
+
|
|
241
|
+
let emitted = 0;
|
|
242
|
+
let offset = 0;
|
|
243
|
+
let page = 0;
|
|
244
|
+
while (page < maxPages) {
|
|
245
|
+
const query = { offset, limit: PAGE_SIZE };
|
|
246
|
+
let sign = null;
|
|
247
|
+
if (this._signProvider) {
|
|
248
|
+
sign = await this._signProvider({ url: this._listUrl, query, cookies });
|
|
249
|
+
}
|
|
250
|
+
const resp = await this._fetchFn({ url: this._listUrl, cookies, query, sign });
|
|
251
|
+
const docs = extractDocs(resp) || [];
|
|
252
|
+
if (!docs.length) break;
|
|
253
|
+
let reachedWatermark = false;
|
|
254
|
+
for (const d of docs) {
|
|
255
|
+
const rec = mapDoc(d);
|
|
256
|
+
if (!rec || !rec.docId) continue;
|
|
257
|
+
const ts = rec.updatedMs || rec.createdMs || null;
|
|
258
|
+
if (sinceMs && ts && ts < sinceMs) {
|
|
259
|
+
reachedWatermark = true;
|
|
260
|
+
break;
|
|
261
|
+
}
|
|
262
|
+
if (emitted >= limit) return;
|
|
263
|
+
yield {
|
|
264
|
+
adapter: NAME,
|
|
265
|
+
kind: KIND_DOCUMENT,
|
|
266
|
+
originalId: stableOriginalId(rec.docId),
|
|
267
|
+
capturedAt: ts || Date.now(),
|
|
268
|
+
payload: { record: rec },
|
|
269
|
+
};
|
|
270
|
+
emitted += 1;
|
|
271
|
+
}
|
|
272
|
+
if (reachedWatermark || docs.length < PAGE_SIZE) break;
|
|
273
|
+
offset += docs.length;
|
|
274
|
+
page += 1;
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
normalize(raw) {
|
|
279
|
+
if (!raw || !raw.payload || !raw.payload.record) {
|
|
280
|
+
throw new Error(`${NAME}.normalize: payload.record missing`);
|
|
281
|
+
}
|
|
282
|
+
return normalizeDocumentRecord(raw.payload.record, raw, platform, NAME, VERSION);
|
|
283
|
+
}
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
return DocumentAdapter;
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
/**
 * Convert one snapshot "document" event into a DocumentRecord, the same
 * shape a platform's mapDoc callback produces.
 *
 * Missing fields fall back to placeholders: docId "unknown", a localized
 * "untitled" title, docType "doc", a null url and an empty `extra` object.
 *
 * @param {object} ev snapshot event
 * @returns {object} DocumentRecord
 */
function snapshotEventToRecord(ev) {
  const rawId = ev.docId || ev.id || "unknown";
  const kindOfDoc = ev.docType || ev.type || "doc";

  let extra = {};
  if (ev.extra && typeof ev.extra === "object") {
    extra = ev.extra;
  }

  const record = {};
  record.docId = String(rawId);
  record.title = ev.title || "(无标题)";
  record.docType = kindOfDoc;
  record.url = ev.url || null;
  record.createdMs = parseTime(ev.createdTime);
  record.updatedMs = parseTime(ev.updatedTime);
  record.extra = extra;
  return record;
}
|
|
301
|
+
|
|
302
|
+
/**
 * Turn one DocumentRecord into vault entities: an authoring EVENT
 * (subtype POST) plus a DOCUMENT ITEM, linked through `extra.itemRef`.
 *
 * @param {object} rec       DocumentRecord `{ docId, title, docType, url, createdMs, updatedMs, extra? }`
 * @param {object} [raw]     envelope yielded by sync(); `originalId` and `capturedAt` are read from it
 * @param {string} platform  short platform id, used in ids and `extra.platform`
 * @param {string} NAME      adapter name recorded in `source.adapter`
 * @param {string} VERSION   adapter version recorded in `source.adapterVersion`
 * @returns {{events: object[], items: object[], persons: [], places: [], topics: []}}
 */
function normalizeDocumentRecord(rec, raw, platform, NAME, VERSION) {
  // This helper is exported; tolerate a missing envelope instead of throwing
  // a TypeError on `raw.capturedAt`.
  const envelope = raw || {};
  const ingestedAt = Date.now();
  const occurredAt = rec.updatedMs || rec.createdMs || envelope.capturedAt || ingestedAt;
  const source = {
    adapter: NAME,
    adapterVersion: VERSION,
    originalId: envelope.originalId,
    capturedAt: envelope.capturedAt || occurredAt,
    capturedBy: CAPTURED_BY.API,
  };
  const title = rec.title || "(无标题)";
  const docType = rec.docType || "doc";
  const url = rec.url || null;
  const itemId = `item-${platform}-doc-${rec.docId}`;

  // Only merge a plain object: spreading a string or array would inject
  // numeric index keys ("0", "1", ...) into the item's extra.
  const customExtra =
    rec.extra !== null && typeof rec.extra === "object" && !Array.isArray(rec.extra)
      ? rec.extra
      : {};

  return {
    events: [
      {
        id: newId(),
        type: ENTITY_TYPES.EVENT,
        subtype: EVENT_SUBTYPES.POST,
        occurredAt,
        actor: "person-self",
        content: { title: `文档: ${title}`, text: title },
        ingestedAt,
        source,
        extra: {
          platform,
          docId: rec.docId,
          docType,
          url,
          createdMs: rec.createdMs || null,
          updatedMs: rec.updatedMs || null,
          itemRef: itemId,
        },
      },
    ],
    items: [
      {
        id: itemId,
        type: ENTITY_TYPES.ITEM,
        subtype: ITEM_SUBTYPES.DOCUMENT,
        name: title,
        ingestedAt,
        source,
        extra: {
          platform,
          docId: rec.docId,
          docType,
          url,
          // Platform-specific fields are spread last, as in the original.
          ...customExtra,
        },
      },
    ],
    persons: [],
    places: [],
    topics: [],
  };
}
|
|
358
|
+
|
|
359
|
+
/**
 * Placeholder fetch used when no `fetchFn` was injected: cookie-api mode
 * cannot work without one, so every call rejects.
 *
 * @param {object} _opts ignored
 * @returns {Promise<never>} always rejects
 */
async function defaultFetch(_opts) {
  const reason = "document-base: no fetchFn configured for cookie-api mode";
  throw new Error(reason);
}
|
|
362
|
+
|
|
363
|
+
module.exports = {
|
|
364
|
+
createDocumentAdapter,
|
|
365
|
+
normalizeDocumentRecord,
|
|
366
|
+
parseTime,
|
|
367
|
+
SNAPSHOT_SCHEMA_VERSION,
|
|
368
|
+
KIND_DOCUMENT,
|
|
369
|
+
VALID_SNAPSHOT_KINDS,
|
|
370
|
+
};
|
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* _video-base — shared infrastructure for "video watch-history" adapters
|
|
3
|
+
* (爱奇艺 / 腾讯视频 / etc.), Phase 13+ §12.1 (ROI ⭐⭐ each).
|
|
4
|
+
*
|
|
5
|
+
* These platforms expose the same shape of personal data: a paginated list of
|
|
6
|
+
* videos the user watched (观看记录) + optionally favourited/追剧 (收藏). Rather
|
|
7
|
+
* than copy ~300 lines per platform (mirroring _document-base / shopping-base /
|
|
8
|
+
* travel-base), `createVideoAdapter(config)` returns a fully-formed adapter
|
|
9
|
+
* class with snapshot + cookie-api modes; each platform supplies only its
|
|
10
|
+
* endpoints + field mapping.
|
|
11
|
+
*
|
|
12
|
+
* 1. snapshot mode (opts.inputPath): JSON schemaVersion 1, stateless.
|
|
13
|
+
* 2. cookie-api mode (opts.account.cookies): fetch watch / favourite lists via
|
|
14
|
+
* the injected `fetchFn` (Android in-APK cc → OkHttp; desktop hub →
|
|
15
|
+
* Electron WebView net request), paginate. A sign seam (opts.signProvider)
|
|
16
|
+
* covers anti-bot tokens; best-effort unsigned. Endpoints overridable via
|
|
17
|
+
* opts.watchUrl / opts.favouriteUrl (best-effort, not field-verified —
|
|
18
|
+
* FAMILY-23 playbook).
|
|
19
|
+
*
|
|
20
|
+
* normalize() emits, per item: a MEDIA event (watch) or LIKE event (favourite)
|
|
21
|
+
* + a MEDIA item, mirroring netease-music / music-kugou so the vault can both
|
|
22
|
+
* timeline "我看了 X" and list the video entity.
|
|
23
|
+
*
|
|
24
|
+
* Snapshot schema (schemaVersion 1):
|
|
25
|
+
* {
|
|
26
|
+
* "schemaVersion": 1, "snapshottedAt": <ms>,
|
|
27
|
+
* "account": { "userId": "...", "name": "..." },
|
|
28
|
+
* "events": [
|
|
29
|
+
* { "kind": "watch", "id": "...", "videoId": "...", "title": "...",
|
|
30
|
+
* "category": "movie|tv|variety|anime|...", "episode": "...",
|
|
31
|
+
* "channel": "...", "durationSec": N, "capturedAt": <s|ms> },
|
|
32
|
+
* { "kind": "favourite", "id": "...", "videoId": "...", "title": "...",
|
|
33
|
+
* "category": "...", "capturedAt": <ms> }
|
|
34
|
+
* ]
|
|
35
|
+
* }
|
|
36
|
+
*/
|
|
37
|
+
|
|
38
|
+
"use strict";
|
|
39
|
+
|
|
40
|
+
const fs = require("node:fs");
|
|
41
|
+
const { newId } = require("../ids");
|
|
42
|
+
const {
|
|
43
|
+
ENTITY_TYPES,
|
|
44
|
+
EVENT_SUBTYPES,
|
|
45
|
+
ITEM_SUBTYPES,
|
|
46
|
+
CAPTURED_BY,
|
|
47
|
+
} = require("../constants");
|
|
48
|
+
|
|
49
|
+
const SNAPSHOT_SCHEMA_VERSION = 1;
|
|
50
|
+
const KIND_WATCH = "watch";
|
|
51
|
+
const KIND_FAVOURITE = "favourite";
|
|
52
|
+
const VALID_SNAPSHOT_KINDS = Object.freeze([KIND_WATCH, KIND_FAVOURITE]);
|
|
53
|
+
const PAGE_SIZE = 30;
|
|
54
|
+
|
|
55
|
+
/**
 * Coerce a loosely-typed timestamp into epoch milliseconds.
 *
 * Numeric scaling (numbers and digit-only strings alike):
 *   - above 1e12            → already milliseconds, returned unchanged;
 *   - from 1e9 up to 1e12   → seconds, multiplied by 1000;
 *   - below 1e9             → returned unchanged.
 * Digit-only strings may carry surrounding whitespace. Any other string is
 * handed to Date.parse(); Date instances yield their epoch-millisecond value.
 *
 * @param {*} v candidate timestamp
 * @returns {number|null} the scaled value, or null when `v` cannot be interpreted
 */
function parseTime(v) {
  const scale = (n) => {
    if (n > 1e12) return n;
    return n >= 1e9 ? n * 1000 : n;
  };

  if (v instanceof Date) {
    const ms = v.getTime();
    return Number.isFinite(ms) ? ms : null;
  }
  if (Number.isFinite(v)) return scale(v);
  if (typeof v !== "string") return null;

  // Trim first: " 1700000000\n" must hit the epoch branch rather than fall
  // through to Date.parse, whose handling of non-ISO text is engine-specific.
  const text = v.trim();
  if (/^\d+$/.test(text)) return scale(parseInt(text, 10));

  const parsed = Date.parse(text);
  return Number.isFinite(parsed) ? parsed : null;
}
|
|
67
|
+
|
|
68
|
+
/**
|
|
69
|
+
* @param {object} config
|
|
70
|
+
* @param {string} config.NAME e.g. "video-iqiyi"
|
|
71
|
+
* @param {string} config.VERSION
|
|
72
|
+
* @param {string} config.platform e.g. "iqiyi"
|
|
73
|
+
* @param {string} config.watchUrl best-effort watch-history endpoint
|
|
74
|
+
* @param {string} config.favouriteUrl best-effort favourite/追剧 endpoint
|
|
75
|
+
* @param {(resp:any)=>any[]} config.extractItems
|
|
76
|
+
* @param {(raw:any)=>object|null} config.mapItem
|
|
77
|
+
* VideoRecord = { videoId, title, category, episode, channel, durationSec, url, occurredAt? }
|
|
78
|
+
*/
|
|
79
|
+
function createVideoAdapter(config) {
|
|
80
|
+
const { NAME, VERSION, platform, watchUrl, favouriteUrl, extractItems, mapItem } = config;
|
|
81
|
+
const { CookieAuth } = require("./shopping-base");
|
|
82
|
+
|
|
83
|
+
function stableOriginalId(kind, id) {
|
|
84
|
+
const safe =
|
|
85
|
+
(typeof id === "string" && id.length > 0 && id) ||
|
|
86
|
+
(typeof id === "number" && Number.isFinite(id) && String(id)) ||
|
|
87
|
+
`unknown-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
|
|
88
|
+
return `${platform}:${kind}:${safe}`;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
class VideoAdapter {
|
|
92
|
+
constructor(opts = {}) {
|
|
93
|
+
this.account = opts.account || null;
|
|
94
|
+
this._cookieAuth =
|
|
95
|
+
opts.account && opts.account.cookies
|
|
96
|
+
? new CookieAuth({ platform, cookies: opts.account.cookies })
|
|
97
|
+
: null;
|
|
98
|
+
this._fetchFn = typeof opts.fetchFn === "function" ? opts.fetchFn : defaultFetch;
|
|
99
|
+
this._signProvider =
|
|
100
|
+
typeof opts.signProvider === "function" ? opts.signProvider : null;
|
|
101
|
+
this._urls = {
|
|
102
|
+
watch: opts.watchUrl || watchUrl,
|
|
103
|
+
favourite: opts.favouriteUrl || favouriteUrl,
|
|
104
|
+
};
|
|
105
|
+
|
|
106
|
+
this.name = NAME;
|
|
107
|
+
this.version = VERSION;
|
|
108
|
+
this.capabilities = ["sync:snapshot", "sync:cookie-api", `parse:${platform}-watch`, `parse:${platform}-favourite`];
|
|
109
|
+
this.extractMode = "web-api";
|
|
110
|
+
this.rateLimits = {};
|
|
111
|
+
this.dataDisclosure = {
|
|
112
|
+
fields: [`${platform}:watch (title / category / episode / channel)`, `${platform}:favourite (title / category)`],
|
|
113
|
+
sensitivity: "low",
|
|
114
|
+
legalGate: false,
|
|
115
|
+
defaultInclude: { watch: true, favourite: true },
|
|
116
|
+
};
|
|
117
|
+
this._deps = { fs };
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
async authenticate(ctx = {}) {
|
|
121
|
+
if (ctx && typeof ctx.inputPath === "string" && ctx.inputPath.length > 0) {
|
|
122
|
+
try {
|
|
123
|
+
this._deps.fs.accessSync(ctx.inputPath, this._deps.fs.constants.R_OK);
|
|
124
|
+
} catch (err) {
|
|
125
|
+
return { ok: false, reason: "INPUT_PATH_UNREADABLE", message: `snapshot not readable at ${ctx.inputPath}: ${err.message}` };
|
|
126
|
+
}
|
|
127
|
+
return { ok: true, mode: "snapshot-file" };
|
|
128
|
+
}
|
|
129
|
+
if (this._cookieAuth) {
|
|
130
|
+
const ok = await this._cookieAuth.validate();
|
|
131
|
+
if (!ok) return { ok: false, reason: "INVALID_COOKIE", error: "cookies missing" };
|
|
132
|
+
return { ok: true, account: (this.account && this.account.userId) || null, mode: "cookie" };
|
|
133
|
+
}
|
|
134
|
+
return {
|
|
135
|
+
ok: false,
|
|
136
|
+
reason: "NO_INPUT",
|
|
137
|
+
message: `${NAME}.authenticate: needs opts.inputPath (snapshot mode) OR opts.account.cookies (cookie-api mode)`,
|
|
138
|
+
};
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
async healthCheck() {
|
|
142
|
+
if (this._cookieAuth) {
|
|
143
|
+
const r = await this.authenticate();
|
|
144
|
+
return r.ok ? { ok: true, lastChecked: Date.now() } : { ok: false, reason: r.reason, error: r.error };
|
|
145
|
+
}
|
|
146
|
+
return { ok: true, lastChecked: Date.now() };
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
async *sync(opts = {}) {
|
|
150
|
+
if (typeof opts.inputPath === "string" && opts.inputPath.length > 0) {
|
|
151
|
+
yield* this._syncViaSnapshot(opts);
|
|
152
|
+
return;
|
|
153
|
+
}
|
|
154
|
+
if (this._cookieAuth) {
|
|
155
|
+
yield* this._syncViaCookie(opts);
|
|
156
|
+
return;
|
|
157
|
+
}
|
|
158
|
+
throw new Error(`${NAME}.sync: needs opts.inputPath (snapshot mode) OR opts.account.cookies (cookie-api mode)`);
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
async *_syncViaSnapshot(opts) {
|
|
162
|
+
const raw = this._deps.fs.readFileSync(opts.inputPath, "utf-8");
|
|
163
|
+
const snapshot = JSON.parse(raw);
|
|
164
|
+
if (!snapshot || typeof snapshot !== "object" || snapshot.schemaVersion !== SNAPSHOT_SCHEMA_VERSION) {
|
|
165
|
+
throw new Error(
|
|
166
|
+
`${NAME}.sync: snapshot schemaVersion mismatch (got ${snapshot && snapshot.schemaVersion}, expected ${SNAPSHOT_SCHEMA_VERSION})`,
|
|
167
|
+
);
|
|
168
|
+
}
|
|
169
|
+
const fallback =
|
|
170
|
+
Number.isFinite(snapshot.snapshottedAt) && snapshot.snapshottedAt > 0
|
|
171
|
+
? Math.floor(snapshot.snapshottedAt)
|
|
172
|
+
: Date.now();
|
|
173
|
+
const account = snapshot.account && typeof snapshot.account === "object" ? snapshot.account : null;
|
|
174
|
+
const include = opts.include || {};
|
|
175
|
+
const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
|
|
176
|
+
const events = Array.isArray(snapshot.events) ? snapshot.events : [];
|
|
177
|
+
let emitted = 0;
|
|
178
|
+
for (const ev of events) {
|
|
179
|
+
if (emitted >= limit) return;
|
|
180
|
+
if (!ev || typeof ev !== "object" || !VALID_SNAPSHOT_KINDS.includes(ev.kind)) continue;
|
|
181
|
+
if (include[ev.kind] === false) continue;
|
|
182
|
+
const id = (typeof ev.id === "string" && ev.id) || ev.videoId || null;
|
|
183
|
+
yield {
|
|
184
|
+
adapter: NAME,
|
|
185
|
+
kind: ev.kind,
|
|
186
|
+
originalId: stableOriginalId(ev.kind, id),
|
|
187
|
+
capturedAt: parseTime(ev.capturedAt) || fallback,
|
|
188
|
+
payload: { record: snapshotEventToRecord(ev), kind: ev.kind, account },
|
|
189
|
+
};
|
|
190
|
+
emitted += 1;
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
async *_syncViaCookie(opts = {}) {
|
|
195
|
+
if (!(await this._cookieAuth.validate())) return;
|
|
196
|
+
const cookies = this._cookieAuth.toHeader();
|
|
197
|
+
const include = opts.include || {};
|
|
198
|
+
const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
|
|
199
|
+
const maxPages = Number.isInteger(opts.maxPages) && opts.maxPages > 0 ? opts.maxPages : 10;
|
|
200
|
+
|
|
201
|
+
const plan = [
|
|
202
|
+
{ kind: KIND_WATCH, url: this._urls.watch },
|
|
203
|
+
{ kind: KIND_FAVOURITE, url: this._urls.favourite },
|
|
204
|
+
];
|
|
205
|
+
|
|
206
|
+
let emitted = 0;
|
|
207
|
+
for (const step of plan) {
|
|
208
|
+
if (include[step.kind] === false) continue;
|
|
209
|
+
if (!step.url) continue;
|
|
210
|
+
let page = 1;
|
|
211
|
+
while (page <= maxPages) {
|
|
212
|
+
const query = { page, pageSize: PAGE_SIZE };
|
|
213
|
+
let sign = null;
|
|
214
|
+
if (this._signProvider) {
|
|
215
|
+
sign = await this._signProvider({ url: step.url, query, cookies });
|
|
216
|
+
}
|
|
217
|
+
const resp = await this._fetchFn({ url: step.url, cookies, query, sign });
|
|
218
|
+
const items = extractItems(resp) || [];
|
|
219
|
+
if (!items.length) break;
|
|
220
|
+
for (const it of items) {
|
|
221
|
+
const rec = mapItem(it);
|
|
222
|
+
if (!rec || !rec.videoId) continue;
|
|
223
|
+
if (emitted >= limit) return;
|
|
224
|
+
yield {
|
|
225
|
+
adapter: NAME,
|
|
226
|
+
kind: step.kind,
|
|
227
|
+
originalId: stableOriginalId(step.kind, rec.videoId),
|
|
228
|
+
capturedAt: rec.occurredAt || Date.now(),
|
|
229
|
+
payload: { record: rec, kind: step.kind },
|
|
230
|
+
};
|
|
231
|
+
emitted += 1;
|
|
232
|
+
}
|
|
233
|
+
if (items.length < PAGE_SIZE) break;
|
|
234
|
+
page += 1;
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
normalize(raw) {
|
|
240
|
+
if (!raw || !raw.payload || !raw.payload.record) {
|
|
241
|
+
throw new Error(`${NAME}.normalize: payload.record missing`);
|
|
242
|
+
}
|
|
243
|
+
const kind = raw.kind || raw.payload.kind;
|
|
244
|
+
const subtype = kind === KIND_FAVOURITE ? EVENT_SUBTYPES.LIKE : EVENT_SUBTYPES.MEDIA;
|
|
245
|
+
const verb = kind === KIND_FAVOURITE ? "收藏" : "观看";
|
|
246
|
+
return normalizeVideoRecord(raw.payload.record, raw, platform, NAME, VERSION, subtype, verb);
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
return VideoAdapter;
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
/**
 * Convert one snapshot "watch" / "favourite" event into a VideoRecord, the
 * same shape a platform's mapItem callback produces.
 *
 * Missing fields fall back to placeholders: videoId "unknown", a localized
 * "unknown video" title, and null for every other field.
 *
 * @param {object} ev snapshot event
 * @returns {object} VideoRecord
 */
function snapshotEventToRecord(ev) {
  const rawId = ev.videoId || ev.id || "unknown";

  let durationSec = null;
  if (Number.isFinite(ev.durationSec)) {
    durationSec = ev.durationSec;
  }

  const record = {};
  record.videoId = String(rawId);
  record.title = ev.title || "(未知视频)";
  record.category = ev.category || ev.type || null;
  record.episode = ev.episode || null;
  record.channel = ev.channel || ev.uploader || null;
  record.durationSec = durationSec;
  record.url = ev.url || null;
  record.occurredAt = parseTime(ev.capturedAt);
  return record;
}
|
|
265
|
+
|
|
266
|
+
/**
 * Turn one VideoRecord into vault entities: an EVENT of the given subtype
 * plus a MEDIA ITEM, linked through `extra.itemRef`.
 *
 * @param {object} rec       VideoRecord `{ videoId, title, category, episode, channel, durationSec, url, occurredAt? }`
 * @param {object} [raw]     envelope yielded by sync(); `originalId` and `capturedAt` are read from it
 * @param {string} platform  short platform id, used in ids and `extra.platform`
 * @param {string} NAME      adapter name recorded in `source.adapter`
 * @param {string} VERSION   adapter version recorded in `source.adapterVersion`
 * @param {string} subtype   event subtype to stamp on the emitted event
 * @param {string} verb      label prefixed to the event title
 * @returns {{events: object[], items: object[], persons: [], places: [], topics: []}}
 */
function normalizeVideoRecord(rec, raw, platform, NAME, VERSION, subtype, verb) {
  // This helper is exported; tolerate a missing envelope instead of throwing
  // a TypeError on `raw.capturedAt`.
  const envelope = raw || {};
  const ingestedAt = Date.now();
  const occurredAt = rec.occurredAt || envelope.capturedAt || ingestedAt;
  const source = {
    adapter: NAME,
    adapterVersion: VERSION,
    originalId: envelope.originalId,
    capturedAt: envelope.capturedAt || occurredAt,
    capturedBy: CAPTURED_BY.API,
  };
  const title = rec.title || "(未知视频)";
  const epSuffix = rec.episode ? ` ${rec.episode}` : "";
  const category = rec.category || null;
  const channel = rec.channel || null;
  const itemId = `item-${platform}-video-${rec.videoId}`;

  return {
    events: [
      {
        id: newId(),
        type: ENTITY_TYPES.EVENT,
        subtype,
        occurredAt,
        actor: "person-self",
        content: { title: `${verb}: ${title}${epSuffix}`, text: title },
        ingestedAt,
        source,
        extra: {
          platform,
          videoId: rec.videoId,
          category,
          episode: rec.episode || null,
          channel,
          // `!= null` keeps a legitimate duration of 0.
          durationSec: rec.durationSec != null ? rec.durationSec : null,
          url: rec.url || null,
          itemRef: itemId,
        },
      },
    ],
    items: [
      {
        id: itemId,
        type: ENTITY_TYPES.ITEM,
        subtype: ITEM_SUBTYPES.MEDIA,
        name: title,
        ingestedAt,
        source,
        extra: { platform, kind: "video", videoId: rec.videoId, category, channel },
      },
    ],
    persons: [],
    places: [],
    topics: [],
  };
}
|
|
318
|
+
|
|
319
|
+
/**
 * Placeholder fetch used when no `fetchFn` was injected: cookie-api mode
 * cannot work without one, so every call rejects.
 *
 * @param {object} _opts ignored
 * @returns {Promise<never>} always rejects
 */
async function defaultFetch(_opts) {
  const reason = "video-base: no fetchFn configured for cookie-api mode";
  throw new Error(reason);
}
|
|
322
|
+
|
|
323
|
+
module.exports = {
|
|
324
|
+
createVideoAdapter,
|
|
325
|
+
normalizeVideoRecord,
|
|
326
|
+
parseTime,
|
|
327
|
+
SNAPSHOT_SCHEMA_VERSION,
|
|
328
|
+
KIND_WATCH,
|
|
329
|
+
KIND_FAVOURITE,
|
|
330
|
+
VALID_SNAPSHOT_KINDS,
|
|
331
|
+
};
|