@chainlesschain/personal-data-hub 0.4.7 → 0.4.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/doc-baidu-netdisk.test.js +102 -0
- package/__tests__/adapters/doc-platforms.test.js +177 -0
- package/__tests__/adapters/music-kugou.test.js +187 -0
- package/__tests__/adapters/recruit-boss.test.js +180 -0
- package/__tests__/adapters/shopping-dianping.test.js +239 -0
- package/__tests__/adapters/social-csdn.test.js +175 -0
- package/__tests__/adapters/social-zhihu.test.js +246 -0
- package/__tests__/adapters/travel-ctrip.test.js +175 -1
- package/__tests__/adapters/travel-didi.test.js +204 -0
- package/__tests__/adapters/travel-tongcheng.test.js +289 -0
- package/__tests__/adapters/video-platforms.test.js +152 -0
- package/lib/adapter-guide.js +13 -1
- package/lib/adapters/_document-base.js +370 -0
- package/lib/adapters/_video-base.js +331 -0
- package/lib/adapters/doc-baidu-netdisk/index.js +91 -0
- package/lib/adapters/doc-tencent-docs/index.js +94 -0
- package/lib/adapters/doc-wps/index.js +77 -0
- package/lib/adapters/music-kugou/index.js +418 -0
- package/lib/adapters/recruit-boss/index.js +442 -0
- package/lib/adapters/shopping-dianping/index.js +473 -0
- package/lib/adapters/social-csdn/index.js +444 -0
- package/lib/adapters/social-zhihu/index.js +488 -0
- package/lib/adapters/travel-ctrip/index.js +255 -40
- package/lib/adapters/travel-didi/index.js +327 -0
- package/lib/adapters/travel-tongcheng/index.js +393 -0
- package/lib/adapters/video-iqiyi/index.js +75 -0
- package/lib/adapters/video-tencent/index.js +78 -0
- package/lib/index.js +24 -0
- package/package.json +1 -1
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
import { describe, it, expect } from "vitest";
|
|
4
|
+
const fs = require("node:fs");
|
|
5
|
+
const path = require("node:path");
|
|
6
|
+
const os = require("node:os");
|
|
7
|
+
const crypto = require("node:crypto");
|
|
8
|
+
|
|
9
|
+
const iqiyi = require("../../lib/adapters/video-iqiyi");
|
|
10
|
+
const tv = require("../../lib/adapters/video-tencent");
|
|
11
|
+
|
|
12
|
+
/**
 * Write `content` to a uniquely named JSON file in the OS temp directory.
 *
 * @param {string} content - text to store (written as UTF-8)
 * @returns {string} absolute path of the file that was created; the caller removes it
 */
function writeTmp(content) {
  const fileName = `cc-vid-${crypto.randomUUID()}.json`;
  const target = path.join(os.tmpdir(), fileName);
  fs.writeFileSync(target, content, "utf-8");
  return target;
}
|
|
17
|
+
/**
 * Drain an (async) iterable into an array, preserving iteration order.
 *
 * @param {AsyncIterable<any>|Iterable<any>} gen - source to exhaust
 * @returns {Promise<any[]>} every value the source produced
 */
async function collect(gen) {
  const gathered = [];
  for await (const entry of gen) {
    gathered.push(entry);
  }
  return gathered;
}
|
|
22
|
+
|
|
23
|
+
const COOKIES = "P00001=abc; QC005=xyz";
|
|
24
|
+
|
|
25
|
+
// Checks for the iQIYI adapter's exported mapping helpers (no file or network access).
describe("video-iqiyi mappers", () => {
  it("name/version + mapItem (channel code → category)", () => {
    expect(iqiyi.NAME).toBe("video-iqiyi");

    // `addtime` is supplied in seconds; the assertion below expects milliseconds back.
    const rawEntry = {
      tvId: "100",
      albumName: "庆余年",
      channelId: 2,
      videoName: "庆余年",
      addtime: 1716300000,
      videoDuration: 2700,
    };
    const mapped = iqiyi.mapItem(rawEntry);
    const expectedFields = { videoId: "100", title: "庆余年", category: "tv", durationSec: 2700 };
    expect(mapped).toMatchObject(expectedFields);
    expect(mapped.occurredAt).toBe(1716300000000);
    expect(mapped.url).toContain("iqiyi.com");

    // An entry that carries no id must be rejected with null.
    const withoutId = iqiyi.mapItem({ albumName: "noid" });
    expect(withoutId).toBe(null);
  });

  it("extractItems tolerant", () => {
    // Two accepted response layouts, plus an empty response.
    const flatLayout = { data: [{ tvId: 1 }] };
    const nestedLayout = { data: { rc: [{ tvId: 1 }] } };
    expect(iqiyi.extractItems(flatLayout)).toHaveLength(1);
    expect(iqiyi.extractItems(nestedLayout)).toHaveLength(1);
    expect(iqiyi.extractItems({})).toEqual([]);
  });
});
|
|
40
|
+
|
|
41
|
+
// Checks for the Tencent Video adapter's exported mapping helper.
describe("video-tencent mappers", () => {
  it("name/version + mapItem (typeId → category)", () => {
    expect(tv.NAME).toBe("video-tencent");

    const rawEntry = {
      cid: "C9",
      cTitle: "三体",
      cTypeId: 4,
      viewTime: 1716310000,
      duration: 3000,
    };
    const mapped = tv.mapItem(rawEntry);
    const expectedFields = { videoId: "C9", title: "三体", category: "anime", durationSec: 3000 };
    expect(mapped).toMatchObject(expectedFields);
    expect(mapped.url).toContain("v.qq.com");

    // An entry that carries no id must be rejected with null.
    const withoutId = tv.mapItem({ cTitle: "noid" });
    expect(withoutId).toBe(null);
  });
});
|
|
50
|
+
|
|
51
|
+
// Snapshot-mode behaviour of IqiyiVideoAdapter, driven by a temp JSON file per test.
describe("IqiyiVideoAdapter (via _video-base)", () => {
  // One "watch" and one "favourite" event in a schemaVersion-1 snapshot.
  const watchEvent = { kind: "watch", id: "w1", videoId: "V1", title: "狂飙", category: "tv", episode: "第5集", durationSec: 2600, capturedAt: 1716300000000 };
  const favouriteEvent = { kind: "favourite", id: "fa1", videoId: "V2", title: "流浪地球2", category: "movie" };
  const SNAP = JSON.stringify({
    schemaVersion: 1,
    snapshottedAt: 1716383000000,
    account: { userId: "u1" },
    events: [watchEvent, favouriteEvent],
  });

  it("snapshot sync 2 kinds + normalize watch→media / favourite→like + media item", async () => {
    const snapshotPath = writeTmp(SNAP);
    try {
      const adapter = new iqiyi.IqiyiVideoAdapter();
      const synced = await collect(adapter.sync({ inputPath: snapshotPath }));
      const kinds = synced.map((entry) => entry.kind);
      expect(kinds).toEqual(["watch", "favourite"]);

      const [watchRaw, favouriteRaw] = synced;

      const watch = adapter.normalize(watchRaw);
      const [watchEventOut] = watch.events;
      const [watchItemOut] = watch.items;
      expect(watchEventOut.subtype).toBe("media");
      expect(watchEventOut.content.title).toBe("观看: 狂飙 第5集");
      expect(watchItemOut.subtype).toBe("media");
      expect(watchItemOut.extra.platform).toBe("iqiyi");
      // The event must point at the item emitted alongside it.
      expect(watchEventOut.extra.itemRef).toBe(watchItemOut.id);

      const fav = adapter.normalize(favouriteRaw);
      expect(fav.events[0].subtype).toBe("like");
      expect(fav.events[0].content.title).toBe("收藏: 流浪地球2");
    } finally {
      fs.unlinkSync(snapshotPath);
    }
  });

  it("schema mismatch + unknown kind + include + limit", async () => {
    const snapshotPath = writeTmp(SNAP);
    try {
      const adapter = new iqiyi.IqiyiVideoAdapter();

      // include: { watch: false } leaves only the favourite event.
      const withoutWatch = await collect(adapter.sync({ inputPath: snapshotPath, include: { watch: false } }));
      expect(withoutWatch.map((entry) => entry.kind)).toEqual(["favourite"]);

      const limited = await collect(adapter.sync({ inputPath: snapshotPath, limit: 1 }));
      expect(limited).toHaveLength(1);

      expect(() => adapter.normalize({ payload: {} })).toThrow(/payload.record missing/);
    } finally {
      fs.unlinkSync(snapshotPath);
    }

    // A snapshot with an unsupported schemaVersion must make sync reject.
    const badPath = writeTmp(JSON.stringify({ schemaVersion: 9, events: [] }));
    try {
      const adapter = new iqiyi.IqiyiVideoAdapter();
      const pending = collect(adapter.sync({ inputPath: badPath }));
      await expect(pending).rejects.toThrow(/schemaVersion mismatch/);
    } finally {
      fs.unlinkSync(badPath);
    }
  });
});
|
|
103
|
+
|
|
104
|
+
// Cookie-api behaviour of TencentVideoAdapter, using injected fetchFn / signProvider stubs.
describe("TencentVideoAdapter cookie-api mode", () => {
  it("authenticate cookie (userId optional)", async () => {
    const adapter = new tv.TencentVideoAdapter({ account: { cookies: COOKIES } });
    const outcome = await adapter.authenticate();
    expect(outcome).toEqual({ ok: true, account: null, mode: "cookie" });
  });

  it("sync fetches watch+favourite, paginates, normalizes", async () => {
    // The stub tells the two endpoints apart by looking for "History" in the URL.
    const kindForUrl = (u) => (u.includes("History") ? "watch" : "favourite");
    const firstPageByKind = {
      watch: [{ cid: "C1", cTitle: "漫长的季节", cTypeId: 1, viewTime: 1716300000 }],
      favourite: [{ cid: "C2", cTitle: "繁花", cTypeId: 1 }],
    };
    const calls = [];
    const stubFetch = async ({ url, cookies, query, sign }) => {
      const k = kindForUrl(url);
      calls.push({ k, cookies, page: query.page, sign });
      // Only page 1 carries data; any later page is empty.
      const list = query.page === 1 ? firstPageByKind[k] : [];
      return { data: { list } };
    };
    const adapter = new tv.TencentVideoAdapter({
      account: { cookies: COOKIES, userId: "u1" },
      fetchFn: stubFetch,
    });

    const synced = await collect(adapter.sync({}));
    const sortedKinds = synced.map((entry) => entry.kind).sort();
    expect(sortedKinds).toEqual(["favourite", "watch"]);

    // Every request carried the cookies and, with no signProvider, a null sign.
    const allUnsigned = calls.every((c) => c.cookies === COOKIES && c.sign === null);
    expect(allUnsigned).toBe(true);

    const watchRaw = synced.find((entry) => entry.kind === "watch");
    const watch = adapter.normalize(watchRaw);
    expect(watch.events[0].content.title).toBe("观看: 漫长的季节");
    expect(watch.items[0].extra.platform).toBe("tencent-video");
  });

  it("invokes signProvider + limit + empty + default fetch + no input", async () => {
    // 1) signProvider is consulted and `limit` caps the output.
    const signCalls = [];
    const twoItemFetch = async ({ query }) => {
      const list = query.page === 1 ? [{ cid: "C1", cTitle: "x" }, { cid: "C2", cTitle: "y" }] : [];
      return { list };
    };
    const recordingSigner = async (ctx) => {
      signCalls.push(ctx);
      return "sig";
    };
    const signedAdapter = new tv.TencentVideoAdapter({
      account: { cookies: COOKIES },
      fetchFn: twoItemFetch,
      signProvider: recordingSigner,
    });
    const limited = await collect(signedAdapter.sync({ limit: 1, include: { favourite: false } }));
    expect(limited).toHaveLength(1);
    expect(signCalls.length).toBeGreaterThan(0);

    // 2) A non-JSON response (here an HTML string) produces no items.
    const htmlAdapter = new tv.TencentVideoAdapter({
      account: { cookies: COOKIES },
      fetchFn: async () => "<html>login</html>",
    });
    const fromHtml = await collect(htmlAdapter.sync({}));
    expect(fromHtml).toEqual([]);

    // 3) Cookies without a fetchFn: sync rejects.
    const noFetchAdapter = new tv.TencentVideoAdapter({ account: { cookies: COOKIES } });
    await expect(collect(noFetchAdapter.sync({}))).rejects.toThrow(/no fetchFn configured/);

    // 4) Neither inputPath nor cookies: sync rejects with a usage error.
    const bareAdapter = new tv.TencentVideoAdapter();
    await expect(collect(bareAdapter.sync({}))).rejects.toThrow(/needs opts.inputPath/);
  });
});
|
package/lib/adapter-guide.js
CHANGED
|
@@ -32,6 +32,9 @@ const { READINESS_CATEGORY } = require("./adapter-readiness");
|
|
|
32
32
|
const DISPLAY_NAMES = Object.freeze({
|
|
33
33
|
"social-bilibili": "哔哩哔哩",
|
|
34
34
|
"social-weibo": "微博",
|
|
35
|
+
"social-zhihu": "知乎",
|
|
36
|
+
"recruit-boss": "BOSS 直聘",
|
|
37
|
+
"social-csdn": "CSDN",
|
|
35
38
|
"social-douyin": "抖音",
|
|
36
39
|
"social-xiaohongshu": "小红书",
|
|
37
40
|
"social-toutiao": "今日头条",
|
|
@@ -51,8 +54,11 @@ const DISPLAY_NAMES = Object.freeze({
|
|
|
51
54
|
"shopping-jd": "京东",
|
|
52
55
|
"shopping-meituan": "美团",
|
|
53
56
|
"shopping-pinduoduo": "拼多多",
|
|
57
|
+
"shopping-dianping": "大众点评",
|
|
54
58
|
"travel-12306": "12306 铁路",
|
|
55
59
|
"travel-ctrip": "携程",
|
|
60
|
+
"travel-tongcheng": "同程旅行",
|
|
61
|
+
"travel-didi": "滴滴企业版",
|
|
56
62
|
"travel-amap": "高德地图",
|
|
57
63
|
"travel-baidu-map": "百度地图",
|
|
58
64
|
"travel-tencent-map": "腾讯地图",
|
|
@@ -63,7 +69,13 @@ const DISPLAY_NAMES = Object.freeze({
|
|
|
63
69
|
"ai-chat-history": "AI 对话历史",
|
|
64
70
|
"apple-health": "Apple 健康",
|
|
65
71
|
"netease-music": "网易云音乐",
|
|
72
|
+
"music-kugou": "酷狗音乐",
|
|
73
|
+
"video-iqiyi": "爱奇艺",
|
|
74
|
+
"video-tencent": "腾讯视频",
|
|
66
75
|
"weread": "微信读书",
|
|
76
|
+
"doc-wps": "WPS 云文档",
|
|
77
|
+
"doc-tencent-docs": "腾讯文档",
|
|
78
|
+
"doc-baidu-netdisk": "百度网盘",
|
|
67
79
|
"browser-history-chrome": "Chrome 浏览历史",
|
|
68
80
|
"browser-history-edge": "Edge 浏览历史",
|
|
69
81
|
"vscode": "VS Code",
|
|
@@ -533,7 +545,7 @@ function getAdapterGuide(name, category) {
|
|
|
533
545
|
// usable standalone, e.g. CLI without a live readiness probe).
|
|
534
546
|
function _inferCategory(name) {
|
|
535
547
|
if (ADAPTER_OVERRIDES[name] && name === "wechat") return READINESS_CATEGORY.DEVICE;
|
|
536
|
-
if (/^(email-imap|finance-alipay|alipay-bill|ai-chat-history|weread)$/.test(name))
|
|
548
|
+
if (/^(email-imap|finance-alipay|alipay-bill|ai-chat-history|weread|doc-wps|doc-tencent-docs|doc-baidu-netdisk|recruit-boss|social-csdn)$/.test(name))
|
|
537
549
|
return READINESS_CATEGORY.CREDENTIAL;
|
|
538
550
|
if (/^(messaging-(telegram|whatsapp)|wechat|wechat-pc|messaging-qq|qq-pc|dingtalk-pc|feishu-pc|travel-amap)$/.test(name))
|
|
539
551
|
return READINESS_CATEGORY.DEVICE;
|
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* _document-base — shared infrastructure for "own-document list" adapters
|
|
3
|
+
* (WPS 云文档 / 腾讯文档 / etc.), Phase 13+ §12.1 "自创文档列表".
|
|
4
|
+
*
|
|
5
|
+
* These platforms all expose the same shape of personal data: a paginated list
|
|
6
|
+
* of documents the user created/owns (title, type, create/modify time, url).
|
|
7
|
+
* Rather than copy ~300 lines per platform (mirroring shopping-base /
|
|
8
|
+
* travel-base / _local-im-pc-adapter), `createDocumentAdapter(config)` returns
|
|
9
|
+
* a fully-formed adapter class with snapshot + cookie-api modes; each platform
|
|
10
|
+
* supplies only its endpoint + field mapping.
|
|
11
|
+
*
|
|
12
|
+
* 1. snapshot mode (opts.inputPath): JSON schemaVersion 1, stateless.
|
|
13
|
+
* 2. cookie-api mode (opts.account.cookies): fetch the owner's document list
|
|
14
|
+
* via the injected `fetchFn` (Android in-APK cc → OkHttp; desktop hub →
|
|
15
|
+
* Electron WebView net request), paginate, map each doc → a DocumentRecord.
|
|
16
|
+
* A sign seam (opts.signProvider) covers any anti-bot token; best-effort
|
|
17
|
+
* unsigned when absent. Endpoint overridable via opts.listUrl (best-effort,
|
|
18
|
+
* not field-verified — FAMILY-23 playbook).
|
|
19
|
+
*
|
|
20
|
+
* normalize() emits, per document: an authoring EVENT (subtype POST) + an ITEM
|
|
21
|
+
* (subtype DOCUMENT), mirroring netease-music's event+item dual-emit so the
|
|
22
|
+
* vault can both timeline "我创建了 X" and list the document entity.
|
|
23
|
+
*
|
|
24
|
+
* Snapshot schema (schemaVersion 1):
|
|
25
|
+
* {
|
|
26
|
+
* "schemaVersion": 1, "snapshottedAt": <ms>,
|
|
27
|
+
* "account": { "userId": "...", "name": "..." },
|
|
28
|
+
* "events": [
|
|
29
|
+
* { "kind": "document", "id": "doc-<id>", "docId": "...", "title": "...",
|
|
30
|
+
* "docType": "doc|sheet|slide|pdf|form|...", "url": "...",
|
|
31
|
+
* "createdTime": <s|ms>, "updatedTime": <s|ms> }
|
|
32
|
+
* ]
|
|
33
|
+
* }
|
|
34
|
+
*/
|
|
35
|
+
|
|
36
|
+
"use strict";
|
|
37
|
+
|
|
38
|
+
const fs = require("node:fs");
|
|
39
|
+
const { newId } = require("../ids");
|
|
40
|
+
const {
|
|
41
|
+
ENTITY_TYPES,
|
|
42
|
+
EVENT_SUBTYPES,
|
|
43
|
+
ITEM_SUBTYPES,
|
|
44
|
+
CAPTURED_BY,
|
|
45
|
+
} = require("../constants");
|
|
46
|
+
|
|
47
|
+
const SNAPSHOT_SCHEMA_VERSION = 1;
|
|
48
|
+
const KIND_DOCUMENT = "document";
|
|
49
|
+
const VALID_SNAPSHOT_KINDS = Object.freeze([KIND_DOCUMENT]);
|
|
50
|
+
const PAGE_SIZE = 20;
|
|
51
|
+
|
|
52
|
+
/**
 * Normalise a loosely typed timestamp to epoch milliseconds.
 *
 * Accepted inputs:
 *   - a finite number, read as epoch seconds or epoch milliseconds (see cut-off below);
 *   - an all-digit string, handled like the equivalent number;
 *   - any other string, handed to `Date.parse`.
 *
 * @param {*} v - raw timestamp value
 * @returns {number|null} epoch milliseconds, or null when `v` cannot be interpreted
 */
function parseTime(v) {
  // Magnitudes below the cut-off are treated as seconds. 1e11 seconds is roughly year 5138,
  // while 1e11 milliseconds is March 1973, so realistic seconds values stay below it and
  // millisecond values from 1973 onwards stay above it. (A 1e12 cut-off would misread
  // millisecond timestamps earlier than 2001-09-09 as seconds and scale them by 1000.)
  const SECONDS_CUTOFF = 1e11;
  const toMillis = (n) => (n >= SECONDS_CUTOFF ? n : n * 1000);

  if (typeof v === "number") {
    return Number.isFinite(v) ? toMillis(v) : null;
  }
  if (typeof v !== "string") return null;

  if (/^\d+$/.test(v)) {
    return toMillis(parseInt(v, 10));
  }
  const parsed = Date.parse(v);
  return Number.isFinite(parsed) ? parsed : null;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Build a document-list adapter class for one platform.
 *
 * The returned class supports two sync modes:
 *   - snapshot mode: `sync({ inputPath })` reads a schemaVersion-1 JSON snapshot file;
 *   - cookie-api mode: constructed with `opts.account.cookies`, it pages through the list
 *     endpoint using the injected `fetchFn` (and `signProvider`, when given).
 *
 * @param {object} config
 * @param {string} config.NAME            adapter name, e.g. "doc-wps"
 * @param {string} config.VERSION         semver string
 * @param {string} config.platform        short platform id, e.g. "wps"
 * @param {string} config.defaultListUrl  best-effort list endpoint
 * @param {(resp:any)=>any[]} config.extractDocs  pull the doc array from a response
 * @param {(raw:any)=>object|null} config.mapDoc  map a raw doc → DocumentRecord
 *   DocumentRecord = { docId, title, docType, url, createdMs, updatedMs, extra? }
 * @returns {Function} the generated DocumentAdapter class
 */
function createDocumentAdapter(config) {
  const {
    NAME,
    VERSION,
    platform,
    defaultListUrl,
    extractDocs,
    mapDoc,
  } = config;

  // Loaded when the factory runs rather than at module load.
  // NOTE(review): presumably this avoids a circular import with shopping-base — confirm.
  const { CookieAuth } = require("./shopping-base");

  /**
   * Build the `<platform>:document:<id>` identifier used as `originalId`.
   * Non-empty strings and finite numbers are used as-is.
   * NOTE(review): any other id falls back to a time + random token, so records without an
   * id get a different originalId on every sync.
   */
  function stableOriginalId(id) {
    const safe =
      (typeof id === "string" && id.length > 0 && id) ||
      (typeof id === "number" && Number.isFinite(id) && String(id)) ||
      `unknown-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
    return `${platform}:document:${safe}`;
  }

  class DocumentAdapter {
    /**
     * @param {object} [opts]
     * @param {object} [opts.account]        `{ cookies, userId? }`; cookies enable cookie-api mode
     * @param {Function} [opts.fetchFn]      async `({ url, cookies, query, sign }) => response`
     * @param {Function} [opts.signProvider] async `({ url, query, cookies }) => sign`
     * @param {string} [opts.listUrl]        overrides `config.defaultListUrl`
     */
    constructor(opts = {}) {
      this.account = opts.account || null;
      this._cookieAuth =
        opts.account && opts.account.cookies
          ? new CookieAuth({ platform, cookies: opts.account.cookies })
          : null;
      // Without an injected fetchFn, cookie-api mode fails loudly through defaultFetch.
      this._fetchFn = typeof opts.fetchFn === "function" ? opts.fetchFn : defaultFetch;
      this._signProvider =
        typeof opts.signProvider === "function" ? opts.signProvider : null;
      this._listUrl =
        typeof opts.listUrl === "string" && opts.listUrl.length > 0
          ? opts.listUrl
          : defaultListUrl;

      this.name = NAME;
      this.version = VERSION;
      this.capabilities = ["sync:snapshot", "sync:cookie-api", `parse:${platform}-documents`];
      this.extractMode = "web-api";
      // Declared limits only; nothing in this class enforces them.
      this.rateLimits = { perMinute: 8, perDay: 200 };
      this.dataDisclosure = {
        fields: [`${platform}:document (title / docType / createdTime / url)`],
        sensitivity: "medium",
        legalGate: false,
        defaultInclude: { document: true },
      };

      // File-system access goes through _deps so it can be swapped out.
      this._deps = { fs };
    }

    /**
     * Report whether the adapter can run in the requested mode.
     *
     * @param {object} [ctx]
     * @param {string} [ctx.inputPath] snapshot file to check for readability
     * @returns {Promise<object>} `{ ok: true, mode, ... }` or `{ ok: false, reason, ... }`
     */
    async authenticate(ctx = {}) {
      // Snapshot mode takes precedence whenever an inputPath is supplied.
      if (ctx && typeof ctx.inputPath === "string" && ctx.inputPath.length > 0) {
        try {
          this._deps.fs.accessSync(ctx.inputPath, this._deps.fs.constants.R_OK);
        } catch (err) {
          return {
            ok: false,
            reason: "INPUT_PATH_UNREADABLE",
            message: `snapshot not readable at ${ctx.inputPath}: ${err.message}`,
          };
        }
        return { ok: true, mode: "snapshot-file" };
      }
      if (this._cookieAuth) {
        const ok = await this._cookieAuth.validate();
        if (!ok) return { ok: false, reason: "INVALID_COOKIE", error: "cookies missing" };
        return {
          ok: true,
          account: (this.account && this.account.userId) || null,
          mode: "cookie",
        };
      }
      return {
        ok: false,
        reason: "NO_INPUT",
        message: `${NAME}.authenticate: needs opts.inputPath (snapshot mode) OR opts.account.cookies (cookie-api mode)`,
      };
    }

    /**
     * Health probe: re-validates cookies when in cookie-api mode, otherwise reports ok.
     *
     * @returns {Promise<object>} `{ ok: true, lastChecked }` or `{ ok: false, reason, error }`
     */
    async healthCheck() {
      if (this._cookieAuth) {
        const r = await this.authenticate();
        return r.ok
          ? { ok: true, lastChecked: Date.now() }
          : { ok: false, reason: r.reason, error: r.error };
      }
      return { ok: true, lastChecked: Date.now() };
    }

    /**
     * Yield raw document records, from a snapshot file when `opts.inputPath` is set,
     * otherwise from the cookie-api endpoint.
     *
     * @param {object} [opts]
     * @param {string} [opts.inputPath]      snapshot file (selects snapshot mode)
     * @param {object} [opts.include]        `{ document: false }` disables the only kind
     * @param {number} [opts.limit]          positive integer cap on yielded records
     * @param {*} [opts.sinceWatermark]      cookie-api only: epoch ms lower bound
     * @param {number} [opts.maxPages]       cookie-api only: page cap (default 20)
     * @throws {Error} when neither an inputPath nor cookies are available
     */
    async *sync(opts = {}) {
      if (typeof opts.inputPath === "string" && opts.inputPath.length > 0) {
        yield* this._syncViaSnapshot(opts);
        return;
      }
      if (this._cookieAuth) {
        yield* this._syncViaCookie(opts);
        return;
      }
      throw new Error(
        `${NAME}.sync: needs opts.inputPath (snapshot mode) OR opts.account.cookies (cookie-api mode)`,
      );
    }

    /**
     * Snapshot mode: read and validate the JSON file, then yield one raw record per
     * "document" event.
     *
     * @throws {Error} on unreadable/invalid JSON or a schemaVersion other than 1
     */
    async *_syncViaSnapshot(opts) {
      const raw = this._deps.fs.readFileSync(opts.inputPath, "utf-8");
      const snapshot = JSON.parse(raw);
      if (
        !snapshot ||
        typeof snapshot !== "object" ||
        snapshot.schemaVersion !== SNAPSHOT_SCHEMA_VERSION
      ) {
        throw new Error(
          `${NAME}.sync: snapshot schemaVersion mismatch (got ${snapshot && snapshot.schemaVersion}, expected ${SNAPSHOT_SCHEMA_VERSION})`,
        );
      }
      // Used as capturedAt for events that carry no usable timestamp of their own.
      const fallbackCapturedAt =
        Number.isFinite(snapshot.snapshottedAt) && snapshot.snapshottedAt > 0
          ? Math.floor(snapshot.snapshottedAt)
          : Date.now();
      const account =
        snapshot.account && typeof snapshot.account === "object" ? snapshot.account : null;
      const include = opts.include || {};
      const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;

      const events = Array.isArray(snapshot.events) ? snapshot.events : [];
      let emitted = 0;
      for (const ev of events) {
        if (emitted >= limit) return;
        // Malformed entries, unknown kinds and excluded kinds are skipped silently.
        if (!ev || typeof ev !== "object") continue;
        if (!VALID_SNAPSHOT_KINDS.includes(ev.kind)) continue;
        if (include[ev.kind] === false) continue;

        const capturedAt =
          parseTime(ev.capturedAt) ||
          parseTime(ev.updatedTime) ||
          parseTime(ev.createdTime) ||
          fallbackCapturedAt;
        const id =
          (typeof ev.id === "string" && ev.id.length > 0 && ev.id) || ev.docId || null;

        yield {
          adapter: NAME,
          kind: KIND_DOCUMENT,
          originalId: stableOriginalId(id),
          capturedAt,
          payload: { record: snapshotEventToRecord(ev), account },
        };
        emitted += 1;
      }
    }

    /**
     * Cookie-api mode: page through the list endpoint (PAGE_SIZE docs per request) and
     * yield one raw record per mapped document. Yields nothing when the cookies do not
     * validate or when documents are excluded via `opts.include`.
     */
    async *_syncViaCookie(opts = {}) {
      if (!(await this._cookieAuth.validate())) return;
      const cookies = this._cookieAuth.toHeader();
      const include = opts.include || {};
      if (include[KIND_DOCUMENT] === false) return;
      const sinceMs =
        opts.sinceWatermark != null
          ? parseInt(String(opts.sinceWatermark), 10) || 0
          : 0;
      const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
      const maxPages =
        Number.isInteger(opts.maxPages) && opts.maxPages > 0 ? opts.maxPages : 20;

      let emitted = 0;
      let offset = 0;
      let page = 0;
      while (page < maxPages) {
        const query = { offset, limit: PAGE_SIZE };
        let sign = null;
        if (this._signProvider) {
          sign = await this._signProvider({ url: this._listUrl, query, cookies });
        }
        const resp = await this._fetchFn({ url: this._listUrl, cookies, query, sign });
        const docs = extractDocs(resp) || [];
        if (!docs.length) break;
        let reachedWatermark = false;
        for (const d of docs) {
          const rec = mapDoc(d);
          if (!rec || !rec.docId) continue;
          const ts = rec.updatedMs || rec.createdMs || null;
          // Stops at the first document older than the watermark.
          // NOTE(review): assumes the endpoint lists newest first — confirm per platform.
          if (sinceMs && ts && ts < sinceMs) {
            reachedWatermark = true;
            break;
          }
          yield {
            adapter: NAME,
            kind: KIND_DOCUMENT,
            originalId: stableOriginalId(rec.docId),
            capturedAt: ts || Date.now(),
            payload: { record: rec },
          };
          emitted += 1;
          // Stop as soon as the cap is met, so a limit that lands on a page boundary does
          // not trigger one more sign + fetch round-trip just to discover it is done.
          if (emitted >= limit) return;
        }
        // A short page means the listing is exhausted.
        if (reachedWatermark || docs.length < PAGE_SIZE) break;
        offset += docs.length;
        page += 1;
      }
    }

    /**
     * Convert one raw record produced by `sync` into vault entities.
     *
     * @param {object} raw - a value yielded by `sync`
     * @returns {object} `{ events, items, persons, places, topics }`
     * @throws {Error} when `raw.payload.record` is missing
     */
    normalize(raw) {
      if (!raw || !raw.payload || !raw.payload.record) {
        throw new Error(`${NAME}.normalize: payload.record missing`);
      }
      return normalizeDocumentRecord(raw.payload.record, raw, platform, NAME, VERSION);
    }
  }

  return DocumentAdapter;
}
|
|
288
|
+
|
|
289
|
+
/**
 * Translate one snapshot event into a DocumentRecord (the same shape `mapDoc` produces).
 *
 * Missing fields get defaults: docId "unknown", title "(无标题)", docType "doc",
 * url null, extra {}. Timestamps are converted with `parseTime`.
 *
 * @param {object} ev - a snapshot event
 * @returns {object} `{ docId, title, docType, url, createdMs, updatedMs, extra }`
 */
function snapshotEventToRecord(ev) {
  const rawId = ev.docId || ev.id || "unknown";
  const hasExtraObject = ev.extra && typeof ev.extra === "object";

  const record = {};
  record.docId = String(rawId);
  record.title = ev.title || "(无标题)";
  record.docType = ev.docType || ev.type || "doc";
  record.url = ev.url || null;
  record.createdMs = parseTime(ev.createdTime);
  record.updatedMs = parseTime(ev.updatedTime);
  record.extra = hasExtraObject ? ev.extra : {};
  return record;
}
|
|
301
|
+
|
|
302
|
+
/**
 * Turn a DocumentRecord into vault entities: one authoring event plus one document item.
 *
 * The event's `extra.itemRef` holds the item's id, and both entities share the same
 * `source` object.
 *
 * @param {object} rec       DocumentRecord `{ docId, title, docType, url, createdMs, updatedMs, extra? }`
 * @param {object} raw       raw sync record; `originalId` and `capturedAt` are read from it
 * @param {string} platform  short platform id
 * @param {string} NAME      adapter name
 * @param {string} VERSION   adapter version
 * @returns {object} `{ events, items, persons, places, topics }`
 */
function normalizeDocumentRecord(rec, raw, platform, NAME, VERSION) {
  const ingestedAt = Date.now();
  // Prefer the document's own timestamps, then the capture time, then "now".
  const occurredAt = rec.updatedMs || rec.createdMs || raw.capturedAt || ingestedAt;

  const source = {
    adapter: NAME,
    adapterVersion: VERSION,
    originalId: raw.originalId,
    capturedAt: raw.capturedAt || occurredAt,
    capturedBy: CAPTURED_BY.API,
  };

  const title = rec.title || "(无标题)";
  const docType = rec.docType || "doc";
  const url = rec.url || null;
  const itemId = `item-${platform}-doc-${rec.docId}`;

  const authoringEvent = {
    id: newId(),
    type: ENTITY_TYPES.EVENT,
    subtype: EVENT_SUBTYPES.POST,
    occurredAt,
    actor: "person-self",
    content: { title: `文档: ${title}`, text: title },
    ingestedAt,
    source,
    extra: {
      platform,
      docId: rec.docId,
      docType,
      url,
      createdMs: rec.createdMs || null,
      updatedMs: rec.updatedMs || null,
      itemRef: itemId,
    },
  };

  const documentItem = {
    id: itemId,
    type: ENTITY_TYPES.ITEM,
    subtype: ITEM_SUBTYPES.DOCUMENT,
    name: title,
    ingestedAt,
    source,
    // Keys from rec.extra are spread last, so they win over the fields listed before them.
    extra: {
      platform,
      docId: rec.docId,
      docType,
      url,
      ...(rec.extra || {}),
    },
  };

  return {
    events: [authoringEvent],
    items: [documentItem],
    persons: [],
    places: [],
    topics: [],
  };
}
|
|
358
|
+
|
|
359
|
+
/**
 * Placeholder fetch used when no `fetchFn` was injected: always rejects, so cookie-api
 * mode fails with an explicit error.
 *
 * @param {object} _opts - ignored
 * @returns {Promise<never>} always rejects
 * @throws {Error} "document-base: no fetchFn configured for cookie-api mode"
 */
async function defaultFetch(_opts) {
  const reason = "document-base: no fetchFn configured for cookie-api mode";
  throw new Error(reason);
}
|
|
362
|
+
|
|
363
|
+
module.exports = {
|
|
364
|
+
createDocumentAdapter,
|
|
365
|
+
normalizeDocumentRecord,
|
|
366
|
+
parseTime,
|
|
367
|
+
SNAPSHOT_SCHEMA_VERSION,
|
|
368
|
+
KIND_DOCUMENT,
|
|
369
|
+
VALID_SNAPSHOT_KINDS,
|
|
370
|
+
};
|