@chainlesschain/personal-data-hub 0.4.7 → 0.4.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/biz-tianyancha.test.js +159 -0
- package/__tests__/adapters/doc-baidu-netdisk.test.js +102 -0
- package/__tests__/adapters/doc-camscanner.test.js +147 -0
- package/__tests__/adapters/doc-platforms.test.js +177 -0
- package/__tests__/adapters/gov-ixiamen.test.js +150 -0
- package/__tests__/adapters/gov-tax.test.js +135 -0
- package/__tests__/adapters/health-meiyou.test.js +125 -0
- package/__tests__/adapters/music-kugou.test.js +187 -0
- package/__tests__/adapters/recruit-boss.test.js +180 -0
- package/__tests__/adapters/shopping-dianping.test.js +239 -0
- package/__tests__/adapters/social-csdn.test.js +175 -0
- package/__tests__/adapters/social-dongchedi.test.js +165 -0
- package/__tests__/adapters/social-zhihu.test.js +246 -0
- package/__tests__/adapters/travel-ctrip.test.js +175 -1
- package/__tests__/adapters/travel-didi.test.js +204 -0
- package/__tests__/adapters/travel-tongcheng.test.js +289 -0
- package/__tests__/adapters/video-platforms.test.js +152 -0
- package/__tests__/adapters/video-xigua.test.js +106 -0
- package/__tests__/adapters/wework-pc.test.js +124 -0
- package/lib/adapter-guide.js +25 -3
- package/lib/adapters/_document-base.js +370 -0
- package/lib/adapters/_video-base.js +331 -0
- package/lib/adapters/biz-tianyancha/index.js +348 -0
- package/lib/adapters/doc-baidu-netdisk/index.js +91 -0
- package/lib/adapters/doc-camscanner/index.js +102 -0
- package/lib/adapters/doc-tencent-docs/index.js +94 -0
- package/lib/adapters/doc-wps/index.js +77 -0
- package/lib/adapters/gov-ixiamen/index.js +380 -0
- package/lib/adapters/gov-tax/index.js +451 -0
- package/lib/adapters/health-meiyou/index.js +393 -0
- package/lib/adapters/music-kugou/index.js +418 -0
- package/lib/adapters/recruit-boss/index.js +442 -0
- package/lib/adapters/shopping-dianping/index.js +473 -0
- package/lib/adapters/social-csdn/index.js +444 -0
- package/lib/adapters/social-dongchedi/index.js +360 -0
- package/lib/adapters/social-zhihu/index.js +488 -0
- package/lib/adapters/travel-ctrip/index.js +255 -40
- package/lib/adapters/travel-didi/index.js +327 -0
- package/lib/adapters/travel-tongcheng/index.js +393 -0
- package/lib/adapters/video-iqiyi/index.js +75 -0
- package/lib/adapters/video-tencent/index.js +78 -0
- package/lib/adapters/video-xigua/index.js +68 -0
- package/lib/adapters/wework-pc/index.js +31 -0
- package/lib/index.js +40 -0
- package/package.json +1 -1
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
import { describe, it, expect } from "vitest";
|
|
4
|
+
const fs = require("node:fs");
|
|
5
|
+
const path = require("node:path");
|
|
6
|
+
const os = require("node:os");
|
|
7
|
+
const crypto = require("node:crypto");
|
|
8
|
+
|
|
9
|
+
const { XiguaVideoAdapter, extractItems, mapItem, NAME, VERSION } = require("../../lib/adapters/video-xigua");
|
|
10
|
+
|
|
11
|
+
/**
 * Write `content` to a uniquely named JSON file inside the OS temp directory.
 *
 * @param {string} content - text to persist (written as UTF-8)
 * @returns {string} path of the file that was created; the caller removes it
 */
function writeTmp(content) {
  const fileName = `cc-xig-${crypto.randomUUID()}.json`;
  const target = path.join(os.tmpdir(), fileName);
  fs.writeFileSync(target, content, "utf-8");
  return target;
}
|
|
16
|
+
/**
 * Drain an (async) iterable into an array, preserving iteration order.
 *
 * @param {AsyncIterable<any>|Iterable<any>} gen - source to exhaust
 * @returns {Promise<any[]>} every value the source produced
 */
async function collect(gen) {
  const collected = [];
  for await (const entry of gen) {
    collected.push(entry);
  }
  return collected;
}
|
|
21
|
+
|
|
22
|
+
const COOKIES = "sid_tt=abc; ttwid=xyz";
|
|
23
|
+
|
|
24
|
+
// Unit tests for the pure helpers exported by the adapter module (no adapter instance, no I/O).
describe("video-xigua mappers", () => {
  it("name/version", () => {
    expect(NAME).toBe("video-xigua");
    expect(VERSION).toBe("0.1.0");
  });

  it("mapItem reads nested article + bytedance fields", () => {
    const nestedItem = {
      behot_time: 1716300000,
      article: { group_id: "G1", title: "城市骑行 vlog", video_duration: 620, user_name: "骑行小王" },
    };
    const mapped = mapItem(nestedItem);

    expect(mapped).toMatchObject({ videoId: "G1", title: "城市骑行 vlog", durationSec: 620, channel: "骑行小王" });
    // behot_time above is in seconds; the mapped timestamp is expected in milliseconds.
    expect(mapped.occurredAt).toBe(1716300000000);
    expect(mapped.url).toContain("ixigua.com");

    // An item without any group id is expected to map to null.
    const withoutId = { article: { title: "noid" } };
    expect(mapItem(withoutId)).toBe(null);
  });

  it("mapItem reads flat item too", () => {
    const flatItem = { group_id: "G2", title: "测评", duration: 300, create_time: 1716310000 };
    const mapped = mapItem(flatItem);
    expect(mapped).toMatchObject({ videoId: "G2", title: "测评", durationSec: 300 });
  });

  it("extractItems tolerant", () => {
    const historyResp = { data: { history: [{ group_id: 1 }] } };
    const favoritesResp = { data: { favorites: [{ group_id: 1 }] } };

    expect(extractItems(historyResp)).toHaveLength(1);
    expect(extractItems(favoritesResp)).toHaveLength(1);
    // An empty response must yield an empty list rather than throw.
    expect(extractItems({})).toEqual([]);
  });
});
|
|
46
|
+
|
|
47
|
+
// Adapter-level tests: snapshot mode, cookie-api mode, and the two "nothing configured" failures.
describe("XiguaVideoAdapter (via _video-base)", () => {
  // Minimal schemaVersion-1 snapshot holding one event of each kind.
  const SNAP = JSON.stringify({
    schemaVersion: 1,
    snapshottedAt: 1716383000000,
    account: { userId: "u1" },
    events: [
      { kind: "watch", id: "w1", videoId: "V1", title: "纪录片:长江", category: "documentary", durationSec: 3600, capturedAt: 1716300000000 },
      { kind: "favourite", id: "fa1", videoId: "V2", title: "搞笑合集" },
    ],
  });

  it("snapshot sync 2 kinds + normalize watch→media / favourite→like", async () => {
    const snapshotPath = writeTmp(SNAP);
    try {
      const a = new XiguaVideoAdapter();
      const items = await collect(a.sync({ inputPath: snapshotPath }));
      const kinds = items.map((x) => x.kind);
      expect(kinds).toEqual(["watch", "favourite"]);

      const [watchRaw, favRaw] = items;

      const w = a.normalize(watchRaw);
      expect(w.events[0].subtype).toBe("media");
      expect(w.events[0].content.title).toBe("观看: 纪录片:长江");
      expect(w.items[0].subtype).toBe("media");
      expect(w.items[0].extra.platform).toBe("xigua");

      const fav = a.normalize(favRaw);
      expect(fav.events[0].subtype).toBe("like");
      expect(fav.events[0].content.title).toBe("收藏: 搞笑合集");
    } finally {
      // Always remove the temp snapshot, even when an assertion above fails.
      fs.unlinkSync(snapshotPath);
    }
  });

  it("cookie-api fetch + normalize", async () => {
    // The fake server keys its canned data by which endpoint the URL points at.
    const byUrl = (u) => (u.includes("history") ? "watch" : "favourite");
    const data = {
      watch: [{ behot_time: 1716300000, article: { group_id: "C1", title: "汽车评测", video_duration: 500 } }],
      favourite: [{ group_id: "C2", title: "美食教程" }],
    };
    const calls = [];

    // Serves the canned list on page 1 and an empty list afterwards so pagination stops.
    const fetchFn = async ({ url, cookies, query, sign }) => {
      const k = byUrl(url);
      calls.push({ k, cookies, page: query.page, sign });
      const list = query.page === 1 ? data[k] : [];
      return { data: { list } };
    };
    const a = new XiguaVideoAdapter({
      account: { cookies: COOKIES, userId: "u1" },
      fetchFn,
    });

    expect(await a.authenticate()).toEqual({ ok: true, account: "u1", mode: "cookie" });

    const items = await collect(a.sync({}));
    const sortedKinds = items.map((x) => x.kind).sort();
    expect(sortedKinds).toEqual(["favourite", "watch"]);

    // Every request carried the configured cookies and, with no signProvider, a null sign.
    const allUnsignedWithCookies = calls.every((c) => c.cookies === COOKIES && c.sign === null);
    expect(allUnsignedWithCookies).toBe(true);

    const watchRaw = items.find((x) => x.kind === "watch");
    const w = a.normalize(watchRaw);
    expect(w.events[0].content.title).toBe("观看: 汽车评测");
  });

  it("default fetch throws; no input throws", async () => {
    // Cookies but no fetchFn: the built-in placeholder fetch must reject.
    const cookieOnly = new XiguaVideoAdapter({ account: { cookies: COOKIES } });
    await expect(collect(cookieOnly.sync({}))).rejects.toThrow(/no fetchFn configured/);

    // Neither snapshot path nor cookies: sync has nothing to read from.
    const bare = new XiguaVideoAdapter();
    await expect(collect(bare.sync({}))).rejects.toThrow(/needs opts.inputPath/);
  });
});
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
import { describe, it, expect } from "vitest";
|
|
4
|
+
|
|
5
|
+
const { WeWorkPcAdapter, NAME, VERSION } = require("../../lib/adapters/wework-pc");
|
|
6
|
+
const { partitionBatch } = require("../../lib/batch");
|
|
7
|
+
|
|
8
|
+
/**
 * Build a fake database driver class answering sqlite_master, table_info and
 * `SELECT ... FROM "<table>"` queries from an in-memory spec.
 *
 * Every section of the spec is optional: a missing `tables`, `cols` or `rows`
 * behaves like an empty one, so partial specs return `[]` instead of throwing.
 *
 * @param {object} spec
 * @param {string[]} [spec.tables] table names reported for the `type='table'` query
 * @param {Object<string, object[]>} [spec.cols] table name → rows returned for `table_info("<name>")`
 * @param {Object<string, object[]>} [spec.rows] table name → rows returned for `FROM "<name>"`
 * @returns {Function} a `FakeDb` class exposing prepare / pragma / exec / close
 */
function makeFakeDb(spec) {
  class FakeStmt {
    constructor(sql) {
      this.sql = sql;
    }

    /** Route the statement by its SQL text; unknown SQL yields no rows. */
    all() {
      const sql = this.sql;
      if (/type='table'/.test(sql)) {
        return (spec.tables || []).map((name) => ({ name }));
      }
      const tableInfo = sql.match(/table_info\("(\w+)"\)/);
      if (tableInfo) {
        return (spec.cols || {})[tableInfo[1]] || [];
      }
      const from = sql.match(/FROM "(\w+)"/);
      if (from) {
        return (spec.rows || {})[from[1]] || [];
      }
      return [];
    }

    /** Single-row queries always answer with a fixed `{ n: 1 }` row. */
    get() {
      return { n: 1 };
    }
  }

  return class FakeDb {
    // eslint-disable-next-line no-unused-vars
    constructor(_p, _o) {}
    prepare(sql) {
      return new FakeStmt(sql);
    }
    pragma() {}
    exec() {}
    close() {}
  };
}
|
|
38
|
+
|
|
39
|
+
// WeChat Work-ish message table: matches pattern + has time/sender/peer/content.
|
|
40
|
+
const SPEC = {
|
|
41
|
+
tables: ["chat_message", "session_meta", "sqlite_sequence"],
|
|
42
|
+
cols: {
|
|
43
|
+
chat_message: [
|
|
44
|
+
{ name: "localId" },
|
|
45
|
+
{ name: "createTime" },
|
|
46
|
+
{ name: "sender" },
|
|
47
|
+
{ name: "conversationId" },
|
|
48
|
+
{ name: "content" },
|
|
49
|
+
],
|
|
50
|
+
session_meta: [{ name: "vid" }, { name: "name" }],
|
|
51
|
+
},
|
|
52
|
+
rows: {
|
|
53
|
+
chat_message: [
|
|
54
|
+
{ localId: "m1", createTime: 1700000000, sender: "u1", conversationId: "c1", content: "项目周会 10 点" },
|
|
55
|
+
{ localId: "m2", createTime: 1700000010, sender: "u2", conversationId: "c1", content: "收到" },
|
|
56
|
+
],
|
|
57
|
+
},
|
|
58
|
+
};
|
|
59
|
+
|
|
60
|
+
/**
 * Create a WeWorkPcAdapter whose filesystem and DB driver are replaced by fakes.
 *
 * @param {object} spec - table/column/row spec handed to makeFakeDb
 * @param {object} [options]
 * @param {boolean} [options.exists=true] - value the fake `existsSync` reports
 * @returns {WeWorkPcAdapter} adapter wired to the fakes
 */
function adapter(spec, { exists = true } = {}) {
  const fakeFs = { existsSync: () => exists, accessSync: () => {}, constants: { R_OK: 4 } };
  const instance = new WeWorkPcAdapter({ dbPath: "/fake.db" });
  instance._deps.fs = fakeFs;
  // The driver class is built on demand, each time the adapter asks for one.
  instance._deps.dbDriverFactory = () => makeFakeDb(spec);
  return instance;
}
|
|
66
|
+
|
|
67
|
+
/**
 * Exhaust an (async) iterable and return everything it produced, in order.
 *
 * @param {AsyncIterable<any>|Iterable<any>} iter - source to drain
 * @returns {Promise<any[]>} the produced values
 */
async function collect(iter) {
  const results = [];
  for await (const record of iter) {
    results.push(record);
  }
  return results;
}
|
|
72
|
+
|
|
73
|
+
// Adapter-level tests driven entirely by the fake DB driver and fake filesystem above.
describe("WeWorkPcAdapter (企业微信 honest best-effort)", () => {
  it("exposes name/version", () => {
    expect(NAME).toBe("wework-pc");
    expect(VERSION).toBe("0.1.0");
    expect(new WeWorkPcAdapter().name).toBe("wework-pc");
  });

  it("no-arg construct + device-pull + legalGate + APP_NOT_INSTALLED readiness", async () => {
    const a = new WeWorkPcAdapter();
    // Discovery sees an empty machine: nothing exists, nothing to list.
    a._deps.discoveryDeps = {
      fs: { existsSync: () => false, readdirSync: () => [], statSync: () => ({ size: 0 }), constants: { R_OK: 4 } },
      home: "/no-home",
      env: {},
    };
    expect(a.extractMode).toBe("device-pull");
    expect(a.dataDisclosure.legalGate).toBe(true);
    const r = await a.authenticate({ readinessOnly: true });
    expect(r.reason).toBe("APP_NOT_INSTALLED");
  });

  it("reads messages → valid events, platform=wework, raw preserved", async () => {
    const a = adapter(SPEC);
    const raws = await collect(a.sync({ dbPath: "/fake.db" }));
    expect(raws).toHaveLength(2);

    // Merge the per-record normalize() output into one batch before validating it.
    const merged = { events: [], persons: [], places: [], items: [], topics: [] };
    for (const r of raws) {
      const n = a.normalize(r);
      for (const k of Object.keys(merged)) merged[k].push(...n[k]);
    }

    const { valid, invalidReasons } = partitionBatch(merged);
    expect(invalidReasons).toHaveLength(0);
    expect(valid.events).toHaveLength(2);
    expect(valid.events[0].extra.platform).toBe("wework");
    expect(valid.events[0].extra.textResolved).toBe(true);
    expect(valid.events[0].extra.rawRow).toBeTruthy();
  });

  it("emits local-im-read progress diagnostic", async () => {
    const a = adapter(SPEC);
    const ev = [];
    await collect(a.sync({ dbPath: "/fake.db", onProgress: (e) => ev.push(e) }));
    const d = ev.find((e) => e.phase === "local-im-read");
    // Fail with a clear assertion (not a TypeError on `d.messageTables`) when
    // the diagnostic event was never emitted.
    expect(d).toBeDefined();
    expect(d.messageTables).toContain("chat_message");
    expect(d.messageCount).toBe(2);
  });

  it("missing db yields nothing; unknown kind throws", async () => {
    const a = adapter(SPEC, { exists: false });
    expect(await collect(a.sync({ dbPath: "/no.db" }))).toHaveLength(0);
    expect(() => new WeWorkPcAdapter().normalize({ kind: "x", payload: { kind: "x" } })).toThrow(/unknown kind/);
  });
});
|
package/lib/adapter-guide.js
CHANGED
|
@@ -32,6 +32,11 @@ const { READINESS_CATEGORY } = require("./adapter-readiness");
|
|
|
32
32
|
const DISPLAY_NAMES = Object.freeze({
|
|
33
33
|
"social-bilibili": "哔哩哔哩",
|
|
34
34
|
"social-weibo": "微博",
|
|
35
|
+
"social-zhihu": "知乎",
|
|
36
|
+
"recruit-boss": "BOSS 直聘",
|
|
37
|
+
"social-csdn": "CSDN",
|
|
38
|
+
"social-dongchedi": "懂车帝",
|
|
39
|
+
"biz-tianyancha": "天眼查",
|
|
35
40
|
"social-douyin": "抖音",
|
|
36
41
|
"social-xiaohongshu": "小红书",
|
|
37
42
|
"social-toutiao": "今日头条",
|
|
@@ -44,6 +49,7 @@ const DISPLAY_NAMES = Object.freeze({
|
|
|
44
49
|
"qq-pc": "QQ(电脑版 NT)",
|
|
45
50
|
"dingtalk-pc": "钉钉(电脑版)",
|
|
46
51
|
"feishu-pc": "飞书(电脑版)",
|
|
52
|
+
"wework-pc": "企业微信(电脑版)",
|
|
47
53
|
"email-imap": "邮箱(IMAP)",
|
|
48
54
|
"finance-alipay": "支付宝",
|
|
49
55
|
"alipay-bill": "支付宝账单",
|
|
@@ -51,8 +57,11 @@ const DISPLAY_NAMES = Object.freeze({
|
|
|
51
57
|
"shopping-jd": "京东",
|
|
52
58
|
"shopping-meituan": "美团",
|
|
53
59
|
"shopping-pinduoduo": "拼多多",
|
|
60
|
+
"shopping-dianping": "大众点评",
|
|
54
61
|
"travel-12306": "12306 铁路",
|
|
55
62
|
"travel-ctrip": "携程",
|
|
63
|
+
"travel-tongcheng": "同程旅行",
|
|
64
|
+
"travel-didi": "滴滴企业版",
|
|
56
65
|
"travel-amap": "高德地图",
|
|
57
66
|
"travel-baidu-map": "百度地图",
|
|
58
67
|
"travel-tencent-map": "腾讯地图",
|
|
@@ -63,7 +72,18 @@ const DISPLAY_NAMES = Object.freeze({
|
|
|
63
72
|
"ai-chat-history": "AI 对话历史",
|
|
64
73
|
"apple-health": "Apple 健康",
|
|
65
74
|
"netease-music": "网易云音乐",
|
|
75
|
+
"music-kugou": "酷狗音乐",
|
|
76
|
+
"video-iqiyi": "爱奇艺",
|
|
77
|
+
"video-tencent": "腾讯视频",
|
|
78
|
+
"video-xigua": "西瓜视频",
|
|
66
79
|
"weread": "微信读书",
|
|
80
|
+
"doc-wps": "WPS 云文档",
|
|
81
|
+
"doc-tencent-docs": "腾讯文档",
|
|
82
|
+
"doc-baidu-netdisk": "百度网盘",
|
|
83
|
+
"doc-camscanner": "扫描全能王",
|
|
84
|
+
"gov-ixiamen": "i厦门",
|
|
85
|
+
"health-meiyou": "美柚",
|
|
86
|
+
"gov-tax": "个人所得税",
|
|
67
87
|
"browser-history-chrome": "Chrome 浏览历史",
|
|
68
88
|
"browser-history-edge": "Edge 浏览历史",
|
|
69
89
|
"vscode": "VS Code",
|
|
@@ -76,7 +96,8 @@ const DISPLAY_NAMES = Object.freeze({
|
|
|
76
96
|
|
|
77
97
|
// Shared guide for honest best-effort desktop IM local-DB sources (钉钉/飞书/企业微信).
|
|
78
98
|
function localImPcGuide(platform) {
|
|
79
|
-
const adapterName =
|
|
99
|
+
const adapterName =
|
|
100
|
+
platform === "钉钉" ? "dingtalk-pc" : platform === "企业微信" ? "wework-pc" : "feishu-pc";
|
|
80
101
|
return {
|
|
81
102
|
summary: `采集${platform}电脑版的聊天记录(来自本地数据库)。⚠️ v0.1 实验性:${platform}桌面库为私有结构、可能加密、随版本变化,文本解析为尽力而为,原始行会完整保留以便后续解析。`,
|
|
82
103
|
methods: [
|
|
@@ -394,6 +415,7 @@ const ADAPTER_OVERRIDES = Object.freeze({
|
|
|
394
415
|
|
|
395
416
|
"dingtalk-pc": localImPcGuide("钉钉"),
|
|
396
417
|
"feishu-pc": localImPcGuide("飞书"),
|
|
418
|
+
"wework-pc": localImPcGuide("企业微信"),
|
|
397
419
|
|
|
398
420
|
"social-bilibili": socialAdbGuide("哔哩哔哩", "观看历史 / 收藏 / 动态 / 关注"),
|
|
399
421
|
"social-weibo": socialAdbGuide("微博", "微博 / 收藏 / 关注"),
|
|
@@ -533,9 +555,9 @@ function getAdapterGuide(name, category) {
|
|
|
533
555
|
// usable standalone, e.g. CLI without a live readiness probe).
|
|
534
556
|
function _inferCategory(name) {
|
|
535
557
|
if (ADAPTER_OVERRIDES[name] && name === "wechat") return READINESS_CATEGORY.DEVICE;
|
|
536
|
-
if (/^(email-imap|finance-alipay|alipay-bill|ai-chat-history|weread)$/.test(name))
|
|
558
|
+
if (/^(email-imap|finance-alipay|alipay-bill|ai-chat-history|weread|doc-wps|doc-tencent-docs|doc-baidu-netdisk|doc-camscanner|recruit-boss|social-csdn|social-dongchedi|biz-tianyancha|gov-ixiamen|health-meiyou|gov-tax)$/.test(name))
|
|
537
559
|
return READINESS_CATEGORY.CREDENTIAL;
|
|
538
|
-
if (/^(messaging-(telegram|whatsapp)|wechat|wechat-pc|messaging-qq|qq-pc|dingtalk-pc|feishu-pc|travel-amap)$/.test(name))
|
|
560
|
+
if (/^(messaging-(telegram|whatsapp)|wechat|wechat-pc|messaging-qq|qq-pc|dingtalk-pc|feishu-pc|wework-pc|travel-amap)$/.test(name))
|
|
539
561
|
return READINESS_CATEGORY.DEVICE;
|
|
540
562
|
if (
|
|
541
563
|
/^(browser-history-|vscode|win-recent|git-activity|shell-history|local-files|apple-health)/.test(
|
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* _document-base — shared infrastructure for "own-document list" adapters
|
|
3
|
+
* (WPS 云文档 / 腾讯文档 / etc.), Phase 13+ §12.1 "自创文档列表".
|
|
4
|
+
*
|
|
5
|
+
* These platforms all expose the same shape of personal data: a paginated list
|
|
6
|
+
* of documents the user created/owns (title, type, create/modify time, url).
|
|
7
|
+
* Rather than copy ~300 lines per platform (mirroring shopping-base /
|
|
8
|
+
* travel-base / _local-im-pc-adapter), `createDocumentAdapter(config)` returns
|
|
9
|
+
* a fully-formed adapter class with snapshot + cookie-api modes; each platform
|
|
10
|
+
* supplies only its endpoint + field mapping.
|
|
11
|
+
*
|
|
12
|
+
* 1. snapshot mode (opts.inputPath): JSON schemaVersion 1, stateless.
|
|
13
|
+
* 2. cookie-api mode (opts.account.cookies): fetch the owner's document list
|
|
14
|
+
* via the injected `fetchFn` (Android in-APK cc → OkHttp; desktop hub →
|
|
15
|
+
* Electron WebView net request), paginate, map each doc → a DocumentRecord.
|
|
16
|
+
* A sign seam (opts.signProvider) covers any anti-bot token; best-effort
|
|
17
|
+
* unsigned when absent. Endpoint overridable via opts.listUrl (best-effort,
|
|
18
|
+
* not field-verified — FAMILY-23 playbook).
|
|
19
|
+
*
|
|
20
|
+
* normalize() emits, per document: an authoring EVENT (subtype POST) + an ITEM
|
|
21
|
+
* (subtype DOCUMENT), mirroring netease-music's event+item dual-emit so the
|
|
22
|
+
* vault can both timeline "我创建了 X" and list the document entity.
|
|
23
|
+
*
|
|
24
|
+
* Snapshot schema (schemaVersion 1):
|
|
25
|
+
* {
|
|
26
|
+
* "schemaVersion": 1, "snapshottedAt": <ms>,
|
|
27
|
+
* "account": { "userId": "...", "name": "..." },
|
|
28
|
+
* "events": [
|
|
29
|
+
* { "kind": "document", "id": "doc-<id>", "docId": "...", "title": "...",
|
|
30
|
+
* "docType": "doc|sheet|slide|pdf|form|...", "url": "...",
|
|
31
|
+
* "createdTime": <s|ms>, "updatedTime": <s|ms> }
|
|
32
|
+
* ]
|
|
33
|
+
* }
|
|
34
|
+
*/
|
|
35
|
+
|
|
36
|
+
"use strict";
|
|
37
|
+
|
|
38
|
+
const fs = require("node:fs");
|
|
39
|
+
const { newId } = require("../ids");
|
|
40
|
+
const {
|
|
41
|
+
ENTITY_TYPES,
|
|
42
|
+
EVENT_SUBTYPES,
|
|
43
|
+
ITEM_SUBTYPES,
|
|
44
|
+
CAPTURED_BY,
|
|
45
|
+
} = require("../constants");
|
|
46
|
+
|
|
47
|
+
const SNAPSHOT_SCHEMA_VERSION = 1;
|
|
48
|
+
const KIND_DOCUMENT = "document";
|
|
49
|
+
const VALID_SNAPSHOT_KINDS = Object.freeze([KIND_DOCUMENT]);
|
|
50
|
+
const PAGE_SIZE = 20;
|
|
51
|
+
|
|
52
|
+
/**
 * Coerce a loosely typed timestamp into epoch milliseconds.
 *
 * Accepted inputs:
 *  - finite number: epoch seconds or epoch milliseconds
 *  - all-digit string: same as the number form
 *  - any other string: handed to `Date.parse`
 *
 * @param {*} v - raw timestamp value
 * @returns {number|null} epoch milliseconds, or null when `v` cannot be interpreted
 */
function parseTime(v) {
  // Largest value still read as epoch *seconds*. 1e11 s is year ~5138, while
  // 1e11 ms is March 1973, so every realistic seconds value sits below the
  // cutoff and every realistic milliseconds value sits above it. (A 1e12
  // cutoff would misread millisecond timestamps up to 2001-09-09 as seconds.)
  const SECONDS_CEILING = 1e11;
  const toMs = (n) => (n > SECONDS_CEILING ? n : n * 1000);

  if (Number.isFinite(v)) return toMs(v);
  if (typeof v !== "string") return null;
  if (/^\d+$/.test(v)) return toMs(parseInt(v, 10));

  const parsed = Date.parse(v);
  return Number.isFinite(parsed) ? parsed : null;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Factory that produces an adapter class for one "own-document list" platform.
 *
 * The returned class offers two sync modes — snapshot file (`opts.inputPath`)
 * and cookie-api (`opts.account.cookies` plus an injected `fetchFn`) — and
 * normalizes each yielded record through `normalizeDocumentRecord`.
 *
 * @param {object} config
 * @param {string} config.NAME            adapter name, e.g. "doc-wps"
 * @param {string} config.VERSION         semver string
 * @param {string} config.platform        short platform id, e.g. "wps"
 * @param {string} config.defaultListUrl  best-effort list endpoint
 * @param {(resp:any)=>any[]} config.extractDocs  pull the doc array from a response
 * @param {(raw:any)=>object|null} config.mapDoc  map a raw doc → DocumentRecord
 *   DocumentRecord = { docId, title, docType, url, createdMs, updatedMs, extra? }
 * @returns {Function} the DocumentAdapter class (instantiate with `new`)
 */
function createDocumentAdapter(config) {
  const NAME = config.NAME;
  const VERSION = config.VERSION;
  const platform = config.platform;
  const defaultListUrl = config.defaultListUrl;
  const extractDocs = config.extractDocs;
  const mapDoc = config.mapDoc;

  // Resolved when the factory runs rather than at module load.
  // NOTE(review): presumably to avoid a circular import — confirm.
  const CookieAuth = require("./shopping-base").CookieAuth;

  /**
   * Build the `<platform>:document:<id>` original id.
   * Ids that are neither a non-empty string nor a finite number get a
   * time+random placeholder, so such ids differ on every call.
   */
  function stableOriginalId(id) {
    let suffix;
    if (typeof id === "string" && id.length > 0) {
      suffix = id;
    } else if (typeof id === "number" && Number.isFinite(id)) {
      suffix = String(id);
    } else {
      suffix = `unknown-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
    }
    return `${platform}:document:${suffix}`;
  }

  class DocumentAdapter {
    /**
     * @param {object} [opts]
     * @param {object} [opts.account]        `{ cookies, userId }`; cookies enable cookie-api mode
     * @param {Function} [opts.fetchFn]      transport used in cookie-api mode
     * @param {Function} [opts.signProvider] optional per-request sign producer
     * @param {string} [opts.listUrl]        overrides the platform's default list endpoint
     */
    constructor(opts = {}) {
      const { account, fetchFn, signProvider, listUrl } = opts;
      const hasCookies = Boolean(account && account.cookies);

      this.account = account || null;
      this._cookieAuth = hasCookies
        ? new CookieAuth({ platform, cookies: account.cookies })
        : null;
      this._fetchFn = typeof fetchFn === "function" ? fetchFn : defaultFetch;
      this._signProvider = typeof signProvider === "function" ? signProvider : null;
      this._listUrl =
        typeof listUrl === "string" && listUrl.length > 0 ? listUrl : defaultListUrl;

      this.name = NAME;
      this.version = VERSION;
      this.capabilities = ["sync:snapshot", "sync:cookie-api", `parse:${platform}-documents`];
      this.extractMode = "web-api";
      this.rateLimits = { perMinute: 8, perDay: 200 };
      this.dataDisclosure = {
        fields: [`${platform}:document (title / docType / createdTime / url)`],
        sensitivity: "medium",
        legalGate: false,
        defaultInclude: { document: true },
      };

      // Injectable dependencies so tests can swap the filesystem.
      this._deps = { fs };
    }

    /**
     * Report whether a sync could run: a readable snapshot path wins, then
     * cookie validation; with neither, the result is a NO_INPUT failure.
     *
     * @param {object} [ctx]
     * @param {string} [ctx.inputPath] snapshot file to probe for readability
     * @returns {Promise<object>} `{ ok: true, mode, ... }` or `{ ok: false, reason, ... }`
     */
    async authenticate(ctx = {}) {
      const inputPath = ctx ? ctx.inputPath : undefined;

      if (typeof inputPath === "string" && inputPath.length > 0) {
        try {
          const fsDep = this._deps.fs;
          fsDep.accessSync(inputPath, fsDep.constants.R_OK);
        } catch (err) {
          return {
            ok: false,
            reason: "INPUT_PATH_UNREADABLE",
            message: `snapshot not readable at ${inputPath}: ${err.message}`,
          };
        }
        return { ok: true, mode: "snapshot-file" };
      }

      if (!this._cookieAuth) {
        return {
          ok: false,
          reason: "NO_INPUT",
          message: `${NAME}.authenticate: needs opts.inputPath (snapshot mode) OR opts.account.cookies (cookie-api mode)`,
        };
      }

      const cookiesValid = await this._cookieAuth.validate();
      if (!cookiesValid) {
        return { ok: false, reason: "INVALID_COOKIE", error: "cookies missing" };
      }
      return {
        ok: true,
        account: (this.account && this.account.userId) || null,
        mode: "cookie",
      };
    }

    /**
     * Health probe: with cookies configured it mirrors authenticate();
     * without cookies it always reports healthy.
     *
     * @returns {Promise<object>} `{ ok: true, lastChecked }` or `{ ok: false, reason, error }`
     */
    async healthCheck() {
      if (!this._cookieAuth) {
        return { ok: true, lastChecked: Date.now() };
      }
      const auth = await this.authenticate();
      if (auth.ok) {
        return { ok: true, lastChecked: Date.now() };
      }
      return { ok: false, reason: auth.reason, error: auth.error };
    }

    /**
     * Yield raw document records. A non-empty `opts.inputPath` selects
     * snapshot mode; otherwise configured cookies select cookie-api mode.
     *
     * @param {object} [opts]
     * @throws {Error} when neither an input path nor cookies are available
     */
    async *sync(opts = {}) {
      const wantsSnapshot = typeof opts.inputPath === "string" && opts.inputPath.length > 0;
      if (wantsSnapshot) {
        yield* this._syncViaSnapshot(opts);
        return;
      }
      if (this._cookieAuth) {
        yield* this._syncViaCookie(opts);
        return;
      }
      throw new Error(
        `${NAME}.sync: needs opts.inputPath (snapshot mode) OR opts.account.cookies (cookie-api mode)`,
      );
    }

    /**
     * Snapshot mode: read the JSON file at `opts.inputPath`, check its schema
     * version, and yield one raw record per accepted event.
     * Honors `opts.include` (per-kind opt-out) and `opts.limit`.
     *
     * @throws {Error} on schemaVersion mismatch (read/parse errors propagate as-is)
     */
    async *_syncViaSnapshot(opts) {
      const snapshot = JSON.parse(this._deps.fs.readFileSync(opts.inputPath, "utf-8"));

      const versionOk =
        Boolean(snapshot) &&
        typeof snapshot === "object" &&
        snapshot.schemaVersion === SNAPSHOT_SCHEMA_VERSION;
      if (!versionOk) {
        throw new Error(
          `${NAME}.sync: snapshot schemaVersion mismatch (got ${snapshot && snapshot.schemaVersion}, expected ${SNAPSHOT_SCHEMA_VERSION})`,
        );
      }

      // Events without a usable timestamp of their own inherit the snapshot time.
      const snappedAt = snapshot.snapshottedAt;
      const fallbackCapturedAt =
        Number.isFinite(snappedAt) && snappedAt > 0 ? Math.floor(snappedAt) : Date.now();
      const account =
        snapshot.account && typeof snapshot.account === "object" ? snapshot.account : null;
      const include = opts.include || {};
      const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
      const events = Array.isArray(snapshot.events) ? snapshot.events : [];

      let emitted = 0;
      for (const ev of events) {
        if (emitted >= limit) return;

        const usable =
          ev &&
          typeof ev === "object" &&
          VALID_SNAPSHOT_KINDS.includes(ev.kind) &&
          include[ev.kind] !== false;
        if (!usable) continue;

        // First timestamp that parses to a truthy value wins.
        const capturedAt =
          [ev.capturedAt, ev.updatedTime, ev.createdTime].map(parseTime).find(Boolean) ||
          fallbackCapturedAt;
        const hasStringId = typeof ev.id === "string" && ev.id.length > 0;
        const id = hasStringId ? ev.id : ev.docId || null;

        yield {
          adapter: NAME,
          kind: KIND_DOCUMENT,
          originalId: stableOriginalId(id),
          capturedAt,
          payload: { record: snapshotEventToRecord(ev), account },
        };
        emitted += 1;
      }
    }

    /**
     * Cookie-api mode: page through the list endpoint via the injected
     * fetchFn, map each doc with `mapDoc`, and yield one raw record per doc.
     *
     * Stops on an empty or short page, after `opts.maxPages` pages (default
     * 20), after `opts.limit` records, or at the first doc whose timestamp is
     * older than `opts.sinceWatermark`.
     * NOTE(review): the watermark stop assumes the endpoint returns newest
     * documents first — confirm per platform.
     */
    async *_syncViaCookie(opts = {}) {
      const cookiesValid = await this._cookieAuth.validate();
      if (!cookiesValid) return;

      const cookies = this._cookieAuth.toHeader();
      const include = opts.include || {};
      if (include[KIND_DOCUMENT] === false) return;

      let sinceMs = 0;
      if (opts.sinceWatermark != null) {
        sinceMs = parseInt(String(opts.sinceWatermark), 10) || 0;
      }
      const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
      const maxPages =
        Number.isInteger(opts.maxPages) && opts.maxPages > 0 ? opts.maxPages : 20;
      const url = this._listUrl;

      let emitted = 0;
      let offset = 0;
      for (let page = 0; page < maxPages; page += 1) {
        const query = { offset, limit: PAGE_SIZE };
        // Without a signProvider the request goes out unsigned (sign: null).
        const sign = this._signProvider
          ? await this._signProvider({ url, query, cookies })
          : null;
        const resp = await this._fetchFn({ url, cookies, query, sign });
        const docs = extractDocs(resp) || [];
        if (!docs.length) break;

        let reachedWatermark = false;
        for (const rawDoc of docs) {
          const rec = mapDoc(rawDoc);
          if (!rec || !rec.docId) continue;

          const ts = rec.updatedMs || rec.createdMs || null;
          if (sinceMs && ts && ts < sinceMs) {
            reachedWatermark = true;
            break;
          }
          if (emitted >= limit) return;

          yield {
            adapter: NAME,
            kind: KIND_DOCUMENT,
            originalId: stableOriginalId(rec.docId),
            capturedAt: ts || Date.now(),
            payload: { record: rec },
          };
          emitted += 1;
        }

        // A short page means the list is exhausted.
        if (reachedWatermark || docs.length < PAGE_SIZE) break;
        offset += docs.length;
      }
    }

    /**
     * Turn one raw record yielded by sync() into the normalized batch shape.
     *
     * @param {object} raw - must carry `payload.record`
     * @throws {Error} when `payload.record` is missing
     */
    normalize(raw) {
      const record = raw && raw.payload && raw.payload.record;
      if (!record) {
        throw new Error(`${NAME}.normalize: payload.record missing`);
      }
      return normalizeDocumentRecord(record, raw, platform, NAME, VERSION);
    }
  }

  return DocumentAdapter;
}
|
|
288
|
+
|
|
289
|
+
/**
 * Translate one snapshot event into a DocumentRecord (the same shape mapDoc
 * produces), filling defaults for missing fields.
 *
 * @param {object} ev - snapshot event
 * @returns {object} `{ docId, title, docType, url, createdMs, updatedMs, extra }`
 */
function snapshotEventToRecord(ev) {
  const hasExtraObject = ev.extra && typeof ev.extra === "object";

  const record = {};
  record.docId = String(ev.docId || ev.id || "unknown");
  record.title = ev.title || "(无标题)";
  record.docType = ev.docType || ev.type || "doc";
  record.url = ev.url || null;
  record.createdMs = parseTime(ev.createdTime);
  record.updatedMs = parseTime(ev.updatedTime);
  record.extra = hasExtraObject ? ev.extra : {};
  return record;
}
|
|
301
|
+
|
|
302
|
+
/**
 * Convert one DocumentRecord into the normalized batch shape: one authoring
 * EVENT (subtype POST) plus one ITEM (subtype DOCUMENT) that the event points
 * at through `extra.itemRef`.
 *
 * @param {object} rec       DocumentRecord `{ docId, title, docType, url, createdMs, updatedMs, extra? }`
 * @param {object} raw       raw sync record; `originalId` and `capturedAt` are read from it
 * @param {string} platform  short platform id, e.g. "wps"
 * @param {string} NAME      adapter name recorded in `source.adapter`
 * @param {string} VERSION   adapter version recorded in `source.adapterVersion`
 * @returns {{events: object[], items: object[], persons: [], places: [], topics: []}}
 */
function normalizeDocumentRecord(rec, raw, platform, NAME, VERSION) {
  const ingestedAt = Date.now();
  // Prefer the document's own timestamps; fall back to capture time, then "now".
  const occurredAt = rec.updatedMs || rec.createdMs || raw.capturedAt || ingestedAt;
  // NOTE(review): capturedBy is API for snapshot-file records too — confirm that is intended.
  const source = {
    adapter: NAME,
    adapterVersion: VERSION,
    originalId: raw.originalId,
    capturedAt: raw.capturedAt || occurredAt,
    capturedBy: CAPTURED_BY.API,
  };
  const title = rec.title || "(无标题)";
  const docType = rec.docType || "doc";
  const url = rec.url || null;
  const itemId = `item-${platform}-doc-${rec.docId}`;

  return {
    events: [
      {
        id: newId(),
        type: ENTITY_TYPES.EVENT,
        subtype: EVENT_SUBTYPES.POST,
        occurredAt,
        actor: "person-self",
        content: { title: `文档: ${title}`, text: title },
        ingestedAt,
        source,
        extra: {
          platform,
          docId: rec.docId,
          docType,
          url,
          createdMs: rec.createdMs || null,
          updatedMs: rec.updatedMs || null,
          itemRef: itemId,
        },
      },
    ],
    items: [
      {
        id: itemId,
        type: ENTITY_TYPES.ITEM,
        subtype: ITEM_SUBTYPES.DOCUMENT,
        name: title,
        ingestedAt,
        source,
        extra: {
          // Platform-specific extras go first so they can add keys but never
          // overwrite the canonical platform / docId / docType / url below.
          ...(rec.extra || {}),
          platform,
          docId: rec.docId,
          docType,
          url,
        },
      },
    ],
    persons: [],
    places: [],
    topics: [],
  };
}
|
|
358
|
+
|
|
359
|
+
/**
 * Placeholder transport used when no `fetchFn` was supplied: it always rejects,
 * so cookie-api mode fails loudly instead of silently yielding nothing.
 *
 * @param {object} _opts - ignored
 * @returns {Promise<never>} always rejects
 * @throws {Error} "document-base: no fetchFn configured for cookie-api mode"
 */
async function defaultFetch(_opts) {
  const reason = "document-base: no fetchFn configured for cookie-api mode";
  throw new Error(reason);
}
|
|
362
|
+
|
|
363
|
+
module.exports = {
|
|
364
|
+
createDocumentAdapter,
|
|
365
|
+
normalizeDocumentRecord,
|
|
366
|
+
parseTime,
|
|
367
|
+
SNAPSHOT_SCHEMA_VERSION,
|
|
368
|
+
KIND_DOCUMENT,
|
|
369
|
+
VALID_SNAPSHOT_KINDS,
|
|
370
|
+
};
|