@chainlesschain/personal-data-hub 0.4.28 → 0.4.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204)
  1. package/README.md +13 -5
  2. package/lib/adapters/social-douyin-adb/usage-profile-reader.js +253 -0
  3. package/lib/adapters/social-douyin-adb/watch-history-reader.js +104 -31
  4. package/lib/adapters/social-toutiao-adb/article-reader.js +202 -0
  5. package/lib/analysis-skills/overview.js +24 -4
  6. package/lib/analysis-skills/spending.js +63 -2
  7. package/lib/analysis-skills/timeline.js +11 -6
  8. package/lib/prompt-builder.js +15 -1
  9. package/lib/query-parser.js +38 -8
  10. package/package.json +4 -1
  11. package/__tests__/adapter-guide.test.js +0 -47
  12. package/__tests__/adapter-spec.test.js +0 -78
  13. package/__tests__/adapters/ai-chat-cookie-capture-spec.test.js +0 -211
  14. package/__tests__/adapters/ai-chat-health-checker.test.js +0 -262
  15. package/__tests__/adapters/ai-chat-history.test.js +0 -396
  16. package/__tests__/adapters/ai-chat-http-client.test.js +0 -242
  17. package/__tests__/adapters/ai-chat-vendors.test.js +0 -874
  18. package/__tests__/adapters/alipay-bill-adapter.test.js +0 -538
  19. package/__tests__/adapters/apple-health.test.js +0 -95
  20. package/__tests__/adapters/bank-family.test.js +0 -125
  21. package/__tests__/adapters/biz-tianyancha.test.js +0 -159
  22. package/__tests__/adapters/browser-history-chrome.test.js +0 -377
  23. package/__tests__/adapters/browser-history-edge.test.js +0 -159
  24. package/__tests__/adapters/car-mercedesme.test.js +0 -74
  25. package/__tests__/adapters/doc-baidu-netdisk.test.js +0 -102
  26. package/__tests__/adapters/doc-camscanner.test.js +0 -147
  27. package/__tests__/adapters/doc-platforms.test.js +0 -177
  28. package/__tests__/adapters/edu-huawei-learning-live.test.js +0 -198
  29. package/__tests__/adapters/edu-zuoyebang-live.test.js +0 -226
  30. package/__tests__/adapters/email-adapter-snapshot.test.js +0 -237
  31. package/__tests__/adapters/email-adapter.test.js +0 -742
  32. package/__tests__/adapters/email-classifier.test.js +0 -347
  33. package/__tests__/adapters/email-imap-session.test.js +0 -334
  34. package/__tests__/adapters/email-parser.test.js +0 -244
  35. package/__tests__/adapters/email-pdf-extractor.test.js +0 -529
  36. package/__tests__/adapters/email-providers.test.js +0 -84
  37. package/__tests__/adapters/email-retry-progress.test.js +0 -294
  38. package/__tests__/adapters/email-templates.test.js +0 -822
  39. package/__tests__/adapters/family-23-collectors-scaffold.test.js +0 -182
  40. package/__tests__/adapters/finance-alipay-live.test.js +0 -258
  41. package/__tests__/adapters/finance-dcep.test.js +0 -74
  42. package/__tests__/adapters/fitness-joyrun.test.js +0 -82
  43. package/__tests__/adapters/game-genshin-live.test.js +0 -238
  44. package/__tests__/adapters/game-genshin-scaffold.test.js +0 -108
  45. package/__tests__/adapters/game-honor-of-kings-live.test.js +0 -230
  46. package/__tests__/adapters/git-activity.test.js +0 -222
  47. package/__tests__/adapters/gov-12123.test.js +0 -103
  48. package/__tests__/adapters/gov-ixiamen.test.js +0 -150
  49. package/__tests__/adapters/gov-tax.test.js +0 -135
  50. package/__tests__/adapters/health-meiyou.test.js +0 -125
  51. package/__tests__/adapters/local-files.test.js +0 -264
  52. package/__tests__/adapters/local-im-pc.test.js +0 -154
  53. package/__tests__/adapters/messaging-whatsapp.test.js +0 -289
  54. package/__tests__/adapters/music-kugou.test.js +0 -187
  55. package/__tests__/adapters/music-qq.test.js +0 -112
  56. package/__tests__/adapters/netease-music-live.test.js +0 -244
  57. package/__tests__/adapters/netease-music.test.js +0 -74
  58. package/__tests__/adapters/pc-local-discovery.test.js +0 -141
  59. package/__tests__/adapters/qq-pc-direct-read.test.js +0 -227
  60. package/__tests__/adapters/reading-family.test.js +0 -108
  61. package/__tests__/adapters/recruit-boss.test.js +0 -180
  62. package/__tests__/adapters/shell-history.test.js +0 -180
  63. package/__tests__/adapters/shopping-base.test.js +0 -179
  64. package/__tests__/adapters/shopping-dianping.test.js +0 -239
  65. package/__tests__/adapters/social-bilibili-adb-api-client.test.js +0 -721
  66. package/__tests__/adapters/social-bilibili-adb-chromium-cookies-reader.test.js +0 -346
  67. package/__tests__/adapters/social-bilibili-adb-collector.test.js +0 -284
  68. package/__tests__/adapters/social-bilibili-adb-cookies-extension.test.js +0 -343
  69. package/__tests__/adapters/social-bilibili-adb-snapshot-builder.test.js +0 -296
  70. package/__tests__/adapters/social-csdn.test.js +0 -175
  71. package/__tests__/adapters/social-dongchedi.test.js +0 -165
  72. package/__tests__/adapters/social-douyin-adb-aweme-detail.test.js +0 -165
  73. package/__tests__/adapters/social-douyin-adb-collector.test.js +0 -254
  74. package/__tests__/adapters/social-douyin-adb-db-extension.test.js +0 -114
  75. package/__tests__/adapters/social-douyin-adb-im-db-parser.test.js +0 -304
  76. package/__tests__/adapters/social-douyin-adb-snapshot-builder.test.js +0 -216
  77. package/__tests__/adapters/social-douyin-adb-watch-history.test.js +0 -192
  78. package/__tests__/adapters/social-kuaishou-adb-api-client.test.js +0 -496
  79. package/__tests__/adapters/social-kuaishou-adb-collector.test.js +0 -276
  80. package/__tests__/adapters/social-kuaishou-adb-cookies-extension.test.js +0 -152
  81. package/__tests__/adapters/social-kuaishou-adb-snapshot-builder.test.js +0 -178
  82. package/__tests__/adapters/social-toutiao-adb-account-reader.test.js +0 -135
  83. package/__tests__/adapters/social-toutiao-adb-api-client.test.js +0 -626
  84. package/__tests__/adapters/social-toutiao-adb-collector.test.js +0 -378
  85. package/__tests__/adapters/social-toutiao-adb-cookies-extension.test.js +0 -193
  86. package/__tests__/adapters/social-toutiao-adb-snapshot-builder.test.js +0 -196
  87. package/__tests__/adapters/social-toutiao-kuaishou-scaffold.test.js +0 -311
  88. package/__tests__/adapters/social-weibo-adb-api-client.test.js +0 -362
  89. package/__tests__/adapters/social-weibo-adb-collector.test.js +0 -201
  90. package/__tests__/adapters/social-weibo-adb-cookies-extension.test.js +0 -167
  91. package/__tests__/adapters/social-weibo-adb-snapshot-builder.test.js +0 -189
  92. package/__tests__/adapters/social-xiaohongshu-adb-api-client.test.js +0 -431
  93. package/__tests__/adapters/social-xiaohongshu-adb-collector.test.js +0 -207
  94. package/__tests__/adapters/social-xiaohongshu-adb-cookies-extension.test.js +0 -0
  95. package/__tests__/adapters/social-xiaohongshu-adb-sign-provider-injection.test.js +0 -351
  96. package/__tests__/adapters/social-xiaohongshu-adb-sign.test.js +0 -130
  97. package/__tests__/adapters/social-xiaohongshu-adb-snapshot-builder.test.js +0 -200
  98. package/__tests__/adapters/social-zhihu.test.js +0 -246
  99. package/__tests__/adapters/system-data-adapter.test.js +0 -443
  100. package/__tests__/adapters/system-data-android-ingest.test.js +0 -144
  101. package/__tests__/adapters/system-data-android.test.js +0 -519
  102. package/__tests__/adapters/system-data-disclosure.test.js +0 -153
  103. package/__tests__/adapters/travel-12306.test.js +0 -512
  104. package/__tests__/adapters/travel-amap.test.js +0 -219
  105. package/__tests__/adapters/travel-baidu-map.test.js +0 -305
  106. package/__tests__/adapters/travel-base.test.js +0 -205
  107. package/__tests__/adapters/travel-ctrip.test.js +0 -377
  108. package/__tests__/adapters/travel-didi-consumer.test.js +0 -66
  109. package/__tests__/adapters/travel-didi.test.js +0 -204
  110. package/__tests__/adapters/travel-tencent-map.test.js +0 -207
  111. package/__tests__/adapters/travel-tongcheng.test.js +0 -289
  112. package/__tests__/adapters/video-platforms.test.js +0 -152
  113. package/__tests__/adapters/video-xigua.test.js +0 -106
  114. package/__tests__/adapters/vscode.test.js +0 -299
  115. package/__tests__/adapters/wechat-bootstrap.test.js +0 -240
  116. package/__tests__/adapters/wechat-env-probe.test.js +0 -162
  117. package/__tests__/adapters/wechat-frida-agent.test.js +0 -322
  118. package/__tests__/adapters/wechat-frida-integration.test.js +0 -149
  119. package/__tests__/adapters/wechat-frida-key-provider.test.js +0 -188
  120. package/__tests__/adapters/wechat-md5-key-provider.test.js +0 -101
  121. package/__tests__/adapters/wechat-pc-direct-read.test.js +0 -365
  122. package/__tests__/adapters/wechat-pc-group-topic.test.js +0 -63
  123. package/__tests__/adapters/wechat-pc-v4-sidecar.test.js +0 -72
  124. package/__tests__/adapters/weread.test.js +0 -123
  125. package/__tests__/adapters/wework-pc.test.js +0 -124
  126. package/__tests__/adapters/win-recent.test.js +0 -192
  127. package/__tests__/analysis-skills.test.js +0 -679
  128. package/__tests__/analysis.test.js +0 -1845
  129. package/__tests__/audio-ximalaya-snapshot.test.js +0 -279
  130. package/__tests__/batch.test.js +0 -133
  131. package/__tests__/bridges-cc-kg.test.js +0 -231
  132. package/__tests__/bridges-cc-llm.test.js +0 -191
  133. package/__tests__/bridges-cc-rag.test.js +0 -162
  134. package/__tests__/categories.test.js +0 -92
  135. package/__tests__/e2e/ai-chat-cross-source-journey.test.js +0 -213
  136. package/__tests__/e2e/full-user-journey.test.js +0 -188
  137. package/__tests__/e2e/local-data-adapters-cli.e2e.test.js +0 -146
  138. package/__tests__/entity-resolver-ingest-hook.test.js +0 -177
  139. package/__tests__/entity-resolver-stages.test.js +0 -411
  140. package/__tests__/entity-resolver-vault.test.js +0 -249
  141. package/__tests__/entity-resolver.test.js +0 -526
  142. package/__tests__/fitness-keep-snapshot.test.js +0 -224
  143. package/__tests__/fixtures/entity-resolver-200-mock.json +0 -96
  144. package/__tests__/ids.test.js +0 -45
  145. package/__tests__/integration/ai-chat-history-registry.test.js +0 -228
  146. package/__tests__/integration/aichat-wizard-end-to-end.test.js +0 -282
  147. package/__tests__/integration/cross-adapter-pipelines.test.js +0 -396
  148. package/__tests__/integration/local-data-adapters-pipeline.test.js +0 -373
  149. package/__tests__/integration/social-bilibili-pipeline.test.js +0 -261
  150. package/__tests__/integration/wechat-bootstrap-end-to-end.test.js +0 -390
  151. package/__tests__/key-providers.test.js +0 -126
  152. package/__tests__/kg-derive.test.js +0 -219
  153. package/__tests__/llm-client.test.js +0 -122
  154. package/__tests__/longtail-adapters.test.js +0 -281
  155. package/__tests__/messaging-qq-snapshot.test.js +0 -294
  156. package/__tests__/mobile-extractor-encrypted.test.js +0 -460
  157. package/__tests__/mobile-extractor.test.js +0 -288
  158. package/__tests__/mock-adapter.test.js +0 -93
  159. package/__tests__/prompt-builder.test.js +0 -249
  160. package/__tests__/query-parser.test.js +0 -302
  161. package/__tests__/rag-derive.test.js +0 -169
  162. package/__tests__/registry-readiness.test.js +0 -292
  163. package/__tests__/registry.test.js +0 -420
  164. package/__tests__/salvage-ingest.test.js +0 -97
  165. package/__tests__/schemas.test.js +0 -331
  166. package/__tests__/shopping-adapters.test.js +0 -392
  167. package/__tests__/shopping-eleme-snapshot.test.js +0 -454
  168. package/__tests__/shopping-pinduoduo-snapshot.test.js +0 -484
  169. package/__tests__/shopping-snapshot.test.js +0 -438
  170. package/__tests__/shopping-vipshop-snapshot.test.js +0 -425
  171. package/__tests__/shopping-xianyu-snapshot.test.js +0 -451
  172. package/__tests__/sidecar-contacts-cross-validate.test.js +0 -186
  173. package/__tests__/sidecar-supervisor.test.js +0 -128
  174. package/__tests__/sign-providers.test.js +0 -62
  175. package/__tests__/social-adapters.test.js +0 -280
  176. package/__tests__/social-bilibili-snapshot.test.js +0 -278
  177. package/__tests__/social-douban-snapshot.test.js +0 -351
  178. package/__tests__/social-douyin-im-direct-read.test.js +0 -377
  179. package/__tests__/social-douyin-salvage-collector.test.js +0 -98
  180. package/__tests__/social-douyin-salvage-mapper.test.js +0 -90
  181. package/__tests__/social-douyin-snapshot.test.js +0 -256
  182. package/__tests__/social-kuaishou-snapshot.test.js +0 -362
  183. package/__tests__/social-toutiao-snapshot.test.js +0 -366
  184. package/__tests__/social-weibo-snapshot.test.js +0 -234
  185. package/__tests__/social-weibo-sqlite-device.test.js +0 -174
  186. package/__tests__/social-xiaohongshu-snapshot.test.js +0 -232
  187. package/__tests__/sqlite-leaf-salvage.test.js +0 -97
  188. package/__tests__/travel-adapters.test.js +0 -483
  189. package/__tests__/travel-maps-snapshot.test.js +0 -426
  190. package/__tests__/vault-driver-error.test.js +0 -74
  191. package/__tests__/vault-search-helpers.test.js +0 -104
  192. package/__tests__/vault-search.test.js +0 -423
  193. package/__tests__/vault.test.js +0 -767
  194. package/__tests__/wechat-adapter.test.js +0 -594
  195. package/__tests__/whatsapp-adapter.test.js +0 -138
  196. package/scripts/_make-fixture-all.js +0 -126
  197. package/scripts/_make-fixture-contacts.js +0 -84
  198. package/scripts/evaluate-entity-resolver.js +0 -213
  199. package/scripts/run-native-tests-sandbox.sh +0 -55
  200. package/scripts/smoke-phase-5-5.js +0 -196
  201. package/scripts/smoke-phase-5-7.js +0 -181
  202. package/scripts/smoke-system-data-contacts.js +0 -309
  203. package/scripts/smoke-system-data.js +0 -312
  204. package/vitest.config.js +0 -88
package/__tests__/social-douyin-im-direct-read.test.js
@@ -1,377 +0,0 @@
1
- "use strict";
2
-
3
- import { describe, it, expect, beforeEach, afterEach } from "vitest";
4
-
5
- const fs = require("node:fs");
6
- const path = require("node:path");
7
- const os = require("node:os");
8
-
9
- const { DouyinAdapter } = require("../lib/adapters/social-douyin");
10
- const { partitionBatch } = require("../lib/batch");
11
-
12
- /**
13
- * 本地直读样板 (Douyin <uid>_im.db local direct-read) + the normalize
14
- * message/contact gap fix.
15
- *
16
- * Two things this covers that nothing else did:
17
- *
18
- * 1. REGRESSION: DouyinAdapter.normalize() used to throw "unknown kind
19
- * message/contact" for IM events — so every 私信 + 联系人 silently
20
- * dropped (registry catches the throw → invalidCount++ → 0 rows in the
21
- * vault) even though the snapshot/ADB path "succeeded". The old snapshot
22
- * test only round-tripped `profile`, so it never caught this.
23
- *
24
- * 2. NEW direct-read mode: `sync({ imDbPath })` / `--input <uid>_im.db`
25
- * opens the plaintext SQLite directly (no ADB, no snapshot JSON) and
26
- * emits message/contact raws whose originalIds match the snapshot path
27
- * (idempotent across both routes).
28
- *
29
- * No native SQLite needed — a fake Database driver is injected via
30
- * `_deps.dbDriverFactory` (the parser accepts it as `_databaseClass`).
31
- */
32
-
33
- // Fake better-sqlite3-style driver answering the parser's PRAGMA + SELECTs.
34
- function makeFakeDb({ msgRows, userRows, msgCols, userCols, partCols, partRows, convCols, convRows }) {
35
- class FakeStmt {
36
- constructor(sql) {
37
- this.sql = sql;
38
- }
39
- all() {
40
- const s = this.sql;
41
- if (/PRAGMA table_info\(msg\)/.test(s)) return msgCols;
42
- if (/FROM msg/.test(s)) return msgRows;
43
- if (/PRAGMA table_info\(SIMPLE_USER\)/.test(s)) return userCols || [];
44
- if (/FROM SIMPLE_USER/.test(s)) return userRows || [];
45
- if (/PRAGMA table_info\(participant\)/.test(s)) return partCols || [];
46
- if (/FROM participant/.test(s)) return partRows || [];
47
- if (/PRAGMA table_info\(conversation_list\)/.test(s)) return convCols || [];
48
- if (/FROM conversation_list/.test(s)) return convRows || [];
49
- return [];
50
- }
51
- }
52
- return class FakeDb {
53
- // eslint-disable-next-line no-unused-vars
54
- constructor(_path, _opts) {}
55
- prepare(sql) {
56
- return new FakeStmt(sql);
57
- }
58
- close() {}
59
- };
60
- }
61
-
62
- const DEFAULT_FAKE = {
63
- msgCols: [
64
- { name: "sender" },
65
- { name: "created_time" },
66
- { name: "content" },
67
- { name: "conversation_id" },
68
- { name: "read_status" },
69
- ],
70
- msgRows: [
71
- {
72
- sender: 111,
73
- createdTime: 1700000000000,
74
- content: JSON.stringify({ text: "你好呀" }),
75
- conversationId: "conv-1",
76
- readStatus: 1,
77
- },
78
- {
79
- sender: 222,
80
- createdTime: 1700000001000,
81
- content: JSON.stringify({ text: "在吗" }),
82
- conversationId: "conv-1",
83
- readStatus: 0,
84
- },
85
- ],
86
- userCols: [
87
- { name: "UID" },
88
- { name: "short_id" },
89
- { name: "name" },
90
- { name: "avatar_url" },
91
- { name: "follow_status" },
92
- ],
93
- userRows: [
94
- {
95
- uid: 222,
96
- shortId: 888,
97
- name: "小明",
98
- avatarUrl: "http://x/a.jpg",
99
- followStatus: 2,
100
- },
101
- ],
102
- };
103
-
104
- function freshAdapter(fakeSpec = DEFAULT_FAKE, fsOverride) {
105
- const a = new DouyinAdapter();
106
- a._deps.fs = fsOverride || { existsSync: () => true };
107
- a._deps.dbDriverFactory = () => makeFakeDb(fakeSpec);
108
- return a;
109
- }
110
-
111
- async function collect(iter) {
112
- const out = [];
113
- for await (const r of iter) out.push(r);
114
- return out;
115
- }
116
-
117
- describe("DouyinAdapter — normalize message/contact (regression)", () => {
118
- it("normalizes a message raw into one MESSAGE event (no throw)", () => {
119
- const a = new DouyinAdapter();
120
- const raw = {
121
- adapter: "social-douyin",
122
- kind: "message",
123
- originalId: "douyin:message:msg-conv-1-1700000000000",
124
- capturedAt: 1700000000000,
125
- payload: {
126
- kind: "message",
127
- text: "你好",
128
- senderUid: "111",
129
- conversationId: "conv-1",
130
- readStatus: 1,
131
- contentBlob: '{"text":"你好"}',
132
- },
133
- };
134
- const n = a.normalize(raw);
135
- expect(n.events).toHaveLength(1);
136
- expect(n.persons).toHaveLength(0);
137
- const ev = n.events[0];
138
- expect(ev.subtype).toBe("message");
139
- expect(ev.content.text).toBe("你好");
140
- expect(ev.extra.senderUid).toBe("111");
141
- expect(ev.extra.conversationId).toBe("conv-1");
142
- expect(ev.extra.platform).toBe("douyin");
143
- });
144
-
145
- it("normalizes a contact raw into one CONTACT person", () => {
146
- const a = new DouyinAdapter();
147
- const raw = {
148
- adapter: "social-douyin",
149
- kind: "contact",
150
- originalId: "douyin:contact:contact-222",
151
- capturedAt: 1700000000000,
152
- payload: {
153
- kind: "contact",
154
- uid: "222",
155
- shortId: "888",
156
- name: "小明",
157
- avatarUrl: "http://x/a.jpg",
158
- followStatus: 2,
159
- },
160
- };
161
- const n = a.normalize(raw);
162
- expect(n.persons).toHaveLength(1);
163
- expect(n.events).toHaveLength(0);
164
- const per = n.persons[0];
165
- expect(per.subtype).toBe("contact");
166
- expect(per.id).toBe("person-douyin-222");
167
- expect(per.names).toEqual(["小明"]);
168
- expect(per.identifiers["douyin-uid"]).toEqual(["222"]);
169
- expect(per.extra.followStatus).toBe(2);
170
- });
171
-
172
- it("an empty-text (non-text) message still produces a valid event", () => {
173
- const a = new DouyinAdapter();
174
- const raw = {
175
- adapter: "social-douyin",
176
- kind: "message",
177
- originalId: "douyin:message:x",
178
- capturedAt: 1700000000000,
179
- payload: { kind: "message", text: null, senderUid: "111" },
180
- };
181
- const n = a.normalize(raw);
182
- const { valid, invalidReasons } = partitionBatch({
183
- events: n.events,
184
- persons: [],
185
- places: [],
186
- items: [],
187
- topics: [],
188
- });
189
- expect(invalidReasons).toHaveLength(0);
190
- expect(valid.events).toHaveLength(1);
191
- });
192
- });
193
-
194
- describe("DouyinAdapter — 本地直读 <uid>_im.db", () => {
195
- let tmpDir;
196
- beforeEach(() => {
197
- tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "douyin-imdb-"));
198
- });
199
- afterEach(() => {
200
- if (tmpDir && fs.existsSync(tmpDir)) {
201
- fs.rmSync(tmpDir, { recursive: true, force: true });
202
- }
203
- });
204
-
205
- it("sync({ imDbPath }) yields message + contact raws", async () => {
206
- const a = freshAdapter();
207
- const raws = await collect(a.sync({ imDbPath: "/fake/123_im.db" }));
208
- expect(raws.map((r) => r.kind)).toEqual(["message", "message", "contact"]);
209
- });
210
-
211
- it("direct-read events normalize to a fully valid batch (no silent drop)", async () => {
212
- const a = freshAdapter();
213
- const raws = await collect(a.sync({ imDbPath: "/fake/123_im.db" }));
214
- const merged = { events: [], persons: [], places: [], items: [], topics: [] };
215
- for (const r of raws) {
216
- const n = a.normalize(r);
217
- for (const k of Object.keys(merged)) merged[k].push(...n[k]);
218
- }
219
- const { valid, invalidReasons } = partitionBatch(merged);
220
- expect(invalidReasons).toHaveLength(0);
221
- expect(valid.events).toHaveLength(2); // two messages
222
- expect(valid.persons).toHaveLength(1); // one contact
223
- });
224
-
225
- it("originalIds match the snapshot composite strategy (idempotent across routes)", async () => {
226
- const a = freshAdapter();
227
- const raws = await collect(a.sync({ imDbPath: "/fake/123_im.db" }));
228
- expect(raws.map((r) => r.originalId)).toEqual([
229
- "douyin:message:msg-conv-1-1700000000000",
230
- "douyin:message:msg-conv-1-1700000001000",
231
- "douyin:contact:contact-222",
232
- ]);
233
- });
234
-
235
- it("respects include={message:false} / limit", async () => {
236
- const a = freshAdapter();
237
- const onlyContacts = await collect(
238
- a.sync({ imDbPath: "/fake/123_im.db", include: { message: false } }),
239
- );
240
- expect(onlyContacts.every((r) => r.kind === "contact")).toBe(true);
241
-
242
- const capped = await collect(a.sync({ imDbPath: "/fake/123_im.db", limit: 1 }));
243
- expect(capped).toHaveLength(1);
244
- });
245
-
246
- it("emits an im-db-parsed progress event with the diagnostic", async () => {
247
- const a = freshAdapter();
248
- const events = [];
249
- await collect(
250
- a.sync({
251
- imDbPath: "/fake/123_im.db",
252
- onProgress: (e) => events.push(e),
253
- }),
254
- );
255
- const parsed = events.find((e) => e.phase === "im-db-parsed");
256
- expect(parsed).toBeTruthy();
257
- expect(parsed.hadMsgTable).toBe(true);
258
- expect(parsed.hadSimpleUserTable).toBe(true);
259
- expect(parsed.messageCount).toBe(2);
260
- expect(parsed.contactCount).toBe(1);
261
- });
262
-
263
- it("missing db file yields nothing (no throw)", async () => {
264
- const a = freshAdapter(DEFAULT_FAKE, { existsSync: () => false });
265
- const raws = await collect(a.sync({ imDbPath: "/does/not/exist_im.db" }));
266
- expect(raws).toHaveLength(0);
267
- });
268
-
269
- // device-verified 2026-06-16: real Douyin IM schema uses `participant`
270
- // (conversation_id, user_id), not SIMPLE_USER → contacts must come from it.
271
- it("extracts contacts from `participant` when SIMPLE_USER absent (real schema)", async () => {
272
- const spec = {
273
- msgCols: DEFAULT_FAKE.msgCols,
274
- msgRows: DEFAULT_FAKE.msgRows,
275
- userCols: [], // no SIMPLE_USER table on a real device
276
- userRows: [],
277
- partCols: [{ name: "conversation_id" }, { name: "user_id" }, { name: "sort_order" }],
278
- partRows: [{ uid: 111 }, { uid: 222 }, { uid: 222 }], // dup 222 → deduped
279
- };
280
- const a = freshAdapter(spec);
281
- const raws = await collect(a.sync({ imDbPath: "/fake/123_im.db" }));
282
- const contacts = raws.filter((r) => r.kind === "contact");
283
- expect(contacts.map((r) => r.payload.uid).sort()).toEqual(["111", "222"]);
284
- // each participant uid → a CONTACT person keyed by douyin-uid
285
- const n = a.normalize(contacts[0]);
286
- expect(n.persons[0].identifiers["douyin-uid"]).toEqual([contacts[0].payload.uid]);
287
- });
288
-
289
- // device-verified: conversation_list row → PDH TOPIC (one chat thread).
290
- it("maps conversation_list rows to TOPIC entities", async () => {
291
- const spec = {
292
- msgCols: DEFAULT_FAKE.msgCols,
293
- msgRows: DEFAULT_FAKE.msgRows,
294
- userCols: [], userRows: [],
295
- convCols: [
296
- { name: "conversation_id" }, { name: "type" },
297
- { name: "last_msg_create_time" }, { name: "stranger" },
298
- ],
299
- convRows: [
300
- { convId: "conv-1", convType: 0, lastMsgTime: 1700000002000, stranger: 0 },
301
- { convId: "conv-2", convType: 1, lastMsgTime: 1700000003000, stranger: 1 },
302
- ],
303
- };
304
- const a = freshAdapter(spec);
305
- const raws = await collect(a.sync({ imDbPath: "/fake/123_im.db" }));
306
- const convs = raws.filter((r) => r.kind === "conversation");
307
- expect(convs.map((r) => r.payload.conversationId)).toEqual(["conv-1", "conv-2"]);
308
- const n = a.normalize(convs[1]);
309
- expect(n.topics).toHaveLength(1);
310
- expect(n.topics[0].type).toBe("topic");
311
- expect(n.topics[0].extra.conversationId).toBe("conv-2");
312
- expect(n.topics[0].extra.stranger).toBe(true);
313
- expect(n.topics[0].extra.lastMsgTimeMs).toBe(1700000003000);
314
- });
315
-
316
- it("participant dedups against SIMPLE_USER contacts (no double-count)", async () => {
317
- const spec = {
318
- msgCols: DEFAULT_FAKE.msgCols,
319
- msgRows: DEFAULT_FAKE.msgRows,
320
- userCols: DEFAULT_FAKE.userCols,
321
- userRows: DEFAULT_FAKE.userRows, // uid 222 from SIMPLE_USER
322
- partCols: [{ name: "conversation_id" }, { name: "user_id" }],
323
- partRows: [{ uid: 222 }, { uid: 333 }], // 222 already seen, only 333 is new
324
- };
325
- const a = freshAdapter(spec);
326
- const raws = await collect(a.sync({ imDbPath: "/fake/123_im.db" }));
327
- const uids = raws.filter((r) => r.kind === "contact").map((r) => r.payload.uid).sort();
328
- expect(uids).toEqual(["222", "333"]); // 222 not duplicated
329
- });
330
- });
331
-
332
- describe("DouyinAdapter — sync() input routing (sniff)", () => {
333
- let tmpDir;
334
- beforeEach(() => {
335
- tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "douyin-route-"));
336
- });
337
- afterEach(() => {
338
- if (tmpDir && fs.existsSync(tmpDir)) {
339
- fs.rmSync(tmpDir, { recursive: true, force: true });
340
- }
341
- });
342
-
343
- it("--input <file with SQLite magic header> routes to direct IM read", async () => {
344
- // Real file with the 16-byte SQLite magic header so _looksLikeSqlite
345
- // (which uses real fs) returns true; the fake driver supplies the rows.
346
- const dbFile = path.join(tmpDir, "123_im.db");
347
- const header = Buffer.alloc(100);
348
- header.write("SQLite format 3", 0, "latin1");
349
- fs.writeFileSync(dbFile, header);
350
-
351
- const a = new DouyinAdapter();
352
- a._deps.dbDriverFactory = () => makeFakeDb(DEFAULT_FAKE);
353
- const raws = [];
354
- for await (const r of a.sync({ inputPath: dbFile })) raws.push(r);
355
- expect(raws.map((r) => r.kind)).toEqual(["message", "message", "contact"]);
356
- });
357
-
358
- it("--input <JSON snapshot> routes to snapshot mode (not IM)", async () => {
359
- const snapFile = path.join(tmpDir, "social-douyin.json");
360
- fs.writeFileSync(
361
- snapFile,
362
- JSON.stringify({
363
- schemaVersion: 1,
364
- snapshottedAt: 1700000000000,
365
- account: { secUid: "MS4abc", shortId: "9", displayName: "me" },
366
- events: [
367
- { kind: "profile", id: "profile-MS4abc", capturedAt: 1700000000000, secUid: "MS4abc", nickname: "me" },
368
- ],
369
- }),
370
- );
371
- const a = new DouyinAdapter();
372
- const raws = [];
373
- for await (const r of a.sync({ inputPath: snapFile })) raws.push(r);
374
- expect(raws).toHaveLength(1);
375
- expect(raws[0].kind).toBe("profile");
376
- });
377
- });
package/__tests__/social-douyin-salvage-collector.test.js
@@ -1,98 +0,0 @@
1
- "use strict";
2
-
3
- import { describe, it, expect, beforeAll, afterAll } from "vitest";
4
-
5
- const fs = require("node:fs");
6
- const path = require("node:path");
7
- const os = require("node:os");
8
-
9
- const {
10
- salvageDumpToSnapshot,
11
- salvageAndSync,
12
- } = require("../lib/adapters/social-douyin-adb/collector");
13
-
14
- // Build a real (UTF-8) SQLite DB via the SQLCipher-capable driver and treat its
15
- // raw bytes as a "memory dump" — proving the salvage → snapshot → ingest path
16
- // recovers message rows with no key (the Method-B capstone). The msg-table
17
- // column order matches the device-verified Douyin IM schema.
18
- describe("social-douyin-adb salvage collector", () => {
19
- let dir, dbPath;
20
- const COLUMNS = ["msg_uuid", "conversation_id", "sender", "content", "created_time"];
21
-
22
- beforeAll(() => {
23
- const Database = require("better-sqlite3-multiple-ciphers");
24
- dir = fs.mkdtempSync(path.join(os.tmpdir(), "salvage-col-"));
25
- dbPath = path.join(dir, "u.db");
26
- const db = new Database(dbPath);
27
- db.exec(
28
- "CREATE TABLE msg(msg_uuid TEXT, conversation_id TEXT, sender INTEGER, content TEXT, created_time INTEGER)",
29
- );
30
- const ins = db.prepare("INSERT INTO msg VALUES(?,?,?,?,?)");
31
- ins.run("uuid-1", "conv-1", 111, "你好呀 hello", 1700000000000);
32
- ins.run("uuid-2", "conv-1", 222, "在吗?晚上一起吃饭", 1700000001000);
33
- ins.run("uuid-3", "conv-2", 333, "ok 👍", 1700000002000);
34
- db.close();
35
- });
36
-
37
- afterAll(() => {
38
- try { fs.rmSync(dir, { recursive: true, force: true }); } catch (_e) { /* ignore */ }
39
- });
40
-
41
- it("salvageDumpToSnapshot recovers msg rows → snapshot JSON (explicit columns)", () => {
42
- const res = salvageDumpToSnapshot(dbPath, {
43
- uid: "1234567890123456789",
44
- columns: COLUMNS,
45
- now: () => 1700000099000,
46
- });
47
- expect(res.uid).toBe("1234567890123456789");
48
- expect(res.eventCounts.message).toBe(3);
49
- expect(res.salvage.recordsSalvaged).toBeGreaterThanOrEqual(3);
50
-
51
- const snap = JSON.parse(fs.readFileSync(res.snapshotPath, "utf-8"));
52
- const msgs = snap.events.filter((e) => e.kind === "message");
53
- expect(msgs.length).toBe(3);
54
- const texts = msgs.map((m) => m.text).sort();
55
- expect(texts).toContain("你好呀 hello");
56
- expect(texts).toContain("ok 👍"); // UTF-8 emoji survives
57
- const m1 = msgs.find((m) => m.text === "你好呀 hello");
58
- expect(m1.conversationId).toBe("conv-1");
59
- expect(m1.senderUid).toBe("111");
60
- fs.rmSync(res.snapshotPath, { force: true });
61
- });
62
-
63
- it("infers columns heuristically when none given (content + created_time)", () => {
64
- const res = salvageDumpToSnapshot(dbPath, { now: () => 1700000099000 });
65
- expect(res.eventCounts.message).toBe(3);
66
- const snap = JSON.parse(fs.readFileSync(res.snapshotPath, "utf-8"));
67
- const texts = snap.events.filter((e) => e.kind === "message").map((m) => m.text);
68
- expect(texts).toContain("在吗?晚上一起吃饭");
69
- fs.rmSync(res.snapshotPath, { force: true });
70
- });
71
-
72
- it("salvageAndSync feeds the snapshot to registry.syncAdapter then cleans up", async () => {
73
- let captured = null;
74
- const fakeRegistry = {
75
- syncAdapter: async (name, opts) => {
76
- captured = { name, opts };
77
- // verify the snapshot file exists at sync time
78
- const snap = JSON.parse(fs.readFileSync(opts.inputPath, "utf-8"));
79
- return { ingested: snap.events.length, adapter: name, kgTriples: 0, ragDocs: 0 };
80
- },
81
- };
82
- const report = await salvageAndSync(fakeRegistry, dbPath, {
83
- uid: "1234567890123456789",
84
- columns: COLUMNS,
85
- });
86
- expect(captured.name).toBe("social-douyin");
87
- expect(report.ingested).toBe(3);
88
- expect(report.douyin.mode).toBe("salvage");
89
- expect(report.douyin.eventCounts.message).toBe(3);
90
- expect(report.douyin.cleanupFailed).toBe(false);
91
- // snapshot file cleaned up in finally
92
- expect(fs.existsSync(captured.opts.inputPath)).toBe(false);
93
- });
94
-
95
- it("throws on missing dumpPath", () => {
96
- expect(() => salvageDumpToSnapshot("")).toThrow();
97
- });
98
- });
package/__tests__/social-douyin-salvage-mapper.test.js
@@ -1,90 +0,0 @@
1
- "use strict";
2
-
3
- import { describe, it, expect } from "vitest";
4
-
5
- const {
6
- mapMsgRecords,
7
- mapParticipantRecords,
8
- mapConversationRecords,
9
- inferMsgColumns,
10
- mapSalvaged,
11
- } = require("../lib/adapters/social-douyin-adb/salvage-mapper");
12
- const { DouyinAdapter } = require("../lib/adapters/social-douyin");
13
-
14
- // End-to-end glue: leaf-salvaged {rowid,cols} → parseImDb shape → adapter.normalize
15
- // → PDH entities. Closes Method-B: dump → salvage → mapper → ingest.
16
- describe("salvage-mapper — salvaged records → PDH entities", () => {
17
- // msg column order (device-verified subset, see pdh-app-db-schemas.md)
18
- const MSG_COLS = ["msg_uuid", "conversation_id", "sender", "content", "created_time"];
19
- const msgRecords = [
20
- { rowid: "1", cols: ["u1", "conv-1", 111, JSON.stringify({ text: "你好呀 hello" }), 1700000000000] },
21
- { rowid: "2", cols: ["u2", "conv-1", 222, JSON.stringify({ text: "在吗" }), 1700000001000] },
22
- ];
23
-
24
- it("maps msg records → message objects (im-db-parser shape)", () => {
25
- const msgs = mapMsgRecords(msgRecords, MSG_COLS);
26
- expect(msgs).toHaveLength(2);
27
- expect(msgs[0].senderUid).toBe("111");
28
- expect(msgs[0].conversationId).toBe("conv-1");
29
- expect(msgs[0].createdTimeMs).toBe(1700000000000);
30
- expect(msgs[0].text).toBe("你好呀 hello"); // content JSON → text extracted
31
- });
32
-
33
- it("mapped messages normalize through DouyinAdapter → MESSAGE events", () => {
34
- const a = new DouyinAdapter();
35
- const msgs = mapMsgRecords(msgRecords, MSG_COLS);
36
- const raw = {
37
- adapter: "social-douyin",
38
- kind: "message",
39
- originalId: "douyin:message:x",
40
- capturedAt: msgs[0].createdTimeMs,
41
- payload: { kind: "message", ...msgs[0] },
42
- };
43
- const n = a.normalize(raw);
44
- expect(n.events).toHaveLength(1);
45
- expect(n.events[0].subtype).toBe("message");
46
- expect(n.events[0].content.text).toBe("你好呀 hello");
47
- expect(n.events[0].extra.senderUid).toBe("111");
48
- });
49
-
50
- it("maps participant records → deduped contacts (uid only)", () => {
51
- const recs = [
52
- { rowid: "1", cols: ["conv-1", "111", 0] },
53
- { rowid: "2", cols: ["conv-1", "222", 1] },
54
- { rowid: "3", cols: ["conv-2", "222", 0] },
55
- ];
56
- const contacts = mapParticipantRecords(recs, ["conversation_id", "user_id", "sort_order"]);
57
- expect(contacts.map((c) => c.uid).sort()).toEqual(["111", "222"]);
58
- expect(contacts.every((c) => c.fromParticipant)).toBe(true);
59
- });
60
-
61
- it("maps conversation records → conversations (→ TOPIC)", () => {
62
- const recs = [{ rowid: "1", cols: ["conv-9", 1, 1700000002000, 1] }];
63
- const convs = mapConversationRecords(recs, ["conversation_id", "type", "last_msg_create_time", "stranger"]);
64
- expect(convs[0].conversationId).toBe("conv-9");
65
- expect(convs[0].stranger).toBe(true);
66
- expect(convs[0].lastMsgTimeMs).toBe(1700000002000);
67
- });
68
-
69
- it("inferMsgColumns heuristically locates content + created_time", () => {
70
- const cols = inferMsgColumns(msgRecords);
71
- // content = the JSON string col (index 3), created_time = the epoch int (index 4)
72
- expect(cols[3]).toBe("content");
73
- expect(cols[4]).toBe("created_time");
74
- // round-trips through mapMsgRecords
75
- const msgs = mapMsgRecords(msgRecords, cols);
76
- expect(msgs[0].text).toBe("你好呀 hello");
77
- expect(msgs[0].createdTimeMs).toBe(1700000000000);
78
- });
79
-
80
- it("mapSalvaged one-shot returns parseImDb shape", () => {
81
- const out = mapSalvaged({
82
- msg: { records: msgRecords, columns: MSG_COLS },
83
- participant: { records: [{ rowid: "1", cols: ["conv-1", "999"] }], columns: ["conversation_id", "user_id"] },
84
- conversation: { records: [{ rowid: "1", cols: ["conv-1"] }], columns: ["conversation_id"] },
85
- });
86
- expect(out.messages).toHaveLength(2);
87
- expect(out.contacts).toHaveLength(1);
88
- expect(out.conversations).toHaveLength(1);
89
- });
90
- });