@chainlesschain/personal-data-hub 0.4.29 → 0.4.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. package/lib/forensics/qq-nt-collect.js +190 -0
  2. package/lib/prompt-builder.js +15 -1
  3. package/package.json +8 -3
  4. package/__tests__/adapter-guide.test.js +0 -47
  5. package/__tests__/adapter-spec.test.js +0 -78
  6. package/__tests__/adapters/ai-chat-cookie-capture-spec.test.js +0 -211
  7. package/__tests__/adapters/ai-chat-health-checker.test.js +0 -262
  8. package/__tests__/adapters/ai-chat-history.test.js +0 -396
  9. package/__tests__/adapters/ai-chat-http-client.test.js +0 -242
  10. package/__tests__/adapters/ai-chat-vendors.test.js +0 -874
  11. package/__tests__/adapters/alipay-bill-adapter.test.js +0 -538
  12. package/__tests__/adapters/apple-health.test.js +0 -95
  13. package/__tests__/adapters/bank-family.test.js +0 -125
  14. package/__tests__/adapters/biz-tianyancha.test.js +0 -159
  15. package/__tests__/adapters/browser-history-chrome.test.js +0 -377
  16. package/__tests__/adapters/browser-history-edge.test.js +0 -159
  17. package/__tests__/adapters/car-mercedesme.test.js +0 -74
  18. package/__tests__/adapters/doc-baidu-netdisk.test.js +0 -102
  19. package/__tests__/adapters/doc-camscanner.test.js +0 -147
  20. package/__tests__/adapters/doc-platforms.test.js +0 -177
  21. package/__tests__/adapters/edu-huawei-learning-live.test.js +0 -198
  22. package/__tests__/adapters/edu-zuoyebang-live.test.js +0 -226
  23. package/__tests__/adapters/email-adapter-snapshot.test.js +0 -237
  24. package/__tests__/adapters/email-adapter.test.js +0 -742
  25. package/__tests__/adapters/email-classifier.test.js +0 -347
  26. package/__tests__/adapters/email-imap-session.test.js +0 -334
  27. package/__tests__/adapters/email-parser.test.js +0 -244
  28. package/__tests__/adapters/email-pdf-extractor.test.js +0 -529
  29. package/__tests__/adapters/email-providers.test.js +0 -84
  30. package/__tests__/adapters/email-retry-progress.test.js +0 -294
  31. package/__tests__/adapters/email-templates.test.js +0 -822
  32. package/__tests__/adapters/family-23-collectors-scaffold.test.js +0 -182
  33. package/__tests__/adapters/finance-alipay-live.test.js +0 -258
  34. package/__tests__/adapters/finance-dcep.test.js +0 -74
  35. package/__tests__/adapters/fitness-joyrun.test.js +0 -82
  36. package/__tests__/adapters/game-genshin-live.test.js +0 -238
  37. package/__tests__/adapters/game-genshin-scaffold.test.js +0 -108
  38. package/__tests__/adapters/game-honor-of-kings-live.test.js +0 -230
  39. package/__tests__/adapters/git-activity.test.js +0 -222
  40. package/__tests__/adapters/gov-12123.test.js +0 -103
  41. package/__tests__/adapters/gov-ixiamen.test.js +0 -150
  42. package/__tests__/adapters/gov-tax.test.js +0 -135
  43. package/__tests__/adapters/health-meiyou.test.js +0 -125
  44. package/__tests__/adapters/local-files.test.js +0 -264
  45. package/__tests__/adapters/local-im-pc.test.js +0 -154
  46. package/__tests__/adapters/messaging-whatsapp.test.js +0 -289
  47. package/__tests__/adapters/music-kugou.test.js +0 -187
  48. package/__tests__/adapters/music-qq.test.js +0 -112
  49. package/__tests__/adapters/netease-music-live.test.js +0 -244
  50. package/__tests__/adapters/netease-music.test.js +0 -74
  51. package/__tests__/adapters/pc-local-discovery.test.js +0 -141
  52. package/__tests__/adapters/qq-pc-direct-read.test.js +0 -227
  53. package/__tests__/adapters/reading-family.test.js +0 -108
  54. package/__tests__/adapters/recruit-boss.test.js +0 -180
  55. package/__tests__/adapters/shell-history.test.js +0 -180
  56. package/__tests__/adapters/shopping-base.test.js +0 -179
  57. package/__tests__/adapters/shopping-dianping.test.js +0 -239
  58. package/__tests__/adapters/social-bilibili-adb-api-client.test.js +0 -721
  59. package/__tests__/adapters/social-bilibili-adb-chromium-cookies-reader.test.js +0 -346
  60. package/__tests__/adapters/social-bilibili-adb-collector.test.js +0 -284
  61. package/__tests__/adapters/social-bilibili-adb-cookies-extension.test.js +0 -343
  62. package/__tests__/adapters/social-bilibili-adb-snapshot-builder.test.js +0 -296
  63. package/__tests__/adapters/social-csdn.test.js +0 -175
  64. package/__tests__/adapters/social-dongchedi.test.js +0 -165
  65. package/__tests__/adapters/social-douyin-adb-aweme-detail.test.js +0 -165
  66. package/__tests__/adapters/social-douyin-adb-collector.test.js +0 -254
  67. package/__tests__/adapters/social-douyin-adb-db-extension.test.js +0 -114
  68. package/__tests__/adapters/social-douyin-adb-im-db-parser.test.js +0 -304
  69. package/__tests__/adapters/social-douyin-adb-snapshot-builder.test.js +0 -216
  70. package/__tests__/adapters/social-douyin-adb-usage-profile.test.js +0 -229
  71. package/__tests__/adapters/social-douyin-adb-watch-history.test.js +0 -269
  72. package/__tests__/adapters/social-kuaishou-adb-api-client.test.js +0 -496
  73. package/__tests__/adapters/social-kuaishou-adb-collector.test.js +0 -276
  74. package/__tests__/adapters/social-kuaishou-adb-cookies-extension.test.js +0 -152
  75. package/__tests__/adapters/social-kuaishou-adb-snapshot-builder.test.js +0 -178
  76. package/__tests__/adapters/social-toutiao-adb-account-reader.test.js +0 -135
  77. package/__tests__/adapters/social-toutiao-adb-api-client.test.js +0 -626
  78. package/__tests__/adapters/social-toutiao-adb-article.test.js +0 -155
  79. package/__tests__/adapters/social-toutiao-adb-collector.test.js +0 -378
  80. package/__tests__/adapters/social-toutiao-adb-cookies-extension.test.js +0 -193
  81. package/__tests__/adapters/social-toutiao-adb-snapshot-builder.test.js +0 -196
  82. package/__tests__/adapters/social-toutiao-kuaishou-scaffold.test.js +0 -311
  83. package/__tests__/adapters/social-weibo-adb-api-client.test.js +0 -362
  84. package/__tests__/adapters/social-weibo-adb-collector.test.js +0 -201
  85. package/__tests__/adapters/social-weibo-adb-cookies-extension.test.js +0 -167
  86. package/__tests__/adapters/social-weibo-adb-snapshot-builder.test.js +0 -189
  87. package/__tests__/adapters/social-xiaohongshu-adb-api-client.test.js +0 -431
  88. package/__tests__/adapters/social-xiaohongshu-adb-collector.test.js +0 -207
  89. package/__tests__/adapters/social-xiaohongshu-adb-cookies-extension.test.js +0 -0
  90. package/__tests__/adapters/social-xiaohongshu-adb-sign-provider-injection.test.js +0 -351
  91. package/__tests__/adapters/social-xiaohongshu-adb-sign.test.js +0 -130
  92. package/__tests__/adapters/social-xiaohongshu-adb-snapshot-builder.test.js +0 -200
  93. package/__tests__/adapters/social-zhihu.test.js +0 -246
  94. package/__tests__/adapters/system-data-adapter.test.js +0 -443
  95. package/__tests__/adapters/system-data-android-ingest.test.js +0 -144
  96. package/__tests__/adapters/system-data-android.test.js +0 -519
  97. package/__tests__/adapters/system-data-disclosure.test.js +0 -153
  98. package/__tests__/adapters/travel-12306.test.js +0 -512
  99. package/__tests__/adapters/travel-amap.test.js +0 -219
  100. package/__tests__/adapters/travel-baidu-map.test.js +0 -305
  101. package/__tests__/adapters/travel-base.test.js +0 -205
  102. package/__tests__/adapters/travel-ctrip.test.js +0 -377
  103. package/__tests__/adapters/travel-didi-consumer.test.js +0 -66
  104. package/__tests__/adapters/travel-didi.test.js +0 -204
  105. package/__tests__/adapters/travel-tencent-map.test.js +0 -207
  106. package/__tests__/adapters/travel-tongcheng.test.js +0 -289
  107. package/__tests__/adapters/video-platforms.test.js +0 -152
  108. package/__tests__/adapters/video-xigua.test.js +0 -106
  109. package/__tests__/adapters/vscode.test.js +0 -299
  110. package/__tests__/adapters/wechat-bootstrap.test.js +0 -240
  111. package/__tests__/adapters/wechat-env-probe.test.js +0 -162
  112. package/__tests__/adapters/wechat-frida-agent.test.js +0 -322
  113. package/__tests__/adapters/wechat-frida-integration.test.js +0 -149
  114. package/__tests__/adapters/wechat-frida-key-provider.test.js +0 -188
  115. package/__tests__/adapters/wechat-md5-key-provider.test.js +0 -101
  116. package/__tests__/adapters/wechat-pc-direct-read.test.js +0 -365
  117. package/__tests__/adapters/wechat-pc-group-topic.test.js +0 -63
  118. package/__tests__/adapters/wechat-pc-v4-sidecar.test.js +0 -72
  119. package/__tests__/adapters/weread.test.js +0 -123
  120. package/__tests__/adapters/wework-pc.test.js +0 -124
  121. package/__tests__/adapters/win-recent.test.js +0 -192
  122. package/__tests__/analysis-skills.test.js +0 -754
  123. package/__tests__/analysis.test.js +0 -1845
  124. package/__tests__/audio-ximalaya-snapshot.test.js +0 -279
  125. package/__tests__/batch.test.js +0 -133
  126. package/__tests__/bridges-cc-kg.test.js +0 -231
  127. package/__tests__/bridges-cc-llm.test.js +0 -191
  128. package/__tests__/bridges-cc-rag.test.js +0 -162
  129. package/__tests__/categories.test.js +0 -92
  130. package/__tests__/e2e/ai-chat-cross-source-journey.test.js +0 -213
  131. package/__tests__/e2e/full-user-journey.test.js +0 -188
  132. package/__tests__/e2e/local-data-adapters-cli.e2e.test.js +0 -146
  133. package/__tests__/entity-resolver-ingest-hook.test.js +0 -177
  134. package/__tests__/entity-resolver-stages.test.js +0 -411
  135. package/__tests__/entity-resolver-vault.test.js +0 -249
  136. package/__tests__/entity-resolver.test.js +0 -526
  137. package/__tests__/fitness-keep-snapshot.test.js +0 -224
  138. package/__tests__/fixtures/entity-resolver-200-mock.json +0 -96
  139. package/__tests__/ids.test.js +0 -45
  140. package/__tests__/integration/ai-chat-history-registry.test.js +0 -228
  141. package/__tests__/integration/aichat-wizard-end-to-end.test.js +0 -282
  142. package/__tests__/integration/cross-adapter-pipelines.test.js +0 -396
  143. package/__tests__/integration/local-data-adapters-pipeline.test.js +0 -373
  144. package/__tests__/integration/social-bilibili-pipeline.test.js +0 -261
  145. package/__tests__/integration/wechat-bootstrap-end-to-end.test.js +0 -390
  146. package/__tests__/key-providers.test.js +0 -126
  147. package/__tests__/kg-derive.test.js +0 -219
  148. package/__tests__/llm-client.test.js +0 -122
  149. package/__tests__/longtail-adapters.test.js +0 -281
  150. package/__tests__/messaging-qq-snapshot.test.js +0 -294
  151. package/__tests__/mobile-extractor-encrypted.test.js +0 -460
  152. package/__tests__/mobile-extractor.test.js +0 -288
  153. package/__tests__/mock-adapter.test.js +0 -93
  154. package/__tests__/prompt-builder.test.js +0 -249
  155. package/__tests__/query-parser.test.js +0 -365
  156. package/__tests__/rag-derive.test.js +0 -169
  157. package/__tests__/registry-readiness.test.js +0 -292
  158. package/__tests__/registry.test.js +0 -420
  159. package/__tests__/salvage-ingest.test.js +0 -97
  160. package/__tests__/schemas.test.js +0 -331
  161. package/__tests__/shopping-adapters.test.js +0 -392
  162. package/__tests__/shopping-eleme-snapshot.test.js +0 -454
  163. package/__tests__/shopping-pinduoduo-snapshot.test.js +0 -484
  164. package/__tests__/shopping-snapshot.test.js +0 -438
  165. package/__tests__/shopping-vipshop-snapshot.test.js +0 -425
  166. package/__tests__/shopping-xianyu-snapshot.test.js +0 -451
  167. package/__tests__/sidecar-contacts-cross-validate.test.js +0 -186
  168. package/__tests__/sidecar-supervisor.test.js +0 -128
  169. package/__tests__/sign-providers.test.js +0 -62
  170. package/__tests__/social-adapters.test.js +0 -280
  171. package/__tests__/social-bilibili-snapshot.test.js +0 -278
  172. package/__tests__/social-douban-snapshot.test.js +0 -351
  173. package/__tests__/social-douyin-im-direct-read.test.js +0 -377
  174. package/__tests__/social-douyin-salvage-collector.test.js +0 -98
  175. package/__tests__/social-douyin-salvage-mapper.test.js +0 -90
  176. package/__tests__/social-douyin-snapshot.test.js +0 -256
  177. package/__tests__/social-kuaishou-snapshot.test.js +0 -362
  178. package/__tests__/social-toutiao-snapshot.test.js +0 -366
  179. package/__tests__/social-weibo-snapshot.test.js +0 -234
  180. package/__tests__/social-weibo-sqlite-device.test.js +0 -174
  181. package/__tests__/social-xiaohongshu-snapshot.test.js +0 -232
  182. package/__tests__/sqlite-leaf-salvage.test.js +0 -97
  183. package/__tests__/travel-adapters.test.js +0 -483
  184. package/__tests__/travel-maps-snapshot.test.js +0 -426
  185. package/__tests__/vault-driver-error.test.js +0 -74
  186. package/__tests__/vault-search-helpers.test.js +0 -104
  187. package/__tests__/vault-search.test.js +0 -423
  188. package/__tests__/vault.test.js +0 -767
  189. package/__tests__/wechat-adapter.test.js +0 -594
  190. package/__tests__/whatsapp-adapter.test.js +0 -138
  191. package/scripts/_make-fixture-all.js +0 -126
  192. package/scripts/_make-fixture-contacts.js +0 -84
  193. package/scripts/evaluate-entity-resolver.js +0 -213
  194. package/scripts/run-native-tests-sandbox.sh +0 -55
  195. package/scripts/smoke-phase-5-5.js +0 -196
  196. package/scripts/smoke-phase-5-7.js +0 -181
  197. package/scripts/smoke-system-data-contacts.js +0 -309
  198. package/scripts/smoke-system-data.js +0 -312
  199. package/vitest.config.js +0 -88
@@ -1,377 +0,0 @@
1
- "use strict";
2
-
3
- import { describe, it, expect, beforeEach, afterEach } from "vitest";
4
-
5
- const fs = require("node:fs");
6
- const path = require("node:path");
7
- const os = require("node:os");
8
-
9
- const { DouyinAdapter } = require("../lib/adapters/social-douyin");
10
- const { partitionBatch } = require("../lib/batch");
11
-
12
- /**
13
- * 本地直读样板 (Douyin <uid>_im.db local direct-read) + the normalize
14
- * message/contact gap fix.
15
- *
16
- * Two things this covers that nothing else did:
17
- *
18
- * 1. REGRESSION: DouyinAdapter.normalize() used to throw "unknown kind
19
- * message/contact" for IM events — so every 私信 + 联系人 silently
20
- * dropped (registry catches the throw → invalidCount++ → 0 rows in the
21
- * vault) even though the snapshot/ADB path "succeeded". The old snapshot
22
- * test only round-tripped `profile`, so it never caught this.
23
- *
24
- * 2. NEW direct-read mode: `sync({ imDbPath })` / `--input <uid>_im.db`
25
- * opens the plaintext SQLite directly (no ADB, no snapshot JSON) and
26
- * emits message/contact raws whose originalIds match the snapshot path
27
- * (idempotent across both routes).
28
- *
29
- * No native SQLite needed — a fake Database driver is injected via
30
- * `_deps.dbDriverFactory` (the parser accepts it as `_databaseClass`).
31
- */
32
-
33
- // Fake better-sqlite3-style driver answering the parser's PRAGMA + SELECTs.
34
- function makeFakeDb({ msgRows, userRows, msgCols, userCols, partCols, partRows, convCols, convRows }) {
35
- class FakeStmt {
36
- constructor(sql) {
37
- this.sql = sql;
38
- }
39
- all() {
40
- const s = this.sql;
41
- if (/PRAGMA table_info\(msg\)/.test(s)) return msgCols;
42
- if (/FROM msg/.test(s)) return msgRows;
43
- if (/PRAGMA table_info\(SIMPLE_USER\)/.test(s)) return userCols || [];
44
- if (/FROM SIMPLE_USER/.test(s)) return userRows || [];
45
- if (/PRAGMA table_info\(participant\)/.test(s)) return partCols || [];
46
- if (/FROM participant/.test(s)) return partRows || [];
47
- if (/PRAGMA table_info\(conversation_list\)/.test(s)) return convCols || [];
48
- if (/FROM conversation_list/.test(s)) return convRows || [];
49
- return [];
50
- }
51
- }
52
- return class FakeDb {
53
- // eslint-disable-next-line no-unused-vars
54
- constructor(_path, _opts) {}
55
- prepare(sql) {
56
- return new FakeStmt(sql);
57
- }
58
- close() {}
59
- };
60
- }
61
-
62
- const DEFAULT_FAKE = {
63
- msgCols: [
64
- { name: "sender" },
65
- { name: "created_time" },
66
- { name: "content" },
67
- { name: "conversation_id" },
68
- { name: "read_status" },
69
- ],
70
- msgRows: [
71
- {
72
- sender: 111,
73
- createdTime: 1700000000000,
74
- content: JSON.stringify({ text: "你好呀" }),
75
- conversationId: "conv-1",
76
- readStatus: 1,
77
- },
78
- {
79
- sender: 222,
80
- createdTime: 1700000001000,
81
- content: JSON.stringify({ text: "在吗" }),
82
- conversationId: "conv-1",
83
- readStatus: 0,
84
- },
85
- ],
86
- userCols: [
87
- { name: "UID" },
88
- { name: "short_id" },
89
- { name: "name" },
90
- { name: "avatar_url" },
91
- { name: "follow_status" },
92
- ],
93
- userRows: [
94
- {
95
- uid: 222,
96
- shortId: 888,
97
- name: "小明",
98
- avatarUrl: "http://x/a.jpg",
99
- followStatus: 2,
100
- },
101
- ],
102
- };
103
-
104
- function freshAdapter(fakeSpec = DEFAULT_FAKE, fsOverride) {
105
- const a = new DouyinAdapter();
106
- a._deps.fs = fsOverride || { existsSync: () => true };
107
- a._deps.dbDriverFactory = () => makeFakeDb(fakeSpec);
108
- return a;
109
- }
110
-
111
- async function collect(iter) {
112
- const out = [];
113
- for await (const r of iter) out.push(r);
114
- return out;
115
- }
116
-
117
- describe("DouyinAdapter — normalize message/contact (regression)", () => {
118
- it("normalizes a message raw into one MESSAGE event (no throw)", () => {
119
- const a = new DouyinAdapter();
120
- const raw = {
121
- adapter: "social-douyin",
122
- kind: "message",
123
- originalId: "douyin:message:msg-conv-1-1700000000000",
124
- capturedAt: 1700000000000,
125
- payload: {
126
- kind: "message",
127
- text: "你好",
128
- senderUid: "111",
129
- conversationId: "conv-1",
130
- readStatus: 1,
131
- contentBlob: '{"text":"你好"}',
132
- },
133
- };
134
- const n = a.normalize(raw);
135
- expect(n.events).toHaveLength(1);
136
- expect(n.persons).toHaveLength(0);
137
- const ev = n.events[0];
138
- expect(ev.subtype).toBe("message");
139
- expect(ev.content.text).toBe("你好");
140
- expect(ev.extra.senderUid).toBe("111");
141
- expect(ev.extra.conversationId).toBe("conv-1");
142
- expect(ev.extra.platform).toBe("douyin");
143
- });
144
-
145
- it("normalizes a contact raw into one CONTACT person", () => {
146
- const a = new DouyinAdapter();
147
- const raw = {
148
- adapter: "social-douyin",
149
- kind: "contact",
150
- originalId: "douyin:contact:contact-222",
151
- capturedAt: 1700000000000,
152
- payload: {
153
- kind: "contact",
154
- uid: "222",
155
- shortId: "888",
156
- name: "小明",
157
- avatarUrl: "http://x/a.jpg",
158
- followStatus: 2,
159
- },
160
- };
161
- const n = a.normalize(raw);
162
- expect(n.persons).toHaveLength(1);
163
- expect(n.events).toHaveLength(0);
164
- const per = n.persons[0];
165
- expect(per.subtype).toBe("contact");
166
- expect(per.id).toBe("person-douyin-222");
167
- expect(per.names).toEqual(["小明"]);
168
- expect(per.identifiers["douyin-uid"]).toEqual(["222"]);
169
- expect(per.extra.followStatus).toBe(2);
170
- });
171
-
172
- it("an empty-text (non-text) message still produces a valid event", () => {
173
- const a = new DouyinAdapter();
174
- const raw = {
175
- adapter: "social-douyin",
176
- kind: "message",
177
- originalId: "douyin:message:x",
178
- capturedAt: 1700000000000,
179
- payload: { kind: "message", text: null, senderUid: "111" },
180
- };
181
- const n = a.normalize(raw);
182
- const { valid, invalidReasons } = partitionBatch({
183
- events: n.events,
184
- persons: [],
185
- places: [],
186
- items: [],
187
- topics: [],
188
- });
189
- expect(invalidReasons).toHaveLength(0);
190
- expect(valid.events).toHaveLength(1);
191
- });
192
- });
193
-
194
- describe("DouyinAdapter — 本地直读 <uid>_im.db", () => {
195
- let tmpDir;
196
- beforeEach(() => {
197
- tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "douyin-imdb-"));
198
- });
199
- afterEach(() => {
200
- if (tmpDir && fs.existsSync(tmpDir)) {
201
- fs.rmSync(tmpDir, { recursive: true, force: true });
202
- }
203
- });
204
-
205
- it("sync({ imDbPath }) yields message + contact raws", async () => {
206
- const a = freshAdapter();
207
- const raws = await collect(a.sync({ imDbPath: "/fake/123_im.db" }));
208
- expect(raws.map((r) => r.kind)).toEqual(["message", "message", "contact"]);
209
- });
210
-
211
- it("direct-read events normalize to a fully valid batch (no silent drop)", async () => {
212
- const a = freshAdapter();
213
- const raws = await collect(a.sync({ imDbPath: "/fake/123_im.db" }));
214
- const merged = { events: [], persons: [], places: [], items: [], topics: [] };
215
- for (const r of raws) {
216
- const n = a.normalize(r);
217
- for (const k of Object.keys(merged)) merged[k].push(...n[k]);
218
- }
219
- const { valid, invalidReasons } = partitionBatch(merged);
220
- expect(invalidReasons).toHaveLength(0);
221
- expect(valid.events).toHaveLength(2); // two messages
222
- expect(valid.persons).toHaveLength(1); // one contact
223
- });
224
-
225
- it("originalIds match the snapshot composite strategy (idempotent across routes)", async () => {
226
- const a = freshAdapter();
227
- const raws = await collect(a.sync({ imDbPath: "/fake/123_im.db" }));
228
- expect(raws.map((r) => r.originalId)).toEqual([
229
- "douyin:message:msg-conv-1-1700000000000",
230
- "douyin:message:msg-conv-1-1700000001000",
231
- "douyin:contact:contact-222",
232
- ]);
233
- });
234
-
235
- it("respects include={message:false} / limit", async () => {
236
- const a = freshAdapter();
237
- const onlyContacts = await collect(
238
- a.sync({ imDbPath: "/fake/123_im.db", include: { message: false } }),
239
- );
240
- expect(onlyContacts.every((r) => r.kind === "contact")).toBe(true);
241
-
242
- const capped = await collect(a.sync({ imDbPath: "/fake/123_im.db", limit: 1 }));
243
- expect(capped).toHaveLength(1);
244
- });
245
-
246
- it("emits an im-db-parsed progress event with the diagnostic", async () => {
247
- const a = freshAdapter();
248
- const events = [];
249
- await collect(
250
- a.sync({
251
- imDbPath: "/fake/123_im.db",
252
- onProgress: (e) => events.push(e),
253
- }),
254
- );
255
- const parsed = events.find((e) => e.phase === "im-db-parsed");
256
- expect(parsed).toBeTruthy();
257
- expect(parsed.hadMsgTable).toBe(true);
258
- expect(parsed.hadSimpleUserTable).toBe(true);
259
- expect(parsed.messageCount).toBe(2);
260
- expect(parsed.contactCount).toBe(1);
261
- });
262
-
263
- it("missing db file yields nothing (no throw)", async () => {
264
- const a = freshAdapter(DEFAULT_FAKE, { existsSync: () => false });
265
- const raws = await collect(a.sync({ imDbPath: "/does/not/exist_im.db" }));
266
- expect(raws).toHaveLength(0);
267
- });
268
-
269
- // device-verified 2026-06-16: real Douyin IM schema uses `participant`
270
- // (conversation_id, user_id), not SIMPLE_USER → contacts must come from it.
271
- it("extracts contacts from `participant` when SIMPLE_USER absent (real schema)", async () => {
272
- const spec = {
273
- msgCols: DEFAULT_FAKE.msgCols,
274
- msgRows: DEFAULT_FAKE.msgRows,
275
- userCols: [], // no SIMPLE_USER table on a real device
276
- userRows: [],
277
- partCols: [{ name: "conversation_id" }, { name: "user_id" }, { name: "sort_order" }],
278
- partRows: [{ uid: 111 }, { uid: 222 }, { uid: 222 }], // dup 222 → deduped
279
- };
280
- const a = freshAdapter(spec);
281
- const raws = await collect(a.sync({ imDbPath: "/fake/123_im.db" }));
282
- const contacts = raws.filter((r) => r.kind === "contact");
283
- expect(contacts.map((r) => r.payload.uid).sort()).toEqual(["111", "222"]);
284
- // each participant uid → a CONTACT person keyed by douyin-uid
285
- const n = a.normalize(contacts[0]);
286
- expect(n.persons[0].identifiers["douyin-uid"]).toEqual([contacts[0].payload.uid]);
287
- });
288
-
289
- // device-verified: conversation_list row → PDH TOPIC (one chat thread).
290
- it("maps conversation_list rows to TOPIC entities", async () => {
291
- const spec = {
292
- msgCols: DEFAULT_FAKE.msgCols,
293
- msgRows: DEFAULT_FAKE.msgRows,
294
- userCols: [], userRows: [],
295
- convCols: [
296
- { name: "conversation_id" }, { name: "type" },
297
- { name: "last_msg_create_time" }, { name: "stranger" },
298
- ],
299
- convRows: [
300
- { convId: "conv-1", convType: 0, lastMsgTime: 1700000002000, stranger: 0 },
301
- { convId: "conv-2", convType: 1, lastMsgTime: 1700000003000, stranger: 1 },
302
- ],
303
- };
304
- const a = freshAdapter(spec);
305
- const raws = await collect(a.sync({ imDbPath: "/fake/123_im.db" }));
306
- const convs = raws.filter((r) => r.kind === "conversation");
307
- expect(convs.map((r) => r.payload.conversationId)).toEqual(["conv-1", "conv-2"]);
308
- const n = a.normalize(convs[1]);
309
- expect(n.topics).toHaveLength(1);
310
- expect(n.topics[0].type).toBe("topic");
311
- expect(n.topics[0].extra.conversationId).toBe("conv-2");
312
- expect(n.topics[0].extra.stranger).toBe(true);
313
- expect(n.topics[0].extra.lastMsgTimeMs).toBe(1700000003000);
314
- });
315
-
316
- it("participant dedups against SIMPLE_USER contacts (no double-count)", async () => {
317
- const spec = {
318
- msgCols: DEFAULT_FAKE.msgCols,
319
- msgRows: DEFAULT_FAKE.msgRows,
320
- userCols: DEFAULT_FAKE.userCols,
321
- userRows: DEFAULT_FAKE.userRows, // uid 222 from SIMPLE_USER
322
- partCols: [{ name: "conversation_id" }, { name: "user_id" }],
323
- partRows: [{ uid: 222 }, { uid: 333 }], // 222 already seen, only 333 is new
324
- };
325
- const a = freshAdapter(spec);
326
- const raws = await collect(a.sync({ imDbPath: "/fake/123_im.db" }));
327
- const uids = raws.filter((r) => r.kind === "contact").map((r) => r.payload.uid).sort();
328
- expect(uids).toEqual(["222", "333"]); // 222 not duplicated
329
- });
330
- });
331
-
332
- describe("DouyinAdapter — sync() input routing (sniff)", () => {
333
- let tmpDir;
334
- beforeEach(() => {
335
- tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "douyin-route-"));
336
- });
337
- afterEach(() => {
338
- if (tmpDir && fs.existsSync(tmpDir)) {
339
- fs.rmSync(tmpDir, { recursive: true, force: true });
340
- }
341
- });
342
-
343
- it("--input <file with SQLite magic header> routes to direct IM read", async () => {
344
- // Real file with the 16-byte SQLite magic header so _looksLikeSqlite
345
- // (which uses real fs) returns true; the fake driver supplies the rows.
346
- const dbFile = path.join(tmpDir, "123_im.db");
347
- const header = Buffer.alloc(100);
348
- header.write("SQLite format 3", 0, "latin1");
349
- fs.writeFileSync(dbFile, header);
350
-
351
- const a = new DouyinAdapter();
352
- a._deps.dbDriverFactory = () => makeFakeDb(DEFAULT_FAKE);
353
- const raws = [];
354
- for await (const r of a.sync({ inputPath: dbFile })) raws.push(r);
355
- expect(raws.map((r) => r.kind)).toEqual(["message", "message", "contact"]);
356
- });
357
-
358
- it("--input <JSON snapshot> routes to snapshot mode (not IM)", async () => {
359
- const snapFile = path.join(tmpDir, "social-douyin.json");
360
- fs.writeFileSync(
361
- snapFile,
362
- JSON.stringify({
363
- schemaVersion: 1,
364
- snapshottedAt: 1700000000000,
365
- account: { secUid: "MS4abc", shortId: "9", displayName: "me" },
366
- events: [
367
- { kind: "profile", id: "profile-MS4abc", capturedAt: 1700000000000, secUid: "MS4abc", nickname: "me" },
368
- ],
369
- }),
370
- );
371
- const a = new DouyinAdapter();
372
- const raws = [];
373
- for await (const r of a.sync({ inputPath: snapFile })) raws.push(r);
374
- expect(raws).toHaveLength(1);
375
- expect(raws[0].kind).toBe("profile");
376
- });
377
- });
@@ -1,98 +0,0 @@
1
- "use strict";
2
-
3
- import { describe, it, expect, beforeAll, afterAll } from "vitest";
4
-
5
- const fs = require("node:fs");
6
- const path = require("node:path");
7
- const os = require("node:os");
8
-
9
- const {
10
- salvageDumpToSnapshot,
11
- salvageAndSync,
12
- } = require("../lib/adapters/social-douyin-adb/collector");
13
-
14
- // Build a real (UTF-8) SQLite DB via the SQLCipher-capable driver and treat its
15
- // raw bytes as a "memory dump" — proving the salvage → snapshot → ingest path
16
- // recovers message rows with no key (the Method-B capstone). The msg-table
17
- // column order matches the device-verified Douyin IM schema.
18
- describe("social-douyin-adb salvage collector", () => {
19
- let dir, dbPath;
20
- const COLUMNS = ["msg_uuid", "conversation_id", "sender", "content", "created_time"];
21
-
22
- beforeAll(() => {
23
- const Database = require("better-sqlite3-multiple-ciphers");
24
- dir = fs.mkdtempSync(path.join(os.tmpdir(), "salvage-col-"));
25
- dbPath = path.join(dir, "u.db");
26
- const db = new Database(dbPath);
27
- db.exec(
28
- "CREATE TABLE msg(msg_uuid TEXT, conversation_id TEXT, sender INTEGER, content TEXT, created_time INTEGER)",
29
- );
30
- const ins = db.prepare("INSERT INTO msg VALUES(?,?,?,?,?)");
31
- ins.run("uuid-1", "conv-1", 111, "你好呀 hello", 1700000000000);
32
- ins.run("uuid-2", "conv-1", 222, "在吗?晚上一起吃饭", 1700000001000);
33
- ins.run("uuid-3", "conv-2", 333, "ok 👍", 1700000002000);
34
- db.close();
35
- });
36
-
37
- afterAll(() => {
38
- try { fs.rmSync(dir, { recursive: true, force: true }); } catch (_e) { /* ignore */ }
39
- });
40
-
41
- it("salvageDumpToSnapshot recovers msg rows → snapshot JSON (explicit columns)", () => {
42
- const res = salvageDumpToSnapshot(dbPath, {
43
- uid: "1234567890123456789",
44
- columns: COLUMNS,
45
- now: () => 1700000099000,
46
- });
47
- expect(res.uid).toBe("1234567890123456789");
48
- expect(res.eventCounts.message).toBe(3);
49
- expect(res.salvage.recordsSalvaged).toBeGreaterThanOrEqual(3);
50
-
51
- const snap = JSON.parse(fs.readFileSync(res.snapshotPath, "utf-8"));
52
- const msgs = snap.events.filter((e) => e.kind === "message");
53
- expect(msgs.length).toBe(3);
54
- const texts = msgs.map((m) => m.text).sort();
55
- expect(texts).toContain("你好呀 hello");
56
- expect(texts).toContain("ok 👍"); // UTF-8 emoji survives
57
- const m1 = msgs.find((m) => m.text === "你好呀 hello");
58
- expect(m1.conversationId).toBe("conv-1");
59
- expect(m1.senderUid).toBe("111");
60
- fs.rmSync(res.snapshotPath, { force: true });
61
- });
62
-
63
- it("infers columns heuristically when none given (content + created_time)", () => {
64
- const res = salvageDumpToSnapshot(dbPath, { now: () => 1700000099000 });
65
- expect(res.eventCounts.message).toBe(3);
66
- const snap = JSON.parse(fs.readFileSync(res.snapshotPath, "utf-8"));
67
- const texts = snap.events.filter((e) => e.kind === "message").map((m) => m.text);
68
- expect(texts).toContain("在吗?晚上一起吃饭");
69
- fs.rmSync(res.snapshotPath, { force: true });
70
- });
71
-
72
- it("salvageAndSync feeds the snapshot to registry.syncAdapter then cleans up", async () => {
73
- let captured = null;
74
- const fakeRegistry = {
75
- syncAdapter: async (name, opts) => {
76
- captured = { name, opts };
77
- // verify the snapshot file exists at sync time
78
- const snap = JSON.parse(fs.readFileSync(opts.inputPath, "utf-8"));
79
- return { ingested: snap.events.length, adapter: name, kgTriples: 0, ragDocs: 0 };
80
- },
81
- };
82
- const report = await salvageAndSync(fakeRegistry, dbPath, {
83
- uid: "1234567890123456789",
84
- columns: COLUMNS,
85
- });
86
- expect(captured.name).toBe("social-douyin");
87
- expect(report.ingested).toBe(3);
88
- expect(report.douyin.mode).toBe("salvage");
89
- expect(report.douyin.eventCounts.message).toBe(3);
90
- expect(report.douyin.cleanupFailed).toBe(false);
91
- // snapshot file cleaned up in finally
92
- expect(fs.existsSync(captured.opts.inputPath)).toBe(false);
93
- });
94
-
95
- it("throws on missing dumpPath", () => {
96
- expect(() => salvageDumpToSnapshot("")).toThrow();
97
- });
98
- });
@@ -1,90 +0,0 @@
1
- "use strict";
2
-
3
- import { describe, it, expect } from "vitest";
4
-
5
- const {
6
- mapMsgRecords,
7
- mapParticipantRecords,
8
- mapConversationRecords,
9
- inferMsgColumns,
10
- mapSalvaged,
11
- } = require("../lib/adapters/social-douyin-adb/salvage-mapper");
12
- const { DouyinAdapter } = require("../lib/adapters/social-douyin");
13
-
14
- // End-to-end glue: leaf-salvaged {rowid,cols} → parseImDb shape → adapter.normalize
15
- // → PDH entities. Closes Method-B: dump → salvage → mapper → ingest.
16
- describe("salvage-mapper — salvaged records → PDH entities", () => {
17
- // msg column order (device-verified subset, see pdh-app-db-schemas.md)
18
- const MSG_COLS = ["msg_uuid", "conversation_id", "sender", "content", "created_time"];
19
- const msgRecords = [
20
- { rowid: "1", cols: ["u1", "conv-1", 111, JSON.stringify({ text: "你好呀 hello" }), 1700000000000] },
21
- { rowid: "2", cols: ["u2", "conv-1", 222, JSON.stringify({ text: "在吗" }), 1700000001000] },
22
- ];
23
-
24
- it("maps msg records → message objects (im-db-parser shape)", () => {
25
- const msgs = mapMsgRecords(msgRecords, MSG_COLS);
26
- expect(msgs).toHaveLength(2);
27
- expect(msgs[0].senderUid).toBe("111");
28
- expect(msgs[0].conversationId).toBe("conv-1");
29
- expect(msgs[0].createdTimeMs).toBe(1700000000000);
30
- expect(msgs[0].text).toBe("你好呀 hello"); // content JSON → text extracted
31
- });
32
-
33
- it("mapped messages normalize through DouyinAdapter → MESSAGE events", () => {
34
- const a = new DouyinAdapter();
35
- const msgs = mapMsgRecords(msgRecords, MSG_COLS);
36
- const raw = {
37
- adapter: "social-douyin",
38
- kind: "message",
39
- originalId: "douyin:message:x",
40
- capturedAt: msgs[0].createdTimeMs,
41
- payload: { kind: "message", ...msgs[0] },
42
- };
43
- const n = a.normalize(raw);
44
- expect(n.events).toHaveLength(1);
45
- expect(n.events[0].subtype).toBe("message");
46
- expect(n.events[0].content.text).toBe("你好呀 hello");
47
- expect(n.events[0].extra.senderUid).toBe("111");
48
- });
49
-
50
- it("maps participant records → deduped contacts (uid only)", () => {
51
- const recs = [
52
- { rowid: "1", cols: ["conv-1", "111", 0] },
53
- { rowid: "2", cols: ["conv-1", "222", 1] },
54
- { rowid: "3", cols: ["conv-2", "222", 0] },
55
- ];
56
- const contacts = mapParticipantRecords(recs, ["conversation_id", "user_id", "sort_order"]);
57
- expect(contacts.map((c) => c.uid).sort()).toEqual(["111", "222"]);
58
- expect(contacts.every((c) => c.fromParticipant)).toBe(true);
59
- });
60
-
61
- it("maps conversation records → conversations (→ TOPIC)", () => {
62
- const recs = [{ rowid: "1", cols: ["conv-9", 1, 1700000002000, 1] }];
63
- const convs = mapConversationRecords(recs, ["conversation_id", "type", "last_msg_create_time", "stranger"]);
64
- expect(convs[0].conversationId).toBe("conv-9");
65
- expect(convs[0].stranger).toBe(true);
66
- expect(convs[0].lastMsgTimeMs).toBe(1700000002000);
67
- });
68
-
69
- it("inferMsgColumns heuristically locates content + created_time", () => {
70
- const cols = inferMsgColumns(msgRecords);
71
- // content = the JSON string col (index 3), created_time = the epoch int (index 4)
72
- expect(cols[3]).toBe("content");
73
- expect(cols[4]).toBe("created_time");
74
- // round-trips through mapMsgRecords
75
- const msgs = mapMsgRecords(msgRecords, cols);
76
- expect(msgs[0].text).toBe("你好呀 hello");
77
- expect(msgs[0].createdTimeMs).toBe(1700000000000);
78
- });
79
-
80
- it("mapSalvaged one-shot returns parseImDb shape", () => {
81
- const out = mapSalvaged({
82
- msg: { records: msgRecords, columns: MSG_COLS },
83
- participant: { records: [{ rowid: "1", cols: ["conv-1", "999"] }], columns: ["conversation_id", "user_id"] },
84
- conversation: { records: [{ rowid: "1", cols: ["conv-1"] }], columns: ["conversation_id"] },
85
- });
86
- expect(out.messages).toHaveLength(2);
87
- expect(out.contacts).toHaveLength(1);
88
- expect(out.conversations).toHaveLength(1);
89
- });
90
- });