@chainlesschain/personal-data-hub 0.4.29 → 0.4.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198)
  1. package/lib/prompt-builder.js +15 -1
  2. package/package.json +4 -1
  3. package/__tests__/adapter-guide.test.js +0 -47
  4. package/__tests__/adapter-spec.test.js +0 -78
  5. package/__tests__/adapters/ai-chat-cookie-capture-spec.test.js +0 -211
  6. package/__tests__/adapters/ai-chat-health-checker.test.js +0 -262
  7. package/__tests__/adapters/ai-chat-history.test.js +0 -396
  8. package/__tests__/adapters/ai-chat-http-client.test.js +0 -242
  9. package/__tests__/adapters/ai-chat-vendors.test.js +0 -874
  10. package/__tests__/adapters/alipay-bill-adapter.test.js +0 -538
  11. package/__tests__/adapters/apple-health.test.js +0 -95
  12. package/__tests__/adapters/bank-family.test.js +0 -125
  13. package/__tests__/adapters/biz-tianyancha.test.js +0 -159
  14. package/__tests__/adapters/browser-history-chrome.test.js +0 -377
  15. package/__tests__/adapters/browser-history-edge.test.js +0 -159
  16. package/__tests__/adapters/car-mercedesme.test.js +0 -74
  17. package/__tests__/adapters/doc-baidu-netdisk.test.js +0 -102
  18. package/__tests__/adapters/doc-camscanner.test.js +0 -147
  19. package/__tests__/adapters/doc-platforms.test.js +0 -177
  20. package/__tests__/adapters/edu-huawei-learning-live.test.js +0 -198
  21. package/__tests__/adapters/edu-zuoyebang-live.test.js +0 -226
  22. package/__tests__/adapters/email-adapter-snapshot.test.js +0 -237
  23. package/__tests__/adapters/email-adapter.test.js +0 -742
  24. package/__tests__/adapters/email-classifier.test.js +0 -347
  25. package/__tests__/adapters/email-imap-session.test.js +0 -334
  26. package/__tests__/adapters/email-parser.test.js +0 -244
  27. package/__tests__/adapters/email-pdf-extractor.test.js +0 -529
  28. package/__tests__/adapters/email-providers.test.js +0 -84
  29. package/__tests__/adapters/email-retry-progress.test.js +0 -294
  30. package/__tests__/adapters/email-templates.test.js +0 -822
  31. package/__tests__/adapters/family-23-collectors-scaffold.test.js +0 -182
  32. package/__tests__/adapters/finance-alipay-live.test.js +0 -258
  33. package/__tests__/adapters/finance-dcep.test.js +0 -74
  34. package/__tests__/adapters/fitness-joyrun.test.js +0 -82
  35. package/__tests__/adapters/game-genshin-live.test.js +0 -238
  36. package/__tests__/adapters/game-genshin-scaffold.test.js +0 -108
  37. package/__tests__/adapters/game-honor-of-kings-live.test.js +0 -230
  38. package/__tests__/adapters/git-activity.test.js +0 -222
  39. package/__tests__/adapters/gov-12123.test.js +0 -103
  40. package/__tests__/adapters/gov-ixiamen.test.js +0 -150
  41. package/__tests__/adapters/gov-tax.test.js +0 -135
  42. package/__tests__/adapters/health-meiyou.test.js +0 -125
  43. package/__tests__/adapters/local-files.test.js +0 -264
  44. package/__tests__/adapters/local-im-pc.test.js +0 -154
  45. package/__tests__/adapters/messaging-whatsapp.test.js +0 -289
  46. package/__tests__/adapters/music-kugou.test.js +0 -187
  47. package/__tests__/adapters/music-qq.test.js +0 -112
  48. package/__tests__/adapters/netease-music-live.test.js +0 -244
  49. package/__tests__/adapters/netease-music.test.js +0 -74
  50. package/__tests__/adapters/pc-local-discovery.test.js +0 -141
  51. package/__tests__/adapters/qq-pc-direct-read.test.js +0 -227
  52. package/__tests__/adapters/reading-family.test.js +0 -108
  53. package/__tests__/adapters/recruit-boss.test.js +0 -180
  54. package/__tests__/adapters/shell-history.test.js +0 -180
  55. package/__tests__/adapters/shopping-base.test.js +0 -179
  56. package/__tests__/adapters/shopping-dianping.test.js +0 -239
  57. package/__tests__/adapters/social-bilibili-adb-api-client.test.js +0 -721
  58. package/__tests__/adapters/social-bilibili-adb-chromium-cookies-reader.test.js +0 -346
  59. package/__tests__/adapters/social-bilibili-adb-collector.test.js +0 -284
  60. package/__tests__/adapters/social-bilibili-adb-cookies-extension.test.js +0 -343
  61. package/__tests__/adapters/social-bilibili-adb-snapshot-builder.test.js +0 -296
  62. package/__tests__/adapters/social-csdn.test.js +0 -175
  63. package/__tests__/adapters/social-dongchedi.test.js +0 -165
  64. package/__tests__/adapters/social-douyin-adb-aweme-detail.test.js +0 -165
  65. package/__tests__/adapters/social-douyin-adb-collector.test.js +0 -254
  66. package/__tests__/adapters/social-douyin-adb-db-extension.test.js +0 -114
  67. package/__tests__/adapters/social-douyin-adb-im-db-parser.test.js +0 -304
  68. package/__tests__/adapters/social-douyin-adb-snapshot-builder.test.js +0 -216
  69. package/__tests__/adapters/social-douyin-adb-usage-profile.test.js +0 -229
  70. package/__tests__/adapters/social-douyin-adb-watch-history.test.js +0 -269
  71. package/__tests__/adapters/social-kuaishou-adb-api-client.test.js +0 -496
  72. package/__tests__/adapters/social-kuaishou-adb-collector.test.js +0 -276
  73. package/__tests__/adapters/social-kuaishou-adb-cookies-extension.test.js +0 -152
  74. package/__tests__/adapters/social-kuaishou-adb-snapshot-builder.test.js +0 -178
  75. package/__tests__/adapters/social-toutiao-adb-account-reader.test.js +0 -135
  76. package/__tests__/adapters/social-toutiao-adb-api-client.test.js +0 -626
  77. package/__tests__/adapters/social-toutiao-adb-article.test.js +0 -155
  78. package/__tests__/adapters/social-toutiao-adb-collector.test.js +0 -378
  79. package/__tests__/adapters/social-toutiao-adb-cookies-extension.test.js +0 -193
  80. package/__tests__/adapters/social-toutiao-adb-snapshot-builder.test.js +0 -196
  81. package/__tests__/adapters/social-toutiao-kuaishou-scaffold.test.js +0 -311
  82. package/__tests__/adapters/social-weibo-adb-api-client.test.js +0 -362
  83. package/__tests__/adapters/social-weibo-adb-collector.test.js +0 -201
  84. package/__tests__/adapters/social-weibo-adb-cookies-extension.test.js +0 -167
  85. package/__tests__/adapters/social-weibo-adb-snapshot-builder.test.js +0 -189
  86. package/__tests__/adapters/social-xiaohongshu-adb-api-client.test.js +0 -431
  87. package/__tests__/adapters/social-xiaohongshu-adb-collector.test.js +0 -207
  88. package/__tests__/adapters/social-xiaohongshu-adb-cookies-extension.test.js +0 -0
  89. package/__tests__/adapters/social-xiaohongshu-adb-sign-provider-injection.test.js +0 -351
  90. package/__tests__/adapters/social-xiaohongshu-adb-sign.test.js +0 -130
  91. package/__tests__/adapters/social-xiaohongshu-adb-snapshot-builder.test.js +0 -200
  92. package/__tests__/adapters/social-zhihu.test.js +0 -246
  93. package/__tests__/adapters/system-data-adapter.test.js +0 -443
  94. package/__tests__/adapters/system-data-android-ingest.test.js +0 -144
  95. package/__tests__/adapters/system-data-android.test.js +0 -519
  96. package/__tests__/adapters/system-data-disclosure.test.js +0 -153
  97. package/__tests__/adapters/travel-12306.test.js +0 -512
  98. package/__tests__/adapters/travel-amap.test.js +0 -219
  99. package/__tests__/adapters/travel-baidu-map.test.js +0 -305
  100. package/__tests__/adapters/travel-base.test.js +0 -205
  101. package/__tests__/adapters/travel-ctrip.test.js +0 -377
  102. package/__tests__/adapters/travel-didi-consumer.test.js +0 -66
  103. package/__tests__/adapters/travel-didi.test.js +0 -204
  104. package/__tests__/adapters/travel-tencent-map.test.js +0 -207
  105. package/__tests__/adapters/travel-tongcheng.test.js +0 -289
  106. package/__tests__/adapters/video-platforms.test.js +0 -152
  107. package/__tests__/adapters/video-xigua.test.js +0 -106
  108. package/__tests__/adapters/vscode.test.js +0 -299
  109. package/__tests__/adapters/wechat-bootstrap.test.js +0 -240
  110. package/__tests__/adapters/wechat-env-probe.test.js +0 -162
  111. package/__tests__/adapters/wechat-frida-agent.test.js +0 -322
  112. package/__tests__/adapters/wechat-frida-integration.test.js +0 -149
  113. package/__tests__/adapters/wechat-frida-key-provider.test.js +0 -188
  114. package/__tests__/adapters/wechat-md5-key-provider.test.js +0 -101
  115. package/__tests__/adapters/wechat-pc-direct-read.test.js +0 -365
  116. package/__tests__/adapters/wechat-pc-group-topic.test.js +0 -63
  117. package/__tests__/adapters/wechat-pc-v4-sidecar.test.js +0 -72
  118. package/__tests__/adapters/weread.test.js +0 -123
  119. package/__tests__/adapters/wework-pc.test.js +0 -124
  120. package/__tests__/adapters/win-recent.test.js +0 -192
  121. package/__tests__/analysis-skills.test.js +0 -754
  122. package/__tests__/analysis.test.js +0 -1845
  123. package/__tests__/audio-ximalaya-snapshot.test.js +0 -279
  124. package/__tests__/batch.test.js +0 -133
  125. package/__tests__/bridges-cc-kg.test.js +0 -231
  126. package/__tests__/bridges-cc-llm.test.js +0 -191
  127. package/__tests__/bridges-cc-rag.test.js +0 -162
  128. package/__tests__/categories.test.js +0 -92
  129. package/__tests__/e2e/ai-chat-cross-source-journey.test.js +0 -213
  130. package/__tests__/e2e/full-user-journey.test.js +0 -188
  131. package/__tests__/e2e/local-data-adapters-cli.e2e.test.js +0 -146
  132. package/__tests__/entity-resolver-ingest-hook.test.js +0 -177
  133. package/__tests__/entity-resolver-stages.test.js +0 -411
  134. package/__tests__/entity-resolver-vault.test.js +0 -249
  135. package/__tests__/entity-resolver.test.js +0 -526
  136. package/__tests__/fitness-keep-snapshot.test.js +0 -224
  137. package/__tests__/fixtures/entity-resolver-200-mock.json +0 -96
  138. package/__tests__/ids.test.js +0 -45
  139. package/__tests__/integration/ai-chat-history-registry.test.js +0 -228
  140. package/__tests__/integration/aichat-wizard-end-to-end.test.js +0 -282
  141. package/__tests__/integration/cross-adapter-pipelines.test.js +0 -396
  142. package/__tests__/integration/local-data-adapters-pipeline.test.js +0 -373
  143. package/__tests__/integration/social-bilibili-pipeline.test.js +0 -261
  144. package/__tests__/integration/wechat-bootstrap-end-to-end.test.js +0 -390
  145. package/__tests__/key-providers.test.js +0 -126
  146. package/__tests__/kg-derive.test.js +0 -219
  147. package/__tests__/llm-client.test.js +0 -122
  148. package/__tests__/longtail-adapters.test.js +0 -281
  149. package/__tests__/messaging-qq-snapshot.test.js +0 -294
  150. package/__tests__/mobile-extractor-encrypted.test.js +0 -460
  151. package/__tests__/mobile-extractor.test.js +0 -288
  152. package/__tests__/mock-adapter.test.js +0 -93
  153. package/__tests__/prompt-builder.test.js +0 -249
  154. package/__tests__/query-parser.test.js +0 -365
  155. package/__tests__/rag-derive.test.js +0 -169
  156. package/__tests__/registry-readiness.test.js +0 -292
  157. package/__tests__/registry.test.js +0 -420
  158. package/__tests__/salvage-ingest.test.js +0 -97
  159. package/__tests__/schemas.test.js +0 -331
  160. package/__tests__/shopping-adapters.test.js +0 -392
  161. package/__tests__/shopping-eleme-snapshot.test.js +0 -454
  162. package/__tests__/shopping-pinduoduo-snapshot.test.js +0 -484
  163. package/__tests__/shopping-snapshot.test.js +0 -438
  164. package/__tests__/shopping-vipshop-snapshot.test.js +0 -425
  165. package/__tests__/shopping-xianyu-snapshot.test.js +0 -451
  166. package/__tests__/sidecar-contacts-cross-validate.test.js +0 -186
  167. package/__tests__/sidecar-supervisor.test.js +0 -128
  168. package/__tests__/sign-providers.test.js +0 -62
  169. package/__tests__/social-adapters.test.js +0 -280
  170. package/__tests__/social-bilibili-snapshot.test.js +0 -278
  171. package/__tests__/social-douban-snapshot.test.js +0 -351
  172. package/__tests__/social-douyin-im-direct-read.test.js +0 -377
  173. package/__tests__/social-douyin-salvage-collector.test.js +0 -98
  174. package/__tests__/social-douyin-salvage-mapper.test.js +0 -90
  175. package/__tests__/social-douyin-snapshot.test.js +0 -256
  176. package/__tests__/social-kuaishou-snapshot.test.js +0 -362
  177. package/__tests__/social-toutiao-snapshot.test.js +0 -366
  178. package/__tests__/social-weibo-snapshot.test.js +0 -234
  179. package/__tests__/social-weibo-sqlite-device.test.js +0 -174
  180. package/__tests__/social-xiaohongshu-snapshot.test.js +0 -232
  181. package/__tests__/sqlite-leaf-salvage.test.js +0 -97
  182. package/__tests__/travel-adapters.test.js +0 -483
  183. package/__tests__/travel-maps-snapshot.test.js +0 -426
  184. package/__tests__/vault-driver-error.test.js +0 -74
  185. package/__tests__/vault-search-helpers.test.js +0 -104
  186. package/__tests__/vault-search.test.js +0 -423
  187. package/__tests__/vault.test.js +0 -767
  188. package/__tests__/wechat-adapter.test.js +0 -594
  189. package/__tests__/whatsapp-adapter.test.js +0 -138
  190. package/scripts/_make-fixture-all.js +0 -126
  191. package/scripts/_make-fixture-contacts.js +0 -84
  192. package/scripts/evaluate-entity-resolver.js +0 -213
  193. package/scripts/run-native-tests-sandbox.sh +0 -55
  194. package/scripts/smoke-phase-5-5.js +0 -196
  195. package/scripts/smoke-phase-5-7.js +0 -181
  196. package/scripts/smoke-system-data-contacts.js +0 -309
  197. package/scripts/smoke-system-data.js +0 -312
  198. package/vitest.config.js +0 -88
@@ -1,377 +0,0 @@
1
- "use strict";
2
-
3
- import { describe, it, expect, beforeEach, afterEach } from "vitest";
4
-
5
- const fs = require("node:fs");
6
- const path = require("node:path");
7
- const os = require("node:os");
8
-
9
- const { DouyinAdapter } = require("../lib/adapters/social-douyin");
10
- const { partitionBatch } = require("../lib/batch");
11
-
12
- /**
13
- * 本地直读样板 (Douyin <uid>_im.db local direct-read) + the normalize
14
- * message/contact gap fix.
15
- *
16
- * Two things this covers that nothing else did:
17
- *
18
- * 1. REGRESSION: DouyinAdapter.normalize() used to throw "unknown kind
19
- * message/contact" for IM events — so every 私信 + 联系人 silently
20
- * dropped (registry catches the throw → invalidCount++ → 0 rows in the
21
- * vault) even though the snapshot/ADB path "succeeded". The old snapshot
22
- * test only round-tripped `profile`, so it never caught this.
23
- *
24
- * 2. NEW direct-read mode: `sync({ imDbPath })` / `--input <uid>_im.db`
25
- * opens the plaintext SQLite directly (no ADB, no snapshot JSON) and
26
- * emits message/contact raws whose originalIds match the snapshot path
27
- * (idempotent across both routes).
28
- *
29
- * No native SQLite needed — a fake Database driver is injected via
30
- * `_deps.dbDriverFactory` (the parser accepts it as `_databaseClass`).
31
- */
32
-
33
- // Fake better-sqlite3-style driver answering the parser's PRAGMA + SELECTs.
34
- function makeFakeDb({ msgRows, userRows, msgCols, userCols, partCols, partRows, convCols, convRows }) {
35
- class FakeStmt {
36
- constructor(sql) {
37
- this.sql = sql;
38
- }
39
- all() {
40
- const s = this.sql;
41
- if (/PRAGMA table_info\(msg\)/.test(s)) return msgCols;
42
- if (/FROM msg/.test(s)) return msgRows;
43
- if (/PRAGMA table_info\(SIMPLE_USER\)/.test(s)) return userCols || [];
44
- if (/FROM SIMPLE_USER/.test(s)) return userRows || [];
45
- if (/PRAGMA table_info\(participant\)/.test(s)) return partCols || [];
46
- if (/FROM participant/.test(s)) return partRows || [];
47
- if (/PRAGMA table_info\(conversation_list\)/.test(s)) return convCols || [];
48
- if (/FROM conversation_list/.test(s)) return convRows || [];
49
- return [];
50
- }
51
- }
52
- return class FakeDb {
53
- // eslint-disable-next-line no-unused-vars
54
- constructor(_path, _opts) {}
55
- prepare(sql) {
56
- return new FakeStmt(sql);
57
- }
58
- close() {}
59
- };
60
- }
61
-
62
- const DEFAULT_FAKE = {
63
- msgCols: [
64
- { name: "sender" },
65
- { name: "created_time" },
66
- { name: "content" },
67
- { name: "conversation_id" },
68
- { name: "read_status" },
69
- ],
70
- msgRows: [
71
- {
72
- sender: 111,
73
- createdTime: 1700000000000,
74
- content: JSON.stringify({ text: "你好呀" }),
75
- conversationId: "conv-1",
76
- readStatus: 1,
77
- },
78
- {
79
- sender: 222,
80
- createdTime: 1700000001000,
81
- content: JSON.stringify({ text: "在吗" }),
82
- conversationId: "conv-1",
83
- readStatus: 0,
84
- },
85
- ],
86
- userCols: [
87
- { name: "UID" },
88
- { name: "short_id" },
89
- { name: "name" },
90
- { name: "avatar_url" },
91
- { name: "follow_status" },
92
- ],
93
- userRows: [
94
- {
95
- uid: 222,
96
- shortId: 888,
97
- name: "小明",
98
- avatarUrl: "http://x/a.jpg",
99
- followStatus: 2,
100
- },
101
- ],
102
- };
103
-
104
- function freshAdapter(fakeSpec = DEFAULT_FAKE, fsOverride) {
105
- const a = new DouyinAdapter();
106
- a._deps.fs = fsOverride || { existsSync: () => true };
107
- a._deps.dbDriverFactory = () => makeFakeDb(fakeSpec);
108
- return a;
109
- }
110
-
111
- async function collect(iter) {
112
- const out = [];
113
- for await (const r of iter) out.push(r);
114
- return out;
115
- }
116
-
117
- describe("DouyinAdapter — normalize message/contact (regression)", () => {
118
- it("normalizes a message raw into one MESSAGE event (no throw)", () => {
119
- const a = new DouyinAdapter();
120
- const raw = {
121
- adapter: "social-douyin",
122
- kind: "message",
123
- originalId: "douyin:message:msg-conv-1-1700000000000",
124
- capturedAt: 1700000000000,
125
- payload: {
126
- kind: "message",
127
- text: "你好",
128
- senderUid: "111",
129
- conversationId: "conv-1",
130
- readStatus: 1,
131
- contentBlob: '{"text":"你好"}',
132
- },
133
- };
134
- const n = a.normalize(raw);
135
- expect(n.events).toHaveLength(1);
136
- expect(n.persons).toHaveLength(0);
137
- const ev = n.events[0];
138
- expect(ev.subtype).toBe("message");
139
- expect(ev.content.text).toBe("你好");
140
- expect(ev.extra.senderUid).toBe("111");
141
- expect(ev.extra.conversationId).toBe("conv-1");
142
- expect(ev.extra.platform).toBe("douyin");
143
- });
144
-
145
- it("normalizes a contact raw into one CONTACT person", () => {
146
- const a = new DouyinAdapter();
147
- const raw = {
148
- adapter: "social-douyin",
149
- kind: "contact",
150
- originalId: "douyin:contact:contact-222",
151
- capturedAt: 1700000000000,
152
- payload: {
153
- kind: "contact",
154
- uid: "222",
155
- shortId: "888",
156
- name: "小明",
157
- avatarUrl: "http://x/a.jpg",
158
- followStatus: 2,
159
- },
160
- };
161
- const n = a.normalize(raw);
162
- expect(n.persons).toHaveLength(1);
163
- expect(n.events).toHaveLength(0);
164
- const per = n.persons[0];
165
- expect(per.subtype).toBe("contact");
166
- expect(per.id).toBe("person-douyin-222");
167
- expect(per.names).toEqual(["小明"]);
168
- expect(per.identifiers["douyin-uid"]).toEqual(["222"]);
169
- expect(per.extra.followStatus).toBe(2);
170
- });
171
-
172
- it("an empty-text (non-text) message still produces a valid event", () => {
173
- const a = new DouyinAdapter();
174
- const raw = {
175
- adapter: "social-douyin",
176
- kind: "message",
177
- originalId: "douyin:message:x",
178
- capturedAt: 1700000000000,
179
- payload: { kind: "message", text: null, senderUid: "111" },
180
- };
181
- const n = a.normalize(raw);
182
- const { valid, invalidReasons } = partitionBatch({
183
- events: n.events,
184
- persons: [],
185
- places: [],
186
- items: [],
187
- topics: [],
188
- });
189
- expect(invalidReasons).toHaveLength(0);
190
- expect(valid.events).toHaveLength(1);
191
- });
192
- });
193
-
194
- describe("DouyinAdapter — 本地直读 <uid>_im.db", () => {
195
- let tmpDir;
196
- beforeEach(() => {
197
- tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "douyin-imdb-"));
198
- });
199
- afterEach(() => {
200
- if (tmpDir && fs.existsSync(tmpDir)) {
201
- fs.rmSync(tmpDir, { recursive: true, force: true });
202
- }
203
- });
204
-
205
- it("sync({ imDbPath }) yields message + contact raws", async () => {
206
- const a = freshAdapter();
207
- const raws = await collect(a.sync({ imDbPath: "/fake/123_im.db" }));
208
- expect(raws.map((r) => r.kind)).toEqual(["message", "message", "contact"]);
209
- });
210
-
211
- it("direct-read events normalize to a fully valid batch (no silent drop)", async () => {
212
- const a = freshAdapter();
213
- const raws = await collect(a.sync({ imDbPath: "/fake/123_im.db" }));
214
- const merged = { events: [], persons: [], places: [], items: [], topics: [] };
215
- for (const r of raws) {
216
- const n = a.normalize(r);
217
- for (const k of Object.keys(merged)) merged[k].push(...n[k]);
218
- }
219
- const { valid, invalidReasons } = partitionBatch(merged);
220
- expect(invalidReasons).toHaveLength(0);
221
- expect(valid.events).toHaveLength(2); // two messages
222
- expect(valid.persons).toHaveLength(1); // one contact
223
- });
224
-
225
- it("originalIds match the snapshot composite strategy (idempotent across routes)", async () => {
226
- const a = freshAdapter();
227
- const raws = await collect(a.sync({ imDbPath: "/fake/123_im.db" }));
228
- expect(raws.map((r) => r.originalId)).toEqual([
229
- "douyin:message:msg-conv-1-1700000000000",
230
- "douyin:message:msg-conv-1-1700000001000",
231
- "douyin:contact:contact-222",
232
- ]);
233
- });
234
-
235
- it("respects include={message:false} / limit", async () => {
236
- const a = freshAdapter();
237
- const onlyContacts = await collect(
238
- a.sync({ imDbPath: "/fake/123_im.db", include: { message: false } }),
239
- );
240
- expect(onlyContacts.every((r) => r.kind === "contact")).toBe(true);
241
-
242
- const capped = await collect(a.sync({ imDbPath: "/fake/123_im.db", limit: 1 }));
243
- expect(capped).toHaveLength(1);
244
- });
245
-
246
- it("emits an im-db-parsed progress event with the diagnostic", async () => {
247
- const a = freshAdapter();
248
- const events = [];
249
- await collect(
250
- a.sync({
251
- imDbPath: "/fake/123_im.db",
252
- onProgress: (e) => events.push(e),
253
- }),
254
- );
255
- const parsed = events.find((e) => e.phase === "im-db-parsed");
256
- expect(parsed).toBeTruthy();
257
- expect(parsed.hadMsgTable).toBe(true);
258
- expect(parsed.hadSimpleUserTable).toBe(true);
259
- expect(parsed.messageCount).toBe(2);
260
- expect(parsed.contactCount).toBe(1);
261
- });
262
-
263
- it("missing db file yields nothing (no throw)", async () => {
264
- const a = freshAdapter(DEFAULT_FAKE, { existsSync: () => false });
265
- const raws = await collect(a.sync({ imDbPath: "/does/not/exist_im.db" }));
266
- expect(raws).toHaveLength(0);
267
- });
268
-
269
- // device-verified 2026-06-16: real Douyin IM schema uses `participant`
270
- // (conversation_id, user_id), not SIMPLE_USER → contacts must come from it.
271
- it("extracts contacts from `participant` when SIMPLE_USER absent (real schema)", async () => {
272
- const spec = {
273
- msgCols: DEFAULT_FAKE.msgCols,
274
- msgRows: DEFAULT_FAKE.msgRows,
275
- userCols: [], // no SIMPLE_USER table on a real device
276
- userRows: [],
277
- partCols: [{ name: "conversation_id" }, { name: "user_id" }, { name: "sort_order" }],
278
- partRows: [{ uid: 111 }, { uid: 222 }, { uid: 222 }], // dup 222 → deduped
279
- };
280
- const a = freshAdapter(spec);
281
- const raws = await collect(a.sync({ imDbPath: "/fake/123_im.db" }));
282
- const contacts = raws.filter((r) => r.kind === "contact");
283
- expect(contacts.map((r) => r.payload.uid).sort()).toEqual(["111", "222"]);
284
- // each participant uid → a CONTACT person keyed by douyin-uid
285
- const n = a.normalize(contacts[0]);
286
- expect(n.persons[0].identifiers["douyin-uid"]).toEqual([contacts[0].payload.uid]);
287
- });
288
-
289
- // device-verified: conversation_list row → PDH TOPIC (one chat thread).
290
- it("maps conversation_list rows to TOPIC entities", async () => {
291
- const spec = {
292
- msgCols: DEFAULT_FAKE.msgCols,
293
- msgRows: DEFAULT_FAKE.msgRows,
294
- userCols: [], userRows: [],
295
- convCols: [
296
- { name: "conversation_id" }, { name: "type" },
297
- { name: "last_msg_create_time" }, { name: "stranger" },
298
- ],
299
- convRows: [
300
- { convId: "conv-1", convType: 0, lastMsgTime: 1700000002000, stranger: 0 },
301
- { convId: "conv-2", convType: 1, lastMsgTime: 1700000003000, stranger: 1 },
302
- ],
303
- };
304
- const a = freshAdapter(spec);
305
- const raws = await collect(a.sync({ imDbPath: "/fake/123_im.db" }));
306
- const convs = raws.filter((r) => r.kind === "conversation");
307
- expect(convs.map((r) => r.payload.conversationId)).toEqual(["conv-1", "conv-2"]);
308
- const n = a.normalize(convs[1]);
309
- expect(n.topics).toHaveLength(1);
310
- expect(n.topics[0].type).toBe("topic");
311
- expect(n.topics[0].extra.conversationId).toBe("conv-2");
312
- expect(n.topics[0].extra.stranger).toBe(true);
313
- expect(n.topics[0].extra.lastMsgTimeMs).toBe(1700000003000);
314
- });
315
-
316
- it("participant dedups against SIMPLE_USER contacts (no double-count)", async () => {
317
- const spec = {
318
- msgCols: DEFAULT_FAKE.msgCols,
319
- msgRows: DEFAULT_FAKE.msgRows,
320
- userCols: DEFAULT_FAKE.userCols,
321
- userRows: DEFAULT_FAKE.userRows, // uid 222 from SIMPLE_USER
322
- partCols: [{ name: "conversation_id" }, { name: "user_id" }],
323
- partRows: [{ uid: 222 }, { uid: 333 }], // 222 already seen, only 333 is new
324
- };
325
- const a = freshAdapter(spec);
326
- const raws = await collect(a.sync({ imDbPath: "/fake/123_im.db" }));
327
- const uids = raws.filter((r) => r.kind === "contact").map((r) => r.payload.uid).sort();
328
- expect(uids).toEqual(["222", "333"]); // 222 not duplicated
329
- });
330
- });
331
-
332
- describe("DouyinAdapter — sync() input routing (sniff)", () => {
333
- let tmpDir;
334
- beforeEach(() => {
335
- tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "douyin-route-"));
336
- });
337
- afterEach(() => {
338
- if (tmpDir && fs.existsSync(tmpDir)) {
339
- fs.rmSync(tmpDir, { recursive: true, force: true });
340
- }
341
- });
342
-
343
- it("--input <file with SQLite magic header> routes to direct IM read", async () => {
344
- // Real file with the 16-byte SQLite magic header so _looksLikeSqlite
345
- // (which uses real fs) returns true; the fake driver supplies the rows.
346
- const dbFile = path.join(tmpDir, "123_im.db");
347
- const header = Buffer.alloc(100);
348
- header.write("SQLite format 3", 0, "latin1");
349
- fs.writeFileSync(dbFile, header);
350
-
351
- const a = new DouyinAdapter();
352
- a._deps.dbDriverFactory = () => makeFakeDb(DEFAULT_FAKE);
353
- const raws = [];
354
- for await (const r of a.sync({ inputPath: dbFile })) raws.push(r);
355
- expect(raws.map((r) => r.kind)).toEqual(["message", "message", "contact"]);
356
- });
357
-
358
- it("--input <JSON snapshot> routes to snapshot mode (not IM)", async () => {
359
- const snapFile = path.join(tmpDir, "social-douyin.json");
360
- fs.writeFileSync(
361
- snapFile,
362
- JSON.stringify({
363
- schemaVersion: 1,
364
- snapshottedAt: 1700000000000,
365
- account: { secUid: "MS4abc", shortId: "9", displayName: "me" },
366
- events: [
367
- { kind: "profile", id: "profile-MS4abc", capturedAt: 1700000000000, secUid: "MS4abc", nickname: "me" },
368
- ],
369
- }),
370
- );
371
- const a = new DouyinAdapter();
372
- const raws = [];
373
- for await (const r of a.sync({ inputPath: snapFile })) raws.push(r);
374
- expect(raws).toHaveLength(1);
375
- expect(raws[0].kind).toBe("profile");
376
- });
377
- });
@@ -1,98 +0,0 @@
1
- "use strict";
2
-
3
- import { describe, it, expect, beforeAll, afterAll } from "vitest";
4
-
5
- const fs = require("node:fs");
6
- const path = require("node:path");
7
- const os = require("node:os");
8
-
9
- const {
10
- salvageDumpToSnapshot,
11
- salvageAndSync,
12
- } = require("../lib/adapters/social-douyin-adb/collector");
13
-
14
- // Build a real (UTF-8) SQLite DB via the SQLCipher-capable driver and treat its
15
- // raw bytes as a "memory dump" — proving the salvage → snapshot → ingest path
16
- // recovers message rows with no key (the Method-B capstone). The msg-table
17
- // column order matches the device-verified Douyin IM schema.
18
- describe("social-douyin-adb salvage collector", () => {
19
- let dir, dbPath;
20
- const COLUMNS = ["msg_uuid", "conversation_id", "sender", "content", "created_time"];
21
-
22
- beforeAll(() => {
23
- const Database = require("better-sqlite3-multiple-ciphers");
24
- dir = fs.mkdtempSync(path.join(os.tmpdir(), "salvage-col-"));
25
- dbPath = path.join(dir, "u.db");
26
- const db = new Database(dbPath);
27
- db.exec(
28
- "CREATE TABLE msg(msg_uuid TEXT, conversation_id TEXT, sender INTEGER, content TEXT, created_time INTEGER)",
29
- );
30
- const ins = db.prepare("INSERT INTO msg VALUES(?,?,?,?,?)");
31
- ins.run("uuid-1", "conv-1", 111, "你好呀 hello", 1700000000000);
32
- ins.run("uuid-2", "conv-1", 222, "在吗?晚上一起吃饭", 1700000001000);
33
- ins.run("uuid-3", "conv-2", 333, "ok 👍", 1700000002000);
34
- db.close();
35
- });
36
-
37
- afterAll(() => {
38
- try { fs.rmSync(dir, { recursive: true, force: true }); } catch (_e) { /* ignore */ }
39
- });
40
-
41
- it("salvageDumpToSnapshot recovers msg rows → snapshot JSON (explicit columns)", () => {
42
- const res = salvageDumpToSnapshot(dbPath, {
43
- uid: "1234567890123456789",
44
- columns: COLUMNS,
45
- now: () => 1700000099000,
46
- });
47
- expect(res.uid).toBe("1234567890123456789");
48
- expect(res.eventCounts.message).toBe(3);
49
- expect(res.salvage.recordsSalvaged).toBeGreaterThanOrEqual(3);
50
-
51
- const snap = JSON.parse(fs.readFileSync(res.snapshotPath, "utf-8"));
52
- const msgs = snap.events.filter((e) => e.kind === "message");
53
- expect(msgs.length).toBe(3);
54
- const texts = msgs.map((m) => m.text).sort();
55
- expect(texts).toContain("你好呀 hello");
56
- expect(texts).toContain("ok 👍"); // UTF-8 emoji survives
57
- const m1 = msgs.find((m) => m.text === "你好呀 hello");
58
- expect(m1.conversationId).toBe("conv-1");
59
- expect(m1.senderUid).toBe("111");
60
- fs.rmSync(res.snapshotPath, { force: true });
61
- });
62
-
63
- it("infers columns heuristically when none given (content + created_time)", () => {
64
- const res = salvageDumpToSnapshot(dbPath, { now: () => 1700000099000 });
65
- expect(res.eventCounts.message).toBe(3);
66
- const snap = JSON.parse(fs.readFileSync(res.snapshotPath, "utf-8"));
67
- const texts = snap.events.filter((e) => e.kind === "message").map((m) => m.text);
68
- expect(texts).toContain("在吗?晚上一起吃饭");
69
- fs.rmSync(res.snapshotPath, { force: true });
70
- });
71
-
72
- it("salvageAndSync feeds the snapshot to registry.syncAdapter then cleans up", async () => {
73
- let captured = null;
74
- const fakeRegistry = {
75
- syncAdapter: async (name, opts) => {
76
- captured = { name, opts };
77
- // verify the snapshot file exists at sync time
78
- const snap = JSON.parse(fs.readFileSync(opts.inputPath, "utf-8"));
79
- return { ingested: snap.events.length, adapter: name, kgTriples: 0, ragDocs: 0 };
80
- },
81
- };
82
- const report = await salvageAndSync(fakeRegistry, dbPath, {
83
- uid: "1234567890123456789",
84
- columns: COLUMNS,
85
- });
86
- expect(captured.name).toBe("social-douyin");
87
- expect(report.ingested).toBe(3);
88
- expect(report.douyin.mode).toBe("salvage");
89
- expect(report.douyin.eventCounts.message).toBe(3);
90
- expect(report.douyin.cleanupFailed).toBe(false);
91
- // snapshot file cleaned up in finally
92
- expect(fs.existsSync(captured.opts.inputPath)).toBe(false);
93
- });
94
-
95
- it("throws on missing dumpPath", () => {
96
- expect(() => salvageDumpToSnapshot("")).toThrow();
97
- });
98
- });
@@ -1,90 +0,0 @@
1
- "use strict";
2
-
3
- import { describe, it, expect } from "vitest";
4
-
5
- const {
6
- mapMsgRecords,
7
- mapParticipantRecords,
8
- mapConversationRecords,
9
- inferMsgColumns,
10
- mapSalvaged,
11
- } = require("../lib/adapters/social-douyin-adb/salvage-mapper");
12
- const { DouyinAdapter } = require("../lib/adapters/social-douyin");
13
-
14
- // End-to-end glue: leaf-salvaged {rowid,cols} → parseImDb shape → adapter.normalize
15
- // → PDH entities. Closes Method-B: dump → salvage → mapper → ingest.
16
- describe("salvage-mapper — salvaged records → PDH entities", () => {
17
- // msg column order (device-verified subset, see pdh-app-db-schemas.md)
18
- const MSG_COLS = ["msg_uuid", "conversation_id", "sender", "content", "created_time"];
19
- const msgRecords = [
20
- { rowid: "1", cols: ["u1", "conv-1", 111, JSON.stringify({ text: "你好呀 hello" }), 1700000000000] },
21
- { rowid: "2", cols: ["u2", "conv-1", 222, JSON.stringify({ text: "在吗" }), 1700000001000] },
22
- ];
23
-
24
- it("maps msg records → message objects (im-db-parser shape)", () => {
25
- const msgs = mapMsgRecords(msgRecords, MSG_COLS);
26
- expect(msgs).toHaveLength(2);
27
- expect(msgs[0].senderUid).toBe("111");
28
- expect(msgs[0].conversationId).toBe("conv-1");
29
- expect(msgs[0].createdTimeMs).toBe(1700000000000);
30
- expect(msgs[0].text).toBe("你好呀 hello"); // content JSON → text extracted
31
- });
32
-
33
- it("mapped messages normalize through DouyinAdapter → MESSAGE events", () => {
34
- const a = new DouyinAdapter();
35
- const msgs = mapMsgRecords(msgRecords, MSG_COLS);
36
- const raw = {
37
- adapter: "social-douyin",
38
- kind: "message",
39
- originalId: "douyin:message:x",
40
- capturedAt: msgs[0].createdTimeMs,
41
- payload: { kind: "message", ...msgs[0] },
42
- };
43
- const n = a.normalize(raw);
44
- expect(n.events).toHaveLength(1);
45
- expect(n.events[0].subtype).toBe("message");
46
- expect(n.events[0].content.text).toBe("你好呀 hello");
47
- expect(n.events[0].extra.senderUid).toBe("111");
48
- });
49
-
50
- it("maps participant records → deduped contacts (uid only)", () => {
51
- const recs = [
52
- { rowid: "1", cols: ["conv-1", "111", 0] },
53
- { rowid: "2", cols: ["conv-1", "222", 1] },
54
- { rowid: "3", cols: ["conv-2", "222", 0] },
55
- ];
56
- const contacts = mapParticipantRecords(recs, ["conversation_id", "user_id", "sort_order"]);
57
- expect(contacts.map((c) => c.uid).sort()).toEqual(["111", "222"]);
58
- expect(contacts.every((c) => c.fromParticipant)).toBe(true);
59
- });
60
-
61
- it("maps conversation records → conversations (→ TOPIC)", () => {
62
- const recs = [{ rowid: "1", cols: ["conv-9", 1, 1700000002000, 1] }];
63
- const convs = mapConversationRecords(recs, ["conversation_id", "type", "last_msg_create_time", "stranger"]);
64
- expect(convs[0].conversationId).toBe("conv-9");
65
- expect(convs[0].stranger).toBe(true);
66
- expect(convs[0].lastMsgTimeMs).toBe(1700000002000);
67
- });
68
-
69
- it("inferMsgColumns heuristically locates content + created_time", () => {
70
- const cols = inferMsgColumns(msgRecords);
71
- // content = the JSON string col (index 3), created_time = the epoch int (index 4)
72
- expect(cols[3]).toBe("content");
73
- expect(cols[4]).toBe("created_time");
74
- // round-trips through mapMsgRecords
75
- const msgs = mapMsgRecords(msgRecords, cols);
76
- expect(msgs[0].text).toBe("你好呀 hello");
77
- expect(msgs[0].createdTimeMs).toBe(1700000000000);
78
- });
79
-
80
- it("mapSalvaged one-shot returns parseImDb shape", () => {
81
- const out = mapSalvaged({
82
- msg: { records: msgRecords, columns: MSG_COLS },
83
- participant: { records: [{ rowid: "1", cols: ["conv-1", "999"] }], columns: ["conversation_id", "user_id"] },
84
- conversation: { records: [{ rowid: "1", cols: ["conv-1"] }], columns: ["conversation_id"] },
85
- });
86
- expect(out.messages).toHaveLength(2);
87
- expect(out.contacts).toHaveLength(1);
88
- expect(out.conversations).toHaveLength(1);
89
- });
90
- });