@chainlesschain/personal-data-hub 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/__tests__/adapters/social-toutiao-kuaishou-scaffold.test.js +58 -16
  2. package/__tests__/adapters/wechat-frida-agent.test.js +132 -1
  3. package/__tests__/integration/social-bilibili-pipeline.test.js +261 -0
  4. package/__tests__/longtail-adapters.test.js +60 -14
  5. package/__tests__/messaging-qq-snapshot.test.js +294 -0
  6. package/__tests__/shopping-pinduoduo-snapshot.test.js +302 -0
  7. package/__tests__/shopping-snapshot.test.js +438 -0
  8. package/__tests__/social-adapters.test.js +91 -17
  9. package/__tests__/social-bilibili-snapshot.test.js +278 -0
  10. package/__tests__/social-douyin-snapshot.test.js +253 -0
  11. package/__tests__/social-kuaishou-snapshot.test.js +309 -0
  12. package/__tests__/social-toutiao-snapshot.test.js +314 -0
  13. package/__tests__/social-weibo-snapshot.test.js +234 -0
  14. package/__tests__/social-xiaohongshu-snapshot.test.js +232 -0
  15. package/__tests__/travel-maps-snapshot.test.js +426 -0
  16. package/__tests__/vault-driver-error.test.js +74 -0
  17. package/__tests__/wechat-adapter.test.js +118 -0
  18. package/lib/adapters/messaging-qq/index.js +498 -92
  19. package/lib/adapters/shopping-jd/index.js +228 -25
  20. package/lib/adapters/shopping-meituan/index.js +222 -26
  21. package/lib/adapters/shopping-pinduoduo/index.js +275 -0
  22. package/lib/adapters/social-bilibili/adapter.js +500 -0
  23. package/lib/adapters/social-bilibili/index.js +21 -169
  24. package/lib/adapters/social-douyin/index.js +454 -63
  25. package/lib/adapters/social-kuaishou/index.js +379 -127
  26. package/lib/adapters/social-toutiao/index.js +400 -130
  27. package/lib/adapters/social-weibo/index.js +393 -95
  28. package/lib/adapters/social-xiaohongshu/index.js +389 -49
  29. package/lib/adapters/travel-baidu-map/index.js +286 -26
  30. package/lib/adapters/travel-tencent-map/index.js +414 -0
  31. package/lib/adapters/wechat/content-parser.js +11 -2
  32. package/lib/adapters/wechat/db-reader.js +88 -10
  33. package/lib/adapters/wechat/frida-agent/loader.js +7 -0
  34. package/lib/adapters/wechat/frida-agent/wechat-key-hook.js +140 -18
  35. package/lib/adapters/wechat/key-providers/frida-key-provider.js +8 -0
  36. package/lib/adapters/wechat/normalize.js +12 -3
  37. package/lib/index.js +5 -1
  38. package/lib/vault.js +60 -8
  39. package/package.json +2 -1
@@ -1,50 +1,117 @@
1
1
  /**
2
- * Phase 13.8(+) — Toutiao 今日头条 adapter (v0.1 scaffold).
2
+ * §A8 v0.2 — Toutiao (今日头条) adapter, dual-mode (snapshot + sqlite).
3
3
  *
4
- * Source: 今日头条 Android app stores user history in SQLite (encrypted in
5
- * newer versions, plaintext in older 7.x builds). Schema is reverse-engineered
6
- * from the open-source sjqz parsers project and is pinned at scaffold quality
7
- * only — Phase 13.10 will fixture-pin real field names after Xiaomi 24115RA8EC
8
- * real-device E2E.
4
+ * Mirror of social-weibo / social-bilibili two-mode pattern:
9
5
  *
10
- * Conjectured tables (to be fixture-pinned in Phase 13.10):
11
- * - read_history — read articles
12
- * - collection_article — user-collected (saved) articles
13
- * - search_history — user search queries
6
+ * 1. snapshot mode (opts.inputPath): in-APK Android cc reads a snapshot
7
+ * JSON produced by the phone's ToutiaoLocalCollector (root-required
8
+ * SQLCipher decrypt of /data/data/com.ss.android.article.news/
9
+ * databases/ on Phase 13.10 real-device E2E; until then the snapshot
10
+ * can be produced by the desktop AndroidExtractor pulling a plaintext
11
+ * 7.x DB and running the same row → snapshot transform offline).
12
+ * Desktop-independent path. account.uid OPTIONAL at construction —
13
+ * payload carries it.
14
14
  *
15
- * Each row → Event with subtype "browse" (read_history) / "like" (collection)
16
- * / "post" (search_history reframed as a self-authored "search" event).
15
+ * 2. sqlite mode (opts.dbPath, legacy): Phase 13.8 device-pull path —
16
+ * desktop reads the pulled DB directly. Preserved for backward compat;
17
+ * account.uid REQUIRED in this mode (checked lazily at sync time, not
18
+ * at construction, so snapshot-only callers can omit it).
17
19
  *
18
- * Mirrors social-bilibili adapter contract; differs only in table list +
19
- * default sensitivity (toutiao reading patterns may include political /
20
- * health topics so sensitivity is bumped to "high").
20
+ * Snapshot schema (mirrors ToutiaoLocalCollector.SNAPSHOT_SCHEMA_VERSION):
21
+ *
22
+ * {
23
+ * "schemaVersion": 1,
24
+ * "snapshottedAt": <epoch-ms>,
25
+ * "account": { "uid": "12345", "displayName": "alice" },
26
+ * "events": [
27
+ * { "kind": "read", "id": "read-<itemId>", "capturedAt": <ms>,
28
+ * "itemId": "...", "title": "...", "category": "...", "author": "...",
29
+ * "readDuration": N, "source": "..." },
30
+ * { "kind": "collection", "id": "collect-<itemId>", "capturedAt": <ms>,
31
+ * "itemId": "...", "title": "...", "category": "...", "author": "..." },
32
+ * { "kind": "search", "id": "search-<kw>:<ts>", "capturedAt": <ms>,
33
+ * "keyword": "...", "searchAt": <ms> }
34
+ * ]
35
+ * }
36
+ *
37
+ * Sensitivity: bumped to "high" vs Bilibili — Toutiao reading patterns can
38
+ * reveal political / medical / financial topic interest.
21
39
  */
22
40
 
23
41
  "use strict";
24
42
 
25
43
  const fs = require("node:fs");
26
44
  const { newId } = require("../../ids");
45
+ const {
46
+ ENTITY_TYPES,
47
+ EVENT_SUBTYPES,
48
+ CAPTURED_BY,
49
+ } = require("../../constants");
27
50
 
28
51
  const NAME = "social-toutiao";
29
- const VERSION = "0.1.0";
52
+ const VERSION = "0.2.0";
53
+ const SNAPSHOT_SCHEMA_VERSION = 1;
54
+
55
+ const KIND_READ = "read";
56
+ const KIND_COLLECTION = "collection";
57
+ const KIND_SEARCH = "search";
58
+ const VALID_SNAPSHOT_KINDS = Object.freeze([
59
+ KIND_READ,
60
+ KIND_COLLECTION,
61
+ KIND_SEARCH,
62
+ ]);
63
+
64
+ function stableOriginalId(kind, id) {
65
+ const stringified =
66
+ (typeof id === "string" && id.length > 0 && id) ||
67
+ (typeof id === "number" && Number.isFinite(id) && String(id)) ||
68
+ null;
69
+ const safe =
70
+ stringified ||
71
+ `unknown-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
72
+ return `toutiao:${kind}:${safe}`;
73
+ }
74
+
75
+ function parseTime(v) {
76
+ if (Number.isFinite(v)) return v > 1e12 ? v : v * 1000;
77
+ if (typeof v === "string") {
78
+ if (/^\d+$/.test(v)) {
79
+ const n = parseInt(v, 10);
80
+ return n > 1e12 ? n : n * 1000;
81
+ }
82
+ const t = Date.parse(v);
83
+ return Number.isFinite(t) ? t : null;
84
+ }
85
+ return null;
86
+ }
87
+
88
+ function trySelect(db, sql) {
89
+ try {
90
+ return db.prepare(sql).all();
91
+ } catch (_e) {
92
+ return null;
93
+ }
94
+ }
30
95
 
31
96
  class ToutiaoAdapter {
32
97
  constructor(opts = {}) {
33
- if (!opts.account || !opts.account.uid) {
34
- throw new Error("ToutiaoAdapter: opts.account.uid required");
35
- }
36
- this.account = opts.account;
98
+ // §A8 v0.2: account.uid now OPTIONAL at construction — snapshot mode is
99
+ // stateless and pulls account from the snapshot file. Sqlite mode (legacy
100
+ // device-pull) still requires it; checked at sync time, not construction.
101
+ this.account = opts.account || null;
37
102
  this._dbPath = opts.dbPath || null;
38
- this._dbDriverFactory = opts.dbDriverFactory || null;
39
103
 
40
104
  this.name = NAME;
41
105
  this.version = VERSION;
42
106
  this.capabilities = [
107
+ "sync:snapshot",
43
108
  "sync:sqlite",
44
109
  "parse:toutiao-read-history",
45
110
  "parse:toutiao-collection",
46
111
  "parse:toutiao-search",
47
112
  ];
113
+ // Existing desktop wiring may key off this — kept as device-pull (the
114
+ // sqlite mode is the desktop-side; snapshot mode is in-APK Android).
48
115
  this.extractMode = "device-pull";
49
116
  this.rateLimits = {};
50
117
  this.dataDisclosure = {
@@ -53,14 +120,53 @@ class ToutiaoAdapter {
53
120
  "toutiao:collection_article (item_id / title / save_time)",
54
121
  "toutiao:search_history (keyword / search_time)",
55
122
  ],
56
- // Bumped vs bilibili: news reading reveals political / medical interest.
123
+ // News reading reveals political / medical / financial topic interest.
57
124
  sensitivity: "high",
58
125
  legalGate: false,
126
+ defaultInclude: {
127
+ read: true,
128
+ collection: true,
129
+ search: true,
130
+ },
131
+ };
132
+
133
+ // _deps injection seam for tests (vi.mock fs/ doesn't intercept require in
134
+ // inlined CJS — see .claude/rules/testing.md).
135
+ this._deps = {
136
+ fs,
137
+ dbDriverFactory: opts.dbDriverFactory || null,
59
138
  };
60
139
  }
61
140
 
62
- async authenticate() {
63
- return { ok: true, account: this.account.uid };
141
+ async authenticate(ctx = {}) {
142
+ if (ctx && typeof ctx.inputPath === "string" && ctx.inputPath.length > 0) {
143
+ try {
144
+ this._deps.fs.accessSync(ctx.inputPath, this._deps.fs.constants.R_OK);
145
+ } catch (err) {
146
+ return {
147
+ ok: false,
148
+ reason: "INPUT_PATH_UNREADABLE",
149
+ message: `snapshot not readable at ${ctx.inputPath}: ${err.message}`,
150
+ };
151
+ }
152
+ return { ok: true, mode: "snapshot-file" };
153
+ }
154
+ if (this._dbPath || (ctx && typeof ctx.dbPath === "string")) {
155
+ if (!this.account || !this.account.uid) {
156
+ return {
157
+ ok: false,
158
+ reason: "NO_ACCOUNT_UID",
159
+ message: "social-toutiao.authenticate: sqlite mode requires account.uid",
160
+ };
161
+ }
162
+ return { ok: true, account: this.account.uid, mode: "sqlite" };
163
+ }
164
+ return {
165
+ ok: false,
166
+ reason: "NO_INPUT",
167
+ message:
168
+ "social-toutiao.authenticate: needs opts.inputPath (snapshot mode) OR opts.dbPath (sqlite mode)",
169
+ };
64
170
  }
65
171
 
66
172
  async healthCheck() {
@@ -68,22 +174,102 @@ class ToutiaoAdapter {
68
174
  }
69
175
 
70
176
  async *sync(opts = {}) {
177
+ if (typeof opts.inputPath === "string" && opts.inputPath.length > 0) {
178
+ yield* this._syncViaSnapshot(opts);
179
+ return;
180
+ }
71
181
  const dbPath = opts.dbPath || this._dbPath;
72
- if (!dbPath || !fs.existsSync(dbPath)) return;
73
- const Driver = this._dbDriverFactory
74
- ? this._dbDriverFactory()
182
+ if (dbPath) {
183
+ yield* this._syncViaSqlite({ ...opts, dbPath });
184
+ return;
185
+ }
186
+ throw new Error(
187
+ "social-toutiao.sync: needs opts.inputPath (snapshot mode, Android in-APK cc) OR opts.dbPath (sqlite mode, legacy device-pull)",
188
+ );
189
+ }
190
+
191
+ async *_syncViaSnapshot(opts) {
192
+ const raw = this._deps.fs.readFileSync(opts.inputPath, "utf-8");
193
+ const snapshot = JSON.parse(raw);
194
+ if (
195
+ !snapshot ||
196
+ typeof snapshot !== "object" ||
197
+ snapshot.schemaVersion !== SNAPSHOT_SCHEMA_VERSION
198
+ ) {
199
+ throw new Error(
200
+ `social-toutiao.sync: snapshot schemaVersion mismatch (got ${snapshot && snapshot.schemaVersion}, expected ${SNAPSHOT_SCHEMA_VERSION})`,
201
+ );
202
+ }
203
+ const fallbackCapturedAt =
204
+ Number.isFinite(snapshot.snapshottedAt) && snapshot.snapshottedAt > 0
205
+ ? Math.floor(snapshot.snapshottedAt)
206
+ : Date.now();
207
+
208
+ const account =
209
+ snapshot.account && typeof snapshot.account === "object"
210
+ ? snapshot.account
211
+ : null;
212
+ const include = opts.include || {};
213
+ const limit =
214
+ Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
215
+
216
+ const events = Array.isArray(snapshot.events) ? snapshot.events : [];
217
+ let emitted = 0;
218
+ for (const ev of events) {
219
+ if (emitted >= limit) return;
220
+ if (!ev || typeof ev !== "object") continue;
221
+ const kind = ev.kind;
222
+ if (!VALID_SNAPSHOT_KINDS.includes(kind)) continue;
223
+ if (include[kind] === false) continue;
224
+
225
+ const capturedAt =
226
+ parseTime(ev.capturedAt) ||
227
+ parseTime(ev.time) ||
228
+ fallbackCapturedAt;
229
+ const id =
230
+ (typeof ev.id === "string" && ev.id.length > 0 && ev.id) ||
231
+ ev.itemId ||
232
+ ev.keyword ||
233
+ null;
234
+
235
+ yield {
236
+ adapter: NAME,
237
+ kind,
238
+ originalId: stableOriginalId(kind, id),
239
+ capturedAt,
240
+ payload: { ...ev, account },
241
+ };
242
+ emitted += 1;
243
+ }
244
+ }
245
+
246
+ async *_syncViaSqlite(opts) {
247
+ // Legacy Phase 13.8 path — requires account.uid in constructor and a DB
248
+ // pulled via the desktop AndroidExtractor.
249
+ if (!this.account || !this.account.uid) {
250
+ throw new Error(
251
+ "social-toutiao._syncViaSqlite: account.uid required (set via new ToutiaoAdapter({ account: { uid } }) in cli wiring)",
252
+ );
253
+ }
254
+ const dbPath = opts.dbPath;
255
+ if (!dbPath || !this._deps.fs.existsSync(dbPath)) return;
256
+ const Driver = this._deps.dbDriverFactory
257
+ ? this._deps.dbDriverFactory()
75
258
  : require("better-sqlite3-multiple-ciphers");
76
259
  const db = new Driver(dbPath, { readonly: true });
77
260
 
78
261
  try {
79
262
  const reads =
80
- trySelect(db, "SELECT * FROM read_history ORDER BY read_time DESC LIMIT 5000") || [];
263
+ trySelect(
264
+ db,
265
+ "SELECT * FROM read_history ORDER BY read_time DESC LIMIT 5000",
266
+ ) || [];
81
267
  for (const row of reads) {
82
268
  yield {
83
269
  adapter: NAME,
84
270
  originalId: `read-${row.id || row._id || row.item_id}`,
85
271
  capturedAt: parseTime(row.read_time || row.time || row.create_time),
86
- payload: { row, kind: "read" },
272
+ payload: { row, kind: KIND_READ },
87
273
  };
88
274
  }
89
275
 
@@ -97,7 +283,7 @@ class ToutiaoAdapter {
97
283
  adapter: NAME,
98
284
  originalId: `collect-${row.id || row.item_id}`,
99
285
  capturedAt: parseTime(row.save_time || row.time),
100
- payload: { row, kind: "collection" },
286
+ payload: { row, kind: KIND_COLLECTION },
101
287
  };
102
288
  }
103
289
 
@@ -111,126 +297,210 @@ class ToutiaoAdapter {
111
297
  adapter: NAME,
112
298
  originalId: `search-${row.id || row.keyword + ":" + row.search_time}`,
113
299
  capturedAt: parseTime(row.search_time || row.time),
114
- payload: { row, kind: "search" },
300
+ payload: { row, kind: KIND_SEARCH },
115
301
  };
116
302
  }
117
303
  } finally {
118
304
  try {
119
305
  db.close();
120
- } catch (_e) {}
306
+ } catch (_e) {
307
+ /* ignore */
308
+ }
121
309
  }
122
310
  }
123
311
 
124
312
  normalize(raw) {
125
- if (!raw || !raw.payload || !raw.payload.row) {
126
- throw new Error("ToutiaoAdapter.normalize: row missing");
313
+ if (!raw || !raw.payload) {
314
+ throw new Error("ToutiaoAdapter.normalize: payload missing");
127
315
  }
128
- const { kind, row } = raw.payload;
129
- const now = Date.now();
130
- const occurredAt =
131
- parseTime(row.read_time || row.save_time || row.search_time || row.time) || now;
132
- const source = {
133
- adapter: NAME,
134
- adapterVersion: VERSION,
135
- originalId: raw.originalId,
136
- capturedAt: occurredAt,
137
- capturedBy: "sqlite",
138
- };
316
+ const ingestedAt = Date.now();
317
+ const kind = raw.kind || raw.payload.kind;
318
+ const p = raw.payload;
139
319
 
140
- if (kind === "collection") {
141
- return {
142
- events: [
143
- {
144
- id: newId(),
145
- type: "event",
146
- subtype: "like",
147
- occurredAt,
148
- actor: "person-self",
149
- content: { title: row.title || row.article_title || "(no title)" },
150
- ingestedAt: now,
151
- source,
152
- extra: {
153
- itemId: row.item_id || null,
154
- category: row.category || null,
155
- author: row.author || null,
156
- source: row.source || null,
157
- },
158
- },
159
- ],
160
- persons: [],
161
- places: [],
162
- items: [],
163
- topics: [],
164
- };
320
+ if (kind === KIND_COLLECTION) {
321
+ return normalizeCollection(p, raw, ingestedAt);
165
322
  }
166
- if (kind === "search") {
167
- return {
168
- events: [
169
- {
170
- id: newId(),
171
- type: "event",
172
- subtype: "post",
173
- occurredAt,
174
- actor: "person-self",
175
- content: { title: row.keyword || row.query || "(empty query)" },
176
- ingestedAt: now,
177
- source,
178
- extra: { kind: "search", keyword: row.keyword || row.query || null },
179
- },
180
- ],
181
- persons: [],
182
- places: [],
183
- items: [],
184
- topics: [],
185
- };
323
+ if (kind === KIND_SEARCH) {
324
+ return normalizeSearch(p, raw, ingestedAt);
186
325
  }
187
- // read → browse event
188
- return {
189
- events: [
190
- {
191
- id: newId(),
192
- type: "event",
193
- subtype: "browse",
194
- occurredAt,
195
- actor: "person-self",
196
- content: { title: row.title || row.article_title || "(no title)" },
197
- ingestedAt: now,
198
- source,
199
- extra: {
200
- itemId: row.item_id || null,
201
- category: row.category || null,
202
- author: row.author || null,
203
- readDuration: row.read_duration || row.duration || null,
204
- },
205
- },
206
- ],
207
- persons: [],
208
- places: [],
209
- items: [],
210
- topics: [],
211
- };
326
+ if (kind === KIND_READ) {
327
+ return normalizeRead(p, raw, ingestedAt);
328
+ }
329
+ throw new Error(`ToutiaoAdapter.normalize: unknown kind ${kind}`);
212
330
  }
213
331
  }
214
332
 
215
- function trySelect(db, sql) {
216
- try {
217
- return db.prepare(sql).all();
218
- } catch (_e) {
219
- return null;
333
+ function buildSource(raw, occurredAt, capturedBy) {
334
+ return {
335
+ adapter: NAME,
336
+ adapterVersion: VERSION,
337
+ originalId: raw.originalId,
338
+ capturedAt: raw.capturedAt || occurredAt,
339
+ capturedBy,
340
+ };
341
+ }
342
+
343
+ function pickField(p, snapshotKey, ...sqlKeys) {
344
+ // Snapshot mode payload carries fields directly; sqlite-mode payload has a
345
+ // `row` sub-object. Try snapshot key first, then each sqlite-row key.
346
+ if (p[snapshotKey] != null) return p[snapshotKey];
347
+ const row = p.row || {};
348
+ for (const k of sqlKeys) {
349
+ if (row[k] != null) return row[k];
220
350
  }
351
+ return null;
221
352
  }
222
353
 
223
- function parseTime(v) {
224
- if (Number.isFinite(v)) return v > 1e12 ? v : v * 1000;
225
- if (typeof v === "string") {
226
- if (/^\d+$/.test(v)) {
227
- const n = parseInt(v, 10);
228
- return n > 1e12 ? n : n * 1000;
354
+ function normalizeRead(p, raw, ingestedAt) {
355
+ if (!p.row && !p.itemId && !p.title) {
356
+ // payload is sqlite-only legacy shape with missing row → preserve old
357
+ // behaviour for the v0.1 scaffold tests that pass `{ payload: {} }`.
358
+ if (!p.row) {
359
+ throw new Error("ToutiaoAdapter.normalize: row missing");
229
360
  }
230
- const t = Date.parse(v);
231
- return Number.isFinite(t) ? t : null;
232
361
  }
233
- return null;
362
+ const isSnapshot = !p.row;
363
+ const row = p.row || {};
364
+ const title = pickField(p, "title", "title", "article_title") || "(no title)";
365
+ const occurredAt =
366
+ parseTime(p.capturedAt) ||
367
+ parseTime(row.read_time || row.time || row.create_time) ||
368
+ raw.capturedAt ||
369
+ ingestedAt;
370
+ const source = buildSource(
371
+ raw,
372
+ occurredAt,
373
+ isSnapshot ? CAPTURED_BY.API : CAPTURED_BY.SQLITE,
374
+ );
375
+ return {
376
+ events: [
377
+ {
378
+ id: newId(),
379
+ type: ENTITY_TYPES.EVENT,
380
+ subtype: EVENT_SUBTYPES.BROWSE,
381
+ occurredAt,
382
+ actor: "person-self",
383
+ content: { title },
384
+ ingestedAt,
385
+ source,
386
+ extra: {
387
+ platform: "toutiao",
388
+ itemId: pickField(p, "itemId", "item_id"),
389
+ category: pickField(p, "category", "category"),
390
+ author: pickField(p, "author", "author"),
391
+ readDuration: pickField(p, "readDuration", "read_duration", "duration"),
392
+ source: pickField(p, "source", "source"),
393
+ },
394
+ },
395
+ ],
396
+ persons: [],
397
+ places: [],
398
+ items: [],
399
+ topics: [],
400
+ };
401
+ }
402
+
403
+ function normalizeCollection(p, raw, ingestedAt) {
404
+ if (!p.row && !p.itemId && !p.title) {
405
+ if (!p.row) {
406
+ throw new Error("ToutiaoAdapter.normalize: row missing");
407
+ }
408
+ }
409
+ const isSnapshot = !p.row;
410
+ const row = p.row || {};
411
+ const title =
412
+ pickField(p, "title", "title", "article_title") || "(no title)";
413
+ const occurredAt =
414
+ parseTime(p.capturedAt) ||
415
+ parseTime(row.save_time || row.time) ||
416
+ raw.capturedAt ||
417
+ ingestedAt;
418
+ const source = buildSource(
419
+ raw,
420
+ occurredAt,
421
+ isSnapshot ? CAPTURED_BY.API : CAPTURED_BY.SQLITE,
422
+ );
423
+ return {
424
+ events: [
425
+ {
426
+ id: newId(),
427
+ type: ENTITY_TYPES.EVENT,
428
+ subtype: EVENT_SUBTYPES.LIKE,
429
+ occurredAt,
430
+ actor: "person-self",
431
+ content: { title },
432
+ ingestedAt,
433
+ source,
434
+ extra: {
435
+ platform: "toutiao",
436
+ itemId: pickField(p, "itemId", "item_id"),
437
+ category: pickField(p, "category", "category"),
438
+ author: pickField(p, "author", "author"),
439
+ source: pickField(p, "source", "source"),
440
+ },
441
+ },
442
+ ],
443
+ persons: [],
444
+ places: [],
445
+ items: [],
446
+ topics: [],
447
+ };
448
+ }
449
+
450
+ function normalizeSearch(p, raw, ingestedAt) {
451
+ if (!p.row && !p.keyword && !p.query) {
452
+ if (!p.row) {
453
+ throw new Error("ToutiaoAdapter.normalize: row missing");
454
+ }
455
+ }
456
+ const isSnapshot = !p.row;
457
+ const row = p.row || {};
458
+ const keyword =
459
+ pickField(p, "keyword", "keyword", "query") ||
460
+ pickField(p, "query") ||
461
+ "(empty query)";
462
+ const occurredAt =
463
+ parseTime(p.capturedAt) ||
464
+ parseTime(p.searchAt) ||
465
+ parseTime(row.search_time || row.time) ||
466
+ raw.capturedAt ||
467
+ ingestedAt;
468
+ const source = buildSource(
469
+ raw,
470
+ occurredAt,
471
+ isSnapshot ? CAPTURED_BY.API : CAPTURED_BY.SQLITE,
472
+ );
473
+ return {
474
+ events: [
475
+ {
476
+ id: newId(),
477
+ type: ENTITY_TYPES.EVENT,
478
+ // Keep v0.1 scaffold mapping (search → "post") to avoid downstream
479
+ // re-classification — Toutiao searches are user-authored queries.
480
+ subtype: EVENT_SUBTYPES.POST,
481
+ occurredAt,
482
+ actor: "person-self",
483
+ content: { title: keyword },
484
+ ingestedAt,
485
+ source,
486
+ extra: {
487
+ platform: "toutiao",
488
+ kind: "search",
489
+ keyword,
490
+ },
491
+ },
492
+ ],
493
+ persons: [],
494
+ places: [],
495
+ items: [],
496
+ topics: [],
497
+ };
234
498
  }
235
499
 
236
- module.exports = { ToutiaoAdapter, NAME, VERSION };
500
+ module.exports = {
501
+ ToutiaoAdapter,
502
+ NAME,
503
+ VERSION,
504
+ SNAPSHOT_SCHEMA_VERSION,
505
+ VALID_SNAPSHOT_KINDS,
506
+ };