@chainlesschain/personal-data-hub 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/social-toutiao-kuaishou-scaffold.test.js +58 -16
- package/__tests__/analysis.test.js +1 -1
- package/__tests__/longtail-adapters.test.js +67 -16
- package/__tests__/messaging-qq-snapshot.test.js +294 -0
- package/__tests__/shopping-pinduoduo-snapshot.test.js +302 -0
- package/__tests__/shopping-snapshot.test.js +438 -0
- package/__tests__/social-adapters.test.js +28 -3
- package/__tests__/social-douyin-snapshot.test.js +253 -0
- package/__tests__/social-kuaishou-snapshot.test.js +309 -0
- package/__tests__/social-toutiao-snapshot.test.js +314 -0
- package/__tests__/social-weibo-snapshot.test.js +234 -0
- package/__tests__/social-xiaohongshu-snapshot.test.js +232 -0
- package/__tests__/travel-maps-snapshot.test.js +426 -0
- package/__tests__/vault-driver-error.test.js +74 -0
- package/lib/adapters/messaging-qq/index.js +498 -92
- package/lib/adapters/shopping-jd/index.js +228 -25
- package/lib/adapters/shopping-meituan/index.js +222 -26
- package/lib/adapters/shopping-pinduoduo/index.js +275 -0
- package/lib/adapters/social-douyin/index.js +454 -63
- package/lib/adapters/social-kuaishou/index.js +379 -127
- package/lib/adapters/social-toutiao/index.js +400 -130
- package/lib/adapters/social-weibo/index.js +393 -95
- package/lib/adapters/social-xiaohongshu/index.js +389 -49
- package/lib/adapters/travel-baidu-map/index.js +286 -26
- package/lib/adapters/travel-tencent-map/index.js +414 -0
- package/lib/index.js +5 -1
- package/lib/vault.js +60 -8
- package/package.json +2 -1
|
@@ -1,50 +1,117 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* §A8 v0.2 — Toutiao (今日头条) adapter, dual-mode (snapshot + sqlite).
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
* newer versions, plaintext in older 7.x builds). Schema is reverse-engineered
|
|
6
|
-
* from the open-source sjqz parsers project and is pinned at scaffold quality
|
|
7
|
-
* only — Phase 13.10 will fixture-pin real field names after Xiaomi 24115RA8EC
|
|
8
|
-
* real-device E2E.
|
|
4
|
+
* Mirror of social-weibo / social-bilibili two-mode pattern:
|
|
9
5
|
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
6
|
+
* 1. snapshot mode (opts.inputPath): in-APK Android cc reads a snapshot
|
|
7
|
+
* JSON produced by the phone's ToutiaoLocalCollector (root-required
|
|
8
|
+
* SQLCipher decrypt of /data/data/com.ss.android.article.news/
|
|
9
|
+
* databases/ on Phase 13.10 real-device E2E; until then the snapshot
|
|
10
|
+
* can be produced by the desktop AndroidExtractor pulling a plaintext
|
|
11
|
+
* 7.x DB and running the same row → snapshot transform offline).
|
|
12
|
+
* Desktop-independent path. account.uid OPTIONAL at construction —
|
|
13
|
+
* payload carries it.
|
|
14
14
|
*
|
|
15
|
-
*
|
|
16
|
-
*
|
|
15
|
+
* 2. sqlite mode (opts.dbPath, legacy): Phase 13.8 device-pull path —
|
|
16
|
+
* desktop reads the pulled DB directly. Preserved for backward compat;
|
|
17
|
+
* account.uid REQUIRED in this mode (checked lazily at sync time, not
|
|
18
|
+
* at construction, so snapshot-only callers can omit it).
|
|
17
19
|
*
|
|
18
|
-
*
|
|
19
|
-
*
|
|
20
|
-
*
|
|
20
|
+
* Snapshot schema (mirrors ToutiaoLocalCollector.SNAPSHOT_SCHEMA_VERSION):
|
|
21
|
+
*
|
|
22
|
+
* {
|
|
23
|
+
* "schemaVersion": 1,
|
|
24
|
+
* "snapshottedAt": <epoch-ms>,
|
|
25
|
+
* "account": { "uid": "12345", "displayName": "alice" },
|
|
26
|
+
* "events": [
|
|
27
|
+
* { "kind": "read", "id": "read-<itemId>", "capturedAt": <ms>,
|
|
28
|
+
* "itemId": "...", "title": "...", "category": "...", "author": "...",
|
|
29
|
+
* "readDuration": N, "source": "..." },
|
|
30
|
+
* { "kind": "collection", "id": "collect-<itemId>", "capturedAt": <ms>,
|
|
31
|
+
* "itemId": "...", "title": "...", "category": "...", "author": "..." },
|
|
32
|
+
* { "kind": "search", "id": "search-<kw>:<ts>", "capturedAt": <ms>,
|
|
33
|
+
* "keyword": "...", "searchAt": <ms> }
|
|
34
|
+
* ]
|
|
35
|
+
* }
|
|
36
|
+
*
|
|
37
|
+
* Sensitivity: bumped to "high" vs Bilibili — Toutiao reading patterns can
|
|
38
|
+
* reveal political / medical / financial topic interest.
|
|
21
39
|
*/
|
|
22
40
|
|
|
23
41
|
"use strict";
|
|
24
42
|
|
|
25
43
|
const fs = require("node:fs");
|
|
26
44
|
const { newId } = require("../../ids");
|
|
45
|
+
const {
|
|
46
|
+
ENTITY_TYPES,
|
|
47
|
+
EVENT_SUBTYPES,
|
|
48
|
+
CAPTURED_BY,
|
|
49
|
+
} = require("../../constants");
|
|
27
50
|
|
|
28
51
|
const NAME = "social-toutiao";
|
|
29
|
-
const VERSION = "0.
|
|
52
|
+
const VERSION = "0.2.0";
|
|
53
|
+
const SNAPSHOT_SCHEMA_VERSION = 1;
|
|
54
|
+
|
|
55
|
+
// Discriminator values carried in each event's `kind` field.
const KIND_READ = "read";
const KIND_COLLECTION = "collection";
const KIND_SEARCH = "search";

// Frozen so the list cannot be mutated after module load.
const VALID_SNAPSHOT_KINDS = Object.freeze([KIND_READ, KIND_COLLECTION, KIND_SEARCH]);
|
|
63
|
+
|
|
64
|
+
/**
 * Builds the `toutiao:<kind>:<id>` key used as an event's originalId.
 *
 * A non-empty string id is used verbatim and a finite number is stringified.
 * Anything else gets an `unknown-<epoch-ms>-<random base36>` suffix, so the
 * result is NOT repeatable for id-less input.
 *
 * @param {string} kind - event kind, interpolated as-is.
 * @param {unknown} id - candidate identifier.
 * @returns {string} the composed key.
 */
function stableOriginalId(kind, id) {
  let suffix = null;
  if (typeof id === "string") {
    if (id.length > 0) {
      suffix = id;
    }
  } else if (typeof id === "number" && Number.isFinite(id)) {
    suffix = String(id);
  }

  if (suffix === null) {
    const stamp = Date.now();
    const nonce = Math.random().toString(36).slice(2, 8);
    suffix = `unknown-${stamp}-${nonce}`;
  }

  return `toutiao:${kind}:${suffix}`;
}
|
|
74
|
+
|
|
75
|
+
/**
 * Coerces a timestamp-like value to epoch milliseconds.
 *
 * Finite numbers and all-digit strings above 1e12 are returned as-is;
 * values at or below 1e12 are multiplied by 1000. Any other string goes
 * through `Date.parse`.
 *
 * @param {unknown} v - number, numeric string, or date string.
 * @returns {number|null} epoch milliseconds, or null when `v` cannot be interpreted.
 */
function parseTime(v) {
  const toMillis = (n) => (n > 1e12 ? n : n * 1000);

  if (typeof v === "string") {
    if (/^\d+$/.test(v)) {
      return toMillis(parseInt(v, 10));
    }
    const parsed = Date.parse(v);
    return Number.isFinite(parsed) ? parsed : null;
  }

  // Number.isFinite is false for every non-number, so this also covers
  // undefined, null, objects, NaN and ±Infinity.
  return Number.isFinite(v) ? toMillis(v) : null;
}
|
|
87
|
+
|
|
88
|
+
/**
 * Runs a query and returns all rows, or null if preparing or executing it
 * throws. Best-effort by design: every error is swallowed.
 *
 * @param {object} db - handle exposing `prepare(sql).all()`.
 * @param {string} sql - statement to run.
 * @returns {Array|null} the rows, or null on any error.
 */
function trySelect(db, sql) {
  let rows;
  try {
    const statement = db.prepare(sql);
    rows = statement.all();
  } catch (_ignored) {
    rows = null;
  }
  return rows;
}
|
|
30
95
|
|
|
31
96
|
class ToutiaoAdapter {
|
|
32
97
|
constructor(opts = {}) {
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
this.account = opts.account;
|
|
98
|
+
// §A8 v0.2: account.uid now OPTIONAL at construction — snapshot mode is
|
|
99
|
+
// stateless and pulls account from the snapshot file. Sqlite mode (legacy
|
|
100
|
+
// device-pull) still requires it; checked at sync time, not construction.
|
|
101
|
+
this.account = opts.account || null;
|
|
37
102
|
this._dbPath = opts.dbPath || null;
|
|
38
|
-
this._dbDriverFactory = opts.dbDriverFactory || null;
|
|
39
103
|
|
|
40
104
|
this.name = NAME;
|
|
41
105
|
this.version = VERSION;
|
|
42
106
|
this.capabilities = [
|
|
107
|
+
"sync:snapshot",
|
|
43
108
|
"sync:sqlite",
|
|
44
109
|
"parse:toutiao-read-history",
|
|
45
110
|
"parse:toutiao-collection",
|
|
46
111
|
"parse:toutiao-search",
|
|
47
112
|
];
|
|
113
|
+
// Existing desktop wiring may key off this — kept as device-pull (the
|
|
114
|
+
// sqlite mode is the desktop-side; snapshot mode is in-APK Android).
|
|
48
115
|
this.extractMode = "device-pull";
|
|
49
116
|
this.rateLimits = {};
|
|
50
117
|
this.dataDisclosure = {
|
|
@@ -53,14 +120,53 @@ class ToutiaoAdapter {
|
|
|
53
120
|
"toutiao:collection_article (item_id / title / save_time)",
|
|
54
121
|
"toutiao:search_history (keyword / search_time)",
|
|
55
122
|
],
|
|
56
|
-
//
|
|
123
|
+
// News reading reveals political / medical / financial topic interest.
|
|
57
124
|
sensitivity: "high",
|
|
58
125
|
legalGate: false,
|
|
126
|
+
defaultInclude: {
|
|
127
|
+
read: true,
|
|
128
|
+
collection: true,
|
|
129
|
+
search: true,
|
|
130
|
+
},
|
|
131
|
+
};
|
|
132
|
+
|
|
133
|
+
// _deps injection seam for tests (vi.mock fs/ doesn't intercept require in
|
|
134
|
+
// inlined CJS — see .claude/rules/testing.md).
|
|
135
|
+
this._deps = {
|
|
136
|
+
fs,
|
|
137
|
+
dbDriverFactory: opts.dbDriverFactory || null,
|
|
59
138
|
};
|
|
60
139
|
}
|
|
61
140
|
|
|
62
|
-
async authenticate() {
|
|
63
|
-
|
|
141
|
+
async authenticate(ctx = {}) {
|
|
142
|
+
if (ctx && typeof ctx.inputPath === "string" && ctx.inputPath.length > 0) {
|
|
143
|
+
try {
|
|
144
|
+
this._deps.fs.accessSync(ctx.inputPath, this._deps.fs.constants.R_OK);
|
|
145
|
+
} catch (err) {
|
|
146
|
+
return {
|
|
147
|
+
ok: false,
|
|
148
|
+
reason: "INPUT_PATH_UNREADABLE",
|
|
149
|
+
message: `snapshot not readable at ${ctx.inputPath}: ${err.message}`,
|
|
150
|
+
};
|
|
151
|
+
}
|
|
152
|
+
return { ok: true, mode: "snapshot-file" };
|
|
153
|
+
}
|
|
154
|
+
if (this._dbPath || (ctx && typeof ctx.dbPath === "string")) {
|
|
155
|
+
if (!this.account || !this.account.uid) {
|
|
156
|
+
return {
|
|
157
|
+
ok: false,
|
|
158
|
+
reason: "NO_ACCOUNT_UID",
|
|
159
|
+
message: "social-toutiao.authenticate: sqlite mode requires account.uid",
|
|
160
|
+
};
|
|
161
|
+
}
|
|
162
|
+
return { ok: true, account: this.account.uid, mode: "sqlite" };
|
|
163
|
+
}
|
|
164
|
+
return {
|
|
165
|
+
ok: false,
|
|
166
|
+
reason: "NO_INPUT",
|
|
167
|
+
message:
|
|
168
|
+
"social-toutiao.authenticate: needs opts.inputPath (snapshot mode) OR opts.dbPath (sqlite mode)",
|
|
169
|
+
};
|
|
64
170
|
}
|
|
65
171
|
|
|
66
172
|
async healthCheck() {
|
|
@@ -68,22 +174,102 @@ class ToutiaoAdapter {
|
|
|
68
174
|
}
|
|
69
175
|
|
|
70
176
|
async *sync(opts = {}) {
|
|
177
|
+
if (typeof opts.inputPath === "string" && opts.inputPath.length > 0) {
|
|
178
|
+
yield* this._syncViaSnapshot(opts);
|
|
179
|
+
return;
|
|
180
|
+
}
|
|
71
181
|
const dbPath = opts.dbPath || this._dbPath;
|
|
72
|
-
if (
|
|
73
|
-
|
|
74
|
-
|
|
182
|
+
if (dbPath) {
|
|
183
|
+
yield* this._syncViaSqlite({ ...opts, dbPath });
|
|
184
|
+
return;
|
|
185
|
+
}
|
|
186
|
+
throw new Error(
|
|
187
|
+
"social-toutiao.sync: needs opts.inputPath (snapshot mode, Android in-APK cc) OR opts.dbPath (sqlite mode, legacy device-pull)",
|
|
188
|
+
);
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
async *_syncViaSnapshot(opts) {
|
|
192
|
+
const raw = this._deps.fs.readFileSync(opts.inputPath, "utf-8");
|
|
193
|
+
const snapshot = JSON.parse(raw);
|
|
194
|
+
if (
|
|
195
|
+
!snapshot ||
|
|
196
|
+
typeof snapshot !== "object" ||
|
|
197
|
+
snapshot.schemaVersion !== SNAPSHOT_SCHEMA_VERSION
|
|
198
|
+
) {
|
|
199
|
+
throw new Error(
|
|
200
|
+
`social-toutiao.sync: snapshot schemaVersion mismatch (got ${snapshot && snapshot.schemaVersion}, expected ${SNAPSHOT_SCHEMA_VERSION})`,
|
|
201
|
+
);
|
|
202
|
+
}
|
|
203
|
+
const fallbackCapturedAt =
|
|
204
|
+
Number.isFinite(snapshot.snapshottedAt) && snapshot.snapshottedAt > 0
|
|
205
|
+
? Math.floor(snapshot.snapshottedAt)
|
|
206
|
+
: Date.now();
|
|
207
|
+
|
|
208
|
+
const account =
|
|
209
|
+
snapshot.account && typeof snapshot.account === "object"
|
|
210
|
+
? snapshot.account
|
|
211
|
+
: null;
|
|
212
|
+
const include = opts.include || {};
|
|
213
|
+
const limit =
|
|
214
|
+
Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
|
|
215
|
+
|
|
216
|
+
const events = Array.isArray(snapshot.events) ? snapshot.events : [];
|
|
217
|
+
let emitted = 0;
|
|
218
|
+
for (const ev of events) {
|
|
219
|
+
if (emitted >= limit) return;
|
|
220
|
+
if (!ev || typeof ev !== "object") continue;
|
|
221
|
+
const kind = ev.kind;
|
|
222
|
+
if (!VALID_SNAPSHOT_KINDS.includes(kind)) continue;
|
|
223
|
+
if (include[kind] === false) continue;
|
|
224
|
+
|
|
225
|
+
const capturedAt =
|
|
226
|
+
parseTime(ev.capturedAt) ||
|
|
227
|
+
parseTime(ev.time) ||
|
|
228
|
+
fallbackCapturedAt;
|
|
229
|
+
const id =
|
|
230
|
+
(typeof ev.id === "string" && ev.id.length > 0 && ev.id) ||
|
|
231
|
+
ev.itemId ||
|
|
232
|
+
ev.keyword ||
|
|
233
|
+
null;
|
|
234
|
+
|
|
235
|
+
yield {
|
|
236
|
+
adapter: NAME,
|
|
237
|
+
kind,
|
|
238
|
+
originalId: stableOriginalId(kind, id),
|
|
239
|
+
capturedAt,
|
|
240
|
+
payload: { ...ev, account },
|
|
241
|
+
};
|
|
242
|
+
emitted += 1;
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
async *_syncViaSqlite(opts) {
|
|
247
|
+
// Legacy Phase 13.8 path — requires account.uid in constructor and a DB
|
|
248
|
+
// pulled via the desktop AndroidExtractor.
|
|
249
|
+
if (!this.account || !this.account.uid) {
|
|
250
|
+
throw new Error(
|
|
251
|
+
"social-toutiao._syncViaSqlite: account.uid required (set via new ToutiaoAdapter({ account: { uid } }) in cli wiring)",
|
|
252
|
+
);
|
|
253
|
+
}
|
|
254
|
+
const dbPath = opts.dbPath;
|
|
255
|
+
if (!dbPath || !this._deps.fs.existsSync(dbPath)) return;
|
|
256
|
+
const Driver = this._deps.dbDriverFactory
|
|
257
|
+
? this._deps.dbDriverFactory()
|
|
75
258
|
: require("better-sqlite3-multiple-ciphers");
|
|
76
259
|
const db = new Driver(dbPath, { readonly: true });
|
|
77
260
|
|
|
78
261
|
try {
|
|
79
262
|
const reads =
|
|
80
|
-
trySelect(
|
|
263
|
+
trySelect(
|
|
264
|
+
db,
|
|
265
|
+
"SELECT * FROM read_history ORDER BY read_time DESC LIMIT 5000",
|
|
266
|
+
) || [];
|
|
81
267
|
for (const row of reads) {
|
|
82
268
|
yield {
|
|
83
269
|
adapter: NAME,
|
|
84
270
|
originalId: `read-${row.id || row._id || row.item_id}`,
|
|
85
271
|
capturedAt: parseTime(row.read_time || row.time || row.create_time),
|
|
86
|
-
payload: { row, kind:
|
|
272
|
+
payload: { row, kind: KIND_READ },
|
|
87
273
|
};
|
|
88
274
|
}
|
|
89
275
|
|
|
@@ -97,7 +283,7 @@ class ToutiaoAdapter {
|
|
|
97
283
|
adapter: NAME,
|
|
98
284
|
originalId: `collect-${row.id || row.item_id}`,
|
|
99
285
|
capturedAt: parseTime(row.save_time || row.time),
|
|
100
|
-
payload: { row, kind:
|
|
286
|
+
payload: { row, kind: KIND_COLLECTION },
|
|
101
287
|
};
|
|
102
288
|
}
|
|
103
289
|
|
|
@@ -111,126 +297,210 @@ class ToutiaoAdapter {
|
|
|
111
297
|
adapter: NAME,
|
|
112
298
|
originalId: `search-${row.id || row.keyword + ":" + row.search_time}`,
|
|
113
299
|
capturedAt: parseTime(row.search_time || row.time),
|
|
114
|
-
payload: { row, kind:
|
|
300
|
+
payload: { row, kind: KIND_SEARCH },
|
|
115
301
|
};
|
|
116
302
|
}
|
|
117
303
|
} finally {
|
|
118
304
|
try {
|
|
119
305
|
db.close();
|
|
120
|
-
} catch (_e) {
|
|
306
|
+
} catch (_e) {
|
|
307
|
+
/* ignore */
|
|
308
|
+
}
|
|
121
309
|
}
|
|
122
310
|
}
|
|
123
311
|
|
|
124
312
|
normalize(raw) {
|
|
125
|
-
if (!raw || !raw.payload
|
|
126
|
-
throw new Error("ToutiaoAdapter.normalize:
|
|
313
|
+
if (!raw || !raw.payload) {
|
|
314
|
+
throw new Error("ToutiaoAdapter.normalize: payload missing");
|
|
127
315
|
}
|
|
128
|
-
const
|
|
129
|
-
const
|
|
130
|
-
const
|
|
131
|
-
parseTime(row.read_time || row.save_time || row.search_time || row.time) || now;
|
|
132
|
-
const source = {
|
|
133
|
-
adapter: NAME,
|
|
134
|
-
adapterVersion: VERSION,
|
|
135
|
-
originalId: raw.originalId,
|
|
136
|
-
capturedAt: occurredAt,
|
|
137
|
-
capturedBy: "sqlite",
|
|
138
|
-
};
|
|
316
|
+
const ingestedAt = Date.now();
|
|
317
|
+
const kind = raw.kind || raw.payload.kind;
|
|
318
|
+
const p = raw.payload;
|
|
139
319
|
|
|
140
|
-
if (kind ===
|
|
141
|
-
return
|
|
142
|
-
events: [
|
|
143
|
-
{
|
|
144
|
-
id: newId(),
|
|
145
|
-
type: "event",
|
|
146
|
-
subtype: "like",
|
|
147
|
-
occurredAt,
|
|
148
|
-
actor: "person-self",
|
|
149
|
-
content: { title: row.title || row.article_title || "(no title)" },
|
|
150
|
-
ingestedAt: now,
|
|
151
|
-
source,
|
|
152
|
-
extra: {
|
|
153
|
-
itemId: row.item_id || null,
|
|
154
|
-
category: row.category || null,
|
|
155
|
-
author: row.author || null,
|
|
156
|
-
source: row.source || null,
|
|
157
|
-
},
|
|
158
|
-
},
|
|
159
|
-
],
|
|
160
|
-
persons: [],
|
|
161
|
-
places: [],
|
|
162
|
-
items: [],
|
|
163
|
-
topics: [],
|
|
164
|
-
};
|
|
320
|
+
if (kind === KIND_COLLECTION) {
|
|
321
|
+
return normalizeCollection(p, raw, ingestedAt);
|
|
165
322
|
}
|
|
166
|
-
if (kind ===
|
|
167
|
-
return
|
|
168
|
-
events: [
|
|
169
|
-
{
|
|
170
|
-
id: newId(),
|
|
171
|
-
type: "event",
|
|
172
|
-
subtype: "post",
|
|
173
|
-
occurredAt,
|
|
174
|
-
actor: "person-self",
|
|
175
|
-
content: { title: row.keyword || row.query || "(empty query)" },
|
|
176
|
-
ingestedAt: now,
|
|
177
|
-
source,
|
|
178
|
-
extra: { kind: "search", keyword: row.keyword || row.query || null },
|
|
179
|
-
},
|
|
180
|
-
],
|
|
181
|
-
persons: [],
|
|
182
|
-
places: [],
|
|
183
|
-
items: [],
|
|
184
|
-
topics: [],
|
|
185
|
-
};
|
|
323
|
+
if (kind === KIND_SEARCH) {
|
|
324
|
+
return normalizeSearch(p, raw, ingestedAt);
|
|
186
325
|
}
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
id: newId(),
|
|
192
|
-
type: "event",
|
|
193
|
-
subtype: "browse",
|
|
194
|
-
occurredAt,
|
|
195
|
-
actor: "person-self",
|
|
196
|
-
content: { title: row.title || row.article_title || "(no title)" },
|
|
197
|
-
ingestedAt: now,
|
|
198
|
-
source,
|
|
199
|
-
extra: {
|
|
200
|
-
itemId: row.item_id || null,
|
|
201
|
-
category: row.category || null,
|
|
202
|
-
author: row.author || null,
|
|
203
|
-
readDuration: row.read_duration || row.duration || null,
|
|
204
|
-
},
|
|
205
|
-
},
|
|
206
|
-
],
|
|
207
|
-
persons: [],
|
|
208
|
-
places: [],
|
|
209
|
-
items: [],
|
|
210
|
-
topics: [],
|
|
211
|
-
};
|
|
326
|
+
if (kind === KIND_READ) {
|
|
327
|
+
return normalizeRead(p, raw, ingestedAt);
|
|
328
|
+
}
|
|
329
|
+
throw new Error(`ToutiaoAdapter.normalize: unknown kind ${kind}`);
|
|
212
330
|
}
|
|
213
331
|
}
|
|
214
332
|
|
|
215
|
-
function
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
333
|
+
/**
 * Assembles the `source` provenance object attached to a normalized event.
 *
 * @param {{originalId: *, capturedAt: *}} raw - raw record from sync.
 * @param {number} occurredAt - used as capturedAt when `raw.capturedAt` is falsy.
 * @param {*} capturedBy - capture-channel marker, passed through unchanged.
 * @returns {{adapter: string, adapterVersion: string, originalId: *, capturedAt: *, capturedBy: *}}
 */
function buildSource(raw, occurredAt, capturedBy) {
  const { originalId } = raw;
  const capturedAt = raw.capturedAt || occurredAt;
  return {
    adapter: NAME,
    adapterVersion: VERSION,
    originalId,
    capturedAt,
    capturedBy,
  };
}
|
|
342
|
+
|
|
343
|
+
/**
 * Looks a field up on either payload shape.
 *
 * Snapshot payloads carry fields directly on `p`; sqlite payloads nest them
 * under `p.row`. The direct key wins, then each row key is tried in order.
 * Only null and undefined count as missing, so 0 and "" are returned.
 *
 * @param {object} p - payload.
 * @param {string} snapshotKey - key to read directly from `p`.
 * @param {...string} sqlKeys - keys to try on `p.row`, in priority order.
 * @returns {*} the first non-nullish value found, else null.
 */
function pickField(p, snapshotKey, ...sqlKeys) {
  const isPresent = (value) => value !== null && value !== undefined;

  const direct = p[snapshotKey];
  if (isPresent(direct)) return direct;

  const row = p.row || {};
  const hit = sqlKeys.find((key) => isPresent(row[key]));
  return hit === undefined ? null : row[hit];
}
|
|
222
353
|
|
|
223
|
-
function
|
|
224
|
-
if (
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
354
|
+
/**
 * Converts one raw "read" record into a single BROWSE event.
 *
 * Accepts both payload shapes: snapshot payloads with fields directly on
 * `p`, and sqlite payloads with the DB row under `p.row`.
 *
 * occurredAt is the first usable value of: `p.capturedAt`, the row's
 * `read_time` / `time` / `create_time`, `raw.capturedAt`, then `ingestedAt`.
 *
 * @param {object} p - the raw record's payload.
 * @param {object} raw - the raw record (supplies originalId / capturedAt).
 * @param {number} ingestedAt - epoch ms stamped on the event.
 * @returns {{events: object[], persons: [], places: [], items: [], topics: []}}
 * @throws {Error} "row missing" when the payload has no row, itemId or title.
 */
function normalizeRead(p, raw, ingestedAt) {
  // A payload with neither a sqlite row nor a snapshot itemId/title has
  // nothing to normalize. This single guard replaces a nested
  // `if (!p.row)` that was always true once the outer condition held.
  if (!p.row && !p.itemId && !p.title) {
    throw new Error("ToutiaoAdapter.normalize: row missing");
  }

  // Absence of `row` is what marks a snapshot-mode payload.
  const isSnapshot = !p.row;
  const row = p.row || {};
  const title = pickField(p, "title", "title", "article_title") || "(no title)";
  const occurredAt =
    parseTime(p.capturedAt) ||
    parseTime(row.read_time || row.time || row.create_time) ||
    raw.capturedAt ||
    ingestedAt;
  const source = buildSource(
    raw,
    occurredAt,
    isSnapshot ? CAPTURED_BY.API : CAPTURED_BY.SQLITE,
  );

  return {
    events: [
      {
        id: newId(),
        type: ENTITY_TYPES.EVENT,
        subtype: EVENT_SUBTYPES.BROWSE,
        occurredAt,
        actor: "person-self",
        content: { title },
        ingestedAt,
        source,
        extra: {
          platform: "toutiao",
          itemId: pickField(p, "itemId", "item_id"),
          category: pickField(p, "category", "category"),
          author: pickField(p, "author", "author"),
          readDuration: pickField(p, "readDuration", "read_duration", "duration"),
          source: pickField(p, "source", "source"),
        },
      },
    ],
    persons: [],
    places: [],
    items: [],
    topics: [],
  };
}
|
|
402
|
+
|
|
403
|
+
/**
 * Converts one raw "collection" (saved article) record into a single LIKE event.
 *
 * Accepts both payload shapes: snapshot payloads with fields directly on
 * `p`, and sqlite payloads with the DB row under `p.row`.
 *
 * occurredAt is the first usable value of: `p.capturedAt`, the row's
 * `save_time` / `time`, `raw.capturedAt`, then `ingestedAt`.
 *
 * @param {object} p - the raw record's payload.
 * @param {object} raw - the raw record (supplies originalId / capturedAt).
 * @param {number} ingestedAt - epoch ms stamped on the event.
 * @returns {{events: object[], persons: [], places: [], items: [], topics: []}}
 * @throws {Error} "row missing" when the payload has no row, itemId or title.
 */
function normalizeCollection(p, raw, ingestedAt) {
  // Single guard: the nested `if (!p.row)` it replaces was always true
  // whenever the outer condition held.
  if (!p.row && !p.itemId && !p.title) {
    throw new Error("ToutiaoAdapter.normalize: row missing");
  }

  // Absence of `row` is what marks a snapshot-mode payload.
  const isSnapshot = !p.row;
  const row = p.row || {};
  const title = pickField(p, "title", "title", "article_title") || "(no title)";
  const occurredAt =
    parseTime(p.capturedAt) ||
    parseTime(row.save_time || row.time) ||
    raw.capturedAt ||
    ingestedAt;
  const source = buildSource(
    raw,
    occurredAt,
    isSnapshot ? CAPTURED_BY.API : CAPTURED_BY.SQLITE,
  );

  return {
    events: [
      {
        id: newId(),
        type: ENTITY_TYPES.EVENT,
        subtype: EVENT_SUBTYPES.LIKE,
        occurredAt,
        actor: "person-self",
        content: { title },
        ingestedAt,
        source,
        extra: {
          platform: "toutiao",
          itemId: pickField(p, "itemId", "item_id"),
          category: pickField(p, "category", "category"),
          author: pickField(p, "author", "author"),
          source: pickField(p, "source", "source"),
        },
      },
    ],
    persons: [],
    places: [],
    items: [],
    topics: [],
  };
}
|
|
449
|
+
|
|
450
|
+
/**
 * Converts one raw "search" record into a single POST event.
 *
 * Accepts both payload shapes: snapshot payloads with fields directly on
 * `p`, and sqlite payloads with the DB row under `p.row`.
 *
 * The keyword is looked up as `p.keyword`, `row.keyword`, `row.query`, then
 * `p.query`, and defaults to "(empty query)".
 * occurredAt is the first usable value of: `p.capturedAt`, `p.searchAt`,
 * the row's `search_time` / `time`, `raw.capturedAt`, then `ingestedAt`.
 *
 * @param {object} p - the raw record's payload.
 * @param {object} raw - the raw record (supplies originalId / capturedAt).
 * @param {number} ingestedAt - epoch ms stamped on the event.
 * @returns {{events: object[], persons: [], places: [], items: [], topics: []}}
 * @throws {Error} "row missing" when the payload has no row, keyword or query.
 */
function normalizeSearch(p, raw, ingestedAt) {
  // Single guard: the nested `if (!p.row)` it replaces was always true
  // whenever the outer condition held.
  if (!p.row && !p.keyword && !p.query) {
    throw new Error("ToutiaoAdapter.normalize: row missing");
  }

  // Absence of `row` is what marks a snapshot-mode payload.
  const isSnapshot = !p.row;
  const row = p.row || {};
  const keyword =
    pickField(p, "keyword", "keyword", "query") ||
    pickField(p, "query") ||
    "(empty query)";
  const occurredAt =
    parseTime(p.capturedAt) ||
    parseTime(p.searchAt) ||
    parseTime(row.search_time || row.time) ||
    raw.capturedAt ||
    ingestedAt;
  const source = buildSource(
    raw,
    occurredAt,
    isSnapshot ? CAPTURED_BY.API : CAPTURED_BY.SQLITE,
  );

  return {
    events: [
      {
        id: newId(),
        type: ENTITY_TYPES.EVENT,
        // Keep v0.1 scaffold mapping (search → "post") to avoid downstream
        // re-classification — Toutiao searches are user-authored queries.
        subtype: EVENT_SUBTYPES.POST,
        occurredAt,
        actor: "person-self",
        content: { title: keyword },
        ingestedAt,
        source,
        extra: {
          platform: "toutiao",
          kind: "search",
          // NOTE(review): a row with no keyword/query stores the
          // "(empty query)" placeholder here, not null — confirm that
          // downstream consumers expect this.
          keyword,
        },
      },
    ],
    persons: [],
    places: [],
    items: [],
    topics: [],
  };
}
|
|
235
499
|
|
|
236
|
-
module.exports = {
|
|
500
|
+
module.exports = {
|
|
501
|
+
ToutiaoAdapter,
|
|
502
|
+
NAME,
|
|
503
|
+
VERSION,
|
|
504
|
+
SNAPSHOT_SCHEMA_VERSION,
|
|
505
|
+
VALID_SNAPSHOT_KINDS,
|
|
506
|
+
};
|