@chainlesschain/personal-data-hub 0.4.28 → 0.4.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -5
- package/lib/adapters/social-douyin-adb/usage-profile-reader.js +253 -0
- package/lib/adapters/social-douyin-adb/watch-history-reader.js +104 -31
- package/lib/adapters/social-toutiao-adb/article-reader.js +202 -0
- package/lib/analysis-skills/overview.js +24 -4
- package/lib/analysis-skills/spending.js +63 -2
- package/lib/analysis-skills/timeline.js +11 -6
- package/lib/prompt-builder.js +15 -1
- package/lib/query-parser.js +38 -8
- package/package.json +4 -1
- package/__tests__/adapter-guide.test.js +0 -47
- package/__tests__/adapter-spec.test.js +0 -78
- package/__tests__/adapters/ai-chat-cookie-capture-spec.test.js +0 -211
- package/__tests__/adapters/ai-chat-health-checker.test.js +0 -262
- package/__tests__/adapters/ai-chat-history.test.js +0 -396
- package/__tests__/adapters/ai-chat-http-client.test.js +0 -242
- package/__tests__/adapters/ai-chat-vendors.test.js +0 -874
- package/__tests__/adapters/alipay-bill-adapter.test.js +0 -538
- package/__tests__/adapters/apple-health.test.js +0 -95
- package/__tests__/adapters/bank-family.test.js +0 -125
- package/__tests__/adapters/biz-tianyancha.test.js +0 -159
- package/__tests__/adapters/browser-history-chrome.test.js +0 -377
- package/__tests__/adapters/browser-history-edge.test.js +0 -159
- package/__tests__/adapters/car-mercedesme.test.js +0 -74
- package/__tests__/adapters/doc-baidu-netdisk.test.js +0 -102
- package/__tests__/adapters/doc-camscanner.test.js +0 -147
- package/__tests__/adapters/doc-platforms.test.js +0 -177
- package/__tests__/adapters/edu-huawei-learning-live.test.js +0 -198
- package/__tests__/adapters/edu-zuoyebang-live.test.js +0 -226
- package/__tests__/adapters/email-adapter-snapshot.test.js +0 -237
- package/__tests__/adapters/email-adapter.test.js +0 -742
- package/__tests__/adapters/email-classifier.test.js +0 -347
- package/__tests__/adapters/email-imap-session.test.js +0 -334
- package/__tests__/adapters/email-parser.test.js +0 -244
- package/__tests__/adapters/email-pdf-extractor.test.js +0 -529
- package/__tests__/adapters/email-providers.test.js +0 -84
- package/__tests__/adapters/email-retry-progress.test.js +0 -294
- package/__tests__/adapters/email-templates.test.js +0 -822
- package/__tests__/adapters/family-23-collectors-scaffold.test.js +0 -182
- package/__tests__/adapters/finance-alipay-live.test.js +0 -258
- package/__tests__/adapters/finance-dcep.test.js +0 -74
- package/__tests__/adapters/fitness-joyrun.test.js +0 -82
- package/__tests__/adapters/game-genshin-live.test.js +0 -238
- package/__tests__/adapters/game-genshin-scaffold.test.js +0 -108
- package/__tests__/adapters/game-honor-of-kings-live.test.js +0 -230
- package/__tests__/adapters/git-activity.test.js +0 -222
- package/__tests__/adapters/gov-12123.test.js +0 -103
- package/__tests__/adapters/gov-ixiamen.test.js +0 -150
- package/__tests__/adapters/gov-tax.test.js +0 -135
- package/__tests__/adapters/health-meiyou.test.js +0 -125
- package/__tests__/adapters/local-files.test.js +0 -264
- package/__tests__/adapters/local-im-pc.test.js +0 -154
- package/__tests__/adapters/messaging-whatsapp.test.js +0 -289
- package/__tests__/adapters/music-kugou.test.js +0 -187
- package/__tests__/adapters/music-qq.test.js +0 -112
- package/__tests__/adapters/netease-music-live.test.js +0 -244
- package/__tests__/adapters/netease-music.test.js +0 -74
- package/__tests__/adapters/pc-local-discovery.test.js +0 -141
- package/__tests__/adapters/qq-pc-direct-read.test.js +0 -227
- package/__tests__/adapters/reading-family.test.js +0 -108
- package/__tests__/adapters/recruit-boss.test.js +0 -180
- package/__tests__/adapters/shell-history.test.js +0 -180
- package/__tests__/adapters/shopping-base.test.js +0 -179
- package/__tests__/adapters/shopping-dianping.test.js +0 -239
- package/__tests__/adapters/social-bilibili-adb-api-client.test.js +0 -721
- package/__tests__/adapters/social-bilibili-adb-chromium-cookies-reader.test.js +0 -346
- package/__tests__/adapters/social-bilibili-adb-collector.test.js +0 -284
- package/__tests__/adapters/social-bilibili-adb-cookies-extension.test.js +0 -343
- package/__tests__/adapters/social-bilibili-adb-snapshot-builder.test.js +0 -296
- package/__tests__/adapters/social-csdn.test.js +0 -175
- package/__tests__/adapters/social-dongchedi.test.js +0 -165
- package/__tests__/adapters/social-douyin-adb-aweme-detail.test.js +0 -165
- package/__tests__/adapters/social-douyin-adb-collector.test.js +0 -254
- package/__tests__/adapters/social-douyin-adb-db-extension.test.js +0 -114
- package/__tests__/adapters/social-douyin-adb-im-db-parser.test.js +0 -304
- package/__tests__/adapters/social-douyin-adb-snapshot-builder.test.js +0 -216
- package/__tests__/adapters/social-douyin-adb-watch-history.test.js +0 -192
- package/__tests__/adapters/social-kuaishou-adb-api-client.test.js +0 -496
- package/__tests__/adapters/social-kuaishou-adb-collector.test.js +0 -276
- package/__tests__/adapters/social-kuaishou-adb-cookies-extension.test.js +0 -152
- package/__tests__/adapters/social-kuaishou-adb-snapshot-builder.test.js +0 -178
- package/__tests__/adapters/social-toutiao-adb-account-reader.test.js +0 -135
- package/__tests__/adapters/social-toutiao-adb-api-client.test.js +0 -626
- package/__tests__/adapters/social-toutiao-adb-collector.test.js +0 -378
- package/__tests__/adapters/social-toutiao-adb-cookies-extension.test.js +0 -193
- package/__tests__/adapters/social-toutiao-adb-snapshot-builder.test.js +0 -196
- package/__tests__/adapters/social-toutiao-kuaishou-scaffold.test.js +0 -311
- package/__tests__/adapters/social-weibo-adb-api-client.test.js +0 -362
- package/__tests__/adapters/social-weibo-adb-collector.test.js +0 -201
- package/__tests__/adapters/social-weibo-adb-cookies-extension.test.js +0 -167
- package/__tests__/adapters/social-weibo-adb-snapshot-builder.test.js +0 -189
- package/__tests__/adapters/social-xiaohongshu-adb-api-client.test.js +0 -431
- package/__tests__/adapters/social-xiaohongshu-adb-collector.test.js +0 -207
- package/__tests__/adapters/social-xiaohongshu-adb-cookies-extension.test.js +0 -0
- package/__tests__/adapters/social-xiaohongshu-adb-sign-provider-injection.test.js +0 -351
- package/__tests__/adapters/social-xiaohongshu-adb-sign.test.js +0 -130
- package/__tests__/adapters/social-xiaohongshu-adb-snapshot-builder.test.js +0 -200
- package/__tests__/adapters/social-zhihu.test.js +0 -246
- package/__tests__/adapters/system-data-adapter.test.js +0 -443
- package/__tests__/adapters/system-data-android-ingest.test.js +0 -144
- package/__tests__/adapters/system-data-android.test.js +0 -519
- package/__tests__/adapters/system-data-disclosure.test.js +0 -153
- package/__tests__/adapters/travel-12306.test.js +0 -512
- package/__tests__/adapters/travel-amap.test.js +0 -219
- package/__tests__/adapters/travel-baidu-map.test.js +0 -305
- package/__tests__/adapters/travel-base.test.js +0 -205
- package/__tests__/adapters/travel-ctrip.test.js +0 -377
- package/__tests__/adapters/travel-didi-consumer.test.js +0 -66
- package/__tests__/adapters/travel-didi.test.js +0 -204
- package/__tests__/adapters/travel-tencent-map.test.js +0 -207
- package/__tests__/adapters/travel-tongcheng.test.js +0 -289
- package/__tests__/adapters/video-platforms.test.js +0 -152
- package/__tests__/adapters/video-xigua.test.js +0 -106
- package/__tests__/adapters/vscode.test.js +0 -299
- package/__tests__/adapters/wechat-bootstrap.test.js +0 -240
- package/__tests__/adapters/wechat-env-probe.test.js +0 -162
- package/__tests__/adapters/wechat-frida-agent.test.js +0 -322
- package/__tests__/adapters/wechat-frida-integration.test.js +0 -149
- package/__tests__/adapters/wechat-frida-key-provider.test.js +0 -188
- package/__tests__/adapters/wechat-md5-key-provider.test.js +0 -101
- package/__tests__/adapters/wechat-pc-direct-read.test.js +0 -365
- package/__tests__/adapters/wechat-pc-group-topic.test.js +0 -63
- package/__tests__/adapters/wechat-pc-v4-sidecar.test.js +0 -72
- package/__tests__/adapters/weread.test.js +0 -123
- package/__tests__/adapters/wework-pc.test.js +0 -124
- package/__tests__/adapters/win-recent.test.js +0 -192
- package/__tests__/analysis-skills.test.js +0 -679
- package/__tests__/analysis.test.js +0 -1845
- package/__tests__/audio-ximalaya-snapshot.test.js +0 -279
- package/__tests__/batch.test.js +0 -133
- package/__tests__/bridges-cc-kg.test.js +0 -231
- package/__tests__/bridges-cc-llm.test.js +0 -191
- package/__tests__/bridges-cc-rag.test.js +0 -162
- package/__tests__/categories.test.js +0 -92
- package/__tests__/e2e/ai-chat-cross-source-journey.test.js +0 -213
- package/__tests__/e2e/full-user-journey.test.js +0 -188
- package/__tests__/e2e/local-data-adapters-cli.e2e.test.js +0 -146
- package/__tests__/entity-resolver-ingest-hook.test.js +0 -177
- package/__tests__/entity-resolver-stages.test.js +0 -411
- package/__tests__/entity-resolver-vault.test.js +0 -249
- package/__tests__/entity-resolver.test.js +0 -526
- package/__tests__/fitness-keep-snapshot.test.js +0 -224
- package/__tests__/fixtures/entity-resolver-200-mock.json +0 -96
- package/__tests__/ids.test.js +0 -45
- package/__tests__/integration/ai-chat-history-registry.test.js +0 -228
- package/__tests__/integration/aichat-wizard-end-to-end.test.js +0 -282
- package/__tests__/integration/cross-adapter-pipelines.test.js +0 -396
- package/__tests__/integration/local-data-adapters-pipeline.test.js +0 -373
- package/__tests__/integration/social-bilibili-pipeline.test.js +0 -261
- package/__tests__/integration/wechat-bootstrap-end-to-end.test.js +0 -390
- package/__tests__/key-providers.test.js +0 -126
- package/__tests__/kg-derive.test.js +0 -219
- package/__tests__/llm-client.test.js +0 -122
- package/__tests__/longtail-adapters.test.js +0 -281
- package/__tests__/messaging-qq-snapshot.test.js +0 -294
- package/__tests__/mobile-extractor-encrypted.test.js +0 -460
- package/__tests__/mobile-extractor.test.js +0 -288
- package/__tests__/mock-adapter.test.js +0 -93
- package/__tests__/prompt-builder.test.js +0 -249
- package/__tests__/query-parser.test.js +0 -302
- package/__tests__/rag-derive.test.js +0 -169
- package/__tests__/registry-readiness.test.js +0 -292
- package/__tests__/registry.test.js +0 -420
- package/__tests__/salvage-ingest.test.js +0 -97
- package/__tests__/schemas.test.js +0 -331
- package/__tests__/shopping-adapters.test.js +0 -392
- package/__tests__/shopping-eleme-snapshot.test.js +0 -454
- package/__tests__/shopping-pinduoduo-snapshot.test.js +0 -484
- package/__tests__/shopping-snapshot.test.js +0 -438
- package/__tests__/shopping-vipshop-snapshot.test.js +0 -425
- package/__tests__/shopping-xianyu-snapshot.test.js +0 -451
- package/__tests__/sidecar-contacts-cross-validate.test.js +0 -186
- package/__tests__/sidecar-supervisor.test.js +0 -128
- package/__tests__/sign-providers.test.js +0 -62
- package/__tests__/social-adapters.test.js +0 -280
- package/__tests__/social-bilibili-snapshot.test.js +0 -278
- package/__tests__/social-douban-snapshot.test.js +0 -351
- package/__tests__/social-douyin-im-direct-read.test.js +0 -377
- package/__tests__/social-douyin-salvage-collector.test.js +0 -98
- package/__tests__/social-douyin-salvage-mapper.test.js +0 -90
- package/__tests__/social-douyin-snapshot.test.js +0 -256
- package/__tests__/social-kuaishou-snapshot.test.js +0 -362
- package/__tests__/social-toutiao-snapshot.test.js +0 -366
- package/__tests__/social-weibo-snapshot.test.js +0 -234
- package/__tests__/social-weibo-sqlite-device.test.js +0 -174
- package/__tests__/social-xiaohongshu-snapshot.test.js +0 -232
- package/__tests__/sqlite-leaf-salvage.test.js +0 -97
- package/__tests__/travel-adapters.test.js +0 -483
- package/__tests__/travel-maps-snapshot.test.js +0 -426
- package/__tests__/vault-driver-error.test.js +0 -74
- package/__tests__/vault-search-helpers.test.js +0 -104
- package/__tests__/vault-search.test.js +0 -423
- package/__tests__/vault.test.js +0 -767
- package/__tests__/wechat-adapter.test.js +0 -594
- package/__tests__/whatsapp-adapter.test.js +0 -138
- package/scripts/_make-fixture-all.js +0 -126
- package/scripts/_make-fixture-contacts.js +0 -84
- package/scripts/evaluate-entity-resolver.js +0 -213
- package/scripts/run-native-tests-sandbox.sh +0 -55
- package/scripts/smoke-phase-5-5.js +0 -196
- package/scripts/smoke-phase-5-7.js +0 -181
- package/scripts/smoke-system-data-contacts.js +0 -309
- package/scripts/smoke-system-data.js +0 -312
- package/vitest.config.js +0 -88
package/README.md
CHANGED
|
@@ -16,13 +16,21 @@ middleware.
|
|
|
16
16
|
> Ollama / Volcengine / Anthropic / Gemini / DeepSeek), **CcKgSink**, **CcRagSink**
|
|
17
17
|
> — injected at the desktop/CLI entry so this package stays decoupled (Phase 3.5).
|
|
18
18
|
>
|
|
19
|
-
> **
|
|
19
|
+
> **89 adapters are now live** (no longer "later phases"): Email IMAP,
|
|
20
20
|
> Alipay bill, 9 AI-chat vendors, WeChat / QQ / Weibo / Bilibili / Douyin /
|
|
21
|
-
> Xiaohongshu / Toutiao / Kuaishou social, Telegram / WhatsApp messaging,
|
|
22
|
-
> Taobao / JD / Meituan / Pinduoduo
|
|
23
|
-
>
|
|
21
|
+
> Xiaohongshu / Toutiao / Kuaishou / Douban social, Telegram / WhatsApp messaging,
|
|
22
|
+
> Taobao / JD / Meituan / Pinduoduo / Eleme / Xianyu / Vipshop shopping, Amap /
|
|
23
|
+
> Baidu-map / Tencent-map / Ctrip / 12306 / Didi travel, Kugou / Ximalaya audio,
|
|
24
|
+
> Keep / Joyrun fitness, system-data (contacts / calls / sms / location),
|
|
24
25
|
> and the developer-activity set (git / shell / vscode / browser-history /
|
|
25
|
-
> local-files / win-recent).
|
|
26
|
+
> local-files / win-recent). See `lib/adapters/` for the full list.
|
|
27
|
+
>
|
|
28
|
+
> **On-device root forensics (rooted devices):** beyond cookie/sign-based
|
|
29
|
+
> collection, PDH can pull a logged-in app's local encrypted DB directly via
|
|
30
|
+
> **method B** (key-free `/proc/<pid>/mem` memory scan — engine-agnostic,
|
|
31
|
+
> anti-debug-resistant) or **method C** (frida `sqlcipher_export` online decrypt),
|
|
32
|
+
> plus a SQLite leaf-page **salvager** (`--unaligned`) that recovers plaintext
|
|
33
|
+
> pages from corrupt mem dumps. See `docs/internal/pdh-db-decryption-runbook.md`.
|
|
26
34
|
>
|
|
27
35
|
> **New in v0.4.0 (v5.0.3.99):** adapter **readiness** — split out from the
|
|
28
36
|
> loose `healthCheck` sync gate into a real ready/needs_setup/unavailable
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Douyin on-device usage-profile reader — recovers the user's app-usage
|
|
3
|
+
* baseline (active hours / session count / total time-on-app) from the local
|
|
4
|
+
* `1128_feature_engineering.db` table `FEInternalUserActivityTable`, a plaintext
|
|
5
|
+
* SQLite table the app keeps for its own client-side feature store.
|
|
6
|
+
*
|
|
7
|
+
* Why this exists (real-device 2026-06-18, user's exported plaintext DB):
|
|
8
|
+
* - `FEInternalUserActivityTable` rows are per-session aggregates:
|
|
9
|
+
* { timestamp(sec), start/end_timestamp_ms, open_app_count,
|
|
10
|
+
* launch_hour_0..23, total_duration(ms) }
|
|
11
|
+
* - 81 rows spanning ~31 days = "how the user uses Douyin": ~175 opens,
|
|
12
|
+
* ~108 hours total, peak 12–17h. This behavioral baseline is exactly what a
|
|
13
|
+
* personal-AI should know, and it's plaintext (no signing/encryption).
|
|
14
|
+
*
|
|
15
|
+
* This module is the testable core (reader + pure summarizer + vault-event
|
|
16
|
+
* builder). The device pull/collector wiring (mirroring watch-history-reader's
|
|
17
|
+
* pullVideoRecordDbViaSu) is a follow-up; the remote db sub-path must be
|
|
18
|
+
* confirmed on a device first.
|
|
19
|
+
*
|
|
20
|
+
* Authorization: only on your own device/account.
|
|
21
|
+
*/
|
|
22
|
+
"use strict";
|
|
23
|
+
|
|
24
|
+
const { newId } = require("../../ids");
|
|
25
|
+
const {
|
|
26
|
+
_internals: { loadDatabaseClass },
|
|
27
|
+
} = require("../social-bilibili-adb/chromium-cookies-reader");
|
|
28
|
+
|
|
29
|
+
/** Name of the SQLite table that holds the per-session usage aggregates. */
const USAGE_TABLE = "FEInternalUserActivityTable";

/** Version tag for the usage-profile reader. */
const PROFILE_VERSION = "usage-profile-0.1";

/**
 * Coarse time-of-day buckets used to roll the 24-slot launch-hour histogram up
 * into four ranges. `from` and `to` are inclusive hour indices.
 *
 * Frozen deeply (the array AND every bucket object): `Object.freeze` is
 * shallow, and this constant is shared module state, so a consumer must not be
 * able to silently shift a bucket boundary for everyone else.
 */
const HOUR_BUCKETS = Object.freeze(
  [
    { label: "0-5h", from: 0, to: 5 },
    { label: "6-11h", from: 6, to: 11 },
    { label: "12-17h", from: 12, to: 17 },
    { label: "18-23h", from: 18, to: 23 },
  ].map((bucket) => Object.freeze(bucket)),
);
|
|
37
|
+
|
|
38
|
+
/**
 * Normalize an epoch timestamp given in seconds or milliseconds to whole
 * milliseconds.
 *
 * Heuristic: a value above 1e12 is taken to be milliseconds already; anything
 * else is treated as seconds and scaled by 1000.
 *
 * @param {*} v raw value; coerced with Number()
 * @returns {number|null} integer epoch milliseconds, or null when the input is
 *   not a finite number greater than zero
 */
function toEpochMs(v) {
  const value = Number(v);
  const usable = Number.isFinite(value) && value > 0;
  if (!usable) return null;
  const asMillis = value > 1e12 ? value : value * 1000;
  return Math.floor(asMillis);
}
|
|
44
|
+
|
|
45
|
+
/**
 * Open the given SQLite file read-only, read every row of the usage table
 * (`USAGE_TABLE`) and fold the rows into one aggregate usage profile.
 *
 * Columns are probed via `PRAGMA table_info`, so a table lacking
 * `open_app_count`, `total_duration`, `timestamp` or any `launch_hour_<h>`
 * column simply contributes nothing for that field. Non-numeric cell values
 * count as 0. When the table itself is missing, `emptyProfile()` is returned.
 * The database handle is always closed (close errors are ignored).
 *
 * @param {string} dbPath path handed to the Database constructor
 * @param {object} [opts]
 * @param {Function} [opts._databaseClass] Database class to use instead of the
 *   one returned by `loadDatabaseClass()` (test seam)
 * @returns {{
 *   sessions: number, days: number, from: number|null, to: number|null,
 *   totalOpens: number, totalDurationMs: number,
 *   hourHistogram: number[], peakHour: number|null,
 *   peakBucket: string|null, bucketTotals: Record<string,number>
 * }}
 */
function readDouyinUsageProfile(dbPath, opts = {}) {
  const Database = opts._databaseClass || loadDatabaseClass();
  const db = new Database(dbPath, { readonly: true });
  try {
    const tableRow = db
      .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name=?")
      .get(USAGE_TABLE);
    if (!tableRow) return emptyProfile();

    // Discover which of the expected columns this table actually has.
    const columnInfo = db.prepare(`PRAGMA table_info("${USAGE_TABLE}")`).all();
    const columnNames = new Set(columnInfo.map((info) => info.name));
    const hourColumns = Array.from({ length: 24 }, (_unused, hour) => {
      const column = `launch_hour_${hour}`;
      return columnNames.has(column) ? column : null;
    });
    const withOpens = columnNames.has("open_app_count");
    const withDuration = columnNames.has("total_duration");
    const withTimestamp = columnNames.has("timestamp");

    const sessionRows = db.prepare(`SELECT * FROM "${USAGE_TABLE}"`).all();
    const hourHistogram = new Array(24).fill(0);
    const distinctDays = new Set();
    let totalOpens = 0;
    let totalDurationMs = 0;
    let from = null;
    let to = null;

    for (const row of sessionRows) {
      if (withOpens) totalOpens += Number(row.open_app_count) || 0;
      if (withDuration) totalDurationMs += Number(row.total_duration) || 0;
      hourColumns.forEach((column, hour) => {
        if (column) hourHistogram[hour] += Number(row[column]) || 0;
      });

      if (!withTimestamp) continue;
      const tsMs = toEpochMs(row.timestamp);
      if (tsMs == null) continue;
      if (from == null || tsMs < from) from = tsMs;
      if (to == null || tsMs > to) to = tsMs;
      // Bucketed by UTC day (not local day) — coarse, but enough for a
      // "distinct days" count.
      distinctDays.add(Math.floor(tsMs / 86_400_000));
    }

    // Busiest single hour: earliest hour holding the largest positive count;
    // stays null when no hour has a positive count.
    let peakHour = null;
    let peakHourCount = 0;
    hourHistogram.forEach((count, hour) => {
      if (count > peakHourCount) {
        peakHourCount = count;
        peakHour = hour;
      }
    });

    // Same rule applied to the coarse buckets.
    const bucketTotals = {};
    let peakBucket = null;
    let peakBucketCount = 0;
    for (const { label, from: firstHour, to: lastHour } of HOUR_BUCKETS) {
      const total = hourHistogram
        .slice(firstHour, lastHour + 1)
        .reduce((running, count) => running + count, 0);
      bucketTotals[label] = total;
      if (total > peakBucketCount) {
        peakBucketCount = total;
        peakBucket = label;
      }
    }

    return {
      sessions: sessionRows.length,
      days: distinctDays.size,
      from,
      to,
      totalOpens,
      totalDurationMs,
      hourHistogram,
      peakHour,
      peakBucket,
      bucketTotals,
    };
  } finally {
    try {
      db.close();
    } catch (_closeError) {
      /* best-effort */
    }
  }
}
|
|
143
|
+
|
|
144
|
+
/**
 * Build the all-zero usage profile: zero counts, null range and peaks, a
 * 24-slot zero histogram, and a zero total for every `HOUR_BUCKETS` label.
 * A fresh object (with fresh arrays/maps) is created on every call.
 */
function emptyProfile() {
  const zeroedBuckets = Object.fromEntries(
    HOUR_BUCKETS.map(({ label }) => [label, 0]),
  );
  return {
    sessions: 0,
    days: 0,
    from: null,
    to: null,
    totalOpens: 0,
    totalDurationMs: 0,
    hourHistogram: Array.from({ length: 24 }, () => 0),
    peakHour: null,
    peakBucket: null,
    bucketTotals: zeroedBuckets,
  };
}
|
|
160
|
+
|
|
161
|
+
/**
 * Render a usage profile as a single human-readable (Chinese) line. Pure.
 *
 * A missing profile, or one with `sessions === 0`, yields the fixed "no data"
 * sentence. Otherwise the line reports days, sessions, opens and the total
 * duration in hours rounded to one decimal, followed by the peak bucket when
 * one is set.
 *
 * @param {object|null|undefined} profile usage profile to describe
 * @returns {string}
 */
function summarizeUsageProfile(profile) {
  const hasData = Boolean(profile) && profile.sessions !== 0;
  if (!hasData) return "抖音使用画像:无数据";

  const { days, sessions, totalOpens, totalDurationMs, peakBucket } = profile;
  // Round to tenths of an hour.
  const tenthsOfHours = Math.round((totalDurationMs / 3_600_000) * 10);
  const hours = tenthsOfHours / 10;

  let peakSuffix = "";
  if (peakBucket) peakSuffix = `,高峰时段 ${peakBucket}`;

  return [
    `抖音使用画像:${days} 天内 ${sessions} 个会话、`,
    `${totalOpens} 次启动、累计约 ${hours} 小时${peakSuffix}`,
  ].join("");
}
|
|
171
|
+
|
|
172
|
+
/**
 * Turn a usage profile into a single "app-usage baseline" vault event.
 *
 * The event always carries the same `source.originalId`
 * ("social-douyin:usage-profile"); the intent is that a re-ingest replaces the
 * earlier baseline instead of adding another one (how the vault treats a
 * repeated originalId is not visible from this module). It is tagged
 * `extra.kind = "app-usage-profile"` so consumers can tell this baseline apart
 * from ordinary timeline activity.
 *
 * `occurredAt` is the profile's `to` timestamp when that is finite, otherwise
 * the ingest time. The profile's `hourHistogram` and `bucketTotals` are placed
 * on the event by reference, not copied.
 *
 * @param {object|null|undefined} profile usage profile; missing or
 *   `sessions === 0` produces no event
 * @param {object} [opts]
 * @param {number} [opts.now] ingest time in epoch ms (defaults to Date.now())
 * @returns {{events: object[]}}
 */
function buildUsageProfileEvents(profile, opts = {}) {
  const nothingToEmit = !profile || profile.sessions === 0;
  if (nothingToEmit) return { events: [] };

  const ingestedAt = Number.isFinite(opts.now) ? opts.now : Date.now();
  let occurredAt = ingestedAt;
  if (Number.isFinite(profile.to)) occurredAt = profile.to;
  const summary = summarizeUsageProfile(profile);

  const source = {
    adapter: "social-douyin",
    adapterVersion: PROFILE_VERSION,
    originalId: "social-douyin:usage-profile",
    capturedAt: occurredAt,
    capturedBy: "sqlite",
  };
  const extra = {
    platform: "douyin",
    kind: "app-usage-profile",
    days: profile.days,
    sessions: profile.sessions,
    totalOpens: profile.totalOpens,
    totalDurationMs: profile.totalDurationMs,
    hourHistogram: profile.hourHistogram,
    peakHour: profile.peakHour,
    peakBucket: profile.peakBucket,
    bucketTotals: profile.bucketTotals,
    rangeFrom: profile.from,
    rangeTo: profile.to,
  };

  return {
    events: [
      {
        id: newId(),
        type: "event",
        subtype: "other",
        occurredAt,
        actor: "person-self",
        content: { title: summary, text: summary },
        ingestedAt,
        source,
        extra,
      },
    ],
  };
}
|
|
217
|
+
|
|
218
|
+
/**
 * Read a feature-engineering db, build the usage-profile baseline event and
 * write it into the vault via `vault.putBatch({ events })`.
 *
 * `putBatch` is only called when there is an event to write. Its return value
 * is read defensively: a vault that returns nothing (or an object without an
 * `events` count) is reported as `ingested: 0` instead of raising a TypeError
 * after the write has already happened.
 *
 * @param {object} vault  LocalVault (must expose putBatch)
 * @param {string} dbPath path to 1128_feature_engineering.db
 * @param {object} [opts] forwarded to readDouyinUsageProfile and
 *   buildUsageProfileEvents
 * @returns {{ingested: number, sessions: number, days: number, summary: string}}
 * @throws {TypeError} when `vault` lacks a putBatch function or `dbPath` is not
 *   a non-empty string
 */
function usageProfileToVault(vault, dbPath, opts = {}) {
  if (!vault || typeof vault.putBatch !== "function") {
    throw new TypeError("usageProfileToVault: vault with putBatch required");
  }
  if (typeof dbPath !== "string" || !dbPath) {
    throw new TypeError("usageProfileToVault: dbPath required");
  }
  const profile = readDouyinUsageProfile(dbPath, opts);
  const { events } = buildUsageProfileEvents(profile, opts);
  // Nothing to write → skip the vault call entirely.
  const res = events.length > 0 ? vault.putBatch({ events }) : null;
  return {
    // `?.` keeps a summary-less putBatch result from crashing the caller.
    ingested: res?.events || 0,
    sessions: profile.sessions,
    days: profile.days,
    summary: summarizeUsageProfile(profile),
  };
}
|
|
244
|
+
|
|
245
|
+
module.exports = {
|
|
246
|
+
USAGE_TABLE,
|
|
247
|
+
HOUR_BUCKETS,
|
|
248
|
+
readDouyinUsageProfile,
|
|
249
|
+
summarizeUsageProfile,
|
|
250
|
+
buildUsageProfileEvents,
|
|
251
|
+
usageProfileToVault,
|
|
252
|
+
_internals: { toEpochMs, emptyProfile },
|
|
253
|
+
};
|
|
@@ -29,6 +29,7 @@ const crypto = require("node:crypto");
|
|
|
29
29
|
const {
|
|
30
30
|
_internals: { loadDatabaseClass },
|
|
31
31
|
} = require("../social-bilibili-adb/chromium-cookies-reader");
|
|
32
|
+
const { DouyinAdapter } = require("../social-douyin");
|
|
32
33
|
|
|
33
34
|
const DOUYIN_PACKAGE = "com.ss.android.ugc.aweme";
|
|
34
35
|
const VIDEO_RECORD_DB_REMOTE_PATH =
|
|
@@ -88,9 +89,18 @@ async function pullVideoRecordDbViaSu(adb, serial, opts = {}) {
|
|
|
88
89
|
|
|
89
90
|
/**
|
|
90
91
|
* Read watch records from video_record.db. Tables are named `record_<uid>`
|
|
91
|
-
* (per-account) plus
|
|
92
|
-
*
|
|
93
|
-
*
|
|
92
|
+
* (per-account) plus a default `record_0`. We MERGE every `record_*` table
|
|
93
|
+
* (record_0 included) and dedup by (awemeId, capturedAt), because the watch
|
|
94
|
+
* history is split across tables and which one holds the bulk varies by device:
|
|
95
|
+
*
|
|
96
|
+
* - real-device 2026-06-11 (5lhyaqu8lbwstc6x): record_<uid> = 900 rows.
|
|
97
|
+
* - real-device 2026-06-18: record_0 = 223 rows vs record_<uid> = 9 — the
|
|
98
|
+
* anonymous/default bucket held 96% of the history.
|
|
99
|
+
*
|
|
100
|
+
* The earlier "skip record_0, pick the largest uid table" logic silently
|
|
101
|
+
* dropped the record_0 rows and lost most of the history on the 2nd device.
|
|
102
|
+
* Attribution `uid` is still the largest non-zero `record_<uid>` table (the
|
|
103
|
+
* logged-in account), or null when only record_0 exists.
|
|
94
104
|
*
|
|
95
105
|
* @returns {{uid: string|null, records: Array<{awemeId,capturedAt,enterFrom}>}}
|
|
96
106
|
*/
|
|
@@ -102,43 +112,57 @@ function readDouyinWatchHistory(dbPath, opts = {}) {
|
|
|
102
112
|
const tables = db
|
|
103
113
|
.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name LIKE 'record\\_%' ESCAPE '\\'")
|
|
104
114
|
.all()
|
|
105
|
-
.map((t) => t.name)
|
|
106
|
-
|
|
107
|
-
|
|
115
|
+
.map((t) => t.name)
|
|
116
|
+
.filter((name) => /^record_\d+$/.test(name));
|
|
117
|
+
if (tables.length === 0) return { uid: null, records: [] };
|
|
118
|
+
|
|
119
|
+
let bestUid = null; // largest non-zero record_<uid> table → attribution
|
|
120
|
+
const merged = new Map(); // dedupKey → record (first-seen wins)
|
|
108
121
|
for (const name of tables) {
|
|
109
122
|
const m = /^record_(\d+)$/.exec(name);
|
|
110
|
-
if (!m || m[1] === "0") continue;
|
|
111
123
|
let count = 0;
|
|
112
124
|
try {
|
|
113
125
|
count = db.prepare(`SELECT COUNT(*) c FROM "${name}"`).get().c;
|
|
114
126
|
} catch (_e) {
|
|
115
127
|
continue;
|
|
116
128
|
}
|
|
117
|
-
if (!
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
129
|
+
if (m && m[1] !== "0" && (!bestUid || count > bestUid.count)) {
|
|
130
|
+
bestUid = { uid: m[1], count };
|
|
131
|
+
}
|
|
132
|
+
const cols = new Set(
|
|
133
|
+
db.prepare(`PRAGMA table_info("${name}")`).all().map((c) => c.name),
|
|
134
|
+
);
|
|
135
|
+
const hasEnter = cols.has("enter_from");
|
|
136
|
+
const hasTs = cols.has("view_time_timestamp");
|
|
137
|
+
let rows;
|
|
138
|
+
try {
|
|
139
|
+
rows = db
|
|
140
|
+
.prepare(
|
|
141
|
+
`SELECT aid${hasTs ? ", view_time_timestamp" : ""}${hasEnter ? ", enter_from" : ""} ` +
|
|
142
|
+
`FROM "${name}"${hasTs ? " ORDER BY view_time_timestamp DESC" : ""} LIMIT ${limit}`,
|
|
143
|
+
)
|
|
144
|
+
.all();
|
|
145
|
+
} catch (_e) {
|
|
146
|
+
continue;
|
|
147
|
+
}
|
|
148
|
+
for (const r of rows) {
|
|
149
|
+
const awemeId = r.aid != null ? String(r.aid) : null;
|
|
150
|
+
if (!awemeId) continue;
|
|
151
|
+
const capturedAt = hasTs ? toEpochMs(r.view_time_timestamp) : null;
|
|
152
|
+
const key = `${awemeId}@${capturedAt == null ? "" : capturedAt}`;
|
|
153
|
+
if (merged.has(key)) continue;
|
|
154
|
+
merged.set(key, {
|
|
155
|
+
awemeId,
|
|
156
|
+
capturedAt,
|
|
157
|
+
enterFrom: hasEnter ? r.enter_from || null : null,
|
|
158
|
+
});
|
|
159
|
+
}
|
|
140
160
|
}
|
|
141
|
-
|
|
161
|
+
// Most-recent first (null timestamps sink to the end), then cap.
|
|
162
|
+
const records = Array.from(merged.values())
|
|
163
|
+
.sort((a, b) => (b.capturedAt || 0) - (a.capturedAt || 0))
|
|
164
|
+
.slice(0, limit);
|
|
165
|
+
return { uid: bestUid ? bestUid.uid : null, records };
|
|
142
166
|
} finally {
|
|
143
167
|
try {
|
|
144
168
|
db.close();
|
|
@@ -148,6 +172,53 @@ function readDouyinWatchHistory(dbPath, opts = {}) {
|
|
|
148
172
|
}
|
|
149
173
|
}
|
|
150
174
|
|
|
175
|
+
/**
 * Read watch records from a local video_record.db (via readDouyinWatchHistory)
 * and convert each one into vault events by passing a raw "history" item to
 * `adapter.normalize`. Nothing is written here; the caller decides what to do
 * with the returned events.
 *
 * originalId stability: the id is `social-douyin:history:<awemeId>:<ts>`. For a
 * record with a usable timestamp `<ts>` is that timestamp. For a record WITHOUT
 * one, `<ts>` is the constant 0 rather than the ingest time: the event itself
 * still falls back to `now` for occurredAt/capturedAt, but putting `now` in the
 * id would make it differ on every run, so a re-ingest could never match the
 * earlier event.
 * NOTE(review): confirm the device-bridge collector uses a compatible id for
 * timestamp-less records.
 *
 * @param {string} dbPath path to video_record.db
 * @param {object} [opts] also forwarded to readDouyinWatchHistory
 * @param {object} [opts._adapter] object exposing normalize(raw) → {events};
 *   defaults to a new DouyinAdapter
 * @param {number} [opts.now] fallback event time in epoch ms (defaults to
 *   Date.now())
 * @returns {{events: object[], records: number, uid: string|null}} `records`
 *   is the number of records read, including any skipped for lacking an awemeId
 */
function buildWatchHistoryEvents(dbPath, opts = {}) {
  const { uid, records } = readDouyinWatchHistory(dbPath, opts);
  const adapter = opts._adapter || new DouyinAdapter();
  const now = Number.isFinite(opts.now) ? opts.now : Date.now();
  const events = [];
  for (const r of records) {
    if (!r.awemeId) continue;
    const hasTimestamp = Number.isFinite(r.capturedAt) && r.capturedAt > 0;
    const occurredAt = hasTimestamp ? r.capturedAt : now;
    // Run-independent id component (see the stability note above).
    const idTimestamp = hasTimestamp ? r.capturedAt : 0;
    const batch = adapter.normalize({
      adapter: "social-douyin",
      kind: "history",
      originalId: `social-douyin:history:${r.awemeId}:${idTimestamp}`,
      capturedAt: occurredAt,
      payload: {
        kind: "history",
        awemeId: r.awemeId,
        capturedAt: occurredAt,
        enterFrom: r.enterFrom,
      },
    });
    for (const ev of batch.events) events.push(ev);
  }
  return { events, records: records.length, uid };
}
|
|
209
|
+
|
|
210
|
+
/**
 * Build watch-history events from a local video_record.db and write them into
 * the vault via `vault.putBatch({ events })`.
 *
 * `putBatch` is only called when at least one event was built. Its return value
 * is read defensively: a vault that returns nothing (or an object without an
 * `events` count) is reported as `ingested: 0` instead of raising a TypeError
 * after the write has already happened.
 *
 * @param {object} vault  LocalVault (must expose putBatch)
 * @param {string} dbPath path to video_record.db
 * @param {object} [opts] forwarded to buildWatchHistoryEvents
 * @returns {{ingested: number, records: number, uid: string|null}}
 * @throws {TypeError} when `vault` lacks a putBatch function or `dbPath` is not
 *   a non-empty string
 */
function watchHistoryToVault(vault, dbPath, opts = {}) {
  if (!vault || typeof vault.putBatch !== "function") {
    throw new TypeError("watchHistoryToVault: vault with putBatch required");
  }
  if (typeof dbPath !== "string" || !dbPath) {
    throw new TypeError("watchHistoryToVault: dbPath required");
  }
  const { events, records, uid } = buildWatchHistoryEvents(dbPath, opts);
  // Nothing to write → skip the vault call entirely.
  const res = events.length > 0 ? vault.putBatch({ events }) : null;
  // `?.` keeps a summary-less putBatch result from crashing the caller.
  return { ingested: res?.events || 0, records, uid };
}
|
|
221
|
+
|
|
151
222
|
/** Bridge handler factory: `bridge.invoke("douyin.watch-history")` → {uid, records}. */
|
|
152
223
|
function createDouyinWatchExtension(factoryOpts = {}) {
|
|
153
224
|
const timeoutMs = factoryOpts.timeoutMs || 60_000;
|
|
@@ -178,6 +249,8 @@ function createDouyinWatchExtension(factoryOpts = {}) {
|
|
|
178
249
|
|
|
179
250
|
module.exports = {
|
|
180
251
|
createDouyinWatchExtension,
|
|
252
|
+
buildWatchHistoryEvents,
|
|
253
|
+
watchHistoryToVault,
|
|
181
254
|
VIDEO_RECORD_DB_REMOTE_PATH,
|
|
182
255
|
DOUYIN_PACKAGE,
|
|
183
256
|
_internals: {
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Toutiao on-device article reader — recovers the user's feed/read articles
|
|
3
|
+
* from the app's local `news_article.db` (table `article`), a plaintext SQLite
|
|
4
|
+
* DB. No signing/encryption needed.
|
|
5
|
+
*
|
|
6
|
+
* Why this exists (real-device 2026-06-18, user's exported plaintext DB):
|
|
7
|
+
* - `article` rows are the local feed cache (48 rows on the test export). The
|
|
8
|
+
* title is NOT a column — it lives in the `share_info` JSON blob
|
|
9
|
+
* ({title, share_url, ...}); `ext_json` is a heavier fallback. `behot_time`
|
|
10
|
+
* is when the item surfaced; `read_timestamp>0` ⇒ actually opened;
|
|
11
|
+
* `is_user_digg`/`is_user_repin` ⇒ engagement.
|
|
12
|
+
* - Modest signal (feed-shown ≈ weak interest; digg/read ≈ strong), but
|
|
13
|
+
* titled + plaintext, so it's a usable "articles I browsed" stream.
|
|
14
|
+
*
|
|
15
|
+
* Emits BROWSE events under source.adapter `social-toutiao` (the canonical
|
|
16
|
+
* adapter name, so byApp aggregation attributes correctly). Stable originalId
|
|
17
|
+
* (`social-toutiao:article:<group_id>`) → re-ingest UPDATES, not duplicates.
|
|
18
|
+
*
|
|
19
|
+
* Authorization: only on your own device/account.
|
|
20
|
+
*/
|
|
21
|
+
"use strict";
|
|
22
|
+
|
|
23
|
+
const { newId } = require("../../ids");
|
|
24
|
+
const {
|
|
25
|
+
_internals: { loadDatabaseClass },
|
|
26
|
+
} = require("../social-bilibili-adb/chromium-cookies-reader");
|
|
27
|
+
|
|
28
|
+
// Name of the SQLite table the reader looks up and selects from.
const ARTICLE_TABLE = "article";
// Stamped on every built event as source.adapterVersion.
const READER_VERSION = "toutiao-article-0.1";
// Matches a trailing "- 今日头条" brand suffix (optional whitespace around it)
// so it can be stripped from article titles.
const TITLE_SUFFIX = /\s*-\s*今日头条\s*$/;
|
|
31
|
+
|
|
32
|
+
/**
 * Normalise an epoch value to whole milliseconds.
 *
 * Heuristic: anything above 1e12 is taken to be milliseconds already; every
 * other positive value is treated as seconds and scaled by 1000.
 *
 * @param {*} v value coerced with Number()
 * @returns {number|null} floored milliseconds, or null when the value is not
 *   a finite positive number
 */
function toEpochMs(v) {
  const value = Number(v);
  const usable = Number.isFinite(value) && value > 0;
  if (!usable) {
    return null;
  }
  const millis = value > 1e12 ? value : value * 1000;
  return Math.floor(millis);
}
|
|
38
|
+
|
|
39
|
+
/**
 * JSON.parse that never throws.
 *
 * @param {*} s candidate JSON text
 * @returns {*} the parsed value, or null when `s` is not a string, is shorter
 *   than two characters, or is not valid JSON
 */
function safeParse(s) {
  const parseable = typeof s === "string" && s.length >= 2;
  if (!parseable) {
    return null;
  }
  let parsed;
  try {
    parsed = JSON.parse(s);
  } catch (_err) {
    parsed = null;
  }
  return parsed;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Pull the article title out of a row's JSON blobs.
 *
 * Candidates are tried in order: share_info.title, share_info.share_title,
 * ext_json.title, ext_json.share_title, ext_json.article.title. The first one
 * that is a string and still non-empty after the brand suffix is stripped and
 * the result trimmed wins. A blank, non-string or suffix-only candidate does
 * not block the later fallbacks.
 *
 * @param {object} row article row exposing `share_info` / `ext_json` JSON text
 * @returns {string|null} the cleaned title, or null when no candidate is usable
 */
function extractTitle(row) {
  // Returns the cleaned title, or null when the candidate carries no text.
  const clean = (candidate) => {
    if (typeof candidate !== "string") return null;
    const stripped = candidate.replace(TITLE_SUFFIX, "").trim();
    return stripped || null;
  };

  const si = safeParse(row.share_info);
  const fromShareInfo = si ? clean(si.title) || clean(si.share_title) : null;
  if (fromShareInfo) return fromShareInfo;

  // ext_json is only parsed when share_info yielded nothing usable.
  const ej = safeParse(row.ext_json);
  if (!ej) return null;
  return (
    clean(ej.title) ||
    clean(ej.share_title) ||
    (ej.article ? clean(ej.article.title) : null)
  );
}
|
|
59
|
+
|
|
60
|
+
/**
 * Resolve the article URL for a row, without its query string.
 *
 * share_info.share_url is preferred; the row's own share_url column is the
 * fallback. Everything from the first "?" onward is dropped so the same
 * article yields the same URL regardless of share/tracking parameters.
 *
 * @param {object} row article row
 * @returns {string|null} the URL up to (not including) the first "?", or null
 *   when no non-empty string URL is available
 */
function extractUrl(row) {
  const shareInfo = safeParse(row.share_info);
  let candidate = shareInfo && shareInfo.share_url;
  if (!candidate) {
    candidate = row.share_url || null;
  }
  if (typeof candidate !== "string" || !candidate) {
    return null;
  }
  const queryStart = candidate.indexOf("?");
  return queryStart === -1 ? candidate : candidate.slice(0, queryStart);
}
|
|
67
|
+
|
|
68
|
+
/**
 * Read the `category_new` query parameter from the row's share_url column.
 *
 * @param {object} row article row; only `share_url` is consulted
 * @returns {string|null} the percent-decoded value, the raw value when it is
 *   not valid percent-encoding, or null when the parameter is absent
 */
function extractCategory(row) {
  const u = typeof row.share_url === "string" ? row.share_url : "";
  // Stop at "#" as well as "&" so a URL fragment never leaks into the value.
  const m = /[?&]category_new=([^&#]+)/.exec(u);
  if (!m) return null;
  try {
    return decodeURIComponent(m[1]);
  } catch (_e) {
    // decodeURIComponent throws URIError on malformed escapes; one bad cached
    // URL must not abort the whole read, so keep the raw value instead.
    return m[1];
  }
}
|
|
73
|
+
|
|
74
|
+
/**
 * Open the database read-only, read the article table and parse each row into
 * a structured record. Rows are ordered by `behot_time` descending when that
 * column exists; otherwise the database's own order is kept.
 *
 * Rows without a group_id/item_id or without a usable title are dropped.
 * A missing article table yields an empty list. The connection is always
 * closed, and a failure while closing is ignored.
 *
 * @param {string} dbPath path handed to the Database constructor
 * @param {object} [opts]
 * @param {Function} [opts._databaseClass] Database class to use instead of loadDatabaseClass()
 * @param {number} [opts.limit] positive integer row cap (default 5000)
 * @returns {{articles: Array<{groupId,title,url,category,behotTime,readTimestamp,digg,repin}>}}
 */
function readToutiaoArticles(dbPath, opts = {}) {
  const Database = opts._databaseClass || loadDatabaseClass();
  const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : 5000;
  const db = new Database(dbPath, { readonly: true });
  try {
    const exists = db
      .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name=?")
      .get(ARTICLE_TABLE);
    if (!exists) return { articles: [] };

    // Column presence is probed up front because optional columns are only
    // read when the table actually has them.
    const cols = new Set(
      db.prepare(`PRAGMA table_info("${ARTICLE_TABLE}")`).all().map((c) => c.name),
    );
    const hasBehot = cols.has("behot_time");
    const stmt = db.prepare(
      `SELECT * FROM "${ARTICLE_TABLE}"${hasBehot ? " ORDER BY behot_time DESC" : ""} LIMIT ${limit}`,
    );
    // Ids stored as 64-bit integers can exceed Number.MAX_SAFE_INTEGER; read as
    // plain JS numbers they would be rounded before String() and distinct
    // articles could end up with the same groupId. Ask for BigInt results when
    // the driver supports it (injected test doubles may not). Number() and !!
    // below handle BigInt values as well.
    if (typeof stmt.safeIntegers === "function") {
      stmt.safeIntegers(true);
    }
    const rows = stmt.all();

    const articles = [];
    for (const r of rows) {
      const groupId =
        r.group_id != null ? String(r.group_id) : r.item_id != null ? String(r.item_id) : null;
      if (!groupId) continue;
      const title = extractTitle(r);
      if (!title) continue; // untitled cache rows carry no signal
      articles.push({
        groupId,
        title,
        url: extractUrl(r),
        category: extractCategory(r),
        behotTime: hasBehot ? toEpochMs(r.behot_time) : null,
        readTimestamp: cols.has("read_timestamp") ? toEpochMs(r.read_timestamp) : null,
        digg: cols.has("is_user_digg") ? !!r.is_user_digg : false,
        repin: cols.has("is_user_repin") ? !!r.is_user_repin : false,
      });
    }
    return { articles };
  } finally {
    try {
      db.close();
    } catch (_e) {
      /* best-effort */
    }
  }
}
|
|
127
|
+
|
|
128
|
+
/**
 * Turn parsed article records into BROWSE events attributed to the
 * `social-toutiao` adapter.
 *
 * Entries that are falsy or lack a groupId or title are skipped. An event's
 * time is the read timestamp when present, else the feed (behot) time, else
 * `now`.
 *
 * @param {Iterable<object>} articles records shaped like readToutiaoArticles output
 * @param {object} [opts]
 * @param {number} [opts.now] ingest time; defaults to Date.now()
 * @returns {{events: object[]}}
 */
function buildArticleEvents(articles, opts = {}) {
  const now = Number.isFinite(opts.now) ? opts.now : Date.now();
  const isUsableTime = (t) => Number.isFinite(t) && t !== 0;

  const toEvent = (article) => {
    let occurredAt = now;
    if (isUsableTime(article.readTimestamp)) {
      occurredAt = article.readTimestamp;
    } else if (isUsableTime(article.behotTime)) {
      occurredAt = article.behotTime;
    }
    const wasRead = Number.isFinite(article.readTimestamp) && article.readTimestamp > 0;
    return {
      id: newId(),
      type: "event",
      subtype: "browse",
      occurredAt,
      actor: "person-self",
      content: { title: article.title, text: article.title },
      ingestedAt: now,
      source: {
        adapter: "social-toutiao",
        adapterVersion: READER_VERSION,
        originalId: `social-toutiao:article:${article.groupId}`,
        capturedAt: occurredAt,
        capturedBy: "sqlite",
      },
      extra: {
        platform: "toutiao",
        kind: "article",
        groupId: article.groupId,
        url: article.url || null,
        category: article.category || null,
        digg: article.digg,
        repin: article.repin,
        read: wasRead,
      },
    };
  };

  const events = [];
  for (const entry of articles || []) {
    const complete = Boolean(entry) && Boolean(entry.groupId) && Boolean(entry.title);
    if (complete) {
      events.push(toEvent(entry));
    }
  }
  return { events };
}
|
|
170
|
+
|
|
171
|
+
/**
 * Read the article database at `dbPath`, build BROWSE events from it and store
 * them in `vault`. putBatch is only called when there is at least one event.
 *
 * @param {object} vault must expose a putBatch function
 * @param {string} dbPath non-empty path to the article database
 * @param {object} [opts] forwarded to readToutiaoArticles and buildArticleEvents
 * @returns {{ingested: number, articles: number, digg: number, read: number}}
 *   `ingested` is the `events` value reported by putBatch (0 when falsy or
 *   when nothing was written); the rest are counts over the parsed articles
 * @throws {TypeError} when the vault or the path is unusable
 */
function articlesToVault(vault, dbPath, opts = {}) {
  if (!(vault && typeof vault.putBatch === "function")) {
    throw new TypeError("articlesToVault: vault with putBatch required");
  }
  if (!(typeof dbPath === "string" && dbPath)) {
    throw new TypeError("articlesToVault: dbPath required");
  }

  const { articles } = readToutiaoArticles(dbPath, opts);
  const { events } = buildArticleEvents(articles, opts);

  let ingested = 0;
  if (events.length) {
    const outcome = vault.putBatch({ events });
    ingested = outcome.events || 0;
  }

  let digg = 0;
  let read = 0;
  for (const article of articles) {
    if (article.digg) digg += 1;
    if (Number.isFinite(article.readTimestamp) && article.readTimestamp > 0) read += 1;
  }

  return { ingested, articles: articles.length, digg, read };
}
|
|
195
|
+
|
|
196
|
+
// Public surface: the table name, the reader, the event builder and the
// read-and-store entry point. `_internals` exposes the row-parsing helpers
// (presumably for unit tests; confirm against callers).
module.exports = {
  ARTICLE_TABLE,
  readToutiaoArticles,
  buildArticleEvents,
  articlesToVault,
  _internals: { toEpochMs, extractTitle, extractUrl, extractCategory },
};
|