@chainlesschain/personal-data-hub 0.4.24 → 0.4.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/gov-ixiamen.test.js +2 -2
- package/__tests__/analysis-skills.test.js +71 -2
- package/__tests__/analysis.test.js +46 -0
- package/__tests__/social-douyin-im-direct-read.test.js +69 -3
- package/__tests__/social-douyin-salvage-collector.test.js +98 -0
- package/__tests__/social-douyin-salvage-mapper.test.js +90 -0
- package/__tests__/social-weibo-sqlite-device.test.js +174 -0
- package/__tests__/sqlite-leaf-salvage.test.js +97 -0
- package/lib/adapters/gov-ixiamen/index.js +17 -10
- package/lib/adapters/social-douyin/index.js +56 -2
- package/lib/adapters/social-douyin-adb/collector.js +100 -0
- package/lib/adapters/social-douyin-adb/im-db-parser.js +85 -0
- package/lib/adapters/social-douyin-adb/index.js +5 -0
- package/lib/adapters/social-douyin-adb/salvage-mapper.js +119 -0
- package/lib/adapters/social-weibo/index.js +110 -30
- package/lib/analysis-skills/index.js +3 -0
- package/lib/analysis-skills/overview.js +157 -0
- package/lib/analysis.js +50 -0
- package/lib/forensics/leaf-salvage.js +185 -0
- package/lib/prompt-builder.js +9 -0
- package/package.json +1 -1
|
@@ -9,10 +9,23 @@
|
|
|
9
9
|
* — account.uid is OPTIONAL at construction (the snapshot file carries
|
|
10
10
|
* account in payload).
|
|
11
11
|
*
|
|
12
|
-
* 2. sqlite mode (opts.dbPath
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
12
|
+
* 2. sqlite mode (opts.dbPath): desktop device-pull path — reads the Weibo
|
|
13
|
+
* Android app's plain SQLite DB `com.sina.weibo/databases/sina_weibo`.
|
|
14
|
+
* account.uid REQUIRED in this mode.
|
|
15
|
+
*
|
|
16
|
+
* Table/column names are DEVICE-VERIFIED against a real install
|
|
17
|
+
* (Redmi M2104K10AC, 微博 16.5.3, 2026-06-16):
|
|
18
|
+
* - posts → `home_table` (timeline cache; own posts = uid==selfUid)
|
|
19
|
+
* cols: mblogid / uid / content / time / rtnum /
|
|
20
|
+
* commentnum / attitudenum / src / longitude / latitude
|
|
21
|
+
* - favourites → `like_table` cols: mblogid / content / time / nick
|
|
22
|
+
* - follows → `follower_table` (following=1 ⇒ accounts the user
|
|
23
|
+
* follows) cols: user_id / screen_name / remark / gender
|
|
24
|
+
* The legacy `post`/`status`/`search_history` queries are kept as
|
|
25
|
+
* FALLBACKS (older builds) — on a modern device those tables don't
|
|
26
|
+
* exist so the adapter previously collected ZERO. Row VALUES were not
|
|
27
|
+
* validated (verification account was empty); column semantics use the
|
|
28
|
+
* standard Weibo schema. See memory pdh_collector_completeness_audit.
|
|
16
29
|
*
|
|
17
30
|
* Snapshot schema (mirrors WeiboLocalCollector.SNAPSHOT_SCHEMA_VERSION):
|
|
18
31
|
*
|
|
@@ -44,7 +57,7 @@ const {
|
|
|
44
57
|
} = require("../../constants");
|
|
45
58
|
|
|
46
59
|
const NAME = "social-weibo";
|
|
47
|
-
const VERSION = "0.
|
|
60
|
+
const VERSION = "0.7.0";
|
|
48
61
|
const SNAPSHOT_SCHEMA_VERSION = 1;
|
|
49
62
|
|
|
50
63
|
const KIND_POST = "post";
|
|
@@ -246,21 +259,64 @@ class WeiboAdapter {
|
|
|
246
259
|
? this._deps.dbDriverFactory()
|
|
247
260
|
: require("better-sqlite3-multiple-ciphers");
|
|
248
261
|
const db = new Driver(dbPath, { readonly: true });
|
|
262
|
+
// selfUid sanitised to digits — interpolated into a WHERE clause and
|
|
263
|
+
// sourced from wiring config (numeric uin). Defensive against injection.
|
|
264
|
+
const selfUid = String(this.account.uid).replace(/[^0-9]/g, "");
|
|
249
265
|
|
|
250
266
|
try {
|
|
267
|
+
// POSTS — device-verified `home_table` (own posts = uid==selfUid);
|
|
268
|
+
// legacy `post`/`status` kept as fallback for older builds.
|
|
251
269
|
const posts =
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
270
|
+
(selfUid &&
|
|
271
|
+
trySelect(
|
|
272
|
+
db,
|
|
273
|
+
`SELECT * FROM home_table WHERE uid='${selfUid}' ORDER BY time DESC LIMIT 5000`,
|
|
274
|
+
)) ||
|
|
275
|
+
trySelect(db, "SELECT * FROM post ORDER BY created_at DESC LIMIT 5000") ||
|
|
276
|
+
trySelect(db, "SELECT * FROM status ORDER BY created_at DESC LIMIT 5000") ||
|
|
277
|
+
[];
|
|
255
278
|
for (const row of posts) {
|
|
256
279
|
yield {
|
|
257
280
|
adapter: NAME,
|
|
258
|
-
originalId: `post-${row.id || row.mid || row.idstr}`,
|
|
259
|
-
capturedAt: parseTime(row.
|
|
281
|
+
originalId: `post-${row.mblogid || row.id || row.mid || row.idstr}`,
|
|
282
|
+
capturedAt: parseTime(row.time || row.created_at),
|
|
260
283
|
payload: { row, kind: KIND_POST },
|
|
261
284
|
};
|
|
262
285
|
}
|
|
263
286
|
|
|
287
|
+
// FAVOURITES — device-verified `like_table` (the account's likes).
|
|
288
|
+
// Legacy sqlite had no favourite path (folded into posts pre-A8).
|
|
289
|
+
const favourites =
|
|
290
|
+
trySelect(db, "SELECT * FROM like_table ORDER BY time DESC LIMIT 5000") || [];
|
|
291
|
+
for (const row of favourites) {
|
|
292
|
+
yield {
|
|
293
|
+
adapter: NAME,
|
|
294
|
+
originalId: `fav-${row.mblogid || row.id}`,
|
|
295
|
+
capturedAt: parseTime(row.time),
|
|
296
|
+
payload: { row, kind: KIND_FAVOURITE },
|
|
297
|
+
};
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
// FOLLOWS — device-verified `follower_table`; following=1 ⇒ accounts
|
|
301
|
+
// the user follows (vs followers). Fallback to the whole table.
|
|
302
|
+
const follows =
|
|
303
|
+
trySelect(
|
|
304
|
+
db,
|
|
305
|
+
"SELECT * FROM follower_table WHERE following=1 ORDER BY user_id LIMIT 5000",
|
|
306
|
+
) ||
|
|
307
|
+
trySelect(db, "SELECT * FROM follower_table LIMIT 5000") ||
|
|
308
|
+
[];
|
|
309
|
+
for (const row of follows) {
|
|
310
|
+
yield {
|
|
311
|
+
adapter: NAME,
|
|
312
|
+
originalId: `follow-${row.user_id || row.id}`,
|
|
313
|
+
capturedAt: parseTime(row.time) || Date.now(),
|
|
314
|
+
payload: { row, kind: KIND_FOLLOW },
|
|
315
|
+
};
|
|
316
|
+
}
|
|
317
|
+
|
|
318
|
+
// SEARCH — legacy only (`search_history` doesn't exist on modern
|
|
319
|
+
// weibo; trySelect returns null gracefully, loop is skipped).
|
|
264
320
|
const searches =
|
|
265
321
|
trySelect(db, "SELECT * FROM search_history ORDER BY time DESC LIMIT 5000")
|
|
266
322
|
|| [];
|
|
@@ -342,8 +398,9 @@ function normalizePost(p, raw, ingestedAt) {
|
|
|
342
398
|
// Sqlite mode: { kind:"post", row: { text, mid, ... } }
|
|
343
399
|
const row = p.row || p;
|
|
344
400
|
const isSnapshot = !p.row;
|
|
345
|
-
|
|
346
|
-
const
|
|
401
|
+
// home_table (device-verified) stores body in `content`, id in `mblogid`.
|
|
402
|
+
const text = row.text || row.content || "";
|
|
403
|
+
const mid = row.mid || row.mblogid || row.id || row.idstr || null;
|
|
347
404
|
const occurredAt =
|
|
348
405
|
parseTime(row.created_at || row.createdAt || row.time || raw.capturedAt) ||
|
|
349
406
|
ingestedAt;
|
|
@@ -369,13 +426,13 @@ function normalizePost(p, raw, ingestedAt) {
|
|
|
369
426
|
weiboMid: mid,
|
|
370
427
|
repostsCount:
|
|
371
428
|
row.repostsCount != null ? row.repostsCount
|
|
372
|
-
: row.reposts_count || row.repost || 0,
|
|
429
|
+
: row.reposts_count || row.repost || row.rtnum || 0,
|
|
373
430
|
commentsCount:
|
|
374
431
|
row.commentsCount != null ? row.commentsCount
|
|
375
|
-
: row.comments_count || row.comments || 0,
|
|
432
|
+
: row.comments_count || row.comments || row.commentnum || 0,
|
|
376
433
|
likesCount:
|
|
377
434
|
row.likesCount != null ? row.likesCount
|
|
378
|
-
: row.attitudes_count || row.likes || 0,
|
|
435
|
+
: row.attitudes_count || row.likes || row.attitudenum || 0,
|
|
379
436
|
picCount: row.picCount || row.pic_num || 0,
|
|
380
437
|
source: row.source || null,
|
|
381
438
|
location: row.location || row.geo || null,
|
|
@@ -387,13 +444,21 @@ function normalizePost(p, raw, ingestedAt) {
|
|
|
387
444
|
}
|
|
388
445
|
|
|
389
446
|
function normalizeFavourite(p, raw, ingestedAt) {
|
|
390
|
-
// Snapshot
|
|
391
|
-
//
|
|
392
|
-
//
|
|
393
|
-
const
|
|
394
|
-
const
|
|
395
|
-
const
|
|
396
|
-
const
|
|
447
|
+
// Snapshot: { kind:"favourite", mid, text, capturedAt, authorScreenName }
|
|
448
|
+
// Sqlite (device-verified `like_table`): { row: { mblogid, content, time,
|
|
449
|
+
// nick } }. Both shapes handled below.
|
|
450
|
+
const row = p.row || null;
|
|
451
|
+
const isSqlite = !!row;
|
|
452
|
+
const text = isSqlite ? (row.content || "") : (p.text || "");
|
|
453
|
+
const mid = isSqlite ? (row.mblogid || row.id || null) : (p.mid || null);
|
|
454
|
+
const occurredAt = isSqlite
|
|
455
|
+
? (parseTime(row.time) || raw.capturedAt || ingestedAt)
|
|
456
|
+
: (parseTime(p.capturedAt) || raw.capturedAt || ingestedAt);
|
|
457
|
+
const source = buildSource(
|
|
458
|
+
raw,
|
|
459
|
+
occurredAt,
|
|
460
|
+
isSqlite ? CAPTURED_BY.SQLITE : CAPTURED_BY.API,
|
|
461
|
+
);
|
|
397
462
|
return {
|
|
398
463
|
events: [{
|
|
399
464
|
id: newId(),
|
|
@@ -410,7 +475,9 @@ function normalizeFavourite(p, raw, ingestedAt) {
|
|
|
410
475
|
extra: {
|
|
411
476
|
platform: "weibo",
|
|
412
477
|
weiboMid: mid,
|
|
413
|
-
authorScreenName:
|
|
478
|
+
authorScreenName: isSqlite
|
|
479
|
+
? (row.nick || null)
|
|
480
|
+
: (p.authorScreenName || null),
|
|
414
481
|
},
|
|
415
482
|
}],
|
|
416
483
|
persons: [], places: [], items: [], topics: [],
|
|
@@ -418,15 +485,28 @@ function normalizeFavourite(p, raw, ingestedAt) {
|
|
|
418
485
|
}
|
|
419
486
|
|
|
420
487
|
function normalizeFollow(p, raw, ingestedAt) {
|
|
421
|
-
// Snapshot
|
|
422
|
-
//
|
|
488
|
+
// Snapshot: { kind:"follow", uid, screenName, description, avatarUrl,
|
|
489
|
+
// capturedAt }
|
|
490
|
+
// Sqlite (device-verified `follower_table`): { row: { user_id|id,
|
|
491
|
+
// screen_name, remark, gender } }. Both shapes handled below.
|
|
492
|
+
const row = p.row || null;
|
|
493
|
+
const isSqlite = !!row;
|
|
494
|
+
const rawUid = isSqlite ? (row.user_id || row.id) : p.uid;
|
|
423
495
|
const followUid =
|
|
424
|
-
(typeof
|
|
425
|
-
(typeof
|
|
496
|
+
(typeof rawUid === "number" && rawUid) ||
|
|
497
|
+
(typeof rawUid === "string" && rawUid.length > 0 && rawUid) ||
|
|
426
498
|
`unknown-${newId()}`;
|
|
427
|
-
const screenName =
|
|
428
|
-
|
|
429
|
-
|
|
499
|
+
const screenName = isSqlite
|
|
500
|
+
? (row.screen_name || row.remark || "(unnamed)")
|
|
501
|
+
: (p.screenName || "(unnamed)");
|
|
502
|
+
const occurredAt = isSqlite
|
|
503
|
+
? (parseTime(row.time) || raw.capturedAt || ingestedAt)
|
|
504
|
+
: (parseTime(p.capturedAt) || raw.capturedAt || ingestedAt);
|
|
505
|
+
const source = buildSource(
|
|
506
|
+
raw,
|
|
507
|
+
occurredAt,
|
|
508
|
+
isSqlite ? CAPTURED_BY.SQLITE : CAPTURED_BY.API,
|
|
509
|
+
);
|
|
430
510
|
const person = {
|
|
431
511
|
id: `person-weibo-${followUid}`,
|
|
432
512
|
type: ENTITY_TYPES.PERSON,
|
|
@@ -14,6 +14,7 @@ const { RelationsSkill } = require("./relations");
|
|
|
14
14
|
const { FootprintSkill } = require("./footprint");
|
|
15
15
|
const { InterestsSkill } = require("./interests");
|
|
16
16
|
const { TimelineSkill } = require("./timeline");
|
|
17
|
+
const { OverviewSkill } = require("./overview");
|
|
17
18
|
|
|
18
19
|
const SKILL_REGISTRY = Object.freeze({
|
|
19
20
|
"analysis.spending": SpendingSkill,
|
|
@@ -21,6 +22,7 @@ const SKILL_REGISTRY = Object.freeze({
|
|
|
21
22
|
"analysis.footprint": FootprintSkill,
|
|
22
23
|
"analysis.interests": InterestsSkill,
|
|
23
24
|
"analysis.timeline": TimelineSkill,
|
|
25
|
+
"analysis.overview": OverviewSkill,
|
|
24
26
|
});
|
|
25
27
|
|
|
26
28
|
const SKILL_NAMES = Object.freeze(Object.keys(SKILL_REGISTRY));
|
|
@@ -50,6 +52,7 @@ module.exports = {
|
|
|
50
52
|
FootprintSkill,
|
|
51
53
|
InterestsSkill,
|
|
52
54
|
TimelineSkill,
|
|
55
|
+
OverviewSkill,
|
|
53
56
|
SKILL_REGISTRY,
|
|
54
57
|
SKILL_NAMES,
|
|
55
58
|
ANALYSIS_SKILL_NAMES: SKILL_NAMES,
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* analysis.overview — cross-app unified snapshot for decision support.
|
|
3
|
+
*
|
|
4
|
+
* The de-silo capstone: every adapter normalizes into one vault, so this single
|
|
5
|
+
* skill aggregates ALL apps' Events into one picture — activity by app + by type,
|
|
6
|
+
* top relationships (merge-group aware, so the same person across WeChat/Douyin/
|
|
7
|
+
* Weibo counts once), spending across shopping/finance apps, and time trend.
|
|
8
|
+
* Gives the personal AI a unified "基于跨 app 数据" basis for decisions.
|
|
9
|
+
*
|
|
10
|
+
* Output:
|
|
11
|
+
* {
|
|
12
|
+
* skill, summary: { totalEvents, appsActive, period, topAppName },
|
|
13
|
+
* byApp: [{ app, count }], byType: [{ type, count }],
|
|
14
|
+
* monthlyActivity: [{ monthKey, count }],
|
|
15
|
+
* topContacts: [{ personId, name, interactions, byApp }],
|
|
16
|
+
* spending: { total, byDirection, currency },
|
|
17
|
+
* citations, llm_commentary,
|
|
18
|
+
* }
|
|
19
|
+
*/
|
|
20
|
+
"use strict";
|
|
21
|
+
|
|
22
|
+
const { AnalysisSkill } = require("./base");
|
|
23
|
+
|
|
24
|
+
// Event subtypes whose `content.amount` is folded into the spending aggregate.
const SPEND_SUBTYPES = new Set([
  "payment", "transfer", "refund", "utility", "redenvelope", "investment", "income", "order",
]);

/**
 * Cross-app overview skill: aggregates every event returned by the vault into
 * per-app, per-type and per-month counts, a ranked contact list, and a
 * spending summary, with optional LLM commentary on top.
 *
 * `resolveTimeWindow`, `expandToMergeGroup`, `callLlmCommentary`, `vault` and
 * `llm` are used via `this` and are presumably provided by `AnalysisSkill`
 * (not visible here — confirm against ./base).
 */
class OverviewSkill extends AnalysisSkill {
  constructor(opts) {
    super({ ...opts, name: "analysis.overview" });
  }

  /**
   * Build the overview.
   *
   * @param {object} [options]
   * @param {number} [options.topN=10] number of contacts to keep
   * @param {number} [options.limit=50000] max events requested from the vault
   * @param {boolean} [options.commentary] pass `false` to skip LLM commentary
   * @param {boolean} [options.acceptNonLocal] forwarded to the commentary call
   * @returns {Promise<object>} `{ skill, summary, byApp, byType,
   *   monthlyActivity, topContacts, spending, citations, llm_commentary }`
   */
  async run(options = {}) {
    const { since, until } = this.resolveTimeWindow(options);
    const topN = Number.isFinite(options.topN) && options.topN > 0 ? options.topN : 10;

    const q = { limit: Number.isFinite(options.limit) ? options.limit : 50_000 };
    if (since != null) q.since = since;
    if (until != null) q.until = until;
    const events = this.vault.queryEvents(q) || [];

    const byApp = new Map();
    const byType = new Map();
    const byMonth = new Map();
    const contacts = new Map(); // canonicalPersonId → { interactions, byApp:Map }
    let spendTotal = 0;
    const spendByDir = new Map();
    let currency = null;
    const citations = [];

    // Merge-group resolution is repeated for the same few ids across many
    // events; memoise it for the duration of this (synchronous) pass.
    const canonCache = new Map();
    const canonOf = (pid) => {
      let canon = canonCache.get(pid);
      if (canon === undefined) {
        canon = this._canon(pid);
        canonCache.set(pid, canon);
      }
      return canon;
    };

    for (const e of events) {
      const app = (e.source && e.source.adapter) || "unknown";
      byApp.set(app, (byApp.get(app) || 0) + 1);
      const type = e.subtype || "other";
      byType.set(type, (byType.get(type) || 0) + 1);

      if (Number.isFinite(e.occurredAt)) {
        const d = new Date(e.occurredAt);
        if (Number.isFinite(d.getTime())) {
          // Month bucket uses the process-local timezone.
          const m = `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}`;
          byMonth.set(m, (byMonth.get(m) || 0) + 1);
        }
      }

      // relationships (actor + participants), merge-group canonicalized.
      // One event is one interaction per person: an actor who is also listed
      // as a participant, or two aliases of the same merge group, must not be
      // counted twice.
      const ids = (Array.isArray(e.participants) ? e.participants : []).concat(e.actor ? [e.actor] : []);
      const countedInEvent = new Set();
      for (const pid of ids) {
        if (!pid || pid === "person-self") continue;
        const canon = canonOf(pid);
        if (countedInEvent.has(canon)) continue;
        countedInEvent.add(canon);
        const cur = contacts.get(canon) || { interactions: 0, byApp: new Map() };
        cur.interactions += 1;
        cur.byApp.set(app, (cur.byApp.get(app) || 0) + 1);
        contacts.set(canon, cur);
      }

      // spending — every direction is added to the same total; the per-direction
      // split is kept in spendByDir. The first currency seen labels the result.
      if (SPEND_SUBTYPES.has(type) && e.content && e.content.amount && Number.isFinite(e.content.amount.value)) {
        const v = e.content.amount.value;
        spendTotal += v;
        const dir = e.content.amount.direction || "unknown";
        spendByDir.set(dir, (spendByDir.get(dir) || 0) + v);
        if (!currency && e.content.amount.currency) currency = e.content.amount.currency;
      }

      // Only the first 50 event ids are kept as citations.
      if (citations.length < 50) citations.push(e.id);
    }

    const byAppArr = [...byApp.entries()].map(([app, count]) => ({ app, count })).sort((a, b) => b.count - a.count);
    const topContacts = [...contacts.entries()]
      .map(([personId, v]) => ({
        personId,
        name: this._lookupName(personId),
        interactions: v.interactions,
        byApp: Object.fromEntries(v.byApp),
      }))
      .sort((a, b) => b.interactions - a.interactions)
      .slice(0, topN);

    const summary = {
      totalEvents: events.length,
      appsActive: byApp.size,
      // `??` keeps a legitimate 0 (epoch) bound instead of reporting it as null.
      period: { since: since ?? null, until: until ?? null },
      topAppName: byAppArr.length ? byAppArr[0].app : null,
    };

    const result = {
      skill: "analysis.overview",
      summary,
      byApp: byAppArr,
      byType: [...byType.entries()].map(([type, count]) => ({ type, count })).sort((a, b) => b.count - a.count),
      monthlyActivity: [...byMonth.entries()].map(([monthKey, count]) => ({ monthKey, count })).sort((a, b) => a.monthKey.localeCompare(b.monthKey)),
      topContacts,
      spending: {
        total: Math.round(spendTotal * 100) / 100,
        byDirection: Object.fromEntries(spendByDir),
        currency: currency || null,
      },
      citations,
      llm_commentary: null,
    };

    if (options.commentary !== false && this.llm && events.length > 0) {
      result.llm_commentary = await this._commentary(result, options);
    }
    return result;
  }

  /**
   * Map a person id to the canonical id of its merge group: the
   * lexicographically smallest member, or the id itself when the group is
   * empty or unavailable.
   */
  _canon(personId) {
    const members = this.expandToMergeGroup(personId);
    if (!members || members.length === 0) return personId;
    // canonical = smallest id (stable across the group)
    return [...members].sort()[0];
  }

  /**
   * Best-effort display name: the first entry of `names` on the vault's
   * person record, or null when the vault has no `getPerson`, the record has
   * no names, or the lookup throws.
   */
  _lookupName(personId) {
    try {
      if (typeof this.vault.getPerson === "function") {
        const p = this.vault.getPerson(personId);
        if (p && Array.isArray(p.names) && p.names.length) return p.names[0];
      }
    } catch (_e) { /* optional */ }
    return null;
  }

  /**
   * Ask the LLM for a short commentary on the aggregated result. Only counts
   * and totals from `result` are put into the prompt.
   */
  async _commentary(result, options) {
    const apps = result.byApp.slice(0, 5).map((a) => `${a.app}(${a.count})`).join(", ");
    const types = result.byType.slice(0, 5).map((t) => `${t.type}(${t.count})`).join(", ");
    const msg = [
      `用户跨 ${result.summary.appsActive} 个 app 的数据汇总:`,
      `- 共 ${result.summary.totalEvents} 条事件;活跃 app(Top5): ${apps}`,
      `- 事件类型(Top5): ${types}`,
      `- 跨 app 消费合计: ${result.spending.total} ${result.spending.currency || ""}`,
      `- 高频联系人数: ${result.topContacts.length}`,
      "请用 3-4 句话,从「为个人决策提供依据」的角度,概括其数字生活重心与可关注点。中文。",
    ].join("\n");
    return await this.callLlmCommentary([
      { role: "system", content: "你是个人数据中台的跨 app 洞察助手,基于事实给决策参考,克制不臆测。" },
      { role: "user", content: msg },
    ], { acceptNonLocal: options.acceptNonLocal });
  }
}
|
|
156
|
+
|
|
157
|
+
module.exports = { OverviewSkill };
|
package/lib/analysis.js
CHANGED
|
@@ -22,6 +22,7 @@
|
|
|
22
22
|
"use strict";
|
|
23
23
|
|
|
24
24
|
const { parseQuery, extractEntityTerm, extractPersonNameCandidate } = require("./query-parser");
|
|
25
|
+
const { OverviewSkill } = require("./analysis-skills/overview");
|
|
25
26
|
const {
|
|
26
27
|
buildPrompt,
|
|
27
28
|
parseCitations,
|
|
@@ -212,6 +213,27 @@ class AnalysisEngine {
|
|
|
212
213
|
}
|
|
213
214
|
}
|
|
214
215
|
|
|
216
|
+
// Optional cross-app overview context (opt-in via options.crossApp) — runs
|
|
217
|
+
// the OverviewSkill aggregation and injects a compact summary so the LLM
|
|
218
|
+
// can answer cross-app / decision questions grounded in ALL apps' data.
|
|
219
|
+
let crossAppOverview;
|
|
220
|
+
if (options.crossApp) {
|
|
221
|
+
try {
|
|
222
|
+
const ov = await new OverviewSkill({ vault: this.vault }).run({
|
|
223
|
+
commentary: false,
|
|
224
|
+
topN: 5,
|
|
225
|
+
...(parsed.timeWindow &&
|
|
226
|
+
Number.isFinite(parsed.timeWindow.since) &&
|
|
227
|
+
Number.isFinite(parsed.timeWindow.until)
|
|
228
|
+
? { since: parsed.timeWindow.since, until: parsed.timeWindow.until }
|
|
229
|
+
: {}),
|
|
230
|
+
});
|
|
231
|
+
crossAppOverview = formatCrossAppOverview(ov);
|
|
232
|
+
} catch (_e) {
|
|
233
|
+
/* overview is best-effort context; never abort the ask */
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
|
|
215
237
|
// Build prompt.
|
|
216
238
|
const { messages, factIds, factCount, truncated } = buildPrompt({
|
|
217
239
|
question,
|
|
@@ -223,6 +245,7 @@ class AnalysisEngine {
|
|
|
223
245
|
vaultTotals: this._gatherVaultTotals(),
|
|
224
246
|
amountSummary:
|
|
225
247
|
parsed.intent === "sum-amount" ? this._gatherAmountSummary(parsed) : undefined,
|
|
248
|
+
crossAppOverview,
|
|
226
249
|
});
|
|
227
250
|
|
|
228
251
|
// Telemetry: post-cap prompt size + truncation count. If `truncated` > 0
|
|
@@ -820,8 +843,35 @@ class AnalysisEngine {
|
|
|
820
843
|
}
|
|
821
844
|
}
|
|
822
845
|
|
|
846
|
+
/**
 * Render an OverviewSkill result as a few prompt-ready lines for the
 * CROSS_APP_OVERVIEW context block.
 *
 * Always emits the event/app totals, the top apps and the top event types
 * (first five of each, "无" when a list is empty). A spending line is added
 * only for a finite, non-zero total, and a contacts line only when at least
 * one contact is present.
 *
 * @param {object|null|undefined} ov OverviewSkill result
 * @returns {string|null} newline-joined summary, or null when `ov` or
 *   `ov.summary` is missing
 */
function formatCrossAppOverview(ov) {
  if (!ov || !ov.summary) return null;

  const { summary, spending, topContacts } = ov;
  const KEEP = 5;

  // "label(count)" pairs for the first KEEP entries of a ranked list.
  const rankLine = (list, labelKey, countKey) => {
    const text = (list || [])
      .slice(0, KEEP)
      .map((item) => `${item[labelKey]}(${item[countKey]})`)
      .join(", ");
    return text || "无";
  };

  const out = [];
  out.push(`共 ${summary.totalEvents} 事件,跨 ${summary.appsActive} 个 app`);
  out.push(`活跃 app(Top): ${rankLine(ov.byApp, "app", "count")}`);
  out.push(`事件类型(Top): ${rankLine(ov.byType, "type", "count")}`);

  const hasSpending = spending && Number.isFinite(spending.total) && spending.total !== 0;
  if (hasSpending) {
    // trim() drops the trailing space left when no currency is known.
    out.push(`跨 app 消费合计: ${spending.total} ${spending.currency || ""}`.trim());
  }

  if (Array.isArray(topContacts) && topContacts.length > 0) {
    const people = topContacts
      .slice(0, KEEP)
      .map((person) => `${person.name || person.personId}(${person.interactions})`);
    out.push(`高频联系人(Top): ${people.join(", ")}`);
  }

  return out.join("\n");
}
|
|
871
|
+
|
|
823
872
|
module.exports = {
|
|
824
873
|
AnalysisEngine,
|
|
874
|
+
formatCrossAppOverview,
|
|
825
875
|
DEFAULT_MAX_FACTS,
|
|
826
876
|
DEFAULT_MAX_QUERY_LIMIT,
|
|
827
877
|
LATEST_INTENT_FACT_LIMIT,
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/*
|
|
3
|
+
* PDH SQLite leaf-page record salvager — library form.
|
|
4
|
+
*
|
|
5
|
+
* Method B (`/proc/<pid>/mem` 内存扫描) dumps decrypted pages, but for DBs whose
|
|
6
|
+
* page cache is SCATTERED (not contiguous mmap) the rebuilt file is "malformed"
|
|
7
|
+
* (valid header, broken b-tree). This salvages the DATA anyway: it scans a dump
|
|
8
|
+
* (or concatenated dumps) for SQLite **table b-tree leaf pages** (type 0x0D) and
|
|
9
|
+
* parses each page's records directly — order-independent, exactly what sqlite3
|
|
10
|
+
* `.recover` does, but standalone (platform-tools sqlite3 lacks .recover).
|
|
11
|
+
*
|
|
12
|
+
* This file is the bundle-able home for the parser; the standalone CLI tool
|
|
13
|
+
* `scripts/android/pdh-sqlite-leaf-salvage.js` re-exports from here. Lives in
|
|
14
|
+
* pdh lib so the Android cc bundle (and `cc hub salvage`) can call it on-device.
|
|
15
|
+
*
|
|
16
|
+
* Output: array of {rowid, cols:[...]} — raw positional column tuples (leaf
|
|
17
|
+
* pages carry no column names). Map to a schema downstream (salvage-mapper.js).
|
|
18
|
+
*
|
|
19
|
+
* Authorization: only on data you are entitled to (your own device/account).
|
|
20
|
+
* Docs: docs/internal/pdh-db-decryption-runbook.md (Method B + reconstruction).
|
|
21
|
+
*/
|
|
22
|
+
const fs = require("node:fs");
|
|
23
|
+
|
|
24
|
+
function readVarint(buf, off) {
|
|
25
|
+
// SQLite varint: up to 9 bytes, big-endian, high bit = continuation.
|
|
26
|
+
let result = 0n;
|
|
27
|
+
let i = 0;
|
|
28
|
+
for (; i < 8; i++) {
|
|
29
|
+
const b = buf[off + i];
|
|
30
|
+
if (b === undefined) return [null, off + i];
|
|
31
|
+
result = (result << 7n) | BigInt(b & 0x7f);
|
|
32
|
+
if ((b & 0x80) === 0) return [result, off + i + 1];
|
|
33
|
+
}
|
|
34
|
+
// 9th byte uses all 8 bits
|
|
35
|
+
const b9 = buf[off + 8];
|
|
36
|
+
if (b9 === undefined) return [null, off + 9];
|
|
37
|
+
result = (result << 8n) | BigInt(b9);
|
|
38
|
+
return [result, off + 9];
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
/**
 * Number of value bytes that follow in the record body for a SQLite serial
 * type code.
 *
 * Codes 1–7 are fixed width (1, 2, 3, 4, 6, 8 and 8 bytes). Codes of 12 and
 * above encode the length in the code itself: even codes give (n - 12) / 2,
 * odd codes give (n - 13) / 2. Every other code — including 0, 8, 9, 10 and
 * 11 — yields 0.
 *
 * @param {bigint|number} t serial type code
 * @returns {number} byte length of the stored value
 */
function serialTypeSize(t) {
  const code = Number(t);
  switch (code) {
    case 1:
      return 1;
    case 2:
      return 2;
    case 3:
      return 3;
    case 4:
      return 4;
    case 5:
      return 6;
    case 6:
    case 7:
      return 8;
    default:
      break;
  }
  if (code >= 12) {
    const isEven = code % 2 === 0;
    return isEven ? (code - 12) / 2 : (code - 13) / 2;
  }
  return 0;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Decode one record-body value of serial type `t` stored at `off`.
 *
 * Results by serial type:
 *   0 → null; 8 → 0; 9 → 1 (no bytes consumed);
 *   1–6 → big-endian signed integer, as a Number when it is a safe integer,
 *         otherwise as a decimal string;
 *   7 → big-endian double;
 *   odd ≥ 13 → UTF-8 text;
 *   even ≥ 12 → the marker string `<blob:LEN>` (blob bytes are not returned).
 *
 * @param {Buffer} buf bytes to read from
 * @param {number} off offset of the value
 * @param {bigint|number} t serial type code
 * @returns {[any, number]} `[value, nextOffset]`
 * @throws {RangeError} when `t` is not a usable serial type (negative,
 *   non-integer, or the reserved codes 10/11), or when the value would extend
 *   past the end of `buf`. Previously such input produced fabricated data:
 *   zero-filled integers, silently truncated text, or `<blob:0>`.
 */
function readValue(buf, off, t) {
  const code = Number(t);
  // These three types carry their value in the type code itself.
  if (code === 0) return [null, off];
  if (code === 8) return [0, off];
  if (code === 9) return [1, off];

  if (!Number.isInteger(code) || code < 0 || code === 10 || code === 11) {
    throw new RangeError(`readValue: invalid serial type ${t}`);
  }

  const size = serialTypeSize(t);
  const end = off + size;
  if (!(off >= 0) || end > buf.length) {
    throw new RangeError(
      `readValue: ${size}-byte value at offset ${off} extends past the end of the buffer`,
    );
  }

  if (code <= 6) {
    let raw = 0n;
    for (let i = off; i < end; i++) raw = (raw << 8n) | BigInt(buf[i]);
    // sign-extend from the stored width
    const signed = BigInt.asIntN(size * 8, raw);
    const asNumber = Number(signed);
    return [Number.isSafeInteger(asNumber) ? asNumber : signed.toString(), end];
  }
  if (code === 7) return [buf.readDoubleBE(off), end];
  if (code % 2 === 1) {
    // text
    return [buf.toString("utf8", off, end), end];
  }
  // blob — return length marker, not raw bytes
  return [`<blob:${size}>`, end];
}
|
|
81
|
+
|
|
82
|
+
/**
 * Parse one SQLite table b-tree leaf page located at byte offset `base`.
 *
 * Page header fields read here: byte 0 is the page type (must be 0x0D),
 * bytes 3-4 the cell count, bytes 5-6 the start of the cell content area
 * (0 means 65536); the 2-byte cell pointer array begins at byte 8. Each cell
 * is read as `payloadLen varint, rowid varint, record`, where the record is
 * a header (its own length varint, then one serial-type varint per column)
 * followed by the column values.
 *
 * Cells that fail validation or throw while decoding are skipped
 * individually; the rest of the page is still returned.
 *
 * @param {Buffer} buf dump to read from
 * @param {number} base offset of the candidate page within `buf`
 * @param {number} pageSize page size in bytes
 * @param {number} minCols records with fewer columns are dropped
 * @returns {Array<{rowid: string|null, cols: any[]}>|null} decoded records,
 *   or null when the page is rejected or yields no record
 */
function parseLeafPage(buf, base, pageSize, minCols) {
  if (buf[base] !== 0x0d) return null; // 0x0D = table b-tree leaf
  if (base + pageSize > buf.length) return null;
  const numCells = (buf[base + 3] << 8) | buf[base + 4];
  if (numCells <= 0 || numCells > Math.floor(pageSize / 4)) return null;
  // cell-content-start (bytes 5-6; 0 means 65536) — must sit after the cell
  // pointer array and within the page. This guard rejects the false positives a
  // finer (unaligned) stride would otherwise hit on random 0x0D bytes.
  let cellStart = (buf[base + 5] << 8) | buf[base + 6];
  if (cellStart === 0) cellStart = 65536;
  const hdrEndMin = 8 + numCells * 2;
  if (cellStart < hdrEndMin || cellStart > pageSize) return null;

  const out = [];
  const ptrBase = base + 8;
  for (let c = 0; c < numCells; c++) {
    const ptr = (buf[ptrBase + c * 2] << 8) | buf[ptrBase + c * 2 + 1];
    if (ptr < 8 || ptr >= pageSize) continue;
    let off = base + ptr;
    try {
      const [payloadLen, afterPayloadLen] = readVarint(buf, off);
      off = afterPayloadLen;
      if (payloadLen === null || payloadLen <= 0n || payloadLen > BigInt(pageSize)) continue;
      const [rowid, afterRowid] = readVarint(buf, off);
      off = afterRowid;

      // record header
      const recStart = off;
      const [hdrLen, afterHdrLen] = readVarint(buf, off);
      off = afterHdrLen;
      if (hdrLen === null || hdrLen <= 0n) continue;
      // The record header is part of the payload, so it can never be longer
      // than it. Without this bound a garbage hdrLen on a false-positive page
      // sends the loop below through the remainder of the dump.
      if (hdrLen > payloadLen) continue;
      const hdrEnd = recStart + Number(hdrLen);

      const serials = [];
      while (off < hdrEnd) {
        const [st, next] = readVarint(buf, off);
        off = next;
        if (st === null) break;
        serials.push(st);
      }

      // Values are laid out back to back, in column order, right after the header.
      let vOff = hdrEnd;
      const cols = [];
      for (const st of serials) {
        const [val, vNext] = readValue(buf, vOff, st);
        cols.push(val);
        vOff = vNext;
      }
      if (cols.length >= minCols) out.push({ rowid: rowid === null ? null : rowid.toString(), cols });
    } catch (_e) { /* skip malformed cell */ }
  }
  return out.length ? out : null;
}
|
|
127
|
+
|
|
128
|
+
/**
 * Walk a dump buffer at a fixed stride, parse every offset that starts with
 * the table-leaf marker byte (0x0D) as a leaf page, and collect the records
 * found, skipping exact duplicates.
 *
 * @param {Buffer} buf dump contents
 * @param {{pageSize?: number, minCols?: number, unaligned?: boolean, stride?: number}} [opts]
 *   - pageSize: page size in bytes; default 4096
 *   - minCols: minimum column count for a record to be kept; default 3
 *   - unaligned: when truthy and no explicit stride is given, step by 512
 *     bytes instead of one page
 *   - stride: explicit step between candidate offsets; wins over `unaligned`
 * @returns {{records: Array<{rowid: string|null, cols: any[]}>, pages: number}}
 *   the unique records in discovery order and the number of offsets that
 *   parsed as a leaf page
 * @throws {TypeError} when `buf` is not a Buffer
 */
function salvageBuffer(buf, opts = {}) {
  if (!Buffer.isBuffer(buf)) {
    throw new TypeError("salvageBuffer: buf must be a Buffer");
  }

  const isPositive = (v) => Number.isFinite(v) && v > 0;
  const pageSize = isPositive(opts.pageSize) ? opts.pageSize : 4096;
  const minCols = Number.isFinite(opts.minCols) && opts.minCols >= 0 ? opts.minCols : 3;

  let stride;
  if (isPositive(opts.stride)) {
    stride = opts.stride;
  } else if (opts.unaligned) {
    stride = 512;
  } else {
    stride = pageSize;
  }

  const records = [];
  const fingerprints = new Set(); // rowid + column values of every record already kept
  let pages = 0;

  for (let base = 0; base + 8 <= buf.length; base += stride) {
    // Cheap marker-byte test first; only candidates get the full parse.
    const found = buf[base] === 0x0d ? parseLeafPage(buf, base, pageSize, minCols) : null;
    if (found) {
      pages += 1;
      for (const rec of found) {
        const fingerprint = `${rec.rowid} ${JSON.stringify(rec.cols)}`;
        if (!fingerprints.has(fingerprint)) {
          fingerprints.add(fingerprint);
          records.push(rec);
        }
      }
    }
  }

  return { records, pages };
}
|
|
171
|
+
|
|
172
|
+
/**
 * Load a dump file fully into memory (synchronously) and salvage its records.
 *
 * @param {string} filePath path of the dump file
 * @param {object} [opts] forwarded unchanged to {@link salvageBuffer}
 * @returns {{records: Array<{rowid: string|null, cols: any[]}>, pages: number}}
 */
function salvageFile(filePath, opts = {}) {
  return salvageBuffer(fs.readFileSync(filePath), opts);
}
|
|
177
|
+
|
|
178
|
+
module.exports = {
|
|
179
|
+
readVarint,
|
|
180
|
+
serialTypeSize,
|
|
181
|
+
readValue,
|
|
182
|
+
parseLeafPage,
|
|
183
|
+
salvageBuffer,
|
|
184
|
+
salvageFile,
|
|
185
|
+
};
|