@chainlesschain/personal-data-hub 0.4.24 → 0.4.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,10 +9,23 @@
9
9
  * — account.uid is OPTIONAL at construction (the snapshot file carries
10
10
  * account in payload).
11
11
  *
12
- * 2. sqlite mode (opts.dbPath, legacy): Phase 13.2 device-pull path —
13
- * reads Weibo Android app's SQLite (history / post / status / search_
14
- * history). Preserved for backward compat with desktop sqlite-mode
15
- * users; account.uid REQUIRED in this mode.
12
+ * 2. sqlite mode (opts.dbPath): desktop device-pull path — reads the Weibo
13
+ * Android app's plain SQLite DB `com.sina.weibo/databases/sina_weibo`.
14
+ * account.uid REQUIRED in this mode.
15
+ *
16
+ * Table/column names are DEVICE-VERIFIED against a real install
17
+ * (Redmi M2104K10AC, 微博 16.5.3, 2026-06-16):
18
+ * - posts → `home_table` (timeline cache; own posts = uid==selfUid)
19
+ * cols: mblogid / uid / content / time / rtnum /
20
+ * commentnum / attitudenum / src / longitude / latitude
21
+ * - favourites → `like_table` cols: mblogid / content / time / nick
22
+ * - follows → `follower_table` (following=1 ⇒ accounts the user
23
+ * follows) cols: user_id / screen_name / remark / gender
24
+ * The legacy `post`/`status`/`search_history` queries are kept as
25
+ * FALLBACKS (older builds) — on a modern device those tables don't
26
+ * exist so the adapter previously collected ZERO. Row VALUES were not
27
+ * validated (verification account was empty); column semantics use the
28
+ * standard Weibo schema. See memory pdh_collector_completeness_audit.
16
29
  *
17
30
  * Snapshot schema (mirrors WeiboLocalCollector.SNAPSHOT_SCHEMA_VERSION):
18
31
  *
@@ -44,7 +57,7 @@ const {
44
57
  } = require("../../constants");
45
58
 
46
59
  const NAME = "social-weibo";
47
- const VERSION = "0.6.0";
60
+ const VERSION = "0.7.0";
48
61
  const SNAPSHOT_SCHEMA_VERSION = 1;
49
62
 
50
63
  const KIND_POST = "post";
@@ -246,21 +259,64 @@ class WeiboAdapter {
246
259
  ? this._deps.dbDriverFactory()
247
260
  : require("better-sqlite3-multiple-ciphers");
248
261
  const db = new Driver(dbPath, { readonly: true });
262
+ // selfUid sanitised to digits — interpolated into a WHERE clause and
263
+ // sourced from wiring config (numeric uin). Defensive against injection.
264
+ const selfUid = String(this.account.uid).replace(/[^0-9]/g, "");
249
265
 
250
266
  try {
267
+ // POSTS — device-verified `home_table` (own posts = uid==selfUid);
268
+ // legacy `post`/`status` kept as fallback for older builds.
251
269
  const posts =
252
- trySelect(db, "SELECT * FROM post ORDER BY created_at DESC LIMIT 5000")
253
- || trySelect(db, "SELECT * FROM status ORDER BY created_at DESC LIMIT 5000")
254
- || [];
270
+ (selfUid &&
271
+ trySelect(
272
+ db,
273
+ `SELECT * FROM home_table WHERE uid='${selfUid}' ORDER BY time DESC LIMIT 5000`,
274
+ )) ||
275
+ trySelect(db, "SELECT * FROM post ORDER BY created_at DESC LIMIT 5000") ||
276
+ trySelect(db, "SELECT * FROM status ORDER BY created_at DESC LIMIT 5000") ||
277
+ [];
255
278
  for (const row of posts) {
256
279
  yield {
257
280
  adapter: NAME,
258
- originalId: `post-${row.id || row.mid || row.idstr}`,
259
- capturedAt: parseTime(row.created_at || row.time),
281
+ originalId: `post-${row.mblogid || row.id || row.mid || row.idstr}`,
282
+ capturedAt: parseTime(row.time || row.created_at),
260
283
  payload: { row, kind: KIND_POST },
261
284
  };
262
285
  }
263
286
 
287
+ // FAVOURITES — device-verified `like_table` (the account's likes).
288
+ // Legacy sqlite had no favourite path (folded into posts pre-A8).
289
+ const favourites =
290
+ trySelect(db, "SELECT * FROM like_table ORDER BY time DESC LIMIT 5000") || [];
291
+ for (const row of favourites) {
292
+ yield {
293
+ adapter: NAME,
294
+ originalId: `fav-${row.mblogid || row.id}`,
295
+ capturedAt: parseTime(row.time),
296
+ payload: { row, kind: KIND_FAVOURITE },
297
+ };
298
+ }
299
+
300
+ // FOLLOWS — device-verified `follower_table`; following=1 ⇒ accounts
301
+ // the user follows (vs followers). Fallback to the whole table.
302
+ const follows =
303
+ trySelect(
304
+ db,
305
+ "SELECT * FROM follower_table WHERE following=1 ORDER BY user_id LIMIT 5000",
306
+ ) ||
307
+ trySelect(db, "SELECT * FROM follower_table LIMIT 5000") ||
308
+ [];
309
+ for (const row of follows) {
310
+ yield {
311
+ adapter: NAME,
312
+ originalId: `follow-${row.user_id || row.id}`,
313
+ capturedAt: parseTime(row.time) || Date.now(),
314
+ payload: { row, kind: KIND_FOLLOW },
315
+ };
316
+ }
317
+
318
+ // SEARCH — legacy only (`search_history` doesn't exist on modern
319
+ // weibo; trySelect returns null gracefully, loop is skipped).
264
320
  const searches =
265
321
  trySelect(db, "SELECT * FROM search_history ORDER BY time DESC LIMIT 5000")
266
322
  || [];
@@ -342,8 +398,9 @@ function normalizePost(p, raw, ingestedAt) {
342
398
  // Sqlite mode: { kind:"post", row: { text, mid, ... } }
343
399
  const row = p.row || p;
344
400
  const isSnapshot = !p.row;
345
- const text = row.text || "";
346
- const mid = row.mid || row.id || row.idstr || null;
401
+ // home_table (device-verified) stores body in `content`, id in `mblogid`.
402
+ const text = row.text || row.content || "";
403
+ const mid = row.mid || row.mblogid || row.id || row.idstr || null;
347
404
  const occurredAt =
348
405
  parseTime(row.created_at || row.createdAt || row.time || raw.capturedAt) ||
349
406
  ingestedAt;
@@ -369,13 +426,13 @@ function normalizePost(p, raw, ingestedAt) {
369
426
  weiboMid: mid,
370
427
  repostsCount:
371
428
  row.repostsCount != null ? row.repostsCount
372
- : row.reposts_count || row.repost || 0,
429
+ : row.reposts_count || row.repost || row.rtnum || 0,
373
430
  commentsCount:
374
431
  row.commentsCount != null ? row.commentsCount
375
- : row.comments_count || row.comments || 0,
432
+ : row.comments_count || row.comments || row.commentnum || 0,
376
433
  likesCount:
377
434
  row.likesCount != null ? row.likesCount
378
- : row.attitudes_count || row.likes || 0,
435
+ : row.attitudes_count || row.likes || row.attitudenum || 0,
379
436
  picCount: row.picCount || row.pic_num || 0,
380
437
  source: row.source || null,
381
438
  location: row.location || row.geo || null,
@@ -387,13 +444,21 @@ function normalizePost(p, raw, ingestedAt) {
387
444
  }
388
445
 
389
446
  function normalizeFavourite(p, raw, ingestedAt) {
390
- // Snapshot only sqlite mode has no favourite kind (legacy parser merged
391
- // favourites into posts pre-A8). Payload: { kind:"favourite", mid, text,
392
- // capturedAt, authorScreenName }
393
- const text = p.text || "";
394
- const mid = p.mid || null;
395
- const occurredAt = parseTime(p.capturedAt) || raw.capturedAt || ingestedAt;
396
- const source = buildSource(raw, occurredAt, CAPTURED_BY.API);
447
+ // Snapshot: { kind:"favourite", mid, text, capturedAt, authorScreenName }
448
+ // Sqlite (device-verified `like_table`): { row: { mblogid, content, time,
449
+ // nick } }. Both shapes handled below.
450
+ const row = p.row || null;
451
+ const isSqlite = !!row;
452
+ const text = isSqlite ? (row.content || "") : (p.text || "");
453
+ const mid = isSqlite ? (row.mblogid || row.id || null) : (p.mid || null);
454
+ const occurredAt = isSqlite
455
+ ? (parseTime(row.time) || raw.capturedAt || ingestedAt)
456
+ : (parseTime(p.capturedAt) || raw.capturedAt || ingestedAt);
457
+ const source = buildSource(
458
+ raw,
459
+ occurredAt,
460
+ isSqlite ? CAPTURED_BY.SQLITE : CAPTURED_BY.API,
461
+ );
397
462
  return {
398
463
  events: [{
399
464
  id: newId(),
@@ -410,7 +475,9 @@ function normalizeFavourite(p, raw, ingestedAt) {
410
475
  extra: {
411
476
  platform: "weibo",
412
477
  weiboMid: mid,
413
- authorScreenName: p.authorScreenName || null,
478
+ authorScreenName: isSqlite
479
+ ? (row.nick || null)
480
+ : (p.authorScreenName || null),
414
481
  },
415
482
  }],
416
483
  persons: [], places: [], items: [], topics: [],
@@ -418,15 +485,28 @@ function normalizeFavourite(p, raw, ingestedAt) {
418
485
  }
419
486
 
420
487
  function normalizeFollow(p, raw, ingestedAt) {
421
- // Snapshot only — payload: { kind:"follow", uid, screenName, description,
422
- // avatarUrl, capturedAt }
488
+ // Snapshot: { kind:"follow", uid, screenName, description, avatarUrl,
489
+ // capturedAt }
490
+ // Sqlite (device-verified `follower_table`): { row: { user_id|id,
491
+ // screen_name, remark, gender } }. Both shapes handled below.
492
+ const row = p.row || null;
493
+ const isSqlite = !!row;
494
+ const rawUid = isSqlite ? (row.user_id || row.id) : p.uid;
423
495
  const followUid =
424
- (typeof p.uid === "number" && p.uid) ||
425
- (typeof p.uid === "string" && p.uid.length > 0 && p.uid) ||
496
+ (typeof rawUid === "number" && rawUid) ||
497
+ (typeof rawUid === "string" && rawUid.length > 0 && rawUid) ||
426
498
  `unknown-${newId()}`;
427
- const screenName = p.screenName || "(unnamed)";
428
- const occurredAt = parseTime(p.capturedAt) || raw.capturedAt || ingestedAt;
429
- const source = buildSource(raw, occurredAt, CAPTURED_BY.API);
499
+ const screenName = isSqlite
500
+ ? (row.screen_name || row.remark || "(unnamed)")
501
+ : (p.screenName || "(unnamed)");
502
+ const occurredAt = isSqlite
503
+ ? (parseTime(row.time) || raw.capturedAt || ingestedAt)
504
+ : (parseTime(p.capturedAt) || raw.capturedAt || ingestedAt);
505
+ const source = buildSource(
506
+ raw,
507
+ occurredAt,
508
+ isSqlite ? CAPTURED_BY.SQLITE : CAPTURED_BY.API,
509
+ );
430
510
  const person = {
431
511
  id: `person-weibo-${followUid}`,
432
512
  type: ENTITY_TYPES.PERSON,
@@ -14,6 +14,7 @@ const { RelationsSkill } = require("./relations");
14
14
  const { FootprintSkill } = require("./footprint");
15
15
  const { InterestsSkill } = require("./interests");
16
16
  const { TimelineSkill } = require("./timeline");
17
+ const { OverviewSkill } = require("./overview");
17
18
 
18
19
  const SKILL_REGISTRY = Object.freeze({
19
20
  "analysis.spending": SpendingSkill,
@@ -21,6 +22,7 @@ const SKILL_REGISTRY = Object.freeze({
21
22
  "analysis.footprint": FootprintSkill,
22
23
  "analysis.interests": InterestsSkill,
23
24
  "analysis.timeline": TimelineSkill,
25
+ "analysis.overview": OverviewSkill,
24
26
  });
25
27
 
26
28
  const SKILL_NAMES = Object.freeze(Object.keys(SKILL_REGISTRY));
@@ -50,6 +52,7 @@ module.exports = {
50
52
  FootprintSkill,
51
53
  InterestsSkill,
52
54
  TimelineSkill,
55
+ OverviewSkill,
53
56
  SKILL_REGISTRY,
54
57
  SKILL_NAMES,
55
58
  ANALYSIS_SKILL_NAMES: SKILL_NAMES,
@@ -0,0 +1,157 @@
1
+ /**
2
+ * analysis.overview — cross-app unified snapshot for decision support.
3
+ *
4
+ * The de-silo capstone: every adapter normalizes into one vault, so this single
5
+ * skill aggregates ALL apps' Events into one picture — activity by app + by type,
6
+ * top relationships (merge-group aware, so the same person across WeChat/Douyin/
7
+ * Weibo counts once), spending across shopping/finance apps, and time trend.
8
+ * Gives the personal AI a unified "基于跨 app 数据" basis for decisions.
9
+ *
10
+ * Output:
11
+ * {
12
+ * skill, summary: { totalEvents, appsActive, period, topAppName },
13
+ * byApp: [{ app, count }], byType: [{ type, count }],
14
+ * monthlyActivity: [{ monthKey, count }],
15
+ * topContacts: [{ personId, name, interactions, byApp }],
16
+ * spending: { total, byDirection, currency },
17
+ * citations, llm_commentary,
18
+ * }
19
+ */
20
+ "use strict";
21
+
22
+ const { AnalysisSkill } = require("./base");
23
+
24
+ const SPEND_SUBTYPES = new Set([
25
+ "payment", "transfer", "refund", "utility", "redenvelope", "investment", "income", "order",
26
+ ]);
27
+
28
/**
 * analysis.overview — aggregates every event returned by the vault into one
 * cross-app snapshot: counts per app / per subtype / per month, the most
 * frequent contacts (canonicalised through merge groups), and a spending
 * roll-up. Optionally asks the LLM for a short commentary on the result.
 */
class OverviewSkill extends AnalysisSkill {
  /**
   * @param {object} opts  forwarded to AnalysisSkill; `name` is forced to
   *   "analysis.overview".
   */
  constructor(opts) {
    super({ ...opts, name: "analysis.overview" });
  }

  /**
   * Build the overview.
   *
   * @param {object} [options]
   * @param {number} [options.topN=10]      how many contacts to keep
   * @param {number} [options.limit=50000]  max events requested from the vault
   * @param {boolean} [options.commentary]  pass `false` to skip the LLM call
   * @param {*} [options.acceptNonLocal]    forwarded to callLlmCommentary
   * @returns {Promise<object>} { skill, summary, byApp, byType,
   *   monthlyActivity, topContacts, spending, citations, llm_commentary }
   */
  async run(options = {}) {
    const { since, until } = this.resolveTimeWindow(options);
    const topN = Number.isFinite(options.topN) && options.topN > 0 ? options.topN : 10;

    const q = { limit: Number.isFinite(options.limit) ? options.limit : 50_000 };
    if (since != null) q.since = since;
    if (until != null) q.until = until;
    const events = this.vault.queryEvents(q) || [];

    const byApp = new Map();
    const byType = new Map();
    const byMonth = new Map();
    const contacts = new Map(); // canonicalPersonId → { interactions, byApp:Map }
    let spendTotal = 0;
    const spendByDir = new Map();
    let currency = null;
    const citations = [];

    // Canonicalising a person expands and sorts their merge group; the same
    // ids recur across thousands of events, so resolve each id once per run.
    // NOTE(review): assumes merge groups do not change while run() executes.
    const canonCache = new Map();
    const canonOf = (pid) => {
      let canon = canonCache.get(pid);
      if (canon === undefined) {
        canon = this._canon(pid);
        canonCache.set(pid, canon);
      }
      return canon;
    };

    for (const e of events) {
      const app = (e.source && e.source.adapter) || "unknown";
      byApp.set(app, (byApp.get(app) || 0) + 1);
      const type = e.subtype || "other";
      byType.set(type, (byType.get(type) || 0) + 1);

      // Month bucket key is "YYYY-MM" in the process's local time zone.
      if (Number.isFinite(e.occurredAt)) {
        const d = new Date(e.occurredAt);
        if (Number.isFinite(d.getTime())) {
          const m = `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}`;
          byMonth.set(m, (byMonth.get(m) || 0) + 1);
        }
      }

      // relationships (actor + participants), merge-group canonicalized
      const ids = (Array.isArray(e.participants) ? e.participants : []).concat(e.actor ? [e.actor] : []);
      for (const pid of ids) {
        if (!pid || pid === "person-self") continue;
        const canon = canonOf(pid);
        const cur = contacts.get(canon) || { interactions: 0, byApp: new Map() };
        cur.interactions += 1;
        cur.byApp.set(app, (cur.byApp.get(app) || 0) + 1);
        contacts.set(canon, cur);
      }

      // spending: every finite amount on a spend-like subtype is summed,
      // regardless of direction; the per-direction split is kept alongside.
      if (SPEND_SUBTYPES.has(type) && e.content && e.content.amount && Number.isFinite(e.content.amount.value)) {
        const v = e.content.amount.value;
        spendTotal += v;
        const dir = e.content.amount.direction || "unknown";
        spendByDir.set(dir, (spendByDir.get(dir) || 0) + v);
        // The first currency seen wins; amounts are not converted.
        if (!currency && e.content.amount.currency) currency = e.content.amount.currency;
      }

      // Only the first 50 event ids are cited.
      if (citations.length < 50) citations.push(e.id);
    }

    const byAppArr = [...byApp.entries()].map(([app, count]) => ({ app, count })).sort((a, b) => b.count - a.count);
    const topContacts = [...contacts.entries()]
      .map(([personId, v]) => ({
        personId,
        name: this._lookupName(personId),
        interactions: v.interactions,
        byApp: Object.fromEntries(v.byApp),
      }))
      .sort((a, b) => b.interactions - a.interactions)
      .slice(0, topN);

    const summary = {
      totalEvents: events.length,
      appsActive: byApp.size,
      // Same null/undefined test as the query above, so a bound of 0 that was
      // sent to the vault is also reported here instead of collapsing to null.
      period: {
        since: since != null ? since : null,
        until: until != null ? until : null,
      },
      topAppName: byAppArr.length ? byAppArr[0].app : null,
    };

    const result = {
      skill: "analysis.overview",
      summary,
      byApp: byAppArr,
      byType: [...byType.entries()].map(([type, count]) => ({ type, count })).sort((a, b) => b.count - a.count),
      monthlyActivity: [...byMonth.entries()].map(([monthKey, count]) => ({ monthKey, count })).sort((a, b) => a.monthKey.localeCompare(b.monthKey)),
      topContacts,
      spending: {
        total: Math.round(spendTotal * 100) / 100,
        byDirection: Object.fromEntries(spendByDir),
        currency: currency || null,
      },
      citations,
      llm_commentary: null,
    };

    if (options.commentary !== false && this.llm && events.length > 0) {
      result.llm_commentary = await this._commentary(result, options);
    }
    return result;
  }

  /**
   * Map a person id to the canonical id of its merge group.
   * @param {string} personId
   * @returns {string} the smallest id (default sort order) among the group
   *   members, or `personId` itself when the group is empty/unknown.
   */
  _canon(personId) {
    const members = this.expandToMergeGroup(personId);
    if (!members || members.length === 0) return personId;
    // canonical = smallest id (stable across the group)
    return [...members].sort()[0];
  }

  /**
   * Best-effort display name for a person.
   * @param {string} personId
   * @returns {string|null} first entry of the person's `names`, or null when
   *   the vault has no `getPerson`, the lookup throws, or no name is stored.
   */
  _lookupName(personId) {
    try {
      if (typeof this.vault.getPerson === "function") {
        const p = this.vault.getPerson(personId);
        if (p && Array.isArray(p.names) && p.names.length) return p.names[0];
      }
    } catch (_e) { /* optional */ }
    return null;
  }

  /**
   * Ask the LLM for a short commentary on an already-computed overview.
   * Only aggregate figures (top-5 apps/types, spending total, contact count)
   * are put into the prompt.
   * @param {object} result   the object built by run()
   * @param {object} options  run() options; `acceptNonLocal` is forwarded
   * @returns {Promise<*>} whatever callLlmCommentary resolves to
   */
  async _commentary(result, options) {
    const apps = result.byApp.slice(0, 5).map((a) => `${a.app}(${a.count})`).join(", ");
    const types = result.byType.slice(0, 5).map((t) => `${t.type}(${t.count})`).join(", ");
    const msg = `用户跨 ${result.summary.appsActive} 个 app 的数据汇总:
- 共 ${result.summary.totalEvents} 条事件;活跃 app(Top5): ${apps}
- 事件类型(Top5): ${types}
- 跨 app 消费合计: ${result.spending.total} ${result.spending.currency || ""}
- 高频联系人数: ${result.topContacts.length}
请用 3-4 句话,从「为个人决策提供依据」的角度,概括其数字生活重心与可关注点。中文。`;
    return await this.callLlmCommentary([
      { role: "system", content: "你是个人数据中台的跨 app 洞察助手,基于事实给决策参考,克制不臆测。" },
      { role: "user", content: msg },
    ], { acceptNonLocal: options.acceptNonLocal });
  }
}
156
+
157
+ module.exports = { OverviewSkill };
package/lib/analysis.js CHANGED
@@ -22,6 +22,7 @@
22
22
  "use strict";
23
23
 
24
24
  const { parseQuery, extractEntityTerm, extractPersonNameCandidate } = require("./query-parser");
25
+ const { OverviewSkill } = require("./analysis-skills/overview");
25
26
  const {
26
27
  buildPrompt,
27
28
  parseCitations,
@@ -212,6 +213,27 @@ class AnalysisEngine {
212
213
  }
213
214
  }
214
215
 
216
+ // Optional cross-app overview context (opt-in via options.crossApp) — runs
217
+ // the OverviewSkill aggregation and injects a compact summary so the LLM
218
+ // can answer cross-app / decision questions grounded in ALL apps' data.
219
+ let crossAppOverview;
220
+ if (options.crossApp) {
221
+ try {
222
+ const ov = await new OverviewSkill({ vault: this.vault }).run({
223
+ commentary: false,
224
+ topN: 5,
225
+ ...(parsed.timeWindow &&
226
+ Number.isFinite(parsed.timeWindow.since) &&
227
+ Number.isFinite(parsed.timeWindow.until)
228
+ ? { since: parsed.timeWindow.since, until: parsed.timeWindow.until }
229
+ : {}),
230
+ });
231
+ crossAppOverview = formatCrossAppOverview(ov);
232
+ } catch (_e) {
233
+ /* overview is best-effort context; never abort the ask */
234
+ }
235
+ }
236
+
215
237
  // Build prompt.
216
238
  const { messages, factIds, factCount, truncated } = buildPrompt({
217
239
  question,
@@ -223,6 +245,7 @@ class AnalysisEngine {
223
245
  vaultTotals: this._gatherVaultTotals(),
224
246
  amountSummary:
225
247
  parsed.intent === "sum-amount" ? this._gatherAmountSummary(parsed) : undefined,
248
+ crossAppOverview,
226
249
  });
227
250
 
228
251
  // Telemetry: post-cap prompt size + truncation count. If `truncated` > 0
@@ -820,8 +843,35 @@ class AnalysisEngine {
820
843
  }
821
844
  }
822
845
 
846
/**
 * Render an OverviewSkill result as a short multi-line text block for the
 * CROSS_APP_OVERVIEW prompt context. Only aggregate figures are emitted.
 *
 * @param {object|null|undefined} ov  result of OverviewSkill#run
 * @returns {string|null} newline-joined lines, or null when `ov` or its
 *   `summary` is missing.
 */
function formatCrossAppOverview(ov) {
  if (!ov || !ov.summary) {
    return null;
  }

  // "label(count)" for the first five entries, or "无" when there are none.
  const renderTop = (list, labelKey, countKey) => {
    const parts = (list || [])
      .slice(0, 5)
      .map((entry) => `${entry[labelKey]}(${entry[countKey]})`);
    return parts.join(", ") || "无";
  };

  const lines = [];
  lines.push(`共 ${ov.summary.totalEvents} 事件,跨 ${ov.summary.appsActive} 个 app`);
  lines.push(`活跃 app(Top): ${renderTop(ov.byApp, "app", "count")}`);
  lines.push(`事件类型(Top): ${renderTop(ov.byType, "type", "count")}`);

  // The spending line appears only for a finite, non-zero total.
  const spending = ov.spending;
  const hasSpending =
    Boolean(spending) && Number.isFinite(spending.total) && spending.total !== 0;
  if (hasSpending) {
    const spendingLine = `跨 app 消费合计: ${spending.total} ${spending.currency || ""}`;
    lines.push(spendingLine.trim());
  }

  // Contacts are shown by name, falling back to the person id.
  const contacts = ov.topContacts;
  if (Array.isArray(contacts) && contacts.length > 0) {
    const rendered = contacts
      .slice(0, 5)
      .map((person) => `${person.name || person.personId}(${person.interactions})`);
    lines.push(`高频联系人(Top): ${rendered.join(", ")}`);
  }

  return lines.join("\n");
}
871
+
823
872
  module.exports = {
824
873
  AnalysisEngine,
874
+ formatCrossAppOverview,
825
875
  DEFAULT_MAX_FACTS,
826
876
  DEFAULT_MAX_QUERY_LIMIT,
827
877
  LATEST_INTENT_FACT_LIMIT,
@@ -0,0 +1,185 @@
1
+ "use strict";
2
+ /*
3
+ * PDH SQLite leaf-page record salvager — library form.
4
+ *
5
+ * Method B (`/proc/<pid>/mem` 内存扫描) dumps decrypted pages, but for DBs whose
6
+ * page cache is SCATTERED (not contiguous mmap) the rebuilt file is "malformed"
7
+ * (valid header, broken b-tree). This salvages the DATA anyway: it scans a dump
8
+ * (or concatenated dumps) for SQLite **table b-tree leaf pages** (type 0x0D) and
9
+ * parses each page's records directly — order-independent, exactly what sqlite3
10
+ * `.recover` does, but standalone (platform-tools sqlite3 lacks .recover).
11
+ *
12
+ * This file is the bundle-able home for the parser; the standalone CLI tool
13
+ * `scripts/android/pdh-sqlite-leaf-salvage.js` re-exports from here. Lives in
14
+ * pdh lib so the Android cc bundle (and `cc hub salvage`) can call it on-device.
15
+ *
16
+ * Output: array of {rowid, cols:[...]} — raw positional column tuples (leaf
17
+ * pages carry no column names). Map to a schema downstream (salvage-mapper.js).
18
+ *
19
+ * Authorization: only on data you are entitled to (your own device/account).
20
+ * Docs: docs/internal/pdh-db-decryption-runbook.md (Method B + reconstruction).
21
+ */
22
+ const fs = require("node:fs");
23
+
24
/**
 * Decode one SQLite variable-length integer ("varint") starting at `off`.
 *
 * Each of the first eight bytes contributes its low 7 bits and uses its high
 * bit as a continuation flag; if a ninth byte is reached it contributes all
 * 8 bits.
 *
 * @param {Buffer|Uint8Array} buf  bytes to read from
 * @param {number} off             index of the first varint byte
 * @returns {[bigint|null, number]} the decoded value as an unsigned BigInt
 *   plus the offset just past it; the value is `null` when the buffer ends
 *   before the varint does.
 */
function readVarint(buf, off) {
  let acc = 0n;
  const ninthAt = off + 8;
  for (let pos = off; pos < ninthAt; pos += 1) {
    const byte = buf[pos];
    if (byte === undefined) {
      return [null, pos];
    }
    acc = (acc << 7n) | BigInt(byte & 0x7f);
    const isLastByte = (byte & 0x80) === 0;
    if (isLastByte) {
      return [acc, pos + 1];
    }
  }
  // Eight continuation bytes seen: the ninth byte carries a full 8 bits.
  const ninth = buf[ninthAt];
  if (ninth === undefined) {
    return [null, off + 9];
  }
  return [(acc << 8n) | BigInt(ninth), off + 9];
}
40
+
41
/**
 * Number of content bytes occupied by a value of the given SQLite record
 * serial type.
 *
 * @param {bigint|number} t  serial type code (a BigInt as produced by readVarint)
 * @returns {number} byte length: 0 for NULL / the constants 0 and 1 / the
 *   reserved codes 10-11, fixed widths for codes 1-7, and (t-12)/2 or
 *   (t-13)/2 for blob (even) and text (odd) codes from 12 upward.
 */
function serialTypeSize(t) {
  const code = Number(t);
  if (code >= 12) {
    // Even codes are blobs, odd codes are text; both encode their length.
    const origin = code % 2 === 0 ? 12 : 13;
    return (code - origin) / 2;
  }
  switch (code) {
    case 1:
      return 1;
    case 2:
      return 2;
    case 3:
      return 3;
    case 4:
      return 4;
    case 5:
      return 6;
    case 6: // 64-bit integer
    case 7: // 64-bit float
      return 8;
    default:
      return 0;
  }
}
57
+
58
/**
 * Decode one record column value of serial type `t` located at `off`.
 *
 * @param {Buffer} buf       bytes to read from
 * @param {number} off       offset of the value's first content byte
 * @param {bigint|number} t  serial type code
 * @returns {[*, number]} the decoded value and the offset just past it.
 *   Integers come back as a Number when they are safe integers, otherwise as
 *   a decimal string; text is decoded as UTF-8; blobs are represented by the
 *   placeholder string `<blob:LENGTH>` rather than their bytes.
 */
function readValue(buf, off, t) {
  const code = Number(t);
  const width = serialTypeSize(t);

  // Types without content bytes, and the 8-byte float.
  switch (code) {
    case 0:
      return [null, off];
    case 8:
      return [0, off];
    case 9:
      return [1, off];
    case 7:
      return [buf.readDoubleBE(off), off + 8];
    default:
      break;
  }

  const next = off + width;

  if (code >= 1 && code <= 6) {
    // Big-endian two's-complement integer; bytes past the buffer read as 0.
    let raw = 0n;
    for (let pos = off; pos < next; pos += 1) {
      raw = (raw << 8n) | BigInt(buf[pos] || 0);
    }
    // sign-extend
    const bitCount = BigInt(width * 8);
    const signBit = 1n << (bitCount - 1n);
    if (raw >= signBit) {
      raw -= 1n << bitCount;
    }
    const asNumber = Number(raw);
    const decoded = Number.isSafeInteger(asNumber) ? asNumber : raw.toString();
    return [decoded, next];
  }

  const isText = code >= 13 && code % 2 === 1;
  if (isText) {
    return [buf.toString("utf8", off, next), next];
  }
  // blob (n>=12 even) — return length marker, not raw bytes
  return [`<blob:${width}>`, next];
}
81
+
82
/**
 * Parse one SQLite table b-tree leaf page located at `base`.
 *
 * The page header is read at `base` itself, so a candidate is accepted only
 * when that byte is 0x0D, the whole page fits in `buf`, and the cell count
 * and cell-content-start fields are mutually consistent. Each cell is then
 * decoded independently; a cell that fails validation or throws while being
 * decoded is skipped without affecting the others.
 *
 * @param {Buffer} buf       dump to read from
 * @param {number} base      offset of the candidate page within `buf`
 * @param {number} pageSize  SQLite page size in bytes
 * @param {number} minCols   records with fewer columns are dropped
 * @returns {Array<{rowid: string|null, cols: any[]}>|null} decoded records
 *   (rowid as a signed decimal string), or null when the page is rejected or
 *   yields no record.
 */
function parseLeafPage(buf, base, pageSize, minCols) {
  if (buf[base] !== 0x0d) return null; // 0x0D = table b-tree leaf
  if (base + pageSize > buf.length) return null;
  const numCells = (buf[base + 3] << 8) | buf[base + 4];
  if (numCells <= 0 || numCells > Math.floor(pageSize / 4)) return null;
  // cell-content-start (bytes 5-6; 0 means 65536) — must sit after the cell
  // pointer array and within the page. This guard rejects the false positives a
  // finer (unaligned) stride would otherwise hit on random 0x0D bytes.
  let cellStart = (buf[base + 5] << 8) | buf[base + 6];
  if (cellStart === 0) cellStart = 65536;
  const hdrEndMin = 8 + numCells * 2;
  if (cellStart < hdrEndMin || cellStart > pageSize) return null;

  const out = [];
  const ptrBase = base + 8;
  for (let c = 0; c < numCells; c++) {
    const ptr = (buf[ptrBase + c * 2] << 8) | buf[ptrBase + c * 2 + 1];
    if (ptr < 8 || ptr >= pageSize) continue;
    try {
      // Cell layout: payload-length varint, rowid varint, then the record.
      const [payloadLen, afterLen] = readVarint(buf, base + ptr);
      if (payloadLen === null || payloadLen <= 0n || payloadLen > BigInt(pageSize)) continue;
      const [rowid, recStart] = readVarint(buf, afterLen);

      // Record header: its own length (which counts the length varint itself)
      // followed by one serial-type varint per column.
      const [hdrLen, afterHdrLen] = readVarint(buf, recStart);
      // The header is part of the payload, so a header longer than the payload
      // is impossible in a real record. Rejecting it drops garbage rows and
      // stops the loop below from walking far past the page on a bogus length.
      if (hdrLen === null || hdrLen <= 0n || hdrLen > payloadLen) continue;
      const hdrEnd = recStart + Number(hdrLen);

      const serials = [];
      let off = afterHdrLen;
      while (off < hdrEnd) {
        const [st, next] = readVarint(buf, off);
        off = next;
        if (st === null) break;
        serials.push(st);
      }

      // Column values follow the header in serial-type order.
      let vOff = hdrEnd;
      const cols = [];
      for (const st of serials) {
        const [val, next] = readValue(buf, vOff, st);
        cols.push(val);
        vOff = next;
      }

      if (cols.length >= minCols) {
        out.push({
          // Rowids are signed 64-bit integers; readVarint yields the unsigned
          // bit pattern, so reinterpret it to keep negative rowids negative.
          rowid: rowid === null ? null : BigInt.asIntN(64, rowid).toString(),
          cols,
        });
      }
    } catch (_e) { /* skip malformed cell (deliberate best-effort salvage) */ }
  }
  return out.length ? out : null;
}
127
+
128
/**
 * Walk a dump buffer at a fixed stride, parse every offset that looks like a
 * SQLite table-leaf page, and collect the decoded records with duplicates
 * removed.
 *
 * @param {Buffer} buf  dump to scan
 * @param {{pageSize?: number, minCols?: number, unaligned?: boolean, stride?: number}} [opts]
 *   - pageSize: SQLite page size (default 4096)
 *   - minCols: records with fewer columns are dropped (default 3)
 *   - unaligned: scan every 512 bytes instead of every page, to catch pages
 *     that are not page-aligned in the dump (slower)
 *   - stride: explicit scan stride; takes precedence over `unaligned`
 * @returns {{records: Array<{rowid: string|null, cols: any[]}>, pages: number}}
 *   unique records in discovery order, and the number of offsets that parsed
 *   as a leaf page with at least one record.
 * @throws {TypeError} when `buf` is not a Buffer
 */
function salvageBuffer(buf, opts = {}) {
  if (!Buffer.isBuffer(buf)) {
    throw new TypeError("salvageBuffer: buf must be a Buffer");
  }

  const isPositive = (value) => Number.isFinite(value) && value > 0;
  const pageSize = isPositive(opts.pageSize) ? opts.pageSize : 4096;
  const minCols = Number.isFinite(opts.minCols) && opts.minCols >= 0 ? opts.minCols : 3;
  const unaligned = !!opts.unaligned;

  let stride = pageSize;
  if (isPositive(opts.stride)) {
    stride = opts.stride;
  } else if (unaligned) {
    stride = 512;
  }

  const found = { records: [], pages: 0 };
  const seenKeys = new Set(); // dedup overlapping finds
  let base = 0;
  while (base + 8 <= buf.length) {
    // Cheap first-byte check before attempting a full page parse.
    const recs = buf[base] === 0x0d ? parseLeafPage(buf, base, pageSize, minCols) : null;
    if (recs) {
      found.pages += 1;
      for (const rec of recs) {
        const key = `${rec.rowid} ${JSON.stringify(rec.cols)}`;
        if (!seenKeys.has(key)) {
          seenKeys.add(key);
          found.records.push(rec);
        }
      }
    }
    base += stride;
  }
  return found;
}
171
+
172
/**
 * Load a dump file fully into memory (synchronously) and salvage its records.
 *
 * @param {string} filePath  path of the dump file to read
 * @param {object} [opts]    passed through unchanged to salvageBuffer
 * @returns {{records: Array<{rowid: string|null, cols: any[]}>, pages: number}}
 *   the result of salvageBuffer on the file's bytes
 */
function salvageFile(filePath, opts = {}) {
  return salvageBuffer(fs.readFileSync(filePath), opts);
}
177
+
178
+ module.exports = {
179
+ readVarint,
180
+ serialTypeSize,
181
+ readValue,
182
+ parseLeafPage,
183
+ salvageBuffer,
184
+ salvageFile,
185
+ };