@chainlesschain/personal-data-hub 0.3.9 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +45 -25
- package/__tests__/adapters/apple-health.test.js +95 -0
- package/__tests__/adapters/email-templates.test.js +123 -0
- package/__tests__/adapters/family-23-collectors-scaffold.test.js +178 -0
- package/__tests__/adapters/game-genshin-scaffold.test.js +107 -0
- package/__tests__/adapters/git-activity.test.js +7 -1
- package/__tests__/adapters/local-im-pc.test.js +149 -0
- package/__tests__/adapters/netease-music.test.js +74 -0
- package/__tests__/adapters/qq-pc-direct-read.test.js +186 -0
- package/__tests__/adapters/system-data-adapter.test.js +4 -1
- package/__tests__/adapters/wechat-pc-direct-read.test.js +207 -0
- package/__tests__/adapters/weread.test.js +123 -0
- package/__tests__/analysis.test.js +120 -15
- package/__tests__/mobile-extractor-encrypted.test.js +460 -0
- package/__tests__/prompt-builder.test.js +25 -0
- package/__tests__/registry-readiness.test.js +233 -0
- package/__tests__/social-douyin-im-direct-read.test.js +311 -0
- package/__tests__/social-douyin-snapshot.test.js +5 -2
- package/__tests__/vault.test.js +99 -0
- package/lib/adapter-guide.js +520 -0
- package/lib/adapter-readiness.js +257 -0
- package/lib/adapters/_local-im-db-reader.js +218 -0
- package/lib/adapters/_local-im-pc-adapter.js +162 -0
- package/lib/adapters/apple-health/index.js +329 -0
- package/lib/adapters/dingtalk-pc/index.js +29 -0
- package/lib/adapters/edu-huawei-learning/api-client.js +47 -0
- package/lib/adapters/edu-huawei-learning/index.js +255 -0
- package/lib/adapters/edu-zuoyebang/api-client.js +48 -0
- package/lib/adapters/edu-zuoyebang/index.js +259 -0
- package/lib/adapters/email-imap/email-adapter.js +16 -0
- package/lib/adapters/email-imap/templates/bill.js +174 -18
- package/lib/adapters/feishu-pc/index.js +29 -0
- package/lib/adapters/finance-alipay/api-client.js +48 -0
- package/lib/adapters/finance-alipay/index.js +257 -0
- package/lib/adapters/game-genshin/api-client.js +59 -0
- package/lib/adapters/game-genshin/index.js +274 -0
- package/lib/adapters/game-honor-of-kings/api-client.js +54 -0
- package/lib/adapters/game-honor-of-kings/index.js +259 -0
- package/lib/adapters/netease-music/index.js +227 -0
- package/lib/adapters/qq-pc/index.js +200 -0
- package/lib/adapters/qq-pc/nt-db-reader.js +210 -0
- package/lib/adapters/social-douyin/index.js +194 -1
- package/lib/adapters/wechat/wechat-adapter.js +7 -1
- package/lib/adapters/wechat-pc/index.js +335 -0
- package/lib/adapters/wechat-pc/pc-db-reader.js +327 -0
- package/lib/adapters/weread/api-client.js +128 -0
- package/lib/adapters/weread/index.js +337 -0
- package/lib/analysis.js +65 -0
- package/lib/index.js +39 -0
- package/lib/mobile-extractor/bplist.js +233 -0
- package/lib/mobile-extractor/ios-backup-crypto.js +315 -0
- package/lib/mobile-extractor/ios.js +131 -16
- package/lib/prompt-builder.js +11 -1
- package/lib/registry.js +170 -0
- package/lib/vault.js +105 -0
- package/package.json +1 -1
- package/scripts/run-native-tests-sandbox.sh +2 -0
- package/vitest.config.js +79 -1
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* 微信读书 (WeRead) web API client — cookie-based.
|
|
5
|
+
*
|
|
6
|
+
* Reuses the AI-chat HttpClient (cookie inject + rate-limit + retry +
|
|
7
|
+
* injectable fetch test seam). WeRead's app API host `i.weread.qq.com`
|
|
8
|
+
* accepts the web login cookie (key cookies: wr_vid, wr_skey, wr_name).
|
|
9
|
+
*
|
|
10
|
+
* Endpoints (community-documented; best-effort v0.1 — WeRead occasionally
|
|
11
|
+
* rotates params / adds light signing on some routes, so each method is
|
|
12
|
+
* defensive and a failing endpoint degrades to an empty list rather than
|
|
13
|
+
* aborting the whole sync):
|
|
14
|
+
*
|
|
15
|
+
* GET /user/notebooks → 有笔记/划线的书 (notebooks)
|
|
16
|
+
* GET /book/bookmarklist?bookId= → 划线 (highlights)
|
|
17
|
+
* GET /review/list?bookId=&listType=11&mine=1&synckey=0 → 想法 (reviews)
|
|
18
|
+
* GET /readdata/summary?synckey=0 → 阅读时长汇总 (best-effort)
|
|
19
|
+
*
|
|
20
|
+
* The `wr_skey` cookie expires (~hours/days); a 401/403 surfaces as
|
|
21
|
+
* CookieExpiredError so the UI can prompt re-login.
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
const { HttpClient, CookieExpiredError } = require("../ai-chat-history/http-client");
|
|
25
|
+
|
|
26
|
+
const DEFAULT_BASE = "https://i.weread.qq.com";
|
|
27
|
+
const DEFAULT_UA =
|
|
28
|
+
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36";
|
|
29
|
+
|
|
30
|
+
/**
 * Cookie-authenticated client for the WeRead app API.
 *
 * Every list method is defensive: a failed request (other than an expired
 * cookie) yields an empty list, and malformed rows inside a response are
 * skipped instead of aborting the whole mapping.
 */
class WeReadApiClient {
  /**
   * @param {object} opts
   * @param {string} opts.cookie        WeRead cookie header string (required, non-empty).
   * @param {string} [opts.baseUrl]     API host. Default i.weread.qq.com.
   * @param {function} [opts.fetch]     Fetch override (test seam).
   * @param {object} [opts.rateLimits]  Passed to HttpClient; defaults to 30/min, 1200 ms spacing.
   * @throws {Error} when opts.cookie is missing or empty.
   */
  constructor(opts = {}) {
    if (typeof opts.cookie !== "string" || opts.cookie.length === 0) {
      throw new Error("WeReadApiClient: opts.cookie required");
    }
    this._cookie = opts.cookie;
    // Drop a single trailing slash so `${base}${path}` never doubles it.
    this._base = (opts.baseUrl || DEFAULT_BASE).replace(/\/$/, "");
    this._http = new HttpClient({
      vendor: "weread",
      fetch: opts.fetch,
      rateLimits: opts.rateLimits || { perMinute: 30, minIntervalMs: 1200 },
    });
    // Message of the most recent failed request ("COOKIE_EXPIRED" for an
    // expired cookie). Nothing in this class resets it after a later success.
    this.lastErrorCode = null;
  }

  /** Request headers sent with every call. */
  _headers() {
    return { Cookie: this._cookie, "User-Agent": DEFAULT_UA, Accept: "application/json" };
  }

  /**
   * GET `path` relative to the base URL and return the parsed JSON.
   *
   * @param {string} path Path plus query string, starting with "/".
   * @returns {Promise<object|null>} Parsed body, or null when the request
   *   failed for any reason other than an expired cookie.
   * @throws {CookieExpiredError} rethrown so the caller can prompt re-login.
   */
  async _get(path) {
    try {
      return await this._http.getJson(`${this._base}${path}`, { headers: this._headers() });
    } catch (err) {
      if (err instanceof CookieExpiredError) {
        this.lastErrorCode = "COOKIE_EXPIRED";
        throw err;
      }
      this.lastErrorCode = err && err.message ? err.message : String(err);
      return null; // degrade — caller treats as empty
    }
  }

  /** True for values whose properties can be read safely (non-null objects). */
  static _isRecord(value) {
    return value !== null && typeof value === "object";
  }

  /**
   * Books that have notes/highlights.
   *
   * @returns {Promise<object[]>} Array of {bookId, title, author, cover,
   *   category, noteCount, reviewCount, bookmarkCount, sort}. Empty when the
   *   request failed or the response carries no `books` array.
   */
  async getNotebooks() {
    const data = await this._get("/user/notebooks");
    const rows = data && Array.isArray(data.books) ? data.books : [];
    // Skip null / primitive rows: one malformed entry must not sink the list.
    return rows.filter(WeReadApiClient._isRecord).map((b) => {
      // Book metadata may be nested under `book` or flattened onto the row.
      const book = b.book || b;
      return {
        bookId: String(b.bookId || book.bookId || ""),
        title: book.title || "(未知书名)",
        author: book.author || null,
        cover: book.cover || null,
        category: book.category || null,
        noteCount: b.noteCount != null ? b.noteCount : null,
        reviewCount: b.reviewCount != null ? b.reviewCount : null,
        bookmarkCount: b.bookmarkCount != null ? b.bookmarkCount : null,
        sort: b.sort != null ? b.sort : null,
      };
    });
  }

  /**
   * Highlights for one book.
   *
   * @param {string} bookId
   * @returns {Promise<object[]>} Array of {bookmarkId, bookId, chapterTitle,
   *   chapterUid, markText, createTime}; empty on failure.
   */
  async getBookmarks(bookId) {
    const data = await this._get(`/book/bookmarklist?bookId=${encodeURIComponent(bookId)}`);
    // The list is read from `updated`, falling back to `bookmarks`.
    const rows = (data && (data.updated || data.bookmarks)) || [];
    return (Array.isArray(rows) ? rows : []).filter(WeReadApiClient._isRecord).map((m) => ({
      bookmarkId: String(m.bookmarkId || ""),
      bookId: String(m.bookId || bookId),
      chapterTitle: m.chapterTitle || m.chapterName || null,
      chapterUid: m.chapterUid != null ? m.chapterUid : null,
      markText: m.markText || "",
      createTime: m.createTime != null ? m.createTime : null,
    }));
  }

  /**
   * Reviews / thoughts for one book.
   *
   * @param {string} bookId
   * @returns {Promise<object[]>} Array of {reviewId, bookId, content,
   *   chapterTitle, createTime, abstract}; empty on failure.
   */
  async getReviews(bookId) {
    const data = await this._get(
      `/review/list?bookId=${encodeURIComponent(bookId)}&listType=11&mine=1&synckey=0`,
    );
    const rows = (data && data.reviews) || [];
    return (Array.isArray(rows) ? rows : []).filter(WeReadApiClient._isRecord).map((r) => {
      // Review fields may be nested under `review` or flattened onto the row.
      const rev = r.review || r;
      return {
        reviewId: String(rev.reviewId || ""),
        bookId: String(rev.bookId || bookId),
        content: rev.content || "",
        chapterTitle: rev.chapterTitle || null,
        createTime: rev.createTime != null ? rev.createTime : null,
        abstract: rev.abstract || null,
      };
    });
  }

  /** Reading-time summary (best-effort; shape varies). Returns raw object or null. */
  async getReadSummary() {
    return await this._get("/readdata/summary?synckey=0");
  }
}
|
|
127
|
+
|
|
128
|
+
module.exports = { WeReadApiClient, DEFAULT_BASE };
|
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* 微信读书 (WeRead) adapter — cookie 模式 + snapshot 模式.
|
|
5
|
+
*
|
|
6
|
+
* 知识阅读类数据源。WeRead 没有像微信/QQ 那样的本地加密大库;最可靠的个人
|
|
7
|
+
* 路径是 **cookie web API**(登录 weread.qq.com 后用 cookie 拉自己的笔记本/
|
|
8
|
+
* 划线/想法/阅读时长)。
|
|
9
|
+
*
|
|
10
|
+
* 1. cookie 模式 (opts.cookie): 用 WeReadApiClient 直接拉 → 产出
|
|
11
|
+
* book / highlight / review 事件。一键友好(UI 抓 cookie 后传入)。
|
|
12
|
+
* 2. snapshot 模式 (opts.inputPath): 消费预先抓好的 JSON(Android 采集器 /
|
|
13
|
+
* 可测)。schema 与 cookie 模式产出的 kinds 对齐。
|
|
14
|
+
*
|
|
15
|
+
* 实体映射:
|
|
16
|
+
* book → ITEM(document《书名》) + EVENT(browse 读了《书名》)
|
|
17
|
+
* highlight → EVENT(other, 划线文本) extra.kind=highlight
|
|
18
|
+
* review → EVENT(post, 想法文本) extra.kind=review
|
|
19
|
+
*
|
|
20
|
+
* snapshot schema (schemaVersion 1):
|
|
21
|
+
* { schemaVersion:1, snapshottedAt, account:{vid,name},
|
|
22
|
+
* events:[ {kind:"book",id,bookId,title,author,noteCount},
|
|
23
|
+
* {kind:"highlight",id,bookId,bookTitle,markText,chapterTitle,createTime},
|
|
24
|
+
* {kind:"review",id,bookId,bookTitle,content,chapterTitle,createTime} ] }
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
const fs = require("node:fs");
|
|
28
|
+
const { newId } = require("../../ids");
|
|
29
|
+
const {
|
|
30
|
+
ENTITY_TYPES,
|
|
31
|
+
EVENT_SUBTYPES,
|
|
32
|
+
ITEM_SUBTYPES,
|
|
33
|
+
CAPTURED_BY,
|
|
34
|
+
} = require("../../constants");
|
|
35
|
+
|
|
36
|
+
const NAME = "weread";
|
|
37
|
+
const VERSION = "0.1.0";
|
|
38
|
+
const SNAPSHOT_SCHEMA_VERSION = 1;
|
|
39
|
+
|
|
40
|
+
const KIND_BOOK = "book";
|
|
41
|
+
const KIND_HIGHLIGHT = "highlight";
|
|
42
|
+
const KIND_REVIEW = "review";
|
|
43
|
+
const VALID_KINDS = Object.freeze([KIND_BOOK, KIND_HIGHLIGHT, KIND_REVIEW]);
|
|
44
|
+
|
|
45
|
+
/**
 * Convert a timestamp to epoch milliseconds.
 *
 * Accepts a finite number or an all-digits string. Values above 1e12 are
 * taken to be milliseconds already; anything else is multiplied by 1000.
 *
 * @param {*} v Candidate timestamp.
 * @returns {number|null} Milliseconds, or null when `v` is not a usable timestamp.
 */
function parseTime(v) {
  let numeric = null;
  if (Number.isFinite(v)) {
    numeric = v;
  } else if (typeof v === "string" && /^\d+$/.test(v)) {
    numeric = parseInt(v, 10);
  }
  if (numeric === null) return null;
  return numeric > 1e12 ? numeric : numeric * 1000;
}
|
|
53
|
+
|
|
54
|
+
/**
 * Build the `weread:<kind>:<id>` original id for a raw record.
 *
 * A non-empty string or finite number is used verbatim. Anything else gets a
 * generated `unknown-<timestamp>-<random>` suffix, which differs per call.
 *
 * @param {string} kind Record kind.
 * @param {string|number} [id] Upstream identifier, if any.
 * @returns {string}
 */
function stableOriginalId(kind, id) {
  let suffix;
  if (typeof id === "string" && id.length > 0) {
    suffix = id;
  } else if (typeof id === "number" && Number.isFinite(id)) {
    suffix = String(id);
  } else {
    suffix = `unknown-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
  }
  return `weread:${kind}:${suffix}`;
}
|
|
61
|
+
|
|
62
|
+
/**
 * WeRead adapter with two sync modes: cookie (live web API) and snapshot
 * (pre-captured JSON file). Both yield raw records of kind book / highlight /
 * review that `normalize` turns into vault entities.
 */
class WeReadAdapter {
  /**
   * @param {object} [opts]
   * @param {string} [opts.cookie]              Cookie used for cookie-mode sync.
   * @param {string} [opts.inputPath]           Snapshot file used for snapshot-mode sync.
   * @param {function} [opts.apiClientFactory]  Test seam: builds the API client from sync options.
   */
  constructor(opts = {}) {
    this._cookie = opts.cookie || null;
    this._dataPath = opts.inputPath || null;
    this._apiClientFactory = opts.apiClientFactory || null; // test seam

    this.name = NAME;
    this.version = VERSION;
    this.capabilities = [
      "sync:cookie",
      "sync:snapshot",
      "parse:weread-book",
      "parse:weread-highlight",
      "parse:weread-review",
    ];
    this.extractMode = "web-api";
    this.rateLimits = { perMinute: 30 };
    this.dataDisclosure = {
      fields: [
        "weread:book (书名 / 作者 / 笔记数)",
        "weread:highlight (划线文本 / 章节)",
        "weread:review (想法文本 / 章节)",
      ],
      sensitivity: "medium",
      legalGate: false,
    };

    // File-system access goes through _deps so it can be replaced.
    this._deps = { fs };
  }

  /**
   * Report whether a sync could run with the given context.
   *
   * @param {object} [ctx]
   * @param {boolean} [ctx.readinessOnly] Only report whether a cookie is configured.
   * @param {string} [ctx.inputPath]      Snapshot file to check for readability.
   * @param {string} [ctx.cookie]         Cookie supplied for this run.
   * @returns {Promise<object>} `{ok:true, mode}` or `{ok:false, reason, message}`.
   */
  async authenticate(ctx = {}) {
    // The default parameter only covers undefined; tolerate an explicit null too.
    const context = ctx || {};
    if (context.readinessOnly) {
      if (this._cookie) return { ok: true, mode: "configured" };
      return {
        ok: false,
        reason: "INVALID_COOKIE",
        message: "weread: 需登录微信读书网页版抓取 cookie(或选择已采集的快照文件)",
      };
    }
    if (typeof context.inputPath === "string" && context.inputPath.length > 0) {
      try {
        this._deps.fs.accessSync(context.inputPath, this._deps.fs.constants.R_OK);
      } catch (err) {
        return { ok: false, reason: "INPUT_PATH_UNREADABLE", message: `snapshot not readable: ${err.message}` };
      }
      return { ok: true, mode: "snapshot-file" };
    }
    if (context.cookie || this._cookie) return { ok: true, mode: "cookie" };
    // sync() also accepts the constructor-configured snapshot path, so a
    // readable one counts as authenticated here as well.
    if (this._dataPath) {
      try {
        this._deps.fs.accessSync(this._dataPath, this._deps.fs.constants.R_OK);
        return { ok: true, mode: "snapshot-file" };
      } catch (_err) {
        // Unreadable configured snapshot: report the generic failure below.
      }
    }
    return {
      ok: false,
      reason: "INVALID_COOKIE",
      message: "weread.authenticate: needs opts.cookie (cookie mode) OR opts.inputPath (snapshot)",
    };
  }

  /** Always reports healthy; no remote probe is made. */
  async healthCheck() {
    return { ok: true, lastChecked: Date.now() };
  }

  /**
   * Yield raw records. An existing snapshot file takes precedence over a cookie.
   *
   * @param {object} [opts] May override `inputPath` / `cookie` from the constructor.
   * @throws {Error} when neither an existing snapshot nor a cookie is available.
   */
  async *sync(opts = {}) {
    const inputPath = opts.inputPath || this._dataPath;
    if (inputPath && this._deps.fs.existsSync(inputPath)) {
      yield* this._syncViaSnapshot({ ...opts, inputPath });
      return;
    }
    const cookie = opts.cookie || this._cookie;
    if (cookie) {
      yield* this._syncViaCookie({ ...opts, cookie });
      return;
    }
    throw new Error("weread.sync: needs opts.cookie (cookie mode) OR opts.inputPath (snapshot)");
  }

  /**
   * Cookie mode: list notebooks, then (unless `opts.includeNotes === false`)
   * fetch highlights and reviews per book.
   *
   * @param {object} opts
   * @param {string} opts.cookie
   * @param {number} [opts.maxBooks]       Positive integer cap on books; default 500.
   * @param {boolean} [opts.includeNotes]  Set false to emit book records only.
   * @param {function} [opts.onProgress]   Best-effort progress callback.
   */
  async *_syncViaCookie(opts) {
    const client = this._apiClientFactory
      ? this._apiClientFactory(opts)
      : new (require("./api-client").WeReadApiClient)({
          cookie: opts.cookie,
          fetch: opts.fetch,
          baseUrl: opts.baseUrl,
        });
    const emit = (phase, extra) => {
      if (typeof opts.onProgress === "function") {
        // A throwing progress listener must not abort the sync.
        try { opts.onProgress({ phase, adapter: NAME, ...extra }); } catch (_e) { /* best-effort */ }
      }
    };

    const maxBooks = Number.isInteger(opts.maxBooks) && opts.maxBooks > 0 ? opts.maxBooks : 500;
    const includeNotes = opts.includeNotes !== false; // pull highlights/reviews per book
    const books = await client.getNotebooks();
    emit("notebooks", { count: books.length });

    let bookN = 0;
    for (const b of books) {
      if (bookN >= maxBooks) break;
      bookN += 1;
      yield {
        adapter: NAME,
        kind: KIND_BOOK,
        originalId: stableOriginalId(KIND_BOOK, b.bookId),
        capturedAt: Date.now(),
        payload: { kind: KIND_BOOK, ...b },
      };

      if (!includeNotes || !b.bookId) continue;
      // Highlights
      const marks = await client.getBookmarks(b.bookId);
      for (const m of marks) {
        yield {
          adapter: NAME,
          kind: KIND_HIGHLIGHT,
          originalId: stableOriginalId(KIND_HIGHLIGHT, m.bookmarkId || `${b.bookId}-${m.createTime}`),
          capturedAt: parseTime(m.createTime) || Date.now(),
          payload: { kind: KIND_HIGHLIGHT, ...m, bookTitle: b.title },
        };
      }
      // Reviews / thoughts
      const reviews = await client.getReviews(b.bookId);
      for (const r of reviews) {
        yield {
          adapter: NAME,
          kind: KIND_REVIEW,
          originalId: stableOriginalId(KIND_REVIEW, r.reviewId || `${b.bookId}-${r.createTime}`),
          capturedAt: parseTime(r.createTime) || Date.now(),
          payload: { kind: KIND_REVIEW, ...r, bookTitle: b.title },
        };
      }
      emit("book-done", { bookId: b.bookId, marks: marks.length, reviews: reviews.length });
    }
  }

  /**
   * Pick the identifier a snapshot event is keyed on.
   *
   * An explicit string `id` wins. Otherwise each kind uses its own natural
   * key (book → bookId, highlight → bookmarkId, review → reviewId). Notes
   * must not fall back to the bare bookId: every id-less highlight or review
   * of one book would then share a single originalId. Instead they use the
   * same `<bookId>-<createTime>` composite that cookie mode uses.
   *
   * @param {object} ev Snapshot event with a valid `kind`.
   * @returns {string|number|null} Identifier, or null when none can be derived.
   */
  _snapshotEventId(ev) {
    if (typeof ev.id === "string" && ev.id) return ev.id;
    if (ev.kind === KIND_BOOK) return ev.bookId || null;
    const ownId = ev.kind === KIND_HIGHLIGHT ? ev.bookmarkId : ev.reviewId;
    if (ownId) return ownId;
    if (ev.bookId && ev.createTime != null) return `${ev.bookId}-${ev.createTime}`;
    return null;
  }

  /**
   * Snapshot mode: read the JSON file at `opts.inputPath` and yield its events.
   *
   * @param {object} opts
   * @param {string} opts.inputPath
   * @param {object} [opts.include]  Per-kind switch; `{highlight:false}` skips highlights.
   * @param {number} [opts.limit]    Positive integer cap on yielded records.
   * @throws {Error} when the snapshot's schemaVersion is not the supported one.
   */
  async *_syncViaSnapshot(opts) {
    const snapshot = JSON.parse(this._deps.fs.readFileSync(opts.inputPath, "utf-8"));
    if (!snapshot || snapshot.schemaVersion !== SNAPSHOT_SCHEMA_VERSION) {
      throw new Error(
        `weread.sync: snapshot schemaVersion mismatch (got ${snapshot && snapshot.schemaVersion}, expected ${SNAPSHOT_SCHEMA_VERSION})`,
      );
    }
    // Timestamp used when an event carries no usable time of its own.
    const fallback =
      Number.isFinite(snapshot.snapshottedAt) && snapshot.snapshottedAt > 0
        ? Math.floor(snapshot.snapshottedAt)
        : Date.now();
    const include = opts.include || {};
    const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
    const events = Array.isArray(snapshot.events) ? snapshot.events : [];
    let emitted = 0;
    for (const ev of events) {
      if (emitted >= limit) return;
      // Malformed entries and unknown kinds are skipped, not fatal.
      if (!ev || typeof ev !== "object" || !VALID_KINDS.includes(ev.kind)) continue;
      if (include[ev.kind] === false) continue;
      yield {
        adapter: NAME,
        kind: ev.kind,
        originalId: stableOriginalId(ev.kind, this._snapshotEventId(ev)),
        capturedAt: parseTime(ev.capturedAt) || parseTime(ev.createTime) || fallback,
        payload: { ...ev },
      };
      emitted += 1;
    }
  }

  /**
   * Convert one raw record from `sync` into normalized entities.
   *
   * @param {object} raw Record with a `payload` and a `kind` (on the record or its payload).
   * @returns {object} `{events, items, persons, places, topics}`.
   * @throws {Error} when the payload is missing or the kind is unknown.
   */
  normalize(raw) {
    if (!raw || !raw.payload) throw new Error("WeReadAdapter.normalize: payload missing");
    const kind = raw.kind || raw.payload.kind;
    const ingestedAt = Date.now();
    if (kind === KIND_BOOK) return normalizeBook(raw.payload, raw, ingestedAt);
    if (kind === KIND_HIGHLIGHT) return normalizeHighlight(raw.payload, raw, ingestedAt);
    if (kind === KIND_REVIEW) return normalizeReview(raw.payload, raw, ingestedAt);
    throw new Error(`WeReadAdapter.normalize: unknown kind ${kind}`);
  }
}
|
|
234
|
+
|
|
235
|
+
/**
 * Assemble the provenance (`source`) record attached to normalized entities.
 *
 * @param {object} raw Raw record from sync; supplies `originalId` and `capturedAt`.
 * @param {number} occurredAt Used as `capturedAt` when the raw record has none.
 * @returns {object} `{adapter, adapterVersion, originalId, capturedAt, capturedBy}`.
 */
function buildSource(raw, occurredAt) {
  const capturedAt = raw.capturedAt ? raw.capturedAt : occurredAt;
  const provenance = {
    adapter: NAME,
    adapterVersion: VERSION,
    originalId: raw.originalId,
    capturedAt,
    capturedBy: CAPTURED_BY.API,
  };
  return provenance;
}
|
|
244
|
+
|
|
245
|
+
/**
 * Item id for a book: derived from `bookId` when there is one, so the same
 * book always maps to the same item; otherwise a freshly generated id.
 *
 * @param {string} [bookId]
 * @returns {string}
 */
function bookItemId(bookId) {
  const suffix = bookId ? bookId : newId();
  return `item-weread-book-${suffix}`;
}
|
|
248
|
+
|
|
249
|
+
/**
 * Normalize a book payload into one browse event plus one document item.
 * The event points at the item through `extra.itemRef`.
 *
 * @param {object} p Book payload.
 * @param {object} raw Raw record (provides `capturedAt` / `originalId`).
 * @param {number} ingestedAt Ingestion timestamp in ms.
 * @returns {object} `{events, items, persons, places, topics}`.
 */
function normalizeBook(p, raw, ingestedAt) {
  // Books carry no time of their own; use capture time, else ingestion time.
  const occurredAt = raw.capturedAt || ingestedAt;
  const source = buildSource(raw, occurredAt);
  const title = p.title || "(未知书名)";
  const itemId = bookItemId(p.bookId);

  const readEvent = {
    id: newId(),
    type: ENTITY_TYPES.EVENT,
    subtype: EVENT_SUBTYPES.BROWSE,
    occurredAt,
    actor: "person-self",
    content: { title: `读《${title}》`, text: title },
    ingestedAt,
    source,
    extra: {
      platform: "weread",
      kind: "book",
      bookId: p.bookId || null,
      author: p.author || null,
      noteCount: p.noteCount != null ? p.noteCount : null,
      reviewCount: p.reviewCount != null ? p.reviewCount : null,
      itemRef: itemId,
    },
  };

  const bookItem = {
    id: itemId,
    type: ENTITY_TYPES.ITEM,
    subtype: ITEM_SUBTYPES.DOCUMENT,
    name: p.author ? `${title} - ${p.author}` : title,
    ingestedAt,
    source,
    extra: {
      platform: "weread",
      kind: "book",
      bookId: p.bookId || null,
      author: p.author || null,
      cover: p.cover || null,
      category: p.category || null,
    },
  };

  return {
    events: [readEvent],
    items: [bookItem],
    persons: [],
    places: [],
    topics: [],
  };
}
|
|
284
|
+
|
|
285
|
+
/**
 * Normalize a highlight payload into a single "other" event whose text is the
 * highlighted passage. No items are emitted; the book is referenced through
 * `extra.itemRef` when a bookId is present.
 *
 * @param {object} p Highlight payload.
 * @param {object} raw Raw record (provides `capturedAt` / `originalId`).
 * @param {number} ingestedAt Ingestion timestamp in ms.
 * @returns {object} `{events, persons, places, items, topics}`.
 */
function normalizeHighlight(p, raw, ingestedAt) {
  // Prefer the highlight's own creation time over capture / ingestion time.
  const occurredAt = parseTime(p.createTime) || raw.capturedAt || ingestedAt;
  const source = buildSource(raw, occurredAt);
  const text = p.markText || "";
  const book = p.bookTitle || "";
  const bookLabel = book ? `《${book}》` : "";

  const highlightEvent = {
    id: newId(),
    type: ENTITY_TYPES.EVENT,
    subtype: EVENT_SUBTYPES.OTHER,
    occurredAt,
    actor: "person-self",
    // The title keeps only the first 60 characters; `text` holds the full passage.
    content: { title: `划线${bookLabel}: ${text.slice(0, 60)}`, text },
    ingestedAt,
    source,
    extra: {
      platform: "weread",
      kind: "highlight",
      bookId: p.bookId || null,
      bookTitle: book || null,
      chapterTitle: p.chapterTitle || null,
      itemRef: p.bookId ? bookItemId(p.bookId) : null,
    },
  };

  return {
    events: [highlightEvent],
    persons: [],
    places: [],
    items: [],
    topics: [],
  };
}
|
|
310
|
+
|
|
311
|
+
/**
 * Normalize a review (thought) payload into a single post event whose text is
 * the review content. No items are emitted; the book is referenced through
 * `extra.itemRef` when a bookId is present.
 *
 * @param {object} p Review payload.
 * @param {object} raw Raw record (provides `capturedAt` / `originalId`).
 * @param {number} ingestedAt Ingestion timestamp in ms.
 * @returns {object} `{events, persons, places, items, topics}`.
 */
function normalizeReview(p, raw, ingestedAt) {
  // Prefer the review's own creation time over capture / ingestion time.
  const occurredAt = parseTime(p.createTime) || raw.capturedAt || ingestedAt;
  const source = buildSource(raw, occurredAt);
  const text = p.content || "";
  const book = p.bookTitle || "";
  const bookLabel = book ? `《${book}》` : "";

  const reviewEvent = {
    id: newId(),
    type: ENTITY_TYPES.EVENT,
    subtype: EVENT_SUBTYPES.POST,
    occurredAt,
    actor: "person-self",
    // The title keeps only the first 60 characters; `text` holds the full review.
    content: { title: `想法${bookLabel}: ${text.slice(0, 60)}`, text },
    ingestedAt,
    source,
    extra: {
      platform: "weread",
      kind: "review",
      bookId: p.bookId || null,
      bookTitle: book || null,
      chapterTitle: p.chapterTitle || null,
      itemRef: p.bookId ? bookItemId(p.bookId) : null,
    },
  };

  return {
    events: [reviewEvent],
    persons: [],
    places: [],
    items: [],
    topics: [],
  };
}
|
|
336
|
+
|
|
337
|
+
module.exports = { WeReadAdapter, NAME, VERSION, SNAPSHOT_SCHEMA_VERSION, VALID_KINDS };
|
package/lib/analysis.js
CHANGED
|
@@ -41,6 +41,13 @@ const DEFAULT_MAX_QUERY_LIMIT = 200;
|
|
|
41
41
|
// through to the default broader path — see _gatherFacts.
|
|
42
42
|
const LATEST_INTENT_FACT_LIMIT = 3;
|
|
43
43
|
|
|
44
|
+
// intent=count illustrative-sample cap. The TOTALS block (vault.stats per-table
|
|
45
|
+
// counts) is the authoritative count and Rule 6 tells the LLM to quote it, NOT
|
|
46
|
+
// count FACTS — so a count question only needs a few example rows, not the full
|
|
47
|
+
// ≤80 sample. Hard-cap to keep prompt budget free on local small models (2-4K
|
|
48
|
+
// token window). See _gatherFacts intent=count branch.
|
|
49
|
+
const COUNT_INTENT_FACT_LIMIT = 5;
|
|
50
|
+
|
|
44
51
|
// intent=list FTS5 augmentation cap. When the question carries a probable
|
|
45
52
|
// entity-name ("提到王老板的消息", "苹果的订单") we run an extra
|
|
46
53
|
// vault.searchEvents(q=term) and append non-duplicate hits to FACTS. Cap
|
|
@@ -214,6 +221,8 @@ class AnalysisEngine {
|
|
|
214
221
|
timeWindow: parsed.timeWindow,
|
|
215
222
|
maxFacts: effMaxFacts,
|
|
216
223
|
vaultTotals: this._gatherVaultTotals(),
|
|
224
|
+
amountSummary:
|
|
225
|
+
parsed.intent === "sum-amount" ? this._gatherAmountSummary(parsed) : undefined,
|
|
217
226
|
});
|
|
218
227
|
|
|
219
228
|
// Telemetry: post-cap prompt size + truncation count. If `truncated` > 0
|
|
@@ -377,6 +386,8 @@ class AnalysisEngine {
|
|
|
377
386
|
timeWindow: parsed.timeWindow,
|
|
378
387
|
maxFacts: effMaxFacts,
|
|
379
388
|
vaultTotals: this._gatherVaultTotals(),
|
|
389
|
+
amountSummary:
|
|
390
|
+
parsed.intent === "sum-amount" ? this._gatherAmountSummary(parsed) : undefined,
|
|
380
391
|
});
|
|
381
392
|
|
|
382
393
|
const durationMs = Date.now() - startedAt;
|
|
@@ -529,6 +540,33 @@ class AnalysisEngine {
|
|
|
529
540
|
}
|
|
530
541
|
}
|
|
531
542
|
|
|
543
|
+
// intent=count routing — "我有多少订单 / 多少条记录". The TOTALS block
|
|
544
|
+
// (vault.stats per-table counts) already carries the authoritative number
|
|
545
|
+
// and Rule 6 tells the LLM to quote it, NOT count FACTS. Pulling the full
|
|
546
|
+
// effMaxFacts (≤80) event sample is wasted prompt budget — expensive on
|
|
547
|
+
// local small models (2-4K token window). Hard-cap to COUNT_INTENT_FACT_LIMIT
|
|
548
|
+
// illustrative rows so the model can cite a couple examples without burning
|
|
549
|
+
// the budget.
|
|
550
|
+
//
|
|
551
|
+
// Adapter + time window pass through (reliable filters); subtype does NOT
|
|
552
|
+
// (keyword classifier too crude — same rationale as sum-amount). Skip
|
|
553
|
+
// persons/items: count of contacts / apps routes via entityFocus above.
|
|
554
|
+
//
|
|
555
|
+
// 0 hits → fall through to the default broader path (safety net for a
|
|
556
|
+
// low-confidence count misclassification of a list question), same as the
|
|
557
|
+
// latest branch.
|
|
558
|
+
if (parsed.intent === "count") {
|
|
559
|
+
const countQ = { limit: Math.min(COUNT_INTENT_FACT_LIMIT, effMaxFacts) };
|
|
560
|
+
if (parsed.filters && parsed.filters.adapter) countQ.adapter = parsed.filters.adapter;
|
|
561
|
+
if (parsed.timeWindow) {
|
|
562
|
+
if (Number.isFinite(parsed.timeWindow.since)) countQ.since = parsed.timeWindow.since;
|
|
563
|
+
if (Number.isFinite(parsed.timeWindow.until)) countQ.until = parsed.timeWindow.until;
|
|
564
|
+
}
|
|
565
|
+
const countEvents = this.vault.queryEvents(countQ);
|
|
566
|
+
if (countEvents.length > 0) return countEvents;
|
|
567
|
+
// 0 results → fall through to default broader path below.
|
|
568
|
+
}
|
|
569
|
+
|
|
532
570
|
// intent=sum-amount routing — "总共花了多少" / "在淘宝花了多少钱"
|
|
533
571
|
// only needs events from amount-bearing subtypes (order/payment/
|
|
534
572
|
// transfer/income). Pulling messages / visits / browses wastes
|
|
@@ -737,6 +775,32 @@ class AnalysisEngine {
|
|
|
737
775
|
* not expose `stats()`; falling back to undefined makes prompt-builder
|
|
738
776
|
* skip the block entirely.
|
|
739
777
|
*/
|
|
778
|
+
/**
|
|
779
|
+
* intent=sum-amount Phase 2: pull the authoritative amount total from the
|
|
780
|
+
* vault (SQL SUM, not a FACTS sample) so prompt-builder can emit an
|
|
781
|
+
* AMOUNT_SUM block. Scoped by adapter + time window (same reliable filters
|
|
782
|
+
* _gatherFacts uses; subtype is intentionally NOT passed — the keyword
|
|
783
|
+
* classifier is too crude, and non-amount events are excluded by the SUM
|
|
784
|
+
* anyway). Returns undefined when there's nothing to sum (count===0) so the
|
|
785
|
+
* block is omitted rather than showing a misleading ¥0.
|
|
786
|
+
*/
|
|
787
|
+
_gatherAmountSummary(parsed) {
|
|
788
|
+
if (typeof this.vault.sumEventAmount !== "function") return undefined;
|
|
789
|
+
try {
|
|
790
|
+
const f = {};
|
|
791
|
+
if (parsed.filters && parsed.filters.adapter) f.adapter = parsed.filters.adapter;
|
|
792
|
+
if (parsed.timeWindow) {
|
|
793
|
+
if (Number.isFinite(parsed.timeWindow.since)) f.since = parsed.timeWindow.since;
|
|
794
|
+
if (Number.isFinite(parsed.timeWindow.until)) f.until = parsed.timeWindow.until;
|
|
795
|
+
}
|
|
796
|
+
const r = this.vault.sumEventAmount(f);
|
|
797
|
+
if (!r || !r.count) return undefined;
|
|
798
|
+
return r;
|
|
799
|
+
} catch (_e) {
|
|
800
|
+
return undefined;
|
|
801
|
+
}
|
|
802
|
+
}
|
|
803
|
+
|
|
740
804
|
_gatherVaultTotals() {
|
|
741
805
|
if (typeof this.vault.stats !== "function") return undefined;
|
|
742
806
|
try {
|
|
@@ -761,6 +825,7 @@ module.exports = {
|
|
|
761
825
|
DEFAULT_MAX_FACTS,
|
|
762
826
|
DEFAULT_MAX_QUERY_LIMIT,
|
|
763
827
|
LATEST_INTENT_FACT_LIMIT,
|
|
828
|
+
COUNT_INTENT_FACT_LIMIT,
|
|
764
829
|
LIST_INTENT_FTS_LIMIT,
|
|
765
830
|
SUM_AMOUNT_SUBTYPES,
|
|
766
831
|
SUM_AMOUNT_MIN_PER_SUBTYPE,
|
package/lib/index.js
CHANGED
|
@@ -22,6 +22,8 @@ const migrations = require("./migrations");
|
|
|
22
22
|
const keyProviders = require("./key-providers");
|
|
23
23
|
const { LocalVault } = require("./vault");
|
|
24
24
|
const adapterSpec = require("./adapter-spec");
|
|
25
|
+
const adapterReadiness = require("./adapter-readiness");
|
|
26
|
+
const adapterGuide = require("./adapter-guide");
|
|
25
27
|
const kgDerive = require("./kg-derive");
|
|
26
28
|
const ragDerive = require("./rag-derive");
|
|
27
29
|
const { AdapterRegistry, DEFAULT_BATCH_SIZE } = require("./registry");
|
|
@@ -51,7 +53,20 @@ const { DouyinAdapter } = require("./adapters/social-douyin");
|
|
|
51
53
|
const { XiaohongshuAdapter } = require("./adapters/social-xiaohongshu");
|
|
52
54
|
const { ToutiaoAdapter } = require("./adapters/social-toutiao");
|
|
53
55
|
const { KuaishouAdapter } = require("./adapters/social-kuaishou");
|
|
56
|
+
// FAMILY-23 v0.1 — 家庭守护 telemetry collectors (cookie-scrape placeholder).
|
|
57
|
+
const { GenshinAdapter } = require("./adapters/game-genshin");
|
|
58
|
+
const { HonorOfKingsAdapter } = require("./adapters/game-honor-of-kings");
|
|
59
|
+
const { ZuoyebangAdapter } = require("./adapters/edu-zuoyebang");
|
|
60
|
+
const { AlipayAdapter } = require("./adapters/finance-alipay");
|
|
61
|
+
const { HuaweiLearningAdapter } = require("./adapters/edu-huawei-learning");
|
|
54
62
|
const { QQAdapter } = require("./adapters/messaging-qq");
|
|
63
|
+
const { WeChatPcAdapter } = require("./adapters/wechat-pc");
|
|
64
|
+
const { QQPcAdapter } = require("./adapters/qq-pc");
|
|
65
|
+
const { AppleHealthAdapter } = require("./adapters/apple-health");
|
|
66
|
+
const { NeteaseMusicAdapter } = require("./adapters/netease-music");
|
|
67
|
+
const { WeReadAdapter } = require("./adapters/weread");
|
|
68
|
+
const { DingTalkPcAdapter } = require("./adapters/dingtalk-pc");
|
|
69
|
+
const { FeishuPcAdapter } = require("./adapters/feishu-pc");
|
|
55
70
|
const { TelegramAdapter } = require("./adapters/messaging-telegram");
|
|
56
71
|
const { WhatsAppAdapter } = require("./adapters/messaging-whatsapp");
|
|
57
72
|
const entityResolver = require("./entity-resolver");
|
|
@@ -111,6 +126,18 @@ module.exports = {
|
|
|
111
126
|
SENSITIVITY_LEVELS: adapterSpec.SENSITIVITY_LEVELS,
|
|
112
127
|
assertAdapter: adapterSpec.assertAdapter,
|
|
113
128
|
|
|
129
|
+
// Adapter readiness (why-can't-I-collect descriptors)
|
|
130
|
+
describeReadiness: adapterReadiness.describeReadiness,
|
|
131
|
+
categoryForMode: adapterReadiness.categoryForMode,
|
|
132
|
+
READINESS_CATEGORY: adapterReadiness.READINESS_CATEGORY,
|
|
133
|
+
READINESS_STATUS: adapterReadiness.READINESS_STATUS,
|
|
134
|
+
READINESS_REASONS: adapterReadiness.READINESS_REASONS,
|
|
135
|
+
|
|
136
|
+
// Adapter import guides (step-by-step "how to import this source")
|
|
137
|
+
getAdapterGuide: adapterGuide.getAdapterGuide,
|
|
138
|
+
ADAPTER_DISPLAY_NAMES: adapterGuide.DISPLAY_NAMES,
|
|
139
|
+
ADAPTER_CATEGORY_GUIDES: adapterGuide.CATEGORY_GUIDES,
|
|
140
|
+
|
|
114
141
|
// KG + RAG derivation
|
|
115
142
|
triple: kgDerive.triple,
|
|
116
143
|
deriveEventTriples: kgDerive.deriveEventTriples,
|
|
@@ -256,7 +283,19 @@ module.exports = {
|
|
|
256
283
|
XiaohongshuAdapter,
|
|
257
284
|
ToutiaoAdapter,
|
|
258
285
|
KuaishouAdapter,
|
|
286
|
+
GenshinAdapter,
|
|
287
|
+
HonorOfKingsAdapter,
|
|
288
|
+
ZuoyebangAdapter,
|
|
289
|
+
AlipayAdapter,
|
|
290
|
+
HuaweiLearningAdapter,
|
|
259
291
|
QQAdapter,
|
|
292
|
+
WeChatPcAdapter,
|
|
293
|
+
QQPcAdapter,
|
|
294
|
+
AppleHealthAdapter,
|
|
295
|
+
NeteaseMusicAdapter,
|
|
296
|
+
WeReadAdapter,
|
|
297
|
+
DingTalkPcAdapter,
|
|
298
|
+
FeishuPcAdapter,
|
|
260
299
|
TelegramAdapter,
|
|
261
300
|
WhatsAppAdapter,
|
|
262
301
|
|