@chainlesschain/personal-data-hub 0.4.28 → 0.4.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -5
- package/__tests__/adapters/social-douyin-adb-usage-profile.test.js +229 -0
- package/__tests__/adapters/social-douyin-adb-watch-history.test.js +88 -11
- package/__tests__/adapters/social-toutiao-adb-article.test.js +155 -0
- package/__tests__/analysis-skills.test.js +75 -0
- package/__tests__/query-parser.test.js +63 -0
- package/lib/adapters/social-douyin-adb/usage-profile-reader.js +253 -0
- package/lib/adapters/social-douyin-adb/watch-history-reader.js +104 -31
- package/lib/adapters/social-toutiao-adb/article-reader.js +202 -0
- package/lib/analysis-skills/overview.js +24 -4
- package/lib/analysis-skills/spending.js +63 -2
- package/lib/analysis-skills/timeline.js +11 -6
- package/lib/query-parser.js +38 -8
- package/package.json +1 -1
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Toutiao on-device article reader — recovers the user's feed/read articles
|
|
3
|
+
* from the app's local `news_article.db` (table `article`), a plaintext SQLite
|
|
4
|
+
* DB. No signing/encryption needed.
|
|
5
|
+
*
|
|
6
|
+
* Why this exists (real-device 2026-06-18, user's exported plaintext DB):
|
|
7
|
+
* - `article` rows are the local feed cache (48 rows on the test export). The
|
|
8
|
+
* title is NOT a column — it lives in the `share_info` JSON blob
|
|
9
|
+
* ({title, share_url, ...}); `ext_json` is a heavier fallback. `behot_time`
|
|
10
|
+
* is when the item surfaced; `read_timestamp>0` ⇒ actually opened;
|
|
11
|
+
* `is_user_digg`/`is_user_repin` ⇒ engagement.
|
|
12
|
+
* - Modest signal (feed-shown ≈ weak interest; digg/read ≈ strong), but
|
|
13
|
+
* titled + plaintext, so it's a usable "articles I browsed" stream.
|
|
14
|
+
*
|
|
15
|
+
* Emits BROWSE events under source.adapter `social-toutiao` (the canonical
|
|
16
|
+
* adapter name, so byApp aggregation attributes correctly). Stable originalId
|
|
17
|
+
* (`social-toutiao:article:<group_id>`) → re-ingest UPDATES, not duplicates.
|
|
18
|
+
*
|
|
19
|
+
* Authorization: only on your own device/account.
|
|
20
|
+
*/
|
|
21
|
+
"use strict";
|
|
22
|
+
|
|
23
|
+
const { newId } = require("../../ids");
|
|
24
|
+
const {
|
|
25
|
+
_internals: { loadDatabaseClass },
|
|
26
|
+
} = require("../social-bilibili-adb/chromium-cookies-reader");
|
|
27
|
+
|
|
28
|
+
/** SQLite table inside news_article.db that this reader queries. */
const ARTICLE_TABLE = "article";

/** Stamped onto every emitted event as source.adapterVersion. */
const READER_VERSION = "toutiao-article-0.1";

/** Trailing " - 今日头条" brand suffix that is removed from extracted titles. */
const TITLE_SUFFIX = /\s*-\s*今日头条\s*$/;
|
|
31
|
+
|
|
32
|
+
/**
 * Normalise an epoch given in either seconds or milliseconds to whole
 * milliseconds. Values above 1e12 are taken to be milliseconds already;
 * everything else is treated as seconds.
 *
 * @param {*} v  anything `Number()` can coerce
 * @returns {number|null} integer ms, or null when `v` is not a finite positive number
 */
function toEpochMs(v) {
  const value = Number(v);
  const usable = Number.isFinite(value) && value > 0;
  if (!usable) return null;

  const millis = value > 1e12 ? value : value * 1000;
  return Math.floor(millis);
}
|
|
38
|
+
|
|
39
|
+
/**
 * JSON.parse that never throws.
 *
 * @param {*} s  candidate JSON text
 * @returns {*} the parsed value, or null when `s` is not a string, is shorter
 *   than two characters, or is not valid JSON
 */
function safeParse(s) {
  const worthParsing = typeof s === "string" && s.length >= 2;
  if (!worthParsing) return null;

  let parsed = null;
  try {
    parsed = JSON.parse(s);
  } catch (_e) {
    // Malformed blob: report "nothing" rather than failing the row.
    parsed = null;
  }
  return parsed;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Resolve a row's title. It is read from the `share_info` JSON blob
 * (`title`, then `share_title`); when that yields nothing usable, from the
 * `ext_json` blob (`title`, `share_title`, `article.title`). The brand suffix
 * is stripped and the result trimmed.
 *
 * A candidate only counts when it is a string that is still non-blank after
 * the suffix is stripped, so a blank, non-string or suffix-only value in
 * `share_info` no longer hides a usable title in `ext_json`.
 *
 * @param {object} row  raw `article` row (reads `share_info` and `ext_json`)
 * @returns {string|null} cleaned title, or null when none can be recovered
 */
function extractTitle(row) {
  // First candidate that survives cleaning, or null.
  const firstUsable = (candidates) => {
    for (const candidate of candidates) {
      if (typeof candidate !== "string") continue;
      const cleaned = candidate.replace(TITLE_SUFFIX, "").trim();
      if (cleaned) return cleaned;
    }
    return null;
  };

  const si = safeParse(row.share_info);
  const fromShareInfo = si ? firstUsable([si.title, si.share_title]) : null;
  if (fromShareInfo) return fromShareInfo;

  // ext_json is parsed only when share_info produced nothing.
  const ej = safeParse(row.ext_json);
  if (!ej) return null;
  return firstUsable([ej.title, ej.share_title, ej.article && ej.article.title]);
}
|
|
59
|
+
|
|
60
|
+
/**
 * Pick the article's share URL — `share_info.share_url` first, then the row's
 * own `share_url` — and cut everything from the first "?" onward so the same
 * article yields the same URL regardless of share/tracking parameters.
 *
 * @param {object} row  raw `article` row (reads `share_info` and `share_url`)
 * @returns {string|null} URL without its query string, or null when no
 *   non-empty string URL is available
 */
function extractUrl(row) {
  const shareInfo = safeParse(row.share_info);

  let candidate = shareInfo ? shareInfo.share_url : null;
  if (!candidate) candidate = row.share_url;

  if (typeof candidate !== "string" || candidate === "") return null;

  const queryStart = candidate.indexOf("?");
  return queryStart === -1 ? candidate : candidate.slice(0, queryStart);
}
|
|
67
|
+
|
|
68
|
+
/**
 * Read the `category_new` query parameter from the row's `share_url`.
 *
 * @param {object} row  raw `article` row; only `share_url` is read
 * @returns {string|null} the percent-decoded category; the raw value when its
 *   percent-encoding is malformed; null when the parameter is absent
 */
function extractCategory(row) {
  const u = row.share_url || "";
  const m = /[?&]category_new=([^&]+)/.exec(u);
  if (!m) return null;
  try {
    return decodeURIComponent(m[1]);
  } catch (_e) {
    // decodeURIComponent throws URIError on a malformed escape (e.g. a lone
    // "%"). One bad URL must not abort the whole read, so keep the raw value.
    return m[1];
  }
}
|
|
73
|
+
|
|
74
|
+
/**
 * Open the SQLite file read-only, load rows from the `article` table and turn
 * them into plain article records. Rows come back ordered by `behot_time`
 * descending when that column exists. Rows with no group/item id, or with no
 * recoverable title, are dropped.
 *
 * @param {string} dbPath  path passed to the Database constructor
 * @param {object} [opts]
 * @param {number} [opts.limit=5000]  row cap; anything other than a positive
 *   integer falls back to 5000
 * @param {Function} [opts._databaseClass]  Database constructor to use instead
 *   of the one returned by loadDatabaseClass()
 * @returns {{articles: Array<{groupId,title,url,category,behotTime,readTimestamp,digg,repin}>}}
 *   `articles` is empty when the table does not exist
 */
function readToutiaoArticles(dbPath, opts = {}) {
  const Database = opts._databaseClass || loadDatabaseClass();
  const hasCustomLimit = Number.isInteger(opts.limit) && opts.limit > 0;
  const limit = hasCustomLimit ? opts.limit : 5000;
  const db = new Database(dbPath, { readonly: true });

  try {
    const tableRow = db
      .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name=?")
      .get(ARTICLE_TABLE);
    if (!tableRow) return { articles: [] };

    // Column presence is per-table, so resolve it once instead of per row.
    const columnInfo = db.prepare(`PRAGMA table_info("${ARTICLE_TABLE}")`).all();
    const cols = new Set(columnInfo.map((c) => c.name));
    const hasBehot = cols.has("behot_time");
    const hasReadTimestamp = cols.has("read_timestamp");
    const hasDigg = cols.has("is_user_digg");
    const hasRepin = cols.has("is_user_repin");

    const orderClause = hasBehot ? " ORDER BY behot_time DESC" : "";
    const sql = `SELECT * FROM "${ARTICLE_TABLE}"${orderClause} LIMIT ${limit}`;
    const rows = db.prepare(sql).all();

    // Row → record, or null when the row carries no id or no title.
    const toRecord = (r) => {
      let groupId = null;
      if (r.group_id != null) {
        groupId = String(r.group_id);
      } else if (r.item_id != null) {
        groupId = String(r.item_id);
      }
      if (!groupId) return null;

      const title = extractTitle(r);
      if (!title) return null; // untitled cache rows carry no signal

      return {
        groupId,
        title,
        url: extractUrl(r),
        category: extractCategory(r),
        behotTime: hasBehot ? toEpochMs(r.behot_time) : null,
        readTimestamp: hasReadTimestamp ? toEpochMs(r.read_timestamp) : null,
        digg: hasDigg ? !!r.is_user_digg : false,
        repin: hasRepin ? !!r.is_user_repin : false,
      };
    };

    const articles = [];
    for (const r of rows) {
      const record = toRecord(r);
      if (record) articles.push(record);
    }
    return { articles };
  } finally {
    try {
      db.close();
    } catch (_e) {
      /* best-effort */
    }
  }
}
|
|
127
|
+
|
|
128
|
+
/**
 * Convert article records into BROWSE events attributed to the
 * `social-toutiao` adapter. Records lacking a groupId or title are skipped.
 *
 * An event's `occurredAt` is the record's read timestamp when it has one,
 * otherwise its behot time, otherwise `now`.
 *
 * @param {Array<object>|null|undefined} articles  records as produced by readToutiaoArticles
 * @param {object} [opts]
 * @param {number} [opts.now]  ingestion clock in ms; defaults to Date.now()
 * @returns {{events: object[]}}
 */
function buildArticleEvents(articles, opts = {}) {
  const now = Number.isFinite(opts.now) ? opts.now : Date.now();
  // A usable timestamp is a finite, non-zero number.
  const isUsableTimestamp = (t) => Number.isFinite(t) && Boolean(t);

  const events = [];
  for (const article of articles || []) {
    if (!article || !article.groupId || !article.title) continue;

    let occurredAt = now;
    if (isUsableTimestamp(article.readTimestamp)) {
      occurredAt = article.readTimestamp;
    } else if (isUsableTimestamp(article.behotTime)) {
      occurredAt = article.behotTime;
    }

    const wasRead = Number.isFinite(article.readTimestamp) && article.readTimestamp > 0;

    events.push({
      id: newId(),
      type: "event",
      subtype: "browse",
      occurredAt,
      actor: "person-self",
      content: { title: article.title, text: article.title },
      ingestedAt: now,
      source: {
        adapter: "social-toutiao",
        adapterVersion: READER_VERSION,
        // Stable per article, so re-ingesting the same DB maps to the same id.
        originalId: `social-toutiao:article:${article.groupId}`,
        capturedAt: occurredAt,
        capturedBy: "sqlite",
      },
      extra: {
        platform: "toutiao",
        kind: "article",
        groupId: article.groupId,
        url: article.url || null,
        category: article.category || null,
        digg: article.digg,
        repin: article.repin,
        read: wasRead,
      },
    });
  }
  return { events };
}
|
|
170
|
+
|
|
171
|
+
/**
 * End-to-end ingest: read the article DB, build BROWSE events, and hand them
 * to the vault in one batch. The vault is not called when there are no events.
 *
 * @param {object} vault   must expose a `putBatch` function
 * @param {string} dbPath  path to news_article.db (non-empty string)
 * @param {object} [opts]  forwarded to readToutiaoArticles and buildArticleEvents
 * @returns {{ingested: number, articles: number, digg: number, read: number}}
 *   `ingested` is the `events` count reported by putBatch (0 when absent)
 * @throws {TypeError} when `vault` lacks putBatch or `dbPath` is not a non-empty string
 */
function articlesToVault(vault, dbPath, opts = {}) {
  const vaultUsable = Boolean(vault) && typeof vault.putBatch === "function";
  if (!vaultUsable) {
    throw new TypeError("articlesToVault: vault with putBatch required");
  }
  const pathUsable = typeof dbPath === "string" && dbPath !== "";
  if (!pathUsable) {
    throw new TypeError("articlesToVault: dbPath required");
  }

  const { articles } = readToutiaoArticles(dbPath, opts);
  const { events } = buildArticleEvents(articles, opts);

  let res = { events: 0 };
  if (events.length) {
    res = vault.putBatch({ events });
  }
  const ingested = res.events || 0;

  let digg = 0;
  let read = 0;
  for (const a of articles) {
    if (a.digg) digg += 1;
    if (Number.isFinite(a.readTimestamp) && a.readTimestamp > 0) read += 1;
  }

  return { ingested, articles: articles.length, digg, read };
}
|
|
195
|
+
|
|
196
|
+
module.exports = {
|
|
197
|
+
ARTICLE_TABLE,
|
|
198
|
+
readToutiaoArticles,
|
|
199
|
+
buildArticleEvents,
|
|
200
|
+
articlesToVault,
|
|
201
|
+
_internals: { toEpochMs, extractTitle, extractUrl, extractCategory },
|
|
202
|
+
};
|
|
@@ -39,6 +39,18 @@ class OverviewSkill extends AnalysisSkill {
|
|
|
39
39
|
if (until != null) q.until = until;
|
|
40
40
|
const events = this.vault.queryEvents(q) || [];
|
|
41
41
|
|
|
42
|
+
// Accurate, uncapped app/type/total counts via SQL GROUP BY. queryEvents
|
|
43
|
+
// hard-caps at 10k rows, so deriving byApp/byType/total from `events`
|
|
44
|
+
// silently undercounts any app whose data is older than the recent-10k
|
|
45
|
+
// window — e.g. on a vault where one chat app dominates recent events,
|
|
46
|
+
// social-douyin showed 10 instead of its true 232. facetCounts honors the
|
|
47
|
+
// same since/until. (Row-derived spend/contacts/monthly stay sample-based —
|
|
48
|
+
// they need actual rows.)
|
|
49
|
+
const facets =
|
|
50
|
+
typeof this.vault.facetCounts === "function"
|
|
51
|
+
? this.vault.facetCounts({ since, until })
|
|
52
|
+
: null;
|
|
53
|
+
|
|
42
54
|
const byApp = new Map();
|
|
43
55
|
const byType = new Map();
|
|
44
56
|
const byMonth = new Map();
|
|
@@ -81,7 +93,11 @@ class OverviewSkill extends AnalysisSkill {
|
|
|
81
93
|
if (citations.length < 50) citations.push(e.id);
|
|
82
94
|
}
|
|
83
95
|
|
|
84
|
-
const byAppArr =
|
|
96
|
+
const byAppArr = (
|
|
97
|
+
facets
|
|
98
|
+
? Object.entries(facets.byAdapter).map(([app, count]) => ({ app, count }))
|
|
99
|
+
: [...byApp.entries()].map(([app, count]) => ({ app, count }))
|
|
100
|
+
).sort((a, b) => b.count - a.count);
|
|
85
101
|
const topContacts = [...contacts.entries()]
|
|
86
102
|
.map(([personId, v]) => ({
|
|
87
103
|
personId,
|
|
@@ -93,8 +109,8 @@ class OverviewSkill extends AnalysisSkill {
|
|
|
93
109
|
.slice(0, topN);
|
|
94
110
|
|
|
95
111
|
const summary = {
|
|
96
|
-
totalEvents: events.length,
|
|
97
|
-
appsActive: byApp.size,
|
|
112
|
+
totalEvents: facets ? facets.total : events.length,
|
|
113
|
+
appsActive: facets ? Object.keys(facets.byAdapter).length : byApp.size,
|
|
98
114
|
period: { since: since || null, until: until || null },
|
|
99
115
|
topAppName: byAppArr.length ? byAppArr[0].app : null,
|
|
100
116
|
};
|
|
@@ -103,7 +119,11 @@ class OverviewSkill extends AnalysisSkill {
|
|
|
103
119
|
skill: "analysis.overview",
|
|
104
120
|
summary,
|
|
105
121
|
byApp: byAppArr,
|
|
106
|
-
byType:
|
|
122
|
+
byType: (
|
|
123
|
+
facets
|
|
124
|
+
? Object.entries(facets.bySubtype).map(([type, count]) => ({ type, count }))
|
|
125
|
+
: [...byType.entries()].map(([type, count]) => ({ type, count }))
|
|
126
|
+
).sort((a, b) => b.count - a.count),
|
|
107
127
|
monthlyActivity: [...byMonth.entries()].map(([monthKey, count]) => ({ monthKey, count })).sort((a, b) => a.monthKey.localeCompare(b.monthKey)),
|
|
108
128
|
topContacts,
|
|
109
129
|
spending: {
|
|
@@ -30,6 +30,13 @@ const { AnalysisSkill } = require("./base");
|
|
|
30
30
|
|
|
31
31
|
const SUPPORTED_DIMENSIONS = new Set(["merchant", "category", "counterparty", "month"]);
|
|
32
32
|
|
|
33
|
+
// Event subtypes whose events carry content.amount. One list shared by the
// per-subtype row fetch and the SQL-sum totals so the two cannot drift apart.
// "order" is what the Phase 7 shopping adapters emit.
const PAYMENT_SUBTYPES = [
  "payment",
  "transfer",
  "refund",
  "utility",
  "redenvelope",
  "investment",
  "income",
  "order",
];
|
|
39
|
+
|
|
33
40
|
class SpendingSkill extends AnalysisSkill {
|
|
34
41
|
constructor(opts) {
|
|
35
42
|
super({ ...opts, name: "analysis.spending" });
|
|
@@ -48,6 +55,19 @@ class SpendingSkill extends AnalysisSkill {
|
|
|
48
55
|
const filtered = this._applyFilters(events, options);
|
|
49
56
|
|
|
50
57
|
const summary = this._summarize(filtered, since, until);
|
|
58
|
+
// The row fetch caps at 5000 events PER subtype — a heavy alipay/wechat-pay
|
|
59
|
+
// user with >5000 payments would have their TOTAL silently undercounted.
|
|
60
|
+
// When no row-only filter is active (merchant text / personId / direction),
|
|
61
|
+
// recompute the headline totals from the uncapped SQL SUM. Breakdown / trend
|
|
62
|
+
// / citations stay row-sampled (they need actual rows).
|
|
63
|
+
const accurate = this._accurateTotals({ since, until }, options);
|
|
64
|
+
if (accurate) {
|
|
65
|
+
summary.totalSpend = accurate.totalSpend;
|
|
66
|
+
summary.totalIncome = accurate.totalIncome;
|
|
67
|
+
summary.netFlow = accurate.netFlow;
|
|
68
|
+
summary.eventCount = accurate.eventCount;
|
|
69
|
+
if (accurate.currency) summary.currency = accurate.currency;
|
|
70
|
+
}
|
|
51
71
|
const breakdown = this._breakdown(filtered, dimension, topN);
|
|
52
72
|
const trend = this._monthlyTrend(filtered);
|
|
53
73
|
const citations = filtered.slice(0, 50).map((e) => e.id);
|
|
@@ -72,8 +92,7 @@ class SpendingSkill extends AnalysisSkill {
|
|
|
72
92
|
// Phase 7 shopping adapters emit subtype="order" — must include so
|
|
73
93
|
// spending aggregates cover Taobao/JD/Meituan along with Alipay
|
|
74
94
|
// (payment/transfer) + Email (refund) etc.
|
|
75
|
-
const
|
|
76
|
-
for (const subtype of subtypes) {
|
|
95
|
+
for (const subtype of PAYMENT_SUBTYPES) {
|
|
77
96
|
const q = { subtype, limit: 5000 };
|
|
78
97
|
if (since != null) q.since = since;
|
|
79
98
|
if (until != null) q.until = until;
|
|
@@ -88,6 +107,48 @@ class SpendingSkill extends AnalysisSkill {
|
|
|
88
107
|
return events;
|
|
89
108
|
}
|
|
90
109
|
|
|
110
|
+
/**
|
|
111
|
+
* Accurate (uncapped) headline totals via vault.sumEventAmount — used only
|
|
112
|
+
* when the query has no filter SQL can't express. merchantFilter (text match
|
|
113
|
+
* on title/counterparty) and personId (participant expansion) need rows, and
|
|
114
|
+
* a direction filter changes which total/count is meaningful, so any of them
|
|
115
|
+
* → return null and fall back to the row-sampled summary. Returns null when
|
|
116
|
+
* the vault lacks sumEventAmount (older vault → original behavior).
|
|
117
|
+
*/
|
|
118
|
+
_accurateTotals({ since, until }, options) {
|
|
119
|
+
if (
|
|
120
|
+
(typeof options.merchantFilter === "string" && options.merchantFilter.length > 0) ||
|
|
121
|
+
(typeof options.personId === "string" && options.personId.length > 0) ||
|
|
122
|
+
options.direction === "out" ||
|
|
123
|
+
options.direction === "in"
|
|
124
|
+
) {
|
|
125
|
+
return null;
|
|
126
|
+
}
|
|
127
|
+
if (typeof this.vault.sumEventAmount !== "function") return null;
|
|
128
|
+
let totalSpend = 0;
|
|
129
|
+
let totalIncome = 0;
|
|
130
|
+
let eventCount = 0;
|
|
131
|
+
let currency = null;
|
|
132
|
+
for (const subtype of PAYMENT_SUBTYPES) {
|
|
133
|
+
const q = { subtype };
|
|
134
|
+
if (since != null) q.since = since;
|
|
135
|
+
if (until != null) q.until = until;
|
|
136
|
+
const r = this.vault.sumEventAmount(q);
|
|
137
|
+
if (!r) continue;
|
|
138
|
+
totalSpend += (r.byDirection && r.byDirection.out) || 0;
|
|
139
|
+
totalIncome += (r.byDirection && r.byDirection.in) || 0;
|
|
140
|
+
eventCount += r.count || 0;
|
|
141
|
+
if (!currency && r.count > 0 && r.currency) currency = r.currency;
|
|
142
|
+
}
|
|
143
|
+
return {
|
|
144
|
+
totalSpend: Math.round(totalSpend * 100) / 100,
|
|
145
|
+
totalIncome: Math.round(totalIncome * 100) / 100,
|
|
146
|
+
netFlow: Math.round((totalIncome - totalSpend) * 100) / 100,
|
|
147
|
+
eventCount,
|
|
148
|
+
currency,
|
|
149
|
+
};
|
|
150
|
+
}
|
|
151
|
+
|
|
91
152
|
_applyFilters(events, options) {
|
|
92
153
|
let out = events;
|
|
93
154
|
if (typeof options.merchantFilter === "string" && options.merchantFilter.length > 0) {
|
|
@@ -63,12 +63,17 @@ class TimelineSkill extends AnalysisSkill {
|
|
|
63
63
|
}
|
|
64
64
|
|
|
65
65
|
_fetchEvents({ since, until }, limit) {
|
|
66
|
-
// Exclude inventory-snapshot
|
|
67
|
-
// roster from system-data-android)
|
|
68
|
-
// collection-time occurredAt — tens of thousands
|
|
69
|
-
// recent timestamp and would
|
|
70
|
-
//
|
|
71
|
-
|
|
66
|
+
// Exclude inventory-snapshot + aggregate-baseline events. The snapshots
|
|
67
|
+
// (installed-app / contact roster from system-data-android) carry a
|
|
68
|
+
// synthetic collection-time occurredAt — tens of thousands cluster at one
|
|
69
|
+
// recent timestamp and would crowd out real activity. `app-usage-profile`
|
|
70
|
+
// is a single rolling aggregate (e.g. douyin "24天/108h" baseline), not a
|
|
71
|
+
// discrete activity, so it doesn't belong in a chronological narrative.
|
|
72
|
+
// All remain in the vault for facet counts / overview.
|
|
73
|
+
const q = {
|
|
74
|
+
limit,
|
|
75
|
+
excludeExtraKinds: ["app-snapshot", "contact-snapshot", "app-usage-profile"],
|
|
76
|
+
};
|
|
72
77
|
if (since != null) q.since = since;
|
|
73
78
|
if (until != null) q.until = until;
|
|
74
79
|
const events = this.vault.queryEvents(q) || [];
|
package/lib/query-parser.js
CHANGED
|
@@ -127,8 +127,16 @@ function parseTimeWindow(text, now = Date.now()) {
|
|
|
127
127
|
if (m) {
|
|
128
128
|
const n = parseInt(m[1], 10);
|
|
129
129
|
if (Number.isFinite(n) && n > 0) {
|
|
130
|
+
// Safe month subtraction. Naive `setMonth(getMonth()-n)` overflows on a
|
|
131
|
+
// month-end day into a shorter month (e.g. Mar 31 −1mo → "Feb 31" → Mar 3),
|
|
132
|
+
// which silently DROPS the whole previous month from the window. Pin to
|
|
133
|
+
// day 1 first, then clamp the day to the target month's length.
|
|
130
134
|
const target = new Date(now);
|
|
135
|
+
const day = target.getDate();
|
|
136
|
+
target.setDate(1);
|
|
131
137
|
target.setMonth(target.getMonth() - n);
|
|
138
|
+
const lastDay = new Date(target.getFullYear(), target.getMonth() + 1, 0).getDate();
|
|
139
|
+
target.setDate(Math.min(day, lastDay));
|
|
132
140
|
return { since: target.getTime(), until: now };
|
|
133
141
|
}
|
|
134
142
|
}
|
|
@@ -152,7 +160,10 @@ const SUBTYPE_KEYWORDS = [
|
|
|
152
160
|
{ subtype: "order", patterns: [/(订单|下单|买了|购买|下了几单|下了多少单|order)/i] },
|
|
153
161
|
{ subtype: "payment", patterns: [/(支付|付款|花了|花费|消费|开销|payment|spent|spend)/i] },
|
|
154
162
|
{ subtype: "transfer", patterns: [/(转账|转给|转钱|transfer)/i] },
|
|
155
|
-
|
|
163
|
+
// NOTE: bare 收到 ("receive") is deliberately NOT here — you 收到 messages /
|
|
164
|
+
// packages / 红包 too, so it stole "收到多少消息" → income (income is checked
|
|
165
|
+
// before message). 收到转账 still classifies as transfer (checked earlier).
|
|
166
|
+
{ subtype: "income", patterns: [/(收入|工资|进账|入账|income)/i] },
|
|
156
167
|
{ subtype: "message", patterns: [/(聊天|消息|聊了|对话|message|chat)/i] },
|
|
157
168
|
{ subtype: "post", patterns: [/(朋友圈|发了|动态|moment|post)/i] },
|
|
158
169
|
{ subtype: "visit", patterns: [/(去过|到过|visited|去了|来到)/i] },
|
|
@@ -201,20 +212,39 @@ function parseFilters(text) {
|
|
|
201
212
|
|
|
202
213
|
// ─── Intent detection (sum / count / list / latest / ...) ────────────────
|
|
203
214
|
|
|
215
|
+
// Amount/money words — BOTH the spend side (花/消费/开销/spent/spend/金额) and
// the income side (收入/进账/到账/赚/挣/income). A question carrying one of these
// plus a "多少/how much" wants a SUM (sumEventAmount), not a row list.
// Case-insensitive like every other pattern with English alternatives, and it
// accepts `spend` / `income` because SUBTYPE_KEYWORDS already recognises them —
// otherwise "How much did I spend…" would classify as payment yet fall
// through to intent=list.
const AMOUNT_HINT =
  /(花|花了|花费|消费|开销|spent|spend|金额|多少钱|amount|收入|进账|到账|入账|赚|挣|income)/i;
// Count quantifier: "多少X" or "几X" for a measure word. 钱 is deliberately
// EXCLUDED so "多少钱" routes to sum-amount, not count. Symmetric 多少/几 (the
// old pattern had 几条/几单 but not 多少条/多少单, and 多少部 but not 几部).
const COUNT_QUANTIFIER =
  /(多少|几)(次|条|单|个|家|人|张|部|篇|集|本|件|笔|顿|杯)|how\s+many|count\s+of/i;
const HOW_MUCH = /(多少钱|多少|how\s+much)/i;

/**
 * Classify what kind of answer a natural-language question is asking for.
 *
 * Rules are checked in order; the first match wins:
 *  1. explicit total marker (总共/合计/sum/total…) → "sum-amount" when an
 *     amount word is present, otherwise "count"
 *  2. count quantifier (多少条 / 几单 / how many…) → "count"
 *  3. amount word together with 多少 / how much → "sum-amount"
 *  4. recency word (最近/最新/latest/recent) → "latest"
 *  5. anything else → "list"
 *
 * @param {*} text  the user's question
 * @returns {"sum-amount"|"count"|"latest"|"list"} "list" for non-string input
 */
function parseIntent(text) {
  if (typeof text !== "string") return "list";
  if (/(总共|共多少|加起来|sum|total|合计)/i.test(text)) {
    // Distinguish amount vs count by presence of amount words (incl. income,
    // so "总共收入多少" is sum-amount, not count).
    if (AMOUNT_HINT.test(text)) return "sum-amount";
    return "count";
  }
  // Count: 多少X / 几X for a measure word ("多少条朋友圈" / "下了几单" /
  // "几个联系人"). Runs BEFORE the bare-sum rule so "消费了多少次" → count.
  if (COUNT_QUANTIFIER.test(text)) {
    return "count";
  }
  // Spend/income question without an explicit 总共/合计 — "(这个月)花了多少钱" /
  // "在淘宝花了多少" / "这个月收入多少" / "赚了多少". The amount word + a
  // "多少/how much" ⇒ a TOTAL, so the engine should answer with the
  // authoritative sumEventAmount total rather than a row sample.
  if (AMOUNT_HINT.test(text) && HOW_MUCH.test(text)) {
    return "sum-amount";
  }
  if (/(最近|最新|latest|recent)/i.test(text)) return "latest";
  return "list";
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@chainlesschain/personal-data-hub",
|
|
3
|
-
"version": "0.4.
|
|
3
|
+
"version": "0.4.29",
|
|
4
4
|
"description": "Personal Data Hub — UnifiedSchema + validators + KG ingest helpers for the data-back-to-the-individual middleware",
|
|
5
5
|
"type": "commonjs",
|
|
6
6
|
"main": "lib/index.js",
|