personal-ai 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/entry.mjs CHANGED
@@ -10,9 +10,9 @@ import { google } from "googleapis";
10
10
  import open from "open";
11
11
  import chalk from "chalk";
12
12
  import ora from "ora";
13
+ import { execFile } from "node:child_process";
13
14
  import JSON5 from "json5";
14
15
  import { z } from "zod";
15
- import { execFile } from "node:child_process";
16
16
  import matter from "gray-matter";
17
17
  import OpenAI from "openai";
18
18
  import { minimatch } from "minimatch";
@@ -21,6 +21,23 @@ import { Agent } from "@mariozechner/pi-agent-core";
21
21
  import { getModel } from "@mariozechner/pi-ai";
22
22
  import { Type } from "@sinclair/typebox";
23
23
 
24
+ //#region \0rolldown/runtime.js
25
/** Shorthand for Object.defineProperty, used by the bundler runtime helper below. */
var __defProp = Object.defineProperty;
/**
 * Build a namespace-style object from a map of getter functions.
 *
 * Every enumerable key of `all` becomes an enumerable accessor on the result
 * whose getter is `all[key]`, so the exported value is read lazily.
 * Unless `no_symbols` is truthy, the result is also tagged with
 * Symbol.toStringTag = "Module".
 */
var __exportAll = (all, no_symbols) => {
  const target = {};
  for (const exportName in all) {
    const descriptor = {
      get: all[exportName],
      enumerable: true
    };
    __defProp(target, exportName, descriptor);
  }
  if (no_symbols) return target;
  __defProp(target, Symbol.toStringTag, { value: "Module" });
  return target;
};
39
+
40
+ //#endregion
24
41
  //#region src/config/paths.ts
25
42
  /** Root data directory for pai */
26
43
  function getPaiHome() {
@@ -32,6 +49,20 @@ function getRawDir() {
32
49
  function getVaultDir() {
33
50
  return path.join(getPaiHome(), "vault");
34
51
  }
52
/** Path of the "memory" directory directly under the pai home directory. */
function getMemoryDir() {
  const paiHome = getPaiHome();
  return path.join(paiHome, "memory");
}
55
/** Path of the "weekly" sub-directory inside the memory directory. */
function getWeeklyDir() {
  const memoryDir = getMemoryDir();
  return path.join(memoryDir, "weekly");
}
58
/**
 * Resolve the journal markdown file for a given date.
 * @param date - Date string used verbatim as the file name (expected as YYYY-MM-DD).
 * @returns Path of `<memory dir>/<date>.md`.
 */
function getJournalPath(date) {
  const fileName = `${date}.md`;
  return path.join(getMemoryDir(), fileName);
}
62
/**
 * Current date as YYYY-MM-DD.
 * The value is the date part of `Date#toISOString()`, i.e. it is based on UTC,
 * not on the machine's local time zone.
 */
function getTodayDate() {
  const isoNow = new Date().toISOString();
  const [datePart] = isoNow.split("T");
  return datePart;
}
35
66
  function getSkillsDir() {
36
67
  return path.join(getPaiHome(), "skills", "profiles");
37
68
  }
@@ -54,6 +85,10 @@ function getProfilePath() {
54
85
 
55
86
  //#endregion
56
87
  //#region src/auth/encryption.ts
88
+ var encryption_exports = /* @__PURE__ */ __exportAll({
89
+ Encryption: () => Encryption,
90
+ encryption: () => encryption
91
+ });
57
92
  const ALGORITHM = "aes-256-gcm";
58
93
  const KEY_LENGTH = 32;
59
94
  const IV_LENGTH = 16;
@@ -150,13 +185,17 @@ function bold(message) {
150
185
 
151
186
  //#endregion
152
187
  //#region src/auth/google-oauth.ts
188
+ var google_oauth_exports = /* @__PURE__ */ __exportAll({
189
+ GoogleOAuth: () => GoogleOAuth,
190
+ googleOAuth: () => googleOAuth
191
+ });
153
192
  const SCOPES = [
154
193
  "https://www.googleapis.com/auth/gmail.readonly",
155
194
  "https://www.googleapis.com/auth/gmail.send",
156
195
  "https://www.googleapis.com/auth/calendar.readonly"
157
196
  ];
158
197
  const REDIRECT_URI = "http://localhost:8888/callback";
159
- const CALLBACK_TIMEOUT_MS = 120 * 1e3;
198
+ const CALLBACK_TIMEOUT_MS = 300 * 1e3;
160
199
  /**
161
200
  * Embedded OAuth client credentials for Desktop/CLI app.
162
201
  * Google explicitly states that for "installed" (desktop) applications,
@@ -276,6 +315,12 @@ var GoogleOAuth = class {
276
315
  if (!this.oauth2Client) throw new Error("OAuth client not initialized");
277
316
  return this.oauth2Client;
278
317
  }
318
+ /** Returns true if user has valid credentials (access_token or refresh_token). */
319
+ isAuthenticated() {
320
+ if (!this.oauth2Client) return false;
321
+ const creds = this.oauth2Client.credentials;
322
+ return Boolean(creds.access_token || creds.refresh_token);
323
+ }
279
324
  async ensureAuthenticated() {
280
325
  if (!this.oauth2Client || !this.oauth2Client.credentials.access_token) throw new Error("Not authenticated. Run: pai auth google");
281
326
  const now = Date.now();
@@ -505,6 +550,14 @@ function updateRawFrontmatter(fileContent, updates) {
505
550
 
506
551
  //#endregion
507
552
  //#region src/raw/add.ts
553
+ var add_exports = /* @__PURE__ */ __exportAll({
554
+ addConnectorEntry: () => addConnectorEntry,
555
+ addFile: () => addFile,
556
+ addText: () => addText,
557
+ addUrl: () => addUrl,
558
+ listAll: () => listAll,
559
+ listPending: () => listPending
560
+ });
508
561
  /** Add plain text content to raw/local/ */
509
562
  async function addText(content, source = "local") {
510
563
  const title = extractTitle$1(content);
@@ -1529,10 +1582,40 @@ async function rebuildProfile(opts) {
1529
1582
 
1530
1583
  //#endregion
1531
1584
  //#region src/cli/register.init.ts
1532
- /** Create directory structure, write default configs, register QMD, scan & compile profile. */
1585
/**
 * Try to install QMD globally via npm.
 *
 * Runs `npm install -g https://github.com/tobi/qmd` with a 120 s timeout and
 * reports progress through a spinner. The returned promise never rejects
 * because of the npm run: a failure is reported to the user and mapped to `false`.
 *
 * @returns {Promise<boolean>} true when npm finished without error, false otherwise.
 */
async function installQmd() {
  const spin = spinner("Installing QMD (search engine)...");
  return new Promise((resolve) => {
    execFile("npm", [
      "install",
      "-g",
      "https://github.com/tobi/qmd"
    ], {
      encoding: "utf-8",
      timeout: 12e4,
      // On Windows `npm` is a `npm.cmd` shim, which execFile cannot launch without a shell.
      // Every argument is a fixed literal, so enabling the shell adds no injection surface.
      shell: process.platform === "win32"
    }, (err) => {
      if (err) {
        spin.fail("QMD installation failed.");
        warn("Install manually: npm install -g https://github.com/tobi/qmd");
        resolve(false);
        return;
      }
      spin.succeed("QMD installed.");
      resolve(true);
    });
  });
}
1608
+ const TOTAL_STEPS = 8;
1609
+ /** Create directory structure, write default configs, register QMD, scan, profile, Google import, index. */
1533
1610
  async function runInit(options = {}) {
1534
- const { overwriteConfig = false, skipScan = false } = options;
1611
+ const { overwriteConfig = false, skipScan = false, nonInteractive = false } = options;
1535
1612
  const paiHome = getPaiHome();
1613
+ let profileLines = 0;
1614
+ let profileSources = 0;
1615
+ let gmailCount = 0;
1616
+ let calendarCount = 0;
1617
+ let googleAuthed = false;
1618
+ let qmdReady = false;
1536
1619
  const dirs = [
1537
1620
  path.join(getRawDir(), "local"),
1538
1621
  path.join(getRawDir(), "web"),
@@ -1542,6 +1625,8 @@ async function runInit(options = {}) {
1542
1625
  path.join(getVaultDir(), "life"),
1543
1626
  path.join(getVaultDir(), "preferences"),
1544
1627
  path.join(getVaultDir(), "context"),
1628
+ getMemoryDir(),
1629
+ getWeeklyDir(),
1545
1630
  getSkillsDir(),
1546
1631
  getConfigDir()
1547
1632
  ];
@@ -1574,7 +1659,10 @@ async function runInit(options = {}) {
1574
1659
  success("Created preferences.md — edit to customize AI behavior.");
1575
1660
  }
1576
1661
  }
1577
- if (await isQmdAvailable()) {
1662
+ success(`[1/${TOTAL_STEPS}] Creating directories & config...`);
1663
+ qmdReady = await isQmdAvailable();
1664
+ if (!qmdReady) qmdReady = await installQmd();
1665
+ if (qmdReady) {
1578
1666
  try {
1579
1667
  await execQmd([
1580
1668
  "collection",
@@ -1603,44 +1691,124 @@ async function runInit(options = {}) {
1603
1691
  } catch {
1604
1692
  warn("QMD collection 'vault' may already exist, skipping.");
1605
1693
  }
1606
- } else {
1607
- warn("QMD not found. Install with: npm install -g https://github.com/tobi/qmd");
1608
- warn("Search features won't work until QMD is installed.");
1609
1694
  }
1610
- if (!skipScan) {
1695
+ if (qmdReady) success(`[2/${TOTAL_STEPS}] Search engine (QMD) ready.`);
1696
+ else warn(`[2/${TOTAL_STEPS}] Search engine (QMD) not available; built-in search will be used.`);
1697
+ if (skipScan) {
1611
1698
  log("");
1699
+ info("Directory structure:");
1700
+ log(` ${paiHome}/profile.md — (skipped; run pai profile --rebuild)`);
1701
+ log(` ${paiHome}/raw/ — raw data input`);
1702
+ log(` ${paiHome}/vault/ — PINData knowledge store`);
1703
+ log(` ${paiHome}/memory/ — daily journal & digests`);
1704
+ log(` ${paiHome}/config/ — configuration`);
1705
+ info("Run \"pai profile --rebuild\" to scan and build your profile.");
1706
+ return;
1707
+ }
1708
+ try {
1612
1709
  const { profilePath, results } = await rebuildProfile({ verbose: true });
1613
- success(`Profile compiled → ${profilePath}`);
1614
- const sectionCount = results.length;
1615
- log("");
1616
- info(`Profile built from ${sectionCount} data sources:`);
1617
- for (const r of results) {
1618
- const lineCount = r.content.split("\n").length;
1619
- log(` ${bold(r.id)} — ${r.title} (${lineCount} lines)`);
1710
+ profileSources = results.length;
1711
+ try {
1712
+ profileLines = (await fs.readFile(profilePath, "utf-8")).split("\n").length;
1713
+ } catch {
1714
+ profileLines = 0;
1620
1715
  }
1716
+ success(`[3/${TOTAL_STEPS}] Scanning your machine (${profileSources} sources)...`);
1717
+ } catch (err) {
1718
+ const msg = err instanceof Error ? err.message : String(err);
1719
+ warn(`[3/${TOTAL_STEPS}] Scan failed: ${msg}`);
1621
1720
  }
1721
+ success(`[4/${TOTAL_STEPS}] Compiling profile...`);
1722
+ if (!nonInteractive) try {
1723
+ const { encryption } = await Promise.resolve().then(() => encryption_exports);
1724
+ const { googleOAuth } = await Promise.resolve().then(() => google_oauth_exports);
1725
+ await encryption.loadKey();
1726
+ await googleOAuth.init();
1727
+ if (googleOAuth.isAuthenticated()) {
1728
+ googleAuthed = true;
1729
+ success(`[5/${TOTAL_STEPS}] Connecting Google account... (already connected)`);
1730
+ } else {
1731
+ await googleOAuth.authorize();
1732
+ googleAuthed = true;
1733
+ success(`[5/${TOTAL_STEPS}] Connecting Google account...`);
1734
+ }
1735
+ } catch (err) {
1736
+ const msg = err instanceof Error ? err.message : String(err);
1737
+ warn(`[5/${TOTAL_STEPS}] Google auth skipped: ${msg}`);
1738
+ warn("Run \"pai auth google\" later to connect Gmail and Calendar.");
1739
+ }
1740
+ else info(`[5/${TOTAL_STEPS}] Connecting Google account... (skipped, non-interactive)`);
1741
+ if (googleAuthed) {
1742
+ const spinGmail = spinner(`[6/${TOTAL_STEPS}] Importing Gmail (latest 100)...`);
1743
+ try {
1744
+ const { syncGmail } = await import("./gmail-BhGX6az1.mjs");
1745
+ const { addConnectorEntry } = await Promise.resolve().then(() => add_exports);
1746
+ const entries = await syncGmail({
1747
+ days: 365,
1748
+ maxResults: 100
1749
+ });
1750
+ for (const entry of entries) {
1751
+ const status = await addConnectorEntry("gmail", entry);
1752
+ if (status === "created" || status === "updated") gmailCount++;
1753
+ }
1754
+ spinGmail.succeed(`[6/${TOTAL_STEPS}] Importing Gmail... ${gmailCount} message(s)`);
1755
+ } catch (err) {
1756
+ const msg = err instanceof Error ? err.message : String(err);
1757
+ spinGmail.fail(`[6/${TOTAL_STEPS}] Gmail import failed: ${msg}`);
1758
+ }
1759
+ } else info(`[6/${TOTAL_STEPS}] Importing Gmail... (skipped, no Google auth)`);
1760
+ if (googleAuthed) {
1761
+ const spinCal = spinner(`[7/${TOTAL_STEPS}] Importing Calendar...`);
1762
+ try {
1763
+ const { syncCalendar } = await import("./calendar-DGJSyErS.mjs");
1764
+ const { addConnectorEntry } = await Promise.resolve().then(() => add_exports);
1765
+ const entries = await syncCalendar({
1766
+ lookbackDays: 30,
1767
+ lookforwardDays: 90
1768
+ });
1769
+ for (const entry of entries) {
1770
+ const status = await addConnectorEntry("calendar", entry);
1771
+ if (status === "created" || status === "updated") calendarCount++;
1772
+ }
1773
+ spinCal.succeed(`[7/${TOTAL_STEPS}] Importing Calendar... ${calendarCount} event(s)`);
1774
+ } catch (err) {
1775
+ const msg = err instanceof Error ? err.message : String(err);
1776
+ spinCal.fail(`[7/${TOTAL_STEPS}] Calendar import failed: ${msg}`);
1777
+ }
1778
+ } else info(`[7/${TOTAL_STEPS}] Importing Calendar... (skipped, no Google auth)`);
1779
+ if (qmdReady) try {
1780
+ const { updateIndex } = await Promise.resolve().then(() => search_exports);
1781
+ await updateIndex();
1782
+ success(`[8/${TOTAL_STEPS}] Indexing for search...`);
1783
+ } catch (err) {
1784
+ const msg = err instanceof Error ? err.message : String(err);
1785
+ warn(`[8/${TOTAL_STEPS}] Indexing failed: ${msg}. Built-in search still works.`);
1786
+ }
1787
+ else info(`[8/${TOTAL_STEPS}] Indexing for search... (skipped, QMD not available)`);
1622
1788
  log("");
1623
- info("Directory structure:");
1624
- log(` ${paiHome}/profile.md ${skipScan ? "(skipped scan)" : "your personal profile"}`);
1625
- log(` ${paiHome}/raw/ — raw data input`);
1626
- log(` ${paiHome}/vault/ — distilled knowledge`);
1627
- log(` ${paiHome}/config/ — configuration`);
1789
+ success("Setup complete!");
1790
+ log(` Profile: ${paiHome}/profile.md (${profileLines} lines, ${profileSources} sources)`);
1791
+ const rawCount = gmailCount + calendarCount;
1792
+ if (rawCount > 0) log(` Raw data: ${rawCount} file(s) (${gmailCount} gmail, ${calendarCount} calendar)`);
1793
+ else log(` Raw data: ${paiHome}/raw/`);
1794
+ log(` Search: ${qmdReady ? "QMD hybrid ready" : "built-in keyword (install QMD for semantic)"}`);
1795
+ log(` Google: ${googleAuthed ? "connected" : "not connected"}`);
1628
1796
  log("");
1629
- if (skipScan) info("Run \"pai profile --rebuild\" to scan and build your profile.");
1630
- else {
1631
- info("Run \"pai distribute\" to deploy your profile to Cursor/agents.");
1632
- info("Run \"pai profile\" to view your profile anytime.");
1633
- }
1797
+ info("Next:");
1798
+ log(" pai profile # View your profile");
1799
+ log(" pai distribute # Deploy to Cursor/Claude");
1800
+ log(" pai ask \"question\" # Ask about yourself");
1634
1801
  }
1635
1802
  function registerInitCommand(program) {
1636
- program.command("init").description("Initialize pai: create dirs, scan local machine, compile profile").option("--skip-scan", "Skip local machine scan (for CI/testing)").action(async (opts) => {
1803
+ program.command("init").description("Initialize pai: create dirs, scan, profile, Google import, index (full pipeline)").option("--skip-scan", "Skip scan and steps 3-8 (CI/testing)").option("--non-interactive", "Skip Google OAuth (for agents; no browser)").action(async (opts) => {
1637
1804
  const paiHome = getPaiHome();
1638
1805
  const spin = spinner(`Initializing pai at ${paiHome}...`);
1639
1806
  try {
1640
1807
  spin.succeed(`Initializing pai at ${paiHome}`);
1641
1808
  await runInit({
1642
1809
  overwriteConfig: false,
1643
- skipScan: opts.skipScan
1810
+ skipScan: opts.skipScan,
1811
+ nonInteractive: opts.nonInteractive
1644
1812
  });
1645
1813
  } catch (err) {
1646
1814
  spin.fail("Initialization failed");
@@ -1805,239 +1973,442 @@ async function llmCall(prompt, system, model) {
1805
1973
  }
1806
1974
 
1807
1975
  //#endregion
1808
- //#region src/prompts/triage.ts
1809
- /** Build system prompt for the triage step */
1810
- function triageSystemPrompt() {
1811
- return `You are a personal knowledge management assistant.
1812
- Your job is to evaluate raw input and extract ALL distinct pieces of personal context worth preserving.
1976
+ //#region src/prompts/extract.ts
1977
+ /** Build system prompt for the PINData extract step (replaces triage + distill) */
1978
+ function extractSystemPrompt() {
1979
+ return `Extract PINData entries from the input. Each entry is ONE atomic piece of personal knowledge.
1813
1980
 
1814
- CRITICAL: One raw input often contains MULTIPLE types of data. You MUST split them into separate entries routed to different vault files.
1981
+ Entry types:
1982
+ - fact: concrete data point (name, number, date, account, version, error, relationship)
1983
+ - pref: implied choice or habit (tool preference, workflow pattern, interest signal)
1984
+ - decision: explicit decision with reasoning
1985
+ - entity: person, organization, or project with attributes
1986
+ - event: time-bound occurrence (meeting, deployment, payment, conference)
1815
1987
 
1816
- Two kinds of valuable content:
1988
+ Topic routing (pick the best fit):
1989
+ - context/identity — user's name, email, linked accounts
1990
+ - context/projects — project/repo/app names, deployments, tech stack
1991
+ - context/services — services/tools used, account IDs, subscriptions
1992
+ - preferences/tools — tool choices and why (e.g. Vercel for hosting, Airtable for PM)
1993
+ - preferences/workflow — work habits, communication style, patterns
1994
+ - work/activity — tasks, assignments, colleagues, meetings
1995
+ - work/finance — invoices, payments, amounts, subscriptions
1996
+ - life/interests — interests, events, communities, conferences
1997
+ - coding/lessons — coding lessons, debugging insights, tech decisions
1817
1998
 
1818
- 1) EXPERIENTIAL: concrete lessons, preferences, tips, or experiences.
1819
- 2) SYSTEM CONTEXT: data from system/connector scans identity, environment, habits, preferences.
1999
+ Rules:
2000
+ - Each entry.content = ONE concrete fact with names/numbers/dates. Never generic advice.
2001
+ - Return [] (empty array) if input is pure marketing/spam with zero personal signal.
2002
+ - Prefer multiple small entries over one big entry.
2003
+ - Do NOT extract generic platitudes like "monitor deployments regularly".
1820
2004
 
1821
- Routing targets:
1822
- - vault/context/identity.md — name, email, role, languages, locale, timezone
1823
- - vault/context/active-projects.md — repos, cloud infra, SSH hosts
1824
- - vault/preferences/tools.md — tech stack, IDEs, runtimes, package managers
1825
- - vault/preferences/workflow.md — shell habits, git config, aliases, dev workflow
1826
- - vault/life/interests.md — interests, domains of focus, bookmarks, browsing patterns
1827
- - vault/life/lifestyle.md — calendar, apps, music, media, daily routines
1828
- - vault/coding/*.md — coding lessons (use existing or suggest new)
1829
- - vault/work/*.md — work-related lessons and context
2005
+ Examples:
1830
2006
 
1831
- Do NOT mark as valuable: generic news, ads, or content with no personal signal.
2007
+ Input: "Vercel: Failed deployment for pin-sandman on team pinai"
2008
+ Output: [
2009
+ {"type":"fact","content":"pin-sandman: Vercel production deployment failed, team pinai","topic":"context/projects","tags":["vercel","pin-sandman"]},
2010
+ {"type":"pref","content":"Vercel: used for production deployment","topic":"preferences/tools","tags":["vercel"]}
2011
+ ]
1832
2012
 
1833
- Respond ONLY with valid JSON (no markdown fences):
1834
- {
1835
- "valuable": true/false,
1836
- "entries": [
1837
- { "targetFile": "vault/context/identity.md", "extract": "name, email, role, languages" },
1838
- { "targetFile": "vault/preferences/tools.md", "extract": "IDE, runtimes, package managers" }
1839
- ],
1840
- "reason": "brief explanation"
1841
- }
2013
+ Input: "50% off all shoes this weekend only!"
2014
+ Output: []
1842
2015
 
1843
- Rules:
1844
- - entries array can have 1-6 items — split aggressively by topic
1845
- - Each entry.extract is a SHORT directive telling the distill step WHAT to pull for that target
1846
- - If not valuable, entries should be empty []
1847
- - Use existing vault files from the list when available`;
2016
+ Respond with valid JSON array only (no markdown fences).`;
1848
2017
  }
1849
- /** Build user prompt for triage */
1850
- function triageUserPrompt(rawContent, vaultFiles) {
1851
- return `Evaluate this raw input and decide if it's worth distilling:
2018
+ /** Build user prompt for extract */
2019
+ function extractUserPrompt(rawContent, source) {
2020
+ return `Extract PINData entries from this input:
1852
2021
 
1853
- ---RAW CONTENT---
2022
+ ---INPUT---
1854
2023
  ${rawContent}
1855
- ---END RAW CONTENT---
1856
- ${vaultFiles.length > 0 ? `\nExisting vault files:\n${vaultFiles.map((f) => `- ${f}`).join("\n")}` : "\nNo existing vault files yet."}
2024
+ ---END INPUT---
2025
+ ${source ? `\n(Source: ${source})` : ""}
2026
+ Respond with JSON array only.`;
2027
+ }
2028
+ /** Build system prompt for the daily digest */
2029
+ function digestSystemPrompt() {
2030
+ return `Summarize the day's journal entries and extracted knowledge into 3-5 key bullet points.
2031
+
2032
+ Rules:
2033
+ - Each bullet = one concrete takeaway (what happened, what was decided, what matters)
2034
+ - Include names, projects, and specific details — never vague
2035
+ - If there are action items or follow-ups, list them
2036
+ - Keep it under 200 words total
2037
+ - Output plain markdown bullets only, no JSON`;
2038
+ }
2039
+ /** Build user prompt for digest */
2040
+ function digestUserPrompt(journalContent, newPinData, date) {
2041
+ return `Summarize this day (${date}):
2042
+
2043
+ ---JOURNAL---
2044
+ ${journalContent || "(no journal entries)"}
2045
+ ---END JOURNAL---
2046
+
2047
+ ---NEW KNOWLEDGE EXTRACTED---
2048
+ ${newPinData || "(no new entries)"}
2049
+ ---END KNOWLEDGE---
1857
2050
 
1858
- Route to context/, preferences/, life/, coding/, or work/ as appropriate. Respond with JSON only.`;
2051
+ Write 3-5 bullet points summarizing the day.`;
1859
2052
  }
1860
2053
 
1861
2054
  //#endregion
1862
- //#region src/distill/triage.ts
1863
- /** Run triage on a raw file to determine if it's worth distilling */
1864
- async function triageRawFile(rawContent, vaultFiles) {
1865
- const system = triageSystemPrompt();
1866
- const response = await llmCall(triageUserPrompt(rawContent, vaultFiles), system);
2055
+ //#region src/distill/extract.ts
2056
+ const VALID_TYPES = new Set([
2057
+ "fact",
2058
+ "pref",
2059
+ "decision",
2060
+ "entity",
2061
+ "event"
2062
+ ]);
2063
/**
 * Extract PINData entries from raw/journal content via a single LLM call.
 *
 * The model reply is expected to be a JSON array of entries; an object carrying
 * an `entries` array is accepted as well. Markdown code fences around the JSON
 * are stripped before parsing. Items are validated one by one: an item is kept
 * only when `type` is one of VALID_TYPES and `content` / `topic` are non-empty
 * strings, so a single malformed item never discards the valid ones.
 *
 * @param content - Text handed to the model.
 * @param source - Optional source label forwarded to the user prompt.
 * @returns {Promise<{entries: Array<{type: string, content: string, topic: string, tags?: string[]}>, summary: string}>}
 *   `summary` joins the first three entry contents, is "no extractable signal" when
 *   nothing was kept, "" when the JSON has an unexpected shape, or a parse-failure
 *   message containing the first 100 characters of the reply.
 */
async function extractPinData(content, source) {
  const system = extractSystemPrompt();
  const response = await llmCall(extractUserPrompt(content, source), system);
  // Guard against a non-string reply so the failure path below can still slice it.
  const responseText = typeof response === "string" ? response : String(response ?? "");
  let parsed;
  try {
    const cleaned = responseText.replace(/```json?\n?/g, "").replace(/```/g, "").trim();
    parsed = JSON.parse(cleaned);
  } catch {
    return {
      entries: [],
      summary: `Failed to parse extract response: ${responseText.slice(0, 100)}`
    };
  }
  let rawEntries;
  if (Array.isArray(parsed)) rawEntries = parsed;
  else if (parsed && typeof parsed === "object" && Array.isArray(parsed.entries)) rawEntries = parsed.entries;
  else return {
    entries: [],
    summary: ""
  };
  const entries = [];
  for (const item of rawEntries) {
    if (!item || typeof item !== "object") continue;
    const { type, content: entryContent, topic, tags } = item;
    if (typeof type !== "string" || !VALID_TYPES.has(type)) continue;
    // Model output is untyped JSON: numbers/objects here must be skipped, not trimmed.
    if (typeof entryContent !== "string" || typeof topic !== "string") continue;
    const trimmedContent = entryContent.trim();
    const trimmedTopic = topic.trim();
    if (!trimmedContent || !trimmedTopic) continue;
    entries.push({
      type,
      content: trimmedContent,
      topic: trimmedTopic,
      tags: Array.isArray(tags) ? tags.filter((t) => typeof t === "string") : void 0
    });
  }
  return {
    entries,
    summary: entries.length > 0 ? entries.slice(0, 3).map((e) => e.content).join("; ") : "no extractable signal"
  };
}
2104
 
1893
2105
  //#endregion
1894
- //#region src/prompts/distill.ts
1895
- /** Build system prompt for the distill & merge step */
1896
- function distillSystemPrompt() {
1897
- return `You are a personal knowledge distillation assistant.
1898
- Your job is to extract and merge valuable personal context from raw input into a vault document.
1899
-
1900
- Two modes:
1901
-
1902
- A) EXPERIENTIAL content (lessons, tips, experiences):
1903
- - Extract ONLY conclusions and actionable knowledge never copy raw text verbatim
1904
- - If a similar experience already exists, increment its verification count
1905
- - Each bullet: (source, date | ref: raw/path)
1906
-
1907
- B) SYSTEM CONTEXT content (identity, environment, habits, bookmarks, etc.):
1908
- - Do NOT dump raw lists verbatim (e.g. every package path or every bookmark folder)
1909
- - Synthesize into a compact, HIGH-DENSITY profile:
1910
- - Identity: name, role, languages, locale, timezone — one line each
1911
- - Tools: summarize tech stack concisely (e.g. "Full-stack: Node/Python/Go/Rust, IDEs: Cursor+Android Studio")
1912
- - Workflow: summarize habits (e.g. "Heavy git user (4762 commands), frequent claude CLI, command-line focused")
1913
- - Life: summarize calendar themes, main apps, browsing focus — with concrete data
1914
- - Interests: summarize bookmark categories AND top domains as interest tags (e.g. "AI/LLM, GIS, iOS, Python, Web3")
1915
- - Projects: summarize active areas with names (e.g. "PINAI (PIN-APP-IOS, PIN-AGENT-WEB), consulting, agent-market")
1916
-
1917
- CRITICAL RULES:
1918
- - NEVER write "未提供", "未指定", "Not specified", "Unknown" — if data is not in the raw input, simply OMIT that field
1919
- - Only include sections and fields that have ACTUAL data from the raw input
1920
- - Do NOT create empty placeholder sections. If an H2 section would be empty, skip it entirely
1921
- - Each vault file handles ONE topic — only include relevant data from the raw input
1922
- - Keep information density HIGH: pack maximum facts per line
1923
- - Maintain H2 (##) section structure
1924
- - Keep each H2 section between 200-800 tokens
1925
- - Preserve all existing vault content — only add or update, never remove
1926
- - Output the COMPLETE updated vault file content (not just the changes)
1927
- - End each new bullet with source ref: (source, date | ref: raw/path/to/file.md)`;
1928
- }
1929
- /** Build user prompt for distill & merge */
1930
- function distillUserPrompt(rawContent, rawFilePath, existingVaultContent, extractDirective) {
1931
- return `Distill the following raw input and merge into the vault document:
1932
-
1933
- ---RAW CONTENT (from: ${rawFilePath})---
1934
- ${rawContent}
1935
- ---END RAW CONTENT---
1936
-
1937
- ${existingVaultContent ? `---EXISTING VAULT DOCUMENT---\n${existingVaultContent}\n---END VAULT DOCUMENT---` : "This is a NEW vault document. Create it with appropriate H1 title and H2 sections."}
1938
- ${extractDirective ? `\nFOCUS: Only extract data related to: ${extractDirective}. Ignore unrelated content in the raw input.` : ""}
1939
- Output the COMPLETE updated vault file content.
1940
- Rules: synthesize into high-density profile. NEVER write "未提供"/"Not specified"/"Unknown" — omit fields with no data instead. Only include sections with actual data.`;
2106
+ //#region src/vault/writer.ts
2107
/**
 * Format a PINData entry as a single markdown bullet line:
 * `- [type] content (date)` or `- [type] content (date | ref:<ref>)`.
 *
 * @param entry - Entry providing `type` and `content`.
 * @param date - Date string placed first inside the parentheses.
 * @param ref - Optional reference; added as `ref:<ref>` when truthy.
 * @returns {string} The bullet line, without a trailing newline.
 */
function formatEntry(entry, date, ref) {
  // An entry must occupy exactly one line because parsePinDataLine and the
  // dedupe scan work line by line, so embedded line breaks are folded into a space.
  const content = String(entry.content).replace(/\s*[\r\n]+\s*/g, " ");
  const parts = [date];
  if (ref) parts.push(`ref:${ref}`);
  return `- [${entry.type}] ${content} (${parts.join(" | ")})`;
}
2115
/**
 * Parse a PINData bullet line back into structured data.
 *
 * Accepted shape: `- [type] content (YYYY-MM-DD<anything but ")">)`; a
 * `verified:<n>` marker inside the parentheses is read as the verified count.
 *
 * @param line - One line of a vault file.
 * @returns {{type: string, content: string, date: string, verified: (number|undefined)}|null}
 *   The parsed fields, or null when the line is not a PINData bullet.
 */
function parsePinDataLine(line) {
  // Files saved with CRLF endings keep a trailing "\r" after split("\n"); drop
  // trailing whitespace so the `$` anchor still matches such lines.
  const match = line.trimEnd().match(/^- \[(\w+)\] (.+?) \((\d{4}-\d{2}-\d{2})([^)]*)\)$/);
  if (!match) return null;
  const [, type, content, date, meta] = match;
  const verifiedMatch = (meta ?? "").match(/verified:(\d+)/);
  return {
    type,
    content,
    date,
    verified: verifiedMatch ? Number.parseInt(verifiedMatch[1], 10) : void 0
  };
}
2127
/**
 * Look for an existing PINData line that duplicates `newEntry`.
 *
 * Only lines starting with `- [<newEntry.type>]` that parsePinDataLine accepts
 * are compared. Contents are lower-cased, split on whitespace / `:` / `,` / `;`,
 * tokens of two characters or fewer are dropped, and the two token sets are
 * compared by intersection-over-union; a ratio of at least 0.6 is a duplicate.
 *
 * @param existingContent - Full text of a vault file.
 * @param newEntry - Entry providing `type` and `content`.
 * @returns {{lineIndex: number, line: string}|null} The first matching line and
 *   its index in `existingContent.split("\n")`, or null when nothing matches.
 */
function isDuplicate(existingContent, newEntry) {
  const toTokenSet = (text) => new Set(text.toLowerCase().split(/[\s:,;]+/).filter((token) => token.length > 2));
  const lines = existingContent.split("\n");
  const candidateTokens = toTokenSet(newEntry.content);
  for (const [lineIndex, line] of lines.entries()) {
    if (!line.startsWith(`- [${newEntry.type}]`)) continue;
    const parsed = parsePinDataLine(line);
    if (!parsed) continue;
    const knownTokens = toTokenSet(parsed.content);
    let shared = 0;
    for (const token of candidateTokens) {
      if (knownTokens.has(token)) shared += 1;
    }
    const unionSize = new Set([...candidateTokens, ...knownTokens]).size;
    const similarity = unionSize === 0 ? 0 : shared / unionSize;
    if (similarity >= .6) return {
      lineIndex,
      line
    };
  }
  return null;
}
1942
-
1943
- //#endregion
1944
- //#region src/distill/merge.ts
1945
2148
  /**
1946
- * Distill raw content and merge into a vault file.
1947
- * Creates the vault file if it doesn't exist.
1948
- * @param extractDirective - Optional hint telling LLM what to extract from raw content for this target
2149
+ * Append PINData entries to the appropriate vault files.
2150
+ * - Routes each entry to vault/{topic}.md based on entry.topic
2151
+ * - Creates file + H1 header if new
2152
+ * - Deduplicates: if similar entry exists, bumps verified count + updates date
2153
+ * Returns number of new entries added and duplicates updated.
1949
2154
  */
1950
- async function distillAndMerge(rawContent, rawFilePath, targetFile, extractDirective) {
2155
+ async function appendPinData(entries, options = {}) {
1951
2156
  const vaultDir = getVaultDir();
1952
- const relativePath = targetFile.replace(/^vault\//, "");
1953
- const fullPath = path.join(vaultDir, relativePath);
1954
- let existingContent = null;
1955
- try {
1956
- existingContent = await fs.readFile(fullPath, "utf-8");
1957
- } catch {}
1958
- const system = distillSystemPrompt();
1959
- const updatedContent = await llmCall(distillUserPrompt(rawContent, rawFilePath, existingContent, extractDirective), system);
1960
- await fs.mkdir(path.dirname(fullPath), { recursive: true });
1961
- await fs.writeFile(fullPath, updatedContent.trim() + "\n", "utf-8");
1962
- return fullPath;
2157
+ const date = options.date ?? (/* @__PURE__ */ new Date()).toISOString().split("T")[0];
2158
+ let added = 0;
2159
+ let updated = 0;
2160
+ const byTopic = /* @__PURE__ */ new Map();
2161
+ for (const entry of entries) {
2162
+ const topic = entry.topic;
2163
+ if (!byTopic.has(topic)) byTopic.set(topic, []);
2164
+ byTopic.get(topic).push(entry);
2165
+ }
2166
+ for (const [topic, topicEntries] of byTopic) {
2167
+ const filePath = path.join(vaultDir, `${topic}.md`);
2168
+ await fs.mkdir(path.dirname(filePath), { recursive: true });
2169
+ let content;
2170
+ try {
2171
+ content = await fs.readFile(filePath, "utf-8");
2172
+ } catch {
2173
+ content = `# ${topic.split("/").pop().replace(/-/g, " ").replace(/\b\w/g, (c) => c.toUpperCase())}\n`;
2174
+ }
2175
+ for (const entry of topicEntries) {
2176
+ const dup = isDuplicate(content, entry);
2177
+ if (dup) {
2178
+ const newVerified = (parsePinDataLine(dup.line)?.verified ?? 1) + 1;
2179
+ const updatedLine = `- [${entry.type}] ${entry.content} (${date} | verified:${newVerified})`;
2180
+ const lines = content.split("\n");
2181
+ lines[dup.lineIndex] = updatedLine;
2182
+ content = lines.join("\n");
2183
+ updated++;
2184
+ } else {
2185
+ const line = formatEntry(entry, date, options.ref);
2186
+ content = content.trimEnd() + "\n" + line + "\n";
2187
+ added++;
2188
+ }
2189
+ }
2190
+ await fs.writeFile(filePath, content, "utf-8");
2191
+ }
2192
+ return {
2193
+ added,
2194
+ updated
2195
+ };
1963
2196
  }
1964
-
1965
- //#endregion
1966
- //#region src/distill/index.ts
1967
- /** List all existing vault files (relative paths like "coding/python.md") */
1968
- async function listVaultFiles() {
2197
+ /** List all PINData entries across all vault files */
2198
+ async function listVaultEntries() {
1969
2199
  const vaultDir = getVaultDir();
1970
- const files = [];
2200
+ const results = [];
1971
2201
  async function walk(dir) {
1972
2202
  try {
1973
2203
  const entries = await fs.readdir(dir, { withFileTypes: true });
1974
2204
  for (const entry of entries) {
1975
2205
  const fullPath = path.join(dir, entry.name);
1976
2206
  if (entry.isDirectory()) await walk(fullPath);
1977
- else if (entry.name.endsWith(".md")) files.push(path.relative(vaultDir, fullPath));
2207
+ else if (entry.name.endsWith(".md")) {
2208
+ const topic = path.relative(vaultDir, fullPath).replace(/\.md$/, "");
2209
+ const content = await fs.readFile(fullPath, "utf-8");
2210
+ for (const line of content.split("\n")) if (line.startsWith("- [")) {
2211
+ const parsed = parsePinDataLine(line);
2212
+ results.push({
2213
+ topic,
2214
+ line,
2215
+ parsed
2216
+ });
2217
+ }
2218
+ }
1978
2219
  }
1979
2220
  } catch {}
1980
2221
  }
1981
2222
  await walk(vaultDir);
1982
- return files;
2223
+ return results;
2224
+ }
2225
+
2226
+ //#endregion
2227
+ //#region src/memory/journal.ts
2228
+ /** Append a log entry to today's journal file (memory/YYYY-MM-DD.md) */
2229
+ async function appendJournal(text, date) {
2230
+ const targetDate = date ?? getTodayDate();
2231
+ const filePath = getJournalPath(targetDate);
2232
+ await fs.mkdir(getMemoryDir(), { recursive: true });
2233
+ const entry = `- ${(/* @__PURE__ */ new Date()).toTimeString().slice(0, 5)} ${text}\n`;
2234
+ let existed = false;
2235
+ let existingContent = "";
2236
+ try {
2237
+ existingContent = await fs.readFile(filePath, "utf-8");
2238
+ existed = true;
2239
+ } catch {}
2240
+ if (!existed) {
2241
+ const header = `# ${targetDate}\n\n`;
2242
+ await fs.writeFile(filePath, header + entry, "utf-8");
2243
+ } else {
2244
+ const markerIdx = existingContent.indexOf("\n<!-- extracted -->");
2245
+ const digestIdx = existingContent.indexOf("\n## Digest");
2246
+ const insertIdx = Math.min(markerIdx >= 0 ? markerIdx : Infinity, digestIdx >= 0 ? digestIdx : Infinity);
2247
+ if (insertIdx < Infinity) {
2248
+ const before = existingContent.slice(0, insertIdx).trimEnd();
2249
+ const after = existingContent.slice(insertIdx).replace("<!-- extracted -->", "").trimStart();
2250
+ const newContent = before + "\n" + entry + (after ? "\n" + after : "") + "\n";
2251
+ await fs.writeFile(filePath, newContent, "utf-8");
2252
+ } else await fs.appendFile(filePath, entry, "utf-8");
2253
+ }
2254
+ return {
2255
+ path: filePath,
2256
+ entryCount: ((await fs.readFile(filePath, "utf-8")).match(/^- \d{2}:\d{2} /gm) || []).length
2257
+ };
1983
2258
  }
1984
- /** Run the full distill pipeline on all pending raw files */
2259
+ /** Read a journal file's content. Returns null if not found. */
2260
+ async function readJournal(date) {
2261
+ const filePath = getJournalPath(date ?? getTodayDate());
2262
+ try {
2263
+ return await fs.readFile(filePath, "utf-8");
2264
+ } catch {
2265
+ return null;
2266
+ }
2267
+ }
2268
+ /** List all journal dates (YYYY-MM-DD) sorted descending */
2269
+ async function listJournalDates() {
2270
+ const memDir = getMemoryDir();
2271
+ try {
2272
+ return (await fs.readdir(memDir)).filter((f) => /^\d{4}-\d{2}-\d{2}\.md$/.test(f)).map((f) => f.replace(".md", "")).sort().reverse();
2273
+ } catch {
2274
+ return [];
2275
+ }
2276
+ }
2277
+ /** Get status of a journal: entry count and whether it has a digest */
2278
+ async function getJournalStatus(date) {
2279
+ const content = await readJournal(date);
2280
+ if (!content) return null;
2281
+ return {
2282
+ entries: (content.match(/^- \d{2}:\d{2} /gm) || []).length,
2283
+ hasDigest: content.includes("## Digest")
2284
+ };
2285
+ }
2286
+ /** Find journal dates with missing entries in the last N days */
2287
+ async function findGaps(days = 30) {
2288
+ const existing = new Set(await listJournalDates());
2289
+ const gaps = [];
2290
+ const now = /* @__PURE__ */ new Date();
2291
+ for (let i = 0; i < days; i++) {
2292
+ const d = new Date(now);
2293
+ d.setDate(d.getDate() - i);
2294
+ const dateStr = d.toISOString().split("T")[0];
2295
+ if (!existing.has(dateStr)) gaps.push(dateStr);
2296
+ }
2297
+ return gaps;
2298
+ }
2299
+ /** Append a digest section to a journal file */
2300
+ async function appendDigest(digest, date) {
2301
+ const targetDate = date ?? getTodayDate();
2302
+ const filePath = getJournalPath(targetDate);
2303
+ let content;
2304
+ try {
2305
+ content = await fs.readFile(filePath, "utf-8");
2306
+ } catch {
2307
+ content = `# ${targetDate}\n\n`;
2308
+ }
2309
+ const digestIndex = content.indexOf("\n## Digest");
2310
+ if (digestIndex !== -1) content = content.slice(0, digestIndex);
2311
+ const digestSection = `\n## Digest\n\n${digest.trim()}\n`;
2312
+ await fs.mkdir(path.dirname(filePath), { recursive: true });
2313
+ await fs.writeFile(filePath, content.trimEnd() + "\n" + digestSection, "utf-8");
2314
+ return filePath;
2315
+ }
2316
+
2317
+ //#endregion
2318
+ //#region src/distill/index.ts
2319
+ /**
2320
+ * Run the PINData extract pipeline on pending raw files + unprocessed journals.
2321
+ * Flow: raw/journal content → 1 LLM extract call → PINData[] → code-append to vault
2322
+ */
1985
2323
  async function distillPipeline(options = {}) {
1986
2324
  const result = {
1987
2325
  processed: 0,
1988
- valuable: 0,
2326
+ extracted: 0,
1989
2327
  discarded: 0,
1990
2328
  errors: []
1991
2329
  };
1992
- let pendingFiles;
1993
- if (options.singleFile) pendingFiles = [options.singleFile];
1994
- else pendingFiles = await listPending();
1995
- if (pendingFiles.length === 0) {
1996
- info("No pending raw files to process.");
2330
+ const inputs = [];
2331
+ if (options.singleFile) {
2332
+ const { content } = parseFrontmatter(await fs.readFile(options.singleFile, "utf-8"));
2333
+ inputs.push({
2334
+ label: path.basename(options.singleFile),
2335
+ content,
2336
+ rawPath: options.singleFile,
2337
+ source: options.singleFile
2338
+ });
2339
+ } else if (!options.today) {
2340
+ const pendingFiles = await listPending();
2341
+ for (const filePath of pendingFiles) {
2342
+ const { content } = parseFrontmatter(await fs.readFile(filePath, "utf-8"));
2343
+ inputs.push({
2344
+ label: path.basename(filePath),
2345
+ content,
2346
+ rawPath: filePath,
2347
+ source: filePath
2348
+ });
2349
+ }
2350
+ }
2351
+ if (!options.singleFile) {
2352
+ const todayDate = getTodayDate();
2353
+ const journal = await readJournal(todayDate);
2354
+ if (journal && journal.trim().length > 0) {
2355
+ if (!journal.includes("<!-- extracted -->")) inputs.push({
2356
+ label: `journal/${todayDate}`,
2357
+ content: journal,
2358
+ source: `memory/${todayDate}.md`
2359
+ });
2360
+ }
2361
+ }
2362
+ if (inputs.length === 0) {
2363
+ info("No pending raw files or journal entries to process.");
1997
2364
  return result;
1998
2365
  }
1999
- info(`Found ${pendingFiles.length} pending file(s) to process.`);
2000
- const vaultFilesList = (await listVaultFiles()).map((f) => `vault/${f}`);
2001
- for (const filePath of pendingFiles) {
2002
- const spin = spinner(`Processing ${path.basename(filePath)}...`);
2366
+ info(`Found ${inputs.length} input(s) to process.`);
2367
+ for (const input of inputs) {
2368
+ const spin = spinner(`Extracting ${input.label}...`);
2003
2369
  try {
2004
- const rawFileContent = await fs.readFile(filePath, "utf-8");
2005
- const { content } = parseFrontmatter(rawFileContent);
2006
- const triage = await triageRawFile(content, vaultFilesList);
2370
+ const extracted = await extractPinData(input.content, input.source);
2007
2371
  result.processed++;
2008
- if (!triage.valuable || triage.entries.length === 0) {
2009
- spin.succeed(`Discarded: ${path.basename(filePath)} — ${triage.reason}`);
2372
+ if (extracted.entries.length === 0) {
2373
+ spin.succeed(`Skipped: ${input.label} — ${extracted.summary}`);
2010
2374
  result.discarded++;
2011
- if (!options.dryRun) {
2012
- const updated = updateRawFrontmatter(rawFileContent, { status: "discarded" });
2013
- await fs.writeFile(filePath, updated, "utf-8");
2375
+ if (input.rawPath && !options.dryRun) {
2376
+ const updated = updateRawFrontmatter(await fs.readFile(input.rawPath, "utf-8"), { status: "discarded" });
2377
+ await fs.writeFile(input.rawPath, updated, "utf-8");
2014
2378
  }
2015
2379
  continue;
2016
2380
  }
2017
- const targets = triage.entries.map((e) => e.targetFile);
2018
2381
  if (options.dryRun) {
2019
- spin.succeed(`[DRY RUN] Would distill ${path.basename(filePath)} ${targets.join(", ")}`);
2020
- result.valuable++;
2382
+ const topics = [...new Set(extracted.entries.map((e) => e.topic))];
2383
+ spin.succeed(`[DRY RUN] ${input.label} → ${extracted.entries.length} entries → ${topics.join(", ")}`);
2384
+ result.extracted += extracted.entries.length;
2021
2385
  continue;
2022
2386
  }
2023
- const mergedPaths = [];
2024
- for (const entry of triage.entries) {
2025
- const targetFile = entry.targetFile || "vault/context/misc.md";
2026
- const vaultPath = await distillAndMerge(content, filePath, targetFile, entry.extract);
2027
- mergedPaths.push(vaultPath);
2028
- if (!vaultFilesList.includes(targetFile)) vaultFilesList.push(targetFile);
2029
- }
2030
- result.valuable++;
2031
- const updated = updateRawFrontmatter(rawFileContent, {
2032
- status: "processed",
2033
- distilled_to: mergedPaths.map((p) => path.relative(path.dirname(filePath).replace(/\/raw\/.*/, "/raw"), p)).join(", ")
2387
+ const date = (/* @__PURE__ */ new Date()).toISOString().split("T")[0];
2388
+ const { added, updated } = await appendPinData(extracted.entries, {
2389
+ date,
2390
+ ref: input.source
2034
2391
  });
2035
- await fs.writeFile(filePath, updated, "utf-8");
2036
- spin.succeed(`Distilled ${path.basename(filePath)} ${targets.join(", ")} (${triage.entries.length} entries)`);
2392
+ result.extracted += extracted.entries.length;
2393
+ const topics = [...new Set(extracted.entries.map((e) => e.topic))];
2394
+ if (input.rawPath) {
2395
+ const updatedRaw = updateRawFrontmatter(await fs.readFile(input.rawPath, "utf-8"), {
2396
+ status: "processed",
2397
+ distilled_to: topics.join(", ")
2398
+ });
2399
+ await fs.writeFile(input.rawPath, updatedRaw, "utf-8");
2400
+ }
2401
+ if (!input.rawPath && input.source.startsWith("memory/")) {
2402
+ const journalPath = path.join(getPaiHome(), input.source);
2403
+ try {
2404
+ if (!(await fs.readFile(journalPath, "utf-8")).includes("<!-- extracted -->")) await fs.appendFile(journalPath, "\n<!-- extracted -->\n", "utf-8");
2405
+ } catch {}
2406
+ }
2407
+ spin.succeed(`Extracted ${input.label} → ${added} new, ${updated} updated → ${topics.join(", ")}`);
2037
2408
  } catch (err) {
2038
2409
  const msg = err instanceof Error ? err.message : String(err);
2039
- spin.fail(`Error processing ${path.basename(filePath)}: ${msg}`);
2040
- result.errors.push(`${filePath}: ${msg}`);
2410
+ spin.fail(`Error processing ${input.label}: ${msg}`);
2411
+ result.errors.push(`${input.label}: ${msg}`);
2041
2412
  }
2042
2413
  }
2043
2414
  return result;
@@ -2046,23 +2417,24 @@ async function distillPipeline(options = {}) {
2046
2417
  //#endregion
2047
2418
  //#region src/cli/register.distill.ts
2048
2419
  function registerDistillCommand(program) {
2049
- program.command("distill").description("Distill pending raw files into vault knowledge").option("--file <path>", "Process a single raw file").option("--dry-run", "Preview what would be distilled without writing").action(async (opts) => {
2420
+ program.command("distill").description("Extract PINData from pending raw files and journal into vault").option("--file <path>", "Process a single raw file").option("--today", "Only process today's journal").option("--dry-run", "Preview what would be extracted without writing").action(async (opts) => {
2050
2421
  try {
2051
2422
  const result = await distillPipeline({
2052
2423
  singleFile: opts.file,
2424
+ today: opts.today,
2053
2425
  dryRun: opts.dryRun
2054
2426
  });
2055
2427
  log("");
2056
2428
  log(bold("Distill summary:"));
2057
2429
  log(` Processed: ${result.processed}`);
2058
- log(` Valuable: ${result.valuable}`);
2430
+ log(` Extracted: ${result.extracted} PINData entries`);
2059
2431
  log(` Discarded: ${result.discarded}`);
2060
2432
  if (result.errors.length > 0) {
2061
2433
  log(` Errors: ${result.errors.length}`);
2062
2434
  for (const e of result.errors) error(` ${e}`);
2063
2435
  }
2064
2436
  log("");
2065
- if (result.valuable > 0 && !opts.dryRun) info("Vault updated. Run \"pai generate\" to update SKILL.md profiles.");
2437
+ if (result.extracted > 0 && !opts.dryRun) info("Vault updated with PINData entries. Run \"pai generate\" to update SKILL.md profiles.");
2066
2438
  } catch (err) {
2067
2439
  const msg = err instanceof Error ? err.message : String(err);
2068
2440
  error(`Distill failed: ${msg}`);
@@ -2075,44 +2447,61 @@ function registerDistillCommand(program) {
2075
2447
  //#region src/prompts/generate.ts
2076
2448
  /** Build system prompt for SKILL.md generation */
2077
2449
  function generateSystemPrompt() {
2078
- return `You are generating a concise SKILL.md personal context file for an AI agent.
2079
- This file helps the agent understand the user's background, preferences, and experiences.
2450
+ return `You generate a SKILL.md a minimal, high-density context file for an AI agent to understand the user.
2451
+ Every line MUST be useful to an AI agent. If a line wouldn't change how an agent responds, delete it.
2080
2452
 
2081
- Rules:
2082
- - Be extremely concise every line must carry HIGH information density
2083
- - Pack maximum concrete facts per bullet (names, numbers, specific tools)
2084
- - Prioritize experiences with higher verification counts when present
2085
- - Use bullet points for each piece of context
2086
- - Follow the exact section structure provided
2087
- - Do NOT include generic advice only user-specific context
2088
- - NEVER write filler like "未提供", "Not specified" or vague statements
2089
- - Write in the same language as the source content
2090
-
2091
- When vault content includes identity/tools/workflow/interests data:
2092
- - "Who I Am": real name, email, languages with specifics, locale, timezone, tech identity
2093
- - "Preferences": specific tools by name, workflow patterns with data (e.g. "git: 4762 commands"), communication style
2094
- - "Hard-won Lessons": only include if genuine lessons exist; otherwise omit or make it 1-2 lines
2095
- - "Current Work": specific project names, focus areas, active repos be concrete`;
2453
+ COMPRESSION RULES:
2454
+ - Identity: pack into 2-3 lines max (name | email | role | languages | timezone — merge onto fewer lines)
2455
+ - Preferences: write as "prefers X for Y" not just "uses X". State the WHY or CONTEXT.
2456
+ GOOD: "Deploys on Vercel (team pinai), manages tasks in Airtable, meetings via Zoom (paid)"
2457
+ BAD: "Tools: Vercel, Zoom, Docker, npm, Airtable" (just a list, no context)
2458
+ - Interests: compress bookmarks/domains/events into TAGS, not raw lists
2459
+ GOOD: "Interests: AI/LLM, Web3 (ETHDenver 2026), GIS, iOS, prediction markets (Kalshi)"
2460
+ BAD: "Bookmarks: Favorites Bar 106项, Read 78项, Tools 67项..." (raw data dump)
2461
+ - Current Work: what are they ACTIVELY building? Project names + what they do, not directory paths
2462
+ GOOD: "Building personal-ai (v0.1.0, npm published), PINAI ecosystem (PIN-APP-IOS, pin-sandman on Vercel)"
2463
+ BAD: "Recent Working Directories: ..: 8次, backend: 2次"
2464
+ - Lessons: ONLY copy verbatim lessons that exist in vault/coding/ or vault/work/ files.
2465
+ NEVER INVENT OR PARAPHRASE lessons. If the vault has no explicit lesson, write "None recorded yet."
2466
+ A deployment failure is a FACT (goes in Current Work), NOT a lesson.
2467
+ GOOD: "FastAPI CORS: must explicitly list allowed_origins, wildcard doesn't work with credentials"
2468
+ BAD: "生产环境部署失败可能导致项目延误" ← THIS IS INVENTED, NEVER DO THIS
2469
+ BAD: "及时关注部署通知有助于快速定位问题" ← THIS IS GENERIC ADVICE, NEVER DO THIS
2470
+ - Financial/services: compress into 1-2 lines
2471
+ GOOD: "Paid services: Zoom (acct 5144380543), BytePlus (acct 3000767749, ~200 USD credits)"
2472
+ - Team: mention key collaborators briefly
2473
+ GOOD: "Works with Ethan Liu (pinai team) on Prediction project via Airtable"
2474
+
2475
+ ANTI-PATTERNS — never do these:
2476
+ - Raw data dumps (bookmark counts, domain visit counts, directory lists)
2477
+ - Generic advice disguised as lessons
2478
+ - Separate lines for each identity field (merge them)
2479
+ - Lists of tool names without context on how/why they're used
2480
+ - "Account Activities" as a separate section (fold into Preferences or Current Work)`;
2096
2481
  }
2097
2482
  /** Build user prompt for SKILL.md generation */
2098
2483
  function generateUserPrompt(profileName, vaultContents, maxLines) {
2099
- return `Generate a SKILL.md profile called "${profileName}" from the following vault content.
2100
- Maximum ${maxLines} lines total.
2484
+ return `Generate SKILL.md "${profileName}" from vault content. Max ${maxLines} lines.
2101
2485
 
2102
- Required sections:
2486
+ Sections:
2103
2487
  # Personal Context — ${profileName}
2104
-
2105
- ## Who I Am
2106
- ## Preferences
2107
- ## Hard-won Lessons
2108
- ## Current Work
2488
+ ## Who I Am (2-4 lines: identity, languages, timezone — packed dense)
2489
+ ## Preferences (tool choices WITH context, workflow habits, communication style)
2490
+ ## Interests (compressed tags from bookmarks/domains/events, NOT raw lists)
2491
+ ## Current Work (active projects with names, what they do, team members)
2492
+ ## Paid Services (services with account IDs, amounts — 1-3 lines)
2493
+ ## Hard-won Lessons (ONLY real lessons from vault, or 1 line "None yet")
2109
2494
 
2110
2495
  ---VAULT CONTENT---
2111
2496
  ${vaultContents}
2112
2497
  ---END VAULT CONTENT---
2113
2498
 
2114
- Generate the SKILL.md now. Stay within ${maxLines} lines.
2115
- IMPORTANT: include ALL concrete facts from the vault names, numbers, tools, project names, bookmark categories, browsing domains. Do not summarize away specific data. Never write filler or "未提供".`;
2499
+ CRITICAL: Compress, don't dump. Every line must be useful to an AI agent.
2500
+ - Merge identity fields onto fewer lines (name | email | timezone on one line)
2501
+ - Write preferences as "prefers X for Y" not bare lists
2502
+ - Convert bookmark counts and domain visits into interest TAGS
2503
+ - Never write generic advice as lessons
2504
+ - Never write raw data (dir paths, bookmark counts, visit numbers)`;
2116
2505
  }
2117
2506
 
2118
2507
  //#endregion
@@ -2218,17 +2607,25 @@ function registerGenerateCommand(program) {
2218
2607
 
2219
2608
  //#endregion
2220
2609
  //#region src/search/index.ts
2610
+ var search_exports = /* @__PURE__ */ __exportAll({
2611
+ search: () => search,
2612
+ updateIndex: () => updateIndex
2613
+ });
2221
2614
  /**
2222
- * Hybrid search via QMD.
2615
+ * Search with automatic fallback:
2616
+ * 1. QMD available → use QMD (query/search/vsearch modes)
2617
+ * 2. QMD not available → built-in grep search (keyword matching, no install needed)
2618
+ *
2619
+ * QMD modes:
2223
2620
  * - "query" (default): BM25 + vector + expansion + reranking (~5s, best quality)
2224
- * - "search": BM25 keyword + rg grep fallback (~50ms, fast)
2225
- * - "vsearch": Vector similarity only (~2s, good for semantic)
2621
+ * - "search": BM25 keyword (~50ms, fast)
2622
+ * - "vsearch": Vector similarity only (~2s, semantic)
2226
2623
  *
2227
2624
  * For "search" (fast) mode, QMD BM25 doesn't support CJK tokenization,
2228
- * so we supplement with ripgrep to cover Chinese/Japanese/Korean.
2625
+ * so we supplement with grep to cover Chinese/Japanese/Korean.
2229
2626
  */
2230
2627
  async function search(query, collection = "vault", n = 5, mode = "query") {
2231
- if (!await isQmdAvailable()) throw new Error("QMD is not installed. Install with: npm install -g https://github.com/tobi/qmd");
2628
+ if (!await isQmdAvailable()) return builtinSearch(query, collection, n);
2232
2629
  let results = parseSearchResults(await execQmd([
2233
2630
  mode,
2234
2631
  query,
@@ -2241,6 +2638,57 @@ async function search(query, collection = "vault", n = 5, mode = "query") {
2241
2638
  if (mode === "search" && results.length === 0 && hasCjk(query)) results = await grepFallback(query, collection, n);
2242
2639
  return results;
2243
2640
  }
2641
+ /**
2642
+ * Built-in keyword search — zero external dependencies.
2643
+ * Reads markdown files and does case-insensitive substring matching.
2644
+ * Good enough for small-medium collections (< 10k files).
2645
+ */
2646
+ async function builtinSearch(query, collection, n) {
2647
+ const baseDir = collection === "raw" ? getRawDir() : getVaultDir();
2648
+ const grepResults = await grepFallback(query, collection, n);
2649
+ if (grepResults.length > 0) return grepResults;
2650
+ return nodeFsSearch(query, baseDir, collection, n);
2651
+ }
2652
+ /**
2653
+ * Pure Node.js file search — absolute zero dependencies.
2654
+ * Recursively reads .md files and matches query as case-insensitive substring.
2655
+ */
2656
+ async function nodeFsSearch(query, baseDir, collection, n) {
2657
+ const results = [];
2658
+ const queryLower = query.toLowerCase();
2659
+ async function walk(dir) {
2660
+ let entries;
2661
+ try {
2662
+ entries = await fs.readdir(dir, { withFileTypes: true });
2663
+ } catch {
2664
+ return;
2665
+ }
2666
+ for (const entry of entries) {
2667
+ if (results.length >= n) return;
2668
+ const full = path.join(dir, entry.name);
2669
+ if (entry.isDirectory()) await walk(full);
2670
+ else if (entry.name.endsWith(".md")) try {
2671
+ const content = await fs.readFile(full, "utf-8");
2672
+ const idx = content.toLowerCase().indexOf(queryLower);
2673
+ if (idx !== -1) {
2674
+ const snippetStart = Math.max(0, idx - 40);
2675
+ const snippetEnd = Math.min(content.length, idx + query.length + 120);
2676
+ const snippet = content.slice(snippetStart, snippetEnd).replace(/\n/g, " ").trim();
2677
+ const titleMatch = content.match(/^#\s+(.+)/m);
2678
+ const relative = path.relative(baseDir, full);
2679
+ results.push({
2680
+ file: `${collection}/${relative}`,
2681
+ title: titleMatch?.[1]?.trim() ?? path.basename(full, ".md"),
2682
+ snippet,
2683
+ score: 1
2684
+ });
2685
+ }
2686
+ } catch {}
2687
+ }
2688
+ }
2689
+ await walk(baseDir);
2690
+ return results;
2691
+ }
2244
2692
  /** Parse QMD JSON search output into SearchResult[] */
2245
2693
  function parseSearchResults(stdout) {
2246
2694
  try {
@@ -2318,6 +2766,7 @@ async function grepFallback(query, collection, n) {
2318
2766
  }
2319
2767
  /** Update QMD index: runs `qmd update` + `qmd embed` */
2320
2768
  async function updateIndex() {
2769
+ if (!await isQmdAvailable()) throw new Error("QMD is not installed. Built-in search works without indexing.\nFor better search quality, install QMD: npm install -g https://github.com/tobi/qmd");
2321
2770
  await execQmd(["update"]);
2322
2771
  await execQmd(["embed"]);
2323
2772
  }
@@ -2400,7 +2849,7 @@ function registerImportCommand(program) {
2400
2849
  program.command("import").description("Import data from connector or directory (use --source mac for system scan)").requiredOption("--source <source>", "Data source (mac, gmail, calendar, twitter, etc.)").option("--path <dir>", "Path to data directory or file (not needed for mac, gmail, calendar)").option("--dry-run", "Preview what would be imported without writing").option("--days <n>", "For gmail/calendar: last N days to fetch (default 30)", "30").option("--query <q>", "For gmail: Gmail search query (e.g. is:important)").action(async (opts) => {
2401
2850
  try {
2402
2851
  if (opts.source === "mac") {
2403
- const { scanMac } = await import("./mac-C9SDXZGK.mjs");
2852
+ const { scanMac } = await import("./mac-vynpftec.mjs");
2404
2853
  const result = await scanMac({ dryRun: opts.dryRun });
2405
2854
  if (opts.dryRun) return;
2406
2855
  log("");
@@ -2410,7 +2859,7 @@ function registerImportCommand(program) {
2410
2859
  return;
2411
2860
  }
2412
2861
  if (opts.source === "gmail") {
2413
- const { syncGmail } = await import("./gmail-B9ja9sKN.mjs");
2862
+ const { syncGmail } = await import("./gmail-BhGX6az1.mjs");
2414
2863
  const parsedDays = parseInt(opts.days ?? "30", 10);
2415
2864
  const days = Number.isNaN(parsedDays) ? 30 : parsedDays;
2416
2865
  const doSync = () => syncGmail({
@@ -2454,7 +2903,7 @@ function registerImportCommand(program) {
2454
2903
  return;
2455
2904
  }
2456
2905
  if (opts.source === "calendar") {
2457
- const { syncCalendar } = await import("./calendar-BHcM4wfQ.mjs");
2906
+ const { syncCalendar } = await import("./calendar-DGJSyErS.mjs");
2458
2907
  const parsedDays = parseInt(opts.days ?? "30", 10);
2459
2908
  const days = Number.isNaN(parsedDays) ? 30 : parsedDays;
2460
2909
  const doSync = () => syncCalendar({
@@ -2606,22 +3055,22 @@ function askConfirm(question) {
2606
3055
  });
2607
3056
  }
2608
3057
  function registerResetCommand(program) {
2609
- program.command("reset").description("Remove all pai data and re-initialize (clean slate for testing)").option("--force", "Skip confirmation prompt").action(async (opts) => {
3058
+ program.command("reset").description("Clear all pai data (remove ~/.pai directory)").option("--force", "Skip confirmation prompt").action(async (opts) => {
2610
3059
  const paiHome = getPaiHome();
2611
3060
  if (!opts.force) {
2612
- if (!await askConfirm(`Remove ALL data at ${paiHome} and re-initialize? [y/N] `)) {
3061
+ if (!await askConfirm(`Remove ALL data at ${paiHome}? [y/N] `)) {
2613
3062
  info("Reset cancelled.");
2614
3063
  return;
2615
3064
  }
2616
3065
  }
2617
- const spin = spinner(`Resetting pai at ${paiHome}...`);
3066
+ const spin = spinner(`Clearing pai data at ${paiHome}...`);
2618
3067
  try {
2619
3068
  await fs.rm(paiHome, {
2620
3069
  recursive: true,
2621
3070
  force: true
2622
3071
  });
2623
- await runInit({ overwriteConfig: true });
2624
- spin.succeed(`pai reset complete. Fresh state at ${paiHome}`);
3072
+ spin.succeed(`Cleared ${paiHome}`);
3073
+ info("Run \"pai init\" to set up again.");
2625
3074
  } catch (err) {
2626
3075
  spin.fail("Reset failed");
2627
3076
  const msg = err instanceof Error ? err.message : String(err);
@@ -3802,6 +4251,116 @@ function registerProfileCommand(program) {
3802
4251
  });
3803
4252
  }
3804
4253
 
4254
+ //#endregion
4255
+ //#region src/cli/register.log.ts
4256
+ function registerLogCommand(program) {
4257
+ program.command("log [text...]").description("Quick-log to today's journal (memory/YYYY-MM-DD.md)").option("--clip", "Read from clipboard (macOS pbpaste)").option("--show", "Show today's journal").option("--date <date>", "Target date (YYYY-MM-DD), default today").action(async (textParts, opts) => {
4258
+ try {
4259
+ const date = opts.date ?? getTodayDate();
4260
+ if (opts.show) {
4261
+ const content = await readJournal(date);
4262
+ if (!content) {
4263
+ info(`No journal for ${date}.`);
4264
+ return;
4265
+ }
4266
+ log(content);
4267
+ const status = await getJournalStatus(date);
4268
+ if (status) log(dim(`(${status.entries} entries${status.hasDigest ? ", has digest" : ""})`));
4269
+ return;
4270
+ }
4271
+ let text = textParts.join(" ").trim();
4272
+ if (opts.clip || !text) {
4273
+ if (opts.clip) {
4274
+ const { execFile } = await import("node:child_process");
4275
+ const { promisify } = await import("node:util");
4276
+ const execFileP = promisify(execFile);
4277
+ try {
4278
+ const { stdout } = await execFileP("pbpaste");
4279
+ text = stdout.trim();
4280
+ } catch {
4281
+ error("Failed to read clipboard. Is pbpaste available?");
4282
+ process.exit(1);
4283
+ }
4284
+ }
4285
+ if (!text) {
4286
+ error("Usage: pai log \"your note\" or pai log --clip");
4287
+ process.exit(1);
4288
+ }
4289
+ }
4290
+ const { path: journalPath, entryCount } = await appendJournal(text, date);
4291
+ success(`Logged to ${date} (entry #${entryCount})`);
4292
+ log(dim(` ${journalPath}`));
4293
+ } catch (err) {
4294
+ const msg = err instanceof Error ? err.message : String(err);
4295
+ error(`Log failed: ${msg}`);
4296
+ process.exit(1);
4297
+ }
4298
+ });
4299
+ }
4300
+
4301
+ //#endregion
4302
+ //#region src/cli/register.digest.ts
4303
+ function registerDigestCommand(program) {
4304
+ program.command("digest").description("Generate daily AI digest for a journal day").option("--date <date>", "Target date (YYYY-MM-DD), default today").option("--dry-run", "Preview digest without writing").action(async (opts) => {
4305
+ try {
4306
+ const date = opts.date ?? getTodayDate();
4307
+ const journal = await readJournal(date);
4308
+ if (!journal) {
4309
+ info(`No journal for ${date}. Use "pai log" to add entries first.`);
4310
+ return;
4311
+ }
4312
+ const spin = spinner(`Generating digest for ${date}...`);
4313
+ const todayEntries = (await listVaultEntries()).filter((e) => e.parsed?.date === date).map((e) => e.line);
4314
+ const pinDataText = todayEntries.length > 0 ? todayEntries.join("\n") : "";
4315
+ const system = digestSystemPrompt();
4316
+ const digest = await llmCall(digestUserPrompt(journal, pinDataText, date), system);
4317
+ if (opts.dryRun) {
4318
+ spin.succeed(`[DRY RUN] Digest for ${date}:`);
4319
+ log("");
4320
+ log(digest);
4321
+ return;
4322
+ }
4323
+ const journalPath = await appendDigest(digest, date);
4324
+ spin.succeed(`Digest written for ${date}`);
4325
+ log("");
4326
+ log(digest);
4327
+ log("");
4328
+ log(dim(` ${journalPath}`));
4329
+ } catch (err) {
4330
+ const msg = err instanceof Error ? err.message : String(err);
4331
+ error(`Digest failed: ${msg}`);
4332
+ process.exit(1);
4333
+ }
4334
+ });
4335
+ }
4336
+
4337
+ //#endregion
4338
+ //#region src/cli/register.gaps.ts
4339
+ function registerGapsCommand(program) {
4340
+ program.command("gaps").description("Check for missing journal entries in the last N days").option("--days <n>", "Number of days to check", "7").action(async (opts) => {
4341
+ try {
4342
+ const days = parseInt(opts.days, 10);
4343
+ if (isNaN(days) || days < 1) {
4344
+ error("--days must be a positive number.");
4345
+ process.exit(1);
4346
+ }
4347
+ const gaps = await findGaps(days);
4348
+ if (gaps.length === 0) {
4349
+ success(`No gaps in the last ${days} day(s). All journals present.`);
4350
+ return;
4351
+ }
4352
+ warn(`Found ${gaps.length} missing journal(s) in the last ${days} days:`);
4353
+ for (const date of gaps) log(` ${date}`);
4354
+ log("");
4355
+ info("Use \"pai log --date YYYY-MM-DD 'note'\" to backfill.");
4356
+ } catch (err) {
4357
+ const msg = err instanceof Error ? err.message : String(err);
4358
+ error(`Gaps check failed: ${msg}`);
4359
+ process.exit(1);
4360
+ }
4361
+ });
4362
+ }
4363
+
3805
4364
  //#endregion
3806
4365
  //#region src/cli/command-registry.ts
3807
4366
  const commandRegistry = [
@@ -3821,6 +4380,10 @@ const commandRegistry = [
3821
4380
  id: "add",
3822
4381
  register: (p) => registerAddCommand(p)
3823
4382
  },
4383
+ {
4384
+ id: "log",
4385
+ register: (p) => registerLogCommand(p)
4386
+ },
3824
4387
  {
3825
4388
  id: "profile",
3826
4389
  register: (p) => registerProfileCommand(p)
@@ -3829,6 +4392,14 @@ const commandRegistry = [
3829
4392
  id: "distill",
3830
4393
  register: (p) => registerDistillCommand(p)
3831
4394
  },
4395
+ {
4396
+ id: "digest",
4397
+ register: (p) => registerDigestCommand(p)
4398
+ },
4399
+ {
4400
+ id: "gaps",
4401
+ register: (p) => registerGapsCommand(p)
4402
+ },
3832
4403
  {
3833
4404
  id: "generate",
3834
4405
  register: (p) => registerGenerateCommand(p)