becki-mcp 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +32 -0
- package/README.md +142 -0
- package/dist/core/ai-sessions.js +325 -0
- package/dist/core/db.js +221 -0
- package/dist/core/init.js +218 -0
- package/dist/core/project-activity.js +225 -0
- package/dist/core/runner.js +109 -0
- package/dist/index.js +3412 -0
- package/package.json +61 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
becki-mcp — Becki Core MCP server
|
|
2
|
+
Copyright © 2026 BECKI.IO LLC
|
|
3
|
+
|
|
4
|
+
All rights reserved.
|
|
5
|
+
|
|
6
|
+
This npm package and its compiled JavaScript are distributed as a runtime
|
|
7
|
+
client for the Becki service (https://www.becki.io). Use of this software
|
|
8
|
+
requires a valid paid subscription and is governed by the Becki Terms of
|
|
9
|
+
Service at https://www.becki.io/terms and the End User License Agreement
|
|
10
|
+
at https://www.becki.io/eula.
|
|
11
|
+
|
|
12
|
+
You are permitted to:
|
|
13
|
+
- Install this package and run the `becki-mcp` binary on machines you own
|
|
14
|
+
or control, for personal or business use, in conjunction with a valid
|
|
15
|
+
Becki subscription tied to your account.
|
|
16
|
+
- Inspect the compiled JavaScript for security or compatibility purposes.
|
|
17
|
+
|
|
18
|
+
You are NOT permitted to:
|
|
19
|
+
- Redistribute this package, its source, or modified versions.
|
|
20
|
+
- Reverse-engineer, decompile, or extract the binary for purposes of
|
|
21
|
+
creating a competing product or service.
|
|
22
|
+
- Use this software with any backend other than the official Becki
|
|
23
|
+
backend operated by BECKI.IO LLC, except where explicitly authorized
|
|
24
|
+
in writing.
|
|
25
|
+
- Remove or modify the copyright notices, the LICENSE file, or any
|
|
26
|
+
notices embedded in the source.
|
|
27
|
+
|
|
28
|
+
This software is provided "as-is" without warranty of any kind, express or
|
|
29
|
+
implied. BECKI.IO LLC will not be liable for any damages arising from the
|
|
30
|
+
use or inability to use this software.
|
|
31
|
+
|
|
32
|
+
For licensing inquiries, contact: legal@becki.io
|
package/README.md
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
# becki-mcp
|
|
2
|
+
|
|
3
|
+
**[Becki](https://www.becki.io) Core — cross-platform memory layer for AI tools.**
|
|
4
|
+
|
|
5
|
+
A Model Context Protocol (MCP) server that gives Claude, Cursor, Codex, ChatGPT,
|
|
6
|
+
and any MCP-compatible client persistent, structured memory across every session,
|
|
7
|
+
project, and machine.
|
|
8
|
+
|
|
9
|
+
- 🧠 **Structured memory** — decisions, commitments, asks, dead-ends, open loops (not just notes)
|
|
10
|
+
- 🔄 **AI session bootstrap** — auto-indexes your existing Claude Code / Codex / Cursor history
|
|
11
|
+
- 📁 **Project sweeper** — watches your repos, extracts intent from git activity
|
|
12
|
+
- 🌍 **Cross-platform** — Windows, macOS, Linux
|
|
13
|
+
- 🔐 **Auth-scoped** — per-install token, server-issued, revocable from the dashboard
|
|
14
|
+
|
|
15
|
+
Requires a paid Becki subscription. Sign up at [becki.io/account](https://www.becki.io/account)
|
|
16
|
+
($15/month, monthly billing).
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## Install
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
npm install -g becki-mcp
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Requires **Node.js 20+**. Works on Windows, macOS, and Linux.
|
|
27
|
+
|
|
28
|
+
## Setup
|
|
29
|
+
|
|
30
|
+
Generate an install token at [becki.io/account](https://www.becki.io/account), then:
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
becki-mcp init
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
This will:
|
|
37
|
+
1. Scan common project directories (`~/Documents`, `~/Repos`, `~/Code`, `~/src`, `~/Projects`, `~/Developer`) for git repos
|
|
38
|
+
2. Prompt to register them as Becki projects
|
|
39
|
+
3. Print the MCP config snippet to paste into your AI client
|
|
40
|
+
|
|
41
|
+
Add custom scan paths with `--scan <path>` (repeatable) or auto-confirm everything with `--yes`.
|
|
42
|
+
|
|
43
|
+
## Configure your AI client
|
|
44
|
+
|
|
45
|
+
After `becki-mcp init`, add the printed snippet to your AI client's MCP config:
|
|
46
|
+
|
|
47
|
+
**Claude Desktop** — `~/Library/Application Support/Claude/claude_desktop_config.json`
|
|
48
|
+
(Mac) or `%APPDATA%/Claude/claude_desktop_config.json` (Windows):
|
|
49
|
+
|
|
50
|
+
```json
|
|
51
|
+
{
|
|
52
|
+
"mcpServers": {
|
|
53
|
+
"becki": {
|
|
54
|
+
"command": "becki-mcp"
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
**Cursor / Windsurf / Codex CLI / Zed** — same structure, under `mcpServers`.
|
|
61
|
+
|
|
62
|
+
Restart your AI client. The `becki_*` tools will appear.
|
|
63
|
+
|
|
64
|
+
## Backfill your AI session history
|
|
65
|
+
|
|
66
|
+
To ingest the last 90 days of Claude Code / Codex CLI / Cursor sessions:
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
becki-mcp bootstrap
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Or specify a custom window:
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
becki-mcp bootstrap 30 # last 30 days
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Bootstrap is idempotent — files already processed are tracked in the local
|
|
79
|
+
SQLite cache and skipped on re-run.
|
|
80
|
+
|
|
81
|
+
## Daily digest
|
|
82
|
+
|
|
83
|
+
A background daily digest runs automatically when the MCP server is invoked by
|
|
84
|
+
your AI client (it kicks off after the first request and re-runs every 12h).
|
|
85
|
+
You can also run it manually:
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
becki-mcp digest
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Local files
|
|
92
|
+
|
|
93
|
+
becki-mcp stores configuration and a local cache at:
|
|
94
|
+
|
|
95
|
+
- **Default:** `~/.becki/` (Windows: `%USERPROFILE%\.becki\`)
|
|
96
|
+
- **Mac with Becki Studio installed:** `~/Library/Application Support/Becki/` (shared with the Mac app)
|
|
97
|
+
- **Override:** set the `BECKI_HOME` env variable
|
|
98
|
+
|
|
99
|
+
The cache (`cache.db`) is a SQLite file holding:
|
|
100
|
+
- Project registrations
|
|
101
|
+
- AI session index (paths + last-processed sizes for idempotency)
|
|
102
|
+
- Recent vault rows for fast MCP reads
|
|
103
|
+
- Daily digest state
|
|
104
|
+
|
|
105
|
+
No audio, no transcripts, no model weights. Footprint typically 10–100 MB
|
|
106
|
+
depending on AI history depth.
|
|
107
|
+
|
|
108
|
+
## Commands
|
|
109
|
+
|
|
110
|
+
| Command | What it does |
|
|
111
|
+
|---|---|
|
|
112
|
+
| `becki-mcp` | Run MCP stdio server (used by AI clients — your normal config target) |
|
|
113
|
+
| `becki-mcp init` | First-time setup; register projects + print MCP config snippet |
|
|
114
|
+
| `becki-mcp digest` | Run today's session-digest sweep right now |
|
|
115
|
+
| `becki-mcp bootstrap [N]` | Historical ingest of AI session logs (default 90 days) |
|
|
116
|
+
| `becki-mcp --help` | Show usage |
|
|
117
|
+
|
|
118
|
+
## Privacy
|
|
119
|
+
|
|
120
|
+
- Your install token authenticates only to the Becki backend (Supabase) — no third parties.
|
|
121
|
+
- All ingested content goes to **your private vault**. Per-user row-level security enforced at the database level.
|
|
122
|
+
- AI session logs are read locally — only extracted *entities* (decisions / commitments / asks / dead-ends) are sent to the backend, never raw transcripts.
|
|
123
|
+
- Becki uses Anthropic Haiku for extraction and Voyage AI for embeddings. Both are on no-train policies — your data is never used for model training.
|
|
124
|
+
|
|
125
|
+
## Becki Studio (Mac users)
|
|
126
|
+
|
|
127
|
+
If you want **meeting capture** (on-device transcription), the native Mac app
|
|
128
|
+
with the **NeuraVault 3D graph**, and the full visual interface, upgrade to
|
|
129
|
+
[Becki Studio](https://www.becki.io/pricing) ($25/month). Studio bundles
|
|
130
|
+
becki-mcp, so you don't need to install it separately.
|
|
131
|
+
|
|
132
|
+
## Support
|
|
133
|
+
|
|
134
|
+
- **Docs:** https://www.becki.io
|
|
135
|
+
- **Issues:** https://github.com/bdsantosDEV/becki-vault/issues
|
|
136
|
+
- **Email:** support@becki.io
|
|
137
|
+
|
|
138
|
+
## License
|
|
139
|
+
|
|
140
|
+
Copyright © 2026 BECKI.IO LLC. All rights reserved. Use of this package is
|
|
141
|
+
governed by the Becki [Terms of Service](https://www.becki.io/terms) and a
|
|
142
|
+
valid paid subscription. See [LICENSE](./LICENSE) for distribution terms.
|
package/dist/core/ai-sessions.js
ADDED
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
// ai-sessions.ts — Cross-platform AI session log indexer for Becki Core
|
|
2
|
+
// (#191 sub-task 3). Port of AISessionWatcher.swift.
|
|
3
|
+
//
|
|
4
|
+
// Scans `~/.claude/projects/**/*.jsonl` (Claude Code) and
|
|
5
|
+
// `~/.codex/sessions/**/*.jsonl` (Codex CLI) for AI tool conversations,
|
|
6
|
+
// extracts decisions / commitments / dead-ends / open-loops via the
|
|
7
|
+
// Becki extraction endpoint, and ingests results into NeuraVault.
|
|
8
|
+
//
|
|
9
|
+
// Two entry points:
|
|
10
|
+
//   runDailyDigest()       — daily sweep: settled sessions (at most 7 days old,
|
|
11
|
+
// >1h since last activity) only
|
|
12
|
+
// runHistoricalBootstrap — full retroactive sweep on first run
|
|
13
|
+
// (optional age-range slider)
|
|
14
|
+
//
|
|
15
|
+
// Idempotency: tracks per-file size in `ai_session_state` (BeckiCache).
|
|
16
|
+
// Re-ingest only when a file grows.
|
|
17
|
+
//
|
|
18
|
+
// MAC-SPECIFIC PIECES PORTED:
|
|
19
|
+
// NSBackgroundActivityScheduler → setInterval-based scheduler in runner.ts
|
|
20
|
+
// NSHomeDirectory + FileManager → os.homedir() + fs.promises
|
|
21
|
+
// UserDefaults → BeckiCache.config()
|
|
22
|
+
//
|
|
23
|
+
// EXTRACTION CALL: stubbed via the `extract` injection so this module stays
|
|
24
|
+
// transport-agnostic. The real wiring (Becki backend HTTP call or local
|
|
25
|
+
// Anthropic SDK call) happens in runner.ts based on what credentials Core
|
|
26
|
+
// is configured with.
|
|
27
|
+
import { readdir, stat, readFile } from "fs/promises";
|
|
28
|
+
import { join, basename } from "path";
|
|
29
|
+
import { homedir } from "os";
|
|
30
|
+
import { createHash } from "crypto";
|
|
31
|
+
// ── Constants ───────────────────────────────────────────────────────────────
|
|
32
|
+
// Size thresholds: a session needs at least this many turns and bytes to be parsed.
const MIN_TURNS = 5;
const MIN_BYTES = 1024;
// Longest transcript (in characters) kept after parsing; longer ones keep their tail.
const MAX_TRANSCRIPT_CHARS = 40_000;
// Daily-digest settlement window, expressed in milliseconds.
const SETTLED_MIN_AGE_MS = 3_600_000; // 1h
const SETTLED_MAX_AGE_MS = 604_800_000; // 7d
// Fallback daily token budget when no cap is stored in the cache config.
const DAILY_TOKEN_CAP_DEFAULT = 80_000; // ~$0.10/day at Haiku rates
// Cache config keys used by this module.
const CONFIG_KEYS = {
    "enabled": "aiSessionWatcher.enabled",
    "lastDigestAt": "aiSessionWatcher.lastDigestAt",
    "dailyBudgetUsed": "aiSessionWatcher.dailyBudgetUsedToday",
    "dailyBudgetDate": "aiSessionWatcher.dailyBudgetDate",
    "dailyTokenCap": "aiSessionWatcher.dailyTokenCapHaiku",
};
|
|
45
|
+
// ── Discovery ───────────────────────────────────────────────────────────────
|
|
46
|
+
// Root folders, under the user's home directory, that are scanned for
// Claude Code and Codex CLI session logs respectively.
const [CLAUDE_DIR, CODEX_DIR] = [
    [".claude", "projects"],
    [".codex", "sessions"],
].map((segments) => join(homedir(), ...segments));
|
|
48
|
+
/**
 * Report whether `path` can be stat'ed.
 * Resolves to `true` when `stat` succeeds and to `false` when it fails for
 * any reason; it never rejects.
 */
async function exists(path) {
    let reachable = true;
    try {
        await stat(path);
    } catch {
        reachable = false;
    }
    return reachable;
}
|
|
57
|
+
/**
 * Gather the path of every `.jsonl` file found under `dir`, descending into
 * subdirectories. Recursion stops once `depth` exceeds 8, which bounds the
 * walk on unusual directory layouts. A directory that is missing or cannot
 * be listed contributes an empty result instead of an error.
 */
async function walkJsonl(dir, depth = 0) {
    if (depth > 8) {
        return [];
    }
    const present = await exists(dir);
    if (!present) {
        return [];
    }
    let dirents;
    try {
        dirents = await readdir(dir, { withFileTypes: true });
    } catch {
        return [];
    }
    const found = [];
    for (const dirent of dirents) {
        const childPath = join(dir, dirent.name);
        if (dirent.isDirectory()) {
            const nested = await walkJsonl(childPath, depth + 1);
            found.push(...nested);
            continue;
        }
        if (dirent.isFile() && childPath.endsWith(".jsonl")) {
            found.push(childPath);
        }
    }
    return found;
}
|
|
83
|
+
/**
 * Find every session log on disk: Claude Code files first, then Codex CLI
 * files. Each entry carries the file path, a source tag, and the size and
 * mtime reported by `stat`. Files that cannot be stat'ed are left out.
 */
export async function discoverAllSessions() {
    const groups = [
        { source: "claude-code-session", paths: await walkJsonl(CLAUDE_DIR) },
        { source: "codex-cli-session", paths: await walkJsonl(CODEX_DIR) },
    ];
    const sessions = [];
    for (const { source, paths } of groups) {
        for (const path of paths) {
            try {
                const { size, mtimeMs } = await stat(path);
                sessions.push({ path, source, size, mtimeMs });
            } catch { /* skip */ }
        }
    }
    return sessions;
}
|
|
104
|
+
/**
 * Pick the sessions the daily digest should look at. A candidate is kept when
 *  - its mtime lies between SETTLED_MIN_AGE_MS and SETTLED_MAX_AGE_MS in the
 *    past (settled, not active), and
 *  - the cache has no state for it, or its size is strictly greater than the
 *    recorded `file_size`.
 */
export function selectSettled(candidates, cache) {
    const now = Date.now();
    const isEligible = ({ path, size, mtimeMs }) => {
        const age = now - mtimeMs;
        const outsideWindow = age < SETTLED_MIN_AGE_MS || age > SETTLED_MAX_AGE_MS;
        if (outsideWindow) {
            return false;
        }
        const seen = cache.getSessionState(path);
        const unchanged = seen && size <= seen.file_size;
        return !unchanged;
    };
    return candidates.filter((candidate) => isEligible(candidate));
}
|
|
121
|
+
/**
 * Parse one session `.jsonl` file into a flat transcript string.
 *
 * Every non-blank line is parsed as JSON. A line becomes a turn only when it
 * is a record of type "user", "assistant" or "message" whose `message` has a
 * non-empty string `role` and some text: either a string `content`, or an
 * array `content` whose parts expose a string `text` (joined with newlines).
 * Anything else — unparsable lines, bare `null`, other record types,
 * malformed roles — is skipped rather than treated as an error.
 *
 * @param {string} path - Session log file to read.
 * @returns {Promise<{transcript: string, turnCount: number, charCount: number, rawSize: number} | null>}
 *   The transcript (capped to its last MAX_TRANSCRIPT_CHARS characters), the
 *   number of turns found, the transcript length and the file's byte size;
 *   or `null` when the file cannot be read, is smaller than MIN_BYTES, or
 *   yields fewer than MIN_TURNS turns.
 */
export async function parseSession(path) {
    let raw;
    try {
        raw = await readFile(path, "utf8");
    } catch {
        return null;
    }
    const rawSize = Buffer.byteLength(raw, "utf8");
    if (rawSize < MIN_BYTES) {
        return null;
    }
    const turns = [];
    for (const line of raw.split("\n")) {
        if (!line.trim()) {
            continue;
        }
        let record;
        try {
            record = JSON.parse(line);
        } catch {
            continue;
        }
        // JSON.parse accepts a bare `null`; optional chaining keeps such a
        // line from throwing and aborting the whole parse.
        const kind = record?.type;
        if (kind !== "user" && kind !== "assistant" && kind !== "message") {
            continue;
        }
        // NOTE(review): "message" records are accepted above but still need a
        // nested `.message` carrying the role — confirm this matches the
        // Codex CLI log shape.
        const msg = record.message;
        // A role that is not a string cannot be upper-cased below.
        if (typeof msg?.role !== "string" || msg.role === "") {
            continue;
        }
        let text = null;
        if (typeof msg.content === "string") {
            text = msg.content;
        } else if (Array.isArray(msg.content)) {
            const parts = msg.content
                .filter((part) => typeof part?.text === "string")
                .map((part) => part.text);
            text = parts.length > 0 ? parts.join("\n") : null;
        }
        if (!text) {
            continue;
        }
        turns.push(`[${msg.role.toUpperCase()}] ${text}\n\n`);
    }
    const turnCount = turns.length;
    if (turnCount < MIN_TURNS) {
        return null;
    }
    let transcript = turns.join("");
    // Tail-cap: when the conversation is huge, the most recent turns are
    // where the actionable content lives, so keep the end of the transcript.
    if (transcript.length > MAX_TRANSCRIPT_CHARS) {
        transcript = transcript.slice(-MAX_TRANSCRIPT_CHARS);
    }
    return { transcript, turnCount, charCount: transcript.length, rawSize };
}
|
|
181
|
+
// ── Slug (must match the Swift toSlug + the index.ts toSlug) ────────────────
|
|
182
|
+
/**
 * Turn arbitrary text into a slug: lowercase it, collapse every run of
 * characters outside `a-z0-9` into a single `-`, strip leading and trailing
 * hyphens, then keep at most the first 60 characters.
 * Keep this identical to the Swift `slug(_)` for cross-source dedup.
 */
export function toSlug(input) {
    return input
        .toLowerCase()
        .replace(/[^a-z0-9]+/g, "-")
        .replace(/^-+|-+$/g, "")
        .slice(0, 60);
}
|
|
190
|
+
/**
 * Build a session's sourceId in the form `YYYY-MM-DD-{slugOfSessionFileName}`.
 * The date is the first ten characters of `when.toISOString()` (`when`
 * defaults to the current time); the slug comes from the file name with any
 * `.jsonl` extension removed. Follows the Swift convention so the server-side
 * `(user_id, source_type, source_id)` UNIQUE constraint collides reliably.
 */
export function makeSourceId(sessionPath, when = new Date()) {
    const fileName = basename(sessionPath);
    const stem = fileName.replace(/\.jsonl$/i, "");
    const day = when.toISOString().substring(0, 10);
    return [day, toSlug(stem)].join("-");
}
|
|
198
|
+
// ── Token budget ────────────────────────────────────────────────────────────
|
|
199
|
+
/**
 * Rough token count for `text`: one token per four characters, rounded up.
 * Only meant for deciding whether the daily budget would be exceeded.
 */
function estimateTokens(text) {
    const charCount = text.length;
    return Math.ceil(charCount / 4);
}
|
|
203
|
+
/** Current date as `YYYY-MM-DD`, taken from the start of the ISO-8601 (UTC) timestamp. */
function todayKey() {
    const isoNow = new Date().toISOString();
    return isoNow.substring(0, 10);
}
|
|
206
|
+
/**
 * Get-or-reset the daily token budget counter.
 *
 * When the stored budget date differs from today's key (UTC date rollover),
 * the date is updated and the used counter is reset to "0". The stored
 * counter and cap are then read back as numbers.
 *
 * A stored value that is not a finite, non-negative number falls back to its
 * default (0 for `used`, DAILY_TOKEN_CAP_DEFAULT for `cap`). Without this, a
 * corrupted entry would turn into NaN and make every `used + estimate > cap`
 * comparison false, silently disabling the cap.
 *
 * @param {{getConfig: Function, setConfig: Function}} cache - Config store.
 * @returns {{used: number, cap: number}} Tokens used today and the daily cap.
 */
function ensureBudgetWindow(cache) {
    const today = todayKey();
    if (cache.getConfig(CONFIG_KEYS.dailyBudgetDate) !== today) {
        cache.setConfig(CONFIG_KEYS.dailyBudgetDate, today);
        cache.setConfig(CONFIG_KEYS.dailyBudgetUsed, "0");
    }
    const readCount = (key, fallback) => {
        const value = Number(cache.getConfig(key) ?? fallback);
        return Number.isFinite(value) && value >= 0 ? value : fallback;
    };
    return {
        used: readCount(CONFIG_KEYS.dailyBudgetUsed, 0),
        cap: readCount(CONFIG_KEYS.dailyTokenCap, DAILY_TOKEN_CAP_DEFAULT),
    };
}
|
|
218
|
+
/**
 * Add `tokens` to today's usage counter and store the new total.
 *
 * The current total comes from `ensureBudgetWindow`, which also handles the
 * date rollover. A total that is not a finite number is treated as 0 so a
 * corrupted entry cannot be written back as "NaN" and stick until the next
 * rollover.
 *
 * @param {{getConfig: Function, setConfig: Function}} cache - Config store.
 * @param {number} tokens - Tokens to add to today's usage.
 */
function addBudgetUsage(cache, tokens) {
    const { used } = ensureBudgetWindow(cache);
    const prior = Number.isFinite(used) ? used : 0;
    cache.setConfig(CONFIG_KEYS.dailyBudgetUsed, String(prior + tokens));
}
|
|
223
|
+
/**
 * Run one sweep over the discovered AI session files: parse each eligible
 * session, extract entities from its transcript, and ingest them.
 *
 * Eligibility:
 *  - bootstrap (`opts.isBootstrap` truthy): files modified within
 *    `opts.ageWindowDays` days (default 90) that the cache has not flagged
 *    as bootstrap-processed;
 *  - otherwise: whatever `selectSettled` keeps.
 *
 * A failure on one session (parse, extract or ingest) is counted in `errors`
 * and logged; the sweep then moves on. The sweep stops early once the next
 * transcript would exceed the daily token cap.
 *
 * @param {object} opts
 * @param {object} opts.cache - Store providing `isBootstrapProcessed`,
 *   `recordSessionProcessed` and `setConfig`, plus whatever the helpers
 *   called here read from it.
 * @param {(transcript: string) => Promise<object>} opts.extract - Returns an
 *   object whose `decisions`, `deadEnds`, `commitments` and `openLoops`
 *   lists hold strings. A missing result or list counts as empty.
 * @param {(row: object) => Promise<void>} opts.ingest - Receives one row
 *   (`type`, `content`, `sourceType`, `sourceId`, `metadata`) per entity.
 * @param {(message: string) => void} [opts.logger] - Optional log sink.
 * @param {boolean} [opts.isBootstrap] - Select the bootstrap rules.
 * @param {number} [opts.ageWindowDays] - Bootstrap look-back window in days.
 * @returns {Promise<object>} Counters for the sweep. `skippedNoGrowth` is
 *   never incremented here; sessions that did not grow are part of
 *   `skippedSettled`.
 */
export async function runDigest(opts) {
    const log = opts.logger ?? (() => { });
    // Thrown values are not guaranteed to be Error instances.
    const describe = (err) => (err instanceof Error ? err.message : String(err));
    const out = {
        processed: 0, skippedSettled: 0, skippedNoGrowth: 0,
        skippedTooSmall: 0, skippedTokenCap: 0, ingested: 0, errors: 0,
    };
    const all = await discoverAllSessions();
    log(`ai-sessions: discovered ${all.length} session files`);
    // Apply settlement filter (or skip for bootstrap)
    let eligible;
    if (opts.isBootstrap) {
        const maxAge = (opts.ageWindowDays ?? 90) * 24 * 60 * 60 * 1000;
        const now = Date.now();
        eligible = all.filter((s) => {
            if (now - s.mtimeMs > maxAge) {
                return false;
            }
            // Bootstrap dedup uses the bootstrap_processed flag, not size.
            return !opts.cache.isBootstrapProcessed(s.path);
        });
    } else {
        eligible = selectSettled(all, opts.cache);
        out.skippedSettled = all.length - eligible.length;
    }
    log(`ai-sessions: ${eligible.length} eligible for ${opts.isBootstrap ? "bootstrap" : "daily digest"}`);
    for (const file of eligible) {
        out.processed += 1;
        let parsed;
        try {
            parsed = await parseSession(file.path);
        } catch (err) {
            // One unparsable session must not abort the rest of the sweep.
            out.errors += 1;
            log(`ai-sessions: parse failed for ${file.path}: ${describe(err)}`);
            continue;
        }
        if (!parsed) {
            out.skippedTooSmall += 1;
            // Still record the file size so an unchanged file is not
            // selected and parsed again on the next sweep.
            opts.cache.recordSessionProcessed(file.path, file.size, opts.isBootstrap === true);
            continue;
        }
        // Token budget check before calling extractor (which costs Haiku tokens).
        const estimate = estimateTokens(parsed.transcript);
        const { used, cap } = ensureBudgetWindow(opts.cache);
        if (used + estimate > cap) {
            out.skippedTokenCap += 1;
            log(`ai-sessions: token cap reached (${used}/${cap}); pausing`);
            break;
        }
        let extracted;
        try {
            extracted = await opts.extract(parsed.transcript);
            addBudgetUsage(opts.cache, estimate);
        } catch (err) {
            out.errors += 1;
            log(`ai-sessions: extract failed for ${file.path}: ${describe(err)}`);
            continue;
        }
        const sourceId = makeSourceId(file.path);
        const metadata = {
            session_path: file.path,
            session_source: file.source,
            session_turns: parsed.turnCount,
            session_chars: parsed.charCount,
            session_mtime_ms: file.mtimeMs,
        };
        const ingestBatch = async (items, type) => {
            // A missing or malformed list simply yields nothing to ingest.
            if (!Array.isArray(items)) {
                return;
            }
            for (const content of items) {
                if (typeof content !== "string" || !content.trim()) {
                    continue;
                }
                try {
                    await opts.ingest({
                        type,
                        content,
                        sourceType: file.source,
                        sourceId,
                        metadata,
                    });
                    out.ingested += 1;
                } catch (err) {
                    out.errors += 1;
                    log(`ai-sessions: ingest failed (${type}): ${describe(err)}`);
                }
            }
        };
        await ingestBatch(extracted?.decisions, "decision");
        await ingestBatch(extracted?.deadEnds, "dead_end");
        await ingestBatch(extracted?.commitments, "commitment");
        await ingestBatch(extracted?.openLoops, "open_loop");
        opts.cache.recordSessionProcessed(file.path, file.size, opts.isBootstrap === true);
    }
    opts.cache.setConfig(CONFIG_KEYS.lastDigestAt, String(Date.now()));
    return out;
}
|
|
312
|
+
// ── Helpers exported for tests / inspection ─────────────────────────────────
|
|
313
|
+
// Module-private constants and helpers, exposed for tests and inspection.
export const _internals = {
    CLAUDE_DIR,
    CODEX_DIR,
    MIN_TURNS,
    MIN_BYTES,
    MAX_TRANSCRIPT_CHARS,
    SETTLED_MIN_AGE_MS,
    SETTLED_MAX_AGE_MS,
    CONFIG_KEYS,
    estimateTokens,
    todayKey,
    // First 16 hex characters of the SHA-256 of `content`, then ":" and the file's base name.
    hashFile: (path, content) => {
        const digest = createHash("sha256").update(content).digest("hex");
        return digest.slice(0, 16) + ":" + basename(path);
    },
};
|