ricord 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +213 -0
- package/commands/ricord-flush.md +29 -0
- package/commands/ricord-init.md +129 -0
- package/commands/ricord-lint.md +64 -0
- package/commands/ricord-query.md +71 -0
- package/dist/cli/auth.d.ts +16 -0
- package/dist/cli/auth.js +42 -0
- package/dist/cli/auth.js.map +1 -0
- package/dist/cli/bundle.d.ts +25 -0
- package/dist/cli/bundle.js +179 -0
- package/dist/cli/bundle.js.map +1 -0
- package/dist/cli/cache.d.ts +18 -0
- package/dist/cli/cache.js +39 -0
- package/dist/cli/cache.js.map +1 -0
- package/dist/cli/cli.d.ts +21 -0
- package/dist/cli/cli.js +355 -0
- package/dist/cli/cli.js.map +1 -0
- package/dist/cli/client.d.ts +12 -0
- package/dist/cli/client.js +35 -0
- package/dist/cli/client.js.map +1 -0
- package/dist/cli/commands/build.d.ts +44 -0
- package/dist/cli/commands/build.js +437 -0
- package/dist/cli/commands/build.js.map +1 -0
- package/dist/cli/commands/curate.d.ts +32 -0
- package/dist/cli/commands/curate.js +154 -0
- package/dist/cli/commands/curate.js.map +1 -0
- package/dist/cli/commands/doctor.d.ts +16 -0
- package/dist/cli/commands/doctor.js +92 -0
- package/dist/cli/commands/doctor.js.map +1 -0
- package/dist/cli/commands/ingest.d.ts +25 -0
- package/dist/cli/commands/ingest.js +121 -0
- package/dist/cli/commands/ingest.js.map +1 -0
- package/dist/cli/commands/install.d.ts +16 -0
- package/dist/cli/commands/install.js +82 -0
- package/dist/cli/commands/install.js.map +1 -0
- package/dist/cli/commands/pull.d.ts +24 -0
- package/dist/cli/commands/pull.js +104 -0
- package/dist/cli/commands/pull.js.map +1 -0
- package/dist/cli/commands/push.d.ts +28 -0
- package/dist/cli/commands/push.js +164 -0
- package/dist/cli/commands/push.js.map +1 -0
- package/dist/cli/commands/rollup.d.ts +21 -0
- package/dist/cli/commands/rollup.js +118 -0
- package/dist/cli/commands/rollup.js.map +1 -0
- package/dist/cli/commands/setup.d.ts +7 -0
- package/dist/cli/commands/setup.js +43 -0
- package/dist/cli/commands/setup.js.map +1 -0
- package/dist/cli/commands/sync.d.ts +15 -0
- package/dist/cli/commands/sync.js +63 -0
- package/dist/cli/commands/sync.js.map +1 -0
- package/dist/cli/commands/watch.d.ts +17 -0
- package/dist/cli/commands/watch.js +87 -0
- package/dist/cli/commands/watch.js.map +1 -0
- package/dist/cli/config.d.ts +29 -0
- package/dist/cli/config.js +52 -0
- package/dist/cli/config.js.map +1 -0
- package/dist/cli/extract.d.ts +101 -0
- package/dist/cli/extract.js +216 -0
- package/dist/cli/extract.js.map +1 -0
- package/dist/cli/ingest.d.ts +48 -0
- package/dist/cli/ingest.js +74 -0
- package/dist/cli/ingest.js.map +1 -0
- package/dist/cli/ledger.d.ts +44 -0
- package/dist/cli/ledger.js +67 -0
- package/dist/cli/ledger.js.map +1 -0
- package/dist/cli/llm.d.ts +21 -0
- package/dist/cli/llm.js +138 -0
- package/dist/cli/llm.js.map +1 -0
- package/dist/cli/parse.d.ts +13 -0
- package/dist/cli/parse.js +188 -0
- package/dist/cli/parse.js.map +1 -0
- package/dist/cli/run-explore.d.ts +56 -0
- package/dist/cli/run-explore.js +229 -0
- package/dist/cli/run-explore.js.map +1 -0
- package/dist/cli/summarize.d.ts +15 -0
- package/dist/cli/summarize.js +49 -0
- package/dist/cli/summarize.js.map +1 -0
- package/dist/cli/uninstall.d.ts +6 -0
- package/dist/cli/uninstall.js +277 -0
- package/dist/cli/uninstall.js.map +1 -0
- package/dist/cli/walk.d.ts +13 -0
- package/dist/cli/walk.js +62 -0
- package/dist/cli/walk.js.map +1 -0
- package/dist/cli/walker.d.ts +14 -0
- package/dist/cli/walker.js +120 -0
- package/dist/cli/walker.js.map +1 -0
- package/dist/hooks/pre-compact.d.ts +15 -0
- package/dist/hooks/pre-compact.js +127 -0
- package/dist/hooks/pre-compact.js.map +1 -0
- package/dist/hooks/pre-tool-use.d.ts +15 -0
- package/dist/hooks/pre-tool-use.js +25 -0
- package/dist/hooks/pre-tool-use.js.map +1 -0
- package/dist/hooks/session-end.d.ts +21 -0
- package/dist/hooks/session-end.js +186 -0
- package/dist/hooks/session-end.js.map +1 -0
- package/dist/hooks/session-start.d.ts +15 -0
- package/dist/hooks/session-start.js +233 -0
- package/dist/hooks/session-start.js.map +1 -0
- package/dist/hooks/turn-end-post.d.ts +17 -0
- package/dist/hooks/turn-end-post.js +66 -0
- package/dist/hooks/turn-end-post.js.map +1 -0
- package/dist/hooks/turn-end.d.ts +29 -0
- package/dist/hooks/turn-end.js +295 -0
- package/dist/hooks/turn-end.js.map +1 -0
- package/dist/index.d.ts +24 -0
- package/dist/index.js +1547 -0
- package/dist/index.js.map +1 -0
- package/dist/init.d.ts +45 -0
- package/dist/init.js +839 -0
- package/dist/init.js.map +1 -0
- package/dist/lib/active-project.d.ts +14 -0
- package/dist/lib/active-project.js +65 -0
- package/dist/lib/active-project.js.map +1 -0
- package/dist/lib/buffer.d.ts +34 -0
- package/dist/lib/buffer.js +79 -0
- package/dist/lib/buffer.js.map +1 -0
- package/dist/scripts/compile.d.ts +25 -0
- package/dist/scripts/compile.js +185 -0
- package/dist/scripts/compile.js.map +1 -0
- package/dist/scripts/config.d.ts +30 -0
- package/dist/scripts/config.js +68 -0
- package/dist/scripts/config.js.map +1 -0
- package/dist/scripts/flush.d.ts +23 -0
- package/dist/scripts/flush.js +230 -0
- package/dist/scripts/flush.js.map +1 -0
- package/dist/scripts/lint.d.ts +21 -0
- package/dist/scripts/lint.js +242 -0
- package/dist/scripts/lint.js.map +1 -0
- package/dist/scripts/utils.d.ts +43 -0
- package/dist/scripts/utils.js +165 -0
- package/dist/scripts/utils.js.map +1 -0
- package/package.json +74 -0
- package/scripts/postinstall.mjs +56 -0
package/dist/init.js
ADDED
|
@@ -0,0 +1,839 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* ricord init — Full conversation-to-knowledge compiler.
|
|
4
|
+
*
|
|
5
|
+
* Implements the claude-memory-compiler pipeline via Ricord's API:
|
|
6
|
+
* conversation → turns → facts → memories → KB/wiki articles
|
|
7
|
+
*
|
|
8
|
+
* Pipeline:
|
|
9
|
+
* 1. Scan ~/.claude/projects/ for ALL JSONL conversations
|
|
10
|
+
* 2. Optionally scan ~/.claude/ memory files (CLAUDE.md, memory/)
|
|
11
|
+
* 3. For each conversation:
|
|
12
|
+
* a. Parse ALL turns (user + assistant)
|
|
13
|
+
* b. Generate a stable session_id from file content hash
|
|
14
|
+
* c. Chunk into segments preserving turn boundaries
|
|
15
|
+
* d. For each segment → POST /v1/memories, which auto-triggers:
|
|
16
|
+
* · Atomic fact decomposition
|
|
17
|
+
* · Knowledge graph entity extraction + edge storage
|
|
18
|
+
* · KB page enrichment (auto-generated wiki rollups)
|
|
19
|
+
* · Search key expansion
|
|
20
|
+
* · Conflict detection + resolution
|
|
21
|
+
 *        e. Write structured daily log entry (claude-memory-compiler format)
|
|
22
|
+
* 4. Process memory files (preferences, feedback, project context)
|
|
23
|
+
* 5. Run compilation pass: refresh all KB pages, run lint
|
|
24
|
+
* 6. Generate summary report
|
|
25
|
+
*
|
|
26
|
+
* Capabilities:
|
|
27
|
+
* - Daily logs (structured source layer)
|
|
28
|
+
* - Compiled wiki articles (KB pages with 500-2000 word bodies)
|
|
29
|
+
* - Connection entities (cross-cutting insights via knowledge graph)
|
|
30
|
+
* - Compounding Q&A (facts accumulate for better retrieval)
|
|
31
|
+
* - Contradiction detection (conflict resolution in wiki pipeline)
|
|
32
|
+
* - Fact extraction (rule-based)
|
|
33
|
+
*
|
|
34
|
+
* Usage:
|
|
35
|
+
* npx ricord-mcp init # process all projects
|
|
36
|
+
* npx ricord-mcp init --project ricord-ai # one project only
|
|
37
|
+
* npx ricord-mcp init --dry-run # show what would be processed
|
|
38
|
+
* npx ricord-mcp init --limit 10 # first 10 conversations
|
|
39
|
+
* npx ricord-mcp init --since 2026-04-01 # only after this date
|
|
40
|
+
* npx ricord-mcp init --include-memory # also process ~/.claude memory files
|
|
41
|
+
* npx ricord-mcp init --memory-only # sync memory files only, skip conversations (fast, for session-start)
|
|
42
|
+
* npx ricord-mcp init --compile # run KB compilation pass after ingest
|
|
43
|
+
* npx ricord-mcp init --full # include-memory + compile (recommended)
|
|
44
|
+
*/
|
|
45
|
+
import { readFileSync, existsSync, readdirSync, statSync, writeFileSync, mkdirSync, appendFileSync, } from "node:fs";
|
|
46
|
+
import { join } from "node:path";
|
|
47
|
+
import { homedir } from "node:os";
|
|
48
|
+
import { createHash } from "node:crypto";
|
|
49
|
+
import { spawnSync } from "node:child_process";
|
|
50
|
+
import { fileURLToPath } from "node:url";
|
|
51
|
+
// ── Constants ───────────────────────────────────────────────────────
// Claude Code's per-user data directory (~/.claude).
const CLAUDE_DIR = join(homedir(), ".claude");
// Scanned for per-project subdirectories holding *.jsonl transcripts, CLAUDE.md and memory/.
const PROJECTS_DIR = join(CLAUDE_DIR, "projects");
// JSON file parsed by loadCredentials().
const CREDENTIALS_FILE = join(homedir(), ".ricord", "credentials.json");
// Directory for ricord's local state; created on demand by saveInitState().
const STATE_DIR = join(homedir(), ".ricord");
// Persisted bookkeeping read by loadInitState() and written by saveInitState().
const INIT_STATE_FILE = join(STATE_DIR, "init-state.json");
// Pipeline tuning
const MAX_SEGMENT_CHARS = 6000; // ~1500 tokens per segment for rich extraction
const MAX_TURNS_PER_SEGMENT = 20; // keep segments coherent
const BATCH_DELAY_MS = 200; // rate-limit friendly
// Project id used by dirToProjectId() when a directory name reduces to an empty string.
const DEFAULT_PROJECT = "general";
// Daily log directory (local audit trail, like claude-memory-compiler's daily/)
// Resolves to "<module dir>/../../daily" when import.meta.dirname is available,
// otherwise to "<cwd>/daily".
// NOTE(review): for a module shipped as package/dist/init.js, two levels up is the
// directory that CONTAINS the package — confirm this is the intended location.
const DAILY_DIR = join(import.meta.dirname ? join(import.meta.dirname, "..", "..") : process.cwd(), "daily");
|
|
64
|
+
// ── CLI args ────────────────────────────────────────────────────────
const args = process.argv.slice(2);
/**
 * Look up the value of a command-line option.
 *
 * Accepts both `--name value` and `--name=value`. A token that itself starts
 * with `--` is treated as the next option rather than as this option's value,
 * so `--project --dry-run` no longer yields "--dry-run" as the project filter.
 *
 * @param {string} name - Option name without the leading dashes.
 * @returns {string | undefined} The option value, or undefined when the option
 *   is absent or has no value.
 */
function getArg(name) {
    const flag = `--${name}`;
    const idx = args.indexOf(flag);
    if (idx !== -1) {
        const value = args[idx + 1];
        if (value !== undefined && !value.startsWith("--")) {
            return value;
        }
    }
    const inlinePrefix = `${flag}=`;
    const inline = args.find((arg) => arg.startsWith(inlinePrefix));
    return inline === undefined ? undefined : inline.slice(inlinePrefix.length);
}
// Report what would be processed without ingesting anything.
const DRY_RUN = args.includes("--dry-run");
// Maximum number of conversations to process; 0 (or an unparsable value) means no limit.
const LIMIT = parseInt(getArg("limit") || "0", 10);
// Lower bound (YYYY-MM-DD) compared against each transcript's modification date.
const SINCE = getArg("since") || "";
// Case-insensitive substring matched against the derived project id.
const PROJECT_FILTER = getArg("project") || "";
// Sync memory files only; conversations and the compile pass are skipped.
const MEMORY_ONLY = args.includes("--memory-only");
const INCLUDE_MEMORY = MEMORY_ONLY || args.includes("--include-memory") || args.includes("--full");
const RUN_COMPILE = !MEMORY_ONLY && (args.includes("--compile") || args.includes("--full"));
|
|
77
|
+
/**
 * Read the stored credentials from CREDENTIALS_FILE.
 *
 * @returns {any} The parsed JSON document, or null when the file does not
 *   exist, cannot be read, or is not valid JSON.
 */
function loadCredentials() {
    if (!existsSync(CREDENTIALS_FILE)) {
        return null;
    }
    try {
        const raw = readFileSync(CREDENTIALS_FILE, "utf8");
        return JSON.parse(raw);
    }
    catch {
        // Best effort: an unreadable or corrupt file is treated as "no credentials".
        return null;
    }
}
|
|
86
|
+
/**
 * Load the persisted init state from INIT_STATE_FILE.
 *
 * The stored document is merged over a full set of defaults so that a state
 * file that lacks a field still yields every field. The two lookup maps are
 * indexed unconditionally by scanProjects() and scanMemoryFiles(), so they
 * are always returned as objects.
 *
 * @returns {object} State with `processed`, `memory_files_processed`,
 *   `total_episodes`, `total_items`, `total_turns`, `last_run` and `runs`.
 *   A fresh default state is returned when the file is missing, unreadable,
 *   not valid JSON, or not a JSON object.
 */
function loadInitState() {
    const defaults = {
        processed: {},
        memory_files_processed: {},
        total_episodes: 0,
        total_items: 0,
        total_turns: 0,
        last_run: "",
        runs: 0,
    };
    if (!existsSync(INIT_STATE_FILE)) {
        return defaults;
    }
    try {
        const stored = JSON.parse(readFileSync(INIT_STATE_FILE, "utf8"));
        if (stored === null || typeof stored !== "object" || Array.isArray(stored)) {
            return defaults;
        }
        const state = { ...defaults, ...stored };
        for (const key of ["processed", "memory_files_processed"]) {
            if (state[key] === null || typeof state[key] !== "object" || Array.isArray(state[key])) {
                state[key] = {};
            }
        }
        return state;
    }
    catch {
        // Best effort: a corrupt state file means everything is processed again.
        return defaults;
    }
}
|
|
103
|
+
/**
 * Persist the init state to INIT_STATE_FILE as pretty-printed JSON,
 * creating STATE_DIR first when it does not exist yet.
 *
 * @param {object} state - State object to serialize.
 */
function saveInitState(state) {
    const stateDirMissing = !existsSync(STATE_DIR);
    if (stateDirMissing) {
        mkdirSync(STATE_DIR, { recursive: true });
    }
    const serialized = JSON.stringify(state, null, 2);
    writeFileSync(INIT_STATE_FILE, serialized, "utf8");
}
|
|
108
|
+
/**
 * Compute a short fingerprint of some content.
 *
 * @param {string | Buffer} content - Data to hash.
 * @returns {string} The first 16 hex characters of the SHA-256 digest.
 */
function fileContentHash(content) {
    const hasher = createHash("sha256");
    hasher.update(content);
    const fullDigest = hasher.digest("hex");
    return fullDigest.slice(0, 16);
}
|
|
111
|
+
// ── Credential scrubbing ────────────────────────────────────────────
|
|
112
|
+
/**
 * Replace credential-looking substrings with "[REDACTED]".
 *
 * The rules are applied in order, each to the output of the previous one:
 * known token prefixes first, then `key: value` style assignments, shell
 * exports, connection-string passwords, bearer tokens and JSON secret fields.
 *
 * @param {string} text - Text to sanitize.
 * @returns {string} The text with every match redacted.
 */
function scrubCredentials(text) {
    const rules = [
        [/sk-[a-zA-Z0-9_\-]{8,}/g, "[REDACTED]"],
        [/sk_[a-zA-Z0-9_\-]{8,}/g, "[REDACTED]"],
        [/pk-[a-zA-Z0-9_\-]{8,}/g, "[REDACTED]"],
        [/key-[a-zA-Z0-9_\-]{8,}/g, "[REDACTED]"],
        [/AIza[a-zA-Z0-9_\-]{20,}/g, "[REDACTED]"],
        [/ghp_[a-zA-Z0-9]{10,}/g, "[REDACTED]"],
        [/gho_[a-zA-Z0-9]{10,}/g, "[REDACTED]"],
        [/xox[bp]-[a-zA-Z0-9\-]+/g, "[REDACTED]"],
        [/glpat-[a-zA-Z0-9_\-]{10,}/g, "[REDACTED]"],
        [/AKIA[A-Z0-9]{12,}/g, "[REDACTED]"],
        [/((?:api[_-]?key|token|secret|password|credential|bearer|authorization)\s*[:=]\s*["']?)([a-zA-Z0-9_\-./+=]{10,})/gi, "$1[REDACTED]"],
        [/(export\s+\w*(?:KEY|TOKEN|SECRET|PASSWORD)\w*\s*=\s*["']?)([^\s"']{10,})/gi, "$1[REDACTED]"],
        [/((?:postgres(?:ql)?|mysql|redis|mongodb):\/\/[^:]+:)[^@]+(@)/gi, "$1[REDACTED]$2"],
        [/(Bearer\s+)[a-zA-Z0-9_\-./+=]{10,}/gi, "$1[REDACTED]"],
        // SEC-06: Removed overly-broad base64 regex (/\b[A-Za-z0-9+/]{40,200}={0,3}\b/g)
        // which was erroneously redacting git SHAs, file hashes, and other legitimate content.
        [/"(api_key|apiKey|secret_key|secretKey|access_token|accessToken|private_key|privateKey)"\s*:\s*"[^"]{10,}"/gi, '"$1": "[REDACTED]"'],
    ];
    let scrubbed = text;
    for (const [pattern, replacement] of rules) {
        scrubbed = scrubbed.replace(pattern, replacement);
    }
    return scrubbed;
}
|
|
132
|
+
// ── Project name extraction ─────────────────────────────────────────
|
|
133
|
+
/**
 * Derive a project id from a directory name found under PROJECTS_DIR.
 *
 * Strips a leading "-Users-<name>-Documents-" or "-Users-<name>-" prefix and
 * any remaining leading dashes, drops a leading "hexaclaw-" component, and
 * collapses every name containing "paperclip" to "paperclip".
 *
 * @param {string} dirName - Directory name to convert.
 * @returns {string} Lower-cased project id, or DEFAULT_PROJECT when nothing is left.
 */
function dirToProjectId(dirName) {
    const stripped = dirName
        .replace(/^-Users-[^-]+-Documents-/i, "")
        .replace(/^-Users-[^-]+-/i, "")
        .replace(/^-+/, "");
    const [head, ...rest] = stripped.split("-");
    const hasHexaclawPrefix = head.toLowerCase() === "hexaclaw" && rest.length > 0;
    const name = hasHexaclawPrefix ? rest.join("-") : stripped;
    if (name.includes("paperclip")) {
        return "paperclip";
    }
    const lowered = name.toLowerCase();
    return lowered || DEFAULT_PROJECT;
}
|
|
146
|
+
/**
 * Read a JSONL transcript and collect every user/assistant turn that has text.
 *
 * Each non-blank line is parsed as JSON; the turn is taken from `entry.message`
 * when present, otherwise from the entry itself. Array content is reduced to
 * its plain strings and `type: "text"` blocks, joined with newlines. Lines
 * that fail to parse are skipped, and an unreadable file yields no turns.
 *
 * @param {string} filePath - Path of the .jsonl transcript.
 * @returns {{role: string, content: string, index: number}[]} Turns in file
 *   order, with `index` counting from 0 over the kept turns.
 */
function parseAllTurns(filePath) {
    let raw;
    try {
        raw = readFileSync(filePath, "utf8");
    }
    catch {
        return []; // file read error
    }
    const turns = [];
    for (const rawLine of raw.split("\n")) {
        if (rawLine.trim().length === 0) {
            continue;
        }
        try {
            const record = JSON.parse(rawLine);
            const message = record.message || record;
            const role = message.role || "";
            if (role !== "user" && role !== "assistant") {
                continue;
            }
            let body = message.content || "";
            if (Array.isArray(body)) {
                const pieces = [];
                for (const block of body) {
                    if (typeof block === "string") {
                        pieces.push(block);
                    }
                    else if (block.type === "text" && block.text) {
                        pieces.push(block.text);
                    }
                }
                body = pieces.join("\n");
            }
            if (typeof body === "string" && body.trim()) {
                turns.push({ role, content: body.trim(), index: turns.length });
            }
        }
        catch {
            // Malformed line (or unexpected block shape): skip it.
        }
    }
    return turns;
}
|
|
184
|
+
/**
 * Split a conversation into segments without ever cutting a turn in half.
 *
 * A segment is closed before adding a turn when it already holds
 * MAX_TURNS_PER_SEGMENT turns, or when it is non-empty and adding the
 * rendered turn would exceed MAX_SEGMENT_CHARS. A single oversized turn
 * therefore still gets a segment of its own.
 *
 * @param {{role: string, content: string, index: number}[]} turns - Turns in order.
 * @returns {object[]} Segments as produced by buildSegment().
 */
function segmentConversation(turns) {
    const segments = [];
    let pending = [];
    let pendingChars = 0;
    const flush = () => {
        segments.push(buildSegment(pending));
        pending = [];
        pendingChars = 0;
    };
    for (const turn of turns) {
        const speaker = turn.role === "user" ? "User" : "Assistant";
        const renderedLength = `**${speaker}:** ${turn.content}`.length;
        const atTurnCap = pending.length >= MAX_TURNS_PER_SEGMENT;
        const overCharBudget = pending.length > 0 && pendingChars + renderedLength > MAX_SEGMENT_CHARS;
        if (atTurnCap || overCharBudget) {
            flush();
        }
        pending.push(turn);
        pendingChars += renderedLength;
    }
    // Flush whatever is left after the last turn.
    if (pending.length > 0) {
        flush();
    }
    return segments;
}
|
|
214
|
+
/**
 * Package a run of turns as one segment.
 *
 * @param {{role: string, content: string, index: number}[]} turns - Turns of the segment.
 * @returns {{turns: object[], context: string, turnRange: string, userTurnCount: number}}
 *   `context` is the credential-scrubbed transcript ("**User:** ..." /
 *   "**Assistant:** ..." blocks separated by blank lines); `turnRange` is the
 *   1-based "first-last" turn index range.
 */
function buildSegment(turns) {
    const rendered = [];
    let userTurnCount = 0;
    for (const t of turns) {
        const fromUser = t.role === "user";
        if (fromUser) {
            userTurnCount += 1;
        }
        rendered.push(`**${fromUser ? "User" : "Assistant"}:** ${t.content}`);
    }
    const firstIndex = turns[0]?.index ?? 0;
    const lastIndex = turns[turns.length - 1]?.index ?? 0;
    return {
        turns,
        context: scrubCredentials(rendered.join("\n\n")),
        turnRange: `${firstIndex + 1}-${lastIndex + 1}`,
        userTurnCount,
    };
}
|
|
228
|
+
/**
 * Return a file's modification date as a UTC "YYYY-MM-DD" string.
 *
 * @param {string} filePath - File to inspect.
 * @returns {string} The mtime date, or today's date when the file cannot be stat'ed.
 */
function getFileDate(filePath) {
    const toDay = (when) => when.toISOString().slice(0, 10);
    try {
        const { mtime } = statSync(filePath);
        return toDay(mtime);
    }
    catch {
        return toDay(new Date());
    }
}
|
|
236
|
+
/**
 * Find the conversation transcripts that still need to be ingested.
 *
 * Walks every project directory under PROJECTS_DIR (optionally narrowed by
 * PROJECT_FILTER) and collects each *.jsonl file that is at least 100 bytes,
 * not older than SINCE, and whose content hash differs from the one recorded
 * in `initState.processed`.
 *
 * Size and date are checked from a single stat call before the file is read,
 * so filtered-out files are never loaded or hashed. A file or directory that
 * cannot be read is reported and skipped instead of aborting the scan.
 *
 * @param {{processed: Record<string, {file_hash: string}>}} initState - Previously saved state.
 * @returns {object[]} Entries with `filePath`, `fileKey`, `projectDir`,
 *   `projectId`, `date`, `sizeBytes` and `fileHash`, sorted by date ascending
 *   and truncated to LIMIT when LIMIT > 0.
 */
function scanProjects(initState) {
    const results = [];
    if (!existsSync(PROJECTS_DIR)) {
        console.error("No Claude Code projects found at", PROJECTS_DIR);
        return results;
    }
    const projectFilter = PROJECT_FILTER.toLowerCase();
    for (const dir of readdirSync(PROJECTS_DIR)) {
        const projectId = dirToProjectId(dir);
        if (projectFilter && !projectId.includes(projectFilter))
            continue;
        const projectPath = join(PROJECTS_DIR, dir);
        let files;
        try {
            if (!statSync(projectPath).isDirectory())
                continue;
            files = readdirSync(projectPath).filter(f => f.endsWith(".jsonl"));
        }
        catch {
            continue;
        }
        for (const file of files) {
            const filePath = join(projectPath, file);
            const fileKey = `${dir}/${file}`;
            try {
                const stats = statSync(filePath);
                if (stats.size < 100)
                    continue; // skip empty
                const fileDate = stats.mtime.toISOString().slice(0, 10);
                // Apply date filter before paying for the read + hash
                if (SINCE && fileDate < SINCE)
                    continue;
                // Content hash for dedup (has the file changed since the last run?)
                const hash = fileContentHash(readFileSync(filePath, "utf8"));
                if (initState.processed[fileKey]?.file_hash === hash)
                    continue;
                results.push({
                    filePath,
                    fileKey,
                    projectDir: dir,
                    projectId,
                    date: fileDate,
                    sizeBytes: stats.size,
                    fileHash: hash,
                });
            }
            catch (e) {
                const reason = e instanceof Error ? e.message : String(e);
                console.error(`  ! skipping ${fileKey}: ${reason}`);
            }
        }
        // Early exit bounds the work; note the limit is reached in directory
        // order, before the chronological sort below.
        if (LIMIT > 0 && results.length >= LIMIT)
            break;
    }
    results.sort((a, b) => a.date.localeCompare(b.date)); // chronological for daily logs
    return LIMIT > 0 ? results.slice(0, LIMIT) : results;
}
|
|
291
|
+
/**
 * Find the Claude memory files that are new or changed since the last run.
 *
 * Looks at the global CLAUDE.md, each project's CLAUDE.md, and every *.md file
 * (except the MEMORY.md index) in each project's memory/ directory. A file is
 * returned when its content hash differs from the one recorded in
 * `initState.memory_files_processed`.
 *
 * Memory files may start with a `---` frontmatter block (LF or CRLF line
 * endings) providing `type`, `name` and `description`; the frontmatter is
 * stripped from the returned content. All returned content is credential-scrubbed.
 * Unreadable files are reported on stderr and skipped individually.
 *
 * @param {{memory_files_processed: Record<string, {hash: string}>}} initState - Previously saved state.
 * @returns {object[]} Entries with `filePath`, `fileKey`, `projectId`, `type`,
 *   `title`, `description`, `content` and `hash`.
 */
function scanMemoryFiles(initState) {
    const results = [];
    // Frontmatter `type` → memory type sent to the API; anything else becomes "fact".
    const typeMap = {
        user: "preference",
        feedback: "preference",
        project: "fact",
        reference: "reference",
    };
    // Read a file as UTF-8, or report the failure and return null.
    const readText = (path) => {
        try {
            return readFileSync(path, "utf8");
        }
        catch (e) {
            const reason = e instanceof Error ? e.message : String(e);
            process.stderr.write(`  ! cannot read ${path}: ${reason}\n`);
            return null;
        }
    };
    const hasChanged = (fileKey, hash) => initState.memory_files_processed[fileKey]?.hash !== hash;
    // 1. Global CLAUDE.md
    const globalClaude = join(CLAUDE_DIR, "CLAUDE.md");
    const globalContent = existsSync(globalClaude) ? readText(globalClaude) : null;
    if (globalContent !== null) {
        const hash = fileContentHash(globalContent);
        if (hasChanged("global/CLAUDE.md", hash)) {
            results.push({
                filePath: globalClaude,
                fileKey: "global/CLAUDE.md",
                projectId: "global",
                type: "preference",
                title: "Global Claude Code preferences",
                description: "Global coding guidelines and preferences for all Claude Code projects",
                content: scrubCredentials(globalContent),
                hash,
            });
        }
    }
    // 2. Per-project CLAUDE.md and memory/ files
    if (!existsSync(PROJECTS_DIR)) {
        return results;
    }
    const projectFilter = PROJECT_FILTER.toLowerCase();
    for (const dir of readdirSync(PROJECTS_DIR)) {
        const projectId = dirToProjectId(dir);
        if (projectFilter && !projectId.includes(projectFilter))
            continue;
        const projectPath = join(PROJECTS_DIR, dir);
        try {
            if (!statSync(projectPath).isDirectory())
                continue;
        }
        catch {
            continue;
        }
        // Project CLAUDE.md
        const projectClaude = join(projectPath, "CLAUDE.md");
        const projectContent = existsSync(projectClaude) ? readText(projectClaude) : null;
        if (projectContent !== null) {
            const hash = fileContentHash(projectContent);
            const key = `${dir}/CLAUDE.md`;
            if (hasChanged(key, hash)) {
                results.push({
                    filePath: projectClaude,
                    fileKey: key,
                    projectId,
                    type: "reference",
                    title: `Project instructions: ${projectId}`,
                    description: `Project-level instructions and conventions for ${projectId}`,
                    content: scrubCredentials(projectContent),
                    hash,
                });
            }
        }
        // Memory directory
        const memoryDir = join(projectPath, "memory");
        if (!existsSync(memoryDir))
            continue;
        let memFiles;
        try {
            memFiles = readdirSync(memoryDir);
        }
        catch {
            continue; // memory dir read error
        }
        for (const memFile of memFiles) {
            if (!memFile.endsWith(".md"))
                continue;
            if (memFile === "MEMORY.md")
                continue; // index file, skip
            const memPath = join(memoryDir, memFile);
            const content = readText(memPath);
            if (content === null)
                continue;
            const hash = fileContentHash(content);
            const key = `${dir}/memory/${memFile}`;
            if (!hasChanged(key, hash))
                continue;
            // Parse frontmatter (tolerating CRLF line endings)
            const fmMatch = content.match(/^---\r?\n([\s\S]*?)\r?\n---/);
            const fm = fmMatch?.[1] || "";
            const memType = fm.match(/^type:\s*(\w+)/m)?.[1] || "fact";
            // endsWith(".md") was checked above, so drop exactly that suffix.
            const memName = fm.match(/^name:\s*(.+)/m)?.[1]?.trim() || memFile.slice(0, -".md".length);
            const memDesc = fm.match(/^description:\s*(.+)/m)?.[1]?.trim() || memName;
            // Strip frontmatter from body for cleaner supplementary context
            const bodyWithoutFm = fmMatch
                ? content.slice(fmMatch[0].length).trim()
                : content.trim();
            results.push({
                filePath: memPath,
                fileKey: key,
                projectId,
                // Own-property check so a type such as "constructor" cannot
                // resolve to something inherited from Object.prototype.
                type: Object.hasOwn(typeMap, memType) ? typeMap[memType] : "fact",
                title: memName,
                description: memDesc,
                content: scrubCredentials(bodyWithoutFm),
                hash,
            });
        }
    }
    return results;
}
|
|
399
|
+
// ── API helpers ─────────────────────────────────────────────────────
|
|
400
|
+
/**
 * Perform an authenticated JSON request against the API.
 *
 * @param {string} method - HTTP method.
 * @param {string} path - Request path, appended verbatim to `apiBase`.
 * @param {string} apiKey - Sent as a Bearer token.
 * @param {string} apiBase - Base URL.
 * @param {object} [body] - JSON-serialized as the request body when truthy.
 * @returns {Promise<any>} The parsed JSON response body.
 * @throws {Error} On a non-OK response, with message "<status>: <first 200
 *   characters of the response text>".
 */
async function apiCall(method, path, apiKey, apiBase, body) {
    const init = {
        method,
        headers: {
            Authorization: `Bearer ${apiKey}`,
            "Content-Type": "application/json",
            "User-Agent": "ricord-mcp/init-v2",
        },
    };
    if (body) {
        init.body = JSON.stringify(body);
    }
    const response = await fetch(`${apiBase}${path}`, init);
    if (response.ok) {
        return response.json();
    }
    const detail = await response.text();
    throw new Error(`${response.status}: ${detail.slice(0, 200)}`);
}
|
|
416
|
+
/**
 * Upload one conversation segment as an "episode" memory.
 *
 * The uploaded content is a short "Topics discussed" preamble (the last eight
 * user messages, 200 characters each) followed by the segment transcript,
 * capped at 8000 characters. The preamble is built from the raw turns, so it
 * is passed through scrubCredentials() here; `segment.context` is already
 * scrubbed by buildSegment().
 *
 * @param {{turns: object[], context: string, turnRange: string}} segment - Segment to upload.
 * @param {string} convoDate - Conversation date used in the title.
 * @param {string} projectId - Used as namespace, project id and tag.
 * @param {string} sessionId - Session identifier; its first 8 characters become a tag.
 * @param {number} segmentIndex - Zero-based position of the segment.
 * @param {number} totalSegments - Number of segments in the conversation.
 * @param {string} apiKey - API key.
 * @param {string} apiBase - API base URL.
 * @returns {Promise<{episodes: number, items_saved: number, errors: string[]}>}
 *   Counters for this segment; a failed upload is recorded in `errors`
 *   rather than thrown.
 */
async function ingestSegment(segment, convoDate, projectId, sessionId, segmentIndex, totalSegments, apiKey, apiBase) {
    const result = { episodes: 0, items_saved: 0, errors: [] };
    // Build rich content with conversation topics for the episode.
    // Scrub BEFORE truncating so a secret cut off at 200 chars cannot slip
    // under a pattern's minimum length.
    const userLines = segment.turns
        .filter(t => t.role === "user")
        .slice(-8)
        .map(t => scrubCredentials(t.content).slice(0, 200));
    const topicSummary = userLines.length > 0
        ? `Topics discussed:\n${userLines.map(t => `- ${t}`).join("\n")}\n\n---\n\n`
        : "";
    const episodeContent = `${topicSummary}${segment.context}`;
    const segmentLabel = totalSegments > 1
        ? ` (segment ${segmentIndex + 1}/${totalSegments}, turns ${segment.turnRange})`
        : "";
    try {
        await apiCall("POST", "/v1/memories", apiKey, apiBase, {
            content: episodeContent.slice(0, 8000),
            title: `Session ${convoDate} — ${projectId}${segmentLabel}`,
            type: "episode",
            tags: ["auto-init", "session-log", projectId, `session:${sessionId.slice(0, 8)}`],
            namespace: projectId,
            project_id: projectId,
            source: "init",
            session_id: sessionId,
        });
        result.episodes++;
        result.items_saved++;
        // The backend fires background tasks for fact decomposition, KB enrichment,
        // graph extraction, and search key expansion. These happen asynchronously
        // after the response — we don't need to wait for them.
    }
    catch (e) {
        result.errors.push(`episode: ${e instanceof Error ? e.message : String(e)}`);
    }
    return result;
}
|
|
452
|
+
// ── Daily log writing ───────────────────────────────────────────────
|
|
453
|
+
/** Create DAILY_DIR (including missing parents) when it does not exist yet. */
function ensureDailyDir() {
    const alreadyThere = existsSync(DAILY_DIR);
    if (alreadyThere) {
        return;
    }
    mkdirSync(DAILY_DIR, { recursive: true });
}
|
|
457
|
+
/**
 * Append a structured daily log entry in claude-memory-compiler format.
 *
 * Format:
 *   ### Session (HH:MM) — project
 *   **Context:** One-line summary
 *   **Key Exchanges:**
 *   - Exchange summaries
 *   **Decisions Made:**
 *   - Decision-like assistant statements
 *   **Stats:** counters
 *
 * The entry goes to DAILY_DIR/<date>.md, which is created with a heading when
 * missing. HH:MM is the wall-clock time at which the entry is written.
 *
 * Message excerpts come from the raw turns, so each one is credential-scrubbed
 * before it reaches disk and its whitespace is collapsed to a single line so
 * multi-line messages cannot break the Markdown list.
 *
 * @param {string} date - Log date (YYYY-MM-DD), used as the file name.
 * @param {string} projectId - Project shown in the entry heading.
 * @param {{turns: {role: string, content: string}[]}[]} segments - Conversation segments.
 * @param {{turns: number, episodes: number, items_saved: number}} result - Counters for the stats line.
 */
function writeDailyLogEntry(date, projectId, segments, result) {
    ensureDailyDir();
    const logPath = join(DAILY_DIR, `${date}.md`);
    if (!existsSync(logPath)) {
        writeFileSync(logPath, `# Daily Log: ${date}\n\n`, "utf8");
    }
    const timeStr = new Date().toLocaleTimeString("en-GB", { hour: "2-digit", minute: "2-digit" });
    // Scrub first, then flatten and truncate, so truncation cannot hide a secret
    // from the scrubber.
    const toExcerpt = (text) => scrubCredentials(text).replace(/\s+/g, " ").trim().slice(0, 200);
    // Extract key exchanges and topics from segments
    const allUserMessages = [];
    const allAssistantMessages = [];
    for (const seg of segments) {
        for (const turn of seg.turns) {
            const bucket = turn.role === "user" ? allUserMessages : allAssistantMessages;
            bucket.push(toExcerpt(turn.content));
        }
    }
    // Build structured entry
    const lines = [];
    lines.push(`### Session (${timeStr}) — ${projectId}`);
    lines.push("");
    // Context line
    const firstUserMsg = allUserMessages[0] || "Coding session";
    lines.push(`**Context:** ${firstUserMsg.slice(0, 120)}`);
    lines.push("");
    // Key exchanges (user questions/requests, max 5)
    const keyExchanges = allUserMessages
        .filter(m => m.length > 20) // skip short confirmations
        .slice(0, 5);
    if (keyExchanges.length > 0) {
        lines.push("**Key Exchanges:**");
        for (const exchange of keyExchanges) {
            lines.push(`- ${exchange.slice(0, 150)}`);
        }
        lines.push("");
    }
    // Decisions (look for decision-like patterns in assistant messages)
    const decisions = allAssistantMessages
        .filter(m => /(?:decided|chose|using|switched|went with|will use|recommend|should)/i.test(m) &&
        m.length > 30)
        .slice(0, 3);
    if (decisions.length > 0) {
        lines.push("**Decisions Made:**");
        for (const decision of decisions) {
            lines.push(`- ${decision.slice(0, 150)}`);
        }
        lines.push("");
    }
    // Stats
    lines.push(`**Stats:** ${result.turns} turns, ${segments.length} segments, ` +
        `${result.episodes} episodes, ${result.items_saved} items saved`);
    lines.push("");
    lines.push("---");
    lines.push("");
    appendFileSync(logPath, lines.join("\n"), "utf8");
}
|
|
527
|
+
// ── Memory file ingestion ───────────────────────────────────────────
|
|
528
|
+
/**
 * Upload one memory file as a memory.
 *
 * The frontmatter-stripped body is sent as the content (falling back to the
 * description when the body is blank), capped at 8000 characters.
 *
 * @param {object} memFile - Entry produced by scanMemoryFiles().
 * @param {string} apiKey - API key.
 * @param {string} apiBase - API base URL.
 * @returns {Promise<boolean>} true on success; false after reporting the
 *   failure on stderr.
 */
async function ingestMemoryFile(memFile, apiKey, apiBase) {
    try {
        // More tokens give a richer semantic surface for varied query phrasings;
        // the YAML frontmatter was the noise, and it is already stripped.
        const body = memFile.content.trim();
        const embeddingContent = (body || memFile.description).slice(0, 8000);
        const payload = {
            content: embeddingContent,
            title: memFile.title,
            type: memFile.type,
            tags: ["auto-init", "memory-file", memFile.projectId],
            namespace: memFile.projectId,
            project_id: memFile.projectId,
            source: "init-memory",
        };
        await apiCall("POST", "/v1/memories", apiKey, apiBase, payload);
    }
    catch (e) {
        process.stderr.write(`\n  ! ${memFile.fileKey}: ${e.message.slice(0, 100)}\n`);
        return false;
    }
    return true;
}
|
|
550
|
+
// ── Compilation pass (KB refresh) ───────────────────────────────────
|
|
551
|
+
/**
 * Run the KB compilation pass: list KB pages, refresh stale/draft ones, lint.
 *
 * At most 20 pages are refreshed per run, with a 300 ms pause after each
 * attempt. Progress is reported against the number of pages actually
 * attempted, failed refreshes are counted and reported, and "up to date" is
 * only printed when there was nothing to refresh. Refresh and lint remain
 * best effort: their failures are logged and never thrown.
 *
 * NOTE(review): the listing requests `limit=100` and does no pagination —
 * confirm whether more pages can exist.
 *
 * @param {string} apiKey - API key.
 * @param {string} apiBase - API base URL.
 * @returns {Promise<void>}
 */
async function runCompilationPass(apiKey, apiBase) {
    const MAX_REFRESHES_PER_RUN = 20;
    const describe = (e) => (e instanceof Error ? e.message : String(e));
    console.log("\nRunning compilation pass...");
    // 1. List all KB pages
    let pages = [];
    try {
        const kbResult = await apiCall("GET", "/v1/kb/pages?limit=100", apiKey, apiBase);
        pages = Array.isArray(kbResult.pages) ? kbResult.pages : [];
        console.log(`  Found ${pages.length} KB pages`);
    }
    catch (e) {
        console.error(`  KB listing failed: ${describe(e)}`);
        return;
    }
    // 2. Refresh stale pages
    const stalePages = pages.filter(p => p.status === "stale" || p.status === "draft");
    const batch = stalePages.slice(0, MAX_REFRESHES_PER_RUN);
    let refreshed = 0;
    let failed = 0;
    for (const page of batch) {
        try {
            await apiCall("POST", `/v1/kb/pages/${encodeURIComponent(page.id)}/refresh`, apiKey, apiBase, {});
            refreshed++;
            process.stdout.write(`\r  Refreshing KB pages... ${refreshed}/${batch.length}`);
        }
        catch {
            failed++; // best effort; reported below
        }
        await sleep(300); // rate limit
    }
    if (refreshed > 0) {
        console.log(`\n  Refreshed ${refreshed} KB pages`);
    }
    else if (batch.length === 0) {
        console.log("  All KB pages up to date");
    }
    if (failed > 0) {
        console.error(`  ${failed} of ${batch.length} KB page refreshes failed`);
    }
    if (stalePages.length > batch.length) {
        console.log(`  ${stalePages.length - batch.length} stale pages left for the next run`);
    }
    // 3. Run lint
    try {
        const lintResult = await apiCall("POST", "/v1/kb/lint", apiKey, apiBase, {});
        const issues = lintResult.issues || [];
        if (issues.length > 0) {
            console.log(`  Lint: ${issues.length} issues found`);
            for (const issue of issues.slice(0, 5)) {
                console.log(`    - [${issue.type}] ${issue.message}`);
            }
        }
        else {
            console.log("  Lint: all checks passed");
        }
    }
    catch (e) {
        // best effort, but do not pretend lint ran
        console.error(`  Lint failed: ${describe(e)}`);
    }
}
|
|
598
|
+
// ── Helpers ─────────────────────────────────────────────────────────
|
|
599
|
+
/**
 * Returns a promise that resolves (with no value) once `ms` milliseconds
 * have elapsed on the timer.
 *
 * @param {number} ms Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
    return new Promise((resolve) => {
        setTimeout(resolve, ms);
    });
}
|
|
602
|
+
/**
 * Formats a byte count for display using 1024-based steps.
 *
 * Values below 1024 are printed verbatim with a "B" suffix; values below
 * 1024 * 1024 are shown in "KB" and everything else in "MB", both with one
 * decimal place.
 *
 * @param {number} bytes Size in bytes.
 * @returns {string} Human-readable size, e.g. "1.5 KB".
 */
function formatBytes(bytes) {
    const STEP = 1024;
    if (bytes < STEP) {
        return `${bytes} B`;
    }
    const useKilobytes = bytes < STEP * STEP;
    const scaled = useKilobytes ? bytes / STEP : bytes / STEP / STEP;
    const unit = useKilobytes ? "KB" : "MB";
    return `${scaled.toFixed(1)} ${unit}`;
}
|
|
609
|
+
// ── Main ────────────────────────────────────────────────────────────
|
|
610
|
+
/**
 * Entry point of `ricord init`.
 *
 * Flow:
 *   1. Load the persisted init state and scan for new conversations and/or
 *      memory files (depending on MEMORY_ONLY / INCLUDE_MEMORY).
 *   2. Print a summary; return early when there is nothing to do or when
 *      DRY_RUN is set.
 *   3. Load credentials, spawning the `login` sub-command once if none exist.
 *   4. Ingest every conversation segment by segment, write daily log entries
 *      and record each conversation in the init state.
 *   5. Ingest memory files, optionally run the compilation pass, then persist
 *      the state and print totals.
 *
 * Exits the process with code 1 when login fails or yields no credentials.
 *
 * @returns {Promise<void>}
 */
async function main() {
    // Turns any thrown value into a short printable reason; a non-Error throw
    // must not crash the error handler itself.
    const describeError = (e) => (e instanceof Error ? e.message : String(e)).slice(0, 100);
    if (MEMORY_ONLY) {
        console.log("ricord init — Memory-file sync\n");
    }
    else {
        console.log("ricord init — Full conversation-to-knowledge compiler\n");
    }
    const initState = loadInitState();
    const conversations = MEMORY_ONLY ? [] : scanProjects(initState);
    const memoryFiles = INCLUDE_MEMORY ? scanMemoryFiles(initState) : [];
    // ── Summary ─────────────────────────────────────────────────────
    if (conversations.length === 0 && memoryFiles.length === 0) {
        console.log("No new conversations or memory files to process.");
        if (PROJECT_FILTER)
            console.log(` (filtered by --project ${PROJECT_FILTER})`);
        if (SINCE)
            console.log(` (filtered by --since ${SINCE})`);
        if (Object.keys(initState.processed).length > 0) {
            console.log(` (${Object.keys(initState.processed).length} conversations already processed)`);
            console.log(` To reprocess, delete ${INIT_STATE_FILE}`);
        }
        if (!INCLUDE_MEMORY) {
            console.log(` Use --include-memory to also process ~/.claude memory files`);
        }
        return;
    }
    // Group by project for display
    const byProject = new Map();
    for (const c of conversations) {
        const list = byProject.get(c.projectId) || [];
        list.push(c);
        byProject.set(c.projectId, list);
    }
    console.log(`Found ${conversations.length} new conversations across ${byProject.size} projects:`);
    for (const [projectId, convos] of byProject) {
        const totalSize = convos.reduce((s, c) => s + c.sizeBytes, 0);
        console.log(` ${projectId}: ${convos.length} conversations (${formatBytes(totalSize)})`);
    }
    if (memoryFiles.length > 0) {
        console.log(`\nFound ${memoryFiles.length} memory files to process`);
    }
    console.log(`\nPipeline: segments → episodes → facts → KB → graph`);
    console.log(`Mode: full pipeline (layers=all)`);
    if (DRY_RUN) {
        // Dry run: parse and segment locally for the preview, but send nothing.
        console.log("\n[dry-run] Would process these conversations:");
        for (const c of conversations.slice(0, 30)) {
            const turns = parseAllTurns(c.filePath);
            const segments = segmentConversation(turns);
            const userTurns = turns.filter((t) => t.role === "user").length;
            console.log(` ${c.date} | ${c.projectId} | ${turns.length} turns (${userTurns} user) | ` +
                `${segments.length} segments | ${formatBytes(c.sizeBytes)}`);
        }
        if (conversations.length > 30) {
            console.log(` ... and ${conversations.length - 30} more`);
        }
        if (memoryFiles.length > 0) {
            console.log("\n[dry-run] Would process these memory files:");
            for (const m of memoryFiles) {
                console.log(` ${m.projectId} | [${m.type}] ${m.title}`);
            }
        }
        return;
    }
    // ── Credentials ─────────────────────────────────────────────────
    // Auto-spawn the same `ricord-mcp login` browser flow if no creds found,
    // then continue init. Single command instead of forcing the user to run
    // login → re-run init manually.
    let creds = loadCredentials();
    if (!creds) {
        console.log("\nNo Ricord credentials found — launching browser login...\n");
        // The login command lives in index.js next to this compiled init.js.
        const cliPath = fileURLToPath(import.meta.url).replace(/init\.js$/, "index.js");
        const result = spawnSync(process.execPath, [cliPath, "login"], { stdio: "inherit" });
        if (result.status !== 0) {
            console.error("\nLogin failed or cancelled. Run `npx ricord-mcp login` then re-run init.");
            process.exit(1);
        }
        creds = loadCredentials();
        if (!creds) {
            console.error("\nLogin completed but credentials file not found. Aborting init.");
            process.exit(1);
        }
        console.log(""); // spacer between login output and init progress
    }
    const apiBase = creds.api_base || "https://api.ricord.ai";
    // ── Process conversations ───────────────────────────────────────
    let totalEpisodes = 0;
    let totalItems = 0;
    let totalTurns = 0;
    let totalErrors = 0;
    let totalSegments = 0;
    console.log("\nProcessing conversations...\n");
    for (let i = 0; i < conversations.length; i++) {
        const c = conversations[i];
        const pct = ((i + 1) / conversations.length * 100).toFixed(0);
        process.stdout.write(` [${pct}%] ${i + 1}/${conversations.length} — ${c.projectId}/${c.date}`);
        try {
            // Step 1: Parse ALL turns
            const turns = parseAllTurns(c.filePath);
            const userTurns = turns.filter((t) => t.role === "user").length;
            if (userTurns < 2) {
                process.stdout.write(" (skipped: <2 user turns)\n");
                continue;
            }
            // Step 2: Generate stable session ID from file hash
            const sessionId = c.fileHash;
            // Step 3: Segment the conversation
            const segments = segmentConversation(turns);
            totalSegments += segments.length;
            process.stdout.write(` — ${turns.length} turns, ${segments.length} segments`);
            // Step 4: Process each segment through the full pipeline
            let convoEpisodes = 0;
            let convoItems = 0;
            const convoErrors = [];
            for (let si = 0; si < segments.length; si++) {
                const seg = segments[si];
                if (seg.userTurnCount === 0)
                    continue; // skip assistant-only segments
                const segResult = await ingestSegment(seg, c.date, c.projectId, sessionId, si, segments.length, creds.api_key, apiBase);
                convoEpisodes += segResult.episodes;
                convoItems += segResult.items_saved;
                convoErrors.push(...segResult.errors);
                // Rate limiting between segments
                if (si < segments.length - 1) {
                    await sleep(BATCH_DELAY_MS);
                }
            }
            // Step 5: Write daily log entry
            writeDailyLogEntry(c.date, c.projectId, segments, {
                episodes: convoEpisodes,
                items_saved: convoItems,
                turns: turns.length,
            });
            totalEpisodes += convoEpisodes;
            totalItems += convoItems;
            totalTurns += turns.length;
            totalErrors += convoErrors.length;
            for (const err of convoErrors) {
                process.stderr.write(`\n ! ${err}\n`);
            }
            process.stdout.write(` — ${convoEpisodes} episodes saved\n`);
            // Update state
            // NOTE(review): the conversation is recorded even when some segments
            // reported errors — confirm partial ingests should not be retried.
            initState.processed[c.fileKey] = {
                date: c.date,
                items_saved: convoItems,
                turns_ingested: turns.length,
                segments: segments.length,
                file_hash: c.fileHash,
            };
            // Save state periodically
            if ((i + 1) % 10 === 0) {
                initState.last_run = new Date().toISOString();
                saveInitState(initState);
            }
        }
        catch (e) {
            totalErrors++;
            process.stderr.write(` — ERROR: ${describeError(e)}\n`);
        }
        await sleep(BATCH_DELAY_MS);
    }
    // ── Process memory files ────────────────────────────────────────
    if (memoryFiles.length > 0) {
        console.log(`\nProcessing ${memoryFiles.length} memory files...`);
        let memSaved = 0;
        for (let i = 0; i < memoryFiles.length; i++) {
            const m = memoryFiles[i];
            process.stdout.write(` [${i + 1}/${memoryFiles.length}] ${m.title}...`);
            const ok = await ingestMemoryFile(m, creds.api_key, apiBase);
            if (ok) {
                memSaved++;
                process.stdout.write(" saved\n");
                // Record only successful uploads, so a failed file is not
                // stored in the state as already processed.
                initState.memory_files_processed[m.fileKey] = {
                    date: new Date().toISOString().slice(0, 10),
                    hash: m.hash,
                };
            }
            else {
                totalErrors++;
                process.stdout.write(" failed\n");
            }
            await sleep(BATCH_DELAY_MS);
        }
        totalItems += memSaved;
        console.log(` Memory files saved: ${memSaved}/${memoryFiles.length}`);
    }
    // ── Compilation pass ────────────────────────────────────────────
    if (RUN_COMPILE) {
        await runCompilationPass(creds.api_key, apiBase);
    }
    // ── Save final state ────────────────────────────────────────────
    initState.total_episodes += totalEpisodes;
    initState.total_items += totalItems;
    initState.total_turns += totalTurns;
    initState.last_run = new Date().toISOString();
    initState.runs++;
    saveInitState(initState);
    // ── Summary ─────────────────────────────────────────────────────
    console.log("\n" + "=".repeat(50));
    console.log("Init complete!\n");
    console.log(` Conversations processed: ${conversations.length}`);
    console.log(` Total turns parsed: ${totalTurns}`);
    console.log(` Segments created: ${totalSegments}`);
    console.log(` Episodes saved: ${totalEpisodes}`);
    console.log(` Total items: ${totalItems}`);
    if (totalErrors > 0)
        console.log(` Errors: ${totalErrors}`);
    console.log(` Projects: ${[...byProject.keys()].join(", ")}`);
    console.log(` Daily logs written: ${DAILY_DIR}`);
    console.log(`\n Pipeline layers triggered per segment:`);
    console.log(` - Atomic fact decomposition (rule-based)`);
    console.log(` - Knowledge graph entity extraction`);
    console.log(` - KB page enrichment (500-2000 word wiki articles)`);
    console.log(` - Search key expansion (+9.4% recall)`);
    console.log(` - Conflict detection + resolution`);
    console.log(`\n All-time stats (${initState.runs} runs):`);
    console.log(` Conversations: ${Object.keys(initState.processed).length}`);
    console.log(` Episodes: ${initState.total_episodes}`);
    console.log(` Items: ${initState.total_items}`);
    if (!RUN_COMPILE) {
        console.log(`\n Tip: Run with --compile or --full to also refresh KB pages`);
    }
    if (!INCLUDE_MEMORY) {
        console.log(` Tip: Run with --include-memory or --full to ingest ~/.claude memory files`);
    }
    console.log(`\n To reprocess everything: rm ${INIT_STATE_FILE}`);
}
|
|
835
|
+
// Script entry point: run main() and turn any rejection into a logged
// fatal error with a non-zero exit code.
main().catch((fatalError) => {
    console.error("Fatal:", fatalError);
    process.exit(1);
});
|
|
839
|
+
//# sourceMappingURL=init.js.map
|