@gamaze/hicortex 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/db.js ADDED
@@ -0,0 +1,140 @@
1
+ "use strict";
2
+ /**
3
+ * Database initialization with better-sqlite3 + sqlite-vec.
4
+ * Ported from hicortex/db.py — same schema for migration compatibility.
5
+ */
6
+ var __importDefault = (this && this.__importDefault) || function (mod) {
7
+ return (mod && mod.__esModule) ? mod : { "default": mod };
8
+ };
9
+ Object.defineProperty(exports, "__esModule", { value: true });
10
+ exports.initDb = initDb;
11
+ exports.getStats = getStats;
12
+ const better_sqlite3_1 = __importDefault(require("better-sqlite3"));
13
+ const node_fs_1 = require("node:fs");
14
+ const EMBEDDING_DIMENSIONS = 384;
15
+ const SCHEMA = `
16
+ CREATE TABLE IF NOT EXISTS memories (
17
+ id TEXT PRIMARY KEY,
18
+ content TEXT NOT NULL,
19
+
20
+ -- Decay & Strengthening
21
+ base_strength REAL DEFAULT 0.5,
22
+ last_accessed TIMESTAMP,
23
+ access_count INTEGER DEFAULT 0,
24
+ created_at TIMESTAMP NOT NULL,
25
+ ingested_at TIMESTAMP NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%f+00:00', 'now')),
26
+
27
+ -- Classification
28
+ source_agent TEXT DEFAULT 'default',
29
+ source_session TEXT,
30
+ project TEXT,
31
+ privacy TEXT DEFAULT 'WORK',
32
+ memory_type TEXT DEFAULT 'episode'
33
+ );
34
+
35
+ CREATE TABLE IF NOT EXISTS memory_links (
36
+ source_id TEXT NOT NULL,
37
+ target_id TEXT NOT NULL,
38
+ relationship TEXT NOT NULL,
39
+ strength REAL DEFAULT 0.5,
40
+ created_at TIMESTAMP NOT NULL,
41
+ PRIMARY KEY (source_id, target_id),
42
+ FOREIGN KEY (source_id) REFERENCES memories(id),
43
+ FOREIGN KEY (target_id) REFERENCES memories(id)
44
+ );
45
+
46
+ CREATE INDEX IF NOT EXISTS idx_memories_project ON memories(project);
47
+ CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(memory_type);
48
+ CREATE INDEX IF NOT EXISTS idx_memories_created ON memories(created_at);
49
+ CREATE INDEX IF NOT EXISTS idx_links_source ON memory_links(source_id);
50
+ CREATE INDEX IF NOT EXISTS idx_links_target ON memory_links(target_id);
51
+ `;
52
+ const FTS_SCHEMA = `
53
+ CREATE VIRTUAL TABLE IF NOT EXISTS memories_fts USING fts5(
54
+ content,
55
+ content_rowid='rowid'
56
+ );
57
+
58
+ CREATE TRIGGER IF NOT EXISTS memories_fts_insert AFTER INSERT ON memories
59
+ BEGIN
60
+ INSERT INTO memories_fts (rowid, content) VALUES (NEW.rowid, NEW.content);
61
+ END;
62
+
63
+ CREATE TRIGGER IF NOT EXISTS memories_fts_update AFTER UPDATE OF content ON memories
64
+ BEGIN
65
+ UPDATE memories_fts SET content = NEW.content WHERE rowid = NEW.rowid;
66
+ END;
67
+
68
+ CREATE TRIGGER IF NOT EXISTS memories_fts_delete AFTER DELETE ON memories
69
+ BEGIN
70
+ DELETE FROM memories_fts WHERE rowid = OLD.rowid;
71
+ END;
72
+ `;
73
+ const VEC_SCHEMA = `
74
+ CREATE VIRTUAL TABLE IF NOT EXISTS memory_vectors USING vec0(
75
+ id TEXT PRIMARY KEY,
76
+ embedding float[${EMBEDDING_DIMENSIONS}]
77
+ );
78
+ `;
79
/**
 * Open (or create) the SQLite database at `dbPath` and prepare it for use.
 *
 * Setup order: load the sqlite-vec extension, switch the journal to WAL and
 * turn on foreign-key enforcement, create the core tables and indexes, the
 * FTS5 table with its sync triggers, the vec0 vector table, and finally run
 * migrations for databases created by an older schema.
 *
 * @param dbPath Path handed to the better-sqlite3 constructor.
 * @returns The open Database instance (caller manages lifetime).
 * @throws Re-throws whatever setup step failed. The handle opened here is
 *   closed first so a failed initialization does not leak a connection.
 */
function initDb(dbPath) {
    const db = new better_sqlite3_1.default(dbPath);
    try {
        // Load sqlite-vec extension (must happen before the vec0 table is created)
        // eslint-disable-next-line @typescript-eslint/no-var-requires
        const sqliteVec = require("sqlite-vec");
        sqliteVec.load(db);
        // Pragmas
        db.pragma("journal_mode = WAL");
        db.pragma("foreign_keys = ON");
        // Create core tables and indexes
        db.exec(SCHEMA);
        // Create FTS5 virtual table and sync triggers
        db.exec(FTS_SCHEMA);
        // Create vec0 virtual table
        db.exec(VEC_SCHEMA);
        // Run migrations for existing databases
        migrate(db);
        return db;
    }
    catch (err) {
        // Nobody else holds this handle yet, so release it before propagating.
        try {
            db.close();
        }
        catch {
            // Closing is best effort; the setup error is the one worth reporting.
        }
        throw err;
    }
}
102
+ /**
103
+ * Apply schema migrations for existing databases.
104
+ */
105
+ function migrate(db) {
106
+ const cols = db.pragma("table_info(memories)");
107
+ const colNames = new Set(cols.map((c) => c.name));
108
+ if (!colNames.has("ingested_at")) {
109
+ db.exec("ALTER TABLE memories ADD COLUMN ingested_at TIMESTAMP");
110
+ db.exec("UPDATE memories SET ingested_at = created_at");
111
+ db.exec("CREATE INDEX IF NOT EXISTS idx_memories_ingested ON memories(ingested_at)");
112
+ }
113
+ }
114
/**
 * Collect summary statistics about the memory store.
 *
 * @param db Open database handle to query.
 * @param dbPath Path of the database file, used only to read its size.
 * @returns `memories` and `links` row counts, `db_size_bytes` (0 when the
 *   file cannot be stat'ed), and `by_type`, a map of memory_type to row count.
 */
function getStats(db, dbPath) {
    const { cnt: totalMemories } = db.prepare("SELECT count(*) as cnt FROM memories").get();
    const { cnt: totalLinks } = db.prepare("SELECT count(*) as cnt FROM memory_links").get();
    let sizeOnDisk;
    try {
        sizeOnDisk = (0, node_fs_1.statSync)(dbPath).size;
    }
    catch {
        // File may not exist yet
        sizeOnDisk = 0;
    }
    const perType = {};
    const grouped = db
        .prepare("SELECT memory_type, count(*) as cnt FROM memories GROUP BY memory_type")
        .all();
    grouped.forEach(({ memory_type, cnt }) => {
        perType[memory_type] = cnt;
    });
    return {
        memories: totalMemories,
        links: totalLinks,
        db_size_bytes: sizeOnDisk,
        by_type: perType,
    };
}
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Session knowledge extraction (distillation).
3
+ * Simplified from hicortex/distiller.py — messages come from agent_end hook,
4
+ * not from filesystem scanning.
5
+ */
6
+ import type { LlmClient } from "./llm.js";
7
+ /**
8
+ * Convert OpenClaw hook messages to a filtered transcript string.
9
+ */
10
+ export declare function extractConversationText(messages: unknown[]): string;
11
+ /**
12
+ * Send filtered conversation to LLM for knowledge extraction.
13
+ * Returns an array of memory entries to ingest, or empty array if nothing worth extracting.
14
+ */
15
+ export declare function distillSession(llm: LlmClient, conversation: string, projectName: string, date: string): Promise<string[]>;
@@ -0,0 +1,186 @@
1
+ "use strict";
2
+ /**
3
+ * Session knowledge extraction (distillation).
4
+ * Simplified from hicortex/distiller.py — messages come from agent_end hook,
5
+ * not from filesystem scanning.
6
+ */
7
+ Object.defineProperty(exports, "__esModule", { value: true });
8
+ exports.extractConversationText = extractConversationText;
9
+ exports.distillSession = distillSession;
10
+ const prompts_js_1 = require("./prompts.js");
11
+ const MAX_TRANSCRIPT_CHARS = 80_000;
12
+ const MIN_CONVERSATION_CHARS = 200;
13
+ // Entry types to skip entirely (from the Python distiller)
14
+ const SKIP_ENTRY_TYPES = new Set([
15
+ "progress",
16
+ "system",
17
+ "file-history-snapshot",
18
+ "queue-operation",
19
+ "summary",
20
+ ]);
21
/**
 * Extract readable text from a message content value (string or block list).
 *
 * - A string is returned as-is, cut to 20,000 characters.
 * - An array is treated as a list of content blocks: only objects whose
 *   `type` is "text" contribute, each cut to 10,000 characters, joined with
 *   newlines. Collection stops once more than 20,000 characters were kept.
 * - Anything else yields an empty string.
 *
 * The running total counts the characters actually kept, not the untruncated
 * block length, so one oversized block no longer causes the text blocks that
 * follow it to be dropped.
 *
 * @param content Message content in either supported shape.
 * @returns The extracted text, possibly empty.
 */
function extractTextFromContent(content) {
    if (typeof content === "string") {
        return content.length > 20_000 ? content.slice(0, 20_000) : content;
    }
    if (!Array.isArray(content))
        return "";
    const texts = [];
    let totalLen = 0;
    for (const block of content) {
        if (typeof block !== "object" || block === null)
            continue;
        // Skip: tool_use, tool_result, thinking, image blocks
        if (block.type !== "text")
            continue;
        const raw = String(block.text ?? "");
        const kept = raw.length > 10_000 ? raw.slice(0, 10_000) : raw;
        texts.push(kept);
        totalLen += kept.length;
        if (totalLen > 20_000)
            break;
    }
    return texts.join("\n");
}
47
/**
 * Strip noise from message text, keep the human conversation.
 *
 * Applied in order:
 *  1. Hard cap of 50,000 characters.
 *  2. Fenced code blocks with more than 10 body lines are replaced by
 *     "[code block removed]" (also when the fence is never closed).
 *  3. `<system-reminder>…</system-reminder>` sections are removed.
 *  4. Lines of the form "<digits>\u2192…" (numbered file dumps) are blanked.
 *  5. Runs of 100+ base64-alphabet characters become "[binary removed]".
 *  6. Three or more consecutive newlines collapse to two; result is trimmed.
 *
 * @param text Raw message text.
 * @returns The cleaned text.
 */
function cleanMessageContent(text) {
    // Hard cap
    if (text.length > 50_000) {
        text = text.slice(0, 50_000);
    }
    // Remove large code blocks (>10 lines)
    const lines = text.split("\n");
    const cleaned = [];
    let inCodeBlock = false;
    let codeBlockLines = 0;
    // Index in `cleaned` of the opening fence, so the whole block can be cut.
    let codeBlockStart = 0;
    for (const line of lines) {
        if (line.startsWith("```") && !inCodeBlock) {
            inCodeBlock = true;
            codeBlockLines = 0;
            codeBlockStart = cleaned.length;
            cleaned.push(line);
        }
        else if (line.startsWith("```") && inCodeBlock) {
            inCodeBlock = false;
            if (codeBlockLines > 10) {
                // Drop the opening fence and everything collected after it.
                cleaned.length = codeBlockStart;
                cleaned.push("[code block removed]");
            }
            else {
                cleaned.push(line);
            }
        }
        else if (inCodeBlock) {
            codeBlockLines++;
            cleaned.push(line);
        }
        else {
            cleaned.push(line);
        }
    }
    // Unterminated fence at end of input: apply the same size rule.
    if (inCodeBlock && codeBlockLines > 10) {
        cleaned.length = codeBlockStart;
        cleaned.push("[code block removed]");
    }
    text = cleaned.join("\n");
    // Remove <system-reminder>...</system-reminder>.
    // Lazy match over any character so reminders that themselves contain "<"
    // are removed too; the 10,000 bound keeps worst-case matching cost small.
    text = text.replace(/<system-reminder>[\s\S]{0,10000}?<\/system-reminder>/g, "");
    // Remove file path dumps (Read tool output: " 123->...")
    text = text.replace(/^\s*\d+\u2192.*$/gm, "");
    // Remove base64 content
    text = text.replace(/[A-Za-z0-9+/]{100,}={0,2}/g, "[binary removed]");
    // Collapse excessive whitespace
    text = text.replace(/\n{3,}/g, "\n\n");
    return text.trim();
}
101
/**
 * Build a filtered transcript string from OpenClaw hook messages.
 *
 * Non-object entries, entries whose `type` is in SKIP_ENTRY_TYPES and
 * sidechain entries are ignored. Content is read from `content`, falling back
 * to `message.content`. Each surviving message is cleaned and, if at least 20
 * characters remain, emitted as "USER: …" or "ASSISTANT: …"; messages are
 * separated by a blank line.
 *
 * @param messages Raw hook messages.
 * @returns The transcript, or an empty string when nothing qualifies.
 */
function extractConversationText(messages) {
    const transcript = [];
    for (const entry of messages) {
        const isObject = typeof entry === "object" && entry !== null;
        if (!isObject)
            continue;
        // Entry-level filter
        const entryType = String(entry.type ?? "");
        if (SKIP_ENTRY_TYPES.has(entryType) || entry.isSidechain)
            continue;
        // OpenClaw puts content at the top level; the Python distiller format
        // nests it under message.content.
        const rawContent = entry.content ?? entry.message?.content;
        if (rawContent == null)
            continue;
        const body = cleanMessageContent(extractTextFromContent(rawContent));
        if (body.length < 20)
            continue;
        const fromUser = entry.role === "user" || entry.type === "user";
        const speaker = fromUser ? "USER" : "ASSISTANT";
        transcript.push(`${speaker}: ${body}`);
    }
    return transcript.join("\n\n");
}
129
/**
 * Ask the LLM to extract knowledge from a filtered conversation.
 *
 * Conversations shorter than MIN_CONVERSATION_CHARS are skipped. Longer ones
 * are cut to MAX_TRANSCRIPT_CHARS with a truncation marker appended. An empty
 * reply, or a reply with "NO_EXTRACT" in its first 20 characters, means
 * nothing is worth storing. LLM or parsing errors are logged and swallowed.
 *
 * @param llm Client exposing `completeDistill(prompt)`.
 * @param conversation Filtered transcript text.
 * @param projectName Project name passed to the prompt builder.
 * @param date Date string passed to the prompt builder.
 * @returns Memory entries to ingest; an empty array when there are none or on error.
 */
async function distillSession(llm, conversation, projectName, date) {
    if (conversation.length < MIN_CONVERSATION_CHARS)
        return [];
    // Truncate if too long
    const tooLong = conversation.length > MAX_TRANSCRIPT_CHARS;
    const transcript = tooLong
        ? conversation.slice(0, MAX_TRANSCRIPT_CHARS) + "\n\n[...truncated...]"
        : conversation;
    const prompt = (0, prompts_js_1.distillation)(projectName, date, transcript);
    try {
        const reply = await llm.completeDistill(prompt);
        const nothingToExtract = !reply || reply.slice(0, 20).includes("NO_EXTRACT");
        // Split distilled markdown into individual memory entries
        return nothingToExtract ? [] : parseDistilledEntries(reply);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        console.error(`[hicortex] Distillation LLM error: ${reason}`);
        return [];
    }
}
159
/**
 * Split distilled markdown into one memory string per bullet item.
 *
 * "### " headings set the current section, which prefixes later bullets as
 * "[Section] text". "# " and "## " headings are ignored. Only lines that
 * (after trimming) start with "- " and are longer than 5 characters count as
 * entries; everything else is skipped.
 *
 * @param markdown LLM output to parse.
 * @returns The extracted entries in document order.
 */
function parseDistilledEntries(markdown) {
    const memories = [];
    let section = "";
    for (const rawLine of markdown.split("\n")) {
        const text = rawLine.trim();
        // Section headers
        if (text.startsWith("### ")) {
            section = text.slice(4).trim();
            continue;
        }
        // Top-level headers and classification carry no entries
        const isOuterHeading = text.startsWith("# ") || text.startsWith("## ");
        if (isOuterHeading)
            continue;
        // Only sufficiently long bullet items become memories
        const isBullet = text.startsWith("- ") && text.length > 5;
        if (!isBullet)
            continue;
        const body = text.slice(2);
        memories.push(section ? `[${section}] ${body}` : body);
    }
    return memories;
}
@@ -0,0 +1,20 @@
1
+ /**
2
+ * Local embeddings using @huggingface/transformers.
3
+ * Ported from hicortex/embedder.py — same model (bge-small-en-v1.5, 384-dim).
4
+ *
5
+ * Uses dynamic import so the plugin compiles without @huggingface/transformers
6
+ * installed. The model is lazy-loaded on first call.
7
+ */
8
+ export declare const EMBEDDING_DIMENSIONS = 384;
9
+ /**
10
+ * Embed a single text string. Returns a Float32Array of 384 dimensions.
11
+ */
12
+ export declare function embed(text: string): Promise<Float32Array>;
13
+ /**
14
+ * Embed multiple texts. Returns an array of Float32Array embeddings.
15
+ */
16
+ export declare function embedBatch(texts: string[]): Promise<Float32Array[]>;
17
+ /**
18
+ * Return the embedding dimension count.
19
+ */
20
+ export declare function dimensions(): number;
@@ -0,0 +1,85 @@
1
+ "use strict";
2
+ /**
3
+ * Local embeddings using @huggingface/transformers.
4
+ * Ported from hicortex/embedder.py — same model (bge-small-en-v1.5, 384-dim).
5
+ *
6
+ * Uses dynamic import so the plugin compiles without @huggingface/transformers
7
+ * installed. The model is lazy-loaded on first call.
8
+ */
9
+ Object.defineProperty(exports, "__esModule", { value: true });
10
+ exports.EMBEDDING_DIMENSIONS = void 0;
11
+ exports.embed = embed;
12
+ exports.embedBatch = embedBatch;
13
+ exports.dimensions = dimensions;
14
+ exports.EMBEDDING_DIMENSIONS = 384;
15
+ const MODEL_NAME = "Xenova/bge-small-en-v1.5";
16
+ // Pipeline is lazy-loaded on first use
17
+ let pipeline = null;
18
+ let initPromise = null;
19
/**
 * Initialize the embedding pipeline (called lazily on first embed call).
 *
 * Concurrent callers share one in-flight initialization through `initPromise`.
 * If initialization fails the promise is cleared so a later call can retry.
 *
 * @throws Error with install instructions when @huggingface/transformers
 *   cannot be resolved; otherwise the original import or model-loading error.
 */
async function ensureInit() {
    if (pipeline)
        return;
    if (initPromise) {
        await initPromise;
        return;
    }
    initPromise = (async () => {
        try {
            let transformers;
            try {
                // Dynamic import — package may not be installed (it's optional)
                // eslint-disable-next-line @typescript-eslint/no-require-imports
                transformers = await Function('return import("@huggingface/transformers")')();
            }
            catch (err) {
                // A failed import() of a missing package reports
                // code ERR_MODULE_NOT_FOUND with a "Cannot find package" message;
                // require()-style failures use MODULE_NOT_FOUND / "Cannot find module".
                // Only the import step is checked, so model-loading failures are
                // never reported as a missing package.
                const code = err && typeof err === "object" ? err.code : undefined;
                const msg = err instanceof Error ? err.message : String(err);
                const notInstalled = code === "ERR_MODULE_NOT_FOUND" ||
                    code === "MODULE_NOT_FOUND" ||
                    msg.includes("Cannot find package") ||
                    msg.includes("Cannot find module") ||
                    msg.includes("MODULE_NOT_FOUND");
                if (notInstalled) {
                    throw new Error(`@huggingface/transformers is not installed. ` +
                        `Run: npm install @huggingface/transformers`);
                }
                throw err;
            }
            // The pipeline factory may sit on the namespace or on its default export.
            const pipelineFn = transformers.pipeline ?? transformers.default?.pipeline;
            if (!pipelineFn) {
                throw new Error("Could not find pipeline function in @huggingface/transformers");
            }
            console.log("[hicortex] Loading embedding model (first run downloads ~130MB)...");
            pipeline = await pipelineFn("feature-extraction", MODEL_NAME, {
                dtype: "fp32",
            });
            console.log("[hicortex] Embedding model ready");
        }
        catch (err) {
            // Allow a later call to retry from scratch.
            initPromise = null;
            throw err;
        }
    })();
    await initPromise;
}
58
/**
 * Compute the embedding of one text.
 *
 * Makes sure the pipeline is initialized, runs it with mean pooling and
 * normalization, and copies the resulting data into a new Float32Array.
 *
 * @param text Text to embed.
 * @returns A fresh Float32Array holding the embedding.
 */
async function embed(text) {
    await ensureInit();
    const options = { pooling: "mean", normalize: true };
    const { data } = await pipeline(text, options);
    // Copy so the caller owns the buffer.
    return new Float32Array(data);
}
67
/**
 * Compute embeddings for several texts, one after another.
 *
 * @param texts Texts to embed.
 * @returns One Float32Array per input text, in input order; an empty array
 *   for empty input.
 */
async function embedBatch(texts) {
    const vectors = [];
    if (texts.length === 0) {
        return vectors;
    }
    // Process sequentially to avoid OOM on large batches
    for (const item of texts) {
        const vector = await embed(item);
        vectors.push(vector);
    }
    return vectors;
}
80
/**
 * Report how many dimensions each embedding has.
 *
 * @returns The exported EMBEDDING_DIMENSIONS value.
 */
function dimensions() {
    const { EMBEDDING_DIMENSIONS: size } = exports;
    return size;
}
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Hicortex OpenClaw Plugin — Long-term Memory That Learns.
3
+ *
4
+ * Pure in-process plugin: no sidecar, no HTTP. Uses better-sqlite3 + sqlite-vec
5
+ * for storage, @huggingface/transformers for embeddings, and multi-provider LLM
6
+ * for distillation and consolidation.
7
+ */
8
+ declare const _default: {
9
+ id: string;
10
+ name: string;
11
+ kind: "lifecycle";
12
+ register(api: any): void;
13
+ };
14
+ export default _default;