@gamaze/hicortex 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +119 -0
- package/dist/consolidate.d.ts +36 -0
- package/dist/consolidate.js +482 -0
- package/dist/db.d.ts +19 -0
- package/dist/db.js +140 -0
- package/dist/distiller.d.ts +15 -0
- package/dist/distiller.js +186 -0
- package/dist/embedder.d.ts +20 -0
- package/dist/embedder.js +85 -0
- package/dist/index.d.ts +14 -0
- package/dist/index.js +557 -0
- package/dist/license.d.ts +5 -0
- package/dist/license.js +96 -0
- package/dist/llm.d.ts +66 -0
- package/dist/llm.js +421 -0
- package/dist/prompts.d.ts +16 -0
- package/dist/prompts.js +117 -0
- package/dist/retrieval.d.ts +47 -0
- package/dist/retrieval.js +320 -0
- package/dist/storage.d.ts +98 -0
- package/dist/storage.js +326 -0
- package/dist/types.d.ts +132 -0
- package/dist/types.js +6 -0
- package/openclaw.plugin.json +70 -0
- package/package.json +42 -0
- package/skills/hicortex-activate/SKILL.md +53 -0
- package/skills/hicortex-learn/SKILL.md +40 -0
- package/skills/hicortex-memory/SKILL.md +39 -0
package/dist/db.js
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Database initialization with better-sqlite3 + sqlite-vec.
|
|
4
|
+
* Ported from hicortex/db.py — same schema for migration compatibility.
|
|
5
|
+
*/
|
|
6
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
7
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
8
|
+
};
|
|
9
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
10
|
+
exports.initDb = initDb;
|
|
11
|
+
exports.getStats = getStats;
|
|
12
|
+
const better_sqlite3_1 = __importDefault(require("better-sqlite3"));
|
|
13
|
+
const node_fs_1 = require("node:fs");
|
|
14
|
+
// Dimension of stored embedding vectors; must match the embedder output
// (bge-small-en-v1.5 → 384, see embedder.js) and the vec0 column below.
const EMBEDDING_DIMENSIONS = 384;
// Core relational schema. Kept identical to hicortex/db.py so databases
// written by the Python implementation open unchanged (see file header).
const SCHEMA = `
CREATE TABLE IF NOT EXISTS memories (
    id TEXT PRIMARY KEY,
    content TEXT NOT NULL,

    -- Decay & Strengthening
    base_strength REAL DEFAULT 0.5,
    last_accessed TIMESTAMP,
    access_count INTEGER DEFAULT 0,
    created_at TIMESTAMP NOT NULL,
    ingested_at TIMESTAMP NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%f+00:00', 'now')),

    -- Classification
    source_agent TEXT DEFAULT 'default',
    source_session TEXT,
    project TEXT,
    privacy TEXT DEFAULT 'WORK',
    memory_type TEXT DEFAULT 'episode'
);

CREATE TABLE IF NOT EXISTS memory_links (
    source_id TEXT NOT NULL,
    target_id TEXT NOT NULL,
    relationship TEXT NOT NULL,
    strength REAL DEFAULT 0.5,
    created_at TIMESTAMP NOT NULL,
    PRIMARY KEY (source_id, target_id),
    FOREIGN KEY (source_id) REFERENCES memories(id),
    FOREIGN KEY (target_id) REFERENCES memories(id)
);

CREATE INDEX IF NOT EXISTS idx_memories_project ON memories(project);
CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(memory_type);
CREATE INDEX IF NOT EXISTS idx_memories_created ON memories(created_at);
CREATE INDEX IF NOT EXISTS idx_links_source ON memory_links(source_id);
CREATE INDEX IF NOT EXISTS idx_links_target ON memory_links(target_id);
`;
// FTS5 full-text mirror of memories.content, kept in sync by triggers on
// insert/update/delete.
// NOTE(review): `content_rowid=` is normally paired with a `content=` option
// (external-content FTS5 table); here it stands alone — confirm intentional.
const FTS_SCHEMA = `
CREATE VIRTUAL TABLE IF NOT EXISTS memories_fts USING fts5(
    content,
    content_rowid='rowid'
);

CREATE TRIGGER IF NOT EXISTS memories_fts_insert AFTER INSERT ON memories
BEGIN
    INSERT INTO memories_fts (rowid, content) VALUES (NEW.rowid, NEW.content);
END;

CREATE TRIGGER IF NOT EXISTS memories_fts_update AFTER UPDATE OF content ON memories
BEGIN
    UPDATE memories_fts SET content = NEW.content WHERE rowid = NEW.rowid;
END;

CREATE TRIGGER IF NOT EXISTS memories_fts_delete AFTER DELETE ON memories
BEGIN
    DELETE FROM memories_fts WHERE rowid = OLD.rowid;
END;
`;
// sqlite-vec virtual table: one fixed-size embedding per memory id.
const VEC_SCHEMA = `
CREATE VIRTUAL TABLE IF NOT EXISTS memory_vectors USING vec0(
    id TEXT PRIMARY KEY,
    embedding float[${EMBEDDING_DIMENSIONS}]
);
`;
|
|
79
|
+
/**
 * Open (or create) the hicortex database at `dbPath`.
 *
 * Loads the sqlite-vec extension, switches journaling to WAL, turns on
 * foreign-key enforcement, applies all idempotent DDL, then runs in-place
 * migrations for databases created by older versions.
 *
 * @param dbPath Filesystem path of the SQLite database file.
 * @returns The open Database instance; the caller manages its lifetime.
 */
function initDb(dbPath) {
    const db = new better_sqlite3_1.default(dbPath);
    // The vec0 virtual table below is provided by the sqlite-vec extension,
    // so it must be loaded before any DDL runs.
    // eslint-disable-next-line @typescript-eslint/no-var-requires
    require("sqlite-vec").load(db);
    // WAL allows concurrent readers; FK checks protect memory_links integrity.
    db.pragma("journal_mode = WAL");
    db.pragma("foreign_keys = ON");
    // All three schema scripts are IF NOT EXISTS, so re-running is safe:
    // core tables/indexes, then the FTS5 mirror + triggers, then vec0.
    for (const ddl of [SCHEMA, FTS_SCHEMA, VEC_SCHEMA]) {
        db.exec(ddl);
    }
    // Bring pre-existing databases up to the current schema.
    migrate(db);
    return db;
}
|
|
102
|
+
/**
 * Apply in-place schema migrations for databases created by older versions.
 *
 * Single migration so far: the `ingested_at` column (added after the first
 * release). Existing rows are backfilled from `created_at` and the column
 * gets its own index.
 *
 * @param db Open better-sqlite3 Database.
 */
function migrate(db) {
    const existing = new Set(db.pragma("table_info(memories)").map((col) => col.name));
    if (existing.has("ingested_at")) {
        return; // schema already current
    }
    db.exec("ALTER TABLE memories ADD COLUMN ingested_at TIMESTAMP");
    db.exec("UPDATE memories SET ingested_at = created_at");
    db.exec("CREATE INDEX IF NOT EXISTS idx_memories_ingested ON memories(ingested_at)");
}
|
|
114
|
+
/**
 * Return database statistics: total memory and link counts, on-disk file
 * size in bytes (0 if the file does not exist yet), and a per-memory_type
 * count breakdown.
 *
 * @param db Open better-sqlite3 Database.
 * @param dbPath Path of the database file, used only for the size stat.
 */
function getStats(db, dbPath) {
    const countOf = (sql) => db.prepare(sql).get().cnt;
    const memories = countOf("SELECT count(*) as cnt FROM memories");
    const links = countOf("SELECT count(*) as cnt FROM memory_links");
    let db_size_bytes = 0;
    try {
        db_size_bytes = (0, node_fs_1.statSync)(dbPath).size;
    }
    catch {
        // File may not exist yet
    }
    const by_type = {};
    const grouped = db
        .prepare("SELECT memory_type, count(*) as cnt FROM memories GROUP BY memory_type")
        .all();
    for (const { memory_type, cnt } of grouped) {
        by_type[memory_type] = cnt;
    }
    return { memories, links, db_size_bytes, by_type };
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
 * Session knowledge extraction (distillation).
 * Simplified from hicortex/distiller.py — messages come from agent_end hook,
 * not from filesystem scanning.
 */
import type { LlmClient } from "./llm.js";
/**
 * Convert OpenClaw hook messages to a filtered transcript string.
 * @param messages Raw hook message entries; non-object entries are ignored.
 * @returns Transcript of "USER:"/"ASSISTANT:"-prefixed blocks separated by blank lines.
 */
export declare function extractConversationText(messages: unknown[]): string;
/**
 * Send filtered conversation to LLM for knowledge extraction.
 * Returns an array of memory entries to ingest, or empty array if nothing worth extracting.
 * LLM errors are logged and yield an empty array rather than throwing.
 * @param llm Client used for the distillation completion.
 * @param conversation Already-filtered transcript text.
 * @param projectName Project label embedded in the prompt.
 * @param date Session date embedded in the prompt.
 */
export declare function distillSession(llm: LlmClient, conversation: string, projectName: string, date: string): Promise<string[]>;
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Session knowledge extraction (distillation).
|
|
4
|
+
* Simplified from hicortex/distiller.py — messages come from agent_end hook,
|
|
5
|
+
* not from filesystem scanning.
|
|
6
|
+
*/
|
|
7
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
8
|
+
exports.extractConversationText = extractConversationText;
|
|
9
|
+
exports.distillSession = distillSession;
|
|
10
|
+
const prompts_js_1 = require("./prompts.js");
|
|
11
|
+
// Maximum transcript characters sent to the LLM; longer input is truncated.
const MAX_TRANSCRIPT_CHARS = 80_000;
// Conversations shorter than this are skipped — too little to distill.
const MIN_CONVERSATION_CHARS = 200;
// Entry types to skip entirely (from the Python distiller)
const SKIP_ENTRY_TYPES = new Set([
    "progress",
    "system",
    "file-history-snapshot",
    "queue-operation",
    "summary",
]);
|
|
21
|
+
/**
 * Pull readable text out of a message `content` value, which may be a plain
 * string or an Anthropic-style block list. Only `text` blocks contribute;
 * tool_use, tool_result, thinking and image blocks are skipped. Output is
 * size-bounded: 20k chars for plain strings, 10k per text block, and the
 * loop stops once ~20k total characters have been seen.
 */
function extractTextFromContent(content) {
    if (typeof content === "string") {
        // slice() is a no-op for strings already within the cap
        return content.slice(0, 20_000);
    }
    if (!Array.isArray(content)) {
        return "";
    }
    const pieces = [];
    let budgetUsed = 0;
    for (const block of content) {
        if (typeof block !== "object" || block === null) {
            continue;
        }
        if (block.type === "text") {
            const raw = String(block.text ?? "");
            pieces.push(raw.slice(0, 10_000));
            // Budget counts the uncapped length, matching the overall limit.
            budgetUsed += raw.length;
        }
        // Skip: tool_use, tool_result, thinking, image blocks
        if (budgetUsed > 20_000) {
            break;
        }
    }
    return pieces.join("\n");
}
|
|
47
|
+
/**
 * Strip mechanical noise from message text, keeping the human conversation.
 *
 * Removes, in order: fenced code blocks longer than 10 lines (short ones are
 * kept), <system-reminder> blocks, Read-tool line dumps ("  123→..."),
 * long base64-looking runs, and runs of 3+ newlines. Input is hard-capped
 * at 50k characters first.
 */
function cleanMessageContent(text) {
    // Hard cap (slice is a no-op when already under the limit).
    text = text.slice(0, 50_000);
    // Pass 1: drop oversized fenced code blocks.
    const kept = [];
    let fenceOpen = false;
    let fenceBody = 0;
    let fenceAt = 0;
    for (const line of text.split("\n")) {
        const isFence = line.startsWith("```");
        if (isFence && !fenceOpen) {
            // Opening fence: remember where it starts so it can be rolled back.
            fenceOpen = true;
            fenceBody = 0;
            fenceAt = kept.length;
            kept.push(line);
        }
        else if (isFence && fenceOpen) {
            fenceOpen = false;
            if (fenceBody > 10) {
                // Roll back the fence and its body; drop the closing fence too.
                kept.length = fenceAt;
                kept.push("[code block removed]");
            }
            else {
                kept.push(line);
            }
        }
        else if (fenceOpen) {
            fenceBody++;
            kept.push(line);
        }
        else {
            kept.push(line);
        }
    }
    // An unterminated oversized fence at end-of-text is removed as well.
    if (fenceOpen && fenceBody > 10) {
        kept.length = fenceAt;
        kept.push("[code block removed]");
    }
    text = kept.join("\n");
    // Remove <system-reminder>...</system-reminder>
    text = text.replace(/<system-reminder>[^<]{0,10000}<\/system-reminder>/g, "");
    // Remove file path dumps (Read tool output: "  123→...")
    text = text.replace(/^\s*\d+\u2192.*$/gm, "");
    // Remove base64 content
    text = text.replace(/[A-Za-z0-9+/]{100,}={0,2}/g, "[binary removed]");
    // Collapse excessive whitespace
    text = text.replace(/\n{3,}/g, "\n\n");
    return text.trim();
}
|
|
101
|
+
/**
 * Convert OpenClaw hook messages into a filtered "USER:/ASSISTANT:" transcript.
 *
 * Skips non-object entries, bookkeeping entry types (SKIP_ENTRY_TYPES), and
 * sidechain messages; extracts and cleans each remaining message's text and
 * drops results shorter than 20 characters.
 */
function extractConversationText(messages) {
    const transcript = [];
    for (const raw of messages) {
        if (typeof raw !== "object" || raw === null) {
            continue;
        }
        const entry = raw;
        // Entry-level filters: bookkeeping types and sidechain traffic.
        if (SKIP_ENTRY_TYPES.has(String(entry.type ?? "")) || entry.isSidechain) {
            continue;
        }
        // OpenClaw puts content at the top level; the Python distiller's
        // format nests it under message.content — accept either.
        const content = entry.content ?? entry.message?.content;
        if (content == null) {
            continue;
        }
        const cleaned = cleanMessageContent(extractTextFromContent(content));
        if (cleaned.length < 20) {
            continue;
        }
        const speaker = entry.role === "user" || entry.type === "user" ? "USER" : "ASSISTANT";
        transcript.push(`${speaker}: ${cleaned}`);
    }
    return transcript.join("\n\n");
}
|
|
129
|
+
/**
 * Send the filtered conversation to the LLM for knowledge extraction.
 *
 * Conversations below MIN_CONVERSATION_CHARS are skipped; longer ones are
 * truncated to MAX_TRANSCRIPT_CHARS before prompting. Returns individual
 * memory entry strings, or an empty array when the model answers NO_EXTRACT
 * or the call fails (errors are logged, never rethrown).
 */
async function distillSession(llm, conversation, projectName, date) {
    if (conversation.length < MIN_CONVERSATION_CHARS) {
        return [];
    }
    // Truncate oversized transcripts with an explicit marker.
    const transcript = conversation.length > MAX_TRANSCRIPT_CHARS
        ? conversation.slice(0, MAX_TRANSCRIPT_CHARS) + "\n\n[...truncated...]"
        : conversation;
    const prompt = (0, prompts_js_1.distillation)(projectName, date, transcript);
    try {
        const raw = await llm.completeDistill(prompt);
        // The prompt asks the model to reply NO_EXTRACT when the session
        // contains nothing worth remembering (check only the leading chars).
        if (!raw || raw === "NO_EXTRACT" || raw.slice(0, 20).includes("NO_EXTRACT")) {
            return [];
        }
        // Split distilled markdown into individual memory entries
        return parseDistilledEntries(raw);
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        console.error(`[hicortex] Distillation LLM error: ${msg}`);
        return [];
    }
}
|
|
159
|
+
/**
 * Split distilled markdown into individual memory entry strings.
 *
 * "### " headers set the current section label; each "- " bullet of more
 * than 5 characters becomes one memory, prefixed "[Section] " when a section
 * is active. "# "/"## " headers and non-bullet prose are ignored.
 */
function parseDistilledEntries(markdown) {
    const memories = [];
    let section = "";
    for (const rawLine of markdown.split("\n")) {
        const line = rawLine.trim();
        if (line.startsWith("### ")) {
            section = line.slice(4).trim();
        }
        else if (line.startsWith("# ") || line.startsWith("## ")) {
            // Top-level header or classification line — not a memory.
        }
        else if (line.startsWith("- ") && line.length > 5) {
            const body = line.slice(2);
            memories.push(section ? `[${section}] ${body}` : body);
        }
    }
    return memories;
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
 * Local embeddings using @huggingface/transformers.
 * Ported from hicortex/embedder.py — same model (bge-small-en-v1.5, 384-dim).
 *
 * Uses dynamic import so the plugin compiles without @huggingface/transformers
 * installed. The model is lazy-loaded on first call.
 */
export declare const EMBEDDING_DIMENSIONS = 384;
/**
 * Embed a single text string. Returns a Float32Array of 384 dimensions.
 * @throws If @huggingface/transformers is not installed or the model fails to load.
 */
export declare function embed(text: string): Promise<Float32Array>;
/**
 * Embed multiple texts. Returns an array of Float32Array embeddings.
 * Texts are processed sequentially (see embedder.js) to limit memory use.
 */
export declare function embedBatch(texts: string[]): Promise<Float32Array[]>;
/**
 * Return the embedding dimension count (always 384 for this model).
 */
export declare function dimensions(): number;
|
package/dist/embedder.js
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Local embeddings using @huggingface/transformers.
|
|
4
|
+
* Ported from hicortex/embedder.py — same model (bge-small-en-v1.5, 384-dim).
|
|
5
|
+
*
|
|
6
|
+
* Uses dynamic import so the plugin compiles without @huggingface/transformers
|
|
7
|
+
* installed. The model is lazy-loaded on first call.
|
|
8
|
+
*/
|
|
9
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
10
|
+
exports.EMBEDDING_DIMENSIONS = void 0;
|
|
11
|
+
exports.embed = embed;
|
|
12
|
+
exports.embedBatch = embedBatch;
|
|
13
|
+
exports.dimensions = dimensions;
|
|
14
|
+
// Must match the float[384] column of the vec0 table (see db.js).
exports.EMBEDDING_DIMENSIONS = 384;
// transformers.js-compatible build of bge-small-en-v1.5 (384-dim vectors).
const MODEL_NAME = "Xenova/bge-small-en-v1.5";
// Pipeline is lazy-loaded on first use
let pipeline = null;
// In-flight initialization promise, shared so concurrent callers trigger
// only one model load (see ensureInit).
let initPromise = null;
|
|
19
|
+
/**
 * Initialize the embedding pipeline (called lazily on first embed call).
 * Throws with a clear error if @huggingface/transformers is not available.
 *
 * Single-flight: concurrent callers share one `initPromise`, so the model is
 * loaded at most once. On failure, `initPromise` is reset to null so a later
 * call can retry; the original error (or a clearer install hint) is rethrown
 * to every waiter.
 */
async function ensureInit() {
    if (pipeline)
        return;
    if (initPromise) {
        // Another caller is already loading the model — wait for it.
        await initPromise;
        return;
    }
    initPromise = (async () => {
        try {
            // Dynamic import — package may not be installed (it's optional)
            // NOTE(review): the Function constructor wrapper presumably keeps
            // the TS/CJS compiler from downleveling `import()` to `require()`
            // — confirm; it is eval-like but the argument here is a constant.
            // eslint-disable-next-line @typescript-eslint/no-require-imports
            const transformers = await Function('return import("@huggingface/transformers")')();
            // Handle both ESM namespace and default-export module shapes.
            const pipelineFn = transformers.pipeline ?? transformers.default?.pipeline;
            if (!pipelineFn) {
                throw new Error("Could not find pipeline function in @huggingface/transformers");
            }
            console.log("[hicortex] Loading embedding model (first run downloads ~130MB)...");
            pipeline = await pipelineFn("feature-extraction", MODEL_NAME, {
                dtype: "fp32",
            });
            console.log("[hicortex] Embedding model ready");
        }
        catch (err) {
            // Clear the in-flight marker so a subsequent call can retry.
            initPromise = null;
            const msg = err instanceof Error ? err.message : String(err);
            if (msg.includes("Cannot find module") ||
                msg.includes("MODULE_NOT_FOUND")) {
                // Translate the raw resolver error into an actionable hint.
                throw new Error(`@huggingface/transformers is not installed. ` +
                    `Run: npm install @huggingface/transformers`);
            }
            throw err;
        }
    })();
    await initPromise;
}
|
|
58
|
+
/**
 * Embed a single text string with the local bge-small model.
 * Lazily initializes the pipeline on first use (may download the model).
 *
 * @returns A 384-dimension Float32Array (mean pooling, normalize: true).
 */
async function embed(text) {
    await ensureInit();
    const result = await pipeline(text, { pooling: "mean", normalize: true });
    // Copy the transformers.js tensor data into a plain Float32Array.
    return new Float32Array(result.data);
}
|
|
67
|
+
/**
 * Embed multiple texts, yielding one Float32Array per input.
 * Deliberately sequential: embedding everything at once risks OOM on
 * large batches. An empty input yields an empty array.
 */
async function embedBatch(texts) {
    const vectors = [];
    for (const text of texts) {
        vectors.push(await embed(text));
    }
    return vectors;
}
|
|
80
|
+
/**
 * Return the embedding dimension count (384, matching EMBEDDING_DIMENSIONS).
 */
function dimensions() {
    return exports.EMBEDDING_DIMENSIONS;
}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
 * Hicortex OpenClaw Plugin — Long-term Memory That Learns.
 *
 * Pure in-process plugin: no sidecar, no HTTP. Uses better-sqlite3 + sqlite-vec
 * for storage, @huggingface/transformers for embeddings, and multi-provider LLM
 * for distillation and consolidation.
 */
declare const _default: {
    id: string;
    name: string;
    kind: "lifecycle";
    /** Plugin entry point called by the host. NOTE(review): `api: any` — the host API shape is not declared here. */
    register(api: any): void;
};
export default _default;
|