morpheus-cli 0.9.5 → 0.9.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +63 -43
- package/dist/channels/discord.js +71 -21
- package/dist/channels/telegram.js +73 -19
- package/dist/cli/commands/restart.js +15 -0
- package/dist/cli/commands/start.js +18 -0
- package/dist/config/manager.js +61 -0
- package/dist/config/paths.js +1 -0
- package/dist/config/schemas.js +11 -3
- package/dist/http/api.js +3 -0
- package/dist/http/routers/link.js +239 -0
- package/dist/http/routers/skills.js +1 -8
- package/dist/runtime/apoc.js +1 -1
- package/dist/runtime/audit/repository.js +1 -1
- package/dist/runtime/link-chunker.js +214 -0
- package/dist/runtime/link-repository.js +301 -0
- package/dist/runtime/link-search.js +298 -0
- package/dist/runtime/link-worker.js +284 -0
- package/dist/runtime/link.js +295 -0
- package/dist/runtime/memory/sati/service.js +1 -1
- package/dist/runtime/memory/sqlite.js +52 -0
- package/dist/runtime/neo.js +1 -1
- package/dist/runtime/oracle.js +81 -44
- package/dist/runtime/scaffold.js +4 -17
- package/dist/runtime/skills/__tests__/loader.test.js +7 -10
- package/dist/runtime/skills/__tests__/registry.test.js +2 -18
- package/dist/runtime/skills/__tests__/tool.test.js +55 -224
- package/dist/runtime/skills/index.js +1 -2
- package/dist/runtime/skills/loader.js +0 -2
- package/dist/runtime/skills/registry.js +8 -20
- package/dist/runtime/skills/schema.js +0 -4
- package/dist/runtime/skills/tool.js +42 -209
- package/dist/runtime/smiths/delegator.js +1 -1
- package/dist/runtime/smiths/registry.js +1 -1
- package/dist/runtime/tasks/worker.js +12 -44
- package/dist/runtime/trinity.js +1 -1
- package/dist/types/config.js +14 -0
- package/dist/ui/assets/AuditDashboard-93LCGHG1.js +1 -0
- package/dist/ui/assets/{Chat-BNtutgja.js → Chat-CK5sNcQ1.js} +8 -8
- package/dist/ui/assets/{Chronos-3C8RPZcl.js → Chronos-m2h--GEe.js} +1 -1
- package/dist/ui/assets/{ConfirmationModal-ZQPBeJ2Z.js → ConfirmationModal-Dd5pUJme.js} +1 -1
- package/dist/ui/assets/{Dashboard-CqkHzr2F.js → Dashboard-ODwl7d-a.js} +1 -1
- package/dist/ui/assets/{DeleteConfirmationModal-CioxFWn_.js → DeleteConfirmationModal-CCcojDmr.js} +1 -1
- package/dist/ui/assets/Documents-dWnSoxFO.js +7 -0
- package/dist/ui/assets/{Logs-DBVanS0O.js → Logs-Dc9Z2LBj.js} +1 -1
- package/dist/ui/assets/{MCPManager-vXfL3P2U.js → MCPManager-CMkb8vMn.js} +1 -1
- package/dist/ui/assets/{ModelPricing-DyfdunLT.js → ModelPricing-DtHPPbEQ.js} +1 -1
- package/dist/ui/assets/{Notifications-VL-vep6d.js → Notifications-BPvo-DWP.js} +1 -1
- package/dist/ui/assets/{Pagination-oTGieBLM.js → Pagination-BHZKk42X.js} +1 -1
- package/dist/ui/assets/{SatiMemories-jaadkW0U.js → SatiMemories-BUPu1Lxr.js} +1 -1
- package/dist/ui/assets/SessionAudit-CFKF4DA8.js +9 -0
- package/dist/ui/assets/Settings-C4JrXfsR.js +47 -0
- package/dist/ui/assets/{Skills-DE3zziXL.js → Skills-BUlvJgJ4.js} +1 -1
- package/dist/ui/assets/{Smiths-pmogN1mU.js → Smiths-CDtJdY0I.js} +1 -1
- package/dist/ui/assets/{Tasks-Bs8s34Jc.js → Tasks-DK_cOsNK.js} +1 -1
- package/dist/ui/assets/{TrinityDatabases-D7uihcdp.js → TrinityDatabases-X07by-19.js} +1 -1
- package/dist/ui/assets/{UsageStats-B9gePLZ0.js → UsageStats-dYcgckLq.js} +1 -1
- package/dist/ui/assets/{WebhookManager-B2L3rCLM.js → WebhookManager-DDw5eX2R.js} +1 -1
- package/dist/ui/assets/{audit-Cggeu9mM.js → audit-DZ5WLUEm.js} +1 -1
- package/dist/ui/assets/{chronos-D3-sWhfU.js → chronos-B_HI4mlq.js} +1 -1
- package/dist/ui/assets/{config-CBqRUPgn.js → config-B-YxlVrc.js} +1 -1
- package/dist/ui/assets/index-DVjwJ8jT.css +1 -0
- package/dist/ui/assets/{index-zKplfrXZ.js → index-DfJwcKqG.js} +5 -5
- package/dist/ui/assets/{mcp-uL1R9hyA.js → mcp-k-_pwbqA.js} +1 -1
- package/dist/ui/assets/{skills-jmw8yTJs.js → skills-xMXangks.js} +1 -1
- package/dist/ui/assets/{stats-HOms6GnM.js → stats-C4QZIv5O.js} +1 -1
- package/dist/ui/assets/{vendor-icons-DMd9RGvJ.js → vendor-icons-NHF9HNeN.js} +1 -1
- package/dist/ui/index.html +3 -3
- package/dist/ui/sw.js +1 -1
- package/package.json +3 -1
- package/dist/runtime/__tests__/keymaker.test.js +0 -148
- package/dist/runtime/keymaker.js +0 -157
- package/dist/ui/assets/AuditDashboard-DliJ1CX0.js +0 -1
- package/dist/ui/assets/SessionAudit-BsXrWlwz.js +0 -9
- package/dist/ui/assets/Settings-B4eezRcg.js +0 -47
- package/dist/ui/assets/index-D4fzIKy1.css +0 -1
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
import { createHash } from 'crypto';
|
|
2
|
+
import fs from 'fs-extra';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import { PDFParse } from 'pdf-parse';
|
|
5
|
+
import mammoth from 'mammoth';
|
|
6
|
+
// ─── Hashing ─────────────────────────────────────────────────────────────────
/**
 * Calculate the SHA-256 hash of document content.
 * @param {string|Buffer} content - Raw document bytes or text.
 * @returns {string} Lowercase hex digest.
 */
export function hashDocument(content) {
    const hasher = createHash('sha256');
    hasher.update(content);
    return hasher.digest('hex');
}
|
|
13
|
+
/**
 * Calculate the SHA-256 hash of a file addressed by path.
 * @param {string} filePath - Path of the file to hash.
 * @returns {Promise<string>} Lowercase hex digest of the file's raw bytes.
 */
export async function hashFile(filePath) {
    const fileBytes = await fs.readFile(filePath);
    return hashDocument(fileBytes);
}
|
|
20
|
+
/**
 * Split text into chunks respecting paragraph and sentence boundaries.
 *
 * Paragraphs (separated by blank lines) are packed into chunks up to
 * `chunkSize`; a paragraph longer than `chunkSize` is further split by
 * sentences. Character offsets are tracked against the original text
 * (they are approximate around trimmed whitespace).
 *
 * Fixes over the previous version:
 *  - sentence offsets used `paragraph.indexOf(sentence)` without a
 *    from-index, so a repeated sentence always got the FIRST occurrence's
 *    offset; a search cursor now advances through the paragraph.
 *  - the `if (!currentChunk)` reset ran AFTER appending the paragraph and
 *    could never fire; the empty check now happens before appending.
 *
 * @param {string} text - The text to chunk.
 * @param {number} [chunkSize=500] - Target size in characters.
 * @param {number} [minChunkSize=100] - Minimum size before a chunk may be flushed.
 * @returns {{content: string, position: number, char_start: number, char_end: number}[]}
 */
export function chunkText(text, chunkSize = 500, minChunkSize = 100) {
    const chunks = [];
    let position = 0;
    let charPos = 0; // running offset of the current paragraph in `text`
    // Split by paragraphs first
    const paragraphs = text.split(/\n\n+/);
    let currentChunk = '';
    let chunkStart = 0;
    for (const paragraph of paragraphs) {
        if (currentChunk.length + paragraph.length + 2 > chunkSize && currentChunk.length >= minChunkSize) {
            // Adding this paragraph would overflow: flush the current chunk.
            chunks.push({
                content: currentChunk.trim(),
                position: position++,
                char_start: chunkStart,
                char_end: chunkStart + currentChunk.length,
            });
            currentChunk = paragraph;
            chunkStart = charPos;
        }
        else if (paragraph.length > chunkSize) {
            // Paragraph is too long on its own: flush what we have, then
            // split the paragraph by sentences.
            if (currentChunk.length > 0) {
                chunks.push({
                    content: currentChunk.trim(),
                    position: position++,
                    char_start: chunkStart,
                    char_end: chunkStart + currentChunk.length,
                });
                currentChunk = '';
            }
            const sentences = splitBySentences(paragraph);
            let sentenceChunk = '';
            let sentenceStart = charPos;
            // Cursor so repeated sentences resolve to their own occurrence,
            // not the first one.
            let searchFrom = 0;
            for (const sentence of sentences) {
                const idx = paragraph.indexOf(sentence, searchFrom);
                const sentenceOffset = idx >= 0 ? idx : searchFrom;
                if (idx >= 0) {
                    searchFrom = idx + sentence.length;
                }
                if (sentenceChunk.length + sentence.length + 1 > chunkSize && sentenceChunk.length >= minChunkSize) {
                    chunks.push({
                        content: sentenceChunk.trim(),
                        position: position++,
                        char_start: sentenceStart,
                        char_end: sentenceStart + sentenceChunk.length,
                    });
                    sentenceChunk = sentence;
                    sentenceStart = charPos + sentenceOffset;
                }
                else {
                    sentenceChunk += (sentenceChunk ? ' ' : '') + sentence;
                }
            }
            if (sentenceChunk.trim()) {
                currentChunk = sentenceChunk;
                chunkStart = sentenceStart;
            }
        }
        else {
            // Paragraph fits: append it. A fresh chunk starts at the
            // current paragraph offset (check BEFORE appending).
            if (currentChunk === '') {
                chunkStart = charPos;
            }
            currentChunk += (currentChunk ? '\n\n' : '') + paragraph;
        }
        charPos += paragraph.length + 2; // +2 for paragraph separator
    }
    // Don't forget the last chunk
    if (currentChunk.trim()) {
        chunks.push({
            content: currentChunk.trim(),
            position,
            char_start: chunkStart,
            char_end: chunkStart + currentChunk.length,
        });
    }
    return chunks;
}
|
|
102
|
+
/**
 * Split text into sentences on common terminators (. ! ?).
 *
 * Fix: the previous regex-only implementation silently DROPPED any
 * trailing text that lacked terminal punctuation (e.g. the "Two" in
 * "One. Two"), losing content during chunking. The trailing fragment is
 * now kept as a final sentence.
 *
 * @param {string} text - Text to split.
 * @returns {string[]} Trimmed, non-empty sentences in order.
 */
function splitBySentences(text) {
    // Match sentences ending with . ! ? followed by space or end of string
    const pattern = /[^.!?]*[.!?]+(?:\s+|$)/g;
    const sentences = [];
    let tailStart = 0;
    let match;
    while ((match = pattern.exec(text)) !== null) {
        const trimmed = match[0].trim();
        if (trimmed) {
            sentences.push(trimmed);
        }
        tailStart = pattern.lastIndex;
        if (match[0].length === 0) {
            pattern.lastIndex++; // safety: avoid infinite loop on zero-length match
        }
    }
    // Keep any trailing fragment with no terminal punctuation.
    const tail = text.slice(tailStart).trim();
    if (tail) {
        sentences.push(tail);
    }
    return sentences;
}
|
|
110
|
+
/**
 * Parse a PDF file and extract its text plus basic metadata.
 * @param {string} filePath - Path to the PDF file.
 * @returns {Promise<{text: string, metadata: {pageCount: number, wordCount: number}}>}
 */
export async function parsePDF(filePath) {
    const buffer = await fs.readFile(filePath);
    const parser = new PDFParse({ data: buffer });
    const extracted = await parser.getText();
    const text = extracted.text || '';
    const info = await parser.getInfo();
    const wordCount = text.split(/\s+/).filter(Boolean).length;
    return {
        text,
        metadata: {
            pageCount: info.total,
            wordCount,
        },
    };
}
|
|
127
|
+
/**
 * Parse a DOCX file and extract its raw text plus a word count.
 * @param {string} filePath - Path to the DOCX file.
 * @returns {Promise<{text: string, metadata: {wordCount: number}}>}
 */
export async function parseDOCX(filePath) {
    const extraction = await mammoth.extractRawText({ path: filePath });
    const text = extraction.value;
    const words = text.split(/\s+/).filter(Boolean);
    return {
        text,
        metadata: { wordCount: words.length },
    };
}
|
|
140
|
+
/**
 * Parse a plain text file.
 * @param {string} filePath - Path to the text file.
 * @returns {Promise<{text: string, metadata: {wordCount: number}}>}
 */
export async function parseTXT(filePath) {
    const text = await fs.readFile(filePath, 'utf-8');
    const words = text.split(/\s+/).filter(Boolean);
    return {
        text,
        metadata: { wordCount: words.length },
    };
}
|
|
152
|
+
/**
 * Parse a Markdown file. Markdown gets no special handling — it is read
 * as plain text and chunked like any other text document.
 * @param {string} filePath - Path to the Markdown file.
 * @returns {Promise<{text: string, metadata: {wordCount: number}}>}
 */
export async function parseMD(filePath) {
    const parsed = await parseTXT(filePath);
    return parsed;
}
|
|
158
|
+
// ─── Supported Formats ───────────────────────────────────────────────────────
// File extensions the Link pipeline knows how to parse.
const SUPPORTED_EXTENSIONS = ['.pdf', '.txt', '.md', '.docx'];
/**
 * Check whether a file's extension is one the pipeline can parse.
 * Comparison is case-insensitive.
 * @param {string} filePath - Path or filename to check.
 * @returns {boolean}
 */
export function isSupportedFormat(filePath) {
    return SUPPORTED_EXTENSIONS.includes(path.extname(filePath).toLowerCase());
}
|
|
167
|
+
/**
 * Map a file's extension (case-insensitive) to a MIME content type.
 * Unknown extensions fall back to 'application/octet-stream'.
 * @param {string} filePath - Path or filename.
 * @returns {string} MIME type string.
 */
export function getContentType(filePath) {
    switch (path.extname(filePath).toLowerCase()) {
        case '.pdf':
            return 'application/pdf';
        case '.txt':
            return 'text/plain';
        case '.md':
            return 'text/markdown';
        case '.docx':
            return 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
        default:
            return 'application/octet-stream';
    }
}
|
|
180
|
+
/**
 * Parse a document by dispatching on its file extension.
 * @param {string} filePath - Path to the document.
 * @returns {Promise<{text: string, metadata: object}>}
 * @throws {Error} When the extension is not a supported format.
 */
export async function parseDocument(filePath) {
    const ext = path.extname(filePath).toLowerCase();
    if (ext === '.pdf') {
        return parsePDF(filePath);
    }
    if (ext === '.docx') {
        return parseDOCX(filePath);
    }
    if (ext === '.txt') {
        return parseTXT(filePath);
    }
    if (ext === '.md') {
        return parseMD(filePath);
    }
    throw new Error(`Unsupported file format: ${ext}`);
}
|
|
198
|
+
/**
 * Process a document end-to-end: parse it, hash the raw file bytes, and
 * chunk the extracted text.
 * @param {string} filePath - Path to the document.
 * @param {number} [chunkSize=500] - Target chunk size in characters.
 * @returns {Promise<{text: string, chunks: object[], hash: string, metadata: object}>}
 */
export async function processDocument(filePath, chunkSize = 500) {
    const { text, metadata } = await parseDocument(filePath);
    // Hash the raw file bytes (must match hashFile used by the caller).
    const hash = await hashFile(filePath);
    return {
        text,
        chunks: chunkText(text, chunkSize),
        hash,
        metadata,
    };
}
|
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
import Database from 'better-sqlite3';
|
|
2
|
+
import fs from 'fs-extra';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import { homedir } from 'os';
|
|
5
|
+
import { randomUUID } from 'crypto';
|
|
6
|
+
import loadVecExtension from './memory/sqlite-vec.js';
|
|
7
|
+
import { DisplayManager } from './display.js';
|
|
8
|
+
// ─── Repository ──────────────────────────────────────────────────────────────
// Dimension of the embedding vectors stored in the vec0 virtual table.
const EMBEDDING_DIM = 384;
/**
 * SQLite repository for the Link document index: documents, their text
 * chunks, per-chunk embeddings (sqlite-vec `vec0`) and an FTS5 index used
 * for BM25 keyword search.
 *
 * Lifecycle: obtain via `getInstance()`, call `initialize()` once before
 * any query method, and `close()` (or `resetInstance()`) when done.
 *
 * Fix over the previous version: `deleteDocument`/`deleteDocumentByPath`
 * relied on FK `ON DELETE CASCADE`, but the vec0 `embeddings` virtual
 * table does not participate in foreign keys, and SQLite only fires the
 * FTS sync triggers for cascade deletes when `recursive_triggers` is
 * enabled — so embeddings and the FTS index were left with orphaned rows.
 * Derived rows are now removed explicitly inside a transaction.
 */
export class LinkRepository {
    static instance = null;
    db = null;
    dbPath;
    display = DisplayManager.getInstance();
    constructor(dbPath) {
        // Defaults to ~/.morpheus/memory/link.db when no path is given.
        this.dbPath = dbPath || path.join(homedir(), '.morpheus', 'memory', 'link.db');
    }
    /**
     * Return the shared singleton. NOTE: `dbPath` only takes effect on the
     * first call; later calls return the existing instance unchanged.
     */
    static getInstance(dbPath) {
        if (!LinkRepository.instance) {
            LinkRepository.instance = new LinkRepository(dbPath);
        }
        return LinkRepository.instance;
    }
    /** Close the open handle (if any) and drop the singleton. */
    static resetInstance() {
        if (LinkRepository.instance?.db) {
            LinkRepository.instance.db.close();
        }
        LinkRepository.instance = null;
    }
    /** Open the database file, load sqlite-vec, and ensure the schema exists. */
    initialize() {
        fs.ensureDirSync(path.dirname(this.dbPath));
        this.db = new Database(this.dbPath, { timeout: 5000 });
        this.db.pragma('journal_mode = WAL');
        this.db.pragma('foreign_keys = ON');
        loadVecExtension(this.db);
        this.createSchema();
    }
    /** Create tables, virtual tables, indexes and FTS sync triggers (idempotent). */
    createSchema() {
        if (!this.db)
            throw new Error('DB not initialized');
        // Documents table
        this.db.exec(`
      CREATE TABLE IF NOT EXISTS documents (
        id TEXT PRIMARY KEY,
        filename TEXT NOT NULL,
        file_path TEXT NOT NULL UNIQUE,
        file_hash TEXT NOT NULL,
        file_size INTEGER NOT NULL,
        status TEXT NOT NULL DEFAULT 'pending',
        error_message TEXT,
        chunk_count INTEGER DEFAULT 0,
        created_at TEXT NOT NULL,
        updated_at TEXT NOT NULL
      );
    `);
        // Chunks table
        this.db.exec(`
      CREATE TABLE IF NOT EXISTS chunks (
        id TEXT PRIMARY KEY,
        document_id TEXT NOT NULL,
        position INTEGER NOT NULL,
        content TEXT NOT NULL,
        char_start INTEGER NOT NULL,
        char_end INTEGER NOT NULL,
        created_at TEXT NOT NULL,
        UNIQUE(document_id, position),
        FOREIGN KEY (document_id) REFERENCES documents(id) ON DELETE CASCADE
      );
    `);
        // Embeddings table (sqlite-vec). NOTE: vec0 virtual tables do not
        // support foreign keys — rows must be deleted explicitly.
        this.db.exec(`
      CREATE VIRTUAL TABLE IF NOT EXISTS embeddings USING vec0(
        chunk_id TEXT PRIMARY KEY,
        embedding FLOAT[${EMBEDDING_DIM}]
      );
    `);
        // FTS5 virtual table (external content) for BM25 search
        this.db.exec(`
      CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
        content,
        content='chunks',
        content_rowid='rowid',
        tokenize='porter unicode61'
      );
    `);
        // Indexes
        this.db.exec(`
      CREATE INDEX IF NOT EXISTS idx_documents_status ON documents(status);
      CREATE INDEX IF NOT EXISTS idx_documents_hash ON documents(file_hash);
      CREATE INDEX IF NOT EXISTS idx_chunks_document ON chunks(document_id);
    `);
        // Triggers to keep FTS in sync with direct chunk writes. These do
        // NOT fire for FK cascade deletes unless recursive_triggers is on,
        // which is why deleteDocument() removes chunks explicitly.
        this.db.exec(`
      CREATE TRIGGER IF NOT EXISTS chunks_ai AFTER INSERT ON chunks BEGIN
        INSERT INTO chunks_fts(rowid, content) VALUES (NEW.rowid, NEW.content);
      END;

      CREATE TRIGGER IF NOT EXISTS chunks_ad AFTER DELETE ON chunks BEGIN
        INSERT INTO chunks_fts(chunks_fts, rowid, content) VALUES('delete', OLD.rowid, OLD.content);
      END;

      CREATE TRIGGER IF NOT EXISTS chunks_au AFTER UPDATE ON chunks BEGIN
        INSERT INTO chunks_fts(chunks_fts, rowid, content) VALUES('delete', OLD.rowid, OLD.content);
        INSERT INTO chunks_fts(rowid, content) VALUES (NEW.rowid, NEW.content);
      END;
    `);
    }
    // ─── Document CRUD ────────────────────────────────────────────────────────
    /**
     * Insert a new document row in status 'pending'.
     * @param {{filename, file_path, file_hash, file_size}} input
     * @returns The created document record.
     */
    createDocument(input) {
        if (!this.db)
            throw new Error('DB not initialized');
        const now = new Date().toISOString();
        const id = randomUUID();
        this.db.prepare(`
      INSERT INTO documents (id, filename, file_path, file_hash, file_size, status, error_message, chunk_count, created_at, updated_at)
      VALUES (?, ?, ?, ?, ?, 'pending', NULL, 0, ?, ?)
    `).run(id, input.filename, input.file_path, input.file_hash, input.file_size, now, now);
        return this.getDocument(id);
    }
    /** Fetch a document by id, or null. */
    getDocument(id) {
        if (!this.db)
            throw new Error('DB not initialized');
        const row = this.db.prepare('SELECT * FROM documents WHERE id = ?').get(id);
        return row ? this.deserializeDocument(row) : null;
    }
    /** Fetch a document by its (unique) file path, or null. */
    getDocumentByPath(file_path) {
        if (!this.db)
            throw new Error('DB not initialized');
        const row = this.db.prepare('SELECT * FROM documents WHERE file_path = ?').get(file_path);
        return row ? this.deserializeDocument(row) : null;
    }
    /** Fetch a document by its content hash, or null. */
    getDocumentByHash(hash) {
        if (!this.db)
            throw new Error('DB not initialized');
        const row = this.db.prepare('SELECT * FROM documents WHERE file_hash = ?').get(hash);
        return row ? this.deserializeDocument(row) : null;
    }
    /**
     * List documents, newest first, optionally filtered by status.
     * @param {string} [status] - e.g. 'pending' | 'indexed' | 'error'.
     */
    listDocuments(status) {
        if (!this.db)
            throw new Error('DB not initialized');
        let query = 'SELECT * FROM documents';
        const params = [];
        if (status) {
            query += ' WHERE status = ?';
            params.push(status);
        }
        query += ' ORDER BY updated_at DESC';
        const rows = this.db.prepare(query).all(...params);
        return rows.map((r) => this.deserializeDocument(r));
    }
    /** Update a document's status (and optional error message); returns the fresh record. */
    updateDocumentStatus(id, status, error_message) {
        if (!this.db)
            throw new Error('DB not initialized');
        const now = new Date().toISOString();
        this.db.prepare(`
      UPDATE documents SET status = ?, error_message = ?, updated_at = ? WHERE id = ?
    `).run(status, error_message ?? null, now, id);
        return this.getDocument(id);
    }
    /** Record the chunk count and mark the document 'indexed'. */
    updateDocumentChunkCount(id, chunk_count) {
        if (!this.db)
            throw new Error('DB not initialized');
        const now = new Date().toISOString();
        this.db.prepare(`
      UPDATE documents SET chunk_count = ?, status = 'indexed', updated_at = ? WHERE id = ?
    `).run(chunk_count, now, id);
    }
    /**
     * Delete a document and ALL derived data (embeddings, chunks, FTS rows).
     *
     * Embeddings and chunks are removed explicitly inside one transaction:
     * the vec0 table has no foreign key, and the FK cascade on chunks does
     * not fire the chunks_ad FTS-sync trigger. Returns true if a document
     * row was deleted.
     */
    deleteDocument(id) {
        if (!this.db)
            throw new Error('DB not initialized');
        const removeAll = this.db.transaction((docId) => {
            this.deleteEmbeddingsByDocument(docId);
            this.deleteChunksByDocument(docId);
            return this.db.prepare('DELETE FROM documents WHERE id = ?').run(docId).changes;
        });
        return removeAll(id) > 0;
    }
    /**
     * Delete a document (and all derived data) addressed by file path.
     * Returns true if a document was found and deleted.
     */
    deleteDocumentByPath(file_path) {
        if (!this.db)
            throw new Error('DB not initialized');
        const doc = this.getDocumentByPath(file_path);
        if (!doc)
            return false;
        return this.deleteDocument(doc.id);
    }
    // ─── Chunk CRUD ───────────────────────────────────────────────────────────
    /** Insert a single chunk; returns the created record. */
    createChunk(input) {
        if (!this.db)
            throw new Error('DB not initialized');
        const now = new Date().toISOString();
        const id = randomUUID();
        this.db.prepare(`
      INSERT INTO chunks (id, document_id, position, content, char_start, char_end, created_at)
      VALUES (?, ?, ?, ?, ?, ?, ?)
    `).run(id, input.document_id, input.position, input.content, input.char_start, input.char_end, now);
        return this.getChunk(id);
    }
    /** Insert many chunks atomically (single transaction). */
    createChunks(inputs) {
        if (!this.db)
            throw new Error('DB not initialized');
        const now = new Date().toISOString();
        const stmt = this.db.prepare(`
      INSERT INTO chunks (id, document_id, position, content, char_start, char_end, created_at)
      VALUES (?, ?, ?, ?, ?, ?, ?)
    `);
        const insertMany = this.db.transaction((items) => {
            for (const input of items) {
                stmt.run(randomUUID(), input.document_id, input.position, input.content, input.char_start, input.char_end, now);
            }
        });
        insertMany(inputs);
    }
    /** Fetch a chunk by id, or null. */
    getChunk(id) {
        if (!this.db)
            throw new Error('DB not initialized');
        const row = this.db.prepare('SELECT * FROM chunks WHERE id = ?').get(id);
        return row ? this.deserializeChunk(row) : null;
    }
    /** All chunks of a document, ordered by position. */
    getChunksByDocument(document_id) {
        if (!this.db)
            throw new Error('DB not initialized');
        const rows = this.db.prepare('SELECT * FROM chunks WHERE document_id = ? ORDER BY position').all(document_id);
        return rows.map((r) => this.deserializeChunk(r));
    }
    /** Delete all chunks of a document (fires the FTS sync trigger per row). */
    deleteChunksByDocument(document_id) {
        if (!this.db)
            throw new Error('DB not initialized');
        this.db.prepare('DELETE FROM chunks WHERE document_id = ?').run(document_id);
    }
    // ─── Embeddings ───────────────────────────────────────────────────────────
    /**
     * Store one embedding vector for a chunk.
     * @param {string} chunk_id
     * @param {number[]} embedding - Must have EMBEDDING_DIM elements.
     */
    createEmbedding(chunk_id, embedding) {
        if (!this.db)
            throw new Error('DB not initialized');
        const embeddingBlob = new Float32Array(embedding);
        this.db.prepare(`
      INSERT INTO embeddings (chunk_id, embedding) VALUES (?, ?)
    `).run(chunk_id, embeddingBlob);
    }
    /** Store many embeddings atomically (single transaction). */
    createEmbeddings(items) {
        if (!this.db)
            throw new Error('DB not initialized');
        const stmt = this.db.prepare(`
      INSERT INTO embeddings (chunk_id, embedding) VALUES (?, ?)
    `);
        const insertMany = this.db.transaction((batch) => {
            for (const item of batch) {
                const embeddingBlob = new Float32Array(item.embedding);
                stmt.run(item.chunk_id, embeddingBlob);
            }
        });
        insertMany(items);
    }
    /** Delete the embeddings of every chunk belonging to a document. */
    deleteEmbeddingsByDocument(document_id) {
        if (!this.db)
            throw new Error('DB not initialized');
        // vec0 has no FK, so resolve the chunk ids and delete explicitly.
        const chunks = this.db.prepare('SELECT id FROM chunks WHERE document_id = ?').all(document_id);
        const chunkIds = chunks.map((c) => c.id);
        if (chunkIds.length === 0)
            return;
        const placeholders = chunkIds.map(() => '?').join(',');
        this.db.prepare(`DELETE FROM embeddings WHERE chunk_id IN (${placeholders})`).run(...chunkIds);
    }
    // ─── Stats ─────────────────────────────────────────────────────────────────
    /** Aggregate counts: total documents, indexed documents, total chunks. */
    getStats() {
        if (!this.db)
            throw new Error('DB not initialized');
        const documents_total = this.db.prepare('SELECT COUNT(*) as cnt FROM documents').get().cnt;
        const documents_indexed = this.db.prepare("SELECT COUNT(*) as cnt FROM documents WHERE status = 'indexed'").get().cnt;
        const chunks_total = this.db.prepare('SELECT COUNT(*) as cnt FROM chunks').get().cnt;
        return { documents_total, documents_indexed, chunks_total };
    }
    // ─── Cleanup ───────────────────────────────────────────────────────────────
    /** Close the database handle; safe to call more than once. */
    close() {
        if (this.db) {
            this.db.close();
            this.db = null;
        }
    }
    // ─── Deserializers ─────────────────────────────────────────────────────────
    /** Map a raw documents row to a plain record. */
    deserializeDocument(row) {
        return {
            id: row.id,
            filename: row.filename,
            file_path: row.file_path,
            file_hash: row.file_hash,
            file_size: row.file_size,
            status: row.status,
            error_message: row.error_message ?? null,
            chunk_count: row.chunk_count ?? 0,
            created_at: row.created_at,
            updated_at: row.updated_at,
        };
    }
    /** Map a raw chunks row to a plain record. */
    deserializeChunk(row) {
        return {
            id: row.id,
            document_id: row.document_id,
            position: row.position,
            content: row.content,
            char_start: row.char_start,
            char_end: row.char_end,
            created_at: row.created_at,
        };
    }
}
|