@context-os/core 1.0.1 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/context.d.ts +10 -0
- package/dist/context.js +34 -0
- package/dist/index.d.ts +8 -9
- package/dist/index.js +9 -27
- package/dist/indexer.d.ts +47 -0
- package/dist/indexer.js +206 -0
- package/dist/services/database.d.ts +30 -0
- package/dist/services/database.js +147 -0
- package/dist/services/embedding.d.ts +24 -0
- package/dist/services/embedding.js +55 -0
- package/dist/services/intelligence.d.ts +22 -0
- package/dist/services/intelligence.js +125 -0
- package/dist/services/knowledge-graph.d.ts +27 -0
- package/dist/services/knowledge-graph.js +62 -0
- package/dist/services/sampling.d.ts +24 -0
- package/dist/services/sampling.js +62 -0
- package/dist/services/validation.d.ts +31 -0
- package/dist/services/validation.js +153 -0
- package/dist/services/watch.d.ts +16 -0
- package/dist/services/watch.js +74 -0
- package/dist/services/workspace.d.ts +18 -0
- package/dist/services/workspace.js +46 -0
- package/dist/tests/core-services.test.d.ts +1 -0
- package/dist/tests/core-services.test.js +49 -0
- package/dist/tests/federated-intelligence.test.d.ts +1 -0
- package/dist/tests/federated-intelligence.test.js +84 -0
- package/dist/tests/semantic-intelligence.test.d.ts +1 -0
- package/dist/tests/semantic-intelligence.test.js +31 -0
- package/dist/tests/sqlite-hybrid.test.d.ts +1 -0
- package/dist/tests/sqlite-hybrid.test.js +51 -0
- package/package.json +14 -3
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
 * Discovers the workspace root by looking for root/soul.md in parent directories.
 * Falls back to the (realpath-resolved) current working directory when no marker is found.
 */
export declare function findWorkspaceRoot(): string;
/** Workspace root resolved once at module load (result of findWorkspaceRoot()). */
export declare const workspaceRoot: string;
/** Returns the cached module-level workspaceRoot. */
export declare function getWorkspaceRoot(): string;
/**
 * Standard ContextOS "Buckets" for security isolation.
 * Path validation only accepts paths inside one of these top-level directories.
 */
export declare const ALLOWED_BUCKETS: string[];
|
package/dist/context.js
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import path from 'node:path';
|
|
2
|
+
import fs from 'node:fs';
|
|
3
|
+
/**
 * Discovers the workspace root by looking for root/soul.md in parent directories.
 *
 * Generalized (backward-compatible): an explicit start directory may be passed;
 * the default preserves the original behavior of starting at process.cwd().
 *
 * Fix: the original loop condition (`while (current !== root)`) exited before
 * ever checking the filesystem root itself; the root is now checked too.
 *
 * @param {string} [startDir=process.cwd()] directory to begin the upward search from
 * @returns {string} real (symlink-resolved) path of the workspace root, or the
 *   real path of startDir when no marker file is found
 */
export function findWorkspaceRoot(startDir = process.cwd()) {
    let current = startDir;
    const root = path.parse(current).root;
    for (;;) {
        if (fs.existsSync(path.join(current, "root", "soul.md"))) {
            return fs.realpathSync(current);
        }
        if (current === root) {
            break; // reached and checked the filesystem root — give up
        }
        current = path.dirname(current);
    }
    return fs.realpathSync(startDir); // Fallback: no marker anywhere above startDir
}
// Resolved once at module load; every consumer shares the same cached root.
export const workspaceRoot = findWorkspaceRoot();
/** Returns the cached module-level workspaceRoot. */
export function getWorkspaceRoot() {
    return workspaceRoot;
}
|
|
21
|
+
/**
 * Standard ContextOS "Buckets" for security isolation.
 * Path validation only accepts content living under one of these
 * top-level workspace directories.
 */
export const ALLOWED_BUCKETS = [
    "projects", "knowledge", "schemas",
    "archive", "log", "orgs",
    "root", "docs", "prompts"
];
|
package/dist/index.d.ts
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
|
-
|
|
2
|
-
*
|
|
3
|
-
|
|
4
|
-
export
|
|
5
|
-
export
|
|
6
|
-
|
|
7
|
-
*
|
|
8
|
-
|
|
9
|
-
export declare const ALLOWED_BUCKETS: string[];
|
|
1
|
+
// Barrel file: re-exports the public API of every core module.
export * from './context.js';
export * from './indexer.js';
export * from './services/intelligence.js';
export * from './services/validation.js';
export * from './services/workspace.js';
export * from './services/knowledge-graph.js';
export * from './services/sampling.js';
export * from './services/watch.js';
|
|
10
9
|
/**
|
|
11
10
|
* Validates that a path is within the workspace root and inside an allowed bucket.
|
|
12
11
|
*/
|
package/dist/index.js
CHANGED
|
@@ -1,33 +1,15 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
2
|
import fs from 'node:fs';
|
|
3
3
|
import { spawn } from 'node:child_process';
|
|
4
|
-
|
|
5
|
-
*
|
|
6
|
-
|
|
7
|
-
export
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
}
|
|
14
|
-
current = path.dirname(current);
|
|
15
|
-
}
|
|
16
|
-
return fs.realpathSync(process.cwd()); // Fallback to CWD
|
|
17
|
-
}
|
|
18
|
-
export const workspaceRoot = findWorkspaceRoot();
|
|
19
|
-
/**
|
|
20
|
-
* Standard ContextOS "Buckets" for security isolation.
|
|
21
|
-
*/
|
|
22
|
-
export const ALLOWED_BUCKETS = [
|
|
23
|
-
"projects",
|
|
24
|
-
"knowledge",
|
|
25
|
-
"schemas",
|
|
26
|
-
"archive",
|
|
27
|
-
"log",
|
|
28
|
-
"orgs",
|
|
29
|
-
"root"
|
|
30
|
-
];
|
|
4
|
+
// workspaceRoot / ALLOWED_BUCKETS are imported for local use — presumably by the
// path-validation helper documented further below; confirm against the full file.
import { workspaceRoot, ALLOWED_BUCKETS } from './context.js';
// Barrel re-exports: the package's public API surface.
export * from './context.js';
export * from './indexer.js';
export * from './services/intelligence.js';
export * from './services/validation.js';
export * from './services/workspace.js';
export * from './services/knowledge-graph.js';
export * from './services/sampling.js';
export * from './services/watch.js';
|
|
31
13
|
/**
|
|
32
14
|
* Validates that a path is within the workspace root and inside an allowed bucket.
|
|
33
15
|
*/
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/** A single indexed markdown document, as persisted in .context-index.json. */
export interface IndexRecord {
    path: string;         // workspace-relative path
    title: string;        // frontmatter title, first H1, or filename fallback
    tags: string[];       // frontmatter tags merged with inline #tags from the body
    status: string;       // frontmatter status; defaults to 'active'
    lastModified: number; // file mtime in ms; drives the incremental-skip check
    excerpt: string;      // first paragraph of the body, capped at 200 chars
    content: string;      // body with YAML frontmatter stripped
    mentions: string[];   // inline @mentions extracted from the body
}
/** Top-level shape of the persisted JSON index. */
export interface ContextIndex {
    version: string;      // index schema version (writer stamps '1.3.0')
    lastUpdated: number;  // Date.now() at write time
    records: IndexRecord[];
    provider?: string;    // embedding provider name ('local' or 'gemini')
}
export declare class ContextIndexer {
    private indexPath;
    private previousIndex;
    private dbService;
    private embeddingService;
    constructor();
    /**
     * Recursively indexes the workspace metadata.
     * Implements Incremental logic: only parses files changed since last index.
     */
    reindex(options?: {
        force?: boolean;
    }): Promise<ContextIndex>;
    private scanDirectory;
    /**
     * Public method to index or update a single file.
     * Synchronizes both SQLite and the JSON index.
     */
    indexFile(filePath: string, mtimeMs?: number): Promise<IndexRecord | null>;
    /**
     * Public method to remove a file from all index layers.
     */
    removeFile(relativePath: string): Promise<void>;
    private updateJsonIndex;
    private parseMarkdownFile;
    /**
     * Searches the local index.
     * Case-insensitive substring match over title, tags, excerpt and path.
     */
    search(query: string): Promise<IndexRecord[]>;
}
/** Shared singleton indexer instance created at module load. */
export declare const globalIndexer: ContextIndexer;
|
package/dist/indexer.js
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
import path from 'node:path';
|
|
2
|
+
import fs from 'fs-extra';
|
|
3
|
+
import { workspaceRoot, ALLOWED_BUCKETS } from './context.js';
|
|
4
|
+
import { validationService } from './services/validation.js';
|
|
5
|
+
import { DatabaseService } from './services/database.js';
|
|
6
|
+
import { EmbeddingService } from './services/embedding.js';
|
|
7
|
+
/**
 * Maintains the three workspace index layers in sync:
 *   1. SQLite (documents + embeddings, via DatabaseService)
 *   2. Embedding vectors (via EmbeddingService)
 *   3. The JSON file index (.context-index.json)
 */
export class ContextIndexer {
    // Absolute path of the JSON index file inside the workspace root.
    indexPath;
    // Previously persisted ContextIndex used for incremental comparison; null until loaded.
    previousIndex = null;
    // SQLite storage layer.
    dbService;
    // Embedding provider wrapper (Gemini when GEMINI_API_KEY is set, else local).
    embeddingService;
    constructor() {
        this.indexPath = path.join(workspaceRoot, '.context-index.json');
        this.dbService = new DatabaseService(workspaceRoot);
        // Load API key if present for Elite mode
        const geminiKey = process.env.GEMINI_API_KEY;
        this.embeddingService = new EmbeddingService(geminiKey);
    }
    /**
     * Recursively indexes the workspace metadata.
     * Implements Incremental logic: only parses files changed since last index.
     * @param options.force when true, ignores the previous index and re-parses everything
     * @returns the freshly built (and atomically persisted) ContextIndex
     */
    async reindex(options = {}) {
        // 1. Load previous index for incremental comparison
        if (!options.force && await fs.pathExists(this.indexPath)) {
            try {
                this.previousIndex = await fs.readJSON(this.indexPath);
                // Upgrade from v1.2.x to v1.3.0 forces a semantic re-index
                if (this.previousIndex && this.previousIndex.version !== '1.3.0') {
                    this.previousIndex = null;
                }
            }
            catch (e) {
                // Unreadable/corrupt JSON: fall back to a full reindex.
                this.previousIndex = null;
            }
        }
        const index = {
            version: '1.3.0',
            lastUpdated: Date.now(),
            records: [],
            provider: await this.embeddingService.getProviderName()
        };
        // Map old records by path so unchanged files can be reused without parsing.
        const existingRecordMap = new Map();
        if (this.previousIndex) {
            this.previousIndex.records.forEach(r => existingRecordMap.set(r.path, r));
        }
        // 2. Scan buckets incrementally
        for (const bucket of ALLOWED_BUCKETS) {
            const bucketPath = path.join(workspaceRoot, bucket);
            if (await fs.pathExists(bucketPath)) {
                await this.scanDirectory(bucketPath, index.records, existingRecordMap);
            }
        }
        // 3. Atomic persistence
        // Written to a temp file first so readers never observe a half-written index.
        const tempPath = `${this.indexPath}.tmp`;
        await fs.writeJSON(tempPath, index, { spaces: 2 });
        await fs.move(tempPath, this.indexPath, { overwrite: true });
        return index;
    }
    // Depth-first scan of `dir` for *.md files; reuses existingRecordMap entries
    // whose mtime is unchanged, otherwise re-indexes the file.
    async scanDirectory(dir, records, existingRecordMap) {
        const entries = await fs.readdir(dir, { withFileTypes: true });
        for (const entry of entries) {
            const fullPath = path.join(dir, entry.name);
            if (entry.isDirectory()) {
                await this.scanDirectory(fullPath, records, existingRecordMap);
                continue;
            }
            if (entry.name.endsWith('.md')) {
                const relativePath = path.relative(workspaceRoot, fullPath);
                const stats = await fs.stat(fullPath);
                const existing = existingRecordMap.get(relativePath);
                // Incremental Check: Skip parsing if mtime matches
                if (existing && existing.lastModified === stats.mtimeMs) {
                    records.push(existing);
                    continue;
                }
                const record = await this.indexFile(fullPath, stats.mtimeMs);
                if (record) {
                    records.push(record);
                }
            }
        }
    }
    /**
     * Public method to index or update a single file.
     * Synchronizes both SQLite and the JSON index.
     * @param filePath absolute path of the markdown file
     * @param mtimeMs optional known mtime (ms); stat()ed when omitted
     * @returns the parsed IndexRecord, or null when parsing failed
     */
    async indexFile(filePath, mtimeMs) {
        if (!mtimeMs) {
            const stats = await fs.stat(filePath);
            mtimeMs = stats.mtimeMs;
        }
        const record = await this.parseMarkdownFile(filePath, mtimeMs);
        if (record) {
            // 1. SQLite Upsert + Semantic Generation
            const { id } = this.dbService.upsertDocument({
                path: record.path,
                title: record.title,
                content: record.content,
                excerpt: record.excerpt,
                mtime: record.lastModified,
                metadata: JSON.stringify(record.tags)
            });
            // Generate Vector for Semantic Layer
            const embedding = await this.embeddingService.generate(`${record.title}\n${record.excerpt}\n${record.content}`);
            this.dbService.upsertVector(id, embedding, await this.embeddingService.getProviderName());
            // 2. Update JSON Index (if it exists)
            await this.updateJsonIndex(record);
        }
        return record;
    }
    /**
     * Public method to remove a file from all index layers.
     * @param relativePath workspace-relative path of the removed file
     */
    async removeFile(relativePath) {
        // 1. SQLite Cleanup
        this.dbService.removeDocument(relativePath);
        // 2. JSON Cleanup
        if (await fs.pathExists(this.indexPath)) {
            try {
                const index = await fs.readJSON(this.indexPath);
                index.records = index.records.filter(r => r.path !== relativePath);
                index.lastUpdated = Date.now();
                await fs.writeJSON(this.indexPath, index, { spaces: 2 });
            }
            catch (e) {
                // Ignore parsing errors
            }
        }
    }
    // Upserts a single record into the JSON index file; no-op when the file
    // does not exist yet (a full reindex will create it).
    async updateJsonIndex(record) {
        if (!(await fs.pathExists(this.indexPath)))
            return;
        try {
            const index = await fs.readJSON(this.indexPath);
            const existingIndex = index.records.findIndex(r => r.path === record.path);
            if (existingIndex >= 0) {
                index.records[existingIndex] = record;
            }
            else {
                index.records.push(record);
            }
            index.lastUpdated = Date.now();
            await fs.writeJSON(this.indexPath, index, { spaces: 2 });
        }
        catch (e) {
            // If JSON is corrupt, it'll be fixed on next full reindex
        }
    }
    // Parses one markdown file into an IndexRecord: frontmatter metadata,
    // title, 200-char excerpt, inline @mentions and #tags.
    // Returns null (after logging) when the file cannot be read/parsed.
    async parseMarkdownFile(filePath, mtimeMs) {
        try {
            const content = await fs.readFile(filePath, 'utf8');
            const relativePath = path.relative(workspaceRoot, filePath);
            // Use Unified Validation Service for parsing
            const metadata = validationService.extractMetadata(content);
            // Extract title (First H1 or filename)
            let title = metadata.title || '';
            if (!title) {
                const h1Match = content.match(/^#\s+(.*)/m);
                title = h1Match ? h1Match[1] : path.basename(filePath, '.md');
            }
            // Generate Excerpt
            const body = content.replace(/^---[\s\S]*?---/, '').trim();
            const excerpt = body.split(/\n\s*\n/)[0]
                .slice(0, 200)
                .replace(/\r?\n/g, ' ')
                .trim();
            // Entity Extraction
            const mentions = Array.from(body.matchAll(/@(\w+)/g)).map(m => m[1]);
            const bodyTags = Array.from(body.matchAll(/#(\w+)/g)).map(m => m[1]);
            // Frontmatter may use either 'Tags' or 'tags'; merged with inline #tags, de-duplicated.
            const tags = Array.from(new Set([
                ...(Array.isArray(metadata.Tags) ? metadata.Tags : (Array.isArray(metadata.tags) ? metadata.tags : [])),
                ...bodyTags
            ]));
            return {
                path: relativePath,
                title,
                tags,
                status: metadata.status || 'active',
                lastModified: mtimeMs,
                excerpt,
                content: body,
                mentions
            };
        }
        catch (error) {
            console.error(`Failed to index ${filePath}:`, error);
            return null;
        }
    }
    /**
     * Searches the local index.
     * Case-insensitive substring match over title, tags, excerpt and path.
     */
    async search(query) {
        if (!(await fs.pathExists(this.indexPath))) {
            return [];
        }
        const index = await fs.readJSON(this.indexPath);
        const lowerQuery = query.toLowerCase();
        return index.records.filter(record => record.title.toLowerCase().includes(lowerQuery) ||
            record.tags.some(t => t.toLowerCase().includes(lowerQuery)) ||
            record.excerpt.toLowerCase().includes(lowerQuery) ||
            record.path.toLowerCase().includes(lowerQuery));
    }
}
// Shared singleton; constructing it opens the workspace database.
export const globalIndexer = new ContextIndexer();
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import Database from 'better-sqlite3';
/** Row shape of the `documents` table. */
export interface DBRecord {
    id?: number;      // AUTOINCREMENT primary key (absent before first insert)
    path: string;     // workspace-relative path (UNIQUE)
    title: string;
    content: string;  // body with frontmatter stripped
    excerpt: string;
    mtime: number;    // file mtime in ms
    metadata: string; // JSON-serialized tags array
}
/**
 * SQLite-backed storage layer: a documents table, an FTS5 keyword index kept
 * in sync by triggers, and a sqlite-vec vector table for embeddings.
 */
export declare class DatabaseService {
    private db;
    private dbPath;
    constructor(workspaceRoot: string);
    private initializeSchema;
    /** Inserts or updates a document keyed on `path`; returns its rowid. */
    upsertDocument(record: DBRecord): {
        id: number;
    };
    /** Stores or replaces the embedding BLOB for a document. */
    upsertVector(docId: number, embedding: Float32Array, provider: string): Database.RunResult;
    /** Runs cosine-distance vector search and FTS5 keyword search side by side. */
    searchHybrid(queryEmbedding: Float32Array, queryText: string, limit?: number): {
        semanticResults: unknown[];
        keywordResults: unknown[];
    };
    getDocumentByPath(filePath: string): DBRecord | undefined;
    removeDocument(filePath: string): Database.RunResult;
    getAllDocuments(): DBRecord[];
    getVectorForDocument(docId: number): Float32Array<ArrayBufferLike> | undefined;
    /** Cosine-distance nearest-neighbour search, ascending by distance. */
    searchSemantic(queryEmbedding: Float32Array, limit?: number): any[];
    close(): void;
}
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
import Database from 'better-sqlite3';
|
|
2
|
+
import * as sqliteVec from 'sqlite-vec';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import fs from 'fs-extra';
|
|
5
|
+
/**
 * SQLite-backed storage layer: a documents table, an FTS5 keyword index kept
 * in sync by triggers, and a sqlite-vec vector table for embeddings.
 */
export class DatabaseService {
    db;     // better-sqlite3 connection
    dbPath; // <workspaceRoot>/.context-db/context.db
    /**
     * Opens (creating if necessary) the workspace database, loads the
     * sqlite-vec extension and ensures the schema exists.
     * @param {string} workspaceRoot absolute workspace root directory
     */
    constructor(workspaceRoot) {
        const dbDir = path.join(workspaceRoot, '.context-db');
        fs.ensureDirSync(dbDir);
        this.dbPath = path.join(dbDir, 'context.db');
        this.db = new Database(this.dbPath);
        // Load sqlite-vec extension
        sqliteVec.load(this.db);
        this.initializeSchema();
    }
    // Idempotent schema creation (everything is IF NOT EXISTS).
    initializeSchema() {
        // 1. Documents Table
        this.db.exec(`
      CREATE TABLE IF NOT EXISTS documents (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        path TEXT UNIQUE,
        title TEXT,
        content TEXT,
        excerpt TEXT,
        mtime INTEGER,
        metadata TEXT
      );
    `);
        // 2. FTS5 Virtual Table for Keyword Search
        this.db.exec(`
      CREATE VIRTUAL TABLE IF NOT EXISTS fts_documents USING fts5(
        title,
        content,
        content='documents',
        content_rowid='id'
      );
    `);
        // 3. Triggers to keep FTS in sync
        this.db.exec(`
      CREATE TRIGGER IF NOT EXISTS documents_ai AFTER INSERT ON documents BEGIN
        INSERT INTO fts_documents(rowid, title, content) VALUES (new.id, new.title, new.content);
      END;
      CREATE TRIGGER IF NOT EXISTS documents_ad AFTER DELETE ON documents BEGIN
        INSERT INTO fts_documents(fts_documents, rowid, title, content) VALUES('delete', old.id, old.title, old.content);
      END;
      CREATE TRIGGER IF NOT EXISTS documents_au AFTER UPDATE ON documents BEGIN
        INSERT INTO fts_documents(fts_documents, rowid, title, content) VALUES('delete', old.id, old.title, old.content);
        INSERT INTO fts_documents(rowid, title, content) VALUES (new.id, new.title, new.content);
      END;
    `);
        // 4. Vector Table (sqlite-vec)
        // We use a fixed 384 dimensions for the local model 'all-MiniLM-L6-v2'
        // But Gemini uses 768. To remain hybrid, we support multiple vector tables or variable columns.
        // For simplicity, we'll use 'vec_documents' with 768 dimensions (Gemini compat)
        // and pad local vectors if needed, or better: just detect which one we have.
        this.db.exec(`
      CREATE TABLE IF NOT EXISTS vec_documents (
        id INTEGER PRIMARY KEY REFERENCES documents(id) ON DELETE CASCADE,
        embedding BLOB, -- Float32Array
        provider TEXT -- 'local' or 'gemini'
      );
    `);
    }
    /**
     * Returns a Buffer spanning exactly the Float32Array's bytes.
     * Fix: `Buffer.from(f32.buffer)` alone wraps the WHOLE underlying
     * ArrayBuffer, so a Float32Array that is a subarray/view would have
     * stored (or queried with) garbage bytes. byteOffset/byteLength restrict
     * the Buffer to the view itself.
     */
    vectorToBuffer(embedding) {
        return Buffer.from(embedding.buffer, embedding.byteOffset, embedding.byteLength);
    }
    /**
     * Inserts or updates a document keyed on `path`.
     * @returns {{id: number}} the document's rowid
     */
    upsertDocument(record) {
        const stmt = this.db.prepare(`
      INSERT INTO documents (path, title, content, excerpt, mtime, metadata)
      VALUES (@path, @title, @content, @excerpt, @mtime, @metadata)
      ON CONFLICT(path) DO UPDATE SET
        title = excluded.title,
        content = excluded.content,
        excerpt = excluded.excerpt,
        mtime = excluded.mtime,
        metadata = excluded.metadata
      RETURNING id
    `);
        return stmt.get(record);
    }
    /** Stores or replaces the embedding BLOB for a document. */
    upsertVector(docId, embedding, provider) {
        const stmt = this.db.prepare(`
      INSERT INTO vec_documents (id, embedding, provider)
      VALUES (?, ?, ?)
      ON CONFLICT(id) DO UPDATE SET
        embedding = excluded.embedding,
        provider = excluded.provider
    `);
        // sqlite-vec expects raw buffer
        return stmt.run(docId, this.vectorToBuffer(embedding), provider);
    }
    /**
     * Runs cosine-distance vector search and FTS5 keyword search side by side.
     * @returns {{semanticResults: unknown[], keywordResults: unknown[]}}
     */
    searchHybrid(queryEmbedding, queryText, limit = 10) {
        // This is a simplified hybrid search.
        // It finds top semantic matches and top keyword matches.
        // 1. Semantic Search
        const semanticStmt = this.db.prepare(`
      SELECT
        d.path, d.title, d.excerpt,
        vec_distance_cosine(v.embedding, ?) as distance
      FROM vec_documents v
      JOIN documents d ON v.id = d.id
      ORDER BY distance ASC
      LIMIT ?
    `);
        const semanticResults = semanticStmt.all(this.vectorToBuffer(queryEmbedding), limit);
        // 2. Keyword Search (FTS5)
        const keywordStmt = this.db.prepare(`
      SELECT
        d.path, d.title, d.excerpt,
        rank as fts_score
      FROM fts_documents f
      JOIN documents d ON f.rowid = d.id
      WHERE fts_documents MATCH ?
      ORDER BY rank
      LIMIT ?
    `);
        const keywordResults = keywordStmt.all(queryText, limit);
        return { semanticResults, keywordResults };
    }
    /** @returns the document row for a workspace-relative path, or undefined. */
    getDocumentByPath(filePath) {
        return this.db.prepare('SELECT * FROM documents WHERE path = ?').get(filePath);
    }
    /** Deletes a document row; vec_documents rows cascade via the FK. */
    removeDocument(filePath) {
        const stmt = this.db.prepare('DELETE FROM documents WHERE path = ?');
        return stmt.run(filePath);
    }
    getAllDocuments() {
        return this.db.prepare('SELECT * FROM documents').all();
    }
    /** @returns the stored embedding as a Float32Array view, or undefined. */
    getVectorForDocument(docId) {
        const row = this.db.prepare('SELECT embedding FROM vec_documents WHERE id = ?').get(docId);
        return row ? new Float32Array(row.embedding.buffer, row.embedding.byteOffset, row.embedding.byteLength / 4) : undefined;
    }
    /** Cosine-distance nearest-neighbour search, ascending by distance. */
    searchSemantic(queryEmbedding, limit = 10) {
        const stmt = this.db.prepare(`
      SELECT
        d.path, d.title, d.excerpt,
        vec_distance_cosine(v.embedding, ?) as distance
      FROM vec_documents v
      JOIN documents d ON v.id = d.id
      ORDER BY distance ASC
      LIMIT ?
    `);
        return stmt.all(this.vectorToBuffer(queryEmbedding), limit);
    }
    /** Closes the underlying SQLite connection. */
    close() {
        this.db.close();
    }
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/** Common contract for embedding backends. */
export interface EmbeddingProvider {
    name: string; // provider identifier ('local' or 'gemini')
    generate(text: string): Promise<Float32Array>;
    dimension: number; // length of the produced embedding vector
}
/** Local in-process embeddings via @xenova/transformers (all-MiniLM-L6-v2, 384 dims). */
export declare class TransformersProvider implements EmbeddingProvider {
    name: string;
    dimension: number;
    private extractor; // feature-extraction pipeline, created lazily on first generate()
    generate(text: string): Promise<Float32Array>;
}
/** Remote embeddings via the Gemini text-embedding-004 REST API (768 dims). */
export declare class GeminiProvider implements EmbeddingProvider {
    name: string;
    dimension: number;
    private apiKey;
    constructor(apiKey: string);
    generate(text: string): Promise<Float32Array>;
}
/** Facade selecting GeminiProvider when an API key is supplied, else the local provider. */
export declare class EmbeddingService {
    private provider;
    constructor(apiKey?: string);
    getProviderName(): Promise<string>;
    generate(text: string): Promise<Float32Array>;
}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import { pipeline } from '@xenova/transformers';
|
|
2
|
+
/**
 * Local in-process embedding provider backed by @xenova/transformers.
 * Produces 384-dimensional vectors with the all-MiniLM-L6-v2 model.
 */
export class TransformersProvider {
    name = 'local';
    dimension = 384;
    // Feature-extraction pipeline; created lazily on first use.
    extractor = null;
    async generate(text) {
        // Lazy initialization: the model is only loaded when first needed.
        this.extractor ??= await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
        const { data } = await this.extractor(text, { pooling: 'mean', normalize: true });
        return new Float32Array(data);
    }
}
|
|
14
|
+
/**
 * Remote embedding provider backed by the Gemini text-embedding-004 REST API.
 * Produces 768-dimensional vectors.
 */
export class GeminiProvider {
    name = 'gemini';
    dimension = 768; // text-embedding-004 standard
    apiKey;
    /** @param {string} apiKey Google AI API key used for embedContent calls */
    constructor(apiKey) {
        this.apiKey = apiKey;
    }
    /**
     * Generates an embedding for `text` via embedContent.
     * @throws {Error} on HTTP failure or when the response carries no embedding
     */
    async generate(text) {
        // Implementation for Gemini Embedding API.
        // Security fix: the API key is sent in the x-goog-api-key header instead
        // of the URL query string, so it cannot leak via logs or proxies.
        const response = await fetch('https://generativelanguage.googleapis.com/v1beta/models/text-embedding-004:embedContent', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'x-goog-api-key': this.apiKey
            },
            body: JSON.stringify({
                content: { parts: [{ text }] }
            })
        });
        // Robustness fix: a non-OK, non-JSON response would previously have
        // crashed inside response.json(); surface the HTTP error explicitly.
        if (!response.ok) {
            throw new Error(`Gemini Embedding Failed: HTTP ${response.status} ${await response.text()}`);
        }
        const data = await response.json();
        if (!data.embedding) {
            throw new Error(`Gemini Embedding Failed: ${JSON.stringify(data)}`);
        }
        return new Float32Array(data.embedding.values);
    }
}
|
|
39
|
+
/**
 * Facade over the embedding backends: uses GeminiProvider when an API key is
 * supplied, otherwise falls back to the local TransformersProvider.
 */
export class EmbeddingService {
    // The selected EmbeddingProvider implementation.
    provider;
    constructor(apiKey) {
        this.provider = apiKey
            ? new GeminiProvider(apiKey)
            : new TransformersProvider();
    }
    /** Name of the active provider ('gemini' or 'local'). */
    async getProviderName() {
        return this.provider.name;
    }
    /** Delegates embedding generation to the active provider. */
    async generate(text) {
        return this.provider.generate(text);
    }
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/** A single hit returned by IntelligenceService.search. */
export interface SearchResult {
    path: string;
    title: string;
    tags: string[];
    excerpt: string;
    score?: number; // relevance score, when the producing layer supplies one
    type: 'index' | 'deep' | 'semantic' | 'hybrid'; // which search layer produced this hit
}
export declare class IntelligenceService {
    private miniSearch;
    private dbService;
    private embeddingService;
    private getIndex;
    /**
     * Hybrid Search: Semantic (sqlite-vec) + Keyword (FTS5) -> Lite Index (MiniSearch) -> Grep Fallback
     */
    search(query: string, options?: {
        deep?: boolean;
    }): Promise<SearchResult[]>;
    extract(text: string): Promise<string[]>;
}
/** Shared singleton instance. */
export declare const intelligenceService: IntelligenceService;
|