@staticn0va/wigolo 0.4.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/SKILL.md +97 -50
- package/dist/agent/executor.d.ts +13 -0
- package/dist/agent/executor.d.ts.map +1 -0
- package/dist/agent/executor.js +128 -0
- package/dist/agent/executor.js.map +1 -0
- package/dist/agent/pipeline.d.ts +5 -0
- package/dist/agent/pipeline.d.ts.map +1 -0
- package/dist/agent/pipeline.js +198 -0
- package/dist/agent/pipeline.js.map +1 -0
- package/dist/agent/planner.d.ts +9 -0
- package/dist/agent/planner.d.ts.map +1 -0
- package/dist/agent/planner.js +190 -0
- package/dist/agent/planner.js.map +1 -0
- package/dist/cache/db.d.ts.map +1 -1
- package/dist/cache/db.js +32 -0
- package/dist/cache/db.js.map +1 -1
- package/dist/cache/store.d.ts +14 -0
- package/dist/cache/store.d.ts.map +1 -1
- package/dist/cache/store.js +69 -0
- package/dist/cache/store.js.map +1 -1
- package/dist/cli/warmup.d.ts +4 -0
- package/dist/cli/warmup.d.ts.map +1 -1
- package/dist/cli/warmup.js +58 -0
- package/dist/cli/warmup.js.map +1 -1
- package/dist/config.d.ts +8 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +8 -0
- package/dist/config.js.map +1 -1
- package/dist/embedding/embed.d.ts +19 -0
- package/dist/embedding/embed.d.ts.map +1 -0
- package/dist/embedding/embed.js +131 -0
- package/dist/embedding/embed.js.map +1 -0
- package/dist/embedding/key-terms.d.ts +12 -0
- package/dist/embedding/key-terms.d.ts.map +1 -0
- package/dist/embedding/key-terms.js +138 -0
- package/dist/embedding/key-terms.js.map +1 -0
- package/dist/embedding/subprocess.d.ts +31 -0
- package/dist/embedding/subprocess.d.ts.map +1 -0
- package/dist/embedding/subprocess.js +213 -0
- package/dist/embedding/subprocess.js.map +1 -0
- package/dist/embedding/vector-index.d.ts +26 -0
- package/dist/embedding/vector-index.d.ts.map +1 -0
- package/dist/embedding/vector-index.js +78 -0
- package/dist/embedding/vector-index.js.map +1 -0
- package/dist/fetch/browser-pool.d.ts.map +1 -1
- package/dist/fetch/browser-pool.js +61 -0
- package/dist/fetch/browser-pool.js.map +1 -1
- package/dist/fetch/browser-types.js +1 -1
- package/dist/fetch/browser-types.js.map +1 -1
- package/dist/fetch/lightpanda.d.ts +28 -0
- package/dist/fetch/lightpanda.d.ts.map +1 -0
- package/dist/fetch/lightpanda.js +177 -0
- package/dist/fetch/lightpanda.js.map +1 -0
- package/dist/instructions.d.ts +9 -6
- package/dist/instructions.d.ts.map +1 -1
- package/dist/instructions.js +95 -20
- package/dist/instructions.js.map +1 -1
- package/dist/logger.d.ts +1 -1
- package/dist/logger.d.ts.map +1 -1
- package/dist/repl/commands/agent.d.ts +5 -0
- package/dist/repl/commands/agent.d.ts.map +1 -0
- package/dist/repl/commands/agent.js +48 -0
- package/dist/repl/commands/agent.js.map +1 -0
- package/dist/repl/commands/find-similar.d.ts +5 -0
- package/dist/repl/commands/find-similar.d.ts.map +1 -0
- package/dist/repl/commands/find-similar.js +61 -0
- package/dist/repl/commands/find-similar.js.map +1 -0
- package/dist/repl/commands/research.d.ts +5 -0
- package/dist/repl/commands/research.d.ts.map +1 -0
- package/dist/repl/commands/research.js +50 -0
- package/dist/repl/commands/research.js.map +1 -0
- package/dist/repl/formatters.d.ts +4 -1
- package/dist/repl/formatters.d.ts.map +1 -1
- package/dist/repl/formatters.js +73 -0
- package/dist/repl/formatters.js.map +1 -1
- package/dist/repl/shell.d.ts.map +1 -1
- package/dist/repl/shell.js +22 -1
- package/dist/repl/shell.js.map +1 -1
- package/dist/research/decompose.d.ts +7 -0
- package/dist/research/decompose.d.ts.map +1 -0
- package/dist/research/decompose.js +195 -0
- package/dist/research/decompose.js.map +1 -0
- package/dist/research/pipeline.d.ts +5 -0
- package/dist/research/pipeline.d.ts.map +1 -0
- package/dist/research/pipeline.js +135 -0
- package/dist/research/pipeline.js.map +1 -0
- package/dist/research/synthesize.d.ts +10 -0
- package/dist/research/synthesize.d.ts.map +1 -0
- package/dist/research/synthesize.js +119 -0
- package/dist/research/synthesize.js.map +1 -0
- package/dist/search/answer-synthesis.d.ts +13 -0
- package/dist/search/answer-synthesis.d.ts.map +1 -0
- package/dist/search/answer-synthesis.js +120 -0
- package/dist/search/answer-synthesis.js.map +1 -0
- package/dist/search/find-similar.d.ts +5 -0
- package/dist/search/find-similar.d.ts.map +1 -0
- package/dist/search/find-similar.js +329 -0
- package/dist/search/find-similar.js.map +1 -0
- package/dist/search/multi-query.d.ts +22 -0
- package/dist/search/multi-query.d.ts.map +1 -0
- package/dist/search/multi-query.js +157 -0
- package/dist/search/multi-query.js.map +1 -0
- package/dist/search/rrf.d.ts +17 -0
- package/dist/search/rrf.d.ts.map +1 -0
- package/dist/search/rrf.js +48 -0
- package/dist/search/rrf.js.map +1 -0
- package/dist/search/sampling.d.ts +25 -0
- package/dist/search/sampling.d.ts.map +1 -0
- package/dist/search/sampling.js +52 -0
- package/dist/search/sampling.js.map +1 -0
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +165 -4
- package/dist/server.js.map +1 -1
- package/dist/tools/agent.d.ts +5 -0
- package/dist/tools/agent.d.ts.map +1 -0
- package/dist/tools/agent.js +67 -0
- package/dist/tools/agent.js.map +1 -0
- package/dist/tools/fetch.d.ts.map +1 -1
- package/dist/tools/fetch.js +10 -0
- package/dist/tools/fetch.js.map +1 -1
- package/dist/tools/find-similar.d.ts +5 -0
- package/dist/tools/find-similar.d.ts.map +1 -0
- package/dist/tools/find-similar.js +48 -0
- package/dist/tools/find-similar.js.map +1 -0
- package/dist/tools/research.d.ts +5 -0
- package/dist/tools/research.d.ts.map +1 -0
- package/dist/tools/research.js +50 -0
- package/dist/tools/research.js.map +1 -0
- package/dist/tools/search.d.ts +2 -1
- package/dist/tools/search.d.ts.map +1 -1
- package/dist/tools/search.js +169 -13
- package/dist/tools/search.js.map +1 -1
- package/dist/types.d.ts +100 -3
- package/dist/types.d.ts.map +1 -1
- package/package.json +9 -3
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { VectorIndex, type SimilarResult } from './vector-index.js';
|
|
2
|
+
/**
 * Service that embeds page markdown through an embedding subprocess and keeps
 * the resulting vectors in a VectorIndex for similarity lookups.
 * Operations are best-effort: the implementation logs failures and returns
 * nothing / an empty result instead of throwing.
 */
export declare class EmbeddingService {
|
|
3
|
+
/** EmbeddingSubprocess created in the constructor; produces the vectors. */
private subprocess;
|
|
4
|
+
/** VectorIndex created in the constructor; vectors are added under their normalized URL. */
private index;
|
|
5
|
+
/** Whether embedding and search are enabled; written by init(), setAvailable() and shutdown(). */
private available;
|
|
6
|
+
/** Set to true when init() succeeds and back to false by shutdown(). */
private initialized;
|
|
7
|
+
constructor();
|
|
8
|
+
/**
 * Loads the embeddings already stored in the cache into the index and marks
 * the service available. Errors are logged and leave the service unavailable
 * instead of rejecting.
 */
init(): Promise<void>;
|
|
9
|
+
/** Returns the current availability flag. */
isAvailable(): boolean;
|
|
10
|
+
/** Overrides the availability flag. */
setAvailable(value: boolean): void;
|
|
11
|
+
/** Returns the underlying vector index. */
getIndex(): VectorIndex;
|
|
12
|
+
/**
 * Embeds `markdown` and stores the vector for `url` (normalized when possible)
 * in both the cache and the index. Does nothing when the service is
 * unavailable; embedding failures are logged as warnings.
 */
embedAndStore(url: string, markdown: string): Promise<void>;
|
|
13
|
+
/** Fire-and-forget variant of embedAndStore(); returns immediately. */
embedAsync(url: string, markdown: string): void;
|
|
14
|
+
/**
 * Embeds `queryText` and asks the index for the `topK` most similar entries,
 * passing `excludeUrls` through to the index. Returns an empty array when the
 * service is unavailable, the index is empty, or embedding the query fails.
 */
findSimilar(queryText: string, topK: number, excludeUrls?: Set<string>): Promise<SimilarResult[]>;
|
|
15
|
+
/** Shuts down the subprocess, clears the index and marks the service unavailable. */
shutdown(): void;
|
|
16
|
+
}
|
|
17
|
+
/** Returns the shared module-level EmbeddingService, constructing it on first call (init() is not invoked here). */
export declare function getEmbeddingService(): EmbeddingService;
|
|
18
|
+
/** Shuts down and discards the shared EmbeddingService, if one exists; the next getEmbeddingService() call creates a new one. */
export declare function resetEmbeddingService(): void;
|
|
19
|
+
//# sourceMappingURL=embed.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embed.d.ts","sourceRoot":"","sources":["../../src/embedding/embed.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,WAAW,EAAE,KAAK,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAOpE,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,UAAU,CAAsB;IACxC,OAAO,CAAC,KAAK,CAAc;IAC3B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,WAAW,CAAS;;IAOtB,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAuB3B,WAAW,IAAI,OAAO;IAItB,YAAY,CAAC,KAAK,EAAE,OAAO,GAAG,IAAI;IAIlC,QAAQ,IAAI,WAAW;IAIjB,aAAa,CAAC,GAAG,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAoCjE,UAAU,CAAC,GAAG,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI;IAQzC,WAAW,CACf,SAAS,EAAE,MAAM,EACjB,IAAI,EAAE,MAAM,EACZ,WAAW,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GACxB,OAAO,CAAC,aAAa,EAAE,CAAC;IAsB3B,QAAQ,IAAI,IAAI;CAWjB;AAID,wBAAgB,mBAAmB,IAAI,gBAAgB,CAKtD;AAED,wBAAgB,qBAAqB,IAAI,IAAI,CAK5C"}
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
import { randomUUID } from 'node:crypto';
|
|
2
|
+
import { EmbeddingSubprocess } from './subprocess.js';
|
|
3
|
+
import { VectorIndex } from './vector-index.js';
|
|
4
|
+
import { updateCacheEmbedding, getAllEmbeddings, normalizeUrl } from '../cache/store.js';
|
|
5
|
+
import { getConfig } from '../config.js';
|
|
6
|
+
import { createLogger } from '../logger.js';
|
|
7
|
+
const log = createLogger('embedding');
|
|
8
|
+
/**
 * Embeds page content through an EmbeddingSubprocess and keeps the resulting
 * vectors in a VectorIndex for similarity search.
 *
 * Every public operation is best-effort: failures are logged and surface as a
 * no-op or an empty result rather than as an exception.
 */
export class EmbeddingService {
    /** Subprocess wrapper used to turn text into vectors. */
    subprocess;
    /** Index that vectors are added to under their normalized URL. */
    index;
    /** Whether embedding and search are enabled; written by init(), setAvailable() and shutdown(). */
    available = false;
    /** Set by init() on success and cleared by shutdown(). */
    initialized = false;
    constructor() {
        this.subprocess = new EmbeddingSubprocess();
        this.index = new VectorIndex();
    }
    /**
     * Load the embeddings persisted in the cache into the index and mark the
     * service available. Never rejects: on error the failure is logged and the
     * service is left unavailable.
     */
    async init() {
        try {
            const stored = getAllEmbeddings();
            if (stored.length > 0) {
                // Rows without a vector or with non-positive dims cannot be indexed.
                const entries = stored
                    .filter(e => e.embedding && e.dims > 0)
                    .map(e => ({
                    url: e.normalizedUrl,
                    embedding: e.embedding,
                    dims: e.dims,
                }));
                const loaded = this.index.loadFromBuffers(entries);
                log.info('loaded embeddings into index', { count: loaded });
            }
            this.available = true;
            this.initialized = true;
        }
        catch (err) {
            log.error('EmbeddingService init failed', { error: String(err) });
            this.available = false;
        }
    }
    /** @returns the current availability flag. */
    isAvailable() {
        return this.available;
    }
    /** Override the availability flag. */
    setAvailable(value) {
        this.available = value;
    }
    /** @returns the underlying vector index. */
    getIndex() {
        return this.index;
    }
    /**
     * Embed `markdown` and store the vector for `url` in the cache and the index.
     * Does nothing when the service is unavailable; failures are logged as
     * warnings and never thrown.
     */
    async embedAndStore(url, markdown) {
        if (!this.available) {
            log.debug('embedding skipped: service not available', { url });
            return;
        }
        try {
            const requestId = randomUUID();
            const response = await this.subprocess.embed(requestId, markdown);
            if (!response.vector || response.error) {
                log.warn('embedding failed for URL', { url, error: response.error });
                return;
            }
            const vector = new Float32Array(response.vector);
            // The Buffer is a view over the Float32Array's memory, not a copy.
            const buffer = Buffer.from(vector.buffer);
            const model = this.subprocess.getModel() ?? getConfig().embeddingModel;
            const dims = this.subprocess.getDims() ?? response.vector.length;
            // Fall back to the raw URL when it cannot be normalized.
            let normalizedUrl;
            try {
                normalizedUrl = normalizeUrl(url);
            }
            catch {
                normalizedUrl = url;
            }
            updateCacheEmbedding(normalizedUrl, buffer, model, dims);
            this.index.add(normalizedUrl, vector);
            log.debug('embedded and stored', { url: normalizedUrl, dims });
        }
        catch (err) {
            log.warn('embedAndStore failed', { url, error: String(err) });
        }
    }
    /** Fire-and-forget variant of embedAndStore(); returns immediately. */
    embedAsync(url, markdown) {
        if (!this.available)
            return;
        this.embedAndStore(url, markdown).catch(err => {
            log.warn('async embedding failed', { url, error: String(err) });
        });
    }
    /**
     * Embed `queryText` and return the index's `topK` most similar entries,
     * passing `excludeUrls` through to the index.
     * @returns an empty array when the service is unavailable, the index is
     * empty, or the query cannot be embedded.
     */
    async findSimilar(queryText, topK, excludeUrls) {
        if (!this.available || this.index.size() === 0) {
            return [];
        }
        try {
            const requestId = randomUUID();
            const response = await this.subprocess.embed(requestId, queryText);
            if (!response.vector || response.error) {
                log.warn('query embedding failed', { error: response.error });
                return [];
            }
            const queryVector = new Float32Array(response.vector);
            return this.index.findSimilar(queryVector, topK, excludeUrls);
        }
        catch (err) {
            log.warn('findSimilar failed', { error: String(err) });
            return [];
        }
    }
    /**
     * Stop the subprocess, clear the index and mark the service unavailable.
     * Each teardown step runs independently so that a failure in one (for
     * example the subprocess refusing to stop) cannot leave the service marked
     * available with a stale index.
     */
    shutdown() {
        // Flip the flags first: the service must stop accepting work even if a
        // teardown step below throws.
        this.available = false;
        this.initialized = false;
        let failed = false;
        try {
            this.subprocess.shutdown();
        }
        catch (err) {
            failed = true;
            log.error('EmbeddingService shutdown error', { error: String(err) });
        }
        try {
            this.index.clear();
        }
        catch (err) {
            failed = true;
            log.error('EmbeddingService shutdown error', { error: String(err) });
        }
        if (!failed) {
            log.info('EmbeddingService shut down');
        }
    }
}
|
|
118
|
+
let globalInstance = null;
|
|
119
|
+
/**
 * Return the shared EmbeddingService, constructing it lazily on the first
 * call. The instance is not initialised here; callers run init() themselves.
 */
export function getEmbeddingService() {
    return globalInstance ?? (globalInstance = new EmbeddingService());
}
|
|
125
|
+
/**
 * Shut down and forget the shared EmbeddingService so that the next
 * getEmbeddingService() call builds a fresh one. No-op when none exists.
 */
export function resetEmbeddingService() {
    if (!globalInstance)
        return;
    globalInstance.shutdown();
    globalInstance = null;
}
|
|
131
|
+
//# sourceMappingURL=embed.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embed.js","sourceRoot":"","sources":["../../src/embedding/embed.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;AACtD,OAAO,EAAE,WAAW,EAAsB,MAAM,mBAAmB,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,gBAAgB,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AACzF,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE5C,MAAM,GAAG,GAAG,YAAY,CAAC,WAAW,CAAC,CAAC;AAEtC,MAAM,OAAO,gBAAgB;IACnB,UAAU,CAAsB;IAChC,KAAK,CAAc;IACnB,SAAS,GAAG,KAAK,CAAC;IAClB,WAAW,GAAG,KAAK,CAAC;IAE5B;QACE,IAAI,CAAC,UAAU,GAAG,IAAI,mBAAmB,EAAE,CAAC;QAC5C,IAAI,CAAC,KAAK,GAAG,IAAI,WAAW,EAAE,CAAC;IACjC,CAAC;IAED,KAAK,CAAC,IAAI;QACR,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,gBAAgB,EAAE,CAAC;YAClC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACtB,MAAM,OAAO,GAAG,MAAM;qBACnB,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,IAAI,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC;qBACtC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;oBACT,GAAG,EAAE,CAAC,CAAC,aAAa;oBACpB,SAAS,EAAE,CAAC,CAAC,SAAS;oBACtB,IAAI,EAAE,CAAC,CAAC,IAAI;iBACb,CAAC,CAAC,CAAC;gBACN,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC;gBACnD,GAAG,CAAC,IAAI,CAAC,8BAA8B,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;YAC9D,CAAC;YAED,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;YACtB,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;QAC1B,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,GAAG,CAAC,KAAK,CAAC,8BAA8B,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAClE,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;QACzB,CAAC;IACH,CAAC;IAED,WAAW;QACT,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAED,YAAY,CAAC,KAAc;QACzB,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;IACzB,CAAC;IAED,QAAQ;QACN,OAAO,IAAI,CAAC,KAAK,CAAC;IACpB,CAAC;IAED,KAAK,CAAC,aAAa,CAAC,GAAW,EAAE,QAAgB;QAC/C,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,GAAG,CAAC,KAAK,CAAC,0CAA0C,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;YAC/D,OAAO;QACT,CAAC;QAED,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,UAAU,EAAE,CAAC;YAC/B,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;YAElE,IAAI,CAAC,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;gBACvC,GAAG,CAAC,IAAI,CAAC
,0BAA0B,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC;gBACrE,OAAO;YACT,CAAC;YAED,MAAM,MAAM,GAAG,IAAI,YAAY,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YACjD,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YAC1C,MAAM,KAAK,GAAG,IAAI,CAAC,UAAU,CAAC,QAAQ,EAAE,IAAI,SAAS,EAAE,CAAC,cAAc,CAAC;YACvE,MAAM,IAAI,GAAG,IAAI,CAAC,UAAU,CAAC,OAAO,EAAE,IAAI,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC;YAEjE,IAAI,aAAqB,CAAC;YAC1B,IAAI,CAAC;gBACH,aAAa,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;YACpC,CAAC;YAAC,MAAM,CAAC;gBACP,aAAa,GAAG,GAAG,CAAC;YACtB,CAAC;YAED,oBAAoB,CAAC,aAAa,EAAE,MAAM,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC;YACzD,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,aAAa,EAAE,MAAM,CAAC,CAAC;YAEtC,GAAG,CAAC,KAAK,CAAC,qBAAqB,EAAE,EAAE,GAAG,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;QACjE,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,GAAG,CAAC,IAAI,CAAC,sBAAsB,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAChE,CAAC;IACH,CAAC;IAED,UAAU,CAAC,GAAW,EAAE,QAAgB;QACtC,IAAI,CAAC,IAAI,CAAC,SAAS;YAAE,OAAO;QAE5B,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE;YAC5C,GAAG,CAAC,IAAI,CAAC,wBAAwB,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAClE,CAAC,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,WAAW,CACf,SAAiB,EACjB,IAAY,EACZ,WAAyB;QAEzB,IAAI,CAAC,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,EAAE,CAAC;YAC/C,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,UAAU,EAAE,CAAC;YAC/B,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;YAEnE,IAAI,CAAC,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;gBACvC,GAAG,CAAC,IAAI,CAAC,wBAAwB,EAAE,EAAE,KAAK,EAAE,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC;gBAC9D,OAAO,EAAE,CAAC;YACZ,CAAC;YAED,MAAM,WAAW,GAAG,IAAI,YAAY,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YACtD,OAAO,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,WAAW,EAAE,IAAI,EAAE,WAAW,CAAC,CAAC;QAChE,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,GAAG,CAAC,IAAI,CAAC,oBAAoB,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YACvD,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IAED,QAAQ;QACN,IAAI,CAAC;YACH,IAAI,CAAC,UAAU,CAAC,QAAQ,EAAE,
CAAC;YAC3B,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;YACnB,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;YACvB,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC;YACzB,GAAG,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAC;QACzC,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,GAAG,CAAC,KAAK,CAAC,iCAAiC,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACvE,CAAC;IACH,CAAC;CACF;AAED,IAAI,cAAc,GAA4B,IAAI,CAAC;AAEnD,MAAM,UAAU,mBAAmB;IACjC,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,cAAc,GAAG,IAAI,gBAAgB,EAAE,CAAC;IAC1C,CAAC;IACD,OAAO,cAAc,CAAC;AACxB,CAAC;AAED,MAAM,UAAU,qBAAqB;IACnC,IAAI,cAAc,EAAE,CAAC;QACnB,cAAc,CAAC,QAAQ,EAAE,CAAC;QAC1B,cAAc,GAAG,IAAI,CAAC;IACxB,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Extract key terms from page content and title for FTS5 query building.
|
|
3
|
+
* Prioritizes: title words > headings > bold text > first paragraph.
|
|
4
|
+
* Returns up to 20 deduplicated, lowercased, stopword-free terms.
|
|
5
|
+
*/
|
|
6
|
+
export declare function extractKeyTerms(content: string, title: string): string[];
|
|
7
|
+
/** Returns the words longer than one character whose lowercase form is not in the built-in English stopword list. */
export declare function removeStopwords(words: string[]): string[];
|
|
8
|
+
/** Returns the trimmed text of the markdown `#`, `##` and `###` headings found in `content`, in document order. */
export declare function extractHeadings(content: string): string[];
|
|
9
|
+
/** Returns the trimmed, non-empty phrases wrapped in `**…**` or `__…__` in `content`, in document order. */
export declare function extractBoldText(content: string): string[];
|
|
10
|
+
/** Returns the leading body text of `content` (blank lines, heading lines and code-fence marker lines are skipped), truncated to 200 characters; '' for empty content. */
export declare function extractFirstParagraph(content: string): string;
|
|
11
|
+
/** Joins `terms` with ` OR ` into an FTS5 query string after removing quote and parenthesis characters from each term; returns '' for an empty list. */
export declare function buildFTS5Query(terms: string[]): string;
|
|
12
|
+
//# sourceMappingURL=key-terms.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"key-terms.d.ts","sourceRoot":"","sources":["../../src/embedding/key-terms.ts"],"names":[],"mappings":"AAwBA;;;;GAIG;AACH,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,CAkDxE;AAiBD,wBAAgB,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,CAEzD;AAED,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAazD;AAED,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAazD;AAED,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAqB7D;AAED,wBAAgB,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM,CAKtD"}
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import { createLogger } from '../logger.js';
|
|
2
|
+
const log = createLogger('embedding');
|
|
3
|
+
const STOPWORDS = new Set([
|
|
4
|
+
'a', 'an', 'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
|
|
5
|
+
'of', 'with', 'by', 'from', 'is', 'it', 'as', 'be', 'was', 'were',
|
|
6
|
+
'been', 'are', 'am', 'has', 'have', 'had', 'do', 'does', 'did', 'will',
|
|
7
|
+
'would', 'could', 'should', 'may', 'might', 'can', 'shall', 'not', 'no',
|
|
8
|
+
'nor', 'so', 'yet', 'both', 'each', 'few', 'more', 'most', 'other',
|
|
9
|
+
'some', 'such', 'than', 'too', 'very', 'just', 'about', 'above', 'after',
|
|
10
|
+
'again', 'all', 'also', 'any', 'because', 'before', 'below', 'between',
|
|
11
|
+
'during', 'further', 'here', 'how', 'into', 'its', 'itself', 'me', 'my',
|
|
12
|
+
'myself', 'once', 'only', 'our', 'ours', 'ourselves', 'out', 'over',
|
|
13
|
+
'own', 'same', 'she', 'he', 'her', 'him', 'his', 'hers', 'that', 'their',
|
|
14
|
+
'theirs', 'them', 'themselves', 'then', 'there', 'these', 'they', 'this',
|
|
15
|
+
'those', 'through', 'under', 'until', 'up', 'we', 'what', 'when', 'where',
|
|
16
|
+
'which', 'while', 'who', 'whom', 'why', 'you', 'your', 'yours', 'yourself',
|
|
17
|
+
'i', 'if',
|
|
18
|
+
]);
|
|
19
|
+
const MAX_TERMS = 20;
|
|
20
|
+
const FIRST_PARAGRAPH_MAX_CHARS = 200;
|
|
21
|
+
/**
 * Collect the key terms of a page for building an FTS5 query.
 *
 * Candidate words are gathered in priority order (title, then headings, then
 * bold phrases, then the first paragraph), stripped of stopwords and
 * de-duplicated while keeping first-seen order.
 *
 * @param content markdown body of the page
 * @param title page title
 * @returns at most MAX_TERMS lowercased terms; an empty array when both inputs
 * are blank or when extraction throws (the error is logged).
 */
export function extractKeyTerms(content, title) {
    try {
        const nothingToScan = !content.trim() && !title.trim();
        if (nothingToScan) {
            return [];
        }
        const titleWords = tokenize(title);
        const headings = extractHeadings(content);
        const boldPhrases = extractBoldText(content);
        const firstPara = extractFirstParagraph(content);
        // Order matters: earlier sources win when the term cap is reached.
        const candidates = [
            ...titleWords,
            ...headings.flatMap(heading => tokenize(heading)),
            ...boldPhrases.flatMap(phrase => tokenize(phrase)),
            ...(firstPara ? tokenize(firstPara) : []),
        ];
        // A Set keeps insertion order, so this is a first-occurrence de-dupe.
        const unique = Array.from(new Set(removeStopwords(candidates))).slice(0, MAX_TERMS);
        log.debug('extracted key terms', {
            titleTerms: titleWords.length,
            headingTerms: headings.length,
            boldTerms: boldPhrases.length,
            uniqueTerms: unique.length,
        });
        return unique;
    }
    catch (err) {
        log.error('key term extraction failed', { error: String(err) });
        return [];
    }
}
|
|
70
|
+
/**
 * Split markdown-flavoured text into lowercase word tokens.
 *
 * URLs, fenced and inline code, heading markers and emphasis markers are
 * removed first. Tokens may contain internal hyphens ("well-known"), but
 * hyphens at either end are stripped so that dashes used as punctuation
 * ("--", "---", "- item") never become terms. Tokens shorter than two
 * characters or made only of digits are dropped.
 *
 * @param text raw text; falsy input yields an empty array
 * @returns the tokens in order of appearance (duplicates kept)
 */
function tokenize(text) {
    if (!text)
        return [];
    const stripped = text
        .replace(/https?:\/\/[^\s)]+/g, '')
        .replace(/```[\s\S]*?```/g, '')
        .replace(/`[^`]+`/g, '')
        .replace(/^#{1,6}\s+/gm, '')
        .replace(/\*{1,3}|_{1,3}/g, '');
    return stripped
        .toLowerCase()
        .split(/[^a-z0-9-]+/)
        // A hyphen only counts as part of a word when it sits between characters.
        .map(w => w.replace(/^-+|-+$/g, ''))
        .filter(w => w.length > 1 && !/^\d+$/.test(w));
}
|
|
83
|
+
/**
 * Filter out noise words.
 *
 * @param words candidate words
 * @returns the words longer than one character whose lowercase form is not in
 * STOPWORDS, in their original order
 */
export function removeStopwords(words) {
    return words.filter(word => {
        if (!(word.length > 1)) {
            return false;
        }
        return !STOPWORDS.has(word.toLowerCase());
    });
}
|
|
86
|
+
/**
 * Collect the text of level 1-3 markdown ATX headings (`#`, `##`, `###`).
 *
 * Lines inside fenced code blocks are ignored so that `# comment` lines in
 * shell or Python snippets are not mistaken for headings, and the marker must
 * be followed by text on the same line (an empty `#` line no longer captures
 * the line after it).
 *
 * @param content markdown text; falsy input yields an empty array
 * @returns trimmed heading texts in document order
 */
export function extractHeadings(content) {
    if (!content)
        return [];
    const headings = [];
    // Whitespace after the marker must stay on the heading's own line.
    const headingPattern = /^#{1,3}[^\S\r\n]+(.+)$/;
    let inFence = false;
    for (const line of content.split(/\r\n|[\n\r\u2028\u2029]/)) {
        const trimmed = line.trim();
        if (trimmed.startsWith('```')) {
            // A line such as ```code``` opens and closes on its own; only a
            // marker without a closing partner toggles the fence state.
            if (trimmed.indexOf('```', 3) === -1) {
                inFence = !inFence;
            }
            continue;
        }
        if (inFence)
            continue;
        const match = headingPattern.exec(line);
        if (match === null)
            continue;
        const text = match[1].trim();
        if (text)
            headings.push(text);
    }
    return headings;
}
|
|
99
|
+
/**
 * Collect the phrases marked bold in markdown, i.e. wrapped in `**…**` or
 * `__…__`.
 *
 * @param content markdown text; falsy input yields an empty array
 * @returns trimmed, non-empty bold phrases in document order
 */
export function extractBoldText(content) {
    if (!content)
        return [];
    const phrases = [];
    for (const found of content.matchAll(/\*\*(.+?)\*\*|__(.+?)__/g)) {
        // Exactly one of the two capture groups participates in a match.
        const [, starred, underscored] = found;
        const text = (starred || underscored).trim();
        if (text) {
            phrases.push(text);
        }
    }
    return phrases;
}
|
|
112
|
+
/**
 * Return the first prose paragraph of a markdown document.
 *
 * Leading blank lines, heading lines and fenced code blocks (markers and
 * their contents) are skipped. Consecutive text lines are then joined with a
 * single space until a blank line, a heading, a code fence or the length cap
 * ends the paragraph, so hard-wrapped paragraphs are captured in full rather
 * than only their first line.
 *
 * @param content markdown text; falsy input yields ''
 * @returns the paragraph text, truncated to FIRST_PARAGRAPH_MAX_CHARS characters
 */
export function extractFirstParagraph(content) {
    if (!content)
        return '';
    const paragraphLines = [];
    // Running length of paragraphLines.join(' '), so the cap can stop the scan early.
    let collected = 0;
    let inFence = false;
    for (const line of content.split('\n')) {
        const trimmed = line.trim();
        const started = paragraphLines.length > 0;
        if (trimmed.startsWith('```')) {
            if (started)
                break;
            // A line such as ```code``` opens and closes on its own; only a
            // marker without a closing partner toggles the fence state.
            if (trimmed.indexOf('```', 3) === -1) {
                inFence = !inFence;
            }
            continue;
        }
        if (inFence)
            continue;
        if (!trimmed || trimmed.startsWith('#')) {
            if (started)
                break;
            continue;
        }
        paragraphLines.push(trimmed);
        collected += trimmed.length + (started ? 1 : 0);
        if (collected >= FIRST_PARAGRAPH_MAX_CHARS)
            break;
    }
    return paragraphLines.join(' ').slice(0, FIRST_PARAGRAPH_MAX_CHARS);
}
|
|
132
|
+
/**
 * Build an FTS5 MATCH expression that matches any of the given terms.
 *
 * Quote and parenthesis characters are removed from each term and terms that
 * end up empty are dropped. A term that is not a valid FTS5 bareword
 * (anything other than ASCII letters, digits, underscore and non-ASCII
 * characters, e.g. "well-known" or "node.js") or that collides with an FTS5
 * operator keyword is wrapped in double quotes so it is parsed as a string
 * instead of causing a syntax error; plain terms are emitted unchanged.
 *
 * @param terms search terms
 * @returns the terms joined with " OR ", or '' when nothing usable remains
 */
export function buildFTS5Query(terms) {
    if (terms.length === 0)
        return '';
    const bareword = /^[A-Za-z0-9_\u0080-\uFFFF]+$/;
    const reserved = new Set(['AND', 'OR', 'NOT', 'NEAR']);
    return terms
        // Double quotes are removed here, so quoting below needs no escaping.
        .map(t => t.replace(/['"()]/g, '').trim())
        .filter(Boolean)
        .map(t => (bareword.test(t) && !reserved.has(t) ? t : `"${t}"`))
        .join(' OR ');
}
|
|
138
|
+
//# sourceMappingURL=key-terms.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"key-terms.js","sourceRoot":"","sources":["../../src/embedding/key-terms.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE5C,MAAM,GAAG,GAAG,YAAY,CAAC,WAAW,CAAC,CAAC;AAEtC,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC;IACxB,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK;IACnE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM;IACjE,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM;IACtE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI;IACvE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO;IAClE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO;IACxE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS;IACtE,QAAQ,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,IAAI;IACvE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,WAAW,EAAE,KAAK,EAAE,MAAM;IACnE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO;IACxE,QAAQ,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM;IACxE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO;IACzE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU;IAC1E,GAAG,EAAE,IAAI;CACV,CAAC,CAAC;AAEH,MAAM,SAAS,GAAG,EAAE,CAAC;AACrB,MAAM,yBAAyB,GAAG,GAAG,CAAC;AAEtC;;;;GAIG;AACH,MAAM,UAAU,eAAe,CAAC,OAAe,EAAE,KAAa;IAC5D,IAAI,CAAC;QACH,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,EAAE,CAAC;YACrC,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,MAAM,UAAU,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC;QACnC,QAAQ,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC;QAE7B,MAAM,QAAQ,GAAG,eAAe,CAAC,OAAO,CAAC,CAAC;QAC1C,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,QAAQ,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;QACtC,CAA
C;QAED,MAAM,WAAW,GAAG,eAAe,CAAC,OAAO,CAAC,CAAC;QAC7C,KAAK,MAAM,MAAM,IAAI,WAAW,EAAE,CAAC;YACjC,QAAQ,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC;QACrC,CAAC;QAED,MAAM,SAAS,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QACjD,IAAI,SAAS,EAAE,CAAC;YACd,QAAQ,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;QACxC,CAAC;QAED,MAAM,OAAO,GAAG,eAAe,CAAC,QAAQ,CAAC,CAAC;QAC1C,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;QAC/B,MAAM,MAAM,GAAa,EAAE,CAAC;QAE5B,KAAK,MAAM,IAAI,IAAI,OAAO,EAAE,CAAC;YAC3B,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBACpB,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;gBACf,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACpB,CAAC;YACD,IAAI,MAAM,CAAC,MAAM,IAAI,SAAS;gBAAE,MAAM;QACxC,CAAC;QAED,GAAG,CAAC,KAAK,CAAC,qBAAqB,EAAE;YAC/B,UAAU,EAAE,UAAU,CAAC,MAAM;YAC7B,YAAY,EAAE,QAAQ,CAAC,MAAM;YAC7B,SAAS,EAAE,WAAW,CAAC,MAAM;YAC7B,WAAW,EAAE,MAAM,CAAC,MAAM;SAC3B,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,KAAK,CAAC,4BAA4B,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAChE,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AAED,SAAS,QAAQ,CAAC,IAAY;IAC5B,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IAErB,IAAI,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,qBAAqB,EAAE,EAAE,CAAC,CAAC;IACtD,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,iBAAiB,EAAE,EAAE,CAAC,CAAC;IACjD,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;IAC1C,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC,CAAC;IAC9C,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,iBAAiB,EAAE,EAAE,CAAC,CAAC;IAEjD,OAAO,OAAO;SACX,WAAW,EAAE;SACb,KAAK,CAAC,aAAa,CAAC;SACpB,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;AACnD,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,KAAe;IAC7C,OAAO,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;AAC5E,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,OAAe;IAC7C,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,CAAC;IAExB,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,MAAM,KAAK,GAAG,mBAAmB,CAAC;IAClC,IAAI,KAA6B,CAAC;IAElC,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,
EAAE,CAAC;QAC9C,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAC7B,IAAI,IAAI;YAAE,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAChC,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,OAAe;IAC7C,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,CAAC;IAExB,MAAM,IAAI,GAAa,EAAE,CAAC;IAC1B,MAAM,KAAK,GAAG,0BAA0B,CAAC;IACzC,IAAI,KAA6B,CAAC;IAElC,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC9C,MAAM,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAC3C,IAAI,IAAI;YAAE,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC5B,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,UAAU,qBAAqB,CAAC,OAAe;IACnD,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,CAAC;IAExB,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAClC,MAAM,cAAc,GAAa,EAAE,CAAC;IAEpC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;QAC5B,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,SAAS;QAClD,IAAI,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC;YAAE,SAAS;QAExC,cAAc,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC7B,MAAM,MAAM,GAAG,cAAc,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACxC,IAAI,MAAM,CAAC,MAAM,IAAI,yBAAyB,EAAE,CAAC;YAC/C,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,yBAAyB,CAAC,CAAC;QACpD,CAAC;QAED,MAAM;IACR,CAAC;IAED,OAAO,cAAc,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,yBAAyB,CAAC,CAAC;AACtE,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,KAAe;IAC5C,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAElC,MAAM,OAAO,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC,CAAC;IACzD,OAAO,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAC9C,CAAC"}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/** Result of one embedding request; callers treat a missing `vector` or a set `error` as failure. */
export interface EmbeddingResponse {
|
|
2
|
+
/** Request identifier — presumably echoes the id passed to embed(); confirm against the subprocess protocol. */
id: string;
|
|
3
|
+
/** Embedding values; absent when the request failed. */
vector?: number[];
|
|
4
|
+
/** Failure description; when present the response is treated as failed. */
error?: string;
|
|
5
|
+
}
|
|
6
|
+
/** Optional timeouts for EmbeddingSubprocess. */
export interface SubprocessOptions {
|
|
7
|
+
/** Defaults to 60000 ms. Presumably the maximum wait for the subprocess to become ready — confirm in spawnProcess. */
readyTimeoutMs?: number;
|
|
8
|
+
/** Time in ms after which a single embed request fails with a timeout error. Defaults to 30000. */
requestTimeoutMs?: number;
|
|
9
|
+
}
|
|
10
|
+
/**
 * Wrapper around the embedding child process. The process is started on the
 * first embed() call rather than in the constructor.
 */
export declare class EmbeddingSubprocess {
|
|
11
|
+
/** Child process handle; null until a process has been spawned. */
private proc;
|
|
12
|
+
/** Map of in-flight requests; an entry is removed by request id when the request times out. */
private pending;
|
|
13
|
+
/** Tri-state availability: starts as null; only `true` makes isAvailable() return true. */
private available;
|
|
14
|
+
/** Value returned by getDims(); null initially. */
private dims;
|
|
15
|
+
/** Value returned by getModel(); null initially. */
private modelName;
|
|
16
|
+
/** Promise of the spawn started by embed(); awaited before a request is sent. */
private spawnPromise;
|
|
17
|
+
/** Starts as an empty string. NOTE(review): presumably buffers partial stdout output — confirm in handleStdoutData. */
private stdoutBuffer;
|
|
18
|
+
/** Timer handle managed by resetIdleTimer(); null initially. */
private idleTimer;
|
|
19
|
+
/** From SubprocessOptions.readyTimeoutMs, default 60000. */
private readyTimeoutMs;
|
|
20
|
+
/** From SubprocessOptions.requestTimeoutMs, default 30000. */
private requestTimeoutMs;
|
|
21
|
+
/** @param options optional timeout overrides; omitted values use the defaults above. */
constructor(options?: SubprocessOptions);
|
|
22
|
+
/** Returns true only when the availability flag is exactly `true`. */
isAvailable(): boolean;
|
|
23
|
+
/** Returns the stored dimension count, or null when none has been recorded. */
getDims(): number | null;
|
|
24
|
+
/** Returns the stored model name, or null when none has been recorded. */
getModel(): string | null;
|
|
25
|
+
/**
 * Requests an embedding for `text` under request id `id`. Spawns the
 * subprocess if none is running, and truncates `text` to the configured
 * `embeddingMaxTextLength` before sending. A request that is not answered
 * within requestTimeoutMs fails with a timeout error.
 * NOTE(review): whether failures reject or resolve with `error` set is not
 * visible from the declaration — confirm in the implementation.
 */
embed(id: string, text: string): Promise<EmbeddingResponse>;
|
|
26
|
+
/** Shuts the subprocess down. */
shutdown(): void;
|
|
27
|
+
/** Starts the child process; invoked from embed(). */
private spawnProcess;
|
|
28
|
+
private handleStdoutData;
|
|
29
|
+
/** Called by embed() before each request is sent. */
private resetIdleTimer;
|
|
30
|
+
}
|
|
31
|
+
//# sourceMappingURL=subprocess.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"subprocess.d.ts","sourceRoot":"","sources":["../../src/embedding/subprocess.ts"],"names":[],"mappings":"AAWA,MAAM,WAAW,iBAAiB;IAChC,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAQD,MAAM,WAAW,iBAAiB;IAChC,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED,qBAAa,mBAAmB;IAC9B,OAAO,CAAC,IAAI,CAA6B;IACzC,OAAO,CAAC,OAAO,CAAqC;IACpD,OAAO,CAAC,SAAS,CAAwB;IACzC,OAAO,CAAC,IAAI,CAAuB;IACnC,OAAO,CAAC,SAAS,CAAuB;IACxC,OAAO,CAAC,YAAY,CAA8B;IAClD,OAAO,CAAC,YAAY,CAAM;IAC1B,OAAO,CAAC,SAAS,CAA8C;IAC/D,OAAO,CAAC,cAAc,CAAS;IAC/B,OAAO,CAAC,gBAAgB,CAAS;gBAErB,OAAO,CAAC,EAAE,iBAAiB;IAKvC,WAAW,IAAI,OAAO;IAItB,OAAO,IAAI,MAAM,GAAG,IAAI;IAIxB,QAAQ,IAAI,MAAM,GAAG,IAAI;IAInB,KAAK,CAAC,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,iBAAiB,CAAC;IAmCjE,QAAQ,IAAI,IAAI;YA0BF,YAAY;IAgG1B,OAAO,CAAC,gBAAgB;IAoCxB,OAAO,CAAC,cAAc;CAWvB"}
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
import { spawn } from 'node:child_process';
|
|
2
|
+
import { dirname, join } from 'node:path';
|
|
3
|
+
import { fileURLToPath } from 'node:url';
|
|
4
|
+
import { getConfig } from '../config.js';
|
|
5
|
+
import { createLogger } from '../logger.js';
|
|
6
|
+
// Logger shared by everything in this module.
const log = createLogger('embedding');

// ES modules have no built-in __dirname, so derive it from this module's URL.
const moduleFile = fileURLToPath(import.meta.url);
const __dirname = dirname(moduleFile);

// Location of the Python server script: ../scripts/embedding_server.py
// relative to the directory that contains this module.
const SCRIPT_PATH = join(__dirname, '../scripts/embedding_server.py');
|
|
9
|
+
/**
 * Client for the Python embedding server, run as a child process.
 *
 * Protocol, as implemented below:
 *  - requests are written to the child's stdin, one JSON object per line
 *    (`{ id, text }`);
 *  - responses are read from stdout, one JSON object per line, and matched
 *    back to the caller through `id`;
 *  - start-up is announced on stderr with a line starting with `READY`
 *    (optionally carrying `model=` and `dims=` fields) or `ERROR`.
 *
 * The child is spawned lazily by the first `embed()` call and is shut down
 * after `embeddingIdleTimeoutMs` (from config) without requests.
 */
export class EmbeddingSubprocess {
    /** Handle of the running child process, or null when none is running. */
    proc = null;
    /** In-flight requests keyed by id; each value is `{ resolve, reject, timeoutHandle }`. */
    pending = new Map();
    /** null before any spawn attempt, true after READY, false after failure, exit or shutdown. */
    available = null;
    /** `dims=` value from the READY line, if one was reported. */
    dims = null;
    /** `model=` value from the READY line, if one was reported. */
    modelName = null;
    /** Promise of the start-up handshake of the current child. */
    spawnPromise = null;
    /** Trailing, not yet newline-terminated stdout data of the current child. */
    stdoutBuffer = '';
    /** Timer that shuts the child down after a period without requests. */
    idleTimer = null;
    readyTimeoutMs;
    requestTimeoutMs;

    /**
     * @param options Optional `{ readyTimeoutMs, requestTimeoutMs }` overrides.
     */
    constructor(options) {
        this.readyTimeoutMs = options?.readyTimeoutMs ?? 60000;
        this.requestTimeoutMs = options?.requestTimeoutMs ?? 30000;
    }

    /** True only while the child has reported READY and has not failed, exited or been shut down. */
    isAvailable() {
        return this.available === true;
    }

    /** Dimensionality reported on the READY line, or null. */
    getDims() {
        return this.dims;
    }

    /** Model name reported on the READY line, or null. */
    getModel() {
        return this.modelName;
    }

    /**
     * Sends `text` to the child and resolves with the response carrying the
     * same `id`. Spawns the child first when none is running.
     *
     * @param id Correlation id for the request.
     * @param text Text to embed; truncated to `embeddingMaxTextLength` from config.
     * @returns Promise resolving to the parsed response object.
     * @throws If the child cannot be started, is not available, answers with
     *   an `error` field, exits, or does not answer within `requestTimeoutMs`.
     */
    async embed(id, text) {
        try {
            if (!this.proc && !this.spawnPromise) {
                const spawning = this.spawnProcess();
                this.spawnPromise = spawning;
                // A failed handshake must never stay cached, otherwise every
                // later call would await the same rejected promise instead of
                // retrying. The identity check protects a newer attempt.
                spawning.catch(() => {
                    if (this.spawnPromise === spawning) {
                        this.spawnPromise = null;
                    }
                });
            }
            if (this.spawnPromise) {
                await this.spawnPromise;
            }
            const proc = this.proc;
            if (!proc || this.available === false) {
                throw new Error('Embedding subprocess not available');
            }
            this.resetIdleTimer();
            const config = getConfig();
            const truncatedText = text.slice(0, config.embeddingMaxTextLength);
            return await new Promise((resolve, reject) => {
                const entry = { resolve, reject, timeoutHandle: null };
                // Only remove the map entry if it is still ours; a newer
                // request may have been registered under the same id.
                const forget = () => {
                    if (this.pending.get(id) === entry) {
                        this.pending.delete(id);
                    }
                };
                entry.timeoutHandle = setTimeout(() => {
                    forget();
                    reject(new Error(`Embedding request ${id} timed out after ${this.requestTimeoutMs}ms`));
                }, this.requestTimeoutMs);
                this.pending.set(id, entry);
                const request = JSON.stringify({ id, text: truncatedText }) + '\n';
                // Surface write failures (e.g. EPIPE) right away instead of
                // letting the request run into its timeout.
                proc.stdin.write(request, (err) => {
                    if (!err) {
                        return;
                    }
                    clearTimeout(entry.timeoutHandle);
                    forget();
                    reject(err);
                });
            });
        }
        catch (err) {
            log.error('embed failed', { id, error: String(err) });
            throw err;
        }
    }

    /**
     * Stops the idle timer, rejects every in-flight request and terminates
     * the child process. Safe to call when nothing is running.
     */
    shutdown() {
        try {
            if (this.idleTimer) {
                clearTimeout(this.idleTimer);
                this.idleTimer = null;
            }
            this.rejectAllPending('Subprocess shutting down');
            const proc = this.proc;
            // Detach first so that late events from this child are recognised
            // as stale and cannot disturb a child spawned afterwards.
            this.proc = null;
            this.spawnPromise = null;
            this.stdoutBuffer = '';
            if (proc) {
                this.available = false;
                proc.stdin?.end();
                proc.kill();
            }
            log.info('embedding subprocess shut down');
        }
        catch (err) {
            log.error('shutdown error', { error: String(err) });
        }
    }

    /** Rejects and forgets every in-flight request with an Error carrying `message`. */
    rejectAllPending(message) {
        for (const [id, entry] of this.pending.entries()) {
            clearTimeout(entry.timeoutHandle);
            entry.reject(new Error(message));
            this.pending.delete(id);
        }
    }

    /**
     * Spawns `python3 <SCRIPT_PATH> <model> <maxTextLength>` and waits for
     * the READY/ERROR handshake on stderr.
     *
     * Resolves once READY has been seen. Rejects on an ERROR line, a spawn
     * error, the READY timeout, or when the child exits before READY.
     */
    async spawnProcess() {
        const config = getConfig();
        let child = null;
        try {
            log.info('spawning embedding subprocess', { model: config.embeddingModel });
            child = spawn('python3', [
                SCRIPT_PATH,
                config.embeddingModel,
                String(config.embeddingMaxTextLength),
            ], {
                stdio: ['pipe', 'pipe', 'pipe'],
            });
            const proc = child;
            this.proc = proc;
            // Drop any partial line left over from a previous child.
            this.stdoutBuffer = '';
            // Without a listener a broken pipe on stdin would be raised as an
            // unhandled 'error' event; the failing write is reported through
            // its own callback in embed().
            proc.stdin.on('error', (err) => {
                log.warn('embedding subprocess stdin error', { error: String(err) });
            });
            await new Promise((resolve, reject) => {
                let handshakeDone = false;
                const readyTimeout = setTimeout(() => {
                    handshakeDone = true;
                    reject(new Error(`Embedding subprocess READY timeout after ${this.readyTimeoutMs}ms`));
                    proc.kill();
                }, this.readyTimeoutMs);
                // Resets shared state only while this child is still the
                // current one, so a stale child cannot clobber its successor.
                const releaseIfCurrent = (reason) => {
                    if (this.proc !== proc) {
                        return;
                    }
                    this.available = false;
                    this.proc = null;
                    this.spawnPromise = null;
                    this.stdoutBuffer = '';
                    if (this.idleTimer) {
                        clearTimeout(this.idleTimer);
                        this.idleTimer = null;
                    }
                    this.rejectAllPending(reason);
                };
                let stderrBuf = '';
                proc.stderr.on('data', (data) => {
                    // After the handshake (or once this child has been
                    // replaced) stderr is only drained, not buffered, so the
                    // buffer cannot grow without bound or re-trigger READY.
                    if (handshakeDone || this.proc !== proc) {
                        return;
                    }
                    stderrBuf += data.toString();
                    for (const line of stderrBuf.split('\n')) {
                        if (line.startsWith('READY')) {
                            handshakeDone = true;
                            stderrBuf = '';
                            clearTimeout(readyTimeout);
                            const modelMatch = line.match(/model=(\S+)/);
                            const dimsMatch = line.match(/dims=(\d+)/);
                            if (modelMatch)
                                this.modelName = modelMatch[1];
                            if (dimsMatch)
                                this.dims = parseInt(dimsMatch[1], 10);
                            this.available = true;
                            log.info('embedding subprocess ready', {
                                model: this.modelName,
                                dims: this.dims,
                            });
                            resolve();
                            return;
                        }
                        if (line.startsWith('ERROR')) {
                            handshakeDone = true;
                            stderrBuf = '';
                            clearTimeout(readyTimeout);
                            this.available = false;
                            reject(new Error(`Embedding subprocess: ${line}`));
                            return;
                        }
                    }
                });
                proc.on('error', (err) => {
                    handshakeDone = true;
                    clearTimeout(readyTimeout);
                    log.error('embedding subprocess error', { error: String(err) });
                    releaseIfCurrent(`Embedding subprocess error: ${String(err)}`);
                    reject(err);
                });
                proc.on('close', (code) => {
                    handshakeDone = true;
                    clearTimeout(readyTimeout);
                    const reason = `Embedding subprocess exited with code ${code}`;
                    releaseIfCurrent(reason);
                    if (code !== 0 && code !== null) {
                        log.warn('embedding subprocess exited', { code });
                    }
                    // Always settle the handshake: a child that exits cleanly
                    // or is killed by a signal before READY would otherwise
                    // leave callers waiting forever. This is a no-op when the
                    // handshake has already been resolved or rejected.
                    reject(new Error(reason));
                });
            });
            proc.stdout.on('data', (data) => {
                // Ignore output of a child that is no longer the current one.
                if (this.proc === proc) {
                    this.handleStdoutData(data.toString());
                }
            });
            this.resetIdleTimer();
        }
        catch (err) {
            // Leave the state of a newer child untouched.
            if (this.proc === child || this.proc === null) {
                this.available = false;
                this.proc = null;
                this.spawnPromise = null;
            }
            // Do not leak a child whose handshake failed (e.g. ERROR line);
            // kill() is harmless if it has already exited.
            child?.kill();
            log.error('failed to spawn embedding subprocess', { error: String(err) });
            throw err;
        }
    }

    /**
     * Consumes a chunk of stdout: splits it into lines, keeps the trailing
     * partial line for the next chunk, and settles the pending request whose
     * id matches each parsed response.
     *
     * @param data Newly received stdout text.
     */
    handleStdoutData(data) {
        this.stdoutBuffer += data;
        const lines = this.stdoutBuffer.split('\n');
        // The last element is an incomplete line (or '' after a newline).
        this.stdoutBuffer = lines.pop() ?? '';
        for (const line of lines) {
            const trimmed = line.trim();
            if (!trimmed)
                continue;
            try {
                const response = JSON.parse(trimmed);
                const id = response.id;
                const entry = id ? this.pending.get(id) : undefined;
                if (!entry) {
                    log.warn('received response for unknown request', { id });
                    continue;
                }
                this.pending.delete(id);
                clearTimeout(entry.timeoutHandle);
                if (response.error) {
                    entry.reject(new Error(response.error));
                }
                else {
                    entry.resolve(response);
                }
            }
            catch (err) {
                log.warn('failed to parse subprocess stdout line', {
                    line: trimmed.slice(0, 200),
                    error: String(err),
                });
            }
        }
    }

    /** (Re)starts the idle countdown; when it elapses the child is shut down. */
    resetIdleTimer() {
        if (this.idleTimer) {
            clearTimeout(this.idleTimer);
        }
        const config = getConfig();
        this.idleTimer = setTimeout(() => {
            log.info('embedding subprocess idle timeout, shutting down');
            this.shutdown();
        }, config.embeddingIdleTimeoutMs);
    }
}
|
|
213
|
+
//# sourceMappingURL=subprocess.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"subprocess.js","sourceRoot":"","sources":["../../src/embedding/subprocess.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAqB,MAAM,oBAAoB,CAAC;AAC9D,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACzC,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE5C,MAAM,GAAG,GAAG,YAAY,CAAC,WAAW,CAAC,CAAC;AAEtC,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAC1D,MAAM,WAAW,GAAG,IAAI,CAAC,SAAS,EAAE,IAAI,EAAE,SAAS,EAAE,qBAAqB,CAAC,CAAC;AAmB5E,MAAM,OAAO,mBAAmB;IACtB,IAAI,GAAwB,IAAI,CAAC;IACjC,OAAO,GAAG,IAAI,GAAG,EAA0B,CAAC;IAC5C,SAAS,GAAmB,IAAI,CAAC;IACjC,IAAI,GAAkB,IAAI,CAAC;IAC3B,SAAS,GAAkB,IAAI,CAAC;IAChC,YAAY,GAAyB,IAAI,CAAC;IAC1C,YAAY,GAAG,EAAE,CAAC;IAClB,SAAS,GAAyC,IAAI,CAAC;IACvD,cAAc,CAAS;IACvB,gBAAgB,CAAS;IAEjC,YAAY,OAA2B;QACrC,IAAI,CAAC,cAAc,GAAG,OAAO,EAAE,cAAc,IAAI,KAAK,CAAC;QACvD,IAAI,CAAC,gBAAgB,GAAG,OAAO,EAAE,gBAAgB,IAAI,KAAK,CAAC;IAC7D,CAAC;IAED,WAAW;QACT,OAAO,IAAI,CAAC,SAAS,KAAK,IAAI,CAAC;IACjC,CAAC;IAED,OAAO;QACL,OAAO,IAAI,CAAC,IAAI,CAAC;IACnB,CAAC;IAED,QAAQ;QACN,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,EAAU,EAAE,IAAY;QAClC,IAAI,CAAC;YACH,IAAI,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;gBACrC,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;YAC1C,CAAC;YACD,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;gBACtB,MAAM,IAAI,CAAC,YAAY,CAAC;YAC1B,CAAC;YAED,IAAI,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,SAAS,KAAK,KAAK,EAAE,CAAC;gBAC3C,MAAM,IAAI,KAAK,CAAC,oCAAoC,CAAC,CAAC;YACxD,CAAC;YAED,IAAI,CAAC,cAAc,EAAE,CAAC;YAEtB,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;YAC3B,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,sBAAsB,CAAC,CAAC;YAEnE,OAAO,MAAM,IAAI,OAAO,CAAoB,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;gBAC9D,MAAM,aAAa,GAAG,UAAU,CAAC,GAAG,EAAE;oBACpC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;oBACxB,MAAM,CAAC,IAAI,KAAK,CAAC,qBAAqB,EAAE,oBAAoB,IAAI,CAAC,gBAAgB,IAAI,CAAC,CAAC,CAAC;gBAC1F,CAAC,EAAE,IAAI,CAAC,gBAAgB,CAAC,CAAC;gBAE1B,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,EAAE
,EAAE,OAAO,EAAE,MAAM,EAAE,aAAa,EAAE,CAAC,CAAC;gBAEzD,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,EAAE,IAAI,EAAE,aAAa,EAAE,CAAC,GAAG,IAAI,CAAC;gBACnE,IAAI,CAAC,IAAK,CAAC,KAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YACnC,CAAC,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,GAAG,CAAC,KAAK,CAAC,cAAc,EAAE,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YACtD,MAAM,GAAG,CAAC;QACZ,CAAC;IACH,CAAC;IAED,QAAQ;QACN,IAAI,CAAC;YACH,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;gBACnB,YAAY,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBAC7B,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;YACxB,CAAC;YAED,KAAK,MAAM,CAAC,EAAE,EAAE,OAAO,CAAC,IAAI,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC;gBACnD,YAAY,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;gBACpC,OAAO,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,0BAA0B,CAAC,CAAC,CAAC;gBACtD,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;YAC1B,CAAC;YAED,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;gBACd,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC;gBACvB,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;gBACjB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;YACnB,CAAC;YAED,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;YACzB,GAAG,CAAC,IAAI,CAAC,gCAAgC,CAAC,CAAC;QAC7C,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,GAAG,CAAC,KAAK,CAAC,gBAAgB,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACtD,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,YAAY;QACxB,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;QAE3B,IAAI,CAAC;YACH,GAAG,CAAC,IAAI,CAAC,+BAA+B,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,cAAc,EAAE,CAAC,CAAC;YAE5E,MAAM,IAAI,GAAG,KAAK,CAAC,SAAS,EAAE;gBAC5B,WAAW;gBACX,MAAM,CAAC,cAAc;gBACrB,MAAM,CAAC,MAAM,CAAC,sBAAsB,CAAC;aACtC,EAAE;gBACD,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;aAChC,CAAC,CAAC;YAEH,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;YAEjB,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;gBAC1C,MAAM,YAAY,GAAG,UAAU,CAAC,GAAG,EAAE;oBACnC,MAAM,CAAC,IAAI,KAAK,CAAC,4CAA4C,IAAI,CAAC,cAAc,IAAI,CAAC,CAAC,CAAC;oBACvF,IAAI,CAAC,IAAI,EAAE,CAAC;gBACd,CAAC,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;gBAExB,IAAI,SAAS,GAAG,EAAE,CAAC;gBAEnB,IAAI,CAAC,MAAO,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,IAAY,EAAE,EAAE;oBACvC,SAAS,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;oBAC7B,MAAM,KAAK,GAAG,SAAS,CAAC,KAA
K,CAAC,IAAI,CAAC,CAAC;oBAEpC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;wBACzB,IAAI,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;4BAC7B,YAAY,CAAC,YAAY,CAAC,CAAC;4BAC3B,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;4BAC7C,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;4BAC3C,IAAI,UAAU;gCAAE,IAAI,CAAC,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;4BAC/C,IAAI,SAAS;gCAAE,IAAI,CAAC,IAAI,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;4BAEtD,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;4BACtB,GAAG,CAAC,IAAI,CAAC,4BAA4B,EAAE;gCACrC,KAAK,EAAE,IAAI,CAAC,SAAS;gCACrB,IAAI,EAAE,IAAI,CAAC,IAAI;6BAChB,CAAC,CAAC;4BACH,OAAO,EAAE,CAAC;4BACV,OAAO;wBACT,CAAC;wBACD,IAAI,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;4BAC7B,YAAY,CAAC,YAAY,CAAC,CAAC;4BAC3B,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;4BACvB,MAAM,CAAC,IAAI,KAAK,CAAC,yBAAyB,IAAI,EAAE,CAAC,CAAC,CAAC;4BACnD,OAAO;wBACT,CAAC;oBACH,CAAC;gBACH,CAAC,CAAC,CAAC;gBAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,EAAE;oBACvB,YAAY,CAAC,YAAY,CAAC,CAAC;oBAC3B,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;oBACvB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;oBACjB,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;oBACzB,GAAG,CAAC,KAAK,CAAC,4BAA4B,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;oBAChE,MAAM,CAAC,GAAG,CAAC,CAAC;gBACd,CAAC,CAAC,CAAC;gBAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;oBACxB,YAAY,CAAC,YAAY,CAAC,CAAC;oBAC3B,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;oBACvB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;oBACjB,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;oBAEzB,KAAK,MAAM,CAAC,EAAE,EAAE,OAAO,CAAC,IAAI,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC;wBACnD,YAAY,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;wBACpC,OAAO,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,yCAAyC,IAAI,EAAE,CAAC,CAAC,CAAC;wBAC3E,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;oBAC1B,CAAC;oBAED,IAAI,IAAI,KAAK,CAAC,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;wBAChC,GAAG,CAAC,IAAI,CAAC,6BAA6B,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC;wBAClD,MAAM,CAAC,IAAI,KAAK,CAAC,yCAAyC,IAAI,EAAE,CAAC,CAAC,CAAC;oBACrE,CAAC;gBACH,CAAC,CAAC,CAAC;YACL,CAAC,CAAC,CAAC;YAEH,IAAI,CAAC,MAAO,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,IAAY,EAAE,EAAE;gBACvC,IAAI,CAAC,gBAAgB,CAAC,
IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;YACzC,CAAC,CAAC,CAAC;YAEH,IAAI,CAAC,cAAc,EAAE,CAAC;QAExB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;YACvB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;YACjB,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;YACzB,GAAG,CAAC,KAAK,CAAC,sCAAsC,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAC1E,MAAM,GAAG,CAAC;QACZ,CAAC;IACH,CAAC;IAEO,gBAAgB,CAAC,IAAY;QACnC,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC;QAE1B,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC5C,IAAI,CAAC,YAAY,GAAG,KAAK,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC;QAEtC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAC5B,IAAI,CAAC,OAAO;gBAAE,SAAS;YAEvB,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAsB,CAAC;gBAC1D,MAAM,EAAE,GAAG,QAAQ,CAAC,EAAE,CAAC;gBAEvB,IAAI,EAAE,IAAI,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;oBAC/B,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAE,CAAC;oBACtC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;oBACxB,YAAY,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;oBAEpC,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;wBACnB,OAAO,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;oBAC5C,CAAC;yBAAM,CAAC;wBACN,OAAO,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;oBAC5B,CAAC;gBACH,CAAC;qBAAM,CAAC;oBACN,GAAG,CAAC,IAAI,CAAC,uCAAuC,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC;gBAC5D,CAAC;YACH,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,GAAG,CAAC,IAAI,CAAC,wCAAwC,EAAE;oBACjD,IAAI,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;oBAC3B,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC;iBACnB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAEO,cAAc;QACpB,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACnB,YAAY,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAC/B,CAAC;QAED,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;QAC3B,IAAI,CAAC,SAAS,GAAG,UAAU,CAAC,GAAG,EAAE;YAC/B,GAAG,CAAC,IAAI,CAAC,kDAAkD,CAAC,CAAC;YAC7D,IAAI,CAAC,QAAQ,EAAE,CAAC;QAClB,CAAC,EAAE,MAAM,CAAC,sBAAsB,CAAC,CAAC;IACpC,CAAC;CACF"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/** A URL together with the vector stored for it. */
export interface VectorEntry {
    /** URL the vector belongs to. */
    url: string;
    /** Vector values as 32-bit floats. */
    vector: Float32Array;
}
|
|
5
|
+
/** One hit returned by `VectorIndex.findSimilar`. */
export interface SimilarResult {
    /** URL of the matching entry. */
    url: string;
    /**
     * Similarity score of the hit.
     * NOTE(review): presumably a cosine similarity — confirm against the implementation.
     */
    score: number;
}
|
|
9
|
+
/**
 * Collection of vectors addressed by URL, with a similarity lookup.
 *
 * NOTE(review): only the declaration is visible here; the behavioral notes
 * below are inferred from the signatures and should be confirmed against the
 * implementation.
 */
export declare class VectorIndex {
    private entries;

    /** Presumably the number of stored entries. */
    size(): number;

    /** Stores `vector` under `url`. */
    add(url: string, vector: Float32Array): void;

    /** Removes the entry for `url`; presumably returns whether one existed. */
    remove(url: string): boolean;

    /** Reports whether an entry for `url` is stored. */
    has(url: string): boolean;

    /** Returns the vector stored for `url`, or `undefined`. */
    get(url: string): Float32Array | undefined;

    /**
     * Looks up entries similar to `queryVector`.
     *
     * @param queryVector Vector to compare against.
     * @param topK Presumably the maximum number of results — TODO confirm.
     * @param excludeUrls Optional URLs to leave out of the results (per the name).
     */
    findSimilar(queryVector: Float32Array, topK: number, excludeUrls?: Set<string>): SimilarResult[];

    /** Presumably removes all entries. */
    clear(): void;

    /**
     * Loads entries from raw buffers.
     *
     * @param entries Records with a `url`, the raw `embedding` bytes and the
     * declared `dims`.
     * @returns A count — presumably the number of entries loaded; TODO confirm.
     */
    loadFromBuffers(entries: {
        url: string;
        embedding: Buffer;
        dims: number;
    }[]): number;

    /** Presumably the URLs of all stored entries. */
    getAllUrls(): string[];
}
|
|
25
|
+
/**
 * Similarity of two vectors as a single number.
 *
 * NOTE(review): per its name this presumably computes the cosine similarity;
 * the implementation is not part of this declaration file, so behavior for
 * zero vectors or mismatched lengths needs to be confirmed there.
 */
export declare function cosineSimilarity(a: Float32Array, b: Float32Array): number;
|
|
26
|
+
//# sourceMappingURL=vector-index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"vector-index.d.ts","sourceRoot":"","sources":["../../src/embedding/vector-index.ts"],"names":[],"mappings":"AAIA,MAAM,WAAW,WAAW;IAC1B,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,YAAY,CAAC;CACtB;AAED,MAAM,WAAW,aAAa;IAC5B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;CACf;AAED,qBAAa,WAAW;IACtB,OAAO,CAAC,OAAO,CAAmC;IAElD,IAAI,IAAI,MAAM;IAId,GAAG,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,YAAY,GAAG,IAAI;IAI5C,MAAM,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO;IAI5B,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO;IAIzB,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,YAAY,GAAG,SAAS;IAI1C,WAAW,CAAC,WAAW,EAAE,YAAY,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,aAAa,EAAE;IAoBhG,KAAK,IAAI,IAAI;IAIb,eAAe,CAAC,OAAO,EAAE,KAAK,CAAC;QAAE,GAAG,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC,GAAG,MAAM;IAkBzF,UAAU,IAAI,MAAM,EAAE;CAGvB;AAED,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,GAAG,MAAM,CAkBzE"}
|