@vainplex/openclaw-knowledge-engine 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +374 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.js +29 -0
- package/dist/src/config.d.ts +15 -0
- package/dist/src/config.js +153 -0
- package/dist/src/embeddings.d.ts +23 -0
- package/dist/src/embeddings.js +63 -0
- package/dist/src/entity-extractor.d.ts +30 -0
- package/dist/src/entity-extractor.js +123 -0
- package/dist/src/fact-store.d.ts +77 -0
- package/dist/src/fact-store.js +222 -0
- package/dist/src/hooks.d.ts +24 -0
- package/dist/src/hooks.js +94 -0
- package/dist/src/http-client.d.ts +9 -0
- package/dist/src/http-client.js +58 -0
- package/dist/src/llm-enhancer.d.ts +44 -0
- package/dist/src/llm-enhancer.js +166 -0
- package/dist/src/maintenance.d.ts +26 -0
- package/dist/src/maintenance.js +87 -0
- package/dist/src/patterns.d.ts +5 -0
- package/dist/src/patterns.js +69 -0
- package/dist/src/storage.d.ts +41 -0
- package/dist/src/storage.js +110 -0
- package/dist/src/types.d.ts +122 -0
- package/dist/src/types.js +2 -0
- package/index.ts +38 -0
- package/openclaw.plugin.json +125 -0
- package/package.json +36 -0
- package/src/config.ts +180 -0
- package/src/embeddings.ts +82 -0
- package/src/entity-extractor.ts +137 -0
- package/src/fact-store.ts +260 -0
- package/src/hooks.ts +125 -0
- package/src/http-client.ts +74 -0
- package/src/llm-enhancer.ts +187 -0
- package/src/maintenance.ts +102 -0
- package/src/patterns.ts +90 -0
- package/src/storage.ts +122 -0
- package/src/types.ts +144 -0
- package/test/config.test.ts +152 -0
- package/test/embeddings.test.ts +118 -0
- package/test/entity-extractor.test.ts +121 -0
- package/test/fact-store.test.ts +266 -0
- package/test/hooks.test.ts +120 -0
- package/test/http-client.test.ts +68 -0
- package/test/llm-enhancer.test.ts +132 -0
- package/test/maintenance.test.ts +117 -0
- package/test/patterns.test.ts +123 -0
- package/test/storage.test.ts +86 -0
- package/tsconfig.json +26 -0

package/src/fact-store.ts
ADDED
@@ -0,0 +1,260 @@
// src/fact-store.ts

import { randomUUID } from 'node:crypto';
import { AtomicStorage } from './storage.js';
import type { Fact, FactsData, KnowledgeConfig, Logger } from './types.js';

/**
 * Manages an in-memory and on-disk store of structured facts.
 * Provides methods for loading, querying, modifying, and persisting facts.
 */
export class FactStore {
  private readonly storage: AtomicStorage;
  private readonly config: KnowledgeConfig['storage'];
  private readonly logger: Logger;
  private facts: Map<string, Fact> = new Map();
  private isLoaded: boolean = false;

  public readonly commit: () => Promise<void>;

  constructor(
    workspace: string,
    config: KnowledgeConfig['storage'],
    logger: Logger
  ) {
    this.storage = new AtomicStorage(workspace, logger);
    this.config = config;
    this.logger = logger;

    // Create a debounced version of the persist method.
    this.commit = AtomicStorage.debounce(
      this.persist.bind(this),
      this.config.writeDebounceMs
    );
  }

  /**
   * Immediately flushes any pending debounced writes.
   * Useful in tests and before shutdown to ensure data is persisted.
   */
  public async flush(): Promise<void> {
    if (this.isLoaded) {
      await this.persist();
    }
  }

  /**
   * Loads facts from the `facts.json` file into the in-memory store.
   * If the file doesn't exist, it initializes an empty store.
   */
  public async load(): Promise<void> {
    if (this.isLoaded) {
      this.logger.debug('Fact store is already loaded.');
      return;
    }

    await this.storage.init();
    const data = await this.storage.readJson<FactsData>('facts.json');
    if (data && Array.isArray(data.facts)) {
      this.facts = new Map(data.facts.map(fact => [fact.id, fact]));
      this.logger.info(`Loaded ${this.facts.size} facts from storage.`);
    } else {
      this.logger.info('No existing fact store found. Initializing a new one.');
      this.facts = new Map();
    }
    this.isLoaded = true;
  }

  /**
   * Adds a new fact to the store or updates an existing one based on content.
   * @param newFactData The data for the new fact, excluding metadata fields.
   * @returns The newly created or found Fact object.
   */
  public addFact(
    newFactData: Omit<Fact, 'id' | 'createdAt' | 'lastAccessed' | 'relevance'>
  ): Fact {
    if (!this.isLoaded) {
      throw new Error('FactStore has not been loaded yet. Call load() first.');
    }
    const now = new Date().toISOString();

    // Check if a similar fact already exists to avoid duplicates
    for (const existingFact of this.facts.values()) {
      if (
        existingFact.subject === newFactData.subject &&
        existingFact.predicate === newFactData.predicate &&
        existingFact.object === newFactData.object
      ) {
        // Fact already exists, let's just boost its relevance and update timestamp
        existingFact.relevance = this.boostRelevance(existingFact.relevance);
        existingFact.lastAccessed = now;
        this.commit();
        return existingFact;
      }
    }

    const newFact: Fact = {
      ...newFactData,
      id: randomUUID(),
      createdAt: now,
      lastAccessed: now,
      relevance: 1.0, // New facts start with maximum relevance
    };

    this.facts.set(newFact.id, newFact);
    this.prune(); // Check if we need to prune old facts
    this.commit();
    return newFact;
  }

  /**
   * Retrieves a fact by its unique ID.
   * @param id The UUID of the fact.
   * @returns The Fact object, or undefined if not found.
   */
  public getFact(id: string): Fact | undefined {
    const fact = this.facts.get(id);
    if (fact) {
      fact.lastAccessed = new Date().toISOString();
      fact.relevance = this.boostRelevance(fact.relevance);
      this.commit();
    }
    return fact;
  }

  /**
   * Queries the fact store based on subject, predicate, or object.
   * @param query An object with optional subject, predicate, and/or object to match.
   * @returns An array of matching facts, sorted by relevance.
   */
  public query(query: { subject?: string; predicate?: string; object?: string }): Fact[] {
    const results: Fact[] = [];
    for (const fact of this.facts.values()) {
      const subjectMatch = !query.subject || fact.subject === query.subject;
      const predicateMatch = !query.predicate || fact.predicate === query.predicate;
      const objectMatch = !query.object || fact.object === query.object;

      if (subjectMatch && predicateMatch && objectMatch) {
        results.push(fact);
      }
    }
    // Sort by relevance, descending
    return results.sort((a, b) => b.relevance - a.relevance);
  }

  /**
   * Applies a decay factor to the relevance score of all facts.
   * @param rate The decay rate (e.g., 0.05 for 5%).
   * @returns An object with the count of decayed facts.
   */
  public decayFacts(rate: number): { decayedCount: number } {
    let decayedCount = 0;
    const minRelevance = 0.1; // Floor to prevent facts from disappearing completely

    for (const fact of this.facts.values()) {
      const newRelevance = fact.relevance * (1 - rate);
      if (newRelevance !== fact.relevance) {
        fact.relevance = Math.max(minRelevance, newRelevance);
        decayedCount++;
      }
    }

    if (decayedCount > 0) {
      this.logger.info(`Applied decay rate of ${rate * 100}% to ${decayedCount} facts.`);
      this.commit();
    }
    return { decayedCount };
  }

  /**
   * Persists the current in-memory fact store to `facts.json`.
   */
  private async persist(): Promise<void> {
    if (!this.isLoaded) {
      this.logger.warn('Attempted to persist fact store before it was loaded. Aborting.');
      return;
    }

    const data: FactsData = {
      updated: new Date().toISOString(),
      facts: Array.from(this.facts.values()),
    };

    try {
      await this.storage.writeJson('facts.json', data);
      this.logger.debug(`Successfully persisted ${data.facts.length} facts.`);
    } catch (err) {
      this.logger.error('Failed to persist fact store.', err as Error);
    }
  }

  /**
   * Removes the least relevant facts if the store exceeds its configured max size.
   */
  private prune(): void {
    const factCount = this.facts.size;
    if (factCount <= this.config.maxFacts) {
      return;
    }

    const factsToPrune = factCount - this.config.maxFacts;
    if (factsToPrune <= 0) return;

    // Get all facts, sort by relevance (ascending) and then by lastAccessed (ascending)
    const sortedFacts = Array.from(this.facts.values()).sort((a, b) => {
      if (a.relevance !== b.relevance) {
        return a.relevance - b.relevance;
      }
      return new Date(a.lastAccessed).getTime() - new Date(b.lastAccessed).getTime();
    });

    for (let i = 0; i < factsToPrune; i++) {
      this.facts.delete(sortedFacts[i].id);
    }

    this.logger.info(`Pruned ${factsToPrune} least relevant facts to maintain store size.`);
  }

  /**
   * Boosts the relevance of a fact upon access.
   * @param currentRelevance The current relevance score.
   * @returns The new, boosted relevance score.
   */
  private boostRelevance(currentRelevance: number): number {
    // Push the relevance 50% closer to 1.0
    const boost = (1.0 - currentRelevance) * 0.5;
    return Math.min(1.0, currentRelevance + boost);
  }

  /**
   * Returns a list of all facts that have not been embedded yet.
   */
  public getUnembeddedFacts(): Fact[] {
    const results: Fact[] = [];
    for (const fact of this.facts.values()) {
      if (!fact.embedded) {
        results.push(fact);
      }
    }
    return results;
  }

  /**
   * Marks a list of facts as having been embedded.
   * @param factIds An array of fact IDs to update.
   */
  public markFactsAsEmbedded(factIds: string[]): void {
    const now = new Date().toISOString();
    let updatedCount = 0;
    for (const id of factIds) {
      const fact = this.facts.get(id);
      if (fact) {
        fact.embedded = now;
        updatedCount++;
      }
    }
    if (updatedCount > 0) {
      this.commit();
    }
  }
}
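
To make the FactStore lifecycle concrete, here is a minimal usage sketch. It is not part of the package: the workspace path and config values are placeholders, the Logger and storage-config shapes are inferred from the calls above (debug/info/warn/error; maxFacts and writeDebounceMs), and a Fact is assumed to need only subject, predicate, object, and source beyond the omitted metadata fields.

```ts
// Hypothetical usage sketch for FactStore (not part of the package).
import { FactStore } from './fact-store.js';
import type { KnowledgeConfig, Logger } from './types.js';

// Assumption: Logger exposes debug/info/warn/error, so a console-backed stub suffices.
const logger = console as unknown as Logger;

// Field names come from the constructor and prune() above; values are illustrative,
// and the real config type may require more fields.
const storageConfig = { maxFacts: 1000, writeDebounceMs: 500 } as KnowledgeConfig['storage'];

async function main(): Promise<void> {
  const store = new FactStore('/tmp/knowledge-workspace', storageConfig, logger);
  await store.load(); // required before addFact()/getFact()

  store.addFact({
    subject: 'openclaw',
    predicate: 'written-in',
    object: 'typescript',
    source: 'extracted-llm', // the only source value visible in this diff
  } as Parameters<FactStore['addFact']>[0]);

  const matches = store.query({ subject: 'openclaw' }); // sorted by relevance, descending
  logger.info(`Found ${matches.length} fact(s).`);

  await store.flush(); // force the debounced commit to disk before exiting
}

main().catch((err) => logger.error('Sketch failed.', err as Error));
```

If the real Fact or storage-config types require additional fields, the two assertions above are where that would surface.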

package/src/hooks.ts
ADDED
@@ -0,0 +1,125 @@
// src/hooks.ts

import {
  OpenClawPluginApi,
  HookEvent,
  KnowledgeConfig,
  Logger,
} from './types.js';

import { EntityExtractor } from './entity-extractor.js';
import { FactStore } from './fact-store.js';
import { LlmEnhancer } from './llm-enhancer.js';
import { Maintenance } from './maintenance.js';
import { Embeddings } from './embeddings.js';

/**
 * Manages the registration and orchestration of all plugin hooks.
 */
export class HookManager {
  private readonly api: OpenClawPluginApi;
  private readonly config: KnowledgeConfig;
  private readonly logger: Logger;

  private entityExtractor: EntityExtractor;
  private factStore: FactStore;
  private llmEnhancer?: LlmEnhancer;
  private maintenance?: Maintenance;

  constructor(api: OpenClawPluginApi, config: KnowledgeConfig) {
    this.api = api;
    this.config = config;
    this.logger = api.logger;

    this.entityExtractor = new EntityExtractor(this.logger);
    this.factStore = new FactStore(
      this.config.workspace,
      this.config.storage,
      this.logger
    );

    if (this.config.extraction.llm.enabled) {
      this.llmEnhancer = new LlmEnhancer(this.config.extraction.llm, this.logger);
    }
  }

  /** Registers all the necessary hooks with the OpenClaw host. */
  public registerHooks(): void {
    if (!this.config.enabled) {
      this.logger.info('Knowledge Engine is disabled. No hooks registered.');
      return;
    }

    this.api.on('session_start', this.onSessionStart.bind(this), { priority: 200 });
    this.api.on('message_received', this.onMessage.bind(this), { priority: 100 });
    this.api.on('message_sent', this.onMessage.bind(this), { priority: 100 });
    this.api.on('gateway_stop', this.onShutdown.bind(this), { priority: 900 });

    this.logger.info('Registered all Knowledge Engine hooks.');
  }

  /** Handler for the `session_start` hook. */
  private async onSessionStart(): Promise<void> {
    this.logger.info('Knowledge Engine starting up...');
    await this.factStore.load();

    const embeddings = this.config.embeddings.enabled
      ? new Embeddings(this.config.embeddings, this.logger)
      : undefined;

    this.maintenance = new Maintenance(
      this.config, this.logger, this.factStore, embeddings
    );
    this.maintenance.start();
  }

  /** Handler for `gateway_stop` — cleans up timers and flushes state. */
  private async onShutdown(): Promise<void> {
    this.logger.info('Knowledge Engine shutting down...');
    this.maintenance?.stop();
    this.llmEnhancer?.clearTimers();
    this.logger.info('Knowledge Engine shutdown complete.');
  }

  /** Handler for `message_received` and `message_sent` hooks. */
  private async onMessage(event: HookEvent): Promise<void> {
    const text = event.content || event.message || event.text;
    if (typeof text !== 'string' || text.trim() === '') return;

    this.logger.debug(`Processing message: "${text.substring(0, 50)}..."`);

    if (this.config.extraction.regex.enabled) {
      const entities = this.entityExtractor.extract(text);
      if (entities.length > 0) {
        this.logger.info(`Extracted ${entities.length} entities via regex.`);
      }
    }

    if (this.llmEnhancer) {
      const messageId = `msg-${Date.now()}`;
      this.llmEnhancer.addToBatch({ id: messageId, text });
      this.processLlmBatchWhenReady().catch(err =>
        this.logger.error('LLM batch processing failed', err as Error));
    }
  }

  /** Fire-and-forget: processes LLM batch results when available. */
  private async processLlmBatchWhenReady(): Promise<void> {
    if (!this.llmEnhancer) return;

    const result = await this.llmEnhancer.sendBatch();
    if (!result) return;

    if (result.facts.length > 0) {
      this.logger.info(`Adding ${result.facts.length} LLM facts.`);
      for (const f of result.facts) {
        this.factStore.addFact({
          subject: f.subject,
          predicate: f.predicate,
          object: f.object,
          source: 'extracted-llm',
        });
      }
    }
  }
}
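
HookManager only touches two members of the host API, api.logger and api.on(event, handler, { priority }), so a stubbed host is enough to exercise the registration flow. The sketch below is hypothetical: the stub's shape, the config values, and the simulated events are assumptions, not a description of the real OpenClaw host.

```ts
// Hypothetical host-wiring sketch for HookManager (not part of the package).
import { HookManager } from './hooks.js';
import type { HookEvent, KnowledgeConfig, OpenClawPluginApi } from './types.js';

type Handler = (event: HookEvent) => Promise<void>;
const handlers = new Map<string, Handler[]>();

// Stub host: records handlers by event name; only logger and on() are used above.
const api = {
  logger: console,
  on(event: string, handler: Handler, _opts?: { priority?: number }): void {
    handlers.set(event, [...(handlers.get(event) ?? []), handler]);
  },
} as unknown as OpenClawPluginApi;

// Illustrative config; field names mirror the reads in hooks.ts, real defaults live in src/config.ts.
const config = {
  enabled: true,
  workspace: '/tmp/knowledge-workspace',
  storage: { maxFacts: 1000, writeDebounceMs: 500 },
  embeddings: { enabled: false },
  extraction: { regex: { enabled: true }, llm: { enabled: false } },
} as unknown as KnowledgeConfig;

new HookManager(api, config).registerHooks();

// Simulate one session: start, one message, shutdown.
async function simulate(): Promise<void> {
  const fire = async (event: string, payload: HookEvent = {} as HookEvent) => {
    for (const h of handlers.get(event) ?? []) await h(payload);
  };
  await fire('session_start');
  await fire('message_received', { content: 'Alice works at Acme.' } as HookEvent);
  await fire('gateway_stop');
}

simulate().catch(console.error);
```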

package/src/http-client.ts
ADDED
@@ -0,0 +1,74 @@
// src/http-client.ts

import * as http from 'node:http';
import * as https from 'node:https';

interface HttpPostOptions {
  hostname: string;
  port: string;
  path: string;
  method: 'POST';
  headers: Record<string, string | number>;
}

/**
 * Selects the correct HTTP/HTTPS module based on the URL protocol.
 */
function selectTransport(protocol: string): typeof http | typeof https {
  return protocol === 'https:' ? https : http;
}

/**
 * Builds request options from a URL and payload.
 */
function buildRequestOptions(url: URL, payload: string): HttpPostOptions {
  return {
    hostname: url.hostname,
    port: url.port,
    path: url.pathname + url.search,
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Content-Length': Buffer.byteLength(payload),
    },
  };
}

/**
 * Makes an HTTP or HTTPS POST request, auto-selecting the
 * transport based on the URL's protocol.
 *
 * @param url The full URL string to POST to.
 * @param body The payload object to JSON-serialize and send.
 * @returns A promise resolving with the response body string.
 */
export function httpPost(url: string, body: unknown): Promise<string> {
  return new Promise((resolve, reject) => {
    const parsed = new URL(url);
    const payload = JSON.stringify(body);
    const options = buildRequestOptions(parsed, payload);
    const transport = selectTransport(parsed.protocol);

    const req = transport.request(options, (res) => {
      let data = '';
      res.setEncoding('utf8');
      res.on('data', (chunk: string) => { data += chunk; });
      res.on('end', () => {
        if (res.statusCode && res.statusCode >= 200 && res.statusCode < 300) {
          resolve(data);
        } else {
          reject(new Error(
            `HTTP request failed with status ${res.statusCode}: ${data}`
          ));
        }
      });
    });

    req.on('error', (e) => {
      reject(new Error(`HTTP request error: ${e.message}`));
    });

    req.write(payload);
    req.end();
  });
}
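
Since httpPost resolves with the raw response body on a 2xx status and rejects otherwise, callers are expected to JSON-parse the result themselves. A small usage sketch follows; the endpoint URL is hypothetical and the payload shape simply mirrors how llm-enhancer.ts calls this helper.

```ts
// Hypothetical httpPost usage sketch (endpoint and payload are illustrative).
import { httpPost } from './http-client.js';

async function demo(): Promise<void> {
  try {
    const body = await httpPost('http://localhost:11434/api/generate', {
      model: 'example-model',
      prompt: 'Respond with {"ok": true}',
      stream: false,
      format: 'json', // same fields makeHttpRequest() sends in llm-enhancer.ts
    });
    console.log(JSON.parse(body)); // httpPost returns the body as a plain string
  } catch (err) {
    console.error('POST failed:', err); // non-2xx responses and socket errors both reject
  }
}

demo().catch(console.error);
```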

package/src/llm-enhancer.ts
ADDED
@@ -0,0 +1,187 @@
// src/llm-enhancer.ts

import { Entity, Fact, KnowledgeConfig, Logger } from './types.js';
import { httpPost } from './http-client.js';

interface LlmBatchItem {
  id: string;
  text: string;
}

interface LlmExtractionResult {
  entities: Omit<Entity, 'id' | 'mentions' | 'count' | 'lastSeen' | 'source'>[];
  facts: Omit<Fact, 'id' | 'relevance' | 'createdAt' | 'lastAccessed' | 'source'>[];
}

/**
 * Manages batched requests to an external LLM for entity and fact extraction.
 */
export class LlmEnhancer {
  private readonly config: KnowledgeConfig['extraction']['llm'];
  private readonly logger: Logger;
  private batch: LlmBatchItem[] = [];
  private cooldownTimeout: NodeJS.Timeout | null = null;

  constructor(config: KnowledgeConfig['extraction']['llm'], logger: Logger) {
    this.config = config;
    this.logger = logger;
  }

  /**
   * Adds a message to the current batch.
   * Triggers a batch send (with proper error handling) when the size is reached.
   */
  public addToBatch(item: LlmBatchItem): void {
    if (!this.config.enabled) return;

    this.batch.push(item);
    this.logger.debug(`Added message ${item.id} to LLM batch. Current size: ${this.batch.length}`);

    if (this.batch.length >= this.config.batchSize) {
      this.logger.info(`LLM batch size reached (${this.config.batchSize}). Sending immediately.`);
      // S1: properly await and catch errors from sendBatch
      this.sendBatch().catch(err => {
        this.logger.error('Error sending LLM batch.', err as Error);
      });
    } else {
      this.resetCooldownTimer();
    }
  }

  /** Resets the cooldown timer. When it expires the batch is sent. */
  private resetCooldownTimer(): void {
    if (this.cooldownTimeout) clearTimeout(this.cooldownTimeout);
    this.cooldownTimeout = setTimeout(() => {
      if (this.batch.length > 0) {
        this.logger.info('LLM cooldown expired. Sending batch.');
        this.sendBatch().catch(err => {
          this.logger.error('Error sending LLM batch on cooldown.', err as Error);
        });
      }
    }, this.config.cooldownMs);
    this.cooldownTimeout.unref();
  }

  /**
   * Clears all pending timers. Called during shutdown.
   */
  public clearTimers(): void {
    if (this.cooldownTimeout) {
      clearTimeout(this.cooldownTimeout);
      this.cooldownTimeout = null;
    }
  }

  /**
   * Sends the current batch to the LLM for processing.
   */
  public async sendBatch(): Promise<{ entities: Entity[]; facts: Fact[] } | null> {
    this.clearTimers();

    if (this.batch.length === 0) return null;

    const currentBatch = [...this.batch];
    this.batch = [];

    const prompt = this.constructPrompt(currentBatch);

    try {
      const responseJson = await this.makeHttpRequest(prompt);
      const result = this.parseLlmResponse(responseJson);
      const entities = this.transformToEntities(result.entities);
      const facts = this.transformToFacts(result.facts);
      this.logger.info(`LLM extracted ${entities.length} entities and ${facts.length} facts.`);
      return { entities, facts };
    } catch (err) {
      this.logger.error('Failed to send or process LLM batch.', err as Error);
      return null;
    }
  }

  /** Constructs the prompt to be sent to the LLM. */
  private constructPrompt(batch: LlmBatchItem[]): string {
    const conversation = batch.map(item => item.text).join('\n');
    return [
      'Analyze the following conversation and extract key entities and facts.',
      'Respond with a single JSON object containing "entities" and "facts".',
      '',
      'For "entities", provide objects with "type", "value", and "importance".',
      'Valid types: "person", "location", "organization", "product", "concept".',
      '',
      'For "facts", provide triples (subject, predicate, object).',
      '',
      'Conversation:',
      '---',
      conversation,
      '---',
      '',
      'JSON Response:',
    ].join('\n');
  }

  /** Makes an HTTP(S) request to the configured LLM endpoint. */
  private makeHttpRequest(prompt: string): Promise<string> {
    return httpPost(this.config.endpoint, {
      model: this.config.model,
      prompt,
      stream: false,
      format: 'json',
    });
  }

  /** Parses and validates the JSON response from the LLM. */
  private parseLlmResponse(responseJson: string): LlmExtractionResult {
    try {
      const outer = JSON.parse(responseJson) as Record<string, unknown>;
      const inner = typeof outer.response === 'string'
        ? outer.response : JSON.stringify(outer);
      const data = JSON.parse(inner) as Record<string, unknown>;
      if (!data || typeof data !== 'object') {
        throw new Error('LLM response is not a valid object.');
      }
      return {
        entities: Array.isArray(data.entities) ? data.entities : [],
        facts: Array.isArray(data.facts) ? data.facts : [],
      };
    } catch (err) {
      this.logger.error(`Failed to parse LLM response: ${responseJson}`, err as Error);
      throw new Error('Invalid JSON response from LLM.');
    }
  }

  /** Transforms raw LLM entity data into the standard Entity format. */
  private transformToEntities(rawEntities: unknown[]): Entity[] {
    const entities: Entity[] = [];
    for (const raw of rawEntities) {
      const r = raw as Record<string, unknown>;
      if (typeof r.value !== 'string' || typeof r.type !== 'string') continue;
      const value = r.value.trim();
      const type = r.type.toLowerCase();
      const id = `${type}:${value.toLowerCase().replace(/\s+/g, '-')}`;
      const imp = typeof r.importance === 'number'
        ? Math.max(0, Math.min(1, r.importance)) : 0.7;
      entities.push({
        id, value, type: type as Entity['type'],
        mentions: [value], count: 1, importance: imp,
        lastSeen: new Date().toISOString(), source: ['llm'],
      });
    }
    return entities;
  }

  /** Transforms raw LLM fact data into partial Fact objects. */
  private transformToFacts(rawFacts: unknown[]): Fact[] {
    const facts: Fact[] = [];
    for (const raw of rawFacts) {
      const r = raw as Record<string, unknown>;
      if (typeof r.subject !== 'string' || typeof r.predicate !== 'string' || typeof r.object !== 'string') continue;
      facts.push({
        subject: r.subject.trim(),
        predicate: r.predicate.trim().toLowerCase().replace(/\s+/g, '-'),
        object: r.object.trim(),
        source: 'extracted-llm',
      } as Fact);
    }
    return facts;
  }
}
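
To show how the batching knobs interact, here is a hypothetical driver for LlmEnhancer. The config fields (enabled, batchSize, cooldownMs, endpoint, model) are the ones the class reads above; the values, the endpoint, and the console-backed logger are placeholders, not package defaults.

```ts
// Hypothetical LlmEnhancer driver (values and endpoint are placeholders).
import { LlmEnhancer } from './llm-enhancer.js';
import type { KnowledgeConfig, Logger } from './types.js';

const llmConfig = {
  enabled: true,
  batchSize: 3,     // addToBatch() sends immediately once 3 items are queued
  cooldownMs: 5000, // otherwise a batch is sent 5s after the most recent item
  endpoint: 'http://localhost:11434/api/generate',
  model: 'example-model',
} as unknown as KnowledgeConfig['extraction']['llm'];

const enhancer = new LlmEnhancer(llmConfig, console as unknown as Logger);

enhancer.addToBatch({ id: 'msg-1', text: 'Alice moved to Berlin last year.' });
enhancer.addToBatch({ id: 'msg-2', text: 'She works at Acme on the search team.' });

// Force a send rather than waiting for batchSize/cooldown.
// A null result means the batch was empty or the request/parse failed.
enhancer.sendBatch().then((result) => {
  if (result) {
    console.log(`entities: ${result.entities.length}, facts: ${result.facts.length}`);
  }
  enhancer.clearTimers();
});
```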