@vainplex/openclaw-knowledge-engine 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +374 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.js +29 -0
- package/dist/src/config.d.ts +15 -0
- package/dist/src/config.js +153 -0
- package/dist/src/embeddings.d.ts +23 -0
- package/dist/src/embeddings.js +63 -0
- package/dist/src/entity-extractor.d.ts +30 -0
- package/dist/src/entity-extractor.js +123 -0
- package/dist/src/fact-store.d.ts +77 -0
- package/dist/src/fact-store.js +222 -0
- package/dist/src/hooks.d.ts +24 -0
- package/dist/src/hooks.js +94 -0
- package/dist/src/http-client.d.ts +9 -0
- package/dist/src/http-client.js +58 -0
- package/dist/src/llm-enhancer.d.ts +44 -0
- package/dist/src/llm-enhancer.js +166 -0
- package/dist/src/maintenance.d.ts +26 -0
- package/dist/src/maintenance.js +87 -0
- package/dist/src/patterns.d.ts +5 -0
- package/dist/src/patterns.js +69 -0
- package/dist/src/storage.d.ts +41 -0
- package/dist/src/storage.js +110 -0
- package/dist/src/types.d.ts +122 -0
- package/dist/src/types.js +2 -0
- package/index.ts +38 -0
- package/openclaw.plugin.json +125 -0
- package/package.json +36 -0
- package/src/config.ts +180 -0
- package/src/embeddings.ts +82 -0
- package/src/entity-extractor.ts +137 -0
- package/src/fact-store.ts +260 -0
- package/src/hooks.ts +125 -0
- package/src/http-client.ts +74 -0
- package/src/llm-enhancer.ts +187 -0
- package/src/maintenance.ts +102 -0
- package/src/patterns.ts +90 -0
- package/src/storage.ts +122 -0
- package/src/types.ts +144 -0
- package/test/config.test.ts +152 -0
- package/test/embeddings.test.ts +118 -0
- package/test/entity-extractor.test.ts +121 -0
- package/test/fact-store.test.ts +266 -0
- package/test/hooks.test.ts +120 -0
- package/test/http-client.test.ts +68 -0
- package/test/llm-enhancer.test.ts +132 -0
- package/test/maintenance.test.ts +117 -0
- package/test/patterns.test.ts +123 -0
- package/test/storage.test.ts +86 -0
- package/tsconfig.json +26 -0
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
// src/llm-enhancer.ts
|
|
2
|
+
import { httpPost } from './http-client.js';
|
|
3
|
+
/**
|
|
4
|
+
* Manages batched requests to an external LLM for entity and fact extraction.
|
|
5
|
+
*/
|
|
6
|
+
export class LlmEnhancer {
|
|
7
|
+
config;
|
|
8
|
+
logger;
|
|
9
|
+
batch = [];
|
|
10
|
+
cooldownTimeout = null;
|
|
11
|
+
constructor(config, logger) {
|
|
12
|
+
this.config = config;
|
|
13
|
+
this.logger = logger;
|
|
14
|
+
}
|
|
15
|
+
/**
|
|
16
|
+
* Adds a message to the current batch.
|
|
17
|
+
* Triggers a batch send (with proper error handling) when the size is reached.
|
|
18
|
+
*/
|
|
19
|
+
addToBatch(item) {
|
|
20
|
+
if (!this.config.enabled)
|
|
21
|
+
return;
|
|
22
|
+
this.batch.push(item);
|
|
23
|
+
this.logger.debug(`Added message ${item.id} to LLM batch. Current size: ${this.batch.length}`);
|
|
24
|
+
if (this.batch.length >= this.config.batchSize) {
|
|
25
|
+
this.logger.info(`LLM batch size reached (${this.config.batchSize}). Sending immediately.`);
|
|
26
|
+
// S1: properly await and catch errors from sendBatch
|
|
27
|
+
this.sendBatch().catch(err => {
|
|
28
|
+
this.logger.error('Error sending LLM batch.', err);
|
|
29
|
+
});
|
|
30
|
+
}
|
|
31
|
+
else {
|
|
32
|
+
this.resetCooldownTimer();
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
/** Resets the cooldown timer. When it expires the batch is sent. */
|
|
36
|
+
resetCooldownTimer() {
|
|
37
|
+
if (this.cooldownTimeout)
|
|
38
|
+
clearTimeout(this.cooldownTimeout);
|
|
39
|
+
this.cooldownTimeout = setTimeout(() => {
|
|
40
|
+
if (this.batch.length > 0) {
|
|
41
|
+
this.logger.info('LLM cooldown expired. Sending batch.');
|
|
42
|
+
this.sendBatch().catch(err => {
|
|
43
|
+
this.logger.error('Error sending LLM batch on cooldown.', err);
|
|
44
|
+
});
|
|
45
|
+
}
|
|
46
|
+
}, this.config.cooldownMs);
|
|
47
|
+
this.cooldownTimeout.unref();
|
|
48
|
+
}
|
|
49
|
+
/**
|
|
50
|
+
* Clears all pending timers. Called during shutdown.
|
|
51
|
+
*/
|
|
52
|
+
clearTimers() {
|
|
53
|
+
if (this.cooldownTimeout) {
|
|
54
|
+
clearTimeout(this.cooldownTimeout);
|
|
55
|
+
this.cooldownTimeout = null;
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
/**
|
|
59
|
+
* Sends the current batch to the LLM for processing.
|
|
60
|
+
*/
|
|
61
|
+
async sendBatch() {
|
|
62
|
+
this.clearTimers();
|
|
63
|
+
if (this.batch.length === 0)
|
|
64
|
+
return null;
|
|
65
|
+
const currentBatch = [...this.batch];
|
|
66
|
+
this.batch = [];
|
|
67
|
+
const prompt = this.constructPrompt(currentBatch);
|
|
68
|
+
try {
|
|
69
|
+
const responseJson = await this.makeHttpRequest(prompt);
|
|
70
|
+
const result = this.parseLlmResponse(responseJson);
|
|
71
|
+
const entities = this.transformToEntities(result.entities);
|
|
72
|
+
const facts = this.transformToFacts(result.facts);
|
|
73
|
+
this.logger.info(`LLM extracted ${entities.length} entities and ${facts.length} facts.`);
|
|
74
|
+
return { entities, facts };
|
|
75
|
+
}
|
|
76
|
+
catch (err) {
|
|
77
|
+
this.logger.error('Failed to send or process LLM batch.', err);
|
|
78
|
+
return null;
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
/** Constructs the prompt to be sent to the LLM. */
|
|
82
|
+
constructPrompt(batch) {
|
|
83
|
+
const conversation = batch.map(item => item.text).join('\n');
|
|
84
|
+
return [
|
|
85
|
+
'Analyze the following conversation and extract key entities and facts.',
|
|
86
|
+
'Respond with a single JSON object containing "entities" and "facts".',
|
|
87
|
+
'',
|
|
88
|
+
'For "entities", provide objects with "type", "value", and "importance".',
|
|
89
|
+
'Valid types: "person", "location", "organization", "product", "concept".',
|
|
90
|
+
'',
|
|
91
|
+
'For "facts", provide triples (subject, predicate, object).',
|
|
92
|
+
'',
|
|
93
|
+
'Conversation:',
|
|
94
|
+
'---',
|
|
95
|
+
conversation,
|
|
96
|
+
'---',
|
|
97
|
+
'',
|
|
98
|
+
'JSON Response:',
|
|
99
|
+
].join('\n');
|
|
100
|
+
}
|
|
101
|
+
/** Makes an HTTP(S) request to the configured LLM endpoint. */
|
|
102
|
+
makeHttpRequest(prompt) {
|
|
103
|
+
return httpPost(this.config.endpoint, {
|
|
104
|
+
model: this.config.model,
|
|
105
|
+
prompt,
|
|
106
|
+
stream: false,
|
|
107
|
+
format: 'json',
|
|
108
|
+
});
|
|
109
|
+
}
|
|
110
|
+
/** Parses and validates the JSON response from the LLM. */
|
|
111
|
+
parseLlmResponse(responseJson) {
|
|
112
|
+
try {
|
|
113
|
+
const outer = JSON.parse(responseJson);
|
|
114
|
+
const inner = typeof outer.response === 'string'
|
|
115
|
+
? outer.response : JSON.stringify(outer);
|
|
116
|
+
const data = JSON.parse(inner);
|
|
117
|
+
if (!data || typeof data !== 'object') {
|
|
118
|
+
throw new Error('LLM response is not a valid object.');
|
|
119
|
+
}
|
|
120
|
+
return {
|
|
121
|
+
entities: Array.isArray(data.entities) ? data.entities : [],
|
|
122
|
+
facts: Array.isArray(data.facts) ? data.facts : [],
|
|
123
|
+
};
|
|
124
|
+
}
|
|
125
|
+
catch (err) {
|
|
126
|
+
this.logger.error(`Failed to parse LLM response: ${responseJson}`, err);
|
|
127
|
+
throw new Error('Invalid JSON response from LLM.');
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
/** Transforms raw LLM entity data into the standard Entity format. */
|
|
131
|
+
transformToEntities(rawEntities) {
|
|
132
|
+
const entities = [];
|
|
133
|
+
for (const raw of rawEntities) {
|
|
134
|
+
const r = raw;
|
|
135
|
+
if (typeof r.value !== 'string' || typeof r.type !== 'string')
|
|
136
|
+
continue;
|
|
137
|
+
const value = r.value.trim();
|
|
138
|
+
const type = r.type.toLowerCase();
|
|
139
|
+
const id = `${type}:${value.toLowerCase().replace(/\s+/g, '-')}`;
|
|
140
|
+
const imp = typeof r.importance === 'number'
|
|
141
|
+
? Math.max(0, Math.min(1, r.importance)) : 0.7;
|
|
142
|
+
entities.push({
|
|
143
|
+
id, value, type: type,
|
|
144
|
+
mentions: [value], count: 1, importance: imp,
|
|
145
|
+
lastSeen: new Date().toISOString(), source: ['llm'],
|
|
146
|
+
});
|
|
147
|
+
}
|
|
148
|
+
return entities;
|
|
149
|
+
}
|
|
150
|
+
/** Transforms raw LLM fact data into partial Fact objects. */
|
|
151
|
+
transformToFacts(rawFacts) {
|
|
152
|
+
const facts = [];
|
|
153
|
+
for (const raw of rawFacts) {
|
|
154
|
+
const r = raw;
|
|
155
|
+
if (typeof r.subject !== 'string' || typeof r.predicate !== 'string' || typeof r.object !== 'string')
|
|
156
|
+
continue;
|
|
157
|
+
facts.push({
|
|
158
|
+
subject: r.subject.trim(),
|
|
159
|
+
predicate: r.predicate.trim().toLowerCase().replace(/\s+/g, '-'),
|
|
160
|
+
object: r.object.trim(),
|
|
161
|
+
source: 'extracted-llm',
|
|
162
|
+
});
|
|
163
|
+
}
|
|
164
|
+
return facts;
|
|
165
|
+
}
|
|
166
|
+
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { Embeddings } from './embeddings.js';
|
|
2
|
+
import { FactStore } from './fact-store.js';
|
|
3
|
+
import { KnowledgeConfig, Logger } from './types.js';
|
|
4
|
+
/**
 * Background maintenance for the knowledge engine.
 *
 * Owns two optional recurring jobs: decaying fact relevance and
 * synchronizing embeddings.
 */
export declare class Maintenance {
    private readonly config;
    private readonly logger;
    private readonly factStore;
    private readonly embeddings?;
    private decayTimer;
    private embeddingsTimer;
    /**
     * @param config Resolved plugin configuration.
     * @param logger Logger used for progress and error messages.
     * @param factStore Store whose facts are decayed and synced.
     * @param embeddings Optional embeddings backend.
     */
    constructor(
        config: KnowledgeConfig,
        logger: Logger,
        factStore: FactStore,
        embeddings?: Embeddings
    );
    /** Starts every maintenance timer that the configuration enables. */
    start(): void;
    /** Stops every maintenance timer that is currently running. */
    stop(): void;
    private startDecayTimer;
    private startEmbeddingsTimer;
    /** Runs one fact-decay pass. */
    runDecay(): void;
    /** Runs one embeddings synchronization pass. */
    runEmbeddingsSync(): Promise<void>;
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
// src/maintenance.ts
|
|
2
|
+
/**
|
|
3
|
+
* Manages background maintenance tasks for the knowledge engine,
|
|
4
|
+
* such as decaying fact relevance and syncing embeddings.
|
|
5
|
+
*/
|
|
6
|
+
export class Maintenance {
|
|
7
|
+
config;
|
|
8
|
+
logger;
|
|
9
|
+
factStore;
|
|
10
|
+
embeddings;
|
|
11
|
+
decayTimer = null;
|
|
12
|
+
embeddingsTimer = null;
|
|
13
|
+
constructor(config, logger, factStore, embeddings) {
|
|
14
|
+
this.config = config;
|
|
15
|
+
this.logger = logger;
|
|
16
|
+
this.factStore = factStore;
|
|
17
|
+
this.embeddings = embeddings;
|
|
18
|
+
}
|
|
19
|
+
/** Starts all configured maintenance timers. */
|
|
20
|
+
start() {
|
|
21
|
+
this.logger.info('Starting maintenance service...');
|
|
22
|
+
this.stop();
|
|
23
|
+
this.startDecayTimer();
|
|
24
|
+
this.startEmbeddingsTimer();
|
|
25
|
+
}
|
|
26
|
+
/** Stops all running maintenance timers. */
|
|
27
|
+
stop() {
|
|
28
|
+
if (this.decayTimer) {
|
|
29
|
+
clearInterval(this.decayTimer);
|
|
30
|
+
this.decayTimer = null;
|
|
31
|
+
}
|
|
32
|
+
if (this.embeddingsTimer) {
|
|
33
|
+
clearInterval(this.embeddingsTimer);
|
|
34
|
+
this.embeddingsTimer = null;
|
|
35
|
+
}
|
|
36
|
+
this.logger.info('Stopped maintenance service.');
|
|
37
|
+
}
|
|
38
|
+
startDecayTimer() {
|
|
39
|
+
if (!this.config.decay.enabled)
|
|
40
|
+
return;
|
|
41
|
+
const ms = this.config.decay.intervalHours * 60 * 60 * 1000;
|
|
42
|
+
this.decayTimer = setInterval(() => this.runDecay(), ms);
|
|
43
|
+
this.decayTimer.unref();
|
|
44
|
+
this.logger.info(`Scheduled fact decay every ${this.config.decay.intervalHours} hours.`);
|
|
45
|
+
}
|
|
46
|
+
startEmbeddingsTimer() {
|
|
47
|
+
if (!this.embeddings?.isEnabled())
|
|
48
|
+
return;
|
|
49
|
+
const ms = this.config.embeddings.syncIntervalMinutes * 60 * 1000;
|
|
50
|
+
this.embeddingsTimer = setInterval(() => this.runEmbeddingsSync(), ms);
|
|
51
|
+
this.embeddingsTimer.unref();
|
|
52
|
+
this.logger.info(`Scheduled embeddings sync every ${this.config.embeddings.syncIntervalMinutes} min.`);
|
|
53
|
+
}
|
|
54
|
+
/** Executes the fact decay process. */
|
|
55
|
+
runDecay() {
|
|
56
|
+
this.logger.info('Running scheduled fact decay...');
|
|
57
|
+
try {
|
|
58
|
+
const { decayedCount } = this.factStore.decayFacts(this.config.decay.rate);
|
|
59
|
+
this.logger.info(`Fact decay complete. Decayed ${decayedCount} facts.`);
|
|
60
|
+
}
|
|
61
|
+
catch (err) {
|
|
62
|
+
this.logger.error('Error during fact decay.', err);
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
/** Executes the embeddings synchronization process. */
|
|
66
|
+
async runEmbeddingsSync() {
|
|
67
|
+
if (!this.embeddings?.isEnabled())
|
|
68
|
+
return;
|
|
69
|
+
this.logger.info('Running scheduled embeddings sync...');
|
|
70
|
+
try {
|
|
71
|
+
const unembedded = this.factStore.getUnembeddedFacts();
|
|
72
|
+
if (unembedded.length === 0) {
|
|
73
|
+
this.logger.info('No new facts to sync for embeddings.');
|
|
74
|
+
return;
|
|
75
|
+
}
|
|
76
|
+
const synced = await this.embeddings.sync(unembedded);
|
|
77
|
+
if (synced > 0) {
|
|
78
|
+
const ids = unembedded.slice(0, synced).map(f => f.id);
|
|
79
|
+
this.factStore.markFactsAsEmbedded(ids);
|
|
80
|
+
this.logger.info(`Embeddings sync complete. Synced ${synced} facts.`);
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
catch (err) {
|
|
84
|
+
this.logger.error('Error during embeddings sync.', err);
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
// src/patterns.ts
|
|
2
|
+
/**
|
|
3
|
+
* Common words that look like proper nouns (start of sentence) but are not.
|
|
4
|
+
*/
|
|
5
|
+
const EXCLUDED_WORDS = [
|
|
6
|
+
'A', 'An', 'The', 'Hello', 'My', 'This', 'Contact', 'He', 'She',
|
|
7
|
+
'It', 'We', 'They', 'I', 'You', 'His', 'Her', 'Our', 'Your',
|
|
8
|
+
'Their', 'Its', 'That', 'These', 'Those', 'What', 'Which', 'Who',
|
|
9
|
+
'How', 'When', 'Where', 'Why', 'But', 'And', 'Or', 'So', 'Not',
|
|
10
|
+
'No', 'Yes', 'Also', 'Just', 'For', 'From', 'With', 'About',
|
|
11
|
+
'After', 'Before', 'Between', 'During', 'Into', 'Through',
|
|
12
|
+
'Event', 'Talk', 'Project', 'Multiple', 'German',
|
|
13
|
+
'Am', 'Are', 'Is', 'Was', 'Were', 'Has', 'Have',
|
|
14
|
+
'Had', 'Do', 'Does', 'Did', 'Will', 'Would', 'Could', 'Should',
|
|
15
|
+
'May', 'Might', 'Must', 'Can', 'Shall', 'If', 'Then',
|
|
16
|
+
];
|
|
17
|
+
const EXCL = EXCLUDED_WORDS.map(w => `${w}\\b`).join('|');
|
|
18
|
+
/** Capitalized word: handles O'Malley, McDonald's, acronyms like USS */
|
|
19
|
+
const CAP = `(?:[A-Z][a-z']*(?:[A-Z][a-z']+)*|[A-Z]{2,})`;
|
|
20
|
+
const DE_MONTHS = 'Januar|Februar|März|Mar|April|Mai|Juni|Juli|August|September|Oktober|November|Dezember';
|
|
21
|
+
const EN_MONTHS = 'January|February|March|April|May|June|July|August|September|October|November|December';
|
|
22
|
+
/** Proper noun: one or more cap-words with exclusion list applied per word. */
|
|
23
|
+
function properNounFactory() {
|
|
24
|
+
return new RegExp(`\\b(?!${EXCL})${CAP}(?:(?:-|\\s)(?!${EXCL})${CAP})*\\b`, 'g');
|
|
25
|
+
}
|
|
26
|
+
/** Product name: three branches for multi-word+Roman, word+version, camelCase. */
|
|
27
|
+
function productNameFactory() {
|
|
28
|
+
return new RegExp(`\\b(?:(?!${EXCL})[A-Z][a-zA-Z0-9]{2,}(?:\\s[a-zA-Z]+)*\\s[IVXLCDM]+` +
|
|
29
|
+
`|[a-zA-Z][a-zA-Z0-9-]{2,}[\\s-]v?\\d+(?:\\.\\d+)?` +
|
|
30
|
+
`|[a-zA-Z][a-zA-Z0-9]+[IVXLCDM]+)\\b`, 'g');
|
|
31
|
+
}
|
|
32
|
+
/** Creates a fresh RegExp factory for each pattern key. */
|
|
33
|
+
function buildPatterns() {
|
|
34
|
+
return {
|
|
35
|
+
email: () => /\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b/g,
|
|
36
|
+
url: () => /\bhttps?:\/\/[^\s/$.?#].[^\s]*\b/g,
|
|
37
|
+
iso_date: () => /\b\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}(\.\d+)?Z?)?\b/g,
|
|
38
|
+
common_date: () => /\b(?:\d{1,2}\/\d{1,2}\/\d{2,4})|(?:\d{1,2}\.\d{1,2}\.\d{2,4})\b/g,
|
|
39
|
+
german_date: () => new RegExp(`\\b\\d{1,2}\\.\\s(?:${DE_MONTHS})\\s+\\d{4}\\b`, 'gi'),
|
|
40
|
+
english_date: () => new RegExp(`\\b(?:${EN_MONTHS})\\s+\\d{1,2}(?:st|nd|rd|th)?,\\s+\\d{4}\\b`, 'gi'),
|
|
41
|
+
proper_noun: properNounFactory,
|
|
42
|
+
product_name: productNameFactory,
|
|
43
|
+
organization_suffix: () => new RegExp('\\b(?:[A-Z][A-Za-z0-9]+(?:\\s[A-Z][A-Za-z0-9]+)*),?\\s?' +
|
|
44
|
+
'(?:Inc\\.|LLC|Corp\\.|GmbH|AG|Ltd\\.)', 'g'),
|
|
45
|
+
};
|
|
46
|
+
}
|
|
47
|
+
const PATTERN_FACTORIES = buildPatterns();
|
|
48
|
+
/**
|
|
49
|
+
* A collection of regular expression factories for extracting entities.
|
|
50
|
+
* Each property access creates a fresh RegExp to avoid /g state-bleed.
|
|
51
|
+
*/
|
|
52
|
+
export const REGEX_PATTERNS = new Proxy({}, {
|
|
53
|
+
get(_target, prop) {
|
|
54
|
+
const factory = PATTERN_FACTORIES[prop];
|
|
55
|
+
return factory ? factory() : undefined;
|
|
56
|
+
},
|
|
57
|
+
ownKeys() {
|
|
58
|
+
return Object.keys(PATTERN_FACTORIES);
|
|
59
|
+
},
|
|
60
|
+
getOwnPropertyDescriptor(_target, prop) {
|
|
61
|
+
if (prop in PATTERN_FACTORIES) {
|
|
62
|
+
return { configurable: true, enumerable: true, writable: false };
|
|
63
|
+
}
|
|
64
|
+
return undefined;
|
|
65
|
+
},
|
|
66
|
+
has(_target, prop) {
|
|
67
|
+
return prop in PATTERN_FACTORIES;
|
|
68
|
+
},
|
|
69
|
+
});
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { IStorage, Logger } from './types.js';
|
|
2
|
+
/**
 * File-backed JSON storage with atomic writes.
 *
 * Data is written to a temporary file and then renamed over the target, so a
 * crash in the middle of a write cannot leave a half-written file behind.
 */
export declare class AtomicStorage implements IStorage {
    private readonly storagePath;
    private readonly logger;
    /**
     * @param storagePath Base directory in which files are stored.
     * @param logger Logger used to report I/O errors.
     */
    constructor(storagePath: string, logger: Logger);
    /** Creates the storage directory if it does not exist yet. */
    init(): Promise<void>;
    /**
     * Reads a JSON file from the storage directory and parses it.
     *
     * @param fileName File name relative to the storage directory (e.g. "facts.json").
     * @returns The parsed value, or `null` when the file is missing or invalid.
     */
    readJson<T>(fileName: string): Promise<T | null>;
    /**
     * Serializes `data` and writes it atomically.
     *
     * @param fileName File name relative to the storage directory (e.g. "facts.json").
     * @param data Value to serialize as JSON.
     */
    writeJson<T>(fileName: string, data: T): Promise<void>;
    /**
     * Wraps an async function so that rapid calls collapse into one execution.
     *
     * Every call returns a promise; all pending promises settle with the
     * outcome of the single execution made with the last call's arguments.
     *
     * @param func Async function to debounce.
     * @param delay Quiet period in milliseconds before `func` runs.
     * @returns The debounced function.
     */
    static debounce<A extends unknown[], R>(
        func: (...args: A) => Promise<R>,
        delay: number
    ): (...args: A) => Promise<R>;
}
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
// src/storage.ts
|
|
2
|
+
import * as fs from 'node:fs/promises';
|
|
3
|
+
import * as path from 'node:path';
|
|
4
|
+
/** Type guard for Node.js system errors with a `code` property. */
|
|
5
|
+
function isNodeError(err) {
|
|
6
|
+
return err instanceof Error && 'code' in err;
|
|
7
|
+
}
|
|
8
|
+
/**
|
|
9
|
+
* A utility class for performing atomic and durable file I/O operations.
|
|
10
|
+
* It writes to a temporary file first, then renames it to the final destination,
|
|
11
|
+
* which prevents data corruption in case of a crash during the write.
|
|
12
|
+
*/
|
|
13
|
+
export class AtomicStorage {
|
|
14
|
+
storagePath;
|
|
15
|
+
logger;
|
|
16
|
+
/**
|
|
17
|
+
* Creates an instance of AtomicStorage.
|
|
18
|
+
* @param storagePath The base directory where files will be stored.
|
|
19
|
+
* @param logger A logger instance for logging errors.
|
|
20
|
+
*/
|
|
21
|
+
constructor(storagePath, logger) {
|
|
22
|
+
this.storagePath = storagePath;
|
|
23
|
+
this.logger = logger;
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* Ensures that the storage directory exists.
|
|
27
|
+
*/
|
|
28
|
+
async init() {
|
|
29
|
+
try {
|
|
30
|
+
await fs.mkdir(this.storagePath, { recursive: true });
|
|
31
|
+
}
|
|
32
|
+
catch (err) {
|
|
33
|
+
this.logger.error(`Failed to create storage directory: ${this.storagePath}`, err);
|
|
34
|
+
throw err;
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
/**
|
|
38
|
+
* Reads and parses a JSON file from the storage path.
|
|
39
|
+
* @param fileName The name of the file to read (e.g., "facts.json").
|
|
40
|
+
* @returns The parsed JSON object, or null if the file doesn't exist or is invalid.
|
|
41
|
+
*/
|
|
42
|
+
async readJson(fileName) {
|
|
43
|
+
const filePath = path.join(this.storagePath, fileName);
|
|
44
|
+
try {
|
|
45
|
+
const content = await fs.readFile(filePath, 'utf-8');
|
|
46
|
+
return JSON.parse(content);
|
|
47
|
+
}
|
|
48
|
+
catch (err) {
|
|
49
|
+
if (isNodeError(err) && err.code === 'ENOENT') {
|
|
50
|
+
return null;
|
|
51
|
+
}
|
|
52
|
+
this.logger.error(`Failed to read or parse JSON file: ${filePath}`, err);
|
|
53
|
+
return null;
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
/**
|
|
57
|
+
* Writes a JSON object to a file atomically.
|
|
58
|
+
* @param fileName The name of the file to write (e.g., "facts.json").
|
|
59
|
+
* @param data The JSON object to serialize and write.
|
|
60
|
+
*/
|
|
61
|
+
async writeJson(fileName, data) {
|
|
62
|
+
const filePath = path.join(this.storagePath, fileName);
|
|
63
|
+
const tempFilePath = `${filePath}.${Date.now()}.tmp`;
|
|
64
|
+
try {
|
|
65
|
+
const jsonString = JSON.stringify(data, null, 2);
|
|
66
|
+
await fs.writeFile(tempFilePath, jsonString, 'utf-8');
|
|
67
|
+
await fs.rename(tempFilePath, filePath);
|
|
68
|
+
}
|
|
69
|
+
catch (err) {
|
|
70
|
+
this.logger.error(`Failed to write JSON file atomically: ${filePath}`, err);
|
|
71
|
+
// Attempt to clean up the temporary file if it exists
|
|
72
|
+
try {
|
|
73
|
+
await fs.unlink(tempFilePath);
|
|
74
|
+
}
|
|
75
|
+
catch (cleanupErr) {
|
|
76
|
+
if (!isNodeError(cleanupErr) || cleanupErr.code !== 'ENOENT') {
|
|
77
|
+
this.logger.warn(`Failed to clean up temporary file: ${tempFilePath}`);
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
throw err; // Re-throw the original error
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
/**
|
|
84
|
+
* A debouncer function to limit the rate at which a function is executed.
|
|
85
|
+
* This version is designed for async functions and returns a promise that
|
|
86
|
+
* resolves with the result of the last invocation.
|
|
87
|
+
* @param func The async function to debounce.
|
|
88
|
+
* @param delay The debounce delay in milliseconds.
|
|
89
|
+
* @returns A debounced version of the function that returns a promise.
|
|
90
|
+
*/
|
|
91
|
+
static debounce(func, delay) {
|
|
92
|
+
let timeoutId = null;
|
|
93
|
+
let resolvers = [];
|
|
94
|
+
return (...args) => {
|
|
95
|
+
if (timeoutId)
|
|
96
|
+
clearTimeout(timeoutId);
|
|
97
|
+
const promise = new Promise((resolve, reject) => {
|
|
98
|
+
resolvers.push({ resolve, reject });
|
|
99
|
+
});
|
|
100
|
+
timeoutId = setTimeout(() => {
|
|
101
|
+
const current = resolvers;
|
|
102
|
+
resolvers = [];
|
|
103
|
+
func(...args)
|
|
104
|
+
.then(result => current.forEach(r => r.resolve(result)))
|
|
105
|
+
.catch(err => current.forEach(r => r.reject(err)));
|
|
106
|
+
}, delay);
|
|
107
|
+
return promise;
|
|
108
|
+
};
|
|
109
|
+
}
|
|
110
|
+
}
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
/**
 * The slice of the OpenClaw host API that this plugin relies on.
 * The host exposes more; only the members used here are declared.
 */
export interface OpenClawPluginApi {
    /** Raw, unvalidated plugin configuration as supplied by the host. */
    pluginConfig: Record<string, unknown>;
    /** Logger provided by the host. */
    logger: Logger;
    /**
     * Registers a handler for a named hook event.
     * `options.priority` is passed through to the host.
     */
    on: (
        event: string,
        handler: (event: HookEvent, ctx: HookContext) => void,
        options?: {
            priority: number;
        }
    ) => void;
}
|
|
12
|
+
/**
 * Minimal logger contract, shaped to match OpenClaw's logger.
 */
export interface Logger {
    /** Informational message. */
    info: (msg: string) => void;
    /** Warning message. */
    warn: (msg: string) => void;
    /** Error message, optionally accompanied by the causing error. */
    error: (msg: string, err?: Error) => void;
    /** Verbose diagnostic message. */
    debug: (msg: string) => void;
}
|
|
21
|
+
/**
 * Payload delivered with an OpenClaw hook.
 * Deliberately loose: different hooks carry different fields, so every known
 * field is optional and unknown extra keys are allowed.
 */
export interface HookEvent {
    content?: string;
    message?: string;
    text?: string;
    from?: string;
    sender?: string;
    role?: 'user' | 'assistant';
    /** Any additional, hook-specific fields. */
    [key: string]: unknown;
}
|
|
34
|
+
/**
 * Context object that accompanies every hook event.
 */
export interface HookContext {
    /** Workspace identifier or path supplied by the host — TODO confirm which. */
    workspace: string;
}
|
|
40
|
+
/**
 * Plugin configuration after defaults have been applied and values validated.
 */
export interface KnowledgeConfig {
    /** Master switch for the whole plugin. */
    enabled: boolean;
    workspace: string;
    /** Settings for the two extraction strategies. */
    extraction: {
        regex: {
            enabled: boolean;
        };
        llm: {
            enabled: boolean;
            model: string;
            endpoint: string;
            /** Number of queued messages that triggers an immediate send. */
            batchSize: number;
            /** Idle time in milliseconds after which a partial batch is sent. */
            cooldownMs: number;
        };
    };
    /** Periodic relevance decay of stored facts. */
    decay: {
        enabled: boolean;
        intervalHours: number;
        rate: number;
    };
    /** Periodic synchronization of facts to an embeddings backend. */
    embeddings: {
        enabled: boolean;
        endpoint: string;
        collectionName: string;
        syncIntervalMinutes: number;
    };
    /** Limits and write behaviour of the on-disk store. */
    storage: {
        maxEntities: number;
        maxFacts: number;
        writeDebounceMs: number;
    };
}
|
|
75
|
+
/**
 * An entity extracted from conversation text.
 */
export interface Entity {
    id: string;
    type:
        | 'person'
        | 'location'
        | 'organization'
        | 'date'
        | 'product'
        | 'concept'
        | 'email'
        | 'url'
        | 'unknown';
    value: string;
    /** Surface forms under which the entity was mentioned. */
    mentions: Array<string>;
    count: number;
    importance: number;
    /** Presumably an ISO-8601 timestamp — verify against producers. */
    lastSeen: string;
    /** Extraction strategies that produced this entity. */
    source: Array<'regex' | 'llm'>;
}
|
|
88
|
+
/**
 * A structured fact stored as a subject–predicate–object triple.
 */
export interface Fact {
    id: string;
    subject: string;
    predicate: string;
    object: string;
    relevance: number;
    createdAt: string;
    lastAccessed: string;
    /** How the fact entered the store. */
    source: 'ingested' | 'extracted-regex' | 'extracted-llm';
    /** Set once the fact has been embedded; exact content not shown here — TODO confirm. */
    embedded?: string;
}
|
|
102
|
+
/**
 * On-disk shape of the entities.json file.
 */
export interface EntitiesData {
    /** When the file was last updated. */
    updated: string;
    entities: Array<Entity>;
}
|
|
109
|
+
/**
 * On-disk shape of the facts.json file.
 */
export interface FactsData {
    /** When the file was last updated. */
    updated: string;
    facts: Array<Fact>;
}
|
|
116
|
+
/**
 * Contract for a JSON file storage backend.
 */
export interface IStorage {
    /** Reads and parses `fileName`; resolves to `null` when nothing usable is stored. */
    readJson<T>(fileName: string): Promise<T | null>;
    /** Serializes `data` and stores it under `fileName`. */
    writeJson<T>(fileName: string, data: T): Promise<void>;
}
|
package/index.ts
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
// index.ts
|
|
2
|
+
|
|
3
|
+
import { resolveConfig } from './src/config.js';
|
|
4
|
+
import { HookManager } from './src/hooks.js';
|
|
5
|
+
import type { OpenClawPluginApi } from './src/types.js';
|
|
6
|
+
|
|
7
|
+
/**
 * Plugin entry point, invoked by the OpenClaw host while loading plugins.
 *
 * Resolves the configuration, and — when it is valid and enabled — creates a
 * HookManager and registers its hooks. Every failure path is logged and the
 * function returns without throwing.
 */
export default (api: OpenClawPluginApi, context: { workspace: string }): void => {
  const pluginConfig = api.pluginConfig;
  const logger = api.logger;
  const openClawWorkspace = context.workspace;

  // Step 1: turn the raw host config into a validated one.
  const config = resolveConfig(pluginConfig, logger, openClawWorkspace);

  if (!config) {
    logger.error('Failed to initialize Knowledge Engine: Invalid configuration. The plugin will be disabled.');
    return;
  }
  if (!config.enabled) {
    logger.info('Knowledge Engine is disabled in the configuration.');
    return;
  }

  // Steps 2 and 3: build the hook manager and wire up its event hooks.
  try {
    new HookManager(api, config).registerHooks();
    logger.info('Knowledge Engine plugin initialized successfully.');
  } catch (err) {
    logger.error('An unexpected error occurred during Knowledge Engine initialization.', err as Error);
  }
};
|