@vainplex/openclaw-knowledge-engine 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +374 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.js +29 -0
- package/dist/src/config.d.ts +15 -0
- package/dist/src/config.js +153 -0
- package/dist/src/embeddings.d.ts +23 -0
- package/dist/src/embeddings.js +63 -0
- package/dist/src/entity-extractor.d.ts +30 -0
- package/dist/src/entity-extractor.js +123 -0
- package/dist/src/fact-store.d.ts +77 -0
- package/dist/src/fact-store.js +222 -0
- package/dist/src/hooks.d.ts +24 -0
- package/dist/src/hooks.js +94 -0
- package/dist/src/http-client.d.ts +9 -0
- package/dist/src/http-client.js +58 -0
- package/dist/src/llm-enhancer.d.ts +44 -0
- package/dist/src/llm-enhancer.js +166 -0
- package/dist/src/maintenance.d.ts +26 -0
- package/dist/src/maintenance.js +87 -0
- package/dist/src/patterns.d.ts +5 -0
- package/dist/src/patterns.js +69 -0
- package/dist/src/storage.d.ts +41 -0
- package/dist/src/storage.js +110 -0
- package/dist/src/types.d.ts +122 -0
- package/dist/src/types.js +2 -0
- package/index.ts +38 -0
- package/openclaw.plugin.json +125 -0
- package/package.json +36 -0
- package/src/config.ts +180 -0
- package/src/embeddings.ts +82 -0
- package/src/entity-extractor.ts +137 -0
- package/src/fact-store.ts +260 -0
- package/src/hooks.ts +125 -0
- package/src/http-client.ts +74 -0
- package/src/llm-enhancer.ts +187 -0
- package/src/maintenance.ts +102 -0
- package/src/patterns.ts +90 -0
- package/src/storage.ts +122 -0
- package/src/types.ts +144 -0
- package/test/config.test.ts +152 -0
- package/test/embeddings.test.ts +118 -0
- package/test/entity-extractor.test.ts +121 -0
- package/test/fact-store.test.ts +266 -0
- package/test/hooks.test.ts +120 -0
- package/test/http-client.test.ts +68 -0
- package/test/llm-enhancer.test.ts +132 -0
- package/test/maintenance.test.ts +117 -0
- package/test/patterns.test.ts +123 -0
- package/test/storage.test.ts +86 -0
- package/tsconfig.json +26 -0

package/dist/src/entity-extractor.d.ts
@@ -0,0 +1,30 @@
+import { Entity, Logger } from './types.js';
+/**
+ * Extracts entities from text using predefined regular expressions.
+ */
+export declare class EntityExtractor {
+    private readonly logger;
+    constructor(logger: Logger);
+    /**
+     * Extracts entities from a given text based on the regex patterns.
+     * @param text The input text to process.
+     * @returns An array of found entities.
+     */
+    extract(text: string): Entity[];
+    /**
+     * Processes a single regex match and upserts it into the entity map.
+     */
+    private processMatch;
+    /**
+     * Cleans and standardizes an entity value based on its type.
+     */
+    private canonicalize;
+    /**
+     * Calculates an initial importance score for an entity.
+     */
+    private calculateInitialImportance;
+    /**
+     * Merges two lists of entities by ID.
+     */
+    static mergeEntities(listA: Entity[], listB: Entity[]): Entity[];
+}
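
A minimal usage sketch of the extractor declared above. The deep import path and the console-backed logger are illustrative assumptions (the package's real entry point is dist/index.js, whose exports are not shown in this excerpt), and what actually gets extracted depends on the patterns defined in src/patterns.ts:

// Illustrative deep import; the package's public exports live behind dist/index.js.
import { EntityExtractor } from '@vainplex/openclaw-knowledge-engine/dist/src/entity-extractor.js';

// Any object with debug/info/warn/error methods should satisfy the Logger type.
const logger = { debug: console.debug, info: console.info, warn: console.warn, error: console.error };

const extractor = new EntityExtractor(logger);
const entities = extractor.extract('Contact jane@acme.example about the Acme GmbH launch on 2024-05-01.');
// Likely yields email, organization ("Acme", suffix stripped by canonicalize) and date entities,
// subject to what REGEX_PATTERNS actually matches.

// Static helper: merge two extraction passes by entity id, summing counts and deduplicating mentions.
const merged = EntityExtractor.mergeEntities(entities, extractor.extract('Acme GmbH shipped on 2024-05-01.'));
console.log(merged.map(e => `${e.type}: ${e.value} (x${e.count})`));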

package/dist/src/entity-extractor.js
@@ -0,0 +1,123 @@
+// src/entity-extractor.ts
+import { REGEX_PATTERNS } from './patterns.js';
+// A map to associate regex pattern names with entity types.
+const PATTERN_TYPE_MAP = {
+    email: 'email',
+    url: 'url',
+    iso_date: 'date',
+    common_date: 'date',
+    german_date: 'date',
+    english_date: 'date',
+    proper_noun: 'unknown',
+    product_name: 'product',
+    organization_suffix: 'organization',
+};
+/**
+ * Extracts entities from text using predefined regular expressions.
+ */
+export class EntityExtractor {
+    logger;
+    constructor(logger) {
+        this.logger = logger;
+    }
+    /**
+     * Extracts entities from a given text based on the regex patterns.
+     * @param text The input text to process.
+     * @returns An array of found entities.
+     */
+    extract(text) {
+        const foundEntities = new Map();
+        for (const key in REGEX_PATTERNS) {
+            // Each access returns a fresh RegExp (via Proxy), avoiding /g state-bleed.
+            const regex = REGEX_PATTERNS[key];
+            if (!regex.global) {
+                this.logger.warn(`Regex for "${key}" is not global. Skipping.`);
+                continue;
+            }
+            const entityType = PATTERN_TYPE_MAP[key] || 'unknown';
+            let match;
+            while ((match = regex.exec(text)) !== null) {
+                const value = match[0].trim();
+                if (!value)
+                    continue;
+                this.processMatch(key, value, entityType, foundEntities);
+            }
+        }
+        return Array.from(foundEntities.values());
+    }
+    /**
+     * Processes a single regex match and upserts it into the entity map.
+     */
+    processMatch(_key, value, entityType, entities) {
+        const canonicalValue = this.canonicalize(value, entityType);
+        const id = `${entityType}:${canonicalValue.toLowerCase().replace(/\s+/g, '-')}`;
+        if (entities.has(id)) {
+            const existing = entities.get(id);
+            if (!existing.mentions.includes(value))
+                existing.mentions.push(value);
+            existing.count++;
+            if (!existing.source.includes('regex'))
+                existing.source.push('regex');
+        }
+        else {
+            entities.set(id, {
+                id,
+                type: entityType,
+                value: canonicalValue,
+                mentions: [value],
+                count: 1,
+                importance: this.calculateInitialImportance(entityType, value),
+                lastSeen: new Date().toISOString(),
+                source: ['regex'],
+            });
+        }
+    }
+    /**
+     * Cleans and standardizes an entity value based on its type.
+     */
+    canonicalize(value, type) {
+        if (type === 'organization') {
+            const suffixes = /,?\s?(?:Inc\.|LLC|Corp\.|GmbH|AG|Ltd\.)$/i;
+            return value.replace(suffixes, '').trim();
+        }
+        return value.replace(/[.,!?;:]$/, '').trim();
+    }
+    /**
+     * Calculates an initial importance score for an entity.
+     */
+    calculateInitialImportance(type, value) {
+        switch (type) {
+            case 'organization': return 0.8;
+            case 'person': return 0.7;
+            case 'product': return 0.6;
+            case 'location': return 0.5;
+            case 'date':
+            case 'email':
+            case 'url': return 0.4;
+            default: return value.split(/\s|-/).length > 1 ? 0.5 : 0.3;
+        }
+    }
+    /**
+     * Merges two lists of entities by ID.
+     */
+    static mergeEntities(listA, listB) {
+        const merged = new Map();
+        for (const e of listA)
+            merged.set(e.id, { ...e });
+        for (const entity of listB) {
+            if (merged.has(entity.id)) {
+                const ex = merged.get(entity.id);
+                ex.count += entity.count;
+                ex.mentions = [...new Set([...ex.mentions, ...entity.mentions])];
+                ex.source = [...new Set([...ex.source, ...entity.source])];
+                ex.lastSeen = new Date() > new Date(ex.lastSeen)
+                    ? new Date().toISOString() : ex.lastSeen;
+                ex.importance = Math.max(ex.importance, entity.importance);
+            }
+            else {
+                merged.set(entity.id, { ...entity });
+            }
+        }
+        return Array.from(merged.values());
+    }
+}
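
The comment inside extract() says each access to REGEX_PATTERNS returns a fresh RegExp through a Proxy, which sidesteps the lastIndex state a shared /g regex carries between exec() loops. patterns.ts is not part of this excerpt, so the following is only a sketch of that technique with made-up pattern names, not the package's actual pattern table:

// Hypothetical stand-in for src/patterns.ts.
const RAW_PATTERNS: Record<string, RegExp> = {
    email: /[\w.+-]+@[\w-]+\.[\w.-]+/g,
    iso_date: /\b\d{4}-\d{2}-\d{2}\b/g,
};

// Each property read builds a new RegExp, so one exec() loop can never leave a
// stale lastIndex behind for the next caller of the same pattern.
export const REGEX_PATTERNS: Record<string, RegExp> = new Proxy(RAW_PATTERNS, {
    get(target, key) {
        if (typeof key !== 'string') return undefined;
        const source = target[key];
        return source ? new RegExp(source.source, source.flags) : undefined;
    },
});

This also explains the regex.global guard in extract(): exec() only walks through multiple matches when the /g flag survives the copy.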

package/dist/src/fact-store.d.ts
@@ -0,0 +1,77 @@
+import type { Fact, KnowledgeConfig, Logger } from './types.js';
+/**
+ * Manages an in-memory and on-disk store of structured facts.
+ * Provides methods for loading, querying, modifying, and persisting facts.
+ */
+export declare class FactStore {
+    private readonly storage;
+    private readonly config;
+    private readonly logger;
+    private facts;
+    private isLoaded;
+    readonly commit: () => Promise<void>;
+    constructor(workspace: string, config: KnowledgeConfig['storage'], logger: Logger);
+    /**
+     * Immediately flushes any pending debounced writes.
+     * Useful in tests and before shutdown to ensure data is persisted.
+     */
+    flush(): Promise<void>;
+    /**
+     * Loads facts from the `facts.json` file into the in-memory store.
+     * If the file doesn't exist, it initializes an empty store.
+     */
+    load(): Promise<void>;
+    /**
+     * Adds a new fact to the store or updates an existing one based on content.
+     * @param newFactData The data for the new fact, excluding metadata fields.
+     * @returns The newly created or found Fact object.
+     */
+    addFact(newFactData: Omit<Fact, 'id' | 'createdAt' | 'lastAccessed' | 'relevance'>): Fact;
+    /**
+     * Retrieves a fact by its unique ID.
+     * @param id The UUID of the fact.
+     * @returns The Fact object, or undefined if not found.
+     */
+    getFact(id: string): Fact | undefined;
+    /**
+     * Queries the fact store based on subject, predicate, or object.
+     * @param query An object with optional subject, predicate, and/or object to match.
+     * @returns An array of matching facts, sorted by relevance.
+     */
+    query(query: {
+        subject?: string;
+        predicate?: string;
+        object?: string;
+    }): Fact[];
+    /**
+     * Applies a decay factor to the relevance score of all facts.
+     * @param rate The decay rate (e.g., 0.05 for 5%).
+     * @returns An object with the count of decayed facts.
+     */
+    decayFacts(rate: number): {
+        decayedCount: number;
+    };
+    /**
+     * Persists the current in-memory fact store to `facts.json`.
+     */
+    private persist;
+    /**
+     * Removes the least relevant facts if the store exceeds its configured max size.
+     */
+    private prune;
+    /**
+     * Boosts the relevance of a fact upon access.
+     * @param currentRelevance The current relevance score.
+     * @returns The new, boosted relevance score.
+     */
+    private boostRelevance;
+    /**
+     * Returns a list of all facts that have not been embedded yet.
+     */
+    getUnembeddedFacts(): Fact[];
+    /**
+     * Marks a list of facts as having been embedded.
+     * @param factIds An array of fact IDs to update.
+     */
+    markFactsAsEmbedded(factIds: string[]): void;
+}
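
A short sketch of the lifecycle declared above: load before use, mutate, then flush the debounced write on shutdown. Only the two storage fields the implementation below actually reads (maxFacts, writeDebounceMs) are passed; anything else KnowledgeConfig['storage'] requires would come from src/config.ts, which is not shown here, hence the casts. logger is the same console-backed stub as in the EntityExtractor sketch above:

import { FactStore } from '@vainplex/openclaw-knowledge-engine/dist/src/fact-store.js'; // illustrative path

const store = new FactStore('/tmp/openclaw-workspace', { maxFacts: 5000, writeDebounceMs: 500 } as any, logger);
await store.load();                                  // required before addFact(), which otherwise throws

store.addFact({
    subject: 'Acme',
    predicate: 'ships',
    object: 'Widget 2.0',
    source: 'extracted-llm',                         // the same shape hooks.js passes in
} as any);

const acmeFacts = store.query({ subject: 'Acme' });  // sorted by relevance, descending
console.log(acmeFacts.length, 'facts about Acme, top relevance', acmeFacts[0]?.relevance);

await store.flush();                                 // force the pending facts.json write before exit

Note that addFact() deduplicates on the exact (subject, predicate, object) triple: a repeat call returns the stored fact and only boosts its relevance.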

package/dist/src/fact-store.js
@@ -0,0 +1,222 @@
+// src/fact-store.ts
+import { randomUUID } from 'node:crypto';
+import { AtomicStorage } from './storage.js';
+/**
+ * Manages an in-memory and on-disk store of structured facts.
+ * Provides methods for loading, querying, modifying, and persisting facts.
+ */
+export class FactStore {
+    storage;
+    config;
+    logger;
+    facts = new Map();
+    isLoaded = false;
+    commit;
+    constructor(workspace, config, logger) {
+        this.storage = new AtomicStorage(workspace, logger);
+        this.config = config;
+        this.logger = logger;
+        // Create a debounced version of the persist method.
+        this.commit = AtomicStorage.debounce(this.persist.bind(this), this.config.writeDebounceMs);
+    }
+    /**
+     * Immediately flushes any pending debounced writes.
+     * Useful in tests and before shutdown to ensure data is persisted.
+     */
+    async flush() {
+        if (this.isLoaded) {
+            await this.persist();
+        }
+    }
+    /**
+     * Loads facts from the `facts.json` file into the in-memory store.
+     * If the file doesn't exist, it initializes an empty store.
+     */
+    async load() {
+        if (this.isLoaded) {
+            this.logger.debug('Fact store is already loaded.');
+            return;
+        }
+        await this.storage.init();
+        const data = await this.storage.readJson('facts.json');
+        if (data && Array.isArray(data.facts)) {
+            this.facts = new Map(data.facts.map(fact => [fact.id, fact]));
+            this.logger.info(`Loaded ${this.facts.size} facts from storage.`);
+        }
+        else {
+            this.logger.info('No existing fact store found. Initializing a new one.');
+            this.facts = new Map();
+        }
+        this.isLoaded = true;
+    }
+    /**
+     * Adds a new fact to the store or updates an existing one based on content.
+     * @param newFactData The data for the new fact, excluding metadata fields.
+     * @returns The newly created or found Fact object.
+     */
+    addFact(newFactData) {
+        if (!this.isLoaded) {
+            throw new Error('FactStore has not been loaded yet. Call load() first.');
+        }
+        const now = new Date().toISOString();
+        // Check if a similar fact already exists to avoid duplicates
+        for (const existingFact of this.facts.values()) {
+            if (existingFact.subject === newFactData.subject &&
+                existingFact.predicate === newFactData.predicate &&
+                existingFact.object === newFactData.object) {
+                // Fact already exists, let's just boost its relevance and update timestamp
+                existingFact.relevance = this.boostRelevance(existingFact.relevance);
+                existingFact.lastAccessed = now;
+                this.commit();
+                return existingFact;
+            }
+        }
+        const newFact = {
+            ...newFactData,
+            id: randomUUID(),
+            createdAt: now,
+            lastAccessed: now,
+            relevance: 1.0, // New facts start with maximum relevance
+        };
+        this.facts.set(newFact.id, newFact);
+        this.prune(); // Check if we need to prune old facts
+        this.commit();
+        return newFact;
+    }
+    /**
+     * Retrieves a fact by its unique ID.
+     * @param id The UUID of the fact.
+     * @returns The Fact object, or undefined if not found.
+     */
+    getFact(id) {
+        const fact = this.facts.get(id);
+        if (fact) {
+            fact.lastAccessed = new Date().toISOString();
+            fact.relevance = this.boostRelevance(fact.relevance);
+            this.commit();
+        }
+        return fact;
+    }
+    /**
+     * Queries the fact store based on subject, predicate, or object.
+     * @param query An object with optional subject, predicate, and/or object to match.
+     * @returns An array of matching facts, sorted by relevance.
+     */
+    query(query) {
+        const results = [];
+        for (const fact of this.facts.values()) {
+            const subjectMatch = !query.subject || fact.subject === query.subject;
+            const predicateMatch = !query.predicate || fact.predicate === query.predicate;
+            const objectMatch = !query.object || fact.object === query.object;
+            if (subjectMatch && predicateMatch && objectMatch) {
+                results.push(fact);
+            }
+        }
+        // Sort by relevance, descending
+        return results.sort((a, b) => b.relevance - a.relevance);
+    }
+    /**
+     * Applies a decay factor to the relevance score of all facts.
+     * @param rate The decay rate (e.g., 0.05 for 5%).
+     * @returns An object with the count of decayed facts.
+     */
+    decayFacts(rate) {
+        let decayedCount = 0;
+        const minRelevance = 0.1; // Floor to prevent facts from disappearing completely
+        for (const fact of this.facts.values()) {
+            const newRelevance = fact.relevance * (1 - rate);
+            if (newRelevance !== fact.relevance) {
+                fact.relevance = Math.max(minRelevance, newRelevance);
+                decayedCount++;
+            }
+        }
+        if (decayedCount > 0) {
+            this.logger.info(`Applied decay rate of ${rate * 100}% to ${decayedCount} facts.`);
+            this.commit();
+        }
+        return { decayedCount };
+    }
+    /**
+     * Persists the current in-memory fact store to `facts.json`.
+     */
+    async persist() {
+        if (!this.isLoaded) {
+            this.logger.warn('Attempted to persist fact store before it was loaded. Aborting.');
+            return;
+        }
+        const data = {
+            updated: new Date().toISOString(),
+            facts: Array.from(this.facts.values()),
+        };
+        try {
+            await this.storage.writeJson('facts.json', data);
+            this.logger.debug(`Successfully persisted ${data.facts.length} facts.`);
+        }
+        catch (err) {
+            this.logger.error('Failed to persist fact store.', err);
+        }
+    }
+    /**
+     * Removes the least relevant facts if the store exceeds its configured max size.
+     */
+    prune() {
+        const factCount = this.facts.size;
+        if (factCount <= this.config.maxFacts) {
+            return;
+        }
+        const factsToPrune = factCount - this.config.maxFacts;
+        if (factsToPrune <= 0)
+            return;
+        // Get all facts, sort by relevance (ascending) and then by lastAccessed (ascending)
+        const sortedFacts = Array.from(this.facts.values()).sort((a, b) => {
+            if (a.relevance !== b.relevance) {
+                return a.relevance - b.relevance;
+            }
+            return new Date(a.lastAccessed).getTime() - new Date(b.lastAccessed).getTime();
+        });
+        for (let i = 0; i < factsToPrune; i++) {
+            this.facts.delete(sortedFacts[i].id);
+        }
+        this.logger.info(`Pruned ${factsToPrune} least relevant facts to maintain store size.`);
+    }
+    /**
+     * Boosts the relevance of a fact upon access.
+     * @param currentRelevance The current relevance score.
+     * @returns The new, boosted relevance score.
+     */
+    boostRelevance(currentRelevance) {
+        // Push the relevance 50% closer to 1.0
+        const boost = (1.0 - currentRelevance) * 0.5;
+        return Math.min(1.0, currentRelevance + boost);
+    }
+    /**
+     * Returns a list of all facts that have not been embedded yet.
+     */
+    getUnembeddedFacts() {
+        const results = [];
+        for (const fact of this.facts.values()) {
+            if (!fact.embedded) {
+                results.push(fact);
+            }
+        }
+        return results;
+    }
+    /**
+     * Marks a list of facts as having been embedded.
+     * @param factIds An array of fact IDs to update.
+     */
+    markFactsAsEmbedded(factIds) {
+        const now = new Date().toISOString();
+        let updatedCount = 0;
+        for (const id of factIds) {
+            const fact = this.facts.get(id);
+            if (fact) {
+                fact.embedded = now;
+                updatedCount++;
+            }
+        }
+        if (updatedCount > 0) {
+            this.commit();
+        }
+    }
+}
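
commit is built in the constructor from AtomicStorage.debounce(this.persist.bind(this), writeDebounceMs), and storage.js is not included in this excerpt. Assuming conventional trailing-edge debounce semantics, the helper below sketches why bursts of addFact()/getFact() calls collapse into a single facts.json write, and why flush() calls persist() directly rather than going through commit(); it is a stand-in, not the actual AtomicStorage code:

// Hypothetical stand-in for AtomicStorage.debounce (the real one lives in src/storage.ts).
// Every caller within a burst shares the outcome of the single write that eventually runs.
function debounce(fn: () => Promise<void>, waitMs: number): () => Promise<void> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    let waiters: Array<{ resolve: () => void; reject: (err: unknown) => void }> = [];
    return () => new Promise<void>((resolve, reject) => {
        waiters.push({ resolve, reject });
        if (timer) clearTimeout(timer);              // restart the window on every call
        timer = setTimeout(() => {
            const batch = waiters;
            waiters = [];
            timer = undefined;
            fn().then(() => batch.forEach(w => w.resolve()),
                      (err) => batch.forEach(w => w.reject(err)));
        }, waitMs);
    });
}

Under that reading, the debounce window is config.storage.writeDebounceMs, and flush() exists because a still-pending timer would otherwise be lost at shutdown.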

package/dist/src/hooks.d.ts
@@ -0,0 +1,24 @@
+import { OpenClawPluginApi, KnowledgeConfig } from './types.js';
+/**
+ * Manages the registration and orchestration of all plugin hooks.
+ */
+export declare class HookManager {
+    private readonly api;
+    private readonly config;
+    private readonly logger;
+    private entityExtractor;
+    private factStore;
+    private llmEnhancer?;
+    private maintenance?;
+    constructor(api: OpenClawPluginApi, config: KnowledgeConfig);
+    /** Registers all the necessary hooks with the OpenClaw host. */
+    registerHooks(): void;
+    /** Handler for the `session_start` hook. */
+    private onSessionStart;
+    /** Handler for `gateway_stop` — cleans up timers and flushes state. */
+    private onShutdown;
+    /** Handler for `message_received` and `message_sent` hooks. */
+    private onMessage;
+    /** Fire-and-forget: processes LLM batch results when available. */
+    private processLlmBatchWhenReady;
+}

package/dist/src/hooks.js
@@ -0,0 +1,94 @@
+// src/hooks.ts
+import { EntityExtractor } from './entity-extractor.js';
+import { FactStore } from './fact-store.js';
+import { LlmEnhancer } from './llm-enhancer.js';
+import { Maintenance } from './maintenance.js';
+import { Embeddings } from './embeddings.js';
+/**
+ * Manages the registration and orchestration of all plugin hooks.
+ */
+export class HookManager {
+    api;
+    config;
+    logger;
+    entityExtractor;
+    factStore;
+    llmEnhancer;
+    maintenance;
+    constructor(api, config) {
+        this.api = api;
+        this.config = config;
+        this.logger = api.logger;
+        this.entityExtractor = new EntityExtractor(this.logger);
+        this.factStore = new FactStore(this.config.workspace, this.config.storage, this.logger);
+        if (this.config.extraction.llm.enabled) {
+            this.llmEnhancer = new LlmEnhancer(this.config.extraction.llm, this.logger);
+        }
+    }
+    /** Registers all the necessary hooks with the OpenClaw host. */
+    registerHooks() {
+        if (!this.config.enabled) {
+            this.logger.info('Knowledge Engine is disabled. No hooks registered.');
+            return;
+        }
+        this.api.on('session_start', this.onSessionStart.bind(this), { priority: 200 });
+        this.api.on('message_received', this.onMessage.bind(this), { priority: 100 });
+        this.api.on('message_sent', this.onMessage.bind(this), { priority: 100 });
+        this.api.on('gateway_stop', this.onShutdown.bind(this), { priority: 900 });
+        this.logger.info('Registered all Knowledge Engine hooks.');
+    }
+    /** Handler for the `session_start` hook. */
+    async onSessionStart() {
+        this.logger.info('Knowledge Engine starting up...');
+        await this.factStore.load();
+        const embeddings = this.config.embeddings.enabled
+            ? new Embeddings(this.config.embeddings, this.logger)
+            : undefined;
+        this.maintenance = new Maintenance(this.config, this.logger, this.factStore, embeddings);
+        this.maintenance.start();
+    }
+    /** Handler for `gateway_stop` — cleans up timers and flushes state. */
+    async onShutdown() {
+        this.logger.info('Knowledge Engine shutting down...');
+        this.maintenance?.stop();
+        this.llmEnhancer?.clearTimers();
+        this.logger.info('Knowledge Engine shutdown complete.');
+    }
+    /** Handler for `message_received` and `message_sent` hooks. */
+    async onMessage(event) {
+        const text = event.content || event.message || event.text;
+        if (typeof text !== 'string' || text.trim() === '')
+            return;
+        this.logger.debug(`Processing message: "${text.substring(0, 50)}..."`);
+        if (this.config.extraction.regex.enabled) {
+            const entities = this.entityExtractor.extract(text);
+            if (entities.length > 0) {
+                this.logger.info(`Extracted ${entities.length} entities via regex.`);
+            }
+        }
+        if (this.llmEnhancer) {
+            const messageId = `msg-${Date.now()}`;
+            this.llmEnhancer.addToBatch({ id: messageId, text });
+            this.processLlmBatchWhenReady().catch(err => this.logger.error('LLM batch processing failed', err));
+        }
+    }
+    /** Fire-and-forget: processes LLM batch results when available. */
+    async processLlmBatchWhenReady() {
+        if (!this.llmEnhancer)
+            return;
+        const result = await this.llmEnhancer.sendBatch();
+        if (!result)
+            return;
+        if (result.facts.length > 0) {
+            this.logger.info(`Adding ${result.facts.length} LLM facts.`);
+            for (const f of result.facts) {
+                this.factStore.addFact({
+                    subject: f.subject,
+                    predicate: f.predicate,
+                    object: f.object,
+                    source: 'extracted-llm',
+                });
+            }
+        }
+    }
+}
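
A sketch of how a host might drive the wiring above. The api object is a stub with just the two members the constructor and registerHooks() touch (logger and on()), and the config literal carries only the fields this file actually reads; the real OpenClawPluginApi and KnowledgeConfig shapes come from src/types.ts and src/config.ts, which are not shown here, so both literals are assumptions:

import { HookManager } from '@vainplex/openclaw-knowledge-engine/dist/src/hooks.js'; // illustrative path

const logger = { debug: console.debug, info: console.info, warn: console.warn, error: console.error };

const handlers = new Map<string, (event: any) => void | Promise<void>>();
const api: any = {
    logger,
    on(hook: string, handler: (event: any) => void | Promise<void>, _opts?: { priority?: number }) {
        handlers.set(hook, handler);                 // stub host: remember one handler per hook
    },
};

const config: any = {
    enabled: true,
    workspace: '/tmp/openclaw-workspace',
    storage: { maxFacts: 5000, writeDebounceMs: 500 },
    extraction: { regex: { enabled: true }, llm: { enabled: false } },
    embeddings: { enabled: false },
};

const manager = new HookManager(api, config);
manager.registerHooks();

// In a real host, 'session_start' fires first (loads the fact store, starts maintenance);
// with the LLM path disabled, the message handler on its own only runs the regex extractor.
await handlers.get('message_received')?.({ content: 'Jane moved to Acme GmbH on 2024-05-01.' });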

package/dist/src/http-client.d.ts
@@ -0,0 +1,9 @@
+/**
+ * Makes an HTTP or HTTPS POST request, auto-selecting the
+ * transport based on the URL's protocol.
+ *
+ * @param url The full URL string to POST to.
+ * @param body The payload object to JSON-serialize and send.
+ * @returns A promise resolving with the response body string.
+ */
+export declare function httpPost(url: string, body: unknown): Promise<string>;

package/dist/src/http-client.js
@@ -0,0 +1,58 @@
+// src/http-client.ts
+import * as http from 'node:http';
+import * as https from 'node:https';
+/**
+ * Selects the correct HTTP/HTTPS module based on the URL protocol.
+ */
+function selectTransport(protocol) {
+    return protocol === 'https:' ? https : http;
+}
+/**
+ * Builds request options from a URL and payload.
+ */
+function buildRequestOptions(url, payload) {
+    return {
+        hostname: url.hostname,
+        port: url.port,
+        path: url.pathname + url.search,
+        method: 'POST',
+        headers: {
+            'Content-Type': 'application/json',
+            'Content-Length': Buffer.byteLength(payload),
+        },
+    };
+}
+/**
+ * Makes an HTTP or HTTPS POST request, auto-selecting the
+ * transport based on the URL's protocol.
+ *
+ * @param url The full URL string to POST to.
+ * @param body The payload object to JSON-serialize and send.
+ * @returns A promise resolving with the response body string.
+ */
+export function httpPost(url, body) {
+    return new Promise((resolve, reject) => {
+        const parsed = new URL(url);
+        const payload = JSON.stringify(body);
+        const options = buildRequestOptions(parsed, payload);
+        const transport = selectTransport(parsed.protocol);
+        const req = transport.request(options, (res) => {
+            let data = '';
+            res.setEncoding('utf8');
+            res.on('data', (chunk) => { data += chunk; });
+            res.on('end', () => {
+                if (res.statusCode && res.statusCode >= 200 && res.statusCode < 300) {
+                    resolve(data);
+                }
+                else {
+                    reject(new Error(`HTTP request failed with status ${res.statusCode}: ${data}`));
+                }
+            });
+        });
+        req.on('error', (e) => {
+            reject(new Error(`HTTP request error: ${e.message}`));
+        });
+        req.write(payload);
+        req.end();
+    });
+}
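
httpPost resolves with the raw response body on a 2xx status and rejects on any other status or transport error, so callers are left to JSON.parse and try/catch themselves. A small usage sketch; the URL is a placeholder, not an endpoint shipped with the plugin:

import { httpPost } from '@vainplex/openclaw-knowledge-engine/dist/src/http-client.js'; // illustrative path

try {
    const raw = await httpPost('http://localhost:8080/v1/extract', { prompt: 'hello' });
    const parsed = JSON.parse(raw);                  // httpPost always returns the body as a string
    console.log(parsed);
}
catch (err) {
    console.error('Request failed (non-2xx status or network error):', err);
}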

package/dist/src/llm-enhancer.d.ts
@@ -0,0 +1,44 @@
+import { Entity, Fact, KnowledgeConfig, Logger } from './types.js';
+interface LlmBatchItem {
+    id: string;
+    text: string;
+}
+/**
+ * Manages batched requests to an external LLM for entity and fact extraction.
+ */
+export declare class LlmEnhancer {
+    private readonly config;
+    private readonly logger;
+    private batch;
+    private cooldownTimeout;
+    constructor(config: KnowledgeConfig['extraction']['llm'], logger: Logger);
+    /**
+     * Adds a message to the current batch.
+     * Triggers a batch send (with proper error handling) when the size is reached.
+     */
+    addToBatch(item: LlmBatchItem): void;
+    /** Resets the cooldown timer. When it expires the batch is sent. */
+    private resetCooldownTimer;
+    /**
+     * Clears all pending timers. Called during shutdown.
+     */
+    clearTimers(): void;
+    /**
+     * Sends the current batch to the LLM for processing.
+     */
+    sendBatch(): Promise<{
+        entities: Entity[];
+        facts: Fact[];
+    } | null>;
+    /** Constructs the prompt to be sent to the LLM. */
+    private constructPrompt;
+    /** Makes an HTTP(S) request to the configured LLM endpoint. */
+    private makeHttpRequest;
+    /** Parses and validates the JSON response from the LLM. */
+    private parseLlmResponse;
+    /** Transforms raw LLM entity data into the standard Entity format. */
+    private transformToEntities;
+    /** Transforms raw LLM fact data into partial Fact objects. */
+    private transformToFacts;
+}
+export {};
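
Read together with hooks.js above: messages are queued with addToBatch(), HookManager awaits sendBatch() and simply skips a null result, and clearTimers() is called from onShutdown(). A usage sketch; llmConfig stands in for whatever KnowledgeConfig['extraction']['llm'] resolves to in src/config.ts, whose field names are not shown in this excerpt:

import { LlmEnhancer } from '@vainplex/openclaw-knowledge-engine/dist/src/llm-enhancer.js'; // illustrative path

const logger = { debug: console.debug, info: console.info, warn: console.warn, error: console.error };
const llmConfig: any = { enabled: true };            // placeholder; real fields come from src/config.ts

const enhancer = new LlmEnhancer(llmConfig, logger);
enhancer.addToBatch({ id: 'msg-1', text: 'Jane joined Acme GmbH.' });
enhancer.addToBatch({ id: 'msg-2', text: 'The launch is planned for 2024-05-01.' });

const result = await enhancer.sendBatch();           // typed { entities, facts } | null
if (result) {
    console.log(`${result.entities.length} entities, ${result.facts.length} facts`);
}

enhancer.clearTimers();                              // shutdown path, mirroring HookManager.onShutdown()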