@vainplex/openclaw-knowledge-engine 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/ARCHITECTURE.md +374 -0
  2. package/dist/index.d.ts +5 -0
  3. package/dist/index.js +29 -0
  4. package/dist/src/config.d.ts +15 -0
  5. package/dist/src/config.js +153 -0
  6. package/dist/src/embeddings.d.ts +23 -0
  7. package/dist/src/embeddings.js +63 -0
  8. package/dist/src/entity-extractor.d.ts +30 -0
  9. package/dist/src/entity-extractor.js +123 -0
  10. package/dist/src/fact-store.d.ts +77 -0
  11. package/dist/src/fact-store.js +222 -0
  12. package/dist/src/hooks.d.ts +24 -0
  13. package/dist/src/hooks.js +94 -0
  14. package/dist/src/http-client.d.ts +9 -0
  15. package/dist/src/http-client.js +58 -0
  16. package/dist/src/llm-enhancer.d.ts +44 -0
  17. package/dist/src/llm-enhancer.js +166 -0
  18. package/dist/src/maintenance.d.ts +26 -0
  19. package/dist/src/maintenance.js +87 -0
  20. package/dist/src/patterns.d.ts +5 -0
  21. package/dist/src/patterns.js +69 -0
  22. package/dist/src/storage.d.ts +41 -0
  23. package/dist/src/storage.js +110 -0
  24. package/dist/src/types.d.ts +122 -0
  25. package/dist/src/types.js +2 -0
  26. package/index.ts +38 -0
  27. package/openclaw.plugin.json +125 -0
  28. package/package.json +36 -0
  29. package/src/config.ts +180 -0
  30. package/src/embeddings.ts +82 -0
  31. package/src/entity-extractor.ts +137 -0
  32. package/src/fact-store.ts +260 -0
  33. package/src/hooks.ts +125 -0
  34. package/src/http-client.ts +74 -0
  35. package/src/llm-enhancer.ts +187 -0
  36. package/src/maintenance.ts +102 -0
  37. package/src/patterns.ts +90 -0
  38. package/src/storage.ts +122 -0
  39. package/src/types.ts +144 -0
  40. package/test/config.test.ts +152 -0
  41. package/test/embeddings.test.ts +118 -0
  42. package/test/entity-extractor.test.ts +121 -0
  43. package/test/fact-store.test.ts +266 -0
  44. package/test/hooks.test.ts +120 -0
  45. package/test/http-client.test.ts +68 -0
  46. package/test/llm-enhancer.test.ts +132 -0
  47. package/test/maintenance.test.ts +117 -0
  48. package/test/patterns.test.ts +123 -0
  49. package/test/storage.test.ts +86 -0
  50. package/tsconfig.json +26 -0
@@ -0,0 +1,125 @@
1
+ {
2
+ "id": "@vainplex/openclaw-knowledge-engine",
3
+ "config": {
4
+ "enabled": {
5
+ "type": "boolean",
6
+ "default": true,
7
+ "description": "Whether the knowledge engine plugin is enabled."
8
+ },
9
+ "workspace": {
10
+ "type": "string",
11
+ "default": "~/.clawd/plugins/knowledge-engine",
12
+ "description": "The directory to store knowledge files (entities.json, facts.json)."
13
+ },
14
+ "extraction": {
15
+ "type": "object",
16
+ "properties": {
17
+ "regex": {
18
+ "type": "object",
19
+ "properties": {
20
+ "enabled": {
21
+ "type": "boolean",
22
+ "default": true,
23
+ "description": "Enable or disable high-speed regex-based entity extraction."
24
+ }
25
+ }
26
+ },
27
+ "llm": {
28
+ "type": "object",
29
+ "properties": {
30
+ "enabled": {
31
+ "type": "boolean",
32
+ "default": true,
33
+ "description": "Enable or disable high-fidelity LLM-based entity and fact extraction."
34
+ },
35
+ "model": {
36
+ "type": "string",
37
+ "default": "mistral:7b",
38
+ "description": "The model name to use for the LLM API call (e.g., Ollama model)."
39
+ },
40
+ "endpoint": {
41
+ "type": "string",
42
+ "default": "http://localhost:11434/api/generate",
43
+ "description": "The HTTP endpoint for the LLM generation API."
44
+ },
45
+ "batchSize": {
46
+ "type": "number",
47
+ "default": 10,
48
+ "description": "Number of messages to batch together before sending to the LLM."
49
+ },
50
+ "cooldownMs": {
51
+ "type": "number",
52
+ "default": 30000,
53
+ "description": "Milliseconds to wait after the last message before sending a batch to the LLM."
54
+ }
55
+ }
56
+ }
57
+ }
58
+ },
59
+ "decay": {
60
+ "type": "object",
61
+ "properties": {
62
+ "enabled": {
63
+ "type": "boolean",
64
+ "default": true,
65
+ "description": "Enable or disable the periodic decay of fact relevance."
66
+ },
67
+ "intervalHours": {
68
+ "type": "number",
69
+ "default": 24,
70
+ "description": "How often (in hours) to run the decay process."
71
+ },
72
+ "rate": {
73
+ "type": "number",
74
+ "default": 0.02,
75
+ "description": "The fraction of relevance (a number between 0 and 1) to decay in each interval (e.g., 0.02 is 2%)."
76
+ }
77
+ }
78
+ },
79
+ "embeddings": {
80
+ "type": "object",
81
+ "properties": {
82
+ "enabled": {
83
+ "type": "boolean",
84
+ "default": false,
85
+ "description": "Enable or disable syncing of facts to a vector database."
86
+ },
87
+ "endpoint": {
88
+ "type": "string",
89
+ "default": "http://localhost:8000/api/v1/collections/facts/add",
90
+ "description": "The HTTP endpoint for the vector database's add API (ChromaDB compatible). A {name} placeholder in the URL is replaced with collectionName."
91
+ },
92
+ "collectionName": {
93
+ "type": "string",
94
+ "default": "openclaw-facts",
95
+ "description": "The name of the collection to use in the vector database."
96
+ },
97
+ "syncIntervalMinutes": {
98
+ "type": "number",
99
+ "default": 15,
100
+ "description": "How often (in minutes) to sync new facts to the vector database."
101
+ }
102
+ }
103
+ },
104
+ "storage": {
105
+ "type": "object",
106
+ "properties": {
107
+ "maxEntities": {
108
+ "type": "number",
109
+ "default": 5000,
110
+ "description": "The maximum number of entities to store before pruning."
111
+ },
112
+ "maxFacts": {
113
+ "type": "number",
114
+ "default": 10000,
115
+ "description": "The maximum number of facts to store before pruning."
116
+ },
117
+ "writeDebounceMs": {
118
+ "type": "number",
119
+ "default": 15000,
120
+ "description": "Milliseconds to wait after a change before writing data to disk."
121
+ }
122
+ }
123
+ }
124
+ }
125
+ }
package/package.json ADDED
@@ -0,0 +1,36 @@
1
+ {
2
+ "name": "@vainplex/openclaw-knowledge-engine",
3
+ "version": "0.1.0",
4
+ "description": "An OpenClaw plugin for real-time and batch knowledge extraction from conversational data.",
5
+ "main": "dist/index.js",
6
+ "types": "dist/index.d.ts",
7
+ "scripts": {
8
+ "build": "tsc",
9
+ "test": "tsx --test test/*.test.ts",
10
+ "lint": "eslint . --ext .ts",
11
+ "prepublishOnly": "npm run build"
12
+ },
13
+ "keywords": [
14
+ "openclaw",
15
+ "plugin",
16
+ "knowledge-extraction",
17
+ "nlp",
18
+ "entity-extraction"
19
+ ],
20
+ "author": "Vainplex",
21
+ "license": "MIT",
22
+ "repository": {
23
+ "type": "git",
24
+ "url": "https://github.com/alberthild/openclaw-knowledge-engine.git"
25
+ },
26
+ "openclaw": {
27
+ "id": "@vainplex/openclaw-knowledge-engine"
28
+ },
29
+ "devDependencies": {
30
+ "@types/node": "^20.11.24",
31
+ "eslint": "^8.57.0",
32
+ "typescript": "^5.3.3",
33
+ "tsx": "^4.7.1"
34
+ },
35
+ "type": "module"
36
+ }
package/src/config.ts ADDED
@@ -0,0 +1,180 @@
1
+ // src/config.ts
2
+
3
+ import * as path from 'node:path';
4
+ import { KnowledgeConfig, Logger } from './types.js';
5
+
6
/**
 * Baseline settings used for every option the user's plugin configuration
 * leaves out. `workspace` is deliberately not part of the defaults; it is
 * filled in when the configuration is resolved.
 */
export const DEFAULT_CONFIG: Omit<KnowledgeConfig, 'workspace'> = {
  enabled: true,

  // Entity/fact extraction: regex pass plus batched LLM pass.
  extraction: {
    regex: { enabled: true },
    llm: {
      enabled: true,
      model: 'mistral:7b',
      endpoint: 'http://localhost:11434/api/generate',
      batchSize: 10,
      cooldownMs: 30_000,
    },
  },

  // Periodic relevance decay.
  decay: { enabled: true, intervalHours: 24, rate: 0.02 },

  // Optional vector-database sync (off by default).
  embeddings: {
    enabled: false,
    endpoint: 'http://localhost:8000/api/v1/collections/facts/add',
    collectionName: 'openclaw-facts',
    syncIntervalMinutes: 15,
  },

  // Storage limits and write debouncing.
  storage: { maxEntities: 5_000, maxFacts: 10_000, writeDebounceMs: 15_000 },
};
39
+
40
/**
 * Key that is never copied during a merge. Assigning it with `obj[key] = v`
 * replaces the object's prototype instead of creating an ordinary property,
 * which would let untrusted (e.g. JSON-parsed) config inject inherited values.
 */
const PROTO_KEY = '__proto__';

/**
 * Recursively merges `source` over `target` and returns a new object.
 *
 * - Plain-object values present on both sides are merged key by key.
 * - Any other defined source value replaces the target value.
 * - `undefined` source values are ignored, so the target value is kept.
 * - Nested plain objects and arrays taken from `target` are copied, so the
 *   result never shares mutable state with `target`.
 * - A `__proto__` key on either side is skipped.
 *
 * @param target The base object (typically the defaults). Not mutated.
 * @param source The overriding values. Not mutated.
 * @returns A new object containing the merged values.
 */
function deepMerge<T extends Record<string, unknown>>(
  target: T,
  source: Record<string, unknown>
): T {
  const result: Record<string, unknown> = {};

  // Start from an independent copy of the target.
  for (const [key, value] of Object.entries(target)) {
    if (key !== PROTO_KEY) result[key] = cloneValue(value);
  }

  for (const [key, srcVal] of Object.entries(source)) {
    if (key === PROTO_KEY || srcVal === undefined) continue;
    const tgtVal = result[key];
    result[key] =
      isPlainObject(srcVal) && isPlainObject(tgtVal)
        ? deepMerge(tgtVal, srcVal)
        : srcVal;
  }

  return result as T;
}

/** Copies plain objects and arrays recursively; every other value is returned as-is. */
function cloneValue(value: unknown): unknown {
  if (Array.isArray(value)) return value.map(cloneValue);
  if (!isPlainObject(value)) return value;
  const copy: Record<string, unknown> = {};
  for (const [key, nested] of Object.entries(value)) {
    if (key !== PROTO_KEY) copy[key] = cloneValue(nested);
  }
  return copy;
}

/**
 * Returns true only for object literals / `Object.create(null)` style objects.
 * Arrays, `null`, and class instances (Date, Map, ...) are not plain objects,
 * so they are replaced wholesale rather than merged field by field.
 */
function isPlainObject(val: unknown): val is Record<string, unknown> {
  if (typeof val !== 'object' || val === null || Array.isArray(val)) return false;
  const proto: unknown = Object.getPrototypeOf(val);
  return proto === null || proto === Object.prototype;
}
64
+
65
/**
 * Layers the user's settings on top of DEFAULT_CONFIG and picks the workspace:
 * the user's `workspace` when it is a non-empty string, otherwise a
 * `knowledge-engine` directory inside the OpenClaw workspace.
 */
function mergeConfigDefaults(
  userConfig: Record<string, unknown>,
  openClawWorkspace: string
): KnowledgeConfig {
  const withDefaults = deepMerge(
    DEFAULT_CONFIG as unknown as Record<string, unknown>,
    userConfig
  );

  const requested = userConfig.workspace;
  let workspace: string;
  if (typeof requested === 'string' && requested !== '') {
    workspace = requested;
  } else {
    workspace = path.join(openClawWorkspace, 'knowledge-engine');
  }

  return { ...withDefaults, workspace } as KnowledgeConfig;
}
79
+
80
/**
 * Expands a leading `~` to the current user's home directory.
 *
 * Only `~` on its own or followed by a path separator (`~/...`, `~\...`) is
 * expanded. Other tilde forms such as `~otheruser/...` or `~backup` do not
 * refer to the current user's home, so they are returned unchanged.
 *
 * @param ws The configured workspace path.
 * @param logger Receives a warning when the home directory cannot be determined.
 * @param fallback Path returned when expansion is needed but no home directory is set.
 * @returns The expanded path, the original path, or `fallback`.
 */
function resolveTilde(ws: string, logger: Logger, fallback: string): string {
  const isHomeRelative = ws === '~' || ws.startsWith('~/') || ws.startsWith('~\\');
  if (!isHomeRelative) return ws;

  // `||` on purpose: an empty HOME should fall through to USERPROFILE.
  const homeDir = process.env.HOME || process.env.USERPROFILE;
  if (!homeDir) {
    logger.warn('Could not resolve home directory for workspace path.');
    return fallback;
  }
  return path.join(homeDir, ws.slice(1));
}
88
+
89
/**
 * Builds the effective plugin configuration: merges the user's settings with
 * the defaults, expands a leading `~` in the workspace path, then validates.
 *
 * @param userConfig The user-provided configuration from OpenClaw's pluginConfig.
 * @param logger Receives one error per validation problem, or an info message on success.
 * @param openClawWorkspace The root workspace directory provided by OpenClaw.
 * @returns The resolved KnowledgeConfig, or null when validation reports any error.
 */
export function resolveConfig(
  userConfig: Record<string, unknown>,
  logger: Logger,
  openClawWorkspace: string
): KnowledgeConfig | null {
  const resolved = mergeConfigDefaults(userConfig, openClawWorkspace);
  const defaultWorkspace = path.join(openClawWorkspace, 'knowledge-engine');
  resolved.workspace = resolveTilde(resolved.workspace, logger, defaultWorkspace);

  const problems = validateConfig(resolved);
  if (problems.length === 0) {
    logger.info('Knowledge Engine configuration resolved successfully.');
    return resolved;
  }

  for (const problem of problems) {
    logger.error(`Invalid configuration: ${problem}`);
  }
  return null;
}
115
+
116
+ // ── Validation ──────────────────────────────────────────────
117
+
118
/**
 * Runs every section validator and returns all problems found.
 * An empty array means the configuration is valid.
 */
function validateConfig(config: KnowledgeConfig): string[] {
  return [
    ...validateRoot(config),
    ...validateExtraction(config.extraction),
    ...validateDecay(config.decay),
    ...validateEmbeddings(config.embeddings),
    ...validateStorage(config.storage),
  ];
}

/**
 * Views a value as a string-keyed record, or returns undefined when it is not
 * a non-null, non-array object. User config is untyped at runtime, so section
 * validators use this instead of trusting the static type.
 */
function asRecord(value: unknown): Record<string, unknown> | undefined {
  return typeof value === 'object' && value !== null && !Array.isArray(value)
    ? (value as Record<string, unknown>)
    : undefined;
}

/** True for real, finite numbers (rejects NaN, Infinity, numeric strings, null). */
function isFiniteNumber(value: unknown): value is number {
  return typeof value === 'number' && Number.isFinite(value);
}

/** Validates the top-level `enabled` flag and `workspace` path. */
function validateRoot(c: KnowledgeConfig): string[] {
  const errs: string[] = [];
  if (typeof c.enabled !== 'boolean') errs.push('"enabled" must be a boolean.');
  if (typeof c.workspace !== 'string' || c.workspace.trim() === '') {
    errs.push('"workspace" must be a non-empty string.');
  }
  return errs;
}

/**
 * Validates the LLM extraction settings. The endpoint and batch size are only
 * checked when LLM extraction is enabled.
 */
function validateExtraction(ext: KnowledgeConfig['extraction']): string[] {
  const llm = asRecord(asRecord(ext)?.llm);
  if (!llm) return ['"extraction.llm" must be an object.'];
  if (!llm.enabled) return [];

  const errs: string[] = [];
  if (!isValidHttpUrl(llm.endpoint)) {
    errs.push('"extraction.llm.endpoint" must be a valid HTTP/S URL.');
  }
  const batchSize = llm.batchSize;
  if (!isFiniteNumber(batchSize) || batchSize < 1) {
    errs.push('"extraction.llm.batchSize" must be at least 1.');
  }
  return errs;
}

/** Validates the decay rate (0..1 inclusive) and interval (> 0 hours). */
function validateDecay(d: KnowledgeConfig['decay']): string[] {
  const decay = asRecord(d);
  if (!decay) return ['"decay" must be an object.'];

  const errs: string[] = [];
  const rate = decay.rate;
  if (!isFiniteNumber(rate) || rate < 0 || rate > 1) {
    errs.push('"decay.rate" must be between 0 and 1.');
  }
  const intervalHours = decay.intervalHours;
  if (!isFiniteNumber(intervalHours) || intervalHours <= 0) {
    errs.push('"decay.intervalHours" must be greater than 0.');
  }
  return errs;
}

/** Validates the embeddings endpoint; only checked when embeddings are enabled. */
function validateEmbeddings(e: KnowledgeConfig['embeddings']): string[] {
  const embeddings = asRecord(e);
  if (!embeddings) return ['"embeddings" must be an object.'];

  const errs: string[] = [];
  if (embeddings.enabled && !isValidHttpUrl(embeddings.endpoint)) {
    errs.push('"embeddings.endpoint" must be a valid HTTP/S URL.');
  }
  return errs;
}

/**
 * Validates the write debounce. A missing (null/undefined) value is accepted,
 * matching the previous behaviour of treating it as 0.
 */
function validateStorage(s: KnowledgeConfig['storage']): string[] {
  const storage = asRecord(s);
  if (!storage) return ['"storage" must be an object.'];

  const errs: string[] = [];
  const debounce = storage.writeDebounceMs;
  if (debounce != null && (!isFiniteNumber(debounce) || debounce < 0)) {
    errs.push('"storage.writeDebounceMs" must be a non-negative number.');
  }
  return errs;
}

/** True when `str` is a string that parses as an absolute http: or https: URL. */
function isValidHttpUrl(str: unknown): boolean {
  if (typeof str !== 'string') return false;
  try {
    const url = new URL(str);
    return url.protocol === 'http:' || url.protocol === 'https:';
  } catch {
    return false;
  }
}
@@ -0,0 +1,82 @@
1
+ // src/embeddings.ts
2
+
3
+ import { Fact, KnowledgeConfig, Logger } from './types.js';
4
+ import { httpPost } from './http-client.js';
5
+
6
/** Request body sent to the vector database: parallel arrays, one entry per fact. */
interface ChromaPayload {
  ids: string[];
  documents: string[];
  metadatas: Record<string, string>[];
}

/**
 * Manages optional integration with a ChromaDB-compatible vector database.
 */
export class Embeddings {
  private readonly config: KnowledgeConfig['embeddings'];
  private readonly logger: Logger;

  /**
   * @param config The `embeddings` section of the plugin configuration.
   * @param logger Receives progress and failure messages.
   */
  constructor(config: KnowledgeConfig['embeddings'], logger: Logger) {
    this.config = config;
    this.logger = logger;
  }

  /** Checks if the embeddings service is enabled. */
  public isEnabled(): boolean {
    return this.config.enabled;
  }

  /**
   * Syncs a batch of facts to the vector database with a single POST.
   *
   * Failures are logged and reported as 0 synced facts; this method does not
   * reject when the request fails.
   *
   * @param facts The facts to send.
   * @returns `facts.length` on success; 0 when disabled, when `facts` is
   *          empty, or when the request fails.
   */
  public async sync(facts: Fact[]): Promise<number> {
    if (!this.isEnabled() || facts.length === 0) return 0;

    this.logger.info(`Starting embedding sync for ${facts.length} facts.`);
    const payload = this.constructChromaPayload(facts);
    const url = this.buildEndpointUrl();

    try {
      await httpPost(url, payload);
      this.logger.info(`Successfully synced ${facts.length} facts to ChromaDB.`);
      return facts.length;
    } catch (err: unknown) {
      // Anything can be thrown; normalise so the logger always gets an Error.
      const error = err instanceof Error ? err : new Error(String(err));
      this.logger.error('Failed to sync embeddings to ChromaDB.', error);
      return 0;
    }
  }

  /**
   * Builds the request URL from the configured endpoint.
   *
   * - Every `{name}` placeholder is replaced with the URL-encoded collection name.
   * - Runs of slashes in the path are collapsed to one; the `//` after the
   *   scheme is preserved, and anything after `?` or `#` is left untouched.
   */
  private buildEndpointUrl(): string {
    const encodedName = encodeURIComponent(this.config.collectionName);
    const url = this.config.endpoint.split('{name}').join(encodedName);

    const suffixStart = url.search(/[?#]/);
    const pathPart = suffixStart === -1 ? url : url.slice(0, suffixStart);
    const suffix = suffixStart === -1 ? '' : url.slice(suffixStart);

    // `[^:/]` keeps the scheme's "://" intact while still collapsing "a///b".
    return pathPart.replace(/([^:/])\/{2,}/g, '$1/') + suffix;
  }

  /**
   * Converts facts into the payload shape: the fact id, a one-sentence
   * document (`subject predicate object.` with hyphens in the predicate
   * turned into spaces), and string-valued metadata.
   */
  private constructChromaPayload(facts: Fact[]): ChromaPayload {
    const payload: ChromaPayload = { ids: [], documents: [], metadatas: [] };

    for (const fact of facts) {
      payload.ids.push(fact.id);
      payload.documents.push(
        `${fact.subject} ${fact.predicate.replace(/-/g, ' ')} ${fact.object}.`
      );
      payload.metadatas.push({
        subject: fact.subject,
        predicate: fact.predicate,
        object: fact.object,
        source: fact.source,
        createdAt: fact.createdAt,
      });
    }

    return payload;
  }
}
+ }
@@ -0,0 +1,137 @@
1
+ // src/entity-extractor.ts
2
+
3
+ import { Entity, Logger } from './types.js';
4
+ import { REGEX_PATTERNS } from './patterns.js';
5
+
6
// Associates regex pattern names with the entity type they produce.
// Pattern names missing from this map are treated as 'unknown'.
const PATTERN_TYPE_MAP: Record<string, Entity['type']> = {
  email: 'email',
  url: 'url',
  iso_date: 'date',
  common_date: 'date',
  german_date: 'date',
  english_date: 'date',
  proper_noun: 'unknown',
  product_name: 'product',
  organization_suffix: 'organization',
};

/**
 * Extracts entities from text using predefined regular expressions.
 */
export class EntityExtractor {
  private readonly logger: Logger;

  /** @param logger Receives a warning for every pattern that is skipped. */
  constructor(logger: Logger) {
    this.logger = logger;
  }

  /**
   * Extracts entities from a given text based on the regex patterns.
   *
   * Every pattern in REGEX_PATTERNS that has the global flag is run over the
   * whole text; non-global patterns are skipped with a warning. Repeated
   * matches of the same canonical value are folded into one entity.
   *
   * @param text The input text to process.
   * @returns The distinct entities found, in first-seen order.
   */
  public extract(text: string): Entity[] {
    const foundEntities = new Map<string, Entity>();

    for (const key in REGEX_PATTERNS) {
      const regex = REGEX_PATTERNS[key];
      if (!regex.global) {
        this.logger.warn(`Regex for "${key}" is not global. Skipping.`);
        continue;
      }
      const entityType = PATTERN_TYPE_MAP[key] ?? 'unknown';

      // Always scan from the start, even if the regex object was used before.
      regex.lastIndex = 0;
      let match: RegExpExecArray | null;
      while ((match = regex.exec(text)) !== null) {
        if (match[0].length === 0) {
          // A zero-length match does not advance lastIndex by itself; without
          // this step the loop would never terminate.
          regex.lastIndex++;
          continue;
        }
        const value = match[0].trim();
        if (!value) continue;
        this.processMatch(value, entityType, foundEntities);
      }
    }

    return Array.from(foundEntities.values());
  }

  /**
   * Records a single match in the entity map: bumps the count and mentions of
   * an existing entity with the same id, or inserts a new entity.
   */
  private processMatch(
    value: string,
    entityType: Entity['type'],
    entities: Map<string, Entity>
  ): void {
    const canonicalValue = this.canonicalize(value, entityType);
    // Id is "<type>:<lower-cased value with whitespace runs replaced by '-'>".
    const id = `${entityType}:${canonicalValue.toLowerCase().replace(/\s+/g, '-')}`;

    const existing = entities.get(id);
    if (existing) {
      if (!existing.mentions.includes(value)) existing.mentions.push(value);
      existing.count++;
      if (!existing.source.includes('regex')) existing.source.push('regex');
      return;
    }

    entities.set(id, {
      id,
      type: entityType,
      value: canonicalValue,
      mentions: [value],
      count: 1,
      importance: this.calculateInitialImportance(entityType, value),
      lastSeen: new Date().toISOString(),
      source: ['regex'],
    });
  }

  /**
   * Cleans and standardizes an entity value based on its type: organizations
   * lose a trailing legal suffix (Inc., LLC, Corp., GmbH, AG, Ltd.); every
   * other type loses one trailing punctuation character.
   */
  private canonicalize(value: string, type: Entity['type']): string {
    if (type === 'organization') {
      const suffixes = /,?\s?(?:Inc\.|LLC|Corp\.|GmbH|AG|Ltd\.)$/i;
      return value.replace(suffixes, '').trim();
    }
    return value.replace(/[.,!?;:]$/, '').trim();
  }

  /**
   * Calculates an initial importance score for an entity. Types without a
   * fixed score get 0.5 when the value has several space- or
   * hyphen-separated parts, otherwise 0.3.
   */
  private calculateInitialImportance(type: Entity['type'], value: string): number {
    switch (type) {
      case 'organization': return 0.8;
      case 'person': return 0.7;
      case 'product': return 0.6;
      case 'location': return 0.5;
      case 'date':
      case 'email':
      case 'url': return 0.4;
      default: return value.split(/\s|-/).length > 1 ? 0.5 : 0.3;
    }
  }

  /**
   * Merges two lists of entities by ID. Neither input list nor its entities
   * are mutated.
   *
   * For ids present in both lists: counts are summed, mentions and sources
   * are unioned, importance is the maximum, and `lastSeen` is the later of
   * the two timestamps.
   *
   * @returns The merged entities: `listA` order first, then ids only in `listB`.
   */
  public static mergeEntities(listA: Entity[], listB: Entity[]): Entity[] {
    const merged = new Map<string, Entity>();
    for (const e of listA) merged.set(e.id, { ...e });

    for (const entity of listB) {
      const ex = merged.get(entity.id);
      if (!ex) {
        merged.set(entity.id, { ...entity });
        continue;
      }
      ex.count += entity.count;
      ex.mentions = [...new Set([...ex.mentions, ...entity.mentions])];
      ex.source = [...new Set([...ex.source, ...entity.source])];
      ex.lastSeen = EntityExtractor.latestTimestamp(ex.lastSeen, entity.lastSeen);
      ex.importance = Math.max(ex.importance, entity.importance);
    }

    return Array.from(merged.values());
  }

  /**
   * Returns whichever timestamp string is later. A string that does not parse
   * as a date loses to one that does; if neither parses, `a` is returned.
   */
  private static latestTimestamp(a: string, b: string): string {
    const timeA = Date.parse(a);
    const timeB = Date.parse(b);
    if (Number.isNaN(timeB)) return a;
    if (Number.isNaN(timeA)) return b;
    return timeB > timeA ? b : a;
  }
}
+ }