@unrdf/knowledge-engine 5.0.1 → 26.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +13 -7
- package/src/ai-enhanced-search.mjs +371 -0
- package/src/anomaly-detector.mjs +226 -0
- package/src/artifact-generator.mjs +251 -0
- package/src/browser.mjs +1 -1
- package/src/chatman/disruption-arithmetic.mjs +140 -0
- package/src/chatman/market-dynamics.mjs +140 -0
- package/src/chatman/organizational-dynamics.mjs +140 -0
- package/src/chatman/strategic-dynamics.mjs +140 -0
- package/src/chatman-config-loader.mjs +282 -0
- package/src/chatman-engine.mjs +431 -0
- package/src/chatman-operator.mjs +342 -0
- package/src/dark-field-detector.mjs +312 -0
- package/src/formation-theorems.mjs +345 -0
- package/src/index.mjs +20 -2
- package/src/knowledge-hook-manager.mjs +1 -1
- package/src/lockchain-writer-browser.mjs +2 -2
- package/src/observability.mjs +40 -4
- package/src/query-optimizer.mjs +1 -1
- package/src/resolution-layer.mjs +1 -1
- package/src/transaction.mjs +11 -9
- package/README.md +0 -84
- package/src/browser-shims.mjs +0 -343
- package/src/canonicalize.mjs +0 -414
- package/src/condition-cache.mjs +0 -109
- package/src/condition-evaluator.mjs +0 -722
- package/src/dark-matter-core.mjs +0 -742
- package/src/define-hook.mjs +0 -213
- package/src/effect-sandbox-browser.mjs +0 -283
- package/src/effect-sandbox-worker.mjs +0 -170
- package/src/effect-sandbox.mjs +0 -517
- package/src/engines/index.mjs +0 -11
- package/src/engines/rdf-engine.mjs +0 -299
- package/src/file-resolver.mjs +0 -387
- package/src/hook-executor-batching.mjs +0 -277
- package/src/hook-executor.mjs +0 -870
- package/src/hook-management.mjs +0 -150
- package/src/ken-parliment.mjs +0 -119
- package/src/ken.mjs +0 -149
- package/src/knowledge-engine/builtin-rules.mjs +0 -190
- package/src/knowledge-engine/inference-engine.mjs +0 -418
- package/src/knowledge-engine/knowledge-engine.mjs +0 -317
- package/src/knowledge-engine/pattern-dsl.mjs +0 -142
- package/src/knowledge-engine/pattern-matcher.mjs +0 -215
- package/src/knowledge-engine/rules.mjs +0 -184
- package/src/knowledge-engine.mjs +0 -319
- package/src/knowledge-hook-engine.mjs +0 -360
- package/src/knowledge-substrate-core.mjs +0 -927
- package/src/lite.mjs +0 -222
- package/src/lockchain-writer.mjs +0 -602
- package/src/monitoring/andon-signals.mjs +0 -775
- package/src/parse.mjs +0 -290
- package/src/performance-optimizer.mjs +0 -678
- package/src/policy-pack.mjs +0 -572
- package/src/query-cache.mjs +0 -116
- package/src/query.mjs +0 -306
- package/src/reason.mjs +0 -350
- package/src/schemas.mjs +0 -1063
- package/src/security/error-sanitizer.mjs +0 -257
- package/src/security/path-validator.mjs +0 -194
- package/src/security/sandbox-restrictions.mjs +0 -331
- package/src/security-validator.mjs +0 -389
- package/src/store-cache.mjs +0 -137
- package/src/telemetry.mjs +0 -167
- package/src/utils/adaptive-monitor.mjs +0 -746
- package/src/utils/circuit-breaker.mjs +0 -513
- package/src/utils/edge-case-handler.mjs +0 -503
- package/src/utils/memory-manager.mjs +0 -498
- package/src/utils/ring-buffer.mjs +0 -282
- package/src/validate.mjs +0 -319
- package/src/validators/index.mjs +0 -338
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@unrdf/knowledge-engine",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "26.4.3",
|
|
4
4
|
"description": "UNRDF Knowledge Engine - Rule Engine, Inference, and Pattern Matching (Optional Extension)",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/index.mjs",
|
|
@@ -8,7 +8,8 @@
|
|
|
8
8
|
".": "./src/index.mjs",
|
|
9
9
|
"./query": "./src/query.mjs",
|
|
10
10
|
"./canonicalize": "./src/canonicalize.mjs",
|
|
11
|
-
"./parse": "./src/parse.mjs"
|
|
11
|
+
"./parse": "./src/parse.mjs",
|
|
12
|
+
"./ai-search": "./src/ai-enhanced-search.mjs"
|
|
12
13
|
},
|
|
13
14
|
"sideEffects": false,
|
|
14
15
|
"files": [
|
|
@@ -25,9 +26,14 @@
|
|
|
25
26
|
"rules"
|
|
26
27
|
],
|
|
27
28
|
"dependencies": {
|
|
29
|
+
"@iarna/toml": "^2.2.5",
|
|
30
|
+
"@noble/hashes": "^1.5.0",
|
|
31
|
+
"@unrdf/core": "26.4.3",
|
|
32
|
+
"@unrdf/oxigraph": "26.4.3",
|
|
33
|
+
"@unrdf/streaming": "26.4.3",
|
|
34
|
+
"@xenova/transformers": "^2.17.2",
|
|
28
35
|
"eyereasoner": "^18.23.0",
|
|
29
|
-
"
|
|
30
|
-
"@unrdf/streaming": "5.0.1"
|
|
36
|
+
"zod": "^3.25.76"
|
|
31
37
|
},
|
|
32
38
|
"devDependencies": {
|
|
33
39
|
"@types/node": "^24.10.1",
|
|
@@ -53,11 +59,11 @@
|
|
|
53
59
|
"test": "vitest run --coverage",
|
|
54
60
|
"test:fast": "vitest run --coverage",
|
|
55
61
|
"test:watch": "vitest --coverage",
|
|
56
|
-
"build": "
|
|
62
|
+
"build": "unbuild || true",
|
|
57
63
|
"lint": "eslint src/ test/ --max-warnings=0",
|
|
58
64
|
"lint:fix": "eslint src/ test/ --fix",
|
|
59
|
-
"format": "prettier --write src/
|
|
60
|
-
"format:check": "prettier --check src/
|
|
65
|
+
"format": "prettier --write src/",
|
|
66
|
+
"format:check": "prettier --check src/",
|
|
61
67
|
"clean": "rm -rf dist/ .nyc_output/ coverage/",
|
|
62
68
|
"dev": "echo 'Development mode for @unrdf/knowledge-engine'"
|
|
63
69
|
}
|
|
@@ -0,0 +1,371 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AI-Enhanced Knowledge Graph Search
|
|
3
|
+
* Semantic similarity search on RDF triples using transformer models
|
|
4
|
+
*
|
|
5
|
+
* @module @unrdf/knowledge-engine/ai-enhanced-search
|
|
6
|
+
* @description
|
|
7
|
+
* Integrates Xenova Transformers (WASM-based) for semantic embeddings
|
|
8
|
+
* and similarity search over RDF knowledge graphs. Enables natural
|
|
9
|
+
* language queries against structured RDF data with vector similarity.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { z } from 'zod';
|
|
13
|
+
import { pipeline, env } from '@xenova/transformers';
|
|
14
|
+
import { query as sparqlQuery } from './query.mjs';
|
|
15
|
+
import { KnowledgeSubstrateCore } from './knowledge-substrate-core.mjs';
|
|
16
|
+
|
|
17
|
+
// Restrict Transformers.js to local models: remote model downloads are disabled.
// NOTE(review): this mutates the shared `env` object at import time, while
// package.json declares "sideEffects": false — confirm that is intended.
Object.assign(env, {
  allowRemoteModels: false,
  allowLocalModels: true,
});
|
|
20
|
+
|
|
21
|
+
// =============================================================================
|
|
22
|
+
// Configuration Schemas
|
|
23
|
+
// =============================================================================
|
|
24
|
+
|
|
25
|
+
/**
 * Zod schema for the AI search engine configuration.
 *
 * Every field has a default, so an empty object parses to a full config.
 */
export const AISearchConfigSchema = z.object({
  // Name of the embedding model handed to the feature-extraction pipeline.
  model: z.string().default('Xenova/all-MiniLM-L6-v2'),

  // Upper bound on the number of results a search returns.
  topK: z.number().int().positive().default(10),

  // Minimum similarity (inclusive, 0-1) a triple must reach to be returned.
  threshold: z.number().min(0).max(1).default(0.5),

  // When true, embeddings are memoised per input text.
  cache: z.boolean().default(true),

  // Number of texts handled per batch during bulk embedding.
  batchSize: z.number().int().positive().default(32),
});
|
|
40
|
+
|
|
41
|
+
/**
 * Zod schema describing a single semantic-search hit.
 */
export const SearchResultSchema = z.object({
  // The matched RDF triple, with each term given as a plain string.
  triple: z.object({
    subject: z.string(),
    predicate: z.string(),
    object: z.string(),
  }),

  // Similarity between the query and the triple; must lie in [0, 1].
  score: z.number().min(0).max(1),

  // Embedding vector of the triple (optional).
  embedding: z.array(z.number()).optional(),

  // Free-form extra data keyed by string (optional).
  metadata: z.record(z.string(), z.any()).optional(),
});
|
|
58
|
+
|
|
59
|
+
// =============================================================================
|
|
60
|
+
// AI-Enhanced Search Engine
|
|
61
|
+
// =============================================================================
|
|
62
|
+
|
|
63
|
+
/**
|
|
64
|
+
* Creates an AI-enhanced semantic search engine for RDF knowledge graphs
|
|
65
|
+
*
|
|
66
|
+
* @param {Object} store - UNRDF store instance
|
|
67
|
+
* @param {Object} config - Search configuration
|
|
68
|
+
* @returns {Promise<Object>} Search engine instance
|
|
69
|
+
*
|
|
70
|
+
* @example
|
|
71
|
+
* const core = await createKnowledgeSubstrateCore();
|
|
72
|
+
* const searchEngine = await createAISearchEngine(core.store, {
|
|
73
|
+
* model: 'Xenova/all-MiniLM-L6-v2',
|
|
74
|
+
* topK: 5,
|
|
75
|
+
* threshold: 0.7
|
|
76
|
+
* });
|
|
77
|
+
*
|
|
78
|
+
* const results = await searchEngine.search('machine learning algorithms');
|
|
79
|
+
*/
|
|
80
|
+
export async function createAISearchEngine(store, config = {}) {
|
|
81
|
+
const validated = AISearchConfigSchema.parse(config);
|
|
82
|
+
|
|
83
|
+
// Initialize embedding pipeline
|
|
84
|
+
let embeddingPipeline = null;
|
|
85
|
+
const embeddingCache = new Map();
|
|
86
|
+
|
|
87
|
+
/**
 * Lazily create the feature-extraction pipeline for the configured model and
 * reuse the same instance on later calls.
 *
 * @private
 * @returns {Promise<Function>} The loaded embedding pipeline
 * @throws {Error} When the pipeline cannot be created; the underlying error is
 *   attached as `cause` so its stack is not lost.
 */
async function initModel() {
  if (embeddingPipeline) {
    return embeddingPipeline;
  }

  try {
    embeddingPipeline = await pipeline('feature-extraction', validated.model);
  } catch (error) {
    throw new Error(`Failed to load embedding model: ${error.message}`, { cause: error });
  }

  return embeddingPipeline;
}
|
|
101
|
+
|
|
102
|
+
/**
 * Turn a piece of text into an embedding vector.
 *
 * When caching is enabled in the engine config, a vector computed earlier for
 * the same text is returned straight from the cache.
 *
 * @param {string} text - Text to embed
 * @returns {Promise<Array<number>>} Embedding vector
 */
async function embed(text) {
  const cached = validated.cache && embeddingCache.has(text);
  if (cached) {
    return embeddingCache.get(text);
  }

  const extractor = await initModel();

  // Mean-pool the token embeddings and normalise the pooled vector.
  const { data } = await extractor(text, { pooling: 'mean', normalize: true });
  const vector = Array.from(data);

  if (validated.cache) {
    embeddingCache.set(text, vector);
  }

  return vector;
}
|
|
127
|
+
|
|
128
|
+
/**
 * Calculate the cosine similarity between two vectors.
 *
 * @param {Array<number>} a - First vector
 * @param {Array<number>} b - Second vector
 * @returns {number} Similarity in [-1, 1]; 0 when either vector has zero
 *   magnitude (the similarity is undefined there and would otherwise be NaN)
 * @throws {Error} When the vectors differ in length
 */
function cosineSimilarity(a, b) {
  if (a.length !== b.length) {
    throw new Error('Vectors must have same length');
  }

  let dotProduct = 0;
  let normA = 0;
  let normB = 0;

  for (let i = 0; i < a.length; i++) {
    dotProduct += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }

  const denominator = Math.sqrt(normA) * Math.sqrt(normB);
  if (denominator === 0) {
    return 0;
  }

  // Floating-point rounding can push the ratio marginally outside [-1, 1]
  // (e.g. 1.0000000000000002 for identical vectors), so clamp it.
  return Math.max(-1, Math.min(1, dotProduct / denominator));
}
|
|
152
|
+
|
|
153
|
+
/**
 * Search the knowledge graph with a natural-language query.
 *
 * Fetches up to 1000 triples whose object is a literal, embeds the text
 * "<subject> <predicate> <object>" of each one, and ranks them by cosine
 * similarity to the query embedding.
 *
 * @param {string} queryText - Natural language query
 * @param {Object} [options] - Per-call overrides of the engine config
 * @param {number} [options.topK] - Maximum number of results
 * @param {number} [options.threshold] - Minimum similarity (inclusive)
 * @param {boolean} [options.includeEmbeddings] - Attach each triple's embedding
 * @returns {Promise<Array<Object>>} Search results ranked by similarity
 */
async function search(queryText, options = {}) {
  // Ignore keys explicitly set to undefined so they cannot wipe out the
  // validated defaults (a `threshold: undefined` would otherwise match nothing).
  const overrides = Object.fromEntries(
    Object.entries(options ?? {}).filter(([, value]) => value !== undefined)
  );
  const opts = { ...validated, ...overrides };

  // Generate query embedding
  const queryEmbedding = await embed(queryText);

  // Fetch literal-valued triples from the knowledge graph
  const sparqlQueryText = `
    SELECT ?s ?p ?o
    WHERE {
      ?s ?p ?o .
      FILTER(isLiteral(?o))
    }
    LIMIT 1000
  `;

  // NOTE(review): assumes the query helper returns an object exposing
  // `.bindings` with term objects carrying `.value` — confirm against ./query.mjs.
  const triples = await sparqlQuery(store, sparqlQueryText);

  const results = [];

  for (const binding of triples.bindings || []) {
    const subject = binding.s?.value || '';
    const predicate = binding.p?.value || '';
    const object = binding.o?.value || '';

    const tripleEmbedding = await embed(`${subject} ${predicate} ${object}`);

    // Rounding can yield a score a hair above 1 for identical texts, which the
    // result schema (max 1) would reject; cap it. NaN stays NaN and is filtered.
    const score = Math.min(1, cosineSimilarity(queryEmbedding, tripleEmbedding));

    if (score >= opts.threshold) {
      results.push({
        triple: { subject, predicate, object },
        score,
        embedding: opts.includeEmbeddings ? tripleEmbedding : undefined,
      });
    }
  }

  // Sort by score descending and take top K
  results.sort((a, b) => b.score - a.score);

  return results.slice(0, opts.topK).map((result) => SearchResultSchema.parse(result));
}
|
|
205
|
+
|
|
206
|
+
/**
 * Look up triples that are semantically close to a reference triple.
 *
 * The reference is flattened to "<subject> <predicate> <object>" and handed to
 * `search` as the query text.
 *
 * @param {Object} triple - Reference triple with subject, predicate and object
 * @param {Object} options - Search options, forwarded to `search`
 * @returns {Promise<Array<Object>>} Similar triples
 */
async function findSimilar(triple, options = {}) {
  const referenceText = [triple.subject, triple.predicate, triple.object]
    .map((term) => `${term}`)
    .join(' ');

  return search(referenceText, options);
}
|
|
217
|
+
|
|
218
|
+
/**
 * Group triples by semantic similarity.
 *
 * Fetches up to 500 literal-valued triples, embeds each one, picks random
 * triples as centroids and assigns every triple to its most similar centroid.
 * This is a single assignment pass: centroids are not refined afterwards.
 *
 * @param {number} numClusters - Number of clusters to create
 * @returns {Promise<Array<Array<Object>>>} `numClusters` arrays of
 *   `{ triple, embedding }`; all empty when the graph has no matching triples
 * @throws {RangeError} When numClusters is not a positive integer
 */
async function cluster(numClusters = 5) {
  if (!Number.isInteger(numClusters) || numClusters < 1) {
    throw new RangeError('numClusters must be a positive integer');
  }

  // Fetch literal-valued triples
  const sparqlQueryText = `
    SELECT ?s ?p ?o
    WHERE {
      ?s ?p ?o .
      FILTER(isLiteral(?o))
    }
    LIMIT 500
  `;

  const triples = await sparqlQuery(store, sparqlQueryText);

  // Generate embeddings for all triples
  const embeddings = [];
  const tripleData = [];

  for (const binding of triples.bindings || []) {
    const subject = binding.s?.value || '';
    const predicate = binding.p?.value || '';
    const object = binding.o?.value || '';

    embeddings.push(await embed(`${subject} ${predicate} ${object}`));
    tripleData.push({ subject, predicate, object });
  }

  const clusters = Array.from({ length: numClusters }, () => []);

  // Nothing to cluster: return the empty buckets instead of failing while
  // trying to pick a centroid from an empty list.
  if (embeddings.length === 0) {
    return clusters;
  }

  // Shuffle the indices (Fisher-Yates) so centroids are distinct triples
  // whenever there are at least numClusters of them; indices only repeat when
  // more clusters are requested than triples exist.
  const order = embeddings.map((_, index) => index);
  for (let i = order.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    [order[i], order[j]] = [order[j], order[i]];
  }
  const centroids = Array.from({ length: numClusters }, (_, i) => [
    ...embeddings[order[i % order.length]],
  ]);

  // Assign each triple to nearest centroid
  for (let i = 0; i < embeddings.length; i++) {
    let maxSim = -1;
    let bestCluster = 0;

    for (let j = 0; j < numClusters; j++) {
      const sim = cosineSimilarity(embeddings[i], centroids[j]);
      if (sim > maxSim) {
        maxSim = sim;
        bestCluster = j;
      }
    }

    clusters[bestCluster].push({
      triple: tripleData[i],
      embedding: embeddings[i],
    });
  }

  return clusters;
}
|
|
284
|
+
|
|
285
|
+
/**
 * Report the current state of the engine.
 *
 * @returns {Object} `cacheSize` (number of cached embeddings), `modelLoaded`
 *   (whether the pipeline has been created) and `config` (the validated config)
 */
function getStats() {
  const { size: cacheSize } = embeddingCache;
  const modelLoaded = embeddingPipeline !== null;

  return { cacheSize, modelLoaded, config: validated };
}
|
|
297
|
+
|
|
298
|
+
/**
 * Drop every cached embedding; the loaded model is left untouched.
 *
 * @returns {void}
 */
function clearCache() {
  embeddingCache.clear();
}
|
|
304
|
+
|
|
305
|
+
return {
|
|
306
|
+
search,
|
|
307
|
+
findSimilar,
|
|
308
|
+
cluster,
|
|
309
|
+
embed,
|
|
310
|
+
getStats,
|
|
311
|
+
clearCache,
|
|
312
|
+
};
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
// =============================================================================
|
|
316
|
+
// Utility Functions
|
|
317
|
+
// =============================================================================
|
|
318
|
+
|
|
319
|
+
/**
 * Perform a one-off semantic search without keeping an engine instance around.
 *
 * A throwaway engine is created from `options`, queried once, and its
 * embedding cache is cleared afterwards even if the search fails.
 *
 * @param {Object} store - UNRDF store instance
 * @param {string} query - Natural language query
 * @param {Object} options - Engine configuration (model, topK, threshold, ...)
 * @param {boolean} [options.includeEmbeddings] - Attach embeddings to results
 * @returns {Promise<Array<Object>>} Search results
 *
 * @example
 * const results = await semanticSearch(store, 'artificial intelligence', {
 *   topK: 5,
 *   threshold: 0.7
 * });
 */
export async function semanticSearch(store, query, options = {}) {
  const engine = await createAISearchEngine(store, options);

  try {
    // `includeEmbeddings` is not part of the engine config schema, so it must be
    // forwarded to the search call explicitly to take effect.
    return await engine.search(query, { includeEmbeddings: options.includeEmbeddings });
  } finally {
    engine.clearCache();
  }
}
|
|
339
|
+
|
|
340
|
+
/**
 * Generate embeddings for multiple texts, `batchSize` texts per model call.
 *
 * @param {Array<string>} texts - Array of texts to embed
 * @param {Object} config - Configuration options (model, batchSize, ...)
 * @returns {Promise<Array<Array<number>>>} One embedding per input text, in
 *   input order
 *
 * @example
 * const embeddings = await batchEmbed([
 *   'machine learning',
 *   'artificial intelligence',
 *   'neural networks'
 * ]);
 */
export async function batchEmbed(texts, config = {}) {
  const validated = AISearchConfigSchema.parse(config);
  const model = await pipeline('feature-extraction', validated.model);

  const embeddings = [];

  for (let start = 0; start < texts.length; start += validated.batchSize) {
    const batch = texts.slice(start, start + validated.batchSize);

    // Hand the whole batch to the pipeline in one call so batchSize actually
    // controls how many texts are processed together.
    const output = await model(batch, { pooling: 'mean', normalize: true });

    // NOTE(review): assumes the pooled output is a row-major [batch, dim]
    // tensor whose `data` holds one vector per input text — confirm against
    // the Transformers.js feature-extraction documentation.
    const width = output.data.length / batch.length;
    for (let row = 0; row < batch.length; row++) {
      embeddings.push(Array.from(output.data.slice(row * width, (row + 1) * width)));
    }
  }

  return embeddings;
}
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Z-Score Anomaly Detection for UNRDF Observability
|
|
3
|
+
* @module anomaly-detector
|
|
4
|
+
*
|
|
5
|
+
* @description
|
|
6
|
+
* Implements statistical anomaly detection using z-score analysis
|
|
7
|
+
* for latency spikes and throughput drops in OTEL metrics.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { z } from 'zod';
|
|
11
|
+
|
|
12
|
+
/**
 * Zod schema for the anomaly detector configuration.
 *
 * Every field has a default, so an empty object parses to a full config.
 */
export const AnomalyDetectorConfigSchema = z.object({
  // |z| above which a sample counts as anomalous (3 sigma = 99.7% confidence).
  zScoreThreshold: z.number().positive().default(3.0),

  // Number of most recent samples kept per metric (rolling window).
  windowSize: z.number().int().positive().default(100),

  // Samples required in the window before any detection is attempted.
  minSamples: z.number().int().positive().default(10),

  // Minimum gap between two alerts for the same metric (default: 1 minute).
  cooldownMs: z.number().int().positive().default(60000),
});
|
|
21
|
+
|
|
22
|
+
/**
 * Zod schema for the event produced when an anomaly is detected.
 */
export const AnomalyEventSchema = z.object({
  // Time of detection, as returned by Date.now().
  timestamp: z.number(),
  // Name of the metric the sample belongs to.
  metric: z.string(),
  // The offending sample.
  value: z.number(),
  // Mean of the metric's rolling window at detection time.
  mean: z.number(),
  // Standard deviation of the metric's rolling window at detection time.
  stdDev: z.number(),
  // Signed z-score of the sample.
  zScore: z.number(),
  // Severity bucket derived from |zScore|.
  severity: z.enum(['low', 'medium', 'high', 'critical']),
  // Human-readable summary of the event.
  message: z.string(),
});
|
|
35
|
+
|
|
36
|
+
/**
 * Z-score based anomaly detector.
 *
 * Keeps a bounded rolling window of samples per metric and flags a new sample
 * when its z-score against that window exceeds the configured threshold.
 *
 * The newest sample is part of the window it is scored against, so for a
 * window of n samples |z| can never exceed sqrt(n - 1). With the defaults
 * (minSamples 10, threshold 3.0) the first alert is only possible from the
 * 11th sample on.
 */
export class AnomalyDetector {
  /**
   * Create anomaly detector
   * @param {Object} [config] - Configuration, validated against
   *   AnomalyDetectorConfigSchema
   */
  constructor(config = {}) {
    this.config = AnomalyDetectorConfigSchema.parse(config);
    this.windows = new Map(); // metric -> rolling window of values
    this.lastAlerts = new Map(); // metric -> timestamp of last alert
    this.stats = new Map(); // metric -> {mean, stdDev}
    this.anomalyListeners = [];
  }

  /**
   * Add a metric sample and check it for an anomaly.
   *
   * @param {string} metric - Metric name (e.g., 'transaction_latency', 'throughput')
   * @param {number} value - Metric value; non-finite values (NaN, ±Infinity,
   *   non-numbers) are ignored and not recorded
   * @returns {Object|null} Anomaly event if detected, null otherwise
   */
  addSample(metric, value) {
    // A single NaN/Infinity would turn the window's mean and stdDev into NaN
    // and silently disable detection until it is shifted out, so drop it.
    if (!Number.isFinite(value)) {
      return null;
    }

    let window = this.windows.get(metric);
    if (!window) {
      window = [];
      this.windows.set(metric, window);
    }
    window.push(value);

    // Keep window size bounded
    if (window.length > this.config.windowSize) {
      window.shift();
    }

    // Need minimum samples for statistical significance
    if (window.length < this.config.minSamples) {
      return null;
    }

    const stats = this._calculateStats(window);
    this.stats.set(metric, stats);

    // All samples identical: the z-score is undefined (0/0), nothing to flag.
    if (stats.stdDev === 0) {
      return null;
    }

    const zScore = (value - stats.mean) / stats.stdDev;
    const absZScore = Math.abs(zScore);

    // Written as a negated ">" so a NaN z-score is also treated as "no anomaly".
    if (!(absZScore > this.config.zScoreThreshold)) {
      return null;
    }

    // Check cooldown to prevent alert spam
    const now = Date.now();
    const lastAlert = this.lastAlerts.get(metric);
    if (lastAlert !== undefined && now - lastAlert < this.config.cooldownMs) {
      return null; // Still in cooldown
    }

    const severity = this._calculateSeverity(absZScore);
    const anomaly = AnomalyEventSchema.parse({
      timestamp: now,
      metric,
      value,
      mean: stats.mean,
      stdDev: stats.stdDev,
      zScore,
      severity,
      message: this._generateMessage(metric, value, stats, zScore, severity),
    });

    this.lastAlerts.set(metric, now);
    this._notifyListeners(anomaly);

    return anomaly;
  }

  /**
   * Calculate mean and population standard deviation (divides by n).
   * @param {Array<number>} values - Values
   * @returns {{mean: number, stdDev: number}}
   * @private
   */
  _calculateStats(values) {
    const n = values.length;
    const mean = values.reduce((sum, v) => sum + v, 0) / n;
    const variance = values.reduce((sum, v) => sum + Math.pow(v - mean, 2), 0) / n;
    const stdDev = Math.sqrt(variance);
    return { mean, stdDev };
  }

  /**
   * Calculate severity based on z-score
   * @param {number} absZScore - Absolute z-score
   * @returns {string} Severity level: 'critical' (>= 5), 'high' (>= 4),
   *   'medium' (>= 3.5) or 'low' (anything below)
   * @private
   */
  _calculateSeverity(absZScore) {
    if (absZScore >= 5.0) return 'critical';
    if (absZScore >= 4.0) return 'high';
    if (absZScore >= 3.5) return 'medium';
    return 'low';
  }

  /**
   * Generate human-readable message
   * @param {string} metric - Metric name
   * @param {number} value - Current value
   * @param {Object} stats - Statistics ({mean, stdDev})
   * @param {number} zScore - Signed z-score; positive is reported as a spike,
   *   otherwise as a drop
   * @param {string} severity - Severity
   * @returns {string} Message
   * @private
   */
  _generateMessage(metric, value, stats, zScore, severity) {
    const direction = zScore > 0 ? 'spike' : 'drop';
    const deviation = Math.abs(zScore).toFixed(2);
    return `[${severity.toUpperCase()}] ${metric} ${direction}: ${value.toFixed(2)} (${deviation}σ from mean ${stats.mean.toFixed(2)})`;
  }

  /**
   * Register anomaly listener
   * @param {Function} listener - Callback(anomaly)
   * @returns {Function} Call it to remove this listener again
   */
  onAnomaly(listener) {
    this.anomalyListeners.push(listener);

    return () => {
      const index = this.anomalyListeners.indexOf(listener);
      if (index !== -1) {
        this.anomalyListeners.splice(index, 1);
      }
    };
  }

  /**
   * Notify all listeners; a throwing listener is logged and does not stop the
   * remaining ones.
   * @param {Object} anomaly - Anomaly event
   * @private
   */
  _notifyListeners(anomaly) {
    // Iterate over a snapshot so a listener that unsubscribes while being
    // notified cannot cause another listener to be skipped.
    for (const listener of [...this.anomalyListeners]) {
      try {
        listener(anomaly);
      } catch (error) {
        console.error('[AnomalyDetector] Listener error:', error);
      }
    }
  }

  /**
   * Get statistics for a metric
   * @param {string} metric - Metric name
   * @returns {{mean: number, stdDev: number, samples: number}|null} null until
   *   statistics have been computed for the metric (i.e. before minSamples)
   */
  getStats(metric) {
    const stats = this.stats.get(metric);
    const window = this.windows.get(metric);
    if (!stats || !window) return null;
    return {
      ...stats,
      samples: window.length,
    };
  }

  /**
   * Reset detector state for all metrics; registered listeners are kept.
   */
  reset() {
    this.windows.clear();
    this.lastAlerts.clear();
    this.stats.clear();
  }

  /**
   * Clear the window, cooldown timestamp and statistics of one metric
   * @param {string} metric - Metric name
   */
  clearMetric(metric) {
    this.windows.delete(metric);
    this.lastAlerts.delete(metric);
    this.stats.delete(metric);
  }
}
|
|
218
|
+
|
|
219
|
+
/**
 * Factory wrapper around the AnomalyDetector constructor.
 *
 * @param {Object} [config] - Configuration, passed to the constructor as-is
 * @returns {AnomalyDetector} A new detector instance
 */
export function createAnomalyDetector(config = {}) {
  const detector = new AnomalyDetector(config);
  return detector;
}
|