@unrdf/kgc-probe 26.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +414 -0
- package/package.json +81 -0
- package/src/agents/index.mjs +1402 -0
- package/src/artifact.mjs +405 -0
- package/src/cli.mjs +932 -0
- package/src/config.mjs +115 -0
- package/src/guards.mjs +1213 -0
- package/src/index.mjs +347 -0
- package/src/merge.mjs +196 -0
- package/src/observation.mjs +193 -0
- package/src/orchestrator.mjs +315 -0
- package/src/probe.mjs +58 -0
- package/src/probes/CONCURRENCY-PROBE.md +256 -0
- package/src/probes/README.md +275 -0
- package/src/probes/concurrency.mjs +1175 -0
- package/src/probes/filesystem.mjs +731 -0
- package/src/probes/filesystem.test.mjs +244 -0
- package/src/probes/network.mjs +503 -0
- package/src/probes/performance.mjs +816 -0
- package/src/probes/persistence.mjs +785 -0
- package/src/probes/runtime.mjs +589 -0
- package/src/probes/tooling.mjs +454 -0
- package/src/probes/tooling.test.mjs +372 -0
- package/src/probes/verify-execution.mjs +131 -0
- package/src/probes/verify-guards.mjs +73 -0
- package/src/probes/wasm.mjs +715 -0
- package/src/receipt.mjs +197 -0
- package/src/receipts/index.mjs +813 -0
- package/src/reporter.example.mjs +223 -0
- package/src/reporter.mjs +555 -0
- package/src/reporters/markdown.mjs +355 -0
- package/src/reporters/rdf.mjs +383 -0
- package/src/storage/index.mjs +827 -0
- package/src/types.mjs +1028 -0
- package/src/utils/errors.mjs +397 -0
- package/src/utils/index.mjs +32 -0
- package/src/utils/logger.mjs +236 -0
- package/src/vocabulary.ttl +169 -0
package/src/artifact.mjs
ADDED
|
@@ -0,0 +1,405 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview KGC Probe - Artifact Operations
|
|
3
|
+
*
|
|
4
|
+
* Artifact management operations:
|
|
5
|
+
* - Deterministic hashing (simple placeholder hash for now; Blake3 planned)
|
|
6
|
+
* - Shard merging with deduplication
|
|
7
|
+
* - Diff computation
|
|
8
|
+
* - Verification
|
|
9
|
+
*
|
|
10
|
+
* @module @unrdf/kgc-probe/artifact
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { randomUUID } from 'crypto';
|
|
14
|
+
import { ArtifactSchema, validateArtifact, DiffResultSchema } from './types.mjs';
|
|
15
|
+
|
|
16
|
+
/**
 * ObservationValidator - Validates observations against a minimal schema.
 *
 * Only the presence of the required fields is checked; field types and
 * formats are not inspected here.
 *
 * @class ObservationValidator
 */
export class ObservationValidator {
  /**
   * Validate a single observation.
   *
   * A required field counts as missing when it is `undefined`, `null`, an
   * empty string or `NaN`. Other falsy values (for example a numeric
   * timestamp of `0`) are present and therefore accepted.
   *
   * @param {unknown} data - Data to validate
   * @returns {Object} The same object that was passed in, unchanged
   * @throws {Error} If `data` is not an object or a required field is missing
   */
  validate(data) {
    if (!data || typeof data !== 'object') {
      throw new Error('Observation must be an object');
    }

    const required = ['id', 'agent', 'timestamp', 'kind', 'severity', 'subject'];
    for (const field of required) {
      const value = data[field];
      // Presence check rather than truthiness, so 0 / false are not "missing".
      const isMissing =
        value === undefined ||
        value === null ||
        value === '' ||
        Number.isNaN(value);
      if (isMissing) {
        throw new Error(`Missing required field: ${field}`);
      }
    }

    return data;
  }

  /**
   * Validate a batch of observations.
   *
   * Validation stops at the first invalid observation.
   *
   * @param {Array} observations - Observations to validate
   * @returns {Object[]} New array holding the validated observations
   * @throws {Error} If `observations` is not an array or any entry is invalid
   */
  validateBatch(observations) {
    if (!Array.isArray(observations)) {
      throw new Error('Observations must be an array');
    }
    return observations.map(obs => this.validate(obs));
  }
}
|
|
52
|
+
|
|
53
|
+
/**
 * Factory for ObservationValidator.
 *
 * @returns {ObservationValidator} A freshly constructed validator
 */
export function createObservationValidator() {
  const validator = new ObservationValidator();
  return validator;
}
|
|
60
|
+
|
|
61
|
+
// ============================================================================
|
|
62
|
+
// HASHING OPERATIONS
|
|
63
|
+
// ============================================================================
|
|
64
|
+
|
|
65
|
+
/**
 * Hash observations deterministically.
 *
 * Algorithm:
 * 1. Serialize each observation's core fields to JSON
 * 2. Sort by (agent, timestamp, subject); ties are broken by the serialized
 *    core fields so the result does not depend on the input order
 * 3. Join the serialized entries with '|' and hash the result
 *
 * The digest is produced by computeSimpleHash, a placeholder until a real
 * Blake3 implementation (hash-wasm) is integrated.
 *
 * The input array is not modified.
 *
 * @param {Array} observations - Observations to hash
 * @returns {Promise<string>} 64-character hex digest
 */
export async function hashObservations(observations) {
  const entries = observations.map(obs => ({
    sortKey: `${obs.agent}|${obs.timestamp}|${obs.subject}`,
    serialized: JSON.stringify({
      agent: obs.agent,
      timestamp: obs.timestamp,
      kind: obs.kind,
      subject: obs.subject,
      predicate: obs.predicate,
      object: obs.object,
      severity: obs.severity,
      evidence_query: obs.evidence?.query,
      metrics_confidence: obs.metrics?.confidence,
      metrics_coverage: obs.metrics?.coverage
    })
  }));

  entries.sort((a, b) => {
    // NOTE(review): localeCompare follows the runtime's default locale; kept
    // as the primary ordering so existing orderings are unchanged. Confirm
    // whether a locale-independent comparison is acceptable.
    const primary = a.sortKey.localeCompare(b.sortKey);
    if (primary !== 0) {
      return primary;
    }
    // Tie-break on the exact hashed content so that observations sharing
    // agent/timestamp/subject hash the same way regardless of input order.
    if (a.serialized < b.serialized) {
      return -1;
    }
    if (a.serialized > b.serialized) {
      return 1;
    }
    return 0;
  });

  const combined = entries.map(entry => entry.serialized).join('|');

  // In production, use hash-wasm for Blake3
  // For now, simulate with a deterministic hash
  return computeSimpleHash(combined);
}
|
|
106
|
+
|
|
107
|
+
/**
 * Placeholder deterministic hash (stand-in until hash-wasm is integrated).
 *
 * Folds the UTF-16 code units of the input into a signed 32-bit accumulator
 * (acc = acc * 31 + codeUnit), takes the absolute value, and left-pads the
 * hex form with zeros to 64 characters.
 *
 * @param {string} data - Data to hash
 * @returns {string} 64-character hex string
 * @private
 */
function computeSimpleHash(data) {
  let acc = 0;
  let idx = 0;

  while (idx < data.length) {
    // Math.imul keeps the multiplication in 32-bit space; `| 0` wraps the sum.
    acc = (Math.imul(acc, 31) + data.charCodeAt(idx)) | 0;
    idx += 1;
  }

  // Widen to 64 characters to mimic the length of a Blake3 hex digest.
  return Math.abs(acc).toString(16).padStart(64, '0');
}
|
|
126
|
+
|
|
127
|
+
// ============================================================================
|
|
128
|
+
// SHARD OPERATIONS
|
|
129
|
+
// ============================================================================
|
|
130
|
+
|
|
131
|
+
/**
 * Merge multiple shards with deduplication.
 *
 * Algorithm (Merge):
 * Phase 1: Collect all observations from shards
 * Phase 2: Add new observations
 * Phase 3: Dedup by content (agent, kind, subject, predicate, object);
 *          the first occurrence wins
 * Phase 4: Sort by timestamp, ascending
 *
 * Deduplication compares the content key itself rather than a short hash of
 * it, so two different observations can never be merged by a hash collision.
 * Observations with equal timestamps keep their collection order;
 * observations whose timestamp cannot be parsed are placed last.
 *
 * @param {Array} shards - Array of artifacts to merge; entries without an
 *   `observations` array are skipped
 * @param {Array} newObservations - Additional observations to merge
 * @returns {Promise<Array>} Merged and deduplicated observations
 */
export async function mergeShards(shards, newObservations = []) {
  // Phase 1: Collect all
  const allObservations = [];

  for (const shard of shards ?? []) {
    if (Array.isArray(shard?.observations)) {
      // Element-wise push: spreading a very large array into push() can
      // exceed the engine's argument limit.
      for (const obs of shard.observations) {
        allObservations.push(obs);
      }
    }
  }

  // Phase 2: Add new
  for (const obs of newObservations ?? []) {
    allObservations.push(obs);
  }

  // Phase 3: Dedup by content
  const seenKeys = new Set();
  const deduped = [];

  for (const obs of allObservations) {
    // JSON-encode the stringified parts so a '|' (or any other character)
    // inside a field cannot make two different observations look identical.
    const contentKey = JSON.stringify([
      `${obs.agent}`,
      `${obs.kind}`,
      `${obs.subject}`,
      `${obs.predicate || ''}`,
      `${obs.object || ''}`
    ]);

    if (!seenKeys.has(contentKey)) {
      seenKeys.add(contentKey);
      deduped.push(obs);
    }
  }

  // Phase 4: Sort deterministically
  deduped.sort((a, b) => {
    const aTs = new Date(a.timestamp).getTime();
    const bTs = new Date(b.timestamp).getTime();
    const aInvalid = Number.isNaN(aTs);
    const bInvalid = Number.isNaN(bTs);

    // Keep the comparator consistent when a timestamp is unparseable:
    // invalid timestamps sort after valid ones instead of yielding NaN.
    if (aInvalid || bInvalid) {
      return Number(aInvalid) - Number(bInvalid);
    }
    return aTs - bTs;
  });

  return deduped;
}
|
|
181
|
+
|
|
182
|
+
// ============================================================================
|
|
183
|
+
// DIFF OPERATIONS
|
|
184
|
+
// ============================================================================
|
|
185
|
+
|
|
186
|
+
/**
 * Compute diff between two artifacts.
 *
 * Algorithm (Diff):
 * 1. Index the observations of each artifact by subject|predicate|object
 * 2. Find added (key present in artifact2 only)
 * 3. Find removed (key present in artifact1 only)
 * 4. Find modified: for a subject/predicate present in both artifacts, an
 *    object value that disappeared paired with one that appeared
 * 5. Calculate Jaccard similarity over the distinct keys:
 *    |keys1 ∩ keys2| / |keys1 ∪ keys2| (1.0 when both are empty)
 *
 * A changed object value is reported in `modified` and also appears in
 * `added` and `removed`, because the match key includes the object.
 * At most one modification is reported per subject/predicate pair.
 *
 * @param {Object} artifact1 - First artifact (a missing `observations` is
 *   treated as empty)
 * @param {Object} artifact2 - Second artifact (a missing `observations` is
 *   treated as empty)
 * @returns {Object} Diff result: `added`, `removed`, `modified` arrays and a
 *   `summary` with `total_changes`, `similarity_ratio`, `artifact1_size`,
 *   `artifact2_size`
 */
export function diffArtifacts(artifact1, artifact2) {
  const obs1 = artifact1?.observations || [];
  const obs2 = artifact2?.observations || [];

  // Create keys for matching
  const key = (obs) => `${obs.subject}|${obs.predicate}|${obs.object}`;
  const map1 = new Map(obs1.map(o => [key(o), o]));
  const map2 = new Map(obs2.map(o => [key(o), o]));

  // Find added
  const added = [];
  for (const [k, obs] of map2) {
    if (!map1.has(k)) {
      added.push(obs);
    }
  }

  // Find removed
  const removed = [];
  for (const [k, obs] of map1) {
    if (!map2.has(k)) {
      removed.push(obs);
    }
  }

  // Find modified (same subject/predicate, different object)
  const groupBySubPred = (list) => {
    const groups = new Map();
    for (const obs of list) {
      const k = `${obs.subject}|${obs.predicate}`;
      if (!groups.has(k)) {
        groups.set(k, []);
      }
      groups.get(k).push(obs);
    }
    return groups;
  };

  const groups1 = groupBySubPred(obs1);
  const groups2 = groupBySubPred(obs2);
  const modified = [];

  for (const [k, beforeList] of groups1) {
    const afterList = groups2.get(k);
    if (!afterList) {
      continue;
    }

    // Compare the full sets of object values so that reordering, or adding a
    // value next to an unchanged one, is not misreported as a modification.
    const beforeObjects = new Set(beforeList.map(o => o.object));
    const afterObjects = new Set(afterList.map(o => o.object));
    const oldObservation = beforeList.find(o => !afterObjects.has(o.object));
    const newObservation = afterList.find(o => !beforeObjects.has(o.object));

    if (oldObservation && newObservation) {
      modified.push({
        subject: oldObservation.subject,
        predicate: oldObservation.predicate,
        before: oldObservation.object,
        after: newObservation.object,
        old_observation: oldObservation,
        new_observation: newObservation
      });
    }
  }

  // Calculate Jaccard similarity on distinct keys. Keys in both artifacts are
  // those of artifact1 that were not removed; the union adds the keys that
  // exist only in artifact2.
  const intersection = map1.size - removed.length;
  const union = map1.size + added.length;
  const similarity = union > 0 ? intersection / union : 1.0;

  return {
    added,
    removed,
    modified,
    summary: {
      total_changes: added.length + removed.length + modified.length,
      similarity_ratio: similarity,
      artifact1_size: obs1.length,
      artifact2_size: obs2.length
    }
  };
}
|
|
279
|
+
|
|
280
|
+
// ============================================================================
|
|
281
|
+
// VERIFICATION
|
|
282
|
+
// ============================================================================
|
|
283
|
+
|
|
284
|
+
/**
 * Verify artifact integrity.
 *
 * Verification steps:
 * 1. Validate schema (via validateArtifact)
 * 2. Recompute checksum from observations and compare with
 *    `artifact.integrity.checksum`
 * 3. Recompute the summary and compare its total with `artifact.summary.total`
 *
 * Problems are collected into `errors` rather than thrown, so a malformed
 * artifact (for example one without `integrity`, `summary` or `observations`)
 * yields `valid: false` instead of a TypeError.
 *
 * @param {Object} artifact - Artifact to verify
 * @returns {Promise<{valid: boolean, errors: string[], verified_at: string}>}
 *   Verification result; `verified_at` is an ISO-8601 timestamp
 */
export async function verifyArtifact(artifact) {
  const errors = [];

  try {
    // Validate schema
    validateArtifact(artifact);
  } catch (err) {
    errors.push(`Schema validation failed: ${err.message}`);
  }

  const observations = artifact?.observations;

  if (!Array.isArray(observations)) {
    // Without observations neither the checksum nor the summary can be
    // recomputed; report that instead of crashing further down.
    errors.push('Observations missing or not an array: checksum and summary not verified');
  } else {
    try {
      // Recompute checksum
      const expectedChecksum = await hashObservations(observations);
      const storedChecksum = artifact.integrity?.checksum;

      if (expectedChecksum !== storedChecksum) {
        errors.push(`Checksum mismatch: expected ${expectedChecksum}, got ${storedChecksum}`);
      }

      // Validate summary
      const computedSummary = computeArtifactSummary(observations);
      const storedTotal = artifact.summary?.total;

      if (computedSummary.total !== storedTotal) {
        errors.push(`Summary mismatch: expected ${computedSummary.total} observations, got ${storedTotal}`);
      }
    } catch (err) {
      // e.g. a null entry inside observations makes hashing/summarizing throw
      errors.push(`Integrity check failed: ${err.message}`);
    }
  }

  return {
    valid: errors.length === 0,
    errors,
    verified_at: new Date().toISOString()
  };
}
|
|
324
|
+
|
|
325
|
+
// ============================================================================
|
|
326
|
+
// SUMMARY & SERIALIZATION
|
|
327
|
+
// ============================================================================
|
|
328
|
+
|
|
329
|
+
/**
 * Compute artifact summary from observations.
 *
 * Aggregates:
 * - Total count
 * - Count by kind
 * - Count by severity (only `critical`, `warning` and `info` are counted;
 *   other severities are ignored)
 * - Mean confidence and coverage over the observations that carry a
 *   `metrics` object (a missing or falsy metric counts as 0); both means
 *   are 0 when no observation has metrics
 *
 * @param {Array} observations - Observations to summarize
 * @returns {Object} Summary object with `total`, `by_kind`, `by_severity`,
 *   `confidence_mean` and `coverage_mean`
 */
export function computeArtifactSummary(observations) {
  // Counting in a Map keeps kinds such as "constructor", "toString" or
  // "__proto__" from colliding with properties inherited from
  // Object.prototype, which would corrupt or drop their counts.
  const kindCounts = new Map();
  const severityCounts = {
    critical: 0,
    warning: 0,
    info: 0
  };

  let confidenceSum = 0;
  let coverageSum = 0;
  let metricsCount = 0;

  for (const obs of observations) {
    // Count by kind
    const kind = String(obs.kind);
    kindCounts.set(kind, (kindCounts.get(kind) || 0) + 1);

    // Count by severity
    if (Object.prototype.hasOwnProperty.call(severityCounts, obs.severity)) {
      severityCounts[obs.severity] += 1;
    }

    // Aggregate metrics
    if (obs.metrics) {
      confidenceSum += obs.metrics.confidence || 0;
      coverageSum += obs.metrics.coverage || 0;
      metricsCount += 1;
    }
  }

  return {
    total: observations.length,
    // Object.fromEntries defines own data properties, so every kind,
    // including "__proto__", ends up as a regular key of a plain object.
    by_kind: Object.fromEntries(kindCounts),
    by_severity: severityCounts,
    confidence_mean: metricsCount > 0 ? confidenceSum / metricsCount : 0,
    coverage_mean: metricsCount > 0 ? coverageSum / metricsCount : 0
  };
}
|
|
382
|
+
|
|
383
|
+
/**
 * Render an artifact as pretty-printed JSON (two-space indentation).
 *
 * @param {Object} artifact - Artifact to serialize
 * @returns {string} JSON text
 */
export function serializeArtifact(artifact) {
  const indentWidth = 2;
  const replacer = null;
  const json = JSON.stringify(artifact, replacer, indentWidth);
  return json;
}
|
|
391
|
+
|
|
392
|
+
/**
 * Deserialize artifact from JSON string.
 *
 * Parses the text and passes the parsed value through validateArtifact,
 * returning whatever validateArtifact returns.
 *
 * @param {string} jsonStr - JSON string
 * @returns {Object} Deserialized artifact
 * @throws {Error} If parsing or validation fails; the underlying error is
 *   available as `error.cause`
 */
export function deserializeArtifact(jsonStr) {
  try {
    const data = JSON.parse(jsonStr);
    return validateArtifact(data);
  } catch (err) {
    // Keep the original error (and its stack) reachable for debugging.
    throw new Error(`Failed to deserialize artifact: ${err.message}`, { cause: err });
  }
}
|