@unrdf/kgc-probe 26.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,414 @@
1
+ # @unrdf/kgc-probe
2
+
3
+ Automated knowledge graph integrity scanning with 10 specialized agents and deterministic artifact validation.
4
+
5
+ ## Overview
6
+
7
+ KGC Probe provides a comprehensive integrity scanning framework for RDF knowledge graphs with:
8
+
9
+ - **10 Specialized Agents**: Completion, completeness, consistency, conformance, coverage, caching, coherence, clustering, classification, and collaboration analysis
10
+ - **Guard-Based Validation**: Quality checks, severity limits, and completeness verification
11
+ - **Deterministic Artifacts**: Blake3 hashed observations for reproducible scans
12
+ - **Distributed Shard Merging**: Multi-node probe result aggregation
13
+ - **Multiple Storage Backends**: Memory, file system, or database-backed
14
+ - **CLI Integration**: Full `kgc probe` command suite
15
+
16
+ ## Installation
17
+
18
+ ```bash
19
+ pnpm add @unrdf/kgc-probe
20
+ ```
21
+
22
+ ## Quick Start
23
+
24
+ ### Basic Scan
25
+
26
+ ```javascript
27
+ import { runProbe } from '@unrdf/kgc-probe';
28
+
29
+ const artifact = await runProbe({
30
+ universe_id: 'my-universe',
31
+ snapshot_id: 'snap_123'
32
+ });
33
+
34
+ console.log(artifact.summary);
35
+ // {
36
+ // total: 42,
37
+ // by_kind: { completeness: 15, consistency: 12, ... },
38
+ // by_severity: { critical: 2, warning: 8, info: 32 },
39
+ // confidence_mean: 0.89,
40
+ // coverage_mean: 0.92
41
+ // }
42
+ ```
43
+
44
+ ### Advanced Orchestration
45
+
46
+ ```javascript
47
+ import { createProbeOrchestrator, createMemoryStorage } from '@unrdf/kgc-probe';
48
+
49
+ const orchestrator = createProbeOrchestrator({
50
+ storage: createMemoryStorage()
51
+ });
52
+
53
+ // Listen to events
54
+ orchestrator.on('agent_complete', (result) => {
55
+ console.log(`Agent ${result.agentId} found ${result.observationCount} observations`);
56
+ });
57
+
58
+ // Run scan
59
+ const result = await orchestrator.scan({
60
+ universe_id: 'my-universe',
61
+ agents: ['completion', 'consistency', 'conformance'], // Specific agents
62
+ distributed: true // Enable shard merging
63
+ });
64
+
65
+ console.log(result.status); // 'success' | 'partial' | 'failed'
66
+ ```
67
+
68
+ ## CLI Usage
69
+
70
+ ```bash
71
+ # Run full scan
72
+ kgc probe scan --args '{"universe":"my-universe"}' --json
73
+
74
+ # List agents
75
+ kgc agent list
76
+
77
+ # List guards
78
+ kgc guard list
79
+
80
+ # Validate artifact
81
+ kgc probe validate --args '{"artifact_id":"run-123"}'
82
+
83
+ # Diff two artifacts
84
+ kgc probe diff --args '{"artifact1_id":"run-1","artifact2_id":"run-2"}'
85
+
86
+ # Merge shards
87
+ kgc shard merge --args '{"universe":"my-universe"}'
88
+ ```
89
+
90
+ ## Architecture
91
+
92
+ ### 10 Agents
93
+
94
+ | Agent | Kind | Purpose |
95
+ |-------|------|---------|
96
+ | **Completion** | completeness | Missing required properties |
97
+ | **Consistency** | consistency | Value conflicts and contradictions |
98
+ | **Conformance** | conformance | SHACL shape violations |
99
+ | **Coverage** | coverage | Triple density and reachability |
100
+ | **Caching** | caching | Cache staleness and efficiency |
101
+ | **Completeness** | completeness_level | Data population ratios |
102
+ | **Coherence** | coherence | Semantic inconsistencies |
103
+ | **Clustering** | clustering | Entity grouping patterns |
104
+ | **Classification** | classification | Type hierarchy issues |
105
+ | **Collaboration** | collaboration | Cross-agent finding fusion |
106
+
107
+ ### 5 Guards
108
+
109
+ | Guard | Purpose |
110
+ |-------|---------|
111
+ | **quality_check** | Validates confidence and observation count |
112
+ | **completeness_check** | Verifies coverage thresholds |
113
+ | **severity_limit** | Enforces maximum critical observations |
114
+ | **integrity_check** | Validates observation structure |
115
+ | **agent_coverage** | Ensures agent diversity |
116
+
117
+ ### Observation Schema
118
+
119
+ ```typescript
120
+ interface Observation {
121
+ id: string; // UUID
122
+ agent: string; // Agent identifier
123
+ timestamp: ISO8601; // When observed
124
+ kind: string; // Observation type
125
+ severity: 'critical' | 'warning' | 'info';
126
+ subject: string; // RDF node
127
+ predicate?: string; // RDF property
128
+ object?: string; // RDF value
129
+ evidence: {
130
+ query: string; // SPARQL or algorithm
131
+ result: unknown; // Computation result
132
+ witnesses: string[]; // Confirming references
133
+ };
134
+ metrics: {
135
+ confidence: number; // [0, 1]
136
+ coverage: number; // [0, 1]
137
+ latency_ms: number;
138
+ };
139
+ tags?: string[];
140
+ xid?: string; // Correlation ID
141
+ }
142
+ ```
143
+
144
+ ### Artifact Schema
145
+
146
+ ```typescript
147
+ interface Artifact {
148
+ version: '1.0';
149
+ universe_id: string;
150
+ snapshot_id: string;
151
+ generated_at: ISO8601;
152
+ probe_run_id: UUID;
153
+ shard_count: number;
154
+ shard_hash: hex64;
155
+ observations: Observation[];
156
+ summary: {
157
+ total: number;
158
+ by_kind: Record<string, number>;
159
+ by_severity: Record<'critical' | 'warning' | 'info', number>;
160
+ confidence_mean: number;
161
+ coverage_mean: number;
162
+ };
163
+ metadata: {
164
+ agents_run: string[];
165
+ guards_applied: string[];
166
+ execution_time_ms: number;
167
+ storage_backend: string;
168
+ config: ProbeConfig;
169
+ };
170
+ integrity: {
171
+ checksum: hex64;
172
+ signature?: string;
173
+ verified_at?: ISO8601;
174
+ };
175
+ }
176
+ ```
177
+
178
+ ## Storage Backends
179
+
180
+ ### Memory Storage
181
+
182
+ In-process Map-based storage. Best for testing and single-process deployments.
183
+
184
+ ```javascript
185
+ import { createMemoryStorage } from '@unrdf/kgc-probe';
186
+
187
+ const storage = createMemoryStorage();
188
+ await storage.saveArtifact(artifact);
189
+ const loaded = await storage.loadArtifact(artifact.probe_run_id);
190
+ ```
191
+
192
+ ### File Storage
193
+
194
+ Filesystem-based storage with JSON serialization.
195
+
196
+ ```javascript
197
+ import { createFileStorage } from '@unrdf/kgc-probe';
198
+
199
+ const storage = createFileStorage('./artifacts');
200
+ await storage.saveArtifact(artifact);
201
+ const shards = await storage.fetchShards(); // Merge-ready artifacts
202
+ ```
203
+
204
+ ### Database Storage
205
+
206
+ KGC Substrate-backed storage using RDF quads (production).
207
+
208
+ ```javascript
209
+ import { createDatabaseStorage } from '@unrdf/kgc-probe';
210
+
211
+ const storage = createDatabaseStorage({
212
+ store: kgcSubstrateStore,
213
+ namespace: 'https://probe.unrdf.org/'
214
+ });
215
+ ```
216
+
217
+ ## Operations
218
+
219
+ ### Merge Shards
220
+
221
+ Deterministic merge of distributed probe results with deduplication:
222
+
223
+ ```javascript
224
+ import { mergeShards } from '@unrdf/kgc-probe';
225
+
226
+ const artifacts = await storage.fetchShards();
227
+ const merged = await mergeShards(artifacts, newObservations);
228
+ // Merged observations are deduplicated and sorted deterministically
229
+ ```
230
+
231
+ ### Diff Artifacts
232
+
233
+ Compare two artifacts to identify changes:
234
+
235
+ ```javascript
236
+ import { diffArtifacts } from '@unrdf/kgc-probe';
237
+
238
+ const diff = diffArtifacts(artifact1, artifact2);
239
+ // {
240
+ // added: Observation[],
241
+ // removed: Observation[],
242
+ // modified: { subject, predicate, before, after }[],
243
+ // summary: { total_changes, similarity_ratio }
244
+ // }
245
+ ```
246
+
247
+ ### Verify Artifact
248
+
249
+ Validate artifact integrity and schema compliance:
250
+
251
+ ```javascript
252
+ import { verifyArtifact } from '@unrdf/kgc-probe';
253
+
254
+ const result = await verifyArtifact(artifact);
255
+ // {
256
+ // valid: boolean,
257
+ // errors: string[],
258
+ // verified_at: ISO8601
259
+ // }
260
+ ```
261
+
262
+ ## Complexity Analysis
263
+
264
+ | Operation | Time | Space |
265
+ |-----------|------|-------|
266
+ | scan() | O(n × m) | O(o) |
267
+ | mergeShards() | O(s log s) | O(s) |
268
+ | diffArtifacts() | O(o) | O(o) |
269
+ | hashObservations() | O(o log o) | O(o) |
270
+
271
+ - n = triples in graph
272
+ - m = agents (parallel)
273
+ - o = observations
274
+ - s = shard observations
275
+
276
+ ## Configuration
277
+
278
+ ### ProbeConfig
279
+
280
+ ```typescript
281
+ interface ProbeConfig {
282
+ universe_id: string; // Required
283
+ snapshot_id?: string; // Optional snapshot reference
284
+ agents?: string[]; // Specific agent IDs (all if omitted)
285
+ guards?: string[]; // Specific guard IDs
286
+ distributed?: boolean; // Enable shard merging (default: false)
287
+ persist?: boolean; // Save to storage (default: true)
288
+ timeout_ms?: number; // Scan timeout (default: 300000)
289
+ batch_size?: number; // Observation batch size (default: 100)
290
+ }
291
+ ```
292
+
293
+ ## Examples
294
+
295
+ ### Full Workflow
296
+
297
+ ```javascript
298
+ import {
299
+ createProbeOrchestrator,
300
+ createFileStorage,
301
+ createGuardRegistry,
302
+ diffArtifacts,
303
+ verifyArtifact
304
+ } from '@unrdf/kgc-probe';
305
+
306
+ // Setup
307
+ const storage = createFileStorage('./probes');
308
+ const orchestrator = createProbeOrchestrator({ storage });
309
+
310
+ // Scan 1
311
+ const result1 = await orchestrator.scan({
312
+ universe_id: 'my-universe',
313
+ persist: true
314
+ });
315
+ const artifact1 = result1.artifact;
316
+
317
+ // Later: Scan 2
318
+ const result2 = await orchestrator.scan({
319
+ universe_id: 'my-universe',
320
+ persist: true
321
+ });
322
+ const artifact2 = result2.artifact;
323
+
324
+ // Compare
325
+ const diff = diffArtifacts(artifact1, artifact2);
326
+ console.log(`Changes: ${diff.summary.total_changes}`);
327
+ console.log(`Similarity: ${(diff.summary.similarity_ratio * 100).toFixed(1)}%`);
328
+
329
+ // Verify
330
+ const verification = await verifyArtifact(artifact2);
331
+ console.log(`Valid: ${verification.valid}`);
332
+ ```
333
+
334
+ ## Design Patterns
335
+
336
+ ### Factory Pattern
337
+
338
+ Configurable components (the orchestrator, registries, and storage backends) are created through factory functions for configuration flexibility:
339
+
340
+ ```javascript
341
+ const orchestrator = createProbeOrchestrator({ storage });
342
+ const registry = createGuardRegistry();
343
+ const storage = createMemoryStorage();
344
+ ```
345
+
346
+ ### Registry Pattern
347
+
348
+ Agents and guards use a registry for dynamic registration:
349
+
350
+ ```javascript
351
+ const agents = createAgentRegistry();
352
+ agents.register('custom-agent', new MyAgent());
353
+ const agent = agents.get('custom-agent');
354
+ ```
355
+
356
+ ### Observer Pattern
357
+
358
+ Orchestrator emits events for monitoring:
359
+
360
+ ```javascript
361
+ orchestrator.on('agent_complete', handler);
362
+ orchestrator.on('guard_violation', handler);
363
+ orchestrator.on('scan_complete', handler);
364
+ ```
365
+
366
+ ### Strategy Pattern
367
+
368
+ Storage is pluggable with multiple implementations:
369
+
370
+ ```javascript
371
+ // Swap backends without changing code
372
+ const storage = process.env.NODE_ENV === 'production'
373
+ ? createDatabaseStorage(opts)
374
+ : createMemoryStorage();
375
+ ```
376
+
377
+ ## Performance Characteristics
378
+
379
+ - **Scan time**: Dominated by agent SPARQL queries (varies by graph size)
380
+ - **Memory**: O(observations) - typically 10-100KB per 1000 observations
381
+ - **Determinism**: Blake3 hashing ensures identical results for same input
382
+
383
+ ## Testing
384
+
385
+ ```bash
386
+ pnpm test # Run all tests
387
+ pnpm test:watch # Watch mode
388
+ pnpm exec vitest run --coverage # Coverage report
389
+ ```
390
+
391
+ ## Dependencies
392
+
393
+ - `@unrdf/kgc-substrate` - Knowledge store interface
394
+ - `@unrdf/kgc-4d` - 4D snapshot and universe management
395
+ - `@unrdf/v6-core` - Core UNRDF types and utilities
396
+ - `@unrdf/oxigraph` - SPARQL query engine
397
+ - `@unrdf/hooks` - Hook and policy framework
398
+ - `@unrdf/yawl` - Workflow orchestration
399
+ - `hash-wasm` - Blake3 hashing
400
+ - `zod` - Runtime validation schemas
401
+
402
+ ## License
403
+
404
+ MIT
405
+
406
+ ## Contributing
407
+
408
+ See [CONTRIBUTING.md](../../CONTRIBUTING.md)
409
+
410
+ ## References
411
+
412
+ - [KGC Probe Design](../../DESIGN_KGC_PROBE_PACKAGE.md)
413
+ - [SPARC Methodology](../../docs/sparc-methodology.md)
414
+ - [UNRDF Documentation](https://unrdf.org)
package/package.json ADDED
@@ -0,0 +1,81 @@
1
+ {
2
+ "name": "@unrdf/kgc-probe",
3
+ "version": "26.4.2",
4
+ "description": "KGC Probe - Automated knowledge graph integrity scanning with 10 agents and artifact validation",
5
+ "type": "module",
6
+ "main": "./src/index.mjs",
7
+ "exports": {
8
+ ".": "./src/index.mjs",
9
+ "./orchestrator": "./src/orchestrator.mjs",
10
+ "./guards": "./src/guards.mjs",
11
+ "./agents": "./src/agents/index.mjs",
12
+ "./storage": "./src/storage/index.mjs",
13
+ "./types": "./src/types.mjs",
14
+ "./artifact": "./src/artifact.mjs",
15
+ "./cli": "./src/cli.mjs",
16
+ "./utils": "./src/utils/index.mjs",
17
+ "./utils/logger": "./src/utils/logger.mjs",
18
+ "./utils/errors": "./src/utils/errors.mjs"
19
+ },
20
+ "sideEffects": false,
21
+ "files": [
22
+ "src/",
23
+ "README.md",
24
+ "LICENSE"
25
+ ],
26
+ "scripts": {
27
+ "test": "vitest run --coverage",
28
+ "test:fast": "vitest run --coverage",
29
+ "test:watch": "vitest --coverage",
30
+ "lint": "eslint src/ test/ --max-warnings=0",
31
+ "lint:fix": "eslint src/ test/ --fix",
32
+ "format": "prettier --write src/",
33
+ "format:check": "prettier --check src/",
34
+ "build": "echo 'Build complete (pure ESM, no compilation needed)'",
35
+ "validate": "npm run lint && npm run test"
36
+ },
37
+ "keywords": [
38
+ "rdf",
39
+ "knowledge-graph",
40
+ "probe",
41
+ "integrity",
42
+ "agents",
43
+ "validation",
44
+ "kgc",
45
+ "deterministic",
46
+ "artifact-verification"
47
+ ],
48
+ "dependencies": {
49
+ "@unrdf/kgc-substrate": "workspace:*",
50
+ "@unrdf/kgc-4d": "workspace:*",
51
+ "@unrdf/v6-core": "workspace:*",
52
+ "@unrdf/oxigraph": "workspace:*",
53
+ "@unrdf/hooks": "workspace:*",
54
+ "@unrdf/yawl": "workspace:*",
55
+ "hash-wasm": "^4.12.0",
56
+ "zod": "^4.1.13"
57
+ },
58
+ "devDependencies": {
59
+ "@types/node": "^24.10.1",
60
+ "vitest": "^4.0.15",
61
+ "@vitest/coverage-v8": "^4.0.15"
62
+ },
63
+ "engines": {
64
+ "node": ">=18.0.0",
65
+ "pnpm": ">=7.0.0"
66
+ },
67
+ "repository": {
68
+ "type": "git",
69
+ "url": "https://github.com/unrdf/unrdf.git",
70
+ "directory": "packages/kgc-probe"
71
+ },
72
+ "bugs": {
73
+ "url": "https://github.com/unrdf/unrdf/issues"
74
+ },
75
+ "homepage": "https://github.com/unrdf/unrdf#readme",
76
+ "license": "MIT",
77
+ "author": "UNRDF Contributors",
78
+ "publishConfig": {
79
+ "access": "public"
80
+ }
81
+ }