@unrdf/kgc-probe 26.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,193 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * @fileoverview Canonical Observation record schema for KGC Probe
4
+ *
5
+ * Observation = atomic unit of knowledge extraction from codebase analysis.
6
+ * All probes emit Observation records that conform to this schema.
7
+ *
8
+ * Design principles:
9
+ * - Immutable: Once created, never modified
10
+ * - Deterministic: Same input → same Observation
11
+ * - Composable: Multiple Observations → Knowledge Graph
12
+ * - Receipt-driven: Each Observation includes hash for verification
13
+ */
14
+
15
+ import { z } from 'zod';
16
+
17
/**
 * Zod enum of the severity levels an Observation may carry
 * (described by the authors as aligned with OTEL).
 *
 * - trace: fine-grained execution flow
 * - debug: developer diagnostics
 * - info:  informational observations
 * - warn:  potential issues
 * - error: actionable problems
 * - fatal: critical failures
 *
 * @typedef {'trace' | 'debug' | 'info' | 'warn' | 'error' | 'fatal'} ObservationSeverity
 */
export const ObservationSeveritySchema = z.enum(['trace', 'debug', 'info', 'warn', 'error', 'fatal']);
29
+
30
/**
 * Zod enum naming which aspect of the codebase an Observation is about.
 *
 * - file:          file-level observations (imports, exports, structure)
 * - dependency:    package dependencies, versions, vulnerabilities
 * - pattern:       code patterns, anti-patterns, idioms
 * - metric:        complexity, LOC, coupling metrics
 * - security:      security issues, secrets, vulnerabilities
 * - quality:       code quality, style violations
 * - performance:   performance characteristics, benchmarks
 * - test:          test coverage, assertions, flakiness
 * - documentation: docs completeness, accuracy
 * - guard:         poka-yoke guard denials (forbidden operations)
 *
 * @typedef {'file' | 'dependency' | 'pattern' | 'metric' | 'security' | 'quality' | 'performance' | 'test' | 'documentation' | 'guard'} ObservationCategory
 */
export const ObservationCategorySchema = z.enum([
  'file',
  'dependency',
  'pattern',
  'metric',
  'security',
  'quality',
  'performance',
  'test',
  'documentation',
  'guard',
]);
46
+
47
/**
 * Builds the field schema shared by every position in a source location:
 * an optional, strictly positive integer carrying the given description.
 *
 * @param {string} description - Text attached to the field via `.describe()`
 */
const optionalPositiveInt = (description) =>
  z.number().int().positive().optional().describe(description);

/**
 * Position of an observation inside a file.
 *
 * Only `file` is required; the four position fields are optional and, when
 * present, must be positive integers.
 */
export const SourceLocationSchema = z.object({
  file: z.string().describe('Absolute file path'),
  line: optionalPositiveInt('Line number (1-indexed)'),
  column: optionalPositiveInt('Column number (1-indexed)'),
  endLine: optionalPositiveInt('End line number'),
  endColumn: optionalPositiveInt('End column number'),
});
57
+
58
/**
 * Provenance attached to every Observation: who produced it, when, with which
 * probe version, and how much of its time budget it consumed.
 *
 * All five fields are required by this schema.
 */
export const ObservationMetadataSchema = z.object({
  // Who
  agentId: z
    .string()
    .describe('Agent that created this observation (e.g., "agent-2-import-analyzer")'),

  // When
  timestamp: z
    .string()
    .datetime()
    .describe('ISO 8601 timestamp'),

  // With what
  probeVersion: z
    .string()
    .describe('KGC Probe version'),

  // Budget must be strictly positive; time actually spent may be zero.
  budgetMs: z
    .number()
    .int()
    .positive()
    .describe('Time budget allocated (ms)'),
  actualMs: z
    .number()
    .int()
    .nonnegative()
    .describe('Actual time spent (ms)'),
});
68
+
69
/**
 * Canonical Observation record emitted by probes.
 *
 * Required: id, category, severity, message (non-empty), data, metadata.
 * Optional: location, receiptHash. `tags` falls back to an empty array when
 * it is absent from the input.
 *
 * @typedef {Object} Observation
 * @property {string} id - Unique observation ID (documented as a hash of deterministic content)
 * @property {ObservationCategory} category - Aspect of the codebase being observed
 * @property {ObservationSeverity} severity - Importance level
 * @property {string} message - Human-readable summary (at least one character)
 * @property {SourceLocation} [location] - Where in the codebase; omitted for global observations
 * @property {Record<string, any>} data - Structured observation payload
 * @property {ObservationMetadata} metadata - Who/when/how
 * @property {string[]} tags - Searchable tags for filtering (defaults to [])
 * @property {string} [receiptHash] - Hash of this observation for verification
 */
export const ObservationSchema = z.object({
  // Identity and classification
  id: z
    .string()
    .describe('Unique observation ID (deterministic hash)'),
  category: ObservationCategorySchema,
  severity: ObservationSeveritySchema,

  // Content
  message: z
    .string()
    .min(1)
    .describe('Human-readable summary'),
  location: SourceLocationSchema
    .optional()
    .describe('Source location (optional for global observations)'),
  data: z
    .record(z.string(), z.any())
    .describe('Structured observation payload'),

  // Provenance and lookup
  metadata: ObservationMetadataSchema,
  tags: z
    .array(z.string())
    .default([])
    .describe('Searchable tags'),
  receiptHash: z
    .string()
    .optional()
    .describe('Hash of this observation for verification'),
});
94
+
95
/**
 * Alias export: the same object as ObservationSchema under the name
 * ObservationType.
 *
 * NOTE(review): despite the "Type" name this is the runtime zod schema, not a
 * static type; use `z.infer<typeof ObservationSchema>` for the inferred type.
 */
export const ObservationType = ObservationSchema;
99
+
100
/**
 * Factory function to create Observations with defaults.
 *
 * Fields the caller leaves out (or passes as null/undefined) are filled in:
 * - id:        random UUID (per the original note, intended to be replaced by
 *              a content hash in receipt.mjs)
 * - severity:  'info'
 * - tags:      []
 * - data:      {}
 * - metadata.timestamp: current time as an ISO 8601 string
 *
 * Caller-supplied metadata is merged with the default timestamp rather than
 * replacing it, so passing `metadata: { agentId, ... }` without a timestamp
 * still yields a timestamped record.
 *
 * @param {Partial<z.infer<typeof ObservationSchema>>} obs - Partial observation
 * @returns {z.infer<typeof ObservationSchema>} - Validated observation
 * @throws {z.ZodError} if the merged record does not satisfy ObservationSchema
 */
export function createObservation(obs = {}) {
  const { metadata, ...fields } = obs;

  // Defaults are applied AFTER spreading the caller's fields so that a key
  // that is present but undefined cannot wipe out its default.
  const merged = {
    ...fields,
    id: fields.id ?? crypto.randomUUID(),
    severity: fields.severity ?? 'info',
    tags: fields.tags ?? [],
    data: fields.data ?? {},
    metadata: {
      ...metadata,
      timestamp: metadata?.timestamp ?? new Date().toISOString(),
    },
  };

  return ObservationSchema.parse(merged);
}
121
+
122
/**
 * Build the Observation recorded when a poka-yoke guard blocks an operation.
 *
 * The record is always category 'guard' / severity 'warn', is tagged with
 * 'guard-denial' plus the guard name, and carries the context after it has
 * been passed through sanitizeContext. Metadata uses fixed values
 * (probeVersion '0.1.0', budgetMs 1, actualMs 0) and the current time.
 *
 * @param {Object} params
 * @param {string} params.guardName - Name of guard that triggered
 * @param {string} params.reason - Why operation was denied
 * @param {string} params.agentId - Agent that attempted the operation
 * @param {Record<string, any>} [params.context={}] - Additional context (sanitized before use)
 * @returns {z.infer<typeof ObservationSchema>}
 */
export function createGuardDenial({ guardName, reason, agentId, context = {} }) {
  const payload = {
    guardName,
    reason,
    context: sanitizeContext(context),
  };

  const provenance = {
    agentId,
    probeVersion: '0.1.0',
    budgetMs: 1, // Guards have minimal overhead
    actualMs: 0,
    timestamp: new Date().toISOString(),
  };

  const denial = {
    category: 'guard',
    severity: 'warn',
    message: `Guard '${guardName}' denied operation: ${reason}`,
    data: payload,
    metadata: provenance,
    tags: ['guard-denial', guardName],
  };

  return createObservation(denial);
}
152
+
153
/**
 * Sanitize context to remove sensitive data before logging.
 *
 * Returns a copy of `context` in which the value of every property whose name
 * contains (case-insensitively) one of: password, token, secret, key, auth,
 * credential is replaced by the string '[REDACTED]'. The check is applied at
 * every depth: plain objects and arrays nested inside the context are copied
 * and scrubbed too, so a secret cannot slip through inside a nested structure.
 * Other values (primitives, Dates, Maps, class instances, ...) are passed
 * through unchanged. The input is never mutated and cyclic references are
 * preserved in the copy.
 *
 * @param {Record<string, any>} context
 * @returns {Record<string, any>}
 */
function sanitizeContext(context) {
  const REDACTED = '[REDACTED]';
  const sensitiveKeys = ['password', 'token', 'secret', 'key', 'auth', 'credential'];

  const isSensitive = (name) => {
    const lowered = name.toLowerCase();
    return sensitiveKeys.some((sk) => lowered.includes(sk));
  };

  // Maps each visited source object to its sanitized copy so that cycles and
  // shared references terminate instead of recursing forever.
  const copies = new WeakMap();

  const scrubObject = (source) => {
    // Spreading first keeps own-property semantics (including a literal
    // "__proto__" key) identical to a plain shallow copy.
    const copy = { ...source };
    if (source !== null && typeof source === 'object') {
      copies.set(source, copy);
    }
    for (const name of Object.keys(copy)) {
      copy[name] = isSensitive(name) ? REDACTED : scrubValue(copy[name]);
    }
    return copy;
  };

  const scrubValue = (value) => {
    if (value === null || typeof value !== 'object') {
      return value;
    }
    if (copies.has(value)) {
      return copies.get(value);
    }
    if (Array.isArray(value)) {
      const copy = [];
      copies.set(value, copy);
      for (const item of value) {
        copy.push(scrubValue(item));
      }
      return copy;
    }
    const proto = Object.getPrototypeOf(value);
    if (proto !== Object.prototype && proto !== null) {
      // Not a plain object (Date, Map, class instance...): leave untouched.
      return value;
    }
    return scrubObject(value);
  };

  return scrubObject(context);
}
172
+
173
/**
 * Check an arbitrary value against ObservationSchema.
 *
 * @param {unknown} data - Candidate observation record
 * @returns {z.infer<typeof ObservationSchema>} The value produced by the schema's parse step
 * @throws {z.ZodError} if validation fails
 */
export function validateObservation(data) {
  const observation = ObservationSchema.parse(data);
  return observation;
}
183
+
184
// Default export: the schemas and helper functions of this module gathered in
// one object. ObservationType is not included here; it is a named export only.
export default {
  ObservationSchema,
  ObservationSeveritySchema,
  ObservationCategorySchema,
  SourceLocationSchema,
  ObservationMetadataSchema,
  createObservation,
  createGuardDenial,
  validateObservation
};
@@ -0,0 +1,315 @@
1
+ /**
2
+ * @fileoverview KGC Probe - Orchestrator
3
+ *
4
+ * ProbeOrchestrator coordinates agent execution, guard validation,
5
+ * shard merging, and artifact generation with deterministic hashing.
6
+ *
7
+ * Algorithm: OrchestrateScan (5-phase execution)
8
+ *
9
+ * @module @unrdf/kgc-probe/orchestrator
10
+ */
11
+
12
+ import { randomUUID } from 'crypto';
13
+ import { ProbeConfigSchema, validateProbeConfig } from './types.mjs';
14
+ import { createGuardRegistry } from './guards.mjs';
15
+ import { createAgentRegistry } from './agents/index.mjs';
16
+ import { hashObservations, computeArtifactSummary } from './artifact.mjs';
17
+
18
/**
 * Turn any thrown value into a printable message.
 * Errors yield their `message`; anything else (string, null, ...) is stringified,
 * so reading the message can never itself throw inside a catch block.
 *
 * @param {unknown} err - Value caught by a `catch` clause
 * @returns {string} Message text
 */
function toErrorMessage(err) {
  return err?.message ?? String(err);
}

/**
 * ProbeOrchestrator - Main orchestration engine
 *
 * Coordinates:
 * 1. Parallel agent execution
 * 2. Guard validation
 * 3. Shard merging
 * 4. Artifact generation
 *
 * Progress is reported through a small event emitter (`on` / `emit`).
 *
 * @class ProbeOrchestrator
 */
export class ProbeOrchestrator {
  /**
   * Create orchestrator with storage backend
   * @param {Object} options - Configuration
   * @param {Storage} options.storage - Storage backend (memory/file/db)
   * @param {GuardRegistry} [options.guards] - Custom guard registry (default: createGuardRegistry())
   * @param {AgentRegistry} [options.agents] - Custom agent registry (default: createAgentRegistry())
   * @throws {Error} if no storage backend is supplied
   */
  constructor(options = {}) {
    if (!options.storage) {
      throw new Error('ProbeOrchestrator requires storage backend');
    }

    this.storage = options.storage;
    this.guards = options.guards || createGuardRegistry();
    this.agents = options.agents || createAgentRegistry();

    /** @type {Map<string, Function[]>} - Event name -> registered listeners */
    this.listeners = new Map();
  }

  /**
   * Register event listener
   * @param {string} event - Event name ('agent_complete', 'guard_violation', etc.)
   * @param {Function} callback - Callback function
   */
  on(event, callback) {
    if (!this.listeners.has(event)) {
      this.listeners.set(event, []);
    }
    this.listeners.get(event).push(callback);
  }

  /**
   * Emit event to registered listeners.
   * A listener that throws is logged and does not stop the remaining listeners.
   * @param {string} event - Event name
   * @param {unknown} data - Event data
   * @private
   */
  emit(event, data) {
    const callbacks = this.listeners.get(event) || [];
    for (const cb of callbacks) {
      try {
        cb(data);
      } catch (err) {
        console.error(`[ProbeOrchestrator] Listener error for ${event}:`, err);
      }
    }
  }

  /**
   * Execute full integrity scan
   *
   * Phase 1: Initialization
   * Phase 2: Parallel agent execution
   * Phase 3: Guard validation
   * Phase 4: Shard merging
   * Phase 5: Artifact generation and persistence
   *
   * Failures of individual agents, guards, shard merging or persistence are
   * collected in `errors` and downgrade the status to 'partial'; they do not
   * reject the returned promise.
   *
   * @param {Object} scanConfig - Scan configuration
   * @param {string} scanConfig.universe_id - Universe to scan
   * @param {string} [scanConfig.snapshot_id] - Optional snapshot reference
   * @param {string[]} [scanConfig.agents] - Agent IDs (all if omitted)
   * @param {string[]} [scanConfig.guards] - Guard IDs (all if omitted)
   * @param {boolean} [scanConfig.distributed] - Enable shard merging
   * @param {boolean} [scanConfig.persist] - Save to storage
   * @returns {Promise<ScanResult>} `{ artifact, status: 'success' | 'partial', errors }`
   * @throws {Error} if the configuration is rejected by validateProbeConfig
   */
  async scan(scanConfig) {
    // Validate configuration
    let config;
    try {
      config = validateProbeConfig(scanConfig);
    } catch (err) {
      // Keep the original error reachable for callers that want the details.
      throw new Error(`Invalid probe config: ${toErrorMessage(err)}`, { cause: err });
    }

    const startTime = Date.now();
    const runId = randomUUID();
    const observations = [];
    const errors = [];

    try {
      // Phase 1: Initialization
      this.emit('scan_start', { runId, config });

      // Phase 2: Parallel Agent Execution
      const agentIds = config.agents || this.agents.list();

      // Each agent writes into its own bucket; buckets are concatenated in
      // agentIds order afterwards so the observation order does not depend on
      // which agent happens to finish first.
      const buckets = agentIds.map(() => []);
      const agentPromises = agentIds.map((agentId, index) =>
        this.executeAgent(agentId, config, buckets[index], errors)
      );

      await Promise.allSettled(agentPromises);

      for (const bucket of buckets) {
        for (const observation of bucket) {
          observations.push(observation);
        }
      }

      this.emit('agents_complete', {
        runId,
        agentCount: agentIds.length,
        observationCount: observations.length
      });

      // Phase 3: Guard Validation
      const guardIds = config.guards || this.guards.list();
      for (const guardId of guardIds) {
        try {
          const violations = this.guards.validate(guardId, observations);
          for (const violation of violations) {
            // Each violation becomes an observation of its own.
            observations.push({
              id: randomUUID(),
              agent: `guard:${guardId}`,
              timestamp: new Date().toISOString(),
              kind: 'guard_violation',
              severity: violation.severity,
              subject: 'artifact:self',
              evidence: {
                query: `guard_${guardId}`,
                result: violation.details,
                witnesses: []
              },
              metrics: {
                confidence: 1.0,
                coverage: 1.0,
                latency_ms: 0
              },
              tags: ['guard', guardId]
            });
          }
          this.emit('guard_complete', { guardId, violations: violations.length });
        } catch (err) {
          const message = toErrorMessage(err);
          errors.push({ guard: guardId, error: message });
          this.emit('guard_error', { guardId, error: message });
        }
      }

      // Phase 4: Shard Merging
      // Start from the hash of the empty string so that every run without
      // usable shards (non-distributed, no shards, or a failed fetch) reports
      // the same shard_hash.
      let shardHash = await this.hashString('');
      let shardCount = 1;

      if (config.distributed) {
        try {
          const shards = await this.storage.fetchShards?.();
          if (shards && shards.length > 0) {
            shardCount = shards.length;
            // Hash all shards together for determinism
            const shardData = shards.map(s => s.probe_run_id).sort().join('|');
            shardHash = await this.hashString(shardData);
            this.emit('shards_merged', { shardCount, shardHash });
          }
        } catch (err) {
          const message = toErrorMessage(err);
          errors.push({ operation: 'shard_merge', error: message });
          this.emit('shard_merge_error', { error: message });
        }
      }

      // Phase 5: Artifact Generation
      const endTime = Date.now();
      const executionTime = endTime - startTime;

      const artifact = {
        version: '1.0',
        universe_id: config.universe_id,
        snapshot_id: config.snapshot_id || 'current',
        generated_at: new Date(endTime).toISOString(),
        probe_run_id: runId,
        shard_count: shardCount,
        shard_hash: shardHash,
        observations: observations,
        summary: computeArtifactSummary(observations),
        metadata: {
          agents_run: agentIds,
          guards_applied: guardIds,
          execution_time_ms: executionTime,
          storage_backend: this.storage.type,
          config: config
        },
        integrity: {
          checksum: await hashObservations(observations),
          verified_at: null
        }
      };

      // Persistence
      if (config.persist) {
        try {
          await this.storage.saveArtifact(artifact);
          this.emit('artifact_saved', { runId, artifactSize: observations.length });
        } catch (err) {
          const message = toErrorMessage(err);
          errors.push({ operation: 'persist', error: message });
          this.emit('artifact_save_error', { error: message });
        }
      }

      const status = errors.length === 0 ? 'success' : 'partial';

      this.emit('scan_complete', {
        runId,
        status,
        executionTime,
        observationCount: observations.length
      });

      return {
        artifact,
        status,
        errors
      };
    } catch (err) {
      this.emit('scan_error', { runId, error: toErrorMessage(err) });
      throw err;
    }
  }

  /**
   * Execute single agent.
   *
   * Never rejects: a missing agent or a failing scan is recorded in `errors`
   * and announced via 'agent_error'. An agent whose scan resolves to something
   * other than an array contributes no observations and counts as completed.
   *
   * @param {string} agentId - Agent identifier
   * @param {Object} config - Scan config
   * @param {Array} observations - Array that receives the agent's observations
   * @param {Array} errors - Shared errors array
   * @private
   */
  async executeAgent(agentId, config, observations, errors) {
    const startTime = Date.now();
    try {
      const agent = this.agents.get(agentId);
      if (!agent) {
        throw new Error(`Agent not found: ${agentId}`);
      }

      // Call agent scan (would need store passed in real impl)
      const results = await agent.scan(config);
      const produced = Array.isArray(results) ? results : [];

      // Append one by one: spreading a very large array into push() can
      // exceed the engine's argument limit.
      for (const observation of produced) {
        observations.push(observation);
      }

      const endTime = Date.now();
      this.emit('agent_complete', {
        agentId,
        observationCount: produced.length,
        latency: endTime - startTime
      });
    } catch (err) {
      const message = toErrorMessage(err);
      errors.push({ agent: agentId, error: message });
      this.emit('agent_error', { agentId, error: message });
    }
  }

  /**
   * Placeholder for a Blake3 string hash.
   *
   * NOTE: this is not a real hash yet. The returned value is the fixed prefix
   * 'blake3_placeholder_' followed by the UTF-8 byte length of `data` in hex,
   * left-padded to 64 characters, so any two inputs of equal byte length
   * produce the same result.
   *
   * @param {string} data - Data to hash
   * @returns {Promise<string>} Placeholder hash string
   * @private
   */
  async hashString(data) {
    // In production, use hash-wasm for Blake3
    // For now, return placeholder
    const encoder = new TextEncoder();
    const buffer = encoder.encode(data);
    // Would be: blake3(buffer).then(h => h.toString('hex'))
    return 'blake3_placeholder_' + buffer.length.toString(16).padStart(64, '0');
  }

  /**
   * Load artifact from storage (delegates to `storage.loadArtifact`)
   * @param {string} artifactId - Artifact ID
   * @returns {Promise<Artifact>} Loaded artifact
   */
  async loadArtifact(artifactId) {
    return this.storage.loadArtifact(artifactId);
  }

  /**
   * List all artifacts in storage (delegates to `storage.listArtifacts`)
   * @returns {Promise<Artifact[]>} Array of artifacts
   */
  async listArtifacts() {
    return this.storage.listArtifacts();
  }
}
307
+
308
/**
 * Factory wrapper around the ProbeOrchestrator constructor.
 *
 * @param {Object} options - Passed unchanged to `new ProbeOrchestrator(options)`
 * @returns {ProbeOrchestrator} Freshly constructed orchestrator
 */
export function createProbeOrchestrator(options) {
  const orchestrator = new ProbeOrchestrator(options);
  return orchestrator;
}
package/src/probe.mjs ADDED
@@ -0,0 +1,58 @@
1
+ /**
2
+ * @fileoverview KGC Probe - Convenience Function
3
+ *
4
+ * High-level runProbe() for executing a complete scan with one call.
5
+ *
6
+ * @module @unrdf/kgc-probe/probe
7
+ */
8
+
9
+ import { createProbeOrchestrator } from './orchestrator.mjs';
10
+ import { createMemoryStorage } from './storage/index.mjs';
11
+
12
/**
 * One-call probe scan.
 *
 * Builds an orchestrator around `config.storage` (or a fresh in-memory store
 * from createMemoryStorage), runs a scan with the options below and resolves
 * to the resulting artifact.
 *
 * @param {Object} config - Probe configuration
 * @param {string} config.universe_id - Universe to scan (required)
 * @param {string} [config.snapshot_id] - Optional snapshot
 * @param {string[]} [config.agents] - Forwarded to the scan unchanged
 * @param {string[]} [config.guards] - Forwarded to the scan unchanged
 * @param {boolean} [config.distributed=false] - Forwarded to the scan; falsy values become false
 * @param {boolean} [config.persist=true] - Forwarded to the scan; only an explicit `false` disables it
 * @param {Object} [config.storage] - Custom storage backend
 * @returns {Promise<Artifact>} Generated artifact
 * @throws {Error} if `config` or `config.universe_id` is missing, or if the
 *   scan result reports status 'failed'
 *
 * @example
 * const artifact = await runProbe({
 *   universe_id: 'my-universe',
 *   snapshot_id: 'snap_123'
 * });
 *
 * console.log(artifact.summary);
 */
export async function runProbe(config) {
  if (!config?.universe_id) {
    throw new Error('runProbe requires universe_id in config');
  }

  // Provided storage wins; otherwise fall back to the in-memory default.
  const storage = config.storage || createMemoryStorage();
  const orchestrator = createProbeOrchestrator({ storage });

  const scanOptions = {
    universe_id: config.universe_id,
    snapshot_id: config.snapshot_id,
    agents: config.agents,
    guards: config.guards,
    distributed: config.distributed || false,
    persist: config.persist !== false, // Default true
  };
  const { status, errors, artifact } = await orchestrator.scan(scanOptions);

  if (status !== 'failed') {
    return artifact;
  }

  const reasons = errors.map((entry) => entry.error).join(', ');
  throw new Error(`Probe scan failed: ${reasons}`);
}