npm - @unrdf/kgc-probe - Versions diffs - 26.4.2 - Mend

@unrdf/kgc-probe 26.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

package/README.md +414 -0
package/package.json +81 -0
package/src/agents/index.mjs +1402 -0
package/src/artifact.mjs +405 -0
package/src/cli.mjs +932 -0
package/src/config.mjs +115 -0
package/src/guards.mjs +1213 -0
package/src/index.mjs +347 -0
package/src/merge.mjs +196 -0
package/src/observation.mjs +193 -0
package/src/orchestrator.mjs +315 -0
package/src/probe.mjs +58 -0
package/src/probes/CONCURRENCY-PROBE.md +256 -0
package/src/probes/README.md +275 -0
package/src/probes/concurrency.mjs +1175 -0
package/src/probes/filesystem.mjs +731 -0
package/src/probes/filesystem.test.mjs +244 -0
package/src/probes/network.mjs +503 -0
package/src/probes/performance.mjs +816 -0
package/src/probes/persistence.mjs +785 -0
package/src/probes/runtime.mjs +589 -0
package/src/probes/tooling.mjs +454 -0
package/src/probes/tooling.test.mjs +372 -0
package/src/probes/verify-execution.mjs +131 -0
package/src/probes/verify-guards.mjs +73 -0
package/src/probes/wasm.mjs +715 -0
package/src/receipt.mjs +197 -0
package/src/receipts/index.mjs +813 -0
package/src/reporter.example.mjs +223 -0
package/src/reporter.mjs +555 -0
package/src/reporters/markdown.mjs +355 -0
package/src/reporters/rdf.mjs +383 -0
package/src/storage/index.mjs +827 -0
package/src/types.mjs +1028 -0
package/src/utils/errors.mjs +397 -0
package/src/utils/index.mjs +32 -0
package/src/utils/logger.mjs +236 -0
package/src/vocabulary.ttl +169 -0

package/src/observation.mjs ADDED Viewed

@@ -0,0 +1,193 @@
+#!/usr/bin/env node
+/**
+ * @fileoverview Canonical Observation record schema for KGC Probe
+ *
+ * Observation = atomic unit of knowledge extraction from codebase analysis.
+ * All probes emit Observation records that conform to this schema.
+ *
+ * Design principles:
+ * - Immutable: Once created, never modified
+ * - Deterministic: Same input → same Observation
+ * - Composable: Multiple Observations → Knowledge Graph
+ * - Receipt-driven: Each Observation includes hash for verification
+ */
+import { z } from 'zod';
+/**
+ * Severity levels an Observation may carry (aligned with OTEL):
+ * - trace: fine-grained execution flow
+ * - debug: developer diagnostics
+ * - info:  informational observations
+ * - warn:  potential issues
+ * - error: actionable problems
+ * - fatal: critical failures
+ *
+ * @typedef {'trace' | 'debug' | 'info' | 'warn' | 'error' | 'fatal'} ObservationSeverity
+ */
+export const ObservationSeveritySchema = z.enum(['trace', 'debug', 'info', 'warn', 'error', 'fatal']);
+/**
+ * Observation category - which aspect of the codebase an Observation describes:
+ * - file:          file-level observations (imports, exports, structure)
+ * - dependency:    package dependencies, versions, vulnerabilities
+ * - pattern:       code patterns, anti-patterns, idioms
+ * - metric:        complexity, LOC, coupling metrics
+ * - security:      security issues, secrets, vulnerabilities
+ * - quality:       code quality, style violations
+ * - performance:   performance characteristics, benchmarks
+ * - test:          test coverage, assertions, flakiness
+ * - documentation: docs completeness, accuracy
+ * - guard:         poka-yoke guard denials (forbidden operations)
+ *
+ * @typedef {'file' | 'dependency' | 'pattern' | 'metric' | 'security' | 'quality' | 'performance' | 'test' | 'documentation' | 'guard'} ObservationCategory
+ */
+export const ObservationCategorySchema = z.enum([
+  'file', 'dependency', 'pattern', 'metric', 'security',
+  'quality', 'performance', 'test', 'documentation', 'guard'
+]);
+/**
+ * Position of an observation inside a file.
+ *
+ * Only `file` is required; each line/column field is an optional positive integer.
+ */
+export const SourceLocationSchema = (() => {
+  // Every positional field shares the same "optional positive integer" shape.
+  const optionalPosition = label => z.number().int().positive().optional().describe(label);
+  return z.object({
+    file: z.string().describe('Absolute file path'),
+    line: optionalPosition('Line number (1-indexed)'),
+    column: optionalPosition('Column number (1-indexed)'),
+    endLine: optionalPosition('End line number'),
+    endColumn: optionalPosition('End column number')
+  });
+})();
+/**
+ * Provenance attached to every Observation: which agent produced it, when,
+ * with which probe version, and how its time budget compares to time spent.
+ */
+export const ObservationMetadataSchema = z.object({
+  agentId: z
+    .string()
+    .describe('Agent that created this observation (e.g., "agent-2-import-analyzer")'),
+  timestamp: z
+    .string()
+    .datetime()
+    .describe('ISO 8601 timestamp'),
+  probeVersion: z
+    .string()
+    .describe('KGC Probe version'),
+  // The budget must be strictly positive; the time actually spent may be zero.
+  budgetMs: z
+    .number()
+    .int()
+    .positive()
+    .describe('Time budget allocated (ms)'),
+  actualMs: z
+    .number()
+    .int()
+    .nonnegative()
+    .describe('Actual time spent (ms)')
+});
+/**
+ * Canonical Observation record - the shape every probe emits.
+ *
+ * @typedef {Object} Observation
+ * @property {string} id - Unique observation ID (hash of deterministic content)
+ * @property {ObservationCategory} category - Aspect of the codebase being observed
+ * @property {ObservationSeverity} severity - Importance level
+ * @property {string} message - Non-empty, human-readable summary
+ * @property {SourceLocation} [location] - Where in the codebase (omitted for global observations)
+ * @property {Record<string, any>} data - Structured observation payload
+ * @property {ObservationMetadata} metadata - Who/when/how
+ * @property {string[]} tags - Searchable tags for filtering (defaults to an empty list)
+ * @property {string} [receiptHash] - Hash of this observation for verification
+ */
+export const ObservationSchema = z.object({
+  id: z
+    .string()
+    .describe('Unique observation ID (deterministic hash)'),
+  category: ObservationCategorySchema,
+  severity: ObservationSeveritySchema,
+  message: z
+    .string()
+    .min(1)
+    .describe('Human-readable summary'),
+  location: SourceLocationSchema
+    .optional()
+    .describe('Source location (optional for global observations)'),
+  data: z
+    .record(z.string(), z.any())
+    .describe('Structured observation payload'),
+  metadata: ObservationMetadataSchema,
+  tags: z
+    .array(z.string())
+    .default([])
+    .describe('Searchable tags'),
+  receiptHash: z
+    .string()
+    .optional()
+    .describe('Hash of this observation for verification')
+});
+/**
+ * Alias of ObservationSchema.
+ *
+ * Despite the name this is a runtime value (the same Zod schema object), not a
+ * type-only export.
+ */
+export const ObservationType = ObservationSchema;
+/**
+ * Factory function to create Observations with defaults.
+ *
+ * Fields the caller omits (or passes as a falsy value) are filled in:
+ * - id: a random UUID placeholder (the original note says receipt.mjs later
+ *   replaces it with a hash)
+ * - severity: 'info'
+ * - tags: []
+ * - data: {}
+ * - metadata.timestamp: the current time as an ISO 8601 string
+ *
+ * Caller-supplied metadata is merged on top of the default timestamp rather
+ * than replacing the whole metadata object, so the timestamp default survives
+ * when the caller provides other metadata fields.
+ *
+ * @param {Partial<z.infer<typeof ObservationSchema>>} [obs] - Partial observation
+ * @returns {z.infer<typeof ObservationSchema>} - Validated observation
+ * @throws {z.ZodError} if the merged record does not satisfy ObservationSchema
+ */
+export function createObservation(obs = {}) {
+  const suppliedMetadata = obs.metadata || {};
+  // Spread the caller's fields first, then write the resolved values over them,
+  // so an explicit `undefined` (or a partial metadata object) cannot clobber a default.
+  const merged = {
+    ...obs,
+    id: obs.id || crypto.randomUUID(), // Will be replaced by hash in receipt.mjs
+    severity: obs.severity || 'info',
+    tags: obs.tags || [],
+    data: obs.data || {},
+    metadata: {
+      ...suppliedMetadata,
+      timestamp: suppliedMetadata.timestamp ?? new Date().toISOString()
+    }
+  };
+  return ObservationSchema.parse(merged);
+}
+/**
+ * Build the Observation emitted when a poka-yoke guard blocks an operation.
+ *
+ * The result has category 'guard' and severity 'warn', is tagged with
+ * 'guard-denial' plus the guard name, and carries the context only after it
+ * has been passed through sanitizeContext().
+ *
+ * @param {Object} params
+ * @param {string} params.guardName - Name of guard that triggered
+ * @param {string} params.reason - Why operation was denied
+ * @param {string} params.agentId - Agent that attempted the operation
+ * @param {Record<string, any>} params.context - Additional context (sanitized)
+ * @returns {z.infer<typeof ObservationSchema>}
+ */
+export function createGuardDenial({ guardName, reason, agentId, context = {} }) {
+  const message = `Guard '${guardName}' denied operation: ${reason}`;
+  const payload = {
+    guardName,
+    reason,
+    context: sanitizeContext(context)
+  };
+  const provenance = {
+    agentId,
+    probeVersion: '0.1.0',
+    budgetMs: 1, // Guards have minimal overhead
+    actualMs: 0,
+    timestamp: new Date().toISOString()
+  };
+  return createObservation({
+    category: 'guard',
+    severity: 'warn',
+    message,
+    data: payload,
+    metadata: provenance,
+    tags: ['guard-denial', guardName]
+  });
+}
+/**
+ * Sanitize context to remove sensitive data before logging.
+ *
+ * Any property whose name contains (case-insensitively) one of the sensitive
+ * fragments is replaced with '[REDACTED]'. Redaction is applied recursively
+ * through nested plain objects and arrays, so a secret buried one level down
+ * is masked as well. Other object kinds (Date, Map, class instances, ...) that
+ * appear as nested values are passed through untouched. The input is never
+ * mutated; a sanitized copy is returned.
+ *
+ * @param {Record<string, any>} context
+ * @returns {Record<string, any>}
+ */
+function sanitizeContext(context) {
+  const sensitiveKeys = ['password', 'token', 'secret', 'key', 'auth', 'credential'];
+  const isSensitive = name => {
+    const lowered = name.toLowerCase();
+    return sensitiveKeys.some(sk => lowered.includes(sk));
+  };
+  // Tracks the objects on the current descent path so a cyclic context cannot
+  // recurse forever.
+  const ancestors = new WeakSet();
+  const scrub = value => {
+    if (value === null || typeof value !== 'object') {
+      return value;
+    }
+    const isList = Array.isArray(value);
+    if (!isList) {
+      const proto = Object.getPrototypeOf(value);
+      if (proto !== Object.prototype && proto !== null) {
+        return value; // not a plain object: leave as-is
+      }
+    }
+    if (ancestors.has(value)) {
+      return '[Circular]';
+    }
+    ancestors.add(value);
+    // Object.fromEntries defines own properties, so a literal "__proto__" key
+    // in the input cannot alter the copy's prototype.
+    const copy = isList
+      ? value.map(scrub)
+      : Object.fromEntries(
+          Object.entries(value).map(([k, v]) => [k, isSensitive(k) ? '[REDACTED]' : scrub(v)])
+        );
+    ancestors.delete(value);
+    return copy;
+  };
+  // The top level is shallow-copied first, exactly as before, so null/undefined
+  // contexts still yield {}.
+  return scrub({ ...context });
+}
+/**
+ * Check an arbitrary value against ObservationSchema.
+ *
+ * @param {unknown} data - Candidate observation record
+ * @returns {z.infer<typeof ObservationSchema>} The parsed observation
+ * @throws {z.ZodError} if validation fails
+ */
+export function validateObservation(data) {
+  const parsed = ObservationSchema.parse(data);
+  return parsed;
+}
+// Default export: the module's schemas and helper functions bundled in one object.
+const observationModule = {
+  ObservationSchema,
+  ObservationSeveritySchema,
+  ObservationCategorySchema,
+  SourceLocationSchema,
+  ObservationMetadataSchema,
+  createObservation,
+  createGuardDenial,
+  validateObservation
+};
+export default observationModule;

package/src/orchestrator.mjs ADDED Viewed

@@ -0,0 +1,315 @@
+/**
+ * @fileoverview KGC Probe - Orchestrator
+ *
+ * ProbeOrchestrator coordinates agent execution, guard validation,
+ * shard merging, and artifact generation with deterministic hashing.
+ *
+ * Algorithm: OrchestrateScan (5-phase execution)
+ *
+ * @module @unrdf/kgc-probe/orchestrator
+ */
+import { randomUUID } from 'crypto';
+import { ProbeConfigSchema, validateProbeConfig } from './types.mjs';
+import { createGuardRegistry } from './guards.mjs';
+import { createAgentRegistry } from './agents/index.mjs';
+import { hashObservations, computeArtifactSummary } from './artifact.mjs';
+/**
+ * ProbeOrchestrator - Main orchestration engine
+ *
+ * Coordinates:
+ * 1. Parallel agent execution
+ * 2. Guard validation
+ * 3. Shard merging
+ * 4. Artifact generation
+ *
+ * @class ProbeOrchestrator
+ */
+export class ProbeOrchestrator {
+  /**
+   * Create orchestrator with storage backend
+   * @param {Object} options - Configuration
+   * @param {Storage} options.storage - Storage backend (memory/file/db)
+   * @param {GuardRegistry} [options.guards] - Custom guard registry
+   * @param {AgentRegistry} [options.agents] - Custom agent registry
+   * @throws {Error} if no storage backend is supplied
+   */
+  constructor(options = {}) {
+    if (!options.storage) {
+      throw new Error('ProbeOrchestrator requires storage backend');
+    }
+    this.storage = options.storage;
+    this.guards = options.guards || createGuardRegistry();
+    this.agents = options.agents || createAgentRegistry();
+    /** @type {Map<string, Function[]>} - Event name -> registered callbacks */
+    this.listeners = new Map();
+  }
+  /**
+   * Register event listener
+   * @param {string} event - Event name ('agent_complete', 'guard_violation', etc.)
+   * @param {Function} callback - Callback function
+   */
+  on(event, callback) {
+    if (!this.listeners.has(event)) {
+      this.listeners.set(event, []);
+    }
+    this.listeners.get(event).push(callback);
+  }
+  /**
+   * Emit event to registered listeners.
+   *
+   * A listener that throws is logged and does not stop the remaining listeners
+   * or the scan itself.
+   *
+   * @param {string} event - Event name
+   * @param {unknown} data - Event data
+   * @private
+   */
+  emit(event, data) {
+    const callbacks = this.listeners.get(event) || [];
+    for (const cb of callbacks) {
+      try {
+        cb(data);
+      } catch (err) {
+        console.error(`[ProbeOrchestrator] Listener error for ${event}:`, err);
+      }
+    }
+  }
+  /**
+   * Execute full integrity scan
+   *
+   * Phase 1: Initialization
+   * Phase 2: Parallel agent execution
+   * Phase 3: Guard validation
+   * Phase 4: Shard merging
+   * Phase 5: Artifact generation and persistence
+   *
+   * Agent, guard, shard-merge and persistence failures are collected in the
+   * returned `errors` list and downgrade the status to 'partial'; they do not
+   * reject the promise.
+   *
+   * @param {Object} scanConfig - Scan configuration
+   * @param {string} scanConfig.universe_id - Universe to scan
+   * @param {string} [scanConfig.snapshot_id] - Optional snapshot reference
+   * @param {string[]} [scanConfig.agents] - Agent IDs (all if omitted)
+   * @param {boolean} [scanConfig.distributed] - Enable shard merging
+   * @param {boolean} [scanConfig.persist] - Save to storage
+   * @returns {Promise<ScanResult>} Scan result with artifact
+   * @throws {Error} if the configuration fails validation (original error in `cause`)
+   */
+  async scan(scanConfig) {
+    // Validate configuration
+    let config;
+    try {
+      config = validateProbeConfig(scanConfig);
+    } catch (err) {
+      throw new Error(`Invalid probe config: ${err.message}`, { cause: err });
+    }
+    const startTime = Date.now();
+    const runId = randomUUID();
+    const observations = [];
+    const errors = [];
+    try {
+      // Phase 1: Initialization
+      this.emit('scan_start', { runId, config });
+      // Phase 2: Parallel Agent Execution
+      const agentIds = config.agents || this.agents.list();
+      // Each agent writes into its own bucket. The buckets are then concatenated
+      // in agent order, so the observation order does not depend on which agent
+      // happens to finish first.
+      const buckets = agentIds.map(() => ({ observations: [], errors: [] }));
+      await Promise.allSettled(
+        agentIds.map((agentId, index) =>
+          this.executeAgent(agentId, config, buckets[index].observations, buckets[index].errors)
+        )
+      );
+      for (const bucket of buckets) {
+        for (const observation of bucket.observations) {
+          observations.push(observation);
+        }
+        for (const agentError of bucket.errors) {
+          errors.push(agentError);
+        }
+      }
+      this.emit('agents_complete', {
+        runId,
+        agentCount: agentIds.length,
+        observationCount: observations.length
+      });
+      // Phase 3: Guard Validation
+      const guardIds = config.guards || this.guards.list();
+      for (const guardId of guardIds) {
+        try {
+          // `await` is a no-op for a synchronous result and also tolerates a
+          // registry whose validate() returns a promise.
+          const violations = await this.guards.validate(guardId, observations);
+          for (const violation of violations) {
+            observations.push({
+              id: randomUUID(),
+              agent: `guard:${guardId}`,
+              timestamp: new Date().toISOString(),
+              kind: 'guard_violation',
+              severity: violation.severity,
+              subject: 'artifact:self',
+              evidence: {
+                query: `guard_${guardId}`,
+                result: violation.details,
+                witnesses: []
+              },
+              metrics: {
+                confidence: 1.0,
+                coverage: 1.0,
+                latency_ms: 0
+              },
+              tags: ['guard', guardId]
+            });
+          }
+          this.emit('guard_complete', { guardId, violations: violations.length });
+        } catch (err) {
+          errors.push({ guard: guardId, error: err.message });
+          this.emit('guard_error', { guardId, error: err.message });
+        }
+      }
+      // Phase 4: Shard Merging
+      // Start from the single-shard value so the artifact always carries a
+      // shard hash, including when distributed mode finds no shards or the
+      // shard fetch fails.
+      let shardHash = await this.hashString('');
+      let shardCount = 1;
+      if (config.distributed) {
+        try {
+          const shards = await this.storage.fetchShards?.();
+          if (shards && shards.length > 0) {
+            shardCount = shards.length;
+            // Hash all shards together for determinism
+            const shardData = shards.map(s => s.probe_run_id).sort().join('|');
+            shardHash = await this.hashString(shardData);
+            this.emit('shards_merged', { shardCount, shardHash });
+          }
+        } catch (err) {
+          errors.push({ operation: 'shard_merge', error: err.message });
+          this.emit('shard_merge_error', { error: err.message });
+        }
+      }
+      // Phase 5: Artifact Generation
+      const endTime = Date.now();
+      const executionTime = endTime - startTime;
+      const artifact = {
+        version: '1.0',
+        universe_id: config.universe_id,
+        snapshot_id: config.snapshot_id || 'current',
+        generated_at: new Date(endTime).toISOString(),
+        probe_run_id: runId,
+        shard_count: shardCount,
+        shard_hash: shardHash,
+        observations: observations,
+        summary: computeArtifactSummary(observations),
+        metadata: {
+          agents_run: agentIds,
+          guards_applied: guardIds,
+          execution_time_ms: executionTime,
+          storage_backend: this.storage.type,
+          config: config
+        },
+        integrity: {
+          checksum: await hashObservations(observations),
+          verified_at: null
+        }
+      };
+      // Persistence
+      if (config.persist) {
+        try {
+          await this.storage.saveArtifact(artifact);
+          this.emit('artifact_saved', { runId, artifactSize: observations.length });
+        } catch (err) {
+          errors.push({ operation: 'persist', error: err.message });
+          this.emit('artifact_save_error', { error: err.message });
+        }
+      }
+      const status = errors.length === 0 ? 'success' : 'partial';
+      this.emit('scan_complete', {
+        runId,
+        status,
+        executionTime,
+        observationCount: observations.length
+      });
+      return {
+        artifact,
+        status,
+        errors
+      };
+    } catch (err) {
+      this.emit('scan_error', { runId, error: err.message });
+      throw err;
+    }
+  }
+  /**
+   * Execute single agent.
+   *
+   * Never rejects: a missing agent or a failing scan is appended to `errors`
+   * and reported through the 'agent_error' event. An agent that returns
+   * something other than an array contributes zero observations.
+   *
+   * @param {string} agentId - Agent identifier
+   * @param {Object} config - Scan config
+   * @param {Array} observations - Array that receives this agent's observations
+   * @param {Array} errors - Array that receives this agent's errors
+   * @private
+   */
+  async executeAgent(agentId, config, observations, errors) {
+    const startTime = Date.now();
+    try {
+      const agent = this.agents.get(agentId);
+      if (!agent) {
+        throw new Error(`Agent not found: ${agentId}`);
+      }
+      // Call agent scan (would need store passed in real impl)
+      const results = await agent.scan(config);
+      const produced = Array.isArray(results) ? results : [];
+      // Push one at a time: spreading a very large array into push() can exceed
+      // the engine's argument limit.
+      for (const observation of produced) {
+        observations.push(observation);
+      }
+      const endTime = Date.now();
+      this.emit('agent_complete', {
+        agentId,
+        observationCount: produced.length,
+        latency: endTime - startTime
+      });
+    } catch (err) {
+      errors.push({ agent: agentId, error: err.message });
+      this.emit('agent_error', { agentId, error: err.message });
+    }
+  }
+  /**
+   * Produce the placeholder "hash" of a string.
+   *
+   * FIXME: this is not a real hash. The result depends only on the UTF-8 byte
+   * length of `data`, so different inputs of equal length collide. The inline
+   * notes say Blake3 (via hash-wasm) is the intended production implementation.
+   *
+   * @param {string} data - Data to hash
+   * @returns {Promise<string>} 'blake3_placeholder_' followed by the byte length
+   *   in hex, left-padded with zeros to 64 characters
+   * @private
+   */
+  async hashString(data) {
+    // In production, use hash-wasm for Blake3
+    // For now, return placeholder
+    const encoder = new TextEncoder();
+    const buffer = encoder.encode(data);
+    // Would be: blake3(buffer).then(h => h.toString('hex'))
+    return 'blake3_placeholder_' + buffer.length.toString(16).padStart(64, '0');
+  }
+  /**
+   * Load artifact from storage
+   * @param {string} artifactId - Artifact ID
+   * @returns {Promise<Artifact>} Loaded artifact
+   */
+  async loadArtifact(artifactId) {
+    return this.storage.loadArtifact(artifactId);
+  }
+  /**
+   * List all artifacts in storage
+   * @returns {Promise<Artifact[]>} Array of artifacts
+   */
+  async listArtifacts() {
+    return this.storage.listArtifacts();
+  }
+}
+/**
+ * Factory wrapper around the ProbeOrchestrator constructor.
+ *
+ * @param {Object} options - Configuration forwarded unchanged to ProbeOrchestrator
+ * @returns {ProbeOrchestrator} New orchestrator
+ */
+export function createProbeOrchestrator(options) {
+  const orchestrator = new ProbeOrchestrator(options);
+  return orchestrator;
+}

package/src/probe.mjs ADDED Viewed

@@ -0,0 +1,58 @@
+/**
+ * @fileoverview KGC Probe - Convenience Function
+ *
+ * High-level runProbe() for executing a complete scan with one call.
+ *
+ * @module @unrdf/kgc-probe/probe
+ */
+import { createProbeOrchestrator } from './orchestrator.mjs';
+import { createMemoryStorage } from './storage/index.mjs';
+/**
+ * One-call convenience wrapper around a full probe scan.
+ *
+ * Steps:
+ * 1. Build an orchestrator on the supplied storage, or on in-memory storage
+ *    when none is given
+ * 2. Run the scan, forwarding the agent/guard selection from `config`
+ * 3. Hand back the artifact only (the scan's status and error list are not returned)
+ *
+ * `distributed` defaults to false; `persist` defaults to true and is turned
+ * off only by an explicit `false`.
+ *
+ * @param {Object} config - Probe configuration
+ * @param {string} config.universe_id - Universe to scan
+ * @param {string} [config.snapshot_id] - Optional snapshot
+ * @param {Object} [config.storage] - Custom storage backend
+ * @returns {Promise<Artifact>} Generated artifact
+ * @throws {Error} if `config` or `config.universe_id` is missing, or if the
+ *   scan reports status 'failed'
+ *
+ * @example
+ * const artifact = await runProbe({
+ *   universe_id: 'my-universe',
+ *   snapshot_id: 'snap_123'
+ * });
+ *
+ * console.log(artifact.summary);
+ */
+export async function runProbe(config) {
+  if (!config?.universe_id) {
+    throw new Error('runProbe requires universe_id in config');
+  }
+  const {
+    universe_id,
+    snapshot_id,
+    agents,
+    guards,
+    distributed,
+    persist,
+    storage: customStorage
+  } = config;
+  // Fall back to in-memory storage when the caller did not bring their own.
+  const orchestrator = createProbeOrchestrator({
+    storage: customStorage || createMemoryStorage()
+  });
+  const { status, errors, artifact } = await orchestrator.scan({
+    universe_id,
+    snapshot_id,
+    agents,
+    guards,
+    distributed: distributed || false,
+    persist: persist !== false // Default true
+  });
+  if (status === 'failed') {
+    throw new Error(`Probe scan failed: ${errors.map(e => e.error).join(', ')}`);
+  }
+  return artifact;
+}