npm - codebot-ai - Versions diffs - 1.8.0 → 1.9.0 - Mend

codebot-ai 1.8.0 → 1.9.0

This diff shows the changes between publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (13)

package/dist/agent.d.ts CHANGED

@@ -2,6 +2,8 @@ import { Message, AgentEvent, LLMProvider } from './types';
 import { AuditLogger } from './audit';
 import { PolicyEnforcer } from './policy';
 import { TokenTracker } from './telemetry';
+import { MetricsCollector } from './metrics';
+import { RiskScorer } from './risk';
 export declare class Agent {
     private provider;
     private tools;
@@ -15,6 +17,8 @@ export declare class Agent {
     private auditLogger;
     private policyEnforcer;
     private tokenTracker;
+    private metricsCollector;
+    private riskScorer;
     private branchCreated;
     private askPermission;
     private onMessage?;
@@ -44,6 +48,10 @@ export declare class Agent {
     getPolicyEnforcer(): PolicyEnforcer;
     /** Get the audit logger for verification */
     getAuditLogger(): AuditLogger;
+    /** Get the metrics collector for session metrics */
+    getMetrics(): MetricsCollector;
+    /** Get the risk scorer for risk assessment history */
+    getRiskScorer(): RiskScorer;
     /**
      * Validate and repair message history to prevent OpenAI 400 errors.
      * Handles three types of corruption:

package/dist/agent.js CHANGED

@@ -48,6 +48,8 @@ const rate_limiter_1 = require("./rate-limiter");
 const audit_1 = require("./audit");
 const policy_1 = require("./policy");
 const telemetry_1 = require("./telemetry");
+const metrics_1 = require("./metrics");
+const risk_1 = require("./risk");
 /** Lightweight schema validation — returns error string or null if valid */
 function validateToolArgs(args, schema) {
     const props = schema.properties;
@@ -104,6 +106,8 @@ class Agent {
     auditLogger;
     policyEnforcer;
     tokenTracker;
+    metricsCollector;
+    riskScorer;
     branchCreated = false;
     askPermission;
     onMessage;
@@ -124,6 +128,8 @@ class Agent {
         this.auditLogger = new audit_1.AuditLogger();
         // Token & cost tracking
         this.tokenTracker = new telemetry_1.TokenTracker(opts.model, opts.providerName || 'unknown');
+        this.metricsCollector = new metrics_1.MetricsCollector();
+        this.riskScorer = new risk_1.RiskScorer();
         const costLimit = this.policyEnforcer.getCostLimitUsd();
         if (costLimit > 0)
             this.tokenTracker.setCostLimit(costLimit);
@@ -196,6 +202,9 @@ class Agent {
                             // Track tokens and cost
                             if (event.usage) {
                                 this.tokenTracker.recordUsage(event.usage.inputTokens || 0, event.usage.outputTokens || 0);
+                                this.metricsCollector.increment('llm_requests_total');
+                                this.metricsCollector.increment('llm_tokens_total', { direction: 'input' }, event.usage.inputTokens || 0);
+                                this.metricsCollector.increment('llm_tokens_total', { direction: 'output' }, event.usage.outputTokens || 0);
                             }
                             yield { type: 'usage', usage: event.usage };
                             break;
@@ -282,10 +291,17 @@ class Agent {
                     prepared.push({ tc, tool, args, denied: false, error: `Error: ${validationError} for ${toolName}` });
                     continue;
                 }
-                yield { type: 'tool_call', toolCall: { name: toolName, args } };
-                // Permission check: policy override > tool default
+                // Compute risk score before execution
                 const policyPermission = this.policyEnforcer.getToolPermission(toolName);
                 const effectivePermission = policyPermission || tool.permission;
+                const riskAssessment = this.riskScorer.assess(toolName, args, effectivePermission);
+                yield { type: 'tool_call', toolCall: { name: toolName, args }, risk: { score: riskAssessment.score, level: riskAssessment.level } };
+                // Log risk breakdown for high-risk calls
+                if (riskAssessment.score > 50) {
+                    const breakdown = riskAssessment.factors.map(f => `${f.name}=${f.rawScore}`).join(', ');
+                    this.auditLogger.log({ tool: toolName, action: 'execute', args, result: `risk:${riskAssessment.score}`, reason: breakdown });
+                }
+                // Permission check: policy override > tool default
                 const needsPermission = effectivePermission === 'always-ask' ||
                     (effectivePermission === 'prompt' && !this.autoApprove);
                 let denied = false;
@@ -294,6 +310,7 @@ class Agent {
                     if (!approved) {
                         denied = true;
                         this.auditLogger.log({ tool: toolName, action: 'deny', args, reason: 'User denied permission' });
+                        this.metricsCollector.increment('permission_denials_total', { tool: toolName });
                     }
                 }
                 prepared.push({ tc, tool, args, denied });
@@ -324,9 +341,10 @@ class Agent {
                     parallelBatch.push(item);
                 }
             }
-            // Helper to execute a single tool with cache + rate limiting
+            // Helper to execute a single tool with cache + rate limiting + metrics
             const executeTool = async (prep) => {
                 const toolName = prep.tc.function.name;
+                const toolStartTime = Date.now();
                 // Auto-branch on first write/edit when always_branch is enabled (v1.8.0)
                 if (toolName === 'write_file' || toolName === 'edit_file' || toolName === 'batch_edit') {
                     const branchName = await this.ensureBranch();
@@ -338,6 +356,7 @@ class Agent {
                 const capBlock = this.checkToolCapabilities(toolName, prep.args);
                 if (capBlock) {
                     this.auditLogger.log({ tool: toolName, action: 'capability_block', args: prep.args, reason: capBlock });
+                    this.metricsCollector.increment('security_blocks_total', { tool: toolName, type: 'capability' });
                     return { content: `Error: ${capBlock}`, is_error: true };
                 }
                 // Check cache first
@@ -345,19 +364,30 @@ class Agent {
                     const cacheKey = cache_1.ToolCache.key(toolName, prep.args);
                     const cached = this.cache.get(cacheKey);
                     if (cached !== null) {
+                        this.metricsCollector.increment('cache_hits_total', { tool: toolName });
                         return { content: cached };
                     }
+                    this.metricsCollector.increment('cache_misses_total', { tool: toolName });
                 }
                 // Rate limit
                 await this.rateLimiter.throttle(toolName);
                 try {
                     const output = await prep.tool.execute(prep.args);
+                    // Record tool latency
+                    const latencyMs = Date.now() - toolStartTime;
+                    this.metricsCollector.observe('tool_latency_seconds', latencyMs / 1000, { tool: toolName });
+                    this.metricsCollector.increment('tool_calls_total', { tool: toolName });
                     // Audit log: successful execution
                     this.auditLogger.log({ tool: toolName, action: 'execute', args: prep.args, result: 'success' });
                     // Telemetry: track tool calls and file modifications
                     this.tokenTracker.recordToolCall();
                     if ((toolName === 'write_file' || toolName === 'edit_file' || toolName === 'batch_edit') && prep.args.path) {
                         this.tokenTracker.recordFileModified(prep.args.path);
+                        this.metricsCollector.increment('files_written_total', { tool: toolName });
+                    }
+                    // Track commands executed
+                    if (toolName === 'execute') {
+                        this.metricsCollector.increment('commands_executed_total');
                     }
                     // Store in cache for cacheable tools
                     if (prep.tool.cacheable) {
@@ -373,11 +403,16 @@ class Agent {
                     // Audit log: check if tool returned a security block
                     if (output.startsWith('Error: Blocked:') || output.startsWith('Error: CWD')) {
                         this.auditLogger.log({ tool: toolName, action: 'security_block', args: prep.args, reason: output });
+                        this.metricsCollector.increment('security_blocks_total', { tool: toolName, type: 'security' });
                     }
                     return { content: output };
                 }
                 catch (err) {
                     const errMsg = err instanceof Error ? err.message : String(err);
+                    // Record latency even on error
+                    const latencyMs = Date.now() - toolStartTime;
+                    this.metricsCollector.observe('tool_latency_seconds', latencyMs / 1000, { tool: toolName });
+                    this.metricsCollector.increment('errors_total', { tool: toolName });
                     // Audit log: error
                     this.auditLogger.log({ tool: toolName, action: 'error', args: prep.args, result: 'error', reason: errMsg });
                     return { content: `Error: ${errMsg}`, is_error: true };
@@ -448,6 +483,14 @@ class Agent {
     getAuditLogger() {
         return this.auditLogger;
     }
+    /** Get the metrics collector for session metrics */
+    getMetrics() {
+        return this.metricsCollector;
+    }
+    /** Get the risk scorer for risk assessment history */
+    getRiskScorer() {
+        return this.riskScorer;
+    }
     /**
      * Validate and repair message history to prevent OpenAI 400 errors.
      * Handles three types of corruption:

package/dist/cli.js CHANGED

@@ -50,7 +50,9 @@ const audit_1 = require("./audit");
 const policy_1 = require("./policy");
 const sandbox_1 = require("./sandbox");
 const replay_1 = require("./replay");
-const VERSION = '1.8.0';
+const risk_1 = require("./risk");
+const sarif_1 = require("./sarif");
+const VERSION = '1.9.0';
 const C = {
     reset: '\x1b[0m',
     bold: '\x1b[1m',
@@ -228,6 +230,19 @@ async function main() {
         }
         return;
     }
+    // --export-audit sarif: Export audit log as SARIF 2.1.0
+    if (args['export-audit'] === 'sarif' || args['export-audit'] === true) {
+        const logger = new audit_1.AuditLogger();
+        const sessionId = typeof args['session'] === 'string' ? args['session'] : undefined;
+        const entries = sessionId ? logger.query({ sessionId }) : logger.query();
+        if (entries.length === 0) {
+            console.error(c('No audit entries found.', 'yellow'));
+            process.exit(1);
+        }
+        const sarif = (0, sarif_1.exportSarif)(entries, { version: VERSION, sessionId });
+        process.stdout.write((0, sarif_1.sarifToString)(sarif) + '\n');
+        return;
+    }
     // First run: auto-launch setup if nothing is configured
     if ((0, setup_1.isFirstRun)() && process.stdin.isTTY && !args.message) {
         console.log(c('Welcome! No configuration found — launching setup...', 'cyan'));
@@ -299,7 +314,7 @@ async function main() {
     // Cleanup scheduler on exit
     scheduler.stop();
 }
-/** Print session summary with tokens, cost, tool calls, files modified */
+/** Print session summary with tokens, cost, tool calls, files modified, metrics */
 function printSessionSummary(agent) {
     const tracker = agent.getTokenTracker();
     tracker.saveUsage();
@@ -314,6 +329,27 @@ function printSessionSummary(agent) {
     console.log(`  Requests:  ${summary.requestCount}`);
     console.log(`  Tools:     ${summary.toolCalls} calls`);
     console.log(`  Files:     ${summary.filesModified} modified`);
+    // v1.9.0: Per-tool breakdown from MetricsCollector
+    const metrics = agent.getMetrics();
+    const snap = metrics.snapshot();
+    const toolCounters = snap.counters.filter(c => c.name === 'tool_calls_total');
+    if (toolCounters.length > 0) {
+        console.log(c('  Per-tool:', 'dim'));
+        for (const tc of toolCounters.sort((a, b) => b.value - a.value)) {
+            const hist = snap.histograms.find(h => h.name === 'tool_latency_seconds' && h.labels.tool === tc.labels.tool);
+            const avg = hist && hist.count > 0 ? (hist.sum / hist.count * 1000).toFixed(0) : '?';
+            console.log(c(`    ${tc.labels.tool}: ${tc.value} calls (avg ${avg}ms)`, 'dim'));
+        }
+    }
+    // Risk summary
+    const riskScorer = agent.getRiskScorer();
+    const riskAvg = riskScorer.getSessionAverage();
+    if (riskScorer.getHistory().length > 0) {
+        console.log(`  Risk:      avg ${riskAvg}/100`);
+    }
+    // Save metrics
+    metrics.save();
+    metrics.exportOtel();
 }
 function createProvider(config) {
     if (config.provider === 'anthropic') {
@@ -393,8 +429,14 @@ function renderEvent(event, agent) {
                 process.stdout.write('\n');
                 isThinking = false;
             }
-            console.log(c(`\n⚡ ${event.toolCall?.name}`, 'yellow') +
-                c(`(${formatArgs(event.toolCall?.args)})`, 'dim'));
+            {
+                const riskStr = event.risk
+                    ? ' ' + risk_1.RiskScorer.formatIndicator({ score: event.risk.score, level: event.risk.level, factors: [] })
+                    : '';
+                console.log(c(`\n⚡ ${event.toolCall?.name}`, 'yellow') +
+                    c(`(${formatArgs(event.toolCall?.args)})`, 'dim') +
+                    riskStr);
+            }
             break;
         case 'tool_result':
             if (event.toolResult?.is_error) {
@@ -467,6 +509,8 @@ function handleSlashCommand(input, agent, config) {
   /undo      Undo last file edit (/undo [path])
   /usage     Show token usage & cost for this session
   /cost      Show running cost
+  /metrics   Show session metrics (counters + histograms)
+  /risk      Show risk assessment summary
   /policy    Show current security policy
   /audit     Verify audit chain for this session
   /config    Show current config
@@ -543,6 +587,26 @@ function handleSlashCommand(input, agent, config) {
             console.log(c(`  ${tracker.formatStatusLine()}`, 'dim'));
             break;
         }
+        case '/metrics': {
+            const metricsOutput = agent.getMetrics().formatSummary();
+            console.log('\n' + metricsOutput);
+            break;
+        }
+        case '/risk': {
+            const riskHistory = agent.getRiskScorer().getHistory();
+            if (riskHistory.length === 0) {
+                console.log(c('No risk assessments yet.', 'dim'));
+            }
+            else {
+                const avg = agent.getRiskScorer().getSessionAverage();
+                console.log(c(`\nRisk Summary: ${riskHistory.length} assessments, avg ${avg}/100`, 'bold'));
+                const last5 = riskHistory.slice(-5);
+                for (const a of last5) {
+                    console.log(`  ${risk_1.RiskScorer.formatIndicator(a)}`);
+                }
+            }
+            break;
+        }
         case '/policy': {
             const policy = agent.getPolicyEnforcer().getPolicy();
             console.log(c('\nCurrent Policy:', 'bold'));
@@ -702,6 +766,17 @@ function parseArgs(argv) {
             }
             continue;
         }
+        if (arg === '--export-audit') {
+            const next = argv[i + 1];
+            if (next && !next.startsWith('--')) {
+                result['export-audit'] = next;
+                i++;
+            }
+            else {
+                result['export-audit'] = true;
+            }
+            continue;
+        }
         if (arg === '--replay') {
             const next = argv[i + 1];
             if (next && !next.startsWith('--')) {
@@ -770,6 +845,7 @@ ${c('Options:', 'bold')}
 ${c('Security & Policy:', 'bold')}
   --init-policy        Generate default .codebot/policy.json
   --verify-audit [id]  Verify audit log hash chain integrity
+  --export-audit sarif Export audit log as SARIF 2.1.0 JSON
   --sandbox-info       Show Docker sandbox status
 ${c('Debugging & Replay:', 'bold')}
@@ -795,6 +871,7 @@ ${c('Examples:', 'bold')}
   codebot --autonomous "refactor src/"     Full auto, no prompts
   codebot --init-policy                    Create security policy
   codebot --verify-audit                   Check audit integrity
+  codebot --export-audit sarif > r.sarif   Export SARIF report
 ${c('Interactive Commands:', 'bold')}
   /help      Show commands
@@ -806,6 +883,8 @@ ${c('Interactive Commands:', 'bold')}
   /compact   Force context compaction
   /usage     Show token usage & cost
   /cost      Show running cost
+  /metrics   Show session metrics
+  /risk      Show risk assessment summary
   /policy    Show security policy
   /audit     Verify session audit chain
   /config    Show configuration

package/dist/index.d.ts CHANGED

@@ -17,5 +17,11 @@ export { deriveSessionKey, signMessage, verifyMessage, verifyMessages } from './
 export type { IntegrityResult } from './integrity';
 export { ReplayProvider, loadSessionForReplay, compareOutputs, listReplayableSessions } from './replay';
 export type { SessionReplayData, ReplayDivergence } from './replay';
+export { MetricsCollector } from './metrics';
+export type { MetricsSnapshot, CounterValue, HistogramValue } from './metrics';
+export { RiskScorer } from './risk';
+export type { RiskAssessment, RiskFactor } from './risk';
+export { exportSarif, sarifToString } from './sarif';
+export type { SarifLog, SarifResult, SarifRule } from './sarif';
 export * from './types';
 //# sourceMappingURL=index.d.ts.map

package/dist/index.js CHANGED

@@ -14,7 +14,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
     for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.listReplayableSessions = exports.compareOutputs = exports.loadSessionForReplay = exports.ReplayProvider = exports.verifyMessages = exports.verifyMessage = exports.signMessage = exports.deriveSessionKey = exports.CapabilityChecker = exports.detectProvider = exports.getModelInfo = exports.PROVIDER_DEFAULTS = exports.MODEL_REGISTRY = exports.loadMCPTools = exports.loadPlugins = exports.parseToolCalls = exports.MemoryManager = exports.SessionManager = exports.buildRepoMap = exports.ContextManager = exports.ToolRegistry = exports.AnthropicProvider = exports.OpenAIProvider = exports.Agent = void 0;
+exports.sarifToString = exports.exportSarif = exports.RiskScorer = exports.MetricsCollector = exports.listReplayableSessions = exports.compareOutputs = exports.loadSessionForReplay = exports.ReplayProvider = exports.verifyMessages = exports.verifyMessage = exports.signMessage = exports.deriveSessionKey = exports.CapabilityChecker = exports.detectProvider = exports.getModelInfo = exports.PROVIDER_DEFAULTS = exports.MODEL_REGISTRY = exports.loadMCPTools = exports.loadPlugins = exports.parseToolCalls = exports.MemoryManager = exports.SessionManager = exports.buildRepoMap = exports.ContextManager = exports.ToolRegistry = exports.AnthropicProvider = exports.OpenAIProvider = exports.Agent = void 0;
 var agent_1 = require("./agent");
 Object.defineProperty(exports, "Agent", { enumerable: true, get: function () { return agent_1.Agent; } });
 var openai_1 = require("./providers/openai");
@@ -54,5 +54,12 @@ Object.defineProperty(exports, "ReplayProvider", { enumerable: true, get: functi
 Object.defineProperty(exports, "loadSessionForReplay", { enumerable: true, get: function () { return replay_1.loadSessionForReplay; } });
 Object.defineProperty(exports, "compareOutputs", { enumerable: true, get: function () { return replay_1.compareOutputs; } });
 Object.defineProperty(exports, "listReplayableSessions", { enumerable: true, get: function () { return replay_1.listReplayableSessions; } });
+var metrics_1 = require("./metrics");
+Object.defineProperty(exports, "MetricsCollector", { enumerable: true, get: function () { return metrics_1.MetricsCollector; } });
+var risk_1 = require("./risk");
+Object.defineProperty(exports, "RiskScorer", { enumerable: true, get: function () { return risk_1.RiskScorer; } });
+var sarif_1 = require("./sarif");
+Object.defineProperty(exports, "exportSarif", { enumerable: true, get: function () { return sarif_1.exportSarif; } });
+Object.defineProperty(exports, "sarifToString", { enumerable: true, get: function () { return sarif_1.sarifToString; } });
 __exportStar(require("./types"), exports);
 //# sourceMappingURL=index.js.map

package/dist/metrics.d.ts ADDED

@@ -0,0 +1,60 @@
+/**
+ * MetricsCollector for CodeBot v1.9.0
+ *
+ * Structured telemetry: counters + histograms.
+ * Persists to ~/.codebot/telemetry/metrics-YYYY-MM-DD.jsonl
+ * Optional OTLP HTTP export when OTEL_EXPORTER_OTLP_ENDPOINT is set.
+ *
+ * Pattern: fail-safe, session-scoped, never throws.
+ * Follows TokenTracker conventions from src/telemetry.ts.
+ */
+export interface CounterValue {
+    name: string;
+    labels: Record<string, string>;
+    value: number;
+}
+export interface HistogramValue {
+    name: string;
+    labels: Record<string, string>;
+    count: number;
+    sum: number;
+    min: number;
+    max: number;
+    buckets: number[];
+}
+export interface MetricsSnapshot {
+    sessionId: string;
+    timestamp: string;
+    counters: CounterValue[];
+    histograms: HistogramValue[];
+}
+export declare class MetricsCollector {
+    private sessionId;
+    private counters;
+    private histograms;
+    constructor(sessionId?: string);
+    getSessionId(): string;
+    /** Increment a counter by delta (default 1) */
+    increment(name: string, labels?: Record<string, string>, delta?: number): void;
+    /** Record a histogram observation */
+    observe(name: string, value: number, labels?: Record<string, string>): void;
+    /** Read a counter value */
+    getCounter(name: string, labels?: Record<string, string>): number;
+    /** Read a histogram summary */
+    getHistogram(name: string, labels?: Record<string, string>): HistogramValue | null;
+    /** Full session snapshot */
+    snapshot(): MetricsSnapshot;
+    /** Persist snapshot to ~/.codebot/telemetry/metrics-YYYY-MM-DD.jsonl */
+    save(sessionId?: string): void;
+    /** Human-readable per-tool breakdown */
+    formatSummary(): string;
+    /**
+     * Export snapshot in OTLP JSON format via HTTP POST.
+     * Only fires when OTEL_EXPORTER_OTLP_ENDPOINT is set.
+     * Fails silently — never blocks or crashes.
+     */
+    exportOtel(snap?: MetricsSnapshot): void;
+    /** Build OTLP-compatible JSON payload */
+    private buildOtlpPayload;
+}
+//# sourceMappingURL=metrics.d.ts.map