codebot-ai 1.8.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/agent.d.ts CHANGED
@@ -2,6 +2,8 @@ import { Message, AgentEvent, LLMProvider } from './types';
2
2
  import { AuditLogger } from './audit';
3
3
  import { PolicyEnforcer } from './policy';
4
4
  import { TokenTracker } from './telemetry';
5
+ import { MetricsCollector } from './metrics';
6
+ import { RiskScorer } from './risk';
5
7
  export declare class Agent {
6
8
  private provider;
7
9
  private tools;
@@ -15,6 +17,8 @@ export declare class Agent {
15
17
  private auditLogger;
16
18
  private policyEnforcer;
17
19
  private tokenTracker;
20
+ private metricsCollector;
21
+ private riskScorer;
18
22
  private branchCreated;
19
23
  private askPermission;
20
24
  private onMessage?;
@@ -44,6 +48,10 @@ export declare class Agent {
44
48
  getPolicyEnforcer(): PolicyEnforcer;
45
49
  /** Get the audit logger for verification */
46
50
  getAuditLogger(): AuditLogger;
51
+ /** Get the metrics collector for session metrics */
52
+ getMetrics(): MetricsCollector;
53
+ /** Get the risk scorer for risk assessment history */
54
+ getRiskScorer(): RiskScorer;
47
55
  /**
48
56
  * Validate and repair message history to prevent OpenAI 400 errors.
49
57
  * Handles three types of corruption:
package/dist/agent.js CHANGED
@@ -48,6 +48,8 @@ const rate_limiter_1 = require("./rate-limiter");
48
48
  const audit_1 = require("./audit");
49
49
  const policy_1 = require("./policy");
50
50
  const telemetry_1 = require("./telemetry");
51
+ const metrics_1 = require("./metrics");
52
+ const risk_1 = require("./risk");
51
53
  /** Lightweight schema validation — returns error string or null if valid */
52
54
  function validateToolArgs(args, schema) {
53
55
  const props = schema.properties;
@@ -104,6 +106,8 @@ class Agent {
104
106
  auditLogger;
105
107
  policyEnforcer;
106
108
  tokenTracker;
109
+ metricsCollector;
110
+ riskScorer;
107
111
  branchCreated = false;
108
112
  askPermission;
109
113
  onMessage;
@@ -124,6 +128,8 @@ class Agent {
124
128
  this.auditLogger = new audit_1.AuditLogger();
125
129
  // Token & cost tracking
126
130
  this.tokenTracker = new telemetry_1.TokenTracker(opts.model, opts.providerName || 'unknown');
131
+ this.metricsCollector = new metrics_1.MetricsCollector();
132
+ this.riskScorer = new risk_1.RiskScorer();
127
133
  const costLimit = this.policyEnforcer.getCostLimitUsd();
128
134
  if (costLimit > 0)
129
135
  this.tokenTracker.setCostLimit(costLimit);
@@ -196,6 +202,9 @@ class Agent {
196
202
  // Track tokens and cost
197
203
  if (event.usage) {
198
204
  this.tokenTracker.recordUsage(event.usage.inputTokens || 0, event.usage.outputTokens || 0);
205
+ this.metricsCollector.increment('llm_requests_total');
206
+ this.metricsCollector.increment('llm_tokens_total', { direction: 'input' }, event.usage.inputTokens || 0);
207
+ this.metricsCollector.increment('llm_tokens_total', { direction: 'output' }, event.usage.outputTokens || 0);
199
208
  }
200
209
  yield { type: 'usage', usage: event.usage };
201
210
  break;
@@ -282,10 +291,17 @@ class Agent {
282
291
  prepared.push({ tc, tool, args, denied: false, error: `Error: ${validationError} for ${toolName}` });
283
292
  continue;
284
293
  }
285
- yield { type: 'tool_call', toolCall: { name: toolName, args } };
286
- // Permission check: policy override > tool default
294
+ // Compute risk score before execution
287
295
  const policyPermission = this.policyEnforcer.getToolPermission(toolName);
288
296
  const effectivePermission = policyPermission || tool.permission;
297
+ const riskAssessment = this.riskScorer.assess(toolName, args, effectivePermission);
298
+ yield { type: 'tool_call', toolCall: { name: toolName, args }, risk: { score: riskAssessment.score, level: riskAssessment.level } };
299
+ // Log risk breakdown for high-risk calls
300
+ if (riskAssessment.score > 50) {
301
+ const breakdown = riskAssessment.factors.map(f => `${f.name}=${f.rawScore}`).join(', ');
302
+ this.auditLogger.log({ tool: toolName, action: 'execute', args, result: `risk:${riskAssessment.score}`, reason: breakdown });
303
+ }
304
+ // Permission check: policy override > tool default
289
305
  const needsPermission = effectivePermission === 'always-ask' ||
290
306
  (effectivePermission === 'prompt' && !this.autoApprove);
291
307
  let denied = false;
@@ -294,6 +310,7 @@ class Agent {
294
310
  if (!approved) {
295
311
  denied = true;
296
312
  this.auditLogger.log({ tool: toolName, action: 'deny', args, reason: 'User denied permission' });
313
+ this.metricsCollector.increment('permission_denials_total', { tool: toolName });
297
314
  }
298
315
  }
299
316
  prepared.push({ tc, tool, args, denied });
@@ -324,9 +341,10 @@ class Agent {
324
341
  parallelBatch.push(item);
325
342
  }
326
343
  }
327
- // Helper to execute a single tool with cache + rate limiting
344
+ // Helper to execute a single tool with cache + rate limiting + metrics
328
345
  const executeTool = async (prep) => {
329
346
  const toolName = prep.tc.function.name;
347
+ const toolStartTime = Date.now();
330
348
  // Auto-branch on first write/edit when always_branch is enabled (v1.8.0)
331
349
  if (toolName === 'write_file' || toolName === 'edit_file' || toolName === 'batch_edit') {
332
350
  const branchName = await this.ensureBranch();
@@ -338,6 +356,7 @@ class Agent {
338
356
  const capBlock = this.checkToolCapabilities(toolName, prep.args);
339
357
  if (capBlock) {
340
358
  this.auditLogger.log({ tool: toolName, action: 'capability_block', args: prep.args, reason: capBlock });
359
+ this.metricsCollector.increment('security_blocks_total', { tool: toolName, type: 'capability' });
341
360
  return { content: `Error: ${capBlock}`, is_error: true };
342
361
  }
343
362
  // Check cache first
@@ -345,19 +364,30 @@ class Agent {
345
364
  const cacheKey = cache_1.ToolCache.key(toolName, prep.args);
346
365
  const cached = this.cache.get(cacheKey);
347
366
  if (cached !== null) {
367
+ this.metricsCollector.increment('cache_hits_total', { tool: toolName });
348
368
  return { content: cached };
349
369
  }
370
+ this.metricsCollector.increment('cache_misses_total', { tool: toolName });
350
371
  }
351
372
  // Rate limit
352
373
  await this.rateLimiter.throttle(toolName);
353
374
  try {
354
375
  const output = await prep.tool.execute(prep.args);
376
+ // Record tool latency
377
+ const latencyMs = Date.now() - toolStartTime;
378
+ this.metricsCollector.observe('tool_latency_seconds', latencyMs / 1000, { tool: toolName });
379
+ this.metricsCollector.increment('tool_calls_total', { tool: toolName });
355
380
  // Audit log: successful execution
356
381
  this.auditLogger.log({ tool: toolName, action: 'execute', args: prep.args, result: 'success' });
357
382
  // Telemetry: track tool calls and file modifications
358
383
  this.tokenTracker.recordToolCall();
359
384
  if ((toolName === 'write_file' || toolName === 'edit_file' || toolName === 'batch_edit') && prep.args.path) {
360
385
  this.tokenTracker.recordFileModified(prep.args.path);
386
+ this.metricsCollector.increment('files_written_total', { tool: toolName });
387
+ }
388
+ // Track commands executed
389
+ if (toolName === 'execute') {
390
+ this.metricsCollector.increment('commands_executed_total');
361
391
  }
362
392
  // Store in cache for cacheable tools
363
393
  if (prep.tool.cacheable) {
@@ -373,11 +403,16 @@ class Agent {
373
403
  // Audit log: check if tool returned a security block
374
404
  if (output.startsWith('Error: Blocked:') || output.startsWith('Error: CWD')) {
375
405
  this.auditLogger.log({ tool: toolName, action: 'security_block', args: prep.args, reason: output });
406
+ this.metricsCollector.increment('security_blocks_total', { tool: toolName, type: 'security' });
376
407
  }
377
408
  return { content: output };
378
409
  }
379
410
  catch (err) {
380
411
  const errMsg = err instanceof Error ? err.message : String(err);
412
+ // Record latency even on error
413
+ const latencyMs = Date.now() - toolStartTime;
414
+ this.metricsCollector.observe('tool_latency_seconds', latencyMs / 1000, { tool: toolName });
415
+ this.metricsCollector.increment('errors_total', { tool: toolName });
381
416
  // Audit log: error
382
417
  this.auditLogger.log({ tool: toolName, action: 'error', args: prep.args, result: 'error', reason: errMsg });
383
418
  return { content: `Error: ${errMsg}`, is_error: true };
@@ -448,6 +483,14 @@ class Agent {
448
483
  getAuditLogger() {
449
484
  return this.auditLogger;
450
485
  }
486
+ /** Get the metrics collector for session metrics */
487
+ getMetrics() {
488
+ return this.metricsCollector;
489
+ }
490
+ /** Get the risk scorer for risk assessment history */
491
+ getRiskScorer() {
492
+ return this.riskScorer;
493
+ }
451
494
  /**
452
495
  * Validate and repair message history to prevent OpenAI 400 errors.
453
496
  * Handles three types of corruption:
package/dist/cli.js CHANGED
@@ -50,7 +50,9 @@ const audit_1 = require("./audit");
50
50
  const policy_1 = require("./policy");
51
51
  const sandbox_1 = require("./sandbox");
52
52
  const replay_1 = require("./replay");
53
- const VERSION = '1.8.0';
53
+ const risk_1 = require("./risk");
54
+ const sarif_1 = require("./sarif");
55
+ const VERSION = '1.9.0';
54
56
  const C = {
55
57
  reset: '\x1b[0m',
56
58
  bold: '\x1b[1m',
@@ -228,6 +230,19 @@ async function main() {
228
230
  }
229
231
  return;
230
232
  }
233
+ // --export-audit sarif: Export audit log as SARIF 2.1.0
234
+ if (args['export-audit'] === 'sarif' || args['export-audit'] === true) {
235
+ const logger = new audit_1.AuditLogger();
236
+ const sessionId = typeof args['session'] === 'string' ? args['session'] : undefined;
237
+ const entries = sessionId ? logger.query({ sessionId }) : logger.query();
238
+ if (entries.length === 0) {
239
+ console.error(c('No audit entries found.', 'yellow'));
240
+ process.exit(1);
241
+ }
242
+ const sarif = (0, sarif_1.exportSarif)(entries, { version: VERSION, sessionId });
243
+ process.stdout.write((0, sarif_1.sarifToString)(sarif) + '\n');
244
+ return;
245
+ }
231
246
  // First run: auto-launch setup if nothing is configured
232
247
  if ((0, setup_1.isFirstRun)() && process.stdin.isTTY && !args.message) {
233
248
  console.log(c('Welcome! No configuration found — launching setup...', 'cyan'));
@@ -299,7 +314,7 @@ async function main() {
299
314
  // Cleanup scheduler on exit
300
315
  scheduler.stop();
301
316
  }
302
- /** Print session summary with tokens, cost, tool calls, files modified */
317
+ /** Print session summary with tokens, cost, tool calls, files modified, metrics */
303
318
  function printSessionSummary(agent) {
304
319
  const tracker = agent.getTokenTracker();
305
320
  tracker.saveUsage();
@@ -314,6 +329,27 @@ function printSessionSummary(agent) {
314
329
  console.log(` Requests: ${summary.requestCount}`);
315
330
  console.log(` Tools: ${summary.toolCalls} calls`);
316
331
  console.log(` Files: ${summary.filesModified} modified`);
332
+ // v1.9.0: Per-tool breakdown from MetricsCollector
333
+ const metrics = agent.getMetrics();
334
+ const snap = metrics.snapshot();
335
+ const toolCounters = snap.counters.filter(c => c.name === 'tool_calls_total');
336
+ if (toolCounters.length > 0) {
337
+ console.log(c(' Per-tool:', 'dim'));
338
+ for (const tc of toolCounters.sort((a, b) => b.value - a.value)) {
339
+ const hist = snap.histograms.find(h => h.name === 'tool_latency_seconds' && h.labels.tool === tc.labels.tool);
340
+ const avg = hist && hist.count > 0 ? (hist.sum / hist.count * 1000).toFixed(0) : '?';
341
+ console.log(c(` ${tc.labels.tool}: ${tc.value} calls (avg ${avg}ms)`, 'dim'));
342
+ }
343
+ }
344
+ // Risk summary
345
+ const riskScorer = agent.getRiskScorer();
346
+ const riskAvg = riskScorer.getSessionAverage();
347
+ if (riskScorer.getHistory().length > 0) {
348
+ console.log(` Risk: avg ${riskAvg}/100`);
349
+ }
350
+ // Save metrics
351
+ metrics.save();
352
+ metrics.exportOtel();
317
353
  }
318
354
  function createProvider(config) {
319
355
  if (config.provider === 'anthropic') {
@@ -393,8 +429,14 @@ function renderEvent(event, agent) {
393
429
  process.stdout.write('\n');
394
430
  isThinking = false;
395
431
  }
396
- console.log(c(`\n⚡ ${event.toolCall?.name}`, 'yellow') +
397
- c(`(${formatArgs(event.toolCall?.args)})`, 'dim'));
432
+ {
433
+ const riskStr = event.risk
434
+ ? ' ' + risk_1.RiskScorer.formatIndicator({ score: event.risk.score, level: event.risk.level, factors: [] })
435
+ : '';
436
+ console.log(c(`\n⚡ ${event.toolCall?.name}`, 'yellow') +
437
+ c(`(${formatArgs(event.toolCall?.args)})`, 'dim') +
438
+ riskStr);
439
+ }
398
440
  break;
399
441
  case 'tool_result':
400
442
  if (event.toolResult?.is_error) {
@@ -467,6 +509,8 @@ function handleSlashCommand(input, agent, config) {
467
509
  /undo Undo last file edit (/undo [path])
468
510
  /usage Show token usage & cost for this session
469
511
  /cost Show running cost
512
+ /metrics Show session metrics (counters + histograms)
513
+ /risk Show risk assessment summary
470
514
  /policy Show current security policy
471
515
  /audit Verify audit chain for this session
472
516
  /config Show current config
@@ -543,6 +587,26 @@ function handleSlashCommand(input, agent, config) {
543
587
  console.log(c(` ${tracker.formatStatusLine()}`, 'dim'));
544
588
  break;
545
589
  }
590
+ case '/metrics': {
591
+ const metricsOutput = agent.getMetrics().formatSummary();
592
+ console.log('\n' + metricsOutput);
593
+ break;
594
+ }
595
+ case '/risk': {
596
+ const riskHistory = agent.getRiskScorer().getHistory();
597
+ if (riskHistory.length === 0) {
598
+ console.log(c('No risk assessments yet.', 'dim'));
599
+ }
600
+ else {
601
+ const avg = agent.getRiskScorer().getSessionAverage();
602
+ console.log(c(`\nRisk Summary: ${riskHistory.length} assessments, avg ${avg}/100`, 'bold'));
603
+ const last5 = riskHistory.slice(-5);
604
+ for (const a of last5) {
605
+ console.log(` ${risk_1.RiskScorer.formatIndicator(a)}`);
606
+ }
607
+ }
608
+ break;
609
+ }
546
610
  case '/policy': {
547
611
  const policy = agent.getPolicyEnforcer().getPolicy();
548
612
  console.log(c('\nCurrent Policy:', 'bold'));
@@ -702,6 +766,17 @@ function parseArgs(argv) {
702
766
  }
703
767
  continue;
704
768
  }
769
+ if (arg === '--export-audit') {
770
+ const next = argv[i + 1];
771
+ if (next && !next.startsWith('--')) {
772
+ result['export-audit'] = next;
773
+ i++;
774
+ }
775
+ else {
776
+ result['export-audit'] = true;
777
+ }
778
+ continue;
779
+ }
705
780
  if (arg === '--replay') {
706
781
  const next = argv[i + 1];
707
782
  if (next && !next.startsWith('--')) {
@@ -770,6 +845,7 @@ ${c('Options:', 'bold')}
770
845
  ${c('Security & Policy:', 'bold')}
771
846
  --init-policy Generate default .codebot/policy.json
772
847
  --verify-audit [id] Verify audit log hash chain integrity
848
+ --export-audit sarif Export audit log as SARIF 2.1.0 JSON
773
849
  --sandbox-info Show Docker sandbox status
774
850
 
775
851
  ${c('Debugging & Replay:', 'bold')}
@@ -795,6 +871,7 @@ ${c('Examples:', 'bold')}
795
871
  codebot --autonomous "refactor src/" Full auto, no prompts
796
872
  codebot --init-policy Create security policy
797
873
  codebot --verify-audit Check audit integrity
874
+ codebot --export-audit sarif > r.sarif Export SARIF report
798
875
 
799
876
  ${c('Interactive Commands:', 'bold')}
800
877
  /help Show commands
@@ -806,6 +883,8 @@ ${c('Interactive Commands:', 'bold')}
806
883
  /compact Force context compaction
807
884
  /usage Show token usage & cost
808
885
  /cost Show running cost
886
+ /metrics Show session metrics
887
+ /risk Show risk assessment summary
809
888
  /policy Show security policy
810
889
  /audit Verify session audit chain
811
890
  /config Show configuration
package/dist/index.d.ts CHANGED
@@ -17,5 +17,11 @@ export { deriveSessionKey, signMessage, verifyMessage, verifyMessages } from './
17
17
  export type { IntegrityResult } from './integrity';
18
18
  export { ReplayProvider, loadSessionForReplay, compareOutputs, listReplayableSessions } from './replay';
19
19
  export type { SessionReplayData, ReplayDivergence } from './replay';
20
+ export { MetricsCollector } from './metrics';
21
+ export type { MetricsSnapshot, CounterValue, HistogramValue } from './metrics';
22
+ export { RiskScorer } from './risk';
23
+ export type { RiskAssessment, RiskFactor } from './risk';
24
+ export { exportSarif, sarifToString } from './sarif';
25
+ export type { SarifLog, SarifResult, SarifRule } from './sarif';
20
26
  export * from './types';
21
27
  //# sourceMappingURL=index.d.ts.map
package/dist/index.js CHANGED
@@ -14,7 +14,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
14
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
15
  };
16
16
  Object.defineProperty(exports, "__esModule", { value: true });
17
- exports.listReplayableSessions = exports.compareOutputs = exports.loadSessionForReplay = exports.ReplayProvider = exports.verifyMessages = exports.verifyMessage = exports.signMessage = exports.deriveSessionKey = exports.CapabilityChecker = exports.detectProvider = exports.getModelInfo = exports.PROVIDER_DEFAULTS = exports.MODEL_REGISTRY = exports.loadMCPTools = exports.loadPlugins = exports.parseToolCalls = exports.MemoryManager = exports.SessionManager = exports.buildRepoMap = exports.ContextManager = exports.ToolRegistry = exports.AnthropicProvider = exports.OpenAIProvider = exports.Agent = void 0;
17
+ exports.sarifToString = exports.exportSarif = exports.RiskScorer = exports.MetricsCollector = exports.listReplayableSessions = exports.compareOutputs = exports.loadSessionForReplay = exports.ReplayProvider = exports.verifyMessages = exports.verifyMessage = exports.signMessage = exports.deriveSessionKey = exports.CapabilityChecker = exports.detectProvider = exports.getModelInfo = exports.PROVIDER_DEFAULTS = exports.MODEL_REGISTRY = exports.loadMCPTools = exports.loadPlugins = exports.parseToolCalls = exports.MemoryManager = exports.SessionManager = exports.buildRepoMap = exports.ContextManager = exports.ToolRegistry = exports.AnthropicProvider = exports.OpenAIProvider = exports.Agent = void 0;
18
18
  var agent_1 = require("./agent");
19
19
  Object.defineProperty(exports, "Agent", { enumerable: true, get: function () { return agent_1.Agent; } });
20
20
  var openai_1 = require("./providers/openai");
@@ -54,5 +54,12 @@ Object.defineProperty(exports, "ReplayProvider", { enumerable: true, get: functi
54
54
  Object.defineProperty(exports, "loadSessionForReplay", { enumerable: true, get: function () { return replay_1.loadSessionForReplay; } });
55
55
  Object.defineProperty(exports, "compareOutputs", { enumerable: true, get: function () { return replay_1.compareOutputs; } });
56
56
  Object.defineProperty(exports, "listReplayableSessions", { enumerable: true, get: function () { return replay_1.listReplayableSessions; } });
57
+ var metrics_1 = require("./metrics");
58
+ Object.defineProperty(exports, "MetricsCollector", { enumerable: true, get: function () { return metrics_1.MetricsCollector; } });
59
+ var risk_1 = require("./risk");
60
+ Object.defineProperty(exports, "RiskScorer", { enumerable: true, get: function () { return risk_1.RiskScorer; } });
61
+ var sarif_1 = require("./sarif");
62
+ Object.defineProperty(exports, "exportSarif", { enumerable: true, get: function () { return sarif_1.exportSarif; } });
63
+ Object.defineProperty(exports, "sarifToString", { enumerable: true, get: function () { return sarif_1.sarifToString; } });
57
64
  __exportStar(require("./types"), exports);
58
65
  //# sourceMappingURL=index.js.map
@@ -0,0 +1,60 @@
1
+ /**
2
+ * MetricsCollector for CodeBot v1.9.0
3
+ *
4
+ * Structured telemetry: counters + histograms.
5
+ * Persists to ~/.codebot/telemetry/metrics-YYYY-MM-DD.jsonl
6
+ * Optional OTLP HTTP export when OTEL_EXPORTER_OTLP_ENDPOINT is set.
7
+ *
8
+ * Pattern: fail-safe, session-scoped, never throws.
9
+ * Follows TokenTracker conventions from src/telemetry.ts.
10
+ */
11
+ export interface CounterValue {
12
+ name: string;
13
+ labels: Record<string, string>;
14
+ value: number;
15
+ }
16
+ export interface HistogramValue {
17
+ name: string;
18
+ labels: Record<string, string>;
19
+ count: number;
20
+ sum: number;
21
+ min: number;
22
+ max: number;
23
+ buckets: number[];
24
+ }
25
+ export interface MetricsSnapshot {
26
+ sessionId: string;
27
+ timestamp: string;
28
+ counters: CounterValue[];
29
+ histograms: HistogramValue[];
30
+ }
31
+ export declare class MetricsCollector {
32
+ private sessionId;
33
+ private counters;
34
+ private histograms;
35
+ constructor(sessionId?: string);
36
+ getSessionId(): string;
37
+ /** Increment a counter by delta (default 1) */
38
+ increment(name: string, labels?: Record<string, string>, delta?: number): void;
39
+ /** Record a histogram observation */
40
+ observe(name: string, value: number, labels?: Record<string, string>): void;
41
+ /** Read a counter value */
42
+ getCounter(name: string, labels?: Record<string, string>): number;
43
+ /** Read a histogram summary */
44
+ getHistogram(name: string, labels?: Record<string, string>): HistogramValue | null;
45
+ /** Full session snapshot */
46
+ snapshot(): MetricsSnapshot;
47
+ /** Persist snapshot to ~/.codebot/telemetry/metrics-YYYY-MM-DD.jsonl */
48
+ save(sessionId?: string): void;
49
+ /** Human-readable per-tool breakdown */
50
+ formatSummary(): string;
51
+ /**
52
+ * Export snapshot in OTLP JSON format via HTTP POST.
53
+ * Only fires when OTEL_EXPORTER_OTLP_ENDPOINT is set.
54
+ * Fails silently — never blocks or crashes.
55
+ */
56
+ exportOtel(snap?: MetricsSnapshot): void;
57
+ /** Build OTLP-compatible JSON payload */
58
+ private buildOtlpPayload;
59
+ }
60
+ //# sourceMappingURL=metrics.d.ts.map