ship-safe 7.0.0 → 9.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,6 +19,59 @@ import { createHash } from 'crypto';
19
19
  import * as output from '../utils/output.js';
20
20
  import { ThreatIntel } from '../utils/threat-intel.js';
21
21
 
22
+ // =============================================================================
23
+ // HERMES SKILL FRONTMATTER PATTERNS (Track D — cross-skill/tool binding)
24
+ // =============================================================================
25
+
26
// Built-in tool registries that skills may reference, keyed by tool name.
// Ship Safe's own tools are added lazily on the first checkHermesFrontmatter()
// call to avoid loading hermes-tool-registry.js (and its crypto import) on
// every invocation.
//
// The map deliberately has a null prototype: tool names come from untrusted
// skill frontmatter, and a plain object literal would resolve names such as
// "constructor", "toString" or "__proto__" through Object.prototype, making
// them look like registered tools.
const KNOWN_TOOL_REGISTRIES = Object.assign(Object.create(null), {
  // Common Hermes community tools (names only — no handler)
  'web_search': 'hermes-community',
  'web_browser': 'hermes-community',
  'file_read': 'hermes-community',
  'file_write': 'hermes-community',
  'code_execute': 'hermes-community',
  'github_api': 'hermes-community',
  'memory_store': 'hermes-community',
  'memory_retrieve': 'hermes-community',
});
40
+
41
// Hermes-specific patterns to check in skill markdown/frontmatter.
// Each entry has a display name, a regex, a severity and an explanatory note.
// All regexes carry the `g` flag, so they are stateful: callers must reset
// `lastIndex` before each `.test()` / `.exec()`.
const HERMES_SKILL_PATTERNS = [
  {
    name: 'Hermes: XML tool_call injection',
    // Lazy, unbounded body: a fixed upper bound on the block length would let
    // an attacker evade detection simply by padding the payload.
    regex: /<tool_call>[\s\S]*?<\/tool_call>/gi,
    severity: 'critical',
    note: 'Skill body contains a <tool_call> block — will be executed by Hermes agents that load this skill.',
  },
  {
    name: 'Hermes: function_calls injection',
    // Same reasoning as above: match the whole block regardless of its size.
    regex: /<function_calls>[\s\S]*?<\/function_calls>/gi,
    severity: 'critical',
    note: 'Skill body contains a <function_calls> block — classic Hermes function-call injection.',
  },
  {
    name: 'Hermes: Forced tool invocation instruction',
    regex: /(?:you\s+must\s+(?:call|invoke|use)\s+(?:the\s+)?tool|always\s+(?:call|invoke|run)\s+(?:the\s+)?(?:tool|function)|tool\s+MUST\s+be\s+(?:called|invoked|used))/gi,
    severity: 'high',
    note: 'Skill instructs agent to call a specific tool unconditionally — bypasses agent autonomy.',
  },
  {
    name: 'Hermes: Plan/goal hijacking',
    regex: /(?:update\s+(?:your\s+)?(?:goal|plan|objective)\s+to|change\s+(?:your\s+)?(?:goal|plan|objective)|your\s+(?:new\s+)?(?:goal|plan|primary\s+objective)\s+(?:is|should\s+be))/gi,
    severity: 'critical',
    note: 'Skill attempts to overwrite the agent\'s goal or plan state — ASI-01 Goal Hijacking.',
  },
  {
    name: 'Hermes: Memory layer write instruction',
    regex: /(?:write\s+(?:this|the\s+following)\s+to\s+(?:memory|episodic|semantic|working)\s+memory|store\s+(?:this|the\s+following)\s+in\s+(?:memory|episodic|semantic))/gi,
    severity: 'high',
    note: 'Skill instructs agent to write attacker-controlled data to memory — ASI-06 Memory Poisoning.',
  },
];
74
+
22
75
  // =============================================================================
23
76
  // POPULAR SKILL NAMES (for typosquatting detection)
24
77
  // =============================================================================
@@ -113,7 +166,7 @@ export async function scanSkillCommand(target, options = {}) {
113
166
  console.log(chalk.gray(` Size: ${content.length} bytes`));
114
167
  console.log();
115
168
 
116
- const findings = analyzeSkill(content, skillName, source);
169
+ const findings = await analyzeSkill(content, skillName, source);
117
170
 
118
171
  if (options.json) {
119
172
  console.log(JSON.stringify({ skill: skillName, source, findings, summary: getSummary(findings) }, null, 2));
@@ -127,7 +180,7 @@ export async function scanSkillCommand(target, options = {}) {
127
180
  // SKILL ANALYSIS
128
181
  // =============================================================================
129
182
 
130
- function analyzeSkill(content, skillName, source) {
183
+ async function analyzeSkill(content, skillName, source) {
131
184
  const findings = [];
132
185
 
133
186
  // 1. Static pattern analysis
@@ -152,10 +205,12 @@ function analyzeSkill(content, skillName, source) {
152
205
  try {
153
206
  const manifest = JSON.parse(content);
154
207
  if (manifest.permissions) {
155
- const dangerous = ['shell', 'exec', 'system', 'network', 'filesystem', 'admin', 'root'];
208
+ const dangerousPerm = [/\bshell\b/i, /\bexec\b/i, /\bsystem\b/i, /\badmin\b/i, /\broot\b/i,
209
+ /filesystem\s*:\s*(write|read-write)/i, /network\s*:\s*(unrestricted|all)/i,
210
+ /^filesystem$/i, /^network$/i];
156
211
  for (const perm of (Array.isArray(manifest.permissions) ? manifest.permissions : [])) {
157
212
  const permStr = typeof perm === 'string' ? perm : perm.name || '';
158
- if (dangerous.some(d => permStr.toLowerCase().includes(d))) {
213
+ if (dangerousPerm.some(p => p.test(permStr))) {
159
214
  findings.push({
160
215
  check: 'permission-audit',
161
216
  name: `Dangerous permission: ${permStr}`,
@@ -216,6 +271,194 @@ function analyzeSkill(content, skillName, source) {
216
271
  });
217
272
  }
218
273
 
274
+ // 6. Hermes-specific: frontmatter tool binding + permission drift validation
275
+ findings.push(...(await checkHermesFrontmatter(content)));
276
+
277
+ // 7. Hermes-specific: function-call injection and goal hijacking in body
278
+ findings.push(...checkHermesBodyPatterns(content, lines));
279
+
280
+ return findings;
281
+ }
282
+
283
+ // =============================================================================
284
+ // HERMES FRONTMATTER VALIDATION (Track D)
285
+ // =============================================================================
286
+
287
/**
 * Parse the YAML frontmatter block (between --- delimiters) of a markdown skill.
 *
 * This is a deliberately small parser, not a full YAML implementation. It
 * understands:
 *   - `key: value` scalars (a leading and a trailing quote character are stripped),
 *   - inline arrays `key: [a, b, c]`,
 *   - block lists (`key:` followed by indented `- item` lines).
 * Lines that do not start with a key at column 0 are ignored by the scalar pass.
 *
 * @param {string} content - Full text of the skill file.
 * @returns {Object<string, string|string[]>|null} Parsed fields with string or
 *   string-array values, or null when the content does not start with a
 *   frontmatter block.
 */
function parseFrontmatter(content) {
  const match = content.match(/^---\r?\n([\s\S]*?)\r?\n---/);
  if (!match) return null;

  const fm = {};
  // Normalise CRLF line endings first. `.` never matches "\r" and `$` (without
  // the m flag) only matches at end of input, so a trailing "\r" would make
  // every `key: value` line of a Windows-formatted file fail to parse.
  const yamlBlock = match[1].replace(/\r\n/g, '\n');

  for (const line of yamlBlock.split('\n')) {
    const kv = line.match(/^(\w[\w-]*):\s*(.*)$/);
    if (!kv) continue;
    const [, key, rawVal] = kv;
    const val = rawVal.trim();

    if (val.startsWith('[') && val.endsWith(']')) {
      // Inline array: [a, b, c]
      fm[key] = val.slice(1, -1).split(',').map(s => s.trim().replace(/['"]/g, '')).filter(Boolean);
    } else {
      fm[key] = val.replace(/^['"]|['"]$/g, '');
    }
  }

  // Collect multi-line list values (indented - items). These overwrite the
  // empty-string value the scalar pass stored for the bare `key:` line.
  const listRe = /^(\w[\w-]*):\s*\n((?:\s+-\s+.+\n?)+)/gm;
  let m;
  while ((m = listRe.exec(yamlBlock)) !== null) {
    const [, key, block] = m;
    fm[key] = block.match(/-\s+(.+)/g)?.map(s => s.replace(/^-\s+/, '').replace(/['"]/g, '').trim()) ?? [];
  }

  return fm;
}
322
+
323
// Becomes true once a load attempt for the Ship Safe tool registry has
// finished, whether or not it succeeded.
let _hermesToolsLoaded = false;

/**
 * Merge the tool names exported by hermes-tool-registry.js into
 * KNOWN_TOOL_REGISTRIES, tagging each with the 'ship-safe' registry.
 * The module is imported on demand; if loading or merging fails, the error is
 * ignored and the built-in entries are used as-is. Once an attempt has
 * completed, later calls return immediately.
 *
 * @returns {Promise<void>}
 */
async function ensureHermesToolsLoaded() {
  if (!_hermesToolsLoaded) {
    try {
      const registryModule = await import('../utils/hermes-tool-registry.js');
      for (const tool of registryModule.HERMES_TOOLS) {
        KNOWN_TOOL_REGISTRIES[tool.name] = 'ship-safe';
      }
    } catch {
      /* non-fatal — registry unavailable */
    }
    _hermesToolsLoaded = true;
  }
}
332
+
333
/**
 * Validate the YAML frontmatter of a Hermes skill.
 *
 * Reports, in this order:
 *   1. a missing `permissions` field, or wildcard / dangerous permissions,
 *   2. a missing `version` field,
 *   3. `tools` entries that are not present in KNOWN_TOOL_REGISTRIES,
 *   4. tools declared without any `permissions` field.
 *
 * @param {string} content - Full text of the skill file.
 * @returns {Promise<Array<{check: string, name: string, severity: string, line: number, matched: string}>>}
 *   Findings (all reported with line 0); empty when the content has no frontmatter.
 */
async function checkHermesFrontmatter(content) {
  await ensureHermesToolsLoaded();
  const findings = [];
  const fm = parseFrontmatter(content);

  // Not a markdown skill with frontmatter — skip
  if (!fm) return findings;

  // ── Check: missing permissions field ──────────────────────────────────────
  if (!fm.permissions) {
    findings.push({
      check: 'hermes-frontmatter',
      name: 'Hermes: Skill missing permissions field (ASI-02 Excessive Agency)',
      severity: 'medium',
      line: 0,
      matched: 'No permissions: field in frontmatter — skill may be granted more access than intended',
    });
  } else {
    // ── Check: wildcard permissions ──────────────────────────────────────────
    const perms = Array.isArray(fm.permissions) ? fm.permissions : [fm.permissions];
    const wildcards = perms.filter(p => /^\*$|^all$|^any$/i.test(String(p)));
    if (wildcards.length > 0) {
      findings.push({
        check: 'hermes-frontmatter',
        name: 'Hermes: Wildcard permissions (* / all) — excessive agency (ASI-02)',
        severity: 'high',
        line: 0,
        matched: `permissions: [${wildcards.join(', ')}]`,
      });
    }

    // ── Check: dangerous explicit permissions ────────────────────────────────
    // Match whole-word or exact qualified values — don't fire on "filesystem: read-only"
    const dangerousPatterns = [
      /\bshell\b/i, /\bexec\b/i, /\bsystem\b/i, /\badmin\b/i, /\broot\b/i, /\bsudo\b/i,
      /filesystem\s*:\s*write/i, /filesystem\s*:\s*read-write/i,
      /network\s*:\s*unrestricted/i, /network\s*:\s*all/i,
      /^filesystem$/i, /^network$/i, // bare "filesystem" or "network" without qualifier is ambiguous → flag
    ];
    for (const perm of perms) {
      if (dangerousPatterns.some(p => p.test(String(perm)))) {
        findings.push({
          check: 'hermes-frontmatter',
          name: `Hermes: Dangerous permission declared: ${perm}`,
          severity: 'high',
          line: 0,
          matched: `permissions: [${perm}]`,
        });
      }
    }
  }

  // ── Check: missing version pin ────────────────────────────────────────────
  if (!fm.version) {
    findings.push({
      check: 'hermes-frontmatter',
      name: 'Hermes: Skill missing version field — unpinned skill (ASI-10 Supply Chain)',
      severity: 'medium',
      line: 0,
      matched: 'No version: field in frontmatter — skill version drift cannot be detected',
    });
  }

  // ── Check: cross-skill tool binding validation ────────────────────────────
  const tools = Array.isArray(fm.tools) ? fm.tools : fm.tools ? [fm.tools] : [];
  for (const toolName of tools) {
    // Own-property lookup only: tool names are attacker-controlled, and a plain
    // `registry[toolName]` read would also find inherited members such as
    // "constructor" or "__proto__", letting those names pass as known tools.
    const isKnownTool = Object.prototype.hasOwnProperty.call(KNOWN_TOOL_REGISTRIES, toolName);
    if (!isKnownTool) {
      findings.push({
        check: 'hermes-tool-binding',
        name: `Hermes: Unresolvable tool reference: "${toolName}"`,
        severity: 'high',
        line: 0,
        matched: `tools: [${toolName}] — not found in any known tool registry. May cause silent failures or late-binding substitution.`,
      });
    }
  }

  // ── Check: tools declared but no permissions field ────────────────────────
  if (tools.length > 0 && !fm.permissions) {
    findings.push({
      check: 'hermes-tool-binding',
      name: 'Hermes: Skill declares tools without permissions (permission drift)',
      severity: 'high',
      line: 0,
      matched: `tools: [${tools.join(', ')}] declared but no permissions: field — skill runs with ambient agent permissions`,
    });
  }

  return findings;
}
423
+
424
/**
 * Scan a skill for the patterns listed in HERMES_SKILL_PATTERNS.
 *
 * Two passes are made: first each line is tested on its own so a hit carries a
 * 1-based line number; then the whole content is searched so that matches
 * spanning several lines are still reported (with line 0), but only for
 * patterns that produced no finding in the first pass.
 *
 * @param {string} content - Full text of the skill file.
 * @param {string[]} lines - The same content as individual lines.
 * @returns {Array<{check: string, name: string, severity: string, line: number, matched: string}>}
 */
function checkHermesBodyPatterns(content, lines) {
  const findings = [];

  // Pass 1: per-line matching.
  for (let lineNo = 1; lineNo <= lines.length; lineNo++) {
    const text = lines[lineNo - 1];
    for (const pattern of HERMES_SKILL_PATTERNS) {
      // The regexes are global, hence stateful — rewind before every use.
      pattern.regex.lastIndex = 0;
      if (!pattern.regex.test(text)) continue;
      findings.push({
        check: 'hermes-injection',
        name: pattern.name,
        severity: pattern.severity,
        line: lineNo,
        matched: text.trim().slice(0, 100),
      });
    }
  }

  // Pass 2: whole-content matching for blocks (e.g. <tool_call>) that span lines.
  for (const pattern of HERMES_SKILL_PATTERNS) {
    pattern.regex.lastIndex = 0;
    const hit = pattern.regex.exec(content);
    if (!hit) continue;

    // Skip patterns already reported, to avoid duplicates.
    const reported = findings.some(existing => existing.name === pattern.name);
    if (reported) continue;

    findings.push({
      check: 'hermes-injection',
      name: pattern.name,
      severity: pattern.severity,
      line: 0,
      matched: hit[0].slice(0, 100),
    });
  }

  return findings;
}
221
464
 
@@ -281,7 +524,7 @@ async function scanAllSkills(rootPath) {
281
524
  const response = await fetch(url);
282
525
  if (!response.ok) throw new Error(`HTTP ${response.status}`);
283
526
  const content = await response.text();
284
- const findings = analyzeSkill(content, name, url);
527
+ const findings = await analyzeSkill(content, name, url);
285
528
  if (findings.length > 0) {
286
529
  printSkillFindings(findings, name);
287
530
  } else {
@@ -13,6 +13,7 @@
13
13
  import fs from 'fs';
14
14
  import path from 'path';
15
15
  import chalk from 'chalk';
16
+ import { execFileSync } from 'child_process';
16
17
  import { SKIP_DIRS, SKIP_EXTENSIONS, SKIP_FILENAMES, SECRET_PATTERNS, SECURITY_PATTERNS } from '../utils/patterns.js';
17
18
  import { isHighEntropyMatch, getConfidence } from '../utils/entropy.js';
18
19
  import * as output from '../utils/output.js';
@@ -289,11 +290,13 @@ function showWatchStatus(rootPath) {
289
290
  // =============================================================================
290
291
 
291
292
  async function watchDeep(absolutePath, options = {}) {
292
- const { buildOrchestrator } = await import('../agents/index.js');
293
+ const { buildOrchestratorAsync } = await import('../agents/index.js');
293
294
  const { ReconAgent } = await import('../agents/recon-agent.js');
294
295
 
295
- const debounceMs = options.debounce || 1500;
296
- const threshold = options.threshold || null;
296
+ const debounceMs = options.debounce || 1500;
297
+ const threshold = options.threshold || null;
298
+ const slackWebhook = options.slack || process.env.SHIP_SAFE_SLACK_WEBHOOK || null;
299
+ const prComments = options.prComment || false;
297
300
  const scoringEngine = new ScoringEngine();
298
301
 
299
302
  console.log();
@@ -301,7 +304,9 @@ async function watchDeep(absolutePath, options = {}) {
301
304
  console.log();
302
305
  console.log(chalk.cyan(' Running full agent scans on file changes'));
303
306
  console.log(chalk.gray(` Debounce: ${debounceMs}ms`));
304
- if (threshold) console.log(chalk.gray(` Threshold: ${threshold}/100`));
307
+ if (threshold) console.log(chalk.gray(` Threshold: ${threshold}/100`));
308
+ if (slackWebhook) console.log(chalk.gray(' Slack: notifications enabled'));
309
+ if (prComments) console.log(chalk.gray(' PR: inline comments enabled (requires gh CLI)'));
305
310
  console.log(chalk.gray(' Press Ctrl+C to stop'));
306
311
  console.log();
307
312
 
@@ -332,7 +337,7 @@ async function watchDeep(absolutePath, options = {}) {
332
337
  console.log(chalk.gray(` [${timestamp}] ${files.length} file(s) changed — deep scanning...`));
333
338
 
334
339
  try {
335
- const orchestrator = buildOrchestrator();
340
+ const orchestrator = await buildOrchestratorAsync(absolutePath, { quiet: true });
336
341
  const context = {
337
342
  rootPath: absolutePath,
338
343
  files,
@@ -391,6 +396,16 @@ async function watchDeep(absolutePath, options = {}) {
391
396
  if (threshold && scoreResult.score < threshold) {
392
397
  console.log(chalk.red.bold(` ⚠ Score ${scoreResult.score} below threshold ${threshold}\n`));
393
398
  }
399
+
400
+ // ── Slack Notification ──────────────────────────────────────────────
401
+ if (slackWebhook && findings.length > 0) {
402
+ await postSlackAlert(slackWebhook, findings, scoreResult, absolutePath).catch(() => {});
403
+ }
404
+
405
+ // ── GitHub PR Inline Comments ────────────────────────────────────────
406
+ if (prComments && findings.length > 0) {
407
+ await postPRComments(findings, absolutePath).catch(() => {});
408
+ }
394
409
  } catch (err) {
395
410
  console.log(chalk.red(` [${timestamp}] Scan error: ${err.message}\n`));
396
411
  }
@@ -431,6 +446,128 @@ async function watchDeep(absolutePath, options = {}) {
431
446
  }
432
447
  }
433
448
 
449
+ // =============================================================================
450
+ // SLACK NOTIFICATIONS
451
+ // =============================================================================
452
+
453
/**
 * Post a security alert to a Slack webhook.
 * Webhook URL can be set via --slack or SHIP_SAFE_SLACK_WEBHOOK env var.
 *
 * The message summarises the score and finding counts and lists up to five
 * critical/high findings. Text derived from the scanned repository (repo name,
 * finding titles, file names) is escaped before being sent, because Slack
 * interprets `&`, `<` and `>` as control characters.
 *
 * @param {string} webhookUrl - Slack incoming-webhook URL.
 * @param {Array<object>} findings - Findings; `severity`, `title`, `file` and `line` are read.
 * @param {object} scoreResult - Scoring result; `score` is shown, or '?' when null/undefined.
 * @param {string} rootPath - Scanned directory; its basename is used as the repo name.
 * @returns {Promise<void>} Resolves after the request completes. A non-2xx
 *   response is only logged; network errors and the 10 s timeout reject.
 */
async function postSlackAlert(webhookUrl, findings, scoreResult, rootPath) {
  const repoName = escapeSlackText(path.basename(rootPath));
  const criticals = findings.filter(f => f.severity === 'critical').length;
  const highs = findings.filter(f => f.severity === 'high').length;

  const color = criticals > 0 ? 'danger' : highs > 0 ? 'warning' : 'good';
  const emoji = criticals > 0 ? ':rotating_light:' : highs > 0 ? ':warning:' : ':shield:';

  const topFindings = findings
    .filter(f => f.severity === 'critical' || f.severity === 'high')
    .slice(0, 5)
    .map(f => {
      const fileLabel = f.file ? escapeSlackText(path.basename(f.file)) : '?';
      const lineLabel = f.line ? `:${f.line}` : '';
      return `• *${f.severity.toUpperCase()}* ${escapeSlackText(f.title)} — \`${fileLabel}${lineLabel}\``;
    })
    .join('\n');

  const payload = {
    attachments: [{
      color,
      fallback: `Ship Safe: ${findings.length} security finding(s) in ${repoName}`,
      title: `${emoji} Ship Safe — Security Alert`,
      text: `*${repoName}* — Score: *${scoreResult.score ?? '?'}/100* — ${findings.length} finding(s) (${criticals} critical, ${highs} high)`,
      fields: topFindings ? [{ title: 'Top Findings', value: topFindings, short: false }] : [],
      footer: 'ship-safe watch --deep',
      ts: Math.floor(Date.now() / 1000),
    }],
  };

  const res = await fetch(webhookUrl, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
    signal: AbortSignal.timeout(10000),
  });

  if (!res.ok) {
    console.log(chalk.yellow(` [Slack] Notification failed: HTTP ${res.status}`));
  }
}

/**
 * Escape the three characters Slack treats specially in message text.
 * `&` is replaced first so the entities produced for `<` and `>` are not
 * escaped a second time.
 *
 * @param {*} value - Value to embed in Slack text; coerced with String().
 * @returns {string} Text with `&`, `<`, `>` replaced by HTML entities.
 */
function escapeSlackText(value) {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
}
494
+
495
+ // =============================================================================
496
+ // GITHUB PR INLINE COMMENTS
497
+ // =============================================================================
498
+
499
/**
 * Post inline security comments to the currently open PR (if any).
 * Requires `gh` CLI to be installed and authenticated.
 *
 * Posts a review comment for each critical/high finding that has both a file
 * and a line, capped at 10 comments per scan. Returns quietly when `gh` is
 * missing, when no PR can be resolved for the current branch, or when the
 * HEAD commit cannot be read. A failure to post one comment does not stop
 * the remaining ones.
 *
 * @param {Array<object>} findings - Findings; `severity`, `file`, `line`, `title`,
 *   `description`, `remediation` and `rule` are read.
 * @param {string} rootPath - Repository root; used as cwd for gh/git and to
 *   derive repo-relative paths.
 * @returns {Promise<void>}
 */
async function postPRComments(findings, rootPath) {
  // Check if gh is available
  try {
    execFileSync('gh', ['--version'], { stdio: 'pipe' });
  } catch {
    console.log(chalk.gray(' [PR] gh CLI not found — skipping PR comments'));
    return;
  }

  // Get current PR number
  let prNumber;
  try {
    const prJson = execFileSync('gh', ['pr', 'view', '--json', 'number'], {
      cwd: rootPath, encoding: 'utf-8', stdio: 'pipe',
    });
    prNumber = JSON.parse(prJson).number;
  } catch {
    return; // No open PR on this branch
  }

  // Get current commit SHA
  let sha;
  try {
    sha = execFileSync('git', ['rev-parse', 'HEAD'], {
      cwd: rootPath, encoding: 'utf-8', stdio: 'pipe',
    }).trim();
  } catch {
    return;
  }

  const criticalOrHigh = findings.filter(f =>
    (f.severity === 'critical' || f.severity === 'high') && f.file && f.line
  ).slice(0, 10); // Max 10 comments per scan

  // Count only the comments gh actually accepted, so the summary is truthful.
  let posted = 0;

  for (const f of criticalOrHigh) {
    const relFile = path.relative(rootPath, f.file).replace(/\\/g, '/');
    const body = [
      `**Ship Safe — ${f.severity.toUpperCase()} finding**`,
      '',
      `**${f.title}**`,
      f.description || '',
      '',
      f.remediation ? `**Fix:** ${f.remediation}` : '',
      '',
      `_[${f.rule}] — detected by ship-safe watch_`,
    ].join('\n');

    try {
      // String parameters go through --raw-field so gh sends them verbatim.
      // --field applies "magic" conversion: a value starting with "@" is read
      // from a file, {owner}/{repo}/{branch} placeholders are substituted and
      // true/false/null/integers are retyped — all wrong for a path such as
      // "@scope/pkg/index.js" or a comment body quoting "{repo}".
      // Only `line` keeps --field, because the API expects an integer there.
      execFileSync('gh', [
        'api',
        `repos/{owner}/{repo}/pulls/${prNumber}/comments`,
        '--method', 'POST',
        '--raw-field', `body=${body}`,
        '--raw-field', `commit_id=${sha}`,
        '--raw-field', `path=${relFile}`,
        '--field', `line=${f.line}`,
        '--raw-field', 'side=RIGHT',
      ], { cwd: rootPath, stdio: 'pipe' });
      posted += 1;
    } catch { /* individual comment failure is non-fatal */ }
  }

  if (posted > 0) {
    console.log(chalk.gray(` [PR #${prNumber}] Posted ${posted} inline comment(s)`));
  }
}
570
+
434
571
  // =============================================================================
435
572
  // CONFIG WATCH — scanConfigFiles
436
573
  // =============================================================================
package/cli/index.js CHANGED
@@ -71,3 +71,8 @@ export { CacheManager } from './utils/cache-manager.js';
71
71
 
72
72
  // ── LLM Providers ─────────────────────────────────────────────────────────────
73
73
  export { createProvider, autoDetectProvider } from './providers/llm-provider.js';
74
+
75
+ // ── v8.0.0 — Ship Safe × Hermes Agent ────────────────────────────────────────
76
+ export { HermesSecurityAgent } from './agents/hermes-security-agent.js';
77
+ export { AgentAttestationAgent } from './agents/agent-attestation-agent.js';
78
+ export { HERMES_TOOLS, registerWithHermes, verifyIntegrity } from './utils/hermes-tool-registry.js';
@@ -92,6 +92,9 @@ class AnthropicProvider extends BaseLLMProvider {
92
92
  this.baseUrl = options.baseUrl || 'https://api.anthropic.com/v1/messages';
93
93
  }
94
94
 
95
+ /** Whether this provider supports guaranteed-JSON tool-use output */
96
+ get supportsStructuredOutput() { return true; }
97
+
95
98
  async complete(systemPrompt, userPrompt, options = {}) {
96
99
  const response = await fetch(this.baseUrl, {
97
100
  method: 'POST',
@@ -101,7 +104,7 @@ class AnthropicProvider extends BaseLLMProvider {
101
104
  'content-type': 'application/json',
102
105
  },
103
106
  body: JSON.stringify({
104
- model: this.model,
107
+ model: options.model || this.model,
105
108
  max_tokens: options.maxTokens || 2048,
106
109
  system: systemPrompt,
107
110
  messages: [{ role: 'user', content: userPrompt }],
@@ -109,12 +112,57 @@ class AnthropicProvider extends BaseLLMProvider {
109
112
  });
110
113
 
111
114
  if (!response.ok) {
112
- throw new Error(`Anthropic API error: HTTP ${response.status}`);
115
+ const body = await response.text().catch(() => '');
116
+ throw new Error(`Anthropic API error: HTTP ${response.status} ${body.slice(0, 200)}`);
113
117
  }
114
118
 
115
119
  const data = await response.json();
116
120
  return data.content?.[0]?.text || '';
117
121
  }
122
+
123
+ /**
124
+ * Complete with guaranteed-JSON output via Anthropic tool-use API.
125
+ * The LLM is forced to call the named tool, so the response always matches
126
+ * the provided JSON Schema — no regex cleanup needed.
127
+ *
128
+ * @param {string} systemPrompt
129
+ * @param {string} userPrompt
130
+ * @param {string} toolName — Name of the forced tool call
131
+ * @param {object} inputSchema — JSON Schema for the tool's input
132
+ * @param {object} options — { maxTokens, model }
133
+ * @returns {Promise<object|null>} — Parsed tool input object, or null on failure
134
+ */
135
+ async completeWithTools(systemPrompt, userPrompt, toolName, inputSchema, options = {}) {
136
+ const response = await fetch(this.baseUrl, {
137
+ method: 'POST',
138
+ headers: {
139
+ 'x-api-key': this.apiKey,
140
+ 'anthropic-version': '2023-06-01',
141
+ 'content-type': 'application/json',
142
+ },
143
+ body: JSON.stringify({
144
+ model: options.model || this.model,
145
+ max_tokens: options.maxTokens || 2048,
146
+ system: systemPrompt,
147
+ messages: [{ role: 'user', content: userPrompt }],
148
+ tools: [{
149
+ name: toolName,
150
+ description: `Report ${toolName} results`,
151
+ input_schema: inputSchema,
152
+ }],
153
+ tool_choice: { type: 'tool', name: toolName },
154
+ }),
155
+ });
156
+
157
+ if (!response.ok) {
158
+ const body = await response.text().catch(() => '');
159
+ throw new Error(`Anthropic API error: HTTP ${response.status} ${body.slice(0, 200)}`);
160
+ }
161
+
162
+ const data = await response.json();
163
+ const toolUse = data.content?.find(b => b.type === 'tool_use');
164
+ return toolUse?.input ?? null;
165
+ }
118
166
  }
119
167
 
120
168
  // =============================================================================