spec-gen-cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +1078 -0
- package/dist/api/analyze.d.ts +17 -0
- package/dist/api/analyze.d.ts.map +1 -0
- package/dist/api/analyze.js +109 -0
- package/dist/api/analyze.js.map +1 -0
- package/dist/api/drift.d.ts +21 -0
- package/dist/api/drift.d.ts.map +1 -0
- package/dist/api/drift.js +145 -0
- package/dist/api/drift.js.map +1 -0
- package/dist/api/generate.d.ts +18 -0
- package/dist/api/generate.d.ts.map +1 -0
- package/dist/api/generate.js +251 -0
- package/dist/api/generate.js.map +1 -0
- package/dist/api/index.d.ts +39 -0
- package/dist/api/index.d.ts.map +1 -0
- package/dist/api/index.js +32 -0
- package/dist/api/index.js.map +1 -0
- package/dist/api/init.d.ts +18 -0
- package/dist/api/init.d.ts.map +1 -0
- package/dist/api/init.js +82 -0
- package/dist/api/init.js.map +1 -0
- package/dist/api/run.d.ts +19 -0
- package/dist/api/run.d.ts.map +1 -0
- package/dist/api/run.js +291 -0
- package/dist/api/run.js.map +1 -0
- package/dist/api/specs.d.ts +49 -0
- package/dist/api/specs.d.ts.map +1 -0
- package/dist/api/specs.js +136 -0
- package/dist/api/specs.js.map +1 -0
- package/dist/api/types.d.ts +176 -0
- package/dist/api/types.d.ts.map +1 -0
- package/dist/api/types.js +9 -0
- package/dist/api/types.js.map +1 -0
- package/dist/api/verify.d.ts +20 -0
- package/dist/api/verify.d.ts.map +1 -0
- package/dist/api/verify.js +117 -0
- package/dist/api/verify.js.map +1 -0
- package/dist/cli/commands/analyze.d.ts +27 -0
- package/dist/cli/commands/analyze.d.ts.map +1 -0
- package/dist/cli/commands/analyze.js +485 -0
- package/dist/cli/commands/analyze.js.map +1 -0
- package/dist/cli/commands/drift.d.ts +9 -0
- package/dist/cli/commands/drift.d.ts.map +1 -0
- package/dist/cli/commands/drift.js +540 -0
- package/dist/cli/commands/drift.js.map +1 -0
- package/dist/cli/commands/generate.d.ts +9 -0
- package/dist/cli/commands/generate.d.ts.map +1 -0
- package/dist/cli/commands/generate.js +633 -0
- package/dist/cli/commands/generate.js.map +1 -0
- package/dist/cli/commands/init.d.ts +9 -0
- package/dist/cli/commands/init.d.ts.map +1 -0
- package/dist/cli/commands/init.js +171 -0
- package/dist/cli/commands/init.js.map +1 -0
- package/dist/cli/commands/mcp.d.ts +638 -0
- package/dist/cli/commands/mcp.d.ts.map +1 -0
- package/dist/cli/commands/mcp.js +574 -0
- package/dist/cli/commands/mcp.js.map +1 -0
- package/dist/cli/commands/run.d.ts +24 -0
- package/dist/cli/commands/run.d.ts.map +1 -0
- package/dist/cli/commands/run.js +546 -0
- package/dist/cli/commands/run.js.map +1 -0
- package/dist/cli/commands/verify.d.ts +9 -0
- package/dist/cli/commands/verify.d.ts.map +1 -0
- package/dist/cli/commands/verify.js +417 -0
- package/dist/cli/commands/verify.js.map +1 -0
- package/dist/cli/commands/view.d.ts +9 -0
- package/dist/cli/commands/view.d.ts.map +1 -0
- package/dist/cli/commands/view.js +511 -0
- package/dist/cli/commands/view.js.map +1 -0
- package/dist/cli/index.d.ts +9 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +83 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/core/analyzer/architecture-writer.d.ts +67 -0
- package/dist/core/analyzer/architecture-writer.d.ts.map +1 -0
- package/dist/core/analyzer/architecture-writer.js +209 -0
- package/dist/core/analyzer/architecture-writer.js.map +1 -0
- package/dist/core/analyzer/artifact-generator.d.ts +222 -0
- package/dist/core/analyzer/artifact-generator.d.ts.map +1 -0
- package/dist/core/analyzer/artifact-generator.js +726 -0
- package/dist/core/analyzer/artifact-generator.js.map +1 -0
- package/dist/core/analyzer/call-graph.d.ts +83 -0
- package/dist/core/analyzer/call-graph.d.ts.map +1 -0
- package/dist/core/analyzer/call-graph.js +827 -0
- package/dist/core/analyzer/call-graph.js.map +1 -0
- package/dist/core/analyzer/code-shaper.d.ts +33 -0
- package/dist/core/analyzer/code-shaper.d.ts.map +1 -0
- package/dist/core/analyzer/code-shaper.js +149 -0
- package/dist/core/analyzer/code-shaper.js.map +1 -0
- package/dist/core/analyzer/dependency-graph.d.ts +179 -0
- package/dist/core/analyzer/dependency-graph.d.ts.map +1 -0
- package/dist/core/analyzer/dependency-graph.js +574 -0
- package/dist/core/analyzer/dependency-graph.js.map +1 -0
- package/dist/core/analyzer/duplicate-detector.d.ts +52 -0
- package/dist/core/analyzer/duplicate-detector.d.ts.map +1 -0
- package/dist/core/analyzer/duplicate-detector.js +279 -0
- package/dist/core/analyzer/duplicate-detector.js.map +1 -0
- package/dist/core/analyzer/embedding-service.d.ts +50 -0
- package/dist/core/analyzer/embedding-service.d.ts.map +1 -0
- package/dist/core/analyzer/embedding-service.js +104 -0
- package/dist/core/analyzer/embedding-service.js.map +1 -0
- package/dist/core/analyzer/file-walker.d.ts +78 -0
- package/dist/core/analyzer/file-walker.d.ts.map +1 -0
- package/dist/core/analyzer/file-walker.js +531 -0
- package/dist/core/analyzer/file-walker.js.map +1 -0
- package/dist/core/analyzer/import-parser.d.ts +91 -0
- package/dist/core/analyzer/import-parser.d.ts.map +1 -0
- package/dist/core/analyzer/import-parser.js +720 -0
- package/dist/core/analyzer/import-parser.js.map +1 -0
- package/dist/core/analyzer/index.d.ts +10 -0
- package/dist/core/analyzer/index.d.ts.map +1 -0
- package/dist/core/analyzer/index.js +10 -0
- package/dist/core/analyzer/index.js.map +1 -0
- package/dist/core/analyzer/refactor-analyzer.d.ts +80 -0
- package/dist/core/analyzer/refactor-analyzer.d.ts.map +1 -0
- package/dist/core/analyzer/refactor-analyzer.js +339 -0
- package/dist/core/analyzer/refactor-analyzer.js.map +1 -0
- package/dist/core/analyzer/repository-mapper.d.ts +150 -0
- package/dist/core/analyzer/repository-mapper.d.ts.map +1 -0
- package/dist/core/analyzer/repository-mapper.js +731 -0
- package/dist/core/analyzer/repository-mapper.js.map +1 -0
- package/dist/core/analyzer/signature-extractor.d.ts +31 -0
- package/dist/core/analyzer/signature-extractor.d.ts.map +1 -0
- package/dist/core/analyzer/signature-extractor.js +387 -0
- package/dist/core/analyzer/signature-extractor.js.map +1 -0
- package/dist/core/analyzer/significance-scorer.d.ts +79 -0
- package/dist/core/analyzer/significance-scorer.d.ts.map +1 -0
- package/dist/core/analyzer/significance-scorer.js +407 -0
- package/dist/core/analyzer/significance-scorer.js.map +1 -0
- package/dist/core/analyzer/subgraph-extractor.d.ts +43 -0
- package/dist/core/analyzer/subgraph-extractor.d.ts.map +1 -0
- package/dist/core/analyzer/subgraph-extractor.js +129 -0
- package/dist/core/analyzer/subgraph-extractor.js.map +1 -0
- package/dist/core/analyzer/vector-index.d.ts +63 -0
- package/dist/core/analyzer/vector-index.d.ts.map +1 -0
- package/dist/core/analyzer/vector-index.js +169 -0
- package/dist/core/analyzer/vector-index.js.map +1 -0
- package/dist/core/drift/drift-detector.d.ts +102 -0
- package/dist/core/drift/drift-detector.d.ts.map +1 -0
- package/dist/core/drift/drift-detector.js +597 -0
- package/dist/core/drift/drift-detector.js.map +1 -0
- package/dist/core/drift/git-diff.d.ts +55 -0
- package/dist/core/drift/git-diff.d.ts.map +1 -0
- package/dist/core/drift/git-diff.js +356 -0
- package/dist/core/drift/git-diff.js.map +1 -0
- package/dist/core/drift/index.d.ts +12 -0
- package/dist/core/drift/index.d.ts.map +1 -0
- package/dist/core/drift/index.js +9 -0
- package/dist/core/drift/index.js.map +1 -0
- package/dist/core/drift/spec-mapper.d.ts +73 -0
- package/dist/core/drift/spec-mapper.d.ts.map +1 -0
- package/dist/core/drift/spec-mapper.js +353 -0
- package/dist/core/drift/spec-mapper.js.map +1 -0
- package/dist/core/generator/adr-generator.d.ts +32 -0
- package/dist/core/generator/adr-generator.d.ts.map +1 -0
- package/dist/core/generator/adr-generator.js +192 -0
- package/dist/core/generator/adr-generator.js.map +1 -0
- package/dist/core/generator/index.d.ts +9 -0
- package/dist/core/generator/index.d.ts.map +1 -0
- package/dist/core/generator/index.js +12 -0
- package/dist/core/generator/index.js.map +1 -0
- package/dist/core/generator/mapping-generator.d.ts +54 -0
- package/dist/core/generator/mapping-generator.d.ts.map +1 -0
- package/dist/core/generator/mapping-generator.js +239 -0
- package/dist/core/generator/mapping-generator.js.map +1 -0
- package/dist/core/generator/openspec-compat.d.ts +160 -0
- package/dist/core/generator/openspec-compat.d.ts.map +1 -0
- package/dist/core/generator/openspec-compat.js +523 -0
- package/dist/core/generator/openspec-compat.js.map +1 -0
- package/dist/core/generator/openspec-format-generator.d.ts +111 -0
- package/dist/core/generator/openspec-format-generator.d.ts.map +1 -0
- package/dist/core/generator/openspec-format-generator.js +817 -0
- package/dist/core/generator/openspec-format-generator.js.map +1 -0
- package/dist/core/generator/openspec-writer.d.ts +131 -0
- package/dist/core/generator/openspec-writer.d.ts.map +1 -0
- package/dist/core/generator/openspec-writer.js +379 -0
- package/dist/core/generator/openspec-writer.js.map +1 -0
- package/dist/core/generator/prompts.d.ts +35 -0
- package/dist/core/generator/prompts.d.ts.map +1 -0
- package/dist/core/generator/prompts.js +212 -0
- package/dist/core/generator/prompts.js.map +1 -0
- package/dist/core/generator/spec-pipeline.d.ts +94 -0
- package/dist/core/generator/spec-pipeline.d.ts.map +1 -0
- package/dist/core/generator/spec-pipeline.js +474 -0
- package/dist/core/generator/spec-pipeline.js.map +1 -0
- package/dist/core/generator/stages/stage1-survey.d.ts +19 -0
- package/dist/core/generator/stages/stage1-survey.d.ts.map +1 -0
- package/dist/core/generator/stages/stage1-survey.js +105 -0
- package/dist/core/generator/stages/stage1-survey.js.map +1 -0
- package/dist/core/generator/stages/stage2-entities.d.ts +11 -0
- package/dist/core/generator/stages/stage2-entities.d.ts.map +1 -0
- package/dist/core/generator/stages/stage2-entities.js +67 -0
- package/dist/core/generator/stages/stage2-entities.js.map +1 -0
- package/dist/core/generator/stages/stage3-services.d.ts +11 -0
- package/dist/core/generator/stages/stage3-services.d.ts.map +1 -0
- package/dist/core/generator/stages/stage3-services.js +75 -0
- package/dist/core/generator/stages/stage3-services.js.map +1 -0
- package/dist/core/generator/stages/stage4-api.d.ts +11 -0
- package/dist/core/generator/stages/stage4-api.d.ts.map +1 -0
- package/dist/core/generator/stages/stage4-api.js +65 -0
- package/dist/core/generator/stages/stage4-api.js.map +1 -0
- package/dist/core/generator/stages/stage5-architecture.d.ts +10 -0
- package/dist/core/generator/stages/stage5-architecture.d.ts.map +1 -0
- package/dist/core/generator/stages/stage5-architecture.js +62 -0
- package/dist/core/generator/stages/stage5-architecture.js.map +1 -0
- package/dist/core/generator/stages/stage6-adr.d.ts +8 -0
- package/dist/core/generator/stages/stage6-adr.d.ts.map +1 -0
- package/dist/core/generator/stages/stage6-adr.js +41 -0
- package/dist/core/generator/stages/stage6-adr.js.map +1 -0
- package/dist/core/services/chat-agent.d.ts +45 -0
- package/dist/core/services/chat-agent.d.ts.map +1 -0
- package/dist/core/services/chat-agent.js +310 -0
- package/dist/core/services/chat-agent.js.map +1 -0
- package/dist/core/services/chat-tools.d.ts +32 -0
- package/dist/core/services/chat-tools.d.ts.map +1 -0
- package/dist/core/services/chat-tools.js +270 -0
- package/dist/core/services/chat-tools.js.map +1 -0
- package/dist/core/services/config-manager.d.ts +61 -0
- package/dist/core/services/config-manager.d.ts.map +1 -0
- package/dist/core/services/config-manager.js +143 -0
- package/dist/core/services/config-manager.js.map +1 -0
- package/dist/core/services/gitignore-manager.d.ts +29 -0
- package/dist/core/services/gitignore-manager.d.ts.map +1 -0
- package/dist/core/services/gitignore-manager.js +106 -0
- package/dist/core/services/gitignore-manager.js.map +1 -0
- package/dist/core/services/index.d.ts +8 -0
- package/dist/core/services/index.d.ts.map +1 -0
- package/dist/core/services/index.js +8 -0
- package/dist/core/services/index.js.map +1 -0
- package/dist/core/services/llm-service.d.ts +336 -0
- package/dist/core/services/llm-service.d.ts.map +1 -0
- package/dist/core/services/llm-service.js +1155 -0
- package/dist/core/services/llm-service.js.map +1 -0
- package/dist/core/services/mcp-handlers/analysis.d.ts +42 -0
- package/dist/core/services/mcp-handlers/analysis.d.ts.map +1 -0
- package/dist/core/services/mcp-handlers/analysis.js +300 -0
- package/dist/core/services/mcp-handlers/analysis.js.map +1 -0
- package/dist/core/services/mcp-handlers/graph.d.ts +65 -0
- package/dist/core/services/mcp-handlers/graph.d.ts.map +1 -0
- package/dist/core/services/mcp-handlers/graph.js +509 -0
- package/dist/core/services/mcp-handlers/graph.js.map +1 -0
- package/dist/core/services/mcp-handlers/semantic.d.ts +38 -0
- package/dist/core/services/mcp-handlers/semantic.d.ts.map +1 -0
- package/dist/core/services/mcp-handlers/semantic.js +172 -0
- package/dist/core/services/mcp-handlers/semantic.js.map +1 -0
- package/dist/core/services/mcp-handlers/utils.d.ts +21 -0
- package/dist/core/services/mcp-handlers/utils.d.ts.map +1 -0
- package/dist/core/services/mcp-handlers/utils.js +62 -0
- package/dist/core/services/mcp-handlers/utils.js.map +1 -0
- package/dist/core/services/project-detector.d.ts +32 -0
- package/dist/core/services/project-detector.d.ts.map +1 -0
- package/dist/core/services/project-detector.js +111 -0
- package/dist/core/services/project-detector.js.map +1 -0
- package/dist/core/verifier/index.d.ts +5 -0
- package/dist/core/verifier/index.d.ts.map +1 -0
- package/dist/core/verifier/index.js +5 -0
- package/dist/core/verifier/index.js.map +1 -0
- package/dist/core/verifier/verification-engine.d.ts +226 -0
- package/dist/core/verifier/verification-engine.d.ts.map +1 -0
- package/dist/core/verifier/verification-engine.js +681 -0
- package/dist/core/verifier/verification-engine.js.map +1 -0
- package/dist/types/index.d.ts +252 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +5 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/pipeline.d.ts +148 -0
- package/dist/types/pipeline.d.ts.map +1 -0
- package/dist/types/pipeline.js +5 -0
- package/dist/types/pipeline.js.map +1 -0
- package/dist/utils/errors.d.ts +51 -0
- package/dist/utils/errors.d.ts.map +1 -0
- package/dist/utils/errors.js +128 -0
- package/dist/utils/errors.js.map +1 -0
- package/dist/utils/logger.d.ts +149 -0
- package/dist/utils/logger.d.ts.map +1 -0
- package/dist/utils/logger.js +331 -0
- package/dist/utils/logger.js.map +1 -0
- package/dist/utils/progress.d.ts +142 -0
- package/dist/utils/progress.d.ts.map +1 -0
- package/dist/utils/progress.js +280 -0
- package/dist/utils/progress.js.map +1 -0
- package/dist/utils/prompts.d.ts +53 -0
- package/dist/utils/prompts.d.ts.map +1 -0
- package/dist/utils/prompts.js +199 -0
- package/dist/utils/prompts.js.map +1 -0
- package/dist/utils/shutdown.d.ts +89 -0
- package/dist/utils/shutdown.d.ts.map +1 -0
- package/dist/utils/shutdown.js +237 -0
- package/dist/utils/shutdown.js.map +1 -0
- package/package.json +114 -0
- package/src/viewer/InteractiveGraphViewer.jsx +1486 -0
- package/src/viewer/app/index.html +17 -0
- package/src/viewer/app/main.jsx +13 -0
- package/src/viewer/components/ArchitectureView.jsx +177 -0
- package/src/viewer/components/ChatPanel.jsx +448 -0
- package/src/viewer/components/ClusterGraph.jsx +441 -0
- package/src/viewer/components/FilterBar.jsx +179 -0
- package/src/viewer/components/FlatGraph.jsx +275 -0
- package/src/viewer/components/MicroComponents.jsx +83 -0
- package/src/viewer/hooks/usePanZoom.js +79 -0
- package/src/viewer/utils/constants.js +47 -0
- package/src/viewer/utils/graph-helpers.js +291 -0
|
@@ -0,0 +1,1155 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM Service
|
|
3
|
+
*
|
|
4
|
+
* Provides a clean interface for LLM interactions with proper error handling,
|
|
5
|
+
* retry logic, token management, and cost tracking.
|
|
6
|
+
*/
|
|
7
|
+
import { writeFile, mkdir } from 'node:fs/promises';
|
|
8
|
+
import { join } from 'node:path';
|
|
9
|
+
import logger from '../../utils/logger.js';
|
|
10
|
+
// ============================================================================
|
|
11
|
+
// CLAUDE CODE PROVIDER (uses local `claude` CLI, no API key required)
|
|
12
|
+
// ============================================================================
|
|
13
|
+
/**
 * Claude Code CLI provider.
 *
 * Sends each completion through the locally installed `claude` binary in
 * non-interactive mode (`claude -p ... --output-format json`). Authentication
 * rides on the user's Claude Code session (Max/Pro subscription) — no
 * ANTHROPIC_API_KEY is required.
 */
export class ClaudeCodeProvider {
    name = 'claude-code';
    maxContextTokens = 200_000;
    maxOutputTokens = 16_000;
    model;
    /**
     * @param {string} [model] - Desired model id. Forwarded via `--model`
     *   only when it looks like a real Claude model name ("claude-…");
     *   the sentinel 'claude-code' and non-Claude ids from a shared config
     *   are ignored so the CLI default applies.
     */
    constructor(model) {
        const isClaudeModel = Boolean(model) && model !== 'claude-code' && model.startsWith('claude-');
        this.model = isClaudeModel ? model : undefined;
    }
    /**
     * Run one completion through the `claude` CLI.
     *
     * @param {{systemPrompt?: string, userPrompt: string}} request
     * @returns {Promise<{content: string, usage: {inputTokens: number, outputTokens: number, totalTokens: number}, model: string, finishReason: string}>}
     * @throws {Error} when the CLI exits non-zero, emits non-JSON output, or
     *   reports `is_error` (non-retryable cases are tagged `retryable: false`).
     */
    async generateCompletion(request) {
        const { execFileSync } = await import('child_process');
        // The CLI accepts a single prompt string, so system + user are merged.
        const prompt = request.systemPrompt
            ? `${request.systemPrompt}\n\n---\n\n${request.userPrompt}`
            : request.userPrompt;
        const cliArgs = ['-p', prompt, '--output-format', 'json'];
        if (this.model) {
            cliArgs.push('--model', this.model);
        }
        // Strip session markers so the CLI can be spawned from inside an
        // already-running Claude Code session.
        const childEnv = { ...process.env };
        for (const key of ['CLAUDECODE', 'CLAUDE_CODE_ENTRYPOINT', 'CLAUDE_CODE_SSE_PORT', 'CLAUDE_CODE_IDE_PORT']) {
            delete childEnv[key];
        }
        let stdout;
        try {
            stdout = execFileSync('claude', cliArgs, {
                encoding: 'utf8',
                maxBuffer: 50 * 1024 * 1024, // 50 MB
                timeout: 300_000, // 5 minutes
                env: childEnv,
            });
        }
        catch (err) {
            const spawnErr = err;
            // || (not ??): stderr/stdout may be empty strings; fall through
            // to the next source of detail in that case.
            const detail = spawnErr.stderr || spawnErr.stdout || spawnErr.message || String(err);
            throw Object.assign(new Error(`claude CLI failed: ${detail}`), { retryable: false });
        }
        let payload;
        try {
            payload = JSON.parse(stdout);
        }
        catch {
            throw new Error(`claude CLI returned non-JSON output: ${stdout.slice(0, 200)}`);
        }
        if (payload.is_error) {
            throw Object.assign(new Error(`claude CLI error: ${payload.result}`), { retryable: false });
        }
        // Prefer real usage numbers from the CLI; estimate when absent.
        const inputTokens = payload.usage?.input_tokens ?? estimateTokens(prompt);
        const outputTokens = payload.usage?.output_tokens ?? estimateTokens(payload.result ?? '');
        return {
            content: payload.result ?? '',
            usage: { inputTokens, outputTokens, totalTokens: inputTokens + outputTokens },
            model: this.model ?? 'claude-code',
            finishReason: 'stop',
        };
    }
    /** Approximate token count (no exact tokenizer available for the CLI). */
    countTokens(text) {
        return estimateTokens(text);
    }
}
|
|
83
|
+
// ============================================================================
|
|
84
|
+
// MISTRAL VIBE PROVIDER (uses local `mistral-vibe` CLI, no API key required)
|
|
85
|
+
// ============================================================================
|
|
86
|
+
/**
 * Mistral Vibe CLI provider.
 *
 * Routes LLM calls through the local `mistral-vibe` CLI binary (standalone,
 * no npm). No API key required — uses local LLM execution.
 * If the binary is not on PATH, set MISTRAL_VIBE_CLI to its full path.
 * The CLI is invoked as `vibe` (not `mistral-vibe`).
 */
export class MistralVibeProvider {
    name = 'mistral-vibe';
    maxContextTokens = 128_000;
    maxOutputTokens = 4_096;
    model;
    /**
     * @param {string} [model] - Agent/model name forwarded via `--agent`.
     *   The sentinel 'mistral-vibe' is ignored so the CLI picks its default.
     */
    constructor(model) {
        this.model = model && model !== 'mistral-vibe' ? model : undefined;
    }
    /**
     * Run one completion through the `vibe` CLI.
     *
     * @param {{systemPrompt?: string, userPrompt: string}} request
     * @returns {Promise<{content: string, usage: {inputTokens: number, outputTokens: number, totalTokens: number}, model: string, finishReason: string}>}
     * @throws {Error} when the CLI exits non-zero (tagged `retryable: false`).
     */
    async generateCompletion(request) {
        const { execFileSync } = await import('child_process');
        // Mistral Vibe CLI takes a single prompt; combine system + user prompts
        const fullPrompt = request.systemPrompt
            ? `${request.systemPrompt}\n\n---\n\n${request.userPrompt}`
            : request.userPrompt;
        // vibe CLI: -p for prompt, --output json for JSON, --agent for model/agent name
        const args = ['-p', fullPrompt, '--output', 'json'];
        if (this.model)
            args.push('--agent', this.model);
        // Use MISTRAL_VIBE_CLI if set (standalone install not on PATH), else 'vibe'
        const mistralVibeBin = process.env.MISTRAL_VIBE_CLI ?? 'vibe';
        let raw;
        try {
            raw = execFileSync(mistralVibeBin, args, {
                encoding: 'utf8',
                maxBuffer: 50 * 1024 * 1024, // 50 MB
                timeout: 300_000, // 5 minutes
            });
        }
        catch (err) {
            const e = err;
            // BUGFIX: use || rather than ?? here. execFileSync attaches
            // stderr/stdout as strings that may be empty (''); ?? accepted the
            // empty string and produced a blank error detail, hiding the real
            // message from stdout/err.message. (Also matches ClaudeCodeProvider.)
            const detail = e.stderr || e.stdout || e.message || String(err);
            throw Object.assign(new Error(`mistral-vibe CLI failed: ${detail}`), { retryable: false });
        }
        // Defensive parsing: vibe --output json format is undocumented.
        // Try multiple known shapes before falling back to raw text.
        let content = '';
        let inputTokens;
        let outputTokens;
        try {
            const parsed = JSON.parse(raw);
            if (Array.isArray(parsed)) {
                // Shape: [{role, content}, ...] — "all messages at end"
                const lastAssistant = [...parsed].reverse().find(m => m.role === 'assistant');
                content = String(lastAssistant?.content ?? '');
            }
            else if (typeof parsed === 'object' && parsed !== null) {
                const p = parsed;
                if (typeof p.result === 'string') {
                    // Shape: {result: string, usage?: {...}} — Claude Code-style
                    content = p.result;
                    inputTokens = p.usage?.input_tokens;
                    outputTokens = p.usage?.output_tokens;
                }
                else {
                    // Shape: {message: string} or {text: string} or {content: string}
                    content = String(p.message ?? p.text ?? p.content ?? '');
                }
            }
        }
        catch {
            // non-JSON output: use raw text
        }
        if (!content)
            content = raw.trim();
        // Fill in estimates when the CLI did not report usage.
        inputTokens ??= estimateTokens(fullPrompt);
        outputTokens ??= estimateTokens(content);
        return {
            content,
            usage: { inputTokens, outputTokens, totalTokens: inputTokens + outputTokens },
            model: this.model ?? 'mistral-vibe',
            finishReason: 'stop',
        };
    }
    /** Approximate token count (no tokenizer available for local models). */
    countTokens(text) {
        return estimateTokens(text);
    }
}
|
|
174
|
+
// ============================================================================
|
|
175
|
+
// SSL / FETCH HELPERS
|
|
176
|
+
// ============================================================================
|
|
177
|
+
/**
 * Disable TLS certificate verification for every fetch in this process.
 *
 * Node's native fetch offers no per-request TLS configuration; the only
 * reliable cross-version switch is the process-global
 * NODE_TLS_REJECT_UNAUTHORIZED environment variable. It is flipped exactly
 * once, with a loud warning so the user knows all HTTPS traffic is now
 * unverified.
 */
function disableSslVerification() {
    const alreadyDisabled = process.env.NODE_TLS_REJECT_UNAUTHORIZED === '0';
    if (alreadyDisabled) {
        return;
    }
    process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0';
    // Warn prominently: this switch is process-global and affects all fetch calls.
    console.warn('[spec-gen] WARNING: TLS certificate verification is DISABLED for this process.' +
        ' All HTTPS connections (including LLM API calls) are vulnerable to MITM attacks.' +
        ' Only use --insecure on trusted private networks with self-signed certificates.');
}
|
|
194
|
+
/**
 * Validate and normalise an API base URL.
 *
 * Accepts only absolute http/https URLs and strips trailing slashes so
 * callers can join paths consistently.
 *
 * @param {string} url - Candidate base URL, e.g. "http://localhost:8000/v1".
 * @returns {string} The cleaned URL without trailing slashes.
 * @throws {Error} If the string is not a valid absolute URL or uses a
 *   scheme other than http/https.
 */
function normalizeApiBase(url) {
    let candidate;
    try {
        candidate = new URL(url);
    }
    catch {
        throw new Error(`Invalid API base URL: "${url}". Must be a valid URL (e.g., http://localhost:8000/v1).`);
    }
    const scheme = candidate.protocol;
    if (scheme !== 'http:' && scheme !== 'https:') {
        throw new Error(`Unsupported protocol in API base URL: "${scheme}". Only http and https are allowed.`);
    }
    // Strip trailing slashes for consistent path joining.
    return candidate.toString().replace(/\/+$/, '');
}
|
|
214
|
+
// ============================================================================
|
|
215
|
+
// RETRY-AFTER PARSING
|
|
216
|
+
// ============================================================================
|
|
217
|
+
/**
 * Parse the number of milliseconds to wait before retrying a 429 response.
 *
 * Checks (in order):
 * 1. Standard `Retry-After` HTTP header (seconds as integer, or HTTP-date)
 * 2. `Limit resets at: YYYY-MM-DD HH:MM:SS UTC` in the response body
 *
 * @param {string} body - Raw response body text.
 * @param {string} [retryAfterHeader] - Value of the Retry-After header, if any.
 * @returns {number|undefined} Delay in ms (plus a small safety buffer), or
 *   `undefined` when nothing useful is found so the caller can fall back to
 *   its own exponential-backoff delay.
 */
export function parseRetryAfterMs(body, retryAfterHeader) {
    const BUFFER_MS = 500; // small buffer to avoid hitting the wall again immediately
    // 1. Retry-After header: delta-seconds form first, then HTTP-date form.
    if (retryAfterHeader) {
        const seconds = Number(retryAfterHeader);
        if (!Number.isNaN(seconds) && seconds > 0) {
            return Math.ceil(seconds * 1000) + BUFFER_MS;
        }
        // HTTP-date format
        const headerDate = Date.parse(retryAfterHeader);
        if (!Number.isNaN(headerDate)) {
            const ms = headerDate - Date.now();
            if (ms > 0)
                return ms + BUFFER_MS;
        }
    }
    // 2. "Limit resets at: YYYY-MM-DD HH:MM:SS UTC" in body
    const match = body.match(/Limit resets at:\s*(\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(?:\.\d+)?\s*UTC)/i);
    if (match) {
        // BUGFIX: normalise to strict ISO-8601 with an anchored regex. The
        // old `.replace(' UTC', 'Z')` only handled exactly one space before
        // "UTC", but the capture's `\s*` also admits zero or multiple spaces,
        // in which cases the timestamp stayed unparseable and a usable reset
        // hint was silently dropped.
        const iso = match[1].replace(/\s*UTC$/i, 'Z').replace(' ', 'T');
        const resetMs = Date.parse(iso);
        if (!Number.isNaN(resetMs)) {
            const ms = resetMs - Date.now();
            if (ms > 0)
                return ms + BUFFER_MS;
        }
    }
    return undefined;
}
|
|
255
|
+
// ============================================================================
|
|
256
|
+
// PRICING (per 1M tokens)
|
|
257
|
+
// ============================================================================
|
|
258
|
+
// USD cost per 1M tokens, keyed by provider name, then by model-family id.
// lookupPricing() resolves a concrete model id against these keys by exact
// match, then longest prefix match, then the provider's `default` entry.
const PRICING = {
    anthropic: {
        // Claude 4 family
        'claude-opus-4': { input: 15.0, output: 75.0 },
        'claude-sonnet-4': { input: 3.0, output: 15.0 },
        'claude-haiku-4': { input: 0.80, output: 4.0 },
        // Claude 3.7 / 3.5
        'claude-3-7-sonnet': { input: 3.0, output: 15.0 },
        'claude-3-5-sonnet': { input: 3.0, output: 15.0 },
        'claude-3-5-haiku': { input: 0.80, output: 4.0 },
        // Claude 3 (legacy)
        'claude-3-opus': { input: 15.0, output: 75.0 },
        'claude-3-sonnet': { input: 3.0, output: 15.0 },
        'claude-3-haiku': { input: 0.25, output: 1.25 },
        // Fallback: assume Sonnet-class pricing
        default: { input: 3.0, output: 15.0 },
    },
    openai: {
        // GPT-4o family
        'gpt-4o': { input: 2.5, output: 10.0 },
        'gpt-4o-mini': { input: 0.15, output: 0.6 },
        // o-series reasoning models
        'o1': { input: 15.0, output: 60.0 },
        'o1-mini': { input: 3.0, output: 12.0 },
        'o3': { input: 10.0, output: 40.0 },
        'o3-mini': { input: 1.1, output: 4.4 },
        'o4-mini': { input: 1.1, output: 4.4 },
        // Legacy (still in use)
        'gpt-4-turbo': { input: 10.0, output: 30.0 },
        'gpt-4': { input: 30.0, output: 60.0 },
        'gpt-3.5-turbo': { input: 0.5, output: 1.5 },
        default: { input: 2.5, output: 10.0 },
    },
    'openai-compat': {
        // Mistral
        'mistral-large-latest': { input: 2.0, output: 6.0 },
        'mistral-small-latest': { input: 0.1, output: 0.3 },
        'codestral-latest': { input: 0.2, output: 0.6 },
        // Groq
        'llama-3.3-70b-versatile': { input: 0.59, output: 0.79 },
        'llama-3.1-8b-instant': { input: 0.05, output: 0.08 },
        default: { input: 1.0, output: 3.0 },
    },
    gemini: {
        'gemini-2.0-flash': { input: 0.1, output: 0.4 },
        'gemini-2.0-flash-lite': { input: 0.075, output: 0.3 },
        'gemini-2.5-pro': { input: 1.25, output: 10.0 },
        'gemini-1.5-pro': { input: 1.25, output: 5.0 },
        'gemini-1.5-flash': { input: 0.075, output: 0.3 },
        default: { input: 0.1, output: 0.4 },
    },
    'claude-code': {
        // No per-token cost: covered by Claude Max/Pro subscription
        default: { input: 0, output: 0 },
    },
    'mistral-vibe': {
        // No per-token cost: local CLI tool
        default: { input: 0, output: 0 },
    },
    'gemini-cli': {
        // No per-token cost: covered by Google account free tier
        default: { input: 0, output: 0 },
    },
};
|
|
322
|
+
/**
 * Look up per-1M-token pricing for a model id.
 * Exported for use in pre-flight cost estimation.
 *
 * Resolution order: exact model id, then the longest model-family prefix
 * (so "claude-sonnet-4-6-20251120" resolves to "claude-sonnet-4"), then
 * the provider's `default` entry.
 *
 * @param {string} providerName - Provider key into the PRICING table;
 *   unknown providers fall back to the anthropic table.
 * @param {string} modelId - Model identifier as configured or reported.
 * @returns {{input: number, output: number}} USD per 1M input/output tokens.
 */
export function lookupPricing(providerName, modelId) {
    const table = PRICING[providerName] ?? PRICING.anthropic;
    // 1. Exact model-id match.
    const exact = table[modelId];
    if (exact)
        return exact;
    // 2. Longest family-prefix match (robust to version/date suffixes).
    const needle = modelId.toLowerCase();
    const familyKeys = Object.keys(table).filter((key) => key !== 'default' && needle.startsWith(key));
    if (familyKeys.length > 0) {
        familyKeys.sort((a, b) => b.length - a.length);
        return table[familyKeys[0]];
    }
    // 3. Provider default (Sonnet-class numbers as a last resort).
    return table.default ?? { input: 3.0, output: 15.0 };
}
|
|
350
|
+
// ============================================================================
|
|
351
|
+
// TOKEN ESTIMATION
|
|
352
|
+
// ============================================================================
|
|
353
|
+
/**
 * Estimate token count from text (rough approximation).
 *
 * Punctuation/operator characters (common in source code) tokenize more
 * densely than prose, so they are weighted separately:
 * ~4 characters per token for regular text, ~2 for symbol characters.
 *
 * @param {string} text - Text to estimate.
 * @returns {number} Approximate token count (ceiling, so never fractional).
 */
export function estimateTokens(text) {
    const symbolRe = /[{}()[\];:,.<>/\\|`~!@#$%^&*=+]/g;
    const symbolCount = text.match(symbolRe)?.length ?? 0;
    const proseCount = text.length - symbolCount;
    return Math.ceil(proseCount / 4 + symbolCount / 2);
}
|
|
366
|
+
// ============================================================================
|
|
367
|
+
// ANTHROPIC PROVIDER
|
|
368
|
+
// ============================================================================
|
|
369
|
+
/**
 * Anthropic Claude provider.
 *
 * Talks to the Anthropic Messages API (`POST {baseUrl}/messages`) using the
 * `x-api-key` header and API version `2023-06-01`.
 */
export class AnthropicProvider {
    name = 'anthropic';
    // Budgeting limits consumed by LLMService's pre-flight context check.
    maxContextTokens = 200000;
    maxOutputTokens = 4096;
    apiKey;
    model;
    baseUrl;
    /**
     * @param {string} apiKey - Anthropic API key (sent as `x-api-key`).
     * @param {string} [model] - Model ID, default 'claude-3-5-sonnet-20241022'.
     * @param {string} [baseUrl] - Optional API base override, normalized via
     *   normalizeApiBase; falls back to the public Anthropic endpoint.
     * @param {boolean} [sslVerify] - When false, disables TLS certificate
     *   verification (disableSslVerification affects the whole process).
     */
    constructor(apiKey, model = 'claude-3-5-sonnet-20241022', baseUrl, sslVerify = true) {
        this.apiKey = apiKey;
        this.model = model;
        this.baseUrl = baseUrl ? normalizeApiBase(baseUrl) : 'https://api.anthropic.com/v1';
        if (!sslVerify)
            disableSslVerification();
    }
    /** Rough token count via the character heuristic (not the real tokenizer). */
    countTokens(text) {
        return estimateTokens(text);
    }
    /**
     * Send one completion request to the Messages API.
     *
     * @param request - { systemPrompt, userPrompt, maxTokens?, temperature?,
     *   stopSequences? } (responseFormat is not supported by this API shape).
     * @returns {{content, usage, model, finishReason}} finishReason is mapped:
     *   'end_turn' → 'stop', 'max_tokens' → 'length', anything else → 'error'.
     * @throws {Error} On non-OK HTTP status, decorated with `status`,
     *   `retryable`, and (for 429) `retryAfterMs` — see parseError.
     */
    async generateCompletion(request) {
        const response = await fetch(`${this.baseUrl}/messages`, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'x-api-key': this.apiKey,
                'anthropic-version': '2023-06-01',
            },
            body: JSON.stringify({
                model: this.model,
                max_tokens: request.maxTokens ?? this.maxOutputTokens,
                temperature: request.temperature ?? 0.3,
                system: request.systemPrompt,
                messages: [
                    { role: 'user', content: request.userPrompt },
                ],
                // undefined is dropped by JSON.stringify, so omitting
                // stopSequences sends no stop_sequences key at all.
                stop_sequences: request.stopSequences,
            }),
        });
        if (!response.ok) {
            const error = await response.text();
            const errorObj = this.parseError(error, response.status, response.headers.get('retry-after'));
            throw errorObj;
        }
        const data = await response.json();
        // Concatenate only text blocks; tool-use or other block types are ignored.
        const content = data.content
            .filter(c => c.type === 'text')
            .map(c => c.text)
            .join('');
        return {
            content,
            usage: {
                inputTokens: data.usage.input_tokens,
                outputTokens: data.usage.output_tokens,
                totalTokens: data.usage.input_tokens + data.usage.output_tokens,
            },
            model: data.model,
            finishReason: data.stop_reason === 'end_turn' ? 'stop' : data.stop_reason === 'max_tokens' ? 'length' : 'error',
        };
    }
    /**
     * Wrap a raw HTTP error body into an Error tagged for the retry loop.
     * Marks 429 and 5xx as retryable; on 429 also extracts a retry delay.
     */
    parseError(error, status, retryAfterHeader) {
        const err = new Error(error);
        err.status = status;
        err.retryable = status === 429 || status >= 500;
        if (status === 429) {
            err.retryAfterMs = parseRetryAfterMs(error, retryAfterHeader);
        }
        return err;
    }
}
|
|
439
|
+
// ============================================================================
|
|
440
|
+
// OPENAI PROVIDER
|
|
441
|
+
// ============================================================================
|
|
442
|
+
/**
 * OpenAI provider.
 *
 * Talks to the chat completions endpoint (`POST {baseUrl}/chat/completions`)
 * with Bearer authentication.
 */
export class OpenAIProvider {
    name = 'openai';
    maxContextTokens = 128000;
    maxOutputTokens = 4096;
    apiKey;
    model;
    baseUrl;
    /**
     * @param {string} apiKey - OpenAI API key (sent as a Bearer token).
     * @param {string} [model] - Model ID, default 'gpt-4o'.
     * @param {string} [baseUrl] - Optional API base override (normalized);
     *   falls back to the public OpenAI endpoint.
     * @param {boolean} [sslVerify] - When false, disables TLS verification
     *   process-wide.
     */
    constructor(apiKey, model = 'gpt-4o', baseUrl, sslVerify = true) {
        this.apiKey = apiKey;
        this.model = model;
        this.baseUrl = baseUrl ? normalizeApiBase(baseUrl) : 'https://api.openai.com/v1';
        if (!sslVerify) {
            disableSslVerification();
        }
    }
    /** Rough token count via the character heuristic (not tiktoken). */
    countTokens(text) {
        return estimateTokens(text);
    }
    /**
     * Send one chat-completion request.
     *
     * @returns {{content, usage, model, finishReason}} finishReason mapped:
     *   'stop' → 'stop', 'length' → 'length', anything else → 'error'.
     * @throws {Error} Non-OK responses are wrapped via parseError with
     *   `status`, `retryable`, and (on 429) `retryAfterMs`.
     */
    async generateCompletion(request) {
        const payload = {
            model: this.model,
            messages: [
                { role: 'system', content: request.systemPrompt },
                { role: 'user', content: request.userPrompt },
            ],
            max_tokens: request.maxTokens ?? this.maxOutputTokens,
            temperature: request.temperature ?? 0.3,
            // undefined is dropped by JSON.stringify when absent.
            stop: request.stopSequences,
        };
        if (request.responseFormat === 'json') {
            payload.response_format = { type: 'json_object' };
        }
        const res = await fetch(`${this.baseUrl}/chat/completions`, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'Authorization': `Bearer ${this.apiKey}`,
            },
            body: JSON.stringify(payload),
        });
        if (!res.ok) {
            const detail = await res.text();
            throw this.parseError(detail, res.status, res.headers.get('retry-after'));
        }
        const data = await res.json();
        const choice = data.choices[0];
        const reason = choice?.finish_reason;
        return {
            content: choice?.message?.content ?? '',
            usage: {
                inputTokens: data.usage.prompt_tokens,
                outputTokens: data.usage.completion_tokens,
                totalTokens: data.usage.total_tokens,
            },
            model: data.model,
            finishReason: reason === 'stop' ? 'stop' : reason === 'length' ? 'length' : 'error',
        };
    }
    /**
     * Wrap a raw HTTP error body into an Error tagged for the retry loop.
     * Marks 429 and 5xx as retryable; on 429 also extracts a retry delay.
     */
    parseError(error, status, retryAfterHeader) {
        const err = new Error(error);
        err.status = status;
        err.retryable = status === 429 || status >= 500;
        if (status === 429) {
            err.retryAfterMs = parseRetryAfterMs(error, retryAfterHeader);
        }
        return err;
    }
}
|
|
512
|
+
// ============================================================================
|
|
513
|
+
// OPENAI-COMPATIBLE PROVIDER
|
|
514
|
+
// ============================================================================
|
|
515
|
+
/**
 * Generic OpenAI-compatible provider.
 * Works with any API that implements the OpenAI chat completions format:
 * Mistral AI, Groq, Together AI, Ollama, LM Studio, etc.
 *
 * Required env vars:
 *   OPENAI_COMPAT_API_KEY — API key (use "ollama" for local setups without auth)
 *   OPENAI_COMPAT_BASE_URL — Base URL, e.g. https://api.mistral.ai/v1
 */
export class OpenAICompatibleProvider {
    name = 'openai-compat';
    // Conservative defaults; actual limits vary per backing service.
    maxContextTokens = 128000;
    maxOutputTokens = 4096;
    apiKey;
    model;
    baseUrl;
    /**
     * @param {string} apiKey - API key sent as a Bearer token.
     * @param {string} baseUrl - Chat-completions base URL; a trailing slash
     *   is stripped so path joins below stay well-formed.
     * @param {string} [model] - Model ID, default 'mistral-large-latest'.
     */
    constructor(apiKey, baseUrl, model = 'mistral-large-latest') {
        this.apiKey = apiKey;
        this.baseUrl = baseUrl.replace(/\/$/, '');
        this.model = model;
    }
    /** Rough token count via the character heuristic. */
    countTokens(text) {
        return estimateTokens(text);
    }
    /**
     * Send one chat-completion request in the OpenAI wire format.
     *
     * @returns {{content, usage, model, finishReason}} model falls back to the
     *   configured ID when the backend omits it (some local servers do).
     * @throws {Error} Non-OK responses raise an Error decorated with `status`,
     *   `retryable` (429/5xx), and on 429 a `retryAfterMs` hint.
     */
    async generateCompletion(request) {
        const body = {
            model: this.model,
            messages: [
                { role: 'system', content: request.systemPrompt },
                { role: 'user', content: request.userPrompt },
            ],
            max_tokens: request.maxTokens ?? this.maxOutputTokens,
            temperature: request.temperature ?? 0.3,
            // Only include `stop` when stop sequences were supplied; some
            // compatible servers reject explicit null/undefined values.
            ...(request.stopSequences && { stop: request.stopSequences }),
        };
        if (request.responseFormat === 'json') {
            body.response_format = { type: 'json_object' };
        }
        const response = await fetch(`${this.baseUrl}/chat/completions`, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'Authorization': `Bearer ${this.apiKey}`,
            },
            body: JSON.stringify(body),
        });
        if (!response.ok) {
            const error = await response.text();
            const err = new Error(error);
            err.status = response.status;
            err.retryable = response.status === 429 || response.status >= 500;
            if (response.status === 429) {
                err.retryAfterMs = parseRetryAfterMs(error, response.headers.get('retry-after'));
            }
            throw err;
        }
        const data = await response.json();
        return {
            content: data.choices[0]?.message?.content ?? '',
            usage: {
                inputTokens: data.usage.prompt_tokens,
                outputTokens: data.usage.completion_tokens,
                totalTokens: data.usage.total_tokens,
            },
            model: data.model ?? this.model,
            finishReason: data.choices[0]?.finish_reason === 'stop' ? 'stop' : data.choices[0]?.finish_reason === 'length' ? 'length' : 'error',
        };
    }
}
|
|
584
|
+
// ============================================================================
|
|
585
|
+
// GEMINI CLI PROVIDER (uses local `gemini` CLI, no API key required)
|
|
586
|
+
// ============================================================================
|
|
587
|
+
/**
 * Gemini CLI provider
 *
 * Routes LLM calls through the local `gemini` CLI binary in non-interactive
 * mode (`gemini -p ...`). Authentication is handled by the Google account
 * session — no GEMINI_API_KEY is required.
 * If the binary is not on PATH, set GEMINI_CLI to its full path.
 */
export class GeminiCLIProvider {
    name = 'gemini-cli';
    maxContextTokens = 1_000_000;
    maxOutputTokens = 8_192;
    model;
    /**
     * @param {string} [model] - Explicit model ID forwarded via `-m`.
     *   The sentinel value 'gemini-cli' (or a falsy value) means "let the
     *   CLI choose its own default model".
     */
    constructor(model) {
        this.model = model && model !== 'gemini-cli' ? model : undefined;
    }
    /**
     * Run one completion through the local CLI.
     *
     * System and user prompts are merged into a single prompt since the CLI
     * takes only one. NOTE(review): execFileSync blocks the event loop for
     * the duration of the CLI call (up to the 5-minute timeout) even though
     * this method is async — confirm that is acceptable for callers.
     *
     * @returns {{content, usage, model, finishReason}} finishReason is always
     *   'stop'; token counts come from CLI stats when parseable, otherwise
     *   from the character-based estimate.
     * @throws {Error} With `retryable: false` when the CLI exits non-zero.
     */
    async generateCompletion(request) {
        const { execFileSync } = await import('child_process');
        const fullPrompt = request.systemPrompt
            ? `${request.systemPrompt}\n\n---\n\n${request.userPrompt}`
            : request.userPrompt;
        // gemini CLI: -p for prompt, --output-format json, -m for model
        const args = ['-p', fullPrompt, '--output-format', 'json'];
        if (this.model)
            args.push('-m', this.model);
        const geminiCLIBin = process.env.GEMINI_CLI ?? 'gemini';
        let raw;
        try {
            raw = execFileSync(geminiCLIBin, args, {
                encoding: 'utf8',
                // Large buffer: CLI can emit big JSON payloads for long outputs.
                maxBuffer: 50 * 1024 * 1024,
                timeout: 300_000,
            });
        }
        catch (err) {
            // execFileSync errors carry stderr/stdout; prefer those for detail.
            const e = err;
            const detail = e.stderr ?? e.stdout ?? e.message ?? String(err);
            throw Object.assign(new Error(`gemini CLI failed: ${detail}`), { retryable: false });
        }
        // Format: {response: string, stats: {models: {[name]: {tokens: {input, candidates, total}}}}}
        let content = '';
        let inputTokens;
        let outputTokens;
        let modelUsed = this.model ?? 'gemini-cli';
        try {
            const parsed = JSON.parse(raw);
            content = parsed.response ?? '';
            if (parsed.stats?.models) {
                const models = Object.entries(parsed.stats.models);
                if (models.length > 0) {
                    // Report the first model name; token counts are summed below.
                    modelUsed = models[0][0];
                    // Sum tokens across all models used (gemini-cli may use multiple internally)
                    inputTokens = models.reduce((sum, [, m]) => sum + (m.tokens?.input ?? 0), 0);
                    outputTokens = models.reduce((sum, [, m]) => sum + (m.tokens?.candidates ?? 0), 0);
                }
            }
        }
        catch {
            // Output was not JSON — treat the raw stdout as the answer text.
            content = raw.trim();
        }
        // JSON parsed but carried no usable `response` field: fall back to raw.
        if (!content)
            content = raw.trim();
        // Fill in estimates when the CLI did not report token stats.
        inputTokens ??= estimateTokens(fullPrompt);
        outputTokens ??= estimateTokens(content);
        return {
            content,
            usage: { inputTokens, outputTokens, totalTokens: inputTokens + outputTokens },
            model: modelUsed,
            finishReason: 'stop',
        };
    }
    /** Rough token count via the character heuristic. */
    countTokens(text) {
        return estimateTokens(text);
    }
}
|
|
662
|
+
// ============================================================================
|
|
663
|
+
// GEMINI PROVIDER
|
|
664
|
+
// ============================================================================
|
|
665
|
+
/**
 * Google Gemini provider.
 *
 * Calls the public `generateContent` REST endpoint, passing the API key as a
 * `key=` query parameter (no auth header).
 */
export class GeminiProvider {
    name = 'gemini';
    maxContextTokens = 1000000;
    maxOutputTokens = 8192;
    apiKey;
    model;
    baseUrl = 'https://generativelanguage.googleapis.com/v1beta/models';
    /**
     * @param {string} apiKey - Gemini API key (appended to the request URL).
     * @param {string} [model] - Model ID, default 'gemini-2.0-flash'.
     */
    constructor(apiKey, model = 'gemini-2.0-flash') {
        this.apiKey = apiKey;
        this.model = model;
    }
    /** Rough token count via the character heuristic (not the real tokenizer). */
    countTokens(text) {
        return estimateTokens(text);
    }
    /**
     * Send one completion request.
     *
     * @returns {{content, usage, model, finishReason}} finishReason mapped:
     *   'STOP' → 'stop', 'MAX_TOKENS' → 'length', anything else (including a
     *   missing candidate, e.g. a safety-blocked prompt) → 'error'.
     * @throws {Error} Non-OK responses raise an Error decorated with `status`,
     *   `retryable` (429/5xx), and on 429 a `retryAfterMs` hint.
     */
    async generateCompletion(request) {
        const body = {
            contents: [
                { role: 'user', parts: [{ text: request.userPrompt }] },
            ],
            systemInstruction: {
                parts: [{ text: request.systemPrompt }],
            },
            generationConfig: {
                temperature: request.temperature ?? 0.3,
                maxOutputTokens: request.maxTokens ?? this.maxOutputTokens,
                ...(request.responseFormat === 'json' && { responseMimeType: 'application/json' }),
                ...(request.stopSequences && { stopSequences: request.stopSequences }),
            },
        };
        const url = `${this.baseUrl}/${this.model}:generateContent?key=${this.apiKey}`;
        const response = await fetch(url, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(body),
        });
        if (!response.ok) {
            const error = await response.text();
            const err = new Error(error);
            err.status = response.status;
            err.retryable = response.status === 429 || response.status >= 500;
            if (response.status === 429) {
                err.retryAfterMs = parseRetryAfterMs(error, response.headers.get('retry-after'));
            }
            throw err;
        }
        const data = await response.json();
        // FIX: a safety-blocked or empty response carries no `candidates` array
        // at all (only `promptFeedback`), and may omit `usageMetadata` too.
        // Guard every access so we surface finishReason 'error' with empty
        // content instead of crashing with a TypeError.
        const candidate = data.candidates?.[0];
        const content = candidate?.content?.parts?.map(p => p.text).join('') ?? '';
        const finishReason = candidate?.finishReason;
        const usageMeta = data.usageMetadata ?? {};
        return {
            content,
            usage: {
                inputTokens: usageMeta.promptTokenCount ?? 0,
                outputTokens: usageMeta.candidatesTokenCount ?? 0,
                totalTokens: usageMeta.totalTokenCount ?? 0,
            },
            model: this.model,
            finishReason: finishReason === 'STOP' ? 'stop' : finishReason === 'MAX_TOKENS' ? 'length' : 'error',
        };
    }
}
|
|
728
|
+
// ============================================================================
|
|
729
|
+
// MOCK PROVIDER (for testing)
|
|
730
|
+
// ============================================================================
|
|
731
|
+
/**
 * Mock provider for testing.
 *
 * Records every request in callHistory, can simulate the first `failCount`
 * calls failing with a retryable 500, and answers with canned responses
 * selected by substring match against either prompt.
 */
export class MockLLMProvider {
    name = 'mock';
    maxContextTokens = 100000;
    maxOutputTokens = 4096;
    responses = new Map();
    defaultResponse = '{"result": "mock response"}';
    callHistory = [];
    shouldFail = false;
    failCount = 0;
    currentFailCount = 0;
    /** Register a canned response used when either prompt contains the key. */
    setResponse(promptContains, response) {
        this.responses.set(promptContains, response);
    }
    /** Replace the fallback response used when no substring key matches. */
    setDefaultResponse(response) {
        this.defaultResponse = response;
    }
    /** Rough token count via the shared character heuristic. */
    countTokens(text) {
        return estimateTokens(text);
    }
    /**
     * Record the request, optionally fail, then answer with the first
     * matching canned response (insertion order) or the default.
     */
    async generateCompletion(request) {
        this.callHistory.push(request);
        // Simulate transient failures until failCount is exhausted.
        if (this.shouldFail && this.currentFailCount < this.failCount) {
            this.currentFailCount += 1;
            const failure = new Error('Mock failure');
            failure.status = 500;
            failure.retryable = true;
            throw failure;
        }
        // Earliest-registered substring key wins, matching Map iteration order.
        const hit = [...this.responses].find(([needle]) => request.userPrompt.includes(needle) || request.systemPrompt.includes(needle));
        const content = hit ? hit[1] : this.defaultResponse;
        const inputTokens = this.countTokens(request.systemPrompt + request.userPrompt);
        const outputTokens = this.countTokens(content);
        return {
            content,
            usage: {
                inputTokens,
                outputTokens,
                totalTokens: inputTokens + outputTokens,
            },
            model: 'mock-model',
            finishReason: 'stop',
        };
    }
    /** Restore pristine state between tests. */
    reset() {
        this.callHistory = [];
        this.shouldFail = false;
        this.failCount = 0;
        this.currentFailCount = 0;
        this.responses.clear();
    }
}
|
|
791
|
+
// ============================================================================
|
|
792
|
+
// LLM SERVICE
|
|
793
|
+
// ============================================================================
|
|
794
|
+
/**
 * LLM Service - main interface for LLM interactions.
 *
 * Wraps a single provider with:
 *  - pre-flight context-size checks,
 *  - retry with exponential backoff (honoring provider retry-after hints),
 *  - cumulative token and cost tracking,
 *  - optional request/response logging with secret redaction,
 *  - JSON-mode completion with a one-shot repair pass for malformed output.
 */
export class LLMService {
    provider;
    retryConfig;
    options;
    // Cumulative totals since construction or the last resetTracking().
    tokenUsage = { inputTokens: 0, outputTokens: 0, totalTokens: 0, requests: 0 };
    costTracking = { estimatedCost: 0, currency: 'USD', byProvider: {} };
    requestLog = [];
    /**
     * @param provider - Object exposing name, maxContextTokens,
     *   maxOutputTokens, countTokens(), generateCompletion() (e.g. one of the
     *   provider classes in this module).
     * @param options - Partial settings; every field has a default below.
     */
    constructor(provider, options = {}) {
        this.provider = provider;
        this.options = {
            provider: options.provider ?? 'anthropic',
            model: options.model ?? '',
            apiBase: options.apiBase ?? '',
            sslVerify: options.sslVerify ?? true,
            openaiCompatBaseUrl: options.openaiCompatBaseUrl ?? '',
            maxRetries: options.maxRetries ?? 3,
            initialDelay: options.initialDelay ?? 1000,
            maxDelay: options.maxDelay ?? 30000,
            timeout: options.timeout ?? 120000,
            costWarningThreshold: options.costWarningThreshold ?? 10.0,
            logDir: options.logDir ?? '.spec-gen/logs',
            enableLogging: options.enableLogging ?? false,
        };
        this.retryConfig = {
            maxRetries: this.options.maxRetries,
            initialDelay: this.options.initialDelay,
            maxDelay: this.options.maxDelay,
            timeout: this.options.timeout,
        };
    }
    /**
     * Get the provider name
     */
    getProviderName() {
        return this.provider.name;
    }
    /**
     * Get maximum context tokens for the provider
     */
    getMaxContextTokens() {
        return this.provider.maxContextTokens;
    }
    /**
     * Count tokens in text (delegates to the provider's estimator)
     */
    countTokens(text) {
        return this.provider.countTokens(text);
    }
    /**
     * Get current token usage (shallow copy, safe for callers to mutate)
     */
    getTokenUsage() {
        return { ...this.tokenUsage };
    }
    /**
     * Get current cost tracking (shallow copy)
     */
    getCostTracking() {
        return { ...this.costTracking };
    }
    /**
     * Reset usage tracking, cost tracking, and the in-memory request log
     */
    resetTracking() {
        this.tokenUsage = { inputTokens: 0, outputTokens: 0, totalTokens: 0, requests: 0 };
        this.costTracking = { estimatedCost: 0, currency: 'USD', byProvider: {} };
        this.requestLog = [];
    }
    /**
     * Generate a completion with retry logic.
     *
     * @returns The provider response (content, usage, model, finishReason).
     * @throws {Error} When the projected request exceeds the context window,
     *   when a non-retryable error occurs, or when retries are exhausted.
     */
    async complete(request) {
        // Pre-calculate tokens and warn if approaching limit
        const inputTokens = this.countTokens(request.systemPrompt + request.userPrompt);
        const maxTokens = request.maxTokens ?? this.provider.maxOutputTokens;
        const totalExpected = inputTokens + maxTokens;
        if (totalExpected > this.provider.maxContextTokens * 0.9) {
            logger.warning(`Approaching context limit: ${totalExpected} tokens (max: ${this.provider.maxContextTokens})`);
        }
        if (totalExpected > this.provider.maxContextTokens) {
            throw new Error(`Request exceeds context limit: ${totalExpected} > ${this.provider.maxContextTokens}`);
        }
        // Execute with retry logic
        let lastError = null;
        let delay = this.retryConfig.initialDelay;
        for (let attempt = 0; attempt <= this.retryConfig.maxRetries; attempt++) {
            try {
                logger.debug(`LLM request attempt ${attempt + 1}/${this.retryConfig.maxRetries + 1}`);
                const response = await this.executeWithTimeout(request);
                // Update tracking before the cost check so the warning reflects
                // this request's spend as well.
                this.updateTracking(response);
                // Log if enabled
                if (this.options.enableLogging) {
                    this.logRequest(request, response);
                }
                // Check cost threshold
                if (this.costTracking.estimatedCost > this.options.costWarningThreshold) {
                    logger.warning(`Cost threshold exceeded: $${this.costTracking.estimatedCost.toFixed(4)} > $${this.options.costWarningThreshold}`);
                }
                return response;
            }
            catch (error) {
                lastError = error;
                const errWithStatus = error;
                // Log error
                if (this.options.enableLogging) {
                    this.logRequest(request, undefined, lastError.message);
                }
                // Check if retryable
                if (!errWithStatus.retryable || attempt === this.retryConfig.maxRetries) {
                    throw lastError;
                }
                // Use the provider-supplied reset time if available, otherwise exponential backoff
                const retryAfterMs = errWithStatus.retryAfterMs;
                const waitMs = retryAfterMs !== undefined ? retryAfterMs : delay;
                logger.warning(`LLM request failed (attempt ${attempt + 1}), retrying in ${waitMs}ms: ${lastError.message}`);
                await this.sleep(waitMs);
                // Only advance the backoff delay when we didn't use a provider-supplied wait
                if (retryAfterMs === undefined) {
                    delay = Math.min(delay * 2, this.retryConfig.maxDelay);
                }
            }
        }
        throw lastError ?? new Error('Unknown error');
    }
    /**
     * Generate a completion expecting a JSON response.
     *
     * Strips markdown code fences, retries once with a correction prompt on
     * parse failure, unwraps {key: [...]} single-key wrappers, and optionally
     * validates required fields against `schema`.
     *
     * @throws {SyntaxError} If even the corrected output fails to parse.
     * @throws {Error} From validateSchema when a required field is missing.
     */
    async completeJSON(request, schema) {
        const jsonRequest = { ...request, responseFormat: 'json' };
        // Add JSON instruction to prompt if not already present
        if (!jsonRequest.systemPrompt.toLowerCase().includes('json')) {
            jsonRequest.systemPrompt += '\n\nRespond with valid JSON only.';
        }
        const response = await this.complete(jsonRequest);
        let content = response.content;
        // Extract JSON from markdown code blocks if present
        const jsonMatch = content.match(/```(?:json)?\s*([\s\S]*?)```/);
        if (jsonMatch) {
            content = jsonMatch[1].trim();
        }
        // Parse JSON
        let parsed;
        try {
            parsed = JSON.parse(content);
        }
        catch (parseError) {
            // Retry with correction prompt for parse errors
            logger.warning('JSON parse failed, attempting correction');
            const correctionRequest = {
                systemPrompt: 'Fix the following invalid JSON and return only valid JSON. Do not include any explanation.',
                userPrompt: `Invalid JSON:\n${content}\n\nError: ${parseError.message}\n\nReturn the corrected JSON:`,
                temperature: 0.1,
                responseFormat: 'json',
            };
            // Re-enters complete(), so the correction attempt is also tracked
            // and counted toward cost.
            const correctionResponse = await this.complete(correctionRequest);
            let correctedContent = correctionResponse.content;
            // Extract from code blocks again
            const correctedMatch = correctedContent.match(/```(?:json)?\s*([\s\S]*?)```/);
            if (correctedMatch) {
                correctedContent = correctedMatch[1].trim();
            }
            parsed = JSON.parse(correctedContent);
        }
        // Unwrap single-key object whose value is an array (e.g. {entities:[...]} → [...])
        // LLM correction attempts sometimes wrap arrays in an object
        if (parsed !== null &&
            typeof parsed === 'object' &&
            !Array.isArray(parsed)) {
            const keys = Object.keys(parsed);
            if (keys.length === 1) {
                const val = parsed[keys[0]];
                if (Array.isArray(val)) {
                    parsed = val;
                }
            }
        }
        // Validate against schema if provided (after successful parsing)
        if (schema) {
            this.validateSchema(parsed, schema);
        }
        return parsed;
    }
    /**
     * Execute request with timeout.
     *
     * NOTE(review): the AbortController signal is never handed to the
     * provider, so this does NOT actually cancel a slow request — it only
     * sets up and clears a timer. Confirm whether real cancellation via
     * fetch's { signal } option is intended.
     */
    async executeWithTimeout(request) {
        const controller = new AbortController();
        const timeoutId = setTimeout(() => controller.abort(), this.retryConfig.timeout);
        try {
            // Note: fetch doesn't use AbortController in this simple implementation
            // In production, you'd pass the signal to the provider
            const response = await this.provider.generateCompletion(request);
            return response;
        }
        finally {
            clearTimeout(timeoutId);
        }
    }
    /**
     * Update token and cost tracking after a successful request
     */
    updateTracking(response) {
        this.tokenUsage.inputTokens += response.usage.inputTokens;
        this.tokenUsage.outputTokens += response.usage.outputTokens;
        this.tokenUsage.totalTokens += response.usage.totalTokens;
        this.tokenUsage.requests++;
        // Calculate cost
        const cost = this.calculateCost(response);
        this.costTracking.estimatedCost += cost;
        this.costTracking.byProvider[this.provider.name] = (this.costTracking.byProvider[this.provider.name] ?? 0) + cost;
    }
    /**
     * Calculate cost for a response (pricing is expressed per million tokens)
     */
    calculateCost(response) {
        const modelPricing = lookupPricing(this.provider.name, response.model);
        const inputCost = (response.usage.inputTokens / 1_000_000) * modelPricing.input;
        const outputCost = (response.usage.outputTokens / 1_000_000) * modelPricing.output;
        return inputCost + outputCost;
    }
    /**
     * Append a request/response (or error) entry to the in-memory log.
     * Entries are only persisted when saveLogs() is called.
     */
    logRequest(request, response, error) {
        const logEntry = {
            timestamp: new Date().toISOString(),
            request: this.redactSecrets(request),
            response,
            error,
        };
        this.requestLog.push(logEntry);
    }
    /**
     * Redact potential secrets from request prompts before logging.
     *
     * NOTE(review): the second pattern redacts ANY 32+ char alphanumeric run,
     * which can also catch hashes or IDs that are not secrets — deliberate
     * over-redaction, confirm it is acceptable for log consumers.
     */
    redactSecrets(request) {
        const secretPatterns = [
            /(?:api[_-]?key|password|secret|token|auth)['":\s]*[=:]\s*['"]?[\w-]{20,}['"]?/gi,
            /['"]?[a-zA-Z0-9]{32,}['"]?/g, // Long alphanumeric strings
        ];
        let systemPrompt = request.systemPrompt;
        let userPrompt = request.userPrompt;
        for (const pattern of secretPatterns) {
            systemPrompt = systemPrompt.replace(pattern, '[REDACTED]');
            userPrompt = userPrompt.replace(pattern, '[REDACTED]');
        }
        return { ...request, systemPrompt, userPrompt };
    }
    /**
     * Simple schema validation: only checks `required` fields on top-level
     * objects.
     *
     * @throws {Error} When a required field is absent.
     */
    validateSchema(data, schema) {
        // Simple type checking - in production use a proper JSON schema validator
        const schemaObj = schema;
        if (schemaObj.type === 'object' && schemaObj.required && Array.isArray(schemaObj.required)) {
            const dataObj = data;
            for (const field of schemaObj.required) {
                if (!(field in dataObj)) {
                    throw new Error(`Missing required field: ${field}`);
                }
            }
        }
    }
    /**
     * Save the in-memory request log (plus usage/cost summary) to disk as a
     * timestamped JSON file under options.logDir. No-op when the log is empty.
     */
    async saveLogs() {
        if (this.requestLog.length === 0)
            return;
        await mkdir(this.options.logDir, { recursive: true });
        // Colons/dots are invalid in filenames on some platforms.
        const filename = `llm-log-${new Date().toISOString().replace(/[:.]/g, '-')}.json`;
        const filepath = join(this.options.logDir, filename);
        await writeFile(filepath, JSON.stringify({
            summary: {
                tokenUsage: this.tokenUsage,
                costTracking: this.costTracking,
            },
            requests: this.requestLog,
        }, null, 2));
        logger.debug(`Saved LLM logs to ${filepath}`);
    }
    /**
     * Sleep helper used by the retry loop
     */
    sleep(ms) {
        return new Promise(resolve => setTimeout(resolve, ms));
    }
}
|
|
1086
|
+
// ============================================================================
|
|
1087
|
+
// FACTORY FUNCTIONS
|
|
1088
|
+
// ============================================================================
|
|
1089
|
+
/**
 * Create an LLM service with the specified provider.
 *
 * API credentials are read from the environment (ANTHROPIC_API_KEY,
 * OPENAI_API_KEY, OPENAI_COMPAT_API_KEY, GEMINI_API_KEY) according to the
 * selected provider; CLI-backed providers need no key here.
 *
 * @param {object} [options] - Provider name, model, apiBase, sslVerify, etc.
 * @returns {LLMService} Service wrapping the configured provider.
 * @throws {Error} When the provider is unknown or its credentials/base URL
 *     are missing.
 */
export function createLLMService(options = {}) {
    const providerName = options.provider ?? 'anthropic';
    const sslVerify = options.sslVerify ?? true;
    let provider;
    switch (providerName) {
        case 'anthropic': {
            const apiKey = process.env.ANTHROPIC_API_KEY;
            if (!apiKey) {
                throw new Error('ANTHROPIC_API_KEY environment variable is not set');
            }
            const apiBase = options.apiBase ?? process.env.ANTHROPIC_API_BASE ?? undefined;
            provider = new AnthropicProvider(apiKey, options.model ?? 'claude-3-5-sonnet-20241022', apiBase, sslVerify);
            break;
        }
        case 'openai': {
            const apiKey = process.env.OPENAI_API_KEY;
            if (!apiKey) {
                throw new Error('OPENAI_API_KEY environment variable is not set');
            }
            const apiBase = options.apiBase ?? process.env.OPENAI_API_BASE ?? undefined;
            provider = new OpenAIProvider(apiKey, options.model ?? 'gpt-4o', apiBase, sslVerify);
            break;
        }
        case 'openai-compat': {
            const apiKey = process.env.OPENAI_COMPAT_API_KEY;
            // Config value wins over the generic apiBase, then the env var.
            const baseUrl = options.openaiCompatBaseUrl ?? options.apiBase ?? process.env.OPENAI_COMPAT_BASE_URL;
            if (!apiKey) {
                throw new Error('OPENAI_COMPAT_API_KEY environment variable is not set');
            }
            if (!baseUrl) {
                throw new Error('openaiCompatBaseUrl must be set in config or OPENAI_COMPAT_BASE_URL env var (e.g. https://api.mistral.ai/v1)');
            }
            provider = new OpenAICompatibleProvider(apiKey, baseUrl, options.model ?? 'mistral-large-latest');
            break;
        }
        case 'gemini': {
            const apiKey = process.env.GEMINI_API_KEY;
            if (!apiKey) {
                throw new Error('GEMINI_API_KEY environment variable is not set');
            }
            provider = new GeminiProvider(apiKey, options.model ?? 'gemini-2.0-flash');
            break;
        }
        // CLI-backed providers authenticate through their own tooling.
        case 'claude-code':
            provider = new ClaudeCodeProvider(options.model);
            break;
        case 'mistral-vibe':
            provider = new MistralVibeProvider(options.model);
            break;
        case 'gemini-cli':
            provider = new GeminiCLIProvider(options.model);
            break;
        default:
            throw new Error(`Unknown provider: ${providerName}. Supported: anthropic, openai, openai-compat, gemini, gemini-cli, claude-code, mistral-vibe`);
    }
    if (!sslVerify) {
        logger.warning('SSL verification is disabled. Use only for trusted internal servers.');
    }
    return new LLMService(provider, options);
}
|
|
1147
|
+
/**
 * Create an LLM service backed by a mock provider (for testing).
 *
 * @param {object} [options] - LLMService options.
 * @returns {{ service: LLMService, provider: MockLLMProvider }} The service
 *     plus the mock provider, so tests can configure canned responses.
 */
export function createMockLLMService(options = {}) {
    const provider = new MockLLMProvider();
    return { service: new LLMService(provider, options), provider };
}
|
|
1155
|
+
//# sourceMappingURL=llm-service.js.map
|