@agile-vibe-coding/avc 0.1.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli/agent-loader.js +21 -0
- package/cli/agents/agent-selector.md +129 -0
- package/cli/agents/architecture-recommender.md +418 -0
- package/cli/agents/database-deep-dive.md +470 -0
- package/cli/agents/database-recommender.md +634 -0
- package/cli/agents/doc-distributor.md +176 -0
- package/cli/agents/documentation-updater.md +203 -0
- package/cli/agents/epic-story-decomposer.md +280 -0
- package/cli/agents/feature-context-generator.md +91 -0
- package/cli/agents/gap-checker-epic.md +52 -0
- package/cli/agents/impact-checker-story.md +51 -0
- package/cli/agents/migration-guide-generator.md +305 -0
- package/cli/agents/mission-scope-generator.md +79 -0
- package/cli/agents/mission-scope-validator.md +112 -0
- package/cli/agents/project-context-extractor.md +107 -0
- package/cli/agents/project-documentation-creator.json +226 -0
- package/cli/agents/project-documentation-creator.md +595 -0
- package/cli/agents/question-prefiller.md +269 -0
- package/cli/agents/refiner-epic.md +39 -0
- package/cli/agents/refiner-story.md +42 -0
- package/cli/agents/solver-epic-api.json +15 -0
- package/cli/agents/solver-epic-api.md +39 -0
- package/cli/agents/solver-epic-backend.json +15 -0
- package/cli/agents/solver-epic-backend.md +39 -0
- package/cli/agents/solver-epic-cloud.json +15 -0
- package/cli/agents/solver-epic-cloud.md +39 -0
- package/cli/agents/solver-epic-data.json +15 -0
- package/cli/agents/solver-epic-data.md +39 -0
- package/cli/agents/solver-epic-database.json +15 -0
- package/cli/agents/solver-epic-database.md +39 -0
- package/cli/agents/solver-epic-developer.json +15 -0
- package/cli/agents/solver-epic-developer.md +39 -0
- package/cli/agents/solver-epic-devops.json +15 -0
- package/cli/agents/solver-epic-devops.md +39 -0
- package/cli/agents/solver-epic-frontend.json +15 -0
- package/cli/agents/solver-epic-frontend.md +39 -0
- package/cli/agents/solver-epic-mobile.json +15 -0
- package/cli/agents/solver-epic-mobile.md +39 -0
- package/cli/agents/solver-epic-qa.json +15 -0
- package/cli/agents/solver-epic-qa.md +39 -0
- package/cli/agents/solver-epic-security.json +15 -0
- package/cli/agents/solver-epic-security.md +39 -0
- package/cli/agents/solver-epic-solution-architect.json +15 -0
- package/cli/agents/solver-epic-solution-architect.md +39 -0
- package/cli/agents/solver-epic-test-architect.json +15 -0
- package/cli/agents/solver-epic-test-architect.md +39 -0
- package/cli/agents/solver-epic-ui.json +15 -0
- package/cli/agents/solver-epic-ui.md +39 -0
- package/cli/agents/solver-epic-ux.json +15 -0
- package/cli/agents/solver-epic-ux.md +39 -0
- package/cli/agents/solver-story-api.json +15 -0
- package/cli/agents/solver-story-api.md +39 -0
- package/cli/agents/solver-story-backend.json +15 -0
- package/cli/agents/solver-story-backend.md +39 -0
- package/cli/agents/solver-story-cloud.json +15 -0
- package/cli/agents/solver-story-cloud.md +39 -0
- package/cli/agents/solver-story-data.json +15 -0
- package/cli/agents/solver-story-data.md +39 -0
- package/cli/agents/solver-story-database.json +15 -0
- package/cli/agents/solver-story-database.md +39 -0
- package/cli/agents/solver-story-developer.json +15 -0
- package/cli/agents/solver-story-developer.md +39 -0
- package/cli/agents/solver-story-devops.json +15 -0
- package/cli/agents/solver-story-devops.md +39 -0
- package/cli/agents/solver-story-frontend.json +15 -0
- package/cli/agents/solver-story-frontend.md +39 -0
- package/cli/agents/solver-story-mobile.json +15 -0
- package/cli/agents/solver-story-mobile.md +39 -0
- package/cli/agents/solver-story-qa.json +15 -0
- package/cli/agents/solver-story-qa.md +39 -0
- package/cli/agents/solver-story-security.json +15 -0
- package/cli/agents/solver-story-security.md +39 -0
- package/cli/agents/solver-story-solution-architect.json +15 -0
- package/cli/agents/solver-story-solution-architect.md +39 -0
- package/cli/agents/solver-story-test-architect.json +15 -0
- package/cli/agents/solver-story-test-architect.md +39 -0
- package/cli/agents/solver-story-ui.json +15 -0
- package/cli/agents/solver-story-ui.md +39 -0
- package/cli/agents/solver-story-ux.json +15 -0
- package/cli/agents/solver-story-ux.md +39 -0
- package/cli/agents/story-doc-enricher.md +133 -0
- package/cli/agents/suggestion-business-analyst.md +88 -0
- package/cli/agents/suggestion-deployment-architect.md +263 -0
- package/cli/agents/suggestion-product-manager.md +129 -0
- package/cli/agents/suggestion-security-specialist.md +156 -0
- package/cli/agents/suggestion-technical-architect.md +269 -0
- package/cli/agents/suggestion-ux-researcher.md +93 -0
- package/cli/agents/task-subtask-decomposer.md +188 -0
- package/cli/agents/validator-documentation.json +152 -0
- package/cli/agents/validator-documentation.md +453 -0
- package/cli/agents/validator-epic-api.json +93 -0
- package/cli/agents/validator-epic-api.md +137 -0
- package/cli/agents/validator-epic-backend.json +93 -0
- package/cli/agents/validator-epic-backend.md +130 -0
- package/cli/agents/validator-epic-cloud.json +93 -0
- package/cli/agents/validator-epic-cloud.md +137 -0
- package/cli/agents/validator-epic-data.json +93 -0
- package/cli/agents/validator-epic-data.md +130 -0
- package/cli/agents/validator-epic-database.json +93 -0
- package/cli/agents/validator-epic-database.md +137 -0
- package/cli/agents/validator-epic-developer.json +74 -0
- package/cli/agents/validator-epic-developer.md +153 -0
- package/cli/agents/validator-epic-devops.json +74 -0
- package/cli/agents/validator-epic-devops.md +153 -0
- package/cli/agents/validator-epic-frontend.json +74 -0
- package/cli/agents/validator-epic-frontend.md +153 -0
- package/cli/agents/validator-epic-mobile.json +93 -0
- package/cli/agents/validator-epic-mobile.md +130 -0
- package/cli/agents/validator-epic-qa.json +93 -0
- package/cli/agents/validator-epic-qa.md +130 -0
- package/cli/agents/validator-epic-security.json +74 -0
- package/cli/agents/validator-epic-security.md +154 -0
- package/cli/agents/validator-epic-solution-architect.json +74 -0
- package/cli/agents/validator-epic-solution-architect.md +156 -0
- package/cli/agents/validator-epic-test-architect.json +93 -0
- package/cli/agents/validator-epic-test-architect.md +130 -0
- package/cli/agents/validator-epic-ui.json +93 -0
- package/cli/agents/validator-epic-ui.md +130 -0
- package/cli/agents/validator-epic-ux.json +93 -0
- package/cli/agents/validator-epic-ux.md +130 -0
- package/cli/agents/validator-selector.md +211 -0
- package/cli/agents/validator-story-api.json +104 -0
- package/cli/agents/validator-story-api.md +152 -0
- package/cli/agents/validator-story-backend.json +104 -0
- package/cli/agents/validator-story-backend.md +152 -0
- package/cli/agents/validator-story-cloud.json +104 -0
- package/cli/agents/validator-story-cloud.md +152 -0
- package/cli/agents/validator-story-data.json +104 -0
- package/cli/agents/validator-story-data.md +152 -0
- package/cli/agents/validator-story-database.json +104 -0
- package/cli/agents/validator-story-database.md +152 -0
- package/cli/agents/validator-story-developer.json +104 -0
- package/cli/agents/validator-story-developer.md +152 -0
- package/cli/agents/validator-story-devops.json +104 -0
- package/cli/agents/validator-story-devops.md +152 -0
- package/cli/agents/validator-story-frontend.json +104 -0
- package/cli/agents/validator-story-frontend.md +152 -0
- package/cli/agents/validator-story-mobile.json +104 -0
- package/cli/agents/validator-story-mobile.md +152 -0
- package/cli/agents/validator-story-qa.json +104 -0
- package/cli/agents/validator-story-qa.md +152 -0
- package/cli/agents/validator-story-security.json +104 -0
- package/cli/agents/validator-story-security.md +152 -0
- package/cli/agents/validator-story-solution-architect.json +104 -0
- package/cli/agents/validator-story-solution-architect.md +152 -0
- package/cli/agents/validator-story-test-architect.json +104 -0
- package/cli/agents/validator-story-test-architect.md +152 -0
- package/cli/agents/validator-story-ui.json +104 -0
- package/cli/agents/validator-story-ui.md +152 -0
- package/cli/agents/validator-story-ux.json +104 -0
- package/cli/agents/validator-story-ux.md +152 -0
- package/cli/ansi-colors.js +21 -0
- package/cli/build-docs.js +29 -8
- package/cli/ceremony-history.js +369 -0
- package/cli/command-logger.js +49 -12
- package/cli/components/static-output.js +63 -0
- package/cli/console-output-manager.js +94 -0
- package/cli/docs-sync.js +306 -0
- package/cli/epic-story-validator.js +1174 -0
- package/cli/evaluation-prompts.js +1008 -0
- package/cli/execution-context.js +195 -0
- package/cli/generate-summary-table.js +340 -0
- package/cli/index.js +0 -0
- package/cli/init-model-config.js +697 -0
- package/cli/init.js +1311 -274
- package/cli/kanban-server-manager.js +228 -0
- package/cli/llm-claude.js +83 -1
- package/cli/llm-gemini.js +85 -0
- package/cli/llm-mock.js +233 -0
- package/cli/llm-openai.js +233 -0
- package/cli/llm-provider.js +240 -3
- package/cli/llm-token-limits.js +102 -0
- package/cli/llm-verifier.js +454 -0
- package/cli/message-constants.js +58 -0
- package/cli/message-manager.js +334 -0
- package/cli/message-types.js +96 -0
- package/cli/messaging-api.js +297 -0
- package/cli/model-pricing.js +169 -0
- package/cli/model-query-engine.js +468 -0
- package/cli/model-recommendation-analyzer.js +495 -0
- package/cli/model-selector.js +269 -0
- package/cli/output-buffer.js +107 -0
- package/cli/process-manager.js +73 -2
- package/cli/repl-ink.js +4988 -1217
- package/cli/repl-old.js +4 -4
- package/cli/seed-processor.js +792 -0
- package/cli/sprint-planning-processor.js +1813 -0
- package/cli/template-processor.js +2102 -105
- package/cli/templates/project.md +25 -8
- package/cli/templates/vitepress-config.mts.template +5 -4
- package/cli/token-tracker.js +520 -0
- package/cli/tools/generate-story-validators.js +317 -0
- package/cli/tools/generate-validators.js +669 -0
- package/cli/update-checker.js +19 -17
- package/cli/update-notifier.js +4 -4
- package/cli/validation-router.js +605 -0
- package/cli/verification-tracker.js +563 -0
- package/kanban/README.md +386 -0
- package/kanban/client/README.md +205 -0
- package/kanban/client/components.json +20 -0
- package/kanban/client/dist/assets/index-CiD8PS2e.js +306 -0
- package/kanban/client/dist/assets/index-nLh0m82Q.css +1 -0
- package/kanban/client/dist/index.html +16 -0
- package/kanban/client/dist/vite.svg +1 -0
- package/kanban/client/index.html +15 -0
- package/kanban/client/package-lock.json +9442 -0
- package/kanban/client/package.json +44 -0
- package/kanban/client/postcss.config.js +6 -0
- package/kanban/client/public/vite.svg +1 -0
- package/kanban/client/src/App.jsx +622 -0
- package/kanban/client/src/components/ProjectFileEditorPopup.jsx +117 -0
- package/kanban/client/src/components/ceremony/AskArchPopup.jsx +416 -0
- package/kanban/client/src/components/ceremony/AskModelPopup.jsx +616 -0
- package/kanban/client/src/components/ceremony/CeremonyWorkflowModal.jsx +946 -0
- package/kanban/client/src/components/ceremony/EpicStorySelectionModal.jsx +254 -0
- package/kanban/client/src/components/ceremony/SponsorCallModal.jsx +619 -0
- package/kanban/client/src/components/ceremony/SprintPlanningModal.jsx +704 -0
- package/kanban/client/src/components/ceremony/steps/ArchitectureStep.jsx +150 -0
- package/kanban/client/src/components/ceremony/steps/CompleteStep.jsx +154 -0
- package/kanban/client/src/components/ceremony/steps/DatabaseStep.jsx +202 -0
- package/kanban/client/src/components/ceremony/steps/DeploymentStep.jsx +123 -0
- package/kanban/client/src/components/ceremony/steps/MissionStep.jsx +106 -0
- package/kanban/client/src/components/ceremony/steps/ReviewAnswersStep.jsx +125 -0
- package/kanban/client/src/components/ceremony/steps/RunningStep.jsx +228 -0
- package/kanban/client/src/components/kanban/CardDetailModal.jsx +559 -0
- package/kanban/client/src/components/kanban/EpicSection.jsx +146 -0
- package/kanban/client/src/components/kanban/FilterToolbar.jsx +222 -0
- package/kanban/client/src/components/kanban/GroupingSelector.jsx +57 -0
- package/kanban/client/src/components/kanban/KanbanBoard.jsx +211 -0
- package/kanban/client/src/components/kanban/KanbanCard.jsx +138 -0
- package/kanban/client/src/components/kanban/KanbanColumn.jsx +90 -0
- package/kanban/client/src/components/kanban/RefineWorkItemPopup.jsx +789 -0
- package/kanban/client/src/components/layout/LoadingScreen.jsx +82 -0
- package/kanban/client/src/components/process/ProcessMonitorBar.jsx +80 -0
- package/kanban/client/src/components/settings/AgentEditorPopup.jsx +171 -0
- package/kanban/client/src/components/settings/AgentsTab.jsx +353 -0
- package/kanban/client/src/components/settings/ApiKeysTab.jsx +113 -0
- package/kanban/client/src/components/settings/CeremonyModelsTab.jsx +98 -0
- package/kanban/client/src/components/settings/CostThresholdsTab.jsx +94 -0
- package/kanban/client/src/components/settings/ModelPricingTab.jsx +204 -0
- package/kanban/client/src/components/settings/ServersTab.jsx +121 -0
- package/kanban/client/src/components/settings/SettingsModal.jsx +84 -0
- package/kanban/client/src/components/stats/CostModal.jsx +353 -0
- package/kanban/client/src/components/ui/badge.jsx +27 -0
- package/kanban/client/src/components/ui/dialog.jsx +121 -0
- package/kanban/client/src/components/ui/tabs.jsx +85 -0
- package/kanban/client/src/hooks/__tests__/useGrouping.test.js +232 -0
- package/kanban/client/src/hooks/useGrouping.js +118 -0
- package/kanban/client/src/hooks/useWebSocket.js +120 -0
- package/kanban/client/src/lib/__tests__/api.test.js +196 -0
- package/kanban/client/src/lib/__tests__/status-grouping.test.js +94 -0
- package/kanban/client/src/lib/api.js +401 -0
- package/kanban/client/src/lib/status-grouping.js +144 -0
- package/kanban/client/src/lib/utils.js +11 -0
- package/kanban/client/src/main.jsx +10 -0
- package/kanban/client/src/store/__tests__/kanbanStore.test.js +164 -0
- package/kanban/client/src/store/ceremonyStore.js +172 -0
- package/kanban/client/src/store/filterStore.js +201 -0
- package/kanban/client/src/store/kanbanStore.js +115 -0
- package/kanban/client/src/store/processStore.js +65 -0
- package/kanban/client/src/store/sprintPlanningStore.js +33 -0
- package/kanban/client/src/styles/globals.css +59 -0
- package/kanban/client/tailwind.config.js +77 -0
- package/kanban/client/vite.config.js +28 -0
- package/kanban/client/vitest.config.js +28 -0
- package/kanban/dev-start.sh +47 -0
- package/kanban/package.json +12 -0
- package/kanban/server/index.js +516 -0
- package/kanban/server/routes/ceremony.js +305 -0
- package/kanban/server/routes/costs.js +157 -0
- package/kanban/server/routes/processes.js +50 -0
- package/kanban/server/routes/settings.js +303 -0
- package/kanban/server/routes/websocket.js +276 -0
- package/kanban/server/routes/work-items.js +347 -0
- package/kanban/server/services/CeremonyService.js +1190 -0
- package/kanban/server/services/FileSystemScanner.js +95 -0
- package/kanban/server/services/FileWatcher.js +144 -0
- package/kanban/server/services/HierarchyBuilder.js +196 -0
- package/kanban/server/services/ProcessRegistry.js +122 -0
- package/kanban/server/services/WorkItemReader.js +123 -0
- package/kanban/server/services/WorkItemRefineService.js +510 -0
- package/kanban/server/start.js +49 -0
- package/kanban/server/utils/kanban-logger.js +132 -0
- package/kanban/server/utils/markdown.js +91 -0
- package/kanban/server/utils/status-grouping.js +107 -0
- package/kanban/server/workers/sponsor-call-worker.js +84 -0
- package/kanban/server/workers/sprint-planning-worker.js +130 -0
- package/package.json +18 -5
- package/cli/agents/documentation.md +0 -302
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
import OpenAI from 'openai';
|
|
2
|
+
import { jsonrepair } from 'jsonrepair';
|
|
3
|
+
import { LLMProvider } from './llm-provider.js';
|
|
4
|
+
import { getMaxTokensForModel } from './llm-token-limits.js';
|
|
5
|
+
|
|
6
|
+
export class OpenAIProvider extends LLMProvider {
  /**
   * LLM provider backed by the OpenAI SDK.
   *
   * Routes "pro"/"codex" models through the Responses API and all other
   * models through the Chat Completions API.
   *
   * @param {string} [model='gpt-5.2-chat-latest'] - OpenAI model identifier.
   * @param {string} [reasoningEffort='medium'] - Reasoning effort forwarded
   *   to Responses API models that support it (gpt-5.2-pro, gpt-5.2-codex).
   */
  constructor(model = 'gpt-5.2-chat-latest', reasoningEffort = 'medium') {
    super('openai', model);
    this.reasoningEffort = reasoningEffort;
  }

  /**
   * Build the OpenAI SDK client from the OPENAI_API_KEY env var.
   * @returns {OpenAI} Configured client.
   * @throws {Error} If OPENAI_API_KEY is not set.
   */
  _createClient() {
    const apiKey = process.env.OPENAI_API_KEY;
    if (!apiKey) throw new Error('OPENAI_API_KEY not set. Add it to your .env file.');
    return new OpenAI({ apiKey });
  }

  /** Lazily instantiate the SDK client on first use. */
  _ensureClient() {
    if (!this._client) {
      this._client = this._createClient();
    }
  }

  /**
   * Determine if model uses Responses API instead of Chat Completions API.
   * Models that use Responses API: gpt-5.2-pro, gpt-5.2-codex
   * @returns {boolean} True when this.model must go through responses.create.
   */
  _usesResponsesAPI() {
    const responsesAPIModels = ['gpt-5.2-pro', 'gpt-5.2-codex'];
    return responsesAPIModels.includes(this.model);
  }

  /**
   * Set the model-appropriate token-limit field on a request payload.
   * GPT-5+/o1/o3 models use max_completion_tokens; older models use max_tokens.
   * @param {Object} params - Request params object (mutated in place).
   * @param {number} maxTokens - Token budget for the completion.
   */
  _applyTokenLimit(params, maxTokens) {
    if (this.model.startsWith('gpt-5') || this.model.startsWith('o1') || this.model.startsWith('o3')) {
      params.max_completion_tokens = maxTokens;
    } else {
      params.max_tokens = maxTokens;
    }
  }

  /**
   * Strip optional markdown code fences and parse a JSON payload, attempting
   * a jsonrepair pass before giving up (defense-in-depth against models that
   * wrap or slightly malform their JSON output).
   * @param {string} content - Raw model output expected to contain JSON.
   * @returns {Object|Array} Parsed JSON value.
   * @throws {Error} If the payload cannot be parsed or repaired.
   */
  _parseJSONContent(content) {
    let jsonStr = content.trim();
    if (jsonStr.startsWith('```')) {
      jsonStr = jsonStr.replace(/^```(?:json)?\s*\n?/, '');
      jsonStr = jsonStr.replace(/\n?\s*```\s*$/, '');
      jsonStr = jsonStr.trim();
    }

    try {
      return JSON.parse(jsonStr);
    } catch (firstError) {
      // Only attempt repair when the text at least looks like JSON.
      if (jsonStr.startsWith('{') || jsonStr.startsWith('[')) {
        try {
          return JSON.parse(jsonrepair(jsonStr));
        } catch { /* fall through to throw */ }
      }
      throw new Error(`Failed to parse JSON response: ${firstError.message}\n\nResponse was:\n${content}`);
    }
  }

  /**
   * Call using Chat Completions API (standard models).
   * @param {string} prompt - User prompt.
   * @param {number} maxTokens - Completion token budget.
   * @param {string|null} systemInstructions - Optional system-role message.
   * @param {string} [operationName='Chat Completions call'] - Retry log label.
   * @returns {Promise<string>} Assistant message content.
   */
  async _callChatCompletions(prompt, maxTokens, systemInstructions, operationName = 'Chat Completions call') {
    const messages = [];

    // OpenAI uses message array - system instructions go first as system role
    if (systemInstructions) {
      messages.push({ role: 'system', content: systemInstructions });
    }
    messages.push({ role: 'user', content: prompt });

    const params = {
      model: this.model,
      messages
    };
    this._applyTokenLimit(params, maxTokens);

    // FIX: wrap in the retry helper like every other call path, so transient
    // rate-limit/overload errors (429/503) are retried here too.
    const response = await this._withRetry(
      () => this._client.chat.completions.create(params),
      operationName
    );

    this._trackTokens(response.usage);
    return response.choices[0].message.content;
  }

  /**
   * Call using Responses API (pro/codex models).
   * @param {string} prompt - User prompt.
   * @param {string|null} systemInstructions - Prepended to the input when set.
   * @returns {Promise<string>} Model output text.
   */
  async _callResponsesAPI(prompt, systemInstructions) {
    // Responses API takes a single input string: combine instructions + prompt.
    const fullInput = systemInstructions
      ? `${systemInstructions}\n\n${prompt}`
      : prompt;

    const params = {
      model: this.model,
      input: fullInput
    };

    // Add reasoning effort for models that support it
    if (this.model === 'gpt-5.2-codex' || this.model === 'gpt-5.2-pro') {
      params.reasoning = { effort: this.reasoningEffort };
    }

    const response = await this._withRetry(
      () => this._client.responses.create(params),
      'Responses API call'
    );

    // Track tokens if usage data is available
    if (response.usage) {
      this._trackTokens(response.usage);
    }

    return response.output_text;
  }

  /**
   * Dispatch a raw call to the API appropriate for this.model.
   * @param {string} prompt - User prompt.
   * @param {number} maxTokens - Completion token budget (Chat Completions only).
   * @param {string|null} systemInstructions - Optional system instructions.
   * @returns {Promise<string>} Model output text.
   */
  async _callProvider(prompt, maxTokens, systemInstructions) {
    // FIX: ensure the client exists even when this entry point is used
    // directly (generateJSON/generateText previously did this themselves).
    this._ensureClient();
    if (this._usesResponsesAPI()) {
      return await this._callResponsesAPI(prompt, systemInstructions);
    } else {
      return await this._callChatCompletions(prompt, maxTokens, systemInstructions);
    }
  }

  /**
   * Generate a JSON object/array from the model.
   * @param {string} prompt - User prompt.
   * @param {string|null} [agentInstructions=null] - Optional instructions
   *   prepended to the prompt.
   * @returns {Promise<Object|Array>} Parsed JSON value.
   * @throws {Error} If the response cannot be parsed as JSON.
   */
  async generateJSON(prompt, agentInstructions = null) {
    this._ensureClient();

    const fullPrompt = agentInstructions ? `${agentInstructions}\n\n${prompt}` : prompt;
    const systemInstructions = 'You are a helpful assistant that always returns valid JSON. Your response must be a valid JSON object or array, nothing else.';

    if (this._usesResponsesAPI()) {
      // Responses API: Use system instructions to enforce JSON
      const response = await this._callResponsesAPI(fullPrompt, systemInstructions);
      return this._parseJSONContent(response);
    }

    // Chat Completions API: Use native JSON mode
    const messages = [
      { role: 'system', content: systemInstructions },
      { role: 'user', content: fullPrompt }
    ];

    const params = {
      model: this.model,
      messages
    };

    // Use model-specific maximum tokens
    this._applyTokenLimit(params, getMaxTokensForModel(this.model));

    // Enable JSON mode if model supports it (GPT-4+)
    if (this.model.startsWith('gpt-4') || this.model.startsWith('gpt-5') || this.model.startsWith('o')) {
      params.response_format = { type: 'json_object' };
    }

    const response = await this._withRetry(
      () => this._client.chat.completions.create(params),
      'JSON generation (Chat Completions)'
    );

    this._trackTokens(response.usage);
    return this._parseJSONContent(response.choices[0].message.content);
  }

  /**
   * Generate free-form text from the model.
   * @param {string} prompt - User prompt.
   * @param {string|null} [agentInstructions=null] - Optional instructions
   *   prepended to the prompt.
   * @returns {Promise<string>} Model output text.
   */
  async generateText(prompt, agentInstructions = null) {
    this._ensureClient();

    const fullPrompt = agentInstructions ? `${agentInstructions}\n\n${prompt}` : prompt;

    if (this._usesResponsesAPI()) {
      // Responses API
      return await this._callResponsesAPI(fullPrompt, null);
    }

    // Chat Completions API (no system message for plain text generation)
    return await this._callChatCompletions(
      fullPrompt,
      getMaxTokensForModel(this.model),
      null,
      'Text generation (Chat Completions)'
    );
  }
}
|
package/cli/llm-provider.js
CHANGED
|
@@ -1,12 +1,237 @@
|
|
|
1
1
|
export class LLMProvider {
|
|
2
|
-
constructor(providerName, model) {
|
|
2
|
+
constructor(providerName, model, retryConfig = {}) {
|
|
3
3
|
this.providerName = providerName;
|
|
4
4
|
this.model = model;
|
|
5
5
|
this._client = null;
|
|
6
|
+
this.tokenUsage = {
|
|
7
|
+
inputTokens: 0,
|
|
8
|
+
outputTokens: 0,
|
|
9
|
+
totalCalls: 0
|
|
10
|
+
};
|
|
11
|
+
|
|
12
|
+
// Per-call token callbacks (fired synchronously after each _trackTokens call)
|
|
13
|
+
this._callCallbacks = [];
|
|
14
|
+
|
|
15
|
+
// Retry configuration
|
|
16
|
+
this.retryConfig = {
|
|
17
|
+
maxRetries: retryConfig.maxRetries || 5,
|
|
18
|
+
initialDelay: retryConfig.initialDelay || 1000, // 1 second
|
|
19
|
+
maxDelay: retryConfig.maxDelay || 32000, // 32 seconds
|
|
20
|
+
backoffMultiplier: retryConfig.backoffMultiplier || 2
|
|
21
|
+
};
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* Check if error is retryable (high demand, rate limit, temporary failures)
|
|
26
|
+
* Provider-specific error codes:
|
|
27
|
+
* - OpenAI: 429 (rate limit), 503 (overloaded)
|
|
28
|
+
* - Claude: 429 (rate_limit_error), 529 (overloaded_error)
|
|
29
|
+
* - Gemini: 429 (RESOURCE_EXHAUSTED), 503 (UNAVAILABLE/overloaded)
|
|
30
|
+
*
|
|
31
|
+
* @param {Error} error - The error to check
|
|
32
|
+
* @returns {boolean} True if error is retryable
|
|
33
|
+
*/
|
|
34
|
+
_isRetryableError(error) {
|
|
35
|
+
const errorMessage = error.message?.toLowerCase() || '';
|
|
36
|
+
const errorCode = error.status || error.code;
|
|
37
|
+
const errorType = error.type || error.error?.type || '';
|
|
38
|
+
|
|
39
|
+
// Check for provider-specific error codes
|
|
40
|
+
const retryableStatusCodes = [
|
|
41
|
+
429, // Rate limit (all providers)
|
|
42
|
+
503, // Service unavailable (OpenAI, Gemini)
|
|
43
|
+
529 // Overloaded (Claude/Anthropic)
|
|
44
|
+
];
|
|
45
|
+
|
|
46
|
+
const hasRetryableCode = retryableStatusCodes.includes(errorCode);
|
|
47
|
+
|
|
48
|
+
// Check for provider-specific error types
|
|
49
|
+
const retryableErrorTypes = [
|
|
50
|
+
'rate_limit_error', // Claude
|
|
51
|
+
'overloaded_error', // Claude
|
|
52
|
+
'resource_exhausted', // Gemini
|
|
53
|
+
'unavailable' // Gemini
|
|
54
|
+
];
|
|
55
|
+
|
|
56
|
+
const hasRetryableType = retryableErrorTypes.some(type =>
|
|
57
|
+
errorType.toLowerCase().includes(type)
|
|
58
|
+
);
|
|
59
|
+
|
|
60
|
+
// Check for high demand/overload message patterns
|
|
61
|
+
const highDemandPatterns = [
|
|
62
|
+
'high demand',
|
|
63
|
+
'experiencing high demand',
|
|
64
|
+
'spikes in demand',
|
|
65
|
+
'try again later',
|
|
66
|
+
'temporarily unavailable',
|
|
67
|
+
'service unavailable',
|
|
68
|
+
'overloaded',
|
|
69
|
+
'model is overloaded',
|
|
70
|
+
'too many requests',
|
|
71
|
+
'rate limit',
|
|
72
|
+
'quota exceeded',
|
|
73
|
+
'resource exhausted',
|
|
74
|
+
'resource has been exhausted'
|
|
75
|
+
];
|
|
76
|
+
|
|
77
|
+
const hasHighDemandMessage = highDemandPatterns.some(pattern =>
|
|
78
|
+
errorMessage.includes(pattern)
|
|
79
|
+
);
|
|
80
|
+
|
|
81
|
+
return hasRetryableCode || hasRetryableType || hasHighDemandMessage;
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
/**
|
|
85
|
+
* Sleep for specified milliseconds
|
|
86
|
+
* @param {number} ms - Milliseconds to sleep
|
|
87
|
+
* @returns {Promise<void>}
|
|
88
|
+
*/
|
|
89
|
+
_sleep(ms) {
|
|
90
|
+
return new Promise(resolve => setTimeout(resolve, ms));
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
/**
|
|
94
|
+
* Extract retry delay from error headers (e.g., Claude's retry-after header)
|
|
95
|
+
* @param {Error} error - The error object
|
|
96
|
+
* @returns {number|null} Delay in milliseconds, or null if not found
|
|
97
|
+
*/
|
|
98
|
+
_getRetryAfterDelay(error) {
|
|
99
|
+
// Check for retry-after header (Claude provides this)
|
|
100
|
+
const retryAfter = error.headers?.['retry-after'] || error.error?.headers?.['retry-after'];
|
|
101
|
+
|
|
102
|
+
if (retryAfter) {
|
|
103
|
+
// retry-after can be in seconds
|
|
104
|
+
const seconds = parseInt(retryAfter, 10);
|
|
105
|
+
if (!isNaN(seconds)) {
|
|
106
|
+
return seconds * 1000; // Convert to milliseconds
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
return null;
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
/**
|
|
114
|
+
* Execute an async function with exponential backoff retry strategy
|
|
115
|
+
* Uses retry-after header when available (Claude), otherwise exponential backoff
|
|
116
|
+
*
|
|
117
|
+
* @param {Function} fn - Async function to execute
|
|
118
|
+
* @param {string} operationName - Name of operation for logging
|
|
119
|
+
* @returns {Promise<any>} Result of the function
|
|
120
|
+
*/
|
|
121
|
+
async _withRetry(fn, operationName = 'API call') {
|
|
122
|
+
let lastError;
|
|
123
|
+
let delay = this.retryConfig.initialDelay;
|
|
124
|
+
|
|
125
|
+
for (let attempt = 0; attempt <= this.retryConfig.maxRetries; attempt++) {
|
|
126
|
+
try {
|
|
127
|
+
return await fn();
|
|
128
|
+
} catch (error) {
|
|
129
|
+
lastError = error;
|
|
130
|
+
|
|
131
|
+
// Don't retry if it's not a retryable error
|
|
132
|
+
if (!this._isRetryableError(error)) {
|
|
133
|
+
throw error;
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
// Don't retry if we've exhausted all attempts
|
|
137
|
+
if (attempt === this.retryConfig.maxRetries) {
|
|
138
|
+
console.error(`\n${operationName} failed after ${this.retryConfig.maxRetries + 1} attempts`);
|
|
139
|
+
throw error;
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
// Check for retry-after header (Claude provides this)
|
|
143
|
+
const retryAfterDelay = this._getRetryAfterDelay(error);
|
|
144
|
+
const currentDelay = retryAfterDelay || delay;
|
|
145
|
+
|
|
146
|
+
// Log retry attempt with helpful info
|
|
147
|
+
const retrySource = retryAfterDelay ? 'server directive' : 'exponential backoff';
|
|
148
|
+
console.log(`\n⏳ ${operationName} failed (attempt ${attempt + 1}/${this.retryConfig.maxRetries + 1})`);
|
|
149
|
+
console.log(` Error: ${error.message}`);
|
|
150
|
+
console.log(` Retrying in ${currentDelay / 1000}s (${retrySource})...`);
|
|
151
|
+
|
|
152
|
+
// Wait before retrying
|
|
153
|
+
await this._sleep(currentDelay);
|
|
154
|
+
|
|
155
|
+
// Exponential backoff with max cap (only if not using retry-after)
|
|
156
|
+
if (!retryAfterDelay) {
|
|
157
|
+
delay = Math.min(
|
|
158
|
+
delay * this.retryConfig.backoffMultiplier,
|
|
159
|
+
this.retryConfig.maxDelay
|
|
160
|
+
);
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
throw lastError;
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
  /**
   * Register a callback to be fired synchronously after every LLM API call.
   * Receives { input, output, provider, model } delta for that single call.
   * Callbacks are invoked from _trackTokens only when the call reported a
   * non-zero token delta; exceptions thrown by a callback are swallowed so
   * listeners can never break the API call path.
   * @param {Function} fn - Callback function
   */
  onCall(fn) {
    this._callCallbacks.push(fn);
  }
|
|
176
|
+
|
|
177
|
+
/**
|
|
178
|
+
* Track token usage from API response and fire per-call callbacks.
|
|
179
|
+
* @param {Object} usage - Usage object from API response
|
|
180
|
+
*/
|
|
181
|
+
_trackTokens(usage) {
|
|
182
|
+
if (usage) {
|
|
183
|
+
const deltaIn = usage.input_tokens || usage.inputTokens || usage.promptTokenCount || usage.prompt_tokens || 0;
|
|
184
|
+
const deltaOut = usage.output_tokens || usage.outputTokens || usage.candidatesTokenCount || usage.completion_tokens || 0;
|
|
185
|
+
this.tokenUsage.inputTokens += deltaIn;
|
|
186
|
+
this.tokenUsage.outputTokens += deltaOut;
|
|
187
|
+
this.tokenUsage.totalCalls++;
|
|
188
|
+
if (this._callCallbacks.length > 0 && (deltaIn > 0 || deltaOut > 0)) {
|
|
189
|
+
const delta = { input: deltaIn, output: deltaOut, provider: this.providerName, model: this.model };
|
|
190
|
+
for (const fn of this._callCallbacks) {
|
|
191
|
+
try { fn(delta); } catch (_) {}
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
/**
|
|
198
|
+
* Get token usage statistics with cost estimate
|
|
199
|
+
* @returns {Object} Token usage and cost information
|
|
200
|
+
*/
|
|
201
|
+
getTokenUsage() {
|
|
202
|
+
const total = this.tokenUsage.inputTokens + this.tokenUsage.outputTokens;
|
|
203
|
+
|
|
204
|
+
// Pricing per 1M tokens (as of 2026-02)
|
|
205
|
+
const pricing = {
|
|
206
|
+
'claude': { input: 3.00, output: 15.00 }, // Claude Sonnet 4.5
|
|
207
|
+
'gemini': { input: 0.15, output: 0.60 }, // Gemini 2.0 Flash
|
|
208
|
+
'openai': { input: 1.75, output: 14.00 } // GPT-5.2
|
|
209
|
+
};
|
|
210
|
+
|
|
211
|
+
const rates = pricing[this.providerName] || { input: 0, output: 0 };
|
|
212
|
+
const inputCost = (this.tokenUsage.inputTokens / 1000000) * rates.input;
|
|
213
|
+
const outputCost = (this.tokenUsage.outputTokens / 1000000) * rates.output;
|
|
214
|
+
const estimatedCost = inputCost + outputCost;
|
|
215
|
+
|
|
216
|
+
return {
|
|
217
|
+
inputTokens: this.tokenUsage.inputTokens,
|
|
218
|
+
outputTokens: this.tokenUsage.outputTokens,
|
|
219
|
+
totalTokens: total,
|
|
220
|
+
totalCalls: this.tokenUsage.totalCalls,
|
|
221
|
+
estimatedCost,
|
|
222
|
+
provider: this.providerName,
|
|
223
|
+
model: this.model
|
|
224
|
+
};
|
|
6
225
|
}
|
|
7
226
|
|
|
8
227
|
// Factory — async because of dynamic import (only loads the SDK you need)
|
|
9
228
|
static async create(providerName, model) {
|
|
229
|
+
// AVC_LLM_MOCK=1: return instant mock provider for E2E testing (no API calls)
|
|
230
|
+
if (process.env.AVC_LLM_MOCK) {
|
|
231
|
+
const { MockLLMProvider } = await import('./llm-mock.js');
|
|
232
|
+
return new MockLLMProvider();
|
|
233
|
+
}
|
|
234
|
+
|
|
10
235
|
switch (providerName) {
|
|
11
236
|
case 'claude': {
|
|
12
237
|
const { ClaudeProvider } = await import('./llm-claude.js');
|
|
@@ -16,8 +241,12 @@ export class LLMProvider {
|
|
|
16
241
|
const { GeminiProvider } = await import('./llm-gemini.js');
|
|
17
242
|
return new GeminiProvider(model);
|
|
18
243
|
}
|
|
244
|
+
case 'openai': {
|
|
245
|
+
const { OpenAIProvider } = await import('./llm-openai.js');
|
|
246
|
+
return new OpenAIProvider(model);
|
|
247
|
+
}
|
|
19
248
|
default:
|
|
20
|
-
throw new Error(`Unknown LLM provider: "${providerName}". Supported: claude, gemini`);
|
|
249
|
+
throw new Error(`Unknown LLM provider: "${providerName}". Supported: claude, gemini, openai`);
|
|
21
250
|
}
|
|
22
251
|
}
|
|
23
252
|
|
|
@@ -26,7 +255,10 @@ export class LLMProvider {
|
|
|
26
255
|
if (!this._client) {
|
|
27
256
|
this._client = this._createClient();
|
|
28
257
|
}
|
|
29
|
-
return this.
|
|
258
|
+
return this._withRetry(
|
|
259
|
+
() => this._callProvider(prompt, maxTokens, systemInstructions),
|
|
260
|
+
'Text generation'
|
|
261
|
+
);
|
|
30
262
|
}
|
|
31
263
|
|
|
32
264
|
// Validate API key and provider connectivity with a minimal test call
|
|
@@ -57,6 +289,11 @@ export class LLMProvider {
|
|
|
57
289
|
}
|
|
58
290
|
}
|
|
59
291
|
|
|
292
|
+
  /**
   * Generate structured JSON output from the model.
   *
   * Abstract hook: concrete providers (claude/gemini/openai subclasses)
   * must override this; the base implementation always throws.
   * @param {string} prompt - Prompt to send to the model
   * @param {string|null} agentInstructions - Optional agent/system instructions
   * @throws {Error} Always, when not overridden by a subclass
   */
  async generateJSON(prompt, agentInstructions = null) {
    throw new Error(`${this.constructor.name} must implement generateJSON()`);
  }
|
|
296
|
+
|
|
60
297
|
  // Subclass hooks — throw if not overridden
  /** Create the underlying SDK client; abstract — concrete providers must override. @throws {Error} always in the base class */
  _createClient() { throw new Error(`${this.constructor.name} must implement _createClient()`); }
|
|
62
299
|
  /** Perform one raw model call; abstract — concrete providers must override. @throws {Error} always in the base class */
  async _callProvider(prompt, maxTokens, systemInstructions) { throw new Error(`${this.constructor.name} must implement _callProvider()`); }
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
/**
 * Model-specific maximum output token limits (ACTUAL API MAXIMUMS)
 *
 * These limits represent the ACTUAL maximum output tokens each model can generate
 * according to official API documentation (verified February 2026).
 *
 * Critical Updates (Feb 2026):
 * - Claude 4.x models: 64K-128K tokens (8x-16x previous limits)
 * - Gemini 2.5 Flash: 65,535 tokens (8x previous limit)
 * - GPT-4o/mini: 16,384 tokens (unchanged)
 *
 * Using actual model limits ensures complete documentation generation without truncation.
 *
 * Sources:
 * - Claude: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude/
 * - Gemini: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/
 * - OpenAI: https://community.openai.com/t/what-is-the-token-limit-of-the-new-version-gpt-4o/752528
 */

// Frozen so importers cannot silently mutate shared limits at runtime.
export const MODEL_MAX_TOKENS = Object.freeze({
  // Claude models (Anthropic)
  // Source: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude/
  'claude-sonnet-4-5-20250929': 64000, // Was 8192 - ACTUAL: 64,000 tokens
  'claude-sonnet-4-5': 64000,
  'claude-sonnet-4': 64000,
  'claude-opus-4-6': 128000, // Was 16384 - ACTUAL: 128,000 tokens
  'claude-opus-4': 128000,
  'claude-haiku-4-5-20251001': 64000, // Was 8192 - ACTUAL: 64,000 tokens
  'claude-haiku-4-5': 64000,
  'claude-haiku-4': 64000,

  // OpenAI models
  // Source: https://community.openai.com/t/what-is-the-token-limit-of-the-new-version-gpt-4o/752528
  'gpt-5.2-chat-latest': 16384, // Test/future model
  'gpt-5.2-pro': 16384,
  'gpt-5.2-codex': 16384,
  'gpt-4o': 16384, // Correct - max 16,384 tokens
  'gpt-4o-2024-11-20': 16384,
  'gpt-4o-mini': 16384, // Correct - max 16,384 tokens
  'gpt-4-turbo': 4096,
  'gpt-4': 8192,
  'gpt-3.5-turbo': 4096,

  // Google Gemini models
  // Source: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/
  'gemini-3.1-pro-preview': 65536, // ACTUAL: 65,536 tokens (verified March 2026)
  'gemini-3-flash-preview': 65536, // ACTUAL: 65,536 tokens (verified March 2026)
  'gemini-2.5-pro': 65536, // ACTUAL: 65,536 tokens
  'gemini-2.5-flash': 65535, // Was 8192 - ACTUAL: 65,535 tokens
  'gemini-2.5-flash-lite': 32768, // ACTUAL: 32,768 tokens
  'gemini-2.0-flash': 8192, // Correct - max 8,192 tokens
  'gemini-2.0-flash-exp': 8192,
  'gemini-1.5-pro': 8192,
  'gemini-1.5-flash': 8192,
  'gemini-pro': 8192,

  // Default fallback
  'default': 8192
});
|
|
60
|
+
|
|
61
|
+
/**
 * Get maximum output tokens for a specific model.
 *
 * Lookup order: exact model id → versionless base id (first three
 * dash-separated segments, e.g. "claude-opus-4-latest" → "claude-opus-4")
 * → the 'default' entry, with a console warning on fallback.
 * @param {string} modelId - The model identifier
 * @returns {number} Maximum output tokens supported by the model
 */
export function getMaxTokensForModel(modelId) {
  const fallback = MODEL_MAX_TOKENS['default'];
  if (!modelId) {
    return fallback;
  }

  // Own-property lookups only: a plain truthy index check would also match
  // inherited Object.prototype members (e.g. a modelId of "toString").
  if (Object.hasOwn(MODEL_MAX_TOKENS, modelId)) {
    return MODEL_MAX_TOKENS[modelId];
  }

  // Fuzzy match for versioned models (e.g., "claude-opus-4-latest" → "claude-opus-4")
  const baseModel = modelId.split('-').slice(0, 3).join('-');
  if (Object.hasOwn(MODEL_MAX_TOKENS, baseModel)) {
    return MODEL_MAX_TOKENS[baseModel];
  }

  // Fallback to default
  console.warn(`No max tokens configured for model "${modelId}", using default: ${MODEL_MAX_TOKENS['default']}`);
  return fallback;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Clamp a requested output-token budget to the model's documented maximum.
 *
 * Emits a console warning whenever the request exceeds the limit.
 * @param {string} modelId - The model identifier
 * @param {number} requestedTokens - Requested output tokens
 * @returns {number} The requested value, capped at the model's limit
 */
export function clampTokensToModelLimit(modelId, requestedTokens) {
  const limit = getMaxTokensForModel(modelId);
  if (requestedTokens > limit) {
    console.warn(`Requested ${requestedTokens} tokens for ${modelId}, clamping to model limit: ${limit}`);
    return limit;
  }
  return requestedTokens;
}
|