@agile-vibe-coding/avc 0.1.1 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli/agent-loader.js +21 -0
- package/cli/agents/agent-selector.md +152 -0
- package/cli/agents/architecture-recommender.md +418 -0
- package/cli/agents/code-implementer.md +117 -0
- package/cli/agents/code-validator.md +80 -0
- package/cli/agents/context-reviewer-epic.md +101 -0
- package/cli/agents/context-reviewer-story.md +92 -0
- package/cli/agents/context-writer-epic.md +145 -0
- package/cli/agents/context-writer-story.md +111 -0
- package/cli/agents/database-deep-dive.md +470 -0
- package/cli/agents/database-recommender.md +634 -0
- package/cli/agents/doc-distributor.md +176 -0
- package/cli/agents/doc-writer-epic.md +42 -0
- package/cli/agents/doc-writer-story.md +43 -0
- package/cli/agents/documentation-updater.md +203 -0
- package/cli/agents/duplicate-detector.md +110 -0
- package/cli/agents/epic-story-decomposer.md +559 -0
- package/cli/agents/feature-context-generator.md +91 -0
- package/cli/agents/gap-checker-epic.md +52 -0
- package/cli/agents/impact-checker-story.md +51 -0
- package/cli/agents/migration-guide-generator.md +305 -0
- package/cli/agents/mission-scope-generator.md +143 -0
- package/cli/agents/mission-scope-validator.md +146 -0
- package/cli/agents/project-context-extractor.md +122 -0
- package/cli/agents/project-documentation-creator.json +226 -0
- package/cli/agents/project-documentation-creator.md +595 -0
- package/cli/agents/question-prefiller.md +269 -0
- package/cli/agents/refiner-epic.md +39 -0
- package/cli/agents/refiner-story.md +42 -0
- package/cli/agents/scaffolding-generator.md +99 -0
- package/cli/agents/seed-validator.md +71 -0
- package/cli/agents/story-doc-enricher.md +133 -0
- package/cli/agents/story-scope-reviewer.md +147 -0
- package/cli/agents/story-splitter.md +83 -0
- package/cli/agents/suggestion-business-analyst.md +88 -0
- package/cli/agents/suggestion-deployment-architect.md +263 -0
- package/cli/agents/suggestion-product-manager.md +129 -0
- package/cli/agents/suggestion-security-specialist.md +156 -0
- package/cli/agents/suggestion-technical-architect.md +269 -0
- package/cli/agents/suggestion-ux-researcher.md +93 -0
- package/cli/agents/task-subtask-decomposer.md +188 -0
- package/cli/agents/validator-documentation.json +183 -0
- package/cli/agents/validator-documentation.md +455 -0
- package/cli/agents/validator-selector.md +211 -0
- package/cli/ansi-colors.js +21 -0
- package/cli/api-reference-tool.js +368 -0
- package/cli/build-docs.js +29 -8
- package/cli/ceremony-history.js +369 -0
- package/cli/checks/catalog.json +76 -0
- package/cli/checks/code/quality.json +26 -0
- package/cli/checks/code/testing.json +14 -0
- package/cli/checks/code/traceability.json +26 -0
- package/cli/checks/cross-refs/epic.json +171 -0
- package/cli/checks/cross-refs/story.json +149 -0
- package/cli/checks/epic/api.json +114 -0
- package/cli/checks/epic/backend.json +126 -0
- package/cli/checks/epic/cloud.json +126 -0
- package/cli/checks/epic/data.json +102 -0
- package/cli/checks/epic/database.json +114 -0
- package/cli/checks/epic/developer.json +182 -0
- package/cli/checks/epic/devops.json +174 -0
- package/cli/checks/epic/frontend.json +162 -0
- package/cli/checks/epic/mobile.json +102 -0
- package/cli/checks/epic/qa.json +90 -0
- package/cli/checks/epic/security.json +184 -0
- package/cli/checks/epic/solution-architect.json +192 -0
- package/cli/checks/epic/test-architect.json +90 -0
- package/cli/checks/epic/ui.json +102 -0
- package/cli/checks/epic/ux.json +90 -0
- package/cli/checks/fixes/epic-fix-template.md +10 -0
- package/cli/checks/fixes/story-fix-template.md +10 -0
- package/cli/checks/story/api.json +186 -0
- package/cli/checks/story/backend.json +102 -0
- package/cli/checks/story/cloud.json +102 -0
- package/cli/checks/story/data.json +210 -0
- package/cli/checks/story/database.json +102 -0
- package/cli/checks/story/developer.json +168 -0
- package/cli/checks/story/devops.json +102 -0
- package/cli/checks/story/frontend.json +174 -0
- package/cli/checks/story/mobile.json +102 -0
- package/cli/checks/story/qa.json +210 -0
- package/cli/checks/story/security.json +198 -0
- package/cli/checks/story/solution-architect.json +230 -0
- package/cli/checks/story/test-architect.json +210 -0
- package/cli/checks/story/ui.json +102 -0
- package/cli/checks/story/ux.json +102 -0
- package/cli/coding-order.js +401 -0
- package/cli/command-logger.js +49 -12
- package/cli/components/static-output.js +63 -0
- package/cli/console-output-manager.js +94 -0
- package/cli/dependency-checker.js +72 -0
- package/cli/docs-sync.js +306 -0
- package/cli/epic-story-validator.js +659 -0
- package/cli/evaluation-prompts.js +1008 -0
- package/cli/execution-context.js +195 -0
- package/cli/generate-summary-table.js +340 -0
- package/cli/init-model-config.js +704 -0
- package/cli/init.js +1737 -278
- package/cli/kanban-server-manager.js +227 -0
- package/cli/llm-claude.js +150 -1
- package/cli/llm-gemini.js +109 -0
- package/cli/llm-local.js +493 -0
- package/cli/llm-mock.js +233 -0
- package/cli/llm-openai.js +454 -0
- package/cli/llm-provider.js +379 -3
- package/cli/llm-token-limits.js +211 -0
- package/cli/llm-verifier.js +662 -0
- package/cli/llm-xiaomi.js +143 -0
- package/cli/message-constants.js +49 -0
- package/cli/message-manager.js +334 -0
- package/cli/message-types.js +96 -0
- package/cli/messaging-api.js +291 -0
- package/cli/micro-check-fixer.js +335 -0
- package/cli/micro-check-runner.js +449 -0
- package/cli/micro-check-scorer.js +148 -0
- package/cli/micro-check-validator.js +538 -0
- package/cli/model-pricing.js +192 -0
- package/cli/model-query-engine.js +468 -0
- package/cli/model-recommendation-analyzer.js +495 -0
- package/cli/model-selector.js +270 -0
- package/cli/output-buffer.js +107 -0
- package/cli/process-manager.js +73 -2
- package/cli/prompt-logger.js +57 -0
- package/cli/repl-ink.js +4625 -1094
- package/cli/repl-old.js +3 -4
- package/cli/seed-processor.js +962 -0
- package/cli/sprint-planning-processor.js +4162 -0
- package/cli/template-processor.js +2149 -105
- package/cli/templates/project.md +25 -8
- package/cli/templates/vitepress-config.mts.template +5 -4
- package/cli/token-tracker.js +547 -0
- package/cli/tools/generate-story-validators.js +317 -0
- package/cli/tools/generate-validators.js +669 -0
- package/cli/update-checker.js +19 -17
- package/cli/update-notifier.js +4 -4
- package/cli/validation-router.js +667 -0
- package/cli/verification-tracker.js +563 -0
- package/cli/worktree-runner.js +654 -0
- package/kanban/README.md +386 -0
- package/kanban/client/README.md +205 -0
- package/kanban/client/components.json +20 -0
- package/kanban/client/dist/assets/index-D_KC5EQT.css +1 -0
- package/kanban/client/dist/assets/index-DjY5zqW7.js +351 -0
- package/kanban/client/dist/index.html +16 -0
- package/kanban/client/dist/vite.svg +1 -0
- package/kanban/client/index.html +15 -0
- package/kanban/client/package-lock.json +9442 -0
- package/kanban/client/package.json +44 -0
- package/kanban/client/postcss.config.js +6 -0
- package/kanban/client/public/vite.svg +1 -0
- package/kanban/client/src/App.jsx +651 -0
- package/kanban/client/src/components/ProjectFileEditorPopup.jsx +117 -0
- package/kanban/client/src/components/ceremony/AskArchPopup.jsx +420 -0
- package/kanban/client/src/components/ceremony/AskModelPopup.jsx +629 -0
- package/kanban/client/src/components/ceremony/CeremonyWorkflowModal.jsx +1133 -0
- package/kanban/client/src/components/ceremony/EpicStorySelectionModal.jsx +254 -0
- package/kanban/client/src/components/ceremony/ProviderSwitcherButton.jsx +290 -0
- package/kanban/client/src/components/ceremony/SponsorCallModal.jsx +686 -0
- package/kanban/client/src/components/ceremony/SprintPlanningModal.jsx +838 -0
- package/kanban/client/src/components/ceremony/steps/ArchitectureStep.jsx +150 -0
- package/kanban/client/src/components/ceremony/steps/CompleteStep.jsx +136 -0
- package/kanban/client/src/components/ceremony/steps/DatabaseStep.jsx +202 -0
- package/kanban/client/src/components/ceremony/steps/DeploymentStep.jsx +123 -0
- package/kanban/client/src/components/ceremony/steps/MissionStep.jsx +106 -0
- package/kanban/client/src/components/ceremony/steps/ReviewAnswersStep.jsx +329 -0
- package/kanban/client/src/components/ceremony/steps/RunningStep.jsx +249 -0
- package/kanban/client/src/components/kanban/CardDetailModal.jsx +646 -0
- package/kanban/client/src/components/kanban/EpicSection.jsx +146 -0
- package/kanban/client/src/components/kanban/FilterToolbar.jsx +222 -0
- package/kanban/client/src/components/kanban/GroupingSelector.jsx +63 -0
- package/kanban/client/src/components/kanban/KanbanBoard.jsx +211 -0
- package/kanban/client/src/components/kanban/KanbanCard.jsx +147 -0
- package/kanban/client/src/components/kanban/KanbanColumn.jsx +90 -0
- package/kanban/client/src/components/kanban/RefineWorkItemPopup.jsx +784 -0
- package/kanban/client/src/components/kanban/RunButton.jsx +162 -0
- package/kanban/client/src/components/kanban/SeedButton.jsx +176 -0
- package/kanban/client/src/components/layout/LoadingScreen.jsx +82 -0
- package/kanban/client/src/components/process/ProcessMonitorBar.jsx +80 -0
- package/kanban/client/src/components/settings/AgentEditorPopup.jsx +171 -0
- package/kanban/client/src/components/settings/AgentsTab.jsx +381 -0
- package/kanban/client/src/components/settings/ApiKeysTab.jsx +142 -0
- package/kanban/client/src/components/settings/CeremonyModelsTab.jsx +105 -0
- package/kanban/client/src/components/settings/CheckEditorPopup.jsx +507 -0
- package/kanban/client/src/components/settings/CostThresholdsTab.jsx +95 -0
- package/kanban/client/src/components/settings/ModelPricingTab.jsx +269 -0
- package/kanban/client/src/components/settings/OpenAIAuthSection.jsx +412 -0
- package/kanban/client/src/components/settings/ServersTab.jsx +121 -0
- package/kanban/client/src/components/settings/SettingsModal.jsx +84 -0
- package/kanban/client/src/components/stats/CostModal.jsx +384 -0
- package/kanban/client/src/components/ui/badge.jsx +27 -0
- package/kanban/client/src/components/ui/dialog.jsx +121 -0
- package/kanban/client/src/components/ui/tabs.jsx +85 -0
- package/kanban/client/src/hooks/__tests__/useGrouping.test.js +232 -0
- package/kanban/client/src/hooks/useGrouping.js +177 -0
- package/kanban/client/src/hooks/useWebSocket.js +120 -0
- package/kanban/client/src/lib/__tests__/api.test.js +196 -0
- package/kanban/client/src/lib/__tests__/status-grouping.test.js +94 -0
- package/kanban/client/src/lib/api.js +515 -0
- package/kanban/client/src/lib/status-grouping.js +154 -0
- package/kanban/client/src/lib/utils.js +11 -0
- package/kanban/client/src/main.jsx +10 -0
- package/kanban/client/src/store/__tests__/kanbanStore.test.js +164 -0
- package/kanban/client/src/store/ceremonyStore.js +172 -0
- package/kanban/client/src/store/filterStore.js +201 -0
- package/kanban/client/src/store/kanbanStore.js +123 -0
- package/kanban/client/src/store/processStore.js +65 -0
- package/kanban/client/src/store/sprintPlanningStore.js +33 -0
- package/kanban/client/src/styles/globals.css +59 -0
- package/kanban/client/tailwind.config.js +77 -0
- package/kanban/client/vite.config.js +28 -0
- package/kanban/client/vitest.config.js +28 -0
- package/kanban/dev-start.sh +47 -0
- package/kanban/package.json +12 -0
- package/kanban/server/index.js +537 -0
- package/kanban/server/routes/ceremony.js +454 -0
- package/kanban/server/routes/costs.js +163 -0
- package/kanban/server/routes/openai-oauth.js +366 -0
- package/kanban/server/routes/processes.js +50 -0
- package/kanban/server/routes/settings.js +736 -0
- package/kanban/server/routes/websocket.js +281 -0
- package/kanban/server/routes/work-items.js +487 -0
- package/kanban/server/services/CeremonyService.js +1441 -0
- package/kanban/server/services/FileSystemScanner.js +95 -0
- package/kanban/server/services/FileWatcher.js +144 -0
- package/kanban/server/services/HierarchyBuilder.js +196 -0
- package/kanban/server/services/ProcessRegistry.js +122 -0
- package/kanban/server/services/TaskRunnerService.js +261 -0
- package/kanban/server/services/WorkItemReader.js +123 -0
- package/kanban/server/services/WorkItemRefineService.js +510 -0
- package/kanban/server/start.js +49 -0
- package/kanban/server/utils/kanban-logger.js +132 -0
- package/kanban/server/utils/markdown.js +91 -0
- package/kanban/server/utils/status-grouping.js +107 -0
- package/kanban/server/workers/run-task-worker.js +121 -0
- package/kanban/server/workers/seed-worker.js +94 -0
- package/kanban/server/workers/sponsor-call-worker.js +92 -0
- package/kanban/server/workers/sprint-planning-worker.js +212 -0
- package/package.json +19 -7
- package/cli/agents/documentation.md +0 -302
package/cli/llm-provider.js
CHANGED
|
@@ -1,12 +1,368 @@
|
|
|
1
1
|
export class LLMProvider {
|
|
2
|
-
constructor(providerName, model) {
|
|
2
|
+
constructor(providerName, model, retryConfig = {}) {
|
|
3
3
|
this.providerName = providerName;
|
|
4
4
|
this.model = model;
|
|
5
5
|
this._client = null;
|
|
6
|
+
this.tokenUsage = {
|
|
7
|
+
inputTokens: 0,
|
|
8
|
+
outputTokens: 0,
|
|
9
|
+
cachedInputTokens: 0, // cache reads (already counted inside inputTokens)
|
|
10
|
+
cacheWriteTokens: 0, // cache writes (billed at 1.25× on Claude, free on OpenAI/Gemini)
|
|
11
|
+
totalCalls: 0
|
|
12
|
+
};
|
|
13
|
+
|
|
14
|
+
// Per-call token callbacks (fired synchronously after each _trackTokens call)
|
|
15
|
+
this._callCallbacks = [];
|
|
16
|
+
|
|
17
|
+
// Retry configuration
|
|
18
|
+
this.retryConfig = {
|
|
19
|
+
maxRetries: retryConfig.maxRetries || 5,
|
|
20
|
+
initialDelay: retryConfig.initialDelay || 1000, // 1 second
|
|
21
|
+
maxDelay: retryConfig.maxDelay || 32000, // 32 seconds
|
|
22
|
+
backoffMultiplier: retryConfig.backoffMultiplier || 2
|
|
23
|
+
};
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* Check if error is retryable (high demand, rate limit, temporary failures)
|
|
28
|
+
* Provider-specific error codes:
|
|
29
|
+
* - OpenAI: 429 (rate limit), 503 (overloaded)
|
|
30
|
+
* - Claude: 429 (rate_limit_error), 529 (overloaded_error)
|
|
31
|
+
* - Gemini: 429 (RESOURCE_EXHAUSTED), 503 (UNAVAILABLE/overloaded)
|
|
32
|
+
*
|
|
33
|
+
* @param {Error} error - The error to check
|
|
34
|
+
* @returns {boolean} True if error is retryable
|
|
35
|
+
*/
|
|
36
|
+
_isRetryableError(error) {
|
|
37
|
+
const errorMessage = error.message?.toLowerCase() || '';
|
|
38
|
+
const errorCode = error.status || error.code;
|
|
39
|
+
const errorType = error.type || error.error?.type || '';
|
|
40
|
+
|
|
41
|
+
// Check for provider-specific error codes
|
|
42
|
+
const retryableStatusCodes = [
|
|
43
|
+
429, // Rate limit (all providers)
|
|
44
|
+
503, // Service unavailable (OpenAI, Gemini)
|
|
45
|
+
529 // Overloaded (Claude/Anthropic)
|
|
46
|
+
];
|
|
47
|
+
|
|
48
|
+
const hasRetryableCode = retryableStatusCodes.includes(errorCode);
|
|
49
|
+
|
|
50
|
+
// Check for provider-specific error types
|
|
51
|
+
const retryableErrorTypes = [
|
|
52
|
+
'rate_limit_error', // Claude
|
|
53
|
+
'overloaded_error', // Claude
|
|
54
|
+
'resource_exhausted', // Gemini
|
|
55
|
+
'unavailable' // Gemini
|
|
56
|
+
];
|
|
57
|
+
|
|
58
|
+
const hasRetryableType = retryableErrorTypes.some(type =>
|
|
59
|
+
errorType.toLowerCase().includes(type)
|
|
60
|
+
);
|
|
61
|
+
|
|
62
|
+
// Check for high demand/overload message patterns
|
|
63
|
+
const highDemandPatterns = [
|
|
64
|
+
'high demand',
|
|
65
|
+
'experiencing high demand',
|
|
66
|
+
'spikes in demand',
|
|
67
|
+
'try again later',
|
|
68
|
+
'temporarily unavailable',
|
|
69
|
+
'service unavailable',
|
|
70
|
+
'overloaded',
|
|
71
|
+
'model is overloaded',
|
|
72
|
+
'too many requests',
|
|
73
|
+
'rate limit',
|
|
74
|
+
'quota exceeded',
|
|
75
|
+
'resource exhausted',
|
|
76
|
+
'resource has been exhausted',
|
|
77
|
+
// Transient network/connection errors (e.g. WSL2 IPv6, momentary TCP failures)
|
|
78
|
+
'connection error',
|
|
79
|
+
'econnreset',
|
|
80
|
+
'econnrefused',
|
|
81
|
+
'enotfound',
|
|
82
|
+
'etimedout',
|
|
83
|
+
'network error',
|
|
84
|
+
'fetch failed',
|
|
85
|
+
];
|
|
86
|
+
|
|
87
|
+
const hasHighDemandMessage = highDemandPatterns.some(pattern =>
|
|
88
|
+
errorMessage.includes(pattern)
|
|
89
|
+
);
|
|
90
|
+
|
|
91
|
+
return hasRetryableCode || hasRetryableType || hasHighDemandMessage;
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
/**
|
|
95
|
+
* Sleep for specified milliseconds
|
|
96
|
+
* @param {number} ms - Milliseconds to sleep
|
|
97
|
+
* @returns {Promise<void>}
|
|
98
|
+
*/
|
|
99
|
+
_sleep(ms) {
|
|
100
|
+
return new Promise(resolve => setTimeout(resolve, ms));
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
/**
|
|
104
|
+
* Extract retry delay from error headers (e.g., Claude's retry-after header)
|
|
105
|
+
* @param {Error} error - The error object
|
|
106
|
+
* @returns {number|null} Delay in milliseconds, or null if not found
|
|
107
|
+
*/
|
|
108
|
+
_getRetryAfterDelay(error) {
|
|
109
|
+
// Check for retry-after header (Claude provides this)
|
|
110
|
+
const retryAfter = error.headers?.['retry-after'] || error.error?.headers?.['retry-after'];
|
|
111
|
+
|
|
112
|
+
if (retryAfter) {
|
|
113
|
+
// retry-after can be in seconds
|
|
114
|
+
const seconds = parseInt(retryAfter, 10);
|
|
115
|
+
if (!isNaN(seconds)) {
|
|
116
|
+
return seconds * 1000; // Convert to milliseconds
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
return null;
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
/**
|
|
124
|
+
* Execute an async function with exponential backoff retry strategy
|
|
125
|
+
* Uses retry-after header when available (Claude), otherwise exponential backoff
|
|
126
|
+
*
|
|
127
|
+
* @param {Function} fn - Async function to execute
|
|
128
|
+
* @param {string} operationName - Name of operation for logging
|
|
129
|
+
* @returns {Promise<any>} Result of the function
|
|
130
|
+
*/
|
|
131
|
+
async _withRetry(fn, operationName = 'API call') {
|
|
132
|
+
let lastError;
|
|
133
|
+
let delay = this.retryConfig.initialDelay;
|
|
134
|
+
|
|
135
|
+
for (let attempt = 0; attempt <= this.retryConfig.maxRetries; attempt++) {
|
|
136
|
+
try {
|
|
137
|
+
return await fn();
|
|
138
|
+
} catch (error) {
|
|
139
|
+
lastError = error;
|
|
140
|
+
|
|
141
|
+
// Don't retry if it's not a retryable error
|
|
142
|
+
if (!this._isRetryableError(error)) {
|
|
143
|
+
throw error;
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
// Don't retry if we've exhausted all attempts
|
|
147
|
+
if (attempt === this.retryConfig.maxRetries) {
|
|
148
|
+
console.error(`\n${operationName} failed after ${this.retryConfig.maxRetries + 1} attempts`);
|
|
149
|
+
throw error;
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
// Check for retry-after header (Claude provides this)
|
|
153
|
+
const retryAfterDelay = this._getRetryAfterDelay(error);
|
|
154
|
+
const currentDelay = retryAfterDelay || delay;
|
|
155
|
+
|
|
156
|
+
// Log retry attempt with helpful info
|
|
157
|
+
const retrySource = retryAfterDelay ? 'server directive' : 'exponential backoff';
|
|
158
|
+
console.log(`\n⏳ ${operationName} failed (attempt ${attempt + 1}/${this.retryConfig.maxRetries + 1})`);
|
|
159
|
+
const causeMsg = error.cause?.message || error.cause?.code || '';
|
|
160
|
+
console.log(` Error: ${error.message}${causeMsg ? ` (cause: ${causeMsg})` : ''}`);
|
|
161
|
+
console.log(` Retrying in ${currentDelay / 1000}s (${retrySource})...`);
|
|
162
|
+
|
|
163
|
+
// Wait before retrying
|
|
164
|
+
await this._sleep(currentDelay);
|
|
165
|
+
|
|
166
|
+
// Exponential backoff with max cap (only if not using retry-after)
|
|
167
|
+
if (!retryAfterDelay) {
|
|
168
|
+
delay = Math.min(
|
|
169
|
+
delay * this.retryConfig.backoffMultiplier,
|
|
170
|
+
this.retryConfig.maxDelay
|
|
171
|
+
);
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
throw lastError;
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
/**
|
|
180
|
+
* Register a callback to be fired synchronously after every LLM API call.
|
|
181
|
+
* Receives { input, output, provider, model } delta for that single call.
|
|
182
|
+
* @param {Function} fn - Callback function
|
|
183
|
+
*/
|
|
184
|
+
onCall(fn) {
|
|
185
|
+
this._callCallbacks.push(fn);
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
/**
|
|
189
|
+
* Attach a PromptLogger instance and set the stage label for subsequent calls.
|
|
190
|
+
* @param {import('./prompt-logger.js').PromptLogger} promptLogger
|
|
191
|
+
* @param {string} [stage]
|
|
192
|
+
*/
|
|
193
|
+
setPromptLogger(promptLogger, stage) {
|
|
194
|
+
this._promptLogger = promptLogger;
|
|
195
|
+
this._promptLoggerStage = stage || 'unknown';
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
/**
|
|
199
|
+
* Track token usage from API response and fire per-call callbacks.
|
|
200
|
+
* Optionally writes a prompt/response payload to the attached PromptLogger.
|
|
201
|
+
* @param {Object} usage - Usage object from API response
|
|
202
|
+
* @param {Object|null} [promptPayload] - Optional { prompt, agentInstructions, response, elapsed }
|
|
203
|
+
*/
|
|
204
|
+
_trackTokens(usage, promptPayload = null) {
|
|
205
|
+
if (usage) {
|
|
206
|
+
const deltaIn = usage.input_tokens || usage.inputTokens || usage.promptTokenCount || usage.prompt_tokens || 0;
|
|
207
|
+
const deltaOut = usage.output_tokens || usage.outputTokens || usage.candidatesTokenCount || usage.completion_tokens || 0;
|
|
208
|
+
// Cache stats — each provider uses a different field name:
|
|
209
|
+
// Claude: cache_read_input_tokens / cache_creation_input_tokens
|
|
210
|
+
// OpenAI: prompt_tokens_details.cached_tokens
|
|
211
|
+
// Gemini: cachedContentTokenCount
|
|
212
|
+
const deltaCacheRead = usage.cache_read_input_tokens
|
|
213
|
+
|| usage.cachedContentTokenCount
|
|
214
|
+
|| usage.prompt_tokens_details?.cached_tokens
|
|
215
|
+
|| 0;
|
|
216
|
+
const deltaCacheWrite = usage.cache_creation_input_tokens || 0;
|
|
217
|
+
this.tokenUsage.inputTokens += deltaIn;
|
|
218
|
+
this.tokenUsage.outputTokens += deltaOut;
|
|
219
|
+
this.tokenUsage.cachedInputTokens += deltaCacheRead;
|
|
220
|
+
this.tokenUsage.cacheWriteTokens += deltaCacheWrite;
|
|
221
|
+
this.tokenUsage.totalCalls++;
|
|
222
|
+
if (deltaCacheRead > 0 || deltaCacheWrite > 0) {
|
|
223
|
+
console.log(` [cache] write=${deltaCacheWrite} read=${deltaCacheRead} tokens`);
|
|
224
|
+
}
|
|
225
|
+
if (this._callCallbacks.length > 0 && (deltaIn > 0 || deltaOut > 0)) {
|
|
226
|
+
const delta = { input: deltaIn, output: deltaOut, cached: deltaCacheRead, cacheWrite: deltaCacheWrite, provider: this.providerName, model: this.model };
|
|
227
|
+
for (const fn of this._callCallbacks) {
|
|
228
|
+
try { fn(delta); } catch (_) {}
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
if (this._promptLogger && promptPayload) {
|
|
232
|
+
this._promptLogger.write({
|
|
233
|
+
ceremony: this._promptLogger.ceremony,
|
|
234
|
+
stage: this._promptLoggerStage || 'unknown',
|
|
235
|
+
call: (this._promptLogger.callCount || 0) + 1,
|
|
236
|
+
timestamp: new Date().toISOString(),
|
|
237
|
+
elapsed_ms: promptPayload.elapsed ?? null,
|
|
238
|
+
provider: this.providerName,
|
|
239
|
+
model: this.model,
|
|
240
|
+
tokens: { input: deltaIn, output: deltaOut },
|
|
241
|
+
prompt: promptPayload.prompt ?? null,
|
|
242
|
+
agentInstructions: promptPayload.agentInstructions ?? null,
|
|
243
|
+
response: promptPayload.response ?? null,
|
|
244
|
+
});
|
|
245
|
+
}
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
/**
|
|
250
|
+
* Get token usage statistics with cost estimate
|
|
251
|
+
* @returns {Object} Token usage and cost information
|
|
252
|
+
*/
|
|
253
|
+
getTokenUsage() {
|
|
254
|
+
const total = this.tokenUsage.inputTokens + this.tokenUsage.outputTokens;
|
|
255
|
+
|
|
256
|
+
// Pricing per 1M tokens (as of 2026-02)
|
|
257
|
+
const pricing = {
|
|
258
|
+
'claude': { input: 3.00, output: 15.00 }, // Claude Sonnet 4.5
|
|
259
|
+
'gemini': { input: 0.15, output: 0.60 }, // Gemini 2.0 Flash
|
|
260
|
+
'openai': { input: 1.75, output: 14.00 }, // GPT-5.2
|
|
261
|
+
'local': { input: 0, output: 0 }, // Local models — free
|
|
262
|
+
'xiaomi': { input: 0.09, output: 0.29 } // MiMo V2 Flash (default)
|
|
263
|
+
};
|
|
264
|
+
|
|
265
|
+
const rates = pricing[this.providerName] || { input: 0, output: 0 };
|
|
266
|
+
const inputCost = (this.tokenUsage.inputTokens / 1000000) * rates.input;
|
|
267
|
+
const outputCost = (this.tokenUsage.outputTokens / 1000000) * rates.output;
|
|
268
|
+
const estimatedCost = inputCost + outputCost;
|
|
269
|
+
|
|
270
|
+
return {
|
|
271
|
+
inputTokens: this.tokenUsage.inputTokens,
|
|
272
|
+
outputTokens: this.tokenUsage.outputTokens,
|
|
273
|
+
cachedInputTokens: this.tokenUsage.cachedInputTokens,
|
|
274
|
+
cacheWriteTokens: this.tokenUsage.cacheWriteTokens,
|
|
275
|
+
totalTokens: total,
|
|
276
|
+
totalCalls: this.tokenUsage.totalCalls,
|
|
277
|
+
estimatedCost,
|
|
278
|
+
provider: this.providerName,
|
|
279
|
+
model: this.model
|
|
280
|
+
};
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
/**
|
|
284
|
+
* Map of cloud providers to their required env var.
|
|
285
|
+
*/
|
|
286
|
+
static PROVIDER_ENV_MAP = {
|
|
287
|
+
'claude': 'ANTHROPIC_API_KEY',
|
|
288
|
+
'gemini': 'GEMINI_API_KEY',
|
|
289
|
+
'openai': 'OPENAI_API_KEY',
|
|
290
|
+
'xiaomi': 'XIAOMI_API_KEY',
|
|
291
|
+
};
|
|
292
|
+
|
|
293
|
+
/**
|
|
294
|
+
* Default model per provider (used as fallback when switching providers).
|
|
295
|
+
*/
|
|
296
|
+
static PROVIDER_DEFAULT_MODELS = {
|
|
297
|
+
'claude': 'claude-sonnet-4-6',
|
|
298
|
+
'gemini': 'gemini-2.5-flash',
|
|
299
|
+
'openai': 'gpt-4.1',
|
|
300
|
+
'xiaomi': 'MiMo-V2-Flash',
|
|
301
|
+
};
|
|
302
|
+
|
|
303
|
+
/**
|
|
304
|
+
* Check whether a provider has valid credentials available.
|
|
305
|
+
* For cloud providers this means the env var is set.
|
|
306
|
+
* For local, probe the server (with a short timeout).
|
|
307
|
+
* @param {string} providerName
|
|
308
|
+
* @returns {Promise<boolean>}
|
|
309
|
+
*/
|
|
310
|
+
static async hasProviderCredentials(providerName) {
|
|
311
|
+
if (providerName === 'local') {
|
|
312
|
+
try {
|
|
313
|
+
const { discoverLocalServers } = await import('./llm-local.js');
|
|
314
|
+
const servers = await Promise.race([
|
|
315
|
+
discoverLocalServers(),
|
|
316
|
+
new Promise((_, reject) => setTimeout(() => reject(new Error('timeout')), 3000))
|
|
317
|
+
]);
|
|
318
|
+
return servers.length > 0;
|
|
319
|
+
} catch {
|
|
320
|
+
return false;
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
if (providerName === 'openai') {
|
|
324
|
+
const oauthMode = process.env.OPENAI_AUTH_MODE === 'oauth';
|
|
325
|
+
return !!(process.env.OPENAI_API_KEY || (oauthMode && process.env.OPENAI_OAUTH_TOKEN));
|
|
326
|
+
}
|
|
327
|
+
const envVar = LLMProvider.PROVIDER_ENV_MAP[providerName];
|
|
328
|
+
return envVar ? !!process.env[envVar] : false;
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
/**
|
|
332
|
+
* Resolve a provider+model pair, falling back to any available provider
|
|
333
|
+
* if the requested one has no credentials.
|
|
334
|
+
* @param {string} provider - Requested provider name
|
|
335
|
+
* @param {string} model - Requested model name
|
|
336
|
+
* @returns {Promise<{ provider: string, model: string, fellBack: boolean }>}
|
|
337
|
+
*/
|
|
338
|
+
static async resolveAvailableProvider(provider, model) {
|
|
339
|
+
// Check requested provider first
|
|
340
|
+
if (await LLMProvider.hasProviderCredentials(provider)) {
|
|
341
|
+
return { provider, model, fellBack: false };
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
// Fallback priority: claude → gemini → openai → xiaomi → local
|
|
345
|
+
const fallbackOrder = ['claude', 'gemini', 'openai', 'xiaomi', 'local'];
|
|
346
|
+
for (const candidate of fallbackOrder) {
|
|
347
|
+
if (candidate === provider) continue; // already checked
|
|
348
|
+
if (await LLMProvider.hasProviderCredentials(candidate)) {
|
|
349
|
+
const fallbackModel = LLMProvider.PROVIDER_DEFAULT_MODELS[candidate] || candidate;
|
|
350
|
+
return { provider: candidate, model: fallbackModel, fellBack: true };
|
|
351
|
+
}
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
// Nothing available — return original and let create() throw the usual error
|
|
355
|
+
return { provider, model, fellBack: false };
|
|
6
356
|
}
|
|
7
357
|
|
|
8
358
|
// Factory — async because of dynamic import (only loads the SDK you need)
|
|
9
359
|
static async create(providerName, model) {
|
|
360
|
+
// AVC_LLM_MOCK=1: return instant mock provider for E2E testing (no API calls)
|
|
361
|
+
if (process.env.AVC_LLM_MOCK) {
|
|
362
|
+
const { MockLLMProvider } = await import('./llm-mock.js');
|
|
363
|
+
return new MockLLMProvider();
|
|
364
|
+
}
|
|
365
|
+
|
|
10
366
|
switch (providerName) {
|
|
11
367
|
case 'claude': {
|
|
12
368
|
const { ClaudeProvider } = await import('./llm-claude.js');
|
|
@@ -16,8 +372,20 @@ export class LLMProvider {
|
|
|
16
372
|
const { GeminiProvider } = await import('./llm-gemini.js');
|
|
17
373
|
return new GeminiProvider(model);
|
|
18
374
|
}
|
|
375
|
+
case 'openai': {
|
|
376
|
+
const { OpenAIProvider } = await import('./llm-openai.js');
|
|
377
|
+
return new OpenAIProvider(model);
|
|
378
|
+
}
|
|
379
|
+
case 'local': {
|
|
380
|
+
const { LocalProvider } = await import('./llm-local.js');
|
|
381
|
+
return new LocalProvider(model);
|
|
382
|
+
}
|
|
383
|
+
case 'xiaomi': {
|
|
384
|
+
const { XiaomiProvider } = await import('./llm-xiaomi.js');
|
|
385
|
+
return new XiaomiProvider(model);
|
|
386
|
+
}
|
|
19
387
|
default:
|
|
20
|
-
throw new Error(`Unknown LLM provider: "${providerName}". Supported: claude, gemini`);
|
|
388
|
+
throw new Error(`Unknown LLM provider: "${providerName}". Supported: claude, gemini, openai, local, xiaomi`);
|
|
21
389
|
}
|
|
22
390
|
}
|
|
23
391
|
|
|
@@ -26,7 +394,10 @@ export class LLMProvider {
|
|
|
26
394
|
if (!this._client) {
|
|
27
395
|
this._client = this._createClient();
|
|
28
396
|
}
|
|
29
|
-
return this.
|
|
397
|
+
return this._withRetry(
|
|
398
|
+
() => this._callProvider(prompt, maxTokens, systemInstructions),
|
|
399
|
+
'Text generation'
|
|
400
|
+
);
|
|
30
401
|
}
|
|
31
402
|
|
|
32
403
|
// Validate API key and provider connectivity with a minimal test call
|
|
@@ -57,6 +428,11 @@ export class LLMProvider {
|
|
|
57
428
|
}
|
|
58
429
|
}
|
|
59
430
|
|
|
431
|
+
// Generate structured JSON output
|
|
432
|
+
async generateJSON(prompt, agentInstructions = null) {
|
|
433
|
+
throw new Error(`${this.constructor.name} must implement generateJSON()`);
|
|
434
|
+
}
|
|
435
|
+
|
|
60
436
|
// Subclass hooks — throw if not overridden
|
|
61
437
|
_createClient() { throw new Error(`${this.constructor.name} must implement _createClient()`); }
|
|
62
438
|
async _callProvider(prompt, maxTokens, systemInstructions) { throw new Error(`${this.constructor.name} must implement _callProvider()`); }
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
/**
 * Model-specific maximum output token limits (ACTUAL API MAXIMUMS).
 *
 * These limits represent the ACTUAL maximum output tokens each model can
 * generate according to official API documentation (verified February 2026).
 *
 * Critical updates (Feb 2026):
 * - Claude 4.x models: 64K-128K tokens (8x-16x previous limits)
 * - Gemini 2.5 Flash: 65,535 tokens (8x previous limit)
 * - GPT-4o/mini: 16,384 tokens (unchanged)
 *
 * Using actual model limits ensures complete documentation generation
 * without truncation.
 *
 * Sources:
 * - Claude: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude/
 * - Gemini: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/
 * - OpenAI: https://community.openai.com/t/what-is-the-token-limit-of-the-new-version-gpt-4o/752528
 */

// Suppress duplicate warnings — each model warned only once per process
const _warnedModels = new Set();

export const MODEL_MAX_TOKENS = {
  // Claude models (Anthropic)
  // Source: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude/
  'claude-sonnet-4-5-20250929': 64000, // Was 8192 - ACTUAL: 64,000 tokens
  'claude-sonnet-4-5': 64000,
  'claude-sonnet-4': 64000,
  'claude-opus-4-6': 128000, // Was 16384 - ACTUAL: 128,000 tokens
  'claude-opus-4': 128000,
  'claude-haiku-4-5-20251001': 64000, // Was 8192 - ACTUAL: 64,000 tokens
  'claude-haiku-4-5': 64000,
  'claude-haiku-4': 64000,

  // OpenAI models
  // Source: https://community.openai.com/t/what-is-the-token-limit-of-the-new-version-gpt-4o/752528
  // GPT-5 family (32K output tokens assumed; add specific overrides as official limits are confirmed)
  'gpt-5': 32768,
  'gpt-5.1': 32768,
  'gpt-5.2': 32768,
  'gpt-5.4': 32768,
  'gpt-5-mini': 32768,
  'gpt-5.2-chat-latest': 16384, // Keep at 16384 — existing tests depend on this value
  'gpt-5.2-pro': 16384,
  'gpt-5.2-codex': 16384,
  'gpt-4o': 16384, // Correct - max 16,384 tokens
  'gpt-4o-2024-11-20': 16384,
  'gpt-4o-mini': 16384, // Correct - max 16,384 tokens
  'gpt-4-turbo': 4096,
  'gpt-4': 8192,
  'gpt-3.5-turbo': 4096,

  // Google Gemini models
  // Source: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/
  'gemini-3.1-pro-preview': 65536, // ACTUAL: 65,536 tokens (verified March 2026)
  'gemini-3-flash-preview': 65536, // ACTUAL: 65,536 tokens (verified March 2026)
  'gemini-2.5-pro': 65536, // ACTUAL: 65,536 tokens
  'gemini-2.5-flash': 65535, // Was 8192 - ACTUAL: 65,535 tokens
  'gemini-2.5-flash-lite': 32768, // ACTUAL: 32,768 tokens
  'gemini-2.0-flash': 8192, // Correct - max 8,192 tokens
  'gemini-2.0-flash-exp': 8192,
  'gemini-1.5-pro': 8192,
  'gemini-1.5-flash': 8192,
  'gemini-pro': 8192,

  // Default fallback for models not listed above
  'default': 8192
};
|
|
69
|
+
|
|
70
|
+
/**
 * Model context window sizes (total input + output tokens).
 *
 * These represent the full context window each model supports.
 * For local models, we use conservative defaults since the actual
 * model behind the endpoint is unknown.
 */
export const MODEL_CONTEXT_WINDOWS = {
  // Claude models
  'claude-sonnet-4-5': 200000,
  'claude-sonnet-4': 200000,
  'claude-opus-4-6': 200000,
  'claude-opus-4': 200000,
  'claude-haiku-4-5': 200000,
  'claude-haiku-4': 200000,

  // OpenAI models
  'gpt-5': 128000,
  'gpt-5.1': 128000,
  'gpt-5.2': 128000,
  'gpt-5.4': 128000,
  'gpt-5-mini': 128000,
  'gpt-4o': 128000,
  'gpt-4o-mini': 128000,
  'gpt-4-turbo': 128000,
  'gpt-4': 8192,

  // Google Gemini models
  'gemini-3.1-pro-preview': 1048576,
  'gemini-3-flash-preview': 1048576,
  'gemini-2.5-pro': 1048576,
  'gemini-2.5-flash': 1048576,
  'gemini-2.0-flash': 1048576,
  'gemini-1.5-pro': 2097152,
  'gemini-1.5-flash': 1048576,

  // Default for unknown local models - conservative estimate
  'local-default': 8192,

  // Xiaomi MiMo models
  'mimo-v2-flash': 262144,
  'mimo-v2-pro': 1048576,
  'mimo-v2-omni': 262144,
};
|
|
114
|
+
|
|
115
|
+
/**
 * Get maximum output tokens for a specific model.
 *
 * Resolution order: exact table match → base-model fuzzy match (first three
 * dash-separated segments) → GPT-5 family prefix fallback → global default.
 *
 * @param {string} modelId - The model identifier (may be null/empty)
 * @returns {number} Maximum output tokens supported by the model
 */
export function getMaxTokensForModel(modelId) {
  if (!modelId) {
    return MODEL_MAX_TOKENS['default'];
  }

  // Direct lookup. Object.hasOwn avoids false hits on inherited
  // Object.prototype keys (e.g. "toString", "constructor"), which a
  // truthy bracket lookup would wrongly return as a "limit".
  if (Object.hasOwn(MODEL_MAX_TOKENS, modelId)) {
    return MODEL_MAX_TOKENS[modelId];
  }

  // Fuzzy match for versioned models (e.g., "claude-opus-4-latest" → "claude-opus-4")
  const baseModel = modelId.split('-').slice(0, 3).join('-');
  if (Object.hasOwn(MODEL_MAX_TOKENS, baseModel)) {
    return MODEL_MAX_TOKENS[baseModel];
  }

  // Prefix fallback: catch any gpt-5.x variant not explicitly listed
  if (modelId.startsWith('gpt-5')) {
    if (!_warnedModels.has(modelId)) {
      _warnedModels.add(modelId);
      console.warn(`No exact max tokens for "${modelId}", using GPT-5 family default: 32768`);
    }
    return 32768;
  }

  // Fallback to default (warn once per unknown model per process)
  if (!_warnedModels.has(modelId)) {
    _warnedModels.add(modelId);
    console.warn(`No max tokens configured for model "${modelId}", using default: ${MODEL_MAX_TOKENS['default']}`);
  }
  return MODEL_MAX_TOKENS['default'];
}
|
|
152
|
+
|
|
153
|
+
/**
 * Get the context window size for a specific model.
 *
 * @param {string} modelId - The model identifier
 * @param {string} provider - The provider name ('claude', 'openai', 'gemini', 'local')
 * @returns {number|null} Context window size in tokens, or null if no limit enforcement
 */
export function getContextWindowForModel(modelId, provider) {
  // Local provider: use conservative default, overridable via env var
  if (provider === 'local') {
    const envOverride = process.env.LOCAL_LLM_CONTEXT_WINDOW;
    if (envOverride) {
      // Number.parseInt with explicit radix; Number.isNaN avoids the
      // coercing global isNaN.
      const parsed = Number.parseInt(envOverride, 10);
      if (!Number.isNaN(parsed) && parsed > 0) {
        return parsed;
      }
      console.warn(`Invalid LOCAL_LLM_CONTEXT_WINDOW value "${envOverride}", using default: ${MODEL_CONTEXT_WINDOWS['local-default']}`);
    }
    return MODEL_CONTEXT_WINDOWS['local-default'];
  }

  if (!modelId) {
    return null;
  }

  // Direct lookup — Object.hasOwn guards against inherited prototype keys
  if (Object.hasOwn(MODEL_CONTEXT_WINDOWS, modelId)) {
    return MODEL_CONTEXT_WINDOWS[modelId];
  }

  // Fuzzy match for versioned models (e.g., "claude-opus-4-latest" → "claude-opus-4")
  const baseModel = modelId.split('-').slice(0, 3).join('-');
  if (Object.hasOwn(MODEL_CONTEXT_WINDOWS, baseModel)) {
    return MODEL_CONTEXT_WINDOWS[baseModel];
  }

  // Unknown cloud model — no limit enforcement
  return null;
}
|
|
191
|
+
|
|
192
|
+
/**
 * Validate if requested tokens exceed the model limit and clamp if needed.
 *
 * @param {string} modelId - The model identifier
 * @param {number} requestedTokens - Requested output tokens
 * @returns {number} Clamped token value within model limits
 */
export function clampTokensToModelLimit(modelId, requestedTokens) {
  const maxTokens = getMaxTokensForModel(modelId);

  if (requestedTokens > maxTokens) {
    // Warn only once per (model, requested) pair; reuses the module-level
    // _warnedModels set with a namespaced key.
    const clampKey = `clamp:${modelId}:${requestedTokens}`;
    if (!_warnedModels.has(clampKey)) {
      _warnedModels.add(clampKey);
      console.warn(`Requested ${requestedTokens} tokens for ${modelId}, clamping to model limit: ${maxTokens}`);
    }
    return maxTokens;
  }

  return requestedTokens;
}
|