agentic-qe 2.6.0 → 2.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +156 -0
- package/README.md +222 -159
- package/dist/agents/BaseAgent.d.ts +19 -0
- package/dist/agents/BaseAgent.d.ts.map +1 -1
- package/dist/agents/BaseAgent.js +41 -1
- package/dist/agents/BaseAgent.js.map +1 -1
- package/dist/agents/CodeIntelligenceAgent.d.ts +14 -0
- package/dist/agents/CodeIntelligenceAgent.d.ts.map +1 -1
- package/dist/agents/CodeIntelligenceAgent.js +66 -0
- package/dist/agents/CodeIntelligenceAgent.js.map +1 -1
- package/dist/agents/CoverageAnalyzerAgent.d.ts +8 -0
- package/dist/agents/CoverageAnalyzerAgent.d.ts.map +1 -1
- package/dist/agents/CoverageAnalyzerAgent.js +65 -1
- package/dist/agents/CoverageAnalyzerAgent.js.map +1 -1
- package/dist/agents/TestGeneratorAgent.d.ts +2 -2
- package/dist/agents/TestGeneratorAgent.d.ts.map +1 -1
- package/dist/agents/TestGeneratorAgent.js +16 -6
- package/dist/agents/TestGeneratorAgent.js.map +1 -1
- package/dist/agents/adapters/AgentLLMAdapter.d.ts +127 -0
- package/dist/agents/adapters/AgentLLMAdapter.d.ts.map +1 -0
- package/dist/agents/adapters/AgentLLMAdapter.js +366 -0
- package/dist/agents/adapters/AgentLLMAdapter.js.map +1 -0
- package/dist/agents/adapters/index.d.ts +1 -0
- package/dist/agents/adapters/index.d.ts.map +1 -1
- package/dist/agents/adapters/index.js +5 -1
- package/dist/agents/adapters/index.js.map +1 -1
- package/dist/agents/interfaces/IAgentLLM.d.ts +257 -0
- package/dist/agents/interfaces/IAgentLLM.d.ts.map +1 -0
- package/dist/agents/interfaces/IAgentLLM.js +39 -0
- package/dist/agents/interfaces/IAgentLLM.js.map +1 -0
- package/dist/agents/interfaces/index.d.ts +10 -0
- package/dist/agents/interfaces/index.d.ts.map +1 -0
- package/dist/agents/interfaces/index.js +14 -0
- package/dist/agents/interfaces/index.js.map +1 -0
- package/dist/agents/n8n/N8nBaseAgent.d.ts +18 -0
- package/dist/agents/n8n/N8nBaseAgent.d.ts.map +1 -1
- package/dist/agents/n8n/N8nBaseAgent.js +80 -0
- package/dist/agents/n8n/N8nBaseAgent.js.map +1 -1
- package/dist/agents/pool/AgentPool.d.ts +112 -0
- package/dist/agents/pool/AgentPool.d.ts.map +1 -0
- package/dist/agents/pool/AgentPool.js +573 -0
- package/dist/agents/pool/AgentPool.js.map +1 -0
- package/dist/agents/pool/QEAgentPoolFactory.d.ts +118 -0
- package/dist/agents/pool/QEAgentPoolFactory.d.ts.map +1 -0
- package/dist/agents/pool/QEAgentPoolFactory.js +251 -0
- package/dist/agents/pool/QEAgentPoolFactory.js.map +1 -0
- package/dist/agents/pool/index.d.ts +34 -0
- package/dist/agents/pool/index.d.ts.map +1 -0
- package/dist/agents/pool/index.js +44 -0
- package/dist/agents/pool/index.js.map +1 -0
- package/dist/agents/pool/types.d.ts +227 -0
- package/dist/agents/pool/types.d.ts.map +1 -0
- package/dist/agents/pool/types.js +28 -0
- package/dist/agents/pool/types.js.map +1 -0
- package/dist/cli/commands/providers.d.ts +50 -0
- package/dist/cli/commands/providers.d.ts.map +1 -0
- package/dist/cli/commands/providers.js +403 -0
- package/dist/cli/commands/providers.js.map +1 -0
- package/dist/cli/index.js +62 -0
- package/dist/cli/index.js.map +1 -1
- package/dist/code-intelligence/indexing/FileWatcher.d.ts.map +1 -1
- package/dist/code-intelligence/indexing/FileWatcher.js +11 -8
- package/dist/code-intelligence/indexing/FileWatcher.js.map +1 -1
- package/dist/config/ConfigLoader.d.ts +85 -0
- package/dist/config/ConfigLoader.d.ts.map +1 -0
- package/dist/config/ConfigLoader.js +420 -0
- package/dist/config/ConfigLoader.js.map +1 -0
- package/dist/config/ProviderConfig.d.ts +153 -0
- package/dist/config/ProviderConfig.d.ts.map +1 -0
- package/dist/config/ProviderConfig.js +155 -0
- package/dist/config/ProviderConfig.js.map +1 -0
- package/dist/config/index.d.ts +35 -0
- package/dist/config/index.d.ts.map +1 -0
- package/dist/config/index.js +45 -0
- package/dist/config/index.js.map +1 -0
- package/dist/core/memory/HNSWVectorMemory.js +1 -1
- package/dist/mcp/handlers/agent-spawn.d.ts +71 -5
- package/dist/mcp/handlers/agent-spawn.d.ts.map +1 -1
- package/dist/mcp/handlers/agent-spawn.js +336 -110
- package/dist/mcp/handlers/agent-spawn.js.map +1 -1
- package/dist/mcp/handlers/fleet-init.d.ts +24 -0
- package/dist/mcp/handlers/fleet-init.d.ts.map +1 -1
- package/dist/mcp/handlers/fleet-init.js +56 -4
- package/dist/mcp/handlers/fleet-init.js.map +1 -1
- package/dist/mcp/server-instructions.d.ts +1 -1
- package/dist/mcp/server-instructions.js +1 -1
- package/dist/mcp/server.d.ts +1 -0
- package/dist/mcp/server.d.ts.map +1 -1
- package/dist/memory/HNSWPatternStore.d.ts.map +1 -1
- package/dist/memory/HNSWPatternStore.js +23 -0
- package/dist/memory/HNSWPatternStore.js.map +1 -1
- package/dist/memory/RuVectorPatternStore.d.ts +5 -0
- package/dist/memory/RuVectorPatternStore.d.ts.map +1 -1
- package/dist/memory/RuVectorPatternStore.js +11 -0
- package/dist/memory/RuVectorPatternStore.js.map +1 -1
- package/dist/plugins/BasePlugin.d.ts +111 -0
- package/dist/plugins/BasePlugin.d.ts.map +1 -0
- package/dist/plugins/BasePlugin.js +154 -0
- package/dist/plugins/BasePlugin.js.map +1 -0
- package/dist/plugins/PluginManager.d.ts +145 -0
- package/dist/plugins/PluginManager.d.ts.map +1 -0
- package/dist/plugins/PluginManager.js +862 -0
- package/dist/plugins/PluginManager.js.map +1 -0
- package/dist/plugins/adapters/McpToolsPlugin.d.ts +98 -0
- package/dist/plugins/adapters/McpToolsPlugin.d.ts.map +1 -0
- package/dist/plugins/adapters/McpToolsPlugin.js +518 -0
- package/dist/plugins/adapters/McpToolsPlugin.js.map +1 -0
- package/dist/plugins/adapters/PlaywrightPlugin.d.ts +63 -0
- package/dist/plugins/adapters/PlaywrightPlugin.d.ts.map +1 -0
- package/dist/plugins/adapters/PlaywrightPlugin.js +451 -0
- package/dist/plugins/adapters/PlaywrightPlugin.js.map +1 -0
- package/dist/plugins/adapters/VitestPlugin.d.ts +74 -0
- package/dist/plugins/adapters/VitestPlugin.d.ts.map +1 -0
- package/dist/plugins/adapters/VitestPlugin.js +589 -0
- package/dist/plugins/adapters/VitestPlugin.js.map +1 -0
- package/dist/plugins/adapters/index.d.ts +8 -0
- package/dist/plugins/adapters/index.d.ts.map +1 -0
- package/dist/plugins/adapters/index.js +17 -0
- package/dist/plugins/adapters/index.js.map +1 -0
- package/dist/plugins/index.d.ts +32 -0
- package/dist/plugins/index.d.ts.map +1 -0
- package/dist/plugins/index.js +48 -0
- package/dist/plugins/index.js.map +1 -0
- package/dist/plugins/types.d.ts +528 -0
- package/dist/plugins/types.d.ts.map +1 -0
- package/dist/plugins/types.js +61 -0
- package/dist/plugins/types.js.map +1 -0
- package/dist/providers/CostOptimizationStrategies.d.ts +297 -0
- package/dist/providers/CostOptimizationStrategies.d.ts.map +1 -0
- package/dist/providers/CostOptimizationStrategies.js +831 -0
- package/dist/providers/CostOptimizationStrategies.js.map +1 -0
- package/dist/providers/HybridRouter.d.ts +142 -5
- package/dist/providers/HybridRouter.d.ts.map +1 -1
- package/dist/providers/HybridRouter.js +472 -6
- package/dist/providers/HybridRouter.js.map +1 -1
- package/dist/providers/HybridRouterComplexityIntegration.d.ts +169 -0
- package/dist/providers/HybridRouterComplexityIntegration.d.ts.map +1 -0
- package/dist/providers/HybridRouterComplexityIntegration.js +319 -0
- package/dist/providers/HybridRouterComplexityIntegration.js.map +1 -0
- package/dist/providers/HybridRouterModelSelection.d.ts +106 -0
- package/dist/providers/HybridRouterModelSelection.d.ts.map +1 -0
- package/dist/providers/HybridRouterModelSelection.js +420 -0
- package/dist/providers/HybridRouterModelSelection.js.map +1 -0
- package/dist/providers/LLMProviderFactory.d.ts +23 -9
- package/dist/providers/LLMProviderFactory.d.ts.map +1 -1
- package/dist/providers/LLMProviderFactory.js +54 -11
- package/dist/providers/LLMProviderFactory.js.map +1 -1
- package/dist/providers/OllamaProvider.d.ts +122 -0
- package/dist/providers/OllamaProvider.d.ts.map +1 -0
- package/dist/providers/OllamaProvider.js +425 -0
- package/dist/providers/OllamaProvider.js.map +1 -0
- package/dist/providers/index.d.ts +6 -1
- package/dist/providers/index.d.ts.map +1 -1
- package/dist/providers/index.js +17 -1
- package/dist/providers/index.js.map +1 -1
- package/dist/routing/ComplexityClassifier.d.ts +266 -0
- package/dist/routing/ComplexityClassifier.d.ts.map +1 -0
- package/dist/routing/ComplexityClassifier.js +567 -0
- package/dist/routing/ComplexityClassifier.js.map +1 -0
- package/dist/routing/ModelCapabilityRegistry.d.ts +98 -0
- package/dist/routing/ModelCapabilityRegistry.d.ts.map +1 -0
- package/dist/routing/ModelCapabilityRegistry.js +216 -0
- package/dist/routing/ModelCapabilityRegistry.js.map +1 -0
- package/dist/routing/index.d.ts +13 -0
- package/dist/routing/index.d.ts.map +1 -0
- package/dist/routing/index.js +24 -0
- package/dist/routing/index.js.map +1 -0
- package/docs/reference/model-capability-registry.md +402 -0
- package/docs/reference/provider-config-schema.md +608 -0
- package/package.json +21 -3
|
@@ -0,0 +1,402 @@
|
|
|
1
|
+
# Model Capability Registry
|
|
2
|
+
|
|
3
|
+
The Model Capability Registry provides intelligent LLM selection based on task requirements, complexity, and constraints.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
The registry maintains a comprehensive database of language models across multiple providers (Ollama, OpenRouter, Groq, Together, Claude, RuvLLM), recording each model's capabilities, performance benchmarks, and deployment requirements.
|
|
8
|
+
|
|
9
|
+
## Key Features
|
|
10
|
+
|
|
11
|
+
- **Intelligent Model Selection**: Automatically select the best model for a specific task and complexity level
|
|
12
|
+
- **Multi-Provider Support**: Works with local (Ollama) and cloud providers
|
|
13
|
+
- **Cost Optimization**: Balance quality vs. cost based on constraints
|
|
14
|
+
- **Adaptive Learning**: Update quality ratings based on actual performance
|
|
15
|
+
- **Comprehensive Benchmarks**: Includes HumanEval, SWE-bench, and Aider Polyglot scores
|
|
16
|
+
|
|
17
|
+
## Model Database
|
|
18
|
+
|
|
19
|
+
### Local Models (Ollama)
|
|
20
|
+
|
|
21
|
+
| Model | Parameters | Context | Best For |
|
|
22
|
+
|-------|-----------|---------|----------|
|
|
23
|
+
| qwen2.5-coder:32b | 32B | 131K | Agentic workflows, code generation |
|
|
24
|
+
| llama3.3:70b | 70B | 128K | High-quality reasoning, general purpose |
|
|
25
|
+
| devstral:22b | 22B | 32K | Laptop deployment, balanced performance |
|
|
26
|
+
| deepseek-coder-v2:16b | 16B | 163K | Multi-language, large context |
|
|
27
|
+
| starcoder2:15b | 15B | 16K | Fast code generation |
|
|
28
|
+
|
|
29
|
+
### Free Tier Models
|
|
30
|
+
|
|
31
|
+
| Model | Provider | Context | Benchmarks |
|
|
32
|
+
|-------|----------|---------|------------|
|
|
33
|
+
| llama-3.3-70b-versatile | Groq | 128K | HumanEval: 73.8, SWE-bench: 52.1 |
|
|
34
|
+
| mistralai/devstral-2512:free | OpenRouter | 262K | HumanEval: 84.2, SWE-bench: 72.2 |
|
|
35
|
+
|
|
36
|
+
### Premium Models
|
|
37
|
+
|
|
38
|
+
| Model | Provider | Cost/1M | SWE-bench |
|
|
39
|
+
|-------|----------|---------|-----------|
|
|
40
|
+
| moonshotai/kimi-dev-72b | OpenRouter | $0.50 | 60.4 (open-source SOTA at release) |
|
|
41
|
+
| mistralai/devstral-2-123b | OpenRouter | $1.50 | 72.2 |
|
|
42
|
+
| anthropic/claude-sonnet-4 | Claude | $3-15 | 68.7 |
|
|
43
|
+
| anthropic/claude-opus-4 | Claude | $15-75 | 71.3 |
|
|
44
|
+
|
|
45
|
+
## Usage
|
|
46
|
+
|
|
47
|
+
### Basic Usage
|
|
48
|
+
|
|
49
|
+
```typescript
|
|
50
|
+
import { ModelCapabilityRegistry } from '@/routing';
|
|
51
|
+
|
|
52
|
+
const registry = new ModelCapabilityRegistry();
|
|
53
|
+
registry.loadDefaultModels();
|
|
54
|
+
|
|
55
|
+
// Get best model for a task
|
|
56
|
+
const modelId = registry.getBestModelForTask(
|
|
57
|
+
'test-generation',
|
|
58
|
+
'moderate'
|
|
59
|
+
);
|
|
60
|
+
|
|
61
|
+
console.log(modelId); // e.g., "mistralai/devstral-2512:free"
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
### With Constraints
|
|
65
|
+
|
|
66
|
+
```typescript
|
|
67
|
+
// Prefer free tier models
|
|
68
|
+
const freeModel = registry.getBestModelForTask(
|
|
69
|
+
'code-review',
|
|
70
|
+
'simple',
|
|
71
|
+
{ preferFree: true }
|
|
72
|
+
);
|
|
73
|
+
|
|
74
|
+
// Local deployment only
|
|
75
|
+
const localModel = registry.getBestModelForTask(
|
|
76
|
+
'bug-detection',
|
|
77
|
+
'moderate',
|
|
78
|
+
{ requiresLocal: true }
|
|
79
|
+
);
|
|
80
|
+
|
|
81
|
+
// Budget constraint
|
|
82
|
+
const budgetModel = registry.getBestModelForTask(
|
|
83
|
+
'refactoring',
|
|
84
|
+
'complex',
|
|
85
|
+
{ maxCostPer1M: 1.0 }
|
|
86
|
+
);
|
|
87
|
+
|
|
88
|
+
// Large context required
|
|
89
|
+
const contextModel = registry.getBestModelForTask(
|
|
90
|
+
'documentation',
|
|
91
|
+
'complex',
|
|
92
|
+
{ minContextWindow: 128000 }
|
|
93
|
+
);
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### Advanced Constraints
|
|
97
|
+
|
|
98
|
+
```typescript
|
|
99
|
+
const modelId = registry.getBestModelForTask(
|
|
100
|
+
'security-scanning',
|
|
101
|
+
'very_complex',
|
|
102
|
+
{
|
|
103
|
+
maxCostPer1M: 2.0,
|
|
104
|
+
minContextWindow: 100000,
|
|
105
|
+
requiredCapabilities: ['code-specialist', 'high-accuracy']
|
|
106
|
+
}
|
|
107
|
+
);
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### Adaptive Learning
|
|
111
|
+
|
|
112
|
+
```typescript
|
|
113
|
+
// Execute a task and rate the performance
|
|
114
|
+
const modelId = registry.getBestModelForTask('test-generation', 'moderate');
|
|
115
|
+
|
|
116
|
+
// ... execute task with selected model ...
|
|
117
|
+
|
|
118
|
+
// Update quality rating based on actual performance
|
|
119
|
+
const successRate = 0.92; // 92% of generated tests passed
|
|
120
|
+
registry.updateQualityRating(modelId, 'test-generation', successRate);
|
|
121
|
+
|
|
122
|
+
// Future selections will consider this updated rating
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### Provider-Specific Selection
|
|
126
|
+
|
|
127
|
+
```typescript
|
|
128
|
+
// Get all Ollama models
|
|
129
|
+
const ollamaModels = registry.getModelsForProvider('ollama');
|
|
130
|
+
|
|
131
|
+
// Get all free tier models
|
|
132
|
+
const freeModels = registry.getAllModels().filter(m =>
|
|
133
|
+
!m.pricing || (m.pricing.inputPer1M === 0 && m.pricing.outputPer1M === 0)
|
|
134
|
+
);
|
|
135
|
+
|
|
136
|
+
// Get Claude models only
|
|
137
|
+
const claudeModels = registry.getModelsForProvider('claude');
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## Task Types
|
|
141
|
+
|
|
142
|
+
The registry supports the following task types:
|
|
143
|
+
|
|
144
|
+
- `test-generation`: Unit and integration test generation
|
|
145
|
+
- `coverage-analysis`: Code coverage analysis and gap detection
|
|
146
|
+
- `code-review`: Code quality review and suggestions
|
|
147
|
+
- `bug-detection`: Bug and defect identification
|
|
148
|
+
- `documentation`: Code documentation generation
|
|
149
|
+
- `refactoring`: Code refactoring suggestions
|
|
150
|
+
- `performance-testing`: Performance analysis and optimization
|
|
151
|
+
- `security-scanning`: Security vulnerability detection
|
|
152
|
+
|
|
153
|
+
## Complexity Levels
|
|
154
|
+
|
|
155
|
+
Tasks can be classified into four complexity levels, passed to `getBestModelForTask` as `'simple'`, `'moderate'`, `'complex'`, or `'very_complex'`:
|
|
156
|
+
|
|
157
|
+
1. **Simple**: Basic operations, small scope
|
|
158
|
+
- Ideal models: 1B-15B parameters
|
|
159
|
+
- Examples: Single function test, simple refactoring
|
|
160
|
+
|
|
161
|
+
2. **Moderate**: Standard development tasks
|
|
162
|
+
- Ideal models: 10B-40B parameters
|
|
163
|
+
- Examples: Class-level testing, code review
|
|
164
|
+
|
|
165
|
+
3. **Complex**: Advanced tasks requiring deep understanding
|
|
166
|
+
- Ideal models: 30B-80B parameters
|
|
167
|
+
- Examples: System integration tests, architecture review
|
|
168
|
+
|
|
169
|
+
4. **Very Complex**: Sophisticated tasks requiring highest quality
|
|
170
|
+
- Ideal models: 60B+ parameters
|
|
171
|
+
- Examples: Security audits, complex refactoring
|
|
172
|
+
|
|
173
|
+
## Model Selection Algorithm
|
|
174
|
+
|
|
175
|
+
The registry uses a multi-factor scoring system:
|
|
176
|
+
|
|
177
|
+
1. **Quality Rating** (0-40 points): Based on historical performance
|
|
178
|
+
2. **Benchmark Score** (0-30 points): HumanEval, SWE-bench, Aider Polyglot
|
|
179
|
+
3. **Complexity Match** (0-20 points): How well model size fits task complexity
|
|
180
|
+
4. **Cost Efficiency** (0-10 points): Preference for free/low-cost models
|
|
181
|
+
|
|
182
|
+
A higher score means a better match (the four factors sum to a maximum of 100 points). The highest-scoring model is selected.
|
|
183
|
+
|
|
184
|
+
## Model Constraints
|
|
185
|
+
|
|
186
|
+
### Cost Constraints
|
|
187
|
+
|
|
188
|
+
```typescript
|
|
189
|
+
interface ModelConstraints {
|
|
190
|
+
maxCostPer1M?: number; // Maximum cost per 1M tokens
|
|
191
|
+
preferFree?: boolean; // Prefer free tier models
|
|
192
|
+
}
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
### Deployment Constraints
|
|
196
|
+
|
|
197
|
+
```typescript
|
|
198
|
+
interface ModelConstraints {
|
|
199
|
+
requiresLocal?: boolean; // Must be locally deployable
|
|
200
|
+
minContextWindow?: number; // Minimum context window size
|
|
201
|
+
}
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
### Capability Constraints
|
|
205
|
+
|
|
206
|
+
```typescript
|
|
207
|
+
interface ModelConstraints {
|
|
208
|
+
requiredCapabilities?: string[]; // Required capability strings
|
|
209
|
+
}
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
## Model Capabilities Schema
|
|
213
|
+
|
|
214
|
+
```typescript
|
|
215
|
+
interface ModelCapabilities {
|
|
216
|
+
modelId: string;
|
|
217
|
+
provider: 'ollama' | 'openrouter' | 'groq' | 'together' | 'claude' | 'ruvllm';
|
|
218
|
+
|
|
219
|
+
// Core specs
|
|
220
|
+
parameters: string;
|
|
221
|
+
contextWindow: number;
|
|
222
|
+
pricing?: {
|
|
223
|
+
inputPer1M: number;
|
|
224
|
+
outputPer1M: number;
|
|
225
|
+
};
|
|
226
|
+
|
|
227
|
+
// Capabilities
|
|
228
|
+
supportedTasks: TaskType[];
|
|
229
|
+
strengths: string[];
|
|
230
|
+
weaknesses?: string[];
|
|
231
|
+
|
|
232
|
+
// Benchmarks
|
|
233
|
+
benchmarks?: {
|
|
234
|
+
humanEval?: number;
|
|
235
|
+
sweBench?: number;
|
|
236
|
+
aiderPolyglot?: number;
|
|
237
|
+
};
|
|
238
|
+
|
|
239
|
+
// Deployment
|
|
240
|
+
availableOn: string[];
|
|
241
|
+
requiresGPU: boolean;
|
|
242
|
+
vramRequired?: number;
|
|
243
|
+
|
|
244
|
+
// Adaptive ratings
|
|
245
|
+
qualityRatings?: Partial<Record<TaskType, number>>;
|
|
246
|
+
}
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
## Benchmark Interpretation
|
|
250
|
+
|
|
251
|
+
### HumanEval
|
|
252
|
+
- Measures code generation accuracy
|
|
253
|
+
- Range: 0-100
|
|
254
|
+
- Good: >70, Excellent: >85
|
|
255
|
+
|
|
256
|
+
### SWE-bench
|
|
257
|
+
- Measures ability to solve real-world software engineering tasks
|
|
258
|
+
- Range: 0-100
|
|
259
|
+
- Good: >50, Excellent: >70
|
|
260
|
+
|
|
261
|
+
### Aider Polyglot
|
|
262
|
+
- Measures multi-language code editing capability
|
|
263
|
+
- Range: 0-100
|
|
264
|
+
- Good: >60, Excellent: >75
|
|
265
|
+
|
|
266
|
+
## Best Practices
|
|
267
|
+
|
|
268
|
+
### 1. Start with Free Tier
|
|
269
|
+
|
|
270
|
+
```typescript
|
|
271
|
+
// Try free tier first
|
|
272
|
+
const modelId = registry.getBestModelForTask(task, complexity, {
|
|
273
|
+
preferFree: true
|
|
274
|
+
});
|
|
275
|
+
|
|
276
|
+
// Fall back to paid if quality insufficient
|
|
277
|
+
if (!modelId || needsHigherQuality) {
|
|
278
|
+
const premiumModel = registry.getBestModelForTask(task, complexity, {
|
|
279
|
+
maxCostPer1M: 2.0
|
|
280
|
+
});
|
|
281
|
+
}
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
### 2. Use Adaptive Learning
|
|
285
|
+
|
|
286
|
+
```typescript
|
|
287
|
+
async function executeWithLearning(task, complexity) {
|
|
288
|
+
const modelId = registry.getBestModelForTask(task, complexity);
|
|
289
|
+
|
|
290
|
+
const result = await executeTask(modelId, task);
|
|
291
|
+
|
|
292
|
+
// Rate performance
|
|
293
|
+
const rating = calculateSuccessRate(result);
|
|
294
|
+
registry.updateQualityRating(modelId, task, rating);
|
|
295
|
+
|
|
296
|
+
return result;
|
|
297
|
+
}
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
### 3. Balance Cost and Quality
|
|
301
|
+
|
|
302
|
+
```typescript
|
|
303
|
+
// For simple tasks, use smaller models
|
|
304
|
+
const simpleModel = registry.getBestModelForTask('test-generation', 'simple', {
|
|
305
|
+
preferFree: true
|
|
306
|
+
});
|
|
307
|
+
|
|
308
|
+
// For critical tasks, prioritize quality
|
|
309
|
+
const criticalModel = registry.getBestModelForTask('security-scanning', 'very_complex');
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
### 4. Local Development
|
|
313
|
+
|
|
314
|
+
```typescript
|
|
315
|
+
// Use local models during development
|
|
316
|
+
const devModel = registry.getBestModelForTask('code-review', 'moderate', {
|
|
317
|
+
requiresLocal: true
|
|
318
|
+
});
|
|
319
|
+
|
|
320
|
+
// Use cloud models in CI/CD
|
|
321
|
+
const ciModel = registry.getBestModelForTask('code-review', 'moderate', {
|
|
322
|
+
preferFree: true
|
|
323
|
+
});
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
## Extension
|
|
327
|
+
|
|
328
|
+
### Adding Custom Models
|
|
329
|
+
|
|
330
|
+
```typescript
|
|
331
|
+
import { ModelCapabilities } from '@/routing';
|
|
332
|
+
|
|
333
|
+
const customModel: ModelCapabilities = {
|
|
334
|
+
modelId: 'my-custom-model',
|
|
335
|
+
provider: 'ollama',
|
|
336
|
+
parameters: '13B',
|
|
337
|
+
contextWindow: 8192,
|
|
338
|
+
supportedTasks: ['test-generation', 'code-review'],
|
|
339
|
+
strengths: ['fast', 'efficient'],
|
|
340
|
+
benchmarks: {
|
|
341
|
+
humanEval: 68.5
|
|
342
|
+
},
|
|
343
|
+
availableOn: ['local'],
|
|
344
|
+
requiresGPU: true,
|
|
345
|
+
vramRequired: 8
|
|
346
|
+
};
|
|
347
|
+
|
|
348
|
+
registry.registerModel(customModel);
|
|
349
|
+
```
|
|
350
|
+
|
|
351
|
+
### Custom Scoring
|
|
352
|
+
|
|
353
|
+
If you need custom selection logic, you can extend the registry:
|
|
354
|
+
|
|
355
|
+
```typescript
|
|
356
|
+
class CustomRegistry extends ModelCapabilityRegistry {
|
|
357
|
+
getBestModelForTask(task, complexity, constraints) {
|
|
358
|
+
const candidates = this.getAllModels()
|
|
359
|
+
.filter(m => m.supportedTasks.includes(task));
|
|
360
|
+
|
|
361
|
+
// Apply custom scoring logic
|
|
362
|
+
const scored = candidates.map(model => ({
|
|
363
|
+
model,
|
|
364
|
+
score: this.customScore(model, task, complexity)
|
|
365
|
+
}));
|
|
366
|
+
|
|
367
|
+
return scored.sort((a, b) => b.score - a.score)[0]?.model.modelId;
|
|
368
|
+
}
|
|
369
|
+
|
|
370
|
+
private customScore(model, task, complexity) {
|
|
371
|
+
// Your custom scoring logic
|
|
372
|
+
return 0;
|
|
373
|
+
}
|
|
374
|
+
}
|
|
375
|
+
```
|
|
376
|
+
|
|
377
|
+
## Integration with Multi-Model Router
|
|
378
|
+
|
|
379
|
+
The Model Capability Registry integrates with the Multi-Model Router to provide intelligent provider selection:
|
|
380
|
+
|
|
381
|
+
```typescript
|
|
382
|
+
import { MultiModelRouter } from '@/routing';
|
|
383
|
+
import { ModelCapabilityRegistry } from '@/routing';
|
|
384
|
+
|
|
385
|
+
const router = new MultiModelRouter(config);
|
|
386
|
+
const registry = new ModelCapabilityRegistry();
|
|
387
|
+
registry.loadDefaultModels();
|
|
388
|
+
|
|
389
|
+
// Router uses registry for intelligent selection
|
|
390
|
+
const provider = await router.getProvider({
|
|
391
|
+
task: 'test-generation',
|
|
392
|
+
complexity: 'moderate',
|
|
393
|
+
preferFree: true
|
|
394
|
+
});
|
|
395
|
+
```
|
|
396
|
+
|
|
397
|
+
## Related Documentation
|
|
398
|
+
|
|
399
|
+
- [LLM Providers Guide](../guides/llm-providers-guide.md)
|
|
400
|
+
- [Free Tier Guide](../guides/free-tier-guide.md)
|
|
401
|
+
- [Configuration Guide](../guides/configuration-guide.md)
|
|
402
|
+
- [Multi-Model Router](./multi-model-router.md)
|