@michelabboud/visual-forge-mcp 0.7.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +272 -0
- package/README.md +196 -2
- package/config/pricing.json +32 -4
- package/dist/providers/base-provider.d.ts +10 -2
- package/dist/providers/base-provider.d.ts.map +1 -1
- package/dist/providers/base-provider.js +53 -3
- package/dist/providers/base-provider.js.map +1 -1
- package/dist/providers/index.d.ts +2 -0
- package/dist/providers/index.d.ts.map +1 -1
- package/dist/providers/index.js +40 -2
- package/dist/providers/index.js.map +1 -1
- package/dist/providers/zai/zai-provider.d.ts +22 -0
- package/dist/providers/zai/zai-provider.d.ts.map +1 -0
- package/dist/providers/zai/zai-provider.js +154 -0
- package/dist/providers/zai/zai-provider.js.map +1 -0
- package/dist/quality/index.d.ts +1 -0
- package/dist/quality/index.d.ts.map +1 -1
- package/dist/quality/index.js +1 -0
- package/dist/quality/index.js.map +1 -1
- package/dist/quality/model-tester.d.ts +87 -0
- package/dist/quality/model-tester.d.ts.map +1 -0
- package/dist/quality/model-tester.js +357 -0
- package/dist/quality/model-tester.js.map +1 -0
- package/dist/server/mcp-server.d.ts +5 -0
- package/dist/server/mcp-server.d.ts.map +1 -1
- package/dist/server/mcp-server.js +371 -5
- package/dist/server/mcp-server.js.map +1 -1
- package/dist/types/generation.d.ts +1 -1
- package/dist/types/generation.d.ts.map +1 -1
- package/dist/types/provider.d.ts +28 -1
- package/dist/types/provider.d.ts.map +1 -1
- package/dist/utils/index.d.ts +1 -0
- package/dist/utils/index.d.ts.map +1 -1
- package/dist/utils/index.js +1 -0
- package/dist/utils/index.js.map +1 -1
- package/dist/utils/user-config-manager.d.ts +68 -0
- package/dist/utils/user-config-manager.d.ts.map +1 -0
- package/dist/utils/user-config-manager.js +131 -0
- package/dist/utils/user-config-manager.js.map +1 -0
- package/docs/guides/comprehensive-guide.md +1552 -0
- package/package.json +2 -2
package/CHANGELOG.md
CHANGED
|
@@ -9,6 +9,278 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
9
9
|
|
|
10
10
|
---
|
|
11
11
|
|
|
12
|
+
## [0.9.0] - 2026-01-16
|
|
13
|
+
|
|
14
|
+
### Added 🧪 **Model Testing & Comparison System**
|
|
15
|
+
|
|
16
|
+
#### Model Selection Workflow (Phase 1)
|
|
17
|
+
- **Enhanced Provider Configuration**: `configure_provider` now returns available models
|
|
18
|
+
- Shows list of all models offered by the provider
|
|
19
|
+
- Displays model details: name, cost, description, capabilities
|
|
20
|
+
- Indicates which model is currently set as default
|
|
21
|
+
- Guides user to use `set_default_model` for customization
|
|
22
|
+
- **New MCP Tool: `set_default_model`**
|
|
23
|
+
- Choose preferred model for a provider
|
|
24
|
+
- Validates model exists before saving
|
|
25
|
+
- Persists choice in `~/.visual-forge-mcp/user-config.json`
|
|
26
|
+
- Applies to all future generations unless overridden
|
|
27
|
+
- **New MCP Tool: `get_model_info`**
|
|
28
|
+
- Retrieve detailed information about a specific model
|
|
29
|
+
- Shows capabilities, pricing, and description
|
|
30
|
+
- Displays test results if model was previously tested
|
|
31
|
+
- Helps users make informed model selection decisions
|
|
32
|
+
- **User Configuration Manager**: `src/utils/user-config-manager.ts`
|
|
33
|
+
- Persistent storage for user preferences
|
|
34
|
+
- Default model per provider
|
|
35
|
+
- Model test results with quality scores
|
|
36
|
+
- Atomic file writes for crash safety
|
|
37
|
+
|
|
38
|
+
#### Model Testing (Phase 2)
|
|
39
|
+
- **ModelTester Utility**: `src/quality/model-tester.ts`
|
|
40
|
+
- Comprehensive testing framework for AI models
|
|
41
|
+
- Standard automated tests and custom prompt tests
|
|
42
|
+
- Multi-provider comparison with side-by-side results
|
|
43
|
+
- Quality scoring algorithm with weighted metrics
|
|
44
|
+
|
|
45
|
+
- **Quality Scoring Algorithm**:
|
|
46
|
+
- **Sharpness (30%)**: Laplacian variance analysis for edge detection
|
|
47
|
+
- **Brightness (20%)**: Average brightness in optimal range (30-240)
|
|
48
|
+
- **Text Rendering (40%)**: Estimated OCR accuracy and text clarity
|
|
49
|
+
- **Color Accuracy (10%)**: Heuristic-based color validation
|
|
50
|
+
- **Overall Score**: Weighted average, pass threshold 60/100
|
|
51
|
+
- **Auto-Recording**: Test results saved for future reference
|
|
52
|
+
|
|
53
|
+
- **Standard Test Prompt**:
|
|
54
|
+
- Professional quality validation image
|
|
55
|
+
- Multiple text elements (title, subtitle, detailed text)
|
|
56
|
+
- Geometric shapes (red circle, blue square, green triangle)
|
|
57
|
+
- Color gradient background (#1a365d to #0891b2)
|
|
58
|
+
- Technical diagram (simple flowchart)
|
|
59
|
+
- Designed to test sharpness, color accuracy, and text rendering
|
|
60
|
+
|
|
61
|
+
- **New MCP Tool: `test_model`**
|
|
62
|
+
- Two modes:
|
|
63
|
+
- **Standard Test**: Automated quality validation with predefined elements
|
|
64
|
+
- **Custom Prompt**: Test with user's actual use case
|
|
65
|
+
- Cost permission flow (requires confirmation for paid models)
|
|
66
|
+
- Automatic quality scoring and pass/fail determination
|
|
67
|
+
- Test images saved to `generated-images/tests/`
|
|
68
|
+
- Results include quality metrics, generation time, and cost
|
|
69
|
+
|
|
70
|
+
- **New MCP Tool: `compare_models`**
|
|
71
|
+
- Side-by-side comparison of multiple providers/models
|
|
72
|
+
- Same prompt tested across all selected models
|
|
73
|
+
- Automatic ranking by quality score
|
|
74
|
+
- Intelligent recommendation with reasoning
|
|
75
|
+
- Alternative suggestions for budget-conscious users
|
|
76
|
+
- Total cost and time tracking
|
|
77
|
+
- Detailed results per model with success/failure handling
|
|
78
|
+
|
|
79
|
+
- **Permission Flow**:
|
|
80
|
+
- First call shows cost estimate and requires confirmation
|
|
81
|
+
- User confirms by calling again with `skipPermission: true`
|
|
82
|
+
- Prevents accidental spending on paid models
|
|
83
|
+
- Free models (Gemini, HuggingFace) can skip permission
|
|
84
|
+
|
|
85
|
+
#### Files Added
|
|
86
|
+
- `src/quality/model-tester.ts` - Model testing and comparison utility
|
|
87
|
+
- `docs/design/model-selection-workflow.md` - Complete workflow specification
|
|
88
|
+
- `docs/design/IMPLEMENTATION_STATUS.md` - Implementation tracking
|
|
89
|
+
|
|
90
|
+
#### Files Modified
|
|
91
|
+
- `src/server/mcp-server.ts`:
|
|
92
|
+
- Added `userConfigManager` import
|
|
93
|
+
- Added `ModelTester` instance
|
|
94
|
+
- Updated `configure_provider` handler to return models
|
|
95
|
+
- Added `handleSetDefaultModel()` handler
|
|
96
|
+
- Added `handleGetModelInfo()` handler
|
|
97
|
+
- Added `handleTestModel()` handler
|
|
98
|
+
- Added `handleCompareModels()` handler
|
|
99
|
+
- Added routing for 5 new tools
|
|
100
|
+
- Added 'zai' to all provider enums
|
|
101
|
+
- `src/quality/index.ts` - Exported ModelTester
|
|
102
|
+
- `src/utils/index.ts` - Exported UserConfigManager (from 0.8.0)
|
|
103
|
+
|
|
104
|
+
### Benefits
|
|
105
|
+
|
|
106
|
+
#### For Users
|
|
107
|
+
- ✅ **Informed Decisions**: Test before committing to a model
|
|
108
|
+
- ✅ **Quality Assurance**: Validate model performance on your use case
|
|
109
|
+
- ✅ **Cost Awareness**: See estimated costs before generation
|
|
110
|
+
- ✅ **Traceability**: Historical test results for comparison
|
|
111
|
+
- ✅ **Flexibility**: Different models for different use cases
|
|
112
|
+
- ✅ **Side-by-Side Comparison**: Objective ranking with recommendations
|
|
113
|
+
|
|
114
|
+
#### For Workflows
|
|
115
|
+
- ✅ **Standard Tests**: Quick automated validation of model quality
|
|
116
|
+
- ✅ **Custom Tests**: Real-world testing with actual prompts
|
|
117
|
+
- ✅ **Multi-Provider**: Compare 2-8 providers in single call
|
|
118
|
+
- ✅ **Automatic Ranking**: Data-driven recommendations
|
|
119
|
+
- ✅ **Error Handling**: Graceful failures don't block comparisons
|
|
120
|
+
|
|
121
|
+
### Example Usage
|
|
122
|
+
|
|
123
|
+
```typescript
|
|
124
|
+
// 1. Configure provider and see models
|
|
125
|
+
configure_provider({ provider: "zai", apiKey: "zai-..." })
|
|
126
|
+
// Returns: List of available models with costs
|
|
127
|
+
|
|
128
|
+
// 2. Set preferred model
|
|
129
|
+
set_default_model({ provider: "zai", modelId: "glm-image" })
|
|
130
|
+
|
|
131
|
+
// 3. Test with standard prompt (automated)
|
|
132
|
+
test_model({
|
|
133
|
+
provider: "zai",
|
|
134
|
+
modelId: "glm-image",
|
|
135
|
+
useStandardTest: true
|
|
136
|
+
})
|
|
137
|
+
|
|
138
|
+
// 4. Test with custom prompt (real use case)
|
|
139
|
+
test_model({
|
|
140
|
+
provider: "gemini",
|
|
141
|
+
modelId: "gemini-2.5-flash-image",
|
|
142
|
+
prompt: "AWS VPC architecture diagram with public/private subnets"
|
|
143
|
+
})
|
|
144
|
+
|
|
145
|
+
// 5. Compare multiple models
|
|
146
|
+
compare_models({
|
|
147
|
+
prompt: "Technical diagram showing microservices architecture",
|
|
148
|
+
providers: [
|
|
149
|
+
{ provider: "gemini", model: "gemini-2.5-flash-image" },
|
|
150
|
+
{ provider: "zai", model: "glm-image" },
|
|
151
|
+
{ provider: "huggingface", model: "black-forest-labs/FLUX.1-dev" }
|
|
152
|
+
]
|
|
153
|
+
})
|
|
154
|
+
// Returns: Ranked results with recommendation
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### Technical Details
|
|
158
|
+
|
|
159
|
+
- **Test Image Storage**: `generated-images/tests/`
|
|
160
|
+
- **Config Storage**: `~/.visual-forge-mcp/user-config.json`
|
|
161
|
+
- **Quality Metrics**: Sharpness, brightness, text rendering, color accuracy
|
|
162
|
+
- **Pass Threshold**: 60/100 overall score
|
|
163
|
+
- **Concurrent Testing**: Up to 3 parallel model tests in comparison mode
|
|
164
|
+
|
|
165
|
+
---
|
|
166
|
+
|
|
167
|
+
## [0.8.0] - 2026-01-16
|
|
168
|
+
|
|
169
|
+
### Added 🎯 **Multi-Model Architecture & Z.ai Provider**
|
|
170
|
+
|
|
171
|
+
#### Multi-Model Architecture
|
|
172
|
+
- **Provider-Model Separation**: Distinguish between providers (companies) and models (specific AI implementations)
|
|
173
|
+
- One provider can offer multiple models with different capabilities and pricing
|
|
174
|
+
- Example: OpenAI offers `gpt-image-1` (standard, $0.04) and `gpt-image-1-hd` (HD, $0.12)
|
|
175
|
+
- Example: Gemini offers `gemini-2.5-flash-image` (2K) and `gemini-2.5-flash-image-pro` (4K)
|
|
176
|
+
- Example: Replicate offers `flux-schnell` ($0.003), `flux-dev` ($0.025), `flux-pro` ($0.055)
|
|
177
|
+
- **New Type System**:
|
|
178
|
+
- `ProviderModel` interface: Represents individual models with id, name, cost, capabilities
|
|
179
|
+
- `IImageProvider.getModels()`: Returns array of available models
|
|
180
|
+
- `IImageProvider.getDefaultModel()`: Returns provider's default model
|
|
181
|
+
- `IImageProvider.estimateCost(spec, modelId?)`: Model-aware cost estimation
|
|
182
|
+
- **Backward Compatibility**:
|
|
183
|
+
- Optional `models` and `defaultModel` fields in `ProviderConfig`
|
|
184
|
+
- Legacy `model` field still supported for single-model providers
|
|
185
|
+
- Automatic fallback to legacy config if models array not provided
|
|
186
|
+
- **Files Modified**:
|
|
187
|
+
- `src/types/provider.ts` - Added ProviderModel interface, updated IImageProvider
|
|
188
|
+
- `src/providers/base-provider.ts` - Implemented getModels(), getDefaultModel(), updated estimateCost()
|
|
189
|
+
- `src/providers/index.ts` - Updated all provider initializations
|
|
190
|
+
- `config/pricing.json` - Restructured to support models array per provider (v2.1.0)
|
|
191
|
+
- **Benefits**:
|
|
192
|
+
- ✅ Future-proof architecture for providers with multiple model offerings
|
|
193
|
+
- ✅ Granular cost control per model
|
|
194
|
+
- ✅ Better model selection and capabilities reporting
|
|
195
|
+
- ✅ Full backward compatibility with existing code
|
|
196
|
+
|
|
197
|
+
#### Z.ai GLM-Image Provider (8th Provider)
|
|
198
|
+
- **New Provider**: Z.ai (Zhipu AI) with GLM-Image model
|
|
199
|
+
- **Model**: GLM-Image - 16B parameter hybrid autoregressive + diffusion model
|
|
200
|
+
- **Specialty**: Excellent for text-heavy diagrams, posters, and knowledge-dense images
|
|
201
|
+
- **Performance**: Beats Google Gemini on CVTG-2k text rendering benchmark (0.9116 vs 0.7788)
|
|
202
|
+
- **Pricing**: $0.015 per image (2nd cheapest paid option after Replicate $0.003)
|
|
203
|
+
- **Rate Limit**: 15 images/minute
|
|
204
|
+
- **Resolution**: Up to 2048x2048
|
|
205
|
+
- **Supported Formats**: PNG
|
|
206
|
+
- **Files Added**:
|
|
207
|
+
- `src/providers/zai/zai-provider.ts` - Complete Z.ai provider implementation
|
|
208
|
+
- `src/types/generation.ts` - Added 'zai' to ProviderType
|
|
209
|
+
- **Configuration**:
|
|
210
|
+
- Environment variable: `ZAI_API_KEY=zai-...`
|
|
211
|
+
- Runtime configuration via `configure_provider` MCP tool
|
|
212
|
+
- **Features**:
|
|
213
|
+
- Custom prompt adaptation for text-heavy content
|
|
214
|
+
- Emphasizes accurate text rendering and clear labels
|
|
215
|
+
- Professional diagram focus
|
|
216
|
+
- Multilingual support
|
|
217
|
+
- **Benefits**:
|
|
218
|
+
- ✅ Best-in-class text rendering for technical diagrams
|
|
219
|
+
- ✅ Cost-effective at $0.015/image
|
|
220
|
+
- ✅ Perfect for documentation with text-heavy visualizations
|
|
221
|
+
|
|
222
|
+
#### Comprehensive Documentation
|
|
223
|
+
- **New Guide**: `docs/guides/comprehensive-guide.md` - Complete user and developer reference
|
|
224
|
+
- **Description**: Overview of Visual Forge MCP features and use cases
|
|
225
|
+
- **Installation**: Step-by-step setup from prerequisites to MCP client configuration
|
|
226
|
+
- **Architecture**: Detailed system components and data flow diagrams
|
|
227
|
+
- **Environment Variables**: Complete reference for all configuration options
|
|
228
|
+
- **Provider & Model System**: Guide to 8 providers, model selection, capabilities
|
|
229
|
+
- **Usage Workflows**: Basic to advanced workflows with code examples
|
|
230
|
+
- **MCP Tools Reference**: All 13+ MCP tools with parameters and response examples
|
|
231
|
+
- **Testing**: Test infrastructure, running tests, writing tests, manual testing
|
|
232
|
+
- **Troubleshooting**: Common issues and debug strategies
|
|
233
|
+
- **Advanced Topics**: Custom providers, pricing config, backups, optimization
|
|
234
|
+
- **Updated**:
|
|
235
|
+
- `README.md` - Added Z.ai provider, multi-model architecture feature, comprehensive guide link
|
|
236
|
+
- Version badge updated to 0.8.0
|
|
237
|
+
- Provider count updated to 8
|
|
238
|
+
- Added `ZAI_API_KEY` to environment variable examples
|
|
239
|
+
|
|
240
|
+
### Changed
|
|
241
|
+
|
|
242
|
+
#### Provider Priority Order
|
|
243
|
+
- **New Default Priority**: Updated to prioritize cost-effectiveness
|
|
244
|
+
1. `replicate` - Cheapest ($0.003)
|
|
245
|
+
2. `zai` - 2nd cheapest ($0.015) ✨ NEW
|
|
246
|
+
3. `gemini` - Free tier
|
|
247
|
+
4. `huggingface` - Free tier
|
|
248
|
+
5. Others by cost
|
|
249
|
+
|
|
250
|
+
#### Pricing Configuration
|
|
251
|
+
- **Updated**: `config/pricing.json` version 2.1.0
|
|
252
|
+
- Added Z.ai provider with GLM-Image model
|
|
253
|
+
- Updated cost comparison to highlight Z.ai as 2nd cheapest
|
|
254
|
+
- Added recommended provider section (Z.ai for technical documentation)
|
|
255
|
+
- Complete model specifications for all providers
|
|
256
|
+
|
|
257
|
+
### Fixed
|
|
258
|
+
|
|
259
|
+
#### TypeScript Type Safety
|
|
260
|
+
- **Fixed**: Handling of optional `models` field in ProviderConfig
|
|
261
|
+
- BaseProvider.init() now safely accesses models array
|
|
262
|
+
- BaseProvider.getDefaultModel() handles undefined models array
|
|
263
|
+
- BaseProvider.estimateCost() checks models existence before access
|
|
264
|
+
- ZaiProvider.generateImage() properly extracts prompt text
|
|
265
|
+
- **Fixed**: Removed unused imports (ProviderModel, fs, path, fileURLToPath from ProviderFactory)
|
|
266
|
+
- **Resolved**: All TypeScript compilation errors for multi-model architecture
|
|
267
|
+
|
|
268
|
+
#### Backward Compatibility
|
|
269
|
+
- **Ensured**: Legacy single-model providers continue to work
|
|
270
|
+
- `config.model` field still supported
|
|
271
|
+
- Automatic conversion to ProviderModel format
|
|
272
|
+
- Fallback to legacy cost if models array missing
|
|
273
|
+
|
|
274
|
+
### Testing
|
|
275
|
+
|
|
276
|
+
#### Test Results
|
|
277
|
+
- **All Tests Passed**: 77 tests across 4 test suites
|
|
278
|
+
- ProviderFactory: 21 tests (provider initialization, selection, fallback, priority)
|
|
279
|
+
- All existing tests remain green with multi-model changes
|
|
280
|
+
- Backward compatibility verified
|
|
281
|
+
|
|
282
|
+
---
|
|
283
|
+
|
|
12
284
|
## [0.7.0] - 2026-01-16
|
|
13
285
|
|
|
14
286
|
### Added 🎨 **Professional Image Generation Pipeline**
|
package/README.md
CHANGED
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
An MCP (Model Context Protocol) server that automates AI image generation for technical documentation. Parse markdown files containing image prompts and generate professional images using multiple AI providers.
|
|
8
8
|
|
|
9
9
|
[](https://www.npmjs.com/package/@michelabboud/visual-forge-mcp)
|
|
10
|
-
[](https://github.com/michelabboud/visual-forge-mcp)
|
|
11
11
|
[](LICENSE)
|
|
12
12
|
[](https://nodejs.org/)
|
|
13
13
|
|
|
@@ -16,7 +16,7 @@ An MCP (Model Context Protocol) server that automates AI image generation for te
|
|
|
16
16
|
## ๐ Features
|
|
17
17
|
|
|
18
18
|
- **๐ Multi-Format Support**: Extract image specifications from Markdown (.md) and HTML (.html) files
|
|
19
|
-
- **๐ Multi-Provider Support**:
|
|
19
|
+
- **๐ Multi-Provider Support**: 8 AI providers with automatic fallback
|
|
20
20
|
- OpenAI GPT Image
|
|
21
21
|
- Google Gemini 2.5 Flash Image (Nano Banana)
|
|
22
22
|
- Stability AI SDXL
|
|
@@ -24,6 +24,14 @@ An MCP (Model Context Protocol) server that automates AI image generation for te
|
|
|
24
24
|
- Leonardo Phoenix
|
|
25
25
|
- HuggingFace Inference
|
|
26
26
|
- xAI Grok 2 Image
|
|
27
|
+
- Z.ai GLM-Image ✨ NEW - Excellent for text-heavy diagrams
|
|
28
|
+
- **🎯 Multi-Model Architecture** ✨ NEW v0.8.0: Each provider can offer multiple models with different capabilities and pricing (e.g., OpenAI GPT Image 1 vs GPT Image 1 HD)
|
|
29
|
+
- **🧪 Model Testing & Comparison** ✨ NEW v0.9.0: Test and compare AI models before production use
|
|
30
|
+
- Standard automated tests with quality scoring (sharpness, brightness, text rendering, color accuracy)
|
|
31
|
+
- Custom prompt testing with real use cases
|
|
32
|
+
- Side-by-side multi-provider comparison with intelligent recommendations
|
|
33
|
+
- Permission flow for cost-aware testing
|
|
34
|
+
- Persistent test results for historical tracking
|
|
27
35
|
- **🎨 Detailed Global Context**: Comprehensive styling system with hex colors, typography, layout rules, and audience targeting for dramatically better, more consistent images
|
|
28
36
|
- **🖼️ Multi-Format Optimization** ✨ NEW v0.7.0: Automatic generation of WebP (94% smaller), JPEG (85% smaller), and optional lossy PNG (70% smaller) with professional watermarking
|
|
29
37
|
- **๐ Quality Validation & Auto-Regeneration** โจ NEW v0.7.0: OCR-based text detection, sharpness/brightness analysis, and automatic retry on quality failure
|
|
@@ -96,6 +104,9 @@ OPENAI_API_KEY=sk-...
|
|
|
96
104
|
# xAI Grok 2 Image ($0.07/image)
|
|
97
105
|
XAI_API_KEY=xai-...
|
|
98
106
|
|
|
107
|
+
# Z.ai GLM-Image ($0.015/image) - NEW: Excellent for text-heavy diagrams
|
|
108
|
+
ZAI_API_KEY=zai-...
|
|
109
|
+
|
|
99
110
|
# Leonardo Phoenix ($0.02/image)
|
|
100
111
|
LEONARDO_API_KEY=...
|
|
101
112
|
|
|
@@ -115,6 +126,7 @@ IMAGE_GEN_DEFAULT_PROVIDER=gemini
|
|
|
115
126
|
- **Stability AI**: [https://platform.stability.ai](https://platform.stability.ai)
|
|
116
127
|
- **OpenAI**: [https://platform.openai.com](https://platform.openai.com)
|
|
117
128
|
- **xAI**: [https://console.x.ai](https://console.x.ai)
|
|
129
|
+
- **Z.ai**: [https://z.ai](https://z.ai) ✨ NEW - Best for text-heavy diagrams
|
|
118
130
|
- **Leonardo**: [https://leonardo.ai](https://leonardo.ai)
|
|
119
131
|
- **HuggingFace**: [https://huggingface.co](https://huggingface.co)
|
|
120
132
|
|
|
@@ -928,6 +940,187 @@ Remove API key for a provider.
|
|
|
928
940
|
|
|
929
941
|
**Example**: "Remove my OpenAI API key"
|
|
930
942
|
|
|
943
|
+
### Model Selection & Testing Tools ✨ NEW v0.9.0
|
|
944
|
+
|
|
945
|
+
#### `set_default_model`
|
|
946
|
+
Set the default model for a provider. This model will be used for all future generations unless overridden.
|
|
947
|
+
|
|
948
|
+
```json
|
|
949
|
+
{
|
|
950
|
+
"provider": "zai",
|
|
951
|
+
"modelId": "glm-image"
|
|
952
|
+
}
|
|
953
|
+
```
|
|
954
|
+
|
|
955
|
+
**Example**: "Set Z.ai to use the GLM-Image model"
|
|
956
|
+
|
|
957
|
+
**Returns**:
|
|
958
|
+
```json
|
|
959
|
+
{
|
|
960
|
+
"success": true,
|
|
961
|
+
"provider": "zai",
|
|
962
|
+
"modelId": "glm-image",
|
|
963
|
+
"modelName": "GLM-Image",
|
|
964
|
+
"message": "Default model set to 'GLM-Image' for Z.ai GLM-Image..."
|
|
965
|
+
}
|
|
966
|
+
```
|
|
967
|
+
|
|
968
|
+
#### `get_model_info`
|
|
969
|
+
Get detailed information about a specific model, including test results if available.
|
|
970
|
+
|
|
971
|
+
```json
|
|
972
|
+
{
|
|
973
|
+
"provider": "gemini",
|
|
974
|
+
"modelId": "gemini-2.5-flash-image"
|
|
975
|
+
}
|
|
976
|
+
```
|
|
977
|
+
|
|
978
|
+
**Example**: "What are the details for Gemini Flash Image?"
|
|
979
|
+
|
|
980
|
+
**Returns**:
|
|
981
|
+
```json
|
|
982
|
+
{
|
|
983
|
+
"success": true,
|
|
984
|
+
"provider": "gemini",
|
|
985
|
+
"providerName": "Google Gemini 2.5 Flash Image",
|
|
986
|
+
"model": {
|
|
987
|
+
"id": "gemini-2.5-flash-image",
|
|
988
|
+
"name": "Gemini 2.5 Flash Image",
|
|
989
|
+
"costPerImage": 0.0,
|
|
990
|
+
"description": "Fast, free-tier image generation",
|
|
991
|
+
"capabilities": {
|
|
992
|
+
"maxResolution": "2048x2048",
|
|
993
|
+
"supportedAspectRatios": ["1:1", "16:9", "4:3", "9:16"]
|
|
994
|
+
}
|
|
995
|
+
},
|
|
996
|
+
"testResult": {
|
|
997
|
+
"testedAt": "2026-01-16T10:30:00.000Z",
|
|
998
|
+
"qualityScore": 85.5,
|
|
999
|
+
"passed": true
|
|
1000
|
+
},
|
|
1001
|
+
"message": "Model tested on 1/16/2026 with quality score 85.5/100"
|
|
1002
|
+
}
|
|
1003
|
+
```
|
|
1004
|
+
|
|
1005
|
+
#### `test_model`
|
|
1006
|
+
Test a model with either standard test (automated) or custom prompt (user-provided). Records quality score for future reference.
|
|
1007
|
+
|
|
1008
|
+
**Standard Test (Automated Validation)**:
|
|
1009
|
+
```json
|
|
1010
|
+
{
|
|
1011
|
+
"provider": "zai",
|
|
1012
|
+
"modelId": "glm-image",
|
|
1013
|
+
"useStandardTest": true,
|
|
1014
|
+
"aspectRatio": "16:9"
|
|
1015
|
+
}
|
|
1016
|
+
```
|
|
1017
|
+
|
|
1018
|
+
**Custom Prompt Test (Real Use Case)**:
|
|
1019
|
+
```json
|
|
1020
|
+
{
|
|
1021
|
+
"provider": "gemini",
|
|
1022
|
+
"modelId": "gemini-2.5-flash-image",
|
|
1023
|
+
"prompt": "AWS VPC architecture diagram showing public/private subnets, NAT gateway, and EC2 instances",
|
|
1024
|
+
"aspectRatio": "16:9"
|
|
1025
|
+
}
|
|
1026
|
+
```
|
|
1027
|
+
|
|
1028
|
+
**Example**: "Test the Z.ai GLM-Image model with a standard quality test"
|
|
1029
|
+
|
|
1030
|
+
**Returns**:
|
|
1031
|
+
```json
|
|
1032
|
+
{
|
|
1033
|
+
"success": true,
|
|
1034
|
+
"provider": "zai",
|
|
1035
|
+
"providerName": "Z.ai GLM-Image",
|
|
1036
|
+
"model": "GLM-Image",
|
|
1037
|
+
"testImage": {
|
|
1038
|
+
"filepath": "generated-images/tests/zai-glm-image-test.png",
|
|
1039
|
+
"generationTime": 12000,
|
|
1040
|
+
"actualCost": 0.015
|
|
1041
|
+
},
|
|
1042
|
+
"qualityScore": {
|
|
1043
|
+
"overall": 87.5,
|
|
1044
|
+
"sharpness": 89.2,
|
|
1045
|
+
"brightness": 145,
|
|
1046
|
+
"textRendering": 85.0,
|
|
1047
|
+
"colorAccuracy": 90.0,
|
|
1048
|
+
"passed": true
|
|
1049
|
+
},
|
|
1050
|
+
"message": "Model test passed! Quality score: 87.5/100. Model is ready for production use."
|
|
1051
|
+
}
|
|
1052
|
+
```
|
|
1053
|
+
|
|
1054
|
+
**Quality Metrics**:
|
|
1055
|
+
- **Sharpness (30%)**: Laplacian variance analysis for edge detection
|
|
1056
|
+
- **Brightness (20%)**: Average brightness in optimal range (30-240)
|
|
1057
|
+
- **Text Rendering (40%)**: Estimated OCR accuracy and text clarity
|
|
1058
|
+
- **Color Accuracy (10%)**: Heuristic-based color validation
|
|
1059
|
+
- **Overall Score**: Weighted average, pass threshold 60/100
|
|
1060
|
+
|
|
1061
|
+
#### `compare_models`
|
|
1062
|
+
Compare multiple providers/models side-by-side with the same prompt. Generates quality scores and recommendation.
|
|
1063
|
+
|
|
1064
|
+
```json
|
|
1065
|
+
{
|
|
1066
|
+
"prompt": "Technical diagram showing microservices architecture with API gateway, service mesh, and databases",
|
|
1067
|
+
"providers": [
|
|
1068
|
+
{ "provider": "gemini", "model": "gemini-2.5-flash-image" },
|
|
1069
|
+
{ "provider": "zai", "model": "glm-image" },
|
|
1070
|
+
{ "provider": "huggingface", "model": "black-forest-labs/FLUX.1-dev" }
|
|
1071
|
+
],
|
|
1072
|
+
"aspectRatio": "16:9"
|
|
1073
|
+
}
|
|
1074
|
+
```
|
|
1075
|
+
|
|
1076
|
+
**Example**: "Compare Gemini, Z.ai, and HuggingFace FLUX models for generating a microservices architecture diagram"
|
|
1077
|
+
|
|
1078
|
+
**Returns**:
|
|
1079
|
+
```json
|
|
1080
|
+
{
|
|
1081
|
+
"success": true,
|
|
1082
|
+
"prompt": "Technical diagram showing microservices architecture...",
|
|
1083
|
+
"totalCost": 0.015,
|
|
1084
|
+
"totalTime": 35000,
|
|
1085
|
+
"results": [
|
|
1086
|
+
{
|
|
1087
|
+
"provider": "zai",
|
|
1088
|
+
"model": "GLM-Image",
|
|
1089
|
+
"qualityScore": { "overall": 92.1, "textRendering": 95.8 },
|
|
1090
|
+
"cost": 0.015,
|
|
1091
|
+
"rank": 1
|
|
1092
|
+
},
|
|
1093
|
+
{
|
|
1094
|
+
"provider": "gemini",
|
|
1095
|
+
"model": "Gemini Flash Image",
|
|
1096
|
+
"qualityScore": { "overall": 85.5 },
|
|
1097
|
+
"cost": 0.0,
|
|
1098
|
+
"rank": 2
|
|
1099
|
+
},
|
|
1100
|
+
{
|
|
1101
|
+
"provider": "huggingface",
|
|
1102
|
+
"model": "FLUX.1-dev",
|
|
1103
|
+
"qualityScore": { "overall": 78.3 },
|
|
1104
|
+
"cost": 0.0,
|
|
1105
|
+
"rank": 3
|
|
1106
|
+
}
|
|
1107
|
+
],
|
|
1108
|
+
"recommendation": {
|
|
1109
|
+
"provider": "zai",
|
|
1110
|
+
"model": "glm-image",
|
|
1111
|
+
"reason": "Highest overall quality (92.1/100), especially excellent text rendering (95.8/100). Worth the $0.015 cost for technical diagrams.",
|
|
1112
|
+
"alternatives": [
|
|
1113
|
+
{
|
|
1114
|
+
"provider": "gemini",
|
|
1115
|
+
"model": "gemini-2.5-flash-image",
|
|
1116
|
+
"reason": "Free alternative with good quality (85.5/100)"
|
|
1117
|
+
}
|
|
1118
|
+
]
|
|
1119
|
+
},
|
|
1120
|
+
"message": "Comparison complete! zai glm-image scored highest (rank 1)..."
|
|
1121
|
+
}
|
|
1122
|
+
```
|
|
1123
|
+
|
|
931
1124
|
### Image Generation Tools
|
|
932
1125
|
|
|
933
1126
|
#### `parse_markdown`
|
|
@@ -1327,6 +1520,7 @@ For commercial licensing inquiries, please contact the author.
|
|
|
1327
1520
|
## ๐ Resources
|
|
1328
1521
|
|
|
1329
1522
|
### Documentation
|
|
1523
|
+
- **๐ Comprehensive Guide** โจ NEW: Complete guide covering installation, architecture, environment variables, usage workflows, and testing - [docs/guides/comprehensive-guide.md](docs/guides/comprehensive-guide.md)
|
|
1330
1524
|
- **Visual MCP Server Specification**: [docs/VISUAL_MCP_SERVER.md](docs/VISUAL_MCP_SERVER.md)
|
|
1331
1525
|
- **Implementation Plan**: [docs/IMPLEMENTATION-PLAN.md](docs/IMPLEMENTATION-PLAN.md)
|
|
1332
1526
|
- **Implementation Report**: [docs/IMPLEMENTATION_REPORT.md](docs/IMPLEMENTATION_REPORT.md)
|
package/config/pricing.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"$schema": "./pricing-schema.json",
|
|
3
|
-
"version": "2.
|
|
4
|
-
"lastUpdated": "2026-01-
|
|
5
|
-
"lastChecked": "2026-01-
|
|
3
|
+
"version": "2.1.0",
|
|
4
|
+
"lastUpdated": "2026-01-16",
|
|
5
|
+
"lastChecked": "2026-01-16T06:15:00.000Z",
|
|
6
6
|
"providers": {
|
|
7
7
|
"openai": {
|
|
8
8
|
"name": "OpenAI",
|
|
@@ -184,6 +184,24 @@
|
|
|
184
184
|
"notes": "Preview model, pricing may change"
|
|
185
185
|
}
|
|
186
186
|
}
|
|
187
|
+
},
|
|
188
|
+
"zai": {
|
|
189
|
+
"name": "Z.ai (Zhipu AI)",
|
|
190
|
+
"pricingUrl": "https://docs.z.ai/guides/overview/pricing",
|
|
191
|
+
"defaultModel": "glm-image",
|
|
192
|
+
"models": {
|
|
193
|
+
"glm-image": {
|
|
194
|
+
"name": "GLM-Image",
|
|
195
|
+
"costPerImage": 0.015,
|
|
196
|
+
"rateLimit": 15,
|
|
197
|
+
"timeout": 90000,
|
|
198
|
+
"maxDimensions": {
|
|
199
|
+
"width": 2048,
|
|
200
|
+
"height": 2048
|
|
201
|
+
},
|
|
202
|
+
"notes": "16B hybrid autoregressive + diffusion model. Excellent for text-heavy diagrams, posters, and knowledge-dense images. Beats Gemini on text rendering benchmarks."
|
|
203
|
+
}
|
|
204
|
+
}
|
|
187
205
|
}
|
|
188
206
|
},
|
|
189
207
|
"costComparison": {
|
|
@@ -192,10 +210,20 @@
|
|
|
192
210
|
"model": "black-forest-labs/flux-schnell",
|
|
193
211
|
"cost": 0.003
|
|
194
212
|
},
|
|
213
|
+
"secondCheapest": {
|
|
214
|
+
"provider": "zai",
|
|
215
|
+
"model": "glm-image",
|
|
216
|
+
"cost": 0.015
|
|
217
|
+
},
|
|
195
218
|
"recommended": {
|
|
219
|
+
"provider": "zai",
|
|
220
|
+
"model": "glm-image",
|
|
221
|
+
"reason": "Best for text-heavy diagrams and technical documentation. Excellent text rendering, 2nd cheapest paid option."
|
|
222
|
+
},
|
|
223
|
+
"recommendedAlternative": {
|
|
196
224
|
"provider": "replicate",
|
|
197
225
|
"model": "black-forest-labs/flux-schnell",
|
|
198
|
-
"reason": "
|
|
226
|
+
"reason": "Cheapest option, excellent quality/cost ratio for general images"
|
|
199
227
|
},
|
|
200
228
|
"fastest": [
|
|
201
229
|
{
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Base provider class - all providers extend this
|
|
3
3
|
*/
|
|
4
|
-
import { IImageProvider, ProviderType, ProviderConfig, ProviderCapabilities, GenerationOptions, ImageSpec, GlobalContext, GeneratedImage } from '../types/index.js';
|
|
4
|
+
import { IImageProvider, ProviderType, ProviderConfig, ProviderCapabilities, ProviderModel, GenerationOptions, ImageSpec, GlobalContext, GeneratedImage } from '../types/index.js';
|
|
5
5
|
import { HttpClient } from '../utils/index.js';
|
|
6
6
|
export declare abstract class BaseProvider implements IImageProvider {
|
|
7
7
|
abstract readonly name: ProviderType;
|
|
@@ -13,6 +13,14 @@ export declare abstract class BaseProvider implements IImageProvider {
|
|
|
13
13
|
* Initialize provider (call after construction)
|
|
14
14
|
*/
|
|
15
15
|
protected init(): void;
|
|
16
|
+
/**
|
|
17
|
+
* Get available models for this provider
|
|
18
|
+
*/
|
|
19
|
+
getModels(): ProviderModel[];
|
|
20
|
+
/**
|
|
21
|
+
* Get default model for this provider
|
|
22
|
+
*/
|
|
23
|
+
getDefaultModel(): ProviderModel;
|
|
16
24
|
/**
|
|
17
25
|
* Create output path for generated image with new index-based directory structure
|
|
18
26
|
* Structure: generated-images/{index}-{filename}/{provider}/{image-id}.png
|
|
@@ -29,7 +37,7 @@ export declare abstract class BaseProvider implements IImageProvider {
|
|
|
29
37
|
* Save image metadata and generation log
|
|
30
38
|
*/
|
|
31
39
|
private saveImageMetadata;
|
|
32
|
-
estimateCost(_spec: ImageSpec): number;
|
|
40
|
+
estimateCost(_spec: ImageSpec, modelId?: string): number;
|
|
33
41
|
/**
|
|
34
42
|
* Provider-specific generation logic - must be implemented by subclasses
|
|
35
43
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"base-provider.d.ts","sourceRoot":"","sources":["../../src/providers/base-provider.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EACL,cAAc,EACd,YAAY,EACZ,cAAc,EACd,oBAAoB,EACpB,iBAAiB,EACjB,SAAS,EACT,aAAa,EACb,cAAc,EACf,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EACL,UAAU,EAWX,MAAM,mBAAmB,CAAC;AAM3B,8BAAsB,YAAa,YAAW,cAAc;IAC1D,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,YAAY,CAAC;IACrC,QAAQ,CAAC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAEtC,SAAS,CAAC,MAAM,EAAE,UAAU,CAAC;IAC7B,SAAS,CAAC,MAAM,EAAE,cAAc,CAAC;gBAErB,MAAM,EAAE,cAAc;IAWlC;;OAEG;IACH,SAAS,CAAC,IAAI,IAAI,IAAI;
|
|
1
|
+
{"version":3,"file":"base-provider.d.ts","sourceRoot":"","sources":["../../src/providers/base-provider.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EACL,cAAc,EACd,YAAY,EACZ,cAAc,EACd,oBAAoB,EACpB,aAAa,EACb,iBAAiB,EACjB,SAAS,EACT,aAAa,EACb,cAAc,EACf,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EACL,UAAU,EAWX,MAAM,mBAAmB,CAAC;AAM3B,8BAAsB,YAAa,YAAW,cAAc;IAC1D,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,YAAY,CAAC;IACrC,QAAQ,CAAC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAEtC,SAAS,CAAC,MAAM,EAAE,UAAU,CAAC;IAC7B,SAAS,CAAC,MAAM,EAAE,cAAc,CAAC;gBAErB,MAAM,EAAE,cAAc;IAWlC;;OAEG;IACH,SAAS,CAAC,IAAI,IAAI,IAAI;IAWtB;;OAEG;IACH,SAAS,IAAI,aAAa,EAAE;IAI5B;;OAEG;IACH,eAAe,IAAI,aAAa;IAgChC;;;;;;;OAOG;cACa,gBAAgB,CAAC,IAAI,EAAE,SAAS,EAAE,SAAS,GAAE,MAAc,GAAG,OAAO,CAAC,MAAM,CAAC;IAmC7F,IAAI,WAAW,IAAI,OAAO,CAEzB;IAED,YAAY,IAAI,OAAO;IASjB,QAAQ,CAAC,IAAI,EAAE,SAAS,EAAE,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,cAAc,CAAC;IA0MrF;;OAEG;YACW,iBAAiB;IAkH/B,YAAY,CAAC,KAAK,EAAE,SAAS,EAAE,OAAO,CAAC,EAAE,MAAM,GAAG,MAAM;IAkBxD;;OAEG;IACH,SAAS,CAAC,QAAQ,CAAC,aAAa,CAC9B,IAAI,EAAE,SAAS,EACf,OAAO,CAAC,EAAE,iBAAiB,GAC1B,OAAO,CAAC,cAAc,CAAC;IAE1B;;;OAGG;IACH,QAAQ,CAAC,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,aAAa,GAAG,MAAM,GAAG,GAAG;IAE3E;;OAEG;IACH,QAAQ,CAAC,eAAe,IAAI,oBAAoB;IAEhD;;;OAGG;IACH,QAAQ,CAAC,cAAc,IAAI,OAAO,CAAC;QACjC,OAAO,EAAE,OAAO,CAAC;QACjB,OAAO,EAAE,MAAM,CAAC;QAChB,OAAO,CAAC,EAAE,MAAM,CAAC;KAClB,CAAC;IAEF;;OAEG;IACH,SAAS,CAAC,cAAc,IAAI,IAAI;IAMhC;;OAEG;IACH,SAAS,CAAC,uBAAuB,CAAC,WAAW,EAAE,MAAM,GAAG;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE;CAY1F"}
|