@michelabboud/visual-forge-mcp 0.7.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/CHANGELOG.md +272 -0
  2. package/README.md +196 -2
  3. package/config/pricing.json +32 -4
  4. package/dist/providers/base-provider.d.ts +10 -2
  5. package/dist/providers/base-provider.d.ts.map +1 -1
  6. package/dist/providers/base-provider.js +53 -3
  7. package/dist/providers/base-provider.js.map +1 -1
  8. package/dist/providers/index.d.ts +2 -0
  9. package/dist/providers/index.d.ts.map +1 -1
  10. package/dist/providers/index.js +40 -2
  11. package/dist/providers/index.js.map +1 -1
  12. package/dist/providers/zai/zai-provider.d.ts +22 -0
  13. package/dist/providers/zai/zai-provider.d.ts.map +1 -0
  14. package/dist/providers/zai/zai-provider.js +154 -0
  15. package/dist/providers/zai/zai-provider.js.map +1 -0
  16. package/dist/quality/index.d.ts +1 -0
  17. package/dist/quality/index.d.ts.map +1 -1
  18. package/dist/quality/index.js +1 -0
  19. package/dist/quality/index.js.map +1 -1
  20. package/dist/quality/model-tester.d.ts +87 -0
  21. package/dist/quality/model-tester.d.ts.map +1 -0
  22. package/dist/quality/model-tester.js +357 -0
  23. package/dist/quality/model-tester.js.map +1 -0
  24. package/dist/server/mcp-server.d.ts +5 -0
  25. package/dist/server/mcp-server.d.ts.map +1 -1
  26. package/dist/server/mcp-server.js +371 -5
  27. package/dist/server/mcp-server.js.map +1 -1
  28. package/dist/types/generation.d.ts +1 -1
  29. package/dist/types/generation.d.ts.map +1 -1
  30. package/dist/types/provider.d.ts +28 -1
  31. package/dist/types/provider.d.ts.map +1 -1
  32. package/dist/utils/index.d.ts +1 -0
  33. package/dist/utils/index.d.ts.map +1 -1
  34. package/dist/utils/index.js +1 -0
  35. package/dist/utils/index.js.map +1 -1
  36. package/dist/utils/user-config-manager.d.ts +68 -0
  37. package/dist/utils/user-config-manager.d.ts.map +1 -0
  38. package/dist/utils/user-config-manager.js +131 -0
  39. package/dist/utils/user-config-manager.js.map +1 -0
  40. package/docs/guides/comprehensive-guide.md +1552 -0
  41. package/package.json +2 -2
package/CHANGELOG.md CHANGED
@@ -9,6 +9,278 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
9
9
 
10
10
  ---
11
11
 
12
+ ## [0.9.0] - 2026-01-16
13
+
14
+ ### Added 🧪 **Model Testing & Comparison System**
15
+
16
+ #### Model Selection Workflow (Phase 1)
17
+ - **Enhanced Provider Configuration**: `configure_provider` now returns available models
18
+ - Shows list of all models offered by the provider
19
+ - Displays model details: name, cost, description, capabilities
20
+ - Indicates which model is currently set as default
21
+ - Guides user to use `set_default_model` for customization
22
+ - **New MCP Tool: `set_default_model`**
23
+ - Choose preferred model for a provider
24
+ - Validates model exists before saving
25
+ - Persists choice in `~/.visual-forge-mcp/user-config.json`
26
+ - Applies to all future generations unless overridden
27
+ - **New MCP Tool: `get_model_info`**
28
+ - Retrieve detailed information about a specific model
29
+ - Shows capabilities, pricing, and description
30
+ - Displays test results if model was previously tested
31
+ - Helps users make informed model selection decisions
32
+ - **User Configuration Manager**: `src/utils/user-config-manager.ts`
33
+ - Persistent storage for user preferences
34
+ - Default model per provider
35
+ - Model test results with quality scores
36
+ - Atomic file writes for crash safety
37
+
38
+ #### Model Testing (Phase 2)
39
+ - **ModelTester Utility**: `src/quality/model-tester.ts`
40
+ - Comprehensive testing framework for AI models
41
+ - Standard automated tests and custom prompt tests
42
+ - Multi-provider comparison with side-by-side results
43
+ - Quality scoring algorithm with weighted metrics
44
+
45
+ - **Quality Scoring Algorithm**:
46
+ - **Sharpness (30%)**: Laplacian variance analysis for edge detection
47
+ - **Brightness (20%)**: Average brightness in optimal range (30-240)
48
+ - **Text Rendering (40%)**: Estimated OCR accuracy and text clarity
49
+ - **Color Accuracy (10%)**: Heuristic-based color validation
50
+ - **Overall Score**: Weighted average, pass threshold 60/100
51
+ - **Auto-Recording**: Test results saved for future reference
52
+
53
+ - **Standard Test Prompt**:
54
+ - Professional quality validation image
55
+ - Multiple text elements (title, subtitle, detailed text)
56
+ - Geometric shapes (red circle, blue square, green triangle)
57
+ - Color gradient background (#1a365d to #0891b2)
58
+ - Technical diagram (simple flowchart)
59
+ - Designed to test sharpness, color accuracy, and text rendering
60
+
61
+ - **New MCP Tool: `test_model`**
62
+ - Two modes:
63
+ - **Standard Test**: Automated quality validation with predefined elements
64
+ - **Custom Prompt**: Test with user's actual use case
65
+ - Cost permission flow (requires confirmation for paid models)
66
+ - Automatic quality scoring and pass/fail determination
67
+ - Test images saved to `generated-images/tests/`
68
+ - Results include quality metrics, generation time, and cost
69
+
70
+ - **New MCP Tool: `compare_models`**
71
+ - Side-by-side comparison of multiple providers/models
72
+ - Same prompt tested across all selected models
73
+ - Automatic ranking by quality score
74
+ - Intelligent recommendation with reasoning
75
+ - Alternative suggestions for budget-conscious users
76
+ - Total cost and time tracking
77
+ - Detailed results per model with success/failure handling
78
+
79
+ - **Permission Flow**:
80
+ - First call shows cost estimate and requires confirmation
81
+ - User confirms by calling again with `skipPermission: true`
82
+ - Prevents accidental spending on paid models
83
+ - Free models (Gemini, HuggingFace) can skip permission
84
+
85
+ #### Files Added
86
+ - `src/quality/model-tester.ts` - Model testing and comparison utility
87
+ - `docs/design/model-selection-workflow.md` - Complete workflow specification
88
+ - `docs/design/IMPLEMENTATION_STATUS.md` - Implementation tracking
89
+
90
+ #### Files Modified
91
+ - `src/server/mcp-server.ts`:
92
+ - Added `userConfigManager` import
93
+ - Added `ModelTester` instance
94
+ - Updated `configure_provider` handler to return models
95
+ - Added `handleSetDefaultModel()` handler
96
+ - Added `handleGetModelInfo()` handler
97
+ - Added `handleTestModel()` handler
98
+ - Added `handleCompareModels()` handler
99
+ - Added routing for 5 new tools
100
+ - Added 'zai' to all provider enums
101
+ - `src/quality/index.ts` - Exported ModelTester
102
+ - `src/utils/index.ts` - Exported UserConfigManager (from 0.8.0)
103
+
104
+ ### Benefits
105
+
106
+ #### For Users
107
+ - ✅ **Informed Decisions**: Test before committing to a model
108
+ - ✅ **Quality Assurance**: Validate model performance on your use case
109
+ - ✅ **Cost Awareness**: See estimated costs before generation
110
+ - ✅ **Traceability**: Historical test results for comparison
111
+ - ✅ **Flexibility**: Different models for different use cases
112
+ - ✅ **Side-by-Side Comparison**: Objective ranking with recommendations
113
+
114
+ #### For Workflows
115
+ - ✅ **Standard Tests**: Quick automated validation of model quality
116
+ - ✅ **Custom Tests**: Real-world testing with actual prompts
117
+ - ✅ **Multi-Provider**: Compare 2-8 providers in single call
118
+ - ✅ **Automatic Ranking**: Data-driven recommendations
119
+ - ✅ **Error Handling**: Graceful failures don't block comparisons
120
+
121
+ ### Example Usage
122
+
123
+ ```typescript
124
+ // 1. Configure provider and see models
125
+ configure_provider({ provider: "zai", apiKey: "zai-..." })
126
+ // Returns: List of available models with costs
127
+
128
+ // 2. Set preferred model
129
+ set_default_model({ provider: "zai", modelId: "glm-image" })
130
+
131
+ // 3. Test with standard prompt (automated)
132
+ test_model({
133
+ provider: "zai",
134
+ modelId: "glm-image",
135
+ useStandardTest: true
136
+ })
137
+
138
+ // 4. Test with custom prompt (real use case)
139
+ test_model({
140
+ provider: "gemini",
141
+ modelId: "gemini-2.5-flash-image",
142
+ prompt: "AWS VPC architecture diagram with public/private subnets"
143
+ })
144
+
145
+ // 5. Compare multiple models
146
+ compare_models({
147
+ prompt: "Technical diagram showing microservices architecture",
148
+ providers: [
149
+ { provider: "gemini", model: "gemini-2.5-flash-image" },
150
+ { provider: "zai", model: "glm-image" },
151
+ { provider: "huggingface", model: "black-forest-labs/FLUX.1-dev" }
152
+ ]
153
+ })
154
+ // Returns: Ranked results with recommendation
155
+ ```
156
+
157
+ ### Technical Details
158
+
159
+ - **Test Image Storage**: `generated-images/tests/`
160
+ - **Config Storage**: `~/.visual-forge-mcp/user-config.json`
161
+ - **Quality Metrics**: Sharpness, brightness, text rendering, color accuracy
162
+ - **Pass Threshold**: 60/100 overall score
163
+ - **Concurrent Testing**: Up to 3 parallel model tests in comparison mode
164
+
165
+ ---
166
+
167
+ ## [0.8.0] - 2026-01-16
168
+
169
+ ### Added 🎯 **Multi-Model Architecture & Z.ai Provider**
170
+
171
+ #### Multi-Model Architecture
172
+ - **Provider-Model Separation**: Distinguish between providers (companies) and models (specific AI implementations)
173
+ - One provider can offer multiple models with different capabilities and pricing
174
+ - Example: OpenAI offers `gpt-image-1` (standard, $0.04) and `gpt-image-1-hd` (HD, $0.12)
175
+ - Example: Gemini offers `gemini-2.5-flash-image` (2K) and `gemini-2.5-flash-image-pro` (4K)
176
+ - Example: Replicate offers `flux-schnell` ($0.003), `flux-dev` ($0.025), `flux-pro` ($0.055)
177
+ - **New Type System**:
178
+ - `ProviderModel` interface: Represents individual models with id, name, cost, capabilities
179
+ - `IImageProvider.getModels()`: Returns array of available models
180
+ - `IImageProvider.getDefaultModel()`: Returns provider's default model
181
+ - `IImageProvider.estimateCost(spec, modelId?)`: Model-aware cost estimation
182
+ - **Backward Compatibility**:
183
+ - Optional `models` and `defaultModel` fields in `ProviderConfig`
184
+ - Legacy `model` field still supported for single-model providers
185
+ - Automatic fallback to legacy config if models array not provided
186
+ - **Files Modified**:
187
+ - `src/types/provider.ts` - Added ProviderModel interface, updated IImageProvider
188
+ - `src/providers/base-provider.ts` - Implemented getModels(), getDefaultModel(), updated estimateCost()
189
+ - `src/providers/index.ts` - Updated all provider initializations
190
+ - `config/pricing.json` - Restructured to support models array per provider (v2.1.0)
191
+ - **Benefits**:
192
+ - ✅ Future-proof architecture for providers with multiple model offerings
193
+ - ✅ Granular cost control per model
194
+ - ✅ Better model selection and capabilities reporting
195
+ - ✅ Full backward compatibility with existing code
196
+
197
+ #### Z.ai GLM-Image Provider (8th Provider)
198
+ - **New Provider**: Z.ai (Zhipu AI) with GLM-Image model
199
+ - **Model**: GLM-Image - 16B parameter hybrid autoregressive + diffusion model
200
+ - **Specialty**: Excellent for text-heavy diagrams, posters, and knowledge-dense images
201
+ - **Performance**: Beats Google Gemini on CVTG-2k text rendering benchmark (0.9116 vs 0.7788)
202
+ - **Pricing**: $0.015 per image (2nd cheapest paid option after Replicate $0.003)
203
+ - **Rate Limit**: 15 images/minute
204
+ - **Resolution**: Up to 2048x2048
205
+ - **Supported Formats**: PNG
206
+ - **Files Added**:
207
+ - `src/providers/zai/zai-provider.ts` - Complete Z.ai provider implementation
208
+ - `src/types/generation.ts` - Added 'zai' to ProviderType
209
+ - **Configuration**:
210
+ - Environment variable: `ZAI_API_KEY=zai-...`
211
+ - Runtime configuration via `configure_provider` MCP tool
212
+ - **Features**:
213
+ - Custom prompt adaptation for text-heavy content
214
+ - Emphasizes accurate text rendering and clear labels
215
+ - Professional diagram focus
216
+ - Multilingual support
217
+ - **Benefits**:
218
+ - ✅ Best-in-class text rendering for technical diagrams
219
+ - ✅ Cost-effective at $0.015/image
220
+ - ✅ Perfect for documentation with text-heavy visualizations
221
+
222
+ #### Comprehensive Documentation
223
+ - **New Guide**: `docs/guides/comprehensive-guide.md` - Complete user and developer reference
224
+ - **Description**: Overview of Visual Forge MCP features and use cases
225
+ - **Installation**: Step-by-step setup from prerequisites to MCP client configuration
226
+ - **Architecture**: Detailed system components and data flow diagrams
227
+ - **Environment Variables**: Complete reference for all configuration options
228
+ - **Provider & Model System**: Guide to 8 providers, model selection, capabilities
229
+ - **Usage Workflows**: Basic to advanced workflows with code examples
230
+ - **MCP Tools Reference**: All 13+ MCP tools with parameters and response examples
231
+ - **Testing**: Test infrastructure, running tests, writing tests, manual testing
232
+ - **Troubleshooting**: Common issues and debug strategies
233
+ - **Advanced Topics**: Custom providers, pricing config, backups, optimization
234
+ - **Updated**:
235
+ - `README.md` - Added Z.ai provider, multi-model architecture feature, comprehensive guide link
236
+ - Version badge updated to 0.8.0
237
+ - Provider count updated to 8
238
+ - Added `ZAI_API_KEY` to environment variable examples
239
+
240
+ ### Changed
241
+
242
+ #### Provider Priority Order
243
+ - **New Default Priority**: Updated to prioritize cost-effectiveness
244
+ 1. `replicate` - Cheapest ($0.003)
245
+ 2. `zai` - 2nd cheapest ($0.015) ✨ NEW
246
+ 3. `gemini` - Free tier
247
+ 4. `huggingface` - Free tier
248
+ 5. Others by cost
249
+
250
+ #### Pricing Configuration
251
+ - **Updated**: `config/pricing.json` version 2.1.0
252
+ - Added Z.ai provider with GLM-Image model
253
+ - Updated cost comparison to highlight Z.ai as 2nd cheapest
254
+ - Added recommended provider section (Z.ai for technical documentation)
255
+ - Complete model specifications for all providers
256
+
257
+ ### Fixed
258
+
259
+ #### TypeScript Type Safety
260
+ - **Fixed**: Handling of optional `models` field in ProviderConfig
261
+ - BaseProvider.init() now safely accesses models array
262
+ - BaseProvider.getDefaultModel() handles undefined models array
263
+ - BaseProvider.estimateCost() checks models existence before access
264
+ - ZaiProvider.generateImage() properly extracts prompt text
265
+ - **Fixed**: Removed unused imports (ProviderModel, fs, path, fileURLToPath from ProviderFactory)
266
+ - **Resolved**: All TypeScript compilation errors for multi-model architecture
267
+
268
+ #### Backward Compatibility
269
+ - **Ensured**: Legacy single-model providers continue to work
270
+ - `config.model` field still supported
271
+ - Automatic conversion to ProviderModel format
272
+ - Fallback to legacy cost if models array missing
273
+
274
+ ### Testing
275
+
276
+ #### Test Results
277
+ - **All Tests Passed**: 77 tests across 4 test suites
278
+ - ProviderFactory: 21 tests (provider initialization, selection, fallback, priority)
279
+ - All existing tests remain green with multi-model changes
280
+ - Backward compatibility verified
281
+
282
+ ---
283
+
12
284
  ## [0.7.0] - 2026-01-16
13
285
 
14
286
  ### Added 🎨 **Professional Image Generation Pipeline**
package/README.md CHANGED
@@ -7,7 +7,7 @@
7
7
  An MCP (Model Context Protocol) server that automates AI image generation for technical documentation. Parse markdown files containing image prompts and generate professional images using multiple AI providers.
8
8
 
9
9
  [![npm version](https://img.shields.io/npm/v/@michelabboud/visual-forge-mcp.svg)](https://www.npmjs.com/package/@michelabboud/visual-forge-mcp)
10
- [![Version](https://img.shields.io/badge/version-0.4.0-blue.svg)](https://github.com/michelabboud/visual-forge-mcp)
10
+ [![Version](https://img.shields.io/badge/version-0.9.0-blue.svg)](https://github.com/michelabboud/visual-forge-mcp)
11
11
  [![License](https://img.shields.io/badge/License-Personal%20Use%20Only-red.svg)](LICENSE)
12
12
  [![Node.js](https://img.shields.io/badge/node-%3E%3D24.0.0-brightgreen.svg)](https://nodejs.org/)
13
13
 
@@ -16,7 +16,7 @@ An MCP (Model Context Protocol) server that automates AI image generation for te
16
16
  ## 🌟 Features
17
17
 
18
18
  - **๐Ÿ“ Multi-Format Support**: Extract image specifications from Markdown (.md) and HTML (.html) files
19
- - **🔌 Multi-Provider Support**: 7 AI providers with automatic fallback
19
+ - **🔌 Multi-Provider Support**: 8 AI providers with automatic fallback
20
20
  - OpenAI GPT Image
21
21
  - Google Gemini 2.5 Flash Image (Nano Banana)
22
22
  - Stability AI SDXL
@@ -24,6 +24,14 @@ An MCP (Model Context Protocol) server that automates AI image generation for te
24
24
  - Leonardo Phoenix
25
25
  - HuggingFace Inference
26
26
  - xAI Grok 2 Image
27
+ - Z.ai GLM-Image ✨ NEW - Excellent for text-heavy diagrams
28
+ - **🎯 Multi-Model Architecture** ✨ NEW v0.8.0: Each provider can offer multiple models with different capabilities and pricing (e.g., OpenAI GPT Image 1 vs GPT Image 1 HD)
29
+ - **🧪 Model Testing & Comparison** ✨ NEW v0.9.0: Test and compare AI models before production use
30
+ - Standard automated tests with quality scoring (sharpness, brightness, text rendering, color accuracy)
31
+ - Custom prompt testing with real use cases
32
+ - Side-by-side multi-provider comparison with intelligent recommendations
33
+ - Permission flow for cost-aware testing
34
+ - Persistent test results for historical tracking
27
35
  - **🎨 Detailed Global Context**: Comprehensive styling system with hex colors, typography, layout rules, and audience targeting for dramatically better, more consistent images
28
36
  - **🖼️ Multi-Format Optimization** ✨ NEW v0.7.0: Automatic generation of WebP (94% smaller), JPEG (85% smaller), and optional lossy PNG (70% smaller) with professional watermarking
29
37
  - **๐Ÿ” Quality Validation & Auto-Regeneration** โœจ NEW v0.7.0: OCR-based text detection, sharpness/brightness analysis, and automatic retry on quality failure
@@ -96,6 +104,9 @@ OPENAI_API_KEY=sk-...
96
104
  # xAI Grok 2 Image ($0.07/image)
97
105
  XAI_API_KEY=xai-...
98
106
 
107
+ # Z.ai GLM-Image ($0.015/image) - NEW: Excellent for text-heavy diagrams
108
+ ZAI_API_KEY=zai-...
109
+
99
110
  # Leonardo Phoenix ($0.02/image)
100
111
  LEONARDO_API_KEY=...
101
112
 
@@ -115,6 +126,7 @@ IMAGE_GEN_DEFAULT_PROVIDER=gemini
115
126
  - **Stability AI**: [https://platform.stability.ai](https://platform.stability.ai)
116
127
  - **OpenAI**: [https://platform.openai.com](https://platform.openai.com)
117
128
  - **xAI**: [https://console.x.ai](https://console.x.ai)
129
+ - **Z.ai**: [https://z.ai](https://z.ai) ✨ NEW - Best for text-heavy diagrams
118
130
  - **Leonardo**: [https://leonardo.ai](https://leonardo.ai)
119
131
  - **HuggingFace**: [https://huggingface.co](https://huggingface.co)
120
132
 
@@ -928,6 +940,187 @@ Remove API key for a provider.
928
940
 
929
941
  **Example**: "Remove my OpenAI API key"
930
942
 
943
+ ### Model Selection & Testing Tools ✨ NEW v0.9.0
944
+
945
+ #### `set_default_model`
946
+ Set the default model for a provider. This model will be used for all future generations unless overridden.
947
+
948
+ ```json
949
+ {
950
+ "provider": "zai",
951
+ "modelId": "glm-image"
952
+ }
953
+ ```
954
+
955
+ **Example**: "Set Z.ai to use the GLM-Image model"
956
+
957
+ **Returns**:
958
+ ```json
959
+ {
960
+ "success": true,
961
+ "provider": "zai",
962
+ "modelId": "glm-image",
963
+ "modelName": "GLM-Image",
964
+ "message": "Default model set to 'GLM-Image' for Z.ai GLM-Image..."
965
+ }
966
+ ```
967
+
968
+ #### `get_model_info`
969
+ Get detailed information about a specific model, including test results if available.
970
+
971
+ ```json
972
+ {
973
+ "provider": "gemini",
974
+ "modelId": "gemini-2.5-flash-image"
975
+ }
976
+ ```
977
+
978
+ **Example**: "What are the details for Gemini Flash Image?"
979
+
980
+ **Returns**:
981
+ ```json
982
+ {
983
+ "success": true,
984
+ "provider": "gemini",
985
+ "providerName": "Google Gemini 2.5 Flash Image",
986
+ "model": {
987
+ "id": "gemini-2.5-flash-image",
988
+ "name": "Gemini 2.5 Flash Image",
989
+ "costPerImage": 0.0,
990
+ "description": "Fast, free-tier image generation",
991
+ "capabilities": {
992
+ "maxResolution": "2048x2048",
993
+ "supportedAspectRatios": ["1:1", "16:9", "4:3", "9:16"]
994
+ }
995
+ },
996
+ "testResult": {
997
+ "testedAt": "2026-01-16T10:30:00.000Z",
998
+ "qualityScore": 85.5,
999
+ "passed": true
1000
+ },
1001
+ "message": "Model tested on 1/16/2026 with quality score 85.5/100"
1002
+ }
1003
+ ```
1004
+
1005
+ #### `test_model`
1006
+ Test a model with either standard test (automated) or custom prompt (user-provided). Records quality score for future reference.
1007
+
1008
+ **Standard Test (Automated Validation)**:
1009
+ ```json
1010
+ {
1011
+ "provider": "zai",
1012
+ "modelId": "glm-image",
1013
+ "useStandardTest": true,
1014
+ "aspectRatio": "16:9"
1015
+ }
1016
+ ```
1017
+
1018
+ **Custom Prompt Test (Real Use Case)**:
1019
+ ```json
1020
+ {
1021
+ "provider": "gemini",
1022
+ "modelId": "gemini-2.5-flash-image",
1023
+ "prompt": "AWS VPC architecture diagram showing public/private subnets, NAT gateway, and EC2 instances",
1024
+ "aspectRatio": "16:9"
1025
+ }
1026
+ ```
1027
+
1028
+ **Example**: "Test the Z.ai GLM-Image model with a standard quality test"
1029
+
1030
+ **Returns**:
1031
+ ```json
1032
+ {
1033
+ "success": true,
1034
+ "provider": "zai",
1035
+ "providerName": "Z.ai GLM-Image",
1036
+ "model": "GLM-Image",
1037
+ "testImage": {
1038
+ "filepath": "generated-images/tests/zai-glm-image-test.png",
1039
+ "generationTime": 12000,
1040
+ "actualCost": 0.015
1041
+ },
1042
+ "qualityScore": {
1043
+ "overall": 87.5,
1044
+ "sharpness": 89.2,
1045
+ "brightness": 145,
1046
+ "textRendering": 85.0,
1047
+ "colorAccuracy": 90.0,
1048
+ "passed": true
1049
+ },
1050
+ "message": "Model test passed! Quality score: 87.5/100. Model is ready for production use."
1051
+ }
1052
+ ```
1053
+
1054
+ **Quality Metrics**:
1055
+ - **Sharpness (30%)**: Laplacian variance analysis for edge detection
1056
+ - **Brightness (20%)**: Average brightness in optimal range (30-240)
1057
+ - **Text Rendering (40%)**: Estimated OCR accuracy and text clarity
1058
+ - **Color Accuracy (10%)**: Heuristic-based color validation
1059
+ - **Overall Score**: Weighted average, pass threshold 60/100
1060
+
1061
+ #### `compare_models`
1062
+ Compare multiple providers/models side-by-side with the same prompt. Generates quality scores and recommendation.
1063
+
1064
+ ```json
1065
+ {
1066
+ "prompt": "Technical diagram showing microservices architecture with API gateway, service mesh, and databases",
1067
+ "providers": [
1068
+ { "provider": "gemini", "model": "gemini-2.5-flash-image" },
1069
+ { "provider": "zai", "model": "glm-image" },
1070
+ { "provider": "huggingface", "model": "black-forest-labs/FLUX.1-dev" }
1071
+ ],
1072
+ "aspectRatio": "16:9"
1073
+ }
1074
+ ```
1075
+
1076
+ **Example**: "Compare Gemini, Z.ai, and HuggingFace FLUX models for generating a microservices architecture diagram"
1077
+
1078
+ **Returns**:
1079
+ ```json
1080
+ {
1081
+ "success": true,
1082
+ "prompt": "Technical diagram showing microservices architecture...",
1083
+ "totalCost": 0.015,
1084
+ "totalTime": 35000,
1085
+ "results": [
1086
+ {
1087
+ "provider": "zai",
1088
+ "model": "GLM-Image",
1089
+ "qualityScore": { "overall": 92.1, "textRendering": 95.8 },
1090
+ "cost": 0.015,
1091
+ "rank": 1
1092
+ },
1093
+ {
1094
+ "provider": "gemini",
1095
+ "model": "Gemini Flash Image",
1096
+ "qualityScore": { "overall": 85.5 },
1097
+ "cost": 0.0,
1098
+ "rank": 2
1099
+ },
1100
+ {
1101
+ "provider": "huggingface",
1102
+ "model": "FLUX.1-dev",
1103
+ "qualityScore": { "overall": 78.3 },
1104
+ "cost": 0.0,
1105
+ "rank": 3
1106
+ }
1107
+ ],
1108
+ "recommendation": {
1109
+ "provider": "zai",
1110
+ "model": "glm-image",
1111
+ "reason": "Highest overall quality (92.1/100), especially excellent text rendering (95.8/100). Worth the $0.015 cost for technical diagrams.",
1112
+ "alternatives": [
1113
+ {
1114
+ "provider": "gemini",
1115
+ "model": "gemini-2.5-flash-image",
1116
+ "reason": "Free alternative with good quality (85.5/100)"
1117
+ }
1118
+ ]
1119
+ },
1120
+ "message": "Comparison complete! zai glm-image scored highest (rank 1)..."
1121
+ }
1122
+ ```
1123
+
931
1124
  ### Image Generation Tools
932
1125
 
933
1126
  #### `parse_markdown`
@@ -1327,6 +1520,7 @@ For commercial licensing inquiries, please contact the author.
1327
1520
  ## 📚 Resources
1328
1521
 
1329
1522
  ### Documentation
1523
+ - **📖 Comprehensive Guide** ✨ NEW: Complete guide covering installation, architecture, environment variables, usage workflows, and testing - [docs/guides/comprehensive-guide.md](docs/guides/comprehensive-guide.md)
1330
1524
  - **Visual MCP Server Specification**: [docs/VISUAL_MCP_SERVER.md](docs/VISUAL_MCP_SERVER.md)
1331
1525
  - **Implementation Plan**: [docs/IMPLEMENTATION-PLAN.md](docs/IMPLEMENTATION-PLAN.md)
1332
1526
  - **Implementation Report**: [docs/IMPLEMENTATION_REPORT.md](docs/IMPLEMENTATION_REPORT.md)
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "$schema": "./pricing-schema.json",
3
- "version": "2.0.0",
4
- "lastUpdated": "2026-01-13",
5
- "lastChecked": "2026-01-15T17:00:08.513Z",
3
+ "version": "2.1.0",
4
+ "lastUpdated": "2026-01-16",
5
+ "lastChecked": "2026-01-16T06:15:00.000Z",
6
6
  "providers": {
7
7
  "openai": {
8
8
  "name": "OpenAI",
@@ -184,6 +184,24 @@
184
184
  "notes": "Preview model, pricing may change"
185
185
  }
186
186
  }
187
+ },
188
+ "zai": {
189
+ "name": "Z.ai (Zhipu AI)",
190
+ "pricingUrl": "https://docs.z.ai/guides/overview/pricing",
191
+ "defaultModel": "glm-image",
192
+ "models": {
193
+ "glm-image": {
194
+ "name": "GLM-Image",
195
+ "costPerImage": 0.015,
196
+ "rateLimit": 15,
197
+ "timeout": 90000,
198
+ "maxDimensions": {
199
+ "width": 2048,
200
+ "height": 2048
201
+ },
202
+ "notes": "16B hybrid autoregressive + diffusion model. Excellent for text-heavy diagrams, posters, and knowledge-dense images. Beats Gemini on text rendering benchmarks."
203
+ }
204
+ }
187
205
  }
188
206
  },
189
207
  "costComparison": {
@@ -192,10 +210,20 @@
192
210
  "model": "black-forest-labs/flux-schnell",
193
211
  "cost": 0.003
194
212
  },
213
+ "secondCheapest": {
214
+ "provider": "zai",
215
+ "model": "glm-image",
216
+ "cost": 0.015
217
+ },
195
218
  "recommended": {
219
+ "provider": "zai",
220
+ "model": "glm-image",
221
+ "reason": "Best for text-heavy diagrams and technical documentation. Excellent text rendering, 2nd cheapest paid option."
222
+ },
223
+ "recommendedAlternative": {
196
224
  "provider": "replicate",
197
225
  "model": "black-forest-labs/flux-schnell",
198
- "reason": "Best quality/cost ratio"
226
+ "reason": "Cheapest option, excellent quality/cost ratio for general images"
199
227
  },
200
228
  "fastest": [
201
229
  {
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * Base provider class - all providers extend this
3
3
  */
4
- import { IImageProvider, ProviderType, ProviderConfig, ProviderCapabilities, GenerationOptions, ImageSpec, GlobalContext, GeneratedImage } from '../types/index.js';
4
+ import { IImageProvider, ProviderType, ProviderConfig, ProviderCapabilities, ProviderModel, GenerationOptions, ImageSpec, GlobalContext, GeneratedImage } from '../types/index.js';
5
5
  import { HttpClient } from '../utils/index.js';
6
6
  export declare abstract class BaseProvider implements IImageProvider {
7
7
  abstract readonly name: ProviderType;
@@ -13,6 +13,14 @@ export declare abstract class BaseProvider implements IImageProvider {
13
13
  * Initialize provider (call after construction)
14
14
  */
15
15
  protected init(): void;
16
+ /**
17
+ * Get available models for this provider
18
+ */
19
+ getModels(): ProviderModel[];
20
+ /**
21
+ * Get default model for this provider
22
+ */
23
+ getDefaultModel(): ProviderModel;
16
24
  /**
17
25
  * Create output path for generated image with new index-based directory structure
18
26
  * Structure: generated-images/{index}-{filename}/{provider}/{image-id}.png
@@ -29,7 +37,7 @@ export declare abstract class BaseProvider implements IImageProvider {
29
37
  * Save image metadata and generation log
30
38
  */
31
39
  private saveImageMetadata;
32
- estimateCost(_spec: ImageSpec): number;
40
+ estimateCost(_spec: ImageSpec, modelId?: string): number;
33
41
  /**
34
42
  * Provider-specific generation logic - must be implemented by subclasses
35
43
  */
@@ -1 +1 @@
1
- {"version":3,"file":"base-provider.d.ts","sourceRoot":"","sources":["../../src/providers/base-provider.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EACL,cAAc,EACd,YAAY,EACZ,cAAc,EACd,oBAAoB,EACpB,iBAAiB,EACjB,SAAS,EACT,aAAa,EACb,cAAc,EACf,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EACL,UAAU,EAWX,MAAM,mBAAmB,CAAC;AAM3B,8BAAsB,YAAa,YAAW,cAAc;IAC1D,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,YAAY,CAAC;IACrC,QAAQ,CAAC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAEtC,SAAS,CAAC,MAAM,EAAE,UAAU,CAAC;IAC7B,SAAS,CAAC,MAAM,EAAE,cAAc,CAAC;gBAErB,MAAM,EAAE,cAAc;IAWlC;;OAEG;IACH,SAAS,CAAC,IAAI,IAAI,IAAI;IAStB;;;;;;;OAOG;cACa,gBAAgB,CAAC,IAAI,EAAE,SAAS,EAAE,SAAS,GAAE,MAAc,GAAG,OAAO,CAAC,MAAM,CAAC;IAmC7F,IAAI,WAAW,IAAI,OAAO,CAEzB;IAED,YAAY,IAAI,OAAO;IASjB,QAAQ,CAAC,IAAI,EAAE,SAAS,EAAE,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,cAAc,CAAC;IA0MrF;;OAEG;YACW,iBAAiB;IAkH/B,YAAY,CAAC,KAAK,EAAE,SAAS,GAAG,MAAM;IAKtC;;OAEG;IACH,SAAS,CAAC,QAAQ,CAAC,aAAa,CAC9B,IAAI,EAAE,SAAS,EACf,OAAO,CAAC,EAAE,iBAAiB,GAC1B,OAAO,CAAC,cAAc,CAAC;IAE1B;;;OAGG;IACH,QAAQ,CAAC,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,aAAa,GAAG,MAAM,GAAG,GAAG;IAE3E;;OAEG;IACH,QAAQ,CAAC,eAAe,IAAI,oBAAoB;IAEhD;;;OAGG;IACH,QAAQ,CAAC,cAAc,IAAI,OAAO,CAAC;QACjC,OAAO,EAAE,OAAO,CAAC;QACjB,OAAO,EAAE,MAAM,CAAC;QAChB,OAAO,CAAC,EAAE,MAAM,CAAC;KAClB,CAAC;IAEF;;OAEG;IACH,SAAS,CAAC,cAAc,IAAI,IAAI;IAMhC;;OAEG;IACH,SAAS,CAAC,uBAAuB,CAAC,WAAW,EAAE,MAAM,GAAG;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE;CAY1F"}
1
+ {"version":3,"file":"base-provider.d.ts","sourceRoot":"","sources":["../../src/providers/base-provider.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EACL,cAAc,EACd,YAAY,EACZ,cAAc,EACd,oBAAoB,EACpB,aAAa,EACb,iBAAiB,EACjB,SAAS,EACT,aAAa,EACb,cAAc,EACf,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EACL,UAAU,EAWX,MAAM,mBAAmB,CAAC;AAM3B,8BAAsB,YAAa,YAAW,cAAc;IAC1D,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,YAAY,CAAC;IACrC,QAAQ,CAAC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAEtC,SAAS,CAAC,MAAM,EAAE,UAAU,CAAC;IAC7B,SAAS,CAAC,MAAM,EAAE,cAAc,CAAC;gBAErB,MAAM,EAAE,cAAc;IAWlC;;OAEG;IACH,SAAS,CAAC,IAAI,IAAI,IAAI;IAWtB;;OAEG;IACH,SAAS,IAAI,aAAa,EAAE;IAI5B;;OAEG;IACH,eAAe,IAAI,aAAa;IAgChC;;;;;;;OAOG;cACa,gBAAgB,CAAC,IAAI,EAAE,SAAS,EAAE,SAAS,GAAE,MAAc,GAAG,OAAO,CAAC,MAAM,CAAC;IAmC7F,IAAI,WAAW,IAAI,OAAO,CAEzB;IAED,YAAY,IAAI,OAAO;IASjB,QAAQ,CAAC,IAAI,EAAE,SAAS,EAAE,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,cAAc,CAAC;IA0MrF;;OAEG;YACW,iBAAiB;IAkH/B,YAAY,CAAC,KAAK,EAAE,SAAS,EAAE,OAAO,CAAC,EAAE,MAAM,GAAG,MAAM;IAkBxD;;OAEG;IACH,SAAS,CAAC,QAAQ,CAAC,aAAa,CAC9B,IAAI,EAAE,SAAS,EACf,OAAO,CAAC,EAAE,iBAAiB,GAC1B,OAAO,CAAC,cAAc,CAAC;IAE1B;;;OAGG;IACH,QAAQ,CAAC,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,aAAa,GAAG,MAAM,GAAG,GAAG;IAE3E;;OAEG;IACH,QAAQ,CAAC,eAAe,IAAI,oBAAoB;IAEhD;;;OAGG;IACH,QAAQ,CAAC,cAAc,IAAI,OAAO,CAAC;QACjC,OAAO,EAAE,OAAO,CAAC;QACjB,OAAO,EAAE,MAAM,CAAC;QAChB,OAAO,CAAC,EAAE,MAAM,CAAC;KAClB,CAAC;IAEF;;OAEG;IACH,SAAS,CAAC,cAAc,IAAI,IAAI;IAMhC;;OAEG;IACH,SAAS,CAAC,uBAAuB,CAAC,WAAW,EAAE,MAAM,GAAG;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE;CAY1F"}