ai-vision-mcp 0.0.4 → 0.0.5-dev.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +102 -132
- package/dist/cli/commands/analyze-image.d.ts +3 -0
- package/dist/cli/commands/analyze-image.d.ts.map +1 -0
- package/dist/cli/commands/analyze-image.js +30 -0
- package/dist/cli/commands/analyze-image.js.map +1 -0
- package/dist/cli/commands/analyze-video.d.ts +3 -0
- package/dist/cli/commands/analyze-video.d.ts.map +1 -0
- package/dist/cli/commands/analyze-video.js +30 -0
- package/dist/cli/commands/analyze-video.js.map +1 -0
- package/dist/cli/commands/compare-images.d.ts +3 -0
- package/dist/cli/commands/compare-images.d.ts.map +1 -0
- package/dist/cli/commands/compare-images.js +30 -0
- package/dist/cli/commands/compare-images.js.map +1 -0
- package/dist/cli/commands/detect-objects.d.ts +3 -0
- package/dist/cli/commands/detect-objects.d.ts.map +1 -0
- package/dist/cli/commands/detect-objects.js +31 -0
- package/dist/cli/commands/detect-objects.js.map +1 -0
- package/dist/cli/index.d.ts +3 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +63 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/cli/utils.d.ts +17 -0
- package/dist/cli/utils.d.ts.map +1 -0
- package/dist/cli/utils.js +85 -0
- package/dist/cli/utils.js.map +1 -0
- package/dist/file-upload/gemini/GeminiFilesAPI.js +1 -1
- package/dist/file-upload/gemini/GeminiFilesAPI.js.map +1 -1
- package/dist/file-upload/vertexai/VertexAIStorageStrategy.js +1 -1
- package/dist/file-upload/vertexai/VertexAIStorageStrategy.js.map +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +17 -1
- package/dist/index.js.map +1 -1
- package/dist/providers/base/VisionProvider.d.ts +9 -0
- package/dist/providers/base/VisionProvider.d.ts.map +1 -1
- package/dist/providers/base/VisionProvider.js +67 -12
- package/dist/providers/base/VisionProvider.js.map +1 -1
- package/dist/providers/gemini/GeminiProvider.d.ts +2 -0
- package/dist/providers/gemini/GeminiProvider.d.ts.map +1 -1
- package/dist/providers/gemini/GeminiProvider.js +88 -9
- package/dist/providers/gemini/GeminiProvider.js.map +1 -1
- package/dist/providers/vertexai/VertexAIProvider.d.ts.map +1 -1
- package/dist/providers/vertexai/VertexAIProvider.js +11 -11
- package/dist/providers/vertexai/VertexAIProvider.js.map +1 -1
- package/dist/server.d.ts +1 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +160 -53
- package/dist/server.js.map +1 -1
- package/dist/services/ConfigService.d.ts +3 -0
- package/dist/services/ConfigService.d.ts.map +1 -1
- package/dist/services/ConfigService.js +45 -0
- package/dist/services/ConfigService.js.map +1 -1
- package/dist/services/FileService.d.ts.map +1 -1
- package/dist/services/FileService.js +10 -10
- package/dist/services/FileService.js.map +1 -1
- package/dist/services/LoggerService.d.ts +19 -0
- package/dist/services/LoggerService.d.ts.map +1 -0
- package/dist/services/LoggerService.js +74 -0
- package/dist/services/LoggerService.js.map +1 -0
- package/dist/tools/analyze_image.js +3 -3
- package/dist/tools/analyze_image.js.map +1 -1
- package/dist/tools/analyze_video.d.ts +11 -1
- package/dist/tools/analyze_video.d.ts.map +1 -1
- package/dist/tools/analyze_video.js +36 -1
- package/dist/tools/analyze_video.js.map +1 -1
- package/dist/tools/compare_images.js +3 -3
- package/dist/tools/compare_images.js.map +1 -1
- package/dist/tools/detect_objects_in_image.d.ts.map +1 -1
- package/dist/tools/detect_objects_in_image.js +89 -67
- package/dist/tools/detect_objects_in_image.js.map +1 -1
- package/dist/tools/index.d.ts +1 -1
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +1 -1
- package/dist/tools/index.js.map +1 -1
- package/dist/types/Analysis.d.ts +6 -0
- package/dist/types/Analysis.d.ts.map +1 -1
- package/dist/types/Config.d.ts +3 -2
- package/dist/types/Config.d.ts.map +1 -1
- package/dist/utils/duration.d.ts +4 -0
- package/dist/utils/duration.d.ts.map +1 -0
- package/dist/utils/duration.js +65 -0
- package/dist/utils/duration.js.map +1 -0
- package/dist/utils/imageAnnotator.d.ts +4 -2
- package/dist/utils/imageAnnotator.d.ts.map +1 -1
- package/dist/utils/imageAnnotator.js +80 -89
- package/dist/utils/imageAnnotator.js.map +1 -1
- package/dist/utils/index.d.ts +2 -0
- package/dist/utils/index.d.ts.map +1 -1
- package/dist/utils/index.js +2 -0
- package/dist/utils/index.js.map +1 -1
- package/dist/utils/validation.d.ts +121 -9
- package/dist/utils/validation.d.ts.map +1 -1
- package/dist/utils/validation.js +7 -1
- package/dist/utils/validation.js.map +1 -1
- package/dist/utils/videoTokens.d.ts +23 -0
- package/dist/utils/videoTokens.d.ts.map +1 -0
- package/dist/utils/videoTokens.js +94 -0
- package/dist/utils/videoTokens.js.map +1 -0
- package/dist/utils/youtube.d.ts +5 -0
- package/dist/utils/youtube.d.ts.map +1 -0
- package/dist/utils/youtube.js +62 -0
- package/dist/utils/youtube.js.map +1 -0
- package/package.json +23 -14
package/README.md
CHANGED
|
@@ -112,13 +112,13 @@ claude mcp add ai-vision-mcp \
|
|
|
112
112
|
```
|
|
113
113
|
|
|
114
114
|
|
|
115
|
-
Note: Increase the MCP tool timeout to about 5 minutes by updating `~\.claude\settings.json` as follows:
|
|
115
|
+
Note: Increase the MCP startup timeout to 1 minute and the MCP tool execution timeout to about 5 minutes by updating `~\.claude\settings.json` as follows:
|
|
116
116
|
|
|
117
117
|
```json
|
|
118
118
|
{
|
|
119
119
|
"env": {
|
|
120
|
-
"MCP_TIMEOUT": "
|
|
121
|
-
"MCP_TOOL_TIMEOUT": "300000"
|
|
120
|
+
"MCP_TIMEOUT": "60000",
|
|
121
|
+
"MCP_TOOL_TIMEOUT": "300000"
|
|
122
122
|
}
|
|
123
123
|
}
|
|
124
124
|
```
|
|
@@ -309,12 +309,12 @@ Detects objects in an image using AI vision models and generates annotated image
|
|
|
309
309
|
**Configuration:**
|
|
310
310
|
This function uses optimized default parameters for object detection and does not accept a runtime `options` parameter. To customize the AI parameters (temperature, topP, topK, maxTokens), use environment variables:
|
|
311
311
|
|
|
312
|
-
```
|
|
313
|
-
# Recommended settings for object detection (these are now the defaults)
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
312
|
+
```
|
|
313
|
+
# Recommended environment variable settings for object detection (these are now the defaults)
|
|
314
|
+
TEMPERATURE_FOR_DETECT_OBJECTS_IN_IMAGE=0.0 # Deterministic responses
|
|
315
|
+
TOP_P_FOR_DETECT_OBJECTS_IN_IMAGE=0.95 # Nucleus sampling
|
|
316
|
+
TOP_K_FOR_DETECT_OBJECTS_IN_IMAGE=30 # Vocabulary selection
|
|
317
|
+
MAX_TOKENS_FOR_DETECT_OBJECTS_IN_IMAGE=8192 # High token limit for JSON
|
|
318
318
|
```
|
|
319
319
|
|
|
320
320
|
**File Handling Logic:**
|
|
@@ -388,139 +388,109 @@ Analyzes a video using AI and returns a detailed description.
|
|
|
388
388
|
**Note:** Only YouTube URLs are supported for public video URLs. Other public video URLs are not currently supported.
|
|
389
389
|
|
|
390
390
|
|
|
391
|
-
|
|
392
|
-
| Variable | Required | Description | Default |
|
|
393
|
-
|-----------|-----------|-------------|---------|
|
|
394
|
-
| **Provider Selection** ||||
|
|
395
|
-
| `IMAGE_PROVIDER` | Yes | Provider for image analysis | `google`,`vertex_ai` |
|
|
396
|
-
| `VIDEO_PROVIDER` | Yes | Provider for video analysis | `google`,`vertex_ai` |
|
|
397
|
-
| **Model Selection** ||||
|
|
398
|
-
| `IMAGE_MODEL` | No | Model for image analysis | `gemini-2.5-flash-lite` |
|
|
399
|
-
| `VIDEO_MODEL` | No | Model for video analysis | `gemini-2.5-flash` |
|
|
400
|
-
| **Function-specific Model Selection** ||||
|
|
401
|
-
| `ANALYZE_IMAGE_MODEL` | No | Model for analyze_image function | Uses `IMAGE_MODEL` |
|
|
402
|
-
| `COMPARE_IMAGES_MODEL` | No | Model for compare_images function | Uses `IMAGE_MODEL` |
|
|
403
|
-
| `DETECT_OBJECTS_IN_IMAGE_MODEL` | No | Model for detect_objects_in_image function | Uses `IMAGE_MODEL` |
|
|
404
|
-
| `ANALYZE_VIDEO_MODEL` | No | Model for analyze_video function | Uses `VIDEO_MODEL` |
|
|
405
|
-
| **Google Gemini API** ||||
|
|
406
|
-
| `GEMINI_API_KEY` | Yes if `IMAGE_PROVIDER` or `VIDEO_PROVIDER` = `google` | Google Gemini API key | Required for Gemini |
|
|
407
|
-
| `GEMINI_BASE_URL` | No | Gemini API base URL | `https://generativelanguage.googleapis.com` |
|
|
408
|
-
| **Vertex AI** ||||
|
|
409
|
-
| `VERTEX_CREDENTIALS` | Yes if `IMAGE_PROVIDER` or `VIDEO_PROVIDER` = `vertex_ai` | Path to GCP service account JSON | Required for Vertex AI |
|
|
410
|
-
| `VERTEX_PROJECT_ID` | Auto | Google Cloud project ID | Auto-derived from credentials |
|
|
411
|
-
| `VERTEX_LOCATION` | No | Vertex AI region | `us-central1` |
|
|
412
|
-
| `VERTEX_ENDPOINT` | No | Vertex AI endpoint URL | `https://aiplatform.googleapis.com` |
|
|
413
|
-
| **Google Cloud Storage (Vertex AI)** ||||
|
|
414
|
-
| `GCS_BUCKET_NAME` | Yes if `IMAGE_PROVIDER` or `VIDEO_PROVIDER` = `vertex_ai` | GCS bucket name for Vertex AI uploads | Required for Vertex AI |
|
|
415
|
-
| `GCS_CREDENTIALS` | No | Path to GCS credentials | Defaults to `VERTEX_CREDENTIALS` |
|
|
416
|
-
| `GCS_PROJECT_ID` | No | GCS project ID | Auto-derived from `VERTEX_CREDENTIALS` |
|
|
417
|
-
| `GCS_REGION` | No | GCS region | Defaults to `VERTEX_LOCATION` |
|
|
418
|
-
| **API Configuration** ||||
|
|
419
|
-
| `TEMPERATURE` | No | AI response temperature (0.0–2.0) | `0.8` |
|
|
420
|
-
| `TOP_P` | No | Top-p sampling parameter (0.0–1.0) | `0.95` |
|
|
421
|
-
| `TOP_K` | No | Top-k sampling parameter (1–100) | `30` |
|
|
422
|
-
| `MAX_TOKENS` | No | Maximum tokens for analysis (1–8192) | `1000` |
|
|
423
|
-
| **Task-type level Configuration** |||||
|
|
424
|
-
| `TEMPERATURE_FOR_IMAGE` | No | Image-specific temperature (0.0–2.0) | Uses `TEMPERATURE` |
|
|
425
|
-
| `TOP_P_FOR_IMAGE` | No | Image-specific top-p (0.0–1.0) | Uses `TOP_P` |
|
|
426
|
-
| `TOP_K_FOR_IMAGE` | No | Image-specific top-k (1–100) | Uses `TOP_K` |
|
|
427
|
-
| `TEMPERATURE_FOR_VIDEO` | No | Video-specific temperature (0.0–2.0) | Uses `TEMPERATURE` |
|
|
428
|
-
| `TOP_P_FOR_VIDEO` | No | Video-specific top-p (0.0–1.0) | Uses `TOP_P` |
|
|
429
|
-
| `TOP_K_FOR_VIDEO` | No | Video-specific top-k (1–100) | Uses `TOP_K` |
|
|
430
|
-
| `MAX_TOKENS_FOR_IMAGE` | No | Maximum tokens for image analysis | Uses `MAX_TOKENS` |
|
|
431
|
-
| `MAX_TOKENS_FOR_VIDEO` | No | Maximum tokens for video analysis | Uses `MAX_TOKENS` |
|
|
432
|
-
| **Function-specific Configuration** |||||
|
|
433
|
-
| `TEMPERATURE_FOR_ANALYZE_IMAGE` | No | Temperature for analyze_image function (0.0–2.0) | Uses `TEMPERATURE_FOR_IMAGE` |
|
|
434
|
-
| `TOP_P_FOR_ANALYZE_IMAGE` | No | Top-p for analyze_image function (0.0–1.0) | Uses `TOP_P_FOR_IMAGE` |
|
|
435
|
-
| `TOP_K_FOR_ANALYZE_IMAGE` | No | Top-k for analyze_image function (1–100) | Uses `TOP_K_FOR_IMAGE` |
|
|
436
|
-
| `MAX_TOKENS_FOR_ANALYZE_IMAGE` | No | Max tokens for analyze_image function | Uses `MAX_TOKENS_FOR_IMAGE` |
|
|
437
|
-
| `TEMPERATURE_FOR_COMPARE_IMAGES` | No | Temperature for compare_images function (0.0–2.0) | Uses `TEMPERATURE_FOR_IMAGE` |
|
|
438
|
-
| `TOP_P_FOR_COMPARE_IMAGES` | No | Top-p for compare_images function (0.0–1.0) | Uses `TOP_P_FOR_IMAGE` |
|
|
439
|
-
| `TOP_K_FOR_COMPARE_IMAGES` | No | Top-k for compare_images function (1–100) | Uses `TOP_K_FOR_IMAGE` |
|
|
440
|
-
| `MAX_TOKENS_FOR_COMPARE_IMAGES` | No | Max tokens for compare_images function | Uses `MAX_TOKENS_FOR_IMAGE` |
|
|
441
|
-
| `TEMPERATURE_FOR_DETECT_OBJECTS_IN_IMAGE` | No | Temperature for detect_objects_in_image function (0.0–2.0) | `0.0` |
|
|
442
|
-
| `TOP_P_FOR_DETECT_OBJECTS_IN_IMAGE` | No | Top-p for detect_objects_in_image function (0.0–1.0) | `0.95` |
|
|
443
|
-
| `TOP_K_FOR_DETECT_OBJECTS_IN_IMAGE` | No | Top-k for detect_objects_in_image function (1–100) | `30` |
|
|
444
|
-
| `MAX_TOKENS_FOR_DETECT_OBJECTS_IN_IMAGE` | No | Max tokens for detect_objects_in_image function | `8192` |
|
|
445
|
-
| `TEMPERATURE_FOR_ANALYZE_VIDEO` | No | Temperature for analyze_video function (0.0–2.0) | Uses `TEMPERATURE_FOR_VIDEO` |
|
|
446
|
-
| `TOP_P_FOR_ANALYZE_VIDEO` | No | Top-p for analyze_video function (0.0–1.0) | Uses `TOP_P_FOR_VIDEO` |
|
|
447
|
-
| `TOP_K_FOR_ANALYZE_VIDEO` | No | Top-k for analyze_video function (1–100) | Uses `TOP_K_FOR_VIDEO` |
|
|
448
|
-
| `MAX_TOKENS_FOR_ANALYZE_VIDEO` | No | Max tokens for analyze_video function | Uses `MAX_TOKENS_FOR_VIDEO` |
|
|
449
|
-
| **File Processing** ||||
|
|
450
|
-
| `MAX_IMAGE_SIZE` | No | Maximum image size in bytes | `20971520` (20 MB) |
|
|
451
|
-
| `MAX_VIDEO_SIZE` | No | Maximum video size in bytes | `2147483648` (2 GB) |
|
|
452
|
-
| `MAX_VIDEO_DURATION` | No | Maximum video duration (seconds) | `3600` (1 hour) |
|
|
453
|
-
| `MAX_IMAGES_FOR_COMPARISON` | No | Maximum number of images for comparison, used by compare_images() mcp function | `4` |
|
|
454
|
-
| `ALLOWED_IMAGE_FORMATS` | No | Comma-separated image formats | `png,jpg,jpeg,webp,gif,bmp,tiff` |
|
|
455
|
-
| `ALLOWED_VIDEO_FORMATS` | No | Comma-separated video formats | `mp4,mov,avi,mkv,webm,flv,wmv,3gp` |
|
|
456
|
-
| **Development** ||||
|
|
457
|
-
| `LOG_LEVEL` | No | Logging level | `info` |
|
|
458
|
-
| `NODE_ENV` | No | Environment mode | `development` |
|
|
459
|
-
| `GEMINI_FILES_API_THRESHOLD` | No | Size threshold for Gemini Files API (bytes) | `10485760` (10 MB) |
|
|
460
|
-
| `VERTEX_AI_FILES_API_THRESHOLD` | No | Size threshold for Vertex AI uploads (bytes) | `0` |
|
|
391
|
+
## Environment Configuration
|
|
461
392
|
|
|
393
|
+
For basic setup, you only need to configure the provider selection and required credentials:
|
|
462
394
|
|
|
463
|
-
|
|
464
|
-
|
|
395
|
+
### Google AI Studio Provider (Recommended)
|
|
396
|
+
```bash
|
|
397
|
+
export IMAGE_PROVIDER="google"
|
|
398
|
+
export VIDEO_PROVIDER="google"
|
|
399
|
+
export GEMINI_API_KEY="your-gemini-api-key"
|
|
400
|
+
```
|
|
401
|
+
|
|
402
|
+
### Vertex AI Provider (Production)
|
|
403
|
+
```bash
|
|
404
|
+
export IMAGE_PROVIDER="vertex_ai"
|
|
405
|
+
export VIDEO_PROVIDER="vertex_ai"
|
|
406
|
+
export VERTEX_CREDENTIALS="/path/to/service-account.json"
|
|
407
|
+
export GCS_BUCKET_NAME="your-gcs-bucket"
|
|
408
|
+
```
|
|
409
|
+
|
|
410
|
+
### 📖 **Detailed Configuration Guide**
|
|
411
|
+
|
|
412
|
+
For comprehensive environment variable documentation, including:
|
|
413
|
+
- Complete configuration reference (60+ environment variables)
|
|
414
|
+
- Function-specific optimization examples
|
|
415
|
+
- Advanced configuration patterns
|
|
416
|
+
- Troubleshooting guidance
|
|
465
417
|
|
|
466
|
-
|
|
418
|
+
👉 **[See Environment Variable Guide](docs/environment-variable-guide.md)**
|
|
419
|
+
|
|
420
|
+
### Configuration Priority Overview
|
|
421
|
+
|
|
422
|
+
The server uses a hierarchical configuration system where more specific settings override general ones:
|
|
423
|
+
|
|
424
|
+
1. **LLM-assigned values** (runtime parameters in tool calls)
|
|
425
|
+
2. **Function-specific variables** (`TEMPERATURE_FOR_ANALYZE_IMAGE`, etc.)
|
|
426
|
+
3. **Task-specific variables** (`TEMPERATURE_FOR_IMAGE`, etc.)
|
|
427
|
+
4. **Universal variables** (`TEMPERATURE`, etc.)
|
|
428
|
+
5. **System defaults**
|
|
429
|
+
|
|
430
|
+
<details>
|
|
431
|
+
<summary><strong>Quick Configuration Examples</strong></summary>
|
|
467
432
|
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
433
|
+
**Basic Optimization:**
|
|
434
|
+
```bash
|
|
435
|
+
# General settings
|
|
436
|
+
export TEMPERATURE=0.7
|
|
437
|
+
export MAX_TOKENS=1500
|
|
473
438
|
|
|
474
|
-
|
|
439
|
+
# Task-specific optimization
|
|
440
|
+
export TEMPERATURE_FOR_IMAGE=0.2 # More precise for images
|
|
441
|
+
export TEMPERATURE_FOR_VIDEO=0.5 # More creative for videos
|
|
442
|
+
```
|
|
475
443
|
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
444
|
+
**Function-specific Optimization:**
|
|
445
|
+
```bash
|
|
446
|
+
# Optimize individual functions
|
|
447
|
+
export TEMPERATURE_FOR_ANALYZE_IMAGE=0.1
|
|
448
|
+
export TEMPERATURE_FOR_COMPARE_IMAGES=0.3
|
|
449
|
+
export TEMPERATURE_FOR_DETECT_OBJECTS_IN_IMAGE=0.0 # Deterministic
|
|
450
|
+
export MAX_TOKENS_FOR_DETECT_OBJECTS_IN_IMAGE=8192 # High token limit
|
|
451
|
+
```
|
|
479
452
|
|
|
480
|
-
**
|
|
453
|
+
**Model Selection:**
|
|
481
454
|
```bash
|
|
482
|
-
#
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
TEMPERATURE_FOR_IMAGE=0.1 # More precise for image analysis
|
|
488
|
-
MAX_TOKENS_FOR_VIDEO=1200 # Longer responses for video content
|
|
489
|
-
|
|
490
|
-
# Function-specific overrides
|
|
491
|
-
TEMPERATURE_FOR_ANALYZE_IMAGE=0.05 # Very precise for single image analysis
|
|
492
|
-
TEMPERATURE_FOR_COMPARE_IMAGES=0.2 # More creative for comparisons
|
|
493
|
-
MAX_TOKENS_FOR_COMPARE_IMAGES=1500 # Longer responses for image comparisons
|
|
494
|
-
TEMPERATURE_FOR_DETECT_OBJECTS_IN_IMAGE=0.0 # Deterministic for object detection
|
|
495
|
-
MAX_TOKENS_FOR_DETECT_OBJECTS_IN_IMAGE=8192 # Higher token limit for structured output
|
|
496
|
-
TEMPERATURE_FOR_ANALYZE_VIDEO=0.1 # Precise video analysis
|
|
497
|
-
|
|
498
|
-
# Model selection configuration
|
|
499
|
-
ANALYZE_IMAGE_MODEL="gemini-2.5-flash-lite" # Fast, cost-effective for single image analysis
|
|
500
|
-
COMPARE_IMAGES_MODEL="gemini-2.5-flash-lite"
|
|
501
|
-
DETECT_OBJECTS_IN_IMAGE_MODEL="gemini-2.5-flash-lite" # Structured output capable model
|
|
502
|
-
ANALYZE_VIDEO_MODEL="gemini-2.5-flash-pro" # Most capable for video analysis
|
|
503
|
-
|
|
504
|
-
# Task-specific models (existing pattern still works)
|
|
505
|
-
IMAGE_MODEL="gemini-2.5-flash"
|
|
506
|
-
VIDEO_MODEL="gemini-2.5-flash-pro"
|
|
507
|
-
|
|
508
|
-
# Resolution order for analyze_image:
|
|
509
|
-
# 1. ANALYZE_IMAGE_MODEL
|
|
510
|
-
# 2. IMAGE_MODEL
|
|
511
|
-
# 3. System default ("gemini-2.5-flash-lite")
|
|
512
|
-
|
|
513
|
-
# Resolution order for detect_objects_in_image:
|
|
514
|
-
# 1. DETECT_OBJECTS_IN_IMAGE_MODEL
|
|
515
|
-
# 2. IMAGE_MODEL
|
|
516
|
-
# 3. System default ("gemini-2.5-flash-lite")
|
|
517
|
-
|
|
518
|
-
# LLM can still override at runtime via tool parameters
|
|
519
|
-
```
|
|
520
|
-
|
|
521
|
-
This allows you to set sensible defaults while maintaining granular control per task type and per function, with a clean and maintainable 3-level hierarchy.
|
|
455
|
+
# Choose models per function
|
|
456
|
+
export ANALYZE_IMAGE_MODEL="gemini-2.5-flash-lite"
|
|
457
|
+
export COMPARE_IMAGES_MODEL="gemini-2.5-flash"
|
|
458
|
+
export ANALYZE_VIDEO_MODEL="gemini-2.5-flash-pro"
|
|
459
|
+
```
|
|
522
460
|
</details>
|
|
523
461
|
|
|
462
|
+
## Troubleshooting (stdio / Codex / Claude Code)
|
|
463
|
+
|
|
464
|
+
### 1) "Transport closed" / tool call fails
|
|
465
|
+
|
|
466
|
+
If you see errors like:
|
|
467
|
+
|
|
468
|
+
- `tools/call failed: Transport closed`
|
|
469
|
+
|
|
470
|
+
Common causes:
|
|
471
|
+
|
|
472
|
+
**A) Image annotation dependency failed to load**
|
|
473
|
+
|
|
474
|
+
This server uses [`imagescript`](https://github.com/matmen/ImageScript) for image annotation/dimension extraction.
|
|
475
|
+
|
|
476
|
+
Verify it loads:
|
|
477
|
+
|
|
478
|
+
```bash
|
|
479
|
+
npm run doctor
|
|
480
|
+
# or
|
|
481
|
+
npm run check:imagescript
|
|
482
|
+
```
|
|
483
|
+
|
|
484
|
+
**B) stdout logs corrupt stdio MCP framing**
|
|
485
|
+
|
|
486
|
+
This server uses the MCP **stdio** transport (newline-delimited JSON-RPC over stdout).
|
|
487
|
+
|
|
488
|
+
- ✅ stdout must contain **only** MCP JSON-RPC messages
|
|
489
|
+
- ✅ write logs to **stderr** (e.g. `console.error`)
|
|
490
|
+
- ❌ do not use `console.log` in stdio MCP servers
|
|
491
|
+
|
|
492
|
+
If stdout is polluted, clients (Codex/Claude Code) may disconnect and report `Transport closed`.
|
|
493
|
+
|
|
524
494
|
## Development
|
|
525
495
|
|
|
526
496
|
### Prerequisites
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"analyze-image.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/analyze-image.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAOpD,wBAAsB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAoCnF"}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import { ConfigService } from '../../services/ConfigService.js';
|
|
2
|
+
import { VisionProviderFactory } from '../../providers/factory/ProviderFactory.js';
|
|
3
|
+
import { FileService } from '../../services/FileService.js';
|
|
4
|
+
import { analyze_image } from '../../tools/analyze_image.js';
|
|
5
|
+
import { parseOptions, formatOutput, handleError, parseArgs } from '../utils.js';
|
|
6
|
+
/**
 * Runs the `analyze-image` CLI command.
 *
 * Requires at least one positional argument (the image source) and a truthy
 * `prompt` option. When either is missing, an error is printed with
 * `console.error` and `process.exit(1)` is called. Otherwise an image provider
 * and a file service are built, `analyze_image` is awaited, and the formatted
 * result is printed with `console.log`. Anything thrown inside the analysis
 * step is passed to `handleError`.
 *
 * @param {*} args - Argument list for this sub-command (presumably the argv
 *   tokens after the command name); handed to `parseArgs` unchanged.
 * @param {*} config - Configuration object forwarded to the provider factory
 *   and to `analyze_image`.
 * @returns {Promise<void>}
 */
export async function runAnalyzeImage(args, config) {
    const parsed = parseArgs(args);
    const sources = parsed.positional;
    const flags = parsed.options;

    // JSON mode is selected by the mere presence of a `json` key; its value is
    // never inspected. Evaluated lazily, at the point of use.
    const wantsJson = () => 'json' in flags;

    if (sources.length < 1) {
        console.error('Error: Image source required');
        console.error('Usage: ai-vision analyze-image <source> --prompt <text>');
        process.exit(1);
    }

    const imageSource = sources[0];
    const { prompt } = flags;

    if (!prompt) {
        console.error('Error: --prompt is required');
        process.exit(1);
    }

    // Built outside the try block: failures here are not routed to handleError.
    const settings = ConfigService.getInstance();
    const provider = VisionProviderFactory.createProviderWithValidation(config, 'image');
    const fileService = new FileService(settings, 'image', provider);

    try {
        const request = { imageSource, prompt, options: parseOptions(flags) };
        const analysis = await analyze_image(request, config, provider, fileService);
        console.log(formatOutput(analysis, wantsJson()));
    } catch (failure) {
        handleError(failure, wantsJson());
    }
}
|
|
30
|
+
//# sourceMappingURL=analyze-image.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"analyze-image.js","sourceRoot":"","sources":["../../../src/cli/commands/analyze-image.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,aAAa,EAAE,MAAM,iCAAiC,CAAC;AAChE,OAAO,EAAE,qBAAqB,EAAE,MAAM,4CAA4C,CAAC;AACnF,OAAO,EAAE,WAAW,EAAE,MAAM,+BAA+B,CAAC;AAC5D,OAAO,EAAE,aAAa,EAAE,MAAM,8BAA8B,CAAC;AAC7D,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAEjF,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,IAAc,EAAE,MAAc;IAElE,MAAM,EAAE,UAAU,EAAE,OAAO,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAEhD,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC1B,OAAO,CAAC,KAAK,CAAC,8BAA8B,CAAC,CAAC;QAC9C,OAAO,CAAC,KAAK,CAAC,yDAAyD,CAAC,CAAC;QACzE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,WAAW,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;IAClC,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAE9B,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,CAAC,KAAK,CAAC,6BAA6B,CAAC,CAAC;QAC7C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAGD,MAAM,aAAa,GAAG,aAAa,CAAC,WAAW,EAAE,CAAC;IAClD,MAAM,aAAa,GAAG,qBAAqB,CAAC,4BAA4B,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC1F,MAAM,gBAAgB,GAAG,IAAI,WAAW,CAAC,aAAa,EAAE,OAAO,EAAE,aAAoB,CAAC,CAAC;IAGvF,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,aAAa,CAChC,EAAE,WAAW,EAAE,MAAM,EAAE,OAAO,EAAE,YAAY,CAAC,OAAO,CAAC,EAAE,EACvD,MAAM,EACN,aAAa,EACb,gBAAgB,CACjB,CAAC;QAEF,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,MAAM,EAAE,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC;IACvD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,WAAW,CAAC,KAAK,EAAE,MAAM,IAAI,OAAO,CAAC,CAAC;IACxC,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"analyze-video.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/analyze-video.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAOpD,wBAAsB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAoCnF"}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import { ConfigService } from '../../services/ConfigService.js';
|
|
2
|
+
import { VisionProviderFactory } from '../../providers/factory/ProviderFactory.js';
|
|
3
|
+
import { FileService } from '../../services/FileService.js';
|
|
4
|
+
import { analyze_video } from '../../tools/analyze_video.js';
|
|
5
|
+
import { parseOptions, formatOutput, handleError, parseArgs } from '../utils.js';
|
|
6
|
+
/**
 * Runs the `analyze-video` CLI command.
 *
 * Requires at least one positional argument (the video source) and a truthy
 * `prompt` option. When either is missing, an error is printed with
 * `console.error` and `process.exit(1)` is called. Otherwise a video provider
 * and a file service are built, `analyze_video` is awaited, and the formatted
 * result is printed with `console.log`. Anything thrown inside the analysis
 * step is passed to `handleError`.
 *
 * @param {*} args - Argument list for this sub-command (presumably the argv
 *   tokens after the command name); handed to `parseArgs` unchanged.
 * @param {*} config - Configuration object forwarded to the provider factory
 *   and to `analyze_video`.
 * @returns {Promise<void>}
 */
export async function runAnalyzeVideo(args, config) {
    const parsed = parseArgs(args);
    const sources = parsed.positional;
    const flags = parsed.options;

    // JSON mode is selected by the mere presence of a `json` key; its value is
    // never inspected. Evaluated lazily, at the point of use.
    const wantsJson = () => 'json' in flags;

    if (sources.length < 1) {
        console.error('Error: Video source required');
        console.error('Usage: ai-vision analyze-video <source> --prompt <text>');
        process.exit(1);
    }

    const videoSource = sources[0];
    const { prompt } = flags;

    if (!prompt) {
        console.error('Error: --prompt is required');
        process.exit(1);
    }

    // Built outside the try block: failures here are not routed to handleError.
    const settings = ConfigService.getInstance();
    const provider = VisionProviderFactory.createProviderWithValidation(config, 'video');
    const fileService = new FileService(settings, 'video', provider);

    try {
        const request = { videoSource, prompt, options: parseOptions(flags) };
        const analysis = await analyze_video(request, config, provider, fileService);
        console.log(formatOutput(analysis, wantsJson()));
    } catch (failure) {
        handleError(failure, wantsJson());
    }
}
|
|
30
|
+
//# sourceMappingURL=analyze-video.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"analyze-video.js","sourceRoot":"","sources":["../../../src/cli/commands/analyze-video.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,aAAa,EAAE,MAAM,iCAAiC,CAAC;AAChE,OAAO,EAAE,qBAAqB,EAAE,MAAM,4CAA4C,CAAC;AACnF,OAAO,EAAE,WAAW,EAAE,MAAM,+BAA+B,CAAC;AAC5D,OAAO,EAAE,aAAa,EAAE,MAAM,8BAA8B,CAAC;AAC7D,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAEjF,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,IAAc,EAAE,MAAc;IAElE,MAAM,EAAE,UAAU,EAAE,OAAO,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAEhD,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC1B,OAAO,CAAC,KAAK,CAAC,8BAA8B,CAAC,CAAC;QAC9C,OAAO,CAAC,KAAK,CAAC,yDAAyD,CAAC,CAAC;QACzE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,WAAW,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;IAClC,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAE9B,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,CAAC,KAAK,CAAC,6BAA6B,CAAC,CAAC;QAC7C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAGD,MAAM,aAAa,GAAG,aAAa,CAAC,WAAW,EAAE,CAAC;IAClD,MAAM,aAAa,GAAG,qBAAqB,CAAC,4BAA4B,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC1F,MAAM,gBAAgB,GAAG,IAAI,WAAW,CAAC,aAAa,EAAE,OAAO,EAAE,aAAoB,CAAC,CAAC;IAGvF,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,aAAa,CAChC,EAAE,WAAW,EAAE,MAAM,EAAE,OAAO,EAAE,YAAY,CAAC,OAAO,CAAC,EAAE,EACvD,MAAM,EACN,aAAa,EACb,gBAAgB,CACjB,CAAC;QAEF,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,MAAM,EAAE,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC;IACvD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,WAAW,CAAC,KAAK,EAAE,MAAM,IAAI,OAAO,CAAC,CAAC;IACxC,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"compare-images.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/compare-images.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAOpD,wBAAsB,gBAAgB,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAoCpF"}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import { ConfigService } from '../../services/ConfigService.js';
|
|
2
|
+
import { VisionProviderFactory } from '../../providers/factory/ProviderFactory.js';
|
|
3
|
+
import { FileService } from '../../services/FileService.js';
|
|
4
|
+
import { compare_images } from '../../tools/compare_images.js';
|
|
5
|
+
import { parseOptions, formatOutput, handleError, parseArgs } from '../utils.js';
|
|
6
|
+
/**
 * Runs the `compare-images` CLI command.
 *
 * Requires at least two positional arguments (the image sources) and a truthy
 * `prompt` option. When either requirement is unmet, an error is printed with
 * `console.error` and `process.exit(1)` is called. Otherwise an image provider
 * and a file service are built, `compare_images` is awaited with every
 * positional argument as a source, and the formatted result is printed with
 * `console.log`. Anything thrown inside the comparison step is passed to
 * `handleError`.
 *
 * @param {*} args - Argument list for this sub-command (presumably the argv
 *   tokens after the command name); handed to `parseArgs` unchanged.
 * @param {*} config - Configuration object forwarded to the provider factory
 *   and to `compare_images`.
 * @returns {Promise<void>}
 */
export async function runCompareImages(args, config) {
    const parsed = parseArgs(args);
    const imageSources = parsed.positional;
    const flags = parsed.options;

    // JSON mode is selected by the mere presence of a `json` key; its value is
    // never inspected. Evaluated lazily, at the point of use.
    const wantsJson = () => 'json' in flags;

    if (imageSources.length < 2) {
        console.error('Error: At least 2 image sources required');
        console.error('Usage: ai-vision compare-images <source1> <source2> [source3...] --prompt <text>');
        process.exit(1);
    }

    const { prompt } = flags;

    if (!prompt) {
        console.error('Error: --prompt is required');
        process.exit(1);
    }

    // Built outside the try block: failures here are not routed to handleError.
    const settings = ConfigService.getInstance();
    const provider = VisionProviderFactory.createProviderWithValidation(config, 'image');
    const fileService = new FileService(settings, 'image', provider);

    try {
        const request = { imageSources, prompt, options: parseOptions(flags) };
        const comparison = await compare_images(request, config, provider, fileService);
        console.log(formatOutput(comparison, wantsJson()));
    } catch (failure) {
        handleError(failure, wantsJson());
    }
}
|
|
30
|
+
//# sourceMappingURL=compare-images.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"compare-images.js","sourceRoot":"","sources":["../../../src/cli/commands/compare-images.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,aAAa,EAAE,MAAM,iCAAiC,CAAC;AAChE,OAAO,EAAE,qBAAqB,EAAE,MAAM,4CAA4C,CAAC;AACnF,OAAO,EAAE,WAAW,EAAE,MAAM,+BAA+B,CAAC;AAC5D,OAAO,EAAE,cAAc,EAAE,MAAM,+BAA+B,CAAC;AAC/D,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAEjF,MAAM,CAAC,KAAK,UAAU,gBAAgB,CAAC,IAAc,EAAE,MAAc;IAEnE,MAAM,EAAE,UAAU,EAAE,OAAO,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAEhD,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC1B,OAAO,CAAC,KAAK,CAAC,0CAA0C,CAAC,CAAC;QAC1D,OAAO,CAAC,KAAK,CAAC,kFAAkF,CAAC,CAAC;QAClG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,YAAY,GAAG,UAAU,CAAC;IAChC,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAE9B,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,CAAC,KAAK,CAAC,6BAA6B,CAAC,CAAC;QAC7C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAGD,MAAM,aAAa,GAAG,aAAa,CAAC,WAAW,EAAE,CAAC;IAClD,MAAM,aAAa,GAAG,qBAAqB,CAAC,4BAA4B,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC1F,MAAM,gBAAgB,GAAG,IAAI,WAAW,CAAC,aAAa,EAAE,OAAO,EAAE,aAAoB,CAAC,CAAC;IAGvF,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,cAAc,CACjC,EAAE,YAAY,EAAE,MAAM,EAAE,OAAO,EAAE,YAAY,CAAC,OAAO,CAAC,EAAE,EACxD,MAAM,EACN,aAAa,EACb,gBAAgB,CACjB,CAAC;QAEF,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,MAAM,EAAE,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC;IACvD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,WAAW,CAAC,KAAK,EAAE,MAAM,IAAI,OAAO,CAAC,CAAC;IACxC,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"detect-objects.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/detect-objects.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAOpD,wBAAsB,gBAAgB,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAqCpF"}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { ConfigService } from '../../services/ConfigService.js';
|
|
2
|
+
import { VisionProviderFactory } from '../../providers/factory/ProviderFactory.js';
|
|
3
|
+
import { FileService } from '../../services/FileService.js';
|
|
4
|
+
import { detect_objects_in_image } from '../../tools/detect_objects_in_image.js';
|
|
5
|
+
import { parseOptions, formatOutput, handleError, parseArgs } from '../utils.js';
|
|
6
|
+
/**
 * Runs the `detect-objects` CLI command.
 *
 * Requires at least one positional argument (the image source) and a truthy
 * `prompt` option; the `output` option is read and forwarded as
 * `outputFilePath` without being checked here. When a requirement is unmet, an
 * error is printed with `console.error` and `process.exit(1)` is called.
 * Otherwise an image provider and a file service are built,
 * `detect_objects_in_image` is awaited, and the formatted result is printed
 * with `console.log`. Anything thrown inside the detection step is passed to
 * `handleError`.
 *
 * @param {*} args - Argument list for this sub-command (presumably the argv
 *   tokens after the command name); handed to `parseArgs` unchanged.
 * @param {*} config - Configuration object forwarded to the provider factory
 *   and to `detect_objects_in_image`.
 * @returns {Promise<void>}
 */
export async function runDetectObjects(args, config) {
    const parsed = parseArgs(args);
    const sources = parsed.positional;
    const flags = parsed.options;

    // JSON mode is selected by the mere presence of a `json` key; its value is
    // never inspected. Evaluated lazily, at the point of use.
    const wantsJson = () => 'json' in flags;

    if (sources.length < 1) {
        console.error('Error: Image source required');
        console.error('Usage: ai-vision detect-objects <source> --prompt <text>');
        process.exit(1);
    }

    const imageSource = sources[0];
    const { prompt, output: outputFilePath } = flags;

    if (!prompt) {
        console.error('Error: --prompt is required');
        process.exit(1);
    }

    // Built outside the try block: failures here are not routed to handleError.
    const settings = ConfigService.getInstance();
    const provider = VisionProviderFactory.createProviderWithValidation(config, 'image');
    const fileService = new FileService(settings, 'image', provider);

    try {
        const request = { imageSource, prompt, outputFilePath, options: parseOptions(flags) };
        const detection = await detect_objects_in_image(request, config, provider, fileService);
        console.log(formatOutput(detection, wantsJson()));
    } catch (failure) {
        handleError(failure, wantsJson());
    }
}
|
|
31
|
+
//# sourceMappingURL=detect-objects.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"detect-objects.js","sourceRoot":"","sources":["../../../src/cli/commands/detect-objects.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,aAAa,EAAE,MAAM,iCAAiC,CAAC;AAChE,OAAO,EAAE,qBAAqB,EAAE,MAAM,4CAA4C,CAAC;AACnF,OAAO,EAAE,WAAW,EAAE,MAAM,+BAA+B,CAAC;AAC5D,OAAO,EAAE,uBAAuB,EAAE,MAAM,wCAAwC,CAAC;AACjF,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAEjF,MAAM,CAAC,KAAK,UAAU,gBAAgB,CAAC,IAAc,EAAE,MAAc;IAEnE,MAAM,EAAE,UAAU,EAAE,OAAO,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAEhD,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC1B,OAAO,CAAC,KAAK,CAAC,8BAA8B,CAAC,CAAC;QAC9C,OAAO,CAAC,KAAK,CAAC,0DAA0D,CAAC,CAAC;QAC1E,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,WAAW,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;IAClC,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAC9B,MAAM,cAAc,GAAG,OAAO,CAAC,MAAM,CAAC;IAEtC,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,CAAC,KAAK,CAAC,6BAA6B,CAAC,CAAC;QAC7C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAGD,MAAM,aAAa,GAAG,aAAa,CAAC,WAAW,EAAE,CAAC;IAClD,MAAM,aAAa,GAAG,qBAAqB,CAAC,4BAA4B,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC1F,MAAM,gBAAgB,GAAG,IAAI,WAAW,CAAC,aAAa,EAAE,OAAO,EAAE,aAAoB,CAAC,CAAC;IAGvF,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAC1C,EAAE,WAAW,EAAE,MAAM,EAAE,cAAc,EAAE,OAAO,EAAE,YAAY,CAAC,OAAO,CAAC,EAAE,EACvE,MAAM,EACN,aAAa,EACb,gBAAgB,CACjB,CAAC;QAEF,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,MAAM,EAAE,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC;IACvD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,WAAW,CAAC,KAAK,EAAE,MAAM,IAAI,OAAO,CAAC,CAAC;IACxC,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/cli/index.ts"],"names":[],"mappings":";AAQA,wBAAsB,MAAM,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CA+B1D"}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { ConfigService } from '../services/ConfigService.js';
|
|
3
|
+
import { runAnalyzeImage } from './commands/analyze-image.js';
|
|
4
|
+
import { runCompareImages } from './commands/compare-images.js';
|
|
5
|
+
import { runDetectObjects } from './commands/detect-objects.js';
|
|
6
|
+
import { runAnalyzeVideo } from './commands/analyze-video.js';
|
|
7
|
+
/**
 * Entry point of the `ai-vision` command line interface.
 *
 * The first element of `args` selects the command; the remaining elements
 * are passed to that command's handler together with the loaded config.
 *
 * Help requests and unrecognised commands are resolved before the
 * configuration is loaded, so `help` does not depend on config loading
 * succeeding.
 * NOTE(review): whether `getConfig()` can throw on an incomplete
 * environment is not visible from this file — confirm in ConfigService.
 *
 * @param {string[]} args - CLI arguments, starting with the command name.
 * @returns {Promise<void>} Resolves once the selected command has finished.
 *   An unknown or missing command prints the help text and exits with code 1.
 */
export async function runCli(args) {
    const [command, ...commandArgs] = args;

    if (command === 'help' || command === '--help' || command === '-h') {
        showHelp();
        return;
    }

    // A Map (rather than a plain object) so that names such as "constructor"
    // are never mistaken for a registered command.
    const commandHandlers = new Map([
        ['analyze-image', runAnalyzeImage],
        ['compare-images', runCompareImages],
        ['detect-objects', runDetectObjects],
        ['analyze-video', runAnalyzeVideo],
    ]);
    const handler = commandHandlers.get(command);

    if (handler === undefined) {
        // Avoid the misleading "Unknown command: undefined" when nothing was typed.
        console.error(command === undefined ? 'Error: No command specified' : `Unknown command: ${command}`);
        showHelp();
        process.exit(1);
        return;
    }

    const config = ConfigService.getInstance().getConfig();
    await handler(commandArgs, config);
}
|
|
36
|
+
/**
 * Print the CLI usage text to stdout.
 *
 * Lists every command and the options the CLI code reads: the generation
 * options, `--json`, `--output` (read by `detect-objects`) and the video
 * segment options parsed into `videoMetadata`.
 * NOTE(review): the video options are presumably consumed by
 * `analyze-video`; that command's source is not visible here — confirm.
 */
function showHelp() {
    console.log(`
ai-vision CLI

Usage: ai-vision <command> [options]

Commands:
  analyze-image <source>        Analyze an image
  compare-images <sources...>   Compare multiple images (2-4)
  detect-objects <source>       Detect objects in an image
  analyze-video <source>        Analyze a video

Global Options:
  --prompt <text>          The analysis prompt (required)
  --json                   Output raw JSON
  --temperature <num>      Temperature 0-2 (default: 0.7)
  --top-p <num>            Top P 0-1
  --top-k <num>            Top K 1-100
  --max-tokens <num>       Max output tokens
  --help                   Show this help

Object Detection Options:
  --output <path>          Output file path (detect-objects)

Video Options:
  --start-offset <value>   Start offset of the video segment
  --end-offset <value>     End offset of the video segment
  --fps <num>              Frames per second

Examples:
  ai-vision analyze-image https://example.com/img.jpg --prompt "describe"
  ai-vision compare-images img1.jpg img2.jpg --prompt "find differences" --json
  ai-vision detect-objects photo.jpg --prompt "find all cars" --output annotated.jpg
  ai-vision analyze-video clip.mp4 --prompt "summarize"
`);
}
|
|
63
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/cli/index.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,aAAa,EAAE,MAAM,8BAA8B,CAAC;AAC7D,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAC9D,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAChE,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAChE,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAE9D,MAAM,CAAC,KAAK,UAAU,MAAM,CAAC,IAAc;IACzC,MAAM,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;IACxB,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAGlC,MAAM,aAAa,GAAG,aAAa,CAAC,WAAW,EAAE,CAAC;IAClD,MAAM,MAAM,GAAG,aAAa,CAAC,SAAS,EAAE,CAAC;IAEzC,QAAQ,OAAO,EAAE,CAAC;QAChB,KAAK,eAAe;YAClB,MAAM,eAAe,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;YAC3C,MAAM;QACR,KAAK,gBAAgB;YACnB,MAAM,gBAAgB,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;YAC5C,MAAM;QACR,KAAK,gBAAgB;YACnB,MAAM,gBAAgB,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;YAC5C,MAAM;QACR,KAAK,eAAe;YAClB,MAAM,eAAe,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;YAC3C,MAAM;QACR,KAAK,MAAM,CAAC;QACZ,KAAK,QAAQ,CAAC;QACd,KAAK,IAAI;YACP,QAAQ,EAAE,CAAC;YACX,MAAM;QACR;YACE,OAAO,CAAC,KAAK,CAAC,oBAAoB,OAAO,EAAE,CAAC,CAAC;YAC7C,QAAQ,EAAE,CAAC;YACX,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;AACH,CAAC;AAED,SAAS,QAAQ;IACf,OAAO,CAAC,GAAG,CAAC;;;;;;;;;;;;;;;;;;;;;;;;CAwBb,CAAC,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { AnalysisResult } from '../types/Providers.js';
|
|
2
|
+
import type { ObjectDetectionResponse } from '../types/ObjectDetection.js';
|
|
3
|
+
import type { VideoMetadata } from '../types/Analysis.js';
|
|
4
|
+
/**
 * Converts raw string-valued CLI options into typed analysis options.
 * Only options that were supplied appear in the returned object.
 */
export declare function parseOptions(options: Record<string, string>): {
|
|
5
|
+
temperature?: number;
|
|
6
|
+
topP?: number;
|
|
7
|
+
topK?: number;
|
|
8
|
+
maxTokens?: number;
|
|
9
|
+
videoMetadata?: VideoMetadata;
|
|
10
|
+
};
|
|
11
|
+
/**
 * Renders a result as the string to print: pretty-printed JSON when
 * `jsonMode` is truthy, otherwise the result's text or detection summary.
 */
export declare function formatOutput(result: AnalysisResult | ObjectDetectionResponse, jsonMode?: boolean): string;
|
|
12
|
+
/**
 * Reports an error (as a JSON object when `jsonMode` is truthy) and then
 * exits the process with code 1, which is why the return type is `never`.
 */
export declare function handleError(error: unknown, jsonMode?: boolean): never;
|
|
13
|
+
/**
 * Splits raw CLI arguments into positional values and `--name` options.
 * An option without a following value is stored as an empty string.
 */
export declare function parseArgs(args: string[]): {
|
|
14
|
+
positional: string[];
|
|
15
|
+
options: Record<string, string>;
|
|
16
|
+
};
|
|
17
|
+
//# sourceMappingURL=utils.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../src/cli/utils.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAC5D,OAAO,KAAK,EAAE,uBAAuB,EAAE,MAAM,6BAA6B,CAAC;AAC3E,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAE1D,wBAAgB,YAAY,CAAC,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG;IAC7D,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,aAAa,CAAC,EAAE,aAAa,CAAC;CAC/B,CA2CA;AAED,wBAAgB,YAAY,CAC1B,MAAM,EAAE,cAAc,GAAG,uBAAuB,EAChD,QAAQ,CAAC,EAAE,OAAO,GACjB,MAAM,CAeR;AAED,wBAAgB,WAAW,CAAC,KAAK,EAAE,OAAO,EAAE,QAAQ,CAAC,EAAE,OAAO,GAAG,KAAK,CAiBrE;AAED,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG;IAAE,UAAU,EAAE,MAAM,EAAE,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAAE,CAuBnG"}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
 * Convert the raw string options produced by the argument parser into typed
 * analysis options.
 *
 * Both the kebab-case CLI spelling (`top-p`) and the camelCase spelling
 * (`topP`) are accepted; the kebab-case value wins when both are present.
 * Numeric options that are missing, empty, or not parseable as a number are
 * omitted from the result, so `NaN` is never handed on to callers (this is
 * the same rule `fps` follows).
 *
 * @param {Record<string, string>} options - Raw option values keyed by flag name.
 * @returns {{temperature?: number, topP?: number, topK?: number,
 *   maxTokens?: number, videoMetadata?: object}} Only the options that were
 *   supplied with a usable value. `videoMetadata` is present only when at
 *   least one of start offset, end offset or fps was supplied.
 */
export function parseOptions(options) {
    const result = {};

    // Returns the parsed number, or undefined for an absent/empty/non-numeric value.
    const readNumber = (raw, parse) => {
        if (!raw) {
            return undefined;
        }
        const value = parse(raw);
        return Number.isNaN(value) ? undefined : value;
    };
    const toFloat = (raw) => Number.parseFloat(raw);
    // Explicit radix so input such as "0x10" is not read as hexadecimal.
    const toInt = (raw) => Number.parseInt(raw, 10);

    const temperature = readNumber(options.temperature, toFloat);
    if (temperature !== undefined) {
        result.temperature = temperature;
    }
    const topP = readNumber(options['top-p'] || options.topP, toFloat);
    if (topP !== undefined) {
        result.topP = topP;
    }
    const topK = readNumber(options['top-k'] || options.topK, toInt);
    if (topK !== undefined) {
        result.topK = topK;
    }
    const maxTokens = readNumber(options['max-tokens'] || options.maxTokens, toInt);
    if (maxTokens !== undefined) {
        result.maxTokens = maxTokens;
    }

    const videoMetadata = {};
    // Offsets are passed through as the strings the user typed.
    const startOffset = options['start-offset'] || options.startOffset;
    if (startOffset) {
        videoMetadata.startOffset = startOffset;
    }
    const endOffset = options['end-offset'] || options.endOffset;
    if (endOffset) {
        videoMetadata.endOffset = endOffset;
    }
    const fps = readNumber(options.fps, toFloat);
    if (fps !== undefined) {
        videoMetadata.fps = fps;
    }
    if (Object.keys(videoMetadata).length > 0) {
        result.videoMetadata = videoMetadata;
    }
    return result;
}
|
|
33
|
+
/**
 * Turn a tool result into the string that should be printed.
 *
 * In JSON mode the whole result is pretty-printed. Otherwise a result with a
 * `text` property yields that text, a result with `detections` yields its
 * `summary` (or the pretty-printed detections when the summary is empty),
 * and anything else falls back to pretty-printed JSON.
 *
 * @param {object} result - Analysis or object-detection result.
 * @param {boolean} [jsonMode] - Print raw JSON instead of the readable form.
 * @returns {string} Text ready for output.
 */
export function formatOutput(result, jsonMode) {
    const pretty = (value) => JSON.stringify(value, null, 2);

    if (!jsonMode && 'text' in result) {
        return result.text;
    }
    if (!jsonMode && 'detections' in result) {
        return result.summary || pretty(result.detections);
    }
    return pretty(result);
}
|
|
45
|
+
/**
 * Report a failure to the user and terminate the process with exit code 1.
 *
 * In JSON mode an `{ error: true, message }` object is written with
 * `console.log`; otherwise a plain message is written with `console.error`.
 * Values that are not `Error` instances are reported with a generic message.
 *
 * @param {unknown} error - The thrown value.
 * @param {boolean} [jsonMode] - Emit the error as JSON.
 * @returns {never} Does not return; the process exits.
 */
export function handleError(error, jsonMode) {
    const fallbackMessage = 'An unknown error occurred';
    const isRealError = error instanceof Error;

    if (jsonMode) {
        const payload = {
            error: true,
            message: isRealError ? error.message : fallbackMessage,
        };
        console.log(JSON.stringify(payload, null, 2));
    }
    else {
        console.error(isRealError ? `Error: ${error.message}` : fallbackMessage);
    }
    process.exit(1);
}
|
|
63
|
+
/**
 * Split raw CLI arguments into positional values and `--name` options.
 *
 * Supported option forms:
 *   - `--name value`  the following argument becomes the value, unless it
 *                     starts with `--` itself
 *   - `--name=value`  inline value; everything after the first `=` is the
 *                     value, so it may contain `=` or start with `--`
 *   - `--name`        flag without a value, stored as an empty string
 *
 * Anything that does not start with `--` (including single-dash arguments
 * such as `-h` or `-1`) is treated as positional.
 *
 * @param {string[]} args - Arguments following the command name.
 * @returns {{positional: string[], options: Record<string, string>}}
 */
export function parseArgs(args) {
    const positional = [];
    const options = {};
    for (let i = 0; i < args.length; i++) {
        const arg = args[i];
        if (!arg.startsWith('--')) {
            positional.push(arg);
            continue;
        }
        const body = arg.slice(2);
        const equalsIndex = body.indexOf('=');
        if (equalsIndex !== -1) {
            // Inline form: never consumes the next argument.
            options[body.slice(0, equalsIndex)] = body.slice(equalsIndex + 1);
            continue;
        }
        const nextArg = args[i + 1];
        if (nextArg !== undefined && !nextArg.startsWith('--')) {
            options[body] = nextArg;
            i++; // the value has been consumed
        }
        else {
            options[body] = '';
        }
    }
    return { positional, options };
}
|
|
85
|
+
//# sourceMappingURL=utils.js.map
|