isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +1166 -584
- isa_model/core/cache/redis_cache.py +410 -0
- isa_model/core/config/config_manager.py +282 -12
- isa_model/core/config.py +91 -1
- isa_model/core/database/__init__.py +1 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +297 -0
- isa_model/core/database/supabase_client.py +258 -0
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +46 -0
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_billing_tracker.py +60 -88
- isa_model/core/models/model_manager.py +66 -25
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +217 -55
- isa_model/core/models/model_statistics_tracker.py +234 -0
- isa_model/core/models/model_storage.py +0 -1
- isa_model/core/models/model_version_manager.py +959 -0
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/pricing_manager.py +2 -249
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/resilience/circuit_breaker.py +366 -0
- isa_model/core/security/secrets.py +358 -0
- isa_model/core/services/__init__.py +2 -4
- isa_model/core/services/intelligent_model_selector.py +479 -370
- isa_model/core/storage/hf_storage.py +2 -2
- isa_model/core/types.py +8 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -368
- isa_model/deployment/local/__init__.py +31 -0
- isa_model/deployment/local/config.py +248 -0
- isa_model/deployment/local/gpu_gateway.py +607 -0
- isa_model/deployment/local/health_checker.py +428 -0
- isa_model/deployment/local/provider.py +586 -0
- isa_model/deployment/local/tensorrt_service.py +621 -0
- isa_model/deployment/local/transformers_service.py +644 -0
- isa_model/deployment/local/vllm_service.py +527 -0
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/modal/deployer.py +894 -0
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
- isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
- isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
- isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
- isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +179 -16
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/__init__.py +21 -0
- isa_model/inference/services/audio/base_realtime_service.py +225 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/isa_tts_service.py +0 -0
- isa_model/inference/services/audio/openai_realtime_service.py +320 -124
- isa_model/inference/services/audio/openai_stt_service.py +53 -11
- isa_model/inference/services/base_service.py +17 -1
- isa_model/inference/services/custom_model_manager.py +277 -0
- isa_model/inference/services/embedding/__init__.py +13 -0
- isa_model/inference/services/embedding/base_embed_service.py +111 -8
- isa_model/inference/services/embedding/isa_embed_service.py +305 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/openai_embed_service.py +2 -4
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
- isa_model/inference/services/img/__init__.py +2 -2
- isa_model/inference/services/img/base_image_gen_service.py +24 -7
- isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
- isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
- isa_model/inference/services/img/services/replicate_flux.py +226 -0
- isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
- isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
- isa_model/inference/services/img/tests/test_img_client.py +297 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +361 -26
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/local_llm_service.py +747 -0
- isa_model/inference/services/llm/ollama_llm_service.py +11 -3
- isa_model/inference/services/llm/openai_llm_service.py +670 -56
- isa_model/inference/services/llm/yyds_llm_service.py +10 -3
- isa_model/inference/services/vision/__init__.py +27 -6
- isa_model/inference/services/vision/base_vision_service.py +118 -185
- isa_model/inference/services/vision/blip_vision_service.py +359 -0
- isa_model/inference/services/vision/helpers/image_utils.py +19 -10
- isa_model/inference/services/vision/isa_vision_service.py +634 -0
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +240 -18
- isa_model/serving/api/middleware/auth.py +317 -0
- isa_model/serving/api/middleware/security.py +268 -0
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +489 -0
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +475 -0
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/logs.py +430 -0
- isa_model/serving/api/routes/settings.py +582 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +992 -171
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +318 -0
- isa_model/serving/modal_proxy_server.py +249 -0
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
- isa_model-0.4.3.dist-info/RECORD +193 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
- isa_model/deployment/cloud/modal/register_models.py +0 -321
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks.py +0 -469
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -18
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/factory.py +0 -531
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/metrics.py +0 -798
- isa_model/inference/adapter/unified_api.py +0 -248
- isa_model/inference/services/helpers/stacked_config.py +0 -148
- isa_model/inference/services/img/flux_professional_service.py +0 -603
- isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/others/table_transformer_service.py +0 -61
- isa_model/inference/services/vision/doc_analysis_service.py +0 -640
- isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/vision/ui_analysis_service.py +0 -823
- isa_model/scripts/inference_tracker.py +0 -283
- isa_model/scripts/mlflow_manager.py +0 -379
- isa_model/scripts/model_registry.py +0 -465
- isa_model/scripts/register_models.py +0 -370
- isa_model/scripts/register_models_with_embeddings.py +0 -510
- isa_model/scripts/start_mlflow.py +0 -95
- isa_model/scripts/training_tracker.py +0 -257
- isa_model/training/__init__.py +0 -74
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -23
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/factory.py +0 -424
- isa_model-0.3.91.dist-info/RECORD +0 -138
- /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -1,321 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Model Registration Script for UI Analysis Pipeline
|
3
|
-
|
4
|
-
Registers the latest versions of UI analysis models in the core model registry
|
5
|
-
Prepares models for Modal deployment with proper version management
|
6
|
-
"""
|
7
|
-
|
8
|
-
import asyncio
|
9
|
-
from pathlib import Path
|
10
|
-
import sys
|
11
|
-
import os
|
12
|
-
|
13
|
-
# Add project root to path
|
14
|
-
project_root = Path(__file__).parent.parent.parent.parent
|
15
|
-
sys.path.insert(0, str(project_root))
|
16
|
-
|
17
|
-
from isa_model.core.model_manager import ModelManager
|
18
|
-
from isa_model.core.model_repo import ModelRegistry, ModelType, ModelCapability
|
19
|
-
|
20
|
-
async def register_ui_analysis_models():
    """Register the UI-analysis model set in the core model registry.

    Creates a ``ModelManager``, prints every ``ModelCapability`` member as a
    debugging aid, and then registers five vision models (OmniParser, the two
    Table Transformer variants, PaddleOCR and a YOLOv8 fallback) through
    ``manager.registry.register_model``. Each entry's metadata is tagged with
    ``download_status='not_downloaded'``.

    Returns:
        A list with one dict per model, in registration order. Every dict
        has ``model_id`` and ``status`` (``'success'``, ``'failed'`` or
        ``'error'``); ``'error'`` entries also carry the exception text under
        ``error``.
    """
    manager = ModelManager()

    print("🔧 Registering UI Analysis Models...")

    # Debug aid: show which capabilities the enum currently defines.
    print("Available capabilities:")
    for capability in ModelCapability:
        print(f" - {capability.name}: {capability.value}")
    print()

    # Model definitions; "revision" is the upstream branch/tag to track.
    model_specs = [
        {
            "model_id": "omniparser-v2.0",
            "repo_id": "microsoft/OmniParser",
            "model_type": ModelType.VISION,
            "capabilities": [
                ModelCapability.UI_DETECTION,
                ModelCapability.IMAGE_ANALYSIS,
                ModelCapability.IMAGE_UNDERSTANDING,
            ],
            "revision": "main",
            "metadata": {
                "description": "Microsoft OmniParser v2.0 - Advanced UI element detection",
                "provider": "microsoft",
                "model_family": "omniparser",
                "version": "2.0",
                "paper": "https://arxiv.org/abs/2408.00203",
                "huggingface_url": "https://huggingface.co/microsoft/OmniParser",
                "use_case": "UI element detection and parsing",
                "input_format": "image",
                "output_format": "structured_elements",
                "gpu_memory_mb": 8192,
                "inference_time_ms": 500,
            },
        },
        {
            "model_id": "table-transformer-v1.1-detection",
            "repo_id": "microsoft/table-transformer-detection",
            "model_type": ModelType.VISION,
            "capabilities": [
                ModelCapability.TABLE_DETECTION,
                ModelCapability.IMAGE_ANALYSIS,
            ],
            "revision": "main",
            "metadata": {
                "description": "Microsoft Table Transformer v1.1 - Table detection model",
                "provider": "microsoft",
                "model_family": "table-transformer",
                "version": "1.1",
                "paper": "https://arxiv.org/abs/2110.00061",
                "huggingface_url": "https://huggingface.co/microsoft/table-transformer-detection",
                "use_case": "Table detection in documents and images",
                "input_format": "image",
                "output_format": "bounding_boxes",
                "gpu_memory_mb": 4096,
                "inference_time_ms": 300,
            },
        },
        {
            "model_id": "table-transformer-v1.1-structure",
            "repo_id": "microsoft/table-transformer-structure-recognition",
            "model_type": ModelType.VISION,
            "capabilities": [
                ModelCapability.TABLE_STRUCTURE_RECOGNITION,
                ModelCapability.IMAGE_ANALYSIS,
            ],
            "revision": "main",
            "metadata": {
                "description": "Microsoft Table Transformer v1.1 - Table structure recognition",
                "provider": "microsoft",
                "model_family": "table-transformer",
                "version": "1.1",
                "paper": "https://arxiv.org/abs/2110.00061",
                "huggingface_url": "https://huggingface.co/microsoft/table-transformer-structure-recognition",
                "use_case": "Table structure recognition and cell extraction",
                "input_format": "image",
                "output_format": "table_structure",
                "gpu_memory_mb": 4096,
                "inference_time_ms": 400,
            },
        },
        {
            "model_id": "paddleocr-v3.0",
            "repo_id": "PaddlePaddle/PaddleOCR",
            "model_type": ModelType.VISION,
            "capabilities": [
                ModelCapability.OCR,
                ModelCapability.IMAGE_ANALYSIS,
            ],
            "revision": "release/2.8",
            "metadata": {
                "description": "PaddleOCR v3.0 - Multilingual OCR model",
                "provider": "paddlepaddle",
                "model_family": "paddleocr",
                "version": "3.0",
                "github_url": "https://github.com/PaddlePaddle/PaddleOCR",
                "huggingface_url": "https://huggingface.co/PaddlePaddle/PaddleOCR",
                "use_case": "Text extraction from images",
                "input_format": "image",
                "output_format": "text_with_coordinates",
                "languages": ["en", "ch", "multilingual"],
                "gpu_memory_mb": 2048,
                "inference_time_ms": 200,
            },
        },
        {
            "model_id": "yolov8n-fallback",
            "repo_id": "ultralytics/yolov8",
            "model_type": ModelType.VISION,
            "capabilities": [
                ModelCapability.IMAGE_ANALYSIS,
                ModelCapability.UI_DETECTION,  # As fallback
            ],
            "revision": "main",
            "metadata": {
                "description": "YOLOv8 Nano - Fallback object detection model",
                "provider": "ultralytics",
                "model_family": "yolo",
                "version": "8.0",
                "github_url": "https://github.com/ultralytics/ultralytics",
                "use_case": "General object detection (fallback for UI elements)",
                "input_format": "image",
                "output_format": "bounding_boxes",
                "gpu_memory_mb": 1024,
                "inference_time_ms": 50,
            },
        },
    ]

    # Attempt every registration; one failure must not stop the rest.
    outcomes = []
    for spec in model_specs:
        model_id = spec["model_id"]
        print(f"\n📝 Registering {model_id}...")

        try:
            # Registry entry only: the spec's metadata plus bookkeeping keys.
            registered = manager.registry.register_model(
                model_id=model_id,
                model_type=spec["model_type"],
                capabilities=spec["capabilities"],
                metadata=dict(
                    spec["metadata"],
                    repo_id=spec["repo_id"],
                    revision=spec["revision"],
                    registered_at="auto",
                    download_status="not_downloaded",
                ),
            )

            if registered:
                print(f"✅ Successfully registered {model_id}")
                outcome = {"model_id": model_id, "status": "success"}
            else:
                print(f"❌ Failed to register {model_id}")
                outcome = {"model_id": model_id, "status": "failed"}
            outcomes.append(outcome)

        except Exception as exc:
            print(f"❌ Error registering {model_id}: {exc}")
            outcomes.append(
                {"model_id": model_id, "status": "error", "error": str(exc)}
            )

    # Summary: split outcomes into successes and everything else.
    print("\n📊 Registration Summary:")
    succeeded = []
    not_succeeded = []
    for outcome in outcomes:
        if outcome["status"] == "success":
            succeeded.append(outcome)
        else:
            not_succeeded.append(outcome)

    print(f"✅ Successfully registered: {len(succeeded)} models")
    for outcome in succeeded:
        print(f" - {outcome['model_id']}")

    if not_succeeded:
        print(f"❌ Failed to register: {len(not_succeeded)} models")
        for outcome in not_succeeded:
            # Only 'error' outcomes carry an explanatory message.
            if "error" in outcome:
                detail = f" ({outcome['error']})"
            else:
                detail = ""
            print(f" - {outcome['model_id']}{detail}")

    return outcomes
|
212
|
-
|
213
|
-
async def verify_model_registry():
    """Print which registered models cover each UI-analysis capability.

    Looks up UI detection, OCR, table detection and table structure
    recognition via ``registry.get_models_by_capability`` and prints one line
    per matching model (id, version, provider), then prints the totals
    reported by ``registry.get_stats()``. Returns nothing.
    """
    manager = ModelManager()

    print("\n🔍 Verifying Model Registry...")

    # Capabilities the UI-analysis pipeline relies on, in report order.
    wanted = (
        ModelCapability.UI_DETECTION,
        ModelCapability.OCR,
        ModelCapability.TABLE_DETECTION,
        ModelCapability.TABLE_STRUCTURE_RECOGNITION,
    )

    for capability in wanted:
        matches = manager.registry.get_models_by_capability(capability)
        print(f"\n📋 Models with {capability.value} capability:")

        if not matches:
            print(f" ❌ No models found for {capability.value}")
            continue

        for model_id, info in matches.items():
            meta = info.get("metadata", {})
            model_version = meta.get("version", "unknown")
            model_provider = meta.get("provider", "unknown")
            print(f" ✅ {model_id} (v{model_version}, {model_provider})")

    # Overall registry statistics.
    registry_stats = manager.registry.get_stats()
    print("\n📈 Registry Statistics:")
    print(f" Total models: {registry_stats['total_models']}")
    print(f" Models by type: {registry_stats['models_by_type']}")
    print(f" Models by capability: {registry_stats['models_by_capability']}")
|
247
|
-
|
248
|
-
def get_model_for_capability(capability: ModelCapability) -> "str | None":
    """Return the preferred registered model id for *capability*.

    Queries a fresh ``ModelManager`` registry for models advertising the
    capability and picks the first id from a fixed per-capability priority
    list that is actually registered. If none of the preferred ids is
    registered, the first id reported by the registry is returned.

    Args:
        capability: The capability a model is needed for.

    Returns:
        The chosen model id, or ``None`` when the registry reports no model
        for the capability.
    """
    models = ModelManager().registry.get_models_by_capability(capability)
    if not models:
        return None

    # Preferred model ids per capability, most preferred first.
    priority_order = {
        ModelCapability.UI_DETECTION: (
            "omniparser-v2.0",
            "yolov8n-fallback",
        ),
        ModelCapability.OCR: ("paddleocr-v3.0",),
        ModelCapability.TABLE_DETECTION: ("table-transformer-v1.1-detection",),
        ModelCapability.TABLE_STRUCTURE_RECOGNITION: (
            "table-transformer-v1.1-structure",
        ),
    }

    for model_id in priority_order.get(capability, ()):
        if model_id in models:
            return model_id

    # No preferred model is registered: fall back to the first available one.
    # `models` is non-empty here, so next() cannot raise StopIteration.
    return next(iter(models.keys()))
|
283
|
-
|
284
|
-
async def main():
    """Run the registration workflow and print a usage hint.

    Registers the UI-analysis models, verifies the registry contents, then
    prints a short usage example.

    Returns:
        ``True`` when both steps complete, ``False`` when any exception is
        raised (the error is printed, not re-raised).
    """
    print("🚀 ISA Model Registry - UI Analysis Models Registration")
    print("=" * 60)

    try:
        # Per-model outcomes are already printed by the registration step,
        # so its return value is not needed here.
        await register_ui_analysis_models()

        await verify_model_registry()

        print("\n🎉 Model registration completed!")
        print(" Use ModelManager.get_model() to download and use models")
        print(" Use get_model_for_capability() to get recommended models")

        # Usage example. The snippet references ModelType, so the printed
        # import line must bring it in alongside ModelCapability.
        print("\n💡 Usage Example:")
        print(" from isa_model.core.model_manager import ModelManager")
        print(" from isa_model.core.model_repo import ModelCapability, ModelType")
        print(" ")
        print(" manager = ModelManager()")
        print(" ui_model_path = await manager.get_model(")
        print(" model_id='omniparser-v2.0',")
        print(" repo_id='microsoft/OmniParser',")
        print(" model_type=ModelType.VISION,")
        print(" capabilities=[ModelCapability.UI_DETECTION]")
        print(" )")

    except Exception as e:
        # Top-level boundary of the script: report and signal failure.
        print(f"❌ Registration failed: {e}")
        return False

    return True
|
319
|
-
|
320
|
-
if __name__ == "__main__":
    # main() signals failure by returning False; turn that into a non-zero
    # exit status so shells and CI jobs can detect a failed registration.
    sys.exit(0 if asyncio.run(main()) else 1)
|
@@ -1,356 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Deployment Configuration Classes
|
3
|
-
|
4
|
-
Defines configuration classes for different deployment scenarios including
|
5
|
-
RunPod serverless, Triton inference server, and TensorRT-LLM backend.
|
6
|
-
"""
|
7
|
-
|
8
|
-
from dataclasses import dataclass, field
|
9
|
-
from typing import Optional, Dict, Any, List
|
10
|
-
from enum import Enum
|
11
|
-
from pathlib import Path
|
12
|
-
|
13
|
-
|
14
|
-
class DeploymentProvider(str, Enum):
    """Platforms a model can be deployed to.

    Members are also ``str`` instances, so each compares equal to its
    string value.
    """

    # RunPod targets
    RUNPOD_SERVERLESS = "runpod_serverless"
    RUNPOD_PODS = "runpod_pods"

    # Public-cloud targets
    AWS_LAMBDA = "aws_lambda"
    GOOGLE_CLOUD_RUN = "google_cloud_run"
    AZURE_CONTAINER_INSTANCES = "azure_container_instances"

    # Local machine
    LOCAL = "local"
|
22
|
-
|
23
|
-
|
24
|
-
class InferenceEngine(str, Enum):
    """Inference runtimes a deployment can be served with.

    Members are also ``str`` instances, so each compares equal to its
    string value.
    """

    TRITON = "triton"
    VLLM = "vllm"
    TENSORRT_LLM = "tensorrt_llm"
    HUGGINGFACE = "huggingface"
    ONNX = "onnx"
    TORCHSCRIPT = "torchscript"
|
32
|
-
|
33
|
-
|
34
|
-
class ModelFormat(str, Enum):
    """On-disk model formats accepted for deployment.

    Members are also ``str`` instances, so each compares equal to its
    string value.
    """

    HUGGINGFACE = "huggingface"
    TENSORRT = "tensorrt"
    ONNX = "onnx"
    TORCHSCRIPT = "torchscript"
    SAFETENSORS = "safetensors"
|
41
|
-
|
42
|
-
|
43
|
-
@dataclass
class TritonConfig:
    """Settings for serving a model with Triton Inference Server.

    Every field has a default, so ``TritonConfig()`` is a valid
    configuration.
    """

    # --- model repository ---
    model_repository: str = "/models"
    model_name: str = "model"
    model_version: str = "1"

    # --- backend ---
    backend: str = "tensorrtllm"  # one of: tensorrtllm, python, onnxruntime
    max_batch_size: int = 8
    max_sequence_length: int = 2048

    # --- TensorRT-LLM specific paths ---
    tensorrt_llm_model_dir: str = "/models/tensorrt_llm"
    engine_dir: str = "/models/engines"
    tokenizer_dir: str = "/models/tokenizer"

    # --- performance ---
    instance_group_count: int = 1
    instance_group_kind: str = "KIND_GPU"  # KIND_GPU or KIND_CPU

    # --- memory ---
    optimization_level: str = "OPTIMIZATION_LEVEL_ENABLED"
    enable_pinned_input: bool = True
    enable_pinned_output: bool = True

    def to_dict(self) -> Dict[str, Any]:
        """Return the instance attributes as a new dictionary."""
        return dict(vars(self))
|
74
|
-
|
75
|
-
|
76
|
-
@dataclass
class RunPodServerlessConfig:
    """Settings for a RunPod Serverless deployment.

    Only ``api_key`` is required; every other field has a default.
    """

    # RunPod account / endpoint identification
    api_key: str
    endpoint_id: Optional[str] = None
    template_id: Optional[str] = None

    # Container configuration
    container_image: str = "runpod/pytorch:2.1.0-py3.10-cuda11.8.0-devel-ubuntu22.04"
    container_disk_in_gb: int = 20

    # GPU configuration
    gpu_type: str = "NVIDIA RTX A6000"
    gpu_count: int = 1

    # Scaling configuration
    min_workers: int = 0
    max_workers: int = 3
    idle_timeout: int = 5  # seconds

    # Network configuration
    network_volume_id: Optional[str] = None

    # Environment variables passed to the container
    env_vars: Dict[str, str] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the instance attributes as a new dictionary.

        ``env_vars`` is copied as well, so editing the returned mapping
        never alters this configuration object.

        NOTE: the result contains ``api_key`` in clear text; avoid logging
        it verbatim.
        """
        data = self.__dict__.copy()
        env = data.get("env_vars")
        # A plain .copy() is shallow: without this the caller would receive
        # the very same env_vars dict the instance holds.
        if isinstance(env, dict):
            data["env_vars"] = dict(env)
        return data
|
107
|
-
|
108
|
-
|
109
|
-
@dataclass
class ModelConfig:
    """Description of the model being deployed.

    ``model_id`` and ``model_name`` are required; every other field has a
    default.
    """

    # Model identification
    model_id: str
    model_name: str
    model_version: str = "1.0.0"

    # Model source
    source_type: str = "huggingface"  # huggingface, local, s3, gcs
    source_path: str = ""

    # Model format and engine
    model_format: ModelFormat = ModelFormat.HUGGINGFACE
    inference_engine: InferenceEngine = InferenceEngine.TRITON

    # Model metadata
    model_type: str = "llm"  # llm, embedding, vision, audio
    capabilities: List[str] = field(default_factory=lambda: ["text_generation"])

    # Performance configuration
    max_batch_size: int = 8
    max_sequence_length: int = 2048
    dtype: str = "float16"  # float32, float16, int8, int4

    # Optimization settings
    use_tensorrt: bool = True
    use_quantization: bool = False
    quantization_method: str = "int8"  # int8, int4, awq, gptq

    def to_dict(self) -> Dict[str, Any]:
        """Return the instance attributes as a new dictionary.

        ``capabilities`` is copied as well, so editing the returned list
        never alters this configuration object. Enum-typed fields are
        returned as the enum members themselves.
        """
        data = self.__dict__.copy()
        caps = data.get("capabilities")
        # A plain .copy() is shallow: without this the caller would receive
        # the very same capabilities list the instance holds.
        if isinstance(caps, list):
            data["capabilities"] = list(caps)
        return data
|
143
|
-
|
144
|
-
|
145
|
-
@dataclass
class DeploymentConfig:
    """Main deployment configuration.

    Groups the deployment identity, the target provider and inference engine,
    the model settings, and the optional provider-specific sub-configurations.

    Raises:
        ValueError: from ``__post_init__`` when ``deployment_id``,
            ``deployment_name`` or ``model_config`` is missing or empty.
    """

    # Deployment identification
    deployment_id: str
    deployment_name: str
    description: Optional[str] = None

    # Provider and engine configuration
    provider: DeploymentProvider = DeploymentProvider.RUNPOD_SERVERLESS
    inference_engine: InferenceEngine = InferenceEngine.TRITON

    # Model configuration. ``None`` is only a placeholder default;
    # ``__post_init__`` rejects a missing model configuration.
    model_config: Optional[ModelConfig] = None

    # Provider-specific configurations
    runpod_config: Optional[RunPodServerlessConfig] = None
    triton_config: Optional[TritonConfig] = None

    # Health check configuration
    health_check_path: str = "/health"
    health_check_timeout: int = 30

    # Monitoring configuration
    enable_logging: bool = True
    log_level: str = "INFO"
    enable_metrics: bool = True

    # Networking
    custom_domain: Optional[str] = None
    allowed_origins: List[str] = field(default_factory=lambda: ["*"])

    # Additional settings
    extra_config: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Validate required fields and fill in default sub-configurations.

        Raises:
            ValueError: if ``deployment_id``, ``deployment_name`` or
                ``model_config`` is falsy.
        """
        if not self.deployment_id:
            raise ValueError("deployment_id is required")

        if not self.deployment_name:
            raise ValueError("deployment_name is required")

        if not self.model_config:
            raise ValueError("model_config is required")

        # Set default provider configs if not provided
        if self.provider == DeploymentProvider.RUNPOD_SERVERLESS and not self.runpod_config:
            self.runpod_config = RunPodServerlessConfig(api_key="")

        if self.inference_engine == InferenceEngine.TRITON and not self.triton_config:
            self.triton_config = TritonConfig()

    def to_dict(self) -> Dict[str, Any]:
        """Convert the configuration to a plain dictionary.

        Nested configurations are serialized through their own ``to_dict``,
        enum members are replaced by their ``value``, and top-level list/dict
        attributes are shallow-copied so that editing the result does not
        alter this configuration.

        Returns:
            A new dictionary keyed by attribute name.
        """
        nested_keys = ('model_config', 'runpod_config', 'triton_config')
        config_dict: Dict[str, Any] = {}

        for key, value in self.__dict__.items():
            if key in nested_keys:
                config_dict[key] = None if value is None else value.to_dict()
            elif isinstance(value, Enum):
                config_dict[key] = value.value
            elif isinstance(value, (list, dict)):
                # Detach the container itself; its elements are still shared.
                config_dict[key] = value.copy()
            else:
                config_dict[key] = value

        return config_dict

    @classmethod
    def from_dict(cls, config_dict: Dict[str, Any]) -> 'DeploymentConfig':
        """Create a configuration from a dictionary.

        The input mapping is not modified, so the same dictionary can be
        passed again (or reused by the caller) after this call.

        Args:
            config_dict: Keyword arguments for the constructor. Nested
                configurations may be given as mappings of constructor
                arguments or as already-built instances; ``provider`` and
                ``inference_engine`` may be enum members or enum values.

        Returns:
            The constructed ``DeploymentConfig``.

        Raises:
            ValueError: if an enum value is unknown or validation in
                ``__post_init__`` fails.
        """
        # Work on a copy: converting entries in place would leave objects in
        # the caller's dict and make a second from_dict() call fail.
        params = dict(config_dict)

        nested_types = (
            ('model_config', ModelConfig),
            ('runpod_config', RunPodServerlessConfig),
            ('triton_config', TritonConfig),
        )
        for key, nested_cls in nested_types:
            raw = params.get(key)
            if raw is not None and not isinstance(raw, nested_cls):
                params[key] = nested_cls(**raw)

        # Enum lookup by value; passing an existing member returns it unchanged.
        if 'provider' in params:
            params['provider'] = DeploymentProvider(params['provider'])

        if 'inference_engine' in params:
            params['inference_engine'] = InferenceEngine(params['inference_engine'])

        return cls(**params)
|
237
|
-
|
238
|
-
|
239
|
-
# Predefined configurations for common deployment scenarios
|
240
|
-
|
241
|
-
def create_gemma_runpod_triton_config(
    model_id: str,
    runpod_api_key: str,
    model_source_path: str = "xenobordom/gemma-4b-alpaca-v1"
) -> DeploymentConfig:
    """
    Build the preset deployment for a Gemma model on RunPod serverless,
    served through Triton with the TensorRT-LLM backend.

    Args:
        model_id: Unique identifier; also embedded in the deployment id and name
        runpod_api_key: RunPod API key stored in the RunPod configuration
        model_source_path: Model source path recorded with source type "huggingface"

    Returns:
        DeploymentConfig combining the model, RunPod and Triton settings
    """
    # Values shared by the model and Triton sections.
    served_name = "gemma-4b-alpaca"
    repository_root = "/models"
    batch_limit = 8
    context_limit = 2048
    engine = InferenceEngine.TRITON

    worker_env = {
        "TRITON_MODEL_REPOSITORY": repository_root,
        "CUDA_VISIBLE_DEVICES": "0",
    }

    gemma_model = ModelConfig(
        model_id=model_id,
        model_name=served_name,
        source_type="huggingface",
        source_path=model_source_path,
        model_format=ModelFormat.HUGGINGFACE,
        inference_engine=engine,
        model_type="llm",
        capabilities=["text_generation", "chat"],
        max_batch_size=batch_limit,
        max_sequence_length=context_limit,
        dtype="float16",
        use_tensorrt=True,
    )

    serverless = RunPodServerlessConfig(
        api_key=runpod_api_key,
        container_image="nvcr.io/nvidia/tritonserver:23.10-trtllm-python-py3",
        container_disk_in_gb=30,
        gpu_type="NVIDIA RTX A6000",
        gpu_count=1,
        min_workers=0,
        max_workers=3,
        idle_timeout=5,
        env_vars=worker_env,
    )

    triton = TritonConfig(
        model_repository=repository_root,
        model_name=served_name,
        backend="tensorrtllm",
        max_batch_size=batch_limit,
        max_sequence_length=context_limit,
        tensorrt_llm_model_dir="/models/tensorrt_llm",
        engine_dir="/models/engines",
        tokenizer_dir="/models/tokenizer",
    )

    return DeploymentConfig(
        deployment_id=f"gemma-deployment-{model_id}",
        deployment_name=f"Gemma 4B Alpaca - {model_id}",
        description="Gemma 4B model fine-tuned on Alpaca dataset, deployed with Triton + TensorRT-LLM",
        provider=DeploymentProvider.RUNPOD_SERVERLESS,
        inference_engine=engine,
        model_config=gemma_model,
        runpod_config=serverless,
        triton_config=triton,
    )
|
308
|
-
|
309
|
-
|
310
|
-
def create_local_triton_config(
    model_id: str,
    model_source_path: str,
    triton_model_repository: str = "./models/triton"
) -> DeploymentConfig:
    """
    Build the preset deployment for running a model on a local Triton server.

    Args:
        model_id: Unique identifier; also embedded in the model, deployment id and name
        model_source_path: Path to the model, recorded with source type "local"
        triton_model_repository: Path to the Triton model repository

    Returns:
        DeploymentConfig using the LOCAL provider and the Triton engine
    """
    # Values shared by the model and Triton sections.
    local_name = f"local-model-{model_id}"
    batch_limit = 4
    context_limit = 1024
    engine = InferenceEngine.TRITON

    local_model = ModelConfig(
        model_id=model_id,
        model_name=local_name,
        source_type="local",
        source_path=model_source_path,
        model_format=ModelFormat.HUGGINGFACE,
        inference_engine=engine,
        model_type="llm",
        capabilities=["text_generation"],
        max_batch_size=batch_limit,
        max_sequence_length=context_limit,
        dtype="float16",
    )

    # The Python backend is used for local development.
    triton = TritonConfig(
        model_repository=triton_model_repository,
        model_name=local_name,
        backend="python",
        max_batch_size=batch_limit,
        max_sequence_length=context_limit,
    )

    return DeploymentConfig(
        deployment_id=f"local-deployment-{model_id}",
        deployment_name=f"Local Model - {model_id}",
        description="Local model deployment for development and testing",
        provider=DeploymentProvider.LOCAL,
        inference_engine=engine,
        model_config=local_model,
        triton_config=triton,
    )
|