PyPI - isa-model - Versions diffs - 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl - Mend

isa-model 0.3.91-py3-none-any.whl → 0.4.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (228)

isa_model/client.py +1166 -584
isa_model/core/cache/redis_cache.py +410 -0
isa_model/core/config/config_manager.py +282 -12
isa_model/core/config.py +91 -1
isa_model/core/database/__init__.py +1 -0
isa_model/core/database/direct_db_client.py +114 -0
isa_model/core/database/migration_manager.py +563 -0
isa_model/core/database/migrations.py +297 -0
isa_model/core/database/supabase_client.py +258 -0
isa_model/core/dependencies.py +316 -0
isa_model/core/discovery/__init__.py +19 -0
isa_model/core/discovery/consul_discovery.py +190 -0
isa_model/core/logging/__init__.py +54 -0
isa_model/core/logging/influx_logger.py +523 -0
isa_model/core/logging/loki_logger.py +160 -0
isa_model/core/models/__init__.py +46 -0
isa_model/core/models/config_models.py +625 -0
isa_model/core/models/deployment_billing_tracker.py +430 -0
isa_model/core/models/model_billing_tracker.py +60 -88
isa_model/core/models/model_manager.py +66 -25
isa_model/core/models/model_metadata.py +690 -0
isa_model/core/models/model_repo.py +217 -55
isa_model/core/models/model_statistics_tracker.py +234 -0
isa_model/core/models/model_storage.py +0 -1
isa_model/core/models/model_version_manager.py +959 -0
isa_model/core/models/system_models.py +857 -0
isa_model/core/pricing_manager.py +2 -249
isa_model/core/repositories/__init__.py +9 -0
isa_model/core/repositories/config_repository.py +912 -0
isa_model/core/resilience/circuit_breaker.py +366 -0
isa_model/core/security/secrets.py +358 -0
isa_model/core/services/__init__.py +2 -4
isa_model/core/services/intelligent_model_selector.py +479 -370
isa_model/core/storage/hf_storage.py +2 -2
isa_model/core/types.py +8 -0
isa_model/deployment/__init__.py +5 -48
isa_model/deployment/core/__init__.py +2 -31
isa_model/deployment/core/deployment_manager.py +1278 -368
isa_model/deployment/local/__init__.py +31 -0
isa_model/deployment/local/config.py +248 -0
isa_model/deployment/local/gpu_gateway.py +607 -0
isa_model/deployment/local/health_checker.py +428 -0
isa_model/deployment/local/provider.py +586 -0
isa_model/deployment/local/tensorrt_service.py +621 -0
isa_model/deployment/local/transformers_service.py +644 -0
isa_model/deployment/local/vllm_service.py +527 -0
isa_model/deployment/modal/__init__.py +8 -0
isa_model/deployment/modal/config.py +136 -0
isa_model/deployment/modal/deployer.py +894 -0
isa_model/deployment/modal/services/__init__.py +3 -0
isa_model/deployment/modal/services/audio/__init__.py +1 -0
isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
isa_model/deployment/modal/services/embedding/__init__.py +1 -0
isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
isa_model/deployment/modal/services/llm/__init__.py +1 -0
isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
isa_model/deployment/modal/services/video/__init__.py +1 -0
isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
isa_model/deployment/modal/services/vision/__init__.py +1 -0
isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
isa_model/deployment/storage/__init__.py +5 -0
isa_model/deployment/storage/deployment_repository.py +824 -0
isa_model/deployment/triton/__init__.py +10 -0
isa_model/deployment/triton/config.py +196 -0
isa_model/deployment/triton/configs/__init__.py +1 -0
isa_model/deployment/triton/provider.py +512 -0
isa_model/deployment/triton/scripts/__init__.py +1 -0
isa_model/deployment/triton/templates/__init__.py +1 -0
isa_model/inference/__init__.py +47 -1
isa_model/inference/ai_factory.py +179 -16
isa_model/inference/legacy_services/__init__.py +21 -0
isa_model/inference/legacy_services/model_evaluation.py +637 -0
isa_model/inference/legacy_services/model_service.py +573 -0
isa_model/inference/legacy_services/model_serving.py +717 -0
isa_model/inference/legacy_services/model_training.py +561 -0
isa_model/inference/models/__init__.py +21 -0
isa_model/inference/models/inference_config.py +551 -0
isa_model/inference/models/inference_record.py +675 -0
isa_model/inference/models/performance_models.py +714 -0
isa_model/inference/repositories/__init__.py +9 -0
isa_model/inference/repositories/inference_repository.py +828 -0
isa_model/inference/services/audio/__init__.py +21 -0
isa_model/inference/services/audio/base_realtime_service.py +225 -0
isa_model/inference/services/audio/base_stt_service.py +184 -11
isa_model/inference/services/audio/isa_tts_service.py +0 -0
isa_model/inference/services/audio/openai_realtime_service.py +320 -124
isa_model/inference/services/audio/openai_stt_service.py +53 -11
isa_model/inference/services/base_service.py +17 -1
isa_model/inference/services/custom_model_manager.py +277 -0
isa_model/inference/services/embedding/__init__.py +13 -0
isa_model/inference/services/embedding/base_embed_service.py +111 -8
isa_model/inference/services/embedding/isa_embed_service.py +305 -0
isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
isa_model/inference/services/embedding/openai_embed_service.py +2 -4
isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
isa_model/inference/services/img/__init__.py +2 -2
isa_model/inference/services/img/base_image_gen_service.py +24 -7
isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
isa_model/inference/services/img/services/replicate_flux.py +226 -0
isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
isa_model/inference/services/img/tests/test_img_client.py +297 -0
isa_model/inference/services/llm/__init__.py +10 -2
isa_model/inference/services/llm/base_llm_service.py +361 -26
isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
isa_model/inference/services/llm/local_llm_service.py +747 -0
isa_model/inference/services/llm/ollama_llm_service.py +11 -3
isa_model/inference/services/llm/openai_llm_service.py +670 -56
isa_model/inference/services/llm/yyds_llm_service.py +10 -3
isa_model/inference/services/vision/__init__.py +27 -6
isa_model/inference/services/vision/base_vision_service.py +118 -185
isa_model/inference/services/vision/blip_vision_service.py +359 -0
isa_model/inference/services/vision/helpers/image_utils.py +19 -10
isa_model/inference/services/vision/isa_vision_service.py +634 -0
isa_model/inference/services/vision/openai_vision_service.py +19 -10
isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
isa_model/serving/api/cache_manager.py +245 -0
isa_model/serving/api/dependencies/__init__.py +1 -0
isa_model/serving/api/dependencies/auth.py +194 -0
isa_model/serving/api/dependencies/database.py +139 -0
isa_model/serving/api/error_handlers.py +284 -0
isa_model/serving/api/fastapi_server.py +240 -18
isa_model/serving/api/middleware/auth.py +317 -0
isa_model/serving/api/middleware/security.py +268 -0
isa_model/serving/api/middleware/tenant_context.py +414 -0
isa_model/serving/api/routes/analytics.py +489 -0
isa_model/serving/api/routes/config.py +645 -0
isa_model/serving/api/routes/deployment_billing.py +315 -0
isa_model/serving/api/routes/deployments.py +475 -0
isa_model/serving/api/routes/gpu_gateway.py +440 -0
isa_model/serving/api/routes/health.py +32 -12
isa_model/serving/api/routes/inference_monitoring.py +486 -0
isa_model/serving/api/routes/local_deployments.py +448 -0
isa_model/serving/api/routes/logs.py +430 -0
isa_model/serving/api/routes/settings.py +582 -0
isa_model/serving/api/routes/tenants.py +575 -0
isa_model/serving/api/routes/unified.py +992 -171
isa_model/serving/api/routes/webhooks.py +479 -0
isa_model/serving/api/startup.py +318 -0
isa_model/serving/modal_proxy_server.py +249 -0
isa_model/utils/gpu_utils.py +311 -0
{isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
isa_model-0.4.3.dist-info/RECORD +193 -0
isa_model/deployment/cloud/__init__.py +0 -9
isa_model/deployment/cloud/modal/__init__.py +0 -10
isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
isa_model/deployment/cloud/modal/register_models.py +0 -321
isa_model/deployment/core/deployment_config.py +0 -356
isa_model/deployment/core/isa_deployment_service.py +0 -401
isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
isa_model/deployment/runtime/deployed_service.py +0 -338
isa_model/deployment/services/__init__.py +0 -9
isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
isa_model/deployment/services/model_service.py +0 -332
isa_model/deployment/services/service_monitor.py +0 -356
isa_model/deployment/services/service_registry.py +0 -527
isa_model/eval/__init__.py +0 -92
isa_model/eval/benchmarks.py +0 -469
isa_model/eval/config/__init__.py +0 -10
isa_model/eval/config/evaluation_config.py +0 -108
isa_model/eval/evaluators/__init__.py +0 -18
isa_model/eval/evaluators/base_evaluator.py +0 -503
isa_model/eval/evaluators/llm_evaluator.py +0 -472
isa_model/eval/factory.py +0 -531
isa_model/eval/infrastructure/__init__.py +0 -24
isa_model/eval/infrastructure/experiment_tracker.py +0 -466
isa_model/eval/metrics.py +0 -798
isa_model/inference/adapter/unified_api.py +0 -248
isa_model/inference/services/helpers/stacked_config.py +0 -148
isa_model/inference/services/img/flux_professional_service.py +0 -603
isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
isa_model/inference/services/others/table_transformer_service.py +0 -61
isa_model/inference/services/vision/doc_analysis_service.py +0 -640
isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
isa_model/inference/services/vision/ui_analysis_service.py +0 -823
isa_model/scripts/inference_tracker.py +0 -283
isa_model/scripts/mlflow_manager.py +0 -379
isa_model/scripts/model_registry.py +0 -465
isa_model/scripts/register_models.py +0 -370
isa_model/scripts/register_models_with_embeddings.py +0 -510
isa_model/scripts/start_mlflow.py +0 -95
isa_model/scripts/training_tracker.py +0 -257
isa_model/training/__init__.py +0 -74
isa_model/training/annotation/annotation_schema.py +0 -47
isa_model/training/annotation/processors/annotation_processor.py +0 -126
isa_model/training/annotation/storage/dataset_manager.py +0 -131
isa_model/training/annotation/storage/dataset_schema.py +0 -44
isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
isa_model/training/annotation/tests/test_minio copy.py +0 -113
isa_model/training/annotation/tests/test_minio_upload.py +0 -43
isa_model/training/annotation/views/annotation_controller.py +0 -158
isa_model/training/cloud/__init__.py +0 -22
isa_model/training/cloud/job_orchestrator.py +0 -402
isa_model/training/cloud/runpod_trainer.py +0 -454
isa_model/training/cloud/storage_manager.py +0 -482
isa_model/training/core/__init__.py +0 -23
isa_model/training/core/config.py +0 -181
isa_model/training/core/dataset.py +0 -222
isa_model/training/core/trainer.py +0 -720
isa_model/training/core/utils.py +0 -213
isa_model/training/factory.py +0 -424
isa_model-0.3.91.dist-info/RECORD +0 -138
/isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
/isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
{isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
{isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0

isa_model/deployment/cloud/modal/register_models.py DELETED Viewed

@@ -1,321 +0,0 @@
- """
-Model Registration Script for UI Analysis Pipeline
-Registers the latest versions of UI analysis models in the core model registry
-Prepares models for Modal deployment with proper version management
-"""
-import asyncio
-from pathlib import Path
-import sys
-import os
-# Add project root to path
-project_root = Path(__file__).parent.parent.parent.parent
-sys.path.insert(0, str(project_root))
-from isa_model.core.model_manager import ModelManager
-from isa_model.core.model_repo import ModelRegistry, ModelType, ModelCapability
-async def register_ui_analysis_models():
-    """Register UI analysis models with latest versions"""
-    # Initialize model manager and registry
-    model_manager = ModelManager()
-    print("🔧 Registering UI Analysis Models...")
-    # Debug: Check available capabilities
-    print("Available capabilities:")
-    for cap in ModelCapability:
-        print(f"  - {cap.name}: {cap.value}")
-    print()
-    # Model definitions with latest versions from HuggingFace
-    models_to_register = [
-        {
-            "model_id": "omniparser-v2.0",
-            "repo_id": "microsoft/OmniParser",
-            "model_type": ModelType.VISION,
-            "capabilities": [
-                ModelCapability.UI_DETECTION,
-                ModelCapability.IMAGE_ANALYSIS,
-                ModelCapability.IMAGE_UNDERSTANDING
-            ],
-            "revision": "main",  # Latest version
-            "metadata": {
-                "description": "Microsoft OmniParser v2.0 - Advanced UI element detection",
-                "provider": "microsoft",
-                "model_family": "omniparser",
-                "version": "2.0",
-                "paper": "https://arxiv.org/abs/2408.00203",
-                "huggingface_url": "https://huggingface.co/microsoft/OmniParser",
-                "use_case": "UI element detection and parsing",
-                "input_format": "image",
-                "output_format": "structured_elements",
-                "gpu_memory_mb": 8192,
-                "inference_time_ms": 500
-            }
-        },
-        {
-            "model_id": "table-transformer-v1.1-detection",
-            "repo_id": "microsoft/table-transformer-detection",
-            "model_type": ModelType.VISION,
-            "capabilities": [
-                ModelCapability.TABLE_DETECTION,
-                ModelCapability.IMAGE_ANALYSIS
-            ],
-            "revision": "main",
-            "metadata": {
-                "description": "Microsoft Table Transformer v1.1 - Table detection model",
-                "provider": "microsoft",
-                "model_family": "table-transformer",
-                "version": "1.1",
-                "paper": "https://arxiv.org/abs/2110.00061",
-                "huggingface_url": "https://huggingface.co/microsoft/table-transformer-detection",
-                "use_case": "Table detection in documents and images",
-                "input_format": "image",
-                "output_format": "bounding_boxes",
-                "gpu_memory_mb": 4096,
-                "inference_time_ms": 300
-            }
-        },
-        {
-            "model_id": "table-transformer-v1.1-structure",
-            "repo_id": "microsoft/table-transformer-structure-recognition",
-            "model_type": ModelType.VISION,
-            "capabilities": [
-                ModelCapability.TABLE_STRUCTURE_RECOGNITION,
-                ModelCapability.IMAGE_ANALYSIS
-            ],
-            "revision": "main",
-            "metadata": {
-                "description": "Microsoft Table Transformer v1.1 - Table structure recognition",
-                "provider": "microsoft",
-                "model_family": "table-transformer",
-                "version": "1.1",
-                "paper": "https://arxiv.org/abs/2110.00061",
-                "huggingface_url": "https://huggingface.co/microsoft/table-transformer-structure-recognition",
-                "use_case": "Table structure recognition and cell extraction",
-                "input_format": "image",
-                "output_format": "table_structure",
-                "gpu_memory_mb": 4096,
-                "inference_time_ms": 400
-            }
-        },
-        {
-            "model_id": "paddleocr-v3.0",
-            "repo_id": "PaddlePaddle/PaddleOCR",
-            "model_type": ModelType.VISION,
-            "capabilities": [
-                ModelCapability.OCR,
-                ModelCapability.IMAGE_ANALYSIS
-            ],
-            "revision": "release/2.8",
-            "metadata": {
-                "description": "PaddleOCR v3.0 - Multilingual OCR model",
-                "provider": "paddlepaddle",
-                "model_family": "paddleocr",
-                "version": "3.0",
-                "github_url": "https://github.com/PaddlePaddle/PaddleOCR",
-                "huggingface_url": "https://huggingface.co/PaddlePaddle/PaddleOCR",
-                "use_case": "Text extraction from images",
-                "input_format": "image",
-                "output_format": "text_with_coordinates",
-                "languages": ["en", "ch", "multilingual"],
-                "gpu_memory_mb": 2048,
-                "inference_time_ms": 200
-            }
-        },
-        {
-            "model_id": "yolov8n-fallback",
-            "repo_id": "ultralytics/yolov8",
-            "model_type": ModelType.VISION,
-            "capabilities": [
-                ModelCapability.IMAGE_ANALYSIS,
-                ModelCapability.UI_DETECTION  # As fallback
-            ],
-            "revision": "main",
-            "metadata": {
-                "description": "YOLOv8 Nano - Fallback object detection model",
-                "provider": "ultralytics",
-                "model_family": "yolo",
-                "version": "8.0",
-                "github_url": "https://github.com/ultralytics/ultralytics",
-                "use_case": "General object detection (fallback for UI elements)",
-                "input_format": "image",
-                "output_format": "bounding_boxes",
-                "gpu_memory_mb": 1024,
-                "inference_time_ms": 50
-            }
-        }
-    ]
-    # Register each model
-    registration_results = []
-    for model_config in models_to_register:
-        print(f"\n📝 Registering {model_config['model_id']}...")
-        try:
-            # Register model in registry (without downloading)
-            success = model_manager.registry.register_model(
-                model_id=model_config['model_id'],
-                model_type=model_config['model_type'],
-                capabilities=model_config['capabilities'],
-                metadata={
-                    **model_config['metadata'],
-                    'repo_id': model_config['repo_id'],
-                    'revision': model_config['revision'],
-                    'registered_at': 'auto',
-                    'download_status': 'not_downloaded'
-                }
-            )
-            if success:
-                print(f"✅ Successfully registered {model_config['model_id']}")
-                registration_results.append({
-                    'model_id': model_config['model_id'],
-                    'status': 'success'
-                })
-            else:
-                print(f"❌ Failed to register {model_config['model_id']}")
-                registration_results.append({
-                    'model_id': model_config['model_id'],
-                    'status': 'failed'
-                })
-        except Exception as e:
-            print(f"❌ Error registering {model_config['model_id']}: {e}")
-            registration_results.append({
-                'model_id': model_config['model_id'],
-                'status': 'error',
-                'error': str(e)
-            })
-    # Print summary
-    print(f"\n📊 Registration Summary:")
-    successful = [r for r in registration_results if r['status'] == 'success']
-    failed = [r for r in registration_results if r['status'] != 'success']
-    print(f"✅ Successfully registered: {len(successful)} models")
-    for result in successful:
-        print(f"   - {result['model_id']}")
-    if failed:
-        print(f"❌ Failed to register: {len(failed)} models")
-        for result in failed:
-            error_msg = f" ({result.get('error', 'unknown error')})" if 'error' in result else ""
-            print(f"   - {result['model_id']}{error_msg}")
-    return registration_results
-async def verify_model_registry():
-    """Verify registered models and their capabilities"""
-    model_manager = ModelManager()
-    print(f"\n🔍 Verifying Model Registry...")
-    # Check models by capability
-    capabilities_to_check = [
-        ModelCapability.UI_DETECTION,
-        ModelCapability.OCR,
-        ModelCapability.TABLE_DETECTION,
-        ModelCapability.TABLE_STRUCTURE_RECOGNITION
-    ]
-    for capability in capabilities_to_check:
-        models = model_manager.registry.get_models_by_capability(capability)
-        print(f"\n📋 Models with {capability.value} capability:")
-        if models:
-            for model_id, model_info in models.items():
-                metadata = model_info.get('metadata', {})
-                version = metadata.get('version', 'unknown')
-                provider = metadata.get('provider', 'unknown')
-                print(f"   ✅ {model_id} (v{version}, {provider})")
-        else:
-            print(f"   ❌ No models found for {capability.value}")
-    # Print overall stats
-    stats = model_manager.registry.get_stats()
-    print(f"\n📈 Registry Statistics:")
-    print(f"   Total models: {stats['total_models']}")
-    print(f"   Models by type: {stats['models_by_type']}")
-    print(f"   Models by capability: {stats['models_by_capability']}")
-def get_model_for_capability(capability: ModelCapability) -> str:
-    """Get the best model for a specific capability"""
-    model_manager = ModelManager()
-    models = model_manager.registry.get_models_by_capability(capability)
-    if not models:
-        return None
-    # Priority order for UI analysis models
-    priority_order = {
-        ModelCapability.UI_DETECTION: [
-            "omniparser-v2.0",
-            "yolov8n-fallback"
-        ],
-        ModelCapability.OCR: [
-            "paddleocr-v3.0"
-        ],
-        ModelCapability.TABLE_DETECTION: [
-            "table-transformer-v1.1-detection"
-        ],
-        ModelCapability.TABLE_STRUCTURE_RECOGNITION: [
-            "table-transformer-v1.1-structure"
-        ]
-    }
-    preferred_models = priority_order.get(capability, [])
-    # Return the first available preferred model
-    for model_id in preferred_models:
-        if model_id in models:
-            return model_id
-    # Fallback to first available model
-    return list(models.keys())[0] if models else None
-async def main():
-    """Main registration workflow"""
-    print("🚀 ISA Model Registry - UI Analysis Models Registration")
-    print("=" * 60)
-    try:
-        # Register models
-        results = await register_ui_analysis_models()
-        # Verify registration
-        await verify_model_registry()
-        print(f"\n🎉 Model registration completed!")
-        print(f"   Use ModelManager.get_model() to download and use models")
-        print(f"   Use get_model_for_capability() to get recommended models")
-        # Show usage example
-        print(f"\n💡 Usage Example:")
-        print(f"   from isa_model.core.model_manager import ModelManager")
-        print(f"   from isa_model.core.model_repo import ModelCapability")
-        print(f"   ")
-        print(f"   manager = ModelManager()")
-        print(f"   ui_model_path = await manager.get_model(")
-        print(f"       model_id='omniparser-v2.0',")
-        print(f"       repo_id='microsoft/OmniParser',")
-        print(f"       model_type=ModelType.VISION,")
-        print(f"       capabilities=[ModelCapability.UI_DETECTION]")
-        print(f"   )")
-    except Exception as e:
-        print(f"❌ Registration failed: {e}")
-        return False
-    return True
-if __name__ == "__main__":
-    asyncio.run(main())

isa_model/deployment/core/deployment_config.py DELETED Viewed

@@ -1,356 +0,0 @@
-"""
-Deployment Configuration Classes
-Defines configuration classes for different deployment scenarios including
-RunPod serverless, Triton inference server, and TensorRT-LLM backend.
-"""
-from dataclasses import dataclass, field
-from typing import Optional, Dict, Any, List
-from enum import Enum
-from pathlib import Path
-class DeploymentProvider(str, Enum):
-    """Deployment providers"""
-    RUNPOD_SERVERLESS = "runpod_serverless"
-    RUNPOD_PODS = "runpod_pods"
-    AWS_LAMBDA = "aws_lambda"
-    GOOGLE_CLOUD_RUN = "google_cloud_run"
-    AZURE_CONTAINER_INSTANCES = "azure_container_instances"
-    LOCAL = "local"
-class InferenceEngine(str, Enum):
-    """Inference engines"""
-    TRITON = "triton"
-    VLLM = "vllm"
-    TENSORRT_LLM = "tensorrt_llm"
-    HUGGINGFACE = "huggingface"
-    ONNX = "onnx"
-    TORCHSCRIPT = "torchscript"
-class ModelFormat(str, Enum):
-    """Model formats for deployment"""
-    HUGGINGFACE = "huggingface"
-    TENSORRT = "tensorrt"
-    ONNX = "onnx"
-    TORCHSCRIPT = "torchscript"
-    SAFETENSORS = "safetensors"
-@dataclass
-class TritonConfig:
-    """Configuration for Triton Inference Server"""
-    # Model repository configuration
-    model_repository: str = "/models"
-    model_name: str = "model"
-    model_version: str = "1"
-    # Backend configuration
-    backend: str = "tensorrtllm"  # tensorrtllm, python, onnxruntime
-    max_batch_size: int = 8
-    max_sequence_length: int = 2048
-    # TensorRT-LLM specific
-    tensorrt_llm_model_dir: str = "/models/tensorrt_llm"
-    engine_dir: str = "/models/engines"
-    tokenizer_dir: str = "/models/tokenizer"
-    # Performance settings
-    instance_group_count: int = 1
-    instance_group_kind: str = "KIND_GPU"  # KIND_GPU, KIND_CPU
-    # Memory settings
-    optimization_level: str = "OPTIMIZATION_LEVEL_ENABLED"
-    enable_pinned_input: bool = True
-    enable_pinned_output: bool = True
-    def to_dict(self) -> Dict[str, Any]:
-        """Convert to dictionary"""
-        return self.__dict__.copy()
-@dataclass
-class RunPodServerlessConfig:
-    """Configuration for RunPod Serverless deployment"""
-    # RunPod settings
-    api_key: str
-    endpoint_id: Optional[str] = None
-    template_id: Optional[str] = None
-    # Container configuration
-    container_image: str = "runpod/pytorch:2.1.0-py3.10-cuda11.8.0-devel-ubuntu22.04"
-    container_disk_in_gb: int = 20
-    # GPU configuration
-    gpu_type: str = "NVIDIA RTX A6000"
-    gpu_count: int = 1
-    # Scaling configuration
-    min_workers: int = 0
-    max_workers: int = 3
-    idle_timeout: int = 5  # seconds
-    # Network configuration
-    network_volume_id: Optional[str] = None
-    # Environment variables
-    env_vars: Dict[str, str] = field(default_factory=dict)
-    def to_dict(self) -> Dict[str, Any]:
-        """Convert to dictionary"""
-        return self.__dict__.copy()
-@dataclass
-class ModelConfig:
-    """Configuration for model deployment"""
-    # Model identification
-    model_id: str
-    model_name: str
-    model_version: str = "1.0.0"
-    # Model source
-    source_type: str = "huggingface"  # huggingface, local, s3, gcs
-    source_path: str = ""
-    # Model format and engine
-    model_format: ModelFormat = ModelFormat.HUGGINGFACE
-    inference_engine: InferenceEngine = InferenceEngine.TRITON
-    # Model metadata
-    model_type: str = "llm"  # llm, embedding, vision, audio
-    capabilities: List[str] = field(default_factory=lambda: ["text_generation"])
-    # Performance configuration
-    max_batch_size: int = 8
-    max_sequence_length: int = 2048
-    dtype: str = "float16"  # float32, float16, int8, int4
-    # Optimization settings
-    use_tensorrt: bool = True
-    use_quantization: bool = False
-    quantization_method: str = "int8"  # int8, int4, awq, gptq
-    def to_dict(self) -> Dict[str, Any]:
-        """Convert to dictionary"""
-        return self.__dict__.copy()
-@dataclass
-class DeploymentConfig:
-    """Main deployment configuration"""
-    # Deployment identification
-    deployment_id: str
-    deployment_name: str
-    description: Optional[str] = None
-    # Provider and engine configuration
-    provider: DeploymentProvider = DeploymentProvider.RUNPOD_SERVERLESS
-    inference_engine: InferenceEngine = InferenceEngine.TRITON
-    # Model configuration
-    model_config: ModelConfig = None
-    # Provider-specific configurations
-    runpod_config: Optional[RunPodServerlessConfig] = None
-    triton_config: Optional[TritonConfig] = None
-    # Health check configuration
-    health_check_path: str = "/health"
-    health_check_timeout: int = 30
-    # Monitoring configuration
-    enable_logging: bool = True
-    log_level: str = "INFO"
-    enable_metrics: bool = True
-    # Networking
-    custom_domain: Optional[str] = None
-    allowed_origins: List[str] = field(default_factory=lambda: ["*"])
-    # Additional settings
-    extra_config: Dict[str, Any] = field(default_factory=dict)
-    def __post_init__(self):
-        """Validate configuration after initialization"""
-        if not self.deployment_id:
-            raise ValueError("deployment_id is required")
-        if not self.deployment_name:
-            raise ValueError("deployment_name is required")
-        if not self.model_config:
-            raise ValueError("model_config is required")
-        # Set default provider configs if not provided
-        if self.provider == DeploymentProvider.RUNPOD_SERVERLESS and not self.runpod_config:
-            self.runpod_config = RunPodServerlessConfig(api_key="")
-        if self.inference_engine == InferenceEngine.TRITON and not self.triton_config:
-            self.triton_config = TritonConfig()
-    def to_dict(self) -> Dict[str, Any]:
-        """
-        Serialize the instance attributes into a plain dictionary.
-        Returns:
-            Dictionary keyed by attribute name; the nested configs are expanded
-            through their own to_dict() and Enum members are replaced by their value
-        """
-        nested_keys = ('model_config', 'runpod_config', 'triton_config')
-        def _export(name: str, item: Any) -> Any:
-            # Nested config objects serialize themselves; an unset one stays None
-            if name in nested_keys:
-                return None if item is None else item.to_dict()
-            # Enum members are stored by value, anything else is passed through as-is
-            return item.value if isinstance(item, Enum) else item
-        return {name: _export(name, item) for name, item in self.__dict__.items()}
-    @classmethod
-    def from_dict(cls, config_dict: Dict[str, Any]) -> 'DeploymentConfig':
-        """
-        Create config from dictionary.
-        Args:
-            config_dict: Constructor keyword arguments, with nested configs given as
-                dictionaries and enums given by value. The dictionary is not modified.
-        Returns:
-            Instance of this class built from the converted values
-        """
-        # Convert a shallow copy: converting in place would leave the caller's
-        # dictionary holding config objects and enum members instead of the
-        # plain values that were passed in
-        kwargs = dict(config_dict)
-        # Nested configs: rebuild each from its dictionary, skip missing or None entries
-        nested_types = (
-            ('model_config', ModelConfig),
-            ('runpod_config', RunPodServerlessConfig),
-            ('triton_config', TritonConfig),
-        )
-        for key, config_type in nested_types:
-            if kwargs.get(key) is not None:
-                kwargs[key] = config_type(**kwargs[key])
-        # Enums: look the member up by value whenever the key is present
-        enum_types = (
-            ('provider', DeploymentProvider),
-            ('inference_engine', InferenceEngine),
-        )
-        for key, enum_type in enum_types:
-            if key in kwargs:
-                kwargs[key] = enum_type(kwargs[key])
-        return cls(**kwargs)
-# Predefined configurations for common deployment scenarios
-def create_gemma_runpod_triton_config(
-    model_id: str,
-    runpod_api_key: str,
-    model_source_path: str = "xenobordom/gemma-4b-alpaca-v1"
-) -> DeploymentConfig:
-    """
-    Build the preset Gemma deployment: RunPod serverless provider, Triton engine, tensorrtllm backend.
-    Args:
-        model_id: Identifier stored on the model and embedded in the deployment id and name
-        runpod_api_key: API key placed in the RunPod serverless config
-        model_source_path: Source path recorded on the model (source type is "huggingface")
-    Returns:
-        DeploymentConfig combining the model, RunPod and Triton settings
-    """
-    # Values the model settings and the Triton settings have to agree on
-    served_name = "gemma-4b-alpaca"
-    batch_limit = 8
-    sequence_limit = 2048
-    repository_root = "/models"
-    model_settings = {
-        "model_id": model_id,
-        "model_name": served_name,
-        "source_type": "huggingface",
-        "source_path": model_source_path,
-        "model_format": ModelFormat.HUGGINGFACE,
-        "inference_engine": InferenceEngine.TRITON,
-        "model_type": "llm",
-        "capabilities": ["text_generation", "chat"],
-        "max_batch_size": batch_limit,
-        "max_sequence_length": sequence_limit,
-        "dtype": "float16",
-        "use_tensorrt": True,
-    }
-    runpod_settings = {
-        "api_key": runpod_api_key,
-        "container_image": "nvcr.io/nvidia/tritonserver:23.10-trtllm-python-py3",
-        "container_disk_in_gb": 30,
-        "gpu_type": "NVIDIA RTX A6000",
-        "gpu_count": 1,
-        "min_workers": 0,
-        "max_workers": 3,
-        "idle_timeout": 5,
-        "env_vars": {
-            "TRITON_MODEL_REPOSITORY": repository_root,
-            "CUDA_VISIBLE_DEVICES": "0",
-        },
-    }
-    triton_settings = {
-        "model_repository": repository_root,
-        "model_name": served_name,
-        "backend": "tensorrtllm",
-        "max_batch_size": batch_limit,
-        "max_sequence_length": sequence_limit,
-        "tensorrt_llm_model_dir": "/models/tensorrt_llm",
-        "engine_dir": "/models/engines",
-        "tokenizer_dir": "/models/tokenizer",
-    }
-    # Keyword arguments are evaluated left to right, so the three nested configs
-    # are still constructed in model, RunPod, Triton order
-    return DeploymentConfig(
-        deployment_id=f"gemma-deployment-{model_id}",
-        deployment_name=f"Gemma 4B Alpaca - {model_id}",
-        description="Gemma 4B model fine-tuned on Alpaca dataset, deployed with Triton + TensorRT-LLM",
-        provider=DeploymentProvider.RUNPOD_SERVERLESS,
-        inference_engine=InferenceEngine.TRITON,
-        model_config=ModelConfig(**model_settings),
-        runpod_config=RunPodServerlessConfig(**runpod_settings),
-        triton_config=TritonConfig(**triton_settings)
-    )
-def create_local_triton_config(
-    model_id: str,
-    model_source_path: str,
-    triton_model_repository: str = "./models/triton"
-) -> DeploymentConfig:
-    """
-    Build the preset local deployment: LOCAL provider, Triton engine, python backend.
-    Args:
-        model_id: Identifier stored on the model and embedded in the model and deployment names
-        model_source_path: Source path recorded on the model (source type is "local")
-        triton_model_repository: Model repository path given to the Triton config
-    Returns:
-        DeploymentConfig combining the model and Triton settings (no RunPod config is passed)
-    """
-    # Values the model settings and the Triton settings have to agree on
-    served_name = f"local-model-{model_id}"
-    batch_limit = 4
-    sequence_limit = 1024
-    model_settings = {
-        "model_id": model_id,
-        "model_name": served_name,
-        "source_type": "local",
-        "source_path": model_source_path,
-        "model_format": ModelFormat.HUGGINGFACE,
-        "inference_engine": InferenceEngine.TRITON,
-        "model_type": "llm",
-        "capabilities": ["text_generation"],
-        "max_batch_size": batch_limit,
-        "max_sequence_length": sequence_limit,
-        "dtype": "float16",
-    }
-    triton_settings = {
-        "model_repository": triton_model_repository,
-        "model_name": served_name,
-        "backend": "python",  # Python backend is used for local development
-        "max_batch_size": batch_limit,
-        "max_sequence_length": sequence_limit,
-    }
-    # Keyword arguments are evaluated left to right, so the model config is
-    # still constructed before the Triton config
-    return DeploymentConfig(
-        deployment_id=f"local-deployment-{model_id}",
-        deployment_name=f"Local Model - {model_id}",
-        description="Local model deployment for development and testing",
-        provider=DeploymentProvider.LOCAL,
-        inference_engine=InferenceEngine.TRITON,
-        model_config=ModelConfig(**model_settings),
-        triton_config=TritonConfig(**triton_settings)
-    )

isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl