isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +466 -43
- isa_model/core/cache/redis_cache.py +12 -3
- isa_model/core/config/config_manager.py +230 -3
- isa_model/core/config.py +90 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +21 -1
- isa_model/core/database/supabase_client.py +154 -19
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +27 -18
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_manager.py +35 -80
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +174 -18
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/services/intelligent_model_selector.py +399 -21
- isa_model/core/types.py +1 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -370
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +137 -10
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/openai_stt_service.py +22 -6
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +335 -24
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/ollama_llm_service.py +9 -2
- isa_model/inference/services/llm/openai_llm_service.py +33 -16
- isa_model/inference/services/llm/yyds_llm_service.py +8 -2
- isa_model/inference/services/vision/__init__.py +22 -1
- isa_model/inference/services/vision/helpers/image_utils.py +8 -5
- isa_model/inference/services/vision/isa_vision_service.py +65 -4
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +172 -22
- isa_model/serving/api/middleware/auth.py +8 -2
- isa_model/serving/api/middleware/security.py +23 -33
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +4 -1
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +138 -2
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +680 -18
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +68 -54
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
- isa_model-0.4.4.dist-info/RECORD +180 -0
- isa_model/core/security/secrets.py +0 -358
- isa_model/core/storage/hf_storage.py +0 -419
- isa_model/core/storage/minio_storage.py +0 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks/__init__.py +0 -27
- isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
- isa_model/eval/benchmarks.py +0 -701
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -24
- isa_model/eval/evaluators/audio_evaluator.py +0 -727
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/embedding_evaluator.py +0 -742
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/evaluators/vision_evaluator.py +0 -564
- isa_model/eval/example_evaluation.py +0 -395
- isa_model/eval/factory.py +0 -798
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/isa_benchmarks.py +0 -700
- isa_model/eval/isa_integration.py +0 -582
- isa_model/eval/metrics.py +0 -951
- isa_model/eval/tests/unit/test_basic.py +0 -396
- isa_model/serving/api/routes/evaluations.py +0 -579
- isa_model/training/__init__.py +0 -168
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -26
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/examples/intelligent_training_example.py +0 -281
- isa_model/training/factory.py +0 -424
- isa_model/training/intelligent/__init__.py +0 -25
- isa_model/training/intelligent/decision_engine.py +0 -643
- isa_model/training/intelligent/intelligent_factory.py +0 -888
- isa_model/training/intelligent/knowledge_base.py +0 -751
- isa_model/training/intelligent/resource_optimizer.py +0 -839
- isa_model/training/intelligent/task_classifier.py +0 -576
- isa_model/training/storage/__init__.py +0 -24
- isa_model/training/storage/core_integration.py +0 -439
- isa_model/training/storage/training_repository.py +0 -552
- isa_model/training/storage/training_storage.py +0 -628
- isa_model-0.4.0.dist-info/RECORD +0 -182
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0

isa_model/deployment/modal/__init__.py
@@ -0,0 +1,8 @@
+"""
+Modal deployment services and utilities
+"""
+
+from .deployer import ModalDeployer
+from .config import ModalConfig, ModalServiceType, create_llm_config, create_vision_config, create_audio_config, create_embedding_config
+
+__all__ = ["ModalDeployer", "ModalConfig", "ModalServiceType", "create_llm_config", "create_vision_config", "create_audio_config", "create_embedding_config"]
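
For context, the new `isa_model.deployment.modal` package re-exports the deployer and the config helpers listed in `__all__`, so callers can import them from a single place. A minimal import sketch (package path taken from the file list above; anything beyond the import itself is an assumption):

# Hypothetical consumer-side import; names come from the __all__ above.
from isa_model.deployment.modal import ModalDeployer, ModalConfig, create_llm_config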

isa_model/deployment/modal/config.py
@@ -0,0 +1,136 @@
+"""
+Modal deployment configuration
+
+Simplified configuration for Modal-specific deployments.
+"""
+
+from dataclasses import dataclass, field
+from typing import Dict, Any, Optional
+from enum import Enum
+
+
+class ModalServiceType(Enum):
+    """Modal service types"""
+    LLM = "llm"
+    VISION = "vision"
+    AUDIO = "audio"
+    EMBEDDING = "embedding"
+    VIDEO = "video"
+
+
+@dataclass
+class ModalConfig:
+    """Configuration for Modal deployment"""
+
+    # Service identification
+    service_name: str
+    service_type: ModalServiceType
+    model_id: str
+
+    # Modal-specific settings
+    image_tag: str = "latest"
+    cpu_cores: int = 2
+    memory_gb: int = 8
+    gpu_type: Optional[str] = None  # e.g., "A10G", "T4", "A100"
+    timeout_seconds: int = 300
+
+    # Scaling configuration
+    min_instances: int = 0
+    max_instances: int = 10
+    concurrency_limit: int = 1
+
+    # Environment variables
+    environment: Dict[str, str] = field(default_factory=dict)
+
+    # Service-specific configuration
+    service_config: Dict[str, Any] = field(default_factory=dict)
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary for serialization"""
+        return {
+            "service_name": self.service_name,
+            "service_type": self.service_type.value,
+            "model_id": self.model_id,
+            "image_tag": self.image_tag,
+            "cpu_cores": self.cpu_cores,
+            "memory_gb": self.memory_gb,
+            "gpu_type": self.gpu_type,
+            "timeout_seconds": self.timeout_seconds,
+            "min_instances": self.min_instances,
+            "max_instances": self.max_instances,
+            "concurrency_limit": self.concurrency_limit,
+            "environment": self.environment,
+            "service_config": self.service_config
+        }
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "ModalConfig":
+        """Create from dictionary"""
+        return cls(
+            service_name=data["service_name"],
+            service_type=ModalServiceType(data["service_type"]),
+            model_id=data["model_id"],
+            image_tag=data.get("image_tag", "latest"),
+            cpu_cores=data.get("cpu_cores", 2),
+            memory_gb=data.get("memory_gb", 8),
+            gpu_type=data.get("gpu_type"),
+            timeout_seconds=data.get("timeout_seconds", 300),
+            min_instances=data.get("min_instances", 0),
+            max_instances=data.get("max_instances", 10),
+            concurrency_limit=data.get("concurrency_limit", 1),
+            environment=data.get("environment", {}),
+            service_config=data.get("service_config", {})
+        )
+
+
+# Predefined configurations for common service types
+def create_llm_config(service_name: str, model_id: str, gpu_type: str = "A10G") -> ModalConfig:
+    """Create configuration for LLM service"""
+    return ModalConfig(
+        service_name=service_name,
+        service_type=ModalServiceType.LLM,
+        model_id=model_id,
+        gpu_type=gpu_type,
+        memory_gb=16,
+        timeout_seconds=600,
+        max_instances=5
+    )
+
+
+def create_vision_config(service_name: str, model_id: str, gpu_type: str = "T4") -> ModalConfig:
+    """Create configuration for vision service"""
+    return ModalConfig(
+        service_name=service_name,
+        service_type=ModalServiceType.VISION,
+        model_id=model_id,
+        gpu_type=gpu_type,
+        memory_gb=12,
+        timeout_seconds=300,
+        max_instances=10
+    )
+
+
+def create_audio_config(service_name: str, model_id: str, gpu_type: str = "T4") -> ModalConfig:
+    """Create configuration for audio service"""
+    return ModalConfig(
+        service_name=service_name,
+        service_type=ModalServiceType.AUDIO,
+        model_id=model_id,
+        gpu_type=gpu_type,
+        memory_gb=8,
+        timeout_seconds=300,
+        max_instances=8
+    )
+
+
+def create_embedding_config(service_name: str, model_id: str, gpu_type: str = "T4") -> ModalConfig:
+    """Create configuration for embedding service"""
+    return ModalConfig(
+        service_name=service_name,
+        service_type=ModalServiceType.EMBEDDING,
+        model_id=model_id,
+        gpu_type=gpu_type,
+        memory_gb=6,
+        timeout_seconds=120,
+        max_instances=15
+    )

isa_model/deployment/modal/services/audio/__init__.py
@@ -0,0 +1 @@
+"""Audio services for Modal deployment"""

isa_model/deployment/modal/services/embedding/__init__.py
@@ -0,0 +1 @@
+"""Embedding services for Modal deployment"""

isa_model/deployment/modal/services/llm/__init__.py
@@ -0,0 +1 @@
+"""LLM services for Modal deployment"""

isa_model/deployment/modal/services/llm/isa_llm_service.py
@@ -0,0 +1,424 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+ISA LLM Service - Modal deployment for HuggingFace trained models
+Provides inference API for custom trained models
+"""
+
+import os
+import logging
+from typing import Dict, Any, List, Optional
+import modal
+
+# Modal app configuration
+app = modal.App("isa-llm-inference")
+
+# GPU configuration for inference
+GPU_CONFIG = modal.gpu.A10G()
+
+# Base image with HuggingFace transformers
+image = (
+    modal.Image.debian_slim(python_version="3.11")
+    .pip_install([
+        "torch>=2.0.0",
+        "transformers>=4.35.0",
+        "accelerate>=0.20.0",
+        "huggingface_hub>=0.17.0",
+        "peft>=0.5.0",  # For LoRA models
+        "bitsandbytes>=0.41.0",  # For quantization
+        "sentencepiece>=0.1.99",  # For tokenizers
+    ])
+)
+
+logger = logging.getLogger(__name__)
+
+@app.cls(
+    image=image,
+    gpu=GPU_CONFIG,
+    cpu=2.0,
+    memory=16384,  # 16GB memory
+    timeout=300,  # 5 minute timeout
+    container_idle_timeout=60,  # Keep warm for 1 minute
+    allow_concurrent_inputs=5,  # Allow concurrent requests
+)
+class ISALLMService:
+    """
+    ISA LLM Service for inference on HuggingFace trained models
+    Designed to work with models trained through ISA training pipeline
+    """
+
+    def __init__(self):
+        """Initialize the service (runs on container startup)"""
+        import torch
+        from transformers import AutoTokenizer, AutoModelForCausalLM
+
+        # Model will be loaded when first requested
+        self.model = None
+        self.tokenizer = None
+        self.current_model_id = None
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+
+        logger.info(f"ISA LLM Service initialized on {self.device}")
+
+    def _load_model(self, model_id: str, hf_token: str = None):
+        """Load a specific model"""
+        import torch
+        from transformers import AutoTokenizer, AutoModelForCausalLM
+
+        if self.current_model_id == model_id and self.model is not None:
+            logger.info(f"Model {model_id} already loaded")
+            return
+
+        logger.info(f"Loading model: {model_id}")
+
+        try:
+            # Load tokenizer
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                model_id,
+                token=hf_token,
+                trust_remote_code=True
+            )
+
+            # Set pad token if not exists
+            if self.tokenizer.pad_token is None:
+                self.tokenizer.pad_token = self.tokenizer.eos_token
+
+            # Load model with GPU optimization
+            self.model = AutoModelForCausalLM.from_pretrained(
+                model_id,
+                token=hf_token,
+                torch_dtype=torch.float16,
+                device_map="auto",
+                trust_remote_code=True,
+                low_cpu_mem_usage=True
+            )
+
+            self.current_model_id = model_id
+            logger.info(f"Successfully loaded model {model_id}")
+
+        except Exception as e:
+            logger.error(f"Failed to load model {model_id}: {e}")
+            raise
+
+    @modal.method
+    def generate_text(
+        self,
+        prompt: str,
+        model_id: str,
+        hf_token: str = None,
+        max_length: int = 100,
+        temperature: float = 0.7,
+        do_sample: bool = True,
+        top_p: float = 0.9,
+        repetition_penalty: float = 1.1,
+        **kwargs
+    ) -> Dict[str, Any]:
+        """
+        Generate text using the specified model
+
+        Args:
+            prompt: Input text prompt
+            model_id: HuggingFace model ID (e.g., "xenobordom/dialogpt-isa-trained-xxx")
+            hf_token: HuggingFace token for private models
+            max_length: Maximum generation length
+            temperature: Sampling temperature
+            do_sample: Whether to use sampling
+            top_p: Top-p sampling parameter
+            repetition_penalty: Repetition penalty
+            **kwargs: Additional generation parameters
+
+        Returns:
+            Dictionary containing generated text and metadata
+        """
+        import torch
+        import time
+
+        start_time = time.time()
+
+        try:
+            # Load model if needed
+            self._load_model(model_id, hf_token)
+
+            if self.model is None or self.tokenizer is None:
+                raise RuntimeError("Model not properly loaded")
+
+            # Tokenize input
+            inputs = self.tokenizer(
+                prompt,
+                return_tensors="pt",
+                padding=True,
+                truncation=True,
+                max_length=512
+            ).to(self.device)
+
+            # Generate
+            with torch.no_grad():
+                outputs = self.model.generate(
+                    **inputs,
+                    max_length=inputs["input_ids"].shape[1] + max_length,
+                    temperature=temperature,
+                    do_sample=do_sample,
+                    top_p=top_p,
+                    repetition_penalty=repetition_penalty,
+                    pad_token_id=self.tokenizer.pad_token_id,
+                    eos_token_id=self.tokenizer.eos_token_id,
+                    **kwargs
+                )
+
+            # Decode generated text
+            full_text = self.tokenizer.decode(
+                outputs[0],
+                skip_special_tokens=True,
+                clean_up_tokenization_spaces=True
+            )
+
+            # Extract only the new generated part
+            generated_text = full_text
+            if generated_text.startswith(prompt):
+                generated_text = generated_text[len(prompt):].strip()
+
+            processing_time = time.time() - start_time
+
+            return {
+                "success": True,
+                "text": generated_text,
+                "full_text": full_text,
+                "prompt": prompt,
+                "model_id": model_id,
+                "provider": "ISA",
+                "service": "isa-llm",
+                "generation_config": {
+                    "max_length": max_length,
+                    "temperature": temperature,
+                    "do_sample": do_sample,
+                    "top_p": top_p,
+                    "repetition_penalty": repetition_penalty
+                },
+                "metadata": {
+                    "processing_time": processing_time,
+                    "device": str(self.device),
+                    "input_tokens": inputs["input_ids"].shape[1],
+                    "output_tokens": outputs.shape[1]
+                }
+            }
+
+        except Exception as e:
+            logger.error(f"Error during text generation: {e}")
+            return {
+                "success": False,
+                "error": str(e),
+                "prompt": prompt,
+                "model_id": model_id,
+                "provider": "ISA",
+                "service": "isa-llm"
+            }
+
+    @modal.method
+    def chat_completion(
+        self,
+        messages: List[Dict[str, str]],
+        model_id: str,
+        hf_token: str = None,
+        **kwargs
+    ) -> Dict[str, Any]:
+        """
+        Chat completion with conversation history
+
+        Args:
+            messages: List of message dictionaries with 'role' and 'content'
+            model_id: HuggingFace model ID
+            hf_token: HuggingFace token
+            **kwargs: Additional generation parameters
+
+        Returns:
+            Dictionary containing generated response and metadata
+        """
+        try:
+            # Convert messages to a single prompt
+            conversation = ""
+            for msg in messages:
+                role = msg.get("role", "user")
+                content = msg.get("content", "")
+                if role == "user":
+                    conversation += f"User: {content}\n"
+                elif role == "assistant":
+                    conversation += f"Assistant: {content}\n"
+                elif role == "system":
+                    conversation += f"System: {content}\n"
+
+            conversation += "Assistant: "
+
+            # Generate response
+            result = self.generate_text(
+                prompt=conversation,
+                model_id=model_id,
+                hf_token=hf_token,
+                **kwargs
+            )
+
+            # Format as chat response
+            if result.get("success"):
+                result["role"] = "assistant"
+                result["conversation"] = conversation
+                result["messages"] = messages
+
+            return result
+
+        except Exception as e:
+            logger.error(f"Error during chat completion: {e}")
+            return {
+                "success": False,
+                "error": str(e),
+                "messages": messages,
+                "model_id": model_id,
+                "provider": "ISA",
+                "service": "isa-llm"
+            }
+
+    @modal.method
+    def get_model_info(self, model_id: str, hf_token: str = None) -> Dict[str, Any]:
+        """Get information about the loaded model"""
+        try:
+            # Load model if needed
+            self._load_model(model_id, hf_token)
+
+            if self.model is None:
+                return {
+                    "success": False,
+                    "error": "Model not loaded"
+                }
+
+            # Get model config
+            config = self.model.config
+
+            # Count parameters
+            total_params = sum(p.numel() for p in self.model.parameters())
+            trainable_params = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
+
+            return {
+                "success": True,
+                "model_id": model_id,
+                "provider": "ISA",
+                "service": "isa-llm",
+                "architecture": config.model_type if hasattr(config, 'model_type') else "unknown",
+                "vocab_size": config.vocab_size if hasattr(config, 'vocab_size') else None,
+                "hidden_size": config.hidden_size if hasattr(config, 'hidden_size') else None,
+                "num_layers": getattr(config, 'num_layers', getattr(config, 'n_layer', None)),
+                "num_attention_heads": getattr(config, 'num_attention_heads', getattr(config, 'n_head', None)),
+                "total_parameters": total_params,
+                "trainable_parameters": trainable_params,
+                "device": str(self.device),
+                "dtype": str(next(self.model.parameters()).dtype)
+            }
+
+        except Exception as e:
+            logger.error(f"Error getting model info: {e}")
+            return {
+                "success": False,
+                "error": str(e)
+            }
+
+    @modal.method
+    def health_check(self) -> Dict[str, Any]:
+        """Health check endpoint"""
+        import torch
+
+        try:
+            gpu_available = torch.cuda.is_available()
+            gpu_count = torch.cuda.device_count() if gpu_available else 0
+
+            return {
+                "success": True,
+                "status": "healthy",
+                "service": "isa-llm",
+                "provider": "ISA",
+                "device": str(self.device),
+                "gpu_available": gpu_available,
+                "gpu_count": gpu_count,
+                "current_model": self.current_model_id,
+                "memory_info": {
+                    "allocated": torch.cuda.memory_allocated() if gpu_available else 0,
+                    "cached": torch.cuda.memory_reserved() if gpu_available else 0
+                } if gpu_available else None
+            }
+
+        except Exception as e:
+            return {
+                "success": False,
+                "status": "error",
+                "error": str(e)
+            }
+
+# Deployment functions
+@app.function(
+    image=image,
+    schedule=modal.Cron("0 2 * * *"),  # Deploy daily at 2 AM
+    timeout=300
+)
+def deploy_service():
+    """Deploy the ISA LLM service"""
+    print("ISA LLM Service deployed successfully!")
+    return {"status": "deployed", "service": "isa-llm"}
+
+# Local testing function
+@app.local_entrypoint()
+def test_service():
+    """Test the ISA LLM service locally"""
+
+    # Test with our trained model
+    test_model_id = "xenobordom/dialogpt-isa-trained-1755493402"
+    test_prompt = "你好"
+
+    # Get HF token from environment
+    hf_token = os.getenv("HF_TOKEN")
+    if not hf_token:
+        print("❌ HF_TOKEN not found in environment")
+        return
+
+    print(f"🧪 Testing ISA LLM Service with model: {test_model_id}")
+
+    # Create service instance
+    service = ISALLMService()
+
+    # Test health check
+    print("📋 Testing health check...")
+    health = service.health_check.remote()
+    print(f"Health: {health}")
+
+    # Test model info
+    print("📊 Testing model info...")
+    info = service.get_model_info.remote(test_model_id, hf_token)
+    print(f"Model info: {info}")
+
+    # Test text generation
+    print("🤖 Testing text generation...")
+    result = service.generate_text.remote(
+        prompt=test_prompt,
+        model_id=test_model_id,
+        hf_token=hf_token,
+        max_length=30,
+        temperature=0.7
+    )
+    print(f"Generation result: {result}")
+
+    # Test chat completion
+    print("💬 Testing chat completion...")
+    messages = [
+        {"role": "user", "content": "你好"},
+        {"role": "assistant", "content": "你好!很高兴见到你。"},
+        {"role": "user", "content": "你能帮我做什么?"}
+    ]
+    chat_result = service.chat_completion.remote(
+        messages=messages,
+        model_id=test_model_id,
+        hf_token=hf_token,
+        max_length=30
+    )
+    print(f"Chat result: {chat_result}")
+
+    print("✅ ISA LLM Service test completed!")
+
+if __name__ == "__main__":
+    # For local development
+    test_service()

isa_model/deployment/modal/services/video/__init__.py
@@ -0,0 +1 @@
+"""Video services for Modal deployment"""

isa_model/deployment/modal/services/vision/__init__.py
@@ -0,0 +1 @@
+"""Vision services for Modal deployment"""

isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py
@@ -0,0 +1,48 @@
+"""
+tenant-a-service LLM Service for Modal
+
+Auto-generated service for model: gpt2
+Architecture: gpt
+"""
+
+import modal
+from typing import Dict, Any, List
+
+app = modal.App("tenant-a-service")
+
+image = modal.Image.debian_slim().pip_install(
+    "accelerate>=0.24.0", "transformers>=4.35.0", "httpx>=0.26.0", "torch>=2.0.0", "requests>=2.31.0", "numpy>=1.24.0", "pydantic>=2.0.0"
+)
+
+@app.cls(
+    image=image,
+    gpu=modal.gpu.A10G(count=1),
+    container_idle_timeout=300,
+    memory=32768
+)
+class Tenant_A_ServiceService:
+
+    @modal.enter()
+    def load_model(self):
+        import torch
+        from transformers import AutoTokenizer, AutoModelForCausalLM
+
+        self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
+        self.model = AutoModelForCausalLM.from_pretrained(
+            "gpt2",
+            torch_dtype=torch.float16,
+            device_map="auto",
+            trust_remote_code=True
+        )
+
+    @modal.method()
+    def generate(self, messages: List[Dict[str, str]], **kwargs):
+        # Generate response (simplified)
+        prompt = messages[-1]["content"] if messages else ""
+        return {"response": f"Generated response for: {prompt}", "model": "gpt2"}
+
+@app.function(image=image)
+@modal.web_endpoint(method="POST")
+def inference_endpoint(item: Dict[str, Any]):
+    service = Tenant_A_ServiceService()
+    return service.generate(**item)

isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py
@@ -0,0 +1,48 @@
+"""
+prefix-test-service LLM Service for Modal
+
+Auto-generated service for model: gpt2
+Architecture: gpt
+"""
+
+import modal
+from typing import Dict, Any, List
+
+app = modal.App("prefix-test-service")
+
+image = modal.Image.debian_slim().pip_install(
+    "accelerate>=0.24.0", "transformers>=4.35.0", "httpx>=0.26.0", "torch>=2.0.0", "requests>=2.31.0", "numpy>=1.24.0", "pydantic>=2.0.0"
+)
+
+@app.cls(
+    image=image,
+    gpu=modal.gpu.A10G(count=1),
+    container_idle_timeout=300,
+    memory=32768
+)
+class Prefix_Test_ServiceService:
+
+    @modal.enter()
+    def load_model(self):
+        import torch
+        from transformers import AutoTokenizer, AutoModelForCausalLM
+
+        self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
+        self.model = AutoModelForCausalLM.from_pretrained(
+            "gpt2",
+            torch_dtype=torch.float16,
+            device_map="auto",
+            trust_remote_code=True
+        )
+
+    @modal.method()
+    def generate(self, messages: List[Dict[str, str]], **kwargs):
+        # Generate response (simplified)
+        prompt = messages[-1]["content"] if messages else ""
+        return {"response": f"Generated response for: {prompt}", "model": "gpt2"}
+
+@app.function(image=image)
+@modal.web_endpoint(method="POST")
+def inference_endpoint(item: Dict[str, Any]):
+    service = Prefix_Test_ServiceService()
+    return service.generate(**item)
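
Both auto-generated services above expose the same `inference_endpoint` web endpoint, which takes a JSON body matching the keyword arguments of `generate`. A minimal client sketch follows; the URL is a placeholder, since actual Modal web-endpoint URLs depend on the workspace and app name and are printed at deploy time.

import requests

# Placeholder URL: substitute the real *.modal.run URL shown by `modal deploy`.
url = "https://<workspace>--tenant-a-service-inference-endpoint.modal.run"

payload = {"messages": [{"role": "user", "content": "Hello"}]}
resp = requests.post(url, json=payload, timeout=60)
print(resp.json())  # expected shape per generate(): {"response": "...", "model": "gpt2"}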