isa-model 0.3.9__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. isa_model/__init__.py +1 -1
  2. isa_model/client.py +732 -565
  3. isa_model/core/cache/redis_cache.py +401 -0
  4. isa_model/core/config/config_manager.py +53 -10
  5. isa_model/core/config.py +1 -1
  6. isa_model/core/database/__init__.py +1 -0
  7. isa_model/core/database/migrations.py +277 -0
  8. isa_model/core/database/supabase_client.py +123 -0
  9. isa_model/core/models/__init__.py +37 -0
  10. isa_model/core/models/model_billing_tracker.py +60 -88
  11. isa_model/core/models/model_manager.py +36 -18
  12. isa_model/core/models/model_repo.py +44 -38
  13. isa_model/core/models/model_statistics_tracker.py +234 -0
  14. isa_model/core/models/model_storage.py +0 -1
  15. isa_model/core/models/model_version_manager.py +959 -0
  16. isa_model/core/pricing_manager.py +2 -249
  17. isa_model/core/resilience/circuit_breaker.py +366 -0
  18. isa_model/core/security/secrets.py +358 -0
  19. isa_model/core/services/__init__.py +2 -4
  20. isa_model/core/services/intelligent_model_selector.py +101 -370
  21. isa_model/core/storage/hf_storage.py +1 -1
  22. isa_model/core/types.py +7 -0
  23. isa_model/deployment/cloud/modal/isa_audio_chatTTS_service.py +520 -0
  24. isa_model/deployment/cloud/modal/isa_audio_fish_service.py +0 -0
  25. isa_model/deployment/cloud/modal/isa_audio_openvoice_service.py +758 -0
  26. isa_model/deployment/cloud/modal/isa_audio_service_v2.py +1044 -0
  27. isa_model/deployment/cloud/modal/isa_embed_rerank_service.py +296 -0
  28. isa_model/deployment/cloud/modal/isa_video_hunyuan_service.py +423 -0
  29. isa_model/deployment/cloud/modal/isa_vision_ocr_service.py +519 -0
  30. isa_model/deployment/cloud/modal/isa_vision_qwen25_service.py +709 -0
  31. isa_model/deployment/cloud/modal/isa_vision_table_service.py +467 -323
  32. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +607 -180
  33. isa_model/deployment/cloud/modal/isa_vision_ui_service_optimized.py +660 -0
  34. isa_model/deployment/core/deployment_manager.py +6 -4
  35. isa_model/deployment/services/auto_hf_modal_deployer.py +894 -0
  36. isa_model/eval/benchmarks/__init__.py +27 -0
  37. isa_model/eval/benchmarks/multimodal_datasets.py +460 -0
  38. isa_model/eval/benchmarks.py +244 -12
  39. isa_model/eval/evaluators/__init__.py +8 -2
  40. isa_model/eval/evaluators/audio_evaluator.py +727 -0
  41. isa_model/eval/evaluators/embedding_evaluator.py +742 -0
  42. isa_model/eval/evaluators/vision_evaluator.py +564 -0
  43. isa_model/eval/example_evaluation.py +395 -0
  44. isa_model/eval/factory.py +272 -5
  45. isa_model/eval/isa_benchmarks.py +700 -0
  46. isa_model/eval/isa_integration.py +582 -0
  47. isa_model/eval/metrics.py +159 -6
  48. isa_model/eval/tests/unit/test_basic.py +396 -0
  49. isa_model/inference/ai_factory.py +44 -8
  50. isa_model/inference/services/audio/__init__.py +21 -0
  51. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  52. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  53. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  54. isa_model/inference/services/audio/openai_stt_service.py +32 -6
  55. isa_model/inference/services/base_service.py +17 -1
  56. isa_model/inference/services/embedding/__init__.py +13 -0
  57. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  58. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  59. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  60. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  61. isa_model/inference/services/img/__init__.py +2 -2
  62. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  63. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  64. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  65. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  66. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  67. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  68. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  69. isa_model/inference/services/llm/base_llm_service.py +30 -6
  70. isa_model/inference/services/llm/helpers/llm_adapter.py +63 -9
  71. isa_model/inference/services/llm/ollama_llm_service.py +2 -1
  72. isa_model/inference/services/llm/openai_llm_service.py +652 -55
  73. isa_model/inference/services/llm/yyds_llm_service.py +2 -1
  74. isa_model/inference/services/vision/__init__.py +5 -5
  75. isa_model/inference/services/vision/base_vision_service.py +118 -185
  76. isa_model/inference/services/vision/helpers/image_utils.py +11 -5
  77. isa_model/inference/services/vision/isa_vision_service.py +573 -0
  78. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  79. isa_model/serving/api/fastapi_server.py +88 -16
  80. isa_model/serving/api/middleware/auth.py +311 -0
  81. isa_model/serving/api/middleware/security.py +278 -0
  82. isa_model/serving/api/routes/analytics.py +486 -0
  83. isa_model/serving/api/routes/deployments.py +339 -0
  84. isa_model/serving/api/routes/evaluations.py +579 -0
  85. isa_model/serving/api/routes/logs.py +430 -0
  86. isa_model/serving/api/routes/settings.py +582 -0
  87. isa_model/serving/api/routes/unified.py +324 -165
  88. isa_model/serving/api/startup.py +304 -0
  89. isa_model/serving/modal_proxy_server.py +249 -0
  90. isa_model/training/__init__.py +100 -6
  91. isa_model/training/core/__init__.py +4 -1
  92. isa_model/training/examples/intelligent_training_example.py +281 -0
  93. isa_model/training/intelligent/__init__.py +25 -0
  94. isa_model/training/intelligent/decision_engine.py +643 -0
  95. isa_model/training/intelligent/intelligent_factory.py +888 -0
  96. isa_model/training/intelligent/knowledge_base.py +751 -0
  97. isa_model/training/intelligent/resource_optimizer.py +839 -0
  98. isa_model/training/intelligent/task_classifier.py +576 -0
  99. isa_model/training/storage/__init__.py +24 -0
  100. isa_model/training/storage/core_integration.py +439 -0
  101. isa_model/training/storage/training_repository.py +552 -0
  102. isa_model/training/storage/training_storage.py +628 -0
  103. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/METADATA +13 -1
  104. isa_model-0.4.0.dist-info/RECORD +182 -0
  105. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  106. isa_model/deployment/cloud/modal/register_models.py +0 -321
  107. isa_model/inference/adapter/unified_api.py +0 -248
  108. isa_model/inference/services/helpers/stacked_config.py +0 -148
  109. isa_model/inference/services/img/flux_professional_service.py +0 -603
  110. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  111. isa_model/inference/services/others/table_transformer_service.py +0 -61
  112. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  113. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  114. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  115. isa_model/scripts/inference_tracker.py +0 -283
  116. isa_model/scripts/mlflow_manager.py +0 -379
  117. isa_model/scripts/model_registry.py +0 -465
  118. isa_model/scripts/register_models.py +0 -370
  119. isa_model/scripts/register_models_with_embeddings.py +0 -510
  120. isa_model/scripts/start_mlflow.py +0 -95
  121. isa_model/scripts/training_tracker.py +0 -257
  122. isa_model-0.3.9.dist-info/RECORD +0 -138
  123. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/WHEEL +0 -0
  124. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/top_level.txt +0 -0
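The diff body below covers entry 31, the complete rewrite of isa_model/deployment/cloud/modal/isa_vision_table_service.py from a Qwen2.5-VL-32B prompt-based extraction service to a Microsoft Table Transformer pipeline (detection plus structure recognition). For orientation, here is a minimal standalone sketch of the detection path the new service wraps; the model ID and post-processing calls mirror the diff, but the helper itself is illustrative and not part of the package:

import torch
from PIL import Image
from transformers import DetrImageProcessor, TableTransformerForObjectDetection

# Illustrative sketch (not package code): the detection half of the new service.
processor = DetrImageProcessor.from_pretrained("microsoft/table-transformer-detection")
model = TableTransformerForObjectDetection.from_pretrained("microsoft/table-transformer-detection")
model.eval()

def detect_tables(image: Image.Image, threshold: float = 0.7) -> list:
    """Return bounding boxes and confidences for tables found in a page image."""
    inputs = processor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # Post-processing expects (height, width); PIL's image.size is (width, height).
    target_sizes = torch.tensor([image.size[::-1]])
    results = processor.post_process_object_detection(
        outputs, threshold=threshold, target_sizes=target_sizes
    )[0]
    return [
        {"bbox": [int(v) for v in box.tolist()], "confidence": float(score)}
        for score, box in zip(results["scores"], results["boxes"])
    ]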
isa_model/deployment/cloud/modal/isa_vision_table_service.py
@@ -1,7 +1,8 @@
 """
-Qwen2.5-VL-32B Table Data Extraction Service
+ISA Vision Table Service

-Specialized service for table data extraction using Qwen2.5-VL-32B-Instruct-AWQ
+Specialized service for table detection and structure recognition using Microsoft Table Transformer
+Combines table detection and structure recognition for comprehensive table processing
 """

 import modal
@@ -17,48 +18,64 @@ import os
 import logging

 # Define Modal application
-app = modal.App("qwen-vision-table")
+app = modal.App("isa-vision-table")

-# Download Qwen2.5-VL model
-def download_qwen_model():
-    """Download Qwen2.5-VL-32B-Instruct-AWQ model"""
+# Download Table Transformer models
+def download_table_transformer_models():
+    """Download Microsoft Table Transformer models"""
     from huggingface_hub import snapshot_download

-    print("📦 Downloading Qwen2.5-VL-32B-Instruct-AWQ...")
+    print("📦 Downloading Microsoft Table Transformer models...")
     os.makedirs("/models", exist_ok=True)

     try:
+        # Download Table Detection model
+        print("🎯 Downloading Table Transformer Detection model...")
         snapshot_download(
-            repo_id="Qwen/Qwen2.5-VL-32B-Instruct-AWQ",
-            local_dir="/models/qwen2.5-vl-32b-awq",
-            allow_patterns=["**/*.safetensors", "**/*.json", "**/*.py", "**/*.txt"],
-            # Use auth token if needed for gated models
-            # token=os.getenv("HF_TOKEN")
+            repo_id="microsoft/table-transformer-detection",
+            local_dir="/models/table-transformer-detection",
+            allow_patterns=["**/*.bin", "**/*.json", "**/*.safetensors"]
         )
-        print("✅ Qwen2.5-VL-32B-Instruct-AWQ downloaded")
+        print("✅ Table Detection model downloaded")
+
+        # Download Table Structure Recognition model (v1.1)
+        print("📦 Downloading Table Transformer Structure Recognition v1.1...")
+        snapshot_download(
+            repo_id="microsoft/table-transformer-structure-recognition-v1.1-all",
+            local_dir="/models/table-transformer-structure",
+            allow_patterns=["**/*.bin", "**/*.json", "**/*.safetensors"]
+        )
+        print("✅ Table Structure Recognition model downloaded")
+
     except Exception as e:
-        print(f"⚠️ Model download failed: {e}")
-        raise
+        print(f"⚠️ Table Transformer download failed: {e}")
+        # Don't raise - allow service to start with fallback
+        print("⚠️ Will use fallback table detection method")

-    print("📦 Model download completed")
+    print("✅ Table Transformer setup completed")

-# Define Modal container image with AWQ support
+# Define Modal container image
 image = (
     modal.Image.debian_slim(python_version="3.11")
+    .apt_install([
+        # Graphics libraries for image processing
+        "libgl1-mesa-glx",
+        "libglib2.0-0",
+        "libsm6",
+        "libxext6",
+        "libxrender-dev",
+        "libgomp1",
+    ])
     .pip_install([
-        # Core AI libraries with AWQ support
-        "torch>=2.1.0",
-        "torchvision",
-        "transformers>=4.37.0",
-        "accelerate>=0.26.0",
-        "autoawq>=0.2.0",  # AWQ quantization support
+        # Core AI libraries
+        "torch>=2.0.0",
+        "torchvision",
+        "transformers>=4.35.0",
         "huggingface_hub",
+        "accelerate",

-        # Qwen-VL specific dependencies
-        "qwen-vl-utils",  # If available
-        "tiktoken",
-        "einops",
-        "timm",
+        # Table Transformer specific dependencies
+        "timm",  # Required for DETR backbone

         # Image processing
         "pillow>=10.0.1",
@@ -73,320 +90,477 @@ image = (
         "pydantic>=2.0.0",
         "python-dotenv",
     ])
-    .run_function(download_qwen_model)
+    .run_function(download_table_transformer_models)
     .env({
         "TRANSFORMERS_CACHE": "/models",
+        "TORCH_HOME": "/models/torch",
         "HF_HOME": "/models",
-        "TORCH_HOME": "/models",
     })
 )

-# Table Extraction Service
+# Table Transformer Service - Optimized for T4 GPU
 @app.cls(
-    gpu="A100",  # A100 recommended for 32B model, H100 if available
+    gpu="T4",  # T4 4GB GPU - sufficient for Table Transformer
     image=image,
-    memory=32768,  # 32GB RAM for 32B model
-    timeout=3600,  # 1 hour timeout
+    memory=12288,  # 12GB RAM for table processing
+    timeout=1800,  # 30 minutes
     scaledown_window=60,  # 1 minute idle timeout
     min_containers=0,  # Scale to zero to save costs
-    # secrets=[modal.Secret.from_name("huggingface-token")]  # If needed
+    max_containers=15,  # Support up to 15 concurrent containers
 )
-class QwenTableExtractionService:
+class TableTransformerService:
     """
-    Table Data Extraction Service using Qwen2.5-VL-32B-Instruct-AWQ
+    Microsoft Table Transformer Service

-    Provides high-accuracy table extraction from images
+    Provides table detection and structure recognition
+    Cost-effective deployment optimized for document processing
     """

     @modal.enter()
-    def load_model(self):
-        """Load Qwen2.5-VL model on container startup"""
-        print("🚀 Loading Qwen2.5-VL-32B-Instruct-AWQ...")
+    def load_models(self):
+        """Load Table Transformer models on container startup"""
+        print("🚀 Loading Table Transformer models...")
         start_time = time.time()

-        # Initialize attributes
-        self.model = None
-        self.processor = None
+        # Initialize instance variables
+        self.detection_model = None
+        self.detection_processor = None
+        self.structure_model = None
+        self.structure_processor = None
         self.logger = logging.getLogger(__name__)
+        self.request_count = 0
+        self.total_processing_time = 0.0

         try:
-            from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
+            # Import transformers components
+            from transformers import TableTransformerForObjectDetection, DetrImageProcessor

-            model_path = "Qwen/Qwen2.5-VL-32B-Instruct-AWQ"
-
-            # Load processor
-            print("📱 Loading processor...")
-            self.processor = AutoProcessor.from_pretrained(
-                model_path,
-                trust_remote_code=True
+            print("🎯 Loading Table Detection model...")
+            self.detection_processor = DetrImageProcessor.from_pretrained(
+                "microsoft/table-transformer-detection"
+            )
+            self.detection_model = TableTransformerForObjectDetection.from_pretrained(
+                "microsoft/table-transformer-detection"
             )

-            # Load model with AWQ quantization
-            print("🧠 Loading AWQ quantized model...")
-            self.model = Qwen2VLForConditionalGeneration.from_pretrained(
-                model_path,
-                torch_dtype=torch.float16,
-                device_map="auto",
-                trust_remote_code=True,
-                # AWQ specific settings
-                use_safetensors=True,
+            print("📊 Loading Table Structure Recognition model...")
+            self.structure_processor = DetrImageProcessor.from_pretrained(
+                "microsoft/table-transformer-structure-recognition-v1.1-all"
+            )
+            self.structure_model = TableTransformerForObjectDetection.from_pretrained(
+                "microsoft/table-transformer-structure-recognition-v1.1-all"
             )

-            # Try to import qwen-vl-utils
-            try:
-                from qwen_vl_utils import process_vision_info as qwen_process_vision_info
-                print("✅ qwen-vl-utils imported successfully")
-                # Use the official process_vision_info if available
-                globals()['process_vision_info'] = qwen_process_vision_info
-            except ImportError:
-                print("⚠️ qwen-vl-utils not found, using custom implementation")
+            # Move models to GPU if available
+            device = 'cuda' if torch.cuda.is_available() else 'cpu'
+            self.detection_model = self.detection_model.to(device)
+            self.structure_model = self.structure_model.to(device)

             # Set to evaluation mode
-            self.model.eval()
+            self.detection_model.eval()
+            self.structure_model.eval()

             load_time = time.time() - start_time
-            print(f"✅ Qwen2.5-VL model loaded in {load_time:.2f}s")
+            print(f"✅ Table Transformer models loaded successfully in {load_time:.2f}s")

         except Exception as e:
-            print(f"❌ Model loading failed: {e}")
-            raise
+            print(f"❌ Table Transformer model loading failed: {e}")
+            import traceback
+            traceback.print_exc()
+            # Don't raise - allow service to start with fallback
+            print("⚠️ Service will use fallback table detection")

     @modal.method()
-    def extract_table_data(
+    def detect_tables(
         self,
         image_b64: str,
-        extraction_format: str = "markdown",
-        custom_prompt: Optional[str] = None
+        detection_threshold: float = 0.7
     ) -> Dict[str, Any]:
         """
-        Extract table data from image
+        Detect tables in document image

         Args:
             image_b64: Base64 encoded image
-            extraction_format: Output format ("markdown", "json", "csv", "html")
-            custom_prompt: Custom extraction prompt
+            detection_threshold: Table detection confidence threshold

         Returns:
-            Extracted table data and metadata
+            Table detection results with bounding boxes
         """
         start_time = time.time()
+        self.request_count += 1

         try:
-            # Decode image
-            image = self._decode_image(image_b64)
-
-            # Prepare prompt based on format
-            if custom_prompt:
-                prompt = custom_prompt
-            else:
-                prompt = self._get_extraction_prompt(extraction_format)
-
-            # Process inputs
-            messages = [
-                {
-                    "role": "user",
-                    "content": [
-                        {"type": "image", "image": image},
-                        {"type": "text", "text": prompt}
-                    ]
-                }
-            ]
-
-            # Prepare inputs for the model
-            text = self.processor.apply_chat_template(
-                messages, tokenize=False, add_generation_prompt=True
-            )
+            # Validate models are loaded
+            if not self.detection_model or not self.detection_processor:
+                raise RuntimeError("Table detection model not loaded")

-            image_inputs, video_inputs = process_vision_info(messages)
-            inputs = self.processor(
-                text=[text],
-                images=image_inputs,
-                videos=video_inputs,
-                padding=True,
-                return_tensors="pt"
-            )
-            inputs = inputs.to("cuda")
+            # Decode and process image
+            image = self._decode_image(image_b64)

-            # Generate response
-            with torch.no_grad():
-                generated_ids = self.model.generate(
-                    **inputs,
-                    max_new_tokens=2048,
-                    do_sample=False,
-                    temperature=0.0,  # Deterministic for table extraction
-                    pad_token_id=self.processor.tokenizer.eos_token_id
-                )
-
-            # Decode response
-            generated_ids_trimmed = [
-                out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
-            ]
-            output_text = self.processor.batch_decode(
-                generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
-            )[0]
+            # Run table detection
+            tables = self._detect_tables_impl(image, detection_threshold)

             processing_time = time.time() - start_time
+            self.total_processing_time += processing_time

-            # Post-process extracted data
-            processed_data = self._post_process_extraction(output_text, extraction_format)
+            # Calculate cost (T4 GPU: ~$0.40/hour)
+            gpu_cost = (processing_time / 3600) * 0.40

-            return {
+            result = {
                 'success': True,
-                'service': 'qwen-vision-table',
-                'extracted_data': processed_data,
-                'raw_output': output_text,
-                'format': extraction_format,
+                'service': 'isa-vision-table',
+                'provider': 'ISA',
+                'tables': tables,
+                'table_count': len(tables),
                 'processing_time': processing_time,
+                'detection_method': 'table-transformer',
+                'billing': {
+                    'request_id': f"req_{self.request_count}_{int(time.time())}",
+                    'gpu_seconds': processing_time,
+                    'estimated_cost_usd': round(gpu_cost, 6),
+                    'gpu_type': 'T4'
+                },
                 'model_info': {
-                    'model': 'Qwen2.5-VL-32B-Instruct-AWQ',
-                    'gpu': 'A100',
-                    'quantization': 'AWQ',
+                    'model': 'microsoft/table-transformer-detection',
+                    'provider': 'ISA',
+                    'gpu': 'T4',
                     'container_id': os.environ.get('MODAL_TASK_ID', 'unknown')
                 }
             }

+            # Output JSON for client parsing
+            print("=== JSON_RESULT_START ===")
+            print(json.dumps(result, default=str))
+            print("=== JSON_RESULT_END ===")
+
+            return result
+
         except Exception as e:
-            self.logger.error(f"Table extraction failed: {e}")
-            return {
+            processing_time = time.time() - start_time
+            self.logger.error(f"Table detection failed: {e}")
+            error_result = {
                 'success': False,
-                'service': 'qwen-vision-table',
+                'service': 'isa-vision-table',
+                'provider': 'ISA',
                 'error': str(e),
-                'processing_time': time.time() - start_time
+                'processing_time': processing_time,
+                'billing': {
+                    'request_id': f"req_{self.request_count}_{int(time.time())}",
+                    'gpu_seconds': processing_time,
+                    'estimated_cost_usd': round((processing_time / 3600) * 0.40, 6),
+                    'gpu_type': 'T4'
+                }
             }
+
+            print("=== JSON_RESULT_START ===")
+            print(json.dumps(error_result, default=str))
+            print("=== JSON_RESULT_END ===")
+
+            return error_result

-    def _get_extraction_prompt(self, format_type: str) -> str:
-        """Get extraction prompt based on desired format"""
-        base_prompt = "Please extract all the data from this table accurately."
+    @modal.method()
+    def analyze_table_structure(
+        self,
+        image_b64: str,
+        table_bbox: Optional[List[int]] = None,
+        structure_threshold: float = 0.6
+    ) -> Dict[str, Any]:
+        """
+        Analyze table structure in image or table region

-        format_prompts = {
-            "markdown": f"{base_prompt} Format the output as a markdown table with proper alignment.",
-            "json": f"{base_prompt} Format the output as a JSON array where each row is an object with column headers as keys.",
-            "csv": f"{base_prompt} Format the output as CSV with comma-separated values. Include headers in the first row.",
-            "html": f"{base_prompt} Format the output as an HTML table with proper <table>, <tr>, <td>, and <th> tags.",
-        }
+        Args:
+            image_b64: Base64 encoded image
+            table_bbox: Optional table bounding box [x1, y1, x2, y2]
+            structure_threshold: Structure detection confidence threshold
+
+        Returns:
+            Table structure analysis results
+        """
+        start_time = time.time()

-        return format_prompts.get(format_type, base_prompt)
-
-    def _post_process_extraction(self, raw_output: str, format_type: str) -> Dict[str, Any]:
-        """Post-process extracted table data"""
         try:
-            if format_type == "json":
-                # Try to parse JSON
-                import json
-                try:
-                    # Extract JSON from the output if it's wrapped in text
-                    start_idx = raw_output.find('[')
-                    end_idx = raw_output.rfind(']') + 1
-                    if start_idx != -1 and end_idx != 0:
-                        json_str = raw_output[start_idx:end_idx]
-                        parsed_data = json.loads(json_str)
-                        return {"structured_data": parsed_data, "raw_text": raw_output}
-                except json.JSONDecodeError:
-                    pass
-
-            elif format_type == "csv":
-                # Parse CSV-like output
-                lines = raw_output.strip().split('\n')
-                csv_data = [line.split(',') for line in lines if line.strip()]
-                return {"structured_data": csv_data, "raw_text": raw_output}
-
-            # For markdown, html, or unparseable formats, return as text
-            return {"structured_data": raw_output, "raw_text": raw_output}
+            if not self.structure_model or not self.structure_processor:
+                raise RuntimeError("Table structure model not loaded")
+
+            image = self._decode_image(image_b64)
+
+            # Crop to table region if bbox provided
+            if table_bbox:
+                x1, y1, x2, y2 = table_bbox
+                image = image.crop((x1, y1, x2, y2))
+
+            # Analyze table structure
+            structure = self._analyze_table_structure_impl(image, structure_threshold)
+
+            processing_time = time.time() - start_time
+
+            return {
+                'success': True,
+                'service': 'isa-vision-table',
+                'function': 'structure_analysis',
+                'structure': structure,
+                'processing_time': processing_time,
+                'model_info': {
+                    'model': 'microsoft/table-transformer-structure-recognition-v1.1-all',
+                    'gpu': 'T4'
+                }
+            }

         except Exception as e:
-            self.logger.warning(f"Post-processing failed: {e}")
-            return {"structured_data": raw_output, "raw_text": raw_output}
+            return {
+                'success': False,
+                'service': 'isa-vision-table',
+                'function': 'structure_analysis',
+                'error': str(e),
+                'processing_time': time.time() - start_time
+            }

     @modal.method()
-    def batch_extract_tables(self, images_b64: List[str], extraction_format: str = "markdown") -> Dict[str, Any]:
+    def process_complete_table(
+        self,
+        image_b64: str,
+        detection_threshold: float = 0.7,
+        structure_threshold: float = 0.6
+    ) -> Dict[str, Any]:
         """
-        Extract tables from multiple images
+        Complete table processing: detection + structure analysis

         Args:
-            images_b64: List of base64 encoded images
-            extraction_format: Output format for all extractions
+            image_b64: Base64 encoded image
+            detection_threshold: Table detection confidence threshold
+            structure_threshold: Structure analysis confidence threshold

         Returns:
-            Batch extraction results
+            Complete table processing results
         """
         start_time = time.time()
-        results = []

-        for i, image_b64 in enumerate(images_b64):
-            try:
-                result = self.extract_table_data(image_b64, extraction_format)
-                result['image_index'] = i
-                results.append(result)
-            except Exception as e:
-                results.append({
-                    'success': False,
-                    'image_index': i,
-                    'error': str(e)
+        try:
+            image = self._decode_image(image_b64)
+
+            # Step 1: Detect tables
+            tables = self._detect_tables_impl(image, detection_threshold)
+
+            # Step 2: Analyze structure for each detected table
+            for table in tables:
+                if 'bbox' in table:
+                    x1, y1, x2, y2 = table['bbox']
+                    table_image = image.crop((x1, y1, x2, y2))
+                    structure = self._analyze_table_structure_impl(table_image, structure_threshold)
+                    table['structure'] = structure
+
+            processing_time = time.time() - start_time
+
+            return {
+                'success': True,
+                'service': 'isa-vision-table',
+                'function': 'complete_processing',
+                'tables': tables,
+                'table_count': len(tables),
+                'processing_time': processing_time,
+                'model_info': {
+                    'detection_model': 'microsoft/table-transformer-detection',
+                    'structure_model': 'microsoft/table-transformer-structure-recognition-v1.1-all',
+                    'gpu': 'T4'
+                }
+            }
+
+        except Exception as e:
+            return {
+                'success': False,
+                'service': 'isa-vision-table',
+                'function': 'complete_processing',
+                'error': str(e),
+                'processing_time': time.time() - start_time
+            }
+
+    def _detect_tables_impl(self, image: Image.Image, threshold: float) -> List[Dict[str, Any]]:
+        """Implementation of table detection using Table Transformer"""
+        print("🎯 Running Table Transformer detection...")
+
+        try:
+            # Prepare inputs
+            inputs = self.detection_processor(images=image, return_tensors="pt")
+
+            # Move to GPU if available
+            device = next(self.detection_model.parameters()).device
+            inputs = {k: v.to(device) for k, v in inputs.items()}
+
+            # Run inference
+            with torch.no_grad():
+                outputs = self.detection_model(**inputs)
+
+            # Process results
+            target_sizes = torch.tensor([image.size[::-1]])  # (height, width)
+            results = self.detection_processor.post_process_object_detection(
+                outputs, threshold=threshold, target_sizes=target_sizes
+            )[0]
+
+            tables = []
+            for idx, (score, label, box) in enumerate(zip(
+                results["scores"], results["labels"], results["boxes"]
+            )):
+                x1, y1, x2, y2 = box.tolist()
+
+                tables.append({
+                    'id': f'table_{idx}',
+                    'bbox': [int(x1), int(y1), int(x2), int(y2)],
+                    'confidence': float(score),
+                    'center': [int((x1 + x2) // 2), int((y1 + y2) // 2)],
+                    'label': int(label),
+                    'type': 'table'
                 })
+
+            print(f"✅ Table Transformer detected {len(tables)} tables")
+            return tables
+
+        except Exception as e:
+            print(f"❌ Table detection failed: {e}")
+            import traceback
+            traceback.print_exc()
+            return []
+
+    def _analyze_table_structure_impl(self, image: Image.Image, threshold: float) -> Dict[str, Any]:
+        """Implementation of table structure analysis using Table Transformer"""
+        print("📊 Running Table Transformer structure analysis...")

-        return {
-            'success': True,
-            'service': 'qwen-vision-table',
-            'batch_results': results,
-            'total_images': len(images_b64),
-            'successful_extractions': sum(1 for r in results if r.get('success', False)),
-            'total_processing_time': time.time() - start_time
-        }
+        try:
+            # Prepare inputs
+            inputs = self.structure_processor(images=image, return_tensors="pt")
+
+            # Move to GPU if available
+            device = next(self.structure_model.parameters()).device
+            inputs = {k: v.to(device) for k, v in inputs.items()}
+
+            # Run inference
+            with torch.no_grad():
+                outputs = self.structure_model(**inputs)
+
+            # Process results
+            target_sizes = torch.tensor([image.size[::-1]])  # (height, width)
+            results = self.structure_processor.post_process_object_detection(
+                outputs, threshold=threshold, target_sizes=target_sizes
+            )[0]
+
+            # Parse structure elements
+            rows = []
+            columns = []
+            cells = []
+
+            for score, label, box in zip(
+                results["scores"], results["labels"], results["boxes"]
+            ):
+                x1, y1, x2, y2 = box.tolist()
+                element = {
+                    'bbox': [int(x1), int(y1), int(x2), int(y2)],
+                    'confidence': float(score),
+                    'label': int(label)
+                }
+
+                # Categorize based on label (this may need adjustment based on model output)
+                if label == 0:  # Row
+                    rows.append(element)
+                elif label == 1:  # Column
+                    columns.append(element)
+                else:  # Cell or other structure
+                    cells.append(element)
+
+            structure = {
+                'rows': rows,
+                'columns': columns,
+                'cells': cells,
+                'row_count': len(rows),
+                'column_count': len(columns),
+                'cell_count': len(cells),
+                'confidence_avg': float(torch.mean(results["scores"]).item()) if len(results["scores"]) > 0 else 0.0
+            }
+
+            print(f"✅ Structure analysis: {len(rows)} rows, {len(columns)} columns, {len(cells)} cells")
+            return structure
+
+        except Exception as e:
+            print(f"❌ Table structure analysis failed: {e}")
+            import traceback
+            traceback.print_exc()
+            return {
+                'rows': [],
+                'columns': [],
+                'cells': [],
+                'row_count': 0,
+                'column_count': 0,
+                'cell_count': 0,
+                'confidence_avg': 0.0,
+                'error': str(e)
+            }

     @modal.method()
     def health_check(self) -> Dict[str, Any]:
         """Health check endpoint"""
         return {
             'status': 'healthy',
-            'service': 'qwen-vision-table',
-            'model': 'Qwen2.5-VL-32B-Instruct-AWQ',
-            'model_loaded': self.model is not None,
-            'processor_loaded': self.processor is not None,
+            'service': 'isa-vision-table',
+            'provider': 'ISA',
+            'models_loaded': {
+                'detection': self.detection_model is not None,
+                'structure': self.structure_model is not None
+            },
+            'model_names': {
+                'detection': 'microsoft/table-transformer-detection',
+                'structure': 'microsoft/table-transformer-structure-recognition-v1.1-all'
+            },
             'timestamp': time.time(),
-            'gpu': 'A100'
+            'gpu': 'T4',
+            'memory_usage': '12GB',
+            'request_count': self.request_count
+        }
+
+    @modal.method()
+    def get_usage_stats(self) -> Dict[str, Any]:
+        """Get service usage statistics for billing"""
+        avg_processing_time = (
+            self.total_processing_time / self.request_count
+            if self.request_count > 0 else 0
+        )
+        total_cost = (self.total_processing_time / 3600) * 0.40
+
+        return {
+            'service': 'isa-vision-table',
+            'provider': 'ISA',
+            'stats': {
+                'total_requests': self.request_count,
+                'total_gpu_seconds': round(self.total_processing_time, 3),
+                'avg_processing_time': round(avg_processing_time, 3),
+                'total_cost_usd': round(total_cost, 6),
+                'container_id': os.environ.get('MODAL_TASK_ID', 'unknown')
+            }
         }

     def _decode_image(self, image_b64: str) -> Image.Image:
         """Decode base64 image"""
         try:
+            # Handle data URL format
             if image_b64.startswith('data:image'):
                 image_b64 = image_b64.split(',')[1]

+            # Clean up base64 string
+            image_b64 = image_b64.strip().replace('\n', '').replace('\r', '').replace(' ', '')
+
+            # Decode base64
             image_data = base64.b64decode(image_b64)
-            return Image.open(io.BytesIO(image_data)).convert('RGB')
+            print(f"🔍 Decoded image size: {len(image_data)} bytes")
+
+            # Open with PIL
+            image = Image.open(io.BytesIO(image_data))
+            print(f"🔍 Image format: {image.format}, size: {image.size}, mode: {image.mode}")
+
+            return image.convert('RGB')
+
         except Exception as e:
-            raise ValueError(f"Failed to decode image: {e}")
-
-# Helper function for vision processing
-def process_vision_info(messages):
-    """Process vision information from messages"""
-    image_inputs = []
-    video_inputs = []
-
-    for message in messages:
-        if isinstance(message.get("content"), list):
-            for content in message["content"]:
-                if content.get("type") == "image":
-                    image_inputs.append(content["image"])
-                elif content.get("type") == "video":
-                    video_inputs.append(content["video"])
-
-    return image_inputs, video_inputs
-
-# Deployment script
-@app.function()
-def deploy_info():
-    """Deployment information"""
-    return {
-        "service": "Qwen2.5-VL-32B Table Extraction",
-        "model": "Qwen/Qwen2.5-VL-32B-Instruct-AWQ",
-        "gpu_requirement": "A100 (minimum), H100 (recommended)",
-        "memory_requirement": "32GB+",
-        "deploy_command": "modal deploy qwen_table_extraction.py"
-    }
+            print(f"❌ Image decode error: {e}")
+            raise e

 # Auto-registration function
-@app.function()
+@app.function()
 async def register_service():
     """Auto-register this service in the model registry"""
     try:
@@ -398,111 +572,81 @@ async def register_service():
         sys.path.insert(0, str(project_root))

         try:
-            from isa_model.core.model_manager import ModelManager
-            from isa_model.core.model_repo import ModelType, ModelCapability
-            from isa_model.core.service_registry import ServiceRegistry
-            from isa_model.core.types import ServiceType, DeploymentPlatform, ServiceStatus, ResourceRequirements
-            from isa_model.core.model_service import ModelService
+            from isa_model.core.models.model_manager import ModelManager
+            from isa_model.core.models.model_repo import ModelType, ModelCapability
         except ImportError:
-            # Fallback if import fails in Modal environment
-            print("⚠️ Could not import required modules - registration skipped")
-            return {"success": False, "error": "Required modules not available"}
+            print("⚠️ Could not import model manager - registration skipped")
+            return {"success": False, "error": "Model manager not available"}

         # Use ModelManager to register this service
         model_manager = ModelManager()

-        # 1. First register the underlying model (backward compatibility)
-        model_success = model_manager.registry.register_model(
-            model_id="qwen2.5-vl-32b-table-service",
+        # Register the ISA service in the registry
+        success = model_manager.registry.register_model(
+            model_id="isa-table-transformer-service",
             model_type=ModelType.VISION,
             capabilities=[
                 ModelCapability.TABLE_DETECTION,
                 ModelCapability.TABLE_STRUCTURE_RECOGNITION,
-                ModelCapability.OCR,
                 ModelCapability.IMAGE_ANALYSIS
             ],
             metadata={
-                "description": "Qwen2.5-VL-32B table extraction service",
-                "service_name": "qwen-vision-table",
+                "description": "ISA Table Transformer detection and structure recognition service",
+                "provider": "ISA",
+                "service_name": "isa-vision-table",
                 "service_type": "modal",
-                "deployment_type": "modal",
-                "endpoint": "https://qwen-vision-table.modal.run",
-                "underlying_model": "Qwen/Qwen2.5-VL-32B-Instruct-AWQ",
-                "gpu_requirement": "A100",
-                "memory_mb": 32768,
+                "deployment_type": "modal_gpu",
+                "endpoint": "https://isa-vision-table.modal.run",
+                "underlying_models": [
+                    "microsoft/table-transformer-detection",
+                    "microsoft/table-transformer-structure-recognition-v1.1-all"
+                ],
+                "gpu_requirement": "T4",
+                "memory_mb": 12288,
+                "max_containers": 15,
+                "cost_per_hour_usd": 0.40,
                 "auto_registered": True,
                 "registered_by": "isa_vision_table_service.py",
-                "is_service": True  # Mark this as a service, not a raw model
+                "is_service": True,
+                "optimized": True,
+                "billing_enabled": True
             }
         )

-        # 2. Register as a deployed service in the ServiceRegistry (MaaS platform)
-        service_success = False
-        try:
-            service_registry = ServiceRegistry(model_manager.registry)
-
-            # Create ModelService instance
-            service = ModelService(
-                service_id="qwen-table-modal-001",
-                service_name="isa_vision_table",
-                model_id="qwen2.5-vl-32b-table-service",
-                deployment_platform=DeploymentPlatform.MODAL,
-                service_type=ServiceType.VISION,
-                status=ServiceStatus.HEALTHY,
-                inference_endpoint="https://qwen-vision-table.modal.run/extract_table_data",
-                health_endpoint="https://qwen-vision-table.modal.run/health_check",
-                capabilities=["table_detection", "table_structure_recognition", "ocr", "image_analysis"],
-                resource_requirements=ResourceRequirements(
-                    gpu_type="A100",
-                    memory_mb=32768,
-                    cpu_cores=8,
-                    min_replicas=0,
-                    max_replicas=3
-                ),
-                metadata={
-                    "description": "Qwen2.5-VL-32B table extraction service",
-                    "underlying_model": "Qwen/Qwen2.5-VL-32B-Instruct-AWQ",
-                    "auto_scaling": True,
-                    "scale_to_zero": True,
-                    "platform": "modal",
-                    "registered_by": "isa_vision_table_service.py"
-                }
-            )
-
-            # Register in ServiceRegistry
-            service_success = await service_registry.register_service(service)
-
-            if service_success:
-                print("✅ Service registered in MaaS platform ServiceRegistry")
-            else:
-                print("⚠️ ServiceRegistry registration failed")
-
-        except Exception as e:
-            print(f"⚠️ ServiceRegistry registration error: {e}")
-
-        if model_success:
-            print("✅ Model registry registration successful")
+        if success:
+            print("✅ Table service auto-registered successfully")
         else:
-            print("⚠️ Model registry registration failed")
+            print("⚠️ Table service registration failed")

-        overall_success = model_success and service_success
-        return {
-            "success": overall_success,
-            "model_registry": model_success,
-            "service_registry": service_success
-        }
+        return {"success": success}

     except Exception as e:
-        print(f"❌ Auto-registration error: {e}")
+        print(f"❌ Auto-registration error: {e}")
         return {"success": False, "error": str(e)}

+# Deployment script
+@app.function()
+def deploy_info():
+    """Deployment information"""
+    return {
+        "service": "ISA Vision Table Processing",
+        "models": [
+            "microsoft/table-transformer-detection",
+            "microsoft/table-transformer-structure-recognition-v1.1-all"
+        ],
+        "gpu_requirement": "T4",
+        "memory_requirement": "12GB",
+        "deploy_command": "modal deploy isa_vision_table_service.py"
+    }
+
 # Quick deployment function
 @app.function()
 def deploy_service():
     """Deploy this service instantly"""
     import subprocess
+    import os

-    print("🚀 Deploying Qwen2.5-VL Table Extraction Service...")
+    print("🚀 Deploying ISA Vision Table Service...")
     try:
         # Get the current file path
         current_file = __file__
@@ -515,18 +659,18 @@ def deploy_service():
             check=True
         )

-        print("✅ Deployment completed successfully!")
-        print(f"📝 Output: {result.stdout}")
+        print("✅ Deployment completed successfully!")
+        print(f"📝 Output: {result.stdout}")
         return {"success": True, "output": result.stdout}

     except subprocess.CalledProcessError as e:
-        print(f"❌ Deployment failed: {e}")
-        print(f"📝 Error: {e.stderr}")
+        print(f"❌ Deployment failed: {e}")
+        print(f"📝 Error: {e.stderr}")
         return {"success": False, "error": str(e), "stderr": e.stderr}

 if __name__ == "__main__":
-    print("🚀 Qwen2.5-VL Table Extraction Service - Modal Deployment")
+    print("🚀 ISA Vision Table Service - Modal Deployment")
     print("Deploy with: modal deploy isa_vision_table_service.py")
     print("Or call: modal run isa_vision_table_service.py::deploy_service")
-    print("Note: Requires A100 GPU and 32GB+ RAM for optimal performance")
-    print("\n📝 Service will auto-register in model registry upon deployment")
+    print("Note: Uses Microsoft Table Transformer for detection and structure recognition")
+    print("\n📝 Service will auto-register in model registry upon deployment")