isa-model 0.3.9__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. isa_model/__init__.py +1 -1
  2. isa_model/client.py +732 -565
  3. isa_model/core/cache/redis_cache.py +401 -0
  4. isa_model/core/config/config_manager.py +53 -10
  5. isa_model/core/config.py +1 -1
  6. isa_model/core/database/__init__.py +1 -0
  7. isa_model/core/database/migrations.py +277 -0
  8. isa_model/core/database/supabase_client.py +123 -0
  9. isa_model/core/models/__init__.py +37 -0
  10. isa_model/core/models/model_billing_tracker.py +60 -88
  11. isa_model/core/models/model_manager.py +36 -18
  12. isa_model/core/models/model_repo.py +44 -38
  13. isa_model/core/models/model_statistics_tracker.py +234 -0
  14. isa_model/core/models/model_storage.py +0 -1
  15. isa_model/core/models/model_version_manager.py +959 -0
  16. isa_model/core/pricing_manager.py +2 -249
  17. isa_model/core/resilience/circuit_breaker.py +366 -0
  18. isa_model/core/security/secrets.py +358 -0
  19. isa_model/core/services/__init__.py +2 -4
  20. isa_model/core/services/intelligent_model_selector.py +101 -370
  21. isa_model/core/storage/hf_storage.py +1 -1
  22. isa_model/core/types.py +7 -0
  23. isa_model/deployment/cloud/modal/isa_audio_chatTTS_service.py +520 -0
  24. isa_model/deployment/cloud/modal/isa_audio_fish_service.py +0 -0
  25. isa_model/deployment/cloud/modal/isa_audio_openvoice_service.py +758 -0
  26. isa_model/deployment/cloud/modal/isa_audio_service_v2.py +1044 -0
  27. isa_model/deployment/cloud/modal/isa_embed_rerank_service.py +296 -0
  28. isa_model/deployment/cloud/modal/isa_video_hunyuan_service.py +423 -0
  29. isa_model/deployment/cloud/modal/isa_vision_ocr_service.py +519 -0
  30. isa_model/deployment/cloud/modal/isa_vision_qwen25_service.py +709 -0
  31. isa_model/deployment/cloud/modal/isa_vision_table_service.py +467 -323
  32. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +607 -180
  33. isa_model/deployment/cloud/modal/isa_vision_ui_service_optimized.py +660 -0
  34. isa_model/deployment/core/deployment_manager.py +6 -4
  35. isa_model/deployment/services/auto_hf_modal_deployer.py +894 -0
  36. isa_model/eval/benchmarks/__init__.py +27 -0
  37. isa_model/eval/benchmarks/multimodal_datasets.py +460 -0
  38. isa_model/eval/benchmarks.py +244 -12
  39. isa_model/eval/evaluators/__init__.py +8 -2
  40. isa_model/eval/evaluators/audio_evaluator.py +727 -0
  41. isa_model/eval/evaluators/embedding_evaluator.py +742 -0
  42. isa_model/eval/evaluators/vision_evaluator.py +564 -0
  43. isa_model/eval/example_evaluation.py +395 -0
  44. isa_model/eval/factory.py +272 -5
  45. isa_model/eval/isa_benchmarks.py +700 -0
  46. isa_model/eval/isa_integration.py +582 -0
  47. isa_model/eval/metrics.py +159 -6
  48. isa_model/eval/tests/unit/test_basic.py +396 -0
  49. isa_model/inference/ai_factory.py +44 -8
  50. isa_model/inference/services/audio/__init__.py +21 -0
  51. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  52. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  53. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  54. isa_model/inference/services/audio/openai_stt_service.py +32 -6
  55. isa_model/inference/services/base_service.py +17 -1
  56. isa_model/inference/services/embedding/__init__.py +13 -0
  57. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  58. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  59. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  60. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  61. isa_model/inference/services/img/__init__.py +2 -2
  62. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  63. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  64. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  65. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  66. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  67. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  68. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  69. isa_model/inference/services/llm/base_llm_service.py +30 -6
  70. isa_model/inference/services/llm/helpers/llm_adapter.py +63 -9
  71. isa_model/inference/services/llm/ollama_llm_service.py +2 -1
  72. isa_model/inference/services/llm/openai_llm_service.py +652 -55
  73. isa_model/inference/services/llm/yyds_llm_service.py +2 -1
  74. isa_model/inference/services/vision/__init__.py +5 -5
  75. isa_model/inference/services/vision/base_vision_service.py +118 -185
  76. isa_model/inference/services/vision/helpers/image_utils.py +11 -5
  77. isa_model/inference/services/vision/isa_vision_service.py +573 -0
  78. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  79. isa_model/serving/api/fastapi_server.py +88 -16
  80. isa_model/serving/api/middleware/auth.py +311 -0
  81. isa_model/serving/api/middleware/security.py +278 -0
  82. isa_model/serving/api/routes/analytics.py +486 -0
  83. isa_model/serving/api/routes/deployments.py +339 -0
  84. isa_model/serving/api/routes/evaluations.py +579 -0
  85. isa_model/serving/api/routes/logs.py +430 -0
  86. isa_model/serving/api/routes/settings.py +582 -0
  87. isa_model/serving/api/routes/unified.py +324 -165
  88. isa_model/serving/api/startup.py +304 -0
  89. isa_model/serving/modal_proxy_server.py +249 -0
  90. isa_model/training/__init__.py +100 -6
  91. isa_model/training/core/__init__.py +4 -1
  92. isa_model/training/examples/intelligent_training_example.py +281 -0
  93. isa_model/training/intelligent/__init__.py +25 -0
  94. isa_model/training/intelligent/decision_engine.py +643 -0
  95. isa_model/training/intelligent/intelligent_factory.py +888 -0
  96. isa_model/training/intelligent/knowledge_base.py +751 -0
  97. isa_model/training/intelligent/resource_optimizer.py +839 -0
  98. isa_model/training/intelligent/task_classifier.py +576 -0
  99. isa_model/training/storage/__init__.py +24 -0
  100. isa_model/training/storage/core_integration.py +439 -0
  101. isa_model/training/storage/training_repository.py +552 -0
  102. isa_model/training/storage/training_storage.py +628 -0
  103. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/METADATA +13 -1
  104. isa_model-0.4.0.dist-info/RECORD +182 -0
  105. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  106. isa_model/deployment/cloud/modal/register_models.py +0 -321
  107. isa_model/inference/adapter/unified_api.py +0 -248
  108. isa_model/inference/services/helpers/stacked_config.py +0 -148
  109. isa_model/inference/services/img/flux_professional_service.py +0 -603
  110. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  111. isa_model/inference/services/others/table_transformer_service.py +0 -61
  112. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  113. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  114. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  115. isa_model/scripts/inference_tracker.py +0 -283
  116. isa_model/scripts/mlflow_manager.py +0 -379
  117. isa_model/scripts/model_registry.py +0 -465
  118. isa_model/scripts/register_models.py +0 -370
  119. isa_model/scripts/register_models_with_embeddings.py +0 -510
  120. isa_model/scripts/start_mlflow.py +0 -95
  121. isa_model/scripts/training_tracker.py +0 -257
  122. isa_model-0.3.9.dist-info/RECORD +0 -138
  123. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/WHEEL +0 -0
  124. {isa_model-0.3.9.dist-info → isa_model-0.4.0.dist-info}/top_level.txt +0 -0
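The diff body below covers entry 31, the complete rewrite of isa_model/deployment/cloud/modal/isa_vision_table_service.py from a Qwen2.5-VL-32B prompt-based extraction service to a Microsoft Table Transformer pipeline (detection plus structure recognition). For orientation, here is a minimal standalone sketch of the detection path the new service wraps; the model ID and post-processing calls mirror the diff, but the helper itself is illustrative and not part of the package:

import torch
from PIL import Image
from transformers import DetrImageProcessor, TableTransformerForObjectDetection

# Illustrative sketch (not package code): the detection half of the new service.
processor = DetrImageProcessor.from_pretrained("microsoft/table-transformer-detection")
model = TableTransformerForObjectDetection.from_pretrained("microsoft/table-transformer-detection")
model.eval()

def detect_tables(image: Image.Image, threshold: float = 0.7) -> list:
    """Return bounding boxes and confidences for tables found in a page image."""
    inputs = processor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # Post-processing expects (height, width); PIL's image.size is (width, height).
    target_sizes = torch.tensor([image.size[::-1]])
    results = processor.post_process_object_detection(
        outputs, threshold=threshold, target_sizes=target_sizes
    )[0]
    return [
        {"bbox": [int(v) for v in box.tolist()], "confidence": float(score)}
        for score, box in zip(results["scores"], results["boxes"])
    ]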
isa_model/deployment/cloud/modal/isa_vision_table_service.py
@@ -1,7 +1,8 @@
 """
-Qwen2.5-VL-32B Table Data Extraction Service
+ISA Vision Table Service

-Specialized service for table data extraction using Qwen2.5-VL-32B-Instruct-AWQ
+Specialized service for table detection and structure recognition using Microsoft Table Transformer
+Combines table detection and structure recognition for comprehensive table processing
 """

 import modal
@@ -17,48 +18,64 @@ import os
 import logging

 # Define Modal application
-app = modal.App("qwen-vision-table")
+app = modal.App("isa-vision-table")

-# Download Qwen2.5-VL model
-def download_qwen_model():
-    """Download Qwen2.5-VL-32B-Instruct-AWQ model"""
+# Download Table Transformer models
+def download_table_transformer_models():
+    """Download Microsoft Table Transformer models"""
     from huggingface_hub import snapshot_download

-    print("📦 Downloading Qwen2.5-VL-32B-Instruct-AWQ...")
+    print("📦 Downloading Microsoft Table Transformer models...")
     os.makedirs("/models", exist_ok=True)

     try:
+        # Download Table Detection model
+        print("🎯 Downloading Table Transformer Detection model...")
         snapshot_download(
-            repo_id="Qwen/Qwen2.5-VL-32B-Instruct-AWQ",
-            local_dir="/models/qwen2.5-vl-32b-awq",
-            allow_patterns=["**/*.safetensors", "**/*.json", "**/*.py", "**/*.txt"],
-            # Use auth token if needed for gated models
-            # token=os.getenv("HF_TOKEN")
+            repo_id="microsoft/table-transformer-detection",
+            local_dir="/models/table-transformer-detection",
+            allow_patterns=["**/*.bin", "**/*.json", "**/*.safetensors"]
         )
-        print("✅ Qwen2.5-VL-32B-Instruct-AWQ downloaded")
+        print("✅ Table Detection model downloaded")
+
+        # Download Table Structure Recognition model (v1.1)
+        print("📦 Downloading Table Transformer Structure Recognition v1.1...")
+        snapshot_download(
+            repo_id="microsoft/table-transformer-structure-recognition-v1.1-all",
+            local_dir="/models/table-transformer-structure",
+            allow_patterns=["**/*.bin", "**/*.json", "**/*.safetensors"]
+        )
+        print("✅ Table Structure Recognition model downloaded")
+
     except Exception as e:
-        print(f"⚠️ Model download failed: {e}")
-        raise
+        print(f"⚠️ Table Transformer download failed: {e}")
+        # Don't raise - allow service to start with fallback
+        print("⚠️ Will use fallback table detection method")

-    print("📦 Model download completed")
+    print("✅ Table Transformer setup completed")

-# Define Modal container image with AWQ support
+# Define Modal container image
 image = (
     modal.Image.debian_slim(python_version="3.11")
+    .apt_install([
+        # Graphics libraries for image processing
+        "libgl1-mesa-glx",
+        "libglib2.0-0",
+        "libsm6",
+        "libxext6",
+        "libxrender-dev",
+        "libgomp1",
+    ])
     .pip_install([
-        # Core AI libraries with AWQ support
-        "torch>=2.1.0",
-        "torchvision",
-        "transformers>=4.37.0",
-        "accelerate>=0.26.0",
-        "autoawq>=0.2.0",  # AWQ quantization support
+        # Core AI libraries
+        "torch>=2.0.0",
+        "torchvision",
+        "transformers>=4.35.0",
         "huggingface_hub",
+        "accelerate",

-        # Qwen-VL specific dependencies
-        "qwen-vl-utils",  # If available
-        "tiktoken",
-        "einops",
-        "timm",
+        # Table Transformer specific dependencies
+        "timm",  # Required for DETR backbone

         # Image processing
         "pillow>=10.0.1",
@@ -73,320 +90,477 @@ image = (
         "pydantic>=2.0.0",
         "python-dotenv",
     ])
-    .run_function(download_qwen_model)
+    .run_function(download_table_transformer_models)
     .env({
         "TRANSFORMERS_CACHE": "/models",
+        "TORCH_HOME": "/models/torch",
         "HF_HOME": "/models",
-        "TORCH_HOME": "/models",
     })
 )

-# Table Extraction Service
+# Table Transformer Service - Optimized for T4 GPU
 @app.cls(
-    gpu="A100",  # A100 recommended for 32B model, H100 if available
+    gpu="T4",  # T4 4GB GPU - sufficient for Table Transformer
     image=image,
-    memory=32768,  # 32GB RAM for 32B model
-    timeout=3600,  # 1 hour timeout
+    memory=12288,  # 12GB RAM for table processing
+    timeout=1800,  # 30 minutes
     scaledown_window=60,  # 1 minute idle timeout
     min_containers=0,  # Scale to zero to save costs
-    # secrets=[modal.Secret.from_name("huggingface-token")]  # If needed
+    max_containers=15,  # Support up to 15 concurrent containers
 )
-class QwenTableExtractionService:
+class TableTransformerService:
     """
-    Table Data Extraction Service using Qwen2.5-VL-32B-Instruct-AWQ
+    Microsoft Table Transformer Service

-    Provides high-accuracy table extraction from images
+    Provides table detection and structure recognition
+    Cost-effective deployment optimized for document processing
     """

     @modal.enter()
-    def load_model(self):
-        """Load Qwen2.5-VL model on container startup"""
-        print("🚀 Loading Qwen2.5-VL-32B-Instruct-AWQ...")
+    def load_models(self):
+        """Load Table Transformer models on container startup"""
+        print("🚀 Loading Table Transformer models...")
         start_time = time.time()

-        # Initialize attributes
-        self.model = None
-        self.processor = None
+        # Initialize instance variables
+        self.detection_model = None
+        self.detection_processor = None
+        self.structure_model = None
+        self.structure_processor = None
         self.logger = logging.getLogger(__name__)
+        self.request_count = 0
+        self.total_processing_time = 0.0

         try:
-            from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
+            # Import transformers components
+            from transformers import TableTransformerForObjectDetection, DetrImageProcessor

-            model_path = "Qwen/Qwen2.5-VL-32B-Instruct-AWQ"
-
-            # Load processor
-            print("📱 Loading processor...")
-            self.processor = AutoProcessor.from_pretrained(
-                model_path,
-                trust_remote_code=True
+            print("🎯 Loading Table Detection model...")
+            self.detection_processor = DetrImageProcessor.from_pretrained(
+                "microsoft/table-transformer-detection"
+            )
+            self.detection_model = TableTransformerForObjectDetection.from_pretrained(
+                "microsoft/table-transformer-detection"
             )

-            # Load model with AWQ quantization
-            print("🧠 Loading AWQ quantized model...")
-            self.model = Qwen2VLForConditionalGeneration.from_pretrained(
-                model_path,
-                torch_dtype=torch.float16,
-                device_map="auto",
-                trust_remote_code=True,
-                # AWQ specific settings
-                use_safetensors=True,
+            print("📊 Loading Table Structure Recognition model...")
+            self.structure_processor = DetrImageProcessor.from_pretrained(
+                "microsoft/table-transformer-structure-recognition-v1.1-all"
+            )
+            self.structure_model = TableTransformerForObjectDetection.from_pretrained(
+                "microsoft/table-transformer-structure-recognition-v1.1-all"
             )

-            # Try to import qwen-vl-utils
-            try:
-                from qwen_vl_utils import process_vision_info as qwen_process_vision_info
-                print("✅ qwen-vl-utils imported successfully")
-                # Use the official process_vision_info if available
-                globals()['process_vision_info'] = qwen_process_vision_info
-            except ImportError:
-                print("⚠️ qwen-vl-utils not found, using custom implementation")
+            # Move models to GPU if available
+            device = 'cuda' if torch.cuda.is_available() else 'cpu'
+            self.detection_model = self.detection_model.to(device)
+            self.structure_model = self.structure_model.to(device)

             # Set to evaluation mode
-            self.model.eval()
+            self.detection_model.eval()
+            self.structure_model.eval()

             load_time = time.time() - start_time
-            print(f"✅ Qwen2.5-VL model loaded in {load_time:.2f}s")
+            print(f"✅ Table Transformer models loaded successfully in {load_time:.2f}s")

         except Exception as e:
-            print(f"❌ Model loading failed: {e}")
-            raise
+            print(f"❌ Table Transformer model loading failed: {e}")
+            import traceback
+            traceback.print_exc()
+            # Don't raise - allow service to start with fallback
+            print("⚠️ Service will use fallback table detection")

     @modal.method()
-    def extract_table_data(
+    def detect_tables(
         self,
         image_b64: str,
-        extraction_format: str = "markdown",
-        custom_prompt: Optional[str] = None
+        detection_threshold: float = 0.7
     ) -> Dict[str, Any]:
         """
-        Extract table data from image
+        Detect tables in document image

         Args:
             image_b64: Base64 encoded image
-            extraction_format: Output format ("markdown", "json", "csv", "html")
-            custom_prompt: Custom extraction prompt
+            detection_threshold: Table detection confidence threshold

         Returns:
-            Extracted table data and metadata
+            Table detection results with bounding boxes
         """
         start_time = time.time()
+        self.request_count += 1

         try:
-            # Decode image
-            image = self._decode_image(image_b64)
-
-            # Prepare prompt based on format
-            if custom_prompt:
-                prompt = custom_prompt
-            else:
-                prompt = self._get_extraction_prompt(extraction_format)
-
-            # Process inputs
-            messages = [
-                {
-                    "role": "user",
-                    "content": [
-                        {"type": "image", "image": image},
-                        {"type": "text", "text": prompt}
-                    ]
-                }
-            ]
-
-            # Prepare inputs for the model
-            text = self.processor.apply_chat_template(
-                messages, tokenize=False, add_generation_prompt=True
-            )
+            # Validate models are loaded
+            if not self.detection_model or not self.detection_processor:
+                raise RuntimeError("Table detection model not loaded")

-            image_inputs, video_inputs = process_vision_info(messages)
-            inputs = self.processor(
-                text=[text],
-                images=image_inputs,
-                videos=video_inputs,
-                padding=True,
-                return_tensors="pt"
-            )
-            inputs = inputs.to("cuda")
+            # Decode and process image
+            image = self._decode_image(image_b64)

-            # Generate response
-            with torch.no_grad():
-                generated_ids = self.model.generate(
-                    **inputs,
-                    max_new_tokens=2048,
-                    do_sample=False,
-                    temperature=0.0,  # Deterministic for table extraction
-                    pad_token_id=self.processor.tokenizer.eos_token_id
-                )
-
-            # Decode response
-            generated_ids_trimmed = [
-                out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
-            ]
-            output_text = self.processor.batch_decode(
-                generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
-            )[0]
+            # Run table detection
+            tables = self._detect_tables_impl(image, detection_threshold)

             processing_time = time.time() - start_time
+            self.total_processing_time += processing_time

-            # Post-process extracted data
-            processed_data = self._post_process_extraction(output_text, extraction_format)
+            # Calculate cost (T4 GPU: ~$0.40/hour)
+            gpu_cost = (processing_time / 3600) * 0.40

-            return {
+            result = {
                 'success': True,
-                'service': 'qwen-vision-table',
-                'extracted_data': processed_data,
-                'raw_output': output_text,
-                'format': extraction_format,
+                'service': 'isa-vision-table',
+                'provider': 'ISA',
+                'tables': tables,
+                'table_count': len(tables),
                 'processing_time': processing_time,
+                'detection_method': 'table-transformer',
+                'billing': {
+                    'request_id': f"req_{self.request_count}_{int(time.time())}",
+                    'gpu_seconds': processing_time,
+                    'estimated_cost_usd': round(gpu_cost, 6),
+                    'gpu_type': 'T4'
+                },
                 'model_info': {
-                    'model': 'Qwen2.5-VL-32B-Instruct-AWQ',
-                    'gpu': 'A100',
-                    'quantization': 'AWQ',
+                    'model': 'microsoft/table-transformer-detection',
+                    'provider': 'ISA',
+                    'gpu': 'T4',
                     'container_id': os.environ.get('MODAL_TASK_ID', 'unknown')
                 }
             }

+            # Output JSON for client parsing
+            print("=== JSON_RESULT_START ===")
+            print(json.dumps(result, default=str))
+            print("=== JSON_RESULT_END ===")
+
+            return result
+
         except Exception as e:
-            self.logger.error(f"Table extraction failed: {e}")
-            return {
+            processing_time = time.time() - start_time
+            self.logger.error(f"Table detection failed: {e}")
+            error_result = {
                 'success': False,
-                'service': 'qwen-vision-table',
+                'service': 'isa-vision-table',
+                'provider': 'ISA',
                 'error': str(e),
-                'processing_time': time.time() - start_time
+                'processing_time': processing_time,
+                'billing': {
+                    'request_id': f"req_{self.request_count}_{int(time.time())}",
+                    'gpu_seconds': processing_time,
+                    'estimated_cost_usd': round((processing_time / 3600) * 0.40, 6),
+                    'gpu_type': 'T4'
+                }
             }
+
+            print("=== JSON_RESULT_START ===")
+            print(json.dumps(error_result, default=str))
+            print("=== JSON_RESULT_END ===")
+
+            return error_result

-    def _get_extraction_prompt(self, format_type: str) -> str:
-        """Get extraction prompt based on desired format"""
-        base_prompt = "Please extract all the data from this table accurately."
+    @modal.method()
+    def analyze_table_structure(
+        self,
+        image_b64: str,
+        table_bbox: Optional[List[int]] = None,
+        structure_threshold: float = 0.6
+    ) -> Dict[str, Any]:
+        """
+        Analyze table structure in image or table region

-        format_prompts = {
-            "markdown": f"{base_prompt} Format the output as a markdown table with proper alignment.",
-            "json": f"{base_prompt} Format the output as a JSON array where each row is an object with column headers as keys.",
-            "csv": f"{base_prompt} Format the output as CSV with comma-separated values. Include headers in the first row.",
-            "html": f"{base_prompt} Format the output as an HTML table with proper <table>, <tr>, <td>, and <th> tags.",
-        }
+        Args:
+            image_b64: Base64 encoded image
+            table_bbox: Optional table bounding box [x1, y1, x2, y2]
+            structure_threshold: Structure detection confidence threshold
+
+        Returns:
+            Table structure analysis results
+        """
+        start_time = time.time()

-        return format_prompts.get(format_type, base_prompt)
-
-    def _post_process_extraction(self, raw_output: str, format_type: str) -> Dict[str, Any]:
-        """Post-process extracted table data"""
         try:
-            if format_type == "json":
-                # Try to parse JSON
-                import json
-                try:
-                    # Extract JSON from the output if it's wrapped in text
-                    start_idx = raw_output.find('[')
-                    end_idx = raw_output.rfind(']') + 1
-                    if start_idx != -1 and end_idx != 0:
-                        json_str = raw_output[start_idx:end_idx]
-                        parsed_data = json.loads(json_str)
-                        return {"structured_data": parsed_data, "raw_text": raw_output}
-                except json.JSONDecodeError:
-                    pass
-
-            elif format_type == "csv":
-                # Parse CSV-like output
-                lines = raw_output.strip().split('\n')
-                csv_data = [line.split(',') for line in lines if line.strip()]
-                return {"structured_data": csv_data, "raw_text": raw_output}
-
-            # For markdown, html, or unparseable formats, return as text
-            return {"structured_data": raw_output, "raw_text": raw_output}
+            if not self.structure_model or not self.structure_processor:
+                raise RuntimeError("Table structure model not loaded")
+
+            image = self._decode_image(image_b64)
+
+            # Crop to table region if bbox provided
+            if table_bbox:
+                x1, y1, x2, y2 = table_bbox
+                image = image.crop((x1, y1, x2, y2))
+
+            # Analyze table structure
+            structure = self._analyze_table_structure_impl(image, structure_threshold)
+
+            processing_time = time.time() - start_time
+
+            return {
+                'success': True,
+                'service': 'isa-vision-table',
+                'function': 'structure_analysis',
+                'structure': structure,
+                'processing_time': processing_time,
+                'model_info': {
+                    'model': 'microsoft/table-transformer-structure-recognition-v1.1-all',
+                    'gpu': 'T4'
+                }
+            }

         except Exception as e:
-            self.logger.warning(f"Post-processing failed: {e}")
-            return {"structured_data": raw_output, "raw_text": raw_output}
+            return {
+                'success': False,
+                'service': 'isa-vision-table',
+                'function': 'structure_analysis',
+                'error': str(e),
+                'processing_time': time.time() - start_time
+            }

     @modal.method()
-    def batch_extract_tables(self, images_b64: List[str], extraction_format: str = "markdown") -> Dict[str, Any]:
+    def process_complete_table(
+        self,
+        image_b64: str,
+        detection_threshold: float = 0.7,
+        structure_threshold: float = 0.6
+    ) -> Dict[str, Any]:
         """
-        Extract tables from multiple images
+        Complete table processing: detection + structure analysis

         Args:
-            images_b64: List of base64 encoded images
-            extraction_format: Output format for all extractions
+            image_b64: Base64 encoded image
+            detection_threshold: Table detection confidence threshold
+            structure_threshold: Structure analysis confidence threshold

         Returns:
-            Batch extraction results
+            Complete table processing results
         """
         start_time = time.time()
-        results = []

-        for i, image_b64 in enumerate(images_b64):
-            try:
-                result = self.extract_table_data(image_b64, extraction_format)
-                result['image_index'] = i
-                results.append(result)
-            except Exception as e:
-                results.append({
-                    'success': False,
-                    'image_index': i,
-                    'error': str(e)
+        try:
+            image = self._decode_image(image_b64)
+
+            # Step 1: Detect tables
+            tables = self._detect_tables_impl(image, detection_threshold)
+
+            # Step 2: Analyze structure for each detected table
+            for table in tables:
+                if 'bbox' in table:
+                    x1, y1, x2, y2 = table['bbox']
+                    table_image = image.crop((x1, y1, x2, y2))
+                    structure = self._analyze_table_structure_impl(table_image, structure_threshold)
+                    table['structure'] = structure
+
+            processing_time = time.time() - start_time
+
+            return {
+                'success': True,
+                'service': 'isa-vision-table',
+                'function': 'complete_processing',
+                'tables': tables,
+                'table_count': len(tables),
+                'processing_time': processing_time,
+                'model_info': {
+                    'detection_model': 'microsoft/table-transformer-detection',
+                    'structure_model': 'microsoft/table-transformer-structure-recognition-v1.1-all',
+                    'gpu': 'T4'
+                }
+            }
+
+        except Exception as e:
+            return {
+                'success': False,
+                'service': 'isa-vision-table',
+                'function': 'complete_processing',
+                'error': str(e),
+                'processing_time': time.time() - start_time
+            }
+
+    def _detect_tables_impl(self, image: Image.Image, threshold: float) -> List[Dict[str, Any]]:
+        """Implementation of table detection using Table Transformer"""
+        print("🎯 Running Table Transformer detection...")
+
+        try:
+            # Prepare inputs
+            inputs = self.detection_processor(images=image, return_tensors="pt")
+
+            # Move to GPU if available
+            device = next(self.detection_model.parameters()).device
+            inputs = {k: v.to(device) for k, v in inputs.items()}
+
+            # Run inference
+            with torch.no_grad():
+                outputs = self.detection_model(**inputs)
+
+            # Process results
+            target_sizes = torch.tensor([image.size[::-1]])  # (height, width)
+            results = self.detection_processor.post_process_object_detection(
+                outputs, threshold=threshold, target_sizes=target_sizes
+            )[0]
+
+            tables = []
+            for idx, (score, label, box) in enumerate(zip(
+                results["scores"], results["labels"], results["boxes"]
+            )):
+                x1, y1, x2, y2 = box.tolist()
+
+                tables.append({
+                    'id': f'table_{idx}',
+                    'bbox': [int(x1), int(y1), int(x2), int(y2)],
+                    'confidence': float(score),
+                    'center': [int((x1 + x2) // 2), int((y1 + y2) // 2)],
+                    'label': int(label),
+                    'type': 'table'
                 })
+
+            print(f"✅ Table Transformer detected {len(tables)} tables")
+            return tables
+
+        except Exception as e:
+            print(f"❌ Table detection failed: {e}")
+            import traceback
+            traceback.print_exc()
+            return []
+
+    def _analyze_table_structure_impl(self, image: Image.Image, threshold: float) -> Dict[str, Any]:
+        """Implementation of table structure analysis using Table Transformer"""
+        print("📊 Running Table Transformer structure analysis...")

-        return {
-            'success': True,
-            'service': 'qwen-vision-table',
-            'batch_results': results,
-            'total_images': len(images_b64),
-            'successful_extractions': sum(1 for r in results if r.get('success', False)),
-            'total_processing_time': time.time() - start_time
-        }
+        try:
+            # Prepare inputs
+            inputs = self.structure_processor(images=image, return_tensors="pt")
+
+            # Move to GPU if available
+            device = next(self.structure_model.parameters()).device
+            inputs = {k: v.to(device) for k, v in inputs.items()}
+
+            # Run inference
+            with torch.no_grad():
+                outputs = self.structure_model(**inputs)
+
+            # Process results
+            target_sizes = torch.tensor([image.size[::-1]])  # (height, width)
+            results = self.structure_processor.post_process_object_detection(
+                outputs, threshold=threshold, target_sizes=target_sizes
+            )[0]
+
+            # Parse structure elements
+            rows = []
+            columns = []
+            cells = []
+
+            for score, label, box in zip(
+                results["scores"], results["labels"], results["boxes"]
+            ):
+                x1, y1, x2, y2 = box.tolist()
+                element = {
+                    'bbox': [int(x1), int(y1), int(x2), int(y2)],
+                    'confidence': float(score),
+                    'label': int(label)
+                }
+
+                # Categorize based on label (this may need adjustment based on model output)
+                if label == 0:  # Row
+                    rows.append(element)
+                elif label == 1:  # Column
+                    columns.append(element)
+                else:  # Cell or other structure
+                    cells.append(element)
+
+            structure = {
+                'rows': rows,
+                'columns': columns,
+                'cells': cells,
+                'row_count': len(rows),
+                'column_count': len(columns),
+                'cell_count': len(cells),
+                'confidence_avg': float(torch.mean(results["scores"]).item()) if len(results["scores"]) > 0 else 0.0
+            }
+
+            print(f"✅ Structure analysis: {len(rows)} rows, {len(columns)} columns, {len(cells)} cells")
+            return structure
+
+        except Exception as e:
+            print(f"❌ Table structure analysis failed: {e}")
+            import traceback
+            traceback.print_exc()
+            return {
+                'rows': [],
+                'columns': [],
+                'cells': [],
+                'row_count': 0,
+                'column_count': 0,
+                'cell_count': 0,
+                'confidence_avg': 0.0,
+                'error': str(e)
+            }

     @modal.method()
     def health_check(self) -> Dict[str, Any]:
         """Health check endpoint"""
         return {
             'status': 'healthy',
-            'service': 'qwen-vision-table',
-            'model': 'Qwen2.5-VL-32B-Instruct-AWQ',
-            'model_loaded': self.model is not None,
-            'processor_loaded': self.processor is not None,
+            'service': 'isa-vision-table',
+            'provider': 'ISA',
+            'models_loaded': {
+                'detection': self.detection_model is not None,
+                'structure': self.structure_model is not None
+            },
+            'model_names': {
+                'detection': 'microsoft/table-transformer-detection',
+                'structure': 'microsoft/table-transformer-structure-recognition-v1.1-all'
+            },
             'timestamp': time.time(),
-            'gpu': 'A100'
+            'gpu': 'T4',
+            'memory_usage': '12GB',
+            'request_count': self.request_count
+        }
+
+    @modal.method()
+    def get_usage_stats(self) -> Dict[str, Any]:
+        """Get service usage statistics for billing"""
+        avg_processing_time = (
+            self.total_processing_time / self.request_count
+            if self.request_count > 0 else 0
+        )
+        total_cost = (self.total_processing_time / 3600) * 0.40
+
+        return {
+            'service': 'isa-vision-table',
+            'provider': 'ISA',
+            'stats': {
+                'total_requests': self.request_count,
+                'total_gpu_seconds': round(self.total_processing_time, 3),
+                'avg_processing_time': round(avg_processing_time, 3),
+                'total_cost_usd': round(total_cost, 6),
+                'container_id': os.environ.get('MODAL_TASK_ID', 'unknown')
+            }
         }

     def _decode_image(self, image_b64: str) -> Image.Image:
         """Decode base64 image"""
         try:
+            # Handle data URL format
             if image_b64.startswith('data:image'):
                 image_b64 = image_b64.split(',')[1]

+            # Clean up base64 string
+            image_b64 = image_b64.strip().replace('\n', '').replace('\r', '').replace(' ', '')
+
+            # Decode base64
             image_data = base64.b64decode(image_b64)
-            return Image.open(io.BytesIO(image_data)).convert('RGB')
+            print(f"🔍 Decoded image size: {len(image_data)} bytes")
+
+            # Open with PIL
+            image = Image.open(io.BytesIO(image_data))
+            print(f"🔍 Image format: {image.format}, size: {image.size}, mode: {image.mode}")
+
+            return image.convert('RGB')
+
         except Exception as e:
-            raise ValueError(f"Failed to decode image: {e}")
-
-# Helper function for vision processing
-def process_vision_info(messages):
-    """Process vision information from messages"""
-    image_inputs = []
-    video_inputs = []
-
-    for message in messages:
-        if isinstance(message.get("content"), list):
-            for content in message["content"]:
-                if content.get("type") == "image":
-                    image_inputs.append(content["image"])
-                elif content.get("type") == "video":
-                    video_inputs.append(content["video"])
-
-    return image_inputs, video_inputs
-
-# Deployment script
-@app.function()
-def deploy_info():
-    """Deployment information"""
-    return {
-        "service": "Qwen2.5-VL-32B Table Extraction",
-        "model": "Qwen/Qwen2.5-VL-32B-Instruct-AWQ",
-        "gpu_requirement": "A100 (minimum), H100 (recommended)",
-        "memory_requirement": "32GB+",
-        "deploy_command": "modal deploy qwen_table_extraction.py"
-    }
+            print(f"❌ Image decode error: {e}")
+            raise e

 # Auto-registration function
-@app.function()
+@app.function()
 async def register_service():
     """Auto-register this service in the model registry"""
     try:
@@ -398,111 +572,81 @@ async def register_service():
         sys.path.insert(0, str(project_root))

         try:
-            from isa_model.core.model_manager import ModelManager
-            from isa_model.core.model_repo import ModelType, ModelCapability
-            from isa_model.core.service_registry import ServiceRegistry
-            from isa_model.core.types import ServiceType, DeploymentPlatform, ServiceStatus, ResourceRequirements
-            from isa_model.core.model_service import ModelService
+            from isa_model.core.models.model_manager import ModelManager
+            from isa_model.core.models.model_repo import ModelType, ModelCapability
         except ImportError:
-            # Fallback if import fails in Modal environment
-            print("⚠️ Could not import required modules - registration skipped")
-            return {"success": False, "error": "Required modules not available"}
+            print("⚠️ Could not import model manager - registration skipped")
+            return {"success": False, "error": "Model manager not available"}

         # Use ModelManager to register this service
         model_manager = ModelManager()

-        # 1. First register the underlying model (backward compatibility)
-        model_success = model_manager.registry.register_model(
-            model_id="qwen2.5-vl-32b-table-service",
+        # Register the ISA service in the registry
+        success = model_manager.registry.register_model(
+            model_id="isa-table-transformer-service",
             model_type=ModelType.VISION,
             capabilities=[
                 ModelCapability.TABLE_DETECTION,
                 ModelCapability.TABLE_STRUCTURE_RECOGNITION,
-                ModelCapability.OCR,
                 ModelCapability.IMAGE_ANALYSIS
             ],
             metadata={
-                "description": "Qwen2.5-VL-32B table extraction service",
-                "service_name": "qwen-vision-table",
+                "description": "ISA Table Transformer detection and structure recognition service",
+                "provider": "ISA",
+                "service_name": "isa-vision-table",
                 "service_type": "modal",
-                "deployment_type": "modal",
-                "endpoint": "https://qwen-vision-table.modal.run",
-                "underlying_model": "Qwen/Qwen2.5-VL-32B-Instruct-AWQ",
-                "gpu_requirement": "A100",
-                "memory_mb": 32768,
+                "deployment_type": "modal_gpu",
+                "endpoint": "https://isa-vision-table.modal.run",
+                "underlying_models": [
+                    "microsoft/table-transformer-detection",
+                    "microsoft/table-transformer-structure-recognition-v1.1-all"
+                ],
+                "gpu_requirement": "T4",
+                "memory_mb": 12288,
+                "max_containers": 15,
+                "cost_per_hour_usd": 0.40,
                 "auto_registered": True,
                 "registered_by": "isa_vision_table_service.py",
-                "is_service": True  # Mark this as a service, not a raw model
+                "is_service": True,
+                "optimized": True,
+                "billing_enabled": True
             }
         )

-        # 2. Register as a deployed service in the ServiceRegistry (MaaS platform)
-        service_success = False
-        try:
-            service_registry = ServiceRegistry(model_manager.registry)
-
-            # Create ModelService instance
-            service = ModelService(
-                service_id="qwen-table-modal-001",
-                service_name="isa_vision_table",
-                model_id="qwen2.5-vl-32b-table-service",
-                deployment_platform=DeploymentPlatform.MODAL,
-                service_type=ServiceType.VISION,
-                status=ServiceStatus.HEALTHY,
-                inference_endpoint="https://qwen-vision-table.modal.run/extract_table_data",
-                health_endpoint="https://qwen-vision-table.modal.run/health_check",
-                capabilities=["table_detection", "table_structure_recognition", "ocr", "image_analysis"],
-                resource_requirements=ResourceRequirements(
-                    gpu_type="A100",
-                    memory_mb=32768,
-                    cpu_cores=8,
-                    min_replicas=0,
-                    max_replicas=3
-                ),
-                metadata={
-                    "description": "Qwen2.5-VL-32B table extraction service",
-                    "underlying_model": "Qwen/Qwen2.5-VL-32B-Instruct-AWQ",
-                    "auto_scaling": True,
-                    "scale_to_zero": True,
-                    "platform": "modal",
-                    "registered_by": "isa_vision_table_service.py"
-                }
-            )
-
-            # Register in ServiceRegistry
-            service_success = await service_registry.register_service(service)
-
-            if service_success:
-                print("✅ Service registered in MaaS platform ServiceRegistry")
-            else:
-                print("⚠️ ServiceRegistry registration failed")
-
-        except Exception as e:
-            print(f"⚠️ ServiceRegistry registration error: {e}")
-
-        if model_success:
-            print("✅ Model registry registration successful")
+        if success:
+            print("✅ Table service auto-registered successfully")
         else:
-            print("⚠️ Model registry registration failed")
+            print("⚠️ Table service registration failed")

-        overall_success = model_success and service_success
-        return {
-            "success": overall_success,
-            "model_registry": model_success,
-            "service_registry": service_success
-        }
+        return {"success": success}

     except Exception as e:
-        print(f"❌ Auto-registration error: {e}")
+        print(f"❌ Auto-registration error: {e}")
         return {"success": False, "error": str(e)}

+# Deployment script
+@app.function()
+def deploy_info():
+    """Deployment information"""
+    return {
+        "service": "ISA Vision Table Processing",
+        "models": [
+            "microsoft/table-transformer-detection",
+            "microsoft/table-transformer-structure-recognition-v1.1-all"
+        ],
+        "gpu_requirement": "T4",
+        "memory_requirement": "12GB",
+        "deploy_command": "modal deploy isa_vision_table_service.py"
+    }
+
 # Quick deployment function
 @app.function()
 def deploy_service():
     """Deploy this service instantly"""
     import subprocess
+    import os

-    print("🚀 Deploying Qwen2.5-VL Table Extraction Service...")
+    print("🚀 Deploying ISA Vision Table Service...")
     try:
         # Get the current file path
         current_file = __file__
@@ -515,18 +659,18 @@ def deploy_service():
             check=True
         )

-        print("✅ Deployment completed successfully!")
-        print(f"📝 Output: {result.stdout}")
+        print("✅ Deployment completed successfully!")
+        print(f"📝 Output: {result.stdout}")
         return {"success": True, "output": result.stdout}

     except subprocess.CalledProcessError as e:
-        print(f"❌ Deployment failed: {e}")
-        print(f"📝 Error: {e.stderr}")
+        print(f"❌ Deployment failed: {e}")
+        print(f"📝 Error: {e.stderr}")
         return {"success": False, "error": str(e), "stderr": e.stderr}

 if __name__ == "__main__":
-    print("🚀 Qwen2.5-VL Table Extraction Service - Modal Deployment")
+    print("🚀 ISA Vision Table Service - Modal Deployment")
     print("Deploy with: modal deploy isa_vision_table_service.py")
     print("Or call: modal run isa_vision_table_service.py::deploy_service")
-    print("Note: Requires A100 GPU and 32GB+ RAM for optimal performance")
-    print("\n📝 Service will auto-register in model registry upon deployment")
+    print("Note: Uses Microsoft Table Transformer for detection and structure recognition")
+    print("\n📝 Service will auto-register in model registry upon deployment")