isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228)
  1. isa_model/client.py +1166 -584
  2. isa_model/core/cache/redis_cache.py +410 -0
  3. isa_model/core/config/config_manager.py +282 -12
  4. isa_model/core/config.py +91 -1
  5. isa_model/core/database/__init__.py +1 -0
  6. isa_model/core/database/direct_db_client.py +114 -0
  7. isa_model/core/database/migration_manager.py +563 -0
  8. isa_model/core/database/migrations.py +297 -0
  9. isa_model/core/database/supabase_client.py +258 -0
  10. isa_model/core/dependencies.py +316 -0
  11. isa_model/core/discovery/__init__.py +19 -0
  12. isa_model/core/discovery/consul_discovery.py +190 -0
  13. isa_model/core/logging/__init__.py +54 -0
  14. isa_model/core/logging/influx_logger.py +523 -0
  15. isa_model/core/logging/loki_logger.py +160 -0
  16. isa_model/core/models/__init__.py +46 -0
  17. isa_model/core/models/config_models.py +625 -0
  18. isa_model/core/models/deployment_billing_tracker.py +430 -0
  19. isa_model/core/models/model_billing_tracker.py +60 -88
  20. isa_model/core/models/model_manager.py +66 -25
  21. isa_model/core/models/model_metadata.py +690 -0
  22. isa_model/core/models/model_repo.py +217 -55
  23. isa_model/core/models/model_statistics_tracker.py +234 -0
  24. isa_model/core/models/model_storage.py +0 -1
  25. isa_model/core/models/model_version_manager.py +959 -0
  26. isa_model/core/models/system_models.py +857 -0
  27. isa_model/core/pricing_manager.py +2 -249
  28. isa_model/core/repositories/__init__.py +9 -0
  29. isa_model/core/repositories/config_repository.py +912 -0
  30. isa_model/core/resilience/circuit_breaker.py +366 -0
  31. isa_model/core/security/secrets.py +358 -0
  32. isa_model/core/services/__init__.py +2 -4
  33. isa_model/core/services/intelligent_model_selector.py +479 -370
  34. isa_model/core/storage/hf_storage.py +2 -2
  35. isa_model/core/types.py +8 -0
  36. isa_model/deployment/__init__.py +5 -48
  37. isa_model/deployment/core/__init__.py +2 -31
  38. isa_model/deployment/core/deployment_manager.py +1278 -368
  39. isa_model/deployment/local/__init__.py +31 -0
  40. isa_model/deployment/local/config.py +248 -0
  41. isa_model/deployment/local/gpu_gateway.py +607 -0
  42. isa_model/deployment/local/health_checker.py +428 -0
  43. isa_model/deployment/local/provider.py +586 -0
  44. isa_model/deployment/local/tensorrt_service.py +621 -0
  45. isa_model/deployment/local/transformers_service.py +644 -0
  46. isa_model/deployment/local/vllm_service.py +527 -0
  47. isa_model/deployment/modal/__init__.py +8 -0
  48. isa_model/deployment/modal/config.py +136 -0
  49. isa_model/deployment/modal/deployer.py +894 -0
  50. isa_model/deployment/modal/services/__init__.py +3 -0
  51. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  52. isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
  53. isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
  54. isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
  55. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  56. isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
  57. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  58. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  59. isa_model/deployment/modal/services/video/__init__.py +1 -0
  60. isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
  61. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  62. isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
  63. isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
  64. isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
  65. isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
  66. isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
  67. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  68. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  69. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  70. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  71. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  72. isa_model/deployment/storage/__init__.py +5 -0
  73. isa_model/deployment/storage/deployment_repository.py +824 -0
  74. isa_model/deployment/triton/__init__.py +10 -0
  75. isa_model/deployment/triton/config.py +196 -0
  76. isa_model/deployment/triton/configs/__init__.py +1 -0
  77. isa_model/deployment/triton/provider.py +512 -0
  78. isa_model/deployment/triton/scripts/__init__.py +1 -0
  79. isa_model/deployment/triton/templates/__init__.py +1 -0
  80. isa_model/inference/__init__.py +47 -1
  81. isa_model/inference/ai_factory.py +179 -16
  82. isa_model/inference/legacy_services/__init__.py +21 -0
  83. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  84. isa_model/inference/legacy_services/model_service.py +573 -0
  85. isa_model/inference/legacy_services/model_serving.py +717 -0
  86. isa_model/inference/legacy_services/model_training.py +561 -0
  87. isa_model/inference/models/__init__.py +21 -0
  88. isa_model/inference/models/inference_config.py +551 -0
  89. isa_model/inference/models/inference_record.py +675 -0
  90. isa_model/inference/models/performance_models.py +714 -0
  91. isa_model/inference/repositories/__init__.py +9 -0
  92. isa_model/inference/repositories/inference_repository.py +828 -0
  93. isa_model/inference/services/audio/__init__.py +21 -0
  94. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  95. isa_model/inference/services/audio/base_stt_service.py +184 -11
  96. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  97. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  98. isa_model/inference/services/audio/openai_stt_service.py +53 -11
  99. isa_model/inference/services/base_service.py +17 -1
  100. isa_model/inference/services/custom_model_manager.py +277 -0
  101. isa_model/inference/services/embedding/__init__.py +13 -0
  102. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  103. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  104. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  105. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  106. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  107. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  108. isa_model/inference/services/img/__init__.py +2 -2
  109. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  110. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  111. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  112. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  113. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  114. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  115. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  116. isa_model/inference/services/llm/__init__.py +10 -2
  117. isa_model/inference/services/llm/base_llm_service.py +361 -26
  118. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  119. isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
  120. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  121. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  122. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  123. isa_model/inference/services/llm/local_llm_service.py +747 -0
  124. isa_model/inference/services/llm/ollama_llm_service.py +11 -3
  125. isa_model/inference/services/llm/openai_llm_service.py +670 -56
  126. isa_model/inference/services/llm/yyds_llm_service.py +10 -3
  127. isa_model/inference/services/vision/__init__.py +27 -6
  128. isa_model/inference/services/vision/base_vision_service.py +118 -185
  129. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  130. isa_model/inference/services/vision/helpers/image_utils.py +19 -10
  131. isa_model/inference/services/vision/isa_vision_service.py +634 -0
  132. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  133. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  134. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  135. isa_model/serving/api/cache_manager.py +245 -0
  136. isa_model/serving/api/dependencies/__init__.py +1 -0
  137. isa_model/serving/api/dependencies/auth.py +194 -0
  138. isa_model/serving/api/dependencies/database.py +139 -0
  139. isa_model/serving/api/error_handlers.py +284 -0
  140. isa_model/serving/api/fastapi_server.py +240 -18
  141. isa_model/serving/api/middleware/auth.py +317 -0
  142. isa_model/serving/api/middleware/security.py +268 -0
  143. isa_model/serving/api/middleware/tenant_context.py +414 -0
  144. isa_model/serving/api/routes/analytics.py +489 -0
  145. isa_model/serving/api/routes/config.py +645 -0
  146. isa_model/serving/api/routes/deployment_billing.py +315 -0
  147. isa_model/serving/api/routes/deployments.py +475 -0
  148. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  149. isa_model/serving/api/routes/health.py +32 -12
  150. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  151. isa_model/serving/api/routes/local_deployments.py +448 -0
  152. isa_model/serving/api/routes/logs.py +430 -0
  153. isa_model/serving/api/routes/settings.py +582 -0
  154. isa_model/serving/api/routes/tenants.py +575 -0
  155. isa_model/serving/api/routes/unified.py +992 -171
  156. isa_model/serving/api/routes/webhooks.py +479 -0
  157. isa_model/serving/api/startup.py +318 -0
  158. isa_model/serving/modal_proxy_server.py +249 -0
  159. isa_model/utils/gpu_utils.py +311 -0
  160. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
  161. isa_model-0.4.3.dist-info/RECORD +193 -0
  162. isa_model/deployment/cloud/__init__.py +0 -9
  163. isa_model/deployment/cloud/modal/__init__.py +0 -10
  164. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  165. isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
  166. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
  167. isa_model/deployment/cloud/modal/register_models.py +0 -321
  168. isa_model/deployment/core/deployment_config.py +0 -356
  169. isa_model/deployment/core/isa_deployment_service.py +0 -401
  170. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  171. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  172. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  173. isa_model/deployment/runtime/deployed_service.py +0 -338
  174. isa_model/deployment/services/__init__.py +0 -9
  175. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  176. isa_model/deployment/services/model_service.py +0 -332
  177. isa_model/deployment/services/service_monitor.py +0 -356
  178. isa_model/deployment/services/service_registry.py +0 -527
  179. isa_model/eval/__init__.py +0 -92
  180. isa_model/eval/benchmarks.py +0 -469
  181. isa_model/eval/config/__init__.py +0 -10
  182. isa_model/eval/config/evaluation_config.py +0 -108
  183. isa_model/eval/evaluators/__init__.py +0 -18
  184. isa_model/eval/evaluators/base_evaluator.py +0 -503
  185. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  186. isa_model/eval/factory.py +0 -531
  187. isa_model/eval/infrastructure/__init__.py +0 -24
  188. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  189. isa_model/eval/metrics.py +0 -798
  190. isa_model/inference/adapter/unified_api.py +0 -248
  191. isa_model/inference/services/helpers/stacked_config.py +0 -148
  192. isa_model/inference/services/img/flux_professional_service.py +0 -603
  193. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  194. isa_model/inference/services/others/table_transformer_service.py +0 -61
  195. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  196. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  197. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  198. isa_model/scripts/inference_tracker.py +0 -283
  199. isa_model/scripts/mlflow_manager.py +0 -379
  200. isa_model/scripts/model_registry.py +0 -465
  201. isa_model/scripts/register_models.py +0 -370
  202. isa_model/scripts/register_models_with_embeddings.py +0 -510
  203. isa_model/scripts/start_mlflow.py +0 -95
  204. isa_model/scripts/training_tracker.py +0 -257
  205. isa_model/training/__init__.py +0 -74
  206. isa_model/training/annotation/annotation_schema.py +0 -47
  207. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  208. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  209. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  210. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  211. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  212. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  213. isa_model/training/annotation/views/annotation_controller.py +0 -158
  214. isa_model/training/cloud/__init__.py +0 -22
  215. isa_model/training/cloud/job_orchestrator.py +0 -402
  216. isa_model/training/cloud/runpod_trainer.py +0 -454
  217. isa_model/training/cloud/storage_manager.py +0 -482
  218. isa_model/training/core/__init__.py +0 -23
  219. isa_model/training/core/config.py +0 -181
  220. isa_model/training/core/dataset.py +0 -222
  221. isa_model/training/core/trainer.py +0 -720
  222. isa_model/training/core/utils.py +0 -213
  223. isa_model/training/factory.py +0 -424
  224. isa_model-0.3.91.dist-info/RECORD +0 -138
  225. /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
  226. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  227. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  228. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
isa_model/deployment/cloud/modal/register_models.py (deleted, -321 lines)
@@ -1,321 +0,0 @@
- """
- Model Registration Script for UI Analysis Pipeline
-
- Registers the latest versions of UI analysis models in the core model registry
- Prepares models for Modal deployment with proper version management
- """
-
- import asyncio
- from pathlib import Path
- import sys
- import os
-
- # Add project root to path
- project_root = Path(__file__).parent.parent.parent.parent
- sys.path.insert(0, str(project_root))
-
- from isa_model.core.model_manager import ModelManager
- from isa_model.core.model_repo import ModelRegistry, ModelType, ModelCapability
-
- async def register_ui_analysis_models():
-     """Register UI analysis models with latest versions"""
-
-     # Initialize model manager and registry
-     model_manager = ModelManager()
-
-     print("🔧 Registering UI Analysis Models...")
-
-     # Debug: Check available capabilities
-     print("Available capabilities:")
-     for cap in ModelCapability:
-         print(f" - {cap.name}: {cap.value}")
-     print()
-
-     # Model definitions with latest versions from HuggingFace
-     models_to_register = [
-         {
-             "model_id": "omniparser-v2.0",
-             "repo_id": "microsoft/OmniParser",
-             "model_type": ModelType.VISION,
-             "capabilities": [
-                 ModelCapability.UI_DETECTION,
-                 ModelCapability.IMAGE_ANALYSIS,
-                 ModelCapability.IMAGE_UNDERSTANDING
-             ],
-             "revision": "main",  # Latest version
-             "metadata": {
-                 "description": "Microsoft OmniParser v2.0 - Advanced UI element detection",
-                 "provider": "microsoft",
-                 "model_family": "omniparser",
-                 "version": "2.0",
-                 "paper": "https://arxiv.org/abs/2408.00203",
-                 "huggingface_url": "https://huggingface.co/microsoft/OmniParser",
-                 "use_case": "UI element detection and parsing",
-                 "input_format": "image",
-                 "output_format": "structured_elements",
-                 "gpu_memory_mb": 8192,
-                 "inference_time_ms": 500
-             }
-         },
-         {
-             "model_id": "table-transformer-v1.1-detection",
-             "repo_id": "microsoft/table-transformer-detection",
-             "model_type": ModelType.VISION,
-             "capabilities": [
-                 ModelCapability.TABLE_DETECTION,
-                 ModelCapability.IMAGE_ANALYSIS
-             ],
-             "revision": "main",
-             "metadata": {
-                 "description": "Microsoft Table Transformer v1.1 - Table detection model",
-                 "provider": "microsoft",
-                 "model_family": "table-transformer",
-                 "version": "1.1",
-                 "paper": "https://arxiv.org/abs/2110.00061",
-                 "huggingface_url": "https://huggingface.co/microsoft/table-transformer-detection",
-                 "use_case": "Table detection in documents and images",
-                 "input_format": "image",
-                 "output_format": "bounding_boxes",
-                 "gpu_memory_mb": 4096,
-                 "inference_time_ms": 300
-             }
-         },
-         {
-             "model_id": "table-transformer-v1.1-structure",
-             "repo_id": "microsoft/table-transformer-structure-recognition",
-             "model_type": ModelType.VISION,
-             "capabilities": [
-                 ModelCapability.TABLE_STRUCTURE_RECOGNITION,
-                 ModelCapability.IMAGE_ANALYSIS
-             ],
-             "revision": "main",
-             "metadata": {
-                 "description": "Microsoft Table Transformer v1.1 - Table structure recognition",
-                 "provider": "microsoft",
-                 "model_family": "table-transformer",
-                 "version": "1.1",
-                 "paper": "https://arxiv.org/abs/2110.00061",
-                 "huggingface_url": "https://huggingface.co/microsoft/table-transformer-structure-recognition",
-                 "use_case": "Table structure recognition and cell extraction",
-                 "input_format": "image",
-                 "output_format": "table_structure",
-                 "gpu_memory_mb": 4096,
-                 "inference_time_ms": 400
-             }
-         },
-         {
-             "model_id": "paddleocr-v3.0",
-             "repo_id": "PaddlePaddle/PaddleOCR",
-             "model_type": ModelType.VISION,
-             "capabilities": [
-                 ModelCapability.OCR,
-                 ModelCapability.IMAGE_ANALYSIS
-             ],
-             "revision": "release/2.8",
-             "metadata": {
-                 "description": "PaddleOCR v3.0 - Multilingual OCR model",
-                 "provider": "paddlepaddle",
-                 "model_family": "paddleocr",
-                 "version": "3.0",
-                 "github_url": "https://github.com/PaddlePaddle/PaddleOCR",
-                 "huggingface_url": "https://huggingface.co/PaddlePaddle/PaddleOCR",
-                 "use_case": "Text extraction from images",
-                 "input_format": "image",
-                 "output_format": "text_with_coordinates",
-                 "languages": ["en", "ch", "multilingual"],
-                 "gpu_memory_mb": 2048,
-                 "inference_time_ms": 200
-             }
-         },
-         {
-             "model_id": "yolov8n-fallback",
-             "repo_id": "ultralytics/yolov8",
-             "model_type": ModelType.VISION,
-             "capabilities": [
-                 ModelCapability.IMAGE_ANALYSIS,
-                 ModelCapability.UI_DETECTION  # As fallback
-             ],
-             "revision": "main",
-             "metadata": {
-                 "description": "YOLOv8 Nano - Fallback object detection model",
-                 "provider": "ultralytics",
-                 "model_family": "yolo",
-                 "version": "8.0",
-                 "github_url": "https://github.com/ultralytics/ultralytics",
-                 "use_case": "General object detection (fallback for UI elements)",
-                 "input_format": "image",
-                 "output_format": "bounding_boxes",
-                 "gpu_memory_mb": 1024,
-                 "inference_time_ms": 50
-             }
-         }
-     ]
-
-     # Register each model
-     registration_results = []
-
-     for model_config in models_to_register:
-         print(f"\n📝 Registering {model_config['model_id']}...")
-
-         try:
-             # Register model in registry (without downloading)
-             success = model_manager.registry.register_model(
-                 model_id=model_config['model_id'],
-                 model_type=model_config['model_type'],
-                 capabilities=model_config['capabilities'],
-                 metadata={
-                     **model_config['metadata'],
-                     'repo_id': model_config['repo_id'],
-                     'revision': model_config['revision'],
-                     'registered_at': 'auto',
-                     'download_status': 'not_downloaded'
-                 }
-             )
-
-             if success:
-                 print(f"✅ Successfully registered {model_config['model_id']}")
-                 registration_results.append({
-                     'model_id': model_config['model_id'],
-                     'status': 'success'
-                 })
-             else:
-                 print(f"❌ Failed to register {model_config['model_id']}")
-                 registration_results.append({
-                     'model_id': model_config['model_id'],
-                     'status': 'failed'
-                 })
-
-         except Exception as e:
-             print(f"❌ Error registering {model_config['model_id']}: {e}")
-             registration_results.append({
-                 'model_id': model_config['model_id'],
-                 'status': 'error',
-                 'error': str(e)
-             })
-
-     # Print summary
-     print(f"\n📊 Registration Summary:")
-     successful = [r for r in registration_results if r['status'] == 'success']
-     failed = [r for r in registration_results if r['status'] != 'success']
-
-     print(f"✅ Successfully registered: {len(successful)} models")
-     for result in successful:
-         print(f" - {result['model_id']}")
-
-     if failed:
-         print(f"❌ Failed to register: {len(failed)} models")
-         for result in failed:
-             error_msg = f" ({result.get('error', 'unknown error')})" if 'error' in result else ""
-             print(f" - {result['model_id']}{error_msg}")
-
-     return registration_results
-
- async def verify_model_registry():
-     """Verify registered models and their capabilities"""
-
-     model_manager = ModelManager()
-
-     print(f"\n🔍 Verifying Model Registry...")
-
-     # Check models by capability
-     capabilities_to_check = [
-         ModelCapability.UI_DETECTION,
-         ModelCapability.OCR,
-         ModelCapability.TABLE_DETECTION,
-         ModelCapability.TABLE_STRUCTURE_RECOGNITION
-     ]
-
-     for capability in capabilities_to_check:
-         models = model_manager.registry.get_models_by_capability(capability)
-         print(f"\n📋 Models with {capability.value} capability:")
-
-         if models:
-             for model_id, model_info in models.items():
-                 metadata = model_info.get('metadata', {})
-                 version = metadata.get('version', 'unknown')
-                 provider = metadata.get('provider', 'unknown')
-                 print(f" ✅ {model_id} (v{version}, {provider})")
-         else:
-             print(f" ❌ No models found for {capability.value}")
-
-     # Print overall stats
-     stats = model_manager.registry.get_stats()
-     print(f"\n📈 Registry Statistics:")
-     print(f" Total models: {stats['total_models']}")
-     print(f" Models by type: {stats['models_by_type']}")
-     print(f" Models by capability: {stats['models_by_capability']}")
-
- def get_model_for_capability(capability: ModelCapability) -> str:
-     """Get the best model for a specific capability"""
-
-     model_manager = ModelManager()
-     models = model_manager.registry.get_models_by_capability(capability)
-
-     if not models:
-         return None
-
-     # Priority order for UI analysis models
-     priority_order = {
-         ModelCapability.UI_DETECTION: [
-             "omniparser-v2.0",
-             "yolov8n-fallback"
-         ],
-         ModelCapability.OCR: [
-             "paddleocr-v3.0"
-         ],
-         ModelCapability.TABLE_DETECTION: [
-             "table-transformer-v1.1-detection"
-         ],
-         ModelCapability.TABLE_STRUCTURE_RECOGNITION: [
-             "table-transformer-v1.1-structure"
-         ]
-     }
-
-     preferred_models = priority_order.get(capability, [])
-
-     # Return the first available preferred model
-     for model_id in preferred_models:
-         if model_id in models:
-             return model_id
-
-     # Fallback to first available model
-     return list(models.keys())[0] if models else None
-
- async def main():
-     """Main registration workflow"""
-
-     print("🚀 ISA Model Registry - UI Analysis Models Registration")
-     print("=" * 60)
-
-     try:
-         # Register models
-         results = await register_ui_analysis_models()
-
-         # Verify registration
-         await verify_model_registry()
-
-         print(f"\n🎉 Model registration completed!")
-         print(f" Use ModelManager.get_model() to download and use models")
-         print(f" Use get_model_for_capability() to get recommended models")
-
-         # Show usage example
-         print(f"\n💡 Usage Example:")
-         print(f" from isa_model.core.model_manager import ModelManager")
-         print(f" from isa_model.core.model_repo import ModelCapability")
-         print(f" ")
-         print(f" manager = ModelManager()")
-         print(f" ui_model_path = await manager.get_model(")
-         print(f" model_id='omniparser-v2.0',")
-         print(f" repo_id='microsoft/OmniParser',")
-         print(f" model_type=ModelType.VISION,")
-         print(f" capabilities=[ModelCapability.UI_DETECTION]")
-         print(f" )")
-
-     except Exception as e:
-         print(f"❌ Registration failed: {e}")
-         return False
-
-     return True
-
- if __name__ == "__main__":
-     asyncio.run(main())
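
That is the end of the deleted register_models.py hunk. For reference, a minimal sketch of how the 0.3.91 script was meant to be driven, pieced together from the usage example it prints in main(); the fetch_ocr_model wrapper name is an invention for illustration, and the imports only resolve against the 0.3.91 layout, where this module still exists:

import asyncio

from isa_model.core.model_manager import ModelManager
from isa_model.core.model_repo import ModelType, ModelCapability
from isa_model.deployment.cloud.modal.register_models import (
    get_model_for_capability,
    register_ui_analysis_models,
)

async def fetch_ocr_model():
    # Register the model catalog, then resolve the preferred OCR model
    # via the script's priority table ("paddleocr-v3.0" once registered)
    await register_ui_analysis_models()
    model_id = get_model_for_capability(ModelCapability.OCR)
    if model_id is None:
        raise RuntimeError("no OCR-capable model registered")

    # Download (or reuse a cached copy of) the model via the manager,
    # mirroring the call signature shown in the script's printed example
    manager = ModelManager()
    return await manager.get_model(
        model_id=model_id,
        repo_id="PaddlePaddle/PaddleOCR",
        model_type=ModelType.VISION,
        capabilities=[ModelCapability.OCR],
    )

if __name__ == "__main__":
    print(asyncio.run(fetch_ocr_model()))

Note that the script's own printed example omits the ModelType import it uses; the sketch adds it.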
isa_model/deployment/core/deployment_config.py (deleted, -356 lines)
@@ -1,356 +0,0 @@
- """
- Deployment Configuration Classes
-
- Defines configuration classes for different deployment scenarios including
- RunPod serverless, Triton inference server, and TensorRT-LLM backend.
- """
-
- from dataclasses import dataclass, field
- from typing import Optional, Dict, Any, List
- from enum import Enum
- from pathlib import Path
-
-
- class DeploymentProvider(str, Enum):
-     """Deployment providers"""
-     RUNPOD_SERVERLESS = "runpod_serverless"
-     RUNPOD_PODS = "runpod_pods"
-     AWS_LAMBDA = "aws_lambda"
-     GOOGLE_CLOUD_RUN = "google_cloud_run"
-     AZURE_CONTAINER_INSTANCES = "azure_container_instances"
-     LOCAL = "local"
-
-
- class InferenceEngine(str, Enum):
-     """Inference engines"""
-     TRITON = "triton"
-     VLLM = "vllm"
-     TENSORRT_LLM = "tensorrt_llm"
-     HUGGINGFACE = "huggingface"
-     ONNX = "onnx"
-     TORCHSCRIPT = "torchscript"
-
-
- class ModelFormat(str, Enum):
-     """Model formats for deployment"""
-     HUGGINGFACE = "huggingface"
-     TENSORRT = "tensorrt"
-     ONNX = "onnx"
-     TORCHSCRIPT = "torchscript"
-     SAFETENSORS = "safetensors"
-
-
- @dataclass
- class TritonConfig:
-     """Configuration for Triton Inference Server"""
-
-     # Model repository configuration
-     model_repository: str = "/models"
-     model_name: str = "model"
-     model_version: str = "1"
-
-     # Backend configuration
-     backend: str = "tensorrtllm"  # tensorrtllm, python, onnxruntime
-     max_batch_size: int = 8
-     max_sequence_length: int = 2048
-
-     # TensorRT-LLM specific
-     tensorrt_llm_model_dir: str = "/models/tensorrt_llm"
-     engine_dir: str = "/models/engines"
-     tokenizer_dir: str = "/models/tokenizer"
-
-     # Performance settings
-     instance_group_count: int = 1
-     instance_group_kind: str = "KIND_GPU"  # KIND_GPU, KIND_CPU
-
-     # Memory settings
-     optimization_level: str = "OPTIMIZATION_LEVEL_ENABLED"
-     enable_pinned_input: bool = True
-     enable_pinned_output: bool = True
-
-     def to_dict(self) -> Dict[str, Any]:
-         """Convert to dictionary"""
-         return self.__dict__.copy()
-
-
- @dataclass
- class RunPodServerlessConfig:
-     """Configuration for RunPod Serverless deployment"""
-
-     # RunPod settings
-     api_key: str
-     endpoint_id: Optional[str] = None
-     template_id: Optional[str] = None
-
-     # Container configuration
-     container_image: str = "runpod/pytorch:2.1.0-py3.10-cuda11.8.0-devel-ubuntu22.04"
-     container_disk_in_gb: int = 20
-
-     # GPU configuration
-     gpu_type: str = "NVIDIA RTX A6000"
-     gpu_count: int = 1
-
-     # Scaling configuration
-     min_workers: int = 0
-     max_workers: int = 3
-     idle_timeout: int = 5  # seconds
-
-     # Network configuration
-     network_volume_id: Optional[str] = None
-
-     # Environment variables
-     env_vars: Dict[str, str] = field(default_factory=dict)
-
-     def to_dict(self) -> Dict[str, Any]:
-         """Convert to dictionary"""
-         return self.__dict__.copy()
-
-
- @dataclass
- class ModelConfig:
-     """Configuration for model deployment"""
-
-     # Model identification
-     model_id: str
-     model_name: str
-     model_version: str = "1.0.0"
-
-     # Model source
-     source_type: str = "huggingface"  # huggingface, local, s3, gcs
-     source_path: str = ""
-
-     # Model format and engine
-     model_format: ModelFormat = ModelFormat.HUGGINGFACE
-     inference_engine: InferenceEngine = InferenceEngine.TRITON
-
-     # Model metadata
-     model_type: str = "llm"  # llm, embedding, vision, audio
-     capabilities: List[str] = field(default_factory=lambda: ["text_generation"])
-
-     # Performance configuration
-     max_batch_size: int = 8
-     max_sequence_length: int = 2048
-     dtype: str = "float16"  # float32, float16, int8, int4
-
-     # Optimization settings
-     use_tensorrt: bool = True
-     use_quantization: bool = False
-     quantization_method: str = "int8"  # int8, int4, awq, gptq
-
-     def to_dict(self) -> Dict[str, Any]:
-         """Convert to dictionary"""
-         return self.__dict__.copy()
-
-
- @dataclass
- class DeploymentConfig:
-     """Main deployment configuration"""
-
-     # Deployment identification
-     deployment_id: str
-     deployment_name: str
-     description: Optional[str] = None
-
-     # Provider and engine configuration
-     provider: DeploymentProvider = DeploymentProvider.RUNPOD_SERVERLESS
-     inference_engine: InferenceEngine = InferenceEngine.TRITON
-
-     # Model configuration
-     model_config: ModelConfig = None
-
-     # Provider-specific configurations
-     runpod_config: Optional[RunPodServerlessConfig] = None
-     triton_config: Optional[TritonConfig] = None
-
-     # Health check configuration
-     health_check_path: str = "/health"
-     health_check_timeout: int = 30
-
-     # Monitoring configuration
-     enable_logging: bool = True
-     log_level: str = "INFO"
-     enable_metrics: bool = True
-
-     # Networking
-     custom_domain: Optional[str] = None
-     allowed_origins: List[str] = field(default_factory=lambda: ["*"])
-
-     # Additional settings
-     extra_config: Dict[str, Any] = field(default_factory=dict)
-
-     def __post_init__(self):
-         """Validate configuration after initialization"""
-         if not self.deployment_id:
-             raise ValueError("deployment_id is required")
-
-         if not self.deployment_name:
-             raise ValueError("deployment_name is required")
-
-         if not self.model_config:
-             raise ValueError("model_config is required")
-
-         # Set default provider configs if not provided
-         if self.provider == DeploymentProvider.RUNPOD_SERVERLESS and not self.runpod_config:
-             self.runpod_config = RunPodServerlessConfig(api_key="")
-
-         if self.inference_engine == InferenceEngine.TRITON and not self.triton_config:
-             self.triton_config = TritonConfig()
-
-     def to_dict(self) -> Dict[str, Any]:
-         """Convert config to dictionary"""
-         config_dict = {}
-
-         for key, value in self.__dict__.items():
-             if key in ['model_config', 'runpod_config', 'triton_config']:
-                 if value is not None:
-                     config_dict[key] = value.to_dict()
-                 else:
-                     config_dict[key] = None
-             elif isinstance(value, Enum):
-                 config_dict[key] = value.value
-             else:
-                 config_dict[key] = value
-
-         return config_dict
-
-     @classmethod
-     def from_dict(cls, config_dict: Dict[str, Any]) -> 'DeploymentConfig':
-         """Create config from dictionary"""
-         # Handle nested configs
-         if 'model_config' in config_dict and config_dict['model_config'] is not None:
-             config_dict['model_config'] = ModelConfig(**config_dict['model_config'])
-
-         if 'runpod_config' in config_dict and config_dict['runpod_config'] is not None:
-             config_dict['runpod_config'] = RunPodServerlessConfig(**config_dict['runpod_config'])
-
-         if 'triton_config' in config_dict and config_dict['triton_config'] is not None:
-             config_dict['triton_config'] = TritonConfig(**config_dict['triton_config'])
-
-         # Handle enums
-         if 'provider' in config_dict:
-             config_dict['provider'] = DeploymentProvider(config_dict['provider'])
-
-         if 'inference_engine' in config_dict:
-             config_dict['inference_engine'] = InferenceEngine(config_dict['inference_engine'])
-
-         return cls(**config_dict)
-
-
- # Predefined configurations for common deployment scenarios
-
- def create_gemma_runpod_triton_config(
-     model_id: str,
-     runpod_api_key: str,
-     model_source_path: str = "xenobordom/gemma-4b-alpaca-v1"
- ) -> DeploymentConfig:
-     """
-     Create a deployment configuration for Gemma model on RunPod with Triton + TensorRT-LLM.
-
-     Args:
-         model_id: Unique identifier for the deployment
-         runpod_api_key: RunPod API key
-         model_source_path: HuggingFace model path or local path
-
-     Returns:
-         DeploymentConfig for Gemma deployment
-     """
-     model_config = ModelConfig(
-         model_id=model_id,
-         model_name="gemma-4b-alpaca",
-         source_type="huggingface",
-         source_path=model_source_path,
-         model_format=ModelFormat.HUGGINGFACE,
-         inference_engine=InferenceEngine.TRITON,
-         model_type="llm",
-         capabilities=["text_generation", "chat"],
-         max_batch_size=8,
-         max_sequence_length=2048,
-         dtype="float16",
-         use_tensorrt=True
-     )
-
-     runpod_config = RunPodServerlessConfig(
-         api_key=runpod_api_key,
-         container_image="nvcr.io/nvidia/tritonserver:23.10-trtllm-python-py3",
-         container_disk_in_gb=30,
-         gpu_type="NVIDIA RTX A6000",
-         gpu_count=1,
-         min_workers=0,
-         max_workers=3,
-         idle_timeout=5,
-         env_vars={
-             "TRITON_MODEL_REPOSITORY": "/models",
-             "CUDA_VISIBLE_DEVICES": "0"
-         }
-     )
-
-     triton_config = TritonConfig(
-         model_repository="/models",
-         model_name="gemma-4b-alpaca",
-         backend="tensorrtllm",
-         max_batch_size=8,
-         max_sequence_length=2048,
-         tensorrt_llm_model_dir="/models/tensorrt_llm",
-         engine_dir="/models/engines",
-         tokenizer_dir="/models/tokenizer"
-     )
-
-     return DeploymentConfig(
-         deployment_id=f"gemma-deployment-{model_id}",
-         deployment_name=f"Gemma 4B Alpaca - {model_id}",
-         description="Gemma 4B model fine-tuned on Alpaca dataset, deployed with Triton + TensorRT-LLM",
-         provider=DeploymentProvider.RUNPOD_SERVERLESS,
-         inference_engine=InferenceEngine.TRITON,
-         model_config=model_config,
-         runpod_config=runpod_config,
-         triton_config=triton_config
-     )
-
-
- def create_local_triton_config(
-     model_id: str,
-     model_source_path: str,
-     triton_model_repository: str = "./models/triton"
- ) -> DeploymentConfig:
-     """
-     Create a deployment configuration for local Triton deployment.
-
-     Args:
-         model_id: Unique identifier for the deployment
-         model_source_path: Path to the model
-         triton_model_repository: Path to Triton model repository
-
-     Returns:
-         DeploymentConfig for local deployment
-     """
-     model_config = ModelConfig(
-         model_id=model_id,
-         model_name=f"local-model-{model_id}",
-         source_type="local",
-         source_path=model_source_path,
-         model_format=ModelFormat.HUGGINGFACE,
-         inference_engine=InferenceEngine.TRITON,
-         model_type="llm",
-         capabilities=["text_generation"],
-         max_batch_size=4,
-         max_sequence_length=1024,
-         dtype="float16"
-     )
-
-     triton_config = TritonConfig(
-         model_repository=triton_model_repository,
-         model_name=f"local-model-{model_id}",
-         backend="python",  # Use Python backend for local development
-         max_batch_size=4,
-         max_sequence_length=1024
-     )
-
-     return DeploymentConfig(
-         deployment_id=f"local-deployment-{model_id}",
-         deployment_name=f"Local Model - {model_id}",
-         description="Local model deployment for development and testing",
-         provider=DeploymentProvider.LOCAL,
-         inference_engine=InferenceEngine.TRITON,
-         model_config=model_config,
-         triton_config=triton_config
-     )
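
That is the end of the deleted deployment_config.py hunk. Since the module was self-contained, its dict round-trip behavior is easy to pin down. A minimal sketch against the 0.3.91 layout (the module is removed in 0.4.3, and the "demo" id and checkpoint path below are made up for illustration):

from isa_model.deployment.core.deployment_config import (
    DeploymentConfig,
    create_local_triton_config,
)

# Build a local Triton deployment config via the factory defined above
config = create_local_triton_config(
    model_id="demo",
    model_source_path="./checkpoints/demo-model",
)

# __post_init__ has already validated the required fields; the provider is
# LOCAL, so no default RunPod config is attached
assert config.provider.value == "local"
assert config.runpod_config is None

# Round-trip through the dict form; from_dict rehydrates the nested
# dataclasses and the provider/engine enums
restored = DeploymentConfig.from_dict(config.to_dict())
assert restored.deployment_id == "local-deployment-demo"
assert restored.triton_config.backend == "python"

Note that the nested to_dict() methods copy __dict__ without converting enum fields, so the dict form round-trips in memory but is not directly JSON-serializable.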