isa-model 0.4.0__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +40 -17
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/storage/hf_storage.py +1 -1
  26. isa_model/core/types.py +1 -0
  27. isa_model/deployment/__init__.py +5 -48
  28. isa_model/deployment/core/__init__.py +2 -31
  29. isa_model/deployment/core/deployment_manager.py +1278 -370
  30. isa_model/deployment/local/__init__.py +31 -0
  31. isa_model/deployment/local/config.py +248 -0
  32. isa_model/deployment/local/gpu_gateway.py +607 -0
  33. isa_model/deployment/local/health_checker.py +428 -0
  34. isa_model/deployment/local/provider.py +586 -0
  35. isa_model/deployment/local/tensorrt_service.py +621 -0
  36. isa_model/deployment/local/transformers_service.py +644 -0
  37. isa_model/deployment/local/vllm_service.py +527 -0
  38. isa_model/deployment/modal/__init__.py +8 -0
  39. isa_model/deployment/modal/config.py +136 -0
  40. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  41. isa_model/deployment/modal/services/__init__.py +3 -0
  42. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  43. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  44. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  45. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  46. isa_model/deployment/modal/services/video/__init__.py +1 -0
  47. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  48. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  49. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  50. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  51. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  52. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  53. isa_model/deployment/storage/__init__.py +5 -0
  54. isa_model/deployment/storage/deployment_repository.py +824 -0
  55. isa_model/deployment/triton/__init__.py +10 -0
  56. isa_model/deployment/triton/config.py +196 -0
  57. isa_model/deployment/triton/configs/__init__.py +1 -0
  58. isa_model/deployment/triton/provider.py +512 -0
  59. isa_model/deployment/triton/scripts/__init__.py +1 -0
  60. isa_model/deployment/triton/templates/__init__.py +1 -0
  61. isa_model/inference/__init__.py +47 -1
  62. isa_model/inference/ai_factory.py +137 -10
  63. isa_model/inference/legacy_services/__init__.py +21 -0
  64. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  65. isa_model/inference/legacy_services/model_service.py +573 -0
  66. isa_model/inference/legacy_services/model_serving.py +717 -0
  67. isa_model/inference/legacy_services/model_training.py +561 -0
  68. isa_model/inference/models/__init__.py +21 -0
  69. isa_model/inference/models/inference_config.py +551 -0
  70. isa_model/inference/models/inference_record.py +675 -0
  71. isa_model/inference/models/performance_models.py +714 -0
  72. isa_model/inference/repositories/__init__.py +9 -0
  73. isa_model/inference/repositories/inference_repository.py +828 -0
  74. isa_model/inference/services/audio/base_stt_service.py +184 -11
  75. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  76. isa_model/inference/services/custom_model_manager.py +277 -0
  77. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  78. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  79. isa_model/inference/services/llm/__init__.py +10 -2
  80. isa_model/inference/services/llm/base_llm_service.py +335 -24
  81. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  82. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  83. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  84. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  85. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  86. isa_model/inference/services/llm/local_llm_service.py +747 -0
  87. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  88. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  89. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  90. isa_model/inference/services/vision/__init__.py +22 -1
  91. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  92. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  93. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  94. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  95. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  96. isa_model/serving/api/cache_manager.py +245 -0
  97. isa_model/serving/api/dependencies/__init__.py +1 -0
  98. isa_model/serving/api/dependencies/auth.py +194 -0
  99. isa_model/serving/api/dependencies/database.py +139 -0
  100. isa_model/serving/api/error_handlers.py +284 -0
  101. isa_model/serving/api/fastapi_server.py +172 -22
  102. isa_model/serving/api/middleware/auth.py +8 -2
  103. isa_model/serving/api/middleware/security.py +23 -33
  104. isa_model/serving/api/middleware/tenant_context.py +414 -0
  105. isa_model/serving/api/routes/analytics.py +4 -1
  106. isa_model/serving/api/routes/config.py +645 -0
  107. isa_model/serving/api/routes/deployment_billing.py +315 -0
  108. isa_model/serving/api/routes/deployments.py +138 -2
  109. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  110. isa_model/serving/api/routes/health.py +32 -12
  111. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  112. isa_model/serving/api/routes/local_deployments.py +448 -0
  113. isa_model/serving/api/routes/tenants.py +575 -0
  114. isa_model/serving/api/routes/unified.py +680 -18
  115. isa_model/serving/api/routes/webhooks.py +479 -0
  116. isa_model/serving/api/startup.py +68 -54
  117. isa_model/utils/gpu_utils.py +311 -0
  118. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/METADATA +66 -24
  119. isa_model-0.4.3.dist-info/RECORD +193 -0
  120. isa_model/core/storage/minio_storage.py +0 -0
  121. isa_model/deployment/cloud/__init__.py +0 -9
  122. isa_model/deployment/cloud/modal/__init__.py +0 -10
  123. isa_model/deployment/core/deployment_config.py +0 -356
  124. isa_model/deployment/core/isa_deployment_service.py +0 -401
  125. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  126. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  127. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  128. isa_model/deployment/runtime/deployed_service.py +0 -338
  129. isa_model/deployment/services/__init__.py +0 -9
  130. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  131. isa_model/deployment/services/model_service.py +0 -332
  132. isa_model/deployment/services/service_monitor.py +0 -356
  133. isa_model/deployment/services/service_registry.py +0 -527
  134. isa_model/eval/__init__.py +0 -92
  135. isa_model/eval/benchmarks/__init__.py +0 -27
  136. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  137. isa_model/eval/benchmarks.py +0 -701
  138. isa_model/eval/config/__init__.py +0 -10
  139. isa_model/eval/config/evaluation_config.py +0 -108
  140. isa_model/eval/evaluators/__init__.py +0 -24
  141. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  142. isa_model/eval/evaluators/base_evaluator.py +0 -503
  143. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  144. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  145. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  146. isa_model/eval/example_evaluation.py +0 -395
  147. isa_model/eval/factory.py +0 -798
  148. isa_model/eval/infrastructure/__init__.py +0 -24
  149. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  150. isa_model/eval/isa_benchmarks.py +0 -700
  151. isa_model/eval/isa_integration.py +0 -582
  152. isa_model/eval/metrics.py +0 -951
  153. isa_model/eval/tests/unit/test_basic.py +0 -396
  154. isa_model/serving/api/routes/evaluations.py +0 -579
  155. isa_model/training/__init__.py +0 -168
  156. isa_model/training/annotation/annotation_schema.py +0 -47
  157. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  158. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  159. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  160. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  161. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  162. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  163. isa_model/training/annotation/views/annotation_controller.py +0 -158
  164. isa_model/training/cloud/__init__.py +0 -22
  165. isa_model/training/cloud/job_orchestrator.py +0 -402
  166. isa_model/training/cloud/runpod_trainer.py +0 -454
  167. isa_model/training/cloud/storage_manager.py +0 -482
  168. isa_model/training/core/__init__.py +0 -26
  169. isa_model/training/core/config.py +0 -181
  170. isa_model/training/core/dataset.py +0 -222
  171. isa_model/training/core/trainer.py +0 -720
  172. isa_model/training/core/utils.py +0 -213
  173. isa_model/training/examples/intelligent_training_example.py +0 -281
  174. isa_model/training/factory.py +0 -424
  175. isa_model/training/intelligent/__init__.py +0 -25
  176. isa_model/training/intelligent/decision_engine.py +0 -643
  177. isa_model/training/intelligent/intelligent_factory.py +0 -888
  178. isa_model/training/intelligent/knowledge_base.py +0 -751
  179. isa_model/training/intelligent/resource_optimizer.py +0 -839
  180. isa_model/training/intelligent/task_classifier.py +0 -576
  181. isa_model/training/storage/__init__.py +0 -24
  182. isa_model/training/storage/core_integration.py +0 -439
  183. isa_model/training/storage/training_repository.py +0 -552
  184. isa_model/training/storage/training_storage.py +0 -628
  185. isa_model-0.4.0.dist-info/RECORD +0 -182
  186. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  187. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  188. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  189. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  190. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  191. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  192. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  193. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  194. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  195. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  196. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  197. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  198. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  199. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,193 @@
1
+ isa_model/__init__.py,sha256=lYYKstKw33oavW6xS0-9cpsdYq-h0cfV_ZlGAwICRaU,868
2
+ isa_model/client.py,sha256=7OUA6yi0G94I8U4GOFcoXvCjWxk2ds1-3CDCivJXA6M,67628
3
+ isa_model/core/config.py,sha256=9OL8_EkBcnAH-RgyWUi3jblKo42m7K1JDeHa9C5CPL4,19519
4
+ isa_model/core/dependencies.py,sha256=2ZgGDjtYitBEVy8H3UppQSb_BId3--f2kQw-Lm4Umh8,10050
5
+ isa_model/core/pricing_manager.py,sha256=NWQLhNIzUDqS5_jBfVcJGrdOdRasFyifSNCliaIDvqU,17122
6
+ isa_model/core/types.py,sha256=jdO_q0FDuzvWURXZtxMV1Zj1XgARX9kopTviWFuq_FU,8713
7
+ isa_model/core/cache/redis_cache.py,sha256=J_A4OcVENsbc6RMOnXn6O-i320egM5hCQXTR7RlYz6s,14111
8
+ isa_model/core/config/__init__.py,sha256=SLeHQtYGDHl64NDVyb3ECQXOKepGM8YNHEoM8CVEWus,350
9
+ isa_model/core/config/config_manager.py,sha256=tF6EyNr_y6Jm0easV9aNHgBce4Ddamf0acaFlz7yToA,32289
10
+ isa_model/core/database/__init__.py,sha256=E2lp9te05QgdQfMeUq702t_23fv4Y7be_P2QU60Yqzs,18
11
+ isa_model/core/database/direct_db_client.py,sha256=jHp5AWyrXcYDRvrPvBRGH89FmPRAhgzL8MQsSt4zkTQ,4273
12
+ isa_model/core/database/migration_manager.py,sha256=GxSJtngsbK8oA8kAKkquLQby_DdXLTwU795NGGHMH8c,21748
13
+ isa_model/core/database/migrations.py,sha256=RpM1eqt7Chu736K0ij_42gKMYHg2HPR_qSt70xfJ_co,12626
14
+ isa_model/core/database/supabase_client.py,sha256=waY0VQLy9VM6FMIoViDe5yAgNW78qJaI3Jb2ohTNj98,11124
15
+ isa_model/core/discovery/__init__.py,sha256=U7YzSNqsyPYmT_TdMlLy9QMAotdfaFlDE-c9XgKk8-4,380
16
+ isa_model/core/discovery/consul_discovery.py,sha256=UziTVGVbejh0zUvayhISkiRJ5KeFcxUkw8t4gTK0UVw,6631
17
+ isa_model/core/logging/__init__.py,sha256=9PA5MJvn7y73aCUUqCgv5r2it2nNb38YXSG31oxNIDA,1213
18
+ isa_model/core/logging/influx_logger.py,sha256=TsPnWeKgV6pl2cz9NoqM0OV9z9jI1_uVbn6nTblfM94,19483
19
+ isa_model/core/logging/loki_logger.py,sha256=pFkKFK5ouKNa9dPehC_kV6af0urnAvzfvbYE5IFiwD0,4706
20
+ isa_model/core/models/__init__.py,sha256=bDzyE0KHIJxxoN3q08pvW_hHBeHux2aMdeKY4GlDzmU,1286
21
+ isa_model/core/models/config_models.py,sha256=Gy0oNndDsKVQ92Bz6aZp1JMqr4Z0jdoEJCCscIrpogI,22955
22
+ isa_model/core/models/deployment_billing_tracker.py,sha256=hW2e3vUkASvCycYhBdrZcsEtBjYo3TLTVpfwfVkaXnQ,16406
23
+ isa_model/core/models/model_billing_tracker.py,sha256=er35dsoKAGt8bjkQwO9f3MQ6U_NI6OIuhIn4PEOPEWU,17302
24
+ isa_model/core/models/model_manager.py,sha256=vjFYWxmhRGUDlrHkwySjN5d0hfCx-VqBKiMhgTXjs8Y,17093
25
+ isa_model/core/models/model_metadata.py,sha256=C6ubW12qmXAuqKmE_2BaO4HMB7WJqBkYDVXYCINsc-4,25148
26
+ isa_model/core/models/model_repo.py,sha256=1018Qi8fMfgXtU8DKfWvf-0hBE16Q7wJIzmEZa7x6bw,19868
27
+ isa_model/core/models/model_statistics_tracker.py,sha256=4KoKawwtEDAx8FV9ysmZS4nvRqZAgRSSIa-32f_Jhwk,10561
28
+ isa_model/core/models/model_storage.py,sha256=gpW7R_wDQh0WUo4CYkrQen9GMKn8Z8ys5iGQenaMmCM,4473
29
+ isa_model/core/models/model_version_manager.py,sha256=20BwNbCg1NlcmHmCxK_zMvpPmVFHg0B6ZCFnPLY6Yj8,37563
30
+ isa_model/core/models/system_models.py,sha256=I52nTi0UVft8tkJdb2LZrJ_Qxax-JE00_YKqnSa-P4E,32525
31
+ isa_model/core/repositories/__init__.py,sha256=RRERY7mWZxhSAZa4m6493l6sFl3CPlyL2bW6qJMEzD8,172
32
+ isa_model/core/repositories/config_repository.py,sha256=QlL22r_bGEV6mHfmztEIY5Zw3wIFoiR5IQJyIj36wXU,37428
33
+ isa_model/core/resilience/circuit_breaker.py,sha256=Ccoh3O31xVFJO2A0flnc9SI-sRqQ3sGKbwv3WbgJxBc,12435
34
+ isa_model/core/security/secrets.py,sha256=kzRjpSiGwY9z47NUlurK29uY_uMsA5lqk8_6Ywu8Zvw,13319
35
+ isa_model/core/services/__init__.py,sha256=TEE58Vk8JKIaQx8ELeAaWo-WPz0hjck9x-ZK7pbfiIE,422
36
+ isa_model/core/services/intelligent_model_selector.py,sha256=PPUWiMcV8DkCPMHhnIlsBgksUY8hKB4SjlFDW1zhLYY,29205
37
+ isa_model/core/storage/hf_storage.py,sha256=k271Rg5G7qUJAJ6VXQBTUHGU6y2NYBNWKVeBJm02DRo,14736
38
+ isa_model/core/storage/local_storage.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
+ isa_model/deployment/__init__.py,sha256=Y3IUEOriJYVZ-3ZEamMs4n6_X0OwtD2eguwBas5zgtg,345
40
+ isa_model/deployment/core/__init__.py,sha256=TRJ4tNjNFub_ObhZy61iZZpqC0CYsnv1HV1Qp_XWhVI,119
41
+ isa_model/deployment/core/deployment_manager.py,sha256=In2e5EuXwVJfg6ENjigOrgQyy19DCWX5uuwSa--Czzc,58337
42
+ isa_model/deployment/local/__init__.py,sha256=Ld1QbaDHIHnbW2IkSXVTZeDcxnmUXBa074uOSLRu5t0,904
43
+ isa_model/deployment/local/config.py,sha256=Kft5EORBcsO2HVizD0ct6VFIIs9sVBN-CjRnOrcm00g,9246
44
+ isa_model/deployment/local/gpu_gateway.py,sha256=be6d9eSWRufXvJH9GyklBWlXhOukITY1lnXTM6RPcQs,21954
45
+ isa_model/deployment/local/health_checker.py,sha256=_u2vwiwEGCbFA6laUu1JX6GfE6prrOSHiy6PclimGys,16392
46
+ isa_model/deployment/local/provider.py,sha256=6E1WfTUvI32aeEQs13TIyuy9xQmZqJeaJULfE2KLe4E,22763
47
+ isa_model/deployment/local/tensorrt_service.py,sha256=f05BkJMw2NhiMp18xW1RwRED4bIjZ0gmUS5OgEAGnk0,23026
48
+ isa_model/deployment/local/transformers_service.py,sha256=pdC3KppUzSVrWd-CKA8fXPC1uzy45S8FTtQj9odAWpM,23937
49
+ isa_model/deployment/local/vllm_service.py,sha256=zVuBopgzG6ulSvHnRE8h_dLQQpNqTDwHbXo88IKXrwk,18849
50
+ isa_model/deployment/modal/__init__.py,sha256=ZBcToGNtL6ztWY5pvqM1YMiL_F-S1xx9b-uZd8cuajc,380
51
+ isa_model/deployment/modal/config.py,sha256=8XhBMIbx6oDTf-P9ipQ58xmBYDbNZekZ4gixorBDIpw,4267
52
+ isa_model/deployment/modal/deployer.py,sha256=YNCtbO8FTVstz8OG6Kh8p3AM05dtbg5i73-JsuNy4KM,31961
53
+ isa_model/deployment/modal/services/__init__.py,sha256=m9D6jZ-RH3fohn5pNDLEfFQWj18LDlq565I2t8tTIAk,61
54
+ isa_model/deployment/modal/services/audio/__init__.py,sha256=YhBTWmI0k8onSU6K0MzW9NIS1d7uedPT_9U6G0Qmfio,41
55
+ isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py,sha256=w3s4hj78HedQ0g2X8_PJbXPBGcMmwnZeV1LHS_Auy_o,18637
56
+ isa_model/deployment/modal/services/audio/isa_audio_fish_service.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
57
+ isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py,sha256=2OE_J5KTglEh7iVnTb-2_phghvQKkTD7OIsHZ9fKfZI,33515
58
+ isa_model/deployment/modal/services/audio/isa_audio_service_v2.py,sha256=TMF-TXKcRsNT6vq6U-45PSvKfdJ-SBbpAD-lCC9x_zI,40832
59
+ isa_model/deployment/modal/services/embedding/__init__.py,sha256=uwKfvNrVTO_AvRKlONGCVaXwmSIs-lDNfF2gef_Clb0,45
60
+ isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py,sha256=K6TdCWD-Ko-vmKi03tVfpjZpX2Folv5EW79yPqJzT_U,10182
61
+ isa_model/deployment/modal/services/llm/__init__.py,sha256=dDp1ekrBb6E1cMwpE4kMFswlIxfs5G0M_LuTLvCAtKk,39
62
+ isa_model/deployment/modal/services/llm/isa_llm_service.py,sha256=-GVcBMRiKezT0oGkqirmwuKb0Oo2GsMrL-AFEwp5Ik8,14108
63
+ isa_model/deployment/modal/services/video/__init__.py,sha256=IehlRWhB0X-IOY1-rGXugY-BzOLr1jhMPQTLAZZTj_c,41
64
+ isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py,sha256=slxEZxFFnFkOBlxdOdNu7JRXJMWwl-gaOUHFQr5stZ0,15208
65
+ isa_model/deployment/modal/services/vision/__init__.py,sha256=JzkcYOTBIJ4vUUpZwwjYCpzyh1VtUOa-RSlymOzKdHU,42
66
+ isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py,sha256=1Y6s6mLRFLPhpItFLS61mbqGggR1UNd1rNtAluIzQrk,18501
67
+ isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py,sha256=Ldy0VvO7CGiPGDmPXWQCbCLUZgCGg3aBqWOA66Lvt5I,26797
68
+ isa_model/deployment/modal/services/vision/isa_vision_table_service.py,sha256=UhQinNRda0b11g3dYFjDCNaR0850IgCl1WtRghrFNu0,24827
69
+ isa_model/deployment/modal/services/vision/isa_vision_ui_service.py,sha256=BwoMhJQiQe2vGFteTsbbeVsemJZszjce9ech39Z0cWQ,34296
70
+ isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py,sha256=xy3IOEDifi2aMvJFB2_u4qfdD3YdXfgWW3_0z_BLNuo,25665
71
+ isa_model/deployment/modal/services/vision/simple_auto_deploy_vision_service.py,sha256=rfXsv9mh_w5cXHVYxA4fBD5ppyNY4HplsH34xp4WpY8,9882
72
+ isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py,sha256=3MHnvm6TFDycULcFaEdX95Rl0jY2VWYHDGzjDSJNCm0,1388
73
+ isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py,sha256=IfmIal7OfSdbnGeyGy-zMee2WAf5zteXJ0zAZiCZxSQ,1400
74
+ isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py,sha256=nQEhzFDU2dsAQkHApyeQpZHu0VLWtG3Ilhuv498wRLA,1388
75
+ isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py,sha256=yx3xiXyzskiVjwdr54favQ48UYO3hXzt2bQM9iR_irM,1404
76
+ isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py,sha256=fjPAVAtdcxOnEA252E4KPjTNkJq-_4UlG9Qj7eEF6tw,1404
77
+ isa_model/deployment/storage/__init__.py,sha256=TPxs4prCYb2AJ9NqNk9nh3kc6EFDkAkDmrqxq1ERWzo,138
78
+ isa_model/deployment/storage/deployment_repository.py,sha256=F6eFQjDXL4AAhw6F1hDMiU1rlnCL1zhZ_cMssVVEtHY,33168
79
+ isa_model/deployment/triton/__init__.py,sha256=TrsBOxzGeESj_GrVWUd3jsScFUQ5Cd266NXvwRbuAJ8,330
80
+ isa_model/deployment/triton/config.py,sha256=H7VemDvdLpREfSvD0DK86CXFg46l-eHnGf_vo8lOrH4,7319
81
+ isa_model/deployment/triton/provider.py,sha256=ZtpgF2kaHGtPO2EuXQEHMtwobMW025sliogkBtqjbe0,17391
82
+ isa_model/deployment/triton/configs/__init__.py,sha256=bxiam68sNO8eFutXKK9yaaKS-5MulIuhgusN_mdwz2A,50
83
+ isa_model/deployment/triton/scripts/__init__.py,sha256=fE3HxpLG9_wvXWIN27Tj-B9IEvtp_aZ0bUS3GzfRHqY,31
84
+ isa_model/deployment/triton/templates/__init__.py,sha256=rWtBQzM6kpC2yXcc3yCjrnWSj7W3jxbfXXZe2Rr1GL8,33
85
+ isa_model/inference/__init__.py,sha256=gmrJV1cdSyDpBZdfm8r0kPZsbnqogth0rXf_wT5fIFY,1381
86
+ isa_model/inference/ai_factory.py,sha256=ztTllV58ovfuO208aYQA_yT7iq88F7wdiPZr1TvW1_I,28716
87
+ isa_model/inference/base.py,sha256=qwOddnSGI0GUdD6qIdGBPQpkW7UjU3Y-zaZvu70B4WA,1278
88
+ isa_model/inference/legacy_services/__init__.py,sha256=H0RTKKE3UOqmpjc3S3GfysbhZZl6CnZd9_I38KTXg1E,563
89
+ isa_model/inference/legacy_services/model_evaluation.py,sha256=GbhcNM8uO0AnI59yuCYaTqDEdmh0pS4YxJVgJGL8Vjs,27964
90
+ isa_model/inference/legacy_services/model_service.py,sha256=9xEaLJWW1B2lLFL5-eP_uPFpfg8iYnqrAphpJnMTn2A,24751
91
+ isa_model/inference/legacy_services/model_serving.py,sha256=XQ7j6B02MsjUWKeeSVovKg0kljwMCLbRLeCuckmjtM8,28255
92
+ isa_model/inference/legacy_services/model_training.py,sha256=g0rfzSKGXzoJbNgEh8wDJlfdhHxh6-jXN0fv_B3-1C8,24036
93
+ isa_model/inference/models/__init__.py,sha256=FJ6goyHRf4RCnqbq75qAigL0FMKXulDOns-ebHWCgP8,579
94
+ isa_model/inference/models/inference_config.py,sha256=1ITGWOtQ3cVyucQq9Rih-Ab5uqaAGf31UfCYyJMY_DI,19529
95
+ isa_model/inference/models/inference_record.py,sha256=oHidCa9-lHSonOSoc24tAbrsSPBgm3bb8-cqzWOqzcU,24840
96
+ isa_model/inference/models/performance_models.py,sha256=KZWC8fEslhTohL2y-nz8S39P9RZ9SgJ6piQ9pMXh04E,26434
97
+ isa_model/inference/repositories/__init__.py,sha256=SYTQX1E5L6zTuo_p_KnDjYefoCKw4p1m4pW_FDb_sgM,191
98
+ isa_model/inference/repositories/inference_repository.py,sha256=QnfSzkcLQ5CPcABTmSYBRAv_5SVk0ayjVW6B1Q0SKaQ,31718
99
+ isa_model/inference/services/__init__.py,sha256=yfLz0YGl8ixk6LfTRL6cRTvZMb9F_Pv1QRgGyNc9xYM,386
100
+ isa_model/inference/services/base_service.py,sha256=NJIvq7YpGw55ah-axDR2hcu40B2gm6L_WYXyfX0rSaE,5816
101
+ isa_model/inference/services/custom_model_manager.py,sha256=HUHSDOWArJYMfdvaI-gfCJkVRVFdftScOw7BgS-h3zo,10829
102
+ isa_model/inference/services/audio/__init__.py,sha256=Hgtk3j5H4U3YxNlfG8UaU2eUNOWgrpSA8LN_tKEFWMk,616
103
+ isa_model/inference/services/audio/base_realtime_service.py,sha256=hSP89_hnzLBnmBvFOQlU_tW8UT2QKWKVR9Z7fwsVPa8,8125
104
+ isa_model/inference/services/audio/base_stt_service.py,sha256=qahYTLpf8gruvhEtS5bWDXPiYbgxXF3nYnqTq3Ckc0E,13361
105
+ isa_model/inference/services/audio/base_tts_service.py,sha256=PgctcV98Pe9I2kSjScsm8epRwdaEU-vAGCIfdd2P8us,6924
106
+ isa_model/inference/services/audio/isa_tts_service.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
107
+ isa_model/inference/services/audio/openai_realtime_service.py,sha256=vo4ow8CULZJXz4nSepMTq7_uKufWvRmcoezhmX2Q16s,22101
108
+ isa_model/inference/services/audio/openai_stt_service.py,sha256=2UBBrRP8PXYTlDz9gEh4_mOqTSIdCewG8Ptu9aT2nCo,13476
109
+ isa_model/inference/services/audio/openai_tts_service.py,sha256=C4vIRvCKoySs4-zBEteI_DZYZsATS84W_ZUwbxjJjpA,8253
110
+ isa_model/inference/services/audio/replicate_tts_service.py,sha256=kCG_bBNgW7GQwt5-ZdwPSqsMiTV54-FhSowFwNWGvg0,10292
111
+ isa_model/inference/services/embedding/__init__.py,sha256=xeWeq3jighDBCUzgveTiH11VLkkhk-J6z5cq9sf1mEk,311
112
+ isa_model/inference/services/embedding/base_embed_service.py,sha256=V57nDU_VzWXjw3dqyaTXBr3ntxT2VI1my6uSAX-vvxY,10382
113
+ isa_model/inference/services/embedding/isa_embed_service.py,sha256=AQ3yuGas64SRW8jlB0rvXhq2cyD5NMlCekodPCuN8dw,11242
114
+ isa_model/inference/services/embedding/ollama_embed_service.py,sha256=7OZMMrDO4eePNJXSjNB5E4j0rtX0HVq3RgNSzzqU1nA,7291
115
+ isa_model/inference/services/embedding/openai_embed_service.py,sha256=CaoNtepIreMA0wo5i0lUgJHUjVp4vdHQ59eT_gan3s4,8321
116
+ isa_model/inference/services/embedding/resilient_embed_service.py,sha256=26HPaWmVKQ_fPClTNew1VZeAX5vy4ncKxp1HR04yAYw,11735
117
+ isa_model/inference/services/embedding/helpers/text_splitter.py,sha256=6AbvcQ7H6MS54B9d9T1XBGg4GhvmKfZqp00lKp9pF-U,1635
118
+ isa_model/inference/services/embedding/tests/test_embedding.py,sha256=_Syrgt2sYCS8oNCqaJMuzck_FRqeSx0Nnk9B1j3OJBk,10200
119
+ isa_model/inference/services/img/__init__.py,sha256=moVvATbOEEqzKRtu2A9E7eBzlrkbr5oY1bGyH_3PebY,577
120
+ isa_model/inference/services/img/base_image_gen_service.py,sha256=3BYoUo9ASw02ZPl2T9Pwvu4uVD-GOZIFEaGsrnuKCrM,8101
121
+ isa_model/inference/services/img/replicate_image_gen_service.py,sha256=mspkdRh_snMUPQxRccpfzcAXO3TAxZsNzei0c45G7YA,6447
122
+ isa_model/inference/services/img/services/replicate_face_swap.py,sha256=Q6SiWJN9eNvD1nv4kWXnvvPnm9A1DLb7Gsb_vwUfUJw,7385
123
+ isa_model/inference/services/img/services/replicate_flux.py,sha256=BUIkuBUMZCH5ChvbIhmJ_1pJVNo0CjY7q7hgvsaFJO0,8008
124
+ isa_model/inference/services/img/services/replicate_flux_kontext.py,sha256=3DEwruobN7JL6-3LNMOJVOtorjlqV7ykD2ul4NTQ9Fs,7786
125
+ isa_model/inference/services/img/services/replicate_sticker_maker.py,sha256=9D_IISOpFfC2MWlP1BFohuIwHcI1H_b95zQU52Nl-Mw,8528
126
+ isa_model/inference/services/img/tests/test_img_client.py,sha256=r6lYybP_ty3A55LoaE_GRuXvXSh35KXOVbODGskLza0,11313
127
+ isa_model/inference/services/llm/__init__.py,sha256=aiNdB692nopCy8QaopVDB0e-J7mg22LrRzk4ytlu2iQ,769
128
+ isa_model/inference/services/llm/base_llm_service.py,sha256=CUw24rMytVXAUcberTzogKRLwYZiYifyg-kcNfHiHkg,36402
129
+ isa_model/inference/services/llm/cerebras_llm_service.py,sha256=8BU9I7HHO481nn7ddsiP4nl2ItYTCQJzJyaIArKA0ZA,25126
130
+ isa_model/inference/services/llm/huggingface_llm_service.py,sha256=mWnOGh3OsRyaL002Ax71Mb7oXp254VDDdP0QiQ-p9Yk,22733
131
+ isa_model/inference/services/llm/local_llm_service.py,sha256=_ILRD-oKcolf972aXe3zPS_tBu8SD-xH_Iw29alpkHM,27606
132
+ isa_model/inference/services/llm/ollama_llm_service.py,sha256=78VNSspzlQrXDqAxUR52jLGIKnBw4e_4LT2unAFMiTk,17967
133
+ isa_model/inference/services/llm/openai_llm_service.py,sha256=BpYugS2Vsrc-SS69cnW2VqFv4JXMbgglXvvbNgUZNZY,43874
134
+ isa_model/inference/services/llm/yyds_llm_service.py,sha256=ZHl2ukcDVkwYahF4OV5etTvJKa9Ni6O1TkJp75pQWaA,12495
135
+ isa_model/inference/services/llm/helpers/llm_adapter.py,sha256=7PrpiKvZaMfI9U6wabfTegwDMKcGDsv6I_lqnzcsE-o,24392
136
+ isa_model/inference/services/llm/helpers/llm_prompts.py,sha256=qGcG5hiBcnuq3IrcpNvshpWR1TqUrkdtf2Il__xeBoo,21661
137
+ isa_model/inference/services/llm/helpers/llm_utils.py,sha256=x2-wbij95bWgrZpJyXah9uMhEHKYAznChYRldT0VVno,22776
138
+ isa_model/inference/services/ml/base_ml_service.py,sha256=mLBA6ENowa3KVzNqHyhWxf_Pr-cJJj84lDE4TniPzYI,2894
139
+ isa_model/inference/services/ml/sklearn_ml_service.py,sha256=Lf9JrwvI25lca7JBbjB_e66eAUtXFbwxZ3Hs13dVGkA,5512
140
+ isa_model/inference/services/vision/__init__.py,sha256=1GO2NoC7p8IJ92mI6fGcPaN4MeFzLhdNdNlAnFYpzpE,1839
141
+ isa_model/inference/services/vision/base_vision_service.py,sha256=mjrfcUT01HBi0k1qeIL3CkpkvQIuL_jar-N03W8sMV8,10531
142
+ isa_model/inference/services/vision/blip_vision_service.py,sha256=tmGCznQ9qBsidLV_mnKEtvpSUPvBUbwJdwviKYXrrkA,12020
143
+ isa_model/inference/services/vision/isa_vision_service.py,sha256=OPuIZmG_lYOgajGfrQj4uLzVk5Y4H0PkeSNViIiL1O0,22960
144
+ isa_model/inference/services/vision/openai_vision_service.py,sha256=LeD910WWyJd6QiJncSM3x_Whj-a32Vr1_2FG4gfjtc4,10179
145
+ isa_model/inference/services/vision/replicate_vision_service.py,sha256=smRkSCTwk5mvyKVnvyplqPNuVYjRZngVBWxTCbFmrxA,20679
146
+ isa_model/inference/services/vision/vgg16_vision_service.py,sha256=5w9r3vXQz5EAdXFPawtsuWzaNlhFA2N0xcJxSJcudQI,9382
147
+ isa_model/inference/services/vision/disabled/isA_vision_service.py,sha256=VYa8VJtxDB9KdnfNW0GPEP_TPker4pHp33gLD_TnpaM,18336
148
+ isa_model/inference/services/vision/helpers/image_utils.py,sha256=BYEFnOu2MBRJrJmKdALfFy5CTKpI4Co9PTVCxJvdFqc,11388
149
+ isa_model/inference/services/vision/helpers/vision_prompts.py,sha256=WbzOYu-Z2-8Xn9dcvuPRTA7VTy23_uoMRRGO4t0wZ8Q,12098
150
+ isa_model/inference/services/vision/tests/test_ocr_client.py,sha256=IY2KbHuIf1FmKFrUO9HrmKtgyT9achylwKykOIFLR8E,11250
151
+ isa_model/inference/utils/conversion/bge_rerank_convert.py,sha256=1dvtxe5-PPCe2Au6SO8F2XaD-xdIoeA4zDTcid2L9FU,2691
152
+ isa_model/inference/utils/conversion/onnx_converter.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
153
+ isa_model/inference/utils/conversion/torch_converter.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
154
+ isa_model/serving/__init__.py,sha256=LTO0Adbvm7A-bgQqtuOQSoHvdu9OH3OrEjYgQanuHgI,429
155
+ isa_model/serving/modal_proxy_server.py,sha256=U8AJMF4ewtTGHjmbLb6ezR3NCT6d0APBtFuKVxVkRu4,7481
156
+ isa_model/serving/api/__init__.py,sha256=wgWD69eqV37fFTLxhz8b0rOn_34P7SZHoWw2sufWjk4,162
157
+ isa_model/serving/api/cache_manager.py,sha256=pBA9-4OUrHlyqYGtNbJSfZrDYPfEs2DV1Cx1qI0pM6U,8120
158
+ isa_model/serving/api/error_handlers.py,sha256=UigiYCu7O59Mp-GkB3grzMgAz5rl9ClgK81DyEzX6ZU,10869
159
+ isa_model/serving/api/fastapi_server.py,sha256=huSO9ZMuFirr5LhQO__mfM0tq1qKik11Wid9Qzk6T-Y,12367
160
+ isa_model/serving/api/startup.py,sha256=iZqRnkgBFvuMYkueeVxKheZZ13Hy5xoaeenX2udrPW8,12944
161
+ isa_model/serving/api/dependencies/__init__.py,sha256=e9ho7V27vnK73k2R7uRqVFzJ9hOHOsEydSz5mOAQbQU,21
162
+ isa_model/serving/api/dependencies/auth.py,sha256=6G46JMx35O_9PWhc66ptA67BTH3fb9ASgdOW-dKfK0g,6149
163
+ isa_model/serving/api/dependencies/database.py,sha256=fh8nrLNuD2ATVHnaMOxGD8ueR2g1f54rgflM1ISqcWg,4145
164
+ isa_model/serving/api/middleware/__init__.py,sha256=iCKUYECf0bjNGXgV91K03hb8Dnp0Jc_wnUL897Rd0sg,163
165
+ isa_model/serving/api/middleware/auth.py,sha256=WRoRrsDTlf4FDa7E8PnNhY5qfhUn76Be3ZMOPdi3FMk,11313
166
+ isa_model/serving/api/middleware/request_logger.py,sha256=d48n6tp1pqZ7HFWFl8jg6er24ugWkWkMOc1y80aqPU8,2938
167
+ isa_model/serving/api/middleware/security.py,sha256=YBzP3BZtEM1-PLSvVEuyTIxwGJqtw0IioRFWPUSu8FA,10233
168
+ isa_model/serving/api/middleware/tenant_context.py,sha256=uf2Lla2aov-vIOXCIukBURn90GJ3G484tbqj993NktI,16008
169
+ isa_model/serving/api/routes/__init__.py,sha256=RIaG9OPg0AjAIVbtMzwnqGyNU-tuQXbdvFcYOt4b_Do,84
170
+ isa_model/serving/api/routes/analytics.py,sha256=qUVo1SV-wNJZPyvVx_w80gL6JYuiL3DbyEahEZGJIVU,17655
171
+ isa_model/serving/api/routes/config.py,sha256=ER2PiKExoctMLkxs9ZxihGB0rEiczgU2Zt-FxpzMPsY,22453
172
+ isa_model/serving/api/routes/deployment_billing.py,sha256=-9Ut6FIexwXbL_wT6oG-6yRxJLvCTXMBThiJn3av1B0,12350
173
+ isa_model/serving/api/routes/deployments.py,sha256=DQHCePARLy1xht7nmeGlfomdUhWDqBzeAwReSI0arCc,17536
174
+ isa_model/serving/api/routes/gpu_gateway.py,sha256=cTZmOApxinMqSQe1W9nglx3UduUnC-wRJ311bEajSV0,15512
175
+ isa_model/serving/api/routes/health.py,sha256=dqmTLF_x0AvEdXHPYd9mf4mqUN-Z5oziNW_7yMI735s,2974
176
+ isa_model/serving/api/routes/inference_monitoring.py,sha256=yXLQApZv5kDeTQjQAduKyv30k36AGl7tQ9ifG-wn21I,18799
177
+ isa_model/serving/api/routes/llm.py,sha256=5ZVxWugff0i6VBKz_Nv5CqacMZJsPZEKyoSB6XDrW34,385
178
+ isa_model/serving/api/routes/local_deployments.py,sha256=LbAiRcozooyhckg4joQXCWjpX2Au8Wt-MMq-GLZHul0,16289
179
+ isa_model/serving/api/routes/logs.py,sha256=9t8cft3fprpQHHCj8UNxvcHmvOiELxbu7WXoWe8JLPw,14238
180
+ isa_model/serving/api/routes/settings.py,sha256=Xj_uXnRxmHpgSxUfztvrwE2yjWtlWQElE2CizmeeVds,20905
181
+ isa_model/serving/api/routes/tenants.py,sha256=7gU7xpEjYuDeCunWWeY6BIwuEw-t3_ctqYDU5Sv3GBI,21000
182
+ isa_model/serving/api/routes/ui_analysis.py,sha256=-WxLaRKQNHnRh4okB85cWA4blTegpEPZtzHTsF3yeeU,6848
183
+ isa_model/serving/api/routes/unified.py,sha256=rKCHKU4accmnE2jdZw-ZVSN8hDh8vB0a1Q-8U35faQM,44294
184
+ isa_model/serving/api/routes/vision.py,sha256=U9jxssQYe6igtayUW0C2fcYwqmLRIE15__X-5Ru9J4c,396
185
+ isa_model/serving/api/routes/webhooks.py,sha256=kspHgX8PIZ5L6S0klnV0XtGoE9BiTfYTMMx7Qsuf6CY,16219
186
+ isa_model/serving/api/schemas/__init__.py,sha256=Tu_hzxoKW1ZHpww3-5ER4A2hNuDByZ0rAfrgaJ7Bs-M,275
187
+ isa_model/serving/api/schemas/common.py,sha256=HVaAS7wlvqrwC1gMZ2Cvo0vzHB053x2uOTAwUoY2vsE,696
188
+ isa_model/serving/api/schemas/ui_analysis.py,sha256=IpOcIvmUeXN1UtZsbGozMfV1vvz7AVF2PVXjjxYl_0k,4089
189
+ isa_model/utils/gpu_utils.py,sha256=HbMvJzSsOCcjOJluUrszAJ58dC8LPnyA_nQn9s_1I6c,11730
190
+ isa_model-0.4.3.dist-info/METADATA,sha256=2xHn4pAvo10QKBDQK34v0-acBWGBUZRR6ZUZa-xq7BU,15090
191
+ isa_model-0.4.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
192
+ isa_model-0.4.3.dist-info/top_level.txt,sha256=eHSy_Xb3kNkh2kK11mi1mZh0Wz91AQ5b8k2KFYO-rE8,10
193
+ isa_model-0.4.3.dist-info/RECORD,,
File without changes
@@ -1,9 +0,0 @@
1
- """
2
- Cloud Deployment Module
3
-
4
- Support for various cloud platforms
5
- """
6
-
7
- from .modal import ModalDeployment
8
-
9
- __all__ = ["ModalDeployment"]
@@ -1,10 +0,0 @@
1
- """
2
- Modal Deployment Module
3
-
4
- Modal.com cloud deployment for ISA Model services
5
- """
6
-
7
- from .ui_analysis_service import UIAnalysisService as UIAnalysisModalService
8
- from .deployment_manager import ModalDeployment
9
-
10
- __all__ = ["UIAnalysisModalService", "ModalDeployment"]
@@ -1,356 +0,0 @@
1
- """
2
- Deployment Configuration Classes
3
-
4
- Defines configuration classes for different deployment scenarios including
5
- RunPod serverless, Triton inference server, and TensorRT-LLM backend.
6
- """
7
-
8
- from dataclasses import dataclass, field
9
- from typing import Optional, Dict, Any, List
10
- from enum import Enum
11
- from pathlib import Path
12
-
13
-
14
class DeploymentProvider(str, Enum):
    """Identifiers for the platforms a deployment can be created on.

    The enum mixes in ``str``, so every member compares equal to its raw
    string value and can be looked up from it, e.g.
    ``DeploymentProvider("local")``.
    """

    RUNPOD_SERVERLESS = "runpod_serverless"
    RUNPOD_PODS = "runpod_pods"
    AWS_LAMBDA = "aws_lambda"
    GOOGLE_CLOUD_RUN = "google_cloud_run"
    AZURE_CONTAINER_INSTANCES = "azure_container_instances"
    LOCAL = "local"
22
-
23
-
24
class InferenceEngine(str, Enum):
    """Identifiers for the inference runtimes a deployment can use.

    The enum mixes in ``str``, so every member compares equal to its raw
    string value and can be looked up from it, e.g.
    ``InferenceEngine("triton")``.
    """

    TRITON = "triton"
    VLLM = "vllm"
    TENSORRT_LLM = "tensorrt_llm"
    HUGGINGFACE = "huggingface"
    ONNX = "onnx"
    TORCHSCRIPT = "torchscript"
32
-
33
-
34
class ModelFormat(str, Enum):
    """Identifiers for the on-disk formats a deployable model can be in.

    The enum mixes in ``str``, so every member compares equal to its raw
    string value and can be looked up from it, e.g.
    ``ModelFormat("onnx")``.
    """

    HUGGINGFACE = "huggingface"
    TENSORRT = "tensorrt"
    ONNX = "onnx"
    TORCHSCRIPT = "torchscript"
    SAFETENSORS = "safetensors"
41
-
42
-
43
@dataclass
class TritonConfig:
    """Settings describing one model served by Triton Inference Server.

    Every field has a default, so ``TritonConfig()`` is a valid
    configuration.
    """

    # --- Model repository ---
    model_repository: str = "/models"
    model_name: str = "model"
    model_version: str = "1"

    # --- Backend ---
    backend: str = "tensorrtllm"  # one of: tensorrtllm, python, onnxruntime
    max_batch_size: int = 8
    max_sequence_length: int = 2048

    # --- TensorRT-LLM specific paths ---
    tensorrt_llm_model_dir: str = "/models/tensorrt_llm"
    engine_dir: str = "/models/engines"
    tokenizer_dir: str = "/models/tokenizer"

    # --- Performance ---
    instance_group_count: int = 1
    instance_group_kind: str = "KIND_GPU"  # one of: KIND_GPU, KIND_CPU

    # --- Memory ---
    optimization_level: str = "OPTIMIZATION_LEVEL_ENABLED"
    enable_pinned_input: bool = True
    enable_pinned_output: bool = True

    def to_dict(self) -> Dict[str, Any]:
        """Return the instance attributes as a new (shallow-copied) dict."""
        return dict(vars(self))
74
-
75
-
76
@dataclass
class RunPodServerlessConfig:
    """Configuration for a RunPod Serverless deployment.

    ``api_key`` is the only required field; everything else has a default.
    """

    # RunPod settings
    api_key: str
    endpoint_id: Optional[str] = None
    template_id: Optional[str] = None

    # Container configuration
    container_image: str = "runpod/pytorch:2.1.0-py3.10-cuda11.8.0-devel-ubuntu22.04"
    container_disk_in_gb: int = 20

    # GPU configuration
    gpu_type: str = "NVIDIA RTX A6000"
    gpu_count: int = 1

    # Scaling configuration
    min_workers: int = 0
    max_workers: int = 3
    idle_timeout: int = 5  # seconds

    # Network configuration
    network_volume_id: Optional[str] = None

    # Environment variables (a fresh dict per instance)
    env_vars: Dict[str, str] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the configuration as a new dictionary.

        The result contains every field, including ``api_key`` in plain
        text. ``env_vars`` is copied so that editing the returned dictionary
        cannot silently change this configuration.

        Returns:
            A dictionary keyed by field name.
        """
        data = self.__dict__.copy()
        # __dict__.copy() is shallow: without this the exported env_vars
        # would be the very same dict object the instance holds.
        if self.env_vars is not None:
            data["env_vars"] = dict(self.env_vars)
        return data
107
-
108
-
109
@dataclass
class ModelConfig:
    """Configuration describing the model to deploy.

    ``model_id`` and ``model_name`` are required; everything else has a
    default.

    Raises:
        ValueError: If ``model_format`` or ``inference_engine`` is a value
            that does not correspond to a member of its enum.
    """

    # Model identification
    model_id: str
    model_name: str
    model_version: str = "1.0.0"

    # Model source
    source_type: str = "huggingface"  # huggingface, local, s3, gcs
    source_path: str = ""

    # Model format and engine
    model_format: ModelFormat = ModelFormat.HUGGINGFACE
    inference_engine: InferenceEngine = InferenceEngine.TRITON

    # Model metadata
    model_type: str = "llm"  # llm, embedding, vision, audio
    capabilities: List[str] = field(default_factory=lambda: ["text_generation"])

    # Performance configuration
    max_batch_size: int = 8
    max_sequence_length: int = 2048
    dtype: str = "float16"  # float32, float16, int8, int4

    # Optimization settings
    use_tensorrt: bool = True
    use_quantization: bool = False
    quantization_method: str = "int8"  # int8, int4, awq, gptq

    def __post_init__(self) -> None:
        """Normalize enum-typed fields to real enum members.

        A configuration rebuilt from a plain dictionary (for example one
        loaded from JSON) carries raw strings here; looking them up by value
        restores the enum members. Existing members pass through unchanged.
        """
        self.model_format = ModelFormat(self.model_format)
        self.inference_engine = InferenceEngine(self.inference_engine)

    def to_dict(self) -> Dict[str, Any]:
        """Return the configuration as a new dictionary.

        Enum fields are kept as enum members (which are also ``str``
        instances). ``capabilities`` is copied so that editing the returned
        dictionary cannot change this configuration.

        Returns:
            A dictionary keyed by field name.
        """
        data = self.__dict__.copy()
        # __dict__.copy() is shallow: detach the one mutable field.
        if isinstance(self.capabilities, list):
            data["capabilities"] = list(self.capabilities)
        return data
143
-
144
-
145
@dataclass
class DeploymentConfig:
    """Top-level deployment configuration.

    Combines the deployment identity, the chosen provider and inference
    engine, the model configuration and the provider-specific settings.

    Raises:
        ValueError: From ``__post_init__`` if ``deployment_id`` or
            ``deployment_name`` is empty, or if ``model_config`` is missing.
    """

    # Deployment identification
    deployment_id: str
    deployment_name: str
    description: Optional[str] = None

    # Provider and engine configuration
    provider: DeploymentProvider = DeploymentProvider.RUNPOD_SERVERLESS
    inference_engine: InferenceEngine = InferenceEngine.TRITON

    # Model configuration. Defaults to None only because it follows fields
    # with defaults; __post_init__ rejects a missing value.
    model_config: Optional[ModelConfig] = None

    # Provider-specific configurations
    runpod_config: Optional[RunPodServerlessConfig] = None
    triton_config: Optional[TritonConfig] = None

    # Health check configuration
    health_check_path: str = "/health"
    health_check_timeout: int = 30

    # Monitoring configuration
    enable_logging: bool = True
    log_level: str = "INFO"
    enable_metrics: bool = True

    # Networking
    custom_domain: Optional[str] = None
    allowed_origins: List[str] = field(default_factory=lambda: ["*"])

    # Additional settings
    extra_config: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Validate required fields and fill in default sub-configurations."""
        if not self.deployment_id:
            raise ValueError("deployment_id is required")

        if not self.deployment_name:
            raise ValueError("deployment_name is required")

        if not self.model_config:
            raise ValueError("model_config is required")

        # Set default provider configs if not provided. Note that the default
        # RunPod configuration is created with an empty API key.
        if self.provider == DeploymentProvider.RUNPOD_SERVERLESS and not self.runpod_config:
            self.runpod_config = RunPodServerlessConfig(api_key="")

        if self.inference_engine == InferenceEngine.TRITON and not self.triton_config:
            self.triton_config = TritonConfig()

    def to_dict(self) -> Dict[str, Any]:
        """Convert the configuration to a plain dictionary.

        Nested configurations are exported through their own ``to_dict``,
        enum fields are replaced by their values, and list/dict fields are
        shallow-copied so the result does not share containers with this
        instance.

        Returns:
            A dictionary keyed by field name.
        """
        nested_keys = ('model_config', 'runpod_config', 'triton_config')
        config_dict: Dict[str, Any] = {}

        for key, value in self.__dict__.items():
            if key in nested_keys:
                config_dict[key] = value.to_dict() if value is not None else None
            elif isinstance(value, Enum):
                config_dict[key] = value.value
            elif isinstance(value, (list, dict)):
                # Detach mutable containers (allowed_origins, extra_config).
                config_dict[key] = value.copy()
            else:
                config_dict[key] = value

        return config_dict

    @classmethod
    def from_dict(cls, config_dict: Dict[str, Any]) -> 'DeploymentConfig':
        """Create a configuration from a dictionary.

        The input dictionary is left untouched, so the same dictionary can be
        passed more than once.

        Args:
            config_dict: Field values keyed by field name. Nested
                configurations may be given as dictionaries or as
                already-built configuration objects; ``provider`` and
                ``inference_engine`` may be enum members or their values.

        Returns:
            The constructed ``DeploymentConfig``.

        Raises:
            ValueError: If an enum value is unknown or validation in
                ``__post_init__`` fails.
        """
        # Work on a copy: replacing nested dicts in the caller's mapping
        # would make a second call with the same input fail.
        params = dict(config_dict)

        # Handle nested configs
        nested_types = {
            'model_config': ModelConfig,
            'runpod_config': RunPodServerlessConfig,
            'triton_config': TritonConfig,
        }
        for key, nested_cls in nested_types.items():
            raw = params.get(key)
            if raw is not None and not isinstance(raw, nested_cls):
                params[key] = nested_cls(**raw)

        # Handle enums
        enum_types = {
            'provider': DeploymentProvider,
            'inference_engine': InferenceEngine,
        }
        for key, enum_cls in enum_types.items():
            if key in params:
                params[key] = enum_cls(params[key])

        return cls(**params)
237
-
238
-
239
- # Predefined configurations for common deployment scenarios
240
-
241
def create_gemma_runpod_triton_config(
    model_id: str,
    runpod_api_key: str,
    model_source_path: str = "xenobordom/gemma-4b-alpaca-v1"
) -> DeploymentConfig:
    """
    Build the preset deployment for a Gemma model on RunPod Serverless,
    served through Triton with the TensorRT-LLM backend.

    Args:
        model_id: Unique identifier for the deployment
        runpod_api_key: RunPod API key
        model_source_path: HuggingFace model path or local path

    Returns:
        DeploymentConfig for Gemma deployment
    """
    # Values shared by the model and Triton sections of the preset.
    served_name = "gemma-4b-alpaca"
    repository_root = "/models"
    batch_limit = 8
    sequence_limit = 2048

    triton_settings = TritonConfig(
        model_repository=repository_root,
        model_name=served_name,
        backend="tensorrtllm",
        max_batch_size=batch_limit,
        max_sequence_length=sequence_limit,
        tensorrt_llm_model_dir="/models/tensorrt_llm",
        engine_dir="/models/engines",
        tokenizer_dir="/models/tokenizer",
    )

    container_env = {
        "TRITON_MODEL_REPOSITORY": repository_root,
        "CUDA_VISIBLE_DEVICES": "0",
    }
    runpod_settings = RunPodServerlessConfig(
        api_key=runpod_api_key,
        container_image="nvcr.io/nvidia/tritonserver:23.10-trtllm-python-py3",
        container_disk_in_gb=30,
        gpu_type="NVIDIA RTX A6000",
        gpu_count=1,
        min_workers=0,
        max_workers=3,
        idle_timeout=5,
        env_vars=container_env,
    )

    model_settings = ModelConfig(
        model_id=model_id,
        model_name=served_name,
        source_type="huggingface",
        source_path=model_source_path,
        model_format=ModelFormat.HUGGINGFACE,
        inference_engine=InferenceEngine.TRITON,
        model_type="llm",
        capabilities=["text_generation", "chat"],
        max_batch_size=batch_limit,
        max_sequence_length=sequence_limit,
        dtype="float16",
        use_tensorrt=True,
    )

    return DeploymentConfig(
        deployment_id=f"gemma-deployment-{model_id}",
        deployment_name=f"Gemma 4B Alpaca - {model_id}",
        description="Gemma 4B model fine-tuned on Alpaca dataset, deployed with Triton + TensorRT-LLM",
        provider=DeploymentProvider.RUNPOD_SERVERLESS,
        inference_engine=InferenceEngine.TRITON,
        model_config=model_settings,
        runpod_config=runpod_settings,
        triton_config=triton_settings,
    )
308
-
309
-
310
def create_local_triton_config(
    model_id: str,
    model_source_path: str,
    triton_model_repository: str = "./models/triton"
) -> DeploymentConfig:
    """
    Build the preset deployment for serving a model through a local Triton
    instance.

    Args:
        model_id: Unique identifier for the deployment
        model_source_path: Path to the model
        triton_model_repository: Path to Triton model repository

    Returns:
        DeploymentConfig for local deployment
    """
    # Values shared by the model and Triton sections of the preset.
    served_name = f"local-model-{model_id}"
    batch_limit = 4
    sequence_limit = 1024

    triton_settings = TritonConfig(
        model_repository=triton_model_repository,
        model_name=served_name,
        backend="python",  # Python backend is used for local development
        max_batch_size=batch_limit,
        max_sequence_length=sequence_limit,
    )

    model_settings = ModelConfig(
        model_id=model_id,
        model_name=served_name,
        source_type="local",
        source_path=model_source_path,
        model_format=ModelFormat.HUGGINGFACE,
        inference_engine=InferenceEngine.TRITON,
        model_type="llm",
        capabilities=["text_generation"],
        max_batch_size=batch_limit,
        max_sequence_length=sequence_limit,
        dtype="float16",
    )

    return DeploymentConfig(
        deployment_id=f"local-deployment-{model_id}",
        deployment_name=f"Local Model - {model_id}",
        description="Local model deployment for development and testing",
        provider=DeploymentProvider.LOCAL,
        inference_engine=InferenceEngine.TRITON,
        model_config=model_settings,
        triton_config=triton_settings,
    )