isa-model 0.4.0__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +466 -43
- isa_model/core/cache/redis_cache.py +12 -3
- isa_model/core/config/config_manager.py +230 -3
- isa_model/core/config.py +90 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +21 -1
- isa_model/core/database/supabase_client.py +154 -19
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +27 -18
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_manager.py +40 -17
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +174 -18
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/services/intelligent_model_selector.py +399 -21
- isa_model/core/storage/hf_storage.py +1 -1
- isa_model/core/types.py +1 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -370
- isa_model/deployment/local/__init__.py +31 -0
- isa_model/deployment/local/config.py +248 -0
- isa_model/deployment/local/gpu_gateway.py +607 -0
- isa_model/deployment/local/health_checker.py +428 -0
- isa_model/deployment/local/provider.py +586 -0
- isa_model/deployment/local/tensorrt_service.py +621 -0
- isa_model/deployment/local/transformers_service.py +644 -0
- isa_model/deployment/local/vllm_service.py +527 -0
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +137 -10
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/openai_stt_service.py +22 -6
- isa_model/inference/services/custom_model_manager.py +277 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +335 -24
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/local_llm_service.py +747 -0
- isa_model/inference/services/llm/ollama_llm_service.py +9 -2
- isa_model/inference/services/llm/openai_llm_service.py +33 -16
- isa_model/inference/services/llm/yyds_llm_service.py +8 -2
- isa_model/inference/services/vision/__init__.py +22 -1
- isa_model/inference/services/vision/blip_vision_service.py +359 -0
- isa_model/inference/services/vision/helpers/image_utils.py +8 -5
- isa_model/inference/services/vision/isa_vision_service.py +65 -4
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +172 -22
- isa_model/serving/api/middleware/auth.py +8 -2
- isa_model/serving/api/middleware/security.py +23 -33
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +4 -1
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +138 -2
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +680 -18
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +68 -54
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/METADATA +66 -24
- isa_model-0.4.3.dist-info/RECORD +193 -0
- isa_model/core/storage/minio_storage.py +0 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks/__init__.py +0 -27
- isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
- isa_model/eval/benchmarks.py +0 -701
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -24
- isa_model/eval/evaluators/audio_evaluator.py +0 -727
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/embedding_evaluator.py +0 -742
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/evaluators/vision_evaluator.py +0 -564
- isa_model/eval/example_evaluation.py +0 -395
- isa_model/eval/factory.py +0 -798
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/isa_benchmarks.py +0 -700
- isa_model/eval/isa_integration.py +0 -582
- isa_model/eval/metrics.py +0 -951
- isa_model/eval/tests/unit/test_basic.py +0 -396
- isa_model/serving/api/routes/evaluations.py +0 -579
- isa_model/training/__init__.py +0 -168
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -26
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/examples/intelligent_training_example.py +0 -281
- isa_model/training/factory.py +0 -424
- isa_model/training/intelligent/__init__.py +0 -25
- isa_model/training/intelligent/decision_engine.py +0 -643
- isa_model/training/intelligent/intelligent_factory.py +0 -888
- isa_model/training/intelligent/knowledge_base.py +0 -751
- isa_model/training/intelligent/resource_optimizer.py +0 -839
- isa_model/training/intelligent/task_classifier.py +0 -576
- isa_model/training/storage/__init__.py +0 -24
- isa_model/training/storage/core_integration.py +0 -439
- isa_model/training/storage/training_repository.py +0 -552
- isa_model/training/storage/training_storage.py +0 -628
- isa_model-0.4.0.dist-info/RECORD +0 -182
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,193 @@
|
|
1
|
+
isa_model/__init__.py,sha256=lYYKstKw33oavW6xS0-9cpsdYq-h0cfV_ZlGAwICRaU,868
|
2
|
+
isa_model/client.py,sha256=7OUA6yi0G94I8U4GOFcoXvCjWxk2ds1-3CDCivJXA6M,67628
|
3
|
+
isa_model/core/config.py,sha256=9OL8_EkBcnAH-RgyWUi3jblKo42m7K1JDeHa9C5CPL4,19519
|
4
|
+
isa_model/core/dependencies.py,sha256=2ZgGDjtYitBEVy8H3UppQSb_BId3--f2kQw-Lm4Umh8,10050
|
5
|
+
isa_model/core/pricing_manager.py,sha256=NWQLhNIzUDqS5_jBfVcJGrdOdRasFyifSNCliaIDvqU,17122
|
6
|
+
isa_model/core/types.py,sha256=jdO_q0FDuzvWURXZtxMV1Zj1XgARX9kopTviWFuq_FU,8713
|
7
|
+
isa_model/core/cache/redis_cache.py,sha256=J_A4OcVENsbc6RMOnXn6O-i320egM5hCQXTR7RlYz6s,14111
|
8
|
+
isa_model/core/config/__init__.py,sha256=SLeHQtYGDHl64NDVyb3ECQXOKepGM8YNHEoM8CVEWus,350
|
9
|
+
isa_model/core/config/config_manager.py,sha256=tF6EyNr_y6Jm0easV9aNHgBce4Ddamf0acaFlz7yToA,32289
|
10
|
+
isa_model/core/database/__init__.py,sha256=E2lp9te05QgdQfMeUq702t_23fv4Y7be_P2QU60Yqzs,18
|
11
|
+
isa_model/core/database/direct_db_client.py,sha256=jHp5AWyrXcYDRvrPvBRGH89FmPRAhgzL8MQsSt4zkTQ,4273
|
12
|
+
isa_model/core/database/migration_manager.py,sha256=GxSJtngsbK8oA8kAKkquLQby_DdXLTwU795NGGHMH8c,21748
|
13
|
+
isa_model/core/database/migrations.py,sha256=RpM1eqt7Chu736K0ij_42gKMYHg2HPR_qSt70xfJ_co,12626
|
14
|
+
isa_model/core/database/supabase_client.py,sha256=waY0VQLy9VM6FMIoViDe5yAgNW78qJaI3Jb2ohTNj98,11124
|
15
|
+
isa_model/core/discovery/__init__.py,sha256=U7YzSNqsyPYmT_TdMlLy9QMAotdfaFlDE-c9XgKk8-4,380
|
16
|
+
isa_model/core/discovery/consul_discovery.py,sha256=UziTVGVbejh0zUvayhISkiRJ5KeFcxUkw8t4gTK0UVw,6631
|
17
|
+
isa_model/core/logging/__init__.py,sha256=9PA5MJvn7y73aCUUqCgv5r2it2nNb38YXSG31oxNIDA,1213
|
18
|
+
isa_model/core/logging/influx_logger.py,sha256=TsPnWeKgV6pl2cz9NoqM0OV9z9jI1_uVbn6nTblfM94,19483
|
19
|
+
isa_model/core/logging/loki_logger.py,sha256=pFkKFK5ouKNa9dPehC_kV6af0urnAvzfvbYE5IFiwD0,4706
|
20
|
+
isa_model/core/models/__init__.py,sha256=bDzyE0KHIJxxoN3q08pvW_hHBeHux2aMdeKY4GlDzmU,1286
|
21
|
+
isa_model/core/models/config_models.py,sha256=Gy0oNndDsKVQ92Bz6aZp1JMqr4Z0jdoEJCCscIrpogI,22955
|
22
|
+
isa_model/core/models/deployment_billing_tracker.py,sha256=hW2e3vUkASvCycYhBdrZcsEtBjYo3TLTVpfwfVkaXnQ,16406
|
23
|
+
isa_model/core/models/model_billing_tracker.py,sha256=er35dsoKAGt8bjkQwO9f3MQ6U_NI6OIuhIn4PEOPEWU,17302
|
24
|
+
isa_model/core/models/model_manager.py,sha256=vjFYWxmhRGUDlrHkwySjN5d0hfCx-VqBKiMhgTXjs8Y,17093
|
25
|
+
isa_model/core/models/model_metadata.py,sha256=C6ubW12qmXAuqKmE_2BaO4HMB7WJqBkYDVXYCINsc-4,25148
|
26
|
+
isa_model/core/models/model_repo.py,sha256=1018Qi8fMfgXtU8DKfWvf-0hBE16Q7wJIzmEZa7x6bw,19868
|
27
|
+
isa_model/core/models/model_statistics_tracker.py,sha256=4KoKawwtEDAx8FV9ysmZS4nvRqZAgRSSIa-32f_Jhwk,10561
|
28
|
+
isa_model/core/models/model_storage.py,sha256=gpW7R_wDQh0WUo4CYkrQen9GMKn8Z8ys5iGQenaMmCM,4473
|
29
|
+
isa_model/core/models/model_version_manager.py,sha256=20BwNbCg1NlcmHmCxK_zMvpPmVFHg0B6ZCFnPLY6Yj8,37563
|
30
|
+
isa_model/core/models/system_models.py,sha256=I52nTi0UVft8tkJdb2LZrJ_Qxax-JE00_YKqnSa-P4E,32525
|
31
|
+
isa_model/core/repositories/__init__.py,sha256=RRERY7mWZxhSAZa4m6493l6sFl3CPlyL2bW6qJMEzD8,172
|
32
|
+
isa_model/core/repositories/config_repository.py,sha256=QlL22r_bGEV6mHfmztEIY5Zw3wIFoiR5IQJyIj36wXU,37428
|
33
|
+
isa_model/core/resilience/circuit_breaker.py,sha256=Ccoh3O31xVFJO2A0flnc9SI-sRqQ3sGKbwv3WbgJxBc,12435
|
34
|
+
isa_model/core/security/secrets.py,sha256=kzRjpSiGwY9z47NUlurK29uY_uMsA5lqk8_6Ywu8Zvw,13319
|
35
|
+
isa_model/core/services/__init__.py,sha256=TEE58Vk8JKIaQx8ELeAaWo-WPz0hjck9x-ZK7pbfiIE,422
|
36
|
+
isa_model/core/services/intelligent_model_selector.py,sha256=PPUWiMcV8DkCPMHhnIlsBgksUY8hKB4SjlFDW1zhLYY,29205
|
37
|
+
isa_model/core/storage/hf_storage.py,sha256=k271Rg5G7qUJAJ6VXQBTUHGU6y2NYBNWKVeBJm02DRo,14736
|
38
|
+
isa_model/core/storage/local_storage.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
39
|
+
isa_model/deployment/__init__.py,sha256=Y3IUEOriJYVZ-3ZEamMs4n6_X0OwtD2eguwBas5zgtg,345
|
40
|
+
isa_model/deployment/core/__init__.py,sha256=TRJ4tNjNFub_ObhZy61iZZpqC0CYsnv1HV1Qp_XWhVI,119
|
41
|
+
isa_model/deployment/core/deployment_manager.py,sha256=In2e5EuXwVJfg6ENjigOrgQyy19DCWX5uuwSa--Czzc,58337
|
42
|
+
isa_model/deployment/local/__init__.py,sha256=Ld1QbaDHIHnbW2IkSXVTZeDcxnmUXBa074uOSLRu5t0,904
|
43
|
+
isa_model/deployment/local/config.py,sha256=Kft5EORBcsO2HVizD0ct6VFIIs9sVBN-CjRnOrcm00g,9246
|
44
|
+
isa_model/deployment/local/gpu_gateway.py,sha256=be6d9eSWRufXvJH9GyklBWlXhOukITY1lnXTM6RPcQs,21954
|
45
|
+
isa_model/deployment/local/health_checker.py,sha256=_u2vwiwEGCbFA6laUu1JX6GfE6prrOSHiy6PclimGys,16392
|
46
|
+
isa_model/deployment/local/provider.py,sha256=6E1WfTUvI32aeEQs13TIyuy9xQmZqJeaJULfE2KLe4E,22763
|
47
|
+
isa_model/deployment/local/tensorrt_service.py,sha256=f05BkJMw2NhiMp18xW1RwRED4bIjZ0gmUS5OgEAGnk0,23026
|
48
|
+
isa_model/deployment/local/transformers_service.py,sha256=pdC3KppUzSVrWd-CKA8fXPC1uzy45S8FTtQj9odAWpM,23937
|
49
|
+
isa_model/deployment/local/vllm_service.py,sha256=zVuBopgzG6ulSvHnRE8h_dLQQpNqTDwHbXo88IKXrwk,18849
|
50
|
+
isa_model/deployment/modal/__init__.py,sha256=ZBcToGNtL6ztWY5pvqM1YMiL_F-S1xx9b-uZd8cuajc,380
|
51
|
+
isa_model/deployment/modal/config.py,sha256=8XhBMIbx6oDTf-P9ipQ58xmBYDbNZekZ4gixorBDIpw,4267
|
52
|
+
isa_model/deployment/modal/deployer.py,sha256=YNCtbO8FTVstz8OG6Kh8p3AM05dtbg5i73-JsuNy4KM,31961
|
53
|
+
isa_model/deployment/modal/services/__init__.py,sha256=m9D6jZ-RH3fohn5pNDLEfFQWj18LDlq565I2t8tTIAk,61
|
54
|
+
isa_model/deployment/modal/services/audio/__init__.py,sha256=YhBTWmI0k8onSU6K0MzW9NIS1d7uedPT_9U6G0Qmfio,41
|
55
|
+
isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py,sha256=w3s4hj78HedQ0g2X8_PJbXPBGcMmwnZeV1LHS_Auy_o,18637
|
56
|
+
isa_model/deployment/modal/services/audio/isa_audio_fish_service.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
57
|
+
isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py,sha256=2OE_J5KTglEh7iVnTb-2_phghvQKkTD7OIsHZ9fKfZI,33515
|
58
|
+
isa_model/deployment/modal/services/audio/isa_audio_service_v2.py,sha256=TMF-TXKcRsNT6vq6U-45PSvKfdJ-SBbpAD-lCC9x_zI,40832
|
59
|
+
isa_model/deployment/modal/services/embedding/__init__.py,sha256=uwKfvNrVTO_AvRKlONGCVaXwmSIs-lDNfF2gef_Clb0,45
|
60
|
+
isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py,sha256=K6TdCWD-Ko-vmKi03tVfpjZpX2Folv5EW79yPqJzT_U,10182
|
61
|
+
isa_model/deployment/modal/services/llm/__init__.py,sha256=dDp1ekrBb6E1cMwpE4kMFswlIxfs5G0M_LuTLvCAtKk,39
|
62
|
+
isa_model/deployment/modal/services/llm/isa_llm_service.py,sha256=-GVcBMRiKezT0oGkqirmwuKb0Oo2GsMrL-AFEwp5Ik8,14108
|
63
|
+
isa_model/deployment/modal/services/video/__init__.py,sha256=IehlRWhB0X-IOY1-rGXugY-BzOLr1jhMPQTLAZZTj_c,41
|
64
|
+
isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py,sha256=slxEZxFFnFkOBlxdOdNu7JRXJMWwl-gaOUHFQr5stZ0,15208
|
65
|
+
isa_model/deployment/modal/services/vision/__init__.py,sha256=JzkcYOTBIJ4vUUpZwwjYCpzyh1VtUOa-RSlymOzKdHU,42
|
66
|
+
isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py,sha256=1Y6s6mLRFLPhpItFLS61mbqGggR1UNd1rNtAluIzQrk,18501
|
67
|
+
isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py,sha256=Ldy0VvO7CGiPGDmPXWQCbCLUZgCGg3aBqWOA66Lvt5I,26797
|
68
|
+
isa_model/deployment/modal/services/vision/isa_vision_table_service.py,sha256=UhQinNRda0b11g3dYFjDCNaR0850IgCl1WtRghrFNu0,24827
|
69
|
+
isa_model/deployment/modal/services/vision/isa_vision_ui_service.py,sha256=BwoMhJQiQe2vGFteTsbbeVsemJZszjce9ech39Z0cWQ,34296
|
70
|
+
isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py,sha256=xy3IOEDifi2aMvJFB2_u4qfdD3YdXfgWW3_0z_BLNuo,25665
|
71
|
+
isa_model/deployment/modal/services/vision/simple_auto_deploy_vision_service.py,sha256=rfXsv9mh_w5cXHVYxA4fBD5ppyNY4HplsH34xp4WpY8,9882
|
72
|
+
isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py,sha256=3MHnvm6TFDycULcFaEdX95Rl0jY2VWYHDGzjDSJNCm0,1388
|
73
|
+
isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py,sha256=IfmIal7OfSdbnGeyGy-zMee2WAf5zteXJ0zAZiCZxSQ,1400
|
74
|
+
isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py,sha256=nQEhzFDU2dsAQkHApyeQpZHu0VLWtG3Ilhuv498wRLA,1388
|
75
|
+
isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py,sha256=yx3xiXyzskiVjwdr54favQ48UYO3hXzt2bQM9iR_irM,1404
|
76
|
+
isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py,sha256=fjPAVAtdcxOnEA252E4KPjTNkJq-_4UlG9Qj7eEF6tw,1404
|
77
|
+
isa_model/deployment/storage/__init__.py,sha256=TPxs4prCYb2AJ9NqNk9nh3kc6EFDkAkDmrqxq1ERWzo,138
|
78
|
+
isa_model/deployment/storage/deployment_repository.py,sha256=F6eFQjDXL4AAhw6F1hDMiU1rlnCL1zhZ_cMssVVEtHY,33168
|
79
|
+
isa_model/deployment/triton/__init__.py,sha256=TrsBOxzGeESj_GrVWUd3jsScFUQ5Cd266NXvwRbuAJ8,330
|
80
|
+
isa_model/deployment/triton/config.py,sha256=H7VemDvdLpREfSvD0DK86CXFg46l-eHnGf_vo8lOrH4,7319
|
81
|
+
isa_model/deployment/triton/provider.py,sha256=ZtpgF2kaHGtPO2EuXQEHMtwobMW025sliogkBtqjbe0,17391
|
82
|
+
isa_model/deployment/triton/configs/__init__.py,sha256=bxiam68sNO8eFutXKK9yaaKS-5MulIuhgusN_mdwz2A,50
|
83
|
+
isa_model/deployment/triton/scripts/__init__.py,sha256=fE3HxpLG9_wvXWIN27Tj-B9IEvtp_aZ0bUS3GzfRHqY,31
|
84
|
+
isa_model/deployment/triton/templates/__init__.py,sha256=rWtBQzM6kpC2yXcc3yCjrnWSj7W3jxbfXXZe2Rr1GL8,33
|
85
|
+
isa_model/inference/__init__.py,sha256=gmrJV1cdSyDpBZdfm8r0kPZsbnqogth0rXf_wT5fIFY,1381
|
86
|
+
isa_model/inference/ai_factory.py,sha256=ztTllV58ovfuO208aYQA_yT7iq88F7wdiPZr1TvW1_I,28716
|
87
|
+
isa_model/inference/base.py,sha256=qwOddnSGI0GUdD6qIdGBPQpkW7UjU3Y-zaZvu70B4WA,1278
|
88
|
+
isa_model/inference/legacy_services/__init__.py,sha256=H0RTKKE3UOqmpjc3S3GfysbhZZl6CnZd9_I38KTXg1E,563
|
89
|
+
isa_model/inference/legacy_services/model_evaluation.py,sha256=GbhcNM8uO0AnI59yuCYaTqDEdmh0pS4YxJVgJGL8Vjs,27964
|
90
|
+
isa_model/inference/legacy_services/model_service.py,sha256=9xEaLJWW1B2lLFL5-eP_uPFpfg8iYnqrAphpJnMTn2A,24751
|
91
|
+
isa_model/inference/legacy_services/model_serving.py,sha256=XQ7j6B02MsjUWKeeSVovKg0kljwMCLbRLeCuckmjtM8,28255
|
92
|
+
isa_model/inference/legacy_services/model_training.py,sha256=g0rfzSKGXzoJbNgEh8wDJlfdhHxh6-jXN0fv_B3-1C8,24036
|
93
|
+
isa_model/inference/models/__init__.py,sha256=FJ6goyHRf4RCnqbq75qAigL0FMKXulDOns-ebHWCgP8,579
|
94
|
+
isa_model/inference/models/inference_config.py,sha256=1ITGWOtQ3cVyucQq9Rih-Ab5uqaAGf31UfCYyJMY_DI,19529
|
95
|
+
isa_model/inference/models/inference_record.py,sha256=oHidCa9-lHSonOSoc24tAbrsSPBgm3bb8-cqzWOqzcU,24840
|
96
|
+
isa_model/inference/models/performance_models.py,sha256=KZWC8fEslhTohL2y-nz8S39P9RZ9SgJ6piQ9pMXh04E,26434
|
97
|
+
isa_model/inference/repositories/__init__.py,sha256=SYTQX1E5L6zTuo_p_KnDjYefoCKw4p1m4pW_FDb_sgM,191
|
98
|
+
isa_model/inference/repositories/inference_repository.py,sha256=QnfSzkcLQ5CPcABTmSYBRAv_5SVk0ayjVW6B1Q0SKaQ,31718
|
99
|
+
isa_model/inference/services/__init__.py,sha256=yfLz0YGl8ixk6LfTRL6cRTvZMb9F_Pv1QRgGyNc9xYM,386
|
100
|
+
isa_model/inference/services/base_service.py,sha256=NJIvq7YpGw55ah-axDR2hcu40B2gm6L_WYXyfX0rSaE,5816
|
101
|
+
isa_model/inference/services/custom_model_manager.py,sha256=HUHSDOWArJYMfdvaI-gfCJkVRVFdftScOw7BgS-h3zo,10829
|
102
|
+
isa_model/inference/services/audio/__init__.py,sha256=Hgtk3j5H4U3YxNlfG8UaU2eUNOWgrpSA8LN_tKEFWMk,616
|
103
|
+
isa_model/inference/services/audio/base_realtime_service.py,sha256=hSP89_hnzLBnmBvFOQlU_tW8UT2QKWKVR9Z7fwsVPa8,8125
|
104
|
+
isa_model/inference/services/audio/base_stt_service.py,sha256=qahYTLpf8gruvhEtS5bWDXPiYbgxXF3nYnqTq3Ckc0E,13361
|
105
|
+
isa_model/inference/services/audio/base_tts_service.py,sha256=PgctcV98Pe9I2kSjScsm8epRwdaEU-vAGCIfdd2P8us,6924
|
106
|
+
isa_model/inference/services/audio/isa_tts_service.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
107
|
+
isa_model/inference/services/audio/openai_realtime_service.py,sha256=vo4ow8CULZJXz4nSepMTq7_uKufWvRmcoezhmX2Q16s,22101
|
108
|
+
isa_model/inference/services/audio/openai_stt_service.py,sha256=2UBBrRP8PXYTlDz9gEh4_mOqTSIdCewG8Ptu9aT2nCo,13476
|
109
|
+
isa_model/inference/services/audio/openai_tts_service.py,sha256=C4vIRvCKoySs4-zBEteI_DZYZsATS84W_ZUwbxjJjpA,8253
|
110
|
+
isa_model/inference/services/audio/replicate_tts_service.py,sha256=kCG_bBNgW7GQwt5-ZdwPSqsMiTV54-FhSowFwNWGvg0,10292
|
111
|
+
isa_model/inference/services/embedding/__init__.py,sha256=xeWeq3jighDBCUzgveTiH11VLkkhk-J6z5cq9sf1mEk,311
|
112
|
+
isa_model/inference/services/embedding/base_embed_service.py,sha256=V57nDU_VzWXjw3dqyaTXBr3ntxT2VI1my6uSAX-vvxY,10382
|
113
|
+
isa_model/inference/services/embedding/isa_embed_service.py,sha256=AQ3yuGas64SRW8jlB0rvXhq2cyD5NMlCekodPCuN8dw,11242
|
114
|
+
isa_model/inference/services/embedding/ollama_embed_service.py,sha256=7OZMMrDO4eePNJXSjNB5E4j0rtX0HVq3RgNSzzqU1nA,7291
|
115
|
+
isa_model/inference/services/embedding/openai_embed_service.py,sha256=CaoNtepIreMA0wo5i0lUgJHUjVp4vdHQ59eT_gan3s4,8321
|
116
|
+
isa_model/inference/services/embedding/resilient_embed_service.py,sha256=26HPaWmVKQ_fPClTNew1VZeAX5vy4ncKxp1HR04yAYw,11735
|
117
|
+
isa_model/inference/services/embedding/helpers/text_splitter.py,sha256=6AbvcQ7H6MS54B9d9T1XBGg4GhvmKfZqp00lKp9pF-U,1635
|
118
|
+
isa_model/inference/services/embedding/tests/test_embedding.py,sha256=_Syrgt2sYCS8oNCqaJMuzck_FRqeSx0Nnk9B1j3OJBk,10200
|
119
|
+
isa_model/inference/services/img/__init__.py,sha256=moVvATbOEEqzKRtu2A9E7eBzlrkbr5oY1bGyH_3PebY,577
|
120
|
+
isa_model/inference/services/img/base_image_gen_service.py,sha256=3BYoUo9ASw02ZPl2T9Pwvu4uVD-GOZIFEaGsrnuKCrM,8101
|
121
|
+
isa_model/inference/services/img/replicate_image_gen_service.py,sha256=mspkdRh_snMUPQxRccpfzcAXO3TAxZsNzei0c45G7YA,6447
|
122
|
+
isa_model/inference/services/img/services/replicate_face_swap.py,sha256=Q6SiWJN9eNvD1nv4kWXnvvPnm9A1DLb7Gsb_vwUfUJw,7385
|
123
|
+
isa_model/inference/services/img/services/replicate_flux.py,sha256=BUIkuBUMZCH5ChvbIhmJ_1pJVNo0CjY7q7hgvsaFJO0,8008
|
124
|
+
isa_model/inference/services/img/services/replicate_flux_kontext.py,sha256=3DEwruobN7JL6-3LNMOJVOtorjlqV7ykD2ul4NTQ9Fs,7786
|
125
|
+
isa_model/inference/services/img/services/replicate_sticker_maker.py,sha256=9D_IISOpFfC2MWlP1BFohuIwHcI1H_b95zQU52Nl-Mw,8528
|
126
|
+
isa_model/inference/services/img/tests/test_img_client.py,sha256=r6lYybP_ty3A55LoaE_GRuXvXSh35KXOVbODGskLza0,11313
|
127
|
+
isa_model/inference/services/llm/__init__.py,sha256=aiNdB692nopCy8QaopVDB0e-J7mg22LrRzk4ytlu2iQ,769
|
128
|
+
isa_model/inference/services/llm/base_llm_service.py,sha256=CUw24rMytVXAUcberTzogKRLwYZiYifyg-kcNfHiHkg,36402
|
129
|
+
isa_model/inference/services/llm/cerebras_llm_service.py,sha256=8BU9I7HHO481nn7ddsiP4nl2ItYTCQJzJyaIArKA0ZA,25126
|
130
|
+
isa_model/inference/services/llm/huggingface_llm_service.py,sha256=mWnOGh3OsRyaL002Ax71Mb7oXp254VDDdP0QiQ-p9Yk,22733
|
131
|
+
isa_model/inference/services/llm/local_llm_service.py,sha256=_ILRD-oKcolf972aXe3zPS_tBu8SD-xH_Iw29alpkHM,27606
|
132
|
+
isa_model/inference/services/llm/ollama_llm_service.py,sha256=78VNSspzlQrXDqAxUR52jLGIKnBw4e_4LT2unAFMiTk,17967
|
133
|
+
isa_model/inference/services/llm/openai_llm_service.py,sha256=BpYugS2Vsrc-SS69cnW2VqFv4JXMbgglXvvbNgUZNZY,43874
|
134
|
+
isa_model/inference/services/llm/yyds_llm_service.py,sha256=ZHl2ukcDVkwYahF4OV5etTvJKa9Ni6O1TkJp75pQWaA,12495
|
135
|
+
isa_model/inference/services/llm/helpers/llm_adapter.py,sha256=7PrpiKvZaMfI9U6wabfTegwDMKcGDsv6I_lqnzcsE-o,24392
|
136
|
+
isa_model/inference/services/llm/helpers/llm_prompts.py,sha256=qGcG5hiBcnuq3IrcpNvshpWR1TqUrkdtf2Il__xeBoo,21661
|
137
|
+
isa_model/inference/services/llm/helpers/llm_utils.py,sha256=x2-wbij95bWgrZpJyXah9uMhEHKYAznChYRldT0VVno,22776
|
138
|
+
isa_model/inference/services/ml/base_ml_service.py,sha256=mLBA6ENowa3KVzNqHyhWxf_Pr-cJJj84lDE4TniPzYI,2894
|
139
|
+
isa_model/inference/services/ml/sklearn_ml_service.py,sha256=Lf9JrwvI25lca7JBbjB_e66eAUtXFbwxZ3Hs13dVGkA,5512
|
140
|
+
isa_model/inference/services/vision/__init__.py,sha256=1GO2NoC7p8IJ92mI6fGcPaN4MeFzLhdNdNlAnFYpzpE,1839
|
141
|
+
isa_model/inference/services/vision/base_vision_service.py,sha256=mjrfcUT01HBi0k1qeIL3CkpkvQIuL_jar-N03W8sMV8,10531
|
142
|
+
isa_model/inference/services/vision/blip_vision_service.py,sha256=tmGCznQ9qBsidLV_mnKEtvpSUPvBUbwJdwviKYXrrkA,12020
|
143
|
+
isa_model/inference/services/vision/isa_vision_service.py,sha256=OPuIZmG_lYOgajGfrQj4uLzVk5Y4H0PkeSNViIiL1O0,22960
|
144
|
+
isa_model/inference/services/vision/openai_vision_service.py,sha256=LeD910WWyJd6QiJncSM3x_Whj-a32Vr1_2FG4gfjtc4,10179
|
145
|
+
isa_model/inference/services/vision/replicate_vision_service.py,sha256=smRkSCTwk5mvyKVnvyplqPNuVYjRZngVBWxTCbFmrxA,20679
|
146
|
+
isa_model/inference/services/vision/vgg16_vision_service.py,sha256=5w9r3vXQz5EAdXFPawtsuWzaNlhFA2N0xcJxSJcudQI,9382
|
147
|
+
isa_model/inference/services/vision/disabled/isA_vision_service.py,sha256=VYa8VJtxDB9KdnfNW0GPEP_TPker4pHp33gLD_TnpaM,18336
|
148
|
+
isa_model/inference/services/vision/helpers/image_utils.py,sha256=BYEFnOu2MBRJrJmKdALfFy5CTKpI4Co9PTVCxJvdFqc,11388
|
149
|
+
isa_model/inference/services/vision/helpers/vision_prompts.py,sha256=WbzOYu-Z2-8Xn9dcvuPRTA7VTy23_uoMRRGO4t0wZ8Q,12098
|
150
|
+
isa_model/inference/services/vision/tests/test_ocr_client.py,sha256=IY2KbHuIf1FmKFrUO9HrmKtgyT9achylwKykOIFLR8E,11250
|
151
|
+
isa_model/inference/utils/conversion/bge_rerank_convert.py,sha256=1dvtxe5-PPCe2Au6SO8F2XaD-xdIoeA4zDTcid2L9FU,2691
|
152
|
+
isa_model/inference/utils/conversion/onnx_converter.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
153
|
+
isa_model/inference/utils/conversion/torch_converter.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
154
|
+
isa_model/serving/__init__.py,sha256=LTO0Adbvm7A-bgQqtuOQSoHvdu9OH3OrEjYgQanuHgI,429
|
155
|
+
isa_model/serving/modal_proxy_server.py,sha256=U8AJMF4ewtTGHjmbLb6ezR3NCT6d0APBtFuKVxVkRu4,7481
|
156
|
+
isa_model/serving/api/__init__.py,sha256=wgWD69eqV37fFTLxhz8b0rOn_34P7SZHoWw2sufWjk4,162
|
157
|
+
isa_model/serving/api/cache_manager.py,sha256=pBA9-4OUrHlyqYGtNbJSfZrDYPfEs2DV1Cx1qI0pM6U,8120
|
158
|
+
isa_model/serving/api/error_handlers.py,sha256=UigiYCu7O59Mp-GkB3grzMgAz5rl9ClgK81DyEzX6ZU,10869
|
159
|
+
isa_model/serving/api/fastapi_server.py,sha256=huSO9ZMuFirr5LhQO__mfM0tq1qKik11Wid9Qzk6T-Y,12367
|
160
|
+
isa_model/serving/api/startup.py,sha256=iZqRnkgBFvuMYkueeVxKheZZ13Hy5xoaeenX2udrPW8,12944
|
161
|
+
isa_model/serving/api/dependencies/__init__.py,sha256=e9ho7V27vnK73k2R7uRqVFzJ9hOHOsEydSz5mOAQbQU,21
|
162
|
+
isa_model/serving/api/dependencies/auth.py,sha256=6G46JMx35O_9PWhc66ptA67BTH3fb9ASgdOW-dKfK0g,6149
|
163
|
+
isa_model/serving/api/dependencies/database.py,sha256=fh8nrLNuD2ATVHnaMOxGD8ueR2g1f54rgflM1ISqcWg,4145
|
164
|
+
isa_model/serving/api/middleware/__init__.py,sha256=iCKUYECf0bjNGXgV91K03hb8Dnp0Jc_wnUL897Rd0sg,163
|
165
|
+
isa_model/serving/api/middleware/auth.py,sha256=WRoRrsDTlf4FDa7E8PnNhY5qfhUn76Be3ZMOPdi3FMk,11313
|
166
|
+
isa_model/serving/api/middleware/request_logger.py,sha256=d48n6tp1pqZ7HFWFl8jg6er24ugWkWkMOc1y80aqPU8,2938
|
167
|
+
isa_model/serving/api/middleware/security.py,sha256=YBzP3BZtEM1-PLSvVEuyTIxwGJqtw0IioRFWPUSu8FA,10233
|
168
|
+
isa_model/serving/api/middleware/tenant_context.py,sha256=uf2Lla2aov-vIOXCIukBURn90GJ3G484tbqj993NktI,16008
|
169
|
+
isa_model/serving/api/routes/__init__.py,sha256=RIaG9OPg0AjAIVbtMzwnqGyNU-tuQXbdvFcYOt4b_Do,84
|
170
|
+
isa_model/serving/api/routes/analytics.py,sha256=qUVo1SV-wNJZPyvVx_w80gL6JYuiL3DbyEahEZGJIVU,17655
|
171
|
+
isa_model/serving/api/routes/config.py,sha256=ER2PiKExoctMLkxs9ZxihGB0rEiczgU2Zt-FxpzMPsY,22453
|
172
|
+
isa_model/serving/api/routes/deployment_billing.py,sha256=-9Ut6FIexwXbL_wT6oG-6yRxJLvCTXMBThiJn3av1B0,12350
|
173
|
+
isa_model/serving/api/routes/deployments.py,sha256=DQHCePARLy1xht7nmeGlfomdUhWDqBzeAwReSI0arCc,17536
|
174
|
+
isa_model/serving/api/routes/gpu_gateway.py,sha256=cTZmOApxinMqSQe1W9nglx3UduUnC-wRJ311bEajSV0,15512
|
175
|
+
isa_model/serving/api/routes/health.py,sha256=dqmTLF_x0AvEdXHPYd9mf4mqUN-Z5oziNW_7yMI735s,2974
|
176
|
+
isa_model/serving/api/routes/inference_monitoring.py,sha256=yXLQApZv5kDeTQjQAduKyv30k36AGl7tQ9ifG-wn21I,18799
|
177
|
+
isa_model/serving/api/routes/llm.py,sha256=5ZVxWugff0i6VBKz_Nv5CqacMZJsPZEKyoSB6XDrW34,385
|
178
|
+
isa_model/serving/api/routes/local_deployments.py,sha256=LbAiRcozooyhckg4joQXCWjpX2Au8Wt-MMq-GLZHul0,16289
|
179
|
+
isa_model/serving/api/routes/logs.py,sha256=9t8cft3fprpQHHCj8UNxvcHmvOiELxbu7WXoWe8JLPw,14238
|
180
|
+
isa_model/serving/api/routes/settings.py,sha256=Xj_uXnRxmHpgSxUfztvrwE2yjWtlWQElE2CizmeeVds,20905
|
181
|
+
isa_model/serving/api/routes/tenants.py,sha256=7gU7xpEjYuDeCunWWeY6BIwuEw-t3_ctqYDU5Sv3GBI,21000
|
182
|
+
isa_model/serving/api/routes/ui_analysis.py,sha256=-WxLaRKQNHnRh4okB85cWA4blTegpEPZtzHTsF3yeeU,6848
|
183
|
+
isa_model/serving/api/routes/unified.py,sha256=rKCHKU4accmnE2jdZw-ZVSN8hDh8vB0a1Q-8U35faQM,44294
|
184
|
+
isa_model/serving/api/routes/vision.py,sha256=U9jxssQYe6igtayUW0C2fcYwqmLRIE15__X-5Ru9J4c,396
|
185
|
+
isa_model/serving/api/routes/webhooks.py,sha256=kspHgX8PIZ5L6S0klnV0XtGoE9BiTfYTMMx7Qsuf6CY,16219
|
186
|
+
isa_model/serving/api/schemas/__init__.py,sha256=Tu_hzxoKW1ZHpww3-5ER4A2hNuDByZ0rAfrgaJ7Bs-M,275
|
187
|
+
isa_model/serving/api/schemas/common.py,sha256=HVaAS7wlvqrwC1gMZ2Cvo0vzHB053x2uOTAwUoY2vsE,696
|
188
|
+
isa_model/serving/api/schemas/ui_analysis.py,sha256=IpOcIvmUeXN1UtZsbGozMfV1vvz7AVF2PVXjjxYl_0k,4089
|
189
|
+
isa_model/utils/gpu_utils.py,sha256=HbMvJzSsOCcjOJluUrszAJ58dC8LPnyA_nQn9s_1I6c,11730
|
190
|
+
isa_model-0.4.3.dist-info/METADATA,sha256=2xHn4pAvo10QKBDQK34v0-acBWGBUZRR6ZUZa-xq7BU,15090
|
191
|
+
isa_model-0.4.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
192
|
+
isa_model-0.4.3.dist-info/top_level.txt,sha256=eHSy_Xb3kNkh2kK11mi1mZh0Wz91AQ5b8k2KFYO-rE8,10
|
193
|
+
isa_model-0.4.3.dist-info/RECORD,,
|
File without changes
|
@@ -1,10 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Modal Deployment Module
|
3
|
-
|
4
|
-
Modal.com cloud deployment for ISA Model services
|
5
|
-
"""
|
6
|
-
|
7
|
-
from .ui_analysis_service import UIAnalysisService as UIAnalysisModalService
|
8
|
-
from .deployment_manager import ModalDeployment
|
9
|
-
|
10
|
-
__all__ = ["UIAnalysisModalService", "ModalDeployment"]
|
@@ -1,356 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Deployment Configuration Classes
|
3
|
-
|
4
|
-
Defines configuration classes for different deployment scenarios including
|
5
|
-
RunPod serverless, Triton inference server, and TensorRT-LLM backend.
|
6
|
-
"""
|
7
|
-
|
8
|
-
from dataclasses import dataclass, field
|
9
|
-
from typing import Optional, Dict, Any, List
|
10
|
-
from enum import Enum
|
11
|
-
from pathlib import Path
|
12
|
-
|
13
|
-
|
14
|
-
class DeploymentProvider(str, Enum):
|
15
|
-
"""Deployment providers"""
|
16
|
-
RUNPOD_SERVERLESS = "runpod_serverless"
|
17
|
-
RUNPOD_PODS = "runpod_pods"
|
18
|
-
AWS_LAMBDA = "aws_lambda"
|
19
|
-
GOOGLE_CLOUD_RUN = "google_cloud_run"
|
20
|
-
AZURE_CONTAINER_INSTANCES = "azure_container_instances"
|
21
|
-
LOCAL = "local"
|
22
|
-
|
23
|
-
|
24
|
-
class InferenceEngine(str, Enum):
|
25
|
-
"""Inference engines"""
|
26
|
-
TRITON = "triton"
|
27
|
-
VLLM = "vllm"
|
28
|
-
TENSORRT_LLM = "tensorrt_llm"
|
29
|
-
HUGGINGFACE = "huggingface"
|
30
|
-
ONNX = "onnx"
|
31
|
-
TORCHSCRIPT = "torchscript"
|
32
|
-
|
33
|
-
|
34
|
-
class ModelFormat(str, Enum):
|
35
|
-
"""Model formats for deployment"""
|
36
|
-
HUGGINGFACE = "huggingface"
|
37
|
-
TENSORRT = "tensorrt"
|
38
|
-
ONNX = "onnx"
|
39
|
-
TORCHSCRIPT = "torchscript"
|
40
|
-
SAFETENSORS = "safetensors"
|
41
|
-
|
42
|
-
|
43
|
-
@dataclass
|
44
|
-
class TritonConfig:
|
45
|
-
"""Configuration for Triton Inference Server"""
|
46
|
-
|
47
|
-
# Model repository configuration
|
48
|
-
model_repository: str = "/models"
|
49
|
-
model_name: str = "model"
|
50
|
-
model_version: str = "1"
|
51
|
-
|
52
|
-
# Backend configuration
|
53
|
-
backend: str = "tensorrtllm" # tensorrtllm, python, onnxruntime
|
54
|
-
max_batch_size: int = 8
|
55
|
-
max_sequence_length: int = 2048
|
56
|
-
|
57
|
-
# TensorRT-LLM specific
|
58
|
-
tensorrt_llm_model_dir: str = "/models/tensorrt_llm"
|
59
|
-
engine_dir: str = "/models/engines"
|
60
|
-
tokenizer_dir: str = "/models/tokenizer"
|
61
|
-
|
62
|
-
# Performance settings
|
63
|
-
instance_group_count: int = 1
|
64
|
-
instance_group_kind: str = "KIND_GPU" # KIND_GPU, KIND_CPU
|
65
|
-
|
66
|
-
# Memory settings
|
67
|
-
optimization_level: str = "OPTIMIZATION_LEVEL_ENABLED"
|
68
|
-
enable_pinned_input: bool = True
|
69
|
-
enable_pinned_output: bool = True
|
70
|
-
|
71
|
-
def to_dict(self) -> Dict[str, Any]:
|
72
|
-
"""Convert to dictionary"""
|
73
|
-
return self.__dict__.copy()
|
74
|
-
|
75
|
-
|
76
|
-
@dataclass
|
77
|
-
class RunPodServerlessConfig:
|
78
|
-
"""Configuration for RunPod Serverless deployment"""
|
79
|
-
|
80
|
-
# RunPod settings
|
81
|
-
api_key: str
|
82
|
-
endpoint_id: Optional[str] = None
|
83
|
-
template_id: Optional[str] = None
|
84
|
-
|
85
|
-
# Container configuration
|
86
|
-
container_image: str = "runpod/pytorch:2.1.0-py3.10-cuda11.8.0-devel-ubuntu22.04"
|
87
|
-
container_disk_in_gb: int = 20
|
88
|
-
|
89
|
-
# GPU configuration
|
90
|
-
gpu_type: str = "NVIDIA RTX A6000"
|
91
|
-
gpu_count: int = 1
|
92
|
-
|
93
|
-
# Scaling configuration
|
94
|
-
min_workers: int = 0
|
95
|
-
max_workers: int = 3
|
96
|
-
idle_timeout: int = 5 # seconds
|
97
|
-
|
98
|
-
# Network configuration
|
99
|
-
network_volume_id: Optional[str] = None
|
100
|
-
|
101
|
-
# Environment variables
|
102
|
-
env_vars: Dict[str, str] = field(default_factory=dict)
|
103
|
-
|
104
|
-
def to_dict(self) -> Dict[str, Any]:
|
105
|
-
"""Convert to dictionary"""
|
106
|
-
return self.__dict__.copy()
|
107
|
-
|
108
|
-
|
109
|
-
@dataclass
|
110
|
-
class ModelConfig:
|
111
|
-
"""Configuration for model deployment"""
|
112
|
-
|
113
|
-
# Model identification
|
114
|
-
model_id: str
|
115
|
-
model_name: str
|
116
|
-
model_version: str = "1.0.0"
|
117
|
-
|
118
|
-
# Model source
|
119
|
-
source_type: str = "huggingface" # huggingface, local, s3, gcs
|
120
|
-
source_path: str = ""
|
121
|
-
|
122
|
-
# Model format and engine
|
123
|
-
model_format: ModelFormat = ModelFormat.HUGGINGFACE
|
124
|
-
inference_engine: InferenceEngine = InferenceEngine.TRITON
|
125
|
-
|
126
|
-
# Model metadata
|
127
|
-
model_type: str = "llm" # llm, embedding, vision, audio
|
128
|
-
capabilities: List[str] = field(default_factory=lambda: ["text_generation"])
|
129
|
-
|
130
|
-
# Performance configuration
|
131
|
-
max_batch_size: int = 8
|
132
|
-
max_sequence_length: int = 2048
|
133
|
-
dtype: str = "float16" # float32, float16, int8, int4
|
134
|
-
|
135
|
-
# Optimization settings
|
136
|
-
use_tensorrt: bool = True
|
137
|
-
use_quantization: bool = False
|
138
|
-
quantization_method: str = "int8" # int8, int4, awq, gptq
|
139
|
-
|
140
|
-
def to_dict(self) -> Dict[str, Any]:
|
141
|
-
"""Convert to dictionary"""
|
142
|
-
return self.__dict__.copy()
|
143
|
-
|
144
|
-
|
145
|
-
@dataclass
|
146
|
-
class DeploymentConfig:
|
147
|
-
"""Main deployment configuration"""
|
148
|
-
|
149
|
-
# Deployment identification
|
150
|
-
deployment_id: str
|
151
|
-
deployment_name: str
|
152
|
-
description: Optional[str] = None
|
153
|
-
|
154
|
-
# Provider and engine configuration
|
155
|
-
provider: DeploymentProvider = DeploymentProvider.RUNPOD_SERVERLESS
|
156
|
-
inference_engine: InferenceEngine = InferenceEngine.TRITON
|
157
|
-
|
158
|
-
# Model configuration
|
159
|
-
model_config: ModelConfig = None
|
160
|
-
|
161
|
-
# Provider-specific configurations
|
162
|
-
runpod_config: Optional[RunPodServerlessConfig] = None
|
163
|
-
triton_config: Optional[TritonConfig] = None
|
164
|
-
|
165
|
-
# Health check configuration
|
166
|
-
health_check_path: str = "/health"
|
167
|
-
health_check_timeout: int = 30
|
168
|
-
|
169
|
-
# Monitoring configuration
|
170
|
-
enable_logging: bool = True
|
171
|
-
log_level: str = "INFO"
|
172
|
-
enable_metrics: bool = True
|
173
|
-
|
174
|
-
# Networking
|
175
|
-
custom_domain: Optional[str] = None
|
176
|
-
allowed_origins: List[str] = field(default_factory=lambda: ["*"])
|
177
|
-
|
178
|
-
# Additional settings
|
179
|
-
extra_config: Dict[str, Any] = field(default_factory=dict)
|
180
|
-
|
181
|
-
def __post_init__(self):
|
182
|
-
"""Validate configuration after initialization"""
|
183
|
-
if not self.deployment_id:
|
184
|
-
raise ValueError("deployment_id is required")
|
185
|
-
|
186
|
-
if not self.deployment_name:
|
187
|
-
raise ValueError("deployment_name is required")
|
188
|
-
|
189
|
-
if not self.model_config:
|
190
|
-
raise ValueError("model_config is required")
|
191
|
-
|
192
|
-
# Set default provider configs if not provided
|
193
|
-
if self.provider == DeploymentProvider.RUNPOD_SERVERLESS and not self.runpod_config:
|
194
|
-
self.runpod_config = RunPodServerlessConfig(api_key="")
|
195
|
-
|
196
|
-
if self.inference_engine == InferenceEngine.TRITON and not self.triton_config:
|
197
|
-
self.triton_config = TritonConfig()
|
198
|
-
|
199
|
-
def to_dict(self) -> Dict[str, Any]:
|
200
|
-
"""Convert config to dictionary"""
|
201
|
-
config_dict = {}
|
202
|
-
|
203
|
-
for key, value in self.__dict__.items():
|
204
|
-
if key in ['model_config', 'runpod_config', 'triton_config']:
|
205
|
-
if value is not None:
|
206
|
-
config_dict[key] = value.to_dict()
|
207
|
-
else:
|
208
|
-
config_dict[key] = None
|
209
|
-
elif isinstance(value, Enum):
|
210
|
-
config_dict[key] = value.value
|
211
|
-
else:
|
212
|
-
config_dict[key] = value
|
213
|
-
|
214
|
-
return config_dict
|
215
|
-
|
216
|
-
@classmethod
|
217
|
-
def from_dict(cls, config_dict: Dict[str, Any]) -> 'DeploymentConfig':
|
218
|
-
"""Create config from dictionary"""
|
219
|
-
# Handle nested configs
|
220
|
-
if 'model_config' in config_dict and config_dict['model_config'] is not None:
|
221
|
-
config_dict['model_config'] = ModelConfig(**config_dict['model_config'])
|
222
|
-
|
223
|
-
if 'runpod_config' in config_dict and config_dict['runpod_config'] is not None:
|
224
|
-
config_dict['runpod_config'] = RunPodServerlessConfig(**config_dict['runpod_config'])
|
225
|
-
|
226
|
-
if 'triton_config' in config_dict and config_dict['triton_config'] is not None:
|
227
|
-
config_dict['triton_config'] = TritonConfig(**config_dict['triton_config'])
|
228
|
-
|
229
|
-
# Handle enums
|
230
|
-
if 'provider' in config_dict:
|
231
|
-
config_dict['provider'] = DeploymentProvider(config_dict['provider'])
|
232
|
-
|
233
|
-
if 'inference_engine' in config_dict:
|
234
|
-
config_dict['inference_engine'] = InferenceEngine(config_dict['inference_engine'])
|
235
|
-
|
236
|
-
return cls(**config_dict)
|
237
|
-
|
238
|
-
|
239
|
-
# Predefined configurations for common deployment scenarios
|
240
|
-
|
241
|
-
def create_gemma_runpod_triton_config(
|
242
|
-
model_id: str,
|
243
|
-
runpod_api_key: str,
|
244
|
-
model_source_path: str = "xenobordom/gemma-4b-alpaca-v1"
|
245
|
-
) -> DeploymentConfig:
|
246
|
-
"""
|
247
|
-
Create a deployment configuration for Gemma model on RunPod with Triton + TensorRT-LLM.
|
248
|
-
|
249
|
-
Args:
|
250
|
-
model_id: Unique identifier for the deployment
|
251
|
-
runpod_api_key: RunPod API key
|
252
|
-
model_source_path: HuggingFace model path or local path
|
253
|
-
|
254
|
-
Returns:
|
255
|
-
DeploymentConfig for Gemma deployment
|
256
|
-
"""
|
257
|
-
model_config = ModelConfig(
|
258
|
-
model_id=model_id,
|
259
|
-
model_name="gemma-4b-alpaca",
|
260
|
-
source_type="huggingface",
|
261
|
-
source_path=model_source_path,
|
262
|
-
model_format=ModelFormat.HUGGINGFACE,
|
263
|
-
inference_engine=InferenceEngine.TRITON,
|
264
|
-
model_type="llm",
|
265
|
-
capabilities=["text_generation", "chat"],
|
266
|
-
max_batch_size=8,
|
267
|
-
max_sequence_length=2048,
|
268
|
-
dtype="float16",
|
269
|
-
use_tensorrt=True
|
270
|
-
)
|
271
|
-
|
272
|
-
runpod_config = RunPodServerlessConfig(
|
273
|
-
api_key=runpod_api_key,
|
274
|
-
container_image="nvcr.io/nvidia/tritonserver:23.10-trtllm-python-py3",
|
275
|
-
container_disk_in_gb=30,
|
276
|
-
gpu_type="NVIDIA RTX A6000",
|
277
|
-
gpu_count=1,
|
278
|
-
min_workers=0,
|
279
|
-
max_workers=3,
|
280
|
-
idle_timeout=5,
|
281
|
-
env_vars={
|
282
|
-
"TRITON_MODEL_REPOSITORY": "/models",
|
283
|
-
"CUDA_VISIBLE_DEVICES": "0"
|
284
|
-
}
|
285
|
-
)
|
286
|
-
|
287
|
-
triton_config = TritonConfig(
|
288
|
-
model_repository="/models",
|
289
|
-
model_name="gemma-4b-alpaca",
|
290
|
-
backend="tensorrtllm",
|
291
|
-
max_batch_size=8,
|
292
|
-
max_sequence_length=2048,
|
293
|
-
tensorrt_llm_model_dir="/models/tensorrt_llm",
|
294
|
-
engine_dir="/models/engines",
|
295
|
-
tokenizer_dir="/models/tokenizer"
|
296
|
-
)
|
297
|
-
|
298
|
-
return DeploymentConfig(
|
299
|
-
deployment_id=f"gemma-deployment-{model_id}",
|
300
|
-
deployment_name=f"Gemma 4B Alpaca - {model_id}",
|
301
|
-
description="Gemma 4B model fine-tuned on Alpaca dataset, deployed with Triton + TensorRT-LLM",
|
302
|
-
provider=DeploymentProvider.RUNPOD_SERVERLESS,
|
303
|
-
inference_engine=InferenceEngine.TRITON,
|
304
|
-
model_config=model_config,
|
305
|
-
runpod_config=runpod_config,
|
306
|
-
triton_config=triton_config
|
307
|
-
)
|
308
|
-
|
309
|
-
|
310
|
-
def create_local_triton_config(
|
311
|
-
model_id: str,
|
312
|
-
model_source_path: str,
|
313
|
-
triton_model_repository: str = "./models/triton"
|
314
|
-
) -> DeploymentConfig:
|
315
|
-
"""
|
316
|
-
Create a deployment configuration for local Triton deployment.
|
317
|
-
|
318
|
-
Args:
|
319
|
-
model_id: Unique identifier for the deployment
|
320
|
-
model_source_path: Path to the model
|
321
|
-
triton_model_repository: Path to Triton model repository
|
322
|
-
|
323
|
-
Returns:
|
324
|
-
DeploymentConfig for local deployment
|
325
|
-
"""
|
326
|
-
model_config = ModelConfig(
|
327
|
-
model_id=model_id,
|
328
|
-
model_name=f"local-model-{model_id}",
|
329
|
-
source_type="local",
|
330
|
-
source_path=model_source_path,
|
331
|
-
model_format=ModelFormat.HUGGINGFACE,
|
332
|
-
inference_engine=InferenceEngine.TRITON,
|
333
|
-
model_type="llm",
|
334
|
-
capabilities=["text_generation"],
|
335
|
-
max_batch_size=4,
|
336
|
-
max_sequence_length=1024,
|
337
|
-
dtype="float16"
|
338
|
-
)
|
339
|
-
|
340
|
-
triton_config = TritonConfig(
|
341
|
-
model_repository=triton_model_repository,
|
342
|
-
model_name=f"local-model-{model_id}",
|
343
|
-
backend="python", # Use Python backend for local development
|
344
|
-
max_batch_size=4,
|
345
|
-
max_sequence_length=1024
|
346
|
-
)
|
347
|
-
|
348
|
-
return DeploymentConfig(
|
349
|
-
deployment_id=f"local-deployment-{model_id}",
|
350
|
-
deployment_name=f"Local Model - {model_id}",
|
351
|
-
description="Local model deployment for development and testing",
|
352
|
-
provider=DeploymentProvider.LOCAL,
|
353
|
-
inference_engine=InferenceEngine.TRITON,
|
354
|
-
model_config=model_config,
|
355
|
-
triton_config=triton_config
|
356
|
-
)
|