isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. isa_model/client.py +1166 -584
  2. isa_model/core/cache/redis_cache.py +410 -0
  3. isa_model/core/config/config_manager.py +282 -12
  4. isa_model/core/config.py +91 -1
  5. isa_model/core/database/__init__.py +1 -0
  6. isa_model/core/database/direct_db_client.py +114 -0
  7. isa_model/core/database/migration_manager.py +563 -0
  8. isa_model/core/database/migrations.py +297 -0
  9. isa_model/core/database/supabase_client.py +258 -0
  10. isa_model/core/dependencies.py +316 -0
  11. isa_model/core/discovery/__init__.py +19 -0
  12. isa_model/core/discovery/consul_discovery.py +190 -0
  13. isa_model/core/logging/__init__.py +54 -0
  14. isa_model/core/logging/influx_logger.py +523 -0
  15. isa_model/core/logging/loki_logger.py +160 -0
  16. isa_model/core/models/__init__.py +46 -0
  17. isa_model/core/models/config_models.py +625 -0
  18. isa_model/core/models/deployment_billing_tracker.py +430 -0
  19. isa_model/core/models/model_billing_tracker.py +60 -88
  20. isa_model/core/models/model_manager.py +66 -25
  21. isa_model/core/models/model_metadata.py +690 -0
  22. isa_model/core/models/model_repo.py +217 -55
  23. isa_model/core/models/model_statistics_tracker.py +234 -0
  24. isa_model/core/models/model_storage.py +0 -1
  25. isa_model/core/models/model_version_manager.py +959 -0
  26. isa_model/core/models/system_models.py +857 -0
  27. isa_model/core/pricing_manager.py +2 -249
  28. isa_model/core/repositories/__init__.py +9 -0
  29. isa_model/core/repositories/config_repository.py +912 -0
  30. isa_model/core/resilience/circuit_breaker.py +366 -0
  31. isa_model/core/security/secrets.py +358 -0
  32. isa_model/core/services/__init__.py +2 -4
  33. isa_model/core/services/intelligent_model_selector.py +479 -370
  34. isa_model/core/storage/hf_storage.py +2 -2
  35. isa_model/core/types.py +8 -0
  36. isa_model/deployment/__init__.py +5 -48
  37. isa_model/deployment/core/__init__.py +2 -31
  38. isa_model/deployment/core/deployment_manager.py +1278 -368
  39. isa_model/deployment/local/__init__.py +31 -0
  40. isa_model/deployment/local/config.py +248 -0
  41. isa_model/deployment/local/gpu_gateway.py +607 -0
  42. isa_model/deployment/local/health_checker.py +428 -0
  43. isa_model/deployment/local/provider.py +586 -0
  44. isa_model/deployment/local/tensorrt_service.py +621 -0
  45. isa_model/deployment/local/transformers_service.py +644 -0
  46. isa_model/deployment/local/vllm_service.py +527 -0
  47. isa_model/deployment/modal/__init__.py +8 -0
  48. isa_model/deployment/modal/config.py +136 -0
  49. isa_model/deployment/modal/deployer.py +894 -0
  50. isa_model/deployment/modal/services/__init__.py +3 -0
  51. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  52. isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
  53. isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
  54. isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
  55. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  56. isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
  57. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  58. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  59. isa_model/deployment/modal/services/video/__init__.py +1 -0
  60. isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
  61. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  62. isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
  63. isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
  64. isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
  65. isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
  66. isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
  67. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  68. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  69. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  70. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  71. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  72. isa_model/deployment/storage/__init__.py +5 -0
  73. isa_model/deployment/storage/deployment_repository.py +824 -0
  74. isa_model/deployment/triton/__init__.py +10 -0
  75. isa_model/deployment/triton/config.py +196 -0
  76. isa_model/deployment/triton/configs/__init__.py +1 -0
  77. isa_model/deployment/triton/provider.py +512 -0
  78. isa_model/deployment/triton/scripts/__init__.py +1 -0
  79. isa_model/deployment/triton/templates/__init__.py +1 -0
  80. isa_model/inference/__init__.py +47 -1
  81. isa_model/inference/ai_factory.py +179 -16
  82. isa_model/inference/legacy_services/__init__.py +21 -0
  83. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  84. isa_model/inference/legacy_services/model_service.py +573 -0
  85. isa_model/inference/legacy_services/model_serving.py +717 -0
  86. isa_model/inference/legacy_services/model_training.py +561 -0
  87. isa_model/inference/models/__init__.py +21 -0
  88. isa_model/inference/models/inference_config.py +551 -0
  89. isa_model/inference/models/inference_record.py +675 -0
  90. isa_model/inference/models/performance_models.py +714 -0
  91. isa_model/inference/repositories/__init__.py +9 -0
  92. isa_model/inference/repositories/inference_repository.py +828 -0
  93. isa_model/inference/services/audio/__init__.py +21 -0
  94. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  95. isa_model/inference/services/audio/base_stt_service.py +184 -11
  96. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  97. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  98. isa_model/inference/services/audio/openai_stt_service.py +53 -11
  99. isa_model/inference/services/base_service.py +17 -1
  100. isa_model/inference/services/custom_model_manager.py +277 -0
  101. isa_model/inference/services/embedding/__init__.py +13 -0
  102. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  103. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  104. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  105. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  106. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  107. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  108. isa_model/inference/services/img/__init__.py +2 -2
  109. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  110. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  111. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  112. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  113. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  114. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  115. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  116. isa_model/inference/services/llm/__init__.py +10 -2
  117. isa_model/inference/services/llm/base_llm_service.py +361 -26
  118. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  119. isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
  120. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  121. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  122. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  123. isa_model/inference/services/llm/local_llm_service.py +747 -0
  124. isa_model/inference/services/llm/ollama_llm_service.py +11 -3
  125. isa_model/inference/services/llm/openai_llm_service.py +670 -56
  126. isa_model/inference/services/llm/yyds_llm_service.py +10 -3
  127. isa_model/inference/services/vision/__init__.py +27 -6
  128. isa_model/inference/services/vision/base_vision_service.py +118 -185
  129. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  130. isa_model/inference/services/vision/helpers/image_utils.py +19 -10
  131. isa_model/inference/services/vision/isa_vision_service.py +634 -0
  132. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  133. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  134. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  135. isa_model/serving/api/cache_manager.py +245 -0
  136. isa_model/serving/api/dependencies/__init__.py +1 -0
  137. isa_model/serving/api/dependencies/auth.py +194 -0
  138. isa_model/serving/api/dependencies/database.py +139 -0
  139. isa_model/serving/api/error_handlers.py +284 -0
  140. isa_model/serving/api/fastapi_server.py +240 -18
  141. isa_model/serving/api/middleware/auth.py +317 -0
  142. isa_model/serving/api/middleware/security.py +268 -0
  143. isa_model/serving/api/middleware/tenant_context.py +414 -0
  144. isa_model/serving/api/routes/analytics.py +489 -0
  145. isa_model/serving/api/routes/config.py +645 -0
  146. isa_model/serving/api/routes/deployment_billing.py +315 -0
  147. isa_model/serving/api/routes/deployments.py +475 -0
  148. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  149. isa_model/serving/api/routes/health.py +32 -12
  150. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  151. isa_model/serving/api/routes/local_deployments.py +448 -0
  152. isa_model/serving/api/routes/logs.py +430 -0
  153. isa_model/serving/api/routes/settings.py +582 -0
  154. isa_model/serving/api/routes/tenants.py +575 -0
  155. isa_model/serving/api/routes/unified.py +992 -171
  156. isa_model/serving/api/routes/webhooks.py +479 -0
  157. isa_model/serving/api/startup.py +318 -0
  158. isa_model/serving/modal_proxy_server.py +249 -0
  159. isa_model/utils/gpu_utils.py +311 -0
  160. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
  161. isa_model-0.4.3.dist-info/RECORD +193 -0
  162. isa_model/deployment/cloud/__init__.py +0 -9
  163. isa_model/deployment/cloud/modal/__init__.py +0 -10
  164. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  165. isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
  166. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
  167. isa_model/deployment/cloud/modal/register_models.py +0 -321
  168. isa_model/deployment/core/deployment_config.py +0 -356
  169. isa_model/deployment/core/isa_deployment_service.py +0 -401
  170. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  171. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  172. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  173. isa_model/deployment/runtime/deployed_service.py +0 -338
  174. isa_model/deployment/services/__init__.py +0 -9
  175. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  176. isa_model/deployment/services/model_service.py +0 -332
  177. isa_model/deployment/services/service_monitor.py +0 -356
  178. isa_model/deployment/services/service_registry.py +0 -527
  179. isa_model/eval/__init__.py +0 -92
  180. isa_model/eval/benchmarks.py +0 -469
  181. isa_model/eval/config/__init__.py +0 -10
  182. isa_model/eval/config/evaluation_config.py +0 -108
  183. isa_model/eval/evaluators/__init__.py +0 -18
  184. isa_model/eval/evaluators/base_evaluator.py +0 -503
  185. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  186. isa_model/eval/factory.py +0 -531
  187. isa_model/eval/infrastructure/__init__.py +0 -24
  188. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  189. isa_model/eval/metrics.py +0 -798
  190. isa_model/inference/adapter/unified_api.py +0 -248
  191. isa_model/inference/services/helpers/stacked_config.py +0 -148
  192. isa_model/inference/services/img/flux_professional_service.py +0 -603
  193. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  194. isa_model/inference/services/others/table_transformer_service.py +0 -61
  195. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  196. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  197. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  198. isa_model/scripts/inference_tracker.py +0 -283
  199. isa_model/scripts/mlflow_manager.py +0 -379
  200. isa_model/scripts/model_registry.py +0 -465
  201. isa_model/scripts/register_models.py +0 -370
  202. isa_model/scripts/register_models_with_embeddings.py +0 -510
  203. isa_model/scripts/start_mlflow.py +0 -95
  204. isa_model/scripts/training_tracker.py +0 -257
  205. isa_model/training/__init__.py +0 -74
  206. isa_model/training/annotation/annotation_schema.py +0 -47
  207. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  208. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  209. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  210. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  211. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  212. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  213. isa_model/training/annotation/views/annotation_controller.py +0 -158
  214. isa_model/training/cloud/__init__.py +0 -22
  215. isa_model/training/cloud/job_orchestrator.py +0 -402
  216. isa_model/training/cloud/runpod_trainer.py +0 -454
  217. isa_model/training/cloud/storage_manager.py +0 -482
  218. isa_model/training/core/__init__.py +0 -23
  219. isa_model/training/core/config.py +0 -181
  220. isa_model/training/core/dataset.py +0 -222
  221. isa_model/training/core/trainer.py +0 -720
  222. isa_model/training/core/utils.py +0 -213
  223. isa_model/training/factory.py +0 -424
  224. isa_model-0.3.91.dist-info/RECORD +0 -138
  225. /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
  226. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  227. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  228. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,48 @@
1
+ """
2
+ tenant-a-service LLM Service for Modal
3
+
4
+ Auto-generated service for model: gpt2
5
+ Architecture: gpt
6
+ """
7
+
8
+ import modal
9
+ from typing import Dict, Any, List
10
+
11
# Modal application object; the service class and the web endpoint of this
# module are registered on it through its decorators.
app = modal.App("tenant-a-service")

# Container image shared by the class and the endpoint: a Debian slim base
# with the pip requirements below installed.
image = modal.Image.debian_slim().pip_install(
    "accelerate>=0.24.0",
    "transformers>=4.35.0",
    "httpx>=0.26.0",
    "torch>=2.0.0",
    "requests>=2.31.0",
    "numpy>=1.24.0",
    "pydantic>=2.0.0",
)
16
+
17
@app.cls(
    image=image,
    gpu=modal.gpu.A10G(count=1),
    container_idle_timeout=300,
    memory=32768
)
class Tenant_A_ServiceService:
    """Modal class serving the gpt2 model for the tenant-a-service app.

    The decorator above requests the shared image, a single A10G GPU,
    ``container_idle_timeout=300`` and ``memory=32768`` for this class.
    """

    @modal.enter()
    def load_model(self):
        """Load the gpt2 tokenizer and model onto the instance.

        Registered with ``@modal.enter()``; stores the results on
        ``self.tokenizer`` and ``self.model``.
        """
        # Imported locally so torch/transformers are only required where the
        # method actually runs, not wherever this module is imported.
        import torch
        from transformers import AutoTokenizer, AutoModelForCausalLM

        self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
        # gpt2 is implemented inside transformers, so there is no need to opt
        # in to executing code shipped with the checkpoint repository;
        # trust_remote_code is deliberately left at its default.
        self.model = AutoModelForCausalLM.from_pretrained(
            "gpt2",
            torch_dtype=torch.float16,
            device_map="auto",
        )

    @modal.method()
    def generate(self, messages: List[Dict[str, str]], **kwargs):
        """Return a placeholder response for the most recent message.

        Args:
            messages: Chat messages; only the ``"content"`` value of the last
                entry is read. An empty list yields an empty prompt.
            **kwargs: Accepted for interface compatibility and ignored.

        Returns:
            A dict with the keys ``"response"`` and ``"model"``.

        Raises:
            KeyError: If the last message has no ``"content"`` key.
        """
        # Simplified generation: the loaded model is not invoked here, the
        # prompt is only echoed back inside a fixed template.
        prompt = messages[-1]["content"] if messages else ""
        return {"response": f"Generated response for: {prompt}", "model": "gpt2"}
43
+
44
@app.function(image=image)
@modal.web_endpoint(method="POST")
def inference_endpoint(item: Dict[str, Any]):
    """Handle a POST request by forwarding its payload to the service class.

    Args:
        item: Request payload. It is expanded into keyword arguments of
            ``Tenant_A_ServiceService.generate``, so it must provide
            ``messages``; any other keys land in that method's ``**kwargs``.

    Returns:
        The value returned by ``generate``.
    """
    service = Tenant_A_ServiceService()
    # Methods declared with @modal.method() are invoked through .remote() so
    # the call executes in the class's own container (the one configured with
    # the GPU, where load_model has run) rather than in this endpoint's.
    return service.generate.remote(**item)
@@ -0,0 +1,48 @@
1
+ """
2
+ prefix-test-service LLM Service for Modal
3
+
4
+ Auto-generated service for model: gpt2
5
+ Architecture: gpt
6
+ """
7
+
8
+ import modal
9
+ from typing import Dict, Any, List
10
+
11
# Modal application object; the service class and the web endpoint of this
# module are registered on it through its decorators.
app = modal.App("prefix-test-service")

# Container image shared by the class and the endpoint: a Debian slim base
# with the pip requirements below installed.
image = modal.Image.debian_slim().pip_install(
    "accelerate>=0.24.0",
    "transformers>=4.35.0",
    "httpx>=0.26.0",
    "torch>=2.0.0",
    "requests>=2.31.0",
    "numpy>=1.24.0",
    "pydantic>=2.0.0",
)
16
+
17
@app.cls(
    image=image,
    gpu=modal.gpu.A10G(count=1),
    container_idle_timeout=300,
    memory=32768
)
class Prefix_Test_ServiceService:
    """Modal class serving the gpt2 model for the prefix-test-service app.

    The decorator above requests the shared image, a single A10G GPU,
    ``container_idle_timeout=300`` and ``memory=32768`` for this class.
    """

    @modal.enter()
    def load_model(self):
        """Load the gpt2 tokenizer and model onto the instance.

        Registered with ``@modal.enter()``; stores the results on
        ``self.tokenizer`` and ``self.model``.
        """
        # Imported locally so torch/transformers are only required where the
        # method actually runs, not wherever this module is imported.
        import torch
        from transformers import AutoTokenizer, AutoModelForCausalLM

        self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
        # gpt2 is implemented inside transformers, so there is no need to opt
        # in to executing code shipped with the checkpoint repository;
        # trust_remote_code is deliberately left at its default.
        self.model = AutoModelForCausalLM.from_pretrained(
            "gpt2",
            torch_dtype=torch.float16,
            device_map="auto",
        )

    @modal.method()
    def generate(self, messages: List[Dict[str, str]], **kwargs):
        """Return a placeholder response for the most recent message.

        Args:
            messages: Chat messages; only the ``"content"`` value of the last
                entry is read. An empty list yields an empty prompt.
            **kwargs: Accepted for interface compatibility and ignored.

        Returns:
            A dict with the keys ``"response"`` and ``"model"``.

        Raises:
            KeyError: If the last message has no ``"content"`` key.
        """
        # Simplified generation: the loaded model is not invoked here, the
        # prompt is only echoed back inside a fixed template.
        prompt = messages[-1]["content"] if messages else ""
        return {"response": f"Generated response for: {prompt}", "model": "gpt2"}
43
+
44
@app.function(image=image)
@modal.web_endpoint(method="POST")
def inference_endpoint(item: Dict[str, Any]):
    """Handle a POST request by forwarding its payload to the service class.

    Args:
        item: Request payload. It is expanded into keyword arguments of
            ``Prefix_Test_ServiceService.generate``, so it must provide
            ``messages``; any other keys land in that method's ``**kwargs``.

    Returns:
        The value returned by ``generate``.
    """
    service = Prefix_Test_ServiceService()
    # Methods declared with @modal.method() are invoked through .remote() so
    # the call executes in the class's own container (the one configured with
    # the GPU, where load_model has run) rather than in this endpoint's.
    return service.generate.remote(**item)
@@ -0,0 +1,48 @@
1
+ """
2
+ test-llm-service LLM Service for Modal
3
+
4
+ Auto-generated service for model: gpt2
5
+ Architecture: gpt
6
+ """
7
+
8
+ import modal
9
+ from typing import Dict, Any, List
10
+
11
# Modal application object; the service class and the web endpoint of this
# module are registered on it through its decorators.
app = modal.App("test-llm-service")

# Container image shared by the class and the endpoint: a Debian slim base
# with the pip requirements below installed.
image = modal.Image.debian_slim().pip_install(
    "torch>=2.0.0",
    "httpx>=0.26.0",
    "transformers>=4.35.0",
    "requests>=2.31.0",
    "pydantic>=2.0.0",
    "numpy>=1.24.0",
    "accelerate>=0.24.0",
)
16
+
17
@app.cls(
    image=image,
    gpu=modal.gpu.A10G(count=1),
    container_idle_timeout=300,
    memory=32768
)
class Test_Llm_ServiceService:
    """Modal class serving the gpt2 model for the test-llm-service app.

    The decorator above requests the shared image, a single A10G GPU,
    ``container_idle_timeout=300`` and ``memory=32768`` for this class.
    """

    @modal.enter()
    def load_model(self):
        """Load the gpt2 tokenizer and model onto the instance.

        Registered with ``@modal.enter()``; stores the results on
        ``self.tokenizer`` and ``self.model``.
        """
        # Imported locally so torch/transformers are only required where the
        # method actually runs, not wherever this module is imported.
        import torch
        from transformers import AutoTokenizer, AutoModelForCausalLM

        self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
        # gpt2 is implemented inside transformers, so there is no need to opt
        # in to executing code shipped with the checkpoint repository;
        # trust_remote_code is deliberately left at its default.
        self.model = AutoModelForCausalLM.from_pretrained(
            "gpt2",
            torch_dtype=torch.float16,
            device_map="auto",
        )

    @modal.method()
    def generate(self, messages: List[Dict[str, str]], **kwargs):
        """Return a placeholder response for the most recent message.

        Args:
            messages: Chat messages; only the ``"content"`` value of the last
                entry is read. An empty list yields an empty prompt.
            **kwargs: Accepted for interface compatibility and ignored.

        Returns:
            A dict with the keys ``"response"`` and ``"model"``.

        Raises:
            KeyError: If the last message has no ``"content"`` key.
        """
        # Simplified generation: the loaded model is not invoked here, the
        # prompt is only echoed back inside a fixed template.
        prompt = messages[-1]["content"] if messages else ""
        return {"response": f"Generated response for: {prompt}", "model": "gpt2"}
43
+
44
@app.function(image=image)
@modal.web_endpoint(method="POST")
def inference_endpoint(item: Dict[str, Any]):
    """Handle a POST request by forwarding its payload to the service class.

    Args:
        item: Request payload. It is expanded into keyword arguments of
            ``Test_Llm_ServiceService.generate``, so it must provide
            ``messages``; any other keys land in that method's ``**kwargs``.

    Returns:
        The value returned by ``generate``.
    """
    service = Test_Llm_ServiceService()
    # Methods declared with @modal.method() are invoked through .remote() so
    # the call executes in the class's own container (the one configured with
    # the GPU, where load_model has run) rather than in this endpoint's.
    return service.generate.remote(**item)
@@ -0,0 +1,48 @@
1
+ """
2
+ test-monitoring-gpt2 LLM Service for Modal
3
+
4
+ Auto-generated service for model: gpt2
5
+ Architecture: gpt
6
+ """
7
+
8
+ import modal
9
+ from typing import Dict, Any, List
10
+
11
# Modal application object; the service class and the web endpoint of this
# module are registered on it through its decorators.
app = modal.App("test-monitoring-gpt2")

# Container image shared by the class and the endpoint: a Debian slim base
# with the pip requirements below installed.
image = modal.Image.debian_slim().pip_install(
    "numpy>=1.24.0",
    "requests>=2.31.0",
    "accelerate>=0.24.0",
    "httpx>=0.26.0",
    "pydantic>=2.0.0",
    "transformers>=4.35.0",
    "torch>=2.0.0",
)
16
+
17
@app.cls(
    image=image,
    gpu=modal.gpu.A10G(count=1),
    container_idle_timeout=300,
    memory=32768
)
class Test_Monitoring_Gpt2Service:
    """Modal class serving the gpt2 model for the test-monitoring-gpt2 app.

    The decorator above requests the shared image, a single A10G GPU,
    ``container_idle_timeout=300`` and ``memory=32768`` for this class.
    """

    @modal.enter()
    def load_model(self):
        """Load the gpt2 tokenizer and model onto the instance.

        Registered with ``@modal.enter()``; stores the results on
        ``self.tokenizer`` and ``self.model``.
        """
        # Imported locally so torch/transformers are only required where the
        # method actually runs, not wherever this module is imported.
        import torch
        from transformers import AutoTokenizer, AutoModelForCausalLM

        self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
        # gpt2 is implemented inside transformers, so there is no need to opt
        # in to executing code shipped with the checkpoint repository;
        # trust_remote_code is deliberately left at its default.
        self.model = AutoModelForCausalLM.from_pretrained(
            "gpt2",
            torch_dtype=torch.float16,
            device_map="auto",
        )

    @modal.method()
    def generate(self, messages: List[Dict[str, str]], **kwargs):
        """Return a placeholder response for the most recent message.

        Args:
            messages: Chat messages; only the ``"content"`` value of the last
                entry is read. An empty list yields an empty prompt.
            **kwargs: Accepted for interface compatibility and ignored.

        Returns:
            A dict with the keys ``"response"`` and ``"model"``.

        Raises:
            KeyError: If the last message has no ``"content"`` key.
        """
        # Simplified generation: the loaded model is not invoked here, the
        # prompt is only echoed back inside a fixed template.
        prompt = messages[-1]["content"] if messages else ""
        return {"response": f"Generated response for: {prompt}", "model": "gpt2"}
43
+
44
@app.function(image=image)
@modal.web_endpoint(method="POST")
def inference_endpoint(item: Dict[str, Any]):
    """Handle a POST request by forwarding its payload to the service class.

    Args:
        item: Request payload. It is expanded into keyword arguments of
            ``Test_Monitoring_Gpt2Service.generate``, so it must provide
            ``messages``; any other keys land in that method's ``**kwargs``.

    Returns:
        The value returned by ``generate``.
    """
    service = Test_Monitoring_Gpt2Service()
    # Methods declared with @modal.method() are invoked through .remote() so
    # the call executes in the class's own container (the one configured with
    # the GPU, where load_model has run) rather than in this endpoint's.
    return service.generate.remote(**item)
@@ -0,0 +1,48 @@
1
+ """
2
+ test-monitoring-gpt2 LLM Service for Modal
3
+
4
+ Auto-generated service for model: gpt2
5
+ Architecture: gpt
6
+ """
7
+
8
+ import modal
9
+ from typing import Dict, Any, List
10
+
11
# Modal application object; the service class and the web endpoint of this
# module are registered on it through its decorators.
app = modal.App("test-monitoring-gpt2")

# Container image shared by the class and the endpoint: a Debian slim base
# with the pip requirements below installed.
image = modal.Image.debian_slim().pip_install(
    "transformers>=4.35.0",
    "torch>=2.0.0",
    "accelerate>=0.24.0",
    "httpx>=0.26.0",
    "numpy>=1.24.0",
    "requests>=2.31.0",
    "pydantic>=2.0.0",
)
16
+
17
@app.cls(
    image=image,
    gpu=modal.gpu.A10G(count=1),
    container_idle_timeout=300,
    memory=32768
)
class Test_Monitoring_Gpt2Service:
    """Modal class serving the gpt2 model for the test-monitoring-gpt2 app.

    The decorator above requests the shared image, a single A10G GPU,
    ``container_idle_timeout=300`` and ``memory=32768`` for this class.
    """

    @modal.enter()
    def load_model(self):
        """Load the gpt2 tokenizer and model onto the instance.

        Registered with ``@modal.enter()``; stores the results on
        ``self.tokenizer`` and ``self.model``.
        """
        # Imported locally so torch/transformers are only required where the
        # method actually runs, not wherever this module is imported.
        import torch
        from transformers import AutoTokenizer, AutoModelForCausalLM

        self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
        # gpt2 is implemented inside transformers, so there is no need to opt
        # in to executing code shipped with the checkpoint repository;
        # trust_remote_code is deliberately left at its default.
        self.model = AutoModelForCausalLM.from_pretrained(
            "gpt2",
            torch_dtype=torch.float16,
            device_map="auto",
        )

    @modal.method()
    def generate(self, messages: List[Dict[str, str]], **kwargs):
        """Return a placeholder response for the most recent message.

        Args:
            messages: Chat messages; only the ``"content"`` value of the last
                entry is read. An empty list yields an empty prompt.
            **kwargs: Accepted for interface compatibility and ignored.

        Returns:
            A dict with the keys ``"response"`` and ``"model"``.

        Raises:
            KeyError: If the last message has no ``"content"`` key.
        """
        # Simplified generation: the loaded model is not invoked here, the
        # prompt is only echoed back inside a fixed template.
        prompt = messages[-1]["content"] if messages else ""
        return {"response": f"Generated response for: {prompt}", "model": "gpt2"}
43
+
44
@app.function(image=image)
@modal.web_endpoint(method="POST")
def inference_endpoint(item: Dict[str, Any]):
    """Handle a POST request by forwarding its payload to the service class.

    Args:
        item: Request payload. It is expanded into keyword arguments of
            ``Test_Monitoring_Gpt2Service.generate``, so it must provide
            ``messages``; any other keys land in that method's ``**kwargs``.

    Returns:
        The value returned by ``generate``.
    """
    service = Test_Monitoring_Gpt2Service()
    # Methods declared with @modal.method() are invoked through .remote() so
    # the call executes in the class's own container (the one configured with
    # the GPU, where load_model has run) rather than in this endpoint's.
    return service.generate.remote(**item)
@@ -0,0 +1,5 @@
1
+ """Storage and persistence for deployments"""
2
+
3
+ from .deployment_repository import DeploymentRepository
4
+
5
+ __all__ = ["DeploymentRepository"]