isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +466 -43
- isa_model/core/cache/redis_cache.py +12 -3
- isa_model/core/config/config_manager.py +230 -3
- isa_model/core/config.py +90 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +21 -1
- isa_model/core/database/supabase_client.py +154 -19
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +27 -18
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_manager.py +35 -80
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +174 -18
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/services/intelligent_model_selector.py +399 -21
- isa_model/core/types.py +1 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -370
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +137 -10
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/openai_stt_service.py +22 -6
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +335 -24
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/ollama_llm_service.py +9 -2
- isa_model/inference/services/llm/openai_llm_service.py +33 -16
- isa_model/inference/services/llm/yyds_llm_service.py +8 -2
- isa_model/inference/services/vision/__init__.py +22 -1
- isa_model/inference/services/vision/helpers/image_utils.py +8 -5
- isa_model/inference/services/vision/isa_vision_service.py +65 -4
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +172 -22
- isa_model/serving/api/middleware/auth.py +8 -2
- isa_model/serving/api/middleware/security.py +23 -33
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +4 -1
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +138 -2
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +680 -18
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +68 -54
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
- isa_model-0.4.4.dist-info/RECORD +180 -0
- isa_model/core/security/secrets.py +0 -358
- isa_model/core/storage/hf_storage.py +0 -419
- isa_model/core/storage/minio_storage.py +0 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks/__init__.py +0 -27
- isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
- isa_model/eval/benchmarks.py +0 -701
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -24
- isa_model/eval/evaluators/audio_evaluator.py +0 -727
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/embedding_evaluator.py +0 -742
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/evaluators/vision_evaluator.py +0 -564
- isa_model/eval/example_evaluation.py +0 -395
- isa_model/eval/factory.py +0 -798
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/isa_benchmarks.py +0 -700
- isa_model/eval/isa_integration.py +0 -582
- isa_model/eval/metrics.py +0 -951
- isa_model/eval/tests/unit/test_basic.py +0 -396
- isa_model/serving/api/routes/evaluations.py +0 -579
- isa_model/training/__init__.py +0 -168
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -26
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/examples/intelligent_training_example.py +0 -281
- isa_model/training/factory.py +0 -424
- isa_model/training/intelligent/__init__.py +0 -25
- isa_model/training/intelligent/decision_engine.py +0 -643
- isa_model/training/intelligent/intelligent_factory.py +0 -888
- isa_model/training/intelligent/knowledge_base.py +0 -751
- isa_model/training/intelligent/resource_optimizer.py +0 -839
- isa_model/training/intelligent/task_classifier.py +0 -576
- isa_model/training/storage/__init__.py +0 -24
- isa_model/training/storage/core_integration.py +0 -439
- isa_model/training/storage/training_repository.py +0 -552
- isa_model/training/storage/training_storage.py +0 -628
- isa_model-0.4.0.dist-info/RECORD +0 -182
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py
ADDED
@@ -0,0 +1,48 @@
+"""
+test-llm-service LLM Service for Modal
+
+Auto-generated service for model: gpt2
+Architecture: gpt
+"""
+
+import modal
+from typing import Dict, Any, List
+
+app = modal.App("test-llm-service")
+
+image = modal.Image.debian_slim().pip_install(
+    "torch>=2.0.0", "httpx>=0.26.0", "transformers>=4.35.0", "requests>=2.31.0", "pydantic>=2.0.0", "numpy>=1.24.0", "accelerate>=0.24.0"
+)
+
+@app.cls(
+    image=image,
+    gpu=modal.gpu.A10G(count=1),
+    container_idle_timeout=300,
+    memory=32768
+)
+class Test_Llm_ServiceService:
+
+    @modal.enter()
+    def load_model(self):
+        import torch
+        from transformers import AutoTokenizer, AutoModelForCausalLM
+
+        self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
+        self.model = AutoModelForCausalLM.from_pretrained(
+            "gpt2",
+            torch_dtype=torch.float16,
+            device_map="auto",
+            trust_remote_code=True
+        )
+
+    @modal.method()
+    def generate(self, messages: List[Dict[str, str]], **kwargs):
+        # Generate response (simplified)
+        prompt = messages[-1]["content"] if messages else ""
+        return {"response": f"Generated response for: {prompt}", "model": "gpt2"}
+
+@app.function(image=image)
+@modal.web_endpoint(method="POST")
+def inference_endpoint(item: Dict[str, Any]):
+    service = Test_Llm_ServiceService()
+    return service.generate(**item)
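isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py
ADDED
@@ -0,0 +1,48 @@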
+"""
+test-monitoring-gpt2 LLM Service for Modal
+
+Auto-generated service for model: gpt2
+Architecture: gpt
+"""
+
+import modal
+from typing import Dict, Any, List
+
+app = modal.App("test-monitoring-gpt2")
+
+image = modal.Image.debian_slim().pip_install(
+    "numpy>=1.24.0", "requests>=2.31.0", "accelerate>=0.24.0", "httpx>=0.26.0", "pydantic>=2.0.0", "transformers>=4.35.0", "torch>=2.0.0"
+)
+
+@app.cls(
+    image=image,
+    gpu=modal.gpu.A10G(count=1),
+    container_idle_timeout=300,
+    memory=32768
+)
+class Test_Monitoring_Gpt2Service:
+
+    @modal.enter()
+    def load_model(self):
+        import torch
+        from transformers import AutoTokenizer, AutoModelForCausalLM
+
+        self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
+        self.model = AutoModelForCausalLM.from_pretrained(
+            "gpt2",
+            torch_dtype=torch.float16,
+            device_map="auto",
+            trust_remote_code=True
+        )
+
+    @modal.method()
+    def generate(self, messages: List[Dict[str, str]], **kwargs):
+        # Generate response (simplified)
+        prompt = messages[-1]["content"] if messages else ""
+        return {"response": f"Generated response for: {prompt}", "model": "gpt2"}
+
+@app.function(image=image)
+@modal.web_endpoint(method="POST")
+def inference_endpoint(item: Dict[str, Any]):
+    service = Test_Monitoring_Gpt2Service()
+    return service.generate(**item)
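isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py
ADDED
@@ -0,0 +1,48 @@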
+"""
+test-monitoring-gpt2 LLM Service for Modal
+
+Auto-generated service for model: gpt2
+Architecture: gpt
+"""
+
+import modal
+from typing import Dict, Any, List
+
+app = modal.App("test-monitoring-gpt2")
+
+image = modal.Image.debian_slim().pip_install(
+    "transformers>=4.35.0", "torch>=2.0.0", "accelerate>=0.24.0", "httpx>=0.26.0", "numpy>=1.24.0", "requests>=2.31.0", "pydantic>=2.0.0"
+)
+
+@app.cls(
+    image=image,
+    gpu=modal.gpu.A10G(count=1),
+    container_idle_timeout=300,
+    memory=32768
+)
+class Test_Monitoring_Gpt2Service:
+
+    @modal.enter()
+    def load_model(self):
+        import torch
+        from transformers import AutoTokenizer, AutoModelForCausalLM
+
+        self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
+        self.model = AutoModelForCausalLM.from_pretrained(
+            "gpt2",
+            torch_dtype=torch.float16,
+            device_map="auto",
+            trust_remote_code=True
+        )
+
+    @modal.method()
+    def generate(self, messages: List[Dict[str, str]], **kwargs):
+        # Generate response (simplified)
+        prompt = messages[-1]["content"] if messages else ""
+        return {"response": f"Generated response for: {prompt}", "model": "gpt2"}
+
+@app.function(image=image)
+@modal.web_endpoint(method="POST")
+def inference_endpoint(item: Dict[str, Any]):
+    service = Test_Monitoring_Gpt2Service()
+    return service.generate(**item)