isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isa_model/client.py +1166 -584
- isa_model/core/cache/redis_cache.py +410 -0
- isa_model/core/config/config_manager.py +282 -12
- isa_model/core/config.py +91 -1
- isa_model/core/database/__init__.py +1 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +297 -0
- isa_model/core/database/supabase_client.py +258 -0
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +46 -0
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_billing_tracker.py +60 -88
- isa_model/core/models/model_manager.py +66 -25
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +217 -55
- isa_model/core/models/model_statistics_tracker.py +234 -0
- isa_model/core/models/model_storage.py +0 -1
- isa_model/core/models/model_version_manager.py +959 -0
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/pricing_manager.py +2 -249
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/resilience/circuit_breaker.py +366 -0
- isa_model/core/security/secrets.py +358 -0
- isa_model/core/services/__init__.py +2 -4
- isa_model/core/services/intelligent_model_selector.py +479 -370
- isa_model/core/storage/hf_storage.py +2 -2
- isa_model/core/types.py +8 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -368
- isa_model/deployment/local/__init__.py +31 -0
- isa_model/deployment/local/config.py +248 -0
- isa_model/deployment/local/gpu_gateway.py +607 -0
- isa_model/deployment/local/health_checker.py +428 -0
- isa_model/deployment/local/provider.py +586 -0
- isa_model/deployment/local/tensorrt_service.py +621 -0
- isa_model/deployment/local/transformers_service.py +644 -0
- isa_model/deployment/local/vllm_service.py +527 -0
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/modal/deployer.py +894 -0
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
- isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
- isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
- isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
- isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
- isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +179 -16
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/__init__.py +21 -0
- isa_model/inference/services/audio/base_realtime_service.py +225 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/isa_tts_service.py +0 -0
- isa_model/inference/services/audio/openai_realtime_service.py +320 -124
- isa_model/inference/services/audio/openai_stt_service.py +53 -11
- isa_model/inference/services/base_service.py +17 -1
- isa_model/inference/services/custom_model_manager.py +277 -0
- isa_model/inference/services/embedding/__init__.py +13 -0
- isa_model/inference/services/embedding/base_embed_service.py +111 -8
- isa_model/inference/services/embedding/isa_embed_service.py +305 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/openai_embed_service.py +2 -4
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
- isa_model/inference/services/img/__init__.py +2 -2
- isa_model/inference/services/img/base_image_gen_service.py +24 -7
- isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
- isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
- isa_model/inference/services/img/services/replicate_flux.py +226 -0
- isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
- isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
- isa_model/inference/services/img/tests/test_img_client.py +297 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +361 -26
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/local_llm_service.py +747 -0
- isa_model/inference/services/llm/ollama_llm_service.py +11 -3
- isa_model/inference/services/llm/openai_llm_service.py +670 -56
- isa_model/inference/services/llm/yyds_llm_service.py +10 -3
- isa_model/inference/services/vision/__init__.py +27 -6
- isa_model/inference/services/vision/base_vision_service.py +118 -185
- isa_model/inference/services/vision/blip_vision_service.py +359 -0
- isa_model/inference/services/vision/helpers/image_utils.py +19 -10
- isa_model/inference/services/vision/isa_vision_service.py +634 -0
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +240 -18
- isa_model/serving/api/middleware/auth.py +317 -0
- isa_model/serving/api/middleware/security.py +268 -0
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +489 -0
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +475 -0
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/logs.py +430 -0
- isa_model/serving/api/routes/settings.py +582 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +992 -171
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +318 -0
- isa_model/serving/modal_proxy_server.py +249 -0
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
- isa_model-0.4.3.dist-info/RECORD +193 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
- isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
- isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
- isa_model/deployment/cloud/modal/register_models.py +0 -321
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks.py +0 -469
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -18
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/factory.py +0 -531
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/metrics.py +0 -798
- isa_model/inference/adapter/unified_api.py +0 -248
- isa_model/inference/services/helpers/stacked_config.py +0 -148
- isa_model/inference/services/img/flux_professional_service.py +0 -603
- isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/others/table_transformer_service.py +0 -61
- isa_model/inference/services/vision/doc_analysis_service.py +0 -640
- isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
- isa_model/inference/services/vision/ui_analysis_service.py +0 -823
- isa_model/scripts/inference_tracker.py +0 -283
- isa_model/scripts/mlflow_manager.py +0 -379
- isa_model/scripts/model_registry.py +0 -465
- isa_model/scripts/register_models.py +0 -370
- isa_model/scripts/register_models_with_embeddings.py +0 -510
- isa_model/scripts/start_mlflow.py +0 -95
- isa_model/scripts/training_tracker.py +0 -257
- isa_model/training/__init__.py +0 -74
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -23
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/factory.py +0 -424
- isa_model-0.3.91.dist-info/RECORD +0 -138
- /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
- {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,410 @@
|
|
1
|
+
"""
|
2
|
+
Redis-based Caching Strategy for ISA Model
|
3
|
+
|
4
|
+
Provides multi-level caching for:
|
5
|
+
- Model responses and completions
|
6
|
+
- Model metadata and configurations
|
7
|
+
- User sessions and authentication
|
8
|
+
- Rate limiting data
|
9
|
+
"""
|
10
|
+
|
11
|
+
import json
|
12
|
+
import hashlib
|
13
|
+
import pickle
|
14
|
+
import asyncio
|
15
|
+
import time
|
16
|
+
import logging
|
17
|
+
import os
|
18
|
+
from typing import Any, Dict, Optional, Union, List, Callable
|
19
|
+
from dataclasses import dataclass
|
20
|
+
import redis.asyncio as redis
|
21
|
+
import structlog
|
22
|
+
from functools import wraps
|
23
|
+
|
24
|
+
from ..config.config_manager import ConfigManager
|
25
|
+
|
26
|
+
logger = structlog.get_logger(__name__)
|
27
|
+
|
28
|
+
@dataclass
class CacheConfig:
    """Configuration for Redis cache.

    Every field has a default.  ``redis_url`` may be left as ``None``; it is
    then resolved through ``ConfigManager().get_redis_url()`` when the
    instance is created (see ``__post_init__``).
    """

    # Connection URL; None means "resolve it via ConfigManager".
    redis_url: Optional[str] = None
    default_ttl: int = 3600  # 1 hour
    model_cache_ttl: int = 3600  # 1 hour for model responses
    config_cache_ttl: int = 7200  # 2 hours for configurations
    session_cache_ttl: int = 86400  # 24 hours for sessions
    rate_limit_ttl: int = 3600  # 1 hour for rate limiting
    # Keys longer than this are replaced by a hashed form.
    max_key_length: int = 250
    # NOTE(review): no code visible in this module reads this flag - confirm
    # whether compression is implemented elsewhere.
    compression_enabled: bool = True
    serialization_method: str = "json"  # "json" or "pickle"

    def __post_init__(self):
        """Fill in ``redis_url`` from ConfigManager when none was supplied."""
        if self.redis_url is None:
            # Per the original author's note, get_redis_url() uses Consul
            # discovery with a fallback (not verifiable from this file).
            self.redis_url = ConfigManager().get_redis_url()
|
46
|
+
|
47
|
+
class RedisCache:
    """Redis-based cache with namespaced keys, per-namespace TTLs and counters.

    The data operations (``get``, ``set``, ``delete``, ``exists``,
    ``increment``, ``clear_namespace``) are best-effort: when the client is
    not connected or a Redis call fails they log, bump the ``errors``
    counter and return a neutral value (``None`` / ``False`` / ``0``)
    instead of raising.  ``connect`` re-raises connection failures.
    """

    def __init__(self, config: "CacheConfig"):
        """Store *config*; no connection is opened until ``connect()``."""
        self.config = config
        self.redis_client = None
        self._connected = False
        # In-process counters reported by get_stats().
        self._stats = {
            "hits": 0,
            "misses": 0,
            "sets": 0,
            "deletes": 0,
            "errors": 0,
        }

    @staticmethod
    def _redact_url(url):
        """Return *url* with any ``user:password@`` part replaced by ``***@``.

        Used so that credentials embedded in the Redis URL never reach logs.
        """
        if not url or "@" not in url:
            return url
        scheme, sep, rest = url.partition("://")
        if sep:
            prefix = f"{scheme}://"
        else:
            prefix, rest = "", url
        return f"{prefix}***@{rest.rsplit('@', 1)[-1]}"

    async def connect(self):
        """Create the Redis client and verify the connection with PING.

        Raises:
            Exception: whatever the client raises when the server cannot be
                reached; ``_connected`` is left ``False`` in that case.
        """
        try:
            self.redis_client = redis.from_url(
                self.config.redis_url,
                decode_responses=False,  # Handle binary data
                retry_on_timeout=True,
                health_check_interval=30,
            )

            # Test connection
            await self.redis_client.ping()
            self._connected = True

            # Log a redacted URL: the raw one may carry a password.
            logger.info(
                "Redis cache connected",
                url=self._redact_url(self.config.redis_url),
            )

        except Exception as e:
            logger.error("Failed to connect to Redis", error=str(e))
            self._connected = False
            raise

    async def disconnect(self):
        """Close the Redis client, if one was created, and mark as disconnected."""
        if self.redis_client:
            await self.redis_client.close()
            self._connected = False
            logger.info("Redis cache disconnected")

    def _generate_key(self, namespace: str, key: str) -> str:
        """Build the Redis key ``isa_model:<namespace>:<key>``.

        Keys longer than ``config.max_key_length`` are replaced by
        ``isa_model:<namespace>:hash_<sha256 hex digest>``.  The full digest
        is kept: a truncated hash makes collisions between distinct long
        keys realistic, and a collision returns another key's cached value.
        """
        full_key = f"isa_model:{namespace}:{key}"

        if len(full_key) > self.config.max_key_length:
            digest = hashlib.sha256(full_key.encode()).hexdigest()
            full_key = f"isa_model:{namespace}:hash_{digest}"

        return full_key

    def _serialize_value(self, value: Any) -> bytes:
        """Serialize *value* to bytes using the configured method.

        ``"pickle"`` uses ``pickle.dumps``; anything else uses JSON, where
        objects JSON cannot encode are stringified (``default=str``).

        Raises:
            Exception: re-raised after logging when serialization fails.
        """
        try:
            if self.config.serialization_method == "pickle":
                return pickle.dumps(value)
            # JSON serialization
            json_str = json.dumps(value, default=str, ensure_ascii=False)
            return json_str.encode('utf-8')
        except Exception as e:
            logger.error("Serialization failed", error=str(e))
            raise

    def _deserialize_value(self, data: bytes) -> Any:
        """Inverse of ``_serialize_value``.

        Raises:
            Exception: re-raised after logging when the payload cannot be
                decoded with the configured method.
        """
        try:
            if self.config.serialization_method == "pickle":
                # NOTE(security): pickle.loads can execute arbitrary code
                # from the payload.  Only enable "pickle" when every writer
                # to this Redis instance is trusted.
                return pickle.loads(data)
            # JSON deserialization
            json_str = data.decode('utf-8')
            return json.loads(json_str)
        except Exception as e:
            logger.error("Deserialization failed", error=str(e))
            raise

    async def get(self, namespace: str, key: str) -> Optional[Any]:
        """Return the cached value, or ``None`` on miss, error or no connection."""
        if not self._connected:
            return None

        try:
            cache_key = self._generate_key(namespace, key)
            data = await self.redis_client.get(cache_key)

            if data is None:
                self._stats["misses"] += 1
                return None

            value = self._deserialize_value(data)
            self._stats["hits"] += 1

            logger.debug("Cache hit", namespace=namespace, key=key)
            return value

        except Exception as e:
            self._stats["errors"] += 1
            logger.error("Cache get failed", namespace=namespace, key=key, error=str(e))
            return None

    async def set(
        self,
        namespace: str,
        key: str,
        value: Any,
        ttl: Optional[int] = None
    ) -> bool:
        """Store *value* with an expiry; return ``True`` on success.

        When *ttl* is ``None`` the namespace's configured TTL is used.
        """
        if not self._connected:
            return False

        try:
            cache_key = self._generate_key(namespace, key)
            serialized_value = self._serialize_value(value)

            # Use namespace-specific TTL if not provided
            if ttl is None:
                ttl = self._get_namespace_ttl(namespace)

            await self.redis_client.setex(cache_key, ttl, serialized_value)
            self._stats["sets"] += 1

            logger.debug("Cache set", namespace=namespace, key=key, ttl=ttl)
            return True

        except Exception as e:
            self._stats["errors"] += 1
            logger.error("Cache set failed", namespace=namespace, key=key, error=str(e))
            return False

    async def delete(self, namespace: str, key: str) -> bool:
        """Delete a key; return ``True`` only if it existed and was removed."""
        if not self._connected:
            return False

        try:
            cache_key = self._generate_key(namespace, key)
            result = await self.redis_client.delete(cache_key)
            self._stats["deletes"] += 1

            logger.debug("Cache delete", namespace=namespace, key=key, existed=bool(result))
            return bool(result)

        except Exception as e:
            self._stats["errors"] += 1
            logger.error("Cache delete failed", namespace=namespace, key=key, error=str(e))
            return False

    async def exists(self, namespace: str, key: str) -> bool:
        """Return whether the key exists (``False`` on error or no connection)."""
        if not self._connected:
            return False

        try:
            cache_key = self._generate_key(namespace, key)
            return bool(await self.redis_client.exists(cache_key))
        except Exception as e:
            # Count the failure like get/set/delete do.
            self._stats["errors"] += 1
            logger.error("Cache exists check failed", error=str(e))
            return False

    async def increment(self, namespace: str, key: str, amount: int = 1, ttl: Optional[int] = None) -> Optional[int]:
        """Add *amount* to a counter and return its new value.

        When *ttl* is given, the expiry is applied only if the key currently
        has none.  Re-applying it on every call would push the expiry
        forward each time, so a counter that is hit steadily would never
        expire.

        Returns:
            The counter value after the increment, or ``None`` on error or
            when not connected.
        """
        if not self._connected:
            return None

        try:
            cache_key = self._generate_key(namespace, key)

            # One round trip: increment and (if needed) read the remaining TTL.
            pipe = self.redis_client.pipeline()
            pipe.incrby(cache_key, amount)
            if ttl:
                pipe.ttl(cache_key)

            results = await pipe.execute()

            # TTL == -1 means "key exists but has no expiry": start the window.
            if ttl and results[1] == -1:
                await self.redis_client.expire(cache_key, ttl)

            return results[0]

        except Exception as e:
            self._stats["errors"] += 1
            logger.error("Cache increment failed", error=str(e))
            return None

    async def clear_namespace(self, namespace: str) -> int:
        """Delete every key under ``isa_model:<namespace>:`` and return the count."""
        if not self._connected:
            return 0

        try:
            pattern = f"isa_model:{namespace}:*"

            # Use SCAN to avoid blocking Redis
            keys = [key async for key in self.redis_client.scan_iter(pattern)]

            if keys:
                deleted = await self.redis_client.delete(*keys)
                logger.info("Namespace cleared", namespace=namespace, deleted_keys=deleted)
                return deleted

            return 0

        except Exception as e:
            self._stats["errors"] += 1
            logger.error("Cache namespace clear failed", namespace=namespace, error=str(e))
            return 0

    def _get_namespace_ttl(self, namespace: str) -> int:
        """Return the configured TTL for *namespace*, else ``default_ttl``."""
        ttl_mapping = {
            "models": self.config.model_cache_ttl,
            "config": self.config.config_cache_ttl,
            "sessions": self.config.session_cache_ttl,
            "rate_limit": self.config.rate_limit_ttl,
            "responses": self.config.model_cache_ttl,
        }
        return ttl_mapping.get(namespace, self.config.default_ttl)

    async def get_stats(self) -> Dict[str, Any]:
        """Return a copy of the local counters plus hit rate and server info.

        ``redis_info`` is included only when connected and the INFO call
        succeeds; a failure there is logged and otherwise ignored.
        """
        stats = dict(self._stats)

        # Calculate hit rate
        total_requests = stats["hits"] + stats["misses"]
        stats["hit_rate"] = stats["hits"] / total_requests if total_requests > 0 else 0
        stats["connected"] = self._connected

        # Redis info if connected
        if self._connected:
            try:
                redis_info = await self.redis_client.info()
                stats["redis_info"] = {
                    "used_memory": redis_info.get("used_memory"),
                    "connected_clients": redis_info.get("connected_clients"),
                    "total_commands_processed": redis_info.get("total_commands_processed"),
                    "keyspace_hits": redis_info.get("keyspace_hits"),
                    "keyspace_misses": redis_info.get("keyspace_misses"),
                }
            except Exception as e:
                logger.error("Failed to get Redis info", error=str(e))

        return stats
|
292
|
+
|
293
|
+
# Global cache instance
|
294
|
+
_cache: Optional[RedisCache] = None
|
295
|
+
|
296
|
+
async def get_cache() -> RedisCache:
    """Return the process-wide cache, creating and connecting it on first use.

    The configuration is read from the environment (``REDIS_URL``,
    ``CACHE_DEFAULT_TTL``, ``MODEL_CACHE_TTL``, ``CACHE_COMPRESSION``), with
    ``ConfigManager().get_redis_url()`` as the URL fallback.

    Raises:
        Exception: whatever ``RedisCache.connect`` raises when Redis cannot
            be reached.  Nothing is stored globally in that case, so the
            next call tries again.
    """
    global _cache

    if _cache is None:
        config_manager = ConfigManager()
        config = CacheConfig(
            redis_url=os.getenv("REDIS_URL", config_manager.get_redis_url()),
            default_ttl=int(os.getenv("CACHE_DEFAULT_TTL", "3600")),
            model_cache_ttl=int(os.getenv("MODEL_CACHE_TTL", "3600")),
            compression_enabled=os.getenv("CACHE_COMPRESSION", "true").lower() == "true"
        )
        candidate = RedisCache(config)
        # Publish the instance only after connect() succeeds.  Assigning it
        # first would leave a permanently disconnected singleton behind
        # after one failed attempt: every later call would return it and
        # all cache operations would silently no-op.
        await candidate.connect()

        if _cache is None:
            _cache = candidate
        else:
            # Another coroutine finished initialising while we were
            # connecting; keep its instance and release ours.
            await candidate.disconnect()

    return _cache
|
312
|
+
|
313
|
+
# Caching decorators
|
314
|
+
def cached_response(namespace: str = "responses", ttl: Optional[int] = None):
    """Decorator that caches a function's result in the Redis cache.

    Args:
        namespace: Cache namespace the entries are stored under.
        ttl: Expiry in seconds; ``None`` lets the cache pick the TTL
            configured for *namespace*.

    The decorated callable is always a coroutine function, even when the
    wrapped function is synchronous.

    Caveats:
        * The key is derived from ``repr`` of the arguments, so arguments
          whose repr is not stable (e.g. a default object repr containing
          a memory address, such as ``self`` on a method) never produce
          a hit.
        * A result of ``None`` is indistinguishable from a miss and is
          therefore recomputed on every call.
        * Hits return the value as decoded by the cache's serializer, which
          may differ in type from the original return value.
    """

    def decorator(func: Callable):
        # Qualify with module and qualname: the bare __name__ would make
        # same-named functions in different modules/classes share entries.
        func_id = f"{func.__module__}.{func.__qualname__}"
        is_async = asyncio.iscoroutinefunction(func)

        @wraps(func)
        async def wrapper(*args, **kwargs):
            # Sort kwargs so keyword order at the call site does not change
            # the key.
            arg_repr = f"{args!r}|{sorted(kwargs.items())!r}"
            cache_key = f"{func_id}:{hashlib.md5(arg_repr.encode()).hexdigest()}"

            # The cache is an optimisation: if it cannot be reached, run the
            # function anyway instead of failing the call.
            cache = None
            try:
                cache = await get_cache()
            except Exception as e:
                logger.warning(
                    "Cache unavailable, executing function without cache",
                    function=func.__name__,
                    error=str(e),
                )

            # Try to get from cache first
            if cache is not None:
                cached_result = await cache.get(namespace, cache_key)
                if cached_result is not None:
                    logger.debug("Function result served from cache", function=func.__name__)
                    return cached_result

            # Execute function and cache result
            if is_async:
                result = await func(*args, **kwargs)
            else:
                result = func(*args, **kwargs)

            # Cache the result
            if cache is not None:
                await cache.set(namespace, cache_key, result, ttl)
                logger.debug("Function result cached", function=func.__name__)

            return result

        return wrapper
    return decorator
|
345
|
+
|
346
|
+
def cached_model_response(ttl: Optional[int] = None):
    """Shorthand for ``cached_response`` bound to the ``"models"`` namespace."""
    model_decorator = cached_response(namespace="models", ttl=ttl)
    return model_decorator
|
349
|
+
|
350
|
+
# Specialized cache functions
|
351
|
+
async def cache_model_response(
    model_id: str,
    input_hash: str,
    response: Any,
    ttl: Optional[int] = None
):
    """Store *response* in the ``"models"`` namespace.

    The entry key is ``"<model_id>:<input_hash>"``.  The outcome of the
    underlying ``set`` call is not returned.
    """
    entry_key = "{}:{}".format(model_id, input_hash)
    store = await get_cache()
    await store.set("models", entry_key, response, ttl)
|
361
|
+
|
362
|
+
async def get_cached_model_response(
    model_id: str,
    input_hash: str
) -> Optional[Any]:
    """Look up ``"<model_id>:<input_hash>"`` in the ``"models"`` namespace.

    Returns whatever the cache's ``get`` returns for that key.
    """
    entry_key = "{}:{}".format(model_id, input_hash)
    store = await get_cache()
    found = await store.get("models", entry_key)
    return found
|
370
|
+
|
371
|
+
async def cache_user_session(user_id: str, session_data: Dict[str, Any]):
    """Store *session_data* under *user_id* in the ``"sessions"`` namespace.

    No TTL is passed, so the cache chooses the expiry for that namespace.
    """
    session_store = await get_cache()
    await session_store.set("sessions", user_id, session_data)
|
375
|
+
|
376
|
+
async def get_user_session(user_id: str) -> Optional[Dict[str, Any]]:
    """Fetch the entry stored for *user_id* in the ``"sessions"`` namespace."""
    session_store = await get_cache()
    session = await session_store.get("sessions", user_id)
    return session
|
380
|
+
|
381
|
+
# Rate limiting cache functions
|
382
|
+
async def increment_rate_limit(key: str, window_seconds: int = 3600) -> int:
    """Add one to the ``"rate_limit"`` counter for *key*.

    *window_seconds* is forwarded to the cache as the counter's TTL.

    Returns:
        The value reported by the cache's ``increment``, or ``0`` when that
        value is falsy (for example ``None``).
    """
    limiter = await get_cache()
    new_count = await limiter.increment(
        "rate_limit", key, amount=1, ttl=window_seconds
    )
    return new_count or 0
|
386
|
+
|
387
|
+
async def get_rate_limit_count(key: str) -> int:
    """Return the current ``"rate_limit"`` counter for *key* (``0`` if absent).

    The counter is written with INCRBY, so Redis holds it as a plain
    integer string rather than a payload produced by the cache's
    serializer.  It is therefore read raw and parsed with ``int``.  Going
    through ``cache.get`` would run the configured deserializer, which
    fails for the ``"pickle"`` method and makes the count always ``0``.

    Reading raw also means this probe no longer affects the cache's
    hit/miss counters.

    Returns:
        The counter value, or ``0`` when the key is missing, the cache is
        not connected, or the read fails.
    """
    cache = await get_cache()
    if not cache._connected:
        return 0

    try:
        raw = await cache.redis_client.get(cache._generate_key("rate_limit", key))
        return int(raw) if raw is not None else 0
    except Exception as e:
        logger.error("Rate limit count read failed", key=key, error=str(e))
        return 0
|
392
|
+
|
393
|
+
# Health check
|
394
|
+
async def check_cache_health() -> Dict[str, Any]:
    """Report the cache's health as a plain dictionary.

    Returns:
        ``{"cache": "redis", "status": ..., "stats": ...}`` where status is
        ``"healthy"`` or ``"disconnected"`` according to the stats'
        ``connected`` flag; or ``{"cache": "redis", "status": "error",
        "error": <message>}`` when obtaining the cache or its stats raises.
    """
    try:
        backend = await get_cache()
        snapshot = await backend.get_stats()
        if snapshot["connected"]:
            state = "healthy"
        else:
            state = "disconnected"
        return {"cache": "redis", "status": state, "stats": snapshot}
    except Exception as exc:
        return {"cache": "redis", "status": "error", "error": str(exc)}
|