isa-model 0.3.91__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. isa_model/client.py +1166 -584
  2. isa_model/core/cache/redis_cache.py +410 -0
  3. isa_model/core/config/config_manager.py +282 -12
  4. isa_model/core/config.py +91 -1
  5. isa_model/core/database/__init__.py +1 -0
  6. isa_model/core/database/direct_db_client.py +114 -0
  7. isa_model/core/database/migration_manager.py +563 -0
  8. isa_model/core/database/migrations.py +297 -0
  9. isa_model/core/database/supabase_client.py +258 -0
  10. isa_model/core/dependencies.py +316 -0
  11. isa_model/core/discovery/__init__.py +19 -0
  12. isa_model/core/discovery/consul_discovery.py +190 -0
  13. isa_model/core/logging/__init__.py +54 -0
  14. isa_model/core/logging/influx_logger.py +523 -0
  15. isa_model/core/logging/loki_logger.py +160 -0
  16. isa_model/core/models/__init__.py +46 -0
  17. isa_model/core/models/config_models.py +625 -0
  18. isa_model/core/models/deployment_billing_tracker.py +430 -0
  19. isa_model/core/models/model_billing_tracker.py +60 -88
  20. isa_model/core/models/model_manager.py +66 -25
  21. isa_model/core/models/model_metadata.py +690 -0
  22. isa_model/core/models/model_repo.py +217 -55
  23. isa_model/core/models/model_statistics_tracker.py +234 -0
  24. isa_model/core/models/model_storage.py +0 -1
  25. isa_model/core/models/model_version_manager.py +959 -0
  26. isa_model/core/models/system_models.py +857 -0
  27. isa_model/core/pricing_manager.py +2 -249
  28. isa_model/core/repositories/__init__.py +9 -0
  29. isa_model/core/repositories/config_repository.py +912 -0
  30. isa_model/core/resilience/circuit_breaker.py +366 -0
  31. isa_model/core/security/secrets.py +358 -0
  32. isa_model/core/services/__init__.py +2 -4
  33. isa_model/core/services/intelligent_model_selector.py +479 -370
  34. isa_model/core/storage/hf_storage.py +2 -2
  35. isa_model/core/types.py +8 -0
  36. isa_model/deployment/__init__.py +5 -48
  37. isa_model/deployment/core/__init__.py +2 -31
  38. isa_model/deployment/core/deployment_manager.py +1278 -368
  39. isa_model/deployment/local/__init__.py +31 -0
  40. isa_model/deployment/local/config.py +248 -0
  41. isa_model/deployment/local/gpu_gateway.py +607 -0
  42. isa_model/deployment/local/health_checker.py +428 -0
  43. isa_model/deployment/local/provider.py +586 -0
  44. isa_model/deployment/local/tensorrt_service.py +621 -0
  45. isa_model/deployment/local/transformers_service.py +644 -0
  46. isa_model/deployment/local/vllm_service.py +527 -0
  47. isa_model/deployment/modal/__init__.py +8 -0
  48. isa_model/deployment/modal/config.py +136 -0
  49. isa_model/deployment/modal/deployer.py +894 -0
  50. isa_model/deployment/modal/services/__init__.py +3 -0
  51. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  52. isa_model/deployment/modal/services/audio/isa_audio_chatTTS_service.py +520 -0
  53. isa_model/deployment/modal/services/audio/isa_audio_openvoice_service.py +758 -0
  54. isa_model/deployment/modal/services/audio/isa_audio_service_v2.py +1044 -0
  55. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  56. isa_model/deployment/modal/services/embedding/isa_embed_rerank_service.py +296 -0
  57. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  58. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  59. isa_model/deployment/modal/services/video/__init__.py +1 -0
  60. isa_model/deployment/modal/services/video/isa_video_hunyuan_service.py +423 -0
  61. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  62. isa_model/deployment/modal/services/vision/isa_vision_ocr_service.py +519 -0
  63. isa_model/deployment/modal/services/vision/isa_vision_qwen25_service.py +709 -0
  64. isa_model/deployment/modal/services/vision/isa_vision_table_service.py +676 -0
  65. isa_model/deployment/modal/services/vision/isa_vision_ui_service.py +833 -0
  66. isa_model/deployment/modal/services/vision/isa_vision_ui_service_optimized.py +660 -0
  67. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  68. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  69. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  70. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  71. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  72. isa_model/deployment/storage/__init__.py +5 -0
  73. isa_model/deployment/storage/deployment_repository.py +824 -0
  74. isa_model/deployment/triton/__init__.py +10 -0
  75. isa_model/deployment/triton/config.py +196 -0
  76. isa_model/deployment/triton/configs/__init__.py +1 -0
  77. isa_model/deployment/triton/provider.py +512 -0
  78. isa_model/deployment/triton/scripts/__init__.py +1 -0
  79. isa_model/deployment/triton/templates/__init__.py +1 -0
  80. isa_model/inference/__init__.py +47 -1
  81. isa_model/inference/ai_factory.py +179 -16
  82. isa_model/inference/legacy_services/__init__.py +21 -0
  83. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  84. isa_model/inference/legacy_services/model_service.py +573 -0
  85. isa_model/inference/legacy_services/model_serving.py +717 -0
  86. isa_model/inference/legacy_services/model_training.py +561 -0
  87. isa_model/inference/models/__init__.py +21 -0
  88. isa_model/inference/models/inference_config.py +551 -0
  89. isa_model/inference/models/inference_record.py +675 -0
  90. isa_model/inference/models/performance_models.py +714 -0
  91. isa_model/inference/repositories/__init__.py +9 -0
  92. isa_model/inference/repositories/inference_repository.py +828 -0
  93. isa_model/inference/services/audio/__init__.py +21 -0
  94. isa_model/inference/services/audio/base_realtime_service.py +225 -0
  95. isa_model/inference/services/audio/base_stt_service.py +184 -11
  96. isa_model/inference/services/audio/isa_tts_service.py +0 -0
  97. isa_model/inference/services/audio/openai_realtime_service.py +320 -124
  98. isa_model/inference/services/audio/openai_stt_service.py +53 -11
  99. isa_model/inference/services/base_service.py +17 -1
  100. isa_model/inference/services/custom_model_manager.py +277 -0
  101. isa_model/inference/services/embedding/__init__.py +13 -0
  102. isa_model/inference/services/embedding/base_embed_service.py +111 -8
  103. isa_model/inference/services/embedding/isa_embed_service.py +305 -0
  104. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  105. isa_model/inference/services/embedding/openai_embed_service.py +2 -4
  106. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  107. isa_model/inference/services/embedding/tests/test_embedding.py +222 -0
  108. isa_model/inference/services/img/__init__.py +2 -2
  109. isa_model/inference/services/img/base_image_gen_service.py +24 -7
  110. isa_model/inference/services/img/replicate_image_gen_service.py +84 -422
  111. isa_model/inference/services/img/services/replicate_face_swap.py +193 -0
  112. isa_model/inference/services/img/services/replicate_flux.py +226 -0
  113. isa_model/inference/services/img/services/replicate_flux_kontext.py +219 -0
  114. isa_model/inference/services/img/services/replicate_sticker_maker.py +249 -0
  115. isa_model/inference/services/img/tests/test_img_client.py +297 -0
  116. isa_model/inference/services/llm/__init__.py +10 -2
  117. isa_model/inference/services/llm/base_llm_service.py +361 -26
  118. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  119. isa_model/inference/services/llm/helpers/llm_adapter.py +71 -12
  120. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  121. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  122. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  123. isa_model/inference/services/llm/local_llm_service.py +747 -0
  124. isa_model/inference/services/llm/ollama_llm_service.py +11 -3
  125. isa_model/inference/services/llm/openai_llm_service.py +670 -56
  126. isa_model/inference/services/llm/yyds_llm_service.py +10 -3
  127. isa_model/inference/services/vision/__init__.py +27 -6
  128. isa_model/inference/services/vision/base_vision_service.py +118 -185
  129. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  130. isa_model/inference/services/vision/helpers/image_utils.py +19 -10
  131. isa_model/inference/services/vision/isa_vision_service.py +634 -0
  132. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  133. isa_model/inference/services/vision/tests/test_ocr_client.py +284 -0
  134. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  135. isa_model/serving/api/cache_manager.py +245 -0
  136. isa_model/serving/api/dependencies/__init__.py +1 -0
  137. isa_model/serving/api/dependencies/auth.py +194 -0
  138. isa_model/serving/api/dependencies/database.py +139 -0
  139. isa_model/serving/api/error_handlers.py +284 -0
  140. isa_model/serving/api/fastapi_server.py +240 -18
  141. isa_model/serving/api/middleware/auth.py +317 -0
  142. isa_model/serving/api/middleware/security.py +268 -0
  143. isa_model/serving/api/middleware/tenant_context.py +414 -0
  144. isa_model/serving/api/routes/analytics.py +489 -0
  145. isa_model/serving/api/routes/config.py +645 -0
  146. isa_model/serving/api/routes/deployment_billing.py +315 -0
  147. isa_model/serving/api/routes/deployments.py +475 -0
  148. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  149. isa_model/serving/api/routes/health.py +32 -12
  150. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  151. isa_model/serving/api/routes/local_deployments.py +448 -0
  152. isa_model/serving/api/routes/logs.py +430 -0
  153. isa_model/serving/api/routes/settings.py +582 -0
  154. isa_model/serving/api/routes/tenants.py +575 -0
  155. isa_model/serving/api/routes/unified.py +992 -171
  156. isa_model/serving/api/routes/webhooks.py +479 -0
  157. isa_model/serving/api/startup.py +318 -0
  158. isa_model/serving/modal_proxy_server.py +249 -0
  159. isa_model/utils/gpu_utils.py +311 -0
  160. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/METADATA +76 -22
  161. isa_model-0.4.3.dist-info/RECORD +193 -0
  162. isa_model/deployment/cloud/__init__.py +0 -9
  163. isa_model/deployment/cloud/modal/__init__.py +0 -10
  164. isa_model/deployment/cloud/modal/isa_vision_doc_service.py +0 -766
  165. isa_model/deployment/cloud/modal/isa_vision_table_service.py +0 -532
  166. isa_model/deployment/cloud/modal/isa_vision_ui_service.py +0 -406
  167. isa_model/deployment/cloud/modal/register_models.py +0 -321
  168. isa_model/deployment/core/deployment_config.py +0 -356
  169. isa_model/deployment/core/isa_deployment_service.py +0 -401
  170. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  171. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  172. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  173. isa_model/deployment/runtime/deployed_service.py +0 -338
  174. isa_model/deployment/services/__init__.py +0 -9
  175. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  176. isa_model/deployment/services/model_service.py +0 -332
  177. isa_model/deployment/services/service_monitor.py +0 -356
  178. isa_model/deployment/services/service_registry.py +0 -527
  179. isa_model/eval/__init__.py +0 -92
  180. isa_model/eval/benchmarks.py +0 -469
  181. isa_model/eval/config/__init__.py +0 -10
  182. isa_model/eval/config/evaluation_config.py +0 -108
  183. isa_model/eval/evaluators/__init__.py +0 -18
  184. isa_model/eval/evaluators/base_evaluator.py +0 -503
  185. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  186. isa_model/eval/factory.py +0 -531
  187. isa_model/eval/infrastructure/__init__.py +0 -24
  188. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  189. isa_model/eval/metrics.py +0 -798
  190. isa_model/inference/adapter/unified_api.py +0 -248
  191. isa_model/inference/services/helpers/stacked_config.py +0 -148
  192. isa_model/inference/services/img/flux_professional_service.py +0 -603
  193. isa_model/inference/services/img/helpers/base_stacked_service.py +0 -274
  194. isa_model/inference/services/others/table_transformer_service.py +0 -61
  195. isa_model/inference/services/vision/doc_analysis_service.py +0 -640
  196. isa_model/inference/services/vision/helpers/base_stacked_service.py +0 -274
  197. isa_model/inference/services/vision/ui_analysis_service.py +0 -823
  198. isa_model/scripts/inference_tracker.py +0 -283
  199. isa_model/scripts/mlflow_manager.py +0 -379
  200. isa_model/scripts/model_registry.py +0 -465
  201. isa_model/scripts/register_models.py +0 -370
  202. isa_model/scripts/register_models_with_embeddings.py +0 -510
  203. isa_model/scripts/start_mlflow.py +0 -95
  204. isa_model/scripts/training_tracker.py +0 -257
  205. isa_model/training/__init__.py +0 -74
  206. isa_model/training/annotation/annotation_schema.py +0 -47
  207. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  208. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  209. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  210. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  211. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  212. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  213. isa_model/training/annotation/views/annotation_controller.py +0 -158
  214. isa_model/training/cloud/__init__.py +0 -22
  215. isa_model/training/cloud/job_orchestrator.py +0 -402
  216. isa_model/training/cloud/runpod_trainer.py +0 -454
  217. isa_model/training/cloud/storage_manager.py +0 -482
  218. isa_model/training/core/__init__.py +0 -23
  219. isa_model/training/core/config.py +0 -181
  220. isa_model/training/core/dataset.py +0 -222
  221. isa_model/training/core/trainer.py +0 -720
  222. isa_model/training/core/utils.py +0 -213
  223. isa_model/training/factory.py +0 -424
  224. isa_model-0.3.91.dist-info/RECORD +0 -138
  225. /isa_model/{core/storage/minio_storage.py → deployment/modal/services/audio/isa_audio_fish_service.py} +0 -0
  226. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  227. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  228. {isa_model-0.3.91.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,410 @@
1
+ """
2
+ Redis-based Caching Strategy for ISA Model
3
+
4
+ Provides multi-level caching for:
5
+ - Model responses and completions
6
+ - Model metadata and configurations
7
+ - User sessions and authentication
8
+ - Rate limiting data
9
+ """
10
+
11
+ import json
12
+ import hashlib
13
+ import pickle
14
+ import asyncio
15
+ import time
16
+ import logging
17
+ import os
18
+ from typing import Any, Dict, Optional, Union, List, Callable
19
+ from dataclasses import dataclass
20
+ import redis.asyncio as redis
21
+ import structlog
22
+ from functools import wraps
23
+
24
+ from ..config.config_manager import ConfigManager
25
+
26
+ logger = structlog.get_logger(__name__)
27
+
28
@dataclass
class CacheConfig:
    """Configuration for the Redis cache.

    All TTL values are in seconds. Field order is part of the positional
    constructor signature and must not be rearranged.
    """

    # Connection URL. When left as None it is filled in by __post_init__
    # from ConfigManager.get_redis_url().
    redis_url: Optional[str] = None
    default_ttl: int = 3600  # 1 hour
    model_cache_ttl: int = 3600  # 1 hour for model responses
    config_cache_ttl: int = 7200  # 2 hours for configurations
    session_cache_ttl: int = 86400  # 24 hours for sessions
    rate_limit_ttl: int = 3600  # 1 hour for rate limiting
    # Keys longer than this are replaced by a hashed form in RedisCache.
    max_key_length: int = 250
    # NOTE(review): nothing in this module reads this flag; values are stored
    # uncompressed regardless of its setting.
    compression_enabled: bool = True
    serialization_method: str = "json"  # "json" or "pickle"

    def __post_init__(self):
        """Resolve ``redis_url`` through ConfigManager when none was given."""
        if self.redis_url is None:
            config_manager = ConfigManager()
            # The original author notes this lookup uses Consul discovery with
            # a fallback; that logic lives in ConfigManager, not here.
            self.redis_url = config_manager.get_redis_url()
46
+
47
class RedisCache:
    """Async Redis cache with namespaced keys, per-namespace TTLs and counters.

    Read/write helpers are best-effort: when the cache is not connected, or a
    Redis call fails, they log and return a neutral value (None / False / 0)
    instead of raising. Only :meth:`connect` propagates errors.
    """

    def __init__(self, config: "CacheConfig"):
        """Store *config*; no connection is opened until :meth:`connect`."""
        self.config = config
        self.redis_client = None
        self._connected = False
        # Counters kept on this instance and reported by get_stats().
        self._stats = {
            "hits": 0,
            "misses": 0,
            "sets": 0,
            "deletes": 0,
            "errors": 0,
        }

    @staticmethod
    def _redact_url(url):
        """Return *url* with any password replaced by ``***`` for logging."""
        from urllib.parse import urlsplit, urlunsplit

        if not isinstance(url, str):
            return url
        try:
            parts = urlsplit(url)
        except ValueError:
            return "<unparseable redis url>"

        userinfo, at_sign, host_port = parts.netloc.rpartition("@")
        if not at_sign:
            return url
        user, colon, _password = userinfo.partition(":")
        if not colon:
            return url
        return urlunsplit(parts._replace(netloc=f"{user}:***@{host_port}"))

    async def connect(self):
        """Create the client, verify it with PING and mark the cache connected.

        Raises:
            Exception: whatever ``redis.from_url`` or ``ping`` raised; the
                error is logged and re-raised unchanged.
        """
        try:
            self.redis_client = redis.from_url(
                self.config.redis_url,
                decode_responses=False,  # Handle binary data
                retry_on_timeout=True,
                health_check_interval=30,
            )

            # Test connection
            await self.redis_client.ping()
            self._connected = True

            # The URL may embed credentials, so never log it verbatim.
            logger.info(
                "Redis cache connected",
                url=self._redact_url(self.config.redis_url),
            )

        except Exception as e:
            logger.error("Failed to connect to Redis", error=str(e))
            self._connected = False
            raise

    async def disconnect(self):
        """Close the client, if one was created, and mark the cache offline."""
        if self.redis_client:
            # Newer redis-py releases deprecate close() in favour of aclose();
            # fall back to close() for older versions.
            closer = getattr(self.redis_client, "aclose", None) or self.redis_client.close
            await closer()
            self._connected = False
            logger.info("Redis cache disconnected")

    def _generate_key(self, namespace: str, key: str) -> str:
        """Build the Redis key ``isa_model:<namespace>:<key>``.

        Keys longer than ``config.max_key_length`` are replaced by
        ``isa_model:<namespace>:hash_<sha256 hex digest>``. The full digest is
        used because a truncated hash makes collisions between distinct long
        keys realistic, and a collision would return another entry's data.
        """
        full_key = f"isa_model:{namespace}:{key}"

        if len(full_key) > self.config.max_key_length:
            digest = hashlib.sha256(full_key.encode("utf-8")).hexdigest()
            full_key = f"isa_model:{namespace}:hash_{digest}"

        return full_key

    def _serialize_value(self, value: Any) -> bytes:
        """Encode *value* to bytes using the configured serialization method.

        With JSON, objects that are not natively serializable are stringified
        (``default=str``), so they do not round-trip to their original type.
        """
        try:
            if self.config.serialization_method == "pickle":
                return pickle.dumps(value)
            json_str = json.dumps(value, default=str, ensure_ascii=False)
            return json_str.encode("utf-8")
        except Exception as e:
            logger.error("Serialization failed", error=str(e))
            raise

    def _deserialize_value(self, data: bytes) -> Any:
        """Decode bytes read from Redis using the configured method."""
        try:
            if self.config.serialization_method == "pickle":
                # SECURITY: pickle.loads executes arbitrary code from its
                # input. Only enable "pickle" when every writer to this Redis
                # instance is trusted.
                return pickle.loads(data)
            return json.loads(data.decode("utf-8"))
        except Exception as e:
            logger.error("Deserialization failed", error=str(e))
            raise

    async def get(self, namespace: str, key: str) -> Optional[Any]:
        """Return the cached value, or None on miss, error or when offline."""
        if not self._connected:
            return None

        try:
            cache_key = self._generate_key(namespace, key)
            data = await self.redis_client.get(cache_key)

            if data is None:
                self._stats["misses"] += 1
                return None

            value = self._deserialize_value(data)
            self._stats["hits"] += 1

            logger.debug("Cache hit", namespace=namespace, key=key)
            return value

        except Exception as e:
            self._stats["errors"] += 1
            logger.error("Cache get failed", namespace=namespace, key=key, error=str(e))
            return None

    async def set(
        self,
        namespace: str,
        key: str,
        value: Any,
        ttl: Optional[int] = None
    ) -> bool:
        """Store *value* with an expiry; return True when the write succeeded.

        When *ttl* is None the TTL configured for *namespace* is used.
        """
        if not self._connected:
            return False

        try:
            cache_key = self._generate_key(namespace, key)
            serialized_value = self._serialize_value(value)

            # Use namespace-specific TTL if not provided
            if ttl is None:
                ttl = self._get_namespace_ttl(namespace)

            await self.redis_client.setex(cache_key, ttl, serialized_value)
            self._stats["sets"] += 1

            logger.debug("Cache set", namespace=namespace, key=key, ttl=ttl)
            return True

        except Exception as e:
            self._stats["errors"] += 1
            logger.error("Cache set failed", namespace=namespace, key=key, error=str(e))
            return False

    async def delete(self, namespace: str, key: str) -> bool:
        """Delete a key; return True only if it existed and was removed."""
        if not self._connected:
            return False

        try:
            cache_key = self._generate_key(namespace, key)
            result = await self.redis_client.delete(cache_key)
            self._stats["deletes"] += 1

            logger.debug("Cache delete", namespace=namespace, key=key, existed=bool(result))
            return bool(result)

        except Exception as e:
            self._stats["errors"] += 1
            logger.error("Cache delete failed", namespace=namespace, key=key, error=str(e))
            return False

    async def exists(self, namespace: str, key: str) -> bool:
        """Return True if the key is present; False on error or when offline."""
        if not self._connected:
            return False

        try:
            cache_key = self._generate_key(namespace, key)
            return bool(await self.redis_client.exists(cache_key))
        except Exception as e:
            self._stats["errors"] += 1
            logger.error("Cache exists check failed", error=str(e))
            return False

    async def increment(self, namespace: str, key: str, amount: int = 1, ttl: Optional[int] = None) -> Optional[int]:
        """Add *amount* to a counter and return its new value.

        When *ttl* is given, an expiry is applied only if the counter does not
        already have one. Refreshing the expiry on every call would keep
        pushing the deadline out, so a counter that is hit continuously (the
        rate-limit case) would never reset.

        Returns:
            The counter value after the increment, or None on error or when
            the cache is offline.
        """
        if not self._connected:
            return None

        try:
            cache_key = self._generate_key(namespace, key)

            # Queue the increment and the TTL probe on one pipeline so they
            # are sent to Redis together.
            pipe = self.redis_client.pipeline()
            pipe.incrby(cache_key, amount)
            if ttl:
                pipe.ttl(cache_key)

            results = await pipe.execute()

            # TTL reports a negative value when the key has no expiry, i.e. the
            # counter was just created or lost its expiry. Concurrent callers
            # may both set it here; that is harmless.
            if ttl and results[1] < 0:
                await self.redis_client.expire(cache_key, ttl)

            return results[0]

        except Exception as e:
            self._stats["errors"] += 1
            logger.error("Cache increment failed", error=str(e))
            return None

    async def clear_namespace(self, namespace: str) -> int:
        """Delete every key under *namespace*; return how many were removed."""
        if not self._connected:
            return 0

        try:
            pattern = f"isa_model:{namespace}:*"

            # Use SCAN to avoid blocking Redis
            keys = [key async for key in self.redis_client.scan_iter(pattern)]

            if keys:
                deleted = await self.redis_client.delete(*keys)
                logger.info("Namespace cleared", namespace=namespace, deleted_keys=deleted)
                return deleted

            return 0

        except Exception as e:
            self._stats["errors"] += 1
            logger.error("Cache namespace clear failed", namespace=namespace, error=str(e))
            return 0

    def _get_namespace_ttl(self, namespace: str) -> int:
        """Return the configured TTL for *namespace*, else ``default_ttl``."""
        ttl_mapping = {
            "models": self.config.model_cache_ttl,
            "config": self.config.config_cache_ttl,
            "sessions": self.config.session_cache_ttl,
            "rate_limit": self.config.rate_limit_ttl,
            "responses": self.config.model_cache_ttl,
        }
        return ttl_mapping.get(namespace, self.config.default_ttl)

    async def get_stats(self) -> Dict[str, Any]:
        """Return a copy of the local counters plus hit rate and server info.

        ``redis_info`` is included only when connected and the INFO call
        succeeds.
        """
        stats = dict(self._stats)

        # Calculate hit rate
        total_requests = stats["hits"] + stats["misses"]
        stats["hit_rate"] = stats["hits"] / total_requests if total_requests > 0 else 0
        stats["connected"] = self._connected

        # Redis info if connected
        if self._connected:
            try:
                redis_info = await self.redis_client.info()
                stats["redis_info"] = {
                    "used_memory": redis_info.get("used_memory"),
                    "connected_clients": redis_info.get("connected_clients"),
                    "total_commands_processed": redis_info.get("total_commands_processed"),
                    "keyspace_hits": redis_info.get("keyspace_hits"),
                    "keyspace_misses": redis_info.get("keyspace_misses"),
                }
            except Exception as e:
                logger.error("Failed to get Redis info", error=str(e))

        return stats
292
+
293
# Global cache instance
_cache: Optional["RedisCache"] = None


async def get_cache() -> "RedisCache":
    """Return the process-wide cache, creating and connecting it on first use.

    Settings are read from the environment: ``REDIS_URL`` (falling back to
    ``ConfigManager.get_redis_url()``), ``CACHE_DEFAULT_TTL``,
    ``MODEL_CACHE_TTL`` and ``CACHE_COMPRESSION``.

    Raises:
        Exception: whatever ``RedisCache.connect`` raised. The global is left
            unset in that case so the next call retries the connection.
    """
    global _cache

    if _cache is None:
        config_manager = ConfigManager()
        config = CacheConfig(
            redis_url=os.getenv("REDIS_URL", config_manager.get_redis_url()),
            default_ttl=int(os.getenv("CACHE_DEFAULT_TTL", "3600")),
            model_cache_ttl=int(os.getenv("MODEL_CACHE_TTL", "3600")),
            compression_enabled=os.getenv("CACHE_COMPRESSION", "true").lower() == "true"
        )
        # Publish the instance only after connect() succeeds. Assigning the
        # global first would leave a permanently disconnected cache behind
        # after one failed attempt, and every later call would return it.
        candidate = RedisCache(config)
        await candidate.connect()
        _cache = candidate

    return _cache
312
+
313
+ # Caching decorators
314
def cached_response(namespace: str = "responses", ttl: Optional[int] = None):
    """Decorator that caches a function's result in the shared Redis cache.

    The wrapper is always a coroutine function, including when the decorated
    function is synchronous, so callers must ``await`` it.

    Args:
        namespace: Cache namespace the entries are stored under.
        ttl: Expiry in seconds; None defers to the namespace default.

    Notes:
        * The key is derived from the function's module-qualified name and
          the ``repr`` of its arguments. Arguments whose repr is not stable
          (for example objects showing a memory address) will not hit.
        * A result of None is never served from cache, because it is
          indistinguishable from a miss.
        * Cached results come back through the cache's serializer, so they
          may not have the same type as the original return value.
    """

    def decorator(func: Callable):
        # Qualify with module and qualname so same-named functions in
        # different modules or classes do not share cache entries.
        func_id = f"{func.__module__}.{func.__qualname__}"
        is_coroutine = asyncio.iscoroutinefunction(func)

        @wraps(func)
        async def wrapper(*args, **kwargs):
            # Sort keyword arguments so the key does not depend on the order
            # they were written at the call site.
            arg_repr = repr((args, sorted(kwargs.items())))
            cache_key = f"{func_id}:{hashlib.md5(arg_repr.encode()).hexdigest()}"

            cache = await get_cache()

            # Try to get from cache first
            cached_result = await cache.get(namespace, cache_key)
            if cached_result is not None:
                logger.debug("Function result served from cache", function=func.__name__)
                return cached_result

            # Execute function and cache result
            if is_coroutine:
                result = await func(*args, **kwargs)
            else:
                result = func(*args, **kwargs)

            await cache.set(namespace, cache_key, result, ttl)
            logger.debug("Function result cached", function=func.__name__)

            return result

        return wrapper
    return decorator
345
+
346
def cached_model_response(ttl: Optional[int] = None):
    """Shorthand for ``cached_response`` bound to the "models" namespace."""
    model_decorator = cached_response(namespace="models", ttl=ttl)
    return model_decorator
349
+
350
+ # Specialized cache functions
351
async def cache_model_response(
    model_id: str,
    input_hash: str,
    response: Any,
    ttl: Optional[int] = None
):
    """Store *response* in the "models" namespace under ``<model_id>:<input_hash>``.

    The outcome of the write is not reported to the caller.
    """
    shared_cache = await get_cache()
    entry_key = "{}:{}".format(model_id, input_hash)
    await shared_cache.set("models", entry_key, response, ttl)
361
+
362
async def get_cached_model_response(
    model_id: str,
    input_hash: str
) -> Optional[Any]:
    """Look up ``<model_id>:<input_hash>`` in the "models" namespace.

    Returns whatever the cache's ``get`` returns for that key.
    """
    shared_cache = await get_cache()
    entry_key = "{}:{}".format(model_id, input_hash)
    stored = await shared_cache.get("models", entry_key)
    return stored
370
+
371
async def cache_user_session(user_id: str, session_data: Dict[str, Any]):
    """Store *session_data* in the "sessions" namespace, keyed by *user_id*.

    No TTL is passed, so the cache applies its default for that namespace.
    """
    session_store = await get_cache()
    await session_store.set("sessions", user_id, session_data)
375
+
376
async def get_user_session(user_id: str) -> Optional[Dict[str, Any]]:
    """Fetch the entry stored for *user_id* in the "sessions" namespace."""
    session_store = await get_cache()
    session = await session_store.get("sessions", user_id)
    return session
380
+
381
+ # Rate limiting cache functions
382
async def increment_rate_limit(key: str, window_seconds: int = 3600) -> int:
    """Bump the "rate_limit" counter for *key* by one and return its value.

    *window_seconds* is forwarded as the counter's TTL. A falsy result from
    the cache (including None) is reported as 0.
    """
    limiter = await get_cache()
    current = await limiter.increment("rate_limit", key, amount=1, ttl=window_seconds)
    if current:
        return current
    return 0
386
+
387
async def get_rate_limit_count(key: str) -> int:
    """Read the "rate_limit" counter for *key*; a falsy or missing value yields 0."""
    limiter = await get_cache()
    current = await limiter.get("rate_limit", key)
    if not current:
        return 0
    return current
392
+
393
+ # Health check
394
async def check_cache_health() -> Dict[str, Any]:
    """Report cache health as a dict and never raise.

    On success the dict holds ``cache``, ``status`` ("healthy" or
    "disconnected", taken from the stats' ``connected`` flag) and ``stats``.
    On any exception it holds ``cache``, ``status`` "error" and ``error``.
    """
    report: Dict[str, Any] = {"cache": "redis"}
    try:
        shared_cache = await get_cache()
        snapshot = await shared_cache.get_stats()
        if snapshot["connected"]:
            report["status"] = "healthy"
        else:
            report["status"] = "disconnected"
        report["stats"] = snapshot
    except Exception as exc:
        return {
            "cache": "redis",
            "status": "error",
            "error": str(exc)
        }
    return report