isa-model 0.4.0__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff compares two package versions as published to one of the supported public registries. It is provided for informational purposes only.
Files changed (199)
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +40 -17
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/storage/hf_storage.py +1 -1
  26. isa_model/core/types.py +1 -0
  27. isa_model/deployment/__init__.py +5 -48
  28. isa_model/deployment/core/__init__.py +2 -31
  29. isa_model/deployment/core/deployment_manager.py +1278 -370
  30. isa_model/deployment/local/__init__.py +31 -0
  31. isa_model/deployment/local/config.py +248 -0
  32. isa_model/deployment/local/gpu_gateway.py +607 -0
  33. isa_model/deployment/local/health_checker.py +428 -0
  34. isa_model/deployment/local/provider.py +586 -0
  35. isa_model/deployment/local/tensorrt_service.py +621 -0
  36. isa_model/deployment/local/transformers_service.py +644 -0
  37. isa_model/deployment/local/vllm_service.py +527 -0
  38. isa_model/deployment/modal/__init__.py +8 -0
  39. isa_model/deployment/modal/config.py +136 -0
  40. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  41. isa_model/deployment/modal/services/__init__.py +3 -0
  42. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  43. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  44. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  45. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  46. isa_model/deployment/modal/services/video/__init__.py +1 -0
  47. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  48. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  49. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  50. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  51. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  52. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  53. isa_model/deployment/storage/__init__.py +5 -0
  54. isa_model/deployment/storage/deployment_repository.py +824 -0
  55. isa_model/deployment/triton/__init__.py +10 -0
  56. isa_model/deployment/triton/config.py +196 -0
  57. isa_model/deployment/triton/configs/__init__.py +1 -0
  58. isa_model/deployment/triton/provider.py +512 -0
  59. isa_model/deployment/triton/scripts/__init__.py +1 -0
  60. isa_model/deployment/triton/templates/__init__.py +1 -0
  61. isa_model/inference/__init__.py +47 -1
  62. isa_model/inference/ai_factory.py +137 -10
  63. isa_model/inference/legacy_services/__init__.py +21 -0
  64. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  65. isa_model/inference/legacy_services/model_service.py +573 -0
  66. isa_model/inference/legacy_services/model_serving.py +717 -0
  67. isa_model/inference/legacy_services/model_training.py +561 -0
  68. isa_model/inference/models/__init__.py +21 -0
  69. isa_model/inference/models/inference_config.py +551 -0
  70. isa_model/inference/models/inference_record.py +675 -0
  71. isa_model/inference/models/performance_models.py +714 -0
  72. isa_model/inference/repositories/__init__.py +9 -0
  73. isa_model/inference/repositories/inference_repository.py +828 -0
  74. isa_model/inference/services/audio/base_stt_service.py +184 -11
  75. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  76. isa_model/inference/services/custom_model_manager.py +277 -0
  77. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  78. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  79. isa_model/inference/services/llm/__init__.py +10 -2
  80. isa_model/inference/services/llm/base_llm_service.py +335 -24
  81. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  82. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  83. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  84. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  85. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  86. isa_model/inference/services/llm/local_llm_service.py +747 -0
  87. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  88. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  89. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  90. isa_model/inference/services/vision/__init__.py +22 -1
  91. isa_model/inference/services/vision/blip_vision_service.py +359 -0
  92. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  93. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  94. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  95. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  96. isa_model/serving/api/cache_manager.py +245 -0
  97. isa_model/serving/api/dependencies/__init__.py +1 -0
  98. isa_model/serving/api/dependencies/auth.py +194 -0
  99. isa_model/serving/api/dependencies/database.py +139 -0
  100. isa_model/serving/api/error_handlers.py +284 -0
  101. isa_model/serving/api/fastapi_server.py +172 -22
  102. isa_model/serving/api/middleware/auth.py +8 -2
  103. isa_model/serving/api/middleware/security.py +23 -33
  104. isa_model/serving/api/middleware/tenant_context.py +414 -0
  105. isa_model/serving/api/routes/analytics.py +4 -1
  106. isa_model/serving/api/routes/config.py +645 -0
  107. isa_model/serving/api/routes/deployment_billing.py +315 -0
  108. isa_model/serving/api/routes/deployments.py +138 -2
  109. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  110. isa_model/serving/api/routes/health.py +32 -12
  111. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  112. isa_model/serving/api/routes/local_deployments.py +448 -0
  113. isa_model/serving/api/routes/tenants.py +575 -0
  114. isa_model/serving/api/routes/unified.py +680 -18
  115. isa_model/serving/api/routes/webhooks.py +479 -0
  116. isa_model/serving/api/startup.py +68 -54
  117. isa_model/utils/gpu_utils.py +311 -0
  118. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/METADATA +66 -24
  119. isa_model-0.4.3.dist-info/RECORD +193 -0
  120. isa_model/core/storage/minio_storage.py +0 -0
  121. isa_model/deployment/cloud/__init__.py +0 -9
  122. isa_model/deployment/cloud/modal/__init__.py +0 -10
  123. isa_model/deployment/core/deployment_config.py +0 -356
  124. isa_model/deployment/core/isa_deployment_service.py +0 -401
  125. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  126. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  127. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  128. isa_model/deployment/runtime/deployed_service.py +0 -338
  129. isa_model/deployment/services/__init__.py +0 -9
  130. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  131. isa_model/deployment/services/model_service.py +0 -332
  132. isa_model/deployment/services/service_monitor.py +0 -356
  133. isa_model/deployment/services/service_registry.py +0 -527
  134. isa_model/eval/__init__.py +0 -92
  135. isa_model/eval/benchmarks/__init__.py +0 -27
  136. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  137. isa_model/eval/benchmarks.py +0 -701
  138. isa_model/eval/config/__init__.py +0 -10
  139. isa_model/eval/config/evaluation_config.py +0 -108
  140. isa_model/eval/evaluators/__init__.py +0 -24
  141. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  142. isa_model/eval/evaluators/base_evaluator.py +0 -503
  143. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  144. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  145. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  146. isa_model/eval/example_evaluation.py +0 -395
  147. isa_model/eval/factory.py +0 -798
  148. isa_model/eval/infrastructure/__init__.py +0 -24
  149. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  150. isa_model/eval/isa_benchmarks.py +0 -700
  151. isa_model/eval/isa_integration.py +0 -582
  152. isa_model/eval/metrics.py +0 -951
  153. isa_model/eval/tests/unit/test_basic.py +0 -396
  154. isa_model/serving/api/routes/evaluations.py +0 -579
  155. isa_model/training/__init__.py +0 -168
  156. isa_model/training/annotation/annotation_schema.py +0 -47
  157. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  158. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  159. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  160. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  161. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  162. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  163. isa_model/training/annotation/views/annotation_controller.py +0 -158
  164. isa_model/training/cloud/__init__.py +0 -22
  165. isa_model/training/cloud/job_orchestrator.py +0 -402
  166. isa_model/training/cloud/runpod_trainer.py +0 -454
  167. isa_model/training/cloud/storage_manager.py +0 -482
  168. isa_model/training/core/__init__.py +0 -26
  169. isa_model/training/core/config.py +0 -181
  170. isa_model/training/core/dataset.py +0 -222
  171. isa_model/training/core/trainer.py +0 -720
  172. isa_model/training/core/utils.py +0 -213
  173. isa_model/training/examples/intelligent_training_example.py +0 -281
  174. isa_model/training/factory.py +0 -424
  175. isa_model/training/intelligent/__init__.py +0 -25
  176. isa_model/training/intelligent/decision_engine.py +0 -643
  177. isa_model/training/intelligent/intelligent_factory.py +0 -888
  178. isa_model/training/intelligent/knowledge_base.py +0 -751
  179. isa_model/training/intelligent/resource_optimizer.py +0 -839
  180. isa_model/training/intelligent/task_classifier.py +0 -576
  181. isa_model/training/storage/__init__.py +0 -24
  182. isa_model/training/storage/core_integration.py +0 -439
  183. isa_model/training/storage/training_repository.py +0 -552
  184. isa_model/training/storage/training_storage.py +0 -628
  185. isa_model-0.4.0.dist-info/RECORD +0 -182
  186. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  187. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  188. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  189. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  190. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  191. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  192. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  193. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  194. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  195. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  196. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  197. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  198. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
  199. {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
--- /dev/null
+++ isa_model/core/logging/influx_logger.py
@@ -0,0 +1,523 @@
+"""
+InfluxDB-based Inference Logging System for ISA Model
+
+This module provides comprehensive logging for model inference requests,
+optimized for time-series analysis and monitoring.
+
+Data Model:
+- measurement: 'inference_requests' (main table)
+- tags: indexed fields for fast queries (provider, model, service_type, etc.)
+- fields: numerical and text data (tokens, costs, response_time, etc.)
+- timestamp: automatic time-based partitioning
+
+Key Features:
+- Automatic data retention (30 days default)
+- Cost-effective storage with compression
+- Real-time monitoring capabilities
+- Aggregated metrics generation
+"""
+
+import os
+import json
+import hashlib
+import logging
+from datetime import datetime, timezone
+from typing import Dict, Any, Optional, List, Union
+from dataclasses import dataclass, field
+from influxdb_client import InfluxDBClient, Point, WritePrecision
+from influxdb_client.client.write_api import SYNCHRONOUS
+import uuid
+
+logger = logging.getLogger(__name__)
+
+@dataclass
+class InferenceLogEntry:
+    """
+    Structure for inference log data
+    """
+    # Required fields
+    request_id: str
+    service_type: str  # 'text', 'vision', 'audio', 'image', 'embedding'
+    task: str  # 'chat', 'analyze_image', 'generate_speech', etc.
+    provider: str  # 'openai', 'replicate', 'anthropic', etc.
+    model_name: str  # Actual model used
+    status: str  # 'completed', 'failed', 'timeout'
+
+    # Timing data
+    start_time: datetime
+    end_time: Optional[datetime] = None
+    execution_time_ms: Optional[int] = None
+    queue_time_ms: Optional[int] = None
+
+    # Token and usage data
+    input_tokens: Optional[int] = None
+    output_tokens: Optional[int] = None
+    total_tokens: Optional[int] = None
+
+    # Cost data
+    estimated_cost_usd: Optional[float] = None
+    actual_cost_usd: Optional[float] = None
+    cost_breakdown: Optional[Dict[str, Any]] = None
+
+    # Request/response data (optional, for debugging)
+    input_data_hash: Optional[str] = None
+    input_size_bytes: Optional[int] = None
+    output_size_bytes: Optional[int] = None
+
+    # Streaming data
+    is_streaming: bool = False
+    stream_start_time: Optional[datetime] = None
+    stream_chunks_count: Optional[int] = None
+    time_to_first_token_ms: Optional[int] = None
+
+    # Error information
+    error_message: Optional[str] = None
+    error_code: Optional[str] = None
+
+    # Context and metadata
+    session_id: Optional[str] = None
+    user_id: Optional[str] = None
+    client_ip: Optional[str] = None
+    model_version: Optional[str] = None
+    cache_hit: bool = False
+
+    # Quality metrics
+    quality_score: Optional[float] = None
+    user_feedback: Optional[int] = None  # 1-5 rating
+
+    # Additional metadata
+    custom_metadata: Dict[str, Any] = field(default_factory=dict)
+
+class InfluxInferenceLogger:
+    """
+    InfluxDB-based logger for model inference activities
+
+    Features:
+    - Time-series storage optimized for metrics
+    - Automatic data retention and compression
+    - Real-time query capabilities
+    - Cost tracking and analysis
+    - Performance monitoring
+    """
+
+    def __init__(self):
+        """Initialize InfluxDB connection"""
+        self.enabled = os.getenv('ENABLE_INFERENCE_LOGGING', 'false').lower() == 'true'
+
+        # Logging configuration - always set these regardless of enabled status
+        self.retention_days = int(os.getenv('LOG_RETENTION_DAYS', '30'))
+        self.log_detailed_requests = os.getenv('LOG_DETAILED_REQUESTS', 'true').lower() == 'true'
+        self.log_sensitive_data = os.getenv('LOG_SENSITIVE_DATA', 'false').lower() == 'true'
+
+        if not self.enabled:
+            logger.info("Inference logging disabled via ENABLE_INFERENCE_LOGGING")
+            return
+
+        # InfluxDB configuration
+        from ..config.config_manager import ConfigManager
+        config_manager = ConfigManager()
+        # Use Consul discovery for InfluxDB URL with fallback
+        self.url = os.getenv('INFLUXDB_URL', config_manager.get_influxdb_url())
+        self.token = os.getenv('INFLUXDB_TOKEN', 'dev-token-isa-model-12345')
+        self.org = os.getenv('INFLUXDB_ORG', 'isa-model')
+        self.bucket = os.getenv('INFLUXDB_BUCKET', 'isa-model-logs')
+
+        try:
+            # Initialize InfluxDB client
+            self.client = InfluxDBClient(url=self.url, token=self.token, org=self.org)
+            self.write_api = self.client.write_api(write_options=SYNCHRONOUS)
+            self.query_api = self.client.query_api()
+
+            # Test connection
+            self._test_connection()
+            logger.info(f"InfluxDB inference logger initialized: {self.url}")
+
+        except Exception as e:
+            logger.error(f"Failed to initialize InfluxDB logger: {e}")
+            self.enabled = False
+
+    def _test_connection(self):
+        """Test InfluxDB connection"""
+        try:
+            health = self.client.health()
+            if health.status == "pass":
+                logger.debug("InfluxDB connection healthy")
+            else:
+                raise Exception(f"InfluxDB health check failed: {health.message}")
+        except Exception as e:
+            raise Exception(f"InfluxDB connection test failed: {e}")
+
+    def _create_data_hash(self, data: Any) -> str:
+        """Create SHA-256 hash of input data for deduplication"""
+        try:
+            if isinstance(data, (dict, list)):
+                data_str = json.dumps(data, sort_keys=True)
+            else:
+                data_str = str(data)
+            return hashlib.sha256(data_str.encode()).hexdigest()
+        except Exception:
+            return None
+
+    def log_inference_start(
+        self,
+        request_id: str,
+        service_type: str,
+        task: str,
+        provider: str,
+        model_name: str,
+        input_data: Any = None,
+        session_id: Optional[str] = None,
+        user_id: Optional[str] = None,
+        client_ip: Optional[str] = None,
+        is_streaming: bool = False,
+        custom_metadata: Optional[Dict[str, Any]] = None
+    ) -> None:
+        """
+        Log the start of an inference request
+        """
+        if not self.enabled:
+            return
+
+        try:
+            start_time = datetime.now(timezone.utc)
+
+            # Create data hash for input
+            input_hash = None
+            input_size = None
+            if input_data and self.log_detailed_requests:
+                input_hash = self._create_data_hash(input_data)
+                try:
+                    input_size = len(str(input_data).encode('utf-8'))
+                except:
+                    input_size = None
+
+            # Create InfluxDB point
+            point = Point("inference_requests") \
+                .tag("service_type", service_type) \
+                .tag("task", task) \
+                .tag("provider", provider) \
+                .tag("model_name", model_name) \
+                .tag("status", "started") \
+                .field("request_id", request_id) \
+                .field("is_streaming", is_streaming) \
+                .time(start_time, WritePrecision.MS)
+
+            # Add optional tags and fields
+            if session_id:
+                point = point.tag("session_id", session_id)
+            if user_id:
+                point = point.tag("user_id", user_id)
+            if client_ip and not self.log_sensitive_data:
+                # Hash IP for privacy
+                ip_hash = hashlib.md5(client_ip.encode()).hexdigest()[:8]
+                point = point.field("client_ip_hash", ip_hash)
+            if input_hash:
+                point = point.field("input_data_hash", input_hash)
+            if input_size:
+                point = point.field("input_size_bytes", input_size)
+            if custom_metadata:
+                for key, value in custom_metadata.items():
+                    point = point.field(f"meta_{key}", str(value))
+
+            # Write to InfluxDB
+            self.write_api.write(bucket=self.bucket, org=self.org, record=point)
+            logger.debug(f"Logged inference start: {request_id}")
+
+        except Exception as e:
+            logger.error(f"Failed to log inference start: {e}")
+
+    def log_inference_complete(
+        self,
+        request_id: str,
+        status: str = "completed",
+        execution_time_ms: Optional[int] = None,
+        input_tokens: Optional[int] = None,
+        output_tokens: Optional[int] = None,
+        estimated_cost_usd: Optional[float] = None,
+        output_data: Any = None,
+        stream_chunks_count: Optional[int] = None,
+        time_to_first_token_ms: Optional[int] = None,
+        error_message: Optional[str] = None,
+        error_code: Optional[str] = None,
+        cache_hit: bool = False,
+        quality_score: Optional[float] = None,
+        custom_metadata: Optional[Dict[str, Any]] = None
+    ) -> None:
+        """
+        Log the completion of an inference request
+        """
+        if not self.enabled:
+            return
+
+        try:
+            end_time = datetime.now(timezone.utc)
+
+            # Calculate output data size
+            output_size = None
+            if output_data and self.log_detailed_requests:
+                try:
+                    output_size = len(str(output_data).encode('utf-8'))
+                except:
+                    output_size = None
+
+            # Create InfluxDB point
+            point = Point("inference_requests") \
+                .tag("status", status) \
+                .field("request_id", request_id) \
+                .field("cache_hit", cache_hit) \
+                .time(end_time, WritePrecision.MS)
+
+            # Add timing data
+            if execution_time_ms is not None:
+                point = point.field("execution_time_ms", execution_time_ms)
+
+            # Add token data
+            if input_tokens is not None:
+                point = point.field("input_tokens", input_tokens)
+            if output_tokens is not None:
+                point = point.field("output_tokens", output_tokens)
+            if input_tokens and output_tokens:
+                point = point.field("total_tokens", input_tokens + output_tokens)
+
+            # Add cost data
+            if estimated_cost_usd is not None:
+                point = point.field("estimated_cost_usd", float(estimated_cost_usd))
+
+            # Add output data size
+            if output_size:
+                point = point.field("output_size_bytes", output_size)
+
+            # Add streaming metrics
+            if stream_chunks_count is not None:
+                point = point.field("stream_chunks_count", stream_chunks_count)
+            if time_to_first_token_ms is not None:
+                point = point.field("time_to_first_token_ms", time_to_first_token_ms)
+
+            # Add error information
+            if error_message:
+                point = point.field("error_message", error_message[:500])  # Limit length
+            if error_code:
+                point = point.field("error_code", error_code)
+
+            # Add quality metrics
+            if quality_score is not None:
+                point = point.field("quality_score", float(quality_score))
+
+            # Add custom metadata
+            if custom_metadata:
+                for key, value in custom_metadata.items():
+                    point = point.field(f"meta_{key}", str(value))
+
+            # Write to InfluxDB
+            self.write_api.write(bucket=self.bucket, org=self.org, record=point)
+            logger.debug(f"Logged inference completion: {request_id} ({status})")
+
+        except Exception as e:
+            logger.error(f"Failed to log inference completion: {e}")
+
+    def log_token_usage(
+        self,
+        request_id: str,
+        provider: str,
+        model_name: str,
+        prompt_tokens: int,
+        completion_tokens: int,
+        prompt_cost_usd: Optional[float] = None,
+        completion_cost_usd: Optional[float] = None
+    ) -> None:
+        """
+        Log detailed token usage data
+        """
+        if not self.enabled:
+            return
+
+        try:
+            timestamp = datetime.now(timezone.utc)
+            total_tokens = prompt_tokens + completion_tokens
+            total_cost = (prompt_cost_usd or 0) + (completion_cost_usd or 0)
+
+            point = Point("token_usage") \
+                .tag("provider", provider) \
+                .tag("model_name", model_name) \
+                .field("request_id", request_id) \
+                .field("prompt_tokens", prompt_tokens) \
+                .field("completion_tokens", completion_tokens) \
+                .field("total_tokens", total_tokens) \
+                .time(timestamp, WritePrecision.MS)
+
+            if prompt_cost_usd is not None:
+                point = point.field("prompt_cost_usd", float(prompt_cost_usd))
+            if completion_cost_usd is not None:
+                point = point.field("completion_cost_usd", float(completion_cost_usd))
+            if total_cost > 0:
+                point = point.field("total_cost_usd", float(total_cost))
+                point = point.field("cost_per_token_usd", float(total_cost / total_tokens))
+
+            self.write_api.write(bucket=self.bucket, org=self.org, record=point)
+            logger.debug(f"Logged token usage: {request_id}")
+
+        except Exception as e:
+            logger.error(f"Failed to log token usage: {e}")
+
+    def log_error(
+        self,
+        request_id: str,
+        error_type: str,
+        error_message: str,
+        error_code: Optional[str] = None,
+        provider: Optional[str] = None,
+        model_name: Optional[str] = None,
+        retry_count: int = 0
+    ) -> None:
+        """
+        Log error events
+        """
+        if not self.enabled:
+            return
+
+        try:
+            timestamp = datetime.now(timezone.utc)
+
+            point = Point("inference_errors") \
+                .tag("error_type", error_type) \
+                .field("request_id", request_id) \
+                .field("error_message", error_message[:500]) \
+                .field("retry_count", retry_count) \
+                .time(timestamp, WritePrecision.MS)
+
+            if error_code:
+                point = point.field("error_code", error_code)
+            if provider:
+                point = point.tag("provider", provider)
+            if model_name:
+                point = point.tag("model_name", model_name)
+
+            self.write_api.write(bucket=self.bucket, org=self.org, record=point)
+            logger.debug(f"Logged error: {request_id} - {error_type}")
+
+        except Exception as e:
+            logger.error(f"Failed to log error: {e}")
+
+    def get_recent_requests(
+        self,
+        limit: int = 100,
+        hours: int = 24,
+        service_type: Optional[str] = None,
+        provider: Optional[str] = None,
+        status: Optional[str] = None
+    ) -> List[Dict[str, Any]]:
+        """
+        Query recent inference requests
+        """
+        if not self.enabled:
+            return []
+
+        try:
+            # Build query with simpler filtering
+            filters = []
+            if service_type:
+                filters.append(f'r.service_type == "{service_type}"')
+            if provider:
+                filters.append(f'r.provider == "{provider}"')
+            if status:
+                filters.append(f'r.status == "{status}"')
+
+            # Build filter clause
+            if filters:
+                filter_clause = " and " + " and ".join(filters)
+            else:
+                filter_clause = ""
+
+            query = f'''
+            from(bucket: "{self.bucket}")
+            |> range(start: -{hours}h)
+            |> filter(fn: (r) => r._measurement == "inference_requests"{filter_clause})
+            |> filter(fn: (r) => r._field == "request_id")
+            |> sort(columns: ["_time"], desc: true)
+            |> limit(n: {limit})
+            '''
+
+            result = self.query_api.query(org=self.org, query=query)
+
+            # Process results - get unique request IDs first
+            request_ids = []
+            for table in result:
+                for record in table.records:
+                    request_id = record.get_value()
+                    if request_id not in [r.get('request_id') for r in request_ids]:
+                        request_ids.append({
+                            'request_id': request_id,
+                            'time': record.get_time(),
+                            'service_type': record.values.get('service_type'),
+                            'provider': record.values.get('provider'),
+                            'model_name': record.values.get('model_name'),
+                            'status': record.values.get('status'),
+                            'task': record.values.get('task')
+                        })
+
+            return request_ids
+
+        except Exception as e:
+            logger.error(f"Failed to query recent requests: {e}")
+            return []
+
+    def get_usage_statistics(
+        self,
+        hours: int = 24,
+        group_by: str = "provider"  # "provider", "model_name", "service_type"
+    ) -> Dict[str, Any]:
+        """
+        Get usage statistics and metrics
+        """
+        if not self.enabled:
+            return {}
+
+        try:
+            # Simplified query to count unique request IDs by group
+            query = f'''
+            from(bucket: "{self.bucket}")
+            |> range(start: -{hours}h)
+            |> filter(fn: (r) => r._measurement == "inference_requests")
+            |> filter(fn: (r) => r._field == "request_id")
+            |> group(columns: ["{group_by}"])
+            |> count()
+            |> yield(name: "request_counts")
+            '''
+
+            result = self.query_api.query(org=self.org, query=query)
+
+            # Process results into statistics
+            stats = {}
+            for table in result:
+                for record in table.records:
+                    key = record.values.get(group_by, 'unknown')
+                    stats[key] = {
+                        'total_requests': record.get_value() or 0,
+                        'group_by': group_by,
+                        'time_range_hours': hours
+                    }
+
+            return stats
+
+        except Exception as e:
+            logger.error(f"Failed to get usage statistics: {e}")
+            return {}
+
+    def close(self):
+        """Close InfluxDB connection"""
+        if self.enabled and hasattr(self, 'client'):
+            self.client.close()
+
+# Global logger instance
+_inference_logger: Optional[InfluxInferenceLogger] = None
+
+def get_inference_logger() -> InfluxInferenceLogger:
+    """Get or create global inference logger instance"""
+    global _inference_logger
+    if _inference_logger is None:
+        _inference_logger = InfluxInferenceLogger()
+    return _inference_logger
+
+def generate_request_id() -> str:
+    """Generate unique request ID"""
+    return f"req_{uuid.uuid4().hex[:12]}"
--- /dev/null
+++ isa_model/core/logging/loki_logger.py
@@ -0,0 +1,160 @@
+"""
+Centralized Logging Configuration with Loki Integration for ISA Model
+
+This module provides centralized application logging with Loki support,
+complementing the existing InfluxDB inference logging system.
+
+Architecture:
+- Loki: General application logs (INFO, WARNING, ERROR, DEBUG)
+- InfluxDB: Inference metrics and performance data (tokens, costs, timing)
+
+Usage:
+    from isa_model.core.logging import app_logger, api_logger
+
+    app_logger.info("Service starting...")
+    api_logger.error(f"Request failed: {error}", exc_info=True)
+"""
+
+import logging
+import sys
+import os
+from typing import Optional
+
+
+def setup_logger(
+    name: str,
+    level: Optional[str] = None,
+    format_str: Optional[str] = None
+) -> logging.Logger:
+    """
+    Setup logger with centralized Loki integration
+
+    Args:
+        name: Logger name (e.g., "ISAModel.API")
+        level: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
+        format_str: Log format string (optional)
+
+    Returns:
+        Configured logger instance
+
+    Example:
+        >>> from isa_model.core.logging import setup_logger
+        >>> my_logger = setup_logger("ISAModel.MyModule")
+        >>> my_logger.info("Processing started")
+    """
+    logger = logging.getLogger(name)
+
+    # Avoid duplicate handlers
+    if logger.handlers:
+        return logger
+
+    # Get configuration from environment
+    log_level_env = os.getenv("LOG_LEVEL", "INFO").upper()
+    log_format_env = os.getenv(
+        "LOG_FORMAT",
+        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+    )
+    from ..config.config_manager import ConfigManager
+    config_manager = ConfigManager()
+    # Use Consul discovery for Loki URL with fallback
+    loki_url = os.getenv("LOKI_URL", config_manager.get_loki_url())
+    loki_enabled = os.getenv("LOKI_ENABLED", "true").lower() == "true"
+
+    # Set log level
+    final_level = (level or log_level_env).upper()
+    logger.setLevel(getattr(logging, final_level, logging.INFO))
+
+    # Disable propagation to prevent duplicate logs
+    logger.propagate = False
+
+    # Log format
+    formatter = logging.Formatter(format_str or log_format_env)
+
+    # 1. Console Handler (for local development and debugging)
+    console_handler = logging.StreamHandler(sys.stdout)
+    console_handler.setFormatter(formatter)
+    logger.addHandler(console_handler)
+
+    # 2. Loki Handler (for centralized logging)
+    if loki_enabled:
+        try:
+            from logging_loki import LokiHandler
+
+            # Extract service name and logger component
+            # e.g., "ISAModel.API" -> service="isa_model", logger="API"
+            service_name = "isa_model"
+            logger_component = name.replace("ISAModel.", "").replace("ISAModel", "main")
+
+            # Labels for Loki (used for filtering and searching)
+            # Use service_name to match other services (mcp, agent, etc.)
+            loki_labels = {
+                "service_name": "model",  # Use "model" to match service naming convention
+                "logger": logger_component,
+                "environment": os.getenv("ENVIRONMENT", "development"),
+                "job": "isa_model_service"
+            }
+
+            # Create Loki handler
+            loki_handler = LokiHandler(
+                url=f"{loki_url}/loki/api/v1/push",
+                tags=loki_labels,
+                version="1",
+            )
+
+            # Only send INFO and above to Loki (reduce network traffic)
+            loki_handler.setLevel(logging.INFO)
+
+            logger.addHandler(loki_handler)
+
+        except ImportError:
+            # Silently fall back to console-only logging during initialization
+            pass
+        except Exception as e:
+            # Loki unavailable - silently fall back to console
+            pass
+
+    return logger
+
+
+# Create application loggers
+# Main application logger
+app_logger = setup_logger("ISAModel")
+
+# API/Server logger
+api_logger = setup_logger("ISAModel.API")
+
+# Client logger
+client_logger = setup_logger("ISAModel.Client")
+
+# Inference logger (application-level, not metrics)
+inference_logger = setup_logger("ISAModel.Inference")
+
+# Training logger
+training_logger = setup_logger("ISAModel.Training")
+
+# Evaluation logger
+eval_logger = setup_logger("ISAModel.Evaluation")
+
+# Database logger
+db_logger = setup_logger("ISAModel.Database")
+
+# Deployment logger
+deployment_logger = setup_logger("ISAModel.Deployment")
+
+# Model manager logger
+model_logger = setup_logger("ISAModel.Models")
+
+
+# Export all loggers
+__all__ = [
+    'setup_logger',
+    'app_logger',
+    'api_logger',
+    'client_logger',
+    'inference_logger',
+    'training_logger',
+    'eval_logger',
+    'db_logger',
+    'deployment_logger',
+    'model_logger',
+]
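
Taken together, the two new modules split responsibilities the way the Architecture note above describes: `setup_logger` routes human-readable application logs to the console and, when reachable, to Loki, while `InfluxInferenceLogger` records structured per-request metrics in InfluxDB. The following is a hedged sketch of how a request handler might use both; the handler shape, the `run_model` stub, the error type, and the provider/model names are illustrative and not part of the package.

    from isa_model.core.logging import api_logger
    from isa_model.core.logging.influx_logger import (
        generate_request_id,
        get_inference_logger,
    )

    inference_log = get_inference_logger()


    def run_model(payload: dict) -> dict:
        # Stand-in for the real inference call; raises to show the error path.
        raise TimeoutError("upstream model timed out")


    def handle_chat_request(payload: dict) -> dict:
        request_id = generate_request_id()
        api_logger.info(f"Handling chat request {request_id}")  # console + Loki

        try:
            return run_model(payload)
        except TimeoutError as e:
            # Human-readable trace goes to Loki; structured metrics go to
            # the 'inference_errors' measurement in InfluxDB.
            api_logger.error(f"Request {request_id} timed out: {e}", exc_info=True)
            inference_log.log_error(
                request_id=request_id,
                error_type="timeout",
                error_message=str(e),
                provider="openai",         # illustrative
                model_name="gpt-4o-mini",  # illustrative
            )
            raise

One design point worth noting from the sources above: both loggers degrade gracefully, so this handler behaves identically (minus the telemetry) when InfluxDB is disabled or Loki and its `logging_loki` dependency are absent.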