isa-model 0.4.0__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. isa_model/client.py +466 -43
  2. isa_model/core/cache/redis_cache.py +12 -3
  3. isa_model/core/config/config_manager.py +230 -3
  4. isa_model/core/config.py +90 -0
  5. isa_model/core/database/direct_db_client.py +114 -0
  6. isa_model/core/database/migration_manager.py +563 -0
  7. isa_model/core/database/migrations.py +21 -1
  8. isa_model/core/database/supabase_client.py +154 -19
  9. isa_model/core/dependencies.py +316 -0
  10. isa_model/core/discovery/__init__.py +19 -0
  11. isa_model/core/discovery/consul_discovery.py +190 -0
  12. isa_model/core/logging/__init__.py +54 -0
  13. isa_model/core/logging/influx_logger.py +523 -0
  14. isa_model/core/logging/loki_logger.py +160 -0
  15. isa_model/core/models/__init__.py +27 -18
  16. isa_model/core/models/config_models.py +625 -0
  17. isa_model/core/models/deployment_billing_tracker.py +430 -0
  18. isa_model/core/models/model_manager.py +35 -80
  19. isa_model/core/models/model_metadata.py +690 -0
  20. isa_model/core/models/model_repo.py +174 -18
  21. isa_model/core/models/system_models.py +857 -0
  22. isa_model/core/repositories/__init__.py +9 -0
  23. isa_model/core/repositories/config_repository.py +912 -0
  24. isa_model/core/services/intelligent_model_selector.py +399 -21
  25. isa_model/core/types.py +1 -0
  26. isa_model/deployment/__init__.py +5 -48
  27. isa_model/deployment/core/__init__.py +2 -31
  28. isa_model/deployment/core/deployment_manager.py +1278 -370
  29. isa_model/deployment/modal/__init__.py +8 -0
  30. isa_model/deployment/modal/config.py +136 -0
  31. isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
  32. isa_model/deployment/modal/services/__init__.py +3 -0
  33. isa_model/deployment/modal/services/audio/__init__.py +1 -0
  34. isa_model/deployment/modal/services/embedding/__init__.py +1 -0
  35. isa_model/deployment/modal/services/llm/__init__.py +1 -0
  36. isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
  37. isa_model/deployment/modal/services/video/__init__.py +1 -0
  38. isa_model/deployment/modal/services/vision/__init__.py +1 -0
  39. isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
  40. isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
  41. isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
  42. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
  43. isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
  44. isa_model/deployment/storage/__init__.py +5 -0
  45. isa_model/deployment/storage/deployment_repository.py +824 -0
  46. isa_model/deployment/triton/__init__.py +10 -0
  47. isa_model/deployment/triton/config.py +196 -0
  48. isa_model/deployment/triton/configs/__init__.py +1 -0
  49. isa_model/deployment/triton/provider.py +512 -0
  50. isa_model/deployment/triton/scripts/__init__.py +1 -0
  51. isa_model/deployment/triton/templates/__init__.py +1 -0
  52. isa_model/inference/__init__.py +47 -1
  53. isa_model/inference/ai_factory.py +137 -10
  54. isa_model/inference/legacy_services/__init__.py +21 -0
  55. isa_model/inference/legacy_services/model_evaluation.py +637 -0
  56. isa_model/inference/legacy_services/model_service.py +573 -0
  57. isa_model/inference/legacy_services/model_serving.py +717 -0
  58. isa_model/inference/legacy_services/model_training.py +561 -0
  59. isa_model/inference/models/__init__.py +21 -0
  60. isa_model/inference/models/inference_config.py +551 -0
  61. isa_model/inference/models/inference_record.py +675 -0
  62. isa_model/inference/models/performance_models.py +714 -0
  63. isa_model/inference/repositories/__init__.py +9 -0
  64. isa_model/inference/repositories/inference_repository.py +828 -0
  65. isa_model/inference/services/audio/base_stt_service.py +184 -11
  66. isa_model/inference/services/audio/openai_stt_service.py +22 -6
  67. isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
  68. isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
  69. isa_model/inference/services/llm/__init__.py +10 -2
  70. isa_model/inference/services/llm/base_llm_service.py +335 -24
  71. isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
  72. isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
  73. isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
  74. isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
  75. isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
  76. isa_model/inference/services/llm/ollama_llm_service.py +9 -2
  77. isa_model/inference/services/llm/openai_llm_service.py +33 -16
  78. isa_model/inference/services/llm/yyds_llm_service.py +8 -2
  79. isa_model/inference/services/vision/__init__.py +22 -1
  80. isa_model/inference/services/vision/helpers/image_utils.py +8 -5
  81. isa_model/inference/services/vision/isa_vision_service.py +65 -4
  82. isa_model/inference/services/vision/openai_vision_service.py +19 -10
  83. isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
  84. isa_model/serving/api/cache_manager.py +245 -0
  85. isa_model/serving/api/dependencies/__init__.py +1 -0
  86. isa_model/serving/api/dependencies/auth.py +194 -0
  87. isa_model/serving/api/dependencies/database.py +139 -0
  88. isa_model/serving/api/error_handlers.py +284 -0
  89. isa_model/serving/api/fastapi_server.py +172 -22
  90. isa_model/serving/api/middleware/auth.py +8 -2
  91. isa_model/serving/api/middleware/security.py +23 -33
  92. isa_model/serving/api/middleware/tenant_context.py +414 -0
  93. isa_model/serving/api/routes/analytics.py +4 -1
  94. isa_model/serving/api/routes/config.py +645 -0
  95. isa_model/serving/api/routes/deployment_billing.py +315 -0
  96. isa_model/serving/api/routes/deployments.py +138 -2
  97. isa_model/serving/api/routes/gpu_gateway.py +440 -0
  98. isa_model/serving/api/routes/health.py +32 -12
  99. isa_model/serving/api/routes/inference_monitoring.py +486 -0
  100. isa_model/serving/api/routes/local_deployments.py +448 -0
  101. isa_model/serving/api/routes/tenants.py +575 -0
  102. isa_model/serving/api/routes/unified.py +680 -18
  103. isa_model/serving/api/routes/webhooks.py +479 -0
  104. isa_model/serving/api/startup.py +68 -54
  105. isa_model/utils/gpu_utils.py +311 -0
  106. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/METADATA +71 -24
  107. isa_model-0.4.4.dist-info/RECORD +180 -0
  108. isa_model/core/security/secrets.py +0 -358
  109. isa_model/core/storage/hf_storage.py +0 -419
  110. isa_model/core/storage/minio_storage.py +0 -0
  111. isa_model/deployment/cloud/__init__.py +0 -9
  112. isa_model/deployment/cloud/modal/__init__.py +0 -10
  113. isa_model/deployment/core/deployment_config.py +0 -356
  114. isa_model/deployment/core/isa_deployment_service.py +0 -401
  115. isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
  116. isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
  117. isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
  118. isa_model/deployment/runtime/deployed_service.py +0 -338
  119. isa_model/deployment/services/__init__.py +0 -9
  120. isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
  121. isa_model/deployment/services/model_service.py +0 -332
  122. isa_model/deployment/services/service_monitor.py +0 -356
  123. isa_model/deployment/services/service_registry.py +0 -527
  124. isa_model/eval/__init__.py +0 -92
  125. isa_model/eval/benchmarks/__init__.py +0 -27
  126. isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
  127. isa_model/eval/benchmarks.py +0 -701
  128. isa_model/eval/config/__init__.py +0 -10
  129. isa_model/eval/config/evaluation_config.py +0 -108
  130. isa_model/eval/evaluators/__init__.py +0 -24
  131. isa_model/eval/evaluators/audio_evaluator.py +0 -727
  132. isa_model/eval/evaluators/base_evaluator.py +0 -503
  133. isa_model/eval/evaluators/embedding_evaluator.py +0 -742
  134. isa_model/eval/evaluators/llm_evaluator.py +0 -472
  135. isa_model/eval/evaluators/vision_evaluator.py +0 -564
  136. isa_model/eval/example_evaluation.py +0 -395
  137. isa_model/eval/factory.py +0 -798
  138. isa_model/eval/infrastructure/__init__.py +0 -24
  139. isa_model/eval/infrastructure/experiment_tracker.py +0 -466
  140. isa_model/eval/isa_benchmarks.py +0 -700
  141. isa_model/eval/isa_integration.py +0 -582
  142. isa_model/eval/metrics.py +0 -951
  143. isa_model/eval/tests/unit/test_basic.py +0 -396
  144. isa_model/serving/api/routes/evaluations.py +0 -579
  145. isa_model/training/__init__.py +0 -168
  146. isa_model/training/annotation/annotation_schema.py +0 -47
  147. isa_model/training/annotation/processors/annotation_processor.py +0 -126
  148. isa_model/training/annotation/storage/dataset_manager.py +0 -131
  149. isa_model/training/annotation/storage/dataset_schema.py +0 -44
  150. isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
  151. isa_model/training/annotation/tests/test_minio copy.py +0 -113
  152. isa_model/training/annotation/tests/test_minio_upload.py +0 -43
  153. isa_model/training/annotation/views/annotation_controller.py +0 -158
  154. isa_model/training/cloud/__init__.py +0 -22
  155. isa_model/training/cloud/job_orchestrator.py +0 -402
  156. isa_model/training/cloud/runpod_trainer.py +0 -454
  157. isa_model/training/cloud/storage_manager.py +0 -482
  158. isa_model/training/core/__init__.py +0 -26
  159. isa_model/training/core/config.py +0 -181
  160. isa_model/training/core/dataset.py +0 -222
  161. isa_model/training/core/trainer.py +0 -720
  162. isa_model/training/core/utils.py +0 -213
  163. isa_model/training/examples/intelligent_training_example.py +0 -281
  164. isa_model/training/factory.py +0 -424
  165. isa_model/training/intelligent/__init__.py +0 -25
  166. isa_model/training/intelligent/decision_engine.py +0 -643
  167. isa_model/training/intelligent/intelligent_factory.py +0 -888
  168. isa_model/training/intelligent/knowledge_base.py +0 -751
  169. isa_model/training/intelligent/resource_optimizer.py +0 -839
  170. isa_model/training/intelligent/task_classifier.py +0 -576
  171. isa_model/training/storage/__init__.py +0 -24
  172. isa_model/training/storage/core_integration.py +0 -439
  173. isa_model/training/storage/training_repository.py +0 -552
  174. isa_model/training/storage/training_storage.py +0 -628
  175. isa_model-0.4.0.dist-info/RECORD +0 -182
  176. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
  177. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
  178. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
  179. /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
  180. /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
  181. /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
  182. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
  183. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
  184. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
  185. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
  186. /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
  187. /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
  188. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/WHEEL +0 -0
  189. {isa_model-0.4.0.dist-info → isa_model-0.4.4.dist-info}/top_level.txt +0 -0
isa_model/core/logging/influx_logger.py
@@ -0,0 +1,523 @@
+ """
+ InfluxDB-based Inference Logging System for ISA Model
+
+ This module provides comprehensive logging for model inference requests,
+ optimized for time-series analysis and monitoring.
+
+ Data Model:
+ - measurement: 'inference_requests' (main table)
+ - tags: indexed fields for fast queries (provider, model, service_type, etc.)
+ - fields: numerical and text data (tokens, costs, response_time, etc.)
+ - timestamp: automatic time-based partitioning
+
+ Key Features:
+ - Automatic data retention (30 days default)
+ - Cost-effective storage with compression
+ - Real-time monitoring capabilities
+ - Aggregated metrics generation
+ """
+
+ import os
+ import json
+ import hashlib
+ import logging
+ from datetime import datetime, timezone
+ from typing import Dict, Any, Optional, List, Union
+ from dataclasses import dataclass, field
+ from influxdb_client import InfluxDBClient, Point, WritePrecision
+ from influxdb_client.client.write_api import SYNCHRONOUS
+ import uuid
+
+ logger = logging.getLogger(__name__)
+
+ @dataclass
+ class InferenceLogEntry:
+     """
+     Structure for inference log data
+     """
+     # Required fields
+     request_id: str
+     service_type: str  # 'text', 'vision', 'audio', 'image', 'embedding'
+     task: str  # 'chat', 'analyze_image', 'generate_speech', etc.
+     provider: str  # 'openai', 'replicate', 'anthropic', etc.
+     model_name: str  # Actual model used
+     status: str  # 'completed', 'failed', 'timeout'
+
+     # Timing data
+     start_time: datetime
+     end_time: Optional[datetime] = None
+     execution_time_ms: Optional[int] = None
+     queue_time_ms: Optional[int] = None
+
+     # Token and usage data
+     input_tokens: Optional[int] = None
+     output_tokens: Optional[int] = None
+     total_tokens: Optional[int] = None
+
+     # Cost data
+     estimated_cost_usd: Optional[float] = None
+     actual_cost_usd: Optional[float] = None
+     cost_breakdown: Optional[Dict[str, Any]] = None
+
+     # Request/response data (optional, for debugging)
+     input_data_hash: Optional[str] = None
+     input_size_bytes: Optional[int] = None
+     output_size_bytes: Optional[int] = None
+
+     # Streaming data
+     is_streaming: bool = False
+     stream_start_time: Optional[datetime] = None
+     stream_chunks_count: Optional[int] = None
+     time_to_first_token_ms: Optional[int] = None
+
+     # Error information
+     error_message: Optional[str] = None
+     error_code: Optional[str] = None
+
+     # Context and metadata
+     session_id: Optional[str] = None
+     user_id: Optional[str] = None
+     client_ip: Optional[str] = None
+     model_version: Optional[str] = None
+     cache_hit: bool = False
+
+     # Quality metrics
+     quality_score: Optional[float] = None
+     user_feedback: Optional[int] = None  # 1-5 rating
+
+     # Additional metadata
+     custom_metadata: Dict[str, Any] = field(default_factory=dict)
+
+ class InfluxInferenceLogger:
+     """
+     InfluxDB-based logger for model inference activities
+
+     Features:
+     - Time-series storage optimized for metrics
+     - Automatic data retention and compression
+     - Real-time query capabilities
+     - Cost tracking and analysis
+     - Performance monitoring
+     """
+
+     def __init__(self):
+         """Initialize InfluxDB connection"""
+         self.enabled = os.getenv('ENABLE_INFERENCE_LOGGING', 'false').lower() == 'true'
+
+         # Logging configuration - always set these regardless of enabled status
+         self.retention_days = int(os.getenv('LOG_RETENTION_DAYS', '30'))
+         self.log_detailed_requests = os.getenv('LOG_DETAILED_REQUESTS', 'true').lower() == 'true'
+         self.log_sensitive_data = os.getenv('LOG_SENSITIVE_DATA', 'false').lower() == 'true'
+
+         if not self.enabled:
+             logger.info("Inference logging disabled via ENABLE_INFERENCE_LOGGING")
+             return
+
+         # InfluxDB configuration
+         from ..config.config_manager import ConfigManager
+         config_manager = ConfigManager()
+         # Use Consul discovery for InfluxDB URL with fallback
+         self.url = os.getenv('INFLUXDB_URL', config_manager.get_influxdb_url())
+         self.token = os.getenv('INFLUXDB_TOKEN', 'dev-token-isa-model-12345')
+         self.org = os.getenv('INFLUXDB_ORG', 'isa-model')
+         self.bucket = os.getenv('INFLUXDB_BUCKET', 'isa-model-logs')
+
+         try:
+             # Initialize InfluxDB client
+             self.client = InfluxDBClient(url=self.url, token=self.token, org=self.org)
+             self.write_api = self.client.write_api(write_options=SYNCHRONOUS)
+             self.query_api = self.client.query_api()
+
+             # Test connection
+             self._test_connection()
+             logger.info(f"InfluxDB inference logger initialized: {self.url}")
+
+         except Exception as e:
+             logger.error(f"Failed to initialize InfluxDB logger: {e}")
+             self.enabled = False
+
+     def _test_connection(self):
+         """Test InfluxDB connection"""
+         try:
+             health = self.client.health()
+             if health.status == "pass":
+                 logger.debug("InfluxDB connection healthy")
+             else:
+                 raise Exception(f"InfluxDB health check failed: {health.message}")
+         except Exception as e:
+             raise Exception(f"InfluxDB connection test failed: {e}")
+
+     def _create_data_hash(self, data: Any) -> Optional[str]:
+         """Create SHA-256 hash of input data for deduplication"""
+         try:
+             if isinstance(data, (dict, list)):
+                 data_str = json.dumps(data, sort_keys=True)
+             else:
+                 data_str = str(data)
+             return hashlib.sha256(data_str.encode()).hexdigest()
+         except Exception:
+             return None
+
+     def log_inference_start(
+         self,
+         request_id: str,
+         service_type: str,
+         task: str,
+         provider: str,
+         model_name: str,
+         input_data: Any = None,
+         session_id: Optional[str] = None,
+         user_id: Optional[str] = None,
+         client_ip: Optional[str] = None,
+         is_streaming: bool = False,
+         custom_metadata: Optional[Dict[str, Any]] = None
+     ) -> None:
+         """
+         Log the start of an inference request
+         """
+         if not self.enabled:
+             return
+
+         try:
+             start_time = datetime.now(timezone.utc)
+
+             # Create data hash for input
+             input_hash = None
+             input_size = None
+             if input_data and self.log_detailed_requests:
+                 input_hash = self._create_data_hash(input_data)
+                 try:
+                     input_size = len(str(input_data).encode('utf-8'))
+                 except Exception:
+                     input_size = None
+
+             # Create InfluxDB point
+             point = Point("inference_requests") \
+                 .tag("service_type", service_type) \
+                 .tag("task", task) \
+                 .tag("provider", provider) \
+                 .tag("model_name", model_name) \
+                 .tag("status", "started") \
+                 .field("request_id", request_id) \
+                 .field("is_streaming", is_streaming) \
+                 .time(start_time, WritePrecision.MS)
+
+             # Add optional tags and fields
+             if session_id:
+                 point = point.tag("session_id", session_id)
+             if user_id:
+                 point = point.tag("user_id", user_id)
+             if client_ip and not self.log_sensitive_data:
+                 # Hash IP for privacy
+                 ip_hash = hashlib.md5(client_ip.encode()).hexdigest()[:8]
+                 point = point.field("client_ip_hash", ip_hash)
+             if input_hash:
+                 point = point.field("input_data_hash", input_hash)
+             if input_size:
+                 point = point.field("input_size_bytes", input_size)
+             if custom_metadata:
+                 for key, value in custom_metadata.items():
+                     point = point.field(f"meta_{key}", str(value))
+
+             # Write to InfluxDB
+             self.write_api.write(bucket=self.bucket, org=self.org, record=point)
+             logger.debug(f"Logged inference start: {request_id}")
+
+         except Exception as e:
+             logger.error(f"Failed to log inference start: {e}")
+
+     def log_inference_complete(
+         self,
+         request_id: str,
+         status: str = "completed",
+         execution_time_ms: Optional[int] = None,
+         input_tokens: Optional[int] = None,
+         output_tokens: Optional[int] = None,
+         estimated_cost_usd: Optional[float] = None,
+         output_data: Any = None,
+         stream_chunks_count: Optional[int] = None,
+         time_to_first_token_ms: Optional[int] = None,
+         error_message: Optional[str] = None,
+         error_code: Optional[str] = None,
+         cache_hit: bool = False,
+         quality_score: Optional[float] = None,
+         custom_metadata: Optional[Dict[str, Any]] = None
+     ) -> None:
+         """
+         Log the completion of an inference request
+         """
+         if not self.enabled:
+             return
+
+         try:
+             end_time = datetime.now(timezone.utc)
+
+             # Calculate output data size
+             output_size = None
+             if output_data and self.log_detailed_requests:
+                 try:
+                     output_size = len(str(output_data).encode('utf-8'))
+                 except Exception:
+                     output_size = None
+
+             # Create InfluxDB point
+             point = Point("inference_requests") \
+                 .tag("status", status) \
+                 .field("request_id", request_id) \
+                 .field("cache_hit", cache_hit) \
+                 .time(end_time, WritePrecision.MS)
+
+             # Add timing data
+             if execution_time_ms is not None:
+                 point = point.field("execution_time_ms", execution_time_ms)
+
+             # Add token data
+             if input_tokens is not None:
+                 point = point.field("input_tokens", input_tokens)
+             if output_tokens is not None:
+                 point = point.field("output_tokens", output_tokens)
+             if input_tokens and output_tokens:
+                 point = point.field("total_tokens", input_tokens + output_tokens)
+
+             # Add cost data
+             if estimated_cost_usd is not None:
+                 point = point.field("estimated_cost_usd", float(estimated_cost_usd))
+
+             # Add output data size
+             if output_size:
+                 point = point.field("output_size_bytes", output_size)
+
+             # Add streaming metrics
+             if stream_chunks_count is not None:
+                 point = point.field("stream_chunks_count", stream_chunks_count)
+             if time_to_first_token_ms is not None:
+                 point = point.field("time_to_first_token_ms", time_to_first_token_ms)
+
+             # Add error information
+             if error_message:
+                 point = point.field("error_message", error_message[:500])  # Limit length
+             if error_code:
+                 point = point.field("error_code", error_code)
+
+             # Add quality metrics
+             if quality_score is not None:
+                 point = point.field("quality_score", float(quality_score))
+
+             # Add custom metadata
+             if custom_metadata:
+                 for key, value in custom_metadata.items():
+                     point = point.field(f"meta_{key}", str(value))
+
+             # Write to InfluxDB
+             self.write_api.write(bucket=self.bucket, org=self.org, record=point)
+             logger.debug(f"Logged inference completion: {request_id} ({status})")
+
+         except Exception as e:
+             logger.error(f"Failed to log inference completion: {e}")
+
+     def log_token_usage(
+         self,
+         request_id: str,
+         provider: str,
+         model_name: str,
+         prompt_tokens: int,
+         completion_tokens: int,
+         prompt_cost_usd: Optional[float] = None,
+         completion_cost_usd: Optional[float] = None
+     ) -> None:
+         """
+         Log detailed token usage data
+         """
+         if not self.enabled:
+             return
+
+         try:
+             timestamp = datetime.now(timezone.utc)
+             total_tokens = prompt_tokens + completion_tokens
+             total_cost = (prompt_cost_usd or 0) + (completion_cost_usd or 0)
+
+             point = Point("token_usage") \
+                 .tag("provider", provider) \
+                 .tag("model_name", model_name) \
+                 .field("request_id", request_id) \
+                 .field("prompt_tokens", prompt_tokens) \
+                 .field("completion_tokens", completion_tokens) \
+                 .field("total_tokens", total_tokens) \
+                 .time(timestamp, WritePrecision.MS)
+
+             if prompt_cost_usd is not None:
+                 point = point.field("prompt_cost_usd", float(prompt_cost_usd))
+             if completion_cost_usd is not None:
+                 point = point.field("completion_cost_usd", float(completion_cost_usd))
+             if total_cost > 0:
+                 point = point.field("total_cost_usd", float(total_cost))
+                 point = point.field("cost_per_token_usd", float(total_cost / total_tokens))
+
+             self.write_api.write(bucket=self.bucket, org=self.org, record=point)
+             logger.debug(f"Logged token usage: {request_id}")
+
+         except Exception as e:
+             logger.error(f"Failed to log token usage: {e}")
+
+     def log_error(
+         self,
+         request_id: str,
+         error_type: str,
+         error_message: str,
+         error_code: Optional[str] = None,
+         provider: Optional[str] = None,
+         model_name: Optional[str] = None,
+         retry_count: int = 0
+     ) -> None:
+         """
+         Log error events
+         """
+         if not self.enabled:
+             return
+
+         try:
+             timestamp = datetime.now(timezone.utc)
+
+             point = Point("inference_errors") \
+                 .tag("error_type", error_type) \
+                 .field("request_id", request_id) \
+                 .field("error_message", error_message[:500]) \
+                 .field("retry_count", retry_count) \
+                 .time(timestamp, WritePrecision.MS)
+
+             if error_code:
+                 point = point.field("error_code", error_code)
+             if provider:
+                 point = point.tag("provider", provider)
+             if model_name:
+                 point = point.tag("model_name", model_name)
+
+             self.write_api.write(bucket=self.bucket, org=self.org, record=point)
+             logger.debug(f"Logged error: {request_id} - {error_type}")
+
+         except Exception as e:
+             logger.error(f"Failed to log error: {e}")
+
+     def get_recent_requests(
+         self,
+         limit: int = 100,
+         hours: int = 24,
+         service_type: Optional[str] = None,
+         provider: Optional[str] = None,
+         status: Optional[str] = None
+     ) -> List[Dict[str, Any]]:
+         """
+         Query recent inference requests
+         """
+         if not self.enabled:
+             return []
+
+         try:
+             # Build query with simpler filtering
+             filters = []
+             if service_type:
+                 filters.append(f'r.service_type == "{service_type}"')
+             if provider:
+                 filters.append(f'r.provider == "{provider}"')
+             if status:
+                 filters.append(f'r.status == "{status}"')
+
+             # Build filter clause
+             if filters:
+                 filter_clause = " and " + " and ".join(filters)
+             else:
+                 filter_clause = ""
+
+             query = f'''
+             from(bucket: "{self.bucket}")
+               |> range(start: -{hours}h)
+               |> filter(fn: (r) => r._measurement == "inference_requests"{filter_clause})
+               |> filter(fn: (r) => r._field == "request_id")
+               |> sort(columns: ["_time"], desc: true)
+               |> limit(n: {limit})
+             '''
+
+             result = self.query_api.query(org=self.org, query=query)
+
+             # Process results - get unique request IDs first
+             request_ids = []
+             for table in result:
+                 for record in table.records:
+                     request_id = record.get_value()
+                     if request_id not in [r.get('request_id') for r in request_ids]:
+                         request_ids.append({
+                             'request_id': request_id,
+                             'time': record.get_time(),
+                             'service_type': record.values.get('service_type'),
+                             'provider': record.values.get('provider'),
+                             'model_name': record.values.get('model_name'),
+                             'status': record.values.get('status'),
+                             'task': record.values.get('task')
+                         })
+
+             return request_ids
+
+         except Exception as e:
+             logger.error(f"Failed to query recent requests: {e}")
+             return []
+
+     def get_usage_statistics(
+         self,
+         hours: int = 24,
+         group_by: str = "provider"  # "provider", "model_name", "service_type"
+     ) -> Dict[str, Any]:
+         """
+         Get usage statistics and metrics
+         """
+         if not self.enabled:
+             return {}
+
+         try:
+             # Simplified query to count unique request IDs by group
+             query = f'''
+             from(bucket: "{self.bucket}")
+               |> range(start: -{hours}h)
+               |> filter(fn: (r) => r._measurement == "inference_requests")
+               |> filter(fn: (r) => r._field == "request_id")
+               |> group(columns: ["{group_by}"])
+               |> count()
+               |> yield(name: "request_counts")
+             '''
+
+             result = self.query_api.query(org=self.org, query=query)
+
+             # Process results into statistics
+             stats = {}
+             for table in result:
+                 for record in table.records:
+                     key = record.values.get(group_by, 'unknown')
+                     stats[key] = {
+                         'total_requests': record.get_value() or 0,
+                         'group_by': group_by,
+                         'time_range_hours': hours
+                     }
+
+             return stats
+
+         except Exception as e:
+             logger.error(f"Failed to get usage statistics: {e}")
+             return {}
+
+     def close(self):
+         """Close InfluxDB connection"""
+         if self.enabled and hasattr(self, 'client'):
+             self.client.close()
+
+ # Global logger instance
+ _inference_logger: Optional[InfluxInferenceLogger] = None
+
+ def get_inference_logger() -> InfluxInferenceLogger:
+     """Get or create global inference logger instance"""
+     global _inference_logger
+     if _inference_logger is None:
+         _inference_logger = InfluxInferenceLogger()
+     return _inference_logger
+
+ def generate_request_id() -> str:
+     """Generate unique request ID"""
+     return f"req_{uuid.uuid4().hex[:12]}"
isa_model/core/logging/loki_logger.py
@@ -0,0 +1,160 @@
+ """
+ Centralized Logging Configuration with Loki Integration for ISA Model
+
+ This module provides centralized application logging with Loki support,
+ complementing the existing InfluxDB inference logging system.
+
+ Architecture:
+ - Loki: General application logs (INFO, WARNING, ERROR, DEBUG)
+ - InfluxDB: Inference metrics and performance data (tokens, costs, timing)
+
+ Usage:
+     from isa_model.core.logging import app_logger, api_logger
+
+     app_logger.info("Service starting...")
+     api_logger.error(f"Request failed: {error}", exc_info=True)
+ """
+
+ import logging
+ import sys
+ import os
+ from typing import Optional
+
+
+ def setup_logger(
+     name: str,
+     level: Optional[str] = None,
+     format_str: Optional[str] = None
+ ) -> logging.Logger:
+     """
+     Setup logger with centralized Loki integration
+
+     Args:
+         name: Logger name (e.g., "ISAModel.API")
+         level: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
+         format_str: Log format string (optional)
+
+     Returns:
+         Configured logger instance
+
+     Example:
+         >>> from isa_model.core.logging import setup_logger
+         >>> my_logger = setup_logger("ISAModel.MyModule")
+         >>> my_logger.info("Processing started")
+     """
+     logger = logging.getLogger(name)
+
+     # Avoid duplicate handlers
+     if logger.handlers:
+         return logger
+
+     # Get configuration from environment
+     log_level_env = os.getenv("LOG_LEVEL", "INFO").upper()
+     log_format_env = os.getenv(
+         "LOG_FORMAT",
+         "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+     )
+     from ..config.config_manager import ConfigManager
+     config_manager = ConfigManager()
+     # Use Consul discovery for Loki URL with fallback
+     loki_url = os.getenv("LOKI_URL", config_manager.get_loki_url())
+     loki_enabled = os.getenv("LOKI_ENABLED", "true").lower() == "true"
+
+     # Set log level
+     final_level = (level or log_level_env).upper()
+     logger.setLevel(getattr(logging, final_level, logging.INFO))
+
+     # Disable propagation to prevent duplicate logs
+     logger.propagate = False
+
+     # Log format
+     formatter = logging.Formatter(format_str or log_format_env)
+
+     # 1. Console Handler (for local development and debugging)
+     console_handler = logging.StreamHandler(sys.stdout)
+     console_handler.setFormatter(formatter)
+     logger.addHandler(console_handler)
+
+     # 2. Loki Handler (for centralized logging)
+     if loki_enabled:
+         try:
+             from logging_loki import LokiHandler
+
+             # Extract service name and logger component
+             # e.g., "ISAModel.API" -> service="isa_model", logger="API"
+             service_name = "isa_model"
+             logger_component = name.replace("ISAModel.", "").replace("ISAModel", "main")
+
+             # Labels for Loki (used for filtering and searching)
+             # Use service_name to match other services (mcp, agent, etc.)
+             loki_labels = {
+                 "service_name": "model",  # Use "model" to match service naming convention
+                 "logger": logger_component,
+                 "environment": os.getenv("ENVIRONMENT", "development"),
+                 "job": "isa_model_service"
+             }
+
+             # Create Loki handler
+             loki_handler = LokiHandler(
+                 url=f"{loki_url}/loki/api/v1/push",
+                 tags=loki_labels,
+                 version="1",
+             )
+
+             # Only send INFO and above to Loki (reduce network traffic)
+             loki_handler.setLevel(logging.INFO)
+
+             logger.addHandler(loki_handler)
+
+         except ImportError:
+             # Silently fall back to console-only logging during initialization
+             pass
+         except Exception:
+             # Loki unavailable - silently fall back to console
+             pass
+
+     return logger
+
+
+ # Create application loggers
+ # Main application logger
+ app_logger = setup_logger("ISAModel")
+
+ # API/Server logger
+ api_logger = setup_logger("ISAModel.API")
+
+ # Client logger
+ client_logger = setup_logger("ISAModel.Client")
+
+ # Inference logger (application-level, not metrics)
+ inference_logger = setup_logger("ISAModel.Inference")
+
+ # Training logger
+ training_logger = setup_logger("ISAModel.Training")
+
+ # Evaluation logger
+ eval_logger = setup_logger("ISAModel.Evaluation")
+
+ # Database logger
+ db_logger = setup_logger("ISAModel.Database")
+
+ # Deployment logger
+ deployment_logger = setup_logger("ISAModel.Deployment")
+
+ # Model manager logger
+ model_logger = setup_logger("ISAModel.Models")
+
+
+ # Export all loggers
+ __all__ = [
+     'setup_logger',
+     'app_logger',
+     'api_logger',
+     'client_logger',
+     'inference_logger',
+     'training_logger',
+     'eval_logger',
+     'db_logger',
+     'deployment_logger',
+     'model_logger',
+ ]
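A minimal configuration sketch for the module above, assuming a local Loki instance on its default port 3100. The environment variable names are the ones read by setup_logger in this diff; the values, and the timing of setting them before import, are illustrative:

    import os

    # Settings read by setup_logger() when the logging module is imported
    os.environ.setdefault("LOG_LEVEL", "INFO")
    os.environ.setdefault("LOKI_ENABLED", "true")
    os.environ.setdefault("LOKI_URL", "http://localhost:3100")  # assumed local Loki
    os.environ.setdefault("ENVIRONMENT", "development")

    # Import after configuration so the module-level loggers pick up the settings
    from isa_model.core.logging import api_logger, setup_logger

    api_logger.info("Service starting...")      # console + Loki (labels: service_name="model", logger="API")
    custom = setup_logger("ISAModel.MyModule")  # follows the "ISAModel.<Component>" naming scheme
    custom.debug("Verbose diagnostics")         # DEBUG stays on the console

Note that the Loki handler is set to INFO, so DEBUG records only ever reach the console handler; if logging_loki is not installed or Loki is unreachable, the logger silently degrades to console-only output.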