isa-model 0.4.0__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in the public registry.
- isa_model/client.py +466 -43
- isa_model/core/cache/redis_cache.py +12 -3
- isa_model/core/config/config_manager.py +230 -3
- isa_model/core/config.py +90 -0
- isa_model/core/database/direct_db_client.py +114 -0
- isa_model/core/database/migration_manager.py +563 -0
- isa_model/core/database/migrations.py +21 -1
- isa_model/core/database/supabase_client.py +154 -19
- isa_model/core/dependencies.py +316 -0
- isa_model/core/discovery/__init__.py +19 -0
- isa_model/core/discovery/consul_discovery.py +190 -0
- isa_model/core/logging/__init__.py +54 -0
- isa_model/core/logging/influx_logger.py +523 -0
- isa_model/core/logging/loki_logger.py +160 -0
- isa_model/core/models/__init__.py +27 -18
- isa_model/core/models/config_models.py +625 -0
- isa_model/core/models/deployment_billing_tracker.py +430 -0
- isa_model/core/models/model_manager.py +40 -17
- isa_model/core/models/model_metadata.py +690 -0
- isa_model/core/models/model_repo.py +174 -18
- isa_model/core/models/system_models.py +857 -0
- isa_model/core/repositories/__init__.py +9 -0
- isa_model/core/repositories/config_repository.py +912 -0
- isa_model/core/services/intelligent_model_selector.py +399 -21
- isa_model/core/storage/hf_storage.py +1 -1
- isa_model/core/types.py +1 -0
- isa_model/deployment/__init__.py +5 -48
- isa_model/deployment/core/__init__.py +2 -31
- isa_model/deployment/core/deployment_manager.py +1278 -370
- isa_model/deployment/local/__init__.py +31 -0
- isa_model/deployment/local/config.py +248 -0
- isa_model/deployment/local/gpu_gateway.py +607 -0
- isa_model/deployment/local/health_checker.py +428 -0
- isa_model/deployment/local/provider.py +586 -0
- isa_model/deployment/local/tensorrt_service.py +621 -0
- isa_model/deployment/local/transformers_service.py +644 -0
- isa_model/deployment/local/vllm_service.py +527 -0
- isa_model/deployment/modal/__init__.py +8 -0
- isa_model/deployment/modal/config.py +136 -0
- isa_model/deployment/{services/auto_hf_modal_deployer.py → modal/deployer.py} +1 -1
- isa_model/deployment/modal/services/__init__.py +3 -0
- isa_model/deployment/modal/services/audio/__init__.py +1 -0
- isa_model/deployment/modal/services/embedding/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/__init__.py +1 -0
- isa_model/deployment/modal/services/llm/isa_llm_service.py +424 -0
- isa_model/deployment/modal/services/video/__init__.py +1 -0
- isa_model/deployment/modal/services/vision/__init__.py +1 -0
- isa_model/deployment/models/org-org-acme-corp-tenant-a-service-llm-20250825-225822/tenant-a-service_modal_service.py +48 -0
- isa_model/deployment/models/org-test-org-123-prefix-test-service-llm-20250825-225822/prefix-test-service_modal_service.py +48 -0
- isa_model/deployment/models/test-llm-service-llm-20250825-204442/test-llm-service_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-212906/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/models/test-monitoring-gpt2-llm-20250825-213009/test-monitoring-gpt2_modal_service.py +48 -0
- isa_model/deployment/storage/__init__.py +5 -0
- isa_model/deployment/storage/deployment_repository.py +824 -0
- isa_model/deployment/triton/__init__.py +10 -0
- isa_model/deployment/triton/config.py +196 -0
- isa_model/deployment/triton/configs/__init__.py +1 -0
- isa_model/deployment/triton/provider.py +512 -0
- isa_model/deployment/triton/scripts/__init__.py +1 -0
- isa_model/deployment/triton/templates/__init__.py +1 -0
- isa_model/inference/__init__.py +47 -1
- isa_model/inference/ai_factory.py +137 -10
- isa_model/inference/legacy_services/__init__.py +21 -0
- isa_model/inference/legacy_services/model_evaluation.py +637 -0
- isa_model/inference/legacy_services/model_service.py +573 -0
- isa_model/inference/legacy_services/model_serving.py +717 -0
- isa_model/inference/legacy_services/model_training.py +561 -0
- isa_model/inference/models/__init__.py +21 -0
- isa_model/inference/models/inference_config.py +551 -0
- isa_model/inference/models/inference_record.py +675 -0
- isa_model/inference/models/performance_models.py +714 -0
- isa_model/inference/repositories/__init__.py +9 -0
- isa_model/inference/repositories/inference_repository.py +828 -0
- isa_model/inference/services/audio/base_stt_service.py +184 -11
- isa_model/inference/services/audio/openai_stt_service.py +22 -6
- isa_model/inference/services/custom_model_manager.py +277 -0
- isa_model/inference/services/embedding/ollama_embed_service.py +15 -3
- isa_model/inference/services/embedding/resilient_embed_service.py +285 -0
- isa_model/inference/services/llm/__init__.py +10 -2
- isa_model/inference/services/llm/base_llm_service.py +335 -24
- isa_model/inference/services/llm/cerebras_llm_service.py +628 -0
- isa_model/inference/services/llm/helpers/llm_adapter.py +9 -4
- isa_model/inference/services/llm/helpers/llm_prompts.py +342 -0
- isa_model/inference/services/llm/helpers/llm_utils.py +321 -23
- isa_model/inference/services/llm/huggingface_llm_service.py +581 -0
- isa_model/inference/services/llm/local_llm_service.py +747 -0
- isa_model/inference/services/llm/ollama_llm_service.py +9 -2
- isa_model/inference/services/llm/openai_llm_service.py +33 -16
- isa_model/inference/services/llm/yyds_llm_service.py +8 -2
- isa_model/inference/services/vision/__init__.py +22 -1
- isa_model/inference/services/vision/blip_vision_service.py +359 -0
- isa_model/inference/services/vision/helpers/image_utils.py +8 -5
- isa_model/inference/services/vision/isa_vision_service.py +65 -4
- isa_model/inference/services/vision/openai_vision_service.py +19 -10
- isa_model/inference/services/vision/vgg16_vision_service.py +257 -0
- isa_model/serving/api/cache_manager.py +245 -0
- isa_model/serving/api/dependencies/__init__.py +1 -0
- isa_model/serving/api/dependencies/auth.py +194 -0
- isa_model/serving/api/dependencies/database.py +139 -0
- isa_model/serving/api/error_handlers.py +284 -0
- isa_model/serving/api/fastapi_server.py +172 -22
- isa_model/serving/api/middleware/auth.py +8 -2
- isa_model/serving/api/middleware/security.py +23 -33
- isa_model/serving/api/middleware/tenant_context.py +414 -0
- isa_model/serving/api/routes/analytics.py +4 -1
- isa_model/serving/api/routes/config.py +645 -0
- isa_model/serving/api/routes/deployment_billing.py +315 -0
- isa_model/serving/api/routes/deployments.py +138 -2
- isa_model/serving/api/routes/gpu_gateway.py +440 -0
- isa_model/serving/api/routes/health.py +32 -12
- isa_model/serving/api/routes/inference_monitoring.py +486 -0
- isa_model/serving/api/routes/local_deployments.py +448 -0
- isa_model/serving/api/routes/tenants.py +575 -0
- isa_model/serving/api/routes/unified.py +680 -18
- isa_model/serving/api/routes/webhooks.py +479 -0
- isa_model/serving/api/startup.py +68 -54
- isa_model/utils/gpu_utils.py +311 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/METADATA +66 -24
- isa_model-0.4.3.dist-info/RECORD +193 -0
- isa_model/core/storage/minio_storage.py +0 -0
- isa_model/deployment/cloud/__init__.py +0 -9
- isa_model/deployment/cloud/modal/__init__.py +0 -10
- isa_model/deployment/core/deployment_config.py +0 -356
- isa_model/deployment/core/isa_deployment_service.py +0 -401
- isa_model/deployment/gpu_int8_ds8/app/server.py +0 -66
- isa_model/deployment/gpu_int8_ds8/scripts/test_client.py +0 -43
- isa_model/deployment/gpu_int8_ds8/scripts/test_client_os.py +0 -35
- isa_model/deployment/runtime/deployed_service.py +0 -338
- isa_model/deployment/services/__init__.py +0 -9
- isa_model/deployment/services/auto_deploy_vision_service.py +0 -538
- isa_model/deployment/services/model_service.py +0 -332
- isa_model/deployment/services/service_monitor.py +0 -356
- isa_model/deployment/services/service_registry.py +0 -527
- isa_model/eval/__init__.py +0 -92
- isa_model/eval/benchmarks/__init__.py +0 -27
- isa_model/eval/benchmarks/multimodal_datasets.py +0 -460
- isa_model/eval/benchmarks.py +0 -701
- isa_model/eval/config/__init__.py +0 -10
- isa_model/eval/config/evaluation_config.py +0 -108
- isa_model/eval/evaluators/__init__.py +0 -24
- isa_model/eval/evaluators/audio_evaluator.py +0 -727
- isa_model/eval/evaluators/base_evaluator.py +0 -503
- isa_model/eval/evaluators/embedding_evaluator.py +0 -742
- isa_model/eval/evaluators/llm_evaluator.py +0 -472
- isa_model/eval/evaluators/vision_evaluator.py +0 -564
- isa_model/eval/example_evaluation.py +0 -395
- isa_model/eval/factory.py +0 -798
- isa_model/eval/infrastructure/__init__.py +0 -24
- isa_model/eval/infrastructure/experiment_tracker.py +0 -466
- isa_model/eval/isa_benchmarks.py +0 -700
- isa_model/eval/isa_integration.py +0 -582
- isa_model/eval/metrics.py +0 -951
- isa_model/eval/tests/unit/test_basic.py +0 -396
- isa_model/serving/api/routes/evaluations.py +0 -579
- isa_model/training/__init__.py +0 -168
- isa_model/training/annotation/annotation_schema.py +0 -47
- isa_model/training/annotation/processors/annotation_processor.py +0 -126
- isa_model/training/annotation/storage/dataset_manager.py +0 -131
- isa_model/training/annotation/storage/dataset_schema.py +0 -44
- isa_model/training/annotation/tests/test_annotation_flow.py +0 -109
- isa_model/training/annotation/tests/test_minio copy.py +0 -113
- isa_model/training/annotation/tests/test_minio_upload.py +0 -43
- isa_model/training/annotation/views/annotation_controller.py +0 -158
- isa_model/training/cloud/__init__.py +0 -22
- isa_model/training/cloud/job_orchestrator.py +0 -402
- isa_model/training/cloud/runpod_trainer.py +0 -454
- isa_model/training/cloud/storage_manager.py +0 -482
- isa_model/training/core/__init__.py +0 -26
- isa_model/training/core/config.py +0 -181
- isa_model/training/core/dataset.py +0 -222
- isa_model/training/core/trainer.py +0 -720
- isa_model/training/core/utils.py +0 -213
- isa_model/training/examples/intelligent_training_example.py +0 -281
- isa_model/training/factory.py +0 -424
- isa_model/training/intelligent/__init__.py +0 -25
- isa_model/training/intelligent/decision_engine.py +0 -643
- isa_model/training/intelligent/intelligent_factory.py +0 -888
- isa_model/training/intelligent/knowledge_base.py +0 -751
- isa_model/training/intelligent/resource_optimizer.py +0 -839
- isa_model/training/intelligent/task_classifier.py +0 -576
- isa_model/training/storage/__init__.py +0 -24
- isa_model/training/storage/core_integration.py +0 -439
- isa_model/training/storage/training_repository.py +0 -552
- isa_model/training/storage/training_storage.py +0 -628
- isa_model-0.4.0.dist-info/RECORD +0 -182
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_chatTTS_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_fish_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_openvoice_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/audio}/isa_audio_service_v2.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/embedding}/isa_embed_rerank_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/video}/isa_video_hunyuan_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ocr_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_qwen25_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_table_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service.py +0 -0
- /isa_model/deployment/{cloud/modal → modal/services/vision}/isa_vision_ui_service_optimized.py +0 -0
- /isa_model/deployment/{services → modal/services/vision}/simple_auto_deploy_vision_service.py +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/WHEEL +0 -0
- {isa_model-0.4.0.dist-info → isa_model-0.4.3.dist-info}/top_level.txt +0 -0
--- /dev/null
+++ isa_model/core/logging/influx_logger.py
@@ -0,0 +1,523 @@
+"""
+InfluxDB-based Inference Logging System for ISA Model
+
+This module provides comprehensive logging for model inference requests,
+optimized for time-series analysis and monitoring.
+
+Data Model:
+- measurement: 'inference_requests' (main table)
+- tags: indexed fields for fast queries (provider, model, service_type, etc.)
+- fields: numerical and text data (tokens, costs, response_time, etc.)
+- timestamp: automatic time-based partitioning
+
+Key Features:
+- Automatic data retention (30 days default)
+- Cost-effective storage with compression
+- Real-time monitoring capabilities
+- Aggregated metrics generation
+"""
+
+import os
+import json
+import hashlib
+import logging
+from datetime import datetime, timezone
+from typing import Dict, Any, Optional, List, Union
+from dataclasses import dataclass, field
+from influxdb_client import InfluxDBClient, Point, WritePrecision
+from influxdb_client.client.write_api import SYNCHRONOUS
+import uuid
+
+logger = logging.getLogger(__name__)
+
+@dataclass
+class InferenceLogEntry:
+    """
+    Structure for inference log data
+    """
+    # Required fields
+    request_id: str
+    service_type: str  # 'text', 'vision', 'audio', 'image', 'embedding'
+    task: str  # 'chat', 'analyze_image', 'generate_speech', etc.
+    provider: str  # 'openai', 'replicate', 'anthropic', etc.
+    model_name: str  # Actual model used
+    status: str  # 'completed', 'failed', 'timeout'
+
+    # Timing data
+    start_time: datetime
+    end_time: Optional[datetime] = None
+    execution_time_ms: Optional[int] = None
+    queue_time_ms: Optional[int] = None
+
+    # Token and usage data
+    input_tokens: Optional[int] = None
+    output_tokens: Optional[int] = None
+    total_tokens: Optional[int] = None
+
+    # Cost data
+    estimated_cost_usd: Optional[float] = None
+    actual_cost_usd: Optional[float] = None
+    cost_breakdown: Optional[Dict[str, Any]] = None
+
+    # Request/response data (optional, for debugging)
+    input_data_hash: Optional[str] = None
+    input_size_bytes: Optional[int] = None
+    output_size_bytes: Optional[int] = None
+
+    # Streaming data
+    is_streaming: bool = False
+    stream_start_time: Optional[datetime] = None
+    stream_chunks_count: Optional[int] = None
+    time_to_first_token_ms: Optional[int] = None
+
+    # Error information
+    error_message: Optional[str] = None
+    error_code: Optional[str] = None
+
+    # Context and metadata
+    session_id: Optional[str] = None
+    user_id: Optional[str] = None
+    client_ip: Optional[str] = None
+    model_version: Optional[str] = None
+    cache_hit: bool = False
+
+    # Quality metrics
+    quality_score: Optional[float] = None
+    user_feedback: Optional[int] = None  # 1-5 rating
+
+    # Additional metadata
+    custom_metadata: Dict[str, Any] = field(default_factory=dict)
+
+class InfluxInferenceLogger:
+    """
+    InfluxDB-based logger for model inference activities
+
+    Features:
+    - Time-series storage optimized for metrics
+    - Automatic data retention and compression
+    - Real-time query capabilities
+    - Cost tracking and analysis
+    - Performance monitoring
+    """
+
+    def __init__(self):
+        """Initialize InfluxDB connection"""
+        self.enabled = os.getenv('ENABLE_INFERENCE_LOGGING', 'false').lower() == 'true'
+
+        # Logging configuration - always set these regardless of enabled status
+        self.retention_days = int(os.getenv('LOG_RETENTION_DAYS', '30'))
+        self.log_detailed_requests = os.getenv('LOG_DETAILED_REQUESTS', 'true').lower() == 'true'
+        self.log_sensitive_data = os.getenv('LOG_SENSITIVE_DATA', 'false').lower() == 'true'
+
+        if not self.enabled:
+            logger.info("Inference logging disabled via ENABLE_INFERENCE_LOGGING")
+            return
+
+        # InfluxDB configuration
+        from ..config.config_manager import ConfigManager
+        config_manager = ConfigManager()
+        # Use Consul discovery for InfluxDB URL with fallback
+        self.url = os.getenv('INFLUXDB_URL', config_manager.get_influxdb_url())
+        self.token = os.getenv('INFLUXDB_TOKEN', 'dev-token-isa-model-12345')
+        self.org = os.getenv('INFLUXDB_ORG', 'isa-model')
+        self.bucket = os.getenv('INFLUXDB_BUCKET', 'isa-model-logs')
+
+        try:
+            # Initialize InfluxDB client
+            self.client = InfluxDBClient(url=self.url, token=self.token, org=self.org)
+            self.write_api = self.client.write_api(write_options=SYNCHRONOUS)
+            self.query_api = self.client.query_api()
+
+            # Test connection
+            self._test_connection()
+            logger.info(f"InfluxDB inference logger initialized: {self.url}")
+
+        except Exception as e:
+            logger.error(f"Failed to initialize InfluxDB logger: {e}")
+            self.enabled = False
+
+    def _test_connection(self):
+        """Test InfluxDB connection"""
+        try:
+            health = self.client.health()
+            if health.status == "pass":
+                logger.debug("InfluxDB connection healthy")
+            else:
+                raise Exception(f"InfluxDB health check failed: {health.message}")
+        except Exception as e:
+            raise Exception(f"InfluxDB connection test failed: {e}")
+
+    def _create_data_hash(self, data: Any) -> str:
+        """Create SHA-256 hash of input data for deduplication"""
+        try:
+            if isinstance(data, (dict, list)):
+                data_str = json.dumps(data, sort_keys=True)
+            else:
+                data_str = str(data)
+            return hashlib.sha256(data_str.encode()).hexdigest()
+        except Exception:
+            return None
+
+    def log_inference_start(
+        self,
+        request_id: str,
+        service_type: str,
+        task: str,
+        provider: str,
+        model_name: str,
+        input_data: Any = None,
+        session_id: Optional[str] = None,
+        user_id: Optional[str] = None,
+        client_ip: Optional[str] = None,
+        is_streaming: bool = False,
+        custom_metadata: Optional[Dict[str, Any]] = None
+    ) -> None:
+        """
+        Log the start of an inference request
+        """
+        if not self.enabled:
+            return
+
+        try:
+            start_time = datetime.now(timezone.utc)
+
+            # Create data hash for input
+            input_hash = None
+            input_size = None
+            if input_data and self.log_detailed_requests:
+                input_hash = self._create_data_hash(input_data)
+                try:
+                    input_size = len(str(input_data).encode('utf-8'))
+                except:
+                    input_size = None
+
+            # Create InfluxDB point
+            point = Point("inference_requests") \
+                .tag("service_type", service_type) \
+                .tag("task", task) \
+                .tag("provider", provider) \
+                .tag("model_name", model_name) \
+                .tag("status", "started") \
+                .field("request_id", request_id) \
+                .field("is_streaming", is_streaming) \
+                .time(start_time, WritePrecision.MS)
+
+            # Add optional tags and fields
+            if session_id:
+                point = point.tag("session_id", session_id)
+            if user_id:
+                point = point.tag("user_id", user_id)
+            if client_ip and not self.log_sensitive_data:
+                # Hash IP for privacy
+                ip_hash = hashlib.md5(client_ip.encode()).hexdigest()[:8]
+                point = point.field("client_ip_hash", ip_hash)
+            if input_hash:
+                point = point.field("input_data_hash", input_hash)
+            if input_size:
+                point = point.field("input_size_bytes", input_size)
+            if custom_metadata:
+                for key, value in custom_metadata.items():
+                    point = point.field(f"meta_{key}", str(value))
+
+            # Write to InfluxDB
+            self.write_api.write(bucket=self.bucket, org=self.org, record=point)
+            logger.debug(f"Logged inference start: {request_id}")
+
+        except Exception as e:
+            logger.error(f"Failed to log inference start: {e}")
+
+    def log_inference_complete(
+        self,
+        request_id: str,
+        status: str = "completed",
+        execution_time_ms: Optional[int] = None,
+        input_tokens: Optional[int] = None,
+        output_tokens: Optional[int] = None,
+        estimated_cost_usd: Optional[float] = None,
+        output_data: Any = None,
+        stream_chunks_count: Optional[int] = None,
+        time_to_first_token_ms: Optional[int] = None,
+        error_message: Optional[str] = None,
+        error_code: Optional[str] = None,
+        cache_hit: bool = False,
+        quality_score: Optional[float] = None,
+        custom_metadata: Optional[Dict[str, Any]] = None
+    ) -> None:
+        """
+        Log the completion of an inference request
+        """
+        if not self.enabled:
+            return
+
+        try:
+            end_time = datetime.now(timezone.utc)
+
+            # Calculate output data size
+            output_size = None
+            if output_data and self.log_detailed_requests:
+                try:
+                    output_size = len(str(output_data).encode('utf-8'))
+                except:
+                    output_size = None
+
+            # Create InfluxDB point
+            point = Point("inference_requests") \
+                .tag("status", status) \
+                .field("request_id", request_id) \
+                .field("cache_hit", cache_hit) \
+                .time(end_time, WritePrecision.MS)
+
+            # Add timing data
+            if execution_time_ms is not None:
+                point = point.field("execution_time_ms", execution_time_ms)
+
+            # Add token data
+            if input_tokens is not None:
+                point = point.field("input_tokens", input_tokens)
+            if output_tokens is not None:
+                point = point.field("output_tokens", output_tokens)
+            if input_tokens and output_tokens:
+                point = point.field("total_tokens", input_tokens + output_tokens)
+
+            # Add cost data
+            if estimated_cost_usd is not None:
+                point = point.field("estimated_cost_usd", float(estimated_cost_usd))
+
+            # Add output data size
+            if output_size:
+                point = point.field("output_size_bytes", output_size)
+
+            # Add streaming metrics
+            if stream_chunks_count is not None:
+                point = point.field("stream_chunks_count", stream_chunks_count)
+            if time_to_first_token_ms is not None:
+                point = point.field("time_to_first_token_ms", time_to_first_token_ms)
+
+            # Add error information
+            if error_message:
+                point = point.field("error_message", error_message[:500])  # Limit length
+            if error_code:
+                point = point.field("error_code", error_code)
+
+            # Add quality metrics
+            if quality_score is not None:
+                point = point.field("quality_score", float(quality_score))
+
+            # Add custom metadata
+            if custom_metadata:
+                for key, value in custom_metadata.items():
+                    point = point.field(f"meta_{key}", str(value))
+
+            # Write to InfluxDB
+            self.write_api.write(bucket=self.bucket, org=self.org, record=point)
+            logger.debug(f"Logged inference completion: {request_id} ({status})")
+
+        except Exception as e:
+            logger.error(f"Failed to log inference completion: {e}")
+
+    def log_token_usage(
+        self,
+        request_id: str,
+        provider: str,
+        model_name: str,
+        prompt_tokens: int,
+        completion_tokens: int,
+        prompt_cost_usd: Optional[float] = None,
+        completion_cost_usd: Optional[float] = None
+    ) -> None:
+        """
+        Log detailed token usage data
+        """
+        if not self.enabled:
+            return
+
+        try:
+            timestamp = datetime.now(timezone.utc)
+            total_tokens = prompt_tokens + completion_tokens
+            total_cost = (prompt_cost_usd or 0) + (completion_cost_usd or 0)
+
+            point = Point("token_usage") \
+                .tag("provider", provider) \
+                .tag("model_name", model_name) \
+                .field("request_id", request_id) \
+                .field("prompt_tokens", prompt_tokens) \
+                .field("completion_tokens", completion_tokens) \
+                .field("total_tokens", total_tokens) \
+                .time(timestamp, WritePrecision.MS)
+
+            if prompt_cost_usd is not None:
+                point = point.field("prompt_cost_usd", float(prompt_cost_usd))
+            if completion_cost_usd is not None:
+                point = point.field("completion_cost_usd", float(completion_cost_usd))
+            if total_cost > 0:
+                point = point.field("total_cost_usd", float(total_cost))
+                point = point.field("cost_per_token_usd", float(total_cost / total_tokens))
+
+            self.write_api.write(bucket=self.bucket, org=self.org, record=point)
+            logger.debug(f"Logged token usage: {request_id}")
+
+        except Exception as e:
+            logger.error(f"Failed to log token usage: {e}")
+
+    def log_error(
+        self,
+        request_id: str,
+        error_type: str,
+        error_message: str,
+        error_code: Optional[str] = None,
+        provider: Optional[str] = None,
+        model_name: Optional[str] = None,
+        retry_count: int = 0
+    ) -> None:
+        """
+        Log error events
+        """
+        if not self.enabled:
+            return
+
+        try:
+            timestamp = datetime.now(timezone.utc)
+
+            point = Point("inference_errors") \
+                .tag("error_type", error_type) \
+                .field("request_id", request_id) \
+                .field("error_message", error_message[:500]) \
+                .field("retry_count", retry_count) \
+                .time(timestamp, WritePrecision.MS)
+
+            if error_code:
+                point = point.field("error_code", error_code)
+            if provider:
+                point = point.tag("provider", provider)
+            if model_name:
+                point = point.tag("model_name", model_name)
+
+            self.write_api.write(bucket=self.bucket, org=self.org, record=point)
+            logger.debug(f"Logged error: {request_id} - {error_type}")
+
+        except Exception as e:
+            logger.error(f"Failed to log error: {e}")
+
+    def get_recent_requests(
+        self,
+        limit: int = 100,
+        hours: int = 24,
+        service_type: Optional[str] = None,
+        provider: Optional[str] = None,
+        status: Optional[str] = None
+    ) -> List[Dict[str, Any]]:
+        """
+        Query recent inference requests
+        """
+        if not self.enabled:
+            return []
+
+        try:
+            # Build query with simpler filtering
+            filters = []
+            if service_type:
+                filters.append(f'r.service_type == "{service_type}"')
+            if provider:
+                filters.append(f'r.provider == "{provider}"')
+            if status:
+                filters.append(f'r.status == "{status}"')
+
+            # Build filter clause
+            if filters:
+                filter_clause = " and " + " and ".join(filters)
+            else:
+                filter_clause = ""
+
+            query = f'''
+            from(bucket: "{self.bucket}")
+                |> range(start: -{hours}h)
+                |> filter(fn: (r) => r._measurement == "inference_requests"{filter_clause})
+                |> filter(fn: (r) => r._field == "request_id")
+                |> sort(columns: ["_time"], desc: true)
+                |> limit(n: {limit})
+            '''
+
+            result = self.query_api.query(org=self.org, query=query)
+
+            # Process results - get unique request IDs first
+            request_ids = []
+            for table in result:
+                for record in table.records:
+                    request_id = record.get_value()
+                    if request_id not in [r.get('request_id') for r in request_ids]:
+                        request_ids.append({
+                            'request_id': request_id,
+                            'time': record.get_time(),
+                            'service_type': record.values.get('service_type'),
+                            'provider': record.values.get('provider'),
+                            'model_name': record.values.get('model_name'),
+                            'status': record.values.get('status'),
+                            'task': record.values.get('task')
+                        })
+
+            return request_ids
+
+        except Exception as e:
+            logger.error(f"Failed to query recent requests: {e}")
+            return []
+
+    def get_usage_statistics(
+        self,
+        hours: int = 24,
+        group_by: str = "provider"  # "provider", "model_name", "service_type"
+    ) -> Dict[str, Any]:
+        """
+        Get usage statistics and metrics
+        """
+        if not self.enabled:
+            return {}
+
+        try:
+            # Simplified query to count unique request IDs by group
+            query = f'''
+            from(bucket: "{self.bucket}")
+                |> range(start: -{hours}h)
+                |> filter(fn: (r) => r._measurement == "inference_requests")
+                |> filter(fn: (r) => r._field == "request_id")
+                |> group(columns: ["{group_by}"])
+                |> count()
+                |> yield(name: "request_counts")
+            '''
+
+            result = self.query_api.query(org=self.org, query=query)
+
+            # Process results into statistics
+            stats = {}
+            for table in result:
+                for record in table.records:
+                    key = record.values.get(group_by, 'unknown')
+                    stats[key] = {
+                        'total_requests': record.get_value() or 0,
+                        'group_by': group_by,
+                        'time_range_hours': hours
+                    }
+
+            return stats
+
+        except Exception as e:
+            logger.error(f"Failed to get usage statistics: {e}")
+            return {}
+
+    def close(self):
+        """Close InfluxDB connection"""
+        if self.enabled and hasattr(self, 'client'):
+            self.client.close()
+
+# Global logger instance
+_inference_logger: Optional[InfluxInferenceLogger] = None
+
+def get_inference_logger() -> InfluxInferenceLogger:
+    """Get or create global inference logger instance"""
+    global _inference_logger
+    if _inference_logger is None:
+        _inference_logger = InfluxInferenceLogger()
+    return _inference_logger
+
+def generate_request_id() -> str:
+    """Generate unique request ID"""
+    return f"req_{uuid.uuid4().hex[:12]}"
--- /dev/null
+++ isa_model/core/logging/loki_logger.py
@@ -0,0 +1,160 @@
+"""
+Centralized Logging Configuration with Loki Integration for ISA Model
+
+This module provides centralized application logging with Loki support,
+complementing the existing InfluxDB inference logging system.
+
+Architecture:
+- Loki: General application logs (INFO, WARNING, ERROR, DEBUG)
+- InfluxDB: Inference metrics and performance data (tokens, costs, timing)
+
+Usage:
+    from isa_model.core.logging import app_logger, api_logger
+
+    app_logger.info("Service starting...")
+    api_logger.error(f"Request failed: {error}", exc_info=True)
+"""
+
+import logging
+import sys
+import os
+from typing import Optional
+
+
+def setup_logger(
+    name: str,
+    level: Optional[str] = None,
+    format_str: Optional[str] = None
+) -> logging.Logger:
+    """
+    Setup logger with centralized Loki integration
+
+    Args:
+        name: Logger name (e.g., "ISAModel.API")
+        level: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
+        format_str: Log format string (optional)
+
+    Returns:
+        Configured logger instance
+
+    Example:
+        >>> from isa_model.core.logging import setup_logger
+        >>> my_logger = setup_logger("ISAModel.MyModule")
+        >>> my_logger.info("Processing started")
+    """
+    logger = logging.getLogger(name)
+
+    # Avoid duplicate handlers
+    if logger.handlers:
+        return logger
+
+    # Get configuration from environment
+    log_level_env = os.getenv("LOG_LEVEL", "INFO").upper()
+    log_format_env = os.getenv(
+        "LOG_FORMAT",
+        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+    )
+    from ..config.config_manager import ConfigManager
+    config_manager = ConfigManager()
+    # Use Consul discovery for Loki URL with fallback
+    loki_url = os.getenv("LOKI_URL", config_manager.get_loki_url())
+    loki_enabled = os.getenv("LOKI_ENABLED", "true").lower() == "true"
+
+    # Set log level
+    final_level = (level or log_level_env).upper()
+    logger.setLevel(getattr(logging, final_level, logging.INFO))
+
+    # Disable propagation to prevent duplicate logs
+    logger.propagate = False
+
+    # Log format
+    formatter = logging.Formatter(format_str or log_format_env)
+
+    # 1. Console Handler (for local development and debugging)
+    console_handler = logging.StreamHandler(sys.stdout)
+    console_handler.setFormatter(formatter)
+    logger.addHandler(console_handler)
+
+    # 2. Loki Handler (for centralized logging)
+    if loki_enabled:
+        try:
+            from logging_loki import LokiHandler
+
+            # Extract service name and logger component
+            # e.g., "ISAModel.API" -> service="isa_model", logger="API"
+            service_name = "isa_model"
+            logger_component = name.replace("ISAModel.", "").replace("ISAModel", "main")
+
+            # Labels for Loki (used for filtering and searching)
+            # Use service_name to match other services (mcp, agent, etc.)
+            loki_labels = {
+                "service_name": "model",  # Use "model" to match service naming convention
+                "logger": logger_component,
+                "environment": os.getenv("ENVIRONMENT", "development"),
+                "job": "isa_model_service"
+            }
+
+            # Create Loki handler
+            loki_handler = LokiHandler(
+                url=f"{loki_url}/loki/api/v1/push",
+                tags=loki_labels,
+                version="1",
+            )
+
+            # Only send INFO and above to Loki (reduce network traffic)
+            loki_handler.setLevel(logging.INFO)
+
+            logger.addHandler(loki_handler)
+
+        except ImportError:
+            # Silently fall back to console-only logging during initialization
+            pass
+        except Exception as e:
+            # Loki unavailable - silently fall back to console
+            pass
+
+    return logger
+
+
+# Create application loggers
+# Main application logger
+app_logger = setup_logger("ISAModel")
+
+# API/Server logger
+api_logger = setup_logger("ISAModel.API")
+
+# Client logger
+client_logger = setup_logger("ISAModel.Client")
+
+# Inference logger (application-level, not metrics)
+inference_logger = setup_logger("ISAModel.Inference")
+
+# Training logger
+training_logger = setup_logger("ISAModel.Training")
+
+# Evaluation logger
+eval_logger = setup_logger("ISAModel.Evaluation")
+
+# Database logger
+db_logger = setup_logger("ISAModel.Database")
+
+# Deployment logger
+deployment_logger = setup_logger("ISAModel.Deployment")
+
+# Model manager logger
+model_logger = setup_logger("ISAModel.Models")
+
+
+# Export all loggers
+__all__ = [
+    'setup_logger',
+    'app_logger',
+    'api_logger',
+    'client_logger',
+    'inference_logger',
+    'training_logger',
+    'eval_logger',
+    'db_logger',
+    'deployment_logger',
+    'model_logger',
+]