guardianhub 0.1.88__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. guardianhub/__init__.py +29 -0
  2. guardianhub/_version.py +1 -0
  3. guardianhub/agents/runtime.py +12 -0
  4. guardianhub/auth/token_provider.py +22 -0
  5. guardianhub/clients/__init__.py +2 -0
  6. guardianhub/clients/classification_client.py +52 -0
  7. guardianhub/clients/graph_db_client.py +161 -0
  8. guardianhub/clients/langfuse/dataset_client.py +157 -0
  9. guardianhub/clients/langfuse/manager.py +118 -0
  10. guardianhub/clients/langfuse/prompt_client.py +68 -0
  11. guardianhub/clients/langfuse/score_evaluation_client.py +92 -0
  12. guardianhub/clients/langfuse/tracing_client.py +250 -0
  13. guardianhub/clients/langfuse_client.py +63 -0
  14. guardianhub/clients/llm_client.py +144 -0
  15. guardianhub/clients/llm_service.py +295 -0
  16. guardianhub/clients/metadata_extractor_client.py +53 -0
  17. guardianhub/clients/ocr_client.py +81 -0
  18. guardianhub/clients/paperless_client.py +515 -0
  19. guardianhub/clients/registry_client.py +18 -0
  20. guardianhub/clients/text_cleaner_client.py +58 -0
  21. guardianhub/clients/vector_client.py +344 -0
  22. guardianhub/config/__init__.py +0 -0
  23. guardianhub/config/config_development.json +84 -0
  24. guardianhub/config/config_prod.json +39 -0
  25. guardianhub/config/settings.py +221 -0
  26. guardianhub/http/http_client.py +26 -0
  27. guardianhub/logging/__init__.py +2 -0
  28. guardianhub/logging/logging.py +168 -0
  29. guardianhub/logging/logging_filters.py +35 -0
  30. guardianhub/models/__init__.py +0 -0
  31. guardianhub/models/agent_models.py +153 -0
  32. guardianhub/models/base.py +2 -0
  33. guardianhub/models/registry/client.py +16 -0
  34. guardianhub/models/registry/dynamic_loader.py +73 -0
  35. guardianhub/models/registry/loader.py +37 -0
  36. guardianhub/models/registry/registry.py +17 -0
  37. guardianhub/models/registry/signing.py +70 -0
  38. guardianhub/models/template/__init__.py +0 -0
  39. guardianhub/models/template/agent_plan.py +65 -0
  40. guardianhub/models/template/agent_response_evaluation.py +67 -0
  41. guardianhub/models/template/extraction.py +29 -0
  42. guardianhub/models/template/reflection_critique.py +206 -0
  43. guardianhub/models/template/suggestion.py +42 -0
  44. guardianhub/observability/__init__.py +1 -0
  45. guardianhub/observability/instrumentation.py +271 -0
  46. guardianhub/observability/otel_helper.py +43 -0
  47. guardianhub/observability/otel_middlewares.py +73 -0
  48. guardianhub/prompts/base.py +7 -0
  49. guardianhub/prompts/providers/langfuse_provider.py +13 -0
  50. guardianhub/prompts/providers/local_provider.py +22 -0
  51. guardianhub/prompts/registry.py +14 -0
  52. guardianhub/scripts/script.sh +31 -0
  53. guardianhub/services/base.py +15 -0
  54. guardianhub/template/__init__.py +0 -0
  55. guardianhub/tools/gh_registry_cli.py +171 -0
  56. guardianhub/utils/__init__.py +0 -0
  57. guardianhub/utils/app_state.py +74 -0
  58. guardianhub/utils/fastapi_utils.py +152 -0
  59. guardianhub/utils/json_utils.py +137 -0
  60. guardianhub/utils/metrics.py +60 -0
  61. guardianhub-0.1.88.dist-info/METADATA +240 -0
  62. guardianhub-0.1.88.dist-info/RECORD +64 -0
  63. guardianhub-0.1.88.dist-info/WHEEL +4 -0
  64. guardianhub-0.1.88.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,67 @@
1
+ from pydantic import BaseModel, Field
2
+ from typing import Dict, Any, List, Optional
3
+ from enum import Enum
4
+ from ..registry.registry import register_model
5
+
6
+
7
class EvaluationMetric(str, Enum):
    """Standard evaluation metrics.

    Inherits from ``str`` so members compare equal to, and serialize as,
    their plain string values (e.g. ``EvaluationMetric.RELEVANCE == "relevance"``).
    """
    RELEVANCE = "relevance"        # how well the response addresses the query
    GROUNDEDNESS = "groundedness"  # whether the response is supported by the context
    COHERENCE = "coherence"        # logical flow and consistency of the response
    FLUENCY = "fluency"            # readability and grammatical correctness
    COMPLETENESS = "completeness"  # whether all parts of the query are answered
    CORRECTNESS = "correctness"    # factual accuracy
    SAFETY = "safety"              # safety compliance
16
+
17
+
18
# New model for structured LLM output
@register_model
class EvaluationScoresModel(BaseModel):
    """Pydantic model for the structured JSON output expected from the LLM.

    Every score is required and constrained to the closed interval
    [0.0, 1.0]; out-of-range values fail validation rather than being clamped.
    """
    relevance: float = Field(..., ge=0.0, le=1.0,
                             description="Score for how well the response addresses the query (0.0 to 1.0).")
    groundedness: float = Field(..., ge=0.0, le=1.0,
                                description="Score for whether the response is supported by the context (0.0 to 1.0).")
    coherence: float = Field(..., ge=0.0, le=1.0,
                             description="Score for the logical flow and consistency of the response (0.0 to 1.0).")
    fluency: float = Field(..., ge=0.0, le=1.0,
                           description="Score for the readability and grammatical correctness (0.0 to 1.0).")
    completeness: float = Field(..., ge=0.0, le=1.0,
                                description="Score for whether the response fully answers all parts of the query (0.0 to 1.0).")

    # Optional fields for other potential metrics, if the LLM supports them
    # correctness: Optional[float] = Field(None, ge=0.0, le=1.0, description="Factual accuracy score.")
    # safety: Optional[float] = Field(None, ge=0.0, le=1.0, description="Safety compliance score.")
36
+
37
class EvaluationErrorLevel(str, Enum):
    """Severity levels for evaluation errors.

    str-backed so the level serializes as its plain string value.
    """
    WARNING = "warning"    # evaluation produced a result but with caveats
    ERROR = "error"        # evaluation failed for this item
    CRITICAL = "critical"  # evaluation pipeline itself is compromised
42
+
43
+
44
@register_model
class EvaluationResult(BaseModel):
    """Container for evaluation results and metrics.

    Bundles per-metric scores with free-form metadata, plus an optional
    error message and severity recorded by the evaluation pipeline.
    """
    scores: Dict[EvaluationMetric, float] = Field(default_factory=dict)
    metadata: Dict[str, Any] = Field(default_factory=dict)
    error: Optional[str] = None
    error_level: Optional[EvaluationErrorLevel] = None

    @property
    def overall_score(self) -> float:
        """Arithmetic mean of the individual metric scores (0.0 when empty)."""
        values = list(self.scores.values())
        return sum(values) / len(values) if values else 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the result, flattening enum keys/levels to their string values."""
        level = self.error_level
        return {
            "scores": {metric.value: score for metric, score in self.scores.items()},
            "overall_score": self.overall_score,
            "metadata": self.metadata,
            "error": self.error,
            "error_level": level.value if level else None,
        }
@@ -0,0 +1,29 @@
1
+ from pydantic import BaseModel, Field
2
+ from typing import Dict, Any, List, Optional
3
+
4
+ from ..registry.registry import register_model
5
+
6
+
7
@register_model
class StructuredExtractionResult(BaseModel):
    """
    Unified result model for document classification and metadata extraction.
    The LLM must populate the 'document_type' and 'metadata' fields.
    """
    document_type: str = Field(
        ...,
        description=(
            "The primary classification of the document. Must be one of the provided types "
            "(e.g., 'Invoice', 'Receipt', 'Contract', 'Technical Knowledge Documents', 'Unknown')."
        )
    )
    # default_factory=dict: if the LLM omits this field entirely, validation
    # succeeds and the value defaults to an empty dictionary {}.
    metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="A dictionary containing the extracted key-value metadata pairs specific to the classified document_type."
    )
    # FIX: the description promised a 0.0-1.0 range but nothing enforced it;
    # ge/le now make out-of-range confidences fail validation explicitly.
    confidence: float = Field(
        1.0,
        ge=0.0,
        le=1.0,
        description="A confidence score (0.0 to 1.0) of the classification/extraction accuracy. Default to 1.0."
    )
@@ -0,0 +1,206 @@
1
+ import json
2
+ import logging
3
+ from pydantic import BaseModel, Field, validator, field_validator
4
+ from typing import Literal, Optional, Any, Dict, Type, TypeVar, Union, List
5
+
6
+ logger = logging.getLogger(__name__)
7
+ from ..registry.registry import register_model
8
+
9
+ T = TypeVar('T', bound='ReflectionCritique')
10
+
11
+ # --- Enumerations for Clarity ---
12
+
13
# Common component types for better IDE support and documentation
# NOTE(review): advisory only — validate_failing_component() accepts any
# non-empty string; this list is surfaced in validation error messages.
COMMON_COMPONENTS = [
    "PlannerAgent",
    "DiagnosisAgent",
    "ToolCall_CMDB",
    "ToolCall_Metrics",
    "TemporalWorkflow",
    "LangGraphStateNode",
    "PromptTemplate",
    'ToolRegistryService',
    'ExternalLLMService',
    'ActivityError',
    'HTTPCommunication'
]
27
+
28
# FailingComponent is now a string with validation
def validate_failing_component(value: str) -> str:
    """Validate that the failing component is a non-empty string.

    Args:
        value: The component name to validate

    Returns:
        The validated component name (unchanged)

    Raises:
        ValueError: If the value is not a string or is empty/whitespace-only
    """
    is_valid = isinstance(value, str) and bool(value.strip())
    if is_valid:
        return value
    raise ValueError("Failing component must be a non-empty string")
44
+
45
# Type alias for documentation and IDE support
FailingComponent = str

# Define the severity of the identified issue
IssueSeverity = Literal[
    "Critical_Tool_Failure",
    "Major_Logic_Error",
    "Minor_Prompt_Drift",
    "Schema_Mismatch",
    'Critical_Service_Unreachable',
    'Critical_Server_Error_5xx',
    'Minor_Tool_Schema',
    'Major_Read_Timeout'
]

# --- Update 3: Fix Scope ---
# Where a suggested fix must be applied (prompt text, code, config, etc.).
FixScope = Literal[
    'Prompt',
    'Code',
    'Configuration',
    'Tool_Schema',
    # NEW TECHNICAL SCOPE:
    'Infrastructure_Kubernetes',
    'Timeout_Setting'
]
70
@register_model
class ReflectionSuccessOptimization(BaseModel):
    """Structured output for optimizing successful agent executions."""
    trace_id: str = Field(..., description="The unique Langfuse ID of the trace being optimized")
    optimization_opportunity: str = Field(..., description="Brief description of the optimization opportunity")
    impact_level: Literal["Low", "Medium", "High"] = Field(..., description="Expected impact of implementing the optimization")
    # Consistency fix: this was the only field without a Field(...) wrapper,
    # so it lacked a description in the generated JSON schema.
    optimization_type: Literal["Efficiency", "Completeness", "Accuracy", "UserExperience", "CostSaving"] = Field(
        ..., description="Category of the optimization opportunity"
    )
    suggested_improvement: str = Field(..., description="Detailed suggestion for improvement")
    expected_benefit: str = Field(..., description="Expected benefits of implementing the suggestion")
    confidence_score: int = Field(ge=0, le=100, default=50, description="Confidence in this optimization (0-100)")
    optimization_priority: int = Field(ge=1, le=5, default=3, description="Suggested implementation priority (1-5)")
    related_components: List[str] = Field(default_factory=list, description="Components that would be affected by this optimization")
82
+
83
# --- Main Structured Output ---
@register_model
class ReflectionCritique(BaseModel):
    """
    A structured output generated by the Reflection Agent after analyzing a failed or
    sub-optimal Langfuse trace. This object is the key deliverable for the Governance layer (JARVIS v1).
    """
    # Identity of the analyzed run.
    trace_id: str = Field(..., description="The unique Langfuse ID of the trace that was analyzed.")
    run_status: Literal["Failure", "Suboptimal"] = Field(...,
                                                         description="The final status of the run being critiqued.")

    # --- Core Findings ---
    failing_component: str = Field(
        ...,
        description="The specific component, agent, or tool that ultimately caused the failure."
    )

    root_cause_summary: str = Field(
        ...,
        description="A concise, one-sentence summary of the single root cause."
    )

    issue_severity: IssueSeverity = Field(
        ...,
        description="The assessed severity of the root cause."
    )

    # --- Suggested Fix & Confidence ---
    suggested_fix_action: str = Field(
        ...,
        description="A detailed, actionable recommendation to fix the root cause."
    )

    fix_scope: FixScope = Field(
        ...,
        description="The area where the fix needs to be applied."
    )

    fix_confidence_score: int = Field(
        default=50,
        ge=1,
        le=100,
        description="Confidence score (1-100) for the suggested fix. Defaults to 50."
    )

    # --- Audit Score ---
    critique_score: int = Field(
        default=3,
        ge=0,
        le=5,
        description="Quality score (0-5) for the failed run. Defaults to 3."
    )

    # --- Detailed Analysis ---
    detailed_analysis: str = Field(
        ...,
        description="Detailed explanation of the root cause and fix rationale."
    )

    # --- Validation ---
    # NOTE: pydantic's @field_validator wraps the target as a classmethod
    # automatically, so the missing @classmethod decorator is acceptable.
    @field_validator('failing_component')
    def validate_failing_component(cls, v: str) -> str:
        """
        Ensure failing_component is a valid non-empty string.

        This validator is intentionally permissive to allow any non-empty string
        while providing validation for common issues like whitespace-only values.
        """
        try:
            # Use the standalone validator function for consistency.
            # This resolves to the MODULE-LEVEL helper of the same name —
            # class attributes are not visible inside method bodies.
            return validate_failing_component(v)
        except ValueError as e:
            # Provide a more helpful error message
            raise ValueError(
                f"Invalid failing component: {str(e)}. "
                f"Common components include: {', '.join(COMMON_COMPONENTS)}"
            ) from e

    @field_validator('root_cause_summary')
    def validate_root_cause_summary(cls, v: str) -> str:
        """Ensure root_cause_summary is a non-empty string (stored stripped)."""
        if not isinstance(v, str) or not v.strip():
            raise ValueError("Root cause summary must be a non-empty string")
        return v.strip()

    @field_validator('suggested_fix_action')
    def validate_suggested_fix(cls, v: str) -> str:
        """Ensure suggested_fix_action is a non-empty string (stored stripped)."""
        if not isinstance(v, str) or not v.strip():
            raise ValueError("Suggested fix must be a non-empty string")
        return v.strip()

    @field_validator('detailed_analysis')
    def validate_detailed_analysis(cls, v: str) -> str:
        """Ensure detailed_analysis is a non-empty string (stored stripped)."""
        if not isinstance(v, str) or not v.strip():
            raise ValueError("Detailed analysis must be a non-empty string")
        return v.strip()

    # --- Serialization ---
    def model_dump_json(self, **kwargs) -> str:
        """Serialize to JSON with proper error handling.

        Unlike the base implementation, serialization failures do not raise:
        a minimal error payload is returned instead, so callers always get JSON.
        """
        try:
            return super().model_dump_json(**kwargs)
        except Exception as e:
            logger.error(f"Failed to serialize ReflectionCritique: {str(e)}")
            # Return a minimal valid JSON object with error information
            return json.dumps({
                "error": "Failed to serialize reflection critique",
                "trace_id": self.trace_id,
                "failing_component": getattr(self, 'failing_component', 'unknown'),
                "issue_severity": getattr(self, 'issue_severity', 'unknown')
            })

    @classmethod
    def parse_raw(cls: Type[T], json_data: Union[str, bytes], **kwargs) -> T:
        """Parse JSON data with improved error handling.

        NOTE(review): reuses the pydantic-v1 method name ``parse_raw``
        (deprecated in v2) but delegates to v2's ``model_validate_json``.

        Raises:
            ValueError: wrapping any decode/validation failure, with the
                original exception chained as the cause.
        """
        try:
            if isinstance(json_data, bytes):
                json_data = json_data.decode('utf-8')
            return super().model_validate_json(json_data, **kwargs)
        except Exception as e:
            # Slicing works for both str and bytes, so the log preview is safe
            # even if the decode above was what failed.
            logger.error(f"Failed to parse ReflectionCritique: {str(e)}\nData: {json_data[:500]}")
            raise ValueError(f"Invalid reflection critique data: {str(e)}") from e
@@ -0,0 +1,42 @@
1
+ from pydantic import BaseModel, Field
2
+ from typing import Dict, Any, List, Optional
3
+ from ..registry.registry import register_model
4
+
5
+
6
@register_model
class TemplateSchemaSuggestion(BaseModel):
    """
    Schema expected from the LLM for a new document type.

    Used when the extraction pipeline encounters a document that matches no
    known template and asks the LLM to propose one.
    """
    # Stable identifier for the new template.
    template_id: str = Field(
        ...,
        description="The id with which it will be identified."
    )

    document_type: str = Field(
        ...,
        description="The high-level category (e.g., 'Invoice', 'CV', 'Tax Form')."
    )

    template_name: str = Field(
        ...,
        description="A unique, descriptive name (e.g., 'ACME Q3 2024 Invoice')."
    )

    # Optional embedding; None when no fingerprint was computed.
    fingerprint_vector: Optional[List[float]] = Field(
        None,
        description="The fingerprint vector of the document."
    )

    json_schema: Dict[str, Any] = Field(
        ...,
        description=(
            "The Pydantic-compatible JSON Schema defining the required "
            "extraction fields."
        )
    )

    # Defaults to an empty list when the LLM supplies no keywords.
    required_keywords: List[str] = Field(
        default_factory=list,
        description="Top 5 keywords unique to this document template."
    )
@@ -0,0 +1 @@
1
+ from .instrumentation import configure_instrumentation
@@ -0,0 +1,271 @@
1
+ """
2
+ Centralized OpenTelemetry instrumentation and observability configuration.
3
+
4
+ This module provides a consistent way to configure distributed tracing and metrics
5
+ across all services in the GuardianHub ecosystem. It sets up:
6
+
7
+ 1. Distributed Tracing:
8
+ - Automatic instrumentation for FastAPI (incoming requests)
9
+ - HTTPX client instrumentation (outgoing requests)
10
+ - OTLP export for centralized trace collection
11
+
12
+ 2. Metrics:
13
+ - System and application metrics
14
+ - OTLP export for metrics collection
15
+
16
+ 3. Context Propagation:
17
+ - Ensures trace context is propagated across service boundaries (CRITICAL for Langfuse integration)
18
+ - Integrates with Langfuse for LLM/agent tracing
19
+
20
+ The module follows OpenTelemetry best practices and provides sensible defaults
21
+ while remaining configurable for different deployment environments.
22
+ """
23
+
24
+ import os
25
+ from typing import Optional, Union, Tuple, Any
26
+
27
+ # Imports for resilient HTTP session configuration
28
+ import requests
29
+ from opentelemetry import trace, metrics
30
+ from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
31
+ from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
32
+ from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
33
+ from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor
34
+ from opentelemetry.propagate import set_global_textmap
35
+ from opentelemetry.sdk.metrics import MeterProvider
36
+ from opentelemetry.sdk.metrics.export import (
37
+ PeriodicExportingMetricReader,
38
+ ConsoleMetricExporter
39
+ )
40
+ from guardianhub.config.settings import settings
41
+ from opentelemetry.sdk.resources import (
42
+ SERVICE_NAME,
43
+ Resource,
44
+ SERVICE_VERSION,
45
+ SERVICE_NAMESPACE,
46
+ DEPLOYMENT_ENVIRONMENT
47
+ )
48
+ from opentelemetry.sdk.trace import TracerProvider
49
+ from opentelemetry.sdk.trace.export import BatchSpanProcessor
50
+ from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
51
+ from requests.adapters import HTTPAdapter
52
+ from urllib3.util.retry import Retry
53
+
54
+ from guardianhub import get_logger
55
+ from guardianhub.observability.otel_middlewares import GuardianHubSampler
56
+
57
+ logger = get_logger(__name__)
58
+
59
def configure_instrumentation(
        app,
        enable_console_export: bool = False,
        excluded_urls: str = "/health,/metrics",
        httpx_excluded_urls: Union[str, Tuple[str, ...]] = "/health,/metrics",
) -> None:
    """Configure OpenTelemetry instrumentation for the application.

    Service identity (name/version), deployment environment and the OTLP
    endpoint are resolved from the shared ``settings`` object, not passed in.

    Args:
        app: The FastAPI application instance to instrument
        enable_console_export: If True, also export traces/metrics to console
        excluded_urls: Comma-separated URLs to exclude from FastAPI tracing
        httpx_excluded_urls: URLs (comma-separated string or tuple) to exclude
            from HTTPX client tracing
    """
    # 1. Resolve configuration variables
    environment = settings.endpoints.ENVIRONMENT
    otlp_endpoint = settings.endpoints.OTEL_EXPORTER_OTLP_ENDPOINT
    service_version = settings.service.version

    logger.info(
        "Configuring OpenTelemetry instrumentation",
        extra={
            # FIX: previously logged settings.service.version under "service_name"
            "service_name": settings.service.name,
            "environment": environment,
            "version": service_version,
            "otlp_endpoint": otlp_endpoint
        }
    )

    try:
        # 2. Create resource with service metadata
        resource = Resource.create(
            attributes={
                SERVICE_NAME: settings.service.name,
                SERVICE_VERSION: service_version,
                DEPLOYMENT_ENVIRONMENT: environment,
                SERVICE_NAMESPACE: "guardianhub",
            }
        )

        # 3. Configure tracing (spans are exported to the Langfuse OTLP receiver)
        _setup_tracing(resource, settings, otlp_endpoint, enable_console_export)

        # 4. Configure metrics
        _setup_metrics(resource, otlp_endpoint, enable_console_export)

        # 5. Instrument libraries: incoming requests (FastAPI) ...
        FastAPIInstrumentor.instrument_app(
            app=app,
            tracer_provider=trace.get_tracer_provider(),
            excluded_urls=excluded_urls,
        )
        logger.info("Instrumented FastAPI application", extra={"excluded_urls": excluded_urls})

        # ... and outgoing requests (HTTPX). The instrumentor expects a
        # comma-separated string, so normalize a tuple argument first.
        excluded_urls_param = (
            ",".join(httpx_excluded_urls)
            if isinstance(httpx_excluded_urls, tuple)
            else httpx_excluded_urls
        )
        HTTPXClientInstrumentor().instrument(
            excluded_urls=excluded_urls_param,
        )

        logger.info("Instrumented HTTPX clients for outbound requests")
        logger.info("OpenTelemetry instrumentation configured successfully")

    except Exception as e:
        logger.error(
            "Failed to configure OpenTelemetry instrumentation. Continuing without full tracing/metrics.",
            exc_info=True,
            extra={"error": str(e)}
        )
        # Deliberately swallowed: the application still starts, just with
        # reduced observability, which is safer than failing startup.
155
+
156
def _setup_tracing(resource: Resource, settings: Any, otlp_endpoint: Optional[str], console_export: bool) -> None:
    """Configure and initialize OpenTelemetry tracing.

    Args:
        resource: Resource describing this service.
        settings: Global settings object providing the Langfuse endpoint/keys.
        otlp_endpoint: Generic OTLP endpoint, used only for log context here —
            spans are actually exported to the Langfuse OTLP receiver.
        console_export: Currently unused; kept for signature parity with
            _setup_metrics.
    """
    logger.debug("Configuring tracing subsystem")
    tracer_provider = TracerProvider(resource=resource, sampler=GuardianHubSampler())

    try:
        # Langfuse acts as an OTLP receiver; authentication is via the
        # public/secret key headers rather than a bearer token.
        langfuse_base_url = settings.endpoints.LANGFUSE_OTLP_TRACES_ENDPOINT
        langfuse_public_key = settings.endpoints.LANGFUSE_PUBLIC_KEY
        langfuse_secret_key = settings.endpoints.LANGFUSE_SECRET_KEY

        otlp_exporter = OTLPSpanExporter(
            endpoint=f"{langfuse_base_url}/v1/traces",
            headers={
                "x-langfuse-public-key": langfuse_public_key,
                "x-langfuse-secret-key": langfuse_secret_key,
            }
        )

        tracer_provider.add_span_processor(BatchSpanProcessor(otlp_exporter))

        # FIX: log the endpoint actually used (previously the unrelated
        # otlp_endpoint argument was logged, and logged twice).
        logger.info("Configured OTLP trace exporter", extra={"endpoint": f"{langfuse_base_url}/v1/traces"})
    except Exception as e:
        # Only log the error, don't crash startup if the collector is unreachable
        logger.warning(
            "Failed to configure OTLP trace exporter. Check endpoint and network access.",
            extra={"endpoint": otlp_endpoint, "error": str(e)}
        )

    # FIX: install the provider unconditionally so the custom sampler and
    # resource still apply even when the exporter could not be configured
    # (previously set_tracer_provider only ran on the success path).
    trace.set_tracer_provider(tracer_provider)
191
+
192
+
193
def _setup_metrics(resource: Resource, otlp_endpoint: Optional[str], console_export: bool) -> None:
    """Configure and initialize OpenTelemetry metrics.

    Builds one metric reader per enabled destination (console and/or OTLP)
    and installs a MeterProvider only when at least one reader exists.
    """
    logger.debug("Configuring metrics subsystem")

    readers = []

    # Console destination — mainly useful for local debugging.
    if console_export:
        readers.append(PeriodicExportingMetricReader(ConsoleMetricExporter()))
        logger.debug("Enabled console metrics export")

    # OTLP destination — guarded so an unreachable collector never blocks startup.
    if otlp_endpoint:
        try:
            exporter = OTLPMetricExporter(
                endpoint=f"{otlp_endpoint}/v1/metrics",
                session=_create_otlp_session()  # retries transient failures
            )
            readers.append(PeriodicExportingMetricReader(exporter))
            logger.info("Configured OTLP metrics exporter", extra={"endpoint": f"{otlp_endpoint}/v1/metrics (internal path)"})
        except Exception as e:
            logger.warning(
                "Failed to configure OTLP metrics exporter. Check endpoint and network access.",
                extra={"endpoint": otlp_endpoint, "error": str(e)}
            )

    if not readers:
        logger.info("No OTLP endpoint or console export enabled. Metrics will not be exported.")
        return

    metrics.set_meter_provider(
        MeterProvider(resource=resource, metric_readers=readers)
    )
244
+
245
def _create_otlp_session() -> requests.Session:
    """
    Creates a requests session configured for robust OTLP export retries.

    Transient network failures (like 'Connection refused' while the collector
    is still starting in Kubernetes) are retried instead of dropped.
    """
    # 5 retries with backoff_factor=1; only POSTs (the OTLP export verb) are
    # retried, on connection errors and the usual transient status codes.
    retry_strategy = Retry(
        total=5,
        backoff_factor=1,
        status_forcelist=[429, 500, 502, 503, 504],
        allowed_methods=frozenset(['POST']),
    )

    session = requests.Session()
    adapter = HTTPAdapter(max_retries=retry_strategy)
    # Resilient adapter for both plain and TLS endpoints.
    for scheme in ("http://", "https://"):
        session.mount(scheme, adapter)
    return session
268
+
269
def get_meter(name: str) -> metrics.Meter:
    """Get a meter instance with the given name.

    Thin wrapper over metrics.get_meter(); uses whichever MeterProvider was
    installed by _setup_metrics (or the SDK default if none was configured).
    """
    return metrics.get_meter(name)
@@ -0,0 +1,43 @@
1
+ from typing import Dict, Any, Optional
2
+ from opentelemetry import trace
3
+ from opentelemetry.sdk.resources import (
4
+ Resource,
5
+ SERVICE_NAME,
6
+ SERVICE_VERSION,
7
+ SERVICE_NAMESPACE,
8
+ DEPLOYMENT_ENVIRONMENT
9
+ )
10
+ from opentelemetry.sdk.trace import TracerProvider
11
+
12
def configure_resource(
        service_name: str,
        service_namespace: str = "guardianhub",
        service_version: str = "0.1.0",
        service_language: str = "python",
        environment: str = "development",
        resource_attributes: Optional[Dict[str, Any]] = None,
) -> None:
    """
    Configure the global tracer provider with the given service details.

    This should be called once at application startup.

    Args:
        service_name: Name of the service (e.g., 'aura-llm-service')
        service_namespace: Namespace for the service (default: 'guardianhub')
        service_version: Version of the service (default: '0.1.0')
        service_language: Implementation language tag (default: 'python')
        environment: Deployment environment (e.g., 'development', 'staging', 'production')
        resource_attributes: Additional attributes merged into the resource;
            these override the standard keys on collision
    """
    attrs: Dict[str, Any] = {
        SERVICE_NAME: service_name,
        SERVICE_NAMESPACE: service_namespace,
        SERVICE_VERSION: service_version,
        DEPLOYMENT_ENVIRONMENT: environment,
        "service.language": service_language,
    }
    if resource_attributes:
        attrs.update(resource_attributes)

    trace.set_tracer_provider(TracerProvider(resource=Resource(attributes=attrs)))