chuk-tool-processor 0.7.0__tar.gz → 0.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of chuk-tool-processor might be problematic. Click here for more details.

Files changed (76)
  1. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/PKG-INFO +487 -7
  2. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/README.md +486 -6
  3. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/pyproject.toml +8 -1
  4. chuk_tool_processor-0.9/src/chuk_tool_processor/core/__init__.py +32 -0
  5. chuk_tool_processor-0.9/src/chuk_tool_processor/core/exceptions.py +257 -0
  6. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/core/processor.py +30 -1
  7. chuk_tool_processor-0.9/src/chuk_tool_processor/execution/wrappers/__init__.py +42 -0
  8. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/execution/wrappers/caching.py +43 -10
  9. chuk_tool_processor-0.9/src/chuk_tool_processor/execution/wrappers/circuit_breaker.py +370 -0
  10. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/execution/wrappers/rate_limiting.py +31 -1
  11. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/execution/wrappers/retry.py +93 -53
  12. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/mcp/setup_mcp_http_streamable.py +38 -2
  13. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/mcp/setup_mcp_sse.py +38 -2
  14. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/mcp/transport/http_streamable_transport.py +16 -3
  15. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/mcp/transport/sse_transport.py +16 -3
  16. chuk_tool_processor-0.9/src/chuk_tool_processor/models/__init__.py +21 -0
  17. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/models/tool_call.py +34 -1
  18. chuk_tool_processor-0.9/src/chuk_tool_processor/models/tool_spec.py +350 -0
  19. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/models/validated_tool.py +22 -2
  20. chuk_tool_processor-0.9/src/chuk_tool_processor/observability/__init__.py +30 -0
  21. chuk_tool_processor-0.9/src/chuk_tool_processor/observability/metrics.py +312 -0
  22. chuk_tool_processor-0.9/src/chuk_tool_processor/observability/setup.py +105 -0
  23. chuk_tool_processor-0.9/src/chuk_tool_processor/observability/tracing.py +343 -0
  24. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor.egg-info/PKG-INFO +487 -7
  25. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor.egg-info/SOURCES.txt +6 -0
  26. chuk_tool_processor-0.7.0/src/chuk_tool_processor/core/__init__.py +0 -1
  27. chuk_tool_processor-0.7.0/src/chuk_tool_processor/core/exceptions.py +0 -51
  28. chuk_tool_processor-0.7.0/src/chuk_tool_processor/models/__init__.py +0 -1
  29. chuk_tool_processor-0.7.0/src/chuk_tool_processor/utils/__init__.py +0 -0
  30. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/setup.cfg +0 -0
  31. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/__init__.py +0 -0
  32. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/execution/__init__.py +0 -0
  33. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/execution/strategies/__init__.py +0 -0
  34. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/execution/strategies/inprocess_strategy.py +0 -0
  35. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/execution/strategies/subprocess_strategy.py +0 -0
  36. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/execution/tool_executor.py +0 -0
  37. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/logging/__init__.py +0 -0
  38. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/logging/context.py +0 -0
  39. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/logging/formatter.py +0 -0
  40. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/logging/helpers.py +0 -0
  41. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/logging/metrics.py +0 -0
  42. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/mcp/__init__.py +0 -0
  43. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/mcp/mcp_tool.py +0 -0
  44. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/mcp/register_mcp_tools.py +0 -0
  45. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/mcp/setup_mcp_stdio.py +0 -0
  46. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/mcp/stream_manager.py +0 -0
  47. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/mcp/transport/__init__.py +0 -0
  48. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/mcp/transport/base_transport.py +0 -0
  49. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/mcp/transport/models.py +0 -0
  50. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/mcp/transport/stdio_transport.py +0 -0
  51. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/models/execution_strategy.py +0 -0
  52. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/models/streaming_tool.py +0 -0
  53. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/models/tool_export_mixin.py +0 -0
  54. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/models/tool_result.py +0 -0
  55. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/plugins/__init__.py +0 -0
  56. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/plugins/discovery.py +0 -0
  57. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/plugins/parsers/__init__.py +0 -0
  58. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/plugins/parsers/base.py +0 -0
  59. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/plugins/parsers/function_call_tool.py +0 -0
  60. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/plugins/parsers/json_tool.py +0 -0
  61. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/plugins/parsers/openai_tool.py +0 -0
  62. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/plugins/parsers/xml_tool.py +0 -0
  63. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/registry/__init__.py +0 -0
  64. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/registry/auto_register.py +0 -0
  65. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/registry/decorators.py +0 -0
  66. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/registry/interface.py +0 -0
  67. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/registry/metadata.py +0 -0
  68. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/registry/provider.py +0 -0
  69. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/registry/providers/__init__.py +0 -0
  70. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/registry/providers/memory.py +0 -0
  71. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/registry/tool_export.py +0 -0
  72. {chuk_tool_processor-0.7.0/src/chuk_tool_processor/execution/wrappers → chuk_tool_processor-0.9/src/chuk_tool_processor/utils}/__init__.py +0 -0
  73. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor/utils/validation.py +0 -0
  74. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor.egg-info/dependency_links.txt +0 -0
  75. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor.egg-info/requires.txt +0 -0
  76. {chuk_tool_processor-0.7.0 → chuk_tool_processor-0.9}/src/chuk_tool_processor.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chuk-tool-processor
3
- Version: 0.7.0
3
+ Version: 0.9
4
4
  Summary: Async-native framework for registering, discovering, and executing tools referenced in LLM responses
5
5
  Author-email: CHUK Team <chrishayuk@somejunkmailbox.com>
6
6
  Maintainer-email: CHUK Team <chrishayuk@somejunkmailbox.com>
@@ -72,12 +72,15 @@ Unlike full-fledged LLM frameworks (LangChain, LlamaIndex, etc.), CHUK Tool Proc
72
72
  Research code vs production code is about handling the edges:
73
73
 
74
74
  - **Timeouts**: Every tool execution has proper timeout handling
75
- - **Retries**: Automatic retry with exponential backoff
75
+ - **Retries**: Automatic retry with exponential backoff and deadline awareness
76
76
  - **Rate Limiting**: Global and per-tool rate limits with sliding windows
77
- - **Caching**: Intelligent result caching with TTL
78
- - **Error Handling**: Graceful degradation, never crashes your app
77
+ - **Caching**: Intelligent result caching with TTL and idempotency key support
78
+ - **Circuit Breakers**: Prevent cascading failures with automatic fault detection
79
+ - **Error Handling**: Machine-readable error codes with structured details
79
80
  - **Observability**: Structured logging, metrics, request tracing
80
81
  - **Safety**: Subprocess isolation for untrusted code
82
+ - **Type Safety**: Pydantic validation with LLM-friendly argument coercion
83
+ - **Tool Discovery**: Formal schema export (OpenAI, Anthropic, MCP formats)
81
84
 
82
85
  ### It's About Stacks
83
86
 
@@ -91,11 +94,13 @@ CHUK Tool Processor uses a **composable stack architecture**:
91
94
  │ tool calls
92
95
 
93
96
  ┌─────────────────────────────────┐
94
- │ Caching Wrapper │ ← Cache expensive results
97
+ │ Caching Wrapper │ ← Cache expensive results (idempotency keys)
95
98
  ├─────────────────────────────────┤
96
99
  │ Rate Limiting Wrapper │ ← Prevent API abuse
97
100
  ├─────────────────────────────────┤
98
- │ Retry Wrapper │ ← Handle transient failures
101
+ │ Retry Wrapper │ ← Handle transient failures (exponential backoff)
102
+ ├─────────────────────────────────┤
103
+ │ Circuit Breaker Wrapper │ ← Prevent cascading failures (CLOSED/OPEN/HALF_OPEN)
99
104
  ├─────────────────────────────────┤
100
105
  │ Execution Strategy │ ← How to run tools
101
106
  │ • InProcess (fast) │
@@ -179,7 +184,7 @@ asyncio.run(main())
179
184
  | 🔌 **Connect to external tools** | MCP integration (HTTP/STDIO/SSE) | [MCP Integration](#5-mcp-integration-external-tools) |
180
185
  | 🛡️ **Production deployment** | Timeouts, retries, rate limits, caching | [Production Configuration](#using-the-processor) |
181
186
  | 🔒 **Run untrusted code safely** | Subprocess isolation strategy | [Subprocess Strategy](#using-subprocess-strategy) |
182
- | 📊 **Monitor and observe** | Structured logging and metrics | [Observability](#observability) |
187
+ | 📊 **Monitor and observe** | OpenTelemetry + Prometheus | [Observability](#opentelemetry--prometheus-drop-in-observability) |
183
188
  | 🌊 **Stream incremental results** | StreamingTool pattern | [StreamingTool](#streamingtool-real-time-results) |
184
189
 
185
190
  ### Real-World Quick Start
@@ -639,6 +644,192 @@ processor = ToolProcessor(
639
644
  )
640
645
  ```
641
646
 
647
+ ### Advanced Production Features
648
+
649
+ Beyond basic configuration, CHUK Tool Processor includes several advanced features for production environments:
650
+
651
+ #### Circuit Breaker Pattern
652
+
653
+ Prevent cascading failures by automatically opening circuits for failing tools:
654
+
655
+ ```python
656
+ from chuk_tool_processor.core.processor import ToolProcessor
657
+
658
+ processor = ToolProcessor(
659
+ enable_circuit_breaker=True,
660
+ circuit_breaker_threshold=5, # Open after 5 failures
661
+ circuit_breaker_timeout=60.0, # Try recovery after 60s
662
+ )
663
+
664
+ # Circuit states: CLOSED → OPEN → HALF_OPEN → CLOSED
665
+ # - CLOSED: Normal operation
666
+ # - OPEN: Blocking requests (too many failures)
667
+ # - HALF_OPEN: Testing recovery with limited requests
668
+ ```
669
+
670
+ **How it works:**
671
+ 1. Tool fails repeatedly (hits threshold)
672
+ 2. Circuit opens → requests blocked immediately
673
+ 3. After timeout, circuit enters HALF_OPEN
674
+ 4. If test requests succeed → circuit closes
675
+ 5. If test requests fail → back to OPEN
676
+
677
+ **Benefits:**
678
+ - Prevents wasting resources on failing services
679
+ - Fast-fail for better UX
680
+ - Automatic recovery detection
681
+
682
+ #### Idempotency Keys
683
+
684
+ Automatically deduplicate LLM tool calls using SHA256-based keys:
685
+
686
+ ```python
687
+ from chuk_tool_processor.models.tool_call import ToolCall
688
+
689
+ # Idempotency keys are auto-generated
690
+ call1 = ToolCall(tool="search", arguments={"query": "Python"})
691
+ call2 = ToolCall(tool="search", arguments={"query": "Python"})
692
+
693
+ # Same arguments = same idempotency key
694
+ assert call1.idempotency_key == call2.idempotency_key
695
+
696
+ # Used automatically by caching layer
697
+ processor = ToolProcessor(enable_caching=True)
698
+ results1 = await processor.execute([call1]) # Executes
699
+ results2 = await processor.execute([call2]) # Cache hit!
700
+ ```
701
+
702
+ **Benefits:**
703
+ - Prevents duplicate executions from LLM retries
704
+ - Deterministic cache keys
705
+ - No manual key management needed
706
+
707
+ #### Tool Schema Export
708
+
709
+ Export tool definitions to multiple formats for LLM prompting:
710
+
711
+ ```python
712
+ from chuk_tool_processor.models.tool_spec import ToolSpec, ToolCapability
713
+ from chuk_tool_processor.models.validated_tool import ValidatedTool
714
+
715
+ @register_tool(name="weather")
716
+ class WeatherTool(ValidatedTool):
717
+ """Get current weather for a location."""
718
+
719
+ class Arguments(BaseModel):
720
+ location: str = Field(..., description="City name")
721
+
722
+ class Result(BaseModel):
723
+ temperature: float
724
+ conditions: str
725
+
726
+ # Generate tool spec
727
+ spec = ToolSpec.from_validated_tool(WeatherTool)
728
+
729
+ # Export to different formats
730
+ openai_format = spec.to_openai() # For OpenAI function calling
731
+ anthropic_format = spec.to_anthropic() # For Claude tools
732
+ mcp_format = spec.to_mcp() # For MCP servers
733
+
734
+ # Example OpenAI format:
735
+ # {
736
+ # "type": "function",
737
+ # "function": {
738
+ # "name": "weather",
739
+ # "description": "Get current weather for a location.",
740
+ # "parameters": {...} # JSON Schema
741
+ # }
742
+ # }
743
+ ```
744
+
745
+ **Use cases:**
746
+ - Generate tool definitions for LLM system prompts
747
+ - Documentation generation
748
+ - API contract validation
749
+ - Cross-platform tool sharing
750
+
751
+ #### Machine-Readable Error Codes
752
+
753
+ Structured error handling with error codes for programmatic responses:
754
+
755
+ ```python
756
+ from chuk_tool_processor.core.exceptions import (
757
+ ErrorCode,
758
+ ToolNotFoundError,
759
+ ToolTimeoutError,
760
+ ToolCircuitOpenError,
761
+ )
762
+
763
+ try:
764
+ results = await processor.process(llm_output)
765
+ except ToolNotFoundError as e:
766
+ if e.code == ErrorCode.TOOL_NOT_FOUND:
767
+ # Suggest available tools to LLM
768
+ available = e.details.get("available_tools", [])
769
+ print(f"Try one of: {available}")
770
+ except ToolTimeoutError as e:
771
+ if e.code == ErrorCode.TOOL_TIMEOUT:
772
+ # Inform LLM to use faster alternative
773
+ timeout = e.details["timeout"]
774
+ print(f"Tool timed out after {timeout}s")
775
+ except ToolCircuitOpenError as e:
776
+ if e.code == ErrorCode.TOOL_CIRCUIT_OPEN:
777
+ # Tell LLM this service is temporarily down
778
+ reset_time = e.details.get("reset_timeout")
779
+ print(f"Service unavailable, retry in {reset_time}s")
780
+
781
+ # All errors include .to_dict() for logging (call it inside an except block, where `e` is still bound)
782
+ error_dict = e.to_dict()
783
+ # {
784
+ # "error": "ToolCircuitOpenError",
785
+ # "code": "TOOL_CIRCUIT_OPEN",
786
+ # "message": "Tool 'api_tool' circuit breaker is open...",
787
+ # "details": {"tool_name": "api_tool", "failure_count": 5, ...}
788
+ # }
789
+ ```
790
+
791
+ **Available error codes:**
792
+ - `TOOL_NOT_FOUND` - Tool doesn't exist in registry
793
+ - `TOOL_EXECUTION_FAILED` - Tool execution error
794
+ - `TOOL_TIMEOUT` - Tool exceeded timeout
795
+ - `TOOL_CIRCUIT_OPEN` - Circuit breaker is open
796
+ - `TOOL_RATE_LIMITED` - Rate limit exceeded
797
+ - `TOOL_VALIDATION_ERROR` - Argument validation failed
798
+ - `MCP_CONNECTION_FAILED` - MCP server unreachable
799
+ - Plus 11 more for comprehensive error handling
800
+
801
+ #### LLM-Friendly Argument Coercion
802
+
803
+ Automatically coerce LLM outputs to correct types:
804
+
805
+ ```python
806
+ from chuk_tool_processor.models.validated_tool import ValidatedTool
807
+
808
+ class SearchTool(ValidatedTool):
809
+ class Arguments(BaseModel):
810
+ query: str
811
+ limit: int = 10
812
+ category: str = "all"
813
+
814
+ # Pydantic config for LLM outputs:
815
+ # - str_strip_whitespace=True → Remove accidental whitespace
816
+ # - extra="ignore" → Ignore unknown fields
817
+ # - use_enum_values=True → Convert enums to values
818
+ # - coerce_numbers_to_str=False → Keep type strictness
819
+
820
+ # LLM outputs often have quirks:
821
+ llm_output = {
822
+ "query": " Python tutorials ", # Extra whitespace
823
+ "limit": "5", # String instead of int
824
+ "unknown_field": "ignored" # Extra field
825
+ }
826
+
827
+ # ValidatedTool automatically coerces and validates
828
+ tool = SearchTool()
829
+ result = await tool.execute(**llm_output)
830
+ # ✅ Works! Whitespace stripped, "5" → 5, extra field ignored
831
+ ```
832
+
642
833
  ## Advanced Topics
643
834
 
644
835
  ### Using Subprocess Strategy
@@ -907,6 +1098,294 @@ async def main():
907
1098
  asyncio.run(main())
908
1099
  ```
909
1100
 
1101
+ #### OpenTelemetry & Prometheus (Drop-in Observability)
1102
+
1103
+ **Why Telemetry Matters**: In production, you need to know *what* your tools are doing, *how long* they take, *when* they fail, and *why*. CHUK Tool Processor provides **enterprise-grade telemetry** that operations teams expect—with zero manual instrumentation.
1104
+
1105
+ **One function call. Full observability.**
1106
+
1107
+ ```python
1108
+ from chuk_tool_processor.observability import setup_observability
1109
+
1110
+ # Enable everything
1111
+ setup_observability(
1112
+ service_name="my-tool-service",
1113
+ enable_tracing=True, # OpenTelemetry distributed tracing
1114
+ enable_metrics=True, # Prometheus metrics endpoint
1115
+ metrics_port=9090 # HTTP endpoint at :9090/metrics
1116
+ )
1117
+
1118
+ # Every tool execution is now automatically traced and metered!
1119
+ ```
1120
+
1121
+ **What You Get (Automatically)**
1122
+
1123
+ ✅ **Distributed Traces** - Understand exactly what happened in each tool call
1124
+ - See the complete execution timeline for every tool
1125
+ - Track retries, cache hits, circuit breaker state changes
1126
+ - Correlate failures across your system
1127
+ - Export to Jaeger, Zipkin, or any OTLP-compatible backend
1128
+
1129
+ ✅ **Production Metrics** - Monitor health and performance in real-time
1130
+ - Track error rates, latency percentiles (P50/P95/P99)
1131
+ - Monitor cache hit rates and retry attempts
1132
+ - Alert on circuit breaker opens and rate limit hits
1133
+ - Export to Prometheus, Grafana, or any metrics backend
1134
+
1135
+ ✅ **Zero Configuration** - Works out of the box
1136
+ - No manual instrumentation needed
1137
+ - No code changes to existing tools
1138
+ - Gracefully degrades if the optional packages are not installed
1139
+ - Standard OTEL and Prometheus formats
1140
+
1141
+ **Installation**
1142
+
1143
+ ```bash
1144
+ # Install observability dependencies
1145
+ pip install chuk-tool-processor[observability]
1146
+
1147
+ # Or manually
1148
+ pip install opentelemetry-api opentelemetry-sdk opentelemetry-exporter-otlp prometheus-client
1149
+
1150
+ # Or with uv (recommended)
1151
+ uv pip install chuk-tool-processor --group observability
1152
+ ```
1153
+
1154
+ **Quick Start: See Your Tools in Action**
1155
+
1156
+ ```python
1157
+ import asyncio
1158
+ from chuk_tool_processor.observability import setup_observability
1159
+ from chuk_tool_processor.core.processor import ToolProcessor
1160
+ from chuk_tool_processor.registry import initialize, register_tool
1161
+
1162
+ @register_tool(name="weather_api")
1163
+ class WeatherTool:
1164
+ async def execute(self, location: str) -> dict:
1165
+ # Simulating API call
1166
+ return {"temperature": 72, "conditions": "sunny", "location": location}
1167
+
1168
+ async def main():
1169
+ # 1. Enable observability (one line!)
1170
+ setup_observability(
1171
+ service_name="weather-service",
1172
+ enable_tracing=True,
1173
+ enable_metrics=True,
1174
+ metrics_port=9090
1175
+ )
1176
+
1177
+ # 2. Create processor with production features
1178
+ await initialize()
1179
+ processor = ToolProcessor(
1180
+ enable_caching=True, # Cache expensive API calls
1181
+ enable_retries=True, # Auto-retry on failures
1182
+ enable_circuit_breaker=True, # Prevent cascading failures
1183
+ enable_rate_limiting=True, # Prevent API abuse
1184
+ )
1185
+
1186
+ # 3. Execute tools - automatically traced and metered
1187
+ results = await processor.process(
1188
+ '<tool name="weather_api" args=\'{"location": "San Francisco"}\'/>'
1189
+ )
1190
+
1191
+ print(f"Result: {results[0].result}")
1192
+ print(f"Duration: {results[0].duration}s")
1193
+ print(f"Cached: {results[0].cached}")
1194
+
1195
+ asyncio.run(main())
1196
+ ```
1197
+
1198
+ **View Your Data**
1199
+
1200
+ ```bash
1201
+ # Start Jaeger for trace visualization
1202
+ docker run -d -p 4317:4317 -p 16686:16686 jaegertracing/all-in-one:latest
1203
+
1204
+ # Start your application
1205
+ python your_app.py
1206
+
1207
+ # View distributed traces
1208
+ open http://localhost:16686
1209
+
1210
+ # View Prometheus metrics
1211
+ curl http://localhost:9090/metrics | grep tool_
1212
+ ```
1213
+
1214
+ **What Gets Traced (Automatic Spans)**
1215
+
1216
+ Every execution layer creates standardized OpenTelemetry spans:
1217
+
1218
+ | Span Name | When Created | Key Attributes |
1219
+ |-----------|--------------|----------------|
1220
+ | `tool.execute` | Every tool execution | `tool.name`, `tool.namespace`, `tool.duration_ms`, `tool.cached`, `tool.error`, `tool.success` |
1221
+ | `tool.cache.lookup` | Cache lookup | `cache.hit` (true/false), `cache.operation=lookup` |
1222
+ | `tool.cache.set` | Cache write | `cache.ttl`, `cache.operation=set` |
1223
+ | `tool.retry.attempt` | Each retry | `retry.attempt`, `retry.max_attempts`, `retry.success` |
1224
+ | `tool.circuit_breaker.check` | Circuit state check | `circuit.state` (CLOSED/OPEN/HALF_OPEN) |
1225
+ | `tool.rate_limit.check` | Rate limit check | `rate_limit.allowed` (true/false) |
1226
+
1227
+ **Example trace hierarchy:**
1228
+ ```
1229
+ tool.execute (weather_api)
1230
+ ├── tool.cache.lookup (miss)
1231
+ ├── tool.retry.attempt (0)
1232
+ │ └── tool.execute (actual API call)
1233
+ ├── tool.retry.attempt (1) [if first failed]
1234
+ └── tool.cache.set (store result)
1235
+ ```
1236
+
1237
+ **What Gets Metered (Automatic Metrics)**
1238
+
1239
+ Standard Prometheus metrics exposed at `/metrics`:
1240
+
1241
+ | Metric | Type | Labels | Use For |
1242
+ |--------|------|--------|---------|
1243
+ | `tool_executions_total` | Counter | `tool`, `namespace`, `status` | Error rate, request volume |
1244
+ | `tool_execution_duration_seconds` | Histogram | `tool`, `namespace` | P50/P95/P99 latency |
1245
+ | `tool_cache_operations_total` | Counter | `tool`, `operation`, `result` | Cache hit rate |
1246
+ | `tool_retry_attempts_total` | Counter | `tool`, `attempt`, `success` | Retry frequency |
1247
+ | `tool_circuit_breaker_state` | Gauge | `tool` | Circuit health (0=CLOSED, 1=OPEN, 2=HALF_OPEN) |
1248
+ | `tool_circuit_breaker_failures_total` | Counter | `tool` | Failure count |
1249
+ | `tool_rate_limit_checks_total` | Counter | `tool`, `allowed` | Rate limit hits |
1250
+
1251
+ **Useful PromQL Queries**
1252
+
1253
+ ```promql
1254
+ # Error rate per tool (last 5 minutes)
1255
+ sum by (tool) (rate(tool_executions_total{status="error"}[5m]))
1256
+ / sum by (tool) (rate(tool_executions_total[5m]))
1257
+
1258
+ # P95 latency
1259
+ histogram_quantile(0.95, rate(tool_execution_duration_seconds_bucket[5m]))
1260
+
1261
+ # Cache hit rate
1262
+ sum by (tool) (rate(tool_cache_operations_total{operation="lookup", result="hit"}[5m]))
1263
+ / sum by (tool) (rate(tool_cache_operations_total{operation="lookup"}[5m]))
1264
+
1265
+ # Tools currently circuit broken
1266
+ tool_circuit_breaker_state == 1
1267
+
1268
+ # Retry rate (how often tools need retries)
1269
+ sum by (tool) (rate(tool_retry_attempts_total{attempt!="0"}[5m]))
1270
+ / sum by (tool) (rate(tool_executions_total[5m]))
1271
+ ```
1272
+
1273
+ **Configuration**
1274
+
1275
+ Configure via environment variables:
1276
+
1277
+ ```bash
1278
+ # OTLP endpoint (where traces are sent)
1279
+ export OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4317
1280
+
1281
+ # Service name (shown in traces)
1282
+ export OTEL_SERVICE_NAME=production-api
1283
+
1284
+ # Sampling (reduce overhead in high-traffic scenarios)
1285
+ export OTEL_TRACES_SAMPLER=traceidratio
1286
+ export OTEL_TRACES_SAMPLER_ARG=0.1 # Sample 10% of traces
1287
+ ```
1288
+
1289
+ Or in code:
1290
+
1291
+ ```python
1292
+ status = setup_observability(
1293
+ service_name="my-service",
1294
+ enable_tracing=True,
1295
+ enable_metrics=True,
1296
+ metrics_port=9090,
1297
+ metrics_host="0.0.0.0" # Allow external Prometheus scraping
1298
+ )
1299
+
1300
+ # Check status
1301
+ if status["tracing_enabled"]:
1302
+ print("Traces exporting to OTLP endpoint")
1303
+ if status["metrics_server_started"]:
1304
+ print("Metrics available at http://localhost:9090/metrics")
1305
+ ```
1306
+
1307
+ **Production Integration**
1308
+
1309
+ **With Grafana + Prometheus:**
1310
+ ```yaml
1311
+ # prometheus.yml
1312
+ scrape_configs:
1313
+ - job_name: 'chuk-tool-processor'
1314
+ scrape_interval: 15s
1315
+ static_configs:
1316
+ - targets: ['app:9090']
1317
+ ```
1318
+
1319
+ **With OpenTelemetry Collector:**
1320
+ ```yaml
1321
+ # otel-collector-config.yaml
1322
+ receivers:
1323
+ otlp:
1324
+ protocols:
1325
+ grpc:
1326
+ endpoint: 0.0.0.0:4317
1327
+
1328
+ exporters:
1329
+ jaeger:
1330
+ endpoint: jaeger:14250
1331
+ prometheus:
1332
+ endpoint: 0.0.0.0:8889
1333
+
1334
+ service:
1335
+ pipelines:
1336
+ traces:
1337
+ receivers: [otlp]
1338
+ exporters: [jaeger]
1339
+ ```
1340
+
1341
+ **With Cloud Providers:**
1342
+ ```bash
1343
+ # AWS X-Ray
1344
+ export OTEL_TRACES_SAMPLER=xray
1345
+
1346
+ # Google Cloud Trace
1347
+ export OTEL_EXPORTER_OTLP_ENDPOINT=https://cloudtrace.googleapis.com/v1/projects/PROJECT_ID/traces
1348
+
1349
+ # Datadog
1350
+ export OTEL_EXPORTER_OTLP_ENDPOINT=http://datadog-agent:4317
1351
+ ```
1352
+
1353
+ **Why This Matters**
1354
+
1355
+ ❌ **Without telemetry:**
1356
+ - "Why is this tool slow?" → No idea
1357
+ - "Is caching helping?" → Guessing
1358
+ - "Did that retry work?" → Check logs manually
1359
+ - "Is the circuit breaker working?" → Hope so
1360
+ - "Which tool is failing?" → Debug blindly
1361
+
1362
+ ✅ **With telemetry:**
1363
+ - See exact execution timeline in Jaeger
1364
+ - Monitor cache hit rate in Grafana
1365
+ - Alert when retry rate spikes
1366
+ - Dashboard shows circuit breaker states
1367
+ - Metrics pinpoint the failing tool immediately
1368
+
1369
+ **Learn More**
1370
+
1371
+ 📖 **Complete Guide**: See [`OBSERVABILITY.md`](OBSERVABILITY.md) for:
1372
+ - Complete span and metric specifications
1373
+ - Architecture and implementation details
1374
+ - Integration guides (Jaeger, Grafana, OTEL Collector)
1375
+ - Testing observability features
1376
+ - Environment variable configuration
1377
+
1378
+ 🎯 **Working Example**: See `examples/observability_demo.py` for a complete demonstration with retries, caching, and circuit breakers
1379
+
1380
+ **Benefits**
1381
+
1382
+ ✅ **Drop-in** - One function call, zero code changes
1383
+ ✅ **Automatic** - All execution layers instrumented
1384
+ ✅ **Standard** - OTEL + Prometheus (works with existing tools)
1385
+ ✅ **Production-ready** - Ops teams get exactly what they expect
1386
+ ✅ **Optional** - Gracefully degrades if the optional packages are not installed
1387
+ ✅ **Zero-overhead** - No performance impact when disabled
1388
+
910
1389
  ### Error Handling
911
1390
 
912
1391
  ```python
@@ -1137,6 +1616,7 @@ Check out the [`examples/`](examples/) directory for complete working examples:
1137
1616
  - **Execution strategies**: `examples/execution_strategies_demo.py` - InProcess vs Subprocess
1138
1617
  - **Production wrappers**: `examples/wrappers_demo.py` - Caching, retries, rate limiting
1139
1618
  - **Streaming tools**: `examples/streaming_demo.py` - Real-time incremental results
1619
+ - **Observability**: `examples/observability_demo.py` - OpenTelemetry + Prometheus integration
1140
1620
 
1141
1621
  ### MCP Integration (Real-World)
1142
1622
  - **Notion + OAuth**: `examples/notion_oauth.py` - Complete OAuth 2.1 flow with HTTP Streamable