tsikol 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +22 -0
- data/CONTRIBUTING.md +84 -0
- data/LICENSE +21 -0
- data/README.md +579 -0
- data/Rakefile +12 -0
- data/docs/README.md +69 -0
- data/docs/api/middleware.md +721 -0
- data/docs/api/prompt.md +858 -0
- data/docs/api/resource.md +651 -0
- data/docs/api/server.md +509 -0
- data/docs/api/test-helpers.md +591 -0
- data/docs/api/tool.md +527 -0
- data/docs/cookbook/authentication.md +651 -0
- data/docs/cookbook/caching.md +877 -0
- data/docs/cookbook/dynamic-tools.md +970 -0
- data/docs/cookbook/error-handling.md +887 -0
- data/docs/cookbook/logging.md +1044 -0
- data/docs/cookbook/rate-limiting.md +717 -0
- data/docs/examples/code-assistant.md +922 -0
- data/docs/examples/complete-server.md +726 -0
- data/docs/examples/database-manager.md +1198 -0
- data/docs/examples/devops-tools.md +1382 -0
- data/docs/examples/echo-server.md +501 -0
- data/docs/examples/weather-service.md +822 -0
- data/docs/guides/completion.md +472 -0
- data/docs/guides/getting-started.md +462 -0
- data/docs/guides/middleware.md +823 -0
- data/docs/guides/project-structure.md +434 -0
- data/docs/guides/prompts.md +920 -0
- data/docs/guides/resources.md +720 -0
- data/docs/guides/sampling.md +804 -0
- data/docs/guides/testing.md +863 -0
- data/docs/guides/tools.md +627 -0
- data/examples/README.md +92 -0
- data/examples/advanced_features.rb +129 -0
- data/examples/basic-migrated/app/prompts/weather_chat.rb +44 -0
- data/examples/basic-migrated/app/resources/weather_alerts.rb +18 -0
- data/examples/basic-migrated/app/tools/get_current_weather.rb +34 -0
- data/examples/basic-migrated/app/tools/get_forecast.rb +30 -0
- data/examples/basic-migrated/app/tools/get_weather_by_coords.rb +48 -0
- data/examples/basic-migrated/server.rb +25 -0
- data/examples/basic.rb +73 -0
- data/examples/full_featured.rb +175 -0
- data/examples/middleware_example.rb +112 -0
- data/examples/sampling_example.rb +104 -0
- data/examples/weather-service/app/prompts/weather/chat.rb +90 -0
- data/examples/weather-service/app/resources/weather/alerts.rb +59 -0
- data/examples/weather-service/app/tools/weather/get_current.rb +82 -0
- data/examples/weather-service/app/tools/weather/get_forecast.rb +90 -0
- data/examples/weather-service/server.rb +28 -0
- data/exe/tsikol +6 -0
- data/lib/tsikol/cli/templates/Gemfile.erb +10 -0
- data/lib/tsikol/cli/templates/README.md.erb +38 -0
- data/lib/tsikol/cli/templates/gitignore.erb +49 -0
- data/lib/tsikol/cli/templates/prompt.rb.erb +53 -0
- data/lib/tsikol/cli/templates/resource.rb.erb +29 -0
- data/lib/tsikol/cli/templates/server.rb.erb +24 -0
- data/lib/tsikol/cli/templates/tool.rb.erb +60 -0
- data/lib/tsikol/cli.rb +203 -0
- data/lib/tsikol/error_handler.rb +141 -0
- data/lib/tsikol/health.rb +198 -0
- data/lib/tsikol/http_transport.rb +72 -0
- data/lib/tsikol/lifecycle.rb +149 -0
- data/lib/tsikol/middleware.rb +168 -0
- data/lib/tsikol/prompt.rb +101 -0
- data/lib/tsikol/resource.rb +53 -0
- data/lib/tsikol/router.rb +190 -0
- data/lib/tsikol/server.rb +660 -0
- data/lib/tsikol/stdio_transport.rb +108 -0
- data/lib/tsikol/test_helpers.rb +261 -0
- data/lib/tsikol/tool.rb +111 -0
- data/lib/tsikol/version.rb +5 -0
- data/lib/tsikol.rb +72 -0
- metadata +219 -0
@@ -0,0 +1,887 @@
|
|
1
|
+
# Error Handling Recipe
|
2
|
+
|
3
|
+
This recipe demonstrates comprehensive error handling strategies for building robust MCP servers.
|
4
|
+
|
5
|
+
## Basic Error Handling
|
6
|
+
|
7
|
+
### Tool Error Handling
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
require 'securerandom'
require 'timeout'

# Example tool that validates its input, runs the work under a timeout with
# bounded retries, and converts every rescued failure into a message string.
#
# NOTE(review): `format_result` and `perform_processing` are not defined in
# this example; presumably they are supplied by the reader — confirm.
class SafeTool < Tsikol::Tool
  description "Tool with comprehensive error handling"

  parameter :input do
    type :string
    required
    description "Input to process"
  end

  # Validates, processes and formats +input+.
  #
  # @param input [String] text to process
  # @return [String] the formatted result, or a user-facing error message
  #   when a ValidationError, ExternalServiceError or other StandardError
  #   was rescued
  def execute(input:)
    validate_input!(input)
    result = process_safely(input)
    format_result(result)
  rescue ValidationError => e
    # Expected failure: tell the user what was wrong with the input.
    handle_validation_error(e)
  rescue ExternalServiceError => e
    # Dependency failure (including timeouts, see #with_timeout).
    handle_external_error(e)
  rescue => e
    # Anything else is a bug or an unknown condition.
    handle_unexpected_error(e)
  end

  private

  # Raises ValidationError unless +input+ is non-blank, at most 1000
  # characters, and made only of word characters, whitespace and -.,!?
  def validate_input!(input)
    # to_s makes a missing (nil) input a validation failure instead of a
    # NoMethodError that would be reported as an "unexpected" error.
    raise ValidationError, "Input cannot be empty" if input.to_s.strip.empty?
    raise ValidationError, "Input too long (max 1000 chars)" if input.length > 1000
    raise ValidationError, "Invalid characters" unless input.match?(/\A[\w\s\-.,!?]+\z/)
  end

  # Runs the processing step with up to 3 attempts, all inside one 5 second
  # timeout (the timeout wraps the retries, not each attempt).
  def process_safely(input)
    with_timeout(5) do
      with_retry(3) do
        perform_processing(input)
      end
    end
  end

  # Yields under Timeout.timeout and re-raises a Timeout::Error as an
  # ExternalServiceError so callers only handle one failure type.
  def with_timeout(seconds)
    Timeout.timeout(seconds) do
      yield
    end
  rescue Timeout::Error
    raise ExternalServiceError, "Operation timed out after #{seconds} seconds"
  end

  # Yields up to +max_attempts+ times, retrying only on ExternalServiceError.
  # The last failure is re-raised unchanged.
  def with_retry(max_attempts)
    attempts = 0
    begin
      attempts += 1
      yield
    rescue ExternalServiceError => e
      raise if attempts >= max_attempts

      log :warning, "Retrying after error", attempt: attempts, error: e.message
      # Exponential backoff: 0.5s, 1s, 2s, ... (doubles on every attempt).
      sleep(0.5 * (2**(attempts - 1)))
      retry
    end
  end

  # Logs at :info (validation failures are expected) and returns the message
  # shown to the user.
  def handle_validation_error(error)
    log :info, "Validation failed", error: error.message
    "Validation Error: #{error.message}"
  end

  # Logs the real cause but returns a generic message to the user.
  def handle_external_error(error)
    log :error, "External service error", error: error.message
    "Service temporarily unavailable. Please try again later."
  end

  # Logs the error under a fresh UUID and returns a message carrying that ID,
  # so a user report can be matched to the log entry.
  def handle_unexpected_error(error)
    error_id = SecureRandom.uuid
    log :error, "Unexpected error", error_id: error_id, error: error.class.name, message: error.message
    "An unexpected error occurred (ID: #{error_id}). Please contact support."
  end
end

# Custom error classes
class ValidationError < StandardError; end
class ExternalServiceError < StandardError; end
|
102
|
+
```
|
103
|
+
|
104
|
+
### Resource Error Handling
|
105
|
+
|
106
|
+
```ruby
|
107
|
+
require 'json'
require 'time'

# Example resource that degrades through three levels: primary source,
# secondary source, then a static fallback payload.
#
# NOTE(review): `fetch_current_data`, `fetch_cached_data` and `default_data`
# are not defined in this example; presumably supplied by the reader.
class ResilientResource < Tsikol::Resource
  uri "data/resilient"
  description "Resource with fallback mechanisms"

  # @return [String] JSON from the primary source; on PrimarySourceError the
  #   result of #read_from_secondary; on any other StandardError an error
  #   payload
  def read
    fetch_from_primary
  rescue PrimarySourceError => e
    log :warning, "Primary source failed, trying secondary", error: e.message
    # The secondary attempt needs its own rescue scope: an exception raised
    # inside this rescue body is NOT handled by the sibling rescue clauses
    # of the same method.
    read_from_secondary
  rescue => e
    log :error, "Unexpected error in resource", error: e.class.name
    error_response("Resource temporarily unavailable")
  end

  private

  # Second level of the chain.
  #
  # @return [String] JSON from the secondary source; the fallback payload on
  #   SecondarySourceError; an error payload on any other StandardError
  def read_from_secondary
    fetch_from_secondary
  rescue SecondarySourceError => e
    log :error, "Secondary source also failed", error: e.message
    fallback_response
  rescue => e
    log :error, "Unexpected error in resource", error: e.class.name
    error_response("Resource temporarily unavailable")
  end

  # Simulated primary source: fails when rand > 0.9.
  def fetch_from_primary
    raise PrimarySourceError, "Database connection failed" if rand > 0.9

    {
      source: "primary",
      data: fetch_current_data,
      timestamp: Time.now.iso8601
    }.to_json
  end

  # Simulated secondary source: fails when rand > 0.8. The payload carries a
  # warning so consumers can tell it did not come from the primary source.
  def fetch_from_secondary
    raise SecondarySourceError, "API timeout" if rand > 0.8

    {
      source: "secondary",
      data: fetch_cached_data,
      timestamp: Time.now.iso8601,
      warning: "Using secondary data source"
    }.to_json
  end

  # Payload returned when both sources failed.
  def fallback_response
    {
      source: "fallback",
      data: default_data,
      timestamp: Time.now.iso8601,
      error: "All data sources unavailable, returning default data"
    }.to_json
  end

  # Payload for unexpected failures; retry_after is a fixed 60.
  def error_response(message)
    {
      error: message,
      timestamp: Time.now.iso8601,
      retry_after: 60
    }.to_json
  end
end

class PrimarySourceError < StandardError; end
class SecondarySourceError < StandardError; end
|
174
|
+
```
|
175
|
+
|
176
|
+
## Error Handling Middleware
|
177
|
+
|
178
|
+
### Comprehensive Error Middleware
|
179
|
+
|
180
|
+
```ruby
|
181
|
+
# Middleware that rescues StandardError from the rest of the stack,
# classifies it, optionally logs/reports it, and returns a JSON-RPC 2.0
# error response hash.
#
# Options:
#   :log_errors        - call log_error for every failure (default true)
#   :include_backtrace - include the first 5 backtrace lines in the response
#                        (default false)
#   :error_callbacks   - Hash of category => callable, invoked with the
#                        error_info hash
#   :error_reporters   - objects responding to #report(error_info)
#
# NOTE(review): `log_error` and `log` are not defined in this class;
# presumably inherited from Tsikol::Middleware — confirm.
class ErrorHandlingMiddleware < Tsikol::Middleware
  # Category => class names, checked in this order. Names are resolved
  # lazily so that libraries which are not loaded (redis, pg, faraday,
  # net/http, ...) are skipped instead of raising NameError from inside the
  # error handler.
  ERROR_CATEGORIES = {
    validation: %w[Tsikol::ValidationError ArgumentError],
    not_found: %w[Tsikol::NotFoundError],
    timeout: %w[Timeout::Error Net::ReadTimeout],
    database: %w[Redis::BaseError PG::Error],
    network: %w[Net::HTTPError Faraday::Error]
  }.freeze

  # Severity per category; anything not listed is :critical.
  SEVERITIES = {
    validation: :info,
    not_found: :info,
    timeout: :warning,
    network: :warning,
    database: :error
  }.freeze

  # JSON-RPC [code, message] per category; anything not listed is an
  # internal error.
  JSON_RPC_ERRORS = {
    validation: [-32602, "Invalid params"],
    not_found: [-32601, "Method not found"],
    timeout: [-32001, "Request timeout"],
    database: [-32002, "Service unavailable"],
    network: [-32002, "Service unavailable"]
  }.freeze
  INTERNAL_ERROR = [-32603, "Internal error"].freeze

  # Field names whose `name=value` occurrences are redacted from messages.
  SENSITIVE_FIELDS = %w[password key token].freeze

  def initialize(app, options = {})
    @app = app
    @log_errors = options.fetch(:log_errors, true)
    @include_backtrace = options.fetch(:include_backtrace, false)
    @error_callbacks = options[:error_callbacks] || {}
    @error_reporters = options[:error_reporters] || []
  end

  # Forwards +request+ to the next app and converts any StandardError into
  # an error response.
  #
  # @param request [Hash] request with "id" and "method" keys
  # @return the downstream response, or a JSON-RPC error Hash on failure
  def call(request)
    # Stored thread-locally so analyze_error can attach it to the report.
    Thread.current[:error_context] = {
      request_id: request["id"],
      method: request["method"],
      timestamp: Time.now
    }

    @app.call(request)
  rescue StandardError => e
    handle_error(request, e)
  ensure
    Thread.current[:error_context] = nil
  end

  private

  # Logs, reports and runs callbacks for +error+, then builds the response.
  def handle_error(request, error)
    error_info = analyze_error(error)

    log_error(error_info) if @log_errors
    report_error(error_info)
    execute_error_callbacks(error_info)

    error_response(request["id"], error_info)
  end

  # Builds the error_info hash handed to loggers, reporters and callbacks.
  def analyze_error(error)
    category = categorize_error(error)

    {
      error_class: error.class.name,
      message: error.message,
      backtrace: @include_backtrace ? error.backtrace : nil,
      category: category,
      severity: SEVERITIES.fetch(category, :critical),
      context: Thread.current[:error_context],
      timestamp: Time.now
    }
  end

  # @return [Symbol] the first category in ERROR_CATEGORIES with a loaded
  #   class that +error+ is an instance of, or :internal
  def categorize_error(error)
    ERROR_CATEGORIES.each do |category, class_names|
      return category if class_names.any? { |name| instance_of_named?(error, name) }
    end
    :internal
  end

  # True when the constant +class_name+ is currently defined and +error+ is
  # an instance of it.
  def instance_of_named?(error, class_name)
    Object.const_defined?(class_name) && error.is_a?(Object.const_get(class_name))
  end

  # @return [Symbol] severity for +error+, derived from its category
  def determine_severity(error)
    SEVERITIES.fetch(categorize_error(error), :critical)
  end

  # Builds the JSON-RPC 2.0 error response. The `message` is always one of
  # the fixed strings in JSON_RPC_ERRORS; the (sanitized) original message
  # is carried in `data.details`.
  def error_response(id, error_info)
    code, message = JSON_RPC_ERRORS.fetch(error_info[:category], INTERNAL_ERROR)

    {
      jsonrpc: "2.0",
      id: id,
      error: {
        code: code,
        message: message,
        data: build_error_data(error_info)
      }
    }
  end

  # `data` member of the error response.
  def build_error_data(error_info)
    data = {
      category: error_info[:category],
      details: sanitize_message(error_info[:message])
    }

    if @include_backtrace && error_info[:backtrace]
      data[:backtrace] = error_info[:backtrace].first(5)
    end

    data
  end

  # Replaces `password=...`, `key=...` and `token=...` (case-insensitive,
  # value up to the next whitespace) with a redaction marker.
  def sanitize_message(message)
    SENSITIVE_FIELDS.reduce(message.to_s) do |text, field|
      text.gsub(/#{field}=\S+/i, "#{field}=[REDACTED]")
    end
  end

  # Runs the callback registered for the error's category, if any. A failing
  # callback is logged and otherwise ignored.
  def execute_error_callbacks(error_info)
    callback = @error_callbacks[error_info[:category]]
    callback&.call(error_info)
  rescue => e
    log :error, "Error callback failed", error: e.message
  end

  # Hands error_info to every reporter. Each reporter is isolated: a failure
  # is logged and the remaining reporters still run.
  def report_error(error_info)
    @error_reporters.each do |reporter|
      reporter.report(error_info)
    rescue => e
      log :error, "Error reporter failed", reporter: reporter.class.name, error: e.message
    end
  end
end
|
329
|
+
```
|
330
|
+
|
331
|
+
### Circuit Breaker Pattern
|
332
|
+
|
333
|
+
```ruby
|
334
|
+
# Circuit breaker with three states:
#
#   :closed    - calls pass through; failures are counted
#   :open      - calls are rejected with CircuitOpenError until :timeout
#                seconds have passed since the last recorded failure
#   :half_open - up to :half_open_requests probe calls are let through; a
#                success closes the circuit, a failure re-opens it
#
# Options: :threshold (default 5), :timeout in seconds (default 60),
# :half_open_requests (default 1).
class CircuitBreaker
  def initialize(options = {})
    @threshold = options[:threshold] || 5
    @timeout = options[:timeout] || 60
    @half_open_requests = options[:half_open_requests] || 1

    @failure_count = 0
    @last_failure_time = nil
    @state = :closed
    @half_open_attempts = 0
    @mutex = Mutex.new
  end

  # Runs the block if the circuit admits the call.
  #
  # @return the block's return value
  # @raise [CircuitOpenError] when the call is rejected; the block is not run
  # @raise any StandardError raised by the block (re-raised after being
  #   recorded as a failure)
  def call
    # Admission happens outside the begin/rescue below on purpose: a rejected
    # call is not a failure of the protected operation. Counting it would
    # refresh @last_failure_time on every rejection and keep a busy circuit
    # open forever.
    admit!

    begin
      result = yield
    rescue StandardError
      record_failure
      raise
    end

    record_success
    result
  end

  # @return [Symbol] :closed, :open or :half_open
  def state
    @mutex.synchronize { @state }
  end

  # Forces the circuit closed and clears all counters.
  def reset
    @mutex.synchronize do
      @failure_count = 0
      @state = :closed
      @last_failure_time = nil
      @half_open_attempts = 0
    end
  end

  private

  # Decides whether the current call may proceed, moving :open to
  # :half_open once the timeout has elapsed.
  def admit!
    @mutex.synchronize do
      case @state
      when :open
        raise CircuitOpenError, "Circuit breaker is open" unless can_attempt_reset?

        @state = :half_open
        # The call that triggers the transition is itself the first probe.
        @half_open_attempts = 1
      when :half_open
        if @half_open_attempts >= @half_open_requests
          raise CircuitOpenError, "Circuit breaker is half-open, limit reached"
        end
        @half_open_attempts += 1
      end
    end
  end

  # A success clears the failure count and closes a half-open circuit.
  def record_success
    @mutex.synchronize do
      @failure_count = 0
      @state = :closed if @state == :half_open
    end
  end

  # Counts a failure and opens the circuit when the threshold is reached or
  # when a half-open probe failed.
  def record_failure
    failures = @mutex.synchronize do
      @failure_count += 1
      @last_failure_time = Time.now

      if @state == :half_open || @failure_count >= @threshold
        @state = :open
        @failure_count
      end
    end

    log_opened(failures) if failures
  end

  # This is a plain class, so `log` only exists if the host application
  # provides it; skip logging rather than mask the original error with a
  # NoMethodError.
  def log_opened(failures)
    log(:warning, "Circuit breaker opened", failures: failures) if respond_to?(:log, true)
  end

  # True once @timeout seconds have passed since the last recorded failure.
  def can_attempt_reset?
    return false unless @last_failure_time

    Time.now - @last_failure_time >= @timeout
  end
end

class CircuitOpenError < StandardError; end
|
411
|
+
|
412
|
+
# Usage in tools
|
413
|
+
# Tool whose external call is guarded by a CircuitBreaker (3 failures to
# open, 30 second reset timeout).
class ProtectedTool < Tsikol::Tool
  def initialize
    super
    @circuit_breaker = CircuitBreaker.new(threshold: 3, timeout: 30)
  end

  # Runs the external call through the breaker. When the breaker rejects the
  # call, a generic "unavailable" message is returned instead of raising.
  def execute(query:)
    @circuit_breaker.call { external_api_call(query) }
  rescue CircuitOpenError
    log :warning, "Circuit breaker prevented call"
    "Service temporarily unavailable. Please try again later."
  end
end
|
429
|
+
```
|
430
|
+
|
431
|
+
## Graceful Degradation
|
432
|
+
|
433
|
+
### Feature Flags and Fallbacks
|
434
|
+
|
435
|
+
```ruby
|
436
|
+
# Service wrapper combining feature flags with per-operation fallbacks.
#
# Options:
#   :features  - Hash of operation => boolean; missing operations are enabled
#   :fallbacks - Hash of operation => callable taking the params
class DegradableService
  def initialize(options = {})
    @features = options[:features] || {}
    @fallbacks = options[:fallbacks] || {}
  end

  # Runs +operation+ unless its feature flag is off, in which case a
  # "disabled" message string is returned.
  def execute(operation, params)
    return "Feature '#{operation}' is currently disabled" unless feature_enabled?(operation)

    execute_with_fallback(operation, params)
  end

  private

  # Operations without an explicit flag count as enabled.
  def feature_enabled?(operation)
    @features.fetch(operation, true)
  end

  # Tries the primary handler; on StandardError logs and runs the registered
  # fallback, or re-raises when none is registered.
  def execute_with_fallback(operation, params)
    primary_handler(operation, params)
  rescue => failure
    log :warning, "Primary handler failed, using fallback",
        operation: operation, error: failure.message

    substitute = @fallbacks[operation]
    raise unless substitute

    substitute.call(params)
  end

  # Dispatches to the full implementation of a known operation.
  # Raises NotImplementedError for anything else.
  def primary_handler(operation, params)
    return perform_full_search(params) if operation == :search
    return perform_deep_analysis(params) if operation == :analyze

    raise NotImplementedError, "Unknown operation: #{operation}"
  end
end
|
481
|
+
|
482
|
+
# Configuration
|
483
|
+
service = DegradableService.new(
|
484
|
+
features: {
|
485
|
+
search: true,
|
486
|
+
analyze: false # Disabled due to high load
|
487
|
+
},
|
488
|
+
fallbacks: {
|
489
|
+
search: ->(params) { perform_basic_search(params) },
|
490
|
+
analyze: ->(params) { "Analysis temporarily unavailable" }
|
491
|
+
}
|
492
|
+
)
|
493
|
+
```
|
494
|
+
|
495
|
+
### Progressive Enhancement
|
496
|
+
|
497
|
+
```ruby
|
498
|
+
# Resource that always returns basic data and adds richer sections only
# while enough of a 5 second time budget remains.
class ProgressiveResource < Tsikol::Resource
  uri "data/progressive"
  description "Resource with progressive data quality"

  # @return [String] JSON with :basic and :timestamp, plus :enhanced and
  #   :premium when they were attempted
  def read
    started_at = Time.now
    budget = 5.0 # 5 second budget

    payload = { basic: fetch_basic_data, timestamp: Time.now.iso8601 }

    # Enhanced data needs more than 2 seconds left; a failure is recorded in
    # the payload as an "Unavailable" marker.
    if time_remaining(started_at, budget) > 2
      payload[:enhanced] =
        begin
          fetch_enhanced_data
        rescue => failure
          log :warning, "Failed to fetch enhanced data", error: failure.message
          { error: "Unavailable" }
        end
    end

    # Premium data needs more than 1 second left; on failure the key is
    # simply left out.
    if time_remaining(started_at, budget) > 1
      begin
        payload[:premium] = fetch_premium_data
      rescue => failure
        log :warning, "Failed to fetch premium data", error: failure.message
      end
    end

    payload.to_json
  end

  private

  # Seconds of +timeout+ left since +start_time+ (may be negative).
  def time_remaining(start_time, timeout)
    elapsed = Time.now - start_time
    timeout - elapsed
  end

  # Always available, cached data
  def fetch_basic_data
    { status: "operational", users: 1000 }
  end

  # Might fail or be slow; the sleep simulates one second of work.
  def fetch_enhanced_data
    sleep(1)
    { metrics: calculate_metrics, trends: analyze_trends }
  end

  # Expensive operation; the sleep simulates two seconds of work.
  def fetch_premium_data
    sleep(2)
    { predictions: generate_predictions }
  end
end
|
555
|
+
```
|
556
|
+
|
557
|
+
## Error Recovery Strategies
|
558
|
+
|
559
|
+
### Retry with Backoff
|
560
|
+
|
561
|
+
```ruby
|
562
|
+
# Runs a block with bounded retries and exponential backoff.
class RetryableOperation
  # Error classes that are always retried. Names are resolved lazily so that
  # libraries which are not loaded (net/http, timeout, redis) are skipped
  # instead of raising NameError while an error is being handled.
  RETRYABLE_ERROR_CLASSES = %w[
    Net::ReadTimeout
    Net::OpenTimeout
    Timeout::Error
    Redis::BaseError
  ].freeze

  # Any other StandardError is retried when its message matches this.
  RETRYABLE_MESSAGE = /temporary|timeout|unavailable/i.freeze

  # Yields the 1-based attempt number until the block returns, a
  # non-retryable error is raised, or :max_attempts is reached.
  #
  # Options:
  #   :max_attempts - total attempts, including the first (default 3)
  #   :base_delay   - delay before the first retry, in seconds (default 1)
  #   :max_delay    - upper bound for the un-jittered delay (default 30)
  #   :multiplier   - growth factor per attempt (default 2)
  #   :jitter       - randomize each delay by ±25% (default true)
  #
  # @return the block's return value
  # @raise the last error when it is not retryable or attempts are exhausted
  def self.execute(options = {})
    max_attempts = options[:max_attempts] || 3
    base_delay = options[:base_delay] || 1
    max_delay = options[:max_delay] || 30
    multiplier = options[:multiplier] || 2
    # `options[:jitter] || true` could never be false; only fall back to the
    # default when the option is absent (or nil).
    jitter = options[:jitter]
    jitter = true if jitter.nil?

    attempt = 0

    loop do
      attempt += 1

      begin
        return yield(attempt)
      rescue StandardError => e
        raise if attempt >= max_attempts || !retryable_error?(e)

        delay = calculate_delay(attempt, base_delay, max_delay, multiplier, jitter)
        # `log` is not defined by this class; only call it when the host
        # application provides one.
        if respond_to?(:log, true)
          log(:info, "Retrying after error", attempt: attempt, delay: delay, error: e.message)
        end

        sleep(delay)
      end
    end
  end

  # @return [Boolean] true when +error+ is an instance of a loaded class from
  #   RETRYABLE_ERROR_CLASSES or its message matches RETRYABLE_MESSAGE
  def self.retryable_error?(error)
    known_transient = RETRYABLE_ERROR_CLASSES.any? do |name|
      Object.const_defined?(name) && error.is_a?(Object.const_get(name))
    end

    known_transient || error.message.match?(RETRYABLE_MESSAGE)
  end

  # Delay before the retry that follows +attempt+:
  # base * multiplier^(attempt - 1), capped at +max+, then scaled by a random
  # factor in [0.75, 1.25) when +jitter+ is truthy.
  def self.calculate_delay(attempt, base, max, multiplier, jitter)
    delay = [base * (multiplier**(attempt - 1)), max].min
    delay *= (0.75 + rand * 0.5) if jitter
    delay
  end

  # A bare `private` does not apply to `def self.` methods, so the helpers
  # are hidden explicitly.
  private_class_method :retryable_error?, :calculate_delay
end
|
623
|
+
|
624
|
+
# Usage
|
625
|
+
# Tool that runs its operation through RetryableOperation with up to five
# attempts and a 0.5 second base delay.
class RobustTool < Tsikol::Tool
  def execute(params)
    retry_options = { max_attempts: 5, base_delay: 0.5 }

    RetryableOperation.execute(retry_options) do |attempt_number|
      log :debug, "Attempting operation", attempt: attempt_number
      perform_operation(params)
    end
  end
end
|
633
|
+
```
|
634
|
+
|
635
|
+
### Bulkhead Pattern
|
636
|
+
|
637
|
+
```ruby
|
638
|
+
# Runs work on named thread pools so that one kind of operation cannot use
# up the threads of another. Pools are created on first use.
#
# Options:
#   :default_pool_size - max threads per pool (default 10)
#   :reject_policy     - :abort (default), :caller_runs or :discard
class BulkheadExecutor
  def initialize(options = {})
    @pools = {}
    @default_pool_size = options[:default_pool_size] || 10
    @reject_policy = options[:reject_policy] || :abort
  end

  # Submits the block to the pool called +pool_name+ according to the
  # configured reject policy.
  #
  # @raise [BulkheadFullError] when the pool raises
  #   Concurrent::RejectedExecutionError and the policy does not swallow it
  def execute(pool_name, &block)
    pool = get_or_create_pool(pool_name)

    if @reject_policy == :abort
      pool.post(&block)
    elsif @reject_policy == :caller_runs
      # Run in caller thread if pool is full
      pool_is_full = pool.queue_length >= pool.max_queue
      pool_is_full ? yield : pool.post(&block)
    elsif @reject_policy == :discard
      # Silently discard if pool is full
      begin
        pool.post(&block)
      rescue StandardError
        nil
      end
    end
  rescue Concurrent::RejectedExecutionError
    raise BulkheadFullError, "Pool '#{pool_name}' is at capacity"
  end

  # Shuts every pool down in turn, waiting up to 5 seconds for each.
  def shutdown
    @pools.each_value do |pool|
      pool.shutdown
      pool.wait_for_termination(5)
    end
  end

  private

  # Returns the pool registered under +name+, creating it on first use with
  # 2 minimum threads and a queue twice the pool size.
  def get_or_create_pool(name)
    @pools[name] ||= Concurrent::ThreadPoolExecutor.new(
      min_threads: 2,
      max_threads: @default_pool_size,
      max_queue: @default_pool_size * 2,
      fallback_policy: @reject_policy
    )
  end
end

class BulkheadFullError < StandardError; end
|
686
|
+
|
687
|
+
# Usage - isolate different operations
|
688
|
+
bulkhead = BulkheadExecutor.new(default_pool_size: 5)
|
689
|
+
|
690
|
+
# Critical operations get their own pool
|
691
|
+
bulkhead.execute(:critical) do
|
692
|
+
process_payment(order)
|
693
|
+
end
|
694
|
+
|
695
|
+
# Less critical operations share a pool
|
696
|
+
bulkhead.execute(:background) do
|
697
|
+
send_notification(user)
|
698
|
+
end
|
699
|
+
```
|
700
|
+
|
701
|
+
## Error Reporting
|
702
|
+
|
703
|
+
### Structured Error Reporter
|
704
|
+
|
705
|
+
```ruby
|
706
|
+
# Fans an exception out to any number of reporter objects, each on its own
# thread, together with service, context and system metadata.
#
# Options: :service_name (default "mcp-server"), :environment (default
# "production").
class ErrorReporter
  def initialize(options = {})
    @service_name = options[:service_name] || "mcp-server"
    @environment = options[:environment] || "production"
    @reporters = []
  end

  # Registers an object responding to #report(error_data).
  def add_reporter(reporter)
    @reporters << reporter
  end

  # Builds the payload once and hands it to every registered reporter.
  #
  # @param error [Exception] the error to report
  # @param context [Hash] extra fields merged into the payload's :context
  def report(error, context = {})
    payload = build_error_data(error, context)

    @reporters.each { |reporter| deliver_async(reporter, payload) }
  end

  private

  # Delivers +payload+ to +reporter+ on a new thread.
  def deliver_async(reporter, payload)
    Thread.new do
      reporter.report(payload)
    rescue => failure
      # Don't let reporter errors affect main flow
      log :error, "Reporter failed",
          reporter: reporter.class.name,
          error: failure.message
    end
  end

  # Assembles the payload: service identity, error details (backtrace cut to
  # 20 lines), caller context plus timestamp/thread/process IDs, and runtime
  # information.
  def build_error_data(error, context)
    error_section = {
      class: error.class.name,
      message: error.message,
      backtrace: error.backtrace&.first(20)
    }
    context_section = context.merge(
      timestamp: Time.now.iso8601,
      thread_id: Thread.current.object_id,
      process_id: Process.pid
    )
    system_section = {
      ruby_version: RUBY_VERSION,
      platform: RUBY_PLATFORM,
      memory_usage: memory_usage
    }

    {
      service: @service_name,
      environment: @environment,
      error: error_section,
      context: context_section,
      system: system_section
    }
  end

  # RSS value reported by `ps` for this process, divided by 1024; nil when
  # the command fails.
  def memory_usage
    rss = `ps -o rss= -p #{Process.pid}`
    rss.to_i / 1024
  rescue StandardError
    nil
  end
end
|
762
|
+
|
763
|
+
# Example reporters
|
764
|
+
# Reporter that writes the error payload to the log at :error level.
class LogReporter
  # @param error_data [Hash] payload to log under the :data key
  def report(error_data)
    log(:error, "Error reported", data: error_data)
  end
end
|
769
|
+
|
770
|
+
# Reporter that forwards the payload to Sentry. A new StandardError carrying
# the original message is sent; the full payload goes along as `extra`.
class SentryReporter
  # @param error_data [Hash] payload with an [:error][:message] entry
  def report(error_data)
    original_message = error_data[:error][:message]
    exception = StandardError.new(original_message)

    Sentry.capture_exception(exception, extra: error_data)
  end
end
|
778
|
+
|
779
|
+
# Setup
|
780
|
+
error_reporter = ErrorReporter.new(
|
781
|
+
service_name: "my-mcp-server",
|
782
|
+
environment: ENV['RACK_ENV']
|
783
|
+
)
|
784
|
+
error_reporter.add_reporter(LogReporter.new)
|
785
|
+
error_reporter.add_reporter(SentryReporter.new) if ENV['SENTRY_DSN']
|
786
|
+
```
|
787
|
+
|
788
|
+
## Testing Error Handling
|
789
|
+
|
790
|
+
```ruby
|
791
|
+
require 'minitest/autorun'
|
792
|
+
|
793
|
+
# End-to-end tests for the error-handling examples: a server with
# ErrorHandlingMiddleware and a FaultyTool that fails on demand.
#
# NOTE(review): `assert_error_response` and `assert_successful_response` are
# not defined in this example; presumably provided by Tsikol::TestHelpers —
# confirm they are available in this test class.
class ErrorHandlingTest < Minitest::Test
  def setup
    @server = Tsikol::Server.new(name: "test")
    @server.use ErrorHandlingMiddleware, include_backtrace: true
    @server.register_tool_instance(FaultyTool.new)
    @client = Tsikol::TestHelpers::TestClient.new(@server)
  end

  # An empty input must be reported as JSON-RPC "Invalid params".
  def test_handles_validation_errors
    response = @client.call_tool("faulty_tool", {
      "action" => "validate",
      "input" => ""
    })

    assert_error_response(response, -32602)
    # Parentheses avoid Ruby's "ambiguous first argument" warning for a
    # regexp literal passed without them.
    assert_match(/Invalid params/, response[:error][:message])
    assert_equal :validation, response[:error][:data][:category]
  end

  # A tool that fails twice and then succeeds must yield a success response.
  def test_retries_transient_failures
    response = @client.call_tool("faulty_tool", {
      "action" => "transient",
      "fail_times" => 2
    })

    # Should succeed after retries
    assert_successful_response(response)
  end

  # After three failures (the tool's breaker threshold) the next call must
  # be rejected by the open circuit even though it would have succeeded.
  def test_circuit_breaker_opens
    # Make requests fail repeatedly
    3.times do
      @client.call_tool("faulty_tool", {
        "action" => "external",
        "should_fail" => true
      })
    end

    # Circuit should be open
    response = @client.call_tool("faulty_tool", {
      "action" => "external",
      "should_fail" => false
    })

    assert_error_response(response)
    # ErrorHandlingMiddleware always sets error.message to a fixed JSON-RPC
    # string ("Internal error" here); the original exception text is in
    # error.data.details.
    assert_match(/circuit.*open/i, response[:error][:data][:details])
  end
end
|
841
|
+
|
842
|
+
# Test double that fails on demand, selected by the +action+ argument:
#
#   "validate"  - raises when :input is empty
#   "transient" - fails until :fail_times attempts have been made, retrying
#                 internally, then succeeds
#   "external"  - runs through a CircuitBreaker (threshold 3) and fails when
#                 :should_fail is truthy
class FaultyTool < Tsikol::Tool
  # Upper bound on internal retries for a single "transient" call.
  MAX_TRANSIENT_ATTEMPTS = 5

  def initialize
    super
    @attempt_count = 0
    @circuit_breaker = CircuitBreaker.new(threshold: 3)
  end

  # @param action [String] which failure mode to exercise
  # @param params [Hash] mode-specific options (:input, :fail_times,
  #   :should_fail)
  # @return [String, nil] a success message; nil for "validate" with valid
  #   input and for unknown actions
  def execute(action:, **params)
    case action
    when "validate"
      # ArgumentError is what ErrorHandlingMiddleware classifies as
      # :validation; the top-level ValidationError class is not the
      # Tsikol::ValidationError the middleware checks for. to_s treats a
      # missing input like an empty one.
      raise ArgumentError, "Input cannot be empty" if params[:input].to_s.empty?
    when "transient"
      flaky_operation(params[:fail_times].to_i)
    when "external"
      @circuit_breaker.call do
        if params[:should_fail]
          raise ExternalServiceError, "External service failed"
        end
        "External call succeeded"
      end
    end
  end

  private

  # Fails with ExternalServiceError until the tool has made more than
  # +fail_times+ attempts in total, retrying within this call up to
  # MAX_TRANSIENT_ATTEMPTS times so that one call can succeed after
  # transient failures.
  def flaky_operation(fail_times)
    attempts_this_call = 0
    begin
      attempts_this_call += 1
      @attempt_count += 1
      raise ExternalServiceError, "Transient failure" if @attempt_count <= fail_times

      "Success after #{@attempt_count} attempts"
    rescue ExternalServiceError
      retry if attempts_this_call < MAX_TRANSIENT_ATTEMPTS
      raise
    end
  end
end
|
869
|
+
```
|
870
|
+
|
871
|
+
## Best Practices
|
872
|
+
|
873
|
+
1. **Fail fast** for unrecoverable errors
|
874
|
+
2. **Retry with backoff** for transient failures
|
875
|
+
3. **Use circuit breakers** for external dependencies
|
876
|
+
4. **Provide fallbacks** for degraded functionality
|
877
|
+
5. **Log appropriately** - don't flood logs with expected errors
|
878
|
+
6. **Sanitize error messages** - remove sensitive data
|
879
|
+
7. **Monitor error rates** and set up alerts
|
880
|
+
8. **Test error paths** as thoroughly as happy paths
|
881
|
+
|
882
|
+
## Next Steps
|
883
|
+
|
884
|
+
- Implement [Logging](logging.md) for error tracking
|
885
|
+
- Add monitoring and alerting for error metrics (this cookbook does not yet include a dedicated monitoring recipe)
|
886
|
+
- Review [Testing](../guides/testing.md) error scenarios
|
887
|
+
- Set up [Authentication](authentication.md) error handling
|