foundry-mcp 0.8.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of foundry-mcp might be problematic. Click here for more details.
- foundry_mcp/__init__.py +13 -0
- foundry_mcp/cli/__init__.py +67 -0
- foundry_mcp/cli/__main__.py +9 -0
- foundry_mcp/cli/agent.py +96 -0
- foundry_mcp/cli/commands/__init__.py +37 -0
- foundry_mcp/cli/commands/cache.py +137 -0
- foundry_mcp/cli/commands/dashboard.py +148 -0
- foundry_mcp/cli/commands/dev.py +446 -0
- foundry_mcp/cli/commands/journal.py +377 -0
- foundry_mcp/cli/commands/lifecycle.py +274 -0
- foundry_mcp/cli/commands/modify.py +824 -0
- foundry_mcp/cli/commands/plan.py +640 -0
- foundry_mcp/cli/commands/pr.py +393 -0
- foundry_mcp/cli/commands/review.py +667 -0
- foundry_mcp/cli/commands/session.py +472 -0
- foundry_mcp/cli/commands/specs.py +686 -0
- foundry_mcp/cli/commands/tasks.py +807 -0
- foundry_mcp/cli/commands/testing.py +676 -0
- foundry_mcp/cli/commands/validate.py +982 -0
- foundry_mcp/cli/config.py +98 -0
- foundry_mcp/cli/context.py +298 -0
- foundry_mcp/cli/logging.py +212 -0
- foundry_mcp/cli/main.py +44 -0
- foundry_mcp/cli/output.py +122 -0
- foundry_mcp/cli/registry.py +110 -0
- foundry_mcp/cli/resilience.py +178 -0
- foundry_mcp/cli/transcript.py +217 -0
- foundry_mcp/config.py +1454 -0
- foundry_mcp/core/__init__.py +144 -0
- foundry_mcp/core/ai_consultation.py +1773 -0
- foundry_mcp/core/batch_operations.py +1202 -0
- foundry_mcp/core/cache.py +195 -0
- foundry_mcp/core/capabilities.py +446 -0
- foundry_mcp/core/concurrency.py +898 -0
- foundry_mcp/core/context.py +540 -0
- foundry_mcp/core/discovery.py +1603 -0
- foundry_mcp/core/error_collection.py +728 -0
- foundry_mcp/core/error_store.py +592 -0
- foundry_mcp/core/health.py +749 -0
- foundry_mcp/core/intake.py +933 -0
- foundry_mcp/core/journal.py +700 -0
- foundry_mcp/core/lifecycle.py +412 -0
- foundry_mcp/core/llm_config.py +1376 -0
- foundry_mcp/core/llm_patterns.py +510 -0
- foundry_mcp/core/llm_provider.py +1569 -0
- foundry_mcp/core/logging_config.py +374 -0
- foundry_mcp/core/metrics_persistence.py +584 -0
- foundry_mcp/core/metrics_registry.py +327 -0
- foundry_mcp/core/metrics_store.py +641 -0
- foundry_mcp/core/modifications.py +224 -0
- foundry_mcp/core/naming.py +146 -0
- foundry_mcp/core/observability.py +1216 -0
- foundry_mcp/core/otel.py +452 -0
- foundry_mcp/core/otel_stubs.py +264 -0
- foundry_mcp/core/pagination.py +255 -0
- foundry_mcp/core/progress.py +387 -0
- foundry_mcp/core/prometheus.py +564 -0
- foundry_mcp/core/prompts/__init__.py +464 -0
- foundry_mcp/core/prompts/fidelity_review.py +691 -0
- foundry_mcp/core/prompts/markdown_plan_review.py +515 -0
- foundry_mcp/core/prompts/plan_review.py +627 -0
- foundry_mcp/core/providers/__init__.py +237 -0
- foundry_mcp/core/providers/base.py +515 -0
- foundry_mcp/core/providers/claude.py +472 -0
- foundry_mcp/core/providers/codex.py +637 -0
- foundry_mcp/core/providers/cursor_agent.py +630 -0
- foundry_mcp/core/providers/detectors.py +515 -0
- foundry_mcp/core/providers/gemini.py +426 -0
- foundry_mcp/core/providers/opencode.py +718 -0
- foundry_mcp/core/providers/opencode_wrapper.js +308 -0
- foundry_mcp/core/providers/package-lock.json +24 -0
- foundry_mcp/core/providers/package.json +25 -0
- foundry_mcp/core/providers/registry.py +607 -0
- foundry_mcp/core/providers/test_provider.py +171 -0
- foundry_mcp/core/providers/validation.py +857 -0
- foundry_mcp/core/rate_limit.py +427 -0
- foundry_mcp/core/research/__init__.py +68 -0
- foundry_mcp/core/research/memory.py +528 -0
- foundry_mcp/core/research/models.py +1234 -0
- foundry_mcp/core/research/providers/__init__.py +40 -0
- foundry_mcp/core/research/providers/base.py +242 -0
- foundry_mcp/core/research/providers/google.py +507 -0
- foundry_mcp/core/research/providers/perplexity.py +442 -0
- foundry_mcp/core/research/providers/semantic_scholar.py +544 -0
- foundry_mcp/core/research/providers/tavily.py +383 -0
- foundry_mcp/core/research/workflows/__init__.py +25 -0
- foundry_mcp/core/research/workflows/base.py +298 -0
- foundry_mcp/core/research/workflows/chat.py +271 -0
- foundry_mcp/core/research/workflows/consensus.py +539 -0
- foundry_mcp/core/research/workflows/deep_research.py +4142 -0
- foundry_mcp/core/research/workflows/ideate.py +682 -0
- foundry_mcp/core/research/workflows/thinkdeep.py +405 -0
- foundry_mcp/core/resilience.py +600 -0
- foundry_mcp/core/responses.py +1624 -0
- foundry_mcp/core/review.py +366 -0
- foundry_mcp/core/security.py +438 -0
- foundry_mcp/core/spec.py +4119 -0
- foundry_mcp/core/task.py +2463 -0
- foundry_mcp/core/testing.py +839 -0
- foundry_mcp/core/validation.py +2357 -0
- foundry_mcp/dashboard/__init__.py +32 -0
- foundry_mcp/dashboard/app.py +119 -0
- foundry_mcp/dashboard/components/__init__.py +17 -0
- foundry_mcp/dashboard/components/cards.py +88 -0
- foundry_mcp/dashboard/components/charts.py +177 -0
- foundry_mcp/dashboard/components/filters.py +136 -0
- foundry_mcp/dashboard/components/tables.py +195 -0
- foundry_mcp/dashboard/data/__init__.py +11 -0
- foundry_mcp/dashboard/data/stores.py +433 -0
- foundry_mcp/dashboard/launcher.py +300 -0
- foundry_mcp/dashboard/views/__init__.py +12 -0
- foundry_mcp/dashboard/views/errors.py +217 -0
- foundry_mcp/dashboard/views/metrics.py +164 -0
- foundry_mcp/dashboard/views/overview.py +96 -0
- foundry_mcp/dashboard/views/providers.py +83 -0
- foundry_mcp/dashboard/views/sdd_workflow.py +255 -0
- foundry_mcp/dashboard/views/tool_usage.py +139 -0
- foundry_mcp/prompts/__init__.py +9 -0
- foundry_mcp/prompts/workflows.py +525 -0
- foundry_mcp/resources/__init__.py +9 -0
- foundry_mcp/resources/specs.py +591 -0
- foundry_mcp/schemas/__init__.py +38 -0
- foundry_mcp/schemas/intake-schema.json +89 -0
- foundry_mcp/schemas/sdd-spec-schema.json +414 -0
- foundry_mcp/server.py +150 -0
- foundry_mcp/tools/__init__.py +10 -0
- foundry_mcp/tools/unified/__init__.py +92 -0
- foundry_mcp/tools/unified/authoring.py +3620 -0
- foundry_mcp/tools/unified/context_helpers.py +98 -0
- foundry_mcp/tools/unified/documentation_helpers.py +268 -0
- foundry_mcp/tools/unified/environment.py +1341 -0
- foundry_mcp/tools/unified/error.py +479 -0
- foundry_mcp/tools/unified/health.py +225 -0
- foundry_mcp/tools/unified/journal.py +841 -0
- foundry_mcp/tools/unified/lifecycle.py +640 -0
- foundry_mcp/tools/unified/metrics.py +777 -0
- foundry_mcp/tools/unified/plan.py +876 -0
- foundry_mcp/tools/unified/pr.py +294 -0
- foundry_mcp/tools/unified/provider.py +589 -0
- foundry_mcp/tools/unified/research.py +1283 -0
- foundry_mcp/tools/unified/review.py +1042 -0
- foundry_mcp/tools/unified/review_helpers.py +314 -0
- foundry_mcp/tools/unified/router.py +102 -0
- foundry_mcp/tools/unified/server.py +565 -0
- foundry_mcp/tools/unified/spec.py +1283 -0
- foundry_mcp/tools/unified/task.py +3846 -0
- foundry_mcp/tools/unified/test.py +431 -0
- foundry_mcp/tools/unified/verification.py +520 -0
- foundry_mcp-0.8.22.dist-info/METADATA +344 -0
- foundry_mcp-0.8.22.dist-info/RECORD +153 -0
- foundry_mcp-0.8.22.dist-info/WHEEL +4 -0
- foundry_mcp-0.8.22.dist-info/entry_points.txt +3 -0
- foundry_mcp-0.8.22.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,857 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Cross-provider validation, observability hooks, and resilience patterns.
|
|
3
|
+
|
|
4
|
+
Centralizes ProviderRequest validation, input hygiene, observability hooks,
|
|
5
|
+
and rate limiting consistent with docs/mcp_best_practices/{04,05,12,15}.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
import re
|
|
12
|
+
import time
|
|
13
|
+
import uuid
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
from datetime import datetime
|
|
16
|
+
from enum import Enum
|
|
17
|
+
from functools import wraps
|
|
18
|
+
from threading import Lock
|
|
19
|
+
from typing import Any, Callable, Dict, Optional, Set, TypeVar
|
|
20
|
+
|
|
21
|
+
from .base import (
|
|
22
|
+
ProviderError,
|
|
23
|
+
ProviderExecutionError,
|
|
24
|
+
ProviderRequest,
|
|
25
|
+
ProviderResult,
|
|
26
|
+
ProviderStatus,
|
|
27
|
+
ProviderTimeoutError,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
# ---------------------------------------------------------------------------
|
|
31
|
+
# Logging Configuration (per docs/mcp_best_practices/05-observability-telemetry.md)
|
|
32
|
+
# ---------------------------------------------------------------------------
|
|
33
|
+
|
|
34
|
+
# Shared logger for all provider-level validation/observability messages.
logger = logging.getLogger("foundry_mcp.providers")

# ANSI escape sequence pattern for stripping from inputs.
# Matches (a) any CSI sequence -- ESC "[" + parameter bytes + intermediate
# bytes + one final byte -- and (b) two-character escapes (ESC followed by a
# byte in "@".."Z" or "\".."_").  The earlier pattern only recognised CSI
# sequences terminated by m/G/K/H/F, so cursor movement, erase-display and
# private-mode sequences (e.g. "\x1b[2J", "\x1b[?25l") were left in prompts.
ANSI_ESCAPE_PATTERN = re.compile(r"\x1b(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")

# Maximum prompt length (characters) to prevent token explosion
MAX_PROMPT_LENGTH = 500_000  # ~125k tokens at 4 chars/token

# Maximum metadata size (bytes) for logging/storage
MAX_METADATA_SIZE = 64 * 1024  # 64KB
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# ---------------------------------------------------------------------------
|
|
47
|
+
# Input Hygiene (per docs/mcp_best_practices/04-validation-input-hygiene.md)
|
|
48
|
+
# ---------------------------------------------------------------------------
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class ValidationError(ProviderExecutionError):
    """Raised when input validation fails.

    Attributes:
        field: Name of the request field that failed validation, or ``None``
            when the failure is not tied to a single field.
    """

    def __init__(self, message: str, field: Optional[str] = None, **kwargs: object) -> None:
        """Store the offending field name and delegate to the base error.

        Args:
            message: Human-readable description of the failure.
            field: Name of the field that failed validation, if any.
            **kwargs: Forwarded unchanged to ``ProviderExecutionError``.
        """
        # Set before calling the base initializer so the attribute exists
        # even if the base class raises or inspects the instance.
        self.field = field
        super().__init__(message, **kwargs)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def strip_ansi(text: str) -> str:
    """Remove ANSI escape sequences from text.

    Args:
        text: Input text; falsy values (``""`` or ``None``) are returned as-is.

    Returns:
        ``text`` with every match of ``ANSI_ESCAPE_PATTERN`` removed.
    """
    if not text:
        return text
    return ANSI_ESCAPE_PATTERN.sub("", text)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def ensure_utf8(text: str) -> str:
    """Ensure text is valid UTF-8, replacing invalid sequences.

    Args:
        text: Input text; falsy values (``""`` or ``None``) are returned as-is.

    Returns:
        The text after a UTF-8 encode/decode round trip.  Characters that
        cannot be encoded (such as lone surrogates) are replaced with ``"?"``
        by the ``errors="replace"`` handler.
    """
    if not text:
        return text
    # Encode to bytes and decode back, replacing invalid chars
    return text.encode("utf-8", errors="replace").decode("utf-8")
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def sanitize_prompt(prompt: str) -> str:
    """
    Sanitize prompt text for safe subprocess execution.

    - Ensures valid UTF-8 encoding
    - Strips ANSI escape sequences
    - Validates length limits

    Args:
        prompt: Raw prompt text.

    Returns:
        The sanitized prompt.

    Raises:
        ValidationError: If the prompt is empty (before or after
            sanitization) or longer than ``MAX_PROMPT_LENGTH`` characters.
    """
    if not prompt:
        raise ValidationError("Prompt cannot be empty", field="prompt")

    # Ensure UTF-8, then strip ANSI sequences
    clean = strip_ansi(ensure_utf8(prompt))

    # A prompt consisting solely of escape sequences collapses to "" here;
    # reject it so an empty prompt cannot slip past the check above.
    if not clean:
        raise ValidationError("Prompt cannot be empty", field="prompt")

    # Check length
    if len(clean) > MAX_PROMPT_LENGTH:
        raise ValidationError(
            f"Prompt exceeds maximum length ({len(clean)} > {MAX_PROMPT_LENGTH})",
            field="prompt",
        )

    return clean
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def validate_request(request: ProviderRequest) -> ProviderRequest:
    """
    Validate and sanitize a ProviderRequest before execution.

    Performs validation in order per best practices:
    1. Required fields present
    2. Type correctness (implicit via dataclass)
    3. Format validation
    4. Business rules

    Args:
        request: The request to validate.

    Returns:
        A new ``ProviderRequest`` whose ``prompt`` and ``system_prompt`` are
        sanitized; every other field is carried over from ``request``.

    Raises:
        ValidationError: If the prompt is missing or invalid, the system
            prompt is invalid, ``temperature`` is outside ``[0.0, 2.0]``,
            ``max_tokens`` is below 1, or ``timeout`` is negative.
    """
    from dataclasses import replace

    # 1. Required fields
    if not request.prompt:
        raise ValidationError("Prompt is required", field="prompt")

    # 3. Format validation - sanitize prompt
    sanitized_prompt = sanitize_prompt(request.prompt)

    # Sanitize system prompt if present
    sanitized_system = None
    if request.system_prompt:
        try:
            sanitized_system = sanitize_prompt(request.system_prompt)
        except ValidationError as exc:
            # sanitize_prompt always reports field="prompt"; point callers at
            # the field that actually failed.
            exc.field = "system_prompt"
            raise

    # 4. Business rules - validate numeric parameters
    if request.temperature is not None and not (0.0 <= request.temperature <= 2.0):
        raise ValidationError(
            f"Temperature must be between 0.0 and 2.0, got {request.temperature}",
            field="temperature",
        )

    if request.max_tokens is not None and request.max_tokens < 1:
        raise ValidationError(
            f"max_tokens must be positive, got {request.max_tokens}",
            field="max_tokens",
        )

    if request.timeout is not None and request.timeout < 0:
        raise ValidationError(
            f"timeout must be non-negative, got {request.timeout}",
            field="timeout",
        )

    # Return a sanitized copy.  dataclasses.replace() copies every field of
    # the request, so fields that are not explicitly sanitized here are kept
    # instead of being reset to their defaults.
    # NOTE(review): relies on ProviderRequest being a dataclass, as stated in
    # step 2 of the docstring -- confirm against .base.
    return replace(
        request,
        prompt=sanitized_prompt,
        system_prompt=sanitized_system,
    )
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
# ---------------------------------------------------------------------------
|
|
159
|
+
# Command Allowlists (per docs/mcp_best_practices/04-validation-input-hygiene.md)
|
|
160
|
+
# ---------------------------------------------------------------------------
|
|
161
|
+
|
|
162
|
+
# Common read-only commands that are safe across all providers
|
|
163
|
+
# Default allowlist consulted by is_command_allowed() when no custom
# allowlist is supplied.  Entries are either a bare command name or a
# two-word "command subcommand" pair.
# NOTE(review): several entries can write or execute via options (e.g.
# `find -exec/-delete`, `sort -o`, `git branch -D`, `yq -i`); matching is by
# command name only, so confirm callers constrain arguments separately.
COMMON_SAFE_COMMANDS: Set[str] = {
    # File viewing
    "cat",
    "head",
    "tail",
    "bat",
    "less",
    "more",
    # Directory listing
    "ls",
    "tree",
    "pwd",
    "which",
    "whereis",
    # Search
    "grep",
    "rg",
    "ag",
    "find",
    "fd",
    "locate",
    # Git read-only
    "git log",
    "git show",
    "git diff",
    "git status",
    "git grep",
    "git blame",
    "git branch",
    "git rev-parse",
    "git describe",
    "git ls-tree",
    # Text processing
    "wc",
    "cut",
    "paste",
    "column",
    "sort",
    "uniq",
    # Data formats
    "jq",
    "yq",
    # File analysis
    "file",
    "stat",
    "du",
    "df",
    # Checksums
    "md5sum",
    "shasum",
    "sha256sum",
    "sha512sum",
}
|
|
216
|
+
|
|
217
|
+
# Commands that should never be allowed
|
|
218
|
+
# Deny list checked by is_command_allowed() before any allowlist, so these
# entries are rejected even when a custom allowlist contains them.  Entries
# are either a bare command name or a two-word "command subcommand" pair.
BLOCKED_COMMANDS: Set[str] = {
    # Destructive operations
    "rm",
    "rmdir",
    "dd",
    "mkfs",
    "fdisk",
    "shred",
    # File modifications
    "touch",
    "mkdir",
    "mv",
    "cp",
    "chmod",
    "chown",
    "chgrp",
    # Text modification
    "sed",
    "awk",
    "ed",
    # Git write operations
    "git add",
    "git commit",
    "git push",
    "git pull",
    "git merge",
    "git rebase",
    "git reset",
    "git checkout",
    "git stash",
    # Package management
    "npm",
    "pip",
    "apt",
    "brew",
    "yum",
    "dnf",
    # System operations
    "sudo",
    "su",
    "halt",
    "reboot",
    "shutdown",
    "kill",
    "pkill",
    # Network (data exfiltration risk)
    "curl",
    "wget",
    "nc",
    "netcat",
    "ssh",
    "scp",
    "rsync",
}
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def is_command_allowed(command: str, *, allowlist: Optional[Set[str]] = None) -> bool:
    """
    Check if a command is allowed based on allowlist/blocklist.

    Uses allowlist approach per best practices - only explicitly allowed
    commands pass validation.

    Args:
        command: The command string (may include arguments)
        allowlist: Optional custom allowlist (defaults to COMMON_SAFE_COMMANDS).
            An explicitly empty allowlist permits nothing.

    Returns:
        True if command is allowed, False otherwise
    """
    if not command:
        return False

    # Extract base command (first word or "cmd arg" for compound commands)
    parts = command.strip().split()
    if not parts:
        return False

    base_cmd = parts[0]
    # Compare against None rather than relying on truthiness: `allowlist or
    # COMMON_SAFE_COMMANDS` silently replaced an empty (deny-all) allowlist
    # with the permissive default.
    effective_allowlist = COMMON_SAFE_COMMANDS if allowlist is None else allowlist

    # Check for blocked commands first (deny takes precedence)
    if base_cmd in BLOCKED_COMMANDS:
        return False

    # Check compound commands (e.g., "git log")
    if len(parts) >= 2:
        compound = f"{base_cmd} {parts[1]}"
        if compound in BLOCKED_COMMANDS:
            return False
        if compound in effective_allowlist:
            return True

    # NOTE(review): only the first one or two whitespace-separated words are
    # inspected.  Shell metacharacters (";", "&&", "|", "$(...)", backticks)
    # and dangerous options are not examined, so "cat x; rm y" passes.  This
    # is only safe if the command is never handed to a shell -- confirm at
    # the call sites.

    # Check base command in allowlist
    return base_cmd in effective_allowlist
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
# ---------------------------------------------------------------------------
|
|
316
|
+
# Observability & Telemetry (per docs/mcp_best_practices/05-observability-telemetry.md)
|
|
317
|
+
# ---------------------------------------------------------------------------
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def _utcnow() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Stands in for ``datetime.utcnow()``, which is deprecated since Python
    3.12.  The tzinfo is dropped so values stay naive, matching what
    ``utcnow()`` returned and keeping the ``isoformat() + "Z"`` rendering in
    ``ExecutionSpan.to_log_dict`` unchanged.
    """
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)


@dataclass
class ExecutionSpan:
    """Represents a provider execution span for telemetry."""

    # First 8 characters of a random UUID4.
    span_id: str = field(default_factory=lambda: str(uuid.uuid4())[:8])
    trace_id: Optional[str] = None
    operation: str = "provider_execute"
    provider_id: str = ""
    model: str = ""
    # Naive UTC timestamps; rendered with a trailing "Z" in to_log_dict().
    started_at: datetime = field(default_factory=_utcnow)
    ended_at: Optional[datetime] = None
    duration_ms: Optional[float] = None
    status: Optional[ProviderStatus] = None  # None = pending/in-progress
    error: Optional[str] = None
    input_tokens: int = 0
    output_tokens: int = 0
    metadata: Dict[str, Any] = field(default_factory=dict)

    def finish(
        self,
        status: ProviderStatus,
        *,
        error: Optional[str] = None,
        input_tokens: int = 0,
        output_tokens: int = 0,
    ) -> None:
        """Mark the span as finished.

        Records the end time, the elapsed time since ``started_at`` in
        milliseconds, and overwrites ``status``, ``error`` and both token
        counts with the supplied values.

        Args:
            status: Final status of the execution.
            error: Error description, if the execution failed.
            input_tokens: Number of input tokens consumed.
            output_tokens: Number of output tokens produced.
        """
        self.ended_at = _utcnow()
        self.duration_ms = (self.ended_at - self.started_at).total_seconds() * 1000
        self.status = status
        self.error = error
        self.input_tokens = input_tokens
        self.output_tokens = output_tokens

    def to_log_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for structured logging.

        Returns:
            A dict of the span's fields.  Timestamps are ISO-8601 strings
            with a ``"Z"`` suffix (``ended_at`` is ``None`` while the span is
            unfinished) and ``status`` is the enum's ``value`` or
            ``"pending"`` when no status has been set.
        """
        return {
            "span_id": self.span_id,
            "trace_id": self.trace_id,
            "operation": self.operation,
            "provider_id": self.provider_id,
            "model": self.model,
            "started_at": self.started_at.isoformat() + "Z",
            "ended_at": self.ended_at.isoformat() + "Z" if self.ended_at else None,
            "duration_ms": self.duration_ms,
            "status": self.status.value if self.status else "pending",
            "error": self.error,
            "input_tokens": self.input_tokens,
            "output_tokens": self.output_tokens,
            "metadata": self.metadata,
        }
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def create_execution_span(
    provider_id: str,
    model: str = "",
    *,
    trace_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> ExecutionSpan:
    """Create a new execution span for telemetry.

    Args:
        provider_id: Identifier of the provider being executed.
        model: Model name, if known.
        trace_id: Trace to attach the span to; a fresh UUID4 string is
            generated when omitted or empty.
        metadata: Extra key/value data for the span; defaults to an empty
            dict.

    Returns:
        A new, unfinished ``ExecutionSpan``.
    """
    return ExecutionSpan(
        trace_id=trace_id or str(uuid.uuid4()),
        provider_id=provider_id,
        model=model,
        metadata=metadata or {},
    )
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
def log_span(span: ExecutionSpan, level: int = logging.INFO) -> None:
    """Log an execution span as structured JSON.

    Args:
        span: The span to log.
        level: ``logging`` level to emit at (defaults to ``INFO``).
    """
    # The span's fields travel in the record's `extra` mapping under the
    # "span_data" key, leaving the message text constant.
    log_data = span.to_log_dict()
    logger.log(level, "Provider execution span", extra={"span_data": log_data})
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
# ---------------------------------------------------------------------------
|
|
396
|
+
# Retry Matrix (per docs/mcp_best_practices/12-timeout-resilience.md)
|
|
397
|
+
# ---------------------------------------------------------------------------
|
|
398
|
+
|
|
399
|
+
# Status codes that are safe to retry
|
|
400
|
+
# Statuses for which is_retryable() returns True.
RETRYABLE_STATUSES: Set[ProviderStatus] = {
    ProviderStatus.TIMEOUT,
    # Note: RATE_LIMITED not in ProviderStatus enum, would need extension
}
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
def is_retryable(status: ProviderStatus) -> bool:
    """Check if a provider status indicates a retryable error.

    Args:
        status: The status to classify.

    Returns:
        True if ``status`` is a member of ``RETRYABLE_STATUSES``.
    """
    return status in RETRYABLE_STATUSES
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
def is_retryable_error(error: Exception) -> bool:
    """Check if an exception indicates a retryable error.

    Only ``ProviderTimeoutError`` (and its subclasses) is treated as
    retryable; every other exception, including other ``ProviderError``
    types, is not.

    Args:
        error: The exception to classify.

    Returns:
        True if the error is a ``ProviderTimeoutError``, False otherwise.
    """
    # The previous implementation had a separate ProviderError branch that
    # returned False just before an unconditional `return False`; the single
    # isinstance check yields the same result for every input.
    return isinstance(error, ProviderTimeoutError)
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
# ---------------------------------------------------------------------------
|
|
422
|
+
# Circuit Breaker (per docs/mcp_best_practices/12-timeout-resilience.md)
|
|
423
|
+
# ---------------------------------------------------------------------------
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
class CircuitState(Enum):
    """Circuit breaker states."""

    CLOSED = "closed"  # Normal operation
    OPEN = "open"  # Failures exceeded threshold, blocking requests
    HALF_OPEN = "half_open"  # Testing if service recovered


@dataclass
class CircuitBreaker:
    """
    Circuit breaker for provider resilience.

    Prevents cascade failures by stopping requests to failing providers.

    The breaker opens after ``failure_threshold`` consecutive failures,
    moves to half-open once ``recovery_timeout`` seconds have passed since
    the last failure, and closes again on the first success while half-open.
    All state changes happen under an internal lock.
    """

    name: str
    failure_threshold: int = 5
    recovery_timeout: float = 60.0  # seconds
    half_open_max_calls: int = 1

    _state: CircuitState = field(default=CircuitState.CLOSED, init=False)
    _failure_count: int = field(default=0, init=False)
    _last_failure_time: Optional[float] = field(default=None, init=False)
    _half_open_calls: int = field(default=0, init=False)
    _lock: Lock = field(default_factory=Lock, init=False)

    def _refresh_state_locked(self) -> CircuitState:
        """Apply the OPEN -> HALF_OPEN transition; caller must hold ``_lock``."""
        if (
            self._state == CircuitState.OPEN
            and self._last_failure_time is not None
            and time.time() - self._last_failure_time >= self.recovery_timeout
        ):
            self._state = CircuitState.HALF_OPEN
            self._half_open_calls = 0
        return self._state

    @property
    def state(self) -> CircuitState:
        """Get current circuit state, checking for recovery."""
        with self._lock:
            return self._refresh_state_locked()

    def can_execute(self) -> bool:
        """Check if a request can be executed.

        Returns:
            True when the circuit is closed, or when it is half-open and a
            probe slot (at most ``half_open_max_calls``) is still free; the
            slot is consumed by this call.  False otherwise.
        """
        # State refresh and slot accounting share one critical section so
        # another thread cannot change the state between the two steps.
        with self._lock:
            current = self._refresh_state_locked()
            if current == CircuitState.CLOSED:
                return True
            if (
                current == CircuitState.HALF_OPEN
                and self._half_open_calls < self.half_open_max_calls
            ):
                self._half_open_calls += 1
                return True
            return False  # OPEN, or half-open with no probe slots left

    def record_success(self) -> None:
        """Record a successful execution."""
        with self._lock:
            if self._state == CircuitState.HALF_OPEN:
                # Success in half-open state, close the circuit
                self._state = CircuitState.CLOSED
                self._failure_count = 0
            elif self._state == CircuitState.CLOSED:
                # A success ends the current failure streak; otherwise
                # unrelated failures spread over a long period would
                # eventually add up to the threshold.
                self._failure_count = 0

    def record_failure(self) -> None:
        """Record a failed execution."""
        with self._lock:
            self._failure_count += 1
            self._last_failure_time = time.time()

            if self._state == CircuitState.HALF_OPEN:
                # Failure in half-open state, re-open the circuit
                self._state = CircuitState.OPEN
            elif self._failure_count >= self.failure_threshold:
                self._state = CircuitState.OPEN

    def reset(self) -> None:
        """Reset the circuit breaker to initial state."""
        with self._lock:
            self._state = CircuitState.CLOSED
            self._failure_count = 0
            self._last_failure_time = None
            self._half_open_calls = 0
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
# Global circuit breakers per provider
|
|
509
|
+
# Registry of breakers keyed by provider id; guarded by _circuit_breaker_lock.
_circuit_breakers: Dict[str, CircuitBreaker] = {}
_circuit_breaker_lock = Lock()


def get_circuit_breaker(provider_id: str) -> CircuitBreaker:
    """Get or create a circuit breaker for a provider.

    Args:
        provider_id: Provider identifier used as the registry key and as the
            breaker's ``name``.

    Returns:
        The breaker registered for ``provider_id``; one with default
        settings is created on first request.
    """
    with _circuit_breaker_lock:
        if provider_id not in _circuit_breakers:
            _circuit_breakers[provider_id] = CircuitBreaker(name=provider_id)
        return _circuit_breakers[provider_id]
|
|
519
|
+
|
|
520
|
+
|
|
521
|
+
def reset_circuit_breakers() -> None:
    """Reset all circuit breakers (for testing).

    Empties the registry under its lock, so the next
    ``get_circuit_breaker`` call for any provider creates a fresh breaker.
    """
    with _circuit_breaker_lock:
        _circuit_breakers.clear()
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
# ---------------------------------------------------------------------------
|
|
528
|
+
# Rate Limiting (per docs/mcp_best_practices/15-concurrency-patterns.md)
|
|
529
|
+
# ---------------------------------------------------------------------------
|
|
530
|
+
|
|
531
|
+
|
|
532
|
+
@dataclass
class RateLimiter:
    """
    Token bucket rate limiter for provider calls.

    Prevents overwhelming providers with too many concurrent requests.

    The bucket starts full with ``max_tokens`` tokens and regains
    ``refill_rate`` tokens per second of elapsed time, capped at
    ``max_tokens``.  All bucket updates happen under an internal lock.
    """

    name: str
    max_tokens: int = 10
    refill_rate: float = 1.0  # tokens per second

    _tokens: float = field(default=0.0, init=False)
    _last_refill: float = field(default_factory=time.time, init=False)
    _lock: Lock = field(default_factory=Lock, init=False)

    def __post_init__(self) -> None:
        # Start with a full bucket.
        self._tokens = float(self.max_tokens)

    def _refill(self) -> None:
        """Refill tokens based on elapsed time; caller must hold ``_lock``."""
        now = time.time()
        # Clamp at zero: time.time() is wall-clock time and can step
        # backwards, which would otherwise subtract tokens from the bucket.
        elapsed = max(0.0, now - self._last_refill)
        self._tokens = min(self.max_tokens, self._tokens + elapsed * self.refill_rate)
        self._last_refill = now

    def acquire(self, tokens: int = 1) -> bool:
        """
        Try to acquire tokens for a request.

        Args:
            tokens: Number of tokens to take from the bucket.

        Returns:
            True if tokens acquired, False if rate limited
        """
        with self._lock:
            self._refill()
            if self._tokens >= tokens:
                self._tokens -= tokens
                return True
            return False

    def wait_time(self, tokens: int = 1) -> float:
        """Calculate time to wait for tokens to become available.

        Args:
            tokens: Number of tokens the caller wants to acquire.

        Returns:
            Seconds until the bucket would hold ``tokens`` tokens at the
            current refill rate; ``0.0`` if they are available now, and
            ``float("inf")`` if the bucket never refills
            (``refill_rate <= 0``).
        """
        with self._lock:
            self._refill()
            if self._tokens >= tokens:
                return 0.0
            if self.refill_rate <= 0:
                # Avoid ZeroDivisionError (or a negative wait) for a bucket
                # that never refills.
                return float("inf")
            needed = tokens - self._tokens
            return needed / self.refill_rate
|
|
580
|
+
|
|
581
|
+
|
|
582
|
+
# Global rate limiters per provider
|
|
583
|
+
# Registry of limiters keyed by provider id; guarded by _rate_limiter_lock.
_rate_limiters: Dict[str, RateLimiter] = {}
_rate_limiter_lock = Lock()


def get_rate_limiter(provider_id: str) -> RateLimiter:
    """Get or create a rate limiter for a provider.

    Args:
        provider_id: Provider identifier used as the registry key and as the
            limiter's ``name``.

    Returns:
        The limiter registered for ``provider_id``; one with default
        settings is created on first request.
    """
    with _rate_limiter_lock:
        if provider_id not in _rate_limiters:
            _rate_limiters[provider_id] = RateLimiter(name=provider_id)
        return _rate_limiters[provider_id]
|
|
593
|
+
|
|
594
|
+
|
|
595
|
+
def reset_rate_limiters() -> None:
    """Reset all rate limiters (for testing).

    Empties the registry under its lock, so the next ``get_rate_limiter``
    call for any provider creates a fresh limiter.
    """
    with _rate_limiter_lock:
        _rate_limiters.clear()
|
|
599
|
+
|
|
600
|
+
|
|
601
|
+
# ---------------------------------------------------------------------------
|
|
602
|
+
# Execution Wrapper (combines validation, observability, and resilience)
|
|
603
|
+
# ---------------------------------------------------------------------------
|
|
604
|
+
|
|
605
|
+
T = TypeVar("T")
|
|
606
|
+
|
|
607
|
+
|
|
608
|
+
def with_validation_and_resilience(
    provider_id: str,
    *,
    validate: bool = True,
    circuit_breaker: bool = True,
    rate_limit: bool = True,
    log_spans: bool = True,
) -> Callable[[Callable[..., ProviderResult]], Callable[..., ProviderResult]]:
    """
    Decorator combining validation, circuit breaking, rate limiting, and observability.

    The wrapped callable is invoked as ``func(request, *args, **kwargs)``.
    Around that call the wrapper, in order:

    1. validates the request with ``validate_request`` (when ``validate``),
    2. asks the provider's circuit breaker whether the call may run
       (when ``circuit_breaker``),
    3. acquires the provider's rate limiter (when ``rate_limit``),
    4. calls ``func`` and records a circuit-breaker success,
    5. finishes and logs the execution span (when ``log_spans``).

    Circuit-breaker accounting: a failure is recorded only for calls the
    breaker actually admitted. Request-validation errors and the breaker's own
    "circuit open" rejection are not provider failures, so they are re-raised
    without touching the breaker. Counting them would let malformed client
    input trip the breaker of a healthy provider and would register every
    rejected call as a fresh failure.

    Args:
        provider_id: The provider identifier
        validate: Whether to validate requests
        circuit_breaker: Whether to apply circuit breaker
        rate_limit: Whether to apply rate limiting
        log_spans: Whether to log execution spans

    Returns:
        Decorated function with resilience patterns applied

    Raises (from the returned wrapper):
        ProviderExecutionError: If the circuit breaker refuses the call or the
            rate limiter cannot be acquired.
        Exception: Anything raised by validation or by ``func`` is re-raised
            unchanged after the span has been updated.
    """

    def decorator(func: Callable[..., ProviderResult]) -> Callable[..., ProviderResult]:
        @wraps(func)
        def wrapper(request: ProviderRequest, *args: Any, **kwargs: Any) -> ProviderResult:
            span = create_execution_span(provider_id) if log_spans else None
            # Becomes True once the breaker has let this call through; from
            # then on the call owes the breaker exactly one outcome report.
            breaker_admitted = False

            try:
                # Input validation
                if validate:
                    request = validate_request(request)

                # Circuit breaker check
                if circuit_breaker:
                    cb = get_circuit_breaker(provider_id)
                    if not cb.can_execute():
                        raise ProviderExecutionError(
                            f"Circuit breaker open for provider {provider_id}",
                            provider=provider_id,
                        )
                    breaker_admitted = True

                # Rate limiting check
                # NOTE(review): a rate-limit rejection after admission is still
                # reported to the breaker as a failure (unchanged behavior), so
                # an admitted call never ends without an outcome - confirm this
                # is the intended policy.
                if rate_limit:
                    rl = get_rate_limiter(provider_id)
                    if not rl.acquire():
                        raise ProviderExecutionError(
                            f"Rate limited for provider {provider_id}",
                            provider=provider_id,
                        )

                # Execute the actual function
                result = func(request, *args, **kwargs)

                # Record success
                if circuit_breaker:
                    get_circuit_breaker(provider_id).record_success()

                # Update span (token counts are attached only when reported)
                if span and result.tokens:
                    span.finish(
                        result.status,
                        input_tokens=result.tokens.input_tokens,
                        output_tokens=result.tokens.output_tokens,
                    )
                elif span:
                    span.finish(result.status)

                return result

            except Exception as e:
                # Record failure - only for calls the breaker admitted
                if breaker_admitted:
                    get_circuit_breaker(provider_id).record_failure()

                # Update span
                if span:
                    status = ProviderStatus.TIMEOUT if isinstance(e, ProviderTimeoutError) else ProviderStatus.ERROR
                    span.finish(status, error=str(e))

                raise

            finally:
                # Log span (span is only created when log_spans is enabled)
                if span:
                    log_span(span)

        return wrapper

    return decorator
|
|
697
|
+
|
|
698
|
+
|
|
699
|
+
# ---------------------------------------------------------------------------
|
|
700
|
+
# Context Window Error Detection
|
|
701
|
+
# ---------------------------------------------------------------------------
|
|
702
|
+
|
|
703
|
+
# Common error patterns indicating context window/token limit exceeded
|
|
704
|
+
CONTEXT_WINDOW_ERROR_PATTERNS: Set[str] = {
    # OpenAI patterns
    "context_length_exceeded",
    "maximum context length",
    "max_tokens",
    "token limit",
    "tokens exceeds",
    "prompt is too long",
    "input too long",
    # Anthropic patterns
    "prompt is too large",
    "context window",
    "exceeds the maximum",
    "too many tokens",
    # Google/Gemini patterns
    "max input tokens",
    "input token limit",
    "content is too long",
    "request payload size exceeds",
    # Generic patterns
    "length exceeded",
    "limit exceeded",
    "too long for model",
    "input exceeds",
    "context limit",
}

# Substrings marking a request-rate throttling error. Such messages can also
# contain generic phrases from CONTEXT_WINDOW_ERROR_PATTERNS (for example
# "rate limit exceeded" contains "limit exceeded"), so they are ruled out
# before the patterns are consulted.
_RATE_LIMIT_ERROR_MARKERS = (
    "rate limit",
    "rate_limit",
    "ratelimit",
    "too many requests",
)


def is_context_window_error(error: Exception) -> bool:
    """Check if an exception indicates a context window/token limit error.

    Examines the lower-cased ``str(error)`` for any substring listed in
    ``CONTEXT_WINDOW_ERROR_PATTERNS``. Messages that identify themselves as
    rate-limit/throttling errors are never classified as context window
    errors, even when they contain a generic phrase such as "limit exceeded".

    Args:
        error: Exception to check

    Returns:
        True if the error appears to be a context window error
    """
    error_str = str(error).lower()

    # Throttling is a different failure class: shrinking the prompt does not
    # help, so it must not be reported as a context window problem.
    if any(marker in error_str for marker in _RATE_LIMIT_ERROR_MARKERS):
        return False

    return any(pattern in error_str for pattern in CONTEXT_WINDOW_ERROR_PATTERNS)
|
|
751
|
+
|
|
752
|
+
|
|
753
|
+
def extract_token_counts(error_str: str) -> tuple[Optional[int], Optional[int]]:
    """Extract token counts from error message if present.

    Attempts to parse prompt_tokens and max_tokens from common error formats.
    Matching is case-insensitive. Only plain digit runs of up to seven digits
    are recognised; numbers written with thousands separators are not parsed.

    Recognised formats:
        * ``"<X> tokens exceeds [the] <Y>"`` or ``"<X> tokens > <Y>"``, which
          yields both values at once (e.g.
          ``"prompt is too long: 208310 tokens > 200000 maximum"``).
        * ``"maximum [context] length [is] <Y>"``, which yields max_tokens.
        * ``"requested|contains|have|with <X> tokens"``, which yields
          prompt_tokens.

    Args:
        error_str: Error message string

    Returns:
        Tuple of (prompt_tokens, max_tokens), either may be None if not found
    """
    import re

    text = error_str.lower()

    # Pattern: "X tokens exceeds Y limit" or "X tokens > Y maximum".
    # When this matches, both numbers come from the same phrase, so return
    # immediately rather than mixing them with the looser patterns below.
    both = re.search(r"(\d{1,7})\s*tokens?\s*(?:exceeds?|>)\s*(?:the\s*)?(\d{1,7})", text)
    if both:
        return int(both.group(1)), int(both.group(2))

    prompt_tokens = None
    max_tokens = None

    # Pattern: "maximum context length is X tokens" with "Y tokens" input
    max_match = re.search(r"maximum\s+(?:context\s+)?length\s+(?:is\s+)?(\d{1,7})", text)
    if max_match:
        max_tokens = int(max_match.group(1))

    # Pattern: "requested X tokens" or "contains X tokens"
    prompt_match = re.search(r"(?:requested|contains|have|with)\s+(\d{1,7})\s*tokens?", text)
    if prompt_match:
        prompt_tokens = int(prompt_match.group(1))

    return prompt_tokens, max_tokens
|
|
787
|
+
|
|
788
|
+
|
|
789
|
+
def create_context_window_guidance(
    prompt_tokens: Optional[int] = None,
    max_tokens: Optional[int] = None,
    provider_id: Optional[str] = None,
) -> str:
    """Build a one-line, human-readable hint for recovering from a context window error.

    The message always opens with a fixed headline and closes with a fixed
    list of remediation steps. Between them, one sentence describes the token
    counts when at least one of them is known. Counts that are ``None`` or
    ``0`` are treated as unknown.

    Args:
        prompt_tokens: Number of tokens in the prompt (if known)
        max_tokens: Maximum tokens allowed (if known)
        provider_id: Provider that raised the error. Accepted for interface
            compatibility; it does not currently affect the returned text.

    Returns:
        The headline, the optional token-count sentence and the remediation
        steps, joined by single spaces.
    """
    if prompt_tokens and max_tokens:
        excess = prompt_tokens - max_tokens
        detail = [
            f"Prompt ({prompt_tokens:,} tokens) exceeds limit ({max_tokens:,} tokens) by {excess:,} tokens."
        ]
    elif prompt_tokens:
        detail = [f"Prompt contains approximately {prompt_tokens:,} tokens."]
    elif max_tokens:
        detail = [f"Maximum context window is {max_tokens:,} tokens."]
    else:
        # Nothing is known about the sizes; emit headline and remedies only.
        detail = []

    remedies = (
        "To resolve: (1) Reduce input size by excluding large content, "
        "(2) Summarize or truncate long sections, "
        "(3) Use a model with larger context window, "
        "(4) Process content in smaller batches."
    )

    return " ".join(["Context window limit exceeded.", *detail, remedies])
|
|
820
|
+
|
|
821
|
+
|
|
822
|
+
# Names exported by `from <module> import *`, grouped by concern.
__all__ = [
    # Validation
    "ValidationError",
    "strip_ansi",
    "ensure_utf8",
    "sanitize_prompt",
    "validate_request",
    # Command allowlists
    "COMMON_SAFE_COMMANDS",
    "BLOCKED_COMMANDS",
    "is_command_allowed",
    # Observability
    "ExecutionSpan",
    "create_execution_span",
    "log_span",
    # Retry
    "RETRYABLE_STATUSES",
    "is_retryable",
    "is_retryable_error",
    # Circuit breaker
    "CircuitState",
    "CircuitBreaker",
    "get_circuit_breaker",
    "reset_circuit_breakers",
    # Rate limiting
    "RateLimiter",
    "get_rate_limiter",
    "reset_rate_limiters",
    # Execution wrapper
    "with_validation_and_resilience",
    # Context window detection
    "CONTEXT_WINDOW_ERROR_PATTERNS",
    "is_context_window_error",
    "extract_token_counts",
    "create_context_window_guidance",
]
|