errorsense 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
errorsense/models.py ADDED
@@ -0,0 +1,52 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any
5
+
6
+
7
@dataclass(frozen=True)
class SenseResult:
    """Result from classification — produced by rulesets or skills.

    Immutable value object. Phase._stamp_phase fills in ``phase`` and
    ``skill_name`` after the fact when the producer left them blank.
    """

    label: str  # classification label (one of the engine's category names)
    confidence: float  # rulesets emit 1.0 (field match) or 0.9 (regex match); LLM values vary
    phase: str = ""  # pipeline phase that produced this result (stamped by Phase)
    skill_name: str = ""  # producing skill name, or the ruleset class name
    reason: str | None = None  # only set when explain=True, LLM phases only
16
+
17
+
18
@dataclass(frozen=True)
class TrailResult:
    """Result from trail() — classification + threshold state.

    If a review ran (threshold hit + review enabled), label and reason
    reflect the review's verdict. If the review changed the label,
    the history entry is updated and counts are adjusted.
    """

    label: str  # final label (possibly overridden by the LLM review)
    confidence: float  # confidence of the winning classification
    phase: str  # pipeline phase that produced the classification
    skill_name: str  # skill or ruleset that produced the classification
    at_threshold: bool  # True when the counted-error threshold was reached
    reason: str | None = None  # LLM review explanation, None if no review ran
33
+
34
+
35
@dataclass(frozen=True)
class TrailingConfig:
    """Configuration for trailing (stateful error tracking).

    Args:
        threshold: Number of counted errors before review triggers.
        count_labels: Only these labels count toward threshold.
        history_size: Max errors kept per key (ring buffer).
        review: Whether to LLM-review history when threshold hit.
            None = auto (True if LLM phase exists, False if not).
            True = force (raises if no LLM phase).
            False = never.
    """

    threshold: int = 3  # counted errors needed to trigger a review
    count_labels: list[str] | None = None  # None = no filter (presumably all labels count — confirm in Tracker)
    history_size: int = 10  # ring-buffer capacity per tracking key
    review: bool | None = None  # tri-state; see class docstring
errorsense/phase.py ADDED
@@ -0,0 +1,192 @@
1
+ """Phase — named stage in the classification pipeline."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import logging
7
+ from dataclasses import replace
8
+ from typing import Any
9
+
10
+ from errorsense.llm import LLMClient, LLMConfig
11
+ from errorsense.models import SenseResult
12
+ from errorsense.ruleset import Ruleset
13
+ from errorsense.signal import Signal
14
+ from errorsense.skill import Skill
15
+
16
+ logger = logging.getLogger("errorsense")
17
+
18
+ __all__ = ["Phase"]
19
+
20
+
21
class Phase:
    """A named stage in the classification pipeline.

    Each phase contains either rulesets (deterministic) or skills (LLM).
    Not both.
    """

    def __init__(
        self,
        name: str,
        rulesets: list[Ruleset] | None = None,
        skills: list[Skill] | None = None,
        llm: LLMConfig | None = None,
    ) -> None:
        """Validate the configuration and set up the shared LLM client.

        Args:
            name: Non-empty phase name; stamped onto results.
            rulesets: Deterministic classifiers (mutually exclusive with skills).
            skills: LLM classifiers; require an LLM config.
            llm: Shared LLM connection config for this phase's skills.

        Raises:
            ValueError: on an empty name, mixed rulesets+skills, neither
                present, or skills without an LLM config.
        """
        if not name:
            raise ValueError("Phase requires a non-empty name")

        has_rulesets = rulesets is not None and len(rulesets) > 0
        has_skills = skills is not None and len(skills) > 0

        if has_rulesets and has_skills:
            raise ValueError(
                f"Phase {name!r}: cannot mix rulesets and skills. "
                "Use rulesets OR (skills + llm), not both."
            )
        if not has_rulesets and not has_skills:
            raise ValueError(
                f"Phase {name!r}: must have at least one ruleset or skill."
            )
        if has_skills and not llm:
            raise ValueError(
                f"Phase {name!r}: skills require llm=LLMConfig(...)."
            )
        if has_rulesets and llm:
            # Not fatal — the config is simply unused for deterministic phases.
            logger.warning(
                "Phase %r: llm config ignored for ruleset phase.", name
            )

        self.name = name
        self.rulesets = rulesets or []
        self.skills = skills or []
        self.llm = llm
        self.is_llm_phase = has_skills
        self._categories: list[str] = []  # injected later via set_categories()
        self._llm_client: LLMClient | None = None

        if self.is_llm_phase and llm:
            self._llm_client = LLMClient(llm)

    def set_categories(self, categories: list[str]) -> None:
        """Inject the engine's category list (copied defensively)."""
        self._categories = list(categories)

    def classify(self, signal: Signal, explain: bool = False) -> SenseResult | None:
        """Sync classification. Full pipeline — rulesets or LLM."""
        if self.is_llm_phase:
            return self._run_skills_sync(signal, explain)
        return self._run_rulesets(signal)

    async def async_classify(self, signal: Signal, explain: bool = False) -> SenseResult | None:
        """Async classification. Full pipeline — rulesets or LLM."""
        if self.is_llm_phase:
            return await self._run_skills_async(signal, explain)
        return self._run_rulesets(signal)

    def _run_rulesets(self, signal: Signal) -> SenseResult | None:
        """Run rulesets in order; first non-None result wins.

        A ruleset that raises is logged and skipped — one bad ruleset must
        not take down the whole phase.
        """
        for ruleset in self.rulesets:
            try:
                result = ruleset.classify(signal)
            except Exception as e:
                logger.warning(
                    "Phase %r: ruleset %s raised %s: %s",
                    self.name, type(ruleset).__name__, type(e).__name__, e,
                )
                continue
            if result is not None:
                return self._stamp_phase(result, type(ruleset).__name__)
        return None

    def _run_skills_sync(self, signal: Signal, explain: bool) -> SenseResult | None:
        """Run all skills sequentially; keep the highest-confidence result."""
        if not self._llm_client:
            return None

        best: SenseResult | None = None
        for skill in self.skills:
            try:
                r = self._run_one_skill_sync(signal, skill, explain)
            except Exception as e:
                logger.warning("Phase %r: skill %r failed: %s", self.name, skill.name, e)
                continue
            if r is None:
                continue
            result = self._stamp_phase(r, r.skill_name)
            if best is None or result.confidence > best.confidence:
                best = result
        return best

    async def _run_skills_async(self, signal: Signal, explain: bool) -> SenseResult | None:
        """Run all skills concurrently; keep the highest-confidence result."""
        if not self._llm_client:
            return None

        results = await asyncio.gather(
            *[self._run_one_skill_async(signal, skill, explain) for skill in self.skills],
            return_exceptions=True,
        )

        best: SenseResult | None = None
        # gather() preserves input order, so zip pairs each result with its
        # skill — lets the failure log name the skill, same as the sync path.
        for skill, r in zip(self.skills, results):
            if isinstance(r, BaseException):
                if not isinstance(r, Exception):
                    # CancelledError / KeyboardInterrupt etc. must propagate;
                    # they are not skill results and must not be swallowed.
                    raise r
                logger.warning("Phase %r: skill %r failed: %s", self.name, skill.name, r)
                continue
            if r is None:
                continue
            result = self._stamp_phase(r, r.skill_name)
            if best is None or result.confidence > best.confidence:
                best = result
        return best

    def _run_one_skill_sync(self, signal: Signal, skill: Skill, explain: bool) -> SenseResult | None:
        """Classify with one skill, honoring a per-skill LLM override.

        A per-skill override client is created and closed per call; the
        shared phase client is reused otherwise.
        """
        if skill.llm is not None:
            client = LLMClient(skill.llm)
            try:
                return client.classify_sync(signal, skill, self._categories, include_reason=explain)
            finally:
                client.close_sync()
        return self._llm_client.classify_sync(signal, skill, self._categories, include_reason=explain)

    async def _run_one_skill_async(self, signal: Signal, skill: Skill, explain: bool) -> SenseResult | None:
        """Async variant of _run_one_skill_sync."""
        if skill.llm is not None:
            client = LLMClient(skill.llm)
            try:
                return await client.classify_async(signal, skill, self._categories, include_reason=explain)
            finally:
                await client.close_async()
        return await self._llm_client.classify_async(signal, skill, self._categories, include_reason=explain)

    def run_llm_call(
        self, signal: Signal, skill: Skill, categories: list[str],
    ) -> SenseResult | None:
        """Run a single sync LLM call. Public API for Tracker reclassification."""
        if not self._llm_client:
            return None
        return self._llm_client.classify_sync(signal, skill, categories, include_reason=True)

    async def async_run_llm_call(
        self, signal: Signal, skill: Skill, categories: list[str],
    ) -> SenseResult | None:
        """Run a single async LLM call. Public API for Tracker reclassification."""
        if not self._llm_client:
            return None
        return await self._llm_client.classify_async(signal, skill, categories, include_reason=True)

    def _stamp_phase(self, result: SenseResult, skill_name: str) -> SenseResult:
        """Fill in phase/skill_name on a result without clobbering set values."""
        updates: dict[str, Any] = {}
        if not result.phase:
            updates["phase"] = self.name
        if not result.skill_name:
            updates["skill_name"] = skill_name
        if updates:
            return replace(result, **updates)
        return result

    def close_sync(self) -> None:
        """Close the shared LLM client synchronously (no-op for ruleset phases)."""
        if self._llm_client:
            self._llm_client.close_sync()

    async def close_async(self) -> None:
        """Close the shared LLM client asynchronously (no-op for ruleset phases)."""
        if self._llm_client:
            await self._llm_client.close_async()

    async def close(self) -> None:
        """Close the shared LLM client (no-op for ruleset phases)."""
        if self._llm_client:
            await self._llm_client.close()
@@ -0,0 +1,5 @@
1
+ """Built-in presets — opinionated pre-configured ErrorSense instances."""
2
+
3
+ from errorsense.presets.http_gateway import http, http_no_llm
4
+
5
+ __all__ = ["http", "http_no_llm"]
@@ -0,0 +1,72 @@
1
+ """HTTP presets — client vs server error classification."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from errorsense.engine import ErrorSense
6
+ from errorsense.llm import LLMConfig
7
+ from errorsense.phase import Phase
8
+ from errorsense.ruleset import Ruleset
9
+ from errorsense.skill import Skill
10
+
11
+ __all__ = ["http", "http_no_llm"]
12
+
13
+
14
def _ruleset_phases(extra_rulesets: list[Ruleset] | None = None) -> list[Phase]:
    """Shared ruleset phases for both http() and http_no_llm().

    Phase "rules" matches status codes (any 4xx -> client, common gateway
    5xx -> server) and content type (HTML body -> server; JSON explicitly
    left unclassified). Phase "patterns" regex-matches gateway error text
    in the body; caller-supplied extra_rulesets are appended there.
    """
    return [
        Phase("rules", rulesets=[
            Ruleset(field="status_code", match={
                # "4xx" is a range key: every 400-499 status is a client error.
                "4xx": "client", 502: "server", 503: "server", 504: "server",
            }),
            Ruleset(field="headers.content-type", match={
                # A None label means "explicitly no classification" — JSON
                # bodies fall through to later phases.
                "text/html": "server", "application/json": None,
            }),
        ]),
        Phase("patterns", rulesets=[
            Ruleset(field="body", patterns=[
                ("server", [r"Bad Gateway", r"Service Unavailable", r"Gateway Timeout"]),
            ]),
            # Extra rulesets run after the built-in pattern matcher.
            *(extra_rulesets or []),
        ]),
    ]
32
+
33
+
34
def http(
    llm: LLMConfig,
    extra_rulesets: list[Ruleset] | None = None,
) -> ErrorSense:
    """HTTP error classification with LLM: client, server, or undecided.

    Deterministic rulesets resolve the clear-cut cases (4xx, 502/503/504,
    HTML responses); the LLM phase handles whatever remains ambiguous —
    this is where ErrorSense earns its keep.

    Args:
        llm: LLM connection config (required).
        extra_rulesets: Additional rulesets appended to the patterns phase.
    """
    pipeline = [
        *_ruleset_phases(extra_rulesets),
        Phase("llm", skills=[Skill("http_classifier")], llm=llm),
    ]
    return ErrorSense(
        categories=["client", "server", "undecided"],
        pipeline=pipeline,
        default="undecided",
    )
55
+
56
+
57
def http_no_llm(
    extra_rulesets: list[Ruleset] | None = None,
) -> ErrorSense:
    """HTTP error classification without LLM: client, server, or undecided.

    Only the clear-cut cases (status codes, gateway patterns) get a label;
    anything ambiguous comes back as "undecided".

    Args:
        extra_rulesets: Additional rulesets appended to the patterns phase.
    """
    pipeline = _ruleset_phases(extra_rulesets)
    return ErrorSense(
        categories=["client", "server", "undecided"],
        pipeline=pipeline,
        default="undecided",
    )
errorsense/ruleset.py ADDED
@@ -0,0 +1,165 @@
1
+ """Ruleset — deterministic (non-LLM) classification logic."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ import re
8
+ from typing import Any
9
+
10
+ from errorsense.models import SenseResult
11
+ from errorsense.signal import Signal
12
+
13
+ logger = logging.getLogger("errorsense")
14
+
15
+ __all__ = ["Ruleset"]
16
+
17
+
18
+ def _resolve_dotted(data: Any, path: str) -> Any:
19
+ """Resolve a dotted path like 'error.type' into nested dict access."""
20
+ current = data
21
+ for part in path.split("."):
22
+ if isinstance(current, dict):
23
+ current = current.get(part)
24
+ else:
25
+ return None
26
+ return current
27
+
28
+
29
+ class Ruleset:
30
+ """Deterministic classification logic.
31
+
32
+ Each ruleset does one thing: either field matching (match=) or regex
33
+ patterns (patterns=). Not both. Subclass and override classify() for
34
+ custom logic beyond config.
35
+ """
36
+
37
+ def __init__(
38
+ self,
39
+ field: str | None = None,
40
+ match: dict[Any, str | None] | None = None,
41
+ patterns: list[tuple[str, list[str]]] | None = None,
42
+ case_sensitive: bool = False,
43
+ ) -> None:
44
+ if type(self) is Ruleset:
45
+ if not field:
46
+ raise ValueError("Ruleset requires a 'field' parameter")
47
+ if match is not None and patterns is not None:
48
+ raise ValueError(
49
+ "Ruleset takes match= OR patterns=, not both. "
50
+ "Use separate rulesets in the same phase."
51
+ )
52
+ if match is None and patterns is None:
53
+ raise ValueError("Ruleset requires either match= or patterns=")
54
+
55
+ self._init_fields(field, match, patterns, case_sensitive)
56
+
57
+ def _init_fields(
58
+ self,
59
+ field: str | None,
60
+ match: dict[Any, str | None] | None,
61
+ patterns: list[tuple[str, list[str]]] | None,
62
+ case_sensitive: bool,
63
+ ) -> None:
64
+ self.field = field
65
+ self._match = match
66
+ self._range_keys: dict[str, str] = {}
67
+ self._exact_keys: dict[Any, str | None] = {}
68
+ self._compiled: list[tuple[str, list[re.Pattern[str]]]] | None = None
69
+
70
+ if match:
71
+ self._split_match_keys(match)
72
+ if patterns:
73
+ flags = 0 if case_sensitive else re.IGNORECASE
74
+ self._compiled = [
75
+ (label, [re.compile(p, flags) for p in pats])
76
+ for label, pats in patterns
77
+ ]
78
+
79
+ def _split_match_keys(self, match: dict[Any, str | None]) -> None:
80
+ for key, value in match.items():
81
+ if isinstance(key, str) and len(key) == 3 and key[0].isdigit() and key.endswith("xx"):
82
+ if value is not None:
83
+ self._range_keys[key] = value
84
+ else:
85
+ self._exact_keys[key] = value
86
+
87
+ def referenced_labels(self) -> set[str]:
88
+ """Return set of label strings this ruleset can produce. Used by engine validation."""
89
+ match = getattr(self, "_match", None)
90
+ if match is None:
91
+ return set()
92
+ return {v for v in match.values() if isinstance(v, str)}
93
+
94
+ def classify(self, signal: Signal) -> SenseResult | None:
95
+ """Classify a signal. Override in subclass for custom logic."""
96
+ value = self._resolve_field(signal)
97
+ if value is None:
98
+ return None
99
+
100
+ if self._match is not None:
101
+ return self._match_value(value)
102
+ if self._compiled is not None:
103
+ return self._match_patterns(value)
104
+ return None
105
+
106
+ def _resolve_field(self, signal: Signal) -> Any:
107
+ field = self.field
108
+ if field is None:
109
+ return None
110
+
111
+ if field.startswith("headers."):
112
+ headers = signal.get("headers")
113
+ if not hasattr(headers, "get"):
114
+ return None
115
+ header_name = field[len("headers."):]
116
+ return headers.get(header_name, "")
117
+
118
+ if field.startswith("body."):
119
+ body = signal.get("body")
120
+ if not isinstance(body, str):
121
+ return None
122
+ try:
123
+ parsed = json.loads(body)
124
+ except (json.JSONDecodeError, TypeError):
125
+ logger.debug("Ruleset %r: failed to parse JSON body", field)
126
+ return None
127
+ if not isinstance(parsed, dict):
128
+ return None
129
+ dot_path = field[len("body."):]
130
+ return _resolve_dotted(parsed, dot_path)
131
+
132
+ return signal.get(field)
133
+
134
+ def _match_value(self, value: Any) -> SenseResult | None:
135
+ field = self.field
136
+
137
+ if value in self._exact_keys:
138
+ label = self._exact_keys[value]
139
+ if label is None:
140
+ return None
141
+ return SenseResult(label=label, confidence=1.0)
142
+
143
+ if isinstance(value, int) and self._range_keys:
144
+ range_key = f"{value // 100}xx"
145
+ if range_key in self._range_keys:
146
+ label = self._range_keys[range_key]
147
+ return SenseResult(label=label, confidence=1.0)
148
+
149
+ if isinstance(value, str) and self._exact_keys:
150
+ for pattern, label in self._exact_keys.items():
151
+ if isinstance(pattern, str) and pattern in value:
152
+ if label is None:
153
+ return None
154
+ return SenseResult(label=label, confidence=1.0)
155
+
156
+ return None
157
+
158
+ def _match_patterns(self, value: Any) -> SenseResult | None:
159
+ if not isinstance(value, str):
160
+ return None
161
+ for label, compiled_pats in self._compiled:
162
+ for pat in compiled_pats:
163
+ if pat.search(value):
164
+ return SenseResult(label=label, confidence=0.9)
165
+ return None
errorsense/signal.py ADDED
@@ -0,0 +1,100 @@
1
+ from __future__ import annotations
2
+
3
+ import traceback
4
+ from types import MappingProxyType
5
+ from typing import Any
6
+
7
+
8
+ def _deep_freeze(obj: Any) -> Any:
9
+ """Recursively freeze dicts into MappingProxyType and lists into tuples."""
10
+ if isinstance(obj, dict):
11
+ return MappingProxyType({k: _deep_freeze(v) for k, v in obj.items()})
12
+ if isinstance(obj, list):
13
+ return tuple(_deep_freeze(item) for item in obj)
14
+ return obj
15
+
16
+
17
class Signal:
    """Immutable container for error/event data.

    All values are deep-frozen at construction time — skills get a
    truly read-only view. Dict-like access for convenience.
    """

    # Single slot: blocks ad-hoc attribute creation and keeps instances small.
    __slots__ = ("_data",)

    def __init__(self, data: dict[str, Any] | None = None, **kwargs: Any) -> None:
        """Merge data dict and kwargs (kwargs win on collisions), then freeze."""
        raw = {**(data or {}), **kwargs}
        # object.__setattr__ bypasses the immutability guard below — the
        # only attribute write this instance ever sees.
        object.__setattr__(self, "_data", _deep_freeze(raw))

    def __getitem__(self, key: str) -> Any:
        # Raises KeyError on a missing key, like a dict.
        return self._data[key]

    def get(self, key: str, default: Any = None) -> Any:
        """Dict-style get with a default for missing keys."""
        return self._data.get(key, default)

    def __contains__(self, key: str) -> bool:
        return key in self._data

    def keys(self) -> Any:
        return self._data.keys()

    def values(self) -> Any:
        return self._data.values()

    def items(self) -> Any:
        return self._data.items()

    def to_dict(self) -> dict[str, Any]:
        """Return a mutable deep copy of the signal data."""
        return _thaw(self._data)

    def __setattr__(self, name: str, value: Any) -> None:
        # Any attribute write after __init__ is a caller bug.
        raise AttributeError("Signal is immutable")

    def __setitem__(self, key: str, value: Any) -> None:
        raise TypeError("Signal is immutable")

    def __delitem__(self, key: str) -> None:
        raise TypeError("Signal is immutable")

    def __repr__(self) -> str:
        # dict(...) unwraps the top-level MappingProxyType for readable
        # output; nested values may still show their frozen types.
        return f"Signal({dict(self._data)!r})"

    @classmethod
    def from_http(
        cls,
        status_code: int,
        body: str = "",
        headers: dict[str, str] | None = None,
    ) -> Signal:
        """Build a Signal from an HTTP response's status, body, and headers."""
        return cls(
            {
                "status_code": status_code,
                "body": body,
                "headers": headers or {},
            }
        )

    @classmethod
    def from_grpc(cls, code: int, details: str = "") -> Signal:
        """Build a Signal from a gRPC status code and details string."""
        return cls({"grpc_code": code, "details": details})

    @classmethod
    def from_exception(cls, exc: BaseException) -> Signal:
        """Build a Signal from an exception: type name, message, traceback lines."""
        return cls(
            {
                "exception_type": type(exc).__name__,
                "message": str(exc),
                # format_exception returns a list of lines; the freeze step
                # stores it as a tuple.
                "traceback": traceback.format_exception(type(exc), exc, exc.__traceback__),
            }
        )
92
+
93
+
94
+ def _thaw(obj: Any) -> Any:
95
+ """Recursively convert MappingProxyType back to dict and tuples to lists."""
96
+ if isinstance(obj, (MappingProxyType, dict)):
97
+ return {k: _thaw(v) for k, v in obj.items()}
98
+ if isinstance(obj, (tuple, list)):
99
+ return [_thaw(item) for item in obj]
100
+ return obj
errorsense/skill.py ADDED
@@ -0,0 +1,70 @@
1
+ """Skill — LLM domain instructions loaded from markdown files."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import TYPE_CHECKING
7
+
8
+ if TYPE_CHECKING:
9
+ from errorsense.llm import LLMConfig
10
+
11
+ __all__ = ["Skill"]
12
+
13
+ _BUILT_IN_SKILLS_DIR = Path(__file__).parent / "skills"
14
+
15
+
16
class Skill:
    """Domain-specific instructions for LLM classification.

    Instructions are loaded from a markdown file by default. Built-in skills
    live in errorsense/skills/. Custom skills can point to any file path.

    For programmatic use (e.g. Tracker reclassification), inline instructions=
    is also supported.

    Args:
        name: Skill name. If no path or instructions given, looks for {name}.md
            in the built-in skills directory.
        path: Explicit path to a .md file. Overrides built-in lookup.
        instructions: Inline instructions string. Overrides file loading.
        prompt_template: Override the default LLM prompt template.
        temperature: LLM temperature (default: 0.0 for determinism).
        llm: Per-skill LLMConfig override.

    Raises:
        ValueError: If name is empty, or the resolved skill file is empty.
        FileNotFoundError: If no inline instructions are given and the skill
            file does not exist.
    """

    def __init__(
        self,
        name: str,
        path: str | Path | None = None,
        instructions: str | None = None,
        prompt_template: str | None = None,
        temperature: float = 0.0,
        llm: LLMConfig | None = None,
    ) -> None:
        if not name:
            raise ValueError("Skill requires a non-empty 'name'")

        self.name = name
        self.prompt_template = prompt_template
        self.temperature = temperature
        self.llm = llm

        # Inline instructions short-circuit file loading entirely.
        if instructions:
            self.instructions = instructions
            return

        # Load from file: an explicit path wins over the built-in skills dir.
        if path is not None:
            skill_path = Path(path)
        else:
            skill_path = _BUILT_IN_SKILLS_DIR / f"{name}.md"

        if not skill_path.exists():
            raise FileNotFoundError(
                f"Skill {name!r}: file not found at {skill_path}. "
                f"Create {skill_path} or pass path= to point to your skill file."
            )

        # Explicit UTF-8: skill files are markdown and must load identically
        # regardless of the platform's default locale encoding.
        self.instructions = skill_path.read_text(encoding="utf-8").strip()
        if not self.instructions:
            raise ValueError(f"Skill {name!r}: file {skill_path} is empty")
@@ -0,0 +1,29 @@
1
+ You classify HTTP API errors as "client", "server", or "undecided".
2
+
3
+ You only see errors that were NOT already classified by deterministic rules.
4
+ The obvious cases (4xx status codes, 502/503/504, HTML error pages) are already handled.
5
+ You are the fallback for ambiguous errors.
6
+
7
+ ## How to decide
8
+
9
+ **Client errors** — the request itself is the problem:
10
+ - Error message mentions the request: "invalid parameter", "model not found", "unsupported format"
11
+ - The body contains a structured error response with a type like "invalid_request_error" or "validation_error"
12
+ - The error would go away if the client fixed their request
13
+ - Rate limiting, authentication failures, quota exceeded
14
+
15
+ **Server errors** — the server is the problem:
16
+ - Resource exhaustion: out of memory, disk full, too many connections
17
+ - Internal failures: null pointer, assertion failed, stack overflow
18
+ - Dependency failures: database connection lost, upstream timeout
19
+ - The same request would succeed if retried later or against a different server
20
+
21
+ ## Edge cases
22
+
23
+ - A 500 with "model not found" is **client** — the user asked for something that doesn't exist
24
+ - A 500 with "CUDA out of memory" is **server** — GPU resource exhaustion
25
+ - A 500 with no body or generic "Internal Server Error" is **server** — no evidence of client fault
26
+ - A 500 with a JSON error response containing a request validation message is **client**
27
+
28
+ If you have reasonable evidence, classify as "client" or "server".
29
+ If the signal is truly ambiguous with no useful evidence, classify as "undecided".