errorsense 0.1.2__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {errorsense-0.1.2 → errorsense-0.2.0}/.gitignore +0 -1
- {errorsense-0.1.2 → errorsense-0.2.0}/PKG-INFO +15 -10
- {errorsense-0.1.2 → errorsense-0.2.0}/README.md +14 -9
- {errorsense-0.1.2 → errorsense-0.2.0}/design/ERRORSENSE.md +11 -9
- {errorsense-0.1.2 → errorsense-0.2.0}/errorsense/__init__.py +1 -1
- {errorsense-0.1.2 → errorsense-0.2.0}/errorsense/engine.py +41 -37
- {errorsense-0.1.2 → errorsense-0.2.0}/errorsense/llm.py +18 -20
- {errorsense-0.1.2 → errorsense-0.2.0}/errorsense/models.py +9 -6
- {errorsense-0.1.2 → errorsense-0.2.0}/errorsense/phase.py +5 -33
- {errorsense-0.1.2 → errorsense-0.2.0}/errorsense/presets/http_gateway.py +2 -2
- {errorsense-0.1.2 → errorsense-0.2.0}/errorsense/skill.py +0 -7
- errorsense-0.2.0/errorsense/skills/reclassification.md +21 -0
- {errorsense-0.1.2 → errorsense-0.2.0}/pyproject.toml +1 -1
- {errorsense-0.1.2 → errorsense-0.2.0}/tests/test_engine.py +25 -25
- {errorsense-0.1.2 → errorsense-0.2.0}/tests/test_tracker.py +5 -10
- errorsense-0.1.2/errorsense/skills/reclassification.md +0 -9
- {errorsense-0.1.2 → errorsense-0.2.0}/LICENSE +0 -0
- {errorsense-0.1.2 → errorsense-0.2.0}/errorsense/presets/__init__.py +0 -0
- {errorsense-0.1.2 → errorsense-0.2.0}/errorsense/py.typed +0 -0
- {errorsense-0.1.2 → errorsense-0.2.0}/errorsense/ruleset.py +0 -0
- {errorsense-0.1.2 → errorsense-0.2.0}/errorsense/signal.py +0 -0
- {errorsense-0.1.2 → errorsense-0.2.0}/errorsense/skills/http_classifier.md +0 -0
- {errorsense-0.1.2 → errorsense-0.2.0}/tests/test_llm_config.py +0 -0
- {errorsense-0.1.2 → errorsense-0.2.0}/tests/test_ruleset.py +0 -0
- {errorsense-0.1.2 → errorsense-0.2.0}/tests/test_signal.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: errorsense
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Error classification engine. Rules for the obvious, AI for the ambiguous.
|
|
5
5
|
Project-URL: Homepage, https://github.com/OpenGPU-Network/errorsense
|
|
6
6
|
Project-URL: Documentation, https://github.com/OpenGPU-Network/errorsense#readme
|
|
@@ -54,7 +54,7 @@ results = sense.classify(Signal.from_http(status_code=500, body="model not found
|
|
|
54
54
|
results[0].label # "client" (LLM figured it out)
|
|
55
55
|
```
|
|
56
56
|
|
|
57
|
-
The `http` preset gives you a 3-phase pipeline (rules → patterns → LLM) with 3
|
|
57
|
+
The `http` preset gives you a 3-phase pipeline (rules → patterns → LLM) with 3 labels: `"client"`, `"server"`, `"undecided"`. Rulesets handle obvious cases instantly. LLM handles the ambiguous ones.
|
|
58
58
|
|
|
59
59
|
Don't want LLM? Use `http_no_llm()` — rulesets only, ambiguous errors come back as `"undecided"`.
|
|
60
60
|
|
|
@@ -67,7 +67,7 @@ from errorsense import ErrorSense, Phase, Ruleset, Skill, LLMConfig, Signal
|
|
|
67
67
|
|
|
68
68
|
# Rulesets + LLM
|
|
69
69
|
sense = ErrorSense(
|
|
70
|
-
|
|
70
|
+
labels=["transient", "permanent", "user"],
|
|
71
71
|
pipeline=[
|
|
72
72
|
Phase("codes", rulesets=[
|
|
73
73
|
Ruleset(field="error_code", match={
|
|
@@ -89,7 +89,7 @@ sense = ErrorSense(
|
|
|
89
89
|
|
|
90
90
|
# Rulesets only — no LLM needed
|
|
91
91
|
sense = ErrorSense(
|
|
92
|
-
|
|
92
|
+
labels=["client", "server"],
|
|
93
93
|
pipeline=[
|
|
94
94
|
Phase("rules", rulesets=[
|
|
95
95
|
Ruleset(field="status_code", match={"4xx": "client", 502: "server"}),
|
|
@@ -100,7 +100,7 @@ sense = ErrorSense(
|
|
|
100
100
|
|
|
101
101
|
# LLM only — skip rulesets entirely
|
|
102
102
|
sense = ErrorSense(
|
|
103
|
-
|
|
103
|
+
labels=["client", "server"],
|
|
104
104
|
pipeline=[
|
|
105
105
|
Phase("llm", skills=[
|
|
106
106
|
Skill("my_classifier", path="./skills/my_classifier.md"),
|
|
@@ -162,20 +162,25 @@ results[0].reason # "ECONNRESET indicates transient network failure"
|
|
|
162
162
|
|
|
163
163
|
## Trailing (Stateful Error Tracking)
|
|
164
164
|
|
|
165
|
-
Track errors per key. When a threshold is hit,
|
|
165
|
+
Track errors per key. When a threshold is hit, optionally have an LLM review the full error history.
|
|
166
166
|
|
|
167
167
|
```python
|
|
168
|
-
from errorsense import TrailingConfig
|
|
168
|
+
from errorsense import LLMConfig, TrailingConfig
|
|
169
169
|
|
|
170
|
+
# With LLM review at threshold
|
|
170
171
|
sense = ErrorSense(
|
|
171
|
-
|
|
172
|
+
labels=["transient", "permanent", "user"],
|
|
172
173
|
pipeline=[...],
|
|
173
174
|
trailing=TrailingConfig(
|
|
174
175
|
threshold=3,
|
|
175
176
|
count_labels=["transient", "permanent"], # user errors don't count
|
|
177
|
+
reviewer_llm=LLMConfig(), # enables LLM review
|
|
176
178
|
),
|
|
177
179
|
)
|
|
178
180
|
|
|
181
|
+
# Without LLM review (just counting)
|
|
182
|
+
trailing=TrailingConfig(threshold=3, count_labels=["transient", "permanent"])
|
|
183
|
+
|
|
179
184
|
# In your error handler:
|
|
180
185
|
result = sense.trail("service-a", signal)
|
|
181
186
|
result.label # "transient"
|
|
@@ -189,9 +194,9 @@ sense.reset("service-a")
|
|
|
189
194
|
**How it works:**
|
|
190
195
|
- Each `trail()` call classifies the signal normally through the pipeline
|
|
191
196
|
- Counted labels accumulate per key toward the threshold
|
|
192
|
-
- At threshold, the LLM reviews all recorded errors
|
|
197
|
+
- At threshold, the LLM reviews all recorded errors (if `reviewer_llm` is set)
|
|
193
198
|
- If the review changes the label, the history entry is corrected and the count adjusts
|
|
194
|
-
- `
|
|
199
|
+
- `reviewer_skill=Skill(...)` lets you override the default review instructions
|
|
195
200
|
|
|
196
201
|
**Manual review anytime:**
|
|
197
202
|
|
|
@@ -33,7 +33,7 @@ results = sense.classify(Signal.from_http(status_code=500, body="model not found
|
|
|
33
33
|
results[0].label # "client" (LLM figured it out)
|
|
34
34
|
```
|
|
35
35
|
|
|
36
|
-
The `http` preset gives you a 3-phase pipeline (rules → patterns → LLM) with 3
|
|
36
|
+
The `http` preset gives you a 3-phase pipeline (rules → patterns → LLM) with 3 labels: `"client"`, `"server"`, `"undecided"`. Rulesets handle obvious cases instantly. LLM handles the ambiguous ones.
|
|
37
37
|
|
|
38
38
|
Don't want LLM? Use `http_no_llm()` — rulesets only, ambiguous errors come back as `"undecided"`.
|
|
39
39
|
|
|
@@ -46,7 +46,7 @@ from errorsense import ErrorSense, Phase, Ruleset, Skill, LLMConfig, Signal
|
|
|
46
46
|
|
|
47
47
|
# Rulesets + LLM
|
|
48
48
|
sense = ErrorSense(
|
|
49
|
-
|
|
49
|
+
labels=["transient", "permanent", "user"],
|
|
50
50
|
pipeline=[
|
|
51
51
|
Phase("codes", rulesets=[
|
|
52
52
|
Ruleset(field="error_code", match={
|
|
@@ -68,7 +68,7 @@ sense = ErrorSense(
|
|
|
68
68
|
|
|
69
69
|
# Rulesets only — no LLM needed
|
|
70
70
|
sense = ErrorSense(
|
|
71
|
-
|
|
71
|
+
labels=["client", "server"],
|
|
72
72
|
pipeline=[
|
|
73
73
|
Phase("rules", rulesets=[
|
|
74
74
|
Ruleset(field="status_code", match={"4xx": "client", 502: "server"}),
|
|
@@ -79,7 +79,7 @@ sense = ErrorSense(
|
|
|
79
79
|
|
|
80
80
|
# LLM only — skip rulesets entirely
|
|
81
81
|
sense = ErrorSense(
|
|
82
|
-
|
|
82
|
+
labels=["client", "server"],
|
|
83
83
|
pipeline=[
|
|
84
84
|
Phase("llm", skills=[
|
|
85
85
|
Skill("my_classifier", path="./skills/my_classifier.md"),
|
|
@@ -141,20 +141,25 @@ results[0].reason # "ECONNRESET indicates transient network failure"
|
|
|
141
141
|
|
|
142
142
|
## Trailing (Stateful Error Tracking)
|
|
143
143
|
|
|
144
|
-
Track errors per key. When a threshold is hit,
|
|
144
|
+
Track errors per key. When a threshold is hit, optionally have an LLM review the full error history.
|
|
145
145
|
|
|
146
146
|
```python
|
|
147
|
-
from errorsense import TrailingConfig
|
|
147
|
+
from errorsense import LLMConfig, TrailingConfig
|
|
148
148
|
|
|
149
|
+
# With LLM review at threshold
|
|
149
150
|
sense = ErrorSense(
|
|
150
|
-
|
|
151
|
+
labels=["transient", "permanent", "user"],
|
|
151
152
|
pipeline=[...],
|
|
152
153
|
trailing=TrailingConfig(
|
|
153
154
|
threshold=3,
|
|
154
155
|
count_labels=["transient", "permanent"], # user errors don't count
|
|
156
|
+
reviewer_llm=LLMConfig(), # enables LLM review
|
|
155
157
|
),
|
|
156
158
|
)
|
|
157
159
|
|
|
160
|
+
# Without LLM review (just counting)
|
|
161
|
+
trailing=TrailingConfig(threshold=3, count_labels=["transient", "permanent"])
|
|
162
|
+
|
|
158
163
|
# In your error handler:
|
|
159
164
|
result = sense.trail("service-a", signal)
|
|
160
165
|
result.label # "transient"
|
|
@@ -168,9 +173,9 @@ sense.reset("service-a")
|
|
|
168
173
|
**How it works:**
|
|
169
174
|
- Each `trail()` call classifies the signal normally through the pipeline
|
|
170
175
|
- Counted labels accumulate per key toward the threshold
|
|
171
|
-
- At threshold, the LLM reviews all recorded errors
|
|
176
|
+
- At threshold, the LLM reviews all recorded errors (if `reviewer_llm` is set)
|
|
172
177
|
- If the review changes the label, the history entry is corrected and the count adjusts
|
|
173
|
-
- `
|
|
178
|
+
- `reviewer_skill=Skill(...)` lets you override the default review instructions
|
|
174
179
|
|
|
175
180
|
**Manual review anytime:**
|
|
176
181
|
|
|
@@ -104,7 +104,7 @@ Runs signals through a pipeline of phases.
|
|
|
104
104
|
from errorsense import ErrorSense, Phase, Ruleset, Skill, LLMConfig
|
|
105
105
|
|
|
106
106
|
sense = ErrorSense(
|
|
107
|
-
|
|
107
|
+
labels=["client", "server", "undecided"],
|
|
108
108
|
pipeline=[
|
|
109
109
|
Phase("rules", rulesets=[...]),
|
|
110
110
|
Phase("patterns", rulesets=[...]),
|
|
@@ -118,7 +118,7 @@ sense = ErrorSense(
|
|
|
118
118
|
|
|
119
119
|
```python
|
|
120
120
|
sense = ErrorSense(
|
|
121
|
-
|
|
121
|
+
labels=["client", "server"],
|
|
122
122
|
rulesets=[Ruleset(...)],
|
|
123
123
|
skills=[Skill(...)],
|
|
124
124
|
llm=LLMConfig(...),
|
|
@@ -160,13 +160,14 @@ Track errors per key with threshold-based LLM review.
|
|
|
160
160
|
from errorsense import TrailingConfig
|
|
161
161
|
|
|
162
162
|
sense = ErrorSense(
|
|
163
|
-
|
|
163
|
+
labels=["client", "server", "undecided"],
|
|
164
164
|
pipeline=[...],
|
|
165
165
|
trailing=TrailingConfig(
|
|
166
166
|
threshold=3,
|
|
167
167
|
count_labels=["server"],
|
|
168
168
|
history_size=10,
|
|
169
|
-
|
|
169
|
+
reviewer_llm=LLMConfig(), # enables LLM review at threshold
|
|
170
|
+
reviewer_skill=Skill("custom"), # optional, defaults to built-in reclassification.md
|
|
170
171
|
),
|
|
171
172
|
)
|
|
172
173
|
|
|
@@ -187,9 +188,9 @@ sense.reset("service-a")
|
|
|
187
188
|
5. `at_threshold` recalculates after any correction
|
|
188
189
|
|
|
189
190
|
**Review behavior:**
|
|
190
|
-
- `
|
|
191
|
-
- `
|
|
192
|
-
- `
|
|
191
|
+
- `reviewer_llm=LLMConfig(...)`: LLM reviews error history at threshold
|
|
192
|
+
- `reviewer_llm=None` (default): no review, just count
|
|
193
|
+
- `reviewer_skill=Skill(...)`: override the default review instructions
|
|
193
194
|
|
|
194
195
|
**Manual review:** `sense.review(key)` / `await sense.async_review(key)` — LLM reviews full history anytime.
|
|
195
196
|
|
|
@@ -222,7 +223,8 @@ class TrailingConfig:
|
|
|
222
223
|
threshold: int = 3
|
|
223
224
|
count_labels: list[str] | None = None
|
|
224
225
|
history_size: int = 10
|
|
225
|
-
|
|
226
|
+
reviewer_llm: LLMConfig | None = None
|
|
227
|
+
reviewer_skill: Skill | None = None
|
|
226
228
|
```
|
|
227
229
|
|
|
228
230
|
---
|
|
@@ -260,7 +262,7 @@ Every `classify()` call is wrapped in try/except at the phase level. Exceptions
|
|
|
260
262
|
|
|
261
263
|
### Validation (at construction)
|
|
262
264
|
|
|
263
|
-
- Labels in rulesets must be in `
|
|
265
|
+
- Labels in rulesets must be in `labels` or `default`
|
|
264
266
|
- Phases must have rulesets OR (skills + llm)
|
|
265
267
|
- LLM phases must have an API key in LLMConfig
|
|
266
268
|
- `pipeline=` and `rulesets=/skills=` cannot be mixed
|
|
@@ -10,7 +10,7 @@ import time
|
|
|
10
10
|
from collections import defaultdict, deque
|
|
11
11
|
from typing import Any, Callable
|
|
12
12
|
|
|
13
|
-
from errorsense.llm import LLMConfig
|
|
13
|
+
from errorsense.llm import LLMClient, LLMConfig
|
|
14
14
|
from errorsense.models import SenseResult, TrailResult, TrailingConfig
|
|
15
15
|
from errorsense.phase import Phase
|
|
16
16
|
from errorsense.ruleset import Ruleset
|
|
@@ -31,7 +31,7 @@ class ErrorSense:
|
|
|
31
31
|
|
|
32
32
|
def __init__(
|
|
33
33
|
self,
|
|
34
|
-
|
|
34
|
+
labels: list[str],
|
|
35
35
|
# Explicit mode
|
|
36
36
|
pipeline: list[Phase] | None = None,
|
|
37
37
|
# Implicit mode
|
|
@@ -44,7 +44,7 @@ class ErrorSense:
|
|
|
44
44
|
on_classify: Callable[[Signal, SenseResult], Any] | None = None,
|
|
45
45
|
on_error: Callable[[str, Exception], Any] | None = None,
|
|
46
46
|
) -> None:
|
|
47
|
-
self.
|
|
47
|
+
self.labels = set(labels)
|
|
48
48
|
self.default = default
|
|
49
49
|
self._on_classify = on_classify
|
|
50
50
|
self._on_error = on_error
|
|
@@ -60,26 +60,22 @@ class ErrorSense:
|
|
|
60
60
|
|
|
61
61
|
self._validate_phase_names()
|
|
62
62
|
self._pipeline_names = frozenset(p.name for p in self._pipeline)
|
|
63
|
-
self.
|
|
63
|
+
self._validate_labels()
|
|
64
64
|
self._validate_llm_api_keys()
|
|
65
65
|
for phase in self._pipeline:
|
|
66
|
-
phase.
|
|
66
|
+
phase.set_labels(list(labels))
|
|
67
67
|
|
|
68
68
|
# Trailing state
|
|
69
69
|
self._trailing = trailing
|
|
70
|
-
self.
|
|
71
|
-
self.
|
|
70
|
+
self._reviewer_client: LLMClient | None = None
|
|
71
|
+
self._reviewer_skill: Skill | None = None
|
|
72
72
|
if trailing:
|
|
73
73
|
self._init_trailing(trailing)
|
|
74
74
|
|
|
75
75
|
def _init_trailing(self, config: TrailingConfig) -> None:
|
|
76
|
-
if config.
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
)
|
|
80
|
-
self._review_enabled = (
|
|
81
|
-
config.review if config.review is not None else self._has_llm
|
|
82
|
-
)
|
|
76
|
+
if config.reviewer_llm is not None:
|
|
77
|
+
self._reviewer_client = LLMClient(config.reviewer_llm)
|
|
78
|
+
self._reviewer_skill = config.reviewer_skill
|
|
83
79
|
self._threshold = config.threshold
|
|
84
80
|
self._count_labels = set(config.count_labels or [])
|
|
85
81
|
hs = config.history_size
|
|
@@ -106,11 +102,15 @@ class ErrorSense:
|
|
|
106
102
|
"""Close all LLM phase clients (sync)."""
|
|
107
103
|
for phase in self._pipeline:
|
|
108
104
|
phase.close_sync()
|
|
105
|
+
if self._reviewer_client:
|
|
106
|
+
self._reviewer_client.close_sync()
|
|
109
107
|
|
|
110
108
|
async def async_close(self) -> None:
|
|
111
109
|
"""Close all LLM phase clients (async)."""
|
|
112
110
|
for phase in self._pipeline:
|
|
113
111
|
await phase.close_async()
|
|
112
|
+
if self._reviewer_client:
|
|
113
|
+
await self._reviewer_client.close_async()
|
|
114
114
|
|
|
115
115
|
async def __aenter__(self) -> ErrorSense:
|
|
116
116
|
return self
|
|
@@ -204,7 +204,7 @@ class ErrorSense:
|
|
|
204
204
|
at_threshold = self._record_and_check(key, signal, result)
|
|
205
205
|
review_result = (
|
|
206
206
|
self._run_review_sync(key)
|
|
207
|
-
if at_threshold and self.
|
|
207
|
+
if at_threshold and self._reviewer_client else None
|
|
208
208
|
)
|
|
209
209
|
return self._build_trail_result(key, result, at_threshold, review_result)
|
|
210
210
|
|
|
@@ -224,16 +224,24 @@ class ErrorSense:
|
|
|
224
224
|
at_threshold = self._record_and_check(key, signal, result)
|
|
225
225
|
review_result = (
|
|
226
226
|
await self._run_review_async(key)
|
|
227
|
-
if at_threshold and self.
|
|
227
|
+
if at_threshold and self._reviewer_client else None
|
|
228
228
|
)
|
|
229
229
|
return self._build_trail_result(key, result, at_threshold, review_result)
|
|
230
230
|
|
|
231
231
|
def review(self, key: str) -> SenseResult | None:
|
|
232
232
|
"""Manually review full history for a key (sync). Returns LLM verdict."""
|
|
233
|
+
if not self._trailing:
|
|
234
|
+
raise RuntimeError(
|
|
235
|
+
"Trailing not configured. Pass trailing=TrailingConfig(...) to ErrorSense."
|
|
236
|
+
)
|
|
233
237
|
return self._run_review_sync(key)
|
|
234
238
|
|
|
235
239
|
async def async_review(self, key: str) -> SenseResult | None:
|
|
236
240
|
"""Manually review full history for a key (async). Returns LLM verdict."""
|
|
241
|
+
if not self._trailing:
|
|
242
|
+
raise RuntimeError(
|
|
243
|
+
"Trailing not configured. Pass trailing=TrailingConfig(...) to ErrorSense."
|
|
244
|
+
)
|
|
237
245
|
return self._run_review_async(key)
|
|
238
246
|
|
|
239
247
|
def _record_and_check(self, key: str, signal: Signal, result: SenseResult) -> bool:
|
|
@@ -297,37 +305,33 @@ class ErrorSense:
|
|
|
297
305
|
self._counts[key][new_label] += 1
|
|
298
306
|
|
|
299
307
|
def _run_review_sync(self, key: str) -> SenseResult | None:
|
|
300
|
-
|
|
301
|
-
if not llm_phase:
|
|
308
|
+
if not self._reviewer_client:
|
|
302
309
|
return None
|
|
303
310
|
signal, skill = self._build_review_context(key)
|
|
304
311
|
try:
|
|
305
|
-
return
|
|
312
|
+
return self._reviewer_client.classify_sync(
|
|
313
|
+
signal, skill, list(self.labels), include_reason=True,
|
|
314
|
+
)
|
|
306
315
|
except Exception as e:
|
|
307
316
|
logger.warning("LLM review failed: %s", e)
|
|
308
317
|
return None
|
|
309
318
|
|
|
310
319
|
async def _run_review_async(self, key: str) -> SenseResult | None:
|
|
311
|
-
|
|
312
|
-
if not llm_phase:
|
|
320
|
+
if not self._reviewer_client:
|
|
313
321
|
return None
|
|
314
322
|
signal, skill = self._build_review_context(key)
|
|
315
323
|
try:
|
|
316
|
-
return await
|
|
324
|
+
return await self._reviewer_client.classify_async(
|
|
325
|
+
signal, skill, list(self.labels), include_reason=True,
|
|
326
|
+
)
|
|
317
327
|
except Exception as e:
|
|
318
328
|
logger.warning("LLM review failed: %s", e)
|
|
319
329
|
return None
|
|
320
330
|
|
|
321
|
-
def
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
return None
|
|
326
|
-
|
|
327
|
-
def _get_reclass_skill(self) -> Skill:
|
|
328
|
-
if self._reclass_skill is None:
|
|
329
|
-
self._reclass_skill = Skill("reclassification")
|
|
330
|
-
return self._reclass_skill
|
|
331
|
+
def _get_reviewer_skill(self) -> Skill:
|
|
332
|
+
if self._reviewer_skill is None:
|
|
333
|
+
self._reviewer_skill = Skill("reclassification")
|
|
334
|
+
return self._reviewer_skill
|
|
331
335
|
|
|
332
336
|
def _build_review_context(self, key: str) -> tuple[Signal, Skill]:
|
|
333
337
|
history = list(self._history[key])
|
|
@@ -341,7 +345,7 @@ class ErrorSense:
|
|
|
341
345
|
"key": key,
|
|
342
346
|
"history_summary": summary,
|
|
343
347
|
})
|
|
344
|
-
return signal, self.
|
|
348
|
+
return signal, self._get_reviewer_skill()
|
|
345
349
|
|
|
346
350
|
def reset(self, key: str) -> None:
|
|
347
351
|
"""Clear trailing history and counts for a key."""
|
|
@@ -395,15 +399,15 @@ class ErrorSense:
|
|
|
395
399
|
raise ValueError(f"Duplicate phase name: {phase.name!r}")
|
|
396
400
|
seen.add(phase.name)
|
|
397
401
|
|
|
398
|
-
def
|
|
399
|
-
|
|
402
|
+
def _validate_labels(self) -> None:
|
|
403
|
+
all_labels = self.labels | {self.default}
|
|
400
404
|
for phase in self._pipeline:
|
|
401
405
|
for ruleset in phase.rulesets:
|
|
402
|
-
bad = ruleset.referenced_labels() -
|
|
406
|
+
bad = ruleset.referenced_labels() - all_labels
|
|
403
407
|
if bad:
|
|
404
408
|
raise ValueError(
|
|
405
409
|
f"Ruleset on field {getattr(ruleset, 'field', '?')!r} maps to "
|
|
406
|
-
f"label {bad.pop()!r} not in {sorted(self.
|
|
410
|
+
f"label {bad.pop()!r} not in {sorted(self.labels)}"
|
|
407
411
|
)
|
|
408
412
|
|
|
409
413
|
def _validate_llm_api_keys(self) -> None:
|
|
@@ -22,7 +22,7 @@ DEFAULT_BASE_URL = "https://relay.opengpu.network/v2/openai/v1"
|
|
|
22
22
|
DEFAULT_MODEL = "Qwen/Qwen3.5-397B-A17B-FP8"
|
|
23
23
|
DEFAULT_PROMPT_TEMPLATE = (
|
|
24
24
|
"{instructions}\n\n"
|
|
25
|
-
"Classify the following error signal into exactly one of these
|
|
25
|
+
"Classify the following error signal into exactly one of these labels: {labels}\n\n"
|
|
26
26
|
"Signal data:\n{signal}\n\n"
|
|
27
27
|
'Reply ONLY with JSON: {{"label": "...", "confidence": 0.0, "reason": "..."}}'
|
|
28
28
|
)
|
|
@@ -54,7 +54,7 @@ class LLMConfig:
|
|
|
54
54
|
object.__setattr__(self, "base_url", os.environ.get("ERRORSENSE_LLM_URL", DEFAULT_BASE_URL))
|
|
55
55
|
|
|
56
56
|
|
|
57
|
-
def _build_prompt(signal: Signal, skill: Skill, categories: list[str], config: LLMConfig) -> str:
|
|
57
|
+
def _build_prompt(signal: Signal, skill: Skill, labels: list[str], config: LLMConfig) -> str:
|
|
58
58
|
signal_text = json.dumps(signal.to_dict(), default=str)
|
|
59
59
|
if len(signal_text) > config.max_signal_size:
|
|
60
60
|
signal_text = signal_text[: config.max_signal_size] + "..."
|
|
@@ -62,7 +62,7 @@ def _build_prompt(signal: Signal, skill: Skill, categories: list[str], config: L
|
|
|
62
62
|
template = skill.prompt_template or DEFAULT_PROMPT_TEMPLATE
|
|
63
63
|
return template.format(
|
|
64
64
|
instructions=skill.instructions,
|
|
65
|
-
|
|
65
|
+
labels=", ".join(labels) if labels else "unknown",
|
|
66
66
|
signal=signal_text,
|
|
67
67
|
)
|
|
68
68
|
|
|
@@ -84,7 +84,7 @@ def _build_headers(config: LLMConfig) -> dict:
|
|
|
84
84
|
|
|
85
85
|
def _parse_response(
|
|
86
86
|
data: dict,
|
|
87
|
-
|
|
87
|
+
labels: list[str],
|
|
88
88
|
skill_name: str,
|
|
89
89
|
include_reason: bool = False,
|
|
90
90
|
) -> SenseResult | None:
|
|
@@ -96,11 +96,11 @@ def _parse_response(
|
|
|
96
96
|
content = content.rsplit("```", 1)[0]
|
|
97
97
|
parsed = json.loads(content.strip())
|
|
98
98
|
|
|
99
|
-
label = parsed.get("label", "")
|
|
99
|
+
label = parsed.get("label", "")
|
|
100
100
|
confidence = min(1.0, max(0.0, float(parsed.get("confidence", 0.7))))
|
|
101
101
|
reason = parsed.get("reason") if include_reason else None
|
|
102
102
|
|
|
103
|
-
if
|
|
103
|
+
if labels and label not in labels:
|
|
104
104
|
logger.warning(
|
|
105
105
|
"Skill %r: LLM returned unknown label %r", skill_name, label
|
|
106
106
|
)
|
|
@@ -154,19 +154,18 @@ class LLMClient:
|
|
|
154
154
|
self,
|
|
155
155
|
signal: Signal,
|
|
156
156
|
skill: Skill,
|
|
157
|
-
|
|
157
|
+
labels: list[str],
|
|
158
158
|
include_reason: bool = False,
|
|
159
159
|
) -> SenseResult | None:
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
url = f"{config.base_url.rstrip('/')}/chat/completions"
|
|
160
|
+
prompt = _build_prompt(signal, skill, labels, self._config)
|
|
161
|
+
url = f"{self._config.base_url.rstrip('/')}/chat/completions"
|
|
163
162
|
|
|
164
163
|
try:
|
|
165
164
|
client = self._get_sync_client()
|
|
166
165
|
resp = client.post(
|
|
167
166
|
url,
|
|
168
|
-
headers=_build_headers(
|
|
169
|
-
json=_build_request_body(skill, prompt,
|
|
167
|
+
headers=_build_headers(self._config),
|
|
168
|
+
json=_build_request_body(skill, prompt, self._config),
|
|
170
169
|
)
|
|
171
170
|
resp.raise_for_status()
|
|
172
171
|
data = resp.json()
|
|
@@ -174,25 +173,24 @@ class LLMClient:
|
|
|
174
173
|
logger.warning("LLM call failed for skill %r: %s", skill.name, e)
|
|
175
174
|
return None
|
|
176
175
|
|
|
177
|
-
return _parse_response(data,
|
|
176
|
+
return _parse_response(data, labels, skill.name, include_reason)
|
|
178
177
|
|
|
179
178
|
async def classify_async(
|
|
180
179
|
self,
|
|
181
180
|
signal: Signal,
|
|
182
181
|
skill: Skill,
|
|
183
|
-
|
|
182
|
+
labels: list[str],
|
|
184
183
|
include_reason: bool = False,
|
|
185
184
|
) -> SenseResult | None:
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
url = f"{config.base_url.rstrip('/')}/chat/completions"
|
|
185
|
+
prompt = _build_prompt(signal, skill, labels, self._config)
|
|
186
|
+
url = f"{self._config.base_url.rstrip('/')}/chat/completions"
|
|
189
187
|
|
|
190
188
|
try:
|
|
191
189
|
client = await self._get_async_client()
|
|
192
190
|
resp = await client.post(
|
|
193
191
|
url,
|
|
194
|
-
headers=_build_headers(
|
|
195
|
-
json=_build_request_body(skill, prompt,
|
|
192
|
+
headers=_build_headers(self._config),
|
|
193
|
+
json=_build_request_body(skill, prompt, self._config),
|
|
196
194
|
)
|
|
197
195
|
resp.raise_for_status()
|
|
198
196
|
data = resp.json()
|
|
@@ -200,7 +198,7 @@ class LLMClient:
|
|
|
200
198
|
logger.warning("LLM call failed for skill %r: %s", skill.name, e)
|
|
201
199
|
return None
|
|
202
200
|
|
|
203
|
-
return _parse_response(data,
|
|
201
|
+
return _parse_response(data, labels, skill.name, include_reason)
|
|
204
202
|
|
|
205
203
|
def close_sync(self) -> None:
|
|
206
204
|
if self._sync_client is not None:
|
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from dataclasses import dataclass
|
|
4
|
-
from typing import Any
|
|
4
|
+
from typing import TYPE_CHECKING, Any
|
|
5
|
+
|
|
6
|
+
if TYPE_CHECKING:
|
|
7
|
+
from errorsense.llm import LLMConfig
|
|
8
|
+
from errorsense.skill import Skill
|
|
5
9
|
|
|
6
10
|
|
|
7
11
|
@dataclass(frozen=True)
|
|
@@ -40,13 +44,12 @@ class TrailingConfig:
|
|
|
40
44
|
threshold: Number of counted errors before review triggers.
|
|
41
45
|
count_labels: Only these labels count toward threshold.
|
|
42
46
|
history_size: Max errors kept per key (ring buffer).
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
True = force (raises if no LLM phase).
|
|
46
|
-
False = never.
|
|
47
|
+
reviewer_llm: LLM config for review. Set to enable review, None to disable.
|
|
48
|
+
reviewer_skill: Custom review skill. Defaults to built-in reclassification.
|
|
47
49
|
"""
|
|
48
50
|
|
|
49
51
|
threshold: int = 3
|
|
50
52
|
count_labels: list[str] | None = None
|
|
51
53
|
history_size: int = 10
|
|
52
|
-
|
|
54
|
+
reviewer_llm: LLMConfig | None = None
|
|
55
|
+
reviewer_skill: Skill | None = None
|
|
@@ -61,14 +61,14 @@ class Phase:
|
|
|
61
61
|
self.skills = skills or []
|
|
62
62
|
self.llm = llm
|
|
63
63
|
self.is_llm_phase = has_skills
|
|
64
|
-
self.
|
|
64
|
+
self._labels: list[str] = []
|
|
65
65
|
self._llm_client: LLMClient | None = None
|
|
66
66
|
|
|
67
67
|
if self.is_llm_phase and llm:
|
|
68
68
|
self._llm_client = LLMClient(llm)
|
|
69
69
|
|
|
70
|
-
def
|
|
71
|
-
self.
|
|
70
|
+
def set_labels(self, labels: list[str]) -> None:
|
|
71
|
+
self._labels = list(labels)
|
|
72
72
|
|
|
73
73
|
def classify(self, signal: Signal, explain: bool = False) -> SenseResult | None:
|
|
74
74
|
"""Sync classification. Full pipeline — rulesets or LLM."""
|
|
@@ -136,38 +136,10 @@ class Phase:
|
|
|
136
136
|
return best
|
|
137
137
|
|
|
138
138
|
def _run_one_skill_sync(self, signal: Signal, skill: Skill, explain: bool) -> SenseResult | None:
|
|
139
|
-
|
|
140
|
-
client = LLMClient(skill.llm)
|
|
141
|
-
try:
|
|
142
|
-
return client.classify_sync(signal, skill, self._categories, include_reason=explain)
|
|
143
|
-
finally:
|
|
144
|
-
client.close_sync()
|
|
145
|
-
return self._llm_client.classify_sync(signal, skill, self._categories, include_reason=explain)
|
|
139
|
+
return self._llm_client.classify_sync(signal, skill, self._labels, include_reason=explain)
|
|
146
140
|
|
|
147
141
|
async def _run_one_skill_async(self, signal: Signal, skill: Skill, explain: bool) -> SenseResult | None:
|
|
148
|
-
|
|
149
|
-
client = LLMClient(skill.llm)
|
|
150
|
-
try:
|
|
151
|
-
return await client.classify_async(signal, skill, self._categories, include_reason=explain)
|
|
152
|
-
finally:
|
|
153
|
-
await client.close_async()
|
|
154
|
-
return await self._llm_client.classify_async(signal, skill, self._categories, include_reason=explain)
|
|
155
|
-
|
|
156
|
-
def run_llm_call(
|
|
157
|
-
self, signal: Signal, skill: Skill, categories: list[str],
|
|
158
|
-
) -> SenseResult | None:
|
|
159
|
-
"""Run a single sync LLM call. Public API for Tracker reclassification."""
|
|
160
|
-
if not self._llm_client:
|
|
161
|
-
return None
|
|
162
|
-
return self._llm_client.classify_sync(signal, skill, categories, include_reason=True)
|
|
163
|
-
|
|
164
|
-
async def async_run_llm_call(
|
|
165
|
-
self, signal: Signal, skill: Skill, categories: list[str],
|
|
166
|
-
) -> SenseResult | None:
|
|
167
|
-
"""Run a single async LLM call. Public API for Tracker reclassification."""
|
|
168
|
-
if not self._llm_client:
|
|
169
|
-
return None
|
|
170
|
-
return await self._llm_client.classify_async(signal, skill, categories, include_reason=True)
|
|
142
|
+
return await self._llm_client.classify_async(signal, skill, self._labels, include_reason=explain)
|
|
171
143
|
|
|
172
144
|
def _stamp_phase(self, result: SenseResult, skill_name: str) -> SenseResult:
|
|
173
145
|
updates: dict[str, Any] = {}
|
|
@@ -48,7 +48,7 @@ def http(
|
|
|
48
48
|
phases.append(Phase("llm", skills=[Skill("http_classifier")], llm=llm))
|
|
49
49
|
|
|
50
50
|
return ErrorSense(
|
|
51
|
-
|
|
51
|
+
labels=["client", "server", "undecided"],
|
|
52
52
|
pipeline=phases,
|
|
53
53
|
default="undecided",
|
|
54
54
|
)
|
|
@@ -66,7 +66,7 @@ def http_no_llm(
|
|
|
66
66
|
extra_rulesets: Additional rulesets appended to the patterns phase.
|
|
67
67
|
"""
|
|
68
68
|
return ErrorSense(
|
|
69
|
-
|
|
69
|
+
labels=["client", "server", "undecided"],
|
|
70
70
|
pipeline=_ruleset_phases(extra_rulesets),
|
|
71
71
|
default="undecided",
|
|
72
72
|
)
|
|
@@ -3,10 +3,6 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
from pathlib import Path
|
|
6
|
-
from typing import TYPE_CHECKING
|
|
7
|
-
|
|
8
|
-
if TYPE_CHECKING:
|
|
9
|
-
from errorsense.llm import LLMConfig
|
|
10
6
|
|
|
11
7
|
__all__ = ["Skill"]
|
|
12
8
|
|
|
@@ -29,7 +25,6 @@ class Skill:
|
|
|
29
25
|
instructions: Inline instructions string. Overrides file loading.
|
|
30
26
|
prompt_template: Override the default LLM prompt template.
|
|
31
27
|
temperature: LLM temperature (default: 0.0 for determinism).
|
|
32
|
-
llm: Per-skill LLMConfig override.
|
|
33
28
|
"""
|
|
34
29
|
|
|
35
30
|
def __init__(
|
|
@@ -39,7 +34,6 @@ class Skill:
|
|
|
39
34
|
instructions: str | None = None,
|
|
40
35
|
prompt_template: str | None = None,
|
|
41
36
|
temperature: float = 0.0,
|
|
42
|
-
llm: LLMConfig | None = None,
|
|
43
37
|
) -> None:
|
|
44
38
|
if not name:
|
|
45
39
|
raise ValueError("Skill requires a non-empty 'name'")
|
|
@@ -47,7 +41,6 @@ class Skill:
|
|
|
47
41
|
self.name = name
|
|
48
42
|
self.prompt_template = prompt_template
|
|
49
43
|
self.temperature = temperature
|
|
50
|
-
self.llm = llm
|
|
51
44
|
|
|
52
45
|
if instructions:
|
|
53
46
|
self.instructions = instructions
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
You are reviewing a history of classified errors for a single key (e.g., a service or provider).
|
|
2
|
+
|
|
3
|
+
Each entry in the history has a label that was assigned by earlier classification. Your job is to review the full history and decide: is the most recent label correct, or should it be changed?
|
|
4
|
+
|
|
5
|
+
## How to decide
|
|
6
|
+
|
|
7
|
+
Look at the pattern across all entries:
|
|
8
|
+
- If the errors are consistent (all the same type), the label is probably correct
|
|
9
|
+
- If earlier errors were classified differently and the pattern suggests the latest one was misclassified, pick the label that better fits the overall pattern
|
|
10
|
+
- If the history shows a mix of genuine errors, keep the most recent label as-is
|
|
11
|
+
|
|
12
|
+
## Your output
|
|
13
|
+
|
|
14
|
+
Pick one of the allowed labels as your label. This must be one of the labels provided in the prompt — do not invent new ones.
|
|
15
|
+
|
|
16
|
+
Set confidence based on how clear the pattern is:
|
|
17
|
+
- 0.9+ if the history strongly supports your label
|
|
18
|
+
- 0.7-0.9 if the evidence is moderate
|
|
19
|
+
- Below 0.7 if the history is genuinely mixed
|
|
20
|
+
|
|
21
|
+
In your reason, briefly explain what pattern you saw and why you kept or changed the label.
|
|
@@ -25,7 +25,7 @@ class BrokenRuleset(Ruleset):
|
|
|
25
25
|
class TestExplicitMode:
|
|
26
26
|
def test_first_match_wins(self):
|
|
27
27
|
engine = ErrorSense(
|
|
28
|
-
|
|
28
|
+
labels=["a", "b"],
|
|
29
29
|
pipeline=[Phase("p1", rulesets=[AlwaysMatchRuleset("a"), AlwaysMatchRuleset("b")])],
|
|
30
30
|
)
|
|
31
31
|
results = engine.classify(Signal({"x": 1}))
|
|
@@ -34,7 +34,7 @@ class TestExplicitMode:
|
|
|
34
34
|
|
|
35
35
|
def test_skip_none_results(self):
|
|
36
36
|
engine = ErrorSense(
|
|
37
|
-
|
|
37
|
+
labels=["a"],
|
|
38
38
|
pipeline=[Phase("p1", rulesets=[NeverMatchRuleset(), AlwaysMatchRuleset("a")])],
|
|
39
39
|
)
|
|
40
40
|
results = engine.classify(Signal({"x": 1}))
|
|
@@ -42,7 +42,7 @@ class TestExplicitMode:
|
|
|
42
42
|
|
|
43
43
|
def test_default_when_no_match(self):
|
|
44
44
|
engine = ErrorSense(
|
|
45
|
-
|
|
45
|
+
labels=["a"],
|
|
46
46
|
pipeline=[Phase("p1", rulesets=[NeverMatchRuleset()])],
|
|
47
47
|
default="fallback",
|
|
48
48
|
)
|
|
@@ -52,7 +52,7 @@ class TestExplicitMode:
|
|
|
52
52
|
|
|
53
53
|
def test_skill_name_auto_filled(self):
|
|
54
54
|
engine = ErrorSense(
|
|
55
|
-
|
|
55
|
+
labels=["a"],
|
|
56
56
|
pipeline=[Phase("p1", rulesets=[AlwaysMatchRuleset("a")])],
|
|
57
57
|
)
|
|
58
58
|
results = engine.classify(Signal({"x": 1}))
|
|
@@ -61,7 +61,7 @@ class TestExplicitMode:
|
|
|
61
61
|
|
|
62
62
|
def test_broken_ruleset_skipped(self):
|
|
63
63
|
engine = ErrorSense(
|
|
64
|
-
|
|
64
|
+
labels=["a"],
|
|
65
65
|
pipeline=[Phase("p1", rulesets=[BrokenRuleset(), AlwaysMatchRuleset("a")])],
|
|
66
66
|
)
|
|
67
67
|
results = engine.classify(Signal({"x": 1}))
|
|
@@ -69,7 +69,7 @@ class TestExplicitMode:
|
|
|
69
69
|
|
|
70
70
|
def test_all_broken_falls_to_default(self):
|
|
71
71
|
engine = ErrorSense(
|
|
72
|
-
|
|
72
|
+
labels=["a"],
|
|
73
73
|
pipeline=[Phase("p1", rulesets=[BrokenRuleset()])],
|
|
74
74
|
default="oops",
|
|
75
75
|
)
|
|
@@ -78,7 +78,7 @@ class TestExplicitMode:
|
|
|
78
78
|
|
|
79
79
|
def test_multi_phase_first_catch(self):
|
|
80
80
|
engine = ErrorSense(
|
|
81
|
-
|
|
81
|
+
labels=["a", "b"],
|
|
82
82
|
pipeline=[
|
|
83
83
|
Phase("first", rulesets=[NeverMatchRuleset()]),
|
|
84
84
|
Phase("second", rulesets=[AlwaysMatchRuleset("b")]),
|
|
@@ -90,7 +90,7 @@ class TestExplicitMode:
|
|
|
90
90
|
|
|
91
91
|
def test_skip_phase(self):
|
|
92
92
|
engine = ErrorSense(
|
|
93
|
-
|
|
93
|
+
labels=["a", "b"],
|
|
94
94
|
pipeline=[
|
|
95
95
|
Phase("skip_me", rulesets=[AlwaysMatchRuleset("a")]),
|
|
96
96
|
Phase("use_me", rulesets=[AlwaysMatchRuleset("b")]),
|
|
@@ -101,7 +101,7 @@ class TestExplicitMode:
|
|
|
101
101
|
|
|
102
102
|
def test_skip_invalid_phase_raises(self):
|
|
103
103
|
engine = ErrorSense(
|
|
104
|
-
|
|
104
|
+
labels=["a"],
|
|
105
105
|
pipeline=[Phase("p1", rulesets=[AlwaysMatchRuleset("a")])],
|
|
106
106
|
)
|
|
107
107
|
with pytest.raises(ValueError, match="Unknown phase"):
|
|
@@ -110,7 +110,7 @@ class TestExplicitMode:
|
|
|
110
110
|
def test_duplicate_phase_names_rejected(self):
|
|
111
111
|
with pytest.raises(ValueError, match="Duplicate"):
|
|
112
112
|
ErrorSense(
|
|
113
|
-
|
|
113
|
+
labels=["a"],
|
|
114
114
|
pipeline=[
|
|
115
115
|
Phase("p1", rulesets=[AlwaysMatchRuleset("a")]),
|
|
116
116
|
Phase("p1", rulesets=[AlwaysMatchRuleset("a")]),
|
|
@@ -121,7 +121,7 @@ class TestExplicitMode:
|
|
|
121
121
|
class TestShortCircuit:
|
|
122
122
|
def test_short_circuit_true_returns_one(self):
|
|
123
123
|
engine = ErrorSense(
|
|
124
|
-
|
|
124
|
+
labels=["a", "b"],
|
|
125
125
|
pipeline=[
|
|
126
126
|
Phase("first", rulesets=[AlwaysMatchRuleset("a")]),
|
|
127
127
|
Phase("second", rulesets=[AlwaysMatchRuleset("b")]),
|
|
@@ -133,7 +133,7 @@ class TestShortCircuit:
|
|
|
133
133
|
|
|
134
134
|
def test_short_circuit_false_returns_all_matches(self):
|
|
135
135
|
engine = ErrorSense(
|
|
136
|
-
|
|
136
|
+
labels=["a", "b"],
|
|
137
137
|
pipeline=[
|
|
138
138
|
Phase("first", rulesets=[AlwaysMatchRuleset("a", confidence=0.8)]),
|
|
139
139
|
Phase("second", rulesets=[AlwaysMatchRuleset("b", confidence=0.9)]),
|
|
@@ -146,7 +146,7 @@ class TestShortCircuit:
|
|
|
146
146
|
|
|
147
147
|
def test_short_circuit_false_skips_unmatched(self):
|
|
148
148
|
engine = ErrorSense(
|
|
149
|
-
|
|
149
|
+
labels=["a", "b"],
|
|
150
150
|
pipeline=[
|
|
151
151
|
Phase("first", rulesets=[AlwaysMatchRuleset("a")]),
|
|
152
152
|
Phase("second", rulesets=[NeverMatchRuleset()]),
|
|
@@ -160,7 +160,7 @@ class TestShortCircuit:
|
|
|
160
160
|
|
|
161
161
|
def test_short_circuit_false_no_matches_default(self):
|
|
162
162
|
engine = ErrorSense(
|
|
163
|
-
|
|
163
|
+
labels=["a"],
|
|
164
164
|
pipeline=[Phase("p1", rulesets=[NeverMatchRuleset()])],
|
|
165
165
|
default="none",
|
|
166
166
|
)
|
|
@@ -172,7 +172,7 @@ class TestShortCircuit:
|
|
|
172
172
|
class TestImplicitMode:
|
|
173
173
|
def test_rulesets_only(self):
|
|
174
174
|
engine = ErrorSense(
|
|
175
|
-
|
|
175
|
+
labels=["a"],
|
|
176
176
|
rulesets=[AlwaysMatchRuleset("a")],
|
|
177
177
|
)
|
|
178
178
|
results = engine.classify(Signal({"x": 1}))
|
|
@@ -181,7 +181,7 @@ class TestImplicitMode:
|
|
|
181
181
|
|
|
182
182
|
def test_default_when_no_match(self):
|
|
183
183
|
engine = ErrorSense(
|
|
184
|
-
|
|
184
|
+
labels=["a"],
|
|
185
185
|
rulesets=[NeverMatchRuleset()],
|
|
186
186
|
default="none",
|
|
187
187
|
)
|
|
@@ -191,21 +191,21 @@ class TestImplicitMode:
|
|
|
191
191
|
def test_cannot_mix_modes(self):
|
|
192
192
|
with pytest.raises(ValueError, match="Cannot mix"):
|
|
193
193
|
ErrorSense(
|
|
194
|
-
|
|
194
|
+
labels=["a"],
|
|
195
195
|
pipeline=[Phase("p1", rulesets=[AlwaysMatchRuleset("a")])],
|
|
196
196
|
rulesets=[AlwaysMatchRuleset("a")],
|
|
197
197
|
)
|
|
198
198
|
|
|
199
199
|
def test_must_provide_something(self):
|
|
200
200
|
with pytest.raises(ValueError, match="Must provide"):
|
|
201
|
-
ErrorSense(
|
|
201
|
+
ErrorSense(labels=["a"])
|
|
202
202
|
|
|
203
203
|
|
|
204
204
|
class TestCallbacks:
|
|
205
205
|
def test_on_classify_callback(self):
|
|
206
206
|
collected = []
|
|
207
207
|
engine = ErrorSense(
|
|
208
|
-
|
|
208
|
+
labels=["a"],
|
|
209
209
|
pipeline=[Phase("p1", rulesets=[AlwaysMatchRuleset("a")])],
|
|
210
210
|
on_classify=lambda sig, res: collected.append(res),
|
|
211
211
|
)
|
|
@@ -215,7 +215,7 @@ class TestCallbacks:
|
|
|
215
215
|
|
|
216
216
|
def test_on_error_callback(self):
|
|
217
217
|
engine = ErrorSense(
|
|
218
|
-
|
|
218
|
+
labels=["a"],
|
|
219
219
|
pipeline=[
|
|
220
220
|
Phase("broken", rulesets=[BrokenRuleset()]),
|
|
221
221
|
Phase("ok", rulesets=[AlwaysMatchRuleset("a")]),
|
|
@@ -230,7 +230,7 @@ class TestLabelValidation:
|
|
|
230
230
|
def test_invalid_label_in_ruleset(self):
|
|
231
231
|
with pytest.raises(ValueError, match="not in"):
|
|
232
232
|
ErrorSense(
|
|
233
|
-
|
|
233
|
+
labels=["a", "b"],
|
|
234
234
|
pipeline=[Phase("p1", rulesets=[
|
|
235
235
|
Ruleset(field="x", match={1: "c"}),
|
|
236
236
|
])],
|
|
@@ -241,7 +241,7 @@ class TestAsyncClassify:
|
|
|
241
241
|
@pytest.mark.asyncio
|
|
242
242
|
async def test_async_classify_first_catch(self):
|
|
243
243
|
engine = ErrorSense(
|
|
244
|
-
|
|
244
|
+
labels=["a"],
|
|
245
245
|
pipeline=[Phase("p1", rulesets=[NeverMatchRuleset(), AlwaysMatchRuleset("a")])],
|
|
246
246
|
)
|
|
247
247
|
results = await engine.async_classify(Signal({"x": 1}))
|
|
@@ -250,7 +250,7 @@ class TestAsyncClassify:
|
|
|
250
250
|
@pytest.mark.asyncio
|
|
251
251
|
async def test_async_classify_default(self):
|
|
252
252
|
engine = ErrorSense(
|
|
253
|
-
|
|
253
|
+
labels=["a"],
|
|
254
254
|
pipeline=[Phase("p1", rulesets=[NeverMatchRuleset()])],
|
|
255
255
|
default="fallback",
|
|
256
256
|
)
|
|
@@ -260,7 +260,7 @@ class TestAsyncClassify:
|
|
|
260
260
|
@pytest.mark.asyncio
|
|
261
261
|
async def test_async_classify_broken_skipped(self):
|
|
262
262
|
engine = ErrorSense(
|
|
263
|
-
|
|
263
|
+
labels=["a"],
|
|
264
264
|
pipeline=[Phase("p1", rulesets=[BrokenRuleset(), AlwaysMatchRuleset("a")])],
|
|
265
265
|
)
|
|
266
266
|
results = await engine.async_classify(Signal({"x": 1}))
|
|
@@ -269,7 +269,7 @@ class TestAsyncClassify:
|
|
|
269
269
|
@pytest.mark.asyncio
|
|
270
270
|
async def test_async_classify_all_phases(self):
|
|
271
271
|
engine = ErrorSense(
|
|
272
|
-
|
|
272
|
+
labels=["a", "b"],
|
|
273
273
|
pipeline=[
|
|
274
274
|
Phase("first", rulesets=[AlwaysMatchRuleset("a", confidence=0.5)]),
|
|
275
275
|
Phase("second", rulesets=[AlwaysMatchRuleset("b", confidence=0.9)]),
|
|
@@ -8,7 +8,7 @@ from errorsense import ErrorSense, Phase, Ruleset, Signal, TrailingConfig
|
|
|
8
8
|
def make_sense(**kwargs):
|
|
9
9
|
"""Helper to build an ErrorSense with trailing enabled."""
|
|
10
10
|
defaults = {
|
|
11
|
-
"
|
|
11
|
+
"labels": ["infra", "provider", "user"],
|
|
12
12
|
"pipeline": [
|
|
13
13
|
Phase("rules", rulesets=[
|
|
14
14
|
Ruleset(field="status_code", match={400: "user", 401: "user", 502: "infra", 503: "infra"}),
|
|
@@ -111,21 +111,16 @@ class TestTrail:
|
|
|
111
111
|
|
|
112
112
|
def test_trail_without_config_raises(self):
|
|
113
113
|
sense = ErrorSense(
|
|
114
|
-
|
|
114
|
+
labels=["a"],
|
|
115
115
|
pipeline=[Phase("p1", rulesets=[Ruleset(field="x", match={1: "a"})])],
|
|
116
116
|
)
|
|
117
117
|
with pytest.raises(RuntimeError, match="Trailing not configured"):
|
|
118
118
|
sense.trail("key", Signal({"x": 1}))
|
|
119
119
|
|
|
120
|
-
def
|
|
121
|
-
|
|
122
|
-
make_sense(trailing=TrailingConfig(
|
|
123
|
-
threshold=3, count_labels=["infra"], review=True,
|
|
124
|
-
))
|
|
125
|
-
|
|
126
|
-
def test_review_false_no_review(self):
|
|
120
|
+
def test_no_reviewer_llm_no_review(self):
|
|
121
|
+
"""Without reviewer_llm, trailing just counts — no LLM review."""
|
|
127
122
|
sense = make_sense(trailing=TrailingConfig(
|
|
128
|
-
threshold=2, count_labels=["infra", "provider"],
|
|
123
|
+
threshold=2, count_labels=["infra", "provider"],
|
|
129
124
|
))
|
|
130
125
|
sense.trail("p1", Signal.from_http(status_code=502))
|
|
131
126
|
result = sense.trail("p1", Signal.from_http(status_code=502))
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
Review this full error history and provide analysis.
|
|
2
|
-
|
|
3
|
-
Are these errors consistent or mixed? Is there a clear pattern (e.g., all infrastructure
|
|
4
|
-
failures, all user errors, or a mix)?
|
|
5
|
-
|
|
6
|
-
Should the system trip/alert based on this history, or are the counted errors
|
|
7
|
-
misleading (e.g., user errors miscounted as server errors)?
|
|
8
|
-
|
|
9
|
-
Provide your overall assessment.
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|