errorsense 0.1.2__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {errorsense-0.1.2 → errorsense-0.2.0}/.gitignore +0 -1
  2. {errorsense-0.1.2 → errorsense-0.2.0}/PKG-INFO +15 -10
  3. {errorsense-0.1.2 → errorsense-0.2.0}/README.md +14 -9
  4. {errorsense-0.1.2 → errorsense-0.2.0}/design/ERRORSENSE.md +11 -9
  5. {errorsense-0.1.2 → errorsense-0.2.0}/errorsense/__init__.py +1 -1
  6. {errorsense-0.1.2 → errorsense-0.2.0}/errorsense/engine.py +41 -37
  7. {errorsense-0.1.2 → errorsense-0.2.0}/errorsense/llm.py +18 -20
  8. {errorsense-0.1.2 → errorsense-0.2.0}/errorsense/models.py +9 -6
  9. {errorsense-0.1.2 → errorsense-0.2.0}/errorsense/phase.py +5 -33
  10. {errorsense-0.1.2 → errorsense-0.2.0}/errorsense/presets/http_gateway.py +2 -2
  11. {errorsense-0.1.2 → errorsense-0.2.0}/errorsense/skill.py +0 -7
  12. errorsense-0.2.0/errorsense/skills/reclassification.md +21 -0
  13. {errorsense-0.1.2 → errorsense-0.2.0}/pyproject.toml +1 -1
  14. {errorsense-0.1.2 → errorsense-0.2.0}/tests/test_engine.py +25 -25
  15. {errorsense-0.1.2 → errorsense-0.2.0}/tests/test_tracker.py +5 -10
  16. errorsense-0.1.2/errorsense/skills/reclassification.md +0 -9
  17. {errorsense-0.1.2 → errorsense-0.2.0}/LICENSE +0 -0
  18. {errorsense-0.1.2 → errorsense-0.2.0}/errorsense/presets/__init__.py +0 -0
  19. {errorsense-0.1.2 → errorsense-0.2.0}/errorsense/py.typed +0 -0
  20. {errorsense-0.1.2 → errorsense-0.2.0}/errorsense/ruleset.py +0 -0
  21. {errorsense-0.1.2 → errorsense-0.2.0}/errorsense/signal.py +0 -0
  22. {errorsense-0.1.2 → errorsense-0.2.0}/errorsense/skills/http_classifier.md +0 -0
  23. {errorsense-0.1.2 → errorsense-0.2.0}/tests/test_llm_config.py +0 -0
  24. {errorsense-0.1.2 → errorsense-0.2.0}/tests/test_ruleset.py +0 -0
  25. {errorsense-0.1.2 → errorsense-0.2.0}/tests/test_signal.py +0 -0
@@ -7,5 +7,4 @@ build/
7
7
  *.pyc
8
8
  .DS_Store
9
9
  .claude
10
- relay_preset/
11
10
  .env
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: errorsense
3
- Version: 0.1.2
3
+ Version: 0.2.0
4
4
  Summary: Error classification engine. Rules for the obvious, AI for the ambiguous.
5
5
  Project-URL: Homepage, https://github.com/OpenGPU-Network/errorsense
6
6
  Project-URL: Documentation, https://github.com/OpenGPU-Network/errorsense#readme
@@ -54,7 +54,7 @@ results = sense.classify(Signal.from_http(status_code=500, body="model not found
54
54
  results[0].label # "client" (LLM figured it out)
55
55
  ```
56
56
 
57
- The `http` preset gives you a 3-phase pipeline (rules → patterns → LLM) with 3 categories: `"client"`, `"server"`, `"undecided"`. Rulesets handle obvious cases instantly. LLM handles the ambiguous ones.
57
+ The `http` preset gives you a 3-phase pipeline (rules → patterns → LLM) with 3 labels: `"client"`, `"server"`, `"undecided"`. Rulesets handle obvious cases instantly. LLM handles the ambiguous ones.
58
58
 
59
59
  Don't want LLM? Use `http_no_llm()` — rulesets only, ambiguous errors come back as `"undecided"`.
60
60
 
@@ -67,7 +67,7 @@ from errorsense import ErrorSense, Phase, Ruleset, Skill, LLMConfig, Signal
67
67
 
68
68
  # Rulesets + LLM
69
69
  sense = ErrorSense(
70
- categories=["transient", "permanent", "user"],
70
+ labels=["transient", "permanent", "user"],
71
71
  pipeline=[
72
72
  Phase("codes", rulesets=[
73
73
  Ruleset(field="error_code", match={
@@ -89,7 +89,7 @@ sense = ErrorSense(
89
89
 
90
90
  # Rulesets only — no LLM needed
91
91
  sense = ErrorSense(
92
- categories=["client", "server"],
92
+ labels=["client", "server"],
93
93
  pipeline=[
94
94
  Phase("rules", rulesets=[
95
95
  Ruleset(field="status_code", match={"4xx": "client", 502: "server"}),
@@ -100,7 +100,7 @@ sense = ErrorSense(
100
100
 
101
101
  # LLM only — skip rulesets entirely
102
102
  sense = ErrorSense(
103
- categories=["client", "server"],
103
+ labels=["client", "server"],
104
104
  pipeline=[
105
105
  Phase("llm", skills=[
106
106
  Skill("my_classifier", path="./skills/my_classifier.md"),
@@ -162,20 +162,25 @@ results[0].reason # "ECONNRESET indicates transient network failure"
162
162
 
163
163
  ## Trailing (Stateful Error Tracking)
164
164
 
165
- Track errors per key. When a threshold is hit, the LLM reviews the full error history.
165
+ Track errors per key. When a threshold is hit, optionally have an LLM review the full error history.
166
166
 
167
167
  ```python
168
- from errorsense import TrailingConfig
168
+ from errorsense import LLMConfig, TrailingConfig
169
169
 
170
+ # With LLM review at threshold
170
171
  sense = ErrorSense(
171
- categories=["transient", "permanent", "user"],
172
+ labels=["transient", "permanent", "user"],
172
173
  pipeline=[...],
173
174
  trailing=TrailingConfig(
174
175
  threshold=3,
175
176
  count_labels=["transient", "permanent"], # user errors don't count
177
+ reviewer_llm=LLMConfig(), # enables LLM review
176
178
  ),
177
179
  )
178
180
 
181
+ # Without LLM review (just counting)
182
+ trailing=TrailingConfig(threshold=3, count_labels=["transient", "permanent"])
183
+
179
184
  # In your error handler:
180
185
  result = sense.trail("service-a", signal)
181
186
  result.label # "transient"
@@ -189,9 +194,9 @@ sense.reset("service-a")
189
194
  **How it works:**
190
195
  - Each `trail()` call classifies the signal normally through the pipeline
191
196
  - Counted labels accumulate per key toward the threshold
192
- - At threshold, the LLM reviews all recorded errors and gives its verdict
197
+ - At threshold, the LLM reviews all recorded errors (if `reviewer_llm` is set)
193
198
  - If the review changes the label, the history entry is corrected and the count adjusts
194
- - `review=False` in TrailingConfig disables LLM review (just counting)
199
+ - `reviewer_skill=Skill(...)` lets you override the default review instructions
195
200
 
196
201
  **Manual review anytime:**
197
202
 
@@ -33,7 +33,7 @@ results = sense.classify(Signal.from_http(status_code=500, body="model not found
33
33
  results[0].label # "client" (LLM figured it out)
34
34
  ```
35
35
 
36
- The `http` preset gives you a 3-phase pipeline (rules → patterns → LLM) with 3 categories: `"client"`, `"server"`, `"undecided"`. Rulesets handle obvious cases instantly. LLM handles the ambiguous ones.
36
+ The `http` preset gives you a 3-phase pipeline (rules → patterns → LLM) with 3 labels: `"client"`, `"server"`, `"undecided"`. Rulesets handle obvious cases instantly. LLM handles the ambiguous ones.
37
37
 
38
38
  Don't want LLM? Use `http_no_llm()` — rulesets only, ambiguous errors come back as `"undecided"`.
39
39
 
@@ -46,7 +46,7 @@ from errorsense import ErrorSense, Phase, Ruleset, Skill, LLMConfig, Signal
46
46
 
47
47
  # Rulesets + LLM
48
48
  sense = ErrorSense(
49
- categories=["transient", "permanent", "user"],
49
+ labels=["transient", "permanent", "user"],
50
50
  pipeline=[
51
51
  Phase("codes", rulesets=[
52
52
  Ruleset(field="error_code", match={
@@ -68,7 +68,7 @@ sense = ErrorSense(
68
68
 
69
69
  # Rulesets only — no LLM needed
70
70
  sense = ErrorSense(
71
- categories=["client", "server"],
71
+ labels=["client", "server"],
72
72
  pipeline=[
73
73
  Phase("rules", rulesets=[
74
74
  Ruleset(field="status_code", match={"4xx": "client", 502: "server"}),
@@ -79,7 +79,7 @@ sense = ErrorSense(
79
79
 
80
80
  # LLM only — skip rulesets entirely
81
81
  sense = ErrorSense(
82
- categories=["client", "server"],
82
+ labels=["client", "server"],
83
83
  pipeline=[
84
84
  Phase("llm", skills=[
85
85
  Skill("my_classifier", path="./skills/my_classifier.md"),
@@ -141,20 +141,25 @@ results[0].reason # "ECONNRESET indicates transient network failure"
141
141
 
142
142
  ## Trailing (Stateful Error Tracking)
143
143
 
144
- Track errors per key. When a threshold is hit, the LLM reviews the full error history.
144
+ Track errors per key. When a threshold is hit, optionally have an LLM review the full error history.
145
145
 
146
146
  ```python
147
- from errorsense import TrailingConfig
147
+ from errorsense import LLMConfig, TrailingConfig
148
148
 
149
+ # With LLM review at threshold
149
150
  sense = ErrorSense(
150
- categories=["transient", "permanent", "user"],
151
+ labels=["transient", "permanent", "user"],
151
152
  pipeline=[...],
152
153
  trailing=TrailingConfig(
153
154
  threshold=3,
154
155
  count_labels=["transient", "permanent"], # user errors don't count
156
+ reviewer_llm=LLMConfig(), # enables LLM review
155
157
  ),
156
158
  )
157
159
 
160
+ # Without LLM review (just counting)
161
+ trailing=TrailingConfig(threshold=3, count_labels=["transient", "permanent"])
162
+
158
163
  # In your error handler:
159
164
  result = sense.trail("service-a", signal)
160
165
  result.label # "transient"
@@ -168,9 +173,9 @@ sense.reset("service-a")
168
173
  **How it works:**
169
174
  - Each `trail()` call classifies the signal normally through the pipeline
170
175
  - Counted labels accumulate per key toward the threshold
171
- - At threshold, the LLM reviews all recorded errors and gives its verdict
176
+ - At threshold, the LLM reviews all recorded errors (if `reviewer_llm` is set)
172
177
  - If the review changes the label, the history entry is corrected and the count adjusts
173
- - `review=False` in TrailingConfig disables LLM review (just counting)
178
+ - `reviewer_skill=Skill(...)` lets you override the default review instructions
174
179
 
175
180
  **Manual review anytime:**
176
181
 
@@ -104,7 +104,7 @@ Runs signals through a pipeline of phases.
104
104
  from errorsense import ErrorSense, Phase, Ruleset, Skill, LLMConfig
105
105
 
106
106
  sense = ErrorSense(
107
- categories=["client", "server", "undecided"],
107
+ labels=["client", "server", "undecided"],
108
108
  pipeline=[
109
109
  Phase("rules", rulesets=[...]),
110
110
  Phase("patterns", rulesets=[...]),
@@ -118,7 +118,7 @@ sense = ErrorSense(
118
118
 
119
119
  ```python
120
120
  sense = ErrorSense(
121
- categories=["client", "server"],
121
+ labels=["client", "server"],
122
122
  rulesets=[Ruleset(...)],
123
123
  skills=[Skill(...)],
124
124
  llm=LLMConfig(...),
@@ -160,13 +160,14 @@ Track errors per key with threshold-based LLM review.
160
160
  from errorsense import TrailingConfig
161
161
 
162
162
  sense = ErrorSense(
163
- categories=["client", "server", "undecided"],
163
+ labels=["client", "server", "undecided"],
164
164
  pipeline=[...],
165
165
  trailing=TrailingConfig(
166
166
  threshold=3,
167
167
  count_labels=["server"],
168
168
  history_size=10,
169
- review=None, # None=auto, True=force, False=never
169
+ reviewer_llm=LLMConfig(), # enables LLM review at threshold
170
+ reviewer_skill=Skill("custom"), # optional, defaults to built-in reclassification.md
170
171
  ),
171
172
  )
172
173
 
@@ -187,9 +188,9 @@ sense.reset("service-a")
187
188
  5. `at_threshold` recalculates after any correction
188
189
 
189
190
  **Review behavior:**
190
- - `review=None` (default): auto-review if an LLM phase exists
191
- - `review=True`: force review (raises if no LLM phase)
192
- - `review=False`: never review, just count
191
+ - `reviewer_llm=LLMConfig(...)`: LLM reviews error history at threshold
192
+ - `reviewer_llm=None` (default): no review, just count
193
+ - `reviewer_skill=Skill(...)`: override the default review instructions
193
194
 
194
195
  **Manual review:** `sense.review(key)` / `await sense.async_review(key)` — LLM reviews full history anytime.
195
196
 
@@ -222,7 +223,8 @@ class TrailingConfig:
222
223
  threshold: int = 3
223
224
  count_labels: list[str] | None = None
224
225
  history_size: int = 10
225
- review: bool | None = None
226
+ reviewer_llm: LLMConfig | None = None
227
+ reviewer_skill: Skill | None = None
226
228
  ```
227
229
 
228
230
  ---
@@ -260,7 +262,7 @@ Every `classify()` call is wrapped in try/except at the phase level. Exceptions
260
262
 
261
263
  ### Validation (at construction)
262
264
 
263
- - Labels in rulesets must be in `categories` or `default`
265
+ - Labels in rulesets must be in `labels` or `default`
264
266
  - Phases must have rulesets OR (skills + llm)
265
267
  - LLM phases must have an API key in LLMConfig
266
268
  - `pipeline=` and `rulesets=/skills=` cannot be mixed
@@ -24,4 +24,4 @@ __all__ = [
24
24
  "TrailingConfig",
25
25
  ]
26
26
 
27
- __version__ = "0.1.0"
27
+ __version__ = "0.2.0"
@@ -10,7 +10,7 @@ import time
10
10
  from collections import defaultdict, deque
11
11
  from typing import Any, Callable
12
12
 
13
- from errorsense.llm import LLMConfig
13
+ from errorsense.llm import LLMClient, LLMConfig
14
14
  from errorsense.models import SenseResult, TrailResult, TrailingConfig
15
15
  from errorsense.phase import Phase
16
16
  from errorsense.ruleset import Ruleset
@@ -31,7 +31,7 @@ class ErrorSense:
31
31
 
32
32
  def __init__(
33
33
  self,
34
- categories: list[str],
34
+ labels: list[str],
35
35
  # Explicit mode
36
36
  pipeline: list[Phase] | None = None,
37
37
  # Implicit mode
@@ -44,7 +44,7 @@ class ErrorSense:
44
44
  on_classify: Callable[[Signal, SenseResult], Any] | None = None,
45
45
  on_error: Callable[[str, Exception], Any] | None = None,
46
46
  ) -> None:
47
- self.categories = set(categories)
47
+ self.labels = set(labels)
48
48
  self.default = default
49
49
  self._on_classify = on_classify
50
50
  self._on_error = on_error
@@ -60,26 +60,22 @@ class ErrorSense:
60
60
 
61
61
  self._validate_phase_names()
62
62
  self._pipeline_names = frozenset(p.name for p in self._pipeline)
63
- self._validate_categories()
63
+ self._validate_labels()
64
64
  self._validate_llm_api_keys()
65
65
  for phase in self._pipeline:
66
- phase.set_categories(list(categories))
66
+ phase.set_labels(list(labels))
67
67
 
68
68
  # Trailing state
69
69
  self._trailing = trailing
70
- self._has_llm = any(p.is_llm_phase for p in self._pipeline)
71
- self._reclass_skill: Skill | None = None
70
+ self._reviewer_client: LLMClient | None = None
71
+ self._reviewer_skill: Skill | None = None
72
72
  if trailing:
73
73
  self._init_trailing(trailing)
74
74
 
75
75
  def _init_trailing(self, config: TrailingConfig) -> None:
76
- if config.review is True and not self._has_llm:
77
- raise ValueError(
78
- "TrailingConfig(review=True) requires an LLM phase in the pipeline."
79
- )
80
- self._review_enabled = (
81
- config.review if config.review is not None else self._has_llm
82
- )
76
+ if config.reviewer_llm is not None:
77
+ self._reviewer_client = LLMClient(config.reviewer_llm)
78
+ self._reviewer_skill = config.reviewer_skill
83
79
  self._threshold = config.threshold
84
80
  self._count_labels = set(config.count_labels or [])
85
81
  hs = config.history_size
@@ -106,11 +102,15 @@ class ErrorSense:
106
102
  """Close all LLM phase clients (sync)."""
107
103
  for phase in self._pipeline:
108
104
  phase.close_sync()
105
+ if self._reviewer_client:
106
+ self._reviewer_client.close_sync()
109
107
 
110
108
  async def async_close(self) -> None:
111
109
  """Close all LLM phase clients (async)."""
112
110
  for phase in self._pipeline:
113
111
  await phase.close_async()
112
+ if self._reviewer_client:
113
+ await self._reviewer_client.close_async()
114
114
 
115
115
  async def __aenter__(self) -> ErrorSense:
116
116
  return self
@@ -204,7 +204,7 @@ class ErrorSense:
204
204
  at_threshold = self._record_and_check(key, signal, result)
205
205
  review_result = (
206
206
  self._run_review_sync(key)
207
- if at_threshold and self._review_enabled else None
207
+ if at_threshold and self._reviewer_client else None
208
208
  )
209
209
  return self._build_trail_result(key, result, at_threshold, review_result)
210
210
 
@@ -224,16 +224,24 @@ class ErrorSense:
224
224
  at_threshold = self._record_and_check(key, signal, result)
225
225
  review_result = (
226
226
  await self._run_review_async(key)
227
- if at_threshold and self._review_enabled else None
227
+ if at_threshold and self._reviewer_client else None
228
228
  )
229
229
  return self._build_trail_result(key, result, at_threshold, review_result)
230
230
 
231
231
  def review(self, key: str) -> SenseResult | None:
232
232
  """Manually review full history for a key (sync). Returns LLM verdict."""
233
+ if not self._trailing:
234
+ raise RuntimeError(
235
+ "Trailing not configured. Pass trailing=TrailingConfig(...) to ErrorSense."
236
+ )
233
237
  return self._run_review_sync(key)
234
238
 
235
239
  async def async_review(self, key: str) -> SenseResult | None:
236
240
  """Manually review full history for a key (async). Returns LLM verdict."""
241
+ if not self._trailing:
242
+ raise RuntimeError(
243
+ "Trailing not configured. Pass trailing=TrailingConfig(...) to ErrorSense."
244
+ )
237
245
  return self._run_review_async(key)
238
246
 
239
247
  def _record_and_check(self, key: str, signal: Signal, result: SenseResult) -> bool:
@@ -297,37 +305,33 @@ class ErrorSense:
297
305
  self._counts[key][new_label] += 1
298
306
 
299
307
  def _run_review_sync(self, key: str) -> SenseResult | None:
300
- llm_phase = self._find_llm_phase()
301
- if not llm_phase:
308
+ if not self._reviewer_client:
302
309
  return None
303
310
  signal, skill = self._build_review_context(key)
304
311
  try:
305
- return llm_phase.run_llm_call(signal, skill, list(self.categories))
312
+ return self._reviewer_client.classify_sync(
313
+ signal, skill, list(self.labels), include_reason=True,
314
+ )
306
315
  except Exception as e:
307
316
  logger.warning("LLM review failed: %s", e)
308
317
  return None
309
318
 
310
319
  async def _run_review_async(self, key: str) -> SenseResult | None:
311
- llm_phase = self._find_llm_phase()
312
- if not llm_phase:
320
+ if not self._reviewer_client:
313
321
  return None
314
322
  signal, skill = self._build_review_context(key)
315
323
  try:
316
- return await llm_phase.async_run_llm_call(signal, skill, list(self.categories))
324
+ return await self._reviewer_client.classify_async(
325
+ signal, skill, list(self.labels), include_reason=True,
326
+ )
317
327
  except Exception as e:
318
328
  logger.warning("LLM review failed: %s", e)
319
329
  return None
320
330
 
321
- def _find_llm_phase(self) -> Phase | None:
322
- for phase in self._pipeline:
323
- if phase.is_llm_phase:
324
- return phase
325
- return None
326
-
327
- def _get_reclass_skill(self) -> Skill:
328
- if self._reclass_skill is None:
329
- self._reclass_skill = Skill("reclassification")
330
- return self._reclass_skill
331
+ def _get_reviewer_skill(self) -> Skill:
332
+ if self._reviewer_skill is None:
333
+ self._reviewer_skill = Skill("reclassification")
334
+ return self._reviewer_skill
331
335
 
332
336
  def _build_review_context(self, key: str) -> tuple[Signal, Skill]:
333
337
  history = list(self._history[key])
@@ -341,7 +345,7 @@ class ErrorSense:
341
345
  "key": key,
342
346
  "history_summary": summary,
343
347
  })
344
- return signal, self._get_reclass_skill()
348
+ return signal, self._get_reviewer_skill()
345
349
 
346
350
  def reset(self, key: str) -> None:
347
351
  """Clear trailing history and counts for a key."""
@@ -395,15 +399,15 @@ class ErrorSense:
395
399
  raise ValueError(f"Duplicate phase name: {phase.name!r}")
396
400
  seen.add(phase.name)
397
401
 
398
- def _validate_categories(self) -> None:
399
- all_cats = self.categories | {self.default}
402
+ def _validate_labels(self) -> None:
403
+ all_labels = self.labels | {self.default}
400
404
  for phase in self._pipeline:
401
405
  for ruleset in phase.rulesets:
402
- bad = ruleset.referenced_labels() - all_cats
406
+ bad = ruleset.referenced_labels() - all_labels
403
407
  if bad:
404
408
  raise ValueError(
405
409
  f"Ruleset on field {getattr(ruleset, 'field', '?')!r} maps to "
406
- f"label {bad.pop()!r} not in {sorted(self.categories)}"
410
+ f"label {bad.pop()!r} not in {sorted(self.labels)}"
407
411
  )
408
412
 
409
413
  def _validate_llm_api_keys(self) -> None:
@@ -22,7 +22,7 @@ DEFAULT_BASE_URL = "https://relay.opengpu.network/v2/openai/v1"
22
22
  DEFAULT_MODEL = "Qwen/Qwen3.5-397B-A17B-FP8"
23
23
  DEFAULT_PROMPT_TEMPLATE = (
24
24
  "{instructions}\n\n"
25
- "Classify the following error signal into exactly one of these categories: {categories}\n\n"
25
+ "Classify the following error signal into exactly one of these labels: {labels}\n\n"
26
26
  "Signal data:\n{signal}\n\n"
27
27
  'Reply ONLY with JSON: {{"label": "...", "confidence": 0.0, "reason": "..."}}'
28
28
  )
@@ -54,7 +54,7 @@ class LLMConfig:
54
54
  object.__setattr__(self, "base_url", os.environ.get("ERRORSENSE_LLM_URL", DEFAULT_BASE_URL))
55
55
 
56
56
 
57
- def _build_prompt(signal: Signal, skill: Skill, categories: list[str], config: LLMConfig) -> str:
57
+ def _build_prompt(signal: Signal, skill: Skill, labels: list[str], config: LLMConfig) -> str:
58
58
  signal_text = json.dumps(signal.to_dict(), default=str)
59
59
  if len(signal_text) > config.max_signal_size:
60
60
  signal_text = signal_text[: config.max_signal_size] + "..."
@@ -62,7 +62,7 @@ def _build_prompt(signal: Signal, skill: Skill, categories: list[str], config: L
62
62
  template = skill.prompt_template or DEFAULT_PROMPT_TEMPLATE
63
63
  return template.format(
64
64
  instructions=skill.instructions,
65
- categories=", ".join(categories) if categories else "unknown",
65
+ labels=", ".join(labels) if labels else "unknown",
66
66
  signal=signal_text,
67
67
  )
68
68
 
@@ -84,7 +84,7 @@ def _build_headers(config: LLMConfig) -> dict:
84
84
 
85
85
  def _parse_response(
86
86
  data: dict,
87
- categories: list[str],
87
+ labels: list[str],
88
88
  skill_name: str,
89
89
  include_reason: bool = False,
90
90
  ) -> SenseResult | None:
@@ -96,11 +96,11 @@ def _parse_response(
96
96
  content = content.rsplit("```", 1)[0]
97
97
  parsed = json.loads(content.strip())
98
98
 
99
- label = parsed.get("label", "") or parsed.get("category", "")
99
+ label = parsed.get("label", "")
100
100
  confidence = min(1.0, max(0.0, float(parsed.get("confidence", 0.7))))
101
101
  reason = parsed.get("reason") if include_reason else None
102
102
 
103
- if categories and label not in categories:
103
+ if labels and label not in labels:
104
104
  logger.warning(
105
105
  "Skill %r: LLM returned unknown label %r", skill_name, label
106
106
  )
@@ -154,19 +154,18 @@ class LLMClient:
154
154
  self,
155
155
  signal: Signal,
156
156
  skill: Skill,
157
- categories: list[str],
157
+ labels: list[str],
158
158
  include_reason: bool = False,
159
159
  ) -> SenseResult | None:
160
- config = skill.llm if skill.llm is not None else self._config
161
- prompt = _build_prompt(signal, skill, categories, config)
162
- url = f"{config.base_url.rstrip('/')}/chat/completions"
160
+ prompt = _build_prompt(signal, skill, labels, self._config)
161
+ url = f"{self._config.base_url.rstrip('/')}/chat/completions"
163
162
 
164
163
  try:
165
164
  client = self._get_sync_client()
166
165
  resp = client.post(
167
166
  url,
168
- headers=_build_headers(config),
169
- json=_build_request_body(skill, prompt, config),
167
+ headers=_build_headers(self._config),
168
+ json=_build_request_body(skill, prompt, self._config),
170
169
  )
171
170
  resp.raise_for_status()
172
171
  data = resp.json()
@@ -174,25 +173,24 @@ class LLMClient:
174
173
  logger.warning("LLM call failed for skill %r: %s", skill.name, e)
175
174
  return None
176
175
 
177
- return _parse_response(data, categories, skill.name, include_reason)
176
+ return _parse_response(data, labels, skill.name, include_reason)
178
177
 
179
178
  async def classify_async(
180
179
  self,
181
180
  signal: Signal,
182
181
  skill: Skill,
183
- categories: list[str],
182
+ labels: list[str],
184
183
  include_reason: bool = False,
185
184
  ) -> SenseResult | None:
186
- config = skill.llm if skill.llm is not None else self._config
187
- prompt = _build_prompt(signal, skill, categories, config)
188
- url = f"{config.base_url.rstrip('/')}/chat/completions"
185
+ prompt = _build_prompt(signal, skill, labels, self._config)
186
+ url = f"{self._config.base_url.rstrip('/')}/chat/completions"
189
187
 
190
188
  try:
191
189
  client = await self._get_async_client()
192
190
  resp = await client.post(
193
191
  url,
194
- headers=_build_headers(config),
195
- json=_build_request_body(skill, prompt, config),
192
+ headers=_build_headers(self._config),
193
+ json=_build_request_body(skill, prompt, self._config),
196
194
  )
197
195
  resp.raise_for_status()
198
196
  data = resp.json()
@@ -200,7 +198,7 @@ class LLMClient:
200
198
  logger.warning("LLM call failed for skill %r: %s", skill.name, e)
201
199
  return None
202
200
 
203
- return _parse_response(data, categories, skill.name, include_reason)
201
+ return _parse_response(data, labels, skill.name, include_reason)
204
202
 
205
203
  def close_sync(self) -> None:
206
204
  if self._sync_client is not None:
@@ -1,7 +1,11 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from dataclasses import dataclass
4
- from typing import Any
4
+ from typing import TYPE_CHECKING, Any
5
+
6
+ if TYPE_CHECKING:
7
+ from errorsense.llm import LLMConfig
8
+ from errorsense.skill import Skill
5
9
 
6
10
 
7
11
  @dataclass(frozen=True)
@@ -40,13 +44,12 @@ class TrailingConfig:
40
44
  threshold: Number of counted errors before review triggers.
41
45
  count_labels: Only these labels count toward threshold.
42
46
  history_size: Max errors kept per key (ring buffer).
43
- review: Whether to LLM-review history when threshold hit.
44
- None = auto (True if LLM phase exists, False if not).
45
- True = force (raises if no LLM phase).
46
- False = never.
47
+ reviewer_llm: LLM config for review. Set to enable review, None to disable.
48
+ reviewer_skill: Custom review skill. Defaults to built-in reclassification.
47
49
  """
48
50
 
49
51
  threshold: int = 3
50
52
  count_labels: list[str] | None = None
51
53
  history_size: int = 10
52
- review: bool | None = None
54
+ reviewer_llm: LLMConfig | None = None
55
+ reviewer_skill: Skill | None = None
@@ -61,14 +61,14 @@ class Phase:
61
61
  self.skills = skills or []
62
62
  self.llm = llm
63
63
  self.is_llm_phase = has_skills
64
- self._categories: list[str] = []
64
+ self._labels: list[str] = []
65
65
  self._llm_client: LLMClient | None = None
66
66
 
67
67
  if self.is_llm_phase and llm:
68
68
  self._llm_client = LLMClient(llm)
69
69
 
70
- def set_categories(self, categories: list[str]) -> None:
71
- self._categories = list(categories)
70
+ def set_labels(self, labels: list[str]) -> None:
71
+ self._labels = list(labels)
72
72
 
73
73
  def classify(self, signal: Signal, explain: bool = False) -> SenseResult | None:
74
74
  """Sync classification. Full pipeline — rulesets or LLM."""
@@ -136,38 +136,10 @@ class Phase:
136
136
  return best
137
137
 
138
138
  def _run_one_skill_sync(self, signal: Signal, skill: Skill, explain: bool) -> SenseResult | None:
139
- if skill.llm is not None:
140
- client = LLMClient(skill.llm)
141
- try:
142
- return client.classify_sync(signal, skill, self._categories, include_reason=explain)
143
- finally:
144
- client.close_sync()
145
- return self._llm_client.classify_sync(signal, skill, self._categories, include_reason=explain)
139
+ return self._llm_client.classify_sync(signal, skill, self._labels, include_reason=explain)
146
140
 
147
141
  async def _run_one_skill_async(self, signal: Signal, skill: Skill, explain: bool) -> SenseResult | None:
148
- if skill.llm is not None:
149
- client = LLMClient(skill.llm)
150
- try:
151
- return await client.classify_async(signal, skill, self._categories, include_reason=explain)
152
- finally:
153
- await client.close_async()
154
- return await self._llm_client.classify_async(signal, skill, self._categories, include_reason=explain)
155
-
156
- def run_llm_call(
157
- self, signal: Signal, skill: Skill, categories: list[str],
158
- ) -> SenseResult | None:
159
- """Run a single sync LLM call. Public API for Tracker reclassification."""
160
- if not self._llm_client:
161
- return None
162
- return self._llm_client.classify_sync(signal, skill, categories, include_reason=True)
163
-
164
- async def async_run_llm_call(
165
- self, signal: Signal, skill: Skill, categories: list[str],
166
- ) -> SenseResult | None:
167
- """Run a single async LLM call. Public API for Tracker reclassification."""
168
- if not self._llm_client:
169
- return None
170
- return await self._llm_client.classify_async(signal, skill, categories, include_reason=True)
142
+ return await self._llm_client.classify_async(signal, skill, self._labels, include_reason=explain)
171
143
 
172
144
  def _stamp_phase(self, result: SenseResult, skill_name: str) -> SenseResult:
173
145
  updates: dict[str, Any] = {}
@@ -48,7 +48,7 @@ def http(
48
48
  phases.append(Phase("llm", skills=[Skill("http_classifier")], llm=llm))
49
49
 
50
50
  return ErrorSense(
51
- categories=["client", "server", "undecided"],
51
+ labels=["client", "server", "undecided"],
52
52
  pipeline=phases,
53
53
  default="undecided",
54
54
  )
@@ -66,7 +66,7 @@ def http_no_llm(
66
66
  extra_rulesets: Additional rulesets appended to the patterns phase.
67
67
  """
68
68
  return ErrorSense(
69
- categories=["client", "server", "undecided"],
69
+ labels=["client", "server", "undecided"],
70
70
  pipeline=_ruleset_phases(extra_rulesets),
71
71
  default="undecided",
72
72
  )
@@ -3,10 +3,6 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  from pathlib import Path
6
- from typing import TYPE_CHECKING
7
-
8
- if TYPE_CHECKING:
9
- from errorsense.llm import LLMConfig
10
6
 
11
7
  __all__ = ["Skill"]
12
8
 
@@ -29,7 +25,6 @@ class Skill:
29
25
  instructions: Inline instructions string. Overrides file loading.
30
26
  prompt_template: Override the default LLM prompt template.
31
27
  temperature: LLM temperature (default: 0.0 for determinism).
32
- llm: Per-skill LLMConfig override.
33
28
  """
34
29
 
35
30
  def __init__(
@@ -39,7 +34,6 @@ class Skill:
39
34
  instructions: str | None = None,
40
35
  prompt_template: str | None = None,
41
36
  temperature: float = 0.0,
42
- llm: LLMConfig | None = None,
43
37
  ) -> None:
44
38
  if not name:
45
39
  raise ValueError("Skill requires a non-empty 'name'")
@@ -47,7 +41,6 @@ class Skill:
47
41
  self.name = name
48
42
  self.prompt_template = prompt_template
49
43
  self.temperature = temperature
50
- self.llm = llm
51
44
 
52
45
  if instructions:
53
46
  self.instructions = instructions
@@ -0,0 +1,21 @@
1
+ You are reviewing a history of classified errors for a single key (e.g., a service or provider).
2
+
3
+ Each entry in the history has a label that was assigned by earlier classification. Your job is to review the full history and decide: is the most recent label correct, or should it be changed?
4
+
5
+ ## How to decide
6
+
7
+ Look at the pattern across all entries:
8
+ - If the errors are consistent (all the same type), the label is probably correct
9
+ - If earlier errors were classified differently and the pattern suggests the latest one was misclassified, pick the label that better fits the overall pattern
10
+ - If the history shows a mix of genuine errors, keep the most recent label as-is
11
+
12
+ ## Your output
13
+
14
+ Pick one of the allowed labels as your label. This must be one of the labels provided in the prompt — do not invent new ones.
15
+
16
+ Set confidence based on how clear the pattern is:
17
+ - 0.9+ if the history strongly supports your label
18
+ - 0.7-0.9 if the evidence is moderate
19
+ - Below 0.7 if the history is genuinely mixed
20
+
21
+ In your reason, briefly explain what pattern you saw and why you kept or changed the label.
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "errorsense"
7
- version = "0.1.2"
7
+ version = "0.2.0"
8
8
  description = "Error classification engine. Rules for the obvious, AI for the ambiguous."
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -25,7 +25,7 @@ class BrokenRuleset(Ruleset):
25
25
  class TestExplicitMode:
26
26
  def test_first_match_wins(self):
27
27
  engine = ErrorSense(
28
- categories=["a", "b"],
28
+ labels=["a", "b"],
29
29
  pipeline=[Phase("p1", rulesets=[AlwaysMatchRuleset("a"), AlwaysMatchRuleset("b")])],
30
30
  )
31
31
  results = engine.classify(Signal({"x": 1}))
@@ -34,7 +34,7 @@ class TestExplicitMode:
34
34
 
35
35
  def test_skip_none_results(self):
36
36
  engine = ErrorSense(
37
- categories=["a"],
37
+ labels=["a"],
38
38
  pipeline=[Phase("p1", rulesets=[NeverMatchRuleset(), AlwaysMatchRuleset("a")])],
39
39
  )
40
40
  results = engine.classify(Signal({"x": 1}))
@@ -42,7 +42,7 @@ class TestExplicitMode:
42
42
 
43
43
  def test_default_when_no_match(self):
44
44
  engine = ErrorSense(
45
- categories=["a"],
45
+ labels=["a"],
46
46
  pipeline=[Phase("p1", rulesets=[NeverMatchRuleset()])],
47
47
  default="fallback",
48
48
  )
@@ -52,7 +52,7 @@ class TestExplicitMode:
52
52
 
53
53
  def test_skill_name_auto_filled(self):
54
54
  engine = ErrorSense(
55
- categories=["a"],
55
+ labels=["a"],
56
56
  pipeline=[Phase("p1", rulesets=[AlwaysMatchRuleset("a")])],
57
57
  )
58
58
  results = engine.classify(Signal({"x": 1}))
@@ -61,7 +61,7 @@ class TestExplicitMode:
61
61
 
62
62
  def test_broken_ruleset_skipped(self):
63
63
  engine = ErrorSense(
64
- categories=["a"],
64
+ labels=["a"],
65
65
  pipeline=[Phase("p1", rulesets=[BrokenRuleset(), AlwaysMatchRuleset("a")])],
66
66
  )
67
67
  results = engine.classify(Signal({"x": 1}))
@@ -69,7 +69,7 @@ class TestExplicitMode:
69
69
 
70
70
  def test_all_broken_falls_to_default(self):
71
71
  engine = ErrorSense(
72
- categories=["a"],
72
+ labels=["a"],
73
73
  pipeline=[Phase("p1", rulesets=[BrokenRuleset()])],
74
74
  default="oops",
75
75
  )
@@ -78,7 +78,7 @@ class TestExplicitMode:
78
78
 
79
79
  def test_multi_phase_first_catch(self):
80
80
  engine = ErrorSense(
81
- categories=["a", "b"],
81
+ labels=["a", "b"],
82
82
  pipeline=[
83
83
  Phase("first", rulesets=[NeverMatchRuleset()]),
84
84
  Phase("second", rulesets=[AlwaysMatchRuleset("b")]),
@@ -90,7 +90,7 @@ class TestExplicitMode:
90
90
 
91
91
  def test_skip_phase(self):
92
92
  engine = ErrorSense(
93
- categories=["a", "b"],
93
+ labels=["a", "b"],
94
94
  pipeline=[
95
95
  Phase("skip_me", rulesets=[AlwaysMatchRuleset("a")]),
96
96
  Phase("use_me", rulesets=[AlwaysMatchRuleset("b")]),
@@ -101,7 +101,7 @@ class TestExplicitMode:
101
101
 
102
102
  def test_skip_invalid_phase_raises(self):
103
103
  engine = ErrorSense(
104
- categories=["a"],
104
+ labels=["a"],
105
105
  pipeline=[Phase("p1", rulesets=[AlwaysMatchRuleset("a")])],
106
106
  )
107
107
  with pytest.raises(ValueError, match="Unknown phase"):
@@ -110,7 +110,7 @@ class TestExplicitMode:
110
110
  def test_duplicate_phase_names_rejected(self):
111
111
  with pytest.raises(ValueError, match="Duplicate"):
112
112
  ErrorSense(
113
- categories=["a"],
113
+ labels=["a"],
114
114
  pipeline=[
115
115
  Phase("p1", rulesets=[AlwaysMatchRuleset("a")]),
116
116
  Phase("p1", rulesets=[AlwaysMatchRuleset("a")]),
@@ -121,7 +121,7 @@ class TestExplicitMode:
121
121
  class TestShortCircuit:
122
122
  def test_short_circuit_true_returns_one(self):
123
123
  engine = ErrorSense(
124
- categories=["a", "b"],
124
+ labels=["a", "b"],
125
125
  pipeline=[
126
126
  Phase("first", rulesets=[AlwaysMatchRuleset("a")]),
127
127
  Phase("second", rulesets=[AlwaysMatchRuleset("b")]),
@@ -133,7 +133,7 @@ class TestShortCircuit:
133
133
 
134
134
  def test_short_circuit_false_returns_all_matches(self):
135
135
  engine = ErrorSense(
136
- categories=["a", "b"],
136
+ labels=["a", "b"],
137
137
  pipeline=[
138
138
  Phase("first", rulesets=[AlwaysMatchRuleset("a", confidence=0.8)]),
139
139
  Phase("second", rulesets=[AlwaysMatchRuleset("b", confidence=0.9)]),
@@ -146,7 +146,7 @@ class TestShortCircuit:
146
146
 
147
147
  def test_short_circuit_false_skips_unmatched(self):
148
148
  engine = ErrorSense(
149
- categories=["a", "b"],
149
+ labels=["a", "b"],
150
150
  pipeline=[
151
151
  Phase("first", rulesets=[AlwaysMatchRuleset("a")]),
152
152
  Phase("second", rulesets=[NeverMatchRuleset()]),
@@ -160,7 +160,7 @@ class TestShortCircuit:
160
160
 
161
161
  def test_short_circuit_false_no_matches_default(self):
162
162
  engine = ErrorSense(
163
- categories=["a"],
163
+ labels=["a"],
164
164
  pipeline=[Phase("p1", rulesets=[NeverMatchRuleset()])],
165
165
  default="none",
166
166
  )
@@ -172,7 +172,7 @@ class TestShortCircuit:
172
172
  class TestImplicitMode:
173
173
  def test_rulesets_only(self):
174
174
  engine = ErrorSense(
175
- categories=["a"],
175
+ labels=["a"],
176
176
  rulesets=[AlwaysMatchRuleset("a")],
177
177
  )
178
178
  results = engine.classify(Signal({"x": 1}))
@@ -181,7 +181,7 @@ class TestImplicitMode:
181
181
 
182
182
  def test_default_when_no_match(self):
183
183
  engine = ErrorSense(
184
- categories=["a"],
184
+ labels=["a"],
185
185
  rulesets=[NeverMatchRuleset()],
186
186
  default="none",
187
187
  )
@@ -191,21 +191,21 @@ class TestImplicitMode:
191
191
  def test_cannot_mix_modes(self):
192
192
  with pytest.raises(ValueError, match="Cannot mix"):
193
193
  ErrorSense(
194
- categories=["a"],
194
+ labels=["a"],
195
195
  pipeline=[Phase("p1", rulesets=[AlwaysMatchRuleset("a")])],
196
196
  rulesets=[AlwaysMatchRuleset("a")],
197
197
  )
198
198
 
199
199
  def test_must_provide_something(self):
200
200
  with pytest.raises(ValueError, match="Must provide"):
201
- ErrorSense(categories=["a"])
201
+ ErrorSense(labels=["a"])
202
202
 
203
203
 
204
204
  class TestCallbacks:
205
205
  def test_on_classify_callback(self):
206
206
  collected = []
207
207
  engine = ErrorSense(
208
- categories=["a"],
208
+ labels=["a"],
209
209
  pipeline=[Phase("p1", rulesets=[AlwaysMatchRuleset("a")])],
210
210
  on_classify=lambda sig, res: collected.append(res),
211
211
  )
@@ -215,7 +215,7 @@ class TestCallbacks:
215
215
 
216
216
  def test_on_error_callback(self):
217
217
  engine = ErrorSense(
218
- categories=["a"],
218
+ labels=["a"],
219
219
  pipeline=[
220
220
  Phase("broken", rulesets=[BrokenRuleset()]),
221
221
  Phase("ok", rulesets=[AlwaysMatchRuleset("a")]),
@@ -230,7 +230,7 @@ class TestLabelValidation:
230
230
  def test_invalid_label_in_ruleset(self):
231
231
  with pytest.raises(ValueError, match="not in"):
232
232
  ErrorSense(
233
- categories=["a", "b"],
233
+ labels=["a", "b"],
234
234
  pipeline=[Phase("p1", rulesets=[
235
235
  Ruleset(field="x", match={1: "c"}),
236
236
  ])],
@@ -241,7 +241,7 @@ class TestAsyncClassify:
241
241
  @pytest.mark.asyncio
242
242
  async def test_async_classify_first_catch(self):
243
243
  engine = ErrorSense(
244
- categories=["a"],
244
+ labels=["a"],
245
245
  pipeline=[Phase("p1", rulesets=[NeverMatchRuleset(), AlwaysMatchRuleset("a")])],
246
246
  )
247
247
  results = await engine.async_classify(Signal({"x": 1}))
@@ -250,7 +250,7 @@ class TestAsyncClassify:
250
250
  @pytest.mark.asyncio
251
251
  async def test_async_classify_default(self):
252
252
  engine = ErrorSense(
253
- categories=["a"],
253
+ labels=["a"],
254
254
  pipeline=[Phase("p1", rulesets=[NeverMatchRuleset()])],
255
255
  default="fallback",
256
256
  )
@@ -260,7 +260,7 @@ class TestAsyncClassify:
260
260
  @pytest.mark.asyncio
261
261
  async def test_async_classify_broken_skipped(self):
262
262
  engine = ErrorSense(
263
- categories=["a"],
263
+ labels=["a"],
264
264
  pipeline=[Phase("p1", rulesets=[BrokenRuleset(), AlwaysMatchRuleset("a")])],
265
265
  )
266
266
  results = await engine.async_classify(Signal({"x": 1}))
@@ -269,7 +269,7 @@ class TestAsyncClassify:
269
269
  @pytest.mark.asyncio
270
270
  async def test_async_classify_all_phases(self):
271
271
  engine = ErrorSense(
272
- categories=["a", "b"],
272
+ labels=["a", "b"],
273
273
  pipeline=[
274
274
  Phase("first", rulesets=[AlwaysMatchRuleset("a", confidence=0.5)]),
275
275
  Phase("second", rulesets=[AlwaysMatchRuleset("b", confidence=0.9)]),
@@ -8,7 +8,7 @@ from errorsense import ErrorSense, Phase, Ruleset, Signal, TrailingConfig
8
8
  def make_sense(**kwargs):
9
9
  """Helper to build an ErrorSense with trailing enabled."""
10
10
  defaults = {
11
- "categories": ["infra", "provider", "user"],
11
+ "labels": ["infra", "provider", "user"],
12
12
  "pipeline": [
13
13
  Phase("rules", rulesets=[
14
14
  Ruleset(field="status_code", match={400: "user", 401: "user", 502: "infra", 503: "infra"}),
@@ -111,21 +111,16 @@ class TestTrail:
111
111
 
112
112
  def test_trail_without_config_raises(self):
113
113
  sense = ErrorSense(
114
- categories=["a"],
114
+ labels=["a"],
115
115
  pipeline=[Phase("p1", rulesets=[Ruleset(field="x", match={1: "a"})])],
116
116
  )
117
117
  with pytest.raises(RuntimeError, match="Trailing not configured"):
118
118
  sense.trail("key", Signal({"x": 1}))
119
119
 
120
- def test_review_true_without_llm_raises(self):
121
- with pytest.raises(ValueError, match="requires an LLM phase"):
122
- make_sense(trailing=TrailingConfig(
123
- threshold=3, count_labels=["infra"], review=True,
124
- ))
125
-
126
- def test_review_false_no_review(self):
120
+ def test_no_reviewer_llm_no_review(self):
121
+ """Without reviewer_llm, trailing just counts — no LLM review."""
127
122
  sense = make_sense(trailing=TrailingConfig(
128
- threshold=2, count_labels=["infra", "provider"], review=False,
123
+ threshold=2, count_labels=["infra", "provider"],
129
124
  ))
130
125
  sense.trail("p1", Signal.from_http(status_code=502))
131
126
  result = sense.trail("p1", Signal.from_http(status_code=502))
@@ -1,9 +0,0 @@
1
- Review this full error history and provide analysis.
2
-
3
- Are these errors consistent or mixed? Is there a clear pattern (e.g., all infrastructure
4
- failures, all user errors, or a mix)?
5
-
6
- Should the system trip/alert based on this history, or are the counted errors
7
- misleading (e.g., user errors miscounted as server errors)?
8
-
9
- Provide your overall assessment.
File without changes