PyPI - errorsense - Versions diffs - 0.1.2__tar.gz → 0.2.0__tar.gz - Mend

errorsense 0.1.2.tar.gz → 0.2.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

{errorsense-0.1.2 → errorsense-0.2.0}/.gitignore RENAMED Viewed

@@ -7,5 +7,4 @@ build/
 *.pyc
 .DS_Store
 .claude
-relay_preset/
 .env

{errorsense-0.1.2 → errorsense-0.2.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: errorsense
-Version: 0.1.2
+Version: 0.2.0
 Summary: Error classification engine. Rules for the obvious, AI for the ambiguous.
 Project-URL: Homepage, https://github.com/OpenGPU-Network/errorsense
 Project-URL: Documentation, https://github.com/OpenGPU-Network/errorsense#readme
@@ -54,7 +54,7 @@ results = sense.classify(Signal.from_http(status_code=500, body="model not found
 results[0].label  # "client" (LLM figured it out)
 ```
-The `http` preset gives you a 3-phase pipeline (rules → patterns → LLM) with 3 categories: `"client"`, `"server"`, `"undecided"`. Rulesets handle obvious cases instantly. LLM handles the ambiguous ones.
+The `http` preset gives you a 3-phase pipeline (rules → patterns → LLM) with 3 labels: `"client"`, `"server"`, `"undecided"`. Rulesets handle obvious cases instantly. LLM handles the ambiguous ones.
 Don't want LLM? Use `http_no_llm()` — rulesets only, ambiguous errors come back as `"undecided"`.
@@ -67,7 +67,7 @@ from errorsense import ErrorSense, Phase, Ruleset, Skill, LLMConfig, Signal
 # Rulesets + LLM
 sense = ErrorSense(
-    categories=["transient", "permanent", "user"],
+    labels=["transient", "permanent", "user"],
     pipeline=[
         Phase("codes", rulesets=[
             Ruleset(field="error_code", match={
@@ -89,7 +89,7 @@ sense = ErrorSense(
 # Rulesets only — no LLM needed
 sense = ErrorSense(
-    categories=["client", "server"],
+    labels=["client", "server"],
     pipeline=[
         Phase("rules", rulesets=[
             Ruleset(field="status_code", match={"4xx": "client", 502: "server"}),
@@ -100,7 +100,7 @@ sense = ErrorSense(
 # LLM only — skip rulesets entirely
 sense = ErrorSense(
-    categories=["client", "server"],
+    labels=["client", "server"],
     pipeline=[
         Phase("llm", skills=[
             Skill("my_classifier", path="./skills/my_classifier.md"),
@@ -162,20 +162,25 @@ results[0].reason  # "ECONNRESET indicates transient network failure"
 ## Trailing (Stateful Error Tracking)
-Track errors per key. When a threshold is hit, the LLM reviews the full error history.
+Track errors per key. When a threshold is hit, optionally have an LLM review the full error history.
 ```python
-from errorsense import TrailingConfig
+from errorsense import LLMConfig, TrailingConfig
+# With LLM review at threshold
 sense = ErrorSense(
-    categories=["transient", "permanent", "user"],
+    labels=["transient", "permanent", "user"],
     pipeline=[...],
     trailing=TrailingConfig(
         threshold=3,
         count_labels=["transient", "permanent"],  # user errors don't count
+        reviewer_llm=LLMConfig(),                 # enables LLM review
     ),
 )
+# Without LLM review (just counting)
+trailing=TrailingConfig(threshold=3, count_labels=["transient", "permanent"])
 # In your error handler:
 result = sense.trail("service-a", signal)
 result.label         # "transient"
@@ -189,9 +194,9 @@ sense.reset("service-a")
 **How it works:**
 - Each `trail()` call classifies the signal normally through the pipeline
 - Counted labels accumulate per key toward the threshold
-- At threshold, the LLM reviews all recorded errors and gives its verdict
+- At threshold, the LLM reviews all recorded errors (if `reviewer_llm` is set)
 - If the review changes the label, the history entry is corrected and the count adjusts
-- `review=False` in TrailingConfig disables LLM review (just counting)
+- `reviewer_skill=Skill(...)` lets you override the default review instructions
 **Manual review anytime:**

{errorsense-0.1.2 → errorsense-0.2.0}/README.md RENAMED Viewed

@@ -33,7 +33,7 @@ results = sense.classify(Signal.from_http(status_code=500, body="model not found
 results[0].label  # "client" (LLM figured it out)
 ```
-The `http` preset gives you a 3-phase pipeline (rules → patterns → LLM) with 3 categories: `"client"`, `"server"`, `"undecided"`. Rulesets handle obvious cases instantly. LLM handles the ambiguous ones.
+The `http` preset gives you a 3-phase pipeline (rules → patterns → LLM) with 3 labels: `"client"`, `"server"`, `"undecided"`. Rulesets handle obvious cases instantly. LLM handles the ambiguous ones.
 Don't want LLM? Use `http_no_llm()` — rulesets only, ambiguous errors come back as `"undecided"`.
@@ -46,7 +46,7 @@ from errorsense import ErrorSense, Phase, Ruleset, Skill, LLMConfig, Signal
 # Rulesets + LLM
 sense = ErrorSense(
-    categories=["transient", "permanent", "user"],
+    labels=["transient", "permanent", "user"],
     pipeline=[
         Phase("codes", rulesets=[
             Ruleset(field="error_code", match={
@@ -68,7 +68,7 @@ sense = ErrorSense(
 # Rulesets only — no LLM needed
 sense = ErrorSense(
-    categories=["client", "server"],
+    labels=["client", "server"],
     pipeline=[
         Phase("rules", rulesets=[
             Ruleset(field="status_code", match={"4xx": "client", 502: "server"}),
@@ -79,7 +79,7 @@ sense = ErrorSense(
 # LLM only — skip rulesets entirely
 sense = ErrorSense(
-    categories=["client", "server"],
+    labels=["client", "server"],
     pipeline=[
         Phase("llm", skills=[
             Skill("my_classifier", path="./skills/my_classifier.md"),
@@ -141,20 +141,25 @@ results[0].reason  # "ECONNRESET indicates transient network failure"
 ## Trailing (Stateful Error Tracking)
-Track errors per key. When a threshold is hit, the LLM reviews the full error history.
+Track errors per key. When a threshold is hit, optionally have an LLM review the full error history.
 ```python
-from errorsense import TrailingConfig
+from errorsense import LLMConfig, TrailingConfig
+# With LLM review at threshold
 sense = ErrorSense(
-    categories=["transient", "permanent", "user"],
+    labels=["transient", "permanent", "user"],
     pipeline=[...],
     trailing=TrailingConfig(
         threshold=3,
         count_labels=["transient", "permanent"],  # user errors don't count
+        reviewer_llm=LLMConfig(),                 # enables LLM review
     ),
 )
+# Without LLM review (just counting)
+trailing=TrailingConfig(threshold=3, count_labels=["transient", "permanent"])
 # In your error handler:
 result = sense.trail("service-a", signal)
 result.label         # "transient"
@@ -168,9 +173,9 @@ sense.reset("service-a")
 **How it works:**
 - Each `trail()` call classifies the signal normally through the pipeline
 - Counted labels accumulate per key toward the threshold
-- At threshold, the LLM reviews all recorded errors and gives its verdict
+- At threshold, the LLM reviews all recorded errors (if `reviewer_llm` is set)
 - If the review changes the label, the history entry is corrected and the count adjusts
-- `review=False` in TrailingConfig disables LLM review (just counting)
+- `reviewer_skill=Skill(...)` lets you override the default review instructions
 **Manual review anytime:**

{errorsense-0.1.2 → errorsense-0.2.0}/design/ERRORSENSE.md RENAMED Viewed

@@ -104,7 +104,7 @@ Runs signals through a pipeline of phases.
 from errorsense import ErrorSense, Phase, Ruleset, Skill, LLMConfig
 sense = ErrorSense(
-    categories=["client", "server", "undecided"],
+    labels=["client", "server", "undecided"],
     pipeline=[
         Phase("rules", rulesets=[...]),
         Phase("patterns", rulesets=[...]),
@@ -118,7 +118,7 @@ sense = ErrorSense(
 ```python
 sense = ErrorSense(
-    categories=["client", "server"],
+    labels=["client", "server"],
     rulesets=[Ruleset(...)],
     skills=[Skill(...)],
     llm=LLMConfig(...),
@@ -160,13 +160,14 @@ Track errors per key with threshold-based LLM review.
 from errorsense import TrailingConfig
 sense = ErrorSense(
-    categories=["client", "server", "undecided"],
+    labels=["client", "server", "undecided"],
     pipeline=[...],
     trailing=TrailingConfig(
         threshold=3,
         count_labels=["server"],
         history_size=10,
-        review=None,  # None=auto, True=force, False=never
+        reviewer_llm=LLMConfig(),          # enables LLM review at threshold
+        reviewer_skill=Skill("custom"),     # optional, defaults to built-in reclassification.md
     ),
 )
@@ -187,9 +188,9 @@ sense.reset("service-a")
 5. `at_threshold` recalculates after any correction
 **Review behavior:**
-- `review=None` (default): auto-review if an LLM phase exists
-- `review=True`: force review (raises if no LLM phase)
-- `review=False`: never review, just count
+- `reviewer_llm=LLMConfig(...)`: LLM reviews error history at threshold
+- `reviewer_llm=None` (default): no review, just count
+- `reviewer_skill=Skill(...)`: override the default review instructions
 **Manual review:** `sense.review(key)` / `await sense.async_review(key)` — LLM reviews full history anytime.
@@ -222,7 +223,8 @@ class TrailingConfig:
     threshold: int = 3
     count_labels: list[str] | None = None
     history_size: int = 10
-    review: bool | None = None
+    reviewer_llm: LLMConfig | None = None
+    reviewer_skill: Skill | None = None
 ```
 ---
@@ -260,7 +262,7 @@ Every `classify()` call is wrapped in try/except at the phase level. Exceptions
 ### Validation (at construction)
-- Labels in rulesets must be in `categories` or `default`
+- Labels in rulesets must be in `labels` or `default`
 - Phases must have rulesets OR (skills + llm)
 - LLM phases must have an API key in LLMConfig
 - `pipeline=` and `rulesets=/skills=` cannot be mixed

{errorsense-0.1.2 → errorsense-0.2.0}/errorsense/__init__.py RENAMED Viewed

@@ -24,4 +24,4 @@ __all__ = [
     "TrailingConfig",
 ]
-__version__ = "0.1.0"
+__version__ = "0.2.0"

{errorsense-0.1.2 → errorsense-0.2.0}/errorsense/engine.py RENAMED Viewed

@@ -10,7 +10,7 @@ import time
 from collections import defaultdict, deque
 from typing import Any, Callable
-from errorsense.llm import LLMConfig
+from errorsense.llm import LLMClient, LLMConfig
 from errorsense.models import SenseResult, TrailResult, TrailingConfig
 from errorsense.phase import Phase
 from errorsense.ruleset import Ruleset
@@ -31,7 +31,7 @@ class ErrorSense:
     def __init__(
         self,
-        categories: list[str],
+        labels: list[str],
         # Explicit mode
         pipeline: list[Phase] | None = None,
         # Implicit mode
@@ -44,7 +44,7 @@ class ErrorSense:
         on_classify: Callable[[Signal, SenseResult], Any] | None = None,
         on_error: Callable[[str, Exception], Any] | None = None,
     ) -> None:
-        self.categories = set(categories)
+        self.labels = set(labels)
         self.default = default
         self._on_classify = on_classify
         self._on_error = on_error
@@ -60,26 +60,22 @@ class ErrorSense:
         self._validate_phase_names()
         self._pipeline_names = frozenset(p.name for p in self._pipeline)
-        self._validate_categories()
+        self._validate_labels()
         self._validate_llm_api_keys()
         for phase in self._pipeline:
-            phase.set_categories(list(categories))
+            phase.set_labels(list(labels))
         # Trailing state
         self._trailing = trailing
-        self._has_llm = any(p.is_llm_phase for p in self._pipeline)
-        self._reclass_skill: Skill | None = None
+        self._reviewer_client: LLMClient | None = None
+        self._reviewer_skill: Skill | None = None
         if trailing:
             self._init_trailing(trailing)
     def _init_trailing(self, config: TrailingConfig) -> None:
-        if config.review is True and not self._has_llm:
-            raise ValueError(
-                "TrailingConfig(review=True) requires an LLM phase in the pipeline."
-            )
-        self._review_enabled = (
-            config.review if config.review is not None else self._has_llm
-        )
+        if config.reviewer_llm is not None:
+            self._reviewer_client = LLMClient(config.reviewer_llm)
+            self._reviewer_skill = config.reviewer_skill
         self._threshold = config.threshold
         self._count_labels = set(config.count_labels or [])
         hs = config.history_size
@@ -106,11 +102,15 @@ class ErrorSense:
         """Close all LLM phase clients (sync)."""
         for phase in self._pipeline:
             phase.close_sync()
+        if self._reviewer_client:
+            self._reviewer_client.close_sync()
     async def async_close(self) -> None:
         """Close all LLM phase clients (async)."""
         for phase in self._pipeline:
             await phase.close_async()
+        if self._reviewer_client:
+            await self._reviewer_client.close_async()
     async def __aenter__(self) -> ErrorSense:
         return self
@@ -204,7 +204,7 @@ class ErrorSense:
             at_threshold = self._record_and_check(key, signal, result)
             review_result = (
                 self._run_review_sync(key)
-                if at_threshold and self._review_enabled else None
+                if at_threshold and self._reviewer_client else None
             )
             return self._build_trail_result(key, result, at_threshold, review_result)
@@ -224,16 +224,24 @@ class ErrorSense:
             at_threshold = self._record_and_check(key, signal, result)
             review_result = (
                 await self._run_review_async(key)
-                if at_threshold and self._review_enabled else None
+                if at_threshold and self._reviewer_client else None
             )
             return self._build_trail_result(key, result, at_threshold, review_result)
     def review(self, key: str) -> SenseResult | None:
         """Manually review full history for a key (sync). Returns LLM verdict."""
+        if not self._trailing:
+            raise RuntimeError(
+                "Trailing not configured. Pass trailing=TrailingConfig(...) to ErrorSense."
+            )
         return self._run_review_sync(key)
     async def async_review(self, key: str) -> SenseResult | None:
         """Manually review full history for a key (async). Returns LLM verdict."""
+        if not self._trailing:
+            raise RuntimeError(
+                "Trailing not configured. Pass trailing=TrailingConfig(...) to ErrorSense."
+            )
         return self._run_review_async(key)
     def _record_and_check(self, key: str, signal: Signal, result: SenseResult) -> bool:
@@ -297,37 +305,33 @@ class ErrorSense:
             self._counts[key][new_label] += 1
     def _run_review_sync(self, key: str) -> SenseResult | None:
-        llm_phase = self._find_llm_phase()
-        if not llm_phase:
+        if not self._reviewer_client:
             return None
         signal, skill = self._build_review_context(key)
         try:
-            return llm_phase.run_llm_call(signal, skill, list(self.categories))
+            return self._reviewer_client.classify_sync(
+                signal, skill, list(self.labels), include_reason=True,
+            )
         except Exception as e:
             logger.warning("LLM review failed: %s", e)
             return None
     async def _run_review_async(self, key: str) -> SenseResult | None:
-        llm_phase = self._find_llm_phase()
-        if not llm_phase:
+        if not self._reviewer_client:
             return None
         signal, skill = self._build_review_context(key)
         try:
-            return await llm_phase.async_run_llm_call(signal, skill, list(self.categories))
+            return await self._reviewer_client.classify_async(
+                signal, skill, list(self.labels), include_reason=True,
+            )
         except Exception as e:
             logger.warning("LLM review failed: %s", e)
             return None
-    def _find_llm_phase(self) -> Phase | None:
-        for phase in self._pipeline:
-            if phase.is_llm_phase:
-                return phase
-        return None
-    def _get_reclass_skill(self) -> Skill:
-        if self._reclass_skill is None:
-            self._reclass_skill = Skill("reclassification")
-        return self._reclass_skill
+    def _get_reviewer_skill(self) -> Skill:
+        if self._reviewer_skill is None:
+            self._reviewer_skill = Skill("reclassification")
+        return self._reviewer_skill
     def _build_review_context(self, key: str) -> tuple[Signal, Skill]:
         history = list(self._history[key])
@@ -341,7 +345,7 @@ class ErrorSense:
             "key": key,
             "history_summary": summary,
         })
-        return signal, self._get_reclass_skill()
+        return signal, self._get_reviewer_skill()
     def reset(self, key: str) -> None:
         """Clear trailing history and counts for a key."""
@@ -395,15 +399,15 @@ class ErrorSense:
                 raise ValueError(f"Duplicate phase name: {phase.name!r}")
             seen.add(phase.name)
-    def _validate_categories(self) -> None:
-        all_cats = self.categories | {self.default}
+    def _validate_labels(self) -> None:
+        all_labels = self.labels | {self.default}
         for phase in self._pipeline:
             for ruleset in phase.rulesets:
-                bad = ruleset.referenced_labels() - all_cats
+                bad = ruleset.referenced_labels() - all_labels
                 if bad:
                     raise ValueError(
                         f"Ruleset on field {getattr(ruleset, 'field', '?')!r} maps to "
-                        f"label {bad.pop()!r} not in {sorted(self.categories)}"
+                        f"label {bad.pop()!r} not in {sorted(self.labels)}"
                     )
     def _validate_llm_api_keys(self) -> None:

{errorsense-0.1.2 → errorsense-0.2.0}/errorsense/llm.py RENAMED Viewed

@@ -22,7 +22,7 @@ DEFAULT_BASE_URL = "https://relay.opengpu.network/v2/openai/v1"
 DEFAULT_MODEL = "Qwen/Qwen3.5-397B-A17B-FP8"
 DEFAULT_PROMPT_TEMPLATE = (
     "{instructions}\n\n"
-    "Classify the following error signal into exactly one of these categories: {categories}\n\n"
+    "Classify the following error signal into exactly one of these labels: {labels}\n\n"
     "Signal data:\n{signal}\n\n"
     'Reply ONLY with JSON: {{"label": "...", "confidence": 0.0, "reason": "..."}}'
 )
@@ -54,7 +54,7 @@ class LLMConfig:
             object.__setattr__(self, "base_url", os.environ.get("ERRORSENSE_LLM_URL", DEFAULT_BASE_URL))
-def _build_prompt(signal: Signal, skill: Skill, categories: list[str], config: LLMConfig) -> str:
+def _build_prompt(signal: Signal, skill: Skill, labels: list[str], config: LLMConfig) -> str:
     signal_text = json.dumps(signal.to_dict(), default=str)
     if len(signal_text) > config.max_signal_size:
         signal_text = signal_text[: config.max_signal_size] + "..."
@@ -62,7 +62,7 @@ def _build_prompt(signal: Signal, skill: Skill, categories: list[str], config: L
     template = skill.prompt_template or DEFAULT_PROMPT_TEMPLATE
     return template.format(
         instructions=skill.instructions,
-        categories=", ".join(categories) if categories else "unknown",
+        labels=", ".join(labels) if labels else "unknown",
         signal=signal_text,
     )
@@ -84,7 +84,7 @@ def _build_headers(config: LLMConfig) -> dict:
 def _parse_response(
     data: dict,
-    categories: list[str],
+    labels: list[str],
     skill_name: str,
     include_reason: bool = False,
 ) -> SenseResult | None:
@@ -96,11 +96,11 @@ def _parse_response(
             content = content.rsplit("```", 1)[0]
         parsed = json.loads(content.strip())
-        label = parsed.get("label", "") or parsed.get("category", "")
+        label = parsed.get("label", "")
         confidence = min(1.0, max(0.0, float(parsed.get("confidence", 0.7))))
         reason = parsed.get("reason") if include_reason else None
-        if categories and label not in categories:
+        if labels and label not in labels:
             logger.warning(
                 "Skill %r: LLM returned unknown label %r", skill_name, label
             )
@@ -154,19 +154,18 @@ class LLMClient:
         self,
         signal: Signal,
         skill: Skill,
-        categories: list[str],
+        labels: list[str],
         include_reason: bool = False,
     ) -> SenseResult | None:
-        config = skill.llm if skill.llm is not None else self._config
-        prompt = _build_prompt(signal, skill, categories, config)
-        url = f"{config.base_url.rstrip('/')}/chat/completions"
+        prompt = _build_prompt(signal, skill, labels, self._config)
+        url = f"{self._config.base_url.rstrip('/')}/chat/completions"
         try:
             client = self._get_sync_client()
             resp = client.post(
                 url,
-                headers=_build_headers(config),
-                json=_build_request_body(skill, prompt, config),
+                headers=_build_headers(self._config),
+                json=_build_request_body(skill, prompt, self._config),
             )
             resp.raise_for_status()
             data = resp.json()
@@ -174,25 +173,24 @@ class LLMClient:
             logger.warning("LLM call failed for skill %r: %s", skill.name, e)
             return None
-        return _parse_response(data, categories, skill.name, include_reason)
+        return _parse_response(data, labels, skill.name, include_reason)
     async def classify_async(
         self,
         signal: Signal,
         skill: Skill,
-        categories: list[str],
+        labels: list[str],
         include_reason: bool = False,
     ) -> SenseResult | None:
-        config = skill.llm if skill.llm is not None else self._config
-        prompt = _build_prompt(signal, skill, categories, config)
-        url = f"{config.base_url.rstrip('/')}/chat/completions"
+        prompt = _build_prompt(signal, skill, labels, self._config)
+        url = f"{self._config.base_url.rstrip('/')}/chat/completions"
         try:
             client = await self._get_async_client()
             resp = await client.post(
                 url,
-                headers=_build_headers(config),
-                json=_build_request_body(skill, prompt, config),
+                headers=_build_headers(self._config),
+                json=_build_request_body(skill, prompt, self._config),
             )
             resp.raise_for_status()
             data = resp.json()
@@ -200,7 +198,7 @@ class LLMClient:
             logger.warning("LLM call failed for skill %r: %s", skill.name, e)
             return None
-        return _parse_response(data, categories, skill.name, include_reason)
+        return _parse_response(data, labels, skill.name, include_reason)
     def close_sync(self) -> None:
         if self._sync_client is not None:

{errorsense-0.1.2 → errorsense-0.2.0}/errorsense/models.py RENAMED Viewed

@@ -1,7 +1,11 @@
 from __future__ import annotations
 from dataclasses import dataclass
-from typing import Any
+from typing import TYPE_CHECKING, Any
+if TYPE_CHECKING:
+    from errorsense.llm import LLMConfig
+    from errorsense.skill import Skill
 @dataclass(frozen=True)
@@ -40,13 +44,12 @@ class TrailingConfig:
         threshold: Number of counted errors before review triggers.
         count_labels: Only these labels count toward threshold.
         history_size: Max errors kept per key (ring buffer).
-        review: Whether to LLM-review history when threshold hit.
-            None = auto (True if LLM phase exists, False if not).
-            True = force (raises if no LLM phase).
-            False = never.
+        reviewer_llm: LLM config for review. Set to enable review, None to disable.
+        reviewer_skill: Custom review skill. Defaults to built-in reclassification.
     """
     threshold: int = 3
     count_labels: list[str] | None = None
     history_size: int = 10
-    review: bool | None = None
+    reviewer_llm: LLMConfig | None = None
+    reviewer_skill: Skill | None = None

{errorsense-0.1.2 → errorsense-0.2.0}/errorsense/phase.py RENAMED Viewed

@@ -61,14 +61,14 @@ class Phase:
         self.skills = skills or []
         self.llm = llm
         self.is_llm_phase = has_skills
-        self._categories: list[str] = []
+        self._labels: list[str] = []
         self._llm_client: LLMClient | None = None
         if self.is_llm_phase and llm:
             self._llm_client = LLMClient(llm)
-    def set_categories(self, categories: list[str]) -> None:
-        self._categories = list(categories)
+    def set_labels(self, labels: list[str]) -> None:
+        self._labels = list(labels)
     def classify(self, signal: Signal, explain: bool = False) -> SenseResult | None:
         """Sync classification. Full pipeline — rulesets or LLM."""
@@ -136,38 +136,10 @@ class Phase:
         return best
     def _run_one_skill_sync(self, signal: Signal, skill: Skill, explain: bool) -> SenseResult | None:
-        if skill.llm is not None:
-            client = LLMClient(skill.llm)
-            try:
-                return client.classify_sync(signal, skill, self._categories, include_reason=explain)
-            finally:
-                client.close_sync()
-        return self._llm_client.classify_sync(signal, skill, self._categories, include_reason=explain)
+        return self._llm_client.classify_sync(signal, skill, self._labels, include_reason=explain)
     async def _run_one_skill_async(self, signal: Signal, skill: Skill, explain: bool) -> SenseResult | None:
-        if skill.llm is not None:
-            client = LLMClient(skill.llm)
-            try:
-                return await client.classify_async(signal, skill, self._categories, include_reason=explain)
-            finally:
-                await client.close_async()
-        return await self._llm_client.classify_async(signal, skill, self._categories, include_reason=explain)
-    def run_llm_call(
-        self, signal: Signal, skill: Skill, categories: list[str],
-    ) -> SenseResult | None:
-        """Run a single sync LLM call. Public API for Tracker reclassification."""
-        if not self._llm_client:
-            return None
-        return self._llm_client.classify_sync(signal, skill, categories, include_reason=True)
-    async def async_run_llm_call(
-        self, signal: Signal, skill: Skill, categories: list[str],
-    ) -> SenseResult | None:
-        """Run a single async LLM call. Public API for Tracker reclassification."""
-        if not self._llm_client:
-            return None
-        return await self._llm_client.classify_async(signal, skill, categories, include_reason=True)
+        return await self._llm_client.classify_async(signal, skill, self._labels, include_reason=explain)
     def _stamp_phase(self, result: SenseResult, skill_name: str) -> SenseResult:
         updates: dict[str, Any] = {}

{errorsense-0.1.2 → errorsense-0.2.0}/errorsense/presets/http_gateway.py RENAMED Viewed

@@ -48,7 +48,7 @@ def http(
     phases.append(Phase("llm", skills=[Skill("http_classifier")], llm=llm))
     return ErrorSense(
-        categories=["client", "server", "undecided"],
+        labels=["client", "server", "undecided"],
         pipeline=phases,
         default="undecided",
     )
@@ -66,7 +66,7 @@ def http_no_llm(
         extra_rulesets: Additional rulesets appended to the patterns phase.
     """
     return ErrorSense(
-        categories=["client", "server", "undecided"],
+        labels=["client", "server", "undecided"],
         pipeline=_ruleset_phases(extra_rulesets),
         default="undecided",
     )

{errorsense-0.1.2 → errorsense-0.2.0}/errorsense/skill.py RENAMED Viewed

@@ -3,10 +3,6 @@
 from __future__ import annotations
 from pathlib import Path
-from typing import TYPE_CHECKING
-if TYPE_CHECKING:
-    from errorsense.llm import LLMConfig
 __all__ = ["Skill"]
@@ -29,7 +25,6 @@ class Skill:
         instructions: Inline instructions string. Overrides file loading.
         prompt_template: Override the default LLM prompt template.
         temperature: LLM temperature (default: 0.0 for determinism).
-        llm: Per-skill LLMConfig override.
     """
     def __init__(
@@ -39,7 +34,6 @@ class Skill:
         instructions: str | None = None,
         prompt_template: str | None = None,
         temperature: float = 0.0,
-        llm: LLMConfig | None = None,
     ) -> None:
         if not name:
             raise ValueError("Skill requires a non-empty 'name'")
@@ -47,7 +41,6 @@ class Skill:
         self.name = name
         self.prompt_template = prompt_template
         self.temperature = temperature
-        self.llm = llm
         if instructions:
             self.instructions = instructions

errorsense-0.2.0/errorsense/skills/reclassification.md ADDED Viewed

@@ -0,0 +1,21 @@
+You are reviewing a history of classified errors for a single key (e.g., a service or provider).
+Each entry in the history has a label that was assigned by earlier classification. Your job is to review the full history and decide: is the most recent label correct, or should it be changed?
+## How to decide
+Look at the pattern across all entries:
+- If the errors are consistent (all the same type), the label is probably correct
+- If earlier errors were classified differently and the pattern suggests the latest one was misclassified, pick the label that better fits the overall pattern
+- If the history shows a mix of genuine errors, keep the most recent label as-is
+## Your output
+Pick one of the allowed labels as your label. This must be one of the labels provided in the prompt — do not invent new ones.
+Set confidence based on how clear the pattern is:
+- 0.9+ if the history strongly supports your label
+- 0.7-0.9 if the evidence is moderate
+- Below 0.7 if the history is genuinely mixed
+In your reason, briefly explain what pattern you saw and why you kept or changed the label.

{errorsense-0.1.2 → errorsense-0.2.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "errorsense"
-version = "0.1.2"
+version = "0.2.0"
 description = "Error classification engine. Rules for the obvious, AI for the ambiguous."
 readme = "README.md"
 license = "MIT"

{errorsense-0.1.2 → errorsense-0.2.0}/tests/test_engine.py RENAMED Viewed

@@ -25,7 +25,7 @@ class BrokenRuleset(Ruleset):
 class TestExplicitMode:
     def test_first_match_wins(self):
         engine = ErrorSense(
-            categories=["a", "b"],
+            labels=["a", "b"],
             pipeline=[Phase("p1", rulesets=[AlwaysMatchRuleset("a"), AlwaysMatchRuleset("b")])],
         )
         results = engine.classify(Signal({"x": 1}))
@@ -34,7 +34,7 @@ class TestExplicitMode:
     def test_skip_none_results(self):
         engine = ErrorSense(
-            categories=["a"],
+            labels=["a"],
             pipeline=[Phase("p1", rulesets=[NeverMatchRuleset(), AlwaysMatchRuleset("a")])],
         )
         results = engine.classify(Signal({"x": 1}))
@@ -42,7 +42,7 @@ class TestExplicitMode:
     def test_default_when_no_match(self):
         engine = ErrorSense(
-            categories=["a"],
+            labels=["a"],
             pipeline=[Phase("p1", rulesets=[NeverMatchRuleset()])],
             default="fallback",
         )
@@ -52,7 +52,7 @@ class TestExplicitMode:
     def test_skill_name_auto_filled(self):
         engine = ErrorSense(
-            categories=["a"],
+            labels=["a"],
             pipeline=[Phase("p1", rulesets=[AlwaysMatchRuleset("a")])],
         )
         results = engine.classify(Signal({"x": 1}))
@@ -61,7 +61,7 @@ class TestExplicitMode:
     def test_broken_ruleset_skipped(self):
         engine = ErrorSense(
-            categories=["a"],
+            labels=["a"],
             pipeline=[Phase("p1", rulesets=[BrokenRuleset(), AlwaysMatchRuleset("a")])],
         )
         results = engine.classify(Signal({"x": 1}))
@@ -69,7 +69,7 @@ class TestExplicitMode:
     def test_all_broken_falls_to_default(self):
         engine = ErrorSense(
-            categories=["a"],
+            labels=["a"],
             pipeline=[Phase("p1", rulesets=[BrokenRuleset()])],
             default="oops",
         )
@@ -78,7 +78,7 @@ class TestExplicitMode:
     def test_multi_phase_first_catch(self):
         engine = ErrorSense(
-            categories=["a", "b"],
+            labels=["a", "b"],
             pipeline=[
                 Phase("first", rulesets=[NeverMatchRuleset()]),
                 Phase("second", rulesets=[AlwaysMatchRuleset("b")]),
@@ -90,7 +90,7 @@ class TestExplicitMode:
     def test_skip_phase(self):
         engine = ErrorSense(
-            categories=["a", "b"],
+            labels=["a", "b"],
             pipeline=[
                 Phase("skip_me", rulesets=[AlwaysMatchRuleset("a")]),
                 Phase("use_me", rulesets=[AlwaysMatchRuleset("b")]),
@@ -101,7 +101,7 @@ class TestExplicitMode:
     def test_skip_invalid_phase_raises(self):
         engine = ErrorSense(
-            categories=["a"],
+            labels=["a"],
             pipeline=[Phase("p1", rulesets=[AlwaysMatchRuleset("a")])],
         )
         with pytest.raises(ValueError, match="Unknown phase"):
@@ -110,7 +110,7 @@ class TestExplicitMode:
     def test_duplicate_phase_names_rejected(self):
         with pytest.raises(ValueError, match="Duplicate"):
             ErrorSense(
-                categories=["a"],
+                labels=["a"],
                 pipeline=[
                     Phase("p1", rulesets=[AlwaysMatchRuleset("a")]),
                     Phase("p1", rulesets=[AlwaysMatchRuleset("a")]),
@@ -121,7 +121,7 @@ class TestExplicitMode:
 class TestShortCircuit:
     def test_short_circuit_true_returns_one(self):
         engine = ErrorSense(
-            categories=["a", "b"],
+            labels=["a", "b"],
             pipeline=[
                 Phase("first", rulesets=[AlwaysMatchRuleset("a")]),
                 Phase("second", rulesets=[AlwaysMatchRuleset("b")]),
@@ -133,7 +133,7 @@ class TestShortCircuit:
     def test_short_circuit_false_returns_all_matches(self):
         engine = ErrorSense(
-            categories=["a", "b"],
+            labels=["a", "b"],
             pipeline=[
                 Phase("first", rulesets=[AlwaysMatchRuleset("a", confidence=0.8)]),
                 Phase("second", rulesets=[AlwaysMatchRuleset("b", confidence=0.9)]),
@@ -146,7 +146,7 @@ class TestShortCircuit:
     def test_short_circuit_false_skips_unmatched(self):
         engine = ErrorSense(
-            categories=["a", "b"],
+            labels=["a", "b"],
             pipeline=[
                 Phase("first", rulesets=[AlwaysMatchRuleset("a")]),
                 Phase("second", rulesets=[NeverMatchRuleset()]),
@@ -160,7 +160,7 @@ class TestShortCircuit:
     def test_short_circuit_false_no_matches_default(self):
         engine = ErrorSense(
-            categories=["a"],
+            labels=["a"],
             pipeline=[Phase("p1", rulesets=[NeverMatchRuleset()])],
             default="none",
         )
@@ -172,7 +172,7 @@ class TestShortCircuit:
 class TestImplicitMode:
     def test_rulesets_only(self):
         engine = ErrorSense(
-            categories=["a"],
+            labels=["a"],
             rulesets=[AlwaysMatchRuleset("a")],
         )
         results = engine.classify(Signal({"x": 1}))
@@ -181,7 +181,7 @@ class TestImplicitMode:
     def test_default_when_no_match(self):
         engine = ErrorSense(
-            categories=["a"],
+            labels=["a"],
             rulesets=[NeverMatchRuleset()],
             default="none",
         )
@@ -191,21 +191,21 @@ class TestImplicitMode:
     def test_cannot_mix_modes(self):
         with pytest.raises(ValueError, match="Cannot mix"):
             ErrorSense(
-                categories=["a"],
+                labels=["a"],
                 pipeline=[Phase("p1", rulesets=[AlwaysMatchRuleset("a")])],
                 rulesets=[AlwaysMatchRuleset("a")],
             )
     def test_must_provide_something(self):
         with pytest.raises(ValueError, match="Must provide"):
-            ErrorSense(categories=["a"])
+            ErrorSense(labels=["a"])
 class TestCallbacks:
     def test_on_classify_callback(self):
         collected = []
         engine = ErrorSense(
-            categories=["a"],
+            labels=["a"],
             pipeline=[Phase("p1", rulesets=[AlwaysMatchRuleset("a")])],
             on_classify=lambda sig, res: collected.append(res),
         )
@@ -215,7 +215,7 @@ class TestCallbacks:
     def test_on_error_callback(self):
         engine = ErrorSense(
-            categories=["a"],
+            labels=["a"],
             pipeline=[
                 Phase("broken", rulesets=[BrokenRuleset()]),
                 Phase("ok", rulesets=[AlwaysMatchRuleset("a")]),
@@ -230,7 +230,7 @@ class TestLabelValidation:
     def test_invalid_label_in_ruleset(self):
         with pytest.raises(ValueError, match="not in"):
             ErrorSense(
-                categories=["a", "b"],
+                labels=["a", "b"],
                 pipeline=[Phase("p1", rulesets=[
                     Ruleset(field="x", match={1: "c"}),
                 ])],
@@ -241,7 +241,7 @@ class TestAsyncClassify:
     @pytest.mark.asyncio
     async def test_async_classify_first_catch(self):
         engine = ErrorSense(
-            categories=["a"],
+            labels=["a"],
             pipeline=[Phase("p1", rulesets=[NeverMatchRuleset(), AlwaysMatchRuleset("a")])],
         )
         results = await engine.async_classify(Signal({"x": 1}))
@@ -250,7 +250,7 @@ class TestAsyncClassify:
     @pytest.mark.asyncio
     async def test_async_classify_default(self):
         engine = ErrorSense(
-            categories=["a"],
+            labels=["a"],
             pipeline=[Phase("p1", rulesets=[NeverMatchRuleset()])],
             default="fallback",
         )
@@ -260,7 +260,7 @@ class TestAsyncClassify:
     @pytest.mark.asyncio
     async def test_async_classify_broken_skipped(self):
         engine = ErrorSense(
-            categories=["a"],
+            labels=["a"],
             pipeline=[Phase("p1", rulesets=[BrokenRuleset(), AlwaysMatchRuleset("a")])],
         )
         results = await engine.async_classify(Signal({"x": 1}))
@@ -269,7 +269,7 @@ class TestAsyncClassify:
     @pytest.mark.asyncio
     async def test_async_classify_all_phases(self):
         engine = ErrorSense(
-            categories=["a", "b"],
+            labels=["a", "b"],
             pipeline=[
                 Phase("first", rulesets=[AlwaysMatchRuleset("a", confidence=0.5)]),
                 Phase("second", rulesets=[AlwaysMatchRuleset("b", confidence=0.9)]),

{errorsense-0.1.2 → errorsense-0.2.0}/tests/test_tracker.py RENAMED Viewed

@@ -8,7 +8,7 @@ from errorsense import ErrorSense, Phase, Ruleset, Signal, TrailingConfig
 def make_sense(**kwargs):
     """Helper to build an ErrorSense with trailing enabled."""
     defaults = {
-        "categories": ["infra", "provider", "user"],
+        "labels": ["infra", "provider", "user"],
         "pipeline": [
             Phase("rules", rulesets=[
                 Ruleset(field="status_code", match={400: "user", 401: "user", 502: "infra", 503: "infra"}),
@@ -111,21 +111,16 @@ class TestTrail:
     def test_trail_without_config_raises(self):
         sense = ErrorSense(
-            categories=["a"],
+            labels=["a"],
             pipeline=[Phase("p1", rulesets=[Ruleset(field="x", match={1: "a"})])],
         )
         with pytest.raises(RuntimeError, match="Trailing not configured"):
             sense.trail("key", Signal({"x": 1}))
-    def test_review_true_without_llm_raises(self):
-        with pytest.raises(ValueError, match="requires an LLM phase"):
-            make_sense(trailing=TrailingConfig(
-                threshold=3, count_labels=["infra"], review=True,
-            ))
-    def test_review_false_no_review(self):
+    def test_no_reviewer_llm_no_review(self):
+        """Without reviewer_llm, trailing just counts — no LLM review."""
         sense = make_sense(trailing=TrailingConfig(
-            threshold=2, count_labels=["infra", "provider"], review=False,
+            threshold=2, count_labels=["infra", "provider"],
         ))
         sense.trail("p1", Signal.from_http(status_code=502))
         result = sense.trail("p1", Signal.from_http(status_code=502))

errorsense-0.1.2/errorsense/skills/reclassification.md DELETED Viewed

@@ -1,9 +0,0 @@
-Review this full error history and provide analysis.
-Are these errors consistent or mixed? Is there a clear pattern (e.g., all infrastructure
-failures, all user errors, or a mix)?
-Should the system trip/alert based on this history, or are the counted errors
-misleading (e.g., user errors miscounted as server errors)?
-Provide your overall assessment.