errorsense 0.2.0__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {errorsense-0.2.0 → errorsense-0.2.1}/PKG-INFO +15 -4
- {errorsense-0.2.0 → errorsense-0.2.1}/README.md +14 -3
- {errorsense-0.2.0 → errorsense-0.2.1}/design/ERRORSENSE.md +5 -3
- {errorsense-0.2.0 → errorsense-0.2.1}/errorsense/__init__.py +1 -1
- {errorsense-0.2.0 → errorsense-0.2.1}/errorsense/engine.py +10 -7
- {errorsense-0.2.0 → errorsense-0.2.1}/errorsense/llm.py +13 -13
- {errorsense-0.2.0 → errorsense-0.2.1}/errorsense/models.py +6 -2
- {errorsense-0.2.0 → errorsense-0.2.1}/errorsense/phase.py +5 -0
- {errorsense-0.2.0 → errorsense-0.2.1}/errorsense/presets/http_gateway.py +3 -3
- {errorsense-0.2.0 → errorsense-0.2.1}/errorsense/ruleset.py +7 -12
- {errorsense-0.2.0 → errorsense-0.2.1}/errorsense/signal.py +5 -1
- {errorsense-0.2.0 → errorsense-0.2.1}/errorsense/skill.py +4 -4
- {errorsense-0.2.0 → errorsense-0.2.1}/pyproject.toml +1 -1
- errorsense-0.2.1/tests/test_llm.py +126 -0
- {errorsense-0.2.0 → errorsense-0.2.1}/tests/test_ruleset.py +16 -2
- {errorsense-0.2.0 → errorsense-0.2.1}/.gitignore +0 -0
- {errorsense-0.2.0 → errorsense-0.2.1}/LICENSE +0 -0
- {errorsense-0.2.0 → errorsense-0.2.1}/errorsense/presets/__init__.py +0 -0
- {errorsense-0.2.0 → errorsense-0.2.1}/errorsense/py.typed +0 -0
- {errorsense-0.2.0 → errorsense-0.2.1}/errorsense/skills/http_classifier.md +0 -0
- /errorsense-0.2.0/errorsense/skills/reclassification.md → /errorsense-0.2.1/errorsense/skills/reviewer.md +0 -0
- {errorsense-0.2.0 → errorsense-0.2.1}/tests/test_engine.py +0 -0
- {errorsense-0.2.0 → errorsense-0.2.1}/tests/test_llm_config.py +0 -0
- {errorsense-0.2.0 → errorsense-0.2.1}/tests/test_signal.py +0 -0
- {errorsense-0.2.0 → errorsense-0.2.1}/tests/test_tracker.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: errorsense
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: Error classification engine. Rules for the obvious, AI for the ambiguous.
|
|
5
5
|
Project-URL: Homepage, https://github.com/OpenGPU-Network/errorsense
|
|
6
6
|
Project-URL: Documentation, https://github.com/OpenGPU-Network/errorsense#readme
|
|
@@ -114,13 +114,13 @@ Phases run in order. First match wins. Rulesets are instant and free. LLM is the
|
|
|
114
114
|
|
|
115
115
|
## Rulesets
|
|
116
116
|
|
|
117
|
-
Each ruleset does one thing — `match=` for field matching or `patterns=` for regex:
|
|
117
|
+
Each ruleset does one thing — `match=` for exact field matching or `patterns=` for regex:
|
|
118
118
|
|
|
119
119
|
```python
|
|
120
120
|
Ruleset(field="status_code", match={400: "client", 502: "server"}) # exact match
|
|
121
121
|
Ruleset(field="status_code", match={"4xx": "client", 503: "server"}) # range match
|
|
122
|
-
Ruleset(field="headers.content-type", match={"text/html": "server"}) # header match
|
|
123
122
|
Ruleset(field="body.error.type", match={"validation_error": "client"}) # JSON dot-path
|
|
123
|
+
Ruleset(field="headers.content-type", patterns=[("server", [r"^text/html"])]) # regex
|
|
124
124
|
Ruleset(field="body", patterns=[("server", [r"OOM"]), ("client", [r"invalid"])]) # regex
|
|
125
125
|
```
|
|
126
126
|
|
|
@@ -136,7 +136,7 @@ class VendorBugRuleset(Ruleset):
|
|
|
136
136
|
|
|
137
137
|
## Skills
|
|
138
138
|
|
|
139
|
-
Skills are LLM instructions stored as `.md` files. Each skill teaches the LLM how to classify errors in a specific domain.
|
|
139
|
+
Skills are LLM instructions stored as `.md` files. Each skill teaches the LLM how to classify errors in a specific domain. Each skill triggers a separate LLM call — highest confidence result wins.
|
|
140
140
|
|
|
141
141
|
```python
|
|
142
142
|
# Loads from errorsense/skills/http_classifier.md (built-in)
|
|
@@ -146,6 +146,17 @@ Skill("http_classifier")
|
|
|
146
146
|
Skill("my_classifier", path="./skills/my_classifier.md")
|
|
147
147
|
```
|
|
148
148
|
|
|
149
|
+
**Multiple skills in one phase:** Use this when you want multiple domain-specific opinions on the same error.
|
|
150
|
+
|
|
151
|
+
```python
|
|
152
|
+
Phase("llm", skills=[
|
|
153
|
+
Skill("http_classifier"), # knows HTTP error patterns
|
|
154
|
+
Skill("db_classifier"), # knows database error patterns
|
|
155
|
+
], llm=LLMConfig(...))
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
In sync (`classify`), skills run sequentially. In async (`async_classify`), skills run concurrently.
|
|
159
|
+
|
|
149
160
|
## All Phases Mode
|
|
150
161
|
|
|
151
162
|
```python
|
|
@@ -93,13 +93,13 @@ Phases run in order. First match wins. Rulesets are instant and free. LLM is the
|
|
|
93
93
|
|
|
94
94
|
## Rulesets
|
|
95
95
|
|
|
96
|
-
Each ruleset does one thing — `match=` for field matching or `patterns=` for regex:
|
|
96
|
+
Each ruleset does one thing — `match=` for exact field matching or `patterns=` for regex:
|
|
97
97
|
|
|
98
98
|
```python
|
|
99
99
|
Ruleset(field="status_code", match={400: "client", 502: "server"}) # exact match
|
|
100
100
|
Ruleset(field="status_code", match={"4xx": "client", 503: "server"}) # range match
|
|
101
|
-
Ruleset(field="headers.content-type", match={"text/html": "server"}) # header match
|
|
102
101
|
Ruleset(field="body.error.type", match={"validation_error": "client"}) # JSON dot-path
|
|
102
|
+
Ruleset(field="headers.content-type", patterns=[("server", [r"^text/html"])]) # regex
|
|
103
103
|
Ruleset(field="body", patterns=[("server", [r"OOM"]), ("client", [r"invalid"])]) # regex
|
|
104
104
|
```
|
|
105
105
|
|
|
@@ -115,7 +115,7 @@ class VendorBugRuleset(Ruleset):
|
|
|
115
115
|
|
|
116
116
|
## Skills
|
|
117
117
|
|
|
118
|
-
Skills are LLM instructions stored as `.md` files. Each skill teaches the LLM how to classify errors in a specific domain.
|
|
118
|
+
Skills are LLM instructions stored as `.md` files. Each skill teaches the LLM how to classify errors in a specific domain. Each skill triggers a separate LLM call — highest confidence result wins.
|
|
119
119
|
|
|
120
120
|
```python
|
|
121
121
|
# Loads from errorsense/skills/http_classifier.md (built-in)
|
|
@@ -125,6 +125,17 @@ Skill("http_classifier")
|
|
|
125
125
|
Skill("my_classifier", path="./skills/my_classifier.md")
|
|
126
126
|
```
|
|
127
127
|
|
|
128
|
+
**Multiple skills in one phase:** Use this when you want multiple domain-specific opinions on the same error.
|
|
129
|
+
|
|
130
|
+
```python
|
|
131
|
+
Phase("llm", skills=[
|
|
132
|
+
Skill("http_classifier"), # knows HTTP error patterns
|
|
133
|
+
Skill("db_classifier"), # knows database error patterns
|
|
134
|
+
], llm=LLMConfig(...))
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
In sync (`classify`), skills run sequentially. In async (`async_classify`), skills run concurrently.
|
|
138
|
+
|
|
128
139
|
## All Phases Mode
|
|
129
140
|
|
|
130
141
|
```python
|
|
@@ -40,13 +40,15 @@ from errorsense import Ruleset
|
|
|
40
40
|
|
|
41
41
|
Ruleset(field="status_code", match={400: "client", 502: "server"})
|
|
42
42
|
Ruleset(field="status_code", match={"4xx": "client", 503: "server"})
|
|
43
|
-
Ruleset(field="headers.content-type", match={"text/html": "server"})
|
|
44
43
|
Ruleset(field="body.error.type", match={"validation_error": "client"})
|
|
44
|
+
Ruleset(field="headers.content-type", patterns=[("server", [r"^text/html"])])
|
|
45
45
|
Ruleset(field="body", patterns=[("server", [r"OOM"]), ("client", [r"invalid"])])
|
|
46
46
|
```
|
|
47
47
|
|
|
48
48
|
**Field resolution:** Plain field → `signal[field]`. `headers.*` → header lookup. `body.*` → JSON parse + dot-path. `body` (no dots) → raw string.
|
|
49
49
|
|
|
50
|
+
**Match behavior:** `match=` does exact equality (`==`). For int fields, range keys like `"4xx"` match the hundred-range. For substring/prefix matching, use `patterns=` with regex.
|
|
51
|
+
|
|
50
52
|
**Confidence:** Match = 1.0. Pattern = 0.9. Custom subclass sets its own.
|
|
51
53
|
|
|
52
54
|
**Custom logic:** Subclass and override `classify()`:
|
|
@@ -167,7 +169,7 @@ sense = ErrorSense(
|
|
|
167
169
|
count_labels=["server"],
|
|
168
170
|
history_size=10,
|
|
169
171
|
reviewer_llm=LLMConfig(), # enables LLM review at threshold
|
|
170
|
-
reviewer_skill=Skill("custom"), # optional, defaults to built-in
|
|
172
|
+
reviewer_skill=Skill("custom"), # optional, defaults to built-in reviewer.md
|
|
171
173
|
),
|
|
172
174
|
)
|
|
173
175
|
|
|
@@ -305,7 +307,7 @@ errorsense/
|
|
|
305
307
|
│ └── http_gateway.py # http(), http_no_llm()
|
|
306
308
|
└── skills/
|
|
307
309
|
├── http_classifier.md # HTTP error classification instructions
|
|
308
|
-
└── reclassification.md
|
|
310
|
+
└── reviewer.md # Trailing review instructions
|
|
309
311
|
```
|
|
310
312
|
|
|
311
313
|
## Dependencies
|
|
@@ -44,7 +44,7 @@ class ErrorSense:
|
|
|
44
44
|
on_classify: Callable[[Signal, SenseResult], Any] | None = None,
|
|
45
45
|
on_error: Callable[[str, Exception], Any] | None = None,
|
|
46
46
|
) -> None:
|
|
47
|
-
self.labels =
|
|
47
|
+
self.labels = tuple(labels)
|
|
48
48
|
self.default = default
|
|
49
49
|
self._on_classify = on_classify
|
|
50
50
|
self._on_error = on_error
|
|
@@ -63,7 +63,7 @@ class ErrorSense:
|
|
|
63
63
|
self._validate_labels()
|
|
64
64
|
self._validate_llm_api_keys()
|
|
65
65
|
for phase in self._pipeline:
|
|
66
|
-
phase.set_labels(
|
|
66
|
+
phase.set_labels(self.labels)
|
|
67
67
|
|
|
68
68
|
# Trailing state
|
|
69
69
|
self._trailing = trailing
|
|
@@ -99,7 +99,10 @@ class ErrorSense:
|
|
|
99
99
|
return None
|
|
100
100
|
|
|
101
101
|
def close(self) -> None:
|
|
102
|
-
"""Close all LLM phase clients (sync).
|
|
102
|
+
"""Close all LLM phase clients (sync).
|
|
103
|
+
|
|
104
|
+
Not thread-safe with in-flight classify/trail calls.
|
|
105
|
+
"""
|
|
103
106
|
for phase in self._pipeline:
|
|
104
107
|
phase.close_sync()
|
|
105
108
|
if self._reviewer_client:
|
|
@@ -310,7 +313,7 @@ class ErrorSense:
|
|
|
310
313
|
signal, skill = self._build_review_context(key)
|
|
311
314
|
try:
|
|
312
315
|
return self._reviewer_client.classify_sync(
|
|
313
|
-
signal, skill,
|
|
316
|
+
signal, skill, self.labels, include_reason=True,
|
|
314
317
|
)
|
|
315
318
|
except Exception as e:
|
|
316
319
|
logger.warning("LLM review failed: %s", e)
|
|
@@ -322,7 +325,7 @@ class ErrorSense:
|
|
|
322
325
|
signal, skill = self._build_review_context(key)
|
|
323
326
|
try:
|
|
324
327
|
return await self._reviewer_client.classify_async(
|
|
325
|
-
signal, skill,
|
|
328
|
+
signal, skill, self.labels, include_reason=True,
|
|
326
329
|
)
|
|
327
330
|
except Exception as e:
|
|
328
331
|
logger.warning("LLM review failed: %s", e)
|
|
@@ -330,7 +333,7 @@ class ErrorSense:
|
|
|
330
333
|
|
|
331
334
|
def _get_reviewer_skill(self) -> Skill:
|
|
332
335
|
if self._reviewer_skill is None:
|
|
333
|
-
self._reviewer_skill = Skill("reclassification")
|
|
336
|
+
self._reviewer_skill = Skill("reviewer")
|
|
334
337
|
return self._reviewer_skill
|
|
335
338
|
|
|
336
339
|
def _build_review_context(self, key: str) -> tuple[Signal, Skill]:
|
|
@@ -400,7 +403,7 @@ class ErrorSense:
|
|
|
400
403
|
seen.add(phase.name)
|
|
401
404
|
|
|
402
405
|
def _validate_labels(self) -> None:
|
|
403
|
-
all_labels = self.labels | {self.default}
|
|
406
|
+
all_labels = set(self.labels) | {self.default}
|
|
404
407
|
for phase in self._pipeline:
|
|
405
408
|
for ruleset in phase.rulesets:
|
|
406
409
|
bad = ruleset.referenced_labels() - all_labels
|
|
@@ -10,6 +10,11 @@ import threading
|
|
|
10
10
|
from dataclasses import dataclass
|
|
11
11
|
from typing import Any
|
|
12
12
|
|
|
13
|
+
try:
|
|
14
|
+
import httpx
|
|
15
|
+
except ImportError:
|
|
16
|
+
httpx = None # type: ignore[assignment]
|
|
17
|
+
|
|
13
18
|
from errorsense.models import SenseResult
|
|
14
19
|
from errorsense.signal import Signal
|
|
15
20
|
from errorsense.skill import Skill
|
|
@@ -20,7 +25,7 @@ __all__ = ["LLMConfig", "LLMClient"]
|
|
|
20
25
|
|
|
21
26
|
DEFAULT_BASE_URL = "https://relay.opengpu.network/v2/openai/v1"
|
|
22
27
|
DEFAULT_MODEL = "Qwen/Qwen3.5-397B-A17B-FP8"
|
|
23
|
-
|
|
28
|
+
DEFAULT_PROMPT_FORMAT = (
|
|
24
29
|
"{instructions}\n\n"
|
|
25
30
|
"Classify the following error signal into exactly one of these labels: {labels}\n\n"
|
|
26
31
|
"Signal data:\n{signal}\n\n"
|
|
@@ -57,9 +62,9 @@ class LLMConfig:
|
|
|
57
62
|
def _build_prompt(signal: Signal, skill: Skill, labels: list[str], config: LLMConfig) -> str:
|
|
58
63
|
signal_text = json.dumps(signal.to_dict(), default=str)
|
|
59
64
|
if len(signal_text) > config.max_signal_size:
|
|
60
|
-
signal_text = signal_text[: config.max_signal_size] + "..."
|
|
65
|
+
signal_text = signal_text[: config.max_signal_size] + "...(truncated)"
|
|
61
66
|
|
|
62
|
-
template = skill.
|
|
67
|
+
template = skill.prompt_format or DEFAULT_PROMPT_FORMAT
|
|
63
68
|
return template.format(
|
|
64
69
|
instructions=skill.instructions,
|
|
65
70
|
labels=", ".join(labels) if labels else "unknown",
|
|
@@ -76,6 +81,7 @@ def _build_request_body(skill: Skill, prompt: str, config: LLMConfig) -> dict:
|
|
|
76
81
|
|
|
77
82
|
|
|
78
83
|
def _build_headers(config: LLMConfig) -> dict:
|
|
84
|
+
# Empty api_key sends "Bearer " — relay accepts this for guest tier.
|
|
79
85
|
return {
|
|
80
86
|
"Authorization": f"Bearer {config.api_key}",
|
|
81
87
|
"Content-Type": "application/json",
|
|
@@ -121,12 +127,10 @@ class LLMClient:
|
|
|
121
127
|
"""HTTP client for LLM classification calls. Supports both sync and async."""
|
|
122
128
|
|
|
123
129
|
def __init__(self, config: LLMConfig) -> None:
|
|
124
|
-
|
|
125
|
-
import httpx # noqa: F401
|
|
126
|
-
except ImportError:
|
|
130
|
+
if httpx is None:
|
|
127
131
|
raise ImportError(
|
|
128
132
|
"LLM skills require httpx. Install with: pip install errorsense[llm]"
|
|
129
|
-
)
|
|
133
|
+
)
|
|
130
134
|
|
|
131
135
|
self._config = config
|
|
132
136
|
self._sync_client: Any = None
|
|
@@ -135,16 +139,12 @@ class LLMClient:
|
|
|
135
139
|
self._async_lock = asyncio.Lock()
|
|
136
140
|
|
|
137
141
|
def _get_sync_client(self) -> Any:
|
|
138
|
-
import httpx
|
|
139
|
-
|
|
140
142
|
with self._sync_lock:
|
|
141
143
|
if self._sync_client is None:
|
|
142
144
|
self._sync_client = httpx.Client(timeout=self._config.timeout)
|
|
143
145
|
return self._sync_client
|
|
144
146
|
|
|
145
147
|
async def _get_async_client(self) -> Any:
|
|
146
|
-
import httpx
|
|
147
|
-
|
|
148
148
|
async with self._async_lock:
|
|
149
149
|
if self._async_client is None:
|
|
150
150
|
self._async_client = httpx.AsyncClient(timeout=self._config.timeout)
|
|
@@ -169,7 +169,7 @@ class LLMClient:
|
|
|
169
169
|
)
|
|
170
170
|
resp.raise_for_status()
|
|
171
171
|
data = resp.json()
|
|
172
|
-
except (
|
|
172
|
+
except (httpx.HTTPError, ValueError) as e:
|
|
173
173
|
logger.warning("LLM call failed for skill %r: %s", skill.name, e)
|
|
174
174
|
return None
|
|
175
175
|
|
|
@@ -194,7 +194,7 @@ class LLMClient:
|
|
|
194
194
|
)
|
|
195
195
|
resp.raise_for_status()
|
|
196
196
|
data = resp.json()
|
|
197
|
-
except (
|
|
197
|
+
except (httpx.HTTPError, ValueError) as e:
|
|
198
198
|
logger.warning("LLM call failed for skill %r: %s", skill.name, e)
|
|
199
199
|
return None
|
|
200
200
|
|
|
@@ -45,11 +45,15 @@ class TrailingConfig:
|
|
|
45
45
|
count_labels: Only these labels count toward threshold.
|
|
46
46
|
history_size: Max errors kept per key (ring buffer).
|
|
47
47
|
reviewer_llm: LLM config for review. Set to enable review, None to disable.
|
|
48
|
-
reviewer_skill: Custom review skill. Defaults to built-in reclassification.
|
|
48
|
+
reviewer_skill: Custom review skill. Defaults to built-in reviewer.
|
|
49
49
|
"""
|
|
50
50
|
|
|
51
51
|
threshold: int = 3
|
|
52
|
-
count_labels: list[str] | None = None
|
|
52
|
+
count_labels: list[str] | tuple[str, ...] | None = None
|
|
53
53
|
history_size: int = 10
|
|
54
54
|
reviewer_llm: LLMConfig | None = None
|
|
55
55
|
reviewer_skill: Skill | None = None
|
|
56
|
+
|
|
57
|
+
def __post_init__(self) -> None:
|
|
58
|
+
if self.count_labels is not None:
|
|
59
|
+
object.__setattr__(self, "count_labels", tuple(self.count_labels))
|
|
@@ -97,6 +97,11 @@ class Phase:
|
|
|
97
97
|
return None
|
|
98
98
|
|
|
99
99
|
def _run_skills_sync(self, signal: Signal, explain: bool) -> SenseResult | None:
|
|
100
|
+
"""Run skills sequentially. Highest confidence wins.
|
|
101
|
+
|
|
102
|
+
Note: async variant runs skills concurrently via asyncio.gather,
|
|
103
|
+
so ordering may differ when skills tie on confidence.
|
|
104
|
+
"""
|
|
100
105
|
if not self._llm_client:
|
|
101
106
|
return None
|
|
102
107
|
|
|
@@ -18,9 +18,9 @@ def _ruleset_phases(extra_rulesets: list[Ruleset] | None = None) -> list[Phase]:
|
|
|
18
18
|
Ruleset(field="status_code", match={
|
|
19
19
|
"4xx": "client", 502: "server", 503: "server", 504: "server",
|
|
20
20
|
}),
|
|
21
|
-
Ruleset(field="headers.content-type",
|
|
22
|
-
"
|
|
23
|
-
|
|
21
|
+
Ruleset(field="headers.content-type", patterns=[
|
|
22
|
+
("server", [r"^text/html"]),
|
|
23
|
+
]),
|
|
24
24
|
]),
|
|
25
25
|
Phase("patterns", rulesets=[
|
|
26
26
|
Ruleset(field="body", patterns=[
|
|
@@ -86,10 +86,14 @@ class Ruleset:
|
|
|
86
86
|
|
|
87
87
|
def referenced_labels(self) -> set[str]:
|
|
88
88
|
"""Return set of label strings this ruleset can produce. Used by engine validation."""
|
|
89
|
+
labels: set[str] = set()
|
|
89
90
|
match = getattr(self, "_match", None)
|
|
90
|
-
if match is None:
|
|
91
|
-
|
|
92
|
-
|
|
91
|
+
if match is not None:
|
|
92
|
+
labels |= {v for v in match.values() if isinstance(v, str)}
|
|
93
|
+
compiled = getattr(self, "_compiled", None)
|
|
94
|
+
if compiled is not None:
|
|
95
|
+
labels |= {label for label, _ in compiled}
|
|
96
|
+
return labels
|
|
93
97
|
|
|
94
98
|
def classify(self, signal: Signal) -> SenseResult | None:
|
|
95
99
|
"""Classify a signal. Override in subclass for custom logic."""
|
|
@@ -132,8 +136,6 @@ class Ruleset:
|
|
|
132
136
|
return signal.get(field)
|
|
133
137
|
|
|
134
138
|
def _match_value(self, value: Any) -> SenseResult | None:
|
|
135
|
-
field = self.field
|
|
136
|
-
|
|
137
139
|
if value in self._exact_keys:
|
|
138
140
|
label = self._exact_keys[value]
|
|
139
141
|
if label is None:
|
|
@@ -146,13 +148,6 @@ class Ruleset:
|
|
|
146
148
|
label = self._range_keys[range_key]
|
|
147
149
|
return SenseResult(label=label, confidence=1.0)
|
|
148
150
|
|
|
149
|
-
if isinstance(value, str) and self._exact_keys:
|
|
150
|
-
for pattern, label in self._exact_keys.items():
|
|
151
|
-
if isinstance(pattern, str) and pattern in value:
|
|
152
|
-
if label is None:
|
|
153
|
-
return None
|
|
154
|
-
return SenseResult(label=label, confidence=1.0)
|
|
155
|
-
|
|
156
151
|
return None
|
|
157
152
|
|
|
158
153
|
def _match_patterns(self, value: Any) -> SenseResult | None:
|
|
@@ -6,7 +6,11 @@ from typing import Any
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
def _deep_freeze(obj: Any) -> Any:
|
|
9
|
-
"""Recursively freeze dicts into MappingProxyType and lists into tuples.
|
|
9
|
+
"""Recursively freeze dicts into MappingProxyType and lists into tuples.
|
|
10
|
+
|
|
11
|
+
Note: lists become tuples. Code receiving signal data should check
|
|
12
|
+
for Sequence, not list specifically.
|
|
13
|
+
"""
|
|
10
14
|
if isinstance(obj, dict):
|
|
11
15
|
return MappingProxyType({k: _deep_freeze(v) for k, v in obj.items()})
|
|
12
16
|
if isinstance(obj, list):
|
|
@@ -15,7 +15,7 @@ class Skill:
|
|
|
15
15
|
Instructions are loaded from a markdown file by default. Built-in skills
|
|
16
16
|
live in errorsense/skills/. Custom skills can point to any file path.
|
|
17
17
|
|
|
18
|
-
For programmatic use (e.g.
|
|
18
|
+
For programmatic use (e.g. trailing review), inline instructions=
|
|
19
19
|
is also supported.
|
|
20
20
|
|
|
21
21
|
Args:
|
|
@@ -23,7 +23,7 @@ class Skill:
|
|
|
23
23
|
in the built-in skills directory.
|
|
24
24
|
path: Explicit path to a .md file. Overrides built-in lookup.
|
|
25
25
|
instructions: Inline instructions string. Overrides file loading.
|
|
26
|
-
|
|
26
|
+
prompt_format: Override the default LLM prompt format.
|
|
27
27
|
temperature: LLM temperature (default: 0.0 for determinism).
|
|
28
28
|
"""
|
|
29
29
|
|
|
@@ -32,14 +32,14 @@ class Skill:
|
|
|
32
32
|
name: str,
|
|
33
33
|
path: str | Path | None = None,
|
|
34
34
|
instructions: str | None = None,
|
|
35
|
-
|
|
35
|
+
prompt_format: str | None = None,
|
|
36
36
|
temperature: float = 0.0,
|
|
37
37
|
) -> None:
|
|
38
38
|
if not name:
|
|
39
39
|
raise ValueError("Skill requires a non-empty 'name'")
|
|
40
40
|
|
|
41
41
|
self.name = name
|
|
42
|
-
self.
|
|
42
|
+
self.prompt_format = prompt_format
|
|
43
43
|
self.temperature = temperature
|
|
44
44
|
|
|
45
45
|
if instructions:
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
"""Tests for LLM prompt building and response parsing."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
from errorsense.llm import (
|
|
6
|
+
DEFAULT_PROMPT_FORMAT,
|
|
7
|
+
LLMConfig,
|
|
8
|
+
_build_prompt,
|
|
9
|
+
_parse_response,
|
|
10
|
+
)
|
|
11
|
+
from errorsense.signal import Signal
|
|
12
|
+
from errorsense.skill import Skill
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class TestParseResponse:
|
|
16
|
+
def _make_response(self, content: str) -> dict:
|
|
17
|
+
return {"choices": [{"message": {"content": content}}]}
|
|
18
|
+
|
|
19
|
+
def test_valid_json(self):
|
|
20
|
+
data = self._make_response('{"label": "server", "confidence": 0.9, "reason": "OOM"}')
|
|
21
|
+
result = _parse_response(data, ["client", "server"], "test_skill", include_reason=True)
|
|
22
|
+
assert result is not None
|
|
23
|
+
assert result.label == "server"
|
|
24
|
+
assert result.confidence == 0.9
|
|
25
|
+
assert result.reason == "OOM"
|
|
26
|
+
|
|
27
|
+
def test_reason_excluded_when_not_requested(self):
|
|
28
|
+
data = self._make_response('{"label": "server", "confidence": 0.9, "reason": "OOM"}')
|
|
29
|
+
result = _parse_response(data, ["client", "server"], "test_skill", include_reason=False)
|
|
30
|
+
assert result is not None
|
|
31
|
+
assert result.reason is None
|
|
32
|
+
|
|
33
|
+
def test_unknown_label_returns_none(self):
|
|
34
|
+
data = self._make_response('{"label": "bogus", "confidence": 0.9}')
|
|
35
|
+
result = _parse_response(data, ["client", "server"], "test_skill")
|
|
36
|
+
assert result is None
|
|
37
|
+
|
|
38
|
+
def test_empty_label_returns_none(self):
|
|
39
|
+
data = self._make_response('{"confidence": 0.9}')
|
|
40
|
+
result = _parse_response(data, ["client", "server"], "test_skill")
|
|
41
|
+
assert result is None
|
|
42
|
+
|
|
43
|
+
def test_malformed_json_returns_none(self):
|
|
44
|
+
data = self._make_response("not json at all")
|
|
45
|
+
result = _parse_response(data, ["client", "server"], "test_skill")
|
|
46
|
+
assert result is None
|
|
47
|
+
|
|
48
|
+
def test_code_fence_stripped(self):
|
|
49
|
+
content = '```json\n{"label": "client", "confidence": 0.8}\n```'
|
|
50
|
+
data = self._make_response(content)
|
|
51
|
+
result = _parse_response(data, ["client", "server"], "test_skill")
|
|
52
|
+
assert result is not None
|
|
53
|
+
assert result.label == "client"
|
|
54
|
+
|
|
55
|
+
def test_confidence_clamped_high(self):
|
|
56
|
+
data = self._make_response('{"label": "server", "confidence": 5.0}')
|
|
57
|
+
result = _parse_response(data, ["server"], "test_skill")
|
|
58
|
+
assert result.confidence == 1.0
|
|
59
|
+
|
|
60
|
+
def test_confidence_clamped_low(self):
|
|
61
|
+
data = self._make_response('{"label": "server", "confidence": -1.0}')
|
|
62
|
+
result = _parse_response(data, ["server"], "test_skill")
|
|
63
|
+
assert result.confidence == 0.0
|
|
64
|
+
|
|
65
|
+
def test_default_confidence(self):
|
|
66
|
+
data = self._make_response('{"label": "server"}')
|
|
67
|
+
result = _parse_response(data, ["server"], "test_skill")
|
|
68
|
+
assert result.confidence == 0.7
|
|
69
|
+
|
|
70
|
+
def test_missing_choices_returns_none(self):
|
|
71
|
+
result = _parse_response({}, ["server"], "test_skill")
|
|
72
|
+
assert result is None
|
|
73
|
+
|
|
74
|
+
def test_empty_labels_accepts_anything(self):
|
|
75
|
+
data = self._make_response('{"label": "whatever", "confidence": 0.5}')
|
|
76
|
+
result = _parse_response(data, [], "test_skill")
|
|
77
|
+
assert result is not None
|
|
78
|
+
assert result.label == "whatever"
|
|
79
|
+
|
|
80
|
+
def test_skill_name_set(self):
|
|
81
|
+
data = self._make_response('{"label": "server", "confidence": 0.9}')
|
|
82
|
+
result = _parse_response(data, ["server"], "my_skill")
|
|
83
|
+
assert result.skill_name == "my_skill"
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class TestBuildPrompt:
|
|
87
|
+
def _make_skill(self, instructions: str = "Classify this error.", **kwargs) -> Skill:
|
|
88
|
+
return Skill("test", instructions=instructions, **kwargs)
|
|
89
|
+
|
|
90
|
+
def test_basic_format(self):
|
|
91
|
+
signal = Signal({"status_code": 500})
|
|
92
|
+
skill = self._make_skill()
|
|
93
|
+
config = LLMConfig()
|
|
94
|
+
prompt = _build_prompt(signal, skill, ["client", "server"], config)
|
|
95
|
+
assert "Classify this error." in prompt
|
|
96
|
+
assert "client, server" in prompt
|
|
97
|
+
assert "500" in prompt
|
|
98
|
+
|
|
99
|
+
def test_truncation(self):
|
|
100
|
+
signal = Signal({"body": "x" * 1000})
|
|
101
|
+
skill = self._make_skill()
|
|
102
|
+
config = LLMConfig(max_signal_size=50)
|
|
103
|
+
prompt = _build_prompt(signal, skill, ["a"], config)
|
|
104
|
+
assert "...(truncated)" in prompt
|
|
105
|
+
|
|
106
|
+
def test_no_truncation_when_small(self):
|
|
107
|
+
signal = Signal({"x": 1})
|
|
108
|
+
skill = self._make_skill()
|
|
109
|
+
config = LLMConfig(max_signal_size=500)
|
|
110
|
+
prompt = _build_prompt(signal, skill, ["a"], config)
|
|
111
|
+
assert "(truncated)" not in prompt
|
|
112
|
+
|
|
113
|
+
def test_custom_template(self):
|
|
114
|
+
skill = self._make_skill(prompt_format="Labels: {labels}\nSignal: {signal}\n{instructions}")
|
|
115
|
+
signal = Signal({"code": 1})
|
|
116
|
+
config = LLMConfig()
|
|
117
|
+
prompt = _build_prompt(signal, skill, ["a", "b"], config)
|
|
118
|
+
assert prompt.startswith("Labels: a, b")
|
|
119
|
+
assert "Classify this error." in prompt
|
|
120
|
+
|
|
121
|
+
def test_empty_labels(self):
|
|
122
|
+
signal = Signal({"x": 1})
|
|
123
|
+
skill = self._make_skill()
|
|
124
|
+
config = LLMConfig()
|
|
125
|
+
prompt = _build_prompt(signal, skill, [], config)
|
|
126
|
+
assert "unknown" in prompt
|
|
@@ -57,14 +57,28 @@ class TestRangeMatch:
|
|
|
57
57
|
|
|
58
58
|
|
|
59
59
|
class TestHeaderMatch:
|
|
60
|
-
def
|
|
60
|
+
def test_content_type_exact_match(self):
|
|
61
61
|
rs = Ruleset(field="headers.content-type", match={"text/html": "infra"})
|
|
62
|
+
signal = Signal.from_http(status_code=500, body="<html>", headers={"content-type": "text/html"})
|
|
63
|
+
result = rs.classify(signal)
|
|
64
|
+
assert result.label == "infra"
|
|
65
|
+
|
|
66
|
+
def test_content_type_with_charset_no_match(self):
|
|
67
|
+
"""Exact match: 'text/html' != 'text/html; charset=utf-8'. Use patterns for prefix matching."""
|
|
68
|
+
rs = Ruleset(field="headers.content-type", match={"text/html": "infra"})
|
|
69
|
+
signal = Signal.from_http(status_code=500, body="<html>", headers={"content-type": "text/html; charset=utf-8"})
|
|
70
|
+
result = rs.classify(signal)
|
|
71
|
+
assert result is None
|
|
72
|
+
|
|
73
|
+
def test_content_type_pattern_match(self):
|
|
74
|
+
"""Use patterns= for prefix matching on content-type with charset."""
|
|
75
|
+
rs = Ruleset(field="headers.content-type", patterns=[("infra", [r"^text/html"])])
|
|
62
76
|
signal = Signal.from_http(status_code=500, body="<html>", headers={"content-type": "text/html; charset=utf-8"})
|
|
63
77
|
result = rs.classify(signal)
|
|
64
78
|
assert result.label == "infra"
|
|
65
79
|
|
|
66
80
|
def test_content_type_none_pass(self):
|
|
67
|
-
rs = Ruleset(field="headers.content-type", match={"application/json": None
|
|
81
|
+
rs = Ruleset(field="headers.content-type", match={"application/json": None})
|
|
68
82
|
signal = Signal.from_http(status_code=500, body="{}", headers={"content-type": "application/json"})
|
|
69
83
|
result = rs.classify(signal)
|
|
70
84
|
assert result is None
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|