errorsense 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- errorsense/__init__.py +27 -0
- errorsense/engine.py +452 -0
- errorsense/llm.py +201 -0
- errorsense/models.py +52 -0
- errorsense/phase.py +192 -0
- errorsense/presets/__init__.py +5 -0
- errorsense/presets/http_gateway.py +72 -0
- errorsense/ruleset.py +165 -0
- errorsense/signal.py +100 -0
- errorsense/skill.py +70 -0
- errorsense/skills/http_classifier.md +29 -0
- errorsense/skills/reclassification.md +9 -0
- errorsense-0.1.0.dist-info/METADATA +213 -0
- errorsense-0.1.0.dist-info/RECORD +16 -0
- errorsense-0.1.0.dist-info/WHEEL +4 -0
- errorsense-0.1.0.dist-info/licenses/LICENSE +21 -0
errorsense/__init__.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""ErrorSense — error classification engine."""
|
|
2
|
+
|
|
3
|
+
from errorsense.engine import ErrorSense
|
|
4
|
+
from errorsense.llm import LLMConfig
|
|
5
|
+
from errorsense.models import (
|
|
6
|
+
SenseResult,
|
|
7
|
+
TrailResult,
|
|
8
|
+
TrailingConfig,
|
|
9
|
+
)
|
|
10
|
+
from errorsense.phase import Phase
|
|
11
|
+
from errorsense.ruleset import Ruleset
|
|
12
|
+
from errorsense.signal import Signal
|
|
13
|
+
from errorsense.skill import Skill
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"ErrorSense",
|
|
17
|
+
"Phase",
|
|
18
|
+
"Ruleset",
|
|
19
|
+
"Skill",
|
|
20
|
+
"LLMConfig",
|
|
21
|
+
"Signal",
|
|
22
|
+
"SenseResult",
|
|
23
|
+
"TrailResult",
|
|
24
|
+
"TrailingConfig",
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
__version__ = "0.1.0"
|
errorsense/engine.py
ADDED
|
@@ -0,0 +1,452 @@
|
|
|
1
|
+
"""ErrorSense — phase pipeline classification engine."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import json
|
|
7
|
+
import logging
|
|
8
|
+
import threading
|
|
9
|
+
import time
|
|
10
|
+
from collections import defaultdict, deque
|
|
11
|
+
from typing import Any, Callable
|
|
12
|
+
|
|
13
|
+
from errorsense.llm import LLMConfig
|
|
14
|
+
from errorsense.models import SenseResult, TrailResult, TrailingConfig
|
|
15
|
+
from errorsense.phase import Phase
|
|
16
|
+
from errorsense.ruleset import Ruleset
|
|
17
|
+
from errorsense.signal import Signal
|
|
18
|
+
from errorsense.skill import Skill
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger("errorsense")
|
|
21
|
+
|
|
22
|
+
__all__ = ["ErrorSense"]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ErrorSense:
|
|
26
|
+
"""Classification engine — runs signals through a phase pipeline.
|
|
27
|
+
|
|
28
|
+
Supports stateless classification (classify) and stateful trailing
|
|
29
|
+
(trail) with per-key error history and threshold-based decisions.
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
    def __init__(
        self,
        categories: list[str],
        # Explicit mode: caller supplies the full, ordered phase list.
        pipeline: list[Phase] | None = None,
        # Implicit mode: phases are assembled from rulesets and/or skills+llm.
        rulesets: list[Ruleset] | None = None,
        skills: list[Skill] | None = None,
        llm: LLMConfig | None = None,
        # Common
        default: str = "unknown",
        trailing: TrailingConfig | None = None,
        on_classify: Callable[[Signal, SenseResult], Any] | None = None,
        on_error: Callable[[str, Exception], Any] | None = None,
    ) -> None:
        """Build the engine from an explicit pipeline or implicit parts.

        Raises ValueError when both modes are mixed, when phase names are
        duplicated, when a ruleset maps to an unknown label, or when an LLM
        phase lacks an API key.
        """
        self.categories = set(categories)
        self.default = default
        self._on_classify = on_classify
        self._on_error = on_error

        if pipeline is not None:
            # The two construction modes are mutually exclusive.
            if rulesets is not None or skills is not None or llm is not None:
                raise ValueError(
                    "Cannot mix explicit (pipeline=) and implicit (rulesets=/skills=/llm=) modes"
                )
            self._pipeline = list(pipeline)
        else:
            self._pipeline = self._build_implicit_pipeline(rulesets, skills, llm)

        # Order matters: duplicate-name check first, so _pipeline_names is
        # built from a duplicate-free pipeline before the other validations.
        self._validate_phase_names()
        self._pipeline_names = frozenset(p.name for p in self._pipeline)
        self._validate_categories()
        self._validate_llm_api_keys()
        for phase in self._pipeline:
            phase.set_categories(list(categories))

        # Trailing state
        self._trailing = trailing
        self._has_llm = any(p.is_llm_phase for p in self._pipeline)
        self._reclass_skill: Skill | None = None
        if trailing:
            self._init_trailing(trailing)
|
|
74
|
+
|
|
75
|
+
    def _init_trailing(self, config: TrailingConfig) -> None:
        """Initialize per-key history, counts, and lock tables for trailing."""
        if config.review is True and not self._has_llm:
            raise ValueError(
                "TrailingConfig(review=True) requires an LLM phase in the pipeline."
            )
        # review=None means "auto": reviews run exactly when an LLM phase exists.
        self._review_enabled = (
            config.review if config.review is not None else self._has_llm
        )
        self._threshold = config.threshold
        self._count_labels = set(config.count_labels or [])
        # Bind history_size into a local so the defaultdict factory closes
        # over the plain value rather than over config.
        hs = config.history_size
        self._history: dict[str, deque[dict[str, Any]]] = defaultdict(
            lambda: deque(maxlen=hs)
        )
        self._counts: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
        # Global locks guard the per-key lock tables; per-key locks serialize
        # trail operations for one key (separate sync and async worlds).
        self._trail_lock = threading.Lock()
        self._trail_locks: dict[str, threading.Lock] = {}
        self._async_trail_lock = asyncio.Lock()
        self._async_trail_locks: dict[str, asyncio.Lock] = {}
|
|
94
|
+
|
|
95
|
+
@property
|
|
96
|
+
def pipeline(self) -> list[Phase]:
|
|
97
|
+
return list(self._pipeline)
|
|
98
|
+
|
|
99
|
+
def get_phase(self, name: str) -> Phase | None:
|
|
100
|
+
for phase in self._pipeline:
|
|
101
|
+
if phase.name == name:
|
|
102
|
+
return phase
|
|
103
|
+
return None
|
|
104
|
+
|
|
105
|
+
def close(self) -> None:
|
|
106
|
+
"""Close all LLM phase clients (sync)."""
|
|
107
|
+
for phase in self._pipeline:
|
|
108
|
+
phase.close_sync()
|
|
109
|
+
|
|
110
|
+
async def async_close(self) -> None:
|
|
111
|
+
"""Close all LLM phase clients (async)."""
|
|
112
|
+
for phase in self._pipeline:
|
|
113
|
+
await phase.close_async()
|
|
114
|
+
|
|
115
|
+
    async def __aenter__(self) -> ErrorSense:
        """Enter `async with` — no setup required; clients are created lazily."""
        return self
|
|
117
|
+
|
|
118
|
+
    async def __aexit__(self, *exc: Any) -> None:
        """Exit `async with` — closes all LLM phase clients."""
        await self.async_close()
|
|
120
|
+
|
|
121
|
+
# -- Stateless classification --
|
|
122
|
+
|
|
123
|
+
def classify(
|
|
124
|
+
self,
|
|
125
|
+
signal: Signal,
|
|
126
|
+
skip: set[str] | list[str] | None = None,
|
|
127
|
+
short_circuit: bool = True,
|
|
128
|
+
explain: bool = False,
|
|
129
|
+
) -> list[SenseResult]:
|
|
130
|
+
"""Classify a signal through the phase pipeline (sync).
|
|
131
|
+
|
|
132
|
+
Returns list of SenseResult from pipeline phases that matched.
|
|
133
|
+
If nothing matched, returns [default_result].
|
|
134
|
+
"""
|
|
135
|
+
skip_set = self._validate_skip(skip)
|
|
136
|
+
results: list[SenseResult] = []
|
|
137
|
+
|
|
138
|
+
for phase in self._pipeline:
|
|
139
|
+
if phase.name in skip_set:
|
|
140
|
+
continue
|
|
141
|
+
try:
|
|
142
|
+
result = phase.classify(signal, explain=explain)
|
|
143
|
+
except Exception as e:
|
|
144
|
+
logger.warning("Phase %r raised: %s", phase.name, e)
|
|
145
|
+
self._notify_error(phase.name, e)
|
|
146
|
+
continue
|
|
147
|
+
|
|
148
|
+
if result is not None:
|
|
149
|
+
results.append(result)
|
|
150
|
+
self._notify_classify(signal, result)
|
|
151
|
+
if short_circuit:
|
|
152
|
+
break
|
|
153
|
+
|
|
154
|
+
if not results:
|
|
155
|
+
results.append(self._make_default_result(signal))
|
|
156
|
+
|
|
157
|
+
return results
|
|
158
|
+
|
|
159
|
+
async def async_classify(
|
|
160
|
+
self,
|
|
161
|
+
signal: Signal,
|
|
162
|
+
skip: set[str] | list[str] | None = None,
|
|
163
|
+
short_circuit: bool = True,
|
|
164
|
+
explain: bool = False,
|
|
165
|
+
) -> list[SenseResult]:
|
|
166
|
+
"""Classify a signal through the phase pipeline (async)."""
|
|
167
|
+
skip_set = self._validate_skip(skip)
|
|
168
|
+
results: list[SenseResult] = []
|
|
169
|
+
|
|
170
|
+
for phase in self._pipeline:
|
|
171
|
+
if phase.name in skip_set:
|
|
172
|
+
continue
|
|
173
|
+
try:
|
|
174
|
+
result = await phase.async_classify(signal, explain=explain)
|
|
175
|
+
except Exception as e:
|
|
176
|
+
logger.warning("Phase %r raised: %s", phase.name, e)
|
|
177
|
+
self._notify_error(phase.name, e)
|
|
178
|
+
continue
|
|
179
|
+
|
|
180
|
+
if result is not None:
|
|
181
|
+
results.append(result)
|
|
182
|
+
self._notify_classify(signal, result)
|
|
183
|
+
if short_circuit:
|
|
184
|
+
break
|
|
185
|
+
|
|
186
|
+
if not results:
|
|
187
|
+
results.append(self._make_default_result(signal))
|
|
188
|
+
|
|
189
|
+
return results
|
|
190
|
+
|
|
191
|
+
# -- Stateful trailing --
|
|
192
|
+
|
|
193
|
+
    def trail(self, key: str, signal: Signal) -> TrailResult:
        """Classify + track per key (sync).

        Raises RuntimeError when the engine was built without trailing=.
        """
        if not self._trailing:
            raise RuntimeError(
                "Trailing not configured. Pass trailing=TrailingConfig(...) to ErrorSense."
            )

        # Take the global lock only long enough to fetch/create this key's
        # lock; the actual work runs under the per-key lock so distinct keys
        # do not serialize each other.
        with self._trail_lock:
            lock = self._trail_locks.setdefault(key, threading.Lock())
        with lock:
            # [0]: keep only the first (short-circuit) classification result.
            result = self.classify(signal)[0]
            at_threshold = self._record_and_check(key, signal, result)
            # An LLM review runs only when the threshold is hit and review is on.
            review_result = (
                self._run_review_sync(key)
                if at_threshold and self._review_enabled else None
            )
            return self._build_trail_result(key, result, at_threshold, review_result)
|
|
210
|
+
|
|
211
|
+
    async def async_trail(self, key: str, signal: Signal) -> TrailResult:
        """Classify + track per key (async).

        Raises RuntimeError when the engine was built without trailing=.
        """
        if not self._trailing:
            raise RuntimeError(
                "Trailing not configured. Pass trailing=TrailingConfig(...) to ErrorSense."
            )

        # Same two-step locking as trail(): global lock only to fetch/create
        # the per-key lock, then the real work under the per-key lock.
        async with self._async_trail_lock:
            if key not in self._async_trail_locks:
                self._async_trail_locks[key] = asyncio.Lock()
            lock = self._async_trail_locks[key]
        async with lock:
            result = (await self.async_classify(signal))[0]
            at_threshold = self._record_and_check(key, signal, result)
            review_result = (
                await self._run_review_async(key)
                if at_threshold and self._review_enabled else None
            )
            return self._build_trail_result(key, result, at_threshold, review_result)
|
|
230
|
+
|
|
231
|
+
    def review(self, key: str) -> SenseResult | None:
        """Manually review full history for a key (sync). Returns LLM verdict.

        Returns None when no LLM phase exists or the LLM call fails.
        """
        return self._run_review_sync(key)
|
|
234
|
+
|
|
235
|
+
async def async_review(self, key: str) -> SenseResult | None:
|
|
236
|
+
"""Manually review full history for a key (async). Returns LLM verdict."""
|
|
237
|
+
return self._run_review_async(key)
|
|
238
|
+
|
|
239
|
+
    def _record_and_check(self, key: str, signal: Signal, result: SenseResult) -> bool:
        """Append *result* to *key*'s history; return whether the threshold is hit.

        Must be called while holding the key's trail lock.
        """
        entry = {
            "label": result.label,
            "confidence": result.confidence,
            "phase": result.phase,
            "skill": result.skill_name,
            "timestamp": time.time(),
            "signal_data": signal.to_dict(),
        }
        history = self._history[key]

        # The bounded deque evicts its oldest entry on append once full;
        # mirror that eviction in the running label counts *before* appending.
        if len(history) == history.maxlen:
            evicted = history[0]["label"]
            if evicted in self._count_labels and self._counts[key].get(evicted, 0) > 0:
                self._counts[key][evicted] -= 1

        history.append(entry)

        # Only labels listed in count_labels contribute to the threshold.
        if result.label in self._count_labels:
            self._counts[key][result.label] += 1

        return self._is_at_threshold(key)
|
|
261
|
+
|
|
262
|
+
    def _build_trail_result(
        self, key: str, result: SenseResult, at_threshold: bool,
        review_result: SenseResult | None,
    ) -> TrailResult:
        """Merge the phase verdict with an optional LLM review verdict.

        When the review disagrees with the phase label, the newest history
        entry is relabeled, counts are adjusted, and the threshold flag is
        recomputed against the corrected counts.
        """
        label = result.label
        reason = None
        if review_result:
            reason = review_result.reason
            if review_result.label != result.label:
                self._update_latest_label(key, result.label, review_result.label)
                label = review_result.label
                # Relabeling may have moved counts into or out of count_labels.
                at_threshold = self._is_at_threshold(key)

        return TrailResult(
            label=label,
            confidence=result.confidence,
            phase=result.phase,
            skill_name=result.skill_name,
            at_threshold=at_threshold,
            reason=reason,
        )
|
|
283
|
+
|
|
284
|
+
def _is_at_threshold(self, key: str) -> bool:
|
|
285
|
+
return sum(self._counts[key].values()) >= self._threshold
|
|
286
|
+
|
|
287
|
+
def _update_latest_label(self, key: str, old_label: str, new_label: str) -> None:
|
|
288
|
+
"""Update the most recent history entry's label and adjust counts."""
|
|
289
|
+
history = self._history[key]
|
|
290
|
+
if not history:
|
|
291
|
+
return
|
|
292
|
+
history[-1]["label"] = new_label
|
|
293
|
+
|
|
294
|
+
if old_label in self._count_labels and self._counts[key].get(old_label, 0) > 0:
|
|
295
|
+
self._counts[key][old_label] -= 1
|
|
296
|
+
if new_label in self._count_labels:
|
|
297
|
+
self._counts[key][new_label] += 1
|
|
298
|
+
|
|
299
|
+
def _run_review_sync(self, key: str) -> SenseResult | None:
|
|
300
|
+
llm_phase = self._find_llm_phase()
|
|
301
|
+
if not llm_phase:
|
|
302
|
+
return None
|
|
303
|
+
signal, skill = self._build_review_context(key)
|
|
304
|
+
try:
|
|
305
|
+
return llm_phase.run_llm_call(signal, skill, list(self.categories))
|
|
306
|
+
except Exception as e:
|
|
307
|
+
logger.warning("LLM review failed: %s", e)
|
|
308
|
+
return None
|
|
309
|
+
|
|
310
|
+
async def _run_review_async(self, key: str) -> SenseResult | None:
|
|
311
|
+
llm_phase = self._find_llm_phase()
|
|
312
|
+
if not llm_phase:
|
|
313
|
+
return None
|
|
314
|
+
signal, skill = self._build_review_context(key)
|
|
315
|
+
try:
|
|
316
|
+
return await llm_phase.async_run_llm_call(signal, skill, list(self.categories))
|
|
317
|
+
except Exception as e:
|
|
318
|
+
logger.warning("LLM review failed: %s", e)
|
|
319
|
+
return None
|
|
320
|
+
|
|
321
|
+
def _find_llm_phase(self) -> Phase | None:
|
|
322
|
+
for phase in self._pipeline:
|
|
323
|
+
if phase.is_llm_phase:
|
|
324
|
+
return phase
|
|
325
|
+
return None
|
|
326
|
+
|
|
327
|
+
    def _get_reclass_skill(self) -> Skill:
        """Lazily create and cache the bundled "reclassification" skill."""
        if self._reclass_skill is None:
            self._reclass_skill = Skill("reclassification")
        return self._reclass_skill
|
|
331
|
+
|
|
332
|
+
    def _build_review_context(self, key: str) -> tuple[Signal, Skill]:
        """Package *key*'s history into a synthetic Signal for LLM review."""
        history = list(self._history[key])
        # default=str guards against non-JSON-serializable values inside
        # signal_data (whatever Signal.to_dict() produced).
        summary = json.dumps(
            [{"label": e["label"], "phase": e.get("phase", ""), "signal": e.get("signal_data", {})}
             for e in history],
            default=str,
        )
        signal = Signal({
            "context": "trailing_review",
            "key": key,
            "history_summary": summary,
        })
        return signal, self._get_reclass_skill()
|
|
345
|
+
|
|
346
|
+
    def reset(self, key: str) -> None:
        """Clear trailing history and counts for a key."""
        if not self._trailing:
            return
        # Fetch (without creating) the per-key lock under the global lock.
        with self._trail_lock:
            lock = self._trail_locks.get(key)
        if lock is not None:
            with lock:
                self._history.pop(key, None)
                self._counts.pop(key, None)
                # Re-take the global lock to retire the per-key lock itself;
                # the identity check avoids deleting a lock that a concurrent
                # trail() re-created in the meantime.
                with self._trail_lock:
                    if self._trail_locks.get(key) is lock:
                        del self._trail_locks[key]
        else:
            # Key was never trailed (or already reset): just drop any state.
            self._history.pop(key, None)
            self._counts.pop(key, None)
|
|
362
|
+
|
|
363
|
+
def reset_all(self) -> None:
|
|
364
|
+
"""Clear all trailing state."""
|
|
365
|
+
if not self._trailing:
|
|
366
|
+
return
|
|
367
|
+
with self._trail_lock:
|
|
368
|
+
self._history.clear()
|
|
369
|
+
self._counts.clear()
|
|
370
|
+
self._trail_locks.clear()
|
|
371
|
+
|
|
372
|
+
# -- Internal --
|
|
373
|
+
|
|
374
|
+
    def _build_implicit_pipeline(
        self,
        rulesets: list[Ruleset] | None,
        skills: list[Skill] | None,
        llm: LLMConfig | None,
    ) -> list[Phase]:
        """Assemble the implicit pipeline: a "rulesets" phase, then an "llm" phase.

        Raises ValueError when skills are given without an LLM config, or when
        neither rulesets nor skills were provided.
        """
        phases: list[Phase] = []
        if rulesets:
            phases.append(Phase("rulesets", rulesets=rulesets))
        if skills:
            # Skills are executed by an LLM, so a connection config is required.
            if not llm:
                raise ValueError("skills= requires llm=LLMConfig(...)")
            phases.append(Phase("llm", skills=skills, llm=llm))
        if not phases:
            raise ValueError("Must provide pipeline= or at least rulesets= or skills=")
        return phases
|
|
390
|
+
|
|
391
|
+
def _validate_phase_names(self) -> None:
|
|
392
|
+
seen: set[str] = set()
|
|
393
|
+
for phase in self._pipeline:
|
|
394
|
+
if phase.name in seen:
|
|
395
|
+
raise ValueError(f"Duplicate phase name: {phase.name!r}")
|
|
396
|
+
seen.add(phase.name)
|
|
397
|
+
|
|
398
|
+
def _validate_categories(self) -> None:
|
|
399
|
+
all_cats = self.categories | {self.default}
|
|
400
|
+
for phase in self._pipeline:
|
|
401
|
+
for ruleset in phase.rulesets:
|
|
402
|
+
bad = ruleset.referenced_labels() - all_cats
|
|
403
|
+
if bad:
|
|
404
|
+
raise ValueError(
|
|
405
|
+
f"Ruleset on field {getattr(ruleset, 'field', '?')!r} maps to "
|
|
406
|
+
f"label {bad.pop()!r} not in {sorted(self.categories)}"
|
|
407
|
+
)
|
|
408
|
+
|
|
409
|
+
def _validate_llm_api_keys(self) -> None:
|
|
410
|
+
for phase in self._pipeline:
|
|
411
|
+
if not phase.is_llm_phase:
|
|
412
|
+
continue
|
|
413
|
+
if not phase.llm or not phase.llm.api_key:
|
|
414
|
+
raise ValueError(
|
|
415
|
+
f"Phase {phase.name!r} uses LLM skills but no API key is configured. "
|
|
416
|
+
f"Pass llm=LLMConfig(api_key=...) to the Phase."
|
|
417
|
+
)
|
|
418
|
+
|
|
419
|
+
def _validate_skip(self, skip: set[str] | list[str] | None) -> set[str]:
|
|
420
|
+
if not skip:
|
|
421
|
+
return set()
|
|
422
|
+
skip_set = set(skip)
|
|
423
|
+
invalid = skip_set - self._pipeline_names
|
|
424
|
+
if invalid:
|
|
425
|
+
raise ValueError(
|
|
426
|
+
f"Unknown phase names in skip: {invalid}. "
|
|
427
|
+
f"Valid phase names: {sorted(self._pipeline_names)}"
|
|
428
|
+
)
|
|
429
|
+
return skip_set
|
|
430
|
+
|
|
431
|
+
    def _make_default_result(self, signal: Signal) -> SenseResult:
        """Fallback result (default label, confidence 0.0) when no phase matched.

        Also fires the on_classify callback so observers see every outcome.
        """
        result = SenseResult(
            label=self.default,
            confidence=0.0,
            skill_name="default",
        )
        self._notify_classify(signal, result)
        return result
|
|
439
|
+
|
|
440
|
+
def _notify_classify(self, signal: Signal, result: SenseResult) -> None:
|
|
441
|
+
if self._on_classify:
|
|
442
|
+
try:
|
|
443
|
+
self._on_classify(signal, result)
|
|
444
|
+
except Exception as e:
|
|
445
|
+
logger.debug("on_classify callback raised: %s", e)
|
|
446
|
+
|
|
447
|
+
def _notify_error(self, phase_name: str, error: Exception) -> None:
|
|
448
|
+
if self._on_error:
|
|
449
|
+
try:
|
|
450
|
+
self._on_error(phase_name, error)
|
|
451
|
+
except Exception as e:
|
|
452
|
+
logger.debug("on_error callback raised: %s", e)
|
errorsense/llm.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
"""LLM infrastructure — LLMConfig and LLMClient for LLM API calls."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import json
|
|
7
|
+
import logging
|
|
8
|
+
import threading
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from errorsense.models import SenseResult
|
|
13
|
+
from errorsense.signal import Signal
|
|
14
|
+
from errorsense.skill import Skill
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger("errorsense.llm")
|
|
17
|
+
|
|
18
|
+
__all__ = ["LLMConfig", "LLMClient"]
|
|
19
|
+
|
|
20
|
+
# Defaults used when an LLMConfig does not override them.
DEFAULT_BASE_URL = "https://relay.opengpu.network/v1"
DEFAULT_MODEL = "gpt-oss:120b"
# Fallback prompt used when a Skill supplies no prompt_template. The doubled
# braces keep the JSON example literal through str.format().
DEFAULT_PROMPT_TEMPLATE = (
    "{instructions}\n\n"
    "Classify the following error signal into exactly one of these categories: {categories}\n\n"
    "Signal data:\n{signal}\n\n"
    'Reply ONLY with JSON: {{"label": "...", "confidence": 0.0, "reason": "..."}}'
)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass(frozen=True)
class LLMConfig:
    """Connection config for LLM API."""

    # Bearer token sent in the Authorization header.
    api_key: str
    # Model identifier sent in the request body.
    model: str = DEFAULT_MODEL
    # OpenAI-compatible API root; "/chat/completions" is appended per call.
    base_url: str = DEFAULT_BASE_URL
    # HTTP timeout (seconds) for both the sync and async clients.
    timeout: float = 10.0
    # Serialized signal JSON longer than this is truncated before prompting.
    max_signal_size: int = 500
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _build_prompt(signal: Signal, skill: Skill, categories: list[str], config: LLMConfig) -> str:
|
|
42
|
+
signal_text = json.dumps(signal.to_dict(), default=str)
|
|
43
|
+
if len(signal_text) > config.max_signal_size:
|
|
44
|
+
signal_text = signal_text[: config.max_signal_size] + "..."
|
|
45
|
+
|
|
46
|
+
template = skill.prompt_template or DEFAULT_PROMPT_TEMPLATE
|
|
47
|
+
return template.format(
|
|
48
|
+
instructions=skill.instructions,
|
|
49
|
+
categories=", ".join(categories) if categories else "unknown",
|
|
50
|
+
signal=signal_text,
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _build_request_body(skill: Skill, prompt: str, config: LLMConfig) -> dict:
|
|
55
|
+
return {
|
|
56
|
+
"model": config.model,
|
|
57
|
+
"messages": [{"role": "user", "content": prompt}],
|
|
58
|
+
"temperature": skill.temperature,
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _build_headers(config: LLMConfig) -> dict:
|
|
63
|
+
return {
|
|
64
|
+
"Authorization": f"Bearer {config.api_key}",
|
|
65
|
+
"Content-Type": "application/json",
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _parse_response(
    data: dict,
    categories: list[str],
    skill_name: str,
    include_reason: bool = False,
) -> SenseResult | None:
    """Parse a chat-completions response into a SenseResult.

    Strips markdown code fences, decodes the JSON verdict, clamps confidence
    to [0, 1], and rejects labels outside *categories*. Returns None (after
    logging) when the response is malformed or the label is unknown.
    """
    try:
        content = data["choices"][0]["message"]["content"]
        content = content.strip()
        if content.startswith("```"):
            # Drop the opening fence line (e.g. ```json) and the closing fence.
            content = content.split("\n", 1)[-1]
            content = content.rsplit("```", 1)[0]
        parsed = json.loads(content.strip())

        label = parsed.get("label", "") or parsed.get("category", "")
        confidence = min(1.0, max(0.0, float(parsed.get("confidence", 0.7))))
        reason = parsed.get("reason") if include_reason else None

        if categories and label not in categories:
            logger.warning(
                "Skill %r: LLM returned unknown label %r", skill_name, label
            )
            return None

        return SenseResult(
            label=label,
            confidence=confidence,
            skill_name=skill_name,
            reason=reason,
        )
    # Bug fix: TypeError and AttributeError were missing from the tuple, so a
    # response with "confidence": null (float(None) -> TypeError) or a non-dict
    # JSON payload (.get -> AttributeError) escaped and crashed the caller.
    except (
        KeyError, json.JSONDecodeError, IndexError, ValueError,
        TypeError, AttributeError,
    ) as e:
        logger.warning("Failed to parse LLM response for skill %r: %s", skill_name, e)
        return None
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class LLMClient:
|
|
105
|
+
"""HTTP client for LLM classification calls. Supports both sync and async."""
|
|
106
|
+
|
|
107
|
+
    def __init__(self, config: LLMConfig) -> None:
        """Store config and verify httpx is importable; clients are lazy.

        Raises ImportError (with an install hint) when httpx is missing.
        """
        try:
            import httpx  # noqa: F401
        except ImportError:
            raise ImportError(
                "LLM skills require httpx. Install with: pip install errorsense[llm]"
            ) from None

        self._config = config
        # Real httpx.Client/AsyncClient instances are created on first use.
        self._sync_client: Any = None
        self._async_client: Any = None
        # Separate creation locks for the thread world and the event-loop world.
        self._sync_lock = threading.Lock()
        self._async_lock = asyncio.Lock()
|
|
120
|
+
|
|
121
|
+
    def _get_sync_client(self) -> Any:
        """Return the shared httpx.Client, creating it lazily (thread-safe)."""
        import httpx

        with self._sync_lock:
            if self._sync_client is None:
                self._sync_client = httpx.Client(timeout=self._config.timeout)
            return self._sync_client
|
|
128
|
+
|
|
129
|
+
    async def _get_async_client(self) -> Any:
        """Return the shared httpx.AsyncClient, creating it lazily under the async lock."""
        import httpx

        async with self._async_lock:
            if self._async_client is None:
                self._async_client = httpx.AsyncClient(timeout=self._config.timeout)
            return self._async_client
|
|
136
|
+
|
|
137
|
+
def classify_sync(
|
|
138
|
+
self,
|
|
139
|
+
signal: Signal,
|
|
140
|
+
skill: Skill,
|
|
141
|
+
categories: list[str],
|
|
142
|
+
include_reason: bool = False,
|
|
143
|
+
) -> SenseResult | None:
|
|
144
|
+
config = skill.llm if skill.llm is not None else self._config
|
|
145
|
+
prompt = _build_prompt(signal, skill, categories, config)
|
|
146
|
+
url = f"{config.base_url.rstrip('/')}/chat/completions"
|
|
147
|
+
|
|
148
|
+
try:
|
|
149
|
+
client = self._get_sync_client()
|
|
150
|
+
resp = client.post(
|
|
151
|
+
url,
|
|
152
|
+
headers=_build_headers(config),
|
|
153
|
+
json=_build_request_body(skill, prompt, config),
|
|
154
|
+
)
|
|
155
|
+
resp.raise_for_status()
|
|
156
|
+
data = resp.json()
|
|
157
|
+
except (OSError, ValueError, KeyError, TypeError) as e:
|
|
158
|
+
logger.warning("LLM call failed for skill %r: %s", skill.name, e)
|
|
159
|
+
return None
|
|
160
|
+
|
|
161
|
+
return _parse_response(data, categories, skill.name, include_reason)
|
|
162
|
+
|
|
163
|
+
async def classify_async(
|
|
164
|
+
self,
|
|
165
|
+
signal: Signal,
|
|
166
|
+
skill: Skill,
|
|
167
|
+
categories: list[str],
|
|
168
|
+
include_reason: bool = False,
|
|
169
|
+
) -> SenseResult | None:
|
|
170
|
+
config = skill.llm if skill.llm is not None else self._config
|
|
171
|
+
prompt = _build_prompt(signal, skill, categories, config)
|
|
172
|
+
url = f"{config.base_url.rstrip('/')}/chat/completions"
|
|
173
|
+
|
|
174
|
+
try:
|
|
175
|
+
client = await self._get_async_client()
|
|
176
|
+
resp = await client.post(
|
|
177
|
+
url,
|
|
178
|
+
headers=_build_headers(config),
|
|
179
|
+
json=_build_request_body(skill, prompt, config),
|
|
180
|
+
)
|
|
181
|
+
resp.raise_for_status()
|
|
182
|
+
data = resp.json()
|
|
183
|
+
except (OSError, ValueError, KeyError, TypeError) as e:
|
|
184
|
+
logger.warning("LLM call failed for skill %r: %s", skill.name, e)
|
|
185
|
+
return None
|
|
186
|
+
|
|
187
|
+
return _parse_response(data, categories, skill.name, include_reason)
|
|
188
|
+
|
|
189
|
+
def close_sync(self) -> None:
|
|
190
|
+
if self._sync_client is not None:
|
|
191
|
+
self._sync_client.close()
|
|
192
|
+
self._sync_client = None
|
|
193
|
+
|
|
194
|
+
async def close_async(self) -> None:
|
|
195
|
+
if self._async_client is not None:
|
|
196
|
+
await self._async_client.aclose()
|
|
197
|
+
self._async_client = None
|
|
198
|
+
|
|
199
|
+
    async def close(self) -> None:
        """Close both the sync and async clients (each is a no-op if unused)."""
        self.close_sync()
        await self.close_async()
|