cc-transcript 0.7.1__tar.gz → 0.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/PKG-INFO +5 -1
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/README.md +2 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/domains/mining/__init__.py +37 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/domains/mining/context.py +29 -0
- cc_transcript-0.8.0/cc_transcript/domains/mining/filterspec.py +128 -0
- cc_transcript-0.8.0/cc_transcript/domains/mining/llm.py +91 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/domains/mining/signals.py +85 -14
- cc_transcript-0.8.0/cc_transcript/domains/mining/verdicts.py +615 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/pyproject.toml +2 -1
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/Cargo.lock +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/Cargo.toml +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/LICENSE +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/__init__.py +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/_parser_rs.pyi +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/backend.py +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/builders.py +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/discovery.py +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/domains/__init__.py +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/domains/mining/candidates.py +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/domains/mining/confidence.py +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/domains/mining/formats.py +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/domains/mining/markers.py +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/domains/mining/nav.py +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/domains/mining/sourcekind.py +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/domains/mining/store.py +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/domains/sentiment/__init__.py +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/domains/sentiment/buckets.py +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/domains/sentiment/engine.py +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/domains/sentiment/lexicon.py +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/domains/sentiment/scorespec.py +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/filters.py +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/filterspec.py +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/messages.py +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/models.py +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/parser.py +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/py.typed +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/rust.py +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/sentiment/__init__.py +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/sentiment/buckets.py +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/sentiment/lexicon.py +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/sentiment/messages.py +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/cc_transcript/store.py +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/rust/Cargo.toml +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/rust/data/afinn-en-165.tsv +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/rust/data/domain_overrides.tsv +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/rust/src/event.rs +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/rust/src/filter.rs +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/rust/src/lexicon.rs +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/rust/src/lib.rs +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/rust/src/model.rs +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/rust/src/score.rs +0 -0
- {cc_transcript-0.7.1 → cc_transcript-0.8.0}/rust/src/value.rs +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cc-transcript
|
|
3
|
-
Version: 0.7.1
|
|
3
|
+
Version: 0.8.0
|
|
4
4
|
Classifier: Development Status :: 3 - Alpha
|
|
5
5
|
Classifier: Intended Audience :: Developers
|
|
6
6
|
Classifier: Operating System :: OS Independent
|
|
@@ -14,10 +14,12 @@ Requires-Dist: pytest>=8.0 ; extra == 'dev'
|
|
|
14
14
|
Requires-Dist: ty>=0.0.44 ; extra == 'dev'
|
|
15
15
|
Requires-Dist: ruff>=0.8 ; extra == 'dev'
|
|
16
16
|
Requires-Dist: cc-transcript[sentiment] ; extra == 'lexicon'
|
|
17
|
+
Requires-Dist: spawnllm>=0.1.3 ; extra == 'llm'
|
|
17
18
|
Requires-Dist: spacy>=3.8 ; extra == 'sentiment'
|
|
18
19
|
Requires-Dist: afinn>=0.1 ; extra == 'sentiment'
|
|
19
20
|
Provides-Extra: dev
|
|
20
21
|
Provides-Extra: lexicon
|
|
22
|
+
Provides-Extra: llm
|
|
21
23
|
Provides-Extra: sentiment
|
|
22
24
|
License-File: LICENSE
|
|
23
25
|
Summary: Typed events for Claude Code transcripts: discovery, a superset JSONL parser (Python + Rust), and ingestion-state tracking.
|
|
@@ -34,6 +36,8 @@ Project-URL: Repository, https://github.com/yasyf/cc-transcript
|
|
|
34
36
|
|
|
35
37
|
# cc-transcript
|
|
36
38
|
|
|
39
|
+

|
|
40
|
+
|
|
37
41
|
[](https://pypi.org/project/cc-transcript/)
|
|
38
42
|
[](https://pypi.org/project/cc-transcript/)
|
|
39
43
|
[](https://yasyf.github.io/cc-transcript/)
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# cc-transcript
|
|
2
2
|
|
|
3
|
+

|
|
4
|
+
|
|
3
5
|
[](https://pypi.org/project/cc-transcript/)
|
|
4
6
|
[](https://pypi.org/project/cc-transcript/)
|
|
5
7
|
[](https://yasyf.github.io/cc-transcript/)
|
|
@@ -30,14 +30,33 @@ from cc_transcript.domains.mining.confidence import (
|
|
|
30
30
|
)
|
|
31
31
|
from cc_transcript.domains.mining.context import (
|
|
32
32
|
TOOL_INPUT_LIMIT,
|
|
33
|
+
TURN_TEXT_LIMIT,
|
|
33
34
|
ContextSnapshot,
|
|
34
35
|
ContextTurn,
|
|
35
36
|
build_snapshot,
|
|
37
|
+
clip,
|
|
38
|
+
render_turn,
|
|
39
|
+
render_turns,
|
|
36
40
|
summarize_tool_input,
|
|
37
41
|
trigger_for,
|
|
38
42
|
turn_for,
|
|
39
43
|
)
|
|
44
|
+
from cc_transcript.domains.mining.filterspec import (
|
|
45
|
+
CandidateClause,
|
|
46
|
+
CandidateFilterSpec,
|
|
47
|
+
CandidatePredicate,
|
|
48
|
+
ConfidenceAtLeast,
|
|
49
|
+
HasReason,
|
|
50
|
+
IsDurable,
|
|
51
|
+
SourceKindIn,
|
|
52
|
+
apply_candidate_filter,
|
|
53
|
+
at_least,
|
|
54
|
+
build_candidate_filter,
|
|
55
|
+
keep_candidate,
|
|
56
|
+
only_kinds,
|
|
57
|
+
)
|
|
40
58
|
from cc_transcript.domains.mining.formats import ReviewComment, ReviewFormat, extract_all
|
|
59
|
+
from cc_transcript.domains.mining.llm import resolved_model, run_structured, structured_judge
|
|
41
60
|
from cc_transcript.domains.mining.markers import (
|
|
42
61
|
DENIAL_PREFIX,
|
|
43
62
|
EDIT_TOOLS,
|
|
@@ -77,3 +96,21 @@ from cc_transcript.domains.mining.sourcekind import (
|
|
|
77
96
|
SourceKind,
|
|
78
97
|
)
|
|
79
98
|
from cc_transcript.domains.mining.store import FEEDBACK_DDL, FeedbackStore, Stats, event_row
|
|
99
|
+
from cc_transcript.domains.mining.verdicts import (
|
|
100
|
+
AuditEstimate,
|
|
101
|
+
AuditSample,
|
|
102
|
+
Disagreement,
|
|
103
|
+
Flip,
|
|
104
|
+
FlipReport,
|
|
105
|
+
GoldenFailure,
|
|
106
|
+
GoldenResult,
|
|
107
|
+
GoldenRow,
|
|
108
|
+
Metrics,
|
|
109
|
+
VerdictLike,
|
|
110
|
+
VerdictStoreMixin,
|
|
111
|
+
exact_upper_bound,
|
|
112
|
+
flip_pairs,
|
|
113
|
+
golden_result,
|
|
114
|
+
run_verdicts,
|
|
115
|
+
sample_audit,
|
|
116
|
+
)
|
|
@@ -4,6 +4,7 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
import json
|
|
6
6
|
from dataclasses import dataclass
|
|
7
|
+
from itertools import zip_longest
|
|
7
8
|
from typing import TYPE_CHECKING, Literal
|
|
8
9
|
|
|
9
10
|
from cc_transcript.models import AssistantEvent, ToolUseBlock, UserEvent
|
|
@@ -16,6 +17,7 @@ if TYPE_CHECKING:
|
|
|
16
17
|
|
|
17
18
|
ASSISTANT_TEXT_LIMIT = 2000
|
|
18
19
|
TOOL_INPUT_LIMIT = 1500
|
|
20
|
+
TURN_TEXT_LIMIT = 700
|
|
19
21
|
|
|
20
22
|
|
|
21
23
|
@dataclass(frozen=True, slots=True)
|
|
@@ -121,6 +123,33 @@ def summarize_tool_input(name: str, input: Mapping[str, Any]) -> str:
|
|
|
121
123
|
return summary[:TOOL_INPUT_LIMIT]
|
|
122
124
|
|
|
123
125
|
|
|
126
|
+
def clip(text: str, limit: int) -> str:
|
|
127
|
+
"""Truncates ``text`` to ``limit`` characters, marking any cut with an ellipsis."""
|
|
128
|
+
return text if len(text) <= limit else text[:limit].rstrip() + "…"
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def render_turn(turn: ContextTurn, limit: int = TURN_TEXT_LIMIT) -> str:
|
|
132
|
+
"""Renders one turn as ``role: text`` plus one indented line per tool call.
|
|
133
|
+
|
|
134
|
+
Args:
|
|
135
|
+
turn: The turn to render.
|
|
136
|
+
limit: The character budget for the turn text and each tool input.
|
|
137
|
+
|
|
138
|
+
Returns:
|
|
139
|
+
The prompt-ready rendering, tool inputs included.
|
|
140
|
+
"""
|
|
141
|
+
tools = "".join(
|
|
142
|
+
f"\n {name}({clip(input, limit)})" if input else f"\n {name}()"
|
|
143
|
+
for name, input in zip_longest(turn.tool_calls, turn.tool_inputs, fillvalue="")
|
|
144
|
+
)
|
|
145
|
+
return f"{turn.role}: {clip(turn.text, limit)}{tools}"
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def render_turns(turns: Sequence[ContextTurn]) -> str:
|
|
149
|
+
"""Renders a window of turns, one per line, or ``(none)`` when empty."""
|
|
150
|
+
return "\n".join(render_turn(turn) for turn in turns) or "(none)"
|
|
151
|
+
|
|
152
|
+
|
|
124
153
|
def turn_for(event: UserEvent | AssistantEvent) -> ContextTurn:
|
|
125
154
|
match event:
|
|
126
155
|
case UserEvent():
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""Declarative filtering of mined feedback candidates.
|
|
2
|
+
|
|
3
|
+
A :class:`CandidateFilterSpec` is an ordered tuple of :class:`CandidateClause`
|
|
4
|
+
rules — the candidate-level companion to the event-level
|
|
5
|
+
:class:`cc_transcript.filterspec.FilterSpec`. A candidate survives when every
|
|
6
|
+
clause matches, after per-clause negation. The core ships no concrete spec and
|
|
7
|
+
no thresholds; the consumer owns policy and composes its own spec from the
|
|
8
|
+
builders here.
|
|
9
|
+
|
|
10
|
+
Example:
|
|
11
|
+
>>> from cc_transcript.domains.mining import NOISE_FLOOR, REVIEW_COMMENT
|
|
12
|
+
>>> spec = build_candidate_filter(at_least(NOISE_FLOOR), only_kinds(REVIEW_COMMENT))
|
|
13
|
+
>>> kept = list(apply_candidate_filter(candidates, spec))
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from dataclasses import dataclass
|
|
19
|
+
from typing import TYPE_CHECKING
|
|
20
|
+
|
|
21
|
+
from cc_transcript.domains.mining.confidence import effective_confidence
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from collections.abc import Iterable, Iterator
|
|
25
|
+
|
|
26
|
+
from cc_transcript.domains.mining.candidates import FeedbackCandidate
|
|
27
|
+
from cc_transcript.domains.mining.confidence import Confidence
|
|
28
|
+
from cc_transcript.domains.mining.sourcekind import SourceKind
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass(frozen=True, slots=True)
|
|
32
|
+
class ConfidenceAtLeast:
|
|
33
|
+
"""Matches candidates whose effective confidence is at least ``floor``.
|
|
34
|
+
|
|
35
|
+
A candidate without a stored signal scores
|
|
36
|
+
:data:`~cc_transcript.domains.mining.confidence.MEDIUM` via
|
|
37
|
+
:func:`~cc_transcript.domains.mining.confidence.effective_confidence`, so
|
|
38
|
+
legacy rows pass any floor at or below it.
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
floor: Confidence
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass(frozen=True, slots=True)
|
|
45
|
+
class SourceKindIn:
|
|
46
|
+
"""Matches candidates whose ``source_kind`` is in ``kinds``."""
|
|
47
|
+
|
|
48
|
+
kinds: frozenset[SourceKind]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass(frozen=True, slots=True)
|
|
52
|
+
class HasReason:
|
|
53
|
+
"""Matches candidates whose signal carries the reason code ``reason``."""
|
|
54
|
+
|
|
55
|
+
reason: str
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@dataclass(frozen=True, slots=True)
|
|
59
|
+
class IsDurable:
|
|
60
|
+
"""Matches candidates whose signal durability equals ``want``.
|
|
61
|
+
|
|
62
|
+
A candidate without a stored signal counts as durable, mirroring
|
|
63
|
+
:class:`~cc_transcript.domains.mining.confidence.CandidateSignal`'s default.
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
want: bool
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
CandidatePredicate = ConfidenceAtLeast | SourceKindIn | HasReason | IsDurable
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@dataclass(frozen=True, slots=True)
|
|
73
|
+
class CandidateClause:
|
|
74
|
+
"""One filter rule: the candidate must satisfy ``predicate``.
|
|
75
|
+
|
|
76
|
+
Attributes:
|
|
77
|
+
predicate: The condition tested against a candidate.
|
|
78
|
+
negate: Invert the predicate match.
|
|
79
|
+
"""
|
|
80
|
+
|
|
81
|
+
predicate: CandidatePredicate
|
|
82
|
+
negate: bool = False
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@dataclass(frozen=True, slots=True)
|
|
86
|
+
class CandidateFilterSpec:
|
|
87
|
+
"""An ordered tuple of :class:`CandidateClause` rules, all of which must hold."""
|
|
88
|
+
|
|
89
|
+
clauses: tuple[CandidateClause, ...]
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def predicate_matches(predicate: CandidatePredicate, candidate: FeedbackCandidate) -> bool:
|
|
93
|
+
match predicate:
|
|
94
|
+
case ConfidenceAtLeast(floor):
|
|
95
|
+
return effective_confidence(candidate.signal) >= floor
|
|
96
|
+
case SourceKindIn(kinds):
|
|
97
|
+
return candidate.source_kind in kinds
|
|
98
|
+
case HasReason(reason):
|
|
99
|
+
return candidate.signal is not None and reason in candidate.signal.reasons
|
|
100
|
+
case IsDurable(want):
|
|
101
|
+
return (candidate.signal is None or candidate.signal.durable) is want
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def keep_candidate(candidate: FeedbackCandidate, spec: CandidateFilterSpec) -> bool:
|
|
105
|
+
"""Returns whether ``candidate`` satisfies every clause of ``spec``."""
|
|
106
|
+
return all(predicate_matches(clause.predicate, candidate) is not clause.negate for clause in spec.clauses)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def apply_candidate_filter(
|
|
110
|
+
candidates: Iterable[FeedbackCandidate], spec: CandidateFilterSpec
|
|
111
|
+
) -> Iterator[FeedbackCandidate]:
|
|
112
|
+
"""Yields the candidates that satisfy every clause of ``spec``."""
|
|
113
|
+
return (candidate for candidate in candidates if keep_candidate(candidate, spec))
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def at_least(floor: Confidence) -> CandidateClause:
|
|
117
|
+
"""Returns a clause keeping candidates at or above ``floor`` confidence."""
|
|
118
|
+
return CandidateClause(ConfidenceAtLeast(floor))
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def only_kinds(*kinds: SourceKind) -> CandidateClause:
|
|
122
|
+
"""Returns a clause keeping candidates whose ``source_kind`` is one of ``kinds``."""
|
|
123
|
+
return CandidateClause(SourceKindIn(frozenset(kinds)))
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def build_candidate_filter(*clauses: CandidateClause) -> CandidateFilterSpec:
|
|
127
|
+
"""Composes ``clauses`` into a :class:`CandidateFilterSpec`."""
|
|
128
|
+
return CandidateFilterSpec(clauses=clauses)
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""Headless structured completions via the ``claude`` CLI, behind the ``[llm]`` extra.
|
|
2
|
+
|
|
3
|
+
Argv construction and envelope parsing come from the shared ``spawnllm`` library;
|
|
4
|
+
the spawn stays local (``anyio.run_process``). It uses the user's existing Claude
|
|
5
|
+
Code auth (no API key). ``spawnllm`` and ``pydantic`` load lazily inside each
|
|
6
|
+
function, so importing the mining domain needs no extra installed.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import os
|
|
12
|
+
import subprocess
|
|
13
|
+
from typing import TYPE_CHECKING
|
|
14
|
+
|
|
15
|
+
import anyio
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from collections.abc import Awaitable, Callable
|
|
19
|
+
|
|
20
|
+
from pydantic import BaseModel
|
|
21
|
+
from spawnllm import TModel
|
|
22
|
+
|
|
23
|
+
CLAUDE_TIMEOUT = 180
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def resolved_model(tier: TModel) -> str:
|
|
27
|
+
"""Returns the concrete Claude model name for an abstract tier.
|
|
28
|
+
|
|
29
|
+
A verdict store's unique key includes the model string, so the resolution
|
|
30
|
+
must stay byte-identical across releases for a judged corpus to stay valid.
|
|
31
|
+
"""
|
|
32
|
+
from spawnllm import ClaudeCliBackend
|
|
33
|
+
|
|
34
|
+
return ClaudeCliBackend.models[tier]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
async def run_structured[M: BaseModel](
|
|
38
|
+
prompt: str, *, response_model: type[M], tier: TModel, timeout: int = CLAUDE_TIMEOUT
|
|
39
|
+
) -> M:
|
|
40
|
+
"""Runs one headless ``claude`` turn and parses its structured output.
|
|
41
|
+
|
|
42
|
+
The prompt is delivered over stdin and the response is forced into
|
|
43
|
+
``response_model``'s JSON schema via the CLI's ``--json-schema`` flag. The
|
|
44
|
+
structured path runs with an empty system prompt, so all instructions must
|
|
45
|
+
live in ``prompt``.
|
|
46
|
+
|
|
47
|
+
Args:
|
|
48
|
+
prompt: The full prompt, instructions included.
|
|
49
|
+
response_model: The pydantic model the response must validate against.
|
|
50
|
+
tier: The abstract model tier to run, resolved by the Claude backend.
|
|
51
|
+
timeout: The per-call wall-clock budget in seconds.
|
|
52
|
+
|
|
53
|
+
Returns:
|
|
54
|
+
The validated ``response_model`` instance.
|
|
55
|
+
|
|
56
|
+
Raises:
|
|
57
|
+
subprocess.SubprocessError: If ``claude`` exits non-zero or times out.
|
|
58
|
+
pydantic.ValidationError: If the response does not match the schema.
|
|
59
|
+
"""
|
|
60
|
+
from spawnllm import ClaudeCliBackend, parse_structured_output, resolve_schema_path, schema_for
|
|
61
|
+
|
|
62
|
+
backend = ClaudeCliBackend()
|
|
63
|
+
argv = backend.build_command(
|
|
64
|
+
backend.models[tier], resolve_schema_path(backend, schema_for(response_model)), agent=False
|
|
65
|
+
)
|
|
66
|
+
try:
|
|
67
|
+
with anyio.fail_after(timeout):
|
|
68
|
+
result = await anyio.run_process(argv, input=prompt.encode(), check=True, env=os.environ | backend.env())
|
|
69
|
+
except TimeoutError as exc:
|
|
70
|
+
raise subprocess.TimeoutExpired(argv, timeout) from exc
|
|
71
|
+
return parse_structured_output(result.stdout.decode(), response_model)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def structured_judge[M: BaseModel](
|
|
75
|
+
response_model: type[M], *, tier: TModel, timeout: int = CLAUDE_TIMEOUT
|
|
76
|
+
) -> Callable[[str], Awaitable[M]]:
|
|
77
|
+
"""Returns a prompt-to-verdict callable that plugs into :func:`run_verdicts`.
|
|
78
|
+
|
|
79
|
+
Args:
|
|
80
|
+
response_model: The pydantic model each response must validate against.
|
|
81
|
+
tier: The abstract model tier to run.
|
|
82
|
+
timeout: The per-call wall-clock budget in seconds.
|
|
83
|
+
|
|
84
|
+
Returns:
|
|
85
|
+
A callable awaiting one structured completion per prompt.
|
|
86
|
+
|
|
87
|
+
Example:
|
|
88
|
+
>>> judge = structured_judge(Verdict, tier="medium")
|
|
89
|
+
>>> await run_verdicts(rows, prompt_for, judge, persist, concurrency=8)
|
|
90
|
+
"""
|
|
91
|
+
return lambda prompt: run_structured(prompt, response_model=response_model, tier=tier, timeout=timeout)
|
|
@@ -4,17 +4,25 @@ Each iterator recognizes one transcript shape and yields a :class:`MiningSignal`
|
|
|
4
4
|
describing it. A signal is a neutral fact: it carries a candidate ``trigger_index``
|
|
5
5
|
but never disqualifies on its absence, never applies a ``FilterSpec``, and never
|
|
6
6
|
builds an app candidate. The app maps signals to its own records with policy injected.
|
|
7
|
+
|
|
8
|
+
Every signal carries a calibrated :class:`CandidateSignal` spanning the full
|
|
9
|
+
confidence band: arithmetic bumps and demotions over the anchors, with named
|
|
10
|
+
reason codes (``trigger_proximate``, ``short_followup``, ``substantive``,
|
|
11
|
+
``hedged``, ``embedded_text``, ``bare_marker``, ``structural_only``) so apps can
|
|
12
|
+
filter on :func:`~cc_transcript.domains.mining.confidence.effective_confidence`
|
|
13
|
+
and reasons instead of re-deriving them.
|
|
7
14
|
"""
|
|
8
15
|
|
|
9
16
|
from __future__ import annotations
|
|
10
17
|
|
|
18
|
+
import re
|
|
11
19
|
from dataclasses import dataclass, field
|
|
12
|
-
from typing import TYPE_CHECKING
|
|
20
|
+
from typing import TYPE_CHECKING, NamedTuple
|
|
13
21
|
|
|
14
22
|
from cc_transcript import STRUCTURAL_NOISE_RE
|
|
15
23
|
from cc_transcript.models import AssistantEvent, ModeEvent, UserEvent
|
|
16
24
|
|
|
17
|
-
from cc_transcript.domains.mining.confidence import firm, weak
|
|
25
|
+
from cc_transcript.domains.mining.confidence import CandidateSignal, Confidence, firm, noise, weak
|
|
18
26
|
from cc_transcript.domains.mining.formats import extract_all
|
|
19
27
|
from cc_transcript.domains.mining.nav import (
|
|
20
28
|
denial_results,
|
|
@@ -40,10 +48,17 @@ if TYPE_CHECKING:
|
|
|
40
48
|
|
|
41
49
|
from cc_transcript.models import CcVersion, EntryUuid, SessionId, TranscriptEvent
|
|
42
50
|
|
|
43
|
-
from cc_transcript.domains.mining.confidence import CandidateSignal
|
|
44
51
|
from cc_transcript.domains.mining.formats import ReviewFormat
|
|
45
52
|
from cc_transcript.domains.mining.sourcekind import SourceKind
|
|
46
53
|
|
|
54
|
+
CONFIDENCE_STEP = 0.25
|
|
55
|
+
SHORT_FOLLOWUP_MAX_WORDS = 2
|
|
56
|
+
TIGHT_PROXIMITY = 2
|
|
57
|
+
HEDGE_RE = re.compile(
|
|
58
|
+
r"\b(?:maybe|perhaps|possibly|might|not sure|i think|i guess|if you (?:want|prefer)|up to you)\b",
|
|
59
|
+
re.IGNORECASE,
|
|
60
|
+
)
|
|
61
|
+
|
|
47
62
|
|
|
48
63
|
@dataclass(frozen=True, slots=True)
|
|
49
64
|
class MiningSignal:
|
|
@@ -79,6 +94,11 @@ class MiningSignal:
|
|
|
79
94
|
signal: CandidateSignal | None = None
|
|
80
95
|
|
|
81
96
|
|
|
97
|
+
class ScoredText(NamedTuple):
|
|
98
|
+
text: str
|
|
99
|
+
signal: CandidateSignal
|
|
100
|
+
|
|
101
|
+
|
|
82
102
|
def nearest_assistant_index(events: Sequence[TranscriptEvent], index: int) -> int | None:
|
|
83
103
|
return next((i for i in range(index - 1, -1, -1) if isinstance(events[i], AssistantEvent)), None)
|
|
84
104
|
|
|
@@ -92,6 +112,57 @@ def correction_text(events: Sequence[TranscriptEvent], index: int) -> str | None
|
|
|
92
112
|
return None
|
|
93
113
|
|
|
94
114
|
|
|
115
|
+
def first_followup(events: Sequence[TranscriptEvent], index: int) -> str | None:
|
|
116
|
+
while (found := next_user_message(events, index + 1)) is not None:
|
|
117
|
+
index, event = found
|
|
118
|
+
if not is_bare_interrupt_marker(event.text):
|
|
119
|
+
return event.text
|
|
120
|
+
return None
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def adjust(signal: CandidateSignal, delta: float, reason: str) -> CandidateSignal:
|
|
124
|
+
return CandidateSignal(
|
|
125
|
+
Confidence(min(1.0, max(0.0, signal.confidence + delta))), (*signal.reasons, reason), signal.durable
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def is_substantive(text: str) -> bool:
|
|
130
|
+
return len(text.split()) > SHORT_FOLLOWUP_MAX_WORDS and not STRUCTURAL_NOISE_RE.search(text)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def is_proximate(index: int, trigger: int | None) -> bool:
|
|
134
|
+
return trigger is not None and index - trigger <= TIGHT_PROXIMITY
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def calibrated(text: str, *reasons: str) -> CandidateSignal:
|
|
138
|
+
base = firm(*reasons)
|
|
139
|
+
promoted = adjust(base, CONFIDENCE_STEP, "substantive") if is_substantive(text) else base
|
|
140
|
+
return adjust(promoted, -CONFIDENCE_STEP, "hedged") if HEDGE_RE.search(text) else promoted
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def score_user_message(text: str, index: int, trigger: int | None) -> CandidateSignal:
|
|
144
|
+
if STRUCTURAL_NOISE_RE.search(text):
|
|
145
|
+
return noise("structural_only")
|
|
146
|
+
base = firm("user_message")
|
|
147
|
+
short = len(text.split()) <= SHORT_FOLLOWUP_MAX_WORDS
|
|
148
|
+
demoted = adjust(base, -CONFIDENCE_STEP, "short_followup") if short else base
|
|
149
|
+
return adjust(demoted, CONFIDENCE_STEP, "trigger_proximate") if is_proximate(index, trigger) else demoted
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def marker_correction(events: Sequence[TranscriptEvent], index: int) -> ScoredText | None:
|
|
153
|
+
if (correction := correction_text(events, index)) is not None:
|
|
154
|
+
return ScoredText(correction, weak("bare_marker"))
|
|
155
|
+
if (followup := first_followup(events, index)) is not None:
|
|
156
|
+
return ScoredText(followup, noise("structural_only"))
|
|
157
|
+
return None
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def denial_correction(events: Sequence[TranscriptEvent], index: int, embedded: str | None) -> ScoredText | None:
|
|
161
|
+
if embedded:
|
|
162
|
+
return ScoredText(embedded, calibrated(embedded, "embedded_text"))
|
|
163
|
+
return marker_correction(events, index)
|
|
164
|
+
|
|
165
|
+
|
|
95
166
|
def iter_user_message_signals(events: Sequence[TranscriptEvent]) -> Iterator[MiningSignal]:
|
|
96
167
|
return (
|
|
97
168
|
MiningSignal(
|
|
@@ -103,8 +174,8 @@ def iter_user_message_signals(events: Sequence[TranscriptEvent]) -> Iterator[Min
|
|
|
103
174
|
occurred_at=event.meta.timestamp,
|
|
104
175
|
text=event.text,
|
|
105
176
|
cc_version=event.meta.cc_version,
|
|
106
|
-
trigger_index=nearest_assistant_index(events, index),
|
|
107
|
-
signal=
|
|
177
|
+
trigger_index=(trigger := nearest_assistant_index(events, index)),
|
|
178
|
+
signal=score_user_message(event.text, index, trigger),
|
|
108
179
|
)
|
|
109
180
|
for index, event in enumerate(events)
|
|
110
181
|
if isinstance(event, UserEvent)
|
|
@@ -126,7 +197,7 @@ def iter_plan_rejection_signals(events: Sequence[TranscriptEvent]) -> Iterator[M
|
|
|
126
197
|
text=text,
|
|
127
198
|
cc_version=event.meta.cc_version,
|
|
128
199
|
trigger_index=nearest_assistant_index(events, index),
|
|
129
|
-
signal=
|
|
200
|
+
signal=calibrated(text, "embedded_text"),
|
|
130
201
|
)
|
|
131
202
|
for index, event in enumerate(events)
|
|
132
203
|
if isinstance(event, UserEvent)
|
|
@@ -161,7 +232,7 @@ def iter_plan_reentry_signals(events: Sequence[TranscriptEvent]) -> Iterator[Min
|
|
|
161
232
|
cc_version=user_event.meta.cc_version,
|
|
162
233
|
trigger_index=nearest_assistant_index(events, user_index),
|
|
163
234
|
lower_bound=edit,
|
|
164
|
-
signal=
|
|
235
|
+
signal=calibrated(user_event.text, "reentry_after_edit"),
|
|
165
236
|
)
|
|
166
237
|
|
|
167
238
|
|
|
@@ -175,17 +246,17 @@ def iter_tool_denial_signals(events: Sequence[TranscriptEvent]) -> Iterator[Mini
|
|
|
175
246
|
event_index=index,
|
|
176
247
|
event_uuid=event.meta.uuid,
|
|
177
248
|
occurred_at=event.meta.timestamp,
|
|
178
|
-
text=text,
|
|
249
|
+
text=scored.text,
|
|
179
250
|
cc_version=event.meta.cc_version,
|
|
180
251
|
trigger_index=nearest_assistant_index(events, index),
|
|
181
252
|
evidence=denied_tool_payload(paired) if paired else {},
|
|
182
|
-
signal=
|
|
253
|
+
signal=scored.signal,
|
|
183
254
|
)
|
|
184
255
|
for index, event in enumerate(events)
|
|
185
256
|
if isinstance(event, UserEvent)
|
|
186
257
|
for block in denial_results(event)
|
|
187
258
|
if (paired := uses.get(block.tool_use_id)) is None or paired.name not in {"ExitPlanMode", "AskUserQuestion"}
|
|
188
|
-
if (
|
|
259
|
+
if (scored := denial_correction(events, index, embedded_user_text(block.content))) is not None
|
|
189
260
|
)
|
|
190
261
|
|
|
191
262
|
|
|
@@ -198,15 +269,15 @@ def iter_interrupt_marker_signals(events: Sequence[TranscriptEvent]) -> Iterator
|
|
|
198
269
|
event_index=index,
|
|
199
270
|
event_uuid=event.meta.uuid,
|
|
200
271
|
occurred_at=event.meta.timestamp,
|
|
201
|
-
text=
|
|
272
|
+
text=scored.text,
|
|
202
273
|
cc_version=event.meta.cc_version,
|
|
203
274
|
trigger_index=nearest_assistant_index(events, index),
|
|
204
|
-
signal=
|
|
275
|
+
signal=scored.signal,
|
|
205
276
|
)
|
|
206
277
|
for index, event in enumerate(events)
|
|
207
278
|
if isinstance(event, UserEvent)
|
|
208
279
|
if marker_in(event) is not None
|
|
209
|
-
if (
|
|
280
|
+
if (scored := marker_correction(events, index)) is not None
|
|
210
281
|
)
|
|
211
282
|
|
|
212
283
|
|
|
@@ -230,7 +301,7 @@ def iter_review_comment_signals(
|
|
|
230
301
|
"line_start": comment.line_start,
|
|
231
302
|
"line_end": comment.line_end,
|
|
232
303
|
},
|
|
233
|
-
signal=
|
|
304
|
+
signal=calibrated(comment.comment, "format_match"),
|
|
234
305
|
)
|
|
235
306
|
for index, event in enumerate(events)
|
|
236
307
|
if isinstance(event, UserEvent)
|