cc-transcript 0.6.0__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. {cc_transcript-0.6.0 → cc_transcript-0.7.0}/Cargo.lock +1 -1
  2. {cc_transcript-0.6.0 → cc_transcript-0.7.0}/PKG-INFO +5 -3
  3. {cc_transcript-0.6.0 → cc_transcript-0.7.0}/cc_transcript/__init__.py +8 -1
  4. cc_transcript-0.7.0/cc_transcript/domains/__init__.py +9 -0
  5. cc_transcript-0.7.0/cc_transcript/domains/mining/__init__.py +77 -0
  6. cc_transcript-0.7.0/cc_transcript/domains/mining/candidates.py +70 -0
  7. cc_transcript-0.7.0/cc_transcript/domains/mining/confidence.py +74 -0
  8. cc_transcript-0.7.0/cc_transcript/domains/mining/context.py +140 -0
  9. cc_transcript-0.7.0/cc_transcript/domains/mining/formats.py +64 -0
  10. cc_transcript-0.7.0/cc_transcript/domains/mining/markers.py +11 -0
  11. cc_transcript-0.7.0/cc_transcript/domains/mining/nav.py +100 -0
  12. cc_transcript-0.7.0/cc_transcript/domains/mining/signals.py +247 -0
  13. cc_transcript-0.7.0/cc_transcript/domains/mining/sourcekind.py +17 -0
  14. cc_transcript-0.7.0/cc_transcript/domains/mining/store.py +206 -0
  15. {cc_transcript-0.6.0/cc_transcript → cc_transcript-0.7.0/cc_transcript/domains}/sentiment/__init__.py +8 -8
  16. {cc_transcript-0.6.0/cc_transcript → cc_transcript-0.7.0/cc_transcript/domains}/sentiment/buckets.py +1 -1
  17. {cc_transcript-0.6.0/cc_transcript → cc_transcript-0.7.0/cc_transcript/domains}/sentiment/engine.py +3 -3
  18. {cc_transcript-0.6.0/cc_transcript → cc_transcript-0.7.0/cc_transcript/domains}/sentiment/lexicon.py +2 -2
  19. {cc_transcript-0.6.0/cc_transcript → cc_transcript-0.7.0/cc_transcript/domains}/sentiment/scorespec.py +2 -2
  20. {cc_transcript-0.6.0/cc_transcript/sentiment → cc_transcript-0.7.0/cc_transcript}/messages.py +2 -2
  21. {cc_transcript-0.6.0 → cc_transcript-0.7.0}/cc_transcript/models.py +3 -0
  22. {cc_transcript-0.6.0 → cc_transcript-0.7.0}/cc_transcript/parser.py +15 -6
  23. cc_transcript-0.7.0/cc_transcript/sentiment/__init__.py +39 -0
  24. cc_transcript-0.7.0/cc_transcript/sentiment/buckets.py +17 -0
  25. cc_transcript-0.7.0/cc_transcript/sentiment/lexicon.py +7 -0
  26. cc_transcript-0.7.0/cc_transcript/sentiment/messages.py +13 -0
  27. {cc_transcript-0.6.0 → cc_transcript-0.7.0}/pyproject.toml +4 -2
  28. {cc_transcript-0.6.0 → cc_transcript-0.7.0}/rust/Cargo.toml +1 -1
  29. {cc_transcript-0.6.0 → cc_transcript-0.7.0}/rust/src/event.rs +6 -1
  30. {cc_transcript-0.6.0 → cc_transcript-0.7.0}/Cargo.toml +0 -0
  31. {cc_transcript-0.6.0 → cc_transcript-0.7.0}/LICENSE +0 -0
  32. {cc_transcript-0.6.0 → cc_transcript-0.7.0}/README.md +0 -0
  33. {cc_transcript-0.6.0 → cc_transcript-0.7.0}/cc_transcript/_parser_rs.pyi +0 -0
  34. {cc_transcript-0.6.0 → cc_transcript-0.7.0}/cc_transcript/backend.py +0 -0
  35. {cc_transcript-0.6.0 → cc_transcript-0.7.0}/cc_transcript/builders.py +0 -0
  36. {cc_transcript-0.6.0 → cc_transcript-0.7.0}/cc_transcript/discovery.py +0 -0
  37. {cc_transcript-0.6.0 → cc_transcript-0.7.0}/cc_transcript/filters.py +0 -0
  38. {cc_transcript-0.6.0 → cc_transcript-0.7.0}/cc_transcript/filterspec.py +0 -0
  39. {cc_transcript-0.6.0 → cc_transcript-0.7.0}/cc_transcript/py.typed +0 -0
  40. {cc_transcript-0.6.0 → cc_transcript-0.7.0}/cc_transcript/rust.py +0 -0
  41. {cc_transcript-0.6.0 → cc_transcript-0.7.0}/cc_transcript/store.py +0 -0
  42. {cc_transcript-0.6.0 → cc_transcript-0.7.0}/rust/data/afinn-en-165.tsv +0 -0
  43. {cc_transcript-0.6.0 → cc_transcript-0.7.0}/rust/data/domain_overrides.tsv +0 -0
  44. {cc_transcript-0.6.0 → cc_transcript-0.7.0}/rust/src/filter.rs +0 -0
  45. {cc_transcript-0.6.0 → cc_transcript-0.7.0}/rust/src/lexicon.rs +0 -0
  46. {cc_transcript-0.6.0 → cc_transcript-0.7.0}/rust/src/lib.rs +0 -0
  47. {cc_transcript-0.6.0 → cc_transcript-0.7.0}/rust/src/model.rs +0 -0
  48. {cc_transcript-0.6.0 → cc_transcript-0.7.0}/rust/src/score.rs +0 -0
  49. {cc_transcript-0.6.0 → cc_transcript-0.7.0}/rust/src/value.rs +0 -0
@@ -66,7 +66,7 @@ dependencies = [
66
66
 
67
67
  [[package]]
68
68
  name = "cc_transcript_parser"
69
- version = "0.5.0"
69
+ version = "0.6.0"
70
70
  dependencies = [
71
71
  "chrono",
72
72
  "crossbeam-channel",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cc-transcript
3
- Version: 0.6.0
3
+ Version: 0.7.0
4
4
  Classifier: Development Status :: 3 - Alpha
5
5
  Classifier: Intended Audience :: Developers
6
6
  Classifier: Operating System :: OS Independent
@@ -13,10 +13,12 @@ Requires-Dist: orjson>=3.10
13
13
  Requires-Dist: pytest>=8.0 ; extra == 'dev'
14
14
  Requires-Dist: ty>=0.0.44 ; extra == 'dev'
15
15
  Requires-Dist: ruff>=0.8 ; extra == 'dev'
16
- Requires-Dist: spacy>=3.8 ; extra == 'lexicon'
17
- Requires-Dist: afinn>=0.1 ; extra == 'lexicon'
16
+ Requires-Dist: cc-transcript[sentiment] ; extra == 'lexicon'
17
+ Requires-Dist: spacy>=3.8 ; extra == 'sentiment'
18
+ Requires-Dist: afinn>=0.1 ; extra == 'sentiment'
18
19
  Provides-Extra: dev
19
20
  Provides-Extra: lexicon
21
+ Provides-Extra: sentiment
20
22
  License-File: LICENSE
21
23
  Summary: Typed events for Claude Code transcripts: discovery, a superset JSONL parser (Python + Rust), and ingestion-state tracking.
22
24
  Keywords:
@@ -38,6 +38,13 @@ from cc_transcript.filterspec import (
38
38
  keep,
39
39
  labels_for,
40
40
  )
41
+ from cc_transcript.messages import (
42
+ AssistantMessage,
43
+ BaseMessage,
44
+ ToolCall,
45
+ TranscriptMessage,
46
+ UserMessage,
47
+ )
41
48
  from cc_transcript.models import (
42
49
  AssistantEvent,
43
50
  CcVersion,
@@ -56,5 +63,5 @@ from cc_transcript.models import (
56
63
  TranscriptEvent,
57
64
  UserEvent,
58
65
  )
59
- from cc_transcript.parser import TranscriptParser, parse_events_async, parse_events_from_bytes
66
+ from cc_transcript.parser import TranscriptParser, parse_event, parse_events_async, parse_events_from_bytes
60
67
  from cc_transcript.store import FileStateStore
@@ -0,0 +1,9 @@
1
+ """Built-in domains layered on the cc-transcript core.
2
+
3
+ Each domain builds on the core transcript model and depends only on core — never on
4
+ another domain, and never the reverse. Heavy dependencies sit behind a per-domain
5
+ extra. Today: :mod:`cc_transcript.domains.sentiment` (scoring) and
6
+ :mod:`cc_transcript.domains.mining` (correction/feedback extraction).
7
+ """
8
+
9
+ from __future__ import annotations
@@ -0,0 +1,77 @@
1
+ # Re-exports establish the domain's public surface; pyright sees them as unused.
2
+ # pyright: reportUnusedImport=false
3
+ """The correction/feedback mining mechanism.
4
+
5
+ Neutral fact-detectors over Claude Code transcripts: each iterator recognizes a
6
+ transcript shape and yields a :class:`MiningSignal` — a neutral fact carrying a
7
+ candidate trigger, confidence, and evidence, but no policy. Apps map signals to
8
+ their own candidate records with policy injected (their filter spec, their
9
+ disqualification rules, their review formats), and persist them through
10
+ :class:`FeedbackStore`.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from cc_transcript.domains.mining.candidates import DedupKey, FeedbackCandidate, dedup_key
16
+ from cc_transcript.domains.mining.confidence import (
17
+ HIGH,
18
+ LOW,
19
+ MEDIUM,
20
+ NOISE_FLOOR,
21
+ NONE,
22
+ VERY_HIGH,
23
+ CandidateSignal,
24
+ Confidence,
25
+ effective_confidence,
26
+ firm,
27
+ noise,
28
+ strong,
29
+ weak,
30
+ )
31
+ from cc_transcript.domains.mining.context import (
32
+ ContextSnapshot,
33
+ ContextTurn,
34
+ build_snapshot,
35
+ trigger_for,
36
+ turn_for,
37
+ )
38
+ from cc_transcript.domains.mining.formats import ReviewComment, ReviewFormat, extract_all
39
+ from cc_transcript.domains.mining.markers import (
40
+ DENIAL_PREFIX,
41
+ EDIT_TOOLS,
42
+ INTERRUPT_MARKER_RE,
43
+ REENTRY_LOOKBACK,
44
+ USER_SAID_MARKER,
45
+ USER_SAID_TRAILER,
46
+ )
47
+ from cc_transcript.domains.mining.nav import (
48
+ denial_results,
49
+ denied_tool_payload,
50
+ embedded_user_text,
51
+ interrupt_marker,
52
+ is_bare_interrupt_marker,
53
+ last_edit_index,
54
+ marker_in,
55
+ next_user_message,
56
+ tool_uses,
57
+ )
58
+ from cc_transcript.domains.mining.signals import (
59
+ DEFAULT_DETECTORS,
60
+ MiningSignal,
61
+ correction_text,
62
+ iter_interrupt_marker_signals,
63
+ iter_plan_reentry_signals,
64
+ iter_plan_rejection_signals,
65
+ iter_review_comment_signals,
66
+ iter_tool_denial_signals,
67
+ iter_user_message_signals,
68
+ nearest_assistant_index,
69
+ )
70
+ from cc_transcript.domains.mining.sourcekind import (
71
+ INTERRUPT_REJECTION,
72
+ PLAN_REVIEW,
73
+ REVIEW_COMMENT,
74
+ TRANSCRIPT_MESSAGE,
75
+ SourceKind,
76
+ )
77
+ from cc_transcript.domains.mining.store import FEEDBACK_DDL, FeedbackStore, Stats, event_row
@@ -0,0 +1,70 @@
1
+ """The feedback candidate model and the dedup key that makes ingestion idempotent."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ from dataclasses import dataclass
7
+ from typing import TYPE_CHECKING, NewType
8
+
9
+ if TYPE_CHECKING:
10
+ from collections.abc import Mapping
11
+ from datetime import datetime
12
+ from pathlib import Path
13
+ from typing import Any
14
+
15
+ from cc_transcript.models import SessionId
16
+
17
+ from cc_transcript.domains.mining.confidence import CandidateSignal
18
+ from cc_transcript.domains.mining.context import ContextSnapshot
19
+ from cc_transcript.domains.mining.sourcekind import SourceKind
20
+
21
+ DedupKey = NewType("DedupKey", str)
22
+ """A content-derived SHA-256 key that makes feedback ingestion idempotent."""
23
+
24
+
25
+ @dataclass(frozen=True, slots=True)
26
+ class FeedbackCandidate:
27
+ """A single piece of developer pushback extracted from a transcript.
28
+
29
+ Attributes:
30
+ dedup_key: The content-derived key that makes ingestion idempotent.
31
+ source_kind: Which detector produced the candidate.
32
+ occurred_at: When the feedback was given.
33
+ text: The verbatim pushback text.
34
+ context: The conversational window around the feedback.
35
+ session_id: The transcript session the feedback came from.
36
+ origin_path: The file the candidate was extracted from.
37
+ origin_uuid: The originating transcript entry's uuid.
38
+ cc_version: The Claude Code version recorded for the origin.
39
+ payload: Detector-specific metadata preserved verbatim.
40
+ signal: The de-noising confidence signal, when computed.
41
+ """
42
+
43
+ dedup_key: DedupKey
44
+ source_kind: SourceKind
45
+ occurred_at: datetime
46
+ text: str
47
+ context: ContextSnapshot
48
+ session_id: SessionId | None = None
49
+ origin_path: Path | None = None
50
+ origin_uuid: str | None = None
51
+ cc_version: str | None = None
52
+ payload: Mapping[str, Any] | None = None
53
+ signal: CandidateSignal | None = None
54
+
55
+
56
+ def dedup_key(*parts: str) -> DedupKey:
57
+ """Returns the stable dedup key for ``parts``.
58
+
59
+ Detectors key on session, kind, and the feedback content (plus its code
60
+ location for review comments) rather than the transcript entry's uuid or the
61
+ absolute file path, so the same pushback recorded under two transcript entries
62
+ collapses to one row, and the database stays portable and idempotent across moves.
63
+
64
+ Args:
65
+ parts: The content fragments that uniquely identify a candidate.
66
+
67
+ Returns:
68
+ The SHA-256 hex digest of the parts joined by a null byte.
69
+ """
70
+ return DedupKey(hashlib.sha256("\x00".join(parts).encode()).hexdigest())
@@ -0,0 +1,74 @@
1
+ """The de-noising confidence primitive carried alongside mined feedback facts."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import TYPE_CHECKING, NewType
7
+
8
+ if TYPE_CHECKING:
9
+ from collections.abc import Mapping
10
+ from typing import Any
11
+
12
+ Confidence = NewType("Confidence", float)
13
+ """A de-noising score in the closed interval [0, 1]; higher is more trustworthy."""
14
+
15
+ NONE = Confidence(0.0)
16
+ LOW = Confidence(0.25)
17
+ MEDIUM = Confidence(0.5)
18
+ HIGH = Confidence(0.75)
19
+ VERY_HIGH = Confidence(0.95)
20
+ NOISE_FLOOR = LOW
21
+
22
+
23
+ @dataclass(frozen=True, slots=True)
24
+ class CandidateSignal:
25
+ """A confidence verdict on a mined fact, with the reasons that produced it.
26
+
27
+ Attributes:
28
+ confidence: The de-noising score in [0, 1].
29
+ reasons: The short reason codes that justify the score.
30
+ durable: Whether the signal should persist across re-derivation.
31
+ """
32
+
33
+ confidence: Confidence
34
+ reasons: tuple[str, ...] = ()
35
+ durable: bool = True
36
+
37
+
38
+ def strong(*reasons: str, durable: bool = True) -> CandidateSignal:
39
+ """Returns a :data:`HIGH`-confidence signal carrying ``reasons``."""
40
+ return CandidateSignal(HIGH, reasons, durable)
41
+
42
+
43
+ def firm(*reasons: str, durable: bool = True) -> CandidateSignal:
44
+ """Returns a :data:`MEDIUM`-confidence signal carrying ``reasons``."""
45
+ return CandidateSignal(MEDIUM, reasons, durable)
46
+
47
+
48
+ def weak(*reasons: str, durable: bool = True) -> CandidateSignal:
49
+ """Returns a :data:`LOW`-confidence signal carrying ``reasons``."""
50
+ return CandidateSignal(LOW, reasons, durable)
51
+
52
+
53
+ def noise(*reasons: str, durable: bool = True) -> CandidateSignal:
54
+ """Returns a :data:`NONE`-confidence signal carrying ``reasons``."""
55
+ return CandidateSignal(NONE, reasons, durable)
56
+
57
+
58
+ def effective_confidence(signal: CandidateSignal | None) -> Confidence:
59
+ """Returns ``signal``'s confidence, or :data:`MEDIUM` when no signal is set."""
60
+ return signal.confidence if signal else MEDIUM
61
+
62
+
63
+ def to_payload(signal: CandidateSignal) -> dict[str, Any]:
64
+ return {"confidence": signal.confidence, "reasons": list(signal.reasons), "durable": signal.durable}
65
+
66
+
67
+ def from_payload(data: Mapping[str, Any] | None) -> CandidateSignal | None:
68
+ if data is None:
69
+ return None
70
+ return CandidateSignal(
71
+ confidence=Confidence(data["confidence"]),
72
+ reasons=tuple(data["reasons"]),
73
+ durable=data["durable"],
74
+ )
@@ -0,0 +1,140 @@
1
+ """The conversational-window primitive captured around each piece of feedback."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from dataclasses import dataclass
7
+ from typing import TYPE_CHECKING, Literal
8
+
9
+ from cc_transcript.models import AssistantEvent, ToolUseBlock, UserEvent
10
+
11
+ if TYPE_CHECKING:
12
+ from collections.abc import Mapping, Sequence
13
+ from typing import Any
14
+
15
+ from cc_transcript.models import TranscriptEvent
16
+
17
+ ASSISTANT_TEXT_LIMIT = 2000
18
+
19
+
20
+ @dataclass(frozen=True, slots=True)
21
+ class ContextTurn:
22
+ """One conversational turn surrounding a piece of feedback.
23
+
24
+ Attributes:
25
+ role: Whether the turn came from the user, the assistant, or a tool.
26
+ text: The turn's text content.
27
+ tool_calls: The names of the tools the turn invoked, in order.
28
+ """
29
+
30
+ role: Literal["user", "assistant", "tool"]
31
+ text: str
32
+ tool_calls: tuple[str, ...] = ()
33
+
34
+
35
+ @dataclass(frozen=True, slots=True)
36
+ class ContextSnapshot:
37
+ """The conversational window around a piece of feedback.
38
+
39
+ Attributes:
40
+ before: The turns leading up to the trigger.
41
+ trigger: The assistant action the feedback responds to, when known.
42
+ after: The turns following the trigger.
43
+ """
44
+
45
+ before: tuple[ContextTurn, ...]
46
+ trigger: ContextTurn | None
47
+ after: tuple[ContextTurn, ...]
48
+
49
+ def to_json(self) -> str:
50
+ """Serializes the snapshot to the JSON stored in ``context_json``."""
51
+ return json.dumps(
52
+ {
53
+ "before": [turn_to_dict(turn) for turn in self.before],
54
+ "trigger": turn_to_dict(self.trigger) if self.trigger else None,
55
+ "after": [turn_to_dict(turn) for turn in self.after],
56
+ }
57
+ )
58
+
59
+ @classmethod
60
+ def from_json(cls, raw: str) -> ContextSnapshot:
61
+ """Deserializes a snapshot from a ``context_json`` string."""
62
+ data = json.loads(raw)
63
+ return cls(
64
+ before=tuple(turn_from_dict(turn) for turn in data["before"]),
65
+ trigger=turn_from_dict(data["trigger"]) if data["trigger"] else None,
66
+ after=tuple(turn_from_dict(turn) for turn in data["after"]),
67
+ )
68
+
69
+
70
+ def turn_to_dict(turn: ContextTurn) -> dict[str, Any]:
71
+ return {"role": turn.role, "text": turn.text, "tool_calls": list(turn.tool_calls)}
72
+
73
+
74
+ def turn_from_dict(data: Mapping[str, Any]) -> ContextTurn:
75
+ return ContextTurn(role=data["role"], text=data["text"], tool_calls=tuple(data["tool_calls"]))
76
+
77
+
78
+ def turn_for(event: UserEvent | AssistantEvent) -> ContextTurn:
79
+ match event:
80
+ case UserEvent():
81
+ return ContextTurn(role="user", text=event.text)
82
+ case AssistantEvent():
83
+ return ContextTurn(
84
+ role="assistant",
85
+ text=event.text[:ASSISTANT_TEXT_LIMIT],
86
+ tool_calls=tuple(block.name for block in event.blocks if isinstance(block, ToolUseBlock)),
87
+ )
88
+
89
+
90
+ def trigger_for(events: Sequence[TranscriptEvent], index: int, lower: int) -> ContextTurn | None:
91
+ return next(
92
+ (
93
+ turn_for(event)
94
+ for i in range(index - 1, lower - 1, -1)
95
+ if isinstance(event := events[i], AssistantEvent)
96
+ ),
97
+ None,
98
+ )
99
+
100
+
101
+ def build_snapshot(
102
+ events: Sequence[TranscriptEvent],
103
+ index: int,
104
+ *,
105
+ before: int = 6,
106
+ after: int = 2,
107
+ lower_bound: int | None = None,
108
+ ) -> ContextSnapshot:
109
+ """Builds the conversational window around the event at ``index``.
110
+
111
+ A turn is a :class:`UserEvent` or :class:`AssistantEvent`; system, mode, and
112
+ other events are skipped. The trigger is the nearest preceding assistant
113
+ turn — the action the feedback responds to.
114
+
115
+ Args:
116
+ events: The full ordered event stream for one transcript.
117
+ index: The index of the event the feedback was attached to.
118
+ before: The maximum number of turns to capture before the trigger.
119
+ after: The maximum number of turns to capture after the index.
120
+ lower_bound: When set, an event index the ``before`` window and trigger
121
+ search may not reach back past — used to anchor plan-review context
122
+ to the triggering edit cycle.
123
+
124
+ Returns:
125
+ The assembled :class:`ContextSnapshot`.
126
+ """
127
+ lower = lower_bound if lower_bound is not None else 0
128
+ return ContextSnapshot(
129
+ before=tuple(
130
+ turn_for(event)
131
+ for i in range(index - 1, lower - 1, -1)
132
+ if isinstance(event := events[i], UserEvent | AssistantEvent)
133
+ )[:before][::-1],
134
+ trigger=trigger_for(events, index, lower),
135
+ after=tuple(
136
+ turn_for(event)
137
+ for i in range(index + 1, len(events))
138
+ if isinstance(event := events[i], UserEvent | AssistantEvent)
139
+ )[:after],
140
+ )
@@ -0,0 +1,64 @@
1
+ """Generic infrastructure for parsing structured code-review messages.
2
+
3
+ The concrete review formats are app policy; an app injects its own
4
+ :class:`ReviewFormat` sequence into :func:`extract_all`.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import dataclass
10
+ from typing import TYPE_CHECKING
11
+
12
+ if TYPE_CHECKING:
13
+ import re
14
+ from collections.abc import Callable, Iterator, Sequence
15
+
16
+
17
+ @dataclass(frozen=True, slots=True)
18
+ class ReviewComment:
19
+ """A single inline review comment parsed from a code-review message.
20
+
21
+ Attributes:
22
+ file: The file the comment targets, when cited.
23
+ line_start: The first line the comment targets, when cited.
24
+ line_end: The last line the comment targets, when a range is cited.
25
+ comment: The comment's text.
26
+ """
27
+
28
+ file: str | None
29
+ line_start: int | None
30
+ line_end: int | None
31
+ comment: str
32
+
33
+
34
+ @dataclass(frozen=True, slots=True)
35
+ class ReviewFormat:
36
+ """A named code-review text format with a detector and extractor.
37
+
38
+ Attributes:
39
+ name: The format's identifier.
40
+ pattern: A pattern that matches when the format is present in a text.
41
+ extract: Parses a matching text into its review comments.
42
+ """
43
+
44
+ name: str
45
+ pattern: re.Pattern[str]
46
+ extract: Callable[[str], tuple[ReviewComment, ...]]
47
+
48
+
49
+ def extract_all(text: str, formats: Sequence[ReviewFormat]) -> Iterator[tuple[ReviewFormat, ReviewComment]]:
50
+ """Yields every ``(format, comment)`` extracted by any matching format.
51
+
52
+ Args:
53
+ text: The raw review message text.
54
+ formats: The review formats to try, in order.
55
+
56
+ Yields:
57
+ One pair per extracted comment, across all formats whose pattern matches.
58
+ """
59
+ return (
60
+ (fmt, comment)
61
+ for fmt in formats
62
+ if fmt.pattern.search(text)
63
+ for comment in fmt.extract(text)
64
+ )
@@ -0,0 +1,11 @@
1
+ """Claude Code transcript marker constants the mining fact-detectors recognize."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from cc_transcript import INTERRUPT_MARKER_RE as INTERRUPT_MARKER_RE
6
+
7
+ DENIAL_PREFIX = "The user doesn't want to proceed with this tool use. The tool use was rejected"
8
+ USER_SAID_MARKER = "To tell you how to proceed, the user said:\n"
9
+ USER_SAID_TRAILER = "Note: The user's next message"
10
+ EDIT_TOOLS = frozenset({"Edit", "Write", "MultiEdit", "NotebookEdit"})
11
+ REENTRY_LOOKBACK = 40
@@ -0,0 +1,100 @@
1
+ """Pure navigation helpers over a transcript's ordered events."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ from cc_transcript.models import AssistantEvent, ToolResultBlock, ToolUseBlock, UserEvent
8
+
9
+ from cc_transcript.domains.mining.markers import (
10
+ DENIAL_PREFIX,
11
+ EDIT_TOOLS,
12
+ INTERRUPT_MARKER_RE,
13
+ REENTRY_LOOKBACK,
14
+ USER_SAID_MARKER,
15
+ USER_SAID_TRAILER,
16
+ )
17
+
18
+ if TYPE_CHECKING:
19
+ from collections.abc import Iterator, Sequence
20
+ from typing import Any
21
+
22
+ from cc_transcript.models import ToolUseId, TranscriptEvent
23
+
24
+
25
+ def tool_uses(events: Sequence[TranscriptEvent]) -> dict[ToolUseId, ToolUseBlock]:
26
+ return {
27
+ block.id: block
28
+ for event in events
29
+ if isinstance(event, AssistantEvent)
30
+ for block in event.blocks
31
+ if isinstance(block, ToolUseBlock)
32
+ }
33
+
34
+
35
+ def denial_results(event: UserEvent) -> Iterator[ToolResultBlock]:
36
+ return (
37
+ block
38
+ for block in event.blocks
39
+ if isinstance(block, ToolResultBlock)
40
+ if block.is_error
41
+ if block.content.startswith(DENIAL_PREFIX)
42
+ )
43
+
44
+
45
+ def embedded_user_text(content: str) -> str | None:
46
+ if (start := content.find(USER_SAID_MARKER)) == -1:
47
+ return None
48
+ return content[start + len(USER_SAID_MARKER) :].split(USER_SAID_TRAILER, 1)[0].strip()
49
+
50
+
51
+ def last_edit_index(events: Sequence[TranscriptEvent], index: int) -> int | None:
52
+ return next(
53
+ (
54
+ i
55
+ for i in range(index - 1, max(index - REENTRY_LOOKBACK, 0) - 1, -1)
56
+ if isinstance(event := events[i], AssistantEvent)
57
+ if any(isinstance(b, ToolUseBlock) and b.name in EDIT_TOOLS for b in event.blocks)
58
+ ),
59
+ None,
60
+ )
61
+
62
+
63
+ def next_user_message(events: Sequence[TranscriptEvent], index: int) -> tuple[int, UserEvent] | None:
64
+ return next(
65
+ (
66
+ (i, event)
67
+ for i in range(index, len(events))
68
+ if isinstance(event := events[i], UserEvent)
69
+ if event.text.strip()
70
+ ),
71
+ None,
72
+ )
73
+
74
+
75
+ def denied_tool_payload(use: ToolUseBlock) -> dict[str, Any]:
76
+ return {"tool": use.name, "file_path": use.input.get("file_path")}
77
+
78
+
79
+ def interrupt_marker(content: str) -> str | None:
80
+ stripped = content.lstrip()
81
+ if (match := INTERRUPT_MARKER_RE.match(stripped)) is None:
82
+ return None
83
+ end = stripped.find("]")
84
+ return stripped[: end + 1] if end != -1 else match.group(0)
85
+
86
+
87
+ def is_bare_interrupt_marker(text: str) -> bool:
88
+ return (marker := interrupt_marker(text)) is not None and not text.strip()[len(marker.strip()) :].strip()
89
+
90
+
91
+ def marker_in(event: UserEvent) -> str | None:
92
+ return next(
93
+ (
94
+ marker
95
+ for block in event.blocks
96
+ if isinstance(block, ToolResultBlock)
97
+ if (marker := interrupt_marker(block.content)) is not None
98
+ ),
99
+ None,
100
+ )