cc-transcript 0.4.0__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/Cargo.lock +1 -1
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/PKG-INFO +4 -3
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/README.md +1 -1
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/__init__.py +1 -1
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/discovery.py +23 -16
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/filterspec.py +20 -0
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/parser.py +19 -6
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/sentiment/buckets.py +11 -0
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/sentiment/lexicon.py +18 -0
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/sentiment/messages.py +6 -0
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/sentiment/scorespec.py +5 -0
- cc_transcript-0.5.0/cc_transcript/store.py +130 -0
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/pyproject.toml +25 -4
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/rust/Cargo.toml +1 -1
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/rust/src/lib.rs +34 -16
- cc_transcript-0.4.0/cc_transcript/store.py +0 -118
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/Cargo.toml +0 -0
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/LICENSE +0 -0
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/_parser_rs.pyi +0 -0
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/backend.py +0 -0
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/builders.py +0 -0
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/filters.py +0 -0
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/models.py +0 -0
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/py.typed +0 -0
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/rust.py +0 -0
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/sentiment/__init__.py +0 -0
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/sentiment/engine.py +0 -0
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/rust/data/afinn-en-165.tsv +0 -0
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/rust/data/domain_overrides.tsv +0 -0
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/rust/src/event.rs +0 -0
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/rust/src/filter.rs +0 -0
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/rust/src/lexicon.rs +0 -0
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/rust/src/model.rs +0 -0
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/rust/src/score.rs +0 -0
- {cc_transcript-0.4.0 → cc_transcript-0.5.0}/rust/src/value.rs +0 -0
|
@@ -1,16 +1,17 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cc-transcript
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Classifier: Development Status :: 3 - Alpha
|
|
5
5
|
Classifier: Intended Audience :: Developers
|
|
6
6
|
Classifier: Operating System :: OS Independent
|
|
7
7
|
Classifier: Programming Language :: Python :: 3
|
|
8
8
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
9
9
|
Classifier: Typing :: Typed
|
|
10
|
+
Requires-Dist: aiosqlite>=0.20
|
|
10
11
|
Requires-Dist: anyio>=4.4
|
|
11
12
|
Requires-Dist: orjson>=3.10
|
|
12
13
|
Requires-Dist: pytest>=8.0 ; extra == 'dev'
|
|
13
|
-
Requires-Dist:
|
|
14
|
+
Requires-Dist: ty>=0.0.44 ; extra == 'dev'
|
|
14
15
|
Requires-Dist: ruff>=0.8 ; extra == 'dev'
|
|
15
16
|
Requires-Dist: spacy>=3.8 ; extra == 'lexicon'
|
|
16
17
|
Requires-Dist: afinn>=0.1 ; extra == 'lexicon'
|
|
@@ -86,5 +87,5 @@ available — every rule is off by default, so a bare `FilterConfig()` passes ev
|
|
|
86
87
|
|
|
87
88
|
## Docs
|
|
88
89
|
|
|
89
|
-
[Read the docs](https://yasyf.github.io/cc-transcript/) for the full
|
|
90
|
+
[Read the docs](https://yasyf.github.io/cc-transcript/) for the full guides — Getting Started, Filtering events, Scoring sentiment, Rust/Python backends & parity, and Compose your own policy — plus the complete API reference.
|
|
90
91
|
|
|
@@ -55,4 +55,4 @@ available — every rule is off by default, so a bare `FilterConfig()` passes ev
|
|
|
55
55
|
|
|
56
56
|
## Docs
|
|
57
57
|
|
|
58
|
-
[Read the docs](https://yasyf.github.io/cc-transcript/) for the full
|
|
58
|
+
[Read the docs](https://yasyf.github.io/cc-transcript/) for the full guides — Getting Started, Filtering events, Scoring sentiment, Rust/Python backends & parity, and Compose your own policy — plus the complete API reference.
|
|
@@ -56,5 +56,5 @@ from cc_transcript.models import (
|
|
|
56
56
|
TranscriptEvent,
|
|
57
57
|
UserEvent,
|
|
58
58
|
)
|
|
59
|
-
from cc_transcript.parser import TranscriptParser,
|
|
59
|
+
from cc_transcript.parser import TranscriptParser, parse_events_async, parse_events_from_bytes
|
|
60
60
|
from cc_transcript.store import FileStateStore
|
|
@@ -2,6 +2,8 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
|
|
5
|
+
import anyio
|
|
6
|
+
|
|
5
7
|
CLAUDE_PROJECTS_DIR = Path.home() / ".claude" / "projects"
|
|
6
8
|
|
|
7
9
|
|
|
@@ -14,26 +16,27 @@ class TranscriptDiscovery:
|
|
|
14
16
|
"""
|
|
15
17
|
|
|
16
18
|
@staticmethod
|
|
17
|
-
def find_transcripts() -> list[Path]:
|
|
19
|
+
async def find_transcripts() -> list[Path]:
|
|
18
20
|
"""Returns every transcript under the projects directory, sorted."""
|
|
19
|
-
|
|
21
|
+
root = anyio.Path(CLAUDE_PROJECTS_DIR)
|
|
22
|
+
if not await root.exists():
|
|
20
23
|
return []
|
|
21
|
-
return sorted(
|
|
24
|
+
return sorted([Path(p) async for p in root.rglob("*.jsonl")])
|
|
22
25
|
|
|
23
26
|
@staticmethod
|
|
24
|
-
def stat_mtime(path: Path) -> float | None:
|
|
27
|
+
async def stat_mtime(path: Path) -> float | None:
|
|
25
28
|
try:
|
|
26
|
-
return path.stat().st_mtime
|
|
29
|
+
return (await anyio.Path(path).stat()).st_mtime
|
|
27
30
|
except OSError:
|
|
28
31
|
return None
|
|
29
32
|
|
|
30
33
|
@staticmethod
|
|
31
|
-
def transcript_mtime(path: Path) -> float:
|
|
34
|
+
async def transcript_mtime(path: Path) -> float:
|
|
32
35
|
"""Returns ``path``'s modification time, raising if it cannot be read."""
|
|
33
|
-
return path.stat().st_mtime
|
|
36
|
+
return (await anyio.Path(path).stat()).st_mtime
|
|
34
37
|
|
|
35
38
|
@staticmethod
|
|
36
|
-
def find_in(
|
|
39
|
+
async def find_in(
|
|
37
40
|
directory: Path,
|
|
38
41
|
*,
|
|
39
42
|
name_contains: str | None = None,
|
|
@@ -52,14 +55,18 @@ class TranscriptDiscovery:
|
|
|
52
55
|
Returns:
|
|
53
56
|
Pairs of ``(path, mtime)`` sorted by path.
|
|
54
57
|
"""
|
|
55
|
-
|
|
58
|
+
root = anyio.Path(directory)
|
|
59
|
+
if not await root.exists():
|
|
56
60
|
return []
|
|
57
|
-
found = [
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
if
|
|
63
|
-
|
|
61
|
+
found: list[tuple[Path, float]] = []
|
|
62
|
+
async for entry in root.rglob("*.jsonl"):
|
|
63
|
+
if name_contains and name_contains not in entry.name:
|
|
64
|
+
continue
|
|
65
|
+
path = Path(entry)
|
|
66
|
+
if (mtime := await TranscriptDiscovery.stat_mtime(path)) is None:
|
|
67
|
+
continue
|
|
68
|
+
if known_mtimes is not None and (prev := known_mtimes.get(str(path))) is not None and prev >= mtime:
|
|
69
|
+
continue
|
|
70
|
+
found.append((path, mtime))
|
|
64
71
|
found.sort(key=lambda e: e[0])
|
|
65
72
|
return found[:limit] if limit is not None else found
|
|
@@ -63,11 +63,28 @@ STRUCTURAL_GROUPS: tuple[tuple[str, str], ...] = (
|
|
|
63
63
|
AGENT_INJECTION_GROUPS: tuple[tuple[str, str], ...] = (
|
|
64
64
|
("xml_tags_extra", r"<(?:teammate-message|scheduled-task)\b"),
|
|
65
65
|
("augment_agent", r"^# Augment Agent\b"),
|
|
66
|
+
("role_reminder", r"^\s*\[Role Reminder\b"),
|
|
66
67
|
)
|
|
67
68
|
|
|
68
69
|
INTERRUPT_MARKER_GROUPS: tuple[tuple[str, str], ...] = (("interrupt", r"\[Request interrupted by user"),)
|
|
69
70
|
STOP_HOOK_GROUPS: tuple[tuple[str, str], ...] = (("stop_hook", r"Stop hook feedback:"),)
|
|
70
71
|
|
|
72
|
+
# Approve-and-advance directives: a user telling the agent to proceed/commit/push or
|
|
73
|
+
# to resume killed work. They follow an assistant turn but advance it rather than
|
|
74
|
+
# correcting it — the opposite of pushback — so a pushback consumer drops them. The
|
|
75
|
+
# approve-and-advance arm is start-anchored so a mid-sentence "commit"/"push" inside
|
|
76
|
+
# a real correction never matches; only the resume arm searches anywhere.
|
|
77
|
+
CONTINUATION_GROUPS: tuple[tuple[str, str], ...] = (
|
|
78
|
+
(
|
|
79
|
+
"continuation",
|
|
80
|
+
r"^\s*(?:(?:yea+h?|yep|yup|sure|ok(?:ay)?|sounds good|looks good|lgtm|perfect)[\s,.!]+){0,2}"
|
|
81
|
+
r"(?:go ahead\b|(?:go ahead and\s+)?(?:commit|push|rebase|merge|deploy)\b"
|
|
82
|
+
r"|ship it\b|cut (?:a |the )?(?:new )?release\b|proceed\b)"
|
|
83
|
+
r"|\byou must resume\b"
|
|
84
|
+
r"|\b(?:resume|restart) (?:them|it|the (?:sub-?agents?|workflows?|agents?|tasks?))\b",
|
|
85
|
+
),
|
|
86
|
+
)
|
|
87
|
+
|
|
71
88
|
# Named junk categories a consumer composes via ``drop_junk(...)``. Interrupt and
|
|
72
89
|
# stop-hook are kept separate because they carry pushback and must never be folded
|
|
73
90
|
# into the structural-noise default.
|
|
@@ -76,6 +93,7 @@ JUNK_CATEGORIES: dict[str, tuple[tuple[str, str], ...]] = {
|
|
|
76
93
|
"agent_injection": AGENT_INJECTION_GROUPS,
|
|
77
94
|
"interrupt": INTERRUPT_MARKER_GROUPS,
|
|
78
95
|
"stop_hook": STOP_HOOK_GROUPS,
|
|
96
|
+
"continuation": CONTINUATION_GROUPS,
|
|
79
97
|
}
|
|
80
98
|
|
|
81
99
|
# The superset of structural noise (structural ∪ agent-injection), WITHOUT
|
|
@@ -125,6 +143,7 @@ PORTABLE_GROUP_NAMES: frozenset[str] = frozenset(
|
|
|
125
143
|
*STRUCTURAL_NOISE_GROUPS,
|
|
126
144
|
*INTERRUPT_MARKER_GROUPS,
|
|
127
145
|
*STOP_HOOK_GROUPS,
|
|
146
|
+
*CONTINUATION_GROUPS,
|
|
128
147
|
*FRUSTRATION_GROUPS,
|
|
129
148
|
*MILD_IMPATIENCE_GROUPS,
|
|
130
149
|
)
|
|
@@ -398,6 +417,7 @@ def keep(event: TranscriptEvent, spec: FilterSpec) -> bool:
|
|
|
398
417
|
|
|
399
418
|
|
|
400
419
|
def labels_for(event: TranscriptEvent, spec: FilterSpec) -> tuple[str, ...]:
|
|
420
|
+
"""Returns the TAG labels ``spec`` records for ``event``, in clause order."""
|
|
401
421
|
kind = event_kind(event)
|
|
402
422
|
return tuple(
|
|
403
423
|
clause.label
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import asyncio
|
|
3
4
|
import os
|
|
5
|
+
from contextlib import suppress
|
|
4
6
|
from datetime import datetime
|
|
5
7
|
from typing import TYPE_CHECKING, Any, ClassVar, Literal
|
|
6
8
|
|
|
@@ -144,12 +146,12 @@ def decode_line(line: bytes) -> TranscriptEvent | None:
|
|
|
144
146
|
return build_event(data)
|
|
145
147
|
|
|
146
148
|
|
|
147
|
-
def
|
|
148
|
-
return parse_events_from_bytes(path.read_bytes())
|
|
149
|
+
async def parse_events_async(path: Path) -> list[TranscriptEvent]:
|
|
150
|
+
return parse_events_from_bytes(await anyio.Path(path).read_bytes())
|
|
149
151
|
|
|
150
152
|
|
|
151
153
|
def parse_one(path: Path, mtime: float) -> ParsedTranscript:
|
|
152
|
-
return ParsedTranscript(path=path, mtime=mtime, events=tuple(
|
|
154
|
+
return ParsedTranscript(path=path, mtime=mtime, events=tuple(parse_events_from_bytes(path.read_bytes())))
|
|
153
155
|
|
|
154
156
|
|
|
155
157
|
def parse_one_filtered(path: Path, mtime: float, spec: FilterSpec | None) -> ParsedTranscript:
|
|
@@ -192,7 +194,14 @@ class PythonBackend:
|
|
|
192
194
|
|
|
193
195
|
async def worker(path: Path, mtime: float) -> None:
|
|
194
196
|
async with limiter:
|
|
195
|
-
|
|
197
|
+
try:
|
|
198
|
+
parsed = await anyio.to_thread.run_sync(parse_one_filtered, path, mtime, spec)
|
|
199
|
+
except (OSError, ValueError, KeyError):
|
|
200
|
+
return
|
|
201
|
+
try:
|
|
202
|
+
await send_ch.send(parsed)
|
|
203
|
+
except anyio.BrokenResourceError:
|
|
204
|
+
return
|
|
196
205
|
|
|
197
206
|
async def drive() -> None:
|
|
198
207
|
try:
|
|
@@ -202,11 +211,15 @@ class PythonBackend:
|
|
|
202
211
|
finally:
|
|
203
212
|
await send_ch.aclose()
|
|
204
213
|
|
|
205
|
-
|
|
206
|
-
|
|
214
|
+
driver = asyncio.ensure_future(drive())
|
|
215
|
+
try:
|
|
207
216
|
async with recv_ch:
|
|
208
217
|
async for parsed in recv_ch:
|
|
209
218
|
yield parsed
|
|
219
|
+
finally:
|
|
220
|
+
driver.cancel()
|
|
221
|
+
with suppress(asyncio.CancelledError):
|
|
222
|
+
await driver
|
|
210
223
|
|
|
211
224
|
|
|
212
225
|
class TranscriptParser:
|
|
@@ -16,6 +16,8 @@ MIN_USER_CHARS = 5
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
class ConversationBucket(NamedTuple):
|
|
19
|
+
"""A session's messages grouped into one fixed-width time window — the unit that gets scored."""
|
|
20
|
+
|
|
19
21
|
session_id: SessionId
|
|
20
22
|
bucket_index: BucketIndex
|
|
21
23
|
bucket_start: datetime
|
|
@@ -23,11 +25,19 @@ class ConversationBucket(NamedTuple):
|
|
|
23
25
|
|
|
24
26
|
|
|
25
27
|
class BucketKey(NamedTuple):
|
|
28
|
+
"""Stable identity of a :class:`ConversationBucket`: its session and bucket index."""
|
|
29
|
+
|
|
26
30
|
session_id: SessionId
|
|
27
31
|
bucket_index: BucketIndex
|
|
28
32
|
|
|
29
33
|
|
|
30
34
|
class ConversationBucketer:
|
|
35
|
+
"""Groups transcript messages into per-session, time-aligned buckets worth scoring.
|
|
36
|
+
|
|
37
|
+
Sessions below ``MIN_USER_TURNS_PER_SESSION`` and windows lacking a substantive user turn or
|
|
38
|
+
any assistant turn are dropped.
|
|
39
|
+
"""
|
|
40
|
+
|
|
31
41
|
@staticmethod
|
|
32
42
|
def align_to_bucket(ts: datetime) -> datetime:
|
|
33
43
|
return ts.replace(
|
|
@@ -73,6 +83,7 @@ class ConversationBucketer:
|
|
|
73
83
|
|
|
74
84
|
|
|
75
85
|
def extract_bucket_keys(messages: list[TranscriptMessage]) -> list[BucketKey]:
|
|
86
|
+
"""Returns the :class:`BucketKey` of every scorable bucket in ``messages``."""
|
|
76
87
|
return [
|
|
77
88
|
BucketKey(session_id=b.session_id, bucket_index=b.bucket_index)
|
|
78
89
|
for b in ConversationBucketer.bucket_messages(messages)
|
|
@@ -38,6 +38,13 @@ def rust_lexicon() -> ModuleType | None:
|
|
|
38
38
|
|
|
39
39
|
|
|
40
40
|
class Lexicon:
|
|
41
|
+
"""Token-polarity lookup: AFINN base scores layered with coding-domain overrides.
|
|
42
|
+
|
|
43
|
+
``DOMAIN_OVERRIDES`` pins context-specific terms (``stop``, ``broken``, ``ship``) that
|
|
44
|
+
AFINN mis-scores, and magnitudes below ``MIN_MAGNITUDE`` collapse to neutral. Backs the
|
|
45
|
+
lexicon-bearing score stages through :meth:`has_hit`.
|
|
46
|
+
"""
|
|
47
|
+
|
|
41
48
|
DOMAIN_OVERRIDES: ClassVar[dict[str, int]] = {
|
|
42
49
|
"stop": -3,
|
|
43
50
|
"halt": -3,
|
|
@@ -104,6 +111,11 @@ class Lexicon:
|
|
|
104
111
|
|
|
105
112
|
@classmethod
|
|
106
113
|
def polarity(cls, lemma: str) -> int:
|
|
114
|
+
"""The signed polarity of ``lemma``.
|
|
115
|
+
|
|
116
|
+
A domain override when present, else its AFINN score zeroed below
|
|
117
|
+
``MIN_MAGNITUDE``.
|
|
118
|
+
"""
|
|
107
119
|
lower = lemma.lower()
|
|
108
120
|
if (override := cls.DOMAIN_OVERRIDES.get(lower)) is not None:
|
|
109
121
|
return override
|
|
@@ -129,6 +141,12 @@ class Lexicon:
|
|
|
129
141
|
|
|
130
142
|
|
|
131
143
|
class NLP:
|
|
144
|
+
"""Lazy loader for the spaCy ``en_core_web_sm`` model used to lemmatize text.
|
|
145
|
+
|
|
146
|
+
Loads from the user spaCy cache, downloading the model on first use; on failure it records
|
|
147
|
+
the diagnostic and disables itself so the lexicon path fails open.
|
|
148
|
+
"""
|
|
149
|
+
|
|
132
150
|
model: ClassVar[spacy.language.Language | None] = None
|
|
133
151
|
failed: ClassVar[bool] = False
|
|
134
152
|
last_download_output: ClassVar[str | None] = None
|
|
@@ -7,11 +7,15 @@ from cc_transcript.models import SessionId
|
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class ToolCall(NamedTuple):
|
|
10
|
+
"""A single tool invocation within a message: the tool ``name`` and optional target file path."""
|
|
11
|
+
|
|
10
12
|
name: str
|
|
11
13
|
file_path: str | None = None
|
|
12
14
|
|
|
13
15
|
|
|
14
16
|
class UserMessage(NamedTuple):
|
|
17
|
+
"""A user turn distilled for bucketing: its text, tool calls, and authoring metadata."""
|
|
18
|
+
|
|
15
19
|
content: str
|
|
16
20
|
timestamp: datetime
|
|
17
21
|
session_id: SessionId
|
|
@@ -23,6 +27,8 @@ class UserMessage(NamedTuple):
|
|
|
23
27
|
|
|
24
28
|
|
|
25
29
|
class AssistantMessage(NamedTuple):
|
|
30
|
+
"""An assistant turn distilled for bucketing: its text, tool calls, and responding model."""
|
|
31
|
+
|
|
26
32
|
content: str
|
|
27
33
|
timestamp: datetime
|
|
28
34
|
session_id: SessionId
|
|
@@ -91,24 +91,29 @@ class ScoreSpec:
|
|
|
91
91
|
|
|
92
92
|
|
|
93
93
|
def flag_frustration(*, score: int = 1) -> FrustrationShortCircuit:
|
|
94
|
+
"""Composes the short-circuit stage that pins a frustrated message to ``score`` before inference."""
|
|
94
95
|
return FrustrationShortCircuit(groups=FRUSTRATION_GROUPS, score=score)
|
|
95
96
|
|
|
96
97
|
|
|
97
98
|
def clamp_positive(*, floor: int = 3, max_words: int = SHORT_MESSAGE_MAX_WORDS) -> PositiveClamp:
|
|
99
|
+
"""Composes the post-process stage that lowers a top score on a short message lacking positive lexicon."""
|
|
98
100
|
return PositiveClamp(positive_floor=floor, max_words=max_words)
|
|
99
101
|
|
|
100
102
|
|
|
101
103
|
def demote_mild_irritation(*, floor: int = 3) -> MildIrritationDemote:
|
|
104
|
+
"""Composes the post-process stage that softens a non-hostile mild-impatience message off the floor score."""
|
|
102
105
|
return MildIrritationDemote(
|
|
103
106
|
trigger_groups=MILD_IMPATIENCE_GROUPS, hostile_groups=FRUSTRATION_GROUPS, hostile_floor=floor
|
|
104
107
|
)
|
|
105
108
|
|
|
106
109
|
|
|
107
110
|
def clamp_resume() -> ResumeClamp:
|
|
111
|
+
"""Composes the post-process stage that neutralizes a bare resume phrase to a middling score."""
|
|
108
112
|
return ResumeClamp(phrases=RESUME_PHRASE_SET)
|
|
109
113
|
|
|
110
114
|
|
|
111
115
|
def build_score_spec(*stages: ScoreStage) -> ScoreSpec:
|
|
116
|
+
"""Assembles ``stages`` into a :class:`ScoreSpec` for the engine to apply around inference."""
|
|
112
117
|
return ScoreSpec(stages=tuple(stages))
|
|
113
118
|
|
|
114
119
|
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from contextlib import asynccontextmanager
|
|
4
|
+
from typing import TYPE_CHECKING, Self
|
|
5
|
+
|
|
6
|
+
import aiosqlite
|
|
7
|
+
import anyio
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from collections.abc import AsyncIterator
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from types import TracebackType
|
|
13
|
+
|
|
14
|
+
FILE_SCHEMA = """
|
|
15
|
+
CREATE TABLE IF NOT EXISTS files (
|
|
16
|
+
path TEXT PRIMARY KEY,
|
|
17
|
+
mtime REAL NOT NULL
|
|
18
|
+
);
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class FileStateStore:
|
|
23
|
+
"""Tracks which transcript files have been ingested, keyed by mtime.
|
|
24
|
+
|
|
25
|
+
Backed by a single async SQLite (``aiosqlite``) database with WAL journaling
|
|
26
|
+
and a task lock, so it is safe to share one store across concurrent tasks.
|
|
27
|
+
Consumers compose their own writes alongside :meth:`record_file` inside
|
|
28
|
+
:meth:`transaction` to keep ingestion state and derived records atomic.
|
|
29
|
+
|
|
30
|
+
Example:
|
|
31
|
+
>>> store = await FileStateStore.open(Path("state.db"), extra_schema=MY_SCHEMA)
|
|
32
|
+
>>> async with store.transaction() as conn:
|
|
33
|
+
... await conn.execute("INSERT INTO my_table VALUES (?)", (value,))
|
|
34
|
+
... await store.record_file(str(path), mtime)
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
def __init__(self, conn: aiosqlite.Connection) -> None:
|
|
38
|
+
self.conn = conn
|
|
39
|
+
self.lock = anyio.Lock()
|
|
40
|
+
self._txn_owner: int | None = None
|
|
41
|
+
|
|
42
|
+
@classmethod
|
|
43
|
+
async def open(cls, path: Path, *, extra_schema: str = "") -> Self:
|
|
44
|
+
"""Opens (creating if needed) the store at ``path``.
|
|
45
|
+
|
|
46
|
+
Args:
|
|
47
|
+
path: The database file path; its parent is created if absent.
|
|
48
|
+
extra_schema: Additional DDL to execute after the file schema,
|
|
49
|
+
e.g. consumer tables that reference ``files(path)``.
|
|
50
|
+
|
|
51
|
+
Returns:
|
|
52
|
+
The opened store.
|
|
53
|
+
"""
|
|
54
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
55
|
+
conn = await aiosqlite.connect(str(path), isolation_level=None)
|
|
56
|
+
conn.row_factory = aiosqlite.Row
|
|
57
|
+
await conn.execute("PRAGMA foreign_keys = ON")
|
|
58
|
+
await conn.execute("PRAGMA journal_mode = WAL")
|
|
59
|
+
await conn.executescript(FILE_SCHEMA + extra_schema)
|
|
60
|
+
return cls(conn)
|
|
61
|
+
|
|
62
|
+
async def close(self) -> None:
|
|
63
|
+
"""Closes the underlying connection."""
|
|
64
|
+
async with self.lock:
|
|
65
|
+
await self.conn.close()
|
|
66
|
+
|
|
67
|
+
async def __aenter__(self) -> Self:
|
|
68
|
+
return self
|
|
69
|
+
|
|
70
|
+
async def __aexit__(
|
|
71
|
+
self,
|
|
72
|
+
exc_type: type[BaseException] | None,
|
|
73
|
+
exc: BaseException | None,
|
|
74
|
+
tb: TracebackType | None,
|
|
75
|
+
) -> None:
|
|
76
|
+
await self.close()
|
|
77
|
+
|
|
78
|
+
@asynccontextmanager
|
|
79
|
+
async def transaction(self) -> AsyncIterator[aiosqlite.Connection]:
|
|
80
|
+
"""Yields the locked connection inside a single committed transaction.
|
|
81
|
+
|
|
82
|
+
Use this to compose consumer writes with :meth:`record_file` so they
|
|
83
|
+
commit or roll back together. :meth:`record_file` called within the
|
|
84
|
+
block joins this transaction instead of opening its own.
|
|
85
|
+
|
|
86
|
+
Yields:
|
|
87
|
+
The store's connection, held under the store lock.
|
|
88
|
+
"""
|
|
89
|
+
async with self.lock:
|
|
90
|
+
self._txn_owner = anyio.get_current_task().id
|
|
91
|
+
await self.conn.execute("BEGIN IMMEDIATE")
|
|
92
|
+
try:
|
|
93
|
+
yield self.conn
|
|
94
|
+
except BaseException:
|
|
95
|
+
await self.conn.rollback()
|
|
96
|
+
raise
|
|
97
|
+
else:
|
|
98
|
+
await self.conn.commit()
|
|
99
|
+
finally:
|
|
100
|
+
self._txn_owner = None
|
|
101
|
+
|
|
102
|
+
async def file_mtimes(self) -> dict[str, float]:
|
|
103
|
+
"""Returns the recorded ``path`` to ``mtime`` map."""
|
|
104
|
+
async with self.lock, self.conn.execute("SELECT path, mtime FROM files") as cur:
|
|
105
|
+
return {row["path"]: row["mtime"] async for row in cur}
|
|
106
|
+
|
|
107
|
+
async def record_file(self, path: str, mtime: float) -> None:
|
|
108
|
+
"""Upserts the recorded mtime for ``path``.
|
|
109
|
+
|
|
110
|
+
Call inside :meth:`transaction` to commit alongside consumer writes;
|
|
111
|
+
called on its own it commits immediately.
|
|
112
|
+
"""
|
|
113
|
+
if self._txn_owner == anyio.get_current_task().id:
|
|
114
|
+
await self.upsert_file(path, mtime)
|
|
115
|
+
return
|
|
116
|
+
async with self.lock:
|
|
117
|
+
await self.conn.execute("BEGIN IMMEDIATE")
|
|
118
|
+
try:
|
|
119
|
+
await self.upsert_file(path, mtime)
|
|
120
|
+
except BaseException:
|
|
121
|
+
await self.conn.rollback()
|
|
122
|
+
raise
|
|
123
|
+
else:
|
|
124
|
+
await self.conn.commit()
|
|
125
|
+
|
|
126
|
+
async def upsert_file(self, path: str, mtime: float) -> None:
|
|
127
|
+
await self.conn.execute(
|
|
128
|
+
"INSERT INTO files(path, mtime) VALUES(?, ?) ON CONFLICT(path) DO UPDATE SET mtime = excluded.mtime",
|
|
129
|
+
(path, mtime),
|
|
130
|
+
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "cc-transcript"
|
|
3
|
-
version = "0.4.0"
|
|
3
|
+
version = "0.5.0"
|
|
4
4
|
description = "Typed events for Claude Code transcripts: discovery, a superset JSONL parser (Python + Rust), and ingestion-state tracking."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = "PolyForm-Noncommercial-1.0.0"
|
|
@@ -17,6 +17,7 @@ classifiers = [
|
|
|
17
17
|
]
|
|
18
18
|
requires-python = ">=3.13"
|
|
19
19
|
dependencies = [
|
|
20
|
+
"aiosqlite>=0.20",
|
|
20
21
|
"anyio>=4.4",
|
|
21
22
|
"orjson>=3.10",
|
|
22
23
|
]
|
|
@@ -24,7 +25,7 @@ dependencies = [
|
|
|
24
25
|
[project.optional-dependencies]
|
|
25
26
|
dev = [
|
|
26
27
|
"pytest>=8.0",
|
|
27
|
-
"
|
|
28
|
+
"ty>=0.0.44",
|
|
28
29
|
"ruff>=0.8",
|
|
29
30
|
]
|
|
30
31
|
lexicon = [
|
|
@@ -57,12 +58,32 @@ markers = [
|
|
|
57
58
|
"integration: Integration tests",
|
|
58
59
|
]
|
|
59
60
|
|
|
61
|
+
# ty (Astral) is the default type checker — run `uv run ty check cc_transcript`.
|
|
62
|
+
# It is fast, understands modern syntax, and avoids the strict-pyright false
|
|
63
|
+
# positives on pydantic/attrs-style dynamic defaults and PK-type overrides.
|
|
64
|
+
[tool.ty.rules]
|
|
65
|
+
# Keep cross-checker `# type: ignore` / `# pyright: ignore` comments from tripping ty.
|
|
66
|
+
unused-type-ignore-comment = "ignore"
|
|
67
|
+
unresolved-import = "ignore"
|
|
68
|
+
|
|
69
|
+
# pyright is kept as a secondary checker (editors / `uvx pyright`). Basic mode plus
|
|
70
|
+
# a few disables covers the noise; ty is the gate that runs in CI.
|
|
60
71
|
[tool.pyright]
|
|
61
72
|
pythonVersion = "3.13"
|
|
62
|
-
typeCheckingMode = "strict"
|
|
73
|
+
typeCheckingMode = "basic"
|
|
63
74
|
include = ["cc_transcript"]
|
|
64
75
|
venvPath = "."
|
|
65
76
|
venv = ".venv"
|
|
77
|
+
reportImplicitOverride = "none"
|
|
78
|
+
reportIncompatibleVariableOverride = "none"
|
|
79
|
+
reportUnknownVariableType = "none"
|
|
80
|
+
reportUnknownMemberType = "none"
|
|
81
|
+
reportUnknownArgumentType = "none"
|
|
82
|
+
reportUnknownParameterType = "none"
|
|
83
|
+
reportUnknownLambdaType = "none"
|
|
84
|
+
reportMissingTypeArgument = "none"
|
|
85
|
+
reportPrivateImportUsage = "none"
|
|
86
|
+
reportUnusedCallResult = "none"
|
|
66
87
|
|
|
67
88
|
[tool.ruff]
|
|
68
89
|
line-length = 120
|
|
@@ -73,7 +94,7 @@ src = [".", "tests"]
|
|
|
73
94
|
select = ["E", "F", "I", "UP"]
|
|
74
95
|
|
|
75
96
|
[tool.ruff.lint.per-file-ignores]
|
|
76
|
-
"
|
|
97
|
+
"__init__.py" = ["F401"]
|
|
77
98
|
|
|
78
99
|
[dependency-groups]
|
|
79
100
|
docs = [
|
|
@@ -96,30 +96,48 @@ pub struct ParseStream {
|
|
|
96
96
|
|
|
97
97
|
#[pymethods]
|
|
98
98
|
impl ParseStream {
|
|
99
|
+
// A file whose events cannot be materialized (e.g. a malformed line missing a
|
|
100
|
+
// required field) is silently skipped — whole-file parity with PythonBackend.
|
|
99
101
|
fn recv<'py>(&self, py: Python<'py>) -> PyResult<Option<Bound<'py, PyAny>>> {
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
102
|
+
loop {
|
|
103
|
+
match py.detach(|| self.rx.recv().ok()) {
|
|
104
|
+
None => return Ok(None),
|
|
105
|
+
Some(pf) => {
|
|
106
|
+
if let Ok(obj) = parsed_file_to_py(py, pf) {
|
|
107
|
+
return Ok(Some(obj));
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
}
|
|
103
111
|
}
|
|
104
112
|
}
|
|
105
113
|
|
|
106
114
|
fn recv_many<'py>(&self, py: Python<'py>, max: usize) -> PyResult<Vec<Bound<'py, PyAny>>> {
|
|
107
|
-
py
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
+
let mut out: Vec<Bound<'py, PyAny>> = Vec::new();
|
|
116
|
+
// Block for the first materialized file; return [] only when the channel
|
|
117
|
+
// is genuinely closed, so an all-skipped batch never reads as "done".
|
|
118
|
+
loop {
|
|
119
|
+
match py.detach(|| self.rx.recv().ok()) {
|
|
120
|
+
None => return Ok(out),
|
|
121
|
+
Some(pf) => {
|
|
122
|
+
if let Ok(obj) = parsed_file_to_py(py, pf) {
|
|
123
|
+
out.push(obj);
|
|
124
|
+
break;
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
// Drain what is already buffered without blocking, skipping bad files.
|
|
130
|
+
while out.len() < max {
|
|
131
|
+
match py.detach(|| self.rx.try_recv().ok()) {
|
|
132
|
+
None => break,
|
|
133
|
+
Some(pf) => {
|
|
134
|
+
if let Ok(obj) = parsed_file_to_py(py, pf) {
|
|
135
|
+
out.push(obj);
|
|
115
136
|
}
|
|
116
137
|
}
|
|
117
138
|
}
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
.into_iter()
|
|
121
|
-
.map(|pf| parsed_file_to_py(py, pf))
|
|
122
|
-
.collect()
|
|
139
|
+
}
|
|
140
|
+
Ok(out)
|
|
123
141
|
}
|
|
124
142
|
}
|
|
125
143
|
|
|
@@ -1,118 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import sqlite3
|
|
4
|
-
import threading
|
|
5
|
-
from contextlib import contextmanager
|
|
6
|
-
from typing import TYPE_CHECKING, Self
|
|
7
|
-
|
|
8
|
-
if TYPE_CHECKING:
|
|
9
|
-
from collections.abc import Generator
|
|
10
|
-
from pathlib import Path
|
|
11
|
-
from types import TracebackType
|
|
12
|
-
|
|
13
|
-
FILE_SCHEMA = """
|
|
14
|
-
CREATE TABLE IF NOT EXISTS files (
|
|
15
|
-
path TEXT PRIMARY KEY,
|
|
16
|
-
mtime REAL NOT NULL
|
|
17
|
-
);
|
|
18
|
-
"""
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
class FileStateStore:
|
|
22
|
-
"""Tracks which transcript files have been ingested, keyed by mtime.
|
|
23
|
-
|
|
24
|
-
Backed by a single SQLite database with WAL journaling and a process-wide
|
|
25
|
-
lock, so it is safe to share one store across threads. Consumers compose
|
|
26
|
-
their own writes alongside :meth:`record_file` inside :meth:`transaction`
|
|
27
|
-
to keep ingestion state and derived records atomic.
|
|
28
|
-
|
|
29
|
-
Example:
|
|
30
|
-
>>> store = FileStateStore.open(Path("state.db"), extra_schema=MY_SCHEMA)
|
|
31
|
-
>>> with store.transaction() as conn:
|
|
32
|
-
... conn.execute("INSERT INTO my_table VALUES (?)", (value,))
|
|
33
|
-
... store.record_file(str(path), mtime)
|
|
34
|
-
"""
|
|
35
|
-
|
|
36
|
-
def __init__(self, conn: sqlite3.Connection) -> None:
|
|
37
|
-
self.conn = conn
|
|
38
|
-
self.lock = threading.RLock()
|
|
39
|
-
self._in_transaction = False
|
|
40
|
-
|
|
41
|
-
@classmethod
|
|
42
|
-
def open(cls, path: Path, *, extra_schema: str = "") -> Self:
|
|
43
|
-
"""Opens (creating if needed) the store at ``path``.
|
|
44
|
-
|
|
45
|
-
Args:
|
|
46
|
-
path: The database file path; its parent is created if absent.
|
|
47
|
-
extra_schema: Additional DDL to execute after the file schema,
|
|
48
|
-
e.g. consumer tables that reference ``files(path)``.
|
|
49
|
-
|
|
50
|
-
Returns:
|
|
51
|
-
The opened store.
|
|
52
|
-
"""
|
|
53
|
-
path.parent.mkdir(parents=True, exist_ok=True)
|
|
54
|
-
conn = sqlite3.connect(str(path), check_same_thread=False)
|
|
55
|
-
conn.row_factory = sqlite3.Row
|
|
56
|
-
conn.execute("PRAGMA foreign_keys = ON")
|
|
57
|
-
conn.execute("PRAGMA journal_mode = WAL")
|
|
58
|
-
conn.executescript(FILE_SCHEMA + extra_schema)
|
|
59
|
-
conn.commit()
|
|
60
|
-
return cls(conn)
|
|
61
|
-
|
|
62
|
-
def close(self) -> None:
|
|
63
|
-
"""Closes the underlying connection."""
|
|
64
|
-
with self.lock:
|
|
65
|
-
self.conn.close()
|
|
66
|
-
|
|
67
|
-
def __enter__(self) -> Self:
|
|
68
|
-
return self
|
|
69
|
-
|
|
70
|
-
def __exit__(
|
|
71
|
-
self,
|
|
72
|
-
exc_type: type[BaseException] | None,
|
|
73
|
-
exc: BaseException | None,
|
|
74
|
-
tb: TracebackType | None,
|
|
75
|
-
) -> None:
|
|
76
|
-
self.close()
|
|
77
|
-
|
|
78
|
-
@contextmanager
|
|
79
|
-
def transaction(self) -> Generator[sqlite3.Connection]:
|
|
80
|
-
"""Yields the locked connection inside a single committed transaction.
|
|
81
|
-
|
|
82
|
-
Use this to compose consumer writes with :meth:`record_file` so they
|
|
83
|
-
commit or roll back together. :meth:`record_file` called within the
|
|
84
|
-
block joins this transaction instead of opening its own.
|
|
85
|
-
|
|
86
|
-
Yields:
|
|
87
|
-
The store's connection, held under the store lock.
|
|
88
|
-
"""
|
|
89
|
-
with self.lock, self.conn:
|
|
90
|
-
self._in_transaction = True
|
|
91
|
-
try:
|
|
92
|
-
yield self.conn
|
|
93
|
-
finally:
|
|
94
|
-
self._in_transaction = False
|
|
95
|
-
|
|
96
|
-
def file_mtimes(self) -> dict[str, float]:
|
|
97
|
-
"""Returns the recorded ``path`` to ``mtime`` map."""
|
|
98
|
-
with self.lock:
|
|
99
|
-
return {row["path"]: row["mtime"] for row in self.conn.execute("SELECT path, mtime FROM files")}
|
|
100
|
-
|
|
101
|
-
def record_file(self, path: str, mtime: float) -> None:
|
|
102
|
-
"""Upserts the recorded mtime for ``path``.
|
|
103
|
-
|
|
104
|
-
Call inside :meth:`transaction` to commit alongside consumer writes;
|
|
105
|
-
called on its own it commits immediately.
|
|
106
|
-
"""
|
|
107
|
-
with self.lock:
|
|
108
|
-
if self._in_transaction:
|
|
109
|
-
self.upsert_file(path, mtime)
|
|
110
|
-
return
|
|
111
|
-
with self.conn:
|
|
112
|
-
self.upsert_file(path, mtime)
|
|
113
|
-
|
|
114
|
-
def upsert_file(self, path: str, mtime: float) -> None:
|
|
115
|
-
self.conn.execute(
|
|
116
|
-
"INSERT INTO files(path, mtime) VALUES(?, ?) ON CONFLICT(path) DO UPDATE SET mtime = excluded.mtime",
|
|
117
|
-
(path, mtime),
|
|
118
|
-
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|