PyPI - cc-transcript - Versions diffs - 0.4.0__tar.gz → 0.5.0__tar.gz - Mend

cc-transcript 0.4.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/Cargo.lock RENAMED Viewed

@@ -66,7 +66,7 @@ dependencies = [
 [[package]]
 name = "cc_transcript_parser"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
  "chrono",
  "crossbeam-channel",

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/PKG-INFO RENAMED Viewed

@@ -1,16 +1,17 @@
 Metadata-Version: 2.4
 Name: cc-transcript
-Version: 0.4.0
+Version: 0.5.0
 Classifier: Development Status :: 3 - Alpha
 Classifier: Intended Audience :: Developers
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3 :: Only
 Classifier: Typing :: Typed
+Requires-Dist: aiosqlite>=0.20
 Requires-Dist: anyio>=4.4
 Requires-Dist: orjson>=3.10
 Requires-Dist: pytest>=8.0 ; extra == 'dev'
-Requires-Dist: pyright>=1.1 ; extra == 'dev'
+Requires-Dist: ty>=0.0.44 ; extra == 'dev'
 Requires-Dist: ruff>=0.8 ; extra == 'dev'
 Requires-Dist: spacy>=3.8 ; extra == 'lexicon'
 Requires-Dist: afinn>=0.1 ; extra == 'lexicon'
@@ -86,5 +87,5 @@ available — every rule is off by default, so a bare `FilterConfig()` passes ev
 ## Docs
-[Read the docs](https://yasyf.github.io/cc-transcript/) for the full guide and API reference.
+[Read the docs](https://yasyf.github.io/cc-transcript/) for the full guides — Getting Started, Filtering events, Scoring sentiment, Rust/Python backends & parity, and Compose your own policy — plus the complete API reference.

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/README.md RENAMED Viewed

@@ -55,4 +55,4 @@ available — every rule is off by default, so a bare `FilterConfig()` passes ev
 ## Docs
-[Read the docs](https://yasyf.github.io/cc-transcript/) for the full guide and API reference.
+[Read the docs](https://yasyf.github.io/cc-transcript/) for the full guides — Getting Started, Filtering events, Scoring sentiment, Rust/Python backends & parity, and Compose your own policy — plus the complete API reference.

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/__init__.py RENAMED Viewed

@@ -56,5 +56,5 @@ from cc_transcript.models import (
     TranscriptEvent,
     UserEvent,
 )
-from cc_transcript.parser import TranscriptParser, parse_events, parse_events_from_bytes
+from cc_transcript.parser import TranscriptParser, parse_events_async, parse_events_from_bytes
 from cc_transcript.store import FileStateStore

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/discovery.py RENAMED Viewed

@@ -2,6 +2,8 @@ from __future__ import annotations
 from pathlib import Path
+import anyio
 CLAUDE_PROJECTS_DIR = Path.home() / ".claude" / "projects"
@@ -14,26 +16,27 @@ class TranscriptDiscovery:
     """
     @staticmethod
-    def find_transcripts() -> list[Path]:
+    async def find_transcripts() -> list[Path]:
         """Returns every transcript under the projects directory, sorted."""
-        if not CLAUDE_PROJECTS_DIR.exists():
+        root = anyio.Path(CLAUDE_PROJECTS_DIR)
+        if not await root.exists():
             return []
-        return sorted(CLAUDE_PROJECTS_DIR.rglob("*.jsonl"))
+        return sorted([Path(p) async for p in root.rglob("*.jsonl")])
     @staticmethod
-    def stat_mtime(path: Path) -> float | None:
+    async def stat_mtime(path: Path) -> float | None:
         try:
-            return path.stat().st_mtime
+            return (await anyio.Path(path).stat()).st_mtime
         except OSError:
             return None
     @staticmethod
-    def transcript_mtime(path: Path) -> float:
+    async def transcript_mtime(path: Path) -> float:
         """Returns ``path``'s modification time, raising if it cannot be read."""
-        return path.stat().st_mtime
+        return (await anyio.Path(path).stat()).st_mtime
     @staticmethod
-    def find_in(
+    async def find_in(
         directory: Path,
         *,
         name_contains: str | None = None,
@@ -52,14 +55,18 @@ class TranscriptDiscovery:
         Returns:
             Pairs of ``(path, mtime)`` sorted by path.
         """
-        if not directory.exists():
+        root = anyio.Path(directory)
+        if not await root.exists():
             return []
-        found = [
-            (p, mtime)
-            for p in directory.rglob("*.jsonl")
-            if not name_contains or name_contains in p.name
-            if (mtime := TranscriptDiscovery.stat_mtime(p)) is not None
-            if known_mtimes is None or (prev := known_mtimes.get(str(p))) is None or prev < mtime
-        ]
+        found: list[tuple[Path, float]] = []
+        async for entry in root.rglob("*.jsonl"):
+            if name_contains and name_contains not in entry.name:
+                continue
+            path = Path(entry)
+            if (mtime := await TranscriptDiscovery.stat_mtime(path)) is None:
+                continue
+            if known_mtimes is not None and (prev := known_mtimes.get(str(path))) is not None and prev >= mtime:
+                continue
+            found.append((path, mtime))
         found.sort(key=lambda e: e[0])
         return found[:limit] if limit is not None else found

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/filterspec.py RENAMED Viewed

@@ -63,11 +63,28 @@ STRUCTURAL_GROUPS: tuple[tuple[str, str], ...] = (
 AGENT_INJECTION_GROUPS: tuple[tuple[str, str], ...] = (
     ("xml_tags_extra", r"<(?:teammate-message|scheduled-task)\b"),
     ("augment_agent", r"^# Augment Agent\b"),
+    ("role_reminder", r"^\s*\[Role Reminder\b"),
 )
 INTERRUPT_MARKER_GROUPS: tuple[tuple[str, str], ...] = (("interrupt", r"\[Request interrupted by user"),)
 STOP_HOOK_GROUPS: tuple[tuple[str, str], ...] = (("stop_hook", r"Stop hook feedback:"),)
+# Approve-and-advance directives: a user telling the agent to proceed/commit/push or
+# to resume killed work. They follow an assistant turn but advance it rather than
+# correcting it — the opposite of pushback — so a pushback consumer drops them. The
+# approve-and-advance arm is start-anchored so a mid-sentence "commit"/"push" inside
+# a real correction never matches; only the resume arm searches anywhere.
+CONTINUATION_GROUPS: tuple[tuple[str, str], ...] = (
+    (
+        "continuation",
+        r"^\s*(?:(?:yea+h?|yep|yup|sure|ok(?:ay)?|sounds good|looks good|lgtm|perfect)[\s,.!]+){0,2}"
+        r"(?:go ahead\b|(?:go ahead and\s+)?(?:commit|push|rebase|merge|deploy)\b"
+        r"|ship it\b|cut (?:a |the )?(?:new )?release\b|proceed\b)"
+        r"|\byou must resume\b"
+        r"|\b(?:resume|restart) (?:them|it|the (?:sub-?agents?|workflows?|agents?|tasks?))\b",
+    ),
+)
 # Named junk categories a consumer composes via ``drop_junk(...)``. Interrupt and
 # stop-hook are kept separate because they carry pushback and must never be folded
 # into the structural-noise default.
@@ -76,6 +93,7 @@ JUNK_CATEGORIES: dict[str, tuple[tuple[str, str], ...]] = {
     "agent_injection": AGENT_INJECTION_GROUPS,
     "interrupt": INTERRUPT_MARKER_GROUPS,
     "stop_hook": STOP_HOOK_GROUPS,
+    "continuation": CONTINUATION_GROUPS,
 }
 # The superset of structural noise (structural ∪ agent-injection), WITHOUT
@@ -125,6 +143,7 @@ PORTABLE_GROUP_NAMES: frozenset[str] = frozenset(
         *STRUCTURAL_NOISE_GROUPS,
         *INTERRUPT_MARKER_GROUPS,
         *STOP_HOOK_GROUPS,
+        *CONTINUATION_GROUPS,
         *FRUSTRATION_GROUPS,
         *MILD_IMPATIENCE_GROUPS,
     )
@@ -398,6 +417,7 @@ def keep(event: TranscriptEvent, spec: FilterSpec) -> bool:
 def labels_for(event: TranscriptEvent, spec: FilterSpec) -> tuple[str, ...]:
+    """Returns the TAG labels ``spec`` records for ``event``, in clause order."""
     kind = event_kind(event)
     return tuple(
         clause.label

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/parser.py RENAMED Viewed

@@ -1,6 +1,8 @@
 from __future__ import annotations
+import asyncio
 import os
+from contextlib import suppress
 from datetime import datetime
 from typing import TYPE_CHECKING, Any, ClassVar, Literal
@@ -144,12 +146,12 @@ def decode_line(line: bytes) -> TranscriptEvent | None:
     return build_event(data)
-def parse_events(path: Path) -> list[TranscriptEvent]:
-    return parse_events_from_bytes(path.read_bytes())
+async def parse_events_async(path: Path) -> list[TranscriptEvent]:
+    return parse_events_from_bytes(await anyio.Path(path).read_bytes())
 def parse_one(path: Path, mtime: float) -> ParsedTranscript:
-    return ParsedTranscript(path=path, mtime=mtime, events=tuple(parse_events(path)))
+    return ParsedTranscript(path=path, mtime=mtime, events=tuple(parse_events_from_bytes(path.read_bytes())))
 def parse_one_filtered(path: Path, mtime: float, spec: FilterSpec | None) -> ParsedTranscript:
@@ -192,7 +194,14 @@ class PythonBackend:
         async def worker(path: Path, mtime: float) -> None:
             async with limiter:
-                await send_ch.send(await anyio.to_thread.run_sync(parse_one_filtered, path, mtime, spec))
+                try:
+                    parsed = await anyio.to_thread.run_sync(parse_one_filtered, path, mtime, spec)
+                except (OSError, ValueError, KeyError):
+                    return
+                try:
+                    await send_ch.send(parsed)
+                except anyio.BrokenResourceError:
+                    return
         async def drive() -> None:
             try:
@@ -202,11 +211,15 @@ class PythonBackend:
             finally:
                 await send_ch.aclose()
-        async with anyio.create_task_group() as outer:
-            outer.start_soon(drive)
+        driver = asyncio.ensure_future(drive())
+        try:
             async with recv_ch:
                 async for parsed in recv_ch:
                     yield parsed
+        finally:
+            driver.cancel()
+            with suppress(asyncio.CancelledError):
+                await driver
 class TranscriptParser:

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/sentiment/buckets.py RENAMED Viewed

@@ -16,6 +16,8 @@ MIN_USER_CHARS = 5
 class ConversationBucket(NamedTuple):
+    """A session's messages grouped into one fixed-width time window — the unit that gets scored."""
     session_id: SessionId
     bucket_index: BucketIndex
     bucket_start: datetime
@@ -23,11 +25,19 @@ class ConversationBucket(NamedTuple):
 class BucketKey(NamedTuple):
+    """Stable identity of a :class:`ConversationBucket`: its session and bucket index."""
     session_id: SessionId
     bucket_index: BucketIndex
 class ConversationBucketer:
+    """Groups transcript messages into per-session, time-aligned buckets worth scoring.
+    Sessions below ``MIN_USER_TURNS_PER_SESSION`` and windows lacking a substantive user turn or
+    any assistant turn are dropped.
+    """
     @staticmethod
     def align_to_bucket(ts: datetime) -> datetime:
         return ts.replace(
@@ -73,6 +83,7 @@ class ConversationBucketer:
 def extract_bucket_keys(messages: list[TranscriptMessage]) -> list[BucketKey]:
+    """Returns the :class:`BucketKey` of every scorable bucket in ``messages``."""
     return [
         BucketKey(session_id=b.session_id, bucket_index=b.bucket_index)
         for b in ConversationBucketer.bucket_messages(messages)

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/sentiment/lexicon.py RENAMED Viewed

@@ -38,6 +38,13 @@ def rust_lexicon() -> ModuleType | None:
 class Lexicon:
+    """Token-polarity lookup: AFINN base scores layered with coding-domain overrides.
+    ``DOMAIN_OVERRIDES`` pins context-specific terms (``stop``, ``broken``, ``ship``) that
+    AFINN mis-scores, and magnitudes below ``MIN_MAGNITUDE`` collapse to neutral. Backs the
+    lexicon-bearing score stages through :meth:`has_hit`.
+    """
     DOMAIN_OVERRIDES: ClassVar[dict[str, int]] = {
         "stop": -3,
         "halt": -3,
@@ -104,6 +111,11 @@ class Lexicon:
     @classmethod
     def polarity(cls, lemma: str) -> int:
+        """The signed polarity of ``lemma``.
+        A domain override when present, else its AFINN score zeroed below
+        ``MIN_MAGNITUDE``.
+        """
         lower = lemma.lower()
         if (override := cls.DOMAIN_OVERRIDES.get(lower)) is not None:
             return override
@@ -129,6 +141,12 @@ class Lexicon:
 class NLP:
+    """Lazy loader for the spaCy ``en_core_web_sm`` model used to lemmatize text.
+    Loads from the user spaCy cache, downloading the model on first use; on failure it records
+    the diagnostic and disables itself so the lexicon path fails open.
+    """
     model: ClassVar[spacy.language.Language | None] = None
     failed: ClassVar[bool] = False
     last_download_output: ClassVar[str | None] = None

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/sentiment/messages.py RENAMED Viewed

@@ -7,11 +7,15 @@ from cc_transcript.models import SessionId
 class ToolCall(NamedTuple):
+    """A single tool invocation within a message: the tool ``name`` and optional target file path."""
     name: str
     file_path: str | None = None
 class UserMessage(NamedTuple):
+    """A user turn distilled for bucketing: its text, tool calls, and authoring metadata."""
     content: str
     timestamp: datetime
     session_id: SessionId
@@ -23,6 +27,8 @@ class UserMessage(NamedTuple):
 class AssistantMessage(NamedTuple):
+    """An assistant turn distilled for bucketing: its text, tool calls, and responding model."""
     content: str
     timestamp: datetime
     session_id: SessionId

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/sentiment/scorespec.py RENAMED Viewed

@@ -91,24 +91,29 @@ class ScoreSpec:
 def flag_frustration(*, score: int = 1) -> FrustrationShortCircuit:
+    """Composes the short-circuit stage that pins a frustrated message to ``score`` before inference."""
     return FrustrationShortCircuit(groups=FRUSTRATION_GROUPS, score=score)
 def clamp_positive(*, floor: int = 3, max_words: int = SHORT_MESSAGE_MAX_WORDS) -> PositiveClamp:
+    """Composes the post-process stage that lowers a top score on a short message lacking positive lexicon."""
     return PositiveClamp(positive_floor=floor, max_words=max_words)
 def demote_mild_irritation(*, floor: int = 3) -> MildIrritationDemote:
+    """Composes the post-process stage that softens a non-hostile mild-impatience message off the floor score."""
     return MildIrritationDemote(
         trigger_groups=MILD_IMPATIENCE_GROUPS, hostile_groups=FRUSTRATION_GROUPS, hostile_floor=floor
     )
 def clamp_resume() -> ResumeClamp:
+    """Composes the post-process stage that neutralizes a bare resume phrase to a middling score."""
     return ResumeClamp(phrases=RESUME_PHRASE_SET)
 def build_score_spec(*stages: ScoreStage) -> ScoreSpec:
+    """Assembles ``stages`` into a :class:`ScoreSpec` for the engine to apply around inference."""
     return ScoreSpec(stages=tuple(stages))

cc_transcript-0.5.0/cc_transcript/store.py ADDED Viewed

@@ -0,0 +1,130 @@
+from __future__ import annotations
+from contextlib import asynccontextmanager
+from typing import TYPE_CHECKING, Self
+import aiosqlite
+import anyio
+if TYPE_CHECKING:
+    from collections.abc import AsyncIterator
+    from pathlib import Path
+    from types import TracebackType
+FILE_SCHEMA = """
+CREATE TABLE IF NOT EXISTS files (
+  path TEXT PRIMARY KEY,
+  mtime REAL NOT NULL
+);
+"""
+class FileStateStore:
+    """Tracks which transcript files have been ingested, keyed by mtime.
+    Backed by a single async SQLite (``aiosqlite``) database with WAL journaling
+    and a task lock, so it is safe to share one store across concurrent tasks.
+    Consumers compose their own writes alongside :meth:`record_file` inside
+    :meth:`transaction` to keep ingestion state and derived records atomic.
+    Example:
+        >>> store = await FileStateStore.open(Path("state.db"), extra_schema=MY_SCHEMA)
+        >>> async with store.transaction() as conn:
+        ...     await conn.execute("INSERT INTO my_table VALUES (?)", (value,))
+        ...     await store.record_file(str(path), mtime)
+    """
+    def __init__(self, conn: aiosqlite.Connection) -> None:
+        self.conn = conn
+        self.lock = anyio.Lock()
+        self._txn_owner: int | None = None
+    @classmethod
+    async def open(cls, path: Path, *, extra_schema: str = "") -> Self:
+        """Opens (creating if needed) the store at ``path``.
+        Args:
+            path: The database file path; its parent is created if absent.
+            extra_schema: Additional DDL to execute after the file schema,
+                e.g. consumer tables that reference ``files(path)``.
+        Returns:
+            The opened store.
+        """
+        path.parent.mkdir(parents=True, exist_ok=True)
+        conn = await aiosqlite.connect(str(path), isolation_level=None)
+        conn.row_factory = aiosqlite.Row
+        await conn.execute("PRAGMA foreign_keys = ON")
+        await conn.execute("PRAGMA journal_mode = WAL")
+        await conn.executescript(FILE_SCHEMA + extra_schema)
+        return cls(conn)
+    async def close(self) -> None:
+        """Closes the underlying connection."""
+        async with self.lock:
+            await self.conn.close()
+    async def __aenter__(self) -> Self:
+        return self
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        tb: TracebackType | None,
+    ) -> None:
+        await self.close()
+    @asynccontextmanager
+    async def transaction(self) -> AsyncIterator[aiosqlite.Connection]:
+        """Yields the locked connection inside a single committed transaction.
+        Use this to compose consumer writes with :meth:`record_file` so they
+        commit or roll back together. :meth:`record_file` called within the
+        block joins this transaction instead of opening its own.
+        Yields:
+            The store's connection, held under the store lock.
+        """
+        async with self.lock:
+            self._txn_owner = anyio.get_current_task().id
+            await self.conn.execute("BEGIN IMMEDIATE")
+            try:
+                yield self.conn
+            except BaseException:
+                await self.conn.rollback()
+                raise
+            else:
+                await self.conn.commit()
+            finally:
+                self._txn_owner = None
+    async def file_mtimes(self) -> dict[str, float]:
+        """Returns the recorded ``path`` to ``mtime`` map."""
+        async with self.lock, self.conn.execute("SELECT path, mtime FROM files") as cur:
+            return {row["path"]: row["mtime"] async for row in cur}
+    async def record_file(self, path: str, mtime: float) -> None:
+        """Upserts the recorded mtime for ``path``.
+        Call inside :meth:`transaction` to commit alongside consumer writes;
+        called on its own it commits immediately.
+        """
+        if self._txn_owner == anyio.get_current_task().id:
+            await self.upsert_file(path, mtime)
+            return
+        async with self.lock:
+            await self.conn.execute("BEGIN IMMEDIATE")
+            try:
+                await self.upsert_file(path, mtime)
+            except BaseException:
+                await self.conn.rollback()
+                raise
+            else:
+                await self.conn.commit()
+    async def upsert_file(self, path: str, mtime: float) -> None:
+        await self.conn.execute(
+            "INSERT INTO files(path, mtime) VALUES(?, ?) ON CONFLICT(path) DO UPDATE SET mtime = excluded.mtime",
+            (path, mtime),
+        )

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "cc-transcript"
-version = "0.4.0"
+version = "0.5.0"
 description = "Typed events for Claude Code transcripts: discovery, a superset JSONL parser (Python + Rust), and ingestion-state tracking."
 readme = "README.md"
 license = "PolyForm-Noncommercial-1.0.0"
@@ -17,6 +17,7 @@ classifiers = [
 ]
 requires-python = ">=3.13"
 dependencies = [
+    "aiosqlite>=0.20",
     "anyio>=4.4",
     "orjson>=3.10",
 ]
@@ -24,7 +25,7 @@ dependencies = [
 [project.optional-dependencies]
 dev = [
     "pytest>=8.0",
-    "pyright>=1.1",
+    "ty>=0.0.44",
     "ruff>=0.8",
 ]
 lexicon = [
@@ -57,12 +58,32 @@ markers = [
     "integration: Integration tests",
 ]
+# ty (Astral) is the default type checker — run `uv run ty check cc_transcript`.
+# It is fast, understands modern syntax, and avoids the strict-pyright false
+# positives on pydantic/attrs-style dynamic defaults and PK-type overrides.
+[tool.ty.rules]
+# Keep cross-checker `# type: ignore` / `# pyright: ignore` comments from tripping ty.
+unused-type-ignore-comment = "ignore"
+unresolved-import = "ignore"
+# pyright is kept as a secondary checker (editors / `uvx pyright`). Basic mode plus
+# a few disables covers the noise; ty is the gate that runs in CI.
 [tool.pyright]
 pythonVersion = "3.13"
-typeCheckingMode = "strict"
+typeCheckingMode = "basic"
 include = ["cc_transcript"]
 venvPath = "."
 venv = ".venv"
+reportImplicitOverride = "none"
+reportIncompatibleVariableOverride = "none"
+reportUnknownVariableType = "none"
+reportUnknownMemberType = "none"
+reportUnknownArgumentType = "none"
+reportUnknownParameterType = "none"
+reportUnknownLambdaType = "none"
+reportMissingTypeArgument = "none"
+reportPrivateImportUsage = "none"
+reportUnusedCallResult = "none"
 [tool.ruff]
 line-length = 120
@@ -73,7 +94,7 @@ src = [".", "tests"]
 select = ["E", "F", "I", "UP"]
 [tool.ruff.lint.per-file-ignores]
-"**/__init__.py" = ["F401"]
+"__init__.py" = ["F401"]
 [dependency-groups]
 docs = [

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/rust/Cargo.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [package]
 name = "cc_transcript_parser"
-version = "0.4.0"
+version = "0.5.0"
 edition = "2021"
 [lib]

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/rust/src/lib.rs RENAMED Viewed

@@ -96,30 +96,48 @@ pub struct ParseStream {
 #[pymethods]
 impl ParseStream {
+    // A file whose events cannot be materialized (e.g. a malformed line missing a
+    // required field) is silently skipped — whole-file parity with PythonBackend.
     fn recv<'py>(&self, py: Python<'py>) -> PyResult<Option<Bound<'py, PyAny>>> {
-        match py.detach(|| self.rx.recv().ok()) {
-            None => Ok(None),
-            Some(pf) => Ok(Some(parsed_file_to_py(py, pf)?)),
+        loop {
+            match py.detach(|| self.rx.recv().ok()) {
+                None => return Ok(None),
+                Some(pf) => {
+                    if let Ok(obj) = parsed_file_to_py(py, pf) {
+                        return Ok(Some(obj));
+                    }
+                }
+            }
         }
     }
     fn recv_many<'py>(&self, py: Python<'py>, max: usize) -> PyResult<Vec<Bound<'py, PyAny>>> {
-        py.detach(|| {
-            let mut out: Vec<ParsedFile> = Vec::new();
-            if let Ok(pf) = self.rx.recv() {
-                out.push(pf);
-                while out.len() < max {
-                    match self.rx.try_recv() {
-                        Ok(pf) => out.push(pf),
-                        Err(_) => break,
+        let mut out: Vec<Bound<'py, PyAny>> = Vec::new();
+        // Block for the first materialized file; return [] only when the channel
+        // is genuinely closed, so an all-skipped batch never reads as "done".
+        loop {
+            match py.detach(|| self.rx.recv().ok()) {
+                None => return Ok(out),
+                Some(pf) => {
+                    if let Ok(obj) = parsed_file_to_py(py, pf) {
+                        out.push(obj);
+                        break;
+                    }
+                }
+            }
+        }
+        // Drain what is already buffered without blocking, skipping bad files.
+        while out.len() < max {
+            match py.detach(|| self.rx.try_recv().ok()) {
+                None => break,
+                Some(pf) => {
+                    if let Ok(obj) = parsed_file_to_py(py, pf) {
+                        out.push(obj);
                     }
                 }
             }
-            out
-        })
-        .into_iter()
-        .map(|pf| parsed_file_to_py(py, pf))
-        .collect()
+        }
+        Ok(out)
     }
 }

cc_transcript-0.4.0/cc_transcript/store.py DELETED Viewed

@@ -1,118 +0,0 @@
-from __future__ import annotations
-import sqlite3
-import threading
-from contextlib import contextmanager
-from typing import TYPE_CHECKING, Self
-if TYPE_CHECKING:
-    from collections.abc import Generator
-    from pathlib import Path
-    from types import TracebackType
-FILE_SCHEMA = """
-CREATE TABLE IF NOT EXISTS files (
-  path TEXT PRIMARY KEY,
-  mtime REAL NOT NULL
-);
-"""
-class FileStateStore:
-    """Tracks which transcript files have been ingested, keyed by mtime.
-    Backed by a single SQLite database with WAL journaling and a process-wide
-    lock, so it is safe to share one store across threads. Consumers compose
-    their own writes alongside :meth:`record_file` inside :meth:`transaction`
-    to keep ingestion state and derived records atomic.
-    Example:
-        >>> store = FileStateStore.open(Path("state.db"), extra_schema=MY_SCHEMA)
-        >>> with store.transaction() as conn:
-        ...     conn.execute("INSERT INTO my_table VALUES (?)", (value,))
-        ...     store.record_file(str(path), mtime)
-    """
-    def __init__(self, conn: sqlite3.Connection) -> None:
-        self.conn = conn
-        self.lock = threading.RLock()
-        self._in_transaction = False
-    @classmethod
-    def open(cls, path: Path, *, extra_schema: str = "") -> Self:
-        """Opens (creating if needed) the store at ``path``.
-        Args:
-            path: The database file path; its parent is created if absent.
-            extra_schema: Additional DDL to execute after the file schema,
-                e.g. consumer tables that reference ``files(path)``.
-        Returns:
-            The opened store.
-        """
-        path.parent.mkdir(parents=True, exist_ok=True)
-        conn = sqlite3.connect(str(path), check_same_thread=False)
-        conn.row_factory = sqlite3.Row
-        conn.execute("PRAGMA foreign_keys = ON")
-        conn.execute("PRAGMA journal_mode = WAL")
-        conn.executescript(FILE_SCHEMA + extra_schema)
-        conn.commit()
-        return cls(conn)
-    def close(self) -> None:
-        """Closes the underlying connection."""
-        with self.lock:
-            self.conn.close()
-    def __enter__(self) -> Self:
-        return self
-    def __exit__(
-        self,
-        exc_type: type[BaseException] | None,
-        exc: BaseException | None,
-        tb: TracebackType | None,
-    ) -> None:
-        self.close()
-    @contextmanager
-    def transaction(self) -> Generator[sqlite3.Connection]:
-        """Yields the locked connection inside a single committed transaction.
-        Use this to compose consumer writes with :meth:`record_file` so they
-        commit or roll back together. :meth:`record_file` called within the
-        block joins this transaction instead of opening its own.
-        Yields:
-            The store's connection, held under the store lock.
-        """
-        with self.lock, self.conn:
-            self._in_transaction = True
-            try:
-                yield self.conn
-            finally:
-                self._in_transaction = False
-    def file_mtimes(self) -> dict[str, float]:
-        """Returns the recorded ``path`` to ``mtime`` map."""
-        with self.lock:
-            return {row["path"]: row["mtime"] for row in self.conn.execute("SELECT path, mtime FROM files")}
-    def record_file(self, path: str, mtime: float) -> None:
-        """Upserts the recorded mtime for ``path``.
-        Call inside :meth:`transaction` to commit alongside consumer writes;
-        called on its own it commits immediately.
-        """
-        with self.lock:
-            if self._in_transaction:
-                self.upsert_file(path, mtime)
-                return
-            with self.conn:
-                self.upsert_file(path, mtime)
-    def upsert_file(self, path: str, mtime: float) -> None:
-        self.conn.execute(
-            "INSERT INTO files(path, mtime) VALUES(?, ?) ON CONFLICT(path) DO UPDATE SET mtime = excluded.mtime",
-            (path, mtime),
-        )

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/Cargo.toml RENAMED Viewed

File without changes

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/LICENSE RENAMED Viewed

File without changes

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/_parser_rs.pyi RENAMED Viewed

File without changes

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/backend.py RENAMED Viewed

File without changes

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/builders.py RENAMED Viewed

File without changes

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/filters.py RENAMED Viewed

File without changes

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/models.py RENAMED Viewed

File without changes

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/py.typed RENAMED Viewed

File without changes

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/rust.py RENAMED Viewed

File without changes

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/sentiment/__init__.py RENAMED Viewed

File without changes

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/cc_transcript/sentiment/engine.py RENAMED Viewed

File without changes

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/rust/data/afinn-en-165.tsv RENAMED Viewed

File without changes

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/rust/data/domain_overrides.tsv RENAMED Viewed

File without changes

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/rust/src/event.rs RENAMED Viewed

File without changes

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/rust/src/filter.rs RENAMED Viewed

File without changes

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/rust/src/lexicon.rs RENAMED Viewed

File without changes

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/rust/src/model.rs RENAMED Viewed

File without changes

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/rust/src/score.rs RENAMED Viewed

File without changes

{cc_transcript-0.4.0 → cc_transcript-0.5.0}/rust/src/value.rs RENAMED Viewed

File without changes

cc-transcript 0.4.0__tar.gz → 0.5.0__tar.gz

cc-transcript 0.4.0__tar.gz → 0.5.0__tar.gz