cr-proc 0.1.2-py3-none-any.whl → 0.1.5-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
@@ -65,12 +65,29 @@ def load_jsonl(file: Path) -> tuple[dict[str, Any], ...]:
 
     if data is None:
         # If gzip stream is broken, attempt a lenient zlib decompress to salvage content.
+        # Handle multiple concatenated gzip streams (common in recordings)
         try:
             raw = file.read_bytes()
-            dobj = zlib.decompressobj(16 + zlib.MAX_WBITS)
-            text_bytes = dobj.decompress(raw) + dobj.flush()
-            text = text_bytes.decode("utf-8", errors="replace")
-            data = _load_jsonl(StringIO(text))
+            all_text = ""
+            remaining = raw
+
+            # Decompress all concatenated gzip streams
+            while remaining:
+                dobj = zlib.decompressobj(16 + zlib.MAX_WBITS)
+                try:
+                    text_bytes = dobj.decompress(remaining) + dobj.flush()
+                    all_text += text_bytes.decode("utf-8", errors="replace")
+                    remaining = dobj.unused_data
+                    if not text_bytes or not remaining:
+                        break
+                except Exception:
+                    # If decompression fails, try to salvage what we have
+                    break
+
+            if all_text:
+                data = _load_jsonl(StringIO(all_text))
+            else:
+                data = None
        except Exception:
            data = None
 
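
For context, the loop above handles gzip files that contain several members back to back, which the comment says is common in recordings (an appending recorder produces exactly this). A minimal standalone sketch of the pattern, with invented payloads:

```python
import gzip
import zlib

# Two gzip members written back to back, as an appending recorder would produce.
blob = gzip.compress(b'{"a": 1}\n') + gzip.compress(b'{"b": 2}\n')

def decompress_members(raw: bytes) -> str:
    """Decompress every concatenated gzip member in raw, salvaging what we can."""
    out, remaining = "", raw
    while remaining:
        # wbits = 16 + MAX_WBITS tells zlib to expect a gzip wrapper
        dobj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        chunk = dobj.decompress(remaining) + dobj.flush()
        out += chunk.decode("utf-8", errors="replace")
        remaining = dobj.unused_data  # bytes after this member, i.e. the next stream
        if not chunk or not remaining:
            break
    return out

print(decompress_members(blob))  # recovers both JSON lines
```

A plain `gzip.decompress` would also accept concatenated members, but the `zlib` object gives the lenient, salvage-what-you-can control the hunk needs on truncated streams.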
@@ -113,20 +113,84 @@ def template_diff(template: str, jsonData: tuple[dict[str, Any], ...]) -> str:
     return "".join(diff_iter)
 
 
-def _detect_multiline_external_pastes(jsonData: tuple[dict[str, Any], ...]) -> list[dict[str, Any]]:
+def _build_document_states(jsonData: tuple[dict[str, Any], ...]) -> tuple[list[str], set[str]]:
+    """
+    Build complete document state at each event and a whitelist of all content seen.
+
+    Reconstructs the document after each keystroke/edit to track what content
+    existed in the document at each point in time. This allows detectors to
+    check if pasted/autocompleted content already existed in the document.
+
+    Parameters
+    ----------
+    jsonData : tuple[dict[str, Any], ...]
+        The event data from the JSONL file
+
+    Returns
+    -------
+    tuple[list[str], set[str]]
+        - List of document states (one per event, strings of full document content)
+        - Set of all content fragments ever seen (whitelist for internal copy detection)
+    """
+    document_states = []
+    content_whitelist = set()
+    current_state = ""
+
+    for idx, event in enumerate(jsonData):
+        old_frag = _normalize_newlines(event.get("oldFragment", ""))
+        new_frag = _normalize_newlines(event.get("newFragment", ""))
+        offset = event.get("offset", 0)
+
+        # First event is the initial snapshot (template)
+        if idx == 0:
+            current_state = new_frag
+        elif new_frag != old_frag:
+            # Apply the edit to reconstruct document state
+            current_state = current_state[:offset] + new_frag + current_state[offset + len(old_frag):]
+
+        document_states.append(current_state)
+
+        # Build whitelist of all content fragments seen
+        # Add both old and new fragments to whitelist for comprehensive coverage
+        if len(old_frag) > 10:  # Ignore tiny fragments
+            content_whitelist.add(old_frag)
+        if len(new_frag) > 10:
+            content_whitelist.add(new_frag)
+
+        # Also add the full document state to whitelist
+        if len(current_state) > 10:
+            content_whitelist.add(current_state)
+
+    return document_states, content_whitelist
+
+
+def _detect_multiline_external_pastes(
+    jsonData: tuple[dict[str, Any], ...],
+    document_states: list[str],
+    content_whitelist: set[str]
+) -> list[dict[str, Any]]:
     """
     Detect multi-line copy-paste events from external sources.
 
     Flags newFragments that are significant in length (more than one line)
     and do not appear to be copied from within the document itself.
 
-    Returns a list of suspicious multi-line paste events.
+    Parameters
+    ----------
+    jsonData : tuple[dict[str, Any], ...]
+        The event data
+    document_states : list[str]
+        Full document state at each event
+    content_whitelist : set[str]
+        All content fragments ever seen in the document (for internal copy detection)
+
+    Returns
+    -------
+    list[dict[str, Any]]
+        List of suspicious multi-line paste events.
     """
     suspicious_events = []
 
-    # Build a history of all document content seen so far
-    document_history = set()
-
     for idx, event in enumerate(jsonData):
         old_frag = _normalize_newlines(event.get("oldFragment", ""))
         new_frag = _normalize_newlines(event.get("newFragment", ""))
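
The splice in `_build_document_states` is standard offset-based edit application. A toy run with hypothetical events (the `offset`/`oldFragment`/`newFragment` shapes mirror the fields read above):

```python
# Hypothetical three-event recording: initial snapshot, an insert, a replacement.
events = [
    {"offset": 0, "oldFragment": "x = 1\n", "newFragment": "x = 1\n"},  # snapshot
    {"offset": 6, "oldFragment": "", "newFragment": "y = 2\n"},         # insert at end
    {"offset": 0, "oldFragment": "x = 1", "newFragment": "x = 10"},     # replace first line
]

state = ""
for idx, ev in enumerate(events):
    old, new, off = ev["oldFragment"], ev["newFragment"], ev["offset"]
    if idx == 0:
        state = new  # first event carries the full template
    elif new != old:
        state = state[:off] + new + state[off + len(old):]  # splice the edit in

print(state)  # "x = 10\ny = 2\n"
```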
@@ -140,32 +204,39 @@ def _detect_multiline_external_pastes(jsonData: tuple[dict[str, Any], ...]) -> l
         if len(new_lines) <= 2:  # Single line or line + empty
             continue
 
-        # Check if the new content appears to be from within the document
+        # Check if the new content already existed in the document at any prior point
         is_internal_copy = False
 
-        # Check if new_frag content was present in any previous fragments
-        for hist_content in document_history:
-            # Ignore tiny fragments; they appear everywhere and cause false positives
-            if len(hist_content) < 20:
-                continue
+        # Check against document state BEFORE this event
+        if idx > 0:
+            prior_state = document_states[idx - 1]
+            if new_frag in prior_state:
+                is_internal_copy = True
+
+        # Also check against whitelist of all content seen
+        if not is_internal_copy:
+            for hist_content in content_whitelist:
+                # Ignore tiny fragments
+                if len(hist_content) < 20:
+                    continue
 
-            # Require substantial overlap in size to count as an internal copy
-            similar_length = (
-                len(hist_content) >= 0.8 * len(new_frag)
-                and len(hist_content) <= 1.25 * len(new_frag)
-            )
+                # Require substantial overlap in size to count as an internal copy
+                similar_length = (
+                    len(hist_content) >= 0.8 * len(new_frag)
+                    and len(hist_content) <= 1.25 * len(new_frag)
+                )
 
-            if new_frag == hist_content:
-                is_internal_copy = True
-                break
+                if new_frag == hist_content:
+                    is_internal_copy = True
+                    break
 
-            if new_frag in hist_content and similar_length:
-                is_internal_copy = True
-                break
+                if new_frag in hist_content and similar_length:
+                    is_internal_copy = True
+                    break
 
-            if hist_content in new_frag and similar_length:
-                is_internal_copy = True
-                break
+                if hist_content in new_frag and similar_length:
+                    is_internal_copy = True
+                    break
 
         # Also check if it's in the old fragment (internal move/copy)
         if not is_internal_copy and old_frag and (new_frag in old_frag or old_frag in new_frag):
@@ -177,15 +248,9 @@ def _detect_multiline_external_pastes(jsonData: tuple[dict[str, Any], ...]) -> l
                 "line_count": len(new_lines),
                 "char_count": len(new_frag),
                 "reason": "multi-line external paste",
-                "newFragment": new_frag[:100] + ("..." if len(new_frag) > 100 else ""),
+                "newFragment": new_frag
            })
 
-        # Update history after analysis so the current fragment cannot mask itself
-        if len(old_frag) > 1:
-            document_history.add(old_frag)
-        if len(new_frag) > 1:
-            document_history.add(new_frag)
-
     return suspicious_events
 
 
@@ -262,6 +327,119 @@ def _detect_rapid_paste_sequences(jsonData: tuple[dict[str, Any], ...]) -> list[
     return suspicious_events
 
 
+def _detect_fullline_autocomplete(
+    jsonData: tuple[dict[str, Any], ...],
+    document_states: list[str],
+    content_whitelist: set[str],
+    excluded_indices: set[int]
+) -> list[dict[str, Any]]:
+    """
+    Detect full-line auto-complete events where the IDE/AI completes code.
+
+    At keystroke level, events show:
+    - Normal typing: oldFragment="" (empty), newFragment="X" (1 char)
+    - Auto-complete: oldFragment="" (empty), newFragment="long_text" (10+ chars)
+
+    Auto-complete is detected when:
+    - oldFragment is empty or very short (0-3 chars)
+    - newFragment is substantial (10+ characters)
+    - newFragment contains code structure (assignment, parens, brackets, etc.)
+    - newFragment does NOT already exist in the document state
+    - Event not already flagged as external copy-paste
+
+    Parameters
+    ----------
+    jsonData : tuple[dict[str, Any], ...]
+        The event data
+    document_states : list[str]
+        Full document state at each event
+    content_whitelist : set[str]
+        All content fragments ever seen in the document
+    excluded_indices : set[int]
+        Set of event indices already flagged by other detectors (to avoid double-flagging)
+
+    Returns
+    -------
+    list[dict[str, Any]]
+        List of suspected auto-complete events.
+    """
+    suspicious_events = []
+
+    for idx, event in enumerate(jsonData):
+        # Skip if already flagged by another detector
+        if idx in excluded_indices:
+            continue
+
+        old_frag = _normalize_newlines(event.get("oldFragment", ""))
+        new_frag = _normalize_newlines(event.get("newFragment", ""))
+
+        # Skip first event (template) and no-change events
+        if idx == 0 or new_frag == old_frag:
+            continue
+
+        old_len = len(old_frag)
+        new_len = len(new_frag)
+
+        # At keystroke level, oldFragment is typically empty for insertions
+        # Allow up to 3 chars for prefix-based autocomplete triggers
+        if old_len > 3:
+            continue
+
+        # Skip single-character additions (normal typing)
+        # Auto-complete typically adds 10+ characters at once
+        if new_len < 10:
+            continue
+
+        # Skip large multi-line pastes - those should be caught by multi-line paste detector
+        # Auto-complete is typically 1-2 lines and under 100 chars
+        # Anything larger is likely external copy-paste, not auto-complete
+        new_lines = new_frag.split("\n")
+        if len(new_lines) > 2 or new_len > 100:
+            continue
+
+        # The new fragment should not be just whitespace
+        if not new_frag.strip():
+            continue
+
+        # Check if the new fragment contains code structure indicators
+        # These strongly suggest IDE/AI auto-completion of code
+        code_indicators = [
+            "=",   # Assignment (most common in autocomplete)
+            "(",   # Function call/definition
+            ")",   # Closing paren
+            ":",   # Block statement (if, for, def, etc.)
+            "{",   # Dictionary/block
+            "}",   # Closing brace
+            "[",   # List/index
+            "]",   # Closing bracket
+            "=>",  # Arrow function
+            ";",   # Statement end
+        ]
+
+        has_code_structure = any(indicator in new_frag for indicator in code_indicators)
+
+        # Must have code structure to be considered auto-complete
+        if has_code_structure:
+            # Check if this content already existed in the document state BEFORE this event
+            is_internal_copy = False
+
+            if idx > 0:
+                prior_state = document_states[idx - 1]
+                if new_frag in prior_state:
+                    is_internal_copy = True
+
+            if not is_internal_copy:
+                suspicious_events.append({
+                    "event_index": idx,
+                    "line_count": len(new_lines),
+                    "char_count": new_len,
+                    "reason": "full-line auto-complete",
+                    "newFragment": new_frag,
+                })
+
+    return suspicious_events
+
+
 def detect_external_copypaste(jsonData: tuple[dict[str, Any], ...]) -> list[dict[str, Any]]:
     """
     Detect copy-paste events from external sources and AI-assisted coding patterns.
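
Stripped of the document-state and exclusion checks, the size/shape heuristic above can be exercised on its own. A small sketch with made-up events (`looks_like_autocomplete` is a hypothetical helper, not part of the package):

```python
# Made-up keystroke events: a single keypress vs. a completion-sized insert.
events = [
    {"oldFragment": "", "newFragment": "x"},                       # normal typing
    {"oldFragment": "", "newFragment": "result = compute(a, b)"},  # code-shaped, 22 chars
]

CODE_INDICATORS = ("=", "(", ")", ":", "{", "}", "[", "]", "=>", ";")

def looks_like_autocomplete(old: str, new: str) -> bool:
    return (
        len(old) <= 3                  # insertion, or a short prefix being replaced
        and 10 <= len(new) <= 100      # bigger than a keystroke, smaller than a paste
        and len(new.split("\n")) <= 2  # at most two lines
        and bool(new.strip())          # not pure whitespace
        and any(ind in new for ind in CODE_INDICATORS)
    )

for ev in events:
    print(looks_like_autocomplete(ev["oldFragment"], ev["newFragment"]))
# False, then True
```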
@@ -269,16 +447,64 @@ def detect_external_copypaste(jsonData: tuple[dict[str, Any], ...]) -> list[dict
     Combines detection of:
     1. Multi-line external paste events (content not from within document)
     2. Rapid one-line paste sequences (potential AI assistance indicator)
+    3. Full-line auto-complete events (user types, AI completes the line)
+
+    Detection order matters: events flagged by earlier detectors are excluded
+    from later detectors to avoid double-flagging.
 
-    Returns a list of all suspicious events with metadata.
+    Returns a list of all suspicious events with metadata, including aggregate statistics.
     """
     suspicious_events = []
 
-    # Detect multi-line external pastes
-    suspicious_events.extend(_detect_multiline_external_pastes(jsonData))
+    # Build shared document state tracking
+    # This reconstructs the full document at each event and creates a whitelist
+    # of all content that has ever appeared in the document
+    document_states, content_whitelist = _build_document_states(jsonData)
+
+    # Step 1: Detect multi-line external pastes
+    multiline_events = _detect_multiline_external_pastes(jsonData, document_states, content_whitelist)
+    suspicious_events.extend(multiline_events)
+
+    # Step 2: Detect rapid one-line paste sequences (AI indicator)
+    rapid_paste_events = _detect_rapid_paste_sequences(jsonData)
+    suspicious_events.extend(rapid_paste_events)
+
+    # Build set of all event indices already flagged
+    excluded_indices = set()
+    for event in multiline_events:
+        # Handle both single events and clusters
+        if "event_indices" in event:
+            excluded_indices.update(event["event_indices"])
+        else:
+            excluded_indices.add(event["event_index"])
+
+    for event in rapid_paste_events:
+        if "event_indices" in event:
+            excluded_indices.update(event["event_indices"])
+        else:
+            excluded_indices.add(event["event_index"])
+
+    # Step 3: Detect full-line auto-complete events (excluding already-flagged events)
+    autocomplete_events = _detect_fullline_autocomplete(
+        jsonData, document_states, content_whitelist, excluded_indices
+    )
 
-    # Detect rapid one-line paste sequences (AI indicator)
-    suspicious_events.extend(_detect_rapid_paste_sequences(jsonData))
+    # Calculate aggregate statistics for auto-complete/small paste events
+    # Store individual events for optional detailed review, but don't report them by default
+    if autocomplete_events:
+        total_autocomplete_chars = sum(ev["char_count"] for ev in autocomplete_events)
+        total_autocomplete_events = len(autocomplete_events)
+
+        # Always add aggregate summary, never individual events
+        # Store individual events in the aggregate for optional detailed review
+        suspicious_events.append({
+            "event_index": -1,  # Special marker for aggregate
+            "event_count": total_autocomplete_events,
+            "total_chars": total_autocomplete_chars,
+            "reason": "aggregate auto-complete/small paste activity",
+            "newFragment": f"{total_autocomplete_events} auto-complete events ({total_autocomplete_chars} total chars)",
+            "detailed_events": autocomplete_events,  # Store for optional review
+        })
 
     return suspicious_events
 
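
The two index-collection loops above could be factored into one helper; a sketch of the bookkeeping they perform (`flagged_indices` is a hypothetical name, not part of the package):

```python
def flagged_indices(events: list[dict]) -> set[int]:
    """Collect event indices from both single flags and clustered flags."""
    out: set[int] = set()
    for ev in events:
        if "event_indices" in ev:  # cluster, e.g. a rapid paste sequence
            out.update(ev["event_indices"])
        else:
            out.add(ev["event_index"])
    return out

print(flagged_indices([
    {"event_index": 4},
    {"event_index": 7, "event_indices": [7, 8, 9]},
]))
# {4, 7, 8, 9}
```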
@@ -311,12 +537,15 @@ def check_time_limit(jsonData: tuple[dict[str, Any], ...], time_limit_minutes: i
     def parse_ts(ts_str: str) -> datetime:
         return datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
 
-    # Identify session boundaries: sessions start at indices where oldFragment == newFragment (non-empty)
+    # Identify session boundaries: sessions start at indices where offset == 0
+    # (indicating file reopen/recording restart) and oldFragment == newFragment (initial snapshot)
     session_starts = [0]  # First session always starts at index 0
     for idx in range(1, len(jsonData)):
+        offset = jsonData[idx].get("offset", -1)
         old_frag = jsonData[idx].get("oldFragment", "")
         new_frag = jsonData[idx].get("newFragment", "")
-        if old_frag == new_frag and old_frag.strip() != "":
+        # Session boundary: offset is 0 and it's an initial snapshot (old == new, non-empty)
+        if offset == 0 and old_frag == new_frag and old_frag.strip() != "":
            session_starts.append(idx)
 
     # Add sentinel to mark end of last session
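
A quick illustration of the tightened boundary rule on a hypothetical recording that restarts once (event shapes are invented for the example):

```python
# Event 3 re-snapshots the file at offset 0, marking a second session.
events = [
    {"offset": 0, "oldFragment": "tpl", "newFragment": "tpl"},      # session 1 snapshot
    {"offset": 3, "oldFragment": "", "newFragment": "x"},
    {"offset": 4, "oldFragment": "", "newFragment": "y"},
    {"offset": 0, "oldFragment": "tplxy", "newFragment": "tplxy"},  # session 2 snapshot
    {"offset": 5, "oldFragment": "", "newFragment": "z"},
]

session_starts = [0]
for idx in range(1, len(events)):
    e = events[idx]
    if e["offset"] == 0 and e["oldFragment"] == e["newFragment"] and e["oldFragment"].strip():
        session_starts.append(idx)

print(session_starts)  # [0, 3]
```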
@@ -344,25 +573,26 @@ def check_time_limit(jsonData: tuple[dict[str, Any], ...], time_limit_minutes: i
         session_end = session_starts[i + 1]
 
         # Find first and last events with timestamps in this session
-        first_event = None
-        last_event = None
-
-        for event in jsonData[session_start:session_end]:
-            if event.get("timestamp"):
-                if first_event is None:
-                    first_event = event
-                last_event = event
+        first_event_time = None
+        last_event_time = None
+
+        for idx in range(session_start, session_end):
+            event = jsonData[idx]
+            timestamp = event.get("timestamp")
+            if timestamp:
+                try:
+                    event_time = parse_ts(timestamp)
+                    if first_event_time is None:
+                        first_event_time = event_time
+                    last_event_time = event_time
+                except (ValueError, KeyError):
+                    # Skip events with invalid timestamps
+                    continue
 
         # If this session has timestamped events, add its elapsed time
-        if first_event is not None and last_event is not None:
-            try:
-                first_time = parse_ts(first_event["timestamp"])
-                last_time = parse_ts(last_event["timestamp"])
-                session_diff = last_time - first_time
-                total_minutes_elapsed += session_diff.total_seconds() / 60
-            except (ValueError, KeyError):
-                # Timestamp parsing failed for this session, skip it
-                continue
+        if first_event_time is not None and last_event_time is not None:
+            session_diff = last_event_time - first_event_time
+            total_minutes_elapsed += session_diff.total_seconds() / 60
 
     # For time limit check, use the span from first to last timestamp overall
     try:
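
The per-session summation means idle gaps between sessions do not count toward elapsed time. A condensed sketch of the arithmetic with invented timestamps:

```python
from datetime import datetime

def parse_ts(ts: str) -> datetime:
    return datetime.fromisoformat(ts.replace("Z", "+00:00"))

# Two sessions: 10 minutes of editing, a 4-hour break, then 5 more minutes.
sessions = [
    ("2024-01-01T10:00:00Z", "2024-01-01T10:10:00Z"),
    ("2024-01-01T14:00:00Z", "2024-01-01T14:05:00Z"),
]

total_minutes = sum(
    (parse_ts(last) - parse_ts(first)).total_seconds() / 60
    for first, last in sessions
)
print(total_minutes)  # 15.0 — the gap between sessions is not counted
```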
@@ -1,14 +1,280 @@
 import argparse
-import sys
 import json
+import sys
 from datetime import datetime
 from pathlib import Path
-from .api.load import load_jsonl
-from .api.verify import verify, template_diff, check_time_limit
+from typing import Any
+
 from .api.build import reconstruct_file_from_events
+from .api.load import load_jsonl
+from .api.verify import check_time_limit, template_diff, verify
+
+
+def resolve_document(
+    docs: list[str], template_path: Path, override: str | None
+) -> str | None:
+    """
+    Determine which document from the recording to process.
+
+    Parameters
+    ----------
+    docs : list[str]
+        List of document paths found in the recording
+    template_path : Path
+        Path to the template file
+    override : str | None
+        Explicit document name or path override
+
+    Returns
+    -------
+    str | None
+        The resolved document path, or None if no documents exist
+
+    Raises
+    ------
+    ValueError
+        If document resolution is ambiguous or the override doesn't match
+    """
+    if not docs:
+        return None
+
+    if override:
+        matches = [
+            d for d in docs if d.endswith(override) or Path(d).name == override
+        ]
+        if not matches:
+            raise ValueError(
+                f"No document in recording matches '{override}'. Available: {docs}"
+            )
+        if len(matches) > 1:
+            raise ValueError(
+                f"Ambiguous document override '{override}'. Matches: {matches}"
+            )
+        return matches[0]
+
+    template_ext = template_path.suffix
+    ext_matches = [d for d in docs if Path(d).suffix == template_ext]
+    if len(ext_matches) == 1:
+        return ext_matches[0]
+    if len(ext_matches) > 1:
+        raise ValueError(
+            f"Multiple documents share extension '{template_ext}': {ext_matches}. "
+            "Use --document to choose one."
+        )
+
+    if len(docs) == 1:
+        return docs[0]
+
+    raise ValueError(
+        "Could not determine document to process. Use --document to select one. "
+        f"Available documents: {docs}"
+    )
+
+
+def get_recorded_documents(events: tuple[dict[str, Any], ...]) -> list[str]:
+    """
+    Extract unique document paths from recording events.
+
+    Parameters
+    ----------
+    events : tuple[dict[str, Any], ...]
+        Recording events loaded from JSONL
+
+    Returns
+    -------
+    list[str]
+        Sorted list of unique document paths
+    """
+    documents = {
+        e.get("document")
+        for e in events
+        if "document" in e and e.get("document") is not None
+    }
+    return sorted([d for d in documents if d is not None])
+
+
+def filter_events_by_document(
+    events: tuple[dict[str, Any], ...], document: str | None
+) -> tuple[dict[str, Any], ...]:
+    """
+    Filter events to only those for a specific document.
+
+    Parameters
+    ----------
+    events : tuple[dict[str, Any], ...]
+        All recording events
+    document : str | None
+        Document path to filter by, or None to return all events
+
+    Returns
+    -------
+    tuple[dict[str, Any], ...]
+        Filtered events
+    """
+    if document:
+        return tuple(e for e in events if e.get("document") == document)
+    return events
+
+
+def display_time_info(time_info: dict[str, Any] | None) -> None:
+    """
+    Display elapsed time and time limit information.
+
+    Parameters
+    ----------
+    time_info : dict[str, Any] | None
+        Time information from check_time_limit, or None if no time data
+    """
+    if not time_info:
+        return
+
+    print(
+        f"Elapsed editing time: {time_info['minutes_elapsed']} minutes",
+        file=sys.stderr,
+    )
+
+    first_ts = datetime.fromisoformat(
+        time_info["first_timestamp"].replace("Z", "+00:00")
+    )
+    last_ts = datetime.fromisoformat(
+        time_info["last_timestamp"].replace("Z", "+00:00")
+    )
+    time_span = (last_ts - first_ts).total_seconds() / 60
+
+    print(f"Time span (first to last edit): {time_span:.2f} minutes", file=sys.stderr)
+
+    if time_info["exceeds_limit"]:
+        print("\nTime limit exceeded!", file=sys.stderr)
+        print(f"  Limit: {time_info['time_limit_minutes']} minutes", file=sys.stderr)
+        print(f"  First edit: {time_info['first_timestamp']}", file=sys.stderr)
+        print(f"  Last edit: {time_info['last_timestamp']}", file=sys.stderr)
+
+
+def display_suspicious_event(event: dict[str, Any], show_details: bool) -> None:
+    """
+    Display a single suspicious event.
+
+    Parameters
+    ----------
+    event : dict[str, Any]
+        Suspicious event data
+    show_details : bool
+        Whether to show detailed autocomplete events
+    """
+    reason = event.get("reason", "unknown")
+
+    # Handle aggregate auto-complete events
+    if event.get("event_index") == -1 and "detailed_events" in event:
+        event_count = event["event_count"]
+        total_chars = event["total_chars"]
+        print(
+            f"  Aggregate: {event_count} auto-complete/small paste events "
+            f"({total_chars} total chars)",
+            file=sys.stderr,
+        )
+
+        if show_details:
+            print("  Detailed events:", file=sys.stderr)
+            for detail in event["detailed_events"]:
+                detail_idx = detail["event_index"]
+                detail_lines = detail["line_count"]
+                detail_chars = detail["char_count"]
+                detail_frag = detail["newFragment"]
+                print(
+                    f"    Event #{detail_idx}: {detail_lines} lines, "
+                    f"{detail_chars} chars",
+                    file=sys.stderr,
+                )
+                print("    ```", file=sys.stderr)
+                for line in detail_frag.split("\n"):
+                    print(f"    {line}", file=sys.stderr)
+                print("    ```", file=sys.stderr)
+
+    elif "event_indices" in event:
+        indices = event.get("event_indices", [event["event_index"]])
+        print(
+            f"  Events #{indices[0]}-#{indices[-1]} ({reason}): "
+            f"{event['line_count']} lines, {event['char_count']} chars",
+            file=sys.stderr,
+        )
+
+    else:
+        new_fragment = event["newFragment"].replace("\n", "\n  ")
+        print(
+            f"  Event #{event['event_index']} ({reason}): "
+            f"{event['line_count']} lines, {event['char_count']} chars - "
+            f"newFragment:\n  ```\n  {new_fragment}\n  ```",
+            file=sys.stderr,
+        )
+
+
+def display_suspicious_events(
+    suspicious_events: list[dict[str, Any]], show_details: bool
+) -> None:
+    """
+    Display all suspicious events or success message.
+
+    Parameters
+    ----------
+    suspicious_events : list[dict[str, Any]]
+        List of suspicious events detected
+    show_details : bool
+        Whether to show detailed autocomplete events
+    """
+    if suspicious_events:
+        print("\nSuspicious copy-paste events detected:", file=sys.stderr)
+        for event in suspicious_events:
+            display_suspicious_event(event, show_details)
+    else:
+        print("Success! No suspicious events detected.", file=sys.stderr)
 
 
-def main():
+def write_json_output(
+    output_path: Path,
+    document: str,
+    time_info: dict[str, Any] | None,
+    suspicious_events: list[dict[str, Any]],
+) -> None:
+    """
+    Write verification results to JSON file.
+
+    Parameters
+    ----------
+    output_path : Path
+        Path to output JSON file
+    document : str
+        Document that was processed
+    time_info : dict[str, Any] | None
+        Time information from verification
+    suspicious_events : list[dict[str, Any]]
+        List of suspicious events detected
+
+    Raises
+    ------
+    Exception
+        If file writing fails
+    """
+    results = {
+        "document": document,
+        "time_info": time_info,
+        "suspicious_events": suspicious_events,
+    }
+
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(output_path, "w") as f:
+        json.dump(results, f, indent=2)
+    print(f"Results written to {output_path}", file=sys.stderr)
+
+
+def create_parser() -> argparse.ArgumentParser:
+    """
+    Create and configure the argument parser.
+
+    Returns
+    -------
+    argparse.ArgumentParser
+        Configured argument parser
+    """
     parser = argparse.ArgumentParser(
         description="Process and verify code recorder JSONL files"
     )
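
The resolution order in `resolve_document` is: explicit override first, then a unique template-extension match, then the sole document, with a `ValueError` otherwise. A toy walk-through of the extension rule (paths are invented):

```python
from pathlib import Path

docs = ["src/main.py", "notes/README.md"]  # documents seen in a recording
template_ext = Path("template.py").suffix  # ".py"

ext_matches = [d for d in docs if Path(d).suffix == template_ext]
print(ext_matches)  # ['src/main.py'] — unique match, so it is chosen automatically
```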
@@ -23,30 +289,54 @@ def main():
         help="Path to the initial template file that was recorded",
     )
     parser.add_argument(
+        "-t",
         "--time-limit",
         type=int,
         default=None,
-        help="Maximum allowed time in minutes between first and last edit. If exceeded, recording is flagged.",
+        help="Maximum allowed time in minutes between first and last edit. "
+        "If exceeded, recording is flagged.",
     )
     parser.add_argument(
+        "-d",
         "--document",
         type=str,
         default=None,
-        help=("Document path or filename to process from the recording. "
-              "Defaults to the document whose extension matches the template file."),
+        help="Document path or filename to process from the recording. "
+        "Defaults to the document whose extension matches the template file.",
     )
     parser.add_argument(
+        "-o",
         "--output-json",
         type=Path,
         default=None,
-        help="Path to output JSON file with verification results (time info and suspicious events).",
+        help="Path to output JSON file with verification results "
+        "(time info and suspicious events).",
     )
+    parser.add_argument(
+        "-s",
+        "--show-autocomplete-details",
+        action="store_true",
+        help="Show individual auto-complete events in addition to "
+        "aggregate statistics",
+    )
+    return parser
+
 
+def main() -> int:
+    """
+    Main entry point for the CLI application.
+
+    Returns
+    -------
+    int
+        Exit code (0 for success, 1 for errors)
+    """
+    parser = create_parser()
     args = parser.parse_args()
 
-    # Load JSONL file first to get document path
+    # Load JSONL file
     try:
-        jsonData = load_jsonl(args.jsonl_file)
+        json_data = load_jsonl(args.jsonl_file)
     except FileNotFoundError as e:
         print(f"Error: {e}", file=sys.stderr)
         return 1
@@ -54,123 +344,72 @@ def main():
         print(f"Error loading JSONL file: {e}", file=sys.stderr)
         return 1
 
-    # Decide which recorded document to process
-    documents = {e.get("document") for e in jsonData if "document" in e and e.get("document") is not None}
-    recorded_docs = sorted([d for d in documents if d is not None])
-
-    def resolve_document(docs: list[str], template_path: Path, override: str | None) -> str | None:
-        if not docs:
-            return None
-
-        if override:
-            matches = [d for d in docs if d.endswith(override) or Path(d).name == override]
-            if not matches:
-                raise ValueError(
-                    f"No document in recording matches '{override}'. Available: {docs}"
-                )
-            if len(matches) > 1:
-                raise ValueError(
-                    f"Ambiguous document override '{override}'. Matches: {matches}"
-                )
-            return matches[0]
-
-        template_ext = template_path.suffix
-        ext_matches = [d for d in docs if Path(d).suffix == template_ext]
-        if len(ext_matches) == 1:
-            return ext_matches[0]
-        if len(ext_matches) > 1:
-            raise ValueError(
-                f"Multiple documents share extension '{template_ext}': {ext_matches}. "
-                "Use --document to choose one."
-            )
-
-        if len(docs) == 1:
-            return docs[0]
-
-        raise ValueError(
-            "Could not determine document to process. Use --document to select one. "
-            f"Available documents: {docs}"
-        )
-
+    # Resolve which document to process
+    recorded_docs = get_recorded_documents(json_data)
     try:
-        target_document = resolve_document(recorded_docs, args.template_file, args.document)
+        target_document = resolve_document(
+            recorded_docs, args.template_file, args.document
+        )
     except ValueError as e:
         print(f"Error determining document: {e}", file=sys.stderr)
         return 1
 
-    if target_document:
-        doc_events = tuple(e for e in jsonData if e.get("document") == target_document)
-        if not doc_events:
-            print(f"Error: No events found for document '{target_document}'", file=sys.stderr)
-            return 1
-    else:
-        doc_events = jsonData
+    # Filter events for target document
+    doc_events = filter_events_by_document(json_data, target_document)
+    if target_document and not doc_events:
+        print(
+            f"Error: No events found for document '{target_document}'",
+            file=sys.stderr,
+        )
+        return 1
 
     print(f"Processing: {target_document or args.template_file}", file=sys.stderr)
 
     # Read template file
     try:
-        templateData = args.template_file.read_text()
+        template_data = args.template_file.read_text()
     except FileNotFoundError:
-        print(f"Error: Template file not found: {args.template_file}", file=sys.stderr)
+        print(
+            f"Error: Template file not found: {args.template_file}", file=sys.stderr
+        )
         return 1
     except Exception as e:
         print(f"Error reading template file: {e}", file=sys.stderr)
         return 1
 
-    # Check time limit and display elapsed time
+    # Check and display time information
     time_info = check_time_limit(doc_events, args.time_limit)
-    if time_info:
-        print(f"Elapsed editing time: {time_info['minutes_elapsed']} minutes", file=sys.stderr)
-        print(f"Time span (first to last edit): {(datetime.fromisoformat(time_info['last_timestamp'].replace('Z', '+00:00')) - datetime.fromisoformat(time_info['first_timestamp'].replace('Z', '+00:00'))).total_seconds() / 60:.2f} minutes", file=sys.stderr)
-        if time_info['exceeds_limit']:
-            print(f"\nTime limit exceeded!", file=sys.stderr)
-            print(f"  Limit: {time_info['time_limit_minutes']} minutes", file=sys.stderr)
-            print(f"  First edit: {time_info['first_timestamp']}", file=sys.stderr)
-            print(f"  Last edit: {time_info['last_timestamp']}", file=sys.stderr)
-
-    # Verify and process
+    display_time_info(time_info)
+
+    # Verify and process the recording
     try:
-        templateData, suspicious_events = verify(templateData, doc_events)
-        print(reconstruct_file_from_events(doc_events, templateData, document_path=target_document))
-
-        # Prepare results for JSON output
-        results = {
-            "document": target_document or str(args.template_file),
-            "time_info": time_info,
-            "suspicious_events": suspicious_events,
-        }
-
-        if suspicious_events:
-            print("\nSuspicious copy-paste events detected:", file=sys.stderr)
-            for ev in suspicious_events:
-                reason = ev.get('reason', 'unknown')
-                indices = ev.get('event_indices', [ev['event_index']])
-                if len(indices) > 1:
-                    print(f"  Events #{indices[0]}-#{indices[-1]} ({reason}): "
-                          f"{ev['line_count']} lines, {ev['char_count']} chars", file=sys.stderr)
-                else:
-                    print(f"  Event #{ev['event_index']} ({reason}): "
-                          f"{ev['line_count']} lines, {ev['char_count']} chars - "
-                          f"newFragment:\n```\n{ev['newFragment']}\n```", file=sys.stderr)
-        else:
-            print("Success! No suspicious events detected.", file=sys.stderr)
+        template_data, suspicious_events = verify(template_data, doc_events)
+        reconstructed = reconstruct_file_from_events(
+            doc_events, template_data, document_path=target_document
+        )
+        print(reconstructed)
+
+        # Display suspicious events
+        display_suspicious_events(suspicious_events, args.show_autocomplete_details)
 
         # Write JSON output if requested
         if args.output_json:
             try:
-                args.output_json.parent.mkdir(parents=True, exist_ok=True)
-                with open(args.output_json, 'w') as f:
-                    json.dump(results, f, indent=2)
-                print(f"Results written to {args.output_json}", file=sys.stderr)
+                write_json_output(
+                    args.output_json,
+                    target_document or str(args.template_file),
+                    time_info,
+                    suspicious_events,
+                )
             except Exception as e:
                 print(f"Error writing JSON output: {e}", file=sys.stderr)
                 return 1
+
     except ValueError as e:
         print("File failed verification from template!", file=sys.stderr)
         print(str(e), file=sys.stderr)
         try:
-            print(template_diff(templateData, doc_events), file=sys.stderr)
+            print(template_diff(template_data, doc_events), file=sys.stderr)
         except Exception:
             pass
         return 1
@@ -178,6 +417,8 @@ def main():
         print(f"Error processing file: {type(e).__name__}: {e}", file=sys.stderr)
         return 1
 
+    return 0
+
 
 if __name__ == "__main__":
     sys.exit(main())
@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: cr_proc
-Version: 0.1.2
-Summary: A tool for processing BYU CS code recording files
+Version: 0.1.5
+Summary: A tool for processing BYU CS code recording files.
 Author: Ethan Dye
 Author-email: mrtops03@gmail.com
 Requires-Python: >=3.14
@@ -0,0 +1,9 @@
+code_recorder_processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+code_recorder_processor/api/build.py,sha256=-EMg0w-llblJ_N_vs_B1kOsAOwiV-TYetAXiOY6PcWs,7643
+code_recorder_processor/api/load.py,sha256=ZKoheLsEoGJ3fpAtPauoeEyNUhGLhUYSwjRsqt1m-TI,3947
+code_recorder_processor/api/verify.py,sha256=2XLWr39g3jqjzQhpx82R_lx7FCYrdQjj8VRd9TTRM_8,23266
+code_recorder_processor/cli.py,sha256=OcoKaJ5SV2iY8bExpiagagQMPtIlFYMUcL8nMtjG13g,12530
+cr_proc-0.1.5.dist-info/METADATA,sha256=wyQbPvVGSkLrzERm3j3Xy_WrhJhEGpWeLCn382kBT4g,4070
+cr_proc-0.1.5.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
+cr_proc-0.1.5.dist-info/entry_points.txt,sha256=xb5dPAAWN1Z9NUHpvZgNakaslR1MVOERf_IfpG_M04M,77
+cr_proc-0.1.5.dist-info/RECORD,,
@@ -1,9 +0,0 @@
-code_recorder_processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-code_recorder_processor/api/build.py,sha256=-EMg0w-llblJ_N_vs_B1kOsAOwiV-TYetAXiOY6PcWs,7643
-code_recorder_processor/api/load.py,sha256=lkEPnQi3Q_91GOTImk4H380F-uKJPszeX3FJJWM4CIA,3272
-code_recorder_processor/api/verify.py,sha256=byW4fyW_gLkFq4rLvWut2Cvj_ds5Cj_MUFrhlhOrucY,14327
-code_recorder_processor/cli.py,sha256=sKm9f06NEZ3psw-HEShlHt4grVZvRmNEG33yvxhIIQQ,7154
-cr_proc-0.1.2.dist-info/METADATA,sha256=A60JkKqmku5ZO-hPjhSDlPqkqI55gpQj_2UZNs3ZlXg,4069
-cr_proc-0.1.2.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
-cr_proc-0.1.2.dist-info/entry_points.txt,sha256=xb5dPAAWN1Z9NUHpvZgNakaslR1MVOERf_IfpG_M04M,77
-cr_proc-0.1.2.dist-info/RECORD,,