cr-proc 0.1.8-py3-none-any.whl → 0.1.10-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,491 +1,500 @@
+"""Command-line interface for code recorder processor."""
 import argparse
-import json
-import os
+import glob
 import sys
-import time
-from datetime import datetime
 from pathlib import Path
 from typing import Any
 
 from .api.build import reconstruct_file_from_events
+from .api.document import (
+    filter_events_by_document,
+    get_recorded_documents,
+    resolve_document,
+    resolve_template_file,
+    find_matching_template,
+)
 from .api.load import load_jsonl
-from .api.verify import check_time_limit, template_diff, verify
+from .api.output import write_batch_json_output
+from .api.verify import (
+    check_time_limit,
+    combine_time_info,
+    detect_external_copypaste,
+    template_diff,
+    verify,
+)
+from .display import (
+    display_suspicious_events,
+    display_template_diff,
+    display_time_info,
+    print_batch_header,
+    print_batch_summary,
+)
+from .playback import playback_recording
 
 
-def resolve_document(
-    docs: list[str], template_path: Path, override: str | None
-) -> str | None:
+def create_parser() -> argparse.ArgumentParser:
     """
-    Determine which document from the recording to process.
-
-    Parameters
-    ----------
-    docs : list[str]
-        List of document paths found in the recording
-    template_path : Path
-        Path to the template file
-    override : str | None
-        Explicit document name or path override
+    Create and configure the argument parser.
 
     Returns
     -------
-    str | None
-        The resolved document path, or None if no documents exist
-
-    Raises
-    ------
-    ValueError
-        If document resolution is ambiguous or the override doesn't match
+    argparse.ArgumentParser
+        Configured argument parser
     """
-    if not docs:
-        return None
-
-    if override:
-        matches = [
-            d for d in docs if d.endswith(override) or Path(d).name == override
-        ]
-        if not matches:
-            raise ValueError(
-                f"No document in recording matches '{override}'. Available: {docs}"
-            )
-        if len(matches) > 1:
-            raise ValueError(
-                f"Ambiguous document override '{override}'. Matches: {matches}"
-            )
-        return matches[0]
-
-    template_ext = template_path.suffix
-    ext_matches = [d for d in docs if Path(d).suffix == template_ext]
-    if len(ext_matches) == 1:
-        return ext_matches[0]
-    if len(ext_matches) > 1:
-        raise ValueError(
-            f"Multiple documents share extension '{template_ext}': {ext_matches}. "
-            "Use --document to choose one."
-        )
-
-    if len(docs) == 1:
-        return docs[0]
-
-    raise ValueError(
-        "Could not determine document to process. Use --document to select one. "
-        f"Available documents: {docs}"
+    parser = argparse.ArgumentParser(
+        description="Process and verify code recorder JSONL files"
     )
+    parser.add_argument(
+        "files",
+        type=str,
+        nargs="+",
+        help="Path(s) to JSONL file(s) and optionally a template file. "
+        "JSONL files: compressed JSONL file(s) (*.recording.jsonl.gz). "
+        "Supports glob patterns like 'recordings/*.jsonl.gz'. "
+        "Template file (optional last positional): template file path. "
+        "Omit to use --template-dir instead.",
+    )
+    parser.add_argument(
+        "--template-dir",
+        type=Path,
+        default=None,
+        help="Directory containing template files (overrides positional template file). "
+        "Will search for files matching the document name. "
+        "If no match found, reconstruction proceeds with warning.",
+    )
+    parser.add_argument(
+        "-t",
+        "--time-limit",
+        type=int,
+        default=None,
+        help="Maximum allowed time in minutes between first and last edit. "
+        "If exceeded, recording is flagged. Applied individually to each recording file.",
+    )
+    parser.add_argument(
+        "-d",
+        "--document",
+        type=str,
+        default=None,
+        help="Document path or filename to process from the recording. "
+        "Defaults to the document whose extension matches the template file.",
+    )
+    parser.add_argument(
+        "-o",
+        "--output-json",
+        type=Path,
+        default=None,
+        help="Path to output JSON file with verification results. "
+        "Uses consistent format for both single and batch modes, with batch_mode flag. "
+        "In batch mode, includes combined_time_info across all files.",
+    )
+    parser.add_argument(
+        "-f",
+        "--output-file",
+        type=Path,
+        default=None,
+        help="Write reconstructed code to specified file instead of stdout. "
+        "In batch mode, this should be a directory where files will be named after the input files.",
+    )
+    parser.add_argument(
+        "--output-dir",
+        type=Path,
+        default=None,
+        help="Directory to write reconstructed code files in batch mode (one file per recording). "
+        "Files are named based on input recording filenames.",
+    )
+    parser.add_argument(
+        "-s",
+        "--show-autocomplete-details",
+        action="store_true",
+        help="Show individual auto-complete events in addition to "
+        "aggregate statistics",
+    )
+    parser.add_argument(
+        "-p",
+        "--playback",
+        action="store_true",
+        help="Play back the recording in real-time, showing code evolution",
+    )
+    parser.add_argument(
+        "--playback-speed",
+        type=float,
+        default=1.0,
+        help="Playback speed multiplier (1.0 = real-time, 2.0 = 2x speed, 0.5 = half speed)",
+    )
+    return parser
 
 
-def get_recorded_documents(events: tuple[dict[str, Any], ...]) -> list[str]:
-    """
-    Extract unique document paths from recording events.
-
-    Parameters
-    ----------
-    events : tuple[dict[str, Any], ...]
-        Recording events loaded from JSONL
-
-    Returns
-    -------
-    list[str]
-        Sorted list of unique document paths
-    """
-    documents = {
-        e.get("document")
-        for e in events
-        if "document" in e and e.get("document") is not None
-    }
-    return sorted([d for d in documents if d is not None])
-
-
-def filter_events_by_document(
-    events: tuple[dict[str, Any], ...], document: str | None
-) -> tuple[dict[str, Any], ...]:
+def expand_file_patterns(patterns: list[str]) -> list[Path]:
     """
-    Filter events to only those for a specific document.
+    Expand glob patterns and validate files exist.
 
     Parameters
     ----------
-    events : tuple[dict[str, Any], ...]
-        All recording events
-    document : str | None
-        Document path to filter by, or None to return all events
+    patterns : list[str]
+        List of file paths or glob patterns
 
     Returns
     -------
-    tuple[dict[str, Any], ...]
-        Filtered events
-    """
-    if document:
-        return tuple(e for e in events if e.get("document") == document)
-    return events
-
+    list[Path]
+        List of existing file paths
 
-def display_time_info(time_info: dict[str, Any] | None) -> None:
+    Raises
+    ------
+    FileNotFoundError
+        If no files are found
     """
-    Display elapsed time and time limit information.
+    jsonl_files = []
+    for pattern in patterns:
+        expanded = glob.glob(pattern)
+        if expanded:
+            jsonl_files.extend([Path(f) for f in expanded])
+        else:
+            # If no glob match, treat as literal path
+            jsonl_files.append(Path(pattern))
 
-    Parameters
-    ----------
-    time_info : dict[str, Any] | None
-        Time information from check_time_limit, or None if no time data
-    """
-    if not time_info:
-        return
+    if not jsonl_files:
+        raise FileNotFoundError("No JSONL files found")
 
-    print(
-        f"Elapsed editing time: {time_info['minutes_elapsed']} minutes",
-        file=sys.stderr,
-    )
+    # Check if files exist
+    existing_files = [f for f in jsonl_files if f.exists()]
+    if not existing_files:
+        raise FileNotFoundError("None of the specified files exist")
 
-    first_ts = datetime.fromisoformat(
-        time_info["first_timestamp"].replace("Z", "+00:00")
-    )
-    last_ts = datetime.fromisoformat(
-        time_info["last_timestamp"].replace("Z", "+00:00")
-    )
-    time_span = (last_ts - first_ts).total_seconds() / 60
-
-    print(f"Time span (first to last edit): {time_span:.2f} minutes", file=sys.stderr)
+    # Warn about missing files
+    if len(existing_files) < len(jsonl_files):
+        missing = [f for f in jsonl_files if f not in existing_files]
+        for f in missing:
+            print(f"Warning: File not found: {f}", file=sys.stderr)
 
-    if time_info["exceeds_limit"]:
-        print("\nTime limit exceeded!", file=sys.stderr)
-        print(f" Limit: {time_info['time_limit_minutes']} minutes", file=sys.stderr)
-        print(f" First edit: {time_info['first_timestamp']}", file=sys.stderr)
-        print(f" Last edit: {time_info['last_timestamp']}", file=sys.stderr)
+    return existing_files
 
 
-def display_suspicious_event(event: dict[str, Any], show_details: bool) -> None:
+def process_single_file(
+    jsonl_path: Path,
+    template_data: str,
+    target_document: str | None,
+    time_limit: int | None,
+) -> tuple[bool, str, list[dict[str, Any]], dict[str, Any] | None, str]:
     """
-    Display a single suspicious event.
+    Process a single JSONL recording file.
 
     Parameters
     ----------
-    event : dict[str, Any]
-        Suspicious event data
-    show_details : bool
-        Whether to show detailed autocomplete events
+    jsonl_path : Path
+        Path to the JSONL file
+    template_data : str
+        Template file content
+    target_document : str | None
+        Document to process
+    time_limit : int | None
+        Time limit in minutes
+
+    Returns
+    -------
+    tuple
+        (verified, reconstructed_code, suspicious_events, time_info, template_diff_text)
     """
-    reason = event.get("reason", "unknown")
+    try:
+        json_data = load_jsonl(jsonl_path)
+    except (FileNotFoundError, ValueError, IOError) as e:
+        print(f"Error loading {jsonl_path}: {e}", file=sys.stderr)
+        return False, "", [], None, ""
 
-    # Handle aggregate auto-complete events
-    if event.get("event_index") == -1 and "detailed_events" in event:
-        event_count = event["event_count"]
-        total_chars = event["total_chars"]
+    # Filter events for target document
+    doc_events = filter_events_by_document(json_data, target_document)
+    if target_document and not doc_events:
         print(
-            f" Aggregate: {event_count} auto-complete/small paste events "
-            f"({total_chars} total chars)",
+            f"Warning: No events found for document '{target_document}' in {jsonl_path}",
             file=sys.stderr,
         )
+        return False, "", [], None, ""
 
-        if show_details:
-            print(" Detailed events:", file=sys.stderr)
-            for detail in event["detailed_events"]:
-                detail_idx = detail["event_index"]
-                detail_lines = detail["line_count"]
-                detail_chars = detail["char_count"]
-                detail_frag = detail["newFragment"]
-                print(
-                    f" Event #{detail_idx}: {detail_lines} lines, "
-                    f"{detail_chars} chars",
-                    file=sys.stderr,
-                )
-                print(" ```", file=sys.stderr)
-                for line in detail_frag.split("\n"):
-                    print(f" {line}", file=sys.stderr)
-                print(" ```", file=sys.stderr)
-
-    elif "event_indices" in event and reason == "rapid one-line pastes (AI indicator)":
-        # Rapid paste sequences (AI indicator) - show aggregate style
-        indices = event["event_indices"]
-        print(
-            f" AI Rapid Paste: Events #{indices[0]}-#{indices[-1]} "
-            f"({event['line_count']} lines, {event['char_count']} chars, "
-            f"{len(indices)} events in < 1 second)",
-            file=sys.stderr,
-        )
+    # Check time information
+    time_info = check_time_limit(doc_events, time_limit)
 
-        if show_details and "detailed_events" in event:
-            # Combine all detailed events into one block
-            combined_content = "".join(
-                detail["newFragment"] for detail in event["detailed_events"]
-            )
-            print(" Combined output:", file=sys.stderr)
-            print(" ```", file=sys.stderr)
-            for line in combined_content.split("\n"):
-                print(f" {line}", file=sys.stderr)
-            print(" ```", file=sys.stderr)
-
-    elif "event_indices" in event:
-        # Other multi-event clusters
-        indices = event.get("event_indices", [event["event_index"]])
-        print(
-            f" Events #{indices[0]}-#{indices[-1]} ({reason}): "
-            f"{event['line_count']} lines, {event['char_count']} chars",
-            file=sys.stderr,
+    # Verify and process the recording
+    try:
+        verified_template, suspicious_events = verify(template_data, doc_events)
+        reconstructed = reconstruct_file_from_events(
+            doc_events, verified_template, document_path=target_document
         )
+        return True, reconstructed, suspicious_events, time_info, ""
+    except ValueError as e:
+        # If verification fails but we have events, still try to reconstruct
+        print(f"Warning: Verification failed for {jsonl_path}: {e}", file=sys.stderr)
+        try:
+            if not doc_events:
+                return False, "", [], time_info, ""
 
-    else:
-        new_fragment = event["newFragment"].replace("\n", "\n ")
+            # Compute diff against template and still detect suspicious events
+            diff_text = template_diff(template_data, doc_events)
+            suspicious_events = detect_external_copypaste(doc_events)
+
+            # Reconstruct using the initial recorded state
+            initial_state = doc_events[0].get("newFragment", "")
+            reconstructed = reconstruct_file_from_events(
+                doc_events, initial_state, document_path=target_document
+            )
+            return False, reconstructed, suspicious_events, time_info, diff_text
+        except Exception as reconstruction_error:
+            print(
+                f"Error reconstructing {jsonl_path}: {type(reconstruction_error).__name__}: {reconstruction_error}",
+                file=sys.stderr,
+            )
+            return False, "", [], time_info, ""
+    except Exception as e:
         print(
-            f" Event #{event['event_index']} ({reason}): "
-            f"{event['line_count']} lines, {event['char_count']} chars - "
-            f"newFragment:\n ```\n {new_fragment}\n ```",
+            f"Error processing {jsonl_path}: {type(e).__name__}: {e}",
             file=sys.stderr,
         )
+        return False, "", [], time_info, ""
 
 
-def display_suspicious_events(
-    suspicious_events: list[dict[str, Any]], show_details: bool
-) -> None:
+def write_reconstructed_file(
+    output_path: Path,
+    content: str,
+    file_description: str = "Reconstructed code"
+) -> bool:
     """
-    Display all suspicious events or success message.
+    Write reconstructed code to a file.
 
     Parameters
    ----------
-    suspicious_events : list[dict[str, Any]]
-        List of suspicious events detected
-    show_details : bool
-        Whether to show detailed autocomplete events
+    output_path : Path
+        Path to write to
+    content : str
+        Content to write
+    file_description : str
+        Description for success message
+
+    Returns
+    -------
+    bool
+        True if successful, False otherwise
     """
-    if suspicious_events:
-        print("\nSuspicious events detected:", file=sys.stderr)
-
-        # Sort events by their index for chronological display
-        def get_sort_key(event: dict[str, Any]) -> int | float:
-            if "event_indices" in event and event["event_indices"]:
-                return event["event_indices"][0]
-            if "detailed_events" in event and event["detailed_events"]:
-                return event["detailed_events"][0].get("event_index", float("inf"))
-            event_idx = event.get("event_index", -1)
-            return event_idx if event_idx >= 0 else float("inf")
-
-        sorted_events = sorted(suspicious_events, key=get_sort_key)
-
-        for event in sorted_events:
-            display_suspicious_event(event, show_details)
-    else:
-        print("Success! No suspicious events detected.", file=sys.stderr)
+    try:
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+        output_path.write_text(content)
+        print(f"{file_description} written to: {output_path}", file=sys.stderr)
+        return True
+    except Exception as e:
+        print(f"Error writing output file: {e}", file=sys.stderr)
+        return False
 
 
-def write_json_output(
-    output_path: Path,
-    document: str,
-    time_info: dict[str, Any] | None,
-    suspicious_events: list[dict[str, Any]],
-    reconstructed_code: str,
-    verified: bool,
-) -> None:
+def handle_playback_mode(
+    jsonl_file: Path,
+    template_file: Path,
+    template_data: str,
+    document_override: str | None,
+    speed: float,
+) -> int:
     """
-    Write verification results to JSON file.
+    Handle playback mode for a single file.
 
     Parameters
     ----------
-    output_path : Path
-        Path to output JSON file
-    document : str
-        Document that was processed
-    time_info : dict[str, Any] | None
-        Time information from verification
-    suspicious_events : list[dict[str, Any]]
-        List of suspicious events detected
-    reconstructed_code : str
-        The reconstructed file content
-    verified : bool
-        Whether the file passed verification
+    jsonl_file : Path
+        Path to the recording file
+    template_file : Path
+        Path to the template file
+    template_data : str
+        Template file content
+    document_override : str | None
+        Document override
+    speed : float
+        Playback speed
 
-    Raises
-    ------
-    Exception
-        If file writing fails
+    Returns
+    -------
+    int
+        Exit code (0 for success, 1 for error)
     """
-    results = {
-        "document": document,
-        "verified": verified,
-        "time_info": time_info,
-        "suspicious_events": suspicious_events,
-        "reconstructed_code": reconstructed_code,
-    }
+    try:
+        json_data = load_jsonl(jsonl_file)
+        recorded_docs = get_recorded_documents(json_data)
+        target_document = resolve_document(recorded_docs, template_file, document_override)
 
-    output_path.parent.mkdir(parents=True, exist_ok=True)
-    with open(output_path, "w") as f:
-        json.dump(results, f, indent=2)
-    print(f"Results written to {output_path}", file=sys.stderr)
+        if target_document:
+            playback_recording(json_data, target_document, template_data, speed)
+            return 0
+        else:
+            print("Error: No documents found in recording", file=sys.stderr)
+            return 1
+    except Exception as e:
+        print(f"Error loading file for playback: {e}", file=sys.stderr)
+        return 1
 
 
-def playback_recording(
-    json_data: tuple[dict[str, Any], ...],
-    document: str,
-    template: str,
-    speed: float = 1.0,
-) -> None:
+def process_batch(
+    jsonl_files: list[Path],
+    template_base: Path | None,
+    template_data: str,
+    args: argparse.Namespace,
+) -> tuple[list[dict[str, Any]], bool]:
     """
-    Play back a recording, showing the code evolving in real-time.
+    Process multiple recording files in batch mode.
 
     Parameters
     ----------
-    json_data : tuple[dict[str, Any], ...]
-        The recording events
-    document : str
-        The document to play back
-    template : str
-        The initial template content
-    speed : float
-        Playback speed multiplier (1.0 = real-time, 2.0 = 2x speed, 0.5 = half speed)
+    jsonl_files : list[Path]
+        List of JSONL files to process
+    template_base : Path
+        Path to template file or directory
+    template_data : str
+        Template file content
+    args : argparse.Namespace
+        Command-line arguments
+
+    Returns
+    -------
+    tuple
+        (results, all_verified)
     """
-    # Filter events for the target document
-    doc_events = [e for e in json_data if e.get("document") == document]
-
-    if not doc_events:
-        print(f"No events found for document: {document}", file=sys.stderr)
-        return
-
-    # Start with template
-    current_content = template
-    last_timestamp = None
-
-    def clear_screen():
-        """Clear the terminal screen."""
-        os.system('cls' if os.name == 'nt' else 'clear')
-
-    def parse_timestamp(ts_str: str) -> datetime:
-        """Parse ISO timestamp string."""
-        return datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
-
-    # Show initial template
-    clear_screen()
-    print(f"=" * 80)
-    print(f"PLAYBACK: {document} (Speed: {speed}x)")
-    print(f"Event 0 / {len(doc_events)} - Initial Template")
-    print(f"=" * 80)
-    print(current_content)
-    print(f"\n{'=' * 80}")
-    print("Press Ctrl+C to stop playback")
-    time.sleep(2.0 / speed)
+    results = []
+    all_verified = True
+    output_dir = args.output_dir or (
+        args.output_file if args.output_file and args.output_file.is_dir() else None
+    )
 
-    try:
-        for idx, event in enumerate(doc_events, 1):
-            old_frag = event.get("oldFragment", "")
-            new_frag = event.get("newFragment", "")
-            offset = event.get("offset", 0)
-            timestamp = event.get("timestamp")
-
-            # Calculate delay based on timestamp difference
-            if last_timestamp and timestamp:
-                try:
-                    ts1 = parse_timestamp(last_timestamp)
-                    ts2 = parse_timestamp(timestamp)
-                    delay = (ts2 - ts1).total_seconds() / speed
-                    # Cap delay at 5 seconds for very long pauses
-                    delay = min(delay, 5.0)
-                    if delay > 0:
-                        time.sleep(delay)
-                except (ValueError, KeyError):
-                    time.sleep(0.1 / speed)
+    for i, jsonl_file in enumerate(jsonl_files, 1):
+        print_batch_header(i, len(jsonl_files), jsonl_file.name)
+
+        # Determine target document for this file
+        try:
+            file_data = load_jsonl(jsonl_file)
+            recorded_docs = get_recorded_documents(file_data)
+            target_document = resolve_document(recorded_docs, template_base, args.document)
+        except (FileNotFoundError, ValueError, IOError) as e:
+            print(f"Error determining document: {e}", file=sys.stderr)
+            all_verified = False
+            continue
+
+        # If using template directory, find the matching template for this document
+        if args.template_dir and target_document:
+            matching_template_path = find_matching_template(args.template_dir, target_document)
+            if matching_template_path:
+                file_template_data = matching_template_path.read_text()
+                print(f"Using template: {matching_template_path.name}", file=sys.stderr)
             else:
-                time.sleep(0.1 / speed)
+                file_template_data = ""
+                print(
+                    f"Warning: No matching template found for {target_document}. "
+                    "Reconstruction will proceed without template verification.",
+                    file=sys.stderr
+                )
+        else:
+            file_template_data = template_data
 
-            last_timestamp = timestamp
+        # Process the file
+        verified, reconstructed, suspicious_events, time_info, diff_text = process_single_file(
+            jsonl_file, file_template_data, target_document, args.time_limit
+        )
 
-            # Apply the edit
-            if new_frag != old_frag:
-                current_content = current_content[:offset] + new_frag + current_content[offset + len(old_frag):]
+        if not verified:
+            all_verified = False
 
-            # Display current state
-            clear_screen()
-            print(f"=" * 80)
-            print(f"PLAYBACK: {document} (Speed: {speed}x)")
-            print(f"Event {idx} / {len(doc_events)} - {timestamp or 'unknown time'}")
+        # Display results
+        display_time_info(time_info)
+        display_suspicious_events(suspicious_events, args.show_autocomplete_details)
+        display_template_diff(diff_text)
+
+        # Store results
+        results.append({
+            "jsonl_file": jsonl_file,
+            "target_document": target_document,
+            "verified": verified,
+            "reconstructed": reconstructed,
+            "suspicious_events": suspicious_events,
+            "time_info": time_info,
+            "template_diff": diff_text,
+        })
+
+        # Write output file if requested
+        if reconstructed and output_dir:
+            output_name = jsonl_file.stem.replace(".recording.jsonl", "") + ".py"
+            output_path = output_dir / output_name
+            write_reconstructed_file(output_path, reconstructed, "Written to")
+
+    return results, all_verified
+
+
+def process_single(
+    jsonl_file: Path,
+    template_base: Path | None,
+    template_data: str,
+    args: argparse.Namespace,
+) -> tuple[list[dict[str, Any]], bool]:
+    """
+    Process a single recording file.
 
-            # Show what changed
-            if new_frag != old_frag:
-                change_type = "INSERT" if not old_frag else ("DELETE" if not new_frag else "REPLACE")
-                print(f"Action: {change_type} at offset {offset} ({len(new_frag)} chars)")
+    Parameters
+    ----------
+    jsonl_file : Path
+        Path to JSONL file
+    template_base : Path
+        Path to template file or directory
+    template_data : str
+        Template file content
+    args : argparse.Namespace
+        Command-line arguments
 
-            print(f"=" * 80)
-            print(current_content)
-            print(f"\n{'=' * 80}")
-            print(f"Progress: [{('#' * (idx * 40 // len(doc_events))).ljust(40)}] {idx}/{len(doc_events)}")
-            print("Press Ctrl+C to stop playback")
+    Returns
+    -------
+    tuple
+        (results, verified)
+    """
+    try:
+        file_data = load_jsonl(jsonl_file)
+        recorded_docs = get_recorded_documents(file_data)
+        target_document = resolve_document(recorded_docs, template_base, args.document)
+    except (FileNotFoundError, ValueError, IOError) as e:
+        print(f"Error determining document: {e}", file=sys.stderr)
+        return [], False
+
+    # If using template directory, find the matching template for this document
+    if args.template_dir and target_document:
+        matching_template_path = find_matching_template(args.template_dir, target_document)
+        if matching_template_path:
+            file_template_data = matching_template_path.read_text()
+            print(f"Using template: {matching_template_path.name}", file=sys.stderr)
+        else:
+            file_template_data = ""
+            print(
+                f"Warning: No matching template found for {target_document}. "
+                "Reconstruction will proceed without template verification.",
+                file=sys.stderr
+            )
+    else:
+        file_template_data = template_data
 
-    except KeyboardInterrupt:
-        print("\n\nPlayback stopped by user.", file=sys.stderr)
-        return
+    print(f"Processing: {target_document or template_base}", file=sys.stderr)
 
-    # Final summary
-    print("\n\nPlayback complete!", file=sys.stderr)
-    print(f"Total events: {len(doc_events)}", file=sys.stderr)
+    verified, reconstructed, suspicious_events, time_info, diff_text = process_single_file(
+        jsonl_file, file_template_data, target_document, args.time_limit
+    )
 
+    # Display results
+    display_time_info(time_info)
+    display_suspicious_events(suspicious_events, args.show_autocomplete_details)
+    display_template_diff(diff_text)
 
-def create_parser() -> argparse.ArgumentParser:
-    """
-    Create and configure the argument parser.
+    # Write output file if requested
+    if reconstructed and args.output_file:
+        if not write_reconstructed_file(args.output_file, reconstructed):
+            return [], False
 
-    Returns
-    -------
-    argparse.ArgumentParser
-        Configured argument parser
-    """
-    parser = argparse.ArgumentParser(
-        description="Process and verify code recorder JSONL files"
-    )
-    parser.add_argument(
-        "jsonl_file",
-        type=Path,
-        help="Path to the compressed JSONL file (*.recording.jsonl.gz)",
-    )
-    parser.add_argument(
-        "template_file",
-        type=Path,
-        help="Path to the initial template file that was recorded",
-    )
-    parser.add_argument(
-        "-t",
-        "--time-limit",
-        type=int,
-        default=None,
-        help="Maximum allowed time in minutes between first and last edit. "
-        "If exceeded, recording is flagged.",
-    )
-    parser.add_argument(
-        "-d",
-        "--document",
-        type=str,
-        default=None,
-        help="Document path or filename to process from the recording. "
-        "Defaults to the document whose extension matches the template file.",
-    )
-    parser.add_argument(
-        "-o",
-        "--output-json",
-        type=Path,
-        default=None,
-        help="Path to output JSON file with verification results "
-        "(time info and suspicious events).",
-    )
-    parser.add_argument(
-        "-s",
-        "--show-autocomplete-details",
-        action="store_true",
-        help="Show individual auto-complete events in addition to "
-        "aggregate statistics",
-    )
-    parser.add_argument(
-        "-q",
-        "--quiet",
-        action="store_true",
-        help="Suppress output of reconstructed code to stdout",
-    )
-    parser.add_argument(
-        "-p",
-        "--playback",
-        action="store_true",
-        help="Play back the recording in real-time, showing code evolution",
-    )
-    parser.add_argument(
-        "--playback-speed",
-        type=float,
-        default=1.0,
-        help="Playback speed multiplier (1.0 = real-time, 2.0 = 2x speed, 0.5 = half speed)",
-    )
-    return parser
+    results = [{
+        "jsonl_file": jsonl_file,
+        "target_document": target_document,
+        "verified": verified,
+        "reconstructed": reconstructed,
+        "suspicious_events": suspicious_events,
+        "time_info": time_info,
+        "template_diff": diff_text,
+    }]
+
+    return results, verified
 
 
 def main() -> int:
@@ -500,112 +509,123 @@ def main() -> int:
     parser = create_parser()
     args = parser.parse_args()
 
-    # Load JSONL file
+    # Parse files argument: last one may be template_file if it's not a JSONL file
+    files_list = args.files
+    template_file = None
+    jsonl_patterns = files_list
+
+    # If we have more than one file and the last one doesn't look like a JSONL file,
+    # treat it as the template file
+    if len(files_list) > 1 and not files_list[-1].endswith(('.jsonl', '.jsonl.gz')):
+        template_file = Path(files_list[-1])
+        jsonl_patterns = files_list[:-1]
+
+    # Validate that at least one of template_file or template_dir is provided
+    if not template_file and not args.template_dir:
+        print("Error: Either a template file or --template-dir must be provided", file=sys.stderr)
+        parser.print_help()
+        return 1
+
+    # Expand file patterns and validate
     try:
-        json_data = load_jsonl(args.jsonl_file)
+        jsonl_files = expand_file_patterns(jsonl_patterns)
     except FileNotFoundError as e:
         print(f"Error: {e}", file=sys.stderr)
         return 1
-    except (ValueError, IOError) as e:
-        print(f"Error loading JSONL file: {e}", file=sys.stderr)
-        return 1
 
-    # Resolve which document to process
-    recorded_docs = get_recorded_documents(json_data)
-    try:
-        target_document = resolve_document(
-            recorded_docs, args.template_file, args.document
-        )
-    except ValueError as e:
-        print(f"Error determining document: {e}", file=sys.stderr)
-        return 1
+    batch_mode = len(jsonl_files) > 1
+    if batch_mode:
+        print(f"Processing {len(jsonl_files)} recording files in batch mode", file=sys.stderr)
 
-    # Handle playback mode
-    if args.playback:
+    # Determine template source (use template_dir if provided, otherwise template_file)
+    template_path = args.template_dir if args.template_dir else template_file
+
+    # Handle playback mode (single file only)
+    if not batch_mode and args.playback:
         try:
-            template_content = args.template_file.read_text()
-        except FileNotFoundError:
-            print(f"Error: Template file not found: {args.template_file}", file=sys.stderr)
-            return 1
+            json_data = load_jsonl(jsonl_files[0])
+            recorded_docs = get_recorded_documents(json_data)
+            target_document = resolve_document(recorded_docs, template_path, args.document)
+
+            # Get template data for playback
+            template_data, _ = resolve_template_file(
+                template_file if not args.template_dir else None,
+                args.template_dir,
+                target_document
+            )
 
-        if target_document:
-            playback_recording(json_data, target_document, template_content, args.playback_speed)
-            return 0
-        else:
-            print("Error: No documents found in recording", file=sys.stderr)
+            if target_document:
+                playback_recording(json_data, target_document, template_data, args.playback_speed)
+                return 0
+            else:
+                print("Error: No documents found in recording", file=sys.stderr)
+                return 1
+        except Exception as e:
+            print(f"Error loading file for playback: {e}", file=sys.stderr)
             return 1
 
-    # Filter events for target document
-    doc_events = filter_events_by_document(json_data, target_document)
-    if target_document and not doc_events:
-        print(
-            f"Error: No events found for document '{target_document}'",
-            file=sys.stderr,
-        )
-        return 1
-
-    print(f"Processing: {target_document or args.template_file}", file=sys.stderr)
-
-    # Read template file
+    # Get template data
     try:
-        template_data = args.template_file.read_text()
-    except FileNotFoundError:
-        print(
-            f"Error: Template file not found: {args.template_file}", file=sys.stderr
-        )
-        return 1
-    except Exception as e:
-        print(f"Error reading template file: {e}", file=sys.stderr)
+        # If using a template directory, skip loading a global template here
+        # Let per-file matching handle it in process_batch/process_single
+        if args.template_dir:
+            template_data = ""
+        else:
+            template_data, _ = resolve_template_file(
+                template_file if not args.template_dir else None,
+                None,
+                None
+            )
+    except (FileNotFoundError, ValueError) as e:
+        print(f"Error: {e}", file=sys.stderr)
         return 1
 
-    # Check and display time information
-    time_info = check_time_limit(doc_events, args.time_limit)
-    display_time_info(time_info)
-
-    # Verify and process the recording
-    verified = False
-    reconstructed = ""
-    suspicious_events = []
-    try:
-        template_data, suspicious_events = verify(template_data, doc_events)
-        reconstructed = reconstruct_file_from_events(
-            doc_events, template_data, document_path=target_document
+    # Process files
+    if batch_mode:
+        results, all_verified = process_batch(
+            jsonl_files, template_path, template_data, args
+        )
+    else:
+        results, all_verified = process_single(
+            jsonl_files[0], template_path, template_data, args
         )
-        verified = True
-        if not args.quiet:
-            print(reconstructed)
-
-        # Display suspicious events
-        display_suspicious_events(suspicious_events, args.show_autocomplete_details)
 
-    except ValueError as e:
-        print("File failed verification from template!", file=sys.stderr)
-        print(str(e), file=sys.stderr)
-        try:
-            print(template_diff(template_data, doc_events), file=sys.stderr)
-        except Exception:
-            pass
-        verified = False
-    except Exception as e:
-        print(f"Error processing file: {type(e).__name__}: {e}", file=sys.stderr)
-        verified = False
+    if not results:
+        return 1
 
-    # Write JSON output to file if requested
-    if args.output_json:
-        try:
-            write_json_output(
-                args.output_json,
-                target_document or str(args.template_file),
-                time_info,
-                suspicious_events,
-                reconstructed,
-                verified,
-            )
-        except Exception as e:
-            print(f"Error writing JSON output: {e}", file=sys.stderr)
-            return 1
+    # Output summary and combined report for batch mode
+    if batch_mode:
+        failed_files = [r["jsonl_file"].name for r in results if not r["verified"]]
+        verified_count = len(results) - len(failed_files)
+        print_batch_summary(len(results), verified_count, failed_files)
+
+        # Display combined time report
+        time_infos = [r["time_info"] for r in results]
+        combined_time = None
+        if any(time_infos):
+            combined_time = combine_time_info(time_infos, args.time_limit)
+            display_time_info(combined_time, is_combined=True)
+
+        # Write JSON output
+        if args.output_json:
+            try:
+                write_batch_json_output(
+                    args.output_json, results, combined_time, all_verified, batch_mode=True
+                )
+            except Exception as e:
+                print(f"Error writing batch JSON output: {e}", file=sys.stderr)
+    else:
+        # Single file mode - write JSON output
+        if args.output_json:
+            try:
+                write_batch_json_output(
+                    args.output_json, results, results[0]["time_info"],
+                    results[0]["verified"], batch_mode=False
+                )
+            except Exception as e:
+                print(f"Error writing JSON output: {e}", file=sys.stderr)
 
-    return 0 if verified else 1
+    return 0 if all_verified else 1
 
 
 if __name__ == "__main__":