cr-proc 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_recorder_processor/api/document.py +33 -9
- code_recorder_processor/api/output.py +11 -3
- code_recorder_processor/api/verify.py +93 -34
- code_recorder_processor/cli.py +173 -44
- code_recorder_processor/display.py +33 -0
- {cr_proc-0.1.11.dist-info → cr_proc-0.1.13.dist-info}/METADATA +21 -1
- cr_proc-0.1.13.dist-info/RECORD +13 -0
- {cr_proc-0.1.11.dist-info → cr_proc-0.1.13.dist-info}/WHEEL +1 -1
- cr_proc-0.1.11.dist-info/RECORD +0 -13
- {cr_proc-0.1.11.dist-info → cr_proc-0.1.13.dist-info}/entry_points.txt +0 -0

code_recorder_processor/api/document.py CHANGED

@@ -5,9 +5,38 @@ from pathlib import Path, PureWindowsPath, PurePosixPath
 from typing import Any
 
 
+def normalize_path_string(path_str: str) -> str:
+    """
+    Normalize a path string to use forward slashes (POSIX style).
+
+    Handles both Windows-style (backslash) and Unix-style (forward slash) paths
+    regardless of the current platform. Useful for cross-platform consistency
+    when files are created on Windows but processed on other systems.
+
+    Parameters
+    ----------
+    path_str : str
+        Path string (may use Windows or Unix separators)
+
+    Returns
+    -------
+    str
+        Normalized path string using forward slashes
+    """
+    # Try to detect if this is a Windows path (contains backslashes)
+    if "\\" in path_str:
+        # Windows-style path
+        path_obj = PureWindowsPath(path_str)
+    else:
+        # Unix-style path (or just a filename)
+        path_obj = PurePosixPath(path_str)
+
+    return path_obj.as_posix()
+
+
 def _normalize_document_path(doc_path: str) -> tuple[str, str]:
     """
-
+    Extract filename and stem from a document path.
 
     Handles both Windows-style (backslash) and Unix-style (forward slash) paths
     regardless of the current platform.
@@ -22,14 +51,9 @@ def _normalize_document_path(doc_path: str) -> tuple[str, str]:
     tuple[str, str]
         (filename, stem) extracted from the path
     """
-    # Try to detect if this is a Windows path (contains backslashes)
-    if "\\" in doc_path:
-        # Windows-style path
-        path_obj = PureWindowsPath(doc_path)
-    else:
-        # Unix-style path (or just a filename)
-        path_obj = PurePosixPath(doc_path)
-
+    # Normalize to forward slashes first, then parse
+    normalized = normalize_path_string(doc_path)
+    path_obj = PurePosixPath(normalized)
     return path_obj.name, path_obj.stem
 
 
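A minimal usage sketch of the new `normalize_path_string` helper, condensed from the hunk above (the sample paths are illustrative):

```python
from pathlib import PureWindowsPath, PurePosixPath

def normalize_path_string(path_str: str) -> str:
    # Backslashes mark a Windows-style path; everything else is treated as POSIX
    if "\\" in path_str:
        return PureWindowsPath(path_str).as_posix()
    return PurePosixPath(path_str).as_posix()

print(normalize_path_string("src\\pkg\\main.py"))  # -> src/pkg/main.py
print(normalize_path_string("src/pkg/main.py"))    # -> src/pkg/main.py
```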

code_recorder_processor/api/output.py CHANGED

@@ -4,6 +4,8 @@ import sys
 from pathlib import Path
 from typing import Any
 
+from .document import normalize_path_string
+
 
 def write_batch_json_output(
     output_path: Path,
@@ -36,15 +38,21 @@ def write_batch_json_output(
     # Convert results to JSON-serializable format
     files_data = []
     for r in results:
-        files_data.append({
-            "jsonl_file": str(r["jsonl_file"]),
+        file_result = {
+            "jsonl_file": normalize_path_string(str(r["jsonl_file"])),
             "document": r["target_document"],
             "verified": r["verified"],
             "time_info": r["time_info"],
             "suspicious_events": r["suspicious_events"],
             "template_diff": r.get("template_diff", ""),
             "reconstructed_code": r["reconstructed"],
-        })
+        }
+
+        # Add submitted_comparison if present
+        if r.get("submitted_comparison") is not None:
+            file_result["submitted_comparison"] = r["submitted_comparison"]
+
+        files_data.append(file_result)
 
     # Use consistent format for both single and batch modes
     output_data = {
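For reference, a sketch of the per-file entry this now produces; the field names come from the hunk above, the values are hypothetical, and `submitted_comparison` appears only when a comparison ran:

```python
# Hypothetical per-file entry in the batch JSON output after this change
file_result = {
    "jsonl_file": "recordings/homework0-ISC.recording.jsonl.gz",
    "document": "homework0-ISC.py",
    "verified": True,
    "time_info": {"minutes_elapsed": 12.5},  # illustrative value
    "suspicious_events": [],
    "template_diff": "",
    "reconstructed_code": "print('hello')\n",
    "submitted_comparison": {"matches": True, "diff": "", "whitespace_only": False},
}
```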

code_recorder_processor/api/verify.py CHANGED

@@ -1,6 +1,7 @@
 from typing import Any
 from datetime import datetime
 import difflib
+from .document import normalize_path_string
 
 # ============================================================================
 # Constants for detection thresholds
@@ -837,15 +838,19 @@ def verify(template: str, jsonData: tuple[dict[str, Any], ...]) -> tuple[str, li
 
 
 def combine_time_info(
-
+    all_events: list[tuple[dict[str, Any], ...]], time_limit_minutes: int | None
 ) -> dict[str, Any] | None:
     """
-    Combine time information from multiple recording files.
+    Combine time information from multiple recording files, avoiding double-counting overlapping time.
+
+    Merges all events from multiple recordings, then calculates the actual time spent editing
+    using the same logic as check_time_limit (gap analysis with focus awareness). This ensures
+    overlapping editing sessions are not double-counted.
 
     Parameters
     ----------
-
-        List of
+    all_events : list[tuple[dict[str, Any], ...]]
+        List of event tuples from multiple recording files
     time_limit_minutes : int | None
         Time limit to check against
 
@@ -854,40 +859,94 @@ def combine_time_info(
     dict[str, Any] | None
         Combined time information, or None if no valid data
     """
-
-    if
+    # Filter out empty event sets
+    valid_event_sets = [events for events in all_events if events]
+    if not valid_event_sets:
         return None
 
-    #
-
+    # Merge all events from all recordings into a single tuple
+    merged_events = tuple(
+        event
+        for event_set in valid_event_sets
+        for event in event_set
+    )
 
-    #
-
-
-
-            datetime.fromisoformat(info["first_timestamp"].replace("Z", "+00:00"))
-        )
-        all_timestamps.append(
-            datetime.fromisoformat(info["last_timestamp"].replace("Z", "+00:00"))
-        )
+    # Use check_time_limit on the merged events to calculate time properly
+    # This handles overlapping periods automatically since we're now analyzing
+    # all events together chronologically
+    combined_result = check_time_limit(merged_events, time_limit_minutes)
 
-
-
-    overall_span = (last_ts - first_ts).total_seconds() / 60
+    if combined_result is None:
+        return None
 
-
-
-        "minutes_elapsed": round(total_elapsed, 2),
-        "first_timestamp": first_ts.isoformat().replace("+00:00", "Z"),
-        "last_timestamp": last_ts.isoformat().replace("+00:00", "Z"),
-        "file_count": len(valid_infos),
-        "overall_span_minutes": round(overall_span, 2),
-    }
+    # Add file_count to the result
+    combined_result["file_count"] = len(valid_event_sets)
 
-
-    if time_limit_minutes is not None:
-        result["exceeds_limit"] = total_elapsed > time_limit_minutes
-    else:
-        result["exceeds_limit"] = False
+    return combined_result
 
-
+
+def compare_submitted_file(reconstructed_code: str, submitted_file_path) -> dict[str, Any]:
+    """
+    Compare reconstructed code from recording with a submitted final file.
+
+    Parameters
+    ----------
+    reconstructed_code : str
+        The code reconstructed from the recording
+    submitted_file_path : Path
+        Path to the submitted file
+
+    Returns
+    -------
+    dict[str, Any]
+        Dictionary containing:
+        - matches: bool indicating if the files match
+        - submitted_file: path to the submitted file
+        - diff: unified diff string if files don't match
+        - whitespace_only: bool indicating if only whitespace differs
+    """
+    try:
+        submitted_content = submitted_file_path.read_text()
+    except Exception as e:
+        return {
+            "matches": False,
+            "submitted_file": normalize_path_string(str(submitted_file_path)),
+            "error": f"Failed to read submitted file: {e}",
+            "diff": "",
+            "whitespace_only": False,
+        }
+
+    # Normalize newlines for comparison
+    reconstructed_normalized = _normalize_newlines(reconstructed_code)
+    submitted_normalized = _normalize_newlines(submitted_content)
+
+    # Check exact match
+    matches = reconstructed_normalized == submitted_normalized
+
+    # Check if only whitespace differs
+    whitespace_only = False
+    if not matches:
+        whitespace_only = is_only_whitespace_differences(
+            submitted_normalized, reconstructed_normalized
+        )
+
+    # Generate diff if they don't match
+    diff_text = ""
+    if not matches:
+        reconstructed_lines = reconstructed_normalized.splitlines(keepends=True)
+        submitted_lines = submitted_normalized.splitlines(keepends=True)
+        diff = difflib.unified_diff(
+            reconstructed_lines,
+            submitted_lines,
+            fromfile="reconstructed",
+            tofile="submitted",
+            lineterm="",
+        )
+        diff_text = "".join(diff)
+
+    return {
+        "matches": matches,
+        "submitted_file": normalize_path_string(str(submitted_file_path)),
+        "diff": diff_text,
+        "whitespace_only": whitespace_only,
+    }
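The core of `compare_submitted_file` is a newline-normalized equality check followed by a unified diff. A self-contained sketch of that flow; the package's private helpers `_normalize_newlines` and `is_only_whitespace_differences` are not shown in this diff, so simple stand-ins are used:

```python
import difflib

def normalize_newlines(text: str) -> str:
    # Stand-in for the module's _normalize_newlines helper
    return text.replace("\r\n", "\n").replace("\r", "\n")

def whitespace_only(a: str, b: str) -> bool:
    # Stand-in for is_only_whitespace_differences: equal after dropping whitespace
    return "".join(a.split()) == "".join(b.split())

reconstructed = "def add(a, b):\n    return a + b\n"
submitted = "def add(a, b):\n    return a+b\n"

if normalize_newlines(reconstructed) != normalize_newlines(submitted):
    diff = difflib.unified_diff(
        reconstructed.splitlines(keepends=True),
        submitted.splitlines(keepends=True),
        fromfile="reconstructed",
        tofile="submitted",
        lineterm="",
    )
    print("".join(diff))
    print("whitespace only:", whitespace_only(reconstructed, submitted))  # True
```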

code_recorder_processor/cli.py CHANGED

@@ -18,11 +18,13 @@ from .api.output import write_batch_json_output
 from .api.verify import (
     check_time_limit,
     combine_time_info,
+    compare_submitted_file,
     detect_external_copypaste,
     template_diff,
     verify,
 )
 from .display import (
+    display_submitted_file_comparison,
     display_suspicious_events,
     display_template_diff,
     display_time_info,
@@ -102,6 +104,21 @@ def create_parser() -> argparse.ArgumentParser:
         help="Directory to write reconstructed code files in batch mode (one file per recording). "
         "Files are named based on input recording filenames.",
     )
+    parser.add_argument(
+        "--submitted-file",
+        type=Path,
+        default=None,
+        help="Path to the submitted final file to verify against the reconstructed output. "
+        "If provided, the reconstructed code will be compared to this file.",
+    )
+    parser.add_argument(
+        "--submitted-dir",
+        type=Path,
+        default=None,
+        help="Directory containing submitted files to compare against. "
+        "For each recording, the corresponding submitted file will be found by matching the filename. "
+        "For example, 'homework0-ISC.recording.jsonl.gz' will match 'homework0-ISC.py' in the directory.",
+    )
     parser.add_argument(
         "-s",
         "--show-autocomplete-details",
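A self-contained sketch of how the two new flags parse (the prog name and sample arguments are illustrative):

```python
import argparse
from pathlib import Path

parser = argparse.ArgumentParser(prog="cr_proc")
parser.add_argument("--submitted-file", type=Path, default=None)
parser.add_argument("--submitted-dir", type=Path, default=None)

args = parser.parse_args(["--submitted-dir", "submissions/"])
print(args.submitted_dir)           # submissions
print(args.submitted_file is None)  # True
```

Note the flags are declared as two independent options here; their mutual exclusion is enforced manually later in `main()` (see the validation hunk below) rather than via an argparse mutually exclusive group.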

@@ -169,36 +186,81 @@ def expand_file_patterns(patterns: list[str]) -> list[Path]:
     return existing_files
 
 
+def find_submitted_file(
+    jsonl_file: Path,
+    submitted_dir: Path,
+    target_document: str | None,
+) -> Path | None:
+    """
+    Find the submitted file corresponding to a recording file.
+
+    Matches by replacing '.recording.jsonl.gz' with the extension of the
+    target document (or '.py' if not specified).
+
+    Parameters
+    ----------
+    jsonl_file : Path
+        Path to the JSONL recording file
+    submitted_dir : Path
+        Directory containing submitted files
+    target_document : str | None
+        Target document path (to extract extension)
+
+    Returns
+    -------
+    Path | None
+        Path to the submitted file if found, None otherwise
+    """
+    # Determine the file extension from target_document or default to .py
+    extension = ".py"
+    if target_document:
+        extension = Path(target_document).suffix or ".py"
+
+    # Remove '.recording.jsonl.gz' and add the appropriate extension
+    base_name = jsonl_file.name.replace(".recording.jsonl.gz", "")
+    submitted_filename = base_name + extension
+
+    submitted_file = submitted_dir / submitted_filename
+    if submitted_file.exists():
+        return submitted_file
+
+    return None
+
+
 def process_single_file(
     jsonl_path: Path,
+    json_data: tuple[dict[str, Any], ...],
     template_data: str,
     target_document: str | None,
     time_limit: int | None,
-) -> tuple[bool, str, list[dict[str, Any]], dict[str, Any] | None, str]:
+    submitted_file: Path | None = None,
+    submitted_dir: Path | None = None,
+) -> tuple[bool, str, list[dict[str, Any]], dict[str, Any] | None, str, tuple[dict[str, Any], ...], dict[str, Any] | None]:
     """
     Process a single JSONL recording file.
 
     Parameters
     ----------
     jsonl_path : Path
-        Path to the JSONL file
+        Path to the JSONL file (used for error reporting and file matching)
+    json_data : tuple[dict[str, Any], ...]
+        Pre-loaded JSON events from the recording file
     template_data : str
         Template file content
     target_document : str | None
         Document to process
     time_limit : int | None
         Time limit in minutes
+    submitted_file : Path | None
+        Path to the submitted file to compare against
+    submitted_dir : Path | None
+        Directory containing submitted files to compare against
 
     Returns
     -------
     tuple
-        (verified, reconstructed_code, suspicious_events, time_info, template_diff_text)
+        (verified, reconstructed_code, suspicious_events, time_info, template_diff_text, doc_events, submitted_comparison)
     """
-    try:
-        json_data = load_jsonl(jsonl_path)
-    except (FileNotFoundError, ValueError, IOError) as e:
-        print(f"Error loading {jsonl_path}: {e}", file=sys.stderr)
-        return False, "", [], None, ""
 
     # Filter events for target document
     doc_events = filter_events_by_document(json_data, target_document)
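The matching rule in `find_submitted_file` reduces to a string replacement on the recording filename plus the target document's extension; a minimal illustration using the example from the help text:

```python
from pathlib import Path

recording = Path("homework0-ISC.recording.jsonl.gz")
target_document = "homework0-ISC.py"  # illustrative target document

extension = Path(target_document).suffix or ".py"
base_name = recording.name.replace(".recording.jsonl.gz", "")
print(base_name + extension)  # -> homework0-ISC.py
```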

@@ -207,7 +269,7 @@
             f"Warning: No events found for document '{target_document}' in {jsonl_path}",
             file=sys.stderr,
         )
-        return False, "", [], None, ""
+        return False, "", [], None, "", (), None
 
     # Check time information
     time_info = check_time_limit(doc_events, time_limit)
@@ -218,13 +280,29 @@
         reconstructed = reconstruct_file_from_events(
             doc_events, verified_template, document_path=target_document
         )
-        return True, reconstructed, suspicious_events, time_info, ""
+
+        # Compare with submitted file if provided
+        submitted_comparison = None
+        actual_submitted_file = submitted_file
+
+        # If submitted_dir is provided, find the matching file
+        if submitted_dir and not submitted_file:
+            actual_submitted_file = find_submitted_file(jsonl_path, submitted_dir, target_document)
+            if actual_submitted_file:
+                print(f"Found submitted file: {actual_submitted_file.name}", file=sys.stderr)
+
+        if actual_submitted_file and actual_submitted_file.exists():
+            submitted_comparison = compare_submitted_file(reconstructed, actual_submitted_file)
+        elif actual_submitted_file:
+            print(f"Warning: Submitted file not found: {actual_submitted_file}", file=sys.stderr)
+
+        return True, reconstructed, suspicious_events, time_info, "", doc_events, submitted_comparison
     except ValueError as e:
         # If verification fails but we have events, still try to reconstruct
         print(f"Warning: Verification failed for {jsonl_path}: {e}", file=sys.stderr)
         try:
             if not doc_events:
-                return False, "", [], time_info, ""
+                return False, "", [], time_info, "", (), None
 
             # Compute diff against template and still detect suspicious events
             diff_text = template_diff(template_data, doc_events)
@@ -235,19 +313,35 @@
             reconstructed = reconstruct_file_from_events(
                 doc_events, initial_state, document_path=target_document
             )
-            return False, reconstructed, suspicious_events, time_info, diff_text
+
+            # Compare with submitted file if provided
+            submitted_comparison = None
+            actual_submitted_file = submitted_file
+
+            # If submitted_dir is provided, find the matching file
+            if submitted_dir and not submitted_file:
+                actual_submitted_file = find_submitted_file(jsonl_path, submitted_dir, target_document)
+                if actual_submitted_file:
+                    print(f"Found submitted file: {actual_submitted_file.name}", file=sys.stderr)
+
+            if actual_submitted_file and actual_submitted_file.exists():
+                submitted_comparison = compare_submitted_file(reconstructed, actual_submitted_file)
+            elif actual_submitted_file:
+                print(f"Warning: Submitted file not found: {actual_submitted_file}", file=sys.stderr)
+
+            return False, reconstructed, suspicious_events, time_info, diff_text, doc_events, submitted_comparison
         except Exception as reconstruction_error:
             print(
                 f"Error reconstructing {jsonl_path}: {type(reconstruction_error).__name__}: {reconstruction_error}",
                 file=sys.stderr,
             )
-            return False, "", [], time_info, ""
+            return False, "", [], time_info, "", (), None
     except Exception as e:
         print(
             f"Error processing {jsonl_path}: {type(e).__name__}: {e}",
             file=sys.stderr,
         )
-        return False, "", [], time_info, ""
+        return False, "", [], time_info, "", (), None
 
 
 def write_reconstructed_file(
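The lookup-and-compare block is duplicated between the success path and the verification-failure path above. A hedged sketch of one way it could be factored out; this helper is not part of the released code, and the import paths are assumptions:

```python
from pathlib import Path
from typing import Any

from code_recorder_processor.api.verify import compare_submitted_file  # assumed path
from code_recorder_processor.cli import find_submitted_file            # assumed path

def resolve_and_compare(
    reconstructed: str,
    jsonl_path: Path,
    submitted_file: Path | None,
    submitted_dir: Path | None,
    target_document: str | None,
) -> dict[str, Any] | None:
    # Mirrors the duplicated block: prefer an explicit --submitted-file,
    # otherwise look the file up in --submitted-dir by filename
    actual = submitted_file
    if submitted_dir and not submitted_file:
        actual = find_submitted_file(jsonl_path, submitted_dir, target_document)
    if actual and actual.exists():
        return compare_submitted_file(reconstructed, actual)
    return None
```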

@@ -274,7 +368,7 @@ def write_reconstructed_file(
     """
     try:
         output_path.parent.mkdir(parents=True, exist_ok=True)
-        output_path.write_text(content)
+        output_path.write_text(content + '\n')
         print(f"{file_description} written to: {output_path}", file=sys.stderr)
         return True
     except Exception as e:
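The one-line change above makes every reconstructed file end with a trailing newline, like a conventional POSIX text file; a quick illustration:

```python
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as d:
    p = Path(d) / "example.py"
    p.write_text("print('hi')" + "\n")   # same pattern as the hunk above
    print(p.read_text().endswith("\n"))  # True
```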

@@ -284,7 +378,8 @@
 
 def handle_playback_mode(
     jsonl_file: Path,
-
+    json_data: tuple[dict[str, Any], ...],
+    template_base: Path | None,
     template_data: str,
     document_override: str | None,
     speed: float,
@@ -295,9 +390,11 @@
     Parameters
     ----------
     jsonl_file : Path
-        Path to the recording file
-
-
+        Path to the recording file (for error reporting)
+    json_data : tuple[dict[str, Any], ...]
+        Pre-loaded JSON events from the recording file
+    template_base : Path | None
+        Path to the template file or directory
     template_data : str
         Template file content
     document_override : str | None
@@ -311,9 +408,8 @@
         Exit code (0 for success, 1 for error)
     """
     try:
-        json_data = load_jsonl(jsonl_file)
         recorded_docs = get_recorded_documents(json_data)
-        target_document = resolve_document(recorded_docs,
+        target_document = resolve_document(recorded_docs, template_base, document_override)
 
         if target_document:
             playback_recording(json_data, target_document, template_data, speed)
@@ -322,12 +418,13 @@
             print("Error: No documents found in recording", file=sys.stderr)
             return 1
     except Exception as e:
-        print(f"Error
+        print(f"Error in playback: {e}", file=sys.stderr)
         return 1
 
 
 def process_batch(
     jsonl_files: list[Path],
+    json_data_map: dict[Path, tuple[dict[str, Any], ...]],
     template_base: Path | None,
     template_data: str,
     args: argparse.Namespace,
@@ -339,6 +436,8 @@
     ----------
     jsonl_files : list[Path]
         List of JSONL files to process
+    json_data_map : dict[Path, tuple[dict[str, Any], ...]]
+        Pre-loaded JSON data for each file
     template_base : Path
         Path to template file or directory
     template_data : str
@@ -360,9 +459,16 @@
     for i, jsonl_file in enumerate(jsonl_files, 1):
         print_batch_header(i, len(jsonl_files), jsonl_file.name)
 
+        # Get pre-loaded data for this file
+        if jsonl_file not in json_data_map:
+            print(f"Error: No pre-loaded data for {jsonl_file}", file=sys.stderr)
+            all_verified = False
+            continue
+
+        file_data = json_data_map[jsonl_file]
+
         # Determine target document for this file
         try:
-            file_data = load_jsonl(jsonl_file)
             recorded_docs = get_recorded_documents(file_data)
             target_document = resolve_document(recorded_docs, template_base, args.document)
         except (FileNotFoundError, ValueError, IOError) as e:
@@ -386,9 +492,9 @@
         else:
             file_template_data = template_data
 
-        # Process the file
-        verified, reconstructed, suspicious_events, time_info, diff_text = process_single_file(
-            jsonl_file, file_template_data, target_document, args.time_limit
+        # Process the file with pre-loaded data
+        verified, reconstructed, suspicious_events, time_info, diff_text, doc_events, submitted_comparison = process_single_file(
+            jsonl_file, file_data, file_template_data, target_document, args.time_limit, args.submitted_file, args.submitted_dir
         )
 
         if not verified:
@@ -398,6 +504,7 @@
         display_time_info(time_info)
         display_suspicious_events(suspicious_events, args.show_autocomplete_details)
         display_template_diff(diff_text)
+        display_submitted_file_comparison(submitted_comparison)
 
         # Store results
         results.append({
@@ -408,6 +515,8 @@
             "suspicious_events": suspicious_events,
             "time_info": time_info,
             "template_diff": diff_text,
+            "doc_events": doc_events,
+            "submitted_comparison": submitted_comparison,
         })
 
         # Write output file if requested
@@ -421,6 +530,7 @@
 
 def process_single(
     jsonl_file: Path,
+    json_data: tuple[dict[str, Any], ...],
     template_base: Path | None,
     template_data: str,
     args: argparse.Namespace,
@@ -432,6 +542,8 @@
     ----------
     jsonl_file : Path
         Path to JSONL file
+    json_data : tuple[dict[str, Any], ...]
+        Pre-loaded JSON data for the file
     template_base : Path
         Path to template file or directory
     template_data : str
@@ -445,8 +557,7 @@
         (results, verified)
     """
     try:
-        file_data = load_jsonl(jsonl_file)
-        recorded_docs = get_recorded_documents(file_data)
+        recorded_docs = get_recorded_documents(json_data)
         target_document = resolve_document(recorded_docs, template_base, args.document)
     except (FileNotFoundError, ValueError, IOError) as e:
         print(f"Error determining document: {e}", file=sys.stderr)
@@ -470,14 +581,15 @@
 
     print(f"Processing: {target_document or template_base}", file=sys.stderr)
 
-    verified, reconstructed, suspicious_events, time_info, diff_text = process_single_file(
-        jsonl_file, file_template_data, target_document, args.time_limit
+    verified, reconstructed, suspicious_events, time_info, diff_text, doc_events, submitted_comparison = process_single_file(
+        jsonl_file, json_data, file_template_data, target_document, args.time_limit, args.submitted_file, args.submitted_dir
     )
 
     # Display results
     display_time_info(time_info)
     display_suspicious_events(suspicious_events, args.show_autocomplete_details)
     display_template_diff(diff_text)
+    display_submitted_file_comparison(submitted_comparison)
 
     # Write output file if requested
     if reconstructed and args.output_file:
@@ -492,6 +604,8 @@
         "suspicious_events": suspicious_events,
         "time_info": time_info,
         "template_diff": diff_text,
+        "doc_events": doc_events,
+        "submitted_comparison": submitted_comparison,
     }]
 
     return results, verified
@@ -526,6 +640,11 @@ def main() -> int:
         parser.print_help()
         return 1
 
+    # Validate that both --submitted-file and --submitted-dir are not provided simultaneously
+    if args.submitted_file and args.submitted_dir:
+        print("Error: Cannot specify both --submitted-file and --submitted-dir", file=sys.stderr)
+        return 1
+
     # Expand file patterns and validate
     try:
         jsonl_files = expand_file_patterns(jsonl_patterns)

@@ -540,10 +659,23 @@ def main() -> int:
     # Determine template source (use template_dir if provided, otherwise template_file)
     template_path = args.template_dir if args.template_dir else template_file
 
+    # Load all files once - fail fast if any fail
+    json_data_map: dict[Path, tuple[dict[str, Any], ...]] = {}
+    for jsonl_file in jsonl_files:
+        try:
+            json_data_map[jsonl_file] = load_jsonl(jsonl_file)
+        except ValueError as e:
+            print(f"Error parsing {jsonl_file}: {e}", file=sys.stderr)
+            print("Tampering is likely - aborting processing.", file=sys.stderr)
+            return 1
+        except (FileNotFoundError, ValueError, IOError) as e:
+            print(f"Error loading {jsonl_file}: {e}", file=sys.stderr)
+            return 1
+
     # Handle playback mode (single file only)
     if not batch_mode and args.playback:
         try:
-            json_data =
+            json_data = json_data_map[jsonl_files[0]]
             recorded_docs = get_recorded_documents(json_data)
             target_document = resolve_document(recorded_docs, template_path, args.document)
 
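A hedged sketch of the new fail-fast loading pass: every recording is parsed exactly once up front, and one malformed file aborts the run. `load_jsonl` is the package's loader; the module path in the import is an assumption:

```python
from pathlib import Path

from code_recorder_processor.api.load import load_jsonl  # assumed module path

json_data_map: dict[Path, tuple[dict, ...]] = {}
for jsonl_file in [Path("a.recording.jsonl.gz"), Path("b.recording.jsonl.gz")]:
    try:
        json_data_map[jsonl_file] = load_jsonl(jsonl_file)
    except ValueError as e:
        # Mirrors the hunk above: a parse error is treated as likely tampering
        raise SystemExit(f"Error parsing {jsonl_file}: {e}")
```

Since `ValueError` is caught first, the later `except (FileNotFoundError, ValueError, IOError)` clause in the hunk above effectively only handles the file-access errors.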

@@ -554,14 +686,11 @@
                 target_document
             )
 
-
-
-
-            else:
-                print("Error: No documents found in recording", file=sys.stderr)
-                return 1
+            return handle_playback_mode(
+                jsonl_files[0], json_data, template_path, template_data, args.document, args.playback_speed
+            )
         except Exception as e:
-            print(f"Error
+            print(f"Error in playback: {e}", file=sys.stderr)
             return 1
 
     # Get template data
@@ -580,14 +709,14 @@
         print(f"Error: {e}", file=sys.stderr)
         return 1
 
-    # Process files
+    # Process files with pre-loaded data
     if batch_mode:
         results, all_verified = process_batch(
-            jsonl_files, template_path, template_data, args
+            jsonl_files, json_data_map, template_path, template_data, args
         )
     else:
         results, all_verified = process_single(
-            jsonl_files[0], template_path, template_data, args
+            jsonl_files[0], json_data_map[jsonl_files[0]], template_path, template_data, args
         )
 
     if not results:
@@ -600,10 +729,10 @@
         print_batch_summary(len(results), verified_count, failed_files)
 
         # Display combined time report
-
+        all_events = [r["doc_events"] for r in results]
         combined_time = None
-        if any(
-            combined_time = combine_time_info(
+        if any(all_events):
+            combined_time = combine_time_info(all_events, args.time_limit)
             display_time_info(combined_time, is_combined=True)
 
     # Write JSON output
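Shape of the data the batch summary now hands to `combine_time_info`: one tuple of event dicts per processed recording. The event fields below are hypothetical; empty tuples come from recordings that produced no usable events and are filtered out inside `combine_time_info`:

```python
all_events = [
    ({"type": "edit"}, {"type": "edit"}),  # recording 1 (hypothetical events)
    (),                                    # recording 2: no usable events
]
if any(all_events):  # same guard as the hunk above
    print("would call combine_time_info(all_events, time_limit)")
```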

code_recorder_processor/display.py CHANGED

@@ -176,6 +176,39 @@ def display_template_diff(diff_text: str) -> None:
     print(diff_text, file=sys.stderr)
 
 
+def display_submitted_file_comparison(comparison: dict[str, Any] | None) -> None:
+    """
+    Display comparison results between reconstructed code and submitted file.
+
+    Parameters
+    ----------
+    comparison : dict[str, Any] | None
+        Comparison results from compare_submitted_file, or None if no comparison
+    """
+    if not comparison:
+        return
+
+    print("\nSubmitted file comparison:", file=sys.stderr)
+    print(f" Submitted file: {comparison['submitted_file']}", file=sys.stderr)
+
+    if "error" in comparison:
+        print(f" Error: {comparison['error']}", file=sys.stderr)
+        return
+
+    if comparison["matches"]:
+        print(" ✓ Reconstructed code matches submitted file exactly", file=sys.stderr)
+    elif comparison.get("whitespace_only", False):
+        print(" ⚠ Reconstructed code differs only in whitespace from submitted file", file=sys.stderr)
+    else:
+        print(" ✗ Reconstructed code differs from submitted file", file=sys.stderr)
+        if comparison.get("diff"):
+            print("\n Diff (reconstructed → submitted):", file=sys.stderr)
+            # Indent each line of the diff
+            for line in comparison["diff"].split("\n"):
+                if line:
+                    print(f" {line}", file=sys.stderr)
+
+
 def print_separator() -> None:
     """Print a separator line."""
     print(f"{'='*80}", file=sys.stderr)
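A usage sketch with a hand-built comparison dict (values illustrative); this exercises the whitespace-only branch and prints to stderr, as in the function above:

```python
from code_recorder_processor.display import display_submitted_file_comparison

display_submitted_file_comparison({
    "submitted_file": "submissions/homework0-ISC.py",
    "matches": False,
    "whitespace_only": True,
    "diff": "",
})
```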

{cr_proc-0.1.11.dist-info → cr_proc-0.1.13.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cr_proc
-Version: 0.1.11
+Version: 0.1.13
 Summary: A tool for processing BYU CS code recording files.
 Author: Ethan Dye
 Author-email: mrtops03@gmail.com
@@ -79,6 +79,14 @@ When processing multiple files:
   specified file instead of stdout. For single files only.
 - `--output-dir OUTPUT_DIR`: (Optional) Directory to write reconstructed code
   files in batch mode. Files are named based on input recording filenames.
+- `--submitted-file SUBMITTED_FILE`: (Optional) Path to the submitted final file
+  to verify against the reconstructed output. If provided, the reconstructed code
+  will be compared to this file and differences will be reported.
+- `--submitted-dir SUBMITTED_DIR`: (Optional) Directory containing submitted files
+  to verify against the reconstructed output. For each recording file, the
+  corresponding submitted file will be found by matching the filename
+  (e.g., `homework0-ISC.recording.jsonl.gz` will match `homework0-ISC.py`).
+  Cannot be used with `--submitted-file`.
 - `-s, --show-autocomplete-details`: (Optional) Show individual auto-complete
   events in addition to aggregate statistics.
 - `-p, --playback`: (Optional) Play back the recording in real-time, showing
@@ -112,6 +120,18 @@ Save JSON results:
 poetry run cr_proc student1.jsonl.gz student2.jsonl.gz template.py -o results/
 ```
 
+Verify against a single submitted file:
+
+```bash
+poetry run cr_proc homework0.recording.jsonl.gz homework0.py --submitted-file submitted_homework0.py
+```
+
+Verify against submitted files in a directory (batch mode):
+
+```bash
+poetry run cr_proc recordings/*.jsonl.gz template.py --submitted-dir submissions/
+```
+
 This will process each recording independently and flag any that exceed 30
 minutes.
 

cr_proc-0.1.13.dist-info/RECORD ADDED

@@ -0,0 +1,13 @@
+code_recorder_processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+code_recorder_processor/api/build.py,sha256=XuF8Vx9mDdRqeaxCVgYAdn4NFJzkRt4Q839m15th0Fo,9908
+code_recorder_processor/api/document.py,sha256=eIsGBCPq234cPiPN_ktBOoVrt0G1hHqbgvhXwMXQdnU,11343
+code_recorder_processor/api/load.py,sha256=Br-USpFQJ6W8c5hjmCnunM3V0_MURKZp5Yyl1IJdahc,5514
+code_recorder_processor/api/output.py,sha256=HLa6DtN4i-wW0-vyE7SzqLeq35nhSoj2yHc9RjTPcBc,2441
+code_recorder_processor/api/verify.py,sha256=6D8Zs8NBziG1kNc6HgH59yQp4u1wa4zyCuNE0qLTlpk,36040
+code_recorder_processor/cli.py,sha256=giFUu9KiB40k5Z2CwZK9CDL7dQOicBLCGBx0Uzki39o,26329
+code_recorder_processor/display.py,sha256=He5loCMrm1S1186N2BgDy6bl0v__kiosJ_qDxpa4hbM,8657
+code_recorder_processor/playback.py,sha256=6-OJtQOHKgfutxUNBMunWl-VVSIB0zUDENSl0EsPCh4,4008
+cr_proc-0.1.13.dist-info/METADATA,sha256=-3ylZb1eSoxy2GY2egju_6WobzjoyLnGfEQikM7wMzA,9812
+cr_proc-0.1.13.dist-info/WHEEL,sha256=kJCRJT_g0adfAJzTx2GUMmS80rTJIVHRCfG0DQgLq3o,88
+cr_proc-0.1.13.dist-info/entry_points.txt,sha256=xb5dPAAWN1Z9NUHpvZgNakaslR1MVOERf_IfpG_M04M,77
+cr_proc-0.1.13.dist-info/RECORD,,
cr_proc-0.1.11.dist-info/RECORD DELETED

@@ -1,13 +0,0 @@
-code_recorder_processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-code_recorder_processor/api/build.py,sha256=XuF8Vx9mDdRqeaxCVgYAdn4NFJzkRt4Q839m15th0Fo,9908
-code_recorder_processor/api/document.py,sha256=DOQ0H1dQJtMs2P9E2qnKgg2iKQT9msgdE9oJXl36SnY,10622
-code_recorder_processor/api/load.py,sha256=Br-USpFQJ6W8c5hjmCnunM3V0_MURKZp5Yyl1IJdahc,5514
-code_recorder_processor/api/output.py,sha256=H2SC3pQ0C9V8YyN4yeA_KmvSoWXy_3T3TKWKhywIax4,2161
-code_recorder_processor/api/verify.py,sha256=9GpeoFQIiTzZd-DNSyN5OUM6YB5iMslO85oAjc0yoSU,34073
-code_recorder_processor/cli.py,sha256=ardcM3bLNhf6abOQ1Aj746x4hp8gerdklfDwszLlYKc,20504
-code_recorder_processor/display.py,sha256=IVTNFB3Vjzpc5ZHceAFQI2-o-N6bvjYmotLDaEy0KoU,7368
-code_recorder_processor/playback.py,sha256=6-OJtQOHKgfutxUNBMunWl-VVSIB0zUDENSl0EsPCh4,4008
-cr_proc-0.1.11.dist-info/METADATA,sha256=wZuAW9ghrjT2fCbiI9bJSy5TPLc4YD6OpYb0mTlyOL4,8926
-cr_proc-0.1.11.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
-cr_proc-0.1.11.dist-info/entry_points.txt,sha256=xb5dPAAWN1Z9NUHpvZgNakaslR1MVOERf_IfpG_M04M,77
-cr_proc-0.1.11.dist-info/RECORD,,

{cr_proc-0.1.11.dist-info → cr_proc-0.1.13.dist-info}/entry_points.txt
File without changes