PyPI - cr-proc - Versions diffs - 0.1.9__py3-none-any.whl → 0.1.11__py3-none-any.whl - Mend

cr-proc 0.1.9__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

code_recorder_processor/api/build.py CHANGED Viewed

@@ -169,6 +169,9 @@ def reconstruct_file_from_events(
     from .load import is_edit_event
     events = tuple(e for e in events if is_edit_event(e))
+    # Skip no-op events (oldFragment == newFragment, typically file-open markers)
+    events = tuple(e for e in events if not (e.get("oldFragment") == e.get("newFragment") and e.get("offset") == 0))
     # Read template content
     if normalize_newlines:
         template = _normalize_newlines(template)
@@ -197,6 +200,39 @@ def reconstruct_file_from_events(
         # No events for target_doc; return template unchanged
         return template
+    # Handle case where first event is a file-open/load event at offset 0
+    # (IDE captures the file content as seen when opened)
+    if evs and evs[0].get("offset") == 0:
+        first_old = evs[0].get("oldFragment", "")
+        first_new = evs[0].get("newFragment", "")
+        if first_old and not template.startswith(first_old):
+            # Check if this looks like a file-open event:
+            # - First event is at offset 0
+            # - oldFragment and newFragment contain significant content (file was loaded)
+            # - Template is much smaller (stub/placeholder)
+            is_likely_file_open = (
+                first_old == first_new and  # no-op replacement (just file load)
+                len(first_old) > 50 and      # substantial content
+                len(template) < len(first_old)  # template is smaller stub
+            )
+            if is_likely_file_open:
+                # Use first event's oldFragment as the template (actual file state when opened)
+                template = first_old
+            else:
+                # Template genuinely doesn't match
+                raise ValueError(
+                    f"Template content does not match recording's initial state.\n"
+                    f"First event expects to replace {len(first_old)} chars starting at offset 0,\n"
+                    f"but template only has {len(template)} chars and starts with:\n"
+                    f"{template[:min(100, len(template))]!r}\n\n"
+                    f"Expected to start with:\n"
+                    f"{first_old[:min(100, len(first_old))]!r}\n\n"
+                    f"Recording was likely made on a different version of the file.\n"
+                    f"Document path in recording: {target_doc}"
+                )
     if utf16_mode:
         # Work in UTF-16-LE byte space
         doc_bytes = template.encode("utf-16-le")

code_recorder_processor/api/document.py CHANGED Viewed

@@ -1,10 +1,38 @@
 """Document resolution and filtering utilities."""
 import difflib
 import sys
-from pathlib import Path
+from pathlib import Path, PureWindowsPath, PurePosixPath
 from typing import Any
+def _normalize_document_path(doc_path: str) -> tuple[str, str]:
+    """
+    Normalize a document path to extract filename and stem.
+    Handles both Windows-style (backslash) and Unix-style (forward slash) paths
+    regardless of the current platform.
+    Parameters
+    ----------
+    doc_path : str
+        Document path string (may use Windows or Unix separators)
+    Returns
+    -------
+    tuple[str, str]
+        (filename, stem) extracted from the path
+    """
+    # Try to detect if this is a Windows path (contains backslashes)
+    if "\\" in doc_path:
+        # Windows-style path
+        path_obj = PureWindowsPath(doc_path)
+    else:
+        # Unix-style path (or just a filename)
+        path_obj = PurePosixPath(doc_path)
+    return path_obj.name, path_obj.stem
 def find_matching_template(
     template_dir: Path, document_path: str
 ) -> Path | None:
@@ -31,8 +59,7 @@ def find_matching_template(
     if not template_dir.is_dir():
         return None
-    doc_name = Path(document_path).name
-    doc_stem = Path(document_path).stem
+    doc_name, doc_stem = _normalize_document_path(document_path)
     # First, try exact filename match
     exact_match = template_dir / doc_name
@@ -81,19 +108,25 @@ def get_normalized_document_key(doc_path: str) -> tuple[str, str]:
     Get a normalized key for a document based on filename and extension.
     This helps identify documents that are the same but with different paths.
+    Handles both Windows and Unix style paths correctly.
     Parameters
     ----------
     doc_path : str
-        Document path
+        Document path (may use Windows or Unix separators)
     Returns
     -------
     tuple[str, str]
         (filename_with_extension, extension) for grouping similar documents
     """
-    path_obj = Path(doc_path)
-    return (path_obj.name, path_obj.suffix)
+    filename, _ = _normalize_document_path(doc_path)
+    # Get extension from filename
+    if '.' in filename:
+        extension = '.' + filename.rsplit('.', 1)[1]
+    else:
+        extension = ''
+    return (filename, extension)
 def group_documents_by_name(docs: list[str]) -> dict[tuple[str, str], list[str]]:
@@ -205,7 +238,8 @@ def resolve_document(
     if override:
         matches = [
-            d for d in unique_docs if d.endswith(override) or Path(d).name == override
+            d for d in unique_docs
+            if d.endswith(override) or _normalize_document_path(d)[0] == override
         ]
         if not matches:
             raise ValueError(
@@ -220,7 +254,10 @@ def resolve_document(
     # If template_path is provided and is a file (not directory), use its extension for matching
     if template_path and template_path.is_file():
         template_ext = template_path.suffix
-        ext_matches = [d for d in unique_docs if Path(d).suffix == template_ext]
+        ext_matches = [
+            d for d in unique_docs
+            if _normalize_document_path(d)[0].endswith(template_ext)
+        ]
         if len(ext_matches) == 1:
             return ext_matches[0]
         if len(ext_matches) > 1:

{cr_proc-0.1.9.dist-info → cr_proc-0.1.11.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cr_proc
-Version: 0.1.9
+Version: 0.1.11
 Summary: A tool for processing BYU CS code recording files.
 Author: Ethan Dye
 Author-email: mrtops03@gmail.com
@@ -28,7 +28,8 @@ poetry install
 ## Usage
-The processor can be run using the `cr_proc` command with recording file(s) and a template:
+The processor can be run using the `cr_proc` command with recording file(s) and
+a template:
 ```bash
 poetry run cr_proc <path-to-jsonl-file> <path-to-template-file>
@@ -36,7 +37,8 @@ poetry run cr_proc <path-to-jsonl-file> <path-to-template-file>
 ### Batch Processing
-You can process multiple recording files at once (e.g., for different students' submissions):
+You can process multiple recording files at once (e.g., for different students'
+submissions):
 ```bash
 # Process multiple files
@@ -47,9 +49,11 @@ poetry run cr_proc recordings/*.jsonl.gz template.py
 ```
 When processing multiple files:
 - Each recording is processed independently (for different students/documents)
 - Time calculations and verification are done separately for each file
-- A combined time report is shown at the end summarizing total editing time across all recordings
+- A combined time report is shown at the end summarizing total editing time
+  across all recordings
 - Results can be output to individual files using `--output-dir`
 ### Arguments
@@ -61,24 +65,26 @@ When processing multiple files:
 ### Options
-- `-t, --time-limit MINUTES`: (Optional) Maximum allowed time in minutes between the
-  first and last edit in the recording. Applied individually to each recording file and
-  also to the combined total in batch mode. If the elapsed time exceeds this limit, the
-  recording is flagged as suspicious.
-- `-d, --document DOCUMENT`: (Optional) Document path or filename to process from the
-  recording. Defaults to the document whose extension matches the template file.
-- `-o, --output-json OUTPUT_JSON`: (Optional) Path to output JSON file with verification
-  results (time info and suspicious events). In batch mode, creates a single JSON file
-  containing all recordings plus the combined time report.
-- `-f, --output-file OUTPUT_FILE`: (Optional) Write reconstructed code to specified file
-  instead of stdout. For single files only.
-- `--output-dir OUTPUT_DIR`: (Optional) Directory to write reconstructed code files in
-  batch mode. Files are named based on input recording filenames.
-- `-s, --show-autocomplete-details`: (Optional) Show individual auto-complete events in
-  addition to aggregate statistics.
-- `-p, --playback`: (Optional) Play back the recording in real-time, showing code evolution.
-- `--playback-speed SPEED`: (Optional) Playback speed multiplier (1.0 = real-time, 2.0 = 2x
-  speed, 0.5 = half speed).
+- `-t, --time-limit MINUTES`: (Optional) Maximum allowed time in minutes between
+  the first and last edit in the recording. Applied individually to each
+  recording file and also to the combined total in batch mode. If the elapsed
+  time exceeds this limit, the recording is flagged as suspicious.
+- `-d, --document DOCUMENT`: (Optional) Document path or filename to process
+  from the recording. Defaults to the document whose extension matches the
+  template file.
+- `-o, --output-json OUTPUT_JSON`: (Optional) Path to output JSON file with
+  verification results (time info and suspicious events). In batch mode, creates
+  a single JSON file containing all recordings plus the combined time report.
+- `-f, --output-file OUTPUT_FILE`: (Optional) Write reconstructed code to
+  specified file instead of stdout. For single files only.
+- `--output-dir OUTPUT_DIR`: (Optional) Directory to write reconstructed code
+  files in batch mode. Files are named based on input recording filenames.
+- `-s, --show-autocomplete-details`: (Optional) Show individual auto-complete
+  events in addition to aggregate statistics.
+- `-p, --playback`: (Optional) Play back the recording in real-time, showing
+  code evolution.
+- `--playback-speed SPEED`: (Optional) Playback speed multiplier (1.0 =
+  real-time, 2.0 = 2x speed, 0.5 = half speed).
 ### Examples
@@ -106,7 +112,8 @@ Save JSON results:
 poetry run cr_proc student1.jsonl.gz student2.jsonl.gz template.py -o results/
 ```
-This will process each recording independently and flag any that exceed 30 minutes.
+This will process each recording independently and flag any that exceed 30
+minutes.
 The processor will:
@@ -118,8 +125,9 @@ The processor will:
 ### Output
-Reconstructed code files are written to disk using `-f/--output-file` (single file)
-or `--output-dir` (batch mode). The processor does not output reconstructed code to stdout.
+Reconstructed code files are written to disk using `-f/--output-file` (single
+file) or `--output-dir` (batch mode). The processor does not output
+reconstructed code to stdout.
 Verification information, warnings, and errors are printed to stderr, including:
@@ -133,8 +141,8 @@ Verification information, warnings, and errors are printed to stderr, including:
 ### Suspicious Activity Detection
-The processor automatically detects and reports three types of suspicious activity
-patterns:
+The processor automatically detects and reports three types of suspicious
+activity patterns:
 #### 1. Time Limit Exceeded
@@ -142,8 +150,8 @@ When the `--time-limit` flag is specified, the processor flags recordings where
 the elapsed time between the first and last edit exceeds the specified limit.
 This can indicate unusually long work sessions or potential external assistance.
-Each recording file is checked independently against the time limit. In batch mode,
-the combined total time is also checked against the limit.
+Each recording file is checked independently against the time limit. In batch
+mode, the combined total time is also checked against the limit.
 **Example warning (single file):**
@@ -199,12 +207,14 @@ Events #42-#44 (rapid one-line pastes (AI indicator)): 3 lines, 89 chars
 ### JSON Output Format
-The `--output-json` flag generates JSON files with verification results using a consistent format
-for both single file and batch modes, making it easier for tooling to consume.
+The `--output-json` flag generates JSON files with verification results using a
+consistent format for both single file and batch modes, making it easier for
+tooling to consume.
 #### JSON Structure
 All JSON output follows this unified format:
 - `batch_mode`: Boolean indicating if multiple files were processed
 - `total_files`: Number of files processed
 - `verified_count`: How many files passed verification
@@ -219,6 +229,7 @@ All JSON output follows this unified format:
 - `files`: Array of individual results for each recording
 **Single file example:**
 ```json
 {
   "batch_mode": false,
@@ -244,6 +255,7 @@ All JSON output follows this unified format:
 ```
 **Batch file example:**
 ```json
 {
   "batch_mode": true,

{cr_proc-0.1.9.dist-info → cr_proc-0.1.11.dist-info}/RECORD RENAMED Viewed

@@ -1,13 +1,13 @@
 code_recorder_processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-code_recorder_processor/api/build.py,sha256=tljtuEFH-ZU-hSFYmlAMSY61W-DSptQo_D5-GjAasco,7951
-code_recorder_processor/api/document.py,sha256=mBvATBZs8yyCY_nDOX2qhw0Gp1mmwI3PgOAzFgHUiSY,9486
+code_recorder_processor/api/build.py,sha256=XuF8Vx9mDdRqeaxCVgYAdn4NFJzkRt4Q839m15th0Fo,9908
+code_recorder_processor/api/document.py,sha256=DOQ0H1dQJtMs2P9E2qnKgg2iKQT9msgdE9oJXl36SnY,10622
 code_recorder_processor/api/load.py,sha256=Br-USpFQJ6W8c5hjmCnunM3V0_MURKZp5Yyl1IJdahc,5514
 code_recorder_processor/api/output.py,sha256=H2SC3pQ0C9V8YyN4yeA_KmvSoWXy_3T3TKWKhywIax4,2161
 code_recorder_processor/api/verify.py,sha256=9GpeoFQIiTzZd-DNSyN5OUM6YB5iMslO85oAjc0yoSU,34073
 code_recorder_processor/cli.py,sha256=ardcM3bLNhf6abOQ1Aj746x4hp8gerdklfDwszLlYKc,20504
 code_recorder_processor/display.py,sha256=IVTNFB3Vjzpc5ZHceAFQI2-o-N6bvjYmotLDaEy0KoU,7368
 code_recorder_processor/playback.py,sha256=6-OJtQOHKgfutxUNBMunWl-VVSIB0zUDENSl0EsPCh4,4008
-cr_proc-0.1.9.dist-info/METADATA,sha256=3yqgqvpe1juNoinP6Xn59UiowZen06mgFTh1eG2ZC8M,8915
-cr_proc-0.1.9.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
-cr_proc-0.1.9.dist-info/entry_points.txt,sha256=xb5dPAAWN1Z9NUHpvZgNakaslR1MVOERf_IfpG_M04M,77
-cr_proc-0.1.9.dist-info/RECORD,,
+cr_proc-0.1.11.dist-info/METADATA,sha256=wZuAW9ghrjT2fCbiI9bJSy5TPLc4YD6OpYb0mTlyOL4,8926
+cr_proc-0.1.11.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
+cr_proc-0.1.11.dist-info/entry_points.txt,sha256=xb5dPAAWN1Z9NUHpvZgNakaslR1MVOERf_IfpG_M04M,77
+cr_proc-0.1.11.dist-info/RECORD,,

{cr_proc-0.1.9.dist-info → cr_proc-0.1.11.dist-info}/WHEEL RENAMED Viewed

File without changes

{cr_proc-0.1.9.dist-info → cr_proc-0.1.11.dist-info}/entry_points.txt RENAMED Viewed

File without changes

cr-proc 0.1.9__py3-none-any.whl → 0.1.11__py3-none-any.whl

cr-proc 0.1.9__py3-none-any.whl → 0.1.11__py3-none-any.whl