PyPI - cr-proc - Versions diffs - 0.1.7__tar.gz → 0.1.8__tar.gz - Mend

cr-proc 0.1.7__tar.gz → 0.1.8__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (8)

{cr_proc-0.1.7 → cr_proc-0.1.8}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cr_proc
-Version: 0.1.7
+Version: 0.1.8
 Summary: A tool for processing BYU CS code recording files.
 Author: Ethan Dye
 Author-email: mrtops03@gmail.com

{cr_proc-0.1.7 → cr_proc-0.1.8}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "cr_proc"
-version = "0.1.7"
+version = "0.1.8"
 description = "A tool for processing BYU CS code recording files."
 authors = [
     {name = "Ethan Dye",email = "mrtops03@gmail.com"}

{cr_proc-0.1.7 → cr_proc-0.1.8}/src/code_recorder_processor/api/verify.py RENAMED Viewed

@@ -2,6 +2,13 @@ from typing import Any
 from datetime import datetime
 import difflib
+# ============================================================================
+# Constants for detection thresholds
+# ============================================================================
+MIN_WHITELIST_SIZE = 10  # Minimum fragment size to add to whitelist
+MIN_MULTILINE_SIZE = 20  # Minimum size for multiline external paste detection
+MIN_AUTOCOMPLETE_SIZE = 10  # Minimum size for autocomplete detection
+MIN_RAPID_PASTE_CHARS = 5  # Minimum chars for a "paste" in rapid detection
 def _normalize_newlines(text: str) -> str:
     """Normalize CRLF to LF to avoid offset and diff noise."""
@@ -152,13 +159,13 @@ def _build_document_states(jsonData: tuple[dict[str, Any], ...]) -> tuple[list[s
         # Build whitelist of all content fragments seen
         # Add both old and new fragments to whitelist for comprehensive coverage
-        if len(old_frag) > 10:  # Ignore tiny fragments
+        if len(old_frag) > MIN_WHITELIST_SIZE:
             content_whitelist.add(old_frag)
-        if len(new_frag) > 10:
+        if len(new_frag) > MIN_WHITELIST_SIZE:
             content_whitelist.add(new_frag)
         # Also add the full document state to whitelist
-        if len(current_state) > 10:
+        if len(current_state) > MIN_WHITELIST_SIZE:
             content_whitelist.add(current_state)
     return document_states, content_whitelist
@@ -191,65 +198,74 @@ def _detect_multiline_external_pastes(
     """
     suspicious_events = []
+    # Build whitelist incrementally to only include content from BEFORE each event
+    past_whitelist = set()
     for idx, event in enumerate(jsonData):
         old_frag = _normalize_newlines(event.get("oldFragment", ""))
         new_frag = _normalize_newlines(event.get("newFragment", ""))
         # Skip if no actual change
         if new_frag == old_frag or new_frag.strip() == "":
-            continue
+            pass  # Still add to whitelist below
         # Only check multi-line content (more than 2 lines means at least 2 actual lines)
-        new_lines = new_frag.split("\n")
-        if len(new_lines) <= 2:  # Single line or line + empty
-            continue
-        # Check if the new content already existed in the document at any prior point
-        is_internal_copy = False
-        # Check against document state BEFORE this event
-        if idx > 0:
-            prior_state = document_states[idx - 1]
-            if new_frag in prior_state:
-                is_internal_copy = True
-        # Also check against whitelist of all content seen
-        if not is_internal_copy:
-            for hist_content in content_whitelist:
-                # Ignore tiny fragments
-                if len(hist_content) < 20:
-                    continue
-                # Require substantial overlap in size to count as an internal copy
-                similar_length = (
-                    len(hist_content) >= 0.8 * len(new_frag)
-                    and len(hist_content) <= 1.25 * len(new_frag)
-                )
+        elif len(new_frag.split("\n")) > 2:
+            new_lines = new_frag.split("\n")
-                if new_frag == hist_content:
-                    is_internal_copy = True
-                    break
+            # Check if the new content already existed in the document at any prior point
+            is_internal_copy = False
-                if new_frag in hist_content and similar_length:
+            # Check against document state BEFORE this event
+            if idx > 0:
+                prior_state = document_states[idx - 1]
+                if new_frag in prior_state:
                     is_internal_copy = True
-                    break
-                if hist_content in new_frag and similar_length:
-                    is_internal_copy = True
-                    break
+            # Also check against whitelist of content from BEFORE this event
+            if not is_internal_copy:
+                for hist_content in past_whitelist:
+                    # Ignore tiny fragments - multiline external pastes should be significant
+                    if len(hist_content) < MIN_MULTILINE_SIZE:
+                        continue
+                    # Require substantial overlap in size to count as an internal copy
+                    similar_length = (
+                        len(hist_content) >= 0.8 * len(new_frag)
+                        and len(hist_content) <= 1.25 * len(new_frag)
+                    )
+                    if new_frag == hist_content:
+                        is_internal_copy = True
+                        break
+                    if new_frag in hist_content and similar_length:
+                        is_internal_copy = True
+                        break
+                    if hist_content in new_frag and similar_length:
+                        is_internal_copy = True
+                        break
+            # Also check if it's in the old fragment (internal move/copy)
+            if not is_internal_copy and old_frag and (new_frag in old_frag or old_frag in new_frag):
+                is_internal_copy = True
-        # Also check if it's in the old fragment (internal move/copy)
-        if not is_internal_copy and old_frag and (new_frag in old_frag or old_frag in new_frag):
-            is_internal_copy = True
+            if not is_internal_copy:
+                suspicious_events.append({
+                    "event_index": idx,
+                    "line_count": len(new_lines),
+                    "char_count": len(new_frag),
+                    "reason": "multi-line external paste",
+                    "newFragment": new_frag
+                })
-        if not is_internal_copy:
-            suspicious_events.append({
-                "event_index": idx,
-                "line_count": len(new_lines),
-                "char_count": len(new_frag),
-                "reason": "multi-line external paste",
-                "newFragment": new_frag
-            })
+        # Add current event's content to whitelist for future events
+        if len(old_frag) > MIN_MULTILINE_SIZE:
+            past_whitelist.add(old_frag)
+        if len(new_frag) > MIN_MULTILINE_SIZE:
+            past_whitelist.add(new_frag)
+        if idx > 0 and len(document_states[idx - 1]) > MIN_MULTILINE_SIZE:
+            past_whitelist.add(document_states[idx - 1])
     return suspicious_events
@@ -281,7 +297,7 @@ def _detect_rapid_paste_sequences(jsonData: tuple[dict[str, Any], ...]) -> list[
         new_lines = new_frag.split("\n")
         if len(new_lines) == 2:
             # Heuristic: if it's more than a few characters, it might be pasted
-            if len(new_frag.strip()) > 5:
+            if len(new_frag.strip()) > MIN_RAPID_PASTE_CHARS:
                 one_line_pastes.append({
                     "event_index": idx,
                     "timestamp": timestamp,
@@ -385,9 +401,13 @@ def _detect_fullline_autocomplete(
     """
     suspicious_events = []
+    # Build whitelist incrementally to only include content from BEFORE each event
+    past_whitelist = set()
     for idx, event in enumerate(jsonData):
         # Skip if already flagged by another detector
         if idx in excluded_indices:
+            past_whitelist_update(idx, event, document_states, past_whitelist)
             continue
         old_frag = _normalize_newlines(event.get("oldFragment", ""))
@@ -395,6 +415,7 @@ def _detect_fullline_autocomplete(
         # Skip first event (template) and no-change events
         if idx == 0 or new_frag == old_frag:
+            past_whitelist_update(idx, event, document_states, past_whitelist)
             continue
         old_len = len(old_frag)
@@ -403,6 +424,7 @@ def _detect_fullline_autocomplete(
         # At keystroke level, oldFragment is typically empty for insertions
         # Allow up to 3 chars for prefix-based triggers (e.g., "de" -> "def")
         if old_len > 3:
+            past_whitelist_update(idx, event, document_states, past_whitelist)
             continue
         # Check line count - we care about complete statements
@@ -417,10 +439,12 @@ def _detect_fullline_autocomplete(
         if not (is_single_line or is_multi_line):
             # Shouldn't happen, but skip if malformed
+            past_whitelist_update(idx, event, document_states, past_whitelist)
             continue
         # The new fragment should not be just whitespace
         if not new_frag.strip():
+            past_whitelist_update(idx, event, document_states, past_whitelist)
             continue
         # Check if the new fragment contains code structure indicators
@@ -443,21 +467,25 @@ def _detect_fullline_autocomplete(
         if not has_complete_statement:
             # No complete statement - skip basic identifier completion
+            past_whitelist_update(idx, event, document_states, past_whitelist)
             continue
         # Minimum size for meaningful completion
-        if new_len < 10:
+        if new_len < MIN_AUTOCOMPLETE_SIZE:
+            past_whitelist_update(idx, event, document_states, past_whitelist)
             continue
         # For multi-line: maximum size to distinguish from external pastes
         # External pastes are typically much larger (100+ chars)
         # Multi-line completions are usually 20-300 chars for a small function/block
         if is_multi_line and new_len > 300:
+            past_whitelist_update(idx, event, document_states, past_whitelist)
             continue
         # For single-line: could be larger due to chained methods or long statements
         # but cap at 200 chars to avoid flagging user-typed long lines
         if is_single_line and new_len > 200:
+            past_whitelist_update(idx, event, document_states, past_whitelist)
             continue
         # Check if this content already existed in the document state BEFORE this event
@@ -468,6 +496,28 @@ def _detect_fullline_autocomplete(
             if new_frag in prior_state:
                 is_internal_copy = True
+        # Also check against whitelist of content from BEFORE this event
+        if not is_internal_copy:
+            for hist_content in past_whitelist:
+                # Ignore tiny fragments
+                if len(hist_content) < MIN_AUTOCOMPLETE_SIZE:
+                    continue
+                # Check for exact match or significant overlap
+                if new_frag == hist_content:
+                    is_internal_copy = True
+                    break
+                # Check for substring matches with similar length
+                similar_length = (
+                    len(hist_content) >= 0.8 * len(new_frag)
+                    and len(hist_content) <= 1.25 * len(new_frag)
+                )
+                if (new_frag in hist_content or hist_content in new_frag) and similar_length:
+                    is_internal_copy = True
+                    break
         if not is_internal_copy:
             line_desc = "line" if is_single_line else "lines"
             suspicious_events.append({
@@ -478,9 +528,30 @@ def _detect_fullline_autocomplete(
                 "newFragment": new_frag,
             })
+        # Add current event's content to whitelist for future events
+        past_whitelist_update(idx, event, document_states, past_whitelist)
     return suspicious_events
+def past_whitelist_update(
+    idx: int,
+    event: dict[str, Any],
+    document_states: list[str],
+    past_whitelist: set[str]
+) -> None:
+    """Helper to update the past_whitelist with content from current event."""
+    old_frag = _normalize_newlines(event.get("oldFragment", ""))
+    new_frag = _normalize_newlines(event.get("newFragment", ""))
+    if len(old_frag) > MIN_AUTOCOMPLETE_SIZE:
+        past_whitelist.add(old_frag)
+    if len(new_frag) > MIN_AUTOCOMPLETE_SIZE:
+        past_whitelist.add(new_frag)
+    if idx < len(document_states) and len(document_states[idx]) > MIN_AUTOCOMPLETE_SIZE:
+        past_whitelist.add(document_states[idx])
 def detect_external_copypaste(jsonData: tuple[dict[str, Any], ...]) -> list[dict[str, Any]]:
     """
     Detect copy-paste events from external sources and AI-assisted coding patterns.

{cr_proc-0.1.7 → cr_proc-0.1.8}/src/code_recorder_processor/cli.py RENAMED Viewed

@@ -1,6 +1,8 @@
 import argparse
 import json
+import os
 import sys
+import time
 from datetime import datetime
 from pathlib import Path
 from typing import Any
@@ -268,6 +270,8 @@ def write_json_output(
     document: str,
     time_info: dict[str, Any] | None,
     suspicious_events: list[dict[str, Any]],
+    reconstructed_code: str,
+    verified: bool,
 ) -> None:
     """
     Write verification results to JSON file.
@@ -282,6 +286,10 @@ def write_json_output(
         Time information from verification
     suspicious_events : list[dict[str, Any]]
         List of suspicious events detected
+    reconstructed_code : str
+        The reconstructed file content
+    verified : bool
+        Whether the file passed verification
     Raises
     ------
@@ -290,8 +298,10 @@ def write_json_output(
     """
     results = {
         "document": document,
+        "verified": verified,
         "time_info": time_info,
         "suspicious_events": suspicious_events,
+        "reconstructed_code": reconstructed_code,
     }
     output_path.parent.mkdir(parents=True, exist_ok=True)
@@ -300,6 +310,110 @@ def write_json_output(
     print(f"Results written to {output_path}", file=sys.stderr)
+def playback_recording(
+    json_data: tuple[dict[str, Any], ...],
+    document: str,
+    template: str,
+    speed: float = 1.0,
+) -> None:
+    """
+    Play back a recording, showing the code evolving in real-time.
+    Parameters
+    ----------
+    json_data : tuple[dict[str, Any], ...]
+        The recording events
+    document : str
+        The document to play back
+    template : str
+        The initial template content
+    speed : float
+        Playback speed multiplier (1.0 = real-time, 2.0 = 2x speed, 0.5 = half speed)
+    """
+    # Filter events for the target document
+    doc_events = [e for e in json_data if e.get("document") == document]
+    if not doc_events:
+        print(f"No events found for document: {document}", file=sys.stderr)
+        return
+    # Start with template
+    current_content = template
+    last_timestamp = None
+    def clear_screen():
+        """Clear the terminal screen."""
+        os.system('cls' if os.name == 'nt' else 'clear')
+    def parse_timestamp(ts_str: str) -> datetime:
+        """Parse ISO timestamp string."""
+        return datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
+    # Show initial template
+    clear_screen()
+    print(f"=" * 80)
+    print(f"PLAYBACK: {document} (Speed: {speed}x)")
+    print(f"Event 0 / {len(doc_events)} - Initial Template")
+    print(f"=" * 80)
+    print(current_content)
+    print(f"\n{'=' * 80}")
+    print("Press Ctrl+C to stop playback")
+    time.sleep(2.0 / speed)
+    try:
+        for idx, event in enumerate(doc_events, 1):
+            old_frag = event.get("oldFragment", "")
+            new_frag = event.get("newFragment", "")
+            offset = event.get("offset", 0)
+            timestamp = event.get("timestamp")
+            # Calculate delay based on timestamp difference
+            if last_timestamp and timestamp:
+                try:
+                    ts1 = parse_timestamp(last_timestamp)
+                    ts2 = parse_timestamp(timestamp)
+                    delay = (ts2 - ts1).total_seconds() / speed
+                    # Cap delay at 5 seconds for very long pauses
+                    delay = min(delay, 5.0)
+                    if delay > 0:
+                        time.sleep(delay)
+                except (ValueError, KeyError):
+                    time.sleep(0.1 / speed)
+            else:
+                time.sleep(0.1 / speed)
+            last_timestamp = timestamp
+            # Apply the edit
+            if new_frag != old_frag:
+                current_content = current_content[:offset] + new_frag + current_content[offset + len(old_frag):]
+            # Display current state
+            clear_screen()
+            print(f"=" * 80)
+            print(f"PLAYBACK: {document} (Speed: {speed}x)")
+            print(f"Event {idx} / {len(doc_events)} - {timestamp or 'unknown time'}")
+            # Show what changed
+            if new_frag != old_frag:
+                change_type = "INSERT" if not old_frag else ("DELETE" if not new_frag else "REPLACE")
+                print(f"Action: {change_type} at offset {offset} ({len(new_frag)} chars)")
+            print(f"=" * 80)
+            print(current_content)
+            print(f"\n{'=' * 80}")
+            print(f"Progress: [{('#' * (idx * 40 // len(doc_events))).ljust(40)}] {idx}/{len(doc_events)}")
+            print("Press Ctrl+C to stop playback")
+    except KeyboardInterrupt:
+        print("\n\nPlayback stopped by user.", file=sys.stderr)
+        return
+    # Final summary
+    print("\n\nPlayback complete!", file=sys.stderr)
+    print(f"Total events: {len(doc_events)}", file=sys.stderr)
 def create_parser() -> argparse.ArgumentParser:
     """
     Create and configure the argument parser.
@@ -353,6 +467,24 @@ def create_parser() -> argparse.ArgumentParser:
         help="Show individual auto-complete events in addition to "
         "aggregate statistics",
     )
+    parser.add_argument(
+        "-q",
+        "--quiet",
+        action="store_true",
+        help="Suppress output of reconstructed code to stdout",
+    )
+    parser.add_argument(
+        "-p",
+        "--playback",
+        action="store_true",
+        help="Play back the recording in real-time, showing code evolution",
+    )
+    parser.add_argument(
+        "--playback-speed",
+        type=float,
+        default=1.0,
+        help="Playback speed multiplier (1.0 = real-time, 2.0 = 2x speed, 0.5 = half speed)",
+    )
     return parser
@@ -388,6 +520,21 @@ def main() -> int:
         print(f"Error determining document: {e}", file=sys.stderr)
         return 1
+    # Handle playback mode
+    if args.playback:
+        try:
+            template_content = args.template_file.read_text()
+        except FileNotFoundError:
+            print(f"Error: Template file not found: {args.template_file}", file=sys.stderr)
+            return 1
+        if target_document:
+            playback_recording(json_data, target_document, template_content, args.playback_speed)
+            return 0
+        else:
+            print("Error: No documents found in recording", file=sys.stderr)
+            return 1
     # Filter events for target document
     doc_events = filter_events_by_document(json_data, target_document)
     if target_document and not doc_events:
@@ -416,29 +563,21 @@ def main() -> int:
     display_time_info(time_info)
     # Verify and process the recording
+    verified = False
+    reconstructed = ""
+    suspicious_events = []
     try:
         template_data, suspicious_events = verify(template_data, doc_events)
         reconstructed = reconstruct_file_from_events(
             doc_events, template_data, document_path=target_document
         )
-        print(reconstructed)
+        verified = True
+        if not args.quiet:
+            print(reconstructed)
         # Display suspicious events
         display_suspicious_events(suspicious_events, args.show_autocomplete_details)
-        # Write JSON output if requested
-        if args.output_json:
-            try:
-                write_json_output(
-                    args.output_json,
-                    target_document or str(args.template_file),
-                    time_info,
-                    suspicious_events,
-                )
-            except Exception as e:
-                print(f"Error writing JSON output: {e}", file=sys.stderr)
-                return 1
     except ValueError as e:
         print("File failed verification from template!", file=sys.stderr)
         print(str(e), file=sys.stderr)
@@ -446,12 +585,27 @@ def main() -> int:
             print(template_diff(template_data, doc_events), file=sys.stderr)
         except Exception:
             pass
-        return 1
+        verified = False
     except Exception as e:
         print(f"Error processing file: {type(e).__name__}: {e}", file=sys.stderr)
-        return 1
+        verified = False
+    # Write JSON output to file if requested
+    if args.output_json:
+        try:
+            write_json_output(
+                args.output_json,
+                target_document or str(args.template_file),
+                time_info,
+                suspicious_events,
+                reconstructed,
+                verified,
+            )
+        except Exception as e:
+            print(f"Error writing JSON output: {e}", file=sys.stderr)
+            return 1
-    return 0
+    return 0 if verified else 1
 if __name__ == "__main__":

{cr_proc-0.1.7 → cr_proc-0.1.8}/README.md RENAMED Viewed

File without changes

{cr_proc-0.1.7 → cr_proc-0.1.8}/src/code_recorder_processor/__init__.py RENAMED Viewed

File without changes

{cr_proc-0.1.7 → cr_proc-0.1.8}/src/code_recorder_processor/api/build.py RENAMED Viewed

File without changes

{cr_proc-0.1.7 → cr_proc-0.1.8}/src/code_recorder_processor/api/load.py RENAMED Viewed

File without changes

cr-proc 0.1.7__tar.gz → 0.1.8__tar.gz

cr-proc 0.1.7__tar.gz → 0.1.8__tar.gz