PyPI - cr-proc - Versions diffs - 0.1.6__tar.gz → 0.1.8__tar.gz - Mend

cr-proc 0.1.6__tar.gz → 0.1.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

{cr_proc-0.1.6 → cr_proc-0.1.8}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cr_proc
-Version: 0.1.6
+Version: 0.1.8
 Summary: A tool for processing BYU CS code recording files.
 Author: Ethan Dye
 Author-email: mrtops03@gmail.com

{cr_proc-0.1.6 → cr_proc-0.1.8}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "cr_proc"
-version = "0.1.6"
+version = "0.1.8"
 description = "A tool for processing BYU CS code recording files."
 authors = [
     {name = "Ethan Dye",email = "mrtops03@gmail.com"}

{cr_proc-0.1.6 → cr_proc-0.1.8}/src/code_recorder_processor/api/verify.py RENAMED Viewed

@@ -2,6 +2,13 @@ from typing import Any
 from datetime import datetime
 import difflib
+# ============================================================================
+# Constants for detection thresholds
+# ============================================================================
+MIN_WHITELIST_SIZE = 10  # Minimum fragment size to add to whitelist
+MIN_MULTILINE_SIZE = 20  # Minimum size for multiline external paste detection
+MIN_AUTOCOMPLETE_SIZE = 10  # Minimum size for autocomplete detection
+MIN_RAPID_PASTE_CHARS = 5  # Minimum chars for a "paste" in rapid detection
 def _normalize_newlines(text: str) -> str:
     """Normalize CRLF to LF to avoid offset and diff noise."""
@@ -152,13 +159,13 @@ def _build_document_states(jsonData: tuple[dict[str, Any], ...]) -> tuple[list[s
         # Build whitelist of all content fragments seen
         # Add both old and new fragments to whitelist for comprehensive coverage
-        if len(old_frag) > 10:  # Ignore tiny fragments
+        if len(old_frag) > MIN_WHITELIST_SIZE:
             content_whitelist.add(old_frag)
-        if len(new_frag) > 10:
+        if len(new_frag) > MIN_WHITELIST_SIZE:
             content_whitelist.add(new_frag)
         # Also add the full document state to whitelist
-        if len(current_state) > 10:
+        if len(current_state) > MIN_WHITELIST_SIZE:
             content_whitelist.add(current_state)
     return document_states, content_whitelist
@@ -191,65 +198,74 @@ def _detect_multiline_external_pastes(
     """
     suspicious_events = []
+    # Build whitelist incrementally to only include content from BEFORE each event
+    past_whitelist = set()
     for idx, event in enumerate(jsonData):
         old_frag = _normalize_newlines(event.get("oldFragment", ""))
         new_frag = _normalize_newlines(event.get("newFragment", ""))
         # Skip if no actual change
         if new_frag == old_frag or new_frag.strip() == "":
-            continue
+            pass  # Still add to whitelist below
         # Only check multi-line content (more than 2 lines means at least 2 actual lines)
-        new_lines = new_frag.split("\n")
-        if len(new_lines) <= 2:  # Single line or line + empty
-            continue
-        # Check if the new content already existed in the document at any prior point
-        is_internal_copy = False
-        # Check against document state BEFORE this event
-        if idx > 0:
-            prior_state = document_states[idx - 1]
-            if new_frag in prior_state:
-                is_internal_copy = True
+        elif len(new_frag.split("\n")) > 2:
+            new_lines = new_frag.split("\n")
-        # Also check against whitelist of all content seen
-        if not is_internal_copy:
-            for hist_content in content_whitelist:
-                # Ignore tiny fragments
-                if len(hist_content) < 20:
-                    continue
-                # Require substantial overlap in size to count as an internal copy
-                similar_length = (
-                    len(hist_content) >= 0.8 * len(new_frag)
-                    and len(hist_content) <= 1.25 * len(new_frag)
-                )
-                if new_frag == hist_content:
-                    is_internal_copy = True
-                    break
+            # Check if the new content already existed in the document at any prior point
+            is_internal_copy = False
-                if new_frag in hist_content and similar_length:
+            # Check against document state BEFORE this event
+            if idx > 0:
+                prior_state = document_states[idx - 1]
+                if new_frag in prior_state:
                     is_internal_copy = True
-                    break
-                if hist_content in new_frag and similar_length:
-                    is_internal_copy = True
-                    break
+            # Also check against whitelist of content from BEFORE this event
+            if not is_internal_copy:
+                for hist_content in past_whitelist:
+                    # Ignore tiny fragments - multiline external pastes should be significant
+                    if len(hist_content) < MIN_MULTILINE_SIZE:
+                        continue
+                    # Require substantial overlap in size to count as an internal copy
+                    similar_length = (
+                        len(hist_content) >= 0.8 * len(new_frag)
+                        and len(hist_content) <= 1.25 * len(new_frag)
+                    )
+                    if new_frag == hist_content:
+                        is_internal_copy = True
+                        break
+                    if new_frag in hist_content and similar_length:
+                        is_internal_copy = True
+                        break
+                    if hist_content in new_frag and similar_length:
+                        is_internal_copy = True
+                        break
+            # Also check if it's in the old fragment (internal move/copy)
+            if not is_internal_copy and old_frag and (new_frag in old_frag or old_frag in new_frag):
+                is_internal_copy = True
-        # Also check if it's in the old fragment (internal move/copy)
-        if not is_internal_copy and old_frag and (new_frag in old_frag or old_frag in new_frag):
-            is_internal_copy = True
+            if not is_internal_copy:
+                suspicious_events.append({
+                    "event_index": idx,
+                    "line_count": len(new_lines),
+                    "char_count": len(new_frag),
+                    "reason": "multi-line external paste",
+                    "newFragment": new_frag
+                })
-        if not is_internal_copy:
-            suspicious_events.append({
-                "event_index": idx,
-                "line_count": len(new_lines),
-                "char_count": len(new_frag),
-                "reason": "multi-line external paste",
-                "newFragment": new_frag
-            })
+        # Add current event's content to whitelist for future events
+        if len(old_frag) > MIN_MULTILINE_SIZE:
+            past_whitelist.add(old_frag)
+        if len(new_frag) > MIN_MULTILINE_SIZE:
+            past_whitelist.add(new_frag)
+        if idx > 0 and len(document_states[idx - 1]) > MIN_MULTILINE_SIZE:
+            past_whitelist.add(document_states[idx - 1])
     return suspicious_events
@@ -281,7 +297,7 @@ def _detect_rapid_paste_sequences(jsonData: tuple[dict[str, Any], ...]) -> list[
         new_lines = new_frag.split("\n")
         if len(new_lines) == 2:
             # Heuristic: if it's more than a few characters, it might be pasted
-            if len(new_frag.strip()) > 5:
+            if len(new_frag.strip()) > MIN_RAPID_PASTE_CHARS:
                 one_line_pastes.append({
                     "event_index": idx,
                     "timestamp": timestamp,
@@ -348,16 +364,22 @@ def _detect_fullline_autocomplete(
     excluded_indices: set[int]
 ) -> list[dict[str, Any]]:
     """
-    Detect full-line auto-complete events where the IDE/AI completes code.
+    Detect multi-line auto-complete events where the IDE/AI generates multiple complete lines.
+    Focuses on significant AI assistance where the system generates entire functions or blocks
+    (2+ lines) in a single completion event. This is distinct from basic IDE autocomplete
+    (e.g., finishing a function name).
     At keystroke level, events show:
     - Normal typing: oldFragment="" (empty), newFragment="X" (1 char)
-    - Auto-complete: oldFragment="" (empty), newFragment="long_text" (10+ chars)
+    - Basic autocomplete: oldFragment="" (empty), newFragment="function_name" (IDE suggests identifier)
+    - Full-line AI completion: oldFragment="" (empty), newFragment="def foo():\n    pass" (entire function)
-    Auto-complete is detected when:
+    Full-line auto-complete is detected when:
     - oldFragment is empty or very short (0-3 chars)
-    - newFragment is substantial (10+ characters)
-    - newFragment contains code structure (assignment, parens, brackets, etc.)
+    - newFragment generates 2+ complete lines
+    - newFragment contains complete statements (not just identifiers)
+    - Content represents meaningful code structure
     - newFragment does NOT already exist in the document state
     - Event not already flagged as external copy-paste
@@ -375,13 +397,17 @@ def _detect_fullline_autocomplete(
     Returns
     -------
     list[dict[str, Any]]
-        List of suspected auto-complete events.
+        List of suspected multi-line auto-complete events.
     """
     suspicious_events = []
+    # Build whitelist incrementally to only include content from BEFORE each event
+    past_whitelist = set()
     for idx, event in enumerate(jsonData):
         # Skip if already flagged by another detector
         if idx in excluded_indices:
+            past_whitelist_update(idx, event, document_states, past_whitelist)
             continue
         old_frag = _normalize_newlines(event.get("oldFragment", ""))
@@ -389,71 +415,143 @@ def _detect_fullline_autocomplete(
         # Skip first event (template) and no-change events
         if idx == 0 or new_frag == old_frag:
+            past_whitelist_update(idx, event, document_states, past_whitelist)
             continue
         old_len = len(old_frag)
         new_len = len(new_frag)
         # At keystroke level, oldFragment is typically empty for insertions
-        # Allow up to 3 chars for prefix-based autocomplete triggers
+        # Allow up to 3 chars for prefix-based triggers (e.g., "de" -> "def")
         if old_len > 3:
+            past_whitelist_update(idx, event, document_states, past_whitelist)
             continue
-        # Skip single-character additions (normal typing)
-        # Auto-complete typically adds 10+ characters at once
-        if new_len < 10:
-            continue
+        # Check line count - we care about complete statements
+        # Multi-line is obviously concerning, but single-line with a complete statement
+        # (like "if x: return True") is also suspicious if it came from autocomplete
+        new_lines = [n for n in new_frag.split("\n") if n.strip() != ""]
-        # Skip large multi-line pastes - those should be caught by multi-line paste detector
-        # Auto-complete is typically 1-2 lines and under 100 chars
-        # Anything larger is likely external copy-paste, not auto-complete
-        new_lines = new_frag.split("\n")
-        if len(new_lines) > 2 or new_len > 100:
+        # For single-line completions, be more strict about what we flag
+        # We only flag if it's a complete statement with keywords, not just identifier completion
+        is_single_line = len(new_lines) <= 2  # 2 elements = 1 line + trailing \n
+        is_multi_line = len(new_lines) >= 3   # 3+ elements = 2+ actual lines
+        if not (is_single_line or is_multi_line):
+            # Shouldn't happen, but skip if malformed
+            past_whitelist_update(idx, event, document_states, past_whitelist)
             continue
         # The new fragment should not be just whitespace
         if not new_frag.strip():
+            past_whitelist_update(idx, event, document_states, past_whitelist)
             continue
         # Check if the new fragment contains code structure indicators
-        # These strongly suggest IDE/AI auto-completion of code
-        code_indicators = [
-            "=",  # Assignment (most common in autocomplete)
-            "(",  # Function call/definition
-            ")",  # Closing paren
-            ":",  # Block statement (if, for, def, etc.)
-            "{",  # Dictionary/block
-            "}",  # Closing brace
-            "[",  # List/index
-            "]",  # Closing bracket
-            "=>", # Arrow function
-            ";",  # Statement end
+        # These strongly suggest IDE/AI auto-completion of actual code (not just identifiers)
+        complete_statement_indicators = [
+            ":",      # Block statement (if:, for:, def:, class:, while:, with:, etc.)
+            "return", # Return statement
+            "def ",   # Function definition
+            "class ", # Class definition
+            "if ",    # If statement
+            "for ",   # For loop
+            "while ", # While loop
+            "try:",   # Try block
+            "except", # Exception handling
+            "import ", # Import statement
+            "=",      # Assignment
         ]
-        has_code_structure = any(indicator in new_frag for indicator in code_indicators)
+        has_complete_statement = any(indicator in new_frag for indicator in complete_statement_indicators)
-        # Must have code structure to be considered auto-complete
-        if has_code_structure:
-            # Check if this content already existed in the document state BEFORE this event
-            is_internal_copy = False
+        if not has_complete_statement:
+            # No complete statement - skip basic identifier completion
+            past_whitelist_update(idx, event, document_states, past_whitelist)
+            continue
-            if idx > 0:
-                prior_state = document_states[idx - 1]
-                if new_frag in prior_state:
+        # Minimum size for meaningful completion
+        if new_len < MIN_AUTOCOMPLETE_SIZE:
+            past_whitelist_update(idx, event, document_states, past_whitelist)
+            continue
+        # For multi-line: maximum size to distinguish from external pastes
+        # External pastes are typically much larger (100+ chars)
+        # Multi-line completions are usually 20-300 chars for a small function/block
+        if is_multi_line and new_len > 300:
+            past_whitelist_update(idx, event, document_states, past_whitelist)
+            continue
+        # For single-line: could be larger due to chained methods or long statements
+        # but cap at 200 chars to avoid flagging user-typed long lines
+        if is_single_line and new_len > 200:
+            past_whitelist_update(idx, event, document_states, past_whitelist)
+            continue
+        # Check if this content already existed in the document state BEFORE this event
+        is_internal_copy = False
+        if idx > 0:
+            prior_state = document_states[idx - 1]
+            if new_frag in prior_state:
+                is_internal_copy = True
+        # Also check against whitelist of content from BEFORE this event
+        if not is_internal_copy:
+            for hist_content in past_whitelist:
+                # Ignore tiny fragments
+                if len(hist_content) < MIN_AUTOCOMPLETE_SIZE:
+                    continue
+                # Check for exact match or significant overlap
+                if new_frag == hist_content:
                     is_internal_copy = True
+                    break
-            if not is_internal_copy:
-                suspicious_events.append({
-                    "event_index": idx,
-                    "line_count": len(new_lines),
-                    "char_count": new_len,
-                    "reason": "full-line auto-complete",
-                    "newFragment": new_frag,
-                })
+                # Check for substring matches with similar length
+                similar_length = (
+                    len(hist_content) >= 0.8 * len(new_frag)
+                    and len(hist_content) <= 1.25 * len(new_frag)
+                )
+                if (new_frag in hist_content or hist_content in new_frag) and similar_length:
+                    is_internal_copy = True
+                    break
+        if not is_internal_copy:
+            line_desc = "line" if is_single_line else "lines"
+            suspicious_events.append({
+                "event_index": idx,
+                "line_count": len(new_lines),
+                "char_count": new_len,
+                "reason": f"complete statement auto-complete (AI assistance)",
+                "newFragment": new_frag,
+            })
+        # Add current event's content to whitelist for future events
+        past_whitelist_update(idx, event, document_states, past_whitelist)
     return suspicious_events
+def past_whitelist_update(
+    idx: int,
+    event: dict[str, Any],
+    document_states: list[str],
+    past_whitelist: set[str]
+) -> None:
+    """Helper to update the past_whitelist with content from current event."""
+    old_frag = _normalize_newlines(event.get("oldFragment", ""))
+    new_frag = _normalize_newlines(event.get("newFragment", ""))
+    if len(old_frag) > MIN_AUTOCOMPLETE_SIZE:
+        past_whitelist.add(old_frag)
+    if len(new_frag) > MIN_AUTOCOMPLETE_SIZE:
+        past_whitelist.add(new_frag)
+    if idx < len(document_states) and len(document_states[idx]) > MIN_AUTOCOMPLETE_SIZE:
+        past_whitelist.add(document_states[idx])
 def detect_external_copypaste(jsonData: tuple[dict[str, Any], ...]) -> list[dict[str, Any]]:
     """
     Detect copy-paste events from external sources and AI-assisted coding patterns.

{cr_proc-0.1.6 → cr_proc-0.1.8}/src/code_recorder_processor/cli.py RENAMED Viewed

@@ -1,6 +1,8 @@
 import argparse
 import json
+import os
 import sys
+import time
 from datetime import datetime
 from pathlib import Path
 from typing import Any
@@ -268,6 +270,8 @@ def write_json_output(
     document: str,
     time_info: dict[str, Any] | None,
     suspicious_events: list[dict[str, Any]],
+    reconstructed_code: str,
+    verified: bool,
 ) -> None:
     """
     Write verification results to JSON file.
@@ -282,6 +286,10 @@ def write_json_output(
         Time information from verification
     suspicious_events : list[dict[str, Any]]
         List of suspicious events detected
+    reconstructed_code : str
+        The reconstructed file content
+    verified : bool
+        Whether the file passed verification
     Raises
     ------
@@ -290,8 +298,10 @@ def write_json_output(
     """
     results = {
         "document": document,
+        "verified": verified,
         "time_info": time_info,
         "suspicious_events": suspicious_events,
+        "reconstructed_code": reconstructed_code,
     }
     output_path.parent.mkdir(parents=True, exist_ok=True)
@@ -300,6 +310,110 @@ def write_json_output(
     print(f"Results written to {output_path}", file=sys.stderr)
+def playback_recording(
+    json_data: tuple[dict[str, Any], ...],
+    document: str,
+    template: str,
+    speed: float = 1.0,
+) -> None:
+    """
+    Play back a recording, showing the code evolving in real-time.
+    Parameters
+    ----------
+    json_data : tuple[dict[str, Any], ...]
+        The recording events
+    document : str
+        The document to play back
+    template : str
+        The initial template content
+    speed : float
+        Playback speed multiplier (1.0 = real-time, 2.0 = 2x speed, 0.5 = half speed)
+    """
+    # Filter events for the target document
+    doc_events = [e for e in json_data if e.get("document") == document]
+    if not doc_events:
+        print(f"No events found for document: {document}", file=sys.stderr)
+        return
+    # Start with template
+    current_content = template
+    last_timestamp = None
+    def clear_screen():
+        """Clear the terminal screen."""
+        os.system('cls' if os.name == 'nt' else 'clear')
+    def parse_timestamp(ts_str: str) -> datetime:
+        """Parse ISO timestamp string."""
+        return datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
+    # Show initial template
+    clear_screen()
+    print(f"=" * 80)
+    print(f"PLAYBACK: {document} (Speed: {speed}x)")
+    print(f"Event 0 / {len(doc_events)} - Initial Template")
+    print(f"=" * 80)
+    print(current_content)
+    print(f"\n{'=' * 80}")
+    print("Press Ctrl+C to stop playback")
+    time.sleep(2.0 / speed)
+    try:
+        for idx, event in enumerate(doc_events, 1):
+            old_frag = event.get("oldFragment", "")
+            new_frag = event.get("newFragment", "")
+            offset = event.get("offset", 0)
+            timestamp = event.get("timestamp")
+            # Calculate delay based on timestamp difference
+            if last_timestamp and timestamp:
+                try:
+                    ts1 = parse_timestamp(last_timestamp)
+                    ts2 = parse_timestamp(timestamp)
+                    delay = (ts2 - ts1).total_seconds() / speed
+                    # Cap delay at 5 seconds for very long pauses
+                    delay = min(delay, 5.0)
+                    if delay > 0:
+                        time.sleep(delay)
+                except (ValueError, KeyError):
+                    time.sleep(0.1 / speed)
+            else:
+                time.sleep(0.1 / speed)
+            last_timestamp = timestamp
+            # Apply the edit
+            if new_frag != old_frag:
+                current_content = current_content[:offset] + new_frag + current_content[offset + len(old_frag):]
+            # Display current state
+            clear_screen()
+            print(f"=" * 80)
+            print(f"PLAYBACK: {document} (Speed: {speed}x)")
+            print(f"Event {idx} / {len(doc_events)} - {timestamp or 'unknown time'}")
+            # Show what changed
+            if new_frag != old_frag:
+                change_type = "INSERT" if not old_frag else ("DELETE" if not new_frag else "REPLACE")
+                print(f"Action: {change_type} at offset {offset} ({len(new_frag)} chars)")
+            print(f"=" * 80)
+            print(current_content)
+            print(f"\n{'=' * 80}")
+            print(f"Progress: [{('#' * (idx * 40 // len(doc_events))).ljust(40)}] {idx}/{len(doc_events)}")
+            print("Press Ctrl+C to stop playback")
+    except KeyboardInterrupt:
+        print("\n\nPlayback stopped by user.", file=sys.stderr)
+        return
+    # Final summary
+    print("\n\nPlayback complete!", file=sys.stderr)
+    print(f"Total events: {len(doc_events)}", file=sys.stderr)
 def create_parser() -> argparse.ArgumentParser:
     """
     Create and configure the argument parser.
@@ -353,6 +467,24 @@ def create_parser() -> argparse.ArgumentParser:
         help="Show individual auto-complete events in addition to "
         "aggregate statistics",
     )
+    parser.add_argument(
+        "-q",
+        "--quiet",
+        action="store_true",
+        help="Suppress output of reconstructed code to stdout",
+    )
+    parser.add_argument(
+        "-p",
+        "--playback",
+        action="store_true",
+        help="Play back the recording in real-time, showing code evolution",
+    )
+    parser.add_argument(
+        "--playback-speed",
+        type=float,
+        default=1.0,
+        help="Playback speed multiplier (1.0 = real-time, 2.0 = 2x speed, 0.5 = half speed)",
+    )
     return parser
@@ -388,6 +520,21 @@ def main() -> int:
         print(f"Error determining document: {e}", file=sys.stderr)
         return 1
+    # Handle playback mode
+    if args.playback:
+        try:
+            template_content = args.template_file.read_text()
+        except FileNotFoundError:
+            print(f"Error: Template file not found: {args.template_file}", file=sys.stderr)
+            return 1
+        if target_document:
+            playback_recording(json_data, target_document, template_content, args.playback_speed)
+            return 0
+        else:
+            print("Error: No documents found in recording", file=sys.stderr)
+            return 1
     # Filter events for target document
     doc_events = filter_events_by_document(json_data, target_document)
     if target_document and not doc_events:
@@ -416,29 +563,21 @@ def main() -> int:
     display_time_info(time_info)
     # Verify and process the recording
+    verified = False
+    reconstructed = ""
+    suspicious_events = []
     try:
         template_data, suspicious_events = verify(template_data, doc_events)
         reconstructed = reconstruct_file_from_events(
             doc_events, template_data, document_path=target_document
         )
-        print(reconstructed)
+        verified = True
+        if not args.quiet:
+            print(reconstructed)
         # Display suspicious events
         display_suspicious_events(suspicious_events, args.show_autocomplete_details)
-        # Write JSON output if requested
-        if args.output_json:
-            try:
-                write_json_output(
-                    args.output_json,
-                    target_document or str(args.template_file),
-                    time_info,
-                    suspicious_events,
-                )
-            except Exception as e:
-                print(f"Error writing JSON output: {e}", file=sys.stderr)
-                return 1
     except ValueError as e:
         print("File failed verification from template!", file=sys.stderr)
         print(str(e), file=sys.stderr)
@@ -446,12 +585,27 @@ def main() -> int:
             print(template_diff(template_data, doc_events), file=sys.stderr)
         except Exception:
             pass
-        return 1
+        verified = False
     except Exception as e:
         print(f"Error processing file: {type(e).__name__}: {e}", file=sys.stderr)
-        return 1
+        verified = False
+    # Write JSON output to file if requested
+    if args.output_json:
+        try:
+            write_json_output(
+                args.output_json,
+                target_document or str(args.template_file),
+                time_info,
+                suspicious_events,
+                reconstructed,
+                verified,
+            )
+        except Exception as e:
+            print(f"Error writing JSON output: {e}", file=sys.stderr)
+            return 1
-    return 0
+    return 0 if verified else 1
 if __name__ == "__main__":

{cr_proc-0.1.6 → cr_proc-0.1.8}/README.md RENAMED Viewed

File without changes

{cr_proc-0.1.6 → cr_proc-0.1.8}/src/code_recorder_processor/__init__.py RENAMED Viewed

File without changes

{cr_proc-0.1.6 → cr_proc-0.1.8}/src/code_recorder_processor/api/build.py RENAMED Viewed

File without changes

{cr_proc-0.1.6 → cr_proc-0.1.8}/src/code_recorder_processor/api/load.py RENAMED Viewed

File without changes

cr-proc 0.1.6__tar.gz → 0.1.8__tar.gz

cr-proc 0.1.6__tar.gz → 0.1.8__tar.gz