cr-proc 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -128,22 +128,29 @@ def _build_document_states(jsonData: tuple[dict[str, Any], ...]) -> tuple[list[s
     existed in the document at each point in time. This allows detectors to
     check if pasted/autocompleted content already existed in the document.
 
+    Only processes edit events (type="edit" or no type field for backwards compatibility).
+
     Parameters
     ----------
     jsonData : tuple[dict[str, Any], ...]
-        The event data from the JSONL file
+        The event data from the JSONL file (all event types)
 
     Returns
     -------
     tuple[list[str], set[str]]
-        - List of document states (one per event, strings of full document content)
+        - List of document states (one per edit event, strings of full document content)
         - Set of all content fragments ever seen (whitelist for internal copy detection)
     """
+    from .load import is_edit_event
+
+    # Filter to only edit events
+    edit_events = [e for e in jsonData if is_edit_event(e)]
+
     document_states = []
     content_whitelist = set()
     current_state = ""
 
-    for idx, event in enumerate(jsonData):
+    for idx, event in enumerate(edit_events):
        old_frag = _normalize_newlines(event.get("oldFragment", ""))
        new_frag = _normalize_newlines(event.get("newFragment", ""))
        offset = event.get("offset", 0)
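
The helper `is_edit_event` is imported from `.load` but is not shown in this diff. Based on the docstring wording ("type=\"edit\" or no type field for backwards compatibility"), it presumably amounts to something like the following sketch; the real helper in the package's load module may differ:

    def is_edit_event(event: dict) -> bool:
        """Treat events with type="edit", or with no "type" field at all, as edit events."""
        # Sketch only: reconstructed from the docstring, not copied from the package.
        return event.get("type", "edit") == "edit"
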
@@ -182,12 +189,14 @@ def _detect_multiline_external_pastes(
     Flags newFragments that are significant in length (more than one line)
     and do not appear to be copied from within the document itself.
 
+    Only processes edit events (type="edit" or no type field for backwards compatibility).
+
     Parameters
     ----------
     jsonData : tuple[dict[str, Any], ...]
-        The event data
+        The event data (all event types)
     document_states : list[str]
-        Full document state at each event
+        Full document state at each edit event
     content_whitelist : set[str]
         All content fragments ever seen in the document (for internal copy detection)
 
@@ -196,12 +205,17 @@ def _detect_multiline_external_pastes(
     list[dict[str, Any]]
         List of suspicious multi-line paste events.
     """
+    from .load import is_edit_event
+
+    # Filter to only edit events
+    edit_events = [e for e in jsonData if is_edit_event(e)]
+
     suspicious_events = []
 
     # Build whitelist incrementally to only include content from BEFORE each event
     past_whitelist = set()
 
-    for idx, event in enumerate(jsonData):
+    for idx, event in enumerate(edit_events):
        old_frag = _normalize_newlines(event.get("oldFragment", ""))
        new_frag = _normalize_newlines(event.get("newFragment", ""))
 
@@ -277,14 +291,21 @@ def _detect_rapid_paste_sequences(jsonData: tuple[dict[str, Any], ...]) -> list[
     Identifies clusters of 3+ one-line paste events occurring within 1 second,
     which may indicate AI-assisted code generation.
 
+    Only processes edit events (type="edit" or no type field for backwards compatibility).
+
     Returns a list of suspicious rapid-paste events.
     """
+    from .load import is_edit_event
+
+    # Filter to only edit events
+    edit_events = [e for e in jsonData if is_edit_event(e)]
+
     suspicious_events = []
 
     # Track one-line paste events for rapid-paste detection
     one_line_pastes = []
 
-    for idx, event in enumerate(jsonData):
+    for idx, event in enumerate(edit_events):
        new_frag = _normalize_newlines(event.get("newFragment", ""))
        old_frag = _normalize_newlines(event.get("oldFragment", ""))
        timestamp = event.get("timestamp")
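
The clustering step itself falls outside this hunk. Purely as an illustration of the heuristic the docstring describes (3+ one-line pastes within 1 second), and not the package's actual code, it could look like the sketch below, assuming `one_line_pastes` collects (event index, timestamp) pairs:

    from datetime import datetime

    def find_rapid_clusters(one_line_pastes: list[tuple[int, datetime]]) -> list[int]:
        """Return indices of events in any window of 3 one-line pastes spanning <= 1 second."""
        flagged: set[int] = set()
        for i in range(len(one_line_pastes) - 2):
            window = one_line_pastes[i : i + 3]
            # Timestamps are in event order, so the window span is last minus first
            if (window[-1][1] - window[0][1]).total_seconds() <= 1.0:
                flagged.update(idx for idx, _ in window)
        return sorted(flagged)
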
@@ -383,12 +404,14 @@ def _detect_fullline_autocomplete(
     - newFragment does NOT already exist in the document state
     - Event not already flagged as external copy-paste
 
+    Only processes edit events (type="edit" or no type field for backwards compatibility).
+
     Parameters
     ----------
     jsonData : tuple[dict[str, Any], ...]
-        The event data
+        The event data (all event types)
     document_states : list[str]
-        Full document state at each event
+        Full document state at each edit event
     content_whitelist : set[str]
         All content fragments ever seen in the document
     excluded_indices : set[int]
@@ -399,12 +422,17 @@ def _detect_fullline_autocomplete(
     list[dict[str, Any]]
         List of suspected multi-line auto-complete events.
     """
+    from .load import is_edit_event
+
+    # Filter to only edit events
+    edit_events = [e for e in jsonData if is_edit_event(e)]
+
     suspicious_events = []
 
     # Build whitelist incrementally to only include content from BEFORE each event
     past_whitelist = set()
 
-    for idx, event in enumerate(jsonData):
+    for idx, event in enumerate(edit_events):
        # Skip if already flagged by another detector
        if idx in excluded_indices:
            past_whitelist_update(idx, event, document_states, past_whitelist)
@@ -626,13 +654,20 @@ def check_time_limit(jsonData: tuple[dict[str, Any], ...], time_limit_minutes: i
     Check if the time between first and last edit exceeds the specified time limit.
 
     Tracks elapsed editing time across sessions by summing actual editing time within
-    each session (excluding gaps between sessions). For the time limit check, compares
-    the span from the first timestamp to the last timestamp overall.
+    each session (excluding gaps between sessions). Focus events (type="focusStatus")
+    are used to pause time tracking when the window loses focus for extended periods.
+
+    Time tracking behavior:
+    - Tracks actual editing time by looking at timestamps between edit events
+    - When a focusStatus event with focused=false is encountered, time tracking pauses
+    - Time tracking resumes when a focusStatus event with focused=true is encountered
+    - Gaps > 5 minutes while unfocused are excluded from time tracking
+    - Gaps <= 5 minutes are counted even when unfocused (student thinking/reviewing)
 
     Parameters
     ----------
     jsonData : tuple[dict[str, Any], ...]
-        The event data from the JSONL file
+        The event data from the JSONL file (all event types)
     time_limit_minutes : int | None
         Maximum allowed time in minutes between first and last overall edit.
         If None, no time limit is enforced.
@@ -649,25 +684,34 @@ def check_time_limit(jsonData: tuple[dict[str, Any], ...], time_limit_minutes: i
     def parse_ts(ts_str: str) -> datetime:
         return datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
 
+    # Separate edit events from focus events
+    from .load import is_edit_event
+
+    edit_events = [e for e in jsonData if is_edit_event(e)]
+    focus_events = [e for e in jsonData if e.get("type") == "focusStatus"]
+
+    if not edit_events:
+        return None
+
     # Identify session boundaries: sessions start at indices where offset == 0
     # (indicating file reopen/recording restart) and oldFragment == newFragment (initial snapshot)
     session_starts = [0]  # First session always starts at index 0
-    for idx in range(1, len(jsonData)):
-        offset = jsonData[idx].get("offset", -1)
-        old_frag = jsonData[idx].get("oldFragment", "")
-        new_frag = jsonData[idx].get("newFragment", "")
+    for idx in range(1, len(edit_events)):
+        offset = edit_events[idx].get("offset", -1)
+        old_frag = edit_events[idx].get("oldFragment", "")
+        new_frag = edit_events[idx].get("newFragment", "")
        # Session boundary: offset is 0 and it's an initial snapshot (old == new, non-empty)
        if offset == 0 and old_frag == new_frag and old_frag.strip() != "":
            session_starts.append(idx)
 
     # Add sentinel to mark end of last session
-    session_starts.append(len(jsonData))
+    session_starts.append(len(edit_events))
 
     # Find first and last timestamps overall
     first_timestamp_overall = None
     last_timestamp_overall = None
 
-    for event in jsonData:
+    for event in edit_events:
        if event.get("timestamp"):
            if first_timestamp_overall is None:
                first_timestamp_overall = event["timestamp"]
@@ -677,34 +721,72 @@ def check_time_limit(jsonData: tuple[dict[str, Any], ...], time_limit_minutes: i
         # Not enough events with timestamps
         return None
 
+    # Build a focus status timeline from focus events
+    # Map timestamp -> focused (True/False)
+    focus_timeline: list[tuple[datetime, bool]] = []
+    for focus_event in focus_events:
+        if "timestamp" in focus_event and "focused" in focus_event:
+            try:
+                ts = parse_ts(focus_event["timestamp"])
+                focused = focus_event["focused"]
+                focus_timeline.append((ts, focused))
+            except (ValueError, KeyError):
+                continue
+
+    # Sort by timestamp
+    focus_timeline.sort(key=lambda x: x[0])
+
+    def is_focused_at(timestamp: datetime) -> bool:
+        """Check if the window was focused at the given timestamp."""
+        # Walk backwards through focus events to find the most recent state
+        for ts, focused in reversed(focus_timeline):
+            if ts <= timestamp:
+                return focused
+        # Default to focused if no prior focus event found
+        return True
+
     # Calculate elapsed time by summing editing time within each session
+    # with focus-aware gap handling
     total_minutes_elapsed = 0.0
+    UNFOCUSED_GAP_THRESHOLD_MINUTES = 5.0  # Don't count gaps > 5 min when unfocused
 
     for i in range(len(session_starts) - 1):
        session_start = session_starts[i]
        session_end = session_starts[i + 1]
 
-        # Find first and last events with timestamps in this session
-        first_event_time = None
-        last_event_time = None
-
+        # Collect all timestamped events in this session
+        session_events: list[tuple[datetime, int]] = []
        for idx in range(session_start, session_end):
-            event = jsonData[idx]
+            event = edit_events[idx]
            timestamp = event.get("timestamp")
            if timestamp:
                try:
                    event_time = parse_ts(timestamp)
-                    if first_event_time is None:
-                        first_event_time = event_time
-                    last_event_time = event_time
+                    session_events.append((event_time, idx))
                except (ValueError, KeyError):
-                    # Skip events with invalid timestamps
                    continue
 
-        # If this session has timestamped events, add its elapsed time
-        if first_event_time is not None and last_event_time is not None:
-            session_diff = last_event_time - first_event_time
-            total_minutes_elapsed += session_diff.total_seconds() / 60
+        if not session_events:
+            continue
+
+        # Sort by timestamp
+        session_events.sort(key=lambda x: x[0])
+
+        # Calculate time by summing gaps between consecutive events
+        for j in range(len(session_events) - 1):
+            current_time, _ = session_events[j]
+            next_time, _ = session_events[j + 1]
+
+            gap_seconds = (next_time - current_time).total_seconds()
+            gap_minutes = gap_seconds / 60
+
+            # Check focus status at the end of this gap (next_time)
+            # If unfocused and gap is large, don't count it
+            if not is_focused_at(next_time) and gap_minutes > UNFOCUSED_GAP_THRESHOLD_MINUTES:
+                # Skip this gap - student was away from editor
+                continue
+
+            total_minutes_elapsed += gap_minutes
 
     # For time limit check, use the span from first to last timestamp overall
     try:
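
To make the gap rules concrete, here is a small self-contained rework of the logic above on an invented timeline (it does not call into the package; the 5-minute threshold and "check focus at the gap's end" rule mirror the code in this hunk):

    from datetime import datetime

    # Invented edit timestamps within one session
    edits = [
        datetime(2024, 1, 1, 10, 0),
        datetime(2024, 1, 1, 10, 3),
        datetime(2024, 1, 1, 10, 20),
        datetime(2024, 1, 1, 10, 24),
    ]
    # Assume the window was unfocused from 10:04 until 10:22
    unfocused_from = datetime(2024, 1, 1, 10, 4)
    unfocused_until = datetime(2024, 1, 1, 10, 22)

    total_minutes = 0.0
    for prev, nxt in zip(edits, edits[1:]):
        gap_minutes = (nxt - prev).total_seconds() / 60
        unfocused_at_gap_end = unfocused_from <= nxt < unfocused_until
        if unfocused_at_gap_end and gap_minutes > 5.0:
            continue  # 10:03 -> 10:20 (17 min, unfocused) is excluded
        total_minutes += gap_minutes  # 10:00 -> 10:03 (3 min) and 10:20 -> 10:24 (4 min) count

    print(total_minutes)  # 7.0 counted minutes versus a 24-minute wall-clock span
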
@@ -752,3 +834,60 @@ def verify(template: str, jsonData: tuple[dict[str, Any], ...]) -> tuple[str, li
     suspicious_events = detect_external_copypaste(jsonData)
 
     return verified_template, suspicious_events
+
+
+def combine_time_info(
+    time_infos: list[dict[str, Any] | None], time_limit_minutes: int | None
+) -> dict[str, Any] | None:
+    """
+    Combine time information from multiple recording files.
+
+    Parameters
+    ----------
+    time_infos : list[dict[str, Any] | None]
+        List of time information dictionaries from multiple files
+    time_limit_minutes : int | None
+        Time limit to check against
+
+    Returns
+    -------
+    dict[str, Any] | None
+        Combined time information, or None if no valid data
+    """
+    valid_infos = [info for info in time_infos if info is not None]
+    if not valid_infos:
+        return None
+
+    # Sum elapsed times across all sessions
+    total_elapsed = sum(info["minutes_elapsed"] for info in valid_infos)
+
+    # Find overall first and last timestamps
+    all_timestamps = []
+    for info in valid_infos:
+        all_timestamps.append(
+            datetime.fromisoformat(info["first_timestamp"].replace("Z", "+00:00"))
+        )
+        all_timestamps.append(
+            datetime.fromisoformat(info["last_timestamp"].replace("Z", "+00:00"))
+        )
+
+    first_ts = min(all_timestamps)
+    last_ts = max(all_timestamps)
+    overall_span = (last_ts - first_ts).total_seconds() / 60
+
+    result = {
+        "time_limit_minutes": time_limit_minutes,
+        "minutes_elapsed": round(total_elapsed, 2),
+        "first_timestamp": first_ts.isoformat().replace("+00:00", "Z"),
+        "last_timestamp": last_ts.isoformat().replace("+00:00", "Z"),
+        "file_count": len(valid_infos),
+        "overall_span_minutes": round(overall_span, 2),
+    }
+
+    # For time limit check in combined mode, use the sum of elapsed times
+    if time_limit_minutes is not None:
+        result["exceeds_limit"] = total_elapsed > time_limit_minutes
+    else:
+        result["exceeds_limit"] = False
+
+    return result
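
Finally, a usage sketch for the new `combine_time_info` helper. The per-file dictionaries are assumed to be what `check_time_limit` produces for each recording file (the diff only shows the keys this function reads: minutes_elapsed, first_timestamp, last_timestamp), the import location is the module patched above, and all values are invented:

    per_file = [
        {"minutes_elapsed": 42.5, "first_timestamp": "2024-01-01T10:00:00Z",
         "last_timestamp": "2024-01-01T11:00:00Z"},
        None,  # a file with no usable timestamps is skipped
        {"minutes_elapsed": 30.0, "first_timestamp": "2024-01-01T12:00:00Z",
         "last_timestamp": "2024-01-01T12:45:00Z"},
    ]

    combined = combine_time_info(per_file, time_limit_minutes=60)
    # combined["minutes_elapsed"]      == 72.5   (sum of per-file editing time)
    # combined["overall_span_minutes"] == 165.0  (10:00Z to 12:45Z wall clock)
    # combined["file_count"]           == 2
    # combined["exceeds_limit"]        is True   (72.5 > 60)
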