cr-proc 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -128,22 +128,29 @@ def _build_document_states(jsonData: tuple[dict[str, Any], ...]) -> tuple[list[s
     existed in the document at each point in time. This allows detectors to
     check if pasted/autocompleted content already existed in the document.
 
+    Only processes edit events (type="edit" or no type field for backwards compatibility).
+
     Parameters
     ----------
     jsonData : tuple[dict[str, Any], ...]
-        The event data from the JSONL file
+        The event data from the JSONL file (all event types)
 
     Returns
     -------
     tuple[list[str], set[str]]
-        - List of document states (one per event, strings of full document content)
+        - List of document states (one per edit event, strings of full document content)
         - Set of all content fragments ever seen (whitelist for internal copy detection)
     """
+    from .load import is_edit_event
+
+    # Filter to only edit events
+    edit_events = [e for e in jsonData if is_edit_event(e)]
+
     document_states = []
     content_whitelist = set()
     current_state = ""
 
-    for idx, event in enumerate(jsonData):
+    for idx, event in enumerate(edit_events):
        old_frag = _normalize_newlines(event.get("oldFragment", ""))
        new_frag = _normalize_newlines(event.get("newFragment", ""))
        offset = event.get("offset", 0)
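
The helper `is_edit_event` is imported from `.load` but is not shown in this diff. Based on the docstring wording ("type=\"edit\" or no type field for backwards compatibility"), it presumably amounts to something like the following sketch; the real helper in the package's load module may differ:

    def is_edit_event(event: dict) -> bool:
        """Treat events with type="edit", or with no "type" field at all, as edit events."""
        # Sketch only: reconstructed from the docstring, not copied from the package.
        return event.get("type", "edit") == "edit"
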
@@ -182,12 +189,14 @@ def _detect_multiline_external_pastes(
     Flags newFragments that are significant in length (more than one line)
     and do not appear to be copied from within the document itself.
 
+    Only processes edit events (type="edit" or no type field for backwards compatibility).
+
     Parameters
     ----------
     jsonData : tuple[dict[str, Any], ...]
-        The event data
+        The event data (all event types)
     document_states : list[str]
-        Full document state at each event
+        Full document state at each edit event
     content_whitelist : set[str]
         All content fragments ever seen in the document (for internal copy detection)
 
@@ -196,12 +205,17 @@ def _detect_multiline_external_pastes(
     list[dict[str, Any]]
         List of suspicious multi-line paste events.
     """
+    from .load import is_edit_event
+
+    # Filter to only edit events
+    edit_events = [e for e in jsonData if is_edit_event(e)]
+
     suspicious_events = []
 
     # Build whitelist incrementally to only include content from BEFORE each event
     past_whitelist = set()
 
-    for idx, event in enumerate(jsonData):
+    for idx, event in enumerate(edit_events):
        old_frag = _normalize_newlines(event.get("oldFragment", ""))
        new_frag = _normalize_newlines(event.get("newFragment", ""))
 
@@ -277,14 +291,21 @@ def _detect_rapid_paste_sequences(jsonData: tuple[dict[str, Any], ...]) -> list[
     Identifies clusters of 3+ one-line paste events occurring within 1 second,
     which may indicate AI-assisted code generation.
 
+    Only processes edit events (type="edit" or no type field for backwards compatibility).
+
     Returns a list of suspicious rapid-paste events.
     """
+    from .load import is_edit_event
+
+    # Filter to only edit events
+    edit_events = [e for e in jsonData if is_edit_event(e)]
+
     suspicious_events = []
 
     # Track one-line paste events for rapid-paste detection
     one_line_pastes = []
 
-    for idx, event in enumerate(jsonData):
+    for idx, event in enumerate(edit_events):
        new_frag = _normalize_newlines(event.get("newFragment", ""))
        old_frag = _normalize_newlines(event.get("oldFragment", ""))
        timestamp = event.get("timestamp")
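
The clustering step itself falls outside this hunk. Purely as an illustration of the heuristic the docstring describes (3+ one-line pastes within 1 second), and not the package's actual code, it could look like the sketch below, assuming `one_line_pastes` collects (event index, timestamp) pairs:

    from datetime import datetime

    def find_rapid_clusters(one_line_pastes: list[tuple[int, datetime]]) -> list[int]:
        """Return indices of events in any window of 3 one-line pastes spanning <= 1 second."""
        flagged: set[int] = set()
        for i in range(len(one_line_pastes) - 2):
            window = one_line_pastes[i : i + 3]
            # Timestamps are in event order, so the window span is last minus first
            if (window[-1][1] - window[0][1]).total_seconds() <= 1.0:
                flagged.update(idx for idx, _ in window)
        return sorted(flagged)
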
@@ -383,12 +404,14 @@ def _detect_fullline_autocomplete(
     - newFragment does NOT already exist in the document state
     - Event not already flagged as external copy-paste
 
+    Only processes edit events (type="edit" or no type field for backwards compatibility).
+
     Parameters
     ----------
     jsonData : tuple[dict[str, Any], ...]
-        The event data
+        The event data (all event types)
     document_states : list[str]
-        Full document state at each event
+        Full document state at each edit event
     content_whitelist : set[str]
         All content fragments ever seen in the document
     excluded_indices : set[int]
@@ -399,12 +422,17 @@ def _detect_fullline_autocomplete(
     list[dict[str, Any]]
         List of suspected multi-line auto-complete events.
     """
+    from .load import is_edit_event
+
+    # Filter to only edit events
+    edit_events = [e for e in jsonData if is_edit_event(e)]
+
     suspicious_events = []
 
     # Build whitelist incrementally to only include content from BEFORE each event
     past_whitelist = set()
 
-    for idx, event in enumerate(jsonData):
+    for idx, event in enumerate(edit_events):
        # Skip if already flagged by another detector
        if idx in excluded_indices:
            past_whitelist_update(idx, event, document_states, past_whitelist)
@@ -626,13 +654,20 @@ def check_time_limit(jsonData: tuple[dict[str, Any], ...], time_limit_minutes: i
     Check if the time between first and last edit exceeds the specified time limit.
 
     Tracks elapsed editing time across sessions by summing actual editing time within
-    each session (excluding gaps between sessions). For the time limit check, compares
-    the span from the first timestamp to the last timestamp overall.
+    each session (excluding gaps between sessions). Focus events (type="focusStatus")
+    are used to pause time tracking when the window loses focus for extended periods.
+
+    Time tracking behavior:
+    - Tracks actual editing time by looking at timestamps between edit events
+    - When a focusStatus event with focused=false is encountered, time tracking pauses
+    - Time tracking resumes when a focusStatus event with focused=true is encountered
+    - Gaps > 5 minutes while unfocused are excluded from time tracking
+    - Gaps <= 5 minutes are counted even when unfocused (student thinking/reviewing)
 
     Parameters
     ----------
     jsonData : tuple[dict[str, Any], ...]
-        The event data from the JSONL file
+        The event data from the JSONL file (all event types)
     time_limit_minutes : int | None
         Maximum allowed time in minutes between first and last overall edit.
         If None, no time limit is enforced.
@@ -649,25 +684,34 @@ def check_time_limit(jsonData: tuple[dict[str, Any], ...], time_limit_minutes: i
     def parse_ts(ts_str: str) -> datetime:
         return datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
 
+    # Separate edit events from focus events
+    from .load import is_edit_event
+
+    edit_events = [e for e in jsonData if is_edit_event(e)]
+    focus_events = [e for e in jsonData if e.get("type") == "focusStatus"]
+
+    if not edit_events:
+        return None
+
     # Identify session boundaries: sessions start at indices where offset == 0
     # (indicating file reopen/recording restart) and oldFragment == newFragment (initial snapshot)
     session_starts = [0]  # First session always starts at index 0
-    for idx in range(1, len(jsonData)):
-        offset = jsonData[idx].get("offset", -1)
-        old_frag = jsonData[idx].get("oldFragment", "")
-        new_frag = jsonData[idx].get("newFragment", "")
+    for idx in range(1, len(edit_events)):
+        offset = edit_events[idx].get("offset", -1)
+        old_frag = edit_events[idx].get("oldFragment", "")
+        new_frag = edit_events[idx].get("newFragment", "")
        # Session boundary: offset is 0 and it's an initial snapshot (old == new, non-empty)
        if offset == 0 and old_frag == new_frag and old_frag.strip() != "":
            session_starts.append(idx)
 
     # Add sentinel to mark end of last session
-    session_starts.append(len(jsonData))
+    session_starts.append(len(edit_events))
 
     # Find first and last timestamps overall
     first_timestamp_overall = None
     last_timestamp_overall = None
 
-    for event in jsonData:
+    for event in edit_events:
        if event.get("timestamp"):
            if first_timestamp_overall is None:
                first_timestamp_overall = event["timestamp"]
@@ -677,34 +721,72 @@ def check_time_limit(jsonData: tuple[dict[str, Any], ...], time_limit_minutes: i
         # Not enough events with timestamps
         return None
 
+    # Build a focus status timeline from focus events
+    # Map timestamp -> focused (True/False)
+    focus_timeline: list[tuple[datetime, bool]] = []
+    for focus_event in focus_events:
+        if "timestamp" in focus_event and "focused" in focus_event:
+            try:
+                ts = parse_ts(focus_event["timestamp"])
+                focused = focus_event["focused"]
+                focus_timeline.append((ts, focused))
+            except (ValueError, KeyError):
+                continue
+
+    # Sort by timestamp
+    focus_timeline.sort(key=lambda x: x[0])
+
+    def is_focused_at(timestamp: datetime) -> bool:
+        """Check if the window was focused at the given timestamp."""
+        # Walk backwards through focus events to find the most recent state
+        for ts, focused in reversed(focus_timeline):
+            if ts <= timestamp:
+                return focused
+        # Default to focused if no prior focus event found
+        return True
+
     # Calculate elapsed time by summing editing time within each session
+    # with focus-aware gap handling
     total_minutes_elapsed = 0.0
+    UNFOCUSED_GAP_THRESHOLD_MINUTES = 5.0  # Don't count gaps > 5 min when unfocused
 
     for i in range(len(session_starts) - 1):
        session_start = session_starts[i]
        session_end = session_starts[i + 1]
 
-        # Find first and last events with timestamps in this session
-        first_event_time = None
-        last_event_time = None
-
+        # Collect all timestamped events in this session
+        session_events: list[tuple[datetime, int]] = []
        for idx in range(session_start, session_end):
-            event = jsonData[idx]
+            event = edit_events[idx]
            timestamp = event.get("timestamp")
            if timestamp:
                try:
                    event_time = parse_ts(timestamp)
-                    if first_event_time is None:
-                        first_event_time = event_time
-                    last_event_time = event_time
+                    session_events.append((event_time, idx))
                except (ValueError, KeyError):
-                    # Skip events with invalid timestamps
                    continue
 
-        # If this session has timestamped events, add its elapsed time
-        if first_event_time is not None and last_event_time is not None:
-            session_diff = last_event_time - first_event_time
-            total_minutes_elapsed += session_diff.total_seconds() / 60
+        if not session_events:
+            continue
+
+        # Sort by timestamp
+        session_events.sort(key=lambda x: x[0])
+
+        # Calculate time by summing gaps between consecutive events
+        for j in range(len(session_events) - 1):
+            current_time, _ = session_events[j]
+            next_time, _ = session_events[j + 1]
+
+            gap_seconds = (next_time - current_time).total_seconds()
+            gap_minutes = gap_seconds / 60
+
+            # Check focus status at the end of this gap (next_time)
+            # If unfocused and gap is large, don't count it
+            if not is_focused_at(next_time) and gap_minutes > UNFOCUSED_GAP_THRESHOLD_MINUTES:
+                # Skip this gap - student was away from editor
+                continue
+
+            total_minutes_elapsed += gap_minutes
 
     # For time limit check, use the span from first to last timestamp overall
     try:
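
To make the gap rules concrete, here is a small self-contained rework of the logic above on an invented timeline (it does not call into the package; the 5-minute threshold and "check focus at the gap's end" rule mirror the code in this hunk):

    from datetime import datetime

    # Invented edit timestamps within one session
    edits = [
        datetime(2024, 1, 1, 10, 0),
        datetime(2024, 1, 1, 10, 3),
        datetime(2024, 1, 1, 10, 20),
        datetime(2024, 1, 1, 10, 24),
    ]
    # Assume the window was unfocused from 10:04 until 10:22
    unfocused_from = datetime(2024, 1, 1, 10, 4)
    unfocused_until = datetime(2024, 1, 1, 10, 22)

    total_minutes = 0.0
    for prev, nxt in zip(edits, edits[1:]):
        gap_minutes = (nxt - prev).total_seconds() / 60
        unfocused_at_gap_end = unfocused_from <= nxt < unfocused_until
        if unfocused_at_gap_end and gap_minutes > 5.0:
            continue  # 10:03 -> 10:20 (17 min, unfocused) is excluded
        total_minutes += gap_minutes  # 10:00 -> 10:03 (3 min) and 10:20 -> 10:24 (4 min) count

    print(total_minutes)  # 7.0 counted minutes versus a 24-minute wall-clock span
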
@@ -752,3 +834,60 @@ def verify(template: str, jsonData: tuple[dict[str, Any], ...]) -> tuple[str, li
     suspicious_events = detect_external_copypaste(jsonData)
 
     return verified_template, suspicious_events
+
+
+def combine_time_info(
+    time_infos: list[dict[str, Any] | None], time_limit_minutes: int | None
+) -> dict[str, Any] | None:
+    """
+    Combine time information from multiple recording files.
+
+    Parameters
+    ----------
+    time_infos : list[dict[str, Any] | None]
+        List of time information dictionaries from multiple files
+    time_limit_minutes : int | None
+        Time limit to check against
+
+    Returns
+    -------
+    dict[str, Any] | None
+        Combined time information, or None if no valid data
+    """
+    valid_infos = [info for info in time_infos if info is not None]
+    if not valid_infos:
+        return None
+
+    # Sum elapsed times across all sessions
+    total_elapsed = sum(info["minutes_elapsed"] for info in valid_infos)
+
+    # Find overall first and last timestamps
+    all_timestamps = []
+    for info in valid_infos:
+        all_timestamps.append(
+            datetime.fromisoformat(info["first_timestamp"].replace("Z", "+00:00"))
+        )
+        all_timestamps.append(
+            datetime.fromisoformat(info["last_timestamp"].replace("Z", "+00:00"))
+        )
+
+    first_ts = min(all_timestamps)
+    last_ts = max(all_timestamps)
+    overall_span = (last_ts - first_ts).total_seconds() / 60
+
+    result = {
+        "time_limit_minutes": time_limit_minutes,
+        "minutes_elapsed": round(total_elapsed, 2),
+        "first_timestamp": first_ts.isoformat().replace("+00:00", "Z"),
+        "last_timestamp": last_ts.isoformat().replace("+00:00", "Z"),
+        "file_count": len(valid_infos),
+        "overall_span_minutes": round(overall_span, 2),
+    }
+
+    # For time limit check in combined mode, use the sum of elapsed times
+    if time_limit_minutes is not None:
+        result["exceeds_limit"] = total_elapsed > time_limit_minutes
+    else:
+        result["exceeds_limit"] = False
+
+    return result
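
Finally, a usage sketch for the new `combine_time_info` helper. The per-file dictionaries are assumed to be what `check_time_limit` produces for each recording file (the diff only shows the keys this function reads: minutes_elapsed, first_timestamp, last_timestamp), the import location is the module patched above, and all values are invented:

    per_file = [
        {"minutes_elapsed": 42.5, "first_timestamp": "2024-01-01T10:00:00Z",
         "last_timestamp": "2024-01-01T11:00:00Z"},
        None,  # a file with no usable timestamps is skipped
        {"minutes_elapsed": 30.0, "first_timestamp": "2024-01-01T12:00:00Z",
         "last_timestamp": "2024-01-01T12:45:00Z"},
    ]

    combined = combine_time_info(per_file, time_limit_minutes=60)
    # combined["minutes_elapsed"]      == 72.5   (sum of per-file editing time)
    # combined["overall_span_minutes"] == 165.0  (10:00Z to 12:45Z wall clock)
    # combined["file_count"]           == 2
    # combined["exceeds_limit"]        is True   (72.5 > 60)
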