cr-proc 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_recorder_processor/api/build.py +6 -0
- code_recorder_processor/api/document.py +337 -0
- code_recorder_processor/api/load.py +58 -0
- code_recorder_processor/api/output.py +70 -0
- code_recorder_processor/api/verify.py +171 -32
- code_recorder_processor/cli.py +514 -494
- code_recorder_processor/display.py +201 -0
- code_recorder_processor/playback.py +116 -0
- cr_proc-0.1.10.dist-info/METADATA +280 -0
- cr_proc-0.1.10.dist-info/RECORD +13 -0
- cr_proc-0.1.8.dist-info/METADATA +0 -142
- cr_proc-0.1.8.dist-info/RECORD +0 -9
- {cr_proc-0.1.8.dist-info → cr_proc-0.1.10.dist-info}/WHEEL +0 -0
- {cr_proc-0.1.8.dist-info → cr_proc-0.1.10.dist-info}/entry_points.txt +0 -0
|
@@ -128,22 +128,29 @@ def _build_document_states(jsonData: tuple[dict[str, Any], ...]) -> tuple[list[s
|
|
|
128
128
|
existed in the document at each point in time. This allows detectors to
|
|
129
129
|
check if pasted/autocompleted content already existed in the document.
|
|
130
130
|
|
|
131
|
+
Only processes edit events (type="edit" or no type field for backwards compatibility).
|
|
132
|
+
|
|
131
133
|
Parameters
|
|
132
134
|
----------
|
|
133
135
|
jsonData : tuple[dict[str, Any], ...]
|
|
134
|
-
The event data from the JSONL file
|
|
136
|
+
The event data from the JSONL file (all event types)
|
|
135
137
|
|
|
136
138
|
Returns
|
|
137
139
|
-------
|
|
138
140
|
tuple[list[str], set[str]]
|
|
139
|
-
- List of document states (one per event, strings of full document content)
|
|
141
|
+
- List of document states (one per edit event, strings of full document content)
|
|
140
142
|
- Set of all content fragments ever seen (whitelist for internal copy detection)
|
|
141
143
|
"""
|
|
144
|
+
from .load import is_edit_event
|
|
145
|
+
|
|
146
|
+
# Filter to only edit events
|
|
147
|
+
edit_events = [e for e in jsonData if is_edit_event(e)]
|
|
148
|
+
|
|
142
149
|
document_states = []
|
|
143
150
|
content_whitelist = set()
|
|
144
151
|
current_state = ""
|
|
145
152
|
|
|
146
|
-
for idx, event in enumerate(
|
|
153
|
+
for idx, event in enumerate(edit_events):
|
|
147
154
|
old_frag = _normalize_newlines(event.get("oldFragment", ""))
|
|
148
155
|
new_frag = _normalize_newlines(event.get("newFragment", ""))
|
|
149
156
|
offset = event.get("offset", 0)
|
|
@@ -182,12 +189,14 @@ def _detect_multiline_external_pastes(
|
|
|
182
189
|
Flags newFragments that are significant in length (more than one line)
|
|
183
190
|
and do not appear to be copied from within the document itself.
|
|
184
191
|
|
|
192
|
+
Only processes edit events (type="edit" or no type field for backwards compatibility).
|
|
193
|
+
|
|
185
194
|
Parameters
|
|
186
195
|
----------
|
|
187
196
|
jsonData : tuple[dict[str, Any], ...]
|
|
188
|
-
The event data
|
|
197
|
+
The event data (all event types)
|
|
189
198
|
document_states : list[str]
|
|
190
|
-
Full document state at each event
|
|
199
|
+
Full document state at each edit event
|
|
191
200
|
content_whitelist : set[str]
|
|
192
201
|
All content fragments ever seen in the document (for internal copy detection)
|
|
193
202
|
|
|
@@ -196,12 +205,17 @@ def _detect_multiline_external_pastes(
|
|
|
196
205
|
list[dict[str, Any]]
|
|
197
206
|
List of suspicious multi-line paste events.
|
|
198
207
|
"""
|
|
208
|
+
from .load import is_edit_event
|
|
209
|
+
|
|
210
|
+
# Filter to only edit events
|
|
211
|
+
edit_events = [e for e in jsonData if is_edit_event(e)]
|
|
212
|
+
|
|
199
213
|
suspicious_events = []
|
|
200
214
|
|
|
201
215
|
# Build whitelist incrementally to only include content from BEFORE each event
|
|
202
216
|
past_whitelist = set()
|
|
203
217
|
|
|
204
|
-
for idx, event in enumerate(
|
|
218
|
+
for idx, event in enumerate(edit_events):
|
|
205
219
|
old_frag = _normalize_newlines(event.get("oldFragment", ""))
|
|
206
220
|
new_frag = _normalize_newlines(event.get("newFragment", ""))
|
|
207
221
|
|
|
@@ -277,14 +291,21 @@ def _detect_rapid_paste_sequences(jsonData: tuple[dict[str, Any], ...]) -> list[
|
|
|
277
291
|
Identifies clusters of 3+ one-line paste events occurring within 1 second,
|
|
278
292
|
which may indicate AI-assisted code generation.
|
|
279
293
|
|
|
294
|
+
Only processes edit events (type="edit" or no type field for backwards compatibility).
|
|
295
|
+
|
|
280
296
|
Returns a list of suspicious rapid-paste events.
|
|
281
297
|
"""
|
|
298
|
+
from .load import is_edit_event
|
|
299
|
+
|
|
300
|
+
# Filter to only edit events
|
|
301
|
+
edit_events = [e for e in jsonData if is_edit_event(e)]
|
|
302
|
+
|
|
282
303
|
suspicious_events = []
|
|
283
304
|
|
|
284
305
|
# Track one-line paste events for rapid-paste detection
|
|
285
306
|
one_line_pastes = []
|
|
286
307
|
|
|
287
|
-
for idx, event in enumerate(
|
|
308
|
+
for idx, event in enumerate(edit_events):
|
|
288
309
|
new_frag = _normalize_newlines(event.get("newFragment", ""))
|
|
289
310
|
old_frag = _normalize_newlines(event.get("oldFragment", ""))
|
|
290
311
|
timestamp = event.get("timestamp")
|
|
@@ -383,12 +404,14 @@ def _detect_fullline_autocomplete(
|
|
|
383
404
|
- newFragment does NOT already exist in the document state
|
|
384
405
|
- Event not already flagged as external copy-paste
|
|
385
406
|
|
|
407
|
+
Only processes edit events (type="edit" or no type field for backwards compatibility).
|
|
408
|
+
|
|
386
409
|
Parameters
|
|
387
410
|
----------
|
|
388
411
|
jsonData : tuple[dict[str, Any], ...]
|
|
389
|
-
The event data
|
|
412
|
+
The event data (all event types)
|
|
390
413
|
document_states : list[str]
|
|
391
|
-
Full document state at each event
|
|
414
|
+
Full document state at each edit event
|
|
392
415
|
content_whitelist : set[str]
|
|
393
416
|
All content fragments ever seen in the document
|
|
394
417
|
excluded_indices : set[int]
|
|
@@ -399,12 +422,17 @@ def _detect_fullline_autocomplete(
|
|
|
399
422
|
list[dict[str, Any]]
|
|
400
423
|
List of suspected multi-line auto-complete events.
|
|
401
424
|
"""
|
|
425
|
+
from .load import is_edit_event
|
|
426
|
+
|
|
427
|
+
# Filter to only edit events
|
|
428
|
+
edit_events = [e for e in jsonData if is_edit_event(e)]
|
|
429
|
+
|
|
402
430
|
suspicious_events = []
|
|
403
431
|
|
|
404
432
|
# Build whitelist incrementally to only include content from BEFORE each event
|
|
405
433
|
past_whitelist = set()
|
|
406
434
|
|
|
407
|
-
for idx, event in enumerate(
|
|
435
|
+
for idx, event in enumerate(edit_events):
|
|
408
436
|
# Skip if already flagged by another detector
|
|
409
437
|
if idx in excluded_indices:
|
|
410
438
|
past_whitelist_update(idx, event, document_states, past_whitelist)
|
|
@@ -626,13 +654,20 @@ def check_time_limit(jsonData: tuple[dict[str, Any], ...], time_limit_minutes: i
|
|
|
626
654
|
Check if the time between first and last edit exceeds the specified time limit.
|
|
627
655
|
|
|
628
656
|
Tracks elapsed editing time across sessions by summing actual editing time within
|
|
629
|
-
each session (excluding gaps between sessions).
|
|
630
|
-
|
|
657
|
+
each session (excluding gaps between sessions). Focus events (type="focusStatus")
|
|
658
|
+
are used to pause time tracking when the window loses focus for extended periods.
|
|
659
|
+
|
|
660
|
+
Time tracking behavior:
|
|
661
|
+
- Tracks actual editing time by looking at timestamps between edit events
|
|
662
|
+
- When a focusStatus event with focused=false is encountered, time tracking pauses
|
|
663
|
+
- Time tracking resumes when a focusStatus event with focused=true is encountered
|
|
664
|
+
- Gaps > 5 minutes while unfocused are excluded from time tracking
|
|
665
|
+
- Gaps <= 5 minutes are counted even when unfocused (student thinking/reviewing)
|
|
631
666
|
|
|
632
667
|
Parameters
|
|
633
668
|
----------
|
|
634
669
|
jsonData : tuple[dict[str, Any], ...]
|
|
635
|
-
The event data from the JSONL file
|
|
670
|
+
The event data from the JSONL file (all event types)
|
|
636
671
|
time_limit_minutes : int | None
|
|
637
672
|
Maximum allowed time in minutes between first and last overall edit.
|
|
638
673
|
If None, no time limit is enforced.
|
|
@@ -649,25 +684,34 @@ def check_time_limit(jsonData: tuple[dict[str, Any], ...], time_limit_minutes: i
|
|
|
649
684
|
def parse_ts(ts_str: str) -> datetime:
|
|
650
685
|
return datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
|
|
651
686
|
|
|
687
|
+
# Separate edit events from focus events
|
|
688
|
+
from .load import is_edit_event
|
|
689
|
+
|
|
690
|
+
edit_events = [e for e in jsonData if is_edit_event(e)]
|
|
691
|
+
focus_events = [e for e in jsonData if e.get("type") == "focusStatus"]
|
|
692
|
+
|
|
693
|
+
if not edit_events:
|
|
694
|
+
return None
|
|
695
|
+
|
|
652
696
|
# Identify session boundaries: sessions start at indices where offset == 0
|
|
653
697
|
# (indicating file reopen/recording restart) and oldFragment == newFragment (initial snapshot)
|
|
654
698
|
session_starts = [0] # First session always starts at index 0
|
|
655
|
-
for idx in range(1, len(
|
|
656
|
-
offset =
|
|
657
|
-
old_frag =
|
|
658
|
-
new_frag =
|
|
699
|
+
for idx in range(1, len(edit_events)):
|
|
700
|
+
offset = edit_events[idx].get("offset", -1)
|
|
701
|
+
old_frag = edit_events[idx].get("oldFragment", "")
|
|
702
|
+
new_frag = edit_events[idx].get("newFragment", "")
|
|
659
703
|
# Session boundary: offset is 0 and it's an initial snapshot (old == new, non-empty)
|
|
660
704
|
if offset == 0 and old_frag == new_frag and old_frag.strip() != "":
|
|
661
705
|
session_starts.append(idx)
|
|
662
706
|
|
|
663
707
|
# Add sentinel to mark end of last session
|
|
664
|
-
session_starts.append(len(
|
|
708
|
+
session_starts.append(len(edit_events))
|
|
665
709
|
|
|
666
710
|
# Find first and last timestamps overall
|
|
667
711
|
first_timestamp_overall = None
|
|
668
712
|
last_timestamp_overall = None
|
|
669
713
|
|
|
670
|
-
for event in
|
|
714
|
+
for event in edit_events:
|
|
671
715
|
if event.get("timestamp"):
|
|
672
716
|
if first_timestamp_overall is None:
|
|
673
717
|
first_timestamp_overall = event["timestamp"]
|
|
@@ -677,34 +721,72 @@ def check_time_limit(jsonData: tuple[dict[str, Any], ...], time_limit_minutes: i
|
|
|
677
721
|
# Not enough events with timestamps
|
|
678
722
|
return None
|
|
679
723
|
|
|
724
|
+
# Build a focus status timeline from focus events
|
|
725
|
+
# Map timestamp -> focused (True/False)
|
|
726
|
+
focus_timeline: list[tuple[datetime, bool]] = []
|
|
727
|
+
for focus_event in focus_events:
|
|
728
|
+
if "timestamp" in focus_event and "focused" in focus_event:
|
|
729
|
+
try:
|
|
730
|
+
ts = parse_ts(focus_event["timestamp"])
|
|
731
|
+
focused = focus_event["focused"]
|
|
732
|
+
focus_timeline.append((ts, focused))
|
|
733
|
+
except (ValueError, KeyError):
|
|
734
|
+
continue
|
|
735
|
+
|
|
736
|
+
# Sort by timestamp
|
|
737
|
+
focus_timeline.sort(key=lambda x: x[0])
|
|
738
|
+
|
|
739
|
+
def is_focused_at(timestamp: datetime) -> bool:
|
|
740
|
+
"""Check if the window was focused at the given timestamp."""
|
|
741
|
+
# Walk backwards through focus events to find the most recent state
|
|
742
|
+
for ts, focused in reversed(focus_timeline):
|
|
743
|
+
if ts <= timestamp:
|
|
744
|
+
return focused
|
|
745
|
+
# Default to focused if no prior focus event found
|
|
746
|
+
return True
|
|
747
|
+
|
|
680
748
|
# Calculate elapsed time by summing editing time within each session
|
|
749
|
+
# with focus-aware gap handling
|
|
681
750
|
total_minutes_elapsed = 0.0
|
|
751
|
+
UNFOCUSED_GAP_THRESHOLD_MINUTES = 5.0 # Don't count gaps > 5 min when unfocused
|
|
682
752
|
|
|
683
753
|
for i in range(len(session_starts) - 1):
|
|
684
754
|
session_start = session_starts[i]
|
|
685
755
|
session_end = session_starts[i + 1]
|
|
686
756
|
|
|
687
|
-
#
|
|
688
|
-
|
|
689
|
-
last_event_time = None
|
|
690
|
-
|
|
757
|
+
# Collect all timestamped events in this session
|
|
758
|
+
session_events: list[tuple[datetime, int]] = []
|
|
691
759
|
for idx in range(session_start, session_end):
|
|
692
|
-
event =
|
|
760
|
+
event = edit_events[idx]
|
|
693
761
|
timestamp = event.get("timestamp")
|
|
694
762
|
if timestamp:
|
|
695
763
|
try:
|
|
696
764
|
event_time = parse_ts(timestamp)
|
|
697
|
-
|
|
698
|
-
first_event_time = event_time
|
|
699
|
-
last_event_time = event_time
|
|
765
|
+
session_events.append((event_time, idx))
|
|
700
766
|
except (ValueError, KeyError):
|
|
701
|
-
# Skip events with invalid timestamps
|
|
702
767
|
continue
|
|
703
768
|
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
769
|
+
if not session_events:
|
|
770
|
+
continue
|
|
771
|
+
|
|
772
|
+
# Sort by timestamp
|
|
773
|
+
session_events.sort(key=lambda x: x[0])
|
|
774
|
+
|
|
775
|
+
# Calculate time by summing gaps between consecutive events
|
|
776
|
+
for j in range(len(session_events) - 1):
|
|
777
|
+
current_time, _ = session_events[j]
|
|
778
|
+
next_time, _ = session_events[j + 1]
|
|
779
|
+
|
|
780
|
+
gap_seconds = (next_time - current_time).total_seconds()
|
|
781
|
+
gap_minutes = gap_seconds / 60
|
|
782
|
+
|
|
783
|
+
# Check focus status at the end of this gap (next_time)
|
|
784
|
+
# If unfocused and gap is large, don't count it
|
|
785
|
+
if not is_focused_at(next_time) and gap_minutes > UNFOCUSED_GAP_THRESHOLD_MINUTES:
|
|
786
|
+
# Skip this gap - student was away from editor
|
|
787
|
+
continue
|
|
788
|
+
|
|
789
|
+
total_minutes_elapsed += gap_minutes
|
|
708
790
|
|
|
709
791
|
# For time limit check, use the span from first to last timestamp overall
|
|
710
792
|
try:
|
|
@@ -752,3 +834,60 @@ def verify(template: str, jsonData: tuple[dict[str, Any], ...]) -> tuple[str, li
|
|
|
752
834
|
suspicious_events = detect_external_copypaste(jsonData)
|
|
753
835
|
|
|
754
836
|
return verified_template, suspicious_events
|
|
837
|
+
|
|
838
|
+
|
|
839
|
+
def combine_time_info(
|
|
840
|
+
time_infos: list[dict[str, Any] | None], time_limit_minutes: int | None
|
|
841
|
+
) -> dict[str, Any] | None:
|
|
842
|
+
"""
|
|
843
|
+
Combine time information from multiple recording files.
|
|
844
|
+
|
|
845
|
+
Parameters
|
|
846
|
+
----------
|
|
847
|
+
time_infos : list[dict[str, Any] | None]
|
|
848
|
+
List of time information dictionaries from multiple files
|
|
849
|
+
time_limit_minutes : int | None
|
|
850
|
+
Time limit to check against
|
|
851
|
+
|
|
852
|
+
Returns
|
|
853
|
+
-------
|
|
854
|
+
dict[str, Any] | None
|
|
855
|
+
Combined time information, or None if no valid data
|
|
856
|
+
"""
|
|
857
|
+
valid_infos = [info for info in time_infos if info is not None]
|
|
858
|
+
if not valid_infos:
|
|
859
|
+
return None
|
|
860
|
+
|
|
861
|
+
# Sum elapsed times across all sessions
|
|
862
|
+
total_elapsed = sum(info["minutes_elapsed"] for info in valid_infos)
|
|
863
|
+
|
|
864
|
+
# Find overall first and last timestamps
|
|
865
|
+
all_timestamps = []
|
|
866
|
+
for info in valid_infos:
|
|
867
|
+
all_timestamps.append(
|
|
868
|
+
datetime.fromisoformat(info["first_timestamp"].replace("Z", "+00:00"))
|
|
869
|
+
)
|
|
870
|
+
all_timestamps.append(
|
|
871
|
+
datetime.fromisoformat(info["last_timestamp"].replace("Z", "+00:00"))
|
|
872
|
+
)
|
|
873
|
+
|
|
874
|
+
first_ts = min(all_timestamps)
|
|
875
|
+
last_ts = max(all_timestamps)
|
|
876
|
+
overall_span = (last_ts - first_ts).total_seconds() / 60
|
|
877
|
+
|
|
878
|
+
result = {
|
|
879
|
+
"time_limit_minutes": time_limit_minutes,
|
|
880
|
+
"minutes_elapsed": round(total_elapsed, 2),
|
|
881
|
+
"first_timestamp": first_ts.isoformat().replace("+00:00", "Z"),
|
|
882
|
+
"last_timestamp": last_ts.isoformat().replace("+00:00", "Z"),
|
|
883
|
+
"file_count": len(valid_infos),
|
|
884
|
+
"overall_span_minutes": round(overall_span, 2),
|
|
885
|
+
}
|
|
886
|
+
|
|
887
|
+
# For time limit check in combined mode, use the sum of elapsed times
|
|
888
|
+
if time_limit_minutes is not None:
|
|
889
|
+
result["exceeds_limit"] = total_elapsed > time_limit_minutes
|
|
890
|
+
else:
|
|
891
|
+
result["exceeds_limit"] = False
|
|
892
|
+
|
|
893
|
+
return result
|