cr-proc 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_recorder_processor/api/build.py +6 -0
- code_recorder_processor/api/document.py +300 -0
- code_recorder_processor/api/load.py +58 -0
- code_recorder_processor/api/output.py +70 -0
- code_recorder_processor/api/verify.py +293 -83
- code_recorder_processor/cli.py +523 -349
- code_recorder_processor/display.py +201 -0
- code_recorder_processor/playback.py +116 -0
- cr_proc-0.1.9.dist-info/METADATA +280 -0
- cr_proc-0.1.9.dist-info/RECORD +13 -0
- cr_proc-0.1.7.dist-info/METADATA +0 -142
- cr_proc-0.1.7.dist-info/RECORD +0 -9
- {cr_proc-0.1.7.dist-info → cr_proc-0.1.9.dist-info}/WHEEL +0 -0
- {cr_proc-0.1.7.dist-info → cr_proc-0.1.9.dist-info}/entry_points.txt +0 -0
|
@@ -2,6 +2,13 @@ from typing import Any
|
|
|
2
2
|
from datetime import datetime
|
|
3
3
|
import difflib
|
|
4
4
|
|
|
5
|
+
# ============================================================================
|
|
6
|
+
# Constants for detection thresholds
|
|
7
|
+
# ============================================================================
|
|
8
|
+
MIN_WHITELIST_SIZE = 10 # Minimum fragment size to add to whitelist
|
|
9
|
+
MIN_MULTILINE_SIZE = 20 # Minimum size for multiline external paste detection
|
|
10
|
+
MIN_AUTOCOMPLETE_SIZE = 10 # Minimum size for autocomplete detection
|
|
11
|
+
MIN_RAPID_PASTE_CHARS = 5 # Minimum chars for a "paste" in rapid detection
|
|
5
12
|
|
|
6
13
|
def _normalize_newlines(text: str) -> str:
|
|
7
14
|
"""Normalize CRLF to LF to avoid offset and diff noise."""
|
|
@@ -121,22 +128,29 @@ def _build_document_states(jsonData: tuple[dict[str, Any], ...]) -> tuple[list[s
|
|
|
121
128
|
existed in the document at each point in time. This allows detectors to
|
|
122
129
|
check if pasted/autocompleted content already existed in the document.
|
|
123
130
|
|
|
131
|
+
Only processes edit events (type="edit" or no type field for backwards compatibility).
|
|
132
|
+
|
|
124
133
|
Parameters
|
|
125
134
|
----------
|
|
126
135
|
jsonData : tuple[dict[str, Any], ...]
|
|
127
|
-
The event data from the JSONL file
|
|
136
|
+
The event data from the JSONL file (all event types)
|
|
128
137
|
|
|
129
138
|
Returns
|
|
130
139
|
-------
|
|
131
140
|
tuple[list[str], set[str]]
|
|
132
|
-
- List of document states (one per event, strings of full document content)
|
|
141
|
+
- List of document states (one per edit event, strings of full document content)
|
|
133
142
|
- Set of all content fragments ever seen (whitelist for internal copy detection)
|
|
134
143
|
"""
|
|
144
|
+
from .load import is_edit_event
|
|
145
|
+
|
|
146
|
+
# Filter to only edit events
|
|
147
|
+
edit_events = [e for e in jsonData if is_edit_event(e)]
|
|
148
|
+
|
|
135
149
|
document_states = []
|
|
136
150
|
content_whitelist = set()
|
|
137
151
|
current_state = ""
|
|
138
152
|
|
|
139
|
-
for idx, event in enumerate(
|
|
153
|
+
for idx, event in enumerate(edit_events):
|
|
140
154
|
old_frag = _normalize_newlines(event.get("oldFragment", ""))
|
|
141
155
|
new_frag = _normalize_newlines(event.get("newFragment", ""))
|
|
142
156
|
offset = event.get("offset", 0)
|
|
@@ -152,13 +166,13 @@ def _build_document_states(jsonData: tuple[dict[str, Any], ...]) -> tuple[list[s
|
|
|
152
166
|
|
|
153
167
|
# Build whitelist of all content fragments seen
|
|
154
168
|
# Add both old and new fragments to whitelist for comprehensive coverage
|
|
155
|
-
if len(old_frag) >
|
|
169
|
+
if len(old_frag) > MIN_WHITELIST_SIZE:
|
|
156
170
|
content_whitelist.add(old_frag)
|
|
157
|
-
if len(new_frag) >
|
|
171
|
+
if len(new_frag) > MIN_WHITELIST_SIZE:
|
|
158
172
|
content_whitelist.add(new_frag)
|
|
159
173
|
|
|
160
174
|
# Also add the full document state to whitelist
|
|
161
|
-
if len(current_state) >
|
|
175
|
+
if len(current_state) > MIN_WHITELIST_SIZE:
|
|
162
176
|
content_whitelist.add(current_state)
|
|
163
177
|
|
|
164
178
|
return document_states, content_whitelist
|
|
@@ -175,12 +189,14 @@ def _detect_multiline_external_pastes(
|
|
|
175
189
|
Flags newFragments that are significant in length (more than one line)
|
|
176
190
|
and do not appear to be copied from within the document itself.
|
|
177
191
|
|
|
192
|
+
Only processes edit events (type="edit" or no type field for backwards compatibility).
|
|
193
|
+
|
|
178
194
|
Parameters
|
|
179
195
|
----------
|
|
180
196
|
jsonData : tuple[dict[str, Any], ...]
|
|
181
|
-
The event data
|
|
197
|
+
The event data (all event types)
|
|
182
198
|
document_states : list[str]
|
|
183
|
-
Full document state at each event
|
|
199
|
+
Full document state at each edit event
|
|
184
200
|
content_whitelist : set[str]
|
|
185
201
|
All content fragments ever seen in the document (for internal copy detection)
|
|
186
202
|
|
|
@@ -189,67 +205,81 @@ def _detect_multiline_external_pastes(
|
|
|
189
205
|
list[dict[str, Any]]
|
|
190
206
|
List of suspicious multi-line paste events.
|
|
191
207
|
"""
|
|
208
|
+
from .load import is_edit_event
|
|
209
|
+
|
|
210
|
+
# Filter to only edit events
|
|
211
|
+
edit_events = [e for e in jsonData if is_edit_event(e)]
|
|
212
|
+
|
|
192
213
|
suspicious_events = []
|
|
193
214
|
|
|
194
|
-
|
|
215
|
+
# Build whitelist incrementally to only include content from BEFORE each event
|
|
216
|
+
past_whitelist = set()
|
|
217
|
+
|
|
218
|
+
for idx, event in enumerate(edit_events):
|
|
195
219
|
old_frag = _normalize_newlines(event.get("oldFragment", ""))
|
|
196
220
|
new_frag = _normalize_newlines(event.get("newFragment", ""))
|
|
197
221
|
|
|
198
222
|
# Skip if no actual change
|
|
199
223
|
if new_frag == old_frag or new_frag.strip() == "":
|
|
200
|
-
|
|
201
|
-
|
|
224
|
+
pass # Still add to whitelist below
|
|
202
225
|
# Only check multi-line content (more than 2 lines means at least 2 actual lines)
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
continue
|
|
206
|
-
|
|
207
|
-
# Check if the new content already existed in the document at any prior point
|
|
208
|
-
is_internal_copy = False
|
|
209
|
-
|
|
210
|
-
# Check against document state BEFORE this event
|
|
211
|
-
if idx > 0:
|
|
212
|
-
prior_state = document_states[idx - 1]
|
|
213
|
-
if new_frag in prior_state:
|
|
214
|
-
is_internal_copy = True
|
|
215
|
-
|
|
216
|
-
# Also check against whitelist of all content seen
|
|
217
|
-
if not is_internal_copy:
|
|
218
|
-
for hist_content in content_whitelist:
|
|
219
|
-
# Ignore tiny fragments
|
|
220
|
-
if len(hist_content) < 20:
|
|
221
|
-
continue
|
|
222
|
-
|
|
223
|
-
# Require substantial overlap in size to count as an internal copy
|
|
224
|
-
similar_length = (
|
|
225
|
-
len(hist_content) >= 0.8 * len(new_frag)
|
|
226
|
-
and len(hist_content) <= 1.25 * len(new_frag)
|
|
227
|
-
)
|
|
226
|
+
elif len(new_frag.split("\n")) > 2:
|
|
227
|
+
new_lines = new_frag.split("\n")
|
|
228
228
|
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
break
|
|
229
|
+
# Check if the new content already existed in the document at any prior point
|
|
230
|
+
is_internal_copy = False
|
|
232
231
|
|
|
233
|
-
|
|
232
|
+
# Check against document state BEFORE this event
|
|
233
|
+
if idx > 0:
|
|
234
|
+
prior_state = document_states[idx - 1]
|
|
235
|
+
if new_frag in prior_state:
|
|
234
236
|
is_internal_copy = True
|
|
235
|
-
break
|
|
236
237
|
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
238
|
+
# Also check against whitelist of content from BEFORE this event
|
|
239
|
+
if not is_internal_copy:
|
|
240
|
+
for hist_content in past_whitelist:
|
|
241
|
+
# Ignore tiny fragments - multiline external pastes should be significant
|
|
242
|
+
if len(hist_content) < MIN_MULTILINE_SIZE:
|
|
243
|
+
continue
|
|
244
|
+
|
|
245
|
+
# Require substantial overlap in size to count as an internal copy
|
|
246
|
+
similar_length = (
|
|
247
|
+
len(hist_content) >= 0.8 * len(new_frag)
|
|
248
|
+
and len(hist_content) <= 1.25 * len(new_frag)
|
|
249
|
+
)
|
|
250
|
+
|
|
251
|
+
if new_frag == hist_content:
|
|
252
|
+
is_internal_copy = True
|
|
253
|
+
break
|
|
254
|
+
|
|
255
|
+
if new_frag in hist_content and similar_length:
|
|
256
|
+
is_internal_copy = True
|
|
257
|
+
break
|
|
258
|
+
|
|
259
|
+
if hist_content in new_frag and similar_length:
|
|
260
|
+
is_internal_copy = True
|
|
261
|
+
break
|
|
262
|
+
|
|
263
|
+
# Also check if it's in the old fragment (internal move/copy)
|
|
264
|
+
if not is_internal_copy and old_frag and (new_frag in old_frag or old_frag in new_frag):
|
|
265
|
+
is_internal_copy = True
|
|
240
266
|
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
267
|
+
if not is_internal_copy:
|
|
268
|
+
suspicious_events.append({
|
|
269
|
+
"event_index": idx,
|
|
270
|
+
"line_count": len(new_lines),
|
|
271
|
+
"char_count": len(new_frag),
|
|
272
|
+
"reason": "multi-line external paste",
|
|
273
|
+
"newFragment": new_frag
|
|
274
|
+
})
|
|
244
275
|
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
})
|
|
276
|
+
# Add current event's content to whitelist for future events
|
|
277
|
+
if len(old_frag) > MIN_MULTILINE_SIZE:
|
|
278
|
+
past_whitelist.add(old_frag)
|
|
279
|
+
if len(new_frag) > MIN_MULTILINE_SIZE:
|
|
280
|
+
past_whitelist.add(new_frag)
|
|
281
|
+
if idx > 0 and len(document_states[idx - 1]) > MIN_MULTILINE_SIZE:
|
|
282
|
+
past_whitelist.add(document_states[idx - 1])
|
|
253
283
|
|
|
254
284
|
return suspicious_events
|
|
255
285
|
|
|
@@ -261,14 +291,21 @@ def _detect_rapid_paste_sequences(jsonData: tuple[dict[str, Any], ...]) -> list[
|
|
|
261
291
|
Identifies clusters of 3+ one-line paste events occurring within 1 second,
|
|
262
292
|
which may indicate AI-assisted code generation.
|
|
263
293
|
|
|
294
|
+
Only processes edit events (type="edit" or no type field for backwards compatibility).
|
|
295
|
+
|
|
264
296
|
Returns a list of suspicious rapid-paste events.
|
|
265
297
|
"""
|
|
298
|
+
from .load import is_edit_event
|
|
299
|
+
|
|
300
|
+
# Filter to only edit events
|
|
301
|
+
edit_events = [e for e in jsonData if is_edit_event(e)]
|
|
302
|
+
|
|
266
303
|
suspicious_events = []
|
|
267
304
|
|
|
268
305
|
# Track one-line paste events for rapid-paste detection
|
|
269
306
|
one_line_pastes = []
|
|
270
307
|
|
|
271
|
-
for idx, event in enumerate(
|
|
308
|
+
for idx, event in enumerate(edit_events):
|
|
272
309
|
new_frag = _normalize_newlines(event.get("newFragment", ""))
|
|
273
310
|
old_frag = _normalize_newlines(event.get("oldFragment", ""))
|
|
274
311
|
timestamp = event.get("timestamp")
|
|
@@ -281,7 +318,7 @@ def _detect_rapid_paste_sequences(jsonData: tuple[dict[str, Any], ...]) -> list[
|
|
|
281
318
|
new_lines = new_frag.split("\n")
|
|
282
319
|
if len(new_lines) == 2:
|
|
283
320
|
# Heuristic: if it's more than a few characters, it might be pasted
|
|
284
|
-
if len(new_frag.strip()) >
|
|
321
|
+
if len(new_frag.strip()) > MIN_RAPID_PASTE_CHARS:
|
|
285
322
|
one_line_pastes.append({
|
|
286
323
|
"event_index": idx,
|
|
287
324
|
"timestamp": timestamp,
|
|
@@ -367,12 +404,14 @@ def _detect_fullline_autocomplete(
|
|
|
367
404
|
- newFragment does NOT already exist in the document state
|
|
368
405
|
- Event not already flagged as external copy-paste
|
|
369
406
|
|
|
407
|
+
Only processes edit events (type="edit" or no type field for backwards compatibility).
|
|
408
|
+
|
|
370
409
|
Parameters
|
|
371
410
|
----------
|
|
372
411
|
jsonData : tuple[dict[str, Any], ...]
|
|
373
|
-
The event data
|
|
412
|
+
The event data (all event types)
|
|
374
413
|
document_states : list[str]
|
|
375
|
-
Full document state at each event
|
|
414
|
+
Full document state at each edit event
|
|
376
415
|
content_whitelist : set[str]
|
|
377
416
|
All content fragments ever seen in the document
|
|
378
417
|
excluded_indices : set[int]
|
|
@@ -383,11 +422,20 @@ def _detect_fullline_autocomplete(
|
|
|
383
422
|
list[dict[str, Any]]
|
|
384
423
|
List of suspected multi-line auto-complete events.
|
|
385
424
|
"""
|
|
425
|
+
from .load import is_edit_event
|
|
426
|
+
|
|
427
|
+
# Filter to only edit events
|
|
428
|
+
edit_events = [e for e in jsonData if is_edit_event(e)]
|
|
429
|
+
|
|
386
430
|
suspicious_events = []
|
|
387
431
|
|
|
388
|
-
|
|
432
|
+
# Build whitelist incrementally to only include content from BEFORE each event
|
|
433
|
+
past_whitelist = set()
|
|
434
|
+
|
|
435
|
+
for idx, event in enumerate(edit_events):
|
|
389
436
|
# Skip if already flagged by another detector
|
|
390
437
|
if idx in excluded_indices:
|
|
438
|
+
past_whitelist_update(idx, event, document_states, past_whitelist)
|
|
391
439
|
continue
|
|
392
440
|
|
|
393
441
|
old_frag = _normalize_newlines(event.get("oldFragment", ""))
|
|
@@ -395,6 +443,7 @@ def _detect_fullline_autocomplete(
|
|
|
395
443
|
|
|
396
444
|
# Skip first event (template) and no-change events
|
|
397
445
|
if idx == 0 or new_frag == old_frag:
|
|
446
|
+
past_whitelist_update(idx, event, document_states, past_whitelist)
|
|
398
447
|
continue
|
|
399
448
|
|
|
400
449
|
old_len = len(old_frag)
|
|
@@ -403,6 +452,7 @@ def _detect_fullline_autocomplete(
|
|
|
403
452
|
# At keystroke level, oldFragment is typically empty for insertions
|
|
404
453
|
# Allow up to 3 chars for prefix-based triggers (e.g., "de" -> "def")
|
|
405
454
|
if old_len > 3:
|
|
455
|
+
past_whitelist_update(idx, event, document_states, past_whitelist)
|
|
406
456
|
continue
|
|
407
457
|
|
|
408
458
|
# Check line count - we care about complete statements
|
|
@@ -417,10 +467,12 @@ def _detect_fullline_autocomplete(
|
|
|
417
467
|
|
|
418
468
|
if not (is_single_line or is_multi_line):
|
|
419
469
|
# Shouldn't happen, but skip if malformed
|
|
470
|
+
past_whitelist_update(idx, event, document_states, past_whitelist)
|
|
420
471
|
continue
|
|
421
472
|
|
|
422
473
|
# The new fragment should not be just whitespace
|
|
423
474
|
if not new_frag.strip():
|
|
475
|
+
past_whitelist_update(idx, event, document_states, past_whitelist)
|
|
424
476
|
continue
|
|
425
477
|
|
|
426
478
|
# Check if the new fragment contains code structure indicators
|
|
@@ -443,21 +495,25 @@ def _detect_fullline_autocomplete(
|
|
|
443
495
|
|
|
444
496
|
if not has_complete_statement:
|
|
445
497
|
# No complete statement - skip basic identifier completion
|
|
498
|
+
past_whitelist_update(idx, event, document_states, past_whitelist)
|
|
446
499
|
continue
|
|
447
500
|
|
|
448
501
|
# Minimum size for meaningful completion
|
|
449
|
-
if new_len <
|
|
502
|
+
if new_len < MIN_AUTOCOMPLETE_SIZE:
|
|
503
|
+
past_whitelist_update(idx, event, document_states, past_whitelist)
|
|
450
504
|
continue
|
|
451
505
|
|
|
452
506
|
# For multi-line: maximum size to distinguish from external pastes
|
|
453
507
|
# External pastes are typically much larger (100+ chars)
|
|
454
508
|
# Multi-line completions are usually 20-300 chars for a small function/block
|
|
455
509
|
if is_multi_line and new_len > 300:
|
|
510
|
+
past_whitelist_update(idx, event, document_states, past_whitelist)
|
|
456
511
|
continue
|
|
457
512
|
|
|
458
513
|
# For single-line: could be larger due to chained methods or long statements
|
|
459
514
|
# but cap at 200 chars to avoid flagging user-typed long lines
|
|
460
515
|
if is_single_line and new_len > 200:
|
|
516
|
+
past_whitelist_update(idx, event, document_states, past_whitelist)
|
|
461
517
|
continue
|
|
462
518
|
|
|
463
519
|
# Check if this content already existed in the document state BEFORE this event
|
|
@@ -468,6 +524,28 @@ def _detect_fullline_autocomplete(
|
|
|
468
524
|
if new_frag in prior_state:
|
|
469
525
|
is_internal_copy = True
|
|
470
526
|
|
|
527
|
+
# Also check against whitelist of content from BEFORE this event
|
|
528
|
+
if not is_internal_copy:
|
|
529
|
+
for hist_content in past_whitelist:
|
|
530
|
+
# Ignore tiny fragments
|
|
531
|
+
if len(hist_content) < MIN_AUTOCOMPLETE_SIZE:
|
|
532
|
+
continue
|
|
533
|
+
|
|
534
|
+
# Check for exact match or significant overlap
|
|
535
|
+
if new_frag == hist_content:
|
|
536
|
+
is_internal_copy = True
|
|
537
|
+
break
|
|
538
|
+
|
|
539
|
+
# Check for substring matches with similar length
|
|
540
|
+
similar_length = (
|
|
541
|
+
len(hist_content) >= 0.8 * len(new_frag)
|
|
542
|
+
and len(hist_content) <= 1.25 * len(new_frag)
|
|
543
|
+
)
|
|
544
|
+
|
|
545
|
+
if (new_frag in hist_content or hist_content in new_frag) and similar_length:
|
|
546
|
+
is_internal_copy = True
|
|
547
|
+
break
|
|
548
|
+
|
|
471
549
|
if not is_internal_copy:
|
|
472
550
|
line_desc = "line" if is_single_line else "lines"
|
|
473
551
|
suspicious_events.append({
|
|
@@ -478,9 +556,30 @@ def _detect_fullline_autocomplete(
|
|
|
478
556
|
"newFragment": new_frag,
|
|
479
557
|
})
|
|
480
558
|
|
|
559
|
+
# Add current event's content to whitelist for future events
|
|
560
|
+
past_whitelist_update(idx, event, document_states, past_whitelist)
|
|
561
|
+
|
|
481
562
|
return suspicious_events
|
|
482
563
|
|
|
483
564
|
|
|
565
|
+
def past_whitelist_update(
    idx: int,
    event: dict[str, Any],
    document_states: list[str],
    past_whitelist: set[str]
) -> None:
    """Record the content of one edit event in ``past_whitelist``.

    The event's ``oldFragment`` and ``newFragment`` (newline-normalized) and
    the document state stored at position ``idx`` are each added to the set,
    provided they are longer than ``MIN_AUTOCOMPLETE_SIZE`` characters.

    Parameters
    ----------
    idx : int
        Position of the event; also used to look up ``document_states``.
    event : dict[str, Any]
        The edit event whose fragments are recorded.
    document_states : list[str]
        Document states indexed by event position. If ``idx`` is past the end
        of the list, no document state is recorded.
    past_whitelist : set[str]
        Set that is mutated in place.
    """
    fragments = (
        _normalize_newlines(event.get("oldFragment", "")),
        _normalize_newlines(event.get("newFragment", "")),
    )
    for fragment in fragments:
        if len(fragment) > MIN_AUTOCOMPLETE_SIZE:
            past_whitelist.add(fragment)

    # Guard the lookup: the caller may pass an index with no recorded state.
    if idx < len(document_states):
        state = document_states[idx]
        if len(state) > MIN_AUTOCOMPLETE_SIZE:
            past_whitelist.add(state)
|
|
581
|
+
|
|
582
|
+
|
|
484
583
|
def detect_external_copypaste(jsonData: tuple[dict[str, Any], ...]) -> list[dict[str, Any]]:
|
|
485
584
|
"""
|
|
486
585
|
Detect copy-paste events from external sources and AI-assisted coding patterns.
|
|
@@ -555,13 +654,20 @@ def check_time_limit(jsonData: tuple[dict[str, Any], ...], time_limit_minutes: i
|
|
|
555
654
|
Check if the time between first and last edit exceeds the specified time limit.
|
|
556
655
|
|
|
557
656
|
Tracks elapsed editing time across sessions by summing actual editing time within
|
|
558
|
-
each session (excluding gaps between sessions).
|
|
559
|
-
|
|
657
|
+
each session (excluding gaps between sessions). Focus events (type="focusStatus")
|
|
658
|
+
are used to pause time tracking when the window loses focus for extended periods.
|
|
659
|
+
|
|
660
|
+
Time tracking behavior:
|
|
661
|
+
- Tracks actual editing time by looking at timestamps between edit events
|
|
662
|
+
- When a focusStatus event with focused=false is encountered, time tracking pauses
|
|
663
|
+
- Time tracking resumes when a focusStatus event with focused=true is encountered
|
|
664
|
+
- Gaps > 5 minutes while unfocused are excluded from time tracking
|
|
665
|
+
- Gaps <= 5 minutes are counted even when unfocused (student thinking/reviewing)
|
|
560
666
|
|
|
561
667
|
Parameters
|
|
562
668
|
----------
|
|
563
669
|
jsonData : tuple[dict[str, Any], ...]
|
|
564
|
-
The event data from the JSONL file
|
|
670
|
+
The event data from the JSONL file (all event types)
|
|
565
671
|
time_limit_minutes : int | None
|
|
566
672
|
Maximum allowed time in minutes between first and last overall edit.
|
|
567
673
|
If None, no time limit is enforced.
|
|
@@ -578,25 +684,34 @@ def check_time_limit(jsonData: tuple[dict[str, Any], ...], time_limit_minutes: i
|
|
|
578
684
|
def parse_ts(ts_str: str) -> datetime:
|
|
579
685
|
return datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
|
|
580
686
|
|
|
687
|
+
# Separate edit events from focus events
|
|
688
|
+
from .load import is_edit_event
|
|
689
|
+
|
|
690
|
+
edit_events = [e for e in jsonData if is_edit_event(e)]
|
|
691
|
+
focus_events = [e for e in jsonData if e.get("type") == "focusStatus"]
|
|
692
|
+
|
|
693
|
+
if not edit_events:
|
|
694
|
+
return None
|
|
695
|
+
|
|
581
696
|
# Identify session boundaries: sessions start at indices where offset == 0
|
|
582
697
|
# (indicating file reopen/recording restart) and oldFragment == newFragment (initial snapshot)
|
|
583
698
|
session_starts = [0] # First session always starts at index 0
|
|
584
|
-
for idx in range(1, len(
|
|
585
|
-
offset =
|
|
586
|
-
old_frag =
|
|
587
|
-
new_frag =
|
|
699
|
+
for idx in range(1, len(edit_events)):
|
|
700
|
+
offset = edit_events[idx].get("offset", -1)
|
|
701
|
+
old_frag = edit_events[idx].get("oldFragment", "")
|
|
702
|
+
new_frag = edit_events[idx].get("newFragment", "")
|
|
588
703
|
# Session boundary: offset is 0 and it's an initial snapshot (old == new, non-empty)
|
|
589
704
|
if offset == 0 and old_frag == new_frag and old_frag.strip() != "":
|
|
590
705
|
session_starts.append(idx)
|
|
591
706
|
|
|
592
707
|
# Add sentinel to mark end of last session
|
|
593
|
-
session_starts.append(len(
|
|
708
|
+
session_starts.append(len(edit_events))
|
|
594
709
|
|
|
595
710
|
# Find first and last timestamps overall
|
|
596
711
|
first_timestamp_overall = None
|
|
597
712
|
last_timestamp_overall = None
|
|
598
713
|
|
|
599
|
-
for event in
|
|
714
|
+
for event in edit_events:
|
|
600
715
|
if event.get("timestamp"):
|
|
601
716
|
if first_timestamp_overall is None:
|
|
602
717
|
first_timestamp_overall = event["timestamp"]
|
|
@@ -606,34 +721,72 @@ def check_time_limit(jsonData: tuple[dict[str, Any], ...], time_limit_minutes: i
|
|
|
606
721
|
# Not enough events with timestamps
|
|
607
722
|
return None
|
|
608
723
|
|
|
724
|
+
# Build a focus status timeline from focus events
|
|
725
|
+
# Map timestamp -> focused (True/False)
|
|
726
|
+
focus_timeline: list[tuple[datetime, bool]] = []
|
|
727
|
+
for focus_event in focus_events:
|
|
728
|
+
if "timestamp" in focus_event and "focused" in focus_event:
|
|
729
|
+
try:
|
|
730
|
+
ts = parse_ts(focus_event["timestamp"])
|
|
731
|
+
focused = focus_event["focused"]
|
|
732
|
+
focus_timeline.append((ts, focused))
|
|
733
|
+
except (ValueError, KeyError):
|
|
734
|
+
continue
|
|
735
|
+
|
|
736
|
+
# Sort by timestamp
|
|
737
|
+
focus_timeline.sort(key=lambda x: x[0])
|
|
738
|
+
|
|
739
|
+
def is_focused_at(timestamp: datetime) -> bool:
|
|
740
|
+
"""Check if the window was focused at the given timestamp."""
|
|
741
|
+
# Walk backwards through focus events to find the most recent state
|
|
742
|
+
for ts, focused in reversed(focus_timeline):
|
|
743
|
+
if ts <= timestamp:
|
|
744
|
+
return focused
|
|
745
|
+
# Default to focused if no prior focus event found
|
|
746
|
+
return True
|
|
747
|
+
|
|
609
748
|
# Calculate elapsed time by summing editing time within each session
|
|
749
|
+
# with focus-aware gap handling
|
|
610
750
|
total_minutes_elapsed = 0.0
|
|
751
|
+
UNFOCUSED_GAP_THRESHOLD_MINUTES = 5.0 # Don't count gaps > 5 min when unfocused
|
|
611
752
|
|
|
612
753
|
for i in range(len(session_starts) - 1):
|
|
613
754
|
session_start = session_starts[i]
|
|
614
755
|
session_end = session_starts[i + 1]
|
|
615
756
|
|
|
616
|
-
#
|
|
617
|
-
|
|
618
|
-
last_event_time = None
|
|
619
|
-
|
|
757
|
+
# Collect all timestamped events in this session
|
|
758
|
+
session_events: list[tuple[datetime, int]] = []
|
|
620
759
|
for idx in range(session_start, session_end):
|
|
621
|
-
event =
|
|
760
|
+
event = edit_events[idx]
|
|
622
761
|
timestamp = event.get("timestamp")
|
|
623
762
|
if timestamp:
|
|
624
763
|
try:
|
|
625
764
|
event_time = parse_ts(timestamp)
|
|
626
|
-
|
|
627
|
-
first_event_time = event_time
|
|
628
|
-
last_event_time = event_time
|
|
765
|
+
session_events.append((event_time, idx))
|
|
629
766
|
except (ValueError, KeyError):
|
|
630
|
-
# Skip events with invalid timestamps
|
|
631
767
|
continue
|
|
632
768
|
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
769
|
+
if not session_events:
|
|
770
|
+
continue
|
|
771
|
+
|
|
772
|
+
# Sort by timestamp
|
|
773
|
+
session_events.sort(key=lambda x: x[0])
|
|
774
|
+
|
|
775
|
+
# Calculate time by summing gaps between consecutive events
|
|
776
|
+
for j in range(len(session_events) - 1):
|
|
777
|
+
current_time, _ = session_events[j]
|
|
778
|
+
next_time, _ = session_events[j + 1]
|
|
779
|
+
|
|
780
|
+
gap_seconds = (next_time - current_time).total_seconds()
|
|
781
|
+
gap_minutes = gap_seconds / 60
|
|
782
|
+
|
|
783
|
+
# Check focus status at the end of this gap (next_time)
|
|
784
|
+
# If unfocused and gap is large, don't count it
|
|
785
|
+
if not is_focused_at(next_time) and gap_minutes > UNFOCUSED_GAP_THRESHOLD_MINUTES:
|
|
786
|
+
# Skip this gap - student was away from editor
|
|
787
|
+
continue
|
|
788
|
+
|
|
789
|
+
total_minutes_elapsed += gap_minutes
|
|
637
790
|
|
|
638
791
|
# For time limit check, use the span from first to last timestamp overall
|
|
639
792
|
try:
|
|
@@ -681,3 +834,60 @@ def verify(template: str, jsonData: tuple[dict[str, Any], ...]) -> tuple[str, li
|
|
|
681
834
|
suspicious_events = detect_external_copypaste(jsonData)
|
|
682
835
|
|
|
683
836
|
return verified_template, suspicious_events
|
|
837
|
+
|
|
838
|
+
|
|
839
|
+
def combine_time_info(
|
|
840
|
+
time_infos: list[dict[str, Any] | None], time_limit_minutes: int | None
|
|
841
|
+
) -> dict[str, Any] | None:
|
|
842
|
+
"""
|
|
843
|
+
Combine time information from multiple recording files.
|
|
844
|
+
|
|
845
|
+
Parameters
|
|
846
|
+
----------
|
|
847
|
+
time_infos : list[dict[str, Any] | None]
|
|
848
|
+
List of time information dictionaries from multiple files
|
|
849
|
+
time_limit_minutes : int | None
|
|
850
|
+
Time limit to check against
|
|
851
|
+
|
|
852
|
+
Returns
|
|
853
|
+
-------
|
|
854
|
+
dict[str, Any] | None
|
|
855
|
+
Combined time information, or None if no valid data
|
|
856
|
+
"""
|
|
857
|
+
valid_infos = [info for info in time_infos if info is not None]
|
|
858
|
+
if not valid_infos:
|
|
859
|
+
return None
|
|
860
|
+
|
|
861
|
+
# Sum elapsed times across all sessions
|
|
862
|
+
total_elapsed = sum(info["minutes_elapsed"] for info in valid_infos)
|
|
863
|
+
|
|
864
|
+
# Find overall first and last timestamps
|
|
865
|
+
all_timestamps = []
|
|
866
|
+
for info in valid_infos:
|
|
867
|
+
all_timestamps.append(
|
|
868
|
+
datetime.fromisoformat(info["first_timestamp"].replace("Z", "+00:00"))
|
|
869
|
+
)
|
|
870
|
+
all_timestamps.append(
|
|
871
|
+
datetime.fromisoformat(info["last_timestamp"].replace("Z", "+00:00"))
|
|
872
|
+
)
|
|
873
|
+
|
|
874
|
+
first_ts = min(all_timestamps)
|
|
875
|
+
last_ts = max(all_timestamps)
|
|
876
|
+
overall_span = (last_ts - first_ts).total_seconds() / 60
|
|
877
|
+
|
|
878
|
+
result = {
|
|
879
|
+
"time_limit_minutes": time_limit_minutes,
|
|
880
|
+
"minutes_elapsed": round(total_elapsed, 2),
|
|
881
|
+
"first_timestamp": first_ts.isoformat().replace("+00:00", "Z"),
|
|
882
|
+
"last_timestamp": last_ts.isoformat().replace("+00:00", "Z"),
|
|
883
|
+
"file_count": len(valid_infos),
|
|
884
|
+
"overall_span_minutes": round(overall_span, 2),
|
|
885
|
+
}
|
|
886
|
+
|
|
887
|
+
# For time limit check in combined mode, use the sum of elapsed times
|
|
888
|
+
if time_limit_minutes is not None:
|
|
889
|
+
result["exceeds_limit"] = total_elapsed > time_limit_minutes
|
|
890
|
+
else:
|
|
891
|
+
result["exceeds_limit"] = False
|
|
892
|
+
|
|
893
|
+
return result
|