cr-proc 0.1.3__tar.gz → 0.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cr_proc-0.1.3 → cr_proc-0.1.5}/PKG-INFO +2 -2
- {cr_proc-0.1.3 → cr_proc-0.1.5}/pyproject.toml +2 -2
- {cr_proc-0.1.3 → cr_proc-0.1.5}/src/code_recorder_processor/api/load.py +21 -4
- {cr_proc-0.1.3 → cr_proc-0.1.5}/src/code_recorder_processor/api/verify.py +285 -55
- cr_proc-0.1.5/src/code_recorder_processor/cli.py +424 -0
- cr_proc-0.1.3/src/code_recorder_processor/cli.py +0 -183
- {cr_proc-0.1.3 → cr_proc-0.1.5}/README.md +0 -0
- {cr_proc-0.1.3 → cr_proc-0.1.5}/src/code_recorder_processor/__init__.py +0 -0
- {cr_proc-0.1.3 → cr_proc-0.1.5}/src/code_recorder_processor/api/build.py +0 -0
{cr_proc-0.1.3 → cr_proc-0.1.5}/PKG-INFO

@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: cr_proc
-Version: 0.1.3
-Summary: A tool for processing BYU CS code recording files
+Version: 0.1.5
+Summary: A tool for processing BYU CS code recording files.
 Author: Ethan Dye
 Author-email: mrtops03@gmail.com
 Requires-Python: >=3.14
{cr_proc-0.1.3 → cr_proc-0.1.5}/pyproject.toml

@@ -1,7 +1,7 @@
 [project]
 name = "cr_proc"
-version = "0.1.3"
-description = "A tool for processing BYU CS code recording files"
+version = "0.1.5"
+description = "A tool for processing BYU CS code recording files."
 authors = [
     {name = "Ethan Dye",email = "mrtops03@gmail.com"}
 ]
{cr_proc-0.1.3 → cr_proc-0.1.5}/src/code_recorder_processor/api/load.py

@@ -65,12 +65,29 @@ def load_jsonl(file: Path) -> tuple[dict[str, Any], ...]:
 
     if data is None:
         # If gzip stream is broken, attempt a lenient zlib decompress to salvage content.
+        # Handle multiple concatenated gzip streams (common in recordings)
         try:
             raw = file.read_bytes()
-
-
-
-
+            all_text = ""
+            remaining = raw
+
+            # Decompress all concatenated gzip streams
+            while remaining:
+                dobj = zlib.decompressobj(16 + zlib.MAX_WBITS)
+                try:
+                    text_bytes = dobj.decompress(remaining) + dobj.flush()
+                    all_text += text_bytes.decode("utf-8", errors="replace")
+                    remaining = dobj.unused_data
+                    if not text_bytes or not remaining:
+                        break
+                except Exception:
+                    # If decompression fails, try to salvage what we have
+                    break
+
+            if all_text:
+                data = _load_jsonl(StringIO(all_text))
+            else:
+                data = None
         except Exception:
             data = None
 
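A note on the load.py hunk above: a `.recording.jsonl.gz` that was appended to across editor sessions contains several concatenated gzip members, and a corrupt or truncated member makes an all-or-nothing `gzip.decompress` raise with nothing salvaged. Below is a minimal standalone sketch of the member-by-member technique the new code uses (illustrative, not package code; the package additionally guards each member with try/except so partial output survives a damaged member):

```python
import gzip
import zlib

# Simulate a recording appended to twice: two gzip members back to back.
blob = gzip.compress(b'{"event": 1}\n') + gzip.compress(b'{"event": 2}\n')

text = ""
remaining = blob
while remaining:
    # wbits = 16 + MAX_WBITS makes zlib expect a gzip header on this member.
    dobj = zlib.decompressobj(16 + zlib.MAX_WBITS)
    chunk = dobj.decompress(remaining) + dobj.flush()
    text += chunk.decode("utf-8", errors="replace")
    remaining = dobj.unused_data  # bytes belonging to the next member, if any
    if not chunk or not remaining:
        break

print(text)  # both JSON lines are recovered
```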
{cr_proc-0.1.3 → cr_proc-0.1.5}/src/code_recorder_processor/api/verify.py

@@ -113,20 +113,84 @@ def template_diff(template: str, jsonData: tuple[dict[str, Any], ...]) -> str:
     return "".join(diff_iter)
 
 
-def _detect_multiline_external_pastes(jsonData: tuple[dict[str, Any], ...]) -> list[dict[str, Any]]:
+def _build_document_states(jsonData: tuple[dict[str, Any], ...]) -> tuple[list[str], set[str]]:
+    """
+    Build complete document state at each event and a whitelist of all content seen.
+
+    Reconstructs the document after each keystroke/edit to track what content
+    existed in the document at each point in time. This allows detectors to
+    check if pasted/autocompleted content already existed in the document.
+
+    Parameters
+    ----------
+    jsonData : tuple[dict[str, Any], ...]
+        The event data from the JSONL file
+
+    Returns
+    -------
+    tuple[list[str], set[str]]
+        - List of document states (one per event, strings of full document content)
+        - Set of all content fragments ever seen (whitelist for internal copy detection)
+    """
+    document_states = []
+    content_whitelist = set()
+    current_state = ""
+
+    for idx, event in enumerate(jsonData):
+        old_frag = _normalize_newlines(event.get("oldFragment", ""))
+        new_frag = _normalize_newlines(event.get("newFragment", ""))
+        offset = event.get("offset", 0)
+
+        # First event is the initial snapshot (template)
+        if idx == 0:
+            current_state = new_frag
+        elif new_frag != old_frag:
+            # Apply the edit to reconstruct document state
+            current_state = current_state[:offset] + new_frag + current_state[offset + len(old_frag):]
+
+        document_states.append(current_state)
+
+        # Build whitelist of all content fragments seen
+        # Add both old and new fragments to whitelist for comprehensive coverage
+        if len(old_frag) > 10:  # Ignore tiny fragments
+            content_whitelist.add(old_frag)
+        if len(new_frag) > 10:
+            content_whitelist.add(new_frag)
+
+        # Also add the full document state to whitelist
+        if len(current_state) > 10:
+            content_whitelist.add(current_state)
+
+    return document_states, content_whitelist
+
+
+def _detect_multiline_external_pastes(
+    jsonData: tuple[dict[str, Any], ...],
+    document_states: list[str],
+    content_whitelist: set[str]
+) -> list[dict[str, Any]]:
     """
     Detect multi-line copy-paste events from external sources.
 
     Flags newFragments that are significant in length (more than one line)
     and do not appear to be copied from within the document itself.
 
-
+    Parameters
+    ----------
+    jsonData : tuple[dict[str, Any], ...]
+        The event data
+    document_states : list[str]
+        Full document state at each event
+    content_whitelist : set[str]
+        All content fragments ever seen in the document (for internal copy detection)
+
+    Returns
+    -------
+    list[dict[str, Any]]
+        List of suspicious multi-line paste events.
     """
     suspicious_events = []
 
-    # Build a history of all document content seen so far
-    document_history = set()
-
     for idx, event in enumerate(jsonData):
         old_frag = _normalize_newlines(event.get("oldFragment", ""))
         new_frag = _normalize_newlines(event.get("newFragment", ""))
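The `_build_document_states` hunk above replays each recorded edit as a character-offset splice over a running document string. A tiny worked example of that replay loop (hypothetical event data, not from a real recording):

```python
# Event 0 is the initial snapshot (old == new); event 1 replaces "pass"
# with "return 42" at character offset 13.
events = (
    {"offset": 0, "oldFragment": "def f():\n    pass\n", "newFragment": "def f():\n    pass\n"},
    {"offset": 13, "oldFragment": "pass", "newFragment": "return 42"},
)

state = ""
document_states = []
for idx, event in enumerate(events):
    old_frag = event["oldFragment"]
    new_frag = event["newFragment"]
    offset = event["offset"]
    if idx == 0:
        state = new_frag  # first event seeds the document with the template
    elif new_frag != old_frag:
        # Splice the edit into the running state, exactly as in the diff above.
        state = state[:offset] + new_frag + state[offset + len(old_frag):]
    document_states.append(state)

print(document_states[-1])  # "def f():\n    return 42\n"
```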
@@ -140,32 +204,39 @@ def _detect_multiline_external_pastes(jsonData: tuple[dict[str, Any], ...]) -> l
         if len(new_lines) <= 2:  # Single line or line + empty
             continue
 
-        # Check if the new content
+        # Check if the new content already existed in the document at any prior point
         is_internal_copy = False
 
-        # Check
-
-
-        if
-
+        # Check against document state BEFORE this event
+        if idx > 0:
+            prior_state = document_states[idx - 1]
+            if new_frag in prior_state:
+                is_internal_copy = True
+
+        # Also check against whitelist of all content seen
+        if not is_internal_copy:
+            for hist_content in content_whitelist:
+                # Ignore tiny fragments
+                if len(hist_content) < 20:
+                    continue
 
-
-
-
-
-
+                # Require substantial overlap in size to count as an internal copy
+                similar_length = (
+                    len(hist_content) >= 0.8 * len(new_frag)
+                    and len(hist_content) <= 1.25 * len(new_frag)
+                )
 
-
-
-
+                if new_frag == hist_content:
+                    is_internal_copy = True
+                    break
 
-
-
-
+                if new_frag in hist_content and similar_length:
+                    is_internal_copy = True
+                    break
 
-
-
-
+                if hist_content in new_frag and similar_length:
+                    is_internal_copy = True
+                    break
 
         # Also check if it's in the old fragment (internal move/copy)
         if not is_internal_copy and old_frag and (new_frag in old_frag or old_frag in new_frag):

@@ -180,12 +251,6 @@ def _detect_multiline_external_pastes(jsonData: tuple[dict[str, Any], ...]) -> l
                 "newFragment": new_frag
             })
 
-        # Update history after analysis so the current fragment cannot mask itself
-        if len(old_frag) > 1:
-            document_history.add(old_frag)
-        if len(new_frag) > 1:
-            document_history.add(new_frag)
-
     return suspicious_events
 
 
@@ -262,6 +327,119 @@ def _detect_rapid_paste_sequences(jsonData: tuple[dict[str, Any], ...]) -> list[
     return suspicious_events
 
 
+def _detect_fullline_autocomplete(
+    jsonData: tuple[dict[str, Any], ...],
+    document_states: list[str],
+    content_whitelist: set[str],
+    excluded_indices: set[int]
+) -> list[dict[str, Any]]:
+    """
+    Detect full-line auto-complete events where the IDE/AI completes code.
+
+    At keystroke level, events show:
+    - Normal typing: oldFragment="" (empty), newFragment="X" (1 char)
+    - Auto-complete: oldFragment="" (empty), newFragment="long_text" (10+ chars)
+
+    Auto-complete is detected when:
+    - oldFragment is empty or very short (0-3 chars)
+    - newFragment is substantial (10+ characters)
+    - newFragment contains code structure (assignment, parens, brackets, etc.)
+    - newFragment does NOT already exist in the document state
+    - Event not already flagged as external copy-paste
+
+    Parameters
+    ----------
+    jsonData : tuple[dict[str, Any], ...]
+        The event data
+    document_states : list[str]
+        Full document state at each event
+    content_whitelist : set[str]
+        All content fragments ever seen in the document
+    excluded_indices : set[int]
+        Set of event indices already flagged by other detectors (to avoid double-flagging)
+
+    Returns
+    -------
+    list[dict[str, Any]]
+        List of suspected auto-complete events.
+    """
+    suspicious_events = []
+
+    for idx, event in enumerate(jsonData):
+        # Skip if already flagged by another detector
+        if idx in excluded_indices:
+            continue
+
+        old_frag = _normalize_newlines(event.get("oldFragment", ""))
+        new_frag = _normalize_newlines(event.get("newFragment", ""))
+
+        # Skip first event (template) and no-change events
+        if idx == 0 or new_frag == old_frag:
+            continue
+
+        old_len = len(old_frag)
+        new_len = len(new_frag)
+
+        # At keystroke level, oldFragment is typically empty for insertions
+        # Allow up to 3 chars for prefix-based autocomplete triggers
+        if old_len > 3:
+            continue
+
+        # Skip single-character additions (normal typing)
+        # Auto-complete typically adds 10+ characters at once
+        if new_len < 10:
+            continue
+
+        # Skip large multi-line pastes - those should be caught by multi-line paste detector
+        # Auto-complete is typically 1-2 lines and under 100 chars
+        # Anything larger is likely external copy-paste, not auto-complete
+        new_lines = new_frag.split("\n")
+        if len(new_lines) > 2 or new_len > 100:
+            continue
+
+        # The new fragment should not be just whitespace
+        if not new_frag.strip():
+            continue
+
+        # Check if the new fragment contains code structure indicators
+        # These strongly suggest IDE/AI auto-completion of code
+        code_indicators = [
+            "=",   # Assignment (most common in autocomplete)
+            "(",   # Function call/definition
+            ")",   # Closing paren
+            ":",   # Block statement (if, for, def, etc.)
+            "{",   # Dictionary/block
+            "}",   # Closing brace
+            "[",   # List/index
+            "]",   # Closing bracket
+            "=>",  # Arrow function
+            ";",   # Statement end
+        ]
+
+        has_code_structure = any(indicator in new_frag for indicator in code_indicators)
+
+        # Must have code structure to be considered auto-complete
+        if has_code_structure:
+            # Check if this content already existed in the document state BEFORE this event
+            is_internal_copy = False
+
+            if idx > 0:
+                prior_state = document_states[idx - 1]
+                if new_frag in prior_state:
+                    is_internal_copy = True
+
+            if not is_internal_copy:
+                suspicious_events.append({
+                    "event_index": idx,
+                    "line_count": len(new_lines),
+                    "char_count": new_len,
+                    "reason": "full-line auto-complete",
+                    "newFragment": new_frag,
+                })
+
+    return suspicious_events
+
+
 def detect_external_copypaste(jsonData: tuple[dict[str, Any], ...]) -> list[dict[str, Any]]:
     """
     Detect copy-paste events from external sources and AI-assisted coding patterns.
@@ -269,16 +447,64 @@ def detect_external_copypaste(jsonData: tuple[dict[str, Any], ...]) -> list[dict
     Combines detection of:
     1. Multi-line external paste events (content not from within document)
     2. Rapid one-line paste sequences (potential AI assistance indicator)
+    3. Full-line auto-complete events (user types, AI completes the line)
+
+    Detection order matters: events flagged by earlier detectors are excluded
+    from later detectors to avoid double-flagging.
 
-    Returns a list of all suspicious events with metadata.
+    Returns a list of all suspicious events with metadata, including aggregate statistics.
     """
     suspicious_events = []
 
-    #
-
+    # Build shared document state tracking
+    # This reconstructs the full document at each event and creates a whitelist
+    # of all content that has ever appeared in the document
+    document_states, content_whitelist = _build_document_states(jsonData)
+
+    # Step 1: Detect multi-line external pastes
+    multiline_events = _detect_multiline_external_pastes(jsonData, document_states, content_whitelist)
+    suspicious_events.extend(multiline_events)
+
+    # Step 2: Detect rapid one-line paste sequences (AI indicator)
+    rapid_paste_events = _detect_rapid_paste_sequences(jsonData)
+    suspicious_events.extend(rapid_paste_events)
+
+    # Build set of all event indices already flagged
+    excluded_indices = set()
+    for event in multiline_events:
+        # Handle both single events and clusters
+        if "event_indices" in event:
+            excluded_indices.update(event["event_indices"])
+        else:
+            excluded_indices.add(event["event_index"])
+
+    for event in rapid_paste_events:
+        if "event_indices" in event:
+            excluded_indices.update(event["event_indices"])
+        else:
+            excluded_indices.add(event["event_index"])
+
+    # Step 3: Detect full-line auto-complete events (excluding already-flagged events)
+    autocomplete_events = _detect_fullline_autocomplete(
+        jsonData, document_states, content_whitelist, excluded_indices
+    )
 
-    #
-
+    # Calculate aggregate statistics for auto-complete/small paste events
+    # Store individual events for optional detailed review, but don't report them by default
+    if autocomplete_events:
+        total_autocomplete_chars = sum(ev["char_count"] for ev in autocomplete_events)
+        total_autocomplete_events = len(autocomplete_events)
+
+        # Always add aggregate summary, never individual events
+        # Store individual events in the aggregate for optional detailed review
+        suspicious_events.append({
+            "event_index": -1,  # Special marker for aggregate
+            "event_count": total_autocomplete_events,
+            "total_chars": total_autocomplete_chars,
+            "reason": "aggregate auto-complete/small paste activity",
+            "newFragment": f"{total_autocomplete_events} auto-complete events ({total_autocomplete_chars} total chars)",
+            "detailed_events": autocomplete_events,  # Store for optional review
+        })
 
     return suspicious_events
 
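After this change, `detect_external_copypaste` returns three shapes of entry: single events keyed by `event_index`, clusters keyed by `event_indices`, and one aggregate sentinel whose `event_index` is `-1`, with the individual auto-complete events nested under `detailed_events`. A sketch of a consumer that branches on those shapes (illustrative only, not package code):

```python
from typing import Any

def summarize(suspicious_events: list[dict[str, Any]]) -> None:
    for ev in suspicious_events:
        if ev.get("event_index") == -1:
            # The aggregate auto-complete entry added in this version.
            print(f"autocomplete: {ev['event_count']} events, {ev['total_chars']} chars")
        elif "event_indices" in ev:
            idxs = ev["event_indices"]
            print(f"{ev['reason']}: events {idxs[0]}..{idxs[-1]}")
        else:
            print(f"{ev['reason']}: event {ev['event_index']} ({ev['char_count']} chars)")
```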
@@ -311,12 +537,15 @@ def check_time_limit(jsonData: tuple[dict[str, Any], ...], time_limit_minutes: i
     def parse_ts(ts_str: str) -> datetime:
         return datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
 
-    # Identify session boundaries: sessions start at indices where
+    # Identify session boundaries: sessions start at indices where offset == 0
+    # (indicating file reopen/recording restart) and oldFragment == newFragment (initial snapshot)
     session_starts = [0]  # First session always starts at index 0
     for idx in range(1, len(jsonData)):
+        offset = jsonData[idx].get("offset", -1)
         old_frag = jsonData[idx].get("oldFragment", "")
         new_frag = jsonData[idx].get("newFragment", "")
-
+        # Session boundary: offset is 0 and it's an initial snapshot (old == new, non-empty)
+        if offset == 0 and old_frag == new_frag and old_frag.strip() != "":
             session_starts.append(idx)
 
     # Add sentinel to mark end of last session

@@ -344,25 +573,26 @@ def check_time_limit(jsonData: tuple[dict[str, Any], ...], time_limit_minutes: i
         session_end = session_starts[i + 1]
 
         # Find first and last events with timestamps in this session
-
-
-
-        for
-
-
-
-
+        first_event_time = None
+        last_event_time = None
+
+        for idx in range(session_start, session_end):
+            event = jsonData[idx]
+            timestamp = event.get("timestamp")
+            if timestamp:
+                try:
+                    event_time = parse_ts(timestamp)
+                    if first_event_time is None:
+                        first_event_time = event_time
+                    last_event_time = event_time
+                except (ValueError, KeyError):
+                    # Skip events with invalid timestamps
+                    continue
 
         # If this session has timestamped events, add its elapsed time
-        if
-
-
-                last_time = parse_ts(last_event["timestamp"])
-                session_diff = last_time - first_time
-                total_minutes_elapsed += session_diff.total_seconds() / 60
-            except (ValueError, KeyError):
-                # Timestamp parsing failed for this session, skip it
-                continue
+        if first_event_time is not None and last_event_time is not None:
+            session_diff = last_event_time - first_event_time
+            total_minutes_elapsed += session_diff.total_seconds() / 60
 
     # For time limit check, use the span from first to last timestamp overall
     try:
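The two `check_time_limit` hunks above make elapsed time a per-session sum, so breaks between recording sessions no longer inflate it, while the limit check still uses the overall first-to-last span. A small illustration with made-up timestamps:

```python
from datetime import datetime

def parse_ts(ts: str) -> datetime:
    return datetime.fromisoformat(ts.replace("Z", "+00:00"))

def minutes_between(first: str, last: str) -> float:
    return (parse_ts(last) - parse_ts(first)).total_seconds() / 60

# Two sessions separated by a 4.5 hour break:
sessions = [
    ("2025-01-01T10:00:00Z", "2025-01-01T10:30:00Z"),  # 30 minutes of editing
    ("2025-01-01T15:00:00Z", "2025-01-01T15:15:00Z"),  # 15 minutes of editing
]

elapsed = sum(minutes_between(a, b) for a, b in sessions)
span = minutes_between(sessions[0][0], sessions[1][1])
print(elapsed)  # 45.0  -> reported as elapsed editing time
print(span)     # 315.0 -> compared against the time limit
```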
cr_proc-0.1.5/src/code_recorder_processor/cli.py (new file)

@@ -0,0 +1,424 @@
+import argparse
+import json
+import sys
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+from .api.build import reconstruct_file_from_events
+from .api.load import load_jsonl
+from .api.verify import check_time_limit, template_diff, verify
+
+
+def resolve_document(
+    docs: list[str], template_path: Path, override: str | None
+) -> str | None:
+    """
+    Determine which document from the recording to process.
+
+    Parameters
+    ----------
+    docs : list[str]
+        List of document paths found in the recording
+    template_path : Path
+        Path to the template file
+    override : str | None
+        Explicit document name or path override
+
+    Returns
+    -------
+    str | None
+        The resolved document path, or None if no documents exist
+
+    Raises
+    ------
+    ValueError
+        If document resolution is ambiguous or the override doesn't match
+    """
+    if not docs:
+        return None
+
+    if override:
+        matches = [
+            d for d in docs if d.endswith(override) or Path(d).name == override
+        ]
+        if not matches:
+            raise ValueError(
+                f"No document in recording matches '{override}'. Available: {docs}"
+            )
+        if len(matches) > 1:
+            raise ValueError(
+                f"Ambiguous document override '{override}'. Matches: {matches}"
+            )
+        return matches[0]
+
+    template_ext = template_path.suffix
+    ext_matches = [d for d in docs if Path(d).suffix == template_ext]
+    if len(ext_matches) == 1:
+        return ext_matches[0]
+    if len(ext_matches) > 1:
+        raise ValueError(
+            f"Multiple documents share extension '{template_ext}': {ext_matches}. "
+            "Use --document to choose one."
+        )
+
+    if len(docs) == 1:
+        return docs[0]
+
+    raise ValueError(
+        "Could not determine document to process. Use --document to select one. "
+        f"Available documents: {docs}"
+    )
+
+
+def get_recorded_documents(events: tuple[dict[str, Any], ...]) -> list[str]:
+    """
+    Extract unique document paths from recording events.
+
+    Parameters
+    ----------
+    events : tuple[dict[str, Any], ...]
+        Recording events loaded from JSONL
+
+    Returns
+    -------
+    list[str]
+        Sorted list of unique document paths
+    """
+    documents = {
+        e.get("document")
+        for e in events
+        if "document" in e and e.get("document") is not None
+    }
+    return sorted([d for d in documents if d is not None])
+
+
+def filter_events_by_document(
+    events: tuple[dict[str, Any], ...], document: str | None
+) -> tuple[dict[str, Any], ...]:
+    """
+    Filter events to only those for a specific document.
+
+    Parameters
+    ----------
+    events : tuple[dict[str, Any], ...]
+        All recording events
+    document : str | None
+        Document path to filter by, or None to return all events
+
+    Returns
+    -------
+    tuple[dict[str, Any], ...]
+        Filtered events
+    """
+    if document:
+        return tuple(e for e in events if e.get("document") == document)
+    return events
+
+
+def display_time_info(time_info: dict[str, Any] | None) -> None:
+    """
+    Display elapsed time and time limit information.
+
+    Parameters
+    ----------
+    time_info : dict[str, Any] | None
+        Time information from check_time_limit, or None if no time data
+    """
+    if not time_info:
+        return
+
+    print(
+        f"Elapsed editing time: {time_info['minutes_elapsed']} minutes",
+        file=sys.stderr,
+    )
+
+    first_ts = datetime.fromisoformat(
+        time_info["first_timestamp"].replace("Z", "+00:00")
+    )
+    last_ts = datetime.fromisoformat(
+        time_info["last_timestamp"].replace("Z", "+00:00")
+    )
+    time_span = (last_ts - first_ts).total_seconds() / 60
+
+    print(f"Time span (first to last edit): {time_span:.2f} minutes", file=sys.stderr)
+
+    if time_info["exceeds_limit"]:
+        print("\nTime limit exceeded!", file=sys.stderr)
+        print(f"  Limit: {time_info['time_limit_minutes']} minutes", file=sys.stderr)
+        print(f"  First edit: {time_info['first_timestamp']}", file=sys.stderr)
+        print(f"  Last edit: {time_info['last_timestamp']}", file=sys.stderr)
+
+
+def display_suspicious_event(event: dict[str, Any], show_details: bool) -> None:
+    """
+    Display a single suspicious event.
+
+    Parameters
+    ----------
+    event : dict[str, Any]
+        Suspicious event data
+    show_details : bool
+        Whether to show detailed autocomplete events
+    """
+    reason = event.get("reason", "unknown")
+
+    # Handle aggregate auto-complete events
+    if event.get("event_index") == -1 and "detailed_events" in event:
+        event_count = event["event_count"]
+        total_chars = event["total_chars"]
+        print(
+            f"  Aggregate: {event_count} auto-complete/small paste events "
+            f"({total_chars} total chars)",
+            file=sys.stderr,
+        )
+
+        if show_details:
+            print("  Detailed events:", file=sys.stderr)
+            for detail in event["detailed_events"]:
+                detail_idx = detail["event_index"]
+                detail_lines = detail["line_count"]
+                detail_chars = detail["char_count"]
+                detail_frag = detail["newFragment"]
+                print(
+                    f"    Event #{detail_idx}: {detail_lines} lines, "
+                    f"{detail_chars} chars",
+                    file=sys.stderr,
+                )
+                print("    ```", file=sys.stderr)
+                for line in detail_frag.split("\n"):
+                    print(f"    {line}", file=sys.stderr)
+                print("    ```", file=sys.stderr)
+
+    elif "event_indices" in event:
+        indices = event.get("event_indices", [event["event_index"]])
+        print(
+            f"  Events #{indices[0]}-#{indices[-1]} ({reason}): "
+            f"{event['line_count']} lines, {event['char_count']} chars",
+            file=sys.stderr,
+        )
+
+    else:
+        new_fragment = event["newFragment"].replace("\n", "\n  ")
+        print(
+            f"  Event #{event['event_index']} ({reason}): "
+            f"{event['line_count']} lines, {event['char_count']} chars - "
+            f"newFragment:\n  ```\n  {new_fragment}\n  ```",
+            file=sys.stderr,
+        )
+
+
+def display_suspicious_events(
+    suspicious_events: list[dict[str, Any]], show_details: bool
+) -> None:
+    """
+    Display all suspicious events or success message.
+
+    Parameters
+    ----------
+    suspicious_events : list[dict[str, Any]]
+        List of suspicious events detected
+    show_details : bool
+        Whether to show detailed autocomplete events
+    """
+    if suspicious_events:
+        print("\nSuspicious copy-paste events detected:", file=sys.stderr)
+        for event in suspicious_events:
+            display_suspicious_event(event, show_details)
+    else:
+        print("Success! No suspicious events detected.", file=sys.stderr)
+
+
+def write_json_output(
+    output_path: Path,
+    document: str,
+    time_info: dict[str, Any] | None,
+    suspicious_events: list[dict[str, Any]],
+) -> None:
+    """
+    Write verification results to JSON file.
+
+    Parameters
+    ----------
+    output_path : Path
+        Path to output JSON file
+    document : str
+        Document that was processed
+    time_info : dict[str, Any] | None
+        Time information from verification
+    suspicious_events : list[dict[str, Any]]
+        List of suspicious events detected
+
+    Raises
+    ------
+    Exception
+        If file writing fails
+    """
+    results = {
+        "document": document,
+        "time_info": time_info,
+        "suspicious_events": suspicious_events,
+    }
+
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(output_path, "w") as f:
+        json.dump(results, f, indent=2)
+    print(f"Results written to {output_path}", file=sys.stderr)
+
+
+def create_parser() -> argparse.ArgumentParser:
+    """
+    Create and configure the argument parser.
+
+    Returns
+    -------
+    argparse.ArgumentParser
+        Configured argument parser
+    """
+    parser = argparse.ArgumentParser(
+        description="Process and verify code recorder JSONL files"
+    )
+    parser.add_argument(
+        "jsonl_file",
+        type=Path,
+        help="Path to the compressed JSONL file (*.recording.jsonl.gz)",
+    )
+    parser.add_argument(
+        "template_file",
+        type=Path,
+        help="Path to the initial template file that was recorded",
+    )
+    parser.add_argument(
+        "-t",
+        "--time-limit",
+        type=int,
+        default=None,
+        help="Maximum allowed time in minutes between first and last edit. "
+        "If exceeded, recording is flagged.",
+    )
+    parser.add_argument(
+        "-d",
+        "--document",
+        type=str,
+        default=None,
+        help="Document path or filename to process from the recording. "
+        "Defaults to the document whose extension matches the template file.",
+    )
+    parser.add_argument(
+        "-o",
+        "--output-json",
+        type=Path,
+        default=None,
+        help="Path to output JSON file with verification results "
+        "(time info and suspicious events).",
+    )
+    parser.add_argument(
+        "-s",
+        "--show-autocomplete-details",
+        action="store_true",
+        help="Show individual auto-complete events in addition to "
+        "aggregate statistics",
+    )
+    return parser
+
+
+def main() -> int:
+    """
+    Main entry point for the CLI application.
+
+    Returns
+    -------
+    int
+        Exit code (0 for success, 1 for errors)
+    """
+    parser = create_parser()
+    args = parser.parse_args()
+
+    # Load JSONL file
+    try:
+        json_data = load_jsonl(args.jsonl_file)
+    except FileNotFoundError as e:
+        print(f"Error: {e}", file=sys.stderr)
+        return 1
+    except (ValueError, IOError) as e:
+        print(f"Error loading JSONL file: {e}", file=sys.stderr)
+        return 1
+
+    # Resolve which document to process
+    recorded_docs = get_recorded_documents(json_data)
+    try:
+        target_document = resolve_document(
+            recorded_docs, args.template_file, args.document
+        )
+    except ValueError as e:
+        print(f"Error determining document: {e}", file=sys.stderr)
+        return 1
+
+    # Filter events for target document
+    doc_events = filter_events_by_document(json_data, target_document)
+    if target_document and not doc_events:
+        print(
+            f"Error: No events found for document '{target_document}'",
+            file=sys.stderr,
+        )
+        return 1
+
+    print(f"Processing: {target_document or args.template_file}", file=sys.stderr)
+
+    # Read template file
+    try:
+        template_data = args.template_file.read_text()
+    except FileNotFoundError:
+        print(
+            f"Error: Template file not found: {args.template_file}", file=sys.stderr
+        )
+        return 1
+    except Exception as e:
+        print(f"Error reading template file: {e}", file=sys.stderr)
+        return 1
+
+    # Check and display time information
+    time_info = check_time_limit(doc_events, args.time_limit)
+    display_time_info(time_info)
+
+    # Verify and process the recording
+    try:
+        template_data, suspicious_events = verify(template_data, doc_events)
+        reconstructed = reconstruct_file_from_events(
+            doc_events, template_data, document_path=target_document
+        )
+        print(reconstructed)
+
+        # Display suspicious events
+        display_suspicious_events(suspicious_events, args.show_autocomplete_details)
+
+        # Write JSON output if requested
+        if args.output_json:
+            try:
+                write_json_output(
+                    args.output_json,
+                    target_document or str(args.template_file),
+                    time_info,
+                    suspicious_events,
+                )
+            except Exception as e:
+                print(f"Error writing JSON output: {e}", file=sys.stderr)
+                return 1
+
+    except ValueError as e:
+        print("File failed verification from template!", file=sys.stderr)
+        print(str(e), file=sys.stderr)
+        try:
+            print(template_diff(template_data, doc_events), file=sys.stderr)
+        except Exception:
+            pass
+        return 1
+    except Exception as e:
+        print(f"Error processing file: {type(e).__name__}: {e}", file=sys.stderr)
+        return 1
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
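The rewritten cli.py keeps the 0.1.3 behavior but splits it into testable helpers, adds short flags (-t, -d, -o), and adds -s/--show-autocomplete-details for the new aggregate entries. For downstream tooling, the --output-json file has the shape assembled in write_json_output above; a minimal consumer sketch, where results.json is a hypothetical output path:

```python
import json
from pathlib import Path

# Shape per write_json_output: {"document", "time_info", "suspicious_events"}
results = json.loads(Path("results.json").read_text())

print(results["document"])
if results["time_info"]:
    print("elapsed minutes:", results["time_info"]["minutes_elapsed"])
for ev in results["suspicious_events"]:
    print(ev.get("reason", "unknown"), "at event", ev.get("event_index"))
```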
cr_proc-0.1.3/src/code_recorder_processor/cli.py (removed)

@@ -1,183 +0,0 @@
-import argparse
-import sys
-import json
-from datetime import datetime
-from pathlib import Path
-from .api.load import load_jsonl
-from .api.verify import verify, template_diff, check_time_limit
-from .api.build import reconstruct_file_from_events
-
-
-def main():
-    parser = argparse.ArgumentParser(
-        description="Process and verify code recorder JSONL files"
-    )
-    parser.add_argument(
-        "jsonl_file",
-        type=Path,
-        help="Path to the compressed JSONL file (*.recording.jsonl.gz)",
-    )
-    parser.add_argument(
-        "template_file",
-        type=Path,
-        help="Path to the initial template file that was recorded",
-    )
-    parser.add_argument(
-        "--time-limit",
-        type=int,
-        default=None,
-        help="Maximum allowed time in minutes between first and last edit. If exceeded, recording is flagged.",
-    )
-    parser.add_argument(
-        "--document",
-        type=str,
-        default=None,
-        help=("Document path or filename to process from the recording. "
-              "Defaults to the document whose extension matches the template file."),
-    )
-    parser.add_argument(
-        "--output-json",
-        type=Path,
-        default=None,
-        help="Path to output JSON file with verification results (time info and suspicious events).",
-    )
-
-    args = parser.parse_args()
-
-    # Load JSONL file first to get document path
-    try:
-        jsonData = load_jsonl(args.jsonl_file)
-    except FileNotFoundError as e:
-        print(f"Error: {e}", file=sys.stderr)
-        return 1
-    except (ValueError, IOError) as e:
-        print(f"Error loading JSONL file: {e}", file=sys.stderr)
-        return 1
-
-    # Decide which recorded document to process
-    documents = {e.get("document") for e in jsonData if "document" in e and e.get("document") is not None}
-    recorded_docs = sorted([d for d in documents if d is not None])
-
-    def resolve_document(docs: list[str], template_path: Path, override: str | None) -> str | None:
-        if not docs:
-            return None
-
-        if override:
-            matches = [d for d in docs if d.endswith(override) or Path(d).name == override]
-            if not matches:
-                raise ValueError(
-                    f"No document in recording matches '{override}'. Available: {docs}"
-                )
-            if len(matches) > 1:
-                raise ValueError(
-                    f"Ambiguous document override '{override}'. Matches: {matches}"
-                )
-            return matches[0]
-
-        template_ext = template_path.suffix
-        ext_matches = [d for d in docs if Path(d).suffix == template_ext]
-        if len(ext_matches) == 1:
-            return ext_matches[0]
-        if len(ext_matches) > 1:
-            raise ValueError(
-                f"Multiple documents share extension '{template_ext}': {ext_matches}. "
-                "Use --document to choose one."
-            )
-
-        if len(docs) == 1:
-            return docs[0]
-
-        raise ValueError(
-            "Could not determine document to process. Use --document to select one. "
-            f"Available documents: {docs}"
-        )
-
-    try:
-        target_document = resolve_document(recorded_docs, args.template_file, args.document)
-    except ValueError as e:
-        print(f"Error determining document: {e}", file=sys.stderr)
-        return 1
-
-    if target_document:
-        doc_events = tuple(e for e in jsonData if e.get("document") == target_document)
-        if not doc_events:
-            print(f"Error: No events found for document '{target_document}'", file=sys.stderr)
-            return 1
-    else:
-        doc_events = jsonData
-
-    print(f"Processing: {target_document or args.template_file}", file=sys.stderr)
-
-    # Read template file
-    try:
-        templateData = args.template_file.read_text()
-    except FileNotFoundError:
-        print(f"Error: Template file not found: {args.template_file}", file=sys.stderr)
-        return 1
-    except Exception as e:
-        print(f"Error reading template file: {e}", file=sys.stderr)
-        return 1
-
-    # Check time limit and display elapsed time
-    time_info = check_time_limit(doc_events, args.time_limit)
-    if time_info:
-        print(f"Elapsed editing time: {time_info['minutes_elapsed']} minutes", file=sys.stderr)
-        print(f"Time span (first to last edit): {(datetime.fromisoformat(time_info['last_timestamp'].replace('Z', '+00:00')) - datetime.fromisoformat(time_info['first_timestamp'].replace('Z', '+00:00'))).total_seconds() / 60:.2f} minutes", file=sys.stderr)
-        if time_info['exceeds_limit']:
-            print(f"\nTime limit exceeded!", file=sys.stderr)
-            print(f"  Limit: {time_info['time_limit_minutes']} minutes", file=sys.stderr)
-            print(f"  First edit: {time_info['first_timestamp']}", file=sys.stderr)
-            print(f"  Last edit: {time_info['last_timestamp']}", file=sys.stderr)
-
-    # Verify and process
-    try:
-        templateData, suspicious_events = verify(templateData, doc_events)
-        print(reconstruct_file_from_events(doc_events, templateData, document_path=target_document))
-
-        # Prepare results for JSON output
-        results = {
-            "document": target_document or str(args.template_file),
-            "time_info": time_info,
-            "suspicious_events": suspicious_events,
-        }
-
-        if suspicious_events:
-            print("\nSuspicious copy-paste events detected:", file=sys.stderr)
-            for ev in suspicious_events:
-                reason = ev.get('reason', 'unknown')
-                indices = ev.get('event_indices', [ev['event_index']])
-                if len(indices) > 1:
-                    print(f"  Events #{indices[0]}-#{indices[-1]} ({reason}): "
-                          f"{ev['line_count']} lines, {ev['char_count']} chars", file=sys.stderr)
-                else:
-                    print(f"  Event #{ev['event_index']} ({reason}): "
-                          f"{ev['line_count']} lines, {ev['char_count']} chars - "
-                          f"newFragment:\n```\n{ev['newFragment']}\n```", file=sys.stderr)
-        else:
-            print("Success! No suspicious events detected.", file=sys.stderr)
-
-        # Write JSON output if requested
-        if args.output_json:
-            try:
-                args.output_json.parent.mkdir(parents=True, exist_ok=True)
-                with open(args.output_json, 'w') as f:
-                    json.dump(results, f, indent=2)
-                print(f"Results written to {args.output_json}", file=sys.stderr)
-            except Exception as e:
-                print(f"Error writing JSON output: {e}", file=sys.stderr)
-                return 1
-    except ValueError as e:
-        print("File failed verification from template!", file=sys.stderr)
-        print(str(e), file=sys.stderr)
-        try:
-            print(template_diff(templateData, doc_events), file=sys.stderr)
-        except Exception:
-            pass
-        return 1
-    except Exception as e:
-        print(f"Error processing file: {type(e).__name__}: {e}", file=sys.stderr)
-        return 1
-
-
-if __name__ == "__main__":
-    sys.exit(main())
Files without changes:

- {cr_proc-0.1.3 → cr_proc-0.1.5}/README.md
- {cr_proc-0.1.3 → cr_proc-0.1.5}/src/code_recorder_processor/__init__.py
- {cr_proc-0.1.3 → cr_proc-0.1.5}/src/code_recorder_processor/api/build.py