cr-proc 0.1.6__tar.gz → 0.1.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cr_proc-0.1.6 → cr_proc-0.1.8}/PKG-INFO +1 -1
- {cr_proc-0.1.6 → cr_proc-0.1.8}/pyproject.toml +1 -1
- {cr_proc-0.1.6 → cr_proc-0.1.8}/src/code_recorder_processor/api/verify.py +192 -94
- {cr_proc-0.1.6 → cr_proc-0.1.8}/src/code_recorder_processor/cli.py +171 -17
- {cr_proc-0.1.6 → cr_proc-0.1.8}/README.md +0 -0
- {cr_proc-0.1.6 → cr_proc-0.1.8}/src/code_recorder_processor/__init__.py +0 -0
- {cr_proc-0.1.6 → cr_proc-0.1.8}/src/code_recorder_processor/api/build.py +0 -0
- {cr_proc-0.1.6 → cr_proc-0.1.8}/src/code_recorder_processor/api/load.py +0 -0
|
@@ -2,6 +2,13 @@ from typing import Any
|
|
|
2
2
|
from datetime import datetime
|
|
3
3
|
import difflib
|
|
4
4
|
|
|
5
|
+
# ============================================================================
|
|
6
|
+
# Constants for detection thresholds
|
|
7
|
+
# ============================================================================
|
|
8
|
+
MIN_WHITELIST_SIZE = 10 # Minimum fragment size to add to whitelist
|
|
9
|
+
MIN_MULTILINE_SIZE = 20 # Minimum size for multiline external paste detection
|
|
10
|
+
MIN_AUTOCOMPLETE_SIZE = 10 # Minimum size for autocomplete detection
|
|
11
|
+
MIN_RAPID_PASTE_CHARS = 5 # Minimum chars for a "paste" in rapid detection
|
|
5
12
|
|
|
6
13
|
def _normalize_newlines(text: str) -> str:
|
|
7
14
|
"""Normalize CRLF to LF to avoid offset and diff noise."""
|
|
@@ -152,13 +159,13 @@ def _build_document_states(jsonData: tuple[dict[str, Any], ...]) -> tuple[list[s
|
|
|
152
159
|
|
|
153
160
|
# Build whitelist of all content fragments seen
|
|
154
161
|
# Add both old and new fragments to whitelist for comprehensive coverage
|
|
155
|
-
if len(old_frag) >
|
|
162
|
+
if len(old_frag) > MIN_WHITELIST_SIZE:
|
|
156
163
|
content_whitelist.add(old_frag)
|
|
157
|
-
if len(new_frag) >
|
|
164
|
+
if len(new_frag) > MIN_WHITELIST_SIZE:
|
|
158
165
|
content_whitelist.add(new_frag)
|
|
159
166
|
|
|
160
167
|
# Also add the full document state to whitelist
|
|
161
|
-
if len(current_state) >
|
|
168
|
+
if len(current_state) > MIN_WHITELIST_SIZE:
|
|
162
169
|
content_whitelist.add(current_state)
|
|
163
170
|
|
|
164
171
|
return document_states, content_whitelist
|
|
@@ -191,65 +198,74 @@ def _detect_multiline_external_pastes(
|
|
|
191
198
|
"""
|
|
192
199
|
suspicious_events = []
|
|
193
200
|
|
|
201
|
+
# Build whitelist incrementally to only include content from BEFORE each event
|
|
202
|
+
past_whitelist = set()
|
|
203
|
+
|
|
194
204
|
for idx, event in enumerate(jsonData):
|
|
195
205
|
old_frag = _normalize_newlines(event.get("oldFragment", ""))
|
|
196
206
|
new_frag = _normalize_newlines(event.get("newFragment", ""))
|
|
197
207
|
|
|
198
208
|
# Skip if no actual change
|
|
199
209
|
if new_frag == old_frag or new_frag.strip() == "":
|
|
200
|
-
|
|
201
|
-
|
|
210
|
+
pass # Still add to whitelist below
|
|
202
211
|
# Only check multi-line content (more than 2 lines means at least 2 actual lines)
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
continue
|
|
206
|
-
|
|
207
|
-
# Check if the new content already existed in the document at any prior point
|
|
208
|
-
is_internal_copy = False
|
|
209
|
-
|
|
210
|
-
# Check against document state BEFORE this event
|
|
211
|
-
if idx > 0:
|
|
212
|
-
prior_state = document_states[idx - 1]
|
|
213
|
-
if new_frag in prior_state:
|
|
214
|
-
is_internal_copy = True
|
|
212
|
+
elif len(new_frag.split("\n")) > 2:
|
|
213
|
+
new_lines = new_frag.split("\n")
|
|
215
214
|
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
for hist_content in content_whitelist:
|
|
219
|
-
# Ignore tiny fragments
|
|
220
|
-
if len(hist_content) < 20:
|
|
221
|
-
continue
|
|
222
|
-
|
|
223
|
-
# Require substantial overlap in size to count as an internal copy
|
|
224
|
-
similar_length = (
|
|
225
|
-
len(hist_content) >= 0.8 * len(new_frag)
|
|
226
|
-
and len(hist_content) <= 1.25 * len(new_frag)
|
|
227
|
-
)
|
|
228
|
-
|
|
229
|
-
if new_frag == hist_content:
|
|
230
|
-
is_internal_copy = True
|
|
231
|
-
break
|
|
215
|
+
# Check if the new content already existed in the document at any prior point
|
|
216
|
+
is_internal_copy = False
|
|
232
217
|
|
|
233
|
-
|
|
218
|
+
# Check against document state BEFORE this event
|
|
219
|
+
if idx > 0:
|
|
220
|
+
prior_state = document_states[idx - 1]
|
|
221
|
+
if new_frag in prior_state:
|
|
234
222
|
is_internal_copy = True
|
|
235
|
-
break
|
|
236
223
|
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
224
|
+
# Also check against whitelist of content from BEFORE this event
|
|
225
|
+
if not is_internal_copy:
|
|
226
|
+
for hist_content in past_whitelist:
|
|
227
|
+
# Ignore tiny fragments - multiline external pastes should be significant
|
|
228
|
+
if len(hist_content) < MIN_MULTILINE_SIZE:
|
|
229
|
+
continue
|
|
230
|
+
|
|
231
|
+
# Require substantial overlap in size to count as an internal copy
|
|
232
|
+
similar_length = (
|
|
233
|
+
len(hist_content) >= 0.8 * len(new_frag)
|
|
234
|
+
and len(hist_content) <= 1.25 * len(new_frag)
|
|
235
|
+
)
|
|
236
|
+
|
|
237
|
+
if new_frag == hist_content:
|
|
238
|
+
is_internal_copy = True
|
|
239
|
+
break
|
|
240
|
+
|
|
241
|
+
if new_frag in hist_content and similar_length:
|
|
242
|
+
is_internal_copy = True
|
|
243
|
+
break
|
|
244
|
+
|
|
245
|
+
if hist_content in new_frag and similar_length:
|
|
246
|
+
is_internal_copy = True
|
|
247
|
+
break
|
|
248
|
+
|
|
249
|
+
# Also check if it's in the old fragment (internal move/copy)
|
|
250
|
+
if not is_internal_copy and old_frag and (new_frag in old_frag or old_frag in new_frag):
|
|
251
|
+
is_internal_copy = True
|
|
240
252
|
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
253
|
+
if not is_internal_copy:
|
|
254
|
+
suspicious_events.append({
|
|
255
|
+
"event_index": idx,
|
|
256
|
+
"line_count": len(new_lines),
|
|
257
|
+
"char_count": len(new_frag),
|
|
258
|
+
"reason": "multi-line external paste",
|
|
259
|
+
"newFragment": new_frag
|
|
260
|
+
})
|
|
244
261
|
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
})
|
|
262
|
+
# Add current event's content to whitelist for future events
|
|
263
|
+
if len(old_frag) > MIN_MULTILINE_SIZE:
|
|
264
|
+
past_whitelist.add(old_frag)
|
|
265
|
+
if len(new_frag) > MIN_MULTILINE_SIZE:
|
|
266
|
+
past_whitelist.add(new_frag)
|
|
267
|
+
if idx > 0 and len(document_states[idx - 1]) > MIN_MULTILINE_SIZE:
|
|
268
|
+
past_whitelist.add(document_states[idx - 1])
|
|
253
269
|
|
|
254
270
|
return suspicious_events
|
|
255
271
|
|
|
@@ -281,7 +297,7 @@ def _detect_rapid_paste_sequences(jsonData: tuple[dict[str, Any], ...]) -> list[
|
|
|
281
297
|
new_lines = new_frag.split("\n")
|
|
282
298
|
if len(new_lines) == 2:
|
|
283
299
|
# Heuristic: if it's more than a few characters, it might be pasted
|
|
284
|
-
if len(new_frag.strip()) >
|
|
300
|
+
if len(new_frag.strip()) > MIN_RAPID_PASTE_CHARS:
|
|
285
301
|
one_line_pastes.append({
|
|
286
302
|
"event_index": idx,
|
|
287
303
|
"timestamp": timestamp,
|
|
@@ -348,16 +364,22 @@ def _detect_fullline_autocomplete(
|
|
|
348
364
|
excluded_indices: set[int]
|
|
349
365
|
) -> list[dict[str, Any]]:
|
|
350
366
|
"""
|
|
351
|
-
Detect
|
|
367
|
+
Detect multi-line auto-complete events where the IDE/AI generates multiple complete lines.
|
|
368
|
+
|
|
369
|
+
Focuses on significant AI assistance where the system generates entire functions or blocks
|
|
370
|
+
(2+ lines) in a single completion event. This is distinct from basic IDE autocomplete
|
|
371
|
+
(e.g., finishing a function name).
|
|
352
372
|
|
|
353
373
|
At keystroke level, events show:
|
|
354
374
|
- Normal typing: oldFragment="" (empty), newFragment="X" (1 char)
|
|
355
|
-
-
|
|
375
|
+
- Basic autocomplete: oldFragment="" (empty), newFragment="function_name" (IDE suggests identifier)
|
|
376
|
+
- Full-line AI completion: oldFragment="" (empty), newFragment="def foo():\n pass" (entire function)
|
|
356
377
|
|
|
357
|
-
|
|
378
|
+
Full-line auto-complete is detected when:
|
|
358
379
|
- oldFragment is empty or very short (0-3 chars)
|
|
359
|
-
- newFragment
|
|
360
|
-
- newFragment contains
|
|
380
|
+
- newFragment generates 2+ complete lines
|
|
381
|
+
- newFragment contains complete statements (not just identifiers)
|
|
382
|
+
- Content represents meaningful code structure
|
|
361
383
|
- newFragment does NOT already exist in the document state
|
|
362
384
|
- Event not already flagged as external copy-paste
|
|
363
385
|
|
|
@@ -375,13 +397,17 @@ def _detect_fullline_autocomplete(
|
|
|
375
397
|
Returns
|
|
376
398
|
-------
|
|
377
399
|
list[dict[str, Any]]
|
|
378
|
-
List of suspected auto-complete events.
|
|
400
|
+
List of suspected multi-line auto-complete events.
|
|
379
401
|
"""
|
|
380
402
|
suspicious_events = []
|
|
381
403
|
|
|
404
|
+
# Build whitelist incrementally to only include content from BEFORE each event
|
|
405
|
+
past_whitelist = set()
|
|
406
|
+
|
|
382
407
|
for idx, event in enumerate(jsonData):
|
|
383
408
|
# Skip if already flagged by another detector
|
|
384
409
|
if idx in excluded_indices:
|
|
410
|
+
past_whitelist_update(idx, event, document_states, past_whitelist)
|
|
385
411
|
continue
|
|
386
412
|
|
|
387
413
|
old_frag = _normalize_newlines(event.get("oldFragment", ""))
|
|
@@ -389,71 +415,143 @@ def _detect_fullline_autocomplete(
|
|
|
389
415
|
|
|
390
416
|
# Skip first event (template) and no-change events
|
|
391
417
|
if idx == 0 or new_frag == old_frag:
|
|
418
|
+
past_whitelist_update(idx, event, document_states, past_whitelist)
|
|
392
419
|
continue
|
|
393
420
|
|
|
394
421
|
old_len = len(old_frag)
|
|
395
422
|
new_len = len(new_frag)
|
|
396
423
|
|
|
397
424
|
# At keystroke level, oldFragment is typically empty for insertions
|
|
398
|
-
# Allow up to 3 chars for prefix-based
|
|
425
|
+
# Allow up to 3 chars for prefix-based triggers (e.g., "de" -> "def")
|
|
399
426
|
if old_len > 3:
|
|
427
|
+
past_whitelist_update(idx, event, document_states, past_whitelist)
|
|
400
428
|
continue
|
|
401
429
|
|
|
402
|
-
#
|
|
403
|
-
#
|
|
404
|
-
if
|
|
405
|
-
|
|
430
|
+
# Check line count - we care about complete statements
|
|
431
|
+
# Multi-line is obviously concerning, but single-line with a complete statement
|
|
432
|
+
# (like "if x: return True") is also suspicious if it came from autocomplete
|
|
433
|
+
new_lines = [n for n in new_frag.split("\n") if n.strip() != ""]
|
|
406
434
|
|
|
407
|
-
#
|
|
408
|
-
#
|
|
409
|
-
#
|
|
410
|
-
|
|
411
|
-
|
|
435
|
+
# For single-line completions, be more strict about what we flag
|
|
436
|
+
# We only flag if it's a complete statement with keywords, not just identifier completion
|
|
437
|
+
is_single_line = len(new_lines) <= 2 # 2 elements = 1 line + trailing \n
|
|
438
|
+
is_multi_line = len(new_lines) >= 3 # 3+ elements = 2+ actual lines
|
|
439
|
+
|
|
440
|
+
if not (is_single_line or is_multi_line):
|
|
441
|
+
# Shouldn't happen, but skip if malformed
|
|
442
|
+
past_whitelist_update(idx, event, document_states, past_whitelist)
|
|
412
443
|
continue
|
|
413
444
|
|
|
414
445
|
# The new fragment should not be just whitespace
|
|
415
446
|
if not new_frag.strip():
|
|
447
|
+
past_whitelist_update(idx, event, document_states, past_whitelist)
|
|
416
448
|
continue
|
|
417
449
|
|
|
418
450
|
# Check if the new fragment contains code structure indicators
|
|
419
|
-
# These strongly suggest IDE/AI auto-completion of code
|
|
420
|
-
|
|
421
|
-
"
|
|
422
|
-
"
|
|
423
|
-
"
|
|
424
|
-
"
|
|
425
|
-
"
|
|
426
|
-
"
|
|
427
|
-
"
|
|
428
|
-
"
|
|
429
|
-
"
|
|
430
|
-
"
|
|
451
|
+
# These strongly suggest IDE/AI auto-completion of actual code (not just identifiers)
|
|
452
|
+
complete_statement_indicators = [
|
|
453
|
+
":", # Block statement (if:, for:, def:, class:, while:, with:, etc.)
|
|
454
|
+
"return", # Return statement
|
|
455
|
+
"def ", # Function definition
|
|
456
|
+
"class ", # Class definition
|
|
457
|
+
"if ", # If statement
|
|
458
|
+
"for ", # For loop
|
|
459
|
+
"while ", # While loop
|
|
460
|
+
"try:", # Try block
|
|
461
|
+
"except", # Exception handling
|
|
462
|
+
"import ", # Import statement
|
|
463
|
+
"=", # Assignment
|
|
431
464
|
]
|
|
432
465
|
|
|
433
|
-
|
|
466
|
+
has_complete_statement = any(indicator in new_frag for indicator in complete_statement_indicators)
|
|
434
467
|
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
468
|
+
if not has_complete_statement:
|
|
469
|
+
# No complete statement - skip basic identifier completion
|
|
470
|
+
past_whitelist_update(idx, event, document_states, past_whitelist)
|
|
471
|
+
continue
|
|
439
472
|
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
473
|
+
# Minimum size for meaningful completion
|
|
474
|
+
if new_len < MIN_AUTOCOMPLETE_SIZE:
|
|
475
|
+
past_whitelist_update(idx, event, document_states, past_whitelist)
|
|
476
|
+
continue
|
|
477
|
+
|
|
478
|
+
# For multi-line: maximum size to distinguish from external pastes
|
|
479
|
+
# External pastes are typically much larger (100+ chars)
|
|
480
|
+
# Multi-line completions are usually 20-300 chars for a small function/block
|
|
481
|
+
if is_multi_line and new_len > 300:
|
|
482
|
+
past_whitelist_update(idx, event, document_states, past_whitelist)
|
|
483
|
+
continue
|
|
484
|
+
|
|
485
|
+
# For single-line: could be larger due to chained methods or long statements
|
|
486
|
+
# but cap at 200 chars to avoid flagging user-typed long lines
|
|
487
|
+
if is_single_line and new_len > 200:
|
|
488
|
+
past_whitelist_update(idx, event, document_states, past_whitelist)
|
|
489
|
+
continue
|
|
490
|
+
|
|
491
|
+
# Check if this content already existed in the document state BEFORE this event
|
|
492
|
+
is_internal_copy = False
|
|
493
|
+
|
|
494
|
+
if idx > 0:
|
|
495
|
+
prior_state = document_states[idx - 1]
|
|
496
|
+
if new_frag in prior_state:
|
|
497
|
+
is_internal_copy = True
|
|
498
|
+
|
|
499
|
+
# Also check against whitelist of content from BEFORE this event
|
|
500
|
+
if not is_internal_copy:
|
|
501
|
+
for hist_content in past_whitelist:
|
|
502
|
+
# Ignore tiny fragments
|
|
503
|
+
if len(hist_content) < MIN_AUTOCOMPLETE_SIZE:
|
|
504
|
+
continue
|
|
505
|
+
|
|
506
|
+
# Check for exact match or significant overlap
|
|
507
|
+
if new_frag == hist_content:
|
|
443
508
|
is_internal_copy = True
|
|
509
|
+
break
|
|
444
510
|
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
511
|
+
# Check for substring matches with similar length
|
|
512
|
+
similar_length = (
|
|
513
|
+
len(hist_content) >= 0.8 * len(new_frag)
|
|
514
|
+
and len(hist_content) <= 1.25 * len(new_frag)
|
|
515
|
+
)
|
|
516
|
+
|
|
517
|
+
if (new_frag in hist_content or hist_content in new_frag) and similar_length:
|
|
518
|
+
is_internal_copy = True
|
|
519
|
+
break
|
|
520
|
+
|
|
521
|
+
if not is_internal_copy:
|
|
522
|
+
line_desc = "line" if is_single_line else "lines"
|
|
523
|
+
suspicious_events.append({
|
|
524
|
+
"event_index": idx,
|
|
525
|
+
"line_count": len(new_lines),
|
|
526
|
+
"char_count": new_len,
|
|
527
|
+
"reason": f"complete statement auto-complete (AI assistance)",
|
|
528
|
+
"newFragment": new_frag,
|
|
529
|
+
})
|
|
530
|
+
|
|
531
|
+
# Add current event's content to whitelist for future events
|
|
532
|
+
past_whitelist_update(idx, event, document_states, past_whitelist)
|
|
453
533
|
|
|
454
534
|
return suspicious_events
|
|
455
535
|
|
|
456
536
|
|
|
537
|
+
def past_whitelist_update(
    idx: int,
    event: dict[str, Any],
    document_states: list[str],
    past_whitelist: set[str]
) -> None:
    """
    Record the content carried by one event in ``past_whitelist``.

    The set is mutated in place. Candidates are the event's
    newline-normalized ``oldFragment`` and ``newFragment`` and, when
    ``idx`` is a valid position, ``document_states[idx]``. A candidate is
    stored only if it is strictly longer than ``MIN_AUTOCOMPLETE_SIZE``.

    Parameters
    ----------
    idx : int
        Position of ``event`` in the recording; used to look up the
        matching entry of ``document_states``.
    event : dict[str, Any]
        The recorded edit event.
    document_states : list[str]
        Document snapshots indexed by event position.
    past_whitelist : set[str]
        Accumulator of previously seen content, updated in place.
    """
    # Normalize both fragments up front, before anything is stored.
    fragments = [
        _normalize_newlines(event.get(key, ""))
        for key in ("oldFragment", "newFragment")
    ]
    past_whitelist.update(
        text for text in fragments if len(text) > MIN_AUTOCOMPLETE_SIZE
    )

    # The snapshot is optional: idx may lie past the end of document_states.
    if idx < len(document_states):
        snapshot = document_states[idx]
        if len(snapshot) > MIN_AUTOCOMPLETE_SIZE:
            past_whitelist.add(snapshot)
|
|
553
|
+
|
|
554
|
+
|
|
457
555
|
def detect_external_copypaste(jsonData: tuple[dict[str, Any], ...]) -> list[dict[str, Any]]:
|
|
458
556
|
"""
|
|
459
557
|
Detect copy-paste events from external sources and AI-assisted coding patterns.
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
import json
|
|
3
|
+
import os
|
|
3
4
|
import sys
|
|
5
|
+
import time
|
|
4
6
|
from datetime import datetime
|
|
5
7
|
from pathlib import Path
|
|
6
8
|
from typing import Any
|
|
@@ -268,6 +270,8 @@ def write_json_output(
|
|
|
268
270
|
document: str,
|
|
269
271
|
time_info: dict[str, Any] | None,
|
|
270
272
|
suspicious_events: list[dict[str, Any]],
|
|
273
|
+
reconstructed_code: str,
|
|
274
|
+
verified: bool,
|
|
271
275
|
) -> None:
|
|
272
276
|
"""
|
|
273
277
|
Write verification results to JSON file.
|
|
@@ -282,6 +286,10 @@ def write_json_output(
|
|
|
282
286
|
Time information from verification
|
|
283
287
|
suspicious_events : list[dict[str, Any]]
|
|
284
288
|
List of suspicious events detected
|
|
289
|
+
reconstructed_code : str
|
|
290
|
+
The reconstructed file content
|
|
291
|
+
verified : bool
|
|
292
|
+
Whether the file passed verification
|
|
285
293
|
|
|
286
294
|
Raises
|
|
287
295
|
------
|
|
@@ -290,8 +298,10 @@ def write_json_output(
|
|
|
290
298
|
"""
|
|
291
299
|
results = {
|
|
292
300
|
"document": document,
|
|
301
|
+
"verified": verified,
|
|
293
302
|
"time_info": time_info,
|
|
294
303
|
"suspicious_events": suspicious_events,
|
|
304
|
+
"reconstructed_code": reconstructed_code,
|
|
295
305
|
}
|
|
296
306
|
|
|
297
307
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
@@ -300,6 +310,110 @@ def write_json_output(
|
|
|
300
310
|
print(f"Results written to {output_path}", file=sys.stderr)
|
|
301
311
|
|
|
302
312
|
|
|
313
|
+
def playback_recording(
    json_data: tuple[dict[str, Any], ...],
    document: str,
    template: str,
    speed: float = 1.0,
) -> None:
    """
    Play back a recording, showing the code evolving in real-time.

    Starting from ``template``, each event recorded for ``document`` is
    applied in order and the resulting content is redrawn on a cleared
    terminal. The pause between two events follows their timestamps,
    divided by ``speed`` and capped at 5 seconds; when timestamps are
    missing or unusable a short default pause is used instead.
    Ctrl+C stops the playback.

    Parameters
    ----------
    json_data : tuple[dict[str, Any], ...]
        The recording events
    document : str
        The document to play back
    template : str
        The initial template content
    speed : float
        Playback speed multiplier (1.0 = real-time, 2.0 = 2x speed, 0.5 = half speed)

    Raises
    ------
    ValueError
        If there are events to play and ``speed`` is not a positive number.
    """
    # Filter events for the target document
    doc_events = [e for e in json_data if e.get("document") == document]

    if not doc_events:
        print(f"No events found for document: {document}", file=sys.stderr)
        return

    # Every pause is divided by speed: zero would raise ZeroDivisionError and
    # a negative value would hand time.sleep() a negative duration.
    # "not speed > 0" also rejects NaN.
    if not speed > 0:
        raise ValueError(f"Playback speed must be a positive number, got {speed!r}")

    total_events = len(doc_events)
    separator = "=" * 80
    default_delay = 0.1 / speed  # pause used when timestamps cannot be compared
    max_delay = 5.0  # cap so long idle periods do not stall the playback

    def clear_screen() -> None:
        """Clear the terminal screen."""
        os.system('cls' if os.name == 'nt' else 'clear')

    def parse_timestamp(ts_str: str) -> datetime:
        """Parse ISO timestamp string."""
        return datetime.fromisoformat(ts_str.replace("Z", "+00:00"))

    def delay_between(previous: Any, current: Any) -> float:
        """Return the pause (seconds) to wait between two event timestamps."""
        if not (previous and current):
            return default_delay
        try:
            elapsed = parse_timestamp(current) - parse_timestamp(previous)
        except (ValueError, TypeError, AttributeError):
            # ValueError: not an ISO string. TypeError: one timestamp is
            # timezone-aware and the other naive. AttributeError: the value
            # is not a string at all.
            return default_delay
        return min(elapsed.total_seconds() / speed, max_delay)

    # Start with template
    current_content = template
    last_timestamp = None

    # Show initial template
    clear_screen()
    print(separator)
    print(f"PLAYBACK: {document} (Speed: {speed}x)")
    print(f"Event 0 / {total_events} - Initial Template")
    print(separator)
    print(current_content)
    print(f"\n{separator}")
    print("Press Ctrl+C to stop playback")
    time.sleep(2.0 / speed)

    try:
        for idx, event in enumerate(doc_events, 1):
            # "or" also covers keys that are present but explicitly null.
            old_frag = event.get("oldFragment") or ""
            new_frag = event.get("newFragment") or ""
            offset = event.get("offset") or 0
            timestamp = event.get("timestamp")

            # Out-of-order timestamps give a non-positive delay: do not wait.
            delay = delay_between(last_timestamp, timestamp)
            if delay > 0:
                time.sleep(delay)

            last_timestamp = timestamp
            changed = new_frag != old_frag

            # Apply the edit
            # NOTE(review): fragments are applied as recorded, without any
            # CRLF normalization; confirm offsets match for CRLF recordings.
            if changed:
                current_content = (
                    current_content[:offset]
                    + new_frag
                    + current_content[offset + len(old_frag):]
                )

            # Display current state
            clear_screen()
            print(separator)
            print(f"PLAYBACK: {document} (Speed: {speed}x)")
            print(f"Event {idx} / {total_events} - {timestamp or 'unknown time'}")

            # Show what changed
            if changed:
                if not old_frag:
                    change_type = "INSERT"
                elif not new_frag:
                    change_type = "DELETE"
                else:
                    change_type = "REPLACE"
                print(f"Action: {change_type} at offset {offset} ({len(new_frag)} chars)")

            print(separator)
            print(current_content)
            print(f"\n{separator}")
            progress_bar = ("#" * (idx * 40 // total_events)).ljust(40)
            print(f"Progress: [{progress_bar}] {idx}/{total_events}")
            print("Press Ctrl+C to stop playback")

    except KeyboardInterrupt:
        print("\n\nPlayback stopped by user.", file=sys.stderr)
        return

    # Final summary
    print("\n\nPlayback complete!", file=sys.stderr)
    print(f"Total events: {total_events}", file=sys.stderr)
|
|
415
|
+
|
|
416
|
+
|
|
303
417
|
def create_parser() -> argparse.ArgumentParser:
|
|
304
418
|
"""
|
|
305
419
|
Create and configure the argument parser.
|
|
@@ -353,6 +467,24 @@ def create_parser() -> argparse.ArgumentParser:
|
|
|
353
467
|
help="Show individual auto-complete events in addition to "
|
|
354
468
|
"aggregate statistics",
|
|
355
469
|
)
|
|
470
|
+
parser.add_argument(
|
|
471
|
+
"-q",
|
|
472
|
+
"--quiet",
|
|
473
|
+
action="store_true",
|
|
474
|
+
help="Suppress output of reconstructed code to stdout",
|
|
475
|
+
)
|
|
476
|
+
parser.add_argument(
|
|
477
|
+
"-p",
|
|
478
|
+
"--playback",
|
|
479
|
+
action="store_true",
|
|
480
|
+
help="Play back the recording in real-time, showing code evolution",
|
|
481
|
+
)
|
|
482
|
+
parser.add_argument(
|
|
483
|
+
"--playback-speed",
|
|
484
|
+
type=float,
|
|
485
|
+
default=1.0,
|
|
486
|
+
help="Playback speed multiplier (1.0 = real-time, 2.0 = 2x speed, 0.5 = half speed)",
|
|
487
|
+
)
|
|
356
488
|
return parser
|
|
357
489
|
|
|
358
490
|
|
|
@@ -388,6 +520,21 @@ def main() -> int:
|
|
|
388
520
|
print(f"Error determining document: {e}", file=sys.stderr)
|
|
389
521
|
return 1
|
|
390
522
|
|
|
523
|
+
# Handle playback mode
|
|
524
|
+
if args.playback:
|
|
525
|
+
try:
|
|
526
|
+
template_content = args.template_file.read_text()
|
|
527
|
+
except FileNotFoundError:
|
|
528
|
+
print(f"Error: Template file not found: {args.template_file}", file=sys.stderr)
|
|
529
|
+
return 1
|
|
530
|
+
|
|
531
|
+
if target_document:
|
|
532
|
+
playback_recording(json_data, target_document, template_content, args.playback_speed)
|
|
533
|
+
return 0
|
|
534
|
+
else:
|
|
535
|
+
print("Error: No documents found in recording", file=sys.stderr)
|
|
536
|
+
return 1
|
|
537
|
+
|
|
391
538
|
# Filter events for target document
|
|
392
539
|
doc_events = filter_events_by_document(json_data, target_document)
|
|
393
540
|
if target_document and not doc_events:
|
|
@@ -416,29 +563,21 @@ def main() -> int:
|
|
|
416
563
|
display_time_info(time_info)
|
|
417
564
|
|
|
418
565
|
# Verify and process the recording
|
|
566
|
+
verified = False
|
|
567
|
+
reconstructed = ""
|
|
568
|
+
suspicious_events = []
|
|
419
569
|
try:
|
|
420
570
|
template_data, suspicious_events = verify(template_data, doc_events)
|
|
421
571
|
reconstructed = reconstruct_file_from_events(
|
|
422
572
|
doc_events, template_data, document_path=target_document
|
|
423
573
|
)
|
|
424
|
-
|
|
574
|
+
verified = True
|
|
575
|
+
if not args.quiet:
|
|
576
|
+
print(reconstructed)
|
|
425
577
|
|
|
426
578
|
# Display suspicious events
|
|
427
579
|
display_suspicious_events(suspicious_events, args.show_autocomplete_details)
|
|
428
580
|
|
|
429
|
-
# Write JSON output if requested
|
|
430
|
-
if args.output_json:
|
|
431
|
-
try:
|
|
432
|
-
write_json_output(
|
|
433
|
-
args.output_json,
|
|
434
|
-
target_document or str(args.template_file),
|
|
435
|
-
time_info,
|
|
436
|
-
suspicious_events,
|
|
437
|
-
)
|
|
438
|
-
except Exception as e:
|
|
439
|
-
print(f"Error writing JSON output: {e}", file=sys.stderr)
|
|
440
|
-
return 1
|
|
441
|
-
|
|
442
581
|
except ValueError as e:
|
|
443
582
|
print("File failed verification from template!", file=sys.stderr)
|
|
444
583
|
print(str(e), file=sys.stderr)
|
|
@@ -446,12 +585,27 @@ def main() -> int:
|
|
|
446
585
|
print(template_diff(template_data, doc_events), file=sys.stderr)
|
|
447
586
|
except Exception:
|
|
448
587
|
pass
|
|
449
|
-
|
|
588
|
+
verified = False
|
|
450
589
|
except Exception as e:
|
|
451
590
|
print(f"Error processing file: {type(e).__name__}: {e}", file=sys.stderr)
|
|
452
|
-
|
|
591
|
+
verified = False
|
|
592
|
+
|
|
593
|
+
# Write JSON output to file if requested
|
|
594
|
+
if args.output_json:
|
|
595
|
+
try:
|
|
596
|
+
write_json_output(
|
|
597
|
+
args.output_json,
|
|
598
|
+
target_document or str(args.template_file),
|
|
599
|
+
time_info,
|
|
600
|
+
suspicious_events,
|
|
601
|
+
reconstructed,
|
|
602
|
+
verified,
|
|
603
|
+
)
|
|
604
|
+
except Exception as e:
|
|
605
|
+
print(f"Error writing JSON output: {e}", file=sys.stderr)
|
|
606
|
+
return 1
|
|
453
607
|
|
|
454
|
-
return 0
|
|
608
|
+
return 0 if verified else 1
|
|
455
609
|
|
|
456
610
|
|
|
457
611
|
if __name__ == "__main__":
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|