cr-proc 0.1.5__tar.gz → 0.1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cr_proc-0.1.5 → cr_proc-0.1.7}/PKG-INFO +1 -1
- {cr_proc-0.1.5 → cr_proc-0.1.7}/pyproject.toml +1 -1
- {cr_proc-0.1.5 → cr_proc-0.1.7}/src/code_recorder_processor/api/verify.py +86 -45
- {cr_proc-0.1.5 → cr_proc-0.1.7}/src/code_recorder_processor/cli.py +36 -2
- {cr_proc-0.1.5 → cr_proc-0.1.7}/README.md +0 -0
- {cr_proc-0.1.5 → cr_proc-0.1.7}/src/code_recorder_processor/__init__.py +0 -0
- {cr_proc-0.1.5 → cr_proc-0.1.7}/src/code_recorder_processor/api/build.py +0 -0
- {cr_proc-0.1.5 → cr_proc-0.1.7}/src/code_recorder_processor/api/load.py +0 -0
|
@@ -313,6 +313,19 @@ def _detect_rapid_paste_sequences(jsonData: tuple[dict[str, Any], ...]) -> list[
|
|
|
313
313
|
# If we found 3+ one-line pastes within 1 second, flag it
|
|
314
314
|
if len(cluster) >= 3:
|
|
315
315
|
event_indices = [p["event_index"] for p in cluster]
|
|
316
|
+
|
|
317
|
+
# Build detailed events list for optional detailed review
|
|
318
|
+
detailed_events = []
|
|
319
|
+
for paste in cluster:
|
|
320
|
+
idx = paste["event_index"]
|
|
321
|
+
content = paste["content"]
|
|
322
|
+
detailed_events.append({
|
|
323
|
+
"event_index": idx,
|
|
324
|
+
"line_count": 1,
|
|
325
|
+
"char_count": len(content),
|
|
326
|
+
"newFragment": content,
|
|
327
|
+
})
|
|
328
|
+
|
|
316
329
|
suspicious_events.append({
|
|
317
330
|
"event_index": event_indices[0],
|
|
318
331
|
"event_indices": event_indices,
|
|
@@ -320,6 +333,7 @@ def _detect_rapid_paste_sequences(jsonData: tuple[dict[str, Any], ...]) -> list[
|
|
|
320
333
|
"char_count": sum(len(p["content"]) for p in cluster),
|
|
321
334
|
"reason": "rapid one-line pastes (AI indicator)",
|
|
322
335
|
"newFragment": f"{len(cluster)} one-line pastes in 1 second",
|
|
336
|
+
"detailed_events": detailed_events,
|
|
323
337
|
})
|
|
324
338
|
|
|
325
339
|
i = j if j > i + 1 else i + 1
|
|
@@ -334,16 +348,22 @@ def _detect_fullline_autocomplete(
|
|
|
334
348
|
excluded_indices: set[int]
|
|
335
349
|
) -> list[dict[str, Any]]:
|
|
336
350
|
"""
|
|
337
|
-
Detect
|
|
351
|
+
Detect multi-line auto-complete events where the IDE/AI generates multiple complete lines.
|
|
352
|
+
|
|
353
|
+
Focuses on significant AI assistance where the system generates entire functions or blocks
|
|
354
|
+
(2+ lines) in a single completion event. This is distinct from basic IDE autocomplete
|
|
355
|
+
(e.g., finishing a function name).
|
|
338
356
|
|
|
339
357
|
At keystroke level, events show:
|
|
340
358
|
- Normal typing: oldFragment="" (empty), newFragment="X" (1 char)
|
|
341
|
-
-
|
|
359
|
+
- Basic autocomplete: oldFragment="" (empty), newFragment="function_name" (IDE suggests identifier)
|
|
360
|
+
- Full-line AI completion: oldFragment="" (empty), newFragment="def foo():\n pass" (entire function)
|
|
342
361
|
|
|
343
|
-
|
|
362
|
+
Full-line auto-complete is detected when:
|
|
344
363
|
- oldFragment is empty or very short (0-3 chars)
|
|
345
|
-
- newFragment
|
|
346
|
-
- newFragment contains
|
|
364
|
+
- newFragment generates 2+ complete lines
|
|
365
|
+
- newFragment contains complete statements (not just identifiers)
|
|
366
|
+
- Content represents meaningful code structure
|
|
347
367
|
- newFragment does NOT already exist in the document state
|
|
348
368
|
- Event not already flagged as external copy-paste
|
|
349
369
|
|
|
@@ -361,7 +381,7 @@ def _detect_fullline_autocomplete(
|
|
|
361
381
|
Returns
|
|
362
382
|
-------
|
|
363
383
|
list[dict[str, Any]]
|
|
364
|
-
List of suspected auto-complete events.
|
|
384
|
+
List of suspected multi-line auto-complete events.
|
|
365
385
|
"""
|
|
366
386
|
suspicious_events = []
|
|
367
387
|
|
|
@@ -381,20 +401,22 @@ def _detect_fullline_autocomplete(
|
|
|
381
401
|
new_len = len(new_frag)
|
|
382
402
|
|
|
383
403
|
# At keystroke level, oldFragment is typically empty for insertions
|
|
384
|
-
# Allow up to 3 chars for prefix-based
|
|
404
|
+
# Allow up to 3 chars for prefix-based triggers (e.g., "de" -> "def")
|
|
385
405
|
if old_len > 3:
|
|
386
406
|
continue
|
|
387
407
|
|
|
388
|
-
#
|
|
389
|
-
#
|
|
390
|
-
if
|
|
391
|
-
|
|
408
|
+
# Check line count - we care about complete statements
|
|
409
|
+
# Multi-line is obviously concerning, but single-line with a complete statement
|
|
410
|
+
# (like "if x: return True") is also suspicious if it came from autocomplete
|
|
411
|
+
new_lines = [n for n in new_frag.split("\n") if n.strip() != ""]
|
|
392
412
|
|
|
393
|
-
#
|
|
394
|
-
#
|
|
395
|
-
#
|
|
396
|
-
|
|
397
|
-
|
|
413
|
+
# For single-line completions, be more strict about what we flag
|
|
414
|
+
# We only flag if it's a complete statement with keywords, not just identifier completion
|
|
415
|
+
is_single_line = len(new_lines) <= 2 # 2 elements = 1 line + trailing \n
|
|
416
|
+
is_multi_line = len(new_lines) >= 3 # 3+ elements = 2+ actual lines
|
|
417
|
+
|
|
418
|
+
if not (is_single_line or is_multi_line):
|
|
419
|
+
# Shouldn't happen, but skip if malformed
|
|
398
420
|
continue
|
|
399
421
|
|
|
400
422
|
# The new fragment should not be just whitespace
|
|
@@ -402,40 +424,59 @@ def _detect_fullline_autocomplete(
|
|
|
402
424
|
continue
|
|
403
425
|
|
|
404
426
|
# Check if the new fragment contains code structure indicators
|
|
405
|
-
# These strongly suggest IDE/AI auto-completion of code
|
|
406
|
-
|
|
407
|
-
"
|
|
408
|
-
"
|
|
409
|
-
"
|
|
410
|
-
"
|
|
411
|
-
"
|
|
412
|
-
"
|
|
413
|
-
"
|
|
414
|
-
"
|
|
415
|
-
"
|
|
416
|
-
"
|
|
427
|
+
# These strongly suggest IDE/AI auto-completion of actual code (not just identifiers)
|
|
428
|
+
complete_statement_indicators = [
|
|
429
|
+
":", # Block statement (if:, for:, def:, class:, while:, with:, etc.)
|
|
430
|
+
"return", # Return statement
|
|
431
|
+
"def ", # Function definition
|
|
432
|
+
"class ", # Class definition
|
|
433
|
+
"if ", # If statement
|
|
434
|
+
"for ", # For loop
|
|
435
|
+
"while ", # While loop
|
|
436
|
+
"try:", # Try block
|
|
437
|
+
"except", # Exception handling
|
|
438
|
+
"import ", # Import statement
|
|
439
|
+
"=", # Assignment
|
|
417
440
|
]
|
|
418
441
|
|
|
419
|
-
|
|
442
|
+
has_complete_statement = any(indicator in new_frag for indicator in complete_statement_indicators)
|
|
420
443
|
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
is_internal_copy = False
|
|
444
|
+
if not has_complete_statement:
|
|
445
|
+
# No complete statement - skip basic identifier completion
|
|
446
|
+
continue
|
|
425
447
|
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
is_internal_copy = True
|
|
448
|
+
# Minimum size for meaningful completion
|
|
449
|
+
if new_len < 10:
|
|
450
|
+
continue
|
|
430
451
|
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
452
|
+
# For multi-line: maximum size to distinguish from external pastes
|
|
453
|
+
# External pastes are typically much larger (100+ chars)
|
|
454
|
+
# Multi-line completions are usually 20-300 chars for a small function/block
|
|
455
|
+
if is_multi_line and new_len > 300:
|
|
456
|
+
continue
|
|
457
|
+
|
|
458
|
+
# For single-line: could be larger due to chained methods or long statements
|
|
459
|
+
# but cap at 200 chars to avoid flagging user-typed long lines
|
|
460
|
+
if is_single_line and new_len > 200:
|
|
461
|
+
continue
|
|
462
|
+
|
|
463
|
+
# Check if this content already existed in the document state BEFORE this event
|
|
464
|
+
is_internal_copy = False
|
|
465
|
+
|
|
466
|
+
if idx > 0:
|
|
467
|
+
prior_state = document_states[idx - 1]
|
|
468
|
+
if new_frag in prior_state:
|
|
469
|
+
is_internal_copy = True
|
|
470
|
+
|
|
471
|
+
if not is_internal_copy:
|
|
472
|
+
line_desc = "line" if is_single_line else "lines"
|
|
473
|
+
suspicious_events.append({
|
|
474
|
+
"event_index": idx,
|
|
475
|
+
"line_count": len(new_lines),
|
|
476
|
+
"char_count": new_len,
|
|
477
|
+
"reason": f"complete statement auto-complete (AI assistance)",
|
|
478
|
+
"newFragment": new_frag,
|
|
479
|
+
})
|
|
439
480
|
|
|
440
481
|
return suspicious_events
|
|
441
482
|
|
|
@@ -190,7 +190,29 @@ def display_suspicious_event(event: dict[str, Any], show_details: bool) -> None:
|
|
|
190
190
|
print(f" {line}", file=sys.stderr)
|
|
191
191
|
print(" ```", file=sys.stderr)
|
|
192
192
|
|
|
193
|
+
elif "event_indices" in event and reason == "rapid one-line pastes (AI indicator)":
|
|
194
|
+
# Rapid paste sequences (AI indicator) - show aggregate style
|
|
195
|
+
indices = event["event_indices"]
|
|
196
|
+
print(
|
|
197
|
+
f" AI Rapid Paste: Events #{indices[0]}-#{indices[-1]} "
|
|
198
|
+
f"({event['line_count']} lines, {event['char_count']} chars, "
|
|
199
|
+
f"{len(indices)} events in < 1 second)",
|
|
200
|
+
file=sys.stderr,
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
if show_details and "detailed_events" in event:
|
|
204
|
+
# Combine all detailed events into one block
|
|
205
|
+
combined_content = "".join(
|
|
206
|
+
detail["newFragment"] for detail in event["detailed_events"]
|
|
207
|
+
)
|
|
208
|
+
print(" Combined output:", file=sys.stderr)
|
|
209
|
+
print(" ```", file=sys.stderr)
|
|
210
|
+
for line in combined_content.split("\n"):
|
|
211
|
+
print(f" {line}", file=sys.stderr)
|
|
212
|
+
print(" ```", file=sys.stderr)
|
|
213
|
+
|
|
193
214
|
elif "event_indices" in event:
|
|
215
|
+
# Other multi-event clusters
|
|
194
216
|
indices = event.get("event_indices", [event["event_index"]])
|
|
195
217
|
print(
|
|
196
218
|
f" Events #{indices[0]}-#{indices[-1]} ({reason}): "
|
|
@@ -222,8 +244,20 @@ def display_suspicious_events(
|
|
|
222
244
|
Whether to show detailed autocomplete events
|
|
223
245
|
"""
|
|
224
246
|
if suspicious_events:
|
|
225
|
-
print("\nSuspicious
|
|
226
|
-
|
|
247
|
+
print("\nSuspicious events detected:", file=sys.stderr)
|
|
248
|
+
|
|
249
|
+
# Sort events by their index for chronological display
|
|
250
|
+
def get_sort_key(event: dict[str, Any]) -> int | float:
|
|
251
|
+
if "event_indices" in event and event["event_indices"]:
|
|
252
|
+
return event["event_indices"][0]
|
|
253
|
+
if "detailed_events" in event and event["detailed_events"]:
|
|
254
|
+
return event["detailed_events"][0].get("event_index", float("inf"))
|
|
255
|
+
event_idx = event.get("event_index", -1)
|
|
256
|
+
return event_idx if event_idx >= 0 else float("inf")
|
|
257
|
+
|
|
258
|
+
sorted_events = sorted(suspicious_events, key=get_sort_key)
|
|
259
|
+
|
|
260
|
+
for event in sorted_events:
|
|
227
261
|
display_suspicious_event(event, show_details)
|
|
228
262
|
else:
|
|
229
263
|
print("Success! No suspicious events detected.", file=sys.stderr)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|