cr-proc 0.1.5__tar.gz → 0.1.7__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cr_proc
3
- Version: 0.1.5
3
+ Version: 0.1.7
4
4
  Summary: A tool for processing BYU CS code recording files.
5
5
  Author: Ethan Dye
6
6
  Author-email: mrtops03@gmail.com
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "cr_proc"
3
- version = "0.1.5"
3
+ version = "0.1.7"
4
4
  description = "A tool for processing BYU CS code recording files."
5
5
  authors = [
6
6
  {name = "Ethan Dye",email = "mrtops03@gmail.com"}
@@ -313,6 +313,19 @@ def _detect_rapid_paste_sequences(jsonData: tuple[dict[str, Any], ...]) -> list[
313
313
  # If we found 3+ one-line pastes within 1 second, flag it
314
314
  if len(cluster) >= 3:
315
315
  event_indices = [p["event_index"] for p in cluster]
316
+
317
+ # Build detailed events list for optional detailed review
318
+ detailed_events = []
319
+ for paste in cluster:
320
+ idx = paste["event_index"]
321
+ content = paste["content"]
322
+ detailed_events.append({
323
+ "event_index": idx,
324
+ "line_count": 1,
325
+ "char_count": len(content),
326
+ "newFragment": content,
327
+ })
328
+
316
329
  suspicious_events.append({
317
330
  "event_index": event_indices[0],
318
331
  "event_indices": event_indices,
@@ -320,6 +333,7 @@ def _detect_rapid_paste_sequences(jsonData: tuple[dict[str, Any], ...]) -> list[
320
333
  "char_count": sum(len(p["content"]) for p in cluster),
321
334
  "reason": "rapid one-line pastes (AI indicator)",
322
335
  "newFragment": f"{len(cluster)} one-line pastes in 1 second",
336
+ "detailed_events": detailed_events,
323
337
  })
324
338
 
325
339
  i = j if j > i + 1 else i + 1
@@ -334,16 +348,22 @@ def _detect_fullline_autocomplete(
334
348
  excluded_indices: set[int]
335
349
  ) -> list[dict[str, Any]]:
336
350
  """
337
- Detect full-line auto-complete events where the IDE/AI completes code.
351
+ Detect multi-line auto-complete events where the IDE/AI generates multiple complete lines.
352
+
353
+ Focuses on significant AI assistance where the system generates entire functions or blocks
354
+ (2+ lines) in a single completion event. This is distinct from basic IDE autocomplete
355
+ (e.g., finishing a function name).
338
356
 
339
357
  At keystroke level, events show:
340
358
  - Normal typing: oldFragment="" (empty), newFragment="X" (1 char)
341
- - Auto-complete: oldFragment="" (empty), newFragment="long_text" (10+ chars)
359
+ - Basic autocomplete: oldFragment="" (empty), newFragment="function_name" (IDE suggests identifier)
360
+ - Full-line AI completion: oldFragment="" (empty), newFragment="def foo():\n pass" (entire function)
342
361
 
343
- Auto-complete is detected when:
362
+ Full-line auto-complete is detected when:
344
363
  - oldFragment is empty or very short (0-3 chars)
345
- - newFragment is substantial (10+ characters)
346
- - newFragment contains code structure (assignment, parens, brackets, etc.)
364
+ - newFragment generates 2+ complete lines
365
+ - newFragment contains complete statements (not just identifiers)
366
+ - Content represents meaningful code structure
347
367
  - newFragment does NOT already exist in the document state
348
368
  - Event not already flagged as external copy-paste
349
369
 
@@ -361,7 +381,7 @@ def _detect_fullline_autocomplete(
361
381
  Returns
362
382
  -------
363
383
  list[dict[str, Any]]
364
- List of suspected auto-complete events.
384
+ List of suspected multi-line auto-complete events.
365
385
  """
366
386
  suspicious_events = []
367
387
 
@@ -381,20 +401,22 @@ def _detect_fullline_autocomplete(
381
401
  new_len = len(new_frag)
382
402
 
383
403
  # At keystroke level, oldFragment is typically empty for insertions
384
- # Allow up to 3 chars for prefix-based autocomplete triggers
404
+ # Allow up to 3 chars for prefix-based triggers (e.g., "de" -> "def")
385
405
  if old_len > 3:
386
406
  continue
387
407
 
388
- # Skip single-character additions (normal typing)
389
- # Auto-complete typically adds 10+ characters at once
390
- if new_len < 10:
391
- continue
408
+ # Check line count - we care about complete statements
409
+ # Multi-line is obviously concerning, but single-line with a complete statement
410
+ # (like "if x: return True") is also suspicious if it came from autocomplete
411
+ new_lines = [n for n in new_frag.split("\n") if n.strip() != ""]
392
412
 
393
- # Skip large multi-line pastes - those should be caught by multi-line paste detector
394
- # Auto-complete is typically 1-2 lines and under 100 chars
395
- # Anything larger is likely external copy-paste, not auto-complete
396
- new_lines = new_frag.split("\n")
397
- if len(new_lines) > 2 or new_len > 100:
413
+ # For single-line completions, be more strict about what we flag
414
+ # We only flag if it's a complete statement with keywords, not just identifier completion
415
+ is_single_line = len(new_lines) <= 2 # 2 elements = 1 line + trailing \n
416
+ is_multi_line = len(new_lines) >= 3 # 3+ elements = 2+ actual lines
417
+
418
+ if not (is_single_line or is_multi_line):
419
+ # Shouldn't happen, but skip if malformed
398
420
  continue
399
421
 
400
422
  # The new fragment should not be just whitespace
@@ -402,40 +424,59 @@ def _detect_fullline_autocomplete(
402
424
  continue
403
425
 
404
426
  # Check if the new fragment contains code structure indicators
405
- # These strongly suggest IDE/AI auto-completion of code
406
- code_indicators = [
407
- "=", # Assignment (most common in autocomplete)
408
- "(", # Function call/definition
409
- ")", # Closing paren
410
- ":", # Block statement (if, for, def, etc.)
411
- "{", # Dictionary/block
412
- "}", # Closing brace
413
- "[", # List/index
414
- "]", # Closing bracket
415
- "=>", # Arrow function
416
- ";", # Statement end
427
+ # These strongly suggest IDE/AI auto-completion of actual code (not just identifiers)
428
+ complete_statement_indicators = [
429
+ ":", # Block statement (if:, for:, def:, class:, while:, with:, etc.)
430
+ "return", # Return statement
431
+ "def ", # Function definition
432
+ "class ", # Class definition
433
+ "if ", # If statement
434
+ "for ", # For loop
435
+ "while ", # While loop
436
+ "try:", # Try block
437
+ "except", # Exception handling
438
+ "import ", # Import statement
439
+ "=", # Assignment
417
440
  ]
418
441
 
419
- has_code_structure = any(indicator in new_frag for indicator in code_indicators)
442
+ has_complete_statement = any(indicator in new_frag for indicator in complete_statement_indicators)
420
443
 
421
- # Must have code structure to be considered auto-complete
422
- if has_code_structure:
423
- # Check if this content already existed in the document state BEFORE this event
424
- is_internal_copy = False
444
+ if not has_complete_statement:
445
+ # No complete statement - skip basic identifier completion
446
+ continue
425
447
 
426
- if idx > 0:
427
- prior_state = document_states[idx - 1]
428
- if new_frag in prior_state:
429
- is_internal_copy = True
448
+ # Minimum size for meaningful completion
449
+ if new_len < 10:
450
+ continue
430
451
 
431
- if not is_internal_copy:
432
- suspicious_events.append({
433
- "event_index": idx,
434
- "line_count": len(new_lines),
435
- "char_count": new_len,
436
- "reason": "full-line auto-complete",
437
- "newFragment": new_frag,
438
- })
452
+ # For multi-line: maximum size to distinguish from external pastes
453
+ # External pastes are typically much larger (100+ chars)
454
+ # Multi-line completions are usually 20-300 chars for a small function/block
455
+ if is_multi_line and new_len > 300:
456
+ continue
457
+
458
+ # For single-line: could be larger due to chained methods or long statements
459
+ # but cap at 200 chars to avoid flagging user-typed long lines
460
+ if is_single_line and new_len > 200:
461
+ continue
462
+
463
+ # Check if this content already existed in the document state BEFORE this event
464
+ is_internal_copy = False
465
+
466
+ if idx > 0:
467
+ prior_state = document_states[idx - 1]
468
+ if new_frag in prior_state:
469
+ is_internal_copy = True
470
+
471
+ if not is_internal_copy:
472
+ line_desc = "line" if is_single_line else "lines"
473
+ suspicious_events.append({
474
+ "event_index": idx,
475
+ "line_count": len(new_lines),
476
+ "char_count": new_len,
477
+ "reason": f"complete statement auto-complete (AI assistance)",
478
+ "newFragment": new_frag,
479
+ })
439
480
 
440
481
  return suspicious_events
441
482
 
@@ -190,7 +190,29 @@ def display_suspicious_event(event: dict[str, Any], show_details: bool) -> None:
190
190
  print(f" {line}", file=sys.stderr)
191
191
  print(" ```", file=sys.stderr)
192
192
 
193
+ elif "event_indices" in event and reason == "rapid one-line pastes (AI indicator)":
194
+ # Rapid paste sequences (AI indicator) - show aggregate style
195
+ indices = event["event_indices"]
196
+ print(
197
+ f" AI Rapid Paste: Events #{indices[0]}-#{indices[-1]} "
198
+ f"({event['line_count']} lines, {event['char_count']} chars, "
199
+ f"{len(indices)} events in < 1 second)",
200
+ file=sys.stderr,
201
+ )
202
+
203
+ if show_details and "detailed_events" in event:
204
+ # Combine all detailed events into one block
205
+ combined_content = "".join(
206
+ detail["newFragment"] for detail in event["detailed_events"]
207
+ )
208
+ print(" Combined output:", file=sys.stderr)
209
+ print(" ```", file=sys.stderr)
210
+ for line in combined_content.split("\n"):
211
+ print(f" {line}", file=sys.stderr)
212
+ print(" ```", file=sys.stderr)
213
+
193
214
  elif "event_indices" in event:
215
+ # Other multi-event clusters
194
216
  indices = event.get("event_indices", [event["event_index"]])
195
217
  print(
196
218
  f" Events #{indices[0]}-#{indices[-1]} ({reason}): "
@@ -222,8 +244,20 @@ def display_suspicious_events(
222
244
  Whether to show detailed autocomplete events
223
245
  """
224
246
  if suspicious_events:
225
- print("\nSuspicious copy-paste events detected:", file=sys.stderr)
226
- for event in suspicious_events:
247
+ print("\nSuspicious events detected:", file=sys.stderr)
248
+
249
+ # Sort events by their index for chronological display
250
+ def get_sort_key(event: dict[str, Any]) -> int | float:
251
+ if "event_indices" in event and event["event_indices"]:
252
+ return event["event_indices"][0]
253
+ if "detailed_events" in event and event["detailed_events"]:
254
+ return event["detailed_events"][0].get("event_index", float("inf"))
255
+ event_idx = event.get("event_index", -1)
256
+ return event_idx if event_idx >= 0 else float("inf")
257
+
258
+ sorted_events = sorted(suspicious_events, key=get_sort_key)
259
+
260
+ for event in sorted_events:
227
261
  display_suspicious_event(event, show_details)
228
262
  else:
229
263
  print("Success! No suspicious events detected.", file=sys.stderr)
File without changes