cr-proc 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,359 +1,500 @@
1
+ """Command-line interface for code recorder processor."""
1
2
  import argparse
2
- import json
3
+ import glob
3
4
  import sys
4
- from datetime import datetime
5
5
  from pathlib import Path
6
6
  from typing import Any
7
7
 
8
8
  from .api.build import reconstruct_file_from_events
9
+ from .api.document import (
10
+ filter_events_by_document,
11
+ get_recorded_documents,
12
+ resolve_document,
13
+ resolve_template_file,
14
+ find_matching_template,
15
+ )
9
16
  from .api.load import load_jsonl
10
- from .api.verify import check_time_limit, template_diff, verify
17
+ from .api.output import write_batch_json_output
18
+ from .api.verify import (
19
+ check_time_limit,
20
+ combine_time_info,
21
+ detect_external_copypaste,
22
+ template_diff,
23
+ verify,
24
+ )
25
+ from .display import (
26
+ display_suspicious_events,
27
+ display_template_diff,
28
+ display_time_info,
29
+ print_batch_header,
30
+ print_batch_summary,
31
+ )
32
+ from .playback import playback_recording
11
33
 
12
34
 
13
- def resolve_document(
14
- docs: list[str], template_path: Path, override: str | None
15
- ) -> str | None:
35
def create_parser() -> argparse.ArgumentParser:
    """
    Build the command-line argument parser for the processor CLI.

    Returns
    -------
    argparse.ArgumentParser
        Parser configured with all positional and optional arguments.
    """
    parser = argparse.ArgumentParser(
        description="Process and verify code recorder JSONL files"
    )

    # (flags, options) for every argument, registered in a single pass below.
    argument_specs: list[tuple[tuple[str, ...], dict[str, Any]]] = [
        (
            ("files",),
            {
                "type": str,
                "nargs": "+",
                "help": "Path(s) to JSONL file(s) and optionally a template file. "
                "JSONL files: compressed JSONL file(s) (*.recording.jsonl.gz). "
                "Supports glob patterns like 'recordings/*.jsonl.gz'. "
                "Template file (optional last positional): template file path. "
                "Omit to use --template-dir instead.",
            },
        ),
        (
            ("--template-dir",),
            {
                "type": Path,
                "default": None,
                "help": "Directory containing template files (overrides positional template file). "
                "Will search for files matching the document name. "
                "If no match found, reconstruction proceeds with warning.",
            },
        ),
        (
            ("-t", "--time-limit"),
            {
                "type": int,
                "default": None,
                "help": "Maximum allowed time in minutes between first and last edit. "
                "If exceeded, recording is flagged. Applied individually to each recording file.",
            },
        ),
        (
            ("-d", "--document"),
            {
                "type": str,
                "default": None,
                "help": "Document path or filename to process from the recording. "
                "Defaults to the document whose extension matches the template file.",
            },
        ),
        (
            ("-o", "--output-json"),
            {
                "type": Path,
                "default": None,
                "help": "Path to output JSON file with verification results. "
                "Uses consistent format for both single and batch modes, with batch_mode flag. "
                "In batch mode, includes combined_time_info across all files.",
            },
        ),
        (
            ("-f", "--output-file"),
            {
                "type": Path,
                "default": None,
                "help": "Write reconstructed code to specified file instead of stdout. "
                "In batch mode, this should be a directory where files will be named after the input files.",
            },
        ),
        (
            ("--output-dir",),
            {
                "type": Path,
                "default": None,
                "help": "Directory to write reconstructed code files in batch mode (one file per recording). "
                "Files are named based on input recording filenames.",
            },
        ),
        (
            ("-s", "--show-autocomplete-details"),
            {
                "action": "store_true",
                "help": "Show individual auto-complete events in addition to "
                "aggregate statistics",
            },
        ),
        (
            ("-p", "--playback"),
            {
                "action": "store_true",
                "help": "Play back the recording in real-time, showing code evolution",
            },
        ),
        (
            ("--playback-speed",),
            {
                "type": float,
                "default": 1.0,
                "help": "Playback speed multiplier (1.0 = real-time, 2.0 = 2x speed, 0.5 = half speed)",
            },
        ),
    ]

    for flags, options in argument_specs:
        parser.add_argument(*flags, **options)

    return parser
127
def expand_file_patterns(patterns: list[str]) -> list[Path]:
    """
    Expand glob patterns and validate that the referenced files exist.

    Parameters
    ----------
    patterns : list[str]
        File paths or glob patterns (e.g. 'recordings/*.jsonl.gz').

    Returns
    -------
    list[Path]
        Existing file paths, in first-seen order with duplicates removed.
        (Dedup fixes double-processing when overlapping patterns — or the
        same path given twice — match the same file.)

    Raises
    ------
    FileNotFoundError
        If no files are found, or none of the named files exist.
    """
    candidates: list[Path] = []
    for pattern in patterns:
        expanded = glob.glob(pattern)
        if expanded:
            candidates.extend(Path(f) for f in expanded)
        else:
            # If no glob match, treat as literal path so the user still
            # gets a per-file "not found" warning below.
            candidates.append(Path(pattern))

    # Remove duplicates while preserving first-seen order.
    candidates = list(dict.fromkeys(candidates))

    if not candidates:
        raise FileNotFoundError("No JSONL files found")

    # Check if files exist
    existing_files = [f for f in candidates if f.exists()]
    if not existing_files:
        raise FileNotFoundError("None of the specified files exist")

    # Warn (but do not fail) about individual missing files.
    existing_set = set(existing_files)
    for f in candidates:
        if f not in existing_set:
            print(f"Warning: File not found: {f}", file=sys.stderr)

    return existing_files
170
+
171
+
172
def process_single_file(
    jsonl_path: Path,
    template_data: str,
    target_document: str | None,
    time_limit: int | None,
) -> tuple[bool, str, list[dict[str, Any]], dict[str, Any] | None, str]:
    """
    Process a single JSONL recording file.

    Loads the recording, filters its events down to *target_document*,
    checks elapsed-time limits, then verifies the events against the
    template and reconstructs the final file contents.

    Parameters
    ----------
    jsonl_path : Path
        Path to the JSONL file.
    template_data : str
        Template file content (may be "" when no template was resolved).
    target_document : str | None
        Document to process; None processes all events.
    time_limit : int | None
        Time limit in minutes; None disables the limit check.

    Returns
    -------
    tuple
        (verified, reconstructed_code, suspicious_events, time_info, template_diff_text).
        On hard failure the reconstruction is "" and suspicious_events is [].
        Errors are reported on stderr rather than raised.
    """
    try:
        json_data = load_jsonl(jsonl_path)
    except (FileNotFoundError, ValueError, IOError) as e:
        print(f"Error loading {jsonl_path}: {e}", file=sys.stderr)
        return False, "", [], None, ""

    # Filter events for target document
    doc_events = filter_events_by_document(json_data, target_document)
    if target_document and not doc_events:
        print(
            f"Warning: No events found for document '{target_document}' in {jsonl_path}",
            file=sys.stderr,
        )
        return False, "", [], None, ""

    # Check time information
    time_info = check_time_limit(doc_events, time_limit)

    # Verify and process the recording
    try:
        verified_template, suspicious_events = verify(template_data, doc_events)
        reconstructed = reconstruct_file_from_events(
            doc_events, verified_template, document_path=target_document
        )
        return True, reconstructed, suspicious_events, time_info, ""
    except ValueError as e:
        # If verification fails but we have events, still try to reconstruct
        print(f"Warning: Verification failed for {jsonl_path}: {e}", file=sys.stderr)
        try:
            if not doc_events:
                return False, "", [], time_info, ""

            # Compute diff against template and still detect suspicious events
            diff_text = template_diff(template_data, doc_events)
            suspicious_events = detect_external_copypaste(doc_events)

            # Reconstruct using the initial recorded state
            # NOTE(review): assumes the first event's "newFragment" holds the
            # initial document state — confirm against the recorder format.
            initial_state = doc_events[0].get("newFragment", "")
            reconstructed = reconstruct_file_from_events(
                doc_events, initial_state, document_path=target_document
            )
            return False, reconstructed, suspicious_events, time_info, diff_text
        except Exception as reconstruction_error:
            print(
                f"Error reconstructing {jsonl_path}: {type(reconstruction_error).__name__}: {reconstruction_error}",
                file=sys.stderr,
            )
            return False, "", [], time_info, ""
    except Exception as e:
        # Catch-all so one bad recording cannot abort a batch run.
        print(
            f"Error processing {jsonl_path}: {type(e).__name__}: {e}",
            file=sys.stderr,
        )
        return False, "", [], time_info, ""
95
251
 
96
- def filter_events_by_document(
97
- events: tuple[dict[str, Any], ...], document: str | None
98
- ) -> tuple[dict[str, Any], ...]:
252
+
253
def write_reconstructed_file(
    output_path: Path,
    content: str,
    file_description: str = "Reconstructed code"
) -> bool:
    """
    Write reconstructed code to *output_path*, creating parent directories.

    Parameters
    ----------
    output_path : Path
        Destination file path.
    content : str
        Text to write.
    file_description : str
        Label used in the success message printed to stderr.

    Returns
    -------
    bool
        True on success, False if anything went wrong while writing.
    """
    try:
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(content)
        print(f"{file_description} written to: {output_path}", file=sys.stderr)
        return True
    except Exception as write_error:
        # Best-effort: report and signal failure instead of raising, so the
        # caller can decide how to proceed.
        print(f"Error writing output file: {write_error}", file=sys.stderr)
        return False
117
283
 
118
284
 
119
- def display_time_info(time_info: dict[str, Any] | None) -> None:
285
def handle_playback_mode(
    jsonl_file: Path,
    template_file: Path,
    template_data: str,
    document_override: str | None,
    speed: float,
) -> int:
    """
    Handle playback mode for a single file.

    Loads the recording, resolves which recorded document to play, and
    replays it via ``playback_recording``.

    Parameters
    ----------
    jsonl_file : Path
        Path to the recording file.
    template_file : Path
        Path to the template file (used only for document resolution).
    template_data : str
        Template file content used as the playback starting state.
    document_override : str | None
        Explicit document name/path, or None to auto-resolve.
    speed : float
        Playback speed multiplier.

    Returns
    -------
    int
        Exit code (0 for success, 1 for error).
    """
    try:
        json_data = load_jsonl(jsonl_file)
        recorded_docs = get_recorded_documents(json_data)
        target_document = resolve_document(recorded_docs, template_file, document_override)

        if target_document:
            playback_recording(json_data, target_document, template_data, speed)
            return 0
        else:
            print("Error: No documents found in recording", file=sys.stderr)
            return 1
    except Exception as e:
        # Broad catch: playback is interactive/best-effort, so any failure
        # is reported as a generic load error rather than a traceback.
        print(f"Error loading file for playback: {e}", file=sys.stderr)
        return 1
151
327
 
152
328
 
153
- def display_suspicious_event(event: dict[str, Any], show_details: bool) -> None:
329
def process_batch(
    jsonl_files: list[Path],
    template_base: Path | None,
    template_data: str,
    args: argparse.Namespace,
) -> tuple[list[dict[str, Any]], bool]:
    """
    Process multiple recording files in batch mode.

    For each file: resolves the target document, picks a template (per-file
    from --template-dir when given, otherwise the shared *template_data*),
    runs verification/reconstruction, displays results, and optionally
    writes the reconstructed code to an output directory.

    Parameters
    ----------
    jsonl_files : list[Path]
        List of JSONL files to process.
    template_base : Path | None
        Path to template file or directory (used for document resolution).
    template_data : str
        Template file content shared across files (may be "" with --template-dir).
    args : argparse.Namespace
        Command-line arguments.

    Returns
    -------
    tuple
        (results, all_verified) — one result dict per successfully loaded
        file; all_verified is False if any file failed to load or verify.
    """
    results = []
    all_verified = True
    # --output-dir wins; otherwise accept --output-file only if it is an
    # existing directory (batch mode writes one file per recording).
    output_dir = args.output_dir or (
        args.output_file if args.output_file and args.output_file.is_dir() else None
    )

    for i, jsonl_file in enumerate(jsonl_files, 1):
        print_batch_header(i, len(jsonl_files), jsonl_file.name)

        # Determine target document for this file
        try:
            file_data = load_jsonl(jsonl_file)
            recorded_docs = get_recorded_documents(file_data)
            target_document = resolve_document(recorded_docs, template_base, args.document)
        except (FileNotFoundError, ValueError, IOError) as e:
            # Skip this file but keep processing the rest of the batch.
            print(f"Error determining document: {e}", file=sys.stderr)
            all_verified = False
            continue

        # If using template directory, find the matching template for this document
        if args.template_dir and target_document:
            matching_template_path = find_matching_template(args.template_dir, target_document)
            if matching_template_path:
                file_template_data = matching_template_path.read_text()
                print(f"Using template: {matching_template_path.name}", file=sys.stderr)
            else:
                # No template: verification runs against an empty template.
                file_template_data = ""
                print(
                    f"Warning: No matching template found for {target_document}. "
                    "Reconstruction will proceed without template verification.",
                    file=sys.stderr
                )
        else:
            file_template_data = template_data

        # Process the file
        verified, reconstructed, suspicious_events, time_info, diff_text = process_single_file(
            jsonl_file, file_template_data, target_document, args.time_limit
        )

        if not verified:
            all_verified = False

        # Display results
        display_time_info(time_info)
        display_suspicious_events(suspicious_events, args.show_autocomplete_details)
        display_template_diff(diff_text)

        # Store results
        results.append({
            "jsonl_file": jsonl_file,
            "target_document": target_document,
            "verified": verified,
            "reconstructed": reconstructed,
            "suspicious_events": suspicious_events,
            "time_info": time_info,
            "template_diff": diff_text,
        })

        # Write output file if requested
        if reconstructed and output_dir:
            # NOTE(review): output extension is hard-coded to ".py" — confirm
            # this is intended for recordings of non-Python documents.
            output_name = jsonl_file.stem.replace(".recording.jsonl", "") + ".py"
            output_path = output_dir / output_name
            write_reconstructed_file(output_path, reconstructed, "Written to")

    return results, all_verified
420
+
421
+
422
def process_single(
    jsonl_file: Path,
    template_base: Path | None,
    template_data: str,
    args: argparse.Namespace,
) -> tuple[list[dict[str, Any]], bool]:
    """
    Process a single recording file.

    Mirrors the per-file logic of batch mode: resolves the target document,
    picks a template (per-file from --template-dir when given, otherwise the
    shared *template_data*), verifies/reconstructs, displays results, and
    optionally writes the reconstructed code to --output-file.

    Parameters
    ----------
    jsonl_file : Path
        Path to JSONL file.
    template_base : Path | None
        Path to template file or directory (used for document resolution).
    template_data : str
        Template file content (may be "" with --template-dir).
    args : argparse.Namespace
        Command-line arguments.

    Returns
    -------
    tuple
        (results, verified) — a one-element results list on success,
        ([], False) when document resolution or output writing fails.
    """
    try:
        file_data = load_jsonl(jsonl_file)
        recorded_docs = get_recorded_documents(file_data)
        target_document = resolve_document(recorded_docs, template_base, args.document)
    except (FileNotFoundError, ValueError, IOError) as e:
        print(f"Error determining document: {e}", file=sys.stderr)
        return [], False

    # If using template directory, find the matching template for this document
    if args.template_dir and target_document:
        matching_template_path = find_matching_template(args.template_dir, target_document)
        if matching_template_path:
            file_template_data = matching_template_path.read_text()
            print(f"Using template: {matching_template_path.name}", file=sys.stderr)
        else:
            # No template: verification runs against an empty template.
            file_template_data = ""
            print(
                f"Warning: No matching template found for {target_document}. "
                "Reconstruction will proceed without template verification.",
                file=sys.stderr
            )
    else:
        file_template_data = template_data

    print(f"Processing: {target_document or template_base}", file=sys.stderr)

    verified, reconstructed, suspicious_events, time_info, diff_text = process_single_file(
        jsonl_file, file_template_data, target_document, args.time_limit
    )

    # Display results
    display_time_info(time_info)
    display_suspicious_events(suspicious_events, args.show_autocomplete_details)
    display_template_diff(diff_text)

    # Write output file if requested
    if reconstructed and args.output_file:
        if not write_reconstructed_file(args.output_file, reconstructed):
            return [], False

    results = [{
        "jsonl_file": jsonl_file,
        "target_document": target_document,
        "verified": verified,
        "reconstructed": reconstructed,
        "suspicious_events": suspicious_events,
        "time_info": time_info,
        "template_diff": diff_text,
    }]

    return results, verified
357
498
 
358
499
 
359
500
  def main() -> int:
@@ -368,90 +509,123 @@ def main() -> int:
368
509
  parser = create_parser()
369
510
  args = parser.parse_args()
370
511
 
371
- # Load JSONL file
512
+ # Parse files argument: last one may be template_file if it's not a JSONL file
513
+ files_list = args.files
514
+ template_file = None
515
+ jsonl_patterns = files_list
516
+
517
+ # If we have more than one file and the last one doesn't look like a JSONL file,
518
+ # treat it as the template file
519
+ if len(files_list) > 1 and not files_list[-1].endswith(('.jsonl', '.jsonl.gz')):
520
+ template_file = Path(files_list[-1])
521
+ jsonl_patterns = files_list[:-1]
522
+
523
+ # Validate that at least one of template_file or template_dir is provided
524
+ if not template_file and not args.template_dir:
525
+ print("Error: Either a template file or --template-dir must be provided", file=sys.stderr)
526
+ parser.print_help()
527
+ return 1
528
+
529
+ # Expand file patterns and validate
372
530
  try:
373
- json_data = load_jsonl(args.jsonl_file)
531
+ jsonl_files = expand_file_patterns(jsonl_patterns)
374
532
  except FileNotFoundError as e:
375
533
  print(f"Error: {e}", file=sys.stderr)
376
534
  return 1
377
- except (ValueError, IOError) as e:
378
- print(f"Error loading JSONL file: {e}", file=sys.stderr)
379
- return 1
380
535
 
381
- # Resolve which document to process
382
- recorded_docs = get_recorded_documents(json_data)
383
- try:
384
- target_document = resolve_document(
385
- recorded_docs, args.template_file, args.document
386
- )
387
- except ValueError as e:
388
- print(f"Error determining document: {e}", file=sys.stderr)
389
- return 1
536
+ batch_mode = len(jsonl_files) > 1
537
+ if batch_mode:
538
+ print(f"Processing {len(jsonl_files)} recording files in batch mode", file=sys.stderr)
390
539
 
391
- # Filter events for target document
392
- doc_events = filter_events_by_document(json_data, target_document)
393
- if target_document and not doc_events:
394
- print(
395
- f"Error: No events found for document '{target_document}'",
396
- file=sys.stderr,
397
- )
398
- return 1
540
+ # Determine template source (use template_dir if provided, otherwise template_file)
541
+ template_path = args.template_dir if args.template_dir else template_file
542
+
543
+ # Handle playback mode (single file only)
544
+ if not batch_mode and args.playback:
545
+ try:
546
+ json_data = load_jsonl(jsonl_files[0])
547
+ recorded_docs = get_recorded_documents(json_data)
548
+ target_document = resolve_document(recorded_docs, template_path, args.document)
549
+
550
+ # Get template data for playback
551
+ template_data, _ = resolve_template_file(
552
+ template_file if not args.template_dir else None,
553
+ args.template_dir,
554
+ target_document
555
+ )
399
556
 
400
- print(f"Processing: {target_document or args.template_file}", file=sys.stderr)
557
+ if target_document:
558
+ playback_recording(json_data, target_document, template_data, args.playback_speed)
559
+ return 0
560
+ else:
561
+ print("Error: No documents found in recording", file=sys.stderr)
562
+ return 1
563
+ except Exception as e:
564
+ print(f"Error loading file for playback: {e}", file=sys.stderr)
565
+ return 1
401
566
 
402
- # Read template file
567
+ # Get template data
403
568
  try:
404
- template_data = args.template_file.read_text()
405
- except FileNotFoundError:
406
- print(
407
- f"Error: Template file not found: {args.template_file}", file=sys.stderr
408
- )
409
- return 1
410
- except Exception as e:
411
- print(f"Error reading template file: {e}", file=sys.stderr)
569
+ # If using a template directory, skip loading a global template here
570
+ # Let per-file matching handle it in process_batch/process_single
571
+ if args.template_dir:
572
+ template_data = ""
573
+ else:
574
+ template_data, _ = resolve_template_file(
575
+ template_file if not args.template_dir else None,
576
+ None,
577
+ None
578
+ )
579
+ except (FileNotFoundError, ValueError) as e:
580
+ print(f"Error: {e}", file=sys.stderr)
412
581
  return 1
413
582
 
414
- # Check and display time information
415
- time_info = check_time_limit(doc_events, args.time_limit)
416
- display_time_info(time_info)
417
-
418
- # Verify and process the recording
419
- try:
420
- template_data, suspicious_events = verify(template_data, doc_events)
421
- reconstructed = reconstruct_file_from_events(
422
- doc_events, template_data, document_path=target_document
583
+ # Process files
584
+ if batch_mode:
585
+ results, all_verified = process_batch(
586
+ jsonl_files, template_path, template_data, args
587
+ )
588
+ else:
589
+ results, all_verified = process_single(
590
+ jsonl_files[0], template_path, template_data, args
423
591
  )
424
- print(reconstructed)
425
592
 
426
- # Display suspicious events
427
- display_suspicious_events(suspicious_events, args.show_autocomplete_details)
593
+ if not results:
594
+ return 1
595
+
596
+ # Output summary and combined report for batch mode
597
+ if batch_mode:
598
+ failed_files = [r["jsonl_file"].name for r in results if not r["verified"]]
599
+ verified_count = len(results) - len(failed_files)
600
+ print_batch_summary(len(results), verified_count, failed_files)
428
601
 
429
- # Write JSON output if requested
602
+ # Display combined time report
603
+ time_infos = [r["time_info"] for r in results]
604
+ combined_time = None
605
+ if any(time_infos):
606
+ combined_time = combine_time_info(time_infos, args.time_limit)
607
+ display_time_info(combined_time, is_combined=True)
608
+
609
+ # Write JSON output
610
+ if args.output_json:
611
+ try:
612
+ write_batch_json_output(
613
+ args.output_json, results, combined_time, all_verified, batch_mode=True
614
+ )
615
+ except Exception as e:
616
+ print(f"Error writing batch JSON output: {e}", file=sys.stderr)
617
+ else:
618
+ # Single file mode - write JSON output
430
619
  if args.output_json:
431
620
  try:
432
- write_json_output(
433
- args.output_json,
434
- target_document or str(args.template_file),
435
- time_info,
436
- suspicious_events,
621
+ write_batch_json_output(
622
+ args.output_json, results, results[0]["time_info"],
623
+ results[0]["verified"], batch_mode=False
437
624
  )
438
625
  except Exception as e:
439
626
  print(f"Error writing JSON output: {e}", file=sys.stderr)
440
- return 1
441
-
442
- except ValueError as e:
443
- print("File failed verification from template!", file=sys.stderr)
444
- print(str(e), file=sys.stderr)
445
- try:
446
- print(template_diff(template_data, doc_events), file=sys.stderr)
447
- except Exception:
448
- pass
449
- return 1
450
- except Exception as e:
451
- print(f"Error processing file: {type(e).__name__}: {e}", file=sys.stderr)
452
- return 1
453
627
 
454
- return 0
628
+ return 0 if all_verified else 1
455
629
 
456
630
 
457
631
  if __name__ == "__main__":