cr-proc 0.1.8-py3-none-any.whl → 0.1.10-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,491 +1,500 @@
+"""Command-line interface for code recorder processor."""
 import argparse
-import json
-import os
+import glob
 import sys
-import time
-from datetime import datetime
 from pathlib import Path
 from typing import Any
 
 from .api.build import reconstruct_file_from_events
+from .api.document import (
+    filter_events_by_document,
+    get_recorded_documents,
+    resolve_document,
+    resolve_template_file,
+    find_matching_template,
+)
 from .api.load import load_jsonl
-from .api.verify import check_time_limit, template_diff, verify
+from .api.output import write_batch_json_output
+from .api.verify import (
+    check_time_limit,
+    combine_time_info,
+    detect_external_copypaste,
+    template_diff,
+    verify,
+)
+from .display import (
+    display_suspicious_events,
+    display_template_diff,
+    display_time_info,
+    print_batch_header,
+    print_batch_summary,
+)
+from .playback import playback_recording
 
 
-def resolve_document(
-    docs: list[str], template_path: Path, override: str | None
-) -> str | None:
+def create_parser() -> argparse.ArgumentParser:
     """
-    Determine which document from the recording to process.
-
-    Parameters
-    ----------
-    docs : list[str]
-        List of document paths found in the recording
-    template_path : Path
-        Path to the template file
-    override : str | None
-        Explicit document name or path override
+    Create and configure the argument parser.
 
     Returns
     -------
-    str | None
-        The resolved document path, or None if no documents exist
-
-    Raises
-    ------
-    ValueError
-        If document resolution is ambiguous or the override doesn't match
+    argparse.ArgumentParser
+        Configured argument parser
     """
-    if not docs:
-        return None
-
-    if override:
-        matches = [
-            d for d in docs if d.endswith(override) or Path(d).name == override
-        ]
-        if not matches:
-            raise ValueError(
-                f"No document in recording matches '{override}'. Available: {docs}"
-            )
-        if len(matches) > 1:
-            raise ValueError(
-                f"Ambiguous document override '{override}'. Matches: {matches}"
-            )
-        return matches[0]
-
-    template_ext = template_path.suffix
-    ext_matches = [d for d in docs if Path(d).suffix == template_ext]
-    if len(ext_matches) == 1:
-        return ext_matches[0]
-    if len(ext_matches) > 1:
-        raise ValueError(
-            f"Multiple documents share extension '{template_ext}': {ext_matches}. "
-            "Use --document to choose one."
-        )
-
-    if len(docs) == 1:
-        return docs[0]
-
-    raise ValueError(
-        "Could not determine document to process. Use --document to select one. "
-        f"Available documents: {docs}"
+    parser = argparse.ArgumentParser(
+        description="Process and verify code recorder JSONL files"
     )
+    parser.add_argument(
+        "files",
+        type=str,
+        nargs="+",
+        help="Path(s) to JSONL file(s) and optionally a template file. "
+        "JSONL files: compressed JSONL file(s) (*.recording.jsonl.gz). "
+        "Supports glob patterns like 'recordings/*.jsonl.gz'. "
+        "Template file (optional last positional): template file path. "
+        "Omit to use --template-dir instead.",
+    )
+    parser.add_argument(
+        "--template-dir",
+        type=Path,
+        default=None,
+        help="Directory containing template files (overrides positional template file). "
+        "Will search for files matching the document name. "
+        "If no match found, reconstruction proceeds with warning.",
+    )
+    parser.add_argument(
+        "-t",
+        "--time-limit",
+        type=int,
+        default=None,
+        help="Maximum allowed time in minutes between first and last edit. "
+        "If exceeded, recording is flagged. Applied individually to each recording file.",
+    )
+    parser.add_argument(
+        "-d",
+        "--document",
+        type=str,
+        default=None,
+        help="Document path or filename to process from the recording. "
+        "Defaults to the document whose extension matches the template file.",
+    )
+    parser.add_argument(
+        "-o",
+        "--output-json",
+        type=Path,
+        default=None,
+        help="Path to output JSON file with verification results. "
+        "Uses consistent format for both single and batch modes, with batch_mode flag. "
+        "In batch mode, includes combined_time_info across all files.",
+    )
+    parser.add_argument(
+        "-f",
+        "--output-file",
+        type=Path,
+        default=None,
+        help="Write reconstructed code to specified file instead of stdout. "
+        "In batch mode, this should be a directory where files will be named after the input files.",
+    )
+    parser.add_argument(
+        "--output-dir",
+        type=Path,
+        default=None,
+        help="Directory to write reconstructed code files in batch mode (one file per recording). "
+        "Files are named based on input recording filenames.",
+    )
+    parser.add_argument(
+        "-s",
+        "--show-autocomplete-details",
+        action="store_true",
+        help="Show individual auto-complete events in addition to "
+        "aggregate statistics",
+    )
+    parser.add_argument(
+        "-p",
+        "--playback",
+        action="store_true",
+        help="Play back the recording in real-time, showing code evolution",
+    )
+    parser.add_argument(
+        "--playback-speed",
+        type=float,
+        default=1.0,
+        help="Playback speed multiplier (1.0 = real-time, 2.0 = 2x speed, 0.5 = half speed)",
+    )
+    return parser
 
 
-def get_recorded_documents(events: tuple[dict[str, Any], ...]) -> list[str]:
-    """
-    Extract unique document paths from recording events.
-
-    Parameters
-    ----------
-    events : tuple[dict[str, Any], ...]
-        Recording events loaded from JSONL
-
-    Returns
-    -------
-    list[str]
-        Sorted list of unique document paths
-    """
-    documents = {
-        e.get("document")
-        for e in events
-        if "document" in e and e.get("document") is not None
-    }
-    return sorted([d for d in documents if d is not None])
-
-
-def filter_events_by_document(
-    events: tuple[dict[str, Any], ...], document: str | None
-) -> tuple[dict[str, Any], ...]:
+def expand_file_patterns(patterns: list[str]) -> list[Path]:
     """
-    Filter events to only those for a specific document.
+    Expand glob patterns and validate files exist.
 
     Parameters
     ----------
-    events : tuple[dict[str, Any], ...]
-        All recording events
-    document : str | None
-        Document path to filter by, or None to return all events
+    patterns : list[str]
+        List of file paths or glob patterns
 
     Returns
     -------
-    tuple[dict[str, Any], ...]
-        Filtered events
-    """
-    if document:
-        return tuple(e for e in events if e.get("document") == document)
-    return events
-
+    list[Path]
+        List of existing file paths
 
-def display_time_info(time_info: dict[str, Any] | None) -> None:
+    Raises
+    ------
+    FileNotFoundError
+        If no files are found
     """
-    Display elapsed time and time limit information.
+    jsonl_files = []
+    for pattern in patterns:
+        expanded = glob.glob(pattern)
+        if expanded:
+            jsonl_files.extend([Path(f) for f in expanded])
+        else:
+            # If no glob match, treat as literal path
+            jsonl_files.append(Path(pattern))
 
-    Parameters
-    ----------
-    time_info : dict[str, Any] | None
-        Time information from check_time_limit, or None if no time data
-    """
-    if not time_info:
-        return
+    if not jsonl_files:
+        raise FileNotFoundError("No JSONL files found")
 
-    print(
-        f"Elapsed editing time: {time_info['minutes_elapsed']} minutes",
-        file=sys.stderr,
-    )
+    # Check if files exist
+    existing_files = [f for f in jsonl_files if f.exists()]
+    if not existing_files:
+        raise FileNotFoundError("None of the specified files exist")
 
-    first_ts = datetime.fromisoformat(
-        time_info["first_timestamp"].replace("Z", "+00:00")
-    )
-    last_ts = datetime.fromisoformat(
-        time_info["last_timestamp"].replace("Z", "+00:00")
-    )
-    time_span = (last_ts - first_ts).total_seconds() / 60
-
-    print(f"Time span (first to last edit): {time_span:.2f} minutes", file=sys.stderr)
+    # Warn about missing files
+    if len(existing_files) < len(jsonl_files):
+        missing = [f for f in jsonl_files if f not in existing_files]
+        for f in missing:
+            print(f"Warning: File not found: {f}", file=sys.stderr)
 
-    if time_info["exceeds_limit"]:
-        print("\nTime limit exceeded!", file=sys.stderr)
-        print(f" Limit: {time_info['time_limit_minutes']} minutes", file=sys.stderr)
-        print(f" First edit: {time_info['first_timestamp']}", file=sys.stderr)
-        print(f" Last edit: {time_info['last_timestamp']}", file=sys.stderr)
+    return existing_files
 
 
-def display_suspicious_event(event: dict[str, Any], show_details: bool) -> None:
+def process_single_file(
+    jsonl_path: Path,
+    template_data: str,
+    target_document: str | None,
+    time_limit: int | None,
+) -> tuple[bool, str, list[dict[str, Any]], dict[str, Any] | None, str]:
     """
-    Display a single suspicious event.
+    Process a single JSONL recording file.
 
     Parameters
     ----------
-    event : dict[str, Any]
-        Suspicious event data
-    show_details : bool
-        Whether to show detailed autocomplete events
+    jsonl_path : Path
+        Path to the JSONL file
+    template_data : str
+        Template file content
+    target_document : str | None
+        Document to process
+    time_limit : int | None
+        Time limit in minutes
+
+    Returns
+    -------
+    tuple
+        (verified, reconstructed_code, suspicious_events, time_info, template_diff_text)
     """
-    reason = event.get("reason", "unknown")
+    try:
+        json_data = load_jsonl(jsonl_path)
+    except (FileNotFoundError, ValueError, IOError) as e:
+        print(f"Error loading {jsonl_path}: {e}", file=sys.stderr)
+        return False, "", [], None, ""
 
-    # Handle aggregate auto-complete events
-    if event.get("event_index") == -1 and "detailed_events" in event:
-        event_count = event["event_count"]
-        total_chars = event["total_chars"]
+    # Filter events for target document
+    doc_events = filter_events_by_document(json_data, target_document)
+    if target_document and not doc_events:
         print(
-            f" Aggregate: {event_count} auto-complete/small paste events "
-            f"({total_chars} total chars)",
+            f"Warning: No events found for document '{target_document}' in {jsonl_path}",
             file=sys.stderr,
         )
+        return False, "", [], None, ""
 
-        if show_details:
-            print(" Detailed events:", file=sys.stderr)
-            for detail in event["detailed_events"]:
-                detail_idx = detail["event_index"]
-                detail_lines = detail["line_count"]
-                detail_chars = detail["char_count"]
-                detail_frag = detail["newFragment"]
-                print(
-                    f" Event #{detail_idx}: {detail_lines} lines, "
-                    f"{detail_chars} chars",
-                    file=sys.stderr,
-                )
-                print(" ```", file=sys.stderr)
-                for line in detail_frag.split("\n"):
-                    print(f" {line}", file=sys.stderr)
-                print(" ```", file=sys.stderr)
-
-    elif "event_indices" in event and reason == "rapid one-line pastes (AI indicator)":
-        # Rapid paste sequences (AI indicator) - show aggregate style
-        indices = event["event_indices"]
-        print(
-            f" AI Rapid Paste: Events #{indices[0]}-#{indices[-1]} "
-            f"({event['line_count']} lines, {event['char_count']} chars, "
-            f"{len(indices)} events in < 1 second)",
-            file=sys.stderr,
-        )
+    # Check time information
+    time_info = check_time_limit(doc_events, time_limit)
 
-        if show_details and "detailed_events" in event:
-            # Combine all detailed events into one block
-            combined_content = "".join(
-                detail["newFragment"] for detail in event["detailed_events"]
-            )
-            print(" Combined output:", file=sys.stderr)
-            print(" ```", file=sys.stderr)
-            for line in combined_content.split("\n"):
-                print(f" {line}", file=sys.stderr)
-            print(" ```", file=sys.stderr)
-
-    elif "event_indices" in event:
-        # Other multi-event clusters
-        indices = event.get("event_indices", [event["event_index"]])
-        print(
-            f" Events #{indices[0]}-#{indices[-1]} ({reason}): "
-            f"{event['line_count']} lines, {event['char_count']} chars",
-            file=sys.stderr,
+    # Verify and process the recording
+    try:
+        verified_template, suspicious_events = verify(template_data, doc_events)
+        reconstructed = reconstruct_file_from_events(
+            doc_events, verified_template, document_path=target_document
         )
+        return True, reconstructed, suspicious_events, time_info, ""
+    except ValueError as e:
+        # If verification fails but we have events, still try to reconstruct
+        print(f"Warning: Verification failed for {jsonl_path}: {e}", file=sys.stderr)
+        try:
+            if not doc_events:
+                return False, "", [], time_info, ""
 
-    else:
-        new_fragment = event["newFragment"].replace("\n", "\n ")
+            # Compute diff against template and still detect suspicious events
+            diff_text = template_diff(template_data, doc_events)
+            suspicious_events = detect_external_copypaste(doc_events)
+
+            # Reconstruct using the initial recorded state
+            initial_state = doc_events[0].get("newFragment", "")
+            reconstructed = reconstruct_file_from_events(
+                doc_events, initial_state, document_path=target_document
+            )
+            return False, reconstructed, suspicious_events, time_info, diff_text
+        except Exception as reconstruction_error:
+            print(
+                f"Error reconstructing {jsonl_path}: {type(reconstruction_error).__name__}: {reconstruction_error}",
+                file=sys.stderr,
+            )
+            return False, "", [], time_info, ""
+    except Exception as e:
         print(
-            f" Event #{event['event_index']} ({reason}): "
-            f"{event['line_count']} lines, {event['char_count']} chars - "
-            f"newFragment:\n ```\n {new_fragment}\n ```",
+            f"Error processing {jsonl_path}: {type(e).__name__}: {e}",
             file=sys.stderr,
         )
+        return False, "", [], time_info, ""
 
 
-def display_suspicious_events(
-    suspicious_events: list[dict[str, Any]], show_details: bool
-) -> None:
+def write_reconstructed_file(
+    output_path: Path,
+    content: str,
+    file_description: str = "Reconstructed code"
+) -> bool:
     """
-    Display all suspicious events or success message.
+    Write reconstructed code to a file.
 
     Parameters
    ----------
-    suspicious_events : list[dict[str, Any]]
-        List of suspicious events detected
-    show_details : bool
-        Whether to show detailed autocomplete events
+    output_path : Path
+        Path to write to
+    content : str
+        Content to write
+    file_description : str
+        Description for success message
+
+    Returns
+    -------
+    bool
+        True if successful, False otherwise
     """
-    if suspicious_events:
-        print("\nSuspicious events detected:", file=sys.stderr)
-
-        # Sort events by their index for chronological display
-        def get_sort_key(event: dict[str, Any]) -> int | float:
-            if "event_indices" in event and event["event_indices"]:
-                return event["event_indices"][0]
-            if "detailed_events" in event and event["detailed_events"]:
-                return event["detailed_events"][0].get("event_index", float("inf"))
-            event_idx = event.get("event_index", -1)
-            return event_idx if event_idx >= 0 else float("inf")
-
-        sorted_events = sorted(suspicious_events, key=get_sort_key)
-
-        for event in sorted_events:
-            display_suspicious_event(event, show_details)
-    else:
-        print("Success! No suspicious events detected.", file=sys.stderr)
+    try:
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+        output_path.write_text(content)
+        print(f"{file_description} written to: {output_path}", file=sys.stderr)
+        return True
+    except Exception as e:
+        print(f"Error writing output file: {e}", file=sys.stderr)
+        return False
 
 
-def write_json_output(
-    output_path: Path,
-    document: str,
-    time_info: dict[str, Any] | None,
-    suspicious_events: list[dict[str, Any]],
-    reconstructed_code: str,
-    verified: bool,
-) -> None:
+def handle_playback_mode(
+    jsonl_file: Path,
+    template_file: Path,
+    template_data: str,
+    document_override: str | None,
+    speed: float,
+) -> int:
     """
-    Write verification results to JSON file.
+    Handle playback mode for a single file.
 
     Parameters
     ----------
-    output_path : Path
-        Path to output JSON file
-    document : str
-        Document that was processed
-    time_info : dict[str, Any] | None
-        Time information from verification
-    suspicious_events : list[dict[str, Any]]
-        List of suspicious events detected
-    reconstructed_code : str
-        The reconstructed file content
-    verified : bool
-        Whether the file passed verification
+    jsonl_file : Path
+        Path to the recording file
+    template_file : Path
+        Path to the template file
+    template_data : str
+        Template file content
+    document_override : str | None
+        Document override
+    speed : float
+        Playback speed
 
-    Raises
-    ------
-    Exception
-        If file writing fails
+    Returns
+    -------
+    int
+        Exit code (0 for success, 1 for error)
     """
-    results = {
-        "document": document,
-        "verified": verified,
-        "time_info": time_info,
-        "suspicious_events": suspicious_events,
-        "reconstructed_code": reconstructed_code,
-    }
+    try:
+        json_data = load_jsonl(jsonl_file)
+        recorded_docs = get_recorded_documents(json_data)
+        target_document = resolve_document(recorded_docs, template_file, document_override)
 
-    output_path.parent.mkdir(parents=True, exist_ok=True)
-    with open(output_path, "w") as f:
-        json.dump(results, f, indent=2)
-    print(f"Results written to {output_path}", file=sys.stderr)
+        if target_document:
+            playback_recording(json_data, target_document, template_data, speed)
+            return 0
+        else:
+            print("Error: No documents found in recording", file=sys.stderr)
+            return 1
+    except Exception as e:
+        print(f"Error loading file for playback: {e}", file=sys.stderr)
+        return 1
 
 
-def playback_recording(
-    json_data: tuple[dict[str, Any], ...],
-    document: str,
-    template: str,
-    speed: float = 1.0,
-) -> None:
+def process_batch(
+    jsonl_files: list[Path],
+    template_base: Path | None,
+    template_data: str,
+    args: argparse.Namespace,
+) -> tuple[list[dict[str, Any]], bool]:
     """
-    Play back a recording, showing the code evolving in real-time.
+    Process multiple recording files in batch mode.
 
     Parameters
     ----------
-    json_data : tuple[dict[str, Any], ...]
-        The recording events
-    document : str
-        The document to play back
-    template : str
-        The initial template content
-    speed : float
-        Playback speed multiplier (1.0 = real-time, 2.0 = 2x speed, 0.5 = half speed)
+    jsonl_files : list[Path]
+        List of JSONL files to process
+    template_base : Path
+        Path to template file or directory
+    template_data : str
+        Template file content
+    args : argparse.Namespace
+        Command-line arguments
+
+    Returns
+    -------
+    tuple
+        (results, all_verified)
     """
-    # Filter events for the target document
-    doc_events = [e for e in json_data if e.get("document") == document]
-
-    if not doc_events:
-        print(f"No events found for document: {document}", file=sys.stderr)
-        return
-
-    # Start with template
-    current_content = template
-    last_timestamp = None
-
-    def clear_screen():
-        """Clear the terminal screen."""
-        os.system('cls' if os.name == 'nt' else 'clear')
-
-    def parse_timestamp(ts_str: str) -> datetime:
-        """Parse ISO timestamp string."""
-        return datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
-
-    # Show initial template
-    clear_screen()
-    print(f"=" * 80)
-    print(f"PLAYBACK: {document} (Speed: {speed}x)")
-    print(f"Event 0 / {len(doc_events)} - Initial Template")
-    print(f"=" * 80)
-    print(current_content)
-    print(f"\n{'=' * 80}")
-    print("Press Ctrl+C to stop playback")
-    time.sleep(2.0 / speed)
+    results = []
+    all_verified = True
+    output_dir = args.output_dir or (
+        args.output_file if args.output_file and args.output_file.is_dir() else None
+    )
 
-    try:
-        for idx, event in enumerate(doc_events, 1):
-            old_frag = event.get("oldFragment", "")
-            new_frag = event.get("newFragment", "")
-            offset = event.get("offset", 0)
-            timestamp = event.get("timestamp")
-
-            # Calculate delay based on timestamp difference
-            if last_timestamp and timestamp:
-                try:
-                    ts1 = parse_timestamp(last_timestamp)
-                    ts2 = parse_timestamp(timestamp)
-                    delay = (ts2 - ts1).total_seconds() / speed
-                    # Cap delay at 5 seconds for very long pauses
-                    delay = min(delay, 5.0)
-                    if delay > 0:
-                        time.sleep(delay)
-                except (ValueError, KeyError):
-                    time.sleep(0.1 / speed)
+    for i, jsonl_file in enumerate(jsonl_files, 1):
+        print_batch_header(i, len(jsonl_files), jsonl_file.name)
+
+        # Determine target document for this file
+        try:
+            file_data = load_jsonl(jsonl_file)
+            recorded_docs = get_recorded_documents(file_data)
+            target_document = resolve_document(recorded_docs, template_base, args.document)
+        except (FileNotFoundError, ValueError, IOError) as e:
+            print(f"Error determining document: {e}", file=sys.stderr)
+            all_verified = False
+            continue
+
+        # If using template directory, find the matching template for this document
+        if args.template_dir and target_document:
+            matching_template_path = find_matching_template(args.template_dir, target_document)
+            if matching_template_path:
+                file_template_data = matching_template_path.read_text()
+                print(f"Using template: {matching_template_path.name}", file=sys.stderr)
             else:
-                time.sleep(0.1 / speed)
+                file_template_data = ""
+                print(
+                    f"Warning: No matching template found for {target_document}. "
+                    "Reconstruction will proceed without template verification.",
+                    file=sys.stderr
+                )
+        else:
+            file_template_data = template_data
 
-            last_timestamp = timestamp
+        # Process the file
+        verified, reconstructed, suspicious_events, time_info, diff_text = process_single_file(
+            jsonl_file, file_template_data, target_document, args.time_limit
+        )
 
-            # Apply the edit
-            if new_frag != old_frag:
-                current_content = current_content[:offset] + new_frag + current_content[offset + len(old_frag):]
+        if not verified:
+            all_verified = False
 
-            # Display current state
-            clear_screen()
-            print(f"=" * 80)
-            print(f"PLAYBACK: {document} (Speed: {speed}x)")
-            print(f"Event {idx} / {len(doc_events)} - {timestamp or 'unknown time'}")
+        # Display results
+        display_time_info(time_info)
+        display_suspicious_events(suspicious_events, args.show_autocomplete_details)
+        display_template_diff(diff_text)
+
+        # Store results
+        results.append({
+            "jsonl_file": jsonl_file,
+            "target_document": target_document,
+            "verified": verified,
+            "reconstructed": reconstructed,
+            "suspicious_events": suspicious_events,
+            "time_info": time_info,
+            "template_diff": diff_text,
+        })
+
+        # Write output file if requested
+        if reconstructed and output_dir:
+            output_name = jsonl_file.stem.replace(".recording.jsonl", "") + ".py"
+            output_path = output_dir / output_name
+            write_reconstructed_file(output_path, reconstructed, "Written to")
+
+    return results, all_verified
+
+
+def process_single(
+    jsonl_file: Path,
+    template_base: Path | None,
+    template_data: str,
+    args: argparse.Namespace,
+) -> tuple[list[dict[str, Any]], bool]:
+    """
+    Process a single recording file.
 
-            # Show what changed
-            if new_frag != old_frag:
-                change_type = "INSERT" if not old_frag else ("DELETE" if not new_frag else "REPLACE")
-                print(f"Action: {change_type} at offset {offset} ({len(new_frag)} chars)")
+    Parameters
+    ----------
+    jsonl_file : Path
+        Path to JSONL file
+    template_base : Path
+        Path to template file or directory
+    template_data : str
+        Template file content
+    args : argparse.Namespace
+        Command-line arguments
 
-            print(f"=" * 80)
-            print(current_content)
-            print(f"\n{'=' * 80}")
-            print(f"Progress: [{('#' * (idx * 40 // len(doc_events))).ljust(40)}] {idx}/{len(doc_events)}")
-            print("Press Ctrl+C to stop playback")
+    Returns
+    -------
+    tuple
+        (results, verified)
+    """
+    try:
+        file_data = load_jsonl(jsonl_file)
+        recorded_docs = get_recorded_documents(file_data)
+        target_document = resolve_document(recorded_docs, template_base, args.document)
+    except (FileNotFoundError, ValueError, IOError) as e:
+        print(f"Error determining document: {e}", file=sys.stderr)
+        return [], False
+
+    # If using template directory, find the matching template for this document
+    if args.template_dir and target_document:
+        matching_template_path = find_matching_template(args.template_dir, target_document)
+        if matching_template_path:
+            file_template_data = matching_template_path.read_text()
+            print(f"Using template: {matching_template_path.name}", file=sys.stderr)
+        else:
+            file_template_data = ""
+            print(
+                f"Warning: No matching template found for {target_document}. "
+                "Reconstruction will proceed without template verification.",
+                file=sys.stderr
+            )
+    else:
+        file_template_data = template_data
 
-    except KeyboardInterrupt:
-        print("\n\nPlayback stopped by user.", file=sys.stderr)
-        return
+    print(f"Processing: {target_document or template_base}", file=sys.stderr)
 
-    # Final summary
-    print("\n\nPlayback complete!", file=sys.stderr)
-    print(f"Total events: {len(doc_events)}", file=sys.stderr)
+    verified, reconstructed, suspicious_events, time_info, diff_text = process_single_file(
+        jsonl_file, file_template_data, target_document, args.time_limit
+    )
 
+    # Display results
+    display_time_info(time_info)
+    display_suspicious_events(suspicious_events, args.show_autocomplete_details)
+    display_template_diff(diff_text)
 
-def create_parser() -> argparse.ArgumentParser:
-    """
-    Create and configure the argument parser.
+    # Write output file if requested
+    if reconstructed and args.output_file:
+        if not write_reconstructed_file(args.output_file, reconstructed):
+            return [], False
 
-    Returns
-    -------
-    argparse.ArgumentParser
-        Configured argument parser
-    """
-    parser = argparse.ArgumentParser(
-        description="Process and verify code recorder JSONL files"
-    )
-    parser.add_argument(
-        "jsonl_file",
-        type=Path,
-        help="Path to the compressed JSONL file (*.recording.jsonl.gz)",
-    )
-    parser.add_argument(
-        "template_file",
-        type=Path,
-        help="Path to the initial template file that was recorded",
-    )
-    parser.add_argument(
-        "-t",
-        "--time-limit",
-        type=int,
-        default=None,
-        help="Maximum allowed time in minutes between first and last edit. "
-        "If exceeded, recording is flagged.",
-    )
-    parser.add_argument(
-        "-d",
-        "--document",
-        type=str,
-        default=None,
-        help="Document path or filename to process from the recording. "
-        "Defaults to the document whose extension matches the template file.",
-    )
-    parser.add_argument(
-        "-o",
-        "--output-json",
-        type=Path,
-        default=None,
-        help="Path to output JSON file with verification results "
-        "(time info and suspicious events).",
-    )
-    parser.add_argument(
-        "-s",
-        "--show-autocomplete-details",
-        action="store_true",
-        help="Show individual auto-complete events in addition to "
-        "aggregate statistics",
-    )
-    parser.add_argument(
-        "-q",
-        "--quiet",
-        action="store_true",
-        help="Suppress output of reconstructed code to stdout",
-    )
-    parser.add_argument(
-        "-p",
-        "--playback",
-        action="store_true",
-        help="Play back the recording in real-time, showing code evolution",
-    )
-    parser.add_argument(
-        "--playback-speed",
-        type=float,
-        default=1.0,
-        help="Playback speed multiplier (1.0 = real-time, 2.0 = 2x speed, 0.5 = half speed)",
-    )
-    return parser
+    results = [{
+        "jsonl_file": jsonl_file,
+        "target_document": target_document,
+        "verified": verified,
+        "reconstructed": reconstructed,
+        "suspicious_events": suspicious_events,
+        "time_info": time_info,
+        "template_diff": diff_text,
+    }]
+
+    return results, verified
 
 
 def main() -> int:
@@ -500,112 +509,123 @@ def main() -> int:
     parser = create_parser()
     args = parser.parse_args()
 
-    # Load JSONL file
+    # Parse files argument: last one may be template_file if it's not a JSONL file
+    files_list = args.files
+    template_file = None
+    jsonl_patterns = files_list
+
+    # If we have more than one file and the last one doesn't look like a JSONL file,
+    # treat it as the template file
+    if len(files_list) > 1 and not files_list[-1].endswith(('.jsonl', '.jsonl.gz')):
+        template_file = Path(files_list[-1])
+        jsonl_patterns = files_list[:-1]
+
+    # Validate that at least one of template_file or template_dir is provided
+    if not template_file and not args.template_dir:
+        print("Error: Either a template file or --template-dir must be provided", file=sys.stderr)
+        parser.print_help()
+        return 1
+
+    # Expand file patterns and validate
     try:
-        json_data = load_jsonl(args.jsonl_file)
+        jsonl_files = expand_file_patterns(jsonl_patterns)
     except FileNotFoundError as e:
         print(f"Error: {e}", file=sys.stderr)
         return 1
-    except (ValueError, IOError) as e:
-        print(f"Error loading JSONL file: {e}", file=sys.stderr)
-        return 1
 
-    # Resolve which document to process
-    recorded_docs = get_recorded_documents(json_data)
-    try:
-        target_document = resolve_document(
-            recorded_docs, args.template_file, args.document
-        )
-    except ValueError as e:
-        print(f"Error determining document: {e}", file=sys.stderr)
-        return 1
+    batch_mode = len(jsonl_files) > 1
+    if batch_mode:
+        print(f"Processing {len(jsonl_files)} recording files in batch mode", file=sys.stderr)
 
-    # Handle playback mode
-    if args.playback:
+    # Determine template source (use template_dir if provided, otherwise template_file)
+    template_path = args.template_dir if args.template_dir else template_file
+
+    # Handle playback mode (single file only)
+    if not batch_mode and args.playback:
         try:
-            template_content = args.template_file.read_text()
-        except FileNotFoundError:
-            print(f"Error: Template file not found: {args.template_file}", file=sys.stderr)
-            return 1
+            json_data = load_jsonl(jsonl_files[0])
+            recorded_docs = get_recorded_documents(json_data)
+            target_document = resolve_document(recorded_docs, template_path, args.document)
+
+            # Get template data for playback
+            template_data, _ = resolve_template_file(
+                template_file if not args.template_dir else None,
+                args.template_dir,
+                target_document
+            )
 
-        if target_document:
-            playback_recording(json_data, target_document, template_content, args.playback_speed)
-            return 0
-        else:
-            print("Error: No documents found in recording", file=sys.stderr)
+            if target_document:
+                playback_recording(json_data, target_document, template_data, args.playback_speed)
+                return 0
+            else:
+                print("Error: No documents found in recording", file=sys.stderr)
+                return 1
+        except Exception as e:
+            print(f"Error loading file for playback: {e}", file=sys.stderr)
             return 1
 
-    # Filter events for target document
-    doc_events = filter_events_by_document(json_data, target_document)
-    if target_document and not doc_events:
-        print(
-            f"Error: No events found for document '{target_document}'",
-            file=sys.stderr,
-        )
-        return 1
-
-    print(f"Processing: {target_document or args.template_file}", file=sys.stderr)
-
-    # Read template file
+    # Get template data
     try:
-        template_data = args.template_file.read_text()
-    except FileNotFoundError:
-        print(
-            f"Error: Template file not found: {args.template_file}", file=sys.stderr
-        )
-        return 1
-    except Exception as e:
-        print(f"Error reading template file: {e}", file=sys.stderr)
+        # If using a template directory, skip loading a global template here
+        # Let per-file matching handle it in process_batch/process_single
+        if args.template_dir:
+            template_data = ""
+        else:
+            template_data, _ = resolve_template_file(
+                template_file if not args.template_dir else None,
+                None,
+                None
+            )
+    except (FileNotFoundError, ValueError) as e:
+        print(f"Error: {e}", file=sys.stderr)
         return 1
 
-    # Check and display time information
-    time_info = check_time_limit(doc_events, args.time_limit)
-    display_time_info(time_info)
-
-    # Verify and process the recording
-    verified = False
-    reconstructed = ""
-    suspicious_events = []
-    try:
-        template_data, suspicious_events = verify(template_data, doc_events)
-        reconstructed = reconstruct_file_from_events(
-            doc_events, template_data, document_path=target_document
+    # Process files
+    if batch_mode:
+        results, all_verified = process_batch(
+            jsonl_files, template_path, template_data, args
+        )
+    else:
+        results, all_verified = process_single(
+            jsonl_files[0], template_path, template_data, args
         )
-        verified = True
-        if not args.quiet:
-            print(reconstructed)
-
-        # Display suspicious events
-        display_suspicious_events(suspicious_events, args.show_autocomplete_details)
 
-    except ValueError as e:
-        print("File failed verification from template!", file=sys.stderr)
-        print(str(e), file=sys.stderr)
-        try:
-            print(template_diff(template_data, doc_events), file=sys.stderr)
-        except Exception:
-            pass
-        verified = False
-    except Exception as e:
-        print(f"Error processing file: {type(e).__name__}: {e}", file=sys.stderr)
-        verified = False
+    if not results:
+        return 1
 
-    # Write JSON output to file if requested
-    if args.output_json:
-        try:
-            write_json_output(
-                args.output_json,
-                target_document or str(args.template_file),
-                time_info,
-                suspicious_events,
-                reconstructed,
-                verified,
-            )
-        except Exception as e:
-            print(f"Error writing JSON output: {e}", file=sys.stderr)
-            return 1
+    # Output summary and combined report for batch mode
+    if batch_mode:
+        failed_files = [r["jsonl_file"].name for r in results if not r["verified"]]
+        verified_count = len(results) - len(failed_files)
+        print_batch_summary(len(results), verified_count, failed_files)
+
+        # Display combined time report
+        time_infos = [r["time_info"] for r in results]
+        combined_time = None
+        if any(time_infos):
+            combined_time = combine_time_info(time_infos, args.time_limit)
+            display_time_info(combined_time, is_combined=True)
+
+        # Write JSON output
+        if args.output_json:
+            try:
+                write_batch_json_output(
+                    args.output_json, results, combined_time, all_verified, batch_mode=True
+                )
+            except Exception as e:
+                print(f"Error writing batch JSON output: {e}", file=sys.stderr)
+    else:
+        # Single file mode - write JSON output
+        if args.output_json:
+            try:
+                write_batch_json_output(
+                    args.output_json, results, results[0]["time_info"],
+                    results[0]["verified"], batch_mode=False
+                )
+            except Exception as e:
+                print(f"Error writing JSON output: {e}", file=sys.stderr)
 
-    return 0 if verified else 1
+    return 0 if all_verified else 1
 
 
 if __name__ == "__main__":