cr-proc 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_recorder_processor/api/build.py +6 -0
- code_recorder_processor/api/document.py +300 -0
- code_recorder_processor/api/load.py +58 -0
- code_recorder_processor/api/output.py +70 -0
- code_recorder_processor/api/verify.py +293 -83
- code_recorder_processor/cli.py +523 -349
- code_recorder_processor/display.py +201 -0
- code_recorder_processor/playback.py +116 -0
- cr_proc-0.1.9.dist-info/METADATA +280 -0
- cr_proc-0.1.9.dist-info/RECORD +13 -0
- cr_proc-0.1.7.dist-info/METADATA +0 -142
- cr_proc-0.1.7.dist-info/RECORD +0 -9
- {cr_proc-0.1.7.dist-info → cr_proc-0.1.9.dist-info}/WHEEL +0 -0
- {cr_proc-0.1.7.dist-info → cr_proc-0.1.9.dist-info}/entry_points.txt +0 -0
code_recorder_processor/cli.py
CHANGED
|
@@ -1,359 +1,500 @@
|
|
|
1
|
+
"""Command-line interface for code recorder processor."""
|
|
1
2
|
import argparse
|
|
2
|
-
import
|
|
3
|
+
import glob
|
|
3
4
|
import sys
|
|
4
|
-
from datetime import datetime
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
from typing import Any
|
|
7
7
|
|
|
8
8
|
from .api.build import reconstruct_file_from_events
|
|
9
|
+
from .api.document import (
|
|
10
|
+
filter_events_by_document,
|
|
11
|
+
get_recorded_documents,
|
|
12
|
+
resolve_document,
|
|
13
|
+
resolve_template_file,
|
|
14
|
+
find_matching_template,
|
|
15
|
+
)
|
|
9
16
|
from .api.load import load_jsonl
|
|
10
|
-
from .api.
|
|
17
|
+
from .api.output import write_batch_json_output
|
|
18
|
+
from .api.verify import (
|
|
19
|
+
check_time_limit,
|
|
20
|
+
combine_time_info,
|
|
21
|
+
detect_external_copypaste,
|
|
22
|
+
template_diff,
|
|
23
|
+
verify,
|
|
24
|
+
)
|
|
25
|
+
from .display import (
|
|
26
|
+
display_suspicious_events,
|
|
27
|
+
display_template_diff,
|
|
28
|
+
display_time_info,
|
|
29
|
+
print_batch_header,
|
|
30
|
+
print_batch_summary,
|
|
31
|
+
)
|
|
32
|
+
from .playback import playback_recording
|
|
11
33
|
|
|
12
34
|
|
|
13
|
-
def
|
|
14
|
-
|
|
15
|
-
|
|
35
|
+
def create_parser() -> argparse.ArgumentParser:
|
|
36
|
+
"""
|
|
37
|
+
Create and configure the argument parser.
|
|
38
|
+
|
|
39
|
+
Returns
|
|
40
|
+
-------
|
|
41
|
+
argparse.ArgumentParser
|
|
42
|
+
Configured argument parser
|
|
43
|
+
"""
|
|
44
|
+
parser = argparse.ArgumentParser(
|
|
45
|
+
description="Process and verify code recorder JSONL files"
|
|
46
|
+
)
|
|
47
|
+
parser.add_argument(
|
|
48
|
+
"files",
|
|
49
|
+
type=str,
|
|
50
|
+
nargs="+",
|
|
51
|
+
help="Path(s) to JSONL file(s) and optionally a template file. "
|
|
52
|
+
"JSONL files: compressed JSONL file(s) (*.recording.jsonl.gz). "
|
|
53
|
+
"Supports glob patterns like 'recordings/*.jsonl.gz'. "
|
|
54
|
+
"Template file (optional last positional): template file path. "
|
|
55
|
+
"Omit to use --template-dir instead.",
|
|
56
|
+
)
|
|
57
|
+
parser.add_argument(
|
|
58
|
+
"--template-dir",
|
|
59
|
+
type=Path,
|
|
60
|
+
default=None,
|
|
61
|
+
help="Directory containing template files (overrides positional template file). "
|
|
62
|
+
"Will search for files matching the document name. "
|
|
63
|
+
"If no match found, reconstruction proceeds with warning.",
|
|
64
|
+
)
|
|
65
|
+
parser.add_argument(
|
|
66
|
+
"-t",
|
|
67
|
+
"--time-limit",
|
|
68
|
+
type=int,
|
|
69
|
+
default=None,
|
|
70
|
+
help="Maximum allowed time in minutes between first and last edit. "
|
|
71
|
+
"If exceeded, recording is flagged. Applied individually to each recording file.",
|
|
72
|
+
)
|
|
73
|
+
parser.add_argument(
|
|
74
|
+
"-d",
|
|
75
|
+
"--document",
|
|
76
|
+
type=str,
|
|
77
|
+
default=None,
|
|
78
|
+
help="Document path or filename to process from the recording. "
|
|
79
|
+
"Defaults to the document whose extension matches the template file.",
|
|
80
|
+
)
|
|
81
|
+
parser.add_argument(
|
|
82
|
+
"-o",
|
|
83
|
+
"--output-json",
|
|
84
|
+
type=Path,
|
|
85
|
+
default=None,
|
|
86
|
+
help="Path to output JSON file with verification results. "
|
|
87
|
+
"Uses consistent format for both single and batch modes, with batch_mode flag. "
|
|
88
|
+
"In batch mode, includes combined_time_info across all files.",
|
|
89
|
+
)
|
|
90
|
+
parser.add_argument(
|
|
91
|
+
"-f",
|
|
92
|
+
"--output-file",
|
|
93
|
+
type=Path,
|
|
94
|
+
default=None,
|
|
95
|
+
help="Write reconstructed code to specified file instead of stdout. "
|
|
96
|
+
"In batch mode, this should be a directory where files will be named after the input files.",
|
|
97
|
+
)
|
|
98
|
+
parser.add_argument(
|
|
99
|
+
"--output-dir",
|
|
100
|
+
type=Path,
|
|
101
|
+
default=None,
|
|
102
|
+
help="Directory to write reconstructed code files in batch mode (one file per recording). "
|
|
103
|
+
"Files are named based on input recording filenames.",
|
|
104
|
+
)
|
|
105
|
+
parser.add_argument(
|
|
106
|
+
"-s",
|
|
107
|
+
"--show-autocomplete-details",
|
|
108
|
+
action="store_true",
|
|
109
|
+
help="Show individual auto-complete events in addition to "
|
|
110
|
+
"aggregate statistics",
|
|
111
|
+
)
|
|
112
|
+
parser.add_argument(
|
|
113
|
+
"-p",
|
|
114
|
+
"--playback",
|
|
115
|
+
action="store_true",
|
|
116
|
+
help="Play back the recording in real-time, showing code evolution",
|
|
117
|
+
)
|
|
118
|
+
parser.add_argument(
|
|
119
|
+
"--playback-speed",
|
|
120
|
+
type=float,
|
|
121
|
+
default=1.0,
|
|
122
|
+
help="Playback speed multiplier (1.0 = real-time, 2.0 = 2x speed, 0.5 = half speed)",
|
|
123
|
+
)
|
|
124
|
+
return parser
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def expand_file_patterns(patterns: list[str]) -> list[Path]:
|
|
16
128
|
"""
|
|
17
|
-
|
|
129
|
+
Expand glob patterns and validate files exist.
|
|
18
130
|
|
|
19
131
|
Parameters
|
|
20
132
|
----------
|
|
21
|
-
|
|
22
|
-
List of
|
|
23
|
-
template_path : Path
|
|
24
|
-
Path to the template file
|
|
25
|
-
override : str | None
|
|
26
|
-
Explicit document name or path override
|
|
133
|
+
patterns : list[str]
|
|
134
|
+
List of file paths or glob patterns
|
|
27
135
|
|
|
28
136
|
Returns
|
|
29
137
|
-------
|
|
30
|
-
|
|
31
|
-
|
|
138
|
+
list[Path]
|
|
139
|
+
List of existing file paths
|
|
32
140
|
|
|
33
141
|
Raises
|
|
34
142
|
------
|
|
35
|
-
|
|
36
|
-
If
|
|
143
|
+
FileNotFoundError
|
|
144
|
+
If no files are found
|
|
37
145
|
"""
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
f"Available documents: {docs}"
|
|
71
|
-
)
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
def get_recorded_documents(events: tuple[dict[str, Any], ...]) -> list[str]:
|
|
146
|
+
jsonl_files = []
|
|
147
|
+
for pattern in patterns:
|
|
148
|
+
expanded = glob.glob(pattern)
|
|
149
|
+
if expanded:
|
|
150
|
+
jsonl_files.extend([Path(f) for f in expanded])
|
|
151
|
+
else:
|
|
152
|
+
# If no glob match, treat as literal path
|
|
153
|
+
jsonl_files.append(Path(pattern))
|
|
154
|
+
|
|
155
|
+
if not jsonl_files:
|
|
156
|
+
raise FileNotFoundError("No JSONL files found")
|
|
157
|
+
|
|
158
|
+
# Check if files exist
|
|
159
|
+
existing_files = [f for f in jsonl_files if f.exists()]
|
|
160
|
+
if not existing_files:
|
|
161
|
+
raise FileNotFoundError("None of the specified files exist")
|
|
162
|
+
|
|
163
|
+
# Warn about missing files
|
|
164
|
+
if len(existing_files) < len(jsonl_files):
|
|
165
|
+
missing = [f for f in jsonl_files if f not in existing_files]
|
|
166
|
+
for f in missing:
|
|
167
|
+
print(f"Warning: File not found: {f}", file=sys.stderr)
|
|
168
|
+
|
|
169
|
+
return existing_files
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def process_single_file(
|
|
173
|
+
jsonl_path: Path,
|
|
174
|
+
template_data: str,
|
|
175
|
+
target_document: str | None,
|
|
176
|
+
time_limit: int | None,
|
|
177
|
+
) -> tuple[bool, str, list[dict[str, Any]], dict[str, Any] | None, str]:
|
|
75
178
|
"""
|
|
76
|
-
|
|
179
|
+
Process a single JSONL recording file.
|
|
77
180
|
|
|
78
181
|
Parameters
|
|
79
182
|
----------
|
|
80
|
-
|
|
81
|
-
|
|
183
|
+
jsonl_path : Path
|
|
184
|
+
Path to the JSONL file
|
|
185
|
+
template_data : str
|
|
186
|
+
Template file content
|
|
187
|
+
target_document : str | None
|
|
188
|
+
Document to process
|
|
189
|
+
time_limit : int | None
|
|
190
|
+
Time limit in minutes
|
|
82
191
|
|
|
83
192
|
Returns
|
|
84
193
|
-------
|
|
85
|
-
|
|
86
|
-
|
|
194
|
+
tuple
|
|
195
|
+
(verified, reconstructed_code, suspicious_events, time_info, template_diff_text)
|
|
87
196
|
"""
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
197
|
+
try:
|
|
198
|
+
json_data = load_jsonl(jsonl_path)
|
|
199
|
+
except (FileNotFoundError, ValueError, IOError) as e:
|
|
200
|
+
print(f"Error loading {jsonl_path}: {e}", file=sys.stderr)
|
|
201
|
+
return False, "", [], None, ""
|
|
202
|
+
|
|
203
|
+
# Filter events for target document
|
|
204
|
+
doc_events = filter_events_by_document(json_data, target_document)
|
|
205
|
+
if target_document and not doc_events:
|
|
206
|
+
print(
|
|
207
|
+
f"Warning: No events found for document '{target_document}' in {jsonl_path}",
|
|
208
|
+
file=sys.stderr,
|
|
209
|
+
)
|
|
210
|
+
return False, "", [], None, ""
|
|
211
|
+
|
|
212
|
+
# Check time information
|
|
213
|
+
time_info = check_time_limit(doc_events, time_limit)
|
|
214
|
+
|
|
215
|
+
# Verify and process the recording
|
|
216
|
+
try:
|
|
217
|
+
verified_template, suspicious_events = verify(template_data, doc_events)
|
|
218
|
+
reconstructed = reconstruct_file_from_events(
|
|
219
|
+
doc_events, verified_template, document_path=target_document
|
|
220
|
+
)
|
|
221
|
+
return True, reconstructed, suspicious_events, time_info, ""
|
|
222
|
+
except ValueError as e:
|
|
223
|
+
# If verification fails but we have events, still try to reconstruct
|
|
224
|
+
print(f"Warning: Verification failed for {jsonl_path}: {e}", file=sys.stderr)
|
|
225
|
+
try:
|
|
226
|
+
if not doc_events:
|
|
227
|
+
return False, "", [], time_info, ""
|
|
228
|
+
|
|
229
|
+
# Compute diff against template and still detect suspicious events
|
|
230
|
+
diff_text = template_diff(template_data, doc_events)
|
|
231
|
+
suspicious_events = detect_external_copypaste(doc_events)
|
|
94
232
|
|
|
233
|
+
# Reconstruct using the initial recorded state
|
|
234
|
+
initial_state = doc_events[0].get("newFragment", "")
|
|
235
|
+
reconstructed = reconstruct_file_from_events(
|
|
236
|
+
doc_events, initial_state, document_path=target_document
|
|
237
|
+
)
|
|
238
|
+
return False, reconstructed, suspicious_events, time_info, diff_text
|
|
239
|
+
except Exception as reconstruction_error:
|
|
240
|
+
print(
|
|
241
|
+
f"Error reconstructing {jsonl_path}: {type(reconstruction_error).__name__}: {reconstruction_error}",
|
|
242
|
+
file=sys.stderr,
|
|
243
|
+
)
|
|
244
|
+
return False, "", [], time_info, ""
|
|
245
|
+
except Exception as e:
|
|
246
|
+
print(
|
|
247
|
+
f"Error processing {jsonl_path}: {type(e).__name__}: {e}",
|
|
248
|
+
file=sys.stderr,
|
|
249
|
+
)
|
|
250
|
+
return False, "", [], time_info, ""
|
|
95
251
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
252
|
+
|
|
253
|
+
def write_reconstructed_file(
|
|
254
|
+
output_path: Path,
|
|
255
|
+
content: str,
|
|
256
|
+
file_description: str = "Reconstructed code"
|
|
257
|
+
) -> bool:
|
|
99
258
|
"""
|
|
100
|
-
|
|
259
|
+
Write reconstructed code to a file.
|
|
101
260
|
|
|
102
261
|
Parameters
|
|
103
262
|
----------
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
263
|
+
output_path : Path
|
|
264
|
+
Path to write to
|
|
265
|
+
content : str
|
|
266
|
+
Content to write
|
|
267
|
+
file_description : str
|
|
268
|
+
Description for success message
|
|
108
269
|
|
|
109
270
|
Returns
|
|
110
271
|
-------
|
|
111
|
-
|
|
112
|
-
|
|
272
|
+
bool
|
|
273
|
+
True if successful, False otherwise
|
|
113
274
|
"""
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
275
|
+
try:
|
|
276
|
+
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
277
|
+
output_path.write_text(content)
|
|
278
|
+
print(f"{file_description} written to: {output_path}", file=sys.stderr)
|
|
279
|
+
return True
|
|
280
|
+
except Exception as e:
|
|
281
|
+
print(f"Error writing output file: {e}", file=sys.stderr)
|
|
282
|
+
return False
|
|
117
283
|
|
|
118
284
|
|
|
119
|
-
def
|
|
285
|
+
def handle_playback_mode(
|
|
286
|
+
jsonl_file: Path,
|
|
287
|
+
template_file: Path,
|
|
288
|
+
template_data: str,
|
|
289
|
+
document_override: str | None,
|
|
290
|
+
speed: float,
|
|
291
|
+
) -> int:
|
|
120
292
|
"""
|
|
121
|
-
|
|
293
|
+
Handle playback mode for a single file.
|
|
122
294
|
|
|
123
295
|
Parameters
|
|
124
296
|
----------
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
first_ts = datetime.fromisoformat(
|
|
137
|
-
time_info["first_timestamp"].replace("Z", "+00:00")
|
|
138
|
-
)
|
|
139
|
-
last_ts = datetime.fromisoformat(
|
|
140
|
-
time_info["last_timestamp"].replace("Z", "+00:00")
|
|
141
|
-
)
|
|
142
|
-
time_span = (last_ts - first_ts).total_seconds() / 60
|
|
143
|
-
|
|
144
|
-
print(f"Time span (first to last edit): {time_span:.2f} minutes", file=sys.stderr)
|
|
297
|
+
jsonl_file : Path
|
|
298
|
+
Path to the recording file
|
|
299
|
+
template_file : Path
|
|
300
|
+
Path to the template file
|
|
301
|
+
template_data : str
|
|
302
|
+
Template file content
|
|
303
|
+
document_override : str | None
|
|
304
|
+
Document override
|
|
305
|
+
speed : float
|
|
306
|
+
Playback speed
|
|
145
307
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
308
|
+
Returns
|
|
309
|
+
-------
|
|
310
|
+
int
|
|
311
|
+
Exit code (0 for success, 1 for error)
|
|
312
|
+
"""
|
|
313
|
+
try:
|
|
314
|
+
json_data = load_jsonl(jsonl_file)
|
|
315
|
+
recorded_docs = get_recorded_documents(json_data)
|
|
316
|
+
target_document = resolve_document(recorded_docs, template_file, document_override)
|
|
317
|
+
|
|
318
|
+
if target_document:
|
|
319
|
+
playback_recording(json_data, target_document, template_data, speed)
|
|
320
|
+
return 0
|
|
321
|
+
else:
|
|
322
|
+
print("Error: No documents found in recording", file=sys.stderr)
|
|
323
|
+
return 1
|
|
324
|
+
except Exception as e:
|
|
325
|
+
print(f"Error loading file for playback: {e}", file=sys.stderr)
|
|
326
|
+
return 1
|
|
151
327
|
|
|
152
328
|
|
|
153
|
-
def
|
|
329
|
+
def process_batch(
|
|
330
|
+
jsonl_files: list[Path],
|
|
331
|
+
template_base: Path | None,
|
|
332
|
+
template_data: str,
|
|
333
|
+
args: argparse.Namespace,
|
|
334
|
+
) -> tuple[list[dict[str, Any]], bool]:
|
|
154
335
|
"""
|
|
155
|
-
|
|
336
|
+
Process multiple recording files in batch mode.
|
|
156
337
|
|
|
157
338
|
Parameters
|
|
158
339
|
----------
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
340
|
+
jsonl_files : list[Path]
|
|
341
|
+
List of JSONL files to process
|
|
342
|
+
template_base : Path
|
|
343
|
+
Path to template file or directory
|
|
344
|
+
template_data : str
|
|
345
|
+
Template file content
|
|
346
|
+
args : argparse.Namespace
|
|
347
|
+
Command-line arguments
|
|
348
|
+
|
|
349
|
+
Returns
|
|
350
|
+
-------
|
|
351
|
+
tuple
|
|
352
|
+
(results, all_verified)
|
|
163
353
|
"""
|
|
164
|
-
|
|
354
|
+
results = []
|
|
355
|
+
all_verified = True
|
|
356
|
+
output_dir = args.output_dir or (
|
|
357
|
+
args.output_file if args.output_file and args.output_file.is_dir() else None
|
|
358
|
+
)
|
|
165
359
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
event_count = event["event_count"]
|
|
169
|
-
total_chars = event["total_chars"]
|
|
170
|
-
print(
|
|
171
|
-
f" Aggregate: {event_count} auto-complete/small paste events "
|
|
172
|
-
f"({total_chars} total chars)",
|
|
173
|
-
file=sys.stderr,
|
|
174
|
-
)
|
|
360
|
+
for i, jsonl_file in enumerate(jsonl_files, 1):
|
|
361
|
+
print_batch_header(i, len(jsonl_files), jsonl_file.name)
|
|
175
362
|
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
363
|
+
# Determine target document for this file
|
|
364
|
+
try:
|
|
365
|
+
file_data = load_jsonl(jsonl_file)
|
|
366
|
+
recorded_docs = get_recorded_documents(file_data)
|
|
367
|
+
target_document = resolve_document(recorded_docs, template_base, args.document)
|
|
368
|
+
except (FileNotFoundError, ValueError, IOError) as e:
|
|
369
|
+
print(f"Error determining document: {e}", file=sys.stderr)
|
|
370
|
+
all_verified = False
|
|
371
|
+
continue
|
|
372
|
+
|
|
373
|
+
# If using template directory, find the matching template for this document
|
|
374
|
+
if args.template_dir and target_document:
|
|
375
|
+
matching_template_path = find_matching_template(args.template_dir, target_document)
|
|
376
|
+
if matching_template_path:
|
|
377
|
+
file_template_data = matching_template_path.read_text()
|
|
378
|
+
print(f"Using template: {matching_template_path.name}", file=sys.stderr)
|
|
379
|
+
else:
|
|
380
|
+
file_template_data = ""
|
|
183
381
|
print(
|
|
184
|
-
f"
|
|
185
|
-
|
|
186
|
-
file=sys.stderr
|
|
382
|
+
f"Warning: No matching template found for {target_document}. "
|
|
383
|
+
"Reconstruction will proceed without template verification.",
|
|
384
|
+
file=sys.stderr
|
|
187
385
|
)
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
print(f" {line}", file=sys.stderr)
|
|
191
|
-
print(" ```", file=sys.stderr)
|
|
192
|
-
|
|
193
|
-
elif "event_indices" in event and reason == "rapid one-line pastes (AI indicator)":
|
|
194
|
-
# Rapid paste sequences (AI indicator) - show aggregate style
|
|
195
|
-
indices = event["event_indices"]
|
|
196
|
-
print(
|
|
197
|
-
f" AI Rapid Paste: Events #{indices[0]}-#{indices[-1]} "
|
|
198
|
-
f"({event['line_count']} lines, {event['char_count']} chars, "
|
|
199
|
-
f"{len(indices)} events in < 1 second)",
|
|
200
|
-
file=sys.stderr,
|
|
201
|
-
)
|
|
386
|
+
else:
|
|
387
|
+
file_template_data = template_data
|
|
202
388
|
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
detail["newFragment"] for detail in event["detailed_events"]
|
|
207
|
-
)
|
|
208
|
-
print(" Combined output:", file=sys.stderr)
|
|
209
|
-
print(" ```", file=sys.stderr)
|
|
210
|
-
for line in combined_content.split("\n"):
|
|
211
|
-
print(f" {line}", file=sys.stderr)
|
|
212
|
-
print(" ```", file=sys.stderr)
|
|
213
|
-
|
|
214
|
-
elif "event_indices" in event:
|
|
215
|
-
# Other multi-event clusters
|
|
216
|
-
indices = event.get("event_indices", [event["event_index"]])
|
|
217
|
-
print(
|
|
218
|
-
f" Events #{indices[0]}-#{indices[-1]} ({reason}): "
|
|
219
|
-
f"{event['line_count']} lines, {event['char_count']} chars",
|
|
220
|
-
file=sys.stderr,
|
|
221
|
-
)
|
|
222
|
-
|
|
223
|
-
else:
|
|
224
|
-
new_fragment = event["newFragment"].replace("\n", "\n ")
|
|
225
|
-
print(
|
|
226
|
-
f" Event #{event['event_index']} ({reason}): "
|
|
227
|
-
f"{event['line_count']} lines, {event['char_count']} chars - "
|
|
228
|
-
f"newFragment:\n ```\n {new_fragment}\n ```",
|
|
229
|
-
file=sys.stderr,
|
|
389
|
+
# Process the file
|
|
390
|
+
verified, reconstructed, suspicious_events, time_info, diff_text = process_single_file(
|
|
391
|
+
jsonl_file, file_template_data, target_document, args.time_limit
|
|
230
392
|
)
|
|
231
393
|
|
|
394
|
+
if not verified:
|
|
395
|
+
all_verified = False
|
|
232
396
|
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
)
|
|
397
|
+
# Display results
|
|
398
|
+
display_time_info(time_info)
|
|
399
|
+
display_suspicious_events(suspicious_events, args.show_autocomplete_details)
|
|
400
|
+
display_template_diff(diff_text)
|
|
401
|
+
|
|
402
|
+
# Store results
|
|
403
|
+
results.append({
|
|
404
|
+
"jsonl_file": jsonl_file,
|
|
405
|
+
"target_document": target_document,
|
|
406
|
+
"verified": verified,
|
|
407
|
+
"reconstructed": reconstructed,
|
|
408
|
+
"suspicious_events": suspicious_events,
|
|
409
|
+
"time_info": time_info,
|
|
410
|
+
"template_diff": diff_text,
|
|
411
|
+
})
|
|
412
|
+
|
|
413
|
+
# Write output file if requested
|
|
414
|
+
if reconstructed and output_dir:
|
|
415
|
+
output_name = jsonl_file.stem.replace(".recording.jsonl", "") + ".py"
|
|
416
|
+
output_path = output_dir / output_name
|
|
417
|
+
write_reconstructed_file(output_path, reconstructed, "Written to")
|
|
418
|
+
|
|
419
|
+
return results, all_verified
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
def process_single(
|
|
423
|
+
jsonl_file: Path,
|
|
424
|
+
template_base: Path | None,
|
|
425
|
+
template_data: str,
|
|
426
|
+
args: argparse.Namespace,
|
|
427
|
+
) -> tuple[list[dict[str, Any]], bool]:
|
|
236
428
|
"""
|
|
237
|
-
|
|
429
|
+
Process a single recording file.
|
|
238
430
|
|
|
239
431
|
Parameters
|
|
240
432
|
----------
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
433
|
+
jsonl_file : Path
|
|
434
|
+
Path to JSONL file
|
|
435
|
+
template_base : Path
|
|
436
|
+
Path to template file or directory
|
|
437
|
+
template_data : str
|
|
438
|
+
Template file content
|
|
439
|
+
args : argparse.Namespace
|
|
440
|
+
Command-line arguments
|
|
441
|
+
|
|
442
|
+
Returns
|
|
443
|
+
-------
|
|
444
|
+
tuple
|
|
445
|
+
(results, verified)
|
|
245
446
|
"""
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
447
|
+
try:
|
|
448
|
+
file_data = load_jsonl(jsonl_file)
|
|
449
|
+
recorded_docs = get_recorded_documents(file_data)
|
|
450
|
+
target_document = resolve_document(recorded_docs, template_base, args.document)
|
|
451
|
+
except (FileNotFoundError, ValueError, IOError) as e:
|
|
452
|
+
print(f"Error determining document: {e}", file=sys.stderr)
|
|
453
|
+
return [], False
|
|
454
|
+
|
|
455
|
+
# If using template directory, find the matching template for this document
|
|
456
|
+
if args.template_dir and target_document:
|
|
457
|
+
matching_template_path = find_matching_template(args.template_dir, target_document)
|
|
458
|
+
if matching_template_path:
|
|
459
|
+
file_template_data = matching_template_path.read_text()
|
|
460
|
+
print(f"Using template: {matching_template_path.name}", file=sys.stderr)
|
|
461
|
+
else:
|
|
462
|
+
file_template_data = ""
|
|
463
|
+
print(
|
|
464
|
+
f"Warning: No matching template found for {target_document}. "
|
|
465
|
+
"Reconstruction will proceed without template verification.",
|
|
466
|
+
file=sys.stderr
|
|
467
|
+
)
|
|
262
468
|
else:
|
|
263
|
-
|
|
264
|
-
|
|
469
|
+
file_template_data = template_data
|
|
265
470
|
|
|
266
|
-
|
|
267
|
-
output_path: Path,
|
|
268
|
-
document: str,
|
|
269
|
-
time_info: dict[str, Any] | None,
|
|
270
|
-
suspicious_events: list[dict[str, Any]],
|
|
271
|
-
) -> None:
|
|
272
|
-
"""
|
|
273
|
-
Write verification results to JSON file.
|
|
471
|
+
print(f"Processing: {target_document or template_base}", file=sys.stderr)
|
|
274
472
|
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
Path to output JSON file
|
|
279
|
-
document : str
|
|
280
|
-
Document that was processed
|
|
281
|
-
time_info : dict[str, Any] | None
|
|
282
|
-
Time information from verification
|
|
283
|
-
suspicious_events : list[dict[str, Any]]
|
|
284
|
-
List of suspicious events detected
|
|
473
|
+
verified, reconstructed, suspicious_events, time_info, diff_text = process_single_file(
|
|
474
|
+
jsonl_file, file_template_data, target_document, args.time_limit
|
|
475
|
+
)
|
|
285
476
|
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
477
|
+
# Display results
|
|
478
|
+
display_time_info(time_info)
|
|
479
|
+
display_suspicious_events(suspicious_events, args.show_autocomplete_details)
|
|
480
|
+
display_template_diff(diff_text)
|
|
481
|
+
|
|
482
|
+
# Write output file if requested
|
|
483
|
+
if reconstructed and args.output_file:
|
|
484
|
+
if not write_reconstructed_file(args.output_file, reconstructed):
|
|
485
|
+
return [], False
|
|
486
|
+
|
|
487
|
+
results = [{
|
|
488
|
+
"jsonl_file": jsonl_file,
|
|
489
|
+
"target_document": target_document,
|
|
490
|
+
"verified": verified,
|
|
491
|
+
"reconstructed": reconstructed,
|
|
294
492
|
"suspicious_events": suspicious_events,
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
with open(output_path, "w") as f:
|
|
299
|
-
json.dump(results, f, indent=2)
|
|
300
|
-
print(f"Results written to {output_path}", file=sys.stderr)
|
|
493
|
+
"time_info": time_info,
|
|
494
|
+
"template_diff": diff_text,
|
|
495
|
+
}]
|
|
301
496
|
|
|
302
|
-
|
|
303
|
-
def create_parser() -> argparse.ArgumentParser:
|
|
304
|
-
"""
|
|
305
|
-
Create and configure the argument parser.
|
|
306
|
-
|
|
307
|
-
Returns
|
|
308
|
-
-------
|
|
309
|
-
argparse.ArgumentParser
|
|
310
|
-
Configured argument parser
|
|
311
|
-
"""
|
|
312
|
-
parser = argparse.ArgumentParser(
|
|
313
|
-
description="Process and verify code recorder JSONL files"
|
|
314
|
-
)
|
|
315
|
-
parser.add_argument(
|
|
316
|
-
"jsonl_file",
|
|
317
|
-
type=Path,
|
|
318
|
-
help="Path to the compressed JSONL file (*.recording.jsonl.gz)",
|
|
319
|
-
)
|
|
320
|
-
parser.add_argument(
|
|
321
|
-
"template_file",
|
|
322
|
-
type=Path,
|
|
323
|
-
help="Path to the initial template file that was recorded",
|
|
324
|
-
)
|
|
325
|
-
parser.add_argument(
|
|
326
|
-
"-t",
|
|
327
|
-
"--time-limit",
|
|
328
|
-
type=int,
|
|
329
|
-
default=None,
|
|
330
|
-
help="Maximum allowed time in minutes between first and last edit. "
|
|
331
|
-
"If exceeded, recording is flagged.",
|
|
332
|
-
)
|
|
333
|
-
parser.add_argument(
|
|
334
|
-
"-d",
|
|
335
|
-
"--document",
|
|
336
|
-
type=str,
|
|
337
|
-
default=None,
|
|
338
|
-
help="Document path or filename to process from the recording. "
|
|
339
|
-
"Defaults to the document whose extension matches the template file.",
|
|
340
|
-
)
|
|
341
|
-
parser.add_argument(
|
|
342
|
-
"-o",
|
|
343
|
-
"--output-json",
|
|
344
|
-
type=Path,
|
|
345
|
-
default=None,
|
|
346
|
-
help="Path to output JSON file with verification results "
|
|
347
|
-
"(time info and suspicious events).",
|
|
348
|
-
)
|
|
349
|
-
parser.add_argument(
|
|
350
|
-
"-s",
|
|
351
|
-
"--show-autocomplete-details",
|
|
352
|
-
action="store_true",
|
|
353
|
-
help="Show individual auto-complete events in addition to "
|
|
354
|
-
"aggregate statistics",
|
|
355
|
-
)
|
|
356
|
-
return parser
|
|
497
|
+
return results, verified
|
|
357
498
|
|
|
358
499
|
|
|
359
500
|
def main() -> int:
|
|
@@ -368,90 +509,123 @@ def main() -> int:
|
|
|
368
509
|
parser = create_parser()
|
|
369
510
|
args = parser.parse_args()
|
|
370
511
|
|
|
371
|
-
#
|
|
512
|
+
# Parse files argument: last one may be template_file if it's not a JSONL file
|
|
513
|
+
files_list = args.files
|
|
514
|
+
template_file = None
|
|
515
|
+
jsonl_patterns = files_list
|
|
516
|
+
|
|
517
|
+
# If we have more than one file and the last one doesn't look like a JSONL file,
|
|
518
|
+
# treat it as the template file
|
|
519
|
+
if len(files_list) > 1 and not files_list[-1].endswith(('.jsonl', '.jsonl.gz')):
|
|
520
|
+
template_file = Path(files_list[-1])
|
|
521
|
+
jsonl_patterns = files_list[:-1]
|
|
522
|
+
|
|
523
|
+
# Validate that at least one of template_file or template_dir is provided
|
|
524
|
+
if not template_file and not args.template_dir:
|
|
525
|
+
print("Error: Either a template file or --template-dir must be provided", file=sys.stderr)
|
|
526
|
+
parser.print_help()
|
|
527
|
+
return 1
|
|
528
|
+
|
|
529
|
+
# Expand file patterns and validate
|
|
372
530
|
try:
|
|
373
|
-
|
|
531
|
+
jsonl_files = expand_file_patterns(jsonl_patterns)
|
|
374
532
|
except FileNotFoundError as e:
|
|
375
533
|
print(f"Error: {e}", file=sys.stderr)
|
|
376
534
|
return 1
|
|
377
|
-
except (ValueError, IOError) as e:
|
|
378
|
-
print(f"Error loading JSONL file: {e}", file=sys.stderr)
|
|
379
|
-
return 1
|
|
380
535
|
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
target_document = resolve_document(
|
|
385
|
-
recorded_docs, args.template_file, args.document
|
|
386
|
-
)
|
|
387
|
-
except ValueError as e:
|
|
388
|
-
print(f"Error determining document: {e}", file=sys.stderr)
|
|
389
|
-
return 1
|
|
536
|
+
batch_mode = len(jsonl_files) > 1
|
|
537
|
+
if batch_mode:
|
|
538
|
+
print(f"Processing {len(jsonl_files)} recording files in batch mode", file=sys.stderr)
|
|
390
539
|
|
|
391
|
-
#
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
540
|
+
# Determine template source (use template_dir if provided, otherwise template_file)
|
|
541
|
+
template_path = args.template_dir if args.template_dir else template_file
|
|
542
|
+
|
|
543
|
+
# Handle playback mode (single file only)
|
|
544
|
+
if not batch_mode and args.playback:
|
|
545
|
+
try:
|
|
546
|
+
json_data = load_jsonl(jsonl_files[0])
|
|
547
|
+
recorded_docs = get_recorded_documents(json_data)
|
|
548
|
+
target_document = resolve_document(recorded_docs, template_path, args.document)
|
|
549
|
+
|
|
550
|
+
# Get template data for playback
|
|
551
|
+
template_data, _ = resolve_template_file(
|
|
552
|
+
template_file if not args.template_dir else None,
|
|
553
|
+
args.template_dir,
|
|
554
|
+
target_document
|
|
555
|
+
)
|
|
399
556
|
|
|
400
|
-
|
|
557
|
+
if target_document:
|
|
558
|
+
playback_recording(json_data, target_document, template_data, args.playback_speed)
|
|
559
|
+
return 0
|
|
560
|
+
else:
|
|
561
|
+
print("Error: No documents found in recording", file=sys.stderr)
|
|
562
|
+
return 1
|
|
563
|
+
except Exception as e:
|
|
564
|
+
print(f"Error loading file for playback: {e}", file=sys.stderr)
|
|
565
|
+
return 1
|
|
401
566
|
|
|
402
|
-
#
|
|
567
|
+
# Get template data
|
|
403
568
|
try:
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
569
|
+
# If using a template directory, skip loading a global template here
|
|
570
|
+
# Let per-file matching handle it in process_batch/process_single
|
|
571
|
+
if args.template_dir:
|
|
572
|
+
template_data = ""
|
|
573
|
+
else:
|
|
574
|
+
template_data, _ = resolve_template_file(
|
|
575
|
+
template_file if not args.template_dir else None,
|
|
576
|
+
None,
|
|
577
|
+
None
|
|
578
|
+
)
|
|
579
|
+
except (FileNotFoundError, ValueError) as e:
|
|
580
|
+
print(f"Error: {e}", file=sys.stderr)
|
|
412
581
|
return 1
|
|
413
582
|
|
|
414
|
-
#
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
doc_events, template_data, document_path=target_document
|
|
583
|
+
# Process files
|
|
584
|
+
if batch_mode:
|
|
585
|
+
results, all_verified = process_batch(
|
|
586
|
+
jsonl_files, template_path, template_data, args
|
|
587
|
+
)
|
|
588
|
+
else:
|
|
589
|
+
results, all_verified = process_single(
|
|
590
|
+
jsonl_files[0], template_path, template_data, args
|
|
423
591
|
)
|
|
424
|
-
print(reconstructed)
|
|
425
592
|
|
|
426
|
-
|
|
427
|
-
|
|
593
|
+
if not results:
|
|
594
|
+
return 1
|
|
595
|
+
|
|
596
|
+
# Output summary and combined report for batch mode
|
|
597
|
+
if batch_mode:
|
|
598
|
+
failed_files = [r["jsonl_file"].name for r in results if not r["verified"]]
|
|
599
|
+
verified_count = len(results) - len(failed_files)
|
|
600
|
+
print_batch_summary(len(results), verified_count, failed_files)
|
|
428
601
|
|
|
429
|
-
#
|
|
602
|
+
# Display combined time report
|
|
603
|
+
time_infos = [r["time_info"] for r in results]
|
|
604
|
+
combined_time = None
|
|
605
|
+
if any(time_infos):
|
|
606
|
+
combined_time = combine_time_info(time_infos, args.time_limit)
|
|
607
|
+
display_time_info(combined_time, is_combined=True)
|
|
608
|
+
|
|
609
|
+
# Write JSON output
|
|
610
|
+
if args.output_json:
|
|
611
|
+
try:
|
|
612
|
+
write_batch_json_output(
|
|
613
|
+
args.output_json, results, combined_time, all_verified, batch_mode=True
|
|
614
|
+
)
|
|
615
|
+
except Exception as e:
|
|
616
|
+
print(f"Error writing batch JSON output: {e}", file=sys.stderr)
|
|
617
|
+
else:
|
|
618
|
+
# Single file mode - write JSON output
|
|
430
619
|
if args.output_json:
|
|
431
620
|
try:
|
|
432
|
-
|
|
433
|
-
args.output_json,
|
|
434
|
-
|
|
435
|
-
time_info,
|
|
436
|
-
suspicious_events,
|
|
621
|
+
write_batch_json_output(
|
|
622
|
+
args.output_json, results, results[0]["time_info"],
|
|
623
|
+
results[0]["verified"], batch_mode=False
|
|
437
624
|
)
|
|
438
625
|
except Exception as e:
|
|
439
626
|
print(f"Error writing JSON output: {e}", file=sys.stderr)
|
|
440
|
-
return 1
|
|
441
|
-
|
|
442
|
-
except ValueError as e:
|
|
443
|
-
print("File failed verification from template!", file=sys.stderr)
|
|
444
|
-
print(str(e), file=sys.stderr)
|
|
445
|
-
try:
|
|
446
|
-
print(template_diff(template_data, doc_events), file=sys.stderr)
|
|
447
|
-
except Exception:
|
|
448
|
-
pass
|
|
449
|
-
return 1
|
|
450
|
-
except Exception as e:
|
|
451
|
-
print(f"Error processing file: {type(e).__name__}: {e}", file=sys.stderr)
|
|
452
|
-
return 1
|
|
453
627
|
|
|
454
|
-
return 0
|
|
628
|
+
return 0 if all_verified else 1
|
|
455
629
|
|
|
456
630
|
|
|
457
631
|
if __name__ == "__main__":
|