cr-proc 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_recorder_processor/api/document.py +33 -9
- code_recorder_processor/api/output.py +11 -3
- code_recorder_processor/api/verify.py +93 -34
- code_recorder_processor/cli.py +173 -44
- code_recorder_processor/display.py +33 -0
- {cr_proc-0.1.11.dist-info → cr_proc-0.1.13.dist-info}/METADATA +21 -1
- cr_proc-0.1.13.dist-info/RECORD +13 -0
- {cr_proc-0.1.11.dist-info → cr_proc-0.1.13.dist-info}/WHEEL +1 -1
- cr_proc-0.1.11.dist-info/RECORD +0 -13
- {cr_proc-0.1.11.dist-info → cr_proc-0.1.13.dist-info}/entry_points.txt +0 -0

code_recorder_processor/api/document.py CHANGED

@@ -5,9 +5,38 @@ from pathlib import Path, PureWindowsPath, PurePosixPath
 from typing import Any
 
 
+def normalize_path_string(path_str: str) -> str:
+    """
+    Normalize a path string to use forward slashes (POSIX style).
+
+    Handles both Windows-style (backslash) and Unix-style (forward slash) paths
+    regardless of the current platform. Useful for cross-platform consistency
+    when files are created on Windows but processed on other systems.
+
+    Parameters
+    ----------
+    path_str : str
+        Path string (may use Windows or Unix separators)
+
+    Returns
+    -------
+    str
+        Normalized path string using forward slashes
+    """
+    # Try to detect if this is a Windows path (contains backslashes)
+    if "\\" in path_str:
+        # Windows-style path
+        path_obj = PureWindowsPath(path_str)
+    else:
+        # Unix-style path (or just a filename)
+        path_obj = PurePosixPath(path_str)
+
+    return path_obj.as_posix()
+
+
 def _normalize_document_path(doc_path: str) -> tuple[str, str]:
     """
-
+    Extract filename and stem from a document path.
 
     Handles both Windows-style (backslash) and Unix-style (forward slash) paths
     regardless of the current platform.
@@ -22,14 +51,9 @@ def _normalize_document_path(doc_path: str) -> tuple[str, str]:
     tuple[str, str]
         (filename, stem) extracted from the path
     """
-    # Try to detect if this is a Windows path (contains backslashes)
-    if "\\" in doc_path:
-        # Windows-style path
-        path_obj = PureWindowsPath(doc_path)
-    else:
-        # Unix-style path (or just a filename)
-        path_obj = PurePosixPath(doc_path)
-
+    # Normalize to forward slashes first, then parse
+    normalized = normalize_path_string(doc_path)
+    path_obj = PurePosixPath(normalized)
     return path_obj.name, path_obj.stem
 
 
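A minimal usage sketch of the new `normalize_path_string` helper, condensed from the hunk above (the sample paths are illustrative):

```python
from pathlib import PureWindowsPath, PurePosixPath

def normalize_path_string(path_str: str) -> str:
    # Backslashes mark a Windows-style path; everything else is treated as POSIX
    if "\\" in path_str:
        return PureWindowsPath(path_str).as_posix()
    return PurePosixPath(path_str).as_posix()

print(normalize_path_string("src\\pkg\\main.py"))  # -> src/pkg/main.py
print(normalize_path_string("src/pkg/main.py"))    # -> src/pkg/main.py
```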

code_recorder_processor/api/output.py CHANGED

@@ -4,6 +4,8 @@ import sys
 from pathlib import Path
 from typing import Any
 
+from .document import normalize_path_string
+
 
 def write_batch_json_output(
     output_path: Path,
@@ -36,15 +38,21 @@ def write_batch_json_output(
     # Convert results to JSON-serializable format
     files_data = []
     for r in results:
-        files_data.append({
-            "jsonl_file": str(r["jsonl_file"]),
+        file_result = {
+            "jsonl_file": normalize_path_string(str(r["jsonl_file"])),
             "document": r["target_document"],
             "verified": r["verified"],
             "time_info": r["time_info"],
             "suspicious_events": r["suspicious_events"],
             "template_diff": r.get("template_diff", ""),
             "reconstructed_code": r["reconstructed"],
-        })
+        }
+
+        # Add submitted_comparison if present
+        if r.get("submitted_comparison") is not None:
+            file_result["submitted_comparison"] = r["submitted_comparison"]
+
+        files_data.append(file_result)
 
     # Use consistent format for both single and batch modes
     output_data = {
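For reference, a sketch of the per-file entry this now produces; the field names come from the hunk above, the values are hypothetical, and `submitted_comparison` appears only when a comparison ran:

```python
# Hypothetical per-file entry in the batch JSON output after this change
file_result = {
    "jsonl_file": "recordings/homework0-ISC.recording.jsonl.gz",
    "document": "homework0-ISC.py",
    "verified": True,
    "time_info": {"minutes_elapsed": 12.5},  # illustrative value
    "suspicious_events": [],
    "template_diff": "",
    "reconstructed_code": "print('hello')\n",
    "submitted_comparison": {"matches": True, "diff": "", "whitespace_only": False},
}
```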

code_recorder_processor/api/verify.py CHANGED

@@ -1,6 +1,7 @@
 from typing import Any
 from datetime import datetime
 import difflib
+from .document import normalize_path_string
 
 # ============================================================================
 # Constants for detection thresholds
@@ -837,15 +838,19 @@ def verify(template: str, jsonData: tuple[dict[str, Any], ...]) -> tuple[str, li
 
 
 def combine_time_info(
-
+    all_events: list[tuple[dict[str, Any], ...]], time_limit_minutes: int | None
 ) -> dict[str, Any] | None:
     """
-    Combine time information from multiple recording files.
+    Combine time information from multiple recording files, avoiding double-counting overlapping time.
+
+    Merges all events from multiple recordings, then calculates the actual time spent editing
+    using the same logic as check_time_limit (gap analysis with focus awareness). This ensures
+    overlapping editing sessions are not double-counted.
 
     Parameters
     ----------
-
-        List of
+    all_events : list[tuple[dict[str, Any], ...]]
+        List of event tuples from multiple recording files
     time_limit_minutes : int | None
         Time limit to check against
 
@@ -854,40 +859,94 @@ def combine_time_info(
     dict[str, Any] | None
         Combined time information, or None if no valid data
     """
-
-    if
+    # Filter out empty event sets
+    valid_event_sets = [events for events in all_events if events]
+    if not valid_event_sets:
         return None
 
-    #
-
+    # Merge all events from all recordings into a single tuple
+    merged_events = tuple(
+        event
+        for event_set in valid_event_sets
+        for event in event_set
+    )
 
-    #
-
-
-
-            datetime.fromisoformat(info["first_timestamp"].replace("Z", "+00:00"))
-        )
-        all_timestamps.append(
-            datetime.fromisoformat(info["last_timestamp"].replace("Z", "+00:00"))
-        )
+    # Use check_time_limit on the merged events to calculate time properly
+    # This handles overlapping periods automatically since we're now analyzing
+    # all events together chronologically
+    combined_result = check_time_limit(merged_events, time_limit_minutes)
 
-
-
-    overall_span = (last_ts - first_ts).total_seconds() / 60
+    if combined_result is None:
+        return None
 
-
-
-        "minutes_elapsed": round(total_elapsed, 2),
-        "first_timestamp": first_ts.isoformat().replace("+00:00", "Z"),
-        "last_timestamp": last_ts.isoformat().replace("+00:00", "Z"),
-        "file_count": len(valid_infos),
-        "overall_span_minutes": round(overall_span, 2),
-    }
+    # Add file_count to the result
+    combined_result["file_count"] = len(valid_event_sets)
 
-
-    if time_limit_minutes is not None:
-        result["exceeds_limit"] = total_elapsed > time_limit_minutes
-    else:
-        result["exceeds_limit"] = False
+    return combined_result
 
-
+
+def compare_submitted_file(reconstructed_code: str, submitted_file_path) -> dict[str, Any]:
+    """
+    Compare reconstructed code from recording with a submitted final file.
+
+    Parameters
+    ----------
+    reconstructed_code : str
+        The code reconstructed from the recording
+    submitted_file_path : Path
+        Path to the submitted file
+
+    Returns
+    -------
+    dict[str, Any]
+        Dictionary containing:
+        - matches: bool indicating if the files match
+        - submitted_file: path to the submitted file
+        - diff: unified diff string if files don't match
+        - whitespace_only: bool indicating if only whitespace differs
+    """
+    try:
+        submitted_content = submitted_file_path.read_text()
+    except Exception as e:
+        return {
+            "matches": False,
+            "submitted_file": normalize_path_string(str(submitted_file_path)),
+            "error": f"Failed to read submitted file: {e}",
+            "diff": "",
+            "whitespace_only": False,
+        }
+
+    # Normalize newlines for comparison
+    reconstructed_normalized = _normalize_newlines(reconstructed_code)
+    submitted_normalized = _normalize_newlines(submitted_content)
+
+    # Check exact match
+    matches = reconstructed_normalized == submitted_normalized
+
+    # Check if only whitespace differs
+    whitespace_only = False
+    if not matches:
+        whitespace_only = is_only_whitespace_differences(
+            submitted_normalized, reconstructed_normalized
+        )
+
+    # Generate diff if they don't match
+    diff_text = ""
+    if not matches:
+        reconstructed_lines = reconstructed_normalized.splitlines(keepends=True)
+        submitted_lines = submitted_normalized.splitlines(keepends=True)
+        diff = difflib.unified_diff(
+            reconstructed_lines,
+            submitted_lines,
+            fromfile="reconstructed",
+            tofile="submitted",
+            lineterm="",
+        )
+        diff_text = "".join(diff)
+
+    return {
+        "matches": matches,
+        "submitted_file": normalize_path_string(str(submitted_file_path)),
+        "diff": diff_text,
+        "whitespace_only": whitespace_only,
+    }
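The core of `compare_submitted_file` is a newline-normalized equality check followed by a unified diff. A self-contained sketch of that flow; the package's private helpers `_normalize_newlines` and `is_only_whitespace_differences` are not shown in this diff, so simple stand-ins are used:

```python
import difflib

def normalize_newlines(text: str) -> str:
    # Stand-in for the module's _normalize_newlines helper
    return text.replace("\r\n", "\n").replace("\r", "\n")

def whitespace_only(a: str, b: str) -> bool:
    # Stand-in for is_only_whitespace_differences: equal after dropping whitespace
    return "".join(a.split()) == "".join(b.split())

reconstructed = "def add(a, b):\n    return a + b\n"
submitted = "def add(a, b):\n    return a+b\n"

if normalize_newlines(reconstructed) != normalize_newlines(submitted):
    diff = difflib.unified_diff(
        reconstructed.splitlines(keepends=True),
        submitted.splitlines(keepends=True),
        fromfile="reconstructed",
        tofile="submitted",
        lineterm="",
    )
    print("".join(diff))
    print("whitespace only:", whitespace_only(reconstructed, submitted))  # True
```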

code_recorder_processor/cli.py CHANGED

@@ -18,11 +18,13 @@ from .api.output import write_batch_json_output
 from .api.verify import (
     check_time_limit,
     combine_time_info,
+    compare_submitted_file,
     detect_external_copypaste,
     template_diff,
     verify,
 )
 from .display import (
+    display_submitted_file_comparison,
     display_suspicious_events,
     display_template_diff,
     display_time_info,
@@ -102,6 +104,21 @@ def create_parser() -> argparse.ArgumentParser:
         help="Directory to write reconstructed code files in batch mode (one file per recording). "
         "Files are named based on input recording filenames.",
     )
+    parser.add_argument(
+        "--submitted-file",
+        type=Path,
+        default=None,
+        help="Path to the submitted final file to verify against the reconstructed output. "
+        "If provided, the reconstructed code will be compared to this file.",
+    )
+    parser.add_argument(
+        "--submitted-dir",
+        type=Path,
+        default=None,
+        help="Directory containing submitted files to compare against. "
+        "For each recording, the corresponding submitted file will be found by matching the filename. "
+        "For example, 'homework0-ISC.recording.jsonl.gz' will match 'homework0-ISC.py' in the directory.",
+    )
     parser.add_argument(
         "-s",
         "--show-autocomplete-details",
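A self-contained sketch of how the two new flags parse (the prog name and sample arguments are illustrative):

```python
import argparse
from pathlib import Path

parser = argparse.ArgumentParser(prog="cr_proc")
parser.add_argument("--submitted-file", type=Path, default=None)
parser.add_argument("--submitted-dir", type=Path, default=None)

args = parser.parse_args(["--submitted-dir", "submissions/"])
print(args.submitted_dir)           # submissions
print(args.submitted_file is None)  # True
```

Note the flags are declared as two independent options here; their mutual exclusion is enforced manually later in `main()` (see the validation hunk below) rather than via an argparse mutually exclusive group.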

@@ -169,36 +186,81 @@ def expand_file_patterns(patterns: list[str]) -> list[Path]:
     return existing_files
 
 
+def find_submitted_file(
+    jsonl_file: Path,
+    submitted_dir: Path,
+    target_document: str | None,
+) -> Path | None:
+    """
+    Find the submitted file corresponding to a recording file.
+
+    Matches by replacing '.recording.jsonl.gz' with the extension of the
+    target document (or '.py' if not specified).
+
+    Parameters
+    ----------
+    jsonl_file : Path
+        Path to the JSONL recording file
+    submitted_dir : Path
+        Directory containing submitted files
+    target_document : str | None
+        Target document path (to extract extension)
+
+    Returns
+    -------
+    Path | None
+        Path to the submitted file if found, None otherwise
+    """
+    # Determine the file extension from target_document or default to .py
+    extension = ".py"
+    if target_document:
+        extension = Path(target_document).suffix or ".py"
+
+    # Remove '.recording.jsonl.gz' and add the appropriate extension
+    base_name = jsonl_file.name.replace(".recording.jsonl.gz", "")
+    submitted_filename = base_name + extension
+
+    submitted_file = submitted_dir / submitted_filename
+    if submitted_file.exists():
+        return submitted_file
+
+    return None
+
+
 def process_single_file(
     jsonl_path: Path,
+    json_data: tuple[dict[str, Any], ...],
     template_data: str,
     target_document: str | None,
     time_limit: int | None,
-) -> tuple[bool, str, list[dict[str, Any]], dict[str, Any] | None, str]:
+    submitted_file: Path | None = None,
+    submitted_dir: Path | None = None,
+) -> tuple[bool, str, list[dict[str, Any]], dict[str, Any] | None, str, tuple[dict[str, Any], ...], dict[str, Any] | None]:
     """
     Process a single JSONL recording file.
 
     Parameters
     ----------
     jsonl_path : Path
-        Path to the JSONL file
+        Path to the JSONL file (used for error reporting and file matching)
+    json_data : tuple[dict[str, Any], ...]
+        Pre-loaded JSON events from the recording file
     template_data : str
         Template file content
     target_document : str | None
         Document to process
     time_limit : int | None
         Time limit in minutes
+    submitted_file : Path | None
+        Path to the submitted file to compare against
+    submitted_dir : Path | None
+        Directory containing submitted files to compare against
 
     Returns
     -------
     tuple
-        (verified, reconstructed_code, suspicious_events, time_info, template_diff_text)
+        (verified, reconstructed_code, suspicious_events, time_info, template_diff_text, doc_events, submitted_comparison)
     """
-    try:
-        json_data = load_jsonl(jsonl_path)
-    except (FileNotFoundError, ValueError, IOError) as e:
-        print(f"Error loading {jsonl_path}: {e}", file=sys.stderr)
-        return False, "", [], None, ""
 
     # Filter events for target document
     doc_events = filter_events_by_document(json_data, target_document)
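The matching rule in `find_submitted_file` reduces to a string replacement on the recording filename plus the target document's extension; a minimal illustration using the example from the help text:

```python
from pathlib import Path

recording = Path("homework0-ISC.recording.jsonl.gz")
target_document = "homework0-ISC.py"  # illustrative target document

extension = Path(target_document).suffix or ".py"
base_name = recording.name.replace(".recording.jsonl.gz", "")
print(base_name + extension)  # -> homework0-ISC.py
```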

@@ -207,7 +269,7 @@
             f"Warning: No events found for document '{target_document}' in {jsonl_path}",
             file=sys.stderr,
         )
-        return False, "", [], None, ""
+        return False, "", [], None, "", (), None
 
     # Check time information
     time_info = check_time_limit(doc_events, time_limit)
@@ -218,13 +280,29 @@
         reconstructed = reconstruct_file_from_events(
             doc_events, verified_template, document_path=target_document
         )
-        return True, reconstructed, suspicious_events, time_info, ""
+
+        # Compare with submitted file if provided
+        submitted_comparison = None
+        actual_submitted_file = submitted_file
+
+        # If submitted_dir is provided, find the matching file
+        if submitted_dir and not submitted_file:
+            actual_submitted_file = find_submitted_file(jsonl_path, submitted_dir, target_document)
+            if actual_submitted_file:
+                print(f"Found submitted file: {actual_submitted_file.name}", file=sys.stderr)
+
+        if actual_submitted_file and actual_submitted_file.exists():
+            submitted_comparison = compare_submitted_file(reconstructed, actual_submitted_file)
+        elif actual_submitted_file:
+            print(f"Warning: Submitted file not found: {actual_submitted_file}", file=sys.stderr)
+
+        return True, reconstructed, suspicious_events, time_info, "", doc_events, submitted_comparison
     except ValueError as e:
         # If verification fails but we have events, still try to reconstruct
         print(f"Warning: Verification failed for {jsonl_path}: {e}", file=sys.stderr)
         try:
             if not doc_events:
-                return False, "", [], time_info, ""
+                return False, "", [], time_info, "", (), None
 
             # Compute diff against template and still detect suspicious events
             diff_text = template_diff(template_data, doc_events)
@@ -235,19 +313,35 @@
             reconstructed = reconstruct_file_from_events(
                 doc_events, initial_state, document_path=target_document
             )
-            return False, reconstructed, suspicious_events, time_info, diff_text
+
+            # Compare with submitted file if provided
+            submitted_comparison = None
+            actual_submitted_file = submitted_file
+
+            # If submitted_dir is provided, find the matching file
+            if submitted_dir and not submitted_file:
+                actual_submitted_file = find_submitted_file(jsonl_path, submitted_dir, target_document)
+                if actual_submitted_file:
+                    print(f"Found submitted file: {actual_submitted_file.name}", file=sys.stderr)
+
+            if actual_submitted_file and actual_submitted_file.exists():
+                submitted_comparison = compare_submitted_file(reconstructed, actual_submitted_file)
+            elif actual_submitted_file:
+                print(f"Warning: Submitted file not found: {actual_submitted_file}", file=sys.stderr)
+
+            return False, reconstructed, suspicious_events, time_info, diff_text, doc_events, submitted_comparison
         except Exception as reconstruction_error:
             print(
                 f"Error reconstructing {jsonl_path}: {type(reconstruction_error).__name__}: {reconstruction_error}",
                 file=sys.stderr,
             )
-            return False, "", [], time_info, ""
+            return False, "", [], time_info, "", (), None
     except Exception as e:
         print(
             f"Error processing {jsonl_path}: {type(e).__name__}: {e}",
             file=sys.stderr,
         )
-        return False, "", [], time_info, ""
+        return False, "", [], time_info, "", (), None
 
 
 def write_reconstructed_file(
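The lookup-and-compare block is duplicated between the success path and the verification-failure path above. A hedged sketch of one way it could be factored out; this helper is not part of the released code, and the import paths are assumptions:

```python
from pathlib import Path
from typing import Any

from code_recorder_processor.api.verify import compare_submitted_file  # assumed path
from code_recorder_processor.cli import find_submitted_file            # assumed path

def resolve_and_compare(
    reconstructed: str,
    jsonl_path: Path,
    submitted_file: Path | None,
    submitted_dir: Path | None,
    target_document: str | None,
) -> dict[str, Any] | None:
    # Mirrors the duplicated block: prefer an explicit --submitted-file,
    # otherwise look the file up in --submitted-dir by filename
    actual = submitted_file
    if submitted_dir and not submitted_file:
        actual = find_submitted_file(jsonl_path, submitted_dir, target_document)
    if actual and actual.exists():
        return compare_submitted_file(reconstructed, actual)
    return None
```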

@@ -274,7 +368,7 @@ def write_reconstructed_file(
     """
     try:
         output_path.parent.mkdir(parents=True, exist_ok=True)
-        output_path.write_text(content)
+        output_path.write_text(content + '\n')
         print(f"{file_description} written to: {output_path}", file=sys.stderr)
         return True
     except Exception as e:
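The one-line change above makes every reconstructed file end with a trailing newline, like a conventional POSIX text file; a quick illustration:

```python
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as d:
    p = Path(d) / "example.py"
    p.write_text("print('hi')" + "\n")   # same pattern as the hunk above
    print(p.read_text().endswith("\n"))  # True
```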

@@ -284,7 +378,8 @@
 
 def handle_playback_mode(
     jsonl_file: Path,
-
+    json_data: tuple[dict[str, Any], ...],
+    template_base: Path | None,
     template_data: str,
     document_override: str | None,
     speed: float,
@@ -295,9 +390,11 @@
     Parameters
     ----------
     jsonl_file : Path
-        Path to the recording file
-
-
+        Path to the recording file (for error reporting)
+    json_data : tuple[dict[str, Any], ...]
+        Pre-loaded JSON events from the recording file
+    template_base : Path | None
+        Path to the template file or directory
     template_data : str
         Template file content
     document_override : str | None
@@ -311,9 +408,8 @@
         Exit code (0 for success, 1 for error)
     """
     try:
-        json_data = load_jsonl(jsonl_file)
         recorded_docs = get_recorded_documents(json_data)
-        target_document = resolve_document(recorded_docs,
+        target_document = resolve_document(recorded_docs, template_base, document_override)
 
         if target_document:
             playback_recording(json_data, target_document, template_data, speed)
@@ -322,12 +418,13 @@
             print("Error: No documents found in recording", file=sys.stderr)
             return 1
     except Exception as e:
-        print(f"Error
+        print(f"Error in playback: {e}", file=sys.stderr)
         return 1
 
 
 def process_batch(
     jsonl_files: list[Path],
+    json_data_map: dict[Path, tuple[dict[str, Any], ...]],
     template_base: Path | None,
     template_data: str,
     args: argparse.Namespace,
@@ -339,6 +436,8 @@
     ----------
     jsonl_files : list[Path]
         List of JSONL files to process
+    json_data_map : dict[Path, tuple[dict[str, Any], ...]]
+        Pre-loaded JSON data for each file
     template_base : Path
         Path to template file or directory
     template_data : str
@@ -360,9 +459,16 @@
     for i, jsonl_file in enumerate(jsonl_files, 1):
         print_batch_header(i, len(jsonl_files), jsonl_file.name)
 
+        # Get pre-loaded data for this file
+        if jsonl_file not in json_data_map:
+            print(f"Error: No pre-loaded data for {jsonl_file}", file=sys.stderr)
+            all_verified = False
+            continue
+
+        file_data = json_data_map[jsonl_file]
+
         # Determine target document for this file
         try:
-            file_data = load_jsonl(jsonl_file)
             recorded_docs = get_recorded_documents(file_data)
             target_document = resolve_document(recorded_docs, template_base, args.document)
         except (FileNotFoundError, ValueError, IOError) as e:
@@ -386,9 +492,9 @@
         else:
             file_template_data = template_data
 
-        # Process the file
-        verified, reconstructed, suspicious_events, time_info, diff_text = process_single_file(
-            jsonl_file, file_template_data, target_document, args.time_limit
+        # Process the file with pre-loaded data
+        verified, reconstructed, suspicious_events, time_info, diff_text, doc_events, submitted_comparison = process_single_file(
+            jsonl_file, file_data, file_template_data, target_document, args.time_limit, args.submitted_file, args.submitted_dir
         )
 
         if not verified:
@@ -398,6 +504,7 @@
         display_time_info(time_info)
         display_suspicious_events(suspicious_events, args.show_autocomplete_details)
         display_template_diff(diff_text)
+        display_submitted_file_comparison(submitted_comparison)
 
         # Store results
         results.append({
@@ -408,6 +515,8 @@
             "suspicious_events": suspicious_events,
             "time_info": time_info,
             "template_diff": diff_text,
+            "doc_events": doc_events,
+            "submitted_comparison": submitted_comparison,
         })
 
         # Write output file if requested
@@ -421,6 +530,7 @@
 
 def process_single(
     jsonl_file: Path,
+    json_data: tuple[dict[str, Any], ...],
     template_base: Path | None,
     template_data: str,
     args: argparse.Namespace,
@@ -432,6 +542,8 @@
     ----------
     jsonl_file : Path
         Path to JSONL file
+    json_data : tuple[dict[str, Any], ...]
+        Pre-loaded JSON data for the file
     template_base : Path
         Path to template file or directory
     template_data : str
@@ -445,8 +557,7 @@
         (results, verified)
     """
     try:
-        file_data = load_jsonl(jsonl_file)
-        recorded_docs = get_recorded_documents(file_data)
+        recorded_docs = get_recorded_documents(json_data)
         target_document = resolve_document(recorded_docs, template_base, args.document)
     except (FileNotFoundError, ValueError, IOError) as e:
         print(f"Error determining document: {e}", file=sys.stderr)
@@ -470,14 +581,15 @@
 
     print(f"Processing: {target_document or template_base}", file=sys.stderr)
 
-    verified, reconstructed, suspicious_events, time_info, diff_text = process_single_file(
-        jsonl_file, file_template_data, target_document, args.time_limit
+    verified, reconstructed, suspicious_events, time_info, diff_text, doc_events, submitted_comparison = process_single_file(
+        jsonl_file, json_data, file_template_data, target_document, args.time_limit, args.submitted_file, args.submitted_dir
     )
 
     # Display results
     display_time_info(time_info)
     display_suspicious_events(suspicious_events, args.show_autocomplete_details)
     display_template_diff(diff_text)
+    display_submitted_file_comparison(submitted_comparison)
 
     # Write output file if requested
     if reconstructed and args.output_file:
@@ -492,6 +604,8 @@
         "suspicious_events": suspicious_events,
         "time_info": time_info,
         "template_diff": diff_text,
+        "doc_events": doc_events,
+        "submitted_comparison": submitted_comparison,
     }]
 
     return results, verified
@@ -526,6 +640,11 @@ def main() -> int:
         parser.print_help()
         return 1
 
+    # Validate that both --submitted-file and --submitted-dir are not provided simultaneously
+    if args.submitted_file and args.submitted_dir:
+        print("Error: Cannot specify both --submitted-file and --submitted-dir", file=sys.stderr)
+        return 1
+
     # Expand file patterns and validate
     try:
         jsonl_files = expand_file_patterns(jsonl_patterns)

@@ -540,10 +659,23 @@ def main() -> int:
     # Determine template source (use template_dir if provided, otherwise template_file)
     template_path = args.template_dir if args.template_dir else template_file
 
+    # Load all files once - fail fast if any fail
+    json_data_map: dict[Path, tuple[dict[str, Any], ...]] = {}
+    for jsonl_file in jsonl_files:
+        try:
+            json_data_map[jsonl_file] = load_jsonl(jsonl_file)
+        except ValueError as e:
+            print(f"Error parsing {jsonl_file}: {e}", file=sys.stderr)
+            print("Tampering is likely - aborting processing.", file=sys.stderr)
+            return 1
+        except (FileNotFoundError, ValueError, IOError) as e:
+            print(f"Error loading {jsonl_file}: {e}", file=sys.stderr)
+            return 1
+
     # Handle playback mode (single file only)
     if not batch_mode and args.playback:
         try:
-            json_data =
+            json_data = json_data_map[jsonl_files[0]]
             recorded_docs = get_recorded_documents(json_data)
             target_document = resolve_document(recorded_docs, template_path, args.document)
 
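A hedged sketch of the new fail-fast loading pass: every recording is parsed exactly once up front, and one malformed file aborts the run. `load_jsonl` is the package's loader; the module path in the import is an assumption:

```python
from pathlib import Path

from code_recorder_processor.api.load import load_jsonl  # assumed module path

json_data_map: dict[Path, tuple[dict, ...]] = {}
for jsonl_file in [Path("a.recording.jsonl.gz"), Path("b.recording.jsonl.gz")]:
    try:
        json_data_map[jsonl_file] = load_jsonl(jsonl_file)
    except ValueError as e:
        # Mirrors the hunk above: a parse error is treated as likely tampering
        raise SystemExit(f"Error parsing {jsonl_file}: {e}")
```

Since `ValueError` is caught first, the later `except (FileNotFoundError, ValueError, IOError)` clause in the hunk above effectively only handles the file-access errors.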

@@ -554,14 +686,11 @@
                 target_document
             )
 
-
-
-
-            else:
-                print("Error: No documents found in recording", file=sys.stderr)
-                return 1
+            return handle_playback_mode(
+                jsonl_files[0], json_data, template_path, template_data, args.document, args.playback_speed
+            )
         except Exception as e:
-            print(f"Error
+            print(f"Error in playback: {e}", file=sys.stderr)
             return 1
 
     # Get template data
@@ -580,14 +709,14 @@
         print(f"Error: {e}", file=sys.stderr)
         return 1
 
-    # Process files
+    # Process files with pre-loaded data
     if batch_mode:
         results, all_verified = process_batch(
-            jsonl_files, template_path, template_data, args
+            jsonl_files, json_data_map, template_path, template_data, args
         )
     else:
         results, all_verified = process_single(
-            jsonl_files[0], template_path, template_data, args
+            jsonl_files[0], json_data_map[jsonl_files[0]], template_path, template_data, args
         )
 
     if not results:
@@ -600,10 +729,10 @@
         print_batch_summary(len(results), verified_count, failed_files)
 
         # Display combined time report
-
+        all_events = [r["doc_events"] for r in results]
         combined_time = None
-        if any(
-            combined_time = combine_time_info(
+        if any(all_events):
+            combined_time = combine_time_info(all_events, args.time_limit)
             display_time_info(combined_time, is_combined=True)
 
     # Write JSON output
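Shape of the data the batch summary now hands to `combine_time_info`: one tuple of event dicts per processed recording. The event fields below are hypothetical; empty tuples come from recordings that produced no usable events and are filtered out inside `combine_time_info`:

```python
all_events = [
    ({"type": "edit"}, {"type": "edit"}),  # recording 1 (hypothetical events)
    (),                                    # recording 2: no usable events
]
if any(all_events):  # same guard as the hunk above
    print("would call combine_time_info(all_events, time_limit)")
```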

code_recorder_processor/display.py CHANGED

@@ -176,6 +176,39 @@ def display_template_diff(diff_text: str) -> None:
     print(diff_text, file=sys.stderr)
 
 
+def display_submitted_file_comparison(comparison: dict[str, Any] | None) -> None:
+    """
+    Display comparison results between reconstructed code and submitted file.
+
+    Parameters
+    ----------
+    comparison : dict[str, Any] | None
+        Comparison results from compare_submitted_file, or None if no comparison
+    """
+    if not comparison:
+        return
+
+    print("\nSubmitted file comparison:", file=sys.stderr)
+    print(f" Submitted file: {comparison['submitted_file']}", file=sys.stderr)
+
+    if "error" in comparison:
+        print(f" Error: {comparison['error']}", file=sys.stderr)
+        return
+
+    if comparison["matches"]:
+        print(" ✓ Reconstructed code matches submitted file exactly", file=sys.stderr)
+    elif comparison.get("whitespace_only", False):
+        print(" ⚠ Reconstructed code differs only in whitespace from submitted file", file=sys.stderr)
+    else:
+        print(" ✗ Reconstructed code differs from submitted file", file=sys.stderr)
+        if comparison.get("diff"):
+            print("\n Diff (reconstructed → submitted):", file=sys.stderr)
+            # Indent each line of the diff
+            for line in comparison["diff"].split("\n"):
+                if line:
+                    print(f" {line}", file=sys.stderr)
+
+
 def print_separator() -> None:
     """Print a separator line."""
     print(f"{'='*80}", file=sys.stderr)
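A usage sketch with a hand-built comparison dict (values illustrative); this exercises the whitespace-only branch and prints to stderr, as in the function above:

```python
from code_recorder_processor.display import display_submitted_file_comparison

display_submitted_file_comparison({
    "submitted_file": "submissions/homework0-ISC.py",
    "matches": False,
    "whitespace_only": True,
    "diff": "",
})
```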

{cr_proc-0.1.11.dist-info → cr_proc-0.1.13.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cr_proc
-Version: 0.1.11
+Version: 0.1.13
 Summary: A tool for processing BYU CS code recording files.
 Author: Ethan Dye
 Author-email: mrtops03@gmail.com
@@ -79,6 +79,14 @@ When processing multiple files:
   specified file instead of stdout. For single files only.
 - `--output-dir OUTPUT_DIR`: (Optional) Directory to write reconstructed code
   files in batch mode. Files are named based on input recording filenames.
+- `--submitted-file SUBMITTED_FILE`: (Optional) Path to the submitted final file
+  to verify against the reconstructed output. If provided, the reconstructed code
+  will be compared to this file and differences will be reported.
+- `--submitted-dir SUBMITTED_DIR`: (Optional) Directory containing submitted files
+  to verify against the reconstructed output. For each recording file, the
+  corresponding submitted file will be found by matching the filename
+  (e.g., `homework0-ISC.recording.jsonl.gz` will match `homework0-ISC.py`).
+  Cannot be used with `--submitted-file`.
 - `-s, --show-autocomplete-details`: (Optional) Show individual auto-complete
   events in addition to aggregate statistics.
 - `-p, --playback`: (Optional) Play back the recording in real-time, showing
@@ -112,6 +120,18 @@ Save JSON results:
 poetry run cr_proc student1.jsonl.gz student2.jsonl.gz template.py -o results/
 ```
 
+Verify against a single submitted file:
+
+```bash
+poetry run cr_proc homework0.recording.jsonl.gz homework0.py --submitted-file submitted_homework0.py
+```
+
+Verify against submitted files in a directory (batch mode):
+
+```bash
+poetry run cr_proc recordings/*.jsonl.gz template.py --submitted-dir submissions/
+```
+
 This will process each recording independently and flag any that exceed 30
 minutes.
 

cr_proc-0.1.13.dist-info/RECORD ADDED

@@ -0,0 +1,13 @@
+code_recorder_processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+code_recorder_processor/api/build.py,sha256=XuF8Vx9mDdRqeaxCVgYAdn4NFJzkRt4Q839m15th0Fo,9908
+code_recorder_processor/api/document.py,sha256=eIsGBCPq234cPiPN_ktBOoVrt0G1hHqbgvhXwMXQdnU,11343
+code_recorder_processor/api/load.py,sha256=Br-USpFQJ6W8c5hjmCnunM3V0_MURKZp5Yyl1IJdahc,5514
+code_recorder_processor/api/output.py,sha256=HLa6DtN4i-wW0-vyE7SzqLeq35nhSoj2yHc9RjTPcBc,2441
+code_recorder_processor/api/verify.py,sha256=6D8Zs8NBziG1kNc6HgH59yQp4u1wa4zyCuNE0qLTlpk,36040
+code_recorder_processor/cli.py,sha256=giFUu9KiB40k5Z2CwZK9CDL7dQOicBLCGBx0Uzki39o,26329
+code_recorder_processor/display.py,sha256=He5loCMrm1S1186N2BgDy6bl0v__kiosJ_qDxpa4hbM,8657
+code_recorder_processor/playback.py,sha256=6-OJtQOHKgfutxUNBMunWl-VVSIB0zUDENSl0EsPCh4,4008
+cr_proc-0.1.13.dist-info/METADATA,sha256=-3ylZb1eSoxy2GY2egju_6WobzjoyLnGfEQikM7wMzA,9812
+cr_proc-0.1.13.dist-info/WHEEL,sha256=kJCRJT_g0adfAJzTx2GUMmS80rTJIVHRCfG0DQgLq3o,88
+cr_proc-0.1.13.dist-info/entry_points.txt,sha256=xb5dPAAWN1Z9NUHpvZgNakaslR1MVOERf_IfpG_M04M,77
+cr_proc-0.1.13.dist-info/RECORD,,
cr_proc-0.1.11.dist-info/RECORD DELETED

@@ -1,13 +0,0 @@
-code_recorder_processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-code_recorder_processor/api/build.py,sha256=XuF8Vx9mDdRqeaxCVgYAdn4NFJzkRt4Q839m15th0Fo,9908
-code_recorder_processor/api/document.py,sha256=DOQ0H1dQJtMs2P9E2qnKgg2iKQT9msgdE9oJXl36SnY,10622
-code_recorder_processor/api/load.py,sha256=Br-USpFQJ6W8c5hjmCnunM3V0_MURKZp5Yyl1IJdahc,5514
-code_recorder_processor/api/output.py,sha256=H2SC3pQ0C9V8YyN4yeA_KmvSoWXy_3T3TKWKhywIax4,2161
-code_recorder_processor/api/verify.py,sha256=9GpeoFQIiTzZd-DNSyN5OUM6YB5iMslO85oAjc0yoSU,34073
-code_recorder_processor/cli.py,sha256=ardcM3bLNhf6abOQ1Aj746x4hp8gerdklfDwszLlYKc,20504
-code_recorder_processor/display.py,sha256=IVTNFB3Vjzpc5ZHceAFQI2-o-N6bvjYmotLDaEy0KoU,7368
-code_recorder_processor/playback.py,sha256=6-OJtQOHKgfutxUNBMunWl-VVSIB0zUDENSl0EsPCh4,4008
-cr_proc-0.1.11.dist-info/METADATA,sha256=wZuAW9ghrjT2fCbiI9bJSy5TPLc4YD6OpYb0mTlyOL4,8926
-cr_proc-0.1.11.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
-cr_proc-0.1.11.dist-info/entry_points.txt,sha256=xb5dPAAWN1Z9NUHpvZgNakaslR1MVOERf_IfpG_M04M,77
-cr_proc-0.1.11.dist-info/RECORD,,

{cr_proc-0.1.11.dist-info → cr_proc-0.1.13.dist-info}/entry_points.txt
File without changes