cr-proc 0.1.11-py3-none-any.whl → 0.1.12-py3-none-any.whl
- code_recorder_processor/api/document.py +33 -9
- code_recorder_processor/api/output.py +11 -3
- code_recorder_processor/api/verify.py +93 -34
- code_recorder_processor/cli.py +124 -17
- code_recorder_processor/display.py +33 -0
- {cr_proc-0.1.11.dist-info → cr_proc-0.1.12.dist-info}/METADATA +21 -1
- cr_proc-0.1.12.dist-info/RECORD +13 -0
- cr_proc-0.1.11.dist-info/RECORD +0 -13
- {cr_proc-0.1.11.dist-info → cr_proc-0.1.12.dist-info}/WHEEL +0 -0
- {cr_proc-0.1.11.dist-info → cr_proc-0.1.12.dist-info}/entry_points.txt +0 -0
code_recorder_processor/api/document.py CHANGED
@@ -5,9 +5,38 @@ from pathlib import Path, PureWindowsPath, PurePosixPath
 from typing import Any
 
 
+def normalize_path_string(path_str: str) -> str:
+    """
+    Normalize a path string to use forward slashes (POSIX style).
+
+    Handles both Windows-style (backslash) and Unix-style (forward slash) paths
+    regardless of the current platform. Useful for cross-platform consistency
+    when files are created on Windows but processed on other systems.
+
+    Parameters
+    ----------
+    path_str : str
+        Path string (may use Windows or Unix separators)
+
+    Returns
+    -------
+    str
+        Normalized path string using forward slashes
+    """
+    # Try to detect if this is a Windows path (contains backslashes)
+    if "\\" in path_str:
+        # Windows-style path
+        path_obj = PureWindowsPath(path_str)
+    else:
+        # Unix-style path (or just a filename)
+        path_obj = PurePosixPath(path_str)
+
+    return path_obj.as_posix()
+
+
 def _normalize_document_path(doc_path: str) -> tuple[str, str]:
     """
-
+    Extract filename and stem from a document path.
 
     Handles both Windows-style (backslash) and Unix-style (forward slash) paths
     regardless of the current platform.
@@ -22,14 +51,9 @@ def _normalize_document_path(doc_path: str) -> tuple[str, str]:
     tuple[str, str]
         (filename, stem) extracted from the path
     """
-    # Try to detect if this is a Windows path (contains backslashes)
-    if "\\" in doc_path:
-        # Windows-style path
-        path_obj = PureWindowsPath(doc_path)
-    else:
-        # Unix-style path (or just a filename)
-        path_obj = PurePosixPath(doc_path)
-
+    # Normalize to forward slashes first, then parse
+    normalized = normalize_path_string(doc_path)
+    path_obj = PurePosixPath(normalized)
     return path_obj.name, path_obj.stem
 
 
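A quick way to sanity-check the new helper: the snippet below copies the logic from the hunk above into a standalone script; the sample paths are illustrative.

```python
from pathlib import PureWindowsPath, PurePosixPath

def normalize_path_string(path_str: str) -> str:
    # Backslashes signal a Windows-style path; everything else is treated as POSIX.
    if "\\" in path_str:
        return PureWindowsPath(path_str).as_posix()
    return PurePosixPath(path_str).as_posix()

print(normalize_path_string(r"C:\Users\student\homework0-ISC.py"))  # C:/Users/student/homework0-ISC.py
print(normalize_path_string("src/homework0-ISC.py"))                 # src/homework0-ISC.py (unchanged)
print(normalize_path_string("homework0-ISC.py"))                     # bare filenames pass through
```
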
code_recorder_processor/api/output.py CHANGED
@@ -4,6 +4,8 @@ import sys
 from pathlib import Path
 from typing import Any
 
+from .document import normalize_path_string
+
 
 def write_batch_json_output(
     output_path: Path,
@@ -36,15 +38,21 @@ def write_batch_json_output(
     # Convert results to JSON-serializable format
     files_data = []
     for r in results:
-        files_data.append({
-            "jsonl_file": str(r["jsonl_file"]),
+        file_result = {
+            "jsonl_file": normalize_path_string(str(r["jsonl_file"])),
             "document": r["target_document"],
            "verified": r["verified"],
             "time_info": r["time_info"],
             "suspicious_events": r["suspicious_events"],
             "template_diff": r.get("template_diff", ""),
             "reconstructed_code": r["reconstructed"],
-        })
+        }
+
+        # Add submitted_comparison if present
+        if r.get("submitted_comparison") is not None:
+            file_result["submitted_comparison"] = r["submitted_comparison"]
+
+        files_data.append(file_result)
 
     # Use consistent format for both single and batch modes
     output_data = {
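Net effect on the JSON written in batch mode: `jsonl_file` is separator-normalized, and `submitted_comparison` appears only when a comparison actually ran. A sketch of one resulting entry (all values are illustrative):

```python
# One entry in files_data after this change (values are illustrative):
file_result = {
    "jsonl_file": "recordings/homework0-ISC.recording.jsonl.gz",  # forward slashes even on Windows
    "document": "homework0-ISC.py",
    "verified": True,
    "time_info": None,          # dict from check_time_limit, elided here
    "suspicious_events": [],
    "template_diff": "",
    "reconstructed_code": "...",
}
# Added only when --submitted-file/--submitted-dir produced a result:
file_result["submitted_comparison"] = {
    "matches": True,
    "submitted_file": "submissions/homework0-ISC.py",
    "diff": "",
    "whitespace_only": False,
}
```
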
code_recorder_processor/api/verify.py CHANGED
@@ -1,6 +1,7 @@
 from typing import Any
 from datetime import datetime
 import difflib
+from .document import normalize_path_string
 
 # ============================================================================
 # Constants for detection thresholds
@@ -837,15 +838,19 @@ def verify(template: str, jsonData: tuple[dict[str, Any], ...]) -> tuple[str, list[dict[str, Any]]]:
 
 
 def combine_time_info(
-
+    all_events: list[tuple[dict[str, Any], ...]], time_limit_minutes: int | None
 ) -> dict[str, Any] | None:
     """
-    Combine time information from multiple recording files.
+    Combine time information from multiple recording files, avoiding double-counting overlapping time.
+
+    Merges all events from multiple recordings, then calculates the actual time spent editing
+    using the same logic as check_time_limit (gap analysis with focus awareness). This ensures
+    overlapping editing sessions are not double-counted.
 
     Parameters
     ----------
-
-        List of
+    all_events : list[tuple[dict[str, Any], ...]]
+        List of event tuples from multiple recording files
     time_limit_minutes : int | None
         Time limit to check against
 
@@ -854,40 +859,94 @@ def combine_time_info(
     dict[str, Any] | None
         Combined time information, or None if no valid data
     """
-
-    if
+    # Filter out empty event sets
+    valid_event_sets = [events for events in all_events if events]
+    if not valid_event_sets:
         return None
 
-    #
-
+    # Merge all events from all recordings into a single tuple
+    merged_events = tuple(
+        event
+        for event_set in valid_event_sets
+        for event in event_set
+    )
 
-    #
-
-
-
-            datetime.fromisoformat(info["first_timestamp"].replace("Z", "+00:00"))
-        )
-        all_timestamps.append(
-            datetime.fromisoformat(info["last_timestamp"].replace("Z", "+00:00"))
-        )
+    # Use check_time_limit on the merged events to calculate time properly
+    # This handles overlapping periods automatically since we're now analyzing
+    # all events together chronologically
+    combined_result = check_time_limit(merged_events, time_limit_minutes)
 
-
-
-    overall_span = (last_ts - first_ts).total_seconds() / 60
+    if combined_result is None:
+        return None
 
-
-
-        "minutes_elapsed": round(total_elapsed, 2),
-        "first_timestamp": first_ts.isoformat().replace("+00:00", "Z"),
-        "last_timestamp": last_ts.isoformat().replace("+00:00", "Z"),
-        "file_count": len(valid_infos),
-        "overall_span_minutes": round(overall_span, 2),
-    }
+    # Add file_count to the result
+    combined_result["file_count"] = len(valid_event_sets)
 
-
-    if time_limit_minutes is not None:
-        result["exceeds_limit"] = total_elapsed > time_limit_minutes
-    else:
-        result["exceeds_limit"] = False
+    return combined_result
 
-
+
+def compare_submitted_file(reconstructed_code: str, submitted_file_path) -> dict[str, Any]:
+    """
+    Compare reconstructed code from recording with a submitted final file.
+
+    Parameters
+    ----------
+    reconstructed_code : str
+        The code reconstructed from the recording
+    submitted_file_path : Path
+        Path to the submitted file
+
+    Returns
+    -------
+    dict[str, Any]
+        Dictionary containing:
+        - matches: bool indicating if the files match
+        - submitted_file: path to the submitted file
+        - diff: unified diff string if files don't match
+        - whitespace_only: bool indicating if only whitespace differs
+    """
+    try:
+        submitted_content = submitted_file_path.read_text()
+    except Exception as e:
+        return {
+            "matches": False,
+            "submitted_file": normalize_path_string(str(submitted_file_path)),
+            "error": f"Failed to read submitted file: {e}",
+            "diff": "",
+            "whitespace_only": False,
+        }
+
+    # Normalize newlines for comparison
+    reconstructed_normalized = _normalize_newlines(reconstructed_code)
+    submitted_normalized = _normalize_newlines(submitted_content)
+
+    # Check exact match
+    matches = reconstructed_normalized == submitted_normalized
+
+    # Check if only whitespace differs
+    whitespace_only = False
+    if not matches:
+        whitespace_only = is_only_whitespace_differences(
+            submitted_normalized, reconstructed_normalized
+        )
+
+    # Generate diff if they don't match
+    diff_text = ""
+    if not matches:
+        reconstructed_lines = reconstructed_normalized.splitlines(keepends=True)
+        submitted_lines = submitted_normalized.splitlines(keepends=True)
+        diff = difflib.unified_diff(
+            reconstructed_lines,
+            submitted_lines,
+            fromfile="reconstructed",
+            tofile="submitted",
+            lineterm="",
+        )
+        diff_text = "".join(diff)
+
+    return {
+        "matches": matches,
+        "submitted_file": normalize_path_string(str(submitted_file_path)),
+        "diff": diff_text,
+        "whitespace_only": whitespace_only,
+    }
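The diff generation in `compare_submitted_file` is plain `difflib`; here is a self-contained sketch of the same call pattern with sample strings (same `fromfile`/`tofile` labels as above):

```python
import difflib

reconstructed = "def add(a, b):\n    return a + b\n"
submitted = "def add(a, b):\n    return a+b\n"   # whitespace-only difference

diff = difflib.unified_diff(
    reconstructed.splitlines(keepends=True),
    submitted.splitlines(keepends=True),
    fromfile="reconstructed",
    tofile="submitted",
    lineterm="",
)
for line in diff:
    print(line.rstrip("\n"))
# --- reconstructed
# +++ submitted
# @@ -1,2 +1,2 @@
#  def add(a, b):
# -    return a + b
# +    return a+b
```
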
code_recorder_processor/cli.py CHANGED
@@ -18,11 +18,13 @@ from .api.output import write_batch_json_output
 from .api.verify import (
     check_time_limit,
     combine_time_info,
+    compare_submitted_file,
     detect_external_copypaste,
     template_diff,
     verify,
 )
 from .display import (
+    display_submitted_file_comparison,
     display_suspicious_events,
     display_template_diff,
     display_time_info,
@@ -102,6 +104,21 @@ def create_parser() -> argparse.ArgumentParser:
         help="Directory to write reconstructed code files in batch mode (one file per recording). "
         "Files are named based on input recording filenames.",
     )
+    parser.add_argument(
+        "--submitted-file",
+        type=Path,
+        default=None,
+        help="Path to the submitted final file to verify against the reconstructed output. "
+        "If provided, the reconstructed code will be compared to this file.",
+    )
+    parser.add_argument(
+        "--submitted-dir",
+        type=Path,
+        default=None,
+        help="Directory containing submitted files to compare against. "
+        "For each recording, the corresponding submitted file will be found by matching the filename. "
+        "For example, 'homework0-ISC.recording.jsonl.gz' will match 'homework0-ISC.py' in the directory.",
+    )
     parser.add_argument(
         "-s",
         "--show-autocomplete-details",
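The exclusivity of the two new flags is enforced by a manual check in `main()` (see the hunk near the end of this file). For comparison, a hedged sketch of how argparse could express the same constraint natively; this is an alternative design, not what the package does:

```python
import argparse
from pathlib import Path

parser = argparse.ArgumentParser()
group = parser.add_mutually_exclusive_group()
group.add_argument("--submitted-file", type=Path, default=None)
group.add_argument("--submitted-dir", type=Path, default=None)

# Passing both would now fail inside argparse itself, roughly:
#   error: argument --submitted-dir: not allowed with argument --submitted-file
args = parser.parse_args(["--submitted-file", "homework0.py"])
print(args.submitted_file, args.submitted_dir)  # homework0.py None
```
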
@@ -169,12 +186,55 @@ def expand_file_patterns(patterns: list[str]) -> list[Path]:
     return existing_files
 
 
+def find_submitted_file(
+    jsonl_file: Path,
+    submitted_dir: Path,
+    target_document: str | None,
+) -> Path | None:
+    """
+    Find the submitted file corresponding to a recording file.
+
+    Matches by replacing '.recording.jsonl.gz' with the extension of the
+    target document (or '.py' if not specified).
+
+    Parameters
+    ----------
+    jsonl_file : Path
+        Path to the JSONL recording file
+    submitted_dir : Path
+        Directory containing submitted files
+    target_document : str | None
+        Target document path (to extract extension)
+
+    Returns
+    -------
+    Path | None
+        Path to the submitted file if found, None otherwise
+    """
+    # Determine the file extension from target_document or default to .py
+    extension = ".py"
+    if target_document:
+        extension = Path(target_document).suffix or ".py"
+
+    # Remove '.recording.jsonl.gz' and add the appropriate extension
+    base_name = jsonl_file.name.replace(".recording.jsonl.gz", "")
+    submitted_filename = base_name + extension
+
+    submitted_file = submitted_dir / submitted_filename
+    if submitted_file.exists():
+        return submitted_file
+
+    return None
+
+
 def process_single_file(
     jsonl_path: Path,
     template_data: str,
     target_document: str | None,
     time_limit: int | None,
-) -> tuple[bool, str, list[dict[str, Any]], dict[str, Any] | None, str]:
+    submitted_file: Path | None = None,
+    submitted_dir: Path | None = None,
+) -> tuple[bool, str, list[dict[str, Any]], dict[str, Any] | None, str, tuple[dict[str, Any], ...], dict[str, Any] | None]:
     """
     Process a single JSONL recording file.
 
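The recording-to-submission filename mapping is pure string work and easy to check in isolation. A sketch (the helper name and sample filenames are illustrative, but the mapping mirrors `find_submitted_file` above):

```python
from pathlib import Path

def expected_submission_name(recording_name: str, target_document: str | None) -> str:
    # Strip the recording suffix, reuse the target document's extension, default to .py.
    extension = ".py"
    if target_document:
        extension = Path(target_document).suffix or ".py"
    return recording_name.replace(".recording.jsonl.gz", "") + extension

print(expected_submission_name("homework0-ISC.recording.jsonl.gz", None))   # homework0-ISC.py
print(expected_submission_name("lab1.recording.jsonl.gz", "lab1.cpp"))      # lab1.cpp
```
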
@@ -188,17 +248,21 @@ def process_single_file(
         Document to process
     time_limit : int | None
         Time limit in minutes
+    submitted_file : Path | None
+        Path to the submitted file to compare against
+    submitted_dir : Path | None
+        Directory containing submitted files to compare against
 
     Returns
     -------
     tuple
-        (verified, reconstructed_code, suspicious_events, time_info, template_diff_text)
+        (verified, reconstructed_code, suspicious_events, time_info, template_diff_text, doc_events, submitted_comparison)
     """
     try:
         json_data = load_jsonl(jsonl_path)
     except (FileNotFoundError, ValueError, IOError) as e:
         print(f"Error loading {jsonl_path}: {e}", file=sys.stderr)
-        return False, "", [], None, ""
+        return False, "", [], None, "", (), None
 
     # Filter events for target document
     doc_events = filter_events_by_document(json_data, target_document)
@@ -207,7 +271,7 @@
             f"Warning: No events found for document '{target_document}' in {jsonl_path}",
             file=sys.stderr,
         )
-        return False, "", [], None, ""
+        return False, "", [], None, "", (), None
 
     # Check time information
     time_info = check_time_limit(doc_events, time_limit)
@@ -218,13 +282,29 @@
         reconstructed = reconstruct_file_from_events(
             doc_events, verified_template, document_path=target_document
         )
-        return True, reconstructed, suspicious_events, time_info, ""
+
+        # Compare with submitted file if provided
+        submitted_comparison = None
+        actual_submitted_file = submitted_file
+
+        # If submitted_dir is provided, find the matching file
+        if submitted_dir and not submitted_file:
+            actual_submitted_file = find_submitted_file(jsonl_path, submitted_dir, target_document)
+            if actual_submitted_file:
+                print(f"Found submitted file: {actual_submitted_file.name}", file=sys.stderr)
+
+        if actual_submitted_file and actual_submitted_file.exists():
+            submitted_comparison = compare_submitted_file(reconstructed, actual_submitted_file)
+        elif actual_submitted_file:
+            print(f"Warning: Submitted file not found: {actual_submitted_file}", file=sys.stderr)
+
+        return True, reconstructed, suspicious_events, time_info, "", doc_events, submitted_comparison
     except ValueError as e:
         # If verification fails but we have events, still try to reconstruct
         print(f"Warning: Verification failed for {jsonl_path}: {e}", file=sys.stderr)
         try:
             if not doc_events:
-                return False, "", [], time_info, ""
+                return False, "", [], time_info, "", (), None
 
             # Compute diff against template and still detect suspicious events
             diff_text = template_diff(template_data, doc_events)
@@ -235,19 +315,35 @@
             reconstructed = reconstruct_file_from_events(
                 doc_events, initial_state, document_path=target_document
             )
-            return False, reconstructed, suspicious_events, time_info, diff_text
+
+            # Compare with submitted file if provided
+            submitted_comparison = None
+            actual_submitted_file = submitted_file
+
+            # If submitted_dir is provided, find the matching file
+            if submitted_dir and not submitted_file:
+                actual_submitted_file = find_submitted_file(jsonl_path, submitted_dir, target_document)
+                if actual_submitted_file:
+                    print(f"Found submitted file: {actual_submitted_file.name}", file=sys.stderr)
+
+            if actual_submitted_file and actual_submitted_file.exists():
+                submitted_comparison = compare_submitted_file(reconstructed, actual_submitted_file)
+            elif actual_submitted_file:
+                print(f"Warning: Submitted file not found: {actual_submitted_file}", file=sys.stderr)
+
+            return False, reconstructed, suspicious_events, time_info, diff_text, doc_events, submitted_comparison
         except Exception as reconstruction_error:
             print(
                 f"Error reconstructing {jsonl_path}: {type(reconstruction_error).__name__}: {reconstruction_error}",
                 file=sys.stderr,
             )
-            return False, "", [], time_info, ""
+            return False, "", [], time_info, "", (), None
     except Exception as e:
         print(
             f"Error processing {jsonl_path}: {type(e).__name__}: {e}",
             file=sys.stderr,
         )
-        return False, "", [], time_info, ""
+        return False, "", [], time_info, "", (), None
 
 
 def write_reconstructed_file(
@@ -274,7 +370,7 @@ def write_reconstructed_file(
     """
     try:
         output_path.parent.mkdir(parents=True, exist_ok=True)
-        output_path.write_text(content)
+        output_path.write_text(content + '\n')
         print(f"{file_description} written to: {output_path}", file=sys.stderr)
         return True
     except Exception as e:
@@ -387,8 +483,8 @@ def process_batch(
             file_template_data = template_data
 
         # Process the file
-        verified, reconstructed, suspicious_events, time_info, diff_text = process_single_file(
-            jsonl_file, file_template_data, target_document, args.time_limit
+        verified, reconstructed, suspicious_events, time_info, diff_text, doc_events, submitted_comparison = process_single_file(
+            jsonl_file, file_template_data, target_document, args.time_limit, args.submitted_file, args.submitted_dir
         )
 
         if not verified:
@@ -398,6 +494,7 @@
         display_time_info(time_info)
         display_suspicious_events(suspicious_events, args.show_autocomplete_details)
         display_template_diff(diff_text)
+        display_submitted_file_comparison(submitted_comparison)
 
         # Store results
         results.append({
@@ -408,6 +505,8 @@
             "suspicious_events": suspicious_events,
             "time_info": time_info,
             "template_diff": diff_text,
+            "doc_events": doc_events,
+            "submitted_comparison": submitted_comparison,
         })
 
         # Write output file if requested
@@ -470,14 +569,15 @@ def process_single(
 
     print(f"Processing: {target_document or template_base}", file=sys.stderr)
 
-    verified, reconstructed, suspicious_events, time_info, diff_text = process_single_file(
-        jsonl_file, file_template_data, target_document, args.time_limit
+    verified, reconstructed, suspicious_events, time_info, diff_text, doc_events, submitted_comparison = process_single_file(
+        jsonl_file, file_template_data, target_document, args.time_limit, args.submitted_file, args.submitted_dir
    )
 
     # Display results
     display_time_info(time_info)
     display_suspicious_events(suspicious_events, args.show_autocomplete_details)
     display_template_diff(diff_text)
+    display_submitted_file_comparison(submitted_comparison)
 
     # Write output file if requested
     if reconstructed and args.output_file:
@@ -492,6 +592,8 @@
         "suspicious_events": suspicious_events,
         "time_info": time_info,
         "template_diff": diff_text,
+        "doc_events": doc_events,
+        "submitted_comparison": submitted_comparison,
     }]
 
     return results, verified
@@ -526,6 +628,11 @@ def main() -> int:
         parser.print_help()
         return 1
 
+    # Validate that both --submitted-file and --submitted-dir are not provided simultaneously
+    if args.submitted_file and args.submitted_dir:
+        print("Error: Cannot specify both --submitted-file and --submitted-dir", file=sys.stderr)
+        return 1
+
     # Expand file patterns and validate
     try:
         jsonl_files = expand_file_patterns(jsonl_patterns)
@@ -600,10 +707,10 @@ def main() -> int:
         print_batch_summary(len(results), verified_count, failed_files)
 
         # Display combined time report
-
+        all_events = [r["doc_events"] for r in results]
         combined_time = None
-        if any(
-            combined_time = combine_time_info(
+        if any(all_events):
+            combined_time = combine_time_info(all_events, args.time_limit)
         display_time_info(combined_time, is_combined=True)
 
         # Write JSON output
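The last hunk is the payoff of the verify.py rewrite: `main()` now hands `combine_time_info` the raw per-file event tuples instead of per-file time summaries, so the gap analysis sees all recordings at once. A sketch of the merge step with toy events (`ts` stands in for the real event fields):

```python
from typing import Any

all_events: list[tuple[dict[str, Any], ...]] = [
    ({"ts": "2025-01-01T10:00:00Z"}, {"ts": "2025-01-01T10:05:00Z"}),
    (),                                   # recording with no events for the target document
    ({"ts": "2025-01-01T10:03:00Z"},),    # overlaps the first recording's session
]

valid_event_sets = [events for events in all_events if events]
merged_events = tuple(event for event_set in valid_event_sets for event in event_set)
print(len(merged_events))  # 3; analyzed chronologically, the overlapping
                           # session can no longer be counted twice
```
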
code_recorder_processor/display.py CHANGED
@@ -176,6 +176,39 @@ def display_template_diff(diff_text: str) -> None:
     print(diff_text, file=sys.stderr)
 
 
+def display_submitted_file_comparison(comparison: dict[str, Any] | None) -> None:
+    """
+    Display comparison results between reconstructed code and submitted file.
+
+    Parameters
+    ----------
+    comparison : dict[str, Any] | None
+        Comparison results from compare_submitted_file, or None if no comparison
+    """
+    if not comparison:
+        return
+
+    print("\nSubmitted file comparison:", file=sys.stderr)
+    print(f"  Submitted file: {comparison['submitted_file']}", file=sys.stderr)
+
+    if "error" in comparison:
+        print(f"  Error: {comparison['error']}", file=sys.stderr)
+        return
+
+    if comparison["matches"]:
+        print("  ✓ Reconstructed code matches submitted file exactly", file=sys.stderr)
+    elif comparison.get("whitespace_only", False):
+        print("  ⚠ Reconstructed code differs only in whitespace from submitted file", file=sys.stderr)
+    else:
+        print("  ✗ Reconstructed code differs from submitted file", file=sys.stderr)
+        if comparison.get("diff"):
+            print("\n  Diff (reconstructed → submitted):", file=sys.stderr)
+            # Indent each line of the diff
+            for line in comparison["diff"].split("\n"):
+                if line:
+                    print(f"    {line}", file=sys.stderr)
+
+
 def print_separator() -> None:
     """Print a separator line."""
     print(f"{'='*80}", file=sys.stderr)
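Since the new display helper only reads a handful of dict keys, it can be exercised with a hand-built comparison dict. A trimmed copy of the helper (whitespace-only branch shown; sample data, output goes to stderr):

```python
import sys
from typing import Any

def display_submitted_file_comparison(comparison: dict[str, Any] | None) -> None:
    # Trimmed copy of the helper above, enough for the match/whitespace branches.
    if not comparison:
        return
    print("\nSubmitted file comparison:", file=sys.stderr)
    print(f"  Submitted file: {comparison['submitted_file']}", file=sys.stderr)
    if comparison["matches"]:
        print("  ✓ Reconstructed code matches submitted file exactly", file=sys.stderr)
    elif comparison.get("whitespace_only", False):
        print("  ⚠ Reconstructed code differs only in whitespace from submitted file", file=sys.stderr)

display_submitted_file_comparison({
    "submitted_file": "submissions/homework0-ISC.py",
    "matches": False,
    "whitespace_only": True,
})
```
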
{cr_proc-0.1.11.dist-info → cr_proc-0.1.12.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cr_proc
-Version: 0.1.11
+Version: 0.1.12
 Summary: A tool for processing BYU CS code recording files.
 Author: Ethan Dye
 Author-email: mrtops03@gmail.com
@@ -79,6 +79,14 @@ When processing multiple files:
   specified file instead of stdout. For single files only.
 - `--output-dir OUTPUT_DIR`: (Optional) Directory to write reconstructed code
   files in batch mode. Files are named based on input recording filenames.
+- `--submitted-file SUBMITTED_FILE`: (Optional) Path to the submitted final file
+  to verify against the reconstructed output. If provided, the reconstructed code
+  will be compared to this file and differences will be reported.
+- `--submitted-dir SUBMITTED_DIR`: (Optional) Directory containing submitted files
+  to verify against the reconstructed output. For each recording file, the
+  corresponding submitted file will be found by matching the filename
+  (e.g., `homework0-ISC.recording.jsonl.gz` will match `homework0-ISC.py`).
+  Cannot be used with `--submitted-file`.
 - `-s, --show-autocomplete-details`: (Optional) Show individual auto-complete
   events in addition to aggregate statistics.
 - `-p, --playback`: (Optional) Play back the recording in real-time, showing
@@ -112,6 +120,18 @@ Save JSON results:
 poetry run cr_proc student1.jsonl.gz student2.jsonl.gz template.py -o results/
 ```
 
+Verify against a single submitted file:
+
+```bash
+poetry run cr_proc homework0.recording.jsonl.gz homework0.py --submitted-file submitted_homework0.py
+```
+
+Verify against submitted files in a directory (batch mode):
+
+```bash
+poetry run cr_proc recordings/*.jsonl.gz template.py --submitted-dir submissions/
+```
+
 This will process each recording independently and flag any that exceed 30
 minutes.
 
cr_proc-0.1.12.dist-info/RECORD ADDED
@@ -0,0 +1,13 @@
+code_recorder_processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+code_recorder_processor/api/build.py,sha256=XuF8Vx9mDdRqeaxCVgYAdn4NFJzkRt4Q839m15th0Fo,9908
+code_recorder_processor/api/document.py,sha256=eIsGBCPq234cPiPN_ktBOoVrt0G1hHqbgvhXwMXQdnU,11343
+code_recorder_processor/api/load.py,sha256=Br-USpFQJ6W8c5hjmCnunM3V0_MURKZp5Yyl1IJdahc,5514
+code_recorder_processor/api/output.py,sha256=HLa6DtN4i-wW0-vyE7SzqLeq35nhSoj2yHc9RjTPcBc,2441
+code_recorder_processor/api/verify.py,sha256=6D8Zs8NBziG1kNc6HgH59yQp4u1wa4zyCuNE0qLTlpk,36040
+code_recorder_processor/cli.py,sha256=t3pIJnoEh9bMmLTQ3xDcehl63aFtCZ218GQBSnPLwzI,25200
+code_recorder_processor/display.py,sha256=He5loCMrm1S1186N2BgDy6bl0v__kiosJ_qDxpa4hbM,8657
+code_recorder_processor/playback.py,sha256=6-OJtQOHKgfutxUNBMunWl-VVSIB0zUDENSl0EsPCh4,4008
+cr_proc-0.1.12.dist-info/METADATA,sha256=HcLLyKL-nl08T_1S7-OSIBdCNqh05MIaOr7ja__oIWM,9812
+cr_proc-0.1.12.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
+cr_proc-0.1.12.dist-info/entry_points.txt,sha256=xb5dPAAWN1Z9NUHpvZgNakaslR1MVOERf_IfpG_M04M,77
+cr_proc-0.1.12.dist-info/RECORD,,
cr_proc-0.1.11.dist-info/RECORD DELETED
@@ -1,13 +0,0 @@
-code_recorder_processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-code_recorder_processor/api/build.py,sha256=XuF8Vx9mDdRqeaxCVgYAdn4NFJzkRt4Q839m15th0Fo,9908
-code_recorder_processor/api/document.py,sha256=DOQ0H1dQJtMs2P9E2qnKgg2iKQT9msgdE9oJXl36SnY,10622
-code_recorder_processor/api/load.py,sha256=Br-USpFQJ6W8c5hjmCnunM3V0_MURKZp5Yyl1IJdahc,5514
-code_recorder_processor/api/output.py,sha256=H2SC3pQ0C9V8YyN4yeA_KmvSoWXy_3T3TKWKhywIax4,2161
-code_recorder_processor/api/verify.py,sha256=9GpeoFQIiTzZd-DNSyN5OUM6YB5iMslO85oAjc0yoSU,34073
-code_recorder_processor/cli.py,sha256=ardcM3bLNhf6abOQ1Aj746x4hp8gerdklfDwszLlYKc,20504
-code_recorder_processor/display.py,sha256=IVTNFB3Vjzpc5ZHceAFQI2-o-N6bvjYmotLDaEy0KoU,7368
-code_recorder_processor/playback.py,sha256=6-OJtQOHKgfutxUNBMunWl-VVSIB0zUDENSl0EsPCh4,4008
-cr_proc-0.1.11.dist-info/METADATA,sha256=wZuAW9ghrjT2fCbiI9bJSy5TPLc4YD6OpYb0mTlyOL4,8926
-cr_proc-0.1.11.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
-cr_proc-0.1.11.dist-info/entry_points.txt,sha256=xb5dPAAWN1Z9NUHpvZgNakaslR1MVOERf_IfpG_M04M,77
-cr_proc-0.1.11.dist-info/RECORD,,
{cr_proc-0.1.11.dist-info → cr_proc-0.1.12.dist-info}/WHEEL UNCHANGED
{cr_proc-0.1.11.dist-info → cr_proc-0.1.12.dist-info}/entry_points.txt UNCHANGED