cr-proc 0.1.9__py3-none-any.whl → 0.1.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_recorder_processor/api/build.py +36 -0
- code_recorder_processor/api/document.py +45 -8
- {cr_proc-0.1.9.dist-info → cr_proc-0.1.11.dist-info}/METADATA +43 -31
- {cr_proc-0.1.9.dist-info → cr_proc-0.1.11.dist-info}/RECORD +6 -6
- {cr_proc-0.1.9.dist-info → cr_proc-0.1.11.dist-info}/WHEEL +0 -0
- {cr_proc-0.1.9.dist-info → cr_proc-0.1.11.dist-info}/entry_points.txt +0 -0
|
@@ -169,6 +169,9 @@ def reconstruct_file_from_events(
|
|
|
169
169
|
from .load import is_edit_event
|
|
170
170
|
events = tuple(e for e in events if is_edit_event(e))
|
|
171
171
|
|
|
172
|
+
# Skip no-op events (oldFragment == newFragment, typically file-open markers)
|
|
173
|
+
events = tuple(e for e in events if not (e.get("oldFragment") == e.get("newFragment") and e.get("offset") == 0))
|
|
174
|
+
|
|
172
175
|
# Read template content
|
|
173
176
|
if normalize_newlines:
|
|
174
177
|
template = _normalize_newlines(template)
|
|
@@ -197,6 +200,39 @@ def reconstruct_file_from_events(
|
|
|
197
200
|
# No events for target_doc; return template unchanged
|
|
198
201
|
return template
|
|
199
202
|
|
|
203
|
+
# Handle case where first event is a file-open/load event at offset 0
|
|
204
|
+
# (IDE captures the file content as seen when opened)
|
|
205
|
+
if evs and evs[0].get("offset") == 0:
|
|
206
|
+
first_old = evs[0].get("oldFragment", "")
|
|
207
|
+
first_new = evs[0].get("newFragment", "")
|
|
208
|
+
|
|
209
|
+
if first_old and not template.startswith(first_old):
|
|
210
|
+
# Check if this looks like a file-open event:
|
|
211
|
+
# - First event is at offset 0
|
|
212
|
+
# - oldFragment and newFragment contain significant content (file was loaded)
|
|
213
|
+
# - Template is much smaller (stub/placeholder)
|
|
214
|
+
is_likely_file_open = (
|
|
215
|
+
first_old == first_new and # no-op replacement (just file load)
|
|
216
|
+
len(first_old) > 50 and # substantial content
|
|
217
|
+
len(template) < len(first_old) # template is smaller stub
|
|
218
|
+
)
|
|
219
|
+
|
|
220
|
+
if is_likely_file_open:
|
|
221
|
+
# Use first event's oldFragment as the template (actual file state when opened)
|
|
222
|
+
template = first_old
|
|
223
|
+
else:
|
|
224
|
+
# Template genuinely doesn't match
|
|
225
|
+
raise ValueError(
|
|
226
|
+
f"Template content does not match recording's initial state.\n"
|
|
227
|
+
f"First event expects to replace {len(first_old)} chars starting at offset 0,\n"
|
|
228
|
+
f"but template only has {len(template)} chars and starts with:\n"
|
|
229
|
+
f"{template[:min(100, len(template))]!r}\n\n"
|
|
230
|
+
f"Expected to start with:\n"
|
|
231
|
+
f"{first_old[:min(100, len(first_old))]!r}\n\n"
|
|
232
|
+
f"Recording was likely made on a different version of the file.\n"
|
|
233
|
+
f"Document path in recording: {target_doc}"
|
|
234
|
+
)
|
|
235
|
+
|
|
200
236
|
if utf16_mode:
|
|
201
237
|
# Work in UTF-16-LE byte space
|
|
202
238
|
doc_bytes = template.encode("utf-16-le")
|
|
@@ -1,10 +1,38 @@
|
|
|
1
1
|
"""Document resolution and filtering utilities."""
|
|
2
2
|
import difflib
|
|
3
3
|
import sys
|
|
4
|
-
from pathlib import Path
|
|
4
|
+
from pathlib import Path, PureWindowsPath, PurePosixPath
|
|
5
5
|
from typing import Any
|
|
6
6
|
|
|
7
7
|
|
|
8
|
+
def _normalize_document_path(doc_path: str) -> tuple[str, str]:
|
|
9
|
+
"""
|
|
10
|
+
Normalize a document path to extract filename and stem.
|
|
11
|
+
|
|
12
|
+
Handles both Windows-style (backslash) and Unix-style (forward slash) paths
|
|
13
|
+
regardless of the current platform.
|
|
14
|
+
|
|
15
|
+
Parameters
|
|
16
|
+
----------
|
|
17
|
+
doc_path : str
|
|
18
|
+
Document path string (may use Windows or Unix separators)
|
|
19
|
+
|
|
20
|
+
Returns
|
|
21
|
+
-------
|
|
22
|
+
tuple[str, str]
|
|
23
|
+
(filename, stem) extracted from the path
|
|
24
|
+
"""
|
|
25
|
+
# Try to detect if this is a Windows path (contains backslashes)
|
|
26
|
+
if "\\" in doc_path:
|
|
27
|
+
# Windows-style path
|
|
28
|
+
path_obj = PureWindowsPath(doc_path)
|
|
29
|
+
else:
|
|
30
|
+
# Unix-style path (or just a filename)
|
|
31
|
+
path_obj = PurePosixPath(doc_path)
|
|
32
|
+
|
|
33
|
+
return path_obj.name, path_obj.stem
|
|
34
|
+
|
|
35
|
+
|
|
8
36
|
def find_matching_template(
|
|
9
37
|
template_dir: Path, document_path: str
|
|
10
38
|
) -> Path | None:
|
|
@@ -31,8 +59,7 @@ def find_matching_template(
|
|
|
31
59
|
if not template_dir.is_dir():
|
|
32
60
|
return None
|
|
33
61
|
|
|
34
|
-
doc_name = Path(document_path).name
|
|
35
|
-
doc_stem = Path(document_path).stem
|
|
62
|
+
doc_name, doc_stem = _normalize_document_path(document_path)
|
|
36
63
|
|
|
37
64
|
# First, try exact filename match
|
|
38
65
|
exact_match = template_dir / doc_name
|
|
@@ -81,19 +108,25 @@ def get_normalized_document_key(doc_path: str) -> tuple[str, str]:
|
|
|
81
108
|
Get a normalized key for a document based on filename and extension.
|
|
82
109
|
|
|
83
110
|
This helps identify documents that are the same but with different paths.
|
|
111
|
+
Handles both Windows and Unix style paths correctly.
|
|
84
112
|
|
|
85
113
|
Parameters
|
|
86
114
|
----------
|
|
87
115
|
doc_path : str
|
|
88
|
-
Document path
|
|
116
|
+
Document path (may use Windows or Unix separators)
|
|
89
117
|
|
|
90
118
|
Returns
|
|
91
119
|
-------
|
|
92
120
|
tuple[str, str]
|
|
93
121
|
(filename_with_extension, extension) for grouping similar documents
|
|
94
122
|
"""
|
|
95
|
-
|
|
96
|
-
|
|
123
|
+
filename, _ = _normalize_document_path(doc_path)
|
|
124
|
+
# Get extension from filename
|
|
125
|
+
if '.' in filename:
|
|
126
|
+
extension = '.' + filename.rsplit('.', 1)[1]
|
|
127
|
+
else:
|
|
128
|
+
extension = ''
|
|
129
|
+
return (filename, extension)
|
|
97
130
|
|
|
98
131
|
|
|
99
132
|
def group_documents_by_name(docs: list[str]) -> dict[tuple[str, str], list[str]]:
|
|
@@ -205,7 +238,8 @@ def resolve_document(
|
|
|
205
238
|
|
|
206
239
|
if override:
|
|
207
240
|
matches = [
|
|
208
|
-
d for d in unique_docs
|
|
241
|
+
d for d in unique_docs
|
|
242
|
+
if d.endswith(override) or _normalize_document_path(d)[0] == override
|
|
209
243
|
]
|
|
210
244
|
if not matches:
|
|
211
245
|
raise ValueError(
|
|
@@ -220,7 +254,10 @@ def resolve_document(
|
|
|
220
254
|
# If template_path is provided and is a file (not directory), use its extension for matching
|
|
221
255
|
if template_path and template_path.is_file():
|
|
222
256
|
template_ext = template_path.suffix
|
|
223
|
-
ext_matches = [
|
|
257
|
+
ext_matches = [
|
|
258
|
+
d for d in unique_docs
|
|
259
|
+
if _normalize_document_path(d)[0].endswith(template_ext)
|
|
260
|
+
]
|
|
224
261
|
if len(ext_matches) == 1:
|
|
225
262
|
return ext_matches[0]
|
|
226
263
|
if len(ext_matches) > 1:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cr_proc
|
|
3
|
-
Version: 0.1.9
|
|
3
|
+
Version: 0.1.11
|
|
4
4
|
Summary: A tool for processing BYU CS code recording files.
|
|
5
5
|
Author: Ethan Dye
|
|
6
6
|
Author-email: mrtops03@gmail.com
|
|
@@ -28,7 +28,8 @@ poetry install
|
|
|
28
28
|
|
|
29
29
|
## Usage
|
|
30
30
|
|
|
31
|
-
The processor can be run using the `cr_proc` command with recording file(s) and
|
|
31
|
+
The processor can be run using the `cr_proc` command with recording file(s) and
|
|
32
|
+
a template:
|
|
32
33
|
|
|
33
34
|
```bash
|
|
34
35
|
poetry run cr_proc <path-to-jsonl-file> <path-to-template-file>
|
|
@@ -36,7 +37,8 @@ poetry run cr_proc <path-to-jsonl-file> <path-to-template-file>
|
|
|
36
37
|
|
|
37
38
|
### Batch Processing
|
|
38
39
|
|
|
39
|
-
You can process multiple recording files at once (e.g., for different students'
|
|
40
|
+
You can process multiple recording files at once (e.g., for different students'
|
|
41
|
+
submissions):
|
|
40
42
|
|
|
41
43
|
```bash
|
|
42
44
|
# Process multiple files
|
|
@@ -47,9 +49,11 @@ poetry run cr_proc recordings/*.jsonl.gz template.py
|
|
|
47
49
|
```
|
|
48
50
|
|
|
49
51
|
When processing multiple files:
|
|
52
|
+
|
|
50
53
|
- Each recording is processed independently (for different students/documents)
|
|
51
54
|
- Time calculations and verification are done separately for each file
|
|
52
|
-
- A combined time report is shown at the end summarizing total editing time
|
|
55
|
+
- A combined time report is shown at the end summarizing total editing time
|
|
56
|
+
across all recordings
|
|
53
57
|
- Results can be output to individual files using `--output-dir`
|
|
54
58
|
|
|
55
59
|
### Arguments
|
|
@@ -61,24 +65,26 @@ When processing multiple files:
|
|
|
61
65
|
|
|
62
66
|
### Options
|
|
63
67
|
|
|
64
|
-
- `-t, --time-limit MINUTES`: (Optional) Maximum allowed time in minutes between
|
|
65
|
-
first and last edit in the recording. Applied individually to each
|
|
66
|
-
also to the combined total in batch mode. If the elapsed
|
|
67
|
-
recording is flagged as suspicious.
|
|
68
|
-
- `-d, --document DOCUMENT`: (Optional) Document path or filename to process
|
|
69
|
-
recording. Defaults to the document whose extension matches the
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
-
|
|
81
|
-
|
|
68
|
+
- `-t, --time-limit MINUTES`: (Optional) Maximum allowed time in minutes between
|
|
69
|
+
the first and last edit in the recording. Applied individually to each
|
|
70
|
+
recording file and also to the combined total in batch mode. If the elapsed
|
|
71
|
+
time exceeds this limit, the recording is flagged as suspicious.
|
|
72
|
+
- `-d, --document DOCUMENT`: (Optional) Document path or filename to process
|
|
73
|
+
from the recording. Defaults to the document whose extension matches the
|
|
74
|
+
template file.
|
|
75
|
+
- `-o, --output-json OUTPUT_JSON`: (Optional) Path to output JSON file with
|
|
76
|
+
verification results (time info and suspicious events). In batch mode, creates
|
|
77
|
+
a single JSON file containing all recordings plus the combined time report.
|
|
78
|
+
- `-f, --output-file OUTPUT_FILE`: (Optional) Write reconstructed code to
|
|
79
|
+
specified file instead of stdout. For single files only.
|
|
80
|
+
- `--output-dir OUTPUT_DIR`: (Optional) Directory to write reconstructed code
|
|
81
|
+
files in batch mode. Files are named based on input recording filenames.
|
|
82
|
+
- `-s, --show-autocomplete-details`: (Optional) Show individual auto-complete
|
|
83
|
+
events in addition to aggregate statistics.
|
|
84
|
+
- `-p, --playback`: (Optional) Play back the recording in real-time, showing
|
|
85
|
+
code evolution.
|
|
86
|
+
- `--playback-speed SPEED`: (Optional) Playback speed multiplier (1.0 =
|
|
87
|
+
real-time, 2.0 = 2x speed, 0.5 = half speed).
|
|
82
88
|
|
|
83
89
|
### Examples
|
|
84
90
|
|
|
@@ -106,7 +112,8 @@ Save JSON results:
|
|
|
106
112
|
poetry run cr_proc student1.jsonl.gz student2.jsonl.gz template.py -o results/
|
|
107
113
|
```
|
|
108
114
|
|
|
109
|
-
This will process each recording independently and flag any that exceed 30
|
|
115
|
+
This will process each recording independently and flag any that exceed 30
|
|
116
|
+
minutes.
|
|
110
117
|
|
|
111
118
|
The processor will:
|
|
112
119
|
|
|
@@ -118,8 +125,9 @@ The processor will:
|
|
|
118
125
|
|
|
119
126
|
### Output
|
|
120
127
|
|
|
121
|
-
Reconstructed code files are written to disk using `-f/--output-file` (single
|
|
122
|
-
file) or `--output-dir` (batch mode). The processor does not output
|
|
128
|
+
Reconstructed code files are written to disk using `-f/--output-file` (single
|
|
129
|
+
file) or `--output-dir` (batch mode). The processor does not output
|
|
130
|
+
reconstructed code to stdout.
|
|
123
131
|
|
|
124
132
|
Verification information, warnings, and errors are printed to stderr, including:
|
|
125
133
|
|
|
@@ -133,8 +141,8 @@ Verification information, warnings, and errors are printed to stderr, including:
|
|
|
133
141
|
|
|
134
142
|
### Suspicious Activity Detection
|
|
135
143
|
|
|
136
|
-
The processor automatically detects and reports three types of suspicious
|
|
137
|
-
patterns:
|
|
144
|
+
The processor automatically detects and reports three types of suspicious
|
|
145
|
+
activity patterns:
|
|
138
146
|
|
|
139
147
|
#### 1. Time Limit Exceeded
|
|
140
148
|
|
|
@@ -142,8 +150,8 @@ When the `--time-limit` flag is specified, the processor flags recordings where
|
|
|
142
150
|
the elapsed time between the first and last edit exceeds the specified limit.
|
|
143
151
|
This can indicate unusually long work sessions or potential external assistance.
|
|
144
152
|
|
|
145
|
-
Each recording file is checked independently against the time limit. In batch
|
|
146
|
-
mode, the combined total time is also checked against the limit.
|
|
153
|
+
Each recording file is checked independently against the time limit. In batch
|
|
154
|
+
mode, the combined total time is also checked against the limit.
|
|
147
155
|
|
|
148
156
|
**Example warning (single file):**
|
|
149
157
|
|
|
@@ -199,12 +207,14 @@ Events #42-#44 (rapid one-line pastes (AI indicator)): 3 lines, 89 chars
|
|
|
199
207
|
|
|
200
208
|
### JSON Output Format
|
|
201
209
|
|
|
202
|
-
The `--output-json` flag generates JSON files with verification results using a
|
|
203
|
-
consistent format for both single file and batch modes, making it easier for
|
|
210
|
+
The `--output-json` flag generates JSON files with verification results using a
|
|
211
|
+
consistent format for both single file and batch modes, making it easier for
|
|
212
|
+
tooling to consume.
|
|
204
213
|
|
|
205
214
|
#### JSON Structure
|
|
206
215
|
|
|
207
216
|
All JSON output follows this unified format:
|
|
217
|
+
|
|
208
218
|
- `batch_mode`: Boolean indicating if multiple files were processed
|
|
209
219
|
- `total_files`: Number of files processed
|
|
210
220
|
- `verified_count`: How many files passed verification
|
|
@@ -219,6 +229,7 @@ All JSON output follows this unified format:
|
|
|
219
229
|
- `files`: Array of individual results for each recording
|
|
220
230
|
|
|
221
231
|
**Single file example:**
|
|
232
|
+
|
|
222
233
|
```json
|
|
223
234
|
{
|
|
224
235
|
"batch_mode": false,
|
|
@@ -244,6 +255,7 @@ All JSON output follows this unified format:
|
|
|
244
255
|
```
|
|
245
256
|
|
|
246
257
|
**Batch file example:**
|
|
258
|
+
|
|
247
259
|
```json
|
|
248
260
|
{
|
|
249
261
|
"batch_mode": true,
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
code_recorder_processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
code_recorder_processor/api/build.py,sha256=
|
|
3
|
-
code_recorder_processor/api/document.py,sha256=
|
|
2
|
+
code_recorder_processor/api/build.py,sha256=XuF8Vx9mDdRqeaxCVgYAdn4NFJzkRt4Q839m15th0Fo,9908
|
|
3
|
+
code_recorder_processor/api/document.py,sha256=DOQ0H1dQJtMs2P9E2qnKgg2iKQT9msgdE9oJXl36SnY,10622
|
|
4
4
|
code_recorder_processor/api/load.py,sha256=Br-USpFQJ6W8c5hjmCnunM3V0_MURKZp5Yyl1IJdahc,5514
|
|
5
5
|
code_recorder_processor/api/output.py,sha256=H2SC3pQ0C9V8YyN4yeA_KmvSoWXy_3T3TKWKhywIax4,2161
|
|
6
6
|
code_recorder_processor/api/verify.py,sha256=9GpeoFQIiTzZd-DNSyN5OUM6YB5iMslO85oAjc0yoSU,34073
|
|
7
7
|
code_recorder_processor/cli.py,sha256=ardcM3bLNhf6abOQ1Aj746x4hp8gerdklfDwszLlYKc,20504
|
|
8
8
|
code_recorder_processor/display.py,sha256=IVTNFB3Vjzpc5ZHceAFQI2-o-N6bvjYmotLDaEy0KoU,7368
|
|
9
9
|
code_recorder_processor/playback.py,sha256=6-OJtQOHKgfutxUNBMunWl-VVSIB0zUDENSl0EsPCh4,4008
|
|
10
|
-
cr_proc-0.1.
|
|
11
|
-
cr_proc-0.1.
|
|
12
|
-
cr_proc-0.1.
|
|
13
|
-
cr_proc-0.1.
|
|
10
|
+
cr_proc-0.1.11.dist-info/METADATA,sha256=wZuAW9ghrjT2fCbiI9bJSy5TPLc4YD6OpYb0mTlyOL4,8926
|
|
11
|
+
cr_proc-0.1.11.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
|
|
12
|
+
cr_proc-0.1.11.dist-info/entry_points.txt,sha256=xb5dPAAWN1Z9NUHpvZgNakaslR1MVOERf_IfpG_M04M,77
|
|
13
|
+
cr_proc-0.1.11.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|