cr-proc 0.1.10-py3-none-any.whl → 0.1.11-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
code_recorder_processor/api/build.py

@@ -169,6 +169,9 @@ def reconstruct_file_from_events(
     from .load import is_edit_event
     events = tuple(e for e in events if is_edit_event(e))
 
+    # Skip no-op events (oldFragment == newFragment, typically file-open markers)
+    events = tuple(e for e in events if not (e.get("oldFragment") == e.get("newFragment") and e.get("offset") == 0))
+
     # Read template content
     if normalize_newlines:
         template = _normalize_newlines(template)
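The new filter is a one-liner but its intent is easy to miss; the following sketch (not part of the package) replays the same predicate on two hand-written event dicts. Only the keys the added line actually reads (`offset`, `oldFragment`, `newFragment`) are used, and the sample values are invented for illustration.

```python
# Hypothetical events shaped like the dicts the filter above inspects.
sample_events = (
    # No-op at offset 0: oldFragment == newFragment, typical of a file-open marker.
    {"offset": 0, "oldFragment": "print('hi')\n", "newFragment": "print('hi')\n"},
    # A real edit: inserts text at offset 12.
    {"offset": 12, "oldFragment": "", "newFragment": "x = 1\n"},
)

# Same predicate as the added line: drop events that are no-ops at offset 0.
kept = tuple(
    e for e in sample_events
    if not (e.get("oldFragment") == e.get("newFragment") and e.get("offset") == 0)
)

print(len(kept))  # 1 -- only the real edit survives
```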
@@ -197,6 +200,39 @@ def reconstruct_file_from_events(
         # No events for target_doc; return template unchanged
         return template
 
+    # Handle case where first event is a file-open/load event at offset 0
+    # (IDE captures the file content as seen when opened)
+    if evs and evs[0].get("offset") == 0:
+        first_old = evs[0].get("oldFragment", "")
+        first_new = evs[0].get("newFragment", "")
+
+        if first_old and not template.startswith(first_old):
+            # Check if this looks like a file-open event:
+            # - First event is at offset 0
+            # - oldFragment and newFragment contain significant content (file was loaded)
+            # - Template is much smaller (stub/placeholder)
+            is_likely_file_open = (
+                first_old == first_new and  # no-op replacement (just file load)
+                len(first_old) > 50 and  # substantial content
+                len(template) < len(first_old)  # template is smaller stub
+            )
+
+            if is_likely_file_open:
+                # Use first event's oldFragment as the template (actual file state when opened)
+                template = first_old
+            else:
+                # Template genuinely doesn't match
+                raise ValueError(
+                    f"Template content does not match recording's initial state.\n"
+                    f"First event expects to replace {len(first_old)} chars starting at offset 0,\n"
+                    f"but template only has {len(template)} chars and starts with:\n"
+                    f"{template[:min(100, len(template))]!r}\n\n"
+                    f"Expected to start with:\n"
+                    f"{first_old[:min(100, len(first_old))]!r}\n\n"
+                    f"Recording was likely made on a different version of the file.\n"
+                    f"Document path in recording: {target_doc}"
+                )
+
     if utf16_mode:
         # Work in UTF-16-LE byte space
         doc_bytes = template.encode("utf-16-le")
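To make the new fallback concrete, here is a standalone sketch (again, not part of the package) of the same decision with invented inputs: the first event at offset 0 is a large no-op whose `oldFragment` does not prefix the small template, so the recording's own snapshot is adopted as the starting state. The 50-character threshold and the overall control flow mirror the hunk above; the sample template, fragment text, and the shortened error message are made up.

```python
# Standalone sketch of the file-open heuristic, with invented sample inputs.
template = "# TODO: starter stub\n"  # small placeholder handed to the reconstructor
first_event = {
    "offset": 0,
    # Identical old/new fragments: a no-op "file opened" capture, well over 50 chars.
    "oldFragment": "def solve(values):\n" + "    ...\n" * 20,
    "newFragment": "def solve(values):\n" + "    ...\n" * 20,
}

first_old = first_event.get("oldFragment", "")
first_new = first_event.get("newFragment", "")

if first_old and not template.startswith(first_old):
    is_likely_file_open = (
        first_old == first_new and      # no-op replacement (just file load)
        len(first_old) > 50 and         # substantial content
        len(template) < len(first_old)  # template is a smaller stub
    )
    if is_likely_file_open:
        # Adopt the file state captured when the IDE opened the file.
        template = first_old
    else:
        raise ValueError("Template content does not match recording's initial state.")

print(template.splitlines()[0])  # -> def solve(values):
```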
cr_proc-0.1.10.dist-info/METADATA → cr_proc-0.1.11.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cr_proc
-Version: 0.1.10
+Version: 0.1.11
 Summary: A tool for processing BYU CS code recording files.
 Author: Ethan Dye
 Author-email: mrtops03@gmail.com
@@ -28,7 +28,8 @@ poetry install
 
 ## Usage
 
-The processor can be run using the `cr_proc` command with recording file(s) and a template:
+The processor can be run using the `cr_proc` command with recording file(s) and
+a template:
 
 ```bash
 poetry run cr_proc <path-to-jsonl-file> <path-to-template-file>
@@ -36,7 +37,8 @@ poetry run cr_proc <path-to-jsonl-file> <path-to-template-file>
 
 ### Batch Processing
 
-You can process multiple recording files at once (e.g., for different students' submissions):
+You can process multiple recording files at once (e.g., for different students'
+submissions):
 
 ```bash
 # Process multiple files
@@ -47,9 +49,11 @@ poetry run cr_proc recordings/*.jsonl.gz template.py
 ```
 
 When processing multiple files:
+
 - Each recording is processed independently (for different students/documents)
 - Time calculations and verification are done separately for each file
-- A combined time report is shown at the end summarizing total editing time across all recordings
+- A combined time report is shown at the end summarizing total editing time
+  across all recordings
 - Results can be output to individual files using `--output-dir`
 
 ### Arguments
@@ -61,24 +65,26 @@ When processing multiple files:
 
 ### Options
 
-- `-t, --time-limit MINUTES`: (Optional) Maximum allowed time in minutes between the
-  first and last edit in the recording. Applied individually to each recording file and
-  also to the combined total in batch mode. If the elapsed time exceeds this limit, the
-  recording is flagged as suspicious.
-- `-d, --document DOCUMENT`: (Optional) Document path or filename to process from the
-  recording. Defaults to the document whose extension matches the template file.
-- `-o, --output-json OUTPUT_JSON`: (Optional) Path to output JSON file with verification
-  results (time info and suspicious events). In batch mode, creates a single JSON file
-  containing all recordings plus the combined time report.
-- `-f, --output-file OUTPUT_FILE`: (Optional) Write reconstructed code to specified file
-  instead of stdout. For single files only.
-- `--output-dir OUTPUT_DIR`: (Optional) Directory to write reconstructed code files in
-  batch mode. Files are named based on input recording filenames.
-- `-s, --show-autocomplete-details`: (Optional) Show individual auto-complete events in
-  addition to aggregate statistics.
-- `-p, --playback`: (Optional) Play back the recording in real-time, showing code evolution.
-- `--playback-speed SPEED`: (Optional) Playback speed multiplier (1.0 = real-time, 2.0 = 2x
-  speed, 0.5 = half speed).
+- `-t, --time-limit MINUTES`: (Optional) Maximum allowed time in minutes between
+  the first and last edit in the recording. Applied individually to each
+  recording file and also to the combined total in batch mode. If the elapsed
+  time exceeds this limit, the recording is flagged as suspicious.
+- `-d, --document DOCUMENT`: (Optional) Document path or filename to process
+  from the recording. Defaults to the document whose extension matches the
+  template file.
+- `-o, --output-json OUTPUT_JSON`: (Optional) Path to output JSON file with
+  verification results (time info and suspicious events). In batch mode, creates
+  a single JSON file containing all recordings plus the combined time report.
+- `-f, --output-file OUTPUT_FILE`: (Optional) Write reconstructed code to
+  specified file instead of stdout. For single files only.
+- `--output-dir OUTPUT_DIR`: (Optional) Directory to write reconstructed code
+  files in batch mode. Files are named based on input recording filenames.
+- `-s, --show-autocomplete-details`: (Optional) Show individual auto-complete
+  events in addition to aggregate statistics.
+- `-p, --playback`: (Optional) Play back the recording in real-time, showing
+  code evolution.
+- `--playback-speed SPEED`: (Optional) Playback speed multiplier (1.0 =
+  real-time, 2.0 = 2x speed, 0.5 = half speed).
 
 ### Examples
 
@@ -106,7 +112,8 @@ Save JSON results:
 poetry run cr_proc student1.jsonl.gz student2.jsonl.gz template.py -o results/
 ```
 
-This will process each recording independently and flag any that exceed 30 minutes.
+This will process each recording independently and flag any that exceed 30
+minutes.
 
 The processor will:
 
@@ -118,8 +125,9 @@ The processor will:
 
 ### Output
 
-Reconstructed code files are written to disk using `-f/--output-file` (single file)
-or `--output-dir` (batch mode). The processor does not output reconstructed code to stdout.
+Reconstructed code files are written to disk using `-f/--output-file` (single
+file) or `--output-dir` (batch mode). The processor does not output
+reconstructed code to stdout.
 
 Verification information, warnings, and errors are printed to stderr, including:
 
@@ -133,8 +141,8 @@ Verification information, warnings, and errors are printed to stderr, including:
 
 ### Suspicious Activity Detection
 
-The processor automatically detects and reports three types of suspicious activity
-patterns:
+The processor automatically detects and reports three types of suspicious
+activity patterns:
 
 #### 1. Time Limit Exceeded
 
@@ -142,8 +150,8 @@ When the `--time-limit` flag is specified, the processor flags recordings where
 the elapsed time between the first and last edit exceeds the specified limit.
 This can indicate unusually long work sessions or potential external assistance.
 
-Each recording file is checked independently against the time limit. In batch mode,
-the combined total time is also checked against the limit.
+Each recording file is checked independently against the time limit. In batch
+mode, the combined total time is also checked against the limit.
 
 **Example warning (single file):**
 
@@ -199,12 +207,14 @@ Events #42-#44 (rapid one-line pastes (AI indicator)): 3 lines, 89 chars
 
 ### JSON Output Format
 
-The `--output-json` flag generates JSON files with verification results using a consistent format
-for both single file and batch modes, making it easier for tooling to consume.
+The `--output-json` flag generates JSON files with verification results using a
+consistent format for both single file and batch modes, making it easier for
+tooling to consume.
 
 #### JSON Structure
 
 All JSON output follows this unified format:
+
 - `batch_mode`: Boolean indicating if multiple files were processed
 - `total_files`: Number of files processed
 - `verified_count`: How many files passed verification
@@ -219,6 +229,7 @@ All JSON output follows this unified format:
 - `files`: Array of individual results for each recording
 
 **Single file example:**
+
 ```json
 {
   "batch_mode": false,
@@ -244,6 +255,7 @@ All JSON output follows this unified format:
 ```
 
 **Batch file example:**
+
 ```json
 {
   "batch_mode": true,
cr_proc-0.1.10.dist-info/RECORD → cr_proc-0.1.11.dist-info/RECORD

@@ -1,5 +1,5 @@
 code_recorder_processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-code_recorder_processor/api/build.py,sha256=tljtuEFH-ZU-hSFYmlAMSY61W-DSptQo_D5-GjAasco,7951
+code_recorder_processor/api/build.py,sha256=XuF8Vx9mDdRqeaxCVgYAdn4NFJzkRt4Q839m15th0Fo,9908
 code_recorder_processor/api/document.py,sha256=DOQ0H1dQJtMs2P9E2qnKgg2iKQT9msgdE9oJXl36SnY,10622
 code_recorder_processor/api/load.py,sha256=Br-USpFQJ6W8c5hjmCnunM3V0_MURKZp5Yyl1IJdahc,5514
 code_recorder_processor/api/output.py,sha256=H2SC3pQ0C9V8YyN4yeA_KmvSoWXy_3T3TKWKhywIax4,2161
@@ -7,7 +7,7 @@ code_recorder_processor/api/verify.py,sha256=9GpeoFQIiTzZd-DNSyN5OUM6YB5iMslO85o
 code_recorder_processor/cli.py,sha256=ardcM3bLNhf6abOQ1Aj746x4hp8gerdklfDwszLlYKc,20504
 code_recorder_processor/display.py,sha256=IVTNFB3Vjzpc5ZHceAFQI2-o-N6bvjYmotLDaEy0KoU,7368
 code_recorder_processor/playback.py,sha256=6-OJtQOHKgfutxUNBMunWl-VVSIB0zUDENSl0EsPCh4,4008
-cr_proc-0.1.10.dist-info/METADATA,sha256=KYL9rim_NIa2ke0Xao0k8H4Y5QmD1Rw6u2OuT07dlhs,8916
-cr_proc-0.1.10.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
-cr_proc-0.1.10.dist-info/entry_points.txt,sha256=xb5dPAAWN1Z9NUHpvZgNakaslR1MVOERf_IfpG_M04M,77
-cr_proc-0.1.10.dist-info/RECORD,,
+cr_proc-0.1.11.dist-info/METADATA,sha256=wZuAW9ghrjT2fCbiI9bJSy5TPLc4YD6OpYb0mTlyOL4,8926
+cr_proc-0.1.11.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
+cr_proc-0.1.11.dist-info/entry_points.txt,sha256=xb5dPAAWN1Z9NUHpvZgNakaslR1MVOERf_IfpG_M04M,77
+cr_proc-0.1.11.dist-info/RECORD,,