dwipe 2.0.1__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dwipe/VerifyTask.py ADDED
@@ -0,0 +1,412 @@
1
+ """
2
+ VerifyTask - Abstract base class and implementations for verification operations
3
+
4
+ Includes:
5
+ - VerifyTask: Abstract base class with verification logic and statistical analysis
6
+ - VerifyZeroTask: Concrete class for verifying zeros (fast memcmp)
7
+ - VerifyRandTask: Concrete class for verifying random data (statistical)
8
+ """
9
+ # pylint: disable=broad-exception-raised,broad-exception-caught
10
+ import os
11
+ import time
12
+ import random
13
+ import traceback
14
+ from types import SimpleNamespace
15
+
16
+ from .WipeTask import WipeTask
17
+ from .Utils import Utils
18
+
19
+
20
class VerifyTask(WipeTask):
    """Abstract base class for verify operations (VerifyZeroTask, VerifyRandTask)

    Implements verification logic with:
    - Section-by-section sampling of the device content
    - Fast-fail for zero verification (whole-chunk comparison against zeros)
    - Statistical analysis for random pattern verification
    - Progress tracking

    Subclasses must set:
    - expected_pattern: "zeroed" or "random"
    - fast_fail: True for zero (fast compare), False for random (statistical)
    """

    # Private tunables shared by run_task() and its helpers.
    _VERIFY_CHUNK_SIZE = 64 * 1024   # bytes requested per os.read()
    _VERIFY_SAMPLE_STEP = 23         # histogram every 23rd byte (~4% of the data)
    _VERIFY_NUM_SECTIONS = 100       # the usable area is split into this many sections
    _VERIFY_ZERO_CHUNK = bytes(_VERIFY_CHUNK_SIZE)  # all-zero reference chunk

    def __init__(self, device_path, total_size, opts=None, verify_pct=2, expected_pattern=None):
        """Initialize verify task

        Args:
            device_path: Path to device (e.g., '/dev/sda1')
            total_size: Total size in bytes
            opts: Options namespace
            verify_pct: Percentage of disk to verify (e.g., 2 for 2%);
                zero or less means verification is skipped
            expected_pattern: "zeroed", "random", or None (auto-detect)
        """
        super().__init__(device_path, total_size, opts)

        # Verify-specific attributes
        self.verify_pct = verify_pct
        self.expected_pattern = expected_pattern
        # One of "zeroed", "random", "skipped", "error",
        # "not-wiped (<reason>)" or "mixed (<reason>)"; None until run_task() ends.
        self.verify_result = None
        # List of (section_idx, section_result, section_stats) tuples
        self.section_results = []
        # Bytes read so far (mirrored into total_written by the read loop)
        self.verify_progress = 0

        # Fast-fail flag (set by subclasses)
        self.fast_fail = False

    def run_task(self):
        """Execute verification operation (blocking, runs in thread)

        Opens the device read-only, samples each section, and stores the
        verdict in ``self.verify_result``.  Any exception is captured as a
        formatted traceback in ``self.exception`` and the result becomes
        "error".  ``self.done`` is set to True on every exit path.
        """
        try:
            # get_status()/get_summary_dict() only treat verify_pct > 0 as an
            # active sample, so zero AND negative percentages are skipped here.
            # (A negative value used to fall through and, in fast-fail zero
            # mode, report "zeroed" without reading a single byte.)
            if self.verify_pct <= 0:
                self.verify_result = "skipped"
                self.done = True
                return

            # Fast-fail for zeros (VerifyZeroTask)
            fast_fail_zeros = self.fast_fail and self.expected_pattern == "zeroed"
            # For unmarked disks: track whether each section is entirely zero
            detect_zeros = self.expected_pattern is None

            fd = os.open(self.device_path, os.O_RDONLY)
            try:
                failure_reason = self._verify_sections(fd, fast_fail_zeros, detect_zeros)
            finally:
                os.close(fd)

            # NOTE(review): when do_abort stops the scan early, a verdict is
            # still derived from the sections seen so far (unchanged behavior);
            # callers presumably check do_abort themselves -- confirm.
            self._verify_conclude(failure_reason)
            self.done = True
        except Exception:
            self.exception = traceback.format_exc()
            self.verify_result = "error"
            self.done = True

    def _verify_sections(self, fd, fast_fail_zeros, detect_zeros):
        """Sample every section; return a failure reason string, or None if none failed."""
        # Skip marker area
        marker_skip = WipeTask.BUFFER_SIZE
        # NOTE(review): if total_size is smaller than the marker area this goes
        # negative and the per-section byte counts below are negative too
        # (unchanged behavior) -- confirm callers never pass such a device.
        usable_size = self.total_size - marker_skip
        num_sections = self._VERIFY_NUM_SECTIONS
        section_size = usable_size // num_sections

        for section_idx in range(num_sections):
            if self.do_abort:
                break

            section_start = section_idx * section_size
            bytes_in_section = min(section_size, usable_size - section_start)
            bytes_to_verify = int(bytes_in_section * self.verify_pct / 100)

            if bytes_to_verify == 0:
                self.section_results.append((section_idx, "skipped", {}))
                continue

            # Random offset within section so repeated runs sample different data
            if bytes_to_verify < bytes_in_section:
                offset_in_section = random.randint(0, bytes_in_section - bytes_to_verify)
            else:
                offset_in_section = 0

            read_pos = marker_skip + section_start + offset_in_section
            os.lseek(fd, read_pos, os.SEEK_SET)

            byte_counts, samples, found_nonzero, bad_offset = self._verify_read_section(
                fd, read_pos, bytes_to_verify, fast_fail_zeros, detect_zeros
            )

            # Fast-fail hit: stop immediately, no section result is recorded
            if bad_offset is not None:
                return f"non-zero at {Utils.human(bad_offset)}"

            if fast_fail_zeros or (detect_zeros and not found_nonzero):
                # Either every chunk passed the zero compare, or an unmarked
                # section contained nothing but zeros.
                section_result, section_stats = "zeroed", {}
            else:
                section_result, section_stats = self._analyze_section_randomness(
                    byte_counts, samples
                )

            self.section_results.append((section_idx, section_result, section_stats))

            if self._verify_section_failed(section_result):
                return f"section {section_idx}: {section_result}"

        return None

    def _verify_read_section(self, fd, read_pos, bytes_to_verify, fast_fail_zeros, detect_zeros):
        """Read one section's sample from fd (already positioned at read_pos).

        Returns:
            tuple: (byte_counts, samples, found_nonzero, bad_offset) where
            bad_offset is the device offset of the first chunk that failed the
            fast zero compare, or None.
        """
        byte_counts = [0] * 256
        samples = 0
        found_nonzero = False
        verified = 0

        chunk_limit = self._VERIFY_CHUNK_SIZE
        step = self._VERIFY_SAMPLE_STEP
        zero_chunk = self._VERIFY_ZERO_CHUNK

        while verified < bytes_to_verify:
            if self.do_abort:
                break

            data = os.read(fd, min(chunk_limit, bytes_to_verify - verified))
            if not data:
                # End of device (or nothing more to read): stop this section
                break

            if fast_fail_zeros:
                # Compare the whole chunk against zeros; only slice the
                # reference buffer for a short (final or partial) read.
                if len(data) == len(zero_chunk):
                    reference = zero_chunk
                else:
                    reference = zero_chunk[:len(data)]
                if data != reference:
                    return byte_counts, samples, found_nonzero, read_pos + verified
                # The histogram is never consulted in fast-fail mode, so it is
                # not collected here.
            else:
                if detect_zeros and not found_nonzero and data.count(0) != len(data):
                    found_nonzero = True

                # Sample every step-th byte of this chunk into the histogram
                sampled = data[::step]
                for value in sampled:
                    byte_counts[value] += 1
                samples += len(sampled)

            verified += len(data)
            self.verify_progress += len(data)          # actual bytes read
            self.total_written = self.verify_progress  # mirrored for status reporting

        return byte_counts, samples, found_nonzero, None

    def _verify_section_failed(self, section_result):
        """Return True when a section's result contradicts the expected pattern."""
        expected = self.expected_pattern
        if expected is None:
            # Unmarked disk: zeroed and random are both acceptable
            return section_result == "not-wiped"
        if expected in ("random", "zeroed"):
            return section_result != expected
        return False

    def _verify_conclude(self, failure_reason):
        """Set verify_result (and expected_pattern for unmarked disks) from the scan outcome."""
        expected = self.expected_pattern

        if expected in ("zeroed", "random"):
            if failure_reason is None:
                self.verify_result = expected
            else:
                self.verify_result = f"not-wiped ({failure_reason})"
            return

        # Unmarked disk: decide from the recorded section results
        results = [result for _, result, _ in self.section_results]
        zeroed_sections = results.count("zeroed")
        random_sections = results.count("random")

        if failure_reason is None:
            # All sections passed: majority wins, ties go to "random"
            verdict = "zeroed" if zeroed_sections > random_sections else "random"
        else:
            total_checked = len(results) - results.count("skipped")
            if zeroed_sections == total_checked:
                verdict = "zeroed"
            elif random_sections == total_checked:
                verdict = "random"
            else:
                self.verify_result = f"mixed ({failure_reason})"
                return

        self.verify_result = verdict
        self.expected_pattern = verdict

    def _analyze_section_randomness(self, byte_counts, total_samples):
        """Analyze if a section appears random

        Args:
            byte_counts: 256-entry histogram of sampled byte values
            total_samples: Number of samples in the histogram

        Returns:
            tuple: (result, stats) where result is "random", "zeroed",
            "not-wiped" or "insufficient-data" (fewer than 100 samples).
        """
        if total_samples < 100:
            return "insufficient-data", {"samples": total_samples}

        max_freq = max(byte_counts) / total_samples
        zero_freq = byte_counts[0] / total_samples

        unique_bytes = sum(1 for count in byte_counts if count > 0)
        unused_bytes = sum(1 for count in byte_counts if count == 0)

        # Coefficient of variation of the histogram around the uniform expectation
        expected = total_samples / 256
        if expected > 0:
            variance = sum((count - expected) ** 2 for count in byte_counts) / 256
            cv = (variance ** 0.5) / expected
        else:
            cv = float('inf')

        # Good random data should:
        # 1. Use most byte values (>200 unique); note this cannot be met
        #    with 200 or fewer samples, so very small samples never
        #    classify as random
        # 2. Have no dominating byte (<2% frequency)
        # 3. Be relatively evenly distributed (CV < 2.0)
        # 4. Not be mostly zeros
        is_random = (unique_bytes > 200 and
                     max_freq < 0.02 and
                     cv < 2.0 and
                     zero_freq < 0.5)

        stats = {
            "samples": total_samples,
            "max_freq": max_freq,
            "unique_bytes": unique_bytes,
            "unused_bytes": unused_bytes,
            "cv": cv,
            "zero_freq": zero_freq,
        }

        if is_random:
            return "random", stats
        if zero_freq > 0.95:
            return "zeroed", stats
        return "not-wiped", stats

    def get_status(self):
        """Get current progress status (documented as called from the main thread)

        Also appends a sample to ``self.wr_hists`` and trims it, so the rate is
        computed over the retained history.

        Returns:
            tuple: (elapsed_str, pct_str, rate_str, eta_str, more_state)
                - pct_str has 'v' prefix (e.g., "v45%"), or is 'STOP' on abort
        """
        mono = time.monotonic()
        elapsed_time = mono - self.start_mono

        # Total bytes expected to be read (verify_pct% of total_size)
        if self.verify_pct > 0:
            total_to_verify = self.total_size * self.verify_pct / 100
        else:
            total_to_verify = self.total_size

        pct = int((self.verify_progress / total_to_verify) * 100) if total_to_verify > 0 else 0
        pct_str = 'STOP' if self.do_abort else f'v{pct}%'

        # Record progress history; drop leading entries while the second-oldest
        # one is still inside the 30-second window (at least two are kept).
        self.wr_hists.append(SimpleNamespace(mono=mono, written=self.verify_progress))
        floor = mono - 30
        while len(self.wr_hists) >= 3 and self.wr_hists[1].mono >= floor:
            del self.wr_hists[0]

        oldest = self.wr_hists[0]
        delta_mono = mono - oldest.mono
        if delta_mono > 1.0:
            physical_rate = (self.verify_progress - oldest.written) / delta_mono
        else:
            physical_rate = 0

        # Scale rate to show "effective" verification rate (as if verifying 100% of disk)
        if self.verify_pct > 0:
            effective_rate = physical_rate * (100 / self.verify_pct)
        else:
            effective_rate = physical_rate
        rate_str = f'{Utils.human(int(round(effective_rate, 0)))}/s'

        if physical_rate > 0:
            remaining = total_to_verify - self.verify_progress
            when_str = Utils.ago_str(int(round(remaining / physical_rate)))
        else:
            when_str = '0'

        return Utils.ago_str(int(round(elapsed_time))), pct_str, rate_str, when_str, self.more_state

    def get_summary_dict(self):
        """Get final summary for this verify task

        Returns:
            dict: Summary with keys "step", "elapsed", "rate", "bytes_checked",
            "result", plus "verify_detail" when the result carries a
            parenthesised reason.
        """
        elapsed = time.monotonic() - self.start_mono
        rate_bps = self.verify_progress / elapsed if elapsed > 0 else 0

        # Anything other than "random" (including an undetermined pattern) is labelled Zero
        mode = "Rand" if self.expected_pattern == "random" else "Zero"

        verify_label = f"verify {mode}"
        if 0 < self.verify_pct < 100:
            verify_label += f" ({self.verify_pct}% sample)"

        # Extract the text inside "label (detail)".  Split on the FIRST '(' only
        # and drop just the one closing paren, so a detail that itself contains
        # parentheses is kept intact.
        verify_detail = None
        if self.verify_result:
            _, opener, tail = str(self.verify_result).partition('(')
            if opener:
                verify_detail = tail[:-1] if tail.endswith(')') else tail

        result = {
            "step": verify_label,
            "elapsed": Utils.ago_str(int(elapsed)),
            "rate": f"{Utils.human(int(rate_bps))}/s",
            "bytes_checked": self.verify_progress,
            "result": self.verify_result,
        }

        if verify_detail:
            result["verify_detail"] = verify_detail

        return result
389
+
390
+
391
class VerifyZeroTask(VerifyTask):
    """Verification task whose expected pattern is "zeroed"."""

    def __init__(self, device_path, total_size, opts=None, verify_pct=2):
        """Set up a zero-verify task with the fast-fail path enabled."""
        super().__init__(
            device_path,
            total_size,
            opts=opts,
            verify_pct=verify_pct,
            expected_pattern="zeroed",
        )
        # Enable the whole-chunk zero comparison in run_task()
        self.fast_fail = True

    def get_display_name(self):
        """Return the display label for this task."""
        return "Verify"
401
+
402
+
403
class VerifyRandTask(VerifyTask):
    """Verification task whose expected pattern is "random"."""

    def __init__(self, device_path, total_size, opts=None, verify_pct=2):
        """Set up a random-verify task that relies on statistical analysis."""
        super().__init__(
            device_path,
            total_size,
            opts=opts,
            verify_pct=verify_pct,
            expected_pattern="random",
        )
        # No fast-fail: each section is judged by its byte histogram
        self.fast_fail = False

    def get_display_name(self):
        """Return the display label for this task."""
        return "Verify"