pysfi 0.1.7__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. {pysfi-0.1.7.dist-info → pysfi-0.1.11.dist-info}/METADATA +11 -9
  2. pysfi-0.1.11.dist-info/RECORD +60 -0
  3. pysfi-0.1.11.dist-info/entry_points.txt +28 -0
  4. sfi/__init__.py +1 -1
  5. sfi/alarmclock/alarmclock.py +40 -40
  6. sfi/bumpversion/__init__.py +1 -1
  7. sfi/cleanbuild/cleanbuild.py +155 -0
  8. sfi/condasetup/condasetup.py +116 -0
  9. sfi/docscan/__init__.py +1 -1
  10. sfi/docscan/docscan.py +407 -103
  11. sfi/docscan/docscan_gui.py +1282 -596
  12. sfi/docscan/lang/eng.py +152 -0
  13. sfi/docscan/lang/zhcn.py +170 -0
  14. sfi/filedate/filedate.py +185 -112
  15. sfi/gittool/__init__.py +2 -0
  16. sfi/gittool/gittool.py +401 -0
  17. sfi/llmclient/llmclient.py +592 -0
  18. sfi/llmquantize/llmquantize.py +480 -0
  19. sfi/llmserver/llmserver.py +335 -0
  20. sfi/makepython/makepython.py +31 -30
  21. sfi/pdfsplit/pdfsplit.py +173 -173
  22. sfi/pyarchive/pyarchive.py +418 -0
  23. sfi/pyembedinstall/pyembedinstall.py +629 -0
  24. sfi/pylibpack/__init__.py +0 -0
  25. sfi/pylibpack/pylibpack.py +1457 -0
  26. sfi/pylibpack/rules/numpy.json +22 -0
  27. sfi/pylibpack/rules/pymupdf.json +10 -0
  28. sfi/pylibpack/rules/pyqt5.json +19 -0
  29. sfi/pylibpack/rules/pyside2.json +23 -0
  30. sfi/pylibpack/rules/scipy.json +23 -0
  31. sfi/pylibpack/rules/shiboken2.json +24 -0
  32. sfi/pyloadergen/pyloadergen.py +512 -227
  33. sfi/pypack/__init__.py +0 -0
  34. sfi/pypack/pypack.py +1142 -0
  35. sfi/pyprojectparse/__init__.py +0 -0
  36. sfi/pyprojectparse/pyprojectparse.py +500 -0
  37. sfi/pysourcepack/pysourcepack.py +308 -0
  38. sfi/quizbase/__init__.py +0 -0
  39. sfi/quizbase/quizbase.py +828 -0
  40. sfi/quizbase/quizbase_gui.py +987 -0
  41. sfi/regexvalidate/__init__.py +0 -0
  42. sfi/regexvalidate/regex_help.html +284 -0
  43. sfi/regexvalidate/regexvalidate.py +468 -0
  44. sfi/taskkill/taskkill.py +0 -2
  45. sfi/workflowengine/__init__.py +0 -0
  46. sfi/workflowengine/workflowengine.py +444 -0
  47. pysfi-0.1.7.dist-info/RECORD +0 -31
  48. pysfi-0.1.7.dist-info/entry_points.txt +0 -15
  49. sfi/embedinstall/embedinstall.py +0 -418
  50. sfi/projectparse/projectparse.py +0 -152
  51. sfi/pypacker/fspacker.py +0 -91
  52. {pysfi-0.1.7.dist-info → pysfi-0.1.11.dist-info}/WHEEL +0 -0
  53. /sfi/{embedinstall → docscan/lang}/__init__.py +0 -0
  54. /sfi/{projectparse → llmquantize}/__init__.py +0 -0
  55. /sfi/{pypacker → pyembedinstall}/__init__.py +0 -0
@@ -1,596 +1,1282 @@
1
- """PySide2 GUI version of docscan application."""
2
-
3
- from __future__ import annotations
4
-
5
- import json
6
- import logging
7
- import sys
8
- from datetime import datetime
9
- from pathlib import Path
10
- from typing import Any
11
-
12
- from PySide2.QtCore import QThread, Signal
13
- from PySide2.QtWidgets import (
14
- QApplication,
15
- QCheckBox,
16
- QFileDialog,
17
- QGroupBox,
18
- QHBoxLayout,
19
- QLabel,
20
- QLineEdit,
21
- QMessageBox,
22
- QProgressBar,
23
- QPushButton,
24
- QSpinBox,
25
- QTableWidget,
26
- QTableWidgetItem,
27
- QTabWidget,
28
- QTextEdit,
29
- QVBoxLayout,
30
- QWidget,
31
- )
32
-
33
- # Import from docscan module
34
- try:
35
- from docscan import DocumentScanner, Rule
36
- except ImportError:
37
- from sfi.docscan.docscan import DocumentScanner, Rule
38
-
39
- logging.basicConfig(level=logging.INFO, format="%(message)s")
40
- logger = logging.getLogger(__name__)
41
-
42
-
43
class ScanWorker(QThread):
    """Worker thread for running a document scan in the background.

    Signals:
        progress (str): Status or log text to show to the user.
        finished (dict): The result dictionary returned by ``scanner.scan``.
        error (str): Text of an exception raised while scanning.
        progress_update (int, int): ``(current, total)`` values forwarded
            from the scanner's progress callback.
    """

    progress = Signal(str)
    finished = Signal(dict)
    error = Signal(str)
    progress_update = Signal(int, int)  # current, total

    def __init__(self, scanner: DocumentScanner, threads: int):
        """Initialize worker thread.

        Args:
            scanner: DocumentScanner instance
            threads: Number of worker threads
        """
        super().__init__()
        self.scanner = scanner
        self.threads = threads

    def run(self):
        """Run the scan and report the outcome through the signals.

        Any exception raised while scanning is converted into an ``error``
        emission rather than propagating out of the thread.
        """

        class ProgressHandler(logging.Handler):
            """Logging handler that forwards formatted records to a signal."""

            def __init__(self, signal: Signal):
                super().__init__()
                self.signal = signal

            def emit(self, record: logging.LogRecord) -> None:
                # The signal is declared as Signal(str), so send the
                # formatted text itself (not a callable producing it).
                self.signal.emit(self.format(record))  # type: ignore

        progress_handler = ProgressHandler(self.progress)
        progress_handler.setFormatter(logging.Formatter("%(message)s"))

        def progress_callback(current: int, total: int) -> None:
            self.progress_update.emit(current, total)  # pyright: ignore[reportAttributeAccessIssue]

        try:
            self.progress.emit("Starting scan...")  # type: ignore
            logger.addHandler(progress_handler)
            try:
                self.scanner.set_progress_callback(progress_callback)
                results = self.scanner.scan(threads=self.threads, show_progress=True)
            finally:
                # Always detach the handler; otherwise a failed scan leaves
                # it registered and later scans log every line twice.
                logger.removeHandler(progress_handler)

            self.progress.emit("Scan complete!")  # pyright: ignore[reportAttributeAccessIssue]
            self.finished.emit(results)  # pyright: ignore[reportAttributeAccessIssue]
        except Exception as e:
            self.error.emit(str(e))  # pyright: ignore[reportAttributeAccessIssue]
94
-
95
-
96
class DocScanGUI(QWidget):
    """Main GUI window for document scanner application.

    The window holds two configuration tabs (input and scan options), a row
    of action buttons, and a results area with a progress bar, a log, a
    results table and a details pane for the selected table entry.
    """

    def __init__(self):
        """Initialize GUI state and build the widgets."""
        super().__init__()
        self.scan_results = None
        self.scan_worker = None
        self.is_scanning = False
        self.init_ui()

    def init_ui(self):
        """Initialize user interface."""
        self.setWindowTitle("Document Scanner GUI")
        self.setMinimumSize(1000, 700)

        main_layout = QVBoxLayout()
        self.setLayout(main_layout)

        # Configuration tabs
        tab_widget = QTabWidget()
        self._create_input_tab(tab_widget)
        self._create_options_tab(tab_widget)
        main_layout.addWidget(tab_widget)

        # Buttons and results below the tabs
        self._create_actions_section(main_layout)
        self._create_results_section(main_layout)

    def _create_input_tab(self, tab_widget: QTabWidget) -> None:
        """Create input configuration tab.

        Args:
            tab_widget: Tab widget to add tab to
        """
        input_tab = QWidget()
        input_layout = QVBoxLayout()
        input_tab.setLayout(input_layout)

        # Input directory
        dir_layout = QHBoxLayout()
        dir_label = QLabel("Input Directory:")
        self.dir_edit = QLineEdit(str(Path.cwd()))
        dir_browse_btn = QPushButton("Browse...")
        dir_browse_btn.clicked.connect(self._browse_directory)  # pyright: ignore[reportAttributeAccessIssue]
        # Connected after the initial text is set, so the auto-search only
        # runs on later edits.
        self.dir_edit.textChanged.connect(self._on_directory_changed)  # pyright: ignore[reportAttributeAccessIssue]
        dir_layout.addWidget(dir_label)
        dir_layout.addWidget(self.dir_edit)
        dir_layout.addWidget(dir_browse_btn)
        input_layout.addLayout(dir_layout)

        # Rules file
        rules_layout = QHBoxLayout()
        rules_label = QLabel("Rules File:")
        self.rules_edit = QLineEdit("rules.json")
        rules_browse_btn = QPushButton("Browse...")
        rules_browse_btn.clicked.connect(self._browse_rules_file)  # pyright: ignore[reportAttributeAccessIssue]
        rules_layout.addWidget(rules_label)
        rules_layout.addWidget(self.rules_edit)
        rules_layout.addWidget(rules_browse_btn)
        input_layout.addLayout(rules_layout)

        # File types
        types_layout = QVBoxLayout()
        types_label = QLabel("File Types:")
        self.types_edit = QLineEdit("pdf,docx,xlsx,pptx,txt,odt,rtf,epub,csv,xml,html,md")
        types_layout.addWidget(types_label)
        types_layout.addWidget(self.types_edit)
        input_layout.addLayout(types_layout)

        input_layout.addStretch()

        tab_widget.addTab(input_tab, "Input Configuration")

    def _create_options_tab(self, tab_widget: QTabWidget) -> None:
        """Create scan options tab.

        Args:
            tab_widget: Tab widget to add tab to
        """
        options_tab = QWidget()
        options_layout = QVBoxLayout()
        options_tab.setLayout(options_layout)

        # Checkboxes
        self.ocr_checkbox = QCheckBox("Use PDF OCR")
        self.process_pool_checkbox = QCheckBox("Use Process Pool (CPU-intensive)")
        options_layout.addWidget(self.ocr_checkbox)
        options_layout.addWidget(self.process_pool_checkbox)

        # Thread count
        thread_layout = QHBoxLayout()
        thread_label = QLabel("Threads:")
        self.thread_spin = QSpinBox()
        self.thread_spin.setMinimum(1)
        self.thread_spin.setMaximum(16)
        self.thread_spin.setValue(4)
        thread_layout.addWidget(thread_label)
        thread_layout.addWidget(self.thread_spin)
        thread_layout.addStretch()
        options_layout.addLayout(thread_layout)

        # Batch size
        batch_layout = QHBoxLayout()
        batch_label = QLabel("Batch Size:")
        self.batch_spin = QSpinBox()
        self.batch_spin.setMinimum(1)
        self.batch_spin.setMaximum(1000)
        self.batch_spin.setValue(50)
        batch_layout.addWidget(batch_label)
        batch_layout.addWidget(self.batch_spin)
        batch_layout.addStretch()
        options_layout.addLayout(batch_layout)

        options_layout.addStretch()

        tab_widget.addTab(options_tab, "Scan Options")

    def _make_action_button(self, text: str, slot, enabled: bool = True) -> QPushButton:
        """Build one 40px-high action button wired to ``slot``."""
        button = QPushButton(text)
        button.clicked.connect(slot)  # pyright: ignore[reportAttributeAccessIssue]
        button.setEnabled(enabled)
        button.setMinimumHeight(40)
        return button

    def _create_actions_section(self, parent_layout: QVBoxLayout) -> None:
        """Create action buttons section.

        Args:
            parent_layout: Parent layout to add this section to
        """
        actions_layout = QHBoxLayout()

        # Pause/Stop/Save start disabled; they are enabled by scan state.
        self.scan_btn = self._make_action_button("Start Scan", self._start_scan)
        self.pause_btn = self._make_action_button("Pause", self._pause_scan, enabled=False)
        self.stop_btn = self._make_action_button("Stop", self._stop_scan, enabled=False)
        self.save_btn = self._make_action_button("Save Results", self._save_results, enabled=False)
        self.clear_btn = self._make_action_button("Clear Results", self._clear_results)

        for button in (self.scan_btn, self.pause_btn, self.stop_btn, self.save_btn, self.clear_btn):
            actions_layout.addWidget(button)

        parent_layout.addLayout(actions_layout)

    def _create_results_section(self, parent_layout: QVBoxLayout) -> None:
        """Create results display section.

        Args:
            parent_layout: Parent layout to add this section to
        """
        results_group = QGroupBox("Results")
        results_layout = QVBoxLayout()
        results_group.setLayout(results_layout)

        # Summary labels
        summary_layout = QHBoxLayout()
        self.files_label = QLabel("Files Scanned: 0")
        self.matches_label = QLabel("Files with Matches: 0")
        summary_layout.addWidget(self.files_label)
        summary_layout.addWidget(self.matches_label)
        results_layout.addLayout(summary_layout)

        # Progress bar (percentage)
        self.progress_bar = QProgressBar()
        self.progress_bar.setMinimum(0)
        self.progress_bar.setMaximum(100)
        self.progress_bar.setValue(0)
        results_layout.addWidget(self.progress_bar)

        # Progress/Log text
        self.log_text = QTextEdit()
        self.log_text.setReadOnly(True)
        self.log_text.setMaximumHeight(150)
        results_layout.addWidget(QLabel("Progress Log:"))
        results_layout.addWidget(self.log_text)

        # Results table
        self.results_table = QTableWidget()
        self.results_table.setColumnCount(4)
        self.results_table.setHorizontalHeaderLabels(["File", "Type", "Matches", "Time (s)"])
        self.results_table.horizontalHeader().setStretchLastSection(True)
        results_layout.addWidget(QLabel("Match Details:"))
        results_layout.addWidget(self.results_table)

        # Match details text
        self.details_text = QTextEdit()
        self.details_text.setReadOnly(True)
        self.details_text.setMaximumHeight(200)
        results_layout.addWidget(QLabel("Selected Match Context:"))
        results_layout.addWidget(self.details_text)

        # Connect table selection
        self.results_table.itemSelectionChanged.connect(self._show_match_details)  # pyright: ignore[reportAttributeAccessIssue]

        parent_layout.addWidget(results_group)

    def _browse_directory(self) -> None:
        """Open directory browser dialog."""
        dir_path = QFileDialog.getExistingDirectory(self, "Select Input Directory")
        if dir_path:
            self.dir_edit.setText(str(Path(dir_path)))

    @staticmethod
    def _find_rules_file(input_dir: Path) -> Path | None:
        """Return the preferred rules file inside ``input_dir``, if any.

        An exact ``rules.json`` wins; otherwise the alphabetically first
        ``rules*.json`` is used so the choice is deterministic.
        """
        exact = input_dir / "rules.json"
        if exact.exists():
            return exact
        candidates = sorted(input_dir.glob("rules*.json"))
        return candidates[0] if candidates else None

    def _on_directory_changed(self) -> None:
        """Handle directory text change - auto-search for rules.json."""
        dir_text = self.dir_edit.text()
        if not dir_text:
            return

        try:
            input_dir = Path(dir_text)
            if not input_dir.is_dir():
                return
            rules_file = self._find_rules_file(input_dir)
            if rules_file is not None:
                self.rules_edit.setText(str(rules_file.resolve()))
        except (OSError, ValueError) as e:
            # Best effort only: this runs on every keystroke, so a
            # half-typed or unreadable path must not interrupt the user.
            logger.debug("Rules auto-search skipped for %r: %s", dir_text, e)

    def _browse_rules_file(self) -> None:
        """Open file browser dialog for rules file."""
        file_path, _ = QFileDialog.getOpenFileName(self, "Select Rules File", "", "JSON Files (*.json)")
        if file_path:
            self.rules_edit.setText(str(Path(file_path)))

    def _load_rules(self) -> list[Rule]:
        """Load rules from JSON file.

        If the path in the rules field is not an existing file, the input
        directory is searched for ``rules.json`` / ``rules*.json`` and the
        rules field is updated with the file that was found.

        Returns:
            List of Rule objects (empty if the JSON has neither a top-level
            list nor a ``"rules"`` key)

        Raises:
            FileNotFoundError: If no rules file can be located.
        """
        rules_file = Path(self.rules_edit.text())
        if not rules_file.is_file():
            found = self._find_rules_file(Path(self.dir_edit.text()))
            if found is None:
                raise FileNotFoundError(f"Rules file not found: {rules_file}")
            rules_file = found
            self.rules_edit.setText(str(rules_file.resolve()))

        with open(rules_file, encoding="utf-8") as f:
            rules_data = json.load(f)

        if isinstance(rules_data, list):
            return [Rule(rule) for rule in rules_data]
        if isinstance(rules_data, dict) and "rules" in rules_data:
            return [Rule(rule) for rule in rules_data["rules"]]
        return []

    def _start_scan(self) -> None:
        """Validate the inputs and start the document scan in a worker thread."""
        input_dir = Path(self.dir_edit.text())
        if not input_dir.exists() or not input_dir.is_dir():
            QMessageBox.warning(self, "Error", "Invalid input directory")
            return

        try:
            rules = self._load_rules()
            if not rules:
                QMessageBox.warning(self, "Error", "No valid rules found")
                return
        except Exception as e:
            QMessageBox.warning(self, "Error", f"Failed to load rules: {e}")
            return

        # Drop empty tokens produced by stray or trailing commas.
        file_types = [ft.strip() for ft in self.types_edit.text().split(",") if ft.strip()]
        if not file_types:
            QMessageBox.warning(self, "Error", "No file types specified")
            return

        self._clear_results()

        # Build the scanner before touching the button state, so a failure
        # here cannot leave the window stuck in "scanning" mode.
        try:
            scanner = DocumentScanner(
                input_dir=input_dir,
                rules=rules,
                file_types=file_types,
                use_pdf_ocr=self.ocr_checkbox.isChecked(),
                use_process_pool=self.process_pool_checkbox.isChecked(),
                batch_size=self.batch_spin.value(),
            )
        except Exception as e:
            self._scan_error(str(e))
            return

        self.is_scanning = True
        self.scan_btn.setEnabled(False)
        self.pause_btn.setEnabled(True)
        self.stop_btn.setEnabled(True)
        self.pause_btn.setText("Pause")

        # Create and start worker thread
        self.scan_worker = ScanWorker(scanner, self.thread_spin.value())
        self.scan_worker.progress.connect(self._log_message)  # pyright: ignore[reportAttributeAccessIssue]
        self.scan_worker.progress_update.connect(self._update_progress)  # pyright: ignore[reportAttributeAccessIssue]
        self.scan_worker.finished.connect(self._scan_finished)  # pyright: ignore[reportAttributeAccessIssue]
        self.scan_worker.error.connect(self._scan_error)  # pyright: ignore[reportAttributeAccessIssue]
        self.scan_worker.start()

    def _scan_finished(self, results: dict[str, Any]) -> None:
        """Handle scan completion.

        Args:
            results: Scan results dictionary
        """
        self.scan_results = results
        self.is_scanning = False
        self.scan_btn.setEnabled(True)
        self.pause_btn.setEnabled(False)
        self.stop_btn.setEnabled(False)
        self.save_btn.setEnabled(True)

        # Update summary
        scan_info = results.get("scan_info", {})
        total_files = scan_info.get("total_files", 0)
        processed = scan_info.get("files_processed", total_files)
        self.files_label.setText(f"Files Scanned: {processed}/{total_files}")
        self.matches_label.setText(f"Files with Matches: {scan_info.get('files_with_matches', 0)}")

        # Update progress bar to 100%
        self.progress_bar.setValue(100)

        # Populate results table: one row per file entry in "matches"
        matches = results.get("matches", [])
        self.results_table.setRowCount(len(matches))

        for row, match_data in enumerate(matches):
            file_path = match_data.get("file_path", "")
            proc_time = match_data.get("metadata", {}).get("processing_time_seconds", 0)
            cells = (
                Path(file_path).name,
                match_data.get("file_type", ""),
                str(len(match_data.get("matches", []))),
                f"{proc_time:.3f}",
            )
            for column, text in enumerate(cells):
                self.results_table.setItem(row, column, QTableWidgetItem(text))

        status = "completed" if not results.get("stopped") else "stopped"
        self._log_message(f"Scan {status}. Found matches in {len(matches)} files.")

    def _scan_error(self, error_msg: str) -> None:
        """Handle scan error.

        Args:
            error_msg: Error message
        """
        self.is_scanning = False
        self.scan_btn.setEnabled(True)
        self.pause_btn.setEnabled(False)
        self.stop_btn.setEnabled(False)
        self._log_message(f"Error: {error_msg}")
        QMessageBox.critical(self, "Error", f"Scan failed: {error_msg}")

    def _pause_scan(self) -> None:
        """Pause or resume the document scan."""
        if not (self.scan_worker and self.scan_worker.scanner):
            return

        scanner = self.scan_worker.scanner
        if scanner.is_paused():
            scanner.resume()
            self.pause_btn.setText("Pause")
            self._log_message("Scan resumed")
        else:
            scanner.pause()
            self.pause_btn.setText("Resume")
            self._log_message("Scan paused")

    def _stop_scan(self) -> None:
        """Stop the document scan."""
        if not self.is_scanning:
            return

        if self.scan_worker and self.scan_worker.scanner:
            self.scan_worker.scanner.stop()

            # Disable pause and stop buttons immediately
            self.pause_btn.setEnabled(False)
            self.stop_btn.setEnabled(False)

            self._log_message("Stopping scan...")

            # Force UI update so the disabled buttons show right away
            QApplication.processEvents()

    def _update_progress(self, current: int, total: int) -> None:
        """Update progress bar and file count.

        Args:
            current: Current number of files processed
            total: Total number of files
        """
        if total > 0:
            percentage = int((current / total) * 100)
            self.progress_bar.setValue(percentage)
            self.files_label.setText(f"Files Scanned: {current}/{total}")

    def _show_match_details(self) -> None:
        """Show details of the selected entry of the results table."""
        # selectedRows() only reports rows whose every column is selected,
        # which never happens with the table's default cell selection;
        # selectedIndexes() works for both cell and row selection.
        selected = self.results_table.selectedIndexes()
        if not selected or not self.scan_results:
            return

        row = selected[0].row()
        matches = self.scan_results.get("matches", [])
        if row >= len(matches):
            return

        match_data = matches[row]

        # File info
        details = [
            f"File: {match_data.get('file_path', '')}",
            f"Type: {match_data.get('file_type', '')}",
            f"Size: {match_data.get('file_size', 0)} bytes\n",
        ]

        # Match info
        for match in match_data.get("matches", []):
            details.append(f"Rule: {match.get('rule_name', '')}")
            details.append(f"Description: {match.get('rule_description', '')}")
            details.append(f"Line {match.get('line_number', 0)}: {match.get('match', '')}")
            details.append("\nContext:")
            details.extend(f"  {ctx_line}" for ctx_line in match.get("context", []))
            details.append("-" * 50)

        self.details_text.setText("\n".join(details))

    def _save_results(self) -> None:
        """Save scan results to JSON file."""
        if not self.scan_results:
            QMessageBox.warning(self, "Warning", "No results to save")
            return

        default_name = f"scan_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        file_path, _ = QFileDialog.getSaveFileName(self, "Save Results", default_name, "JSON Files (*.json)")
        if not file_path:
            return

        try:
            with open(file_path, "w", encoding="utf-8") as f:
                json.dump(self.scan_results, f, indent=2, ensure_ascii=False)
            self._log_message(f"Results saved to: {file_path}")
            QMessageBox.information(self, "Success", f"Results saved to:\n{file_path}")
        except Exception as e:
            QMessageBox.critical(self, "Error", f"Failed to save results: {e}")

    def _clear_results(self) -> None:
        """Clear all results and logs."""
        self.scan_results = None
        self.log_text.clear()
        self.results_table.setRowCount(0)
        self.details_text.clear()
        self.files_label.setText("Files Scanned: 0")
        self.matches_label.setText("Files with Matches: 0")
        self.progress_bar.setValue(0)
        self.save_btn.setEnabled(False)

    def _log_message(self, message: str) -> None:
        """Add message to log text area.

        Args:
            message: Message to log
        """
        timestamp = datetime.now().strftime("%H:%M:%S")
        self.log_text.append(f"[{timestamp}] {message}")
585
-
586
-
587
def main():
    """Launch the scanner window and exit with the Qt event loop's status."""
    qt_app = QApplication(sys.argv)
    gui = DocScanGUI()
    gui.show()
    exit_code = qt_app.exec_()
    sys.exit(exit_code)


if __name__ == "__main__":
    main()
1
+ """PySide2 GUI version of docscan application."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import contextlib
6
+ import json
7
+ import logging
8
+ import os
9
+ import sys
10
+ from datetime import datetime
11
+ from pathlib import Path
12
+ from typing import Any, ClassVar
13
+
14
+ import PySide2
15
+ from PySide2.QtCore import QObject, QThread, Signal
16
+ from PySide2.QtWidgets import (
17
+ QAction,
18
+ QApplication,
19
+ QCheckBox,
20
+ QComboBox,
21
+ QDialog,
22
+ QDialogButtonBox,
23
+ QFileDialog,
24
+ QFormLayout,
25
+ QGroupBox,
26
+ QHBoxLayout,
27
+ QLabel,
28
+ QLineEdit,
29
+ QMainWindow,
30
+ QMessageBox,
31
+ QProgressBar,
32
+ QPushButton,
33
+ QSpinBox,
34
+ QTableWidget,
35
+ QTableWidgetItem,
36
+ QTextEdit,
37
+ QVBoxLayout,
38
+ QWidget,
39
+ )
40
+
41
+ # Import from docscan module
42
+ try:
43
+ from docscan import DocumentScanner, Rule
44
+ except ImportError:
45
+ try:
46
+ from sfi.docscan.docscan import DocumentScanner, Rule
47
+ except ImportError:
48
+ from src.docscan.docscan import DocumentScanner, Rule
49
+
50
+ # Import translations
51
+ try:
52
+ from sfi.docscan.lang.zhcn import TRANSLATIONS
53
+ except ImportError:
54
+ try:
55
+ from src.docscan.lang.zhcn import TRANSLATIONS
56
+ except ImportError:
57
+ TRANSLATIONS = {}
58
+
59
# Root logging: bare messages at INFO level.
logging.basicConfig(format="%(message)s", level=logging.INFO)
logger = logging.getLogger(__name__)

# Language support. USE_CHINESE is the switch read by t(): True selects the
# Chinese translation table, False the English defaults.
USE_CHINESE = True
LANGUAGE = "zh_CN"  # Default to Chinese

# Point Qt at the "plugins/platforms" directory next to the PySide2 package.
qt_dir = Path(PySide2.__file__).parent
plugin_path = str(qt_dir.joinpath("plugins", "platforms"))
os.environ["QT_QPA_PLATFORM_PLUGIN_PATH"] = plugin_path
69
+
70
+
71
def t(key: str, **kwargs) -> str:
    """Get translated text for the given key.

    The Chinese table (``TRANSLATIONS``) is used when ``USE_CHINESE`` is
    true; otherwise the English defaults are imported on demand (an empty
    table is used if they cannot be imported).

    Args:
        key: Translation key
        **kwargs: Arguments for string formatting. A string ``default``
            argument additionally serves as the text to use when ``key``
            has no entry in the active table.

    Returns:
        The translated text, or ``default`` (if given) or ``key`` itself
        when no translation exists. Formatting problems are ignored and
        the unformatted text is returned.
    """
    if USE_CHINESE:
        table = TRANSLATIONS
    else:
        # Return English default values
        try:
            from sfi.docscan.lang.eng import ENGLISH_DEFAULTS as table
        except ImportError:
            try:
                from src.docscan.lang.eng import ENGLISH_DEFAULTS as table
            except ImportError:
                table = {}

    # Callers pass default="..." expecting readable fallback text; without
    # this a missing key would be shown to the user as the raw key name.
    fallback = kwargs.get("default")
    if not isinstance(fallback, str):
        fallback = key
    text = table.get(key, fallback)

    # Format with kwargs if provided
    if kwargs:
        # IndexError covers templates with positional fields such as "{}".
        with contextlib.suppress(KeyError, ValueError, IndexError):
            text = text.format(**kwargs)
    return text
100
+
101
+
102
class ConfigManager:
    """Manage GUI configuration persistence.

    Settings are kept in ``self.config`` (a plain dict) and stored as JSON in
    ``self.config_file``. Values missing from the file are filled in from
    ``DEFAULT_CONFIG``. Problems reading or writing the file are logged as
    warnings instead of being raised.
    """

    DEFAULT_CONFIG: ClassVar[dict[str, Any]] = {
        "input_directory": str(Path.cwd()),
        "rules_file": "rules.json",
        "file_types": "pdf,docx,xlsx,pptx,txt,odt,rtf,epub,csv,xml,html,md,jpg,jpeg,png,gif,bmp,tiff",
        "use_pdf_ocr": False,
        "use_process_pool": False,
        "threads": 4,
        "batch_size": 50,
        "window_width": 1000,
        "window_height": 700,
        "window_x": 100,
        "window_y": 100,
        "recent_directories": [],
        "recent_rules_files": [],
        "include_image_formats": False,
    }

    MAX_RECENT_ITEMS = 10

    def __init__(self, config_file: Path | None = None):
        """Initialize configuration manager.

        Args:
            config_file: Path to configuration file. If None, uses
                ``~/.sfi/docscan_gui.json``.
        """
        if config_file is None:
            # Use user home directory for config
            config_dir = Path.home() / ".sfi"
            try:
                config_dir.mkdir(parents=True, exist_ok=True)
            except OSError as e:
                # Not fatal: loading falls back to defaults and saving
                # reports its own warning.
                logger.warning("Failed to create config directory: %s", e)
            config_file = config_dir / "docscan_gui.json"
        self.config_file = Path(config_file)
        self.config = self._load_config()

    @classmethod
    def _fresh_defaults(cls) -> dict[str, Any]:
        """Return a copy of DEFAULT_CONFIG whose list values are new lists."""
        return {
            key: list(value) if isinstance(value, list) else value
            for key, value in cls.DEFAULT_CONFIG.items()
        }

    def _load_config(self) -> dict[str, Any]:
        """Load configuration from file.

        Returns:
            Configuration dictionary: the defaults overlaid with the stored
            values, or just the defaults when the file is missing,
            unreadable, or does not contain a JSON object.
        """
        config = self._fresh_defaults()
        if not self.config_file.exists():
            return config

        try:
            with open(self.config_file, encoding="utf-8") as f:
                stored = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers both json.JSONDecodeError and
            # UnicodeDecodeError (a file that is not valid UTF-8).
            logger.warning("Failed to load config: %s. Using defaults.", e)
            return config

        if not isinstance(stored, dict):
            logger.warning(
                "Failed to load config: expected a JSON object, got %s. Using defaults.",
                type(stored).__name__,
            )
            return config

        # Merge with defaults to ensure all keys exist
        config.update(stored)
        return config

    def save_config(self) -> None:
        """Save configuration to file."""
        try:
            with open(self.config_file, "w", encoding="utf-8") as f:
                json.dump(self.config, f, indent=2, ensure_ascii=False)
        except OSError as e:
            logger.warning("Failed to save config: %s", e)

    def get(self, key: str, default: Any = None) -> Any:
        """Get configuration value.

        Args:
            key: Configuration key
            default: Default value if key not found

        Returns:
            Configuration value
        """
        return self.config.get(key, default)

    def set(self, key: str, value: Any) -> None:
        """Set configuration value.

        Args:
            key: Configuration key
            value: Value to set
        """
        self.config[key] = value

    def _push_recent(self, key: str, item: str) -> None:
        """Move ``item`` to the front of the list stored under ``key``.

        Duplicates of ``item`` are dropped and the list is capped at
        MAX_RECENT_ITEMS. A stored value that is not a list (for example
        from a hand-edited file) is treated as empty.
        """
        current = self.config.get(key)
        if not isinstance(current, list):
            current = []
        updated = [item, *(entry for entry in current if entry != item)]
        self.config[key] = updated[: self.MAX_RECENT_ITEMS]

    def add_recent_directory(self, directory: str) -> None:
        """Add directory to recent directories list.

        Args:
            directory: Directory path to add
        """
        self._push_recent("recent_directories", directory)

    def add_recent_rules_file(self, rules_file: str) -> None:
        """Add rules file to recent rules files list.

        Args:
            rules_file: Rules file path to add
        """
        self._push_recent("recent_rules_files", rules_file)
212
+
213
+
214
class WorkerSignals(QObject):
    """Signals a background scan worker uses to report back to the GUI.

    Attributes are Qt signals; the payload types are given in each
    declaration.
    """

    # Text line for the progress log.
    progress = Signal(str)
    # Two integer counters; ScanWorker emits them as (current, total).
    progress_update = Signal(int, int)
    # Result dictionary of a completed scan.
    finished = Signal(dict)
    # Message text of a failure.
    error = Signal(str)
221
+
222
+
223
class ScanWorker(QThread):
    """Worker thread for running document scan in background.

    Results and status are reported through ``self.signals`` (a
    ``WorkerSignals`` instance) rather than through signals on the thread
    object itself.
    """

    def __init__(self, scanner: DocumentScanner, threads: int):
        """Initialize worker thread.

        Args:
            scanner: DocumentScanner instance to run
            threads: Thread count passed to ``scanner.scan``
        """
        super().__init__()
        self.scanner = scanner
        self.threads = threads
        self.signals = WorkerSignals()

    def run(self):
        """Run the document scan and emit progress, result, or error signals.

        Any exception raised while scanning is reported through
        ``signals.error`` instead of propagating out of the thread.
        """

        class ProgressHandler(logging.Handler):
            """Logging handler that forwards formatted records to a signal."""

            def __init__(self, signal):
                super().__init__()
                self.signal = signal

            def emit(self, record):
                self.signal.emit(self.format(record))

        handler = ProgressHandler(self.signals.progress)
        handler.setFormatter(logging.Formatter("%(message)s"))

        def progress_callback(current, total):
            self.signals.progress_update.emit(current, total)

        try:
            logger.addHandler(handler)
            try:
                self.scanner.set_progress_callback(progress_callback)
                self.signals.progress.emit(t("starting_scan"))
                results = self.scanner.scan(threads=self.threads, show_progress=True)
            finally:
                # Detach even when the scan raises; a leftover handler would
                # duplicate log lines on the next scan and keep this
                # worker's signal object alive.
                logger.removeHandler(handler)

            self.signals.progress.emit(t("scan_complete"))
            self.signals.finished.emit(results)
        except Exception as e:
            self.signals.error.emit(str(e))
263
+
264
+
265
class SettingsDialog(QDialog):
    """Settings dialog for scan options.

    Edits the language, OCR / process-pool switches, thread count and batch
    size. Values are read from and written to the ``config_manager`` passed
    to the constructor; language changes are also pushed to the parent
    window.
    """

    def __init__(self, parent=None, config_manager=None):
        """Initialize settings dialog.

        Args:
            parent: Parent widget (should be DocScanGUI instance)
            config_manager: ConfigManager instance
        """
        super().__init__(parent)
        self.config_manager = config_manager
        self.main_window = parent  # Store reference to main window
        self.setWindowTitle(t("scan_options_tab"))
        self.setModal(True)
        self.resize(500, 400)
        self._create_ui()
        self._load_settings()

    def _create_ui(self):
        """Create settings dialog UI."""
        layout = QVBoxLayout()

        # Language Settings Group
        self.language_group = QGroupBox(t("language_settings", default="Language Settings"))
        language_layout = QHBoxLayout()
        language_layout.setSpacing(10)

        self.lang_label = QLabel(t("language_label", default="Language:"))
        self.lang_combo = QComboBox()
        self.lang_combo.addItem("中文", "zh_CN")
        self.lang_combo.addItem("English", "en")
        self.lang_combo.currentTextChanged.connect(self._on_language_changed)  # type: ignore # Real-time language change
        language_layout.addWidget(self.lang_label)
        language_layout.addWidget(self.lang_combo)
        language_layout.addStretch()
        self.language_group.setLayout(language_layout)
        layout.addWidget(self.language_group)

        # Processing Options Group (kept as an attribute so it can be
        # retitled when the language changes)
        self.processing_group = QGroupBox(t("processing_options", default="Processing Options"))
        processing_layout = QVBoxLayout()
        processing_layout.setSpacing(10)

        self.ocr_checkbox = QCheckBox(t("use_pdf_ocr"))
        self.ocr_checkbox.setToolTip(
            t("ocr_tooltip", default="Enable OCR for scanned PDF files to extract text from images")
        )

        self.process_pool_checkbox = QCheckBox(t("use_process_pool"))
        self.process_pool_checkbox.setToolTip(
            t(
                "process_pool_tooltip",
                default="Use multiple processes for CPU-intensive operations (may increase memory usage)",
            )
        )

        processing_layout.addWidget(self.ocr_checkbox)
        processing_layout.addWidget(self.process_pool_checkbox)
        self.processing_group.setLayout(processing_layout)
        layout.addWidget(self.processing_group)

        # Performance Settings Group (kept as an attribute, see above)
        self.performance_group = QGroupBox(t("performance_settings", default="Performance Settings"))
        performance_layout = QFormLayout()
        performance_layout.setSpacing(12)

        # Thread count
        self.thread_spin = QSpinBox()
        self.thread_spin.setMinimum(1)
        self.thread_spin.setMaximum(16)
        self.thread_spin.setValue(4)
        self.thread_spin.setToolTip(
            t("threads_tooltip", default="Number of worker threads (higher values may improve speed but use more CPU)")
        )
        performance_layout.addRow(t("threads"), self.thread_spin)

        # Batch size
        self.batch_spin = QSpinBox()
        self.batch_spin.setMinimum(1)
        self.batch_spin.setMaximum(1000)
        self.batch_spin.setValue(50)
        self.batch_spin.setToolTip(
            t(
                "batch_size_tooltip",
                default="Number of files to process in each batch (larger batches may improve throughput)",
            )
        )
        performance_layout.addRow(t("batch_size"), self.batch_spin)

        self.performance_group.setLayout(performance_layout)
        layout.addWidget(self.performance_group)

        # Buttons
        button_layout = QHBoxLayout()

        # Apply button for immediate language/application
        self.apply_btn = QPushButton(t("apply", default="Apply"))
        self.apply_btn.clicked.connect(self._apply_settings)  # type: ignore
        button_layout.addWidget(self.apply_btn)

        button_layout.addStretch()

        # OK and Cancel buttons
        button_box = QDialogButtonBox()
        button_box.setStandardButtons(QDialogButtonBox.StandardButton.Ok | QDialogButtonBox.StandardButton.Cancel)  # type: ignore
        button_box.accepted.connect(self.accept)  # type: ignore
        button_box.rejected.connect(self.reject)  # type: ignore
        button_layout.addWidget(button_box)

        layout.addLayout(button_layout)

        self.setLayout(layout)

    def _retranslate_ui(self):
        """Re-apply translated texts to the widgets this dialog holds references to."""
        self.setWindowTitle(t("scan_options_tab"))
        self.language_group.setTitle(t("language_settings", default="Language Settings"))
        self.lang_label.setText(t("language_label", default="Language:"))
        self.processing_group.setTitle(t("processing_options", default="Processing Options"))
        self.performance_group.setTitle(t("performance_settings", default="Performance Settings"))
        self.ocr_checkbox.setText(t("use_pdf_ocr"))
        self.process_pool_checkbox.setText(t("use_process_pool"))
        self.apply_btn.setText(t("apply", default="Apply"))

    def _on_language_changed(self):
        """Handle real-time language change in settings dialog."""
        # Get selected language
        lang = self.lang_combo.currentData()

        # Update global language setting
        global LANGUAGE, USE_CHINESE
        LANGUAGE = lang
        USE_CHINESE = lang == "zh_CN"

        # Retitle through stored widget references. Matching on English words
        # in the current title stops working once the titles are translated.
        self._retranslate_ui()

        logger.info(f"Language preview: {'中文' if lang == 'zh_CN' else 'English'}")

    def _load_settings(self):
        """Load current settings from the config manager into the dialog widgets."""
        if not self.config_manager:
            return

        self.ocr_checkbox.setChecked(self.config_manager.get("use_pdf_ocr", False))
        self.process_pool_checkbox.setChecked(self.config_manager.get("use_process_pool", False))
        self.thread_spin.setValue(self.config_manager.get("threads", 4))
        self.batch_spin.setValue(self.config_manager.get("batch_size", 50))

        # Set language combo
        lang = self.config_manager.get("language", "zh_CN")
        index = self.lang_combo.findData(lang)
        if index >= 0:
            self.lang_combo.setCurrentIndex(index)

    def _store_settings(self):
        """Copy widget values into the config manager (without saving to file).

        Callers must have checked that ``self.config_manager`` is set.

        Returns:
            The language code currently selected in the combo box.
        """
        self.config_manager.set("use_pdf_ocr", self.ocr_checkbox.isChecked())
        self.config_manager.set("use_process_pool", self.process_pool_checkbox.isChecked())
        self.config_manager.set("threads", self.thread_spin.value())
        self.config_manager.set("batch_size", self.batch_spin.value())

        selected_lang = self.lang_combo.currentData()
        self.config_manager.set("language", selected_lang)
        return selected_lang

    def _apply_settings(self):
        """Apply settings immediately without closing dialog (for preview)."""
        if not self.main_window or not self.config_manager:
            return

        selected_lang = self._store_settings()

        # Apply language to parent window
        self._apply_language_to_parent(selected_lang)

        # Save config to file
        self.config_manager.save_config()

    def accept(self):
        """Save settings when OK is clicked and apply language immediately."""
        if self.config_manager:
            selected_lang = self._store_settings()
            self.config_manager.save_config()

            # Apply language to parent window if exists
            if self.main_window:
                self._apply_language_to_parent(selected_lang)

        super().accept()

    def _apply_language_to_parent(self, lang: str) -> None:
        """Apply language change to parent window immediately.

        Only the window title, the File menu entries and the group boxes in
        the central layout are retitled; a message box then tells the user
        that other elements may need a restart.

        Args:
            lang: Language code ('zh_CN' or 'en')
        """
        if not self.main_window:
            return

        # Update global language variables
        global LANGUAGE, USE_CHINESE
        LANGUAGE = lang
        USE_CHINESE = lang == "zh_CN"

        # Update all translatable UI elements in parent window
        self.main_window.setWindowTitle(t("window_title"))

        # Update menu bar
        self.main_window.file_menu.setTitle(t("file_menu", default="&File"))
        self.main_window.save_action.setText(t("save_results", default="&Save Results"))
        self.main_window.clear_action.setText(t("clear_results", default="&Clear Results"))

        # Update titled sections (group boxes) of the central layout
        central_layout = self.main_window.centralWidget().layout()
        for index in range(central_layout.count()):
            widget = central_layout.itemAt(index).widget()
            if not widget or not hasattr(widget, "title"):
                continue
            title = widget.title()
            if "Input" in title or "输入" in title:
                widget.setTitle(t("input_config_tab"))
            elif "Results" in title or "结果" in title:
                widget.setTitle(t("results"))

        # Force UI refresh
        self.main_window.update()

        # Show confirmation message
        lang_name = "中文" if lang == "zh_CN" else "English"
        QMessageBox.information(
            self.main_window,
            t("success"),
            f"Language switched to {lang_name}. Some elements may require restart to fully update.",
        )
516
+
517
+
518
+ class DocScanGUI(QMainWindow):
519
+ """Main GUI window for document scanner application."""
520
+
521
def __init__(self):
    """Set up window state, build the widgets, then load the saved configuration."""
    super().__init__()
    self.config_manager = ConfigManager()

    # Scan state
    self.scan_results = None
    self.scan_worker = None
    self.is_scanning = False

    # Settings dialog is created on first use by _show_settings
    self.settings_dialog = None

    # Menu objects, filled in by _create_menu_bar
    self.file_menu = None
    self.open_action = None
    self.save_action = None
    self.clear_action = None

    # Indicator label next to the rules-file field
    self.rules_validation_label = None

    self.init_ui()
    self._load_config()
    self._setup_close_handler()
539
+
540
def init_ui(self):
    """Build the main window: title, minimum size, menu bar and content sections."""
    self.setWindowTitle(t("window_title"))
    self.setMinimumSize(1000, 700)

    # Central widget with one vertical layout that stacks the sections
    container = QWidget()
    self.setCentralWidget(container)
    root_layout = QVBoxLayout(container)

    self._create_menu_bar()

    # Sections are added top to bottom: input, action buttons, results
    section_builders = (
        self._create_input_section,
        self._create_actions_section,
        self._create_results_section,
    )
    for build_section in section_builders:
        build_section(root_layout)
559
+
560
def _create_menu_bar(self):
    """Create the File, Settings and Help menus and wire their actions.

    The File menu and its open/save/clear actions are stored on ``self``.
    The save action starts disabled.
    """

    def make_action(label, slot):
        # Build an action owned by this window and connect its trigger.
        action = QAction(label, self)
        action.triggered.connect(slot)  # type: ignore
        return action

    bar = self.menuBar()

    # --- File menu ---
    self.file_menu = bar.addMenu(t("file_menu", default="&File"))

    self.open_action = make_action(t("open_results", default="&Open Results..."), self._open_results)
    self.file_menu.addAction(self.open_action)

    self.file_menu.addSeparator()

    self.save_action = make_action(t("save_results", default="&Save Results"), self._save_results)
    self.save_action.setEnabled(False)
    self.file_menu.addAction(self.save_action)

    self.clear_action = make_action(t("clear_results", default="&Clear Results"), self._clear_results)
    self.file_menu.addAction(self.clear_action)

    self.file_menu.addSeparator()

    self.file_menu.addAction(make_action(t("exit", default="E&xit"), self.close))

    # --- Settings menu ---
    settings_menu = bar.addMenu(t("settings_menu", default="&Settings"))
    settings_menu.addAction(make_action(t("preferences", default="&Preferences..."), self._show_settings))

    # --- Help menu ---
    help_menu = bar.addMenu(t("help_menu", default="&Help"))
    help_menu.addAction(make_action(t("about", default="&About"), self._show_about))
601
+
602
+ def _show_settings(self):
603
+ """Show settings dialog."""
604
+ if self.settings_dialog is None:
605
+ self.settings_dialog = SettingsDialog(self, self.config_manager)
606
+ self.settings_dialog.show()
607
+
608
def _show_about(self):
    """Open the standard "about" message box for this application."""
    about_title = t("about_title", default="About Document Scanner")
    about_body = t("about_text", default="Document Scanner GUI\n\nVersion 1.0")
    QMessageBox.about(self, about_title, about_body)
615
+
616
def _create_input_section(self, parent_layout: QVBoxLayout) -> None:
    """Build the input-configuration group box and append it to *parent_layout*.

    Top to bottom the group contains: the input-directory row, the
    rules-file row (with its validation indicator label), the file-types
    row and the "include image formats" checkbox.

    Args:
        parent_layout: Layout that receives the finished group box.
    """
    section = QGroupBox(t("input_config_tab"))
    section_layout = QVBoxLayout()
    section.setLayout(section_layout)

    # --- Input directory row ---
    directory_row = QHBoxLayout()
    directory_caption = QLabel(t("input_directory"))
    self.dir_edit = QLineEdit(str(Path.cwd()))
    browse_dir_button = QPushButton(t("browse"))
    browse_dir_button.clicked.connect(self._browse_directory)  # type: ignore
    self.dir_edit.textChanged.connect(self._on_directory_changed)  # type: ignore
    for widget in (directory_caption, self.dir_edit, browse_dir_button):
        directory_row.addWidget(widget)
    section_layout.addLayout(directory_row)

    # --- Rules file row ---
    rules_row = QHBoxLayout()
    rules_caption = QLabel(t("rules_file"))
    self.rules_edit = QLineEdit(t("default_rules_file"))
    self.rules_edit.setMinimumWidth(300)
    browse_rules_button = QPushButton(t("browse"))
    browse_rules_button.clicked.connect(self._browse_rules_file)  # type: ignore
    # Narrow label that _validate_rules_file fills with a check or cross mark
    self.rules_validation_label = QLabel("")
    self.rules_validation_label.setFixedWidth(20)
    for widget in (rules_caption, self.rules_edit, self.rules_validation_label, browse_rules_button):
        rules_row.addWidget(widget)
    section_layout.addLayout(rules_row)

    # --- File types row ---
    types_row = QHBoxLayout()
    types_caption = QLabel(t("file_types"))
    self.types_edit = QLineEdit(t("default_file_types"))
    self.types_edit.setToolTip(t("file_types_tooltip", default="File types to scan (comma separated)"))
    types_row.addWidget(types_caption)
    types_row.addWidget(self.types_edit)
    section_layout.addLayout(types_row)

    # --- Image formats checkbox ---
    self.include_images_checkbox = QCheckBox(t("include_image_formats", default="Include Image Formats"))
    self.include_images_checkbox.setChecked(self.config_manager.get("include_image_formats", False))
    self.include_images_checkbox.setToolTip(
        t("include_image_formats_tooltip", default="Include image formats (jpg, jpeg, png, gif, bmp, tiff) in scan")
    )
    # Connected after setChecked, so restoring the saved state does not fire the slot
    self.include_images_checkbox.stateChanged.connect(self._toggle_image_formats)  # type: ignore
    section_layout.addWidget(self.include_images_checkbox)

    parent_layout.addWidget(section)
673
+
674
+ def _toggle_image_formats(self, state):
675
+ """Toggle image formats in file types based on checkbox state."""
676
+ base_types = "pdf,docx,xlsx,pptx,txt,odt,rtf,epub,csv,xml,html,md"
677
+ image_types = "jpg,jpeg,png,gif,bmp,tiff"
678
+ self.config_manager.set("include_image_formats", state == 2)
679
+
680
+ if state == 2: # Checked
681
+ # Add image formats if not already present
682
+ current_types = self.types_edit.text()
683
+ if not any(img_type in current_types for img_type in image_types.split(",")):
684
+ all_types = f"{base_types},{image_types}"
685
+ self.types_edit.setText(all_types)
686
+ else: # Unchecked
687
+ # Remove image formats
688
+ current_types = self.types_edit.text()
689
+ types_list = [t.strip() for t in current_types.split(",")]
690
+ filtered_types = [t for t in types_list if t not in ["jpg", "jpeg", "png", "gif", "bmp", "tiff"]]
691
+ self.types_edit.setText(",".join(filtered_types))
692
+
693
def _create_actions_section(self, parent_layout: QVBoxLayout) -> None:
    """Build the row of scan / pause / stop buttons.

    Args:
        parent_layout: Layout that receives the button row.
    """
    self.scan_btn = QPushButton(t("start_scan"))
    self.pause_btn = QPushButton(t("pause"))
    self.stop_btn = QPushButton(t("stop"))

    self.scan_btn.clicked.connect(self._start_scan)  # pyright: ignore[reportAttributeAccessIssue]
    self.pause_btn.clicked.connect(self._pause_scan)  # pyright: ignore[reportAttributeAccessIssue]
    self.stop_btn.clicked.connect(self._stop_scan)  # pyright: ignore[reportAttributeAccessIssue]

    # Pause and stop start disabled; _start_scan enables them
    self.pause_btn.setEnabled(False)
    self.stop_btn.setEnabled(False)

    button_row = QHBoxLayout()
    for button in (self.scan_btn, self.pause_btn, self.stop_btn):
        button.setMinimumHeight(40)
        button_row.addWidget(button)

    parent_layout.addLayout(button_row)
720
+
721
def _create_results_section(self, parent_layout: QVBoxLayout) -> None:
    """Build the results group box and append it to *parent_layout*.

    Contains the summary labels, the progress bar, the read-only log, the
    four-column results table and the read-only details pane.

    Args:
        parent_layout: Layout that receives the finished group box.
    """
    section = QGroupBox(t("results"))
    section_layout = QVBoxLayout()
    section.setLayout(section_layout)

    # Summary counters
    self.files_label = QLabel(t("files_scanned_zero"))
    self.matches_label = QLabel(t("files_with_matches_zero"))
    summary_row = QHBoxLayout()
    summary_row.addWidget(self.files_label)
    summary_row.addWidget(self.matches_label)
    section_layout.addLayout(summary_row)

    # Progress bar, 0..100 (percent)
    self.progress_bar = QProgressBar()
    self.progress_bar.setMinimum(0)
    self.progress_bar.setMaximum(100)
    self.progress_bar.setValue(0)
    section_layout.addWidget(self.progress_bar)

    # Log pane
    self.log_text = QTextEdit()
    self.log_text.setReadOnly(True)
    self.log_text.setMaximumHeight(150)
    section_layout.addWidget(QLabel(t("progress_log")))
    section_layout.addWidget(self.log_text)

    # Results table: file, type, match count, processing time
    self.results_table = QTableWidget()
    self.results_table.setColumnCount(4)
    column_titles = [t("file"), t("type"), t("matches"), t("time")]
    self.results_table.setHorizontalHeaderLabels(column_titles)
    self.results_table.horizontalHeader().setStretchLastSection(True)
    section_layout.addWidget(QLabel(t("match_details")))
    section_layout.addWidget(self.results_table)

    # Details pane for the selected row
    self.details_text = QTextEdit()
    self.details_text.setReadOnly(True)
    self.details_text.setMaximumHeight(200)
    section_layout.addWidget(QLabel(t("selected_match_context")))
    section_layout.addWidget(self.details_text)

    # Both a selection change and a plain click refresh the details pane
    self.results_table.itemSelectionChanged.connect(self._show_match_details)  # pyright: ignore[reportAttributeAccessIssue]
    self.results_table.cellClicked.connect(self._handle_cell_click)  # pyright: ignore[reportAttributeAccessIssue]

    parent_layout.addWidget(section)
773
+
774
+ def _handle_cell_click(self, row: int, column: int) -> None:
775
+ """Handle cell click event to show match details regardless of selection change.
776
+
777
+ Args:
778
+ row: Row index of clicked cell
779
+ column: Column index of clicked cell
780
+ """
781
+ # Simply call the existing method to show details for the clicked row
782
+ # We temporarily select the row to ensure consistency
783
+ self.results_table.selectRow(row)
784
+ self._show_match_details()
785
+
786
def _browse_directory(self) -> None:
    """Let the user pick the input directory and put it into the directory field."""
    # Start in the most recent directory if there is one, else the working directory
    history = self.config_manager.get("recent_directories", [])
    if history:
        initial_dir = history[0]
    else:
        initial_dir = str(Path.cwd())

    chosen = QFileDialog.getExistingDirectory(self, t("select_input_directory"), initial_dir)
    if not chosen:
        return
    self.dir_edit.setText(str(Path(chosen)))
795
+
796
+ def _on_directory_changed(self) -> None:
797
+ """Handle directory text change - auto-search for rules.json."""
798
+ dir_text = self.dir_edit.text()
799
+ if not dir_text:
800
+ return
801
+
802
+ try:
803
+ input_dir = Path(dir_text)
804
+ if input_dir.exists() and input_dir.is_dir():
805
+ # Search for rules.json or rules*.json files
806
+ rule_files = list(input_dir.glob("rules.json")) + list(input_dir.glob("rules*.json"))
807
+
808
+ if rule_files:
809
+ # Use the first matching file, prefer exact "rules.json"
810
+ exact_match = next((f for f in rule_files if f.name == "rules.json"), None)
811
+ rules_file = exact_match if exact_match else rule_files[0]
812
+ self.rules_edit.setText(str(rules_file.resolve()))
813
+ # Validate rules file after directory change
814
+ self._validate_rules_file()
815
+ except Exception:
816
+ # Ignore errors during directory change handling
817
+ pass
818
+
819
def _browse_rules_file(self) -> None:
    """Let the user pick a rules file, then store and validate the chosen path."""
    # Start next to the most recent rules file if there is one, else the working directory
    history = self.config_manager.get("recent_rules_files", [])
    if history:
        initial_dir = str(Path(history[0]).parent)
    else:
        initial_dir = str(Path.cwd())

    chosen, _ = QFileDialog.getOpenFileName(self, t("select_rules_file"), initial_dir, t("json_files"))
    if not chosen:
        return
    self.rules_edit.setText(str(Path(chosen)))
    self._validate_rules_file()
829
+
830
+ def _validate_rules_file(self) -> None:
831
+ """Validate rules file path and update UI indicator.
832
+
833
+ Shows green border and checkmark if file exists, red border and X if not.
834
+ """
835
+ if not hasattr(self, "rules_edit") or self.rules_edit is None:
836
+ return
837
+
838
+ rules_path = self.rules_edit.text().strip()
839
+ if not rules_path:
840
+ # Reset style if empty
841
+ self.rules_edit.setStyleSheet("")
842
+ if self.rules_validation_label:
843
+ self.rules_validation_label.setText("")
844
+ return
845
+
846
+ try:
847
+ rules_file = Path(rules_path)
848
+ if rules_file.exists() and rules_file.is_file():
849
+ # File exists - show green border and checkmark
850
+ self.rules_edit.setStyleSheet("""
851
+ QLineEdit {
852
+ border: 2px solid #4CAF50;
853
+ border-radius: 3px;
854
+ padding: 2px;
855
+ }
856
+ """)
857
+ if self.rules_validation_label:
858
+ self.rules_validation_label.setText("✓")
859
+ self.rules_validation_label.setStyleSheet("color: #4CAF50; font-weight: bold;")
860
+ else:
861
+ # File doesn't exist - show red border and X
862
+ self.rules_edit.setStyleSheet("""
863
+ QLineEdit {
864
+ border: 2px solid #F44336;
865
+ border-radius: 3px;
866
+ padding: 2px;
867
+ }
868
+ """)
869
+ if self.rules_validation_label:
870
+ self.rules_validation_label.setText("✗")
871
+ self.rules_validation_label.setStyleSheet("color: #F44336; font-weight: bold;")
872
+ except Exception:
873
+ # Invalid path - show red border and X
874
+ self.rules_edit.setStyleSheet("""
875
+ QLineEdit {
876
+ border: 2px solid #F44336;
877
+ border-radius: 3px;
878
+ padding: 2px;
879
+ }
880
+ """)
881
+ if self.rules_validation_label:
882
+ self.rules_validation_label.setText("✗")
883
+ self.rules_validation_label.setStyleSheet("color: #F44336; font-weight: bold;")
884
+
885
+ def _load_rules(self) -> list[Rule]:
886
+ """Load rules from JSON file.
887
+
888
+ Returns:
889
+ List of Rule objects
890
+ """
891
+ rules_file = Path(self.rules_edit.text())
892
+ if not rules_file.exists():
893
+ # Try finding rules in input directory
894
+ input_dir = Path(self.dir_edit.text())
895
+ rule_files = list(input_dir.glob("rules*.json"))
896
+ if rule_files:
897
+ rules_file = rule_files[0]
898
+ self.rules_edit.setText(str(rules_file.resolve()))
899
+ # Validate after updating path
900
+ self._validate_rules_file()
901
+ else:
902
+ raise FileNotFoundError(f"Rules file not found: {rules_file}")
903
+
904
+ with open(rules_file, encoding="utf-8") as f:
905
+ rules_data = json.load(f)
906
+
907
+ rules = []
908
+ if isinstance(rules_data, list):
909
+ rules = [Rule(rule) for rule in rules_data]
910
+ elif isinstance(rules_data, dict) and "rules" in rules_data:
911
+ rules = [Rule(rule) for rule in rules_data["rules"]]
912
+
913
+ return rules
914
+
915
def _start_scan(self) -> None:
    """Start the document scan.

    Validates the input directory and the rules, clears previous results,
    switches the buttons to "scanning" state and starts a ``ScanWorker``
    configured from the current settings. Problems with the inputs are
    reported in a warning box and no scan is started.
    """
    # _stop_scan re-enables the scan button before the worker thread has
    # necessarily finished. Replacing the reference to a running QThread
    # would destroy it mid-run, so refuse to start until it has ended.
    if self.scan_worker is not None and self.scan_worker.isRunning():
        self._log_message(t("stopping_scan"))
        return

    # Validate inputs
    input_dir = Path(self.dir_edit.text())
    if not input_dir.exists() or not input_dir.is_dir():
        QMessageBox.warning(self, t("error"), t("invalid_input_directory"))
        return

    try:
        rules = self._load_rules()
        if not rules:
            QMessageBox.warning(self, t("error"), t("no_valid_rules"))
            return
    except Exception as e:
        QMessageBox.warning(self, t("error"), t("failed_to_load_rules", error=e))
        return

    # Parse file types
    file_types = [ft.strip() for ft in self.types_edit.text().split(",")]

    # Clear previous results
    self._clear_results()

    # Set scanning state
    self.is_scanning = True

    # Disable scan button during scan, enable pause and stop
    self.scan_btn.setEnabled(False)
    self.pause_btn.setEnabled(True)
    self.stop_btn.setEnabled(True)
    self.pause_btn.setText(t("pause"))

    # Create scanner
    scanner = DocumentScanner(
        input_dir=input_dir,
        rules=rules,
        file_types=file_types,
        use_pdf_ocr=self.config_manager.get("use_pdf_ocr", False),
        use_process_pool=self.config_manager.get("use_process_pool", False),
        batch_size=self.config_manager.get("batch_size", 50),
    )

    # Create and start worker thread
    self.scan_worker = ScanWorker(scanner, self.config_manager.get("threads", 4))
    self.scan_worker.signals.progress.connect(self._log_message)
    self.scan_worker.signals.progress_update.connect(self._update_progress)
    self.scan_worker.signals.finished.connect(self._scan_finished)
    self.scan_worker.signals.error.connect(self._scan_error)
    self.scan_worker.start()
964
+
965
def _scan_finished(self, results: dict[str, Any]) -> None:
    """Handle scan completion.

    Stores the results, resets the buttons, updates the summary labels and
    progress bar, fills the results table, logs the final status and saves
    the configuration.

    Args:
        results: Scan results dictionary. Reads ``scan_info``
            (``total_files``, ``files_processed``, ``files_with_matches``),
            ``matches`` and ``stopped``.
    """
    self.scan_results = results
    self.is_scanning = False
    self.scan_btn.setEnabled(True)
    self.pause_btn.setEnabled(False)
    self.stop_btn.setEnabled(False)

    # Enable menu actions after successful scan
    if self.save_action:
        self.save_action.setEnabled(True)

    # Update summary
    scan_info = results.get("scan_info", {})
    total_files = scan_info.get("total_files", 0)
    files_processed = scan_info.get("files_processed", total_files)
    # Same "<prefix> current/total" format as _update_progress. Replacing
    # the character "0" inside the translated prefix either does nothing or
    # corrupts unrelated digits.
    self.files_label.setText(f"{t('files_scanned')} {files_processed}/{total_files}")
    self.matches_label.setText(f"{t('files_with_matches')} {scan_info.get('files_with_matches', 0)}")

    # Update progress bar to 100%
    self.progress_bar.setValue(100)

    # Populate results table
    matches = results.get("matches", [])
    self.results_table.setRowCount(len(matches))

    for row, match_data in enumerate(matches):
        file_path = match_data.get("file_path", "")
        file_type = match_data.get("file_type", "")
        match_count = len(match_data.get("matches", []))
        proc_time = match_data.get("metadata", {}).get("processing_time_seconds", 0)

        self.results_table.setItem(row, 0, QTableWidgetItem(Path(file_path).name))
        self.results_table.setItem(row, 1, QTableWidgetItem(file_type))
        self.results_table.setItem(row, 2, QTableWidgetItem(str(match_count)))
        self.results_table.setItem(row, 3, QTableWidgetItem(f"{proc_time:.3f}"))

    # Status is "stopped" only when the scan was stopped before any file
    # was processed; otherwise it is reported as completed.
    was_stopped = results.get("stopped", False)
    status = t("scan_completed") if files_processed > 0 or not was_stopped else t("scan_stopped")
    self._log_message(status)
    self._log_message(t("found_matches_files", count=len(matches)))

    # Auto-save configuration after successful scan
    self._save_config()
1016
+
1017
def _scan_error(self, error_msg: str) -> None:
    """Reset the scan controls and report a failed scan to the user.

    Args:
        error_msg: Error message
    """
    self.is_scanning = False

    # Back to idle: only the scan button is usable
    for button, enabled in (
        (self.scan_btn, True),
        (self.pause_btn, False),
        (self.stop_btn, False),
    ):
        button.setEnabled(enabled)

    self._log_message(f"Error: {error_msg}")
    QMessageBox.critical(self, t("error"), t("scan_failed", error=error_msg))
1029
+
1030
+ def _pause_scan(self) -> None:
1031
+ """Pause or resume the document scan."""
1032
+ if self.scan_worker and self.scan_worker.scanner:
1033
+ scanner = self.scan_worker.scanner
1034
+ if scanner.is_paused():
1035
+ # Resume
1036
+ scanner.resume()
1037
+ self.pause_btn.setText(t("pause"))
1038
+ else:
1039
+ # Pause
1040
+ scanner.pause()
1041
+ self.pause_btn.setText(t("resume"))
1042
+ self._log_message(t("pausing_scan"))
1043
+
1044
+ def _stop_scan(self) -> None:
1045
+ """Stop the document scan."""
1046
+ if not self.is_scanning:
1047
+ return
1048
+
1049
+ if self.scan_worker and self.scan_worker.scanner:
1050
+ scanner = self.scan_worker.scanner
1051
+ scanner.stop()
1052
+
1053
+ # Disable pause and stop buttons immediately
1054
+ self.pause_btn.setEnabled(False)
1055
+ self.stop_btn.setEnabled(False)
1056
+
1057
+ # Re-enable the scan button after stopping
1058
+ self.scan_btn.setEnabled(True)
1059
+
1060
+ # Log the stop action
1061
+ self._log_message(t("stopping_scan"))
1062
+
1063
+ # Force UI update
1064
+ QApplication.processEvents()
1065
+
1066
+ def _update_progress(self, current: int, total: int) -> None:
1067
+ """Update progress bar and file count.
1068
+
1069
+ Args:
1070
+ current: Current number of files processed
1071
+ total: Total number of files
1072
+ """
1073
+ if total > 0:
1074
+ percentage = int((current / total) * 100)
1075
+ self.progress_bar.setValue(percentage)
1076
+ # Use format string to display current/total progress
1077
+ self.files_label.setText(f"{t('files_scanned')} {current}/{total}")
1078
+
1079
def _show_match_details(self) -> None:
    """Render the details of the first selected results-table row.

    Returns without touching the details pane when nothing is selected,
    when no scan results are held, or when the selected row index has no
    corresponding entry in the stored "matches" list.
    """
    selection = self.results_table.selectionModel().selectedRows()
    if not (selection and self.scan_results):
        return

    index = selection[0].row()
    all_matches = self.scan_results.get("matches", [])
    if index >= len(all_matches):
        return

    entry = all_matches[index]

    # Header: information about the file itself.
    lines = [
        f"File: {entry.get('file_path', '')}",
        f"Type: {entry.get('file_type', '')}",
        f"Size: {entry.get('file_size', 0)} bytes\n",
    ]

    # One section per rule hit, each closed by a separator line.
    for hit in entry.get("matches", []):
        lines += [
            f"Rule: {hit.get('rule_name', '')}",
            f"Description: {hit.get('rule_description', '')}",
            f"Line {hit.get('line_number', 0)}: {hit.get('match', '')}",
            "\nContext:",
        ]
        lines.extend(f" {ctx_line}" for ctx_line in hit.get("context", []))
        lines.append("-" * 50)

    self.details_text.setText("\n".join(lines))
1110
+
1111
def _save_results(self) -> None:
    """Write the current scan results to a user-chosen JSON file.

    Shows a warning and returns when there are no results. If the user
    cancels the file dialog nothing is written. Any failure while saving
    is reported in an error dialog instead of being raised.
    """
    if not self.scan_results:
        QMessageBox.warning(self, t("warning"), t("no_results_to_save"))
        return

    # Suggest a timestamped file name so repeated saves do not collide.
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    target, _ = QFileDialog.getSaveFileName(
        self, "Save Results", f"scan_results_{stamp}.json", "JSON Files (*.json)"
    )
    if not target:
        return

    try:
        with open(target, "w", encoding="utf-8") as out:
            json.dump(self.scan_results, out, indent=2, ensure_ascii=False)
        saved_msg = t("results_saved_to", path=target)
        self._log_message(saved_msg)
        QMessageBox.information(self, t("success"), saved_msg)
    except Exception as e:
        # UI boundary: surface the failure to the user rather than crash.
        QMessageBox.critical(self, t("error"), t("failed_to_save_results", error=e))
1128
+
1129
def _open_results(self) -> None:
    """Open and load previously saved scan results from a JSON file.

    Asks the user for a file, checks that it holds a dict with a
    "matches" key, then replaces the current results, summary labels,
    progress bar and results table with the loaded data. Returns silently
    when the dialog is cancelled; any failure while loading is reported in
    an error dialog instead of being raised.
    """
    file_path, _ = QFileDialog.getOpenFileName(
        self,
        t("open_results_file", default="Open Scan Results"),
        str(Path.home()),
        t("json_files", default="JSON Files (*.json)"),
    )

    if not file_path:
        return

    try:
        with open(file_path, encoding="utf-8") as f:
            results = json.load(f)

        # Validate the loaded data structure
        if not isinstance(results, dict) or "matches" not in results:
            raise ValueError("Invalid scan results file format")

        # Clear current results and load the new ones
        self._clear_results()
        self.scan_results = results

        # Update summary. The label is built the same way _update_progress
        # builds it (prefix + "processed/total") instead of substituting
        # into the prefix text with str.replace("0", ...).
        scan_info = results.get("scan_info", {})
        total_files = scan_info.get("total_files", 0)
        processed = scan_info.get("files_processed", total_files)
        self.files_label.setText(f"{t('files_scanned')} {processed}/{total_files}")
        self.matches_label.setText(f"{t('files_with_matches')} {scan_info.get('files_with_matches', 0)}")

        # Update progress bar to 100% since this is completed work
        self.progress_bar.setValue(100)

        # Populate results table
        matches = results.get("matches", [])
        self.results_table.setRowCount(len(matches))

        for row, match_data in enumerate(matches):
            # Distinct name: must not shadow file_path (the results file),
            # which is still needed for the log line and dialog below.
            matched_path = match_data.get("file_path", "")
            file_type = match_data.get("file_type", "")
            match_count = len(match_data.get("matches", []))
            proc_time = match_data.get("metadata", {}).get("processing_time_seconds", 0)

            self.results_table.setItem(row, 0, QTableWidgetItem(Path(matched_path).name))
            self.results_table.setItem(row, 1, QTableWidgetItem(file_type))
            self.results_table.setItem(row, 2, QTableWidgetItem(str(match_count)))
            self.results_table.setItem(row, 3, QTableWidgetItem(f"{proc_time:.3f}"))

        # Enable save menu action since we now have results
        if self.save_action:
            self.save_action.setEnabled(True)

        # Log the action, reporting the results file that was loaded
        self._log_message(t("loaded_results_from", path=file_path))
        QMessageBox.information(self, t("success"), t("results_loaded_successfully", path=file_path))

    except Exception as e:
        # UI boundary: surface the failure to the user rather than crash.
        QMessageBox.critical(self, t("error"), t("failed_to_load_results", error=str(e)))
1187
+
1188
def _clear_results(self) -> None:
    """Reset stored results, the log, the table, details and summary widgets."""
    # Drop the data first, then empty every widget that displayed it.
    self.scan_results = None
    self.log_text.clear()
    self.results_table.setRowCount(0)
    self.details_text.clear()

    # Summary widgets go back to their zero state.
    for label, key in (
        (self.files_label, "files_scanned_zero"),
        (self.matches_label, "files_with_matches_zero"),
    ):
        label.setText(t(key))
    self.progress_bar.setValue(0)

    # Nothing left to save, so the save menu action (if present) is disabled.
    if self.save_action:
        self.save_action.setEnabled(False)
1201
+
1202
def _log_message(self, message: str) -> None:
    """Append a line prefixed with the current HH:MM:SS time to the log area.

    Args:
        message: Message to log
    """
    self.log_text.append(f"[{datetime.now():%H:%M:%S}] {message}")
1210
+
1211
def _load_config(self) -> None:
    """Restore window geometry and input fields from the config manager.

    Each value falls back to a built-in default when the config manager
    has no entry for it. The rules file is validated afterwards.
    """
    cfg = self.config_manager

    # Window geometry
    self.resize(cfg.get("window_width", 1000), cfg.get("window_height", 700))
    self.move(cfg.get("window_x", 100), cfg.get("window_y", 100))

    # Text fields: (widget, config key, default value)
    restored_fields = (
        (self.dir_edit, "input_directory", str(Path.cwd())),
        (self.rules_edit, "rules_file", "rules.json"),
        (self.types_edit, "file_types", "pdf,docx,xlsx,pptx,txt,odt,rtf,epub,csv,xml,html,md"),
    )
    for widget, key, fallback in restored_fields:
        widget.setText(cfg.get(key, fallback))

    # Validate rules file after loading config
    self._validate_rules_file()
1235
+
1236
def _save_config(self) -> None:
    """Store window geometry and input fields in the config and persist it."""
    cfg = self.config_manager

    # Window size and position
    for key, value in (
        ("window_width", self.width()),
        ("window_height", self.height()),
        ("window_x", self.x()),
        ("window_y", self.y()),
    ):
        cfg.set(key, value)

    # Input directory: stored and also added to the recent directories
    chosen_dir = self.dir_edit.text()
    cfg.set("input_directory", chosen_dir)
    cfg.add_recent_directory(chosen_dir)

    # Rules file: stored and also added to the recent rules files
    chosen_rules = self.rules_edit.text()
    cfg.set("rules_file", chosen_rules)
    cfg.add_recent_rules_file(chosen_rules)

    # File types
    cfg.set("file_types", self.types_edit.text())

    # Persist to file
    cfg.save_config()
1259
+
1260
def _setup_close_handler(self) -> None:
    """Wrap the window's closeEvent so the config is saved before closing."""
    # Keep the handler currently bound to this instance so it still runs.
    inherited_handler = self.closeEvent

    def _on_close(event):
        """Save the configuration, then delegate to the previous handler."""
        self._save_config()
        inherited_handler(event)

    # Replace the handler on this instance only.
    self.closeEvent = _on_close
1271
+
1272
+
1273
def main():
    """Create the Qt application, show the main window and run the event loop.

    The process exits with the status returned by the event loop.
    """
    app = QApplication(sys.argv)
    gui = DocScanGUI()
    gui.show()
    exit_code = app.exec_()
    sys.exit(exit_code)


# Launch the GUI only when this module is executed as a script.
if __name__ == "__main__":
    main()