pysfi 0.1.12__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -129,7 +129,7 @@ class ConfigManager:
129
129
  """
130
130
  if config_file is None:
131
131
  # Use user home directory for config
132
- config_dir = Path.home() / ".sfi"
132
+ config_dir = Path.home() / ".pysfi"
133
133
  config_dir.mkdir(exist_ok=True)
134
134
  config_file = config_dir / "docscan_gui.json"
135
135
  self.config_file = config_file
@@ -286,7 +286,9 @@ class SettingsDialog(QDialog):
286
286
  layout = QVBoxLayout()
287
287
 
288
288
  # Language Settings Group
289
- self.language_group = QGroupBox(t("language_settings", default="Language Settings"))
289
+ self.language_group = QGroupBox(
290
+ t("language_settings", default="Language Settings")
291
+ )
290
292
  language_layout = QHBoxLayout()
291
293
  language_layout.setSpacing(10)
292
294
 
@@ -302,13 +304,18 @@ class SettingsDialog(QDialog):
302
304
  layout.addWidget(self.language_group)
303
305
 
304
306
  # Processing Options Group
305
- processing_group = QGroupBox(t("processing_options", default="Processing Options"))
307
+ processing_group = QGroupBox(
308
+ t("processing_options", default="Processing Options")
309
+ )
306
310
  processing_layout = QVBoxLayout()
307
311
  processing_layout.setSpacing(10)
308
312
 
309
313
  self.ocr_checkbox = QCheckBox(t("use_pdf_ocr"))
310
314
  self.ocr_checkbox.setToolTip(
311
- t("ocr_tooltip", default="Enable OCR for scanned PDF files to extract text from images")
315
+ t(
316
+ "ocr_tooltip",
317
+ default="Enable OCR for scanned PDF files to extract text from images",
318
+ )
312
319
  )
313
320
 
314
321
  self.process_pool_checkbox = QCheckBox(t("use_process_pool"))
@@ -325,7 +332,9 @@ class SettingsDialog(QDialog):
325
332
  layout.addWidget(processing_group)
326
333
 
327
334
  # Performance Settings Group
328
- performance_group = QGroupBox(t("performance_settings", default="Performance Settings"))
335
+ performance_group = QGroupBox(
336
+ t("performance_settings", default="Performance Settings")
337
+ )
329
338
  performance_layout = QFormLayout()
330
339
  performance_layout.setSpacing(12)
331
340
 
@@ -335,7 +344,10 @@ class SettingsDialog(QDialog):
335
344
  self.thread_spin.setMaximum(16)
336
345
  self.thread_spin.setValue(4)
337
346
  self.thread_spin.setToolTip(
338
- t("threads_tooltip", default="Number of worker threads (higher values may improve speed but use more CPU)")
347
+ t(
348
+ "threads_tooltip",
349
+ default="Number of worker threads (higher values may improve speed but use more CPU)",
350
+ )
339
351
  )
340
352
  performance_layout.addRow(t("threads"), self.thread_spin)
341
353
 
@@ -367,7 +379,9 @@ class SettingsDialog(QDialog):
367
379
 
368
380
  # OK and Cancel buttons
369
381
  button_box = QDialogButtonBox()
370
- button_box.setStandardButtons(QDialogButtonBox.StandardButton.Ok | QDialogButtonBox.StandardButton.Cancel) # type: ignore
382
+ button_box.setStandardButtons(
383
+ QDialogButtonBox.StandardButton.Ok | QDialogButtonBox.StandardButton.Cancel
384
+ ) # type: ignore
371
385
  button_box.accepted.connect(self.accept) # type: ignore
372
386
  button_box.rejected.connect(self.reject) # type: ignore
373
387
  button_layout.addWidget(button_box)
@@ -388,7 +402,9 @@ class SettingsDialog(QDialog):
388
402
 
389
403
  # Update dialog title and UI elements immediately
390
404
  self.setWindowTitle(t("scan_options_tab"))
391
- self.language_group.setTitle(t("language_settings", default="Language Settings"))
405
+ self.language_group.setTitle(
406
+ t("language_settings", default="Language Settings")
407
+ )
392
408
  self.lang_label.setText(t("language_label", default="Language:"))
393
409
 
394
410
  # Update other group boxes
@@ -397,11 +413,17 @@ class SettingsDialog(QDialog):
397
413
  if item.widget() and isinstance(item.widget(), QGroupBox):
398
414
  group_box = item.widget()
399
415
  if "Language" in group_box.title(): # type: ignore
400
- group_box.setTitle(t("language_settings", default="Language Settings")) # type: ignore
416
+ group_box.setTitle(
417
+ t("language_settings", default="Language Settings")
418
+ ) # type: ignore
401
419
  elif "Processing" in group_box.title(): # type: ignore
402
- group_box.setTitle(t("processing_options", default="Processing Options")) # type: ignore
420
+ group_box.setTitle(
421
+ t("processing_options", default="Processing Options")
422
+ ) # type: ignore
403
423
  elif "Performance" in group_box.title(): # type: ignore
404
- group_box.setTitle(t("performance_settings", default="Performance Settings")) # type: ignore
424
+ group_box.setTitle(
425
+ t("performance_settings", default="Performance Settings")
426
+ ) # type: ignore
405
427
 
406
428
  # Show temporary message in dialog
407
429
  logger.info(f"Language preview: {'中文' if lang == 'zh_CN' else 'English'}")
@@ -410,7 +432,9 @@ class SettingsDialog(QDialog):
410
432
  """Load current settings into dialog."""
411
433
  if self.config_manager:
412
434
  self.ocr_checkbox.setChecked(self.config_manager.get("use_pdf_ocr", False))
413
- self.process_pool_checkbox.setChecked(self.config_manager.get("use_process_pool", False))
435
+ self.process_pool_checkbox.setChecked(
436
+ self.config_manager.get("use_process_pool", False)
437
+ )
414
438
  self.thread_spin.setValue(self.config_manager.get("threads", 4))
415
439
  self.batch_spin.setValue(self.config_manager.get("batch_size", 50))
416
440
 
@@ -428,7 +452,9 @@ class SettingsDialog(QDialog):
428
452
  # Save settings to config
429
453
  if self.config_manager:
430
454
  self.config_manager.set("use_pdf_ocr", self.ocr_checkbox.isChecked())
431
- self.config_manager.set("use_process_pool", self.process_pool_checkbox.isChecked())
455
+ self.config_manager.set(
456
+ "use_process_pool", self.process_pool_checkbox.isChecked()
457
+ )
432
458
  self.config_manager.set("threads", self.thread_spin.value())
433
459
  self.config_manager.set("batch_size", self.batch_spin.value())
434
460
 
@@ -446,7 +472,9 @@ class SettingsDialog(QDialog):
446
472
  """Save settings when OK is clicked and apply language immediately."""
447
473
  if self.config_manager:
448
474
  self.config_manager.set("use_pdf_ocr", self.ocr_checkbox.isChecked())
449
- self.config_manager.set("use_process_pool", self.process_pool_checkbox.isChecked())
475
+ self.config_manager.set(
476
+ "use_process_pool", self.process_pool_checkbox.isChecked()
477
+ )
450
478
  self.config_manager.set("threads", self.thread_spin.value())
451
479
  self.config_manager.set("batch_size", self.batch_spin.value())
452
480
 
@@ -483,7 +511,9 @@ class SettingsDialog(QDialog):
483
511
  # Update menu bar
484
512
  self.main_window.file_menu.setTitle(t("file_menu", default="&File"))
485
513
  self.main_window.save_action.setText(t("save_results", default="&Save Results"))
486
- self.main_window.clear_action.setText(t("clear_results", default="&Clear Results"))
514
+ self.main_window.clear_action.setText(
515
+ t("clear_results", default="&Clear Results")
516
+ )
487
517
 
488
518
  # Update input section
489
519
  for i in range(self.main_window.centralWidget().layout().count()):
@@ -655,16 +685,25 @@ class DocScanGUI(QMainWindow):
655
685
  types_layout = QHBoxLayout()
656
686
  types_label = QLabel(t("file_types"))
657
687
  self.types_edit = QLineEdit(t("default_file_types"))
658
- self.types_edit.setToolTip(t("file_types_tooltip", default="File types to scan (comma separated)"))
688
+ self.types_edit.setToolTip(
689
+ t("file_types_tooltip", default="File types to scan (comma separated)")
690
+ )
659
691
  types_layout.addWidget(types_label)
660
692
  types_layout.addWidget(self.types_edit)
661
693
  input_layout.addLayout(types_layout)
662
694
 
663
695
  # Checkbox for including image formats
664
- self.include_images_checkbox = QCheckBox(t("include_image_formats", default="Include Image Formats"))
665
- self.include_images_checkbox.setChecked(self.config_manager.get("include_image_formats", False))
696
+ self.include_images_checkbox = QCheckBox(
697
+ t("include_image_formats", default="Include Image Formats")
698
+ )
699
+ self.include_images_checkbox.setChecked(
700
+ self.config_manager.get("include_image_formats", False)
701
+ )
666
702
  self.include_images_checkbox.setToolTip(
667
- t("include_image_formats_tooltip", default="Include image formats (jpg, jpeg, png, gif, bmp, tiff) in scan")
703
+ t(
704
+ "include_image_formats_tooltip",
705
+ default="Include image formats (jpg, jpeg, png, gif, bmp, tiff) in scan",
706
+ )
668
707
  )
669
708
  self.include_images_checkbox.stateChanged.connect(self._toggle_image_formats) # type: ignore
670
709
  input_layout.addWidget(self.include_images_checkbox)
@@ -680,14 +719,20 @@ class DocScanGUI(QMainWindow):
680
719
  if state == 2: # Checked
681
720
  # Add image formats if not already present
682
721
  current_types = self.types_edit.text()
683
- if not any(img_type in current_types for img_type in image_types.split(",")):
722
+ if not any(
723
+ img_type in current_types for img_type in image_types.split(",")
724
+ ):
684
725
  all_types = f"{base_types},{image_types}"
685
726
  self.types_edit.setText(all_types)
686
727
  else: # Unchecked
687
728
  # Remove image formats
688
729
  current_types = self.types_edit.text()
689
730
  types_list = [t.strip() for t in current_types.split(",")]
690
- filtered_types = [t for t in types_list if t not in ["jpg", "jpeg", "png", "gif", "bmp", "tiff"]]
731
+ filtered_types = [
732
+ t
733
+ for t in types_list
734
+ if t not in ["jpg", "jpeg", "png", "gif", "bmp", "tiff"]
735
+ ]
691
736
  self.types_edit.setText(",".join(filtered_types))
692
737
 
693
738
  def _create_actions_section(self, parent_layout: QVBoxLayout) -> None:
@@ -753,7 +798,12 @@ class DocScanGUI(QMainWindow):
753
798
  # Results table
754
799
  self.results_table = QTableWidget()
755
800
  self.results_table.setColumnCount(4)
756
- self.results_table.setHorizontalHeaderLabels([t("file"), t("type"), t("matches"), t("time")])
801
+ self.results_table.setHorizontalHeaderLabels([
802
+ t("file"),
803
+ t("type"),
804
+ t("matches"),
805
+ t("time"),
806
+ ])
757
807
  self.results_table.horizontalHeader().setStretchLastSection(True)
758
808
  results_layout.addWidget(QLabel(t("match_details")))
759
809
  results_layout.addWidget(self.results_table)
@@ -789,7 +839,9 @@ class DocScanGUI(QMainWindow):
789
839
  recent_dirs = self.config_manager.get("recent_directories", [])
790
840
  start_dir = recent_dirs[0] if recent_dirs else str(Path.cwd())
791
841
 
792
- dir_path = QFileDialog.getExistingDirectory(self, t("select_input_directory"), start_dir)
842
+ dir_path = QFileDialog.getExistingDirectory(
843
+ self, t("select_input_directory"), start_dir
844
+ )
793
845
  if dir_path:
794
846
  self.dir_edit.setText(str(Path(dir_path)))
795
847
 
@@ -803,11 +855,15 @@ class DocScanGUI(QMainWindow):
803
855
  input_dir = Path(dir_text)
804
856
  if input_dir.exists() and input_dir.is_dir():
805
857
  # Search for rules.json or rules*.json files
806
- rule_files = list(input_dir.glob("rules.json")) + list(input_dir.glob("rules*.json"))
858
+ rule_files = list(input_dir.glob("rules.json")) + list(
859
+ input_dir.glob("rules*.json")
860
+ )
807
861
 
808
862
  if rule_files:
809
863
  # Use the first matching file, prefer exact "rules.json"
810
- exact_match = next((f for f in rule_files if f.name == "rules.json"), None)
864
+ exact_match = next(
865
+ (f for f in rule_files if f.name == "rules.json"), None
866
+ )
811
867
  rules_file = exact_match if exact_match else rule_files[0]
812
868
  self.rules_edit.setText(str(rules_file.resolve()))
813
869
  # Validate rules file after directory change
@@ -820,9 +876,13 @@ class DocScanGUI(QMainWindow):
820
876
  """Open file browser dialog for rules file."""
821
877
  # Get recent rules files for initial path
822
878
  recent_files = self.config_manager.get("recent_rules_files", [])
823
- start_dir = str(Path(recent_files[0]).parent) if recent_files else str(Path.cwd())
879
+ start_dir = (
880
+ str(Path(recent_files[0]).parent) if recent_files else str(Path.cwd())
881
+ )
824
882
 
825
- file_path, _ = QFileDialog.getOpenFileName(self, t("select_rules_file"), start_dir, t("json_files"))
883
+ file_path, _ = QFileDialog.getOpenFileName(
884
+ self, t("select_rules_file"), start_dir, t("json_files")
885
+ )
826
886
  if file_path:
827
887
  self.rules_edit.setText(str(Path(file_path)))
828
888
  self._validate_rules_file()
@@ -856,7 +916,9 @@ class DocScanGUI(QMainWindow):
856
916
  """)
857
917
  if self.rules_validation_label:
858
918
  self.rules_validation_label.setText("✓")
859
- self.rules_validation_label.setStyleSheet("color: #4CAF50; font-weight: bold;")
919
+ self.rules_validation_label.setStyleSheet(
920
+ "color: #4CAF50; font-weight: bold;"
921
+ )
860
922
  else:
861
923
  # File doesn't exist - show red border and X
862
924
  self.rules_edit.setStyleSheet("""
@@ -868,7 +930,9 @@ class DocScanGUI(QMainWindow):
868
930
  """)
869
931
  if self.rules_validation_label:
870
932
  self.rules_validation_label.setText("✗")
871
- self.rules_validation_label.setStyleSheet("color: #F44336; font-weight: bold;")
933
+ self.rules_validation_label.setStyleSheet(
934
+ "color: #F44336; font-weight: bold;"
935
+ )
872
936
  except Exception:
873
937
  # Invalid path - show red border and X
874
938
  self.rules_edit.setStyleSheet("""
@@ -880,7 +944,9 @@ class DocScanGUI(QMainWindow):
880
944
  """)
881
945
  if self.rules_validation_label:
882
946
  self.rules_validation_label.setText("✗")
883
- self.rules_validation_label.setStyleSheet("color: #F44336; font-weight: bold;")
947
+ self.rules_validation_label.setStyleSheet(
948
+ "color: #F44336; font-weight: bold;"
949
+ )
884
950
 
885
951
  def _load_rules(self) -> list[Rule]:
886
952
  """Load rules from JSON file.
@@ -981,8 +1047,14 @@ class DocScanGUI(QMainWindow):
981
1047
  # Update summary
982
1048
  scan_info = results.get("scan_info", {})
983
1049
  processed = scan_info.get("files_processed", scan_info.get("total_files", 0))
984
- self.files_label.setText(t("files_scanned").replace("0", f"{processed}/{scan_info.get('total_files', 0)}"))
985
- self.matches_label.setText(f"{t('files_with_matches')} {scan_info.get('files_with_matches', 0)}")
1050
+ self.files_label.setText(
1051
+ t("files_scanned").replace(
1052
+ "0", f"{processed}/{scan_info.get('total_files', 0)}"
1053
+ )
1054
+ )
1055
+ self.matches_label.setText(
1056
+ f"{t('files_with_matches')} {scan_info.get('files_with_matches', 0)}"
1057
+ )
986
1058
 
987
1059
  # Update progress bar to 100%
988
1060
  self.progress_bar.setValue(100)
@@ -1005,9 +1077,15 @@ class DocScanGUI(QMainWindow):
1005
1077
  # Determine status: completed takes precedence if scan finished normally, otherwise stopped
1006
1078
  # A scan is considered completed if it processed files or was intentionally stopped after starting
1007
1079
  scan_info = results.get("scan_info", {})
1008
- files_processed = scan_info.get("files_processed", scan_info.get("total_files", 0))
1080
+ files_processed = scan_info.get(
1081
+ "files_processed", scan_info.get("total_files", 0)
1082
+ )
1009
1083
  was_stopped = results.get("stopped", False)
1010
- status = t("scan_completed") if files_processed > 0 or not was_stopped else t("scan_stopped")
1084
+ status = (
1085
+ t("scan_completed")
1086
+ if files_processed > 0 or not was_stopped
1087
+ else t("scan_stopped")
1088
+ )
1011
1089
  self._log_message(status)
1012
1090
  self._log_message(t("found_matches_files", count=len(matches)))
1013
1091
 
@@ -1100,7 +1178,9 @@ class DocScanGUI(QMainWindow):
1100
1178
  for match in match_data.get("matches", []):
1101
1179
  details.append(f"Rule: {match.get('rule_name', '')}")
1102
1180
  details.append(f"Description: {match.get('rule_description', '')}")
1103
- details.append(f"Line {match.get('line_number', 0)}: {match.get('match', '')}")
1181
+ details.append(
1182
+ f"Line {match.get('line_number', 0)}: {match.get('match', '')}"
1183
+ )
1104
1184
  details.append("\nContext:")
1105
1185
  for ctx_line in match.get("context", []):
1106
1186
  details.append(f" {ctx_line}")
@@ -1115,16 +1195,22 @@ class DocScanGUI(QMainWindow):
1115
1195
  return
1116
1196
 
1117
1197
  default_name = f"scan_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
1118
- file_path, _ = QFileDialog.getSaveFileName(self, "Save Results", default_name, "JSON Files (*.json)")
1198
+ file_path, _ = QFileDialog.getSaveFileName(
1199
+ self, "Save Results", default_name, "JSON Files (*.json)"
1200
+ )
1119
1201
 
1120
1202
  if file_path:
1121
1203
  try:
1122
1204
  with open(file_path, "w", encoding="utf-8") as f:
1123
1205
  json.dump(self.scan_results, f, indent=2, ensure_ascii=False)
1124
1206
  self._log_message(t("results_saved_to", path=file_path))
1125
- QMessageBox.information(self, t("success"), t("results_saved_to", path=file_path))
1207
+ QMessageBox.information(
1208
+ self, t("success"), t("results_saved_to", path=file_path)
1209
+ )
1126
1210
  except Exception as e:
1127
- QMessageBox.critical(self, t("error"), t("failed_to_save_results", error=e))
1211
+ QMessageBox.critical(
1212
+ self, t("error"), t("failed_to_save_results", error=e)
1213
+ )
1128
1214
 
1129
1215
  def _open_results(self) -> None:
1130
1216
  """Open and load previously saved scan results from JSON file."""
@@ -1152,9 +1238,17 @@ class DocScanGUI(QMainWindow):
1152
1238
 
1153
1239
  # Update summary
1154
1240
  scan_info = results.get("scan_info", {})
1155
- processed = scan_info.get("files_processed", scan_info.get("total_files", 0))
1156
- self.files_label.setText(t("files_scanned").replace("0", f"{processed}/{scan_info.get('total_files', 0)}"))
1157
- self.matches_label.setText(f"{t('files_with_matches')} {scan_info.get('files_with_matches', 0)}")
1241
+ processed = scan_info.get(
1242
+ "files_processed", scan_info.get("total_files", 0)
1243
+ )
1244
+ self.files_label.setText(
1245
+ t("files_scanned").replace(
1246
+ "0", f"{processed}/{scan_info.get('total_files', 0)}"
1247
+ )
1248
+ )
1249
+ self.matches_label.setText(
1250
+ f"{t('files_with_matches')} {scan_info.get('files_with_matches', 0)}"
1251
+ )
1158
1252
 
1159
1253
  # Update progress bar to 100% since this is completed work
1160
1254
  self.progress_bar.setValue(100)
@@ -1167,9 +1261,13 @@ class DocScanGUI(QMainWindow):
1167
1261
  file_path = match_data.get("file_path", "")
1168
1262
  file_type = match_data.get("file_type", "")
1169
1263
  match_count = len(match_data.get("matches", []))
1170
- proc_time = match_data.get("metadata", {}).get("processing_time_seconds", 0)
1264
+ proc_time = match_data.get("metadata", {}).get(
1265
+ "processing_time_seconds", 0
1266
+ )
1171
1267
 
1172
- self.results_table.setItem(row, 0, QTableWidgetItem(Path(file_path).name))
1268
+ self.results_table.setItem(
1269
+ row, 0, QTableWidgetItem(Path(file_path).name)
1270
+ )
1173
1271
  self.results_table.setItem(row, 1, QTableWidgetItem(file_type))
1174
1272
  self.results_table.setItem(row, 2, QTableWidgetItem(str(match_count)))
1175
1273
  self.results_table.setItem(row, 3, QTableWidgetItem(f"{proc_time:.3f}"))
@@ -1180,10 +1278,14 @@ class DocScanGUI(QMainWindow):
1180
1278
 
1181
1279
  # Log the action
1182
1280
  self._log_message(t("loaded_results_from", path=file_path))
1183
- QMessageBox.information(self, t("success"), t("results_loaded_successfully", path=file_path))
1281
+ QMessageBox.information(
1282
+ self, t("success"), t("results_loaded_successfully", path=file_path)
1283
+ )
1184
1284
 
1185
1285
  except Exception as e:
1186
- QMessageBox.critical(self, t("error"), t("failed_to_load_results", error=str(e)))
1286
+ QMessageBox.critical(
1287
+ self, t("error"), t("failed_to_load_results", error=str(e))
1288
+ )
1187
1289
 
1188
1290
  def _clear_results(self) -> None:
1189
1291
  """Clear all results and logs."""
@@ -1227,7 +1329,9 @@ class DocScanGUI(QMainWindow):
1227
1329
  self.rules_edit.setText(rules_file)
1228
1330
 
1229
1331
  # Restore file types
1230
- file_types = self.config_manager.get("file_types", "pdf,docx,xlsx,pptx,txt,odt,rtf,epub,csv,xml,html,md")
1332
+ file_types = self.config_manager.get(
1333
+ "file_types", "pdf,docx,xlsx,pptx,txt,odt,rtf,epub,csv,xml,html,md"
1334
+ )
1231
1335
  self.types_edit.setText(file_types)
1232
1336
 
1233
1337
  # Validate rules file after loading config
File without changes