pysfi 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {pysfi-0.1.12.dist-info → pysfi-0.1.14.dist-info}/METADATA +1 -1
  2. pysfi-0.1.14.dist-info/RECORD +68 -0
  3. {pysfi-0.1.12.dist-info → pysfi-0.1.14.dist-info}/entry_points.txt +3 -0
  4. sfi/__init__.py +19 -2
  5. sfi/alarmclock/__init__.py +3 -0
  6. sfi/alarmclock/alarmclock.py +23 -40
  7. sfi/bumpversion/__init__.py +3 -1
  8. sfi/bumpversion/bumpversion.py +64 -15
  9. sfi/cleanbuild/__init__.py +3 -0
  10. sfi/cleanbuild/cleanbuild.py +5 -1
  11. sfi/cli.py +25 -4
  12. sfi/condasetup/__init__.py +1 -0
  13. sfi/condasetup/condasetup.py +91 -76
  14. sfi/docdiff/__init__.py +1 -0
  15. sfi/docdiff/docdiff.py +3 -2
  16. sfi/docscan/__init__.py +1 -1
  17. sfi/docscan/docscan.py +78 -23
  18. sfi/docscan/docscan_gui.py +152 -48
  19. sfi/filedate/filedate.py +12 -5
  20. sfi/img2pdf/img2pdf.py +453 -0
  21. sfi/llmclient/llmclient.py +31 -8
  22. sfi/llmquantize/llmquantize.py +76 -37
  23. sfi/llmserver/__init__.py +1 -0
  24. sfi/llmserver/llmserver.py +63 -13
  25. sfi/makepython/makepython.py +1145 -201
  26. sfi/pdfsplit/pdfsplit.py +45 -12
  27. sfi/pyarchive/__init__.py +1 -0
  28. sfi/pyarchive/pyarchive.py +908 -278
  29. sfi/pyembedinstall/pyembedinstall.py +88 -89
  30. sfi/pylibpack/pylibpack.py +561 -463
  31. sfi/pyloadergen/pyloadergen.py +372 -218
  32. sfi/pypack/pypack.py +510 -959
  33. sfi/pyprojectparse/pyprojectparse.py +337 -40
  34. sfi/pysourcepack/__init__.py +1 -0
  35. sfi/pysourcepack/pysourcepack.py +210 -131
  36. sfi/quizbase/quizbase_gui.py +2 -2
  37. sfi/taskkill/taskkill.py +168 -59
  38. sfi/which/which.py +11 -3
  39. pysfi-0.1.12.dist-info/RECORD +0 -62
  40. sfi/workflowengine/workflowengine.py +0 -444
  41. {pysfi-0.1.12.dist-info → pysfi-0.1.14.dist-info}/WHEEL +0 -0
  42. /sfi/{workflowengine → img2pdf}/__init__.py +0 -0
@@ -3,92 +3,106 @@ from __future__ import annotations
3
3
  import argparse
4
4
  import logging
5
5
  import os
6
+ import subprocess
6
7
  from pathlib import Path
8
+ from typing import Final
7
9
 
8
10
  logging.basicConfig(level=logging.INFO, format="%(message)s")
9
11
  logger = logging.getLogger(__name__)
10
- cwd = Path.cwd()
11
-
12
- _CONDA_MIRROR_URLS: dict[str, frozenset[str]] = {
13
- "tsinghua": frozenset(
14
- [
15
- "https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/",
16
- "https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/",
17
- "https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r/",
18
- "https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2/",
19
- "https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/pro/",
20
- "https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/",
21
- "https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/bioconda/",
22
- "https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/menpo/",
23
- "https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/",
24
- ]
25
- ),
26
- "ustc": frozenset(
27
- [
28
- "https://mirrors.ustc.edu.cn/anaconda/pkgs/main/",
29
- "https://mirrors.ustc.edu.cn/anaconda/pkgs/free/",
30
- "https://mirrors.ustc.edu.cn/anaconda/pkgs/r/",
31
- "https://mirrors.ustc.edu.cn/anaconda/pkgs/msys2/",
32
- "https://mirrors.ustc.edu.cn/anaconda/pkgs/pro/",
33
- "https://mirrors.ustc.edu.cn/anaconda/pkgs/dev/",
34
- "https://mirrors.ustc.edu.cn/anaconda/cloud/conda-forge/",
35
- "https://mirrors.ustc.edu.cn/anaconda/cloud/bioconda/",
36
- "https://mirrors.ustc.edu.cn/anaconda/cloud/menpo/",
37
- "https://mirrors.ustc.edu.cn/anaconda/cloud/pytorch/",
38
- ]
39
- ),
40
- "bsfu": frozenset(
41
- [
42
- "https://mirrors.bsfu.edu.cn/anaconda/pkgs/main/",
43
- "https://mirrors.bsfu.edu.cn/anaconda/pkgs/free/",
44
- "https://mirrors.bsfu.edu.cn/anaconda/pkgs/r/",
45
- "https://mirrors.bsfu.edu.cn/anaconda/pkgs/msys2/",
46
- "https://mirrors.bsfu.edu.cn/anaconda/pkgs/pro/",
47
- "https://mirrors.bsfu.edu.cn/anaconda/pkgs/dev/",
48
- "https://mirrors.bsfu.edu.cn/anaconda/cloud/conda-forge/",
49
- "https://mirrors.bsfu.edu.cn/anaconda/cloud/bioconda/",
50
- "https://mirrors.bsfu.edu.cn/anaconda/cloud/menpo/",
51
- "https://mirrors.bsfu.edu.cn/anaconda/cloud/pytorch/",
52
- ]
53
- ),
54
- "aliyun": frozenset(
55
- [
56
- "https://mirrors.aliyun.com/anaconda/pkgs/main/",
57
- "https://mirrors.aliyun.com/anaconda/pkgs/free/",
58
- "https://mirrors.aliyun.com/anaconda/pkgs/r/",
59
- "https://mirrors.aliyun.com/anaconda/pkgs/msys2/",
60
- "https://mirrors.aliyun.com/anaconda/pkgs/pro/",
61
- "https://mirrors.aliyun.com/anaconda/pkgs/dev/",
62
- "https://mirrors.aliyun.com/anaconda/cloud/conda-forge/",
63
- "https://mirrors.aliyun.com/anaconda/cloud/bioconda/",
64
- "https://mirrors.aliyun.com/anaconda/cloud/menpo/",
65
- "https://mirrors.aliyun.com/anaconda/cloud/pytorch/",
66
- ]
67
- ),
12
+
13
+ # Conda mirror URLs
14
+ _CONDA_MIRROR_URLS: Final[dict[str, frozenset[str]]] = {
15
+ "tsinghua": frozenset([
16
+ "https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/",
17
+ "https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/",
18
+ "https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r/",
19
+ "https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2/",
20
+ "https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/pro/",
21
+ "https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/",
22
+ "https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/bioconda/",
23
+ "https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/menpo/",
24
+ "https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/",
25
+ ]),
26
+ "ustc": frozenset([
27
+ "https://mirrors.ustc.edu.cn/anaconda/pkgs/main/",
28
+ "https://mirrors.ustc.edu.cn/anaconda/pkgs/free/",
29
+ "https://mirrors.ustc.edu.cn/anaconda/pkgs/r/",
30
+ "https://mirrors.ustc.edu.cn/anaconda/pkgs/msys2/",
31
+ "https://mirrors.ustc.edu.cn/anaconda/pkgs/pro/",
32
+ "https://mirrors.ustc.edu.cn/anaconda/pkgs/dev/",
33
+ "https://mirrors.ustc.edu.cn/anaconda/cloud/conda-forge/",
34
+ "https://mirrors.ustc.edu.cn/anaconda/cloud/bioconda/",
35
+ "https://mirrors.ustc.edu.cn/anaconda/cloud/menpo/",
36
+ "https://mirrors.ustc.edu.cn/anaconda/cloud/pytorch/",
37
+ ]),
38
+ "bsfu": frozenset([
39
+ "https://mirrors.bsfu.edu.cn/anaconda/pkgs/main/",
40
+ "https://mirrors.bsfu.edu.cn/anaconda/pkgs/free/",
41
+ "https://mirrors.bsfu.edu.cn/anaconda/pkgs/r/",
42
+ "https://mirrors.bsfu.edu.cn/anaconda/pkgs/msys2/",
43
+ "https://mirrors.bsfu.edu.cn/anaconda/pkgs/pro/",
44
+ "https://mirrors.bsfu.edu.cn/anaconda/pkgs/dev/",
45
+ "https://mirrors.bsfu.edu.cn/anaconda/cloud/conda-forge/",
46
+ "https://mirrors.bsfu.edu.cn/anaconda/cloud/bioconda/",
47
+ "https://mirrors.bsfu.edu.cn/anaconda/cloud/menpo/",
48
+ "https://mirrors.bsfu.edu.cn/anaconda/cloud/pytorch/",
49
+ ]),
50
+ "aliyun": frozenset([
51
+ "https://mirrors.aliyun.com/anaconda/pkgs/main/",
52
+ "https://mirrors.aliyun.com/anaconda/pkgs/free/",
53
+ "https://mirrors.aliyun.com/anaconda/pkgs/r/",
54
+ "https://mirrors.aliyun.com/anaconda/pkgs/msys2/",
55
+ "https://mirrors.aliyun.com/anaconda/pkgs/pro/",
56
+ "https://mirrors.aliyun.com/anaconda/pkgs/dev/",
57
+ "https://mirrors.aliyun.com/anaconda/cloud/conda-forge/",
58
+ "https://mirrors.aliyun.com/anaconda/cloud/bioconda/",
59
+ "https://mirrors.aliyun.com/anaconda/cloud/menpo/",
60
+ "https://mirrors.aliyun.com/anaconda/cloud/pytorch/",
61
+ ]),
68
62
  }
69
63
 
70
64
 
71
65
  def set_conda_mirror(mirror: str = "tsinghua") -> None:
72
- """Set the Conda mirror for the given channel."""
73
- if mirror in _CONDA_MIRROR_URLS:
74
- old_config = Path.home() / ".condarc"
75
- if old_config.exists():
76
- logger.info("Found existing .condarc file, backing it up")
77
- os.rename(old_config, Path.home() / ".condarc.bak")
78
- else:
79
- logger.debug("No existing .condarc file found")
80
-
81
- mirror_urls = _CONDA_MIRROR_URLS[mirror]
82
- for url in mirror_urls:
83
- logger.debug(f"Adding mirror: {url}")
84
- os.system(f"conda config --add channels {url}")
85
- os.system("conda config --set show_channel_urls yes")
86
- logger.info("Conda mirror set successfully")
87
- else:
66
+ """Set the Conda mirror for the given channel.
67
+
68
+ Args:
69
+ mirror: Mirror name (tsinghua, ustc, bsfu, or aliyun)
70
+ """
71
+ if mirror not in _CONDA_MIRROR_URLS:
88
72
  logger.error(f"Invalid mirror: {mirror}")
73
+ return
74
+
75
+ old_config = Path.home() / ".condarc"
76
+ if old_config.exists():
77
+ logger.info("Found existing .condarc file, backing it up")
78
+ os.rename(old_config, Path.home() / ".condarc.bak")
79
+ else:
80
+ logger.debug("No existing .condarc file found")
81
+
82
+ mirror_urls = _CONDA_MIRROR_URLS[mirror]
83
+ for url in mirror_urls:
84
+ logger.debug(f"Adding mirror: {url}")
85
+ try:
86
+ subprocess.run(["conda", "config", "--add", "channels", url], check=True)
87
+ except subprocess.CalledProcessError as e:
88
+ logger.error(f"Failed to add mirror {url}: {e}")
89
+ return
90
+
91
+ try:
92
+ subprocess.run(
93
+ ["conda", "config", "--set", "show_channel_urls", "yes"], check=True
94
+ )
95
+ logger.info("Conda mirror set successfully")
96
+ except subprocess.CalledProcessError as e:
97
+ logger.error(f"Failed to set show_channel_urls: {e}")
98
+
89
99
 
100
+ def parse_args() -> argparse.Namespace:
101
+ """Parse command line arguments.
90
102
 
91
- def parse_args():
103
+ Returns:
104
+ Parsed arguments
105
+ """
92
106
  parser = argparse.ArgumentParser(description="Setup Conda environment for SFI")
93
107
  parser.add_argument(
94
108
  "mirror",
@@ -103,7 +117,8 @@ def parse_args():
103
117
  return parser.parse_args()
104
118
 
105
119
 
106
- def main():
120
+ def main() -> None:
121
+ """Main entry point for condasetup CLI."""
107
122
  args = parse_args()
108
123
 
109
124
  if args.debug:
@@ -0,0 +1 @@
1
+
sfi/docdiff/docdiff.py CHANGED
@@ -10,9 +10,10 @@ import time
10
10
  from dataclasses import dataclass
11
11
  from functools import cached_property
12
12
  from pathlib import Path
13
- from typing import Any
13
+ from typing import Any, Final
14
14
 
15
- CONFIG_FILE = Path.home() / ".sfi" / "docdiff.json"
15
+ # Configuration file path
16
+ CONFIG_FILE: Final[Path] = Path.home() / ".pysfi" / "docdiff.json"
16
17
 
17
18
  logging.basicConfig(level=logging.INFO, format="%(message)s")
18
19
  logger = logging.getLogger(__name__)
sfi/docscan/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """Document scanner module for scanning and extracting content from various document formats."""
2
2
 
3
- __version__ = "0.1.12"
3
+ __version__ = "0.1.14"
sfi/docscan/docscan.py CHANGED
@@ -91,7 +91,9 @@ def t(key: str, **kwargs) -> str:
91
91
  Returns:
92
92
  Translated text
93
93
  """
94
- text = ZH_TRANSLATIONS.get(key, key) if USE_CHINESE else EN_TRANSLATIONS.get(key, key)
94
+ text = (
95
+ ZH_TRANSLATIONS.get(key, key) if USE_CHINESE else EN_TRANSLATIONS.get(key, key)
96
+ )
95
97
 
96
98
  # Format with kwargs if provided
97
99
  if kwargs:
@@ -123,7 +125,9 @@ class Rule:
123
125
  # Use re.ASCII for faster matching when possible
124
126
  self.compiled_pattern = re.compile(self.pattern, flags | re.ASCII)
125
127
  except re.error as e:
126
- logger.warning(t("invalid_regex_pattern", pattern=self.pattern, error=e))
128
+ logger.warning(
129
+ t("invalid_regex_pattern", pattern=self.pattern, error=e)
130
+ )
127
131
  self.compiled_pattern = None
128
132
  else:
129
133
  self.compiled_pattern = None
@@ -274,13 +278,18 @@ class DocumentScanner:
274
278
  "use_pdf_ocr": self.use_pdf_ocr,
275
279
  "use_process_pool": self.use_process_pool,
276
280
  },
277
- "rules": [{"name": r.name, "pattern": r.pattern, "is_regex": r.is_regex} for r in self.rules],
281
+ "rules": [
282
+ {"name": r.name, "pattern": r.pattern, "is_regex": r.is_regex}
283
+ for r in self.rules
284
+ ],
278
285
  "matches": [],
279
286
  }
280
287
 
281
288
  # Scan files in parallel
282
289
  processed = 0
283
- executor_class = ProcessPoolExecutor if self.use_process_pool else ThreadPoolExecutor
290
+ executor_class = (
291
+ ProcessPoolExecutor if self.use_process_pool else ThreadPoolExecutor
292
+ )
284
293
  executor = executor_class(max_workers=threads)
285
294
  self._executor = executor # Keep reference for forced shutdown
286
295
 
@@ -350,10 +359,17 @@ class DocumentScanner:
350
359
  break
351
360
 
352
361
  try:
353
- file_result = future.result(timeout=1.0) # Short timeout to allow quick stop
362
+ file_result = future.result(
363
+ timeout=1.0
364
+ ) # Short timeout to allow quick stop
354
365
  if file_result and file_result["matches"]:
355
366
  results["matches"].append(file_result)
356
- logger.info(t("found_matches_in_file", file_name=Path(file_result.get("file_path", "")).name))
367
+ logger.info(
368
+ t(
369
+ "found_matches_in_file",
370
+ file_name=Path(file_result.get("file_path", "")).name,
371
+ )
372
+ )
357
373
  except TimeoutError:
358
374
  logger.warning(t("task_timeout_scan_may_be_stopping"))
359
375
  if self.stopped:
@@ -366,7 +382,9 @@ class DocumentScanner:
366
382
 
367
383
  # Report progress
368
384
  if show_progress and processed % 10 == 0:
369
- logger.info(t("progress_report", processed=processed, total=len(files)))
385
+ logger.info(
386
+ t("progress_report", processed=processed, total=len(files))
387
+ )
370
388
 
371
389
  # Call progress callback if set
372
390
  if self._progress_callback:
@@ -391,7 +409,9 @@ class DocumentScanner:
391
409
  if self.stopped:
392
410
  logger.info(t("scan_stopped_processed_files", processed=processed))
393
411
  else:
394
- logger.info(t("scan_complete_found_matches", matches_count=len(results["matches"])))
412
+ logger.info(
413
+ t("scan_complete_found_matches", matches_count=len(results["matches"]))
414
+ )
395
415
 
396
416
  return results
397
417
 
@@ -493,7 +513,9 @@ class DocumentScanner:
493
513
  return {}
494
514
 
495
515
  except Exception as e:
496
- logger.warning(t("could_not_extract_text_from_file", file_path=file_path, error=e))
516
+ logger.warning(
517
+ t("could_not_extract_text_from_file", file_path=file_path, error=e)
518
+ )
497
519
  return {}
498
520
 
499
521
  processing_time = time.perf_counter() - file_start_time
@@ -549,14 +571,18 @@ class DocumentScanner:
549
571
  try:
550
572
  return self._extract_pdf_fitz(file_path)
551
573
  except Exception as e:
552
- logger.warning(t("pymupdf_failed_for_file", file_name=file_path.name, error=e))
574
+ logger.warning(
575
+ t("pymupdf_failed_for_file", file_name=file_path.name, error=e)
576
+ )
553
577
 
554
578
  # Fallback to pypdf
555
579
  if pypdf is not None:
556
580
  try:
557
581
  return self._extract_pdf_pypdf(file_path)
558
582
  except Exception as e:
559
- logger.error(t("pypdf_also_failed_for_file", file_name=file_path.name, error=e))
583
+ logger.error(
584
+ t("pypdf_also_failed_for_file", file_name=file_path.name, error=e)
585
+ )
560
586
  return "", {}
561
587
 
562
588
  logger.warning(t("no_pdf_library_installed"))
@@ -632,7 +658,9 @@ class DocumentScanner:
632
658
  except Exception as e:
633
659
  if doc:
634
660
  doc.close()
635
- logger.warning(t("pymupdf_error_trying_fallback", file_path=file_path, error=e))
661
+ logger.warning(
662
+ t("pymupdf_error_trying_fallback", file_path=file_path, error=e)
663
+ )
636
664
  # Re-raise to trigger fallback to pypdf
637
665
  raise
638
666
 
@@ -764,8 +792,12 @@ class DocumentScanner:
764
792
  text_parts.append(text)
765
793
 
766
794
  metadata = {
767
- "title": book.get_metadata("DC", "title")[0][0] if book.get_metadata("DC", "title") else "", # pyright: ignore[reportAttributeAccessIssue]
768
- "author": book.get_metadata("DC", "creator")[0][0] if book.get_metadata("DC", "creator") else "", # pyright: ignore[reportAttributeAccessIssue]
795
+ "title": book.get_metadata("DC", "title")[0][0]
796
+ if book.get_metadata("DC", "title")
797
+ else "", # pyright: ignore[reportAttributeAccessIssue]
798
+ "author": book.get_metadata("DC", "creator")[0][0]
799
+ if book.get_metadata("DC", "creator")
800
+ else "", # pyright: ignore[reportAttributeAccessIssue]
769
801
  "format": "EPUB",
770
802
  }
771
803
 
@@ -810,7 +842,9 @@ class DocumentScanner:
810
842
  root = tree.getroot()
811
843
 
812
844
  # Extract all text content
813
- text_parts = [elem.text for elem in root.iter() if elem.text and elem.text.strip()]
845
+ text_parts = [
846
+ elem.text for elem in root.iter() if elem.text and elem.text.strip()
847
+ ]
814
848
  text = "\n".join(text_parts)
815
849
 
816
850
  metadata = {
@@ -954,7 +988,9 @@ class DocumentScanner:
954
988
  wb.close()
955
989
  return "", {}
956
990
 
957
- row_text = " | ".join(str(cell) if cell is not None else "" for cell in row)
991
+ row_text = " | ".join(
992
+ str(cell) if cell is not None else "" for cell in row
993
+ )
958
994
  if row_text.strip():
959
995
  text_parts.append(row_text)
960
996
 
@@ -1017,7 +1053,9 @@ class DocumentScanner:
1017
1053
 
1018
1054
  return text, metadata
1019
1055
  except Exception as e:
1020
- logger.warning(t("could_not_perform_ocr_on_file", file_path=file_path, error=e))
1056
+ logger.warning(
1057
+ t("could_not_perform_ocr_on_file", file_path=file_path, error=e)
1058
+ )
1021
1059
  return "", {}
1022
1060
 
1023
1061
  def _extract_text(self, file_path: Path) -> tuple[str, dict[str, Any]]:
@@ -1047,8 +1085,12 @@ def main():
1047
1085
  USE_CHINESE = temp_args.lang == "zh"
1048
1086
 
1049
1087
  parser = argparse.ArgumentParser(description=t("document_scanner_description"))
1050
- parser.add_argument("input", type=str, nargs="?", default=str(cwd), help=t("input_directory_help"))
1051
- parser.add_argument("-r", "--rules", type=str, default="rules.json", help=t("rules_file_help"))
1088
+ parser.add_argument(
1089
+ "input", type=str, nargs="?", default=str(cwd), help=t("input_directory_help")
1090
+ )
1091
+ parser.add_argument(
1092
+ "-r", "--rules", type=str, default="rules.json", help=t("rules_file_help")
1093
+ )
1052
1094
  parser.add_argument("--recursive", action="store_true", help=t("recursive_help"))
1053
1095
  parser.add_argument(
1054
1096
  "-f",
@@ -1056,7 +1098,9 @@ def main():
1056
1098
  help=t("file_types_help"),
1057
1099
  default="pdf,docx,xlsx,pptx,txt,odt,rtf,epub,csv,xml,html,md,jpg,jpeg,png,gif,bmp,tiff",
1058
1100
  )
1059
- parser.add_argument("--use-pdf-ocr", help=t("use_pdf_ocr_help"), action="store_true")
1101
+ parser.add_argument(
1102
+ "--use-pdf-ocr", help=t("use_pdf_ocr_help"), action="store_true"
1103
+ )
1060
1104
  parser.add_argument(
1061
1105
  "--use-process-pool",
1062
1106
  help=t("use_process_pool_help"),
@@ -1074,7 +1118,9 @@ def main():
1074
1118
  parser.add_argument("-v", "--verbose", help=t("verbose_help"), action="store_true")
1075
1119
 
1076
1120
  # 添加语言参数
1077
- parser.add_argument("--lang", help=t("language_help"), choices=["en", "zh"], default="zh")
1121
+ parser.add_argument(
1122
+ "--lang", help=t("language_help"), choices=["en", "zh"], default="zh"
1123
+ )
1078
1124
 
1079
1125
  args = parser.parse_args()
1080
1126
 
@@ -1129,11 +1175,20 @@ def main():
1129
1175
  file_types = [ft.strip() for ft in args.file_types.split(",")]
1130
1176
 
1131
1177
  # Create scanner and run scan
1132
- scanner = DocumentScanner(input_dir, rules, file_types, args.use_pdf_ocr, args.use_process_pool, args.batch_size)
1178
+ scanner = DocumentScanner(
1179
+ input_dir,
1180
+ rules,
1181
+ file_types,
1182
+ args.use_pdf_ocr,
1183
+ args.use_process_pool,
1184
+ args.batch_size,
1185
+ )
1133
1186
  results = scanner.scan(threads=args.threads, show_progress=args.progress)
1134
1187
 
1135
1188
  # Save results to JSON file in input directory
1136
- output_file = input_dir / f"scan_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
1189
+ output_file = (
1190
+ input_dir / f"scan_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
1191
+ )
1137
1192
  with open(output_file, "w", encoding="utf-8") as f:
1138
1193
  json.dump(results, f, indent=2, ensure_ascii=False)
1139
1194