pysfi 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pysfi-0.1.10.dist-info → pysfi-0.1.12.dist-info}/METADATA +9 -7
- pysfi-0.1.12.dist-info/RECORD +62 -0
- {pysfi-0.1.10.dist-info → pysfi-0.1.12.dist-info}/entry_points.txt +13 -2
- sfi/__init__.py +1 -1
- sfi/alarmclock/alarmclock.py +40 -40
- sfi/bumpversion/__init__.py +1 -1
- sfi/cleanbuild/cleanbuild.py +155 -0
- sfi/condasetup/condasetup.py +116 -0
- sfi/docdiff/docdiff.py +238 -0
- sfi/docscan/__init__.py +1 -1
- sfi/docscan/docscan_gui.py +1 -1
- sfi/docscan/lang/eng.py +152 -152
- sfi/docscan/lang/zhcn.py +170 -170
- sfi/filedate/filedate.py +185 -112
- sfi/gittool/__init__.py +2 -0
- sfi/gittool/gittool.py +401 -0
- sfi/llmclient/llmclient.py +592 -0
- sfi/llmquantize/llmquantize.py +480 -0
- sfi/llmserver/llmserver.py +335 -0
- sfi/makepython/makepython.py +2 -2
- sfi/pdfsplit/pdfsplit.py +4 -4
- sfi/pyarchive/pyarchive.py +418 -0
- sfi/pyembedinstall/__init__.py +0 -0
- sfi/pyembedinstall/pyembedinstall.py +629 -0
- sfi/pylibpack/pylibpack.py +813 -269
- sfi/pylibpack/rules/numpy.json +22 -0
- sfi/pylibpack/rules/pymupdf.json +10 -0
- sfi/pylibpack/rules/pyqt5.json +19 -0
- sfi/pylibpack/rules/pyside2.json +23 -0
- sfi/pylibpack/rules/scipy.json +23 -0
- sfi/pylibpack/rules/shiboken2.json +24 -0
- sfi/pyloadergen/pyloadergen.py +271 -572
- sfi/pypack/pypack.py +822 -471
- sfi/pyprojectparse/__init__.py +0 -0
- sfi/pyprojectparse/pyprojectparse.py +500 -0
- sfi/pysourcepack/pysourcepack.py +308 -369
- sfi/quizbase/__init__.py +0 -0
- sfi/quizbase/quizbase.py +828 -0
- sfi/quizbase/quizbase_gui.py +987 -0
- sfi/regexvalidate/__init__.py +0 -0
- sfi/regexvalidate/regex_help.html +284 -0
- sfi/regexvalidate/regexvalidate.py +468 -0
- sfi/taskkill/taskkill.py +0 -2
- pysfi-0.1.10.dist-info/RECORD +0 -39
- sfi/embedinstall/embedinstall.py +0 -478
- sfi/projectparse/projectparse.py +0 -152
- {pysfi-0.1.10.dist-info → pysfi-0.1.12.dist-info}/WHEEL +0 -0
- /sfi/{embedinstall → llmclient}/__init__.py +0 -0
- /sfi/{projectparse → llmquantize}/__init__.py +0 -0
sfi/docdiff/docdiff.py
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import atexit
|
|
5
|
+
import json
|
|
6
|
+
import logging
|
|
7
|
+
import platform
|
|
8
|
+
import subprocess
|
|
9
|
+
import time
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from functools import cached_property
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
CONFIG_FILE = Path.home() / ".sfi" / "docdiff.json"
|
|
16
|
+
|
|
17
|
+
logging.basicConfig(level=logging.INFO, format="%(message)s")
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class DocDiffConfig:
    """Document comparison configuration, persisted as JSON in ``CONFIG_FILE``.

    The class-level values below are the defaults.  ``__init__`` is written by
    hand (so ``@dataclass`` keeps it instead of generating one) and overlays
    the recognised options found in the configuration file as instance
    attributes.
    """

    DOC_DIFF_TITLE: str = "Comparison Result"  # Prefix of the generated file name
    OUTPUT_DIR: str = str(Path.home())  # If empty, the new document's directory is used
    COMPARE_MODE: str = "original"  # Options: original, revised
    SHOW_CHANGES: bool = True
    TRACK_REVISIONS: bool = True

    def __init__(self) -> None:
        """Load overrides from ``CONFIG_FILE``; keep the defaults otherwise.

        An unreadable or malformed file is reported with a warning and
        ignored, because this constructor runs at import time and must not
        prevent the tool from starting.
        """
        # Local import: the module only imports ``dataclass`` at file level.
        from dataclasses import fields

        if not CONFIG_FILE.exists():
            logger.info("Using default configuration")
            return

        logger.info("Loading configuration from %s", CONFIG_FILE)
        try:
            config_data = json.loads(CONFIG_FILE.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            # json.JSONDecodeError and UnicodeDecodeError both derive from
            # ValueError.
            logger.warning(
                "Ignoring unreadable configuration file %s: %s", CONFIG_FILE, exc
            )
            return

        if not isinstance(config_data, dict):
            logger.warning(
                "Ignoring configuration file %s: expected a JSON object", CONFIG_FILE
            )
            return

        # Accept declared options only.  A plain ``hasattr`` test would also
        # match methods, so a "save" key in the file could replace ``save()``.
        known_options = {option.name for option in fields(self)}
        for key, value in config_data.items():
            if key in known_options:
                setattr(self, key, value)

    def save(self) -> None:
        """Write the instance-level settings to ``CONFIG_FILE`` as JSON.

        Only options that were loaded from the file or assigned at runtime
        are stored; class-level defaults that were never assigned are not
        written.
        """
        CONFIG_FILE.parent.mkdir(parents=True, exist_ok=True)
        CONFIG_FILE.write_text(json.dumps(vars(self), indent=4), encoding="utf-8")
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
conf = DocDiffConfig()
|
|
49
|
+
atexit.register(conf.save)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass(frozen=True)
class DiffDocCommand:
    """Compare two Word documents through Word's COM automation interface.

    Attributes:
        old_doc: Path of the original document (``.doc`` or ``.docx``).
        new_doc: Path of the revised document (``.doc`` or ``.docx``).
        output_path: Target file or directory for the result.  ``None``
            selects ``conf.OUTPUT_DIR`` or, when that is empty, the directory
            of ``new_doc``.
    """

    old_doc: Path
    new_doc: Path
    output_path: Path | None = None

    def run(self) -> None:
        """Run the comparison and save the result to :attr:`output`.

        Failures are logged instead of raised.  Once Word has been started
        it is always shut down again, whether or not the comparison worked.
        """
        if platform.system() != "Windows":
            logger.error("This tool is only available on Windows.")
            return

        for label, doc in (("Old", self.old_doc), ("New", self.new_doc)):
            if not doc.exists():
                logger.error("%s file does not exist: %s", label, doc)
                return

        if not self.validate_files:
            logger.error("Invalid file paths or extensions")
            return

        try:
            app = self.word_app
        except ImportError:
            # word_app has already logged the traceback for this case.
            logger.error("Word application is not available")
            return
        except Exception:
            logger.exception("Word application is not available")
            return

        try:
            compared = self.compare_data
            if compared is not None:
                target = self.output
                target.parent.mkdir(parents=True, exist_ok=True)
                compared.SaveAs2(str(target))
                compared.Close()
                logger.info("Comparison completed. Saved to: %s", target)
        except Exception as e:
            logger.exception("Comparison failed: %s", e)
        finally:
            # Runs on every path after Word was started, including a failed
            # comparison, so no hidden Word instance is left behind.
            self._quit_word(app)

    def _quit_word(self, app: Any) -> None:
        """Close every document of *app* without saving, then quit Word."""
        try:
            app.Documents.Close(SaveChanges=False)
            app.Quit()
        except Exception:
            logger.exception("Close document failed!")
        else:
            return

        # Last resort only: taskkill by image name terminates every running
        # Word process, including ones this tool did not start.
        logger.warning("Word did not quit cleanly; terminating WINWORD.EXE")
        try:
            result = subprocess.run(
                ["taskkill", "/f", "/t", "/im", "WINWORD.EXE"], check=False
            )
        except OSError:
            logger.exception("Taskkill failed!")
        else:
            if result.returncode == 0:
                logger.info("Taskkill completed successfully")
            else:
                logger.error("Taskkill failed!")

    @cached_property
    def word_app(self) -> Any:
        """Start a hidden Word instance; the object is created once and cached.

        Raises:
            ImportError: If ``win32com.client`` cannot be imported.
        """
        try:
            import win32com.client as win32  # type: ignore
        except ImportError:
            logger.exception("win32com.client is not installed, exiting.")
            raise

        app = win32.gencache.EnsureDispatch("Word.Application")  # type: ignore
        logger.info("Started Word application")
        app.Visible = False
        app.DisplayAlerts = False

        try:
            app.Options.TrackRevisions = conf.TRACK_REVISIONS
        except AttributeError:
            logger.warning("TrackRevisions option not available in this Word version")

        return app

    @cached_property
    def validate_files(self) -> bool:
        """Whether both inputs exist and carry a ``.doc``/``.docx`` suffix."""
        return all(
            doc.exists() and doc.suffix.lower() in (".doc", ".docx")
            for doc in (self.old_doc, self.new_doc)
        )

    @cached_property
    def compare_data(self) -> Any:
        """Return Word's comparison document, or ``None`` if comparing failed.

        Opening the two inputs happens inside the ``try`` as well, so a
        failure to open either file is logged and reported as ``None``.
        """
        try:
            # NOTE(review): Microsoft's reference lists the parameters after
            # the two documents as Destination, Granularity,
            # CompareFormatting.  COMPARE_MODE therefore appears to select
            # the Granularity argument while Destination stays 0 - confirm
            # that this is intended.
            compared = self.word_app.CompareDocuments(
                self.old_doc_data,
                self.new_doc_data,
                0,
                2 if conf.COMPARE_MODE == "revised" else 0,
                True,
            )
        except Exception as e:
            logger.exception("Comparison failed: %s", e)
            return None

        if not compared:
            return None

        logger.info("Comparison completed successfully")
        compared.ShowRevisions = conf.SHOW_CHANGES
        return compared

    @cached_property
    def old_doc_data(self) -> Any:
        """Open the original document in Word and return the document object."""
        logger.info("Opening old file: %s", self.old_doc)
        return self.word_app.Documents.Open(str(self.old_doc.resolve()))

    @cached_property
    def new_doc_data(self) -> Any:
        """Open the revised document in Word and return the document object."""
        logger.info("Opening new file: %s", self.new_doc)
        return self.word_app.Documents.Open(str(self.new_doc.resolve()))

    @cached_property
    def output(self) -> Path:
        """Absolute path of the file the comparison result is saved to.

        * ``output_path is None``: a generated file name inside
          ``conf.OUTPUT_DIR`` or, when that is empty, next to ``new_doc``.
        * ``output_path`` is a directory, or does not exist and has no
          suffix: a generated file name inside that directory.
        * anything else: ``output_path`` itself, which may be a file that
          does not exist yet.

        The path is made absolute because it is handed to Word, a separate
        process; the input documents are passed as resolved paths for the
        same reason.
        """
        output_filename = (
            f"{conf.DOC_DIFF_TITLE}@{time.strftime('%Y%m%d_%H_%M_%S')}.docx"
        )

        target = self.output_path
        if target is None:
            base_dir = Path(conf.OUTPUT_DIR) if conf.OUTPUT_DIR else self.new_doc.parent
            return (base_dir / output_filename).absolute()

        if target.is_dir() or (not target.exists() and not target.suffix):
            return (target / output_filename).absolute()

        return target.absolute()
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command line and apply the given options to ``conf``.

    Args:
        argv: Argument list to parse.  ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Returns:
        The parsed namespace.  ``args.output`` is always a string: the ``-o``
        value if given, otherwise ``--output-dir`` if given, otherwise
        ``"."``.
    """
    parser = argparse.ArgumentParser(description="Compare two doc/docx files.")
    parser.add_argument(
        "files", nargs=2, help="Two input files to compare (old_file new_file)"
    )
    parser.add_argument(
        "-o",
        "--output",
        dest="output",
        default=None,
        help="Output file or directory "
        "(default: --output-dir if given, else the current directory)",
    )
    parser.add_argument("--title", help="Title for the comparison result")

    # The two switches contradict each other, so argparse rejects the
    # combination instead of letting one silently win.
    visibility = parser.add_mutually_exclusive_group()
    visibility.add_argument(
        "--show-changes", action="store_true", help="Show changes in the comparison"
    )
    visibility.add_argument(
        "--hide-changes", action="store_true", help="Hide changes in the comparison"
    )

    parser.add_argument(
        "--compare-mode",
        choices=["original", "revised"],
        help="Compare mode: original or revised",
    )
    parser.add_argument("--output-dir", help="Output directory for the result file")

    args = parser.parse_args(argv)

    # Update configuration from command line arguments
    if args.title:
        conf.DOC_DIFF_TITLE = args.title
    if args.show_changes:
        conf.SHOW_CHANGES = True
    if args.hide_changes:
        conf.SHOW_CHANGES = False
    if args.compare_mode:
        conf.COMPARE_MODE = args.compare_mode
    if args.output_dir:
        conf.OUTPUT_DIR = args.output_dir

    # With a hard-coded "." default for -o, --output-dir could never take
    # effect.  An explicit -o still has priority over --output-dir.
    if args.output is None:
        args.output = args.output_dir or "."

    return args
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def main() -> None:
    """Command-line entry point: parse the arguments and run one comparison."""
    cli = parse_args()
    old_name, new_name = cli.files[0], cli.files[1]
    command = DiffDocCommand(Path(old_name), Path(new_name), Path(cli.output))
    command.run()
|
sfi/docscan/__init__.py
CHANGED
sfi/docscan/docscan_gui.py
CHANGED
sfi/docscan/lang/eng.py
CHANGED
|
@@ -1,152 +1,152 @@
|
|
|
1
|
-
"""English translation defaults for docscan GUI."""
|
|
2
|
-
|
|
3
|
-
ENGLISH_DEFAULTS = {
|
|
4
|
-
"window_title": "Document Scanner GUI",
|
|
5
|
-
"input_config_tab": "Input Configuration",
|
|
6
|
-
"scan_options_tab": "Scan Options",
|
|
7
|
-
"input_directory": "Input Directory:",
|
|
8
|
-
"browse": "Browse...",
|
|
9
|
-
"rules_file": "Rules File:",
|
|
10
|
-
"file_types": "File Types:",
|
|
11
|
-
"use_pdf_ocr": "Use PDF OCR",
|
|
12
|
-
"use_process_pool": "Use Process Pool (CPU-intensive)",
|
|
13
|
-
"threads": "Threads:",
|
|
14
|
-
"batch_size": "Batch Size:",
|
|
15
|
-
"start_scan": "Start Scan",
|
|
16
|
-
"pause": "Pause",
|
|
17
|
-
"resume": "Resume",
|
|
18
|
-
"stop": "Stop",
|
|
19
|
-
"save_results": "Save Results",
|
|
20
|
-
"clear_results": "Clear Results",
|
|
21
|
-
"results": "Results",
|
|
22
|
-
"files_scanned": "Files Scanned:",
|
|
23
|
-
"files_with_matches": "Files with Matches:",
|
|
24
|
-
"progress_log": "Progress Log:",
|
|
25
|
-
"match_details": "Match Details:",
|
|
26
|
-
"selected_match_context": "Selected Match Context:",
|
|
27
|
-
"file": "File",
|
|
28
|
-
"type": "Type",
|
|
29
|
-
"matches": "Matches",
|
|
30
|
-
"time": "Time (s)",
|
|
31
|
-
"select_input_directory": "Select Input Directory",
|
|
32
|
-
"select_rules_file": "Select Rules File",
|
|
33
|
-
"json_files": "JSON Files (*.json)",
|
|
34
|
-
"save_results_dialog": "Save Results",
|
|
35
|
-
"default_results_filename": "scan_results_{datetime}.json",
|
|
36
|
-
"error": "Error",
|
|
37
|
-
"warning": "Warning",
|
|
38
|
-
"success": "Success",
|
|
39
|
-
"invalid_input_directory": "Invalid input directory",
|
|
40
|
-
"no_valid_rules": "No valid rules found",
|
|
41
|
-
"failed_to_load_rules": "Failed to load rules: {error}",
|
|
42
|
-
"no_results_to_save": "No results to save",
|
|
43
|
-
"failed_to_save_results": "Failed to save results: {error}",
|
|
44
|
-
"scan_failed": "Scan failed: {error}",
|
|
45
|
-
"starting_scan": "Starting scan...",
|
|
46
|
-
"scan_complete": "Scan complete!",
|
|
47
|
-
"pausing_scan": "Pausing scan...",
|
|
48
|
-
"stopping_scan": "Stopping scan...",
|
|
49
|
-
"scan_completed": "Scan completed",
|
|
50
|
-
"scan_stopped": "Scan stopped",
|
|
51
|
-
"found_matches_files": "Found matches in {count} files",
|
|
52
|
-
"file_info": "File",
|
|
53
|
-
"type_info": "Type",
|
|
54
|
-
"size": "Size",
|
|
55
|
-
"bytes": "bytes",
|
|
56
|
-
"rule": "Rule",
|
|
57
|
-
"description": "Description",
|
|
58
|
-
"line": "Line {line}: {match}",
|
|
59
|
-
"context": "Context:",
|
|
60
|
-
"default_file_types": "pdf,docx,xlsx,pptx,txt,odt,rtf,epub,csv,xml,html,md,jpg,jpeg,png,gif,bmp,tiff",
|
|
61
|
-
"default_rules_file": "rules.json",
|
|
62
|
-
"results_saved_to": "Results saved to:\n{path}",
|
|
63
|
-
"files_scanned_zero": "Files Scanned: 0",
|
|
64
|
-
"files_with_matches_zero": "Files with Matches: 0",
|
|
65
|
-
"language_settings": "Language Settings",
|
|
66
|
-
"processing_options": "Processing Options",
|
|
67
|
-
"performance_settings": "Performance Settings",
|
|
68
|
-
"language_label": "Language:",
|
|
69
|
-
"ocr_tooltip": "Enable OCR for scanned PDF files to extract text from images",
|
|
70
|
-
"process_pool_tooltip": "Use multiple processes for CPU-intensive operations (may increase memory usage)",
|
|
71
|
-
"threads_tooltip": "Number of worker threads (higher values may improve speed but use more CPU)",
|
|
72
|
-
"batch_size_tooltip": "Number of files to process in each batch (larger batches may improve throughput)",
|
|
73
|
-
"file_types_tooltip": "File types to scan (comma separated)",
|
|
74
|
-
"file_menu": "&File",
|
|
75
|
-
"settings_menu": "&Settings",
|
|
76
|
-
"help_menu": "&Help",
|
|
77
|
-
"preferences": "&Preferences...",
|
|
78
|
-
"exit": "E&xit",
|
|
79
|
-
"about": "&About",
|
|
80
|
-
"about_title": "About Document Scanner",
|
|
81
|
-
"about_text": "Document Scanner GUI\n\nVersion 1.0",
|
|
82
|
-
"language": "Language",
|
|
83
|
-
"open_results": "Open Results...",
|
|
84
|
-
"open_results_file": "Open Scan Results",
|
|
85
|
-
"loaded_results_from": "Loaded results from: {path}",
|
|
86
|
-
"results_loaded_successfully": "Results loaded successfully from:\n{path}",
|
|
87
|
-
"failed_to_load_results": "Failed to load results: {error}",
|
|
88
|
-
# Command-line specific translations
|
|
89
|
-
"document_scanner_description": "Scan documents and extract text, images, and metadata with certain rules.",
|
|
90
|
-
"input_directory_help": "Input directory containing documents to scan",
|
|
91
|
-
"rules_file_help": "Rules file (JSON)",
|
|
92
|
-
"recursive_help": "Scan files recursively",
|
|
93
|
-
"file_types_help": "File types to scan (comma-separated)",
|
|
94
|
-
"use_pdf_ocr_help": "Use PDF OCR for image-based PDFs",
|
|
95
|
-
"use_process_pool_help": "Use process pool instead of thread pool (better for CPU-intensive tasks)",
|
|
96
|
-
"batch_size_help": "Number of files to process in each batch",
|
|
97
|
-
"threads_help": "Number of threads for parallel scanning",
|
|
98
|
-
"progress_help": "Show progress bar",
|
|
99
|
-
"verbose_help": "Verbose output",
|
|
100
|
-
"language_help": "Set language (en for English, zh for Chinese)",
|
|
101
|
-
# Status and logging messages
|
|
102
|
-
"scanning_directory": "Scanning directory: {directory}",
|
|
103
|
-
"found_files_to_scan": "Found {count} files to scan",
|
|
104
|
-
"scan_resumed": "Scan resumed",
|
|
105
|
-
"scan_stopped_before_submitting_tasks": "Scan stopped by user before submitting all tasks",
|
|
106
|
-
"scan_paused": "Scan paused",
|
|
107
|
-
"scan_stopped_while_paused": "Scan stopped while paused",
|
|
108
|
-
"scan_stopped_by_user_canceling_tasks": "Scan stopped by user, cancelling remaining tasks...",
|
|
109
|
-
"task_timeout_scan_may_be_stopping": "Task timeout, scan may be stopping",
|
|
110
|
-
"error_scanning_file": "Error scanning file: {error}",
|
|
111
|
-
"progress_report": "Progress: {processed}/{total} files processed",
|
|
112
|
-
"force_shutting_down_executor": "Force shutting down executor...",
|
|
113
|
-
"scan_stopped_processed_files": "Scan stopped. Processed {processed} files",
|
|
114
|
-
"scan_complete_found_matches": "Scan complete. Found matches in {matches_count} files",
|
|
115
|
-
"found_matches_in_file": "Found matches in: {file_name}",
|
|
116
|
-
"processed_file_info": "Processed {file_name} ({ext}) in {time:.3f}s - {matches_count} matches found",
|
|
117
|
-
"could_not_extract_text_from_file": "Could not extract text from {file_path}: {error}",
|
|
118
|
-
"pymupdf_failed_for_file": "PyMuPDF failed for {file_name}: {error}",
|
|
119
|
-
"pypdf_also_failed_for_file": "pypdf also failed for {file_name}: {error}",
|
|
120
|
-
"no_pdf_library_installed": "No PDF library installed (pymupdf or pypdf)",
|
|
121
|
-
"pymupdf_not_installed": "PyMuPDF not installed",
|
|
122
|
-
"no_pages_found_in_file": "No pages found in {file_path}",
|
|
123
|
-
"no_metadata_found_in_file": "No metadata found in {file_path}",
|
|
124
|
-
"pymupdf_error_trying_fallback": "PyMuPDF error on {file_path}: {error}, trying pypdf fallback",
|
|
125
|
-
"pypdf_not_installed_skipping_extraction": "pypdf not installed, skipping PDF extraction",
|
|
126
|
-
"error_extracting_pdf_with_pypdf": "Error extracting PDF with pypdf: {error}",
|
|
127
|
-
"odfpy_not_installed_skipping_extraction": "odfpy not installed, skipping ODT extraction",
|
|
128
|
-
"error_extracting_odt": "Error extracting ODT: {error}",
|
|
129
|
-
"error_extracting_rtf": "Error extracting RTF: {error}",
|
|
130
|
-
"ebooklib_not_installed_skipping_extraction": "ebooklib not installed, skipping EPUB extraction",
|
|
131
|
-
"error_extracting_epub": "Error extracting EPUB: {error}",
|
|
132
|
-
"error_extracting_csv": "Error extracting CSV: {error}",
|
|
133
|
-
"error_extracting_xml": "Error extracting XML: {error}",
|
|
134
|
-
"error_extracting_html": "Error extracting HTML: {error}",
|
|
135
|
-
"error_extracting_markdown": "Error extracting Markdown: {error}",
|
|
136
|
-
"python_docx_not_installed_skipping_extraction": "python-docx not installed, skipping DOCX extraction",
|
|
137
|
-
"openpyxl_not_installed_skipping_extraction": "openpyxl not installed, skipping XLSX extraction",
|
|
138
|
-
"python_pptx_not_installed_skipping_extraction": "python-pptx not installed, skipping PPTX extraction",
|
|
139
|
-
"pillow_or_tesseract_not_installed_skipping_ocr": "PIL or pytesseract not installed, skipping image OCR",
|
|
140
|
-
"could_not_perform_ocr_on_file": "Could not perform OCR on {file_path}: {error}",
|
|
141
|
-
"input_directory_does_not_exist": "Input directory does not exist: {input_dir}",
|
|
142
|
-
"using_rules_file": "Using rules file: {rules_file}",
|
|
143
|
-
"invalid_json_in_rules_file": "Invalid JSON in rules file: {error}",
|
|
144
|
-
"invalid_rules_format": "Invalid rules format. Expected a list or dict with 'rules' key",
|
|
145
|
-
"no_valid_rules_found": "No valid rules found",
|
|
146
|
-
"total_time_elapsed": "Total time elapsed: {time:.2f}s",
|
|
147
|
-
"invalid_regex_pattern": "Invalid regex pattern '{pattern}': {error}",
|
|
148
|
-
"rules_file_does_not_exist_alt": "Rules file does not exist: {rules_file}",
|
|
149
|
-
"image_files_supported": "Image files supported (requires OCR)",
|
|
150
|
-
"include_image_formats": "Include Image Formats",
|
|
151
|
-
"include_image_formats_tooltip": "Include image formats (jpg, jpeg, png, gif, bmp, tiff) in scan",
|
|
152
|
-
}
|
|
1
|
+
"""English translation defaults for docscan GUI."""
|
|
2
|
+
|
|
3
|
+
ENGLISH_DEFAULTS = {
|
|
4
|
+
"window_title": "Document Scanner GUI",
|
|
5
|
+
"input_config_tab": "Input Configuration",
|
|
6
|
+
"scan_options_tab": "Scan Options",
|
|
7
|
+
"input_directory": "Input Directory:",
|
|
8
|
+
"browse": "Browse...",
|
|
9
|
+
"rules_file": "Rules File:",
|
|
10
|
+
"file_types": "File Types:",
|
|
11
|
+
"use_pdf_ocr": "Use PDF OCR",
|
|
12
|
+
"use_process_pool": "Use Process Pool (CPU-intensive)",
|
|
13
|
+
"threads": "Threads:",
|
|
14
|
+
"batch_size": "Batch Size:",
|
|
15
|
+
"start_scan": "Start Scan",
|
|
16
|
+
"pause": "Pause",
|
|
17
|
+
"resume": "Resume",
|
|
18
|
+
"stop": "Stop",
|
|
19
|
+
"save_results": "Save Results",
|
|
20
|
+
"clear_results": "Clear Results",
|
|
21
|
+
"results": "Results",
|
|
22
|
+
"files_scanned": "Files Scanned:",
|
|
23
|
+
"files_with_matches": "Files with Matches:",
|
|
24
|
+
"progress_log": "Progress Log:",
|
|
25
|
+
"match_details": "Match Details:",
|
|
26
|
+
"selected_match_context": "Selected Match Context:",
|
|
27
|
+
"file": "File",
|
|
28
|
+
"type": "Type",
|
|
29
|
+
"matches": "Matches",
|
|
30
|
+
"time": "Time (s)",
|
|
31
|
+
"select_input_directory": "Select Input Directory",
|
|
32
|
+
"select_rules_file": "Select Rules File",
|
|
33
|
+
"json_files": "JSON Files (*.json)",
|
|
34
|
+
"save_results_dialog": "Save Results",
|
|
35
|
+
"default_results_filename": "scan_results_{datetime}.json",
|
|
36
|
+
"error": "Error",
|
|
37
|
+
"warning": "Warning",
|
|
38
|
+
"success": "Success",
|
|
39
|
+
"invalid_input_directory": "Invalid input directory",
|
|
40
|
+
"no_valid_rules": "No valid rules found",
|
|
41
|
+
"failed_to_load_rules": "Failed to load rules: {error}",
|
|
42
|
+
"no_results_to_save": "No results to save",
|
|
43
|
+
"failed_to_save_results": "Failed to save results: {error}",
|
|
44
|
+
"scan_failed": "Scan failed: {error}",
|
|
45
|
+
"starting_scan": "Starting scan...",
|
|
46
|
+
"scan_complete": "Scan complete!",
|
|
47
|
+
"pausing_scan": "Pausing scan...",
|
|
48
|
+
"stopping_scan": "Stopping scan...",
|
|
49
|
+
"scan_completed": "Scan completed",
|
|
50
|
+
"scan_stopped": "Scan stopped",
|
|
51
|
+
"found_matches_files": "Found matches in {count} files",
|
|
52
|
+
"file_info": "File",
|
|
53
|
+
"type_info": "Type",
|
|
54
|
+
"size": "Size",
|
|
55
|
+
"bytes": "bytes",
|
|
56
|
+
"rule": "Rule",
|
|
57
|
+
"description": "Description",
|
|
58
|
+
"line": "Line {line}: {match}",
|
|
59
|
+
"context": "Context:",
|
|
60
|
+
"default_file_types": "pdf,docx,xlsx,pptx,txt,odt,rtf,epub,csv,xml,html,md,jpg,jpeg,png,gif,bmp,tiff",
|
|
61
|
+
"default_rules_file": "rules.json",
|
|
62
|
+
"results_saved_to": "Results saved to:\n{path}",
|
|
63
|
+
"files_scanned_zero": "Files Scanned: 0",
|
|
64
|
+
"files_with_matches_zero": "Files with Matches: 0",
|
|
65
|
+
"language_settings": "Language Settings",
|
|
66
|
+
"processing_options": "Processing Options",
|
|
67
|
+
"performance_settings": "Performance Settings",
|
|
68
|
+
"language_label": "Language:",
|
|
69
|
+
"ocr_tooltip": "Enable OCR for scanned PDF files to extract text from images",
|
|
70
|
+
"process_pool_tooltip": "Use multiple processes for CPU-intensive operations (may increase memory usage)",
|
|
71
|
+
"threads_tooltip": "Number of worker threads (higher values may improve speed but use more CPU)",
|
|
72
|
+
"batch_size_tooltip": "Number of files to process in each batch (larger batches may improve throughput)",
|
|
73
|
+
"file_types_tooltip": "File types to scan (comma separated)",
|
|
74
|
+
"file_menu": "&File",
|
|
75
|
+
"settings_menu": "&Settings",
|
|
76
|
+
"help_menu": "&Help",
|
|
77
|
+
"preferences": "&Preferences...",
|
|
78
|
+
"exit": "E&xit",
|
|
79
|
+
"about": "&About",
|
|
80
|
+
"about_title": "About Document Scanner",
|
|
81
|
+
"about_text": "Document Scanner GUI\n\nVersion 1.0",
|
|
82
|
+
"language": "Language",
|
|
83
|
+
"open_results": "Open Results...",
|
|
84
|
+
"open_results_file": "Open Scan Results",
|
|
85
|
+
"loaded_results_from": "Loaded results from: {path}",
|
|
86
|
+
"results_loaded_successfully": "Results loaded successfully from:\n{path}",
|
|
87
|
+
"failed_to_load_results": "Failed to load results: {error}",
|
|
88
|
+
# Command-line specific translations
|
|
89
|
+
"document_scanner_description": "Scan documents and extract text, images, and metadata with certain rules.",
|
|
90
|
+
"input_directory_help": "Input directory containing documents to scan",
|
|
91
|
+
"rules_file_help": "Rules file (JSON)",
|
|
92
|
+
"recursive_help": "Scan files recursively",
|
|
93
|
+
"file_types_help": "File types to scan (comma-separated)",
|
|
94
|
+
"use_pdf_ocr_help": "Use PDF OCR for image-based PDFs",
|
|
95
|
+
"use_process_pool_help": "Use process pool instead of thread pool (better for CPU-intensive tasks)",
|
|
96
|
+
"batch_size_help": "Number of files to process in each batch",
|
|
97
|
+
"threads_help": "Number of threads for parallel scanning",
|
|
98
|
+
"progress_help": "Show progress bar",
|
|
99
|
+
"verbose_help": "Verbose output",
|
|
100
|
+
"language_help": "Set language (en for English, zh for Chinese)",
|
|
101
|
+
# Status and logging messages
|
|
102
|
+
"scanning_directory": "Scanning directory: {directory}",
|
|
103
|
+
"found_files_to_scan": "Found {count} files to scan",
|
|
104
|
+
"scan_resumed": "Scan resumed",
|
|
105
|
+
"scan_stopped_before_submitting_tasks": "Scan stopped by user before submitting all tasks",
|
|
106
|
+
"scan_paused": "Scan paused",
|
|
107
|
+
"scan_stopped_while_paused": "Scan stopped while paused",
|
|
108
|
+
"scan_stopped_by_user_canceling_tasks": "Scan stopped by user, cancelling remaining tasks...",
|
|
109
|
+
"task_timeout_scan_may_be_stopping": "Task timeout, scan may be stopping",
|
|
110
|
+
"error_scanning_file": "Error scanning file: {error}",
|
|
111
|
+
"progress_report": "Progress: {processed}/{total} files processed",
|
|
112
|
+
"force_shutting_down_executor": "Force shutting down executor...",
|
|
113
|
+
"scan_stopped_processed_files": "Scan stopped. Processed {processed} files",
|
|
114
|
+
"scan_complete_found_matches": "Scan complete. Found matches in {matches_count} files",
|
|
115
|
+
"found_matches_in_file": "Found matches in: {file_name}",
|
|
116
|
+
"processed_file_info": "Processed {file_name} ({ext}) in {time:.3f}s - {matches_count} matches found",
|
|
117
|
+
"could_not_extract_text_from_file": "Could not extract text from {file_path}: {error}",
|
|
118
|
+
"pymupdf_failed_for_file": "PyMuPDF failed for {file_name}: {error}",
|
|
119
|
+
"pypdf_also_failed_for_file": "pypdf also failed for {file_name}: {error}",
|
|
120
|
+
"no_pdf_library_installed": "No PDF library installed (pymupdf or pypdf)",
|
|
121
|
+
"pymupdf_not_installed": "PyMuPDF not installed",
|
|
122
|
+
"no_pages_found_in_file": "No pages found in {file_path}",
|
|
123
|
+
"no_metadata_found_in_file": "No metadata found in {file_path}",
|
|
124
|
+
"pymupdf_error_trying_fallback": "PyMuPDF error on {file_path}: {error}, trying pypdf fallback",
|
|
125
|
+
"pypdf_not_installed_skipping_extraction": "pypdf not installed, skipping PDF extraction",
|
|
126
|
+
"error_extracting_pdf_with_pypdf": "Error extracting PDF with pypdf: {error}",
|
|
127
|
+
"odfpy_not_installed_skipping_extraction": "odfpy not installed, skipping ODT extraction",
|
|
128
|
+
"error_extracting_odt": "Error extracting ODT: {error}",
|
|
129
|
+
"error_extracting_rtf": "Error extracting RTF: {error}",
|
|
130
|
+
"ebooklib_not_installed_skipping_extraction": "ebooklib not installed, skipping EPUB extraction",
|
|
131
|
+
"error_extracting_epub": "Error extracting EPUB: {error}",
|
|
132
|
+
"error_extracting_csv": "Error extracting CSV: {error}",
|
|
133
|
+
"error_extracting_xml": "Error extracting XML: {error}",
|
|
134
|
+
"error_extracting_html": "Error extracting HTML: {error}",
|
|
135
|
+
"error_extracting_markdown": "Error extracting Markdown: {error}",
|
|
136
|
+
"python_docx_not_installed_skipping_extraction": "python-docx not installed, skipping DOCX extraction",
|
|
137
|
+
"openpyxl_not_installed_skipping_extraction": "openpyxl not installed, skipping XLSX extraction",
|
|
138
|
+
"python_pptx_not_installed_skipping_extraction": "python-pptx not installed, skipping PPTX extraction",
|
|
139
|
+
"pillow_or_tesseract_not_installed_skipping_ocr": "PIL or pytesseract not installed, skipping image OCR",
|
|
140
|
+
"could_not_perform_ocr_on_file": "Could not perform OCR on {file_path}: {error}",
|
|
141
|
+
"input_directory_does_not_exist": "Input directory does not exist: {input_dir}",
|
|
142
|
+
"using_rules_file": "Using rules file: {rules_file}",
|
|
143
|
+
"invalid_json_in_rules_file": "Invalid JSON in rules file: {error}",
|
|
144
|
+
"invalid_rules_format": "Invalid rules format. Expected a list or dict with 'rules' key",
|
|
145
|
+
"no_valid_rules_found": "No valid rules found",
|
|
146
|
+
"total_time_elapsed": "Total time elapsed: {time:.2f}s",
|
|
147
|
+
"invalid_regex_pattern": "Invalid regex pattern '{pattern}': {error}",
|
|
148
|
+
"rules_file_does_not_exist_alt": "Rules file does not exist: {rules_file}",
|
|
149
|
+
"image_files_supported": "Image files supported (requires OCR)",
|
|
150
|
+
"include_image_formats": "Include Image Formats",
|
|
151
|
+
"include_image_formats_tooltip": "Include image formats (jpg, jpeg, png, gif, bmp, tiff) in scan",
|
|
152
|
+
}
|