pysfi 0.1.6__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -0,0 +1,152 @@
1
+ """English translation defaults for docscan GUI."""
2
+
3
# English UI/CLI strings, keyed by translation id.
# Values containing ``{name}`` placeholders are ``str.format`` templates.
# The key set is kept in sync with the Chinese ``TRANSLATIONS`` table.
ENGLISH_DEFAULTS = {
    # Window and tab titles
    "window_title": "Document Scanner GUI",
    "input_config_tab": "Input Configuration",
    "scan_options_tab": "Scan Options",
    # Input section
    "input_directory": "Input Directory:",
    "browse": "Browse...",
    "rules_file": "Rules File:",
    "file_types": "File Types:",
    # Options section
    "use_pdf_ocr": "Use PDF OCR",
    "use_process_pool": "Use Process Pool (CPU-intensive)",
    "threads": "Threads:",
    "batch_size": "Batch Size:",
    # Action buttons
    "start_scan": "Start Scan",
    "pause": "Pause",
    "resume": "Resume",
    "stop": "Stop",
    "save_results": "Save Results",
    "clear_results": "Clear Results",
    # Results section
    "results": "Results",
    "files_scanned": "Files Scanned:",
    "files_with_matches": "Files with Matches:",
    "progress_log": "Progress Log:",
    "match_details": "Match Details:",
    "selected_match_context": "Selected Match Context:",
    # Table headers
    "file": "File",
    "type": "Type",
    "matches": "Matches",
    "time": "Time (s)",
    # Dialogs
    "select_input_directory": "Select Input Directory",
    "select_rules_file": "Select Rules File",
    "json_files": "JSON Files (*.json)",
    "save_results_dialog": "Save Results",
    "default_results_filename": "scan_results_{datetime}.json",
    # Message box titles and error messages
    "error": "Error",
    "warning": "Warning",
    "success": "Success",
    "invalid_input_directory": "Invalid input directory",
    "no_valid_rules": "No valid rules found",
    "failed_to_load_rules": "Failed to load rules: {error}",
    "no_results_to_save": "No results to save",
    "failed_to_save_results": "Failed to save results: {error}",
    "scan_failed": "Scan failed: {error}",
    # Progress messages
    "starting_scan": "Starting scan...",
    "scan_complete": "Scan complete!",
    "pausing_scan": "Pausing scan...",
    "stopping_scan": "Stopping scan...",
    "scan_completed": "Scan completed",
    "scan_stopped": "Scan stopped",
    "found_matches_files": "Found matches in {count} files",
    # File info
    "file_info": "File",
    "type_info": "Type",
    "size": "Size",
    "bytes": "bytes",
    # Match info
    "rule": "Rule",
    "description": "Description",
    "line": "Line {line}: {match}",
    "context": "Context:",
    # Default values
    "default_file_types": "pdf,docx,xlsx,pptx,txt,odt,rtf,epub,csv,xml,html,md,jpg,jpeg,png,gif,bmp,tiff",
    "default_rules_file": "rules.json",
    "results_saved_to": "Results saved to:\n{path}",
    "files_scanned_zero": "Files Scanned: 0",
    "files_with_matches_zero": "Files with Matches: 0",
    # Settings dialog
    "language_settings": "Language Settings",
    "processing_options": "Processing Options",
    "performance_settings": "Performance Settings",
    "language_label": "Language:",
    "ocr_tooltip": "Enable OCR for scanned PDF files to extract text from images",
    "process_pool_tooltip": "Use multiple processes for CPU-intensive operations (may increase memory usage)",
    "threads_tooltip": "Number of worker threads (higher values may improve speed but use more CPU)",
    "batch_size_tooltip": "Number of files to process in each batch (larger batches may improve throughput)",
    "file_types_tooltip": "File types to scan (comma separated)",
    # Apply button (present in the Chinese table; added here so both tables share one key set)
    "apply": "Apply",
    # Menu items
    "file_menu": "&File",
    "settings_menu": "&Settings",
    "help_menu": "&Help",
    "preferences": "&Preferences...",
    "exit": "E&xit",
    "about": "&About",
    "about_title": "About Document Scanner",
    "about_text": "Document Scanner GUI\n\nVersion 1.0",
    "language": "Language",
    # Open results
    "open_results": "Open Results...",
    "open_results_file": "Open Scan Results",
    "loaded_results_from": "Loaded results from: {path}",
    "results_loaded_successfully": "Results loaded successfully from:\n{path}",
    "failed_to_load_results": "Failed to load results: {error}",
    # Command-line specific translations
    "document_scanner_description": "Scan documents and extract text, images, and metadata with certain rules.",
    "input_directory_help": "Input directory containing documents to scan",
    "rules_file_help": "Rules file (JSON)",
    "recursive_help": "Scan files recursively",
    "file_types_help": "File types to scan (comma-separated)",
    "use_pdf_ocr_help": "Use PDF OCR for image-based PDFs",
    "use_process_pool_help": "Use process pool instead of thread pool (better for CPU-intensive tasks)",
    "batch_size_help": "Number of files to process in each batch",
    "threads_help": "Number of threads for parallel scanning",
    "progress_help": "Show progress bar",
    "verbose_help": "Verbose output",
    "language_help": "Set language (en for English, zh for Chinese)",
    # Status and logging messages
    "scanning_directory": "Scanning directory: {directory}",
    "found_files_to_scan": "Found {count} files to scan",
    "scan_resumed": "Scan resumed",
    "scan_stopped_before_submitting_tasks": "Scan stopped by user before submitting all tasks",
    "scan_paused": "Scan paused",
    "scan_stopped_while_paused": "Scan stopped while paused",
    "scan_stopped_by_user_canceling_tasks": "Scan stopped by user, cancelling remaining tasks...",
    "task_timeout_scan_may_be_stopping": "Task timeout, scan may be stopping",
    "error_scanning_file": "Error scanning file: {error}",
    "progress_report": "Progress: {processed}/{total} files processed",
    "force_shutting_down_executor": "Force shutting down executor...",
    "scan_stopped_processed_files": "Scan stopped. Processed {processed} files",
    "scan_complete_found_matches": "Scan complete. Found matches in {matches_count} files",
    "found_matches_in_file": "Found matches in: {file_name}",
    "processed_file_info": "Processed {file_name} ({ext}) in {time:.3f}s - {matches_count} matches found",
    "could_not_extract_text_from_file": "Could not extract text from {file_path}: {error}",
    "pymupdf_failed_for_file": "PyMuPDF failed for {file_name}: {error}",
    "pypdf_also_failed_for_file": "pypdf also failed for {file_name}: {error}",
    "no_pdf_library_installed": "No PDF library installed (pymupdf or pypdf)",
    "pymupdf_not_installed": "PyMuPDF not installed",
    "no_pages_found_in_file": "No pages found in {file_path}",
    "no_metadata_found_in_file": "No metadata found in {file_path}",
    "pymupdf_error_trying_fallback": "PyMuPDF error on {file_path}: {error}, trying pypdf fallback",
    "pypdf_not_installed_skipping_extraction": "pypdf not installed, skipping PDF extraction",
    "error_extracting_pdf_with_pypdf": "Error extracting PDF with pypdf: {error}",
    "odfpy_not_installed_skipping_extraction": "odfpy not installed, skipping ODT extraction",
    "error_extracting_odt": "Error extracting ODT: {error}",
    "error_extracting_rtf": "Error extracting RTF: {error}",
    "ebooklib_not_installed_skipping_extraction": "ebooklib not installed, skipping EPUB extraction",
    "error_extracting_epub": "Error extracting EPUB: {error}",
    "error_extracting_csv": "Error extracting CSV: {error}",
    "error_extracting_xml": "Error extracting XML: {error}",
    "error_extracting_html": "Error extracting HTML: {error}",
    "error_extracting_markdown": "Error extracting Markdown: {error}",
    "python_docx_not_installed_skipping_extraction": "python-docx not installed, skipping DOCX extraction",
    "openpyxl_not_installed_skipping_extraction": "openpyxl not installed, skipping XLSX extraction",
    "python_pptx_not_installed_skipping_extraction": "python-pptx not installed, skipping PPTX extraction",
    "pillow_or_tesseract_not_installed_skipping_ocr": "PIL or pytesseract not installed, skipping image OCR",
    "could_not_perform_ocr_on_file": "Could not perform OCR on {file_path}: {error}",
    "input_directory_does_not_exist": "Input directory does not exist: {input_dir}",
    "using_rules_file": "Using rules file: {rules_file}",
    "invalid_json_in_rules_file": "Invalid JSON in rules file: {error}",
    "invalid_rules_format": "Invalid rules format. Expected a list or dict with 'rules' key",
    "no_valid_rules_found": "No valid rules found",
    "total_time_elapsed": "Total time elapsed: {time:.2f}s",
    "invalid_regex_pattern": "Invalid regex pattern '{pattern}': {error}",
    "rules_file_does_not_exist_alt": "Rules file does not exist: {rules_file}",
    "image_files_supported": "Image files supported (requires OCR)",
    "include_image_formats": "Include Image Formats",
    "include_image_formats_tooltip": "Include image formats (jpg, jpeg, png, gif, bmp, tiff) in scan",
}
@@ -0,0 +1,170 @@
1
+ """Chinese translations for docscan_gui."""
2
+
3
# Chinese (zh) UI/CLI strings, keyed by the same translation ids as the
# English defaults. Values containing ``{name}`` placeholders are
# ``str.format`` templates and must keep the same placeholder names.
TRANSLATIONS = {
    # Window and Tab Titles
    "window_title": "文档扫描器",
    "input_config_tab": "输入配置",
    "scan_options_tab": "扫描选项",
    # Input Section
    "input_directory": "输入目录:",
    "browse": "浏览...",
    "rules_file": "规则文件:",
    "file_types": "文件类型:",
    # Options Section
    "use_pdf_ocr": "使用 PDF OCR",
    "use_process_pool": "使用进程池 (CPU 密集型)",
    "threads": "线程数:",
    "batch_size": "批处理大小:",
    # Action Buttons
    "start_scan": "开始扫描",
    "pause": "暂停",
    "resume": "恢复",
    "stop": "停止",
    "save_results": "保存结果",
    "clear_results": "清除结果",
    # Results Section
    "results": "结果",
    "files_scanned": "已扫描文件:",
    "files_with_matches": "包含匹配项的文件:",
    "progress_log": "进度日志:",
    "match_details": "匹配详情:",
    "selected_match_context": "选中匹配项的上下文:",
    # Table Headers
    "file": "文件",
    "type": "类型",
    "matches": "匹配数",
    "time": "时间 (s)",
    # Dialogs and Messages
    "select_input_directory": "选择输入目录",
    "select_rules_file": "选择规则文件",
    "json_files": "JSON 文件 (*.json)",
    "save_results_dialog": "保存结果",
    "default_results_filename": "扫描结果_{datetime}.json",
    # Error Messages
    "error": "错误",
    "warning": "警告",
    "success": "成功",
    "invalid_input_directory": "无效的输入目录",
    "no_valid_rules": "没有找到有效的规则",
    "failed_to_load_rules": "加载规则失败: {error}",
    "no_results_to_save": "没有可保存的结果",
    "failed_to_save_results": "保存结果失败: {error}",
    "scan_failed": "扫描失败: {error}",
    # Progress Messages
    "starting_scan": "开始扫描...",
    "scan_complete": "扫描完成!",
    "pausing_scan": "暂停扫描...",
    "stopping_scan": "停止扫描...",
    "scan_completed": "扫描完成",
    "scan_stopped": "扫描已停止",
    "scan_resumed": "扫描已恢复",
    "found_matches_files": "在 {count} 个文件中找到匹配项",
    # File Info
    "file_info": "文件",
    "type_info": "类型",
    "size": "大小",
    "bytes": "字节",
    # Match Info
    "rule": "规则",
    "description": "描述",
    "line": "行 {line}: {match}",
    # Trailing colon matches the English "Context:" and the other label strings.
    "context": "上下文:",
    "default_file_types": "pdf,docx,xlsx,pptx,txt,odt,rtf,epub,csv,xml,html,md,jpg,jpeg,png,gif,bmp,tiff",
    "default_rules_file": "rules.json",
    # Menu Items
    "file_menu": "文件(&F)",
    "settings_menu": "设置(&S)",
    "help_menu": "帮助(&H)",
    "preferences": "首选项(&P)...",
    "exit": "退出(&X)",
    "about": "关于(&A)",
    "about_title": "关于文档扫描器",
    "about_text": "文档扫描器 GUI\n\n版本 1.0",
    "language": "语言",
    # Open Results
    "open_results": "打开结果...",
    "open_results_file": "打开扫描结果",
    "loaded_results_from": "已从以下位置加载结果: {path}",
    "results_loaded_successfully": "结果已成功从以下位置加载:\n{path}",
    "failed_to_load_results": "加载结果失败: {error}",
    # Settings Dialog
    "language_settings": "语言设置",
    "processing_options": "处理选项",
    "performance_settings": "性能设置",
    "language_label": "语言:",
    "ocr_tooltip": "为扫描的 PDF 文件启用 OCR 以从图像中提取文本",
    "process_pool_tooltip": "对 CPU 密集型操作使用多进程（可能会增加内存使用）",
    "threads_tooltip": "工作线程数量（较高的值可能会提高速度但会使用更多 CPU）",
    "batch_size_tooltip": "每批处理的文件数量（较大的批次可能会提高吞吐量）",
    "file_types_tooltip": "要扫描的文件类型（逗号分隔）",
    # Results Saved Message
    # Path goes on its own line ("\n"), as in the English string and in
    # "results_loaded_successfully"; the previous "\t" was a typo.
    "results_saved_to": "结果已保存至:\n{path}",
    # Default Values
    "files_scanned_zero": "已扫描文件: 0",
    "files_with_matches_zero": "包含匹配项的文件: 0",
    # Apply Button
    "apply": "应用",
    # Command-line specific translations
    "document_scanner_description": "扫描文档并使用特定规则提取文本、图像和元数据。",
    "input_directory_help": "包含待扫描文档的输入目录",
    "rules_file_help": "规则文件 (JSON格式)",
    "recursive_help": "递归扫描文件",
    "file_types_help": "要扫描的文件类型（逗号分隔）",
    "use_pdf_ocr_help": "对基于图像的PDF文件使用OCR",
    "use_process_pool_help": "使用进程池而非线程池（更适合CPU密集型任务）",
    "batch_size_help": "每批次处理的文件数量",
    "threads_help": "并行扫描的线程数",
    "progress_help": "显示进度条",
    "verbose_help": "详细输出",
    "language_help": "设置语言（en表示英文，zh表示中文）",
    # Status and logging messages
    "scanning_directory": "正在扫描目录: {directory}",
    "found_files_to_scan": "发现 {count} 个文件待扫描",
    "scan_stopped_before_submitting_tasks": "用户在提交所有任务之前停止扫描",
    "scan_paused": "扫描已暂停",
    "scan_stopped_while_paused": "扫描在暂停状态下停止",
    "scan_stopped_by_user_canceling_tasks": "用户停止扫描，正在取消剩余任务...",
    "task_timeout_scan_may_be_stopping": "任务超时，扫描可能正在停止",
    "error_scanning_file": "扫描文件时出错: {error}",
    "progress_report": "进度: {processed}/{total} 文件已处理",
    "force_shutting_down_executor": "强制关闭执行器...",
    "scan_stopped_processed_files": "扫描已停止。已处理 {processed} 个文件",
    "scan_complete_found_matches": "扫描完成。在 {matches_count} 个文件中找到匹配项",
    "found_matches_in_file": "找到匹配项: {file_name}",
    "processed_file_info": "已处理 {file_name} ({ext})，耗时 {time:.3f}秒 - 找到 {matches_count} 个匹配项",
    "could_not_extract_text_from_file": "无法从 {file_path} 提取文本: {error}",
    "pymupdf_failed_for_file": "{file_name} 的 PyMuPDF 失败: {error}",
    "pypdf_also_failed_for_file": "{file_name} 的 pypdf 也失败: {error}",
    "no_pdf_library_installed": "未安装PDF库 (pymupdf 或 pypdf)",
    "pymupdf_not_installed": "未安装 PyMuPDF",
    "no_pages_found_in_file": "在 {file_path} 中未找到页面",
    "no_metadata_found_in_file": "在 {file_path} 中未找到元数据",
    "pymupdf_error_trying_fallback": "PyMuPDF 在 {file_path} 上出错: {error}，尝试使用 pypdf 回退",
    "pypdf_not_installed_skipping_extraction": "未安装 pypdf，跳过PDF提取",
    "error_extracting_pdf_with_pypdf": "使用 pypdf 提取PDF时出错: {error}",
    "odfpy_not_installed_skipping_extraction": "未安装 odfpy，跳过ODT提取",
    "error_extracting_odt": "提取ODT时出错: {error}",
    "error_extracting_rtf": "提取RTF时出错: {error}",
    "ebooklib_not_installed_skipping_extraction": "未安装 ebooklib，跳过EPUB提取",
    "error_extracting_epub": "提取EPUB时出错: {error}",
    "error_extracting_csv": "提取CSV时出错: {error}",
    "error_extracting_xml": "提取XML时出错: {error}",
    "error_extracting_html": "提取HTML时出错: {error}",
    "error_extracting_markdown": "提取Markdown时出错: {error}",
    "python_docx_not_installed_skipping_extraction": "未安装 python-docx，跳过DOCX提取",
    "openpyxl_not_installed_skipping_extraction": "未安装 openpyxl，跳过XLSX提取",
    "python_pptx_not_installed_skipping_extraction": "未安装 python-pptx，跳过PPTX提取",
    "pillow_or_tesseract_not_installed_skipping_ocr": "未安装 PIL 或 pytesseract，跳过图像OCR",
    "could_not_perform_ocr_on_file": "无法对 {file_path} 执行OCR: {error}",
    "input_directory_does_not_exist": "输入目录不存在: {input_dir}",
    "using_rules_file": "使用规则文件: {rules_file}",
    "invalid_json_in_rules_file": "规则文件中的JSON无效: {error}",
    "invalid_rules_format": "无效的规则格式。期望包含'rules'键的列表或字典",
    "no_valid_rules_found": "未找到有效规则",
    "total_time_elapsed": "总耗时: {time:.2f}秒",
    "invalid_regex_pattern": "无效的正则表达式模式 '{pattern}': {error}",
    "rules_file_does_not_exist_alt": "规则文件不存在: {rules_file}",
    "image_files_supported": "支持的图像文件（需要OCR）",
    "include_image_formats": "包含图像格式",
    "include_image_formats_tooltip": "在扫描中包含图像格式（jpg, jpeg, png, gif, bmp, tiff）",
}
File without changes
@@ -31,6 +31,9 @@ logging.basicConfig(level=logging.INFO, format="%(message)s")
31
31
  cwd = Path.cwd()
32
32
  logger = logging.getLogger(__name__)
33
33
 
34
+ # Default cache directory
35
+ DEFAULT_CACHE_DIR = Path.home() / ".pysfi" / ".cache" / "embed-python"
36
+
34
37
 
35
38
  def get_system_arch(manual_arch: str | None = None) -> str:
36
39
  """Get system architecture for Python embeddable package."""
@@ -61,12 +64,13 @@ def get_official_url_template(arch: str) -> str:
61
64
  return f"https://www.python.org/ftp/python/{{version}}/python-{{version}}-embed-{arch}.zip"
62
65
 
63
66
 
64
- def get_latest_patch_version(major_minor: str, arch: str, timeout: int = 5) -> str:
67
+ def get_latest_patch_version(major_minor: str, arch: str, cache_dir: Path, timeout: int = 5) -> str:
65
68
  """Get the latest patch version for a given major.minor version.
66
69
 
67
70
  Args:
68
71
  major_minor: Major.minor version (e.g., '3.13') or full version (e.g., '3.13.1')
69
72
  arch: Architecture (amd64 or arm64)
73
+ cache_dir: Cache directory to check for cached versions
70
74
  timeout: Request timeout in seconds
71
75
 
72
76
  Returns:
@@ -83,10 +87,30 @@ def get_latest_patch_version(major_minor: str, arch: str, timeout: int = 5) -> s
83
87
  logger.debug(f"Invalid version format: {major_minor}, using as: `3.13.5`")
84
88
  return major_minor
85
89
 
90
+ major, minor = major_minor.split(".")
91
+
92
+ # Check cache for any cached version matching the major.minor
93
+ if cache_dir.exists():
94
+ cached_versions = []
95
+ for cache_file in cache_dir.glob(f"python-{major}.{minor}.*-embed-{arch}.zip"):
96
+ try:
97
+ # Extract version from filename
98
+ match = cache_file.stem.replace("python-", "").replace(f"-embed-{arch}", "")
99
+ if match:
100
+ cached_versions.append((match, cache_file))
101
+ except Exception:
102
+ pass
103
+
104
+ if cached_versions:
105
+ # Return the highest cached version
106
+ cached_versions.sort(key=lambda x: x[0], reverse=True)
107
+ latest_cached = cached_versions[0][0]
108
+ logger.info(f"Using cached version: {latest_cached}")
109
+ return latest_cached
110
+
86
111
  logger.info(f"Checking latest version for {major_minor}...")
87
112
 
88
113
  base_url = get_official_url_template(arch)
89
- major, minor = major_minor.split(".")
90
114
 
91
115
  # Try from highest patch number down to 0
92
116
  for patch in range(20, -1, -1):
@@ -261,12 +285,21 @@ def install_embed_python(
261
285
  version_filename = f"python-{version}-embed-{arch}.zip"
262
286
  cache_file = cache_dir / version_filename
263
287
 
288
+ # Check if cached file exists and is valid
264
289
  if cache_file.exists():
265
- logger.debug(f"Using cached file: {cache_file}")
266
- elif offline:
267
- logger.error(f"Offline mode: no cached file found for version {version}")
268
- return False
269
- else:
290
+ if not zipfile.is_zipfile(cache_file):
291
+ logger.warning(f"Corrupted cache file detected: {cache_file}")
292
+ logger.info("Deleting corrupted cache file and re-downloading...")
293
+ cache_file.unlink()
294
+ else:
295
+ logger.debug(f"Using cached file: {cache_file}")
296
+
297
+ # Download if cache file doesn't exist or was corrupted
298
+ if not cache_file.exists():
299
+ if offline:
300
+ logger.error(f"Offline mode: no cached file found for version {version}")
301
+ return False
302
+
270
303
  if skip_speed_test:
271
304
  url = get_official_url_template(arch).format(version=version)
272
305
  logger.info(f"Skipping speed test, using official URL: {url}")
@@ -279,6 +312,13 @@ def install_embed_python(
279
312
  try:
280
313
  logger.info(f"Attempting download {i}/{len(urls)}: {url}")
281
314
  download_with_progress(url, cache_file)
315
+
316
+ # Validate downloaded file
317
+ if not zipfile.is_zipfile(cache_file):
318
+ logger.warning("Downloaded file is corrupted, trying next URL...")
319
+ cache_file.unlink()
320
+ continue
321
+
282
322
  download_success = True
283
323
  break
284
324
  except URLError as e:
@@ -296,11 +336,32 @@ def install_embed_python(
296
336
  return False
297
337
 
298
338
  try:
339
+ # Final validation before extraction
340
+ if not zipfile.is_zipfile(cache_file):
341
+ logger.error(f"Invalid zip file: {cache_file}")
342
+ return False
343
+
299
344
  extract_zip(cache_file, target_dir)
300
- except zipfile.BadZipFile:
301
- logger.error(f"Invalid zip file: {cache_file}")
345
+ except zipfile.BadZipFile as e:
346
+ logger.error(f"Corrupted zip file: {cache_file}")
347
+ logger.error(f"Error: {e}")
348
+ # Delete corrupted file and try to re-download in the next run
302
349
  if not keep_cache:
350
+ logger.info("Deleting corrupted cache file...")
303
351
  cache_file.unlink()
352
+ # Retry download if not offline
353
+ if not offline:
354
+ logger.info("Attempting to re-download...")
355
+ return install_embed_python(
356
+ target_dir=target_dir,
357
+ version=version,
358
+ cache_dir=cache_dir,
359
+ offline=offline,
360
+ keep_cache=keep_cache,
361
+ skip_speed_test=skip_speed_test,
362
+ arch=arch,
363
+ timeout=timeout,
364
+ )
304
365
  return False
305
366
  except Exception as e:
306
367
  logger.error(f"Failed to extract: {e}")
@@ -326,10 +387,10 @@ def main() -> None:
326
387
  description="Download and install Python embeddable package to a specific directory.",
327
388
  )
328
389
  parser.add_argument(
329
- "--directory",
330
- "-D",
390
+ "directory",
331
391
  type=str,
332
- default=str(cwd / "runtime"),
392
+ nargs="?",
393
+ default=str(cwd / "dist" / "runtime"),
333
394
  help="Directory to install Python embeddable package (default: current directory)",
334
395
  )
335
396
  parser.add_argument("--debug", "-d", action="store_true", help="Enable debug mode")
@@ -350,7 +411,7 @@ def main() -> None:
350
411
  "--cache-dir",
351
412
  "-C",
352
413
  type=str,
353
- default=str(cwd / ".cache" / "embedinstall"),
414
+ default=str(DEFAULT_CACHE_DIR),
354
415
  help="Cache directory for downloaded files",
355
416
  )
356
417
  parser.add_argument(
@@ -384,17 +445,16 @@ def main() -> None:
384
445
 
385
446
  if args.debug:
386
447
  logger.setLevel(logging.DEBUG)
387
- else:
388
- logger.setLevel(logging.INFO)
389
448
 
390
449
  target_dir = Path(args.directory)
391
- cache_dir = Path(args.cache_dir)
450
+ target_dir.mkdir(parents=True, exist_ok=True)
392
451
 
452
+ cache_dir = Path(args.cache_dir)
393
453
  t0 = time.perf_counter()
394
454
  arch = get_system_arch(args.arch)
395
455
 
396
456
  # Auto-complete version if only major.minor is provided
397
- version = get_latest_patch_version(args.version, arch, args.timeout)
457
+ version = get_latest_patch_version(args.version, arch, cache_dir, args.timeout)
398
458
  if "." not in version:
399
459
  return
400
460
 
@@ -1,7 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import argparse
4
- import json
5
4
  import logging
6
5
  import os
7
6
  import shutil
@@ -9,6 +8,7 @@ import subprocess
9
8
  import sys
10
9
  from dataclasses import dataclass
11
10
  from pathlib import Path
11
+ from typing import Any, Callable
12
12
 
13
13
  if sys.version_info >= (3, 11):
14
14
  import tomllib
@@ -34,8 +34,12 @@ def parse_pyproject_toml(directory: Path) -> dict:
34
34
  logger.error(f"No pyproject.toml found in {directory}")
35
35
  return {}
36
36
 
37
- with project_toml.open("rb") as f:
38
- return tomllib.load(f)
37
+ try:
38
+ with project_toml.open("rb") as f:
39
+ return tomllib.load(f)
40
+ except Exception as e:
41
+ logger.error(f"Error parsing pyproject.toml: {e}")
42
+ return {}
39
43
 
40
44
 
41
45
  def _get_build_command_from_toml(directory: Path) -> str | None:
@@ -58,8 +62,7 @@ def _get_build_command_from_toml(directory: Path) -> str | None:
58
62
  logger.error(f"Unknown build-backend: {build_backend}")
59
63
  return None
60
64
 
61
- logger.error("No `build-system` or `build-backend` found in pyproject.toml: ")
62
- logger.error(json.dumps(project_data, indent=2, ensure_ascii=False, sort_keys=True))
65
+ logger.error("No `build-system` or `build-backend` found in pyproject.toml")
63
66
  return None
64
67
 
65
68
 
@@ -83,42 +86,56 @@ def _get_build_command(directory: Path):
83
86
  class Command:
84
87
  name: str
85
88
  alias: str
89
+ cmds: list[str] | Callable[..., Any] | None = None
86
90
 
87
91
 
88
- _COMMANDS = [
89
- Command(name="build", alias="b"),
90
- Command(name="bumpversion", alias="bump"),
91
- Command(name="clean", alias="c"),
92
- Command(name="publish", alias="p"),
93
- Command(name="token", alias="tk"),
94
- ]
95
- _CHOICES = [command.alias for command in _COMMANDS]
96
- _CHOICES.extend([command.name for command in _COMMANDS])
92
def _clean(root_dir: Path = cwd) -> None:
    """Remove build artifacts (``dist``, ``build``, ``*.egg-info``) under *root_dir*.

    The deletion is done in-process with ``shutil``/``pathlib`` so it does not
    depend on an external ``rm`` executable being available, nor on a shell
    expanding the ``*.egg-info`` glob (an argv list handed to a subprocess
    without a shell would pass the pattern through literally).
    Like ``rm -rf``, missing targets are silently ignored.

    Args:
        root_dir: Project directory whose build artifacts should be removed.
    """
    targets = [root_dir / "dist", root_dir / "build", *root_dir.glob("*.egg-info")]
    for target in targets:
        if target.is_symlink() or target.is_file():
            # Remove the link/file itself; never follow a symlink into its target.
            target.unlink()
        elif target.is_dir():
            shutil.rmtree(target, ignore_errors=True)
        else:
            continue  # nothing to remove
        logger.debug(f"Removed: {target}")
97
94
 
98
95
 
99
96
  def main():
97
+ # Get build command
98
+ build_command = _get_build_command(cwd) or ""
99
+ commands = [
100
+ Command(name="build", alias="b", cmds=[build_command, "build"]),
101
+ Command(name="bumpversion", alias="bump", cmds=["bumpversion", "patch", "--tag"]),
102
+ Command(name="clean", alias="c", cmds=_clean),
103
+ Command(name="publish", alias="p"), # No preset commands
104
+ Command(name="test", alias="t", cmds=lambda: os.system("pytest")),
105
+ Command(name="test-benchmark", alias="tb", cmds=lambda: os.system("pytest -m benchmark")),
106
+ Command(name="test-coverage", alias="tc", cmds=lambda: os.system("pytest --cov=sfi")),
107
+ Command(name="token", alias="tk", cmds=lambda: _set_token(build_command)),
108
+ ]
109
+ command_dict = {command.name: command for command in commands}
110
+ command_dict.update({command.alias: command for command in commands})
111
+ choices = [command.alias for command in commands]
112
+ choices.extend([command.name for command in commands])
113
+
114
+ # Parse args
100
115
  parser = argparse.ArgumentParser(description="Make Python")
101
- parser.add_argument("command", type=str, choices=_CHOICES, help=f"Command to run, options: {_CHOICES}")
102
116
  parser.add_argument("--debug", "-d", action="store_true", help="Enable debug mode")
103
-
117
+ parser.add_argument("command", type=str, choices=choices, help=f"Command to run, options: {choices}")
104
118
  args = parser.parse_args()
105
119
  if args.debug:
106
120
  logger.setLevel(logging.DEBUG)
107
121
 
108
- build_command = _get_build_command(cwd) or ""
109
- logger.info(f"Using build command: {build_command}")
110
- if args.command in {"build", "b"}:
111
- _run_command([build_command, "build"], cwd)
112
- elif args.command in {"bump", "bumpversion"}:
113
- _run_command(["bumpversion", "patch"], cwd)
114
- elif args.command in {"clean", "c"}:
115
- _run_command(["rm", "-rf", "dist", "build", "*.egg-info"], cwd)
116
- elif args.command in {"publish", "p"}:
122
+ logger.debug(f"Using build command: {build_command}")
123
+ command = command_dict.get(args.command)
124
+ if command:
125
+ if callable(command.cmds):
126
+ command.cmds()
127
+ elif isinstance(command.cmds, list):
128
+ _run_command(command.cmds, cwd)
129
+ else:
130
+ logger.debug("No preset commands found")
131
+ else:
132
+ logger.error(f"Unknown command: {args.command}")
133
+ sys.exit(1)
134
+
135
+ if args.command in {"publish", "p"}:
117
136
  if not _check_pypi_token(build_command):
118
137
  _set_token(build_command)
119
138
  _run_command([build_command, "publish"], cwd)
120
- elif args.command in {"token", "tk"}:
121
- _set_token(build_command)
122
139
 
123
140
 
124
141
  def _set_token(build_command: str, show_header: bool = True) -> None:
File without changes