PyPI - pysfi - Versions diffs - 0.1.13__py3-none-any.whl → 0.1.15__py3-none-any.whl - Mend

pysfi 0.1.13__py3-none-any.whl → 0.1.15__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.

Files changed (37)

{pysfi-0.1.13.dist-info → pysfi-0.1.15.dist-info}/METADATA +1 -1
{pysfi-0.1.13.dist-info → pysfi-0.1.15.dist-info}/RECORD +35 -35
{pysfi-0.1.13.dist-info → pysfi-0.1.15.dist-info}/entry_points.txt +2 -0
sfi/__init__.py +20 -5
sfi/alarmclock/__init__.py +3 -3
sfi/bumpversion/__init__.py +5 -5
sfi/bumpversion/bumpversion.py +64 -15
sfi/cleanbuild/__init__.py +3 -3
sfi/cleanbuild/cleanbuild.py +5 -1
sfi/cli.py +13 -2
sfi/condasetup/__init__.py +1 -1
sfi/condasetup/condasetup.py +91 -76
sfi/docdiff/__init__.py +1 -1
sfi/docdiff/docdiff.py +3 -2
sfi/docscan/__init__.py +3 -3
sfi/docscan/docscan.py +78 -23
sfi/docscan/docscan_gui.py +5 -5
sfi/filedate/filedate.py +12 -5
sfi/img2pdf/img2pdf.py +5 -5
sfi/llmquantize/llmquantize.py +44 -33
sfi/llmserver/__init__.py +1 -1
sfi/makepython/makepython.py +880 -319
sfi/pdfcrypt/__init__.py +30 -0
sfi/pdfcrypt/pdfcrypt.py +435 -0
sfi/pdfsplit/pdfsplit.py +45 -12
sfi/pyarchive/__init__.py +1 -1
sfi/pyarchive/pyarchive.py +1 -1
sfi/pyembedinstall/pyembedinstall.py +1 -1
sfi/pylibpack/pylibpack.py +5 -13
sfi/pyloadergen/pyloadergen.py +6 -3
sfi/pypack/pypack.py +131 -105
sfi/pyprojectparse/pyprojectparse.py +19 -44
sfi/pysourcepack/__init__.py +1 -1
sfi/pysourcepack/pysourcepack.py +11 -14
sfi/workflowengine/__init__.py +0 -0
sfi/workflowengine/workflowengine.py +0 -547
{pysfi-0.1.13.dist-info → pysfi-0.1.15.dist-info}/WHEEL +0 -0

sfi/docscan/docscan.py CHANGED Viewed

@@ -91,7 +91,9 @@ def t(key: str, **kwargs) -> str:
     Returns:
         Translated text
     """
-    text = ZH_TRANSLATIONS.get(key, key) if USE_CHINESE else EN_TRANSLATIONS.get(key, key)
+    text = (
+        ZH_TRANSLATIONS.get(key, key) if USE_CHINESE else EN_TRANSLATIONS.get(key, key)
+    )
     # Format with kwargs if provided
     if kwargs:
@@ -123,7 +125,9 @@ class Rule:
                 # Use re.ASCII for faster matching when possible
                 self.compiled_pattern = re.compile(self.pattern, flags | re.ASCII)
             except re.error as e:
-                logger.warning(t("invalid_regex_pattern", pattern=self.pattern, error=e))
+                logger.warning(
+                    t("invalid_regex_pattern", pattern=self.pattern, error=e)
+                )
                 self.compiled_pattern = None
         else:
             self.compiled_pattern = None
@@ -274,13 +278,18 @@ class DocumentScanner:
                 "use_pdf_ocr": self.use_pdf_ocr,
                 "use_process_pool": self.use_process_pool,
             },
-            "rules": [{"name": r.name, "pattern": r.pattern, "is_regex": r.is_regex} for r in self.rules],
+            "rules": [
+                {"name": r.name, "pattern": r.pattern, "is_regex": r.is_regex}
+                for r in self.rules
+            ],
             "matches": [],
         }
         # Scan files in parallel
         processed = 0
-        executor_class = ProcessPoolExecutor if self.use_process_pool else ThreadPoolExecutor
+        executor_class = (
+            ProcessPoolExecutor if self.use_process_pool else ThreadPoolExecutor
+        )
         executor = executor_class(max_workers=threads)
         self._executor = executor  # Keep reference for forced shutdown
@@ -350,10 +359,17 @@ class DocumentScanner:
                     break
                 try:
-                    file_result = future.result(timeout=1.0)  # Short timeout to allow quick stop
+                    file_result = future.result(
+                        timeout=1.0
+                    )  # Short timeout to allow quick stop
                     if file_result and file_result["matches"]:
                         results["matches"].append(file_result)
-                        logger.info(t("found_matches_in_file", file_name=Path(file_result.get("file_path", "")).name))
+                        logger.info(
+                            t(
+                                "found_matches_in_file",
+                                file_name=Path(file_result.get("file_path", "")).name,
+                            )
+                        )
                 except TimeoutError:
                     logger.warning(t("task_timeout_scan_may_be_stopping"))
                     if self.stopped:
@@ -366,7 +382,9 @@ class DocumentScanner:
                 # Report progress
                 if show_progress and processed % 10 == 0:
-                    logger.info(t("progress_report", processed=processed, total=len(files)))
+                    logger.info(
+                        t("progress_report", processed=processed, total=len(files))
+                    )
                 # Call progress callback if set
                 if self._progress_callback:
@@ -391,7 +409,9 @@ class DocumentScanner:
         if self.stopped:
             logger.info(t("scan_stopped_processed_files", processed=processed))
         else:
-            logger.info(t("scan_complete_found_matches", matches_count=len(results["matches"])))
+            logger.info(
+                t("scan_complete_found_matches", matches_count=len(results["matches"]))
+            )
         return results
@@ -493,7 +513,9 @@ class DocumentScanner:
                     return {}
         except Exception as e:
-            logger.warning(t("could_not_extract_text_from_file", file_path=file_path, error=e))
+            logger.warning(
+                t("could_not_extract_text_from_file", file_path=file_path, error=e)
+            )
             return {}
         processing_time = time.perf_counter() - file_start_time
@@ -549,14 +571,18 @@ class DocumentScanner:
             try:
                 return self._extract_pdf_fitz(file_path)
             except Exception as e:
-                logger.warning(t("pymupdf_failed_for_file", file_name=file_path.name, error=e))
+                logger.warning(
+                    t("pymupdf_failed_for_file", file_name=file_path.name, error=e)
+                )
         # Fallback to pypdf
         if pypdf is not None:
             try:
                 return self._extract_pdf_pypdf(file_path)
             except Exception as e:
-                logger.error(t("pypdf_also_failed_for_file", file_name=file_path.name, error=e))
+                logger.error(
+                    t("pypdf_also_failed_for_file", file_name=file_path.name, error=e)
+                )
                 return "", {}
         logger.warning(t("no_pdf_library_installed"))
@@ -632,7 +658,9 @@ class DocumentScanner:
         except Exception as e:
             if doc:
                 doc.close()
-            logger.warning(t("pymupdf_error_trying_fallback", file_path=file_path, error=e))
+            logger.warning(
+                t("pymupdf_error_trying_fallback", file_path=file_path, error=e)
+            )
             # Re-raise to trigger fallback to pypdf
             raise
@@ -764,8 +792,12 @@ class DocumentScanner:
                     text_parts.append(text)
             metadata = {
-                "title": book.get_metadata("DC", "title")[0][0] if book.get_metadata("DC", "title") else "",  # pyright: ignore[reportAttributeAccessIssue]
-                "author": book.get_metadata("DC", "creator")[0][0] if book.get_metadata("DC", "creator") else "",  # pyright: ignore[reportAttributeAccessIssue]
+                "title": book.get_metadata("DC", "title")[0][0]
+                if book.get_metadata("DC", "title")
+                else "",  # pyright: ignore[reportAttributeAccessIssue]
+                "author": book.get_metadata("DC", "creator")[0][0]
+                if book.get_metadata("DC", "creator")
+                else "",  # pyright: ignore[reportAttributeAccessIssue]
                 "format": "EPUB",
             }
@@ -810,7 +842,9 @@ class DocumentScanner:
             root = tree.getroot()
             # Extract all text content
-            text_parts = [elem.text for elem in root.iter() if elem.text and elem.text.strip()]
+            text_parts = [
+                elem.text for elem in root.iter() if elem.text and elem.text.strip()
+            ]
             text = "\n".join(text_parts)
             metadata = {
@@ -954,7 +988,9 @@ class DocumentScanner:
                         wb.close()
                         return "", {}
-                row_text = " | ".join(str(cell) if cell is not None else "" for cell in row)
+                row_text = " | ".join(
+                    str(cell) if cell is not None else "" for cell in row
+                )
                 if row_text.strip():
                     text_parts.append(row_text)
@@ -1017,7 +1053,9 @@ class DocumentScanner:
             return text, metadata
         except Exception as e:
-            logger.warning(t("could_not_perform_ocr_on_file", file_path=file_path, error=e))
+            logger.warning(
+                t("could_not_perform_ocr_on_file", file_path=file_path, error=e)
+            )
             return "", {}
     def _extract_text(self, file_path: Path) -> tuple[str, dict[str, Any]]:
@@ -1047,8 +1085,12 @@ def main():
     USE_CHINESE = temp_args.lang == "zh"
     parser = argparse.ArgumentParser(description=t("document_scanner_description"))
-    parser.add_argument("input", type=str, nargs="?", default=str(cwd), help=t("input_directory_help"))
-    parser.add_argument("-r", "--rules", type=str, default="rules.json", help=t("rules_file_help"))
+    parser.add_argument(
+        "input", type=str, nargs="?", default=str(cwd), help=t("input_directory_help")
+    )
+    parser.add_argument(
+        "-r", "--rules", type=str, default="rules.json", help=t("rules_file_help")
+    )
     parser.add_argument("--recursive", action="store_true", help=t("recursive_help"))
     parser.add_argument(
         "-f",
@@ -1056,7 +1098,9 @@ def main():
         help=t("file_types_help"),
         default="pdf,docx,xlsx,pptx,txt,odt,rtf,epub,csv,xml,html,md,jpg,jpeg,png,gif,bmp,tiff",
     )
-    parser.add_argument("--use-pdf-ocr", help=t("use_pdf_ocr_help"), action="store_true")
+    parser.add_argument(
+        "--use-pdf-ocr", help=t("use_pdf_ocr_help"), action="store_true"
+    )
     parser.add_argument(
         "--use-process-pool",
         help=t("use_process_pool_help"),
@@ -1074,7 +1118,9 @@ def main():
     parser.add_argument("-v", "--verbose", help=t("verbose_help"), action="store_true")
     # 添加语言参数
-    parser.add_argument("--lang", help=t("language_help"), choices=["en", "zh"], default="zh")
+    parser.add_argument(
+        "--lang", help=t("language_help"), choices=["en", "zh"], default="zh"
+    )
     args = parser.parse_args()
@@ -1129,11 +1175,20 @@ def main():
     file_types = [ft.strip() for ft in args.file_types.split(",")]
     # Create scanner and run scan
-    scanner = DocumentScanner(input_dir, rules, file_types, args.use_pdf_ocr, args.use_process_pool, args.batch_size)
+    scanner = DocumentScanner(
+        input_dir,
+        rules,
+        file_types,
+        args.use_pdf_ocr,
+        args.use_process_pool,
+        args.batch_size,
+    )
     results = scanner.scan(threads=args.threads, show_progress=args.progress)
     # Save results to JSON file in input directory
-    output_file = input_dir / f"scan_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
+    output_file = (
+        input_dir / f"scan_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
+    )
     with open(output_file, "w", encoding="utf-8") as f:
         json.dump(results, f, indent=2, ensure_ascii=False)

sfi/docscan/docscan_gui.py CHANGED Viewed

@@ -45,14 +45,14 @@ except ImportError:
     try:
         from sfi.docscan.docscan import DocumentScanner, Rule
     except ImportError:
-        from src.docscan.docscan import DocumentScanner, Rule
+        from docscan.docscan import DocumentScanner, Rule
 # Import translations
 try:
     from sfi.docscan.lang.zhcn import TRANSLATIONS
 except ImportError:
     try:
-        from src.docscan.lang.zhcn import TRANSLATIONS
+        from docscan.lang.zhcn import TRANSLATIONS
     except ImportError:
         TRANSLATIONS = {}
@@ -413,15 +413,15 @@ class SettingsDialog(QDialog):
             if item.widget() and isinstance(item.widget(), QGroupBox):
                 group_box = item.widget()
                 if "Language" in group_box.title():  # type: ignore
-                    group_box.setTitle(
+                    group_box.setWindowTitle(
                         t("language_settings", default="Language Settings")
                     )  # type: ignore
                 elif "Processing" in group_box.title():  # type: ignore
-                    group_box.setTitle(
+                    group_box.setWindowTitle(
                         t("processing_options", default="Processing Options")
                     )  # type: ignore
                 elif "Performance" in group_box.title():  # type: ignore
-                    group_box.setTitle(
+                    group_box.setWindowTitle(
                         t("performance_settings", default="Performance Settings")
                     )  # type: ignore

sfi/filedate/filedate.py CHANGED Viewed

@@ -11,11 +11,18 @@ import time
 from dataclasses import dataclass
 from functools import cached_property, lru_cache
 from pathlib import Path
-DETECT_SEPARATORS: str = "-_#.~"
-SEP: str = "_"
-MAX_RETRY: int = 100
-DATE_PATTERN = re.compile(r"(20|19)\d{2}((0[1-9])|(1[012]))((0[1-9])|([12]\d)|(3[01]))")
+from re import Pattern
+from typing import Final
+# Configuration constants
+DETECT_SEPARATORS: Final[str] = "-_#.~"
+SEP: Final[str] = "_"
+MAX_RETRY: Final[int] = 100
+# Date pattern for detection
+DATE_PATTERN: Final[Pattern[str]] = re.compile(
+    r"(20|19)\d{2}((0[1-9])|(1[012]))((0[1-9])|([12]\d)|(3[01]))"
+)
 logging.basicConfig(level=logging.INFO, format="%(message)s")
 logger = logging.getLogger(__name__)

sfi/img2pdf/img2pdf.py CHANGED Viewed

@@ -33,7 +33,7 @@ class ImageToPdfConfig:
     """Configuration for image to PDF conversion."""
     DPI: int = 300
-    EXTENSIONS: set[str] = None
+    EXTENSIONS: set[str] | None = None
     def __post_init__(self) -> None:
         # Initialize default extensions if not provided
@@ -68,7 +68,7 @@ class ImageToPdfConfig:
     def save(self) -> None:
         """Save current configuration to file."""
         CONFIG_FILE.parent.mkdir(parents=True, exist_ok=True)
-        config_dict = {"DPI": self.DPI, "EXTENSIONS": list(self.EXTENSIONS)}
+        config_dict = {"DPI": self.DPI, "EXTENSIONS": list(self.EXTENSIONS or set())}
         CONFIG_FILE.write_text(json.dumps(config_dict, indent=4), encoding="utf-8")
@@ -111,7 +111,7 @@ def is_valid_image(file_path: Path) -> bool:
     # Extension validation.
     ext = file_path.suffix.lower()
-    if ext not in conf.EXTENSIONS:
+    if not conf.EXTENSIONS or ext not in conf.EXTENSIONS:
         logger.debug(f"Invalid image extension: {ext}, {file_path}")
         return False
@@ -337,8 +337,8 @@ class ImageToPDFRunner:
                 del image
             if "rgb_img" in locals():
                 del rgb_img
-        except:
-            pass  # Ignore cleanup errors
+        except Exception:
+            logger.error(f"Cleanup image: {filepath} failed")
         return None
     def _auto_rotate_image(self, image: Image.Image) -> Image.Image:

sfi/llmquantize/llmquantize.py CHANGED Viewed

@@ -31,9 +31,14 @@ from PySide2.QtWidgets import (
 CONFIG_FILE = Path.home() / ".pysfi" / "llmquantize.json"
-logging.basicConfig(level=logging.INFO)
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
 logger = logging.getLogger(__name__)
+__version__ = "1.0.0"
+__build__ = "20260204"
 @dataclass
 class QuantizerConfig:
@@ -44,9 +49,9 @@ class QuantizerConfig:
     WIN_POS: list[int] = None
     LAST_INPUT_FILE: str = ""
     SELECTED_QUANTS: list[str] = None
-    _loaded_from_file: bool = False
     def __post_init__(self) -> None:
+        """初始化默认值并加载配置文件."""
         # 初始化默认值
         if self.WIN_SIZE is None:
             self.WIN_SIZE = [600, 500]
@@ -62,12 +67,7 @@ class QuantizerConfig:
                 # 更新实例属性，只更新存在的属性
                 for key, value in config_data.items():
                     if hasattr(self, key):
-                        if key in ["WIN_SIZE", "WIN_POS", "SELECTED_QUANTS"]:
-                            # 对于列表类型，需要特别处理
-                            setattr(self, key, value)
-                        else:
-                            setattr(self, key, value)
-                self._loaded_from_file = True
+                        setattr(self, key, value)
             except (json.JSONDecodeError, TypeError, AttributeError) as e:
                 logger.warning("Failed to load configuration: %s", e)
                 logger.info("Using default configuration")
@@ -75,7 +75,7 @@ class QuantizerConfig:
             logger.info("Using default configuration")
     def save(self) -> None:
-        """保存配置."""
+        """保存配置到文件."""
         CONFIG_FILE.parent.mkdir(parents=True, exist_ok=True)
         # 将数据类转换为字典进行JSON序列化
         config_dict = {}
@@ -97,8 +97,11 @@ atexit.register(conf.save)
 def _process_gguf_stem(filename: str) -> str:
     """处理文件名, 移除可能的F16后缀.
+    Args:
+        filename: 输入的文件名(不含扩展名)
     Returns:
-        str: 处理后的文件名
+        str: 处理后的文件名, 移除了F16后缀(如果存在)
     """
     if filename.upper().endswith("-F16"):
         filename = filename[:-4]  # 移除-F16后缀
@@ -106,7 +109,13 @@ def _process_gguf_stem(filename: str) -> str:
 class QuantizationWorker(QThread):
-    """量化执行线程Worker."""
+    """量化执行线程Worker.
+    Attributes:
+        progress_msg_updated: 进度消息更新信号
+        progress_count_updated: 进度数值更新信号
+        is_finished: 完成信号
+    """
     progress_msg_updated = Signal(str)
     progress_count_updated = Signal(int)
@@ -117,6 +126,12 @@ class QuantizationWorker(QThread):
         input_file: pathlib.Path,
         quant_types: list[str],
     ) -> None:
+        """初始化量化Worker.
+        Args:
+            input_file: 输入的F16 GGUF文件路径
+            quant_types: 需要转换的量化类型列表
+        """
         super().__init__()
         self.input_file = input_file
@@ -139,7 +154,7 @@ class QuantizationWorker(QThread):
                     f"正在转换到 {quant_type} 格式...",
                 )
-                # 构建命令行参数（确保所有参数都是字符串）
+                # 构建命令行参数
                 cmd = [
                     "llama-quantize",
                     str(self.input_file.name),
@@ -147,7 +162,7 @@ class QuantizationWorker(QThread):
                     quant_type,
                 ]
-                # 执行转换命令（使用 cwd 参数避免全局目录变更）
+                # 执行转换命令
                 try:
                     process = subprocess.Popen(
                         cmd,
@@ -312,7 +327,7 @@ class GGUFQuantizerGUI(QMainWindow):
         self.setCentralWidget(main_widget)
     def select_file(self) -> None:
-        """选择文件."""
+        """选择F16 GGUF文件."""
         # 使用上次选择的目录作为初始目录
         initial_dir = ""
         if conf.LAST_INPUT_FILE and pathlib.Path(conf.LAST_INPUT_FILE).exists():
@@ -361,21 +376,25 @@ class GGUFQuantizerGUI(QMainWindow):
             filename = f"{_process_gguf_stem(self.input_file.stem)}-{quant_type}.gguf"
             expected_file = dir_path / filename
             if expected_file.exists():
-                # 文件已存在，标记但不禁用，允许用户选择重新生成
+                # 文件已存在，标记并禁用，防止重复生成
                 self.quant_checks[quant_type].setText(
                     f"{self.quant_types[quant_type]} (已存在)",
                 )
                 self.quant_checks[quant_type].setStyleSheet("color: orange;")
+                self.quant_checks[quant_type].setChecked(True)
+                self.quant_checks[quant_type].setEnabled(False)
             else:
                 self.quant_checks[quant_type].setText(
                     self.quant_types[quant_type],
                 )
                 self.quant_checks[quant_type].setStyleSheet("")
+                self.quant_checks[quant_type].setEnabled(True)
     def _scroll_to_bottom(self) -> None:
         """滚动输出框到底部."""
         scrollbar = self.output_text.verticalScrollBar()
-        scrollbar.setValue(scrollbar.maximum())
+        if scrollbar:
+            scrollbar.setValue(scrollbar.maximum())
     def on_quant_type_changed(self, _state: int) -> None:
         """量化类型变更时保存配置."""
@@ -397,7 +416,7 @@ class GGUFQuantizerGUI(QMainWindow):
         return super().resizeEvent(event)
     def start_conversion(self) -> None:
-        """开始转换."""
+        """开始转换量化任务."""
         # 检查是否已有任务在运行
         if self.worker and self.worker.isRunning():
             self.output_text.append("已有转换任务正在进行, 请等待完成")
@@ -405,7 +424,9 @@ class GGUFQuantizerGUI(QMainWindow):
             return
         selected_quants: list[str] = [
-            q for q, check in self.quant_checks.items() if check.isChecked()
+            q
+            for q, check in self.quant_checks.items()
+            if check.isChecked() and check.isEnabled()
         ]
         if not selected_quants:
@@ -418,19 +439,8 @@ class GGUFQuantizerGUI(QMainWindow):
             self._scroll_to_bottom()
             return
-        # 检查是否有已存在的文件将被覆盖
-        existing_files = []
-        for quant_type in selected_quants:
-            filename = f"{_process_gguf_stem(self.input_file.stem)}-{quant_type}.gguf"
-            expected_file = self.input_file.parent / filename
-            if expected_file.exists():
-                existing_files.append(filename)
-        if existing_files:
-            self.output_text.append("警告: 将覆盖以下已存在文件:")
-            for existing_file in existing_files:
-                self.output_text.append(f"  - {existing_file}")
-            self._scroll_to_bottom()
+        # 注意：由于selected_quants只包含启用的复选框，
+        # 所以不会包含已存在的禁用文件，无需额外检查覆盖
         self.convert_btn.setEnabled(False)
         self.progress_bar.setValue(0)
@@ -487,6 +497,7 @@ class GGUFQuantizerGUI(QMainWindow):
 def main() -> None:
+    """主程序入口."""
     app = QApplication(sys.argv)
     # 检查是否安装了llama.cpp
@@ -497,8 +508,8 @@ def main() -> None:
             check=False,
         )
     except FileNotFoundError:
-        logger.exception("错误: 未找到llama.cpp/quantize工具")
-        logger.exception(
+        logger.error("错误: 未找到llama.cpp/quantize工具")
+        logger.error(
             "请确保已编译llama.cpp并将quantize工具放在llama.cpp/目录下",
         )
         sys.exit(1)

sfi/llmserver/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	-
1	+

pysfi 0.1.13__py3-none-any.whl → 0.1.15__py3-none-any.whl

pysfi 0.1.13__py3-none-any.whl → 0.1.15__py3-none-any.whl