PyPI - pysfi - Versions diffs - 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl - Mend

pysfi 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

{pysfi-0.1.12.dist-info → pysfi-0.1.14.dist-info}/METADATA +1 -1
pysfi-0.1.14.dist-info/RECORD +68 -0
{pysfi-0.1.12.dist-info → pysfi-0.1.14.dist-info}/entry_points.txt +3 -0
sfi/__init__.py +19 -2
sfi/alarmclock/__init__.py +3 -0
sfi/alarmclock/alarmclock.py +23 -40
sfi/bumpversion/__init__.py +3 -1
sfi/bumpversion/bumpversion.py +64 -15
sfi/cleanbuild/__init__.py +3 -0
sfi/cleanbuild/cleanbuild.py +5 -1
sfi/cli.py +25 -4
sfi/condasetup/__init__.py +1 -0
sfi/condasetup/condasetup.py +91 -76
sfi/docdiff/__init__.py +1 -0
sfi/docdiff/docdiff.py +3 -2
sfi/docscan/__init__.py +1 -1
sfi/docscan/docscan.py +78 -23
sfi/docscan/docscan_gui.py +152 -48
sfi/filedate/filedate.py +12 -5
sfi/img2pdf/img2pdf.py +453 -0
sfi/llmclient/llmclient.py +31 -8
sfi/llmquantize/llmquantize.py +76 -37
sfi/llmserver/__init__.py +1 -0
sfi/llmserver/llmserver.py +63 -13
sfi/makepython/makepython.py +1145 -201
sfi/pdfsplit/pdfsplit.py +45 -12
sfi/pyarchive/__init__.py +1 -0
sfi/pyarchive/pyarchive.py +908 -278
sfi/pyembedinstall/pyembedinstall.py +88 -89
sfi/pylibpack/pylibpack.py +561 -463
sfi/pyloadergen/pyloadergen.py +372 -218
sfi/pypack/pypack.py +510 -959
sfi/pyprojectparse/pyprojectparse.py +337 -40
sfi/pysourcepack/__init__.py +1 -0
sfi/pysourcepack/pysourcepack.py +210 -131
sfi/quizbase/quizbase_gui.py +2 -2
sfi/taskkill/taskkill.py +168 -59
sfi/which/which.py +11 -3
pysfi-0.1.12.dist-info/RECORD +0 -62
sfi/workflowengine/workflowengine.py +0 -444
{pysfi-0.1.12.dist-info → pysfi-0.1.14.dist-info}/WHEEL +0 -0
/sfi/{workflowengine → img2pdf}/__init__.py +0 -0

sfi/condasetup/condasetup.py CHANGED Viewed

@@ -3,92 +3,106 @@ from __future__ import annotations
 import argparse
 import logging
 import os
+import subprocess
 from pathlib import Path
+from typing import Final
 logging.basicConfig(level=logging.INFO, format="%(message)s")
 logger = logging.getLogger(__name__)
-cwd = Path.cwd()
-_CONDA_MIRROR_URLS: dict[str, frozenset[str]] = {
-    "tsinghua": frozenset(
-        [
-            "https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/",
-            "https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/",
-            "https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r/",
-            "https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2/",
-            "https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/pro/",
-            "https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/",
-            "https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/bioconda/",
-            "https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/menpo/",
-            "https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/",
-        ]
-    ),
-    "ustc": frozenset(
-        [
-            "https://mirrors.ustc.edu.cn/anaconda/pkgs/main/",
-            "https://mirrors.ustc.edu.cn/anaconda/pkgs/free/",
-            "https://mirrors.ustc.edu.cn/anaconda/pkgs/r/",
-            "https://mirrors.ustc.edu.cn/anaconda/pkgs/msys2/",
-            "https://mirrors.ustc.edu.cn/anaconda/pkgs/pro/",
-            "https://mirrors.ustc.edu.cn/anaconda/pkgs/dev/",
-            "https://mirrors.ustc.edu.cn/anaconda/cloud/conda-forge/",
-            "https://mirrors.ustc.edu.cn/anaconda/cloud/bioconda/",
-            "https://mirrors.ustc.edu.cn/anaconda/cloud/menpo/",
-            "https://mirrors.ustc.edu.cn/anaconda/cloud/pytorch/",
-        ]
-    ),
-    "bsfu": frozenset(
-        [
-            "https://mirrors.bsfu.edu.cn/anaconda/pkgs/main/",
-            "https://mirrors.bsfu.edu.cn/anaconda/pkgs/free/",
-            "https://mirrors.bsfu.edu.cn/anaconda/pkgs/r/",
-            "https://mirrors.bsfu.edu.cn/anaconda/pkgs/msys2/",
-            "https://mirrors.bsfu.edu.cn/anaconda/pkgs/pro/",
-            "https://mirrors.bsfu.edu.cn/anaconda/pkgs/dev/",
-            "https://mirrors.bsfu.edu.cn/anaconda/cloud/conda-forge/",
-            "https://mirrors.bsfu.edu.cn/anaconda/cloud/bioconda/",
-            "https://mirrors.bsfu.edu.cn/anaconda/cloud/menpo/",
-            "https://mirrors.bsfu.edu.cn/anaconda/cloud/pytorch/",
-        ]
-    ),
-    "aliyun": frozenset(
-        [
-            "https://mirrors.aliyun.com/anaconda/pkgs/main/",
-            "https://mirrors.aliyun.com/anaconda/pkgs/free/",
-            "https://mirrors.aliyun.com/anaconda/pkgs/r/",
-            "https://mirrors.aliyun.com/anaconda/pkgs/msys2/",
-            "https://mirrors.aliyun.com/anaconda/pkgs/pro/",
-            "https://mirrors.aliyun.com/anaconda/pkgs/dev/",
-            "https://mirrors.aliyun.com/anaconda/cloud/conda-forge/",
-            "https://mirrors.aliyun.com/anaconda/cloud/bioconda/",
-            "https://mirrors.aliyun.com/anaconda/cloud/menpo/",
-            "https://mirrors.aliyun.com/anaconda/cloud/pytorch/",
-        ]
-    ),
+# Conda mirror URLs
+_CONDA_MIRROR_URLS: Final[dict[str, frozenset[str]]] = {
+    "tsinghua": frozenset([
+        "https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/",
+        "https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/",
+        "https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r/",
+        "https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2/",
+        "https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/pro/",
+        "https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/",
+        "https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/bioconda/",
+        "https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/menpo/",
+        "https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/",
+    ]),
+    "ustc": frozenset([
+        "https://mirrors.ustc.edu.cn/anaconda/pkgs/main/",
+        "https://mirrors.ustc.edu.cn/anaconda/pkgs/free/",
+        "https://mirrors.ustc.edu.cn/anaconda/pkgs/r/",
+        "https://mirrors.ustc.edu.cn/anaconda/pkgs/msys2/",
+        "https://mirrors.ustc.edu.cn/anaconda/pkgs/pro/",
+        "https://mirrors.ustc.edu.cn/anaconda/pkgs/dev/",
+        "https://mirrors.ustc.edu.cn/anaconda/cloud/conda-forge/",
+        "https://mirrors.ustc.edu.cn/anaconda/cloud/bioconda/",
+        "https://mirrors.ustc.edu.cn/anaconda/cloud/menpo/",
+        "https://mirrors.ustc.edu.cn/anaconda/cloud/pytorch/",
+    ]),
+    "bsfu": frozenset([
+        "https://mirrors.bsfu.edu.cn/anaconda/pkgs/main/",
+        "https://mirrors.bsfu.edu.cn/anaconda/pkgs/free/",
+        "https://mirrors.bsfu.edu.cn/anaconda/pkgs/r/",
+        "https://mirrors.bsfu.edu.cn/anaconda/pkgs/msys2/",
+        "https://mirrors.bsfu.edu.cn/anaconda/pkgs/pro/",
+        "https://mirrors.bsfu.edu.cn/anaconda/pkgs/dev/",
+        "https://mirrors.bsfu.edu.cn/anaconda/cloud/conda-forge/",
+        "https://mirrors.bsfu.edu.cn/anaconda/cloud/bioconda/",
+        "https://mirrors.bsfu.edu.cn/anaconda/cloud/menpo/",
+        "https://mirrors.bsfu.edu.cn/anaconda/cloud/pytorch/",
+    ]),
+    "aliyun": frozenset([
+        "https://mirrors.aliyun.com/anaconda/pkgs/main/",
+        "https://mirrors.aliyun.com/anaconda/pkgs/free/",
+        "https://mirrors.aliyun.com/anaconda/pkgs/r/",
+        "https://mirrors.aliyun.com/anaconda/pkgs/msys2/",
+        "https://mirrors.aliyun.com/anaconda/pkgs/pro/",
+        "https://mirrors.aliyun.com/anaconda/pkgs/dev/",
+        "https://mirrors.aliyun.com/anaconda/cloud/conda-forge/",
+        "https://mirrors.aliyun.com/anaconda/cloud/bioconda/",
+        "https://mirrors.aliyun.com/anaconda/cloud/menpo/",
+        "https://mirrors.aliyun.com/anaconda/cloud/pytorch/",
+    ]),
 }
 def set_conda_mirror(mirror: str = "tsinghua") -> None:
-    """Set the Conda mirror for the given channel."""
-    if mirror in _CONDA_MIRROR_URLS:
-        old_config = Path.home() / ".condarc"
-        if old_config.exists():
-            logger.info("Found existing .condarc file, backing it up")
-            os.rename(old_config, Path.home() / ".condarc.bak")
-        else:
-            logger.debug("No existing .condarc file found")
-        mirror_urls = _CONDA_MIRROR_URLS[mirror]
-        for url in mirror_urls:
-            logger.debug(f"Adding mirror: {url}")
-            os.system(f"conda config --add channels {url}")
-        os.system("conda config --set show_channel_urls yes")
-        logger.info("Conda mirror set successfully")
-    else:
+    """Set the Conda mirror for the given channel.
+    Args:
+        mirror: Mirror name (tsinghua, ustc, bsfu, or aliyun)
+    """
+    if mirror not in _CONDA_MIRROR_URLS:
         logger.error(f"Invalid mirror: {mirror}")
+        return
+    old_config = Path.home() / ".condarc"
+    if old_config.exists():
+        logger.info("Found existing .condarc file, backing it up")
+        os.rename(old_config, Path.home() / ".condarc.bak")
+    else:
+        logger.debug("No existing .condarc file found")
+    mirror_urls = _CONDA_MIRROR_URLS[mirror]
+    for url in mirror_urls:
+        logger.debug(f"Adding mirror: {url}")
+        try:
+            subprocess.run(["conda", "config", "--add", "channels", url], check=True)
+        except subprocess.CalledProcessError as e:
+            logger.error(f"Failed to add mirror {url}: {e}")
+            return
+    try:
+        subprocess.run(
+            ["conda", "config", "--set", "show_channel_urls", "yes"], check=True
+        )
+        logger.info("Conda mirror set successfully")
+    except subprocess.CalledProcessError as e:
+        logger.error(f"Failed to set show_channel_urls: {e}")
+def parse_args() -> argparse.Namespace:
+    """Parse command line arguments.
-def parse_args():
+    Returns:
+        Parsed arguments
+    """
     parser = argparse.ArgumentParser(description="Setup Conda environment for SFI")
     parser.add_argument(
         "mirror",
@@ -103,7 +117,8 @@ def parse_args():
     return parser.parse_args()
-def main():
+def main() -> None:
+    """Main entry point for condasetup CLI."""
     args = parse_args()
     if args.debug:

sfi/docdiff/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+

sfi/docdiff/docdiff.py CHANGED Viewed

@@ -10,9 +10,10 @@ import time
 from dataclasses import dataclass
 from functools import cached_property
 from pathlib import Path
-from typing import Any
+from typing import Any, Final
-CONFIG_FILE = Path.home() / ".sfi" / "docdiff.json"
+# Configuration file path
+CONFIG_FILE: Final[Path] = Path.home() / ".pysfi" / "docdiff.json"
 logging.basicConfig(level=logging.INFO, format="%(message)s")
 logger = logging.getLogger(__name__)

sfi/docscan/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 """Document scanner module for scanning and extracting content from various document formats."""
-__version__ = "0.1.12"
+__version__ = "0.1.14"

sfi/docscan/docscan.py CHANGED Viewed

@@ -91,7 +91,9 @@ def t(key: str, **kwargs) -> str:
     Returns:
         Translated text
     """
-    text = ZH_TRANSLATIONS.get(key, key) if USE_CHINESE else EN_TRANSLATIONS.get(key, key)
+    text = (
+        ZH_TRANSLATIONS.get(key, key) if USE_CHINESE else EN_TRANSLATIONS.get(key, key)
+    )
     # Format with kwargs if provided
     if kwargs:
@@ -123,7 +125,9 @@ class Rule:
                 # Use re.ASCII for faster matching when possible
                 self.compiled_pattern = re.compile(self.pattern, flags | re.ASCII)
             except re.error as e:
-                logger.warning(t("invalid_regex_pattern", pattern=self.pattern, error=e))
+                logger.warning(
+                    t("invalid_regex_pattern", pattern=self.pattern, error=e)
+                )
                 self.compiled_pattern = None
         else:
             self.compiled_pattern = None
@@ -274,13 +278,18 @@ class DocumentScanner:
                 "use_pdf_ocr": self.use_pdf_ocr,
                 "use_process_pool": self.use_process_pool,
             },
-            "rules": [{"name": r.name, "pattern": r.pattern, "is_regex": r.is_regex} for r in self.rules],
+            "rules": [
+                {"name": r.name, "pattern": r.pattern, "is_regex": r.is_regex}
+                for r in self.rules
+            ],
             "matches": [],
         }
         # Scan files in parallel
         processed = 0
-        executor_class = ProcessPoolExecutor if self.use_process_pool else ThreadPoolExecutor
+        executor_class = (
+            ProcessPoolExecutor if self.use_process_pool else ThreadPoolExecutor
+        )
         executor = executor_class(max_workers=threads)
         self._executor = executor  # Keep reference for forced shutdown
@@ -350,10 +359,17 @@ class DocumentScanner:
                     break
                 try:
-                    file_result = future.result(timeout=1.0)  # Short timeout to allow quick stop
+                    file_result = future.result(
+                        timeout=1.0
+                    )  # Short timeout to allow quick stop
                     if file_result and file_result["matches"]:
                         results["matches"].append(file_result)
-                        logger.info(t("found_matches_in_file", file_name=Path(file_result.get("file_path", "")).name))
+                        logger.info(
+                            t(
+                                "found_matches_in_file",
+                                file_name=Path(file_result.get("file_path", "")).name,
+                            )
+                        )
                 except TimeoutError:
                     logger.warning(t("task_timeout_scan_may_be_stopping"))
                     if self.stopped:
@@ -366,7 +382,9 @@ class DocumentScanner:
                 # Report progress
                 if show_progress and processed % 10 == 0:
-                    logger.info(t("progress_report", processed=processed, total=len(files)))
+                    logger.info(
+                        t("progress_report", processed=processed, total=len(files))
+                    )
                 # Call progress callback if set
                 if self._progress_callback:
@@ -391,7 +409,9 @@ class DocumentScanner:
         if self.stopped:
             logger.info(t("scan_stopped_processed_files", processed=processed))
         else:
-            logger.info(t("scan_complete_found_matches", matches_count=len(results["matches"])))
+            logger.info(
+                t("scan_complete_found_matches", matches_count=len(results["matches"]))
+            )
         return results
@@ -493,7 +513,9 @@ class DocumentScanner:
                     return {}
         except Exception as e:
-            logger.warning(t("could_not_extract_text_from_file", file_path=file_path, error=e))
+            logger.warning(
+                t("could_not_extract_text_from_file", file_path=file_path, error=e)
+            )
             return {}
         processing_time = time.perf_counter() - file_start_time
@@ -549,14 +571,18 @@ class DocumentScanner:
             try:
                 return self._extract_pdf_fitz(file_path)
             except Exception as e:
-                logger.warning(t("pymupdf_failed_for_file", file_name=file_path.name, error=e))
+                logger.warning(
+                    t("pymupdf_failed_for_file", file_name=file_path.name, error=e)
+                )
         # Fallback to pypdf
         if pypdf is not None:
             try:
                 return self._extract_pdf_pypdf(file_path)
             except Exception as e:
-                logger.error(t("pypdf_also_failed_for_file", file_name=file_path.name, error=e))
+                logger.error(
+                    t("pypdf_also_failed_for_file", file_name=file_path.name, error=e)
+                )
                 return "", {}
         logger.warning(t("no_pdf_library_installed"))
@@ -632,7 +658,9 @@ class DocumentScanner:
         except Exception as e:
             if doc:
                 doc.close()
-            logger.warning(t("pymupdf_error_trying_fallback", file_path=file_path, error=e))
+            logger.warning(
+                t("pymupdf_error_trying_fallback", file_path=file_path, error=e)
+            )
             # Re-raise to trigger fallback to pypdf
             raise
@@ -764,8 +792,12 @@ class DocumentScanner:
                     text_parts.append(text)
             metadata = {
-                "title": book.get_metadata("DC", "title")[0][0] if book.get_metadata("DC", "title") else "",  # pyright: ignore[reportAttributeAccessIssue]
-                "author": book.get_metadata("DC", "creator")[0][0] if book.get_metadata("DC", "creator") else "",  # pyright: ignore[reportAttributeAccessIssue]
+                "title": book.get_metadata("DC", "title")[0][0]
+                if book.get_metadata("DC", "title")
+                else "",  # pyright: ignore[reportAttributeAccessIssue]
+                "author": book.get_metadata("DC", "creator")[0][0]
+                if book.get_metadata("DC", "creator")
+                else "",  # pyright: ignore[reportAttributeAccessIssue]
                 "format": "EPUB",
             }
@@ -810,7 +842,9 @@ class DocumentScanner:
             root = tree.getroot()
             # Extract all text content
-            text_parts = [elem.text for elem in root.iter() if elem.text and elem.text.strip()]
+            text_parts = [
+                elem.text for elem in root.iter() if elem.text and elem.text.strip()
+            ]
             text = "\n".join(text_parts)
             metadata = {
@@ -954,7 +988,9 @@ class DocumentScanner:
                         wb.close()
                         return "", {}
-                row_text = " | ".join(str(cell) if cell is not None else "" for cell in row)
+                row_text = " | ".join(
+                    str(cell) if cell is not None else "" for cell in row
+                )
                 if row_text.strip():
                     text_parts.append(row_text)
@@ -1017,7 +1053,9 @@ class DocumentScanner:
             return text, metadata
         except Exception as e:
-            logger.warning(t("could_not_perform_ocr_on_file", file_path=file_path, error=e))
+            logger.warning(
+                t("could_not_perform_ocr_on_file", file_path=file_path, error=e)
+            )
             return "", {}
     def _extract_text(self, file_path: Path) -> tuple[str, dict[str, Any]]:
@@ -1047,8 +1085,12 @@ def main():
     USE_CHINESE = temp_args.lang == "zh"
     parser = argparse.ArgumentParser(description=t("document_scanner_description"))
-    parser.add_argument("input", type=str, nargs="?", default=str(cwd), help=t("input_directory_help"))
-    parser.add_argument("-r", "--rules", type=str, default="rules.json", help=t("rules_file_help"))
+    parser.add_argument(
+        "input", type=str, nargs="?", default=str(cwd), help=t("input_directory_help")
+    )
+    parser.add_argument(
+        "-r", "--rules", type=str, default="rules.json", help=t("rules_file_help")
+    )
     parser.add_argument("--recursive", action="store_true", help=t("recursive_help"))
     parser.add_argument(
         "-f",
@@ -1056,7 +1098,9 @@ def main():
         help=t("file_types_help"),
         default="pdf,docx,xlsx,pptx,txt,odt,rtf,epub,csv,xml,html,md,jpg,jpeg,png,gif,bmp,tiff",
     )
-    parser.add_argument("--use-pdf-ocr", help=t("use_pdf_ocr_help"), action="store_true")
+    parser.add_argument(
+        "--use-pdf-ocr", help=t("use_pdf_ocr_help"), action="store_true"
+    )
     parser.add_argument(
         "--use-process-pool",
         help=t("use_process_pool_help"),
@@ -1074,7 +1118,9 @@ def main():
     parser.add_argument("-v", "--verbose", help=t("verbose_help"), action="store_true")
     # 添加语言参数
-    parser.add_argument("--lang", help=t("language_help"), choices=["en", "zh"], default="zh")
+    parser.add_argument(
+        "--lang", help=t("language_help"), choices=["en", "zh"], default="zh"
+    )
     args = parser.parse_args()
@@ -1129,11 +1175,20 @@ def main():
     file_types = [ft.strip() for ft in args.file_types.split(",")]
     # Create scanner and run scan
-    scanner = DocumentScanner(input_dir, rules, file_types, args.use_pdf_ocr, args.use_process_pool, args.batch_size)
+    scanner = DocumentScanner(
+        input_dir,
+        rules,
+        file_types,
+        args.use_pdf_ocr,
+        args.use_process_pool,
+        args.batch_size,
+    )
     results = scanner.scan(threads=args.threads, show_progress=args.progress)
     # Save results to JSON file in input directory
-    output_file = input_dir / f"scan_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
+    output_file = (
+        input_dir / f"scan_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
+    )
     with open(output_file, "w", encoding="utf-8") as f:
         json.dump(results, f, indent=2, ensure_ascii=False)

pysfi 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl

pysfi 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl