PyPI - pdf-scount - Versions diffs - 0.1.0__py3-none-any.whl - Mend

pdf-scount 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

pdf_scount-0.1.0.dist-info/METADATA +17 -0
pdf_scount-0.1.0.dist-info/RECORD +5 -0
pdf_scount-0.1.0.dist-info/WHEEL +4 -0
pdf_scount-0.1.0.dist-info/entry_points.txt +2 -0
pdf_scount.py +74 -0

pdf_scount-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,17 @@
+Metadata-Version: 2.4
+Name: pdf-scount
+Version: 0.1.0
+Summary: A CLI tool to search keywords in PDFs and export matching pages as PNG images
+Author-email: pioet <1599023541@qq.com>
+License-Expression: MIT
+Keywords: cli,extract,pdf,png,search
+Classifier: Development Status :: 3 - Alpha
+Classifier: Environment :: Console
+Classifier: Intended Audience :: End Users/Desktop
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Utilities
+Requires-Python: >=3.13
+Requires-Dist: pymupdf>=1.26.7
+Requires-Dist: typer>=0.21.1

pdf_scount-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,5 @@
+pdf_scount.py,sha256=ntUX9D-azjQiCKhEu8jYHdRMHpmaq6R32r-Lci_WBp4,2613
+pdf_scount-0.1.0.dist-info/METADATA,sha256=RZILGhoAJLz4nQmsf8L9FCi42d_s1z6QOevZaZdSkIQ,637
+pdf_scount-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+pdf_scount-0.1.0.dist-info/entry_points.txt,sha256=z_hdqmQ6c_G4DWwWvsd-7VJCXdMpx0XWlgspMjWYjW8,44
+pdf_scount-0.1.0.dist-info/RECORD,,

pdf_scount-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.28.0
+Root-Is-Purelib: true
+Tag: py3-none-any

pdf_scount-0.1.0.dist-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,2 @@
+[console_scripts]
+pdfscout = pdf_scount:app

pdf_scount.py ADDED Viewed

@@ -0,0 +1,74 @@
+import fitz  # PyMuPDF
+import typer
+from typing import Optional
+from pathlib import Path
+app = typer.Typer(help="PdfScout: 一个根据关键词快速定位并截取 PDF 页面的工具。")
+def find_first_page_with_keyword(pdf_path: Path, keyword: str) -> Optional[int]:
+    """
+    在 PDF 中搜索关键词，返回第一个匹配页面的索引（从0开始）。
+    """
+    doc = fitz.open(pdf_path)
+    search_term = keyword.lower()
+    for page_num in range(len(doc)):
+        page = doc.load_page(page_num)
+        # 获取纯文本并转为小写进行不区分大小写匹配
+        if search_term in page.get_text().lower():
+            doc.close()
+            return page_num
+    doc.close()
+    return None
+def save_page_as_png(pdf_path: Path, page_index: int, output_path: Path, dpi: int):
+    """
+    将指定页码转换为 PNG 图片。
+    """
+    doc = fitz.open(pdf_path)
+    page = doc.load_page(page_index)
+    # 计算缩放比例。PyMuPDF 默认 DPI 是 72。
+    # 缩放因子 = 目标 DPI / 72
+    zoom = dpi / 72
+    matrix = fitz.Matrix(zoom, zoom)
+    pix = page.get_pixmap(matrix=matrix)
+    pix.save(str(output_path))
+    doc.close()
+@app.command()
+def scout(
+    pdf_path: Path = typer.Argument(..., help="输入的 PDF 文件路径", exists=True, file_okay=True, dir_okay=False, readable=True),
+    keyword: str = typer.Argument(..., help="要搜索的关键词"),
+    output: Optional[Path] = typer.Option(None, "--output", "-o", help="输出图片路径。如果不指定，则使用默认命名规则"),
+    dpi: int = typer.Option(300, "--dpi", "-d", help="输出图片的 DPI 分辨率")
+):
+    """
+    执行侦察任务：搜索关键词并导出页面为 PNG。
+    """
+    # 1. 执行搜索
+    typer.echo(f"🔍 正在 '{pdf_path.name}' 中寻找关键词: '{keyword}'...")
+    page_idx = find_first_page_with_keyword(pdf_path, keyword)
+    if page_idx is None:
+        typer.secho(f"❌ 未在文档中找到关键词: '{keyword}'", fg="red")
+        raise typer.Exit()
+    # 2. 确定输出文件名
+    if output is None:
+        # 默认命名: 原文件名_page_N.png
+        output_name = f"{pdf_path.stem}_page_{page_idx + 1}.png"
+        output = Path.cwd() / output_name
+    # 3. 转换并存储
+    try:
+        save_page_as_png(pdf_path, page_idx, output, dpi)
+        typer.secho(f"✅ 成功！页面 {page_idx + 1} 已保存至: {output}", fg="green")
+    except Exception as e:
+        typer.secho(f"💥 转换过程中出错: {e}", fg="red")
+        raise typer.Exit(code=1)
+# Run the Typer application when this module is executed directly as a script.
+if __name__ == "__main__":
+    app()