pdf-scount 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
@@ -0,0 +1 @@
1
+ 3.13
@@ -0,0 +1,17 @@
1
+ Metadata-Version: 2.4
2
+ Name: pdf-scount
3
+ Version: 0.1.0
4
+ Summary: A CLI tool to search keywords in PDFs and export matching pages as PNG images
5
+ Author-email: pioet <1599023541@qq.com>
6
+ License-Expression: MIT
7
+ Keywords: cli,extract,pdf,png,search
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Environment :: Console
10
+ Classifier: Intended Audience :: End Users/Desktop
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.13
14
+ Classifier: Topic :: Utilities
15
+ Requires-Python: >=3.13
16
+ Requires-Dist: pymupdf>=1.26.7
17
+ Requires-Dist: typer>=0.21.1
File without changes
@@ -0,0 +1,74 @@
1
+ import fitz # PyMuPDF
2
+ import typer
3
+ from typing import Optional
4
+ from pathlib import Path
5
+
6
+ app = typer.Typer(help="PdfScout: 一个根据关键词快速定位并截取 PDF 页面的工具。")
7
+
8
def find_first_page_with_keyword(pdf_path: Path, keyword: str) -> Optional[int]:
    """Return the zero-based index of the first page that contains ``keyword``.

    The match is case-insensitive: the keyword and each page's extracted
    plain text are both lower-cased before the substring test.

    Args:
        pdf_path: Path of the PDF document to open with PyMuPDF.
        keyword: Text to look for.

    Returns:
        The index (starting at 0) of the first page whose text contains the
        keyword, or ``None`` when no page matches.
    """
    search_term = keyword.lower()

    # The context manager closes the document on every exit path: early
    # return on a hit, normal completion, or an exception raised while a
    # page is being loaded or its text extracted.
    with fitz.open(pdf_path) as doc:
        for page_num in range(len(doc)):
            page = doc.load_page(page_num)
            if search_term in page.get_text().lower():
                return page_num

    return None
24
+
25
def save_page_as_png(pdf_path: Path, page_index: int, output_path: Path, dpi: int):
    """Render a single page of a PDF and save the resulting image.

    Args:
        pdf_path: Path of the PDF document to open with PyMuPDF.
        page_index: Zero-based index of the page to render.
        output_path: Destination file for the rendered image; it is passed
            to ``Pixmap.save`` as a string.
        dpi: Target resolution. The page is scaled by ``dpi / 72`` on both
            axes, 72 being the base resolution PyMuPDF renders at.

    Raises:
        ValueError: If ``dpi`` is not positive, since that would yield a
            zero or negative scaling factor.
    """
    if dpi <= 0:
        raise ValueError(f"dpi must be a positive integer, got {dpi}")

    # Scale factor = target DPI / 72, applied uniformly to x and y.
    zoom = dpi / 72
    matrix = fitz.Matrix(zoom, zoom)

    # The context manager guarantees the document is closed even when
    # loading, rendering or saving the page raises.
    with fitz.open(pdf_path) as doc:
        page = doc.load_page(page_index)
        pix = page.get_pixmap(matrix=matrix)
        pix.save(str(output_path))
40
+
41
# The help text is passed explicitly so the CLI's --help output stays exactly
# what the original docstring produced, while the docstring itself is English.
@app.command(help="执行侦察任务:搜索关键词并导出页面为 PNG。")
def scout(
    pdf_path: Path = typer.Argument(..., help="输入的 PDF 文件路径", exists=True, file_okay=True, dir_okay=False, readable=True),
    keyword: str = typer.Argument(..., help="要搜索的关键词"),
    output: Optional[Path] = typer.Option(None, "--output", "-o", help="输出图片路径。如果不指定,则使用默认命名规则"),
    dpi: int = typer.Option(300, "--dpi", "-d", min=1, help="输出图片的 DPI 分辨率"),
):
    """Search a PDF for a keyword and export the first matching page as PNG.

    Args:
        pdf_path: Existing, readable PDF file to search.
        keyword: Text to look for (matched case-insensitively by
            ``find_first_page_with_keyword``).
        output: Destination image path. When omitted, the file is written to
            the current working directory as ``<pdf stem>_page_<N>.png``,
            where ``N`` is the one-based page number.
        dpi: Output resolution; must be at least 1.

    Raises:
        typer.Exit: With exit code 1 when the search fails, the keyword is
            not found, or the page cannot be rendered/saved.
    """
    # 1. Search for the keyword.
    typer.echo(f"🔍 正在 '{pdf_path.name}' 中寻找关键词: '{keyword}'...")
    try:
        page_idx = find_first_page_with_keyword(pdf_path, keyword)
    except Exception as exc:
        # e.g. a corrupt or non-PDF file: report it instead of a traceback.
        typer.secho(f"💥 搜索过程中出错: {exc}", fg="red")
        raise typer.Exit(code=1) from exc

    # NOTE: typer.Exit derives from Exception, so it must be raised outside
    # the try blocks in this function or it would be swallowed by them.
    if page_idx is None:
        typer.secho(f"❌ 未在文档中找到关键词: '{keyword}'", fg="red")
        # A failed search is an error for callers/scripts: exit non-zero.
        raise typer.Exit(code=1)

    # 2. Decide the output file name (default: <stem>_page_<N>.png in cwd).
    if output is None:
        output_name = f"{pdf_path.stem}_page_{page_idx + 1}.png"
        output = Path.cwd() / output_name

    # 3. Render and save the page.
    try:
        save_page_as_png(pdf_path, page_idx, output, dpi)
    except Exception as exc:
        typer.secho(f"💥 转换过程中出错: {exc}", fg="red")
        raise typer.Exit(code=1) from exc

    typer.secho(f"✅ 成功!页面 {page_idx + 1} 已保存至: {output}", fg="green")
72
+
73
+ if __name__ == "__main__":
74
+ app()
@@ -0,0 +1,35 @@
1
+ [project]
2
+ name = "pdf-scount"
3
+ version = "0.1.0"
4
+ description = "A CLI tool to search keywords in PDFs and export matching pages as PNG images"
5
+ readme = "README.md"
6
+ requires-python = ">=3.13"
7
+ license = "MIT"
8
+ authors = [
9
+ { name = "pioet", email = "1599023541@qq.com" }
10
+ ]
11
+ keywords = ["pdf", "cli", "search", "extract", "png"]
12
+ classifiers = [
13
+ "Development Status :: 3 - Alpha",
14
+ "Environment :: Console",
15
+ "Intended Audience :: End Users/Desktop",
16
+ "License :: OSI Approved :: MIT License",
17
+ "Programming Language :: Python :: 3",
18
+ "Programming Language :: Python :: 3.13",
19
+ "Topic :: Utilities",
20
+ ]
21
+ dependencies = [
22
+ "pymupdf>=1.26.7",
23
+ "typer>=0.21.1",
24
+ ]
25
+
26
+ [project.scripts]
27
+ pdfscout = "pdf_scount:app"
28
+
29
+ [build-system]
30
+ requires = ["hatchling"]
31
+ build-backend = "hatchling.build"
32
+
33
+ [[tool.uv.index]]
34
+ url = "https://pypi.tuna.tsinghua.edu.cn/simple/"
35
+ default = true
@@ -0,0 +1,138 @@
1
+ version = 1
2
+ revision = 3
3
+ requires-python = ">=3.13"
4
+ resolution-markers = [
5
+ "python_full_version >= '3.14' and sys_platform == 'win32'",
6
+ "python_full_version >= '3.14' and sys_platform == 'emscripten'",
7
+ "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
8
+ "python_full_version < '3.14' and sys_platform == 'win32'",
9
+ "python_full_version < '3.14' and sys_platform == 'emscripten'",
10
+ "python_full_version < '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
11
+ ]
12
+
13
+ [[package]]
14
+ name = "click"
15
+ version = "8.3.1"
16
+ source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple/" }
17
+ dependencies = [
18
+ { name = "colorama", marker = "sys_platform == 'win32'" },
19
+ ]
20
+ sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065, upload-time = "2025-11-15T20:45:42.706Z" }
21
+ wheels = [
22
+ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" },
23
+ ]
24
+
25
+ [[package]]
26
+ name = "colorama"
27
+ version = "0.4.6"
28
+ source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple/" }
29
+ sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" }
30
+ wheels = [
31
+ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
32
+ ]
33
+
34
+ [[package]]
35
+ name = "markdown-it-py"
36
+ version = "4.0.0"
37
+ source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple/" }
38
+ dependencies = [
39
+ { name = "mdurl" },
40
+ ]
41
+ sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" }
42
+ wheels = [
43
+ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" },
44
+ ]
45
+
46
+ [[package]]
47
+ name = "mdurl"
48
+ version = "0.1.2"
49
+ source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple/" }
50
+ sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" }
51
+ wheels = [
52
+ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" },
53
+ ]
54
+
55
+ [[package]]
56
+ name = "pdf-scount"
57
+ version = "0.1.0"
58
+ source = { virtual = "." }
59
+ dependencies = [
60
+ { name = "pymupdf" },
61
+ { name = "typer" },
62
+ ]
63
+
64
+ [package.metadata]
65
+ requires-dist = [
66
+ { name = "pymupdf", specifier = ">=1.26.7" },
67
+ { name = "typer", specifier = ">=0.21.1" },
68
+ ]
69
+
70
+ [[package]]
71
+ name = "pygments"
72
+ version = "2.19.2"
73
+ source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple/" }
74
+ sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" }
75
+ wheels = [
76
+ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
77
+ ]
78
+
79
+ [[package]]
80
+ name = "pymupdf"
81
+ version = "1.26.7"
82
+ source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple/" }
83
+ sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/48/d6/09b28f027b510838559f7748807192149c419b30cb90e6d5f0cf916dc9dc/pymupdf-1.26.7.tar.gz", hash = "sha256:71add8bdc8eb1aaa207c69a13400693f06ad9b927bea976f5d5ab9df0bb489c3", size = 84327033, upload-time = "2025-12-11T21:48:50.694Z" }
84
+ wheels = [
85
+ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/94/35/cd74cea1787b2247702ef8522186bdef32e9cb30a099e6bb864627ef6045/pymupdf-1.26.7-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:07085718dfdae5ab83b05eb5eb397f863bcc538fe05135318a01ea353e7a1353", size = 23179369, upload-time = "2025-12-11T21:47:21.587Z" },
86
+ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/72/74/448b6172927c829c6a3fba80078d7b0a016ebbe2c9ee528821f5ea21677a/pymupdf-1.26.7-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:31aa9c8377ea1eea02934b92f4dcf79fb2abba0bf41f8a46d64c3e31546a3c02", size = 22470101, upload-time = "2025-12-11T21:47:37.105Z" },
87
+ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/65/e7/47af26f3ac76be7ac3dd4d6cc7ee105948a8355d774e5ca39857bf91c11c/pymupdf-1.26.7-cp310-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:e419b609996434a14a80fa060adec72c434a1cca6a511ec54db9841bc5d51b3c", size = 23502486, upload-time = "2025-12-12T09:51:25.824Z" },
88
+ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2a/6b/3de1714d734ff949be1e90a22375d0598d3540b22ae73eb85c2d7d1f36a9/pymupdf-1.26.7-cp310-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:69dfc78f206a96e5b3ac22741263ebab945fdf51f0dbe7c5757c3511b23d9d72", size = 24115727, upload-time = "2025-12-11T21:47:51.274Z" },
89
+ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/62/9b/f86224847949577a523be2207315ae0fd3155b5d909cd66c274d095349a3/pymupdf-1.26.7-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:1d5106f46e1ca0d64d46bd51892372a4f82076bdc14a9678d33d630702abca36", size = 24324386, upload-time = "2025-12-12T14:58:45.483Z" },
90
+ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/85/8e/a117d39092ca645fde8b903f4a941d9aa75b370a67b4f1f435f56393dc5a/pymupdf-1.26.7-cp310-abi3-win32.whl", hash = "sha256:7c9645b6f5452629c747690190350213d3e5bbdb6b2eca227d82702b327f6eee", size = 17203888, upload-time = "2025-12-12T13:59:57.613Z" },
91
+ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/dd/c3/d0047678146c294469c33bae167c8ace337deafb736b0bf97b9bc481aa65/pymupdf-1.26.7-cp310-abi3-win_amd64.whl", hash = "sha256:425b1befe40d41b72eb0fe211711c7ae334db5eb60307e9dd09066ed060cceba", size = 18405952, upload-time = "2025-12-11T21:48:02.947Z" },
92
+ ]
93
+
94
+ [[package]]
95
+ name = "rich"
96
+ version = "14.3.2"
97
+ source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple/" }
98
+ dependencies = [
99
+ { name = "markdown-it-py" },
100
+ { name = "pygments" },
101
+ ]
102
+ sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/74/99/a4cab2acbb884f80e558b0771e97e21e939c5dfb460f488d19df485e8298/rich-14.3.2.tar.gz", hash = "sha256:e712f11c1a562a11843306f5ed999475f09ac31ffb64281f73ab29ffdda8b3b8", size = 230143, upload-time = "2026-02-01T16:20:47.908Z" }
103
+ wheels = [
104
+ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ef/45/615f5babd880b4bd7d405cc0dc348234c5ffb6ed1ea33e152ede08b2072d/rich-14.3.2-py3-none-any.whl", hash = "sha256:08e67c3e90884651da3239ea668222d19bea7b589149d8014a21c633420dbb69", size = 309963, upload-time = "2026-02-01T16:20:46.078Z" },
105
+ ]
106
+
107
+ [[package]]
108
+ name = "shellingham"
109
+ version = "1.5.4"
110
+ source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple/" }
111
+ sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" }
112
+ wheels = [
113
+ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" },
114
+ ]
115
+
116
+ [[package]]
117
+ name = "typer"
118
+ version = "0.21.1"
119
+ source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple/" }
120
+ dependencies = [
121
+ { name = "click" },
122
+ { name = "rich" },
123
+ { name = "shellingham" },
124
+ { name = "typing-extensions" },
125
+ ]
126
+ sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/36/bf/8825b5929afd84d0dabd606c67cd57b8388cb3ec385f7ef19c5cc2202069/typer-0.21.1.tar.gz", hash = "sha256:ea835607cd752343b6b2b7ce676893e5a0324082268b48f27aa058bdb7d2145d", size = 110371, upload-time = "2026-01-06T11:21:10.989Z" }
127
+ wheels = [
128
+ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a0/1d/d9257dd49ff2ca23ea5f132edf1281a0c4f9de8a762b9ae399b670a59235/typer-0.21.1-py3-none-any.whl", hash = "sha256:7985e89081c636b88d172c2ee0cfe33c253160994d47bdfdc302defd7d1f1d01", size = 47381, upload-time = "2026-01-06T11:21:09.824Z" },
129
+ ]
130
+
131
+ [[package]]
132
+ name = "typing-extensions"
133
+ version = "4.15.0"
134
+ source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple/" }
135
+ sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" }
136
+ wheels = [
137
+ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" },
138
+ ]