pdf2imgCrop 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,27 @@
1
+ name: Python Package Release
2
+
3
+ on:
4
+ release:
5
+ types: [created]
6
+ workflow_dispatch:
7
+
8
+ jobs:
9
+ deploy:
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - uses: actions/checkout@v3
13
+ - name: Set up Python
14
+ uses: actions/setup-python@v4
15
+ with:
16
+ python-version: '3.x'
17
+ - name: Install dependencies
18
+ run: |
19
+ python -m pip install --upgrade pip
20
+ pip install build twine
21
+ - name: Build and publish
22
+ env:
23
+ TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
24
+ TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
25
+ run: |
26
+ python -m build
27
+ twine upload dist/*
@@ -0,0 +1,4 @@
1
+ .vscode/
2
+ *.pyc
3
+ __pycache__/
4
+ .venv/
@@ -0,0 +1,7 @@
1
+ Copyright © 2025 Muxkin
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,10 @@
1
+ Metadata-Version: 2.4
2
+ Name: pdf2imgCrop
3
+ Version: 0.1.0
4
+ Summary: 将PDF文件转换为图片并自动裁剪空白边距
5
+ Author-email: Your Name <your.email@example.com>
6
+ License-File: LICENSE.txt
7
+ Requires-Python: >=3.8
8
+ Requires-Dist: pillow>=10.0.0
9
+ Requires-Dist: pymupdf>=1.23.0
10
+ Requires-Dist: tqdm>=4.65.0
@@ -0,0 +1,58 @@
1
+ # pdf2imgCrop
2
+
3
+ 一个用于将PDF文件转换为图片并自动裁剪空白边距的Python工具。
4
+ > 如果想从pptx转换为高清图片,可以先试用Adobe Acrobat的ppt插件。注意需要在插件的**首选项->高级设置->图像**中将分辨率调高到300dpi以上。(我自己用的dpi是2400)
5
+
6
+ ## 功能特点
7
+
8
+ - 将PDF文件转换为JPG或PNG格式的图片
9
+ - 自动裁剪图片周围的空白边距
10
+ - 支持自定义DPI设置
11
+ - 命令行界面,使用简单
12
+
13
+ ## 安装
14
+
15
+ ```bash
16
+ pip install pdf2imgCrop
17
+ ```
18
+
19
+ ## 使用方法
20
+
21
+ ### 命令行使用
22
+
23
+ 基本用法:
24
+ ```bash
25
+ pdf2imgcrop your_file.pdf
26
+ ```
27
+
28
+ 指定DPI和输出格式:
29
+ ```bash
30
+ pdf2imgcrop your_file.pdf -d 600 -f png
31
+ ```
32
+
33
+ 查看帮助:
34
+ ```bash
35
+ pdf2imgcrop --help
36
+ ```
37
+
38
+ ### 参数说明
39
+
40
+ - `file`: PDF文件路径(必需)
41
+ - `-d`, `--dpi`: 输出图片的DPI(默认:300)
42
+ - `-f`, `--format`: 输出图片格式,可选 jpg 或 png(默认:jpg)
43
+
44
+ ## 代码示例
45
+
46
+ ```python
47
+ from pdf2imgCrop.core import convert_pdf
48
+
49
+ # 基本用法
50
+ convert_pdf("your_file.pdf")
51
+
52
+ # 自定义DPI和格式
53
+ convert_pdf("your_file.pdf", dpi=600, file_format="png")
54
+ ```
55
+
56
+ ## 输出
57
+
58
+ 转换后的图片将保存在输入PDF文件所在的位置,目录名为输入文件名(去掉扩展名)加上后缀"output",每一页保存为以页码命名的图片(如 1.jpg、2.jpg)。例如,如果输入文件是"document.pdf",输出目录将是"documentoutput"。
@@ -0,0 +1,6 @@
1
"""
PDF to Image Converter with Auto-Cropping
Convert PDF files to images and automatically crop blank margins.
"""

# Package version string.
__version__ = "0.1.0"
@@ -0,0 +1,41 @@
1
+ import argparse
2
+ from .core import convert_pdf
3
+
4
def main():
    """Run the ``pdf2imgcrop`` command-line interface.

    Parses the PDF path, the target DPI (``-d/--dpi``, default 300) and the
    output format (``-f/--format``, ``jpg`` or ``png``, default ``jpg``) from
    the command line, then hands them to ``convert_pdf``.

    Returns:
        int: 0 when the conversion finished, 1 when it raised an exception
        (the error text is printed instead of a traceback).
    """
    # Local import: this module only imports argparse at file level.
    import os

    parser = argparse.ArgumentParser(
        description="将PDF文件转换为图片并自动裁剪空白边距",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    parser.add_argument(
        "file",
        help="PDF文件路径,(Path to the PDF file)",
    )

    parser.add_argument(
        "-d", "--dpi",
        type=int,
        default=300,
        help="输出图片的DPI, 默认为 300, (DPI for output images, default is 300)",
    )

    parser.add_argument(
        "-f", "--format",
        choices=["jpg", "png"],
        default="jpg",
        help="输出图片格式, 默认为 'jpg', (Format of output images, default is 'jpg')",
    )

    args = parser.parse_args()

    try:
        convert_pdf(args.file, args.dpi, args.format)
    except Exception as e:
        # Top-level CLI boundary: report the failure and signal it through
        # the exit status rather than dumping a traceback on the user.
        print(f"错误: {str(e)}")
        return 1
    else:
        # convert_pdf writes to "<path without extension>output", so the
        # extension has to be stripped here as well; otherwise the message
        # names a directory that was never created.
        output_dir = os.path.splitext(args.file)[0] + "output"
        print(f"\n转换完成!输出目录: {output_dir}")

    return 0
39
+
40
if __name__ == "__main__":
    # Raise SystemExit directly: the bare exit() helper is injected by the
    # site module for interactive use and is not guaranteed to exist
    # (e.g. under "python -S" or in frozen applications).
    raise SystemExit(main())
@@ -0,0 +1,56 @@
1
+ import fitz
2
+ from PIL import Image, ImageOps
3
+ import os
4
+ from tqdm import tqdm
5
+ from fitz import Page
6
+
7
def convert_pdf(file: str, dpi: int = 300, file_format: str = "jpg") -> None:
    """Render every page of a PDF to an image and crop its blank margins.

    Each page is rasterised at a fixed 16x zoom, resampled to the pixel size
    the page has at ``dpi``, cropped to the bounding box of the pixels that
    are not pure white, and saved as ``<page number>.<file_format>`` (page
    numbers start at 1). Images are written to a directory named after the
    input path with its extension removed and the suffix ``output`` appended.

    Args:
        file: Path to the PDF file.
        dpi: DPI of the output images. Defaults to 300.
        file_format: Output image extension ('jpg' or 'png'). Defaults to
            'jpg'. JPEG output is written with quality 95.
    """
    # Everything below up to the loop is page-independent, so compute it once.
    zoom = 16
    mat = fitz.Matrix(zoom, zoom).prerotate(0)

    filename, _ = os.path.splitext(file)
    output_dir = filename + "output"

    save_options = {"dpi": (dpi, dpi)}
    if file_format.lower() == "jpg":
        save_options["quality"] = 95

    doc = fitz.open(file)
    try:
        # Created only after the PDF opened successfully, so a bad input path
        # does not leave an empty directory behind. exist_ok avoids the
        # check-then-create race of exists() + mkdir().
        os.makedirs(output_dir, exist_ok=True)

        for pg in tqdm(doc, desc="正在转换页面", unit="页"):
            # Page size in inches (the page rect is divided by 72 per inch).
            pg_width = pg.rect.width / 72
            pg_height = pg.rect.height / 72
            # Pixel dimensions of the page at the requested DPI.
            pix_dpi_width = int(pg_width * dpi)
            pix_dpi_height = int(pg_height * dpi)

            pix = pg.get_pixmap(matrix=mat, alpha=False)

            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            # Resample the fixed-zoom render to the target-DPI size.
            img = img.resize((pix_dpi_width, pix_dpi_height), Image.LANCZOS)

            # Inverting turns white into zero, so the bounding box of the
            # non-zero pixels is the bounding box of the page content.
            bbox = ImageOps.invert(img).getbbox()
            cropped_img = img.crop(bbox)

            output_path = os.path.join(output_dir, f"{pg.number + 1}.{file_format}")
            cropped_img.save(output_path, **save_options)
    finally:
        # Release the document even when rendering or saving a page fails.
        doc.close()
@@ -0,0 +1,23 @@
1
+ [project]
2
+ name = "pdf2imgCrop"
3
+ version = "0.1.0"
4
+ description = "将PDF文件转换为图片并自动裁剪空白边距"
5
+ authors = [
6
+ {name = "Your Name", email = "your.email@example.com"},
7
+ ]
8
+ dependencies = [
9
+ "PyMuPDF>=1.23.0",
10
+ "Pillow>=10.0.0",
11
+ "tqdm>=4.65.0",
12
+ ]
13
+ requires-python = ">=3.8"
14
+
15
+ [project.scripts]
16
+ pdf2imgcrop = "pdf2imgCrop.__main__:main"
17
+
18
+ [build-system]
19
+ requires = ["hatchling"]
20
+ build-backend = "hatchling.build"
21
+
22
+ [tool.hatch.build.targets.wheel]
23
+ packages = ["pdf2imgCrop"]
@@ -0,0 +1,3 @@
1
+ PyMuPDF>=1.23.0
2
+ Pillow>=10.0.0
3
+ tqdm>=4.65.0
@@ -0,0 +1,34 @@
1
+ from setuptools import setup, find_packages
2
+
3
# README.md contains non-ASCII (Chinese) text. Read it explicitly as UTF-8 so
# the build does not depend on the platform's default encoding, and close the
# file deterministically.
with open("README.md", encoding="utf-8") as readme_file:
    long_description = readme_file.read()

setup(
    name="pdf2imgCrop",
    version="0.1.0",
    packages=find_packages(),
    url="https://github.com/muxkin/pdf2imgCrop",
    project_urls={
        "Bug Tracker": "https://github.com/muxkin/pdf2imgCrop/issues",
        "Source Code": "https://github.com/muxkin/pdf2imgCrop",
    },
    # Minimum versions kept in sync with pyproject.toml / requirements.txt.
    install_requires=[
        "PyMuPDF>=1.23.0",
        "Pillow>=10.0.0",
        "tqdm>=4.65.0",
    ],
    entry_points={
        "console_scripts": [
            "pdf2imgcrop=pdf2imgCrop.__main__:main",
        ],
    },
    author="Muxkin",
    description="将PDF文件转换为图片并自动裁剪空白边距",
    long_description=long_description,
    long_description_content_type="text/markdown",
    keywords="pdf, image, convert, crop",
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: End Users/Desktop",
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
    ],
    # Matches requires-python in pyproject.toml.
    python_requires=">=3.8",
)