pdfitdown 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pdfitdown/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from .pdfconversion import convert_to_pdf
@@ -0,0 +1,43 @@
1
+ # Import required libraries
2
+ from markitdown import MarkItDown # Library for conversion to markdown
3
+ from markdown_pdf import MarkdownPdf, Section # Library for PDF generation
4
+
5
def convert_to_pdf(
    file_path: str,
    output_path: str,
    title: str = "PDF Title",
):
    """
    Render a source document as a PDF file via an intermediate markdown step.

    The input is first converted to markdown text with ``MarkItDown`` and that
    text is then written out as a single-section PDF with ``MarkdownPdf``.
    Which input formats actually work is decided by ``MarkItDown``; the
    package documentation lists .pdf/.pptx/.docx/.csv/.json/.xml/.html/.zip.

    Args:
        file_path: Path to the source file to convert.
        output_path: Destination path for the generated PDF.
        title: Value stored under the "title" key of the PDF metadata.

    Returns:
        str: ``output_path``, unchanged, once the PDF has been saved.
    """
    # Step 1: source file -> markdown text.
    markdown_text = MarkItDown().convert(file_path).text_content

    # Step 2: markdown text -> PDF document holding exactly one section.
    # toc_level=0 is intended to disable the table of contents.
    document = MarkdownPdf(toc_level=0)
    document.add_section(Section(markdown_text))

    # The title goes into the document metadata, not into the page content.
    document.meta["title"] = title

    # Step 3: write the document to disk and hand the path back to the caller.
    document.save(output_path)
    return output_path
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Clelia (Astra) Bertelli
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,80 @@
1
+ Metadata-Version: 2.1
2
+ Name: pdfitdown
3
+ Version: 0.0.0
4
+ Summary: PdfItDown - Convert Everything to PDF
5
+ Author-email: "Clelia (Astra) Bertelli" <astraberte9@gmail.com>
6
+ Project-URL: Homepage, https://github.com/AstraBert/PdfItDown
7
+ Project-URL: Issues, https://github.com/AstraBert/PdfItDown/issues
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.10
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: markitdown
15
+ Requires-Dist: markdown_pdf==1.3.2
16
+
17
+ <div align="center">
18
+ <h1>PdfItDown</h1>
19
+ <h2>Convert Everything to PDF</h2>
20
+ </div>
21
+ <br>
22
+ <div align="center">
23
+ <img src="https://raw.githubusercontent.com/AstraBert/PdfItDown/main/logo.png" alt="PdfItDown Logo">
24
+ </div>
25
+
26
+ **PdfItDown** is a Python package that converts files to PDF. It relies on [`markitdown` by Microsoft](https://github.com/microsoft/markitdown/) and [`markdown_pdf`](https://github.com/vb64/markdown-pdf).
27
+
28
+ ### Applicability
29
+
30
+ **PdfItDown** is applicable to the following file formats:
31
+
32
+ - PDF
33
+ - PowerPoint
34
+ - Word
35
+ - Excel
36
+ - HTML
37
+ - Text-based formats (CSV, JSON, XML)
38
+ - ZIP files (iterates over contents)
39
+
40
+ ### How does it work?
41
+
42
+ **PdfItDown** works in a very simple way:
43
+
44
+ ```mermaid
45
+ graph LR
46
+ 2(Input File) --> 3[markitdown]
47
+ 3[markitdown] --> 4[Markdown content]
48
+ 4[Markdown content] --> 5[markdown-pdf]
49
+ 5[markdown-pdf] --> 6(PDF file)
50
+ ```
51
+
52
+ ### Installation and Usage
53
+
54
+ To install **PdfItDown**, just run:
55
+
56
+ ```bash
57
+ python3 -m pip install pdfitdown
58
+ ```
59
+
60
+ You can then use it inside your Python scripts:
61
+
62
+ ```python
63
+ from pdfitdown.pdfconversion import convert_to_pdf
64
+
65
+ output_pdf = convert_to_pdf(file_path = "BusinessGrowth.xlsx", output_path = "business_growth.pdf", title = "Business Growth")
66
+ ```
67
+
68
+ In this example, the output PDF is saved as `business_growth.pdf`.
69
+
70
+ ### Contributing
71
+
72
+ Contributions are always welcome!
73
+
74
+ Find contribution guidelines at [CONTRIBUTING.md](https://github.com/AstraBert/PdfItDown/tree/main/CONTRIBUTING.md)
75
+
76
+ ### License and Funding
77
+
78
+ This project is open-source and is provided under an [MIT License](https://github.com/AstraBert/PdfItDown/tree/main/LICENSE).
79
+
80
+ If you found it useful, please consider [funding it](https://github.com/sponsors/AstraBert).
@@ -0,0 +1,7 @@
1
+ pdfitdown/__init__.py,sha256=hK1actVd5H_fsPA5TQsFXD3d22P7SlnoMWtSsU0vhPM,41
2
+ pdfitdown/pdfconversion.py,sha256=MuEhvO_znsq8-jGuX0EBdeXUEUfANV-uvpeie_DEiuM,1472
3
+ pdfitdown-0.0.0.dist-info/LICENSE,sha256=Y9dzy-RL719DOTR6PkcDPnpEOq0o3_LveaR4vx0bAj4,1101
4
+ pdfitdown-0.0.0.dist-info/METADATA,sha256=XGc4WNGa2dXVJ0uAcr17qdkEn4kJo693hq2oWm90zuk,2276
5
+ pdfitdown-0.0.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
6
+ pdfitdown-0.0.0.dist-info/top_level.txt,sha256=ex-Js-1tMyGnROEh9KndLI85CKdQVohxMd6v3ikqN-s,10
7
+ pdfitdown-0.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (75.6.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ pdfitdown