opendataloader-pdf 1.0.5__py3-none-any.whl → 1.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of opendataloader-pdf might be problematic. Click here for more details.

@@ -0,0 +1,5 @@
1
+ from .wrapper import main
2
+
3
+
4
+ if __name__ == "__main__":
5
+ raise SystemExit(main())
@@ -1,3 +1,4 @@
1
+ import argparse
1
2
  import subprocess
2
3
  import sys
3
4
  import importlib.resources as importlib_resources
@@ -135,3 +136,84 @@ def run(
135
136
  if e.stdout:
136
137
  print(f"Stdout: {e.stdout}", file=sys.stderr)
137
138
  raise e
139
+
140
+
141
def main(argv=None) -> int:
    """CLI entry point for running the wrapper from the command line.

    Args:
        argv: Argument list to parse. ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        0 when ``run`` completes, 1 when it raises ``FileNotFoundError``,
        otherwise the failing subprocess's return code (1 if that code is
        falsy).

    Note:
        argparse itself raises ``SystemExit`` for ``--help`` and for
        invalid arguments; that is not intercepted here.
    """
    parser = argparse.ArgumentParser(
        description="Run the opendataloader-pdf CLI using the bundled JAR."
    )
    parser.add_argument("input_path", help="Path to the input PDF file or directory.")
    parser.add_argument(
        "-o",
        "--output-dir",
        dest="output_folder",
        help="Directory where outputs are written.",
    )
    parser.add_argument("-p", "--password", help="Password for encrypted PDFs.")
    parser.add_argument(
        "--replace-invalid-chars",
        help="Replacement character for invalid or unrecognized characters.",
    )
    parser.add_argument(
        "--content-safety-off",
        help="Disable content safety filtering (expects the desired mode).",
    )
    parser.add_argument(
        "--markdown",
        dest="generate_markdown",
        action="store_true",
        help="Generate Markdown output.",
    )
    parser.add_argument(
        "--html",
        dest="generate_html",
        action="store_true",
        help="Generate HTML output.",
    )
    parser.add_argument(
        "--pdf",
        dest="generate_annotated_pdf",
        action="store_true",
        help="Generate annotated PDF output.",
    )
    parser.add_argument(
        "--keep-line-breaks",
        action="store_true",
        help="Preserve line breaks in text output.",
    )
    parser.add_argument(
        "--markdown-with-html",
        dest="html_in_markdown",
        action="store_true",
        help="Allow raw HTML within Markdown output.",
    )
    parser.add_argument(
        "--markdown-with-images",
        dest="add_image_to_markdown",
        action="store_true",
        help="Embed images in Markdown output.",
    )
    parser.add_argument(
        "--no-json",
        action="store_true",
        help="Disable JSON output generation.",
    )
    parser.add_argument(
        "--debug",
        action="store_true",
        help="Stream CLI logs directly to stdout.",
    )
    args = parser.parse_args(argv)

    try:
        # Each argparse dest is forwarded to run() as a keyword argument of
        # the same name, so the dests above must match run()'s parameters.
        run(**vars(args))
    except FileNotFoundError as err:
        print(err, file=sys.stderr)
        return 1
    except subprocess.CalledProcessError as err:
        # Propagate the child's exit status; never report success (0) for
        # a failed subprocess.
        return err.returncode or 1

    # Explicit success status: the function is annotated ``-> int`` and
    # previously fell through returning None on the success path.
    return 0
216
+
217
+
218
+ if __name__ == "__main__":
219
+ sys.exit(main())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: opendataloader-pdf
3
- Version: 1.0.5
3
+ Version: 1.0.6
4
4
  Summary: A Python wrapper for the opendataloader-pdf Java CLI.
5
5
  Home-page: https://github.com/opendataloader-project/opendataloader-pdf
6
6
  Author: opendataloader-project
@@ -109,9 +109,16 @@ opendataloader_pdf.run(
109
109
  generate_markdown=True,
110
110
  generate_html=True,
111
111
  generate_annotated_pdf=True,
112
+ debug=True,
112
113
  )
113
114
  ```
114
115
 
116
+ - If you want to run it via CLI, you can use the following command:
117
+
118
+ ```sh
119
+ opendataloader-pdf path/to/document.pdf --markdown --html --pdf
120
+ ```
121
+
115
122
  ### Function: run()
116
123
 
117
124
  The main function to process PDFs.
@@ -174,6 +181,24 @@ async function main() {
174
181
  main();
175
182
  ```
176
183
 
184
+ If you want to run it via CLI, you can use the following command:
185
+
186
+ ```bash
187
+ npx @opendataloader/pdf path/to/document.pdf -o path/to/output --markdown --html --pdf
188
+ ```
189
+
190
+ or you can install it globally:
191
+
192
+ ```bash
193
+ npm install -g @opendataloader/pdf
194
+ ```
195
+
196
+ then run:
197
+
198
+ ```bash
199
+ opendataloader-pdf path/to/document.pdf -o path/to/output --markdown --html --pdf
200
+ ```
201
+
177
202
  ### Function: run()
178
203
 
179
204
  `run(inputPath: string, options?: RunOptions): Promise<string>`
@@ -1,7 +1,8 @@
1
1
  opendataloader_pdf/LICENSE,sha256=rxdbnZbuk8IaA2FS4bkFsLlTBNSujCySHHYJEAuo334,15921
2
2
  opendataloader_pdf/NOTICE.md,sha256=Uxc6sEbVz2hfsDinzzSNMtmsjx9HsQUod0yy0cswUwg,562
3
3
  opendataloader_pdf/__init__.py,sha256=T5RV-dcgjNCm8klNy_EH-IgOeodcPg6Yc34HHXtuAmQ,44
4
- opendataloader_pdf/wrapper.py,sha256=WL7qTsX214L0jXxlSDesYadRVpdrsLQd2Hgum5BdD1s,4962
4
+ opendataloader_pdf/__main__.py,sha256=lmla4yz3SaYBfRJXOXnwO_8ID31-Ja20aQmomiz1eEc,84
5
+ opendataloader_pdf/wrapper.py,sha256=Dsvw5un_HROLcy2xX0WqoKKRnOjL081LEYC6YfpViLE,7331
5
6
  opendataloader_pdf/THIRD_PARTY/THIRD_PARTY_LICENSES.md,sha256=QRYYiXFS2zBDGdmWRo_SrRfGhrdRBwhiRo1SdUKfrQo,11235
6
7
  opendataloader_pdf/THIRD_PARTY/THIRD_PARTY_NOTICES.md,sha256=pB2ZitFM1u0x3rIDpMHsLxOe4OFNCZRqkzeR-bfpFzE,8911
7
8
  opendataloader_pdf/THIRD_PARTY/licenses/Apache-2.0.txt,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
@@ -13,8 +14,9 @@ opendataloader_pdf/THIRD_PARTY/licenses/LICENSE-JJ2000.txt,sha256=itSesIy3XiNWgJ
13
14
  opendataloader_pdf/THIRD_PARTY/licenses/MIT.txt,sha256=JPCdbR3BU0uO_KypOd3sGWnKwlVHGq4l0pmrjoGtop8,1078
14
15
  opendataloader_pdf/THIRD_PARTY/licenses/MPL-2.0.txt,sha256=CGF6Fx5WV7DJmRZJ8_6w6JEt2N9bu4p6zDo18fTHHRw,15818
15
16
  opendataloader_pdf/THIRD_PARTY/licenses/Plexus Classworlds License.txt,sha256=ZQuKXwVz4FeC34ApB20vYg8kPTwgIUKRzEk5ew74-hU,1937
16
- opendataloader_pdf/jar/opendataloader-pdf-cli.jar,sha256=Z9WU68Tw5ckOTgnlUPJs_Jub_C6ZGyQ-0sqjjSNMYYk,20477542
17
- opendataloader_pdf-1.0.5.dist-info/METADATA,sha256=RNIDw03Rwl4wGRSPIhbHR6VyTzhc7cnlYHEEIajZBTk,25452
18
- opendataloader_pdf-1.0.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
19
- opendataloader_pdf-1.0.5.dist-info/top_level.txt,sha256=xee0qFQd6HPfS50E2NLICGuR6cq9C9At5SJ81yv5HkY,19
20
- opendataloader_pdf-1.0.5.dist-info/RECORD,,
17
+ opendataloader_pdf/jar/opendataloader-pdf-cli.jar,sha256=HmcxP25ZCOJNRV9U1IXy-beAN243_iCWTBIV6JB-6S8,20477911
18
+ opendataloader_pdf-1.0.6.dist-info/METADATA,sha256=2BWSSScAW3mmpWum3N7g-01fMZITHsmQcDBqSmGSkU0,25966
19
+ opendataloader_pdf-1.0.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
20
+ opendataloader_pdf-1.0.6.dist-info/entry_points.txt,sha256=Tupa9pVNF6nXD9sqzCLI8PCHbSu0jKkL3SYyTkQy0dc,71
21
+ opendataloader_pdf-1.0.6.dist-info/top_level.txt,sha256=xee0qFQd6HPfS50E2NLICGuR6cq9C9At5SJ81yv5HkY,19
22
+ opendataloader_pdf-1.0.6.dist-info/RECORD,,
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ opendataloader-pdf = opendataloader_pdf.wrapper:main