opendataloader-pdf 1.0.4__py3-none-any.whl → 1.0.5__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of opendataloader-pdf might be problematic. Click here for more details.
- opendataloader_pdf/jar/opendataloader-pdf-cli.jar +0 -0
- opendataloader_pdf/wrapper.py +2 -19
- {opendataloader_pdf-1.0.4.dist-info → opendataloader_pdf-1.0.5.dist-info}/METADATA +1 -1
- {opendataloader_pdf-1.0.4.dist-info → opendataloader_pdf-1.0.5.dist-info}/RECORD +6 -6
- {opendataloader_pdf-1.0.4.dist-info → opendataloader_pdf-1.0.5.dist-info}/WHEEL +0 -0
- {opendataloader_pdf-1.0.4.dist-info → opendataloader_pdf-1.0.5.dist-info}/top_level.txt +0 -0
|
Binary file
|
opendataloader_pdf/wrapper.py
CHANGED
|
@@ -3,24 +3,11 @@ import sys
|
|
|
3
3
|
import importlib.resources as importlib_resources
|
|
4
4
|
import locale
|
|
5
5
|
from pathlib import Path
|
|
6
|
-
from typing import List
|
|
7
6
|
|
|
8
7
|
# The consistent name of the JAR file bundled with the package
|
|
9
8
|
_JAR_NAME = "opendataloader-pdf-cli.jar"
|
|
10
9
|
|
|
11
10
|
|
|
12
|
-
def _get_redacted_command_string(command: List[str]) -> str:
|
|
13
|
-
"""Redacts the password from a command list for safe logging."""
|
|
14
|
-
command_for_logging = list(command)
|
|
15
|
-
try:
|
|
16
|
-
password_index = command_for_logging.index("--password")
|
|
17
|
-
if password_index + 1 < len(command_for_logging):
|
|
18
|
-
command_for_logging[password_index + 1] = "[REDACTED]"
|
|
19
|
-
except ValueError:
|
|
20
|
-
pass # '--password' not in command
|
|
21
|
-
return " ".join(command_for_logging)
|
|
22
|
-
|
|
23
|
-
|
|
24
11
|
def run(
|
|
25
12
|
input_path: str,
|
|
26
13
|
output_folder: str = None,
|
|
@@ -70,6 +57,8 @@ def run(
|
|
|
70
57
|
args.extend(["--password", password])
|
|
71
58
|
if replace_invalid_chars:
|
|
72
59
|
args.extend(["--replace-invalid-chars", replace_invalid_chars])
|
|
60
|
+
if content_safety_off:
|
|
61
|
+
args.extend(["--content-safety-off", content_safety_off])
|
|
73
62
|
if generate_markdown:
|
|
74
63
|
args.append("--markdown")
|
|
75
64
|
if generate_html:
|
|
@@ -78,8 +67,6 @@ def run(
|
|
|
78
67
|
args.append("--pdf")
|
|
79
68
|
if keep_line_breaks:
|
|
80
69
|
args.append("--keep-line-breaks")
|
|
81
|
-
if content_safety_off:
|
|
82
|
-
args.append(["--content-safety-off", content_safety_off])
|
|
83
70
|
if html_in_markdown:
|
|
84
71
|
args.append("--markdown-with-html")
|
|
85
72
|
if add_image_to_markdown:
|
|
@@ -98,10 +85,6 @@ def run(
|
|
|
98
85
|
command = ["java", "-jar", str(jar_path)] + args
|
|
99
86
|
|
|
100
87
|
if debug:
|
|
101
|
-
print(
|
|
102
|
-
f"Running command: {_get_redacted_command_string(command)}",
|
|
103
|
-
file=sys.stderr,
|
|
104
|
-
)
|
|
105
88
|
process = subprocess.Popen(
|
|
106
89
|
command,
|
|
107
90
|
stdout=subprocess.PIPE,
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
opendataloader_pdf/LICENSE,sha256=rxdbnZbuk8IaA2FS4bkFsLlTBNSujCySHHYJEAuo334,15921
|
|
2
2
|
opendataloader_pdf/NOTICE.md,sha256=Uxc6sEbVz2hfsDinzzSNMtmsjx9HsQUod0yy0cswUwg,562
|
|
3
3
|
opendataloader_pdf/__init__.py,sha256=T5RV-dcgjNCm8klNy_EH-IgOeodcPg6Yc34HHXtuAmQ,44
|
|
4
|
-
opendataloader_pdf/wrapper.py,sha256=
|
|
4
|
+
opendataloader_pdf/wrapper.py,sha256=WL7qTsX214L0jXxlSDesYadRVpdrsLQd2Hgum5BdD1s,4962
|
|
5
5
|
opendataloader_pdf/THIRD_PARTY/THIRD_PARTY_LICENSES.md,sha256=QRYYiXFS2zBDGdmWRo_SrRfGhrdRBwhiRo1SdUKfrQo,11235
|
|
6
6
|
opendataloader_pdf/THIRD_PARTY/THIRD_PARTY_NOTICES.md,sha256=pB2ZitFM1u0x3rIDpMHsLxOe4OFNCZRqkzeR-bfpFzE,8911
|
|
7
7
|
opendataloader_pdf/THIRD_PARTY/licenses/Apache-2.0.txt,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
|
|
@@ -13,8 +13,8 @@ opendataloader_pdf/THIRD_PARTY/licenses/LICENSE-JJ2000.txt,sha256=itSesIy3XiNWgJ
|
|
|
13
13
|
opendataloader_pdf/THIRD_PARTY/licenses/MIT.txt,sha256=JPCdbR3BU0uO_KypOd3sGWnKwlVHGq4l0pmrjoGtop8,1078
|
|
14
14
|
opendataloader_pdf/THIRD_PARTY/licenses/MPL-2.0.txt,sha256=CGF6Fx5WV7DJmRZJ8_6w6JEt2N9bu4p6zDo18fTHHRw,15818
|
|
15
15
|
opendataloader_pdf/THIRD_PARTY/licenses/Plexus Classworlds License.txt,sha256=ZQuKXwVz4FeC34ApB20vYg8kPTwgIUKRzEk5ew74-hU,1937
|
|
16
|
-
opendataloader_pdf/jar/opendataloader-pdf-cli.jar,sha256=
|
|
17
|
-
opendataloader_pdf-1.0.
|
|
18
|
-
opendataloader_pdf-1.0.
|
|
19
|
-
opendataloader_pdf-1.0.
|
|
20
|
-
opendataloader_pdf-1.0.
|
|
16
|
+
opendataloader_pdf/jar/opendataloader-pdf-cli.jar,sha256=Z9WU68Tw5ckOTgnlUPJs_Jub_C6ZGyQ-0sqjjSNMYYk,20477542
|
|
17
|
+
opendataloader_pdf-1.0.5.dist-info/METADATA,sha256=RNIDw03Rwl4wGRSPIhbHR6VyTzhc7cnlYHEEIajZBTk,25452
|
|
18
|
+
opendataloader_pdf-1.0.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
19
|
+
opendataloader_pdf-1.0.5.dist-info/top_level.txt,sha256=xee0qFQd6HPfS50E2NLICGuR6cq9C9At5SJ81yv5HkY,19
|
|
20
|
+
opendataloader_pdf-1.0.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|