magic-pdf 1.3.4__py3-none-any.whl → 1.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- magic_pdf/data/read_api.py +1 -1
- magic_pdf/libs/version.py +1 -1
- magic_pdf/utils/office_to_pdf.py +13 -22
- {magic_pdf-1.3.4.dist-info → magic_pdf-1.3.6.dist-info}/METADATA +1 -1
- {magic_pdf-1.3.4.dist-info → magic_pdf-1.3.6.dist-info}/RECORD +9 -9
- {magic_pdf-1.3.4.dist-info → magic_pdf-1.3.6.dist-info}/LICENSE.md +0 -0
- {magic_pdf-1.3.4.dist-info → magic_pdf-1.3.6.dist-info}/WHEEL +0 -0
- {magic_pdf-1.3.4.dist-info → magic_pdf-1.3.6.dist-info}/entry_points.txt +0 -0
- {magic_pdf-1.3.4.dist-info → magic_pdf-1.3.6.dist-info}/top_level.txt +0 -0
magic_pdf/data/read_api.py
CHANGED
@@ -116,7 +116,7 @@ def read_local_office(path: str) -> list[PymuDocDataset]:
|
|
116
116
|
shutil.rmtree(temp_dir)
|
117
117
|
return ret
|
118
118
|
|
119
|
-
def read_local_images(path: str, suffixes: list[str]=['.png', '.jpg']) -> list[ImageDataset]:
|
119
|
+
def read_local_images(path: str, suffixes: list[str]=['.png', '.jpg', '.jpeg']) -> list[ImageDataset]:
|
120
120
|
"""Read images from path or directory.
|
121
121
|
|
122
122
|
Args:
|
magic_pdf/libs/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "1.3.4"
|
1
|
+
__version__ = "1.3.6"
|
magic_pdf/utils/office_to_pdf.py
CHANGED
@@ -4,6 +4,8 @@ import platform
|
|
4
4
|
from pathlib import Path
|
5
5
|
import shutil
|
6
6
|
|
7
|
+
from loguru import logger
|
8
|
+
|
7
9
|
|
8
10
|
class ConvertToPdfError(Exception):
|
9
11
|
def __init__(self, msg):
|
@@ -11,35 +13,24 @@ class ConvertToPdfError(Exception):
|
|
11
13
|
super().__init__(self.msg)
|
12
14
|
|
13
15
|
|
14
|
-
# Chinese font list
|
15
|
-
REQUIRED_CHS_FONTS = ['SimSun', 'Microsoft YaHei', 'Noto Sans CJK SC']
|
16
|
-
|
17
|
-
|
18
16
|
def check_fonts_installed():
|
19
17
|
"""Check if required Chinese fonts are installed."""
|
20
18
|
system_type = platform.system()
|
21
19
|
|
22
|
-
if system_type
|
23
|
-
|
24
|
-
font_dir = Path("C:/Windows/Fonts")
|
25
|
-
installed_fonts = [f.name for f in font_dir.glob("*.ttf")]
|
26
|
-
if any(font for font in REQUIRED_CHS_FONTS if any(font in f for f in installed_fonts)):
|
27
|
-
return True
|
28
|
-
raise EnvironmentError(
|
29
|
-
f"Missing Chinese font. Please install at least one of: {', '.join(REQUIRED_CHS_FONTS)}"
|
30
|
-
)
|
20
|
+
if system_type in ['Windows', 'Darwin']:
|
21
|
+
pass
|
31
22
|
else:
|
32
|
-
# Linux
|
23
|
+
# Linux: use fc-list
|
33
24
|
try:
|
34
25
|
output = subprocess.check_output(['fc-list', ':lang=zh'], encoding='utf-8')
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
except Exception
|
42
|
-
|
26
|
+
if output.strip(): # 只要有任何输出(非空)
|
27
|
+
return True
|
28
|
+
else:
|
29
|
+
logger.warning(
|
30
|
+
f"No Chinese fonts were detected, the converted document may not display Chinese content properly."
|
31
|
+
)
|
32
|
+
except Exception:
|
33
|
+
pass
|
43
34
|
|
44
35
|
|
45
36
|
def get_soffice_command():
|
@@ -12,7 +12,7 @@ magic_pdf/config/ocr_content_type.py,sha256=e_7RBTdShaWvWhMO2SFou7GM521elMH_Jtn5
|
|
12
12
|
magic_pdf/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
13
|
magic_pdf/data/batch_build_dataset.py,sha256=KQoWFJDqCwRQug8-fTuciSwff58AYRjCNP6GdiDhxLI,4953
|
14
14
|
magic_pdf/data/dataset.py,sha256=2v-a7kA6dRUDQpjlAVE5We1tMATR-MYKzQCcBhNci5g,12258
|
15
|
-
magic_pdf/data/read_api.py,sha256=
|
15
|
+
magic_pdf/data/read_api.py,sha256=qcG0T4c4ME5CkSRDjX2Wa2JQ_HW4GqzXAixI3_DZG_U,5234
|
16
16
|
magic_pdf/data/schemas.py,sha256=oIUTBzK8Wq8Wuy8A_uilWAbVhucRvOs9_f3lSKYgcmQ,664
|
17
17
|
magic_pdf/data/utils.py,sha256=dNWIJECPXaakKocI4z5Tq6vhDDSnR-bVWQV7DO2w_A8,5335
|
18
18
|
magic_pdf/data/data_reader_writer/__init__.py,sha256=QtevUaeSivv9dQKi3Tomfn4Z0E4To0cB8qXTnglxaHc,705
|
@@ -52,7 +52,7 @@ magic_pdf/libs/pdf_check.py,sha256=7GWWvDR6g_rj_fE6XJlbTq5AFVX11ngRIzT0N18F214,3
|
|
52
52
|
magic_pdf/libs/pdf_image_tools.py,sha256=_au7plmKKctpPKozBumSKgP8689q4vH1mU8VMLO0IbM,2260
|
53
53
|
magic_pdf/libs/performance_stats.py,sha256=DW-c6nUTUnWKGTONRKfpucsYZm1ake016F9K7jJwbik,2136
|
54
54
|
magic_pdf/libs/safe_filename.py,sha256=ckwcM_eqoysTb5id8czp-tXq2G9da0-l3pshZDCHQtE,236
|
55
|
-
magic_pdf/libs/version.py,sha256=
|
55
|
+
magic_pdf/libs/version.py,sha256=5ZbAQtod5QalTI1C2N07edlxplzG_Q2XvGOSyOok4uA,22
|
56
56
|
magic_pdf/model/__init__.py,sha256=sa-dO2k-TLy25I2gRrzjm_cQeYfzMf-pLwBJHkIxGo0,51
|
57
57
|
magic_pdf/model/batch_analyze.py,sha256=yKhKQuZTh9GG83p61bw2BRqKMbnsjsmX73gfuTRk8xE,11272
|
58
58
|
magic_pdf/model/doc_analyze_by_custom_model.py,sha256=-cjn7DQi6kZCqVZ0IxbXuL2kmeGhSVLzLaezIHPFzMU,10317
|
@@ -194,10 +194,10 @@ magic_pdf/tools/cli_dev.py,sha256=3RbubfTIagWoFYdu8wSDanr-BJDjFGeDet55jTy7He0,39
|
|
194
194
|
magic_pdf/tools/common.py,sha256=-x0RSFr7SNbdYq7DntaLYmQmaxyF-xKSf4xMpSUTzA0,12623
|
195
195
|
magic_pdf/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
196
196
|
magic_pdf/utils/annotations.py,sha256=82ou3uELNbQWa9hOFFkVt0gsIskAKf5msCv5J2IJ5V0,211
|
197
|
-
magic_pdf/utils/office_to_pdf.py,sha256=
|
198
|
-
magic_pdf-1.3.4.dist-info/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
|
199
|
-
magic_pdf-1.3.
|
200
|
-
magic_pdf-1.3.4.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
201
|
-
magic_pdf-1.3.4.dist-info/entry_points.txt,sha256=wXwYke3j8fqDQTocUspL-CqDUEv3Tfcwp09fM8dZAhA,98
|
202
|
-
magic_pdf-1.3.4.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
|
203
|
-
magic_pdf-1.3.4.dist-info/RECORD,,
|
197
|
+
magic_pdf/utils/office_to_pdf.py,sha256=bFRYe6v3-pfx5R8-bV8cmf12jPnOrYZsleKoECTXzbM,3958
|
198
|
+
magic_pdf-1.3.6.dist-info/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
|
199
|
+
magic_pdf-1.3.6.dist-info/METADATA,sha256=WciKDOUh2xH6OdTqDPJlMlLrUhaHBseO5dycx0_7RLo,45798
|
200
|
+
magic_pdf-1.3.6.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
201
|
+
magic_pdf-1.3.6.dist-info/entry_points.txt,sha256=wXwYke3j8fqDQTocUspL-CqDUEv3Tfcwp09fM8dZAhA,98
|
202
|
+
magic_pdf-1.3.6.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
|
203
|
+
magic_pdf-1.3.6.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|