magic-pdf 1.3.8__py3-none-any.whl → 1.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,7 @@ from loguru import logger
5
5
  from magic_pdf.config.make_content_config import DropMode, MakeMode
6
6
  from magic_pdf.config.ocr_content_type import BlockType, ContentType
7
7
  from magic_pdf.libs.commons import join_path
8
+ from magic_pdf.libs.config_reader import get_latex_delimiter_config
8
9
  from magic_pdf.libs.language import detect_lang
9
10
  from magic_pdf.libs.markdown_utils import ocr_escape_special_markdown_char
10
11
  from magic_pdf.post_proc.para_split_v3 import ListLineTag
@@ -145,6 +146,19 @@ def full_to_half(text: str) -> str:
145
146
  result.append(char)
146
147
  return ''.join(result)
147
148
 
149
latex_delimiters_config = get_latex_delimiter_config()

# Built-in Markdown math delimiters; used for every value the user config
# does not supply.
default_delimiters = {
    'display': {'left': '$$', 'right': '$$'},
    'inline': {'left': '$', 'right': '$'}
}


def _resolve_latex_delimiters(user_config, defaults):
    """Overlay the user's delimiter config on top of the defaults.

    A config that is missing, not a mapping, or only partially filled in
    (for example it defines 'display' but not 'inline') must not crash the
    module at import time, so every missing or non-string entry silently
    falls back to the corresponding default.

    Args:
        user_config: Value read from the configuration file, or None.
        defaults: Mapping of kind -> {'left': str, 'right': str}.

    Returns:
        dict: A new mapping with the same shape as ``defaults``.
    """
    resolved = {kind: dict(sides) for kind, sides in defaults.items()}
    if not isinstance(user_config, dict):
        return resolved

    for kind, sides in resolved.items():
        user_sides = user_config.get(kind)
        if not isinstance(user_sides, dict):
            continue
        for side in sides:
            value = user_sides.get(side)
            if isinstance(value, str):
                sides[side] = value
    return resolved


delimiters = _resolve_latex_delimiters(latex_delimiters_config, default_delimiters)

display_left_delimiter = delimiters['display']['left']
display_right_delimiter = delimiters['display']['right']
inline_left_delimiter = delimiters['inline']['left']
inline_right_delimiter = delimiters['inline']['right']
148
162
 
149
163
  def merge_para_with_text(para_block):
150
164
  block_text = ''
@@ -168,9 +182,9 @@ def merge_para_with_text(para_block):
168
182
  if span_type == ContentType.Text:
169
183
  content = ocr_escape_special_markdown_char(span['content'])
170
184
  elif span_type == ContentType.InlineEquation:
171
- content = f"${span['content']}$"
185
+ content = f"{inline_left_delimiter}{span['content']}{inline_right_delimiter}"
172
186
  elif span_type == ContentType.InterlineEquation:
173
- content = f"\n$$\n{span['content']}\n$$\n"
187
+ content = f"\n{display_left_delimiter}\n{span['content']}\n{display_right_delimiter}\n"
174
188
 
175
189
  content = content.strip()
176
190
 
@@ -125,6 +125,15 @@ def get_llm_aided_config():
125
125
  else:
126
126
  return llm_aided_config
127
127
 
128
def get_latex_delimiter_config():
    """Return the 'latex-delimiter-config' entry of the configuration.

    The configuration is obtained from ``read_config()``.  When the entry is
    absent (or explicitly ``None``) a warning is logged and ``None`` is
    returned so the caller can fall back to its own defaults.
    """
    latex_delimiter_config = read_config().get('latex-delimiter-config')
    if latex_delimiter_config is not None:
        return latex_delimiter_config

    logger.warning(f"'latex-delimiter-config' not found in {CONFIG_FILE_NAME}, use 'None' as default")
    return None
136
+
128
137
 
129
138
  if __name__ == '__main__':
130
139
  ak, sk, endpoint = get_s3_config('llm-raw')
magic_pdf/libs/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.3.8"
1
+ __version__ = "1.3.10"
@@ -5,6 +5,7 @@ from typing import Optional
5
5
 
6
6
  import torch
7
7
  from ftfy import fix_text
8
+ from loguru import logger
8
9
 
9
10
  from transformers import AutoConfig, AutoModel, AutoModelForCausalLM, AutoTokenizer, PretrainedConfig, PreTrainedModel
10
11
  from transformers import VisionEncoderDecoderConfig, VisionEncoderDecoderModel
@@ -57,22 +58,319 @@ class TokenizerWrapper:
57
58
  return toks
58
59
 
59
60
 
60
- def latex_rm_whitespace(s: str):
61
- """Remove unnecessary whitespace from LaTeX code.
61
# `\left` / `\right` plus whatever non-space text is glued to the command.
# The negative lookahead keeps longer control words such as `\leftarrow`,
# `\lefteqn` or `\rightarrow` from being mistaken for the sizing commands.
LEFT_PATTERN = re.compile(r'(\\left)(?![a-zA-Z])(\S*)')
RIGHT_PATTERN = re.compile(r'(\\right)(?![a-zA-Z])(\S*)')
LEFT_COUNT_PATTERN = re.compile(r'\\left(?![a-zA-Z])')
RIGHT_COUNT_PATTERN = re.compile(r'\\right(?![a-zA-Z])')
LEFT_RIGHT_REMOVE_PATTERN = re.compile(r'\\(?:left|right)(?![a-zA-Z])\.?')

# One delimiter token: a control word (`\lceil`), a control symbol (`\{`),
# or a single plain character.
_DELIMITER_TOKEN_PATTERN = re.compile(r'\\[a-zA-Z]+|\\.|.', re.DOTALL)

# Whitelisted delimiters.  '.' (the null delimiter) is accepted so that an
# already-correct `\left.` keeps any text glued after the dot.
_VALID_DELIMITERS = frozenset([
    r'(', r')', r'[', r']', r'{', r'}', r'/', r'|', r'.',
    r'\{', r'\}', r'\lceil', r'\rceil', r'\lfloor',
    r'\rfloor', r'\backslash', r'\uparrow', r'\downarrow',
    r'\Uparrow', r'\Downarrow', r'\|', r'\.',
])


def fix_latex_left_right(s):
    """Repair `\\left` / `\\right` commands in a LaTeX formula.

    1. Make sure every `\\left` / `\\right` is followed by a whitelisted
       delimiter; otherwise the null delimiter '.' is inserted.  Text glued
       to the command is never discarded.
    2. If the numbers of `\\left` and `\\right` agree, re-home any `\\right`
       that ended up in a different brace group than its `\\left`;
       otherwise strip all `\\left` / `\\right` commands (and a following
       '.') because the formula cannot be balanced.

    Longer control words that merely start with `\\left` / `\\right`
    (`\\leftarrow`, `\\rightarrow`, ...) are left untouched.

    Args:
        s (str): LaTeX formula.

    Returns:
        str: The repaired formula.
    """

    def ensure_delimiter(match):
        command, attached = match.group(1), match.group(2)
        token = _DELIMITER_TOKEN_PATTERN.match(attached)
        if token and token.group(0) in _VALID_DELIMITERS:
            return match.group(0)
        # No usable delimiter: add the null delimiter but keep the text that
        # followed, so no part of the formula is lost.
        return command + '.' + attached

    s = LEFT_PATTERN.sub(ensure_delimiter, s)
    s = RIGHT_PATTERN.sub(ensure_delimiter, s)

    left_count = len(LEFT_COUNT_PATTERN.findall(s))
    right_count = len(RIGHT_COUNT_PATTERN.findall(s))

    if left_count == right_count:
        # Same number of each: check they sit in the same brace group.
        return fix_left_right_pairs(s)
    # Unbalanced: drop every \left and \right.
    return LEFT_RIGHT_REMOVE_PATTERN.sub('', s)


def fix_left_right_pairs(latex_formula):
    """Move each `\\right` into the brace group of its matching `\\left`.

    The formula is scanned once while tracking brace depth.  `\\left` and
    `\\right` are paired like parentheses; when a pair sits at different
    brace depths, the `\\right` command together with its complete delimiter
    token is moved to just before the closing brace of the group that
    contains the `\\left`.  Pairs whose `\\left` is not enclosed in any group
    are left alone.

    Args:
        latex_formula (str): LaTeX formula.

    Returns:
        str: The formula with misplaced `\\right` commands relocated.
    """
    length = len(latex_formula)
    depth = 0          # current brace nesting level
    left_stack = []    # (position, depth) of each unmatched \left
    moves = []         # (start, end, target) for every \right to relocate

    i = 0
    while i < length:
        # A character preceded by an odd number of backslashes belongs to a
        # control sequence (or is an escaped brace) and is not structural.
        if is_escaped(latex_formula, i):
            i += 1
            continue

        if latex_formula[i] == '\\':
            left_cmd = LEFT_COUNT_PATTERN.match(latex_formula, i)
            right_cmd = None if left_cmd else RIGHT_COUNT_PATTERN.match(latex_formula, i)
            command = left_cmd or right_cmd
            delimiter = _DELIMITER_TOKEN_PATTERN.match(latex_formula, command.end()) if command else None
            if delimiter:
                if left_cmd:
                    left_stack.append((i, depth))
                elif left_stack:
                    left_pos, left_depth = left_stack.pop()
                    if left_depth != depth:
                        target = find_group_end(latex_formula, left_pos, left_depth)
                        if target != -1:
                            moves.append((i, delimiter.end(), target))
                # Skip the command and its whole delimiter token so that a
                # delimiter such as '{' is not counted as a group brace.
                i = delimiter.end()
                continue

        if latex_formula[i] == '{':
            depth += 1
        elif latex_formula[i] == '}' and depth > 0:
            depth -= 1

        i += 1

    if not moves:
        return latex_formula

    # Rebuild in a single pass over the ORIGINAL indices, so removing one
    # token can never shift the insertion point of another.
    insert_before = {}
    cut_until = {}
    for start, end, target in moves:
        insert_before.setdefault(target, []).append(latex_formula[start:end])
        cut_until[start] = end

    pieces = []
    skip_until = 0
    for pos, char in enumerate(latex_formula):
        pieces.extend(insert_before.get(pos, ()))
        if pos in cut_until:
            skip_until = max(skip_until, cut_until[pos])
        if pos >= skip_until:
            pieces.append(char)

    return ''.join(pieces)


def find_group_end(text, pos, depth):
    """Find the brace that closes the group at ``depth`` enclosing ``pos``.

    Scans forward from ``pos`` counting unescaped braces, starting at
    ``depth``.

    Returns:
        int: Index of the first unescaped '}' that takes the nesting level
        below ``depth``, or -1 if there is none.
    """
    current_depth = depth
    for i in range(pos, len(text)):
        char = text[i]
        if char not in '{}' or is_escaped(text, i):
            continue
        if char == '{':
            current_depth += 1
        else:
            current_depth -= 1
            if current_depth < depth:
                return i

    return -1  # no closing brace found


def is_escaped(text, pos):
    """Return True if text[pos] is preceded by an odd number of backslashes."""
    backslash_count = 0
    j = pos - 1
    while j >= 0 and text[j] == '\\':
        backslash_count += 1
        j -= 1

    return backslash_count % 2 == 1
216
+
217
+
218
def fix_unbalanced_braces(latex_formula):
    """Drop every curly brace that has no matching partner.

    Braces preceded by an odd number of backslashes (e.g. ``\\{``) are
    escaped literals: they take no part in the matching and are always kept.

    Args:
        latex_formula (str): LaTeX formula.

    Returns:
        str: The formula without the unmatched '{' and '}' characters.
    """
    open_positions = []   # indices of '{' still waiting for a '}'
    orphans = set()       # indices of braces to delete

    for idx, char in enumerate(latex_formula):
        if char != '{' and char != '}':
            continue

        # Length of the backslash run immediately before this brace.
        run_start = idx
        while run_start > 0 and latex_formula[run_start - 1] == '\\':
            run_start -= 1
        if (idx - run_start) % 2 == 1:
            continue  # escaped brace

        if char == '{':
            open_positions.append(idx)
        elif open_positions:
            open_positions.pop()
        else:
            orphans.add(idx)

    # Any '{' still open at the end never found a partner.
    orphans.update(open_positions)

    kept = [char for idx, char in enumerate(latex_formula) if idx not in orphans]
    return ''.join(kept)
263
+
264
+
265
def process_latex(input_string):
    """Insert a space after backslashes that do not start a control word.

    For every backslash followed by a character (other than a newline):

    1. If that character is one of ``#$%&~_^|\\{}`` or whitespace, the pair
       is left unchanged.
    2. If that character and the one after it are both ASCII letters, the
       pair is left unchanged.
    3. Otherwise a space is inserted between the backslash and the
       character.

    Args:
        input_string (str): LaTeX formula.

    Returns:
        str: The processed formula.
    """
    untouched_followers = frozenset("#$%&~_^|\\{} \t\n\r\v\f")

    def is_ascii_letter(char):
        return 'a' <= char <= 'z' or 'A' <= char <= 'Z'

    def respace(match):
        follower = match.group(1)
        if follower in untouched_followers:
            return match.group(0)

        # Character right after "\x" (empty string at the end of the input).
        lookahead = match.string[match.end():match.end() + 1]
        if is_ascii_letter(follower) and lookahead and is_ascii_letter(lookahead):
            return match.group(0)

        return '\\' + ' ' + follower

    return re.sub(r'\\(.)', respace, input_string)
301
+
302
# Math environments commonly available in KaTeX/MathJax
ENV_TYPES = ['array', 'matrix', 'pmatrix', 'bmatrix', 'vmatrix',
             'Bmatrix', 'Vmatrix', 'cases', 'aligned', 'gathered']
ENV_BEGIN_PATTERNS = {env: re.compile(r'\\begin\{' + env + r'\}') for env in ENV_TYPES}
ENV_END_PATTERNS = {env: re.compile(r'\\end\{' + env + r'\}') for env in ENV_TYPES}
ENV_FORMAT_PATTERNS = {env: re.compile(r'\\begin\{' + env + r'\}\{([^}]*)\}') for env in ENV_TYPES}


def fix_latex_environments(s):
    """Balance the `\\begin{env}` / `\\end{env}` tags of known environments.

    For each environment in ``ENV_TYPES``:

    1. Missing `\\begin` tags are prepended to the formula.  The argument of
       the first existing `\\begin{env}{...}` is reused when there is one;
       otherwise `array` gets `{c}` and the other environments get none.
    2. Missing `\\end` tags are appended to the formula.
    """
    for env_name in ENV_TYPES:
        opened = len(ENV_BEGIN_PATTERNS[env_name].findall(s))
        closed = len(ENV_END_PATTERNS[env_name].findall(s))
        surplus_ends = closed - opened

        if surplus_ends > 0:
            existing_format = ENV_FORMAT_PATTERNS[env_name].search(s)
            if existing_format:
                argument = '{' + existing_format.group(1) + '}'
            elif env_name == 'array':
                argument = '{c}'
            else:
                argument = ''
            opening_tag = '\\begin{' + env_name + '}' + argument + ' '
            s = opening_tag * surplus_ends + s
        elif surplus_ends < 0:
            closing_tag = ' \\end{' + env_name + '}'
            s = s + closing_tag * (-surplus_ends)

    return s
333
+
334
+
335
UP_PATTERN = re.compile(r'\\up([a-zA-Z]+)')
# Suffixes for which `\up<suffix>` is itself a real command (\uparrow,
# \updownarrow, \uplus, \upsilon, \upharpoonleft, \upharpoonright,
# \upuparrows) and must therefore keep its `up` prefix.
UP_KEEP_SUFFIXES = frozenset([
    "arrow", "downarrow", "lus", "silon",
    "harpoonleft", "harpoonright", "uparrows",
])
COMMANDS_TO_REMOVE_PATTERN = re.compile(
    r'\\(?:lefteqn|boldmath|ensuremath|centering|textsubscript|sides|textsl|textcent|emph)')
# NOTE(review): `\Bar` is rewritten to `\hat`, not `\bar` — confirm this is
# intentional and not a copy/paste slip.
REPLACEMENTS_PATTERNS = {
    re.compile(r'\\underbar'): r'\\underline',
    re.compile(r'\\Bar'): r'\\hat',
    re.compile(r'\\Hat'): r'\\hat',
    re.compile(r'\\Tilde'): r'\\tilde',
    re.compile(r'\\slash'): r'/',
    re.compile(r'\\textperthousand'): r'‰',
    re.compile(r'\\sun'): r'☉',
    re.compile(r'\\textunderscore'): r'\\_',
    re.compile(r'\\fint'): r'⨏',
    re.compile(r'\\up '): r'\\ ',
}
QQUAD_PATTERN = re.compile(r'\\qquad(?!\s)')


def normalize_up_commands(s):
    """Strip the `up` prefix from `\\up<name>` commands (`\\upalpha` -> `\\alpha`).

    Commands whose suffix is listed in ``UP_KEEP_SUFFIXES`` are genuine
    `\\up...` commands and are returned unchanged.
    """
    return UP_PATTERN.sub(
        lambda m: m.group(0) if m.group(1) in UP_KEEP_SUFFIXES else f"\\{m.group(1)}", s
    )


def latex_rm_whitespace(s: str):
    """Clean up a LaTeX formula string.

    Despite the historical name, this no longer strips whitespace.  The
    steps, in order:

    1. Remove unmatched braces (``fix_unbalanced_braces``).
    2. Repair `\\left` / `\\right` commands (``fix_latex_left_right``).
    3. Balance `\\begin` / `\\end` tags (``fix_latex_environments``).
    4. Strip the `up` prefix from `\\up<name>` commands.
    5. Delete the commands matched by ``COMMANDS_TO_REMOVE_PATTERN``.
    6. Apply the substitutions in ``REPLACEMENTS_PATTERNS``.
    7. Normalise backslash sequences (``process_latex``).
    8. Make sure `\\qquad` is followed by whitespace.

    Args:
        s (str): LaTeX formula.

    Returns:
        str: The cleaned formula.
    """
    s = fix_unbalanced_braces(s)
    s = fix_latex_left_right(s)
    s = fix_latex_environments(s)

    s = normalize_up_commands(s)
    s = COMMANDS_TO_REMOVE_PATTERN.sub('', s)

    # Apply every substitution, in definition order.
    for pattern, replacement in REPLACEMENTS_PATTERNS.items():
        s = pattern.sub(replacement, s)

    # Handle backslashes and spaces.
    s = process_latex(s)

    # Add a space after \qquad when none follows.
    s = QQUAD_PATTERN.sub(r'\\qquad ', s)

    return s
77
375
 
78
376
 
@@ -172,8 +172,8 @@ def filter_nested_tables(table_res_list, overlap_threshold=0.8, area_threshold=0
172
172
  tables_inside = [j for j in range(len(table_res_list))
173
173
  if i != j and is_inside(table_info[j], table_info[i], overlap_threshold)]
174
174
 
175
- # Continue if there are at least 2 tables inside
176
- if len(tables_inside) >= 2:
175
+ # Continue if there are at least 3 tables inside
176
+ if len(tables_inside) >= 3:
177
177
  # Check if inside tables overlap with each other
178
178
  tables_overlap = any(do_overlap(table_info[tables_inside[idx1]], table_info[tables_inside[idx2]])
179
179
  for idx1 in range(len(tables_inside))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: magic-pdf
3
- Version: 1.3.8
3
+ Version: 1.3.10
4
4
  Summary: A practical tool for converting PDF to Markdown
5
5
  License: AGPL-3.0
6
6
  Project-URL: Home, https://mineru.net/
@@ -20,7 +20,7 @@ Requires-Dist: click >=8.1.7
20
20
  Requires-Dist: fast-langdetect <0.3.0,>=0.2.3
21
21
  Requires-Dist: loguru >=0.6.0
22
22
  Requires-Dist: numpy >=1.21.6
23
- Requires-Dist: pdfminer.six ==20231228
23
+ Requires-Dist: pdfminer.six ==20250324
24
24
  Requires-Dist: pydantic <2.11,>=2.7.2
25
25
  Requires-Dist: scikit-learn >=1.0.2
26
26
  Requires-Dist: torch !=2.5.0,!=2.5.1,>=2.2.2
@@ -107,7 +107,13 @@ Easier to use: Just grab MinerU Desktop. No coding, no login, just a simple inte
107
107
  </div>
108
108
 
109
109
  # Changelog
110
- - Released on 2025/04/23, version 1.3.8
110
+ - 2025/04/29 1.3.10 Released
111
+ - Support for custom formula delimiters can be achieved by modifying the `latex-delimiter-config` item in the `magic-pdf.json` file under the user directory.
112
+ - Pinned `pdfminer.six` to version `20250324` to prevent parsing failures caused by new versions.
113
+ - 2025/04/27 1.3.9 Released
114
+ - Optimized the formula parsing function to improve the success rate of formula rendering
115
+ - Updated `pdfminer.six` to the latest version, fixing some abnormal PDF parsing issues
116
+ - 2025/04/23 1.3.8 Released
111
117
  - The default `ocr` model (`ch`) has been updated to `PP-OCRv4_server_rec_doc` (model update required)
112
118
  - `PP-OCRv4_server_rec_doc` is trained on a mix of more Chinese document data and PP-OCR training data, enhancing recognition capabilities for some traditional Chinese characters, Japanese, and special characters. It supports over 15,000 recognizable characters, improving text recognition in documents while also boosting general text recognition.
113
119
  - [Performance comparison between PP-OCRv4_server_rec_doc, PP-OCRv4_server_rec, and PP-OCRv4_mobile_rec](https://paddlepaddle.github.io/PaddleX/latest/en/module_usage/tutorials/ocr_modules/text_recognition.html#ii-supported-model-list)
@@ -433,7 +439,7 @@ There are three different ways to experience MinerU:
433
439
  <td colspan="2">GPU VRAM 6GB or more</td>
434
440
  <td colspan="2">All GPUs with Tensor Cores produced from Volta(2017) onwards.<br>
435
441
  More than 6GB VRAM </td>
436
- <td rowspan="2">apple slicon</td>
442
+ <td rowspan="2">Apple silicon</td>
437
443
  </tr>
438
444
  </table>
439
445
 
@@ -25,7 +25,7 @@ magic_pdf/data/io/base.py,sha256=SqNQqe30ZvoVvg7GVv-hLMCjN6yBgDyQQWeLgGsTfhQ,111
25
25
  magic_pdf/data/io/http.py,sha256=XlKB0DNf4a_uUnfgcclvaaOtmE7lmddx0DnK8A-emAM,958
26
26
  magic_pdf/data/io/s3.py,sha256=hyA7sbNriQy64xd_uyJ7acN_oneQ1Pdmoc7_xcvkue8,3606
27
27
  magic_pdf/dict2md/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
- magic_pdf/dict2md/ocr_mkcontent.py,sha256=U4DqKfD4dJ2S5Z8NEAGhuLYkEOIeC-BWuArMbwi7BJs,13784
28
+ magic_pdf/dict2md/ocr_mkcontent.py,sha256=jRGoSNeR3XBgzGhKdQ25CmsdW0pi7NA-5NY3TB2pja0,14421
29
29
  magic_pdf/filter/__init__.py,sha256=_7lSez_myu4b6cdzPpQ-NfREuqeBSq_QdyBPKVLyq2U,1505
30
30
  magic_pdf/filter/pdf_classify_by_type.py,sha256=YNYXamxYgEiSujwilCNHOtrwpgJGDiQ597qJfardDVc,42354
31
31
  magic_pdf/filter/pdf_meta_scan.py,sha256=eOuM0-JgaXvHolSgepGoNDJDmv_uITWLQpH_0MfnVQw,17478
@@ -38,7 +38,7 @@ magic_pdf/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
38
  magic_pdf/libs/boxbase.py,sha256=DKZXhwpJd-HE2_Du7NmkeeYW5gG-iwX3GeSWL7rYGv0,16956
39
39
  magic_pdf/libs/clean_memory.py,sha256=OsQexCjmBO2i-Hv-0uYQfn72dbUWR8sTW81nG2zlNQQ,479
40
40
  magic_pdf/libs/commons.py,sha256=xD0fGA16KNB5rhbl4zRrOqdrNHYwaRablT_s9W2ZTbw,1174
41
- magic_pdf/libs/config_reader.py,sha256=9GXK7jtDyA_jxXKWZAQ69rB02v5UW4mOmo1IaOYOkW0,4374
41
+ magic_pdf/libs/config_reader.py,sha256=Z8C5o2uYfByB0Sj-jpgzu6VRobNp0y2gTheVXWkZV_0,4716
42
42
  magic_pdf/libs/convert_utils.py,sha256=Ov-lsfCLBPz_15iSJXIslBNmrSf_E_1g_XDWJy8NgO8,143
43
43
  magic_pdf/libs/coordinate_transform.py,sha256=Bbop2cP2uz2ZG0U0gwd7J6EKkgABq5Rv03qf2LMPw80,429
44
44
  magic_pdf/libs/draw_bbox.py,sha256=hpUmpPiQVu7UgWQa3M49dS22G6A9gcG2jpq4dQjTjzA,18331
@@ -52,7 +52,7 @@ magic_pdf/libs/pdf_check.py,sha256=7GWWvDR6g_rj_fE6XJlbTq5AFVX11ngRIzT0N18F214,3
52
52
  magic_pdf/libs/pdf_image_tools.py,sha256=_au7plmKKctpPKozBumSKgP8689q4vH1mU8VMLO0IbM,2260
53
53
  magic_pdf/libs/performance_stats.py,sha256=DW-c6nUTUnWKGTONRKfpucsYZm1ake016F9K7jJwbik,2136
54
54
  magic_pdf/libs/safe_filename.py,sha256=ckwcM_eqoysTb5id8czp-tXq2G9da0-l3pshZDCHQtE,236
55
- magic_pdf/libs/version.py,sha256=47xEhOdVR5Y8-pZH8aVP6Z2UhhY8jGWTQ-rJHt5fIeU,22
55
+ magic_pdf/libs/version.py,sha256=4o4BxiWDvKULo_NByGymiLj9KXGht1PsOBGUMmasvxM,23
56
56
  magic_pdf/model/__init__.py,sha256=sa-dO2k-TLy25I2gRrzjm_cQeYfzMf-pLwBJHkIxGo0,51
57
57
  magic_pdf/model/batch_analyze.py,sha256=F0WsjbQ6z9txdiUiVy6n6zhyJWJ-4moljNx8fe8HFws,10977
58
58
  magic_pdf/model/doc_analyze_by_custom_model.py,sha256=-cjn7DQi6kZCqVZ0IxbXuL2kmeGhSVLzLaezIHPFzMU,10317
@@ -62,7 +62,7 @@ magic_pdf/model/pdf_extract_kit.py,sha256=C3sKqRkoD20Ldmo-cqGn1zRldEL-l5NYqcFvd0
62
62
  magic_pdf/model/pp_structure_v2.py,sha256=NcqFWL4nUtjl82MFak8HX_8V3i4Aw_fK4dATrIp5uGs,3840
63
63
  magic_pdf/model/sub_modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
64
64
  magic_pdf/model/sub_modules/model_init.py,sha256=e2t95kxiuU47luOHByDokbQ2uob6oQhEA4b6UGVfMjY,8303
65
- magic_pdf/model/sub_modules/model_utils.py,sha256=HKRC9ubCs6O0nNqaztrZO0YKuFpRhs0LKWOaeZfDrTw,12166
65
+ magic_pdf/model/sub_modules/model_utils.py,sha256=Md5yOki9uqW31sWIi7AKRwAJNKnCJBVSfQx6LXRKngs,12166
66
66
  magic_pdf/model/sub_modules/language_detection/__init__.py,sha256=8CRrCQVuExa0BttRFh3Z40lFy2K5jN0sp67KWjOlj5c,50
67
67
  magic_pdf/model/sub_modules/language_detection/utils.py,sha256=Q__v6DdNJztt8GhVSuSB0txahVq-aj8RLhWn2VScx4w,3047
68
68
  magic_pdf/model/sub_modules/language_detection/yolov11/YOLOv11.py,sha256=7T8eFl8zlZe6F0j0jSB3jrwOapDft320JQ1fuWxpvAY,5230
@@ -97,7 +97,7 @@ magic_pdf/model/sub_modules/mfr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
97
97
  magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py,sha256=hq48TP1Ac8Y4FlK7GamnU-WZTQfdZotxBKuFhOIjrcM,5349
98
98
  magic_pdf/model/sub_modules/mfr/unimernet/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
99
99
  magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/__init__.py,sha256=kHcISG8GS4TWJW34SCJCei1jxo6HxvO00aC0dqyNFgI,413
100
- magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/modeling_unimernet.py,sha256=F_rwlFytWOwOntNhhZoUqFAyXgzvq_HVjKyBizwlnjo,7913
100
+ magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/modeling_unimernet.py,sha256=BEZhsW6TqVXDNOgbvcW_0XMtYYiR3hFjpABRhKZgZC8,18101
101
101
  magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/__init__.py,sha256=9T2rBpyGX5YFQYj89-mWujRokOuz4xgNreBuegcg1_c,228
102
102
  magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/configuration_unimer_mbart.py,sha256=hwRar3pqN_cVs3TRTNSuhB4wacBncfJ-qvaTajRb0xc,7934
103
103
  magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/modeling_unimer_mbart.py,sha256=Y5nwqDRzSb8zAbY1U0HGJGVtFggJW1zATP8RdnasNcA,113605
@@ -196,9 +196,9 @@ magic_pdf/tools/common.py,sha256=-x0RSFr7SNbdYq7DntaLYmQmaxyF-xKSf4xMpSUTzA0,126
196
196
  magic_pdf/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
197
197
  magic_pdf/utils/annotations.py,sha256=82ou3uELNbQWa9hOFFkVt0gsIskAKf5msCv5J2IJ5V0,211
198
198
  magic_pdf/utils/office_to_pdf.py,sha256=bFRYe6v3-pfx5R8-bV8cmf12jPnOrYZsleKoECTXzbM,3958
199
- magic_pdf-1.3.8.dist-info/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
200
- magic_pdf-1.3.8.dist-info/METADATA,sha256=8DvNpL0env37CB35wE7uErGK_mxx3LVSC6Qgr1kttEM,47408
201
- magic_pdf-1.3.8.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
202
- magic_pdf-1.3.8.dist-info/entry_points.txt,sha256=wXwYke3j8fqDQTocUspL-CqDUEv3Tfcwp09fM8dZAhA,98
203
- magic_pdf-1.3.8.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
204
- magic_pdf-1.3.8.dist-info/RECORD,,
199
+ magic_pdf-1.3.10.dist-info/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
200
+ magic_pdf-1.3.10.dist-info/METADATA,sha256=_Z35A_31Utec2rGaKDxxpZLjtG2uZywgnJrjNRUF__w,47901
201
+ magic_pdf-1.3.10.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
202
+ magic_pdf-1.3.10.dist-info/entry_points.txt,sha256=wXwYke3j8fqDQTocUspL-CqDUEv3Tfcwp09fM8dZAhA,98
203
+ magic_pdf-1.3.10.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
204
+ magic_pdf-1.3.10.dist-info/RECORD,,