magic-pdf 0.10.1__py3-none-any.whl → 0.10.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. magic_pdf/dict2md/ocr_mkcontent.py +16 -22
  2. magic_pdf/filter/pdf_meta_scan.py +5 -19
  3. magic_pdf/libs/commons.py +0 -161
  4. magic_pdf/libs/draw_bbox.py +2 -3
  5. magic_pdf/libs/markdown_utils.py +0 -21
  6. magic_pdf/libs/pdf_check.py +52 -25
  7. magic_pdf/libs/pdf_image_tools.py +2 -1
  8. magic_pdf/libs/version.py +1 -1
  9. magic_pdf/model/doc_analyze_by_custom_model.py +2 -2
  10. magic_pdf/model/magic_model.py +0 -30
  11. magic_pdf/model/pp_structure_v2.py +23 -3
  12. magic_pdf/model/sub_modules/ocr/paddleocr/ocr_utils.py +50 -29
  13. magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_273_mod.py +11 -9
  14. magic_pdf/model/sub_modules/table/tablemaster/tablemaster_paddle.py +2 -2
  15. magic_pdf/para/para_split_v3.py +21 -7
  16. magic_pdf/pdf_parse_union_core_v2.py +134 -146
  17. magic_pdf/pre_proc/construct_page_dict.py +0 -55
  18. magic_pdf/pre_proc/cut_image.py +0 -37
  19. magic_pdf/pre_proc/ocr_detect_all_bboxes.py +5 -178
  20. magic_pdf/pre_proc/ocr_dict_merge.py +1 -224
  21. magic_pdf/pre_proc/ocr_span_list_modify.py +2 -252
  22. magic_pdf/rw/S3ReaderWriter.py +1 -1
  23. {magic_pdf-0.10.1.dist-info → magic_pdf-0.10.3.dist-info}/METADATA +3 -78
  24. {magic_pdf-0.10.1.dist-info → magic_pdf-0.10.3.dist-info}/RECORD +28 -79
  25. magic_pdf/dict2md/mkcontent.py +0 -438
  26. magic_pdf/layout/__init__.py +0 -0
  27. magic_pdf/layout/bbox_sort.py +0 -681
  28. magic_pdf/layout/layout_det_utils.py +0 -182
  29. magic_pdf/layout/layout_sort.py +0 -921
  30. magic_pdf/layout/layout_spiler_recog.py +0 -101
  31. magic_pdf/layout/mcol_sort.py +0 -336
  32. magic_pdf/libs/calc_span_stats.py +0 -239
  33. magic_pdf/libs/detect_language_from_model.py +0 -21
  34. magic_pdf/libs/nlp_utils.py +0 -203
  35. magic_pdf/libs/textbase.py +0 -33
  36. magic_pdf/libs/vis_utils.py +0 -308
  37. magic_pdf/para/block_continuation_processor.py +0 -562
  38. magic_pdf/para/block_termination_processor.py +0 -480
  39. magic_pdf/para/commons.py +0 -222
  40. magic_pdf/para/denoise.py +0 -246
  41. magic_pdf/para/draw.py +0 -121
  42. magic_pdf/para/exceptions.py +0 -198
  43. magic_pdf/para/layout_match_processor.py +0 -40
  44. magic_pdf/para/para_split.py +0 -807
  45. magic_pdf/para/para_split_v2.py +0 -959
  46. magic_pdf/para/raw_processor.py +0 -207
  47. magic_pdf/para/stats.py +0 -268
  48. magic_pdf/para/title_processor.py +0 -1014
  49. magic_pdf/pdf_parse_union_core.py +0 -345
  50. magic_pdf/post_proc/__init__.py +0 -0
  51. magic_pdf/post_proc/detect_para.py +0 -3472
  52. magic_pdf/post_proc/pdf_post_filter.py +0 -60
  53. magic_pdf/post_proc/remove_footnote.py +0 -153
  54. magic_pdf/pre_proc/citationmarker_remove.py +0 -161
  55. magic_pdf/pre_proc/detect_equation.py +0 -134
  56. magic_pdf/pre_proc/detect_footer_by_model.py +0 -64
  57. magic_pdf/pre_proc/detect_footer_header_by_statistics.py +0 -284
  58. magic_pdf/pre_proc/detect_footnote.py +0 -170
  59. magic_pdf/pre_proc/detect_header.py +0 -64
  60. magic_pdf/pre_proc/detect_images.py +0 -647
  61. magic_pdf/pre_proc/detect_page_number.py +0 -64
  62. magic_pdf/pre_proc/detect_tables.py +0 -62
  63. magic_pdf/pre_proc/equations_replace.py +0 -550
  64. magic_pdf/pre_proc/fix_image.py +0 -244
  65. magic_pdf/pre_proc/fix_table.py +0 -270
  66. magic_pdf/pre_proc/main_text_font.py +0 -23
  67. magic_pdf/pre_proc/ocr_detect_layout.py +0 -133
  68. magic_pdf/pre_proc/pdf_pre_filter.py +0 -78
  69. magic_pdf/pre_proc/post_layout_split.py +0 -0
  70. magic_pdf/pre_proc/remove_colored_strip_bbox.py +0 -101
  71. magic_pdf/pre_proc/remove_footer_header.py +0 -114
  72. magic_pdf/pre_proc/remove_rotate_bbox.py +0 -236
  73. magic_pdf/pre_proc/resolve_bbox_conflict.py +0 -184
  74. magic_pdf/pre_proc/solve_line_alien.py +0 -29
  75. magic_pdf/pre_proc/statistics.py +0 -12
  76. {magic_pdf-0.10.1.dist-info → magic_pdf-0.10.3.dist-info}/LICENSE.md +0 -0
  77. {magic_pdf-0.10.1.dist-info → magic_pdf-0.10.3.dist-info}/WHEEL +0 -0
  78. {magic_pdf-0.10.1.dist-info → magic_pdf-0.10.3.dist-info}/entry_points.txt +0 -0
  79. {magic_pdf-0.10.1.dist-info → magic_pdf-0.10.3.dist-info}/top_level.txt +0 -0
@@ -1,10 +1,7 @@
1
1
 
2
2
  from magic_pdf.config.drop_tag import DropTag
3
- from magic_pdf.config.ocr_content_type import BlockType, ContentType
4
- from magic_pdf.libs.boxbase import (__is_overlaps_y_exceeds_threshold,
5
- calculate_iou,
6
- calculate_overlap_area_in_bbox1_area_ratio,
7
- get_minbox_if_overlap_by_ratio)
3
+ from magic_pdf.config.ocr_content_type import BlockType
4
+ from magic_pdf.libs.boxbase import calculate_iou, get_minbox_if_overlap_by_ratio
8
5
 
9
6
 
10
7
  def remove_overlaps_low_confidence_spans(spans):
@@ -59,253 +56,6 @@ def remove_overlaps_min_spans(spans):
59
56
  return spans, dropped_spans
60
57
 
61
58
 
62
- def remove_spans_by_bboxes(spans, need_remove_spans_bboxes):
63
- # 遍历spans, 判断是否在removed_span_block_bboxes中
64
- # 如果是, 则删除该span 否则, 保留该span
65
- need_remove_spans = []
66
- for span in spans:
67
- for removed_bbox in need_remove_spans_bboxes:
68
- if (
69
- calculate_overlap_area_in_bbox1_area_ratio(span['bbox'], removed_bbox)
70
- > 0.5
71
- ):
72
- if span not in need_remove_spans:
73
- need_remove_spans.append(span)
74
- break
75
-
76
- if len(need_remove_spans) > 0:
77
- for span in need_remove_spans:
78
- spans.remove(span)
79
-
80
- return spans
81
-
82
-
83
- def remove_spans_by_bboxes_dict(spans, need_remove_spans_bboxes_dict):
84
- dropped_spans = []
85
- for drop_tag, removed_bboxes in need_remove_spans_bboxes_dict.items():
86
- # logger.info(f"remove spans by bbox dict, drop_tag: {drop_tag}, removed_bboxes: {removed_bboxes}")
87
- need_remove_spans = []
88
- for span in spans:
89
- # 通过判断span的bbox是否在removed_bboxes中, 判断是否需要删除该span
90
- for removed_bbox in removed_bboxes:
91
- if (
92
- calculate_overlap_area_in_bbox1_area_ratio(
93
- span['bbox'], removed_bbox
94
- )
95
- > 0.5
96
- ):
97
- need_remove_spans.append(span)
98
- break
99
- # 当drop_tag为DropTag.FOOTNOTE时, 判断span是否在removed_bboxes中任意一个的下方,如果是,则删除该span
100
- elif (
101
- drop_tag == DropTag.FOOTNOTE
102
- and (span['bbox'][1] + span['bbox'][3]) / 2 > removed_bbox[3]
103
- and removed_bbox[0]
104
- < (span['bbox'][0] + span['bbox'][2]) / 2
105
- < removed_bbox[2]
106
- ):
107
- need_remove_spans.append(span)
108
- break
109
-
110
- for span in need_remove_spans:
111
- spans.remove(span)
112
- span['tag'] = drop_tag
113
- dropped_spans.append(span)
114
-
115
- return spans, dropped_spans
116
-
117
-
118
- def adjust_bbox_for_standalone_block(spans):
119
- # 对tpye=["interline_equation", "image", "table"]进行额外处理,如果左边有字的话,将该span的bbox中y0调整至不高于文字的y0
120
- for sb_span in spans:
121
- if sb_span['type'] in [
122
- ContentType.InterlineEquation,
123
- ContentType.Image,
124
- ContentType.Table,
125
- ]:
126
- for text_span in spans:
127
- if text_span['type'] in [ContentType.Text, ContentType.InlineEquation]:
128
- # 判断span2的纵向高度是否被span所覆盖
129
- if (
130
- sb_span['bbox'][1] < text_span['bbox'][1]
131
- and sb_span['bbox'][3] > text_span['bbox'][3]
132
- ):
133
- # 判断span2是否在span左边
134
- if text_span['bbox'][0] < sb_span['bbox'][0]:
135
- # 调整span的y0和span2的y0一致
136
- sb_span['bbox'][1] = text_span['bbox'][1]
137
- return spans
138
-
139
-
140
- def modify_y_axis(spans: list, displayed_list: list, text_inline_lines: list):
141
- # displayed_list = []
142
- # 如果spans为空,则不处理
143
- if len(spans) == 0:
144
- pass
145
- else:
146
- spans.sort(key=lambda span: span['bbox'][1])
147
-
148
- lines = []
149
- current_line = [spans[0]]
150
- if spans[0]['type'] in [
151
- ContentType.InterlineEquation,
152
- ContentType.Image,
153
- ContentType.Table,
154
- ]:
155
- displayed_list.append(spans[0])
156
-
157
- line_first_y0 = spans[0]['bbox'][1]
158
- line_first_y = spans[0]['bbox'][3]
159
- # 用于给行间公式搜索
160
- # text_inline_lines = []
161
- for span in spans[1:]:
162
- # if span.get("content","") == "78.":
163
- # print("debug")
164
- # 如果当前的span类型为"interline_equation" 或者 当前行中已经有"interline_equation"
165
- # image和table类型,同上
166
- if span['type'] in [
167
- ContentType.InterlineEquation,
168
- ContentType.Image,
169
- ContentType.Table,
170
- ] or any(
171
- s['type']
172
- in [ContentType.InterlineEquation, ContentType.Image, ContentType.Table]
173
- for s in current_line
174
- ):
175
- # 传入
176
- if span['type'] in [
177
- ContentType.InterlineEquation,
178
- ContentType.Image,
179
- ContentType.Table,
180
- ]:
181
- displayed_list.append(span)
182
- # 则开始新行
183
- lines.append(current_line)
184
- if len(current_line) > 1 or current_line[0]['type'] in [
185
- ContentType.Text,
186
- ContentType.InlineEquation,
187
- ]:
188
- text_inline_lines.append(
189
- (current_line, (line_first_y0, line_first_y))
190
- )
191
- current_line = [span]
192
- line_first_y0 = span['bbox'][1]
193
- line_first_y = span['bbox'][3]
194
- continue
195
-
196
- # 如果当前的span与当前行的最后一个span在y轴上重叠,则添加到当前行
197
- if __is_overlaps_y_exceeds_threshold(
198
- span['bbox'], current_line[-1]['bbox']
199
- ):
200
- if span['type'] == 'text':
201
- line_first_y0 = span['bbox'][1]
202
- line_first_y = span['bbox'][3]
203
- current_line.append(span)
204
-
205
- else:
206
- # 否则,开始新行
207
- lines.append(current_line)
208
- text_inline_lines.append((current_line, (line_first_y0, line_first_y)))
209
- current_line = [span]
210
- line_first_y0 = span['bbox'][1]
211
- line_first_y = span['bbox'][3]
212
-
213
- # 添加最后一行
214
- if current_line:
215
- lines.append(current_line)
216
- if len(current_line) > 1 or current_line[0]['type'] in [
217
- ContentType.Text,
218
- ContentType.InlineEquation,
219
- ]:
220
- text_inline_lines.append((current_line, (line_first_y0, line_first_y)))
221
- for line in text_inline_lines:
222
- # 按照x0坐标排序
223
- current_line = line[0]
224
- current_line.sort(key=lambda span: span['bbox'][0])
225
-
226
- # 调整每一个文字行内bbox统一
227
- for line in text_inline_lines:
228
- current_line, (line_first_y0, line_first_y) = line
229
- for span in current_line:
230
- span['bbox'][1] = line_first_y0
231
- span['bbox'][3] = line_first_y
232
-
233
- # return spans, displayed_list, text_inline_lines
234
-
235
-
236
- def modify_inline_equation(spans: list, displayed_list: list, text_inline_lines: list):
237
- # 错误行间公式转行内公式
238
- j = 0
239
- for i in range(len(displayed_list)):
240
- # if i == 8:
241
- # print("debug")
242
- span = displayed_list[i]
243
- span_y0, span_y = span['bbox'][1], span['bbox'][3]
244
-
245
- while j < len(text_inline_lines):
246
- text_line = text_inline_lines[j]
247
- y0, y1 = text_line[1]
248
- if (
249
- span_y0 < y0 < span_y
250
- or span_y0 < y1 < span_y
251
- or span_y0 < y0
252
- and span_y > y1
253
- ) and __is_overlaps_y_exceeds_threshold(span['bbox'], (0, y0, 0, y1)):
254
- # 调整公式类型
255
- if span['type'] == ContentType.InterlineEquation:
256
- # 最后一行是行间公式
257
- if j + 1 >= len(text_inline_lines):
258
- span['type'] = ContentType.InlineEquation
259
- span['bbox'][1] = y0
260
- span['bbox'][3] = y1
261
- else:
262
- # 行间公式旁边有多行文字或者行间公式比文字高3倍则不转换
263
- y0_next, y1_next = text_inline_lines[j + 1][1]
264
- if (
265
- not __is_overlaps_y_exceeds_threshold(
266
- span['bbox'], (0, y0_next, 0, y1_next)
267
- )
268
- and 3 * (y1 - y0) > span_y - span_y0
269
- ):
270
- span['type'] = ContentType.InlineEquation
271
- span['bbox'][1] = y0
272
- span['bbox'][3] = y1
273
- break
274
- elif (
275
- span_y < y0
276
- or span_y0 < y0 < span_y
277
- and not __is_overlaps_y_exceeds_threshold(span['bbox'], (0, y0, 0, y1))
278
- ):
279
- break
280
- else:
281
- j += 1
282
-
283
- return spans
284
-
285
-
286
- def get_qa_need_list(blocks):
287
- # 创建 images, tables, interline_equations, inline_equations 的副本
288
- images = []
289
- tables = []
290
- interline_equations = []
291
- inline_equations = []
292
-
293
- for block in blocks:
294
- for line in block['lines']:
295
- for span in line['spans']:
296
- if span['type'] == ContentType.Image:
297
- images.append(span)
298
- elif span['type'] == ContentType.Table:
299
- tables.append(span)
300
- elif span['type'] == ContentType.InlineEquation:
301
- inline_equations.append(span)
302
- elif span['type'] == ContentType.InterlineEquation:
303
- interline_equations.append(span)
304
- else:
305
- continue
306
- return images, tables, interline_equations, inline_equations
307
-
308
-
309
59
  def get_qa_need_list_v2(blocks):
310
60
  # 创建 images, tables, interline_equations, inline_equations 的副本
311
61
  images = []
@@ -1,5 +1,5 @@
1
1
  from magic_pdf.rw.AbsReaderWriter import AbsReaderWriter
2
- from magic_pdf.libs.commons import parse_aws_param, parse_bucket_key, join_path
2
+ from magic_pdf.libs.commons import parse_bucket_key, join_path
3
3
  import boto3
4
4
  from loguru import logger
5
5
  from botocore.config import Config
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: magic-pdf
3
- Version: 0.10.1
3
+ Version: 0.10.3
4
4
  Summary: A practical tool for converting PDF to Markdown
5
5
  Home-page: https://github.com/opendatalab/MinerU
6
6
  Requires-Python: >=3.9
@@ -12,7 +12,6 @@ Requires-Dist: click>=8.1.7
12
12
  Requires-Dist: fast-langdetect==0.2.0
13
13
  Requires-Dist: loguru>=0.6.0
14
14
  Requires-Dist: numpy<2.0.0,>=1.21.6
15
- Requires-Dist: pdfminer.six==20231228
16
15
  Requires-Dist: pydantic<2.8.0,>=2.7.2
17
16
  Requires-Dist: PyMuPDF>=1.24.9
18
17
  Requires-Dist: scikit-learn>=1.0.2
@@ -320,88 +319,14 @@ If your device supports CUDA and meets the GPU requirements of the mainline envi
320
319
 
321
320
  ### Command Line
322
321
 
323
- ```bash
324
- magic-pdf --help
325
- Usage: magic-pdf [OPTIONS]
326
-
327
- Options:
328
- -v, --version display the version and exit
329
- -p, --path PATH local pdf filepath or directory [required]
330
- -o, --output-dir PATH output local directory [required]
331
- -m, --method [ocr|txt|auto] the method for parsing pdf. ocr: using ocr
332
- technique to extract information from pdf. txt:
333
- suitable for the text-based pdf only and
334
- outperform ocr. auto: automatically choose the
335
- best method for parsing pdf from ocr and txt.
336
- without method specified, auto will be used by
337
- default.
338
- -l, --lang TEXT Input the languages in the pdf (if known) to
339
- improve OCR accuracy. Optional. You should
340
- input "Abbreviation" with language form url: ht
341
- tps://paddlepaddle.github.io/PaddleOCR/latest/en
342
- /ppocr/blog/multi_languages.html#5-support-languages-
343
- and-abbreviations
344
- -d, --debug BOOLEAN Enables detailed debugging information during
345
- the execution of the CLI commands.
346
- -s, --start INTEGER The starting page for PDF parsing, beginning
347
- from 0.
348
- -e, --end INTEGER The ending page for PDF parsing, beginning from
349
- 0.
350
- --help Show this message and exit.
351
-
352
-
353
- ## show version
354
- magic-pdf -v
355
-
356
- ## command line example
357
- magic-pdf -p {some_pdf} -o {some_output_dir} -m auto
358
- ```
322
+ [Using MinerU via Command Line](https://mineru.readthedocs.io/en/latest/user_guide/quick_start/command_line.html)
359
323
 
360
- `{some_pdf}` can be a single PDF file or a directory containing multiple PDFs.
361
- The results will be saved in the `{some_output_dir}` directory. The output file list is as follows:
362
-
363
- ```text
364
- ├── some_pdf.md # markdown file
365
- ├── images # directory for storing images
366
- ├── some_pdf_layout.pdf # layout diagram (Include layout reading order)
367
- ├── some_pdf_middle.json # MinerU intermediate processing result
368
- ├── some_pdf_model.json # model inference result
369
- ├── some_pdf_origin.pdf # original PDF file
370
- ├── some_pdf_spans.pdf # smallest granularity bbox position information diagram
371
- └── some_pdf_content_list.json # Rich text JSON arranged in reading order
372
- ```
373
324
  > [!TIP]
374
325
  > For more information about the output files, please refer to the [Output File Description](docs/output_file_en_us.md).
375
326
 
376
327
  ### API
377
328
 
378
- Processing files from local disk
379
-
380
- ```python
381
- image_writer = DiskReaderWriter(local_image_dir)
382
- image_dir = str(os.path.basename(local_image_dir))
383
- jso_useful_key = {"_pdf_type": "", "model_list": []}
384
- pipe = UNIPipe(pdf_bytes, jso_useful_key, image_writer)
385
- pipe.pipe_classify()
386
- pipe.pipe_analyze()
387
- pipe.pipe_parse()
388
- md_content = pipe.pipe_mk_markdown(image_dir, drop_mode="none")
389
- ```
390
-
391
- Processing files from object storage
392
-
393
- ```python
394
- s3pdf_cli = S3ReaderWriter(pdf_ak, pdf_sk, pdf_endpoint)
395
- image_dir = "s3://img_bucket/"
396
- s3image_cli = S3ReaderWriter(img_ak, img_sk, img_endpoint, parent_path=image_dir)
397
- pdf_bytes = s3pdf_cli.read(s3_pdf_path, mode=s3pdf_cli.MODE_BIN)
398
- jso_useful_key = {"_pdf_type": "", "model_list": []}
399
- pipe = UNIPipe(pdf_bytes, jso_useful_key, s3image_cli)
400
- pipe.pipe_classify()
401
- pipe.pipe_analyze()
402
- pipe.pipe_parse()
403
- md_content = pipe.pipe_mk_markdown(image_dir, drop_mode="none")
404
- ```
329
+ [Using MinerU via Python API](https://mineru.readthedocs.io/en/latest/user_guide/quick_start/to_markdown.html)
405
330
 
406
331
  For detailed implementation, refer to:
407
332
 
@@ -1,8 +1,7 @@
1
1
  magic_pdf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  magic_pdf/pdf_parse_by_ocr.py,sha256=WTaLVSU2wRpgtldasnqbrw1B0OvVi8VvcB_t-dAIfmw,880
3
3
  magic_pdf/pdf_parse_by_txt.py,sha256=dh3ZM6BVrFzwbH4137BPUdKhgacGlpS2N4mn74_-UaA,762
4
- magic_pdf/pdf_parse_union_core.py,sha256=w90lFIMOYUMAq4iv8bpsbBtLXFphPV4HyYeqbOTYQUI,12420
5
- magic_pdf/pdf_parse_union_core_v2.py,sha256=EqEi9AahBBh2JbXoY8uOCmClvi9W_H_26U4jK8RwPwU,31308
4
+ magic_pdf/pdf_parse_union_core_v2.py,sha256=6Apku7-pW450HbHNTtbVLDyroRSKlQ57w9f0ScOaZv4,30879
6
5
  magic_pdf/user_api.py,sha256=Sh6U7iD5VsH7Qkav_0o5GTx-Rlj7vhmhHQHZSBKR5T8,4006
7
6
  magic_pdf/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
7
  magic_pdf/config/constants.py,sha256=gqhUEtso7rCop-k-VvEPAMW_6pA6Tv2Y9smrr_0Iajo,1173
@@ -28,51 +27,39 @@ magic_pdf/data/io/base.py,sha256=SqNQqe30ZvoVvg7GVv-hLMCjN6yBgDyQQWeLgGsTfhQ,111
28
27
  magic_pdf/data/io/http.py,sha256=XlKB0DNf4a_uUnfgcclvaaOtmE7lmddx0DnK8A-emAM,958
29
28
  magic_pdf/data/io/s3.py,sha256=hyA7sbNriQy64xd_uyJ7acN_oneQ1Pdmoc7_xcvkue8,3606
30
29
  magic_pdf/dict2md/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
- magic_pdf/dict2md/mkcontent.py,sha256=bMQK7uiay76YaWA92VIK57YajINV20SnOs65wOEXyKE,18667
32
- magic_pdf/dict2md/ocr_mkcontent.py,sha256=ohjhEFS9YFrzTCC9c9yrvi4QuZe9iZm1qlkQWB6xxIw,13038
30
+ magic_pdf/dict2md/ocr_mkcontent.py,sha256=hwcHTEx1tbIlM9ukmPBOAyH0G6rmbOTu87nVtZ1gE6k,12354
33
31
  magic_pdf/filter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
32
  magic_pdf/filter/pdf_classify_by_type.py,sha256=spmDO-f2ihAl1d6-EP-j271Yi50oyu6mw4X2kRd_m0s,42320
35
- magic_pdf/filter/pdf_meta_scan.py,sha256=h4D4O0OeAlEy2A8mJ6E0aQ8wIizIfsIxEagbjaomnAo,17823
33
+ magic_pdf/filter/pdf_meta_scan.py,sha256=3ba7SxXu1z2r5N97Dxmp_L10Lo7llsrBlvtEAJeIJBQ,17403
36
34
  magic_pdf/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
35
  magic_pdf/integrations/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
36
  magic_pdf/integrations/rag/api.py,sha256=t38wvIBzLje4_JzTP3dewMLqV-tQJ-A3B92Sj2oyrfs,2507
39
37
  magic_pdf/integrations/rag/type.py,sha256=Z_1g_ZIOCsb7-FmZBudReIXj8nzGrgj_BygCalhJdmk,3193
40
38
  magic_pdf/integrations/rag/utils.py,sha256=DCb-UhC8TElb6Eq7_6NmmETreKEk5DVE18hNL8sTEBk,11762
41
- magic_pdf/layout/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
- magic_pdf/layout/bbox_sort.py,sha256=PzzaBf6MC_AZ-ZWGU0Kg-KIsw874l_gML73mM3hE4Ps,30807
43
- magic_pdf/layout/layout_det_utils.py,sha256=NCYBTvsrULE3Cue53aMD1MfXTmOL9Xy0nivl6ku2cls,9137
44
- magic_pdf/layout/layout_sort.py,sha256=jtacQVcxnuYAksvEqtS0DH-v6U8qyjX-jmyZgDJ-egA,37005
45
- magic_pdf/layout/layout_spiler_recog.py,sha256=QjBSgB-a7J2yjUR1eaCs9ZD7URtiRnV6W934hpAeuC4,3067
46
- magic_pdf/layout/mcol_sort.py,sha256=ADnLisBJBHXDKYChcf2lzTb_TC_vZ4q89_CSN8mwEJc,11331
47
39
  magic_pdf/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
40
  magic_pdf/libs/boxbase.py,sha256=ELMHWolgWROxOAQDgwmL7VS5kveZp4ifvEzRmPul2Ws,16925
49
- magic_pdf/libs/calc_span_stats.py,sha256=5vnU27DcbkFDRSAoLqAmX0KQ3I9ehWkEgh_t9hxg_zI,10147
50
41
  magic_pdf/libs/clean_memory.py,sha256=BIOmEWuwR7c_p4OwTSW2muE3PRaGhmOplS-wTXt_EXk,211
51
- magic_pdf/libs/commons.py,sha256=6Zu9-OyamyCNDY7qj0SxR-rux-ggj9im3CVPtC4ubB8,7108
42
+ magic_pdf/libs/commons.py,sha256=xD0fGA16KNB5rhbl4zRrOqdrNHYwaRablT_s9W2ZTbw,1174
52
43
  magic_pdf/libs/config_reader.py,sha256=vDsxw2xbW7Gb1mKqERTSlttbXFNtVU0BDdae2dG7wEI,4068
53
44
  magic_pdf/libs/convert_utils.py,sha256=Ov-lsfCLBPz_15iSJXIslBNmrSf_E_1g_XDWJy8NgO8,143
54
45
  magic_pdf/libs/coordinate_transform.py,sha256=Bbop2cP2uz2ZG0U0gwd7J6EKkgABq5Rv03qf2LMPw80,429
55
- magic_pdf/libs/detect_language_from_model.py,sha256=Uln8F9qs8EJOw4EgI7KRlaU3lD_mK8KMTlADLFtz8fk,816
56
- magic_pdf/libs/draw_bbox.py,sha256=NhAfqib5HYuGjjrAG_SvJR-yOHZTy6tzDxLXdxKlULQ,17676
46
+ magic_pdf/libs/draw_bbox.py,sha256=2IXr4TUxm0-pXYIPkNaELWo9pOysZC6etpqzTE5eg-w,17588
57
47
  magic_pdf/libs/hash_utils.py,sha256=VEKK9WfFoZgrPfi8kfITjLpr8Ahufs8tXh9R1Y5lAL8,404
58
48
  magic_pdf/libs/json_compressor.py,sha256=6-KCu0lb5ksmyqWtQGb4QqmP-FjRb5dP7P-Hevcn68g,875
59
49
  magic_pdf/libs/language.py,sha256=Hj5-lrGoNExxdHLbkcNG-c27U4AjJ9AZPdZblaNSehU,1099
60
50
  magic_pdf/libs/local_math.py,sha256=tqljQOgqh3fZc146HYhO88JXJaiXMVwArBkk_CSGICc,177
61
- magic_pdf/libs/markdown_utils.py,sha256=cLxLXjRhrNp_wCHvtglrGA_FVdrvfd1KULeTtj1p18w,944
62
- magic_pdf/libs/nlp_utils.py,sha256=-X9W3-Ns5ZdDYFvyyEq6i6P2b5hCATaFEZeOjwNOH9M,6901
51
+ magic_pdf/libs/markdown_utils.py,sha256=86v2BmsSV4NkoRZrH4uQD1youJhYFF3vIKr_vDeg3z0,270
63
52
  magic_pdf/libs/path_utils.py,sha256=Hykw_l5CU736b2egHV9P7B-qh3QNKO4nZSGCbsi0Z8E,1043
64
- magic_pdf/libs/pdf_check.py,sha256=MAe8wzwT0qvPf_I72wEZG7k1g4haNHS7oUtLqkB5rlE,2145
65
- magic_pdf/libs/pdf_image_tools.py,sha256=sh8hgBQu_83R71qBLodOFdByBUuQujsOMfgpSD9mrhE,1981
53
+ magic_pdf/libs/pdf_check.py,sha256=wCVOcwEPeMRcHW5OGN-GSQnPT5qNXUYHWWowoUknxF4,3178
54
+ magic_pdf/libs/pdf_image_tools.py,sha256=kjzSEbm7K0yiHv8kJ4VbZ9HHktM8qvAv3LhxRyDZEQk,1987
66
55
  magic_pdf/libs/safe_filename.py,sha256=ckwcM_eqoysTb5id8czp-tXq2G9da0-l3pshZDCHQtE,236
67
- magic_pdf/libs/textbase.py,sha256=SC1Frhz3Fb7V7n2SFRBsl7Bmg0JZdlvZskq0lfW1vIk,732
68
- magic_pdf/libs/version.py,sha256=v7Gyp89umFzDtY45tTjCdXqZnQ2RN01AibdYNxEvxYo,23
69
- magic_pdf/libs/vis_utils.py,sha256=hTOTEakKV0pGMbk0tbRkVI_tku7A3dGc96ynObZ4kwI,10207
56
+ magic_pdf/libs/version.py,sha256=0C8KcY1dzs3hdkAre06v0NCQ0Uxcqv6g9a93bRcVLW0,23
70
57
  magic_pdf/model/__init__.py,sha256=1QcfMKET0xQhSaZMjNQHi_TjzSSDR6PI5mjkmaXHPe8,52
71
- magic_pdf/model/doc_analyze_by_custom_model.py,sha256=HOT6chGx2VPyH6O9WB0c6xGPeDs9m_6oZn3iOa745yw,7125
72
- magic_pdf/model/magic_model.py,sha256=8nJLzNCa0Ag4JhMAQbjj5qrkj617qKPCXVJAiT9DnaA,43472
58
+ magic_pdf/model/doc_analyze_by_custom_model.py,sha256=YZwlhIgidy1_MUyTM_MRSLfKR_rpi508Bra6Vpj8PJ4,7125
59
+ magic_pdf/model/magic_model.py,sha256=ppMkMqtP7sKncHTZ2SbXuPOoR988iRPexBEMA6QeiIc,42208
73
60
  magic_pdf/model/model_list.py,sha256=tJ9jtMB93HGx8Rmt8wmQSDFXZBUIPQrwaaYsep4luTM,183
74
61
  magic_pdf/model/pdf_extract_kit.py,sha256=ceYWlSU1BhakfsHPVM9SrUx35EvCBa20uJmgDO5PAtE,10933
75
- magic_pdf/model/pp_structure_v2.py,sha256=BKPN7W4BjG0eWPAPjPEac1RMnb5eIzmAz4E4Rq-9b1U,3019
62
+ magic_pdf/model/pp_structure_v2.py,sha256=NcqFWL4nUtjl82MFak8HX_8V3i4Aw_fK4dATrIp5uGs,3840
76
63
  magic_pdf/model/sub_modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
77
64
  magic_pdf/model/sub_modules/model_init.py,sha256=CnlZLsiSOmGJXQRASH-hMmuPiF6hYKCNfmzDTjQqy5g,5073
78
65
  magic_pdf/model/sub_modules/model_utils.py,sha256=ToiuwXbrvH_CPIwW2AXzz9miadUN5FA7lthwBljtIco,2118
@@ -107,8 +94,8 @@ magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py,sha256=jeJkqID6L1ZivPMdK1
107
94
  magic_pdf/model/sub_modules/mfr/unimernet/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
108
95
  magic_pdf/model/sub_modules/ocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
109
96
  magic_pdf/model/sub_modules/ocr/paddleocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
110
- magic_pdf/model/sub_modules/ocr/paddleocr/ocr_utils.py,sha256=UP7fADPGoxAMj2SUKmeW-fe_AcAQxlT9Mfy4WF6vHmU,9796
111
- magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_273_mod.py,sha256=a6xkQHqLMUL4NCaORp8oo4Tfa8GB8PN9MVvG7Pj6jIE,7316
97
+ magic_pdf/model/sub_modules/ocr/paddleocr/ocr_utils.py,sha256=rwKphio9SZgiNgqASWOBWZIf6PPi3kvgQO_qJLc_diE,10726
98
+ magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_273_mod.py,sha256=d__xICejA_Q-Cz4cfajwroDjfA0dT4TL18XAFYYc4OQ,7265
112
99
  magic_pdf/model/sub_modules/ocr/paddleocr/ppocr_291_mod.py,sha256=VouMTvi6M5TV6pQdlpusgfyZapxiZ_Wi7Ff53eMC3rE,8996
113
100
  magic_pdf/model/sub_modules/reading_oreder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
114
101
  magic_pdf/model/sub_modules/reading_oreder/layoutreader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -121,66 +108,28 @@ magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py,sha256=_FKKOSKeceusx
121
108
  magic_pdf/model/sub_modules/table/structeqtable/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
122
109
  magic_pdf/model/sub_modules/table/structeqtable/struct_eqtable.py,sha256=SrNPm-uOFEvN5muFGbXTAuwzXm-rCiaihVdqbydIBIA,1131
123
110
  magic_pdf/model/sub_modules/table/tablemaster/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
124
- magic_pdf/model/sub_modules/table/tablemaster/tablemaster_paddle.py,sha256=AdH3UGu4BEoII0uFjPKUf61W7HmG4fDlWgR1xxMeFlE,2775
111
+ magic_pdf/model/sub_modules/table/tablemaster/tablemaster_paddle.py,sha256=QEQ-56AzoIAU7UWsEidWW_KDOY5r16qm2kSpox8cxq4,2755
125
112
  magic_pdf/para/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
126
- magic_pdf/para/block_continuation_processor.py,sha256=IkReB5hirjm0OAirNzQQpnWe2f2bdP3Hug3Ef8qTRDA,22749
127
- magic_pdf/para/block_termination_processor.py,sha256=YU3ZYqJy9e3OQmOuQYZrR6AUpmAlQ0mhj0PgZZPZ_fM,17957
128
- magic_pdf/para/commons.py,sha256=VdJ8SY9qJTtcRyx8HH-PFeZSJwL4Tsf50197RD_-dwc,5414
129
- magic_pdf/para/denoise.py,sha256=J7dM2KNnbdzAd2A3agB04U6L1GL9RrhAs-MLrq-_Ftg,10443
130
- magic_pdf/para/draw.py,sha256=KyWc03do_WuBKQ028HYzepYwbIkel9ID0uqRhuPVOHc,5643
131
- magic_pdf/para/exceptions.py,sha256=kpjGxrSZ-drNmoKlmuQ0asTjI8cKKKWsdDDBoDHQP9M,4978
132
- magic_pdf/para/layout_match_processor.py,sha256=yr4FEO7GJ502udShqGRqIJQ_FQxoa0aG_mhmWd8nLwI,1554
133
- magic_pdf/para/para_split.py,sha256=z7nYeg86BjZOAdJNMwYKSu51W9evurtl3cy1ZUcQLlw,33222
134
- magic_pdf/para/para_split_v2.py,sha256=vJJqqMMKbv8D702nODThL-5hjkgZ7Vl2BTmEIdwmmDw,39051
135
- magic_pdf/para/para_split_v3.py,sha256=atfELVRx-90paAS3nZptgP0qG8UpTTaj3LG_2x3NAlQ,15977
136
- magic_pdf/para/raw_processor.py,sha256=mHxD9FrdOSXH7NqM41s55URyCyuyACvm9kKtowkIb3k,6317
137
- magic_pdf/para/stats.py,sha256=-6Pf9Y8jkP1uJOYWiHUjw9Lb-Fb9GY7MHr_ok7x2GX0,9731
138
- magic_pdf/para/title_processor.py,sha256=pYZv9vEkIjAtCz8jIUtl9AVUy_ib5SdAZmMVoZtsMRI,38593
113
+ magic_pdf/para/para_split_v3.py,sha256=UOQe0HUVX7FAlMbJp1OkGfdM7JECWeqscv3s8Hge7ps,16922
139
114
  magic_pdf/pipe/AbsPipe.py,sha256=jPtAa0pz_vPddya3ZpUk6UrGqp8PcBdLONO1spzavQo,4371
140
115
  magic_pdf/pipe/OCRPipe.py,sha256=nuN-zpUzu--gyrC0_vsvvilAyK7Mp3Tom_UOnsur1ps,2158
141
116
  magic_pdf/pipe/TXTPipe.py,sha256=5OFo2e8U5Y24wJrFDEJghBDpklnKFEnzKTYVnnhQssE,2159
142
117
  magic_pdf/pipe/UNIPipe.py,sha256=ik0xXPdsHo7Un0gFpLC5ul04BP3Omd2mp5gqem40deE,4807
143
118
  magic_pdf/pipe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
144
- magic_pdf/post_proc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
145
- magic_pdf/post_proc/detect_para.py,sha256=5LX86ueHQGOV9CNimAxqZH4R3KTi78leum1de_Na0pw,126181
146
- magic_pdf/post_proc/pdf_post_filter.py,sha256=3EJDovQPckPKJaBY1wvAty-LGKyRG63WICY_bA_Kfbs,2501
147
- magic_pdf/post_proc/remove_footnote.py,sha256=701P7xRu6gzLaEHfb2xkYpLZI4CwK2FAo7Ggho4bOTI,7596
148
119
  magic_pdf/pre_proc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
149
- magic_pdf/pre_proc/citationmarker_remove.py,sha256=IitOERaK9fGaktsYMyiaaL_71uMIrlG5ZdmpZaR6dsA,6640
150
- magic_pdf/pre_proc/construct_page_dict.py,sha256=lp3zBmInlWYYIcGC1-NSqT9s44AjDvlnWxDPeZoBVSY,3043
151
- magic_pdf/pre_proc/cut_image.py,sha256=TghshkDTgdUbyLSbKZoFI9-n-xaFub02IYPyu0IAnRY,2761
152
- magic_pdf/pre_proc/detect_equation.py,sha256=9omDHKTI8QO9Qd46eVFHWhZeMmTNx7XDuWRgjXI-KFA,6627
153
- magic_pdf/pre_proc/detect_footer_by_model.py,sha256=_EghAM_zWBcqVY8XBkbSoprKqKUa0mlN1U8YNWxNNLI,2848
154
- magic_pdf/pre_proc/detect_footer_header_by_statistics.py,sha256=924soXZ51QVpitPgVgnwbC7BqOZI30j5hGW5zP86y-w,11250
155
- magic_pdf/pre_proc/detect_footnote.py,sha256=UxFuTCRwXdAv3wKCgRQJJVt12hM9O9oPTwzPAChQXoM,8309
156
- magic_pdf/pre_proc/detect_header.py,sha256=KOmRehgKMuMqNa_2weXkdNSiRVWMFgLMQE4e1itbY7g,2848
157
- magic_pdf/pre_proc/detect_images.py,sha256=8DwGGTb5IjxqADZDTc_ngwJrTYXxK2qpRqI2FBoPr00,30432
158
- magic_pdf/pre_proc/detect_page_number.py,sha256=qvYrBbCtBbREvw-MySL_p7byCRvcm1fkLJ5ZB4TP8OM,2848
159
- magic_pdf/pre_proc/detect_tables.py,sha256=srJzgLVeVuOsqnESqfdJfVukTF84K8qmI5mgFX_BZGs,2800
160
- magic_pdf/pre_proc/equations_replace.py,sha256=7mexRPwD9C_UJ-SbvO_-XnpcnN7YtGUUznmPjHbjhnw,20526
161
- magic_pdf/pre_proc/fix_image.py,sha256=5MOfkXc8abfIp49g-68vll40wwTUZ5tcQ2gtsJuFmvs,11486
162
- magic_pdf/pre_proc/fix_table.py,sha256=20sqJe27fAXcL7_C0qQ9mpsggmH37WuX-wPYWyRgACA,13227
163
- magic_pdf/pre_proc/main_text_font.py,sha256=1gkjvPuBdKC4oVFkLvnRm2zghsLtVlfAEMKXouyVonM,1048
164
- magic_pdf/pre_proc/ocr_detect_all_bboxes.py,sha256=DMc2H2xGqVePBReZu5AQbPdvDw3sxOssmujCLlNW3Vs,14143
165
- magic_pdf/pre_proc/ocr_detect_layout.py,sha256=DW0_HXzmcbW22cXKIYFsyZNFh8mEjSHXIFVjXndJsvQ,5878
166
- magic_pdf/pre_proc/ocr_dict_merge.py,sha256=Au8y1NBhbWpq_VuPLg3b9dAMUhyPS71xtTghtd21K5M,14273
167
- magic_pdf/pre_proc/ocr_span_list_modify.py,sha256=9DxEyy1pH87g4T_JEgI3cTVCL2TVrEBl38wsmqhQM4k,12758
168
- magic_pdf/pre_proc/pdf_pre_filter.py,sha256=qvNlNyj4Mc3qa73mgfkp0PMR-ucABbx3mMcyVipaEpQ,2776
169
- magic_pdf/pre_proc/post_layout_split.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
120
+ magic_pdf/pre_proc/construct_page_dict.py,sha256=OFmq5XRKi6fYIo-lmGlL-NB16Sf0egzsfEx-fT2uYrc,660
121
+ magic_pdf/pre_proc/cut_image.py,sha256=U-ttnl3lAhhmgtkR1GGyPAVm0i0-6VscXf3E2EDy3lE,1187
122
+ magic_pdf/pre_proc/ocr_detect_all_bboxes.py,sha256=xQ1L6pwQjN4xBSKEXslheip1aMFaiB0grqlX3BF-kh0,9282
123
+ magic_pdf/pre_proc/ocr_dict_merge.py,sha256=Ycgz2whzotL7kwl0-mHNV48QOQ2j4tRXqLSQrJRojYg,4847
124
+ magic_pdf/pre_proc/ocr_span_list_modify.py,sha256=a5OmIwtkXkz6fTQg6p8R-f1nA_w0rgMwKFQjfs_HwrE,2864
170
125
  magic_pdf/pre_proc/remove_bbox_overlap.py,sha256=8eXNdsz9s06LX0kS0AxYSkaY1tWQQMkJfVtVSdjTQNE,3090
171
- magic_pdf/pre_proc/remove_colored_strip_bbox.py,sha256=WVKhgeWifRdO-u2ETYffkcMOFVYIbiaZu5pMr1RpEdA,4090
172
- magic_pdf/pre_proc/remove_footer_header.py,sha256=Igdr4jH7BUGuTcapWPiKEGKxhWH12c3VVmX5xwUVn7w,5680
173
- magic_pdf/pre_proc/remove_rotate_bbox.py,sha256=di7geS7AFhSaAvkWZHT6J3dlXEq8uu9Z4oBYtolQjl0,8803
174
- magic_pdf/pre_proc/resolve_bbox_conflict.py,sha256=ABl0vo8kkcCPSTI8dpXQTOH1b9R-lbzsJDDFONU6ELk,7313
175
- magic_pdf/pre_proc/solve_line_alien.py,sha256=aNoQptPcC38Sm1I2ABhgw8jeH_5kjsRHx3VYlFFtm1g,853
176
- magic_pdf/pre_proc/statistics.py,sha256=_9jGlXq0iXd03UMxB92ZqCiu7cjNkG5vHvFlTF_9ytA,220
177
126
  magic_pdf/resources/fasttext-langdetect/lid.176.ftz,sha256=jzRyz-hzintgmejpmcPL-uDc0VaWqsfXc4qAOdtgPoM,938013
178
127
  magic_pdf/resources/model_config/model_configs.yaml,sha256=S2BnVQxPd0xsZswn9WqJKTfnqd7ayY5lRwDVifTEAfw,290
179
128
  magic_pdf/resources/model_config/UniMERNet/demo.yaml,sha256=Jdaim2D2lAYrV9rhc1X5Sy2_IacGOrfysJhxEUgSElo,827
180
129
  magic_pdf/resources/model_config/layoutlmv3/layoutlmv3_base_inference.yaml,sha256=9aNAEYgpHTAWpcUrDvuPG2y4V-Qw8QdcJefi96y8yDU,6109
181
130
  magic_pdf/rw/AbsReaderWriter.py,sha256=2H5SDJfAAOX9kPfel06a8VRCHxD1Y8aPbWEkQDdn9JM,452
182
131
  magic_pdf/rw/DiskReaderWriter.py,sha256=7ZAekH8V6xlBo_1WeSZ6sNwAj2WGPtjNl50zq1CoMDY,2614
183
- magic_pdf/rw/S3ReaderWriter.py,sha256=_DmL45Ubio-_VsKD84KrqOQ-VNDUTzcXSrXfNMb5vww,5310
132
+ magic_pdf/rw/S3ReaderWriter.py,sha256=LmbtA-pZlC745nnSUs67C1iqSrBDS7IzE6QC8YMB644,5293
184
133
  magic_pdf/rw/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
185
134
  magic_pdf/spark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
186
135
  magic_pdf/spark/spark_api.py,sha256=BYO6zlRW0cEnIUB3ZzNQTu_LsPHEVitqiUN7gy3x_wo,1124
@@ -190,9 +139,9 @@ magic_pdf/tools/cli_dev.py,sha256=3RbubfTIagWoFYdu8wSDanr-BJDjFGeDet55jTy7He0,39
190
139
  magic_pdf/tools/common.py,sha256=ILTv8YjnK-XTVV5nzak3Sm-EJJXjG1hJJghlYKgYVBQ,6809
191
140
  magic_pdf/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
192
141
  magic_pdf/utils/annotations.py,sha256=82ou3uELNbQWa9hOFFkVt0gsIskAKf5msCv5J2IJ5V0,211
193
- magic_pdf-0.10.1.dist-info/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
194
- magic_pdf-0.10.1.dist-info/METADATA,sha256=QdRsUeX9lmB2tTEFLT92qEWnPcgxIu7L0GeqTOHBGms,40300
195
- magic_pdf-0.10.1.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
196
- magic_pdf-0.10.1.dist-info/entry_points.txt,sha256=wXwYke3j8fqDQTocUspL-CqDUEv3Tfcwp09fM8dZAhA,98
197
- magic_pdf-0.10.1.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
198
- magic_pdf-0.10.1.dist-info/RECORD,,
142
+ magic_pdf-0.10.3.dist-info/LICENSE.md,sha256=jVa0BUaKrRH4erV2P5AeJ24I2WRv9chIGxditreJ6e0,34524
143
+ magic_pdf-0.10.3.dist-info/METADATA,sha256=R86XDaSfj1tcu3etkvhQfg3FSoARv8mKW2KpwjsdqWs,36992
144
+ magic_pdf-0.10.3.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
145
+ magic_pdf-0.10.3.dist-info/entry_points.txt,sha256=wXwYke3j8fqDQTocUspL-CqDUEv3Tfcwp09fM8dZAhA,98
146
+ magic_pdf-0.10.3.dist-info/top_level.txt,sha256=J9I0AzmHWGkp9c6DL8Oe4mEx3yYphLzkRn4H25Lg1rE,10
147
+ magic_pdf-0.10.3.dist-info/RECORD,,