magic-pdf 0.7.1__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- magic_pdf/dict2md/ocr_mkcontent.py +130 -76
- magic_pdf/integrations/__init__.py +0 -0
- magic_pdf/integrations/rag/__init__.py +0 -0
- magic_pdf/integrations/rag/api.py +82 -0
- magic_pdf/integrations/rag/type.py +82 -0
- magic_pdf/integrations/rag/utils.py +285 -0
- magic_pdf/layout/layout_sort.py +472 -283
- magic_pdf/libs/boxbase.py +188 -149
- magic_pdf/libs/draw_bbox.py +113 -87
- magic_pdf/libs/ocr_content_type.py +21 -18
- magic_pdf/libs/version.py +1 -1
- magic_pdf/model/doc_analyze_by_custom_model.py +14 -2
- magic_pdf/model/magic_model.py +283 -166
- magic_pdf/model/model_list.py +8 -0
- magic_pdf/model/pdf_extract_kit.py +105 -15
- magic_pdf/model/pek_sub_modules/self_modify.py +84 -0
- magic_pdf/para/para_split_v2.py +26 -27
- magic_pdf/pdf_parse_union_core.py +34 -6
- magic_pdf/pipe/AbsPipe.py +4 -1
- magic_pdf/pipe/OCRPipe.py +7 -4
- magic_pdf/pipe/TXTPipe.py +7 -4
- magic_pdf/pipe/UNIPipe.py +11 -6
- magic_pdf/pre_proc/ocr_detect_all_bboxes.py +12 -3
- magic_pdf/pre_proc/ocr_dict_merge.py +60 -59
- magic_pdf/tools/cli.py +56 -29
- magic_pdf/tools/cli_dev.py +61 -64
- magic_pdf/tools/common.py +57 -37
- magic_pdf/user_api.py +17 -9
- {magic_pdf-0.7.1.dist-info → magic_pdf-0.8.1.dist-info}/METADATA +72 -27
- {magic_pdf-0.7.1.dist-info → magic_pdf-0.8.1.dist-info}/RECORD +34 -29
- {magic_pdf-0.7.1.dist-info → magic_pdf-0.8.1.dist-info}/LICENSE.md +0 -0
- {magic_pdf-0.7.1.dist-info → magic_pdf-0.8.1.dist-info}/WHEEL +0 -0
- {magic_pdf-0.7.1.dist-info → magic_pdf-0.8.1.dist-info}/entry_points.txt +0 -0
- {magic_pdf-0.7.1.dist-info → magic_pdf-0.8.1.dist-info}/top_level.txt +0 -0
magic_pdf/libs/draw_bbox.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
from magic_pdf.libs.Constants import CROSS_PAGE
|
2
1
|
from magic_pdf.libs.commons import fitz # PyMuPDF
|
3
|
-
from magic_pdf.libs.
|
2
|
+
from magic_pdf.libs.Constants import CROSS_PAGE
|
3
|
+
from magic_pdf.libs.ocr_content_type import BlockType, CategoryId, ContentType
|
4
4
|
from magic_pdf.model.magic_model import MagicModel
|
5
5
|
|
6
6
|
|
@@ -65,11 +65,13 @@ def draw_bbox_with_number(i, bbox_list, page, rgb_config, fill_config):
|
|
65
65
|
) # Insert the index in the top left corner of the rectangle
|
66
66
|
|
67
67
|
|
68
|
-
def draw_layout_bbox(pdf_info, pdf_bytes, out_path):
|
68
|
+
def draw_layout_bbox(pdf_info, pdf_bytes, out_path, filename):
|
69
69
|
layout_bbox_list = []
|
70
70
|
dropped_bbox_list = []
|
71
|
-
tables_list, tables_body_list
|
71
|
+
tables_list, tables_body_list = [], []
|
72
|
+
tables_caption_list, tables_footnote_list = [], []
|
72
73
|
imgs_list, imgs_body_list, imgs_caption_list = [], [], []
|
74
|
+
imgs_footnote_list = []
|
73
75
|
titles_list = []
|
74
76
|
texts_list = []
|
75
77
|
interequations_list = []
|
@@ -77,41 +79,43 @@ def draw_layout_bbox(pdf_info, pdf_bytes, out_path):
|
|
77
79
|
page_layout_list = []
|
78
80
|
page_dropped_list = []
|
79
81
|
tables, tables_body, tables_caption, tables_footnote = [], [], [], []
|
80
|
-
imgs, imgs_body, imgs_caption = [], [], []
|
82
|
+
imgs, imgs_body, imgs_caption, imgs_footnote = [], [], [], []
|
81
83
|
titles = []
|
82
84
|
texts = []
|
83
85
|
interequations = []
|
84
|
-
for layout in page[
|
85
|
-
page_layout_list.append(layout[
|
86
|
+
for layout in page['layout_bboxes']:
|
87
|
+
page_layout_list.append(layout['layout_bbox'])
|
86
88
|
layout_bbox_list.append(page_layout_list)
|
87
|
-
for dropped_bbox in page[
|
88
|
-
page_dropped_list.append(dropped_bbox[
|
89
|
+
for dropped_bbox in page['discarded_blocks']:
|
90
|
+
page_dropped_list.append(dropped_bbox['bbox'])
|
89
91
|
dropped_bbox_list.append(page_dropped_list)
|
90
|
-
for block in page[
|
91
|
-
bbox = block[
|
92
|
-
if block[
|
92
|
+
for block in page['para_blocks']:
|
93
|
+
bbox = block['bbox']
|
94
|
+
if block['type'] == BlockType.Table:
|
93
95
|
tables.append(bbox)
|
94
|
-
for nested_block in block[
|
95
|
-
bbox = nested_block[
|
96
|
-
if nested_block[
|
96
|
+
for nested_block in block['blocks']:
|
97
|
+
bbox = nested_block['bbox']
|
98
|
+
if nested_block['type'] == BlockType.TableBody:
|
97
99
|
tables_body.append(bbox)
|
98
|
-
elif nested_block[
|
100
|
+
elif nested_block['type'] == BlockType.TableCaption:
|
99
101
|
tables_caption.append(bbox)
|
100
|
-
elif nested_block[
|
102
|
+
elif nested_block['type'] == BlockType.TableFootnote:
|
101
103
|
tables_footnote.append(bbox)
|
102
|
-
elif block[
|
104
|
+
elif block['type'] == BlockType.Image:
|
103
105
|
imgs.append(bbox)
|
104
|
-
for nested_block in block[
|
105
|
-
bbox = nested_block[
|
106
|
-
if nested_block[
|
106
|
+
for nested_block in block['blocks']:
|
107
|
+
bbox = nested_block['bbox']
|
108
|
+
if nested_block['type'] == BlockType.ImageBody:
|
107
109
|
imgs_body.append(bbox)
|
108
|
-
elif nested_block[
|
110
|
+
elif nested_block['type'] == BlockType.ImageCaption:
|
109
111
|
imgs_caption.append(bbox)
|
110
|
-
|
112
|
+
elif nested_block['type'] == BlockType.ImageFootnote:
|
113
|
+
imgs_footnote.append(bbox)
|
114
|
+
elif block['type'] == BlockType.Title:
|
111
115
|
titles.append(bbox)
|
112
|
-
elif block[
|
116
|
+
elif block['type'] == BlockType.Text:
|
113
117
|
texts.append(bbox)
|
114
|
-
elif block[
|
118
|
+
elif block['type'] == BlockType.InterlineEquation:
|
115
119
|
interequations.append(bbox)
|
116
120
|
tables_list.append(tables)
|
117
121
|
tables_body_list.append(tables_body)
|
@@ -120,30 +124,40 @@ def draw_layout_bbox(pdf_info, pdf_bytes, out_path):
|
|
120
124
|
imgs_list.append(imgs)
|
121
125
|
imgs_body_list.append(imgs_body)
|
122
126
|
imgs_caption_list.append(imgs_caption)
|
127
|
+
imgs_footnote_list.append(imgs_footnote)
|
123
128
|
titles_list.append(titles)
|
124
129
|
texts_list.append(texts)
|
125
130
|
interequations_list.append(interequations)
|
126
131
|
|
127
|
-
pdf_docs = fitz.open(
|
132
|
+
pdf_docs = fitz.open('pdf', pdf_bytes)
|
128
133
|
for i, page in enumerate(pdf_docs):
|
129
134
|
draw_bbox_with_number(i, layout_bbox_list, page, [255, 0, 0], False)
|
130
|
-
draw_bbox_without_number(i, dropped_bbox_list, page, [158, 158, 158],
|
131
|
-
|
132
|
-
draw_bbox_without_number(i,
|
133
|
-
|
134
|
-
draw_bbox_without_number(i,
|
135
|
+
draw_bbox_without_number(i, dropped_bbox_list, page, [158, 158, 158],
|
136
|
+
True)
|
137
|
+
draw_bbox_without_number(i, tables_list, page, [153, 153, 0],
|
138
|
+
True) # color !
|
139
|
+
draw_bbox_without_number(i, tables_body_list, page, [204, 204, 0],
|
140
|
+
True)
|
141
|
+
draw_bbox_without_number(i, tables_caption_list, page, [255, 255, 102],
|
142
|
+
True)
|
143
|
+
draw_bbox_without_number(i, tables_footnote_list, page,
|
144
|
+
[229, 255, 204], True)
|
135
145
|
draw_bbox_without_number(i, imgs_list, page, [51, 102, 0], True)
|
136
146
|
draw_bbox_without_number(i, imgs_body_list, page, [153, 255, 51], True)
|
137
|
-
draw_bbox_without_number(i, imgs_caption_list, page, [102, 178, 255],
|
147
|
+
draw_bbox_without_number(i, imgs_caption_list, page, [102, 178, 255],
|
148
|
+
True)
|
149
|
+
draw_bbox_with_number(i, imgs_footnote_list, page, [255, 178, 102],
|
150
|
+
True),
|
138
151
|
draw_bbox_without_number(i, titles_list, page, [102, 102, 255], True)
|
139
152
|
draw_bbox_without_number(i, texts_list, page, [153, 0, 76], True)
|
140
|
-
draw_bbox_without_number(i, interequations_list, page, [0, 255, 0],
|
153
|
+
draw_bbox_without_number(i, interequations_list, page, [0, 255, 0],
|
154
|
+
True)
|
141
155
|
|
142
156
|
# Save the PDF
|
143
|
-
pdf_docs.save(f
|
157
|
+
pdf_docs.save(f'{out_path}/{filename}_layout.pdf')
|
144
158
|
|
145
159
|
|
146
|
-
def draw_span_bbox(pdf_info, pdf_bytes, out_path):
|
160
|
+
def draw_span_bbox(pdf_info, pdf_bytes, out_path, filename):
|
147
161
|
text_list = []
|
148
162
|
inline_equation_list = []
|
149
163
|
interline_equation_list = []
|
@@ -154,22 +168,22 @@ def draw_span_bbox(pdf_info, pdf_bytes, out_path):
|
|
154
168
|
next_page_inline_equation_list = []
|
155
169
|
|
156
170
|
def get_span_info(span):
|
157
|
-
if span[
|
171
|
+
if span['type'] == ContentType.Text:
|
158
172
|
if span.get(CROSS_PAGE, False):
|
159
|
-
next_page_text_list.append(span[
|
173
|
+
next_page_text_list.append(span['bbox'])
|
160
174
|
else:
|
161
|
-
page_text_list.append(span[
|
162
|
-
elif span[
|
175
|
+
page_text_list.append(span['bbox'])
|
176
|
+
elif span['type'] == ContentType.InlineEquation:
|
163
177
|
if span.get(CROSS_PAGE, False):
|
164
|
-
next_page_inline_equation_list.append(span[
|
178
|
+
next_page_inline_equation_list.append(span['bbox'])
|
165
179
|
else:
|
166
|
-
page_inline_equation_list.append(span[
|
167
|
-
elif span[
|
168
|
-
page_interline_equation_list.append(span[
|
169
|
-
elif span[
|
170
|
-
page_image_list.append(span[
|
171
|
-
elif span[
|
172
|
-
page_table_list.append(span[
|
180
|
+
page_inline_equation_list.append(span['bbox'])
|
181
|
+
elif span['type'] == ContentType.InterlineEquation:
|
182
|
+
page_interline_equation_list.append(span['bbox'])
|
183
|
+
elif span['type'] == ContentType.Image:
|
184
|
+
page_image_list.append(span['bbox'])
|
185
|
+
elif span['type'] == ContentType.Table:
|
186
|
+
page_table_list.append(span['bbox'])
|
173
187
|
|
174
188
|
for page in pdf_info:
|
175
189
|
page_text_list = []
|
@@ -188,84 +202,89 @@ def draw_span_bbox(pdf_info, pdf_bytes, out_path):
|
|
188
202
|
next_page_inline_equation_list.clear()
|
189
203
|
|
190
204
|
# 构造dropped_list
|
191
|
-
for block in page[
|
192
|
-
if block[
|
193
|
-
for line in block[
|
194
|
-
for span in line[
|
195
|
-
page_dropped_list.append(span[
|
205
|
+
for block in page['discarded_blocks']:
|
206
|
+
if block['type'] == BlockType.Discarded:
|
207
|
+
for line in block['lines']:
|
208
|
+
for span in line['spans']:
|
209
|
+
page_dropped_list.append(span['bbox'])
|
196
210
|
dropped_list.append(page_dropped_list)
|
197
211
|
# 构造其余useful_list
|
198
|
-
for block in page[
|
199
|
-
if block[
|
200
|
-
|
201
|
-
|
202
|
-
|
212
|
+
for block in page['para_blocks']:
|
213
|
+
if block['type'] in [
|
214
|
+
BlockType.Text,
|
215
|
+
BlockType.Title,
|
216
|
+
BlockType.InterlineEquation,
|
203
217
|
]:
|
204
|
-
for line in block[
|
205
|
-
for span in line[
|
218
|
+
for line in block['lines']:
|
219
|
+
for span in line['spans']:
|
206
220
|
get_span_info(span)
|
207
|
-
elif block[
|
208
|
-
for sub_block in block[
|
209
|
-
for line in sub_block[
|
210
|
-
for span in line[
|
221
|
+
elif block['type'] in [BlockType.Image, BlockType.Table]:
|
222
|
+
for sub_block in block['blocks']:
|
223
|
+
for line in sub_block['lines']:
|
224
|
+
for span in line['spans']:
|
211
225
|
get_span_info(span)
|
212
226
|
text_list.append(page_text_list)
|
213
227
|
inline_equation_list.append(page_inline_equation_list)
|
214
228
|
interline_equation_list.append(page_interline_equation_list)
|
215
229
|
image_list.append(page_image_list)
|
216
230
|
table_list.append(page_table_list)
|
217
|
-
pdf_docs = fitz.open(
|
231
|
+
pdf_docs = fitz.open('pdf', pdf_bytes)
|
218
232
|
for i, page in enumerate(pdf_docs):
|
219
233
|
# 获取当前页面的数据
|
220
234
|
draw_bbox_without_number(i, text_list, page, [255, 0, 0], False)
|
221
|
-
draw_bbox_without_number(i, inline_equation_list, page, [0, 255, 0],
|
222
|
-
|
235
|
+
draw_bbox_without_number(i, inline_equation_list, page, [0, 255, 0],
|
236
|
+
False)
|
237
|
+
draw_bbox_without_number(i, interline_equation_list, page, [0, 0, 255],
|
238
|
+
False)
|
223
239
|
draw_bbox_without_number(i, image_list, page, [255, 204, 0], False)
|
224
240
|
draw_bbox_without_number(i, table_list, page, [204, 0, 255], False)
|
225
241
|
draw_bbox_without_number(i, dropped_list, page, [158, 158, 158], False)
|
226
242
|
|
227
243
|
# Save the PDF
|
228
|
-
pdf_docs.save(f
|
244
|
+
pdf_docs.save(f'{out_path}/{filename}_spans.pdf')
|
229
245
|
|
230
246
|
|
231
|
-
def drow_model_bbox(model_list: list, pdf_bytes, out_path):
|
247
|
+
def drow_model_bbox(model_list: list, pdf_bytes, out_path, filename):
|
232
248
|
dropped_bbox_list = []
|
233
249
|
tables_body_list, tables_caption_list, tables_footnote_list = [], [], []
|
234
|
-
imgs_body_list, imgs_caption_list = [], []
|
250
|
+
imgs_body_list, imgs_caption_list, imgs_footnote_list = [], [], []
|
235
251
|
titles_list = []
|
236
252
|
texts_list = []
|
237
253
|
interequations_list = []
|
238
|
-
pdf_docs = fitz.open(
|
254
|
+
pdf_docs = fitz.open('pdf', pdf_bytes)
|
239
255
|
magic_model = MagicModel(model_list, pdf_docs)
|
240
256
|
for i in range(len(model_list)):
|
241
257
|
page_dropped_list = []
|
242
258
|
tables_body, tables_caption, tables_footnote = [], [], []
|
243
|
-
imgs_body, imgs_caption = [], []
|
259
|
+
imgs_body, imgs_caption, imgs_footnote = [], [], []
|
244
260
|
titles = []
|
245
261
|
texts = []
|
246
262
|
interequations = []
|
247
263
|
page_info = magic_model.get_model_list(i)
|
248
|
-
layout_dets = page_info[
|
264
|
+
layout_dets = page_info['layout_dets']
|
249
265
|
for layout_det in layout_dets:
|
250
|
-
bbox = layout_det[
|
251
|
-
if layout_det[
|
266
|
+
bbox = layout_det['bbox']
|
267
|
+
if layout_det['category_id'] == CategoryId.Text:
|
252
268
|
texts.append(bbox)
|
253
|
-
elif layout_det[
|
269
|
+
elif layout_det['category_id'] == CategoryId.Title:
|
254
270
|
titles.append(bbox)
|
255
|
-
elif layout_det[
|
271
|
+
elif layout_det['category_id'] == CategoryId.TableBody:
|
256
272
|
tables_body.append(bbox)
|
257
|
-
elif layout_det[
|
273
|
+
elif layout_det['category_id'] == CategoryId.TableCaption:
|
258
274
|
tables_caption.append(bbox)
|
259
|
-
elif layout_det[
|
275
|
+
elif layout_det['category_id'] == CategoryId.TableFootnote:
|
260
276
|
tables_footnote.append(bbox)
|
261
|
-
elif layout_det[
|
277
|
+
elif layout_det['category_id'] == CategoryId.ImageBody:
|
262
278
|
imgs_body.append(bbox)
|
263
|
-
elif layout_det[
|
279
|
+
elif layout_det['category_id'] == CategoryId.ImageCaption:
|
264
280
|
imgs_caption.append(bbox)
|
265
|
-
elif layout_det[
|
281
|
+
elif layout_det[
|
282
|
+
'category_id'] == CategoryId.InterlineEquation_YOLO:
|
266
283
|
interequations.append(bbox)
|
267
|
-
elif layout_det[
|
284
|
+
elif layout_det['category_id'] == CategoryId.Abandon:
|
268
285
|
page_dropped_list.append(bbox)
|
286
|
+
elif layout_det['category_id'] == CategoryId.ImageFootnote:
|
287
|
+
imgs_footnote.append(bbox)
|
269
288
|
|
270
289
|
tables_body_list.append(tables_body)
|
271
290
|
tables_caption_list.append(tables_caption)
|
@@ -276,17 +295,24 @@ def drow_model_bbox(model_list: list, pdf_bytes, out_path):
|
|
276
295
|
texts_list.append(texts)
|
277
296
|
interequations_list.append(interequations)
|
278
297
|
dropped_bbox_list.append(page_dropped_list)
|
298
|
+
imgs_footnote_list.append(imgs_footnote)
|
279
299
|
|
280
300
|
for i, page in enumerate(pdf_docs):
|
281
|
-
draw_bbox_with_number(i, dropped_bbox_list, page, [158, 158, 158],
|
301
|
+
draw_bbox_with_number(i, dropped_bbox_list, page, [158, 158, 158],
|
302
|
+
True) # color !
|
282
303
|
draw_bbox_with_number(i, tables_body_list, page, [204, 204, 0], True)
|
283
|
-
draw_bbox_with_number(i, tables_caption_list, page, [255, 255, 102],
|
284
|
-
|
304
|
+
draw_bbox_with_number(i, tables_caption_list, page, [255, 255, 102],
|
305
|
+
True)
|
306
|
+
draw_bbox_with_number(i, tables_footnote_list, page, [229, 255, 204],
|
307
|
+
True)
|
285
308
|
draw_bbox_with_number(i, imgs_body_list, page, [153, 255, 51], True)
|
286
|
-
draw_bbox_with_number(i, imgs_caption_list, page, [102, 178, 255],
|
309
|
+
draw_bbox_with_number(i, imgs_caption_list, page, [102, 178, 255],
|
310
|
+
True)
|
311
|
+
draw_bbox_with_number(i, imgs_footnote_list, page, [255, 178, 102],
|
312
|
+
True)
|
287
313
|
draw_bbox_with_number(i, titles_list, page, [102, 102, 255], True)
|
288
314
|
draw_bbox_with_number(i, texts_list, page, [153, 0, 76], True)
|
289
315
|
draw_bbox_with_number(i, interequations_list, page, [0, 255, 0], True)
|
290
316
|
|
291
317
|
# Save the PDF
|
292
|
-
pdf_docs.save(f
|
318
|
+
pdf_docs.save(f'{out_path}/{filename}_model.pdf')
|
magic_pdf/libs/ocr_content_type.py
CHANGED
@@ -1,23 +1,25 @@
|
|
1
1
|
class ContentType:
|
2
|
-
Image =
|
3
|
-
Table =
|
4
|
-
Text =
|
5
|
-
InlineEquation =
|
6
|
-
InterlineEquation =
|
7
|
-
|
2
|
+
Image = 'image'
|
3
|
+
Table = 'table'
|
4
|
+
Text = 'text'
|
5
|
+
InlineEquation = 'inline_equation'
|
6
|
+
InterlineEquation = 'interline_equation'
|
7
|
+
|
8
|
+
|
8
9
|
class BlockType:
|
9
|
-
Image =
|
10
|
-
ImageBody =
|
11
|
-
ImageCaption =
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
10
|
+
Image = 'image'
|
11
|
+
ImageBody = 'image_body'
|
12
|
+
ImageCaption = 'image_caption'
|
13
|
+
ImageFootnote = 'image_footnote'
|
14
|
+
Table = 'table'
|
15
|
+
TableBody = 'table_body'
|
16
|
+
TableCaption = 'table_caption'
|
17
|
+
TableFootnote = 'table_footnote'
|
18
|
+
Text = 'text'
|
19
|
+
Title = 'title'
|
20
|
+
InterlineEquation = 'interline_equation'
|
21
|
+
Footnote = 'footnote'
|
22
|
+
Discarded = 'discarded'
|
21
23
|
|
22
24
|
|
23
25
|
class CategoryId:
|
@@ -33,3 +35,4 @@ class CategoryId:
|
|
33
35
|
InlineEquation = 13
|
34
36
|
InterlineEquation_YOLO = 14
|
35
37
|
OcrText = 15
|
38
|
+
ImageFootnote = 101
|
magic_pdf/libs/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.7.1"
|
1
|
+
__version__ = "0.8.1"
|
magic_pdf/model/doc_analyze_by_custom_model.py
CHANGED
@@ -103,20 +103,32 @@ def custom_model_init(ocr: bool = False, show_log: bool = False):
|
|
103
103
|
return custom_model
|
104
104
|
|
105
105
|
|
106
|
-
def doc_analyze(pdf_bytes: bytes, ocr: bool = False, show_log: bool = False):
|
106
|
+
def doc_analyze(pdf_bytes: bytes, ocr: bool = False, show_log: bool = False,
|
107
|
+
start_page_id=0, end_page_id=None):
|
107
108
|
|
108
109
|
model_manager = ModelSingleton()
|
109
110
|
custom_model = model_manager.get_model(ocr, show_log)
|
110
111
|
|
111
112
|
images = load_images_from_pdf(pdf_bytes)
|
112
113
|
|
114
|
+
# end_page_id = end_page_id if end_page_id else len(images) - 1
|
115
|
+
end_page_id = end_page_id if end_page_id is not None and end_page_id >= 0 else len(images) - 1
|
116
|
+
|
117
|
+
if end_page_id > len(images) - 1:
|
118
|
+
logger.warning("end_page_id is out of range, use images length")
|
119
|
+
end_page_id = len(images) - 1
|
120
|
+
|
113
121
|
model_json = []
|
114
122
|
doc_analyze_start = time.time()
|
123
|
+
|
115
124
|
for index, img_dict in enumerate(images):
|
116
125
|
img = img_dict["img"]
|
117
126
|
page_width = img_dict["width"]
|
118
127
|
page_height = img_dict["height"]
|
119
|
-
|
128
|
+
if start_page_id <= index <= end_page_id:
|
129
|
+
result = custom_model(img)
|
130
|
+
else:
|
131
|
+
result = []
|
120
132
|
page_info = {"page_no": index, "height": page_height, "width": page_width}
|
121
133
|
page_dict = {"layout_dets": result, "page_info": page_info}
|
122
134
|
model_json.append(page_dict)
|