magic-pdf 1.3.6__py3-none-any.whl → 1.3.8__py3-none-any.whl

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
magic_pdf/libs/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.3.6"
1
+ __version__ = "1.3.8"
@@ -161,20 +161,13 @@ class BatchAnalyze:
161
161
  for table_res_dict in tqdm(table_res_list_all_page, desc="Table Predict"):
162
162
  _lang = table_res_dict['lang']
163
163
  atom_model_manager = AtomModelSingleton()
164
- ocr_engine = atom_model_manager.get_atom_model(
165
- atom_model_name='ocr',
166
- ocr_show_log=False,
167
- det_db_box_thresh=0.5,
168
- det_db_unclip_ratio=1.6,
169
- lang=_lang
170
- )
171
164
  table_model = atom_model_manager.get_atom_model(
172
165
  atom_model_name='table',
173
166
  table_model_name='rapid_table',
174
167
  table_model_path='',
175
168
  table_max_time=400,
176
169
  device='cpu',
177
- ocr_engine=ocr_engine,
170
+ lang=_lang,
178
171
  table_sub_model_name='slanet_plus'
179
172
  )
180
173
  html_code, table_cell_bboxes, logic_points, elapse = table_model.predict(table_res_dict['table_img'])
@@ -53,6 +53,12 @@ class PytorchPaddleOCR(TextSystem):
53
53
  args = parser.parse_args(args)
54
54
 
55
55
  self.lang = kwargs.get('lang', 'ch')
56
+
57
+ device = get_device()
58
+ if device == 'cpu' and self.lang in ['ch', 'ch_server']:
59
+ logger.warning("The current device in use is CPU. To ensure the speed of parsing, the language is automatically switched to ch_lite.")
60
+ self.lang = 'ch_lite'
61
+
56
62
  if self.lang in latin_lang:
57
63
  self.lang = 'latin'
58
64
  elif self.lang in arabic_lang:
@@ -74,7 +80,7 @@ class PytorchPaddleOCR(TextSystem):
74
80
  kwargs['rec_char_dict_path'] = os.path.join(root_dir, 'pytorchocr', 'utils', 'resources', 'dict', dict_file)
75
81
  # kwargs['rec_batch_num'] = 8
76
82
 
77
- kwargs['device'] = get_device()
83
+ kwargs['device'] = device
78
84
 
79
85
  default_args = vars(args)
80
86
  default_args.update(kwargs)
@@ -171,6 +171,31 @@ ch_PP-OCRv4_rec_server_infer:
171
171
  nrtr_dim: 384
172
172
  max_text_length: 25
173
173
 
174
+ ch_PP-OCRv4_rec_server_doc_infer:
175
+ model_type: rec
176
+ algorithm: SVTR_HGNet
177
+ Transform:
178
+ Backbone:
179
+ name: PPHGNet_small
180
+ Head:
181
+ name: MultiHead
182
+ out_channels_list:
183
+ CTCLabelDecode: 15631
184
+ head_list:
185
+ - CTCHead:
186
+ Neck:
187
+ name: svtr
188
+ dims: 120
189
+ depth: 2
190
+ hidden_dims: 120
191
+ kernel_size: [ 1, 3 ]
192
+ use_guide: True
193
+ Head:
194
+ fc_decay: 0.00001
195
+ - NRTRHead:
196
+ nrtr_dim: 384
197
+ max_text_length: 25
198
+
174
199
  chinese_cht_PP-OCRv3_rec_infer:
175
200
  model_type: rec
176
201
  algorithm: SVTR