docling-ibm-models 2.0.7__py3-none-any.whl → 2.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -96,10 +96,10 @@ class MatchingPostProcessor:
96
96
  if cell["cell_class"] <= 1:
97
97
  allow_class = False
98
98
  else:
99
- print("***")
100
- print("no cell_class in...")
101
- print(cell)
102
- print("***")
99
+ self._log().debug("***")
100
+ self._log().debug("no cell_class in...")
101
+ self._log().debug(cell)
102
+ self._log().debug("***")
103
103
  if allow_class:
104
104
  match_list = matches[pdf_cell_id]
105
105
  for match in match_list:
@@ -264,7 +264,7 @@ class CellMatcher:
264
264
  r, o = otsl.html_to_otsl(table_html_structure, None, False, False, True, False)
265
265
  if not r:
266
266
  ermsg = "ERR#: COULD NOT CONVERT TO RS THIS TABLE TO COMPUTE SPANS"
267
- print(ermsg)
267
+ self._log().debug(ermsg)
268
268
  else:
269
269
  otsl_spans = o["otsl_spans"]
270
270
 
@@ -30,6 +30,8 @@ from docling_ibm_models.tableformer.utils.app_profiler import AggProfiler
30
30
  # LOG_LEVEL = logging.DEBUG
31
31
  LOG_LEVEL = logging.WARN
32
32
 
33
+ logger = s.get_custom_logger(__name__, LOG_LEVEL)
34
+
33
35
 
34
36
  class bcolors:
35
37
  HEADER = "\033[95m"
@@ -53,17 +55,17 @@ def otsl_sqr_chk(rs_list, logdebug):
53
55
 
54
56
  totcelnum = rs_list.count("fcel") + rs_list.count("ecel")
55
57
  if logdebug:
56
- print("Total number of cells = {}".format(totcelnum))
58
+ logger.debug("Total number of cells = {}".format(totcelnum))
57
59
 
58
60
  for ind, ln in enumerate(rs_list_split):
59
61
  ln.append("nl")
60
62
  if logdebug:
61
- print("{}".format(ln))
63
+ logger.debug("{}".format(ln))
62
64
  if len(ln) != init_tag_len:
63
65
  isSquare = False
64
66
  if isSquare:
65
67
  if logdebug:
66
- print(
68
+ logger.debug(
67
69
  "{}*OK* Table is square! *OK*{}".format(
68
70
  bcolors.OKGREEN, bcolors.ENDC
69
71
  )
@@ -71,8 +73,8 @@ def otsl_sqr_chk(rs_list, logdebug):
71
73
  else:
72
74
  if logdebug:
73
75
  err_name = "{}***** ERR ******{}"
74
- print(err_name.format(bcolors.FAIL, bcolors.ENDC))
75
- print(
76
+ logger.debug(err_name.format(bcolors.FAIL, bcolors.ENDC))
77
+ logger.debug(
76
78
  "{}*ERR* Table is not square! *ERR*{}".format(
77
79
  bcolors.FAIL, bcolors.ENDC
78
80
  )
@@ -49,15 +49,15 @@ def otsl_sqr_chk(rs_list, name, logdebug):
49
49
  isSquare = False
50
50
  if isSquare:
51
51
  if logdebug:
52
- print(
52
+ logger.debug(
53
53
  "{}*OK* Table is square! *OK*{}".format(
54
54
  bcolors.OKGREEN, bcolors.ENDC
55
55
  )
56
56
  )
57
57
  else:
58
58
  err_name = "{}*ERR* " + name + " *ERR*{}"
59
- print(err_name.format(bcolors.FAIL, bcolors.ENDC))
60
- print(
59
+ logger.debug(err_name.format(bcolors.FAIL, bcolors.ENDC))
60
+ logger.debug(
61
61
  "{}*ERR* Table is not square! *ERR*{}".format(
62
62
  bcolors.FAIL, bcolors.ENDC
63
63
  )
@@ -89,9 +89,9 @@ def otsl_tags_cells_sync_chk(rs_list, cells, name, logdebug):
89
89
  countCellTags += 1
90
90
  if countCellTags != len(cells):
91
91
  err_name = "{}*!ERR* " + name + " *ERR!*{}"
92
- print(err_name.format(bcolors.FAIL, bcolors.ENDC))
92
+ logger.debug(err_name.format(bcolors.FAIL, bcolors.ENDC))
93
93
  err_msg = "{}*!ERR* Tags are not in sync with cells! *ERR!*{}"
94
- print(err_msg.format(bcolors.FAIL, bcolors.ENDC))
94
+ logger.debug(err_msg.format(bcolors.FAIL, bcolors.ENDC))
95
95
  isGood = False
96
96
  return isGood
97
97
 
@@ -131,11 +131,13 @@ def otsl_to_html(rs_list, logdebug):
131
131
  return rs_list
132
132
  html_table = []
133
133
  if logdebug:
134
- print("{}*Reconstructing HTML...*{}".format(bcolors.WARNING, bcolors.ENDC))
134
+ logger.debug(
135
+ "{}*Reconstructing HTML...*{}".format(bcolors.WARNING, bcolors.ENDC)
136
+ )
135
137
 
136
138
  if not otsl_sqr_chk(rs_list, "---", logdebug):
137
139
  # PAD TABLE TO SQUARE
138
- print("{}*Padding to square...*{}".format(bcolors.WARNING, bcolors.ENDC))
140
+ logger.debug("{}*Padding to square...*{}".format(bcolors.WARNING, bcolors.ENDC))
139
141
  rs_list = otsl_pad_to_sqr(rs_list, "lcel")
140
142
 
141
143
  # 2D structure, line by line:
@@ -144,7 +146,7 @@ def otsl_to_html(rs_list, logdebug):
144
146
  ]
145
147
 
146
148
  if logdebug:
147
- print("")
149
+ logger.debug("")
148
150
 
149
151
  # Sequentially store indexes of 2D spans that were registered to avoid re-registering them
150
152
  registry_2d_span = []
@@ -182,9 +184,9 @@ def otsl_to_html(rs_list, logdebug):
182
184
  span = True
183
185
  # Check if it has vertical span:
184
186
  if rs_row_ind + 1 < len(rs_list_split):
185
- # print(">>>")
186
- # print(rs_list_split[rs_row_ind + 1])
187
- # print(">>> rs_cell_ind = {}".format(rs_cell_ind))
187
+ # logger.debug(">>>")
188
+ # logger.debug(rs_list_split[rs_row_ind + 1])
189
+ # logger.debug(">>> rs_cell_ind = {}".format(rs_cell_ind))
188
190
  if rs_list_split[rs_row_ind + 1][rs_cell_ind] == "ucel":
189
191
  ddist = otsl_check_down(rs_list_split, rs_cell_ind, rs_row_ind)
190
192
  span = True
@@ -198,12 +200,12 @@ def otsl_to_html(rs_list, logdebug):
198
200
  span = True
199
201
  # Check if this 2D span was already registered,
200
202
  # If not - register, if yes - cancel span
201
- # print("rs_cell_ind: {}, xrdist:{}".format(rs_cell_ind, xrdist))
202
- # print("rs_row_ind: {}, xddist:{}".format(rs_cell_ind, xrdist))
203
+ # logger.debug("rs_cell_ind: {}, xrdist:{}".format(rs_cell_ind, xrdist))
204
+ # logger.debug("rs_row_ind: {}, xddist:{}".format(rs_cell_ind, xrdist))
203
205
  for x in range(rs_cell_ind, xrdist + rs_cell_ind):
204
206
  for y in range(rs_row_ind, xddist + rs_row_ind):
205
207
  reg2dind = str(x) + "_" + str(y)
206
- # print(reg2dind)
208
+ # logger.debug(reg2dind)
207
209
  if reg2dind in registry_2d_span:
208
210
  # Cell of the span is already in, cancel current span
209
211
  span = False
@@ -232,9 +234,13 @@ def otsl_to_html(rs_list, logdebug):
232
234
  html_table.extend(html_list)
233
235
 
234
236
  if logdebug:
235
- print("*********************** registry_2d_span ***************************")
236
- print(registry_2d_span)
237
- print("********************************************************************")
237
+ logger.debug(
238
+ "*********************** registry_2d_span ***************************"
239
+ )
240
+ logger.debug(registry_2d_span)
241
+ logger.debug(
242
+ "********************************************************************"
243
+ )
238
244
 
239
245
  return html_table
240
246
 
@@ -316,20 +322,24 @@ def html_to_otsl(table, writer, logdebug, extra_debug, include_html, use_writer)
316
322
  current_line_expands = []
317
323
 
318
324
  if logdebug:
319
- print("")
320
- print("*** {}: {} ***".format(table["split"], table["filename"]))
325
+ logger.debug("")
326
+ logger.debug("*** {}: {} ***".format(table["split"], table["filename"]))
321
327
 
322
328
  colnum = 0
323
329
 
324
330
  if extra_debug:
325
- print("========================== Input HTML ============================")
326
- print(table_html_structure["tokens"])
327
- print("==================================================================")
331
+ logger.debug(
332
+ "========================== Input HTML ============================"
333
+ )
334
+ logger.debug(table_html_structure["tokens"])
335
+ logger.debug(
336
+ "=================================================================="
337
+ )
328
338
 
329
339
  if logdebug:
330
- print("********")
331
- print("* OTSL *")
332
- print("********")
340
+ logger.debug("********")
341
+ logger.debug("* OTSL *")
342
+ logger.debug("********")
333
343
 
334
344
  for i in range(len(table_html_structure["tokens"])):
335
345
  html_tag = table_html_structure["tokens"][i]
@@ -377,7 +387,7 @@ def html_to_otsl(table, writer, logdebug, extra_debug, include_html, use_writer)
377
387
  extra_columns = pre_line_len - cur_line_len - 1
378
388
  if extra_columns > 0:
379
389
  if extra_debug:
380
- print(
390
+ logger.debug(
381
391
  "Extra columns needed in row: {}".format(
382
392
  extra_columns
383
393
  )
@@ -534,11 +544,11 @@ def html_to_otsl(table, writer, logdebug, extra_debug, include_html, use_writer)
534
544
  writer.write(out_line)
535
545
 
536
546
  if logdebug:
537
- print("{}Reconstructed HTML:{}".format(bcolors.OKGREEN, bcolors.ENDC))
538
- print(rHTML)
547
+ logger.debug("{}Reconstructed HTML:{}".format(bcolors.OKGREEN, bcolors.ENDC))
548
+ logger.debug(rHTML)
539
549
  # original HTML
540
550
  oHTML = out_line["html"]["html_structure"]
541
- print("{}Original HTML:{}".format(bcolors.OKBLUE, bcolors.ENDC))
542
- print(oHTML)
551
+ logger.debug("{}Original HTML:{}".format(bcolors.OKBLUE, bcolors.ENDC))
552
+ logger.debug(oHTML)
543
553
 
544
554
  return True, out_line
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-ibm-models
3
- Version: 2.0.7
3
+ Version: 2.0.8
4
4
  Summary: This package contains the AI models used by the Docling PDF conversion package
5
5
  License: MIT
6
6
  Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
@@ -18,6 +18,7 @@ Classifier: Programming Language :: Python :: 3.9
18
18
  Classifier: Programming Language :: Python :: 3.10
19
19
  Classifier: Programming Language :: Python :: 3.11
20
20
  Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
21
22
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
23
  Requires-Dist: Pillow (>=10.0.0,<11.0.0)
23
24
  Requires-Dist: huggingface_hub (>=0.23,<1)
@@ -3,9 +3,9 @@ docling_ibm_models/tableformer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
3
3
  docling_ibm_models/tableformer/common.py,sha256=2zgGZBFf4fXytEaXrZR2NU6FWdX2kxO0DHlGZmuvpNQ,3230
4
4
  docling_ibm_models/tableformer/data_management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  docling_ibm_models/tableformer/data_management/functional.py,sha256=kJntHEXFz2SP7obEcHyjAqZNZC9qh-U75MwUJALLADI,3143
6
- docling_ibm_models/tableformer/data_management/matching_post_processor.py,sha256=41GLMlkMAY1pkc-elP3ktFgZLCHjscghaHfgIVn2168,57998
7
- docling_ibm_models/tableformer/data_management/tf_cell_matcher.py,sha256=GaBW5px3xX9JaHVASZArKiQ-qfrzX0oj-E_6P3-OvuU,21238
8
- docling_ibm_models/tableformer/data_management/tf_predictor.py,sha256=J_AjIGnpT0SkSV12comBlDa8Ga86WnsyJvKkIok4ohs,38834
6
+ docling_ibm_models/tableformer/data_management/matching_post_processor.py,sha256=meSM0jLWNLS8P95QjN6pEp095jFEbKdl9KKfRY1ocy0,58046
7
+ docling_ibm_models/tableformer/data_management/tf_cell_matcher.py,sha256=IdZTaWIRhPpyEwzZgCmviZnYacR6kbcUqBvx7ilmkKY,21250
8
+ docling_ibm_models/tableformer/data_management/tf_predictor.py,sha256=rRGcuyF_Kwika_P-mNrQvkOgDceTMvwgsekhHi4aafo,38920
9
9
  docling_ibm_models/tableformer/data_management/transforms.py,sha256=NNaz_7GI7FCVmu_rJuenqH5VfzRSljJHUHpNQQ8Mq3Q,2983
10
10
  docling_ibm_models/tableformer/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  docling_ibm_models/tableformer/models/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -15,14 +15,14 @@ docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py,sha256=JV9rF
15
15
  docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py,sha256=iExmqJ0Pn0lJU3nWb_x8abTn42GctMqE55_YA2ppgvc,1975
16
16
  docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py,sha256=Mv17JGgO12hIt8jrnflWLgOimdFYkBLuV0rxaGawBpk,12266
17
17
  docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py,sha256=nhnYFlXT5KyJMdB4qMo5r8GimWXVy0lcqcmoHPEl-KE,6416
18
- docling_ibm_models/tableformer/otsl.py,sha256=oE_s2QHTE74jXD0vsXCuya_woReabUOBg6npprEqt58,21069
18
+ docling_ibm_models/tableformer/otsl.py,sha256=QxAODv6D0SkWK0pYp_RkZZbqMmcC-jwngxKUYGuCH5E,21389
19
19
  docling_ibm_models/tableformer/settings.py,sha256=UlpsP0cpJZR2Uk48lgysYy0om3fr8Xt3z1xzvlTw5j4,3067
20
20
  docling_ibm_models/tableformer/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
21
  docling_ibm_models/tableformer/utils/app_profiler.py,sha256=Pb7o1zcikKXh7ninaNt4_nVa1xuUrogZxbTr6U6jkEE,8392
22
22
  docling_ibm_models/tableformer/utils/mem_monitor.py,sha256=ycZ07fUBVVKKLTVGF54jGPDM2aTkKuZWk1kMbOS0wwQ,6353
23
23
  docling_ibm_models/tableformer/utils/torch_utils.py,sha256=uN0rK9mSXy1ewBnBnILrWebJhhVU4N-XJZBqNiLJwlQ,8893
24
24
  docling_ibm_models/tableformer/utils/utils.py,sha256=8Bxf1rEn977lFbY9NX0r5xh9PvxIRipQZX_EZW92XfA,10980
25
- docling_ibm_models-2.0.7.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
26
- docling_ibm_models-2.0.7.dist-info/METADATA,sha256=0bsjQO9MThzSoalrxicKNwvx8D3YXYC4wqAHBJYeqLQ,6879
27
- docling_ibm_models-2.0.7.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
28
- docling_ibm_models-2.0.7.dist-info/RECORD,,
25
+ docling_ibm_models-2.0.8.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
26
+ docling_ibm_models-2.0.8.dist-info/METADATA,sha256=-hV4IdslFbo69zhRSQvslUFR-AwxTXiaW0BtA_oaiKI,6930
27
+ docling_ibm_models-2.0.8.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
28
+ docling_ibm_models-2.0.8.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.9.0
2
+ Generator: poetry-core 1.9.1
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any