docling 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -125,7 +125,7 @@ class ConvertedDocument(BaseModel):
125
125
  desc = DsDocumentDescription(logs=[])
126
126
 
127
127
  page_hashes = [
128
- PageReference(hash=p.page_hash, page=p.page_no, model="default")
128
+ PageReference(hash=p.page_hash, page=p.page_no + 1, model="default")
129
129
  for p in self.pages
130
130
  ]
131
131
 
@@ -159,7 +159,7 @@ class ConvertedDocument(BaseModel):
159
159
  prov=[
160
160
  Prov(
161
161
  bbox=target_bbox,
162
- page=element.page_no,
162
+ page=element.page_no + 1,
163
163
  span=[0, len(element.text)],
164
164
  )
165
165
  ],
@@ -242,7 +242,7 @@ class ConvertedDocument(BaseModel):
242
242
  prov=[
243
243
  Prov(
244
244
  bbox=target_bbox,
245
- page=element.page_no,
245
+ page=element.page_no + 1,
246
246
  span=[0, 0],
247
247
  )
248
248
  ],
@@ -264,7 +264,7 @@ class ConvertedDocument(BaseModel):
264
264
  prov=[
265
265
  Prov(
266
266
  bbox=target_bbox,
267
- page=element.page_no,
267
+ page=element.page_no + 1,
268
268
  span=[0, 0],
269
269
  )
270
270
  ],
@@ -274,7 +274,7 @@ class ConvertedDocument(BaseModel):
274
274
  )
275
275
 
276
276
  page_dimensions = [
277
- PageDimensions(page=p.page_no, height=p.size.height, width=p.size.width)
277
+ PageDimensions(page=p.page_no + 1, height=p.size.height, width=p.size.width)
278
278
  for p in self.pages
279
279
  ]
280
280
 
@@ -114,12 +114,15 @@ class TableStructureModel:
114
114
  for element in table_out["tf_responses"]:
115
115
 
116
116
  if not self.do_cell_matching:
117
- the_bbox = BoundingBox.model_validate(element["bbox"])
117
+ the_bbox = BoundingBox.model_validate(
118
+ element["bbox"]
119
+ ).scaled(1 / self.scale)
118
120
  text_piece = page._backend.get_text_in_rect(the_bbox)
119
121
  element["bbox"]["token"] = text_piece
120
122
 
121
123
  tc = TableCell.model_validate(element)
122
- tc.bbox = tc.bbox.scaled(1 / self.scale)
124
+ if self.do_cell_matching:
125
+ tc.bbox = tc.bbox.scaled(1 / self.scale)
123
126
  table_cells.append(tc)
124
127
 
125
128
  # Retrieving cols/rows, after post processing:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling
3
- Version: 1.1.0
3
+ Version: 1.1.2
4
4
  Summary: Docling PDF conversion package
5
5
  Home-page: https://github.com/DS4SD/docling
6
6
  License: MIT
@@ -22,7 +22,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
22
  Provides-Extra: easyocr
23
23
  Provides-Extra: ocr
24
24
  Requires-Dist: deepsearch-glm (>=0.19.0,<1)
25
- Requires-Dist: docling-core (>=1.1.0,<2.0.0)
25
+ Requires-Dist: docling-core (>=1.1.2,<2.0.0)
26
26
  Requires-Dist: docling-ibm-models (>=1.1.0,<2.0.0)
27
27
  Requires-Dist: easyocr (>=1.7,<2.0) ; extra == "easyocr" or extra == "ocr"
28
28
  Requires-Dist: filetype (>=1.2.0,<2.0.0)
@@ -92,7 +92,7 @@ print(doc.export_to_markdown()) # output: "## DocLayNet: A Large Human-Annotate
92
92
 
93
93
  ### Convert a batch of documents
94
94
 
95
- For an example of converting multiple documents, see [convert.py](https://github.com/DS4SD/docling/blob/main/examples/convert.py).
95
+ For an example of batch-converting documents, see [convert.py](https://github.com/DS4SD/docling/blob/main/examples/convert.py).
96
96
 
97
97
  From a local repo clone, you can run it with:
98
98
 
@@ -4,7 +4,7 @@ docling/backend/abstract_backend.py,sha256=dINr8oTax9Fq31Y1AR0CGWNZtAHN5aqB_M7TA
4
4
  docling/backend/pypdfium2_backend.py,sha256=cIQGFkwzceN57PzmACt06CytRo0A_t-im6rW804RC3M,7421
5
5
  docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  docling/datamodel/base_models.py,sha256=k7gLFPnq3ArEMAFz6qUcp5qemlYzVhOmR9qtBTkAiX4,6862
7
- docling/datamodel/document.py,sha256=7caefzaii6itMQgtXfA4SJhB1TAF32v1c8zRwbiU03s,12497
7
+ docling/datamodel/document.py,sha256=FG_ntDFRBWj-MhV52D0sC8XaZOwN3yryyXahsVHGnyI,12517
8
8
  docling/datamodel/settings.py,sha256=t5g6wrEJnPa9gBzMMl8ppgBRUYz-8xgopEtfMS0ZH28,733
9
9
  docling/document_converter.py,sha256=I9vjTLCLahsMrcs9ozM3C5r_CtBN-9qHk7-ANma7fkc,9895
10
10
  docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -12,14 +12,14 @@ docling/models/ds_glm_model.py,sha256=wmb--2JKFQby-kvidw6PyM8wURPXYPQ_Z_eKKCBAdY
12
12
  docling/models/easyocr_model.py,sha256=NaHVs8IN0eW9KB076E2Kae1s-bq74_4IMWueze9QqtE,2290
13
13
  docling/models/layout_model.py,sha256=4AfPFiu6pXc8wIQ1sQlEZnHRt7SnBmfzDdctiRveOWw,10944
14
14
  docling/models/page_assemble_model.py,sha256=8eoG2WiFxPxq9TPvM-wkngb2gkr0tdtCRVXg1JcTETo,5550
15
- docling/models/table_structure_model.py,sha256=ryZrmkNkCbw5SCpgdQabkmcRAEi_4VqOMv2VGdpvGZo,5499
15
+ docling/models/table_structure_model.py,sha256=xUmfunZNYC30P0fRdESdztqy1FVlMzlhJjLBp-xcn4A,5638
16
16
  docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  docling/pipeline/base_model_pipeline.py,sha256=ozHdJak0yQAxQf7pQN_C480vI35A2e5KL5Qq1xSkq5c,560
18
18
  docling/pipeline/standard_model_pipeline.py,sha256=UTwodKUKrisLoVcntbNUBDhjzRyFvpdUvyVw-gNmBlM,1541
19
19
  docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
20
  docling/utils/layout_utils.py,sha256=FOFbL0hKzUoWXdZaeUvEtFqKv0IkPifIr4sdGW4suKs,31804
21
21
  docling/utils/utils.py,sha256=llhXSbIDNZ1MHOwBEfLHBAoJIAYI7QlPIonlI1jLUJ0,1208
22
- docling-1.1.0.dist-info/LICENSE,sha256=ACwmltkrXIz5VsEQcrqljq-fat6ZXAMepjXGoe40KtE,1069
23
- docling-1.1.0.dist-info/METADATA,sha256=mUAryQOsHRejcJ3Qb4zFvRVWpcKX0e4aycnJM_OE0o0,6759
24
- docling-1.1.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
25
- docling-1.1.0.dist-info/RECORD,,
22
+ docling-1.1.2.dist-info/LICENSE,sha256=ACwmltkrXIz5VsEQcrqljq-fat6ZXAMepjXGoe40KtE,1069
23
+ docling-1.1.2.dist-info/METADATA,sha256=3uSlwJCchlMWLELW4Sr3L6apbAPt4sOZem3T7NlglU8,6756
24
+ docling-1.1.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
25
+ docling-1.1.2.dist-info/RECORD,,