docling-core 1.1.1__tar.gz → 1.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

Files changed (45)
  1. {docling_core-1.1.1 → docling_core-1.1.3}/PKG-INFO +1 -1
  2. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/doc/base.py +14 -23
  3. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/doc/document.py +18 -4
  4. {docling_core-1.1.1 → docling_core-1.1.3}/pyproject.toml +1 -1
  5. {docling_core-1.1.1 → docling_core-1.1.3}/LICENSE +0 -0
  6. {docling_core-1.1.1 → docling_core-1.1.3}/README.md +0 -0
  7. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/__init__.py +0 -0
  8. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/py.typed +0 -0
  9. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/resources/schemas/doc/ANN.json +0 -0
  10. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/resources/schemas/doc/DOC.json +0 -0
  11. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
  12. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/resources/schemas/doc/RAW.json +0 -0
  13. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
  14. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
  15. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
  16. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
  17. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/search/__init__.py +0 -0
  18. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/search/json_schema_to_search_mapper.py +0 -0
  19. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/search/mapping.py +0 -0
  20. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/search/meta.py +0 -0
  21. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/search/package.py +0 -0
  22. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/__init__.py +0 -0
  23. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/base.py +0 -0
  24. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/doc/__init__.py +0 -0
  25. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/doc/doc_ann.py +0 -0
  26. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/doc/doc_ocr.py +0 -0
  27. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/doc/doc_raw.py +0 -0
  28. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/gen/__init__.py +0 -0
  29. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/gen/generic.py +0 -0
  30. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/nlp/__init__.py +0 -0
  31. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/nlp/qa.py +0 -0
  32. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/nlp/qa_labels.py +0 -0
  33. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/rec/__init__.py +0 -0
  34. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/rec/attribute.py +0 -0
  35. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/rec/base.py +0 -0
  36. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/rec/predicate.py +0 -0
  37. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/rec/record.py +0 -0
  38. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/rec/statement.py +0 -0
  39. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/types/rec/subject.py +0 -0
  40. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/utils/__init__.py +0 -0
  41. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/utils/alias.py +0 -0
  42. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/utils/ds_generate_docs.py +0 -0
  43. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/utils/ds_generate_jsonschema.py +0 -0
  44. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/utils/validate.py +0 -0
  45. {docling_core-1.1.1 → docling_core-1.1.3}/docling_core/utils/validators.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-core
3
- Version: 1.1.1
3
+ Version: 1.1.3
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Home-page: https://ds4sd.github.io/
6
6
  License: MIT
@@ -6,7 +6,7 @@
6
6
  """Define common models across CCS objects."""
7
7
  from typing import Annotated, Literal, Optional, Union
8
8
 
9
- from pydantic import BaseModel, Field, StrictStr
9
+ from pydantic import BaseModel, Field, PositiveInt, StrictStr
10
10
 
11
11
  from docling_core.search.mapping import es_field
12
12
  from docling_core.utils.alias import AliasModel
@@ -39,7 +39,7 @@ class S3Resource(BaseModel):
39
39
 
40
40
  mime: str
41
41
  path: str
42
- page: Optional[int] = None
42
+ page: Optional[PositiveInt] = None
43
43
 
44
44
 
45
45
  class S3Data(AliasModel):
@@ -68,7 +68,7 @@ class Prov(AliasModel):
68
68
  """Provenance."""
69
69
 
70
70
  bbox: BoundingBox
71
- page: int
71
+ page: PositiveInt
72
72
  span: Span
73
73
  ref_s3_data: Optional[StrictStr] = Field(
74
74
  default=None, alias="__ref_s3_data", json_schema_extra=es_field(suppress=True)
@@ -96,7 +96,7 @@ class PageDimensions(BaseModel):
96
96
  """Page dimensions."""
97
97
 
98
98
  height: float
99
- page: int
99
+ page: PositiveInt
100
100
  width: float
101
101
 
102
102
 
@@ -128,37 +128,28 @@ class GlmTableCell(TableCell):
128
128
  )
129
129
 
130
130
 
131
- class Table(AliasModel):
132
- """Table."""
131
+ class BaseCell(AliasModel):
132
+ """Base cell."""
133
133
 
134
- num_cols: int = Field(alias="#-cols")
135
- num_rows: int = Field(alias="#-rows")
136
134
  bounding_box: Optional[BoundingBoxContainer] = Field(
137
135
  default=None, alias="bounding-box", json_schema_extra=es_field(suppress=True)
138
136
  )
139
- data: Optional[list[list[Union[GlmTableCell, TableCell]]]] = None
140
- model: Optional[str] = None
141
137
  prov: Optional[list[Prov]] = None
142
138
  text: Optional[str] = Field(
143
139
  default=None, json_schema_extra=es_field(term_vector="with_positions_offsets")
144
140
  )
145
141
  obj_type: str = Field(
146
- alias="type",
147
- json_schema_extra=es_field(type="keyword", ignore_above=8191),
142
+ alias="type", json_schema_extra=es_field(type="keyword", ignore_above=8191)
148
143
  )
149
144
 
150
145
 
151
- class BaseCell(AliasModel):
152
- """Base cell."""
146
+ class Table(BaseCell):
147
+ """Table."""
153
148
 
154
- bounding_box: Optional[BoundingBoxContainer] = Field(
155
- default=None, alias="bounding-box", json_schema_extra=es_field(suppress=True)
156
- )
157
- prov: Optional[list[Prov]] = None
158
- text: Optional[str] = None
159
- obj_type: str = Field(
160
- alias="type", json_schema_extra=es_field(type="keyword", ignore_above=8191)
161
- )
149
+ num_cols: int = Field(alias="#-cols")
150
+ num_rows: int = Field(alias="#-rows")
151
+ data: Optional[list[list[Union[GlmTableCell, TableCell]]]] = None
152
+ model: Optional[str] = None
162
153
 
163
154
 
164
155
  class BaseText(AliasModel):
@@ -196,4 +187,4 @@ class PageReference(BaseModel):
196
187
 
197
188
  hash: str = Field(json_schema_extra=es_field(type="keyword", ignore_above=8191))
198
189
  model: str = Field(json_schema_extra=es_field(suppress=True))
199
- page: int = Field(json_schema_extra=es_field(type="short"))
190
+ page: PositiveInt = Field(json_schema_extra=es_field(type="short"))
@@ -393,14 +393,28 @@ class ExportedCCSDocument(
393
393
 
394
394
  return data
395
395
 
396
- def _resolve_ref(self, item: Ref) -> Optional[Table]:
397
- """Return the resolved reference in case of table reference, otherwise None."""
398
- result: Optional[Table] = None
396
+ def _resolve_ref(self, item: Ref) -> Optional[Union[BaseCell, BaseText]]:
397
+ """Return the resolved reference.
399
398
 
400
- # NOTE: currently only resolves table refs & makes assumptions on ref parts
399
+ Resolve the Ref object within the document.
400
+ If the object is not found, None is returned.
401
+ """
402
+ result: Optional[Union[BaseCell, BaseText]] = None
403
+
404
+ # NOTE: currently only resolves refs explicitly, such that we can make
405
+ # assumptions on ref parts
401
406
  if item.obj_type == "table" and self.tables:
402
407
  parts = item.ref.split("/")
403
408
  result = self.tables[int(parts[2])]
409
+ elif item.obj_type == "figure" and self.figures:
410
+ parts = item.ref.split("/")
411
+ result = self.figures[int(parts[2])]
412
+ elif item.obj_type == "equation" and self.equations:
413
+ parts = item.ref.split("/")
414
+ result = self.equations[int(parts[2])]
415
+ elif item.obj_type == "footnote" and self.footnotes:
416
+ parts = item.ref.split("/")
417
+ result = self.footnotes[int(parts[2])]
404
418
 
405
419
  return result
406
420
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "docling-core"
3
- version = "1.1.1"
3
+ version = "1.1.3"
4
4
  description = "A python library to define and validate data types in Docling."
5
5
  license = "MIT"
6
6
  authors = [
File without changes
File without changes