docling-core 2.10.0__tar.gz → 2.12.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

Files changed (62)
  1. {docling_core-2.10.0 → docling_core-2.12.0}/PKG-INFO +1 -1
  2. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/cli/view.py +1 -1
  3. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/types/doc/document.py +9 -6
  4. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/types/doc/labels.py +3 -0
  5. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/utils/validate.py +1 -1
  6. {docling_core-2.10.0 → docling_core-2.12.0}/pyproject.toml +1 -1
  7. {docling_core-2.10.0 → docling_core-2.12.0}/LICENSE +0 -0
  8. {docling_core-2.10.0 → docling_core-2.12.0}/README.md +0 -0
  9. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/__init__.py +0 -0
  10. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/cli/__init__.py +0 -0
  11. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/py.typed +0 -0
  12. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/resources/schemas/doc/ANN.json +0 -0
  13. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/resources/schemas/doc/DOC.json +0 -0
  14. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
  15. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/resources/schemas/doc/RAW.json +0 -0
  16. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
  17. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
  18. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
  19. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
  20. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/search/__init__.py +0 -0
  21. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/search/json_schema_to_search_mapper.py +0 -0
  22. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/search/mapping.py +0 -0
  23. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/search/meta.py +0 -0
  24. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/search/package.py +0 -0
  25. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/transforms/__init__.py +0 -0
  26. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/transforms/chunker/__init__.py +0 -0
  27. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/transforms/chunker/base.py +0 -0
  28. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/transforms/chunker/hierarchical_chunker.py +0 -0
  29. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/transforms/chunker/hybrid_chunker.py +0 -0
  30. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/types/__init__.py +0 -0
  31. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/types/base.py +0 -0
  32. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/types/doc/__init__.py +0 -0
  33. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/types/doc/base.py +0 -0
  34. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/types/doc/tokens.py +0 -0
  35. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/types/doc/utils.py +0 -0
  36. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/types/gen/__init__.py +0 -0
  37. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/types/gen/generic.py +0 -0
  38. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/types/io/__init__.py +0 -0
  39. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/types/legacy_doc/__init__.py +0 -0
  40. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/types/legacy_doc/base.py +0 -0
  41. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/types/legacy_doc/doc_ann.py +0 -0
  42. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/types/legacy_doc/doc_ocr.py +0 -0
  43. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/types/legacy_doc/doc_raw.py +0 -0
  44. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/types/legacy_doc/document.py +0 -0
  45. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/types/legacy_doc/tokens.py +0 -0
  46. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/types/nlp/__init__.py +0 -0
  47. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/types/nlp/qa.py +0 -0
  48. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/types/nlp/qa_labels.py +0 -0
  49. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/types/rec/__init__.py +0 -0
  50. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/types/rec/attribute.py +0 -0
  51. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/types/rec/base.py +0 -0
  52. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/types/rec/predicate.py +0 -0
  53. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/types/rec/record.py +0 -0
  54. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/types/rec/statement.py +0 -0
  55. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/types/rec/subject.py +0 -0
  56. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/utils/__init__.py +0 -0
  57. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/utils/alias.py +0 -0
  58. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/utils/file.py +0 -0
  59. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/utils/generate_docs.py +0 -0
  60. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/utils/generate_jsonschema.py +0 -0
  61. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/utils/legacy.py +0 -0
  62. {docling_core-2.10.0 → docling_core-2.12.0}/docling_core/utils/validators.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-core
3
- Version: 2.10.0
3
+ Version: 2.12.0
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Home-page: https://ds4sd.github.io/
6
6
  License: MIT
@@ -57,7 +57,7 @@ def view(
57
57
  doc = DoclingDocument.load_from_json(filename=path)
58
58
  target_path = Path(tempfile.mkdtemp()) / "out.html"
59
59
  html_output = doc.export_to_html(image_mode=ImageRefMode.EMBEDDED)
60
- with open(target_path, "w") as f:
60
+ with open(target_path, "w", encoding="utf-8") as f:
61
61
  f.write(html_output)
62
62
  webbrowser.open(url=f"file://{target_path.absolute().resolve()}")
63
63
 
@@ -1884,7 +1884,7 @@ class DoclingDocument(BaseModel):
1884
1884
  )
1885
1885
 
1886
1886
  out = new_doc.export_to_dict()
1887
- with open(filename, "w") as fw:
1887
+ with open(filename, "w", encoding="utf-8") as fw:
1888
1888
  json.dump(out, fw, indent=indent)
1889
1889
 
1890
1890
  @classmethod
@@ -1898,7 +1898,7 @@ class DoclingDocument(BaseModel):
1898
1898
  :rtype: DoclingDocument
1899
1899
 
1900
1900
  """
1901
- with open(filename, "r") as f:
1901
+ with open(filename, "r", encoding="utf-8") as f:
1902
1902
  return cls.model_validate_json(f.read())
1903
1903
 
1904
1904
  def save_as_yaml(
@@ -1919,7 +1919,7 @@ class DoclingDocument(BaseModel):
1919
1919
  )
1920
1920
 
1921
1921
  out = new_doc.export_to_dict()
1922
- with open(filename, "w") as fw:
1922
+ with open(filename, "w", encoding="utf-8") as fw:
1923
1923
  yaml.dump(out, fw, default_flow_style=default_flow_style)
1924
1924
 
1925
1925
  def export_to_dict(
@@ -1971,7 +1971,7 @@ class DoclingDocument(BaseModel):
1971
1971
  page_no=page_no,
1972
1972
  )
1973
1973
 
1974
- with open(filename, "w") as fw:
1974
+ with open(filename, "w", encoding="utf-8") as fw:
1975
1975
  fw.write(md_out)
1976
1976
 
1977
1977
  def export_to_markdown( # noqa: C901
@@ -2038,6 +2038,9 @@ class DoclingDocument(BaseModel):
2038
2038
  if ix < from_element or to_element <= ix:
2039
2039
  continue # skip as many items as you want
2040
2040
 
2041
+ if (isinstance(item, DocItem)) and (item.label not in labels):
2042
+ continue # skip any label that is not whitelisted
2043
+
2041
2044
  # Handle newlines between different types of content
2042
2045
  if (
2043
2046
  len(mdtexts) > 0
@@ -2224,7 +2227,7 @@ class DoclingDocument(BaseModel):
2224
2227
  html_head=html_head,
2225
2228
  )
2226
2229
 
2227
- with open(filename, "w") as fw:
2230
+ with open(filename, "w", encoding="utf-8") as fw:
2228
2231
  fw.write(html_out)
2229
2232
 
2230
2233
  def _get_output_paths(
@@ -2462,7 +2465,7 @@ class DoclingDocument(BaseModel):
2462
2465
  with_groups=with_groups,
2463
2466
  )
2464
2467
 
2465
- with open(filename, "w") as fw:
2468
+ with open(filename, "w", encoding="utf-8") as fw:
2466
2469
  fw.write(out)
2467
2470
 
2468
2471
  def export_to_document_tokens(
@@ -46,6 +46,9 @@ class GroupLabel(str, Enum):
46
46
  SECTION = "section"
47
47
  SHEET = "sheet"
48
48
  SLIDE = "slide"
49
+ FORM_AREA = "form_area"
50
+ KEY_VALUE_AREA = "key_value_area"
51
+ COMMENT_SECTION = "comment_section"
49
52
 
50
53
  def __str__(self):
51
54
  """Get string value."""
@@ -38,7 +38,7 @@ def run():
38
38
  """Run the validation of a file containing a Document."""
39
39
  file_format, input_file = parse_arguments()
40
40
 
41
- with open(input_file, "r") as fd:
41
+ with open(input_file, "r", encoding="utf-8") as fd:
42
42
  file_ = json.load(fd)
43
43
 
44
44
  result = (False, "Empty result")
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "docling-core"
3
- version = "2.10.0"
3
+ version = "2.12.0"
4
4
  description = "A python library to define and validate data types in Docling."
5
5
  license = "MIT"
6
6
  authors = [
File without changes
File without changes