docling-core 2.31.1__tar.gz → 2.31.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

Files changed (79) hide show
  1. {docling_core-2.31.1 → docling_core-2.31.2}/PKG-INFO +1 -1
  2. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/transforms/chunker/hybrid_chunker.py +22 -7
  3. {docling_core-2.31.1 → docling_core-2.31.2}/pyproject.toml +1 -1
  4. {docling_core-2.31.1 → docling_core-2.31.2}/LICENSE +0 -0
  5. {docling_core-2.31.1 → docling_core-2.31.2}/README.md +0 -0
  6. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/__init__.py +0 -0
  7. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/cli/__init__.py +0 -0
  8. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/cli/view.py +0 -0
  9. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/experimental/__init__.py +0 -0
  10. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/py.typed +0 -0
  11. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/resources/schemas/doc/ANN.json +0 -0
  12. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/resources/schemas/doc/DOC.json +0 -0
  13. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
  14. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/resources/schemas/doc/RAW.json +0 -0
  15. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
  16. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
  17. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
  18. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
  19. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/search/__init__.py +0 -0
  20. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/search/json_schema_to_search_mapper.py +0 -0
  21. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/search/mapping.py +0 -0
  22. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/search/meta.py +0 -0
  23. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/search/package.py +0 -0
  24. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/transforms/__init__.py +0 -0
  25. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/transforms/chunker/__init__.py +0 -0
  26. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/transforms/chunker/base.py +0 -0
  27. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/transforms/chunker/hierarchical_chunker.py +0 -0
  28. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/transforms/chunker/tokenizer/__init__.py +0 -0
  29. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/transforms/chunker/tokenizer/base.py +0 -0
  30. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/transforms/chunker/tokenizer/huggingface.py +0 -0
  31. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/transforms/chunker/tokenizer/openai.py +0 -0
  32. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/transforms/serializer/__init__.py +0 -0
  33. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/transforms/serializer/base.py +0 -0
  34. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/transforms/serializer/common.py +0 -0
  35. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/transforms/serializer/doctags.py +0 -0
  36. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/transforms/serializer/html.py +0 -0
  37. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/transforms/serializer/html_styles.py +0 -0
  38. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/transforms/serializer/markdown.py +0 -0
  39. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/transforms/visualizer/__init__.py +0 -0
  40. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/transforms/visualizer/base.py +0 -0
  41. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/transforms/visualizer/layout_visualizer.py +0 -0
  42. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/transforms/visualizer/reading_order_visualizer.py +0 -0
  43. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/__init__.py +0 -0
  44. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/base.py +0 -0
  45. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/doc/__init__.py +0 -0
  46. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/doc/base.py +0 -0
  47. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/doc/document.py +0 -0
  48. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/doc/labels.py +0 -0
  49. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/doc/page.py +0 -0
  50. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/doc/tokens.py +0 -0
  51. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/doc/utils.py +0 -0
  52. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/gen/__init__.py +0 -0
  53. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/gen/generic.py +0 -0
  54. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/io/__init__.py +0 -0
  55. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/legacy_doc/__init__.py +0 -0
  56. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/legacy_doc/base.py +0 -0
  57. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/legacy_doc/doc_ann.py +0 -0
  58. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/legacy_doc/doc_ocr.py +0 -0
  59. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/legacy_doc/doc_raw.py +0 -0
  60. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/legacy_doc/document.py +0 -0
  61. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/legacy_doc/tokens.py +0 -0
  62. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/nlp/__init__.py +0 -0
  63. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/nlp/qa.py +0 -0
  64. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/nlp/qa_labels.py +0 -0
  65. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/rec/__init__.py +0 -0
  66. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/rec/attribute.py +0 -0
  67. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/rec/base.py +0 -0
  68. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/rec/predicate.py +0 -0
  69. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/rec/record.py +0 -0
  70. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/rec/statement.py +0 -0
  71. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/types/rec/subject.py +0 -0
  72. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/utils/__init__.py +0 -0
  73. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/utils/alias.py +0 -0
  74. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/utils/file.py +0 -0
  75. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/utils/generate_docs.py +0 -0
  76. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/utils/generate_jsonschema.py +0 -0
  77. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/utils/legacy.py +0 -0
  78. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/utils/validate.py +0 -0
  79. {docling_core-2.31.1 → docling_core-2.31.2}/docling_core/utils/validators.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-core
3
- Version: 2.31.1
3
+ Version: 2.31.2
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Home-page: https://github.com/docling-project
6
6
  License: MIT
@@ -9,6 +9,7 @@ from functools import cached_property
9
9
  from typing import Any, Iterable, Iterator, Optional, Union
10
10
 
11
11
  from pydantic import BaseModel, ConfigDict, Field, computed_field, model_validator
12
+ from transformers import PreTrainedTokenizerBase
12
13
 
13
14
  from docling_core.transforms.chunker.hierarchical_chunker import (
14
15
  ChunkingSerializerProvider,
@@ -70,23 +71,37 @@ class HybridChunker(BaseChunker):
70
71
  @model_validator(mode="before")
71
72
  @classmethod
72
73
  def _patch(cls, data: Any) -> Any:
73
- if isinstance(data, dict) and (tokenizer := data.get("tokenizer")):
74
+ if isinstance(data, dict):
75
+ tokenizer = data.get("tokenizer")
74
76
  max_tokens = data.get("max_tokens")
75
- if isinstance(tokenizer, BaseTokenizer):
76
- pass
77
- else:
77
+ if not isinstance(tokenizer, BaseTokenizer) and (
78
+ # some legacy param passed:
79
+ tokenizer is not None
80
+ or max_tokens is not None
81
+ ):
78
82
  from docling_core.transforms.chunker.tokenizer.huggingface import (
79
83
  HuggingFaceTokenizer,
80
84
  )
81
85
 
86
+ warnings.warn(
87
+ "Deprecated initialization parameter types for HybridChunker. "
88
+ "For updated usage check out "
89
+ "https://docling-project.github.io/docling/examples/hybrid_chunking/",
90
+ DeprecationWarning,
91
+ stacklevel=3,
92
+ )
93
+
82
94
  if isinstance(tokenizer, str):
83
95
  data["tokenizer"] = HuggingFaceTokenizer.from_pretrained(
84
96
  model_name=tokenizer,
85
97
  max_tokens=max_tokens,
86
98
  )
87
- else:
88
- # migrate previous HF-based tokenizers
89
- kwargs = {"tokenizer": tokenizer}
99
+ elif tokenizer is None or isinstance(
100
+ tokenizer, PreTrainedTokenizerBase
101
+ ):
102
+ kwargs = {
103
+ "tokenizer": tokenizer or _get_default_tokenizer().tokenizer
104
+ }
90
105
  if max_tokens is not None:
91
106
  kwargs["max_tokens"] = max_tokens
92
107
  data["tokenizer"] = HuggingFaceTokenizer(**kwargs)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "docling-core"
3
- version = "2.31.1"
3
+ version = "2.31.2"
4
4
  description = "A python library to define and validate data types in Docling."
5
5
  license = "MIT"
6
6
  authors = [
File without changes
File without changes