kreuzberg 3.5.0__py3-none-any.whl → 3.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kreuzberg/__init__.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from importlib.metadata import version
2
2
 
3
+ from kreuzberg._entity_extraction import SpacyEntityExtractionConfig
3
4
  from kreuzberg._gmft import GMFTConfig
4
5
  from kreuzberg._language_detection import LanguageDetectionConfig
5
6
  from kreuzberg._ocr._easyocr import EasyOCRConfig
@@ -8,7 +9,7 @@ from kreuzberg._ocr._tesseract import TesseractConfig
8
9
 
9
10
  from ._ocr._tesseract import PSMMode
10
11
  from ._registry import ExtractorRegistry
11
- from ._types import ExtractionConfig, ExtractionResult, Metadata, TableData
12
+ from ._types import Entity, ExtractionConfig, ExtractionResult, Metadata, TableData
12
13
  from .exceptions import KreuzbergError, MissingDependencyError, OCRError, ParsingError, ValidationError
13
14
  from .extraction import (
14
15
  batch_extract_bytes,
@@ -25,6 +26,7 @@ __version__ = version("kreuzberg")
25
26
 
26
27
  __all__ = [
27
28
  "EasyOCRConfig",
29
+ "Entity",
28
30
  "ExtractionConfig",
29
31
  "ExtractionResult",
30
32
  "ExtractorRegistry",
@@ -37,6 +39,7 @@ __all__ = [
37
39
  "PSMMode",
38
40
  "PaddleOCRConfig",
39
41
  "ParsingError",
42
+ "SpacyEntityExtractionConfig",
40
43
  "TableData",
41
44
  "TesseractConfig",
42
45
  "ValidationError",
@@ -0,0 +1,239 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from dataclasses import dataclass
5
+ from functools import lru_cache
6
+ from typing import TYPE_CHECKING, Any
7
+
8
+ from kreuzberg._types import Entity
9
+ from kreuzberg.exceptions import MissingDependencyError
10
+
11
+ if TYPE_CHECKING:
12
+ from collections.abc import Sequence
13
+ from pathlib import Path
14
+
15
+
16
@dataclass(unsafe_hash=True, frozen=True)
class SpacyEntityExtractionConfig:
    """Configuration for spaCy-based entity extraction.

    Instances are frozen and hashable. To keep them hashable, ``language_models``
    is normalised in ``__post_init__`` to a sorted tuple of
    ``(language_code, model_name)`` pairs, whatever form it was supplied in.
    """

    model_cache_dir: str | Path | None = None
    """Directory to cache spaCy models. If None, uses spaCy's default."""

    language_models: dict[str, str] | tuple[tuple[str, str], ...] | None = None
    """Mapping of language codes to spaCy model names.

    If None, the mapping returned by ``_get_default_language_models`` is used. It covers
    en, de, fr, es, pt, it, nl, zh, ja, ko, ru, pl, ro, el, da, fi, nb, sv, ca, hr, lt,
    mk, sl and uk; "en" and "zh" map to ``<lang>_core_web_sm``, every other code maps to
    ``<lang>_core_news_sm``.
    """

    fallback_to_multilingual: bool = True
    """If True and language-specific model fails, try xx_ent_wiki_sm (multilingual)."""

    max_doc_length: int = 1000000
    """Maximum document length for spaCy processing."""

    batch_size: int = 1000
    """Batch size for processing multiple texts."""

    def __post_init__(self) -> None:
        # The dataclass is frozen, so fields can only be rewritten through object.__setattr__.
        if self.language_models is None:
            object.__setattr__(self, "language_models", self._get_default_language_models())

        # A dict is unhashable; store it as a sorted tuple of pairs so the generated
        # __hash__ works and equal mappings compare equal regardless of insertion order.
        if isinstance(self.language_models, dict):
            object.__setattr__(self, "language_models", tuple(sorted(self.language_models.items())))

    @staticmethod
    def _get_default_language_models() -> dict[str, str]:
        """Return the built-in language-code to spaCy-model mapping.

        This is a static table; it does not check which models are installed.
        """
        return {
            "en": "en_core_web_sm",
            "de": "de_core_news_sm",
            "fr": "fr_core_news_sm",
            "es": "es_core_news_sm",
            "pt": "pt_core_news_sm",
            "it": "it_core_news_sm",
            "nl": "nl_core_news_sm",
            "zh": "zh_core_web_sm",
            "ja": "ja_core_news_sm",
            "ko": "ko_core_news_sm",
            "ru": "ru_core_news_sm",
            "pl": "pl_core_news_sm",
            "ro": "ro_core_news_sm",
            "el": "el_core_news_sm",
            "da": "da_core_news_sm",
            "fi": "fi_core_news_sm",
            "nb": "nb_core_news_sm",
            "sv": "sv_core_news_sm",
            "ca": "ca_core_news_sm",
            "hr": "hr_core_news_sm",
            "lt": "lt_core_news_sm",
            "mk": "mk_core_news_sm",
            "sl": "sl_core_news_sm",
            "uk": "uk_core_news_sm",
        }

    def get_model_for_language(self, language_code: str) -> str | None:
        """Get the appropriate spaCy model for a language code.

        Args:
            language_code: A language code such as ``"en"``, ``"de-AT"`` or ``"pt_BR"``.

        Returns:
            The configured model name for the exact code if present, otherwise the
            model for its lower-cased base language, otherwise None.
        """
        if not self.language_models:
            return None

        # dict() accepts both the normalised tuple-of-pairs form and a plain mapping.
        models_dict = dict(self.language_models)

        # An exact entry wins so callers can register region-specific models.
        if language_code in models_dict:
            return models_dict[language_code]

        # Fall back to the base language; accept "_" as well as "-" as the
        # region separator so locale-style codes ("pt_BR") resolve too.
        base_lang = language_code.replace("_", "-").split("-")[0].lower()
        if base_lang in models_dict:
            return models_dict[base_lang]

        return None

    def get_fallback_model(self) -> str | None:
        """Get fallback multilingual model if enabled."""
        return "xx_ent_wiki_sm" if self.fallback_to_multilingual else None
103
+
104
+
105
def extract_entities(
    text: str,
    entity_types: Sequence[str] = ("PERSON", "ORGANIZATION", "LOCATION", "DATE", "EMAIL", "PHONE"),
    custom_patterns: frozenset[tuple[str, str]] | None = None,
    languages: list[str] | None = None,
    spacy_config: SpacyEntityExtractionConfig | None = None,
) -> list[Entity]:
    """Extract entities from text using custom regex patterns and/or a NER model.

    Custom regex matches are collected first, then entities found by the selected
    spaCy pipeline are appended. If no model can be selected or loaded, only the
    regex matches are returned.

    Args:
        text: The input text to extract entities from.
        entity_types: Entity labels to keep from the NER model (compared case-insensitively).
        custom_patterns: A frozenset of ``(entity_type, regex_pattern)`` pairs for custom
            extraction. Several patterns may share the same entity type. A plain dict
            mapping entity type to pattern is also accepted.
        languages: List of detected languages to choose appropriate spaCy models.
        spacy_config: Configuration for spaCy entity extraction.

    Returns:
        list[Entity]: A list of extracted Entity objects with type, text, start, and end positions.

    Raises:
        MissingDependencyError: If `spacy` is not installed.
    """
    entities: list[Entity] = []
    if custom_patterns:
        # Walk the pairs directly rather than through dict(): building a dict would
        # silently keep only one pattern per entity type when several are registered.
        pattern_pairs = custom_patterns.items() if isinstance(custom_patterns, dict) else custom_patterns
        for ent_type, pattern in pattern_pairs:
            entities.extend(
                Entity(type=ent_type, text=match.group(), start=match.start(), end=match.end())
                for match in re.finditer(pattern, text)
            )

    if spacy_config is None:
        spacy_config = SpacyEntityExtractionConfig()

    try:
        import spacy  # noqa: F401
    except ImportError as e:
        raise MissingDependencyError.create_for_package(
            package_name="spacy",
            dependency_group="entity-extraction",
            functionality="Entity Extraction",
        ) from e

    model_name = _select_spacy_model(languages, spacy_config)
    if not model_name:
        return entities

    nlp = _load_spacy_model(model_name, spacy_config)
    if not nlp:
        return entities

    # Only the NER pass is length-limited; the regex pass above saw the full text.
    if len(text) > spacy_config.max_doc_length:
        text = text[: spacy_config.max_doc_length]

    doc = nlp(text)

    # NOTE(review): the filter compares against the pipeline's own labels. spaCy
    # pipelines appear to use short labels such as ORG/GPE/LOC/PER, so the default
    # "ORGANIZATION"/"LOCATION" names may never match - confirm whether an alias
    # table is wanted here.
    wanted_labels = {etype.upper() for etype in entity_types}

    entities.extend(
        Entity(
            type=ent.label_,
            text=ent.text,
            start=ent.start_char,
            end=ent.end_char,
        )
        for ent in doc.ents
        if ent.label_ in wanted_labels or ent.label_.upper() in wanted_labels
    )

    return entities
175
+
176
+
177
+ @lru_cache(maxsize=32)
178
+ def _load_spacy_model(model_name: str, spacy_config: SpacyEntityExtractionConfig) -> Any:
179
+ """Load a spaCy model with caching."""
180
+ try:
181
+ import spacy
182
+
183
+ if spacy_config.model_cache_dir:
184
+ import os
185
+
186
+ os.environ["SPACY_DATA"] = str(spacy_config.model_cache_dir)
187
+
188
+ nlp = spacy.load(model_name)
189
+
190
+ nlp.max_length = spacy_config.max_doc_length
191
+
192
+ return nlp
193
+ except (OSError, ImportError):
194
+ return None
195
+
196
+
197
+ def _select_spacy_model(languages: list[str] | None, spacy_config: SpacyEntityExtractionConfig) -> str | None:
198
+ """Select the best spaCy model based on detected languages."""
199
+ if not languages:
200
+ return spacy_config.get_model_for_language("en")
201
+
202
+ for lang in languages:
203
+ model_name = spacy_config.get_model_for_language(lang)
204
+ if model_name:
205
+ return model_name
206
+
207
+ return spacy_config.get_fallback_model()
208
+
209
+
210
def extract_keywords(
    text: str,
    keyword_count: int = 10,
) -> list[tuple[str, float]]:
    """Extract keywords from text using the KeyBERT model.

    Args:
        text: The input text to extract keywords from.
        keyword_count: Number of top keywords to return. Defaults to 10.

    Returns:
        list[tuple[str, float]]: A list of tuples containing keywords and their relevance scores.
        An empty list is returned for blank input or when KeyBERT fails with a
        RuntimeError, OSError or ValueError.

    Raises:
        MissingDependencyError: If `keybert` is not installed.
    """
    try:
        from keybert import KeyBERT

        # Blank input cannot yield keywords, so skip constructing the model for it.
        # The check sits after the import so a missing dependency is still reported.
        if not text or not text.strip():
            return []

        kw_model = KeyBERT()
        keywords = kw_model.extract_keywords(text, top_n=keyword_count)
        return [(kw, float(score)) for kw, score in keywords]
    except ImportError as e:
        raise MissingDependencyError.create_for_package(
            package_name="keybert",
            dependency_group="entity-extraction",
            functionality="Keyword Extraction",
        ) from e
    except (RuntimeError, OSError, ValueError):
        # Best-effort: model download/initialisation or vectorisation problems
        # degrade to "no keywords" instead of failing the whole extraction.
        return []
kreuzberg/_types.py CHANGED
@@ -17,6 +17,7 @@ if TYPE_CHECKING:
17
17
  from pandas import DataFrame
18
18
  from PIL.Image import Image
19
19
 
20
+ from kreuzberg._entity_extraction import SpacyEntityExtractionConfig
20
21
  from kreuzberg._gmft import GMFTConfig
21
22
  from kreuzberg._language_detection import LanguageDetectionConfig
22
23
  from kreuzberg._ocr._easyocr import EasyOCRConfig
@@ -100,6 +101,20 @@ class Metadata(TypedDict, total=False):
100
101
  """Width of the document page/slide/image, if applicable."""
101
102
 
102
103
 
104
@dataclass(frozen=True)
class Entity:
    """An immutable record of one entity found in extracted content."""

    type: str
    """Entity label, e.g. PERSON, ORGANIZATION, LOCATION, DATE, EMAIL, PHONE, or a custom type."""
    text: str
    """The matched text."""
    start: int
    """Character offset in the content where the entity starts."""
    end: int
    """Character offset in the content where the entity ends."""
116
+
117
+
103
118
  @dataclass
104
119
  class ExtractionResult:
105
120
  """The result of a file extraction."""
@@ -114,6 +129,10 @@ class ExtractionResult:
114
129
  """Extracted tables. Is an empty list if 'extract_tables' is not set to True in the ExtractionConfig."""
115
130
  chunks: list[str] = field(default_factory=list)
116
131
  """The extracted content chunks. This is an empty list if 'chunk_content' is not set to True in the ExtractionConfig."""
132
+ entities: list[Entity] | None = None
133
+ """Extracted entities, if entity extraction is enabled."""
134
+ keywords: list[tuple[str, float]] | None = None
135
+ """Extracted keywords and their scores, if keyword extraction is enabled."""
117
136
  detected_languages: list[str] | None = None
118
137
  """Languages detected in the extracted content, if language detection is enabled."""
119
138
 
@@ -160,12 +179,28 @@ class ExtractionConfig:
160
179
  """Post processing hooks to call after processing is done and before the final result is returned."""
161
180
  validators: list[ValidationHook] | None = None
162
181
  """Validation hooks to call after processing is done and before post-processing and result return."""
182
+ extract_entities: bool = False
183
+ """Whether to extract named entities from the content."""
184
+ extract_keywords: bool = False
185
+ """Whether to extract keywords from the content."""
186
+ keyword_count: int = 10
187
+ """Number of keywords to extract if extract_keywords is True."""
188
+ custom_entity_patterns: frozenset[tuple[str, str]] | None = None
189
+ """Custom entity patterns as a frozenset of (entity_type, regex_pattern) tuples."""
163
190
  auto_detect_language: bool = False
164
191
  """Whether to automatically detect language and configure OCR accordingly."""
165
192
  language_detection_config: LanguageDetectionConfig | None = None
166
193
  """Configuration for language detection. If None, uses default settings."""
194
+ spacy_entity_extraction_config: SpacyEntityExtractionConfig | None = None
195
+ """Configuration for spaCy entity extraction. If None, uses default settings."""
167
196
 
168
197
  def __post_init__(self) -> None:
198
+ if self.custom_entity_patterns is not None and isinstance(self.custom_entity_patterns, dict):
199
+ object.__setattr__(self, "custom_entity_patterns", frozenset(self.custom_entity_patterns.items()))
200
+ if self.post_processing_hooks is not None and isinstance(self.post_processing_hooks, list):
201
+ object.__setattr__(self, "post_processing_hooks", tuple(self.post_processing_hooks))
202
+ if self.validators is not None and isinstance(self.validators, list):
203
+ object.__setattr__(self, "validators", tuple(self.validators))
169
204
  from kreuzberg._ocr._easyocr import EasyOCRConfig
170
205
  from kreuzberg._ocr._paddleocr import PaddleOCRConfig
171
206
  from kreuzberg._ocr._tesseract import TesseractConfig
kreuzberg/extraction.py CHANGED
@@ -1,12 +1,14 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from pathlib import Path
4
- from typing import TYPE_CHECKING, Final, cast
4
+ from typing import TYPE_CHECKING, Any, Final, cast
5
5
 
6
6
  import anyio
7
7
 
8
8
  from kreuzberg import ExtractionResult
9
9
  from kreuzberg._chunker import get_chunker
10
+ from kreuzberg._entity_extraction import extract_entities, extract_keywords
11
+ from kreuzberg._language_detection import detect_languages
10
12
  from kreuzberg._mime_types import (
11
13
  validate_mime_type,
12
14
  )
@@ -24,15 +26,7 @@ if TYPE_CHECKING:
24
26
  DEFAULT_CONFIG: Final[ExtractionConfig] = ExtractionConfig()
25
27
 
26
28
 
27
- async def _validate_and_post_process_async(result: ExtractionResult, config: ExtractionConfig) -> ExtractionResult:
28
- for validator in config.validators or []:
29
- await run_maybe_sync(validator, result)
30
-
31
- if config.auto_detect_language and result.content:
32
- from kreuzberg._language_detection import detect_languages
33
-
34
- result.detected_languages = detect_languages(result.content, config.language_detection_config)
35
-
29
+ def _validate_and_post_process_helper(result: ExtractionResult, config: ExtractionConfig) -> ExtractionResult:
36
30
  if config.chunk_content:
37
31
  result.chunks = _handle_chunk_content(
38
32
  mime_type=result.mime_type,
@@ -40,6 +34,39 @@ async def _validate_and_post_process_async(result: ExtractionResult, config: Ext
40
34
  content=result.content,
41
35
  )
42
36
 
37
+ if config.extract_entities:
38
+ try:
39
+ result.entities = extract_entities(
40
+ result.content,
41
+ custom_patterns=config.custom_entity_patterns,
42
+ )
43
+ except RuntimeError:
44
+ result.entities = None
45
+
46
+ if config.extract_keywords:
47
+ try:
48
+ result.keywords = extract_keywords(
49
+ result.content,
50
+ keyword_count=config.keyword_count,
51
+ )
52
+ except RuntimeError:
53
+ result.keywords = None
54
+
55
+ if config.auto_detect_language:
56
+ result.detected_languages = detect_languages(
57
+ result.content,
58
+ config=config.language_detection_config,
59
+ )
60
+
61
+ return result
62
+
63
+
64
+ async def _validate_and_post_process_async(result: ExtractionResult, config: ExtractionConfig) -> ExtractionResult:
65
+ for validator in config.validators or []:
66
+ await run_maybe_sync(validator, result)
67
+
68
+ result = _validate_and_post_process_helper(result, config)
69
+
43
70
  for post_processor in config.post_processing_hooks or []:
44
71
  result = await run_maybe_sync(post_processor, result)
45
72
 
@@ -50,17 +77,7 @@ def _validate_and_post_process_sync(result: ExtractionResult, config: Extraction
50
77
  for validator in config.validators or []:
51
78
  run_sync_only(validator, result)
52
79
 
53
- if config.auto_detect_language and result.content:
54
- from kreuzberg._language_detection import detect_languages
55
-
56
- result.detected_languages = detect_languages(result.content, config.language_detection_config)
57
-
58
- if config.chunk_content:
59
- result.chunks = _handle_chunk_content(
60
- mime_type=result.mime_type,
61
- config=config,
62
- content=result.content,
63
- )
80
+ result = _validate_and_post_process_helper(result, config)
64
81
 
65
82
  for post_processor in config.post_processing_hooks or []:
66
83
  result = run_sync_only(post_processor, result)
@@ -72,7 +89,7 @@ def _handle_chunk_content(
72
89
  mime_type: str,
73
90
  config: ExtractionConfig,
74
91
  content: str,
75
- ) -> list[str]:
92
+ ) -> Any:
76
93
  chunker = get_chunker(mime_type=mime_type, max_characters=config.max_chars, overlap_characters=config.max_overlap)
77
94
  return chunker.chunks(content)
78
95
 
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kreuzberg
3
- Version: 3.5.0
3
+ Version: 3.6.1
4
4
  Summary: A text extraction library supporting PDFs, images, office documents and more
5
5
  Project-URL: homepage, https://github.com/Goldziher/kreuzberg
6
6
  Author-email: Na'aman Hirschfeld <nhirschfed@gmail.com>
7
7
  License: MIT
8
8
  License-File: LICENSE
9
- Keywords: document-processing,image-to-text,ocr,pandoc,pdf-extraction,rag,table-extraction,tesseract,text-extraction,text-processing
9
+ Keywords: document-processing,entity-extraction,image-to-text,keyword-extraction,named-entity-recognition,ner,ocr,pandoc,pdf-extraction,rag,spacy,table-extraction,tesseract,text-extraction,text-processing
10
10
  Classifier: Development Status :: 5 - Production/Stable
11
11
  Classifier: Intended Audience :: Developers
12
12
  Classifier: License :: OSI Approved :: MIT License
@@ -36,16 +36,19 @@ Requires-Dist: typing-extensions>=4.14.0; python_version < '3.12'
36
36
  Provides-Extra: all
37
37
  Requires-Dist: click>=8.2.1; extra == 'all'
38
38
  Requires-Dist: easyocr>=1.7.2; extra == 'all'
39
+ Requires-Dist: fast-langdetect>=0.3.2; extra == 'all'
39
40
  Requires-Dist: gmft>=0.4.2; extra == 'all'
40
- Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.1.6; extra == 'all'
41
+ Requires-Dist: keybert>=0.9.0; extra == 'all'
42
+ Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.16.0; extra == 'all'
41
43
  Requires-Dist: paddleocr>=3.1.0; extra == 'all'
42
44
  Requires-Dist: paddlepaddle>=3.1.0; extra == 'all'
43
45
  Requires-Dist: rich>=14.0.0; extra == 'all'
44
46
  Requires-Dist: semantic-text-splitter>=0.27.0; extra == 'all'
45
47
  Requires-Dist: setuptools>=80.9.0; extra == 'all'
48
+ Requires-Dist: spacy>=3.8.7; extra == 'all'
46
49
  Requires-Dist: tomli>=2.0.0; (python_version < '3.11') and extra == 'all'
47
50
  Provides-Extra: api
48
- Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.1.6; extra == 'api'
51
+ Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.16.0; extra == 'api'
49
52
  Provides-Extra: chunking
50
53
  Requires-Dist: semantic-text-splitter>=0.27.0; extra == 'chunking'
51
54
  Provides-Extra: cli
@@ -54,10 +57,13 @@ Requires-Dist: rich>=14.0.0; extra == 'cli'
54
57
  Requires-Dist: tomli>=2.0.0; (python_version < '3.11') and extra == 'cli'
55
58
  Provides-Extra: easyocr
56
59
  Requires-Dist: easyocr>=1.7.2; extra == 'easyocr'
60
+ Provides-Extra: entity-extraction
61
+ Requires-Dist: keybert>=0.9.0; extra == 'entity-extraction'
62
+ Requires-Dist: spacy>=3.8.7; extra == 'entity-extraction'
57
63
  Provides-Extra: gmft
58
64
  Requires-Dist: gmft>=0.4.2; extra == 'gmft'
59
65
  Provides-Extra: langdetect
60
- Requires-Dist: fast-langdetect>=0.2.0; extra == 'langdetect'
66
+ Requires-Dist: fast-langdetect>=0.3.2; extra == 'langdetect'
61
67
  Provides-Extra: paddleocr
62
68
  Requires-Dist: paddleocr>=3.1.0; extra == 'paddleocr'
63
69
  Requires-Dist: paddlepaddle>=3.1.0; extra == 'paddleocr'
@@ -1,17 +1,18 @@
1
- kreuzberg/__init__.py,sha256=zZ_puArNdw0pQk93BV99fXCxzkHFKXB9kINn8-6-y24,1408
1
+ kreuzberg/__init__.py,sha256=wVxbug-w1cO2xHcP04Bf6QeIKmT2Ep6aeenb8EOYLA0,1534
2
2
  kreuzberg/__main__.py,sha256=s2qM1nPEkRHAQP-G3P7sf5l6qA_KJeIEHS5LpPz04lg,183
3
3
  kreuzberg/_chunker.py,sha256=2eHSRHcZdJ2ZjR3in49y3o9tPl5HMO3vkbnMqaVCbHI,1887
4
4
  kreuzberg/_cli_config.py,sha256=WD_seFjbuay_NJv77vGLBW6BVV9WZNujdzf3zQkhzPc,5691
5
5
  kreuzberg/_constants.py,sha256=Bxc8oiN-wHwnWXT9bEiJhTUcu1ygPpra5qHirAif3b4,191
6
+ kreuzberg/_entity_extraction.py,sha256=EIasBGpkZ-3FwivjEpisz23LilTwx8os-IbfrDtzNl4,7815
6
7
  kreuzberg/_gmft.py,sha256=e-UpYwizRX_V-dn0a7ja0Z9nShAmDKA1Q7HThJy8cyA,14856
7
8
  kreuzberg/_language_detection.py,sha256=22-uXoOu_ws0K8Hz2M7U_SF9QX3npRYLhntAE1dNLFU,3283
8
9
  kreuzberg/_mime_types.py,sha256=QgX-k8aI4lTKArObDM0TFPt7DUjUVwWrdIaIZDh_XQY,7815
9
10
  kreuzberg/_playa.py,sha256=rU6ii2Qnrj8tkDYlSiab5h-BCYLJnUg4QwSLVDEXF5g,11883
10
11
  kreuzberg/_registry.py,sha256=c2B_PJbaL0q3ab2eNmj_0jldeyMaqgvRwkZqUU4MM5Q,3290
11
- kreuzberg/_types.py,sha256=Tnl9yP56dn8ziBZk1sorNk1ZHZbJYMjSoqh7xxImFHs,8092
12
+ kreuzberg/_types.py,sha256=U72a4SXS1e-zV8cXG0tiozMy9mX9wFM1ma6sVz7HpJo,9936
12
13
  kreuzberg/cli.py,sha256=S0w2nGXBWPFn1NhxppW7dpUwB9f_3ymFuWSAB2aRu9g,12465
13
14
  kreuzberg/exceptions.py,sha256=xRaiJh11i8E6Nc-gAQPgNW5xvhiiFBhRS-CBbCEbHQM,2881
14
- kreuzberg/extraction.py,sha256=Jz0f31Mm90mBkWwn0L3vn3z7-irdwNIzMHWByIj5d_I,17005
15
+ kreuzberg/extraction.py,sha256=mdH45bMAAUUNXYT7UrNyWJ2oD_gXuLUU-NyuYxQM884,17459
15
16
  kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
17
  kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
18
  kreuzberg/_api/main.py,sha256=kZCMPPzP4BGzEege9pdhQTJPKKVjCaC6kZdMMeaqP2M,2599
@@ -46,8 +47,8 @@ kreuzberg/_utils/_serialization.py,sha256=AhZvyAu4KsjAqyZDh--Kn2kSWGgCuH7udio8lT
46
47
  kreuzberg/_utils/_string.py,sha256=owIVkUtP0__GiJD9RIJzPdvyIigT5sQho3mOXPbsnW0,958
47
48
  kreuzberg/_utils/_sync.py,sha256=oT4Y_cDBKtE_BFEoLTae3rSisqlYXzW-jlUG_x-dmLM,4725
48
49
  kreuzberg/_utils/_tmp.py,sha256=hVn-VVijIg2FM7EZJ899gc7wZg-TGoJZoeAcxMX-Cxg,1044
49
- kreuzberg-3.5.0.dist-info/METADATA,sha256=jJXbwUuTXevmry2VVg1H8d6rEzebILJyN7q7kJ0M9mQ,8790
50
- kreuzberg-3.5.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
51
- kreuzberg-3.5.0.dist-info/entry_points.txt,sha256=VdoFaTl3QSvVWOZcIlPpDd47o6kn7EvmXSs8FI0ExLc,48
52
- kreuzberg-3.5.0.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
53
- kreuzberg-3.5.0.dist-info/RECORD,,
50
+ kreuzberg-3.6.1.dist-info/METADATA,sha256=JPTejc7zpahkvhZtUqTVPPVzQ-93aOPnx3l3EQXseok,9160
51
+ kreuzberg-3.6.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
52
+ kreuzberg-3.6.1.dist-info/entry_points.txt,sha256=VdoFaTl3QSvVWOZcIlPpDd47o6kn7EvmXSs8FI0ExLc,48
53
+ kreuzberg-3.6.1.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
54
+ kreuzberg-3.6.1.dist-info/RECORD,,