kreuzberg 1.7.0__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kreuzberg/_extractors.py DELETED
@@ -1,280 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import re
4
- from asyncio import gather
5
- from contextlib import suppress
6
- from html import escape
7
- from io import BytesIO
8
- from pathlib import Path
9
- from tempfile import NamedTemporaryFile
10
- from typing import TYPE_CHECKING
11
-
12
- import html_to_markdown
13
- import pptx
14
- import pypdfium2
15
- from anyio import Path as AsyncPath
16
- from pptx.enum.shapes import MSO_SHAPE_TYPE
17
- from xlsx2csv import Xlsx2csv
18
-
19
- from kreuzberg._pandoc import process_content, process_file
20
- from kreuzberg._string import normalize_spaces, safe_decode
21
- from kreuzberg._sync import run_sync
22
- from kreuzberg._tesseract import batch_process_images
23
- from kreuzberg.exceptions import ParsingError
24
-
25
- if TYPE_CHECKING: # pragma: no cover
26
- from PIL.Image import Image
27
-
28
-
29
async def convert_pdf_to_images(file_path: Path) -> list[Image]:
    """Render every page of a PDF file as a Pillow image.

    Args:
        file_path: The path to the PDF file.

    Raises:
        ParsingError: If pypdfium2 raises a ``PdfiumError`` while the document
            is being opened or its pages are being rendered.

    Returns:
        A list with one Pillow image per page yielded by the document.
    """
    absolute_path = str(await AsyncPath(file_path).resolve())
    document = None
    try:
        # Opening the document is delegated to run_sync; rendering happens inline.
        document = await run_sync(pypdfium2.PdfDocument, absolute_path)
        rendered_pages = []
        for page in document:
            bitmap = page.render(scale=2.0)
            rendered_pages.append(bitmap.to_pil())
        return rendered_pages
    except pypdfium2.PdfiumError as e:
        raise ParsingError(
            "Could not convert PDF to images", context={"file_path": str(file_path), "error": str(e)}
        ) from e
    finally:
        # The document is only closed if it was actually opened.
        if document is not None:
            document.close()
53
-
54
-
55
async def extract_pdf_with_tesseract(file_path: Path) -> str:
    """Extract text from a PDF file by running OCR on its rendered pages.

    The PDF is first converted to images with ``convert_pdf_to_images``; the
    images are then handed to ``batch_process_images`` and the per-image
    results are joined with newlines.

    Args:
        file_path: The path to the PDF file.

    Returns:
        The extracted text, passed through ``normalize_spaces``.
    """
    page_images = await convert_pdf_to_images(file_path)
    page_texts = await batch_process_images(page_images)
    combined_text = "\n".join(page_texts)
    return normalize_spaces(combined_text)
67
-
68
-
69
async def extract_pdf_with_pdfium2(file_path: Path) -> str:
    """Extract the embedded text of a PDF file using pypdfium2.

    Args:
        file_path: The path to the PDF file.

    Raises:
        ParsingError: If pypdfium2 raises a ``PdfiumError`` while the document
            is being opened or its text is being read.

    Returns:
        The text of all pages joined with newlines, passed through
        ``normalize_spaces``.
    """
    absolute_path = str(await AsyncPath(file_path).resolve())
    pdf = None
    try:
        pdf = await run_sync(pypdfium2.PdfDocument, absolute_path)
        page_texts = []
        for page in pdf:
            text_page = page.get_textpage()
            page_texts.append(text_page.get_text_bounded())
        return normalize_spaces("\n".join(page_texts))
    except pypdfium2.PdfiumError as e:
        raise ParsingError(
            "Could not extract text from PDF file", context={"file_path": str(file_path), "error": str(e)}
        ) from e
    finally:
        # The document is only closed if it was actually opened.
        if pdf is not None:
            pdf.close()
94
-
95
-
96
async def extract_pdf(file_path_or_contents: Path | bytes, force_ocr: bool = False) -> str:
    """Extract text from a PDF given either as a path or as raw bytes.

    Unless ``force_ocr`` is set, the text layer is tried first via
    ``extract_pdf_with_pdfium2``; if that yields an empty result (or OCR is
    forced) the PDF is processed with ``extract_pdf_with_tesseract`` instead.

    Args:
        file_path_or_contents: The path to the PDF file or its contents as bytes.
        force_ocr: Whether to skip the text-layer attempt and go straight to OCR.
            Defaults to False.

    Returns:
        The extracted text.
    """
    if not isinstance(file_path_or_contents, bytes):
        if not force_ocr:
            text_layer = await extract_pdf_with_pdfium2(file_path_or_contents)
            if text_layer:
                return normalize_spaces(text_layer)
        return await extract_pdf_with_tesseract(file_path_or_contents)

    # Raw bytes are written to a temporary .pdf so the path-based extractors can be reused.
    with NamedTemporaryFile(suffix=".pdf", delete=False) as temp_pdf:
        try:
            temp_path = Path(temp_pdf.name)
            await AsyncPath(temp_path).write_bytes(file_path_or_contents)

            if not force_ocr:
                text_layer = await extract_pdf_with_pdfium2(temp_path)
                if text_layer:
                    return normalize_spaces(text_layer)

            return await extract_pdf_with_tesseract(temp_path)
        finally:
            # delete=False means the temporary file has to be removed by hand.
            temp_pdf.close()
            await AsyncPath(temp_pdf.name).unlink()
124
-
125
-
126
async def extract_content_with_pandoc(file_data: bytes, mime_type: str) -> str:
    """Extract text from in-memory file content using pandoc.

    Args:
        file_data: The content of the file.
        mime_type: The mime type of the file.

    Returns:
        The ``content`` of the ``process_content`` result, passed through
        ``normalize_spaces``.
    """
    pandoc_result = await process_content(file_data, mime_type=mime_type)
    extracted = pandoc_result.content
    return normalize_spaces(extracted)
138
-
139
-
140
async def extract_file_with_pandoc(file_path: Path | str, mime_type: str) -> str:
    """Extract text from a file on disk using pandoc.

    Args:
        file_path: The path to the file.
        mime_type: The mime type of the file.

    Returns:
        The ``content`` of the ``process_file`` result, passed through
        ``normalize_spaces``.
    """
    absolute_path = await AsyncPath(file_path).resolve()
    pandoc_result = await process_file(str(absolute_path), mime_type=mime_type)
    extracted = pandoc_result.content
    return normalize_spaces(extracted)
153
-
154
-
155
async def extract_pptx_file(file_path_or_contents: Path | bytes) -> str:
    """Convert a PPTX presentation into markdown-flavoured text.

    Each slide is introduced by an HTML comment carrying its 1-based number.
    Pictures become markdown image references, tables become HTML tables,
    text shapes are emitted as text (the slide title as a ``#`` heading), and
    slide notes are appended under a ``### Notes:`` heading.

    Notes:
        This function is based on code vendored from `markitdown`, which has an MIT license as well.

    Args:
        file_path_or_contents: The path to the PPTX file or its contents as bytes.

    Returns:
        The extracted text content, passed through ``normalize_spaces``.
    """
    if isinstance(file_path_or_contents, bytes):
        raw_bytes = file_path_or_contents
    else:
        raw_bytes = await AsyncPath(file_path_or_contents).read_bytes()

    deck = pptx.Presentation(BytesIO(raw_bytes))
    md_content = ""

    for slide_number, slide in enumerate(deck.slides, start=1):
        md_content += f"\n\n<!-- Slide number: {slide_number} -->\n"
        title_shape = slide.shapes.title

        for shape in slide.shapes:
            kind = shape.shape_type
            is_image = kind == MSO_SHAPE_TYPE.PICTURE or (
                kind == MSO_SHAPE_TYPE.PLACEHOLDER and hasattr(shape, "image")
            )

            if is_image:
                description = ""
                with suppress(AttributeError):
                    # The alt text is stored in the shape's non-visual properties.
                    description = shape._element._nvXxPr.cNvPr.attrib.get("descr", "")  # noqa: SLF001

                # Non-word characters are dropped from the shape name to form the image file name.
                image_name = re.sub(r"\W", "", shape.name) + ".jpg"
                md_content += f"\n![{description or shape.name}]({image_name})\n"
                continue

            if kind == MSO_SHAPE_TYPE.TABLE:
                row_markup = []
                for row_index, row in enumerate(shape.table.rows):
                    # Only the first row is rendered with header cells.
                    cell_tag = "th" if row_index == 0 else "td"
                    cells = "".join(f"<{cell_tag}>{escape(cell.text)}</{cell_tag}>" for cell in row.cells)
                    row_markup.append(f"<tr>{cells}</tr>")

                md_content += "\n<table>" + "".join(row_markup) + "</table>\n"
                continue

            if shape.has_text_frame:
                if shape == title_shape:
                    md_content += "# " + shape.text.lstrip() + "\n"
                else:
                    md_content += shape.text + "\n"

        md_content = md_content.strip()
        if not slide.has_notes_slide:
            continue

        md_content += "\n\n### Notes:\n"
        notes_frame = slide.notes_slide.notes_text_frame
        if notes_frame is not None:
            md_content += notes_frame.text

        md_content = md_content.strip()

    return normalize_spaces(md_content)
223
-
224
-
225
async def extract_xlsx_file(file_path_or_contents: Path | bytes) -> str:
    """Extract text from an XLSX file by converting it to CSV and processing the CSV.

    The workbook is converted with ``Xlsx2csv`` into a temporary CSV file,
    which is then passed to ``process_file`` with the ``text/csv`` mime type.

    Args:
        file_path_or_contents: The path to the XLSX file or its contents as bytes.

    Returns:
        The extracted text content, passed through ``normalize_spaces``.

    Raises:
        ParsingError: If any exception is raised while converting or processing the file.
    """
    with NamedTemporaryFile(suffix=".xlsx", delete=False) as xlsx_file:
        with NamedTemporaryFile(suffix=".csv", delete=False) as csv_file:
            try:
                if not isinstance(file_path_or_contents, bytes):
                    source_path = str(await AsyncPath(file_path_or_contents).resolve())
                else:
                    # Raw bytes are flushed to the temporary .xlsx so Xlsx2csv can read them from disk.
                    xlsx_file.write(file_path_or_contents)
                    xlsx_file.flush()
                    source_path = xlsx_file.name

                converter = Xlsx2csv(source_path)
                await run_sync(converter.convert, csv_file.name)
                csv_result = await process_file(csv_file.name, mime_type="text/csv")
                return normalize_spaces(csv_result.content)
            except Exception as e:
                reported_path = None
                if isinstance(file_path_or_contents, Path):
                    reported_path = str(file_path_or_contents)
                raise ParsingError(
                    "Could not extract text from XLSX file",
                    context={
                        "error": str(e),
                        "file_path": reported_path,
                    },
                ) from e
            finally:
                # Both temporary files were created with delete=False and are removed by hand.
                xlsx_file.close()
                csv_file.close()
                await gather(AsyncPath(xlsx_file.name).unlink(), AsyncPath(csv_file.name).unlink())
264
-
265
-
266
async def extract_html_string(file_path_or_contents: Path | bytes) -> str:
    """Extract text from HTML by converting it to markdown.

    Args:
        file_path_or_contents: The HTML content as bytes, or the path to a file
            containing it.

    Returns:
        The result of ``html_to_markdown.convert_to_markdown``, passed through
        ``normalize_spaces``.
    """
    if isinstance(file_path_or_contents, bytes):
        html = safe_decode(file_path_or_contents)
    else:
        html = await AsyncPath(file_path_or_contents).read_text()

    markdown = await run_sync(html_to_markdown.convert_to_markdown, html)
    return normalize_spaces(markdown)
@@ -1,342 +0,0 @@
1
- Metadata-Version: 2.2
2
- Name: kreuzberg
3
- Version: 1.7.0
4
- Summary: A text extraction library supporting PDFs, images, office documents and more
5
- Author-email: Na'aman Hirschfeld <nhirschfed@gmail.com>
6
- License: MIT
7
- Project-URL: homepage, https://github.com/Goldziher/kreuzberg
8
- Keywords: document-processing,image-to-text,ocr,pandoc,pdf-extraction,rag,tesseract,text-extraction,text-processing
9
- Classifier: Development Status :: 4 - Beta
10
- Classifier: Intended Audience :: Developers
11
- Classifier: License :: OSI Approved :: MIT License
12
- Classifier: Operating System :: OS Independent
13
- Classifier: Programming Language :: Python :: 3 :: Only
14
- Classifier: Programming Language :: Python :: 3.9
15
- Classifier: Programming Language :: Python :: 3.10
16
- Classifier: Programming Language :: Python :: 3.11
17
- Classifier: Programming Language :: Python :: 3.12
18
- Classifier: Programming Language :: Python :: 3.13
19
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
- Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
- Classifier: Topic :: Text Processing :: General
22
- Classifier: Topic :: Utilities
23
- Classifier: Typing :: Typed
24
- Requires-Python: >=3.9
25
- Description-Content-Type: text/markdown
26
- License-File: LICENSE
27
- Requires-Dist: anyio>=4.8.0
28
- Requires-Dist: charset-normalizer>=3.4.1
29
- Requires-Dist: html-to-markdown>=1.2.0
30
- Requires-Dist: pypdfium2>=4.30.1
31
- Requires-Dist: python-pptx>=1.0.2
32
- Requires-Dist: typing-extensions>=4.12.2
33
- Requires-Dist: xlsx2csv>=0.8.4
34
-
35
- # Kreuzberg
36
-
37
- Kreuzberg is a modern Python library for text extraction from documents, designed for simplicity and efficiency. It provides a unified async interface for extracting text from a wide range of file formats including PDFs, images, office documents, and more.
38
-
39
- ## Why Kreuzberg?
40
-
41
- - **Simple and Hassle-Free**: Clean API that just works, without complex configuration
42
- - **Local Processing**: No external API calls or cloud dependencies required
43
- - **Resource Efficient**: Lightweight processing without GPU requirements
44
- - **Format Support**: Comprehensive support for documents, images, and text formats
45
- - **Modern Python**: Built with async/await, type hints, and current best practices
46
-
47
- Kreuzberg was created to solve text extraction needs in RAG (Retrieval Augmented Generation) applications, but it's suitable for any text extraction use case. Unlike many commercial solutions that require API calls or complex setups, Kreuzberg focuses on local processing with minimal dependencies.
48
-
49
- ## Features
50
-
51
- - **Universal Text Extraction**: Extract text from PDFs (both searchable and scanned), images, office documents, and more
52
- - **Smart Processing**: Automatic OCR for scanned documents, encoding detection for text files
53
- - **Modern Python Design**:
54
- - Async-first API using `anyio`
55
- - Comprehensive type hints for better IDE support
56
- - Detailed error handling with context information
57
- - **Production Ready**:
58
- - Robust error handling
59
- - Detailed debugging information
60
- - Memory efficient processing
61
-
62
- ## Installation
63
-
64
- ### 1. Install the Python Package
65
-
66
- ```shell
67
- pip install kreuzberg
68
- ```
69
-
70
- ### 2. Install System Dependencies
71
-
72
- Kreuzberg requires two system-level dependencies:
73
-
74
- - [Pandoc](https://pandoc.org/installing.html) - For document format conversion
75
- - [Tesseract OCR](https://tesseract-ocr.github.io/) - For image and PDF OCR
76
-
77
- Please install these using their respective installation guides.
78
-
79
- ## Architecture
80
-
81
- Kreuzberg is designed as a high-level async abstraction over established open-source tools. It integrates:
82
-
83
- - **PDF Processing**:
84
- - `pypdfium2` for searchable PDFs
85
- - Tesseract OCR for scanned content
86
- - **Document Conversion**:
87
- - Pandoc for many document and markup formats
88
- - `python-pptx` for PowerPoint files
89
- - `html-to-markdown` for HTML content
90
- - `xlsx2csv` for Excel spreadsheets
91
- - **Text Processing**:
92
- - Smart encoding detection
93
- - Markdown and plain text handling
94
-
95
- ### Supported Formats
96
-
97
- #### Document Formats
98
-
99
- - PDF (`.pdf`, both searchable and scanned documents)
100
- - Microsoft Word (`.docx`, `.doc`)
101
- - PowerPoint presentations (`.pptx`)
102
- - OpenDocument Text (`.odt`)
103
- - Rich Text Format (`.rtf`)
104
- - EPUB (`.epub`)
105
- - DocBook XML (`.dbk`, `.xml`)
106
- - FictionBook (`.fb2`)
107
- - LaTeX (`.tex`, `.latex`)
108
- - Typst (`.typ`)
109
-
110
- #### Markup and Text Formats
111
-
112
- - HTML (`.html`, `.htm`)
113
- - Plain text (`.txt`) and Markdown (`.md`, `.markdown`)
114
- - reStructuredText (`.rst`)
115
- - Org-mode (`.org`)
116
- - DokuWiki (`.txt`)
117
- - Pod (`.pod`)
118
- - Man pages (`.1`, `.2`, etc.)
119
-
120
- #### Data and Research Formats
121
-
122
- - Excel spreadsheets (`.xlsx`)
123
- - CSV (`.csv`) and TSV (`.tsv`) files
124
- - Jupyter Notebooks (`.ipynb`)
125
- - BibTeX (`.bib`) and BibLaTeX (`.bib`)
126
- - CSL-JSON (`.json`)
127
- - EndNote XML (`.xml`)
128
- - RIS (`.ris`)
129
- - JATS XML (`.xml`)
130
-
131
- #### Image Formats
132
-
133
- - JPEG (`.jpg`, `.jpeg`, `.pjpeg`)
134
- - PNG (`.png`)
135
- - TIFF (`.tiff`, `.tif`)
136
- - BMP (`.bmp`)
137
- - GIF (`.gif`)
138
- - WebP (`.webp`)
139
- - JPEG 2000 (`.jp2`, `.jpx`, `.jpm`, `.mj2`)
140
- - Portable Anymap (`.pnm`)
141
- - Portable Bitmap (`.pbm`)
142
- - Portable Graymap (`.pgm`)
143
- - Portable Pixmap (`.ppm`)
144
-
145
- ## Usage
146
-
147
- Kreuzberg provides a simple, async-first API for text extraction. The library exports two main functions:
148
-
149
- - `extract_file()`: Extract text from a file (accepts string path or `pathlib.Path`)
150
- - `extract_bytes()`: Extract text from bytes (accepts a byte string)
151
-
152
- ### Quick Start
153
-
154
- ```python
155
- from pathlib import Path
156
- from kreuzberg import extract_file, extract_bytes
157
-
158
- # Basic file extraction
159
- async def extract_document():
160
- # Extract from a PDF file
161
- pdf_result = await extract_file("document.pdf")
162
- print(f"PDF text: {pdf_result.content}")
163
-
164
- # Extract from an image
165
- img_result = await extract_file("scan.png")
166
- print(f"Image text: {img_result.content}")
167
-
168
- # Extract from Word document
169
- docx_result = await extract_file(Path("document.docx"))
170
- print(f"Word text: {docx_result.content}")
171
- ```
172
-
173
- ### Processing Uploaded Files
174
-
175
- ```python
176
- from kreuzberg import extract_bytes
177
-
178
- async def process_upload(file_content: bytes, mime_type: str):
179
- """Process uploaded file content with known MIME type."""
180
- result = await extract_bytes(file_content, mime_type=mime_type)
181
- return result.content
182
-
183
- # Example usage with different file types
184
- async def handle_uploads():
185
- # Process PDF upload
186
- pdf_result = await extract_bytes(pdf_bytes, mime_type="application/pdf")
187
-
188
- # Process image upload
189
- img_result = await extract_bytes(image_bytes, mime_type="image/jpeg")
190
-
191
- # Process Word document upload
192
- docx_result = await extract_bytes(docx_bytes,
193
- mime_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document")
194
- ```
195
-
196
- ### Advanced Features
197
-
198
- #### PDF Processing Options
199
-
200
- ```python
201
- from kreuzberg import extract_file
202
-
203
- async def process_pdf():
204
- # Force OCR for PDFs with embedded images or scanned content
205
- result = await extract_file("document.pdf", force_ocr=True)
206
-
207
- # Process a scanned PDF (automatically uses OCR)
208
- scanned = await extract_file("scanned.pdf")
209
- ```
210
-
211
- #### ExtractionResult Object
212
-
213
- All extraction functions return an `ExtractionResult` containing:
214
-
215
- - `content`: The extracted text (str)
216
- - `mime_type`: Output format ("text/plain" or "text/markdown" for Pandoc conversions)
217
-
218
- ```python
219
- from kreuzberg import ExtractionResult, extract_file
220
-
221
- async def process_document(path: str) -> tuple[str, str]:
222
- # Access as a named tuple
223
- result: ExtractionResult = await extract_file(path)
224
- print(f"Content: {result.content}")
225
- print(f"Format: {result.mime_type}")
226
-
227
- # Or unpack as a tuple
228
- content, mime_type = await extract_file(path)
229
- return content, mime_type
230
- ```
231
-
232
- ### Error Handling
233
-
234
- Kreuzberg provides comprehensive error handling through several exception types, all inheriting from `KreuzbergError`. Each exception includes helpful context information for debugging.
235
-
236
- ```python
237
- from kreuzberg import extract_file
238
- from kreuzberg.exceptions import (
239
- ValidationError,
240
- ParsingError,
241
- OCRError,
242
- MissingDependencyError
243
- )
244
-
245
- async def safe_extract(path: str) -> str:
246
- try:
247
- result = await extract_file(path)
248
- return result.content
249
-
250
- except ValidationError as e:
251
- # Input validation issues
252
- # - Unsupported or undetectable MIME types
253
- # - Missing files
254
- # - Invalid input parameters
255
- print(f"Validation failed: {e}")
256
-
257
- except OCRError as e:
258
- # OCR-specific issues
259
- # - Tesseract processing failures
260
- # - Image conversion problems
261
- print(f"OCR failed: {e}")
262
-
263
- except MissingDependencyError as e:
264
- # System dependency issues
265
- # - Missing Tesseract OCR
266
- # - Missing Pandoc
267
- # - Incompatible versions
268
- print(f"Dependency missing: {e}")
269
-
270
- except ParsingError as e:
271
- # General processing errors
272
- # - PDF parsing failures
273
- # - Format conversion issues
274
- # - Encoding problems
275
- print(f"Processing failed: {e}")
276
-
277
- return ""
278
-
279
- # Example error contexts
280
- try:
281
- result = await extract_file("document.xyz")
282
- except ValidationError as e:
283
- # Error will include context:
284
- # ValidationError: Unsupported mime type
285
- # Context: {
286
- # "file_path": "document.xyz",
287
- # "supported_mimetypes": ["application/pdf", ...]
288
- # }
289
- print(e)
290
-
291
- try:
292
- result = await extract_file("scan.jpg")
293
- except OCRError as e:
294
- # Error will include context:
295
- # OCRError: OCR failed with a non-0 return code
296
- # Context: {
297
- # "file_path": "scan.jpg",
298
- # "tesseract_version": "5.3.0"
299
- # }
300
- print(e)
301
- ```
302
-
303
- All exceptions provide:
304
-
305
- - A descriptive error message
306
- - Relevant context in the `context` attribute
307
- - String representation with both message and context
308
- - Proper exception chaining for debugging
309
-
310
- ## Roadmap
311
-
312
- V1:
313
-
314
- - [x] - html file text extraction
315
- - [ ] - better PDF table extraction
316
- - [ ] - batch APIs
317
- - [ ] - sync APIs
318
-
319
- V2:
320
-
321
- - [ ] - metadata extraction (breaking change)
322
- - [ ] - TBD
323
-
324
- ## Contribution
325
-
326
- This library is open to contribution. Feel free to open issues or submit PRs. It's better to discuss issues before
327
- submitting PRs to avoid disappointment.
328
-
329
- ### Local Development
330
-
331
- 1. Clone the repo
332
- 2. Install the system dependencies
333
- 3. Install the full dependencies with `uv sync`
334
- 4. Install the pre-commit hooks with:
335
- ```shell
336
- pre-commit install && pre-commit install --hook-type commit-msg
337
- ```
338
- 5. Make your changes and submit a PR
339
-
340
- ## License
341
-
342
- This library uses the MIT license.
@@ -1,15 +0,0 @@
1
- kreuzberg/__init__.py,sha256=5IBPjPsZ7faK15gFB9ZEROHhkEX7KKQmrHPCZuGnhb0,285
2
- kreuzberg/_extractors.py,sha256=3VP7oBz0VpmkkhlbKDPjRmnZdHBv4K_xqcyMeeDaetM,9283
3
- kreuzberg/_mime_types.py,sha256=nvRSWDUhtntO9-E9gv2l5BVYow61zim4llJ6n33k_BE,2682
4
- kreuzberg/_pandoc.py,sha256=zhNJ8_92JMs4gG_Fj-IVwdpZwWsyaK-VTrbLke6NGyU,15097
5
- kreuzberg/_string.py,sha256=4txRDnkdR12oO6G8V-jXEMlA9ivgmw8E8EbjyhfL-W4,1106
6
- kreuzberg/_sync.py,sha256=ovsFHFdkcczz7gNEUJsbZzY8KHG0_GAOOYipQNE4hIY,874
7
- kreuzberg/_tesseract.py,sha256=Yya15OxB4PBi2QqmrGXF70_SHBD7Luii9sBXzMJlCpU,8168
8
- kreuzberg/exceptions.py,sha256=pxoEPS0T9e5QSgxsfXn1VmxsY_EGXvTwY0gETPiNn8E,945
9
- kreuzberg/extraction.py,sha256=_vJ9O8t50a3p4co3hY8b3BdBIXV5S7XOUNl_kD9_FvM,6599
10
- kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- kreuzberg-1.7.0.dist-info/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
12
- kreuzberg-1.7.0.dist-info/METADATA,sha256=3wKe7X5G1IQfSPNzD0wnS0t81MqoWtQ-cgR-6MBoyec,10355
13
- kreuzberg-1.7.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
14
- kreuzberg-1.7.0.dist-info/top_level.txt,sha256=rbGkygffkZiyKhL8UN41ZOjLfem0jJPA1Whtndne0rE,10
15
- kreuzberg-1.7.0.dist-info/RECORD,,