py-chunks 0.2.2__tar.gz → 0.2.3__tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {py_chunks-0.2.2 → py_chunks-0.2.3}/PKG-INFO +1 -1
- {py_chunks-0.2.2 → py_chunks-0.2.3}/py_chunks/__init__.py +25 -5
- {py_chunks-0.2.2 → py_chunks-0.2.3}/pyproject.toml +1 -1
- {py_chunks-0.2.2 → py_chunks-0.2.3}/.github/workflows/release.yml +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/.gitignore +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/.pylintrc +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/Cargo.lock +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/Cargo.toml +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/LICENSE +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/README.md +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/py_chunks/chunkers/__init__.py +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/py_chunks/chunkers/docx.py +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/py_chunks/chunkers/html.py +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/py_chunks/chunkers/md.py +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/py_chunks/chunkers/pdf.py +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/py_chunks/chunkers/pptx.py +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/py_chunks/chunkers/txt.py +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/docx/common.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/docx/mod.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/docx/page_aware.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/docx/section.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/docx/semantic.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/docx/sentence.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/docx/sliding_window.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/docx/structural.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/html/common.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/html/mod.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/html/page_aware.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/html/section.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/html/semantic.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/html/sentence.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/html/sliding_window.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/html/stream_iter.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/html/structural.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/md/common.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/md/mod.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/md/page_aware.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/md/section.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/md/semantic.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/md/sentence.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/md/sliding_window.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/md/stream_iter.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/md/structural.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/mod.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/pdf/common.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/pdf/mod.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/pdf/stream_iter.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/pdf/structural.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/pptx/common.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/pptx/mod.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/pptx/page_aware.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/pptx/section.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/pptx/semantic.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/pptx/sentence.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/pptx/sliding_window.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/pptx/stream_iter.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/pptx/structural.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/shared.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/txt/common.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/txt/mod.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/txt/page_aware.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/txt/section.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/txt/semantic.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/txt/sentence.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/txt/sliding_window.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/txt/stream_iter.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/extensions/txt/structural.rs +0 -0
- {py_chunks-0.2.2 → py_chunks-0.2.3}/src/lib.rs +0 -0
|
@@ -6,6 +6,7 @@ URLs.
|
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
8
|
import os
|
|
9
|
+
import sys
|
|
9
10
|
import tempfile
|
|
10
11
|
from os import PathLike, fspath
|
|
11
12
|
from pathlib import Path
|
|
@@ -13,11 +14,30 @@ from typing import Any
|
|
|
13
14
|
from urllib.parse import urlparse
|
|
14
15
|
from urllib.request import urlopen
|
|
15
16
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
#
|
|
19
|
-
|
|
20
|
-
|
|
17
|
+
_pkg_dir = Path(__file__).parent
|
|
18
|
+
|
|
19
|
+
# Tell the Rust layer where to find the bundled PDFium binary.
|
|
20
|
+
os.environ.setdefault("PY_CHUNKS_PACKAGE_DIR", str(_pkg_dir))
|
|
21
|
+
|
|
22
|
+
# Directly resolve the bundled binary and set PDFIUM_LIBRARY_PATH to its
|
|
23
|
+
# absolute path. This hits the highest-priority branch in the Rust resolver
|
|
24
|
+
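# so no directory scanning is needed. setdefault() keeps any PDFIUM_LIBRARY_PATH the caller already set.
# NOTE(review): on platforms missing from _PDFIUM_NAMES the name falls back to "", so the path built below is the package directory itself and exists() is true — consider is_file().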
|
|
25
|
+
_PDFIUM_NAMES = {
|
|
26
|
+
"win32": "pdfium.dll",
|
|
27
|
+
"darwin": "libpdfium.dylib",
|
|
28
|
+
"linux": "libpdfium.so",
|
|
29
|
+
}
|
|
30
|
+
_pdfium_bin = _pkg_dir / _PDFIUM_NAMES.get(sys.platform, "")
|
|
31
|
+
if _pdfium_bin.exists():
|
|
32
|
+
os.environ.setdefault("PDFIUM_LIBRARY_PATH", str(_pdfium_bin))
|
|
33
|
+
|
|
34
|
+
# On Windows, register the package directory as a DLL search directory so
|
|
35
|
+
# pdfium.dll's own dependencies (vcruntime140.dll, msvcp140.dll …) are found
|
|
36
|
+
# in py_chunks/ rather than failing with LoadLibrary error 126.
|
|
37
|
+
# os.add_dll_directory() wraps AddDllDirectory() — available on Python 3.8+,
|
|
38
|
+
# which we always satisfy (requires-python = ">=3.9").
|
|
39
|
+
if sys.platform == "win32" and hasattr(os, "add_dll_directory"):
|
|
40
|
+
os.add_dll_directory(str(_pkg_dir))
|
|
21
41
|
|
|
22
42
|
from .chunkers.docx import chunk_docx, stream_chunk_docx
|
|
23
43
|
from .chunkers.html import chunk_html, stream_chunk_html
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|