PyPI - kreuzberg - Versions diffs - 3.11.4__py3-none-any.whl → 3.13.1__py3-none-any.whl - Mend

kreuzberg 3.11.4__py3-none-any.whl → 3.13.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51) hide show

kreuzberg/__init__.py +14 -13
kreuzberg/__main__.py +0 -2
kreuzberg/_api/main.py +119 -9
kreuzberg/_chunker.py +0 -15
kreuzberg/_config.py +212 -292
kreuzberg/_document_classification.py +20 -47
kreuzberg/_entity_extraction.py +1 -122
kreuzberg/_extractors/_base.py +4 -71
kreuzberg/_extractors/_email.py +1 -15
kreuzberg/_extractors/_html.py +9 -12
kreuzberg/_extractors/_image.py +1 -25
kreuzberg/_extractors/_pandoc.py +10 -147
kreuzberg/_extractors/_pdf.py +38 -94
kreuzberg/_extractors/_presentation.py +0 -99
kreuzberg/_extractors/_spread_sheet.py +13 -55
kreuzberg/_extractors/_structured.py +1 -4
kreuzberg/_gmft.py +14 -199
kreuzberg/_language_detection.py +1 -36
kreuzberg/_mcp/__init__.py +0 -2
kreuzberg/_mcp/server.py +3 -10
kreuzberg/_mime_types.py +1 -19
kreuzberg/_ocr/_base.py +4 -76
kreuzberg/_ocr/_easyocr.py +124 -186
kreuzberg/_ocr/_paddleocr.py +154 -224
kreuzberg/_ocr/_table_extractor.py +184 -0
kreuzberg/_ocr/_tesseract.py +797 -361
kreuzberg/_playa.py +5 -31
kreuzberg/_registry.py +0 -36
kreuzberg/_types.py +588 -93
kreuzberg/_utils/_cache.py +84 -138
kreuzberg/_utils/_device.py +0 -74
kreuzberg/_utils/_document_cache.py +0 -75
kreuzberg/_utils/_errors.py +0 -50
kreuzberg/_utils/_ocr_cache.py +136 -0
kreuzberg/_utils/_pdf_lock.py +0 -16
kreuzberg/_utils/_process_pool.py +17 -64
kreuzberg/_utils/_quality.py +0 -60
kreuzberg/_utils/_ref.py +32 -0
kreuzberg/_utils/_serialization.py +0 -30
kreuzberg/_utils/_string.py +9 -59
kreuzberg/_utils/_sync.py +0 -77
kreuzberg/_utils/_table.py +49 -101
kreuzberg/_utils/_tmp.py +0 -9
kreuzberg/cli.py +54 -74
kreuzberg/extraction.py +39 -32
{kreuzberg-3.11.4.dist-info → kreuzberg-3.13.1.dist-info}/METADATA +19 -15
kreuzberg-3.13.1.dist-info/RECORD +57 -0
kreuzberg-3.11.4.dist-info/RECORD +0 -54
{kreuzberg-3.11.4.dist-info → kreuzberg-3.13.1.dist-info}/WHEEL +0 -0
{kreuzberg-3.11.4.dist-info → kreuzberg-3.13.1.dist-info}/entry_points.txt +0 -0
{kreuzberg-3.11.4.dist-info → kreuzberg-3.13.1.dist-info}/licenses/LICENSE +0 -0

kreuzberg/_utils/_quality.py CHANGED Viewed

@@ -1,14 +1,10 @@
-"""Quality post-processing utilities for extracted text."""
 from __future__ import annotations
 import re
 from functools import reduce
 from typing import Any
-# Pre-compiled patterns for performance
 _OCR_ARTIFACTS = {
-    # Common OCR misreads
     "scattered_chars": re.compile(r"\b[a-zA-Z]\s{2,}[a-zA-Z]\s{2,}[a-zA-Z]\b"),
     "repeated_punctuation": re.compile(r"[.]{3,}|[-]{3,}|[_]{3,}"),
     "isolated_punctuation": re.compile(r"\s[.,;:!?]\s"),
@@ -17,7 +13,6 @@ _OCR_ARTIFACTS = {
     "broken_sentences": re.compile(r"[a-z]\s{3,}[A-Z][a-z]"),
 }
-# Combined pattern for faster OCR penalty calculation
 _COMBINED_OCR_PATTERN = re.compile(
     r"(?P<scattered>\b[a-zA-Z]\s{2,}[a-zA-Z]\s{2,}[a-zA-Z]\b)|"
     r"(?P<repeated>[.]{3,}|[-]{3,}|[_]{3,})|"
@@ -27,14 +22,12 @@ _COMBINED_OCR_PATTERN = re.compile(
     r"(?P<broken>[a-z]\s{3,}[A-Z][a-z])"
 )
-# Pre-compiled patterns for text normalization
 _WHITESPACE_NORMALIZE = re.compile(r"[ \t\f\v\r\xa0\u2000-\u200b\u2028\u2029\u3000]+")
 _NEWLINE_NORMALIZE = re.compile(r"\n\s*\n\s*\n+")
 _SENTENCE_DETECT = re.compile(r"[.!?]\s+[A-Z]")
 _PUNCTUATION_DETECT = re.compile(r"[.!?]")
 _SCRIPT_PATTERNS = {
-    # JavaScript and CSS content
     "js_functions": re.compile(r"function\s+\w+\s*\([^)]*\)\s*\{[^}]*\}", re.IGNORECASE),
     "css_rules": re.compile(r"\.[a-zA-Z][\w-]*\s*\{[^}]*\}", re.IGNORECASE),
     "script_tags": re.compile(r"<script[^>]*>.*?</script>", re.DOTALL | re.IGNORECASE),
@@ -51,39 +44,24 @@ _NAVIGATION_PATTERNS = {
 def calculate_quality_score(text: str, metadata: dict[str, Any] | None = None) -> float:
-    """Calculate overall quality score for extracted text.
-    Args:
-        text: The extracted text content
-        metadata: Optional metadata for additional scoring
-    Returns:
-        Quality score between 0.0 and 1.0
-    """
     if not text or not text.strip():
         return 0.0
-    # Initialize score
     score = 1.0
     total_chars = len(text)
-    # Penalize OCR artifacts
     ocr_penalty = _calculate_ocr_penalty(text, total_chars)
     score -= ocr_penalty * 0.3
-    # Penalize script/style content
     script_penalty = _calculate_script_penalty(text, total_chars)
     score -= script_penalty * 0.2
-    # Penalize navigation content
     nav_penalty = _calculate_navigation_penalty(text, total_chars)
     score -= nav_penalty * 0.1
-    # Bonus for structure (sentences, paragraphs)
     structure_bonus = _calculate_structure_bonus(text)
     score += structure_bonus * 0.2
-    # Bonus for metadata richness
     if metadata:
         metadata_bonus = _calculate_metadata_bonus(metadata)
         score += metadata_bonus * 0.1
@@ -92,27 +70,15 @@ def calculate_quality_score(text: str, metadata: dict[str, Any] | None = None) -
 def clean_extracted_text(text: str) -> str:
-    """Clean extracted text by removing artifacts and improving quality.
-    Args:
-        text: The raw extracted text
-    Returns:
-        Cleaned text with artifacts removed
-    """
     if not text:
         return text
-    # Remove script and style content using functools.reduce for single pass
     text = reduce(lambda t, pattern: pattern.sub(" ", t), _SCRIPT_PATTERNS.values(), text)
-    # Clean OCR artifacts
     text = _clean_ocr_artifacts(text)
-    # Clean navigation elements
     text = _clean_navigation_elements(text)
-    # Normalize whitespace using pre-compiled patterns
     text = _WHITESPACE_NORMALIZE.sub(" ", text)
     text = _NEWLINE_NORMALIZE.sub("\n\n", text)
@@ -120,72 +86,57 @@ def clean_extracted_text(text: str) -> str:
 def _calculate_ocr_penalty(text: str, total_chars: int) -> float:
-    """Calculate penalty for OCR artifacts."""
     if total_chars == 0:
         return 0.0
-    # Use combined pattern for single-pass processing
     artifact_chars = sum(len(match.group()) for match in _COMBINED_OCR_PATTERN.finditer(text))
     return min(1.0, artifact_chars / total_chars)
 def _calculate_script_penalty(text: str, total_chars: int) -> float:
-    """Calculate penalty for script/style content."""
     if total_chars == 0:
         return 0.0
-    # Use sum with generator expression for single-pass calculation
     script_chars = sum(len(match) for pattern in _SCRIPT_PATTERNS.values() for match in pattern.findall(text))
     return min(1.0, script_chars / total_chars)
 def _calculate_navigation_penalty(text: str, total_chars: int) -> float:
-    """Calculate penalty for navigation content."""
     if total_chars == 0:
         return 0.0
-    # Use sum with generator expression for single-pass calculation
     nav_chars = sum(len(match) for pattern in _NAVIGATION_PATTERNS.values() for match in pattern.findall(text))
     return min(1.0, nav_chars / total_chars)
 def _calculate_structure_bonus(text: str) -> float:
-    """Calculate bonus for proper text structure."""
     if not text:
         return 0.0
-    # Count sentences (rough heuristic)
     sentence_count = len(_SENTENCE_DETECT.findall(text))
-    # Count paragraphs
     paragraph_count = len(text.split("\n\n"))
-    # Calculate structure score
     words = len(text.split())
     if words == 0:
         return 0.0
-    # Good structure: reasonable sentence and paragraph distribution
     avg_words_per_sentence = words / max(1, sentence_count)
     avg_words_per_paragraph = words / max(1, paragraph_count)
     structure_score = 0.0
-    # Bonus for reasonable sentence length (10-30 words)
     if 10 <= avg_words_per_sentence <= 30:
         structure_score += 0.3
-    # Bonus for reasonable paragraph length (50-300 words)
     if 50 <= avg_words_per_paragraph <= 300:
         structure_score += 0.3
-    # Bonus for having multiple paragraphs
     if paragraph_count > 1:
         structure_score += 0.2
-    # Bonus for having punctuation
     if _PUNCTUATION_DETECT.search(text):
         structure_score += 0.2
@@ -193,7 +144,6 @@ def _calculate_structure_bonus(text: str) -> float:
 def _calculate_metadata_bonus(metadata: dict[str, Any]) -> float:
-    """Calculate bonus for rich metadata."""
     if not metadata:
         return 0.0
@@ -204,30 +154,20 @@ def _calculate_metadata_bonus(metadata: dict[str, Any]) -> float:
 def _clean_ocr_artifacts(text: str) -> str:
-    """Remove common OCR artifacts from text."""
-    # Fix scattered characters (likely OCR errors)
     text = _OCR_ARTIFACTS["scattered_chars"].sub(lambda m: m.group().replace(" ", ""), text)
-    # Clean repeated punctuation
     text = _OCR_ARTIFACTS["repeated_punctuation"].sub("...", text)
-    # Fix isolated punctuation
     text = _OCR_ARTIFACTS["isolated_punctuation"].sub(" ", text)
-    # Remove malformed words with numbers mixed in
     text = _OCR_ARTIFACTS["malformed_words"].sub(" ", text)
-    # Normalize excessive whitespace
     return _OCR_ARTIFACTS["excessive_whitespace"].sub(" ", text)
 def _clean_navigation_elements(text: str) -> str:
-    """Remove navigation elements from text."""
-    # Remove navigation words
     text = _NAVIGATION_PATTERNS["nav_words"].sub(" ", text)
-    # Remove breadcrumbs
     text = _NAVIGATION_PATTERNS["breadcrumbs"].sub(" ", text)
-    # Remove pagination
     return _NAVIGATION_PATTERNS["pagination"].sub(" ", text)

kreuzberg/_utils/_ref.py ADDED Viewed

@@ -0,0 +1,32 @@
+from __future__ import annotations
+from typing import TYPE_CHECKING, Any, ClassVar, Generic, TypeVar, cast
+if TYPE_CHECKING:
+    from collections.abc import Callable
+T = TypeVar("T")
+class Ref(Generic[T]):
+    _instances: ClassVar[dict[str, Any]] = {}
+    def __init__(self, name: str, factory: Callable[[], T]) -> None:
+        self.name = name
+        self.factory = factory
+    def get(self) -> T:
+        if self.name not in self._instances:
+            self._instances[self.name] = self.factory()
+        return cast("T", self._instances[self.name])
+    def clear(self) -> None:
+        if self.name in self._instances:
+            del self._instances[self.name]
+    def is_initialized(self) -> bool:
+        return self.name in self._instances
+    @classmethod
+    def clear_all(cls) -> None:
+        cls._instances.clear()

kreuzberg/_utils/_serialization.py CHANGED Viewed

@@ -1,5 +1,3 @@
-"""Fast serialization utilities using msgspec."""
 from __future__ import annotations
 from dataclasses import is_dataclass
@@ -12,7 +10,6 @@ from msgspec.msgpack import decode, encode
 T = TypeVar("T")
-# Define dict method names in priority order
 _DICT_METHOD_NAMES = (
     "to_dict",
     "as_dict",
@@ -25,21 +22,18 @@ _DICT_METHOD_NAMES = (
 def encode_hook(obj: Any) -> Any:
-    """Custom encoder for complex objects."""
     if callable(obj):
         return None
     if isinstance(obj, Exception):
         return {"message": str(obj), "type": type(obj).__name__}
-    # Check for dict-like methods more efficiently using any() with generator
     for attr_name in _DICT_METHOD_NAMES:
         method = getattr(obj, attr_name, None)
         if method is not None and callable(method):
             return method()
     if is_dataclass(obj) and not isinstance(obj, type):
-        # Use msgspec.to_builtins for more efficient conversion
         return msgspec.to_builtins(obj)
     if hasattr(obj, "save") and hasattr(obj, "format"):
@@ -49,18 +43,6 @@ def encode_hook(obj: Any) -> Any:
 def deserialize(value: str | bytes, target_type: type[T]) -> T:
-    """Deserialize bytes/string to target type.
-    Args:
-        value: Serialized data
-        target_type: Type to deserialize to
-    Returns:
-        Deserialized object
-    Raises:
-        ValueError: If deserialization fails
-    """
     try:
         return decode(cast("bytes", value), type=target_type, strict=False)
     except MsgspecError as e:
@@ -68,18 +50,6 @@ def deserialize(value: str | bytes, target_type: type[T]) -> T:
 def serialize(value: Any, **kwargs: Any) -> bytes:
-    """Serialize value to bytes.
-    Args:
-        value: Object to serialize
-        **kwargs: Additional data to merge with value if it's a dict
-    Returns:
-        Serialized bytes
-    Raises:
-        ValueError: If serialization fails
-    """
     if isinstance(value, dict) and kwargs:
         value = value | kwargs

kreuzberg/_utils/_string.py CHANGED Viewed

@@ -7,52 +7,33 @@ from functools import lru_cache
 import chardetng_py
-# Compile regex patterns once at module level for performance
 _WHITESPACE_PATTERN = re.compile(r"[ \t\f\v\r\xa0\u2000-\u200b\u2028\u2029\u3000]+")
 _NEWLINES_PATTERN = re.compile(r"\n+")
 _MOJIBAKE_PATTERNS = {
-    # Hebrew as Cyrillic patterns
     "hebrew_as_cyrillic": re.compile(r"[\u0400-\u04FF]{3,}"),
-    # Control characters that shouldn't appear in text
     "control_chars": re.compile(r"[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F-\x9F]"),
-    # Unicode replacement characters
     "replacement_chars": re.compile(r"\uFFFD+"),
-    # Isolated combining marks (likely encoding issues)
     "isolated_combining": re.compile(r"[\u0300-\u036F](?![^\u0300-\u036F])"),
 }
-# Simple cache for encoding detection (in-memory, session-scoped)
 _encoding_cache: dict[str, str] = {}
 @lru_cache(maxsize=128)
 def _get_encoding_cache_key(data_hash: str, size: int) -> str:
-    """Generate cache key for encoding detection."""
-    # Use string interpolation which is faster than format strings for simple cases
     return f"{data_hash}:{size}"
 def safe_decode(byte_data: bytes, encoding: str | None = None) -> str:
-    """Decode a byte string safely with mojibake detection and correction.
-    Args:
-        byte_data: The byte string to decode.
-        encoding: The encoding to use when decoding the byte string.
-    Returns:
-        The decoded string with mojibake detection and correction.
-    """
     if not byte_data:
         return ""
-    # Try provided encoding first (fastest path)
     if encoding:
         with suppress(UnicodeDecodeError, LookupError):
             decoded = byte_data.decode(encoding)
             return _fix_mojibake(decoded)
-    # Check cache for similar content (performance optimization)
-    data_hash = hashlib.sha256(byte_data[:1024]).hexdigest()[:16]  # Hash first 1KB
+    data_hash = hashlib.sha256(byte_data[:1024]).hexdigest()[:16]
     cache_key = _get_encoding_cache_key(data_hash, len(byte_data))
     if cache_key in _encoding_cache:
@@ -61,25 +42,22 @@ def safe_decode(byte_data: bytes, encoding: str | None = None) -> str:
             decoded = byte_data.decode(cached_encoding)
             return _fix_mojibake(decoded)
-    # Use chardetng for better performance than charset-normalizer
     detected_encoding = chardetng_py.detect(byte_data)
     if detected_encoding:
         with suppress(UnicodeDecodeError, LookupError):
             decoded = byte_data.decode(detected_encoding)
-            # Cache successful encoding detection
-            if len(_encoding_cache) < 1000:  # Prevent unlimited growth
+            if len(_encoding_cache) < 1000:  # Prevent unlimited growth ~keep
                 _encoding_cache[cache_key] = detected_encoding
             return _fix_mojibake(decoded)
-    # Try multiple encodings with confidence scoring
     encodings_to_try = [
         "utf-8",
-        "windows-1255",  # Hebrew
-        "iso-8859-8",  # Hebrew
-        "windows-1256",  # Arabic
-        "iso-8859-6",  # Arabic
-        "windows-1252",  # Western European
-        "cp1251",  # Cyrillic
+        "windows-1255",  # Hebrew ~keep
+        "iso-8859-8",  # Hebrew ~keep
+        "windows-1256",  # Arabic ~keep
+        "iso-8859-6",  # Arabic ~keep
+        "windows-1252",  # Western European ~keep
+        "cp1251",  # Cyrillic ~keep
     ]
     best_result = None
@@ -96,12 +74,10 @@ def safe_decode(byte_data: bytes, encoding: str | None = None) -> str:
     if best_result and best_confidence > 0.5:
         return _fix_mojibake(best_result)
-    # Final fallback
     return byte_data.decode("latin-1", errors="replace")
 def _calculate_text_confidence(text: str) -> float:
-    """Calculate confidence score for decoded text quality."""
     if not text:
         return 0.0
@@ -109,77 +85,51 @@ def _calculate_text_confidence(text: str) -> float:
     if total_chars == 0:
         return 0.0
-    # Check for common encoding problems - compile patterns once
     replacement_count = len(_MOJIBAKE_PATTERNS["replacement_chars"].findall(text))
     control_count = len(_MOJIBAKE_PATTERNS["control_chars"].findall(text))
-    # Penalize replacement and control characters
     penalty = (replacement_count + control_count * 2) / total_chars
-    # Bonus for readable character ranges - more efficient counting
-    # Use generator expression with early termination
     readable_chars = sum(1 for c in text if c.isprintable() or c.isspace())
     readability_score = readable_chars / total_chars
-    # Check for suspicious Cyrillic that might be misencoded Hebrew
     cyrillic_matches = _MOJIBAKE_PATTERNS["hebrew_as_cyrillic"].findall(text)
     if cyrillic_matches:
-        # Calculate total length more efficiently
         cyrillic_length = sum(len(match) for match in cyrillic_matches)
         if cyrillic_length > total_chars * 0.1:
-            penalty += 0.3  # Heavy penalty for likely mojibake
+            penalty += 0.3
     return max(0.0, min(1.0, readability_score - penalty))
 def _fix_mojibake(text: str) -> str:
-    """Attempt to fix common mojibake patterns."""
     if not text:
         return text
-    # Remove control characters
     text = _MOJIBAKE_PATTERNS["control_chars"].sub("", text)
-    # Remove replacement characters
     text = _MOJIBAKE_PATTERNS["replacement_chars"].sub("", text)
-    # Remove isolated combining marks
     text = _MOJIBAKE_PATTERNS["isolated_combining"].sub("", text)
-    # Try to fix Hebrew encoded as Cyrillic (common Windows-1255 -> CP1251 confusion)
     if _MOJIBAKE_PATTERNS["hebrew_as_cyrillic"].search(text):
-        # This is a heuristic fix - in practice, you'd need actual character mapping
-        # For now, we flag it for manual review by keeping the text but adding a marker
         pass
     return text
 def normalize_spaces(text: str) -> str:
-    """Normalize spaces while preserving line breaks and paragraph structure.
-    Args:
-        text: The text to normalize.
-    Returns:
-        The normalized text with proper spacing.
-    """
     if not text or not text.strip():
         return ""
-    # Split by double newlines to preserve paragraph breaks
     paragraphs = text.split("\n\n")
     result_paragraphs = []
     for paragraph in paragraphs:
-        # Use pre-compiled patterns for better performance
-        # Replace multiple whitespace (except newlines) with single space
         cleaned = _WHITESPACE_PATTERN.sub(" ", paragraph)
-        # Clean up multiple newlines within paragraph (keep single newlines)
         cleaned = _NEWLINES_PATTERN.sub("\n", cleaned)
-        # Process lines efficiently - manual loop avoids double strip() calls
         lines = []
         for line in cleaned.split("\n"):
             stripped_line = line.strip()

kreuzberg/_utils/_sync.py CHANGED Viewed

@@ -18,17 +18,6 @@ P = ParamSpec("P")
 async def run_sync(sync_fn: Callable[P, T], *args: P.args, **kwargs: P.kwargs) -> T:
-    """Run a synchronous function in an asynchronous context.
-    Args:
-        sync_fn: The synchronous function to run.
-        *args: The positional arguments to pass to the function.
-        **kwargs: The keyword arguments to pass to the function.
-    Returns:
-        The result of the synchronous function.
-    """
-    # Optimize: only create partial if we have kwargs
     if kwargs:
         handler = partial(sync_fn, **kwargs)
         return cast("T", await any_io_run_sync(handler, *args, abandon_on_cancel=True))  # pyright: ignore [reportCallIssue]
@@ -36,14 +25,6 @@ async def run_sync(sync_fn: Callable[P, T], *args: P.args, **kwargs: P.kwargs) -
 async def run_taskgroup(*async_tasks: Awaitable[Any]) -> list[Any]:
-    """Run a list of coroutines concurrently.
-    Args:
-        *async_tasks: The list of coroutines to run.
-    Returns:
-        The results of the coroutines.
-    """
     results: list[Any] = [None] * len(async_tasks)
     async def run_task(index: int, task: Awaitable[T]) -> None:
@@ -57,15 +38,6 @@ async def run_taskgroup(*async_tasks: Awaitable[Any]) -> list[Any]:
 async def run_taskgroup_batched(*async_tasks: Awaitable[Any], batch_size: int) -> list[Any]:
-    """Run a list of coroutines concurrently in batches.
-    Args:
-        *async_tasks: The list of coroutines to run.
-        batch_size: The size of each batch.
-    Returns:
-        The results of the coroutines.
-    """
     results: list[Any] = []
     for i in range(0, len(async_tasks), batch_size):
@@ -76,25 +48,6 @@ async def run_taskgroup_batched(*async_tasks: Awaitable[Any], batch_size: int) -
 async def run_maybe_sync(fn: Callable[P, T | Awaitable[T]], *args: P.args, **kwargs: P.kwargs) -> T:
-    """Executes a callable function and handles both synchronous and asynchronous
-    results.
-    This function invokes the provided callable `sync_fn` with the given
-    arguments and keyword arguments. If the result of `sync_fn` is awaitable,
-    it awaits the result before returning it. Otherwise, the result is returned
-    directly.
-    Args:
-        fn: The callable to be executed. It can produce either a
-            synchronous or asynchronous result.
-        *args: Positional arguments to pass to `sync_fn`.
-        **kwargs: Keyword arguments to pass to `sync_fn`.
-    Returns:
-        The result of `sync_fn` invocation. If the result is awaitable, the
-        awaited value is returned. Otherwise, the synchronous result is
-        returned.
-    """
     result = fn(*args, **kwargs)
     if isawaitable(result):
         return cast("T", await result)
@@ -102,40 +55,10 @@ async def run_maybe_sync(fn: Callable[P, T | Awaitable[T]], *args: P.args, **kwa
 def run_maybe_async(fn: Callable[P, T | Awaitable[T]], *args: P.args, **kwargs: P.kwargs) -> T:
-    """Runs a synchronous or asynchronous function, resolving the output.
-    Determines if the provided function is synchronous or asynchronous. If synchronous,
-    executes it directly. If asynchronous, it runs the function within the event loop
-    using anyio. The return value is resolved regardless of the function type.
-    Args:
-        fn: The function to be executed, which can
-            either be synchronous or asynchronous.
-        *args: Positional arguments to be passed to the function.
-        **kwargs: Keyword arguments to be passed to the function.
-    Returns:
-        T: The return value of the executed function, resolved if asynchronous.
-    """
     return cast("T", fn(*args, **kwargs) if not iscoroutinefunction(fn) else anyio.run(partial(fn, **kwargs), *args))
 def run_sync_only(fn: Callable[P, T | Awaitable[T]], *args: P.args, **kwargs: P.kwargs) -> T:
-    """Runs a function, but only if it's synchronous. Raises error if async.
-    This is used for pure sync code paths where we cannot handle async functions.
-    Args:
-        fn: The function to be executed, must be synchronous.
-        *args: Positional arguments to be passed to the function.
-        **kwargs: Keyword arguments to be passed to the function.
-    Returns:
-        T: The return value of the executed function.
-    Raises:
-        RuntimeError: If the function is asynchronous.
-    """
     if iscoroutinefunction(fn):
         raise RuntimeError(f"Cannot run async function {fn.__name__} in sync-only context")
     return cast("T", fn(*args, **kwargs))

kreuzberg 3.11.4__py3-none-any.whl → 3.13.1__py3-none-any.whl

kreuzberg 3.11.4__py3-none-any.whl → 3.13.1__py3-none-any.whl