PyPI - chunkr-ai - Versions diffs - 0.1.0a6__py3-none-any.whl → 0.1.0a8__py3-none-any.whl - Mend

chunkr-ai 0.1.0a6__py3-none-any.whl → 0.1.0a8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (69) hide show

chunkr_ai/__init__.py +2 -0
chunkr_ai/_base_client.py +3 -3
chunkr_ai/_client.py +31 -3
chunkr_ai/_compat.py +48 -48
chunkr_ai/_constants.py +5 -5
chunkr_ai/_exceptions.py +4 -0
chunkr_ai/_models.py +41 -41
chunkr_ai/_types.py +35 -1
chunkr_ai/_utils/__init__.py +9 -2
chunkr_ai/_utils/_compat.py +45 -0
chunkr_ai/_utils/_datetime_parse.py +136 -0
chunkr_ai/_utils/_transform.py +11 -1
chunkr_ai/_utils/_typing.py +6 -1
chunkr_ai/_utils/_utils.py +0 -1
chunkr_ai/_version.py +1 -1
chunkr_ai/resources/__init__.py +14 -0
chunkr_ai/resources/files.py +3 -3
chunkr_ai/resources/tasks/__init__.py +14 -0
chunkr_ai/resources/tasks/extract.py +393 -0
chunkr_ai/resources/tasks/parse.py +110 -286
chunkr_ai/resources/tasks/tasks.py +64 -32
chunkr_ai/resources/webhooks.py +193 -0
chunkr_ai/types/__init__.py +27 -1
chunkr_ai/types/bounding_box.py +19 -0
chunkr_ai/types/cell.py +39 -0
chunkr_ai/types/cell_style.py +28 -0
chunkr_ai/types/chunk.py +40 -0
chunkr_ai/types/chunk_processing.py +40 -0
chunkr_ai/types/chunk_processing_param.py +42 -0
chunkr_ai/types/extract_configuration.py +24 -0
chunkr_ai/types/extract_output_response.py +62 -0
chunkr_ai/types/file_create_params.py +2 -1
chunkr_ai/types/file_info.py +21 -0
chunkr_ai/types/generation_config.py +29 -0
chunkr_ai/types/generation_config_param.py +29 -0
chunkr_ai/types/llm_processing.py +36 -0
chunkr_ai/types/llm_processing_param.py +36 -0
chunkr_ai/types/ocr_result.py +28 -0
chunkr_ai/types/page.py +27 -0
chunkr_ai/types/parse_configuration.py +64 -0
chunkr_ai/types/parse_configuration_param.py +65 -0
chunkr_ai/types/parse_output_response.py +29 -0
chunkr_ai/types/segment.py +109 -0
chunkr_ai/types/segment_processing.py +228 -0
chunkr_ai/types/segment_processing_param.py +229 -0
chunkr_ai/types/task_extract_updated_webhook_event.py +22 -0
chunkr_ai/types/task_get_params.py +0 -3
chunkr_ai/types/task_list_params.py +7 -1
chunkr_ai/types/task_parse_updated_webhook_event.py +22 -0
chunkr_ai/types/task_response.py +68 -0
chunkr_ai/types/tasks/__init__.py +7 -1
chunkr_ai/types/tasks/extract_create_params.py +47 -0
chunkr_ai/types/tasks/extract_create_response.py +67 -0
chunkr_ai/types/tasks/extract_get_params.py +18 -0
chunkr_ai/types/tasks/extract_get_response.py +67 -0
chunkr_ai/types/tasks/parse_create_params.py +25 -793
chunkr_ai/types/tasks/parse_create_response.py +55 -0
chunkr_ai/types/tasks/parse_get_params.py +18 -0
chunkr_ai/types/tasks/parse_get_response.py +55 -0
chunkr_ai/types/unwrap_webhook_event.py +11 -0
chunkr_ai/types/version_info.py +31 -0
chunkr_ai/types/webhook_url_response.py +9 -0
{chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a8.dist-info}/METADATA +14 -13
chunkr_ai-0.1.0a8.dist-info/RECORD +88 -0
chunkr_ai/types/task.py +0 -1225
chunkr_ai/types/tasks/parse_update_params.py +0 -845
chunkr_ai-0.1.0a6.dist-info/RECORD +0 -52
{chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a8.dist-info}/WHEEL +0 -0
{chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a8.dist-info}/licenses/LICENSE +0 -0

chunkr_ai/_utils/__init__.py CHANGED Viewed

@@ -10,7 +10,6 @@ from ._utils import (
     lru_cache as lru_cache,
     is_mapping as is_mapping,
     is_tuple_t as is_tuple_t,
-    parse_date as parse_date,
     is_iterable as is_iterable,
     is_sequence as is_sequence,
     coerce_float as coerce_float,
@@ -23,7 +22,6 @@ from ._utils import (
     coerce_boolean as coerce_boolean,
     coerce_integer as coerce_integer,
     file_from_path as file_from_path,
-    parse_datetime as parse_datetime,
     strip_not_given as strip_not_given,
     deepcopy_minimal as deepcopy_minimal,
     get_async_library as get_async_library,
@@ -32,12 +30,20 @@ from ._utils import (
     maybe_coerce_boolean as maybe_coerce_boolean,
     maybe_coerce_integer as maybe_coerce_integer,
 )
+from ._compat import (
+    get_args as get_args,
+    is_union as is_union,
+    get_origin as get_origin,
+    is_typeddict as is_typeddict,
+    is_literal_type as is_literal_type,
+)
 from ._typing import (
     is_list_type as is_list_type,
     is_union_type as is_union_type,
     extract_type_arg as extract_type_arg,
     is_iterable_type as is_iterable_type,
     is_required_type as is_required_type,
+    is_sequence_type as is_sequence_type,
     is_annotated_type as is_annotated_type,
     is_type_alias_type as is_type_alias_type,
     strip_annotated_type as strip_annotated_type,
@@ -55,3 +61,4 @@ from ._reflection import (
     function_has_argument as function_has_argument,
     assert_signatures_in_sync as assert_signatures_in_sync,
 )
+from ._datetime_parse import parse_date as parse_date, parse_datetime as parse_datetime

chunkr_ai/_utils/_compat.py ADDED Viewed

@@ -0,0 +1,45 @@
+from __future__ import annotations
+import sys
+import typing_extensions
+from typing import Any, Type, Union, Literal, Optional
+from datetime import date, datetime
+from typing_extensions import get_args as _get_args, get_origin as _get_origin
+from .._types import StrBytesIntFloat
+from ._datetime_parse import parse_date as _parse_date, parse_datetime as _parse_datetime
+_LITERAL_TYPES = {Literal, typing_extensions.Literal}
+def get_args(tp: type[Any]) -> tuple[Any, ...]:
+    return _get_args(tp)
+def get_origin(tp: type[Any]) -> type[Any] | None:
+    return _get_origin(tp)
+def is_union(tp: Optional[Type[Any]]) -> bool:
+    if sys.version_info < (3, 10):
+        return tp is Union  # type: ignore[comparison-overlap]
+    else:
+        import types
+        return tp is Union or tp is types.UnionType
+def is_typeddict(tp: Type[Any]) -> bool:
+    return typing_extensions.is_typeddict(tp)
+def is_literal_type(tp: Type[Any]) -> bool:
+    return get_origin(tp) in _LITERAL_TYPES
+def parse_date(value: Union[date, StrBytesIntFloat]) -> date:
+    return _parse_date(value)
+def parse_datetime(value: Union[datetime, StrBytesIntFloat]) -> datetime:
+    return _parse_datetime(value)

chunkr_ai/_utils/_datetime_parse.py ADDED Viewed

@@ -0,0 +1,136 @@
+"""
+This file contains code from https://github.com/pydantic/pydantic/blob/main/pydantic/v1/datetime_parse.py
+without the Pydantic v1 specific errors.
+"""
+from __future__ import annotations
+import re
+from typing import Dict, Union, Optional
+from datetime import date, datetime, timezone, timedelta
+from .._types import StrBytesIntFloat
+date_expr = r"(?P<year>\d{4})-(?P<month>\d{1,2})-(?P<day>\d{1,2})"
+time_expr = (
+    r"(?P<hour>\d{1,2}):(?P<minute>\d{1,2})"
+    r"(?::(?P<second>\d{1,2})(?:\.(?P<microsecond>\d{1,6})\d{0,6})?)?"
+    r"(?P<tzinfo>Z|[+-]\d{2}(?::?\d{2})?)?$"
+)
+date_re = re.compile(f"{date_expr}$")
+datetime_re = re.compile(f"{date_expr}[T ]{time_expr}")
+EPOCH = datetime(1970, 1, 1)
+# if greater than this, the number is in ms, if less than or equal it's in seconds
+# (in seconds this is 11th October 2603, in ms it's 20th August 1970)
+MS_WATERSHED = int(2e10)
+# slightly more than datetime.max in ns - (datetime.max - EPOCH).total_seconds() * 1e9
+MAX_NUMBER = int(3e20)
+def _get_numeric(value: StrBytesIntFloat, native_expected_type: str) -> Union[None, int, float]:
+    if isinstance(value, (int, float)):
+        return value
+    try:
+        return float(value)
+    except ValueError:
+        return None
+    except TypeError:
+        raise TypeError(f"invalid type; expected {native_expected_type}, string, bytes, int or float") from None
+def _from_unix_seconds(seconds: Union[int, float]) -> datetime:
+    if seconds > MAX_NUMBER:
+        return datetime.max
+    elif seconds < -MAX_NUMBER:
+        return datetime.min
+    while abs(seconds) > MS_WATERSHED:
+        seconds /= 1000
+    dt = EPOCH + timedelta(seconds=seconds)
+    return dt.replace(tzinfo=timezone.utc)
+def _parse_timezone(value: Optional[str]) -> Union[None, int, timezone]:
+    if value == "Z":
+        return timezone.utc
+    elif value is not None:
+        offset_mins = int(value[-2:]) if len(value) > 3 else 0
+        offset = 60 * int(value[1:3]) + offset_mins
+        if value[0] == "-":
+            offset = -offset
+        return timezone(timedelta(minutes=offset))
+    else:
+        return None
+def parse_datetime(value: Union[datetime, StrBytesIntFloat]) -> datetime:
+    """
+    Parse a datetime/int/float/string and return a datetime.datetime.
+    This function supports time zone offsets. When the input contains one,
+    the output uses a timezone with a fixed offset from UTC.
+    Raise ValueError if the input is well formatted but not a valid datetime.
+    Raise ValueError if the input isn't well formatted.
+    """
+    if isinstance(value, datetime):
+        return value
+    number = _get_numeric(value, "datetime")
+    if number is not None:
+        return _from_unix_seconds(number)
+    if isinstance(value, bytes):
+        value = value.decode()
+    assert not isinstance(value, (float, int))
+    match = datetime_re.match(value)
+    if match is None:
+        raise ValueError("invalid datetime format")
+    kw = match.groupdict()
+    if kw["microsecond"]:
+        kw["microsecond"] = kw["microsecond"].ljust(6, "0")
+    tzinfo = _parse_timezone(kw.pop("tzinfo"))
+    kw_: Dict[str, Union[None, int, timezone]] = {k: int(v) for k, v in kw.items() if v is not None}
+    kw_["tzinfo"] = tzinfo
+    return datetime(**kw_)  # type: ignore
+def parse_date(value: Union[date, StrBytesIntFloat]) -> date:
+    """
+    Parse a date/int/float/string and return a datetime.date.
+    Raise ValueError if the input is well formatted but not a valid date.
+    Raise ValueError if the input isn't well formatted.
+    """
+    if isinstance(value, date):
+        if isinstance(value, datetime):
+            return value.date()
+        else:
+            return value
+    number = _get_numeric(value, "date")
+    if number is not None:
+        return _from_unix_seconds(number).date()
+    if isinstance(value, bytes):
+        value = value.decode()
+    assert not isinstance(value, (float, int))
+    match = date_re.match(value)
+    if match is None:
+        raise ValueError("invalid date format")
+    kw = {k: int(v) for k, v in match.groupdict().items()}
+    try:
+        return date(**kw)
+    except ValueError:
+        raise ValueError("invalid date format") from None

chunkr_ai/_utils/_transform.py CHANGED Viewed

@@ -16,18 +16,20 @@ from ._utils import (
     lru_cache,
     is_mapping,
     is_iterable,
+    is_sequence,
 )
 from .._files import is_base64_file_input
+from ._compat import get_origin, is_typeddict
 from ._typing import (
     is_list_type,
     is_union_type,
     extract_type_arg,
     is_iterable_type,
     is_required_type,
+    is_sequence_type,
     is_annotated_type,
     strip_annotated_type,
 )
-from .._compat import get_origin, model_dump, is_typeddict
 _T = TypeVar("_T")
@@ -167,6 +169,8 @@ def _transform_recursive(
             Defaults to the same value as the `annotation` argument.
     """
+    from .._compat import model_dump
     if inner_type is None:
         inner_type = annotation
@@ -184,6 +188,8 @@ def _transform_recursive(
         (is_list_type(stripped_type) and is_list(data))
         # Iterable[T]
         or (is_iterable_type(stripped_type) and is_iterable(data) and not isinstance(data, str))
+        # Sequence[T]
+        or (is_sequence_type(stripped_type) and is_sequence(data) and not isinstance(data, str))
     ):
         # dicts are technically iterable, but it is an iterable on the keys of the dict and is not usually
         # intended as an iterable, so we don't transform it.
@@ -329,6 +335,8 @@ async def _async_transform_recursive(
             Defaults to the same value as the `annotation` argument.
     """
+    from .._compat import model_dump
     if inner_type is None:
         inner_type = annotation
@@ -346,6 +354,8 @@ async def _async_transform_recursive(
         (is_list_type(stripped_type) and is_list(data))
         # Iterable[T]
         or (is_iterable_type(stripped_type) and is_iterable(data) and not isinstance(data, str))
+        # Sequence[T]
+        or (is_sequence_type(stripped_type) and is_sequence(data) and not isinstance(data, str))
     ):
         # dicts are technically iterable, but it is an iterable on the keys of the dict and is not usually
         # intended as an iterable, so we don't transform it.

chunkr_ai/_utils/_typing.py CHANGED Viewed

@@ -15,7 +15,7 @@ from typing_extensions import (
 from ._utils import lru_cache
 from .._types import InheritsGeneric
-from .._compat import is_union as _is_union
+from ._compat import is_union as _is_union
 def is_annotated_type(typ: type) -> bool:
@@ -26,6 +26,11 @@ def is_list_type(typ: type) -> bool:
     return (get_origin(typ) or typ) == list
+def is_sequence_type(typ: type) -> bool:
+    origin = get_origin(typ) or typ
+    return origin == typing_extensions.Sequence or origin == typing.Sequence or origin == _c_abc.Sequence
 def is_iterable_type(typ: type) -> bool:
     """If the given type is `typing.Iterable[T]`"""
     origin = get_origin(typ) or typ

chunkr_ai/_utils/_utils.py CHANGED Viewed

@@ -22,7 +22,6 @@ from typing_extensions import TypeGuard
 import sniffio
 from .._types import NotGiven, FileTypes, NotGivenOr, HeadersLike
-from .._compat import parse_date as parse_date, parse_datetime as parse_datetime
 _T = TypeVar("_T")
 _TupleT = TypeVar("_TupleT", bound=Tuple[object, ...])

chunkr_ai/_version.py CHANGED Viewed

@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 __title__ = "chunkr_ai"
-__version__ = "0.1.0-alpha.6"  # x-release-please-version
+__version__ = "0.1.0-alpha.8"  # x-release-please-version

chunkr_ai/resources/__init__.py CHANGED Viewed

@@ -24,6 +24,14 @@ from .health import (
     HealthResourceWithStreamingResponse,
     AsyncHealthResourceWithStreamingResponse,
 )
+from .webhooks import (
+    WebhooksResource,
+    AsyncWebhooksResource,
+    WebhooksResourceWithRawResponse,
+    AsyncWebhooksResourceWithRawResponse,
+    WebhooksResourceWithStreamingResponse,
+    AsyncWebhooksResourceWithStreamingResponse,
+)
 __all__ = [
     "TasksResource",
@@ -44,4 +52,10 @@ __all__ = [
     "AsyncHealthResourceWithRawResponse",
     "HealthResourceWithStreamingResponse",
     "AsyncHealthResourceWithStreamingResponse",
+    "WebhooksResource",
+    "AsyncWebhooksResource",
+    "WebhooksResourceWithRawResponse",
+    "AsyncWebhooksResourceWithRawResponse",
+    "WebhooksResourceWithStreamingResponse",
+    "AsyncWebhooksResourceWithStreamingResponse",
 ]

chunkr_ai/resources/files.py CHANGED Viewed

@@ -2,7 +2,7 @@
 from __future__ import annotations
-from typing import Union, Mapping, cast
+from typing import Union, Mapping, Optional, cast
 from datetime import datetime
 from typing_extensions import Literal
@@ -52,7 +52,7 @@ class FilesResource(SyncAPIResource):
         self,
         *,
         file: FileTypes,
-        file_metadata: str,
+        file_metadata: Optional[str] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -360,7 +360,7 @@ class AsyncFilesResource(AsyncAPIResource):
         self,
         *,
         file: FileTypes,
-        file_metadata: str,
+        file_metadata: Optional[str] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,

chunkr_ai/resources/tasks/__init__.py CHANGED Viewed

@@ -16,8 +16,22 @@ from .tasks import (
     TasksResourceWithStreamingResponse,
     AsyncTasksResourceWithStreamingResponse,
 )
+from .extract import (
+    ExtractResource,
+    AsyncExtractResource,
+    ExtractResourceWithRawResponse,
+    AsyncExtractResourceWithRawResponse,
+    ExtractResourceWithStreamingResponse,
+    AsyncExtractResourceWithStreamingResponse,
+)
 __all__ = [
+    "ExtractResource",
+    "AsyncExtractResource",
+    "ExtractResourceWithRawResponse",
+    "AsyncExtractResourceWithRawResponse",
+    "ExtractResourceWithStreamingResponse",
+    "AsyncExtractResourceWithStreamingResponse",
     "ParseResource",
     "AsyncParseResource",
     "ParseResourceWithRawResponse",

chunkr-ai 0.1.0a6__py3-none-any.whl → 0.1.0a8__py3-none-any.whl

chunkr-ai 0.1.0a6__py3-none-any.whl → 0.1.0a8__py3-none-any.whl