chunkr-ai 0.1.0a6__py3-none-any.whl → 0.1.0a8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chunkr_ai/__init__.py +2 -0
- chunkr_ai/_base_client.py +3 -3
- chunkr_ai/_client.py +31 -3
- chunkr_ai/_compat.py +48 -48
- chunkr_ai/_constants.py +5 -5
- chunkr_ai/_exceptions.py +4 -0
- chunkr_ai/_models.py +41 -41
- chunkr_ai/_types.py +35 -1
- chunkr_ai/_utils/__init__.py +9 -2
- chunkr_ai/_utils/_compat.py +45 -0
- chunkr_ai/_utils/_datetime_parse.py +136 -0
- chunkr_ai/_utils/_transform.py +11 -1
- chunkr_ai/_utils/_typing.py +6 -1
- chunkr_ai/_utils/_utils.py +0 -1
- chunkr_ai/_version.py +1 -1
- chunkr_ai/resources/__init__.py +14 -0
- chunkr_ai/resources/files.py +3 -3
- chunkr_ai/resources/tasks/__init__.py +14 -0
- chunkr_ai/resources/tasks/extract.py +393 -0
- chunkr_ai/resources/tasks/parse.py +110 -286
- chunkr_ai/resources/tasks/tasks.py +64 -32
- chunkr_ai/resources/webhooks.py +193 -0
- chunkr_ai/types/__init__.py +27 -1
- chunkr_ai/types/bounding_box.py +19 -0
- chunkr_ai/types/cell.py +39 -0
- chunkr_ai/types/cell_style.py +28 -0
- chunkr_ai/types/chunk.py +40 -0
- chunkr_ai/types/chunk_processing.py +40 -0
- chunkr_ai/types/chunk_processing_param.py +42 -0
- chunkr_ai/types/extract_configuration.py +24 -0
- chunkr_ai/types/extract_output_response.py +62 -0
- chunkr_ai/types/file_create_params.py +2 -1
- chunkr_ai/types/file_info.py +21 -0
- chunkr_ai/types/generation_config.py +29 -0
- chunkr_ai/types/generation_config_param.py +29 -0
- chunkr_ai/types/llm_processing.py +36 -0
- chunkr_ai/types/llm_processing_param.py +36 -0
- chunkr_ai/types/ocr_result.py +28 -0
- chunkr_ai/types/page.py +27 -0
- chunkr_ai/types/parse_configuration.py +64 -0
- chunkr_ai/types/parse_configuration_param.py +65 -0
- chunkr_ai/types/parse_output_response.py +29 -0
- chunkr_ai/types/segment.py +109 -0
- chunkr_ai/types/segment_processing.py +228 -0
- chunkr_ai/types/segment_processing_param.py +229 -0
- chunkr_ai/types/task_extract_updated_webhook_event.py +22 -0
- chunkr_ai/types/task_get_params.py +0 -3
- chunkr_ai/types/task_list_params.py +7 -1
- chunkr_ai/types/task_parse_updated_webhook_event.py +22 -0
- chunkr_ai/types/task_response.py +68 -0
- chunkr_ai/types/tasks/__init__.py +7 -1
- chunkr_ai/types/tasks/extract_create_params.py +47 -0
- chunkr_ai/types/tasks/extract_create_response.py +67 -0
- chunkr_ai/types/tasks/extract_get_params.py +18 -0
- chunkr_ai/types/tasks/extract_get_response.py +67 -0
- chunkr_ai/types/tasks/parse_create_params.py +25 -793
- chunkr_ai/types/tasks/parse_create_response.py +55 -0
- chunkr_ai/types/tasks/parse_get_params.py +18 -0
- chunkr_ai/types/tasks/parse_get_response.py +55 -0
- chunkr_ai/types/unwrap_webhook_event.py +11 -0
- chunkr_ai/types/version_info.py +31 -0
- chunkr_ai/types/webhook_url_response.py +9 -0
- {chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a8.dist-info}/METADATA +14 -13
- chunkr_ai-0.1.0a8.dist-info/RECORD +88 -0
- chunkr_ai/types/task.py +0 -1225
- chunkr_ai/types/tasks/parse_update_params.py +0 -845
- chunkr_ai-0.1.0a6.dist-info/RECORD +0 -52
- {chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a8.dist-info}/WHEEL +0 -0
- {chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a8.dist-info}/licenses/LICENSE +0 -0
chunkr_ai/_utils/__init__.py
CHANGED
@@ -10,7 +10,6 @@ from ._utils import (
|
|
10
10
|
lru_cache as lru_cache,
|
11
11
|
is_mapping as is_mapping,
|
12
12
|
is_tuple_t as is_tuple_t,
|
13
|
-
parse_date as parse_date,
|
14
13
|
is_iterable as is_iterable,
|
15
14
|
is_sequence as is_sequence,
|
16
15
|
coerce_float as coerce_float,
|
@@ -23,7 +22,6 @@ from ._utils import (
|
|
23
22
|
coerce_boolean as coerce_boolean,
|
24
23
|
coerce_integer as coerce_integer,
|
25
24
|
file_from_path as file_from_path,
|
26
|
-
parse_datetime as parse_datetime,
|
27
25
|
strip_not_given as strip_not_given,
|
28
26
|
deepcopy_minimal as deepcopy_minimal,
|
29
27
|
get_async_library as get_async_library,
|
@@ -32,12 +30,20 @@ from ._utils import (
|
|
32
30
|
maybe_coerce_boolean as maybe_coerce_boolean,
|
33
31
|
maybe_coerce_integer as maybe_coerce_integer,
|
34
32
|
)
|
33
|
+
from ._compat import (
|
34
|
+
get_args as get_args,
|
35
|
+
is_union as is_union,
|
36
|
+
get_origin as get_origin,
|
37
|
+
is_typeddict as is_typeddict,
|
38
|
+
is_literal_type as is_literal_type,
|
39
|
+
)
|
35
40
|
from ._typing import (
|
36
41
|
is_list_type as is_list_type,
|
37
42
|
is_union_type as is_union_type,
|
38
43
|
extract_type_arg as extract_type_arg,
|
39
44
|
is_iterable_type as is_iterable_type,
|
40
45
|
is_required_type as is_required_type,
|
46
|
+
is_sequence_type as is_sequence_type,
|
41
47
|
is_annotated_type as is_annotated_type,
|
42
48
|
is_type_alias_type as is_type_alias_type,
|
43
49
|
strip_annotated_type as strip_annotated_type,
|
@@ -55,3 +61,4 @@ from ._reflection import (
|
|
55
61
|
function_has_argument as function_has_argument,
|
56
62
|
assert_signatures_in_sync as assert_signatures_in_sync,
|
57
63
|
)
|
64
|
+
from ._datetime_parse import parse_date as parse_date, parse_datetime as parse_datetime
|
chunkr_ai/_utils/_compat.py
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import sys
|
4
|
+
import typing_extensions
|
5
|
+
from typing import Any, Type, Union, Literal, Optional
|
6
|
+
from datetime import date, datetime
|
7
|
+
from typing_extensions import get_args as _get_args, get_origin as _get_origin
|
8
|
+
|
9
|
+
from .._types import StrBytesIntFloat
|
10
|
+
from ._datetime_parse import parse_date as _parse_date, parse_datetime as _parse_datetime
|
11
|
+
|
12
|
+
_LITERAL_TYPES = {Literal, typing_extensions.Literal}
|
13
|
+
|
14
|
+
|
15
|
+
def get_args(tp: type[Any]) -> tuple[Any, ...]:
|
16
|
+
return _get_args(tp)
|
17
|
+
|
18
|
+
|
19
|
+
def get_origin(tp: type[Any]) -> type[Any] | None:
|
20
|
+
return _get_origin(tp)
|
21
|
+
|
22
|
+
|
23
|
+
def is_union(tp: Optional[Type[Any]]) -> bool:
|
24
|
+
if sys.version_info < (3, 10):
|
25
|
+
return tp is Union # type: ignore[comparison-overlap]
|
26
|
+
else:
|
27
|
+
import types
|
28
|
+
|
29
|
+
return tp is Union or tp is types.UnionType
|
30
|
+
|
31
|
+
|
32
|
+
def is_typeddict(tp: Type[Any]) -> bool:
|
33
|
+
return typing_extensions.is_typeddict(tp)
|
34
|
+
|
35
|
+
|
36
|
+
def is_literal_type(tp: Type[Any]) -> bool:
|
37
|
+
return get_origin(tp) in _LITERAL_TYPES
|
38
|
+
|
39
|
+
|
40
|
+
def parse_date(value: Union[date, StrBytesIntFloat]) -> date:
|
41
|
+
return _parse_date(value)
|
42
|
+
|
43
|
+
|
44
|
+
def parse_datetime(value: Union[datetime, StrBytesIntFloat]) -> datetime:
|
45
|
+
return _parse_datetime(value)
|
chunkr_ai/_utils/_datetime_parse.py
ADDED
@@ -0,0 +1,136 @@
|
|
1
|
+
"""
|
2
|
+
This file contains code from https://github.com/pydantic/pydantic/blob/main/pydantic/v1/datetime_parse.py
|
3
|
+
without the Pydantic v1 specific errors.
|
4
|
+
"""
|
5
|
+
|
6
|
+
from __future__ import annotations
|
7
|
+
|
8
|
+
import re
|
9
|
+
from typing import Dict, Union, Optional
|
10
|
+
from datetime import date, datetime, timezone, timedelta
|
11
|
+
|
12
|
+
from .._types import StrBytesIntFloat
|
13
|
+
|
14
|
+
date_expr = r"(?P<year>\d{4})-(?P<month>\d{1,2})-(?P<day>\d{1,2})"
|
15
|
+
time_expr = (
|
16
|
+
r"(?P<hour>\d{1,2}):(?P<minute>\d{1,2})"
|
17
|
+
r"(?::(?P<second>\d{1,2})(?:\.(?P<microsecond>\d{1,6})\d{0,6})?)?"
|
18
|
+
r"(?P<tzinfo>Z|[+-]\d{2}(?::?\d{2})?)?$"
|
19
|
+
)
|
20
|
+
|
21
|
+
date_re = re.compile(f"{date_expr}$")
|
22
|
+
datetime_re = re.compile(f"{date_expr}[T ]{time_expr}")
|
23
|
+
|
24
|
+
|
25
|
+
EPOCH = datetime(1970, 1, 1)
|
26
|
+
# if greater than this, the number is in ms, if less than or equal it's in seconds
|
27
|
+
# (in seconds this is 11th October 2603, in ms it's 20th August 1970)
|
28
|
+
MS_WATERSHED = int(2e10)
|
29
|
+
# slightly more than datetime.max in ns - (datetime.max - EPOCH).total_seconds() * 1e9
|
30
|
+
MAX_NUMBER = int(3e20)
|
31
|
+
|
32
|
+
|
33
|
+
def _get_numeric(value: StrBytesIntFloat, native_expected_type: str) -> Union[None, int, float]:
|
34
|
+
if isinstance(value, (int, float)):
|
35
|
+
return value
|
36
|
+
try:
|
37
|
+
return float(value)
|
38
|
+
except ValueError:
|
39
|
+
return None
|
40
|
+
except TypeError:
|
41
|
+
raise TypeError(f"invalid type; expected {native_expected_type}, string, bytes, int or float") from None
|
42
|
+
|
43
|
+
|
44
|
+
def _from_unix_seconds(seconds: Union[int, float]) -> datetime:
|
45
|
+
if seconds > MAX_NUMBER:
|
46
|
+
return datetime.max
|
47
|
+
elif seconds < -MAX_NUMBER:
|
48
|
+
return datetime.min
|
49
|
+
|
50
|
+
while abs(seconds) > MS_WATERSHED:
|
51
|
+
seconds /= 1000
|
52
|
+
dt = EPOCH + timedelta(seconds=seconds)
|
53
|
+
return dt.replace(tzinfo=timezone.utc)
|
54
|
+
|
55
|
+
|
56
|
+
def _parse_timezone(value: Optional[str]) -> Union[None, int, timezone]:
|
57
|
+
if value == "Z":
|
58
|
+
return timezone.utc
|
59
|
+
elif value is not None:
|
60
|
+
offset_mins = int(value[-2:]) if len(value) > 3 else 0
|
61
|
+
offset = 60 * int(value[1:3]) + offset_mins
|
62
|
+
if value[0] == "-":
|
63
|
+
offset = -offset
|
64
|
+
return timezone(timedelta(minutes=offset))
|
65
|
+
else:
|
66
|
+
return None
|
67
|
+
|
68
|
+
|
69
|
+
def parse_datetime(value: Union[datetime, StrBytesIntFloat]) -> datetime:
|
70
|
+
"""
|
71
|
+
Parse a datetime/int/float/string and return a datetime.datetime.
|
72
|
+
|
73
|
+
This function supports time zone offsets. When the input contains one,
|
74
|
+
the output uses a timezone with a fixed offset from UTC.
|
75
|
+
|
76
|
+
Raise ValueError if the input is well formatted but not a valid datetime.
|
77
|
+
Raise ValueError if the input isn't well formatted.
|
78
|
+
"""
|
79
|
+
if isinstance(value, datetime):
|
80
|
+
return value
|
81
|
+
|
82
|
+
number = _get_numeric(value, "datetime")
|
83
|
+
if number is not None:
|
84
|
+
return _from_unix_seconds(number)
|
85
|
+
|
86
|
+
if isinstance(value, bytes):
|
87
|
+
value = value.decode()
|
88
|
+
|
89
|
+
assert not isinstance(value, (float, int))
|
90
|
+
|
91
|
+
match = datetime_re.match(value)
|
92
|
+
if match is None:
|
93
|
+
raise ValueError("invalid datetime format")
|
94
|
+
|
95
|
+
kw = match.groupdict()
|
96
|
+
if kw["microsecond"]:
|
97
|
+
kw["microsecond"] = kw["microsecond"].ljust(6, "0")
|
98
|
+
|
99
|
+
tzinfo = _parse_timezone(kw.pop("tzinfo"))
|
100
|
+
kw_: Dict[str, Union[None, int, timezone]] = {k: int(v) for k, v in kw.items() if v is not None}
|
101
|
+
kw_["tzinfo"] = tzinfo
|
102
|
+
|
103
|
+
return datetime(**kw_) # type: ignore
|
104
|
+
|
105
|
+
|
106
|
+
def parse_date(value: Union[date, StrBytesIntFloat]) -> date:
|
107
|
+
"""
|
108
|
+
Parse a date/int/float/string and return a datetime.date.
|
109
|
+
|
110
|
+
Raise ValueError if the input is well formatted but not a valid date.
|
111
|
+
Raise ValueError if the input isn't well formatted.
|
112
|
+
"""
|
113
|
+
if isinstance(value, date):
|
114
|
+
if isinstance(value, datetime):
|
115
|
+
return value.date()
|
116
|
+
else:
|
117
|
+
return value
|
118
|
+
|
119
|
+
number = _get_numeric(value, "date")
|
120
|
+
if number is not None:
|
121
|
+
return _from_unix_seconds(number).date()
|
122
|
+
|
123
|
+
if isinstance(value, bytes):
|
124
|
+
value = value.decode()
|
125
|
+
|
126
|
+
assert not isinstance(value, (float, int))
|
127
|
+
match = date_re.match(value)
|
128
|
+
if match is None:
|
129
|
+
raise ValueError("invalid date format")
|
130
|
+
|
131
|
+
kw = {k: int(v) for k, v in match.groupdict().items()}
|
132
|
+
|
133
|
+
try:
|
134
|
+
return date(**kw)
|
135
|
+
except ValueError:
|
136
|
+
raise ValueError("invalid date format") from None
|
chunkr_ai/_utils/_transform.py
CHANGED
@@ -16,18 +16,20 @@ from ._utils import (
|
|
16
16
|
lru_cache,
|
17
17
|
is_mapping,
|
18
18
|
is_iterable,
|
19
|
+
is_sequence,
|
19
20
|
)
|
20
21
|
from .._files import is_base64_file_input
|
22
|
+
from ._compat import get_origin, is_typeddict
|
21
23
|
from ._typing import (
|
22
24
|
is_list_type,
|
23
25
|
is_union_type,
|
24
26
|
extract_type_arg,
|
25
27
|
is_iterable_type,
|
26
28
|
is_required_type,
|
29
|
+
is_sequence_type,
|
27
30
|
is_annotated_type,
|
28
31
|
strip_annotated_type,
|
29
32
|
)
|
30
|
-
from .._compat import get_origin, model_dump, is_typeddict
|
31
33
|
|
32
34
|
_T = TypeVar("_T")
|
33
35
|
|
@@ -167,6 +169,8 @@ def _transform_recursive(
|
|
167
169
|
|
168
170
|
Defaults to the same value as the `annotation` argument.
|
169
171
|
"""
|
172
|
+
from .._compat import model_dump
|
173
|
+
|
170
174
|
if inner_type is None:
|
171
175
|
inner_type = annotation
|
172
176
|
|
@@ -184,6 +188,8 @@ def _transform_recursive(
|
|
184
188
|
(is_list_type(stripped_type) and is_list(data))
|
185
189
|
# Iterable[T]
|
186
190
|
or (is_iterable_type(stripped_type) and is_iterable(data) and not isinstance(data, str))
|
191
|
+
# Sequence[T]
|
192
|
+
or (is_sequence_type(stripped_type) and is_sequence(data) and not isinstance(data, str))
|
187
193
|
):
|
188
194
|
# dicts are technically iterable, but it is an iterable on the keys of the dict and is not usually
|
189
195
|
# intended as an iterable, so we don't transform it.
|
@@ -329,6 +335,8 @@ async def _async_transform_recursive(
|
|
329
335
|
|
330
336
|
Defaults to the same value as the `annotation` argument.
|
331
337
|
"""
|
338
|
+
from .._compat import model_dump
|
339
|
+
|
332
340
|
if inner_type is None:
|
333
341
|
inner_type = annotation
|
334
342
|
|
@@ -346,6 +354,8 @@ async def _async_transform_recursive(
|
|
346
354
|
(is_list_type(stripped_type) and is_list(data))
|
347
355
|
# Iterable[T]
|
348
356
|
or (is_iterable_type(stripped_type) and is_iterable(data) and not isinstance(data, str))
|
357
|
+
# Sequence[T]
|
358
|
+
or (is_sequence_type(stripped_type) and is_sequence(data) and not isinstance(data, str))
|
349
359
|
):
|
350
360
|
# dicts are technically iterable, but it is an iterable on the keys of the dict and is not usually
|
351
361
|
# intended as an iterable, so we don't transform it.
|
chunkr_ai/_utils/_typing.py
CHANGED
@@ -15,7 +15,7 @@ from typing_extensions import (
|
|
15
15
|
|
16
16
|
from ._utils import lru_cache
|
17
17
|
from .._types import InheritsGeneric
|
18
|
-
from .._compat import is_union as _is_union
|
18
|
+
from ._compat import is_union as _is_union
|
19
19
|
|
20
20
|
|
21
21
|
def is_annotated_type(typ: type) -> bool:
|
@@ -26,6 +26,11 @@ def is_list_type(typ: type) -> bool:
|
|
26
26
|
return (get_origin(typ) or typ) == list
|
27
27
|
|
28
28
|
|
29
|
+
def is_sequence_type(typ: type) -> bool:
|
30
|
+
origin = get_origin(typ) or typ
|
31
|
+
return origin == typing_extensions.Sequence or origin == typing.Sequence or origin == _c_abc.Sequence
|
32
|
+
|
33
|
+
|
29
34
|
def is_iterable_type(typ: type) -> bool:
|
30
35
|
"""If the given type is `typing.Iterable[T]`"""
|
31
36
|
origin = get_origin(typ) or typ
|
chunkr_ai/_utils/_utils.py
CHANGED
@@ -22,7 +22,6 @@ from typing_extensions import TypeGuard
|
|
22
22
|
import sniffio
|
23
23
|
|
24
24
|
from .._types import NotGiven, FileTypes, NotGivenOr, HeadersLike
|
25
|
-
from .._compat import parse_date as parse_date, parse_datetime as parse_datetime
|
26
25
|
|
27
26
|
_T = TypeVar("_T")
|
28
27
|
_TupleT = TypeVar("_TupleT", bound=Tuple[object, ...])
|
chunkr_ai/_version.py
CHANGED
chunkr_ai/resources/__init__.py
CHANGED
@@ -24,6 +24,14 @@ from .health import (
|
|
24
24
|
HealthResourceWithStreamingResponse,
|
25
25
|
AsyncHealthResourceWithStreamingResponse,
|
26
26
|
)
|
27
|
+
from .webhooks import (
|
28
|
+
WebhooksResource,
|
29
|
+
AsyncWebhooksResource,
|
30
|
+
WebhooksResourceWithRawResponse,
|
31
|
+
AsyncWebhooksResourceWithRawResponse,
|
32
|
+
WebhooksResourceWithStreamingResponse,
|
33
|
+
AsyncWebhooksResourceWithStreamingResponse,
|
34
|
+
)
|
27
35
|
|
28
36
|
__all__ = [
|
29
37
|
"TasksResource",
|
@@ -44,4 +52,10 @@ __all__ = [
|
|
44
52
|
"AsyncHealthResourceWithRawResponse",
|
45
53
|
"HealthResourceWithStreamingResponse",
|
46
54
|
"AsyncHealthResourceWithStreamingResponse",
|
55
|
+
"WebhooksResource",
|
56
|
+
"AsyncWebhooksResource",
|
57
|
+
"WebhooksResourceWithRawResponse",
|
58
|
+
"AsyncWebhooksResourceWithRawResponse",
|
59
|
+
"WebhooksResourceWithStreamingResponse",
|
60
|
+
"AsyncWebhooksResourceWithStreamingResponse",
|
47
61
|
]
|
chunkr_ai/resources/files.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
from __future__ import annotations
|
4
4
|
|
5
|
-
from typing import Union, Mapping, cast
|
5
|
+
from typing import Union, Mapping, Optional, cast
|
6
6
|
from datetime import datetime
|
7
7
|
from typing_extensions import Literal
|
8
8
|
|
@@ -52,7 +52,7 @@ class FilesResource(SyncAPIResource):
|
|
52
52
|
self,
|
53
53
|
*,
|
54
54
|
file: FileTypes,
|
55
|
-
file_metadata: str,
|
55
|
+
file_metadata: Optional[str] | NotGiven = NOT_GIVEN,
|
56
56
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
57
57
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
58
58
|
extra_headers: Headers | None = None,
|
@@ -360,7 +360,7 @@ class AsyncFilesResource(AsyncAPIResource):
|
|
360
360
|
self,
|
361
361
|
*,
|
362
362
|
file: FileTypes,
|
363
|
-
file_metadata: str,
|
363
|
+
file_metadata: Optional[str] | NotGiven = NOT_GIVEN,
|
364
364
|
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
365
365
|
# The extra values given here take precedence over values defined on the client or passed to this method.
|
366
366
|
extra_headers: Headers | None = None,
|
chunkr_ai/resources/tasks/__init__.py
CHANGED
@@ -16,8 +16,22 @@ from .tasks import (
|
|
16
16
|
TasksResourceWithStreamingResponse,
|
17
17
|
AsyncTasksResourceWithStreamingResponse,
|
18
18
|
)
|
19
|
+
from .extract import (
|
20
|
+
ExtractResource,
|
21
|
+
AsyncExtractResource,
|
22
|
+
ExtractResourceWithRawResponse,
|
23
|
+
AsyncExtractResourceWithRawResponse,
|
24
|
+
ExtractResourceWithStreamingResponse,
|
25
|
+
AsyncExtractResourceWithStreamingResponse,
|
26
|
+
)
|
19
27
|
|
20
28
|
__all__ = [
|
29
|
+
"ExtractResource",
|
30
|
+
"AsyncExtractResource",
|
31
|
+
"ExtractResourceWithRawResponse",
|
32
|
+
"AsyncExtractResourceWithRawResponse",
|
33
|
+
"ExtractResourceWithStreamingResponse",
|
34
|
+
"AsyncExtractResourceWithStreamingResponse",
|
21
35
|
"ParseResource",
|
22
36
|
"AsyncParseResource",
|
23
37
|
"ParseResourceWithRawResponse",
|