chunkr-ai 0.1.0a6__py3-none-any.whl → 0.1.0a8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. chunkr_ai/__init__.py +2 -0
  2. chunkr_ai/_base_client.py +3 -3
  3. chunkr_ai/_client.py +31 -3
  4. chunkr_ai/_compat.py +48 -48
  5. chunkr_ai/_constants.py +5 -5
  6. chunkr_ai/_exceptions.py +4 -0
  7. chunkr_ai/_models.py +41 -41
  8. chunkr_ai/_types.py +35 -1
  9. chunkr_ai/_utils/__init__.py +9 -2
  10. chunkr_ai/_utils/_compat.py +45 -0
  11. chunkr_ai/_utils/_datetime_parse.py +136 -0
  12. chunkr_ai/_utils/_transform.py +11 -1
  13. chunkr_ai/_utils/_typing.py +6 -1
  14. chunkr_ai/_utils/_utils.py +0 -1
  15. chunkr_ai/_version.py +1 -1
  16. chunkr_ai/resources/__init__.py +14 -0
  17. chunkr_ai/resources/files.py +3 -3
  18. chunkr_ai/resources/tasks/__init__.py +14 -0
  19. chunkr_ai/resources/tasks/extract.py +393 -0
  20. chunkr_ai/resources/tasks/parse.py +110 -286
  21. chunkr_ai/resources/tasks/tasks.py +64 -32
  22. chunkr_ai/resources/webhooks.py +193 -0
  23. chunkr_ai/types/__init__.py +27 -1
  24. chunkr_ai/types/bounding_box.py +19 -0
  25. chunkr_ai/types/cell.py +39 -0
  26. chunkr_ai/types/cell_style.py +28 -0
  27. chunkr_ai/types/chunk.py +40 -0
  28. chunkr_ai/types/chunk_processing.py +40 -0
  29. chunkr_ai/types/chunk_processing_param.py +42 -0
  30. chunkr_ai/types/extract_configuration.py +24 -0
  31. chunkr_ai/types/extract_output_response.py +62 -0
  32. chunkr_ai/types/file_create_params.py +2 -1
  33. chunkr_ai/types/file_info.py +21 -0
  34. chunkr_ai/types/generation_config.py +29 -0
  35. chunkr_ai/types/generation_config_param.py +29 -0
  36. chunkr_ai/types/llm_processing.py +36 -0
  37. chunkr_ai/types/llm_processing_param.py +36 -0
  38. chunkr_ai/types/ocr_result.py +28 -0
  39. chunkr_ai/types/page.py +27 -0
  40. chunkr_ai/types/parse_configuration.py +64 -0
  41. chunkr_ai/types/parse_configuration_param.py +65 -0
  42. chunkr_ai/types/parse_output_response.py +29 -0
  43. chunkr_ai/types/segment.py +109 -0
  44. chunkr_ai/types/segment_processing.py +228 -0
  45. chunkr_ai/types/segment_processing_param.py +229 -0
  46. chunkr_ai/types/task_extract_updated_webhook_event.py +22 -0
  47. chunkr_ai/types/task_get_params.py +0 -3
  48. chunkr_ai/types/task_list_params.py +7 -1
  49. chunkr_ai/types/task_parse_updated_webhook_event.py +22 -0
  50. chunkr_ai/types/task_response.py +68 -0
  51. chunkr_ai/types/tasks/__init__.py +7 -1
  52. chunkr_ai/types/tasks/extract_create_params.py +47 -0
  53. chunkr_ai/types/tasks/extract_create_response.py +67 -0
  54. chunkr_ai/types/tasks/extract_get_params.py +18 -0
  55. chunkr_ai/types/tasks/extract_get_response.py +67 -0
  56. chunkr_ai/types/tasks/parse_create_params.py +25 -793
  57. chunkr_ai/types/tasks/parse_create_response.py +55 -0
  58. chunkr_ai/types/tasks/parse_get_params.py +18 -0
  59. chunkr_ai/types/tasks/parse_get_response.py +55 -0
  60. chunkr_ai/types/unwrap_webhook_event.py +11 -0
  61. chunkr_ai/types/version_info.py +31 -0
  62. chunkr_ai/types/webhook_url_response.py +9 -0
  63. {chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a8.dist-info}/METADATA +14 -13
  64. chunkr_ai-0.1.0a8.dist-info/RECORD +88 -0
  65. chunkr_ai/types/task.py +0 -1225
  66. chunkr_ai/types/tasks/parse_update_params.py +0 -845
  67. chunkr_ai-0.1.0a6.dist-info/RECORD +0 -52
  68. {chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a8.dist-info}/WHEEL +0 -0
  69. {chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a8.dist-info}/licenses/LICENSE +0 -0
@@ -10,7 +10,6 @@ from ._utils import (
10
10
  lru_cache as lru_cache,
11
11
  is_mapping as is_mapping,
12
12
  is_tuple_t as is_tuple_t,
13
- parse_date as parse_date,
14
13
  is_iterable as is_iterable,
15
14
  is_sequence as is_sequence,
16
15
  coerce_float as coerce_float,
@@ -23,7 +22,6 @@ from ._utils import (
23
22
  coerce_boolean as coerce_boolean,
24
23
  coerce_integer as coerce_integer,
25
24
  file_from_path as file_from_path,
26
- parse_datetime as parse_datetime,
27
25
  strip_not_given as strip_not_given,
28
26
  deepcopy_minimal as deepcopy_minimal,
29
27
  get_async_library as get_async_library,
@@ -32,12 +30,20 @@ from ._utils import (
32
30
  maybe_coerce_boolean as maybe_coerce_boolean,
33
31
  maybe_coerce_integer as maybe_coerce_integer,
34
32
  )
33
+ from ._compat import (
34
+ get_args as get_args,
35
+ is_union as is_union,
36
+ get_origin as get_origin,
37
+ is_typeddict as is_typeddict,
38
+ is_literal_type as is_literal_type,
39
+ )
35
40
  from ._typing import (
36
41
  is_list_type as is_list_type,
37
42
  is_union_type as is_union_type,
38
43
  extract_type_arg as extract_type_arg,
39
44
  is_iterable_type as is_iterable_type,
40
45
  is_required_type as is_required_type,
46
+ is_sequence_type as is_sequence_type,
41
47
  is_annotated_type as is_annotated_type,
42
48
  is_type_alias_type as is_type_alias_type,
43
49
  strip_annotated_type as strip_annotated_type,
@@ -55,3 +61,4 @@ from ._reflection import (
55
61
  function_has_argument as function_has_argument,
56
62
  assert_signatures_in_sync as assert_signatures_in_sync,
57
63
  )
64
+ from ._datetime_parse import parse_date as parse_date, parse_datetime as parse_datetime
@@ -0,0 +1,45 @@
1
+ from __future__ import annotations
2
+
3
+ import sys
4
+ import typing_extensions
5
+ from typing import Any, Type, Union, Literal, Optional
6
+ from datetime import date, datetime
7
+ from typing_extensions import get_args as _get_args, get_origin as _get_origin
8
+
9
+ from .._types import StrBytesIntFloat
10
+ from ._datetime_parse import parse_date as _parse_date, parse_datetime as _parse_datetime
11
+
12
+ _LITERAL_TYPES = {Literal, typing_extensions.Literal}
13
+
14
+
15
def get_args(tp: type[Any]) -> tuple[Any, ...]:
    """Return the type arguments of `tp`.

    Thin wrapper over `typing_extensions.get_args` so the rest of the package
    imports it from one place.
    """
    type_arguments = _get_args(tp)
    return type_arguments
17
+
18
+
19
def get_origin(tp: type[Any]) -> type[Any] | None:
    """Return the origin reported by `typing_extensions.get_origin` for `tp`.

    The result is passed through unchanged, including `None` when the
    underlying helper reports no origin.
    """
    origin = _get_origin(tp)
    return origin
21
+
22
+
23
def is_union(tp: Optional[Type[Any]]) -> bool:
    """Tell whether `tp` is a union origin.

    `typing.Union` is always recognised. On Python 3.10 and newer
    `types.UnionType` is recognised as well; `types` is imported lazily
    because the attribute does not exist on older interpreters.
    """
    if sys.version_info >= (3, 10):
        import types

        return tp is Union or tp is types.UnionType

    return tp is Union  # type: ignore[comparison-overlap]
30
+
31
+
32
def is_typeddict(tp: Type[Any]) -> bool:
    """Report whether `tp` is a TypedDict, as judged by `typing_extensions.is_typeddict`."""
    verdict = typing_extensions.is_typeddict(tp)
    return verdict
34
+
35
+
36
def is_literal_type(tp: Type[Any]) -> bool:
    """Report whether the origin of `tp` is one of the known `Literal` forms.

    Both `typing.Literal` and `typing_extensions.Literal` are listed in
    `_LITERAL_TYPES`, so either spelling is accepted.
    """
    origin = get_origin(tp)
    return origin in _LITERAL_TYPES
38
+
39
+
40
def parse_date(value: Union[date, StrBytesIntFloat]) -> date:
    """Parse `value` into a `date` by delegating to `_datetime_parse.parse_date`."""
    parsed = _parse_date(value)
    return parsed
42
+
43
+
44
def parse_datetime(value: Union[datetime, StrBytesIntFloat]) -> datetime:
    """Parse `value` into a `datetime` by delegating to `_datetime_parse.parse_datetime`."""
    parsed = _parse_datetime(value)
    return parsed
@@ -0,0 +1,136 @@
1
+ """
2
+ This file contains code from https://github.com/pydantic/pydantic/blob/main/pydantic/v1/datetime_parse.py
3
+ without the Pydantic v1 specific errors.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import re
9
+ from typing import Dict, Union, Optional
10
+ from datetime import date, datetime, timezone, timedelta
11
+
12
+ from .._types import StrBytesIntFloat
13
+
14
# Calendar date: four-digit year, then one- or two-digit month and day.
date_expr = r"(?P<year>\d{4})-(?P<month>\d{1,2})-(?P<day>\d{1,2})"
# Time of day: hour and minute are required; seconds, a fractional part and a
# trailing zone designator are optional. Only the first six fractional digits
# are captured; up to six more are matched and discarded.
time_expr = (
    r"(?P<hour>\d{1,2}):(?P<minute>\d{1,2})"
    r"(?::(?P<second>\d{1,2})(?:\.(?P<microsecond>\d{1,6})\d{0,6})?)?"
    r"(?P<tzinfo>Z|[+-]\d{2}(?::?\d{2})?)?$"
)

# Compiled once at import time; both patterns are anchored at the end.
date_re = re.compile(f"{date_expr}$")
datetime_re = re.compile(f"{date_expr}[T ]{time_expr}")


# Naive reference point that numeric timestamps are added to.
EPOCH = datetime(year=1970, month=1, day=1)
# if greater than this, the number is in ms, if less than or equal it's in seconds
# (in seconds this is 11th October 2603, in ms it's 20th August 1970)
MS_WATERSHED = 2 * 10**10
# slightly more than datetime.max in ns - (datetime.max - EPOCH).total_seconds() * 1e9
MAX_NUMBER = 3 * 10**20
31
+
32
+
33
def _get_numeric(value: StrBytesIntFloat, native_expected_type: str) -> Union[None, int, float]:
    """Interpret `value` as a number when possible.

    Args:
        value: The raw input. Ints and floats are returned untouched; anything
            else is handed to `float()`.
        native_expected_type: Name of the target type, used only to build the
            `TypeError` message.

    Returns:
        The numeric value, or `None` when `float()` rejects the content
        (for example a formatted date string).

    Raises:
        TypeError: If `float()` cannot accept the type of `value` at all.
    """
    if isinstance(value, (int, float)):
        return value

    try:
        as_float = float(value)
    except ValueError:
        # Right kind of object, but not numeric text: let the caller fall back to regex parsing.
        return None
    except TypeError:
        raise TypeError(f"invalid type; expected {native_expected_type}, string, bytes, int or float") from None
    else:
        return as_float
42
+
43
+
44
def _from_unix_seconds(seconds: Union[int, float]) -> datetime:
    """Convert a numeric timestamp into a `datetime`.

    Values beyond `MAX_NUMBER` in either direction are clamped to
    `datetime.max` / `datetime.min`, which are returned as-is. Any other value
    is divided by 1000 until its magnitude no longer exceeds `MS_WATERSHED`,
    added to `EPOCH`, and returned with `tzinfo` set to UTC.
    """
    if seconds > MAX_NUMBER:
        return datetime.max
    if seconds < -MAX_NUMBER:
        return datetime.min

    scaled = seconds
    # Repeated division (rather than one big divisor) keeps float results unchanged.
    while abs(scaled) > MS_WATERSHED:
        scaled = scaled / 1000

    moment = EPOCH + timedelta(seconds=scaled)
    return moment.replace(tzinfo=timezone.utc)
54
+
55
+
56
def _parse_timezone(value: Optional[str]) -> Union[None, int, timezone]:
    """Turn a zone designator into a `timezone`.

    Args:
        value: `None`, the literal `"Z"`, or a signed offset whose characters
            1-2 are the hours and, when longer than three characters, whose
            last two characters are the minutes.

    Returns:
        `None` for `None`, `timezone.utc` for `"Z"`, otherwise a fixed-offset
        `timezone`.
    """
    if value is None:
        return None
    if value == "Z":
        return timezone.utc

    # Minutes are read before hours so a malformed string fails on the same piece as before.
    minute_part = int(value[-2:]) if len(value) > 3 else 0
    total_minutes = 60 * int(value[1:3]) + minute_part
    if value[0] == "-":
        total_minutes = -total_minutes
    return timezone(timedelta(minutes=total_minutes))
67
+
68
+
69
def parse_datetime(value: Union[datetime, StrBytesIntFloat]) -> datetime:
    """
    Parse a datetime/int/float/string and return a datetime.datetime.

    A `datetime` is returned unchanged. Numeric input (including numeric
    text) is treated as a timestamp. Any other text must match
    `datetime_re`; when it carries a zone designator the result uses a
    timezone with a fixed offset from UTC.

    Raise ValueError if the input is well formatted but not a valid datetime.
    Raise ValueError if the input isn't well formatted.
    """
    if isinstance(value, datetime):
        return value

    timestamp = _get_numeric(value, "datetime")
    if timestamp is not None:
        return _from_unix_seconds(timestamp)

    if isinstance(value, bytes):
        value = value.decode()

    assert not isinstance(value, (float, int))

    matched = datetime_re.match(value)
    if matched is None:
        raise ValueError("invalid datetime format")

    groups = matched.groupdict()
    zone_text = groups.pop("tzinfo")

    fraction = groups["microsecond"]
    if fraction:
        # Right-pad so e.g. ".5" becomes 500000 microseconds rather than 5.
        groups["microsecond"] = fraction.ljust(6, "0")

    tzinfo = _parse_timezone(zone_text)

    fields: Dict[str, Union[None, int, timezone]] = {}
    for name, text in groups.items():
        # Optional groups that did not take part in the match are left to the datetime defaults.
        if text is not None:
            fields[name] = int(text)
    fields["tzinfo"] = tzinfo

    return datetime(**fields)  # type: ignore
104
+
105
+
106
def parse_date(value: Union[date, StrBytesIntFloat]) -> date:
    """
    Parse a date/int/float/string and return a datetime.date.

    A `datetime` is reduced to its date part and a plain `date` is returned
    unchanged. Numeric input (including numeric text) is treated as a
    timestamp. Any other text must match `date_re`.

    Raise ValueError if the input is well formatted but not a valid date.
    Raise ValueError if the input isn't well formatted.
    """
    # datetime is checked first because it is a subclass of date.
    if isinstance(value, datetime):
        return value.date()
    if isinstance(value, date):
        return value

    timestamp = _get_numeric(value, "date")
    if timestamp is not None:
        return _from_unix_seconds(timestamp).date()

    if isinstance(value, bytes):
        value = value.decode()

    assert not isinstance(value, (float, int))
    matched = date_re.match(value)
    if matched is None:
        raise ValueError("invalid date format")

    year, month, day = (int(matched.group(part)) for part in ("year", "month", "day"))

    try:
        return date(year=year, month=month, day=day)
    except ValueError:
        # Well-formed text naming an impossible date; report it with the same message.
        raise ValueError("invalid date format") from None
@@ -16,18 +16,20 @@ from ._utils import (
16
16
  lru_cache,
17
17
  is_mapping,
18
18
  is_iterable,
19
+ is_sequence,
19
20
  )
20
21
  from .._files import is_base64_file_input
22
+ from ._compat import get_origin, is_typeddict
21
23
  from ._typing import (
22
24
  is_list_type,
23
25
  is_union_type,
24
26
  extract_type_arg,
25
27
  is_iterable_type,
26
28
  is_required_type,
29
+ is_sequence_type,
27
30
  is_annotated_type,
28
31
  strip_annotated_type,
29
32
  )
30
- from .._compat import get_origin, model_dump, is_typeddict
31
33
 
32
34
  _T = TypeVar("_T")
33
35
 
@@ -167,6 +169,8 @@ def _transform_recursive(
167
169
 
168
170
  Defaults to the same value as the `annotation` argument.
169
171
  """
172
+ from .._compat import model_dump
173
+
170
174
  if inner_type is None:
171
175
  inner_type = annotation
172
176
 
@@ -184,6 +188,8 @@ def _transform_recursive(
184
188
  (is_list_type(stripped_type) and is_list(data))
185
189
  # Iterable[T]
186
190
  or (is_iterable_type(stripped_type) and is_iterable(data) and not isinstance(data, str))
191
+ # Sequence[T]
192
+ or (is_sequence_type(stripped_type) and is_sequence(data) and not isinstance(data, str))
187
193
  ):
188
194
  # dicts are technically iterable, but it is an iterable on the keys of the dict and is not usually
189
195
  # intended as an iterable, so we don't transform it.
@@ -329,6 +335,8 @@ async def _async_transform_recursive(
329
335
 
330
336
  Defaults to the same value as the `annotation` argument.
331
337
  """
338
+ from .._compat import model_dump
339
+
332
340
  if inner_type is None:
333
341
  inner_type = annotation
334
342
 
@@ -346,6 +354,8 @@ async def _async_transform_recursive(
346
354
  (is_list_type(stripped_type) and is_list(data))
347
355
  # Iterable[T]
348
356
  or (is_iterable_type(stripped_type) and is_iterable(data) and not isinstance(data, str))
357
+ # Sequence[T]
358
+ or (is_sequence_type(stripped_type) and is_sequence(data) and not isinstance(data, str))
349
359
  ):
350
360
  # dicts are technically iterable, but it is an iterable on the keys of the dict and is not usually
351
361
  # intended as an iterable, so we don't transform it.
@@ -15,7 +15,7 @@ from typing_extensions import (
15
15
 
16
16
  from ._utils import lru_cache
17
17
  from .._types import InheritsGeneric
18
- from .._compat import is_union as _is_union
18
+ from ._compat import is_union as _is_union
19
19
 
20
20
 
21
21
  def is_annotated_type(typ: type) -> bool:
@@ -26,6 +26,11 @@ def is_list_type(typ: type) -> bool:
26
26
  return (get_origin(typ) or typ) == list
27
27
 
28
28
 
29
def is_sequence_type(typ: type) -> bool:
    """If the given type is `Sequence[T]` (or the bare `Sequence`).

    The origin of `typ`, or `typ` itself when it has none, is compared with
    the `Sequence` exposed by `typing_extensions`, `typing` and `_c_abc`.
    """
    origin = get_origin(typ) or typ
    if origin == typing_extensions.Sequence:
        return True
    if origin == typing.Sequence:
        return True
    return origin == _c_abc.Sequence
32
+
33
+
29
34
  def is_iterable_type(typ: type) -> bool:
30
35
  """If the given type is `typing.Iterable[T]`"""
31
36
  origin = get_origin(typ) or typ
@@ -22,7 +22,6 @@ from typing_extensions import TypeGuard
22
22
  import sniffio
23
23
 
24
24
  from .._types import NotGiven, FileTypes, NotGivenOr, HeadersLike
25
- from .._compat import parse_date as parse_date, parse_datetime as parse_datetime
26
25
 
27
26
  _T = TypeVar("_T")
28
27
  _TupleT = TypeVar("_TupleT", bound=Tuple[object, ...])
chunkr_ai/_version.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
2
 
3
3
  __title__ = "chunkr_ai"
4
- __version__ = "0.1.0-alpha.6" # x-release-please-version
4
+ __version__ = "0.1.0-alpha.8" # x-release-please-version
@@ -24,6 +24,14 @@ from .health import (
24
24
  HealthResourceWithStreamingResponse,
25
25
  AsyncHealthResourceWithStreamingResponse,
26
26
  )
27
+ from .webhooks import (
28
+ WebhooksResource,
29
+ AsyncWebhooksResource,
30
+ WebhooksResourceWithRawResponse,
31
+ AsyncWebhooksResourceWithRawResponse,
32
+ WebhooksResourceWithStreamingResponse,
33
+ AsyncWebhooksResourceWithStreamingResponse,
34
+ )
27
35
 
28
36
  __all__ = [
29
37
  "TasksResource",
@@ -44,4 +52,10 @@ __all__ = [
44
52
  "AsyncHealthResourceWithRawResponse",
45
53
  "HealthResourceWithStreamingResponse",
46
54
  "AsyncHealthResourceWithStreamingResponse",
55
+ "WebhooksResource",
56
+ "AsyncWebhooksResource",
57
+ "WebhooksResourceWithRawResponse",
58
+ "AsyncWebhooksResourceWithRawResponse",
59
+ "WebhooksResourceWithStreamingResponse",
60
+ "AsyncWebhooksResourceWithStreamingResponse",
47
61
  ]
@@ -2,7 +2,7 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from typing import Union, Mapping, cast
5
+ from typing import Union, Mapping, Optional, cast
6
6
  from datetime import datetime
7
7
  from typing_extensions import Literal
8
8
 
@@ -52,7 +52,7 @@ class FilesResource(SyncAPIResource):
52
52
  self,
53
53
  *,
54
54
  file: FileTypes,
55
- file_metadata: str,
55
+ file_metadata: Optional[str] | NotGiven = NOT_GIVEN,
56
56
  # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
57
57
  # The extra values given here take precedence over values defined on the client or passed to this method.
58
58
  extra_headers: Headers | None = None,
@@ -360,7 +360,7 @@ class AsyncFilesResource(AsyncAPIResource):
360
360
  self,
361
361
  *,
362
362
  file: FileTypes,
363
- file_metadata: str,
363
+ file_metadata: Optional[str] | NotGiven = NOT_GIVEN,
364
364
  # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
365
365
  # The extra values given here take precedence over values defined on the client or passed to this method.
366
366
  extra_headers: Headers | None = None,
@@ -16,8 +16,22 @@ from .tasks import (
16
16
  TasksResourceWithStreamingResponse,
17
17
  AsyncTasksResourceWithStreamingResponse,
18
18
  )
19
+ from .extract import (
20
+ ExtractResource,
21
+ AsyncExtractResource,
22
+ ExtractResourceWithRawResponse,
23
+ AsyncExtractResourceWithRawResponse,
24
+ ExtractResourceWithStreamingResponse,
25
+ AsyncExtractResourceWithStreamingResponse,
26
+ )
19
27
 
20
28
  __all__ = [
29
+ "ExtractResource",
30
+ "AsyncExtractResource",
31
+ "ExtractResourceWithRawResponse",
32
+ "AsyncExtractResourceWithRawResponse",
33
+ "ExtractResourceWithStreamingResponse",
34
+ "AsyncExtractResourceWithStreamingResponse",
21
35
  "ParseResource",
22
36
  "AsyncParseResource",
23
37
  "ParseResourceWithRawResponse",