chunkr-ai 0.1.0a6__py3-none-any.whl → 0.1.0a7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. chunkr_ai/__init__.py +2 -0
  2. chunkr_ai/_client.py +31 -3
  3. chunkr_ai/_constants.py +5 -5
  4. chunkr_ai/_exceptions.py +4 -0
  5. chunkr_ai/_models.py +1 -1
  6. chunkr_ai/_types.py +35 -1
  7. chunkr_ai/_utils/__init__.py +1 -0
  8. chunkr_ai/_utils/_typing.py +5 -0
  9. chunkr_ai/_version.py +1 -1
  10. chunkr_ai/resources/__init__.py +14 -0
  11. chunkr_ai/resources/files.py +3 -3
  12. chunkr_ai/resources/tasks/__init__.py +14 -0
  13. chunkr_ai/resources/tasks/extract.py +409 -0
  14. chunkr_ai/resources/tasks/parse.py +124 -284
  15. chunkr_ai/resources/tasks/tasks.py +62 -14
  16. chunkr_ai/resources/webhooks.py +193 -0
  17. chunkr_ai/types/__init__.py +27 -1
  18. chunkr_ai/types/bounding_box.py +19 -0
  19. chunkr_ai/types/cell.py +39 -0
  20. chunkr_ai/types/cell_style.py +28 -0
  21. chunkr_ai/types/chunk.py +40 -0
  22. chunkr_ai/types/chunk_processing.py +40 -0
  23. chunkr_ai/types/chunk_processing_param.py +42 -0
  24. chunkr_ai/types/extract_configuration.py +24 -0
  25. chunkr_ai/types/extract_output_response.py +19 -0
  26. chunkr_ai/types/file_create_params.py +2 -1
  27. chunkr_ai/types/file_info.py +21 -0
  28. chunkr_ai/types/generation_config.py +29 -0
  29. chunkr_ai/types/generation_config_param.py +29 -0
  30. chunkr_ai/types/llm_processing.py +36 -0
  31. chunkr_ai/types/llm_processing_param.py +36 -0
  32. chunkr_ai/types/ocr_result.py +28 -0
  33. chunkr_ai/types/page.py +27 -0
  34. chunkr_ai/types/parse_configuration.py +64 -0
  35. chunkr_ai/types/parse_configuration_param.py +65 -0
  36. chunkr_ai/types/parse_output_response.py +29 -0
  37. chunkr_ai/types/segment.py +109 -0
  38. chunkr_ai/types/segment_processing.py +228 -0
  39. chunkr_ai/types/segment_processing_param.py +229 -0
  40. chunkr_ai/types/task_extract_updated_webhook_event.py +22 -0
  41. chunkr_ai/types/task_list_params.py +7 -1
  42. chunkr_ai/types/task_parse_updated_webhook_event.py +22 -0
  43. chunkr_ai/types/task_response.py +68 -0
  44. chunkr_ai/types/tasks/__init__.py +7 -1
  45. chunkr_ai/types/tasks/extract_create_params.py +47 -0
  46. chunkr_ai/types/tasks/extract_create_response.py +214 -0
  47. chunkr_ai/types/tasks/extract_get_params.py +21 -0
  48. chunkr_ai/types/tasks/extract_get_response.py +214 -0
  49. chunkr_ai/types/tasks/parse_create_params.py +25 -793
  50. chunkr_ai/types/tasks/parse_create_response.py +55 -0
  51. chunkr_ai/types/tasks/parse_get_params.py +21 -0
  52. chunkr_ai/types/tasks/parse_get_response.py +55 -0
  53. chunkr_ai/types/unwrap_webhook_event.py +11 -0
  54. chunkr_ai/types/version_info.py +31 -0
  55. chunkr_ai/types/webhook_url_response.py +9 -0
  56. {chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a7.dist-info}/METADATA +14 -13
  57. chunkr_ai-0.1.0a7.dist-info/RECORD +86 -0
  58. chunkr_ai/types/task.py +0 -1225
  59. chunkr_ai/types/tasks/parse_update_params.py +0 -845
  60. chunkr_ai-0.1.0a6.dist-info/RECORD +0 -52
  61. {chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a7.dist-info}/WHEEL +0 -0
  62. {chunkr_ai-0.1.0a6.dist-info → chunkr_ai-0.1.0a7.dist-info}/licenses/LICENSE +0 -0
chunkr_ai/__init__.py CHANGED
@@ -24,6 +24,7 @@ from ._exceptions import (
24
24
  InternalServerError,
25
25
  PermissionDeniedError,
26
26
  UnprocessableEntityError,
27
+ APIWebhookValidationError,
27
28
  APIResponseValidationError,
28
29
  )
29
30
  from ._base_client import DefaultHttpxClient, DefaultAioHttpClient, DefaultAsyncHttpxClient
@@ -45,6 +46,7 @@ __all__ = [
45
46
  "APITimeoutError",
46
47
  "APIConnectionError",
47
48
  "APIResponseValidationError",
49
+ "APIWebhookValidationError",
48
50
  "BadRequestError",
49
51
  "AuthenticationError",
50
52
  "PermissionDeniedError",
chunkr_ai/_client.py CHANGED
@@ -21,7 +21,7 @@ from ._types import (
21
21
  )
22
22
  from ._utils import is_given, get_async_library
23
23
  from ._version import __version__
24
- from .resources import files, health
24
+ from .resources import files, health, webhooks
25
25
  from ._streaming import Stream as Stream, AsyncStream as AsyncStream
26
26
  from ._exceptions import ChunkrError, APIStatusError
27
27
  from ._base_client import (
@@ -38,16 +38,19 @@ class Chunkr(SyncAPIClient):
38
38
  tasks: tasks.TasksResource
39
39
  files: files.FilesResource
40
40
  health: health.HealthResource
41
+ webhooks: webhooks.WebhooksResource
41
42
  with_raw_response: ChunkrWithRawResponse
42
43
  with_streaming_response: ChunkrWithStreamedResponse
43
44
 
44
45
  # client options
45
46
  api_key: str
47
+ webhook_key: str | None
46
48
 
47
49
  def __init__(
48
50
  self,
49
51
  *,
50
52
  api_key: str | None = None,
53
+ webhook_key: str | None = None,
51
54
  base_url: str | httpx.URL | None = None,
52
55
  timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
53
56
  max_retries: int = DEFAULT_MAX_RETRIES,
@@ -69,7 +72,9 @@ class Chunkr(SyncAPIClient):
69
72
  ) -> None:
70
73
  """Construct a new synchronous Chunkr client instance.
71
74
 
72
- This automatically infers the `api_key` argument from the `CHUNKR_API_KEY` environment variable if it is not provided.
75
+ This automatically infers the following arguments from their corresponding environment variables if they are not provided:
76
+ - `api_key` from `CHUNKR_API_KEY`
77
+ - `webhook_key` from `CHUNKR_WEBHOOK_KEY`
73
78
  """
74
79
  if api_key is None:
75
80
  api_key = os.environ.get("CHUNKR_API_KEY")
@@ -79,6 +84,10 @@ class Chunkr(SyncAPIClient):
79
84
  )
80
85
  self.api_key = api_key
81
86
 
87
+ if webhook_key is None:
88
+ webhook_key = os.environ.get("CHUNKR_WEBHOOK_KEY")
89
+ self.webhook_key = webhook_key
90
+
82
91
  if base_url is None:
83
92
  base_url = os.environ.get("CHUNKR_BASE_URL")
84
93
  if base_url is None:
@@ -100,6 +109,7 @@ class Chunkr(SyncAPIClient):
100
109
  self.tasks = tasks.TasksResource(self)
101
110
  self.files = files.FilesResource(self)
102
111
  self.health = health.HealthResource(self)
112
+ self.webhooks = webhooks.WebhooksResource(self)
103
113
  self.with_raw_response = ChunkrWithRawResponse(self)
104
114
  self.with_streaming_response = ChunkrWithStreamedResponse(self)
105
115
 
@@ -127,6 +137,7 @@ class Chunkr(SyncAPIClient):
127
137
  self,
128
138
  *,
129
139
  api_key: str | None = None,
140
+ webhook_key: str | None = None,
130
141
  base_url: str | httpx.URL | None = None,
131
142
  timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
132
143
  http_client: httpx.Client | None = None,
@@ -161,6 +172,7 @@ class Chunkr(SyncAPIClient):
161
172
  http_client = http_client or self._client
162
173
  return self.__class__(
163
174
  api_key=api_key or self.api_key,
175
+ webhook_key=webhook_key or self.webhook_key,
164
176
  base_url=base_url or self.base_url,
165
177
  timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
166
178
  http_client=http_client,
@@ -212,16 +224,19 @@ class AsyncChunkr(AsyncAPIClient):
212
224
  tasks: tasks.AsyncTasksResource
213
225
  files: files.AsyncFilesResource
214
226
  health: health.AsyncHealthResource
227
+ webhooks: webhooks.AsyncWebhooksResource
215
228
  with_raw_response: AsyncChunkrWithRawResponse
216
229
  with_streaming_response: AsyncChunkrWithStreamedResponse
217
230
 
218
231
  # client options
219
232
  api_key: str
233
+ webhook_key: str | None
220
234
 
221
235
  def __init__(
222
236
  self,
223
237
  *,
224
238
  api_key: str | None = None,
239
+ webhook_key: str | None = None,
225
240
  base_url: str | httpx.URL | None = None,
226
241
  timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
227
242
  max_retries: int = DEFAULT_MAX_RETRIES,
@@ -243,7 +258,9 @@ class AsyncChunkr(AsyncAPIClient):
243
258
  ) -> None:
244
259
  """Construct a new async AsyncChunkr client instance.
245
260
 
246
- This automatically infers the `api_key` argument from the `CHUNKR_API_KEY` environment variable if it is not provided.
261
+ This automatically infers the following arguments from their corresponding environment variables if they are not provided:
262
+ - `api_key` from `CHUNKR_API_KEY`
263
+ - `webhook_key` from `CHUNKR_WEBHOOK_KEY`
247
264
  """
248
265
  if api_key is None:
249
266
  api_key = os.environ.get("CHUNKR_API_KEY")
@@ -253,6 +270,10 @@ class AsyncChunkr(AsyncAPIClient):
253
270
  )
254
271
  self.api_key = api_key
255
272
 
273
+ if webhook_key is None:
274
+ webhook_key = os.environ.get("CHUNKR_WEBHOOK_KEY")
275
+ self.webhook_key = webhook_key
276
+
256
277
  if base_url is None:
257
278
  base_url = os.environ.get("CHUNKR_BASE_URL")
258
279
  if base_url is None:
@@ -274,6 +295,7 @@ class AsyncChunkr(AsyncAPIClient):
274
295
  self.tasks = tasks.AsyncTasksResource(self)
275
296
  self.files = files.AsyncFilesResource(self)
276
297
  self.health = health.AsyncHealthResource(self)
298
+ self.webhooks = webhooks.AsyncWebhooksResource(self)
277
299
  self.with_raw_response = AsyncChunkrWithRawResponse(self)
278
300
  self.with_streaming_response = AsyncChunkrWithStreamedResponse(self)
279
301
 
@@ -301,6 +323,7 @@ class AsyncChunkr(AsyncAPIClient):
301
323
  self,
302
324
  *,
303
325
  api_key: str | None = None,
326
+ webhook_key: str | None = None,
304
327
  base_url: str | httpx.URL | None = None,
305
328
  timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
306
329
  http_client: httpx.AsyncClient | None = None,
@@ -335,6 +358,7 @@ class AsyncChunkr(AsyncAPIClient):
335
358
  http_client = http_client or self._client
336
359
  return self.__class__(
337
360
  api_key=api_key or self.api_key,
361
+ webhook_key=webhook_key or self.webhook_key,
338
362
  base_url=base_url or self.base_url,
339
363
  timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
340
364
  http_client=http_client,
@@ -387,6 +411,7 @@ class ChunkrWithRawResponse:
387
411
  self.tasks = tasks.TasksResourceWithRawResponse(client.tasks)
388
412
  self.files = files.FilesResourceWithRawResponse(client.files)
389
413
  self.health = health.HealthResourceWithRawResponse(client.health)
414
+ self.webhooks = webhooks.WebhooksResourceWithRawResponse(client.webhooks)
390
415
 
391
416
 
392
417
  class AsyncChunkrWithRawResponse:
@@ -394,6 +419,7 @@ class AsyncChunkrWithRawResponse:
394
419
  self.tasks = tasks.AsyncTasksResourceWithRawResponse(client.tasks)
395
420
  self.files = files.AsyncFilesResourceWithRawResponse(client.files)
396
421
  self.health = health.AsyncHealthResourceWithRawResponse(client.health)
422
+ self.webhooks = webhooks.AsyncWebhooksResourceWithRawResponse(client.webhooks)
397
423
 
398
424
 
399
425
  class ChunkrWithStreamedResponse:
@@ -401,6 +427,7 @@ class ChunkrWithStreamedResponse:
401
427
  self.tasks = tasks.TasksResourceWithStreamingResponse(client.tasks)
402
428
  self.files = files.FilesResourceWithStreamingResponse(client.files)
403
429
  self.health = health.HealthResourceWithStreamingResponse(client.health)
430
+ self.webhooks = webhooks.WebhooksResourceWithStreamingResponse(client.webhooks)
404
431
 
405
432
 
406
433
  class AsyncChunkrWithStreamedResponse:
@@ -408,6 +435,7 @@ class AsyncChunkrWithStreamedResponse:
408
435
  self.tasks = tasks.AsyncTasksResourceWithStreamingResponse(client.tasks)
409
436
  self.files = files.AsyncFilesResourceWithStreamingResponse(client.files)
410
437
  self.health = health.AsyncHealthResourceWithStreamingResponse(client.health)
438
+ self.webhooks = webhooks.AsyncWebhooksResourceWithStreamingResponse(client.webhooks)
411
439
 
412
440
 
413
441
  Client = Chunkr
chunkr_ai/_constants.py CHANGED
@@ -5,10 +5,10 @@ import httpx
5
5
  RAW_RESPONSE_HEADER = "X-Stainless-Raw-Response"
6
6
  OVERRIDE_CAST_TO_HEADER = "____stainless_override_cast_to"
7
7
 
8
- # default timeout is 30 seconds
9
- DEFAULT_TIMEOUT = httpx.Timeout(timeout=30, connect=5.0)
10
- DEFAULT_MAX_RETRIES = 50
8
+ # default timeout is 1 minute
9
+ DEFAULT_TIMEOUT = httpx.Timeout(timeout=60, connect=5.0)
10
+ DEFAULT_MAX_RETRIES = 2
11
11
  DEFAULT_CONNECTION_LIMITS = httpx.Limits(max_connections=100, max_keepalive_connections=20)
12
12
 
13
- INITIAL_RETRY_DELAY = 1.0
14
- MAX_RETRY_DELAY = 10.0
13
+ INITIAL_RETRY_DELAY = 0.5
14
+ MAX_RETRY_DELAY = 8.0
chunkr_ai/_exceptions.py CHANGED
@@ -54,6 +54,10 @@ class APIResponseValidationError(APIError):
54
54
  self.status_code = response.status_code
55
55
 
56
56
 
57
+ class APIWebhookValidationError(APIError):
58
+ pass
59
+
60
+
57
61
  class APIStatusError(APIError):
58
62
  """Raised when an API response has a status code of 4xx or 5xx."""
59
63
 
chunkr_ai/_models.py CHANGED
@@ -304,7 +304,7 @@ class BaseModel(pydantic.BaseModel):
304
304
  exclude_none=exclude_none,
305
305
  )
306
306
 
307
- return cast(dict[str, Any], json_safe(dumped)) if mode == "json" else dumped
307
+ return cast("dict[str, Any]", json_safe(dumped)) if mode == "json" else dumped
308
308
 
309
309
  @override
310
310
  def model_dump_json(
chunkr_ai/_types.py CHANGED
@@ -13,10 +13,21 @@ from typing import (
13
13
  Mapping,
14
14
  TypeVar,
15
15
  Callable,
16
+ Iterator,
16
17
  Optional,
17
18
  Sequence,
18
19
  )
19
- from typing_extensions import Set, Literal, Protocol, TypeAlias, TypedDict, override, runtime_checkable
20
+ from typing_extensions import (
21
+ Set,
22
+ Literal,
23
+ Protocol,
24
+ TypeAlias,
25
+ TypedDict,
26
+ SupportsIndex,
27
+ overload,
28
+ override,
29
+ runtime_checkable,
30
+ )
20
31
 
21
32
  import httpx
22
33
  import pydantic
@@ -217,3 +228,26 @@ class _GenericAlias(Protocol):
217
228
  class HttpxSendArgs(TypedDict, total=False):
218
229
  auth: httpx.Auth
219
230
  follow_redirects: bool
231
+
232
+
233
+ _T_co = TypeVar("_T_co", covariant=True)
234
+
235
+
236
+ if TYPE_CHECKING:
237
+ # This works because str.__contains__ does not accept object (either in typeshed or at runtime)
238
+ # https://github.com/hauntsaninja/useful_types/blob/5e9710f3875107d068e7679fd7fec9cfab0eff3b/useful_types/__init__.py#L285
239
+ class SequenceNotStr(Protocol[_T_co]):
240
+ @overload
241
+ def __getitem__(self, index: SupportsIndex, /) -> _T_co: ...
242
+ @overload
243
+ def __getitem__(self, index: slice, /) -> Sequence[_T_co]: ...
244
+ def __contains__(self, value: object, /) -> bool: ...
245
+ def __len__(self) -> int: ...
246
+ def __iter__(self) -> Iterator[_T_co]: ...
247
+ def index(self, value: Any, start: int = 0, stop: int = ..., /) -> int: ...
248
+ def count(self, value: Any, /) -> int: ...
249
+ def __reversed__(self) -> Iterator[_T_co]: ...
250
+ else:
251
+ # just point this to a normal `Sequence` at runtime to avoid having to special case
252
+ # deserializing our custom sequence type
253
+ SequenceNotStr = Sequence
chunkr_ai/_utils/__init__.py CHANGED
@@ -38,6 +38,7 @@ from ._typing import (
38
38
  extract_type_arg as extract_type_arg,
39
39
  is_iterable_type as is_iterable_type,
40
40
  is_required_type as is_required_type,
41
+ is_sequence_type as is_sequence_type,
41
42
  is_annotated_type as is_annotated_type,
42
43
  is_type_alias_type as is_type_alias_type,
43
44
  strip_annotated_type as strip_annotated_type,
chunkr_ai/_utils/_typing.py CHANGED
@@ -26,6 +26,11 @@ def is_list_type(typ: type) -> bool:
26
26
  return (get_origin(typ) or typ) == list
27
27
 
28
28
 
29
+ def is_sequence_type(typ: type) -> bool:
30
+ origin = get_origin(typ) or typ
31
+ return origin == typing_extensions.Sequence or origin == typing.Sequence or origin == _c_abc.Sequence
32
+
33
+
29
34
  def is_iterable_type(typ: type) -> bool:
30
35
  """If the given type is `typing.Iterable[T]`"""
31
36
  origin = get_origin(typ) or typ
chunkr_ai/_version.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
2
 
3
3
  __title__ = "chunkr_ai"
4
- __version__ = "0.1.0-alpha.6" # x-release-please-version
4
+ __version__ = "0.1.0-alpha.7" # x-release-please-version
chunkr_ai/resources/__init__.py CHANGED
@@ -24,6 +24,14 @@ from .health import (
24
24
  HealthResourceWithStreamingResponse,
25
25
  AsyncHealthResourceWithStreamingResponse,
26
26
  )
27
+ from .webhooks import (
28
+ WebhooksResource,
29
+ AsyncWebhooksResource,
30
+ WebhooksResourceWithRawResponse,
31
+ AsyncWebhooksResourceWithRawResponse,
32
+ WebhooksResourceWithStreamingResponse,
33
+ AsyncWebhooksResourceWithStreamingResponse,
34
+ )
27
35
 
28
36
  __all__ = [
29
37
  "TasksResource",
@@ -44,4 +52,10 @@ __all__ = [
44
52
  "AsyncHealthResourceWithRawResponse",
45
53
  "HealthResourceWithStreamingResponse",
46
54
  "AsyncHealthResourceWithStreamingResponse",
55
+ "WebhooksResource",
56
+ "AsyncWebhooksResource",
57
+ "WebhooksResourceWithRawResponse",
58
+ "AsyncWebhooksResourceWithRawResponse",
59
+ "WebhooksResourceWithStreamingResponse",
60
+ "AsyncWebhooksResourceWithStreamingResponse",
47
61
  ]
chunkr_ai/resources/files.py CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from typing import Union, Mapping, cast
5
+ from typing import Union, Mapping, Optional, cast
6
6
  from datetime import datetime
7
7
  from typing_extensions import Literal
8
8
 
@@ -52,7 +52,7 @@ class FilesResource(SyncAPIResource):
52
52
  self,
53
53
  *,
54
54
  file: FileTypes,
55
- file_metadata: str,
55
+ file_metadata: Optional[str] | NotGiven = NOT_GIVEN,
56
56
  # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
57
57
  # The extra values given here take precedence over values defined on the client or passed to this method.
58
58
  extra_headers: Headers | None = None,
@@ -360,7 +360,7 @@ class AsyncFilesResource(AsyncAPIResource):
360
360
  self,
361
361
  *,
362
362
  file: FileTypes,
363
- file_metadata: str,
363
+ file_metadata: Optional[str] | NotGiven = NOT_GIVEN,
364
364
  # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
365
365
  # The extra values given here take precedence over values defined on the client or passed to this method.
366
366
  extra_headers: Headers | None = None,
chunkr_ai/resources/tasks/__init__.py CHANGED
@@ -16,8 +16,22 @@ from .tasks import (
16
16
  TasksResourceWithStreamingResponse,
17
17
  AsyncTasksResourceWithStreamingResponse,
18
18
  )
19
+ from .extract import (
20
+ ExtractResource,
21
+ AsyncExtractResource,
22
+ ExtractResourceWithRawResponse,
23
+ AsyncExtractResourceWithRawResponse,
24
+ ExtractResourceWithStreamingResponse,
25
+ AsyncExtractResourceWithStreamingResponse,
26
+ )
19
27
 
20
28
  __all__ = [
29
+ "ExtractResource",
30
+ "AsyncExtractResource",
31
+ "ExtractResourceWithRawResponse",
32
+ "AsyncExtractResourceWithRawResponse",
33
+ "ExtractResourceWithStreamingResponse",
34
+ "AsyncExtractResourceWithStreamingResponse",
21
35
  "ParseResource",
22
36
  "AsyncParseResource",
23
37
  "ParseResourceWithRawResponse",