pulse-python-sdk 0.0.52__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. pulse/__init__.py +42 -0
  2. pulse/client.py +666 -0
  3. pulse/core/__init__.py +34 -0
  4. pulse/core/api_error.py +23 -0
  5. pulse/core/client_wrapper.py +89 -0
  6. pulse/core/datetime_utils.py +28 -0
  7. pulse/core/file.py +67 -0
  8. pulse/core/force_multipart.py +18 -0
  9. pulse/core/http_client.py +663 -0
  10. pulse/core/http_response.py +55 -0
  11. pulse/core/http_sse/__init__.py +42 -0
  12. pulse/core/http_sse/_api.py +112 -0
  13. pulse/core/http_sse/_decoders.py +61 -0
  14. pulse/core/http_sse/_exceptions.py +7 -0
  15. pulse/core/http_sse/_models.py +17 -0
  16. pulse/core/jsonable_encoder.py +100 -0
  17. pulse/core/pydantic_utilities.py +260 -0
  18. pulse/core/query_encoder.py +58 -0
  19. pulse/core/remove_none_from_dict.py +11 -0
  20. pulse/core/request_options.py +35 -0
  21. pulse/core/serialization.py +276 -0
  22. pulse/core/unchecked_base_model.py +396 -0
  23. pulse/environment.py +7 -0
  24. pulse/errors/__init__.py +4 -0
  25. pulse/errors/bad_request_error.py +10 -0
  26. pulse/errors/forbidden_error.py +10 -0
  27. pulse/errors/internal_server_error.py +10 -0
  28. pulse/errors/not_found_error.py +10 -0
  29. pulse/errors/too_many_requests_error.py +10 -0
  30. pulse/errors/unauthorized_error.py +10 -0
  31. pulse/jobs/__init__.py +4 -0
  32. pulse/jobs/client.py +191 -0
  33. pulse/jobs/raw_client.py +408 -0
  34. pulse/py.typed +0 -0
  35. pulse/raw_client.py +661 -0
  36. pulse/types/__init__.py +4 -0
  37. pulse/types/extract_async_input.py +5 -0
  38. pulse/types/extract_async_response.py +43 -0
  39. pulse/types/extract_async_submission_response_status.py +7 -0
  40. pulse/types/extract_input.py +5 -0
  41. pulse/types/extract_json_input.py +116 -0
  42. pulse/types/extract_json_input_experimental_schema.py +5 -0
  43. pulse/types/extract_json_input_schema.py +5 -0
  44. pulse/types/extract_json_input_storage.py +36 -0
  45. pulse/types/extract_json_input_structured_output.py +38 -0
  46. pulse/types/extract_multipart_input.py +111 -0
  47. pulse/types/extract_multipart_input_experimental_schema.py +5 -0
  48. pulse/types/extract_multipart_input_schema.py +5 -0
  49. pulse/types/extract_multipart_input_storage.py +36 -0
  50. pulse/types/extract_multipart_input_structured_output.py +38 -0
  51. pulse/types/extract_options.py +111 -0
  52. pulse/types/extract_options_experimental_schema.py +5 -0
  53. pulse/types/extract_options_schema.py +5 -0
  54. pulse/types/extract_options_storage.py +36 -0
  55. pulse/types/extract_options_structured_output.py +38 -0
  56. pulse/types/extract_response.py +47 -0
  57. pulse/types/extract_source_multipart_one.py +27 -0
  58. pulse/types/extract_source_multipart_zero.py +27 -0
  59. pulse/types/job_cancellation_response.py +32 -0
  60. pulse/types/job_status.py +5 -0
  61. pulse/types/job_status_response.py +50 -0
  62. pulse/types/json_source.py +29 -0
  63. pulse/types/multipart_source.py +8 -0
  64. pulse/version.py +3 -0
  65. pulse/webhooks/__init__.py +4 -0
  66. pulse/webhooks/client.py +104 -0
  67. pulse/webhooks/raw_client.py +139 -0
  68. pulse/webhooks/types/__init__.py +4 -0
  69. pulse/webhooks/types/create_webhook_link_response.py +23 -0
  70. pulse_python_sdk-0.0.52.dist-info/METADATA +197 -0
  71. pulse_python_sdk-0.0.52.dist-info/RECORD +72 -0
  72. pulse_python_sdk-0.0.52.dist-info/WHEEL +4 -0
pulse/raw_client.py ADDED
@@ -0,0 +1,661 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing
4
+ from json.decoder import JSONDecodeError
5
+
6
+ from .core.api_error import ApiError
7
+ from .core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
8
+ from .core.http_response import AsyncHttpResponse, HttpResponse
9
+ from .core.request_options import RequestOptions
10
+ from .core.serialization import convert_and_respect_annotation_metadata
11
+ from .core.unchecked_base_model import construct_type
12
+ from .errors.bad_request_error import BadRequestError
13
+ from .errors.too_many_requests_error import TooManyRequestsError
14
+ from .errors.unauthorized_error import UnauthorizedError
15
+ from .types.extract_async_response import ExtractAsyncResponse
16
+ from .types.extract_json_input_experimental_schema import ExtractJsonInputExperimentalSchema
17
+ from .types.extract_json_input_schema import ExtractJsonInputSchema
18
+ from .types.extract_json_input_storage import ExtractJsonInputStorage
19
+ from .types.extract_json_input_structured_output import ExtractJsonInputStructuredOutput
20
+ from .types.extract_response import ExtractResponse
21
+
22
+ # Sentinel (Ellipsis cast to typing.Any) used as the default for optional parameters; it is also passed to the HTTP client as `omit=OMIT`.
23
+ OMIT = typing.cast(typing.Any, ...)
24
+
25
+
26
class RawPulse:
    """
    Synchronous low-level client for the Pulse extraction endpoints.

    Each public method issues one POST request and returns an ``HttpResponse``
    that pairs the underlying HTTP response with the parsed response model.
    """

    def __init__(self, *, client_wrapper: SyncClientWrapper):
        # The wrapper supplies the `httpx_client` used to issue every request.
        self._client_wrapper = client_wrapper

    @staticmethod
    def _interpret(http_response: typing.Any, model: typing.Any) -> HttpResponse[typing.Any]:
        """
        Convert a completed HTTP exchange into an ``HttpResponse`` or raise.

        Parameters
        ----------
        http_response : typing.Any
            Response object returned by the HTTP client.

        model : typing.Any
            Type the JSON body of a 2xx response is constructed into.

        Returns
        -------
        HttpResponse[typing.Any]
            The response paired with its parsed body (2xx statuses only).

        Raises
        ------
        BadRequestError
            On HTTP 400.
        UnauthorizedError
            On HTTP 401.
        TooManyRequestsError
            On HTTP 429.
        ApiError
            On any other non-2xx status, or when the body is not valid JSON.
        """
        typed_errors = {
            400: BadRequestError,
            401: UnauthorizedError,
            429: TooManyRequestsError,
        }
        try:
            status = http_response.status_code
            if 200 <= status < 300:
                parsed = typing.cast(
                    typing.Any,
                    construct_type(
                        type_=model,  # type: ignore
                        object_=http_response.json(),
                    ),
                )
                return HttpResponse(response=http_response, data=parsed)
            error_type = typed_errors.get(status)
            if error_type is not None:
                raise error_type(
                    headers=dict(http_response.headers),
                    body=typing.cast(
                        typing.Any,
                        construct_type(
                            type_=typing.Any,  # type: ignore
                            object_=http_response.json(),
                        ),
                    ),
                )
            fallback_body = http_response.json()
        except JSONDecodeError:
            # The body was not valid JSON: surface the raw text instead.
            raise ApiError(
                status_code=http_response.status_code,
                headers=dict(http_response.headers),
                body=http_response.text,
            )
        raise ApiError(
            status_code=http_response.status_code,
            headers=dict(http_response.headers),
            body=fallback_body,
        )

    def extract(
        self,
        *,
        file_url: str,
        structured_output: typing.Optional[ExtractJsonInputStructuredOutput] = OMIT,
        schema: typing.Optional[ExtractJsonInputSchema] = OMIT,
        experimental_schema: typing.Optional[ExtractJsonInputExperimentalSchema] = OMIT,
        schema_prompt: typing.Optional[str] = OMIT,
        custom_prompt: typing.Optional[str] = OMIT,
        chunking: typing.Optional[str] = OMIT,
        chunk_size: typing.Optional[int] = OMIT,
        pages: typing.Optional[str] = OMIT,
        extract_figure: typing.Optional[bool] = OMIT,
        figure_description: typing.Optional[bool] = OMIT,
        return_html: typing.Optional[bool] = OMIT,
        thinking: typing.Optional[bool] = OMIT,
        storage: typing.Optional[ExtractJsonInputStorage] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> HttpResponse[ExtractResponse]:
        """
        Run an extraction through the primary Pulse endpoint and wait for the result.

        The document at ``file_url`` is parsed into rich markdown content, with
        optional structured data extraction driven by the supplied schema and
        extraction options.

        Parameters
        ----------
        file_url : str
            Public or pre-signed URL that Pulse will download and extract.

        structured_output : typing.Optional[ExtractJsonInputStructuredOutput]
            Recommended way to request schema-guided extraction; bundles the schema and an optional prompt in a single object.

        schema : typing.Optional[ExtractJsonInputSchema]
            (Deprecated) JSON schema describing the structured data to extract, given as a JSON object or a stringified JSON representation. Use structuredOutput instead.

        experimental_schema : typing.Optional[ExtractJsonInputExperimentalSchema]
            (Deprecated) Experimental schema definition used for feature flagged behaviour, given as a JSON object or a stringified JSON representation.

        schema_prompt : typing.Optional[str]
            (Deprecated) Natural language prompt for schema-guided extraction. Use structuredOutput.schemaPrompt instead.

        custom_prompt : typing.Optional[str]
            (Deprecated) Custom instructions that augment the default extraction behaviour.

        chunking : typing.Optional[str]
            Comma-separated list of chunking strategies to apply (for example `semantic,header,page,recursive`).

        chunk_size : typing.Optional[int]
            Override for the maximum characters per chunk when chunking is enabled.

        pages : typing.Optional[str]
            Page range filter supporting segments such as `1-2` or mixed ranges like `1-2,5`.

        extract_figure : typing.Optional[bool]
            Toggle that enables figure extraction in the results.

        figure_description : typing.Optional[bool]
            Toggle that generates descriptive captions for extracted figures.

        return_html : typing.Optional[bool]
            Whether to include an HTML representation alongside markdown in the response.

        thinking : typing.Optional[bool]
            (Deprecated) Enables expanded rationale output for debugging.

        storage : typing.Optional[ExtractJsonInputStorage]
            Options for persisting extraction artifacts. When enabled (default), artifacts are saved to storage and a database record is created.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        HttpResponse[ExtractResponse]
            Synchronous extraction result

        Raises
        ------
        BadRequestError
            On HTTP 400.
        UnauthorizedError
            On HTTP 401.
        TooManyRequestsError
            On HTTP 429.
        ApiError
            On any other non-2xx status, or when the response body is not valid JSON.
        """
        request_body = {
            "fileUrl": file_url,
            "structuredOutput": convert_and_respect_annotation_metadata(
                object_=structured_output, annotation=ExtractJsonInputStructuredOutput, direction="write"
            ),
            "schema": convert_and_respect_annotation_metadata(
                object_=schema, annotation=ExtractJsonInputSchema, direction="write"
            ),
            "experimentalSchema": convert_and_respect_annotation_metadata(
                object_=experimental_schema, annotation=ExtractJsonInputExperimentalSchema, direction="write"
            ),
            "schemaPrompt": schema_prompt,
            "customPrompt": custom_prompt,
            "chunking": chunking,
            "chunkSize": chunk_size,
            "pages": pages,
            "extractFigure": extract_figure,
            "figureDescription": figure_description,
            "returnHtml": return_html,
            "thinking": thinking,
            "storage": convert_and_respect_annotation_metadata(
                object_=storage, annotation=ExtractJsonInputStorage, direction="write"
            ),
        }
        http_response = self._client_wrapper.httpx_client.request(
            "extract",
            method="POST",
            json=request_body,
            headers={
                "content-type": "application/json",
            },
            request_options=request_options,
            omit=OMIT,
        )
        return self._interpret(http_response, ExtractResponse)

    def extract_async(
        self,
        *,
        file_url: str,
        structured_output: typing.Optional[ExtractJsonInputStructuredOutput] = OMIT,
        schema: typing.Optional[ExtractJsonInputSchema] = OMIT,
        experimental_schema: typing.Optional[ExtractJsonInputExperimentalSchema] = OMIT,
        schema_prompt: typing.Optional[str] = OMIT,
        custom_prompt: typing.Optional[str] = OMIT,
        chunking: typing.Optional[str] = OMIT,
        chunk_size: typing.Optional[int] = OMIT,
        pages: typing.Optional[str] = OMIT,
        extract_figure: typing.Optional[bool] = OMIT,
        figure_description: typing.Optional[bool] = OMIT,
        return_html: typing.Optional[bool] = OMIT,
        thinking: typing.Optional[bool] = OMIT,
        storage: typing.Optional[ExtractJsonInputStorage] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> HttpResponse[ExtractAsyncResponse]:
        """
        Submit an asynchronous extraction job.

        The request accepts the same options as the synchronous extraction but
        returns immediately with a job identifier that clients can poll for
        completion status.

        Parameters
        ----------
        file_url : str
            Public or pre-signed URL that Pulse will download and extract.

        structured_output : typing.Optional[ExtractJsonInputStructuredOutput]
            Recommended way to request schema-guided extraction; bundles the schema and an optional prompt in a single object.

        schema : typing.Optional[ExtractJsonInputSchema]
            (Deprecated) JSON schema describing the structured data to extract, given as a JSON object or a stringified JSON representation. Use structuredOutput instead.

        experimental_schema : typing.Optional[ExtractJsonInputExperimentalSchema]
            (Deprecated) Experimental schema definition used for feature flagged behaviour, given as a JSON object or a stringified JSON representation.

        schema_prompt : typing.Optional[str]
            (Deprecated) Natural language prompt for schema-guided extraction. Use structuredOutput.schemaPrompt instead.

        custom_prompt : typing.Optional[str]
            (Deprecated) Custom instructions that augment the default extraction behaviour.

        chunking : typing.Optional[str]
            Comma-separated list of chunking strategies to apply (for example `semantic,header,page,recursive`).

        chunk_size : typing.Optional[int]
            Override for the maximum characters per chunk when chunking is enabled.

        pages : typing.Optional[str]
            Page range filter supporting segments such as `1-2` or mixed ranges like `1-2,5`.

        extract_figure : typing.Optional[bool]
            Toggle that enables figure extraction in the results.

        figure_description : typing.Optional[bool]
            Toggle that generates descriptive captions for extracted figures.

        return_html : typing.Optional[bool]
            Whether to include an HTML representation alongside markdown in the response.

        thinking : typing.Optional[bool]
            (Deprecated) Enables expanded rationale output for debugging.

        storage : typing.Optional[ExtractJsonInputStorage]
            Options for persisting extraction artifacts. When enabled (default), artifacts are saved to storage and a database record is created.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        HttpResponse[ExtractAsyncResponse]
            Asynchronous extraction job accepted

        Raises
        ------
        BadRequestError
            On HTTP 400.
        UnauthorizedError
            On HTTP 401.
        TooManyRequestsError
            On HTTP 429.
        ApiError
            On any other non-2xx status, or when the response body is not valid JSON.
        """
        request_body = {
            "fileUrl": file_url,
            "structuredOutput": convert_and_respect_annotation_metadata(
                object_=structured_output, annotation=ExtractJsonInputStructuredOutput, direction="write"
            ),
            "schema": convert_and_respect_annotation_metadata(
                object_=schema, annotation=ExtractJsonInputSchema, direction="write"
            ),
            "experimentalSchema": convert_and_respect_annotation_metadata(
                object_=experimental_schema, annotation=ExtractJsonInputExperimentalSchema, direction="write"
            ),
            "schemaPrompt": schema_prompt,
            "customPrompt": custom_prompt,
            "chunking": chunking,
            "chunkSize": chunk_size,
            "pages": pages,
            "extractFigure": extract_figure,
            "figureDescription": figure_description,
            "returnHtml": return_html,
            "thinking": thinking,
            "storage": convert_and_respect_annotation_metadata(
                object_=storage, annotation=ExtractJsonInputStorage, direction="write"
            ),
        }
        http_response = self._client_wrapper.httpx_client.request(
            "extract_async",
            method="POST",
            json=request_body,
            headers={
                "content-type": "application/json",
            },
            request_options=request_options,
            omit=OMIT,
        )
        return self._interpret(http_response, ExtractAsyncResponse)
343
+
344
+
345
class AsyncRawPulse:
    """
    Asynchronous low-level client for the Pulse extraction endpoints.

    Each public coroutine awaits one POST request and returns an
    ``AsyncHttpResponse`` that pairs the underlying HTTP response with the
    parsed response model.
    """

    def __init__(self, *, client_wrapper: AsyncClientWrapper):
        # The wrapper supplies the `httpx_client` whose `request` is awaited below.
        self._client_wrapper = client_wrapper

    @staticmethod
    def _interpret(http_response: typing.Any, model: typing.Any) -> AsyncHttpResponse[typing.Any]:
        """
        Convert a completed HTTP exchange into an ``AsyncHttpResponse`` or raise.

        Parameters
        ----------
        http_response : typing.Any
            Response object returned by the awaited HTTP client call.

        model : typing.Any
            Type the JSON body of a 2xx response is constructed into.

        Returns
        -------
        AsyncHttpResponse[typing.Any]
            The response paired with its parsed body (2xx statuses only).

        Raises
        ------
        BadRequestError
            On HTTP 400.
        UnauthorizedError
            On HTTP 401.
        TooManyRequestsError
            On HTTP 429.
        ApiError
            On any other non-2xx status, or when the body is not valid JSON.
        """
        typed_errors = {
            400: BadRequestError,
            401: UnauthorizedError,
            429: TooManyRequestsError,
        }
        try:
            status = http_response.status_code
            if 200 <= status < 300:
                parsed = typing.cast(
                    typing.Any,
                    construct_type(
                        type_=model,  # type: ignore
                        object_=http_response.json(),
                    ),
                )
                return AsyncHttpResponse(response=http_response, data=parsed)
            error_type = typed_errors.get(status)
            if error_type is not None:
                raise error_type(
                    headers=dict(http_response.headers),
                    body=typing.cast(
                        typing.Any,
                        construct_type(
                            type_=typing.Any,  # type: ignore
                            object_=http_response.json(),
                        ),
                    ),
                )
            fallback_body = http_response.json()
        except JSONDecodeError:
            # The body was not valid JSON: surface the raw text instead.
            raise ApiError(
                status_code=http_response.status_code,
                headers=dict(http_response.headers),
                body=http_response.text,
            )
        raise ApiError(
            status_code=http_response.status_code,
            headers=dict(http_response.headers),
            body=fallback_body,
        )

    async def extract(
        self,
        *,
        file_url: str,
        structured_output: typing.Optional[ExtractJsonInputStructuredOutput] = OMIT,
        schema: typing.Optional[ExtractJsonInputSchema] = OMIT,
        experimental_schema: typing.Optional[ExtractJsonInputExperimentalSchema] = OMIT,
        schema_prompt: typing.Optional[str] = OMIT,
        custom_prompt: typing.Optional[str] = OMIT,
        chunking: typing.Optional[str] = OMIT,
        chunk_size: typing.Optional[int] = OMIT,
        pages: typing.Optional[str] = OMIT,
        extract_figure: typing.Optional[bool] = OMIT,
        figure_description: typing.Optional[bool] = OMIT,
        return_html: typing.Optional[bool] = OMIT,
        thinking: typing.Optional[bool] = OMIT,
        storage: typing.Optional[ExtractJsonInputStorage] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> AsyncHttpResponse[ExtractResponse]:
        """
        Run an extraction through the primary Pulse endpoint and await the result.

        The document at ``file_url`` is parsed into rich markdown content, with
        optional structured data extraction driven by the supplied schema and
        extraction options.

        Parameters
        ----------
        file_url : str
            Public or pre-signed URL that Pulse will download and extract.

        structured_output : typing.Optional[ExtractJsonInputStructuredOutput]
            Recommended way to request schema-guided extraction; bundles the schema and an optional prompt in a single object.

        schema : typing.Optional[ExtractJsonInputSchema]
            (Deprecated) JSON schema describing the structured data to extract, given as a JSON object or a stringified JSON representation. Use structuredOutput instead.

        experimental_schema : typing.Optional[ExtractJsonInputExperimentalSchema]
            (Deprecated) Experimental schema definition used for feature flagged behaviour, given as a JSON object or a stringified JSON representation.

        schema_prompt : typing.Optional[str]
            (Deprecated) Natural language prompt for schema-guided extraction. Use structuredOutput.schemaPrompt instead.

        custom_prompt : typing.Optional[str]
            (Deprecated) Custom instructions that augment the default extraction behaviour.

        chunking : typing.Optional[str]
            Comma-separated list of chunking strategies to apply (for example `semantic,header,page,recursive`).

        chunk_size : typing.Optional[int]
            Override for the maximum characters per chunk when chunking is enabled.

        pages : typing.Optional[str]
            Page range filter supporting segments such as `1-2` or mixed ranges like `1-2,5`.

        extract_figure : typing.Optional[bool]
            Toggle that enables figure extraction in the results.

        figure_description : typing.Optional[bool]
            Toggle that generates descriptive captions for extracted figures.

        return_html : typing.Optional[bool]
            Whether to include an HTML representation alongside markdown in the response.

        thinking : typing.Optional[bool]
            (Deprecated) Enables expanded rationale output for debugging.

        storage : typing.Optional[ExtractJsonInputStorage]
            Options for persisting extraction artifacts. When enabled (default), artifacts are saved to storage and a database record is created.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        AsyncHttpResponse[ExtractResponse]
            Synchronous extraction result

        Raises
        ------
        BadRequestError
            On HTTP 400.
        UnauthorizedError
            On HTTP 401.
        TooManyRequestsError
            On HTTP 429.
        ApiError
            On any other non-2xx status, or when the response body is not valid JSON.
        """
        request_body = {
            "fileUrl": file_url,
            "structuredOutput": convert_and_respect_annotation_metadata(
                object_=structured_output, annotation=ExtractJsonInputStructuredOutput, direction="write"
            ),
            "schema": convert_and_respect_annotation_metadata(
                object_=schema, annotation=ExtractJsonInputSchema, direction="write"
            ),
            "experimentalSchema": convert_and_respect_annotation_metadata(
                object_=experimental_schema, annotation=ExtractJsonInputExperimentalSchema, direction="write"
            ),
            "schemaPrompt": schema_prompt,
            "customPrompt": custom_prompt,
            "chunking": chunking,
            "chunkSize": chunk_size,
            "pages": pages,
            "extractFigure": extract_figure,
            "figureDescription": figure_description,
            "returnHtml": return_html,
            "thinking": thinking,
            "storage": convert_and_respect_annotation_metadata(
                object_=storage, annotation=ExtractJsonInputStorage, direction="write"
            ),
        }
        http_response = await self._client_wrapper.httpx_client.request(
            "extract",
            method="POST",
            json=request_body,
            headers={
                "content-type": "application/json",
            },
            request_options=request_options,
            omit=OMIT,
        )
        return self._interpret(http_response, ExtractResponse)

    async def extract_async(
        self,
        *,
        file_url: str,
        structured_output: typing.Optional[ExtractJsonInputStructuredOutput] = OMIT,
        schema: typing.Optional[ExtractJsonInputSchema] = OMIT,
        experimental_schema: typing.Optional[ExtractJsonInputExperimentalSchema] = OMIT,
        schema_prompt: typing.Optional[str] = OMIT,
        custom_prompt: typing.Optional[str] = OMIT,
        chunking: typing.Optional[str] = OMIT,
        chunk_size: typing.Optional[int] = OMIT,
        pages: typing.Optional[str] = OMIT,
        extract_figure: typing.Optional[bool] = OMIT,
        figure_description: typing.Optional[bool] = OMIT,
        return_html: typing.Optional[bool] = OMIT,
        thinking: typing.Optional[bool] = OMIT,
        storage: typing.Optional[ExtractJsonInputStorage] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> AsyncHttpResponse[ExtractAsyncResponse]:
        """
        Submit an asynchronous extraction job.

        The request accepts the same options as the synchronous extraction but
        returns immediately with a job identifier that clients can poll for
        completion status.

        Parameters
        ----------
        file_url : str
            Public or pre-signed URL that Pulse will download and extract.

        structured_output : typing.Optional[ExtractJsonInputStructuredOutput]
            Recommended way to request schema-guided extraction; bundles the schema and an optional prompt in a single object.

        schema : typing.Optional[ExtractJsonInputSchema]
            (Deprecated) JSON schema describing the structured data to extract, given as a JSON object or a stringified JSON representation. Use structuredOutput instead.

        experimental_schema : typing.Optional[ExtractJsonInputExperimentalSchema]
            (Deprecated) Experimental schema definition used for feature flagged behaviour, given as a JSON object or a stringified JSON representation.

        schema_prompt : typing.Optional[str]
            (Deprecated) Natural language prompt for schema-guided extraction. Use structuredOutput.schemaPrompt instead.

        custom_prompt : typing.Optional[str]
            (Deprecated) Custom instructions that augment the default extraction behaviour.

        chunking : typing.Optional[str]
            Comma-separated list of chunking strategies to apply (for example `semantic,header,page,recursive`).

        chunk_size : typing.Optional[int]
            Override for the maximum characters per chunk when chunking is enabled.

        pages : typing.Optional[str]
            Page range filter supporting segments such as `1-2` or mixed ranges like `1-2,5`.

        extract_figure : typing.Optional[bool]
            Toggle that enables figure extraction in the results.

        figure_description : typing.Optional[bool]
            Toggle that generates descriptive captions for extracted figures.

        return_html : typing.Optional[bool]
            Whether to include an HTML representation alongside markdown in the response.

        thinking : typing.Optional[bool]
            (Deprecated) Enables expanded rationale output for debugging.

        storage : typing.Optional[ExtractJsonInputStorage]
            Options for persisting extraction artifacts. When enabled (default), artifacts are saved to storage and a database record is created.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        AsyncHttpResponse[ExtractAsyncResponse]
            Asynchronous extraction job accepted

        Raises
        ------
        BadRequestError
            On HTTP 400.
        UnauthorizedError
            On HTTP 401.
        TooManyRequestsError
            On HTTP 429.
        ApiError
            On any other non-2xx status, or when the response body is not valid JSON.
        """
        request_body = {
            "fileUrl": file_url,
            "structuredOutput": convert_and_respect_annotation_metadata(
                object_=structured_output, annotation=ExtractJsonInputStructuredOutput, direction="write"
            ),
            "schema": convert_and_respect_annotation_metadata(
                object_=schema, annotation=ExtractJsonInputSchema, direction="write"
            ),
            "experimentalSchema": convert_and_respect_annotation_metadata(
                object_=experimental_schema, annotation=ExtractJsonInputExperimentalSchema, direction="write"
            ),
            "schemaPrompt": schema_prompt,
            "customPrompt": custom_prompt,
            "chunking": chunking,
            "chunkSize": chunk_size,
            "pages": pages,
            "extractFigure": extract_figure,
            "figureDescription": figure_description,
            "returnHtml": return_html,
            "thinking": thinking,
            "storage": convert_and_respect_annotation_metadata(
                object_=storage, annotation=ExtractJsonInputStorage, direction="write"
            ),
        }
        http_response = await self._client_wrapper.httpx_client.request(
            "extract_async",
            method="POST",
            json=request_body,
            headers={
                "content-type": "application/json",
            },
            request_options=request_options,
            omit=OMIT,
        )
        return self._interpret(http_response, ExtractAsyncResponse)
@@ -0,0 +1,4 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ # isort: skip_file
4
+
@@ -0,0 +1,5 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing
4
+
5
+ # Alias with no structure enforced: ExtractAsyncInput is currently just typing.Any.
+ ExtractAsyncInput = typing.Any