pulse-python-sdk 0.0.52__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pulse/__init__.py +42 -0
- pulse/client.py +666 -0
- pulse/core/__init__.py +34 -0
- pulse/core/api_error.py +23 -0
- pulse/core/client_wrapper.py +89 -0
- pulse/core/datetime_utils.py +28 -0
- pulse/core/file.py +67 -0
- pulse/core/force_multipart.py +18 -0
- pulse/core/http_client.py +663 -0
- pulse/core/http_response.py +55 -0
- pulse/core/http_sse/__init__.py +42 -0
- pulse/core/http_sse/_api.py +112 -0
- pulse/core/http_sse/_decoders.py +61 -0
- pulse/core/http_sse/_exceptions.py +7 -0
- pulse/core/http_sse/_models.py +17 -0
- pulse/core/jsonable_encoder.py +100 -0
- pulse/core/pydantic_utilities.py +260 -0
- pulse/core/query_encoder.py +58 -0
- pulse/core/remove_none_from_dict.py +11 -0
- pulse/core/request_options.py +35 -0
- pulse/core/serialization.py +276 -0
- pulse/core/unchecked_base_model.py +396 -0
- pulse/environment.py +7 -0
- pulse/errors/__init__.py +4 -0
- pulse/errors/bad_request_error.py +10 -0
- pulse/errors/forbidden_error.py +10 -0
- pulse/errors/internal_server_error.py +10 -0
- pulse/errors/not_found_error.py +10 -0
- pulse/errors/too_many_requests_error.py +10 -0
- pulse/errors/unauthorized_error.py +10 -0
- pulse/jobs/__init__.py +4 -0
- pulse/jobs/client.py +191 -0
- pulse/jobs/raw_client.py +408 -0
- pulse/py.typed +0 -0
- pulse/raw_client.py +661 -0
- pulse/types/__init__.py +4 -0
- pulse/types/extract_async_input.py +5 -0
- pulse/types/extract_async_response.py +43 -0
- pulse/types/extract_async_submission_response_status.py +7 -0
- pulse/types/extract_input.py +5 -0
- pulse/types/extract_json_input.py +116 -0
- pulse/types/extract_json_input_experimental_schema.py +5 -0
- pulse/types/extract_json_input_schema.py +5 -0
- pulse/types/extract_json_input_storage.py +36 -0
- pulse/types/extract_json_input_structured_output.py +38 -0
- pulse/types/extract_multipart_input.py +111 -0
- pulse/types/extract_multipart_input_experimental_schema.py +5 -0
- pulse/types/extract_multipart_input_schema.py +5 -0
- pulse/types/extract_multipart_input_storage.py +36 -0
- pulse/types/extract_multipart_input_structured_output.py +38 -0
- pulse/types/extract_options.py +111 -0
- pulse/types/extract_options_experimental_schema.py +5 -0
- pulse/types/extract_options_schema.py +5 -0
- pulse/types/extract_options_storage.py +36 -0
- pulse/types/extract_options_structured_output.py +38 -0
- pulse/types/extract_response.py +47 -0
- pulse/types/extract_source_multipart_one.py +27 -0
- pulse/types/extract_source_multipart_zero.py +27 -0
- pulse/types/job_cancellation_response.py +32 -0
- pulse/types/job_status.py +5 -0
- pulse/types/job_status_response.py +50 -0
- pulse/types/json_source.py +29 -0
- pulse/types/multipart_source.py +8 -0
- pulse/version.py +3 -0
- pulse/webhooks/__init__.py +4 -0
- pulse/webhooks/client.py +104 -0
- pulse/webhooks/raw_client.py +139 -0
- pulse/webhooks/types/__init__.py +4 -0
- pulse/webhooks/types/create_webhook_link_response.py +23 -0
- pulse_python_sdk-0.0.52.dist-info/METADATA +197 -0
- pulse_python_sdk-0.0.52.dist-info/RECORD +72 -0
- pulse_python_sdk-0.0.52.dist-info/WHEEL +4 -0
pulse/raw_client.py
ADDED
|
@@ -0,0 +1,661 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing
|
|
4
|
+
from json.decoder import JSONDecodeError
|
|
5
|
+
|
|
6
|
+
from .core.api_error import ApiError
|
|
7
|
+
from .core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
|
|
8
|
+
from .core.http_response import AsyncHttpResponse, HttpResponse
|
|
9
|
+
from .core.request_options import RequestOptions
|
|
10
|
+
from .core.serialization import convert_and_respect_annotation_metadata
|
|
11
|
+
from .core.unchecked_base_model import construct_type
|
|
12
|
+
from .errors.bad_request_error import BadRequestError
|
|
13
|
+
from .errors.too_many_requests_error import TooManyRequestsError
|
|
14
|
+
from .errors.unauthorized_error import UnauthorizedError
|
|
15
|
+
from .types.extract_async_response import ExtractAsyncResponse
|
|
16
|
+
from .types.extract_json_input_experimental_schema import ExtractJsonInputExperimentalSchema
|
|
17
|
+
from .types.extract_json_input_schema import ExtractJsonInputSchema
|
|
18
|
+
from .types.extract_json_input_storage import ExtractJsonInputStorage
|
|
19
|
+
from .types.extract_json_input_structured_output import ExtractJsonInputStructuredOutput
|
|
20
|
+
from .types.extract_response import ExtractResponse
|
|
21
|
+
|
|
22
|
+
# Sentinel (Ellipsis) used as the default value for optional parameters; it is also passed to the HTTP client as `omit=`.
|
|
23
|
+
OMIT = typing.cast(typing.Any, ...)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class RawPulse:
    """
    Synchronous low-level client for the Pulse extraction endpoints.

    Every public method returns an ``HttpResponse`` wrapping both the raw HTTP
    response and the payload parsed from its JSON body.
    """

    def __init__(self, *, client_wrapper: SyncClientWrapper):
        self._client_wrapper = client_wrapper

    @staticmethod
    def _extraction_request_body(
        *,
        file_url: str,
        structured_output: typing.Optional[ExtractJsonInputStructuredOutput],
        schema: typing.Optional[ExtractJsonInputSchema],
        experimental_schema: typing.Optional[ExtractJsonInputExperimentalSchema],
        schema_prompt: typing.Optional[str],
        custom_prompt: typing.Optional[str],
        chunking: typing.Optional[str],
        chunk_size: typing.Optional[int],
        pages: typing.Optional[str],
        extract_figure: typing.Optional[bool],
        figure_description: typing.Optional[bool],
        return_html: typing.Optional[bool],
        thinking: typing.Optional[bool],
        storage: typing.Optional[ExtractJsonInputStorage],
    ) -> typing.Dict[str, typing.Any]:
        """
        Build the JSON body shared by ``extract`` and ``extract_async``.

        Structured arguments are passed through
        ``convert_and_respect_annotation_metadata`` in the "write" direction;
        scalar arguments are forwarded unchanged. Values still equal to ``OMIT``
        are left in place here because the HTTP client receives ``omit=OMIT``
        separately.
        """
        return {
            "fileUrl": file_url,
            "structuredOutput": convert_and_respect_annotation_metadata(
                object_=structured_output, annotation=ExtractJsonInputStructuredOutput, direction="write"
            ),
            "schema": convert_and_respect_annotation_metadata(
                object_=schema, annotation=ExtractJsonInputSchema, direction="write"
            ),
            "experimentalSchema": convert_and_respect_annotation_metadata(
                object_=experimental_schema, annotation=ExtractJsonInputExperimentalSchema, direction="write"
            ),
            "schemaPrompt": schema_prompt,
            "customPrompt": custom_prompt,
            "chunking": chunking,
            "chunkSize": chunk_size,
            "pages": pages,
            "extractFigure": extract_figure,
            "figureDescription": figure_description,
            "returnHtml": return_html,
            "thinking": thinking,
            "storage": convert_and_respect_annotation_metadata(
                object_=storage, annotation=ExtractJsonInputStorage, direction="write"
            ),
        }

    @staticmethod
    def _parse_or_raise(response: typing.Any, type_: typing.Any) -> typing.Any:
        """
        Turn an HTTP response into an instance of ``type_`` or raise.

        Parameters
        ----------
        response : typing.Any
            Response object exposing ``status_code``, ``headers``, ``json()`` and ``text``.

        type_ : typing.Any
            Type handed to ``construct_type`` for a 2xx response.

        Returns
        -------
        typing.Any
            The value produced by ``construct_type`` for a 2xx response.

        Raises
        ------
        BadRequestError
            On status 400.
        UnauthorizedError
            On status 401.
        TooManyRequestsError
            On status 429.
        ApiError
            On any other non-2xx status (body is the decoded JSON), or when the
            body cannot be decoded as JSON (body is the raw response text).
        """
        try:
            if 200 <= response.status_code < 300:
                return construct_type(
                    type_=type_,  # type: ignore
                    object_=response.json(),
                )
            # Statuses that map to a dedicated exception class.
            error_cls = {
                400: BadRequestError,
                401: UnauthorizedError,
                429: TooManyRequestsError,
            }.get(response.status_code)
            if error_cls is not None:
                raise error_cls(
                    headers=dict(response.headers),
                    body=typing.cast(
                        typing.Any,
                        construct_type(
                            type_=typing.Any,  # type: ignore
                            object_=response.json(),
                        ),
                    ),
                )
            _response_json = response.json()
        except JSONDecodeError:
            # The body was not valid JSON: surface the raw text instead.
            raise ApiError(status_code=response.status_code, headers=dict(response.headers), body=response.text)
        raise ApiError(status_code=response.status_code, headers=dict(response.headers), body=_response_json)

    def extract(
        self,
        *,
        file_url: str,
        structured_output: typing.Optional[ExtractJsonInputStructuredOutput] = OMIT,
        schema: typing.Optional[ExtractJsonInputSchema] = OMIT,
        experimental_schema: typing.Optional[ExtractJsonInputExperimentalSchema] = OMIT,
        schema_prompt: typing.Optional[str] = OMIT,
        custom_prompt: typing.Optional[str] = OMIT,
        chunking: typing.Optional[str] = OMIT,
        chunk_size: typing.Optional[int] = OMIT,
        pages: typing.Optional[str] = OMIT,
        extract_figure: typing.Optional[bool] = OMIT,
        figure_description: typing.Optional[bool] = OMIT,
        return_html: typing.Optional[bool] = OMIT,
        thinking: typing.Optional[bool] = OMIT,
        storage: typing.Optional[ExtractJsonInputStorage] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> HttpResponse[ExtractResponse]:
        """
        The primary endpoint for the Pulse API. Parses uploaded documents or remote
        file URLs and returns rich markdown content with optional structured data
        extraction based on user-provided schemas and extraction options.

        Sends a JSON ``POST`` to ``extract``.

        Parameters
        ----------
        file_url : str
            Public or pre-signed URL that Pulse will download and extract.

        structured_output : typing.Optional[ExtractJsonInputStructuredOutput]
            Recommended method for schema-guided extraction. Contains the schema and optional prompt in a single object.

        schema : typing.Optional[ExtractJsonInputSchema]
            (Deprecated) JSON schema describing structured data to extract. Use structuredOutput instead. Accepts either a JSON object or a stringified JSON representation.

        experimental_schema : typing.Optional[ExtractJsonInputExperimentalSchema]
            (Deprecated) Experimental schema definition used for feature flagged behaviour. Accepts either a JSON object or a stringified JSON representation.

        schema_prompt : typing.Optional[str]
            (Deprecated) Natural language prompt for schema-guided extraction. Use structuredOutput.schemaPrompt instead.

        custom_prompt : typing.Optional[str]
            (Deprecated) Custom instructions that augment the default extraction behaviour.

        chunking : typing.Optional[str]
            Comma-separated list of chunking strategies to apply (for example `semantic,header,page,recursive`).

        chunk_size : typing.Optional[int]
            Override for maximum characters per chunk when chunking is enabled.

        pages : typing.Optional[str]
            Page range filter supporting segments such as `1-2` or mixed ranges like `1-2,5`.

        extract_figure : typing.Optional[bool]
            Toggle to enable figure extraction in results.

        figure_description : typing.Optional[bool]
            Toggle to generate descriptive captions for extracted figures.

        return_html : typing.Optional[bool]
            Whether to include HTML representation alongside markdown in the response.

        thinking : typing.Optional[bool]
            (Deprecated) Enables expanded rationale output for debugging.

        storage : typing.Optional[ExtractJsonInputStorage]
            Options for persisting extraction artifacts. When enabled (default), artifacts are saved to storage and a database record is created.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        HttpResponse[ExtractResponse]
            Synchronous extraction result

        Raises
        ------
        BadRequestError
            If the server responds with status 400.
        UnauthorizedError
            If the server responds with status 401.
        TooManyRequestsError
            If the server responds with status 429.
        ApiError
            For any other non-2xx status, or when the response body is not valid JSON.
        """
        _response = self._client_wrapper.httpx_client.request(
            "extract",
            method="POST",
            json=self._extraction_request_body(
                file_url=file_url,
                structured_output=structured_output,
                schema=schema,
                experimental_schema=experimental_schema,
                schema_prompt=schema_prompt,
                custom_prompt=custom_prompt,
                chunking=chunking,
                chunk_size=chunk_size,
                pages=pages,
                extract_figure=extract_figure,
                figure_description=figure_description,
                return_html=return_html,
                thinking=thinking,
                storage=storage,
            ),
            headers={
                "content-type": "application/json",
            },
            request_options=request_options,
            omit=OMIT,
        )
        _data = typing.cast(ExtractResponse, self._parse_or_raise(_response, ExtractResponse))
        return HttpResponse(response=_response, data=_data)

    def extract_async(
        self,
        *,
        file_url: str,
        structured_output: typing.Optional[ExtractJsonInputStructuredOutput] = OMIT,
        schema: typing.Optional[ExtractJsonInputSchema] = OMIT,
        experimental_schema: typing.Optional[ExtractJsonInputExperimentalSchema] = OMIT,
        schema_prompt: typing.Optional[str] = OMIT,
        custom_prompt: typing.Optional[str] = OMIT,
        chunking: typing.Optional[str] = OMIT,
        chunk_size: typing.Optional[int] = OMIT,
        pages: typing.Optional[str] = OMIT,
        extract_figure: typing.Optional[bool] = OMIT,
        figure_description: typing.Optional[bool] = OMIT,
        return_html: typing.Optional[bool] = OMIT,
        thinking: typing.Optional[bool] = OMIT,
        storage: typing.Optional[ExtractJsonInputStorage] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> HttpResponse[ExtractAsyncResponse]:
        """
        Starts an asynchronous extraction job. The request mirrors the
        synchronous options but returns immediately with a job identifier that
        clients can poll for completion status.

        Sends a JSON ``POST`` to ``extract_async``.

        Parameters
        ----------
        file_url : str
            Public or pre-signed URL that Pulse will download and extract.

        structured_output : typing.Optional[ExtractJsonInputStructuredOutput]
            Recommended method for schema-guided extraction. Contains the schema and optional prompt in a single object.

        schema : typing.Optional[ExtractJsonInputSchema]
            (Deprecated) JSON schema describing structured data to extract. Use structuredOutput instead. Accepts either a JSON object or a stringified JSON representation.

        experimental_schema : typing.Optional[ExtractJsonInputExperimentalSchema]
            (Deprecated) Experimental schema definition used for feature flagged behaviour. Accepts either a JSON object or a stringified JSON representation.

        schema_prompt : typing.Optional[str]
            (Deprecated) Natural language prompt for schema-guided extraction. Use structuredOutput.schemaPrompt instead.

        custom_prompt : typing.Optional[str]
            (Deprecated) Custom instructions that augment the default extraction behaviour.

        chunking : typing.Optional[str]
            Comma-separated list of chunking strategies to apply (for example `semantic,header,page,recursive`).

        chunk_size : typing.Optional[int]
            Override for maximum characters per chunk when chunking is enabled.

        pages : typing.Optional[str]
            Page range filter supporting segments such as `1-2` or mixed ranges like `1-2,5`.

        extract_figure : typing.Optional[bool]
            Toggle to enable figure extraction in results.

        figure_description : typing.Optional[bool]
            Toggle to generate descriptive captions for extracted figures.

        return_html : typing.Optional[bool]
            Whether to include HTML representation alongside markdown in the response.

        thinking : typing.Optional[bool]
            (Deprecated) Enables expanded rationale output for debugging.

        storage : typing.Optional[ExtractJsonInputStorage]
            Options for persisting extraction artifacts. When enabled (default), artifacts are saved to storage and a database record is created.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        HttpResponse[ExtractAsyncResponse]
            Asynchronous extraction job accepted

        Raises
        ------
        BadRequestError
            If the server responds with status 400.
        UnauthorizedError
            If the server responds with status 401.
        TooManyRequestsError
            If the server responds with status 429.
        ApiError
            For any other non-2xx status, or when the response body is not valid JSON.
        """
        _response = self._client_wrapper.httpx_client.request(
            "extract_async",
            method="POST",
            json=self._extraction_request_body(
                file_url=file_url,
                structured_output=structured_output,
                schema=schema,
                experimental_schema=experimental_schema,
                schema_prompt=schema_prompt,
                custom_prompt=custom_prompt,
                chunking=chunking,
                chunk_size=chunk_size,
                pages=pages,
                extract_figure=extract_figure,
                figure_description=figure_description,
                return_html=return_html,
                thinking=thinking,
                storage=storage,
            ),
            headers={
                "content-type": "application/json",
            },
            request_options=request_options,
            omit=OMIT,
        )
        _data = typing.cast(ExtractAsyncResponse, self._parse_or_raise(_response, ExtractAsyncResponse))
        return HttpResponse(response=_response, data=_data)
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
class AsyncRawPulse:
    """
    Asynchronous low-level client for the Pulse extraction endpoints.

    Every public coroutine returns an ``AsyncHttpResponse`` wrapping both the
    raw HTTP response and the payload parsed from its JSON body.
    """

    def __init__(self, *, client_wrapper: AsyncClientWrapper):
        self._client_wrapper = client_wrapper

    @staticmethod
    def _extraction_request_body(
        *,
        file_url: str,
        structured_output: typing.Optional[ExtractJsonInputStructuredOutput],
        schema: typing.Optional[ExtractJsonInputSchema],
        experimental_schema: typing.Optional[ExtractJsonInputExperimentalSchema],
        schema_prompt: typing.Optional[str],
        custom_prompt: typing.Optional[str],
        chunking: typing.Optional[str],
        chunk_size: typing.Optional[int],
        pages: typing.Optional[str],
        extract_figure: typing.Optional[bool],
        figure_description: typing.Optional[bool],
        return_html: typing.Optional[bool],
        thinking: typing.Optional[bool],
        storage: typing.Optional[ExtractJsonInputStorage],
    ) -> typing.Dict[str, typing.Any]:
        """
        Build the JSON body shared by ``extract`` and ``extract_async``.

        Structured arguments are passed through
        ``convert_and_respect_annotation_metadata`` in the "write" direction;
        scalar arguments are forwarded unchanged. Values still equal to ``OMIT``
        are left in place here because the HTTP client receives ``omit=OMIT``
        separately.
        """
        return {
            "fileUrl": file_url,
            "structuredOutput": convert_and_respect_annotation_metadata(
                object_=structured_output, annotation=ExtractJsonInputStructuredOutput, direction="write"
            ),
            "schema": convert_and_respect_annotation_metadata(
                object_=schema, annotation=ExtractJsonInputSchema, direction="write"
            ),
            "experimentalSchema": convert_and_respect_annotation_metadata(
                object_=experimental_schema, annotation=ExtractJsonInputExperimentalSchema, direction="write"
            ),
            "schemaPrompt": schema_prompt,
            "customPrompt": custom_prompt,
            "chunking": chunking,
            "chunkSize": chunk_size,
            "pages": pages,
            "extractFigure": extract_figure,
            "figureDescription": figure_description,
            "returnHtml": return_html,
            "thinking": thinking,
            "storage": convert_and_respect_annotation_metadata(
                object_=storage, annotation=ExtractJsonInputStorage, direction="write"
            ),
        }

    @staticmethod
    def _parse_or_raise(response: typing.Any, type_: typing.Any) -> typing.Any:
        """
        Turn an HTTP response into an instance of ``type_`` or raise.

        This helper is synchronous: the response body is read with a plain
        ``response.json()`` call, with no ``await``.

        Parameters
        ----------
        response : typing.Any
            Response object exposing ``status_code``, ``headers``, ``json()`` and ``text``.

        type_ : typing.Any
            Type handed to ``construct_type`` for a 2xx response.

        Returns
        -------
        typing.Any
            The value produced by ``construct_type`` for a 2xx response.

        Raises
        ------
        BadRequestError
            On status 400.
        UnauthorizedError
            On status 401.
        TooManyRequestsError
            On status 429.
        ApiError
            On any other non-2xx status (body is the decoded JSON), or when the
            body cannot be decoded as JSON (body is the raw response text).
        """
        try:
            if 200 <= response.status_code < 300:
                return construct_type(
                    type_=type_,  # type: ignore
                    object_=response.json(),
                )
            # Statuses that map to a dedicated exception class.
            error_cls = {
                400: BadRequestError,
                401: UnauthorizedError,
                429: TooManyRequestsError,
            }.get(response.status_code)
            if error_cls is not None:
                raise error_cls(
                    headers=dict(response.headers),
                    body=typing.cast(
                        typing.Any,
                        construct_type(
                            type_=typing.Any,  # type: ignore
                            object_=response.json(),
                        ),
                    ),
                )
            _response_json = response.json()
        except JSONDecodeError:
            # The body was not valid JSON: surface the raw text instead.
            raise ApiError(status_code=response.status_code, headers=dict(response.headers), body=response.text)
        raise ApiError(status_code=response.status_code, headers=dict(response.headers), body=_response_json)

    async def extract(
        self,
        *,
        file_url: str,
        structured_output: typing.Optional[ExtractJsonInputStructuredOutput] = OMIT,
        schema: typing.Optional[ExtractJsonInputSchema] = OMIT,
        experimental_schema: typing.Optional[ExtractJsonInputExperimentalSchema] = OMIT,
        schema_prompt: typing.Optional[str] = OMIT,
        custom_prompt: typing.Optional[str] = OMIT,
        chunking: typing.Optional[str] = OMIT,
        chunk_size: typing.Optional[int] = OMIT,
        pages: typing.Optional[str] = OMIT,
        extract_figure: typing.Optional[bool] = OMIT,
        figure_description: typing.Optional[bool] = OMIT,
        return_html: typing.Optional[bool] = OMIT,
        thinking: typing.Optional[bool] = OMIT,
        storage: typing.Optional[ExtractJsonInputStorage] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> AsyncHttpResponse[ExtractResponse]:
        """
        The primary endpoint for the Pulse API. Parses uploaded documents or remote
        file URLs and returns rich markdown content with optional structured data
        extraction based on user-provided schemas and extraction options.

        Awaits a JSON ``POST`` to ``extract``.

        Parameters
        ----------
        file_url : str
            Public or pre-signed URL that Pulse will download and extract.

        structured_output : typing.Optional[ExtractJsonInputStructuredOutput]
            Recommended method for schema-guided extraction. Contains the schema and optional prompt in a single object.

        schema : typing.Optional[ExtractJsonInputSchema]
            (Deprecated) JSON schema describing structured data to extract. Use structuredOutput instead. Accepts either a JSON object or a stringified JSON representation.

        experimental_schema : typing.Optional[ExtractJsonInputExperimentalSchema]
            (Deprecated) Experimental schema definition used for feature flagged behaviour. Accepts either a JSON object or a stringified JSON representation.

        schema_prompt : typing.Optional[str]
            (Deprecated) Natural language prompt for schema-guided extraction. Use structuredOutput.schemaPrompt instead.

        custom_prompt : typing.Optional[str]
            (Deprecated) Custom instructions that augment the default extraction behaviour.

        chunking : typing.Optional[str]
            Comma-separated list of chunking strategies to apply (for example `semantic,header,page,recursive`).

        chunk_size : typing.Optional[int]
            Override for maximum characters per chunk when chunking is enabled.

        pages : typing.Optional[str]
            Page range filter supporting segments such as `1-2` or mixed ranges like `1-2,5`.

        extract_figure : typing.Optional[bool]
            Toggle to enable figure extraction in results.

        figure_description : typing.Optional[bool]
            Toggle to generate descriptive captions for extracted figures.

        return_html : typing.Optional[bool]
            Whether to include HTML representation alongside markdown in the response.

        thinking : typing.Optional[bool]
            (Deprecated) Enables expanded rationale output for debugging.

        storage : typing.Optional[ExtractJsonInputStorage]
            Options for persisting extraction artifacts. When enabled (default), artifacts are saved to storage and a database record is created.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        AsyncHttpResponse[ExtractResponse]
            Synchronous extraction result

        Raises
        ------
        BadRequestError
            If the server responds with status 400.
        UnauthorizedError
            If the server responds with status 401.
        TooManyRequestsError
            If the server responds with status 429.
        ApiError
            For any other non-2xx status, or when the response body is not valid JSON.
        """
        _response = await self._client_wrapper.httpx_client.request(
            "extract",
            method="POST",
            json=self._extraction_request_body(
                file_url=file_url,
                structured_output=structured_output,
                schema=schema,
                experimental_schema=experimental_schema,
                schema_prompt=schema_prompt,
                custom_prompt=custom_prompt,
                chunking=chunking,
                chunk_size=chunk_size,
                pages=pages,
                extract_figure=extract_figure,
                figure_description=figure_description,
                return_html=return_html,
                thinking=thinking,
                storage=storage,
            ),
            headers={
                "content-type": "application/json",
            },
            request_options=request_options,
            omit=OMIT,
        )
        _data = typing.cast(ExtractResponse, self._parse_or_raise(_response, ExtractResponse))
        return AsyncHttpResponse(response=_response, data=_data)

    async def extract_async(
        self,
        *,
        file_url: str,
        structured_output: typing.Optional[ExtractJsonInputStructuredOutput] = OMIT,
        schema: typing.Optional[ExtractJsonInputSchema] = OMIT,
        experimental_schema: typing.Optional[ExtractJsonInputExperimentalSchema] = OMIT,
        schema_prompt: typing.Optional[str] = OMIT,
        custom_prompt: typing.Optional[str] = OMIT,
        chunking: typing.Optional[str] = OMIT,
        chunk_size: typing.Optional[int] = OMIT,
        pages: typing.Optional[str] = OMIT,
        extract_figure: typing.Optional[bool] = OMIT,
        figure_description: typing.Optional[bool] = OMIT,
        return_html: typing.Optional[bool] = OMIT,
        thinking: typing.Optional[bool] = OMIT,
        storage: typing.Optional[ExtractJsonInputStorage] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> AsyncHttpResponse[ExtractAsyncResponse]:
        """
        Starts an asynchronous extraction job. The request mirrors the
        synchronous options but returns immediately with a job identifier that
        clients can poll for completion status.

        Awaits a JSON ``POST`` to ``extract_async``.

        Parameters
        ----------
        file_url : str
            Public or pre-signed URL that Pulse will download and extract.

        structured_output : typing.Optional[ExtractJsonInputStructuredOutput]
            Recommended method for schema-guided extraction. Contains the schema and optional prompt in a single object.

        schema : typing.Optional[ExtractJsonInputSchema]
            (Deprecated) JSON schema describing structured data to extract. Use structuredOutput instead. Accepts either a JSON object or a stringified JSON representation.

        experimental_schema : typing.Optional[ExtractJsonInputExperimentalSchema]
            (Deprecated) Experimental schema definition used for feature flagged behaviour. Accepts either a JSON object or a stringified JSON representation.

        schema_prompt : typing.Optional[str]
            (Deprecated) Natural language prompt for schema-guided extraction. Use structuredOutput.schemaPrompt instead.

        custom_prompt : typing.Optional[str]
            (Deprecated) Custom instructions that augment the default extraction behaviour.

        chunking : typing.Optional[str]
            Comma-separated list of chunking strategies to apply (for example `semantic,header,page,recursive`).

        chunk_size : typing.Optional[int]
            Override for maximum characters per chunk when chunking is enabled.

        pages : typing.Optional[str]
            Page range filter supporting segments such as `1-2` or mixed ranges like `1-2,5`.

        extract_figure : typing.Optional[bool]
            Toggle to enable figure extraction in results.

        figure_description : typing.Optional[bool]
            Toggle to generate descriptive captions for extracted figures.

        return_html : typing.Optional[bool]
            Whether to include HTML representation alongside markdown in the response.

        thinking : typing.Optional[bool]
            (Deprecated) Enables expanded rationale output for debugging.

        storage : typing.Optional[ExtractJsonInputStorage]
            Options for persisting extraction artifacts. When enabled (default), artifacts are saved to storage and a database record is created.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        AsyncHttpResponse[ExtractAsyncResponse]
            Asynchronous extraction job accepted

        Raises
        ------
        BadRequestError
            If the server responds with status 400.
        UnauthorizedError
            If the server responds with status 401.
        TooManyRequestsError
            If the server responds with status 429.
        ApiError
            For any other non-2xx status, or when the response body is not valid JSON.
        """
        _response = await self._client_wrapper.httpx_client.request(
            "extract_async",
            method="POST",
            json=self._extraction_request_body(
                file_url=file_url,
                structured_output=structured_output,
                schema=schema,
                experimental_schema=experimental_schema,
                schema_prompt=schema_prompt,
                custom_prompt=custom_prompt,
                chunking=chunking,
                chunk_size=chunk_size,
                pages=pages,
                extract_figure=extract_figure,
                figure_description=figure_description,
                return_html=return_html,
                thinking=thinking,
                storage=storage,
            ),
            headers={
                "content-type": "application/json",
            },
            request_options=request_options,
            omit=OMIT,
        )
        _data = typing.cast(ExtractAsyncResponse, self._parse_or_raise(_response, ExtractAsyncResponse))
        return AsyncHttpResponse(response=_response, data=_data)
|
pulse/types/__init__.py
ADDED