pulse-python-sdk 0.0.62__tar.gz → 0.0.64__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/PKG-INFO +1 -1
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/pyproject.toml +1 -1
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/client.py +384 -11
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/core/client_wrapper.py +2 -2
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/jobs/client.py +6 -6
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/jobs/raw_client.py +6 -6
- pulse_python_sdk-0.0.64/src/pulse/raw_client.py +1289 -0
- pulse_python_sdk-0.0.62/src/pulse/types/extract_async_response.py → pulse_python_sdk-0.0.64/src/pulse/types/async_submission_response.py +12 -7
- pulse_python_sdk-0.0.62/src/pulse/types/extract_async_submission_response_status.py → pulse_python_sdk-0.0.64/src/pulse/types/async_submission_response_status.py +1 -1
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/types/extract_async_request_structured_output.py +1 -1
- pulse_python_sdk-0.0.64/src/pulse/types/extract_async_submission_response.py +5 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/types/extract_input.py +20 -1
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/types/extract_input_structured_output.py +1 -1
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/types/extract_options.py +20 -1
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/types/extract_options_structured_output.py +1 -1
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/types/extract_request_structured_output.py +1 -1
- pulse_python_sdk-0.0.64/src/pulse/types/extract_response.py +109 -0
- pulse_python_sdk-0.0.64/src/pulse/types/extract_response_chunks.py +42 -0
- pulse_python_sdk-0.0.64/src/pulse/types/extract_response_plan_info.py +37 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/types/job_status_response.py +3 -3
- pulse_python_sdk-0.0.64/src/pulse/types/schema_config.py +37 -0
- pulse_python_sdk-0.0.64/src/pulse/types/schema_response.py +8 -0
- pulse_python_sdk-0.0.64/src/pulse/types/single_schema_response.py +38 -0
- pulse_python_sdk-0.0.64/src/pulse/types/split_config.py +28 -0
- pulse_python_sdk-0.0.64/src/pulse/types/split_output.py +27 -0
- pulse_python_sdk-0.0.64/src/pulse/types/split_response.py +33 -0
- pulse_python_sdk-0.0.64/src/pulse/types/split_schema_response.py +48 -0
- pulse_python_sdk-0.0.64/src/pulse/types/structured_output_config.py +41 -0
- pulse_python_sdk-0.0.64/src/pulse/types/structured_output_result.py +32 -0
- pulse_python_sdk-0.0.64/src/pulse/types/topic_definition.py +32 -0
- pulse_python_sdk-0.0.64/src/pulse/types/topic_schema_config.py +47 -0
- pulse_python_sdk-0.0.62/src/pulse/raw_client.py +0 -653
- pulse_python_sdk-0.0.62/src/pulse/types/extract_response.py +0 -47
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/README.md +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/__init__.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/core/__init__.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/core/api_error.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/core/datetime_utils.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/core/file.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/core/force_multipart.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/core/http_client.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/core/http_response.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/core/http_sse/__init__.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/core/http_sse/_api.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/core/http_sse/_decoders.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/core/http_sse/_exceptions.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/core/http_sse/_models.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/core/jsonable_encoder.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/core/pydantic_utilities.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/core/query_encoder.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/core/remove_none_from_dict.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/core/request_options.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/core/serialization.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/core/unchecked_base_model.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/environment.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/errors/__init__.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/errors/bad_request_error.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/errors/forbidden_error.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/errors/internal_server_error.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/errors/not_found_error.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/errors/too_many_requests_error.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/errors/unauthorized_error.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/jobs/__init__.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/py.typed +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/types/__init__.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/types/extract_async_request_experimental_schema.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/types/extract_async_request_schema.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/types/extract_async_request_storage.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/types/extract_input_experimental_schema.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/types/extract_input_schema.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/types/extract_input_storage.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/types/extract_options_experimental_schema.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/types/extract_options_schema.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/types/extract_options_storage.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/types/extract_request_experimental_schema.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/types/extract_request_schema.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/types/extract_request_storage.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/types/extract_source.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/types/job_cancellation_response.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/types/job_status.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/version.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/webhooks/__init__.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/webhooks/client.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/webhooks/raw_client.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/webhooks/types/__init__.py +0 -0
- {pulse_python_sdk-0.0.62 → pulse_python_sdk-0.0.64}/src/pulse/webhooks/types/create_webhook_link_response.py +0 -0
|
@@ -12,16 +12,21 @@ from .core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
|
|
|
12
12
|
from .core.request_options import RequestOptions
|
|
13
13
|
from .environment import PulseEnvironment
|
|
14
14
|
from .raw_client import AsyncRawPulse, RawPulse
|
|
15
|
+
from .types.async_submission_response import AsyncSubmissionResponse
|
|
15
16
|
from .types.extract_async_request_experimental_schema import ExtractAsyncRequestExperimentalSchema
|
|
16
17
|
from .types.extract_async_request_schema import ExtractAsyncRequestSchema
|
|
17
18
|
from .types.extract_async_request_storage import ExtractAsyncRequestStorage
|
|
18
19
|
from .types.extract_async_request_structured_output import ExtractAsyncRequestStructuredOutput
|
|
19
|
-
from .types.extract_async_response import ExtractAsyncResponse
|
|
20
20
|
from .types.extract_request_experimental_schema import ExtractRequestExperimentalSchema
|
|
21
21
|
from .types.extract_request_schema import ExtractRequestSchema
|
|
22
22
|
from .types.extract_request_storage import ExtractRequestStorage
|
|
23
23
|
from .types.extract_request_structured_output import ExtractRequestStructuredOutput
|
|
24
24
|
from .types.extract_response import ExtractResponse
|
|
25
|
+
from .types.schema_config import SchemaConfig
|
|
26
|
+
from .types.schema_response import SchemaResponse
|
|
27
|
+
from .types.split_config import SplitConfig
|
|
28
|
+
from .types.split_response import SplitResponse
|
|
29
|
+
from .types.topic_schema_config import TopicSchemaConfig
|
|
25
30
|
|
|
26
31
|
if typing.TYPE_CHECKING:
|
|
27
32
|
from .jobs.client import AsyncJobsClient, JobsClient
|
|
@@ -127,9 +132,12 @@ class Pulse:
|
|
|
127
132
|
pages: typing.Optional[str] = OMIT,
|
|
128
133
|
extract_figure: typing.Optional[bool] = OMIT,
|
|
129
134
|
figure_description: typing.Optional[bool] = OMIT,
|
|
135
|
+
show_images: typing.Optional[bool] = OMIT,
|
|
130
136
|
return_html: typing.Optional[bool] = OMIT,
|
|
137
|
+
effort: typing.Optional[bool] = OMIT,
|
|
131
138
|
thinking: typing.Optional[bool] = OMIT,
|
|
132
139
|
storage: typing.Optional[ExtractRequestStorage] = OMIT,
|
|
140
|
+
async_: typing.Optional[bool] = OMIT,
|
|
133
141
|
request_options: typing.Optional[RequestOptions] = None,
|
|
134
142
|
) -> ExtractResponse:
|
|
135
143
|
"""
|
|
@@ -137,6 +145,13 @@ class Pulse:
|
|
|
137
145
|
file URLs and returns rich markdown content with optional structured data
|
|
138
146
|
extraction based on user-provided schemas and extraction options.
|
|
139
147
|
|
|
148
|
+
Set `async: true` to return immediately with a job_id for polling via
|
|
149
|
+
GET /job/{jobId}. Otherwise processes synchronously.
|
|
150
|
+
|
|
151
|
+
**Note:** Both sync and async modes return HTTP 200. When `async` is true
|
|
152
|
+
the response body contains `{ job_id, status }` instead of the full
|
|
153
|
+
extraction result.
|
|
154
|
+
|
|
140
155
|
Parameters
|
|
141
156
|
----------
|
|
142
157
|
file : typing.Optional[core.File]
|
|
@@ -146,7 +161,7 @@ class Pulse:
|
|
|
146
161
|
Public or pre-signed URL that Pulse will download and extract. Required unless file is provided.
|
|
147
162
|
|
|
148
163
|
structured_output : typing.Optional[ExtractRequestStructuredOutput]
|
|
149
|
-
|
|
164
|
+
**⚠️ DEPRECATED** — Use the `/schema` endpoint after extraction instead. Pass the `extraction_id` from the extract response to `/schema` with your `schema_config`. This parameter still works for backward compatibility but will be removed in a future version.
|
|
150
165
|
|
|
151
166
|
schema : typing.Optional[ExtractRequestSchema]
|
|
152
167
|
(Deprecated) JSON schema describing structured data to extract. Use structuredOutput instead. Accepts either a JSON object or a stringified JSON representation.
|
|
@@ -175,22 +190,33 @@ class Pulse:
|
|
|
175
190
|
figure_description : typing.Optional[bool]
|
|
176
191
|
Toggle to generate descriptive captions for extracted figures.
|
|
177
192
|
|
|
193
|
+
show_images : typing.Optional[bool]
|
|
194
|
+
Embed base64-encoded images inline in figure tags in the output. Increases response size.
|
|
195
|
+
|
|
178
196
|
return_html : typing.Optional[bool]
|
|
179
197
|
Whether to include HTML representation alongside markdown in the response.
|
|
180
198
|
|
|
199
|
+
effort : typing.Optional[bool]
|
|
200
|
+
Enable extended reasoning mode for higher quality extraction on complex documents. Uses a more powerful model at higher latency.
|
|
201
|
+
|
|
181
202
|
thinking : typing.Optional[bool]
|
|
182
203
|
(Deprecated) Enables expanded rationale output for debugging.
|
|
183
204
|
|
|
184
205
|
storage : typing.Optional[ExtractRequestStorage]
|
|
185
206
|
Options for persisting extraction artifacts. When enabled (default), artifacts are saved to storage and a database record is created.
|
|
186
207
|
|
|
208
|
+
async_ : typing.Optional[bool]
|
|
209
|
+
If true, returns immediately with a job_id for polling via GET /job/{jobId}. Otherwise processes synchronously.
|
|
210
|
+
|
|
187
211
|
request_options : typing.Optional[RequestOptions]
|
|
188
212
|
Request-specific configuration.
|
|
189
213
|
|
|
190
214
|
Returns
|
|
191
215
|
-------
|
|
192
216
|
ExtractResponse
|
|
193
|
-
|
|
217
|
+
When `async=false` (default): full extraction result with markdown,
|
|
218
|
+
bounding boxes, chunks, etc.
|
|
219
|
+
When `async=true`: job submission acknowledgement with `job_id`.
|
|
194
220
|
|
|
195
221
|
Examples
|
|
196
222
|
--------
|
|
@@ -214,9 +240,12 @@ class Pulse:
|
|
|
214
240
|
pages=pages,
|
|
215
241
|
extract_figure=extract_figure,
|
|
216
242
|
figure_description=figure_description,
|
|
243
|
+
show_images=show_images,
|
|
217
244
|
return_html=return_html,
|
|
245
|
+
effort=effort,
|
|
218
246
|
thinking=thinking,
|
|
219
247
|
storage=storage,
|
|
248
|
+
async_=async_,
|
|
220
249
|
request_options=request_options,
|
|
221
250
|
)
|
|
222
251
|
return _response.data
|
|
@@ -236,12 +265,17 @@ class Pulse:
|
|
|
236
265
|
pages: typing.Optional[str] = OMIT,
|
|
237
266
|
extract_figure: typing.Optional[bool] = OMIT,
|
|
238
267
|
figure_description: typing.Optional[bool] = OMIT,
|
|
268
|
+
show_images: typing.Optional[bool] = OMIT,
|
|
239
269
|
return_html: typing.Optional[bool] = OMIT,
|
|
270
|
+
effort: typing.Optional[bool] = OMIT,
|
|
240
271
|
thinking: typing.Optional[bool] = OMIT,
|
|
241
272
|
storage: typing.Optional[ExtractAsyncRequestStorage] = OMIT,
|
|
273
|
+
async_: typing.Optional[bool] = OMIT,
|
|
242
274
|
request_options: typing.Optional[RequestOptions] = None,
|
|
243
|
-
) -> ExtractAsyncResponse:
|
|
275
|
+
) -> AsyncSubmissionResponse:
|
|
244
276
|
"""
|
|
277
|
+
**Deprecated**: Use `/extract` with `async: true` instead.
|
|
278
|
+
|
|
245
279
|
Starts an asynchronous extraction job. The request mirrors the
|
|
246
280
|
synchronous options but returns immediately with a job identifier that
|
|
247
281
|
clients can poll for completion status.
|
|
@@ -255,7 +289,7 @@ class Pulse:
|
|
|
255
289
|
Public or pre-signed URL that Pulse will download and extract. Required unless file is provided.
|
|
256
290
|
|
|
257
291
|
structured_output : typing.Optional[ExtractAsyncRequestStructuredOutput]
|
|
258
|
-
|
|
292
|
+
**⚠️ DEPRECATED** — Use the `/schema` endpoint after extraction instead. Pass the `extraction_id` from the extract response to `/schema` with your `schema_config`. This parameter still works for backward compatibility but will be removed in a future version.
|
|
259
293
|
|
|
260
294
|
schema : typing.Optional[ExtractAsyncRequestSchema]
|
|
261
295
|
(Deprecated) JSON schema describing structured data to extract. Use structuredOutput instead. Accepts either a JSON object or a stringified JSON representation.
|
|
@@ -284,21 +318,30 @@ class Pulse:
|
|
|
284
318
|
figure_description : typing.Optional[bool]
|
|
285
319
|
Toggle to generate descriptive captions for extracted figures.
|
|
286
320
|
|
|
321
|
+
show_images : typing.Optional[bool]
|
|
322
|
+
Embed base64-encoded images inline in figure tags in the output. Increases response size.
|
|
323
|
+
|
|
287
324
|
return_html : typing.Optional[bool]
|
|
288
325
|
Whether to include HTML representation alongside markdown in the response.
|
|
289
326
|
|
|
327
|
+
effort : typing.Optional[bool]
|
|
328
|
+
Enable extended reasoning mode for higher quality extraction on complex documents. Uses a more powerful model at higher latency.
|
|
329
|
+
|
|
290
330
|
thinking : typing.Optional[bool]
|
|
291
331
|
(Deprecated) Enables expanded rationale output for debugging.
|
|
292
332
|
|
|
293
333
|
storage : typing.Optional[ExtractAsyncRequestStorage]
|
|
294
334
|
Options for persisting extraction artifacts. When enabled (default), artifacts are saved to storage and a database record is created.
|
|
295
335
|
|
|
336
|
+
async_ : typing.Optional[bool]
|
|
337
|
+
If true, returns immediately with a job_id for polling via GET /job/{jobId}. Otherwise processes synchronously.
|
|
338
|
+
|
|
296
339
|
request_options : typing.Optional[RequestOptions]
|
|
297
340
|
Request-specific configuration.
|
|
298
341
|
|
|
299
342
|
Returns
|
|
300
343
|
-------
|
|
301
|
-
ExtractAsyncResponse
|
|
344
|
+
AsyncSubmissionResponse
|
|
302
345
|
Asynchronous extraction job accepted
|
|
303
346
|
|
|
304
347
|
Examples
|
|
@@ -323,9 +366,147 @@ class Pulse:
|
|
|
323
366
|
pages=pages,
|
|
324
367
|
extract_figure=extract_figure,
|
|
325
368
|
figure_description=figure_description,
|
|
369
|
+
show_images=show_images,
|
|
326
370
|
return_html=return_html,
|
|
371
|
+
effort=effort,
|
|
327
372
|
thinking=thinking,
|
|
328
373
|
storage=storage,
|
|
374
|
+
async_=async_,
|
|
375
|
+
request_options=request_options,
|
|
376
|
+
)
|
|
377
|
+
return _response.data
|
|
378
|
+
|
|
379
|
+
def split(
|
|
380
|
+
self,
|
|
381
|
+
*,
|
|
382
|
+
extraction_id: str,
|
|
383
|
+
split_config: typing.Optional[SplitConfig] = OMIT,
|
|
384
|
+
split_config_id: typing.Optional[str] = OMIT,
|
|
385
|
+
async_: typing.Optional[bool] = OMIT,
|
|
386
|
+
request_options: typing.Optional[RequestOptions] = None,
|
|
387
|
+
) -> SplitResponse:
|
|
388
|
+
"""
|
|
389
|
+
Identify which pages of a document contain each topic/section.
|
|
390
|
+
Takes an existing extraction and a list of topics, then uses AI to
|
|
391
|
+
identify which PDF pages contain content related to each topic.
|
|
392
|
+
|
|
393
|
+
The result is persisted with a `split_id` that can be used with
|
|
394
|
+
the `/schema` endpoint (split mode) for targeted schema extraction on
|
|
395
|
+
specific page groups.
|
|
396
|
+
|
|
397
|
+
Set `async: true` to return immediately with a job_id for polling.
|
|
398
|
+
|
|
399
|
+
Parameters
|
|
400
|
+
----------
|
|
401
|
+
extraction_id : str
|
|
402
|
+
ID of the saved extraction to split.
|
|
403
|
+
|
|
404
|
+
split_config : typing.Optional[SplitConfig]
|
|
405
|
+
Inline split configuration with topics. Required if split_config_id is not provided.
|
|
406
|
+
|
|
407
|
+
split_config_id : typing.Optional[str]
|
|
408
|
+
Reference to a saved split configuration. Use this instead of providing split_config inline.
|
|
409
|
+
|
|
410
|
+
async_ : typing.Optional[bool]
|
|
411
|
+
If true, returns immediately with a job_id for polling via GET /job/{jobId}. Otherwise processes synchronously.
|
|
412
|
+
|
|
413
|
+
request_options : typing.Optional[RequestOptions]
|
|
414
|
+
Request-specific configuration.
|
|
415
|
+
|
|
416
|
+
Returns
|
|
417
|
+
-------
|
|
418
|
+
SplitResponse
|
|
419
|
+
Split result with page assignments (when async=false or omitted)
|
|
420
|
+
|
|
421
|
+
Examples
|
|
422
|
+
--------
|
|
423
|
+
from pulse import Pulse
|
|
424
|
+
|
|
425
|
+
client = Pulse(
|
|
426
|
+
api_key="YOUR_API_KEY",
|
|
427
|
+
)
|
|
428
|
+
client.split(
|
|
429
|
+
extraction_id="extraction_id",
|
|
430
|
+
)
|
|
431
|
+
"""
|
|
432
|
+
_response = self._raw_client.split(
|
|
433
|
+
extraction_id=extraction_id,
|
|
434
|
+
split_config=split_config,
|
|
435
|
+
split_config_id=split_config_id,
|
|
436
|
+
async_=async_,
|
|
437
|
+
request_options=request_options,
|
|
438
|
+
)
|
|
439
|
+
return _response.data
|
|
440
|
+
|
|
441
|
+
def schema(
|
|
442
|
+
self,
|
|
443
|
+
*,
|
|
444
|
+
extraction_id: typing.Optional[str] = OMIT,
|
|
445
|
+
split_id: typing.Optional[str] = OMIT,
|
|
446
|
+
schema_config: typing.Optional[SchemaConfig] = OMIT,
|
|
447
|
+
schema_config_id: typing.Optional[str] = OMIT,
|
|
448
|
+
split_schema_config: typing.Optional[typing.Dict[str, TopicSchemaConfig]] = OMIT,
|
|
449
|
+
async_: typing.Optional[bool] = OMIT,
|
|
450
|
+
request_options: typing.Optional[RequestOptions] = None,
|
|
451
|
+
) -> SchemaResponse:
|
|
452
|
+
"""
|
|
453
|
+
Apply schema extraction to a previously saved extraction. The mode is
|
|
454
|
+
inferred from the input:
|
|
455
|
+
|
|
456
|
+
**Single mode** — Provide `extraction_id` + `schema_config` (or
|
|
457
|
+
`schema_config_id`) to apply one schema to the entire document.
|
|
458
|
+
|
|
459
|
+
**Split mode** — Provide `split_id` + `split_schema_config` to apply
|
|
460
|
+
different schemas to different page groups from a prior `/split` call.
|
|
461
|
+
Each topic can have its own schema, prompt, and effort setting.
|
|
462
|
+
|
|
463
|
+
Creates a versioned schema record that can be retrieved later.
|
|
464
|
+
Set `async: true` to return immediately with a job_id for polling.
|
|
465
|
+
|
|
466
|
+
Parameters
|
|
467
|
+
----------
|
|
468
|
+
extraction_id : typing.Optional[str]
|
|
469
|
+
ID of saved extraction to apply the schema to. Use for single-mode schema extraction.
|
|
470
|
+
|
|
471
|
+
split_id : typing.Optional[str]
|
|
472
|
+
ID of saved split (from a prior `/split` call). Use for split-mode schema extraction.
|
|
473
|
+
|
|
474
|
+
schema_config : typing.Optional[SchemaConfig]
|
|
475
|
+
Inline schema configuration for single mode. Required (with extraction_id) if schema_config_id is not provided.
|
|
476
|
+
|
|
477
|
+
schema_config_id : typing.Optional[str]
|
|
478
|
+
Reference to a saved schema configuration for single mode. Use this instead of providing schema_config inline.
|
|
479
|
+
|
|
480
|
+
split_schema_config : typing.Optional[typing.Dict[str, TopicSchemaConfig]]
|
|
481
|
+
Per-topic schema configurations for split mode. Keys must match the topic names from the split. Each topic provides either inline schema or schema_config_id.
|
|
482
|
+
|
|
483
|
+
async_ : typing.Optional[bool]
|
|
484
|
+
If true, returns immediately with a job_id for polling via GET /job/{jobId}. Otherwise processes synchronously.
|
|
485
|
+
|
|
486
|
+
request_options : typing.Optional[RequestOptions]
|
|
487
|
+
Request-specific configuration.
|
|
488
|
+
|
|
489
|
+
Returns
|
|
490
|
+
-------
|
|
491
|
+
SchemaResponse
|
|
492
|
+
Schema extraction result (when async=false or omitted). Shape depends on the mode used.
|
|
493
|
+
|
|
494
|
+
Examples
|
|
495
|
+
--------
|
|
496
|
+
from pulse import Pulse
|
|
497
|
+
|
|
498
|
+
client = Pulse(
|
|
499
|
+
api_key="YOUR_API_KEY",
|
|
500
|
+
)
|
|
501
|
+
client.schema()
|
|
502
|
+
"""
|
|
503
|
+
_response = self._raw_client.schema(
|
|
504
|
+
extraction_id=extraction_id,
|
|
505
|
+
split_id=split_id,
|
|
506
|
+
schema_config=schema_config,
|
|
507
|
+
schema_config_id=schema_config_id,
|
|
508
|
+
split_schema_config=split_schema_config,
|
|
509
|
+
async_=async_,
|
|
329
510
|
request_options=request_options,
|
|
330
511
|
)
|
|
331
512
|
return _response.data
|
|
@@ -444,9 +625,12 @@ class AsyncPulse:
|
|
|
444
625
|
pages: typing.Optional[str] = OMIT,
|
|
445
626
|
extract_figure: typing.Optional[bool] = OMIT,
|
|
446
627
|
figure_description: typing.Optional[bool] = OMIT,
|
|
628
|
+
show_images: typing.Optional[bool] = OMIT,
|
|
447
629
|
return_html: typing.Optional[bool] = OMIT,
|
|
630
|
+
effort: typing.Optional[bool] = OMIT,
|
|
448
631
|
thinking: typing.Optional[bool] = OMIT,
|
|
449
632
|
storage: typing.Optional[ExtractRequestStorage] = OMIT,
|
|
633
|
+
async_: typing.Optional[bool] = OMIT,
|
|
450
634
|
request_options: typing.Optional[RequestOptions] = None,
|
|
451
635
|
) -> ExtractResponse:
|
|
452
636
|
"""
|
|
@@ -454,6 +638,13 @@ class AsyncPulse:
|
|
|
454
638
|
file URLs and returns rich markdown content with optional structured data
|
|
455
639
|
extraction based on user-provided schemas and extraction options.
|
|
456
640
|
|
|
641
|
+
Set `async: true` to return immediately with a job_id for polling via
|
|
642
|
+
GET /job/{jobId}. Otherwise processes synchronously.
|
|
643
|
+
|
|
644
|
+
**Note:** Both sync and async modes return HTTP 200. When `async` is true
|
|
645
|
+
the response body contains `{ job_id, status }` instead of the full
|
|
646
|
+
extraction result.
|
|
647
|
+
|
|
457
648
|
Parameters
|
|
458
649
|
----------
|
|
459
650
|
file : typing.Optional[core.File]
|
|
@@ -463,7 +654,7 @@ class AsyncPulse:
|
|
|
463
654
|
Public or pre-signed URL that Pulse will download and extract. Required unless file is provided.
|
|
464
655
|
|
|
465
656
|
structured_output : typing.Optional[ExtractRequestStructuredOutput]
|
|
466
|
-
|
|
657
|
+
**⚠️ DEPRECATED** — Use the `/schema` endpoint after extraction instead. Pass the `extraction_id` from the extract response to `/schema` with your `schema_config`. This parameter still works for backward compatibility but will be removed in a future version.
|
|
467
658
|
|
|
468
659
|
schema : typing.Optional[ExtractRequestSchema]
|
|
469
660
|
(Deprecated) JSON schema describing structured data to extract. Use structuredOutput instead. Accepts either a JSON object or a stringified JSON representation.
|
|
@@ -492,22 +683,33 @@ class AsyncPulse:
|
|
|
492
683
|
figure_description : typing.Optional[bool]
|
|
493
684
|
Toggle to generate descriptive captions for extracted figures.
|
|
494
685
|
|
|
686
|
+
show_images : typing.Optional[bool]
|
|
687
|
+
Embed base64-encoded images inline in figure tags in the output. Increases response size.
|
|
688
|
+
|
|
495
689
|
return_html : typing.Optional[bool]
|
|
496
690
|
Whether to include HTML representation alongside markdown in the response.
|
|
497
691
|
|
|
692
|
+
effort : typing.Optional[bool]
|
|
693
|
+
Enable extended reasoning mode for higher quality extraction on complex documents. Uses a more powerful model at higher latency.
|
|
694
|
+
|
|
498
695
|
thinking : typing.Optional[bool]
|
|
499
696
|
(Deprecated) Enables expanded rationale output for debugging.
|
|
500
697
|
|
|
501
698
|
storage : typing.Optional[ExtractRequestStorage]
|
|
502
699
|
Options for persisting extraction artifacts. When enabled (default), artifacts are saved to storage and a database record is created.
|
|
503
700
|
|
|
701
|
+
async_ : typing.Optional[bool]
|
|
702
|
+
If true, returns immediately with a job_id for polling via GET /job/{jobId}. Otherwise processes synchronously.
|
|
703
|
+
|
|
504
704
|
request_options : typing.Optional[RequestOptions]
|
|
505
705
|
Request-specific configuration.
|
|
506
706
|
|
|
507
707
|
Returns
|
|
508
708
|
-------
|
|
509
709
|
ExtractResponse
|
|
510
|
-
|
|
710
|
+
When `async=false` (default): full extraction result with markdown,
|
|
711
|
+
bounding boxes, chunks, etc.
|
|
712
|
+
When `async=true`: job submission acknowledgement with `job_id`.
|
|
511
713
|
|
|
512
714
|
Examples
|
|
513
715
|
--------
|
|
@@ -539,9 +741,12 @@ class AsyncPulse:
|
|
|
539
741
|
pages=pages,
|
|
540
742
|
extract_figure=extract_figure,
|
|
541
743
|
figure_description=figure_description,
|
|
744
|
+
show_images=show_images,
|
|
542
745
|
return_html=return_html,
|
|
746
|
+
effort=effort,
|
|
543
747
|
thinking=thinking,
|
|
544
748
|
storage=storage,
|
|
749
|
+
async_=async_,
|
|
545
750
|
request_options=request_options,
|
|
546
751
|
)
|
|
547
752
|
return _response.data
|
|
@@ -561,12 +766,17 @@ class AsyncPulse:
|
|
|
561
766
|
pages: typing.Optional[str] = OMIT,
|
|
562
767
|
extract_figure: typing.Optional[bool] = OMIT,
|
|
563
768
|
figure_description: typing.Optional[bool] = OMIT,
|
|
769
|
+
show_images: typing.Optional[bool] = OMIT,
|
|
564
770
|
return_html: typing.Optional[bool] = OMIT,
|
|
771
|
+
effort: typing.Optional[bool] = OMIT,
|
|
565
772
|
thinking: typing.Optional[bool] = OMIT,
|
|
566
773
|
storage: typing.Optional[ExtractAsyncRequestStorage] = OMIT,
|
|
774
|
+
async_: typing.Optional[bool] = OMIT,
|
|
567
775
|
request_options: typing.Optional[RequestOptions] = None,
|
|
568
|
-
) -> ExtractAsyncResponse:
|
|
776
|
+
) -> AsyncSubmissionResponse:
|
|
569
777
|
"""
|
|
778
|
+
**Deprecated**: Use `/extract` with `async: true` instead.
|
|
779
|
+
|
|
570
780
|
Starts an asynchronous extraction job. The request mirrors the
|
|
571
781
|
synchronous options but returns immediately with a job identifier that
|
|
572
782
|
clients can poll for completion status.
|
|
@@ -580,7 +790,7 @@ class AsyncPulse:
|
|
|
580
790
|
Public or pre-signed URL that Pulse will download and extract. Required unless file is provided.
|
|
581
791
|
|
|
582
792
|
structured_output : typing.Optional[ExtractAsyncRequestStructuredOutput]
|
|
583
|
-
|
|
793
|
+
**⚠️ DEPRECATED** — Use the `/schema` endpoint after extraction instead. Pass the `extraction_id` from the extract response to `/schema` with your `schema_config`. This parameter still works for backward compatibility but will be removed in a future version.
|
|
584
794
|
|
|
585
795
|
schema : typing.Optional[ExtractAsyncRequestSchema]
|
|
586
796
|
(Deprecated) JSON schema describing structured data to extract. Use structuredOutput instead. Accepts either a JSON object or a stringified JSON representation.
|
|
@@ -609,21 +819,30 @@ class AsyncPulse:
|
|
|
609
819
|
figure_description : typing.Optional[bool]
|
|
610
820
|
Toggle to generate descriptive captions for extracted figures.
|
|
611
821
|
|
|
822
|
+
show_images : typing.Optional[bool]
|
|
823
|
+
Embed base64-encoded images inline in figure tags in the output. Increases response size.
|
|
824
|
+
|
|
612
825
|
return_html : typing.Optional[bool]
|
|
613
826
|
Whether to include HTML representation alongside markdown in the response.
|
|
614
827
|
|
|
828
|
+
effort : typing.Optional[bool]
|
|
829
|
+
Enable extended reasoning mode for higher quality extraction on complex documents. Uses a more powerful model at higher latency.
|
|
830
|
+
|
|
615
831
|
thinking : typing.Optional[bool]
|
|
616
832
|
(Deprecated) Enables expanded rationale output for debugging.
|
|
617
833
|
|
|
618
834
|
storage : typing.Optional[ExtractAsyncRequestStorage]
|
|
619
835
|
Options for persisting extraction artifacts. When enabled (default), artifacts are saved to storage and a database record is created.
|
|
620
836
|
|
|
837
|
+
async_ : typing.Optional[bool]
|
|
838
|
+
If true, returns immediately with a job_id for polling via GET /job/{jobId}. Otherwise processes synchronously.
|
|
839
|
+
|
|
621
840
|
request_options : typing.Optional[RequestOptions]
|
|
622
841
|
Request-specific configuration.
|
|
623
842
|
|
|
624
843
|
Returns
|
|
625
844
|
-------
|
|
626
|
-
ExtractAsyncResponse
|
|
845
|
+
AsyncSubmissionResponse
|
|
627
846
|
Asynchronous extraction job accepted
|
|
628
847
|
|
|
629
848
|
Examples
|
|
@@ -656,9 +875,163 @@ class AsyncPulse:
|
|
|
656
875
|
pages=pages,
|
|
657
876
|
extract_figure=extract_figure,
|
|
658
877
|
figure_description=figure_description,
|
|
878
|
+
show_images=show_images,
|
|
659
879
|
return_html=return_html,
|
|
880
|
+
effort=effort,
|
|
660
881
|
thinking=thinking,
|
|
661
882
|
storage=storage,
|
|
883
|
+
async_=async_,
|
|
884
|
+
request_options=request_options,
|
|
885
|
+
)
|
|
886
|
+
return _response.data
|
|
887
|
+
|
|
888
|
+
async def split(
|
|
889
|
+
self,
|
|
890
|
+
*,
|
|
891
|
+
extraction_id: str,
|
|
892
|
+
split_config: typing.Optional[SplitConfig] = OMIT,
|
|
893
|
+
split_config_id: typing.Optional[str] = OMIT,
|
|
894
|
+
async_: typing.Optional[bool] = OMIT,
|
|
895
|
+
request_options: typing.Optional[RequestOptions] = None,
|
|
896
|
+
) -> SplitResponse:
|
|
897
|
+
"""
|
|
898
|
+
Identify which pages of a document contain each topic/section.
|
|
899
|
+
Takes an existing extraction and a list of topics, then uses AI to
|
|
900
|
+
identify which PDF pages contain content related to each topic.
|
|
901
|
+
|
|
902
|
+
The result is persisted with a `split_id` that can be used with
|
|
903
|
+
the `/schema` endpoint (split mode) for targeted schema extraction on
|
|
904
|
+
specific page groups.
|
|
905
|
+
|
|
906
|
+
Set `async_=True` to return immediately with a job_id for polling.
|
|
907
|
+
|
|
908
|
+
Parameters
|
|
909
|
+
----------
|
|
910
|
+
extraction_id : str
|
|
911
|
+
ID of the saved extraction to split.
|
|
912
|
+
|
|
913
|
+
split_config : typing.Optional[SplitConfig]
|
|
914
|
+
Inline split configuration with topics. Required if split_config_id is not provided.
|
|
915
|
+
|
|
916
|
+
split_config_id : typing.Optional[str]
|
|
917
|
+
Reference to a saved split configuration. Use this instead of providing split_config inline.
|
|
918
|
+
|
|
919
|
+
async_ : typing.Optional[bool]
|
|
920
|
+
If true, returns immediately with a job_id for polling via GET /job/{jobId}. Otherwise processes synchronously.
|
|
921
|
+
|
|
922
|
+
request_options : typing.Optional[RequestOptions]
|
|
923
|
+
Request-specific configuration.
|
|
924
|
+
|
|
925
|
+
Returns
|
|
926
|
+
-------
|
|
927
|
+
SplitResponse
|
|
928
|
+
Split result with page assignments (when async=false or omitted)
|
|
929
|
+
|
|
930
|
+
Examples
|
|
931
|
+
--------
|
|
932
|
+
import asyncio
|
|
933
|
+
|
|
934
|
+
from pulse import AsyncPulse
|
|
935
|
+
|
|
936
|
+
client = AsyncPulse(
|
|
937
|
+
api_key="YOUR_API_KEY",
|
|
938
|
+
)
|
|
939
|
+
|
|
940
|
+
|
|
941
|
+
async def main() -> None:
|
|
942
|
+
await client.split(
|
|
943
|
+
extraction_id="extraction_id",
|
|
944
|
+
)
|
|
945
|
+
|
|
946
|
+
|
|
947
|
+
asyncio.run(main())
|
|
948
|
+
"""
|
|
949
|
+
_response = await self._raw_client.split(
|
|
950
|
+
extraction_id=extraction_id,
|
|
951
|
+
split_config=split_config,
|
|
952
|
+
split_config_id=split_config_id,
|
|
953
|
+
async_=async_,
|
|
954
|
+
request_options=request_options,
|
|
955
|
+
)
|
|
956
|
+
return _response.data
|
|
957
|
+
|
|
958
|
+
async def schema(
|
|
959
|
+
self,
|
|
960
|
+
*,
|
|
961
|
+
extraction_id: typing.Optional[str] = OMIT,
|
|
962
|
+
split_id: typing.Optional[str] = OMIT,
|
|
963
|
+
schema_config: typing.Optional[SchemaConfig] = OMIT,
|
|
964
|
+
schema_config_id: typing.Optional[str] = OMIT,
|
|
965
|
+
split_schema_config: typing.Optional[typing.Dict[str, TopicSchemaConfig]] = OMIT,
|
|
966
|
+
async_: typing.Optional[bool] = OMIT,
|
|
967
|
+
request_options: typing.Optional[RequestOptions] = None,
|
|
968
|
+
) -> SchemaResponse:
|
|
969
|
+
"""
|
|
970
|
+
Apply schema extraction to a previously saved extraction. The mode is
|
|
971
|
+
inferred from the input:
|
|
972
|
+
|
|
973
|
+
**Single mode** — Provide `extraction_id` + `schema_config` (or
|
|
974
|
+
`schema_config_id`) to apply one schema to the entire document.
|
|
975
|
+
|
|
976
|
+
**Split mode** — Provide `split_id` + `split_schema_config` to apply
|
|
977
|
+
different schemas to different page groups from a prior `/split` call.
|
|
978
|
+
Each topic can have its own schema, prompt, and effort setting.
|
|
979
|
+
|
|
980
|
+
Creates a versioned schema record that can be retrieved later.
|
|
981
|
+
Set `async_=True` to return immediately with a job_id for polling.
|
|
982
|
+
|
|
983
|
+
Parameters
|
|
984
|
+
----------
|
|
985
|
+
extraction_id : typing.Optional[str]
|
|
986
|
+
ID of saved extraction to apply the schema to. Use for single-mode schema extraction.
|
|
987
|
+
|
|
988
|
+
split_id : typing.Optional[str]
|
|
989
|
+
ID of saved split (from a prior `/split` call). Use for split-mode schema extraction.
|
|
990
|
+
|
|
991
|
+
schema_config : typing.Optional[SchemaConfig]
|
|
992
|
+
Inline schema configuration for single mode. Required (with extraction_id) if schema_config_id is not provided.
|
|
993
|
+
|
|
994
|
+
schema_config_id : typing.Optional[str]
|
|
995
|
+
Reference to a saved schema configuration for single mode. Use this instead of providing schema_config inline.
|
|
996
|
+
|
|
997
|
+
split_schema_config : typing.Optional[typing.Dict[str, TopicSchemaConfig]]
|
|
998
|
+
Per-topic schema configurations for split mode. Keys must match the topic names from the split. Each topic provides either inline schema or schema_config_id.
|
|
999
|
+
|
|
1000
|
+
async_ : typing.Optional[bool]
|
|
1001
|
+
If true, returns immediately with a job_id for polling via GET /job/{jobId}. Otherwise processes synchronously.
|
|
1002
|
+
|
|
1003
|
+
request_options : typing.Optional[RequestOptions]
|
|
1004
|
+
Request-specific configuration.
|
|
1005
|
+
|
|
1006
|
+
Returns
|
|
1007
|
+
-------
|
|
1008
|
+
SchemaResponse
|
|
1009
|
+
Schema extraction result (when async=false or omitted). Shape depends on the mode used.
|
|
1010
|
+
|
|
1011
|
+
Examples
|
|
1012
|
+
--------
|
|
1013
|
+
import asyncio
|
|
1014
|
+
|
|
1015
|
+
from pulse import AsyncPulse
|
|
1016
|
+
|
|
1017
|
+
client = AsyncPulse(
|
|
1018
|
+
api_key="YOUR_API_KEY",
|
|
1019
|
+
)
|
|
1020
|
+
|
|
1021
|
+
|
|
1022
|
+
async def main() -> None:
|
|
1023
|
+
await client.schema(extraction_id="extraction_id", schema_config_id="schema_config_id")
|
|
1024
|
+
|
|
1025
|
+
|
|
1026
|
+
asyncio.run(main())
|
|
1027
|
+
"""
|
|
1028
|
+
_response = await self._raw_client.schema(
|
|
1029
|
+
extraction_id=extraction_id,
|
|
1030
|
+
split_id=split_id,
|
|
1031
|
+
schema_config=schema_config,
|
|
1032
|
+
schema_config_id=schema_config_id,
|
|
1033
|
+
split_schema_config=split_schema_config,
|
|
1034
|
+
async_=async_,
|
|
662
1035
|
request_options=request_options,
|
|
663
1036
|
)
|
|
664
1037
|
return _response.data
|
|
@@ -22,10 +22,10 @@ class BaseClientWrapper:
|
|
|
22
22
|
|
|
23
23
|
def get_headers(self) -> typing.Dict[str, str]:
|
|
24
24
|
headers: typing.Dict[str, str] = {
|
|
25
|
-
"User-Agent": "pulse-python-sdk/0.0.62",
|
|
25
|
+
"User-Agent": "pulse-python-sdk/0.0.64",
|
|
26
26
|
"X-Fern-Language": "Python",
|
|
27
27
|
"X-Fern-SDK-Name": "pulse-python-sdk",
|
|
28
|
-
"X-Fern-SDK-Version": "0.0.62",
|
|
28
|
+
"X-Fern-SDK-Version": "0.0.64",
|
|
29
29
|
**(self.get_custom_headers() or {}),
|
|
30
30
|
}
|
|
31
31
|
headers["x-api-key"] = self.api_key
|