datalab-python-sdk 0.2.0__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/PKG-INFO +3 -5
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/README.md +1 -4
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/datalab_sdk/cli.py +34 -19
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/datalab_sdk/client.py +32 -18
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/datalab_sdk/models.py +8 -5
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/pyproject.toml +2 -1
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/tests/test_client_methods.py +3 -3
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/uv.lock +15 -1
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/.github/workflows/ci.yml +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/.github/workflows/publish.yml +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/.gitignore +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/.pre-commit-config.yaml +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/.python-version +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/LICENSE +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/data/08-Lambda-Calculus.pptx +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/data/adversarial.pdf +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/data/bid_evaluation.docx +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/data/book_review.ppt +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/data/book_store.xls +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/data/chi_hind.png +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/data/how_to_read.doc +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/data/normandy.epub +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/data/sample-1-sheet.xlsx +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/data/thinkpython.pdf +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/data/vibe.html +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/datalab_sdk/__init__.py +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/datalab_sdk/exceptions.py +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/datalab_sdk/mimetypes.py +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/datalab_sdk/settings.py +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/integration/README.md +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/integration/__init__.py +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/integration/test_live_api.py +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/integration/test_readme_examples.py +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/poetry.lock +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/pytest.ini +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/recipes/workflows/README.md +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/recipes/workflows/end_to_end_workflow.py +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/recipes/workflows/workflow_api_tutorial/1_get_step_types.py +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/recipes/workflows/workflow_api_tutorial/2_get_workflows.py +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/recipes/workflows/workflow_api_tutorial/3_create_workflow.py +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/recipes/workflows/workflow_api_tutorial/4_execute_workflow.py +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/recipes/workflows/workflow_api_tutorial/5_poll_workflow.py +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/recipes/workflows/workflow_api_tutorial/README.md +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/recipes/workflows/workflow_definitions/README.md +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/recipes/workflows/workflow_definitions/eval_segmentation.json +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/recipes/workflows/workflow_definitions/parse_segment.json +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/recipes/workflows/workflow_definitions/segment_parallel_extract.json +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/recipes/workflows/workflow_definitions/slack_alert.json +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/tests/__init__.py +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/tests/conftest.py +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/tests/test_cli_simple.py +0 -0
- {datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/tests/test_workflows.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datalab-python-sdk
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: SDK for the Datalab document intelligence API
|
|
5
5
|
Author-email: Datalab Team <hi@datalab.to>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -12,6 +12,7 @@ Requires-Dist: click>=8.2.1
|
|
|
12
12
|
Requires-Dist: pydantic-settings>=2.10.1
|
|
13
13
|
Requires-Dist: pydantic>=2.11.7
|
|
14
14
|
Requires-Dist: tenacity>=8.2.3
|
|
15
|
+
Requires-Dist: tqdm>=4.66.0
|
|
15
16
|
Description-Content-Type: text/markdown
|
|
16
17
|
|
|
17
18
|
# Datalab SDK
|
|
@@ -54,7 +55,7 @@ Workflows allow you to chain multiple document processing steps together. Each w
|
|
|
54
55
|
|
|
55
56
|
**Note:** All workflow operations require authentication. Make sure you have set your `DATALAB_API_KEY` environment variable or pass `api_key` when creating the client (see [Authentication](#authentication) section above).
|
|
56
57
|
|
|
57
|
-
For more Workflow tips, see our [
|
|
58
|
+
For more Workflow tips, see our [documentation](https://documentation.datalab.to/docs/recipes/workflows/workflow-concepts).
|
|
58
59
|
|
|
59
60
|
## CLI Usage
|
|
60
61
|
|
|
@@ -64,9 +65,6 @@ The SDK includes a command-line interface:
|
|
|
64
65
|
# Convert document to markdown
|
|
65
66
|
datalab convert document.pdf
|
|
66
67
|
|
|
67
|
-
# OCR with JSON output
|
|
68
|
-
datalab ocr document.pdf --output-format json
|
|
69
|
-
|
|
70
68
|
# Workflow commands
|
|
71
69
|
datalab create-workflow --help
|
|
72
70
|
datalab execute-workflow --help
|
|
@@ -38,7 +38,7 @@ Workflows allow you to chain multiple document processing steps together. Each w
|
|
|
38
38
|
|
|
39
39
|
**Note:** All workflow operations require authentication. Make sure you have set your `DATALAB_API_KEY` environment variable or pass `api_key` when creating the client (see [Authentication](#authentication) section above).
|
|
40
40
|
|
|
41
|
-
For more Workflow tips, see our [
|
|
41
|
+
For more Workflow tips, see our [documentation](https://documentation.datalab.to/docs/recipes/workflows/workflow-concepts).
|
|
42
42
|
|
|
43
43
|
## CLI Usage
|
|
44
44
|
|
|
@@ -48,9 +48,6 @@ The SDK includes a command-line interface:
|
|
|
48
48
|
# Convert document to markdown
|
|
49
49
|
datalab convert document.pdf
|
|
50
50
|
|
|
51
|
-
# OCR with JSON output
|
|
52
|
-
datalab ocr document.pdf --output-format json
|
|
53
|
-
|
|
54
51
|
# Workflow commands
|
|
55
52
|
datalab create-workflow --help
|
|
56
53
|
datalab execute-workflow --help
|
|
@@ -9,6 +9,7 @@ import asyncio
|
|
|
9
9
|
from pathlib import Path
|
|
10
10
|
from typing import Optional, List
|
|
11
11
|
import click
|
|
12
|
+
from tqdm import tqdm
|
|
12
13
|
|
|
13
14
|
from datalab_sdk.client import AsyncDatalabClient, DatalabClient
|
|
14
15
|
from datalab_sdk.mimetypes import SUPPORTED_EXTENSIONS
|
|
@@ -122,6 +123,25 @@ async def process_files_async(
|
|
|
122
123
|
"""Process files asynchronously"""
|
|
123
124
|
semaphore = asyncio.Semaphore(max_concurrent)
|
|
124
125
|
|
|
126
|
+
async def call_api(client, file_path, output_path):
|
|
127
|
+
"""Make API call - client handles retries for rate limits"""
|
|
128
|
+
if method == "convert":
|
|
129
|
+
return await client.convert(
|
|
130
|
+
file_path,
|
|
131
|
+
options=options,
|
|
132
|
+
save_output=output_path,
|
|
133
|
+
max_polls=max_polls,
|
|
134
|
+
poll_interval=poll_interval,
|
|
135
|
+
)
|
|
136
|
+
else: # method == 'ocr'
|
|
137
|
+
return await client.ocr(
|
|
138
|
+
file_path,
|
|
139
|
+
options=options,
|
|
140
|
+
save_output=output_path,
|
|
141
|
+
max_polls=max_polls,
|
|
142
|
+
poll_interval=poll_interval,
|
|
143
|
+
)
|
|
144
|
+
|
|
125
145
|
async def process_single_file(file_path: Path) -> dict:
|
|
126
146
|
async with semaphore:
|
|
127
147
|
try:
|
|
@@ -134,22 +154,7 @@ async def process_files_async(
|
|
|
134
154
|
async with AsyncDatalabClient(
|
|
135
155
|
api_key=api_key, base_url=base_url
|
|
136
156
|
) as client:
|
|
137
|
-
|
|
138
|
-
result = await client.convert(
|
|
139
|
-
file_path,
|
|
140
|
-
options=options,
|
|
141
|
-
save_output=output_path,
|
|
142
|
-
max_polls=max_polls,
|
|
143
|
-
poll_interval=poll_interval,
|
|
144
|
-
)
|
|
145
|
-
else: # method == 'ocr'
|
|
146
|
-
result = await client.ocr(
|
|
147
|
-
file_path,
|
|
148
|
-
options=options,
|
|
149
|
-
save_output=output_path,
|
|
150
|
-
max_polls=max_polls,
|
|
151
|
-
poll_interval=poll_interval,
|
|
152
|
-
)
|
|
157
|
+
result = await call_api(client, file_path, output_path)
|
|
153
158
|
|
|
154
159
|
return {
|
|
155
160
|
"file_path": str(file_path),
|
|
@@ -167,9 +172,19 @@ async def process_files_async(
|
|
|
167
172
|
"page_count": None,
|
|
168
173
|
}
|
|
169
174
|
|
|
170
|
-
# Process all files concurrently
|
|
171
|
-
tasks = [process_single_file(file_path) for file_path in files]
|
|
172
|
-
results =
|
|
175
|
+
# Process all files concurrently with progress bar
|
|
176
|
+
tasks = [asyncio.create_task(process_single_file(file_path)) for file_path in files]
|
|
177
|
+
results = []
|
|
178
|
+
|
|
179
|
+
with tqdm(total=len(tasks), desc="Processing", unit="file") as pbar:
|
|
180
|
+
for coro in asyncio.as_completed(tasks):
|
|
181
|
+
result = await coro
|
|
182
|
+
results.append(result)
|
|
183
|
+
# Update progress bar description with current file
|
|
184
|
+
filename = Path(result["file_path"]).name
|
|
185
|
+
status = "✓" if result["success"] else "✗"
|
|
186
|
+
pbar.set_postfix_str(f"{status} {filename[:30]}")
|
|
187
|
+
pbar.update(1)
|
|
173
188
|
|
|
174
189
|
return results
|
|
175
190
|
|
|
@@ -112,7 +112,9 @@ class AsyncDatalabClient:
|
|
|
112
112
|
try:
|
|
113
113
|
error_data = await response.json()
|
|
114
114
|
# FastAPI returns errors in "detail" field, but some APIs use "error"
|
|
115
|
-
error_message =
|
|
115
|
+
error_message = (
|
|
116
|
+
error_data.get("detail") or error_data.get("error") or str(e)
|
|
117
|
+
)
|
|
116
118
|
except Exception:
|
|
117
119
|
error_message = str(e)
|
|
118
120
|
raise DatalabAPIError(
|
|
@@ -123,6 +125,19 @@ class AsyncDatalabClient:
|
|
|
123
125
|
except aiohttp.ClientError as e:
|
|
124
126
|
raise DatalabAPIError(f"Request failed: {str(e)}")
|
|
125
127
|
|
|
128
|
+
@retry(
|
|
129
|
+
retry=retry_if_exception(
|
|
130
|
+
lambda e: isinstance(e, DatalabAPIError)
|
|
131
|
+
and getattr(e, "status_code", None) == 429
|
|
132
|
+
),
|
|
133
|
+
stop=stop_after_attempt(10),
|
|
134
|
+
wait=wait_exponential_jitter(initial=5, max=120),
|
|
135
|
+
reraise=True,
|
|
136
|
+
)
|
|
137
|
+
async def _submit_with_retry(self, endpoint: str, data) -> Dict[str, Any]:
|
|
138
|
+
"""POST submission with retry for rate limits (429)"""
|
|
139
|
+
return await self._make_request("POST", endpoint, data=data)
|
|
140
|
+
|
|
126
141
|
async def _poll_result(
|
|
127
142
|
self, check_url: str, max_polls: int = 300, poll_interval: int = 1
|
|
128
143
|
) -> Dict[str, Any]:
|
|
@@ -168,8 +183,8 @@ class AsyncDatalabClient:
|
|
|
168
183
|
)
|
|
169
184
|
)
|
|
170
185
|
),
|
|
171
|
-
stop=stop_after_attempt(
|
|
172
|
-
wait=wait_exponential_jitter(max=
|
|
186
|
+
stop=stop_after_attempt(10),
|
|
187
|
+
wait=wait_exponential_jitter(initial=5, max=120),
|
|
173
188
|
reraise=True,
|
|
174
189
|
)
|
|
175
190
|
async def _poll_get_with_retry(self, url: str) -> Dict[str, Any]:
|
|
@@ -185,7 +200,7 @@ class AsyncDatalabClient:
|
|
|
185
200
|
|
|
186
201
|
# Read file content
|
|
187
202
|
file_data = file_path.read_bytes()
|
|
188
|
-
|
|
203
|
+
|
|
189
204
|
# Check if file is empty
|
|
190
205
|
if not file_data:
|
|
191
206
|
raise DatalabFileError(
|
|
@@ -252,8 +267,7 @@ class AsyncDatalabClient:
|
|
|
252
267
|
if options is None:
|
|
253
268
|
options = ConvertOptions()
|
|
254
269
|
|
|
255
|
-
initial_data = await self._make_request(
|
|
256
|
-
"POST",
|
|
270
|
+
initial_data = await self._submit_with_retry(
|
|
257
271
|
"/api/v1/marker",
|
|
258
272
|
data=self.get_form_params(
|
|
259
273
|
file_path=file_path, file_url=file_url, options=options
|
|
@@ -283,6 +297,7 @@ class AsyncDatalabClient:
|
|
|
283
297
|
images=result_data.get("images"),
|
|
284
298
|
metadata=result_data.get("metadata"),
|
|
285
299
|
error=result_data.get("error"),
|
|
300
|
+
error_in=result_data.get("error_in"),
|
|
286
301
|
page_count=result_data.get("page_count"),
|
|
287
302
|
status=result_data.get("status", "complete"),
|
|
288
303
|
checkpoint_id=result_data.get("checkpoint_id"),
|
|
@@ -312,8 +327,7 @@ class AsyncDatalabClient:
|
|
|
312
327
|
if options is None:
|
|
313
328
|
options = OCROptions()
|
|
314
329
|
|
|
315
|
-
initial_data = await self._make_request(
|
|
316
|
-
"POST",
|
|
330
|
+
initial_data = await self._submit_with_retry(
|
|
317
331
|
"/api/v1/ocr",
|
|
318
332
|
data=self.get_form_params(file_path=file_path, options=options),
|
|
319
333
|
)
|
|
@@ -370,8 +384,7 @@ class AsyncDatalabClient:
|
|
|
370
384
|
if options is None:
|
|
371
385
|
raise ValueError("options must be provided with field_data")
|
|
372
386
|
|
|
373
|
-
initial_data = await self._make_request(
|
|
374
|
-
"POST",
|
|
387
|
+
initial_data = await self._submit_with_retry(
|
|
375
388
|
"/api/v1/fill",
|
|
376
389
|
data=self.get_form_params(
|
|
377
390
|
file_path=file_path, file_url=file_url, options=options
|
|
@@ -393,6 +406,7 @@ class AsyncDatalabClient:
|
|
|
393
406
|
status=result_data.get("status", "complete"),
|
|
394
407
|
success=result_data.get("success"),
|
|
395
408
|
error=result_data.get("error"),
|
|
409
|
+
error_in=result_data.get("error_in"),
|
|
396
410
|
output_format=result_data.get("output_format"),
|
|
397
411
|
output_base64=result_data.get("output_base64"),
|
|
398
412
|
fields_filled=result_data.get("fields_filled"),
|
|
@@ -570,7 +584,9 @@ class AsyncDatalabClient:
|
|
|
570
584
|
|
|
571
585
|
return {
|
|
572
586
|
"success": response.get("success", True),
|
|
573
|
-
"message": response.get(
|
|
587
|
+
"message": response.get(
|
|
588
|
+
"message", f"Workflow {workflow_id} deleted successfully"
|
|
589
|
+
),
|
|
574
590
|
}
|
|
575
591
|
|
|
576
592
|
async def execute_workflow(
|
|
@@ -1236,9 +1252,7 @@ class DatalabClient:
|
|
|
1236
1252
|
Returns:
|
|
1237
1253
|
UploadedFileMetadata object with file information
|
|
1238
1254
|
"""
|
|
1239
|
-
return self._run_async(
|
|
1240
|
-
self._async_client.get_file_metadata(file_id=file_id)
|
|
1241
|
-
)
|
|
1255
|
+
return self._run_async(self._async_client.get_file_metadata(file_id=file_id))
|
|
1242
1256
|
|
|
1243
1257
|
def get_file_download_url(
|
|
1244
1258
|
self,
|
|
@@ -1260,7 +1274,9 @@ class DatalabClient:
|
|
|
1260
1274
|
- original_filename: Original filename
|
|
1261
1275
|
"""
|
|
1262
1276
|
return self._run_async(
|
|
1263
|
-
self._async_client.get_file_download_url(
|
|
1277
|
+
self._async_client.get_file_download_url(
|
|
1278
|
+
file_id=file_id, expires_in=expires_in
|
|
1279
|
+
)
|
|
1264
1280
|
)
|
|
1265
1281
|
|
|
1266
1282
|
def delete_file(
|
|
@@ -1280,9 +1296,7 @@ class DatalabClient:
|
|
|
1280
1296
|
- success: Whether the deletion was successful
|
|
1281
1297
|
- message: Confirmation message
|
|
1282
1298
|
"""
|
|
1283
|
-
return self._run_async(
|
|
1284
|
-
self._async_client.delete_file(file_id=file_id)
|
|
1285
|
-
)
|
|
1299
|
+
return self._run_async(self._async_client.delete_file(file_id=file_id))
|
|
1286
1300
|
|
|
1287
1301
|
def delete_workflow(self, workflow_id: int) -> Dict[str, Any]:
|
|
1288
1302
|
"""
|
|
@@ -3,7 +3,7 @@ Datalab SDK data models
|
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
from dataclasses import dataclass, field
|
|
6
|
-
from typing import Dict, List, Optional, Any, Union
|
|
6
|
+
from typing import Dict, List, Optional, Any, Union, Literal
|
|
7
7
|
from pathlib import Path
|
|
8
8
|
import json
|
|
9
9
|
import base64
|
|
@@ -41,19 +41,20 @@ class ConvertOptions(ProcessingOptions):
|
|
|
41
41
|
paginate: bool = False
|
|
42
42
|
disable_image_extraction: bool = False
|
|
43
43
|
disable_image_captions: bool = False
|
|
44
|
+
fence_synthetic_captions: bool = False
|
|
44
45
|
additional_config: Optional[Dict[str, Any]] = None
|
|
45
46
|
page_schema: Optional[Dict[str, Any]] = None
|
|
46
47
|
segmentation_schema: Optional[str] = None # JSON string for document segmentation
|
|
47
48
|
save_checkpoint: bool = False
|
|
48
|
-
extras: Optional[str] = (
|
|
49
|
-
None # Comma-separated list: 'track_changes', 'chart_understanding'
|
|
50
|
-
)
|
|
51
49
|
output_format: str = "markdown" # markdown, json, html, chunks
|
|
52
50
|
mode: str = "balanced" # fast, balanced, accurate
|
|
53
51
|
keep_spreadsheet_formatting: bool = False
|
|
54
52
|
webhook_url: Optional[str] = None
|
|
55
|
-
|
|
53
|
+
# Comma-separated list of extra features: 'track_changes', 'chart_understanding',
|
|
54
|
+
# 'table_row_bboxes', 'extract_links', 'infographic', 'new_block_types'
|
|
55
|
+
extras: Optional[str] = None
|
|
56
56
|
add_block_ids: bool = False # add block IDs to HTML output
|
|
57
|
+
include_markdown_in_chunks: bool = False # include markdown field in chunks/JSON output
|
|
57
58
|
|
|
58
59
|
def to_form_data(self) -> Dict[str, Any]:
|
|
59
60
|
"""Convert to form data format for API requests"""
|
|
@@ -121,6 +122,7 @@ class ConversionResult:
|
|
|
121
122
|
images: Optional[Dict[str, str]] = None
|
|
122
123
|
metadata: Optional[Dict[str, Any]] = None
|
|
123
124
|
error: Optional[str] = None
|
|
125
|
+
error_in: Optional[Literal["VALIDATION", "INFERENCE", "OTHER"]] = None
|
|
124
126
|
page_count: Optional[int] = None
|
|
125
127
|
status: str = "complete"
|
|
126
128
|
checkpoint_id: Optional[str] = None
|
|
@@ -375,6 +377,7 @@ class FormFillingResult:
|
|
|
375
377
|
status: str
|
|
376
378
|
success: Optional[bool] = None
|
|
377
379
|
error: Optional[str] = None
|
|
380
|
+
error_in: Optional[Literal["VALIDATION", "INFERENCE", "OTHER"]] = None
|
|
378
381
|
output_format: Optional[str] = None # "pdf" or "png"
|
|
379
382
|
output_base64: Optional[str] = None # Base64-encoded filled form
|
|
380
383
|
fields_filled: Optional[List[str]] = (
|
|
@@ -7,7 +7,7 @@ readme = "README.md"
|
|
|
7
7
|
license = "MIT"
|
|
8
8
|
repository = "https://github.com/datalab-to/sdk"
|
|
9
9
|
keywords = ["datalab", "sdk", "document-intelligence", "api"]
|
|
10
|
-
version = "0.2.0"
|
|
10
|
+
version = "0.2.2"
|
|
11
11
|
description = "SDK for the Datalab document intelligence API"
|
|
12
12
|
requires-python = ">=3.10"
|
|
13
13
|
dependencies = [
|
|
@@ -16,6 +16,7 @@ dependencies = [
|
|
|
16
16
|
"pydantic>=2.11.7",
|
|
17
17
|
"pydantic-settings>=2.10.1",
|
|
18
18
|
"tenacity>=8.2.3",
|
|
19
|
+
"tqdm>=4.66.0",
|
|
19
20
|
]
|
|
20
21
|
|
|
21
22
|
[project.scripts]
|
|
@@ -504,13 +504,13 @@ class TestClientErrorHandling:
|
|
|
504
504
|
with patch.object(
|
|
505
505
|
client, "_make_request", new_callable=AsyncMock
|
|
506
506
|
) as mock_request:
|
|
507
|
-
# Setup mock to raise API error
|
|
507
|
+
# Setup mock to raise API error (use 400 since 429 is retried)
|
|
508
508
|
mock_request.side_effect = DatalabAPIError(
|
|
509
|
-
"
|
|
509
|
+
"Bad request", status_code=400
|
|
510
510
|
)
|
|
511
511
|
|
|
512
512
|
# Test that error is propagated
|
|
513
|
-
with pytest.raises(DatalabAPIError, match="
|
|
513
|
+
with pytest.raises(DatalabAPIError, match="Bad request"):
|
|
514
514
|
await client.ocr(pdf_file)
|
|
515
515
|
|
|
516
516
|
def test_convert_unsuccessful_response(self, temp_dir):
|
|
@@ -212,7 +212,7 @@ wheels = [
|
|
|
212
212
|
|
|
213
213
|
[[package]]
|
|
214
214
|
name = "datalab-python-sdk"
|
|
215
|
-
version = "0.2.0"
|
|
215
|
+
version = "0.2.2"
|
|
216
216
|
source = { editable = "." }
|
|
217
217
|
dependencies = [
|
|
218
218
|
{ name = "aiohttp" },
|
|
@@ -220,6 +220,7 @@ dependencies = [
|
|
|
220
220
|
{ name = "pydantic" },
|
|
221
221
|
{ name = "pydantic-settings" },
|
|
222
222
|
{ name = "tenacity" },
|
|
223
|
+
{ name = "tqdm" },
|
|
223
224
|
]
|
|
224
225
|
|
|
225
226
|
[package.dev-dependencies]
|
|
@@ -240,6 +241,7 @@ requires-dist = [
|
|
|
240
241
|
{ name = "pydantic", specifier = ">=2.11.7" },
|
|
241
242
|
{ name = "pydantic-settings", specifier = ">=2.10.1" },
|
|
242
243
|
{ name = "tenacity", specifier = ">=8.2.3" },
|
|
244
|
+
{ name = "tqdm", specifier = ">=4.66.0" },
|
|
243
245
|
]
|
|
244
246
|
|
|
245
247
|
[package.metadata.requires-dev]
|
|
@@ -1102,6 +1104,18 @@ wheels = [
|
|
|
1102
1104
|
{ url = "https://files.pythonhosted.org/packages/77/b8/0135fadc89e73be292b473cb820b4f5a08197779206b33191e801feeae40/tomli-2.3.0-py3-none-any.whl", hash = "sha256:e95b1af3c5b07d9e643909b5abbec77cd9f1217e6d0bca72b0234736b9fb1f1b", size = 14408, upload-time = "2025-10-08T22:01:46.04Z" },
|
|
1103
1105
|
]
|
|
1104
1106
|
|
|
1107
|
+
[[package]]
|
|
1108
|
+
name = "tqdm"
|
|
1109
|
+
version = "4.67.3"
|
|
1110
|
+
source = { registry = "https://pypi.org/simple" }
|
|
1111
|
+
dependencies = [
|
|
1112
|
+
{ name = "colorama", marker = "sys_platform == 'win32'" },
|
|
1113
|
+
]
|
|
1114
|
+
sdist = { url = "https://files.pythonhosted.org/packages/09/a9/6ba95a270c6f1fbcd8dac228323f2777d886cb206987444e4bce66338dd4/tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb", size = 169598, upload-time = "2026-02-03T17:35:53.048Z" }
|
|
1115
|
+
wheels = [
|
|
1116
|
+
{ url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" },
|
|
1117
|
+
]
|
|
1118
|
+
|
|
1105
1119
|
[[package]]
|
|
1106
1120
|
name = "typing-extensions"
|
|
1107
1121
|
version = "4.15.0"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{datalab_python_sdk-0.2.0 → datalab_python_sdk-0.2.2}/recipes/workflows/end_to_end_workflow.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|