retab 0.0.68__py3-none-any.whl → 0.0.70__py3-none-any.whl

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
retab/client.py CHANGED
@@ -10,7 +10,7 @@ import backoff.types
10
10
  import httpx
11
11
  import truststore
12
12
 
13
- from .resources import documents, models, schemas, projects
13
+ from .resources import documents, models, schemas, projects, extractions
14
14
  from .types.standards import PreparedRequest, FieldUnset
15
15
 
16
16
 
@@ -184,6 +184,7 @@ class Retab(BaseRetab):
184
184
 
185
185
  self.client = httpx.Client(timeout=self.timeout)
186
186
  self.projects = projects.Projects(client=self)
187
+ self.extractions = extractions.Extractions(client=self)
187
188
  self.documents = documents.Documents(client=self)
188
189
  self.models = models.Models(client=self)
189
190
  self.schemas = schemas.Schemas(client=self)
@@ -480,6 +481,7 @@ class AsyncRetab(BaseRetab):
480
481
  self.client = httpx.AsyncClient(timeout=self.timeout)
481
482
 
482
483
  self.projects = projects.AsyncProjects(client=self)
484
+ self.extractions = extractions.AsyncExtractions(client=self)
483
485
  self.documents = documents.AsyncDocuments(client=self)
484
486
  self.models = models.AsyncModels(client=self)
485
487
  self.schemas = schemas.AsyncSchemas(client=self)
@@ -14,7 +14,6 @@ from ...utils.stream_context_managers import as_async_context_manager, as_contex
14
14
  from ...types.documents.create_messages import DocumentCreateInputRequest, DocumentCreateMessageRequest, DocumentMessage
15
15
  from ...types.documents.extract import DocumentExtractRequest, RetabParsedChatCompletion, RetabParsedChatCompletionChunk, RetabParsedChoice, maybe_parse_to_pydantic
16
16
  from ...types.documents.parse import ParseRequest, ParseResult, TableParsingFormat
17
- from ...types.browser_canvas import BrowserCanvas
18
17
  from ...types.mime import MIMEData
19
18
  from ...types.standards import PreparedRequest, FieldUnset
20
19
  from ...utils.json_schema import load_json_schema, unflatten_dict
@@ -26,8 +25,6 @@ class BaseDocumentsMixin:
26
25
  self,
27
26
  document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
28
27
  image_resolution_dpi: int = FieldUnset,
29
- browser_canvas: BrowserCanvas = FieldUnset,
30
- idempotency_key: str | None = None,
31
28
  **extra_body: Any,
32
29
  ) -> PreparedRequest:
33
30
  mime_document = prepare_mime_document(document)
@@ -35,8 +32,6 @@ class BaseDocumentsMixin:
35
32
  loading_request_dict: dict[str, Any] = {"document": mime_document}
36
33
  if image_resolution_dpi is not FieldUnset:
37
34
  loading_request_dict["image_resolution_dpi"] = image_resolution_dpi
38
- if browser_canvas is not FieldUnset:
39
- loading_request_dict["browser_canvas"] = browser_canvas
40
35
 
41
36
  # Merge any extra fields provided by the caller
42
37
  if extra_body:
@@ -44,7 +39,7 @@ class BaseDocumentsMixin:
44
39
 
45
40
  loading_request = DocumentCreateMessageRequest(**loading_request_dict)
46
41
  return PreparedRequest(
47
- method="POST", url="/v1/documents/create_messages", data=loading_request.model_dump(mode="json", exclude_unset=True), idempotency_key=idempotency_key
42
+ method="POST", url="/v1/documents/create_messages", data=loading_request.model_dump(mode="json", exclude_unset=True)
48
43
  )
49
44
 
50
45
  def _prepare_get_extraction(self, extraction_id: str) -> PreparedRequest:
@@ -61,8 +56,6 @@ class BaseDocumentsMixin:
61
56
  document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
62
57
  json_schema: dict[str, Any] | Path | str,
63
58
  image_resolution_dpi: int = FieldUnset,
64
- browser_canvas: BrowserCanvas = FieldUnset,
65
- idempotency_key: str | None = None,
66
59
  **extra_body: Any,
67
60
  ) -> PreparedRequest:
68
61
  mime_document = prepare_mime_document(document)
@@ -74,15 +67,13 @@ class BaseDocumentsMixin:
74
67
  }
75
68
  if image_resolution_dpi is not FieldUnset:
76
69
  loading_request_dict["image_resolution_dpi"] = image_resolution_dpi
77
- if browser_canvas is not FieldUnset:
78
- loading_request_dict["browser_canvas"] = browser_canvas
79
70
 
80
71
  # Merge any extra fields provided by the caller
81
72
  if extra_body:
82
73
  loading_request_dict.update(extra_body)
83
74
 
84
75
  loading_request = DocumentCreateInputRequest(**loading_request_dict)
85
- return PreparedRequest(method="POST", url="/v1/documents/create_inputs", data=loading_request.model_dump(mode="json", exclude_unset=True), idempotency_key=idempotency_key)
76
+ return PreparedRequest(method="POST", url="/v1/documents/create_inputs", data=loading_request.model_dump(mode="json", exclude_unset=True))
86
77
 
87
78
  def _prepare_correct_image_orientation(self, document: Path | str | IOBase | MIMEData | PIL.Image.Image) -> PreparedRequest:
88
79
  mime_document = prepare_mime_document(document)
@@ -102,8 +93,6 @@ class BaseDocumentsMixin:
102
93
  model: str,
103
94
  table_parsing_format: TableParsingFormat = FieldUnset,
104
95
  image_resolution_dpi: int = FieldUnset,
105
- browser_canvas: BrowserCanvas = FieldUnset,
106
- idempotency_key: str | None = None,
107
96
  **extra_body: Any,
108
97
  ) -> PreparedRequest:
109
98
  mime_document = prepare_mime_document(document)
@@ -116,50 +105,40 @@ class BaseDocumentsMixin:
116
105
  request_dict["table_parsing_format"] = table_parsing_format
117
106
  if image_resolution_dpi is not FieldUnset:
118
107
  request_dict["image_resolution_dpi"] = image_resolution_dpi
119
- if browser_canvas is not FieldUnset:
120
- request_dict["browser_canvas"] = browser_canvas
121
108
 
122
109
  # Merge any extra fields provided by the caller
123
110
  if extra_body:
124
111
  request_dict.update(extra_body)
125
112
 
126
113
  parse_request = ParseRequest(**request_dict)
127
- return PreparedRequest(method="POST", url="/v1/documents/parse", data=parse_request.model_dump(mode="json", exclude_unset=True), idempotency_key=idempotency_key)
114
+ return PreparedRequest(method="POST", url="/v1/documents/parse", data=parse_request.model_dump(mode="json", exclude_unset=True))
128
115
 
129
116
  def _prepare_extract(
130
117
  self,
131
118
  json_schema: dict[str, Any] | Path | str,
132
119
  model: str,
133
- document: Path | str | IOBase | HttpUrl | None = None,
134
- documents: list[Path | str | IOBase | HttpUrl] | None = None,
120
+ document: Path | str | IOBase | HttpUrl,
135
121
  image_resolution_dpi: int = FieldUnset,
136
- browser_canvas: BrowserCanvas = FieldUnset,
137
122
  temperature: float = FieldUnset,
138
123
  reasoning_effort: ChatCompletionReasoningEffort = FieldUnset,
139
124
  n_consensus: int = FieldUnset,
140
125
  stream: bool = FieldUnset,
141
126
  store: bool = FieldUnset,
142
- idempotency_key: str | None = None,
127
+ metadata: dict[str, str] = FieldUnset,
143
128
  **extra_body: Any,
144
129
  ) -> PreparedRequest:
145
130
  loaded_schema = load_json_schema(json_schema)
146
131
 
147
- # Handle both single document and multiple documents
148
- if document is not None and documents is not None:
149
- raise ValueError("Cannot provide both 'document' and 'documents' parameters. Use either one.")
132
+ # Handle document parameter
133
+ if document is None:
134
+ raise ValueError("Must provide 'document' parameter.")
150
135
 
151
- # Convert single document to documents list for consistency
152
- if document is not None:
153
- processed_documents = [prepare_mime_document(document)]
154
- elif documents is not None:
155
- processed_documents = [prepare_mime_document(doc) for doc in documents]
156
- else:
157
- raise ValueError("Must provide either 'document' or 'documents' parameter.")
136
+ processed_document = prepare_mime_document(document)
158
137
 
159
138
  # Build request dictionary with only provided fields
160
- request_dict = {
139
+ request_dict: dict[str, Any] = {
161
140
  "json_schema": loaded_schema,
162
- "documents": processed_documents,
141
+ "document": processed_document,
163
142
  "model": model,
164
143
  }
165
144
  if stream is not FieldUnset:
@@ -174,8 +153,8 @@ class BaseDocumentsMixin:
174
153
  request_dict["n_consensus"] = n_consensus
175
154
  if image_resolution_dpi is not FieldUnset:
176
155
  request_dict["image_resolution_dpi"] = image_resolution_dpi
177
- if browser_canvas is not FieldUnset:
178
- request_dict["browser_canvas"] = browser_canvas
156
+ if metadata is not FieldUnset:
157
+ request_dict["metadata"] = metadata
179
158
 
180
159
  # Merge any extra fields provided by the caller
181
160
  if extra_body:
@@ -186,7 +165,7 @@ class BaseDocumentsMixin:
186
165
 
187
166
  # Use the same URL as extractions.py for consistency when streaming
188
167
  url = "/v1/documents/extractions" if stream else "/v1/documents/extract"
189
- return PreparedRequest(method="POST", url=url, data=extract_request.model_dump(mode="json", exclude_unset=True, exclude_defaults=True), idempotency_key=idempotency_key)
168
+ return PreparedRequest(method="POST", url=url, data=extract_request.model_dump(mode="json", exclude_unset=True, exclude_defaults=True))
190
169
 
191
170
 
192
171
  class Documents(SyncAPIResource, BaseDocumentsMixin):
@@ -199,8 +178,6 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
199
178
  self,
200
179
  document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
201
180
  image_resolution_dpi: int = FieldUnset,
202
- browser_canvas: BrowserCanvas = FieldUnset,
203
- idempotency_key: str | None = None,
204
181
  **extra_body: Any,
205
182
  ) -> DocumentMessage:
206
183
  """
@@ -209,8 +186,6 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
209
186
  Args:
210
187
  document: The document to process. Can be a file path (Path or str) or a file-like object.
211
188
  image_resolution_dpi: Optional image resolution DPI.
212
- browser_canvas: Optional browser canvas size.
213
- idempotency_key: Optional idempotency key for the request
214
189
  Returns:
215
190
  DocumentMessage: The processed document message containing extracted content.
216
191
 
@@ -220,8 +195,6 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
220
195
  request = self._prepare_create_messages(
221
196
  document=document,
222
197
  image_resolution_dpi=image_resolution_dpi,
223
- browser_canvas=browser_canvas,
224
- idempotency_key=idempotency_key,
225
198
  **extra_body,
226
199
  )
227
200
  response = self._client._prepared_request(request)
@@ -232,8 +205,6 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
232
205
  document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
233
206
  json_schema: dict[str, Any] | Path | str,
234
207
  image_resolution_dpi: int = FieldUnset,
235
- browser_canvas: BrowserCanvas = FieldUnset,
236
- idempotency_key: str | None = None,
237
208
  **extra_body: Any,
238
209
  ) -> DocumentMessage:
239
210
  """
@@ -243,8 +214,6 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
243
214
  document: The document to process. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
244
215
  json_schema: The JSON schema to use for structuring the document content.
245
216
  image_resolution_dpi: Optional image resolution DPI.
246
- browser_canvas: Optional browser canvas size.
247
- idempotency_key: Optional idempotency key for the request
248
217
  Returns:
249
218
  DocumentMessage: The processed document message containing extracted content with schema context.
250
219
 
@@ -255,8 +224,6 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
255
224
  document=document,
256
225
  json_schema=json_schema,
257
226
  image_resolution_dpi=image_resolution_dpi,
258
- browser_canvas=browser_canvas,
259
- idempotency_key=idempotency_key,
260
227
  **extra_body,
261
228
  )
262
229
  response = self._client._prepared_request(request)
@@ -266,55 +233,48 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
266
233
  self,
267
234
  json_schema: dict[str, Any] | Path | str,
268
235
  model: str,
269
- document: Path | str | IOBase | HttpUrl | None = None,
270
- documents: list[Path | str | IOBase | HttpUrl] | None = None,
236
+ document: Path | str | IOBase | HttpUrl,
271
237
  image_resolution_dpi: int = FieldUnset,
272
- browser_canvas: BrowserCanvas = FieldUnset,
273
238
  temperature: float = FieldUnset,
274
239
  reasoning_effort: ChatCompletionReasoningEffort = FieldUnset,
275
240
  n_consensus: int = FieldUnset,
276
- idempotency_key: str | None = None,
277
241
  store: bool = FieldUnset,
242
+ metadata: dict[str, str] = FieldUnset,
278
243
  **extra_body: Any,
279
244
  ) -> RetabParsedChatCompletion:
280
245
  """
281
- Process one or more documents using the Retab API for structured data extraction.
246
+ Process a document using the Retab API for structured data extraction.
282
247
 
283
- This method provides a direct interface to document extraction functionality,
284
- intended to replace the current `.extractions.parse()` pattern.
248
+ This method provides a direct interface to document extraction functionality.
285
249
 
286
250
  Args:
287
251
  json_schema: JSON schema defining the expected data structure
288
252
  model: The AI model to use for processing
289
- document: Single document to process (use either this or documents, not both)
290
- documents: List of documents to process (use either this or document, not both)
253
+ document: Document to process (file path, URL, or file-like object)
291
254
  image_resolution_dpi: Optional image resolution DPI
292
- browser_canvas: Optional browser canvas size
293
255
  temperature: Model temperature setting (0-1)
294
256
  reasoning_effort: The effort level for the model to reason about the input data
295
257
  n_consensus: Number of consensus extractions to perform
296
- idempotency_key: Idempotency key for request
297
258
  store: Whether to store the document in the Retab database
259
+ metadata: User-defined metadata to associate with this extraction
298
260
 
299
261
  Returns:
300
262
  RetabParsedChatCompletion: Parsed response from the API
301
263
 
302
264
  Raises:
303
- ValueError: If neither document nor documents is provided, or if both are provided
265
+ ValueError: If document is not provided
304
266
  HTTPException: If the request fails
305
267
  """
306
268
  request = self._prepare_extract(
307
269
  json_schema=json_schema,
308
270
  model=model,
309
271
  document=document,
310
- documents=documents,
311
272
  image_resolution_dpi=image_resolution_dpi,
312
- browser_canvas=browser_canvas,
313
273
  temperature=temperature,
314
274
  reasoning_effort=reasoning_effort,
315
275
  n_consensus=n_consensus,
316
276
  store=store,
317
- idempotency_key=idempotency_key,
277
+ metadata=metadata,
318
278
  **extra_body,
319
279
  )
320
280
  response = self._client._prepared_request(request)
@@ -414,64 +374,52 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
414
374
  self,
415
375
  json_schema: dict[str, Any] | Path | str,
416
376
  model: str,
417
- document: Path | str | IOBase | HttpUrl | None = None,
418
- documents: list[Path | str | IOBase | HttpUrl] | None = None,
377
+ document: Path | str | IOBase | HttpUrl,
419
378
  image_resolution_dpi: int = FieldUnset,
420
- browser_canvas: BrowserCanvas = FieldUnset,
421
379
  temperature: float = FieldUnset,
422
380
  reasoning_effort: ChatCompletionReasoningEffort = FieldUnset,
423
381
  n_consensus: int = FieldUnset,
424
- idempotency_key: str | None = None,
425
382
  store: bool = FieldUnset,
383
+ metadata: dict[str, str] = FieldUnset,
426
384
  **extra_body: Any,
427
385
  ) -> Generator[RetabParsedChatCompletion, None, None]:
428
386
  """
429
- Process one or more documents using the Retab API with streaming enabled.
387
+ Process a document using the Retab API with streaming enabled.
430
388
 
431
389
  Args:
432
390
  json_schema: JSON schema defining the expected data structure
433
391
  model: The AI model to use for processing
434
- document: Single document to process (use either this or documents, not both)
435
- documents: List of documents to process (use either this or document, not both)
392
+ document: Document to process (file path, URL, or file-like object)
436
393
  image_resolution_dpi: Optional image resolution DPI.
437
- browser_canvas: Optional browser canvas size.
438
394
  temperature: Model temperature setting (0-1)
439
395
  reasoning_effort: The effort level for the model to reason about the input data.
440
396
  n_consensus: Number of consensus extractions to perform (default: 1 which computes a single extraction and the likelihoods comes from the model logprobs)
441
- idempotency_key: Idempotency key for request
442
397
  store: Whether to store the document in the Retab database
398
+ metadata: User-defined metadata to associate with this extraction
443
399
 
444
400
  Returns:
445
401
  Generator[RetabParsedChatCompletion]: Stream of parsed responses
446
402
  Raises:
447
- ValueError: If neither document nor documents is provided, or if both are provided
403
+ ValueError: If document is not provided
448
404
  HTTPException: If the request fails
449
405
  Usage:
450
406
  ```python
451
- # Single document
452
407
  with retab.documents.extract_stream(json_schema, model, document=document) as stream:
453
408
  for response in stream:
454
409
  print(response)
455
-
456
- # Multiple documents
457
- with retab.documents.extract_stream(json_schema, model, documents=[doc1, doc2]) as stream:
458
- for response in stream:
459
- print(response)
460
410
  ```
461
411
  """
462
412
  request = self._prepare_extract(
463
413
  json_schema=json_schema,
464
414
  document=document,
465
- documents=documents,
466
415
  image_resolution_dpi=image_resolution_dpi,
467
- browser_canvas=browser_canvas,
468
416
  model=model,
469
417
  temperature=temperature,
470
418
  reasoning_effort=reasoning_effort,
471
419
  stream=True,
472
420
  n_consensus=n_consensus,
473
421
  store=store,
474
- idempotency_key=idempotency_key,
422
+ metadata=metadata,
475
423
  **extra_body,
476
424
  )
477
425
  schema = load_json_schema(json_schema)
@@ -521,8 +469,6 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
521
469
  model: str,
522
470
  table_parsing_format: TableParsingFormat = FieldUnset,
523
471
  image_resolution_dpi: int = FieldUnset,
524
- browser_canvas: BrowserCanvas = FieldUnset,
525
- idempotency_key: str | None = None,
526
472
  **extra_body: Any,
527
473
  ) -> ParseResult:
528
474
  """
@@ -536,8 +482,6 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
536
482
  model: The AI model to use for document parsing.
537
483
  table_parsing_format: Format for parsing tables. Options: "html", "json", "yaml", "markdown". Defaults to "html".
538
484
  image_resolution_dpi: DPI for image processing. Defaults to 72.
539
- browser_canvas: Canvas size for document rendering. Defaults to "A4".
540
- idempotency_key: Optional idempotency key for the request.
541
485
 
542
486
  Returns:
543
487
  ParseResult: Parsed response containing document metadata, usage information, and page text content.
@@ -550,8 +494,6 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
550
494
  model=model,
551
495
  table_parsing_format=table_parsing_format,
552
496
  image_resolution_dpi=image_resolution_dpi,
553
- browser_canvas=browser_canvas,
554
- idempotency_key=idempotency_key,
555
497
  **extra_body,
556
498
  )
557
499
  response = self._client._prepared_request(request)
@@ -569,8 +511,6 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
569
511
  self,
570
512
  document: Path | str | IOBase | MIMEData | PIL.Image.Image,
571
513
  image_resolution_dpi: int = FieldUnset,
572
- browser_canvas: BrowserCanvas = FieldUnset,
573
- idempotency_key: str | None = None,
574
514
  **extra_body: Any,
575
515
  ) -> DocumentMessage:
576
516
  """
@@ -578,7 +518,6 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
578
518
 
579
519
  Args:
580
520
  document: The document to process. Can be a file path (Path or str) or a file-like object.
581
- idempotency_key: Idempotency key for request
582
521
  Returns:
583
522
  DocumentMessage: The processed document message containing extracted content.
584
523
 
@@ -588,8 +527,6 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
588
527
  request = self._prepare_create_messages(
589
528
  document=document,
590
529
  image_resolution_dpi=image_resolution_dpi,
591
- browser_canvas=browser_canvas,
592
- idempotency_key=idempotency_key,
593
530
  **extra_body,
594
531
  )
595
532
  response = await self._client._prepared_request(request)
@@ -600,8 +537,6 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
600
537
  document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
601
538
  json_schema: dict[str, Any] | Path | str,
602
539
  image_resolution_dpi: int = FieldUnset,
603
- browser_canvas: BrowserCanvas = FieldUnset,
604
- idempotency_key: str | None = None,
605
540
  **extra_body: Any,
606
541
  ) -> DocumentMessage:
607
542
  """
@@ -611,8 +546,6 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
611
546
  document: The document to process. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
612
547
  json_schema: The JSON schema to use for structuring the document content.
613
548
  image_resolution_dpi: Optional image resolution DPI.
614
- browser_canvas: Optional browser canvas size.
615
- idempotency_key: Idempotency key for request
616
549
  Returns:
617
550
  DocumentMessage: The processed document message containing extracted content with schema context.
618
551
 
@@ -623,8 +556,6 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
623
556
  document=document,
624
557
  json_schema=json_schema,
625
558
  image_resolution_dpi=image_resolution_dpi,
626
- browser_canvas=browser_canvas,
627
- idempotency_key=idempotency_key,
628
559
  **extra_body,
629
560
  )
630
561
  response = await self._client._prepared_request(request)
@@ -634,55 +565,48 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
634
565
  self,
635
566
  json_schema: dict[str, Any] | Path | str,
636
567
  model: str,
637
- document: Path | str | IOBase | HttpUrl | None = None,
638
- documents: list[Path | str | IOBase | HttpUrl] | None = None,
568
+ document: Path | str | IOBase | HttpUrl,
639
569
  image_resolution_dpi: int = FieldUnset,
640
- browser_canvas: BrowserCanvas = FieldUnset,
641
570
  temperature: float = FieldUnset,
642
571
  reasoning_effort: ChatCompletionReasoningEffort = FieldUnset,
643
572
  n_consensus: int = FieldUnset,
644
- idempotency_key: str | None = None,
645
573
  store: bool = FieldUnset,
574
+ metadata: dict[str, str] = FieldUnset,
646
575
  **extra_body: Any,
647
576
  ) -> RetabParsedChatCompletion:
648
577
  """
649
- Process one or more documents using the Retab API for structured data extraction asynchronously.
578
+ Process a document using the Retab API for structured data extraction asynchronously.
650
579
 
651
- This method provides a direct interface to document extraction functionality,
652
- intended to replace the current `.extractions.parse()` pattern.
580
+ This method provides a direct interface to document extraction functionality.
653
581
 
654
582
  Args:
655
583
  json_schema: JSON schema defining the expected data structure
656
584
  model: The AI model to use for processing
657
- document: Single document to process (use either this or documents, not both)
658
- documents: List of documents to process (use either this or document, not both)
585
+ document: Document to process (file path, URL, or file-like object)
659
586
  image_resolution_dpi: Optional image resolution DPI
660
- browser_canvas: Optional browser canvas size
661
587
  temperature: Model temperature setting (0-1)
662
588
  reasoning_effort: The effort level for the model to reason about the input data
663
589
  n_consensus: Number of consensus extractions to perform
664
- idempotency_key: Idempotency key for request
665
590
  store: Whether to store the document in the Retab database
591
+ metadata: User-defined metadata to associate with this extraction
666
592
 
667
593
  Returns:
668
594
  RetabParsedChatCompletion: Parsed response from the API
669
595
 
670
596
  Raises:
671
- ValueError: If neither document nor documents is provided, or if both are provided
597
+ ValueError: If document is not provided
672
598
  HTTPException: If the request fails
673
599
  """
674
600
  request = self._prepare_extract(
675
601
  json_schema=json_schema,
676
602
  model=model,
677
603
  document=document,
678
- documents=documents,
679
604
  image_resolution_dpi=image_resolution_dpi,
680
- browser_canvas=browser_canvas,
681
605
  temperature=temperature,
682
606
  reasoning_effort=reasoning_effort,
683
607
  n_consensus=n_consensus,
684
608
  store=store,
685
- idempotency_key=idempotency_key,
609
+ metadata=metadata,
686
610
  **extra_body,
687
611
  )
688
612
  response = await self._client._prepared_request(request)
@@ -694,63 +618,51 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
694
618
  self,
695
619
  json_schema: dict[str, Any] | Path | str,
696
620
  model: str,
697
- document: Path | str | IOBase | HttpUrl | None = None,
698
- documents: list[Path | str | IOBase | HttpUrl] | None = None,
621
+ document: Path | str | IOBase | HttpUrl,
699
622
  image_resolution_dpi: int = FieldUnset,
700
- browser_canvas: BrowserCanvas = FieldUnset,
701
623
  temperature: float = FieldUnset,
702
624
  reasoning_effort: ChatCompletionReasoningEffort = FieldUnset,
703
625
  n_consensus: int = FieldUnset,
704
- idempotency_key: str | None = None,
705
626
  store: bool = FieldUnset,
627
+ metadata: dict[str, str] = FieldUnset,
706
628
  **extra_body: Any,
707
629
  ) -> AsyncGenerator[RetabParsedChatCompletion, None]:
708
630
  """
709
- Extract structured data from one or more documents asynchronously with streaming.
631
+ Extract structured data from a document asynchronously with streaming.
710
632
 
711
633
  Args:
712
634
  json_schema: JSON schema defining the expected data structure.
713
635
  model: The AI model to use.
714
- document: Single document to process (use either this or documents, not both)
715
- documents: List of documents to process (use either this or document, not both)
636
+ document: Document to process (file path, URL, or file-like object)
716
637
  image_resolution_dpi: Optional image resolution DPI.
717
- browser_canvas: Optional browser canvas size.
718
638
  temperature: Model temperature setting (0-1).
719
639
  reasoning_effort: The effort level for the model to reason about the input data.
720
640
  n_consensus: Number of consensus extractions to perform (default: 1 which computes a single extraction and the likelihoods comes from the model logprobs)
721
- idempotency_key: Idempotency key for request
722
641
  store: Whether to store the document in the Retab database
642
+ metadata: User-defined metadata to associate with this extraction
723
643
  Returns:
724
644
  AsyncGenerator[RetabParsedChatCompletion, None]: Stream of parsed responses.
725
645
  Raises:
726
- ValueError: If neither document nor documents is provided, or if both are provided
646
+ ValueError: If document is not provided
727
647
 
728
648
  Usage:
729
649
  ```python
730
- # Single document
731
650
  async with retab.documents.extract_stream(json_schema, model, document=document) as stream:
732
651
  async for response in stream:
733
652
  print(response)
734
-
735
- # Multiple documents
736
- async with retab.documents.extract_stream(json_schema, model, documents=[doc1, doc2]) as stream:
737
- async for response in stream:
738
- print(response)
739
653
  ```
740
654
  """
741
655
  request = self._prepare_extract(
742
656
  json_schema=json_schema,
743
657
  document=document,
744
- documents=documents,
745
658
  image_resolution_dpi=image_resolution_dpi,
746
- browser_canvas=browser_canvas,
747
659
  model=model,
748
660
  temperature=temperature,
749
661
  reasoning_effort=reasoning_effort,
750
662
  stream=True,
751
663
  n_consensus=n_consensus,
752
664
  store=store,
753
- idempotency_key=idempotency_key,
665
+ metadata=metadata,
754
666
  **extra_body,
755
667
  )
756
668
  schema = load_json_schema(json_schema)
@@ -800,8 +712,6 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
800
712
  model: str,
801
713
  table_parsing_format: TableParsingFormat = FieldUnset,
802
714
  image_resolution_dpi: int = FieldUnset,
803
- browser_canvas: BrowserCanvas = FieldUnset,
804
- idempotency_key: str | None = None,
805
715
  **extra_body: Any,
806
716
  ) -> ParseResult:
807
717
  """
@@ -815,8 +725,6 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
815
725
  model: The AI model to use for document parsing.
816
726
  table_parsing_format: Format for parsing tables. Options: "html", "json", "yaml", "markdown". Defaults to "html".
817
727
  image_resolution_dpi: DPI for image processing. Defaults to 96.
818
- browser_canvas: Canvas size for document rendering. Defaults to "A4".
819
- idempotency_key: Optional idempotency key for the request.
820
728
 
821
729
  Returns:
822
730
  ParseResult: Parsed response containing document metadata, usage information, and page text content.
@@ -829,8 +737,6 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
829
737
  model=model,
830
738
  table_parsing_format=table_parsing_format,
831
739
  image_resolution_dpi=image_resolution_dpi,
832
- browser_canvas=browser_canvas,
833
- idempotency_key=idempotency_key,
834
740
  **extra_body,
835
741
  )
836
742
  response = await self._client._prepared_request(request)
@@ -0,0 +1,3 @@
1
+ from .client import AsyncExtractions, Extractions
2
+
3
+ __all__ = ["Extractions", "AsyncExtractions"]