retab 0.0.40__py3-none-any.whl → 0.0.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. retab/client.py +5 -5
  2. retab/resources/consensus/completions.py +1 -1
  3. retab/resources/consensus/completions_stream.py +5 -5
  4. retab/resources/consensus/responses.py +1 -1
  5. retab/resources/consensus/responses_stream.py +2 -2
  6. retab/resources/documents/client.py +12 -11
  7. retab/resources/documents/extractions.py +4 -4
  8. retab/resources/evals.py +1 -1
  9. retab/resources/evaluations/documents.py +1 -1
  10. retab/resources/jsonlUtils.py +4 -4
  11. retab/resources/processors/automations/endpoints.py +9 -5
  12. retab/resources/processors/automations/links.py +2 -2
  13. retab/resources/processors/automations/logs.py +2 -2
  14. retab/resources/processors/automations/mailboxes.py +43 -32
  15. retab/resources/processors/automations/outlook.py +25 -7
  16. retab/resources/processors/automations/tests.py +8 -2
  17. retab/resources/processors/client.py +25 -16
  18. retab/resources/prompt_optimization.py +1 -1
  19. retab/resources/schemas.py +3 -3
  20. retab/types/automations/mailboxes.py +1 -1
  21. retab/types/completions.py +1 -1
  22. retab/types/documents/create_messages.py +4 -4
  23. retab/types/documents/extractions.py +3 -3
  24. retab/types/documents/parse.py +3 -1
  25. retab/types/evals.py +2 -2
  26. retab/types/evaluations/iterations.py +2 -2
  27. retab/types/evaluations/model.py +2 -2
  28. retab/types/extractions.py +34 -9
  29. retab/types/jobs/prompt_optimization.py +1 -1
  30. retab/types/logs.py +3 -3
  31. retab/types/schemas/object.py +4 -4
  32. retab/types/schemas/templates.py +1 -1
  33. retab/utils/__init__.py +0 -0
  34. retab/utils/_model_cards/anthropic.yaml +59 -0
  35. retab/utils/_model_cards/auto.yaml +43 -0
  36. retab/utils/_model_cards/gemini.yaml +117 -0
  37. retab/utils/_model_cards/openai.yaml +301 -0
  38. retab/utils/_model_cards/xai.yaml +28 -0
  39. retab/utils/ai_models.py +138 -0
  40. retab/utils/benchmarking.py +484 -0
  41. retab/utils/chat.py +327 -0
  42. retab/utils/display.py +440 -0
  43. retab/utils/json_schema.py +2156 -0
  44. retab/utils/mime.py +165 -0
  45. retab/utils/responses.py +169 -0
  46. retab/utils/stream_context_managers.py +52 -0
  47. retab/utils/usage/__init__.py +0 -0
  48. retab/utils/usage/usage.py +301 -0
  49. retab-0.0.42.dist-info/METADATA +119 -0
  50. {retab-0.0.40.dist-info → retab-0.0.42.dist-info}/RECORD +52 -36
  51. retab-0.0.40.dist-info/METADATA +0 -418
  52. {retab-0.0.40.dist-info → retab-0.0.42.dist-info}/WHEEL +0 -0
  53. {retab-0.0.40.dist-info → retab-0.0.42.dist-info}/top_level.txt +0 -0
@@ -3,7 +3,13 @@ from typing import Any, Literal, List
3
3
  from pydantic_core import PydanticUndefined
4
4
 
5
5
  from ...._resource import AsyncAPIResource, SyncAPIResource
6
- from ....types.automations.outlook import FetchParams, ListOutlooks, MatchParams, Outlook, UpdateOutlookRequest
6
+ from ....types.automations.outlook import (
7
+ FetchParams,
8
+ ListOutlooks,
9
+ MatchParams,
10
+ Outlook,
11
+ UpdateOutlookRequest,
12
+ )
7
13
  from ....types.standards import PreparedRequest
8
14
 
9
15
 
@@ -38,10 +44,15 @@ class OutlooksMixin:
38
44
  match_params=match_params,
39
45
  fetch_params=fetch_params,
40
46
  )
41
- return PreparedRequest(method="POST", url=self.outlooks_base_url, data=outlook_data.model_dump(mode="json"))
47
+ return PreparedRequest(
48
+ method="POST",
49
+ url=self.outlooks_base_url,
50
+ data=outlook_data.model_dump(mode="json"),
51
+ )
42
52
 
43
53
  def prepare_list(
44
54
  self,
55
+ processor_id: str,
45
56
  before: str | None = None,
46
57
  after: str | None = None,
47
58
  limit: int = 10,
@@ -50,6 +61,7 @@ class OutlooksMixin:
50
61
  webhook_url: str | None = None,
51
62
  ) -> PreparedRequest:
52
63
  params = {
64
+ "processor_id": processor_id,
53
65
  "before": before,
54
66
  "after": after,
55
67
  "limit": limit,
@@ -92,7 +104,11 @@ class OutlooksMixin:
92
104
  fetch_params=fetch_params,
93
105
  )
94
106
 
95
- return PreparedRequest(method="PUT", url=f"{self.outlooks_base_url}/{outlook_id}", data=update_outlook_request.model_dump(mode="json"))
107
+ return PreparedRequest(
108
+ method="PUT",
109
+ url=f"{self.outlooks_base_url}/{outlook_id}",
110
+ data=update_outlook_request.model_dump(mode="json"),
111
+ )
96
112
 
97
113
  def prepare_delete(self, outlook_id: str) -> PreparedRequest:
98
114
  return PreparedRequest(method="DELETE", url=f"{self.outlooks_base_url}/{outlook_id}")
@@ -149,12 +165,13 @@ class Outlooks(SyncAPIResource, OutlooksMixin):
149
165
  )
150
166
  response = self._client._prepared_request(request)
151
167
 
152
- print(f"Outlook plugin created. Url: https://www.retab.dev/dashboard/processors/automations/{response['id']}")
168
+ print(f"Outlook plugin created. Url: https://www.retab.com/dashboard/processors/automations/{response['id']}")
153
169
 
154
170
  return Outlook.model_validate(response)
155
171
 
156
172
  def list(
157
173
  self,
174
+ processor_id: str,
158
175
  before: str | None = None,
159
176
  after: str | None = None,
160
177
  limit: int = 10,
@@ -174,7 +191,7 @@ class Outlooks(SyncAPIResource, OutlooksMixin):
174
191
  Returns:
175
192
  List[Outlook]: List of outlook plugin configurations
176
193
  """
177
- request = self.prepare_list(before, after, limit, order, name, webhook_url)
194
+ request = self.prepare_list(processor_id, before, after, limit, order, name, webhook_url)
178
195
  response = self._client._prepared_request(request)
179
196
  return ListOutlooks.model_validate(response)
180
197
 
@@ -280,11 +297,12 @@ class AsyncOutlooks(AsyncAPIResource, OutlooksMixin):
280
297
  fetch_params=fetch_params,
281
298
  )
282
299
  response = await self._client._prepared_request(request)
283
- print(f"Outlook plugin created. Url: https://www.retab.dev/dashboard/processors/automations/{response['id']}")
300
+ print(f"Outlook plugin created. Url: https://www.retab.com/dashboard/processors/automations/{response['id']}")
284
301
  return Outlook.model_validate(response)
285
302
 
286
303
  async def list(
287
304
  self,
305
+ processor_id: str,
288
306
  before: str | None = None,
289
307
  after: str | None = None,
290
308
  limit: int = 10,
@@ -292,7 +310,7 @@ class AsyncOutlooks(AsyncAPIResource, OutlooksMixin):
292
310
  name: str | None = None,
293
311
  webhook_url: str | None = None,
294
312
  ) -> ListOutlooks:
295
- request = self.prepare_list(before, after, limit, order, name, webhook_url)
313
+ request = self.prepare_list(processor_id, before, after, limit, order, name, webhook_url)
296
314
  response = await self._client._prepared_request(request)
297
315
  return ListOutlooks.model_validate(response)
298
316
 
@@ -1,4 +1,5 @@
1
1
  import json
2
+ import base64
2
3
  from io import IOBase
3
4
  from pathlib import Path
4
5
 
@@ -6,7 +7,7 @@ from PIL.Image import Image
6
7
  from pydantic import HttpUrl
7
8
 
8
9
  from ...._resource import AsyncAPIResource, SyncAPIResource
9
- from ...._utils.mime import prepare_mime_document
10
+ from ....utils.mime import prepare_mime_document
10
11
  from ....types.logs import AutomationLog
11
12
  from ....types.mime import MIMEData
12
13
  from ....types.standards import PreparedRequest
@@ -15,7 +16,12 @@ from ....types.standards import PreparedRequest
15
16
  class TestsMixin:
16
17
  def prepare_upload(self, automation_id: str, document: Path | str | IOBase | HttpUrl | Image | MIMEData) -> PreparedRequest:
17
18
  mime_document = prepare_mime_document(document)
18
- return PreparedRequest(method="POST", url=f"/v1/processors/automations/tests/upload/{automation_id}", data={"document": mime_document.model_dump(mode="json")})
19
+
20
+ # Convert MIME document to file upload format (similar to processors client)
21
+ files = {"file": (mime_document.filename, base64.b64decode(mime_document.content), mime_document.mime_type)}
22
+
23
+ # Send as multipart form data with file upload
24
+ return PreparedRequest(method="POST", url=f"/v1/processors/automations/tests/upload/{automation_id}", files=files)
19
25
 
20
26
  def prepare_webhook(self, automation_id: str) -> PreparedRequest:
21
27
  return PreparedRequest(method="POST", url=f"/v1/processors/automations/tests/webhook/{automation_id}", data=None)
@@ -1,7 +1,7 @@
1
1
  import base64
2
2
  from io import IOBase
3
3
  from pathlib import Path
4
- from typing import Any, Dict, List, Literal
4
+ from typing import Any, List, Literal
5
5
 
6
6
  import PIL.Image
7
7
  from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
@@ -9,8 +9,9 @@ from pydantic import BaseModel, HttpUrl
9
9
  from pydantic_core import PydanticUndefined
10
10
 
11
11
  from ..._resource import AsyncAPIResource, SyncAPIResource
12
- from ..._utils.ai_models import assert_valid_model_extraction
13
- from ..._utils.mime import MIMEData, prepare_mime_document
12
+ from ...utils.ai_models import assert_valid_model_extraction
13
+ from ...utils.json_schema import load_json_schema
14
+ from ...utils.mime import MIMEData, prepare_mime_document
14
15
  from ...types.browser_canvas import BrowserCanvas
15
16
  from ...types.documents.extractions import RetabParsedChatCompletion
16
17
  from ...types.logs import ProcessorConfig, UpdateProcessorRequest
@@ -31,7 +32,7 @@ class ProcessorsMixin:
31
32
  def prepare_create(
32
33
  self,
33
34
  name: str,
34
- json_schema: dict[str, Any],
35
+ json_schema: dict[str, Any] | Path | str,
35
36
  modality: Modality = "native",
36
37
  model: str = "gpt-4o-mini",
37
38
  temperature: float = PydanticUndefined, # type: ignore[assignment]
@@ -42,9 +43,12 @@ class ProcessorsMixin:
42
43
  ) -> PreparedRequest:
43
44
  assert_valid_model_extraction(model)
44
45
 
46
+ # Load the JSON schema from file path, string, or dict
47
+ loaded_schema = load_json_schema(json_schema)
48
+
45
49
  processor_config = ProcessorConfig(
46
50
  name=name,
47
- json_schema=json_schema,
51
+ json_schema=loaded_schema,
48
52
  modality=modality,
49
53
  model=model,
50
54
  temperature=temperature,
@@ -104,7 +108,7 @@ class ProcessorsMixin:
104
108
  image_resolution_dpi: int | None = None,
105
109
  browser_canvas: BrowserCanvas | None = None,
106
110
  model: str | None = None,
107
- json_schema: dict[str, Any] | None = None,
111
+ json_schema: dict[str, Any] | Path | str | None = None,
108
112
  temperature: float | None = None,
109
113
  reasoning_effort: ChatCompletionReasoningEffort | None = None,
110
114
  n_consensus: int | None = None,
@@ -112,13 +116,18 @@ class ProcessorsMixin:
112
116
  if model is not None:
113
117
  assert_valid_model_extraction(model)
114
118
 
119
+ # Load the JSON schema from file path, string, or dict if provided
120
+ loaded_schema = None
121
+ if json_schema is not None:
122
+ loaded_schema = load_json_schema(json_schema)
123
+
115
124
  update_request = UpdateProcessorRequest(
116
125
  name=name,
117
126
  modality=modality,
118
127
  image_resolution_dpi=image_resolution_dpi,
119
128
  browser_canvas=browser_canvas,
120
129
  model=model,
121
- json_schema=json_schema,
130
+ json_schema=loaded_schema,
122
131
  temperature=temperature,
123
132
  reasoning_effort=reasoning_effort,
124
133
  n_consensus=n_consensus,
@@ -203,7 +212,7 @@ class Processors(SyncAPIResource, ProcessorsMixin):
203
212
  def create(
204
213
  self,
205
214
  name: str,
206
- json_schema: Dict[str, Any],
215
+ json_schema: dict[str, Any] | Path | str,
207
216
  modality: Modality = "native",
208
217
  model: str = "gpt-4o-mini",
209
218
  temperature: float = PydanticUndefined, # type: ignore[assignment]
@@ -216,7 +225,7 @@ class Processors(SyncAPIResource, ProcessorsMixin):
216
225
 
217
226
  Args:
218
227
  name: Name of the processor
219
- json_schema: JSON schema for the processor
228
+ json_schema: JSON schema for the processor. Can be a dictionary, file path (Path or str), or JSON string.
220
229
  image_resolution_dpi: Optional image resolution DPI
221
230
  browser_canvas: Optional browser canvas size
222
231
  modality: Processing modality (currently only "native" supported)
@@ -239,7 +248,7 @@ class Processors(SyncAPIResource, ProcessorsMixin):
239
248
  n_consensus=n_consensus,
240
249
  )
241
250
  response = self._client._prepared_request(request)
242
- print(f"Processor ID: {response['id']}. Processor available at https://www.retab.dev/dashboard/processors/{response['id']}")
251
+ print(f"Processor ID: {response['id']}. Processor available at https://www.retab.com/dashboard/processors/{response['id']}")
243
252
  return ProcessorConfig.model_validate(response)
244
253
 
245
254
  def list(
@@ -295,7 +304,7 @@ class Processors(SyncAPIResource, ProcessorsMixin):
295
304
  image_resolution_dpi: int | None = None,
296
305
  browser_canvas: BrowserCanvas | None = None,
297
306
  model: str | None = None,
298
- json_schema: dict[str, Any] | None = None,
307
+ json_schema: dict[str, Any] | Path | str | None = None,
299
308
  temperature: float | None = None,
300
309
  reasoning_effort: ChatCompletionReasoningEffort | None = None,
301
310
  n_consensus: int | None = None,
@@ -309,7 +318,7 @@ class Processors(SyncAPIResource, ProcessorsMixin):
309
318
  image_resolution_dpi: New image resolution DPI
310
319
  browser_canvas: New browser canvas size
311
320
  model: New AI model
312
- json_schema: New JSON schema for the processor
321
+ json_schema: New JSON schema for the processor. Can be a dictionary, file path (Path or str), or JSON string.
313
322
  temperature: New temperature setting
314
323
  reasoning_effort: The effort level for the model to reason about the input data.
315
324
  n_consensus: New number of consensus required
@@ -378,7 +387,7 @@ class AsyncProcessors(AsyncAPIResource, ProcessorsMixin):
378
387
  async def create(
379
388
  self,
380
389
  name: str,
381
- json_schema: Dict[str, Any],
390
+ json_schema: dict[str, Any] | Path | str,
382
391
  modality: Modality = "native",
383
392
  model: str = "gpt-4o-mini",
384
393
  temperature: float = PydanticUndefined, # type: ignore[assignment]
@@ -399,7 +408,7 @@ class AsyncProcessors(AsyncAPIResource, ProcessorsMixin):
399
408
  n_consensus=n_consensus,
400
409
  )
401
410
  response = await self._client._prepared_request(request)
402
- print(f"Processor ID: {response['id']}. Processor available at https://www.retab.dev/dashboard/processors/{response['id']}")
411
+ print(f"Processor ID: {response['id']}. Processor available at https://www.retab.com/dashboard/processors/{response['id']}")
403
412
 
404
413
  return ProcessorConfig.model_validate(response)
405
414
 
@@ -432,7 +441,7 @@ class AsyncProcessors(AsyncAPIResource, ProcessorsMixin):
432
441
  image_resolution_dpi: int | None = None,
433
442
  browser_canvas: BrowserCanvas | None = None,
434
443
  model: str | None = None,
435
- json_schema: dict[str, Any] | None = None,
444
+ json_schema: dict[str, Any] | Path | str | None = None,
436
445
  temperature: float | None = None,
437
446
  reasoning_effort: ChatCompletionReasoningEffort | None = None,
438
447
  n_consensus: int | None = None,
@@ -446,7 +455,7 @@ class AsyncProcessors(AsyncAPIResource, ProcessorsMixin):
446
455
  image_resolution_dpi: New image resolution DPI
447
456
  browser_canvas: New browser canvas size
448
457
  model: New AI model
449
- json_schema: New JSON schema for the processor
458
+ json_schema: New JSON schema for the processor. Can be a dictionary, file path (Path or str), or JSON string.
450
459
  temperature: New temperature setting
451
460
  reasoning_effort: The effort level for the model to reason about the input data.
452
461
  n_consensus: New number of consensus required
@@ -3,7 +3,7 @@
3
3
  # import json
4
4
 
5
5
  # from .._resource import SyncAPIResource, AsyncAPIResource
6
- # from .._utils.json_schema import load_json_schema
6
+ # from ..utils.json_schema import load_json_schema
7
7
  # from ..types.jobs import JobResponse
8
8
  # from ..types.jobs.prompt_optimization import PromptOptimizationObject, PromptOptimizationProps, PromptOptimizationJobInputData, PromptOptimizationJob
9
9
 
@@ -7,9 +7,9 @@ from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionRea
7
7
  from pydantic import BaseModel
8
8
 
9
9
  from .._resource import AsyncAPIResource, SyncAPIResource
10
- from .._utils.ai_models import assert_valid_model_schema_generation
11
- from .._utils.json_schema import load_json_schema
12
- from .._utils.mime import prepare_mime_document_list
10
+ from ..utils.ai_models import assert_valid_model_schema_generation
11
+ from ..utils.json_schema import load_json_schema
12
+ from ..utils.mime import prepare_mime_document_list
13
13
  from ..types.mime import MIMEData
14
14
  from ..types.modalities import Modality
15
15
  from ..types.schemas.enhance import EnhanceSchemaConfig, EnhanceSchemaConfigDict, EnhanceSchemaRequest
@@ -17,7 +17,7 @@ class Mailbox(AutomationConfig):
17
17
  def object(self) -> str:
18
18
  return "automation.mailbox"
19
19
 
20
- EMAIL_PATTERN: ClassVar[str] = f".*@{os.getenv('EMAIL_DOMAIN', 'mailbox.retab.dev')}$"
20
+ EMAIL_PATTERN: ClassVar[str] = f".*@{os.getenv('EMAIL_DOMAIN', 'mailbox.retab.com')}$"
21
21
  id: str = Field(default_factory=lambda: "mb_" + nanoid.generate(), description="Unique identifier for the mailbox")
22
22
 
23
23
  # Email Specific config
@@ -7,7 +7,7 @@ from openai.types.shared_params.reasoning import Reasoning
7
7
  from openai.types.shared_params.response_format_json_schema import ResponseFormatJSONSchema
8
8
  from pydantic import BaseModel, ConfigDict, Field
9
9
 
10
- from .._utils.ai_models import get_provider_for_model
10
+ from ..utils.ai_models import get_provider_for_model
11
11
  from .ai_models import AIProvider
12
12
  from .chat import ChatCompletionRetabMessage
13
13
 
@@ -10,10 +10,10 @@ from openai.types.chat.chat_completion_message_param import ChatCompletionMessag
10
10
  from openai.types.responses.response_input_param import ResponseInputItemParam
11
11
  from pydantic import BaseModel, Field, computed_field
12
12
 
13
- from ..._utils.chat import convert_to_anthropic_format, convert_to_google_genai_format, str_messages
14
- from ..._utils.chat import convert_to_openai_format as convert_to_openai_completions_api_format
15
- from ..._utils.display import count_image_tokens, count_text_tokens
16
- from ..._utils.responses import convert_to_openai_format as convert_to_openai_responses_api_format
13
+ from ...utils.chat import convert_to_anthropic_format, convert_to_google_genai_format, str_messages
14
+ from ...utils.chat import convert_to_openai_format as convert_to_openai_completions_api_format
15
+ from ...utils.display import count_image_tokens, count_text_tokens
16
+ from ...utils.responses import convert_to_openai_format as convert_to_openai_responses_api_format
17
17
  from ..chat import ChatCompletionRetabMessage
18
18
  from ..mime import MIMEData
19
19
  from ..modalities import Modality
@@ -15,7 +15,7 @@ from openai.types.responses.response import Response
15
15
  from openai.types.responses.response_input_param import ResponseInputItemParam
16
16
  from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, computed_field, field_validator, model_validator
17
17
 
18
- from ..._utils.usage.usage import CostBreakdown, compute_cost_from_model, compute_cost_from_model_with_breakdown
18
+ from ...utils.usage.usage import CostBreakdown, compute_cost_from_model, compute_cost_from_model_with_breakdown
19
19
  from ..ai_models import Amount
20
20
  from ..chat import ChatCompletionRetabMessage
21
21
  from ..mime import MIMEData
@@ -91,7 +91,7 @@ class FieldLocation(BaseModel):
91
91
  quote: str = Field(..., description="The quote of the field (verbatim from the document)")
92
92
  file_id: str | None = Field(default=None, description="The ID of the file")
93
93
  page: int | None = Field(default=None, description="The page number of the field (1-indexed)")
94
- bboxes_normalized: list[tuple[float, float, float, float]] | None = Field(default=None, description="The normalized bounding boxes of the field")
94
+ bbox_normalized: tuple[float, float, float, float] | None = Field(default=None, description="The normalized bounding box of the field")
95
95
  score: float | None = Field(default=None, description="The score of the field")
96
96
  match_level: Literal["token", "line", "block"] | None = Field(default=None, description="The level of the match (token, line, block)")
97
97
 
@@ -99,7 +99,7 @@ class FieldLocation(BaseModel):
99
99
  class RetabParsedChoice(ParsedChoice):
100
100
  # Adaptable ParsedChoice that allows None for the finish_reason
101
101
  finish_reason: Literal["stop", "length", "tool_calls", "content_filter", "function_call"] | None = None # type: ignore
102
- field_locations: dict[str, list[FieldLocation]] | None = Field(default=None, description="The locations of the fields in the document, if available")
102
+ field_locations: dict[str, FieldLocation] | None = Field(default=None, description="The locations of the fields in the document, if available")
103
103
  key_mapping: dict[str, Optional[str]] | None = Field(default=None, description="Mapping of consensus keys to original model keys")
104
104
 
105
105
 
@@ -3,6 +3,7 @@ from pydantic import BaseModel, Field
3
3
 
4
4
  from ..mime import MIMEData, BaseMIMEData
5
5
  from ..browser_canvas import BrowserCanvas
6
+ from ..ai_models import LLMModel
6
7
 
7
8
  TableParsingFormat = Literal["markdown", "yaml", "html", "json"]
8
9
 
@@ -18,7 +19,7 @@ class ParseRequest(BaseModel):
18
19
  """Request model for document parsing."""
19
20
 
20
21
  document: MIMEData = Field(..., description="Document to parse")
21
- fast_mode: bool = Field(default=False, description="Use fast mode for parsing (may reduce quality)")
22
+ model: LLMModel = Field(default="gemini-2.5-flash", description="Model to use for parsing")
22
23
  table_parsing_format: TableParsingFormat = Field(default="html", description="Format for parsing tables")
23
24
  image_resolution_dpi: int = Field(default=72, description="DPI for image processing")
24
25
  browser_canvas: BrowserCanvas = Field(default="A4", description="Canvas size for document rendering")
@@ -30,3 +31,4 @@ class ParseResult(BaseModel):
30
31
  document: BaseMIMEData = Field(..., description="Processed document metadata")
31
32
  usage: RetabUsage = Field(..., description="Processing usage information")
32
33
  pages: list[str] = Field(..., description="Text content of each page")
34
+ text: str = Field(..., description="Text content of the document")
retab/types/evals.py CHANGED
@@ -6,8 +6,8 @@ from typing import Any, List, Literal, Optional
6
6
  import nanoid # type: ignore
7
7
  from pydantic import BaseModel, Field, computed_field
8
8
 
9
- from .._utils.json_schema import clean_schema, compute_schema_data_id
10
- from .._utils.mime import generate_blake2b_hash_from_string
9
+ from ..utils.json_schema import clean_schema, compute_schema_data_id
10
+ from ..utils.mime import generate_blake2b_hash_from_string
11
11
  from .ai_models import Amount
12
12
  from .inference_settings import InferenceSettings
13
13
  from .mime import MIMEData
@@ -6,8 +6,8 @@ from typing import Any, Optional, Self
6
6
  import nanoid # type: ignore
7
7
  from pydantic import BaseModel, Field, computed_field, model_validator
8
8
 
9
- from ..._utils.json_schema import clean_schema
10
- from ..._utils.mime import generate_blake2b_hash_from_string
9
+ from ...utils.json_schema import clean_schema
10
+ from ...utils.mime import generate_blake2b_hash_from_string
11
11
  from ..inference_settings import InferenceSettings
12
12
  from ..metrics import MetricResult
13
13
  from ..predictions import PredictionData
@@ -5,8 +5,8 @@ from typing import Any, Optional
5
5
  import nanoid # type: ignore
6
6
  from pydantic import BaseModel, Field, computed_field
7
7
 
8
- from ..._utils.json_schema import compute_schema_data_id
9
- from ..._utils.mime import generate_blake2b_hash_from_string
8
+ from ...utils.json_schema import compute_schema_data_id
9
+ from ...utils.mime import generate_blake2b_hash_from_string
10
10
  from ..inference_settings import InferenceSettings
11
11
  from .documents import EvaluationDocument
12
12
  from .iterations import Iteration
@@ -3,13 +3,19 @@ from typing import Any, Literal, Optional
3
3
 
4
4
  import nanoid # type: ignore
5
5
  from openai.types.chat import ChatCompletion
6
- from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
6
+ from openai.types.chat.chat_completion_reasoning_effort import (
7
+ ChatCompletionReasoningEffort,
8
+ )
7
9
  from pydantic import BaseModel, Field, computed_field, model_validator
8
10
 
9
11
  from retab.types.chat import ChatCompletionRetabMessage
10
12
  from retab.types.documents.extractions import RetabParsedChatCompletion
11
13
 
12
- from .._utils.usage.usage import CostBreakdown, compute_cost_from_model, compute_cost_from_model_with_breakdown
14
+ from ..utils.usage.usage import (
15
+ CostBreakdown,
16
+ compute_cost_from_model,
17
+ compute_cost_from_model_with_breakdown,
18
+ )
13
19
  from .ai_models import Amount
14
20
  from .modalities import Modality
15
21
 
@@ -17,9 +23,18 @@ ValidationsState = Literal["pending", "validated", "invalid"]
17
23
 
18
24
 
19
25
  class ExtractionSource(BaseModel):
20
- type: Literal["api", "annotation", "processor", "automation.link", "automation.mailbox", "automation.cron", "automation.outlook", "automation.endpoint", "schema.extract"] = (
21
- Field(description="Type of extraction")
22
- )
26
+ type: Literal[
27
+ "api",
28
+ "annotation",
29
+ "processor",
30
+ "automation",
31
+ "automation.link",
32
+ "automation.mailbox",
33
+ "automation.cron",
34
+ "automation.outlook",
35
+ "automation.endpoint",
36
+ "schema.extract",
37
+ ] = Field(description="Type of extraction")
23
38
  id: str | None = Field(default=None, description="ID the trigger of the extraction")
24
39
 
25
40
 
@@ -34,7 +49,10 @@ class ExtractionTimingStep(BaseModel):
34
49
 
35
50
 
36
51
  class Extraction(BaseModel):
37
- id: str = Field(default_factory=lambda: "extr_" + nanoid.generate(), description="Unique identifier of the analysis")
52
+ id: str = Field(
53
+ default_factory=lambda: "extr_" + nanoid.generate(),
54
+ description="Unique identifier of the analysis",
55
+ )
38
56
  messages: list[ChatCompletionRetabMessage] = Field(default_factory=list)
39
57
  messages_gcs: str = Field(..., description="GCS path to the messages")
40
58
  file_gcs_paths: list[str] = Field(..., description="GCS paths to the files")
@@ -51,16 +69,23 @@ class Extraction(BaseModel):
51
69
  source: ExtractionSource = Field(..., description="Source of the extraction")
52
70
  image_resolution_dpi: int = Field(default=96, description="Resolution of the image sent to the LLM")
53
71
  browser_canvas: BrowserCanvas = Field(
54
- default="A4", description="Sets the size of the browser canvas for rendering documents in browser-based processing. Choose a size that matches the document type."
72
+ default="A4",
73
+ description="Sets the size of the browser canvas for rendering documents in browser-based processing. Choose a size that matches the document type.",
55
74
  )
56
75
  modality: Modality = Field(default="native", description="Modality of the extraction")
57
- reasoning_effort: Optional[ChatCompletionReasoningEffort] = Field(default=None, description="The effort level for the model to reason about the input data.")
76
+ reasoning_effort: Optional[ChatCompletionReasoningEffort] = Field(
77
+ default=None,
78
+ description="The effort level for the model to reason about the input data.",
79
+ )
58
80
  timings: list[ExtractionTimingStep] = Field(default_factory=list, description="Timings of the extraction")
59
81
 
60
82
  # Infered from the schema
61
83
  schema_id: str = Field(..., description="Version of the schema used for the analysis")
62
84
  schema_data_id: str = Field(..., description="Version of the schema data used for the analysis")
63
- created_at: datetime.datetime = Field(default_factory=lambda: datetime.datetime.now(datetime.timezone.utc), description="Timestamp of the creation of the extraction object")
85
+ created_at: datetime.datetime = Field(
86
+ default_factory=lambda: datetime.datetime.now(datetime.timezone.utc),
87
+ description="Timestamp of the creation of the extraction object",
88
+ )
64
89
  request_at: datetime.datetime | None = Field(default=None, description="Timestamp of the extraction request if provided.")
65
90
  organization_id: str = Field(..., description="Organization ID of the user or application")
66
91
  validation_state: Optional[ValidationsState] = Field(default=None, description="Validation state of the extraction")
@@ -1,7 +1,7 @@
1
1
  # from typing import Literal, Any
2
2
  # from pydantic import BaseModel, computed_field
3
3
  # from ..mime import MIMEData
4
- # from ..._utils.benchmarking import ExtractionAnalysis
4
+ # from ...utils.benchmarking import ExtractionAnalysis
5
5
 
6
6
  # MAX_CONCURRENCY = 15
7
7
 
retab/types/logs.py CHANGED
@@ -7,9 +7,9 @@ from openai.types.chat.chat_completion import ChatCompletion
7
7
  from openai.types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort
8
8
  from pydantic import BaseModel, EmailStr, Field, HttpUrl, computed_field, field_validator
9
9
 
10
- from .._utils.json_schema import compute_schema_data_id
11
- from .._utils.mime import generate_blake2b_hash_from_string
12
- from .._utils.usage.usage import CostBreakdown, compute_cost_from_model, compute_cost_from_model_with_breakdown
10
+ from ..utils.json_schema import compute_schema_data_id
11
+ from ..utils.mime import generate_blake2b_hash_from_string
12
+ from ..utils.usage.usage import CostBreakdown, compute_cost_from_model, compute_cost_from_model_with_breakdown
13
13
  from .ai_models import Amount
14
14
  from .documents.extractions import RetabParsedChatCompletion
15
15
  from .mime import BaseMIMEData
@@ -10,9 +10,9 @@ from openai.types.chat.chat_completion_message_param import ChatCompletionMessag
10
10
  from openai.types.responses.response_input_param import ResponseInputItemParam
11
11
  from pydantic import BaseModel, Field, PrivateAttr, computed_field, model_validator
12
12
 
13
- from ..._utils.chat import convert_to_anthropic_format, convert_to_google_genai_format
14
- from ..._utils.chat import convert_to_openai_format as convert_to_openai_completions_api_format
15
- from ..._utils.json_schema import (
13
+ from ...utils.chat import convert_to_anthropic_format, convert_to_google_genai_format
14
+ from ...utils.chat import convert_to_openai_format as convert_to_openai_completions_api_format
15
+ from ...utils.json_schema import (
16
16
  convert_basemodel_to_partial_basemodel,
17
17
  convert_json_schema_to_basemodel,
18
18
  create_reasoning_schema,
@@ -25,7 +25,7 @@ from ..._utils.json_schema import (
25
25
  load_json_schema,
26
26
  schema_to_ts_type,
27
27
  )
28
- from ..._utils.responses import convert_to_openai_format as convert_to_openai_responses_api_format
28
+ from ...utils.responses import convert_to_openai_format as convert_to_openai_responses_api_format
29
29
  from ...types.standards import StreamingBaseModel
30
30
  from ..chat import ChatCompletionRetabMessage
31
31
 
@@ -4,7 +4,7 @@ from typing import Any, Literal, Optional
4
4
  import nanoid # type: ignore
5
5
  from pydantic import BaseModel, Field, PrivateAttr, computed_field
6
6
 
7
- from ..._utils.json_schema import generate_schema_data_id, generate_schema_id
7
+ from ...utils.json_schema import generate_schema_data_id, generate_schema_id
8
8
  from ...types.mime import MIMEData
9
9
 
10
10
 
File without changes
@@ -0,0 +1,59 @@
1
+ - model: "claude-3-5-sonnet-latest"
2
+ pricing:
3
+ text:
4
+ prompt: 3.00
5
+ cached_discount: 0.5
6
+ completion: 15.00
7
+ audio: null
8
+ capabilities:
9
+ modalities: ["text", "image"]
10
+ endpoints: ["chat_completions"]
11
+ features: ["streaming", "function_calling"]
12
+ permissions:
13
+ show_in_free_picker: true
14
+ show_in_paid_picker: true
15
+
16
+ - model: "claude-3-5-sonnet-20241022"
17
+ inherits: "claude-3-5-sonnet-latest"
18
+ permissions:
19
+ show_in_free_picker: false
20
+ show_in_paid_picker: false
21
+
22
+ - model: "claude-3-opus-20240229"
23
+ pricing:
24
+ text:
25
+ prompt: 15.00
26
+ cached_discount: 0.5
27
+ completion: 75.00
28
+ audio: null
29
+ capabilities:
30
+ modalities: ["text", "image"]
31
+ endpoints: ["chat_completions"]
32
+ features: ["streaming", "function_calling"]
33
+ permissions:
34
+ show_in_free_picker: true
35
+ show_in_paid_picker: true
36
+
37
+ - model: "claude-3-sonnet-20240229"
38
+ pricing:
39
+ text:
40
+ prompt: 3.00
41
+ cached_discount: 0.5
42
+ completion: 15.00
43
+ audio: null
44
+ capabilities:
45
+ modalities: ["text", "image"]
46
+ endpoints: ["chat_completions"]
47
+ features: ["streaming", "function_calling"]
48
+
49
+ - model: "claude-3-haiku-20240307"
50
+ pricing:
51
+ text:
52
+ prompt: 0.25
53
+ cached_discount: 0.5
54
+ completion: 1.25
55
+ audio: null
56
+ capabilities:
57
+ modalities: ["text", "image"]
58
+ endpoints: ["chat_completions"]
59
+ features: ["streaming", "function_calling"]