retab 0.0.42__tar.gz → 0.0.44__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. {retab-0.0.42 → retab-0.0.44}/PKG-INFO +4 -6
  2. {retab-0.0.42 → retab-0.0.44}/README.md +3 -5
  3. {retab-0.0.42 → retab-0.0.44}/retab/__init__.py +2 -1
  4. {retab-0.0.42 → retab-0.0.44}/retab/client.py +26 -51
  5. retab-0.0.44/retab/generate_types.py +180 -0
  6. {retab-0.0.42 → retab-0.0.44}/retab/resources/consensus/client.py +1 -1
  7. {retab-0.0.42 → retab-0.0.44}/retab/resources/consensus/responses.py +1 -1
  8. retab-0.0.44/retab/resources/deployments/__init__.py +3 -0
  9. retab-0.0.44/retab/resources/deployments/client.py +148 -0
  10. {retab-0.0.42 → retab-0.0.44}/retab/resources/documents/client.py +94 -68
  11. {retab-0.0.42 → retab-0.0.44}/retab/resources/documents/extractions.py +55 -46
  12. {retab-0.0.42 → retab-0.0.44}/retab/resources/jsonlUtils.py +3 -4
  13. {retab-0.0.42 → retab-0.0.44}/retab/resources/processors/automations/endpoints.py +49 -39
  14. {retab-0.0.42 → retab-0.0.44}/retab/resources/processors/automations/links.py +52 -43
  15. {retab-0.0.42 → retab-0.0.44}/retab/resources/processors/automations/mailboxes.py +74 -59
  16. {retab-0.0.42 → retab-0.0.44}/retab/resources/processors/automations/outlook.py +104 -82
  17. {retab-0.0.42 → retab-0.0.44}/retab/resources/processors/client.py +35 -30
  18. retab-0.0.44/retab/resources/projects/__init__.py +3 -0
  19. {retab-0.0.42/retab/resources/evaluations → retab-0.0.44/retab/resources/projects}/client.py +62 -78
  20. {retab-0.0.42/retab/resources/evaluations → retab-0.0.44/retab/resources/projects}/documents.py +48 -37
  21. {retab-0.0.42/retab/resources/evaluations → retab-0.0.44/retab/resources/projects}/iterations.py +58 -40
  22. {retab-0.0.42 → retab-0.0.44}/retab/resources/usage.py +2 -0
  23. {retab-0.0.42 → retab-0.0.44}/retab/types/ai_models.py +2 -1
  24. retab-0.0.42/retab/types/evals.py → retab-0.0.44/retab/types/deprecated_evals.py +14 -26
  25. {retab-0.0.42 → retab-0.0.44}/retab/types/extractions.py +1 -0
  26. {retab-0.0.42 → retab-0.0.44}/retab/types/jobs/base.py +1 -1
  27. {retab-0.0.42 → retab-0.0.44}/retab/types/jobs/evaluation.py +1 -1
  28. {retab-0.0.42 → retab-0.0.44}/retab/types/logs.py +5 -6
  29. {retab-0.0.42 → retab-0.0.44}/retab/types/mime.py +1 -10
  30. {retab-0.0.42/retab/types/evaluations → retab-0.0.44/retab/types/projects}/__init__.py +12 -9
  31. {retab-0.0.42/retab/types/evaluations → retab-0.0.44/retab/types/projects}/documents.py +3 -3
  32. {retab-0.0.42/retab/types/evaluations → retab-0.0.44/retab/types/projects}/iterations.py +9 -43
  33. {retab-0.0.42/retab/types/evaluations → retab-0.0.44/retab/types/projects}/model.py +25 -30
  34. {retab-0.0.42 → retab-0.0.44}/retab/types/schemas/enhance.py +22 -5
  35. {retab-0.0.42 → retab-0.0.44}/retab/types/schemas/evaluate.py +2 -2
  36. {retab-0.0.42 → retab-0.0.44}/retab/types/schemas/object.py +27 -25
  37. {retab-0.0.42 → retab-0.0.44}/retab/types/standards.py +2 -2
  38. retab-0.0.44/retab/utils/__init__.py +3 -0
  39. retab-0.0.44/retab/utils/ai_models.py +253 -0
  40. retab-0.0.44/retab/utils/hashing.py +24 -0
  41. {retab-0.0.42 → retab-0.0.44}/retab/utils/json_schema.py +1 -26
  42. {retab-0.0.42 → retab-0.0.44}/retab/utils/mime.py +0 -17
  43. {retab-0.0.42 → retab-0.0.44}/retab/utils/usage/usage.py +0 -1
  44. {retab-0.0.42 → retab-0.0.44}/retab.egg-info/PKG-INFO +4 -6
  45. {retab-0.0.42 → retab-0.0.44}/retab.egg-info/SOURCES.txt +13 -10
  46. {retab-0.0.42 → retab-0.0.44}/setup.py +1 -1
  47. {retab-0.0.42 → retab-0.0.44}/tests/test_evaluations.py +2 -16
  48. retab-0.0.42/retab/resources/evals.py +0 -825
  49. retab-0.0.42/retab/resources/evaluations/__init__.py +0 -3
  50. retab-0.0.42/retab/utils/ai_models.py +0 -138
  51. retab-0.0.42/retab/utils/usage/__init__.py +0 -0
  52. {retab-0.0.42 → retab-0.0.44}/pyproject.toml +0 -0
  53. {retab-0.0.42 → retab-0.0.44}/retab/_resource.py +0 -0
  54. {retab-0.0.42 → retab-0.0.44}/retab/py.typed +0 -0
  55. {retab-0.0.42 → retab-0.0.44}/retab/resources/__init__.py +0 -0
  56. {retab-0.0.42 → retab-0.0.44}/retab/resources/consensus/__init__.py +0 -0
  57. {retab-0.0.42 → retab-0.0.44}/retab/resources/consensus/completions.py +0 -0
  58. {retab-0.0.42 → retab-0.0.44}/retab/resources/consensus/completions_stream.py +0 -0
  59. {retab-0.0.42 → retab-0.0.44}/retab/resources/consensus/responses_stream.py +0 -0
  60. {retab-0.0.42 → retab-0.0.44}/retab/resources/documents/__init__.py +0 -0
  61. {retab-0.0.42 → retab-0.0.44}/retab/resources/files.py +0 -0
  62. {retab-0.0.42 → retab-0.0.44}/retab/resources/finetuning.py +0 -0
  63. {retab-0.0.42 → retab-0.0.44}/retab/resources/models.py +0 -0
  64. {retab-0.0.42 → retab-0.0.44}/retab/resources/openai_example.py +0 -0
  65. {retab-0.0.42 → retab-0.0.44}/retab/resources/processors/__init__.py +0 -0
  66. {retab-0.0.42 → retab-0.0.44}/retab/resources/processors/automations/__init__.py +0 -0
  67. {retab-0.0.42 → retab-0.0.44}/retab/resources/processors/automations/client.py +0 -0
  68. {retab-0.0.42 → retab-0.0.44}/retab/resources/processors/automations/logs.py +0 -0
  69. {retab-0.0.42 → retab-0.0.44}/retab/resources/processors/automations/tests.py +0 -0
  70. {retab-0.0.42 → retab-0.0.44}/retab/resources/prompt_optimization.py +0 -0
  71. {retab-0.0.42 → retab-0.0.44}/retab/resources/schemas.py +0 -0
  72. {retab-0.0.42 → retab-0.0.44}/retab/resources/secrets/__init__.py +0 -0
  73. {retab-0.0.42 → retab-0.0.44}/retab/resources/secrets/client.py +0 -0
  74. {retab-0.0.42 → retab-0.0.44}/retab/resources/secrets/external_api_keys.py +0 -0
  75. {retab-0.0.42 → retab-0.0.44}/retab/resources/secrets/webhook.py +0 -0
  76. {retab-0.0.42 → retab-0.0.44}/retab/types/__init__.py +0 -0
  77. {retab-0.0.42 → retab-0.0.44}/retab/types/automations/__init__.py +0 -0
  78. {retab-0.0.42 → retab-0.0.44}/retab/types/automations/cron.py +0 -0
  79. {retab-0.0.42 → retab-0.0.44}/retab/types/automations/endpoints.py +0 -0
  80. {retab-0.0.42 → retab-0.0.44}/retab/types/automations/links.py +0 -0
  81. {retab-0.0.42 → retab-0.0.44}/retab/types/automations/mailboxes.py +0 -0
  82. {retab-0.0.42 → retab-0.0.44}/retab/types/automations/outlook.py +0 -0
  83. {retab-0.0.42 → retab-0.0.44}/retab/types/automations/webhooks.py +0 -0
  84. {retab-0.0.42 → retab-0.0.44}/retab/types/browser_canvas.py +0 -0
  85. {retab-0.0.42 → retab-0.0.44}/retab/types/chat.py +0 -0
  86. {retab-0.0.42 → retab-0.0.44}/retab/types/completions.py +0 -0
  87. {retab-0.0.42 → retab-0.0.44}/retab/types/consensus.py +0 -0
  88. {retab-0.0.42 → retab-0.0.44}/retab/types/db/__init__.py +0 -0
  89. {retab-0.0.42 → retab-0.0.44}/retab/types/db/annotations.py +0 -0
  90. {retab-0.0.42 → retab-0.0.44}/retab/types/db/files.py +0 -0
  91. {retab-0.0.42 → retab-0.0.44}/retab/types/documents/__init__.py +0 -0
  92. {retab-0.0.42 → retab-0.0.44}/retab/types/documents/correct_orientation.py +0 -0
  93. {retab-0.0.42 → retab-0.0.44}/retab/types/documents/create_messages.py +0 -0
  94. {retab-0.0.42 → retab-0.0.44}/retab/types/documents/extractions.py +0 -0
  95. {retab-0.0.42 → retab-0.0.44}/retab/types/documents/parse.py +0 -0
  96. {retab-0.0.42 → retab-0.0.44}/retab/types/events.py +0 -0
  97. {retab-0.0.42 → retab-0.0.44}/retab/types/inference_settings.py +0 -0
  98. {retab-0.0.42 → retab-0.0.44}/retab/types/jobs/__init__.py +0 -0
  99. {retab-0.0.42 → retab-0.0.44}/retab/types/jobs/batch_annotation.py +0 -0
  100. {retab-0.0.42 → retab-0.0.44}/retab/types/jobs/finetune.py +0 -0
  101. {retab-0.0.42 → retab-0.0.44}/retab/types/jobs/prompt_optimization.py +0 -0
  102. {retab-0.0.42 → retab-0.0.44}/retab/types/jobs/webcrawl.py +0 -0
  103. {retab-0.0.42 → retab-0.0.44}/retab/types/metrics.py +0 -0
  104. {retab-0.0.42 → retab-0.0.44}/retab/types/modalities.py +0 -0
  105. {retab-0.0.42 → retab-0.0.44}/retab/types/pagination.py +0 -0
  106. {retab-0.0.42 → retab-0.0.44}/retab/types/predictions.py +0 -0
  107. {retab-0.0.42 → retab-0.0.44}/retab/types/schemas/__init__.py +0 -0
  108. {retab-0.0.42 → retab-0.0.44}/retab/types/schemas/generate.py +0 -0
  109. {retab-0.0.42 → retab-0.0.44}/retab/types/schemas/layout.py +0 -0
  110. {retab-0.0.42 → retab-0.0.44}/retab/types/schemas/templates.py +0 -0
  111. {retab-0.0.42 → retab-0.0.44}/retab/types/secrets/__init__.py +0 -0
  112. {retab-0.0.42 → retab-0.0.44}/retab/types/secrets/external_api_keys.py +0 -0
  113. {retab-0.0.42 → retab-0.0.44}/retab/utils/_model_cards/anthropic.yaml +0 -0
  114. {retab-0.0.42 → retab-0.0.44}/retab/utils/_model_cards/auto.yaml +0 -0
  115. {retab-0.0.42 → retab-0.0.44}/retab/utils/_model_cards/gemini.yaml +0 -0
  116. {retab-0.0.42 → retab-0.0.44}/retab/utils/_model_cards/openai.yaml +0 -0
  117. {retab-0.0.42 → retab-0.0.44}/retab/utils/_model_cards/xai.yaml +0 -0
  118. {retab-0.0.42 → retab-0.0.44}/retab/utils/benchmarking.py +0 -0
  119. {retab-0.0.42 → retab-0.0.44}/retab/utils/chat.py +0 -0
  120. {retab-0.0.42 → retab-0.0.44}/retab/utils/display.py +0 -0
  121. {retab-0.0.42 → retab-0.0.44}/retab/utils/responses.py +0 -0
  122. {retab-0.0.42 → retab-0.0.44}/retab/utils/stream_context_managers.py +0 -0
  123. {retab-0.0.42/retab/utils → retab-0.0.44/retab/utils/usage}/__init__.py +0 -0
  124. {retab-0.0.42 → retab-0.0.44}/retab.egg-info/dependency_links.txt +0 -0
  125. {retab-0.0.42 → retab-0.0.44}/retab.egg-info/requires.txt +0 -0
  126. {retab-0.0.42 → retab-0.0.44}/retab.egg-info/top_level.txt +0 -0
  127. {retab-0.0.42 → retab-0.0.44}/setup.cfg +0 -0
  128. {retab-0.0.42 → retab-0.0.44}/tests/test_automations_links.py +0 -0
  129. {retab-0.0.42 → retab-0.0.44}/tests/test_automations_mailboxes.py +0 -0
  130. {retab-0.0.42 → retab-0.0.44}/tests/test_documents_api.py +0 -0
  131. {retab-0.0.42 → retab-0.0.44}/tests/test_preprocessor.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: retab
3
- Version: 0.0.42
3
+ Version: 0.0.44
4
4
  Summary: Retab official python library
5
5
  Home-page: https://github.com/Retab-dev/retab
6
6
  Author: Retab
@@ -61,13 +61,13 @@ Made with love by the team at [Retab](https://retab.com) 🤍.
61
61
 
62
62
  ### What is Retab?
63
63
 
64
- Retab solves all the major challenges in document processing with LLMs:
64
+ Retab solves all the major challenges in document processing with Large Language Models:
65
65
 
66
66
  1. **Universal Document Preprocessing**: Convert any file type (PDFs, Excel, emails, etc.) into LLM-ready format without writing custom parsers
67
67
  2. **Structured, Schema-driven Extraction**: Get consistent, reliable outputs using schema-based prompt engineering
68
68
  3. **Processors**: Publish a live, stable, shareable document processor.
69
69
  4. **Automations**: Create document processing workflows that can be triggered by events (mailbox, upload link, endpoint, outlook plugin).
70
- 5. **Evaluations**: Evaluate the performance of models against annotated datasets
70
+ 5. **Projects**: Evaluate the performance of models against annotated datasets
71
71
  6. **Optimizations**: Identify the most used processors and help you finetune models to reduce costs and improve performance
72
72
 
73
73
  We are offering you all the software-defined primitives to build your own document processing solutions. We see it as **Stripe** for document processing.
@@ -90,7 +90,7 @@ Many people haven't yet realized how powerful LLMs have become at document proce
90
90
 
91
91
  ## Code examples
92
92
 
93
- ## You can check our Github repository to see code examples: [python examples](https://github.com/Retab-dev/retab/tree/main/examples) and [jupyter notebooks](https://github.com/Retab-dev/retab-nodejs/tree/main/notebooks).
93
+ You can check our Github repository to see code examples: [python examples](https://github.com/Retab-dev/retab/tree/main/examples) and [jupyter notebooks](https://github.com/Retab-dev/retab-nodejs/tree/main/notebooks).
94
94
 
95
95
  ## Community
96
96
 
@@ -112,8 +112,6 @@ We share our roadmap publicly on [Github](https://github.com/Retab-dev/retab)
112
112
  Among the features we're working on:
113
113
 
114
114
  * [ ] Node.js SDK
115
- * [ ] Low-level speed optimizations for Evals Frontend
116
115
  * [ ] Schema optimization autopilot
117
116
  * [ ] Sources API
118
- * [ ] Parse API for RAG
119
117
 
@@ -18,13 +18,13 @@ Made with love by the team at [Retab](https://retab.com) 🤍.
18
18
 
19
19
  ### What is Retab?
20
20
 
21
- Retab solves all the major challenges in document processing with LLMs:
21
+ Retab solves all the major challenges in document processing with Large Language Models:
22
22
 
23
23
  1. **Universal Document Preprocessing**: Convert any file type (PDFs, Excel, emails, etc.) into LLM-ready format without writing custom parsers
24
24
  2. **Structured, Schema-driven Extraction**: Get consistent, reliable outputs using schema-based prompt engineering
25
25
  3. **Processors**: Publish a live, stable, shareable document processor.
26
26
  4. **Automations**: Create document processing workflows that can be triggered by events (mailbox, upload link, endpoint, outlook plugin).
27
- 5. **Evaluations**: Evaluate the performance of models against annotated datasets
27
+ 5. **Projects**: Evaluate the performance of models against annotated datasets
28
28
  6. **Optimizations**: Identify the most used processors and help you finetune models to reduce costs and improve performance
29
29
 
30
30
  We are offering you all the software-defined primitives to build your own document processing solutions. We see it as **Stripe** for document processing.
@@ -47,7 +47,7 @@ Many people haven't yet realized how powerful LLMs have become at document proce
47
47
 
48
48
  ## Code examples
49
49
 
50
- ## You can check our Github repository to see code examples: [python examples](https://github.com/Retab-dev/retab/tree/main/examples) and [jupyter notebooks](https://github.com/Retab-dev/retab-nodejs/tree/main/notebooks).
50
+ You can check our Github repository to see code examples: [python examples](https://github.com/Retab-dev/retab/tree/main/examples) and [jupyter notebooks](https://github.com/Retab-dev/retab-nodejs/tree/main/notebooks).
51
51
 
52
52
  ## Community
53
53
 
@@ -69,8 +69,6 @@ We share our roadmap publicly on [Github](https://github.com/Retab-dev/retab)
69
69
  Among the features we're working on:
70
70
 
71
71
  * [ ] Node.js SDK
72
- * [ ] Low-level speed optimizations for Evals Frontend
73
72
  * [ ] Schema optimization autopilot
74
73
  * [ ] Sources API
75
- * [ ] Parse API for RAG
76
74
 
@@ -1,4 +1,5 @@
1
1
  from .client import AsyncRetab, Retab
2
2
  from .types.schemas.object import Schema
3
+ from . import utils
3
4
 
4
- __all__ = ["Retab", "AsyncRetab", "Schema"]
5
+ __all__ = ["Retab", "AsyncRetab", "Schema", "utils"]
@@ -7,10 +7,9 @@ import backoff
7
7
  import backoff.types
8
8
  import httpx
9
9
  import truststore
10
- from pydantic_core import PydanticUndefined
11
10
 
12
- from .resources import consensus, documents, evals, files, finetuning, models, processors, schemas, secrets, usage, evaluations
13
- from .types.standards import PreparedRequest
11
+ from .resources import consensus, deployments, documents, files, finetuning, models, processors, schemas, secrets, usage, projects
12
+ from .types.standards import PreparedRequest, FieldUnset
14
13
 
15
14
 
16
15
  class MaxRetriesExceeded(Exception):
@@ -43,20 +42,15 @@ class BaseRetab:
43
42
  ValueError: If no API key is provided through arguments or environment variables
44
43
  """
45
44
 
46
- # claude_api_key (str, optional): Claude API key. Will look for CLAUDE_API_KEY env variable if not provided
47
- # xai_api_key (str, optional): XAI API key. Will look for XAI_API_KEY env variable if not provided
48
- # gemini_api_key (str, optional): Gemini API key. Will look for GEMINI_API_KEY env variable if not provided
49
-
50
45
  def __init__(
51
46
  self,
52
47
  api_key: Optional[str] = None,
53
48
  base_url: Optional[str] = None,
54
49
  timeout: float = 240.0,
55
50
  max_retries: int = 3,
56
- openai_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
57
- gemini_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
58
- # claude_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
59
- xai_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
51
+ openai_api_key: Optional[str] = FieldUnset,
52
+ gemini_api_key: Optional[str] = FieldUnset,
53
+ xai_api_key: Optional[str] = FieldUnset,
60
54
  ) -> None:
61
55
  if api_key is None:
62
56
  api_key = os.environ.get("RETAB_API_KEY")
@@ -80,30 +74,21 @@ class BaseRetab:
80
74
  "Content-Type": "application/json",
81
75
  }
82
76
 
83
- # Only check environment variables if the value is PydanticUndefined
84
- if openai_api_key is PydanticUndefined:
77
+ # Only check environment variables if the value is FieldUnset
78
+ if openai_api_key is FieldUnset:
85
79
  openai_api_key = os.environ.get("OPENAI_API_KEY")
86
80
 
87
- # if claude_api_key is PydanticUndefined:
88
- # claude_api_key = os.environ.get("CLAUDE_API_KEY")
89
-
90
- # if xai_api_key is PydanticUndefined:
91
- # xai_api_key = os.environ.get("XAI_API_KEY")
92
-
93
- if gemini_api_key is PydanticUndefined:
81
+ if gemini_api_key is FieldUnset:
94
82
  gemini_api_key = os.environ.get("GEMINI_API_KEY")
95
83
 
96
- # Only add headers if the values are actual strings (not None or PydanticUndefined)
97
- if openai_api_key and openai_api_key is not PydanticUndefined:
84
+ # Only add headers if the values are actual strings (not None or FieldUnset)
85
+ if openai_api_key and openai_api_key is not FieldUnset:
98
86
  self.headers["OpenAI-Api-Key"] = openai_api_key
99
87
 
100
- # if claude_api_key and claude_api_key is not PydanticUndefined:
101
- # self.headers["Anthropic-Api-Key"] = claude_api_key
102
-
103
- if xai_api_key and xai_api_key is not PydanticUndefined:
88
+ if xai_api_key and xai_api_key is not FieldUnset:
104
89
  self.headers["XAI-Api-Key"] = xai_api_key
105
90
 
106
- if gemini_api_key and gemini_api_key is not PydanticUndefined:
91
+ if gemini_api_key and gemini_api_key is not FieldUnset:
107
92
  self.headers["Gemini-Api-Key"] = gemini_api_key
108
93
 
109
94
  def _prepare_url(self, endpoint: str) -> str:
@@ -150,7 +135,7 @@ class Retab(BaseRetab):
150
135
  """Synchronous client for interacting with the Retab API.
151
136
 
152
137
  This client provides synchronous access to all Retab API resources including files, fine-tuning,
153
- prompt optimization, documents, models, datasets, and schemas.
138
+ prompt optimization, documents, models, processors, deployments, and schemas.
154
139
 
155
140
  Args:
156
141
  api_key (str, optional): Retab API key. If not provided, will look for RETAB_API_KEY env variable.
@@ -158,8 +143,6 @@ class Retab(BaseRetab):
158
143
  timeout (float): Request timeout in seconds. Defaults to 240.0
159
144
  max_retries (int): Maximum number of retries for failed requests. Defaults to 3
160
145
  openai_api_key (str, optional): OpenAI API key. Will look for OPENAI_API_KEY env variable if not provided
161
- claude_api_key (str, optional): Claude API key. Will look for CLAUDE_API_KEY env variable if not provided
162
- xai_api_key (str, optional): XAI API key. Will look for XAI_API_KEY env variable if not provided
163
146
  gemini_api_key (str, optional): Gemini API key. Will look for GEMINI_API_KEY env variable if not provided
164
147
 
165
148
  Attributes:
@@ -168,7 +151,8 @@ class Retab(BaseRetab):
168
151
  prompt_optimization: Access to prompt optimization operations
169
152
  documents: Access to document operations
170
153
  models: Access to model operations
171
- datasets: Access to dataset operations
154
+ processors: Access to processor operations
155
+ deployments: Access to deployment operations
172
156
  schemas: Access to schema operations
173
157
  responses: Access to responses API (OpenAI Responses API compatible interface)
174
158
  """
@@ -179,10 +163,8 @@ class Retab(BaseRetab):
179
163
  base_url: Optional[str] = None,
180
164
  timeout: float = 240.0,
181
165
  max_retries: int = 3,
182
- openai_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
183
- gemini_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
184
- # claude_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
185
- # xai_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
166
+ openai_api_key: Optional[str] = FieldUnset,
167
+ gemini_api_key: Optional[str] = FieldUnset,
186
168
  ) -> None:
187
169
  super().__init__(
188
170
  api_key=api_key,
@@ -191,20 +173,17 @@ class Retab(BaseRetab):
191
173
  max_retries=max_retries,
192
174
  openai_api_key=openai_api_key,
193
175
  gemini_api_key=gemini_api_key,
194
- # claude_api_key=claude_api_key,
195
- # xai_api_key=xai_api_key,
196
176
  )
197
177
 
198
178
  self.client = httpx.Client(timeout=self.timeout)
199
- self.evals = evals.Evals(client=self)
200
- self.evaluations = evaluations.Evaluations(client=self)
179
+ self.projects = projects.Projects(client=self)
201
180
  self.files = files.Files(client=self)
202
181
  self.fine_tuning = finetuning.FineTuning(client=self)
203
- # self.prompt_optimization = prompt_optimization.PromptOptimization(client=self)
204
182
  self.documents = documents.Documents(client=self)
205
183
  self.models = models.Models(client=self)
206
184
  self.schemas = schemas.Schemas(client=self)
207
185
  self.processors = processors.Processors(client=self)
186
+ self.deployments = deployments.Deployments(client=self)
208
187
  self.secrets = secrets.Secrets(client=self)
209
188
  self.usage = usage.Usage(client=self)
210
189
  self.consensus = consensus.Consensus(client=self)
@@ -418,7 +397,7 @@ class AsyncRetab(BaseRetab):
418
397
  """Asynchronous client for interacting with the Retab API.
419
398
 
420
399
  This client provides asynchronous access to all Retab API resources including files, fine-tuning,
421
- prompt optimization, documents, models, datasets, and schemas.
400
+ prompt optimization, documents, models, processors, deployments, and schemas.
422
401
 
423
402
  Args:
424
403
  api_key (str, optional): Retab API key. If not provided, will look for RETAB_API_KEY env variable.
@@ -436,7 +415,8 @@ class AsyncRetab(BaseRetab):
436
415
  prompt_optimization: Access to asynchronous prompt optimization operations
437
416
  documents: Access to asynchronous document operations
438
417
  models: Access to asynchronous model operations
439
- datasets: Access to asynchronous dataset operations
418
+ processors: Access to asynchronous processor operations
419
+ deployments: Access to asynchronous deployment operations
440
420
  schemas: Access to asynchronous schema operations
441
421
  responses: Access to responses API (OpenAI Responses API compatible interface)
442
422
  """
@@ -447,10 +427,8 @@ class AsyncRetab(BaseRetab):
447
427
  base_url: Optional[str] = None,
448
428
  timeout: float = 240.0,
449
429
  max_retries: int = 3,
450
- openai_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
451
- gemini_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
452
- # claude_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
453
- # xai_api_key: Optional[str] = PydanticUndefined, # type: ignore[assignment]
430
+ openai_api_key: Optional[str] = FieldUnset,
431
+ gemini_api_key: Optional[str] = FieldUnset,
454
432
  ) -> None:
455
433
  super().__init__(
456
434
  api_key=api_key,
@@ -459,21 +437,18 @@ class AsyncRetab(BaseRetab):
459
437
  max_retries=max_retries,
460
438
  openai_api_key=openai_api_key,
461
439
  gemini_api_key=gemini_api_key,
462
- # claude_api_key=claude_api_key,
463
- # xai_api_key=xai_api_key,
464
440
  )
465
441
 
466
442
  self.client = httpx.AsyncClient(timeout=self.timeout)
467
443
 
468
- self.evals = evals.AsyncEvals(client=self)
469
- self.evaluations = evaluations.AsyncEvaluations(client=self)
444
+ self.projects = projects.AsyncProjects(client=self)
470
445
  self.files = files.AsyncFiles(client=self)
471
446
  self.fine_tuning = finetuning.AsyncFineTuning(client=self)
472
- # self.prompt_optimization = prompt_optimization.AsyncPromptOptimization(client=self)
473
447
  self.documents = documents.AsyncDocuments(client=self)
474
448
  self.models = models.AsyncModels(client=self)
475
449
  self.schemas = schemas.AsyncSchemas(client=self)
476
450
  self.processors = processors.AsyncProcessors(client=self)
451
+ self.deployments = deployments.AsyncDeployments(client=self)
477
452
  self.secrets = secrets.AsyncSecrets(client=self)
478
453
  self.usage = usage.AsyncUsage(client=self)
479
454
  self.consensus = consensus.AsyncConsensus(client=self)
@@ -0,0 +1,180 @@
1
+ import collections.abc
2
+ import json
3
+ import os
4
+ import types
5
+ import typing
6
+ import enum
7
+ import sys
8
+ import inspect
9
+ from datetime import datetime, date
10
+ from typing import Any, Type, get_args, get_origin, Union, Literal, is_typeddict
11
+ from typing_extensions import is_typeddict as is_typeddict_ext
12
+ import typing_extensions
13
+ from pydantic_core import PydanticUndefined
14
+ from pydantic import BaseModel, EmailStr
15
+ import PIL.Image
16
+
17
+ to_compile: list[tuple[str, Type, bool]] = []
18
+
19
+ def is_base_model(field_type: Type) -> bool:
20
+ return getattr(field_type, "__name__", None) in ["BaseModel", "GenericModel", "ConfigDict", "Generic"]
21
+
22
+ def type_to_zod(field_type: Any, put_names: bool = True, ts: bool = False) -> str:
23
+ origin = get_origin(field_type) or field_type
24
+ optional = False
25
+
26
+ def make_union(args):
27
+ return args[0] if len(args) <= 1 else "z.union([" + ", ".join(args) + "])"
28
+
29
+ def make_ts_union(args):
30
+ return args[0] if len(args) <= 1 else " | ".join(args)
31
+
32
+ if isinstance(field_type, typing.ForwardRef):
33
+ return type_to_zod(typing._eval_type(field_type, globals(), locals(), []), ts=ts)
34
+ elif origin is typing.Annotated or origin is typing.Required or origin is typing_extensions.Required:
35
+ return type_to_zod(get_args(field_type)[0], put_names, ts=ts)
36
+ if origin is Union or origin is types.UnionType:
37
+ args = [x for x in get_args(field_type)]
38
+ if types.NoneType in args:
39
+ args.remove(types.NoneType)
40
+ optional = True
41
+ typename = make_union([type_to_zod(x) for x in args])
42
+ ts_typename = make_ts_union([type_to_zod(x, ts=True) for x in args])
43
+ elif issubclass(origin, BaseModel) or is_typeddict(origin) or is_typeddict_ext(origin):
44
+ if put_names:
45
+ typename = "Z" + origin.__name__
46
+ ts_typename = origin.__name__
47
+ to_compile.append((origin.__name__, field_type, True))
48
+ else:
49
+ excluded_fields = set()
50
+ typename = "z.object({\n"
51
+ ts_typename = "{\n"
52
+ props = [(n, f.annotation, f.default) for n, f in origin.model_fields.items()] if issubclass(origin, BaseModel) else \
53
+ [(n, f, PydanticUndefined) for n, f in origin.__annotations__.items()]
54
+
55
+ for field_name, field, default in props:
56
+ if field_name in excluded_fields:
57
+ continue
58
+ ts_compiled = type_to_zod(field, ts=True)
59
+ default_str = ""
60
+ if default is not PydanticUndefined and default is not None:
61
+ if isinstance(default, BaseModel):
62
+ default_str = f".default({json.dumps(default.model_dump(mode="json", exclude_unset=True))})"
63
+ else:
64
+ default_str = f".default({json.dumps(default)})"
65
+ typename += f" {field_name}: {type_to_zod(field)}{default_str},\n"
66
+ ts_typename += f" {field_name}{"?" if ts_compiled.endswith(" | undefined") or default is not PydanticUndefined else ""}: {ts_compiled},\n"
67
+ typename += "})"
68
+ ts_typename += "}"
69
+
70
+ based = origin.__bases__
71
+ for i in range(0, len(based)):
72
+ if is_base_model(based[i]) or based[i] is dict:
73
+ break
74
+ if issubclass(based[i], BaseModel):
75
+ excluded_fields.update(based[i].model_fields.keys())
76
+ typename += ".merge(Z" + based[i].__name__ + ".schema)"
77
+ ts_typename += " & " + based[i].__name__
78
+ elif origin is list or origin is typing.List or origin is collections.abc.Sequence or origin is collections.abc.Iterable:
79
+ typename = "z.array(" + type_to_zod(get_args(field_type)[0]) + ")"
80
+ ts_typename = "Array<" + type_to_zod(get_args(field_type)[0], ts=True) + ">"
81
+ elif origin is tuple:
82
+ args = get_args(field_type)
83
+ typename = "z.tuple([" + ", ".join([type_to_zod(x) for x in args]) + "])"
84
+ ts_typename = "[" + ", ".join([type_to_zod(x, ts=True) for x in args]) + "]"
85
+ elif origin is dict:
86
+ if len(get_args(field_type)) == 2:
87
+ typename = "z.record(" + type_to_zod(get_args(field_type)[0]) + ", " + type_to_zod(get_args(field_type)[1]) + ")"
88
+ ts_typename = "{[key: " + type_to_zod(get_args(field_type)[0], ts=True) + "]: " + type_to_zod(get_args(field_type)[1], ts=True) + "}"
89
+ else:
90
+ typename = "z.record(z.any())"
91
+ ts_typename = "{[key: string]: any}"
92
+ elif origin is Literal:
93
+ typename = make_union(["z.literal(" + json.dumps(x) + ")" for x in get_args(field_type)])
94
+ ts_typename = make_ts_union([json.dumps(x) for x in get_args(field_type)])
95
+ elif isinstance(field_type, typing.TypeVar):
96
+ typename = "z.any()"
97
+ ts_typename = "any"
98
+ elif isinstance(field_type, type) and issubclass(field_type, enum.Enum):
99
+ typename = "z.any()"
100
+ ts_typename = "any"
101
+ elif field_type is str or field_type is date or field_type is datetime:
102
+ typename = "z.string()"
103
+ ts_typename = "string"
104
+ elif field_type is int or field_type is float:
105
+ typename = "z.number()"
106
+ ts_typename = "number"
107
+ elif field_type is bool:
108
+ typename = "z.boolean()"
109
+ ts_typename = "boolean"
110
+ elif field_type is typing.Any:
111
+ typename = "z.any()"
112
+ ts_typename = "any"
113
+ elif field_type is bytes or field_type is PIL.Image.Image or field_type is typing.BinaryIO or origin is typing.IO or origin is typing_extensions.IO:
114
+ typename = "z.instanceof(Uint8Array)"
115
+ ts_typename = "Uint8Array"
116
+ elif field_type is EmailStr:
117
+ typename = "z.string().email()"
118
+ ts_typename = "string"
119
+ elif field_type is os.PathLike:
120
+ typename = "z.string()"
121
+ ts_typename = "string"
122
+ elif field_type is object:
123
+ typename = "z.object({}).passthrough()"
124
+ ts_typename = "object"
125
+ else:
126
+ raise ValueError(f"Unsupported type: {field_type} ({origin})")
127
+ if ts:
128
+ return ts_typename if not optional else ts_typename + " | null | undefined"
129
+ else:
130
+ return typename if not optional else typename + ".nullable().optional()"
131
+
132
+
133
+ # SET of names of python builtin types starting with a capital
134
+ builtin_types = {
135
+ "Any",
136
+ "BaseModel",
137
+ "NoneType",
138
+ "Literal",
139
+ "Union",
140
+ "List",
141
+ "Sequence",
142
+ "ConfigDict",
143
+ "Optional",
144
+ }
145
+
146
+ if __name__ == "__main__":
147
+ modules = []
148
+ for root, dirs, files in os.walk("retab/types"):
149
+ for module in files:
150
+ if module[-3:] != '.py':
151
+ continue
152
+ full_name = os.path.join(root, module[:-3]).replace(os.path.sep, '.')
153
+ __import__(full_name, locals(), globals())
154
+ modules.append(full_name)
155
+
156
+
157
+ for module_name in modules:
158
+ for name, obj in inspect.getmembers(sys.modules[module_name]):
159
+ if name[0] != name[0].lower() and name not in builtin_types:
160
+ to_compile.append((name, obj, False))
161
+
162
+ print("import * as z from 'zod';\n")
163
+
164
+ defined = {}
165
+ while len(to_compile) > 0:
166
+ name, model, necessary = to_compile.pop(0)
167
+ if name in defined: continue
168
+ defined[name] = True
169
+ try:
170
+ compiled = type_to_zod(model, False)
171
+ compiled_ts = type_to_zod(model, False, ts=True)
172
+ except Exception as e:
173
+ if not necessary:
174
+ print(f"Skipping {name} {model} due to error: {e}", file=sys.stderr)
175
+ continue
176
+ print(f"Error compiling {name} {model}", file=sys.stderr)
177
+ raise e
178
+ print("export const Z" + name + " = z.lazy(() => " + compiled + ");")
179
+ print("export type " + name + " = z.infer<typeof Z" + name + ">;\n")
180
+
@@ -21,7 +21,7 @@ class BaseConsensusMixin:
21
21
  mode=mode,
22
22
  )
23
23
 
24
- return PreparedRequest(method="POST", url="/v1/consensus/reconcile", data=request.model_dump(), idempotency_key=idempotency_key)
24
+ return PreparedRequest(method="POST", url="/v1/consensus/reconcile", data=request.model_dump(mode="json", exclude_unset=True), idempotency_key=idempotency_key)
25
25
 
26
26
 
27
27
  class Consensus(SyncAPIResource, BaseConsensusMixin):
@@ -55,7 +55,7 @@ class BaseResponsesMixin:
55
55
  instructions=instructions,
56
56
  )
57
57
 
58
- return PreparedRequest(method="POST", url="/v1/responses", data=request.model_dump(), idempotency_key=idempotency_key)
58
+ return PreparedRequest(method="POST", url="/v1/responses", data=request.model_dump(mode="json", exclude_unset=True), idempotency_key=idempotency_key)
59
59
 
60
60
  def prepare_parse(
61
61
  self,
@@ -0,0 +1,3 @@
1
+ from .client import AsyncDeployments, Deployments
2
+
3
+ __all__ = ["Deployments", "AsyncDeployments"]
@@ -0,0 +1,148 @@
1
+ import base64
2
+ from io import IOBase
3
+ from pathlib import Path
4
+ from typing import Any, List
5
+
6
+ import PIL.Image
7
+ from pydantic import HttpUrl
8
+ from ..._resource import AsyncAPIResource, SyncAPIResource
9
+ from ...utils.mime import MIMEData, prepare_mime_document
10
+ from ...types.documents.extractions import RetabParsedChatCompletion
11
+ from ...types.standards import PreparedRequest
12
+
13
+
14
class DeploymentsMixin:
    """Request-building logic shared by the sync and async deployment resources."""

    def prepare_submit(
        self,
        project_id: str,
        iteration_id: str,
        document: Path | str | bytes | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
        documents: list[Path | str | bytes | IOBase | MIMEData | PIL.Image.Image | HttpUrl] | None = None,
        temperature: float | None = None,
        seed: int | None = None,
        store: bool = True,
    ) -> PreparedRequest:
        """Build the POST request that submits one or several documents to a deployment.

        Args:
            project_id: ID of the project (interpolated into the URL path).
            iteration_id: ID of the iteration (interpolated into the URL path).
            document: Single document to process (mutually exclusive with documents).
            documents: List of documents to process (mutually exclusive with document).
            temperature: Optional temperature override; omitted from the form when None.
            seed: Optional seed for reproducibility; omitted from the form when None.
            store: Whether to store the results.

        Returns:
            PreparedRequest: The prepared request, carrying form fields and file parts.

        Raises:
            ValueError: If neither or both of ``document`` / ``documents`` are given.
                Presence is judged by truthiness, so an empty string, empty bytes
                or empty list counts as "not given".
        """
        has_single = bool(document)
        has_many = bool(documents)

        # Exactly one of the two inputs must be supplied.
        if not (has_single or has_many):
            raise ValueError("Either 'document' or 'documents' must be provided")
        if has_single and has_many:
            raise ValueError("Provide either 'document' (single) or 'documents' (multiple), not both")

        # Form fields: only the values that were actually set are sent.
        form_data = {}
        for field, value in (("temperature", temperature), ("seed", seed), ("store", store)):
            if value is not None:
                form_data[field] = value

        def as_file_part(source):
            # Normalise any supported input to MIMEData, then decode its
            # base64 content into the (filename, bytes, mime type) upload tuple.
            mime = prepare_mime_document(source)
            return (mime.filename, base64.b64decode(mime.content), mime.mime_type)

        if has_single:
            # Single upload: one entry keyed by the field name.
            files = {"document": as_file_part(document)}
        else:
            # Multiple uploads: a list of (field name, file) pairs lets the
            # "documents" field repeat, a form the original code notes httpx accepts.
            files = [("documents", as_file_part(doc)) for doc in documents]

        url = f"/v1/deployments/{project_id}/{iteration_id}/submit"
        return PreparedRequest(method="POST", url=url, form_data=form_data, files=files)
79
+
80
+
81
class Deployments(SyncAPIResource, DeploymentsMixin):
    """Synchronous resource for submitting documents to a deployment."""

    def __init__(self, client: Any) -> None:
        super().__init__(client=client)

    def submit(
        self,
        project_id: str,
        iteration_id: str,
        document: Path | str | bytes | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
        documents: List[Path | str | bytes | IOBase | MIMEData | PIL.Image.Image | HttpUrl] | None = None,
        temperature: float | None = None,
        seed: int | None = None,
        store: bool = True,
    ) -> RetabParsedChatCompletion:
        """Send one or several documents to a deployment and return the parsed result.

        Args:
            project_id: ID of the project.
            iteration_id: ID of the iteration.
            document: Single document to process (mutually exclusive with documents).
            documents: List of documents to process (mutually exclusive with document).
            temperature: Optional temperature override.
            seed: Optional seed for reproducibility.
            store: Whether to store the results.

        Returns:
            RetabParsedChatCompletion: The response validated into the result model.

        Raises:
            ValueError: Propagated from ``prepare_submit`` when neither or both of
                ``document`` / ``documents`` are supplied.
        """
        # All argument validation and request assembly live in the shared mixin.
        prepared = self.prepare_submit(
            project_id=project_id,
            iteration_id=iteration_id,
            document=document,
            documents=documents,
            temperature=temperature,
            seed=seed,
            store=store,
        )
        payload = self._client._prepared_request(prepared)
        return RetabParsedChatCompletion.model_validate(payload)
114
+
115
+
116
class AsyncDeployments(AsyncAPIResource, DeploymentsMixin):
    """Asynchronous resource for submitting documents to a deployment."""

    def __init__(self, client: Any) -> None:
        super().__init__(client=client)

    async def submit(
        self,
        project_id: str,
        iteration_id: str,
        document: Path | str | bytes | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
        documents: List[Path | str | bytes | IOBase | MIMEData | PIL.Image.Image | HttpUrl] | None = None,
        temperature: float | None = None,
        seed: int | None = None,
        store: bool = True,
    ) -> RetabParsedChatCompletion:
        """Send one or several documents to a deployment and await the parsed result.

        Args:
            project_id: ID of the project.
            iteration_id: ID of the iteration.
            document: Single document to process (mutually exclusive with documents).
            documents: List of documents to process (mutually exclusive with document).
            temperature: Optional temperature override.
            seed: Optional seed for reproducibility.
            store: Whether to store the results.

        Returns:
            RetabParsedChatCompletion: The response validated into the result model.

        Raises:
            ValueError: Propagated from ``prepare_submit`` when neither or both of
                ``document`` / ``documents`` are supplied.
        """
        # Request assembly is synchronous and shared with the sync resource;
        # only the client call itself is awaited.
        prepared = self.prepare_submit(
            project_id=project_id,
            iteration_id=iteration_id,
            document=document,
            documents=documents,
            temperature=temperature,
            seed=seed,
            store=store,
        )
        payload = await self._client._prepared_request(prepared)
        return RetabParsedChatCompletion.model_validate(payload)