retab 0.0.66__py3-none-any.whl → 0.0.68__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
retab/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- from .client import AsyncRetab, Retab
1
+ from .client import AsyncRetab, Retab, SignatureVerificationError
2
2
  from . import utils
3
3
  from . import types
4
- __all__ = ["Retab", "AsyncRetab", "utils", "types"]
4
+ __all__ = ["Retab", "AsyncRetab", "SignatureVerificationError", "utils", "types"]
retab/client.py CHANGED
@@ -1,3 +1,5 @@
1
+ import hashlib
2
+ import hmac
1
3
  import json
2
4
  import os
3
5
  from types import TracebackType
@@ -12,6 +14,11 @@ from .resources import documents, models, schemas, projects
12
14
  from .types.standards import PreparedRequest, FieldUnset
13
15
 
14
16
 
17
+ class SignatureVerificationError(Exception):
18
+ """Raised when webhook signature verification fails."""
19
+ pass
20
+
21
+
15
22
  class MaxRetriesExceeded(Exception):
16
23
  pass
17
24
 
@@ -34,7 +41,7 @@ class BaseRetab:
34
41
  Args:
35
42
  api_key (str, optional): Retab API key. If not provided, will look for RETAB_API_KEY env variable.
36
43
  base_url (str, optional): Base URL for API requests. Defaults to https://api.retab.com
37
- timeout (float): Request timeout in seconds. Defaults to 240.0
44
+ timeout (float): Request timeout in seconds. Defaults to 1800.0 (30 minutes)
38
45
  max_retries (int): Maximum number of retries for failed requests. Defaults to 3
39
46
  openai_api_key (str, optional): OpenAI API key. Will look for OPENAI_API_KEY env variable if not provided
40
47
 
@@ -46,7 +53,7 @@ class BaseRetab:
46
53
  self,
47
54
  api_key: Optional[str] = None,
48
55
  base_url: Optional[str] = None,
49
- timeout: float = 800.0,
56
+ timeout: float = 1800.0,
50
57
  max_retries: int = 3,
51
58
  openai_api_key: Optional[str] = FieldUnset,
52
59
  gemini_api_key: Optional[str] = FieldUnset,
@@ -140,7 +147,7 @@ class Retab(BaseRetab):
140
147
  Args:
141
148
  api_key (str, optional): Retab API key. If not provided, will look for RETAB_API_KEY env variable.
142
149
  base_url (str, optional): Base URL for API requests. Defaults to https://api.retab.com
143
- timeout (float): Request timeout in seconds. Defaults to 240.0
150
+ timeout (float): Request timeout in seconds. Defaults to 1800.0 (30 minutes)
144
151
  max_retries (int): Maximum number of retries for failed requests. Defaults to 3
145
152
  openai_api_key (str, optional): OpenAI API key. Will look for OPENAI_API_KEY env variable if not provided
146
153
  gemini_api_key (str, optional): Gemini API key. Will look for GEMINI_API_KEY env variable if not provided
@@ -161,7 +168,7 @@ class Retab(BaseRetab):
161
168
  self,
162
169
  api_key: Optional[str] = None,
163
170
  base_url: Optional[str] = None,
164
- timeout: float = 240.0,
171
+ timeout: float = 1800.0,
165
172
  max_retries: int = 3,
166
173
  openai_api_key: Optional[str] = FieldUnset,
167
174
  gemini_api_key: Optional[str] = FieldUnset,
@@ -385,6 +392,44 @@ class Retab(BaseRetab):
385
392
  """
386
393
  self.close()
387
394
 
395
+ @staticmethod
396
+ def verify_event(event_body: bytes, event_signature: str, secret: str) -> Any:
397
+ """Verify the signature of a webhook event.
398
+
399
+ Args:
400
+ event_body: The raw request body as bytes
401
+ event_signature: The signature from the request header (x-retab-signature)
402
+ secret: The webhook secret key used for signing
403
+
404
+ Returns:
405
+ Any: The parsed event payload (JSON)
406
+
407
+ Raises:
408
+ SignatureVerificationError: If the signature verification fails
409
+
410
+ Example:
411
+ ```python
412
+ from retab import Retab, SignatureVerificationError
413
+
414
+ # In your webhook handler
415
+ secret = "your_webhook_secret"
416
+ body = request.body # Raw bytes
417
+ signature = request.headers.get("x-retab-signature")
418
+
419
+ try:
420
+ event = Retab.verify_event(body, signature, secret)
421
+ print(f"Verified event: {event}")
422
+ except SignatureVerificationError:
423
+ print("Invalid signature!")
424
+ ```
425
+ """
426
+ expected_signature = hmac.new(secret.encode(), event_body, hashlib.sha256).hexdigest()
427
+
428
+ if not hmac.compare_digest(event_signature, expected_signature):
429
+ raise SignatureVerificationError("Invalid signature")
430
+
431
+ return json.loads(event_body.decode("utf-8"))
432
+
388
433
 
389
434
  class AsyncRetab(BaseRetab):
390
435
  """Asynchronous client for interacting with the Retab API.
@@ -395,7 +440,7 @@ class AsyncRetab(BaseRetab):
395
440
  Args:
396
441
  api_key (str, optional): Retab API key. If not provided, will look for RETAB_API_KEY env variable.
397
442
  base_url (str, optional): Base URL for API requests. Defaults to https://api.retab.com
398
- timeout (float): Request timeout in seconds. Defaults to 240.0
443
+ timeout (float): Request timeout in seconds. Defaults to 1800.0 (30 minutes)
399
444
  max_retries (int): Maximum number of retries for failed requests. Defaults to 3
400
445
  openai_api_key (str, optional): OpenAI API key. Will look for OPENAI_API_KEY env variable if not provided
401
446
  claude_api_key (str, optional): Claude API key. Will look for CLAUDE_API_KEY env variable if not provided
@@ -418,7 +463,7 @@ class AsyncRetab(BaseRetab):
418
463
  self,
419
464
  api_key: Optional[str] = None,
420
465
  base_url: Optional[str] = None,
421
- timeout: float = 240.0,
466
+ timeout: float = 1800.0,
422
467
  max_retries: int = 3,
423
468
  openai_api_key: Optional[str] = FieldUnset,
424
469
  gemini_api_key: Optional[str] = FieldUnset,
@@ -661,3 +706,41 @@ class AsyncRetab(BaseRetab):
661
706
  traceback: The traceback of the exception that was raised, if any
662
707
  """
663
708
  await self.close()
709
+ @staticmethod
710
+ def verify_event(event_body: bytes, event_signature: str, secret: str) -> Any:
711
+ """Verify the signature of a webhook event.
712
+
713
+ Args:
714
+ event_body: The raw request body as bytes
715
+ event_signature: The signature from the request header (x-retab-signature)
716
+ secret: The webhook secret key used for signing
717
+
718
+ Returns:
719
+ Any: The parsed event payload (JSON)
720
+
721
+ Raises:
722
+ SignatureVerificationError: If the signature verification fails
723
+
724
+ Example:
725
+ ```python
726
+ from retab import AsyncRetab, SignatureVerificationError
727
+
728
+ # In your async webhook handler
729
+ secret = "your_webhook_secret"
730
+ body = await request.body() # Raw bytes
731
+ signature = request.headers.get("x-retab-signature")
732
+
733
+ try:
734
+ event = AsyncRetab.verify_event(body, signature, secret)
735
+ print(f"Verified event: {event}")
736
+ except SignatureVerificationError:
737
+ print("Invalid signature!")
738
+ ```
739
+ """
740
+ expected_signature = hmac.new(secret.encode(), event_body, hashlib.sha256).hexdigest()
741
+
742
+ if not hmac.compare_digest(event_signature, expected_signature):
743
+ raise SignatureVerificationError("Invalid signature")
744
+
745
+ return json.loads(event_body.decode("utf-8"))
746
+
retab/generate_types.py CHANGED
@@ -59,7 +59,7 @@ def type_to_zod(field_type: Any, put_names: bool = True, ts: bool = False) -> st
59
59
  optional = True
60
60
  typename = make_union([type_to_zod(x) for x in args])
61
61
  ts_typename = make_ts_union([type_to_zod(x, ts=True) for x in args])
62
- elif issubclass(origin, BaseModel) or is_typeddict(origin) or is_typeddict_ext(origin):
62
+ elif isinstance(origin, type) and (issubclass(origin, BaseModel) or is_typeddict(origin) or is_typeddict_ext(origin)):
63
63
  if put_names:
64
64
  name = get_class_name(origin)
65
65
  typename = "Z" + name
@@ -77,7 +77,7 @@ def type_to_zod(field_type: Any, put_names: bool = True, ts: bool = False) -> st
77
77
 
78
78
  typename += "z.object({\n"
79
79
  ts_typename += "{\n"
80
- props = [(n, f.annotation, f.default) for n, f in origin.model_fields.items() if not f.exclude] if issubclass(origin, BaseModel) else \
80
+ props = [(n, f.annotation, f.default) for n, f in origin.model_fields.items() if not f.exclude] if isinstance(origin, type) and issubclass(origin, BaseModel) else \
81
81
  [(n, f, PydanticUndefined) for n, f in origin.__annotations__.items()]
82
82
 
83
83
  for field_name, field, default in props:
@@ -8,11 +8,8 @@ from pydantic import HttpUrl
8
8
  from ..._resource import AsyncAPIResource, SyncAPIResource
9
9
  from ...utils.mime import MIMEData, prepare_mime_document
10
10
  from ...types.documents.extract import RetabParsedChatCompletion
11
- from ...types.projects import Project, PatchProjectRequest, BaseProject
11
+ from ...types.projects import Project, PatchProjectRequest, CreateProjectRequest
12
12
  from ...types.standards import PreparedRequest, DeleteResponse, FieldUnset
13
- from .documents import Documents, AsyncDocuments
14
- from .iterations import Iterations, AsyncIterations
15
-
16
13
 
17
14
  class ProjectsMixin:
18
15
  def prepare_create(
@@ -29,7 +26,7 @@ class ProjectsMixin:
29
26
  if extra_body:
30
27
  eval_dict.update(extra_body)
31
28
 
32
- eval_data = BaseProject(**eval_dict)
29
+ eval_data = CreateProjectRequest(**eval_dict)
33
30
  return PreparedRequest(method="POST", url="/v1/projects", data=eval_data.model_dump(exclude_unset=True, mode="json"))
34
31
 
35
32
  def prepare_get(self, project_id: str) -> PreparedRequest:
@@ -76,6 +73,10 @@ class ProjectsMixin:
76
73
  def prepare_delete(self, id: str) -> PreparedRequest:
77
74
  return PreparedRequest(method="DELETE", url=f"/v1/projects/{id}")
78
75
 
76
+ def prepare_publish(self, project_id: str, **extra_body: Any) -> PreparedRequest:
77
+ data = extra_body or None
78
+ return PreparedRequest(method="POST", url=f"/v1/projects/{project_id}/publish", data=data)
79
+
79
80
  def prepare_extract(
80
81
  self,
81
82
  project_id: str,
@@ -160,8 +161,6 @@ class Projects(SyncAPIResource, ProjectsMixin):
160
161
 
161
162
  def __init__(self, *args, **kwargs):
162
163
  super().__init__(*args, **kwargs)
163
- self.documents = Documents(self._client)
164
- self.iterations = Iterations(self._client)
165
164
 
166
165
  def create(
167
166
  self,
@@ -174,9 +173,7 @@ class Projects(SyncAPIResource, ProjectsMixin):
174
173
 
175
174
  Args:
176
175
  name: The name of the project
177
- json_schema: The JSON schema for the project
178
- documents: The documents to associate with the project
179
-
176
+ json_schema: The json schema of the project
180
177
  Returns:
181
178
  Project: The created project
182
179
  Raises:
@@ -202,37 +199,6 @@ class Projects(SyncAPIResource, ProjectsMixin):
202
199
  response = self._client._prepared_request(request)
203
200
  return Project(**response)
204
201
 
205
- def update(
206
- self,
207
- project_id: str,
208
- name: str = FieldUnset,
209
- json_schema: dict[str, Any] = FieldUnset,
210
- **extra_body: Any,
211
- ) -> Project:
212
- """
213
- Update an project with partial updates.
214
-
215
- Args:
216
- project_id: The ID of the project to update
217
- name: Optional new name for the project
218
- json_schema: Optional new JSON schema
219
- documents: Optional list of documents to update
220
- iterations: Optional list of iterations to update
221
-
222
- Returns:
223
- Project: The updated project
224
- Raises:
225
- HTTPException if the request fails
226
- """
227
- request = self.prepare_update(
228
- project_id=project_id,
229
- name=name,
230
- json_schema=json_schema,
231
- **extra_body,
232
- )
233
- response = self._client._prepared_request(request)
234
- return Project(**response)
235
-
236
202
  def list(self, **extra_params: Any) -> List[Project]:
237
203
  """
238
204
  List projects.
@@ -262,6 +228,12 @@ class Projects(SyncAPIResource, ProjectsMixin):
262
228
  request = self.prepare_delete(project_id)
263
229
  return self._client._prepared_request(request)
264
230
 
231
+ def publish(self, project_id: str, **extra_body: Any) -> Project:
232
+ """Publish a project's draft configuration."""
233
+ request = self.prepare_publish(project_id, **extra_body)
234
+ response = self._client._prepared_request(request)
235
+ return Project(**response)
236
+
265
237
  def extract(
266
238
  self,
267
239
  project_id: str,
@@ -314,23 +286,20 @@ class AsyncProjects(AsyncAPIResource, ProjectsMixin):
314
286
 
315
287
  def __init__(self, *args, **kwargs):
316
288
  super().__init__(*args, **kwargs)
317
- self.documents = AsyncDocuments(self._client)
318
- self.iterations = AsyncIterations(self._client)
319
289
 
320
- async def create(self, name: str, json_schema: Dict[str, Any]) -> Project:
290
+ async def create(self, name: str, json_schema: dict[str, Any], **extra_body: Any) -> Project:
321
291
  """
322
292
  Create a new project.
323
293
 
324
294
  Args:
325
295
  name: The name of the project
326
- json_schema: The JSON schema for the project
327
-
296
+ json_schema: The json schema of the project
328
297
  Returns:
329
298
  Project: The created project
330
299
  Raises:
331
300
  HTTPException if the request fails
332
301
  """
333
- request = self.prepare_create(name, json_schema)
302
+ request = self.prepare_create(name, json_schema, **extra_body)
334
303
  response = await self._client._prepared_request(request)
335
304
  return Project(**response)
336
305
 
@@ -350,36 +319,7 @@ class AsyncProjects(AsyncAPIResource, ProjectsMixin):
350
319
  response = await self._client._prepared_request(request)
351
320
  return Project(**response)
352
321
 
353
- async def update(
354
- self,
355
- project_id: str,
356
- name: str = FieldUnset,
357
- json_schema: dict[str, Any] = FieldUnset,
358
- ) -> Project:
359
- """
360
- Update an project with partial updates.
361
-
362
- Args:
363
- id: The ID of the project to update
364
- name: Optional new name for the project
365
- json_schema: Optional new JSON schema
366
- documents: Optional list of documents to update
367
- iterations: Optional list of iterations to update
368
-
369
- Returns:
370
- Project: The updated project
371
- Raises:
372
- HTTPException if the request fails
373
- """
374
- request = self.prepare_update(
375
- project_id=project_id,
376
- name=name,
377
- json_schema=json_schema,
378
- )
379
- response = await self._client._prepared_request(request)
380
- return Project(**response)
381
-
382
- async def list(self) -> List[Project]:
322
+ async def list(self, **extra_params: Any) -> List[Project]:
383
323
  """
384
324
  List projects.
385
325
 
@@ -388,7 +328,7 @@ class AsyncProjects(AsyncAPIResource, ProjectsMixin):
388
328
  Raises:
389
329
  HTTPException if the request fails
390
330
  """
391
- request = self.prepare_list()
331
+ request = self.prepare_list(**extra_params)
392
332
  response = await self._client._prepared_request(request)
393
333
  return [Project(**item) for item in response.get("data", [])]
394
334
 
@@ -407,6 +347,12 @@ class AsyncProjects(AsyncAPIResource, ProjectsMixin):
407
347
  request = self.prepare_delete(project_id)
408
348
  return await self._client._prepared_request(request)
409
349
 
350
+ async def publish(self, project_id: str, **extra_body: Any) -> Project:
351
+ """Publish a project's draft configuration."""
352
+ request = self.prepare_publish(project_id, **extra_body)
353
+ response = await self._client._prepared_request(request)
354
+ return Project(**response)
355
+
410
356
  async def extract(
411
357
  self,
412
358
  project_id: str,
@@ -419,6 +365,7 @@ class AsyncProjects(AsyncAPIResource, ProjectsMixin):
419
365
  n_consensus: int | None = None,
420
366
  seed: int | None = None,
421
367
  store: bool = True,
368
+ **extra_form: Any,
422
369
  ) -> RetabParsedChatCompletion:
423
370
  """Extract documents from a project.
424
371
 
retab/types/mime.py CHANGED
@@ -8,6 +8,8 @@ from typing import Any, Optional, Self, Sequence
8
8
  from pydantic import BaseModel, Field, field_validator
9
9
  from ..utils.hashing import generate_blake2b_hash_from_base64
10
10
 
11
+ import io
12
+
11
13
  # Add webp and heic to the list of supported mime types
12
14
  mimetypes.add_type("image/webp", ".webp")
13
15
  mimetypes.add_type("image/heic", ".heic")
@@ -85,8 +87,17 @@ class OCR(BaseModel):
85
87
 
86
88
 
87
89
  class MIMEData(BaseModel):
88
- filename: str = Field(description="The filename of the file", examples=["file.pdf", "image.png", "data.txt"])
89
- url: str = Field(description="The URL of the file in base64 format", examples=["..."])
90
+ filename: str = Field(
91
+ description="The filename of the file",
92
+ examples=["file.pdf", "image.png", "data.txt"]
93
+ )
94
+ url: str = Field(
95
+ description="The URL of the file in base64 format",
96
+ examples=["..."]
97
+ )
98
+
99
+ # Internal resource
100
+ _buffer: Optional[io.BytesIO] = None
90
101
 
91
102
  @property
92
103
  def id(self) -> str:
@@ -99,18 +110,14 @@ class MIMEData(BaseModel):
99
110
  @property
100
111
  def content(self) -> str:
101
112
  if self.url.startswith("data:"):
102
- # Extract base64 content from data URL
103
- base64_content = self.url.split(",")[1]
104
- return base64_content
105
- else:
106
- raise ValueError("Content is not available for this file")
113
+ return self.url.split(",")[1]
114
+ raise ValueError("Content is not available for this file")
107
115
 
108
116
  @property
109
117
  def mime_type(self) -> str:
110
118
  if self.url.startswith("data:"):
111
119
  return self.url.split(";")[0].split(":")[1]
112
- else:
113
- return mimetypes.guess_type(self.filename)[0] or "application/octet-stream"
120
+ return mimetypes.guess_type(self.filename)[0] or "application/octet-stream"
114
121
 
115
122
  @property
116
123
  def unique_filename(self) -> str:
@@ -118,22 +125,57 @@ class MIMEData(BaseModel):
118
125
 
119
126
  @property
120
127
  def size(self) -> int:
121
- # size in bytes
122
128
  return len(base64.b64decode(self.content))
123
129
 
130
+ # def to_bytesio(self) -> io.BytesIO:
131
+ # """Decode base64 and return a BytesIO (without leaking references)."""
132
+ # buf = io.BytesIO(base64.b64decode(self.content))
133
+ # buf.seek(0)
134
+ # return buf
135
+
136
+ # # -------- Context manager interface --------
137
+
138
+ # def __enter__(self) -> io.BytesIO:
139
+ # """Opens the internal buffer so you can use it like a file."""
140
+ # if self._buffer is None:
141
+ # self._buffer = self.to_bytesio()
142
+ # return self._buffer
143
+
144
+ # def __exit__(self, exc_type, exc_val, exc_tb):
145
+ # """Close and cleanup the buffer."""
146
+ # if self._buffer is not None:
147
+ # self._buffer.close()
148
+ # self._buffer = None
149
+
150
+ # # -------- Optional convenience methods --------
151
+
152
+ # def open(self) -> io.BytesIO:
153
+ # """Manual open without `with`."""
154
+ # return self.__enter__()
155
+
156
+ # def close(self):
157
+ # """Manual close."""
158
+ # self.__exit__(None, None, None)
159
+
124
160
  def __str__(self) -> str:
125
161
  truncated_url = self.url[:50] + "..." if len(self.url) > 50 else self.url
126
- # truncated_content = self.content[:50] + '...' if len(self.content) > 50 else self.content
127
- return f"MIMEData(filename='{self.filename}', url='{truncated_url}', mime_type='{self.mime_type}', size='{self.size}', extension='{self.extension}')"
162
+ return (
163
+ f"MIMEData(filename='{self.filename}', "
164
+ f"url='{truncated_url}', "
165
+ f"mime_type='{self.mime_type}', "
166
+ f"size='{self.size}', "
167
+ f"extension='{self.extension}')"
168
+ )
128
169
 
129
170
  def __repr__(self) -> str:
130
171
  return self.__str__()
131
172
 
132
173
 
174
+
133
175
  class BaseMIMEData(MIMEData):
134
176
  @classmethod
135
177
  def model_validate(
136
- cls, obj: Any, *, strict: bool | None = None, from_attributes: bool | None = None, context: Any | None = None, by_alias: bool | None = None, by_name: bool | None = None
178
+ cls, obj: Any, *, strict: bool | None = None, extra: Any | None = None, from_attributes: bool | None = None, context: Any | None = None, by_alias: bool | None = None, by_name: bool | None = None
137
179
  ) -> Self:
138
180
  if isinstance(obj, MIMEData):
139
181
  # Convert MIMEData instance to dict
@@ -153,7 +195,7 @@ class BaseMIMEData(MIMEData):
153
195
  else:
154
196
  # If there's no comma (unexpected format), truncate to 996 chars (multiple of 4)
155
197
  obj["url"] = obj["url"][:996]
156
- return super().model_validate(obj, strict=strict, from_attributes=from_attributes, context=context, by_alias=by_alias, by_name=by_name)
198
+ return super().model_validate(obj, strict=strict, extra=extra, from_attributes=from_attributes, context=context, by_alias=by_alias, by_name=by_name)
157
199
 
158
200
  @property
159
201
  def id(self) -> str:
@@ -1,33 +1,8 @@
1
- from .model import Project, BaseProject, CreateProjectRequest, PatchProjectRequest
2
- from .documents import AnnotatedDocument, DocumentItem, ProjectDocument, CreateProjectDocumentRequest, PatchProjectDocumentRequest
3
- from .iterations import (
4
- BaseIteration,
5
- Iteration,
6
- CreateIterationRequest,
7
- PatchIterationRequest,
8
- ProcessIterationRequest,
9
- DocumentStatus,
10
- IterationDocumentStatusResponse,
11
- AddIterationFromJsonlRequest,
12
- )
1
+ from .model import Project, CreateProjectRequest, PatchProjectRequest
13
2
 
14
3
 
15
4
  __all__ = [
16
5
  "Project",
17
- "BaseProject",
18
6
  "CreateProjectRequest",
19
- "PatchProjectRequest",
20
- "AnnotatedDocument",
21
- "DocumentItem",
22
- "ProjectDocument",
23
- "CreateProjectDocumentRequest",
24
- "PatchProjectDocumentRequest",
25
- "BaseIteration",
26
- "Iteration",
27
- "CreateIterationRequest",
28
- "PatchIterationRequest",
29
- "ProcessIterationRequest",
30
- "DocumentStatus",
31
- "IterationDocumentStatusResponse",
32
- "AddIterationFromJsonlRequest",
7
+ "PatchProjectRequest"
33
8
  ]
@@ -4,8 +4,6 @@ from typing import Any, Optional
4
4
  import nanoid # type: ignore
5
5
  from pydantic import BaseModel, Field, ConfigDict
6
6
 
7
- from .documents import ProjectDocument
8
- from .iterations import Iteration
9
7
  from ..inference_settings import InferenceSettings
10
8
 
11
9
  default_inference_settings = InferenceSettings(
@@ -17,42 +15,68 @@ default_inference_settings = InferenceSettings(
17
15
  browser_canvas="A4",
18
16
  n_consensus=1,
19
17
  )
18
+ class Function(BaseModel):
19
+ model_config = ConfigDict(extra="ignore")
20
+ id: str = Field(default_factory=lambda: "function_" + nanoid.generate())
21
+ path: str
22
+ code: Optional[str] = Field(default=None, description="The code of the function")
23
+ function_registry_id: Optional[str] = Field(default=None, description="The function registry id of the function")
24
+
25
+ # @model_validator(mode="before")
26
+ # @classmethod
27
+ # def validate_function(cls, data: Any):
28
+ # if isinstance(data, dict):
29
+ # code = data.get("code")
30
+ # function_registry_id = data.get("function_registry_id")
31
+ # if code is None and function_registry_id is None:
32
+ # raise ValueError("Either code or function_registry_id must be provided")
33
+ # return data
34
+
35
+ class FunctionHilCriterion(BaseModel):
36
+ path: str
37
+ agentic_fix: bool = Field(default=False, description="Whether to use agentic fix for the criterion")
20
38
 
21
- class SheetsIntegration(BaseModel):
22
- sheet_id: str
23
- spreadsheet_id: str
39
+ class HumanInTheLoopParams(BaseModel):
40
+ enabled: bool = Field(default=False)
41
+ url: str = Field(default="", description="The URL of the human in the loop endpoint")
42
+ headers: dict[str, str] = Field(default_factory=dict, description="The headers to send to the human in the loop endpoint")
43
+ criteria: list[FunctionHilCriterion] = Field(default_factory=list, description="The criteria to use for the human in the loop")
24
44
 
25
- class BaseProject(BaseModel):
45
+ class PublishedConfig(BaseModel):
46
+ inference_settings: InferenceSettings = default_inference_settings
47
+ json_schema: dict[str, Any] = Field(default_factory=dict, description="The json schema of the project")
48
+ human_in_the_loop_params: HumanInTheLoopParams = Field(default_factory=HumanInTheLoopParams)
49
+ origin: str = Field(default="manual", description="The origin of the published config. Either 'Manual' or the iteration id that was used to generate the config")
50
+ class DraftConfig(BaseModel):
51
+ inference_settings: InferenceSettings = default_inference_settings
52
+ json_schema: dict[str, Any] = Field(default_factory=dict, description="The json schema of the builder config")
53
+ human_in_the_loop_criteria: list[FunctionHilCriterion] = Field(default_factory=list)
54
+ class Project(BaseModel):
26
55
  model_config = ConfigDict(extra="ignore")
27
- id: str = Field(default_factory=lambda: "proj_" + nanoid.generate())
56
+ id: str = Field(default_factory=lambda: "project_" + nanoid.generate())
28
57
  name: str = Field(default="", description="The name of the project")
29
- json_schema: dict[str, Any] = Field(default_factory=dict, description="The json schema of the project")
30
58
  updated_at: datetime.datetime = Field(default_factory=lambda: datetime.datetime.now(tz=datetime.timezone.utc))
31
- sheets_integration: SheetsIntegration | None = None
32
- validation_flags: dict[str, Any] | None = None
33
- inference_settings: InferenceSettings = default_inference_settings
59
+ published_config: PublishedConfig
60
+ draft_config: DraftConfig
61
+ is_published: bool = False
62
+ #computation_spec: ComputationSpec = Field(default_factory=ComputationSpec, description="The computation spec of the project")
63
+ functions: list[Function] = Field(default_factory=list, description="The functions of the project")
34
64
 
35
- # Actual Object stored in DB
36
- class Project(BaseProject):
37
- documents: list[ProjectDocument] = Field(default_factory=list)
38
- iterations: list[Iteration] = Field(default_factory=list)
65
+ class StoredProject(Project):
66
+ """Project model with organization_id for database storage"""
67
+ organization_id: str
39
68
 
40
69
  class CreateProjectRequest(BaseModel):
41
70
  model_config = ConfigDict(extra="ignore")
42
71
  name: str
43
- json_schema: dict[str, Any]
44
-
72
+ json_schema: dict[str, Any] = Field(default_factory=dict, description="The json schema of the project")
45
73
 
46
- # This is basically the same as BaseProject, but everything is optional.
47
- # Could be achieved by convert_basemodel_to_partial_basemodel(BaseProject) but we prefer explicitness
74
+ # This is basically the same as Project, but everything is optional.
48
75
  class PatchProjectRequest(BaseModel):
49
76
  model_config = ConfigDict(extra="ignore")
50
77
  name: Optional[str] = Field(default=None, description="The name of the document")
51
- json_schema: Optional[dict[str, Any]] = Field(default=None, description="The json schema of the project")
52
- sheets_integration: SheetsIntegration | None = None
53
- validation_flags: Optional[dict[str, Any]] = Field(default=None, description="The validation flags of the project")
54
- inference_settings: Optional[InferenceSettings] = Field(default=None, description="The inference settings of the project")
55
-
56
- class AddIterationFromJsonlRequest(BaseModel):
57
- model_config = ConfigDict(extra="ignore")
58
- jsonl_gcs_path: str
78
+ published_config: Optional[PublishedConfig] = Field(default=None, description="The published config of the project")
79
+ draft_config: Optional[DraftConfig] = Field(default=None, description="The draft config of the project")
80
+ is_published: Optional[bool] = Field(default=None, description="The published status of the project")
81
+ #computation_spec: Optional[ComputationSpec] = Field(default=None, description="The computation spec of the project")
82
+ functions: Optional[list[Function]] = Field(default=None, description="The functions of the project")
@@ -25,8 +25,8 @@ from openai.types.responses.response_input_message_content_list_param import Res
25
25
  from openai.types.responses.response_input_param import ResponseInputItemParam
26
26
  from openai.types.responses.response_input_text_param import ResponseInputTextParam
27
27
 
28
- from retab.types.chat import ChatCompletionRetabMessage
29
- from retab.types.documents.extract import RetabParsedChatCompletion, RetabParsedChoice
28
+ from ...types.chat import ChatCompletionRetabMessage
29
+ from ...types.documents.extract import RetabParsedChatCompletion, RetabParsedChoice
30
30
 
31
31
 
32
32
  MediaType = Literal["image/jpeg", "image/png", "image/gif", "image/webp"]
@@ -1,7 +1,7 @@
1
1
  import copy
2
2
  import json
3
3
  from typing import Any, Literal, Optional, Self, Union, Type, MutableMapping, Tuple, MutableSequence
4
- from retab.utils.hashing import generate_blake2b_hash_from_string
4
+ from ...utils.hashing import generate_blake2b_hash_from_string
5
5
 
6
6
  import datetime
7
7
  from pathlib import Path
@@ -16,7 +16,7 @@ from pydantic import BaseModel, Field, PrivateAttr, computed_field, model_valida
16
16
  from .chat import convert_to_anthropic_format, convert_to_google_genai_format
17
17
  from .chat import convert_to_openai_completions_api_format
18
18
 
19
- from retab.utils.json_schema import convert_json_schema_to_basemodel, expand_refs, load_json_schema
19
+ from ...utils.json_schema import convert_json_schema_to_basemodel, expand_refs, load_json_schema
20
20
  from .chat import convert_to_openai_responses_api_format
21
21
  from ..standards import StreamingBaseModel
22
22
  from ..chat import ChatCompletionRetabMessage