retab 0.0.80__tar.gz → 0.0.82__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. {retab-0.0.80 → retab-0.0.82}/PKG-INFO +1 -1
  2. {retab-0.0.80 → retab-0.0.82}/retab/client.py +3 -1
  3. {retab-0.0.80 → retab-0.0.82}/retab/resources/documents/client.py +131 -0
  4. retab-0.0.82/retab/resources/edit/__init__.py +3 -0
  5. retab-0.0.82/retab/resources/edit/client.py +176 -0
  6. retab-0.0.82/retab/resources/edit/templates/__init__.py +4 -0
  7. retab-0.0.82/retab/resources/edit/templates/client.py +620 -0
  8. {retab-0.0.80 → retab-0.0.82}/retab/types/documents/edit.py +12 -24
  9. retab-0.0.82/retab/types/edit/__init__.py +16 -0
  10. retab-0.0.82/retab/types/edit/templates.py +50 -0
  11. {retab-0.0.80 → retab-0.0.82}/retab.egg-info/PKG-INFO +1 -1
  12. {retab-0.0.80 → retab-0.0.82}/retab.egg-info/SOURCES.txt +6 -0
  13. {retab-0.0.80 → retab-0.0.82}/setup.py +1 -1
  14. {retab-0.0.80 → retab-0.0.82}/README.md +0 -0
  15. {retab-0.0.80 → retab-0.0.82}/pyproject.toml +0 -0
  16. {retab-0.0.80 → retab-0.0.82}/retab/__init__.py +0 -0
  17. {retab-0.0.80 → retab-0.0.82}/retab/_resource.py +0 -0
  18. {retab-0.0.80 → retab-0.0.82}/retab/generate_types.py +0 -0
  19. {retab-0.0.80 → retab-0.0.82}/retab/py.typed +0 -0
  20. {retab-0.0.80 → retab-0.0.82}/retab/resources/__init__.py +0 -0
  21. {retab-0.0.80 → retab-0.0.82}/retab/resources/documents/__init__.py +0 -0
  22. {retab-0.0.80 → retab-0.0.82}/retab/resources/extractions/__init__.py +0 -0
  23. {retab-0.0.80 → retab-0.0.82}/retab/resources/extractions/client.py +0 -0
  24. {retab-0.0.80 → retab-0.0.82}/retab/resources/models.py +0 -0
  25. {retab-0.0.80 → retab-0.0.82}/retab/resources/projects/__init__.py +0 -0
  26. {retab-0.0.80 → retab-0.0.82}/retab/resources/projects/client.py +0 -0
  27. {retab-0.0.80 → retab-0.0.82}/retab/resources/schemas.py +0 -0
  28. {retab-0.0.80 → retab-0.0.82}/retab/resources/workflows/__init__.py +0 -0
  29. {retab-0.0.80 → retab-0.0.82}/retab/resources/workflows/client.py +0 -0
  30. {retab-0.0.80 → retab-0.0.82}/retab/types/__init__.py +0 -0
  31. {retab-0.0.80 → retab-0.0.82}/retab/types/chat.py +0 -0
  32. {retab-0.0.80 → retab-0.0.82}/retab/types/documents/__init__.py +0 -0
  33. {retab-0.0.80 → retab-0.0.82}/retab/types/documents/classify.py +0 -0
  34. {retab-0.0.80 → retab-0.0.82}/retab/types/documents/correct_orientation.py +0 -0
  35. {retab-0.0.80 → retab-0.0.82}/retab/types/documents/create_messages.py +0 -0
  36. {retab-0.0.80 → retab-0.0.82}/retab/types/documents/extract.py +0 -0
  37. {retab-0.0.80 → retab-0.0.82}/retab/types/documents/parse.py +0 -0
  38. {retab-0.0.80 → retab-0.0.82}/retab/types/documents/split.py +0 -0
  39. {retab-0.0.80 → retab-0.0.82}/retab/types/extractions/__init__.py +0 -0
  40. {retab-0.0.80 → retab-0.0.82}/retab/types/extractions/types.py +0 -0
  41. {retab-0.0.80 → retab-0.0.82}/retab/types/inference_settings.py +0 -0
  42. {retab-0.0.80 → retab-0.0.82}/retab/types/mime.py +0 -0
  43. {retab-0.0.80 → retab-0.0.82}/retab/types/modality.py +0 -0
  44. {retab-0.0.80 → retab-0.0.82}/retab/types/pagination.py +0 -0
  45. {retab-0.0.80 → retab-0.0.82}/retab/types/projects/__init__.py +0 -0
  46. {retab-0.0.80 → retab-0.0.82}/retab/types/projects/metrics.py +0 -0
  47. {retab-0.0.80 → retab-0.0.82}/retab/types/projects/model.py +0 -0
  48. {retab-0.0.80 → retab-0.0.82}/retab/types/projects/predictions.py +0 -0
  49. {retab-0.0.80 → retab-0.0.82}/retab/types/schemas/__init__.py +0 -0
  50. {retab-0.0.80 → retab-0.0.82}/retab/types/schemas/chat.py +0 -0
  51. {retab-0.0.80 → retab-0.0.82}/retab/types/schemas/generate.py +0 -0
  52. {retab-0.0.80 → retab-0.0.82}/retab/types/schemas/layout.py +0 -0
  53. {retab-0.0.80 → retab-0.0.82}/retab/types/schemas/model.py +0 -0
  54. {retab-0.0.80 → retab-0.0.82}/retab/types/schemas/templates.py +0 -0
  55. {retab-0.0.80 → retab-0.0.82}/retab/types/standards.py +0 -0
  56. {retab-0.0.80 → retab-0.0.82}/retab/types/workflows/__init__.py +0 -0
  57. {retab-0.0.80 → retab-0.0.82}/retab/types/workflows/model.py +0 -0
  58. {retab-0.0.80 → retab-0.0.82}/retab/utils/__init__.py +0 -0
  59. {retab-0.0.80 → retab-0.0.82}/retab/utils/display.py +0 -0
  60. {retab-0.0.80 → retab-0.0.82}/retab/utils/hashing.py +0 -0
  61. {retab-0.0.80 → retab-0.0.82}/retab/utils/json_schema.py +0 -0
  62. {retab-0.0.80 → retab-0.0.82}/retab/utils/mime.py +0 -0
  63. {retab-0.0.80 → retab-0.0.82}/retab/utils/stream_context_managers.py +0 -0
  64. {retab-0.0.80 → retab-0.0.82}/retab.egg-info/dependency_links.txt +0 -0
  65. {retab-0.0.80 → retab-0.0.82}/retab.egg-info/requires.txt +0 -0
  66. {retab-0.0.80 → retab-0.0.82}/retab.egg-info/top_level.txt +0 -0
  67. {retab-0.0.80 → retab-0.0.82}/setup.cfg +0 -0
  68. {retab-0.0.80 → retab-0.0.82}/tests/test_projects.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: retab
3
- Version: 0.0.80
3
+ Version: 0.0.82
4
4
  Summary: Retab official python library
5
5
  Home-page: https://github.com/retab-dev/retab
6
6
  Author: Retab
@@ -10,7 +10,7 @@ import backoff.types
10
10
  import httpx
11
11
  import truststore
12
12
 
13
- from .resources import documents, models, schemas, projects, extractions
13
+ from .resources import documents, models, schemas, projects, extractions, edit
14
14
  from .types.standards import PreparedRequest, FieldUnset
15
15
 
16
16
 
@@ -188,6 +188,7 @@ class Retab(BaseRetab):
188
188
  self.documents = documents.Documents(client=self)
189
189
  self.models = models.Models(client=self)
190
190
  self.schemas = schemas.Schemas(client=self)
191
+ self.edit = edit.Edit(client=self)
191
192
 
192
193
  def _request(
193
194
  self,
@@ -485,6 +486,7 @@ class AsyncRetab(BaseRetab):
485
486
  self.documents = documents.AsyncDocuments(client=self)
486
487
  self.models = models.AsyncModels(client=self)
487
488
  self.schemas = schemas.AsyncSchemas(client=self)
489
+ self.edit = edit.AsyncEdit(client=self)
488
490
 
489
491
  def _parse_response(self, response: httpx.Response) -> Any:
490
492
  """Parse response based on content-type.
@@ -17,6 +17,7 @@ from ...types.documents.edit import EditRequest, EditResponse
17
17
  from ...types.documents.extract import DocumentExtractRequest, RetabParsedChatCompletion, RetabParsedChatCompletionChunk, RetabParsedChoice, maybe_parse_to_pydantic
18
18
  from ...types.documents.parse import ParseRequest, ParseResult, TableParsingFormat
19
19
  from ...types.documents.split import Category, SplitRequest, SplitResponse
20
+ from ...types.documents.classify import ClassifyRequest, ClassifyResponse
20
21
  from ...types.mime import MIMEData
21
22
  from ...types.standards import PreparedRequest, FieldUnset
22
23
  from ...utils.json_schema import load_json_schema, unflatten_dict
@@ -172,6 +173,34 @@ class BaseDocumentsMixin:
172
173
  split_request = SplitRequest(**request_dict)
173
174
  return PreparedRequest(method="POST", url="/v1/documents/split", data=split_request.model_dump(mode="json", exclude_unset=True))
174
175
 
176
def _prepare_classify(
    self,
    document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
    categories: list[Category] | list[dict[str, str]],
    model: str,
    **extra_body: Any,
) -> PreparedRequest:
    """Build the POST request for ``/v1/documents/classify``.

    Args:
        document: Input handed to ``prepare_mime_document`` for conversion.
        categories: Candidate categories. Dict entries are expanded into
            ``Category`` objects; any other entry is forwarded unchanged.
        model: Model identifier placed in the request body.
        **extra_body: Additional request fields. They are merged last, so a
            key that collides with ``document``, ``categories`` or ``model``
            overrides the value built here.

    Returns:
        PreparedRequest: POST request whose data is the JSON-mode dump of a
        ``ClassifyRequest`` with unset fields excluded.
    """
    # Convert the document first so a bad document fails before categories are touched.
    mime_payload = prepare_mime_document(document)

    normalized_categories = []
    for entry in categories:
        # Plain dicts are promoted to Category models; everything else passes through.
        normalized_categories.append(Category(**entry) if isinstance(entry, dict) else entry)

    # Caller-supplied extras are unpacked last so they win on key collisions.
    payload: dict[str, Any] = {
        "document": mime_payload,
        "categories": normalized_categories,
        "model": model,
        **extra_body,
    }

    body = ClassifyRequest(**payload).model_dump(mode="json", exclude_unset=True)
    return PreparedRequest(method="POST", url="/v1/documents/classify", data=body)
203
+
175
204
  def _prepare_extract(
176
205
  self,
177
206
  json_schema: dict[str, Any] | Path | str,
@@ -662,6 +691,57 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
662
691
  response = self._client._prepared_request(request)
663
692
  return SplitResponse.model_validate(response)
664
693
 
694
def classify(
    self,
    document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
    categories: list[Category] | list[dict[str, str]],
    model: str,
    **extra_body: Any,
) -> ClassifyResponse:
    """
    Classify a document into exactly one of the supplied categories.

    The request is assembled by ``_prepare_classify``, sent through the
    client, and the raw response is validated into a ``ClassifyResponse``.

    Args:
        document: The document to classify. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
        categories: Categories to choose from, given either as Category objects or as dicts
            with 'name' and 'description' keys.
        model: The AI model to use for document classification (e.g., "gemini-2.5-flash").
        **extra_body: Extra fields forwarded unchanged to the request builder and merged into the request body.

    Returns:
        ClassifyResponse: Response containing:
            - result: ClassifyResult with reasoning and classification.

    Raises:
        HTTPException: If the request fails.

    Example:
        ```python
        response = retab.documents.classify(
            document="invoice.pdf",
            model="gemini-2.5-flash",
            categories=[
                {"name": "invoice", "description": "Invoice documents with billing information"},
                {"name": "receipt", "description": "Receipt documents for payments"},
            ],
        )
        print(response.result.classification)
        print(response.result.reasoning)
        ```
    """
    prepared = self._prepare_classify(
        document=document,
        categories=categories,
        model=model,
        **extra_body,
    )
    raw_response = self._client._prepared_request(prepared)
    return ClassifyResponse.model_validate(raw_response)
744
+
665
745
 
666
746
  class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
667
747
  """Documents API wrapper for asynchronous usage."""
@@ -1005,3 +1085,54 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
1005
1085
  )
1006
1086
  response = await self._client._prepared_request(request)
1007
1087
  return SplitResponse.model_validate(response)
1088
+
1089
async def classify(
    self,
    document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
    categories: list[Category] | list[dict[str, str]],
    model: str,
    **extra_body: Any,
) -> ClassifyResponse:
    """
    Asynchronously classify a document into exactly one of the supplied categories.

    The request is assembled by ``_prepare_classify``, awaited through the
    client, and the raw response is validated into a ``ClassifyResponse``.

    Args:
        document: The document to classify. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
        categories: Categories to choose from, given either as Category objects or as dicts
            with 'name' and 'description' keys.
        model: The AI model to use for document classification (e.g., "gemini-2.5-flash").
        **extra_body: Extra fields forwarded unchanged to the request builder and merged into the request body.

    Returns:
        ClassifyResponse: Response containing:
            - result: ClassifyResult with reasoning and classification.

    Raises:
        HTTPException: If the request fails.

    Example:
        ```python
        response = await retab.documents.classify(
            document="invoice.pdf",
            model="gemini-2.5-flash",
            categories=[
                {"name": "invoice", "description": "Invoice documents with billing information"},
                {"name": "receipt", "description": "Receipt documents for payments"},
            ],
        )
        print(response.result.classification)
        print(response.result.reasoning)
        ```
    """
    prepared = self._prepare_classify(
        document=document,
        categories=categories,
        model=model,
        **extra_body,
    )
    raw_response = await self._client._prepared_request(prepared)
    return ClassifyResponse.model_validate(raw_response)
@@ -0,0 +1,3 @@
1
+ from .client import Edit, AsyncEdit
2
+
3
+ __all__ = ["Edit", "AsyncEdit"]
@@ -0,0 +1,176 @@
1
+ """
2
+ Edit SDK client - Wrapper for document editing functionality.
3
+ """
4
+
5
+ from io import IOBase
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ import PIL.Image
10
+ from pydantic import HttpUrl
11
+
12
+ from ..._resource import AsyncAPIResource, SyncAPIResource
13
+ from ...utils.mime import prepare_mime_document
14
+ from ...types.documents.edit import (
15
+ EditRequest,
16
+ EditResponse,
17
+ )
18
+ from ...types.mime import MIMEData
19
+ from ...types.standards import PreparedRequest, FieldUnset
20
+ from .templates import Templates, AsyncTemplates
21
+
22
+
23
class BaseEditMixin:
    """Request-building helpers shared by the sync and async edit clients."""

    def _prepare_fill_document(
        self,
        filling_instructions: str,
        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
        model: str = FieldUnset,
        template_id: str | None = FieldUnset,
        **extra_body: Any,
    ) -> PreparedRequest:
        """Build the POST request for ``/v1/edit/fill-document``.

        Args:
            filling_instructions: Always included in the request body.
            document: Converted with ``prepare_mime_document`` and included
                only when it is not ``None``.
            model: Included only when the caller passes something other than
                the ``FieldUnset`` sentinel.
            template_id: Included only when the caller passes something other
                than ``FieldUnset``; an explicit ``None`` is forwarded.
            **extra_body: Additional request fields, merged last so they
                override any field set above on key collision.

        Returns:
            PreparedRequest: POST request whose data is the JSON-mode dump of
            an ``EditRequest`` with unset fields excluded.
        """
        payload: dict[str, Any] = {"filling_instructions": filling_instructions}

        if document is not None:
            payload["document"] = prepare_mime_document(document)

        # Sentinel-defaulted fields are sent only when the caller supplied them.
        for field_name, field_value in (("model", model), ("template_id", template_id)):
            if field_value is not FieldUnset:
                payload[field_name] = field_value

        # Updating with an empty mapping is a no-op, so no emptiness guard is needed.
        payload.update(extra_body)

        body = EditRequest(**payload).model_dump(mode="json", exclude_unset=True)
        return PreparedRequest(method="POST", url="/v1/edit/fill-document", data=body)
57
+
58
+
59
class Edit(SyncAPIResource, BaseEditMixin):
    """Synchronous wrapper around the Edit API."""

    def __init__(self, client: Any) -> None:
        """Bind the resource to ``client`` and attach the ``templates`` sub-resource."""
        super().__init__(client=client)
        # The templates sub-resource shares the same client instance.
        self.templates = Templates(client=client)

    def fill_document(
        self,
        filling_instructions: str,
        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
        model: str = FieldUnset,
        template_id: str | None = FieldUnset,
        **extra_body: Any,
    ) -> EditResponse:
        """
        Fill the form fields of a document according to the given instructions.

        As described by the API, the service:
        1. Detects form field bounding boxes
        2. Uses LLM inference to name and describe the detected fields
        3. Fills the fields with an LLM following the provided instructions
        4. Returns the document with the form field values populated

        Either `document` OR `template_id` must be provided, but not both.
        This method does not check that itself; it forwards both values to
        the request builder as given.

        Args:
            filling_instructions: Instructions describing how to fill the form fields.
            document: The document to edit. Can be a file path (Path or str), file-like object,
                MIMEData, PIL Image, or URL. Mutually exclusive with template_id.
            model: The LLM model to use for inference. Omitted from the request when not
                provided; the API documentation names "retab-small" as the default.
            template_id: Template ID to use for filling. When provided, uses the template's
                pre-defined form fields and empty PDF. Only works for PDF documents.
                Mutually exclusive with document.
            **extra_body: Extra fields merged into the request body.

        Returns:
            EditResponse: Response containing:
                - form_data: List of form fields with filled values
                - filled_document: Document with filled form values (MIMEData)

        Raises:
            HTTPException: If the request fails.

        Supported document formats:
            - PDF: Native form field detection and filling
            - DOCX/DOC: Native editing to preserve styles and formatting
            - PPTX/PPT: Native editing for presentations
            - XLSX/XLS: Native editing for spreadsheets
        """
        prepared = self._prepare_fill_document(
            filling_instructions=filling_instructions,
            document=document,
            model=model,
            template_id=template_id,
            **extra_body,
        )
        raw_response = self._client._prepared_request(prepared)
        return EditResponse.model_validate(raw_response)
117
+
118
+
119
class AsyncEdit(AsyncAPIResource, BaseEditMixin):
    """Asynchronous wrapper around the Edit API."""

    def __init__(self, client: Any) -> None:
        """Bind the resource to ``client`` and attach the async ``templates`` sub-resource."""
        super().__init__(client=client)
        # The templates sub-resource shares the same client instance.
        self.templates = AsyncTemplates(client=client)

    async def fill_document(
        self,
        filling_instructions: str,
        document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl | None = None,
        model: str = FieldUnset,
        template_id: str | None = FieldUnset,
        **extra_body: Any,
    ) -> EditResponse:
        """
        Asynchronously fill the form fields of a document according to the given instructions.

        As described by the API, the service:
        1. Detects form field bounding boxes
        2. Uses LLM inference to name and describe the detected fields
        3. Fills the fields with an LLM following the provided instructions
        4. Returns the document with the form field values populated

        Either `document` OR `template_id` must be provided, but not both.
        This method does not check that itself; it forwards both values to
        the request builder as given.

        Args:
            filling_instructions: Instructions describing how to fill the form fields.
            document: The document to edit. Can be a file path (Path or str), file-like object,
                MIMEData, PIL Image, or URL. Mutually exclusive with template_id.
            model: The LLM model to use for inference. Omitted from the request when not
                provided; the API documentation names "retab-small" as the default.
            template_id: Template ID to use for filling. When provided, uses the template's
                pre-defined form fields and empty PDF. Only works for PDF documents.
                Mutually exclusive with document.
            **extra_body: Extra fields merged into the request body.

        Returns:
            EditResponse: Response containing:
                - form_data: List of form fields with filled values
                - filled_document: Document with filled form values (MIMEData)

        Raises:
            HTTPException: If the request fails.

        Supported document formats:
            - PDF: Native form field detection and filling
            - DOCX/DOC: Native editing to preserve styles and formatting
            - PPTX/PPT: Native editing for presentations
            - XLSX/XLS: Native editing for spreadsheets
        """
        prepared = self._prepare_fill_document(
            filling_instructions=filling_instructions,
            document=document,
            model=model,
            template_id=template_id,
            **extra_body,
        )
        raw_response = await self._client._prepared_request(prepared)
        return EditResponse.model_validate(raw_response)
@@ -0,0 +1,4 @@
1
+ from .client import Templates, AsyncTemplates
2
+
3
+ __all__ = ["Templates", "AsyncTemplates"]
4
+