retab 0.0.80__py3-none-any.whl → 0.0.81__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,6 +17,7 @@ from ...types.documents.edit import EditRequest, EditResponse
17
17
  from ...types.documents.extract import DocumentExtractRequest, RetabParsedChatCompletion, RetabParsedChatCompletionChunk, RetabParsedChoice, maybe_parse_to_pydantic
18
18
  from ...types.documents.parse import ParseRequest, ParseResult, TableParsingFormat
19
19
  from ...types.documents.split import Category, SplitRequest, SplitResponse
20
+ from ...types.documents.classify import ClassifyRequest, ClassifyResponse
20
21
  from ...types.mime import MIMEData
21
22
  from ...types.standards import PreparedRequest, FieldUnset
22
23
  from ...utils.json_schema import load_json_schema, unflatten_dict
@@ -172,6 +173,34 @@ class BaseDocumentsMixin:
172
173
  split_request = SplitRequest(**request_dict)
173
174
  return PreparedRequest(method="POST", url="/v1/documents/split", data=split_request.model_dump(mode="json", exclude_unset=True))
174
175
 
176
def _prepare_classify(
    self,
    document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
    categories: list[Category] | list[dict[str, str]],
    model: str,
    **extra_body: Any,
) -> PreparedRequest:
    """Build the POST request for the document classification endpoint.

    Args:
        document: The document to classify; it is converted with
            ``prepare_mime_document`` before being placed in the request.
        categories: Candidate categories, given either as ``Category``
            objects or as dicts of ``Category`` keyword arguments.
        model: Model identifier forwarded in the request body.
        **extra_body: Additional request fields, merged on top of the
            standard ``document`` / ``categories`` / ``model`` fields.

    Returns:
        A ``PreparedRequest`` for ``POST /v1/documents/classify`` whose data
        is the JSON-mode dump of the ``ClassifyRequest`` built from these
        fields, with unset fields excluded.
    """
    # Convert the document first so failures surface in the same order as before.
    mime_doc = prepare_mime_document(document)

    normalized: list[Any] = []
    for entry in categories:
        # Plain dicts are promoted to Category models; anything else passes through as-is.
        normalized.append(Category(**entry) if isinstance(entry, dict) else entry)

    # Caller-supplied extras come last, equivalent to a dict.update on the base fields.
    payload: dict[str, Any] = {
        "document": mime_doc,
        "categories": normalized,
        "model": model,
        **extra_body,
    }

    validated = ClassifyRequest(**payload)
    return PreparedRequest(
        method="POST",
        url="/v1/documents/classify",
        data=validated.model_dump(mode="json", exclude_unset=True),
    )
203
+
175
204
  def _prepare_extract(
176
205
  self,
177
206
  json_schema: dict[str, Any] | Path | str,
@@ -662,6 +691,57 @@ class Documents(SyncAPIResource, BaseDocumentsMixin):
662
691
  response = self._client._prepared_request(request)
663
692
  return SplitResponse.model_validate(response)
664
693
 
694
def classify(
    self,
    document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
    categories: list[Category] | list[dict[str, str]],
    model: str,
    **extra_body: Any,
) -> ClassifyResponse:
    """
    Classify a document into exactly one of the supplied categories.

    The request is assembled by ``_prepare_classify``, sent through the
    synchronous client, and the raw response is validated into a
    ``ClassifyResponse``.

    Args:
        document: The document to classify. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
        categories: Categories to choose from, as Category objects or dicts with 'name' and 'description' keys.
        model: The AI model to use for document classification (e.g., "gemini-2.5-flash").
        **extra_body: Extra fields forwarded unchanged into the request body.

    Returns:
        ClassifyResponse: Response containing:
            - result: ClassifyResult with reasoning and classification.

    Raises:
        HTTPException: If the request fails.

    Example:
        ```python
        response = retab.documents.classify(
            document="invoice.pdf",
            model="gemini-2.5-flash",
            categories=[
                {"name": "invoice", "description": "Invoice documents with billing information"},
                {"name": "receipt", "description": "Receipt documents for payments"},
                {"name": "contract", "description": "Legal contract documents"},
            ]
        )
        print(f"Classification: {response.result.classification}")
        print(f"Reasoning: {response.result.reasoning}")
        ```
    """
    prepared = self._prepare_classify(document=document, categories=categories, model=model, **extra_body)
    raw_response = self._client._prepared_request(prepared)
    return ClassifyResponse.model_validate(raw_response)
744
+
665
745
 
666
746
  class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
667
747
  """Documents API wrapper for asynchronous usage."""
@@ -1005,3 +1085,54 @@ class AsyncDocuments(AsyncAPIResource, BaseDocumentsMixin):
1005
1085
  )
1006
1086
  response = await self._client._prepared_request(request)
1007
1087
  return SplitResponse.model_validate(response)
1088
+
1089
async def classify(
    self,
    document: Path | str | IOBase | MIMEData | PIL.Image.Image | HttpUrl,
    categories: list[Category] | list[dict[str, str]],
    model: str,
    **extra_body: Any,
) -> ClassifyResponse:
    """
    Asynchronously classify a document into exactly one of the supplied categories.

    The request is assembled by ``_prepare_classify``, awaited through the
    asynchronous client, and the raw response is validated into a
    ``ClassifyResponse``.

    Args:
        document: The document to classify. Can be a file path (Path or str), file-like object, MIMEData, PIL Image, or URL.
        categories: Categories to choose from, as Category objects or dicts with 'name' and 'description' keys.
        model: The AI model to use for document classification (e.g., "gemini-2.5-flash").
        **extra_body: Extra fields forwarded unchanged into the request body.

    Returns:
        ClassifyResponse: Response containing:
            - result: ClassifyResult with reasoning and classification.

    Raises:
        HTTPException: If the request fails.

    Example:
        ```python
        response = await retab.documents.classify(
            document="invoice.pdf",
            model="gemini-2.5-flash",
            categories=[
                {"name": "invoice", "description": "Invoice documents with billing information"},
                {"name": "receipt", "description": "Receipt documents for payments"},
                {"name": "contract", "description": "Legal contract documents"},
            ]
        )
        print(f"Classification: {response.result.classification}")
        print(f"Reasoning: {response.result.reasoning}")
        ```
    """
    prepared = self._prepare_classify(document=document, categories=categories, model=model, **extra_body)
    raw_response = await self._client._prepared_request(prepared)
    return ClassifyResponse.model_validate(raw_response)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: retab
3
- Version: 0.0.80
3
+ Version: 0.0.81
4
4
  Summary: Retab official python library
5
5
  Home-page: https://github.com/retab-dev/retab
6
6
  Author: Retab
@@ -7,7 +7,7 @@ retab/resources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
7
  retab/resources/models.py,sha256=4WidFBnTGZEA65DSn2pLP2SRnCVXkMTw7o_m8xVCFC4,2469
8
8
  retab/resources/schemas.py,sha256=rZ6OzfmoYv-mGaRVzvXjO09dD-KxP74mZhOO8sMgcDQ,4632
9
9
  retab/resources/documents/__init__.py,sha256=OjXmngFN0RKqO4SI-mJBNzr6Ex6rMxfq0DxaqzP0RQs,89
10
- retab/resources/documents/client.py,sha256=XxWo9FlktrpuskAPyKWTx9UIA2VA81g0SbHjHYnigMM,43583
10
+ retab/resources/documents/client.py,sha256=E8v0aBF4-9ATYo5hkQ629OP5mm2AtodTzznlj2xRWtQ,49000
11
11
  retab/resources/extractions/__init__.py,sha256=2H1ezUG8hI5SmTRy6NFzXdYLOdGFFsFrI60uzkitV20,97
12
12
  retab/resources/extractions/client.py,sha256=sEoNjOgX91FTOgoJUV-I1A9A9xl1ciCdPlhYwjhEjbA,11035
13
13
  retab/resources/projects/__init__.py,sha256=tPR3_3tr7bsoYd618qmGjnYN2R23PmF5oCFd7Z5_HGY,85
@@ -49,7 +49,7 @@ retab/utils/hashing.py,sha256=_BMVUvftOcJav68QL0rLkH2dbhW9RRJPzeGC2akR0fc,757
49
49
  retab/utils/json_schema.py,sha256=zP4pQLpVHBKWo_abCjb_dU4kA0azhHopd-1TFUgVEvc,20655
50
50
  retab/utils/mime.py,sha256=mTP_lqSPttOP5DYJxopiWaeFXrUCPjhwd7y53nCVGO4,6189
51
51
  retab/utils/stream_context_managers.py,sha256=gI1gVQSj3nWz6Mvjz7Ix5AiY0g6vSL-c2tPfuP04izo,2314
52
- retab-0.0.80.dist-info/METADATA,sha256=yzLJuUr355iV7ihl3yNw5o2-7ha2m201Ht4t3889n7c,4532
53
- retab-0.0.80.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
54
- retab-0.0.80.dist-info/top_level.txt,sha256=waQR0EGdhLIQtztoE3AXg7ik5ONQ9q_bsKVpyFuJdq0,6
55
- retab-0.0.80.dist-info/RECORD,,
52
+ retab-0.0.81.dist-info/METADATA,sha256=1dsE31zFzslvv3Up5BOM62auWgQNbLCie0hZ2NfwP5Y,4532
53
+ retab-0.0.81.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
54
+ retab-0.0.81.dist-info/top_level.txt,sha256=waQR0EGdhLIQtztoE3AXg7ik5ONQ9q_bsKVpyFuJdq0,6
55
+ retab-0.0.81.dist-info/RECORD,,
File without changes