aisberg 0.1.0__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. {aisberg-0.1.0 → aisberg-0.2.0}/PKG-INFO +16 -3
  2. {aisberg-0.1.0 → aisberg-0.2.0}/README.md +14 -2
  3. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/api/async_endpoints.py +138 -20
  4. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/api/endpoints.py +136 -18
  5. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/async_client.py +8 -0
  6. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/client.py +8 -0
  7. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/config.py +6 -0
  8. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/models/collections.py +15 -1
  9. aisberg-0.2.0/aisberg/models/documents.py +46 -0
  10. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/models/requests.py +5 -1
  11. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/modules/__init__.py +5 -0
  12. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/modules/chat.py +11 -3
  13. aisberg-0.2.0/aisberg/modules/collections.py +470 -0
  14. aisberg-0.2.0/aisberg/modules/documents.py +168 -0
  15. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/modules/embeddings.py +11 -3
  16. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/modules/me.py +1 -1
  17. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/modules/models.py +3 -3
  18. aisberg-0.2.0/aisberg/modules/s3.py +316 -0
  19. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/modules/workflows.py +3 -3
  20. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg.egg-info/PKG-INFO +16 -3
  21. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg.egg-info/SOURCES.txt +4 -1
  22. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg.egg-info/requires.txt +1 -0
  23. {aisberg-0.1.0 → aisberg-0.2.0}/pyproject.toml +3 -2
  24. aisberg-0.2.0/tmp/test_collection.py +65 -0
  25. aisberg-0.2.0/tmp/test_doc_parse.py +36 -0
  26. aisberg-0.1.0/aisberg/modules/collections.py +0 -117
  27. aisberg-0.1.0/aisberg/modules/document.py +0 -117
  28. aisberg-0.1.0/tmp/test_doc_parse.py +0 -12
  29. {aisberg-0.1.0 → aisberg-0.2.0}/LICENSE +0 -0
  30. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/__init__.py +0 -0
  31. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/abstract/__init__.py +0 -0
  32. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/abstract/modules.py +0 -0
  33. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/api/__init__.py +0 -0
  34. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/exceptions.py +0 -0
  35. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/models/__init__.py +0 -0
  36. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/models/chat.py +0 -0
  37. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/models/embeddings.py +0 -0
  38. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/models/models.py +0 -0
  39. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/models/token.py +0 -0
  40. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/models/tools.py +0 -0
  41. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/models/workflows.py +0 -0
  42. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/modules/tools.py +0 -0
  43. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/requests/__init__.py +0 -0
  44. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/requests/async_requests.py +0 -0
  45. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/requests/sync_requests.py +0 -0
  46. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/utils.py +0 -0
  47. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg.egg-info/dependency_links.txt +0 -0
  48. {aisberg-0.1.0 → aisberg-0.2.0}/aisberg.egg-info/top_level.txt +0 -0
  49. {aisberg-0.1.0 → aisberg-0.2.0}/setup.cfg +0 -0
  50. {aisberg-0.1.0 → aisberg-0.2.0}/tests/integration/test_collections_integration.py +0 -0
  51. {aisberg-0.1.0 → aisberg-0.2.0}/tests/unit/test_collections_sync.py +0 -0
  52. {aisberg-0.1.0 → aisberg-0.2.0}/tmp/test.py +0 -0
  53. {aisberg-0.1.0 → aisberg-0.2.0}/tmp/test_async.py +0 -0
  54. {aisberg-0.1.0 → aisberg-0.2.0}/tmp/test_sync.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aisberg
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: Aisberg SDK for Python - A simple and powerful SDK to interact with the Aisberg API
5
5
  Author: Free Pro
6
6
  Author-email: Mathis Lambert <mathis.lambert@freepro.com>
@@ -16,6 +16,7 @@ License-File: LICENSE
16
16
  Requires-Dist: httpx>=0.28.1
17
17
  Requires-Dist: pydantic>=2.11.7
18
18
  Requires-Dist: pydantic-settings>=2.10.1
19
+ Requires-Dist: boto3>=1.38.44
19
20
  Provides-Extra: dev
20
21
  Requires-Dist: pytest>=8.4.1; extra == "dev"
21
22
  Requires-Dist: pytest-asyncio>=1.0.0; extra == "dev"
@@ -47,6 +48,7 @@ conversational LLM workflows, collections, embeddings, and more.
47
48
  - **Environment-based configuration** (supports `.env` files and system environment variables)
48
49
  - **Context manager support** for easy resource management
49
50
  - **Custom tool registration**: Easily extend LLM capabilities with your own functions
51
+ - **Document Parsing**: Parse documents (e.g., JSON, CSV, PNG, PDF) into structured data
50
52
 
51
53
  ---
52
54
 
@@ -77,6 +79,15 @@ AISBERG_API_KEY=...
77
79
  AISBERG_BASE_URL=https://url
78
80
  ```
79
81
 
82
+ In order to use the Document Parsing feature, you also need to set the `S3` credentials - ask the FreePro team for
83
+ these:
84
+
85
+ ```env
86
+ S3_ACCESS_KEY_ID=...
87
+ S3_SECRET_ACCESS_KEY=...
88
+ S3_ENDPOINT=https://s3.endpoint
89
+ ```
90
+
80
91
  ### 2. **Synchronous Usage**
81
92
 
82
93
  ```python
@@ -127,6 +138,7 @@ asyncio.run(main())
127
138
  * `client.models` — Model discovery & info
128
139
  * `client.workflows` — Workflow management & execution
129
140
  * `client.tools` — Register and execute tools for LLM tool calls
141
+ * `client.documents` — Document parsing and management
130
142
 
131
143
  Each module is available both in the sync and async clients with similar APIs.
132
144
 
@@ -173,6 +185,9 @@ client = AisbergClient(
173
185
  * `AISBERG_API_KEY`
174
186
  * `AISBERG_BASE_URL`
175
187
  * `AISBERG_TIMEOUT` (optional)
188
+ * `S3_ACCESS_KEY_ID` (optional; only needed for document parsing)
189
+ * `S3_SECRET_ACCESS_KEY` (optional; only needed for document parsing)
190
+ * `S3_ENDPOINT` (optional; only needed for document parsing)
176
191
 
177
192
  ### **Using in a Context Manager**
178
193
 
@@ -208,5 +223,3 @@ For enterprise/commercial use, please contact [Mathis Lambert](mailto:mathis.lam
208
223
  ## Support
209
224
 
210
225
  For support, bug reports, or feature requests, please contact your technical representative.
211
-
212
- ---
@@ -18,6 +18,7 @@ conversational LLM workflows, collections, embeddings, and more.
18
18
  - **Environment-based configuration** (supports `.env` files and system environment variables)
19
19
  - **Context manager support** for easy resource management
20
20
  - **Custom tool registration**: Easily extend LLM capabilities with your own functions
21
+ - **Document Parsing**: Parse documents (e.g., JSON, CSV, PNG, PDF) into structured data
21
22
 
22
23
  ---
23
24
 
@@ -48,6 +49,15 @@ AISBERG_API_KEY=...
48
49
  AISBERG_BASE_URL=https://url
49
50
  ```
50
51
 
52
+ In order to use the Document Parsing feature, you also need to set the `S3` credentials - ask the FreePro team for
53
+ these:
54
+
55
+ ```env
56
+ S3_ACCESS_KEY_ID=...
57
+ S3_SECRET_ACCESS_KEY=...
58
+ S3_ENDPOINT=https://s3.endpoint
59
+ ```
60
+
51
61
  ### 2. **Synchronous Usage**
52
62
 
53
63
  ```python
@@ -98,6 +108,7 @@ asyncio.run(main())
98
108
  * `client.models` — Model discovery & info
99
109
  * `client.workflows` — Workflow management & execution
100
110
  * `client.tools` — Register and execute tools for LLM tool calls
111
+ * `client.documents` — Document parsing and management
101
112
 
102
113
  Each module is available both in the sync and async clients with similar APIs.
103
114
 
@@ -144,6 +155,9 @@ client = AisbergClient(
144
155
  * `AISBERG_API_KEY`
145
156
  * `AISBERG_BASE_URL`
146
157
  * `AISBERG_TIMEOUT` (optional)
158
+ * `S3_ACCESS_KEY_ID` (optional; only needed for document parsing)
159
+ * `S3_SECRET_ACCESS_KEY` (optional; only needed for document parsing)
160
+ * `S3_ENDPOINT` (optional; only needed for document parsing)
147
161
 
148
162
  ### **Using in a Context Manager**
149
163
 
@@ -179,5 +193,3 @@ For enterprise/commercial use, please contact [Mathis Lambert](mailto:mathis.lam
179
193
  ## Support
180
194
 
181
195
  For support, bug reports, or feature requests, please contact your technical representative.
182
-
183
- ---
@@ -1,4 +1,4 @@
1
- from io import BytesIO
1
+ import json
2
2
 
3
3
  import httpx
4
4
  from ..models.chat import (
@@ -7,9 +7,10 @@ from ..models.chat import (
7
7
  ChatCompletionResponse,
8
8
  ChatCompletionChunk,
9
9
  )
10
- from typing import Optional, AsyncGenerator, Union, List, Any, Tuple
10
+ from typing import Optional, AsyncGenerator, Union, List, Any
11
11
 
12
- from ..models.collections import GroupCollections, PointDetails
12
+ from ..models.collections import GroupCollections, PointDetails, ChunkingDictInput
13
+ from ..models.documents import DocumentParserResponse
13
14
  from ..models.embeddings import (
14
15
  EncodingFormat,
15
16
  EncodingResponse,
@@ -21,7 +22,7 @@ from ..models.token import TokenInfo
21
22
  from ..models.workflows import WorkflowDetails, Workflow
22
23
  from ..utils import parse_chat_line, WorkflowLineParser
23
24
  from ..requests.async_requests import areq, areq_stream
24
- from ..models.requests import AnyDict, AnyList
25
+ from ..models.requests import AnyDict, AnyList, HttpxFileField
25
26
 
26
27
 
27
28
  async def models(client: httpx.AsyncClient) -> List[Model]:
@@ -67,7 +68,7 @@ async def collections(client: httpx.AsyncClient) -> List[GroupCollections]:
67
68
 
68
69
 
69
70
  async def collection(
70
- client: httpx.AsyncClient, collection_id: str, group_id: str
71
+ client: httpx.AsyncClient, collection_id: str, group_id: Optional[str] = None
71
72
  ) -> List[PointDetails]:
72
73
  """
73
74
  Get details of a specific collection.
@@ -85,6 +86,128 @@ async def collection(
85
86
  raise e
86
87
 
87
88
 
89
+ async def create_collection(
90
+ client: httpx.AsyncClient,
91
+ name: str,
92
+ model: str,
93
+ group: Optional[str] = None,
94
+ ):
95
+ """
96
+ Create a new collection with the specified name and optional group.
97
+ """
98
+ payload = {"collection_name": name, "embedding_model": model}
99
+ if group is not None:
100
+ payload["group"] = group
101
+
102
+ return await areq(
103
+ client,
104
+ "POST",
105
+ "/collections",
106
+ AnyDict,
107
+ json=payload,
108
+ )
109
+
110
+
111
+ async def delete_collection(
112
+ client: httpx.AsyncClient,
113
+ name: str,
114
+ group: Optional[str] = None,
115
+ ):
116
+ """
117
+ Delete a collection with the specified name and optional group.
118
+ """
119
+ payload = {"collections": [name]}
120
+ if group is not None:
121
+ payload["group"] = group
122
+
123
+ return await areq(
124
+ client,
125
+ "DELETE",
126
+ "/collections",
127
+ AnyDict,
128
+ json=payload,
129
+ )
130
+
131
+
132
+ async def insert_points_in_collection(
133
+ client: httpx.AsyncClient,
134
+ name: str,
135
+ files: HttpxFileField,
136
+ normalize: bool,
137
+ chunking_dict: Optional[ChunkingDictInput] = None,
138
+ group: Optional[str] = None,
139
+ ):
140
+ """
141
+ Insert points into a collection with the specified name.
142
+ """
143
+ payload = {
144
+ "chunking_dict": json.dumps({"method": "custom", "params": {}}),
145
+ "normalize": normalize,
146
+ }
147
+ if group is not None:
148
+ payload["group"] = group
149
+
150
+ if chunking_dict is not None:
151
+ payload["chunking_dict"] = chunking_dict.model_dump_json()
152
+
153
+ return await areq(
154
+ client,
155
+ "POST",
156
+ f"/collections/{name}",
157
+ AnyDict,
158
+ data=payload,
159
+ files=files,
160
+ )
161
+
162
+
163
+ async def delete_points_in_collection(
164
+ client: httpx.AsyncClient,
165
+ points_ids: List[str],
166
+ name: str,
167
+ group: Optional[str] = None,
168
+ ):
169
+ """
170
+ Delete points from a collection with the specified name.
171
+ """
172
+ payload = {
173
+ "points": points_ids,
174
+ "collection": name,
175
+ }
176
+ if group is not None:
177
+ payload["group"] = group
178
+
179
+ return await areq(
180
+ client,
181
+ "DELETE",
182
+ "/collections/chunks",
183
+ AnyDict,
184
+ json=payload,
185
+ )
186
+
187
+
188
+ async def delete_all_points_in_collection(
189
+ client: httpx.AsyncClient,
190
+ name: str,
191
+ group: Optional[str] = None,
192
+ ):
193
+ """
194
+ Delete all points from a collection with the specified name.
195
+ """
196
+ payload = {
197
+ "collection": name,
198
+ }
199
+ if group is not None:
200
+ payload["group"] = group
201
+
202
+ return await areq(
203
+ client,
204
+ "DELETE",
205
+ "/collections/all/chunks",
206
+ AnyDict,
207
+ json=payload,
208
+ )
209
+
210
+
88
211
  async def me(client: httpx.AsyncClient) -> TokenInfo:
89
212
  """
90
213
  Get the details of the current user.
@@ -303,31 +426,26 @@ async def run_workflow(
303
426
  raise e
304
427
 
305
428
 
306
- async def parse_document(
429
+ async def parse_documents(
307
430
  client: httpx.AsyncClient,
308
- file: Tuple[bytes, str],
309
- source: str,
431
+ files: HttpxFileField,
310
432
  group: Optional[str] = None,
311
- ) -> str:
433
+ **kwargs,
434
+ ) -> DocumentParserResponse:
312
435
  """
313
- Parse a document using the specified model.
436
+ Parse one or more documents using the document parser endpoint.
437
+ Returns the ID of the parsed document to be downloaded later from the S3 bucket.
314
438
  """
315
- payload = {
316
- "source": source,
317
- }
318
-
439
+ payload = {**kwargs}
319
440
  if group is not None:
320
441
  payload["group"] = group
321
442
 
322
- files = {"file": (file[1], BytesIO(file[0]), "application/octet-stream")}
323
-
324
- response = areq(
443
+ response = await areq(
325
444
  client,
326
445
  "POST",
327
446
  "/document-parser/parsing/parse",
328
- AnyDict,
447
+ DocumentParserResponse,
329
448
  files=files,
330
- json=payload,
449
+ data=payload,
331
450
  )
332
- print(response)
333
451
  return response
@@ -1,4 +1,4 @@
1
- from io import BytesIO
1
+ import json
2
2
 
3
3
  import httpx
4
4
  from ..models.chat import (
@@ -7,9 +7,10 @@ from ..models.chat import (
7
7
  ChatCompletionResponse,
8
8
  ChatCompletionChunk,
9
9
  )
10
- from typing import Optional, Generator, Union, List, Any, Tuple
10
+ from typing import Optional, Generator, Union, List, Any
11
11
 
12
- from ..models.collections import GroupCollections, PointDetails
12
+ from ..models.collections import GroupCollections, PointDetails, ChunkingDictInput
13
+ from ..models.documents import DocumentParserResponse
13
14
  from ..models.embeddings import (
14
15
  EncodingFormat,
15
16
  EncodingResponse,
@@ -21,7 +22,7 @@ from ..models.token import TokenInfo
21
22
  from ..models.workflows import WorkflowDetails, Workflow
22
23
  from ..utils import parse_chat_line, WorkflowLineParser
23
24
  from ..requests.sync_requests import req, req_stream
24
- from ..models.requests import AnyDict, AnyList
25
+ from ..models.requests import AnyDict, AnyList, HttpxFileField
25
26
 
26
27
 
27
28
  def models(client: httpx.Client) -> List[Model]:
@@ -64,7 +65,7 @@ def collections(client: httpx.Client) -> List[GroupCollections]:
64
65
 
65
66
 
66
67
  def collection(
67
- client: httpx.Client, collection_id: str, group_id: str
68
+ client: httpx.Client, collection_id: str, group_id: Optional[str] = None
68
69
  ) -> List[PointDetails]:
69
70
  """
70
71
  Get details of a specific collection.
@@ -80,6 +81,128 @@ def collection(
80
81
  raise e
81
82
 
82
83
 
84
+ def create_collection(
85
+ client: httpx.Client,
86
+ name: str,
87
+ model: str,
88
+ group: Optional[str] = None,
89
+ ):
90
+ """
91
+ Create a new collection with the specified name and optional group.
92
+ """
93
+ payload = {"collection_name": name, "embedding_model": model}
94
+ if group is not None:
95
+ payload["group"] = group
96
+
97
+ return req(
98
+ client,
99
+ "POST",
100
+ "/collections",
101
+ AnyDict,
102
+ json=payload,
103
+ )
104
+
105
+
106
+ def delete_collection(
107
+ client: httpx.Client,
108
+ name: str,
109
+ group: Optional[str] = None,
110
+ ):
111
+ """
112
+ Delete a collection with the specified name and optional group.
113
+ """
114
+ payload = {"collections": [name]}
115
+ if group is not None:
116
+ payload["group"] = group
117
+
118
+ return req(
119
+ client,
120
+ "DELETE",
121
+ "/collections",
122
+ AnyDict,
123
+ json=payload,
124
+ )
125
+
126
+
127
+ def insert_points_in_collection(
128
+ client: httpx.Client,
129
+ name: str,
130
+ files: HttpxFileField,
131
+ normalize: bool,
132
+ chunking_dict: Optional[ChunkingDictInput] = None,
133
+ group: Optional[str] = None,
134
+ ):
135
+ """
136
+ Insert points into a collection with the specified name.
137
+ """
138
+ payload = {
139
+ "chunking_dict": json.dumps({"method": "custom", "params": {}}),
140
+ "normalize": normalize,
141
+ }
142
+ if group is not None:
143
+ payload["group"] = group
144
+
145
+ if chunking_dict is not None:
146
+ payload["chunking_dict"] = chunking_dict.model_dump_json()
147
+
148
+ return req(
149
+ client,
150
+ "POST",
151
+ f"/collections/{name}",
152
+ AnyDict,
153
+ data=payload,
154
+ files=files,
155
+ )
156
+
157
+
158
+ def delete_points_in_collection(
159
+ client: httpx.Client,
160
+ points_ids: List[str],
161
+ name: str,
162
+ group: Optional[str] = None,
163
+ ):
164
+ """
165
+ Delete points from a collection with the specified name.
166
+ """
167
+ payload = {
168
+ "points": points_ids,
169
+ "collection": name,
170
+ }
171
+ if group is not None:
172
+ payload["group"] = group
173
+
174
+ return req(
175
+ client,
176
+ "DELETE",
177
+ "/collections/chunks",
178
+ AnyDict,
179
+ json=payload,
180
+ )
181
+
182
+
183
+ def delete_all_points_in_collection(
184
+ client: httpx.Client,
185
+ name: str,
186
+ group: Optional[str] = None,
187
+ ):
188
+ """
189
+ Delete all points from a collection with the specified name.
190
+ """
191
+ payload = {
192
+ "collection": name,
193
+ }
194
+ if group is not None:
195
+ payload["group"] = group
196
+
197
+ return req(
198
+ client,
199
+ "DELETE",
200
+ "/collections/all/chunks",
201
+ AnyDict,
202
+ json=payload,
203
+ )
204
+
205
+
83
206
  def me(client: httpx.Client) -> TokenInfo:
84
207
  """
85
208
  Get the details of the current user.
@@ -298,31 +421,26 @@ def run_workflow(
298
421
  raise e
299
422
 
300
423
 
301
- def parse_document(
424
+ def parse_documents(
302
425
  client: httpx.Client,
303
- file: Tuple[bytes, str],
304
- source: str,
426
+ files: HttpxFileField,
305
427
  group: Optional[str] = None,
306
- ) -> str:
428
+ **kwargs,
429
+ ) -> DocumentParserResponse:
307
430
  """
308
- Parse a document using the specified model.
431
+ Parse one or more documents using the document parser endpoint.
432
+ Returns the ID of the parsed document to be downloaded later from the S3 bucket.
309
433
  """
310
- payload = {
311
- "source": source,
312
- }
313
-
434
+ payload = {**kwargs}
314
435
  if group is not None:
315
436
  payload["group"] = group
316
437
 
317
- files = {"file": (file[1], BytesIO(file[0]), "application/octet-stream")}
318
-
319
438
  response = req(
320
439
  client,
321
440
  "POST",
322
441
  "/document-parser/parsing/parse",
323
- AnyDict,
442
+ DocumentParserResponse,
324
443
  files=files,
325
444
  data=payload,
326
445
  )
327
- print(response)
328
446
  return response
@@ -10,6 +10,8 @@ from .modules import (
10
10
  AsyncModelsModule,
11
11
  AsyncWorkflowsModule,
12
12
  ToolsModule,
13
+ AsyncDocumentsModule,
14
+ SyncS3Module,
13
15
  )
14
16
 
15
17
 
@@ -33,6 +35,12 @@ class AisbergAsyncClient:
33
35
  self.me = AsyncMeModule(self, self._client)
34
36
  self.collections = AsyncCollectionsModule(self, self._client)
35
37
  self.embeddings = AsyncEmbeddingsModule(self, self._client)
38
+ self.documents = AsyncDocumentsModule(self, self._client)
39
+ self._s3 = SyncS3Module(
40
+ settings.s3_access_key_id,
41
+ settings.s3_secret_access_key,
42
+ settings.s3_endpoint,
43
+ )
36
44
 
37
45
  async def initialize(self):
38
46
  """
@@ -9,6 +9,8 @@ from .modules import (
9
9
  SyncModelsModule,
10
10
  SyncWorkflowsModule,
11
11
  ToolsModule,
12
+ SyncDocumentsModule,
13
+ SyncS3Module,
12
14
  )
13
15
 
14
16
 
@@ -39,6 +41,12 @@ class AisbergClient:
39
41
  self.me = SyncMeModule(self, self._client)
40
42
  self.collections = SyncCollectionsModule(self, self._client)
41
43
  self.embeddings = SyncEmbeddingsModule(self, self._client)
44
+ self.documents = SyncDocumentsModule(self, self._client)
45
+ self._s3 = SyncS3Module(
46
+ settings.s3_access_key_id,
47
+ settings.s3_secret_access_key,
48
+ settings.s3_endpoint,
49
+ )
42
50
 
43
51
  # Validate API key
44
52
  self._validate_api_key()
@@ -5,10 +5,16 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
5
5
 
6
6
  class Settings(BaseSettings):
7
7
  # Variables attendues
8
+ # -- API --
8
9
  aisberg_api_key: Union[str, None] = None
9
10
  aisberg_base_url: Union[str, None] = None
10
11
  timeout: int = 30
11
12
 
13
+ # -- S3 --
14
+ s3_access_key_id: Union[str, None] = None
15
+ s3_secret_access_key: Union[str, None] = None
16
+ s3_endpoint: Union[str, None] = None
17
+
12
18
  # Pour indiquer le fichier .env
13
19
  model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")
14
20
 
@@ -11,6 +11,20 @@ class GroupCollections(BaseModel):
11
11
  collections: List[Collection]
12
12
 
13
13
 
14
+ class CollectionDataset(BaseModel):
15
+ chunks: List[str]
16
+ metadata: Optional[dict] = []
17
+
18
+
19
+ class CollectionCreateResponse(BaseModel):
20
+ message: Optional[str] = None
21
+
22
+
23
+ class ChunkingDictInput(BaseModel):
24
+ method: Optional[str] = "custom"
25
+ params: Optional[dict] = {}
26
+
27
+
14
28
  # Modèle plus structuré pour payload
15
29
  class Payload(BaseModel):
16
30
  method: Optional[str] = None
@@ -32,5 +46,5 @@ class PointDetails(BaseModel):
32
46
 
33
47
  class CollectionDetails(BaseModel):
34
48
  name: str
35
- group: str
49
+ group: Optional[str] = None
36
50
  points: List[PointDetails]
@@ -0,0 +1,46 @@
1
+ from pydantic import BaseModel
2
+ from typing import Optional, List, Tuple, Union
3
+ from io import BytesIO
4
+
5
+
6
+ class DocumentParserResponse(BaseModel):
7
+ """
8
+ Response model for document parsing.
9
+ """
10
+
11
+ message: Optional[str] = None
12
+ parsedFiles: Optional[List[str]] = None
13
+ bucketName: Optional[str] = None
14
+
15
+
16
+ class FileObject(BaseModel):
17
+ """
18
+ Represents a file object with its name and content.
19
+ """
20
+
21
+ name: str
22
+ buffer: bytes
23
+
24
+
25
+ class DocumentParserDocOutput(BaseModel):
26
+ type: str
27
+ data: Union[str, dict, list]
28
+
29
+
30
+ class ParsedDocument(BaseModel):
31
+ """
32
+ Represents a parsed document with its content and metadata.
33
+ """
34
+
35
+ content: DocumentParserDocOutput
36
+ metadata: Optional[dict] = None
37
+
38
+
39
+ DocumentParserFileInput = Union[
40
+ str,
41
+ bytes,
42
+ BytesIO,
43
+ Tuple[bytes, str],
44
+ "FileObject",
45
+ List[Union[str, bytes, BytesIO, Tuple[bytes, str], "FileObject"]],
46
+ ]
@@ -1,4 +1,5 @@
1
- from typing import List, Any
1
+ from typing import List, Any, Tuple
2
+ from io import BytesIO
2
3
 
3
4
  from pydantic import BaseModel, RootModel, ConfigDict
4
5
 
@@ -9,3 +10,6 @@ class AnyDict(BaseModel):
9
10
 
10
11
  class AnyList(RootModel[List[Any]]):
11
12
  pass
13
+
14
+
15
+ HttpxFileField = List[Tuple[str, Tuple[str, BytesIO, str]]]
@@ -5,6 +5,8 @@ from .me import AsyncMeModule, SyncMeModule
5
5
  from .models import AsyncModelsModule, SyncModelsModule
6
6
  from .workflows import AsyncWorkflowsModule, SyncWorkflowsModule
7
7
  from .tools import ToolsModule
8
+ from .documents import AsyncDocumentsModule, SyncDocumentsModule
9
+ from .s3 import SyncS3Module
8
10
 
9
11
  __all__ = [
10
12
  "AsyncChatModule",
@@ -20,4 +22,7 @@ __all__ = [
20
22
  "AsyncWorkflowsModule",
21
23
  "SyncWorkflowsModule",
22
24
  "ToolsModule",
25
+ "AsyncDocumentsModule",
26
+ "SyncDocumentsModule",
27
+ "SyncS3Module",
23
28
  ]