aisberg 0.1.0__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {aisberg-0.1.0 → aisberg-0.2.0}/PKG-INFO +16 -3
- {aisberg-0.1.0 → aisberg-0.2.0}/README.md +14 -2
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/api/async_endpoints.py +138 -20
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/api/endpoints.py +136 -18
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/async_client.py +8 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/client.py +8 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/config.py +6 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/models/collections.py +15 -1
- aisberg-0.2.0/aisberg/models/documents.py +46 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/models/requests.py +5 -1
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/modules/__init__.py +5 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/modules/chat.py +11 -3
- aisberg-0.2.0/aisberg/modules/collections.py +470 -0
- aisberg-0.2.0/aisberg/modules/documents.py +168 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/modules/embeddings.py +11 -3
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/modules/me.py +1 -1
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/modules/models.py +3 -3
- aisberg-0.2.0/aisberg/modules/s3.py +316 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/modules/workflows.py +3 -3
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg.egg-info/PKG-INFO +16 -3
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg.egg-info/SOURCES.txt +4 -1
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg.egg-info/requires.txt +1 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/pyproject.toml +3 -2
- aisberg-0.2.0/tmp/test_collection.py +65 -0
- aisberg-0.2.0/tmp/test_doc_parse.py +36 -0
- aisberg-0.1.0/aisberg/modules/collections.py +0 -117
- aisberg-0.1.0/aisberg/modules/document.py +0 -117
- aisberg-0.1.0/tmp/test_doc_parse.py +0 -12
- {aisberg-0.1.0 → aisberg-0.2.0}/LICENSE +0 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/__init__.py +0 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/abstract/__init__.py +0 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/abstract/modules.py +0 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/api/__init__.py +0 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/exceptions.py +0 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/models/__init__.py +0 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/models/chat.py +0 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/models/embeddings.py +0 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/models/models.py +0 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/models/token.py +0 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/models/tools.py +0 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/models/workflows.py +0 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/modules/tools.py +0 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/requests/__init__.py +0 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/requests/async_requests.py +0 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/requests/sync_requests.py +0 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg/utils.py +0 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg.egg-info/dependency_links.txt +0 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/aisberg.egg-info/top_level.txt +0 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/setup.cfg +0 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/tests/integration/test_collections_integration.py +0 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/tests/unit/test_collections_sync.py +0 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/tmp/test.py +0 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/tmp/test_async.py +0 -0
- {aisberg-0.1.0 → aisberg-0.2.0}/tmp/test_sync.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: aisberg
|
3
|
-
Version: 0.1.0
|
3
|
+
Version: 0.2.0
|
4
4
|
Summary: Aisberg SDK for Python - A simple and powerful SDK to interact with the Aisberg API
|
5
5
|
Author: Free Pro
|
6
6
|
Author-email: Mathis Lambert <mathis.lambert@freepro.com>
|
@@ -16,6 +16,7 @@ License-File: LICENSE
|
|
16
16
|
Requires-Dist: httpx>=0.28.1
|
17
17
|
Requires-Dist: pydantic>=2.11.7
|
18
18
|
Requires-Dist: pydantic-settings>=2.10.1
|
19
|
+
Requires-Dist: boto3>=1.38.44
|
19
20
|
Provides-Extra: dev
|
20
21
|
Requires-Dist: pytest>=8.4.1; extra == "dev"
|
21
22
|
Requires-Dist: pytest-asyncio>=1.0.0; extra == "dev"
|
@@ -47,6 +48,7 @@ conversational LLM workflows, collections, embeddings, and more.
|
|
47
48
|
- **Environment-based configuration** (supports `.env` files and system environment variables)
|
48
49
|
- **Context manager support** for easy resource management
|
49
50
|
- **Custom tool registration**: Easily extend LLM capabilities with your own functions
|
51
|
+
- **Document Parsing**: Parse documents into structured data (e.g., JSON, CSV, PNG, PDF)
|
50
52
|
|
51
53
|
---
|
52
54
|
|
@@ -77,6 +79,15 @@ AISBERG_API_KEY=...
|
|
77
79
|
AISBERG_BASE_URL=https://url
|
78
80
|
```
|
79
81
|
|
82
|
+
In order to use the Document Parsing feature, you also need to set the `S3` credentials - ask the FreePro team for
|
83
|
+
these:
|
84
|
+
|
85
|
+
```env
|
86
|
+
S3_ACCESS_KEY_ID=...
|
87
|
+
S3_SECRET_ACCESS_KEY=...
|
88
|
+
S3_ENDPOINT=https://s3.endpoint
|
89
|
+
```
|
90
|
+
|
80
91
|
### 2. **Synchronous Usage**
|
81
92
|
|
82
93
|
```python
|
@@ -127,6 +138,7 @@ asyncio.run(main())
|
|
127
138
|
* `client.models` — Model discovery & info
|
128
139
|
* `client.workflows` — Workflow management & execution
|
129
140
|
* `client.tools` — Register and execute tools for LLM tool calls
|
141
|
+
* `client.documents` — Document parsing and management
|
130
142
|
|
131
143
|
Each module is available both in the sync and async clients with similar APIs.
|
132
144
|
|
@@ -173,6 +185,9 @@ client = AisbergClient(
|
|
173
185
|
* `AISBERG_API_KEY`
|
174
186
|
* `AISBERG_BASE_URL`
|
175
187
|
* `AISBERG_TIMEOUT` (optional)
|
188
|
+
* `S3_ACCESS_KEY_ID` (optional; required for document parsing)
|
189
|
+
* `S3_SECRET_ACCESS_KEY` (optional; required for document parsing)
|
190
|
+
* `S3_ENDPOINT` (optional; required for document parsing)
|
176
191
|
|
177
192
|
### **Using in a Context Manager**
|
178
193
|
|
@@ -208,5 +223,3 @@ For enterprise/commercial use, please contact [Mathis Lambert](mailto:mathis.lam
|
|
208
223
|
## Support
|
209
224
|
|
210
225
|
For support, bug reports, or feature requests, please contact your technical representative.
|
211
|
-
|
212
|
-
---
|
@@ -18,6 +18,7 @@ conversational LLM workflows, collections, embeddings, and more.
|
|
18
18
|
- **Environment-based configuration** (supports `.env` files and system environment variables)
|
19
19
|
- **Context manager support** for easy resource management
|
20
20
|
- **Custom tool registration**: Easily extend LLM capabilities with your own functions
|
21
|
+
- **Document Parsing**: Parse documents into structured data (e.g., JSON, CSV, PNG, PDF)
|
21
22
|
|
22
23
|
---
|
23
24
|
|
@@ -48,6 +49,15 @@ AISBERG_API_KEY=...
|
|
48
49
|
AISBERG_BASE_URL=https://url
|
49
50
|
```
|
50
51
|
|
52
|
+
In order to use the Document Parsing feature, you also need to set the `S3` credentials - ask the FreePro team for
|
53
|
+
these:
|
54
|
+
|
55
|
+
```env
|
56
|
+
S3_ACCESS_KEY_ID=...
|
57
|
+
S3_SECRET_ACCESS_KEY=...
|
58
|
+
S3_ENDPOINT=https://s3.endpoint
|
59
|
+
```
|
60
|
+
|
51
61
|
### 2. **Synchronous Usage**
|
52
62
|
|
53
63
|
```python
|
@@ -98,6 +108,7 @@ asyncio.run(main())
|
|
98
108
|
* `client.models` — Model discovery & info
|
99
109
|
* `client.workflows` — Workflow management & execution
|
100
110
|
* `client.tools` — Register and execute tools for LLM tool calls
|
111
|
+
* `client.documents` — Document parsing and management
|
101
112
|
|
102
113
|
Each module is available both in the sync and async clients with similar APIs.
|
103
114
|
|
@@ -144,6 +155,9 @@ client = AisbergClient(
|
|
144
155
|
* `AISBERG_API_KEY`
|
145
156
|
* `AISBERG_BASE_URL`
|
146
157
|
* `AISBERG_TIMEOUT` (optional)
|
158
|
+
* `S3_ACCESS_KEY_ID` (optional; required for document parsing)
|
159
|
+
* `S3_SECRET_ACCESS_KEY` (optional; required for document parsing)
|
160
|
+
* `S3_ENDPOINT` (optional; required for document parsing)
|
147
161
|
|
148
162
|
### **Using in a Context Manager**
|
149
163
|
|
@@ -179,5 +193,3 @@ For enterprise/commercial use, please contact [Mathis Lambert](mailto:mathis.lam
|
|
179
193
|
## Support
|
180
194
|
|
181
195
|
For support, bug reports, or feature requests, please contact your technical representative.
|
182
|
-
|
183
|
-
---
|
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
import json
|
2
2
|
|
3
3
|
import httpx
|
4
4
|
from ..models.chat import (
|
@@ -7,9 +7,10 @@ from ..models.chat import (
|
|
7
7
|
ChatCompletionResponse,
|
8
8
|
ChatCompletionChunk,
|
9
9
|
)
|
10
|
-
from typing import Optional, AsyncGenerator, Union, List, Any
|
10
|
+
from typing import Optional, AsyncGenerator, Union, List, Any
|
11
11
|
|
12
|
-
from ..models.collections import GroupCollections, PointDetails
|
12
|
+
from ..models.collections import GroupCollections, PointDetails, ChunkingDictInput
|
13
|
+
from ..models.documents import DocumentParserResponse
|
13
14
|
from ..models.embeddings import (
|
14
15
|
EncodingFormat,
|
15
16
|
EncodingResponse,
|
@@ -21,7 +22,7 @@ from ..models.token import TokenInfo
|
|
21
22
|
from ..models.workflows import WorkflowDetails, Workflow
|
22
23
|
from ..utils import parse_chat_line, WorkflowLineParser
|
23
24
|
from ..requests.async_requests import areq, areq_stream
|
24
|
-
from ..models.requests import AnyDict, AnyList
|
25
|
+
from ..models.requests import AnyDict, AnyList, HttpxFileField
|
25
26
|
|
26
27
|
|
27
28
|
async def models(client: httpx.AsyncClient) -> List[Model]:
|
@@ -67,7 +68,7 @@ async def collections(client: httpx.AsyncClient) -> List[GroupCollections]:
|
|
67
68
|
|
68
69
|
|
69
70
|
async def collection(
|
70
|
-
client: httpx.AsyncClient, collection_id: str, group_id: str
|
71
|
+
client: httpx.AsyncClient, collection_id: str, group_id: Optional[str] = None
|
71
72
|
) -> List[PointDetails]:
|
72
73
|
"""
|
73
74
|
Get details of a specific collection.
|
@@ -85,6 +86,128 @@ async def collection(
|
|
85
86
|
raise e
|
86
87
|
|
87
88
|
|
89
|
+
async def create_collection(
|
90
|
+
client: httpx.AsyncClient,
|
91
|
+
name: str,
|
92
|
+
model: str,
|
93
|
+
group: Optional[str] = None,
|
94
|
+
):
|
95
|
+
"""
|
96
|
+
Create a new collection with the specified name and optional group.
|
97
|
+
"""
|
98
|
+
payload = {"collection_name": name, "embedding_model": model}
|
99
|
+
if group is not None:
|
100
|
+
payload["group"] = group
|
101
|
+
|
102
|
+
return await areq(
|
103
|
+
client,
|
104
|
+
"POST",
|
105
|
+
"/collections",
|
106
|
+
AnyDict,
|
107
|
+
json=payload,
|
108
|
+
)
|
109
|
+
|
110
|
+
|
111
|
+
async def delete_collection(
|
112
|
+
client: httpx.AsyncClient,
|
113
|
+
name: str,
|
114
|
+
group: Optional[str] = None,
|
115
|
+
):
|
116
|
+
"""
|
117
|
+
Delete a collection with the specified name and optional group.
|
118
|
+
"""
|
119
|
+
payload = {"collections": [name]}
|
120
|
+
if group is not None:
|
121
|
+
payload["group"] = group
|
122
|
+
|
123
|
+
return await areq(
|
124
|
+
client,
|
125
|
+
"DELETE",
|
126
|
+
"/collections",
|
127
|
+
AnyDict,
|
128
|
+
json=payload,
|
129
|
+
)
|
130
|
+
|
131
|
+
|
132
|
+
async def insert_points_in_collection(
|
133
|
+
client: httpx.AsyncClient,
|
134
|
+
name: str,
|
135
|
+
files: HttpxFileField,
|
136
|
+
normalize: bool,
|
137
|
+
chunking_dict: Optional[ChunkingDictInput] = None,
|
138
|
+
group: Optional[str] = None,
|
139
|
+
):
|
140
|
+
"""
|
141
|
+
Insert points into a collection with the specified name.
|
142
|
+
"""
|
143
|
+
payload = {
|
144
|
+
"chunking_dict": json.dumps({"method": "custom", "params": {}}),
|
145
|
+
"normalize": normalize,
|
146
|
+
}
|
147
|
+
if group is not None:
|
148
|
+
payload["group"] = group
|
149
|
+
|
150
|
+
if chunking_dict is not None:
|
151
|
+
payload["chunking_dict"] = chunking_dict.model_dump_json()
|
152
|
+
|
153
|
+
return await areq(
|
154
|
+
client,
|
155
|
+
"POST",
|
156
|
+
f"/collections/{name}",
|
157
|
+
AnyDict,
|
158
|
+
data=payload,
|
159
|
+
files=files,
|
160
|
+
)
|
161
|
+
|
162
|
+
|
163
|
+
async def delete_points_in_collection(
|
164
|
+
client: httpx.AsyncClient,
|
165
|
+
points_ids: List[str],
|
166
|
+
name: str,
|
167
|
+
group: Optional[str] = None,
|
168
|
+
):
|
169
|
+
"""
|
170
|
+
Delete the given points from the collection with the specified name.
|
171
|
+
"""
|
172
|
+
payload = {
|
173
|
+
"points": points_ids,
|
174
|
+
"collection": name,
|
175
|
+
}
|
176
|
+
if group is not None:
|
177
|
+
payload["group"] = group
|
178
|
+
|
179
|
+
return await areq(
|
180
|
+
client,
|
181
|
+
"DELETE",
|
182
|
+
"/collections/chunks",
|
183
|
+
AnyDict,
|
184
|
+
json=payload,
|
185
|
+
)
|
186
|
+
|
187
|
+
|
188
|
+
async def delete_all_points_in_collection(
|
189
|
+
client: httpx.AsyncClient,
|
190
|
+
name: str,
|
191
|
+
group: Optional[str] = None,
|
192
|
+
):
|
193
|
+
"""
|
194
|
+
Delete all points from the collection with the specified name.
|
195
|
+
"""
|
196
|
+
payload = {
|
197
|
+
"collection": name,
|
198
|
+
}
|
199
|
+
if group is not None:
|
200
|
+
payload["group"] = group
|
201
|
+
|
202
|
+
return await areq(
|
203
|
+
client,
|
204
|
+
"DELETE",
|
205
|
+
"/collections/all/chunks",
|
206
|
+
AnyDict,
|
207
|
+
json=payload,
|
208
|
+
)
|
209
|
+
|
210
|
+
|
88
211
|
async def me(client: httpx.AsyncClient) -> TokenInfo:
|
89
212
|
"""
|
90
213
|
Get the details of the current user.
|
@@ -303,31 +426,26 @@ async def run_workflow(
|
|
303
426
|
raise e
|
304
427
|
|
305
428
|
|
306
|
-
async def
|
429
|
+
async def parse_documents(
|
307
430
|
client: httpx.AsyncClient,
|
308
|
-
|
309
|
-
source: str,
|
431
|
+
files: HttpxFileField,
|
310
432
|
group: Optional[str] = None,
|
311
|
-
|
433
|
+
**kwargs,
|
434
|
+
) -> DocumentParserResponse:
|
312
435
|
"""
|
313
|
-
Parse a
|
436
|
+
Parse a single or multiple documents using the document parser endpoint.
|
437
|
+
Returns the ID of the parsed document to be downloaded later from the S3 bucket.
|
314
438
|
"""
|
315
|
-
payload = {
|
316
|
-
"source": source,
|
317
|
-
}
|
318
|
-
|
439
|
+
payload = {**kwargs}
|
319
440
|
if group is not None:
|
320
441
|
payload["group"] = group
|
321
442
|
|
322
|
-
|
323
|
-
|
324
|
-
response = areq(
|
443
|
+
response = await areq(
|
325
444
|
client,
|
326
445
|
"POST",
|
327
446
|
"/document-parser/parsing/parse",
|
328
|
-
|
447
|
+
DocumentParserResponse,
|
329
448
|
files=files,
|
330
|
-
|
449
|
+
data=payload,
|
331
450
|
)
|
332
|
-
print(response)
|
333
451
|
return response
|
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
import json
|
2
2
|
|
3
3
|
import httpx
|
4
4
|
from ..models.chat import (
|
@@ -7,9 +7,10 @@ from ..models.chat import (
|
|
7
7
|
ChatCompletionResponse,
|
8
8
|
ChatCompletionChunk,
|
9
9
|
)
|
10
|
-
from typing import Optional, Generator, Union, List, Any
|
10
|
+
from typing import Optional, Generator, Union, List, Any
|
11
11
|
|
12
|
-
from ..models.collections import GroupCollections, PointDetails
|
12
|
+
from ..models.collections import GroupCollections, PointDetails, ChunkingDictInput
|
13
|
+
from ..models.documents import DocumentParserResponse
|
13
14
|
from ..models.embeddings import (
|
14
15
|
EncodingFormat,
|
15
16
|
EncodingResponse,
|
@@ -21,7 +22,7 @@ from ..models.token import TokenInfo
|
|
21
22
|
from ..models.workflows import WorkflowDetails, Workflow
|
22
23
|
from ..utils import parse_chat_line, WorkflowLineParser
|
23
24
|
from ..requests.sync_requests import req, req_stream
|
24
|
-
from ..models.requests import AnyDict, AnyList
|
25
|
+
from ..models.requests import AnyDict, AnyList, HttpxFileField
|
25
26
|
|
26
27
|
|
27
28
|
def models(client: httpx.Client) -> List[Model]:
|
@@ -64,7 +65,7 @@ def collections(client: httpx.Client) -> List[GroupCollections]:
|
|
64
65
|
|
65
66
|
|
66
67
|
def collection(
|
67
|
-
client: httpx.Client, collection_id: str, group_id: str
|
68
|
+
client: httpx.Client, collection_id: str, group_id: Optional[str] = None
|
68
69
|
) -> List[PointDetails]:
|
69
70
|
"""
|
70
71
|
Get details of a specific collection.
|
@@ -80,6 +81,128 @@ def collection(
|
|
80
81
|
raise e
|
81
82
|
|
82
83
|
|
84
|
+
def create_collection(
|
85
|
+
client: httpx.Client,
|
86
|
+
name: str,
|
87
|
+
model: str,
|
88
|
+
group: Optional[str] = None,
|
89
|
+
):
|
90
|
+
"""
|
91
|
+
Create a new collection with the specified name and optional group.
|
92
|
+
"""
|
93
|
+
payload = {"collection_name": name, "embedding_model": model}
|
94
|
+
if group is not None:
|
95
|
+
payload["group"] = group
|
96
|
+
|
97
|
+
return req(
|
98
|
+
client,
|
99
|
+
"POST",
|
100
|
+
"/collections",
|
101
|
+
AnyDict,
|
102
|
+
json=payload,
|
103
|
+
)
|
104
|
+
|
105
|
+
|
106
|
+
def delete_collection(
|
107
|
+
client: httpx.Client,
|
108
|
+
name: str,
|
109
|
+
group: Optional[str] = None,
|
110
|
+
):
|
111
|
+
"""
|
112
|
+
Delete a collection with the specified name and optional group.
|
113
|
+
"""
|
114
|
+
payload = {"collections": [name]}
|
115
|
+
if group is not None:
|
116
|
+
payload["group"] = group
|
117
|
+
|
118
|
+
return req(
|
119
|
+
client,
|
120
|
+
"DELETE",
|
121
|
+
"/collections",
|
122
|
+
AnyDict,
|
123
|
+
json=payload,
|
124
|
+
)
|
125
|
+
|
126
|
+
|
127
|
+
def insert_points_in_collection(
|
128
|
+
client: httpx.Client,
|
129
|
+
name: str,
|
130
|
+
files: HttpxFileField,
|
131
|
+
normalize: bool,
|
132
|
+
chunking_dict: Optional[ChunkingDictInput] = None,
|
133
|
+
group: Optional[str] = None,
|
134
|
+
):
|
135
|
+
"""
|
136
|
+
Insert points into a collection with the specified name.
|
137
|
+
"""
|
138
|
+
payload = {
|
139
|
+
"chunking_dict": json.dumps({"method": "custom", "params": {}}),
|
140
|
+
"normalize": normalize,
|
141
|
+
}
|
142
|
+
if group is not None:
|
143
|
+
payload["group"] = group
|
144
|
+
|
145
|
+
if chunking_dict is not None:
|
146
|
+
payload["chunking_dict"] = chunking_dict.model_dump_json()
|
147
|
+
|
148
|
+
return req(
|
149
|
+
client,
|
150
|
+
"POST",
|
151
|
+
f"/collections/{name}",
|
152
|
+
AnyDict,
|
153
|
+
data=payload,
|
154
|
+
files=files,
|
155
|
+
)
|
156
|
+
|
157
|
+
|
158
|
+
def delete_points_in_collection(
|
159
|
+
client: httpx.Client,
|
160
|
+
points_ids: List[str],
|
161
|
+
name: str,
|
162
|
+
group: Optional[str] = None,
|
163
|
+
):
|
164
|
+
"""
|
165
|
+
Delete the given points from the collection with the specified name.
|
166
|
+
"""
|
167
|
+
payload = {
|
168
|
+
"points": points_ids,
|
169
|
+
"collection": name,
|
170
|
+
}
|
171
|
+
if group is not None:
|
172
|
+
payload["group"] = group
|
173
|
+
|
174
|
+
return req(
|
175
|
+
client,
|
176
|
+
"DELETE",
|
177
|
+
"/collections/chunks",
|
178
|
+
AnyDict,
|
179
|
+
json=payload,
|
180
|
+
)
|
181
|
+
|
182
|
+
|
183
|
+
def delete_all_points_in_collection(
|
184
|
+
client: httpx.Client,
|
185
|
+
name: str,
|
186
|
+
group: Optional[str] = None,
|
187
|
+
):
|
188
|
+
"""
|
189
|
+
Delete all points from the collection with the specified name.
|
190
|
+
"""
|
191
|
+
payload = {
|
192
|
+
"collection": name,
|
193
|
+
}
|
194
|
+
if group is not None:
|
195
|
+
payload["group"] = group
|
196
|
+
|
197
|
+
return req(
|
198
|
+
client,
|
199
|
+
"DELETE",
|
200
|
+
"/collections/all/chunks",
|
201
|
+
AnyDict,
|
202
|
+
json=payload,
|
203
|
+
)
|
204
|
+
|
205
|
+
|
83
206
|
def me(client: httpx.Client) -> TokenInfo:
|
84
207
|
"""
|
85
208
|
Get the details of the current user.
|
@@ -298,31 +421,26 @@ def run_workflow(
|
|
298
421
|
raise e
|
299
422
|
|
300
423
|
|
301
|
-
def
|
424
|
+
def parse_documents(
|
302
425
|
client: httpx.Client,
|
303
|
-
|
304
|
-
source: str,
|
426
|
+
files: HttpxFileField,
|
305
427
|
group: Optional[str] = None,
|
306
|
-
|
428
|
+
**kwargs,
|
429
|
+
) -> DocumentParserResponse:
|
307
430
|
"""
|
308
|
-
Parse a
|
431
|
+
Parse a single or multiple documents using the document parser endpoint.
|
432
|
+
Returns the ID of the parsed document to be downloaded later from the S3 bucket.
|
309
433
|
"""
|
310
|
-
payload = {
|
311
|
-
"source": source,
|
312
|
-
}
|
313
|
-
|
434
|
+
payload = {**kwargs}
|
314
435
|
if group is not None:
|
315
436
|
payload["group"] = group
|
316
437
|
|
317
|
-
files = {"file": (file[1], BytesIO(file[0]), "application/octet-stream")}
|
318
|
-
|
319
438
|
response = req(
|
320
439
|
client,
|
321
440
|
"POST",
|
322
441
|
"/document-parser/parsing/parse",
|
323
|
-
|
442
|
+
DocumentParserResponse,
|
324
443
|
files=files,
|
325
444
|
data=payload,
|
326
445
|
)
|
327
|
-
print(response)
|
328
446
|
return response
|
@@ -10,6 +10,8 @@ from .modules import (
|
|
10
10
|
AsyncModelsModule,
|
11
11
|
AsyncWorkflowsModule,
|
12
12
|
ToolsModule,
|
13
|
+
AsyncDocumentsModule,
|
14
|
+
SyncS3Module,
|
13
15
|
)
|
14
16
|
|
15
17
|
|
@@ -33,6 +35,12 @@ class AisbergAsyncClient:
|
|
33
35
|
self.me = AsyncMeModule(self, self._client)
|
34
36
|
self.collections = AsyncCollectionsModule(self, self._client)
|
35
37
|
self.embeddings = AsyncEmbeddingsModule(self, self._client)
|
38
|
+
self.documents = AsyncDocumentsModule(self, self._client)
|
39
|
+
self._s3 = SyncS3Module(
|
40
|
+
settings.s3_access_key_id,
|
41
|
+
settings.s3_secret_access_key,
|
42
|
+
settings.s3_endpoint,
|
43
|
+
)
|
36
44
|
|
37
45
|
async def initialize(self):
|
38
46
|
"""
|
@@ -9,6 +9,8 @@ from .modules import (
|
|
9
9
|
SyncModelsModule,
|
10
10
|
SyncWorkflowsModule,
|
11
11
|
ToolsModule,
|
12
|
+
SyncDocumentsModule,
|
13
|
+
SyncS3Module,
|
12
14
|
)
|
13
15
|
|
14
16
|
|
@@ -39,6 +41,12 @@ class AisbergClient:
|
|
39
41
|
self.me = SyncMeModule(self, self._client)
|
40
42
|
self.collections = SyncCollectionsModule(self, self._client)
|
41
43
|
self.embeddings = SyncEmbeddingsModule(self, self._client)
|
44
|
+
self.documents = SyncDocumentsModule(self, self._client)
|
45
|
+
self._s3 = SyncS3Module(
|
46
|
+
settings.s3_access_key_id,
|
47
|
+
settings.s3_secret_access_key,
|
48
|
+
settings.s3_endpoint,
|
49
|
+
)
|
42
50
|
|
43
51
|
# Validate API key
|
44
52
|
self._validate_api_key()
|
@@ -5,10 +5,16 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
5
5
|
|
6
6
|
class Settings(BaseSettings):
|
7
7
|
# Variables attendues
|
8
|
+
# -- API --
|
8
9
|
aisberg_api_key: Union[str, None] = None
|
9
10
|
aisberg_base_url: Union[str, None] = None
|
10
11
|
timeout: int = 30
|
11
12
|
|
13
|
+
# -- S3 --
|
14
|
+
s3_access_key_id: Union[str, None] = None
|
15
|
+
s3_secret_access_key: Union[str, None] = None
|
16
|
+
s3_endpoint: Union[str, None] = None
|
17
|
+
|
12
18
|
# Pour indiquer le fichier .env
|
13
19
|
model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")
|
14
20
|
|
@@ -11,6 +11,20 @@ class GroupCollections(BaseModel):
|
|
11
11
|
collections: List[Collection]
|
12
12
|
|
13
13
|
|
14
|
+
class CollectionDataset(BaseModel):
|
15
|
+
chunks: List[str]
|
16
|
+
metadata: Optional[dict] = []
|
17
|
+
|
18
|
+
|
19
|
+
class CollectionCreateResponse(BaseModel):
|
20
|
+
message: Optional[str] = None
|
21
|
+
|
22
|
+
|
23
|
+
class ChunkingDictInput(BaseModel):
|
24
|
+
method: Optional[str] = "custom"
|
25
|
+
params: Optional[dict] = {}
|
26
|
+
|
27
|
+
|
14
28
|
# Modèle plus structuré pour payload
|
15
29
|
class Payload(BaseModel):
|
16
30
|
method: Optional[str] = None
|
@@ -32,5 +46,5 @@ class PointDetails(BaseModel):
|
|
32
46
|
|
33
47
|
class CollectionDetails(BaseModel):
|
34
48
|
name: str
|
35
|
-
group: str
|
49
|
+
group: Optional[str] = None
|
36
50
|
points: List[PointDetails]
|
@@ -0,0 +1,46 @@
|
|
1
|
+
from pydantic import BaseModel
|
2
|
+
from typing import Optional, List, Tuple, Union
|
3
|
+
from io import BytesIO
|
4
|
+
|
5
|
+
|
6
|
+
class DocumentParserResponse(BaseModel):
|
7
|
+
"""
|
8
|
+
Response model for document parsing.
|
9
|
+
"""
|
10
|
+
|
11
|
+
message: Optional[str] = None
|
12
|
+
parsedFiles: Optional[List[str]] = None
|
13
|
+
bucketName: Optional[str] = None
|
14
|
+
|
15
|
+
|
16
|
+
class FileObject(BaseModel):
|
17
|
+
"""
|
18
|
+
Represents a file object with its name and content.
|
19
|
+
"""
|
20
|
+
|
21
|
+
name: str
|
22
|
+
buffer: bytes
|
23
|
+
|
24
|
+
|
25
|
+
class DocumentParserDocOutput(BaseModel):
|
26
|
+
type: str
|
27
|
+
data: Union[str, dict, list]
|
28
|
+
|
29
|
+
|
30
|
+
class ParsedDocument(BaseModel):
|
31
|
+
"""
|
32
|
+
Represents a parsed document with its content and metadata.
|
33
|
+
"""
|
34
|
+
|
35
|
+
content: DocumentParserDocOutput
|
36
|
+
metadata: Optional[dict] = None
|
37
|
+
|
38
|
+
|
39
|
+
DocumentParserFileInput = Union[
|
40
|
+
str,
|
41
|
+
bytes,
|
42
|
+
BytesIO,
|
43
|
+
Tuple[bytes, str],
|
44
|
+
"FileObject",
|
45
|
+
List[Union[str, bytes, BytesIO, Tuple[bytes, str], "FileObject"]],
|
46
|
+
]
|
@@ -1,4 +1,5 @@
|
|
1
|
-
from typing import List, Any
|
1
|
+
from typing import List, Any, Tuple
|
2
|
+
from io import BytesIO
|
2
3
|
|
3
4
|
from pydantic import BaseModel, RootModel, ConfigDict
|
4
5
|
|
@@ -9,3 +10,6 @@ class AnyDict(BaseModel):
|
|
9
10
|
|
10
11
|
class AnyList(RootModel[List[Any]]):
|
11
12
|
pass
|
13
|
+
|
14
|
+
|
15
|
+
HttpxFileField = List[Tuple[str, Tuple[str, BytesIO, str]]]
|
@@ -5,6 +5,8 @@ from .me import AsyncMeModule, SyncMeModule
|
|
5
5
|
from .models import AsyncModelsModule, SyncModelsModule
|
6
6
|
from .workflows import AsyncWorkflowsModule, SyncWorkflowsModule
|
7
7
|
from .tools import ToolsModule
|
8
|
+
from .documents import AsyncDocumentsModule, SyncDocumentsModule
|
9
|
+
from .s3 import SyncS3Module
|
8
10
|
|
9
11
|
__all__ = [
|
10
12
|
"AsyncChatModule",
|
@@ -20,4 +22,7 @@ __all__ = [
|
|
20
22
|
"AsyncWorkflowsModule",
|
21
23
|
"SyncWorkflowsModule",
|
22
24
|
"ToolsModule",
|
25
|
+
"AsyncDocumentsModule",
|
26
|
+
"SyncDocumentsModule",
|
27
|
+
"SyncS3Module",
|
23
28
|
]
|