aisberg 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- from io import BytesIO
1
+ import json
2
2
 
3
3
  import httpx
4
4
  from ..models.chat import (
@@ -7,9 +7,10 @@ from ..models.chat import (
7
7
  ChatCompletionResponse,
8
8
  ChatCompletionChunk,
9
9
  )
10
- from typing import Optional, AsyncGenerator, Union, List, Any, Tuple
10
+ from typing import Optional, AsyncGenerator, Union, List, Any
11
11
 
12
- from ..models.collections import GroupCollections, PointDetails
12
+ from ..models.collections import GroupCollections, PointDetails, ChunkingDictInput
13
+ from ..models.documents import DocumentParserResponse
13
14
  from ..models.embeddings import (
14
15
  EncodingFormat,
15
16
  EncodingResponse,
@@ -21,7 +22,7 @@ from ..models.token import TokenInfo
21
22
  from ..models.workflows import WorkflowDetails, Workflow
22
23
  from ..utils import parse_chat_line, WorkflowLineParser
23
24
  from ..requests.async_requests import areq, areq_stream
24
- from ..models.requests import AnyDict, AnyList
25
+ from ..models.requests import AnyDict, AnyList, HttpxFileField
25
26
 
26
27
 
27
28
  async def models(client: httpx.AsyncClient) -> List[Model]:
@@ -67,7 +68,7 @@ async def collections(client: httpx.AsyncClient) -> List[GroupCollections]:
67
68
 
68
69
 
69
70
  async def collection(
70
- client: httpx.AsyncClient, collection_id: str, group_id: str
71
+ client: httpx.AsyncClient, collection_id: str, group_id: Optional[str] = None
71
72
  ) -> List[PointDetails]:
72
73
  """
73
74
  Get details of a specific collection.
@@ -85,6 +86,128 @@ async def collection(
85
86
  raise e
86
87
 
87
88
 
async def create_collection(
    client: httpx.AsyncClient,
    name: str,
    model: str,
    group: Optional[str] = None,
):
    """
    Create a collection by POSTing its name and embedding model to ``/collections``.

    Args:
        client: Async HTTP client the request is sent through.
        name: Sent as ``collection_name`` in the JSON body.
        model: Sent as ``embedding_model`` in the JSON body.
        group: Sent as ``group``; the key is omitted when this is None.

    Returns:
        The awaited result of ``areq`` called with ``AnyDict`` as the
        response model.
    """
    body = {"collection_name": name, "embedding_model": model}
    # Omit the key entirely rather than sending a null group.
    if group is not None:
        body["group"] = group

    return await areq(client, "POST", "/collections", AnyDict, json=body)
109
+
110
+
async def delete_collection(
    client: httpx.AsyncClient,
    name: str,
    group: Optional[str] = None,
):
    """
    Delete one collection by sending a DELETE request to ``/collections``.

    Args:
        client: Async HTTP client the request is sent through.
        name: Collection name; sent as the single element of ``collections``.
        group: Sent as ``group``; the key is omitted when this is None.

    Returns:
        The awaited result of ``areq`` called with ``AnyDict`` as the
        response model.
    """
    # The request body carries a list of names, even for a single collection.
    body = {"collections": [name]}
    if group is not None:
        body["group"] = group

    return await areq(client, "DELETE", "/collections", AnyDict, json=body)
130
+
131
+
async def insert_points_in_collection(
    client: httpx.AsyncClient,
    name: str,
    files: HttpxFileField,
    normalize: bool,
    chunking_dict: Optional[ChunkingDictInput] = None,
    group: Optional[str] = None,
):
    """
    Upload files to ``/collections/{name}`` so they are inserted as points.

    Args:
        client: Async HTTP client the request is sent through.
        name: Collection name, interpolated into the request path.
        files: Forwarded unchanged as the ``files`` argument of the request.
        normalize: Sent as the ``normalize`` form field.
        chunking_dict: Chunking settings, serialized with ``model_dump_json``.
            When None, ``{"method": "custom", "params": {}}`` is sent instead.
        group: Sent as ``group``; the field is omitted when this is None.

    Returns:
        The awaited result of ``areq`` called with ``AnyDict`` as the
        response model.
    """
    # The chunking settings travel as a JSON string inside a form field.
    if chunking_dict is None:
        chunking_json = json.dumps({"method": "custom", "params": {}})
    else:
        chunking_json = chunking_dict.model_dump_json()

    form_fields = {"chunking_dict": chunking_json, "normalize": normalize}
    if group is not None:
        form_fields["group"] = group

    return await areq(
        client,
        "POST",
        f"/collections/{name}",
        AnyDict,
        data=form_fields,
        files=files,
    )
161
+
162
+
async def delete_points_in_collection(
    client: httpx.AsyncClient,
    points_ids: List[str],
    name: str,
    group: Optional[str] = None,
):
    """
    Delete the given points from a collection via ``/collections/chunks``.

    Args:
        client: Async HTTP client the request is sent through.
        points_ids: Sent as ``points`` in the JSON body.
        name: Sent as ``collection`` in the JSON body.
        group: Sent as ``group``; the key is omitted when this is None.

    Returns:
        The awaited result of ``areq`` called with ``AnyDict`` as the
        response model.
    """
    body = {"points": points_ids, "collection": name}
    if group is not None:
        body["group"] = group

    return await areq(client, "DELETE", "/collections/chunks", AnyDict, json=body)
186
+
187
+
async def delete_all_points_in_collection(
    client: httpx.AsyncClient,
    name: str,
    group: Optional[str] = None,
):
    """
    Delete every point of a collection via ``/collections/all/chunks``.

    Args:
        client: Async HTTP client the request is sent through.
        name: Sent as ``collection`` in the JSON body.
        group: Sent as ``group``; the key is omitted when this is None.

    Returns:
        The awaited result of ``areq`` called with ``AnyDict`` as the
        response model.
    """
    body = {"collection": name}
    if group is not None:
        body["group"] = group

    return await areq(
        client, "DELETE", "/collections/all/chunks", AnyDict, json=body
    )
209
+
210
+
88
211
  async def me(client: httpx.AsyncClient) -> TokenInfo:
89
212
  """
90
213
  Get the details of the current user.
@@ -303,31 +426,26 @@ async def run_workflow(
303
426
  raise e
304
427
 
305
428
 
async def parse_documents(
    client: httpx.AsyncClient,
    files: HttpxFileField,
    group: Optional[str] = None,
    **kwargs,
) -> DocumentParserResponse:
    """
    Send one or more files to the document parser endpoint.

    Args:
        client: Async HTTP client the request is sent through.
        files: Forwarded unchanged as the ``files`` argument of the request.
        group: Sent as the ``group`` form field; omitted when this is None.
        **kwargs: Extra form fields, sent as given.

    Returns:
        The awaited result of ``areq`` called with ``DocumentParserResponse``
        as the response model.
    """
    # Copy the keyword arguments so the form payload is a fresh dict.
    form_fields = dict(kwargs)
    if group is not None:
        form_fields["group"] = group

    return await areq(
        client,
        "POST",
        "/document-parser/parsing/parse",
        DocumentParserResponse,
        files=files,
        data=form_fields,
    )
aisberg/api/endpoints.py CHANGED
@@ -1,4 +1,4 @@
1
- from io import BytesIO
1
+ import json
2
2
 
3
3
  import httpx
4
4
  from ..models.chat import (
@@ -7,9 +7,10 @@ from ..models.chat import (
7
7
  ChatCompletionResponse,
8
8
  ChatCompletionChunk,
9
9
  )
10
- from typing import Optional, Generator, Union, List, Any, Tuple
10
+ from typing import Optional, Generator, Union, List, Any
11
11
 
12
- from ..models.collections import GroupCollections, PointDetails
12
+ from ..models.collections import GroupCollections, PointDetails, ChunkingDictInput
13
+ from ..models.documents import DocumentParserResponse
13
14
  from ..models.embeddings import (
14
15
  EncodingFormat,
15
16
  EncodingResponse,
@@ -21,7 +22,7 @@ from ..models.token import TokenInfo
21
22
  from ..models.workflows import WorkflowDetails, Workflow
22
23
  from ..utils import parse_chat_line, WorkflowLineParser
23
24
  from ..requests.sync_requests import req, req_stream
24
- from ..models.requests import AnyDict, AnyList
25
+ from ..models.requests import AnyDict, AnyList, HttpxFileField
25
26
 
26
27
 
27
28
  def models(client: httpx.Client) -> List[Model]:
@@ -64,7 +65,7 @@ def collections(client: httpx.Client) -> List[GroupCollections]:
64
65
 
65
66
 
66
67
  def collection(
67
- client: httpx.Client, collection_id: str, group_id: str
68
+ client: httpx.Client, collection_id: str, group_id: Optional[str] = None
68
69
  ) -> List[PointDetails]:
69
70
  """
70
71
  Get details of a specific collection.
@@ -80,6 +81,128 @@ def collection(
80
81
  raise e
81
82
 
82
83
 
def create_collection(
    client: httpx.Client,
    name: str,
    model: str,
    group: Optional[str] = None,
):
    """
    Create a collection by POSTing its name and embedding model to ``/collections``.

    Args:
        client: HTTP client the request is sent through.
        name: Sent as ``collection_name`` in the JSON body.
        model: Sent as ``embedding_model`` in the JSON body.
        group: Sent as ``group``; the key is omitted when this is None.

    Returns:
        The result of ``req`` called with ``AnyDict`` as the response model.
    """
    body = {"collection_name": name, "embedding_model": model}
    # Omit the key entirely rather than sending a null group.
    if group is not None:
        body["group"] = group

    return req(client, "POST", "/collections", AnyDict, json=body)
104
+
105
+
def delete_collection(
    client: httpx.Client,
    name: str,
    group: Optional[str] = None,
):
    """
    Delete one collection by sending a DELETE request to ``/collections``.

    Args:
        client: HTTP client the request is sent through.
        name: Collection name; sent as the single element of ``collections``.
        group: Sent as ``group``; the key is omitted when this is None.

    Returns:
        The result of ``req`` called with ``AnyDict`` as the response model.
    """
    # The request body carries a list of names, even for a single collection.
    body = {"collections": [name]}
    if group is not None:
        body["group"] = group

    return req(client, "DELETE", "/collections", AnyDict, json=body)
125
+
126
+
def insert_points_in_collection(
    client: httpx.Client,
    name: str,
    files: HttpxFileField,
    normalize: bool,
    chunking_dict: Optional[ChunkingDictInput] = None,
    group: Optional[str] = None,
):
    """
    Upload files to ``/collections/{name}`` so they are inserted as points.

    Args:
        client: HTTP client the request is sent through.
        name: Collection name, interpolated into the request path.
        files: Forwarded unchanged as the ``files`` argument of the request.
        normalize: Sent as the ``normalize`` form field.
        chunking_dict: Chunking settings, serialized with ``model_dump_json``.
            When None, ``{"method": "custom", "params": {}}`` is sent instead.
        group: Sent as ``group``; the field is omitted when this is None.

    Returns:
        The result of ``req`` called with ``AnyDict`` as the response model.
    """
    # The chunking settings travel as a JSON string inside a form field.
    if chunking_dict is None:
        chunking_json = json.dumps({"method": "custom", "params": {}})
    else:
        chunking_json = chunking_dict.model_dump_json()

    form_fields = {"chunking_dict": chunking_json, "normalize": normalize}
    if group is not None:
        form_fields["group"] = group

    return req(
        client,
        "POST",
        f"/collections/{name}",
        AnyDict,
        data=form_fields,
        files=files,
    )
156
+
157
+
def delete_points_in_collection(
    client: httpx.Client,
    points_ids: List[str],
    name: str,
    group: Optional[str] = None,
):
    """
    Delete the given points from a collection via ``/collections/chunks``.

    Args:
        client: HTTP client the request is sent through.
        points_ids: Sent as ``points`` in the JSON body.
        name: Sent as ``collection`` in the JSON body.
        group: Sent as ``group``; the key is omitted when this is None.

    Returns:
        The result of ``req`` called with ``AnyDict`` as the response model.
    """
    body = {"points": points_ids, "collection": name}
    if group is not None:
        body["group"] = group

    return req(client, "DELETE", "/collections/chunks", AnyDict, json=body)
181
+
182
+
def delete_all_points_in_collection(
    client: httpx.Client,
    name: str,
    group: Optional[str] = None,
):
    """
    Delete every point of a collection via ``/collections/all/chunks``.

    Args:
        client: HTTP client the request is sent through.
        name: Sent as ``collection`` in the JSON body.
        group: Sent as ``group``; the key is omitted when this is None.

    Returns:
        The result of ``req`` called with ``AnyDict`` as the response model.
    """
    body = {"collection": name}
    if group is not None:
        body["group"] = group

    return req(client, "DELETE", "/collections/all/chunks", AnyDict, json=body)
204
+
205
+
83
206
  def me(client: httpx.Client) -> TokenInfo:
84
207
  """
85
208
  Get the details of the current user.
@@ -298,31 +421,26 @@ def run_workflow(
298
421
  raise e
299
422
 
300
423
 
def parse_documents(
    client: httpx.Client,
    files: HttpxFileField,
    group: Optional[str] = None,
    **kwargs,
) -> DocumentParserResponse:
    """
    Send one or more files to the document parser endpoint.

    Args:
        client: HTTP client the request is sent through.
        files: Forwarded unchanged as the ``files`` argument of the request.
        group: Sent as the ``group`` form field; omitted when this is None.
        **kwargs: Extra form fields, sent as given.

    Returns:
        The result of ``req`` called with ``DocumentParserResponse`` as the
        response model.
    """
    # Copy the keyword arguments so the form payload is a fresh dict.
    form_fields = dict(kwargs)
    if group is not None:
        form_fields["group"] = group

    return req(
        client,
        "POST",
        "/document-parser/parsing/parse",
        DocumentParserResponse,
        files=files,
        data=form_fields,
    )
aisberg/async_client.py CHANGED
@@ -10,6 +10,8 @@ from .modules import (
10
10
  AsyncModelsModule,
11
11
  AsyncWorkflowsModule,
12
12
  ToolsModule,
13
+ AsyncDocumentsModule,
14
+ SyncS3Module,
13
15
  )
14
16
 
15
17
 
@@ -33,6 +35,12 @@ class AisbergAsyncClient:
33
35
  self.me = AsyncMeModule(self, self._client)
34
36
  self.collections = AsyncCollectionsModule(self, self._client)
35
37
  self.embeddings = AsyncEmbeddingsModule(self, self._client)
38
+ self.documents = AsyncDocumentsModule(self, self._client)
39
+ self._s3 = SyncS3Module(
40
+ settings.s3_access_key_id,
41
+ settings.s3_secret_access_key,
42
+ settings.s3_endpoint,
43
+ )
36
44
 
37
45
  async def initialize(self):
38
46
  """
aisberg/client.py CHANGED
@@ -9,6 +9,8 @@ from .modules import (
9
9
  SyncModelsModule,
10
10
  SyncWorkflowsModule,
11
11
  ToolsModule,
12
+ SyncDocumentsModule,
13
+ SyncS3Module,
12
14
  )
13
15
 
14
16
 
@@ -39,6 +41,12 @@ class AisbergClient:
39
41
  self.me = SyncMeModule(self, self._client)
40
42
  self.collections = SyncCollectionsModule(self, self._client)
41
43
  self.embeddings = SyncEmbeddingsModule(self, self._client)
44
+ self.documents = SyncDocumentsModule(self, self._client)
45
+ self._s3 = SyncS3Module(
46
+ settings.s3_access_key_id,
47
+ settings.s3_secret_access_key,
48
+ settings.s3_endpoint,
49
+ )
42
50
 
43
51
  # Validate API key
44
52
  self._validate_api_key()
aisberg/config.py CHANGED
@@ -5,10 +5,16 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
5
5
 
6
6
  class Settings(BaseSettings):
7
7
  # Variables attendues
8
+ # -- API --
8
9
  aisberg_api_key: Union[str, None] = None
9
10
  aisberg_base_url: Union[str, None] = None
10
11
  timeout: int = 30
11
12
 
13
+ # -- S3 --
14
+ s3_access_key_id: Union[str, None] = None
15
+ s3_secret_access_key: Union[str, None] = None
16
+ s3_endpoint: Union[str, None] = None
17
+
12
18
  # Pour indiquer le fichier .env
13
19
  model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")
14
20
 
@@ -11,6 +11,20 @@ class GroupCollections(BaseModel):
11
11
  collections: List[Collection]
12
12
 
13
13
 
class CollectionDataset(BaseModel):
    """
    A list of string chunks together with optional metadata.
    """

    # Required: the chunks, each one a string.
    chunks: List[str]
    # NOTE(review): the annotation is an optional dict but the default is an
    # empty list -- confirm which of the two is intended.
    metadata: Optional[dict] = []
17
+
18
+
class CollectionCreateResponse(BaseModel):
    """
    Response model holding a single optional message.

    Presumably what the collection-creation endpoint returns -- confirm
    against the caller.
    """

    # Optional message text; None when absent.
    message: Optional[str] = None
21
+
22
+
class ChunkingDictInput(BaseModel):
    """
    Chunking settings: a method name plus a free-form parameter mapping.
    """

    # Name of the chunking method; defaults to "custom".
    method: Optional[str] = "custom"
    # Parameters for the chosen method; defaults to an empty mapping.
    params: Optional[dict] = {}
26
+
27
+
14
28
  # Modèle plus structuré pour payload
15
29
  class Payload(BaseModel):
16
30
  method: Optional[str] = None
@@ -32,5 +46,5 @@ class PointDetails(BaseModel):
32
46
 
33
47
  class CollectionDetails(BaseModel):
34
48
  name: str
35
- group: str
49
+ group: Optional[str] = None
36
50
  points: List[PointDetails]
@@ -0,0 +1,46 @@
1
+ from pydantic import BaseModel
2
+ from typing import Optional, List, Tuple, Union
3
+ from io import BytesIO
4
+
5
+
class DocumentParserResponse(BaseModel):
    """
    Response model for a document parsing request.

    Every field is optional and defaults to None.
    """

    # Optional message text.
    message: Optional[str] = None
    # Optional list of strings, one per parsed file
    # (presumably identifiers or object keys -- TODO confirm).
    parsedFiles: Optional[List[str]] = None
    # Optional bucket name (presumably where the parsed files are stored
    # -- TODO confirm).
    bucketName: Optional[str] = None
14
+
15
+
class FileObject(BaseModel):
    """
    A file described by its name and its raw content.
    """

    # File name.
    name: str
    # File content as bytes.
    buffer: bytes
23
+
24
+
class DocumentParserDocOutput(BaseModel):
    """
    Typed payload of a parsed document: a type tag plus the data itself.
    """

    # Tag naming the kind of data carried (allowed values are not visible here).
    type: str
    # The parsed data: a string, a dict or a list.
    data: Union[str, dict, list]
28
+
29
+
class ParsedDocument(BaseModel):
    """
    A parsed document: its content plus optional metadata.
    """

    # The document content as a DocumentParserDocOutput.
    content: DocumentParserDocOutput
    # Optional metadata mapping; None when absent.
    metadata: Optional[dict] = None
37
+
38
+
# A single file input: a str, raw bytes, a BytesIO, a (bytes, str) tuple
# or a FileObject.
_SingleFileInput = Union[str, bytes, BytesIO, Tuple[bytes, str], "FileObject"]

# Accepted input for the document parser: one file input or a list of them.
DocumentParserFileInput = Union[_SingleFileInput, List[_SingleFileInput]]
@@ -1,4 +1,5 @@
1
- from typing import List, Any
1
+ from typing import List, Any, Tuple
2
+ from io import BytesIO
2
3
 
3
4
  from pydantic import BaseModel, RootModel, ConfigDict
4
5
 
@@ -9,3 +10,6 @@ class AnyDict(BaseModel):
9
10
 
10
11
  class AnyList(RootModel[List[Any]]):
11
12
  pass
13
+
14
+
15
+ HttpxFileField = List[Tuple[str, Tuple[str, BytesIO, str]]]
@@ -5,6 +5,8 @@ from .me import AsyncMeModule, SyncMeModule
5
5
  from .models import AsyncModelsModule, SyncModelsModule
6
6
  from .workflows import AsyncWorkflowsModule, SyncWorkflowsModule
7
7
  from .tools import ToolsModule
8
+ from .documents import AsyncDocumentsModule, SyncDocumentsModule
9
+ from .s3 import SyncS3Module
8
10
 
9
11
  __all__ = [
10
12
  "AsyncChatModule",
@@ -20,4 +22,7 @@ __all__ = [
20
22
  "AsyncWorkflowsModule",
21
23
  "SyncWorkflowsModule",
22
24
  "ToolsModule",
25
+ "AsyncDocumentsModule",
26
+ "SyncDocumentsModule",
27
+ "SyncS3Module",
23
28
  ]
aisberg/modules/chat.py CHANGED
@@ -14,6 +14,9 @@ from ..exceptions import ToolExecutionError
14
14
  import json
15
15
  from ..api import async_endpoints, endpoints
16
16
  from ..abstract.modules import AsyncModule, SyncModule
17
+ import logging
18
+
19
+ logger = logging.getLogger(__name__)
17
20
 
18
21
 
19
22
  class AbstractChatModule(ABC):
@@ -58,7 +61,7 @@ class AbstractChatModule(ABC):
58
61
  Returns:
59
62
  ChatCompletionResponse: Structured model response.
60
63
  """
61
- pass
64
+ ...
62
65
 
63
66
  @abstractmethod
64
67
  def stream(
@@ -91,7 +94,7 @@ class AbstractChatModule(ABC):
91
94
  Yields:
92
95
  Union[str, ChatCompletionChunk, ChatCompletionResponse]: Response chunks or final responses as they become available.
93
96
  """
94
- pass
97
+ ...
95
98
 
96
99
  @abstractmethod
97
100
  def _handle_tool_calls(
@@ -121,7 +124,7 @@ class AbstractChatModule(ABC):
121
124
  ChatCompletionResponse or AsyncGenerator: If stream is True, returns a generator yielding tool results and final response.
122
125
  Otherwise, returns a complete ChatCompletionResponse with tool results integrated.
123
126
  """
124
- pass
127
+ ...
125
128
 
126
129
  def _build_tool_messages(
127
130
  self,
@@ -160,6 +163,10 @@ class AbstractChatModule(ABC):
160
163
  )
161
164
  )
162
165
  except json.JSONDecodeError:
166
+ logger.error(
167
+ f"Invalid JSON arguments provided for tool {tool_name}."
168
+ f" Arguments: {tool_arguments_str}"
169
+ )
163
170
  messages.append(
164
171
  ToolMessage(
165
172
  content=f"Error: Invalid JSON arguments provided for tool {tool_name}.",
@@ -167,6 +174,7 @@ class AbstractChatModule(ABC):
167
174
  )
168
175
  )
169
176
  except ToolExecutionError as e:
177
+ logger.error(f"Error executing tool {tool_name}: {str(e)}")
170
178
  messages.append(
171
179
  ToolMessage(
172
180
  content=f"Error: {str(e)}",