aisberg 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. aisberg/__init__.py +7 -0
  2. aisberg/abstract/__init__.py +0 -0
  3. aisberg/abstract/modules.py +57 -0
  4. aisberg/api/__init__.py +0 -0
  5. aisberg/api/async_endpoints.py +333 -0
  6. aisberg/api/endpoints.py +328 -0
  7. aisberg/async_client.py +107 -0
  8. aisberg/client.py +108 -0
  9. aisberg/config.py +17 -0
  10. aisberg/exceptions.py +22 -0
  11. aisberg/models/__init__.py +0 -0
  12. aisberg/models/chat.py +143 -0
  13. aisberg/models/collections.py +36 -0
  14. aisberg/models/embeddings.py +92 -0
  15. aisberg/models/models.py +39 -0
  16. aisberg/models/requests.py +11 -0
  17. aisberg/models/token.py +11 -0
  18. aisberg/models/tools.py +73 -0
  19. aisberg/models/workflows.py +66 -0
  20. aisberg/modules/__init__.py +23 -0
  21. aisberg/modules/chat.py +403 -0
  22. aisberg/modules/collections.py +117 -0
  23. aisberg/modules/document.py +117 -0
  24. aisberg/modules/embeddings.py +309 -0
  25. aisberg/modules/me.py +77 -0
  26. aisberg/modules/models.py +108 -0
  27. aisberg/modules/tools.py +78 -0
  28. aisberg/modules/workflows.py +140 -0
  29. aisberg/requests/__init__.py +0 -0
  30. aisberg/requests/async_requests.py +85 -0
  31. aisberg/requests/sync_requests.py +85 -0
  32. aisberg/utils.py +111 -0
  33. aisberg-0.1.0.dist-info/METADATA +212 -0
  34. aisberg-0.1.0.dist-info/RECORD +43 -0
  35. aisberg-0.1.0.dist-info/WHEEL +5 -0
  36. aisberg-0.1.0.dist-info/licenses/LICENSE +9 -0
  37. aisberg-0.1.0.dist-info/top_level.txt +3 -0
  38. tests/integration/test_collections_integration.py +115 -0
  39. tests/unit/test_collections_sync.py +104 -0
  40. tmp/test.py +33 -0
  41. tmp/test_async.py +126 -0
  42. tmp/test_doc_parse.py +12 -0
  43. tmp/test_sync.py +146 -0
@@ -0,0 +1,309 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Optional, Union, List, Literal
3
+
4
+ from ..models.embeddings import (
5
+ EncodingResponse,
6
+ ChunksDataList,
7
+ RerankerResponse,
8
+ ChunkData,
9
+ )
10
+ from ..models.collections import Collection
11
+ from ..api import async_endpoints, endpoints
12
+ from ..abstract.modules import AsyncModule, SyncModule
13
+
14
+
15
class AbstractEmbeddingsModule(ABC):
    """
    Abstract base class for embeddings modules.

    Declares the ``encode`` / ``retrieve`` / ``rerank`` contract and provides the
    input-normalisation helpers shared by the synchronous and asynchronous modules.
    """

    def __init__(self, parent, http_client):
        """
        Initialize the AbstractEmbeddingsModule.

        Args:
            parent: Parent client instance.
            http_client: HTTP client for making requests.
        """
        self._parent = parent
        self._client = http_client

    @abstractmethod
    def encode(
        self,
        input: Union[str, List[str]],
        model: str,
        encoding_format: Optional[Literal["float", "base64"]] = "float",
        normalize: Optional[bool] = False,
        **kwargs,
    ) -> EncodingResponse:
        """Encode one or several texts into embeddings.

        Args:
            input (Union[str, List[str]]): The text or texts to encode. Can be a single string or a list of strings.
            model (str): The model to use for encoding. Required, there is no default.
            encoding_format (str): The format of the encoding. Defaults to "float". Can be "float" or "base64".
            normalize (bool): Whether to normalize the embeddings. Defaults to False.
            **kwargs: Additional parameters for the encoding.

        Returns:
            EncodingResponse: The response containing the encoded embeddings.
        """
        pass

    @abstractmethod
    def retrieve(
        self,
        query: str,
        collections_names: List[Union[str, Collection]],
        limit: int = 10,
        score_threshold: float = 0.0,
        filters: Optional[List] = None,
        beta: float = 0.7,
    ) -> ChunksDataList:
        """Retrieve similar texts based on a query.

        Args:
            query (str): The query text to retrieve similar texts for.
            collections_names (List[Union[str, Collection]]): Collections to search in, given either by name or as Collection objects.
            limit (int): The maximum number of results to return. Defaults to 10.
            score_threshold (float): The minimum score threshold for results. Defaults to 0.0.
            filters (Optional[list]): A list of filters to apply to the retrieval. None means no filters.
            beta (float): Dense/Sparse trade-off parameter. Defaults to 0.7. 0 means full sparse, 1 means full dense.

        Returns:
            ChunksDataList: The retrieved chunks and their metadata.

        Raises:
            ValueError: If an entry of ``collections_names`` is neither a str nor a Collection.
        """
        pass

    @abstractmethod
    def rerank(
        self,
        query: str,
        documents: Union[ChunksDataList, List[Union[str, ChunkData]]],
        model: str,
        top_n: int = 10,
        return_documents: bool = True,
        threshold: Optional[float] = None,
    ) -> RerankerResponse:
        """Rerank texts based on a query.

        Args:
            query (str): The query text to rerank the documents against.
            documents (Union[ChunksDataList, List[Union[str, ChunkData]]]): The documents to rerank. Can be a ChunksDataList or a list of strings or ChunkData objects.
            model (str): The model to use for reranking. Required, there is no default.
            top_n (int): The number of top results to return. Defaults to 10.
            return_documents (bool): Whether to return the original documents in the response. Defaults to True.
            threshold (Optional[float]): When provided, the response is filtered by relevance score using this threshold. Defaults to None (no filtering).

        Returns:
            RerankerResponse: The response containing the reranked documents and their scores.

        Raises:
            ValueError: If the documents list is empty or contains invalid document types.
            TypeError: If ``documents`` is neither a ChunksDataList nor a list.
        """
        pass

    @staticmethod
    def _format_collections_names(
        collections_names: List[Union[str, Collection]],
    ) -> List[str]:
        """Normalise a mix of collection names and Collection objects into plain names.

        Args:
            collections_names: Items that are either a str (used as is) or a
                Collection (its ``name`` attribute is used).

        Returns:
            List[str]: The collection names, in input order.

        Raises:
            ValueError: If an item is neither a str nor a Collection.
        """
        coll_names = []

        for coll in collections_names:
            if isinstance(coll, Collection):
                coll_names.append(coll.name)
            elif isinstance(coll, str):
                coll_names.append(coll)
            else:
                raise ValueError(
                    f"Invalid collection type: {type(coll)}. Expected str or Collection."
                )

        return coll_names

    @staticmethod
    def _format_chunks_data_list(
        documents: Union[ChunksDataList, List[Union[str, ChunkData]]],
    ) -> List[str]:
        """Extract the plain texts from the documents handed to ``rerank``.

        Args:
            documents: Either a ChunksDataList (its ``texts()`` result is returned)
                or a list whose items are str or ChunkData (``text`` attribute).

        Returns:
            List[str]: The document texts, in input order.

        Raises:
            ValueError: If a plain list is empty or holds an unsupported item type.
            TypeError: If ``documents`` is neither a ChunksDataList nor a list.
        """
        if isinstance(documents, ChunksDataList):
            return documents.texts()

        if not isinstance(documents, list):
            # TypeError (still an Exception subclass) is the precise error for a wrong container type.
            raise TypeError(
                f"Documents list is not of the expected type: {type(documents)}. Expected ChunksDataList or list of str or ChunkData."
            )

        if not documents:
            raise ValueError("Documents list is empty.")

        chunks = []
        for doc in documents:
            if isinstance(doc, ChunkData):
                chunks.append(doc.text)
            elif isinstance(doc, str):
                chunks.append(doc)
            else:
                raise ValueError(
                    f"Invalid document type: {type(doc)}. Expected str or ChunkData."
                )

        return chunks
156
+
157
+
158
class SyncEmbeddingsModule(AbstractEmbeddingsModule, SyncModule):
    """
    Synchronous embeddings module.

    Each method forwards its arguments to the matching function of the
    ``endpoints`` API layer and validates the result into its response model.
    """

    def __init__(self, parent, http_client):
        # AbstractEmbeddingsModule.__init__ does not chain to super(), so the
        # SyncModule initialiser is invoked explicitly as well.
        super().__init__(parent, http_client)
        SyncModule.__init__(self, parent, http_client)

    def encode(
        self,
        input: str,
        model: str,
        encoding_format: Optional[Literal["float", "base64"]] = "float",
        normalize: Optional[bool] = False,
        **kwargs,
    ) -> EncodingResponse:
        """Call ``endpoints.embeddings`` and validate the result as an EncodingResponse."""
        return EncodingResponse.model_validate(
            endpoints.embeddings(
                self._client,
                input=input,
                model=model,
                encoding_format=encoding_format,
                normalize=normalize,
                **kwargs,
            )
        )

    def retrieve(
        self,
        query: str,
        collections_names: List[Union[str, Collection]],
        limit: int = 10,
        score_threshold: float = 0.0,
        filters: List = None,
        beta: float = 0.7,
    ) -> ChunksDataList:
        """Call ``endpoints.retrieve`` and validate the result as a ChunksDataList."""
        # A missing filter list is sent as an empty list.
        active_filters = [] if filters is None else filters
        names = self._format_collections_names(collections_names)

        payload = endpoints.retrieve(
            self._client,
            query=query,
            collections_names=names,
            limit=limit,
            score_threshold=score_threshold,
            filters=active_filters,
            beta=beta,
        )
        return ChunksDataList.model_validate(payload)

    def rerank(
        self,
        query: str,
        documents: Union[ChunksDataList, List[Union[str, ChunkData]]],
        model: str,
        top_n: int = 10,
        return_documents: bool = True,
        threshold: Optional[float] = None,
    ) -> RerankerResponse:
        """Call ``endpoints.rerank`` and optionally filter the result by relevance score."""
        texts = self._format_chunks_data_list(documents)
        payload = endpoints.rerank(
            self._client,
            query,
            texts,
            model,
            top_n,
            return_documents,
        )
        ranked = RerankerResponse.model_validate(payload)

        # Without a threshold the validated response is returned untouched.
        if threshold is None:
            return ranked
        return ranked.filter_by_relevance_score(threshold)
233
+
234
+
235
class AsyncEmbeddingsModule(AbstractEmbeddingsModule, AsyncModule):
    """
    Asynchronous embeddings module.

    Each coroutine awaits the matching function of the ``async_endpoints`` API
    layer and validates the result into its response model.
    """

    def __init__(self, parent, http_client):
        # AbstractEmbeddingsModule.__init__ does not chain to super(), so the
        # AsyncModule initialiser is invoked explicitly as well.
        super().__init__(parent, http_client)
        AsyncModule.__init__(self, parent, http_client)

    async def encode(
        self,
        input: str,
        model: str,
        encoding_format: Optional[Literal["float", "base64"]] = "float",
        normalize: Optional[bool] = False,
        **kwargs,
    ) -> EncodingResponse:
        """Await ``async_endpoints.embeddings`` and validate the result as an EncodingResponse."""
        payload = await async_endpoints.embeddings(
            self._client,
            input=input,
            model=model,
            encoding_format=encoding_format,
            normalize=normalize,
            **kwargs,
        )
        return EncodingResponse.model_validate(payload)

    async def retrieve(
        self,
        query: str,
        collections_names: List[Union[str, Collection]],
        limit: int = 10,
        score_threshold: float = 0.0,
        filters: List = None,
        beta: float = 0.7,
    ) -> ChunksDataList:
        """Await ``async_endpoints.retrieve`` and validate the result as a ChunksDataList."""
        # A missing filter list is sent as an empty list.
        active_filters = [] if filters is None else filters
        names = self._format_collections_names(collections_names)

        payload = await async_endpoints.retrieve(
            self._client,
            query=query,
            collections_names=names,
            limit=limit,
            score_threshold=score_threshold,
            filters=active_filters,
            beta=beta,
        )
        return ChunksDataList.model_validate(payload)

    async def rerank(
        self,
        query: str,
        documents: Union[ChunksDataList, List[Union[str, ChunkData]]],
        model: str,
        top_n: int = 10,
        return_documents: bool = True,
        threshold: Optional[float] = None,
    ) -> RerankerResponse:
        """Await ``async_endpoints.rerank`` and optionally filter the result by relevance score."""
        texts = self._format_chunks_data_list(documents)
        payload = await async_endpoints.rerank(
            self._client,
            query,
            texts,
            model,
            top_n,
            return_documents,
        )
        ranked = RerankerResponse.model_validate(payload)

        # Without a threshold the validated response is returned untouched.
        if threshold is None:
            return ranked
        return ranked.filter_by_relevance_score(threshold)
aisberg/modules/me.py ADDED
@@ -0,0 +1,77 @@
1
+ from typing import List
2
+ from abc import ABC
3
+ from ..models.token import TokenInfo
4
+
5
+ from abc import abstractmethod
6
+ from ..abstract.modules import SyncModule, AsyncModule
7
+ from ..api import endpoints, async_endpoints
8
+
9
+
10
class AbstractMeModule(ABC):
    """Shared logic of the "me" modules; subclasses supply ``_fetch_info``."""

    def __init__(self, parent, client):
        self._parent, self._client = parent, client

    @abstractmethod
    def _fetch_info(self) -> TokenInfo:
        """
        Fetch the information attached to the current API token.

        Returns:
            TokenInfo: Information about the API token.

        Raises:
            Exception: If there is an error fetching the token information.
        """

    def info(self) -> TokenInfo:
        """
        Return information about the current API token.

        Delegates to ``_fetch_info``.

        Returns:
            TokenInfo: Information about the API token.

        Raises:
            Exception: If there is an error fetching the token information.
        """
        return self._fetch_info()

    def groups(self) -> List[str]:
        """
        Return the groups listed on the current token information.

        Returns:
            list[str]: The ``groups`` attribute of the token information, or an
            empty list when that attribute is missing or falsy.

        Raises:
            Exception: If there is an error fetching the groups.
        """
        token_info = self.info()
        memberships = getattr(token_info, "groups", [])
        # Missing, None and empty values all collapse to a fresh empty list.
        return memberships if memberships else []
52
+
53
+
54
class SyncMeModule(SyncModule, AbstractMeModule):
    """Synchronous "me" module: token information fetched through ``endpoints.me``."""

    def __init__(self, parent, client):
        # Neither base initialiser is assumed to chain to the other, so both are called.
        SyncModule.__init__(self, parent, client)
        AbstractMeModule.__init__(self, parent, client)

    def _fetch_info(self) -> TokenInfo:
        """
        Fetch the current token information and return it as a TokenInfo.

        The payload is passed through ``TokenInfo.model_validate`` exactly as the
        asynchronous module does, so ``info()`` and ``groups()`` work on a
        TokenInfo rather than on whatever ``endpoints.me`` hands back.
        NOTE(review): assumes ``endpoints.me`` returns either a mapping or a
        TokenInfo instance - confirm against the API layer.

        Returns:
            TokenInfo: Information about the API token.
        """
        resp = endpoints.me(self._client)
        return TokenInfo.model_validate(resp)
61
+
62
+
63
class AsyncMeModule(AsyncModule, AbstractMeModule):
    """Asynchronous "me" module: coroutine versions of ``info`` and ``groups``."""

    def __init__(self, parent, client):
        AsyncModule.__init__(self, parent, client)
        AbstractMeModule.__init__(self, parent, client)

    async def _fetch_info(self) -> TokenInfo:
        """Await ``async_endpoints.me`` and validate the payload as a TokenInfo."""
        return TokenInfo.model_validate(await async_endpoints.me(self._client))

    async def info(self) -> TokenInfo:
        """Return information about the current API token."""
        token_info = await self._fetch_info()
        return token_info

    async def groups(self) -> List[str]:
        """Return the token's ``groups`` attribute, or an empty list when missing or falsy."""
        token_info = await self.info()
        memberships = getattr(token_info, "groups", [])
        return memberships if memberships else []
@@ -0,0 +1,108 @@
1
+ from typing import List
2
+ from abc import ABC
3
+
4
+ from ..models.models import Model
5
+
6
+ from abc import abstractmethod
7
+ from ..abstract.modules import SyncModule, AsyncModule
8
+ from ..api import endpoints, async_endpoints
9
+
10
+
11
class AbstractModelsModule(ABC):
    """Contract of the models modules plus the lookup helper they share."""

    def __init__(self, parent, client):
        self._parent, self._client = parent, client

    @abstractmethod
    def list(self) -> List[Model]:
        """
        Get the list of available models.

        Returns:
            List[Model]: The available models.

        Raises:
            Exception: If there is an error fetching the models.
        """

    @abstractmethod
    def get(self, model_id: str) -> Model:
        """Get details of a specific model.

        Args:
            model_id (str): The ID of the model to retrieve.

        Returns:
            Model: The details of the specified model.

        Raises:
            ValueError: If the specified model is not found.
        """

    @abstractmethod
    def is_available(self, model_id: str) -> bool:
        """Check if a specific model is available.

        Args:
            model_id (str): The ID of the model to check.

        Returns:
            bool: True if the model is available, False otherwise.
        """

    @staticmethod
    def _get_model_by_id(models: List[Model], model_id: str) -> Model | None:
        """Return the first entry of ``models`` whose ``id`` equals ``model_id``, or None."""
        return next(
            (candidate for candidate in models if candidate.id == model_id),
            None,
        )
63
+
64
+
65
class SyncModelsModule(SyncModule, AbstractModelsModule):
    """Synchronous models module backed by ``endpoints.models``."""

    def __init__(self, parent, client):
        SyncModule.__init__(self, parent, client)
        AbstractModelsModule.__init__(self, parent, client)

    def list(self) -> List[Model]:
        """Return whatever ``endpoints.models`` yields for this client."""
        available = endpoints.models(self._client)
        return available

    def get(self, model_id: str) -> Model:
        """Return the listed model whose id is ``model_id``.

        Raises:
            ValueError: If no listed model has that id.
        """
        match = self._get_model_by_id(self.list(), model_id)
        if match is not None:
            return match
        raise ValueError("No model found")

    def is_available(self, model_id: str) -> bool:
        """Return True when ``get`` finds the model, False when it raises ValueError."""
        try:
            self.get(model_id)
        except ValueError:
            return False
        return True
86
+
87
+
88
class AsyncModelsModule(AsyncModule, AbstractModelsModule):
    """Asynchronous models module backed by ``async_endpoints.models``."""

    def __init__(self, parent, client):
        AsyncModule.__init__(self, parent, client)
        AbstractModelsModule.__init__(self, parent, client)

    async def list(self) -> List[Model]:
        """Return whatever ``async_endpoints.models`` yields for this client."""
        available = await async_endpoints.models(self._client)
        return available

    async def get(self, model_id: str) -> Model:
        """Return the listed model whose id is ``model_id``.

        Raises:
            ValueError: If no listed model has that id.
        """
        match = self._get_model_by_id(await self.list(), model_id)
        if match is not None:
            return match
        raise ValueError("No model found")

    async def is_available(self, model_id: str) -> bool:
        """Return True when ``get`` finds the model, False when it raises ValueError."""
        try:
            await self.get(model_id)
        except ValueError:
            return False
        return True
@@ -0,0 +1,78 @@
1
+ from ..abstract.modules import BaseModule
2
+ from ..exceptions import ToolExecutionError
3
+ from typing import Callable, Dict, Any
4
+
5
+
6
class ToolsModule(BaseModule):
    """
    Tools module for the aisberg application.
    This module provides methods to register and execute tools.

    All tools live in the parent's ``tool_registry`` mapping (name -> callable).
    """

    def __init__(self, parent):
        """
        Initialize the ToolsModule.

        Args:
            parent (AisbergClient): Parent client instance.
        """
        super().__init__(parent)

    def register(self, name: str, func: Callable) -> None:
        """
        Register a function as an available tool.

        An existing entry with the same name is replaced.

        Args:
            name (str): Name of the tool (must match the name used in the tool definition).
            func (Callable): Function to run when the tool is called.
        """
        self._parent.tool_registry[name] = func

    def execute(self, tool_name: str, arguments: Dict[str, Any]) -> Any:
        """
        Execute a tool with the given arguments.

        Args:
            tool_name (str): Name of the tool to execute.
            arguments (Dict[str, Any]): Arguments passed to the tool as keyword arguments.

        Returns:
            Any: Result of the tool execution.

        Raises:
            ToolExecutionError: If the tool is not registered or if its execution fails.
                In the latter case the original exception is attached as ``__cause__``.
        """
        registry = self._parent.tool_registry
        if tool_name not in registry:
            raise ToolExecutionError(f"Tool '{tool_name}' not registered")

        try:
            return registry[tool_name](**arguments)
        except Exception as e:
            # Chain explicitly so the tool's own traceback is reported as the direct cause.
            raise ToolExecutionError(
                f"Error executing tool '{tool_name}': {e}"
            ) from e

    def list(self) -> Dict[str, Callable]:
        """
        List all registered tools.

        Returns:
            Dict[str, Callable]: The parent's registry itself (not a copy), mapping
            tool names to their functions.
        """
        return self._parent.tool_registry

    def clear(self) -> None:
        """
        Remove every registered tool.
        """
        self._parent.tool_registry.clear()

    def remove(self, tool_name: str) -> None:
        """
        Remove a registered tool.

        Args:
            tool_name (str): Name of the tool to remove.

        Raises:
            ToolExecutionError: If no tool is registered under that name.
        """
        registry = self._parent.tool_registry
        if tool_name not in registry:
            raise ToolExecutionError(f"Tool '{tool_name}' not registered")
        del registry[tool_name]