PyPI - aisberg - Versions diffs - 0.1.0__tar.gz → 0.2.0__tar.gz - Mend

aisberg 0.1.0__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54) hide show

{aisberg-0.1.0 → aisberg-0.2.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aisberg
-Version: 0.1.0
+Version: 0.2.0
 Summary: Aisberg SDK for Python - A simple and powerful SDK to interact with the Aisberg API
 Author: Free Pro
 Author-email: Mathis Lambert <mathis.lambert@freepro.com>
@@ -16,6 +16,7 @@ License-File: LICENSE
 Requires-Dist: httpx>=0.28.1
 Requires-Dist: pydantic>=2.11.7
 Requires-Dist: pydantic-settings>=2.10.1
+Requires-Dist: boto3>=1.38.44
 Provides-Extra: dev
 Requires-Dist: pytest>=8.4.1; extra == "dev"
 Requires-Dist: pytest-asyncio>=1.0.0; extra == "dev"
@@ -47,6 +48,7 @@ conversational LLM workflows, collections, embeddings, and more.
 - **Environment-based configuration** (supports `.env` files and system environment variables)
 - **Context manager support** for easy resource management
 - **Custom tool registration**: Easily extend LLM capabilities with your own functions
+- **Document Parsing**: Parse documents into structured data (e.g., JSON, CSV, PNG, PDF, etc.)
 ---
@@ -77,6 +79,15 @@ AISBERG_API_KEY=...
 AISBERG_BASE_URL=https://url
 ```
+In order to use the Document Parsing feature, you also need to set the `S3` credentials - ask the FreePro team for
+these:
+```env
+S3_ACCESS_KEY_ID=...
+S3_SECRET_ACCESS_KEY=...
+S3_ENDPOINT=https://s3.endpoint
+```
 ### 2. **Synchronous Usage**
 ```python
@@ -127,6 +138,7 @@ asyncio.run(main())
 * `client.models` — Model discovery & info
 * `client.workflows` — Workflow management & execution
 * `client.tools` — Register and execute tools for LLM tool calls
+* `client.documents` — Document parsing and management
 Each module is available both in the sync and async clients with similar APIs.
@@ -173,6 +185,9 @@ client = AisbergClient(
 * `AISBERG_API_KEY`
 * `AISBERG_BASE_URL`
 * `AISBERG_TIMEOUT` (optional)
+* `S3_ACCESS_KEY_ID` (for document parsing)(optional)
+* `S3_SECRET_ACCESS_KEY` (for document parsing)(optional)
+* `S3_ENDPOINT` (for document parsing)(optional)
 ### **Using in a Context Manager**
@@ -208,5 +223,3 @@ For enterprise/commercial use, please contact [Mathis Lambert](mailto:mathis.lam
 ## Support
 For support, bug reports, or feature requests, please contact your technical representative.
----

{aisberg-0.1.0 → aisberg-0.2.0}/README.md RENAMED Viewed

@@ -18,6 +18,7 @@ conversational LLM workflows, collections, embeddings, and more.
 - **Environment-based configuration** (supports `.env` files and system environment variables)
 - **Context manager support** for easy resource management
 - **Custom tool registration**: Easily extend LLM capabilities with your own functions
+- **Document Parsing**: Parse documents into structured data (e.g., JSON, CSV, PNG, PDF, etc.)
 ---
@@ -48,6 +49,15 @@ AISBERG_API_KEY=...
 AISBERG_BASE_URL=https://url
 ```
+In order to use the Document Parsing feature, you also need to set the `S3` credentials - ask the FreePro team for
+these:
+```env
+S3_ACCESS_KEY_ID=...
+S3_SECRET_ACCESS_KEY=...
+S3_ENDPOINT=https://s3.endpoint
+```
 ### 2. **Synchronous Usage**
 ```python
@@ -98,6 +108,7 @@ asyncio.run(main())
 * `client.models` — Model discovery & info
 * `client.workflows` — Workflow management & execution
 * `client.tools` — Register and execute tools for LLM tool calls
+* `client.documents` — Document parsing and management
 Each module is available both in the sync and async clients with similar APIs.
@@ -144,6 +155,9 @@ client = AisbergClient(
 * `AISBERG_API_KEY`
 * `AISBERG_BASE_URL`
 * `AISBERG_TIMEOUT` (optional)
+* `S3_ACCESS_KEY_ID` (for document parsing)(optional)
+* `S3_SECRET_ACCESS_KEY` (for document parsing)(optional)
+* `S3_ENDPOINT` (for document parsing)(optional)
 ### **Using in a Context Manager**
@@ -179,5 +193,3 @@ For enterprise/commercial use, please contact [Mathis Lambert](mailto:mathis.lam
 ## Support
 For support, bug reports, or feature requests, please contact your technical representative.
----

{aisberg-0.1.0 → aisberg-0.2.0}/aisberg/api/async_endpoints.py RENAMED Viewed

@@ -1,4 +1,4 @@
-from io import BytesIO
+import json
 import httpx
 from ..models.chat import (
@@ -7,9 +7,10 @@ from ..models.chat import (
     ChatCompletionResponse,
     ChatCompletionChunk,
 )
-from typing import Optional, AsyncGenerator, Union, List, Any, Tuple
+from typing import Optional, AsyncGenerator, Union, List, Any
-from ..models.collections import GroupCollections, PointDetails
+from ..models.collections import GroupCollections, PointDetails, ChunkingDictInput
+from ..models.documents import DocumentParserResponse
 from ..models.embeddings import (
     EncodingFormat,
     EncodingResponse,
@@ -21,7 +22,7 @@ from ..models.token import TokenInfo
 from ..models.workflows import WorkflowDetails, Workflow
 from ..utils import parse_chat_line, WorkflowLineParser
 from ..requests.async_requests import areq, areq_stream
-from ..models.requests import AnyDict, AnyList
+from ..models.requests import AnyDict, AnyList, HttpxFileField
 async def models(client: httpx.AsyncClient) -> List[Model]:
@@ -67,7 +68,7 @@ async def collections(client: httpx.AsyncClient) -> List[GroupCollections]:
 async def collection(
-    client: httpx.AsyncClient, collection_id: str, group_id: str
+    client: httpx.AsyncClient, collection_id: str, group_id: Optional[str] = None
 ) -> List[PointDetails]:
     """
     Get details of a specific collection.
@@ -85,6 +86,128 @@ async def collection(
         raise e
+async def create_collection(
+    client: httpx.AsyncClient,
+    name: str,
+    model: str,
+    group: Optional[str] = None,
+):
+    """
+    Create a new collection with the specified name and optional group.
+    """
+    payload = {"collection_name": name, "embedding_model": model}
+    if group is not None:
+        payload["group"] = group
+    return await areq(
+        client,
+        "POST",
+        "/collections",
+        AnyDict,
+        json=payload,
+    )
+async def delete_collection(
+    client: httpx.AsyncClient,
+    name: str,
+    group: Optional[str] = None,
+):
+    """
+    Delete a collection with the specified name and optional group.
+    """
+    payload = {"collections": [name]}
+    if group is not None:
+        payload["group"] = group
+    return await areq(
+        client,
+        "DELETE",
+        "/collections",
+        AnyDict,
+        json=payload,
+    )
+async def insert_points_in_collection(
+    client: httpx.AsyncClient,
+    name: str,
+    files: HttpxFileField,
+    normalize: bool,
+    chunking_dict: Optional[ChunkingDictInput] = None,
+    group: Optional[str] = None,
+):
+    """
+    Insert points into a collection with the specified name.
+    """
+    payload = {
+        "chunking_dict": json.dumps({"method": "custom", "params": {}}),
+        "normalize": normalize,
+    }
+    if group is not None:
+        payload["group"] = group
+    if chunking_dict is not None:
+        payload["chunking_dict"] = chunking_dict.model_dump_json()
+    return await areq(
+        client,
+        "POST",
+        f"/collections/{name}",
+        AnyDict,
+        data=payload,
+        files=files,
+    )
+async def delete_points_in_collection(
+    client: httpx.AsyncClient,
+    points_ids: List[str],
+    name: str,
+    group: Optional[str] = None,
+):
+    """
+    Delete points into a collection with the specified name.
+    """
+    payload = {
+        "points": points_ids,
+        "collection": name,
+    }
+    if group is not None:
+        payload["group"] = group
+    return await areq(
+        client,
+        "DELETE",
+        "/collections/chunks",
+        AnyDict,
+        json=payload,
+    )
+async def delete_all_points_in_collection(
+    client: httpx.AsyncClient,
+    name: str,
+    group: Optional[str] = None,
+):
+    """
+    Delete All points into a collection with the specified name.
+    """
+    payload = {
+        "collection": name,
+    }
+    if group is not None:
+        payload["group"] = group
+    return await areq(
+        client,
+        "DELETE",
+        "/collections/all/chunks",
+        AnyDict,
+        json=payload,
+    )
 async def me(client: httpx.AsyncClient) -> TokenInfo:
     """
     Get the details of the current user.
@@ -303,31 +426,26 @@ async def run_workflow(
         raise e
-async def parse_document(
+async def parse_documents(
     client: httpx.AsyncClient,
-    file: Tuple[bytes, str],
-    source: str,
+    files: HttpxFileField,
     group: Optional[str] = None,
-) -> str:
+    **kwargs,
+) -> DocumentParserResponse:
     """
-    Parse a document using the specified model.
+    Parse a single or multiple documents using the document parser endpoint.
+    Returns the ID of the parsed document to be downloaded later from the S3 bucket.
     """
-    payload = {
-        "source": source,
-    }
+    payload = {**kwargs}
     if group is not None:
         payload["group"] = group
-    files = {"file": (file[1], BytesIO(file[0]), "application/octet-stream")}
-    response = areq(
+    response = await areq(
         client,
         "POST",
         "/document-parser/parsing/parse",
-        AnyDict,
+        DocumentParserResponse,
         files=files,
-        json=payload,
+        data=payload,
     )
-    print(response)
     return response

{aisberg-0.1.0 → aisberg-0.2.0}/aisberg/api/endpoints.py RENAMED Viewed

@@ -1,4 +1,4 @@
-from io import BytesIO
+import json
 import httpx
 from ..models.chat import (
@@ -7,9 +7,10 @@ from ..models.chat import (
     ChatCompletionResponse,
     ChatCompletionChunk,
 )
-from typing import Optional, Generator, Union, List, Any, Tuple
+from typing import Optional, Generator, Union, List, Any
-from ..models.collections import GroupCollections, PointDetails
+from ..models.collections import GroupCollections, PointDetails, ChunkingDictInput
+from ..models.documents import DocumentParserResponse
 from ..models.embeddings import (
     EncodingFormat,
     EncodingResponse,
@@ -21,7 +22,7 @@ from ..models.token import TokenInfo
 from ..models.workflows import WorkflowDetails, Workflow
 from ..utils import parse_chat_line, WorkflowLineParser
 from ..requests.sync_requests import req, req_stream
-from ..models.requests import AnyDict, AnyList
+from ..models.requests import AnyDict, AnyList, HttpxFileField
 def models(client: httpx.Client) -> List[Model]:
@@ -64,7 +65,7 @@ def collections(client: httpx.Client) -> List[GroupCollections]:
 def collection(
-    client: httpx.Client, collection_id: str, group_id: str
+    client: httpx.Client, collection_id: str, group_id: Optional[str] = None
 ) -> List[PointDetails]:
     """
     Get details of a specific collection.
@@ -80,6 +81,128 @@ def collection(
         raise e
+def create_collection(
+    client: httpx.Client,
+    name: str,
+    model: str,
+    group: Optional[str] = None,
+):
+    """
+    Create a new collection with the specified name and optional group.
+    """
+    payload = {"collection_name": name, "embedding_model": model}
+    if group is not None:
+        payload["group"] = group
+    return req(
+        client,
+        "POST",
+        "/collections",
+        AnyDict,
+        json=payload,
+    )
+def delete_collection(
+    client: httpx.Client,
+    name: str,
+    group: Optional[str] = None,
+):
+    """
+    Delete a collection with the specified name and optional group.
+    """
+    payload = {"collections": [name]}
+    if group is not None:
+        payload["group"] = group
+    return req(
+        client,
+        "DELETE",
+        "/collections",
+        AnyDict,
+        json=payload,
+    )
+def insert_points_in_collection(
+    client: httpx.Client,
+    name: str,
+    files: HttpxFileField,
+    normalize: bool,
+    chunking_dict: Optional[ChunkingDictInput] = None,
+    group: Optional[str] = None,
+):
+    """
+    Insert points into a collection with the specified name.
+    """
+    payload = {
+        "chunking_dict": json.dumps({"method": "custom", "params": {}}),
+        "normalize": normalize,
+    }
+    if group is not None:
+        payload["group"] = group
+    if chunking_dict is not None:
+        payload["chunking_dict"] = chunking_dict.model_dump_json()
+    return req(
+        client,
+        "POST",
+        f"/collections/{name}",
+        AnyDict,
+        data=payload,
+        files=files,
+    )
+def delete_points_in_collection(
+    client: httpx.Client,
+    points_ids: List[str],
+    name: str,
+    group: Optional[str] = None,
+):
+    """
+    Delete points into a collection with the specified name.
+    """
+    payload = {
+        "points": points_ids,
+        "collection": name,
+    }
+    if group is not None:
+        payload["group"] = group
+    return req(
+        client,
+        "DELETE",
+        "/collections/chunks",
+        AnyDict,
+        json=payload,
+    )
+def delete_all_points_in_collection(
+    client: httpx.Client,
+    name: str,
+    group: Optional[str] = None,
+):
+    """
+    Delete All points into a collection with the specified name.
+    """
+    payload = {
+        "collection": name,
+    }
+    if group is not None:
+        payload["group"] = group
+    return req(
+        client,
+        "DELETE",
+        "/collections/all/chunks",
+        AnyDict,
+        json=payload,
+    )
 def me(client: httpx.Client) -> TokenInfo:
     """
     Get the details of the current user.
@@ -298,31 +421,26 @@ def run_workflow(
         raise e
-def parse_document(
+def parse_documents(
     client: httpx.Client,
-    file: Tuple[bytes, str],
-    source: str,
+    files: HttpxFileField,
     group: Optional[str] = None,
-) -> str:
+    **kwargs,
+) -> DocumentParserResponse:
     """
-    Parse a document using the specified model.
+    Parse a single or multiple documents using the document parser endpoint.
+    Returns the ID of the parsed document to be downloaded later from the S3 bucket.
     """
-    payload = {
-        "source": source,
-    }
+    payload = {**kwargs}
     if group is not None:
         payload["group"] = group
-    files = {"file": (file[1], BytesIO(file[0]), "application/octet-stream")}
     response = req(
         client,
         "POST",
         "/document-parser/parsing/parse",
-        AnyDict,
+        DocumentParserResponse,
         files=files,
         data=payload,
     )
-    print(response)
     return response

{aisberg-0.1.0 → aisberg-0.2.0}/aisberg/async_client.py RENAMED Viewed

@@ -10,6 +10,8 @@ from .modules import (
     AsyncModelsModule,
     AsyncWorkflowsModule,
     ToolsModule,
+    AsyncDocumentsModule,
+    SyncS3Module,
 )
@@ -33,6 +35,12 @@ class AisbergAsyncClient:
         self.me = AsyncMeModule(self, self._client)
         self.collections = AsyncCollectionsModule(self, self._client)
         self.embeddings = AsyncEmbeddingsModule(self, self._client)
+        self.documents = AsyncDocumentsModule(self, self._client)
+        self._s3 = SyncS3Module(
+            settings.s3_access_key_id,
+            settings.s3_secret_access_key,
+            settings.s3_endpoint,
+        )
     async def initialize(self):
         """

{aisberg-0.1.0 → aisberg-0.2.0}/aisberg/client.py RENAMED Viewed

@@ -9,6 +9,8 @@ from .modules import (
     SyncModelsModule,
     SyncWorkflowsModule,
     ToolsModule,
+    SyncDocumentsModule,
+    SyncS3Module,
 )
@@ -39,6 +41,12 @@ class AisbergClient:
         self.me = SyncMeModule(self, self._client)
         self.collections = SyncCollectionsModule(self, self._client)
         self.embeddings = SyncEmbeddingsModule(self, self._client)
+        self.documents = SyncDocumentsModule(self, self._client)
+        self._s3 = SyncS3Module(
+            settings.s3_access_key_id,
+            settings.s3_secret_access_key,
+            settings.s3_endpoint,
+        )
         # Validate API key
         self._validate_api_key()

{aisberg-0.1.0 → aisberg-0.2.0}/aisberg/config.py RENAMED Viewed

@@ -5,10 +5,16 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
 class Settings(BaseSettings):
     # Variables attendues
+    # -- API --
     aisberg_api_key: Union[str, None] = None
     aisberg_base_url: Union[str, None] = None
     timeout: int = 30
+    # -- S3 --
+    s3_access_key_id: Union[str, None] = None
+    s3_secret_access_key: Union[str, None] = None
+    s3_endpoint: Union[str, None] = None
     # Pour indiquer le fichier .env
     model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")

{aisberg-0.1.0 → aisberg-0.2.0}/aisberg/models/collections.py RENAMED Viewed

@@ -11,6 +11,20 @@ class GroupCollections(BaseModel):
     collections: List[Collection]
+class CollectionDataset(BaseModel):
+    chunks: List[str]
+    metadata: Optional[dict] = []
+class CollectionCreateResponse(BaseModel):
+    message: Optional[str] = None
+class ChunkingDictInput(BaseModel):
+    method: Optional[str] = "custom"
+    params: Optional[dict] = {}
 # Modèle plus structuré pour payload
 class Payload(BaseModel):
     method: Optional[str] = None
@@ -32,5 +46,5 @@ class PointDetails(BaseModel):
 class CollectionDetails(BaseModel):
     name: str
-    group: str
+    group: Optional[str] = None
     points: List[PointDetails]

aisberg-0.2.0/aisberg/models/documents.py ADDED Viewed

@@ -0,0 +1,46 @@
+from pydantic import BaseModel
+from typing import Optional, List, Tuple, Union
+from io import BytesIO
+class DocumentParserResponse(BaseModel):
+    """
+    Response model for document parsing.
+    """
+    message: Optional[str] = None
+    parsedFiles: Optional[List[str]] = None
+    bucketName: Optional[str] = None
+class FileObject(BaseModel):
+    """
+    Represents a file object with its name and content.
+    """
+    name: str
+    buffer: bytes
+class DocumentParserDocOutput(BaseModel):
+    type: str
+    data: Union[str, dict, list]
+class ParsedDocument(BaseModel):
+    """
+    Represents a parsed document with its content and metadata.
+    """
+    content: DocumentParserDocOutput
+    metadata: Optional[dict] = None
+DocumentParserFileInput = Union[
+    str,
+    bytes,
+    BytesIO,
+    Tuple[bytes, str],
+    "FileObject",
+    List[Union[str, bytes, BytesIO, Tuple[bytes, str], "FileObject"]],
+]

{aisberg-0.1.0 → aisberg-0.2.0}/aisberg/models/requests.py RENAMED Viewed

@@ -1,4 +1,5 @@
-from typing import List, Any
+from typing import List, Any, Tuple
+from io import BytesIO
 from pydantic import BaseModel, RootModel, ConfigDict
@@ -9,3 +10,6 @@ class AnyDict(BaseModel):
 class AnyList(RootModel[List[Any]]):
     pass
+HttpxFileField = List[Tuple[str, Tuple[str, BytesIO, str]]]

{aisberg-0.1.0 → aisberg-0.2.0}/aisberg/modules/__init__.py RENAMED Viewed

@@ -5,6 +5,8 @@ from .me import AsyncMeModule, SyncMeModule
 from .models import AsyncModelsModule, SyncModelsModule
 from .workflows import AsyncWorkflowsModule, SyncWorkflowsModule
 from .tools import ToolsModule
+from .documents import AsyncDocumentsModule, SyncDocumentsModule
+from .s3 import SyncS3Module
 __all__ = [
     "AsyncChatModule",
@@ -20,4 +22,7 @@ __all__ = [
     "AsyncWorkflowsModule",
     "SyncWorkflowsModule",
     "ToolsModule",
+    "AsyncDocumentsModule",
+    "SyncDocumentsModule",
+    "SyncS3Module",
 ]

aisberg 0.1.0__tar.gz → 0.2.0__tar.gz

aisberg 0.1.0__tar.gz → 0.2.0__tar.gz