groundx 2.0.20-py3-none-any.whl → 2.0.29-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
groundx/ingest.py ADDED
@@ -0,0 +1,334 @@
+ import aiohttp, io, json, mimetypes, requests, typing, os
+ from asyncio import TimeoutError
+ from urllib.parse import urlparse
+
+ from json.decoder import JSONDecodeError
+
+ from .client import GroundXBase, AsyncGroundXBase
+ from .core.api_error import ApiError
+ from .core.pydantic_utilities import parse_obj_as
+ from .core.request_options import RequestOptions
+ from .errors.bad_request_error import BadRequestError
+ from .errors.unauthorized_error import UnauthorizedError
+ from .types.document import Document
+ from .types.ingest_remote_document import IngestRemoteDocument
+ from .types.ingest_response import IngestResponse
+
+ # this is used as the default value for optional parameters
+ OMIT = typing.cast(typing.Any, ...)
+
+
+ DOCUMENT_TYPE_TO_MIME = {
+     "txt": "text/plain",
+     "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+     "pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+     "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+     "pdf": "application/pdf",
+     "png": "image/png",
+     "jpg": "image/jpeg",
+     "csv": "text/csv",
+     "tsv": "text/tab-separated-values",
+     "json": "application/json",
+ }
+ MIME_TO_DOCUMENT_TYPE = {v: k for k, v in DOCUMENT_TYPE_TO_MIME.items()}
+
+
+ def prep_documents(
+     documents: typing.Sequence[Document],
+ ) -> typing.Tuple[
+     typing.List[IngestRemoteDocument],
+     typing.List[
+         typing.Tuple[str, typing.Tuple[typing.Union[str, None], typing.BinaryIO, str]]
+     ],
+ ]:
+     """
+     Process documents and separate them into remote and local documents.
+     """
+     if not documents:
+         raise ValueError("No documents provided for ingestion.")
+
+     def is_valid_local_path(path: str) -> bool:
+         expanded_path = os.path.expanduser(path)
+         return os.path.exists(expanded_path)
+
+     def is_valid_url(path: str) -> bool:
+         try:
+             result = urlparse(path)
+             return all([result.scheme, result.netloc])
+         except ValueError:
+             return False
+
+     idx = 0
+     remote_documents: typing.List[IngestRemoteDocument] = []
+     local_documents: typing.List[
+         typing.Tuple[str, typing.Tuple[typing.Union[str, None], typing.BinaryIO, str]]
+     ] = []
+
+     for document in documents:
+         if not hasattr(document, "file_path"):
+             raise ValueError("Each document must have a 'file_path' attribute.")
+
+         if is_valid_url(document.file_path):
+             remote_document = IngestRemoteDocument(
+                 bucket_id=document.bucket_id,
+                 file_name=document.file_name,
+                 file_type=document.file_type,
+                 search_data=document.search_data,
+                 source_url=document.file_path,
+             )
+             remote_documents.append(remote_document)
+         elif is_valid_local_path(document.file_path):
+             expanded_path = os.path.expanduser(document.file_path)
+             file_name = os.path.basename(expanded_path)
+             mime_type = mimetypes.guess_type(file_name)[0] or "application/octet-stream"
+             file_type = MIME_TO_DOCUMENT_TYPE.get(mime_type, None)
+             if document.file_type:
+                 file_type = document.file_type
+                 mime_type = DOCUMENT_TYPE_TO_MIME.get(
+                     document.file_type, "application/octet-stream"
+                 )
+
+             if document.file_name:
+                 file_name = document.file_name
+
+             try:
+                 local_documents.append(
+                     (
+                         "blob",
+                         (
+                             file_name,
+                             open(expanded_path, "rb"),
+                             mime_type,
+                         ),
+                     )
+                 )
+             except Exception as e:
+                 raise ValueError(f"Error reading file {expanded_path}: {e}")
+
+             metadata = {
+                 "bucketId": document.bucket_id,
+                 "fileName": file_name,
+                 "fileType": file_type,
+             }
+             if document.search_data:
+                 metadata["searchData"] = document.search_data
+
+             local_documents.append(
+                 (
+                     "metadata",
+                     (
+                         f"data.json",
+                         io.BytesIO(json.dumps(metadata).encode("utf-8")),
+                         "application/json",
+                     ),
+                 )
+             )
+             idx += 1
+         else:
+             raise ValueError(f"Invalid file path: {document.file_path}")
+
+     return remote_documents, local_documents
+
+
+ class GroundX(GroundXBase):
+     def ingest(
+         self,
+         *,
+         documents: typing.Sequence[Document],
+         request_options: typing.Optional[RequestOptions] = None,
+     ) -> IngestResponse:
+         """
+         Ingest local or hosted documents into a GroundX bucket.
+
+         Parameters
+         ----------
+         documents : typing.Sequence[Document]
+
+         request_options : typing.Optional[RequestOptions]
+             Request-specific configuration.
+
+         Returns
+         -------
+         IngestResponse
+             Documents successfully uploaded
+
+         Examples
+         --------
+         from groundx import Document, GroundX
+
+         client = GroundX(
+             api_key="YOUR_API_KEY",
+         )
+
+         client.ingest(
+             documents=[
+                 Document(
+                     bucket_id=1234,
+                     file_name="my_file1.txt",
+                     file_path="https://my.source.url.com/file1.txt",
+                     file_type="txt",
+                 )
+             ],
+         )
+         """
+         remote_documents, local_documents = prep_documents(documents)
+
+         if local_documents and remote_documents:
+             raise ValueError("Documents must all be either local or remote, not a mix.")
+
+         if len(remote_documents) > 0:
+             return self.documents.ingest_remote(
+                 documents=remote_documents,
+                 request_options=request_options,
+             )
+
+         timeout = self._client_wrapper.get_timeout()
+         headers = self._client_wrapper.get_headers()
+         base_url = self._client_wrapper.get_base_url().rstrip("/")
+         follow_redirects = getattr(
+             self._client_wrapper.httpx_client, "follow_redirects", True
+         )
+
+         url = f"{base_url}/v1/ingest/documents/local"
+         _response = requests.post(
+             url,
+             files=local_documents,
+             headers=headers,
+             timeout=timeout,
+             allow_redirects=follow_redirects,
+         )
+
+         try:
+             if 200 <= _response.status_code < 300:
+                 return typing.cast(
+                     IngestResponse,
+                     parse_obj_as(
+                         type_=IngestResponse,  # type: ignore
+                         object_=_response.json(),
+                     ),
+                 )
+             if _response.status_code == 400:
+                 raise BadRequestError(
+                     typing.cast(
+                         typing.Optional[typing.Any],
+                         parse_obj_as(
+                             type_=typing.Optional[typing.Any],  # type: ignore
+                             object_=_response.json(),
+                         ),
+                     )
+                 )
+             if _response.status_code == 401:
+                 raise UnauthorizedError(
+                     typing.cast(
+                         typing.Optional[typing.Any],
+                         parse_obj_as(
+                             type_=typing.Optional[typing.Any],  # type: ignore
+                             object_=_response.json(),
+                         ),
+                     )
+                 )
+             _response_json = _response.json()
+         except JSONDecodeError:
+             raise ApiError(status_code=_response.status_code, body=_response.text)
+
+         raise ApiError(status_code=_response.status_code, body=_response_json)
+
+
+ class AsyncGroundX(AsyncGroundXBase):
+     async def ingest(
+         self,
+         *,
+         documents: typing.Sequence[Document],
+         request_options: typing.Optional[RequestOptions] = None,
+     ) -> IngestResponse:
+         """
+         Ingest local or hosted documents into a GroundX bucket.
+
+         Parameters
+         ----------
+         documents : typing.Sequence[Document]
+
+         request_options : typing.Optional[RequestOptions]
+             Request-specific configuration.
+
+         Returns
+         -------
+         IngestResponse
+             Documents successfully uploaded
+
+         Examples
+         --------
+         import asyncio
+
+         from groundx import AsyncGroundX, Document
+
+         client = AsyncGroundX(
+             api_key="YOUR_API_KEY",
+         )
+
+         async def main() -> None:
+             await client.ingest(
+                 documents=[
+                     Document(
+                         bucket_id=1234,
+                         file_name="my_file1.txt",
+                         file_path="https://my.source.url.com/file1.txt",
+                         file_type="txt",
+                     )
+                 ],
+             )
+
+         asyncio.run(main())
+         """
+         remote_documents, local_documents = prep_documents(documents)
+
+         if local_documents and remote_documents:
+             raise ValueError("Documents must all be either local or remote, not a mix.")
+
+         if len(remote_documents) > 0:
+             return await self.documents.ingest_remote(
+                 documents=remote_documents,
+                 request_options=request_options,
+             )
+
+         timeout = self._client_wrapper.get_timeout()
+         headers = self._client_wrapper.get_headers()
+         base_url = self._client_wrapper.get_base_url().rstrip("/")
+
+         url = f"{base_url}/v1/ingest/documents/local"
+
+         try:
+             async with aiohttp.ClientSession() as session:
+                 data = aiohttp.FormData()
+                 for field_name, (file_name, file_obj, content_type) in local_documents:
+                     data.add_field(
+                         name=field_name,
+                         value=file_obj,
+                         filename=file_name,
+                         content_type=content_type,
+                     )
+
+                 async with session.post(
+                     url, data=data, headers=headers, timeout=timeout
+                 ) as response:
+                     if 200 <= response.status < 300:
+                         response_data = await response.json()
+                         return typing.cast(
+                             IngestResponse,
+                             parse_obj_as(
+                                 type_=IngestResponse,  # type: ignore
+                                 object_=response_data,
+                             ),
+                         )
+                     if response.status == 400:
+                         raise BadRequestError(await response.json())
+                     if response.status == 401:
+                         raise UnauthorizedError(await response.json())
+
+                     raise ApiError(
+                         status_code=response.status, body=await response.text()
+                     )
+         except TimeoutError:
+             raise ApiError(status_code=408, body="Request timed out")
+         except aiohttp.ClientError as e:
+             raise ApiError(status_code=500, body=str(e))
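The new `ingest` helpers route URL paths to `documents.ingest_remote` and upload local paths as multipart form data (a "blob" part plus a "metadata" JSON part) to `/v1/ingest/documents/local`. A minimal usage sketch for the local case, assuming a bucket ID of 1234 and a file at `./docs/report.pdf` (both hypothetical values, not taken from this release):

```python
from groundx import Document, GroundX

client = GroundX(
    api_key="YOUR_API_KEY",
)

# A local file_path is expanded, opened, and posted as multipart form data;
# mixing local and remote documents in one call raises ValueError.
response = client.ingest(
    documents=[
        Document(
            bucket_id=1234,                  # hypothetical bucket ID
            file_name="report.pdf",          # hypothetical file name
            file_path="./docs/report.pdf",   # hypothetical local path
            file_type="pdf",
        )
    ],
)
print(response)
```

The remote case is covered by the docstring examples above; a batch that contains both kinds of `Document` is rejected before any upload is attempted.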
groundx/types/__init__.py CHANGED
@@ -11,6 +11,7 @@ from .customer_response import CustomerResponse
  from .document import Document
  from .document_detail import DocumentDetail
  from .document_list_response import DocumentListResponse
+ from .document_local_ingest_request import DocumentLocalIngestRequest
  from .document_lookup_response import DocumentLookupResponse
  from .document_response import DocumentResponse
  from .document_type import DocumentType
@@ -22,6 +23,7 @@ from .health_response_health import HealthResponseHealth
  from .health_service import HealthService
  from .health_service_status import HealthServiceStatus
  from .ingest_local_document import IngestLocalDocument
+ from .ingest_local_document_metadata import IngestLocalDocumentMetadata
  from .ingest_remote_document import IngestRemoteDocument
  from .ingest_response import IngestResponse
  from .ingest_response_ingest import IngestResponseIngest
@@ -56,6 +58,7 @@ __all__ = [
      "Document",
      "DocumentDetail",
      "DocumentListResponse",
+     "DocumentLocalIngestRequest",
      "DocumentLookupResponse",
      "DocumentResponse",
      "DocumentType",
@@ -67,6 +70,7 @@ __all__ = [
      "HealthService",
      "HealthServiceStatus",
      "IngestLocalDocument",
+     "IngestLocalDocumentMetadata",
      "IngestRemoteDocument",
      "IngestResponse",
      "IngestResponseIngest",
groundx/types/document.py CHANGED
@@ -12,14 +12,14 @@ from ..core.pydantic_utilities import IS_PYDANTIC_V2
  class Document(UniversalBaseModel):
      bucket_id: typing_extensions.Annotated[int, FieldMetadata(alias="bucketId")] = pydantic.Field()
      """
-     the bucketId of the bucket which this remote file will be ingested to.
+     The bucketId of the bucket which this file will be ingested into.
      """

      file_name: typing_extensions.Annotated[typing.Optional[str], FieldMetadata(alias="fileName")] = pydantic.Field(
          default=None
      )
      """
-     The name of the file being ingested
+     The name of the file being ingested.
      """

      file_path: typing_extensions.Annotated[str, FieldMetadata(alias="filePath")] = pydantic.Field()
groundx/types/document_local_ingest_request.py ADDED
@@ -0,0 +1,6 @@
+ # This file was auto-generated by Fern from our API Definition.
+
+ import typing
+ from .ingest_local_document import IngestLocalDocument
+
+ DocumentLocalIngestRequest = typing.List[IngestLocalDocument]
groundx/types/ingest_local_document.py CHANGED
@@ -1,37 +1,19 @@
  # This file was auto-generated by Fern from our API Definition.

  from ..core.pydantic_utilities import UniversalBaseModel
- import typing_extensions
- from ..core.serialization import FieldMetadata
  import pydantic
- from .document_type import DocumentType
- import typing
+ from .ingest_local_document_metadata import IngestLocalDocumentMetadata
  from ..core.pydantic_utilities import IS_PYDANTIC_V2
+ import typing


  class IngestLocalDocument(UniversalBaseModel):
-     bucket_id: typing_extensions.Annotated[int, FieldMetadata(alias="bucketId")] = pydantic.Field()
-     """
-     the bucketId of the bucket which this local file will be ingested to.
-     """
-
-     file_data: typing_extensions.Annotated[str, FieldMetadata(alias="fileData")] = pydantic.Field()
+     blob: str = pydantic.Field()
      """
-     Binary data for the file being ingested.
+     The binary file data being ingested.
      """

-     file_name: typing_extensions.Annotated[str, FieldMetadata(alias="fileName")] = pydantic.Field()
-     """
-     The name of the file being ingested
-     """
-
-     file_type: typing_extensions.Annotated[DocumentType, FieldMetadata(alias="fileType")]
-     search_data: typing_extensions.Annotated[
-         typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]], FieldMetadata(alias="searchData")
-     ] = pydantic.Field(default=None)
-     """
-     Custom metadata which can be used to influence GroundX's search functionality. This data can be used to further hone GroundX search.
-     """
+     metadata: IngestLocalDocumentMetadata

      if IS_PYDANTIC_V2:
          model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
groundx/types/ingest_local_document_metadata.py ADDED
@@ -0,0 +1,42 @@
+ # This file was auto-generated by Fern from our API Definition.
+
+ from ..core.pydantic_utilities import UniversalBaseModel
+ import typing_extensions
+ import typing
+ from ..core.serialization import FieldMetadata
+ import pydantic
+ from .document_type import DocumentType
+ from ..core.pydantic_utilities import IS_PYDANTIC_V2
+
+
+ class IngestLocalDocumentMetadata(UniversalBaseModel):
+     bucket_id: typing_extensions.Annotated[typing.Optional[int], FieldMetadata(alias="bucketId")] = pydantic.Field(
+         default=None
+     )
+     """
+     The bucketId of the bucket which this local file will be ingested into.
+     """
+
+     file_name: typing_extensions.Annotated[typing.Optional[str], FieldMetadata(alias="fileName")] = pydantic.Field(
+         default=None
+     )
+     """
+     The name of the file being ingested
+     """
+
+     file_type: typing_extensions.Annotated[typing.Optional[DocumentType], FieldMetadata(alias="fileType")] = None
+     search_data: typing_extensions.Annotated[
+         typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]], FieldMetadata(alias="searchData")
+     ] = pydantic.Field(default=None)
+     """
+     Custom metadata which can be used to influence GroundX's search functionality. This data can be used to further hone GroundX search.
+     """
+
+     if IS_PYDANTIC_V2:
+         model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
+     else:
+
+         class Config:
+             frozen = True
+             smart_union = True
+             extra = pydantic.Extra.allow
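The reworked local-ingest types mirror the multipart payload that `prep_documents` assembles: `IngestLocalDocument` pairs a binary `blob` with an `IngestLocalDocumentMetadata` whose aliased fields (`bucketId`, `fileName`, `fileType`, `searchData`) match the JSON "metadata" part. A small sketch constructing the metadata model, assuming a bucket ID of 1234 and a `"pdf"` file type (hypothetical values for illustration):

```python
from groundx.types import IngestLocalDocumentMetadata

# All fields are optional; the snake_case attribute names serialize to the
# camelCase wire aliases (bucketId, fileName, fileType, searchData).
metadata = IngestLocalDocumentMetadata(
    bucket_id=1234,                 # hypothetical bucket ID
    file_name="report.pdf",         # hypothetical file name
    file_type="pdf",
    search_data={"key": "value"},
)
print(metadata)
```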
groundx/types/ingest_remote_document.py CHANGED
@@ -12,7 +12,7 @@ from ..core.pydantic_utilities import IS_PYDANTIC_V2
  class IngestRemoteDocument(UniversalBaseModel):
      bucket_id: typing_extensions.Annotated[int, FieldMetadata(alias="bucketId")] = pydantic.Field()
      """
-     the bucketId of the bucket which this remote file will be ingested to.
+     The bucketId of the bucket which this remote file will be ingested into.
      """

      file_name: typing_extensions.Annotated[typing.Optional[str], FieldMetadata(alias="fileName")] = pydantic.Field(
@@ -11,7 +11,7 @@ from ..core.pydantic_utilities import IS_PYDANTIC_V2
  class WebsiteSource(UniversalBaseModel):
      bucket_id: typing_extensions.Annotated[int, FieldMetadata(alias="bucketId")] = pydantic.Field()
      """
-     the bucketId of the bucket which this website will be ingested to.
+     The bucketId of the bucket which this website will be ingested into.
      """

      cap: typing.Optional[int] = pydantic.Field(default=None)
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: groundx
- Version: 2.0.20
+ Version: 2.0.29
  Summary: 
  License: MIT
  Requires-Python: >=3.8,<4.0
@@ -20,18 +20,20 @@ Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
  Classifier: Typing :: Typed
+ Requires-Dist: aiohttp (>=3.8.0)
  Requires-Dist: httpx (>=0.21.2)
  Requires-Dist: pydantic (>=1.9.2)
  Requires-Dist: pydantic-core (>=2.18.2,<3.0.0)
+ Requires-Dist: requests (>=2.4.0)
  Requires-Dist: typing_extensions (>=4.0.0)
  Description-Content-Type: text/markdown
  
- # Eyelevel Python Library
+ # GroundX Python Library
  
  [![fern shield](https://img.shields.io/badge/%F0%9F%8C%BF-Built%20with%20Fern-brightgreen)](https://buildwithfern.com?utm_source=github&utm_medium=github&utm_campaign=readme&utm_source=https%3A%2F%2Fgithub.com%2Feyelevelai%2Fgroundx-python)
  [![pypi](https://img.shields.io/pypi/v/groundx)](https://pypi.python.org/pypi/groundx)
  
- The Eyelevel Python library provides convenient access to the Eyelevel API from Python.
+ The GroundX Python library provides convenient access to the GroundX API from Python.
  
  ## Documentation
  
@@ -57,21 +59,15 @@ from groundx import Document, GroundX
  client = GroundX(
      api_key="YOUR_API_KEY",
  )
- client.documents.ingest(
+
+ client.ingest(
      documents=[
          Document(
              bucket_id=1234,
              file_name="my_file1.txt",
-             file_path="https://my.source.url.com/file1.txt",
              file_type="txt",
-             search_data={"key": "value"},
-         ),
-         Document(
-             bucket_id=1234,
-             file_name="my_file2.pdf",
-             file_path="/local/path/file2.pdf",
-             file_type="pdf",
-         ),
+             source_url="https://my.source.url.com/file1.txt",
+         )
      ],
  )
  ```
@@ -89,27 +85,18 @@ client = AsyncGroundX(
      api_key="YOUR_API_KEY",
  )
  
-
  async def main() -> None:
-     await client.documents.ingest(
+     await client.ingest(
          documents=[
              Document(
                  bucket_id=1234,
                  file_name="my_file1.txt",
-                 file_path="https://my.source.url.com/file1.txt",
                  file_type="txt",
-                 search_data={"key": "value"},
-             ),
-             Document(
-                 bucket_id=1234,
-                 file_name="my_file2.pdf",
-                 file_path="/local/path/file2.pdf",
-                 file_type="pdf",
-             ),
+                 source_url="https://my.source.url.com/file1.txt",
+             )
          ],
      )
  
-
  asyncio.run(main())
  ```
  
@@ -122,7 +109,7 @@ will be thrown.
  from groundx.core.api_error import ApiError
  
  try:
-     client.documents.ingest(...)
+     client.ingest(...)
  except ApiError as e:
      print(e.status_code)
      print(e.body)
@@ -145,7 +132,7 @@ A request is deemed retriable when any of the following HTTP status codes is ret
  Use the `max_retries` request option to configure this behavior.
  
  ```python
- client.documents.ingest(..., request_options={
+ client.ingest(..., request_options={
      "max_retries": 1
  })
  ```
@@ -165,7 +152,7 @@ client = GroundX(
  
  
  # Override timeout for a specific method
- client.documents.ingest(..., request_options={
+ client.ingest(..., request_options={
      "timeout_in_seconds": 1
  })
  ```