PyPI - robosystems-client - Versions diffs - 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl - Mend

robosystems-client 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of robosystems-client might be problematic. Click here for more details.

Files changed (34)

robosystems_client/api/tables/update_file_status.py ADDED Viewed

@@ -0,0 +1,539 @@
+from http import HTTPStatus
+from typing import Any, Optional, Union, cast
+import httpx
+from ... import errors
+from ...client import AuthenticatedClient, Client
+from ...models.error_response import ErrorResponse
+from ...models.file_status_update import FileStatusUpdate
+from ...models.http_validation_error import HTTPValidationError
+from ...models.update_file_status_response_updatefilestatus import (
+  UpdateFileStatusResponseUpdatefilestatus,
+)
+from ...types import UNSET, Response, Unset
+def _get_kwargs(
+  graph_id: str,
+  file_id: str,
+  *,
+  body: FileStatusUpdate,
+  token: Union[None, Unset, str] = UNSET,
+  authorization: Union[None, Unset, str] = UNSET,
+) -> dict[str, Any]:
+  headers: dict[str, Any] = {}
+  if not isinstance(authorization, Unset):
+    headers["authorization"] = authorization
+  params: dict[str, Any] = {}
+  json_token: Union[None, Unset, str]
+  if isinstance(token, Unset):
+    json_token = UNSET
+  else:
+    json_token = token
+  params["token"] = json_token
+  params = {k: v for k, v in params.items() if v is not UNSET and v is not None}
+  _kwargs: dict[str, Any] = {
+    "method": "patch",
+    "url": f"/v1/graphs/{graph_id}/tables/files/{file_id}",
+    "params": params,
+  }
+  _kwargs["json"] = body.to_dict()
+  headers["Content-Type"] = "application/json"
+  _kwargs["headers"] = headers
+  return _kwargs
+def _parse_response(
+  *, client: Union[AuthenticatedClient, Client], response: httpx.Response
+) -> Optional[
+  Union[
+    Any, ErrorResponse, HTTPValidationError, UpdateFileStatusResponseUpdatefilestatus
+  ]
+]:
+  if response.status_code == 200:
+    response_200 = UpdateFileStatusResponseUpdatefilestatus.from_dict(response.json())
+    return response_200
+  if response.status_code == 400:
+    response_400 = ErrorResponse.from_dict(response.json())
+    return response_400
+  if response.status_code == 401:
+    response_401 = cast(Any, None)
+    return response_401
+  if response.status_code == 403:
+    response_403 = ErrorResponse.from_dict(response.json())
+    return response_403
+  if response.status_code == 404:
+    response_404 = ErrorResponse.from_dict(response.json())
+    return response_404
+  if response.status_code == 413:
+    response_413 = ErrorResponse.from_dict(response.json())
+    return response_413
+  if response.status_code == 422:
+    response_422 = HTTPValidationError.from_dict(response.json())
+    return response_422
+  if response.status_code == 500:
+    response_500 = cast(Any, None)
+    return response_500
+  if client.raise_on_unexpected_status:
+    raise errors.UnexpectedStatus(response.status_code, response.content)
+  else:
+    return None
+def _build_response(
+  *, client: Union[AuthenticatedClient, Client], response: httpx.Response
+) -> Response[
+  Union[
+    Any, ErrorResponse, HTTPValidationError, UpdateFileStatusResponseUpdatefilestatus
+  ]
+]:
+  return Response(
+    status_code=HTTPStatus(response.status_code),
+    content=response.content,
+    headers=response.headers,
+    parsed=_parse_response(client=client, response=response),
+  )
+def sync_detailed(
+  graph_id: str,
+  file_id: str,
+  *,
+  client: AuthenticatedClient,
+  body: FileStatusUpdate,
+  token: Union[None, Unset, str] = UNSET,
+  authorization: Union[None, Unset, str] = UNSET,
+) -> Response[
+  Union[
+    Any, ErrorResponse, HTTPValidationError, UpdateFileStatusResponseUpdatefilestatus
+  ]
+]:
+  r""" Update File Upload Status
+     Update file status after upload completes.
+    **Purpose:**
+    Mark files as uploaded after successful S3 upload. The backend validates
+    the file, calculates size and row count, enforces storage limits, and
+    registers the DuckDB table for queries.
+    **Status Values:**
+    - `uploaded`: File successfully uploaded to S3 (triggers validation)
+    - `disabled`: Exclude file from ingestion
+    - `archived`: Soft delete file
+    **What Happens on 'uploaded' Status:**
+    1. Verify file exists in S3
+    2. Calculate actual file size
+    3. Enforce tier storage limits
+    4. Calculate or estimate row count
+    5. Update table statistics
+    6. Register DuckDB external table
+    7. File ready for ingestion
+    **Row Count Calculation:**
+    - **Parquet**: Exact count from file metadata
+    - **CSV**: Count rows (minus header)
+    - **JSON**: Count array elements
+    - **Fallback**: Estimate from file size if reading fails
+    **Storage Limits:**
+    Enforced per subscription tier:
+    - Prevents uploads exceeding tier limit
+    - Returns HTTP 413 if limit exceeded
+    - Check current usage before large uploads
+    **Example Response:**
+    ```json
+    {
+      \"status\": \"success\",
+      \"file_id\": \"f123\",
+      \"upload_status\": \"uploaded\",
+      \"file_size_bytes\": 1048576,
+      \"row_count\": 5000,
+      \"message\": \"File validated and ready for ingestion\"
+    }
+    ```
+    **Example Usage:**
+    ```bash
+    # After uploading file to S3 presigned URL
+    curl -X PATCH \"https://api.robosystems.ai/v1/graphs/kg123/tables/files/f123\" \
+      -H \"Authorization: Bearer YOUR_TOKEN\" \
+      -H \"Content-Type: application/json\" \
+      -d '{\"status\": \"uploaded\"}'
+    ```
+    **Tips:**
+    - Always call this after S3 upload completes
+    - Check response for actual row count
+    - Storage limit errors (413) mean tier upgrade needed
+    - DuckDB registration failures are non-fatal (retried later)
+    **Note:**
+    Status updates are included - no credit consumption.
+    Args:
+        graph_id (str): Graph database identifier
+        file_id (str): File identifier
+        token (Union[None, Unset, str]): JWT token for SSE authentication
+        authorization (Union[None, Unset, str]):
+        body (FileStatusUpdate):
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+    Returns:
+        Response[Union[Any, ErrorResponse, HTTPValidationError, UpdateFileStatusResponseUpdatefilestatus]]
+     """
+  kwargs = _get_kwargs(
+    graph_id=graph_id,
+    file_id=file_id,
+    body=body,
+    token=token,
+    authorization=authorization,
+  )
+  response = client.get_httpx_client().request(
+    **kwargs,
+  )
+  return _build_response(client=client, response=response)
+def sync(
+  graph_id: str,
+  file_id: str,
+  *,
+  client: AuthenticatedClient,
+  body: FileStatusUpdate,
+  token: Union[None, Unset, str] = UNSET,
+  authorization: Union[None, Unset, str] = UNSET,
+) -> Optional[
+  Union[
+    Any, ErrorResponse, HTTPValidationError, UpdateFileStatusResponseUpdatefilestatus
+  ]
+]:
+  r""" Update File Upload Status
+     Update file status after upload completes.
+    **Purpose:**
+    Mark files as uploaded after successful S3 upload. The backend validates
+    the file, calculates size and row count, enforces storage limits, and
+    registers the DuckDB table for queries.
+    **Status Values:**
+    - `uploaded`: File successfully uploaded to S3 (triggers validation)
+    - `disabled`: Exclude file from ingestion
+    - `archived`: Soft delete file
+    **What Happens on 'uploaded' Status:**
+    1. Verify file exists in S3
+    2. Calculate actual file size
+    3. Enforce tier storage limits
+    4. Calculate or estimate row count
+    5. Update table statistics
+    6. Register DuckDB external table
+    7. File ready for ingestion
+    **Row Count Calculation:**
+    - **Parquet**: Exact count from file metadata
+    - **CSV**: Count rows (minus header)
+    - **JSON**: Count array elements
+    - **Fallback**: Estimate from file size if reading fails
+    **Storage Limits:**
+    Enforced per subscription tier:
+    - Prevents uploads exceeding tier limit
+    - Returns HTTP 413 if limit exceeded
+    - Check current usage before large uploads
+    **Example Response:**
+    ```json
+    {
+      \"status\": \"success\",
+      \"file_id\": \"f123\",
+      \"upload_status\": \"uploaded\",
+      \"file_size_bytes\": 1048576,
+      \"row_count\": 5000,
+      \"message\": \"File validated and ready for ingestion\"
+    }
+    ```
+    **Example Usage:**
+    ```bash
+    # After uploading file to S3 presigned URL
+    curl -X PATCH \"https://api.robosystems.ai/v1/graphs/kg123/tables/files/f123\" \
+      -H \"Authorization: Bearer YOUR_TOKEN\" \
+      -H \"Content-Type: application/json\" \
+      -d '{\"status\": \"uploaded\"}'
+    ```
+    **Tips:**
+    - Always call this after S3 upload completes
+    - Check response for actual row count
+    - Storage limit errors (413) mean tier upgrade needed
+    - DuckDB registration failures are non-fatal (retried later)
+    **Note:**
+    Status updates are included - no credit consumption.
+    Args:
+        graph_id (str): Graph database identifier
+        file_id (str): File identifier
+        token (Union[None, Unset, str]): JWT token for SSE authentication
+        authorization (Union[None, Unset, str]):
+        body (FileStatusUpdate):
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+    Returns:
+        Union[Any, ErrorResponse, HTTPValidationError, UpdateFileStatusResponseUpdatefilestatus]
+     """
+  return sync_detailed(
+    graph_id=graph_id,
+    file_id=file_id,
+    client=client,
+    body=body,
+    token=token,
+    authorization=authorization,
+  ).parsed
+async def asyncio_detailed(
+  graph_id: str,
+  file_id: str,
+  *,
+  client: AuthenticatedClient,
+  body: FileStatusUpdate,
+  token: Union[None, Unset, str] = UNSET,
+  authorization: Union[None, Unset, str] = UNSET,
+) -> Response[
+  Union[
+    Any, ErrorResponse, HTTPValidationError, UpdateFileStatusResponseUpdatefilestatus
+  ]
+]:
+  r""" Update File Upload Status
+     Update file status after upload completes.
+    **Purpose:**
+    Mark files as uploaded after successful S3 upload. The backend validates
+    the file, calculates size and row count, enforces storage limits, and
+    registers the DuckDB table for queries.
+    **Status Values:**
+    - `uploaded`: File successfully uploaded to S3 (triggers validation)
+    - `disabled`: Exclude file from ingestion
+    - `archived`: Soft delete file
+    **What Happens on 'uploaded' Status:**
+    1. Verify file exists in S3
+    2. Calculate actual file size
+    3. Enforce tier storage limits
+    4. Calculate or estimate row count
+    5. Update table statistics
+    6. Register DuckDB external table
+    7. File ready for ingestion
+    **Row Count Calculation:**
+    - **Parquet**: Exact count from file metadata
+    - **CSV**: Count rows (minus header)
+    - **JSON**: Count array elements
+    - **Fallback**: Estimate from file size if reading fails
+    **Storage Limits:**
+    Enforced per subscription tier:
+    - Prevents uploads exceeding tier limit
+    - Returns HTTP 413 if limit exceeded
+    - Check current usage before large uploads
+    **Example Response:**
+    ```json
+    {
+      \"status\": \"success\",
+      \"file_id\": \"f123\",
+      \"upload_status\": \"uploaded\",
+      \"file_size_bytes\": 1048576,
+      \"row_count\": 5000,
+      \"message\": \"File validated and ready for ingestion\"
+    }
+    ```
+    **Example Usage:**
+    ```bash
+    # After uploading file to S3 presigned URL
+    curl -X PATCH \"https://api.robosystems.ai/v1/graphs/kg123/tables/files/f123\" \
+      -H \"Authorization: Bearer YOUR_TOKEN\" \
+      -H \"Content-Type: application/json\" \
+      -d '{\"status\": \"uploaded\"}'
+    ```
+    **Tips:**
+    - Always call this after S3 upload completes
+    - Check response for actual row count
+    - Storage limit errors (413) mean tier upgrade needed
+    - DuckDB registration failures are non-fatal (retried later)
+    **Note:**
+    Status updates are included - no credit consumption.
+    Args:
+        graph_id (str): Graph database identifier
+        file_id (str): File identifier
+        token (Union[None, Unset, str]): JWT token for SSE authentication
+        authorization (Union[None, Unset, str]):
+        body (FileStatusUpdate):
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+    Returns:
+        Response[Union[Any, ErrorResponse, HTTPValidationError, UpdateFileStatusResponseUpdatefilestatus]]
+     """
+  kwargs = _get_kwargs(
+    graph_id=graph_id,
+    file_id=file_id,
+    body=body,
+    token=token,
+    authorization=authorization,
+  )
+  response = await client.get_async_httpx_client().request(**kwargs)
+  return _build_response(client=client, response=response)
+async def asyncio(
+  graph_id: str,
+  file_id: str,
+  *,
+  client: AuthenticatedClient,
+  body: FileStatusUpdate,
+  token: Union[None, Unset, str] = UNSET,
+  authorization: Union[None, Unset, str] = UNSET,
+) -> Optional[
+  Union[
+    Any, ErrorResponse, HTTPValidationError, UpdateFileStatusResponseUpdatefilestatus
+  ]
+]:
+  r""" Update File Upload Status
+     Update file status after upload completes.
+    **Purpose:**
+    Mark files as uploaded after successful S3 upload. The backend validates
+    the file, calculates size and row count, enforces storage limits, and
+    registers the DuckDB table for queries.
+    **Status Values:**
+    - `uploaded`: File successfully uploaded to S3 (triggers validation)
+    - `disabled`: Exclude file from ingestion
+    - `archived`: Soft delete file
+    **What Happens on 'uploaded' Status:**
+    1. Verify file exists in S3
+    2. Calculate actual file size
+    3. Enforce tier storage limits
+    4. Calculate or estimate row count
+    5. Update table statistics
+    6. Register DuckDB external table
+    7. File ready for ingestion
+    **Row Count Calculation:**
+    - **Parquet**: Exact count from file metadata
+    - **CSV**: Count rows (minus header)
+    - **JSON**: Count array elements
+    - **Fallback**: Estimate from file size if reading fails
+    **Storage Limits:**
+    Enforced per subscription tier:
+    - Prevents uploads exceeding tier limit
+    - Returns HTTP 413 if limit exceeded
+    - Check current usage before large uploads
+    **Example Response:**
+    ```json
+    {
+      \"status\": \"success\",
+      \"file_id\": \"f123\",
+      \"upload_status\": \"uploaded\",
+      \"file_size_bytes\": 1048576,
+      \"row_count\": 5000,
+      \"message\": \"File validated and ready for ingestion\"
+    }
+    ```
+    **Example Usage:**
+    ```bash
+    # After uploading file to S3 presigned URL
+    curl -X PATCH \"https://api.robosystems.ai/v1/graphs/kg123/tables/files/f123\" \
+      -H \"Authorization: Bearer YOUR_TOKEN\" \
+      -H \"Content-Type: application/json\" \
+      -d '{\"status\": \"uploaded\"}'
+    ```
+    **Tips:**
+    - Always call this after S3 upload completes
+    - Check response for actual row count
+    - Storage limit errors (413) mean tier upgrade needed
+    - DuckDB registration failures are non-fatal (retried later)
+    **Note:**
+    Status updates are included - no credit consumption.
+    Args:
+        graph_id (str): Graph database identifier
+        file_id (str): File identifier
+        token (Union[None, Unset, str]): JWT token for SSE authentication
+        authorization (Union[None, Unset, str]):
+        body (FileStatusUpdate):
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+    Returns:
+        Union[Any, ErrorResponse, HTTPValidationError, UpdateFileStatusResponseUpdatefilestatus]
+     """
+  return (
+    await asyncio_detailed(
+      graph_id=graph_id,
+      file_id=file_id,
+      client=client,
+      body=body,
+      token=token,
+      authorization=authorization,
+    )
+  ).parsed

robosystems_client/extensions/graph_client.py CHANGED Viewed

@@ -58,6 +58,7 @@ class GraphClient:
     self,
     metadata: GraphMetadata,
     initial_entity: Optional[InitialEntityData] = None,
+    create_entity: bool = True,
     timeout: int = 60,
     poll_interval: int = 2,
     on_progress: Optional[Callable[[str], None]] = None,
@@ -68,6 +69,9 @@ class GraphClient:
     Args:
         metadata: Graph metadata
         initial_entity: Optional initial entity data
+        create_entity: Whether to create the entity node and upload initial data.
+            Only applies when initial_entity is provided. Set to False to create
+            graph without populating entity data (useful for file-based ingestion).
         timeout: Maximum time to wait in seconds
         poll_interval: Time between status checks in seconds
         on_progress: Callback for progress updates
@@ -121,6 +125,7 @@ class GraphClient:
     graph_create = CreateGraphRequest(
       metadata=api_metadata,
       initial_entity=initial_entity_dict,
+      create_entity=create_entity,
     )
     if on_progress:

robosystems_client/extensions/table_ingest_client.py CHANGED Viewed

@@ -11,20 +11,20 @@ import json
 import logging
 import httpx
-from ..api.tables.get_upload_url_v1_graphs_graph_id_tables_table_name_files_post import (
+from ..api.tables.get_upload_url import (
   sync_detailed as get_upload_url,
 )
-from ..api.tables.update_file_v1_graphs_graph_id_tables_files_file_id_patch import (
-  sync_detailed as update_file,
+from ..api.tables.update_file_status import (
+  sync_detailed as update_file_status,
 )
-from ..api.tables.list_tables_v1_graphs_graph_id_tables_get import (
+from ..api.tables.list_tables import (
   sync_detailed as list_tables,
 )
-from ..api.tables.ingest_tables_v1_graphs_graph_id_tables_ingest_post import (
+from ..api.tables.ingest_tables import (
   sync_detailed as ingest_tables,
 )
 from ..models.file_upload_request import FileUploadRequest
-from ..models.file_update_request import FileUpdateRequest
+from ..models.file_status_update import FileStatusUpdate
 from ..models.bulk_ingest_request import BulkIngestRequest
 logger = logging.getLogger(__name__)
@@ -95,7 +95,7 @@ class TableIngestClient:
     This method handles the complete 3-step upload process:
     1. Get presigned upload URL
     2. Upload file to S3
-    3. Update file metadata
+    3. Mark file as 'uploaded' (backend validates, calculates size/row count)
     Args:
         graph_id: The graph ID
@@ -104,7 +104,7 @@ class TableIngestClient:
         options: Upload options
     Returns:
-        UploadResult with upload details
+        UploadResult with upload details (size/row count calculated by backend)
     """
     if options is None:
       options = UploadOptions()
@@ -216,12 +216,10 @@ class TableIngestClient:
           # BinaryIO or file-like object
           file_or_buffer.seek(0)
           file_content = file_or_buffer.read()
-        file_size = len(file_content)
       else:
         # Read from file path
         with open(file_path, "rb") as f:
           file_content = f.read()
-        file_size = len(file_content)
       s3_response = self._http_client.put(
         upload_url,
@@ -230,54 +228,47 @@ class TableIngestClient:
       )
       s3_response.raise_for_status()
-      # Step 3: Get row count and update file metadata
+      # Step 3: Mark file as uploaded (backend validates and calculates size/row count)
       if options.on_progress:
-        options.on_progress(f"Updating file metadata for {file_name}...")
+        options.on_progress(f"Marking {file_name} as uploaded...")
-      try:
-        import pyarrow.parquet as pq
-        if is_buffer:
-          # Read from buffer for row count
-          if hasattr(file_or_buffer, "seek"):
-            file_or_buffer.seek(0)
-          parquet_table = pq.read_table(file_or_buffer)
-        else:
-          # Read from file path
-          parquet_table = pq.read_table(file_path)
-        row_count = parquet_table.num_rows
-      except ImportError:
-        logger.warning(
-          "pyarrow not installed, row count will be estimated from file size"
-        )
-        # Rough estimate: ~100 bytes per row for typical data
-        row_count = file_size // 100
-      metadata_update = FileUpdateRequest(
-        file_size_bytes=file_size, row_count=row_count
-      )
+      status_update = FileStatusUpdate(status="uploaded")
       kwargs = {
         "graph_id": graph_id,
         "file_id": file_id,
         "client": client,
-        "body": metadata_update,
+        "body": status_update,
       }
-      update_response = update_file(**kwargs)
+      update_response = update_file_status(**kwargs)
       if not update_response.parsed:
+        logger.error(
+          f"No parsed response from update_file_status. Status code: {update_response.status_code}"
+        )
         return UploadResult(
           file_id=file_id,
-          file_size=file_size,
-          row_count=row_count,
+          file_size=0,
+          row_count=0,
           table_name=table_name,
           file_name=file_name,
           success=False,
-          error="Failed to update file metadata",
+          error="Failed to complete file upload",
         )
+      response_data = update_response.parsed
+      if isinstance(response_data, dict):
+        file_size = response_data.get("file_size_bytes", 0)
+        row_count = response_data.get("row_count", 0)
+      elif hasattr(response_data, "additional_properties"):
+        file_size = response_data.additional_properties.get("file_size_bytes", 0)
+        row_count = response_data.additional_properties.get("row_count", 0)
+      else:
+        file_size = getattr(response_data, "file_size_bytes", 0)
+        row_count = getattr(response_data, "row_count", 0)
       if options.on_progress:
         options.on_progress(
           f"✅ Uploaded {file_name} ({file_size:,} bytes, {row_count:,} rows)"

robosystems-client 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl

Potentially problematic release.

robosystems-client 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl