PyPI - discovery-engine-api - Versions diffs - 0.2.98__tar.gz → 0.2.100__tar.gz - Mend

discovery-engine-api 0.2.98__tar.gz → 0.2.100__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

{discovery_engine_api-0.2.98 → discovery_engine_api-0.2.100}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: discovery-engine-api
-Version: 0.2.98
+Version: 0.2.100
 Summary: Python SDK for Disco API
 Project-URL: Homepage, https://www.leap-labs.com
 Project-URL: Documentation, https://disco.leap-labs.com/llms-full.txt
@@ -248,7 +248,6 @@ estimate = await engine.estimate(
 )
 # estimate["cost"]["credits"]               -> 55
 # estimate["cost"]["price_usd"]             -> 5.5
-# estimate["time_estimate"]["estimated_seconds"] -> 360
 # estimate["account"]["sufficient"]         -> True/False
 # estimate["limits"]["max_analysis_depth"]  -> 23  (num_columns - 2)
 ```
@@ -299,7 +298,6 @@ class EngineResult:
     queue_position: int | None                     # Position in queue when pending (1 = next up)
     current_step: str | None                       # Active pipeline step (preprocessing, training, interpreting, reporting)
     current_step_message: str | None               # Human-readable description of the current step
-    estimated_seconds: int | None                  # Estimated total processing time in seconds
     estimated_wait_seconds: int | None             # Estimated queue wait time in seconds (pending only)
     error_message: str | None
     report_url: str | None                         # Shareable link to interactive web report

{discovery_engine_api-0.2.98 → discovery_engine_api-0.2.100}/README.md RENAMED Viewed

@@ -211,7 +211,6 @@ estimate = await engine.estimate(
 )
 # estimate["cost"]["credits"]               -> 55
 # estimate["cost"]["price_usd"]             -> 5.5
-# estimate["time_estimate"]["estimated_seconds"] -> 360
 # estimate["account"]["sufficient"]         -> True/False
 # estimate["limits"]["max_analysis_depth"]  -> 23  (num_columns - 2)
 ```
@@ -262,7 +261,6 @@ class EngineResult:
     queue_position: int | None                     # Position in queue when pending (1 = next up)
     current_step: str | None                       # Active pipeline step (preprocessing, training, interpreting, reporting)
     current_step_message: str | None               # Human-readable description of the current step
-    estimated_seconds: int | None                  # Estimated total processing time in seconds
     estimated_wait_seconds: int | None             # Estimated queue wait time in seconds (pending only)
     error_message: str | None
     report_url: str | None                         # Shareable link to interactive web report

{discovery_engine_api-0.2.98 → discovery_engine_api-0.2.100}/discovery/__init__.py RENAMED Viewed

@@ -1,6 +1,6 @@
 """Disco Python SDK."""
-__version__ = "0.2.98"
+__version__ = "0.2.100"
 from discovery.client import Engine
 from discovery.types import (

{discovery_engine_api-0.2.98 → discovery_engine_api-0.2.100}/discovery/client.py RENAMED Viewed

@@ -157,9 +157,6 @@ class Engine:
         the code interactively, then provisions the account and returns a
         configured Engine with a ``disco_`` API key.
-        If the email service is unavailable, falls back to direct provisioning
-        and returns immediately (no code required).
         Args:
             email: Email address for the new account.
             name: Display name (optional — defaults to email local part).
@@ -180,7 +177,7 @@ class Engine:
             cls._raise_for_status(response)
             data = response.json()
-        # Direct provisioning fallback (Resend unavailable) — already have the key
+        # If the server returned a key directly, use it
         if data.get("key"):
             engine = cls(api_key=data["key"], quiet=quiet)
             if not quiet:
@@ -284,7 +281,6 @@ class Engine:
         This is the primary method. It uploads data, submits the analysis,
         polls for completion, and returns structured results — all in one call.
-        Runs typically take 3-15 minutes.
         Args:
             file: File path, Path object, or pandas DataFrame.
@@ -440,12 +436,11 @@ class Engine:
         self,
         file_size_mb: float,
         num_columns: int,
-        num_rows: Optional[int] = None,
         analysis_depth: int = 2,
         visibility: str = "public",
         use_llms: bool = False,
     ) -> Dict[str, Any]:
-        """Estimate cost and time for an analysis run.
+        """Estimate the credit cost for an analysis run.
         Works with or without authentication. If authenticated, the response
         includes your current credit balance and whether you have enough.
@@ -453,7 +448,6 @@ class Engine:
         Args:
             file_size_mb: Size of the data file in megabytes.
             num_columns: Number of columns in the dataset.
-            num_rows: Number of rows (improves time estimate accuracy).
             analysis_depth: Depth iterations (1=fast, higher=deeper).
             visibility: "public" (free, results published) or "private" (costs credits).
             use_llms: Slower and more expensive, but you get smarter pre-processing,
@@ -461,7 +455,7 @@ class Engine:
                 always use LLMs.
         Returns:
-            Dict with ``cost``, ``time_estimate``, ``limits``, and ``account`` info.
+            Dict with ``cost``, ``limits``, and ``account`` info.
         """
         client = await self._get_dashboard_client()
         response = await client.post(
@@ -469,7 +463,6 @@ class Engine:
             json={
                 "file_size_mb": file_size_mb,
                 "num_columns": num_columns,
-                "num_rows": num_rows,
                 "analysis_depth": analysis_depth,
                 "visibility": visibility,
                 "use_llms": use_llms,
@@ -516,7 +509,6 @@ class Engine:
             current_step_message=data.get("current_step", {}).get("message")
             if data.get("current_step")
             else None,
-            estimated_seconds=data.get("estimated_seconds"),
             estimated_wait_seconds=data.get("estimated_wait_seconds"),
             error_message=data.get("error_message"),
         )
@@ -600,14 +592,7 @@ class Engine:
                             else ""
                         )
                         step_str = f" ({result.current_step}{msg})"
-                    eta_str = ""
-                    if result.estimated_seconds is not None and elapsed > 0:
-                        remaining = max(0, result.estimated_seconds - elapsed)
-                        if remaining > 0:
-                            eta_str = f" | ETA: ~{max(1, round(remaining / 60))} min"
-                    status_msg = (
-                        f"Status: {result.status}{step_str} | Elapsed: {elapsed:.1f}s{eta_str}"
-                    )
+                    status_msg = f"Status: {result.status}{step_str} | Elapsed: {elapsed:.1f}s"
                 self._log(f"  {status_msg}")
             last_status = result.status
@@ -638,15 +623,45 @@ class Engine:
     # File upload
     # ------------------------------------------------------------------
+    # 8 MB chunks — large enough to amortize syscall + TLS overhead, small
+    # enough that memory stays bounded regardless of file size.
+    _UPLOAD_CHUNK_SIZE = 8 * 1024 * 1024
+    @staticmethod
+    async def _stream_file_chunks(path: Path, chunk_size: int):
+        """Yield file contents in fixed-size chunks for streaming uploads.
+        Reads from disk via run_in_executor so the event loop isn't blocked
+        on the read syscall. Memory stays bounded to one chunk regardless
+        of file size — required for multi-GB uploads that previously OOMed
+        or hit ``_ssl.c:2426`` when passed as a single bytes object.
+        """
+        loop = asyncio.get_event_loop()
+        with path.open("rb") as f:
+            while True:
+                chunk = await loop.run_in_executor(None, f.read, chunk_size)
+                if not chunk:
+                    break
+                yield chunk
     async def _presign_and_upload(
         self,
-        file_content: bytes,
+        file_source: Union[bytes, Path],
         filename: str,
         mime_type: str,
     ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
-        """Upload a file using presigned URL (3-step: presign, upload, finalize)."""
+        """Upload a file using presigned URL (3-step: presign, upload, finalize).
+        ``file_source`` may be either an in-memory ``bytes`` object (used for
+        DataFrame uploads) or a ``Path`` to a file on disk. Path inputs are
+        streamed chunk-by-chunk so memory stays bounded for multi-GB files.
+        """
         dashboard_client = await self._get_dashboard_client()
-        file_size = len(file_content)
+        if isinstance(file_source, Path):
+            file_size = file_source.stat().st_size
+        else:
+            file_size = len(file_source)
         presign_response = await dashboard_client.post(
             "/api/data/upload/presign",
@@ -670,11 +685,22 @@ class Engine:
         key = presign_data["key"]
         upload_token = presign_data["uploadToken"]
+        # GCS XML API does not accept chunked transfer encoding on PUT, so
+        # we must always send an explicit Content-Length. For Path inputs we
+        # stream from disk; for bytes we let httpx send the buffer directly.
+        if isinstance(file_source, Path):
+            upload_content = self._stream_file_chunks(file_source, self._UPLOAD_CHUNK_SIZE)
+        else:
+            upload_content = file_source
         async with httpx.AsyncClient(timeout=self._TIMEOUT) as upload_client:
             upload_response = await upload_client.put(
                 upload_url,
-                content=file_content,
-                headers={"Content-Type": mime_type},
+                content=upload_content,
+                headers={
+                    "Content-Type": mime_type,
+                    "Content-Length": str(file_size),
+                },
             )
             if upload_response.status_code >= 400:
                 raise ValueError(
@@ -702,13 +728,13 @@ class Engine:
     async def _upload_file_direct(
         self,
-        file_content: bytes,
+        file_source: Union[bytes, Path],
         filename: str,
         mime_type: str,
     ) -> Dict[str, Any]:
         """Upload a file using presigned URL. Returns finalize result."""
         _, finalize_data = await self._presign_and_upload(
-            file_content=file_content,
+            file_source=file_source,
             filename=filename,
             mime_type=mime_type,
         )
@@ -720,7 +746,14 @@ class Engine:
         filename: Optional[str] = None,
         title: Optional[str] = None,
         log: bool = False,
-    ) -> Tuple[bytes, str, str, float]:
+    ) -> Tuple[Union[bytes, Path], str, str, float]:
+        """Resolve an upload source into (source, filename, mime_type, size_mb).
+        Returns ``Path`` for file inputs (streamed in ``_presign_and_upload``)
+        and ``bytes`` for DataFrame inputs (kept in memory). DataFrame uploads
+        are bounded by available RAM; users with multi-GB DataFrames should
+        ``df.to_csv(path, index=False)`` and pass the path instead.
+        """
         if pd is not None and isinstance(file, pd.DataFrame):
             import io
@@ -728,8 +761,8 @@ class Engine:
                 self._log(f"Preparing DataFrame ({len(file)} rows, {len(file.columns)} columns)...")
             buffer = io.BytesIO()
             file.to_csv(buffer, index=False)
-            buffer.seek(0)
-            file_content = buffer.getvalue()
+            file_source: Union[bytes, Path] = buffer.getvalue()
+            file_size = len(file_source)
             resolved_filename = filename or ((title + ".csv") if title else "dataset.csv")
             mime_type = "text/csv"
         else:
@@ -738,7 +771,8 @@ class Engine:
                 raise FileNotFoundError(f"File not found: {file_path}")
             if log:
                 self._log(f"Reading file: {file_path.name}...")
-            file_content = file_path.read_bytes()
+            file_source = file_path
+            file_size = file_path.stat().st_size
             resolved_filename = filename or file_path.name
             _MIME_TYPES = {
                 ".csv": "text/csv",
@@ -752,10 +786,10 @@ class Engine:
             }
             mime_type = _MIME_TYPES.get(file_path.suffix.lower(), "text/csv")
-        file_size_mb = len(file_content) / (1024 * 1024)
+        file_size_mb = file_size / (1024 * 1024)
         if log:
             self._log(f"  File size: {file_size_mb:.2f} MB")
-        return file_content, resolved_filename, mime_type, file_size_mb
+        return file_source, resolved_filename, mime_type, file_size_mb
     # ------------------------------------------------------------------
     # File upload
@@ -776,13 +810,13 @@ class Engine:
             Dict with ``file`` (key, name, size, fileHash) and ``columns``
             (list of dicts with ``name``, ``type``, ``enabled``).
         """
-        file_content, filename, mime_type, _ = self._prepare_upload(
+        file_source, filename, mime_type, _ = self._prepare_upload(
             file=file,
             title=title,
             log=True,
         )
         self._log("  Uploading to storage...")
-        result = await self._upload_file_direct(file_content, filename, mime_type)
+        result = await self._upload_file_direct(file_source, filename, mime_type)
         if not result.get("ok"):
             errors = result.get("issues", {}).get("errors", [])
@@ -859,7 +893,7 @@ class Engine:
                 f"Creating run from pre-uploaded file (depth: {analysis_depth}, target: {target_column})..."
             )
         else:
-            file_content, filename, mime_type, _ = self._prepare_upload(
+            file_source, filename, mime_type, _ = self._prepare_upload(
                 file=file,
                 title=title,
                 log=True,
@@ -868,7 +902,7 @@ class Engine:
             # Step 1: Upload file
             self._log("  Uploading to storage...")
-            raw_result = await self._upload_file_direct(file_content, filename, mime_type)
+            raw_result = await self._upload_file_direct(file_source, filename, mime_type)
             if not raw_result.get("ok"):
                 errors = raw_result.get("issues", {}).get("errors", [])
@@ -1139,7 +1173,6 @@ class Engine:
             current_step_message=data.get("current_step", {}).get("message")
             if data.get("current_step")
             else None,
-            estimated_seconds=data.get("estimated_seconds"),
             estimated_wait_seconds=data.get("estimated_wait_seconds"),
             error_message=data.get("error_message"),
             report_url=report_url,

{discovery_engine_api-0.2.98 → discovery_engine_api-0.2.100}/discovery/types.py RENAMED Viewed

@@ -205,7 +205,6 @@ class EngineResult:
     queue_position: Optional[int] = None
     current_step: Optional[str] = None
     current_step_message: Optional[str] = None
-    estimated_seconds: Optional[int] = None
     estimated_wait_seconds: Optional[int] = None
     error_message: Optional[str] = None
@@ -232,6 +231,5 @@ class RunStatus:
     queue_position: Optional[int] = None
     current_step: Optional[str] = None
     current_step_message: Optional[str] = None
-    estimated_seconds: Optional[int] = None
     estimated_wait_seconds: Optional[int] = None
     error_message: Optional[str] = None

{discovery_engine_api-0.2.98 → discovery_engine_api-0.2.100}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "discovery-engine-api"
-version = "0.2.98"
+version = "0.2.100"
 description = "Python SDK for Disco API"
 readme = "README.md"
 requires-python = ">=3.10"

{discovery_engine_api-0.2.98 → discovery_engine_api-0.2.100}/.gitignore RENAMED Viewed

File without changes

{discovery_engine_api-0.2.98 → discovery_engine_api-0.2.100}/discovery/errors.py RENAMED Viewed

File without changes

{discovery_engine_api-0.2.98 → discovery_engine_api-0.2.100}/discovery/integrations/__init__.py RENAMED Viewed

File without changes

{discovery_engine_api-0.2.98 → discovery_engine_api-0.2.100}/discovery/integrations/crewai.py RENAMED Viewed

File without changes

{discovery_engine_api-0.2.98 → discovery_engine_api-0.2.100}/discovery/integrations/langchain.py RENAMED Viewed

File without changes

discovery-engine-api 0.2.98__tar.gz → 0.2.100__tar.gz

discovery-engine-api 0.2.98__tar.gz → 0.2.100__tar.gz