discovery-engine-api 0.2.98__tar.gz → 0.2.100__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {discovery_engine_api-0.2.98 → discovery_engine_api-0.2.100}/PKG-INFO +1 -3
- {discovery_engine_api-0.2.98 → discovery_engine_api-0.2.100}/README.md +0 -2
- {discovery_engine_api-0.2.98 → discovery_engine_api-0.2.100}/discovery/__init__.py +1 -1
- {discovery_engine_api-0.2.98 → discovery_engine_api-0.2.100}/discovery/client.py +70 -37
- {discovery_engine_api-0.2.98 → discovery_engine_api-0.2.100}/discovery/types.py +0 -2
- {discovery_engine_api-0.2.98 → discovery_engine_api-0.2.100}/pyproject.toml +1 -1
- {discovery_engine_api-0.2.98 → discovery_engine_api-0.2.100}/.gitignore +0 -0
- {discovery_engine_api-0.2.98 → discovery_engine_api-0.2.100}/discovery/errors.py +0 -0
- {discovery_engine_api-0.2.98 → discovery_engine_api-0.2.100}/discovery/integrations/__init__.py +0 -0
- {discovery_engine_api-0.2.98 → discovery_engine_api-0.2.100}/discovery/integrations/crewai.py +0 -0
- {discovery_engine_api-0.2.98 → discovery_engine_api-0.2.100}/discovery/integrations/langchain.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: discovery-engine-api
|
|
3
|
-
Version: 0.2.98
|
|
3
|
+
Version: 0.2.100
|
|
4
4
|
Summary: Python SDK for Disco API
|
|
5
5
|
Project-URL: Homepage, https://www.leap-labs.com
|
|
6
6
|
Project-URL: Documentation, https://disco.leap-labs.com/llms-full.txt
|
|
@@ -248,7 +248,6 @@ estimate = await engine.estimate(
|
|
|
248
248
|
)
|
|
249
249
|
# estimate["cost"]["credits"] -> 55
|
|
250
250
|
# estimate["cost"]["price_usd"] -> 5.5
|
|
251
|
-
# estimate["time_estimate"]["estimated_seconds"] -> 360
|
|
252
251
|
# estimate["account"]["sufficient"] -> True/False
|
|
253
252
|
# estimate["limits"]["max_analysis_depth"] -> 23 (num_columns - 2)
|
|
254
253
|
```
|
|
@@ -299,7 +298,6 @@ class EngineResult:
|
|
|
299
298
|
queue_position: int | None # Position in queue when pending (1 = next up)
|
|
300
299
|
current_step: str | None # Active pipeline step (preprocessing, training, interpreting, reporting)
|
|
301
300
|
current_step_message: str | None # Human-readable description of the current step
|
|
302
|
-
estimated_seconds: int | None # Estimated total processing time in seconds
|
|
303
301
|
estimated_wait_seconds: int | None # Estimated queue wait time in seconds (pending only)
|
|
304
302
|
error_message: str | None
|
|
305
303
|
report_url: str | None # Shareable link to interactive web report
|
|
@@ -211,7 +211,6 @@ estimate = await engine.estimate(
|
|
|
211
211
|
)
|
|
212
212
|
# estimate["cost"]["credits"] -> 55
|
|
213
213
|
# estimate["cost"]["price_usd"] -> 5.5
|
|
214
|
-
# estimate["time_estimate"]["estimated_seconds"] -> 360
|
|
215
214
|
# estimate["account"]["sufficient"] -> True/False
|
|
216
215
|
# estimate["limits"]["max_analysis_depth"] -> 23 (num_columns - 2)
|
|
217
216
|
```
|
|
@@ -262,7 +261,6 @@ class EngineResult:
|
|
|
262
261
|
queue_position: int | None # Position in queue when pending (1 = next up)
|
|
263
262
|
current_step: str | None # Active pipeline step (preprocessing, training, interpreting, reporting)
|
|
264
263
|
current_step_message: str | None # Human-readable description of the current step
|
|
265
|
-
estimated_seconds: int | None # Estimated total processing time in seconds
|
|
266
264
|
estimated_wait_seconds: int | None # Estimated queue wait time in seconds (pending only)
|
|
267
265
|
error_message: str | None
|
|
268
266
|
report_url: str | None # Shareable link to interactive web report
|
|
@@ -157,9 +157,6 @@ class Engine:
|
|
|
157
157
|
the code interactively, then provisions the account and returns a
|
|
158
158
|
configured Engine with a ``disco_`` API key.
|
|
159
159
|
|
|
160
|
-
If the email service is unavailable, falls back to direct provisioning
|
|
161
|
-
and returns immediately (no code required).
|
|
162
|
-
|
|
163
160
|
Args:
|
|
164
161
|
email: Email address for the new account.
|
|
165
162
|
name: Display name (optional — defaults to email local part).
|
|
@@ -180,7 +177,7 @@ class Engine:
|
|
|
180
177
|
cls._raise_for_status(response)
|
|
181
178
|
data = response.json()
|
|
182
179
|
|
|
183
|
-
#
|
|
180
|
+
# If the server returned a key directly, use it
|
|
184
181
|
if data.get("key"):
|
|
185
182
|
engine = cls(api_key=data["key"], quiet=quiet)
|
|
186
183
|
if not quiet:
|
|
@@ -284,7 +281,6 @@ class Engine:
|
|
|
284
281
|
|
|
285
282
|
This is the primary method. It uploads data, submits the analysis,
|
|
286
283
|
polls for completion, and returns structured results — all in one call.
|
|
287
|
-
Runs typically take 3-15 minutes.
|
|
288
284
|
|
|
289
285
|
Args:
|
|
290
286
|
file: File path, Path object, or pandas DataFrame.
|
|
@@ -440,12 +436,11 @@ class Engine:
|
|
|
440
436
|
self,
|
|
441
437
|
file_size_mb: float,
|
|
442
438
|
num_columns: int,
|
|
443
|
-
num_rows: Optional[int] = None,
|
|
444
439
|
analysis_depth: int = 2,
|
|
445
440
|
visibility: str = "public",
|
|
446
441
|
use_llms: bool = False,
|
|
447
442
|
) -> Dict[str, Any]:
|
|
448
|
-
"""Estimate
|
|
443
|
+
"""Estimate the credit cost for an analysis run.
|
|
449
444
|
|
|
450
445
|
Works with or without authentication. If authenticated, the response
|
|
451
446
|
includes your current credit balance and whether you have enough.
|
|
@@ -453,7 +448,6 @@ class Engine:
|
|
|
453
448
|
Args:
|
|
454
449
|
file_size_mb: Size of the data file in megabytes.
|
|
455
450
|
num_columns: Number of columns in the dataset.
|
|
456
|
-
num_rows: Number of rows (improves time estimate accuracy).
|
|
457
451
|
analysis_depth: Depth iterations (1=fast, higher=deeper).
|
|
458
452
|
visibility: "public" (free, results published) or "private" (costs credits).
|
|
459
453
|
use_llms: Slower and more expensive, but you get smarter pre-processing,
|
|
@@ -461,7 +455,7 @@ class Engine:
|
|
|
461
455
|
always use LLMs.
|
|
462
456
|
|
|
463
457
|
Returns:
|
|
464
|
-
Dict with ``cost``, ``
|
|
458
|
+
Dict with ``cost``, ``limits``, and ``account`` info.
|
|
465
459
|
"""
|
|
466
460
|
client = await self._get_dashboard_client()
|
|
467
461
|
response = await client.post(
|
|
@@ -469,7 +463,6 @@ class Engine:
|
|
|
469
463
|
json={
|
|
470
464
|
"file_size_mb": file_size_mb,
|
|
471
465
|
"num_columns": num_columns,
|
|
472
|
-
"num_rows": num_rows,
|
|
473
466
|
"analysis_depth": analysis_depth,
|
|
474
467
|
"visibility": visibility,
|
|
475
468
|
"use_llms": use_llms,
|
|
@@ -516,7 +509,6 @@ class Engine:
|
|
|
516
509
|
current_step_message=data.get("current_step", {}).get("message")
|
|
517
510
|
if data.get("current_step")
|
|
518
511
|
else None,
|
|
519
|
-
estimated_seconds=data.get("estimated_seconds"),
|
|
520
512
|
estimated_wait_seconds=data.get("estimated_wait_seconds"),
|
|
521
513
|
error_message=data.get("error_message"),
|
|
522
514
|
)
|
|
@@ -600,14 +592,7 @@ class Engine:
|
|
|
600
592
|
else ""
|
|
601
593
|
)
|
|
602
594
|
step_str = f" ({result.current_step}{msg})"
|
|
603
|
-
|
|
604
|
-
if result.estimated_seconds is not None and elapsed > 0:
|
|
605
|
-
remaining = max(0, result.estimated_seconds - elapsed)
|
|
606
|
-
if remaining > 0:
|
|
607
|
-
eta_str = f" | ETA: ~{max(1, round(remaining / 60))} min"
|
|
608
|
-
status_msg = (
|
|
609
|
-
f"Status: {result.status}{step_str} | Elapsed: {elapsed:.1f}s{eta_str}"
|
|
610
|
-
)
|
|
595
|
+
status_msg = f"Status: {result.status}{step_str} | Elapsed: {elapsed:.1f}s"
|
|
611
596
|
self._log(f" {status_msg}")
|
|
612
597
|
|
|
613
598
|
last_status = result.status
|
|
@@ -638,15 +623,45 @@ class Engine:
|
|
|
638
623
|
# File upload
|
|
639
624
|
# ------------------------------------------------------------------
|
|
640
625
|
|
|
626
|
+
# 8 MB chunks — large enough to amortize syscall + TLS overhead, small
|
|
627
|
+
# enough that memory stays bounded regardless of file size.
|
|
628
|
+
_UPLOAD_CHUNK_SIZE = 8 * 1024 * 1024
|
|
629
|
+
|
|
630
|
+
@staticmethod
|
|
631
|
+
async def _stream_file_chunks(path: Path, chunk_size: int):
|
|
632
|
+
"""Yield file contents in fixed-size chunks for streaming uploads.
|
|
633
|
+
|
|
634
|
+
Reads from disk via run_in_executor so the event loop isn't blocked
|
|
635
|
+
on the read syscall. Memory stays bounded to one chunk regardless
|
|
636
|
+
of file size — required for multi-GB uploads that previously OOMed
|
|
637
|
+
or hit ``_ssl.c:2426`` when passed as a single bytes object.
|
|
638
|
+
"""
|
|
639
|
+
loop = asyncio.get_event_loop()
|
|
640
|
+
with path.open("rb") as f:
|
|
641
|
+
while True:
|
|
642
|
+
chunk = await loop.run_in_executor(None, f.read, chunk_size)
|
|
643
|
+
if not chunk:
|
|
644
|
+
break
|
|
645
|
+
yield chunk
|
|
646
|
+
|
|
641
647
|
async def _presign_and_upload(
|
|
642
648
|
self,
|
|
643
|
-
|
|
649
|
+
file_source: Union[bytes, Path],
|
|
644
650
|
filename: str,
|
|
645
651
|
mime_type: str,
|
|
646
652
|
) -> Tuple[Dict[str, Any], Dict[str, Any]]:
|
|
647
|
-
"""Upload a file using presigned URL (3-step: presign, upload, finalize).
|
|
653
|
+
"""Upload a file using presigned URL (3-step: presign, upload, finalize).
|
|
654
|
+
|
|
655
|
+
``file_source`` may be either an in-memory ``bytes`` object (used for
|
|
656
|
+
DataFrame uploads) or a ``Path`` to a file on disk. Path inputs are
|
|
657
|
+
streamed chunk-by-chunk so memory stays bounded for multi-GB files.
|
|
658
|
+
"""
|
|
648
659
|
dashboard_client = await self._get_dashboard_client()
|
|
649
|
-
|
|
660
|
+
|
|
661
|
+
if isinstance(file_source, Path):
|
|
662
|
+
file_size = file_source.stat().st_size
|
|
663
|
+
else:
|
|
664
|
+
file_size = len(file_source)
|
|
650
665
|
|
|
651
666
|
presign_response = await dashboard_client.post(
|
|
652
667
|
"/api/data/upload/presign",
|
|
@@ -670,11 +685,22 @@ class Engine:
|
|
|
670
685
|
key = presign_data["key"]
|
|
671
686
|
upload_token = presign_data["uploadToken"]
|
|
672
687
|
|
|
688
|
+
# GCS XML API does not accept chunked transfer encoding on PUT, so
|
|
689
|
+
# we must always send an explicit Content-Length. For Path inputs we
|
|
690
|
+
# stream from disk; for bytes we let httpx send the buffer directly.
|
|
691
|
+
if isinstance(file_source, Path):
|
|
692
|
+
upload_content = self._stream_file_chunks(file_source, self._UPLOAD_CHUNK_SIZE)
|
|
693
|
+
else:
|
|
694
|
+
upload_content = file_source
|
|
695
|
+
|
|
673
696
|
async with httpx.AsyncClient(timeout=self._TIMEOUT) as upload_client:
|
|
674
697
|
upload_response = await upload_client.put(
|
|
675
698
|
upload_url,
|
|
676
|
-
content=
|
|
677
|
-
headers={
|
|
699
|
+
content=upload_content,
|
|
700
|
+
headers={
|
|
701
|
+
"Content-Type": mime_type,
|
|
702
|
+
"Content-Length": str(file_size),
|
|
703
|
+
},
|
|
678
704
|
)
|
|
679
705
|
if upload_response.status_code >= 400:
|
|
680
706
|
raise ValueError(
|
|
@@ -702,13 +728,13 @@ class Engine:
|
|
|
702
728
|
|
|
703
729
|
async def _upload_file_direct(
|
|
704
730
|
self,
|
|
705
|
-
|
|
731
|
+
file_source: Union[bytes, Path],
|
|
706
732
|
filename: str,
|
|
707
733
|
mime_type: str,
|
|
708
734
|
) -> Dict[str, Any]:
|
|
709
735
|
"""Upload a file using presigned URL. Returns finalize result."""
|
|
710
736
|
_, finalize_data = await self._presign_and_upload(
|
|
711
|
-
|
|
737
|
+
file_source=file_source,
|
|
712
738
|
filename=filename,
|
|
713
739
|
mime_type=mime_type,
|
|
714
740
|
)
|
|
@@ -720,7 +746,14 @@ class Engine:
|
|
|
720
746
|
filename: Optional[str] = None,
|
|
721
747
|
title: Optional[str] = None,
|
|
722
748
|
log: bool = False,
|
|
723
|
-
) -> Tuple[bytes, str, str, float]:
|
|
749
|
+
) -> Tuple[Union[bytes, Path], str, str, float]:
|
|
750
|
+
"""Resolve an upload source into (source, filename, mime_type, size_mb).
|
|
751
|
+
|
|
752
|
+
Returns ``Path`` for file inputs (streamed in ``_presign_and_upload``)
|
|
753
|
+
and ``bytes`` for DataFrame inputs (kept in memory). DataFrame uploads
|
|
754
|
+
are bounded by available RAM; users with multi-GB DataFrames should
|
|
755
|
+
``df.to_csv(path, index=False)`` and pass the path instead.
|
|
756
|
+
"""
|
|
724
757
|
if pd is not None and isinstance(file, pd.DataFrame):
|
|
725
758
|
import io
|
|
726
759
|
|
|
@@ -728,8 +761,8 @@ class Engine:
|
|
|
728
761
|
self._log(f"Preparing DataFrame ({len(file)} rows, {len(file.columns)} columns)...")
|
|
729
762
|
buffer = io.BytesIO()
|
|
730
763
|
file.to_csv(buffer, index=False)
|
|
731
|
-
buffer.
|
|
732
|
-
|
|
764
|
+
file_source: Union[bytes, Path] = buffer.getvalue()
|
|
765
|
+
file_size = len(file_source)
|
|
733
766
|
resolved_filename = filename or ((title + ".csv") if title else "dataset.csv")
|
|
734
767
|
mime_type = "text/csv"
|
|
735
768
|
else:
|
|
@@ -738,7 +771,8 @@ class Engine:
|
|
|
738
771
|
raise FileNotFoundError(f"File not found: {file_path}")
|
|
739
772
|
if log:
|
|
740
773
|
self._log(f"Reading file: {file_path.name}...")
|
|
741
|
-
|
|
774
|
+
file_source = file_path
|
|
775
|
+
file_size = file_path.stat().st_size
|
|
742
776
|
resolved_filename = filename or file_path.name
|
|
743
777
|
_MIME_TYPES = {
|
|
744
778
|
".csv": "text/csv",
|
|
@@ -752,10 +786,10 @@ class Engine:
|
|
|
752
786
|
}
|
|
753
787
|
mime_type = _MIME_TYPES.get(file_path.suffix.lower(), "text/csv")
|
|
754
788
|
|
|
755
|
-
file_size_mb =
|
|
789
|
+
file_size_mb = file_size / (1024 * 1024)
|
|
756
790
|
if log:
|
|
757
791
|
self._log(f" File size: {file_size_mb:.2f} MB")
|
|
758
|
-
return
|
|
792
|
+
return file_source, resolved_filename, mime_type, file_size_mb
|
|
759
793
|
|
|
760
794
|
# ------------------------------------------------------------------
|
|
761
795
|
# File upload
|
|
@@ -776,13 +810,13 @@ class Engine:
|
|
|
776
810
|
Dict with ``file`` (key, name, size, fileHash) and ``columns``
|
|
777
811
|
(list of dicts with ``name``, ``type``, ``enabled``).
|
|
778
812
|
"""
|
|
779
|
-
|
|
813
|
+
file_source, filename, mime_type, _ = self._prepare_upload(
|
|
780
814
|
file=file,
|
|
781
815
|
title=title,
|
|
782
816
|
log=True,
|
|
783
817
|
)
|
|
784
818
|
self._log(" Uploading to storage...")
|
|
785
|
-
result = await self._upload_file_direct(
|
|
819
|
+
result = await self._upload_file_direct(file_source, filename, mime_type)
|
|
786
820
|
|
|
787
821
|
if not result.get("ok"):
|
|
788
822
|
errors = result.get("issues", {}).get("errors", [])
|
|
@@ -859,7 +893,7 @@ class Engine:
|
|
|
859
893
|
f"Creating run from pre-uploaded file (depth: {analysis_depth}, target: {target_column})..."
|
|
860
894
|
)
|
|
861
895
|
else:
|
|
862
|
-
|
|
896
|
+
file_source, filename, mime_type, _ = self._prepare_upload(
|
|
863
897
|
file=file,
|
|
864
898
|
title=title,
|
|
865
899
|
log=True,
|
|
@@ -868,7 +902,7 @@ class Engine:
|
|
|
868
902
|
|
|
869
903
|
# Step 1: Upload file
|
|
870
904
|
self._log(" Uploading to storage...")
|
|
871
|
-
raw_result = await self._upload_file_direct(
|
|
905
|
+
raw_result = await self._upload_file_direct(file_source, filename, mime_type)
|
|
872
906
|
|
|
873
907
|
if not raw_result.get("ok"):
|
|
874
908
|
errors = raw_result.get("issues", {}).get("errors", [])
|
|
@@ -1139,7 +1173,6 @@ class Engine:
|
|
|
1139
1173
|
current_step_message=data.get("current_step", {}).get("message")
|
|
1140
1174
|
if data.get("current_step")
|
|
1141
1175
|
else None,
|
|
1142
|
-
estimated_seconds=data.get("estimated_seconds"),
|
|
1143
1176
|
estimated_wait_seconds=data.get("estimated_wait_seconds"),
|
|
1144
1177
|
error_message=data.get("error_message"),
|
|
1145
1178
|
report_url=report_url,
|
|
@@ -205,7 +205,6 @@ class EngineResult:
|
|
|
205
205
|
queue_position: Optional[int] = None
|
|
206
206
|
current_step: Optional[str] = None
|
|
207
207
|
current_step_message: Optional[str] = None
|
|
208
|
-
estimated_seconds: Optional[int] = None
|
|
209
208
|
estimated_wait_seconds: Optional[int] = None
|
|
210
209
|
error_message: Optional[str] = None
|
|
211
210
|
|
|
@@ -232,6 +231,5 @@ class RunStatus:
|
|
|
232
231
|
queue_position: Optional[int] = None
|
|
233
232
|
current_step: Optional[str] = None
|
|
234
233
|
current_step_message: Optional[str] = None
|
|
235
|
-
estimated_seconds: Optional[int] = None
|
|
236
234
|
estimated_wait_seconds: Optional[int] = None
|
|
237
235
|
error_message: Optional[str] = None
|
|
File without changes
|
|
File without changes
|
{discovery_engine_api-0.2.98 → discovery_engine_api-0.2.100}/discovery/integrations/__init__.py
RENAMED
|
File without changes
|
{discovery_engine_api-0.2.98 → discovery_engine_api-0.2.100}/discovery/integrations/crewai.py
RENAMED
|
File without changes
|
{discovery_engine_api-0.2.98 → discovery_engine_api-0.2.100}/discovery/integrations/langchain.py
RENAMED
|
File without changes
|