discovery-engine-api 0.1.34__tar.gz → 0.1.50__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {discovery_engine_api-0.1.34 → discovery_engine_api-0.1.50}/PKG-INFO +1 -1
- {discovery_engine_api-0.1.34 → discovery_engine_api-0.1.50}/discovery/__init__.py +1 -1
- {discovery_engine_api-0.1.34 → discovery_engine_api-0.1.50}/discovery/client.py +15 -13
- {discovery_engine_api-0.1.34 → discovery_engine_api-0.1.50}/pyproject.toml +1 -1
- discovery_engine_api-0.1.50/tests/conftest.py +53 -0
- {discovery_engine_api-0.1.34 → discovery_engine_api-0.1.50}/tests/test_client.py +5 -5
- discovery_engine_api-0.1.50/tests/test_client_e2e.py +343 -0
- discovery_engine_api-0.1.34/tests/test_client_e2e.py +0 -241
- {discovery_engine_api-0.1.34 → discovery_engine_api-0.1.50}/.gitignore +0 -0
- {discovery_engine_api-0.1.34 → discovery_engine_api-0.1.50}/README.md +0 -0
- {discovery_engine_api-0.1.34 → discovery_engine_api-0.1.50}/TESTING.md +0 -0
- {discovery_engine_api-0.1.34 → discovery_engine_api-0.1.50}/discovery/types.py +0 -0
- {discovery_engine_api-0.1.34 → discovery_engine_api-0.1.50}/publish.sh +0 -0
- {discovery_engine_api-0.1.34 → discovery_engine_api-0.1.50}/tests/__init__.py +0 -0
{discovery_engine_api-0.1.34 → discovery_engine_api-0.1.50}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: discovery-engine-api
-Version: 0.1.34
+Version: 0.1.50
 Summary: Python SDK for the Discovery Engine API
 Project-URL: Homepage, https://github.com/leap-laboratories/discovery
 Project-URL: Documentation, https://github.com/leap-laboratories/discovery
{discovery_engine_api-0.1.34 → discovery_engine_api-0.1.50}/discovery/client.py
@@ -32,9 +32,9 @@ from discovery.types import (
 class Engine:
     """Engine for the Discovery Engine API."""
 
-    # Production
-    #
-    _DEFAULT_BASE_URL = "https://
+    # Production API URL (can be overridden via DISCOVERY_API_URL env var for testing)
+    # This points to the Modal-deployed FastAPI API
+    _DEFAULT_BASE_URL = "https://leap-labs-production--discovery-api.modal.run"
 
     def __init__(self, api_key: str):
         """
@@ -296,7 +296,7 @@ class Engine:
         dataset_id: str,
         target_column_id: str,
         task: str = "regression",
-
+        depth_iterations: int = 1,
         visibility: str = "public",
         timeseries_groups: Optional[List[Dict[str, Any]]] = None,
         target_column_override: Optional[str] = None,
@@ -311,7 +311,7 @@ class Engine:
             dataset_id: Dataset ID
             target_column_id: Target column ID
             task: Task type (regression, binary_classification, multiclass_classification)
-
+            depth_iterations: Number of iterative feature removal cycles (1 = fastest)
             visibility: Dataset visibility ("public" or "private")
             timeseries_groups: Optional list of timeseries column groups
             target_column_override: Optional override for target column name
@@ -327,7 +327,7 @@ class Engine:
         payload = {
             "run_target_column_id": target_column_id,
             "task": task,
-            "
+            "depth_iterations": depth_iterations,
             "visibility": visibility,
             "auto_report_use_llm_evals": auto_report_use_llm_evals,
         }
@@ -458,7 +458,7 @@ class Engine:
         self,
         file: Union[str, Path, "pd.DataFrame"],
         target_column: str,
-
+        depth_iterations: int = 1,
         title: Optional[str] = None,
         description: Optional[str] = None,
         column_descriptions: Optional[Dict[str, str]] = None,
@@ -483,7 +483,7 @@ class Engine:
         Args:
             file: File path, Path object, or pandas DataFrame
             target_column: Name of the target column
-
+            depth_iterations: Number of iterative feature removal cycles (1 = fastest)
             title: Optional dataset title
             description: Optional dataset description
             column_descriptions: Optional dict mapping column names to descriptions
@@ -535,7 +535,7 @@ class Engine:
         files = {"file": (filename, file_content, mime_type)}
         data: Dict[str, Any] = {
             "target_column": target_column,
-            "
+            "depth_iterations": str(depth_iterations),
             "visibility": visibility,
         }
 
@@ -553,7 +553,9 @@ class Engine:
             data["timeseries_groups"] = json.dumps(timeseries_groups)
 
         # Call dashboard API to create report
-        print(
+        print(
+            f"🚀 Uploading file and creating run (depth: {depth_iterations}, target: {target_column})..."
+        )
         # httpx automatically handles multipart/form-data when both files and data are provided
         response = await client.post("/api/reports/create", files=files, data=data)
         response.raise_for_status()
@@ -607,7 +609,7 @@ class Engine:
         self,
         file: Union[str, Path, "pd.DataFrame"],
         target_column: str,
-
+        depth_iterations: int = 1,
         title: Optional[str] = None,
         description: Optional[str] = None,
         column_descriptions: Optional[Dict[str, str]] = None,
@@ -631,7 +633,7 @@ class Engine:
         Args:
             file: File path, Path object, or pandas DataFrame
             target_column: Name of the target column
-
+            depth_iterations: Number of iterative feature removal cycles (1 = fastest)
             title: Optional dataset title
             description: Optional dataset description
             column_descriptions: Optional dict mapping column names to descriptions
@@ -653,7 +655,7 @@ class Engine:
         coro = self.run_async(
             file,
             target_column,
-
+            depth_iterations,
             title=title,
             description=description,
             column_descriptions=column_descriptions,
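Net effect of the client.py changes: create_run, run_async, and run now take an explicit depth_iterations count (forwarded to the API as "depth_iterations"), and the default base URL points at the Modal-deployed FastAPI backend. A minimal usage sketch, mirroring the call pattern in this version's own e2e tests; the API key and file name below are hypothetical placeholders, not real values:

    import asyncio

    import pandas as pd

    from discovery import Engine


    async def main() -> None:
        # Keys are expected to start with "disco_" (see tests/conftest.py below);
        # "disco_..." and "data.csv" are placeholders.
        engine = Engine(api_key="disco_...")
        df = pd.read_csv("data.csv")
        result = await engine.run_async(
            file=df,
            target_column="price",
            depth_iterations=1,  # 1 = fastest; replaces the parameter removed in this diff
            wait=True,           # block until the Modal-backed run completes
            wait_timeout=600,
        )
        print(result.status, len(result.patterns))


    asyncio.run(main())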
discovery_engine_api-0.1.50/tests/conftest.py (new file)
@@ -0,0 +1,53 @@
+"""
+Pytest configuration for Discovery Engine SDK client tests.
+
+This module provides session-level checks and warnings for API key configuration.
+"""
+
+import os
+
+
+def pytest_configure(config):
+    """
+    Print API key status at the start of the test session.
+
+    This runs once before any tests and provides visibility into whether
+    E2E tests will be executed or skipped.
+    """
+    api_key = os.getenv("DISCOVERY_API_KEY")
+    environment = os.getenv("ENVIRONMENT", "staging")
+
+    separator = "=" * 70
+    print(f"\n{separator}")
+    print("Discovery Engine SDK - E2E Test Configuration")
+    print(separator)
+
+    if not api_key:
+        print("⚠️ WARNING: DISCOVERY_API_KEY is NOT SET!")
+        print("")
+        print(" All E2E tests that require the real API will be SKIPPED.")
+        print("")
+        print(" To enable E2E tests:")
+        print(" • In CI: Add DISCOVERY_API_KEY to GitHub Secrets")
+        print(" • Locally: export DISCOVERY_API_KEY='disco_...'")
+        print("")
+    elif not api_key.startswith("disco_"):
+        print("⚠️ WARNING: DISCOVERY_API_KEY format appears INVALID!")
+        print(f" Key starts with: '{api_key[:10]}...'")
+        print(" Expected format: disco_<token>")
+        print("")
+        print(" E2E tests may fail with authentication errors.")
+        print("")
+    else:
+        print("✅ DISCOVERY_API_KEY is configured (format looks valid)")
+        print(f"✅ Environment: {environment}")
+        api_url = (
+            "https://leap-labs-production--discovery-api.modal.run"
+            if environment == "production"
+            else "https://leap-labs-staging--discovery-api.modal.run"
+        )
+        print(f"✅ API URL: {api_url}")
+        print("")
+
+    print(separator)
+    print("")
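The hook above only reports configuration; it does not register the e2e marker that the rewritten test module below applies via @pytest.mark.e2e. If that marker is not already declared in a config file outside this diff (pytest.ini or pyproject.toml), an extension of the same hook would keep `pytest -m "not e2e"` warning-free — a sketch, not part of the package:

    def pytest_configure(config):
        # Hypothetical extension to the hook shown above: declare the custom
        # marker so pytest does not emit an unknown-marker warning.
        config.addinivalue_line(
            "markers", "e2e: tests that call the real Discovery Engine API"
        )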
{discovery_engine_api-0.1.34 → discovery_engine_api-0.1.50}/tests/test_client.py
@@ -463,7 +463,7 @@ class TestCreateRun:
             dataset_id=dataset_id,
             target_column_id=target_column_id,
             task="regression",
-
+            depth_iterations=1,
         )
 
         assert result == sample_run
@@ -472,7 +472,7 @@ class TestCreateRun:
         payload = call_args[1]["json"]
         assert payload["run_target_column_id"] == target_column_id
         assert payload["task"] == "regression"
-        assert payload["
+        assert payload["depth_iterations"] == 1
 
    @pytest.mark.asyncio
    async def test_create_run_with_optional_params(self, client, mock_httpx_client, sample_run):
@@ -492,7 +492,7 @@ class TestCreateRun:
             dataset_id=dataset_id,
             target_column_id=target_column_id,
             task="regression",
-
+            depth_iterations=1,
             timeseries_groups=timeseries_groups,
             target_column_override="price_override",
             author="Test Author",
@@ -680,7 +680,7 @@ class TestRunAsync:
         result = await client.run_async(
             file=df,
             target_column="price",
-
+            depth_iterations=1,
         )
 
         assert isinstance(result, EngineResult)
@@ -795,7 +795,7 @@ class TestRun:
         result = client.run(
             file=df,
             target_column="price",
-
+            depth_iterations=1,
         )
 
         assert isinstance(result, EngineResult)
discovery_engine_api-0.1.50/tests/test_client_e2e.py (new file)
@@ -0,0 +1,343 @@
+"""
+End-to-end tests for the Discovery Engine Python SDK.
+
+These tests call the real API and exercise the full flow including Modal.
+They are skipped if API credentials are not available.
+
+To run these tests locally:
+    # Set required environment variables
+    export DISCOVERY_API_KEY="your-api-key"
+
+    # Optional: Set environment (defaults to staging)
+    export ENVIRONMENT="staging"  # or "production"
+
+    # Run e2e tests
+    pytest engine/packages/client/tests/test_client_e2e.py -v
+
+    # Or run all tests except e2e
+    pytest -m "not e2e"
+
+To run in CI (GitHub Actions):
+    Set these secrets in GitHub:
+    - DISCOVERY_API_KEY: Your API key
+    - ENVIRONMENT: "staging" or "production" (optional, defaults to staging)
+
+The tests will:
+- Auto-detect environment from ENVIRONMENT or VERCEL_ENV
+- Use staging URL (https://leap-labs-staging--discovery-api.modal.run) by default
+- Use production URL (https://leap-labs-production--discovery-api.modal.run) if ENVIRONMENT=production
+- Skip gracefully if DISCOVERY_API_KEY is not set
+"""
+
+import io
+import os
+import sys
+
+import pandas as pd
+import pytest
+
+# Test data - simple regression dataset
+TEST_DATA_CSV = """age,income,experience,price
+25,50000,2,150000
+30,60000,5,180000
+35,70000,8,220000
+40,80000,12,250000
+45,90000,15,280000
+28,55000,3,160000
+32,65000,6,190000
+38,75000,10,230000
+42,85000,13,260000
+48,95000,18,300000
+"""
+
+
+# Hardcoded API URLs (Modal-hosted FastAPI backend)
+STAGING_API_URL = "https://leap-labs-staging--discovery-api.modal.run"
+PRODUCTION_API_URL = "https://leap-labs-production--discovery-api.modal.run"
+
+
+def get_api_key() -> str | None:
+    """Get API key from environment variable."""
+    return os.getenv("DISCOVERY_API_KEY")
+
+
+def validate_api_key_format(api_key: str) -> tuple[bool, str]:
+    """
+    Validate that the API key has the expected format.
+
+    Returns:
+        Tuple of (is_valid, error_message)
+    """
+    if not api_key:
+        return False, "API key is empty"
+
+    if not api_key.startswith("disco_"):
+        return False, f"API key should start with 'disco_', got: '{api_key[:10]}...'"
+
+    # Expected format: disco_<base64-like-string>
+    # Should be at least 20 characters total
+    if len(api_key) < 20:
+        return False, f"API key appears too short ({len(api_key)} chars)"
+
+    return True, ""
+
+
+def print_api_key_warning(message: str) -> None:
+    """Print a loud warning about API key issues."""
+    separator = "=" * 70
+    print(f"\n{separator}", file=sys.stderr)
+    print("⚠️ WARNING: DISCOVERY_API_KEY ISSUE", file=sys.stderr)
+    print(separator, file=sys.stderr)
+    print(f" {message}", file=sys.stderr)
+    print("", file=sys.stderr)
+    print(" E2E tests against the real API will be SKIPPED.", file=sys.stderr)
+    print("", file=sys.stderr)
+    print(" To fix this:", file=sys.stderr)
+    print(" 1. Get a valid API key from the Discovery dashboard", file=sys.stderr)
+    print(" 2. Set it in GitHub: Settings → Secrets → DISCOVERY_API_KEY", file=sys.stderr)
+    print(" 3. Or locally: export DISCOVERY_API_KEY='disco_...'", file=sys.stderr)
+    print(f"{separator}\n", file=sys.stderr)
+
+
+def get_environment() -> str:
+    """
+    Determine the current environment (staging or production).
+
+    Checks environment variables in order:
+    1. ENVIRONMENT (set in CI/GitHub Actions)
+    2. VERCEL_ENV (set in Vercel deployments)
+    3. Defaults to staging
+    """
+    env = os.getenv("ENVIRONMENT") or os.getenv("VERCEL_ENV")
+    if env == "production":
+        return "production"
+    return "staging"
+
+
+def get_api_url() -> str:
+    """
+    Get API URL based on environment.
+
+    Returns:
+        - Production URL if environment is production
+        - Staging URL otherwise (default)
+    """
+    env = get_environment()
+    if env == "production":
+        return PRODUCTION_API_URL
+    return STAGING_API_URL
+
+
+@pytest.fixture
+def api_key():
+    """Get API key from environment, skip test if not available or invalid."""
+    key = get_api_key()
+
+    if not key:
+        print_api_key_warning("DISCOVERY_API_KEY environment variable is NOT SET")
+        pytest.skip("DISCOVERY_API_KEY environment variable not set")
+
+    is_valid, error_message = validate_api_key_format(key)
+    if not is_valid:
+        print_api_key_warning(f"DISCOVERY_API_KEY format is INVALID: {error_message}")
+        pytest.skip(f"DISCOVERY_API_KEY format invalid: {error_message}")
+
+    return key
+
+
+@pytest.fixture
+def api_url():
+    """Get API URL from environment (optional)."""
+    return get_api_url()
+
+
+@pytest.fixture
+def test_dataframe():
+    """Create test DataFrame from CSV string."""
+    try:
+        return pd.read_csv(io.StringIO(TEST_DATA_CSV))
+    except ImportError:
+        pytest.skip("pandas not available")
+
+
+@pytest.fixture
+def engine(api_key, api_url):
+    """Create Engine instance with API key and optional URL."""
+    from discovery import Engine
+
+    engine = Engine(api_key=api_key)
+    if api_url:
+        engine.base_url = api_url.rstrip("/")
+    return engine
+
+
+def print_auth_error_warning(error: Exception, api_url: str) -> None:
+    """Print a loud warning about authentication failures."""
+    separator = "=" * 70
+    print(f"\n{separator}", file=sys.stderr)
+    print("❌ ERROR: API AUTHENTICATION FAILED", file=sys.stderr)
+    print(separator, file=sys.stderr)
+    print(f" API URL: {api_url}", file=sys.stderr)
+    print(f" Error: {error}", file=sys.stderr)
+    print("", file=sys.stderr)
+    print(" This usually means:", file=sys.stderr)
+    print(" • The DISCOVERY_API_KEY is invalid or expired", file=sys.stderr)
+    print(" • The key doesn't have permission for this environment", file=sys.stderr)
+    print("", file=sys.stderr)
+    print(" To fix this:", file=sys.stderr)
+    print(" 1. Get a new API key from the Discovery dashboard", file=sys.stderr)
+    print(" 2. Update the DISCOVERY_API_KEY secret in GitHub", file=sys.stderr)
+    print(f"{separator}\n", file=sys.stderr)
+
+
+@pytest.mark.e2e
+@pytest.mark.asyncio
+async def test_client_e2e_full_flow(engine, test_dataframe, api_url):
+    """
+    Test the full end-to-end flow: upload, analyze, wait for completion.
+
+    This test:
+    1. Uploads a test dataset via the API
+    2. Creates a run
+    3. Waits for Modal to process the job
+    4. Verifies results are returned
+
+    This exercises the complete production flow including Modal.
+    """
+    try:
+        # Run analysis with wait=True to exercise full flow including Modal
+        result = await engine.run_async(
+            file=test_dataframe,
+            target_column="price",
+            depth_iterations=1,
+            description="E2E test dataset - house price prediction",
+            column_descriptions={
+                "age": "Age of the property owner",
+                "income": "Annual income in USD",
+                "experience": "Years of work experience",
+                "price": "House price in USD",
+            },
+            auto_report_use_llm_evals=False,  # Disable LLMs for faster test
+            wait=True,  # Wait for completion (exercises Modal)
+            wait_timeout=600,  # 10 minute timeout
+        )
+    except Exception as e:
+        error_str = str(e).lower()
+        if (
+            "401" in error_str
+            or "403" in error_str
+            or "unauthorized" in error_str
+            or "forbidden" in error_str
+        ):
+            print_auth_error_warning(e, api_url)
+            pytest.fail(f"API authentication failed - check DISCOVERY_API_KEY: {e}")
+        raise
+
+    # Verify results
+    assert result is not None, "Result should not be None"
+    assert result.run_id is not None, "Run ID should be set"
+    assert result.status == "completed", f"Run should be completed, got status: {result.status}"
+
+    # Verify we got patterns (at least one pattern should be found)
+    assert result.patterns is not None, "Patterns should not be None"
+    assert len(result.patterns) > 0, f"Should find at least one pattern, got {len(result.patterns)}"
+
+    # Verify summary exists
+    assert result.summary is not None, "Summary should not be None"
+
+    # Verify feature importance exists (if available)
+    # Note: Feature importance might be None in some cases, so we don't assert it exists
+
+
+@pytest.mark.e2e
+@pytest.mark.asyncio
+async def test_client_e2e_async_workflow(engine, test_dataframe, api_url):
+    """
+    Test async workflow: start analysis, then wait for completion separately.
+
+    This tests the async pattern where you start a run and check status later.
+    """
+    try:
+        # Start analysis without waiting
+        result = await engine.run_async(
+            file=test_dataframe,
+            target_column="price",
+            depth_iterations=1,
+            auto_report_use_llm_evals=False,
+            wait=False,  # Don't wait immediately
+        )
+    except Exception as e:
+        error_str = str(e).lower()
+        if (
+            "401" in error_str
+            or "403" in error_str
+            or "unauthorized" in error_str
+            or "forbidden" in error_str
+        ):
+            print_auth_error_warning(e, api_url)
+            pytest.fail(f"API authentication failed - check DISCOVERY_API_KEY: {e}")
+        raise
+
+    assert result is not None, "Result should not be None"
+    assert result.run_id is not None, "Run ID should be set"
+    run_id = result.run_id
+
+    # Now wait for completion separately
+    completed_result = await engine.wait_for_completion(
+        run_id=run_id,
+        poll_interval=5.0,  # Check every 5 seconds
+        timeout=600,  # 10 minute timeout
+    )
+
+    # Verify completion
+    assert (
+        completed_result.status == "completed"
+    ), f"Run should be completed, got: {completed_result.status}"
+    assert completed_result.patterns is not None, "Patterns should not be None"
+    assert len(completed_result.patterns) > 0, "Should find at least one pattern"
+
+
+@pytest.mark.e2e
+@pytest.mark.asyncio
+async def test_client_e2e_get_results(engine, test_dataframe, api_url):
+    """
+    Test getting results for an existing run.
+
+    This tests the get_results method which can be used to check status
+    of a run that was started elsewhere.
+    """
+    try:
+        # Start a run
+        result = await engine.run_async(
+            file=test_dataframe,
+            target_column="price",
+            depth_iterations=1,
+            auto_report_use_llm_evals=False,
+            wait=False,
+        )
+    except Exception as e:
+        error_str = str(e).lower()
+        if (
+            "401" in error_str
+            or "403" in error_str
+            or "unauthorized" in error_str
+            or "forbidden" in error_str
+        ):
+            print_auth_error_warning(e, api_url)
+            pytest.fail(f"API authentication failed - check DISCOVERY_API_KEY: {e}")
+        raise
+
+    run_id = result.run_id
+
+    # Get results immediately (might still be processing)
+    initial_result = await engine.get_results(run_id)
+    assert initial_result is not None, "Should get initial result"
+    assert initial_result.run_id == run_id, "Run ID should match"
+
+    # Wait for completion
+    final_result = await engine.wait_for_completion(run_id, timeout=600)
+
+    # Verify final results
+    assert final_result.status == "completed", "Run should complete"
+    assert final_result.patterns is not None, "Patterns should be available"
+    assert len(final_result.patterns) > 0, "Should find patterns"
discovery_engine_api-0.1.34/tests/test_client_e2e.py (deleted)
@@ -1,241 +0,0 @@
-"""
-End-to-end tests for the Discovery Engine Python SDK.
-
-These tests call the real API and exercise the full flow including Modal.
-They are skipped if API credentials are not available.
-
-To run these tests locally:
-    # Set required environment variables
-    export DISCOVERY_API_KEY="your-api-key"
-
-    # Optional: Set environment (defaults to staging)
-    export ENVIRONMENT="staging"  # or "production"
-
-    # Run e2e tests
-    pytest engine/packages/client/tests/test_client_e2e.py -v
-
-    # Or run all tests except e2e
-    pytest -m "not e2e"
-
-To run in CI (GitHub Actions):
-    Set these secrets in GitHub:
-    - DISCOVERY_API_KEY: Your API key
-    - ENVIRONMENT: "staging" or "production" (optional, defaults to staging)
-
-The tests will:
-- Auto-detect environment from ENVIRONMENT or VERCEL_ENV
-- Use staging URL (https://staging.disco.leap-labs.com) by default
-- Use production URL (https://disco.leap-labs.com) if ENVIRONMENT=production
-- Skip gracefully if DISCOVERY_API_KEY is not set
-"""
-
-import io
-import os
-
-import pandas as pd
-import pytest
-
-# Test data - simple regression dataset
-TEST_DATA_CSV = """age,income,experience,price
-25,50000,2,150000
-30,60000,5,180000
-35,70000,8,220000
-40,80000,12,250000
-45,90000,15,280000
-28,55000,3,160000
-32,65000,6,190000
-38,75000,10,230000
-42,85000,13,260000
-48,95000,18,300000
-"""
-
-
-# Hardcoded API URLs (these don't change)
-STAGING_API_URL = "https://staging.disco.leap-labs.com"
-PRODUCTION_API_URL = "https://disco.leap-labs.com"
-
-
-def get_api_key() -> str | None:
-    """Get API key from environment variable."""
-    return os.getenv("DISCOVERY_API_KEY")
-
-
-def get_environment() -> str:
-    """
-    Determine the current environment (staging or production).
-
-    Checks environment variables in order:
-    1. ENVIRONMENT (set in CI/GitHub Actions)
-    2. VERCEL_ENV (set in Vercel deployments)
-    3. Defaults to staging
-    """
-    env = os.getenv("ENVIRONMENT") or os.getenv("VERCEL_ENV")
-    if env == "production":
-        return "production"
-    return "staging"
-
-
-def get_api_url() -> str:
-    """
-    Get API URL based on environment.
-
-    Returns:
-        - Production URL if environment is production
-        - Staging URL otherwise (default)
-    """
-    env = get_environment()
-    if env == "production":
-        return PRODUCTION_API_URL
-    return STAGING_API_URL
-
-
-@pytest.fixture
-def api_key():
-    """Get API key from environment, skip test if not available."""
-    key = get_api_key()
-    if not key:
-        pytest.skip("DISCOVERY_API_KEY environment variable not set")
-    return key
-
-
-@pytest.fixture
-def api_url():
-    """Get API URL from environment (optional)."""
-    return get_api_url()
-
-
-@pytest.fixture
-def test_dataframe():
-    """Create test DataFrame from CSV string."""
-    try:
-        return pd.read_csv(io.StringIO(TEST_DATA_CSV))
-    except ImportError:
-        pytest.skip("pandas not available")
-
-
-@pytest.fixture
-def engine(api_key, api_url):
-    """Create Engine instance with API key and optional URL."""
-    from discovery import Engine
-
-    engine = Engine(api_key=api_key)
-    if api_url:
-        engine.base_url = api_url.rstrip("/")
-    return engine
-
-
-@pytest.mark.e2e
-@pytest.mark.asyncio
-async def test_client_e2e_full_flow(engine, test_dataframe):
-    """
-    Test the full end-to-end flow: upload, analyze, wait for completion.
-
-    This test:
-    1. Uploads a test dataset via the API
-    2. Creates a run
-    3. Waits for Modal to process the job
-    4. Verifies results are returned
-
-    This exercises the complete production flow including Modal.
-    """
-    # Run analysis with wait=True to exercise full flow including Modal
-    result = await engine.run_async(
-        file=test_dataframe,
-        target_column="price",
-        mode="fast",
-        description="E2E test dataset - house price prediction",
-        column_descriptions={
-            "age": "Age of the property owner",
-            "income": "Annual income in USD",
-            "experience": "Years of work experience",
-            "price": "House price in USD",
-        },
-        auto_report_use_llm_evals=False,  # Disable LLMs for faster test
-        wait=True,  # Wait for completion (exercises Modal)
-        wait_timeout=600,  # 10 minute timeout
-    )
-
-    # Verify results
-    assert result is not None, "Result should not be None"
-    assert result.run_id is not None, "Run ID should be set"
-    assert result.status == "completed", f"Run should be completed, got status: {result.status}"
-
-    # Verify we got patterns (at least one pattern should be found)
-    assert result.patterns is not None, "Patterns should not be None"
-    assert len(result.patterns) > 0, f"Should find at least one pattern, got {len(result.patterns)}"
-
-    # Verify summary exists
-    assert result.summary is not None, "Summary should not be None"
-
-    # Verify feature importance exists (if available)
-    # Note: Feature importance might be None in some cases, so we don't assert it exists
-
-
-@pytest.mark.e2e
-@pytest.mark.asyncio
-async def test_client_e2e_async_workflow(engine, test_dataframe):
-    """
-    Test async workflow: start analysis, then wait for completion separately.
-
-    This tests the async pattern where you start a run and check status later.
-    """
-    # Start analysis without waiting
-    result = await engine.run_async(
-        file=test_dataframe,
-        target_column="price",
-        mode="fast",
-        auto_report_use_llm_evals=False,
-        wait=False,  # Don't wait immediately
-    )
-
-    assert result is not None, "Result should not be None"
-    assert result.run_id is not None, "Run ID should be set"
-    run_id = result.run_id
-
-    # Now wait for completion separately
-    completed_result = await engine.wait_for_completion(
-        run_id=run_id,
-        poll_interval=5.0,  # Check every 5 seconds
-        timeout=600,  # 10 minute timeout
-    )
-
-    # Verify completion
-    assert (
-        completed_result.status == "completed"
-    ), f"Run should be completed, got: {completed_result.status}"
-    assert completed_result.patterns is not None, "Patterns should not be None"
-    assert len(completed_result.patterns) > 0, "Should find at least one pattern"
-
-
-@pytest.mark.e2e
-@pytest.mark.asyncio
-async def test_client_e2e_get_results(engine, test_dataframe):
-    """
-    Test getting results for an existing run.
-
-    This tests the get_results method which can be used to check status
-    of a run that was started elsewhere.
-    """
-    # Start a run
-    result = await engine.run_async(
-        file=test_dataframe,
-        target_column="price",
-        mode="fast",
-        auto_report_use_llm_evals=False,
-        wait=False,
-    )
-
-    run_id = result.run_id
-
-    # Get results immediately (might still be processing)
-    initial_result = await engine.get_results(run_id)
-    assert initial_result is not None, "Should get initial result"
-    assert initial_result.run_id == run_id, "Run ID should match"
-
-    # Wait for completion
-    final_result = await engine.wait_for_completion(run_id, timeout=600)
-
-    # Verify final results
-    assert final_result.status == "completed", "Run should complete"
-    assert final_result.patterns is not None, "Patterns should be available"
-    assert len(final_result.patterns) > 0, "Should find patterns"