discovery-engine-api 0.1.9__py3-none-any.whl → 0.1.24__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
discovery/__init__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  """Discovery Engine Python SDK."""
2
2
 
3
- __version__ = "0.1.9"
3
+ __version__ = "0.1.24"
4
4
 
5
5
  from discovery.client import Engine
6
6
  from discovery.types import (
discovery/client.py CHANGED
@@ -103,8 +103,21 @@ class Engine:
103
103
  return self._client
104
104
 
105
105
  async def _get_client_with_org(self) -> httpx.AsyncClient:
106
- """Get HTTP client (no longer needs org header for dashboard API)."""
107
- return await self._get_client()
106
+ """
107
+ Get HTTP client with organization header set.
108
+
109
+ The organization ID is required for API requests to identify which
110
+ organization the user belongs to (multi-tenancy support).
111
+ """
112
+ client = await self._get_client()
113
+
114
+ # Ensure we have an organization ID
115
+ org_id = await self._ensure_organization_id()
116
+
117
+ # Set the organization header
118
+ client.headers["X-Organization-ID"] = org_id
119
+
120
+ return client
108
121
 
109
122
  async def close(self):
110
123
  """Close the HTTP client."""
@@ -124,18 +137,24 @@ class Engine:
124
137
  """
125
138
  Get the organizations you belong to.
126
139
 
127
- Note: This is no longer needed for the simplified SDK workflow,
128
- but kept for backwards compatibility.
129
-
130
140
  Returns:
131
141
  List of organizations with id, name, and slug
132
142
 
133
143
  Raises:
134
144
  ValueError: If the API request fails
135
145
  """
136
- # Organizations are handled automatically by the dashboard API
137
- # Return empty list for now - not needed for report creation
138
- return []
146
+ client = await self._get_client()
147
+
148
+ try:
149
+ response = await client.get("/v1/me/organizations")
150
+ response.raise_for_status()
151
+ return response.json()
152
+ except httpx.HTTPStatusError as e:
153
+ raise ValueError(
154
+ f"Failed to fetch organizations: {e.response.status_code} {e.response.text}"
155
+ ) from e
156
+ except httpx.RequestError as e:
157
+ raise ValueError(f"Failed to connect to API: {str(e)}") from e
139
158
 
140
159
  async def upload_file(
141
160
  self, file: Union[str, Path, "pd.DataFrame"], filename: Optional[str] = None
@@ -346,7 +365,7 @@ class Engine:
346
365
  Returns:
347
366
  EngineResult with complete analysis data
348
367
  """
349
- client = await self._get_client()
368
+ client = await self._get_client_with_org()
350
369
 
351
370
  # Call dashboard API for results
352
371
  response = await client.get(f"/api/runs/{run_id}/results")
@@ -443,6 +462,7 @@ class Engine:
443
462
  title: Optional[str] = None,
444
463
  description: Optional[str] = None,
445
464
  column_descriptions: Optional[Dict[str, str]] = None,
465
+ excluded_columns: Optional[List[str]] = None,
446
466
  task: Optional[str] = None,
447
467
  visibility: str = "public",
448
468
  timeseries_groups: Optional[List[Dict[str, Any]]] = None,
@@ -467,6 +487,7 @@ class Engine:
467
487
  title: Optional dataset title
468
488
  description: Optional dataset description
469
489
  column_descriptions: Optional dict mapping column names to descriptions
490
+ excluded_columns: Optional list of column names to exclude from analysis
470
491
  task: Task type (regression, binary, multiclass) - auto-detected if None
471
492
  visibility: Dataset visibility ("public" or "private", default: "public")
472
493
  timeseries_groups: Optional list of timeseries column groups
@@ -480,7 +501,7 @@ class Engine:
480
501
  Returns:
481
502
  EngineResult with run_id and (if wait=True) complete results
482
503
  """
483
- client = await self._get_client()
504
+ client = await self._get_client_with_org()
484
505
 
485
506
  # Prepare file for upload
486
507
  if pd is not None and isinstance(file, pd.DataFrame):
@@ -526,6 +547,8 @@ class Engine:
526
547
  data["source_url"] = source_url
527
548
  if column_descriptions:
528
549
  data["column_descriptions"] = json.dumps(column_descriptions)
550
+ if excluded_columns:
551
+ data["excluded_columns"] = json.dumps(excluded_columns)
529
552
  if timeseries_groups:
530
553
  data["timeseries_groups"] = json.dumps(timeseries_groups)
531
554
 
@@ -588,6 +611,7 @@ class Engine:
588
611
  title: Optional[str] = None,
589
612
  description: Optional[str] = None,
590
613
  column_descriptions: Optional[Dict[str, str]] = None,
614
+ excluded_columns: Optional[List[str]] = None,
591
615
  task: Optional[str] = None,
592
616
  visibility: str = "public",
593
617
  timeseries_groups: Optional[List[Dict[str, Any]]] = None,
@@ -611,6 +635,7 @@ class Engine:
611
635
  title: Optional dataset title
612
636
  description: Optional dataset description
613
637
  column_descriptions: Optional dict mapping column names to descriptions
638
+ excluded_columns: Optional list of column names to exclude from analysis
614
639
  task: Task type (regression, binary_classification, multiclass_classification) - auto-detected if None
615
640
  visibility: Dataset visibility ("public" or "private", default: "public")
616
641
  timeseries_groups: Optional list of timeseries column groups
@@ -632,6 +657,7 @@ class Engine:
632
657
  title=title,
633
658
  description=description,
634
659
  column_descriptions=column_descriptions,
660
+ excluded_columns=excluded_columns,
635
661
  task=task,
636
662
  visibility=visibility,
637
663
  timeseries_groups=timeseries_groups,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: discovery-engine-api
3
- Version: 0.1.9
3
+ Version: 0.1.24
4
4
  Summary: Python SDK for the Discovery Engine API
5
5
  Project-URL: Homepage, https://github.com/leap-laboratories/discovery
6
6
  Project-URL: Documentation, https://github.com/leap-laboratories/discovery
@@ -75,6 +75,7 @@ result = engine.run(
75
75
  target_column="diagnosis",
76
76
  mode="fast",
77
77
  description="Rare diseases dataset",
78
+ excluded_columns=["patient_id"], # Exclude ID column from analysis
78
79
  wait=True # Wait for completion and return full results
79
80
  )
80
81
 
@@ -103,6 +104,7 @@ result = engine.run(
103
104
  "age": "Patient age in years",
104
105
  "heart rate": None
105
106
  },
107
+ excluded_columns=["id", "timestamp"], # Exclude ID and timestamp columns from analysis
106
108
  wait=True
107
109
  )
108
110
  ```
@@ -172,6 +174,7 @@ The `run()` and `run_async()` methods accept the following parameters:
172
174
  | `title` | `str` | `None` | Optional dataset title |
173
175
  | `description` | `str` | `None` | Optional dataset description |
174
176
  | `column_descriptions` | `Dict[str, str]` | `None` | Optional column name -> description mapping |
177
+ | `excluded_columns` | `List[str]` | `None` | Optional list of column names to exclude from analysis (e.g., IDs, timestamps) |
175
178
  | `visibility` | `"public"` / `"private"` | `"public"` | Dataset visibility (private requires credits) |
176
179
  | `auto_report_use_llm_evals` | `bool` | `True` | Use LLM for pattern descriptions |
177
180
  | `author` | `str` | `None` | Optional dataset author attribution |
@@ -185,7 +188,7 @@ The `run()` and `run_async()` methods accept the following parameters:
185
188
  - **Public datasets**: Free (0 credits required)
186
189
  - **Private datasets**:
187
190
  - Fast mode: 1 credit per MB
188
- - Deep mode: 3 credits per MB
191
+ - Deep mode: (number of columns × 0.1) credits per MB (minimum 3× fast mode cost)
189
192
 
190
193
  If you don't have enough credits for a private run, the SDK will raise an `httpx.HTTPStatusError` with an error message like:
191
194
  ```
@@ -0,0 +1,6 @@
1
+ discovery/__init__.py,sha256=9YuAELQB1dqkdnYZXdgB-qt3TI7pMaG-J1Y4iJ4EUZA,586
2
+ discovery/client.py,sha256=Io1vGqiEGrPPwOFxBDMaFbKlARj_Xg0izyWoRmoQy-c,30802
3
+ discovery/types.py,sha256=4Z3gKdxWnOpymEjBGCzAeUGjwRT2A0aCpmuwctbE4w0,6008
4
+ discovery_engine_api-0.1.24.dist-info/METADATA,sha256=6Tmt9cCbe_s-P8Id_zS5o7mac706Mb5d9h6g1EzuBYA,12867
5
+ discovery_engine_api-0.1.24.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
6
+ discovery_engine_api-0.1.24.dist-info/RECORD,,
@@ -1,6 +0,0 @@
1
- discovery/__init__.py,sha256=MKz5NmDXau4PIU5e_sQmEboCag5L3bAG-1b6HANwlpQ,585
2
- discovery/client.py,sha256=i2WX59IgZ-olmn96oAQOgeObeqP0C1EjxewGLlQLcO0,29784
3
- discovery/types.py,sha256=4Z3gKdxWnOpymEjBGCzAeUGjwRT2A0aCpmuwctbE4w0,6008
4
- discovery_engine_api-0.1.9.dist-info/METADATA,sha256=21I33FLHiGKysw-iGguSmreZLMN1P5de5ziFvU24f8Q,12521
5
- discovery_engine_api-0.1.9.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
6
- discovery_engine_api-0.1.9.dist-info/RECORD,,