kumoai 2.8.0.dev202508221830__cp312-cp312-win_amd64.whl → 2.13.0.dev202512041141__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of kumoai has been flagged as potentially problematic.
- kumoai/__init__.py +22 -11
- kumoai/_version.py +1 -1
- kumoai/client/client.py +17 -16
- kumoai/client/endpoints.py +1 -0
- kumoai/client/rfm.py +37 -8
- kumoai/connector/file_upload_connector.py +94 -85
- kumoai/connector/utils.py +1399 -210
- kumoai/experimental/rfm/__init__.py +164 -46
- kumoai/experimental/rfm/authenticate.py +8 -5
- kumoai/experimental/rfm/backend/__init__.py +0 -0
- kumoai/experimental/rfm/backend/local/__init__.py +38 -0
- kumoai/experimental/rfm/backend/local/table.py +109 -0
- kumoai/experimental/rfm/backend/snow/__init__.py +35 -0
- kumoai/experimental/rfm/backend/snow/table.py +117 -0
- kumoai/experimental/rfm/backend/sqlite/__init__.py +30 -0
- kumoai/experimental/rfm/backend/sqlite/table.py +101 -0
- kumoai/experimental/rfm/base/__init__.py +10 -0
- kumoai/experimental/rfm/base/column.py +66 -0
- kumoai/experimental/rfm/base/source.py +18 -0
- kumoai/experimental/rfm/base/table.py +545 -0
- kumoai/experimental/rfm/{local_graph.py → graph.py} +413 -144
- kumoai/experimental/rfm/infer/__init__.py +6 -0
- kumoai/experimental/rfm/infer/dtype.py +79 -0
- kumoai/experimental/rfm/infer/pkey.py +126 -0
- kumoai/experimental/rfm/infer/time_col.py +62 -0
- kumoai/experimental/rfm/infer/timestamp.py +7 -4
- kumoai/experimental/rfm/local_graph_sampler.py +58 -11
- kumoai/experimental/rfm/local_graph_store.py +45 -37
- kumoai/experimental/rfm/local_pquery_driver.py +342 -46
- kumoai/experimental/rfm/pquery/__init__.py +4 -4
- kumoai/experimental/rfm/pquery/{backend.py → executor.py} +28 -58
- kumoai/experimental/rfm/pquery/pandas_executor.py +532 -0
- kumoai/experimental/rfm/rfm.py +559 -148
- kumoai/experimental/rfm/sagemaker.py +138 -0
- kumoai/jobs.py +27 -1
- kumoai/kumolib.cp312-win_amd64.pyd +0 -0
- kumoai/pquery/prediction_table.py +5 -3
- kumoai/pquery/training_table.py +5 -3
- kumoai/spcs.py +1 -3
- kumoai/testing/decorators.py +1 -1
- kumoai/trainer/job.py +9 -30
- kumoai/trainer/trainer.py +19 -10
- kumoai/utils/__init__.py +2 -1
- kumoai/utils/progress_logger.py +96 -16
- {kumoai-2.8.0.dev202508221830.dist-info → kumoai-2.13.0.dev202512041141.dist-info}/METADATA +14 -5
- {kumoai-2.8.0.dev202508221830.dist-info → kumoai-2.13.0.dev202512041141.dist-info}/RECORD +49 -36
- kumoai/experimental/rfm/local_table.py +0 -448
- kumoai/experimental/rfm/pquery/pandas_backend.py +0 -437
- kumoai/experimental/rfm/utils.py +0 -347
- {kumoai-2.8.0.dev202508221830.dist-info → kumoai-2.13.0.dev202512041141.dist-info}/WHEEL +0 -0
- {kumoai-2.8.0.dev202508221830.dist-info → kumoai-2.13.0.dev202512041141.dist-info}/licenses/LICENSE +0 -0
- {kumoai-2.8.0.dev202508221830.dist-info → kumoai-2.13.0.dev202512041141.dist-info}/top_level.txt +0 -0
kumoai/__init__.py
CHANGED
@@ -184,15 +184,12 @@ def init(
         snowflake_credentials
     ) if not api_key and snowflake_credentials else None
     client = KumoClient(url=url, api_key=api_key, spcs_token=spcs_token)
-    if client.authenticate():
-        global_state._url = client._url
-        global_state._api_key = client._api_key
-        global_state._snowflake_credentials = snowflake_credentials
-        global_state._spcs_token = client._spcs_token
-        global_state._snowpark_session = snowpark_session
-    else:
-        raise ValueError("Client authentication failed. Please check if you "
-                         "have a valid API key.")
+    client.authenticate()
+    global_state._url = client._url
+    global_state._api_key = client._api_key
+    global_state._snowflake_credentials = snowflake_credentials
+    global_state._spcs_token = client._spcs_token
+    global_state._snowpark_session = snowpark_session
 
     if not api_key and snowflake_credentials:
         # Refresh token every 10 minutes (expires in 1 hour):
@@ -200,9 +197,11 @@ def init(
 
     logger = logging.getLogger('kumoai')
     log_level = logging.getLevelName(logger.getEffectiveLevel())
+
     logger.info(
-        "Successfully initialized the Kumo SDK
-        "
+        f"Successfully initialized the Kumo SDK (version {__version__}) "
+        f"against deployment {url}, with "
+        f"log level {log_level}.")
 
 
 def set_log_level(level: str) -> None:
@@ -281,7 +280,19 @@ __all__ = [
 ]
 
 
+def in_snowflake_notebook() -> bool:
+    try:
+        from snowflake.snowpark.context import get_active_session
+        import streamlit  # noqa: F401
+        get_active_session()
+        return True
+    except Exception:
+        return False
+
+
 def in_notebook() -> bool:
+    if in_snowflake_notebook():
+        return True
     try:
         from IPython import get_ipython
         shell = get_ipython()
kumoai/_version.py
CHANGED
@@ -1 +1 @@
-__version__ = '2.8.0.dev202508221830'
+__version__ = '2.13.0.dev202512041141'
kumoai/client/client.py
CHANGED
@@ -20,7 +20,6 @@ if TYPE_CHECKING:
     )
     from kumoai.client.online import OnlineServingEndpointAPI
     from kumoai.client.pquery import PQueryAPI
-    from kumoai.client.rfm import RFMAPI
     from kumoai.client.source_table import SourceTableAPI
     from kumoai.client.table import TableAPI
 
@@ -33,6 +32,7 @@ class KumoClient:
         url: str,
         api_key: Optional[str],
         spcs_token: Optional[str] = None,
+        verify_ssl: bool = True,
     ) -> None:
         r"""Creates a client against the Kumo public API, provided a URL of
         the endpoint and an authentication token.
@@ -42,11 +42,14 @@ class KumoClient:
             api_key: the public API authentication token.
            spcs_token: the SPCS token used for authentication to access the
                Kumo API endpoint.
+           verify_ssl: whether to verify SSL certificates. Set to False to
+               skip SSL certificate verification (equivalent to curl -k).
        """
        self._url = url
        self._api_url = f"{url}/{API_VERSION}"
        self._api_key = api_key
        self._spcs_token = spcs_token
+       self._verify_ssl = verify_ssl
 
        retry_strategy = Retry(
            total=10,  # Maximum number of retries
@@ -69,11 +72,15 @@ class KumoClient:
         self._session.headers.update(
             {'Authorization': f'Snowflake Token={self._spcs_token}'})
 
-    def authenticate(self) ->
-
-
-
-
+    def authenticate(self) -> None:
+        """Raises an exception if authentication fails."""
+        try:
+            self._session.get(self._url + '/v1/connectors',
+                              verify=self._verify_ssl).raise_for_status()
+        except Exception:
+            raise ValueError(
+                "Client authentication failed. Please check if you "
+                "have a valid API key/credentials.")
 
     def set_spcs_token(self, spcs_token: str) -> None:
         r"""Sets the SPCS token for the client and updates the session
@@ -158,12 +165,6 @@ class KumoClient:
         from kumoai.client.online import OnlineServingEndpointAPI
         return OnlineServingEndpointAPI(self)
 
-    @property
-    def rfm_api(self) -> 'RFMAPI':
-        r"""Returns the typed RFM API."""
-        from kumoai.client.rfm import RFMAPI
-        return RFMAPI(self)
-
     def _request(self, endpoint: Endpoint, **kwargs: Any) -> requests.Response:
         r"""Send a HTTP request to the specified endpoint."""
         endpoint_str = endpoint.get_path()
@@ -184,7 +185,7 @@ class KumoClient:
         :meth:`requests.Session.get`.
         """
         url = self._format_endpoint_url(endpoint)
-        return self._session.get(url=url, **kwargs)
+        return self._session.get(url=url, verify=self._verify_ssl, **kwargs)
 
     def _post(self, endpoint: str, **kwargs: Any) -> requests.Response:
         r"""Send a POST request to the specified endpoint, with keyword
@@ -192,7 +193,7 @@ class KumoClient:
         :meth:`requests.Session.post`.
         """
         url = self._format_endpoint_url(endpoint)
-        return self._session.post(url=url, **kwargs)
+        return self._session.post(url=url, verify=self._verify_ssl, **kwargs)
 
     def _patch(self, endpoint: str, **kwargs: Any) -> requests.Response:
         r"""Send a PATCH request to the specified endpoint, with keyword
@@ -200,7 +201,7 @@ class KumoClient:
         :meth:`requests.Session.patch`.
         """
         url = self._format_endpoint_url(endpoint)
-        return self._session.patch(url=url, **kwargs)
+        return self._session.patch(url=url, verify=self._verify_ssl, **kwargs)
 
     def _delete(self, endpoint: str, **kwargs: Any) -> requests.Response:
         r"""Send a DELETE request to the specified endpoint, with keyword
@@ -208,7 +209,7 @@ class KumoClient:
         :meth:`requests.Session.delete`.
         """
         url = self._format_endpoint_url(endpoint)
-        return self._session.delete(url=url, **kwargs)
+        return self._session.delete(url=url, verify=self._verify_ssl, **kwargs)
 
     def _format_endpoint_url(self, endpoint: str) -> str:
         if endpoint[0] == "/":
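
The new verify_ssl flag is stored on the client and threaded through every session call above. A minimal sketch of constructing a client against a deployment with a self-signed certificate; the URL is a placeholder:

    from kumoai.client.client import KumoClient

    # verify_ssl=False disables certificate verification on every request
    # (the docstring likens it to `curl -k`); authenticate() raises
    # ValueError if the credentials are rejected.
    client = KumoClient(url="https://kumo.internal:8443",
                        api_key="<API_KEY>", verify_ssl=False)
    client.authenticate()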
kumoai/client/endpoints.py
CHANGED
@@ -147,3 +147,4 @@ class RFMEndpoints:
     explain = Endpoint(f"{BASE}/explain", HTTPMethod.POST)
     evaluate = Endpoint(f"{BASE}/evaluate", HTTPMethod.POST)
     validate_query = Endpoint(f"{BASE}/validate_query", HTTPMethod.POST)
+    parse_query = Endpoint(f"{BASE}/parse_query", HTTPMethod.POST)
kumoai/client/rfm.py
CHANGED
@@ -1,6 +1,11 @@
+from typing import Any
+
 from kumoapi.json_serde import to_json_dict
 from kumoapi.rfm import (
     RFMEvaluateResponse,
+    RFMExplanationResponse,
+    RFMParseQueryRequest,
+    RFMParseQueryResponse,
     RFMPredictResponse,
     RFMValidateQueryRequest,
     RFMValidateQueryResponse,
@@ -25,28 +30,35 @@ class RFMAPI:
         Returns:
             RFMPredictResponse containing the predictions
         """
-        # Send binary data to the predict endpoint
         response = self._client._request(
-            RFMEndpoints.predict,
-
+            RFMEndpoints.predict,
+            data=request,
+            headers={'Content-Type': 'application/x-protobuf'},
+        )
         raise_on_error(response)
         return parse_response(RFMPredictResponse, response)
 
-    def explain(
+    def explain(
+        self,
+        request: bytes,
+        skip_summary: bool = False,
+    ) -> RFMExplanationResponse:
         """Explain the RFM model on the given context.
 
         Args:
             request: The predict request as serialized protobuf.
+            skip_summary: Whether to skip generating a human-readable summary
+                of the explanation.
 
         Returns:
             RFMPredictResponse containing the explanations
         """
-
+        params: dict[str, Any] = {'generate_summary': not skip_summary}
         response = self._client._request(
-            RFMEndpoints.explain, data=request,
+            RFMEndpoints.explain, data=request, params=params,
             headers={'Content-Type': 'application/x-protobuf'})
         raise_on_error(response)
-        return parse_response(
+        return parse_response(RFMExplanationResponse, response)
 
     def evaluate(self, request: bytes) -> RFMEvaluateResponse:
         """Evaluate the RFM model on the given context.
@@ -57,7 +69,6 @@ class RFMAPI:
         Returns:
             RFMEvaluateResponse containing the computed metrics
         """
-        # Send binary data to the evaluate endpoint
         response = self._client._request(
             RFMEndpoints.evaluate, data=request,
             headers={'Content-Type': 'application/x-protobuf'})
@@ -81,3 +92,21 @@ class RFMAPI:
             json=to_json_dict(request))
         raise_on_error(response)
         return parse_response(RFMValidateQueryResponse, response)
+
+    def parse_query(
+        self,
+        request: RFMParseQueryRequest,
+    ) -> RFMParseQueryResponse:
+        """Validate a predictive query against a graph.
+
+        Args:
+            request: The request object containing
+                the query and graph definition
+
+        Returns:
+            RFMParseQueryResponse containing the QueryDefinition
+        """
+        response = self._client._request(RFMEndpoints.parse_query,
+                                         json=to_json_dict(request))
+        raise_on_error(response)
+        return parse_response(RFMParseQueryResponse, response)
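
Since the rfm_api property was removed from KumoClient (see client.py above), RFMAPI is now constructed directly from a client. A minimal sketch of calling the new parse_query endpoint; the request fields are elided because RFMParseQueryRequest is defined in kumoapi, outside this diff:

    from kumoapi.rfm import RFMParseQueryRequest
    from kumoai.client.rfm import RFMAPI

    api = RFMAPI(client)  # client: an authenticated KumoClient
    request = RFMParseQueryRequest(...)  # query and graph definition
    response = api.parse_query(request)  # POST to the new /parse_query route
    # response is an RFMParseQueryResponse containing the QueryDefinition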
kumoai/connector/file_upload_connector.py
CHANGED

@@ -1,4 +1,3 @@
-import os
 from typing import List
 
 from kumoapi.source_table import (
@@ -12,14 +11,7 @@ from typing_extensions import override
 
 from kumoai import global_state
 from kumoai.connector.base import Connector
-from kumoai.connector.utils import (
-    MAX_PARTITION_SIZE,
-    MIN_PARTITION_SIZE,
-    _upload_partitioned_csv,
-    _upload_partitioned_parquet,
-    _upload_single_file,
-    logger,
-)
+from kumoai.connector.utils import delete_uploaded_table, upload_table
 
 
 class FileUploadConnector(Connector):
@@ -59,7 +51,6 @@ class FileUploadConnector(Connector):
     def name(self) -> str:
         return f'{self._file_type}_upload_connector'
 
-    @override
     @property
     def source_type(self) -> DataSourceType:
         return DataSourceType.S3
@@ -89,92 +80,110 @@ class FileUploadConnector(Connector):
         auto_partition: bool = True,
         partition_size_mb: int = 250,
     ) -> None:
-        r"""
-
-
-
-        this
-
-
-
-
+        r"""Upload a table to Kumo from a local or remote path.
+
+        Supports ``s3://``, ``gs://``, ``abfs://``, ``abfss://``, and ``az://``
+
+        Tables uploaded this way can be accessed from this
+        ``FileUploadConnector`` using the provided name, e.g.,
+        ``connector_obj["my_table"]``.
+
+        Local files
+        -----------
+        - Accepts one ``.parquet`` or ``.csv`` file (must match this
+          connector’s ``file_type``).
+        - If the file is > 1 GiB and ``auto_partition=True``, it is split
+          into ~``partition_size_mb`` MiB parts and uploaded under a common
+          prefix so the connector can read them as one table.
+
+        Remote paths
+        ------------
+        - **Single file** (``.parquet``/``.csv``): validated and uploaded via
+          multipart PUT. Files > 1 GiB are rejected — re-shard to ~200 MiB
+          and upload the directory instead.
+        - **Directory**: must contain only one format (all Parquet or all CSV)
+          matching this connector’s ``file_type``. Files are validated
+          (consistent schema; CSV headers sanitized) and uploaded in parallel
+          with memory-safe budgeting.
 
         .. warning::
-
-
-
+            For local uploads, input must be a single CSV or Parquet file
+            (matching the connector type). For remote uploads, mixed
+            CSV/Parquet directories are not supported. Remote single files
+            larger than 1 GiB are not supported.
 
+        Examples:
+        ---------
         .. code-block:: python
 
             import kumoai
-
+            conn = kumoai.FileUploadConnector(file_type="parquet")
+
+            # Local: small file
+            conn.upload(name="users", path="/data/users.parquet")
+
+            # Local: large file (auto-partitions)
+            conn.upload(
+                name="txns",
+                path="/data/large_txns.parquet",
+            )
+
+            # Local: disable auto-partitioning (raises if > 1 GiB)
+            conn.upload(
+                name="users",
+                path="/data/users.parquet",
+                auto_partition=False,
+            )
+
+            # CSV connector
+            csv_conn = kumoai.FileUploadConnector(file_type="csv")
+            csv_conn.upload(name="sales", path="/data/sales.csv")
 
-            #
-
+            # Remote: single file (<= 1 GiB)
+            conn.upload(name="logs", path="s3://bkt/path/logs.parquet")
 
-            #
-
-                path="/data/large_transactions.parquet")
+            # Remote: directory of shards (uniform format)
+            csv_conn.upload(name="events", path="gs://mybkt/events_csv/")
 
-
-
-
+        Args:
+            name:
+                Table name to create in Kumo; access later via this connector.
+            path:
+                Local path or remote URL to a ``.parquet``/``.csv`` file or a
+                directory (uniform format). The format must match this
+                connector’s ``file_type``.
+            auto_partition:
+                Local-only. If ``True`` and the local file is > 1 GiB, split
+                into ~``partition_size_mb`` MiB parts.
+            partition_size_mb:
+                Local-only. Target partition size (100–1000 MiB) when
+                ``auto_partition`` is ``True``.
+        """
+        upload_table(name=name, path=path, auto_partition=auto_partition,
+                     partition_size_mb=partition_size_mb,
+                     file_type=self._file_type)
+
+    def delete(
+        self,
+        name: str,
+    ) -> None:
+        r"""Synchronously deletes a previously uploaded table from the Kumo
+        data plane.
 
-
-            connectorCSV = kumoai.FileUploadConnector(file_type="csv")
+        .. code-block:: python
 
-            #
-
+            # Assume we have uploaded a `.parquet` table named `users`, and a
+            # `FileUploadConnector` has been created called `connector`, and
+            # we want to delete this table from Kumo:
+            connector.delete(name="users")
 
         Args:
-            name: The name of the table to be
-
-            :
-            path: The full path of the table to be uploaded, on the local
-                machine. File Type must match the connector type.
-            auto_partition: Whether to automatically
-                partition large files (>1GB).
-                If False and file is >1GB, raises ValueError. Supports both
-                Parquet and CSV files.
-            partition_size_mb: The size of each partition in MB. Only used if
-                auto_partition is True.
+            name: The name of the table to be deleted. This table must have
+                previously been uploaded with a call to
+                :meth:`~kumoai.connector.FileUploadConnector.upload`.
         """
-
-
-
-
-
-        # Validate file type
-        if not (path.endswith(".parquet") or path.endswith(".csv")):
-            raise ValueError(f"Path {path} must be either a CSV or Parquet "
-                             f"file. Partitioned data is not currently "
-                             f"supported.")
-
-        file_size = os.path.getsize(path)
-
-        # Route based on file size
-        if file_size < MAX_PARTITION_SIZE:
-            return _upload_single_file(name, path)
-
-        if not auto_partition:
-            raise ValueError(f"File {path} is {file_size / (1024**3):.2f}GB, "
-                             f"which exceeds the 1GB limit. Enable "
-                             f"auto_partition=True to automatically partition "
-                             f"large files.")
-
-        # Partition and upload large files
-        partition_size = partition_size_mb * 1024**2
-        if (partition_size > MAX_PARTITION_SIZE
-                or partition_size < MIN_PARTITION_SIZE):
-            raise ValueError(f"Partition size {partition_size_mb}MB must be "
-                             f"between {MIN_PARTITION_SIZE / 1024**2}MB and "
-                             f"{MAX_PARTITION_SIZE / 1024**2}MB.")
-
-        logger.info(
-            "File %s is large with size %s, partitioning for upload...", path,
-            file_size)
-        if path.endswith('.parquet'):
-            _upload_partitioned_parquet(name, path, partition_size)
-        else:
-            _upload_partitioned_csv(name, path, partition_size)
+        if not self.has_table(name):
+            raise ValueError(f"The table '{name}' does not exist in {self}. "
+                             f"Please check the existence of the source data.")
+
+        delete_uploaded_table(name, self._file_type)