kumoai 2.9.0.dev202509081831__cp312-cp312-win_amd64.whl → 2.13.0.dev202511201731__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kumoai/__init__.py +10 -11
- kumoai/_version.py +1 -1
- kumoai/client/client.py +17 -16
- kumoai/client/endpoints.py +1 -0
- kumoai/client/rfm.py +37 -8
- kumoai/connector/file_upload_connector.py +71 -102
- kumoai/connector/utils.py +1367 -236
- kumoai/experimental/rfm/__init__.py +153 -10
- kumoai/experimental/rfm/authenticate.py +8 -5
- kumoai/experimental/rfm/infer/timestamp.py +7 -4
- kumoai/experimental/rfm/local_graph.py +90 -80
- kumoai/experimental/rfm/local_graph_sampler.py +16 -10
- kumoai/experimental/rfm/local_graph_store.py +22 -6
- kumoai/experimental/rfm/local_pquery_driver.py +336 -42
- kumoai/experimental/rfm/local_table.py +100 -22
- kumoai/experimental/rfm/pquery/__init__.py +4 -4
- kumoai/experimental/rfm/pquery/{backend.py → executor.py} +24 -58
- kumoai/experimental/rfm/pquery/{pandas_backend.py → pandas_executor.py} +278 -222
- kumoai/experimental/rfm/rfm.py +523 -124
- kumoai/experimental/rfm/sagemaker.py +130 -0
- kumoai/jobs.py +1 -0
- kumoai/kumolib.cp312-win_amd64.pyd +0 -0
- kumoai/spcs.py +1 -3
- kumoai/trainer/trainer.py +19 -10
- kumoai/utils/progress_logger.py +68 -0
- {kumoai-2.9.0.dev202509081831.dist-info → kumoai-2.13.0.dev202511201731.dist-info}/METADATA +13 -5
- {kumoai-2.9.0.dev202509081831.dist-info → kumoai-2.13.0.dev202511201731.dist-info}/RECORD +30 -29
- {kumoai-2.9.0.dev202509081831.dist-info → kumoai-2.13.0.dev202511201731.dist-info}/WHEEL +0 -0
- {kumoai-2.9.0.dev202509081831.dist-info → kumoai-2.13.0.dev202511201731.dist-info}/licenses/LICENSE +0 -0
- {kumoai-2.9.0.dev202509081831.dist-info → kumoai-2.13.0.dev202511201731.dist-info}/top_level.txt +0 -0
kumoai/__init__.py
CHANGED
|
@@ -184,15 +184,12 @@ def init(
|
|
|
184
184
|
snowflake_credentials
|
|
185
185
|
) if not api_key and snowflake_credentials else None
|
|
186
186
|
client = KumoClient(url=url, api_key=api_key, spcs_token=spcs_token)
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
else:
|
|
194
|
-
raise ValueError("Client authentication failed. Please check if you "
|
|
195
|
-
"have a valid API key.")
|
|
187
|
+
client.authenticate()
|
|
188
|
+
global_state._url = client._url
|
|
189
|
+
global_state._api_key = client._api_key
|
|
190
|
+
global_state._snowflake_credentials = snowflake_credentials
|
|
191
|
+
global_state._spcs_token = client._spcs_token
|
|
192
|
+
global_state._snowpark_session = snowpark_session
|
|
196
193
|
|
|
197
194
|
if not api_key and snowflake_credentials:
|
|
198
195
|
# Refresh token every 10 minutes (expires in 1 hour):
|
|
@@ -200,9 +197,11 @@ def init(
|
|
|
200
197
|
|
|
201
198
|
logger = logging.getLogger('kumoai')
|
|
202
199
|
log_level = logging.getLevelName(logger.getEffectiveLevel())
|
|
200
|
+
|
|
203
201
|
logger.info(
|
|
204
|
-
"Successfully initialized the Kumo SDK
|
|
205
|
-
"
|
|
202
|
+
f"Successfully initialized the Kumo SDK (version {__version__}) "
|
|
203
|
+
f"against deployment {url}, with "
|
|
204
|
+
f"log level {log_level}.")
|
|
206
205
|
|
|
207
206
|
|
|
208
207
|
def set_log_level(level: str) -> None:
|
kumoai/_version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = '2.9.0.dev202509081831'
|
|
1
|
+
__version__ = '2.13.0.dev202511201731'
|
kumoai/client/client.py
CHANGED
|
@@ -20,7 +20,6 @@ if TYPE_CHECKING:
|
|
|
20
20
|
)
|
|
21
21
|
from kumoai.client.online import OnlineServingEndpointAPI
|
|
22
22
|
from kumoai.client.pquery import PQueryAPI
|
|
23
|
-
from kumoai.client.rfm import RFMAPI
|
|
24
23
|
from kumoai.client.source_table import SourceTableAPI
|
|
25
24
|
from kumoai.client.table import TableAPI
|
|
26
25
|
|
|
@@ -33,6 +32,7 @@ class KumoClient:
|
|
|
33
32
|
url: str,
|
|
34
33
|
api_key: Optional[str],
|
|
35
34
|
spcs_token: Optional[str] = None,
|
|
35
|
+
verify_ssl: bool = True,
|
|
36
36
|
) -> None:
|
|
37
37
|
r"""Creates a client against the Kumo public API, provided a URL of
|
|
38
38
|
the endpoint and an authentication token.
|
|
@@ -42,11 +42,14 @@ class KumoClient:
|
|
|
42
42
|
api_key: the public API authentication token.
|
|
43
43
|
spcs_token: the SPCS token used for authentication to access the
|
|
44
44
|
Kumo API endpoint.
|
|
45
|
+
verify_ssl: whether to verify SSL certificates. Set to False to
|
|
46
|
+
skip SSL certificate verification (equivalent to curl -k).
|
|
45
47
|
"""
|
|
46
48
|
self._url = url
|
|
47
49
|
self._api_url = f"{url}/{API_VERSION}"
|
|
48
50
|
self._api_key = api_key
|
|
49
51
|
self._spcs_token = spcs_token
|
|
52
|
+
self._verify_ssl = verify_ssl
|
|
50
53
|
|
|
51
54
|
retry_strategy = Retry(
|
|
52
55
|
total=10, # Maximum number of retries
|
|
@@ -69,11 +72,15 @@ class KumoClient:
|
|
|
69
72
|
self._session.headers.update(
|
|
70
73
|
{'Authorization': f'Snowflake Token={self._spcs_token}'})
|
|
71
74
|
|
|
72
|
-
def authenticate(self) ->
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
75
|
+
def authenticate(self) -> None:
|
|
76
|
+
"""Raises an exception if authentication fails."""
|
|
77
|
+
try:
|
|
78
|
+
self._session.get(self._url + '/v1/connectors',
|
|
79
|
+
verify=self._verify_ssl).raise_for_status()
|
|
80
|
+
except Exception:
|
|
81
|
+
raise ValueError(
|
|
82
|
+
"Client authentication failed. Please check if you "
|
|
83
|
+
"have a valid API key/credentials.")
|
|
77
84
|
|
|
78
85
|
def set_spcs_token(self, spcs_token: str) -> None:
|
|
79
86
|
r"""Sets the SPCS token for the client and updates the session
|
|
@@ -158,12 +165,6 @@ class KumoClient:
|
|
|
158
165
|
from kumoai.client.online import OnlineServingEndpointAPI
|
|
159
166
|
return OnlineServingEndpointAPI(self)
|
|
160
167
|
|
|
161
|
-
@property
|
|
162
|
-
def rfm_api(self) -> 'RFMAPI':
|
|
163
|
-
r"""Returns the typed RFM API."""
|
|
164
|
-
from kumoai.client.rfm import RFMAPI
|
|
165
|
-
return RFMAPI(self)
|
|
166
|
-
|
|
167
168
|
def _request(self, endpoint: Endpoint, **kwargs: Any) -> requests.Response:
|
|
168
169
|
r"""Send a HTTP request to the specified endpoint."""
|
|
169
170
|
endpoint_str = endpoint.get_path()
|
|
@@ -184,7 +185,7 @@ class KumoClient:
|
|
|
184
185
|
:meth:`requests.Session.get`.
|
|
185
186
|
"""
|
|
186
187
|
url = self._format_endpoint_url(endpoint)
|
|
187
|
-
return self._session.get(url=url, **kwargs)
|
|
188
|
+
return self._session.get(url=url, verify=self._verify_ssl, **kwargs)
|
|
188
189
|
|
|
189
190
|
def _post(self, endpoint: str, **kwargs: Any) -> requests.Response:
|
|
190
191
|
r"""Send a POST request to the specified endpoint, with keyword
|
|
@@ -192,7 +193,7 @@ class KumoClient:
|
|
|
192
193
|
:meth:`requests.Session.post`.
|
|
193
194
|
"""
|
|
194
195
|
url = self._format_endpoint_url(endpoint)
|
|
195
|
-
return self._session.post(url=url, **kwargs)
|
|
196
|
+
return self._session.post(url=url, verify=self._verify_ssl, **kwargs)
|
|
196
197
|
|
|
197
198
|
def _patch(self, endpoint: str, **kwargs: Any) -> requests.Response:
|
|
198
199
|
r"""Send a PATCH request to the specified endpoint, with keyword
|
|
@@ -200,7 +201,7 @@ class KumoClient:
|
|
|
200
201
|
:meth:`requests.Session.patch`.
|
|
201
202
|
"""
|
|
202
203
|
url = self._format_endpoint_url(endpoint)
|
|
203
|
-
return self._session.patch(url=url, **kwargs)
|
|
204
|
+
return self._session.patch(url=url, verify=self._verify_ssl, **kwargs)
|
|
204
205
|
|
|
205
206
|
def _delete(self, endpoint: str, **kwargs: Any) -> requests.Response:
|
|
206
207
|
r"""Send a DELETE request to the specified endpoint, with keyword
|
|
@@ -208,7 +209,7 @@ class KumoClient:
|
|
|
208
209
|
:meth:`requests.Session.delete`.
|
|
209
210
|
"""
|
|
210
211
|
url = self._format_endpoint_url(endpoint)
|
|
211
|
-
return self._session.delete(url=url, **kwargs)
|
|
212
|
+
return self._session.delete(url=url, verify=self._verify_ssl, **kwargs)
|
|
212
213
|
|
|
213
214
|
def _format_endpoint_url(self, endpoint: str) -> str:
|
|
214
215
|
if endpoint[0] == "/":
|
kumoai/client/endpoints.py
CHANGED
|
@@ -147,3 +147,4 @@ class RFMEndpoints:
|
|
|
147
147
|
explain = Endpoint(f"{BASE}/explain", HTTPMethod.POST)
|
|
148
148
|
evaluate = Endpoint(f"{BASE}/evaluate", HTTPMethod.POST)
|
|
149
149
|
validate_query = Endpoint(f"{BASE}/validate_query", HTTPMethod.POST)
|
|
150
|
+
parse_query = Endpoint(f"{BASE}/parse_query", HTTPMethod.POST)
|
kumoai/client/rfm.py
CHANGED
|
@@ -1,6 +1,11 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
1
3
|
from kumoapi.json_serde import to_json_dict
|
|
2
4
|
from kumoapi.rfm import (
|
|
3
5
|
RFMEvaluateResponse,
|
|
6
|
+
RFMExplanationResponse,
|
|
7
|
+
RFMParseQueryRequest,
|
|
8
|
+
RFMParseQueryResponse,
|
|
4
9
|
RFMPredictResponse,
|
|
5
10
|
RFMValidateQueryRequest,
|
|
6
11
|
RFMValidateQueryResponse,
|
|
@@ -25,28 +30,35 @@ class RFMAPI:
|
|
|
25
30
|
Returns:
|
|
26
31
|
RFMPredictResponse containing the predictions
|
|
27
32
|
"""
|
|
28
|
-
# Send binary data to the predict endpoint
|
|
29
33
|
response = self._client._request(
|
|
30
|
-
RFMEndpoints.predict,
|
|
31
|
-
|
|
34
|
+
RFMEndpoints.predict,
|
|
35
|
+
data=request,
|
|
36
|
+
headers={'Content-Type': 'application/x-protobuf'},
|
|
37
|
+
)
|
|
32
38
|
raise_on_error(response)
|
|
33
39
|
return parse_response(RFMPredictResponse, response)
|
|
34
40
|
|
|
35
|
-
def explain(
|
|
41
|
+
def explain(
|
|
42
|
+
self,
|
|
43
|
+
request: bytes,
|
|
44
|
+
skip_summary: bool = False,
|
|
45
|
+
) -> RFMExplanationResponse:
|
|
36
46
|
"""Explain the RFM model on the given context.
|
|
37
47
|
|
|
38
48
|
Args:
|
|
39
49
|
request: The predict request as serialized protobuf.
|
|
50
|
+
skip_summary: Whether to skip generating a human-readable summary
|
|
51
|
+
of the explanation.
|
|
40
52
|
|
|
41
53
|
Returns:
|
|
42
54
|
RFMExplanationResponse containing the explanations
|
|
43
55
|
"""
|
|
44
|
-
|
|
56
|
+
params: dict[str, Any] = {'generate_summary': not skip_summary}
|
|
45
57
|
response = self._client._request(
|
|
46
|
-
RFMEndpoints.explain, data=request,
|
|
58
|
+
RFMEndpoints.explain, data=request, params=params,
|
|
47
59
|
headers={'Content-Type': 'application/x-protobuf'})
|
|
48
60
|
raise_on_error(response)
|
|
49
|
-
return parse_response(
|
|
61
|
+
return parse_response(RFMExplanationResponse, response)
|
|
50
62
|
|
|
51
63
|
def evaluate(self, request: bytes) -> RFMEvaluateResponse:
|
|
52
64
|
"""Evaluate the RFM model on the given context.
|
|
@@ -57,7 +69,6 @@ class RFMAPI:
|
|
|
57
69
|
Returns:
|
|
58
70
|
RFMEvaluateResponse containing the computed metrics
|
|
59
71
|
"""
|
|
60
|
-
# Send binary data to the evaluate endpoint
|
|
61
72
|
response = self._client._request(
|
|
62
73
|
RFMEndpoints.evaluate, data=request,
|
|
63
74
|
headers={'Content-Type': 'application/x-protobuf'})
|
|
@@ -81,3 +92,21 @@ class RFMAPI:
|
|
|
81
92
|
json=to_json_dict(request))
|
|
82
93
|
raise_on_error(response)
|
|
83
94
|
return parse_response(RFMValidateQueryResponse, response)
|
|
95
|
+
|
|
96
|
+
def parse_query(
|
|
97
|
+
self,
|
|
98
|
+
request: RFMParseQueryRequest,
|
|
99
|
+
) -> RFMParseQueryResponse:
|
|
100
|
+
"""Parse a predictive query against a graph.
|
|
101
|
+
|
|
102
|
+
Args:
|
|
103
|
+
request: The request object containing
|
|
104
|
+
the query and graph definition
|
|
105
|
+
|
|
106
|
+
Returns:
|
|
107
|
+
RFMParseQueryResponse containing the QueryDefinition
|
|
108
|
+
"""
|
|
109
|
+
response = self._client._request(RFMEndpoints.parse_query,
|
|
110
|
+
json=to_json_dict(request))
|
|
111
|
+
raise_on_error(response)
|
|
112
|
+
return parse_response(RFMParseQueryResponse, response)
|
kumoai/connector/file_upload_connector.py
CHANGED
|
@@ -1,7 +1,5 @@
|
|
|
1
|
-
import os
|
|
2
1
|
from typing import List
|
|
3
2
|
|
|
4
|
-
from kumoapi.data_source import DeleteUploadedFileRequest
|
|
5
3
|
from kumoapi.source_table import (
|
|
6
4
|
DataSourceType,
|
|
7
5
|
FileType,
|
|
@@ -13,15 +11,7 @@ from typing_extensions import override
|
|
|
13
11
|
|
|
14
12
|
from kumoai import global_state
|
|
15
13
|
from kumoai.connector.base import Connector
|
|
16
|
-
from kumoai.connector.utils import (
|
|
17
|
-
CONNECTOR_ID_MAP,
|
|
18
|
-
MAX_PARTITION_SIZE,
|
|
19
|
-
MIN_PARTITION_SIZE,
|
|
20
|
-
_upload_partitioned_csv,
|
|
21
|
-
_upload_partitioned_parquet,
|
|
22
|
-
_upload_single_file,
|
|
23
|
-
logger,
|
|
24
|
-
)
|
|
14
|
+
from kumoai.connector.utils import delete_uploaded_table, upload_table
|
|
25
15
|
|
|
26
16
|
|
|
27
17
|
class FileUploadConnector(Connector):
|
|
@@ -61,7 +51,6 @@ class FileUploadConnector(Connector):
|
|
|
61
51
|
def name(self) -> str:
|
|
62
52
|
return f'{self._file_type}_upload_connector'
|
|
63
53
|
|
|
64
|
-
@override
|
|
65
54
|
@property
|
|
66
55
|
def source_type(self) -> DataSourceType:
|
|
67
56
|
return DataSourceType.S3
|
|
@@ -91,100 +80,92 @@ class FileUploadConnector(Connector):
|
|
|
91
80
|
auto_partition: bool = True,
|
|
92
81
|
partition_size_mb: int = 250,
|
|
93
82
|
) -> None:
|
|
94
|
-
r"""
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
this
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
83
|
+
r"""Upload a table to Kumo from a local or remote path.
|
|
84
|
+
|
|
85
|
+
Supports ``s3://``, ``gs://``, ``abfs://``, ``abfss://``, and ``az://`` URLs.
|
|
86
|
+
|
|
87
|
+
Tables uploaded this way can be accessed from this
|
|
88
|
+
``FileUploadConnector`` using the provided name, e.g.,
|
|
89
|
+
``connector_obj["my_table"]``.
|
|
90
|
+
|
|
91
|
+
Local files
|
|
92
|
+
-----------
|
|
93
|
+
- Accepts one ``.parquet`` or ``.csv`` file (must match this
|
|
94
|
+
connector’s ``file_type``).
|
|
95
|
+
- If the file is > 1 GiB and ``auto_partition=True``, it is split
|
|
96
|
+
into ~``partition_size_mb`` MiB parts and uploaded under a common
|
|
97
|
+
prefix so the connector can read them as one table.
|
|
98
|
+
|
|
99
|
+
Remote paths
|
|
100
|
+
------------
|
|
101
|
+
- **Single file** (``.parquet``/``.csv``): validated and uploaded via
|
|
102
|
+
multipart PUT. Files > 1 GiB are rejected — re-shard to ~200 MiB
|
|
103
|
+
and upload the directory instead.
|
|
104
|
+
- **Directory**: must contain only one format (all Parquet or all CSV)
|
|
105
|
+
matching this connector’s ``file_type``. Files are validated
|
|
106
|
+
(consistent schema; CSV headers sanitized) and uploaded in parallel
|
|
107
|
+
with memory-safe budgeting.
|
|
104
108
|
|
|
105
109
|
.. warning::
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
110
|
+
For local uploads, input must be a single CSV or Parquet file
|
|
111
|
+
(matching the connector type). For remote uploads, mixed
|
|
112
|
+
CSV/Parquet directories are not supported. Remote single files
|
|
113
|
+
larger than 1 GiB are not supported.
|
|
109
114
|
|
|
115
|
+
Examples:
|
|
116
|
+
---------
|
|
110
117
|
.. code-block:: python
|
|
111
118
|
|
|
112
119
|
import kumoai
|
|
113
|
-
|
|
120
|
+
conn = kumoai.FileUploadConnector(file_type="parquet")
|
|
121
|
+
|
|
122
|
+
# Local: small file
|
|
123
|
+
conn.upload(name="users", path="/data/users.parquet")
|
|
114
124
|
|
|
115
|
-
#
|
|
116
|
-
|
|
125
|
+
# Local: large file (auto-partitions)
|
|
126
|
+
conn.upload(
|
|
127
|
+
name="txns",
|
|
128
|
+
path="/data/large_txns.parquet",
|
|
129
|
+
)
|
|
117
130
|
|
|
118
|
-
#
|
|
119
|
-
|
|
120
|
-
|
|
131
|
+
# Local: disable auto-partitioning (raises if > 1 GiB)
|
|
132
|
+
conn.upload(
|
|
133
|
+
name="users",
|
|
134
|
+
path="/data/users.parquet",
|
|
135
|
+
auto_partition=False,
|
|
136
|
+
)
|
|
121
137
|
|
|
122
|
-
#
|
|
123
|
-
|
|
124
|
-
|
|
138
|
+
# CSV connector
|
|
139
|
+
csv_conn = kumoai.FileUploadConnector(file_type="csv")
|
|
140
|
+
csv_conn.upload(name="sales", path="/data/sales.csv")
|
|
125
141
|
|
|
126
|
-
#
|
|
127
|
-
|
|
142
|
+
# Remote: single file (<= 1 GiB)
|
|
143
|
+
conn.upload(name="logs", path="s3://bkt/path/logs.parquet")
|
|
128
144
|
|
|
129
|
-
#
|
|
130
|
-
|
|
145
|
+
# Remote: directory of shards (uniform format)
|
|
146
|
+
csv_conn.upload(name="events", path="gs://mybkt/events_csv/")
|
|
131
147
|
|
|
132
148
|
Args:
|
|
133
|
-
name:
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
If
|
|
141
|
-
|
|
142
|
-
partition_size_mb:
|
|
143
|
-
|
|
149
|
+
name:
|
|
150
|
+
Table name to create in Kumo; access later via this connector.
|
|
151
|
+
path:
|
|
152
|
+
Local path or remote URL to a ``.parquet``/``.csv`` file or a
|
|
153
|
+
directory (uniform format). The format must match this
|
|
154
|
+
connector’s ``file_type``.
|
|
155
|
+
auto_partition:
|
|
156
|
+
Local-only. If ``True`` and the local file is > 1 GiB, split
|
|
157
|
+
into ~``partition_size_mb`` MiB parts.
|
|
158
|
+
partition_size_mb:
|
|
159
|
+
Local-only. Target partition size (100–1000 MiB) when
|
|
160
|
+
``auto_partition`` is ``True``.
|
|
144
161
|
"""
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
f"{self._file_type}.")
|
|
149
|
-
|
|
150
|
-
# Validate file type
|
|
151
|
-
if not (path.endswith(".parquet") or path.endswith(".csv")):
|
|
152
|
-
raise ValueError(f"Path {path} must be either a CSV or Parquet "
|
|
153
|
-
f"file. Partitioned data is not currently "
|
|
154
|
-
f"supported.")
|
|
155
|
-
|
|
156
|
-
file_size = os.path.getsize(path)
|
|
157
|
-
|
|
158
|
-
# Route based on file size
|
|
159
|
-
if file_size < MAX_PARTITION_SIZE:
|
|
160
|
-
return _upload_single_file(name, path)
|
|
161
|
-
|
|
162
|
-
if not auto_partition:
|
|
163
|
-
raise ValueError(f"File {path} is {file_size / (1024**3):.2f}GB, "
|
|
164
|
-
f"which exceeds the 1GB limit. Enable "
|
|
165
|
-
f"auto_partition=True to automatically partition "
|
|
166
|
-
f"large files.")
|
|
167
|
-
|
|
168
|
-
# Partition and upload large files
|
|
169
|
-
partition_size = partition_size_mb * 1024**2
|
|
170
|
-
if (partition_size > MAX_PARTITION_SIZE
|
|
171
|
-
or partition_size < MIN_PARTITION_SIZE):
|
|
172
|
-
raise ValueError(f"Partition size {partition_size_mb}MB must be "
|
|
173
|
-
f"between {MIN_PARTITION_SIZE / 1024**2}MB and "
|
|
174
|
-
f"{MAX_PARTITION_SIZE / 1024**2}MB.")
|
|
175
|
-
|
|
176
|
-
logger.info(
|
|
177
|
-
"File %s is large with size %s, partitioning for upload...", path,
|
|
178
|
-
file_size)
|
|
179
|
-
if path.endswith('.parquet'):
|
|
180
|
-
_upload_partitioned_parquet(name, path, partition_size)
|
|
181
|
-
else:
|
|
182
|
-
_upload_partitioned_csv(name, path, partition_size)
|
|
162
|
+
upload_table(name=name, path=path, auto_partition=auto_partition,
|
|
163
|
+
partition_size_mb=partition_size_mb,
|
|
164
|
+
file_type=self._file_type)
|
|
183
165
|
|
|
184
166
|
def delete(
|
|
185
167
|
self,
|
|
186
168
|
name: str,
|
|
187
|
-
file_type: str,
|
|
188
169
|
) -> None:
|
|
189
170
|
r"""Synchronously deletes a previously uploaded table from the Kumo
|
|
190
171
|
data plane.
|
|
@@ -194,27 +175,15 @@ class FileUploadConnector(Connector):
|
|
|
194
175
|
# Assume we have uploaded a `.parquet` table named `users`, and a
|
|
195
176
|
# `FileUploadConnector` has been created called `connector`, and
|
|
196
177
|
# we want to delete this table from Kumo:
|
|
197
|
-
connector.delete(name="users"
|
|
178
|
+
connector.delete(name="users")
|
|
198
179
|
|
|
199
180
|
Args:
|
|
200
181
|
name: The name of the table to be deleted. This table must have
|
|
201
182
|
previously been uploaded with a call to
|
|
202
183
|
:meth:`~kumoai.connector.FileUploadConnector.upload`.
|
|
203
|
-
file_type: The file type of the table to be deleted; this can
|
|
204
|
-
either be :obj:`"parquet"` or :obj:`"csv"`, and must match the
|
|
205
|
-
connector file_type.
|
|
206
184
|
"""
|
|
207
|
-
if file_type.lower() != self._file_type:
|
|
208
|
-
raise ValueError(f"File type {file_type} does not match "
|
|
209
|
-
f"connector file type {self._file_type}.")
|
|
210
|
-
|
|
211
185
|
if not self.has_table(name):
|
|
212
186
|
raise ValueError(f"The table '{name}' does not exist in {self}. "
|
|
213
187
|
f"Please check the existence of the source data.")
|
|
214
188
|
|
|
215
|
-
|
|
216
|
-
source_table_name=name,
|
|
217
|
-
connector_id=CONNECTOR_ID_MAP[file_type],
|
|
218
|
-
)
|
|
219
|
-
global_state.client.connector_api.delete_file_upload(req)
|
|
220
|
-
logger.info("Successfully deleted table %s from Kumo.", name)
|
|
189
|
+
delete_uploaded_table(name, self._file_type)
|