kumoai 2.9.0.dev202509061830__cp311-cp311-macosx_11_0_arm64.whl → 2.12.0.dev202511031731__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kumoai/__init__.py +4 -2
- kumoai/_version.py +1 -1
- kumoai/client/client.py +10 -5
- kumoai/client/rfm.py +3 -2
- kumoai/connector/file_upload_connector.py +71 -102
- kumoai/connector/utils.py +1367 -236
- kumoai/experimental/rfm/__init__.py +2 -2
- kumoai/experimental/rfm/authenticate.py +8 -5
- kumoai/experimental/rfm/infer/timestamp.py +7 -4
- kumoai/experimental/rfm/local_graph.py +90 -80
- kumoai/experimental/rfm/local_graph_sampler.py +16 -8
- kumoai/experimental/rfm/local_graph_store.py +22 -6
- kumoai/experimental/rfm/local_pquery_driver.py +129 -28
- kumoai/experimental/rfm/local_table.py +100 -22
- kumoai/experimental/rfm/pquery/__init__.py +4 -0
- kumoai/experimental/rfm/pquery/backend.py +4 -0
- kumoai/experimental/rfm/pquery/executor.py +102 -0
- kumoai/experimental/rfm/pquery/pandas_backend.py +71 -30
- kumoai/experimental/rfm/pquery/pandas_executor.py +506 -0
- kumoai/experimental/rfm/rfm.py +442 -94
- kumoai/jobs.py +1 -0
- kumoai/trainer/trainer.py +19 -10
- kumoai/utils/progress_logger.py +62 -0
- {kumoai-2.9.0.dev202509061830.dist-info → kumoai-2.12.0.dev202511031731.dist-info}/METADATA +4 -5
- {kumoai-2.9.0.dev202509061830.dist-info → kumoai-2.12.0.dev202511031731.dist-info}/RECORD +28 -26
- {kumoai-2.9.0.dev202509061830.dist-info → kumoai-2.12.0.dev202511031731.dist-info}/WHEEL +0 -0
- {kumoai-2.9.0.dev202509061830.dist-info → kumoai-2.12.0.dev202511031731.dist-info}/licenses/LICENSE +0 -0
- {kumoai-2.9.0.dev202509061830.dist-info → kumoai-2.12.0.dev202511031731.dist-info}/top_level.txt +0 -0
kumoai/__init__.py
CHANGED
@@ -200,9 +200,11 @@ def init(
 
     logger = logging.getLogger('kumoai')
     log_level = logging.getLevelName(logger.getEffectiveLevel())
+
     logger.info(
-        "Successfully initialized the Kumo SDK
-        "
+        f"Successfully initialized the Kumo SDK (version {__version__}) "
+        f"against deployment {url}, with "
+        f"log level {log_level}.")
 
 
 def set_log_level(level: str) -> None:
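For reference, a minimal sketch of the call that now emits this richer log line. kumoai.init and set_log_level are the entry points visible in this file; the full init signature is not shown in the hunk, and the URL and API key below are placeholders:

    import kumoai

    # Placeholder credentials. On success, init() now logs the SDK
    # version, the deployment URL, and the effective log level.
    kumoai.init(url="https://example.kumoai.cloud/api", api_key="<API_KEY>")

    # Defined directly below the changed hunk; adjusts the verbosity
    # of the 'kumoai' logger.
    kumoai.set_log_level("DEBUG")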
kumoai/_version.py
CHANGED
@@ -1 +1 @@
-__version__ = '2.9.0.dev202509061830'
+__version__ = '2.12.0.dev202511031731'
kumoai/client/client.py
CHANGED
@@ -33,6 +33,7 @@ class KumoClient:
         url: str,
         api_key: Optional[str],
         spcs_token: Optional[str] = None,
+        verify_ssl: bool = True,
     ) -> None:
         r"""Creates a client against the Kumo public API, provided a URL of
         the endpoint and an authentication token.
@@ -42,11 +43,14 @@ class KumoClient:
             api_key: the public API authentication token.
             spcs_token: the SPCS token used for authentication to access the
                 Kumo API endpoint.
+            verify_ssl: whether to verify SSL certificates. Set to False to
+                skip SSL certificate verification (equivalent to curl -k).
         """
         self._url = url
         self._api_url = f"{url}/{API_VERSION}"
         self._api_key = api_key
         self._spcs_token = spcs_token
+        self._verify_ssl = verify_ssl
 
         retry_strategy = Retry(
             total=10, # Maximum number of retries
@@ -73,7 +77,8 @@ class KumoClient:
         r"""Raises an exception if authentication fails. Succeeds if the
         client is properly formed.
         """
-        return self._session.get(f"{self._url}/v1/connectors").ok
+        return self._session.get(f"{self._url}/v1/connectors",
+                                 verify=self._verify_ssl).ok
 
     def set_spcs_token(self, spcs_token: str) -> None:
         r"""Sets the SPCS token for the client and updates the session
@@ -184,7 +189,7 @@ class KumoClient:
            :meth:`requests.Session.get`.
         """
         url = self._format_endpoint_url(endpoint)
-        return self._session.get(url=url, **kwargs)
+        return self._session.get(url=url, verify=self._verify_ssl, **kwargs)
 
     def _post(self, endpoint: str, **kwargs: Any) -> requests.Response:
         r"""Send a POST request to the specified endpoint, with keyword
@@ -192,7 +197,7 @@ class KumoClient:
            :meth:`requests.Session.post`.
         """
         url = self._format_endpoint_url(endpoint)
-        return self._session.post(url=url, **kwargs)
+        return self._session.post(url=url, verify=self._verify_ssl, **kwargs)
 
     def _patch(self, endpoint: str, **kwargs: Any) -> requests.Response:
         r"""Send a PATCH request to the specified endpoint, with keyword
@@ -200,7 +205,7 @@ class KumoClient:
            :meth:`requests.Session.patch`.
         """
         url = self._format_endpoint_url(endpoint)
-        return self._session.patch(url=url, **kwargs)
+        return self._session.patch(url=url, verify=self._verify_ssl, **kwargs)
 
     def _delete(self, endpoint: str, **kwargs: Any) -> requests.Response:
         r"""Send a DELETE request to the specified endpoint, with keyword
@@ -208,7 +213,7 @@ class KumoClient:
            :meth:`requests.Session.delete`.
         """
         url = self._format_endpoint_url(endpoint)
-        return self._session.delete(url=url, **kwargs)
+        return self._session.delete(url=url, verify=self._verify_ssl, **kwargs)
 
     def _format_endpoint_url(self, endpoint: str) -> str:
         if endpoint[0] == "/":
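A minimal usage sketch of the new flag, assuming direct construction of the client; the class name and constructor parameters are taken from the hunks above, while the URL and key are placeholders:

    from kumoai.client.client import KumoClient

    # verify_ssl=False disables certificate verification on every request
    # this session makes (equivalent to curl -k), e.g. for deployments
    # behind self-signed certificates.
    client = KumoClient(
        url="https://kumo.example.com/api",  # placeholder deployment URL
        api_key="<API_KEY>",
        verify_ssl=False,
    )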
kumoai/client/rfm.py
CHANGED
@@ -1,6 +1,7 @@
 from kumoapi.json_serde import to_json_dict
 from kumoapi.rfm import (
     RFMEvaluateResponse,
+    RFMExplanationResponse,
     RFMPredictResponse,
     RFMValidateQueryRequest,
     RFMValidateQueryResponse,
@@ -32,7 +33,7 @@ class RFMAPI:
         raise_on_error(response)
         return parse_response(RFMPredictResponse, response)
 
-    def explain(self, request: bytes) ->
+    def explain(self, request: bytes) -> RFMExplanationResponse:
         """Explain the RFM model on the given context.
 
         Args:
@@ -46,7 +47,7 @@ class RFMAPI:
             RFMEndpoints.explain, data=request,
             headers={'Content-Type': 'application/x-protobuf'})
         raise_on_error(response)
-        return parse_response(
+        return parse_response(RFMExplanationResponse, response)
 
     def evaluate(self, request: bytes) -> RFMEvaluateResponse:
         """Evaluate the RFM model on the given context.
kumoai/connector/file_upload_connector.py
CHANGED

@@ -1,7 +1,5 @@
-import os
 from typing import List
 
-from kumoapi.data_source import DeleteUploadedFileRequest
 from kumoapi.source_table import (
     DataSourceType,
     FileType,
@@ -13,15 +11,7 @@ from typing_extensions import override
 
 from kumoai import global_state
 from kumoai.connector.base import Connector
-from kumoai.connector.utils import (
-    CONNECTOR_ID_MAP,
-    MAX_PARTITION_SIZE,
-    MIN_PARTITION_SIZE,
-    _upload_partitioned_csv,
-    _upload_partitioned_parquet,
-    _upload_single_file,
-    logger,
-)
+from kumoai.connector.utils import delete_uploaded_table, upload_table
 
 
 class FileUploadConnector(Connector):
@@ -61,7 +51,6 @@ class FileUploadConnector(Connector):
     def name(self) -> str:
         return f'{self._file_type}_upload_connector'
 
-    @override
     @property
     def source_type(self) -> DataSourceType:
         return DataSourceType.S3
@@ -91,100 +80,92 @@ class FileUploadConnector(Connector):
         auto_partition: bool = True,
         partition_size_mb: int = 250,
     ) -> None:
-        r"""
-
-
-
-        this
-
-
-
-
-
+        r"""Upload a table to Kumo from a local or remote path.
+
+        Supports ``s3://``, ``gs://``, ``abfs://``, ``abfss://``, and ``az://``
+
+        Tables uploaded this way can be accessed from this
+        ``FileUploadConnector`` using the provided name, e.g.,
+        ``connector_obj["my_table"]``.
+
+        Local files
+        -----------
+        - Accepts one ``.parquet`` or ``.csv`` file (must match this
+          connector’s ``file_type``).
+        - If the file is > 1 GiB and ``auto_partition=True``, it is split
+          into ~``partition_size_mb`` MiB parts and uploaded under a common
+          prefix so the connector can read them as one table.
+
+        Remote paths
+        ------------
+        - **Single file** (``.parquet``/``.csv``): validated and uploaded via
+          multipart PUT. Files > 1 GiB are rejected — re-shard to ~200 MiB
+          and upload the directory instead.
+        - **Directory**: must contain only one format (all Parquet or all CSV)
+          matching this connector’s ``file_type``. Files are validated
+          (consistent schema; CSV headers sanitized) and uploaded in parallel
+          with memory-safe budgeting.
 
         .. warning::
-
-
-
+            For local uploads, input must be a single CSV or Parquet file
+            (matching the connector type). For remote uploads, mixed
+            CSV/Parquet directories are not supported. Remote single files
+            larger than 1 GiB are not supported.
 
+        Examples:
+        ---------
         .. code-block:: python
 
             import kumoai
-
+            conn = kumoai.FileUploadConnector(file_type="parquet")
+
+            # Local: small file
+            conn.upload(name="users", path="/data/users.parquet")
 
-            #
-
+            # Local: large file (auto-partitions)
+            conn.upload(
+                name="txns",
+                path="/data/large_txns.parquet",
+            )
 
-            #
-
-
+            # Local: disable auto-partitioning (raises if > 1 GiB)
+            conn.upload(
+                name="users",
+                path="/data/users.parquet",
+                auto_partition=False,
+            )
 
-            #
-
-
+            # CSV connector
+            csv_conn = kumoai.FileUploadConnector(file_type="csv")
+            csv_conn.upload(name="sales", path="/data/sales.csv")
 
-            #
-
+            # Remote: single file (<= 1 GiB)
+            conn.upload(name="logs", path="s3://bkt/path/logs.parquet")
 
-            #
-
+            # Remote: directory of shards (uniform format)
+            csv_conn.upload(name="events", path="gs://mybkt/events_csv/")
 
         Args:
-            name:
-
-
-
-
-
-
-            If
-
-            partition_size_mb:
-
+            name:
+                Table name to create in Kumo; access later via this connector.
+            path:
+                Local path or remote URL to a ``.parquet``/``.csv`` file or a
+                directory (uniform format). The format must match this
+                connector’s ``file_type``.
+            auto_partition:
+                Local-only. If ``True`` and the local file is > 1 GiB, split
+                into ~``partition_size_mb`` MiB parts.
+            partition_size_mb:
+                Local-only. Target partition size (100–1000 MiB) when
+                ``auto_partition`` is ``True``.
         """
-
-
-
-                         f"{self._file_type}.")
-
-        # Validate file type
-        if not (path.endswith(".parquet") or path.endswith(".csv")):
-            raise ValueError(f"Path {path} must be either a CSV or Parquet "
-                             f"file. Partitioned data is not currently "
-                             f"supported.")
-
-        file_size = os.path.getsize(path)
-
-        # Route based on file size
-        if file_size < MAX_PARTITION_SIZE:
-            return _upload_single_file(name, path)
-
-        if not auto_partition:
-            raise ValueError(f"File {path} is {file_size / (1024**3):.2f}GB, "
-                             f"which exceeds the 1GB limit. Enable "
-                             f"auto_partition=True to automatically partition "
-                             f"large files.")
-
-        # Partition and upload large files
-        partition_size = partition_size_mb * 1024**2
-        if (partition_size > MAX_PARTITION_SIZE
-                or partition_size < MIN_PARTITION_SIZE):
-            raise ValueError(f"Partition size {partition_size_mb}MB must be "
-                             f"between {MIN_PARTITION_SIZE / 1024**2}MB and "
-                             f"{MAX_PARTITION_SIZE / 1024**2}MB.")
-
-        logger.info(
-            "File %s is large with size %s, partitioning for upload...", path,
-            file_size)
-        if path.endswith('.parquet'):
-            _upload_partitioned_parquet(name, path, partition_size)
-        else:
-            _upload_partitioned_csv(name, path, partition_size)
+        upload_table(name=name, path=path, auto_partition=auto_partition,
+                     partition_size_mb=partition_size_mb,
+                     file_type=self._file_type)
 
     def delete(
         self,
         name: str,
-        file_type: str,
     ) -> None:
         r"""Synchronously deletes a previously uploaded table from the Kumo
         data plane.
@@ -194,27 +175,15 @@ class FileUploadConnector(Connector):
             # Assume we have uploaded a `.parquet` table named `users`, and a
            # `FileUploadConnector` has been created called `connector`, and
            # we want to delete this table from Kumo:
-            connector.delete(name="users"
+            connector.delete(name="users")
 
         Args:
            name: The name of the table to be deleted. This table must have
                previously been uploaded with a call to
                :meth:`~kumoai.connector.FileUploadConnector.upload`.
-            file_type: The file type of the table to be deleted; this can
-                either be :obj:`"parquet"` or :obj:`"csv"`, and must match the
-                connector file_type.
         """
-        if file_type.lower() != self._file_type:
-            raise ValueError(f"File type {file_type} does not match "
-                             f"connector file type {self._file_type}.")
-
        if not self.has_table(name):
            raise ValueError(f"The table '{name}' does not exist in {self}. "
                             f"Please check the existence of the source data.")
 
-        req = DeleteUploadedFileRequest(
-            source_table_name=name,
-            connector_id=CONNECTOR_ID_MAP[file_type],
-        )
-        global_state.client.connector_api.delete_file_upload(req)
-        logger.info("Successfully deleted table %s from Kumo.", name)
+        delete_uploaded_table(name, self._file_type)
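The refactor concentrates all upload and delete logic in kumoai.connector.utils (note its +1367/-236 line count in the file list above). A usage sketch of the two new helpers, with signatures inferred only from the delegation calls visible in this diff; the table names and paths are placeholders:

    from kumoai.connector.utils import delete_uploaded_table, upload_table

    # Mirrors FileUploadConnector.upload(); the keyword arguments and
    # defaults shown are those forwarded by the connector method.
    upload_table(
        name="users",
        path="/data/users.parquet",
        auto_partition=True,
        partition_size_mb=250,
        file_type="parquet",
    )

    # Mirrors FileUploadConnector.delete(); the file type now comes from
    # the connector instead of a caller-supplied argument.
    delete_uploaded_table("users", "parquet")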