kumoai 2.8.0.dev202508221830__cp312-cp312-win_amd64.whl → 2.13.0.dev202512041141__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of kumoai might be problematic.

Files changed (52)
  1. kumoai/__init__.py +22 -11
  2. kumoai/_version.py +1 -1
  3. kumoai/client/client.py +17 -16
  4. kumoai/client/endpoints.py +1 -0
  5. kumoai/client/rfm.py +37 -8
  6. kumoai/connector/file_upload_connector.py +94 -85
  7. kumoai/connector/utils.py +1399 -210
  8. kumoai/experimental/rfm/__init__.py +164 -46
  9. kumoai/experimental/rfm/authenticate.py +8 -5
  10. kumoai/experimental/rfm/backend/__init__.py +0 -0
  11. kumoai/experimental/rfm/backend/local/__init__.py +38 -0
  12. kumoai/experimental/rfm/backend/local/table.py +109 -0
  13. kumoai/experimental/rfm/backend/snow/__init__.py +35 -0
  14. kumoai/experimental/rfm/backend/snow/table.py +117 -0
  15. kumoai/experimental/rfm/backend/sqlite/__init__.py +30 -0
  16. kumoai/experimental/rfm/backend/sqlite/table.py +101 -0
  17. kumoai/experimental/rfm/base/__init__.py +10 -0
  18. kumoai/experimental/rfm/base/column.py +66 -0
  19. kumoai/experimental/rfm/base/source.py +18 -0
  20. kumoai/experimental/rfm/base/table.py +545 -0
  21. kumoai/experimental/rfm/{local_graph.py → graph.py} +413 -144
  22. kumoai/experimental/rfm/infer/__init__.py +6 -0
  23. kumoai/experimental/rfm/infer/dtype.py +79 -0
  24. kumoai/experimental/rfm/infer/pkey.py +126 -0
  25. kumoai/experimental/rfm/infer/time_col.py +62 -0
  26. kumoai/experimental/rfm/infer/timestamp.py +7 -4
  27. kumoai/experimental/rfm/local_graph_sampler.py +58 -11
  28. kumoai/experimental/rfm/local_graph_store.py +45 -37
  29. kumoai/experimental/rfm/local_pquery_driver.py +342 -46
  30. kumoai/experimental/rfm/pquery/__init__.py +4 -4
  31. kumoai/experimental/rfm/pquery/{backend.py → executor.py} +28 -58
  32. kumoai/experimental/rfm/pquery/pandas_executor.py +532 -0
  33. kumoai/experimental/rfm/rfm.py +559 -148
  34. kumoai/experimental/rfm/sagemaker.py +138 -0
  35. kumoai/jobs.py +27 -1
  36. kumoai/kumolib.cp312-win_amd64.pyd +0 -0
  37. kumoai/pquery/prediction_table.py +5 -3
  38. kumoai/pquery/training_table.py +5 -3
  39. kumoai/spcs.py +1 -3
  40. kumoai/testing/decorators.py +1 -1
  41. kumoai/trainer/job.py +9 -30
  42. kumoai/trainer/trainer.py +19 -10
  43. kumoai/utils/__init__.py +2 -1
  44. kumoai/utils/progress_logger.py +96 -16
  45. {kumoai-2.8.0.dev202508221830.dist-info → kumoai-2.13.0.dev202512041141.dist-info}/METADATA +14 -5
  46. {kumoai-2.8.0.dev202508221830.dist-info → kumoai-2.13.0.dev202512041141.dist-info}/RECORD +49 -36
  47. kumoai/experimental/rfm/local_table.py +0 -448
  48. kumoai/experimental/rfm/pquery/pandas_backend.py +0 -437
  49. kumoai/experimental/rfm/utils.py +0 -347
  50. {kumoai-2.8.0.dev202508221830.dist-info → kumoai-2.13.0.dev202512041141.dist-info}/WHEEL +0 -0
  51. {kumoai-2.8.0.dev202508221830.dist-info → kumoai-2.13.0.dev202512041141.dist-info}/licenses/LICENSE +0 -0
  52. {kumoai-2.8.0.dev202508221830.dist-info → kumoai-2.13.0.dev202512041141.dist-info}/top_level.txt +0 -0
kumoai/__init__.py CHANGED
@@ -184,15 +184,12 @@ def init(
         snowflake_credentials
     ) if not api_key and snowflake_credentials else None
     client = KumoClient(url=url, api_key=api_key, spcs_token=spcs_token)
-    if client.authenticate():
-        global_state._url = client._url
-        global_state._api_key = client._api_key
-        global_state._snowflake_credentials = snowflake_credentials
-        global_state._spcs_token = client._spcs_token
-        global_state._snowpark_session = snowpark_session
-    else:
-        raise ValueError("Client authentication failed. Please check if you "
-                         "have a valid API key.")
+    client.authenticate()
+    global_state._url = client._url
+    global_state._api_key = client._api_key
+    global_state._snowflake_credentials = snowflake_credentials
+    global_state._spcs_token = client._spcs_token
+    global_state._snowpark_session = snowpark_session
 
     if not api_key and snowflake_credentials:
         # Refresh token every 10 minutes (expires in 1 hour):
@@ -200,9 +197,11 @@ def init(
 
     logger = logging.getLogger('kumoai')
     log_level = logging.getLevelName(logger.getEffectiveLevel())
+
     logger.info(
-        "Successfully initialized the Kumo SDK against deployment %s, with "
-        "log level %s.", url, log_level)
+        f"Successfully initialized the Kumo SDK (version {__version__}) "
+        f"against deployment {url}, with "
+        f"log level {log_level}.")
 
 
 def set_log_level(level: str) -> None:
@@ -281,7 +280,19 @@ __all__ = [
 ]
 
 
+def in_snowflake_notebook() -> bool:
+    try:
+        from snowflake.snowpark.context import get_active_session
+        import streamlit  # noqa: F401
+        get_active_session()
+        return True
+    except Exception:
+        return False
+
+
 def in_notebook() -> bool:
+    if in_snowflake_notebook():
+        return True
     try:
         from IPython import get_ipython
         shell = get_ipython()
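
The kumoai/__init__.py hunk above changes init() so that KumoClient.authenticate() raises on failure instead of returning a boolean for init() to check. A minimal caller-side sketch of the resulting behavior (the deployment URL and API key below are placeholders, not values from the package):

    import kumoai

    try:
        # init() forwards to client.authenticate(), which now raises
        # ValueError itself when the credentials are rejected.
        kumoai.init(url="https://kumo.example.com/api", api_key="INVALID_KEY")
    except ValueError as err:
        print(f"Kumo SDK initialization failed: {err}")
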
kumoai/_version.py CHANGED
@@ -1 +1 @@
-__version__ = '2.8.0.dev202508221830'
+__version__ = '2.13.0.dev202512041141'
kumoai/client/client.py CHANGED
@@ -20,7 +20,6 @@ if TYPE_CHECKING:
     )
     from kumoai.client.online import OnlineServingEndpointAPI
     from kumoai.client.pquery import PQueryAPI
-    from kumoai.client.rfm import RFMAPI
     from kumoai.client.source_table import SourceTableAPI
     from kumoai.client.table import TableAPI
 
@@ -33,6 +32,7 @@ class KumoClient:
         url: str,
         api_key: Optional[str],
         spcs_token: Optional[str] = None,
+        verify_ssl: bool = True,
     ) -> None:
         r"""Creates a client against the Kumo public API, provided a URL of
         the endpoint and an authentication token.
@@ -42,11 +42,14 @@ class KumoClient:
             api_key: the public API authentication token.
             spcs_token: the SPCS token used for authentication to access the
                 Kumo API endpoint.
+            verify_ssl: whether to verify SSL certificates. Set to False to
+                skip SSL certificate verification (equivalent to curl -k).
         """
         self._url = url
         self._api_url = f"{url}/{API_VERSION}"
         self._api_key = api_key
         self._spcs_token = spcs_token
+        self._verify_ssl = verify_ssl
 
         retry_strategy = Retry(
             total=10,  # Maximum number of retries
@@ -69,11 +72,15 @@ class KumoClient:
         self._session.headers.update(
             {'Authorization': f'Snowflake Token={self._spcs_token}'})
 
-    def authenticate(self) -> bool:
-        r"""Raises an exception if authentication fails. Succeeds if the
-        client is properly formed.
-        """
-        return self._session.get(f"{self._url}/v1/connectors").ok
+    def authenticate(self) -> None:
+        """Raises an exception if authentication fails."""
+        try:
+            self._session.get(self._url + '/v1/connectors',
+                              verify=self._verify_ssl).raise_for_status()
+        except Exception:
+            raise ValueError(
+                "Client authentication failed. Please check if you "
+                "have a valid API key/credentials.")
 
     def set_spcs_token(self, spcs_token: str) -> None:
         r"""Sets the SPCS token for the client and updates the session
@@ -158,12 +165,6 @@ class KumoClient:
         from kumoai.client.online import OnlineServingEndpointAPI
         return OnlineServingEndpointAPI(self)
 
-    @property
-    def rfm_api(self) -> 'RFMAPI':
-        r"""Returns the typed RFM API."""
-        from kumoai.client.rfm import RFMAPI
-        return RFMAPI(self)
-
     def _request(self, endpoint: Endpoint, **kwargs: Any) -> requests.Response:
         r"""Send a HTTP request to the specified endpoint."""
         endpoint_str = endpoint.get_path()
@@ -184,7 +185,7 @@ class KumoClient:
         :meth:`requests.Session.get`.
         """
         url = self._format_endpoint_url(endpoint)
-        return self._session.get(url=url, **kwargs)
+        return self._session.get(url=url, verify=self._verify_ssl, **kwargs)
 
     def _post(self, endpoint: str, **kwargs: Any) -> requests.Response:
         r"""Send a POST request to the specified endpoint, with keyword
@@ -192,7 +193,7 @@ class KumoClient:
         :meth:`requests.Session.post`.
         """
         url = self._format_endpoint_url(endpoint)
-        return self._session.post(url=url, **kwargs)
+        return self._session.post(url=url, verify=self._verify_ssl, **kwargs)
 
     def _patch(self, endpoint: str, **kwargs: Any) -> requests.Response:
         r"""Send a PATCH request to the specified endpoint, with keyword
@@ -200,7 +201,7 @@ class KumoClient:
         :meth:`requests.Session.patch`.
         """
         url = self._format_endpoint_url(endpoint)
-        return self._session.patch(url=url, **kwargs)
+        return self._session.patch(url=url, verify=self._verify_ssl, **kwargs)
 
     def _delete(self, endpoint: str, **kwargs: Any) -> requests.Response:
         r"""Send a DELETE request to the specified endpoint, with keyword
@@ -208,7 +209,7 @@ class KumoClient:
         :meth:`requests.Session.delete`.
        """
         url = self._format_endpoint_url(endpoint)
-        return self._session.delete(url=url, **kwargs)
+        return self._session.delete(url=url, verify=self._verify_ssl, **kwargs)
 
     def _format_endpoint_url(self, endpoint: str) -> str:
         if endpoint[0] == "/":
kumoai/client/endpoints.py CHANGED
@@ -147,3 +147,4 @@ class RFMEndpoints:
     explain = Endpoint(f"{BASE}/explain", HTTPMethod.POST)
     evaluate = Endpoint(f"{BASE}/evaluate", HTTPMethod.POST)
     validate_query = Endpoint(f"{BASE}/validate_query", HTTPMethod.POST)
+    parse_query = Endpoint(f"{BASE}/parse_query", HTTPMethod.POST)
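
The client.py and endpoints.py changes above add a verify_ssl flag that is threaded through every session call, make authenticate() raise instead of returning a bool, and register a new parse_query endpoint. A rough sketch of using this surface directly, with a placeholder URL and credential (not taken from the package):

    from kumoai.client.client import KumoClient

    client = KumoClient(
        url="https://self-hosted.kumo.example",  # placeholder deployment URL
        api_key="KUMO_API_KEY",                  # placeholder credential
        verify_ssl=False,  # skip certificate checks (the curl -k equivalent)
    )
    try:
        client.authenticate()  # raises ValueError on bad credentials
    except ValueError as err:
        print(err)
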
kumoai/client/rfm.py CHANGED
@@ -1,6 +1,11 @@
+from typing import Any
+
 from kumoapi.json_serde import to_json_dict
 from kumoapi.rfm import (
     RFMEvaluateResponse,
+    RFMExplanationResponse,
+    RFMParseQueryRequest,
+    RFMParseQueryResponse,
     RFMPredictResponse,
     RFMValidateQueryRequest,
     RFMValidateQueryResponse,
@@ -25,28 +30,35 @@ class RFMAPI:
         Returns:
             RFMPredictResponse containing the predictions
         """
-        # Send binary data to the predict endpoint
         response = self._client._request(
-            RFMEndpoints.predict, data=request,
-            headers={'Content-Type': 'application/x-protobuf'})
+            RFMEndpoints.predict,
+            data=request,
+            headers={'Content-Type': 'application/x-protobuf'},
+        )
         raise_on_error(response)
         return parse_response(RFMPredictResponse, response)
 
-    def explain(self, request: bytes) -> RFMPredictResponse:
+    def explain(
+        self,
+        request: bytes,
+        skip_summary: bool = False,
+    ) -> RFMExplanationResponse:
         """Explain the RFM model on the given context.
 
         Args:
             request: The predict request as serialized protobuf.
+            skip_summary: Whether to skip generating a human-readable summary
+                of the explanation.
 
         Returns:
             RFMPredictResponse containing the explanations
         """
-        # Send binary data to the explain endpoint
+        params: dict[str, Any] = {'generate_summary': not skip_summary}
         response = self._client._request(
-            RFMEndpoints.explain, data=request,
+            RFMEndpoints.explain, data=request, params=params,
             headers={'Content-Type': 'application/x-protobuf'})
         raise_on_error(response)
-        return parse_response(RFMPredictResponse, response)
+        return parse_response(RFMExplanationResponse, response)
 
     def evaluate(self, request: bytes) -> RFMEvaluateResponse:
         """Evaluate the RFM model on the given context.
@@ -57,7 +69,6 @@ class RFMAPI:
         Returns:
             RFMEvaluateResponse containing the computed metrics
         """
-        # Send binary data to the evaluate endpoint
         response = self._client._request(
             RFMEndpoints.evaluate, data=request,
             headers={'Content-Type': 'application/x-protobuf'})
@@ -81,3 +92,21 @@ class RFMAPI:
             json=to_json_dict(request))
         raise_on_error(response)
         return parse_response(RFMValidateQueryResponse, response)
+
+    def parse_query(
+        self,
+        request: RFMParseQueryRequest,
+    ) -> RFMParseQueryResponse:
+        """Validate a predictive query against a graph.
+
+        Args:
+            request: The request object containing
+                the query and graph definition
+
+        Returns:
+            RFMParseQueryResponse containing the QueryDefinition
+        """
+        response = self._client._request(RFMEndpoints.parse_query,
+                                         json=to_json_dict(request))
+        raise_on_error(response)
+        return parse_response(RFMParseQueryResponse, response)
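
The reworked explain() signature and the new parse_query() method above change how the RFM client is driven. A rough usage sketch, assuming an already-authenticated KumoClient named client and pre-built request payloads (all placeholders):

    from kumoai.client.rfm import RFMAPI

    api = RFMAPI(client)

    # context_bytes: a serialized protobuf predict request (placeholder).
    explanation = api.explain(context_bytes, skip_summary=True)

    # parse_request: an RFMParseQueryRequest built elsewhere (placeholder).
    query_def = api.parse_query(parse_request)
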
kumoai/connector/file_upload_connector.py CHANGED
@@ -1,4 +1,3 @@
-import os
 from typing import List
 
 from kumoapi.source_table import (
@@ -12,14 +11,7 @@ from typing_extensions import override
 
 from kumoai import global_state
 from kumoai.connector.base import Connector
-from kumoai.connector.utils import (
-    MAX_PARTITION_SIZE,
-    MIN_PARTITION_SIZE,
-    _upload_partitioned_csv,
-    _upload_partitioned_parquet,
-    _upload_single_file,
-    logger,
-)
+from kumoai.connector.utils import delete_uploaded_table, upload_table
 
 
 class FileUploadConnector(Connector):
@@ -59,7 +51,6 @@ class FileUploadConnector(Connector):
     def name(self) -> str:
         return f'{self._file_type}_upload_connector'
 
-    @override
     @property
     def source_type(self) -> DataSourceType:
         return DataSourceType.S3
@@ -89,92 +80,110 @@ class FileUploadConnector(Connector):
         auto_partition: bool = True,
         partition_size_mb: int = 250,
     ) -> None:
-        r"""Synchronously uploads a table located on your
-        local machine to the Kumo data plane.
-
-        Tables uploaded in this way can be accessed with
-        this ``FileUploadConnector`` using the provided name,
-        for example: ``connector_obj["my_table"]``
-
-        For files larger than 1GB, the table will be automatically partitioned
-        into smaller chunks and uploaded with common prefix that allows
-        FileUploadConnector to union them when reading.
+        r"""Upload a table to Kumo from a local or remote path.
+
+        Supports ``s3://``, ``gs://``, ``abfs://``, ``abfss://``, and ``az://``
+
+        Tables uploaded this way can be accessed from this
+        ``FileUploadConnector`` using the provided name, e.g.,
+        ``connector_obj["my_table"]``.
+
+        Local files
+        -----------
+        - Accepts one ``.parquet`` or ``.csv`` file (must match this
+          connector’s ``file_type``).
+        - If the file is > 1 GiB and ``auto_partition=True``, it is split
+          into ~``partition_size_mb`` MiB parts and uploaded under a common
+          prefix so the connector can read them as one table.
+
+        Remote paths
+        ------------
+        - **Single file** (``.parquet``/``.csv``): validated and uploaded via
+          multipart PUT. Files > 1 GiB are rejected — re-shard to ~200 MiB
+          and upload the directory instead.
+        - **Directory**: must contain only one format (all Parquet or all CSV)
+          matching this connector’s ``file_type``. Files are validated
+          (consistent schema; CSV headers sanitized) and uploaded in parallel
+          with memory-safe budgeting.
 
         .. warning::
-            Uploaded tables must be single files, either in parquet or CSV
-            format(must match connector type).
-            Partitioned tables are not currently supported.
+            For local uploads, input must be a single CSV or Parquet file
+            (matching the connector type). For remote uploads, mixed
+            CSV/Parquet directories are not supported. Remote single files
+            larger than 1 GiB are not supported.
 
+        Examples:
+        ---------
         .. code-block:: python
 
             import kumoai
-            connector = kumoai.FileUploadConnector(file_type="parquet")
+            conn = kumoai.FileUploadConnector(file_type="parquet")
+
+            # Local: small file
+            conn.upload(name="users", path="/data/users.parquet")
+
+            # Local: large file (auto-partitions)
+            conn.upload(
+                name="txns",
+                path="/data/large_txns.parquet",
+            )
+
+            # Local: disable auto-partitioning (raises if > 1 GiB)
+            conn.upload(
+                name="users",
+                path="/data/users.parquet",
+                auto_partition=False,
+            )
+
+            # CSV connector
+            csv_conn = kumoai.FileUploadConnector(file_type="csv")
+            csv_conn.upload(name="sales", path="/data/sales.csv")
 
-            # Upload a small table
-            connector.upload(name="users", path="/data/users.parquet")
+            # Remote: single file (<= 1 GiB)
+            conn.upload(name="logs", path="s3://bkt/path/logs.parquet")
 
-            # Upload a large parquet table (will be automatically partitioned)
-            connector.upload(name="transactions",
-                             path="/data/large_transactions.parquet")
+            # Remote: directory of shards (uniform format)
+            csv_conn.upload(name="events", path="gs://mybkt/events_csv/")
 
-            # Disable auto-partitioning (will raise error for large files)
-            upload(name="users", path="/data/users.parquet",
-                   auto_partition=False)
+        Args:
+            name:
+                Table name to create in Kumo; access later via this connector.
+            path:
+                Local path or remote URL to a ``.parquet``/``.csv`` file or a
+                directory (uniform format). The format must match this
+                connector’s ``file_type``.
+            auto_partition:
+                Local-only. If ``True`` and the local file is > 1 GiB, split
+                into ~``partition_size_mb`` MiB parts.
+            partition_size_mb:
+                Local-only. Target partition size (100–1000 MiB) when
+                ``auto_partition`` is ``True``.
+        """
+        upload_table(name=name, path=path, auto_partition=auto_partition,
+                     partition_size_mb=partition_size_mb,
+                     file_type=self._file_type)
+
+    def delete(
+        self,
+        name: str,
+    ) -> None:
+        r"""Synchronously deletes a previously uploaded table from the Kumo
+        data plane.
 
-            # Create a file upload connector for CSV files.
-            connectorCSV = kumoai.FileUploadConnector(file_type="csv")
+        .. code-block:: python
 
-            # Upload a large CSV table (will be automatically partitioned)
-            connectorCSV.upload(name="sales", path="/data/large_sales.csv")
+            # Assume we have uploaded a `.parquet` table named `users`, and a
+            # `FileUploadConnector` has been created called `connector`, and
+            # we want to delete this table from Kumo:
+            connector.delete(name="users")
 
         Args:
-            name: The name of the table to be uploaded. The uploaded table can
-                be accessed from the
-                :class:`~kumoai.connector.FileUploadConnector` with this name.
-            path: The full path of the table to be uploaded, on the local
-                machine. File Type must match the connector type.
-            auto_partition: Whether to automatically
-                partition large files (>1GB).
-                If False and file is >1GB, raises ValueError. Supports both
-                Parquet and CSV files.
-            partition_size_mb: The size of each partition in MB. Only used if
-                auto_partition is True.
+            name: The name of the table to be deleted. This table must have
+                previously been uploaded with a call to
+                :meth:`~kumoai.connector.FileUploadConnector.upload`.
         """
-        # Validate file type matches connector type
-        if not path.lower().endswith("." + self._file_type):
-            raise ValueError(f"File {path} must match connector path type: "
-                             f"{self._file_type}.")
-
-        # Validate file type
-        if not (path.endswith(".parquet") or path.endswith(".csv")):
-            raise ValueError(f"Path {path} must be either a CSV or Parquet "
-                             f"file. Partitioned data is not currently "
-                             f"supported.")
-
-        file_size = os.path.getsize(path)
-
-        # Route based on file size
-        if file_size < MAX_PARTITION_SIZE:
-            return _upload_single_file(name, path)
-
-        if not auto_partition:
-            raise ValueError(f"File {path} is {file_size / (1024**3):.2f}GB, "
-                             f"which exceeds the 1GB limit. Enable "
-                             f"auto_partition=True to automatically partition "
-                             f"large files.")
-
-        # Partition and upload large files
-        partition_size = partition_size_mb * 1024**2
-        if (partition_size > MAX_PARTITION_SIZE
-                or partition_size < MIN_PARTITION_SIZE):
-            raise ValueError(f"Partition size {partition_size_mb}MB must be "
-                             f"between {MIN_PARTITION_SIZE / 1024**2}MB and "
-                             f"{MAX_PARTITION_SIZE / 1024**2}MB.")
-
-        logger.info(
-            "File %s is large with size %s, partitioning for upload...", path,
-            file_size)
-        if path.endswith('.parquet'):
-            _upload_partitioned_parquet(name, path, partition_size)
-        else:
-            _upload_partitioned_csv(name, path, partition_size)
+        if not self.has_table(name):
+            raise ValueError(f"The table '{name}' does not exist in {self}. "
+                             f"Please check the existence of the source data.")
+
+        delete_uploaded_table(name, self._file_type)
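
To tie the reshaped connector API together, a small sketch of uploading a large local Parquet file with an explicit partition size and then removing it with the new delete() method (paths and table names are placeholders):

    import kumoai

    conn = kumoai.FileUploadConnector(file_type="parquet")
    conn.upload(
        name="transactions",
        path="/data/large_transactions.parquet",  # placeholder local path
        auto_partition=True,    # split local files larger than 1 GiB
        partition_size_mb=200,  # target part size; allowed range 100-1000 MiB
    )
    conn.delete(name="transactions")  # removes the uploaded table again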