kumoai 2.9.0.dev202509081831__cp312-cp312-win_amd64.whl → 2.13.0.dev202511201731__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. kumoai/__init__.py +10 -11
  2. kumoai/_version.py +1 -1
  3. kumoai/client/client.py +17 -16
  4. kumoai/client/endpoints.py +1 -0
  5. kumoai/client/rfm.py +37 -8
  6. kumoai/connector/file_upload_connector.py +71 -102
  7. kumoai/connector/utils.py +1367 -236
  8. kumoai/experimental/rfm/__init__.py +153 -10
  9. kumoai/experimental/rfm/authenticate.py +8 -5
  10. kumoai/experimental/rfm/infer/timestamp.py +7 -4
  11. kumoai/experimental/rfm/local_graph.py +90 -80
  12. kumoai/experimental/rfm/local_graph_sampler.py +16 -10
  13. kumoai/experimental/rfm/local_graph_store.py +22 -6
  14. kumoai/experimental/rfm/local_pquery_driver.py +336 -42
  15. kumoai/experimental/rfm/local_table.py +100 -22
  16. kumoai/experimental/rfm/pquery/__init__.py +4 -4
  17. kumoai/experimental/rfm/pquery/{backend.py → executor.py} +24 -58
  18. kumoai/experimental/rfm/pquery/{pandas_backend.py → pandas_executor.py} +278 -222
  19. kumoai/experimental/rfm/rfm.py +523 -124
  20. kumoai/experimental/rfm/sagemaker.py +130 -0
  21. kumoai/jobs.py +1 -0
  22. kumoai/kumolib.cp312-win_amd64.pyd +0 -0
  23. kumoai/spcs.py +1 -3
  24. kumoai/trainer/trainer.py +19 -10
  25. kumoai/utils/progress_logger.py +68 -0
  26. {kumoai-2.9.0.dev202509081831.dist-info → kumoai-2.13.0.dev202511201731.dist-info}/METADATA +13 -5
  27. {kumoai-2.9.0.dev202509081831.dist-info → kumoai-2.13.0.dev202511201731.dist-info}/RECORD +30 -29
  28. {kumoai-2.9.0.dev202509081831.dist-info → kumoai-2.13.0.dev202511201731.dist-info}/WHEEL +0 -0
  29. {kumoai-2.9.0.dev202509081831.dist-info → kumoai-2.13.0.dev202511201731.dist-info}/licenses/LICENSE +0 -0
  30. {kumoai-2.9.0.dev202509081831.dist-info → kumoai-2.13.0.dev202511201731.dist-info}/top_level.txt +0 -0
kumoai/__init__.py CHANGED
@@ -184,15 +184,12 @@ def init(
         snowflake_credentials
     ) if not api_key and snowflake_credentials else None
     client = KumoClient(url=url, api_key=api_key, spcs_token=spcs_token)
-    if client.authenticate():
-        global_state._url = client._url
-        global_state._api_key = client._api_key
-        global_state._snowflake_credentials = snowflake_credentials
-        global_state._spcs_token = client._spcs_token
-        global_state._snowpark_session = snowpark_session
-    else:
-        raise ValueError("Client authentication failed. Please check if you "
-                         "have a valid API key.")
+    client.authenticate()
+    global_state._url = client._url
+    global_state._api_key = client._api_key
+    global_state._snowflake_credentials = snowflake_credentials
+    global_state._spcs_token = client._spcs_token
+    global_state._snowpark_session = snowpark_session

     if not api_key and snowflake_credentials:
         # Refresh token every 10 minutes (expires in 1 hour):
@@ -200,9 +197,11 @@ def init(

     logger = logging.getLogger('kumoai')
     log_level = logging.getLevelName(logger.getEffectiveLevel())
+
     logger.info(
-        "Successfully initialized the Kumo SDK against deployment %s, with "
-        "log level %s.", url, log_level)
+        f"Successfully initialized the Kumo SDK (version {__version__}) "
+        f"against deployment {url}, with "
+        f"log level {log_level}.")


 def set_log_level(level: str) -> None:
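
Note the behavioral change here: `init()` no longer branches on a boolean from `client.authenticate()`; authentication failure now raises inside the client. A minimal caller-side sketch, assuming a hypothetical deployment URL and API key:

```python
import kumoai

# With this release, a bad credential surfaces as a ValueError raised by
# client.authenticate() instead of a False return checked inside init().
try:
    kumoai.init(url="https://kumo.example.com/api", api_key="bad-key")
except ValueError as err:
    print(err)  # "Client authentication failed. ..."
```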
kumoai/_version.py CHANGED
@@ -1 +1 @@
-__version__ = '2.9.0.dev202509081831'
+__version__ = '2.13.0.dev202511201731'
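
The bump is observable at runtime, since `__version__` is re-exported by the package (the new `init()` log line above references it):

```python
import kumoai

print(kumoai.__version__)  # '2.13.0.dev202511201731'
```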
kumoai/client/client.py CHANGED
@@ -20,7 +20,6 @@ if TYPE_CHECKING:
     )
     from kumoai.client.online import OnlineServingEndpointAPI
     from kumoai.client.pquery import PQueryAPI
-    from kumoai.client.rfm import RFMAPI
     from kumoai.client.source_table import SourceTableAPI
     from kumoai.client.table import TableAPI

@@ -33,6 +32,7 @@ class KumoClient:
         url: str,
         api_key: Optional[str],
         spcs_token: Optional[str] = None,
+        verify_ssl: bool = True,
     ) -> None:
         r"""Creates a client against the Kumo public API, provided a URL of
         the endpoint and an authentication token.
@@ -42,11 +42,14 @@ class KumoClient:
             api_key: the public API authentication token.
             spcs_token: the SPCS token used for authentication to access the
                 Kumo API endpoint.
+            verify_ssl: whether to verify SSL certificates. Set to False to
+                skip SSL certificate verification (equivalent to curl -k).
         """
         self._url = url
         self._api_url = f"{url}/{API_VERSION}"
         self._api_key = api_key
         self._spcs_token = spcs_token
+        self._verify_ssl = verify_ssl

         retry_strategy = Retry(
             total=10,  # Maximum number of retries
@@ -69,11 +72,15 @@ class KumoClient:
         self._session.headers.update(
             {'Authorization': f'Snowflake Token={self._spcs_token}'})

-    def authenticate(self) -> bool:
-        r"""Raises an exception if authentication fails. Succeeds if the
-        client is properly formed.
-        """
-        return self._session.get(f"{self._url}/v1/connectors").ok
+    def authenticate(self) -> None:
+        """Raises an exception if authentication fails."""
+        try:
+            self._session.get(self._url + '/v1/connectors',
+                              verify=self._verify_ssl).raise_for_status()
+        except Exception:
+            raise ValueError(
+                "Client authentication failed. Please check if you "
+                "have a valid API key/credentials.")

     def set_spcs_token(self, spcs_token: str) -> None:
         r"""Sets the SPCS token for the client and updates the session
@@ -158,12 +165,6 @@ class KumoClient:
         from kumoai.client.online import OnlineServingEndpointAPI
         return OnlineServingEndpointAPI(self)

-    @property
-    def rfm_api(self) -> 'RFMAPI':
-        r"""Returns the typed RFM API."""
-        from kumoai.client.rfm import RFMAPI
-        return RFMAPI(self)
-
     def _request(self, endpoint: Endpoint, **kwargs: Any) -> requests.Response:
         r"""Send a HTTP request to the specified endpoint."""
         endpoint_str = endpoint.get_path()
@@ -184,7 +185,7 @@ class KumoClient:
         :meth:`requests.Session.get`.
         """
         url = self._format_endpoint_url(endpoint)
-        return self._session.get(url=url, **kwargs)
+        return self._session.get(url=url, verify=self._verify_ssl, **kwargs)

     def _post(self, endpoint: str, **kwargs: Any) -> requests.Response:
         r"""Send a POST request to the specified endpoint, with keyword
@@ -192,7 +193,7 @@ class KumoClient:
         :meth:`requests.Session.post`.
         """
         url = self._format_endpoint_url(endpoint)
-        return self._session.post(url=url, **kwargs)
+        return self._session.post(url=url, verify=self._verify_ssl, **kwargs)

     def _patch(self, endpoint: str, **kwargs: Any) -> requests.Response:
         r"""Send a PATCH request to the specified endpoint, with keyword
@@ -200,7 +201,7 @@ class KumoClient:
         :meth:`requests.Session.patch`.
         """
         url = self._format_endpoint_url(endpoint)
-        return self._session.patch(url=url, **kwargs)
+        return self._session.patch(url=url, verify=self._verify_ssl, **kwargs)

     def _delete(self, endpoint: str, **kwargs: Any) -> requests.Response:
         r"""Send a DELETE request to the specified endpoint, with keyword
@@ -208,7 +209,7 @@ class KumoClient:
         :meth:`requests.Session.delete`.
         """
         url = self._format_endpoint_url(endpoint)
-        return self._session.delete(url=url, **kwargs)
+        return self._session.delete(url=url, verify=self._verify_ssl, **kwargs)

     def _format_endpoint_url(self, endpoint: str) -> str:
         if endpoint[0] == "/":
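
Taken together, the `client.py` changes thread a single `verify_ssl` flag through every session call and make `authenticate()` raise rather than return a bool. A minimal sketch, assuming a hypothetical deployment URL and key:

```python
from kumoai.client.client import KumoClient

# Hypothetical URL/key. verify_ssl=False skips certificate verification
# (equivalent to `curl -k`), e.g. behind a TLS-intercepting proxy.
client = KumoClient(
    url="https://kumo.example.com/api",
    api_key="<API_KEY>",
    verify_ssl=False,
)
client.authenticate()  # returns None; raises ValueError on bad credentials
```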
kumoai/client/endpoints.py CHANGED
@@ -147,3 +147,4 @@ class RFMEndpoints:
     explain = Endpoint(f"{BASE}/explain", HTTPMethod.POST)
     evaluate = Endpoint(f"{BASE}/evaluate", HTTPMethod.POST)
     validate_query = Endpoint(f"{BASE}/validate_query", HTTPMethod.POST)
+    parse_query = Endpoint(f"{BASE}/parse_query", HTTPMethod.POST)
kumoai/client/rfm.py CHANGED
@@ -1,6 +1,11 @@
+from typing import Any
+
 from kumoapi.json_serde import to_json_dict
 from kumoapi.rfm import (
     RFMEvaluateResponse,
+    RFMExplanationResponse,
+    RFMParseQueryRequest,
+    RFMParseQueryResponse,
     RFMPredictResponse,
     RFMValidateQueryRequest,
     RFMValidateQueryResponse,
@@ -25,28 +30,35 @@ class RFMAPI:
         Returns:
             RFMPredictResponse containing the predictions
         """
-        # Send binary data to the predict endpoint
         response = self._client._request(
-            RFMEndpoints.predict, data=request,
-            headers={'Content-Type': 'application/x-protobuf'})
+            RFMEndpoints.predict,
+            data=request,
+            headers={'Content-Type': 'application/x-protobuf'},
+        )
         raise_on_error(response)
         return parse_response(RFMPredictResponse, response)

-    def explain(self, request: bytes) -> RFMPredictResponse:
+    def explain(
+        self,
+        request: bytes,
+        skip_summary: bool = False,
+    ) -> RFMExplanationResponse:
         """Explain the RFM model on the given context.

         Args:
             request: The predict request as serialized protobuf.
+            skip_summary: Whether to skip generating a human-readable summary
+                of the explanation.

         Returns:
             RFMPredictResponse containing the explanations
         """
-        # Send binary data to the explain endpoint
+        params: dict[str, Any] = {'generate_summary': not skip_summary}
         response = self._client._request(
-            RFMEndpoints.explain, data=request,
+            RFMEndpoints.explain, data=request, params=params,
             headers={'Content-Type': 'application/x-protobuf'})
         raise_on_error(response)
-        return parse_response(RFMPredictResponse, response)
+        return parse_response(RFMExplanationResponse, response)

     def evaluate(self, request: bytes) -> RFMEvaluateResponse:
         """Evaluate the RFM model on the given context.
@@ -57,7 +69,6 @@ class RFMAPI:
         Returns:
             RFMEvaluateResponse containing the computed metrics
         """
-        # Send binary data to the evaluate endpoint
         response = self._client._request(
             RFMEndpoints.evaluate, data=request,
             headers={'Content-Type': 'application/x-protobuf'})
@@ -81,3 +92,21 @@ class RFMAPI:
             json=to_json_dict(request))
         raise_on_error(response)
         return parse_response(RFMValidateQueryResponse, response)
+
+    def parse_query(
+        self,
+        request: RFMParseQueryRequest,
+    ) -> RFMParseQueryResponse:
+        """Parse a predictive query against a graph.
+
+        Args:
+            request: The request object containing the query and graph
+                definition.
+
+        Returns:
+            RFMParseQueryResponse containing the QueryDefinition
+        """
+        response = self._client._request(RFMEndpoints.parse_query,
+                                         json=to_json_dict(request))
+        raise_on_error(response)
+        return parse_response(RFMParseQueryResponse, response)
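
Because the `KumoClient.rfm_api` property was removed (see `client.py` above), `RFMAPI` is now constructed directly. A sketch of the updated `explain()` and new `parse_query()` surface, assuming `client`, `request_bytes`, and `parse_request` come from surrounding context:

```python
from kumoai.client.rfm import RFMAPI

# `client` is an authenticated KumoClient; RFMAPI is built directly now.
rfm = RFMAPI(client)

# explain() forwards generate_summary=(not skip_summary) as a query
# parameter and parses the response as RFMExplanationResponse.
explanation = rfm.explain(request_bytes, skip_summary=True)

# parse_query() sends an RFMParseQueryRequest and returns the parsed
# query definition (RFMParseQueryResponse).
definition = rfm.parse_query(parse_request)
```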
kumoai/connector/file_upload_connector.py CHANGED
@@ -1,7 +1,5 @@
-import os
 from typing import List

-from kumoapi.data_source import DeleteUploadedFileRequest
 from kumoapi.source_table import (
     DataSourceType,
     FileType,
@@ -13,15 +11,7 @@ from typing_extensions import override

 from kumoai import global_state
 from kumoai.connector.base import Connector
-from kumoai.connector.utils import (
-    CONNECTOR_ID_MAP,
-    MAX_PARTITION_SIZE,
-    MIN_PARTITION_SIZE,
-    _upload_partitioned_csv,
-    _upload_partitioned_parquet,
-    _upload_single_file,
-    logger,
-)
+from kumoai.connector.utils import delete_uploaded_table, upload_table


 class FileUploadConnector(Connector):
@@ -61,7 +51,6 @@ class FileUploadConnector(Connector):
     def name(self) -> str:
         return f'{self._file_type}_upload_connector'

-    @override
     @property
     def source_type(self) -> DataSourceType:
         return DataSourceType.S3
@@ -91,100 +80,92 @@ class FileUploadConnector(Connector):
         auto_partition: bool = True,
         partition_size_mb: int = 250,
     ) -> None:
-        r"""Synchronously uploads a table located on your
-        local machine to the Kumo data plane.
-
-        Tables uploaded in this way can be accessed with
-        this ``FileUploadConnector`` using the provided name,
-        for example: ``connector_obj["my_table"]``
-
-        For files larger than 1GB, the table will be automatically partitioned
-        into smaller chunks and uploaded with common prefix that allows
-        FileUploadConnector to union them when reading.
+        r"""Upload a table to Kumo from a local or remote path.
+
+        Supports ``s3://``, ``gs://``, ``abfs://``, ``abfss://``, and
+        ``az://``.
+
+        Tables uploaded this way can be accessed from this
+        ``FileUploadConnector`` using the provided name, e.g.,
+        ``connector_obj["my_table"]``.
+
+        Local files
+        -----------
+        - Accepts one ``.parquet`` or ``.csv`` file (must match this
+          connector’s ``file_type``).
+        - If the file is > 1 GiB and ``auto_partition=True``, it is split
+          into ~``partition_size_mb`` MiB parts and uploaded under a common
+          prefix so the connector can read them as one table.
+
+        Remote paths
+        ------------
+        - **Single file** (``.parquet``/``.csv``): validated and uploaded via
+          multipart PUT. Files > 1 GiB are rejected — re-shard to ~200 MiB
+          and upload the directory instead.
+        - **Directory**: must contain only one format (all Parquet or all
+          CSV) matching this connector’s ``file_type``. Files are validated
+          (consistent schema; CSV headers sanitized) and uploaded in parallel
+          with memory-safe budgeting.

         .. warning::
-            Uploaded tables must be single files, either in parquet or CSV
-            format (must match connector type).
-            Partitioned tables are not currently supported.
+            For local uploads, input must be a single CSV or Parquet file
+            (matching the connector type). For remote uploads, mixed
+            CSV/Parquet directories are not supported. Remote single files
+            larger than 1 GiB are not supported.

+        Examples:
+        ---------
         .. code-block:: python

             import kumoai
-            connector = kumoai.FileUploadConnector(file_type="parquet")
+            conn = kumoai.FileUploadConnector(file_type="parquet")
+
+            # Local: small file
+            conn.upload(name="users", path="/data/users.parquet")

-            # Upload a small table
-            connector.upload(name="users", path="/data/users.parquet")
+            # Local: large file (auto-partitions)
+            conn.upload(
+                name="txns",
+                path="/data/large_txns.parquet",
+            )

-            # Upload a large parquet table (will be automatically partitioned)
-            connector.upload(name="transactions",
-                             path="/data/large_transactions.parquet")
+            # Local: disable auto-partitioning (raises if > 1 GiB)
+            conn.upload(
+                name="users",
+                path="/data/users.parquet",
+                auto_partition=False,
+            )

-            # Disable auto-partitioning (will raise error for large files)
-            upload(name="users", path="/data/users.parquet",
-                   auto_partition=False)
+            # CSV connector
+            csv_conn = kumoai.FileUploadConnector(file_type="csv")
+            csv_conn.upload(name="sales", path="/data/sales.csv")

-            # Create a file upload connector for CSV files.
-            connectorCSV = kumoai.FileUploadConnector(file_type="csv")
+            # Remote: single file (<= 1 GiB)
+            conn.upload(name="logs", path="s3://bkt/path/logs.parquet")

-            # Upload a large CSV table (will be automatically partitioned)
-            connectorCSV.upload(name="sales", path="/data/large_sales.csv")
+            # Remote: directory of shards (uniform format)
+            csv_conn.upload(name="events", path="gs://mybkt/events_csv/")

         Args:
-            name: The name of the table to be uploaded. The uploaded table can
-                be accessed from the
-                :class:`~kumoai.connector.FileUploadConnector` with this name.
-            path: The full path of the table to be uploaded, on the local
-                machine. File type must match the connector type.
-            auto_partition: Whether to automatically
-                partition large files (>1GB).
-                If False and file is >1GB, raises ValueError. Supports both
-                Parquet and CSV files.
-            partition_size_mb: The size of each partition in MB. Only used if
-                auto_partition is True.
+            name:
+                Table name to create in Kumo; access later via this
+                connector.
+            path:
+                Local path or remote URL to a ``.parquet``/``.csv`` file or a
+                directory (uniform format). The format must match this
+                connector’s ``file_type``.
+            auto_partition:
+                Local-only. If ``True`` and the local file is > 1 GiB, split
+                into ~``partition_size_mb`` MiB parts.
+            partition_size_mb:
+                Local-only. Target partition size (100–1000 MiB) when
+                ``auto_partition`` is ``True``.
         """
-        # Validate file type matches connector type
-        if not path.lower().endswith("." + self._file_type):
-            raise ValueError(f"File {path} must match connector path type: "
-                             f"{self._file_type}.")
-
-        # Validate file type
-        if not (path.endswith(".parquet") or path.endswith(".csv")):
-            raise ValueError(f"Path {path} must be either a CSV or Parquet "
-                             f"file. Partitioned data is not currently "
-                             f"supported.")
-
-        file_size = os.path.getsize(path)
-
-        # Route based on file size
-        if file_size < MAX_PARTITION_SIZE:
-            return _upload_single_file(name, path)
-
-        if not auto_partition:
-            raise ValueError(f"File {path} is {file_size / (1024**3):.2f}GB, "
-                             f"which exceeds the 1GB limit. Enable "
-                             f"auto_partition=True to automatically partition "
-                             f"large files.")
-
-        # Partition and upload large files
-        partition_size = partition_size_mb * 1024**2
-        if (partition_size > MAX_PARTITION_SIZE
-                or partition_size < MIN_PARTITION_SIZE):
-            raise ValueError(f"Partition size {partition_size_mb}MB must be "
-                             f"between {MIN_PARTITION_SIZE / 1024**2}MB and "
-                             f"{MAX_PARTITION_SIZE / 1024**2}MB.")
-
-        logger.info(
-            "File %s is large with size %s, partitioning for upload...", path,
-            file_size)
-        if path.endswith('.parquet'):
-            _upload_partitioned_parquet(name, path, partition_size)
-        else:
-            _upload_partitioned_csv(name, path, partition_size)
+        upload_table(name=name, path=path, auto_partition=auto_partition,
+                     partition_size_mb=partition_size_mb,
+                     file_type=self._file_type)

     def delete(
         self,
         name: str,
-        file_type: str,
     ) -> None:
         r"""Synchronously deletes a previously uploaded table from the Kumo
         data plane.
@@ -194,27 +175,15 @@ class FileUploadConnector(Connector):
            # Assume we have uploaded a `.parquet` table named `users`, and a
            # `FileUploadConnector` has been created called `connector`, and
            # we want to delete this table from Kumo:
-            connector.delete(name="users", file_type="parquet")
+            connector.delete(name="users")

         Args:
             name: The name of the table to be deleted. This table must have
                 previously been uploaded with a call to
                 :meth:`~kumoai.connector.FileUploadConnector.upload`.
-            file_type: The file type of the table to be deleted; this can
-                either be :obj:`"parquet"` or :obj:`"csv"`, and must match the
-                connector file_type.
         """
-        if file_type.lower() != self._file_type:
-            raise ValueError(f"File type {file_type} does not match "
-                             f"connector file type {self._file_type}.")
-
         if not self.has_table(name):
             raise ValueError(f"The table '{name}' does not exist in {self}. "
                              f"Please check the existence of the source data.")

-        req = DeleteUploadedFileRequest(
-            source_table_name=name,
-            connector_id=CONNECTOR_ID_MAP[file_type],
-        )
-        global_state.client.connector_api.delete_file_upload(req)
-        logger.info("Successfully deleted table %s from Kumo.", name)
+        delete_uploaded_table(name, self._file_type)
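
The connector is now a thin wrapper over `kumoai.connector.utils`, so the same helpers can be called directly. A sketch matching the call sites visible above; the bucket path is hypothetical:

```python
from kumoai.connector.utils import delete_uploaded_table, upload_table

# upload_table routes local vs. remote paths internally; file_type must
# match the data ("parquet" or "csv").
upload_table(
    name="events",
    path="s3://my-bucket/events/",  # hypothetical remote shard directory
    auto_partition=True,
    partition_size_mb=250,
    file_type="parquet",
)

# delete() no longer takes file_type from the caller; the helper still does.
delete_uploaded_table("events", "parquet")
```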