kumoai 2.10.0.dev202510021830__py3-none-any.whl → 2.12.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kumoai/__init__.py CHANGED
@@ -200,9 +200,11 @@ def init(
 
     logger = logging.getLogger('kumoai')
     log_level = logging.getLevelName(logger.getEffectiveLevel())
+
     logger.info(
-        "Successfully initialized the Kumo SDK against deployment %s, with "
-        "log level %s.", url, log_level)
+        f"Successfully initialized the Kumo SDK (version {__version__}) "
+        f"against deployment {url}, with "
+        f"log level {log_level}.")
 
 
 def set_log_level(level: str) -> None:
kumoai/_version.py CHANGED
@@ -1 +1 @@
-__version__ = '2.10.0.dev202510021830'
+__version__ = '2.12.1'
kumoai/client/client.py CHANGED
@@ -33,6 +33,7 @@ class KumoClient:
         url: str,
         api_key: Optional[str],
         spcs_token: Optional[str] = None,
+        verify_ssl: bool = True,
     ) -> None:
         r"""Creates a client against the Kumo public API, provided a URL of
         the endpoint and an authentication token.
@@ -42,11 +43,14 @@ class KumoClient:
             api_key: the public API authentication token.
             spcs_token: the SPCS token used for authentication to access the
                 Kumo API endpoint.
+            verify_ssl: whether to verify SSL certificates. Set to False to
+                skip SSL certificate verification (equivalent to curl -k).
         """
         self._url = url
         self._api_url = f"{url}/{API_VERSION}"
         self._api_key = api_key
         self._spcs_token = spcs_token
+        self._verify_ssl = verify_ssl
 
         retry_strategy = Retry(
             total=10,  # Maximum number of retries
@@ -73,7 +77,8 @@ class KumoClient:
         r"""Raises an exception if authentication fails. Succeeds if the
         client is properly formed.
         """
-        return self._session.get(f"{self._url}/v1/connectors").ok
+        return self._session.get(f"{self._url}/v1/connectors",
+                                 verify=self._verify_ssl).ok
 
     def set_spcs_token(self, spcs_token: str) -> None:
         r"""Sets the SPCS token for the client and updates the session
@@ -184,7 +189,7 @@ class KumoClient:
         :meth:`requests.Session.get`.
         """
         url = self._format_endpoint_url(endpoint)
-        return self._session.get(url=url, **kwargs)
+        return self._session.get(url=url, verify=self._verify_ssl, **kwargs)
 
     def _post(self, endpoint: str, **kwargs: Any) -> requests.Response:
         r"""Send a POST request to the specified endpoint, with keyword
@@ -192,7 +197,7 @@ class KumoClient:
         :meth:`requests.Session.post`.
         """
         url = self._format_endpoint_url(endpoint)
-        return self._session.post(url=url, **kwargs)
+        return self._session.post(url=url, verify=self._verify_ssl, **kwargs)
 
     def _patch(self, endpoint: str, **kwargs: Any) -> requests.Response:
         r"""Send a PATCH request to the specified endpoint, with keyword
@@ -200,7 +205,7 @@ class KumoClient:
         :meth:`requests.Session.patch`.
         """
         url = self._format_endpoint_url(endpoint)
-        return self._session.patch(url=url, **kwargs)
+        return self._session.patch(url=url, verify=self._verify_ssl, **kwargs)
 
     def _delete(self, endpoint: str, **kwargs: Any) -> requests.Response:
         r"""Send a DELETE request to the specified endpoint, with keyword
@@ -208,7 +213,7 @@ class KumoClient:
         :meth:`requests.Session.delete`.
        """
         url = self._format_endpoint_url(endpoint)
-        return self._session.delete(url=url, **kwargs)
+        return self._session.delete(url=url, verify=self._verify_ssl, **kwargs)
 
     def _format_endpoint_url(self, endpoint: str) -> str:
         if endpoint[0] == "/":
@@ -147,3 +147,4 @@ class RFMEndpoints:
     explain = Endpoint(f"{BASE}/explain", HTTPMethod.POST)
     evaluate = Endpoint(f"{BASE}/evaluate", HTTPMethod.POST)
     validate_query = Endpoint(f"{BASE}/validate_query", HTTPMethod.POST)
+    parse_query = Endpoint(f"{BASE}/parse_query", HTTPMethod.POST)
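
The new verify_ssl flag is threaded through every session call above, which makes deployments behind self-signed certificates reachable without patching requests globally. A minimal usage sketch, assuming a placeholder deployment URL and API key (both values below are illustrative):

    from kumoai.client.client import KumoClient

    # Disable certificate verification, equivalent to `curl -k`:
    client = KumoClient(
        url="https://kumo.internal.example.com/api",
        api_key="<KUMO_API_KEY>",
        verify_ssl=False,
    )

Leaving verify_ssl at its default of True keeps the previous behavior, where requests verifies certificates on every call.
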
kumoai/client/rfm.py CHANGED
@@ -1,7 +1,11 @@
+from typing import Any
+
 from kumoapi.json_serde import to_json_dict
 from kumoapi.rfm import (
     RFMEvaluateResponse,
     RFMExplanationResponse,
+    RFMParseQueryRequest,
+    RFMParseQueryResponse,
     RFMPredictResponse,
     RFMValidateQueryRequest,
     RFMValidateQueryResponse,
@@ -26,25 +30,32 @@ class RFMAPI:
         Returns:
             RFMPredictResponse containing the predictions
         """
-        # Send binary data to the predict endpoint
         response = self._client._request(
-            RFMEndpoints.predict, data=request,
-            headers={'Content-Type': 'application/x-protobuf'})
+            RFMEndpoints.predict,
+            data=request,
+            headers={'Content-Type': 'application/x-protobuf'},
+        )
         raise_on_error(response)
         return parse_response(RFMPredictResponse, response)
 
-    def explain(self, request: bytes) -> RFMExplanationResponse:
+    def explain(
+        self,
+        request: bytes,
+        skip_summary: bool = False,
+    ) -> RFMExplanationResponse:
         """Explain the RFM model on the given context.
 
         Args:
             request: The predict request as serialized protobuf.
+            skip_summary: Whether to skip generating a human-readable summary
+                of the explanation.
 
         Returns:
             RFMPredictResponse containing the explanations
         """
-        # Send binary data to the explain endpoint
+        params: dict[str, Any] = {'generate_summary': not skip_summary}
         response = self._client._request(
-            RFMEndpoints.explain, data=request,
+            RFMEndpoints.explain, data=request, params=params,
             headers={'Content-Type': 'application/x-protobuf'})
         raise_on_error(response)
         return parse_response(RFMExplanationResponse, response)
@@ -58,7 +69,6 @@ class RFMAPI:
         Returns:
             RFMEvaluateResponse containing the computed metrics
         """
-        # Send binary data to the evaluate endpoint
         response = self._client._request(
             RFMEndpoints.evaluate, data=request,
             headers={'Content-Type': 'application/x-protobuf'})
@@ -82,3 +92,21 @@ class RFMAPI:
             json=to_json_dict(request))
         raise_on_error(response)
         return parse_response(RFMValidateQueryResponse, response)
+
+    def parse_query(
+        self,
+        request: RFMParseQueryRequest,
+    ) -> RFMParseQueryResponse:
+        """Validate a predictive query against a graph.
+
+        Args:
+            request: The request object containing
+                the query and graph definition
+
+        Returns:
+            RFMParseQueryResponse containing the QueryDefinition
+        """
+        response = self._client._request(RFMEndpoints.parse_query,
+                                         json=to_json_dict(request))
+        raise_on_error(response)
+        return parse_response(RFMParseQueryResponse, response)
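
A hedged sketch of how the two additions surface to callers of RFMAPI; the api object and proto_bytes payload are illustrative stand-ins rather than names from this diff:

    from kumoapi.rfm import RFMParseQueryRequest

    # `api` is an RFMAPI instance; `proto_bytes` is a serialized predict request:
    explanation = api.explain(proto_bytes, skip_summary=True)  # omit the summary
    parsed = api.parse_query(RFMParseQueryRequest(...))  # RFMParseQueryResponse

Internally, skip_summary=True is forwarded to the server as the query parameter generate_summary=False.
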
@@ -12,7 +12,7 @@ CPU architecture: {platform.machine()}
 glibc version: {platform.libc_ver()[1]}
 
 ✅ Supported Environments:
-* Python versions: 3.9, 3.10, 3.11, 3.12, 3.13
+* Python versions: 3.10, 3.11, 3.12, 3.13
 * Operating systems and CPU architectures:
   * Linux (x86_64)
   * macOS (arm64)
@@ -20,7 +20,7 @@ glibc version: {platform.libc_ver()[1]}
   * glibc versions: >=2.28
 
 ❌ Unsupported Environments:
-* Python versions: 3.8, 3.14
+* Python versions: 3.8, 3.9, 3.14
 * Operating systems and CPU architectures:
   * Linux (arm64)
   * macOS (x86_64)
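
Because Python 3.9 moves to the unsupported list in this release, environments pinned to older interpreters may want an explicit guard; the following is a sketch, not code from the package:

    import sys

    if sys.version_info < (3, 10):
        raise RuntimeError("kumoai 2.12.1 requires Python 3.10 or newer")
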
@@ -36,7 +36,7 @@ import os
 import kumoai
 from .local_table import LocalTable
 from .local_graph import LocalGraph
-from .rfm import KumoRFM
+from .rfm import ExplainConfig, Explanation, KumoRFM
 from .authenticate import authenticate
 
 
@@ -60,6 +60,8 @@ __all__ = [
     'LocalTable',
     'LocalGraph',
     'KumoRFM',
+    'ExplainConfig',
+    'Explanation',
     'authenticate',
     'init',
 ]
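
With the re-export in place, the explanation types resolve directly from the experimental package root; only the import is shown in this diff, so the attribute access below is a sketch:

    import kumoai.experimental.rfm as rfm

    # Newly exported alongside KumoRFM:
    config_cls = rfm.ExplainConfig
    explanation_cls = rfm.Explanation
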
@@ -21,9 +21,7 @@ def contains_timestamp(ser: pd.Series, column_name: str, dtype: Dtype) -> bool:
         column_name,
         re.IGNORECASE,
     )
-
-    if match is not None:
-        return True
+    score = 0.3 if match is not None else 0.0
 
     ser = ser.iloc[:100]
     ser = ser.dropna()
@@ -37,4 +35,7 @@ def contains_timestamp(ser: pd.Series, column_name: str, dtype: Dtype) -> bool:
     with warnings.catch_warnings():
         warnings.simplefilter('ignore', UnknownTimezoneWarning)
         warnings.filterwarnings('ignore', message='Could not infer format')
-        return pd.to_datetime(ser, errors='coerce').notna().all()
+        mask = pd.to_datetime(ser, errors='coerce').notna()
+        score += int(mask.sum()) / len(mask)
+
+    return score >= 1.0
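
Rather than accepting any column whose name merely looks like a timestamp, the heuristic now combines two signals: a matching name contributes 0.3, and the fraction of the first 100 non-null values that parse as datetimes contributes the rest, with the column accepted only when the total reaches 1.0. A worked sketch of the resulting thresholds (column names are illustrative):

    # name matches, e.g. 'created_at':  0.3 + parse_rate >= 1.0  ->  parse_rate >= 0.7
    # name does not match:                    parse_rate >= 1.0  ->  every sampled value must parse
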
@@ -29,45 +29,46 @@ class LocalGraph:
 
     .. code-block:: python
 
-        import pandas as pd
-        import kumoai.experimental.rfm as rfm
-
-        # Load data frames into memory:
-        df1 = pd.DataFrame(...)
-        df2 = pd.DataFrame(...)
-        df3 = pd.DataFrame(...)
-
-        # Define tables from data frames:
-        table1 = rfm.LocalTable(name="table1", data=df1)
-        table2 = rfm.LocalTable(name="table2", data=df2)
-        table3 = rfm.LocalTable(name="table3", data=df3)
-
-        # Create a graph from a dictionary of tables:
-        graph = rfm.LocalGraph({
-            "table1": table1,
-            "table2": table2,
-            "table3": table3,
-        })
-
-        # Infer table metadata:
-        graph.infer_metadata()
-
-        # Infer links/edges:
-        graph.infer_links()
-
-        # Inspect table metadata:
-        for table in graph.tables.values():
-            table.print_metadata()
-
-        # Visualize graph (if graphviz is installed):
-        graph.visualize()
-
-        # Add/Remove edges between tables:
-        graph.link(src_table="table1", fkey="id1", dst_table="table2")
-        graph.unlink(src_table="table1", fkey="id1", dst_table="table2")
-
-        # Validate graph:
-        graph.validate()
+        >>> # doctest: +SKIP
+        >>> import pandas as pd
+        >>> import kumoai.experimental.rfm as rfm
+
+        >>> # Load data frames into memory:
+        >>> df1 = pd.DataFrame(...)
+        >>> df2 = pd.DataFrame(...)
+        >>> df3 = pd.DataFrame(...)
+
+        >>> # Define tables from data frames:
+        >>> table1 = rfm.LocalTable(name="table1", data=df1)
+        >>> table2 = rfm.LocalTable(name="table2", data=df2)
+        >>> table3 = rfm.LocalTable(name="table3", data=df3)
+
+        >>> # Create a graph from a dictionary of tables:
+        >>> graph = rfm.LocalGraph({
+        ...     "table1": table1,
+        ...     "table2": table2,
+        ...     "table3": table3,
+        ... })
+
+        >>> # Infer table metadata:
+        >>> graph.infer_metadata()
+
+        >>> # Infer links/edges:
+        >>> graph.infer_links()
+
+        >>> # Inspect table metadata:
+        >>> for table in graph.tables.values():
+        ...     table.print_metadata()
+
+        >>> # Visualize graph (if graphviz is installed):
+        >>> graph.visualize()
+
+        >>> # Add/Remove edges between tables:
+        >>> graph.link(src_table="table1", fkey="id1", dst_table="table2")
+        >>> graph.unlink(src_table="table1", fkey="id1", dst_table="table2")
+
+        >>> # Validate graph:
+        >>> graph.validate()
     """
 
     # Constructors ############################################################
@@ -104,27 +105,28 @@ class LocalGraph:
 
        .. code-block:: python
 
-            import pandas as pd
-            import kumoai.experimental.rfm as rfm
+            >>> # doctest: +SKIP
+            >>> import pandas as pd
+            >>> import kumoai.experimental.rfm as rfm
 
-            # Load data frames into memory:
-            df1 = pd.DataFrame(...)
-            df2 = pd.DataFrame(...)
-            df3 = pd.DataFrame(...)
+            >>> # Load data frames into memory:
+            >>> df1 = pd.DataFrame(...)
+            >>> df2 = pd.DataFrame(...)
+            >>> df3 = pd.DataFrame(...)
 
-            # Create a graph from a dictionary of data frames:
-            graph = rfm.LocalGraph.from_data({
-                "table1": df1,
-                "table2": df2,
-                "table3": df3,
-            })
+            >>> # Create a graph from a dictionary of data frames:
+            >>> graph = rfm.LocalGraph.from_data({
+            ...     "table1": df1,
+            ...     "table2": df2,
+            ...     "table3": df3,
+            ... })
 
-            # Inspect table metadata:
-            for table in graph.tables.values():
-                table.print_metadata()
+            >>> # Inspect table metadata:
+            >>> for table in graph.tables.values():
+            ...     table.print_metadata()
 
-            # Visualize graph (if graphviz is installed):
-            graph.visualize()
+            >>> # Visualize graph (if graphviz is installed):
+            >>> graph.visualize()
 
        Args:
            df_dict: A dictionary of data frames, where the keys are the names
@@ -141,6 +143,7 @@ class LocalGraph:
            graph.
 
        Example:
+            >>> # doctest: +SKIP
            >>> import kumoai.experimental.rfm as rfm
            >>> df1 = pd.DataFrame(...)
            >>> df2 = pd.DataFrame(...)
@@ -150,7 +153,7 @@ class LocalGraph:
            ...     "table2": df2,
            ...     "table3": df3,
            ... })
-            ... graph.validate()
+            >>> graph.validate()
        """
        tables = [LocalTable(df, name) for name, df in df_dict.items()]
 
@@ -231,16 +234,17 @@ class LocalGraph:
        r"""Returns a :class:`pandas.DataFrame` object containing metadata
        information about the tables in this graph.
 
-        The returned dataframe has columns ``name``, ``primary_key``, and
-        ``time_column``, which provide an aggregate view of the properties of
-        the tables of this graph.
+        The returned dataframe has columns ``name``, ``primary_key``,
+        ``time_column``, and ``end_time_column``, which provide an aggregate
+        view of the properties of the tables of this graph.
 
        Example:
+            >>> # doctest: +SKIP
            >>> import kumoai.experimental.rfm as rfm
            >>> graph = rfm.LocalGraph(tables=...).infer_metadata()
-            >>> graph.metadata
-            name primary_key time_column
-            0 users user_id -
+            >>> graph.metadata  # doctest: +SKIP
+            name primary_key time_column end_time_column
+            0 users user_id - -
        """
        tables = list(self.tables.values())
 
@@ -251,6 +255,11 @@ class LocalGraph:
            pd.Series(dtype=str, data=[t._primary_key or '-' for t in tables]),
            'time_column':
            pd.Series(dtype=str, data=[t._time_column or '-' for t in tables]),
+            'end_time_column':
+            pd.Series(
+                dtype=str,
+                data=[t._end_time_column or '-' for t in tables],
+            ),
        })
 
    def print_metadata(self) -> None:
@@ -602,8 +611,8 @@ class LocalGraph:
                raise ValueError(f"{edge} is invalid as foreign key "
                                 f"'{fkey}' and primary key '{dst_key.name}' "
                                 f"have incompatible data types (got "
-                                 f"fkey.dtype '{dst_key.dtype}' and "
-                                 f"pkey.dtype '{src_key.dtype}')")
+                                 f"fkey.dtype '{src_key.dtype}' and "
+                                 f"pkey.dtype '{dst_key.dtype}')")
 
        return self
 
@@ -676,6 +685,11 @@ class LocalGraph:
            ]
            if time_column := table.time_column:
                keys += [f'{time_column.name}: Time ({time_column.dtype})']
+            if end_time_column := table.end_time_column:
+                keys += [
+                    f'{end_time_column.name}: '
+                    f'End Time ({end_time_column.dtype})'
+                ]
            key_repr = left_align(keys)
 
            columns = []
@@ -683,9 +697,9 @@ class LocalGraph:
            columns += [
                f'{column.name}: {column.stype} ({column.dtype})'
                for column in table.columns
-                if column.name not in fkeys_dict[table_name]
-                and column.name != table._primary_key
-                and column.name != table._time_column
+                if column.name not in fkeys_dict[table_name] and
+                column.name != table._primary_key and column.name != table.
+                _time_column and column.name != table._end_time_column
            ]
            column_repr = left_align(columns)
 
@@ -752,16 +766,18 @@ class LocalGraph:
    def _to_api_graph_definition(self) -> GraphDefinition:
        tables: Dict[str, TableDefinition] = {}
        col_groups: List[ColumnKeyGroup] = []
-        for t_name, table in self.tables.items():
-            tables[t_name] = table._to_api_table_definition()
+        for table_name, table in self.tables.items():
+            tables[table_name] = table._to_api_table_definition()
            if table.primary_key is None:
                continue
-            keys = [ColumnKey(t_name, table.primary_key.name)]
+            keys = [ColumnKey(table_name, table.primary_key.name)]
            for edge in self.edges:
-                if edge.dst_table == t_name:
+                if edge.dst_table == table_name:
                    keys.append(ColumnKey(edge.src_table, edge.fkey))
-            keys = sorted(list(set(keys)),
-                          key=lambda x: f'{x.table_name}.{x.col_name}')
+            keys = sorted(
+                list(set(keys)),
+                key=lambda x: f'{x.table_name}.{x.col_name}',
+            )
            if len(keys) > 1:
                col_groups.append(ColumnKeyGroup(keys))
        return GraphDefinition(tables, col_groups)
@@ -92,15 +92,23 @@ class LocalGraphSampler:
                )
                continue
 
-            # Only store unique rows in `df` above a certain threshold:
-            unique_node, inverse_node = np.unique(node, return_inverse=True)
-            if len(node) > 1.05 * len(unique_node):
-                df = df.iloc[unique_node]
-                row = inverse_node
+            row: Optional[np.ndarray] = None
+            if table_name in self._graph_store.end_time_column_dict:
+                # Set end time to NaT for all values greater than anchor time:
+                df = df.iloc[node].reset_index(drop=True)
+                col_name = self._graph_store.end_time_column_dict[table_name]
+                ser = df[col_name]
+                value = ser.astype('datetime64[ns]').astype(int).to_numpy()
+                mask = value > time[batch]
+                df.loc[mask, col_name] = pd.NaT
            else:
-                df = df.iloc[node]
-                row = None
-            df = df.reset_index(drop=True)
+                # Only store unique rows in `df` above a certain threshold:
+                unique_node, inverse = np.unique(node, return_inverse=True)
+                if len(node) > 1.05 * len(unique_node):
+                    df = df.iloc[unique_node].reset_index(drop=True)
+                    row = inverse
+                else:
+                    df = df.iloc[node].reset_index(drop=True)
 
            # Filter data frame to minimal set of columns:
            df = df[columns]
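
A self-contained sketch of the end-time masking the sampler now applies; the frame, column name, and anchor timestamp below are illustrative, and the conversion mirrors the integer-nanosecond comparison used above:

    import pandas as pd

    df = pd.DataFrame({'end_time': pd.to_datetime(['2024-01-01', '2024-06-01'])})
    anchor = pd.Timestamp('2024-03-01').value  # anchor time in nanoseconds

    value = df['end_time'].astype('datetime64[ns]').astype(int).to_numpy()
    df.loc[value > anchor, 'end_time'] = pd.NaT  # hide end times after the anchor

This keeps end times that lie in the future relative to the anchor from leaking into the sampled subgraph.
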
@@ -45,6 +45,7 @@ class LocalGraphStore:
 
        (
            self.time_column_dict,
+            self.end_time_column_dict,
            self.time_dict,
            self.min_time,
            self.max_time,
@@ -219,16 +220,21 @@
        self,
        graph: LocalGraph,
    ) -> Tuple[
+            Dict[str, str],
            Dict[str, str],
            Dict[str, np.ndarray],
            pd.Timestamp,
            pd.Timestamp,
    ]:
        time_column_dict: Dict[str, str] = {}
+        end_time_column_dict: Dict[str, str] = {}
        time_dict: Dict[str, np.ndarray] = {}
        min_time = pd.Timestamp.max
        max_time = pd.Timestamp.min
        for table in graph.tables.values():
+            if table._end_time_column is not None:
+                end_time_column_dict[table.name] = table._end_time_column
+
            if table._time_column is None:
                continue
 
@@ -243,7 +249,13 @@
            min_time = min(min_time, time.min())
            max_time = max(max_time, time.max())
 
-        return time_column_dict, time_dict, min_time, max_time
+        return (
+            time_column_dict,
+            end_time_column_dict,
+            time_dict,
+            min_time,
+            max_time,
+        )
 
    def get_csc(
        self,