kumoai-2.9.0.dev202509081831-cp312-cp312-win_amd64.whl → kumoai-2.13.0.dev202511201731-cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. kumoai/__init__.py +10 -11
  2. kumoai/_version.py +1 -1
  3. kumoai/client/client.py +17 -16
  4. kumoai/client/endpoints.py +1 -0
  5. kumoai/client/rfm.py +37 -8
  6. kumoai/connector/file_upload_connector.py +71 -102
  7. kumoai/connector/utils.py +1367 -236
  8. kumoai/experimental/rfm/__init__.py +153 -10
  9. kumoai/experimental/rfm/authenticate.py +8 -5
  10. kumoai/experimental/rfm/infer/timestamp.py +7 -4
  11. kumoai/experimental/rfm/local_graph.py +90 -80
  12. kumoai/experimental/rfm/local_graph_sampler.py +16 -10
  13. kumoai/experimental/rfm/local_graph_store.py +22 -6
  14. kumoai/experimental/rfm/local_pquery_driver.py +336 -42
  15. kumoai/experimental/rfm/local_table.py +100 -22
  16. kumoai/experimental/rfm/pquery/__init__.py +4 -4
  17. kumoai/experimental/rfm/pquery/{backend.py → executor.py} +24 -58
  18. kumoai/experimental/rfm/pquery/{pandas_backend.py → pandas_executor.py} +278 -222
  19. kumoai/experimental/rfm/rfm.py +523 -124
  20. kumoai/experimental/rfm/sagemaker.py +130 -0
  21. kumoai/jobs.py +1 -0
  22. kumoai/kumolib.cp312-win_amd64.pyd +0 -0
  23. kumoai/spcs.py +1 -3
  24. kumoai/trainer/trainer.py +19 -10
  25. kumoai/utils/progress_logger.py +68 -0
  26. {kumoai-2.9.0.dev202509081831.dist-info → kumoai-2.13.0.dev202511201731.dist-info}/METADATA +13 -5
  27. {kumoai-2.9.0.dev202509081831.dist-info → kumoai-2.13.0.dev202511201731.dist-info}/RECORD +30 -29
  28. {kumoai-2.9.0.dev202509081831.dist-info → kumoai-2.13.0.dev202511201731.dist-info}/WHEEL +0 -0
  29. {kumoai-2.9.0.dev202509081831.dist-info → kumoai-2.13.0.dev202511201731.dist-info}/licenses/LICENSE +0 -0
  30. {kumoai-2.9.0.dev202509081831.dist-info → kumoai-2.13.0.dev202511201731.dist-info}/top_level.txt +0 -0
@@ -12,7 +12,7 @@ CPU architecture: {platform.machine()}
 glibc version: {platform.libc_ver()[1]}
 
 ✅ Supported Environments:
-* Python versions: 3.9, 3.10, 3.11, 3.12, 3.13
+* Python versions: 3.10, 3.11, 3.12, 3.13
 * Operating systems and CPU architectures:
 * Linux (x86_64)
 * macOS (arm64)
@@ -20,7 +20,7 @@ glibc version: {platform.libc_ver()[1]}
 * glibc versions: >=2.28
 
 ❌ Unsupported Environments:
-* Python versions: 3.8, 3.14
+* Python versions: 3.8, 3.9, 3.14
 * Operating systems and CPU architectures:
 * Linux (arm64)
 * macOS (x86_64)
@@ -31,14 +31,122 @@ Please create a feature request at 'https://github.com/kumo-ai/kumo-rfm'."""
 
     raise RuntimeError(_msg) from e
 
-from typing import Optional, Dict
+from dataclasses import dataclass
+from enum import Enum
+import ipaddress
+import logging
+import re
+import socket
+import threading
+from typing import Optional, Dict, Tuple
 import os
+from urllib.parse import urlparse
 
 import kumoai
+from kumoai.client.client import KumoClient
+from .sagemaker import (KumoClient_SageMakerAdapter,
+                        KumoClient_SageMakerProxy_Local)
 from .local_table import LocalTable
 from .local_graph import LocalGraph
-from .rfm import KumoRFM
+from .rfm import ExplainConfig, Explanation, KumoRFM
 from .authenticate import authenticate
 
+logger = logging.getLogger('kumoai_rfm')
+
+
+def _is_local_address(host: str | None) -> bool:
+    """Return True if the hostname/IP refers to the local machine."""
+    if not host:
+        return False
+    try:
+        infos = socket.getaddrinfo(host, None)
+        for _, _, _, _, sockaddr in infos:
+            ip = sockaddr[0]
+            ip_obj = ipaddress.ip_address(ip)
+            if ip_obj.is_loopback or ip_obj.is_unspecified:
+                return True
+        return False
+    except Exception:
+        return False
+
+
+class InferenceBackend(str, Enum):
+    REST = "REST"
+    LOCAL_SAGEMAKER = "LOCAL_SAGEMAKER"
+    AWS_SAGEMAKER = "AWS_SAGEMAKER"
+    UNKNOWN = "UNKNOWN"
+
+
+def _detect_backend(
+        url: str) -> Tuple[InferenceBackend, Optional[str], Optional[str]]:
+    parsed = urlparse(url)
+
+    # Remote SageMaker
+    if ("runtime.sagemaker" in parsed.netloc
+            and parsed.path.endswith("/invocations")):
+        # Example: https://runtime.sagemaker.us-west-2.amazonaws.com/
+        # endpoints/Name/invocations
+        match = re.search(r"runtime\.sagemaker\.([a-z0-9-]+)\.amazonaws\.com",
+                          parsed.netloc)
+        region = match.group(1) if match else None
+        m = re.search(r"/endpoints/([^/]+)/invocations", parsed.path)
+        endpoint_name = m.group(1) if m else None
+        return InferenceBackend.AWS_SAGEMAKER, region, endpoint_name
+
+    # Local SageMaker
+    if parsed.port == 8080 and parsed.path.endswith(
+            "/invocations") and _is_local_address(parsed.hostname):
+        return InferenceBackend.LOCAL_SAGEMAKER, None, None
+
+    # Default: regular REST
+    return InferenceBackend.REST, None, None
+
+
+@dataclass
+class RfmGlobalState:
+    _url: str = '__url_not_provided__'
+    _backend: InferenceBackend = InferenceBackend.UNKNOWN
+    _region: Optional[str] = None
+    _endpoint_name: Optional[str] = None
+    _thread_local = threading.local()
+
+    # Thread-safe init-once.
+    _initialized: bool = False
+    _lock: threading.Lock = threading.Lock()
+
+    @property
+    def client(self) -> KumoClient:
+        if self._backend == InferenceBackend.REST:
+            return kumoai.global_state.client
+
+        if hasattr(self._thread_local, '_sagemaker'):
+            # Set the spcs token in the client to ensure it has the latest.
+            return self._thread_local._sagemaker
+
+        sagemaker_client: KumoClient
+        if self._backend == InferenceBackend.LOCAL_SAGEMAKER:
+            sagemaker_client = KumoClient_SageMakerProxy_Local(self._url)
+        else:
+            assert self._backend == InferenceBackend.AWS_SAGEMAKER
+            assert self._region
+            assert self._endpoint_name
+            sagemaker_client = KumoClient_SageMakerAdapter(
+                self._region, self._endpoint_name)
+
+        self._thread_local._sagemaker = sagemaker_client
+        return sagemaker_client
+
+    def reset(self) -> None:  # For testing only.
+        with self._lock:
+            self._initialized = False
+            self._url = '__url_not_provided__'
+            self._backend = InferenceBackend.UNKNOWN
+            self._region = None
+            self._endpoint_name = None
+            self._thread_local = threading.local()
+
+
+global_state = RfmGlobalState()
+
 
 def init(
     url: Optional[str] = None,
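For orientation, a minimal standalone sketch of the URL classification the new `_detect_backend` helper above performs. This re-implements the decision rules for illustration only; the example URLs are made up and the local-host check is simplified to a fixed list:

```python
from urllib.parse import urlparse


def detect(url: str) -> str:
    """Illustrative only: mirrors the decision rules of _detect_backend."""
    parsed = urlparse(url)
    # SageMaker runtime endpoints look like
    # https://runtime.sagemaker.<region>.amazonaws.com/endpoints/<name>/invocations
    if ("runtime.sagemaker" in parsed.netloc
            and parsed.path.endswith("/invocations")):
        return "AWS_SAGEMAKER"
    # A locally served container exposing /invocations on port 8080:
    if (parsed.port == 8080 and parsed.path.endswith("/invocations")
            and parsed.hostname in ("localhost", "127.0.0.1", "0.0.0.0")):
        return "LOCAL_SAGEMAKER"
    # Everything else is treated as the regular REST API.
    return "REST"


print(detect("https://runtime.sagemaker.us-west-2.amazonaws.com"
             "/endpoints/my-endpoint/invocations"))  # AWS_SAGEMAKER
print(detect("http://localhost:8080/invocations"))   # LOCAL_SAGEMAKER
print(detect("https://kumorfm.ai/api"))              # REST
```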
@@ -47,19 +155,54 @@ def init(
     snowflake_application: Optional[str] = None,
     log_level: str = "INFO",
 ) -> None:
-    if url is None:
-        url = os.getenv("KUMO_API_URL", "https://kumorfm.ai/api")
+    with global_state._lock:
+        if global_state._initialized:
+            if url != global_state._url:
+                raise ValueError(
+                    "Kumo RFM has already been initialized with a different "
+                    "URL. Re-initialization with a different URL is not "
+                    "supported.")
+            return
+
+        if url is None:
+            url = os.getenv("RFM_API_URL", "https://kumorfm.ai/api")
+
+        backend, region, endpoint_name = _detect_backend(url)
+        if backend == InferenceBackend.REST:
+            # Initialize kumoai.global_state
+            if (kumoai.global_state.initialized
+                    and kumoai.global_state._url != url):
+                raise ValueError(
+                    "Kumo AI SDK has already been initialized with different "
+                    "API URL. Please restart Python interpreter and "
+                    "initialize via kumoai.rfm.init()")
+            kumoai.init(url=url, api_key=api_key,
+                        snowflake_credentials=snowflake_credentials,
+                        snowflake_application=snowflake_application,
+                        log_level=log_level)
+        elif backend == InferenceBackend.AWS_SAGEMAKER:
+            assert region
+            assert endpoint_name
+            KumoClient_SageMakerAdapter(region, endpoint_name).authenticate()
+        else:
+            assert backend == InferenceBackend.LOCAL_SAGEMAKER
+            KumoClient_SageMakerProxy_Local(url).authenticate()
 
-    kumoai.init(url=url, api_key=api_key,
-                snowflake_credentials=snowflake_credentials,
-                snowflake_application=snowflake_application,
-                log_level=log_level)
+        global_state._url = url
+        global_state._backend = backend
+        global_state._region = region
+        global_state._endpoint_name = endpoint_name
+        global_state._initialized = True
+        logger.info("Kumo RFM initialized with backend: %s, url: %s", backend,
+                    url)
 
 
 __all__ = [
     'LocalTable',
     'LocalGraph',
     'KumoRFM',
+    'ExplainConfig',
+    'Explanation',
     'authenticate',
     'init',
 ]
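A hedged usage sketch of the reworked `init()` entry point, based only on the signature and defaults visible in this hunk; the API key value is a placeholder and the local URL assumes a SageMaker-style container serving /invocations on port 8080:

```python
import kumoai.experimental.rfm as rfm

# Regular REST backend: with no URL given, init() falls back to the
# RFM_API_URL environment variable or "https://kumorfm.ai/api".
rfm.init(api_key="YOUR_API_KEY")  # placeholder key

# Re-initializing with the same URL is a no-op; a different URL raises
# ValueError, so one process stays pinned to a single backend.

# Local SageMaker-style container instead of the REST API (assumption:
# the container serves the standard /invocations route on port 8080):
# rfm.init(url="http://localhost:8080/invocations")
```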
@@ -264,14 +264,17 @@ def _authenticate_local(api_url: str, redirect_port: int = 8765) -> None:
         f"?callback_url={urllib.parse.quote(callback_url)}" +
         f"&token_name={urllib.parse.quote(token_name)}")
 
-    print("Opening browser page to automatically generate an API key...")
+    print(
+        "Opening browser page to automatically generate an API key...\n" +
+        "If the page does not open, manually create a new API key at " +
+        f"{api_url}/api-keys and set it using os.environ[\"KUMO_API_KEY\"] " +
+        "= \"YOUR_API_KEY\"")
+
     webbrowser.open(login_url)
 
     def get_user_input() -> None:
-        print("If the page does not open, manually create a new API key at " +
-              f"{api_url}/api-keys and paste it below:")
-
-        token_entered = getpass("API Key (type then press enter): ").strip()
+        token_entered = getpass(
+            "or paste the API key here and press enter: ").strip()
 
         while (len(token_entered) == 0):
             token_entered = getpass(
@@ -2,6 +2,7 @@ import re
 import warnings
 
 import pandas as pd
+from dateutil.parser import UnknownTimezoneWarning
 from kumoapi.typing import Dtype, Stype
 
 
@@ -20,9 +21,7 @@ def contains_timestamp(ser: pd.Series, column_name: str, dtype: Dtype) -> bool:
         column_name,
         re.IGNORECASE,
     )
-
-    if match is not None:
-        return True
+    score = 0.3 if match is not None else 0.0
 
     ser = ser.iloc[:100]
     ser = ser.dropna()
@@ -34,5 +33,9 @@ def contains_timestamp(ser: pd.Series, column_name: str, dtype: Dtype) -> bool:
     ser = ser.astype(str)  # Avoid parsing numbers as unix timestamps.
 
     with warnings.catch_warnings():
+        warnings.simplefilter('ignore', UnknownTimezoneWarning)
         warnings.filterwarnings('ignore', message='Could not infer format')
-        return pd.to_datetime(ser, errors='coerce').notna().all()
+        mask = pd.to_datetime(ser, errors='coerce').notna()
+        score += int(mask.sum()) / len(mask)
+
+    return score >= 1.0
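To make the new acceptance rule concrete: a column-name match now contributes 0.3, the fraction of parseable sample values contributes up to 1.0, and the column counts as a timestamp once the combined score reaches 1.0. A small self-contained sketch of that rule; the `looks_like_timestamp` helper and toy data are illustrative, not the packaged implementation:

```python
import pandas as pd


def looks_like_timestamp(ser: pd.Series, name_matches: bool) -> bool:
    # A name match alone is no longer sufficient; it only adds 0.3.
    score = 0.3 if name_matches else 0.0
    ser = ser.iloc[:100].dropna().astype(str)
    mask = pd.to_datetime(ser, errors='coerce').notna()
    score += int(mask.sum()) / len(mask)
    return score >= 1.0


# Every value parses -> accepted even without a timestamp-like column name:
print(looks_like_timestamp(pd.Series(['2024-01-01', '2024-02-01']), False))
# Only 80% of values parse, but the column name matches -> 0.8 + 0.3 >= 1.0:
print(looks_like_timestamp(pd.Series(['2024-01-01'] * 4 + ['n/a']), True))
```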
@@ -29,45 +29,46 @@ class LocalGraph:
 
     .. code-block:: python
 
-        import pandas as pd
-        import kumoai.experimental.rfm as rfm
-
-        # Load data frames into memory:
-        df1 = pd.DataFrame(...)
-        df2 = pd.DataFrame(...)
-        df3 = pd.DataFrame(...)
-
-        # Define tables from data frames:
-        table1 = rfm.LocalTable(name="table1", data=df1)
-        table2 = rfm.LocalTable(name="table2", data=df2)
-        table3 = rfm.LocalTable(name="table3", data=df3)
-
-        # Create a graph from a dictionary of tables:
-        graph = rfm.LocalGraph({
-            "table1": table1,
-            "table2": table2,
-            "table3": table3,
-        })
-
-        # Infer table metadata:
-        graph.infer_metadata()
-
-        # Infer links/edges:
-        graph.infer_links()
-
-        # Inspect table metadata:
-        for table in graph.tables.values():
-            table.print_metadata()
-
-        # Visualize graph (if graphviz is installed):
-        graph.visualize()
-
-        # Add/Remove edges between tables:
-        graph.link(src_table="table1", fkey="id1", dst_table="table2")
-        graph.unlink(src_table="table1", fkey="id1", dst_table="table2")
-
-        # Validate graph:
-        graph.validate()
+        >>> # doctest: +SKIP
+        >>> import pandas as pd
+        >>> import kumoai.experimental.rfm as rfm
+
+        >>> # Load data frames into memory:
+        >>> df1 = pd.DataFrame(...)
+        >>> df2 = pd.DataFrame(...)
+        >>> df3 = pd.DataFrame(...)
+
+        >>> # Define tables from data frames:
+        >>> table1 = rfm.LocalTable(name="table1", data=df1)
+        >>> table2 = rfm.LocalTable(name="table2", data=df2)
+        >>> table3 = rfm.LocalTable(name="table3", data=df3)
+
+        >>> # Create a graph from a dictionary of tables:
+        >>> graph = rfm.LocalGraph({
+        ...     "table1": table1,
+        ...     "table2": table2,
+        ...     "table3": table3,
+        ... })
+
+        >>> # Infer table metadata:
+        >>> graph.infer_metadata()
+
+        >>> # Infer links/edges:
+        >>> graph.infer_links()
+
+        >>> # Inspect table metadata:
+        >>> for table in graph.tables.values():
+        ...     table.print_metadata()
+
+        >>> # Visualize graph (if graphviz is installed):
+        >>> graph.visualize()
+
+        >>> # Add/Remove edges between tables:
+        >>> graph.link(src_table="table1", fkey="id1", dst_table="table2")
+        >>> graph.unlink(src_table="table1", fkey="id1", dst_table="table2")
+
+        >>> # Validate graph:
+        >>> graph.validate()
     """
 
     # Constructors ############################################################
@@ -104,27 +105,28 @@ class LocalGraph:
 
         .. code-block:: python
 
-            import pandas as pd
-            import kumoai.experimental.rfm as rfm
+            >>> # doctest: +SKIP
+            >>> import pandas as pd
+            >>> import kumoai.experimental.rfm as rfm
 
-            # Load data frames into memory:
-            df1 = pd.DataFrame(...)
-            df2 = pd.DataFrame(...)
-            df3 = pd.DataFrame(...)
+            >>> # Load data frames into memory:
+            >>> df1 = pd.DataFrame(...)
+            >>> df2 = pd.DataFrame(...)
+            >>> df3 = pd.DataFrame(...)
 
-            # Create a graph from a dictionary of data frames:
-            graph = rfm.LocalGraph.from_data({
-                "table1": df1,
-                "table2": df2,
-                "table3": df3,
-            })
+            >>> # Create a graph from a dictionary of data frames:
+            >>> graph = rfm.LocalGraph.from_data({
+            ...     "table1": df1,
+            ...     "table2": df2,
+            ...     "table3": df3,
+            ... })
 
-            # Inspect table metadata:
-            for table in graph.tables.values():
-                table.print_metadata()
+            >>> # Inspect table metadata:
+            >>> for table in graph.tables.values():
+            ...     table.print_metadata()
 
-            # Visualize graph (if graphviz is installed):
-            graph.visualize()
+            >>> # Visualize graph (if graphviz is installed):
+            >>> graph.visualize()
 
         Args:
             df_dict: A dictionary of data frames, where the keys are the names
@@ -141,6 +143,7 @@ class LocalGraph:
                 graph.
 
         Example:
+            >>> # doctest: +SKIP
             >>> import kumoai.experimental.rfm as rfm
             >>> df1 = pd.DataFrame(...)
             >>> df2 = pd.DataFrame(...)
@@ -150,7 +153,7 @@ class LocalGraph:
             ...     "table2": df2,
             ...     "table3": df3,
             ... })
-            ... graph.validate()
+            >>> graph.validate()
         """
         tables = [LocalTable(df, name) for name, df in df_dict.items()]
 
@@ -197,12 +200,6 @@ class LocalGraph:
             KeyError: If a table with the same name already exists in the
                 graph.
         """
-        if len(self.tables) >= 15:
-            raise ValueError("Cannot create a graph with more than 15 "
-                             "tables. Please create a feature request at "
-                             "'https://github.com/kumo-ai/kumo-rfm' if you "
-                             "must go beyond this for your use-case.")
-
         if table.name in self._tables:
             raise KeyError(f"Cannot add table with name '{table.name}' to "
                            f"this graph; table names must be globally unique.")
@@ -237,16 +234,17 @@ class LocalGraph:
         r"""Returns a :class:`pandas.DataFrame` object containing metadata
         information about the tables in this graph.
 
-        The returned dataframe has columns ``name``, ``primary_key``, and
-        ``time_column``, which provide an aggregate view of the properties of
-        the tables of this graph.
+        The returned dataframe has columns ``name``, ``primary_key``,
+        ``time_column``, and ``end_time_column``, which provide an aggregate
+        view of the properties of the tables of this graph.
 
         Example:
+            >>> # doctest: +SKIP
             >>> import kumoai.experimental.rfm as rfm
             >>> graph = rfm.LocalGraph(tables=...).infer_metadata()
-            >>> graph.metadata
-                name  primary_key  time_column
-            0  users      user_id            -
+            >>> graph.metadata  # doctest: +SKIP
+                name  primary_key  time_column  end_time_column
+            0  users      user_id            -                -
         """
         tables = list(self.tables.values())
 
@@ -257,6 +255,11 @@ class LocalGraph:
             pd.Series(dtype=str, data=[t._primary_key or '-' for t in tables]),
             'time_column':
             pd.Series(dtype=str, data=[t._time_column or '-' for t in tables]),
+            'end_time_column':
+            pd.Series(
+                dtype=str,
+                data=[t._end_time_column or '-' for t in tables],
+            ),
         })
 
     def print_metadata(self) -> None:
@@ -608,8 +611,8 @@ class LocalGraph:
                 raise ValueError(f"{edge} is invalid as foreign key "
                                  f"'{fkey}' and primary key '{dst_key.name}' "
                                  f"have incompatible data types (got "
-                                 f"fkey.dtype '{dst_key.dtype}' and "
-                                 f"pkey.dtype '{src_key.dtype}')")
+                                 f"fkey.dtype '{src_key.dtype}' and "
+                                 f"pkey.dtype '{dst_key.dtype}')")
 
         return self
 
@@ -682,6 +685,11 @@ class LocalGraph:
             ]
             if time_column := table.time_column:
                 keys += [f'{time_column.name}: Time ({time_column.dtype})']
+            if end_time_column := table.end_time_column:
+                keys += [
+                    f'{end_time_column.name}: '
+                    f'End Time ({end_time_column.dtype})'
+                ]
             key_repr = left_align(keys)
 
             columns = []
@@ -689,9 +697,9 @@ class LocalGraph:
             columns += [
                 f'{column.name}: {column.stype} ({column.dtype})'
                 for column in table.columns
-                if column.name not in fkeys_dict[table_name]
-                and column.name != table._primary_key
-                and column.name != table._time_column
+                if column.name not in fkeys_dict[table_name] and
+                column.name != table._primary_key and column.name != table.
+                _time_column and column.name != table._end_time_column
             ]
             column_repr = left_align(columns)
 
@@ -758,16 +766,18 @@ class LocalGraph:
     def _to_api_graph_definition(self) -> GraphDefinition:
         tables: Dict[str, TableDefinition] = {}
         col_groups: List[ColumnKeyGroup] = []
-        for t_name, table in self.tables.items():
-            tables[t_name] = table._to_api_table_definition()
+        for table_name, table in self.tables.items():
+            tables[table_name] = table._to_api_table_definition()
             if table.primary_key is None:
                 continue
-            keys = [ColumnKey(t_name, table.primary_key.name)]
+            keys = [ColumnKey(table_name, table.primary_key.name)]
             for edge in self.edges:
-                if edge.dst_table == t_name:
+                if edge.dst_table == table_name:
                     keys.append(ColumnKey(edge.src_table, edge.fkey))
-            keys = sorted(list(set(keys)),
-                          key=lambda x: f'{x.table_name}.{x.col_name}')
+            keys = sorted(
+                list(set(keys)),
+                key=lambda x: f'{x.table_name}.{x.col_name}',
+            )
             if len(keys) > 1:
                 col_groups.append(ColumnKeyGroup(keys))
         return GraphDefinition(tables, col_groups)
@@ -2,7 +2,6 @@ from typing import Dict, List, Optional, Tuple
 
 import numpy as np
 import pandas as pd
-from kumoapi.model_plan import RunMode
 from kumoapi.rfm.context import EdgeLayout, Link, Subgraph, Table
 from kumoapi.typing import Stype
 
@@ -33,7 +32,6 @@ class LocalGraphSampler:
         entity_table_names: Tuple[str, ...],
         node: np.ndarray,
         time: np.ndarray,
-        run_mode: RunMode,
         num_neighbors: List[int],
         exclude_cols_dict: Dict[str, List[str]],
     ) -> Subgraph:
@@ -92,15 +90,23 @@ class LocalGraphSampler:
                 )
                 continue
 
-            # Only store unique rows in `df` above a certain threshold:
-            unique_node, inverse_node = np.unique(node, return_inverse=True)
-            if len(node) > 1.05 * len(unique_node):
-                df = df.iloc[unique_node]
-                row = inverse_node
+            row: Optional[np.ndarray] = None
+            if table_name in self._graph_store.end_time_column_dict:
+                # Set end time to NaT for all values greater than anchor time:
+                df = df.iloc[node].reset_index(drop=True)
+                col_name = self._graph_store.end_time_column_dict[table_name]
+                ser = df[col_name]
+                value = ser.astype('datetime64[ns]').astype(int).to_numpy()
+                mask = value > time[batch]
+                df.loc[mask, col_name] = pd.NaT
             else:
-                df = df.iloc[node]
-                row = None
-            df = df.reset_index(drop=True)
+                # Only store unique rows in `df` above a certain threshold:
+                unique_node, inverse = np.unique(node, return_inverse=True)
+                if len(node) > 1.05 * len(unique_node):
+                    df = df.iloc[unique_node].reset_index(drop=True)
+                    row = inverse
+                else:
+                    df = df.iloc[node].reset_index(drop=True)
 
             # Filter data frame to minimal set of columns:
             df = df[columns]
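The new branch above masks end-time values that lie beyond the row's anchor time so the sampled subgraph cannot see the future. A minimal pandas sketch of that masking step, using made-up data and an assumed int64-nanosecond anchor:

```python
import pandas as pd

df = pd.DataFrame({
    'end_time': pd.to_datetime(['2024-01-05', '2024-03-01', '2024-01-20']),
})
anchor = pd.Timestamp('2024-02-01').value  # anchor time as int64 nanoseconds

# Same idea as the sampler code: compare int64 ns values, hide future values.
value = df['end_time'].astype('datetime64[ns]').astype('int64').to_numpy()
mask = value > anchor
df.loc[mask, 'end_time'] = pd.NaT

print(df)  # the 2024-03-01 row lies after the anchor and becomes NaT
```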
@@ -45,6 +45,7 @@ class LocalGraphStore:
 
         (
             self.time_column_dict,
+            self.end_time_column_dict,
             self.time_dict,
             self.min_time,
             self.max_time,
@@ -201,11 +202,15 @@ class LocalGraphStore:
             pkey_map = pkey_map[self.mask_dict[table.name]]
 
             if len(pkey_map) == 0:
-                raise ValueError(
-                    f"Found no valid rows in table '{table.name}' since there "
-                    f"exists not a single row with a non-N/A primary key."
-                    f"Consider fixing your underlying data or removing this "
-                    f"table from the graph.")
+                error_msg = f"Found no valid rows in table '{table.name}'. "
+                if table.has_time_column():
+                    error_msg += ("Please make sure that there exists valid "
+                                  "non-N/A primary key and time column pairs "
+                                  "in this table.")
+                else:
+                    error_msg += ("Please make sure that there exists valid "
+                                  "non-N/A primary keys in this table.")
+                raise ValueError(error_msg)
 
             pkey_map_dict[table.name] = pkey_map
 
@@ -215,16 +220,21 @@ class LocalGraphStore:
         self,
         graph: LocalGraph,
     ) -> Tuple[
+            Dict[str, str],
             Dict[str, str],
             Dict[str, np.ndarray],
             pd.Timestamp,
             pd.Timestamp,
     ]:
         time_column_dict: Dict[str, str] = {}
+        end_time_column_dict: Dict[str, str] = {}
         time_dict: Dict[str, np.ndarray] = {}
         min_time = pd.Timestamp.max
         max_time = pd.Timestamp.min
         for table in graph.tables.values():
+            if table._end_time_column is not None:
+                end_time_column_dict[table.name] = table._end_time_column
+
             if table._time_column is None:
                 continue
 
@@ -239,7 +249,13 @@ class LocalGraphStore:
             min_time = min(min_time, time.min())
             max_time = max(max_time, time.max())
 
-        return time_column_dict, time_dict, min_time, max_time
+        return (
+            time_column_dict,
+            end_time_column_dict,
+            time_dict,
+            min_time,
+            max_time,
+        )
 
     def get_csc(
         self,