kumoai 2.10.0.dev202510021830__py3-none-any.whl → 2.12.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,11 +23,13 @@ class Column:
23
23
  stype: Stype,
24
24
  is_primary_key: bool = False,
25
25
  is_time_column: bool = False,
26
+ is_end_time_column: bool = False,
26
27
  ) -> None:
27
28
  self._name = name
28
29
  self._dtype = Dtype(dtype)
29
30
  self._is_primary_key = is_primary_key
30
31
  self._is_time_column = is_time_column
32
+ self._is_end_time_column = is_end_time_column
31
33
  self.stype = Stype(stype)
32
34
 
33
35
  @property
@@ -50,9 +52,12 @@ class Column:
50
52
  if self._is_primary_key and val != Stype.ID:
51
53
  raise ValueError(f"Primary key '{self.name}' must have 'ID' "
52
54
  f"semantic type (got '{val}')")
53
- if self.name == self._is_time_column and val != Stype.timestamp:
55
+ if self._is_time_column and val != Stype.timestamp:
54
56
  raise ValueError(f"Time column '{self.name}' must have "
55
57
  f"'timestamp' semantic type (got '{val}')")
58
+ if self._is_end_time_column and val != Stype.timestamp:
59
+ raise ValueError(f"End time column '{self.name}' must have "
60
+ f"'timestamp' semantic type (got '{val}')")
56
61
 
57
62
  super().__setattr__(key, val)
58
63
 
@@ -93,6 +98,7 @@ class LocalTable:
93
98
  name="my_table",
94
99
  primary_key="id",
95
100
  time_column="time",
101
+ end_time_column=None,
96
102
  )
97
103
 
98
104
  # Verify metadata:
@@ -106,6 +112,8 @@ class LocalTable:
106
112
  name: The name of the table.
107
113
  primary_key: The name of the primary key of this table, if it exists.
108
114
  time_column: The name of the time column of this table, if it exists.
115
+ end_time_column: The name of the end time column of this table, if it
116
+ exists.
109
117
  """
110
118
  def __init__(
111
119
  self,
@@ -113,6 +121,7 @@ class LocalTable:
113
121
  name: str,
114
122
  primary_key: Optional[str] = None,
115
123
  time_column: Optional[str] = None,
124
+ end_time_column: Optional[str] = None,
116
125
  ) -> None:
117
126
 
118
127
  if df.empty:
@@ -130,6 +139,7 @@ class LocalTable:
130
139
  self._name = name
131
140
  self._primary_key: Optional[str] = None
132
141
  self._time_column: Optional[str] = None
142
+ self._end_time_column: Optional[str] = None
133
143
 
134
144
  self._columns: Dict[str, Column] = {}
135
145
  for column_name in df.columns:
@@ -141,6 +151,9 @@ class LocalTable:
141
151
  if time_column is not None:
142
152
  self.time_column = time_column
143
153
 
154
+ if end_time_column is not None:
155
+ self.end_time_column = end_time_column
156
+
144
157
  @property
145
158
  def name(self) -> str:
146
159
  r"""The name of the table."""
@@ -230,6 +243,8 @@ class LocalTable:
230
243
  self.primary_key = None
231
244
  if self._time_column == name:
232
245
  self.time_column = None
246
+ if self._end_time_column == name:
247
+ self.end_time_column = None
233
248
  del self._columns[name]
234
249
 
235
250
  return self
@@ -253,9 +268,8 @@ class LocalTable:
253
268
  :class:`ValueError` if the primary key has a non-ID semantic type or
254
269
  if the column name does not match a column in the data frame.
255
270
  """
256
- if not self.has_primary_key():
271
+ if self._primary_key is None:
257
272
  return None
258
- assert self._primary_key is not None
259
273
  return self[self._primary_key]
260
274
 
261
275
  @primary_key.setter
@@ -264,6 +278,10 @@ class LocalTable:
264
278
  raise ValueError(f"Cannot specify column '{name}' as a primary "
265
279
  f"key since it is already defined to be a time "
266
280
  f"column")
281
+ if name is not None and name == self._end_time_column:
282
+ raise ValueError(f"Cannot specify column '{name}' as a primary "
283
+ f"key since it is already defined to be an end "
284
+ f"time column")
267
285
 
268
286
  if self.primary_key is not None:
269
287
  self.primary_key._is_primary_key = False
@@ -295,9 +313,8 @@ class LocalTable:
295
313
  :class:`ValueError` if the time column has a non-timestamp semantic
296
314
  type or if the column name does not match a column in the data frame.
297
315
  """
298
- if not self.has_time_column():
316
+ if self._time_column is None:
299
317
  return None
300
- assert self._time_column is not None
301
318
  return self[self._time_column]
302
319
 
303
320
  @time_column.setter
@@ -306,6 +323,10 @@ class LocalTable:
306
323
  raise ValueError(f"Cannot specify column '{name}' as a time "
307
324
  f"column since it is already defined to be a "
308
325
  f"primary key")
326
+ if name is not None and name == self._end_time_column:
327
+ raise ValueError(f"Cannot specify column '{name}' as a time "
328
+ f"column since it is already defined to be an "
329
+ f"end time column")
309
330
 
310
331
  if self.time_column is not None:
311
332
  self.time_column._is_time_column = False
@@ -318,6 +339,52 @@ class LocalTable:
318
339
  self[name]._is_time_column = True
319
340
  self._time_column = name
320
341
 
342
+ # End Time column #########################################################
343
+
344
+ def has_end_time_column(self) -> bool:
345
+ r"""Returns ``True`` if this table has an end time column; ``False``
346
+ otherwise.
347
+ """
348
+ return self._end_time_column is not None
349
+
350
+ @property
351
+ def end_time_column(self) -> Optional[Column]:
352
+ r"""The end time column of this table.
353
+
354
+ The getter returns the end time column of this table, or ``None`` if no
355
+ such end time column is present.
356
+
357
+ The setter sets a column as an end time column on this table, and
358
+ raises a :class:`ValueError` if the end time column has a non-timestamp
359
+ semantic type or if the column name does not match a column in the data
360
+ frame.
361
+ """
362
+ if self._end_time_column is None:
363
+ return None
364
+ return self[self._end_time_column]
365
+
366
+ @end_time_column.setter
367
+ def end_time_column(self, name: Optional[str]) -> None:
368
+ if name is not None and name == self._primary_key:
369
+ raise ValueError(f"Cannot specify column '{name}' as an end time "
370
+ f"column since it is already defined to be a "
371
+ f"primary key")
372
+ if name is not None and name == self._time_column:
373
+ raise ValueError(f"Cannot specify column '{name}' as an end time "
374
+ f"column since it is already defined to be a "
375
+ f"time column")
376
+
377
+ if self.end_time_column is not None:
378
+ self.end_time_column._is_end_time_column = False
379
+
380
+ if name is None:
381
+ self._end_time_column = None
382
+ return
383
+
384
+ self[name].stype = Stype.timestamp
385
+ self[name]._is_end_time_column = True
386
+ self._end_time_column = name
387
+
321
388
  # Metadata ################################################################
322
389
 
323
390
  @property
@@ -326,16 +393,18 @@ class LocalTable:
326
393
  information about the columns in this table.
327
394
 
328
395
  The returned dataframe has columns ``name``, ``dtype``, ``stype``,
329
- ``is_primary_key``, and ``is_time_column``, which provide an aggregate
330
- view of the properties of the columns of this table.
396
+ ``is_primary_key``, ``is_time_column`` and ``is_end_time_column``,
397
+ which provide an aggregate view of the properties of the columns of
398
+ this table.
331
399
 
332
400
  Example:
401
+ >>> # doctest: +SKIP
333
402
  >>> import kumoai.experimental.rfm as rfm
334
403
  >>> table = rfm.LocalTable(df=..., name=...).infer_metadata()
335
404
  >>> table.metadata
336
- name dtype stype is_primary_key is_time_column
337
- 0 CustomerID float64 ID True False
338
- """
405
+ name dtype stype is_primary_key is_time_column is_end_time_column
406
+ 0 CustomerID float64 ID True False False
407
+ """ # noqa: E501
339
408
  cols = self.columns
340
409
 
341
410
  return pd.DataFrame({
@@ -355,6 +424,11 @@ class LocalTable:
355
424
  dtype=bool,
356
425
  data=[self._time_column == c.name for c in cols],
357
426
  ),
427
+ 'is_end_time_column':
428
+ pd.Series(
429
+ dtype=bool,
430
+ data=[self._end_time_column == c.name for c in cols],
431
+ ),
358
432
  })
359
433
 
360
434
  def print_metadata(self) -> None:
@@ -417,6 +491,7 @@ class LocalTable:
417
491
  candidates = [
418
492
  column.name for column in self.columns
419
493
  if column.stype == Stype.timestamp
494
+ and column.name != self._end_time_column
420
495
  ]
421
496
  if time_column := utils.detect_time_column(self._data, candidates):
422
497
  self.time_column = time_column
@@ -430,24 +505,26 @@ class LocalTable:
430
505
  # Helpers #################################################################
431
506
 
432
507
  def _to_api_table_definition(self) -> TableDefinition:
433
- cols: List[ColumnDefinition] = []
434
- for col in self.columns:
435
- cols.append(ColumnDefinition(col.name, col.stype, col.dtype))
436
- pkey = self._primary_key
437
- time_col = self._time_column
438
- source_table = UnavailableSourceTable(table=self.name)
439
-
440
508
  return TableDefinition(
441
- cols=cols,
442
- source_table=source_table,
443
- pkey=pkey,
444
- time_col=time_col,
509
+ cols=[
510
+ ColumnDefinition(col.name, col.stype, col.dtype)
511
+ for col in self.columns
512
+ ],
513
+ source_table=UnavailableSourceTable(table=self.name),
514
+ pkey=self._primary_key,
515
+ time_col=self._time_column,
516
+ end_time_col=self._end_time_column,
445
517
  )
446
518
 
447
519
  # Python builtins #########################################################
448
520
 
449
521
  def __hash__(self) -> int:
450
- return hash(tuple(self.columns + [self.primary_key, self.time_column]))
522
+ special_columns = [
523
+ self.primary_key,
524
+ self.time_column,
525
+ self.end_time_column,
526
+ ]
527
+ return hash(tuple(self.columns + special_columns))
451
528
 
452
529
  def __contains__(self, name: str) -> bool:
453
530
  return self.has_column(name)
@@ -464,4 +541,5 @@ class LocalTable:
464
541
  f' num_columns={len(self.columns)},\n'
465
542
  f' primary_key={self._primary_key},\n'
466
543
  f' time_column={self._time_column},\n'
544
+ f' end_time_column={self._end_time_column},\n'
467
545
  f')')
@@ -1,7 +1,7 @@
1
- from .backend import PQueryBackend
2
- from .pandas_backend import PQueryPandasBackend
1
+ from .executor import PQueryExecutor
2
+ from .pandas_executor import PQueryPandasExecutor
3
3
 
4
4
  __all__ = [
5
- 'PQueryBackend',
6
- 'PQueryPandasBackend',
5
+ 'PQueryExecutor',
6
+ 'PQueryPandasExecutor',
7
7
  ]
@@ -1,23 +1,14 @@
1
1
  from abc import ABC, abstractmethod
2
- from typing import Dict, Generic, Optional, Tuple, TypeVar, Union
2
+ from typing import Dict, Generic, Tuple, TypeVar
3
3
 
4
- from kumoapi.rfm import PQueryDefinition
5
- from kumoapi.rfm.pquery import (
4
+ from kumoapi.pquery import ValidatedPredictiveQuery
5
+ from kumoapi.pquery.AST import (
6
6
  Aggregation,
7
- AggregationType,
8
- BoolOp,
9
7
  Column,
10
8
  Condition,
11
9
  Filter,
12
- Float,
13
- FloatList,
14
- Int,
15
- IntList,
10
+ Join,
16
11
  LogicalOperation,
17
- MemberOp,
18
- RelOp,
19
- Str,
20
- StrList,
21
12
  )
22
13
 
23
14
  TableData = TypeVar('TableData')
@@ -25,58 +16,33 @@ ColumnData = TypeVar('ColumnData')
25
16
  IndexData = TypeVar('IndexData')
26
17
 
27
18
 
28
- class PQueryBackend(Generic[TableData, ColumnData, IndexData], ABC):
19
+ class PQueryExecutor(Generic[TableData, ColumnData, IndexData], ABC):
29
20
  @abstractmethod
30
- def eval_aggregation_type(
21
+ def execute_column(
31
22
  self,
32
- op: AggregationType,
33
- feat: Optional[ColumnData],
34
- batch: IndexData,
35
- batch_size: int,
23
+ column: Column,
24
+ feat_dict: Dict[str, TableData],
36
25
  filter_na: bool = True,
37
26
  ) -> Tuple[ColumnData, IndexData]:
38
27
  pass
39
28
 
40
29
  @abstractmethod
41
- def eval_rel_op(
42
- self,
43
- left: ColumnData,
44
- op: RelOp,
45
- right: Union[Int, Float, Str, None],
46
- ) -> ColumnData:
47
- pass
48
-
49
- @abstractmethod
50
- def eval_member_op(
51
- self,
52
- left: ColumnData,
53
- op: MemberOp,
54
- right: Union[IntList, FloatList, StrList],
55
- ) -> ColumnData:
56
- pass
57
-
58
- @abstractmethod
59
- def eval_bool_op(
60
- self,
61
- left: ColumnData,
62
- op: BoolOp,
63
- right: Optional[ColumnData],
64
- ) -> ColumnData:
65
- pass
66
-
67
- @abstractmethod
68
- def eval_column(
30
+ def execute_aggregation(
69
31
  self,
70
- column: Column,
32
+ aggr: Aggregation,
71
33
  feat_dict: Dict[str, TableData],
34
+ time_dict: Dict[str, ColumnData],
35
+ batch_dict: Dict[str, IndexData],
36
+ anchor_time: ColumnData,
72
37
  filter_na: bool = True,
38
+ num_forecasts: int = 1,
73
39
  ) -> Tuple[ColumnData, IndexData]:
74
40
  pass
75
41
 
76
42
  @abstractmethod
77
- def eval_aggregation(
43
+ def execute_condition(
78
44
  self,
79
- aggr: Aggregation,
45
+ condition: Condition,
80
46
  feat_dict: Dict[str, TableData],
81
47
  time_dict: Dict[str, ColumnData],
82
48
  batch_dict: Dict[str, IndexData],
@@ -87,9 +53,9 @@ class PQueryBackend(Generic[TableData, ColumnData, IndexData], ABC):
87
53
  pass
88
54
 
89
55
  @abstractmethod
90
- def eval_condition(
56
+ def execute_logical_operation(
91
57
  self,
92
- condition: Condition,
58
+ logical_operation: LogicalOperation,
93
59
  feat_dict: Dict[str, TableData],
94
60
  time_dict: Dict[str, ColumnData],
95
61
  batch_dict: Dict[str, IndexData],
@@ -100,9 +66,9 @@ class PQueryBackend(Generic[TableData, ColumnData, IndexData], ABC):
100
66
  pass
101
67
 
102
68
  @abstractmethod
103
- def eval_logical_operation(
69
+ def execute_join(
104
70
  self,
105
- logical_operation: LogicalOperation,
71
+ join: Join,
106
72
  feat_dict: Dict[str, TableData],
107
73
  time_dict: Dict[str, ColumnData],
108
74
  batch_dict: Dict[str, IndexData],
@@ -113,20 +79,20 @@ class PQueryBackend(Generic[TableData, ColumnData, IndexData], ABC):
113
79
  pass
114
80
 
115
81
  @abstractmethod
116
- def eval_filter(
82
+ def execute_filter(
117
83
  self,
118
84
  filter: Filter,
119
85
  feat_dict: Dict[str, TableData],
120
86
  time_dict: Dict[str, ColumnData],
121
87
  batch_dict: Dict[str, IndexData],
122
88
  anchor_time: ColumnData,
123
- ) -> IndexData:
89
+ ) -> Tuple[ColumnData, IndexData]:
124
90
  pass
125
91
 
126
92
  @abstractmethod
127
- def eval_pquery(
93
+ def execute(
128
94
  self,
129
- query: PQueryDefinition,
95
+ query: ValidatedPredictiveQuery,
130
96
  feat_dict: Dict[str, TableData],
131
97
  time_dict: Dict[str, ColumnData],
132
98
  batch_dict: Dict[str, IndexData],