kumoai 2.13.0.dev202512040649__cp313-cp313-win_amd64.whl → 2.14.0.dev202601081732__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kumoai/__init__.py +35 -26
- kumoai/_version.py +1 -1
- kumoai/client/client.py +6 -0
- kumoai/client/jobs.py +26 -0
- kumoai/client/pquery.py +6 -2
- kumoai/connector/utils.py +21 -7
- kumoai/experimental/rfm/__init__.py +51 -24
- kumoai/experimental/rfm/authenticate.py +3 -4
- kumoai/experimental/rfm/backend/local/__init__.py +4 -0
- kumoai/experimental/rfm/{local_graph_store.py → backend/local/graph_store.py} +62 -110
- kumoai/experimental/rfm/backend/local/sampler.py +312 -0
- kumoai/experimental/rfm/backend/local/table.py +35 -31
- kumoai/experimental/rfm/backend/snow/__init__.py +2 -0
- kumoai/experimental/rfm/backend/snow/sampler.py +366 -0
- kumoai/experimental/rfm/backend/snow/table.py +177 -50
- kumoai/experimental/rfm/backend/sqlite/__init__.py +4 -2
- kumoai/experimental/rfm/backend/sqlite/sampler.py +454 -0
- kumoai/experimental/rfm/backend/sqlite/table.py +131 -48
- kumoai/experimental/rfm/base/__init__.py +23 -3
- kumoai/experimental/rfm/base/column.py +96 -10
- kumoai/experimental/rfm/base/expression.py +44 -0
- kumoai/experimental/rfm/base/sampler.py +782 -0
- kumoai/experimental/rfm/base/source.py +2 -1
- kumoai/experimental/rfm/base/sql_sampler.py +247 -0
- kumoai/experimental/rfm/base/table.py +404 -203
- kumoai/experimental/rfm/graph.py +374 -172
- kumoai/experimental/rfm/infer/__init__.py +6 -4
- kumoai/experimental/rfm/infer/dtype.py +7 -4
- kumoai/experimental/rfm/infer/multicategorical.py +1 -1
- kumoai/experimental/rfm/infer/pkey.py +4 -2
- kumoai/experimental/rfm/infer/stype.py +35 -0
- kumoai/experimental/rfm/infer/time_col.py +1 -2
- kumoai/experimental/rfm/pquery/executor.py +27 -27
- kumoai/experimental/rfm/pquery/pandas_executor.py +30 -32
- kumoai/experimental/rfm/relbench.py +76 -0
- kumoai/experimental/rfm/rfm.py +762 -467
- kumoai/experimental/rfm/sagemaker.py +4 -4
- kumoai/experimental/rfm/task_table.py +292 -0
- kumoai/kumolib.cp313-win_amd64.pyd +0 -0
- kumoai/pquery/predictive_query.py +10 -6
- kumoai/pquery/training_table.py +16 -2
- kumoai/testing/snow.py +50 -0
- kumoai/trainer/distilled_trainer.py +175 -0
- kumoai/utils/__init__.py +3 -2
- kumoai/utils/display.py +87 -0
- kumoai/utils/progress_logger.py +190 -12
- kumoai/utils/sql.py +3 -0
- {kumoai-2.13.0.dev202512040649.dist-info → kumoai-2.14.0.dev202601081732.dist-info}/METADATA +3 -2
- {kumoai-2.13.0.dev202512040649.dist-info → kumoai-2.14.0.dev202601081732.dist-info}/RECORD +52 -41
- kumoai/experimental/rfm/local_graph_sampler.py +0 -223
- kumoai/experimental/rfm/local_pquery_driver.py +0 -689
- {kumoai-2.13.0.dev202512040649.dist-info → kumoai-2.14.0.dev202601081732.dist-info}/WHEEL +0 -0
- {kumoai-2.13.0.dev202512040649.dist-info → kumoai-2.14.0.dev202601081732.dist-info}/licenses/LICENSE +0 -0
- {kumoai-2.13.0.dev202512040649.dist-info → kumoai-2.14.0.dev202601081732.dist-info}/top_level.txt +0 -0
kumoai/experimental/rfm/backend/sqlite/table.py

```diff
@@ -1,13 +1,22 @@
 import re
-import
-from
+from collections import Counter
+from collections.abc import Sequence
+from typing import cast
 
 import pandas as pd
+from kumoapi.model_plan import MissingType
 from kumoapi.typing import Dtype
 
 from kumoai.experimental.rfm.backend.sqlite import Connection
-from kumoai.experimental.rfm.base import
-
+from kumoai.experimental.rfm.base import (
+    ColumnSpec,
+    ColumnSpecType,
+    DataBackend,
+    SourceColumn,
+    SourceForeignKey,
+    Table,
+)
+from kumoai.utils import quote_ident
 
 
 class SQLiteTable(Table):
@@ -16,6 +25,8 @@ class SQLiteTable(Table):
     Args:
         connection: The connection to a :class:`sqlite` database.
         name: The name of this table.
+        source_name: The source name of this table. If set to ``None``,
+            ``name`` is being used.
        columns: The selected columns of this table.
        primary_key: The name of the primary key of this table, if it exists.
        time_column: The name of the time column of this table, if it exists.
@@ -26,76 +37,148 @@ class SQLiteTable(Table):
         self,
         connection: Connection,
         name: str,
-
-
-
-
+        source_name: str | None = None,
+        columns: Sequence[ColumnSpecType] | None = None,
+        primary_key: MissingType | str | None = MissingType.VALUE,
+        time_column: str | None = None,
+        end_time_column: str | None = None,
     ) -> None:
 
         self._connection = connection
 
         super().__init__(
             name=name,
+            source_name=source_name,
             columns=columns,
             primary_key=primary_key,
             time_column=time_column,
             end_time_column=end_time_column,
         )
 
-
-
+    @property
+    def backend(self) -> DataBackend:
+        return cast(DataBackend, DataBackend.SQLITE)
+
+    def _get_source_columns(self) -> list[SourceColumn]:
+        source_columns: list[SourceColumn] = []
         with self._connection.cursor() as cursor:
-
-
+            sql = f"PRAGMA table_info({self._quoted_source_name})"
+            cursor.execute(sql)
+            columns = cursor.fetchall()
+
+            if len(columns) == 0:
+                raise ValueError(f"Table '{self.source_name}' does not exist "
+                                 f"in the SQLite database")
+
+            unique_keys: set[str] = set()
+            sql = f"PRAGMA index_list({self._quoted_source_name})"
+            cursor.execute(sql)
+            for _, index_name, is_unique, *_ in cursor.fetchall():
+                if bool(is_unique):
+                    sql = f"PRAGMA index_info({quote_ident(index_name)})"
+                    cursor.execute(sql)
+                    index = cursor.fetchall()
+                    if len(index) == 1:
+                        unique_keys.add(index[0][2])
 
-
-
-
-
-
-
-
-
-                elif re.search('TEXT|CHAR|CLOB', type):
-                    dtype = Dtype.string
-                elif re.search('REAL|FLOA|DOUB', type):
-                    dtype = Dtype.float
-                else:  # NUMERIC affinity.
-                    ser = self._sample_df[column]
-                    try:
-                        dtype = infer_dtype(ser)
-                    except Exception:
-                        warnings.warn(
-                            f"Data type inference for column '{column}' in "
-                            f"table '{self.name}' failed. Consider changing "
-                            f"the data type of the column to use it within "
-                            f"this table.")
-                        continue
+            # Special SQLite case that creates a rowid alias for
+            # `INTEGER PRIMARY KEY` annotated columns:
+            rowid_candidates = [
+                column for _, column, dtype, _, _, is_pkey in columns
+                if bool(is_pkey) and dtype.strip().upper() == 'INTEGER'
+            ]
+            if len(rowid_candidates) == 1:
+                unique_keys.add(rowid_candidates[0])
 
+            for _, column, dtype, notnull, _, is_pkey in columns:
                 source_column = SourceColumn(
                     name=column,
-                    dtype=dtype,
+                    dtype=self._to_dtype(dtype),
                     is_primary_key=bool(is_pkey),
-                    is_unique_key=
+                    is_unique_key=column in unique_keys,
+                    is_nullable=not bool(is_pkey) and not bool(notnull),
                 )
                 source_columns.append(source_column)
 
         return source_columns
 
-    def _get_source_foreign_keys(self) ->
-
+    def _get_source_foreign_keys(self) -> list[SourceForeignKey]:
+        source_foreign_keys: list[SourceForeignKey] = []
         with self._connection.cursor() as cursor:
-
-
-
-
+            sql = f"PRAGMA foreign_key_list({self._quoted_source_name})"
+            cursor.execute(sql)
+            rows = cursor.fetchall()
+            counts = Counter(row[0] for row in rows)
+            for idx, _, dst_table, foreign_key, primary_key, *_ in rows:
+                if counts[idx] == 1:
+                    source_foreign_key = SourceForeignKey(
+                        name=foreign_key,
+                        dst_table=dst_table,
+                        primary_key=primary_key,
+                    )
+                    source_foreign_keys.append(source_foreign_key)
+        return source_foreign_keys
 
-    def
+    def _get_source_sample_df(self) -> pd.DataFrame:
         with self._connection.cursor() as cursor:
-
-
+            columns = [quote_ident(col) for col in self._source_column_dict]
+            sql = (f"SELECT {', '.join(columns)} "
+                   f"FROM {self._quoted_source_name} "
+                   f"ORDER BY rowid "
+                   f"LIMIT {self._NUM_SAMPLE_ROWS}")
+            cursor.execute(sql)
             table = cursor.fetch_arrow_table()
-            return table.to_pandas(types_mapper=pd.ArrowDtype)
 
-
+        if len(table) == 0:
+            raise RuntimeError(f"Table '{self.source_name}' is empty")
+
+        return self._sanitize(
+            df=table.to_pandas(types_mapper=pd.ArrowDtype),
+            dtype_dict={
+                column.name: column.dtype
+                for column in self._source_column_dict.values()
+            },
+            stype_dict=None,
+        )
+
+    def _get_num_rows(self) -> int | None:
         return None
+
+    def _get_expr_sample_df(
+        self,
+        columns: Sequence[ColumnSpec],
+    ) -> pd.DataFrame:
+        with self._connection.cursor() as cursor:
+            projections = [
+                f"{column.expr} AS {quote_ident(column.name)}"
+                for column in columns
+            ]
+            sql = (f"SELECT {', '.join(projections)} "
+                   f"FROM {self._quoted_source_name} "
+                   f"ORDER BY rowid "
+                   f"LIMIT {self._NUM_SAMPLE_ROWS}")
+            cursor.execute(sql)
+            table = cursor.fetch_arrow_table()
+
+        if len(table) == 0:
+            raise RuntimeError(f"Table '{self.source_name}' is empty")
+
+        return self._sanitize(
+            df=table.to_pandas(types_mapper=pd.ArrowDtype),
+            dtype_dict={column.name: column.dtype
+                        for column in columns},
+            stype_dict=None,
+        )
+
+    @staticmethod
+    def _to_dtype(dtype: str | None) -> Dtype | None:
+        if dtype is None:
+            return None
+        dtype = dtype.strip().upper()
+        if re.search('INT', dtype):
+            return Dtype.int
+        if re.search('TEXT|CHAR|CLOB', dtype):
+            return Dtype.string
+        if re.search('REAL|FLOA|DOUB', dtype):
+            return Dtype.float
+        return None  # NUMERIC affinity.
```
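The new metadata methods lean entirely on SQLite's PRAGMA introspection. Below is a minimal, self-contained sketch of what those PRAGMAs return, using only the standard-library `sqlite3` module; the table and column names (`users`, `orders`, etc.) are illustrative and not taken from the package.

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.executescript("""
    CREATE TABLE users (
        id INTEGER PRIMARY KEY,      -- becomes a rowid alias, no index created
        email TEXT NOT NULL UNIQUE,  -- creates an automatic unique index
        signup_ts REAL
    );
    CREATE TABLE orders (
        order_id INTEGER PRIMARY KEY,
        user_id INTEGER REFERENCES users(id),
        amount NUMERIC
    );
""")

# PRAGMA table_info -> (cid, name, declared type, notnull, default, pk)
print(conn.execute("PRAGMA table_info(users)").fetchall())

# PRAGMA index_list / index_info -> used above to detect single-column
# UNIQUE keys such as the automatic index on `email`.
for _, index_name, is_unique, *_ in conn.execute("PRAGMA index_list(users)"):
    if is_unique:
        rows = conn.execute(f"PRAGMA index_info({index_name})").fetchall()
        print(index_name, rows)

# PRAGMA foreign_key_list -> (id, seq, table, from, to, ...); composite keys
# share the same id, which is why the diff counts occurrences per id and
# only keeps single-column foreign keys.
print(conn.execute("PRAGMA foreign_key_list(orders)").fetchall())
```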
kumoai/experimental/rfm/base/__init__.py

```diff
@@ -1,10 +1,30 @@
-from .
-
-
+from kumoapi.common import StrEnum
+
+
+class DataBackend(StrEnum):
+    LOCAL = 'local'
+    SQLITE = 'sqlite'
+    SNOWFLAKE = 'snowflake'
+
+
+from .source import SourceColumn, SourceForeignKey  # noqa: E402
+from .expression import Expression, LocalExpression  # noqa: E402
+from .column import ColumnSpec, ColumnSpecType, Column  # noqa: E402
+from .table import Table  # noqa: E402
+from .sampler import SamplerOutput, Sampler  # noqa: E402
+from .sql_sampler import SQLSampler  # noqa: E402
 
 __all__ = [
+    'DataBackend',
     'SourceColumn',
     'SourceForeignKey',
+    'Expression',
+    'LocalExpression',
+    'ColumnSpec',
+    'ColumnSpecType',
     'Column',
     'Table',
+    'SamplerOutput',
+    'Sampler',
+    'SQLSampler',
 ]
```
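A brief hedged sketch of the new `DataBackend` enum, assuming `kumoapi.common.StrEnum` mirrors the standard-library `enum.StrEnum` semantics (members compare equal to their string values):

```python
from kumoai.experimental.rfm.base import DataBackend

# Assumption: StrEnum members behave like plain strings.
assert DataBackend.SQLITE == 'sqlite'
assert DataBackend('snowflake') is DataBackend.SNOWFLAKE
print(list(DataBackend))  # LOCAL, SQLITE, SNOWFLAKE
```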
kumoai/experimental/rfm/base/column.py

```diff
@@ -1,37 +1,119 @@
+from __future__ import annotations
+
 from dataclasses import dataclass
-from typing import Any
+from typing import Any, Mapping, TypeAlias
 
 from kumoapi.typing import Dtype, Stype
+from typing_extensions import Self
+
+from kumoai.experimental.rfm.base import Expression
+from kumoai.mixin import CastMixin
+
+
+@dataclass(init=False)
+class ColumnSpec(CastMixin):
+    r"""A column specification for adding a column to a table.
+
+    A column specification can either refer to a physical column present in
+    the data source, or be defined logically via an expression.
+
+    Args:
+        name: The name of the column.
+        expr: A column expression to define logical columns.
+        dtype: The data type of the column.
+    """
+    def __init__(
+        self,
+        name: str,
+        expr: Expression | Mapping[str, str] | str | None = None,
+        dtype: Dtype | str | None = None,
+        stype: Stype | str | None = None,
+    ) -> None:
+
+        self.name = name
+        self.expr = Expression.coerce(expr)
+        self.dtype = Dtype(dtype) if dtype is not None else None
+        self.stype = Stype(dtype) if stype is not None else None
+
+    @classmethod
+    def coerce(cls, spec: ColumnSpec | Mapping[str, Any] | str) -> Self:
+        r"""Coerces a column specification into a :class:`ColumnSpec`."""
+        if isinstance(spec, cls):
+            return spec
+        if isinstance(spec, str):
+            return cls(name=spec)
+        if isinstance(spec, Mapping):
+            try:
+                return cls(**spec)
+            except TypeError:
+                pass
+        raise TypeError(f"Unable to coerce 'ColumnSpec' from '{spec}'")
+
+    @property
+    def is_source(self) -> bool:
+        r"""Whether the column specification refers to a phyiscal column
+        present in the data source.
+        """
+        return self.expr is None
+
+
+ColumnSpecType: TypeAlias = ColumnSpec | Mapping[str, Any] | str
 
 
 @dataclass(init=False, repr=False, eq=False)
 class Column:
+    r"""Column-level metadata information.
+
+    A column can either refer to a physical column present in the data source,
+    or be defined logically via an expression.
+
+    Args:
+        name: The name of the column.
+        expr: A column expression to define logical columns.
+        dtype: The data type of the column.
+        stype: The semantic type of the column.
+    """
     stype: Stype
 
     def __init__(
         self,
         name: str,
+        expr: Expression | None,
         dtype: Dtype,
         stype: Stype,
-        is_primary_key: bool = False,
-        is_time_column: bool = False,
-        is_end_time_column: bool = False,
     ) -> None:
         self._name = name
+        self._expr = expr
         self._dtype = Dtype(dtype)
-
-        self.
-        self.
+
+        self._is_primary_key = False
+        self._is_time_column = False
+        self._is_end_time_column = False
+
         self.stype = Stype(stype)
 
     @property
     def name(self) -> str:
+        r"""The name of the column."""
         return self._name
 
+    @property
+    def expr(self) -> Expression | None:
+        r"""The expression of column (if logically)."""
+        return self._expr
+
     @property
     def dtype(self) -> Dtype:
+        r"""The data type of the column."""
         return self._dtype
 
+    @property
+    def is_source(self) -> bool:
+        r"""Whether the column refers to a phyiscal column present in the data
+        source.
+        """
+        return self.expr is None
+
     def __setattr__(self, key: str, val: Any) -> None:
         if key == 'stype':
             if isinstance(val, str):
@@ -54,7 +136,7 @@ class Column:
         super().__setattr__(key, val)
 
     def __hash__(self) -> int:
-        return hash((self.name, self.
+        return hash((self.name, self.expr, self.dtype, self.stype))
 
     def __eq__(self, other: Any) -> bool:
         if not isinstance(other, Column):
@@ -62,5 +144,9 @@ class Column:
         return hash(self) == hash(other)
 
     def __repr__(self) -> str:
-
-
+        parts = [f'name={self.name}']
+        if self.expr is not None:
+            parts.append(f'expr={self.expr}')
+        parts.append(f'dtype={self.dtype}')
+        parts.append(f'stype={self.stype}')
+        return f"{self.__class__.__name__}({', '.join(parts)})"
```
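For orientation, a hedged sketch of how `ColumnSpec.coerce` handles the three `ColumnSpecType` forms introduced above (a plain column name, a mapping of constructor arguments, or an existing `ColumnSpec`); the column names `price`, `total`, and `quantity` are illustrative assumptions, not values from the package:

```python
from kumoai.experimental.rfm.base import ColumnSpec

# A bare string names a physical source column.
spec = ColumnSpec.coerce('price')
assert spec.is_source and spec.expr is None

# A mapping is expanded into constructor arguments; the 'expr' string is
# coerced into a LocalExpression, making this a logical column.
spec = ColumnSpec.coerce({'name': 'total', 'expr': 'price * quantity'})
assert not spec.is_source

# Anything else falls through to the TypeError raised in the diff, e.g.:
# ColumnSpec.coerce(42)  # -> TypeError
```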
kumoai/experimental/rfm/base/expression.py

```diff
@@ -0,0 +1,44 @@
+from __future__ import annotations
+
+from abc import ABC
+from dataclasses import dataclass
+from typing import Mapping
+
+
+class Expression(ABC):
+    """A base expression to define logical columns."""
+    @classmethod
+    def coerce(
+        cls,
+        spec: Expression | Mapping[str, str] | str | None,
+    ) -> Expression | None:
+        r"""Coerces an expression specification into an :class:`Expression`, if
+        possible.
+        """
+        if spec is None:
+            return None
+        if isinstance(spec, Expression):
+            return spec
+        if isinstance(spec, str):
+            return LocalExpression(spec)
+        if isinstance(spec, Mapping):
+            for sub_cls in (LocalExpression, ):
+                try:
+                    return sub_cls(**spec)
+                except TypeError:
+                    pass
+        raise TypeError(f"Unable to coerce 'Expression' from '{spec}'")
+
+
+@dataclass(frozen=True, repr=False)
+class LocalExpression(Expression):
+    r"""A local expression to define a row-level logical attribute based on
+    physical columns of the data source in the same row.
+
+    Args:
+        value: The value of the expression.
+    """
+    value: str
+
+    def __repr__(self) -> str:
+        return self.value
```
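And a matching hedged sketch of the `Expression.coerce` rules from the new `expression.py`; the SQL fragments used here are made up for illustration:

```python
from kumoai.experimental.rfm.base import Expression, LocalExpression

assert Expression.coerce(None) is None

expr = Expression.coerce('price * quantity')   # str -> LocalExpression
assert isinstance(expr, LocalExpression)
assert repr(expr) == 'price * quantity'        # __repr__ returns the raw value

assert Expression.coerce(expr) is expr         # Expression passes through

from_mapping = Expression.coerce({'value': 'amount / 100'})
assert from_mapping == LocalExpression('amount / 100')  # frozen dataclass eq
```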