kumoai 2.13.0.dev202512021731__cp310-cp310-win_amd64.whl → 2.13.0.dev202512040252__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kumoai/_version.py +1 -1
- kumoai/experimental/rfm/backend/local/table.py +32 -167
- kumoai/experimental/rfm/backend/snow/__init__.py +3 -0
- kumoai/experimental/rfm/backend/snow/table.py +115 -0
- kumoai/experimental/rfm/backend/sqlite/table.py +58 -81
- kumoai/experimental/rfm/base/__init__.py +3 -0
- kumoai/experimental/rfm/base/source.py +18 -0
- kumoai/experimental/rfm/base/table.py +87 -24
- kumoai/experimental/rfm/graph.py +173 -35
- kumoai/experimental/rfm/infer/__init__.py +6 -2
- kumoai/experimental/rfm/infer/dtype.py +79 -0
- kumoai/experimental/rfm/{utils.py → infer/pkey.py} +2 -101
- kumoai/experimental/rfm/infer/time_col.py +62 -0
- kumoai/experimental/rfm/local_graph_sampler.py +42 -1
- kumoai/experimental/rfm/local_graph_store.py +1 -16
- kumoai/experimental/rfm/rfm.py +1 -11
- kumoai/kumolib.cp310-win_amd64.pyd +0 -0
- {kumoai-2.13.0.dev202512021731.dist-info → kumoai-2.13.0.dev202512040252.dist-info}/METADATA +2 -1
- {kumoai-2.13.0.dev202512021731.dist-info → kumoai-2.13.0.dev202512040252.dist-info}/RECORD +22 -19
- kumoai/experimental/rfm/infer/stype.py +0 -35
- {kumoai-2.13.0.dev202512021731.dist-info → kumoai-2.13.0.dev202512040252.dist-info}/WHEEL +0 -0
- {kumoai-2.13.0.dev202512021731.dist-info → kumoai-2.13.0.dev202512040252.dist-info}/licenses/LICENSE +0 -0
- {kumoai-2.13.0.dev202512021731.dist-info → kumoai-2.13.0.dev202512040252.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import warnings
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def infer_time_column(
|
|
9
|
+
df: pd.DataFrame,
|
|
10
|
+
candidates: list[str],
|
|
11
|
+
) -> Optional[str]:
|
|
12
|
+
r"""Auto-detect potential time column.
|
|
13
|
+
|
|
14
|
+
Args:
|
|
15
|
+
df: The pandas DataFrame to analyze.
|
|
16
|
+
candidates: A list of potential candidates.
|
|
17
|
+
|
|
18
|
+
Returns:
|
|
19
|
+
The name of the detected time column, or ``None`` if not found.
|
|
20
|
+
"""
|
|
21
|
+
candidates = [ # Exclude all candidates with `*last*` in column names:
|
|
22
|
+
col_name for col_name in candidates
|
|
23
|
+
if not re.search(r'(^|_)last(_|$)', col_name, re.IGNORECASE)
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
if len(candidates) == 0:
|
|
27
|
+
return None
|
|
28
|
+
|
|
29
|
+
if len(candidates) == 1:
|
|
30
|
+
return candidates[0]
|
|
31
|
+
|
|
32
|
+
# If there exists a dedicated `create*` column, use it as time column:
|
|
33
|
+
create_candidates = [
|
|
34
|
+
candidate for candidate in candidates
|
|
35
|
+
if candidate.lower().startswith('create')
|
|
36
|
+
]
|
|
37
|
+
if len(create_candidates) == 1:
|
|
38
|
+
return create_candidates[0]
|
|
39
|
+
if len(create_candidates) > 1:
|
|
40
|
+
candidates = create_candidates
|
|
41
|
+
|
|
42
|
+
# Find the most optimal time column. Usually, it is the one pointing to
|
|
43
|
+
# the oldest timestamps:
|
|
44
|
+
with warnings.catch_warnings():
|
|
45
|
+
warnings.filterwarnings('ignore', message='Could not infer format')
|
|
46
|
+
min_timestamp_dict = {
|
|
47
|
+
key: pd.to_datetime(df[key].iloc[:10_000], 'coerce')
|
|
48
|
+
for key in candidates
|
|
49
|
+
}
|
|
50
|
+
min_timestamp_dict = {
|
|
51
|
+
key: value.min().tz_localize(None)
|
|
52
|
+
for key, value in min_timestamp_dict.items()
|
|
53
|
+
}
|
|
54
|
+
min_timestamp_dict = {
|
|
55
|
+
key: value
|
|
56
|
+
for key, value in min_timestamp_dict.items() if not pd.isna(value)
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
if len(min_timestamp_dict) == 0:
|
|
60
|
+
return None
|
|
61
|
+
|
|
62
|
+
return min(min_timestamp_dict, key=min_timestamp_dict.get) # type: ignore
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import re
|
|
1
2
|
from typing import Dict, List, Optional, Tuple
|
|
2
3
|
|
|
3
4
|
import numpy as np
|
|
@@ -7,7 +8,47 @@ from kumoapi.typing import Stype
|
|
|
7
8
|
|
|
8
9
|
import kumoai.kumolib as kumolib
|
|
9
10
|
from kumoai.experimental.rfm.local_graph_store import LocalGraphStore
|
|
10
|
-
|
|
11
|
+
|
|
12
|
+
PUNCTUATION = re.compile(r"[\'\"\.,\(\)\!\?\;\:]")
|
|
13
|
+
MULTISPACE = re.compile(r"\s+")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def normalize_text(
|
|
17
|
+
ser: pd.Series,
|
|
18
|
+
max_words: Optional[int] = 50,
|
|
19
|
+
) -> pd.Series:
|
|
20
|
+
r"""Normalizes text into a list of lower-case words.
|
|
21
|
+
|
|
22
|
+
Args:
|
|
23
|
+
ser: The :class:`pandas.Series` to normalize.
|
|
24
|
+
max_words: The maximum number of words to return.
|
|
25
|
+
This will auto-shrink any large text column to avoid blowing up
|
|
26
|
+
context size.
|
|
27
|
+
"""
|
|
28
|
+
if len(ser) == 0 or pd.api.types.is_list_like(ser.iloc[0]):
|
|
29
|
+
return ser
|
|
30
|
+
|
|
31
|
+
def normalize_fn(line: str) -> list[str]:
|
|
32
|
+
line = PUNCTUATION.sub(" ", line)
|
|
33
|
+
line = re.sub(r"<br\s*/?>", " ", line) # Handle <br /> or <br>
|
|
34
|
+
line = MULTISPACE.sub(" ", line)
|
|
35
|
+
words = line.split()
|
|
36
|
+
if max_words is not None:
|
|
37
|
+
words = words[:max_words]
|
|
38
|
+
return words
|
|
39
|
+
|
|
40
|
+
ser = ser.fillna('').astype(str)
|
|
41
|
+
|
|
42
|
+
if max_words is not None:
|
|
43
|
+
# We estimate the number of words as 5 characters + 1 space in an
|
|
44
|
+
# English text on average. We need this pre-filter here, as word
|
|
45
|
+
# splitting on a giant text can be very expensive:
|
|
46
|
+
ser = ser.str[:6 * max_words]
|
|
47
|
+
|
|
48
|
+
ser = ser.str.lower()
|
|
49
|
+
ser = ser.map(normalize_fn)
|
|
50
|
+
|
|
51
|
+
return ser
|
|
11
52
|
|
|
12
53
|
|
|
13
54
|
class LocalGraphSampler:
|
|
@@ -7,7 +7,6 @@ from kumoapi.rfm.context import Subgraph
|
|
|
7
7
|
from kumoapi.typing import Stype
|
|
8
8
|
|
|
9
9
|
from kumoai.experimental.rfm import Graph, LocalTable
|
|
10
|
-
from kumoai.experimental.rfm.utils import normalize_text
|
|
11
10
|
from kumoai.utils import InteractiveProgressLogger, ProgressLogger
|
|
12
11
|
|
|
13
12
|
try:
|
|
@@ -21,7 +20,6 @@ class LocalGraphStore:
|
|
|
21
20
|
def __init__(
|
|
22
21
|
self,
|
|
23
22
|
graph: Graph,
|
|
24
|
-
preprocess: bool = False,
|
|
25
23
|
verbose: Union[bool, ProgressLogger] = True,
|
|
26
24
|
) -> None:
|
|
27
25
|
|
|
@@ -32,7 +30,7 @@ class LocalGraphStore:
|
|
|
32
30
|
)
|
|
33
31
|
|
|
34
32
|
with verbose as logger:
|
|
35
|
-
self.df_dict, self.mask_dict = self.sanitize(graph
|
|
33
|
+
self.df_dict, self.mask_dict = self.sanitize(graph)
|
|
36
34
|
self.stype_dict = self.get_stype_dict(graph)
|
|
37
35
|
logger.log("Sanitized input data")
|
|
38
36
|
|
|
@@ -106,7 +104,6 @@ class LocalGraphStore:
|
|
|
106
104
|
def sanitize(
|
|
107
105
|
self,
|
|
108
106
|
graph: Graph,
|
|
109
|
-
preprocess: bool = False,
|
|
110
107
|
) -> Tuple[Dict[str, pd.DataFrame], Dict[str, np.ndarray]]:
|
|
111
108
|
r"""Sanitizes raw data according to table schema definition:
|
|
112
109
|
|
|
@@ -115,10 +112,6 @@ class LocalGraphStore:
|
|
|
115
112
|
* drops timezone information from timestamps
|
|
116
113
|
* drops duplicate primary keys
|
|
117
114
|
* removes rows with missing primary keys or time values
|
|
118
|
-
|
|
119
|
-
If ``preprocess`` is set to ``True``, it will additionally pre-process
|
|
120
|
-
data for faster model processing. In particular, it:
|
|
121
|
-
* tokenizes any text column that is not a foreign key
|
|
122
115
|
"""
|
|
123
116
|
df_dict: Dict[str, pd.DataFrame] = {}
|
|
124
117
|
for table_name, table in graph.tables.items():
|
|
@@ -126,8 +119,6 @@ class LocalGraphStore:
|
|
|
126
119
|
df = table._data
|
|
127
120
|
df_dict[table_name] = df.copy(deep=False).reset_index(drop=True)
|
|
128
121
|
|
|
129
|
-
foreign_keys = {(edge.src_table, edge.fkey) for edge in graph.edges}
|
|
130
|
-
|
|
131
122
|
mask_dict: Dict[str, np.ndarray] = {}
|
|
132
123
|
for table in graph.tables.values():
|
|
133
124
|
for col in table.columns:
|
|
@@ -145,12 +136,6 @@ class LocalGraphStore:
|
|
|
145
136
|
ser = ser.dt.tz_localize(None)
|
|
146
137
|
df_dict[table.name][col.name] = ser
|
|
147
138
|
|
|
148
|
-
# Normalize text in advance (but exclude foreign keys):
|
|
149
|
-
if (preprocess and col.stype == Stype.text
|
|
150
|
-
and (table.name, col.name) not in foreign_keys):
|
|
151
|
-
ser = df_dict[table.name][col.name]
|
|
152
|
-
df_dict[table.name][col.name] = normalize_text(ser)
|
|
153
|
-
|
|
154
139
|
mask: Optional[np.ndarray] = None
|
|
155
140
|
if table._time_column is not None:
|
|
156
141
|
ser = df_dict[table.name][table._time_column]
|
kumoai/experimental/rfm/rfm.py
CHANGED
|
@@ -150,26 +150,16 @@ class KumoRFM:
|
|
|
150
150
|
|
|
151
151
|
Args:
|
|
152
152
|
graph: The graph.
|
|
153
|
-
preprocess: Whether to pre-process the data in advance during graph
|
|
154
|
-
materialization.
|
|
155
|
-
This is a runtime trade-off between graph materialization and model
|
|
156
|
-
processing speed.
|
|
157
|
-
It can be benefical to preprocess your data once and then run many
|
|
158
|
-
queries on top to achieve maximum model speed.
|
|
159
|
-
However, if activiated, graph materialization can take potentially
|
|
160
|
-
much longer, especially on graphs with many large text columns.
|
|
161
|
-
Best to tune this option manually.
|
|
162
153
|
verbose: Whether to print verbose output.
|
|
163
154
|
"""
|
|
164
155
|
def __init__(
|
|
165
156
|
self,
|
|
166
157
|
graph: Graph,
|
|
167
|
-
preprocess: bool = False,
|
|
168
158
|
verbose: Union[bool, ProgressLogger] = True,
|
|
169
159
|
) -> None:
|
|
170
160
|
graph = graph.validate()
|
|
171
161
|
self._graph_def = graph._to_api_graph_definition()
|
|
172
|
-
self._graph_store = LocalGraphStore(graph,
|
|
162
|
+
self._graph_store = LocalGraphStore(graph, verbose)
|
|
173
163
|
self._graph_sampler = LocalGraphSampler(self._graph_store)
|
|
174
164
|
|
|
175
165
|
self._client: Optional[RFMAPI] = None
|
|
Binary file
|
{kumoai-2.13.0.dev202512021731.dist-info → kumoai-2.13.0.dev202512040252.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: kumoai
|
|
3
|
-
Version: 2.13.0.
|
|
3
|
+
Version: 2.13.0.dev202512040252
|
|
4
4
|
Summary: AI on the Modern Data Stack
|
|
5
5
|
Author-email: "Kumo.AI" <hello@kumo.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -42,6 +42,7 @@ Provides-Extra: sqlite
|
|
|
42
42
|
Requires-Dist: adbc_driver_sqlite; extra == "sqlite"
|
|
43
43
|
Provides-Extra: snowflake
|
|
44
44
|
Requires-Dist: snowflake-connector-python; extra == "snowflake"
|
|
45
|
+
Requires-Dist: pyyaml; extra == "snowflake"
|
|
45
46
|
Provides-Extra: sagemaker
|
|
46
47
|
Requires-Dist: boto3<2.0,>=1.30.0; extra == "sagemaker"
|
|
47
48
|
Requires-Dist: mypy-boto3-sagemaker-runtime<2.0,>=1.34.0; extra == "sagemaker"
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
kumoai/__init__.py,sha256=qu-qohU2cQlManX1aZIlzA3ivKl52m-cSQBPSW8urUU,10837
|
|
2
2
|
kumoai/_logging.py,sha256=qL4JbMQwKXri2f-SEJoFB8TY5ALG12S-nobGTNWxW-A,915
|
|
3
3
|
kumoai/_singleton.py,sha256=i2BHWKpccNh5SJGDyU0IXsnYzJAYr8Xb0wz4c6LRbpo,861
|
|
4
|
-
kumoai/_version.py,sha256=
|
|
4
|
+
kumoai/_version.py,sha256=16u1rVm-N2IEE7QbyS9U5nn_hjp7P_wxBIQzzAKSnDA,39
|
|
5
5
|
kumoai/databricks.py,sha256=ahwJz6DWLXMkndT0XwEDBxF-hoqhidFR8wBUQ4TLZ68,490
|
|
6
6
|
kumoai/exceptions.py,sha256=7TMs0SC8xrU009_Pgd4QXtSF9lxJq8MtRbeX9pcQUy4,859
|
|
7
7
|
kumoai/formatting.py,sha256=o3uCnLwXPhe1KI5WV9sBgRrcU7ed4rgu_pf89GL9Nc0,983
|
|
8
8
|
kumoai/futures.py,sha256=J8rtZMEYFzdn5xF_x-LAiKJz3KGL6PT02f6rq_2bOJk,3836
|
|
9
9
|
kumoai/jobs.py,sha256=dCi7BAdfm2tCnonYlGU4WJokJWbh3RzFfaOX2EYCIHU,2576
|
|
10
|
-
kumoai/kumolib.cp310-win_amd64.pyd,sha256=
|
|
10
|
+
kumoai/kumolib.cp310-win_amd64.pyd,sha256=3iE0thfrVDx0Yhh0I0li-BwZcIpQfRpaYxYMsSpYofc,194048
|
|
11
11
|
kumoai/mixin.py,sha256=IaiB8SAI0VqOoMVzzIaUlqMt53-QPUK6OB0HikG-V9E,840
|
|
12
12
|
kumoai/spcs.py,sha256=KWfENrwSLruprlD-QPh63uU0N6npiNrwkeKfBk3EUyQ,4260
|
|
13
13
|
kumoai/artifact_export/__init__.py,sha256=UXAQI5q92ChBzWAk8o3J6pElzYHudAzFZssQXd4o7i8,247
|
|
@@ -55,27 +55,30 @@ kumoai/encoder/__init__.py,sha256=8FeP6mUyCeXxr1b8kUIi5dxe5vEXQRft9tPoaV1CBqg,18
|
|
|
55
55
|
kumoai/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
56
56
|
kumoai/experimental/rfm/__init__.py,sha256=EFZz6IvvskmeO85Vig6p1m_6jdimS_BkeREOndHuRsc,6247
|
|
57
57
|
kumoai/experimental/rfm/authenticate.py,sha256=G89_4TMeUpr5fG_0VTzMF5sdNhaciitA1oc2loTlTmo,19321
|
|
58
|
-
kumoai/experimental/rfm/graph.py,sha256=
|
|
59
|
-
kumoai/experimental/rfm/local_graph_sampler.py,sha256=
|
|
60
|
-
kumoai/experimental/rfm/local_graph_store.py,sha256=
|
|
58
|
+
kumoai/experimental/rfm/graph.py,sha256=kSWve-Fn_9qERFjEpCDO5zDnngtd9T4MOhR_o46PI7s,39602
|
|
59
|
+
kumoai/experimental/rfm/local_graph_sampler.py,sha256=dQ3JnuozTNeZyUFRu2h8OTMNmV1RAoaCA0gvkpgOstg,8110
|
|
60
|
+
kumoai/experimental/rfm/local_graph_store.py,sha256=6jY1ciVIlnBBhZCxWwBTl7SKX1fxRIDLszwrftD0Cdk,13485
|
|
61
61
|
kumoai/experimental/rfm/local_pquery_driver.py,sha256=Yd_yHIrvuDj16IC1pvsqiQvZS41vvOOCRMiuDGtN6Fk,26851
|
|
62
|
-
kumoai/experimental/rfm/rfm.py,sha256=
|
|
62
|
+
kumoai/experimental/rfm/rfm.py,sha256=vOnL8ecHTo1TX2B8_T8xaWGou8qYYz8DyVENu1H93mM,48834
|
|
63
63
|
kumoai/experimental/rfm/sagemaker.py,sha256=sEJSyfEFBA3-7wKinBEzSooKHEn0BgPjrgRnPhYo79g,5120
|
|
64
|
-
kumoai/experimental/rfm/utils.py,sha256=TN7HT_dy26Vhvd8YE90vB0hGZpTFsRMmpSZ3LcQoEIc,7626
|
|
65
64
|
kumoai/experimental/rfm/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
66
65
|
kumoai/experimental/rfm/backend/local/__init__.py,sha256=usMh0fuDxKK-aOVT1sU30BQWFS0eSkfUrhUVILisQQI,934
|
|
67
|
-
kumoai/experimental/rfm/backend/local/table.py,sha256=
|
|
68
|
-
kumoai/experimental/rfm/backend/snow/__init__.py,sha256=
|
|
66
|
+
kumoai/experimental/rfm/backend/local/table.py,sha256=1PqNOROzlnK3SaZHNcU2hyzeifs0N4wssQAS3-Z0Myc,3674
|
|
67
|
+
kumoai/experimental/rfm/backend/snow/__init__.py,sha256=viMeR9VWpB1kjRdSWCTNFMdM7a8Mj_Dtck1twJW8dV8,962
|
|
68
|
+
kumoai/experimental/rfm/backend/snow/table.py,sha256=HVrPtCVvfsisFmq9jMovowsE5Wl5oti3O-kru7ruXlc,4312
|
|
69
69
|
kumoai/experimental/rfm/backend/sqlite/__init__.py,sha256=xw5NNLrWSvUvRkD49X_9hZYjas5EuP1XDANPy0EEjOg,874
|
|
70
|
-
kumoai/experimental/rfm/backend/sqlite/table.py,sha256=
|
|
71
|
-
kumoai/experimental/rfm/base/__init__.py,sha256=
|
|
70
|
+
kumoai/experimental/rfm/backend/sqlite/table.py,sha256=mBiZC21gQwfR4demFrP37GmawMHfIm-G82mLQeBqIZo,3901
|
|
71
|
+
kumoai/experimental/rfm/base/__init__.py,sha256=oXPkeBemtuDxRUK61-0sOT84GZB_oQ6HvaZNU1KFNaw,199
|
|
72
72
|
kumoai/experimental/rfm/base/column.py,sha256=OE-PRQ8HO4uTq0e3_3eHJFfhp5nzw79zd-43g3iMh4g,2385
|
|
73
|
-
kumoai/experimental/rfm/base/
|
|
74
|
-
kumoai/experimental/rfm/
|
|
73
|
+
kumoai/experimental/rfm/base/source.py,sha256=H5yN9xAwK3i_69EdqOV_x58muPGKQiI8ev5BhHQDZEo,290
|
|
74
|
+
kumoai/experimental/rfm/base/table.py,sha256=glyAg4LCQdddM3lIRClJSA7qMyfoHUVAGBf1rEs6B8Y,20113
|
|
75
|
+
kumoai/experimental/rfm/infer/__init__.py,sha256=qKg8or-SpgTApD6ePw1PJ4aUZPrOLTHLRCmBIJ92hrk,486
|
|
75
76
|
kumoai/experimental/rfm/infer/categorical.py,sha256=bqmfrE5ZCBTcb35lA4SyAkCu3MgttAn29VBJYMBNhVg,893
|
|
77
|
+
kumoai/experimental/rfm/infer/dtype.py,sha256=Hf_drluYNuN59lTSe-8GuXalg20Pv93kCktB6Hb9f74,2686
|
|
76
78
|
kumoai/experimental/rfm/infer/id.py,sha256=xaJBETLZa8ttzZCsDwFSwfyCi3VYsLc_kDWT_t_6Ih4,954
|
|
77
79
|
kumoai/experimental/rfm/infer/multicategorical.py,sha256=D-1KwYRkOSkBrOJr4Xa3eTCoAF9O9hPGa7Vg67V5_HU,1150
|
|
78
|
-
kumoai/experimental/rfm/infer/
|
|
80
|
+
kumoai/experimental/rfm/infer/pkey.py,sha256=Hvztcircd4iGdsnFU9Xi1kq_A5ONMnkAdnrpQT5svSs,4519
|
|
81
|
+
kumoai/experimental/rfm/infer/time_col.py,sha256=G98Cgz1m9G9VA-ApnCmGYnJxEFwp1jfaPf3nCMOz_N0,1882
|
|
79
82
|
kumoai/experimental/rfm/infer/timestamp.py,sha256=L2VxjtYTSyUBYAo4M-L08xSQlPpqnHMAVF5_vxjh3Y0,1135
|
|
80
83
|
kumoai/experimental/rfm/pquery/__init__.py,sha256=RkTn0I74uXOUuOiBpa6S-_QEYctMutkUnBEfF9ztQzI,159
|
|
81
84
|
kumoai/experimental/rfm/pquery/executor.py,sha256=S8wwXbAkH-YSnmEVYB8d6wyJF4JJ003mH_0zFTvOp_I,2843
|
|
@@ -101,8 +104,8 @@ kumoai/utils/__init__.py,sha256=wAKgmwtMIGuiauW9D_GGKH95K-24Kgwmld27mm4nsro,278
|
|
|
101
104
|
kumoai/utils/datasets.py,sha256=UyAII-oAn7x3ombuvpbSQ41aVF9SYKBjQthTD-vcT2A,3011
|
|
102
105
|
kumoai/utils/forecasting.py,sha256=ZgKeUCbWLOot0giAkoigwU5du8LkrwAicFOi5hVn6wg,7624
|
|
103
106
|
kumoai/utils/progress_logger.py,sha256=MZsWgHd4UZQKCXiJZgQeW-Emi_BmzlCKPLPXOL_HqBo,5239
|
|
104
|
-
kumoai-2.13.0.
|
|
105
|
-
kumoai-2.13.0.
|
|
106
|
-
kumoai-2.13.0.
|
|
107
|
-
kumoai-2.13.0.
|
|
108
|
-
kumoai-2.13.0.
|
|
107
|
+
kumoai-2.13.0.dev202512040252.dist-info/licenses/LICENSE,sha256=ZUilBDp--4vbhsEr6f_Upw9rnIx09zQ3K9fXQ0rfd6w,1111
|
|
108
|
+
kumoai-2.13.0.dev202512040252.dist-info/METADATA,sha256=T-O--qEm_2QPzB-dDkwR6Ei7r79H7v6qQBbR1e1J8gg,2580
|
|
109
|
+
kumoai-2.13.0.dev202512040252.dist-info/WHEEL,sha256=KUuBC6lxAbHCKilKua8R9W_TM71_-9Sg5uEP3uDWcoU,101
|
|
110
|
+
kumoai-2.13.0.dev202512040252.dist-info/top_level.txt,sha256=YjU6UcmomoDx30vEXLsOU784ED7VztQOsFApk1SFwvs,7
|
|
111
|
+
kumoai-2.13.0.dev202512040252.dist-info/RECORD,,
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
import pandas as pd
|
|
2
|
-
from kumoapi.typing import Dtype, Stype
|
|
3
|
-
|
|
4
|
-
from kumoai.experimental.rfm.infer import (
|
|
5
|
-
contains_categorical,
|
|
6
|
-
contains_id,
|
|
7
|
-
contains_multicategorical,
|
|
8
|
-
contains_timestamp,
|
|
9
|
-
)
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def infer_stype(ser: pd.Series, column_name: str, dtype: Dtype) -> Stype:
|
|
13
|
-
r"""Infers the semantic type of a column.
|
|
14
|
-
|
|
15
|
-
Args:
|
|
16
|
-
ser: A :class:`pandas.Series` to analyze.
|
|
17
|
-
column_name: The name of the column (used for pattern matching).
|
|
18
|
-
dtype: The data type.
|
|
19
|
-
|
|
20
|
-
Returns:
|
|
21
|
-
The semantic type.
|
|
22
|
-
"""
|
|
23
|
-
if contains_id(ser, column_name, dtype):
|
|
24
|
-
return Stype.ID
|
|
25
|
-
|
|
26
|
-
if contains_timestamp(ser, column_name, dtype):
|
|
27
|
-
return Stype.timestamp
|
|
28
|
-
|
|
29
|
-
if contains_multicategorical(ser, column_name, dtype):
|
|
30
|
-
return Stype.multicategorical
|
|
31
|
-
|
|
32
|
-
if contains_categorical(ser, column_name, dtype):
|
|
33
|
-
return Stype.categorical
|
|
34
|
-
|
|
35
|
-
return dtype.default_stype
|
|
File without changes
|
{kumoai-2.13.0.dev202512021731.dist-info → kumoai-2.13.0.dev202512040252.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{kumoai-2.13.0.dev202512021731.dist-info → kumoai-2.13.0.dev202512040252.dist-info}/top_level.txt
RENAMED
|
File without changes
|