kumoai 2.14.0.dev202601011731__cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kumoai might be problematic. Click here for more details.
- kumoai/__init__.py +300 -0
- kumoai/_logging.py +29 -0
- kumoai/_singleton.py +25 -0
- kumoai/_version.py +1 -0
- kumoai/artifact_export/__init__.py +9 -0
- kumoai/artifact_export/config.py +209 -0
- kumoai/artifact_export/job.py +108 -0
- kumoai/client/__init__.py +5 -0
- kumoai/client/client.py +223 -0
- kumoai/client/connector.py +110 -0
- kumoai/client/endpoints.py +150 -0
- kumoai/client/graph.py +120 -0
- kumoai/client/jobs.py +471 -0
- kumoai/client/online.py +78 -0
- kumoai/client/pquery.py +207 -0
- kumoai/client/rfm.py +112 -0
- kumoai/client/source_table.py +53 -0
- kumoai/client/table.py +101 -0
- kumoai/client/utils.py +130 -0
- kumoai/codegen/__init__.py +19 -0
- kumoai/codegen/cli.py +100 -0
- kumoai/codegen/context.py +16 -0
- kumoai/codegen/edits.py +473 -0
- kumoai/codegen/exceptions.py +10 -0
- kumoai/codegen/generate.py +222 -0
- kumoai/codegen/handlers/__init__.py +4 -0
- kumoai/codegen/handlers/connector.py +118 -0
- kumoai/codegen/handlers/graph.py +71 -0
- kumoai/codegen/handlers/pquery.py +62 -0
- kumoai/codegen/handlers/table.py +109 -0
- kumoai/codegen/handlers/utils.py +42 -0
- kumoai/codegen/identity.py +114 -0
- kumoai/codegen/loader.py +93 -0
- kumoai/codegen/naming.py +94 -0
- kumoai/codegen/registry.py +121 -0
- kumoai/connector/__init__.py +31 -0
- kumoai/connector/base.py +153 -0
- kumoai/connector/bigquery_connector.py +200 -0
- kumoai/connector/databricks_connector.py +213 -0
- kumoai/connector/file_upload_connector.py +189 -0
- kumoai/connector/glue_connector.py +150 -0
- kumoai/connector/s3_connector.py +278 -0
- kumoai/connector/snowflake_connector.py +252 -0
- kumoai/connector/source_table.py +471 -0
- kumoai/connector/utils.py +1796 -0
- kumoai/databricks.py +14 -0
- kumoai/encoder/__init__.py +4 -0
- kumoai/exceptions.py +26 -0
- kumoai/experimental/__init__.py +0 -0
- kumoai/experimental/rfm/__init__.py +210 -0
- kumoai/experimental/rfm/authenticate.py +432 -0
- kumoai/experimental/rfm/backend/__init__.py +0 -0
- kumoai/experimental/rfm/backend/local/__init__.py +42 -0
- kumoai/experimental/rfm/backend/local/graph_store.py +297 -0
- kumoai/experimental/rfm/backend/local/sampler.py +312 -0
- kumoai/experimental/rfm/backend/local/table.py +113 -0
- kumoai/experimental/rfm/backend/snow/__init__.py +37 -0
- kumoai/experimental/rfm/backend/snow/sampler.py +297 -0
- kumoai/experimental/rfm/backend/snow/table.py +242 -0
- kumoai/experimental/rfm/backend/sqlite/__init__.py +32 -0
- kumoai/experimental/rfm/backend/sqlite/sampler.py +398 -0
- kumoai/experimental/rfm/backend/sqlite/table.py +184 -0
- kumoai/experimental/rfm/base/__init__.py +30 -0
- kumoai/experimental/rfm/base/column.py +152 -0
- kumoai/experimental/rfm/base/expression.py +44 -0
- kumoai/experimental/rfm/base/sampler.py +761 -0
- kumoai/experimental/rfm/base/source.py +19 -0
- kumoai/experimental/rfm/base/sql_sampler.py +143 -0
- kumoai/experimental/rfm/base/table.py +736 -0
- kumoai/experimental/rfm/graph.py +1237 -0
- kumoai/experimental/rfm/infer/__init__.py +19 -0
- kumoai/experimental/rfm/infer/categorical.py +40 -0
- kumoai/experimental/rfm/infer/dtype.py +82 -0
- kumoai/experimental/rfm/infer/id.py +46 -0
- kumoai/experimental/rfm/infer/multicategorical.py +48 -0
- kumoai/experimental/rfm/infer/pkey.py +128 -0
- kumoai/experimental/rfm/infer/stype.py +35 -0
- kumoai/experimental/rfm/infer/time_col.py +61 -0
- kumoai/experimental/rfm/infer/timestamp.py +41 -0
- kumoai/experimental/rfm/pquery/__init__.py +7 -0
- kumoai/experimental/rfm/pquery/executor.py +102 -0
- kumoai/experimental/rfm/pquery/pandas_executor.py +530 -0
- kumoai/experimental/rfm/relbench.py +76 -0
- kumoai/experimental/rfm/rfm.py +1184 -0
- kumoai/experimental/rfm/sagemaker.py +138 -0
- kumoai/experimental/rfm/task_table.py +231 -0
- kumoai/formatting.py +30 -0
- kumoai/futures.py +99 -0
- kumoai/graph/__init__.py +12 -0
- kumoai/graph/column.py +106 -0
- kumoai/graph/graph.py +948 -0
- kumoai/graph/table.py +838 -0
- kumoai/jobs.py +80 -0
- kumoai/kumolib.cpython-310-x86_64-linux-gnu.so +0 -0
- kumoai/mixin.py +28 -0
- kumoai/pquery/__init__.py +25 -0
- kumoai/pquery/prediction_table.py +287 -0
- kumoai/pquery/predictive_query.py +641 -0
- kumoai/pquery/training_table.py +424 -0
- kumoai/spcs.py +121 -0
- kumoai/testing/__init__.py +8 -0
- kumoai/testing/decorators.py +57 -0
- kumoai/testing/snow.py +50 -0
- kumoai/trainer/__init__.py +42 -0
- kumoai/trainer/baseline_trainer.py +93 -0
- kumoai/trainer/config.py +2 -0
- kumoai/trainer/distilled_trainer.py +175 -0
- kumoai/trainer/job.py +1192 -0
- kumoai/trainer/online_serving.py +258 -0
- kumoai/trainer/trainer.py +475 -0
- kumoai/trainer/util.py +103 -0
- kumoai/utils/__init__.py +11 -0
- kumoai/utils/datasets.py +83 -0
- kumoai/utils/display.py +51 -0
- kumoai/utils/forecasting.py +209 -0
- kumoai/utils/progress_logger.py +343 -0
- kumoai/utils/sql.py +3 -0
- kumoai-2.14.0.dev202601011731.dist-info/METADATA +71 -0
- kumoai-2.14.0.dev202601011731.dist-info/RECORD +122 -0
- kumoai-2.14.0.dev202601011731.dist-info/WHEEL +6 -0
- kumoai-2.14.0.dev202601011731.dist-info/licenses/LICENSE +9 -0
- kumoai-2.14.0.dev202601011731.dist-info/top_level.txt +1 -0
kumoai/utils/display.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
from collections.abc import Sequence
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
|
|
5
|
+
from kumoai import in_notebook, in_snowflake_notebook
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def message(msg: str) -> None:
    r"""Show ``msg`` using the output channel of the current environment.

    Outside of a notebook, backticks in ``msg`` are replaced by single
    quotes before the text is printed. Inside a Snowflake notebook the text
    is rendered via ``streamlit``; inside any other notebook it is rendered
    as IPython Markdown.

    Args:
        msg: The (Markdown) text to display.
    """
    if not in_notebook():
        # Plain terminals do not render Markdown code spans:
        msg = msg.replace("`", "'")

    if in_snowflake_notebook():
        import streamlit as st
        st.markdown(msg)
        return

    if in_notebook():
        from IPython.display import Markdown, display
        display(Markdown(msg))
        return

    print(msg)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def title(msg: str) -> None:
    r"""Show ``msg`` as a heading.

    In a notebook the text becomes a level-3 Markdown heading; elsewhere it
    is emitted as plain text followed by a colon.

    Args:
        msg: The heading text.
    """
    if in_notebook():
        text = f"### {msg}"
    else:
        text = f"{msg}:"
    message(text)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def italic(msg: str) -> None:
    r"""Show ``msg`` in italics when running in a notebook.

    Outside of a notebook the text is passed on unchanged.

    Args:
        msg: The text to display.
    """
    if in_notebook():
        text = f"*{msg}*"
    else:
        text = msg
    message(text)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def unordered_list(items: Sequence[str]) -> None:
    r"""Show ``items`` as a bullet list, one item per line.

    In a notebook each item becomes a Markdown list entry (``- item``).
    Elsewhere each item is prefixed with ``•`` and its backticks are
    stripped.

    Args:
        items: The entries of the list.
    """
    if in_notebook():
        lines = [f"- {entry}" for entry in items]
    else:
        lines = [f"• {entry.replace('`', '')}" for entry in items]

    message('\n'.join(lines))
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def dataframe(df: pd.DataFrame) -> None:
    r"""Show ``df`` without its index in the current environment.

    A Snowflake notebook renders the frame via ``streamlit``. Any other
    notebook displays a pandas ``Styler`` with the index hidden. Outside of
    a notebook, or when the ``Styler`` cannot be created because of an
    :class:`ImportError`, the frame is printed as plain text.

    Args:
        df: The data frame to display.
    """
    if in_snowflake_notebook():
        import streamlit as st
        st.dataframe(df, hide_index=True)
        return

    if not in_notebook():
        print(df.to_string(index=False))
        return

    from IPython.display import display

    try:
        styler = df.style
        if hasattr(styler, 'hide'):
            # Newer pandas exposes `Styler.hide(axis=...)`:
            styled = styler.hide(axis='index')
        else:
            # Older pandas only provides `Styler.hide_index()`:
            styled = styler.hide_index()
        display(styled)
    except ImportError:
        # `df.style` needs an optional dependency (e.g., jinja2):
        print(df.to_string(index=False))
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
from typing import Dict, List
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
import plotly.graph_objects as go
|
|
5
|
+
from plotly.subplots import make_subplots
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ForecastVisualizer:
    r"""A tool for quickly visualizing forecast results from the holdout
    dataframe of a forecasting job.

    .. code-block:: python

        import kumoai

        # Retrieve job results from a training job. Note
        # that the job ID passed here must be in a completed state:
        job_result = kumoai.TrainingJob("trainingjob-...").result()

        # Read the holdout table as a Pandas DataFrame:
        holdout_df = job_result.holdout_df()

        # Pass holdout table to ForecastVisualizer and visualize results
        holdout_forecast = kumoai.utils.ForecastVisualizer(holdout_df)
        holdout_forecast.visualize()

    Args:
        holdout_df: The holdout dataframe. It is accessed through its
            ``ENTITY``, ``TIMESTAMP``, ``TARGET`` and ``TARGET_PRED``
            columns.
        title: The title of the figure.
        target_label: The y-axis label of the forecast subplot.
    """
    def __init__(
        self,
        holdout_df: pd.DataFrame,
        title: str = "Forecast Results by Department",
        target_label: str = "Patient Volume",
    ) -> None:
        # Sort the holdout dataframe and extract unique entities:
        self.forecast = holdout_df.sort_values(['ENTITY', 'TIMESTAMP'])
        self.entities = holdout_df['ENTITY'].unique().tolist()

        self.title = title
        self.target_label = target_label

        self.fig = self._initialize_subplot()
        self.buttons: List[Dict] = []
        self.plot_config: Dict[str, Dict[str, str]] = {
            'target': {
                'color': 'blue',
                'name': 'TARGET'
            },
            'prediction': {
                'color': 'red',
                'name': 'TARGET_PRED'
            },
            'residuals': {
                'color': 'green',
                'name': 'Residuals'
            },
            'residuals_time': {
                'color': 'orange',
                'name': 'Residuals Over Time'
            }
        }

        # Traces, buttons and layout are added to `self.fig` only once, no
        # matter how often `visualize()` is called:
        self._is_built = False

    @staticmethod
    def _initialize_subplot() -> go.Figure:
        r"""Initializes the subplot structure with three rows:
        Row 1: Line plot of actual forecast vs predicted
        Row 2: Line plot of residuals over time
        Row 3: Histogram distribution of residuals
        """
        return make_subplots(
            rows=3,
            cols=1,
            specs=[[{
                "type": "scatter"
            }], [{
                "type": "scatter"
            }], [{
                "type": "xy"
            }]],
            row_heights=[0.5, 0.25, 0.25],
            subplot_titles=('Forecast vs Actual', 'Residuals Over Time',
                            'Residuals Distribution'),
        )

    def _create_time_series_trace(
        self,
        data: pd.DataFrame,
        entity: str,
        trace_type: str,
        visibility: bool,
    ) -> go.Scatter:
        r"""Create a time series trace for either target or prediction.

        Args:
            data: The rows of a single entity.
            entity: The entity the rows belong to (used in the trace name).
            trace_type: A key of ``self.plot_config`` whose ``'name'`` is
                the column to plot, *i.e.* ``'target'`` or ``'prediction'``.
            visibility: Whether the trace is initially visible.
        """
        config = self.plot_config[trace_type]
        return go.Scatter(
            x=data["TIMESTAMP"],
            y=data[config['name']],
            name=f"{entity} - {config['name']}",
            mode="lines",
            line=dict(color=config['color']),
            visible=visibility,
            opacity=0.75,
        )

    def _create_residuals_time_trace(
        self,
        data: pd.DataFrame,
        entity: str,
        visibility: bool,
    ) -> go.Scatter:
        r"""Create a time series trace for residuals over time, where a
        residual is ``TARGET - TARGET_PRED``.

        Args:
            data: The rows of a single entity.
            entity: The entity the rows belong to (used in the trace name).
            visibility: Whether the trace is initially visible.
        """
        residuals = data["TARGET"] - data["TARGET_PRED"]
        return go.Scatter(
            x=data["TIMESTAMP"],
            y=residuals,
            name=f"{entity} - Residuals Over Time",
            mode="lines+markers",
            line=dict(color=self.plot_config['residuals_time']['color']),
            visible=visibility,
            opacity=0.75,
        )

    def _create_residuals_hist_trace(
        self,
        data: pd.DataFrame,
        entity: str,
        visibility: bool,
    ) -> go.Histogram:
        r"""Create a histogram trace for the distribution of residuals,
        where a residual is ``TARGET - TARGET_PRED``.

        Args:
            data: The rows of a single entity.
            entity: The entity the rows belong to (used in the trace name).
            visibility: Whether the trace is initially visible.
        """
        residuals = data["TARGET"] - data["TARGET_PRED"]
        return go.Histogram(
            x=residuals,
            name=f"{entity} - Residuals Distribution",
            marker=dict(color=self.plot_config['residuals']['color']),
            visible=visibility,
            opacity=0.75,
            nbinsx=30,
        )

    def _create_button(self, index: int, entity: str) -> None:
        r"""Create visibility toggle button for an entity.

        Args:
            index: The position of ``entity`` in ``self.entities``.
            entity: The entity shown as the button label.
        """
        # target, prediction, residuals time, and residuals hist:
        num_traces_per_entity = 4
        total_traces = len(self.entities) * num_traces_per_entity

        # Traces are added entity by entity, so the traces of this entity
        # occupy one contiguous slice of the figure's trace list:
        visible = [False] * total_traces
        start = index * num_traces_per_entity
        for offset in range(num_traces_per_entity):
            visible[start + offset] = True

        self.buttons.append(
            dict(label=entity, method="update", args=[{
                "visible": visible
            }]))

    def _create_traces(self) -> None:
        r"""Create all traces for the visualization."""
        for i, entity in enumerate(self.entities):
            entity_data = self.forecast.loc[self.forecast.ENTITY == entity]

            # First entity's traces are visible by default:
            visibility = (i == 0)

            # Create traces
            trace_target = self._create_time_series_trace(
                entity_data, entity, 'target', visibility)
            trace_pred = self._create_time_series_trace(
                entity_data, entity, 'prediction', visibility)
            trace_residuals_time = self._create_residuals_time_trace(
                entity_data, entity, visibility)
            trace_residuals_hist = self._create_residuals_hist_trace(
                entity_data, entity, visibility)

            # Add traces to appropriate subplots. The order must match the
            # slice layout assumed in `_create_button`:
            self.fig.add_trace(trace_target, row=1, col=1)
            self.fig.add_trace(trace_pred, row=1, col=1)
            self.fig.add_trace(trace_residuals_time, row=2, col=1)
            self.fig.add_trace(trace_residuals_hist, row=3, col=1)

            self._create_button(i, entity)

    def _update_layout(self) -> None:
        r"""Update the figure layout with all necessary configurations."""
        self.fig.update_layout(
            updatemenus=[
                dict(active=0, buttons=self.buttons, direction="down", pad={
                    "r": 10,
                    "t": 10
                }, showactive=True, x=1, xanchor="left", y=1.07, yanchor="top")
            ],
            title=self.title,
            height=1000,  # Increased height to accommodate third plot
            width=1300,
            showlegend=True,
            hovermode='x unified')

        # Update axis labels:
        self.fig.update_xaxes(title_text="Timestamp", row=1, col=1)
        self.fig.update_xaxes(title_text="Timestamp", row=2, col=1)
        self.fig.update_xaxes(title_text="Residual Value", row=3, col=1)

        self.fig.update_yaxes(title_text=self.target_label, row=1, col=1)
        self.fig.update_yaxes(title_text="Residual Value", row=2, col=1)
        self.fig.update_yaxes(title_text="Frequency", row=3, col=1)

        # Add zero reference line for residuals time series
        self.fig.add_hline(
            y=0,
            line_dash="dash",
            line_color="gray",
            row=2,
            col=1,
        )

    def visualize(self) -> None:
        r"""Generate and display the complete visualization.

        The figure is built on the first call only. Later calls just show
        it again, so traces, buttons and the zero reference line are not
        added a second time.
        """
        if not self._is_built:
            self._create_traces()
            self._update_layout()
            self._is_built = True
        self.fig.show()
|
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import sys
|
|
3
|
+
import time
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from rich.console import Console, ConsoleOptions, RenderResult
|
|
7
|
+
from rich.live import Live
|
|
8
|
+
from rich.padding import Padding
|
|
9
|
+
from rich.progress import (
|
|
10
|
+
BarColumn,
|
|
11
|
+
MofNCompleteColumn,
|
|
12
|
+
Progress,
|
|
13
|
+
Task,
|
|
14
|
+
TextColumn,
|
|
15
|
+
TimeRemainingColumn,
|
|
16
|
+
)
|
|
17
|
+
from rich.spinner import Spinner
|
|
18
|
+
from rich.table import Table
|
|
19
|
+
from rich.text import Text
|
|
20
|
+
from typing_extensions import Self
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class ProgressLogger:
|
|
24
|
+
def __init__(self, msg: str, verbose: bool = True) -> None:
|
|
25
|
+
self.msg = msg
|
|
26
|
+
self.verbose = verbose
|
|
27
|
+
|
|
28
|
+
self.logs: list[str] = []
|
|
29
|
+
|
|
30
|
+
self.start_time: float | None = None
|
|
31
|
+
self.end_time: float | None = None
|
|
32
|
+
|
|
33
|
+
@classmethod
|
|
34
|
+
def default(cls, msg: str, verbose: bool = True) -> 'ProgressLogger':
|
|
35
|
+
from kumoai import in_snowflake_notebook
|
|
36
|
+
|
|
37
|
+
if in_snowflake_notebook():
|
|
38
|
+
return StreamlitProgressLogger(msg, verbose)
|
|
39
|
+
return RichProgressLogger(msg, verbose)
|
|
40
|
+
|
|
41
|
+
@property
|
|
42
|
+
def duration(self) -> float:
|
|
43
|
+
assert self.start_time is not None
|
|
44
|
+
if self.end_time is not None:
|
|
45
|
+
return self.end_time - self.start_time
|
|
46
|
+
return time.perf_counter() - self.start_time
|
|
47
|
+
|
|
48
|
+
def log(self, msg: str) -> None:
|
|
49
|
+
self.logs.append(msg)
|
|
50
|
+
|
|
51
|
+
def init_progress(self, total: int, description: str) -> None:
|
|
52
|
+
pass
|
|
53
|
+
|
|
54
|
+
def step(self) -> None:
|
|
55
|
+
pass
|
|
56
|
+
|
|
57
|
+
def __enter__(self) -> Self:
|
|
58
|
+
self.start_time = time.perf_counter()
|
|
59
|
+
return self
|
|
60
|
+
|
|
61
|
+
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
|
|
62
|
+
self.end_time = time.perf_counter()
|
|
63
|
+
|
|
64
|
+
def __repr__(self) -> str:
|
|
65
|
+
return f'{self.__class__.__name__}({self.msg})'
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class ColoredMofNCompleteColumn(MofNCompleteColumn):
    r"""A :class:`rich.progress.MofNCompleteColumn` whose rendered text is
    re-created with a custom style.

    Args:
        style: The style applied to the rendered text.
    """
    def __init__(self, style: str = 'green') -> None:
        super().__init__()
        self.style = style

    def render(self, task: Task) -> Text:
        # Keep only the plain text of the parent rendering and re-style it:
        plain = str(super().render(task))
        return Text(plain, style=self.style)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class ColoredTimeRemainingColumn(TimeRemainingColumn):
    r"""A :class:`rich.progress.TimeRemainingColumn` whose rendered text is
    re-created with a custom style.

    Args:
        style: The style applied to the rendered text.
    """
    def __init__(self, style: str = 'cyan') -> None:
        super().__init__()
        self.style = style

    def render(self, task: Task) -> Text:
        # Keep only the plain text of the parent rendering and re-style it:
        plain = str(super().render(task))
        return Text(plain, style=self.style)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class RichProgressLogger(ProgressLogger):
    r"""A :class:`ProgressLogger` that renders itself via ``rich``.

    While the context is active (and ``verbose`` is set), the logger is
    shown in a :class:`rich.live.Live` display: a status icon, the message
    with its duration, all collected logs and, if :meth:`init_progress` was
    called, a progress bar.

    Args:
        msg: The message describing the work.
        verbose: Whether to render any output.
        refresh_per_second: The refresh rate of the live display.
    """
    def __init__(
        self,
        msg: str,
        verbose: bool = True,
        refresh_per_second: int = 10,
    ) -> None:
        super().__init__(msg=msg, verbose=verbose)

        self.refresh_per_second = refresh_per_second

        self._progress: Progress | None = None
        self._task: int | None = None

        self._live: Live | None = None
        self._exception: bool = False

    def init_progress(self, total: int, description: str) -> None:
        r"""Create a progress bar of ``total`` steps labeled by
        ``description``. Only one progress bar may exist at a time.
        """
        assert self._progress is None
        if self.verbose:
            self._progress = Progress(
                TextColumn(f'   ↳ {description}', style='dim'),
                BarColumn(bar_width=None),
                ColoredMofNCompleteColumn(style='dim'),
                TextColumn('•', style='dim'),
                ColoredTimeRemainingColumn(style='dim'),
            )
            self._task = self._progress.add_task("Progress", total=total)

    def step(self) -> None:
        r"""Advance the progress bar by one step. In verbose mode, this
        requires a prior call to :meth:`init_progress`.
        """
        if self.verbose:
            assert self._progress is not None
            assert self._task is not None
            self._progress.update(self._task, advance=1)  # type: ignore

    @staticmethod
    def _write_terminal_progress(state: int) -> None:
        r"""Write the ``OSC 9;4;<state>`` escape sequence to stdout.

        Nothing is written unless stdout is an interactive terminal, since
        the raw escape bytes would otherwise end up in piped or captured
        output.
        """
        stream = sys.stdout
        is_tty = getattr(stream, 'isatty', None)
        if is_tty is None or not is_tty():
            return
        stream.write(f"\x1b]9;4;{state}\x07")
        stream.flush()

    def __enter__(self) -> Self:
        from kumoai import in_notebook

        super().__enter__()

        # Reset the state of a previous run so that a re-used logger does
        # not keep rendering its old success/failure status:
        self.end_time = None
        self._exception = False

        if not in_notebook():  # Render progress bar in TUI.
            self._write_terminal_progress(3)

        if self.verbose:
            self._live = Live(
                self,
                refresh_per_second=self.refresh_per_second,
                vertical_overflow='visible',
            )
            self._live.start()

        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        from kumoai import in_notebook

        super().__exit__(exc_type, exc_val, exc_tb)

        if exc_type is not None:
            self._exception = True

        try:
            if self._progress is not None:
                self._progress.stop()
                self._progress = None
                self._task = None

            if self._live is not None:
                # Render the final state (icon and duration) once more
                # before the live display is shut down:
                self._live.update(self, refresh=True)
                self._live.stop()
                self._live = None
        finally:
            # Always clear the terminal progress state set in `__enter__`:
            if not in_notebook():
                self._write_terminal_progress(0)

    def __rich_console__(
        self,
        console: Console,
        options: ConsoleOptions,
    ) -> RenderResult:

        table = Table.grid(padding=(0, 1))

        icon: Text | Padding
        if self._exception:
            style = 'red'
            icon = Text('❌', style=style)
        elif self.end_time is not None:
            style = 'green'
            icon = Text('✅', style=style)
        else:
            style = 'cyan'
            icon = Padding(Spinner('dots', style=style), (0, 1, 0, 0))

        title = Text.from_markup(
            f'{self.msg} ({self.duration:.2f}s)',
            style=style,
        )
        table.add_row(icon, title)

        for log in self.logs:
            table.add_row('', Text(f'↳ {log}', style='dim'))

        yield table

        if self.verbose and self._progress is not None:
            yield self._progress.get_renderable()
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
class StreamlitProgressLogger(ProgressLogger):
    r"""A :class:`ProgressLogger` that renders itself via ``streamlit``.

    In verbose mode, entering the context opens an ``st.status`` container.
    Logs are written into it, and :meth:`init_progress` adds a progress bar
    to it. On exit, the container label is updated with the duration and
    the success/error state.

    Args:
        msg: The message describing the work.
        verbose: Whether to render any output.
    """
    def __init__(
        self,
        msg: str,
        verbose: bool = True,
    ) -> None:
        super().__init__(msg=msg, verbose=verbose)

        self._status: Any = None

        self._total = 0
        self._current = 0
        self._description: str = ''
        self._progress: Any = None

    def __enter__(self) -> Self:
        super().__enter__()

        import streamlit as st

        # Drop the progress bar of a previous run so that `step()` does
        # not update a bar that belongs to an old status container:
        self._total = 0
        self._current = 0
        self._progress = None

        # Adjust layout for prettier output:
        st.markdown(STREAMLIT_CSS, unsafe_allow_html=True)

        if self.verbose:
            self._status = st.status(
                f':blue[{self._sanitize_text(self.msg)}]',
                expanded=True,
            )

        return self

    def log(self, msg: str) -> None:
        r"""Record ``msg`` and, in verbose mode, write it to the status
        container.
        """
        super().log(msg)
        if self.verbose and self._status is not None:
            self._status.write(self._sanitize_text(msg))

    def _fraction_done(self) -> float:
        r"""Return the completed fraction in ``[0, 1]``.

        A non-positive total is reported as complete instead of raising a
        :class:`ZeroDivisionError`.
        """
        if self._total <= 0:
            return 1.0
        return min(self._current / self._total, 1.0)

    def init_progress(self, total: int, description: str) -> None:
        r"""Add a progress bar of ``total`` steps labeled by
        ``description`` to the status container.
        """
        if self.verbose and self._status is not None:
            self._total = total
            self._current = 0
            self._description = self._sanitize_text(description)
            self._progress = self._status.progress(
                value=self._fraction_done(),
                text=f'{self._description} [{self._current}/{self._total}]',
            )

    def step(self) -> None:
        r"""Advance the progress bar by one step."""
        self._current += 1

        if self.verbose and self._progress is not None:
            self._progress.progress(
                value=self._fraction_done(),
                text=f'{self._description} [{self._current}/{self._total}]',
            )

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        super().__exit__(exc_type, exc_val, exc_tb)

        if not self.verbose or self._status is None:
            return

        label = f'{self._sanitize_text(self.msg)} ({self.duration:.2f}s)'

        if exc_type is not None:
            self._status.update(
                label=f':red[{label}]',
                state='error',
                expanded=True,
            )
        else:
            self._status.update(
                label=f':green[{label}]',
                state='complete',
                expanded=True,
            )

    @staticmethod
    def _sanitize_text(msg: str) -> str:
        r"""Convert ``[bold]``/``[/bold]`` markup into Markdown ``**``."""
        return re.sub(r'\[/?bold\]', '**', msg)
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
STREAMLIT_CSS = """
|
|
280
|
+
<style>
|
|
281
|
+
/* Fix horizontal scrollbar */
|
|
282
|
+
.stExpander summary {
|
|
283
|
+
width: auto;
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
/* Fix paddings/margins */
|
|
287
|
+
.stExpander summary {
|
|
288
|
+
padding: 0.75rem 1rem 0.5rem;
|
|
289
|
+
}
|
|
290
|
+
.stExpander p {
|
|
291
|
+
margin: 0px 0px 0.2rem;
|
|
292
|
+
}
|
|
293
|
+
.stExpander [data-testid="stExpanderDetails"] {
|
|
294
|
+
padding-bottom: 1.45rem;
|
|
295
|
+
}
|
|
296
|
+
.stExpander .stProgress div:first-child {
|
|
297
|
+
padding-bottom: 4px;
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
/* Fix expand icon position */
|
|
301
|
+
.stExpander summary svg {
|
|
302
|
+
height: 1.5rem;
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
/* Fix summary icons */
|
|
306
|
+
.stExpander summary [data-testid="stExpanderIconCheck"] {
|
|
307
|
+
font-size: 1.8rem;
|
|
308
|
+
margin-top: -3px;
|
|
309
|
+
color: rgb(21, 130, 55);
|
|
310
|
+
}
|
|
311
|
+
.stExpander summary [data-testid="stExpanderIconError"] {
|
|
312
|
+
font-size: 1.8rem;
|
|
313
|
+
margin-top: -3px;
|
|
314
|
+
color: rgb(255, 43, 43);
|
|
315
|
+
}
|
|
316
|
+
.stExpander summary span:first-child span:first-child {
|
|
317
|
+
width: 1.6rem;
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
/* Add border between title and content */
|
|
321
|
+
.stExpander [data-testid="stExpanderDetails"] {
|
|
322
|
+
border-top: 1px solid rgba(30, 37, 47, 0.2);
|
|
323
|
+
padding-top: 0.5rem;
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
/* Fix title font size */
|
|
327
|
+
.stExpander summary p {
|
|
328
|
+
font-size: 1rem;
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
/* Gray out content */
|
|
332
|
+
.stExpander [data-testid="stExpanderDetails"] {
|
|
333
|
+
color: rgba(30, 37, 47, 0.5);
|
|
334
|
+
}
|
|
335
|
+
|
|
336
|
+
/* Fix progress bar font size */
|
|
337
|
+
.stExpander .stProgress p {
|
|
338
|
+
line-height: 1.6;
|
|
339
|
+
font-size: 1rem;
|
|
340
|
+
color: rgba(30, 37, 47, 0.5);
|
|
341
|
+
}
|
|
342
|
+
</style>
|
|
343
|
+
"""
|
kumoai-2.14.0.dev202601011731.dist-info/METADATA
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: kumoai
|
|
3
|
+
Version: 2.14.0.dev202601011731
|
|
4
|
+
Summary: AI on the Modern Data Stack
|
|
5
|
+
Author-email: "Kumo.AI" <hello@kumo.ai>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: homepage, https://kumo.ai
|
|
8
|
+
Project-URL: documentation, https://kumo.ai/docs
|
|
9
|
+
Keywords: deep-learning,graph-neural-networks,cloud-data-warehouse
|
|
10
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
11
|
+
Classifier: Programming Language :: Python
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
17
|
+
Requires-Python: >=3.10
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
Requires-Dist: pandas
|
|
21
|
+
Requires-Dist: pyarrow<19.0.0,>=8.0.0
|
|
22
|
+
Requires-Dist: requests>=2.28.2
|
|
23
|
+
Requires-Dist: urllib3
|
|
24
|
+
Requires-Dist: plotly
|
|
25
|
+
Requires-Dist: typing_extensions>=4.5.0
|
|
26
|
+
Requires-Dist: kumo-api==0.49.0
|
|
27
|
+
Requires-Dist: tqdm>=4.66.0
|
|
28
|
+
Requires-Dist: aiohttp>=3.10.0
|
|
29
|
+
Requires-Dist: pydantic>=1.10.21
|
|
30
|
+
Requires-Dist: rich>=9.0.0
|
|
31
|
+
Provides-Extra: doc
|
|
32
|
+
Requires-Dist: sphinx; extra == "doc"
|
|
33
|
+
Requires-Dist: sphinx-book-theme; extra == "doc"
|
|
34
|
+
Requires-Dist: sphinx-copybutton; extra == "doc"
|
|
35
|
+
Requires-Dist: sphinx-autodoc-typehints; extra == "doc"
|
|
36
|
+
Requires-Dist: graphviz; extra == "doc"
|
|
37
|
+
Provides-Extra: test
|
|
38
|
+
Requires-Dist: pytest; extra == "test"
|
|
39
|
+
Requires-Dist: pytest-mock; extra == "test"
|
|
40
|
+
Requires-Dist: requests-mock; extra == "test"
|
|
41
|
+
Provides-Extra: sqlite
|
|
42
|
+
Requires-Dist: adbc_driver_sqlite; extra == "sqlite"
|
|
43
|
+
Provides-Extra: snowflake
|
|
44
|
+
Requires-Dist: numpy<2.0; extra == "snowflake"
|
|
45
|
+
Requires-Dist: snowflake-connector-python; extra == "snowflake"
|
|
46
|
+
Requires-Dist: pyyaml; extra == "snowflake"
|
|
47
|
+
Provides-Extra: sagemaker
|
|
48
|
+
Requires-Dist: boto3<2.0,>=1.30.0; extra == "sagemaker"
|
|
49
|
+
Requires-Dist: mypy-boto3-sagemaker-runtime<2.0,>=1.34.0; extra == "sagemaker"
|
|
50
|
+
Provides-Extra: test-sagemaker
|
|
51
|
+
Requires-Dist: sagemaker<3.0; extra == "test-sagemaker"
|
|
52
|
+
Dynamic: license-file
|
|
53
|
+
Dynamic: requires-dist
|
|
54
|
+
|
|
55
|
+
<p align="center">
|
|
56
|
+
<img height="180" src="https://s3.us-west-1.amazonaws.com/data.kumo.ai/img/kumo_pink_md.svg" />
|
|
57
|
+
</p>
|
|
58
|
+
|
|
59
|
+
______________________________________________________________________
|
|
60
|
+
|
|
61
|
+
The Kumo SDK implements a pythonic interface for users to programmatically
|
|
62
|
+
interact with the Kumo machine learning platform
|
|
63
|
+
([documentation](https://kumo-ai.github.io/kumo-sdk/docs/#)).
|
|
64
|
+
|
|
65
|
+
## Installation
|
|
66
|
+
|
|
67
|
+
The Kumo SDK is available for Python 3.10 to Python 3.13. To install, simply run
|
|
68
|
+
|
|
69
|
+
```
|
|
70
|
+
pip install kumoai
|
|
71
|
+
```
|