bidviz 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bidviz/__init__.py +14 -0
- bidviz/core/__init__.py +5 -0
- bidviz/core/base.py +43 -0
- bidviz/exceptions.py +69 -0
- bidviz/transformer.py +312 -0
- bidviz/transformers/__init__.py +22 -0
- bidviz/transformers/bar.py +68 -0
- bidviz/transformers/heatmap.py +116 -0
- bidviz/transformers/kpi.py +60 -0
- bidviz/transformers/line.py +126 -0
- bidviz/transformers/other.py +108 -0
- bidviz/transformers/pie.py +48 -0
- bidviz/transformers/table.py +48 -0
- bidviz/utils.py +187 -0
- bidviz-1.0.0.dist-info/METADATA +425 -0
- bidviz-1.0.0.dist-info/RECORD +19 -0
- bidviz-1.0.0.dist-info/WHEEL +5 -0
- bidviz-1.0.0.dist-info/licenses/LICENSE +21 -0
- bidviz-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""KPI Cards transformer."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
from bidviz.core.base import BaseChartTransformer
|
|
8
|
+
from bidviz.exceptions import TransformationError
|
|
9
|
+
from bidviz.utils import format_label, safe_get_value
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class KPICardsTransformer(BaseChartTransformer):
    """Transform single-row DataFrame into KPI cards."""

    def transform(self, df: pd.DataFrame) -> Dict[str, Any]:
        """
        Transform a single-row DataFrame into KPI cards for dashboard metrics.

        Every column of the row becomes one card holding the column name
        (``key``), a display label (``label``) and the cell value (``value``).

        Args:
            df: Single-row DataFrame containing metrics

        Returns:
            Dict with chart_type='kpi_cards' and list of card data. An empty
            DataFrame yields an empty card list.

        Raises:
            TransformationError: If DataFrame has more than one row, or if
                building the cards fails for any other reason
        """
        try:
            if len(df) == 0:
                return {"chart_type": "kpi_cards", "data": []}

            if len(df) > 1:
                raise TransformationError(
                    "KPI cards expect a single-row DataFrame",
                    chart_type="kpi_cards",
                    df_shape=df.shape,
                )

            # Read each cell positionally instead of materialising the row with
            # df.iloc[0]: a row Series has a single dtype, so integer metrics
            # sitting next to float metrics would be upcast to float.
            cards = [
                {
                    "key": column,
                    "label": format_label(column),
                    "value": safe_get_value(df.iat[0, position]),
                }
                for position, column in enumerate(df.columns)
            ]

            return {"chart_type": "kpi_cards", "data": cards}

        except TransformationError:
            # Already a domain error (e.g. the multi-row check above).
            raise
        except Exception as e:
            raise TransformationError(
                f"Failed to transform KPI cards: {str(e)}",
                chart_type="kpi_cards",
                df_shape=df.shape,
            ) from e
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
"""Line chart transformers."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, List, Optional
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
from bidviz.core.base import BaseChartTransformer
|
|
8
|
+
from bidviz.exceptions import TransformationError
|
|
9
|
+
from bidviz.utils import format_label, safe_get_value, validate_columns
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class LineChartTransformer(BaseChartTransformer):
    """Transform DataFrame into line chart data."""

    def transform(
        self,
        df: pd.DataFrame,
        x_column: str,
        y_column: str,
        series_name: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Transform DataFrame into line chart data for time series or trends.

        Args:
            df: DataFrame containing the data
            x_column: Column name for x-axis
            y_column: Column name for y-axis
            series_name: Optional custom name for the data series

        Returns:
            Dict with chart_type='line_chart', data points, and labels

        Raises:
            TransformationError: If a required column is missing or the
                transformation fails
        """
        try:
            validate_columns(df, [x_column, y_column])

            # Walk the two columns directly rather than df.iterrows():
            # iterrows() builds one Series per row and therefore does not
            # preserve per-column dtypes (an int x next to a float y would be
            # rendered as "1.0").
            data = [
                {
                    "x": str(safe_get_value(x_value)),
                    "y": safe_get_value(y_value),
                }
                for x_value, y_value in zip(
                    df[x_column].tolist(), df[y_column].tolist()
                )
            ]

            return {
                "chart_type": "line_chart",
                "data": data,
                "series_name": series_name or format_label(y_column),
                "x_label": format_label(x_column),
                "y_label": format_label(y_column),
            }

        except ValueError as e:
            raise TransformationError(
                str(e), chart_type="line_chart", df_shape=df.shape
            ) from e
        except Exception as e:
            raise TransformationError(
                f"Failed to transform line chart: {str(e)}",
                chart_type="line_chart",
                df_shape=df.shape,
            ) from e
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class MultiLineChartTransformer(BaseChartTransformer):
    """Transform DataFrame into multi-line chart data."""

    def transform(
        self,
        df: pd.DataFrame,
        x_column: str,
        y_columns: List[str],
        series_names: Optional[List[str]] = None,
    ) -> Dict[str, Any]:
        """
        Transform DataFrame into multi-line chart for comparing multiple series.

        Args:
            df: DataFrame containing the data
            x_column: Column name for x-axis
            y_columns: List of column names for y-axis
            series_names: Optional custom names for each series

        Returns:
            Dict with chart_type='multi_line_chart' and series data

        Raises:
            TransformationError: If a required column is missing, if
                series_names and y_columns differ in length, or if the
                transformation fails
        """
        try:
            validate_columns(df, [x_column, *y_columns])

            if series_names and len(series_names) != len(y_columns):
                raise TransformationError(
                    "Number of series_names must match number of y_columns",
                    chart_type="multi_line_chart",
                    df_shape=df.shape,
                )

            # The x values are shared by every series, so convert them once.
            # Columns are read directly instead of via df.iterrows(), which
            # does not preserve per-column dtypes (ints next to floats would
            # be upcast to float).
            x_values = [str(safe_get_value(value)) for value in df[x_column].tolist()]

            series = []
            for idx, y_col in enumerate(y_columns):
                points = [
                    {"x": x_value, "y": safe_get_value(y_value)}
                    for x_value, y_value in zip(x_values, df[y_col].tolist())
                ]
                series.append(
                    {
                        "name": series_names[idx] if series_names else format_label(y_col),
                        "data": points,
                    }
                )

            return {
                "chart_type": "multi_line_chart",
                "series": series,
                "x_label": format_label(x_column),
            }

        except TransformationError:
            # Keep the specific message raised above instead of re-wrapping it
            # in the generic "Failed to transform" error below.
            raise
        except ValueError as e:
            raise TransformationError(
                str(e), chart_type="multi_line_chart", df_shape=df.shape
            ) from e
        except Exception as e:
            raise TransformationError(
                f"Failed to transform multi-line chart: {str(e)}",
                chart_type="multi_line_chart",
                df_shape=df.shape,
            ) from e
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Funnel and stacked bar chart transformers."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, List, Optional
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
from bidviz.core.base import BaseChartTransformer
|
|
8
|
+
from bidviz.exceptions import TransformationError
|
|
9
|
+
from bidviz.utils import format_label, safe_get_value, validate_columns
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class FunnelChartTransformer(BaseChartTransformer):
    """Transform DataFrame into funnel chart data."""

    def transform(self, df: pd.DataFrame, stage_column: str, value_column: str) -> Dict[str, Any]:
        """
        Transform DataFrame into funnel chart data for conversion pipelines.

        Args:
            df: DataFrame containing the data
            stage_column: Column name for funnel stages
            value_column: Column name for stage values

        Returns:
            Dict with chart_type='funnel_chart' and data points

        Raises:
            TransformationError: If a required column is missing or the
                transformation fails
        """
        try:
            validate_columns(df, [stage_column, value_column])

            # Read the columns directly; df.iterrows() yields one Series per
            # row and so does not preserve per-column dtypes.
            data = [
                {
                    "stage": str(safe_get_value(stage)),
                    "value": safe_get_value(value),
                }
                for stage, value in zip(
                    df[stage_column].tolist(), df[value_column].tolist()
                )
            ]

            return {"chart_type": "funnel_chart", "data": data}

        except ValueError as e:
            raise TransformationError(
                str(e), chart_type="funnel_chart", df_shape=df.shape
            ) from e
        except Exception as e:
            raise TransformationError(
                f"Failed to transform funnel chart: {str(e)}",
                chart_type="funnel_chart",
                df_shape=df.shape,
            ) from e
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class StackedBarChartTransformer(BaseChartTransformer):
    """Transform DataFrame into stacked bar chart data."""

    def transform(
        self,
        df: pd.DataFrame,
        x_column: str,
        y_columns: List[str],
        category_names: Optional[List[str]] = None,
    ) -> Dict[str, Any]:
        """
        Transform DataFrame into stacked bar chart for composed comparisons.

        Each data point is a dict with the stringified x value under ``"x"``
        plus one entry per y column, keyed by the column name.

        Args:
            df: DataFrame containing the data
            x_column: Column name for x-axis
            y_columns: List of column names for stacked values
            category_names: Optional custom names for each stack

        Returns:
            Dict with chart_type='stacked_bar_chart' and data

        Raises:
            TransformationError: If a required column is missing, if
                category_names and y_columns differ in length, or if the
                transformation fails
        """
        try:
            validate_columns(df, [x_column, *y_columns])

            if category_names and len(category_names) != len(y_columns):
                raise TransformationError(
                    "Number of category_names must match number of y_columns",
                    chart_type="stacked_bar_chart",
                    df_shape=df.shape,
                )

            # Iterate the selected columns in lockstep rather than using
            # df.iterrows(), which does not preserve per-column dtypes
            # (integer stacks next to float stacks would be upcast to float).
            y_series = [df[y_col].tolist() for y_col in y_columns]
            data = []
            for x_value, *y_values in zip(df[x_column].tolist(), *y_series):
                point = {"x": str(safe_get_value(x_value))}
                for y_col, y_value in zip(y_columns, y_values):
                    point[y_col] = safe_get_value(y_value)
                data.append(point)

            categories = [
                category_names[i] if category_names else format_label(y_col)
                for i, y_col in enumerate(y_columns)
            ]

            return {
                "chart_type": "stacked_bar_chart",
                "data": data,
                "categories": categories,
                "x_label": format_label(x_column),
            }

        except TransformationError:
            # Keep the specific message raised above instead of re-wrapping it
            # in the generic "Failed to transform" error below.
            raise
        except ValueError as e:
            raise TransformationError(
                str(e), chart_type="stacked_bar_chart", df_shape=df.shape
            ) from e
        except Exception as e:
            raise TransformationError(
                f"Failed to transform stacked bar chart: {str(e)}",
                chart_type="stacked_bar_chart",
                df_shape=df.shape,
            ) from e
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Pie and donut chart transformers."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
from bidviz.core.base import BaseChartTransformer
|
|
8
|
+
from bidviz.exceptions import TransformationError
|
|
9
|
+
from bidviz.utils import safe_get_value, validate_columns
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class PieChartTransformer(BaseChartTransformer):
    """Transform DataFrame into pie chart data."""

    def transform(self, df: pd.DataFrame, label_column: str, value_column: str) -> Dict[str, Any]:
        """
        Transform DataFrame into pie chart data for part-to-whole relationships.

        Args:
            df: DataFrame containing the data
            label_column: Column name for slice labels
            value_column: Column name for slice values

        Returns:
            Dict with chart_type='pie_chart' and data points

        Raises:
            TransformationError: If a required column is missing or the
                transformation fails
        """
        try:
            validate_columns(df, [label_column, value_column])

            # Read the columns directly; df.iterrows() yields one Series per
            # row and so does not preserve per-column dtypes.
            data = [
                {
                    "label": str(safe_get_value(label)),
                    "value": safe_get_value(value),
                }
                for label, value in zip(
                    df[label_column].tolist(), df[value_column].tolist()
                )
            ]

            return {"chart_type": "pie_chart", "data": data}

        except ValueError as e:
            raise TransformationError(
                str(e), chart_type="pie_chart", df_shape=df.shape
            ) from e
        except Exception as e:
            raise TransformationError(
                f"Failed to transform pie chart: {str(e)}",
                chart_type="pie_chart",
                df_shape=df.shape,
            ) from e
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Data table transformer."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
from bidviz.core.base import BaseChartTransformer
|
|
8
|
+
from bidviz.exceptions import TransformationError
|
|
9
|
+
from bidviz.utils import format_label, paginate_dataframe, safe_get_value
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DataTableTransformer(BaseChartTransformer):
    """Transform DataFrame into paginated data table."""

    def transform(self, df: pd.DataFrame, page: int = 1, page_size: int = 50) -> Dict[str, Any]:
        """
        Transform DataFrame into paginated data table structure.

        Args:
            df: DataFrame containing the data
            page: Page number (1-indexed)
            page_size: Number of rows per page

        Returns:
            Dict with chart_type='data_table', columns, rows, and pagination

        Raises:
            TransformationError: If pagination or row conversion fails
        """
        try:
            paginated_df, metadata = paginate_dataframe(df, page, page_size)

            labels = list(df.columns)
            columns = [{"key": col, "label": format_label(col)} for col in labels]

            # Convert column by column (positionally) instead of row by row
            # with iterrows(): a row Series has a single dtype, so integer
            # cells sitting next to float cells would be upcast to float.
            converted = [
                [safe_get_value(cell) for cell in paginated_df.iloc[:, position]]
                for position in range(len(labels))
            ]

            rows = [
                {col: cells[row_idx] for col, cells in zip(labels, converted)}
                for row_idx in range(len(paginated_df))
            ]

            return {"chart_type": "data_table", "columns": columns, "rows": rows, **metadata}

        except Exception as e:
            raise TransformationError(
                f"Failed to transform data table: {str(e)}",
                chart_type="data_table",
                df_shape=df.shape,
            ) from e
|
bidviz/utils.py
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Utility functions for data transformation and formatting.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import Any, List, Optional
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import pandas as pd
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def safe_get_value(value: Any) -> Any:
    """
    Safely extract a value from pandas objects, converting NaN to None.

    Args:
        value: Value to extract (can be pandas scalar, numpy type, or Python type)

    Returns:
        None for missing scalars (NaN, None, pd.NA, NaT); a Python int, float
        or bool for numpy scalars; a string for timestamps; any other value
        (including lists and arrays) unchanged.

    Examples:
        >>> safe_get_value(pd.NA) is None
        True
        >>> safe_get_value(np.nan) is None
        True
        >>> safe_get_value(42)
        42
        >>> safe_get_value([1, 2])
        [1, 2]
    """
    # pd.isna() is element-wise for list-likes and returns an array whose truth
    # value is ambiguous, so only ask it about scalars.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return None
    if isinstance(value, np.bool_):
        return bool(value)
    if isinstance(value, np.floating):
        return float(value)
    if isinstance(value, np.integer):
        return int(value)
    if isinstance(value, (pd.Timestamp, np.datetime64)):
        return str(value)
    return value
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def format_label(column_name: str) -> str:
    """
    Convert snake_case column name to Title Case label.

    Non-string column labels (for example the integer labels of a DataFrame
    created without column names) are converted with ``str()`` first.

    Args:
        column_name: Column name in snake_case format

    Returns:
        Formatted label in Title Case

    Examples:
        >>> format_label('total_gmv')
        'Total Gmv'
        >>> format_label('customer_id')
        'Customer Id'
        >>> format_label('avg_days_to_ship')
        'Avg Days To Ship'
        >>> format_label(2024)
        '2024'
    """
    return str(column_name).replace("_", " ").title()
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def validate_columns(df: pd.DataFrame, required_columns: List[str]) -> None:
    """
    Validate that required columns exist in the DataFrame.

    Args:
        df: DataFrame to validate
        required_columns: List of required column names

    Raises:
        ValueError: If any required columns are missing

    Examples:
        >>> df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
        >>> validate_columns(df, ['a', 'b'])  # No error
        >>> validate_columns(df, ['a', 'c'])  # Raises ValueError
        Traceback (most recent call last):
        ...
        ValueError: Missing required columns: c
    """
    missing = [col for col in required_columns if col not in df.columns]
    if missing:
        # str() each label so a non-string name (e.g. an int) still produces
        # the documented ValueError instead of a TypeError from str.join.
        raise ValueError(f"Missing required columns: {', '.join(map(str, missing))}")
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def safe_convert_to_numeric(series: pd.Series) -> pd.Series:
    """
    Coerce a pandas Series to a numeric dtype.

    Entries that cannot be parsed as numbers become NaN instead of raising.

    Args:
        series: Series to convert

    Returns:
        Numeric series with errors coerced to NaN

    Examples:
        >>> s = pd.Series(['1', '2', 'abc'])
        >>> safe_convert_to_numeric(s)
        0    1.0
        1    2.0
        2    NaN
        dtype: float64
    """
    numeric_series = pd.to_numeric(series, errors="coerce")
    return numeric_series
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def clean_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """
    Normalise DataFrame column names: lowercase them and turn spaces into underscores.

    The input frame is left untouched; a copy with the new column names is
    returned.

    Args:
        df: DataFrame to clean

    Returns:
        DataFrame with cleaned column names

    Examples:
        >>> df = pd.DataFrame({'Total GMV': [100], 'Customer Name': ['John']})
        >>> clean_df = clean_dataframe(df)
        >>> list(clean_df.columns)
        ['total_gmv', 'customer_name']
    """
    cleaned = df.copy()
    lowered = cleaned.columns.str.lower()
    cleaned.columns = lowered.str.replace(" ", "_")
    return cleaned
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def get_numeric_columns(df: pd.DataFrame) -> List[str]:
    """
    Return the names of the DataFrame's numeric columns.

    Args:
        df: DataFrame to analyze

    Returns:
        List of numeric column names

    Examples:
        >>> df = pd.DataFrame({'a': [1, 2], 'b': ['x', 'y'], 'c': [1.5, 2.5]})
        >>> get_numeric_columns(df)
        ['a', 'c']
    """
    numeric_only = df.select_dtypes(include=[np.number])
    return numeric_only.columns.tolist()
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def paginate_dataframe(
    df: pd.DataFrame, page: int = 1, page_size: int = 50
) -> tuple[pd.DataFrame, dict]:
    """
    Paginate a DataFrame and return pagination metadata.

    A page number outside the valid range is clamped: values below 1 become 1
    and values past the last page become the last page.

    Args:
        df: DataFrame to paginate
        page: Page number (1-indexed)
        page_size: Number of rows per page (must be at least 1)

    Returns:
        Tuple of (paginated DataFrame, pagination metadata dict). The metadata
        holds ``total`` (row count), ``page`` (the clamped page number),
        ``page_size`` and ``total_pages``.

    Raises:
        ValueError: If page_size is smaller than 1

    Examples:
        >>> df = pd.DataFrame({'a': range(100)})
        >>> page_df, meta = paginate_dataframe(df, page=2, page_size=25)
        >>> len(page_df)
        25
        >>> meta['total']
        100
        >>> meta['page']
        2
    """
    # A zero page size would divide by zero below and a negative one would
    # produce a meaningless slice, so reject both up front.
    if page_size < 1:
        raise ValueError(f"page_size must be at least 1, got {page_size}")

    total = len(df)
    total_pages = (total + page_size - 1) // page_size  # Ceiling division

    # Ensure page is within valid range (an empty frame still has "page 1")
    page = min(max(page, 1), max(total_pages, 1))

    start_idx = (page - 1) * page_size
    end_idx = start_idx + page_size

    paginated_df = df.iloc[start_idx:end_idx]

    metadata = {
        "total": total,
        "page": page,
        "page_size": page_size,
        "total_pages": total_pages,
    }

    return paginated_df, metadata
|