createsonline 0.1.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- createsonline/__init__.py +46 -0
- createsonline/admin/__init__.py +7 -0
- createsonline/admin/content.py +526 -0
- createsonline/admin/crud.py +805 -0
- createsonline/admin/field_builder.py +559 -0
- createsonline/admin/integration.py +482 -0
- createsonline/admin/interface.py +2562 -0
- createsonline/admin/model_creator.py +513 -0
- createsonline/admin/model_manager.py +388 -0
- createsonline/admin/modern_dashboard.py +498 -0
- createsonline/admin/permissions.py +264 -0
- createsonline/admin/user_forms.py +594 -0
- createsonline/ai/__init__.py +202 -0
- createsonline/ai/fields.py +1226 -0
- createsonline/ai/orm.py +325 -0
- createsonline/ai/services.py +1244 -0
- createsonline/app.py +506 -0
- createsonline/auth/__init__.py +8 -0
- createsonline/auth/management.py +228 -0
- createsonline/auth/models.py +552 -0
- createsonline/cli/__init__.py +5 -0
- createsonline/cli/commands/__init__.py +122 -0
- createsonline/cli/commands/database.py +416 -0
- createsonline/cli/commands/info.py +173 -0
- createsonline/cli/commands/initdb.py +218 -0
- createsonline/cli/commands/project.py +545 -0
- createsonline/cli/commands/serve.py +173 -0
- createsonline/cli/commands/shell.py +93 -0
- createsonline/cli/commands/users.py +148 -0
- createsonline/cli/main.py +2041 -0
- createsonline/cli/manage.py +274 -0
- createsonline/config/__init__.py +9 -0
- createsonline/config/app.py +2577 -0
- createsonline/config/database.py +179 -0
- createsonline/config/docs.py +384 -0
- createsonline/config/errors.py +160 -0
- createsonline/config/orm.py +43 -0
- createsonline/config/request.py +93 -0
- createsonline/config/settings.py +176 -0
- createsonline/data/__init__.py +23 -0
- createsonline/data/dataframe.py +925 -0
- createsonline/data/io.py +453 -0
- createsonline/data/series.py +557 -0
- createsonline/database/__init__.py +60 -0
- createsonline/database/abstraction.py +440 -0
- createsonline/database/assistant.py +585 -0
- createsonline/database/fields.py +442 -0
- createsonline/database/migrations.py +132 -0
- createsonline/database/models.py +604 -0
- createsonline/database.py +438 -0
- createsonline/http/__init__.py +28 -0
- createsonline/http/client.py +535 -0
- createsonline/ml/__init__.py +55 -0
- createsonline/ml/classification.py +552 -0
- createsonline/ml/clustering.py +680 -0
- createsonline/ml/metrics.py +542 -0
- createsonline/ml/neural.py +560 -0
- createsonline/ml/preprocessing.py +784 -0
- createsonline/ml/regression.py +501 -0
- createsonline/performance/__init__.py +19 -0
- createsonline/performance/cache.py +444 -0
- createsonline/performance/compression.py +335 -0
- createsonline/performance/core.py +419 -0
- createsonline/project_init.py +789 -0
- createsonline/routing.py +528 -0
- createsonline/security/__init__.py +34 -0
- createsonline/security/core.py +811 -0
- createsonline/security/encryption.py +349 -0
- createsonline/server.py +295 -0
- createsonline/static/css/admin.css +263 -0
- createsonline/static/css/common.css +358 -0
- createsonline/static/css/dashboard.css +89 -0
- createsonline/static/favicon.ico +0 -0
- createsonline/static/icons/icon-128x128.png +0 -0
- createsonline/static/icons/icon-128x128.webp +0 -0
- createsonline/static/icons/icon-16x16.png +0 -0
- createsonline/static/icons/icon-16x16.webp +0 -0
- createsonline/static/icons/icon-180x180.png +0 -0
- createsonline/static/icons/icon-180x180.webp +0 -0
- createsonline/static/icons/icon-192x192.png +0 -0
- createsonline/static/icons/icon-192x192.webp +0 -0
- createsonline/static/icons/icon-256x256.png +0 -0
- createsonline/static/icons/icon-256x256.webp +0 -0
- createsonline/static/icons/icon-32x32.png +0 -0
- createsonline/static/icons/icon-32x32.webp +0 -0
- createsonline/static/icons/icon-384x384.png +0 -0
- createsonline/static/icons/icon-384x384.webp +0 -0
- createsonline/static/icons/icon-48x48.png +0 -0
- createsonline/static/icons/icon-48x48.webp +0 -0
- createsonline/static/icons/icon-512x512.png +0 -0
- createsonline/static/icons/icon-512x512.webp +0 -0
- createsonline/static/icons/icon-64x64.png +0 -0
- createsonline/static/icons/icon-64x64.webp +0 -0
- createsonline/static/image/android-chrome-192x192.png +0 -0
- createsonline/static/image/android-chrome-512x512.png +0 -0
- createsonline/static/image/apple-touch-icon.png +0 -0
- createsonline/static/image/favicon-16x16.png +0 -0
- createsonline/static/image/favicon-32x32.png +0 -0
- createsonline/static/image/favicon.ico +0 -0
- createsonline/static/image/favicon.svg +17 -0
- createsonline/static/image/icon-128x128.png +0 -0
- createsonline/static/image/icon-128x128.webp +0 -0
- createsonline/static/image/icon-16x16.png +0 -0
- createsonline/static/image/icon-16x16.webp +0 -0
- createsonline/static/image/icon-180x180.png +0 -0
- createsonline/static/image/icon-180x180.webp +0 -0
- createsonline/static/image/icon-192x192.png +0 -0
- createsonline/static/image/icon-192x192.webp +0 -0
- createsonline/static/image/icon-256x256.png +0 -0
- createsonline/static/image/icon-256x256.webp +0 -0
- createsonline/static/image/icon-32x32.png +0 -0
- createsonline/static/image/icon-32x32.webp +0 -0
- createsonline/static/image/icon-384x384.png +0 -0
- createsonline/static/image/icon-384x384.webp +0 -0
- createsonline/static/image/icon-48x48.png +0 -0
- createsonline/static/image/icon-48x48.webp +0 -0
- createsonline/static/image/icon-512x512.png +0 -0
- createsonline/static/image/icon-512x512.webp +0 -0
- createsonline/static/image/icon-64x64.png +0 -0
- createsonline/static/image/icon-64x64.webp +0 -0
- createsonline/static/image/logo-header-h100.png +0 -0
- createsonline/static/image/logo-header-h100.webp +0 -0
- createsonline/static/image/logo-header-h200@2x.png +0 -0
- createsonline/static/image/logo-header-h200@2x.webp +0 -0
- createsonline/static/image/logo.png +0 -0
- createsonline/static/js/admin.js +274 -0
- createsonline/static/site.webmanifest +35 -0
- createsonline/static/templates/admin/base.html +87 -0
- createsonline/static/templates/admin/dashboard.html +217 -0
- createsonline/static/templates/admin/model_form.html +270 -0
- createsonline/static/templates/admin/model_list.html +202 -0
- createsonline/static/test_script.js +15 -0
- createsonline/static/test_styles.css +59 -0
- createsonline/static_files.py +365 -0
- createsonline/templates/404.html +100 -0
- createsonline/templates/admin_login.html +169 -0
- createsonline/templates/base.html +102 -0
- createsonline/templates/index.html +151 -0
- createsonline/templates.py +205 -0
- createsonline/testing.py +322 -0
- createsonline/utils.py +448 -0
- createsonline/validation/__init__.py +49 -0
- createsonline/validation/fields.py +598 -0
- createsonline/validation/models.py +504 -0
- createsonline/validation/validators.py +561 -0
- createsonline/views.py +184 -0
- createsonline-0.1.26.dist-info/METADATA +46 -0
- createsonline-0.1.26.dist-info/RECORD +152 -0
- createsonline-0.1.26.dist-info/WHEEL +5 -0
- createsonline-0.1.26.dist-info/entry_points.txt +2 -0
- createsonline-0.1.26.dist-info/licenses/LICENSE +21 -0
- createsonline-0.1.26.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,925 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CREATESONLINE DataFrame Implementation
|
|
3
|
+
|
|
4
|
+
Pure Python dataframe data structure.
|
|
5
|
+
Lightweight alternative to Pandas DataFrame.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
from typing import Any, Dict, List, Optional, Union, Iterator, Callable, Tuple
|
|
10
|
+
from .series import CreatesonlineSeries
|
|
11
|
+
|
|
12
|
+
# Optional numpy import with fallback
|
|
13
|
+
try:
|
|
14
|
+
import numpy as np
|
|
15
|
+
NUMPY_AVAILABLE = True
|
|
16
|
+
NDArrayType = np.ndarray
|
|
17
|
+
except ImportError:
|
|
18
|
+
NUMPY_AVAILABLE = False
|
|
19
|
+
np = None
|
|
20
|
+
NDArrayType = Any # Fallback type
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class CreatesonlineDataFrame:
|
|
24
|
+
"""
|
|
25
|
+
CREATESONLINE DataFrame - Two-dimensional data structure
|
|
26
|
+
|
|
27
|
+
Pure Python implementation of a dataframe similar to Pandas DataFrame
|
|
28
|
+
but with zero external dependencies (except numpy for numerical operations).
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
def __init__(
|
|
32
|
+
self,
|
|
33
|
+
data: Union[Dict[str, List[Any]], List[Dict[str, Any]], List[List[Any]]] = None,
|
|
34
|
+
columns: Optional[List[str]] = None,
|
|
35
|
+
index: Optional[List[str]] = None
|
|
36
|
+
):
|
|
37
|
+
"""
|
|
38
|
+
Initialize CREATESONLINE DataFrame
|
|
39
|
+
|
|
40
|
+
Args:
|
|
41
|
+
data: DataFrame data as dict of columns, list of rows, or list of lists
|
|
42
|
+
columns: Column names
|
|
43
|
+
index: Row index labels
|
|
44
|
+
"""
|
|
45
|
+
if data is None:
|
|
46
|
+
self._data = {}
|
|
47
|
+
self._columns = columns or []
|
|
48
|
+
self._index = index or []
|
|
49
|
+
elif isinstance(data, dict):
|
|
50
|
+
# Dict of {column: [values]}
|
|
51
|
+
self._columns = list(data.keys())
|
|
52
|
+
self._data = {col: list(values) for col, values in data.items()}
|
|
53
|
+
|
|
54
|
+
# Ensure all columns have same length
|
|
55
|
+
if self._columns:
|
|
56
|
+
expected_length = len(self._data[self._columns[0]])
|
|
57
|
+
for col in self._columns:
|
|
58
|
+
if len(self._data[col]) != expected_length:
|
|
59
|
+
raise ValueError(f"All columns must have same length. Column '{col}' has length {len(self._data[col])}, expected {expected_length}")
|
|
60
|
+
|
|
61
|
+
self._index = index or list(range(expected_length))
|
|
62
|
+
else:
|
|
63
|
+
self._index = []
|
|
64
|
+
elif isinstance(data, list) and data and isinstance(data[0], dict):
|
|
65
|
+
# List of {column: value} dicts (rows)
|
|
66
|
+
all_columns = set()
|
|
67
|
+
for row in data:
|
|
68
|
+
all_columns.update(row.keys())
|
|
69
|
+
|
|
70
|
+
self._columns = columns or sorted(list(all_columns))
|
|
71
|
+
self._data = {col: [] for col in self._columns}
|
|
72
|
+
|
|
73
|
+
for row in data:
|
|
74
|
+
for col in self._columns:
|
|
75
|
+
self._data[col].append(row.get(col, None))
|
|
76
|
+
|
|
77
|
+
self._index = index or list(range(len(data)))
|
|
78
|
+
elif isinstance(data, list) and data and isinstance(data[0], list):
|
|
79
|
+
# List of lists (rows)
|
|
80
|
+
if not columns:
|
|
81
|
+
raise ValueError("columns parameter required for list of lists")
|
|
82
|
+
|
|
83
|
+
self._columns = list(columns)
|
|
84
|
+
self._data = {col: [] for col in self._columns}
|
|
85
|
+
|
|
86
|
+
for row in data:
|
|
87
|
+
if len(row) != len(self._columns):
|
|
88
|
+
raise ValueError(f"Row length {len(row)} doesn't match columns length {len(self._columns)}")
|
|
89
|
+
|
|
90
|
+
for i, val in enumerate(row):
|
|
91
|
+
self._data[self._columns[i]].append(val)
|
|
92
|
+
|
|
93
|
+
self._index = index or list(range(len(data)))
|
|
94
|
+
else:
|
|
95
|
+
raise ValueError("Invalid data format")
|
|
96
|
+
|
|
97
|
+
# Ensure index length matches data length
|
|
98
|
+
if self._columns and len(self._index) != len(self._data[self._columns[0]]):
|
|
99
|
+
if index is None:
|
|
100
|
+
self._index = list(range(len(self._data[self._columns[0]])))
|
|
101
|
+
else:
|
|
102
|
+
raise ValueError("Index length must match data length")
|
|
103
|
+
|
|
104
|
+
def __len__(self) -> int:
|
|
105
|
+
"""Return number of rows"""
|
|
106
|
+
return len(self._index)
|
|
107
|
+
|
|
108
|
+
def __getitem__(self, key: Union[str, List[str], slice, int]) -> Union[CreatesonlineSeries, 'CreatesonlineDataFrame']:
    """
    Select by column name(s) or by row position

    Args:
        key: A column name (returns a Series), a list of column names
            (returns a DataFrame), an integer row position (returns the
            row as a Series indexed by column name), or a slice of row
            positions (returns a DataFrame).

    Raises:
        KeyError: If a requested column does not exist.
        IndexError: If an integer position is out of range.
        TypeError: If ``key`` is none of the supported types.
    """
    if isinstance(key, str):
        # One column -> Series sharing this frame's row index
        if key not in self._columns:
            raise KeyError(f"Column '{key}' not found")
        return CreatesonlineSeries(data=self._data[key], index=self._index, name=key)

    if isinstance(key, list):
        # Several columns -> DataFrame; validate every name first
        for name in key:
            if name not in self._columns:
                raise KeyError(f"Column '{name}' not found")
        subset = {name: self._data[name] for name in key}
        return CreatesonlineDataFrame(data=subset, index=self._index)

    if isinstance(key, slice):
        # Row slice -> DataFrame with the matching slice of the index
        sliced = {name: self._data[name][key] for name in self._columns}
        return CreatesonlineDataFrame(data=sliced, index=self._index[key])

    if isinstance(key, int):
        # Single row by position; negative positions count from the end
        n_rows = len(self._index)
        position = n_rows + key if key < 0 else key
        if position < 0 or position >= n_rows:
            raise IndexError("Row index out of range")

        cells = {name: self._data[name][position] for name in self._columns}
        return CreatesonlineSeries(
            data=list(cells.values()),
            index=self._columns,
            name=self._index[position]
        )

    raise TypeError(f"Invalid key type: {type(key)}")
|
|
153
|
+
|
|
154
|
+
def __setitem__(self, key: str, value: Union[List[Any], CreatesonlineSeries, Any]):
    """
    Assign a column, creating it if it does not exist yet

    Args:
        key: Column name.
        value: A Series or list with one entry per row, or any other
            value, which is repeated for every row.

    Raises:
        ValueError: If a Series or list does not match the row count.
    """
    n_rows = len(self._index)

    if isinstance(value, CreatesonlineSeries):
        if len(value) != n_rows:
            raise ValueError("Series length must match DataFrame length")
        column_values = value.values
    elif isinstance(value, list):
        if len(value) != n_rows:
            raise ValueError("List length must match DataFrame length")
        column_values = list(value)
    else:
        # Scalar: broadcast to every row
        column_values = [value] * n_rows

    self._data[key] = column_values

    # New columns are appended at the end of the column order
    if key not in self._columns:
        self._columns.append(key)
|
|
170
|
+
|
|
171
|
+
def __delitem__(self, key: str):
|
|
172
|
+
"""Delete column"""
|
|
173
|
+
if key not in self._columns:
|
|
174
|
+
raise KeyError(f"Column '{key}' not found")
|
|
175
|
+
|
|
176
|
+
self._columns.remove(key)
|
|
177
|
+
del self._data[key]
|
|
178
|
+
|
|
179
|
+
def __iter__(self) -> Iterator[str]:
|
|
180
|
+
"""Iterate over column names"""
|
|
181
|
+
return iter(self._columns)
|
|
182
|
+
|
|
183
|
+
def __str__(self) -> str:
|
|
184
|
+
"""String representation"""
|
|
185
|
+
if not self._columns or not self._index:
|
|
186
|
+
return "Empty CreatesonlineDataFrame"
|
|
187
|
+
|
|
188
|
+
# Calculate column widths
|
|
189
|
+
col_widths = {}
|
|
190
|
+
for col in self._columns:
|
|
191
|
+
col_widths[col] = max(
|
|
192
|
+
len(str(col)),
|
|
193
|
+
max(len(str(val)) for val in self._data[col][:20]) # Limit to first 20
|
|
194
|
+
)
|
|
195
|
+
|
|
196
|
+
# Build header
|
|
197
|
+
header = "".join(f"{col:<{col_widths[col] + 2}}" for col in self._columns)
|
|
198
|
+
lines = [header]
|
|
199
|
+
lines.append("-" * len(header))
|
|
200
|
+
|
|
201
|
+
# Build rows (limit to 20)
|
|
202
|
+
for i, idx in enumerate(self._index[:20]):
|
|
203
|
+
row = "".join(
|
|
204
|
+
f"{str(self._data[col][i]):<{col_widths[col] + 2}}"
|
|
205
|
+
for col in self._columns
|
|
206
|
+
)
|
|
207
|
+
lines.append(row)
|
|
208
|
+
|
|
209
|
+
if len(self._index) > 20:
|
|
210
|
+
lines.append("...")
|
|
211
|
+
lines.append(f"[{len(self._index)} rows x {len(self._columns)} columns]")
|
|
212
|
+
|
|
213
|
+
return "\n".join(lines)
|
|
214
|
+
|
|
215
|
+
def __repr__(self) -> str:
|
|
216
|
+
"""String representation"""
|
|
217
|
+
return self.__str__()
|
|
218
|
+
|
|
219
|
+
@property
|
|
220
|
+
def shape(self) -> Tuple[int, int]:
|
|
221
|
+
"""Get DataFrame shape (rows, columns)"""
|
|
222
|
+
return (len(self._index), len(self._columns))
|
|
223
|
+
|
|
224
|
+
@property
|
|
225
|
+
def size(self) -> int:
|
|
226
|
+
"""Get total number of elements"""
|
|
227
|
+
return len(self._index) * len(self._columns)
|
|
228
|
+
|
|
229
|
+
@property
|
|
230
|
+
def columns(self) -> List[str]:
|
|
231
|
+
"""Get column names"""
|
|
232
|
+
return self._columns.copy()
|
|
233
|
+
|
|
234
|
+
@property
|
|
235
|
+
def index(self) -> List[str]:
|
|
236
|
+
"""Get row index"""
|
|
237
|
+
return self._index.copy()
|
|
238
|
+
|
|
239
|
+
@property
|
|
240
|
+
def values(self) -> List[List[Any]]:
|
|
241
|
+
"""Get DataFrame values as list of lists"""
|
|
242
|
+
return [
|
|
243
|
+
[self._data[col][i] for col in self._columns]
|
|
244
|
+
for i in range(len(self._index))
|
|
245
|
+
]
|
|
246
|
+
|
|
247
|
+
def head(self, n: int = 5) -> 'CreatesonlineDataFrame':
|
|
248
|
+
"""Get first n rows"""
|
|
249
|
+
return self[:n]
|
|
250
|
+
|
|
251
|
+
def tail(self, n: int = 5) -> 'CreatesonlineDataFrame':
|
|
252
|
+
"""Get last n rows"""
|
|
253
|
+
return self[-n:]
|
|
254
|
+
|
|
255
|
+
def copy(self) -> 'CreatesonlineDataFrame':
|
|
256
|
+
"""Create a copy of the DataFrame"""
|
|
257
|
+
return CreatesonlineDataFrame(
|
|
258
|
+
data={col: values.copy() for col, values in self._data.items()},
|
|
259
|
+
index=self._index.copy()
|
|
260
|
+
)
|
|
261
|
+
|
|
262
|
+
def info(self) -> Dict[str, Any]:
|
|
263
|
+
"""Get DataFrame info"""
|
|
264
|
+
return {
|
|
265
|
+
'shape': self.shape,
|
|
266
|
+
'columns': len(self._columns),
|
|
267
|
+
'non_null_count': {
|
|
268
|
+
col: sum(1 for val in self._data[col] if val is not None)
|
|
269
|
+
for col in self._columns
|
|
270
|
+
},
|
|
271
|
+
'dtypes': {
|
|
272
|
+
col: type(self._data[col][0]).__name__ if self._data[col] else 'object'
|
|
273
|
+
for col in self._columns
|
|
274
|
+
},
|
|
275
|
+
'memory_usage': f"{self.size * 8} bytes" # Rough estimate
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
def describe(self) -> 'CreatesonlineDataFrame':
    """
    Descriptive statistics for each column

    Calls ``describe()`` on every column Series (expected to return a
    mapping of statistic name to value) and arranges the results with one
    row per statistic and one column per original column.

    Returns:
        DataFrame indexed by statistic name. A statistic that a column
        does not report is None. A frame without columns yields an empty
        DataFrame.
    """
    if not self._columns:
        # Nothing to describe
        return CreatesonlineDataFrame()

    stats = {col: self[col].describe() for col in self._columns}

    # Statistic names in order of first appearance across all columns, so a
    # column reporting extra statistics is not silently truncated
    stat_names = []
    for col in self._columns:
        for stat in stats[col].keys():
            if stat not in stat_names:
                stat_names.append(stat)

    # Keyed by original column so the row index (stat_names) lines up with
    # the length of every column list
    result_data = {
        col: [stats[col].get(stat, None) for stat in stat_names]
        for col in self._columns
    }

    return CreatesonlineDataFrame(
        data=result_data,
        index=stat_names
    )
|
|
299
|
+
|
|
300
|
+
def sort_values(
|
|
301
|
+
self,
|
|
302
|
+
by: Union[str, List[str]],
|
|
303
|
+
ascending: bool = True
|
|
304
|
+
) -> 'CreatesonlineDataFrame':
|
|
305
|
+
"""Sort DataFrame by column(s)"""
|
|
306
|
+
if isinstance(by, str):
|
|
307
|
+
by = [by]
|
|
308
|
+
|
|
309
|
+
# Check columns exist
|
|
310
|
+
for col in by:
|
|
311
|
+
if col not in self._columns:
|
|
312
|
+
raise KeyError(f"Column '{col}' not found")
|
|
313
|
+
|
|
314
|
+
# Create list of (row_data, original_index) for sorting
|
|
315
|
+
rows_with_index = []
|
|
316
|
+
for i in range(len(self._index)):
|
|
317
|
+
row_data = [self._data[col][i] for col in by]
|
|
318
|
+
rows_with_index.append((row_data, i))
|
|
319
|
+
|
|
320
|
+
# Sort by the specified columns
|
|
321
|
+
rows_with_index.sort(
|
|
322
|
+
key=lambda x: x[0],
|
|
323
|
+
reverse=not ascending
|
|
324
|
+
)
|
|
325
|
+
|
|
326
|
+
# Extract sorted indices
|
|
327
|
+
sorted_indices = [original_i for _, original_i in rows_with_index]
|
|
328
|
+
|
|
329
|
+
# Create new DataFrame with sorted data
|
|
330
|
+
new_data = {}
|
|
331
|
+
for col in self._columns:
|
|
332
|
+
new_data[col] = [self._data[col][i] for i in sorted_indices]
|
|
333
|
+
|
|
334
|
+
new_index = [self._index[i] for i in sorted_indices]
|
|
335
|
+
|
|
336
|
+
return CreatesonlineDataFrame(
|
|
337
|
+
data=new_data,
|
|
338
|
+
index=new_index
|
|
339
|
+
)
|
|
340
|
+
|
|
341
|
+
def sort_index(self, ascending: bool = True) -> 'CreatesonlineDataFrame':
|
|
342
|
+
"""Sort DataFrame by index"""
|
|
343
|
+
index_with_position = [(idx, i) for i, idx in enumerate(self._index)]
|
|
344
|
+
index_with_position.sort(key=lambda x: x[0], reverse=not ascending)
|
|
345
|
+
|
|
346
|
+
sorted_indices = [i for _, i in index_with_position]
|
|
347
|
+
|
|
348
|
+
new_data = {}
|
|
349
|
+
for col in self._columns:
|
|
350
|
+
new_data[col] = [self._data[col][i] for i in sorted_indices]
|
|
351
|
+
|
|
352
|
+
new_index = [self._index[i] for i in sorted_indices]
|
|
353
|
+
|
|
354
|
+
return CreatesonlineDataFrame(
|
|
355
|
+
data=new_data,
|
|
356
|
+
index=new_index
|
|
357
|
+
)
|
|
358
|
+
|
|
359
|
+
def reset_index(self, drop: bool = True) -> 'CreatesonlineDataFrame':
|
|
360
|
+
"""Reset index to default integer index"""
|
|
361
|
+
if drop:
|
|
362
|
+
return CreatesonlineDataFrame(
|
|
363
|
+
data=self._data.copy(),
|
|
364
|
+
columns=self._columns
|
|
365
|
+
)
|
|
366
|
+
else:
|
|
367
|
+
new_data = {'index': self._index.copy()}
|
|
368
|
+
new_data.update(self._data)
|
|
369
|
+
return CreatesonlineDataFrame(data=new_data)
|
|
370
|
+
|
|
371
|
+
def set_index(self, column: str) -> 'CreatesonlineDataFrame':
|
|
372
|
+
"""Set a column as the index"""
|
|
373
|
+
if column not in self._columns:
|
|
374
|
+
raise KeyError(f"Column '{column}' not found")
|
|
375
|
+
|
|
376
|
+
new_index = [str(val) for val in self._data[column]]
|
|
377
|
+
new_data = {col: values for col, values in self._data.items() if col != column}
|
|
378
|
+
new_columns = [col for col in self._columns if col != column]
|
|
379
|
+
|
|
380
|
+
result = CreatesonlineDataFrame(data=new_data, index=new_index)
|
|
381
|
+
result._columns = new_columns
|
|
382
|
+
return result
|
|
383
|
+
|
|
384
|
+
def drop(
|
|
385
|
+
self,
|
|
386
|
+
labels: Union[str, List[str]] = None,
|
|
387
|
+
columns: Union[str, List[str]] = None,
|
|
388
|
+
index: Union[str, List[str]] = None
|
|
389
|
+
) -> 'CreatesonlineDataFrame':
|
|
390
|
+
"""Drop columns or rows"""
|
|
391
|
+
result = self.copy()
|
|
392
|
+
|
|
393
|
+
# Drop columns
|
|
394
|
+
if columns is not None or labels is not None:
|
|
395
|
+
cols_to_drop = columns or labels
|
|
396
|
+
if isinstance(cols_to_drop, str):
|
|
397
|
+
cols_to_drop = [cols_to_drop]
|
|
398
|
+
|
|
399
|
+
for col in cols_to_drop:
|
|
400
|
+
if col in result._columns:
|
|
401
|
+
del result[col]
|
|
402
|
+
|
|
403
|
+
# Drop rows by index
|
|
404
|
+
if index is not None:
|
|
405
|
+
if isinstance(index, str):
|
|
406
|
+
index = [index]
|
|
407
|
+
|
|
408
|
+
indices_to_keep = []
|
|
409
|
+
for i, idx in enumerate(result._index):
|
|
410
|
+
if idx not in index:
|
|
411
|
+
indices_to_keep.append(i)
|
|
412
|
+
|
|
413
|
+
new_data = {}
|
|
414
|
+
for col in result._columns:
|
|
415
|
+
new_data[col] = [result._data[col][i] for i in indices_to_keep]
|
|
416
|
+
|
|
417
|
+
new_index = [result._index[i] for i in indices_to_keep]
|
|
418
|
+
|
|
419
|
+
return CreatesonlineDataFrame(
|
|
420
|
+
data=new_data,
|
|
421
|
+
index=new_index
|
|
422
|
+
)
|
|
423
|
+
|
|
424
|
+
return result
|
|
425
|
+
|
|
426
|
+
def dropna(self, axis: int = 0, how: str = 'any') -> 'CreatesonlineDataFrame':
|
|
427
|
+
"""Drop rows or columns with null values"""
|
|
428
|
+
if axis == 0: # Drop rows
|
|
429
|
+
rows_to_keep = []
|
|
430
|
+
for i in range(len(self._index)):
|
|
431
|
+
row_values = [self._data[col][i] for col in self._columns]
|
|
432
|
+
|
|
433
|
+
if how == 'any':
|
|
434
|
+
if not any(val is None for val in row_values):
|
|
435
|
+
rows_to_keep.append(i)
|
|
436
|
+
elif how == 'all':
|
|
437
|
+
if not all(val is None for val in row_values):
|
|
438
|
+
rows_to_keep.append(i)
|
|
439
|
+
|
|
440
|
+
new_data = {}
|
|
441
|
+
for col in self._columns:
|
|
442
|
+
new_data[col] = [self._data[col][i] for i in rows_to_keep]
|
|
443
|
+
|
|
444
|
+
new_index = [self._index[i] for i in rows_to_keep]
|
|
445
|
+
|
|
446
|
+
return CreatesonlineDataFrame(
|
|
447
|
+
data=new_data,
|
|
448
|
+
index=new_index
|
|
449
|
+
)
|
|
450
|
+
else: # Drop columns
|
|
451
|
+
cols_to_keep = []
|
|
452
|
+
for col in self._columns:
|
|
453
|
+
col_values = self._data[col]
|
|
454
|
+
|
|
455
|
+
if how == 'any':
|
|
456
|
+
if not any(val is None for val in col_values):
|
|
457
|
+
cols_to_keep.append(col)
|
|
458
|
+
elif how == 'all':
|
|
459
|
+
if not all(val is None for val in col_values):
|
|
460
|
+
cols_to_keep.append(col)
|
|
461
|
+
|
|
462
|
+
return self[cols_to_keep]
|
|
463
|
+
|
|
464
|
+
def fillna(self, value: Any) -> 'CreatesonlineDataFrame':
|
|
465
|
+
"""Fill null values with specified value"""
|
|
466
|
+
new_data = {}
|
|
467
|
+
for col in self._columns:
|
|
468
|
+
new_data[col] = [val if val is not None else value for val in self._data[col]]
|
|
469
|
+
|
|
470
|
+
return CreatesonlineDataFrame(
|
|
471
|
+
data=new_data,
|
|
472
|
+
index=self._index
|
|
473
|
+
)
|
|
474
|
+
|
|
475
|
+
def apply(
    self,
    func: Callable,
    axis: int = 0
) -> Union['CreatesonlineDataFrame', CreatesonlineSeries]:
    """
    Call ``func`` on every column or every row

    Args:
        func: Callable that receives a Series.
        axis: 0 passes each column; any other value passes each row as a
            Series indexed by column name.

    Returns:
        For rows: a Series named 'applied' holding one result per row.
        For columns: a Series named 'applied' indexed by column name when
        every result is a string or not iterable, otherwise a DataFrame
        built from the per-column results.
    """
    if axis != 0:
        # Row-wise
        row_results = []
        for pos in range(len(self._index)):
            row_series = CreatesonlineSeries(
                data=[self._data[name][pos] for name in self._columns],
                index=self._columns,
                name=self._index[pos]
            )
            row_results.append(func(row_series))

        return CreatesonlineSeries(
            data=row_results,
            index=self._index,
            name='applied'
        )

    # Column-wise
    per_column = {name: func(self[name]) for name in self._columns}

    def is_scalar(result):
        # Strings are iterable but are treated as single values
        return isinstance(result, str) or not hasattr(result, '__iter__')

    if all(is_scalar(result) for result in per_column.values()):
        return CreatesonlineSeries(
            data=list(per_column.values()),
            index=self._columns,
            name='applied'
        )

    return CreatesonlineDataFrame(data=per_column, index=self._index)
|
|
511
|
+
|
|
512
|
+
def groupby(self, by: Union[str, List[str]]) -> 'DataFrameGroupBy':
    """Wrap this frame and the grouping column(s) in a DataFrameGroupBy"""
    grouped = DataFrameGroupBy(self, by)
    return grouped
|
|
515
|
+
|
|
516
|
+
def merge(
|
|
517
|
+
self,
|
|
518
|
+
other: 'CreatesonlineDataFrame',
|
|
519
|
+
on: Optional[Union[str, List[str]]] = None,
|
|
520
|
+
left_on: Optional[Union[str, List[str]]] = None,
|
|
521
|
+
right_on: Optional[Union[str, List[str]]] = None,
|
|
522
|
+
how: str = 'inner'
|
|
523
|
+
) -> 'CreatesonlineDataFrame':
|
|
524
|
+
"""
|
|
525
|
+
Merge DataFrames with comprehensive join support
|
|
526
|
+
|
|
527
|
+
Args:
|
|
528
|
+
other: DataFrame to merge with
|
|
529
|
+
on: Column(s) to join on (must be present in both DataFrames)
|
|
530
|
+
left_on: Column(s) to join on for left DataFrame
|
|
531
|
+
right_on: Column(s) to join on for right DataFrame
|
|
532
|
+
how: Type of merge ('inner', 'left', 'right', 'outer')
|
|
533
|
+
|
|
534
|
+
Returns:
|
|
535
|
+
Merged DataFrame
|
|
536
|
+
"""
|
|
537
|
+
# Validate join type
|
|
538
|
+
valid_joins = {'inner', 'left', 'right', 'outer'}
|
|
539
|
+
if how not in valid_joins:
|
|
540
|
+
raise ValueError(f"Invalid join type '{how}'. Must be one of: {valid_joins}")
|
|
541
|
+
|
|
542
|
+
# Determine join keys
|
|
543
|
+
if on is not None:
|
|
544
|
+
if left_on is not None or right_on is not None:
|
|
545
|
+
raise ValueError("Cannot specify 'on' with 'left_on' or 'right_on'")
|
|
546
|
+
left_on = right_on = on
|
|
547
|
+
elif left_on is None or right_on is None:
|
|
548
|
+
raise ValueError("Must specify either 'on' or both 'left_on' and 'right_on'")
|
|
549
|
+
|
|
550
|
+
# Ensure keys are lists
|
|
551
|
+
if isinstance(left_on, str):
|
|
552
|
+
left_on = [left_on]
|
|
553
|
+
if isinstance(right_on, str):
|
|
554
|
+
right_on = [right_on]
|
|
555
|
+
|
|
556
|
+
# Validate that join keys exist
|
|
557
|
+
for col in left_on:
|
|
558
|
+
if col not in self._columns:
|
|
559
|
+
raise KeyError(f"Left join key '{col}' not found in DataFrame")
|
|
560
|
+
for col in right_on:
|
|
561
|
+
if col not in other._columns:
|
|
562
|
+
raise KeyError(f"Right join key '{col}' not found in DataFrame")
|
|
563
|
+
|
|
564
|
+
# Build lookup dictionaries
|
|
565
|
+
left_lookup = {}
|
|
566
|
+
for i in range(len(self._index)):
|
|
567
|
+
key = tuple(self._data[col][i] for col in left_on)
|
|
568
|
+
if key not in left_lookup:
|
|
569
|
+
left_lookup[key] = []
|
|
570
|
+
left_lookup[key].append(i)
|
|
571
|
+
|
|
572
|
+
right_lookup = {}
|
|
573
|
+
for i in range(len(other._index)):
|
|
574
|
+
key = tuple(other._data[col][i] for col in right_on)
|
|
575
|
+
if key not in right_lookup:
|
|
576
|
+
right_lookup[key] = []
|
|
577
|
+
right_lookup[key].append(i)
|
|
578
|
+
|
|
579
|
+
# Perform merge based on join type
|
|
580
|
+
merged_rows = []
|
|
581
|
+
|
|
582
|
+
if how == 'inner':
|
|
583
|
+
# Inner join: only matching keys
|
|
584
|
+
for key in left_lookup:
|
|
585
|
+
if key in right_lookup:
|
|
586
|
+
for left_i in left_lookup[key]:
|
|
587
|
+
for right_i in right_lookup[key]:
|
|
588
|
+
merged_rows.append(self._create_merged_row(left_i, right_i, other))
|
|
589
|
+
|
|
590
|
+
elif how == 'left':
|
|
591
|
+
# Left join: all left keys, matching right keys
|
|
592
|
+
for key in left_lookup:
|
|
593
|
+
if key in right_lookup:
|
|
594
|
+
for left_i in left_lookup[key]:
|
|
595
|
+
for right_i in right_lookup[key]:
|
|
596
|
+
merged_rows.append(self._create_merged_row(left_i, right_i, other))
|
|
597
|
+
else:
|
|
598
|
+
for left_i in left_lookup[key]:
|
|
599
|
+
merged_rows.append(self._create_merged_row(left_i, None, other))
|
|
600
|
+
|
|
601
|
+
elif how == 'right':
|
|
602
|
+
# Right join: all right keys, matching left keys
|
|
603
|
+
for key in right_lookup:
|
|
604
|
+
if key in left_lookup:
|
|
605
|
+
for right_i in right_lookup[key]:
|
|
606
|
+
for left_i in left_lookup[key]:
|
|
607
|
+
merged_rows.append(self._create_merged_row(left_i, right_i, other))
|
|
608
|
+
else:
|
|
609
|
+
for right_i in right_lookup[key]:
|
|
610
|
+
merged_rows.append(self._create_merged_row(None, right_i, other))
|
|
611
|
+
|
|
612
|
+
elif how == 'outer':
|
|
613
|
+
# Outer join: all keys from both sides
|
|
614
|
+
all_keys = set(left_lookup.keys()) | set(right_lookup.keys())
|
|
615
|
+
|
|
616
|
+
for key in all_keys:
|
|
617
|
+
left_indices = left_lookup.get(key, [])
|
|
618
|
+
right_indices = right_lookup.get(key, [])
|
|
619
|
+
|
|
620
|
+
if left_indices and right_indices:
|
|
621
|
+
for left_i in left_indices:
|
|
622
|
+
for right_i in right_indices:
|
|
623
|
+
merged_rows.append(self._create_merged_row(left_i, right_i, other))
|
|
624
|
+
elif left_indices:
|
|
625
|
+
for left_i in left_indices:
|
|
626
|
+
merged_rows.append(self._create_merged_row(left_i, None, other))
|
|
627
|
+
else:
|
|
628
|
+
for right_i in right_indices:
|
|
629
|
+
merged_rows.append(self._create_merged_row(None, right_i, other))
|
|
630
|
+
|
|
631
|
+
return CreatesonlineDataFrame(data=merged_rows)
|
|
632
|
+
|
|
633
|
+
def _create_merged_row(self, left_i: Optional[int], right_i: Optional[int], other: 'CreatesonlineDataFrame') -> Dict[str, Any]:
|
|
634
|
+
"""Helper method to create a merged row"""
|
|
635
|
+
row = {}
|
|
636
|
+
|
|
637
|
+
# Add left columns
|
|
638
|
+
if left_i is not None:
|
|
639
|
+
for col in self._columns:
|
|
640
|
+
row[col] = self._data[col][left_i]
|
|
641
|
+
else:
|
|
642
|
+
for col in self._columns:
|
|
643
|
+
row[col] = None
|
|
644
|
+
|
|
645
|
+
# Add right columns (handle name conflicts)
|
|
646
|
+
if right_i is not None:
|
|
647
|
+
for col in other._columns:
|
|
648
|
+
if col not in row:
|
|
649
|
+
row[col] = other._data[col][right_i]
|
|
650
|
+
else:
|
|
651
|
+
row[f"{col}_y"] = other._data[col][right_i]
|
|
652
|
+
if f"{col}_x" not in row:
|
|
653
|
+
row[f"{col}_x"] = row[col]
|
|
654
|
+
del row[col]
|
|
655
|
+
else:
|
|
656
|
+
for col in other._columns:
|
|
657
|
+
if col not in row:
|
|
658
|
+
row[col] = None
|
|
659
|
+
else:
|
|
660
|
+
row[f"{col}_y"] = None
|
|
661
|
+
if f"{col}_x" not in row:
|
|
662
|
+
row[f"{col}_x"] = row[col]
|
|
663
|
+
del row[col]
|
|
664
|
+
|
|
665
|
+
return row
|
|
666
|
+
|
|
667
|
+
def concat(self, other: 'CreatesonlineDataFrame', axis: int = 0) -> 'CreatesonlineDataFrame':
    """Concatenate this frame with ``other``.

    Args:
        other: Frame to append.
        axis: ``0`` stacks the rows of ``other`` below this frame's rows;
            any other value places the columns of ``other`` beside this
            frame's columns.

    Returns:
        A new CreatesonlineDataFrame. For ``axis == 0`` the columns are the
        union of both frames' columns in first-seen order (this frame's
        columns first), cells for columns a frame lacks are None, and the
        index is this frame's index followed by ``other``'s. Otherwise the
        result keeps this frame's index and a column of ``other`` whose name
        is already taken is stored as ``<name>_1``, ``<name>_2``, ...

    Raises:
        ValueError: If ``axis != 0`` and the frames differ in row count.
    """
    if axis == 0:  # Concatenate rows
        # dict.fromkeys de-duplicates while keeping first-seen order; a set
        # would shuffle the column order unpredictably between runs.
        all_columns = list(dict.fromkeys(self._columns + other._columns))
        new_data = {col: [] for col in all_columns}

        # Append this frame's rows, then other's, padding absent columns.
        for frame in (self, other):
            present = set(frame._columns)
            row_count = len(frame._index)
            for col in all_columns:
                if col in present:
                    source = frame._data[col]
                    new_data[col].extend(source[i] for i in range(row_count))
                else:
                    new_data[col].extend([None] * row_count)

        return CreatesonlineDataFrame(
            data=new_data,
            index=self._index + other._index
        )

    # Concatenate columns
    if len(self._index) != len(other._index):
        raise ValueError("DataFrames must have same number of rows for column concatenation")

    new_data = self._data.copy()

    for col in other._columns:
        # Find the first free name: col, col_1, col_2, ...
        target = col
        counter = 0
        while target in new_data:
            counter += 1
            target = f"{col}_{counter}"
        new_data[target] = other._data[col]

    return CreatesonlineDataFrame(
        data=new_data,
        index=self._index
    )
|
|
719
|
+
|
|
720
|
+
def to_dict(self, orient: str = 'dict') -> Union[Dict[str, List[Any]], List[Dict[str, Any]]]:
    """Export the frame's contents as plain Python containers.

    Args:
        orient: Output layout.
            * ``'dict'`` - a shallow ``.copy()`` of the internal
              column -> values mapping.
            * ``'records'`` - a list holding one ``{column: value}`` dict
              per row.
            * ``'list'`` - a mapping of column -> newly built list of values.

    Returns:
        The data in the requested layout.

    Raises:
        ValueError: If ``orient`` is not one of the layouts above.
    """
    if orient == 'dict':
        return self._data.copy()

    if orient == 'list':
        return {name: list(column) for name, column in self._data.items()}

    if orient == 'records':
        records = []
        for position in range(len(self._index)):
            records.append({name: self._data[name][position] for name in self._columns})
        return records

    raise ValueError(f"Invalid orient: {orient}")
|
|
733
|
+
|
|
734
|
+
def to_json(self, orient: str = 'records', indent: Optional[int] = None) -> str:
    """Serialise the frame to a JSON string.

    Args:
        orient: Layout passed through to ``to_dict``.
        indent: Indentation passed through to ``json.dumps``.

    Returns:
        The JSON text. Values the JSON encoder cannot handle natively are
        written as their ``str()`` form.
    """
    return json.dumps(self.to_dict(orient=orient), indent=indent, default=str)
|
|
738
|
+
|
|
739
|
+
def to_numpy(self) -> Any:
    """Return the frame's ``values`` wrapped in a NumPy array.

    Raises:
        ImportError: If NumPy is not available in this environment.
    """
    if NUMPY_AVAILABLE:
        return np.array(self.values)

    raise ImportError("NumPy is required for to_numpy(). Install with: pip install numpy")
|
|
744
|
+
|
|
745
|
+
|
|
746
|
+
class DataFrameGroupBy:
|
|
747
|
+
"""GroupBy functionality for CreatesonlineDataFrame"""
|
|
748
|
+
|
|
749
|
+
def __init__(self, df: CreatesonlineDataFrame, by: Union[str, List[str]]):
|
|
750
|
+
self.df = df
|
|
751
|
+
self.by = by if isinstance(by, list) else [by]
|
|
752
|
+
self._groups = self._create_groups()
|
|
753
|
+
|
|
754
|
+
def _create_groups(self) -> Dict[tuple, List[int]]:
|
|
755
|
+
"""Create groups dictionary"""
|
|
756
|
+
groups = {}
|
|
757
|
+
|
|
758
|
+
for i in range(len(self.df._index)):
|
|
759
|
+
key = tuple(self.df._data[col][i] for col in self.by)
|
|
760
|
+
if key not in groups:
|
|
761
|
+
groups[key] = []
|
|
762
|
+
groups[key].append(i)
|
|
763
|
+
|
|
764
|
+
return groups
|
|
765
|
+
|
|
766
|
+
def get_group(self, key: tuple) -> CreatesonlineDataFrame:
    """Return the rows of one group as a new frame.

    Args:
        key: Group key as a tuple with one value per grouping column. A
            value that is not a tuple is treated as the key of a
            single-column grouping and wrapped into a 1-tuple.

    Returns:
        A CreatesonlineDataFrame holding the group's rows with their
        original index labels.

    Raises:
        KeyError: If no group has the given key.
    """
    # Group keys are always stored as tuples, so a bare value can only ever
    # match after being wrapped.
    lookup = key if isinstance(key, tuple) else (key,)
    if lookup not in self._groups:
        raise KeyError(f"Group {key} not found")

    indices = self._groups[lookup]
    new_data = {
        col: [self.df._data[col][i] for i in indices]
        for col in self.df._columns
    }
    new_index = [self.df._index[i] for i in indices]

    return CreatesonlineDataFrame(
        data=new_data,
        index=new_index
    )
|
|
782
|
+
|
|
783
|
+
def agg(self, func: Union[str, Callable, Dict[str, Union[str, Callable]]]) -> CreatesonlineDataFrame:
    """Aggregate each group down to a single row.

    Args:
        func: How to aggregate.
            * ``str`` - one of 'count', 'mean', 'sum', 'min', 'max',
              applied to every column that is not a grouping key.
            * callable - called with the list of a group's values for one
              column; applied to every non-key column.
            * ``dict`` - maps column name to a name or a callable. Columns
              missing from the frame are skipped; an unrecognised name or
              a non-callable entry produces None for that column.

        The named aggregations ignore None and NaN. 'count' counts the
        remaining values of any type; the others use only int/float values
        and yield None when a group has none.

    Returns:
        A CreatesonlineDataFrame with one row per group. The index holds
        the stringified group key (the bare key value when grouping by a
        single column); the grouping columns are not part of the data for
        the string and callable forms.

    Raises:
        ValueError: If ``func`` is a string that names no known aggregation.
        TypeError: If ``func`` is not a string, callable, or dict.
    """
    def is_missing(value: Any) -> bool:
        # NaN is the only float that compares unequal to itself.
        return value is None or (isinstance(value, float) and value != value)

    def numeric_only(reduce: Callable[[List[Any]], Any]) -> Callable[[List[Any]], Any]:
        # Wrap ``reduce`` so it sees only real numbers and empty input gives None.
        def apply(values: List[Any]) -> Any:
            numbers = [v for v in values if isinstance(v, (int, float)) and not is_missing(v)]
            return reduce(numbers) if numbers else None
        return apply

    def always_none(values: List[Any]) -> None:
        return None

    # One shared table so the string form and the dict form agree.
    named = {
        'count': lambda values: len([v for v in values if not is_missing(v)]),
        'mean': numeric_only(lambda numbers: sum(numbers) / len(numbers)),
        'sum': numeric_only(sum),
        'min': numeric_only(min),
        'max': numeric_only(max),
    }

    # Build the (column, reducer) plan once instead of re-deciding per group.
    if isinstance(func, str):
        if func not in named:
            raise ValueError(f"Unknown aggregation function: {func}")
        plan = [(col, named[func]) for col in self.df._columns if col not in self.by]
        results = {col: [] for col, _ in plan}
    elif callable(func):
        plan = [(col, func) for col in self.df._columns if col not in self.by]
        results = {col: [] for col, _ in plan}
    elif isinstance(func, dict):
        plan = []
        for col, col_func in func.items():
            if col not in self.df._columns:
                continue
            if isinstance(col_func, str):
                reducer = named.get(col_func, always_none)
            elif callable(col_func):
                reducer = col_func
            else:
                reducer = always_none
            plan.append((col, reducer))
        # Result columns are created on first use, so a grouping with no
        # groups yields empty data for the dict form.
        results = {}
    else:
        raise TypeError("func must be string, callable, or dict")

    for indices in self._groups.values():
        for col, reducer in plan:
            col_values = [self.df._data[col][i] for i in indices]
            results.setdefault(col, []).append(reducer(col_values))

    # Create index from group keys
    if len(self.by) == 1:
        index = [str(key[0]) for key in self._groups]
    else:
        index = [str(key) for key in self._groups]

    return CreatesonlineDataFrame(
        data=results,
        index=index
    )
|
|
906
|
+
|
|
907
|
+
def sum(self) -> CreatesonlineDataFrame:
    """Aggregate every group with the built-in 'sum' aggregation."""
    return self.agg(func='sum')
|
|
910
|
+
|
|
911
|
+
def mean(self) -> CreatesonlineDataFrame:
    """Aggregate every group with the built-in 'mean' aggregation."""
    return self.agg(func='mean')
|
|
914
|
+
|
|
915
|
+
def count(self) -> CreatesonlineDataFrame:
    """Aggregate every group with the built-in 'count' aggregation."""
    return self.agg(func='count')
|
|
918
|
+
|
|
919
|
+
def min(self) -> CreatesonlineDataFrame:
    """Aggregate every group with the built-in 'min' aggregation."""
    return self.agg(func='min')
|
|
922
|
+
|
|
923
|
+
def max(self) -> CreatesonlineDataFrame:
    """Aggregate every group with the built-in 'max' aggregation."""
    return self.agg(func='max')
|