node-pandas 1.0.5 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.kiro/agents/git-committer-agent.md +208 -0
- package/.kiro/agents/npm-publisher-agent.md +501 -0
- package/.kiro/publish-status-2.0.0.md +134 -0
- package/.kiro/published-versions.md +11 -0
- package/.kiro/specs/pandas-like-enhancements/.config.kiro +1 -0
- package/.kiro/specs/pandas-like-enhancements/design.md +377 -0
- package/.kiro/specs/pandas-like-enhancements/requirements.md +257 -0
- package/.kiro/specs/pandas-like-enhancements/tasks.md +477 -0
- package/CHANGELOG.md +42 -0
- package/README.md +243 -0
- package/TESTING_SETUP.md +183 -0
- package/jest.config.js +25 -0
- package/package.json +11 -3
- package/src/bases/CsvBase.js +4 -13
- package/src/dataframe/dataframe.js +595 -66
- package/src/features/GroupBy.js +561 -0
- package/src/features/dateRange.js +106 -0
- package/src/index.js +6 -1
- package/src/series/series.js +688 -46
- package/src/utils/errors.js +314 -0
- package/src/utils/logger.js +259 -0
- package/src/utils/typeDetection.js +339 -0
- package/src/utils/utils.js +5 -1
- package/src/utils/validation.js +450 -0
- package/tests/README.md +151 -0
- package/tests/integration/.gitkeep +0 -0
- package/tests/integration/README.md +3 -0
- package/tests/property/.gitkeep +0 -0
- package/tests/property/README.md +3 -0
- package/tests/setup.js +16 -0
- package/tests/test.js +2 -1
- package/tests/unit/.gitkeep +0 -0
- package/tests/unit/README.md +3 -0
- package/tests/unit/dataframe.test.js +1141 -0
- package/tests/unit/example.test.js +23 -0
- package/tests/unit/series.test.js +441 -0
- package/tests/unit/tocsv.test.js +838 -0
- package/tests/utils/testAssertions.js +143 -0
- package/tests/utils/testDataGenerator.js +123 -0
|
@@ -0,0 +1,561 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview GroupBy class for the node-pandas library.
|
|
3
|
+
* Provides grouping and aggregation functionality for DataFrames.
|
|
4
|
+
* Allows grouping data by one or more columns and computing aggregate statistics
|
|
5
|
+
* for each group.
|
|
6
|
+
*
|
|
7
|
+
* Validates: Requirements 5.1, 5.2, 5.3, 5.4, 5.5, 5.6
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
const Series = require('../series/series');
|
|
11
|
+
const { ValidationError, ColumnError } = require('../utils/errors');
|
|
12
|
+
const validation = require('../utils/validation');
|
|
13
|
+
const typeDetection = require('../utils/typeDetection');
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* GroupBy class - Groups DataFrame rows by one or more columns.
|
|
17
|
+
*
|
|
18
|
+
* Provides aggregation methods (mean, sum, count, min, max, std) that compute
|
|
19
|
+
* statistics for each group. Supports both single-column and multi-column grouping
|
|
20
|
+
* with hierarchical group organization.
|
|
21
|
+
*
|
|
22
|
+
* @class GroupBy
|
|
23
|
+
*
|
|
24
|
+
* @example
|
|
25
|
+
* // Single-column grouping
|
|
26
|
+
* const df = DataFrame(
|
|
27
|
+
* [[1, 'Rishikesh Agrawani', 32, 'Engineering'],
|
|
28
|
+
* [2, 'Hemkesh Agrawani', 30, 'Sales'],
|
|
29
|
+
* [3, 'Malinikesh Agrawani', 28, 'Engineering']],
|
|
30
|
+
* ['id', 'name', 'age', 'department']
|
|
31
|
+
* );
|
|
32
|
+
*
|
|
33
|
+
* const grouped = df.groupBy('department');
|
|
34
|
+
* const meanAge = grouped.mean(); // Returns DataFrame with mean age by department
|
|
35
|
+
*
|
|
36
|
+
* @example
|
|
37
|
+
* // Multi-column grouping
|
|
38
|
+
* const grouped = df.groupBy(['department', 'name']);
|
|
39
|
+
* const counts = grouped.count(); // Returns DataFrame with counts by department and name
|
|
40
|
+
*/
|
|
41
|
+
class GroupBy {
  /**
   * Creates a new GroupBy instance and eagerly builds the group index.
   *
   * @param {DataFrame} dataframe - The DataFrame to group
   * @param {string|Array<string>} groupingColumns - Column name(s) to group by
   *
   * @throws {ValidationError} If dataframe is not an object, or groupingColumns
   *   is neither a string nor an array
   * @throws {ColumnError} If any grouping column doesn't exist in the DataFrame
   *
   * @example
   * const grouped = new GroupBy(df, 'department');
   * const grouped2 = new GroupBy(df, ['department', 'name']);
   */
  constructor(dataframe, groupingColumns) {
    if (!dataframe || typeof dataframe !== 'object') {
      throw new ValidationError('dataframe must be a valid DataFrame', {
        operation: 'GroupBy creation',
        value: dataframe
      });
    }

    // Normalize groupingColumns to an array. The array is copied so that a
    // later mutation of the caller's array cannot desynchronize the index.
    let columns;
    if (typeof groupingColumns === 'string') {
      columns = [groupingColumns];
    } else if (Array.isArray(groupingColumns)) {
      columns = [...groupingColumns];
    } else {
      throw new ValidationError('groupingColumns must be a string or array of strings', {
        operation: 'GroupBy creation',
        value: groupingColumns
      });
    }

    // Validate that all grouping columns exist; re-raise as ColumnError with
    // the first offending column attached as context.
    try {
      validation.validateColumnsExist(columns, dataframe.columns);
    } catch (error) {
      const knownColumns = Array.isArray(dataframe.columns) ? dataframe.columns : [];
      throw new ColumnError(error.message, {
        operation: 'GroupBy creation',
        column: columns.find(col => !knownColumns.includes(col)),
        value: dataframe.columns
      });
    }

    this.dataframe = dataframe;
    this.groupingColumns = columns;
    this.groups = this._createGroups();
  }

  /**
   * Reads one cell from a row that may be stored either as an array
   * (positional) or as an object keyed by column name.
   *
   * @param {Array|Object} row - The row to read from
   * @param {number} colIndex - Position of the column in dataframe.columns
   * @param {string} colName - Name of the column
   * @returns {*} The cell value
   * @private
   */
  _readCell(row, colIndex, colName) {
    return Array.isArray(row) ? row[colIndex] : row[colName];
  }

  /**
   * Creates internal group structure from the DataFrame.
   *
   * For single-column grouping, creates a Map where keys are stringified group
   * values and values are arrays of row indices.
   *
   * For multi-column grouping, creates a nested object with one level per
   * grouping column; the innermost level holds arrays of row indices.
   *
   * @returns {Map|Object} The group structure
   * @private
   */
  _createGroups() {
    if (this.groupingColumns.length === 1) {
      return this._createSingleColumnGroups();
    }
    return this._createMultiColumnGroups();
  }

  /**
   * Creates groups for single-column grouping.
   *
   * Group values are stringified, so e.g. `1` and `'1'` share a group.
   *
   * @returns {Map<string, Array<number>>} Map from group value to row indices
   * @private
   */
  _createSingleColumnGroups() {
    const groups = new Map();
    const groupColumn = this.groupingColumns[0];
    const columnIndex = this.dataframe.columns.indexOf(groupColumn);

    for (let i = 0; i < this.dataframe.rows; i++) {
      const keyStr = String(this._readCell(this.dataframe.data[i], columnIndex, groupColumn));

      let bucket = groups.get(keyStr);
      if (bucket === undefined) {
        bucket = [];
        groups.set(keyStr, bucket);
      }
      bucket.push(i);
    }

    return groups;
  }

  /**
   * Creates hierarchical groups for multi-column grouping.
   *
   * Every level is a prototype-less object (`Object.create(null)`), so group
   * values that collide with Object.prototype members ('constructor',
   * 'toString', '__proto__', ...) are treated as ordinary keys instead of
   * resolving to inherited properties.
   *
   * @returns {Object} Nested object; leaves are arrays of row indices
   * @private
   */
  _createMultiColumnGroups() {
    const groups = Object.create(null);
    const depth = this.groupingColumns.length;

    // Column positions do not change per row, so resolve them once.
    const columnIndices = this.groupingColumns.map(name => this.dataframe.columns.indexOf(name));

    for (let i = 0; i < this.dataframe.rows; i++) {
      const row = this.dataframe.data[i];
      let current = groups;

      for (let j = 0; j < depth; j++) {
        const keyStr = String(this._readCell(row, columnIndices[j], this.groupingColumns[j]));

        if (j === depth - 1) {
          // Last column - store row indices
          if (!current[keyStr]) {
            current[keyStr] = [];
          }
          current[keyStr].push(i);
        } else {
          // Intermediate column - descend, creating the level on first use
          if (!current[keyStr]) {
            current[keyStr] = Object.create(null);
          }
          current = current[keyStr];
        }
      }
    }

    return groups;
  }

  /**
   * Extracts all row indices from a (possibly nested) group structure.
   *
   * @param {Object|Array<number>} obj - A group level or a leaf index array
   * @returns {Array<number>} Array of row indices
   * @private
   */
  _extractRowIndices(obj) {
    if (Array.isArray(obj)) {
      return obj;
    }

    const indices = [];
    for (const key in obj) {
      // Appended one by one: spreading a very large array into push() can
      // exceed the engine's argument limit.
      for (const index of this._extractRowIndices(obj[key])) {
        indices.push(index);
      }
    }
    return indices;
  }

  /**
   * Computes the mean for each group.
   *
   * Returns a DataFrame with one row per group: the grouping column values
   * followed by every non-grouping column. Columns with no numeric values in
   * a group are reported as null for that group.
   *
   * @returns {DataFrame} DataFrame with group keys and mean values
   *
   * @example
   * const grouped = df.groupBy('department');
   * const result = grouped.mean();
   * // Columns: ['department', 'id', 'name', 'age'] ('name' is null: not numeric)
   */
  mean() {
    return this._aggregateNumeric('mean');
  }

  /**
   * Computes the sum for each group.
   *
   * Returns a DataFrame with one row per group: the grouping column values
   * followed by every non-grouping column. Columns with no numeric values in
   * a group are reported as null for that group.
   *
   * @returns {DataFrame} DataFrame with group keys and sum values
   *
   * @example
   * const grouped = df.groupBy('department');
   * const result = grouped.sum();
   */
  sum() {
    return this._aggregateNumeric('sum');
  }

  /**
   * Computes the number of rows in each group.
   *
   * @returns {DataFrame} DataFrame with the grouping columns plus a 'count' column
   *
   * @example
   * const grouped = df.groupBy('department');
   * const result = grouped.count();
   * // Columns: ['department', 'count']
   */
  count() {
    const results = [];

    for (const groupKey of this._getGroupKeys()) {
      const rowIndices = this._getRowIndicesForKey(groupKey);
      results.push(this._buildResultRow(groupKey, { count: rowIndices.length }));
    }

    return this._createResultDataFrame(results, { count: true });
  }

  /**
   * Computes the minimum for each group.
   *
   * Returns a DataFrame with one row per group: the grouping column values
   * followed by every non-grouping column. Columns with no numeric values in
   * a group are reported as null for that group.
   *
   * @returns {DataFrame} DataFrame with group keys and minimum values
   *
   * @example
   * const grouped = df.groupBy('department');
   * const result = grouped.min();
   */
  min() {
    return this._aggregateNumeric('min');
  }

  /**
   * Computes the maximum for each group.
   *
   * Returns a DataFrame with one row per group: the grouping column values
   * followed by every non-grouping column. Columns with no numeric values in
   * a group are reported as null for that group.
   *
   * @returns {DataFrame} DataFrame with group keys and maximum values
   *
   * @example
   * const grouped = df.groupBy('department');
   * const result = grouped.max();
   */
  max() {
    return this._aggregateNumeric('max');
  }

  /**
   * Computes the sample standard deviation (divides by n-1) for each group.
   *
   * Returns a DataFrame with one row per group: the grouping column values
   * followed by every non-grouping column. A column is null for a group when
   * it has fewer than two numeric values there.
   *
   * @returns {DataFrame} DataFrame with group keys and standard deviation values
   *
   * @example
   * const grouped = df.groupBy('department');
   * const result = grouped.std();
   */
  std() {
    return this._aggregateNumeric('std');
  }

  /**
   * Aggregates every non-grouping column using the specified method.
   *
   * @param {string} method - The aggregation method ('mean', 'sum', 'min', 'max', 'std')
   * @returns {DataFrame} DataFrame with aggregation results
   * @private
   */
  _aggregateNumeric(method) {
    // Resolve the columns to aggregate once instead of once per group.
    const grouping = new Set(this.groupingColumns);
    const valueColumns = [];
    this.dataframe.columns.forEach((colName, colIndex) => {
      if (!grouping.has(colName)) {
        valueColumns.push({ colName, colIndex });
      }
    });

    const results = [];

    for (const groupKey of this._getGroupKeys()) {
      const rowIndices = this._getRowIndicesForKey(groupKey);
      const aggregations = {};

      for (const { colName, colIndex } of valueColumns) {
        // Keep only the values of this group that convert to a number.
        const numericValues = [];
        for (const rowIdx of rowIndices) {
          const cell = this._readCell(this.dataframe.data[rowIdx], colIndex, colName);
          const numeric = this._toNumeric(cell);
          if (numeric !== null) {
            numericValues.push(numeric);
          }
        }

        // Columns without numeric values are left out here and end up as
        // null when the result rows are laid out.
        if (numericValues.length > 0) {
          aggregations[colName] = this._computeAggregation(method, numericValues);
        }
      }

      results.push(this._buildResultRow(groupKey, aggregations));
    }

    return this._createResultDataFrame(results, { numeric: true });
  }

  /**
   * Converts a value to a number if it is unambiguously numeric.
   *
   * Numbers are returned as-is except NaN, which is treated as missing.
   * Strings must be numeric in their entirety (surrounding whitespace is
   * ignored): '42' and ' 3.5 ' convert, while '', '12abc' and '2021-01-05'
   * do not. Every other type yields null.
   *
   * @param {*} value - The value to convert
   * @returns {number|null} The numeric value or null if not convertible
   * @private
   */
  _toNumeric(value) {
    if (value === null || value === undefined) {
      return null;
    }

    if (typeof value === 'number') {
      // NaN would silently poison sum/mean/min/max for the whole group.
      return Number.isNaN(value) ? null : value;
    }

    if (typeof value === 'string') {
      const trimmed = value.trim();
      if (trimmed === '') {
        // Number('') is 0, which would invent a value for an empty cell.
        return null;
      }
      const num = Number(trimmed);
      return Number.isNaN(num) ? null : num;
    }

    return null;
  }

  /**
   * Computes an aggregation on numeric values.
   *
   * @param {string} method - 'mean', 'sum', 'min', 'max' or 'std'
   * @param {Array<number>} values - The numeric values to aggregate
   * @returns {number|null} The aggregation result; null for empty input, an
   *   unknown method, or 'std' with fewer than two values
   * @private
   */
  _computeAggregation(method, values) {
    if (values.length === 0) {
      return null;
    }

    switch (method) {
      case 'mean':
        return values.reduce((a, b) => a + b, 0) / values.length;
      case 'sum':
        return values.reduce((a, b) => a + b, 0);
      case 'min':
        // Folded pairwise: Math.min(...values) can exceed the argument limit
        // on large groups.
        return values.reduce((lowest, v) => Math.min(lowest, v), Infinity);
      case 'max':
        return values.reduce((highest, v) => Math.max(highest, v), -Infinity);
      case 'std': {
        if (values.length < 2) {
          return null;
        }
        const mean = values.reduce((a, b) => a + b, 0) / values.length;
        const variance =
          values.reduce((a, b) => a + Math.pow(b - mean, 2), 0) / (values.length - 1);
        return Math.sqrt(variance);
      }
      default:
        return null;
    }
  }

  /**
   * Gets all unique group keys.
   *
   * @returns {Array<string>|Array<Array<string>>} Strings for single-column
   *   grouping, arrays of strings (one per grouping column) otherwise
   * @private
   */
  _getGroupKeys() {
    if (this.groupingColumns.length === 1) {
      return Array.from(this.groups.keys());
    }
    return this._extractGroupKeys(this.groups);
  }

  /**
   * Extracts all group keys from the hierarchical structure.
   *
   * @param {Object} obj - The group level to walk
   * @param {Array<string>} prefix - Keys of the levels above `obj`
   * @returns {Array<Array<string>>} One key path per leaf group
   * @private
   */
  _extractGroupKeys(obj, prefix = []) {
    const keys = [];

    for (const key in obj) {
      const value = obj[key];
      const currentPrefix = [...prefix, key];

      if (Array.isArray(value)) {
        // Leaf node - this is a complete group key
        keys.push(currentPrefix);
      } else {
        // Intermediate node - recurse
        for (const nested of this._extractGroupKeys(value, currentPrefix)) {
          keys.push(nested);
        }
      }
    }

    return keys;
  }

  /**
   * Gets row indices for a specific group key.
   *
   * @param {string|Array} groupKey - The group key (array for multi-column grouping)
   * @returns {Array<number>} Row indices in this group; empty if the key is unknown
   * @private
   */
  _getRowIndicesForKey(groupKey) {
    if (this.groupingColumns.length === 1) {
      return this.groups.get(String(groupKey)) || [];
    }

    // Navigate the hierarchical structure one grouping column at a time.
    let current = this.groups;
    const keyArray = Array.isArray(groupKey) ? groupKey : [groupKey];

    for (const key of keyArray) {
      current = current[String(key)];
      if (!current) {
        return [];
      }
    }

    return Array.isArray(current) ? current : [];
  }

  /**
   * Builds a result row with group key and aggregation values.
   *
   * @param {string|Array} groupKey - The group key
   * @param {Object} aggregations - Aggregated values keyed by column name
   * @returns {Object} Row object keyed by column name
   * @private
   */
  _buildResultRow(groupKey, aggregations) {
    const keyArray = Array.isArray(groupKey) ? groupKey : [groupKey];
    const row = {};

    // Add grouping column values
    this.groupingColumns.forEach((colName, i) => {
      row[colName] = keyArray[i];
    });

    // Add aggregation values
    for (const colName in aggregations) {
      row[colName] = aggregations[colName];
    }

    return row;
  }

  /**
   * Creates a result DataFrame from aggregation results.
   *
   * @param {Array<Object>} results - Result rows keyed by column name
   * @param {Object} options - `count: true` yields a single 'count' column;
   *   otherwise every non-grouping column of the source DataFrame is emitted
   * @returns {DataFrame} The result DataFrame
   * @private
   */
  _createResultDataFrame(results, options = {}) {
    // Import here to avoid circular dependency
    const DataFrameFactory = require('../dataframe/dataframe');

    // Build column names: grouping columns + aggregated columns
    const resultColumns = [...this.groupingColumns];

    if (options.count) {
      resultColumns.push('count');
    } else {
      for (const colName of this.dataframe.columns) {
        if (!this.groupingColumns.includes(colName)) {
          resultColumns.push(colName);
        }
      }
    }

    if (results.length === 0) {
      // Return empty DataFrame with correct columns
      const emptyDf = DataFrameFactory([]);
      emptyDf.columns = resultColumns;
      emptyDf.cols = resultColumns.length;
      emptyDf.setDataForColumns();
      return emptyDf;
    }

    // Convert object rows to array rows in the correct column order; columns
    // that were not aggregated for a group become null.
    const arrayResults = results.map(row =>
      resultColumns.map(col => (row[col] !== undefined ? row[col] : null))
    );

    return DataFrameFactory(arrayResults, resultColumns);
  }
}
|
|
560
|
+
|
|
561
|
+
module.exports = GroupBy;
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
const messages = require('../messages/messages')
|
|
2
|
+
|
|
3
|
+
// https://javascript.info/mixins
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
/**
 * Builds every supported date format for the given separator.
 *
 * All six orderings of the "dd", "mm" and "yyyy" specifiers are produced
 * (e.g. "yyyy-mm-dd", "dd-mm-yyyy", ...), each joined with `sep`.
 *
 * @param {string} sep - Separator placed between the specifiers
 * @returns {Array<string>} The six format strings
 */
function getDateFormats(sep) {
    const specifiers = ["dd", "mm", "yyyy"]
    const formats = []

    // Pick the first and second specifier; the third is whichever is left.
    // Choosing only the first one would yield just 3 of the 6 orderings.
    for (const first of specifiers) {
        for (const second of specifiers) {
            if (second === first) {
                continue
            }

            const third = specifiers.find((s) => s !== first && s !== second)
            formats.push([first, second, third].join(sep))
        }
    }

    return formats
}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
/**
 * Lists the separators accepted between date parts.
 *
 * @returns {Array<string>} The supported separators, "-" and "/"
 */
function getDateSeparators() {
    return ["-", "/"]
}
|
|
46
|
+
|
|
47
|
+
/**
 * Optionally left-pads a one-character date part with a zero.
 *
 * @param {number|string} dateElem - The date part (day or month)
 * @param {boolean} fillZero - Whether padding is requested
 * @returns {number|string} '0' + dateElem when padding is requested and the
 *   part is a single character; otherwise dateElem unchanged
 */
function getZeroFilled(dateElem, fillZero) {
    const needsPadding = fillZero && `${dateElem}`.length === 1

    return needsPadding ? '0' + dateElem : dateElem
}
|
|
56
|
+
|
|
57
|
+
// New - working way
|
|
58
|
+
/**
 * Renders a date according to a specifier-based format.
 *
 * @param {Date} date - The date to render
 * @param {string} format - Specifiers "dd", "mm", "yyyy" joined by `sep`
 * @param {string} sep - Separator used both in `format` and in the output
 * @param {boolean} fillZero - Whether to zero-pad single-digit day/month
 * @returns {string} The formatted date, e.g. "2024-01-05"
 */
function getDateValue(date, format, sep, fillZero) {
    // Split on the active separator: a hard-coded '-' leaves formats such as
    // "dd/mm/yyyy" as one unrecognized chunk that is returned unsubstituted.
    const parts = format.split(sep).map((specifier) => {
        switch (specifier) {
            case "mm":
                // Date#getMonth is zero-based (January === 0).
                return getZeroFilled(date.getMonth() + 1, fillZero)
            case "dd":
                return getZeroFilled(date.getDate(), fillZero)
            case "yyyy":
                return date.getFullYear()
            default:
                // Unknown chunks are passed through untouched.
                return specifier
        }
    })

    return parts.join(sep)
}
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
/**
 * Generates `n` consecutive formatted dates starting from today.
 *
 * @param {number} [n=1] - Number of dates to generate
 * @param {string} [format="yyyy-mm-dd"] - Order of the "dd", "mm", "yyyy"
 *   specifiers, joined by `sep`
 * @param {string} [sep='-'] - Separator; must be one of getDateSeparators()
 * @param {boolean} [fillZero=false] - Whether to zero-pad single-digit day/month
 * @returns {Array<string>|undefined} The formatted dates, or undefined (after
 *   reporting through messages.error) when `sep` or `format` is not supported
 */
function dateRange(n = 1, format = "yyyy-mm-dd", sep = '-', fillZero=false) {
    if (!getDateSeparators().includes(sep)) {
        messages.error(`Invalid separator specified as \`${sep}\``)
        return
    }

    if (!getDateFormats(sep).includes(format)) {
        messages.error(`Invalid date format specified as \`${format}\``)
        return
    }

    // Declared locally: assigning to an undeclared name creates an implicit
    // global and throws a ReferenceError in strict mode / ES modules.
    const dateRanges = []
    const date = new Date()

    for (let index = 0; index < n; ++index) {
        dateRanges.push(getDateValue(date, format, sep, fillZero))
        // setDate rolls over month and year boundaries automatically.
        date.setDate(date.getDate() + 1)
    }

    return dateRanges
}
|
|
105
|
+
|
|
106
|
+
module.exports = dateRange
|
package/src/index.js
CHANGED
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
const Series = require('./series/series')
|
|
6
6
|
const DataFrame = require('./dataframe/dataframe')
|
|
7
|
+
const dateRange = require("./features/dateRange")
|
|
7
8
|
const path = require('path')
|
|
8
9
|
const fs = require('fs')
|
|
9
10
|
|
|
@@ -42,7 +43,11 @@ function readCsv(csvPath) {
|
|
|
42
43
|
}
|
|
43
44
|
|
|
44
45
|
module.exports = {
|
|
46
|
+
// Node pandas supported data types
|
|
45
47
|
Series,
|
|
46
48
|
DataFrame,
|
|
47
|
-
|
|
49
|
+
|
|
50
|
+
// Features
|
|
51
|
+
readCsv,
|
|
52
|
+
dateRange
|
|
48
53
|
}
|