node-pandas 1.0.4 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.kiro/agents/git-committer-agent.md +208 -0
- package/.kiro/agents/npm-publisher-agent.md +501 -0
- package/.kiro/publish-status-2.0.0.md +134 -0
- package/.kiro/published-versions.md +11 -0
- package/.kiro/specs/pandas-like-enhancements/.config.kiro +1 -0
- package/.kiro/specs/pandas-like-enhancements/design.md +377 -0
- package/.kiro/specs/pandas-like-enhancements/requirements.md +257 -0
- package/.kiro/specs/pandas-like-enhancements/tasks.md +477 -0
- package/CHANGELOG.md +42 -0
- package/README.md +375 -103
- package/TESTING_SETUP.md +183 -0
- package/jest.config.js +25 -0
- package/package.json +11 -3
- package/src/bases/CsvBase.js +4 -13
- package/src/dataframe/dataframe.js +596 -64
- package/src/features/GroupBy.js +561 -0
- package/src/features/dateRange.js +106 -0
- package/src/index.js +6 -1
- package/src/series/series.js +690 -14
- package/src/utils/errors.js +314 -0
- package/src/utils/getIndicesColumns.js +1 -1
- package/src/utils/getTransformedDataList.js +1 -1
- package/src/utils/logger.js +259 -0
- package/src/utils/typeDetection.js +339 -0
- package/src/utils/utils.js +5 -1
- package/src/utils/validation.js +450 -0
- package/tests/README.md +151 -0
- package/tests/integration/.gitkeep +0 -0
- package/tests/integration/README.md +3 -0
- package/tests/property/.gitkeep +0 -0
- package/tests/property/README.md +3 -0
- package/tests/setup.js +16 -0
- package/tests/test.js +58 -21
- package/tests/unit/.gitkeep +0 -0
- package/tests/unit/README.md +3 -0
- package/tests/unit/dataframe.test.js +1141 -0
- package/tests/unit/example.test.js +23 -0
- package/tests/unit/series.test.js +441 -0
- package/tests/unit/tocsv.test.js +838 -0
- package/tests/utils/testAssertions.js +143 -0
- package/tests/utils/testDataGenerator.js +123 -0
package/README.md
CHANGED
|
@@ -11,10 +11,12 @@ An [npm package](https://www.npmjs.com/package/node-pandas) that incorporates mi
|
|
|
11
11
|
> For now, you can
|
|
12
12
|
>
|
|
13
13
|
> + create Series(using 1D array), DataFrame(using 2D array or file `readCsv()`)
|
|
14
|
+
> + access Series objects using array-like syntax (indexing, looping, etc.)
|
|
14
15
|
> + view columns, index
|
|
15
16
|
> + save DataFrame in a CSV file `toCsv()`
|
|
16
17
|
> + access elements using indices/column names
|
|
17
18
|
> + view contents in pretty tabular form on console
|
|
19
|
+
> + access DataFrame's columns using column names
|
|
18
20
|
|
|
19
21
|
## Installation
|
|
20
22
|
|
|
@@ -41,6 +43,12 @@ An [npm package](https://www.npmjs.com/package/node-pandas) that incorporates mi
|
|
|
41
43
|
|
|
42
44
|
4. [Example 4 - Accessing columns (Retrieving columns using column name)](#df-ex4) - `df.fullName -> ["R A", "B R", "P K"]`
|
|
43
45
|
|
|
46
|
+
5. [Example 5 - Selecting specific columns using select()](#df-ex5)
|
|
47
|
+
|
|
48
|
+
6. [Example 6 - Filtering DataFrame rows using filter()](#df-ex6)
|
|
49
|
+
|
|
50
|
+
7. [Example 7 - Grouping and aggregating data using groupBy()](#df-ex7)
|
|
51
|
+
|
|
44
52
|
<hr>
|
|
45
53
|
|
|
46
54
|
## Getting started
|
|
@@ -65,7 +73,7 @@ NodeSeries [
|
|
|
65
73
|
-3,
|
|
66
74
|
0,
|
|
67
75
|
5,
|
|
68
|
-
|
|
76
|
+
]
|
|
69
77
|
>
|
|
70
78
|
> s.show
|
|
71
79
|
┌─────────┬────────┐
|
|
@@ -111,21 +119,18 @@ undefined
|
|
|
111
119
|
... ['Wes McKinney', 3, 'Pandas'],
|
|
112
120
|
... ['Ken Thompson', 1, 'B language']
|
|
113
121
|
... ], columns)
|
|
114
|
-
NodeDataFrame
|
|
122
|
+
NodeDataFrame [
|
|
123
|
+
[ 'Guido Van Rossum', 6, 'Python' ],
|
|
124
|
+
[ 'Ryan Dahl', 5, 'Node.js' ],
|
|
125
|
+
[ 'Anders Hezlsberg', 7, 'TypeScript' ],
|
|
126
|
+
[ 'Wes McKinney', 3, 'Pandas' ],
|
|
127
|
+
[ 'Ken Thompson', 1, 'B language' ],
|
|
115
128
|
columns: [ 'full_name', 'user_id', 'technology' ],
|
|
116
129
|
index: [ 0, 1, 2, 3, 4 ],
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
{ full_name: 'Ryan Dahl', user_id: 5, technology: 'Node.js' },
|
|
122
|
-
{ full_name: 'Anders Hezlsberg',
|
|
123
|
-
user_id: 7,
|
|
124
|
-
technology: 'TypeScript' },
|
|
125
|
-
{ full_name: 'Wes McKinney', user_id: 3, technology: 'Pandas' },
|
|
126
|
-
{ full_name: 'Ken Thompson',
|
|
127
|
-
user_id: 1,
|
|
128
|
-
technology: 'B language' } ] }
|
|
130
|
+
rows: 5,
|
|
131
|
+
cols: 3,
|
|
132
|
+
out: true
|
|
133
|
+
]
|
|
129
134
|
>
|
|
130
135
|
> df.show
|
|
131
136
|
┌─────────┬────────────────────┬─────────┬──────────────┐
|
|
@@ -181,37 +186,52 @@ Now have a look the below statements executed on Node REPL.
|
|
|
181
186
|
undefined
|
|
182
187
|
>
|
|
183
188
|
> df = pd.readCsv("/Users/hygull/Projects/NodeJS/node-pandas/docs/csvs/devs.csv")
|
|
184
|
-
NodeDataFrame
|
|
189
|
+
NodeDataFrame [
|
|
190
|
+
{
|
|
191
|
+
fullName: 'Ken Thompson',
|
|
192
|
+
Profession: 'C developer',
|
|
193
|
+
Language: 'C',
|
|
194
|
+
DevId: 1122
|
|
195
|
+
},
|
|
196
|
+
{
|
|
197
|
+
fullName: 'Ron Wilson',
|
|
198
|
+
Profession: 'Ruby developer',
|
|
199
|
+
Language: 'Ruby',
|
|
200
|
+
DevId: 4433
|
|
201
|
+
},
|
|
202
|
+
{
|
|
203
|
+
fullName: 'Jeff Thomas',
|
|
204
|
+
Profession: 'Java developer',
|
|
205
|
+
Language: 'Java',
|
|
206
|
+
DevId: 8899
|
|
207
|
+
},
|
|
208
|
+
{
|
|
209
|
+
fullName: 'Rishikesh Agrawani',
|
|
210
|
+
Profession: 'Python developer',
|
|
211
|
+
Language: 'Python',
|
|
212
|
+
DevId: 6677
|
|
213
|
+
},
|
|
214
|
+
{
|
|
215
|
+
fullName: 'Kylie Dwine',
|
|
216
|
+
Profession: 'C++',
|
|
217
|
+
Language: 'C++ Developer',
|
|
218
|
+
DevId: 11
|
|
219
|
+
},
|
|
220
|
+
{
|
|
221
|
+
fullName: 'Briella Brown',
|
|
222
|
+
Profession: 'JavaScirpt developer',
|
|
223
|
+
Language: 'JavaScript',
|
|
224
|
+
DevId: 8844
|
|
225
|
+
},
|
|
185
226
|
columns: [ 'fullName', 'Profession', 'Language', 'DevId' ],
|
|
186
|
-
index: [
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
DevId: 1122 },
|
|
192
|
-
{ fullName: 'Ron Wilson',
|
|
193
|
-
Profession: 'Ruby developer',
|
|
194
|
-
Language: 'Ruby',
|
|
195
|
-
DevId: 4433 },
|
|
196
|
-
{ fullName: 'Jeff Thomas',
|
|
197
|
-
Profession: 'Java developer',
|
|
198
|
-
Language: 'Java',
|
|
199
|
-
DevId: 8899 },
|
|
200
|
-
{ fullName: 'Rishikesh Agrawani',
|
|
201
|
-
Profession: 'Python developer',
|
|
202
|
-
Language: 'Python',
|
|
203
|
-
DevId: 6677 },
|
|
204
|
-
{ fullName: 'Kylie Dwine',
|
|
205
|
-
Profession: 'C++',
|
|
206
|
-
Language: 'C++ Developer',
|
|
207
|
-
DevId: 11 },
|
|
208
|
-
{ fullName: 'Briella Brown',
|
|
209
|
-
Profession: 'JavaScript developer',
|
|
210
|
-
Language: 'JavaScript',
|
|
211
|
-
DevId: 8844 } ] }
|
|
227
|
+
index: [ 0, 1, 2, 3, 4, 5 ],
|
|
228
|
+
rows: 6,
|
|
229
|
+
cols: 4,
|
|
230
|
+
out: true
|
|
231
|
+
]
|
|
212
232
|
>
|
|
213
233
|
> df.index
|
|
214
|
-
[
|
|
234
|
+
[ 0, 1, 2, 3, 4, 5 ]
|
|
215
235
|
>
|
|
216
236
|
> df.columns
|
|
217
237
|
[ 'fullName', 'Profession', 'Language', 'DevId' ]
|
|
@@ -253,59 +273,48 @@ undefined
|
|
|
253
273
|
>
|
|
254
274
|
> df = pd.readCsv("./docs/csvs/devs.csv")
|
|
255
275
|
NodeDataFrame [
|
|
256
|
-
{
|
|
276
|
+
{
|
|
277
|
+
fullName: 'Ken Thompson',
|
|
257
278
|
Profession: 'C developer',
|
|
258
279
|
Language: 'C',
|
|
259
|
-
DevId: 1122
|
|
260
|
-
|
|
280
|
+
DevId: 1122
|
|
281
|
+
},
|
|
282
|
+
{
|
|
283
|
+
fullName: 'Ron Wilson',
|
|
261
284
|
Profession: 'Ruby developer',
|
|
262
285
|
Language: 'Ruby',
|
|
263
|
-
DevId: 4433
|
|
264
|
-
|
|
286
|
+
DevId: 4433
|
|
287
|
+
},
|
|
288
|
+
{
|
|
289
|
+
fullName: 'Jeff Thomas',
|
|
265
290
|
Profession: 'Java developer',
|
|
266
291
|
Language: 'Java',
|
|
267
|
-
DevId: 8899
|
|
268
|
-
|
|
292
|
+
DevId: 8899
|
|
293
|
+
},
|
|
294
|
+
{
|
|
295
|
+
fullName: 'Rishikesh Agrawani',
|
|
269
296
|
Profession: 'Python developer',
|
|
270
297
|
Language: 'Python',
|
|
271
|
-
DevId: 6677
|
|
272
|
-
|
|
298
|
+
DevId: 6677
|
|
299
|
+
},
|
|
300
|
+
{
|
|
301
|
+
fullName: 'Kylie Dwine',
|
|
273
302
|
Profession: 'C++',
|
|
274
303
|
Language: 'C++ Developer',
|
|
275
|
-
DevId: 11
|
|
276
|
-
|
|
277
|
-
|
|
304
|
+
DevId: 11
|
|
305
|
+
},
|
|
306
|
+
{
|
|
307
|
+
fullName: 'Briella Brown',
|
|
308
|
+
Profession: 'JavaScirpt developer',
|
|
278
309
|
Language: 'JavaScript',
|
|
279
|
-
DevId: 8844
|
|
310
|
+
DevId: 8844
|
|
311
|
+
},
|
|
280
312
|
columns: [ 'fullName', 'Profession', 'Language', 'DevId' ],
|
|
281
|
-
index: [
|
|
282
|
-
_data: [ { fullName: 'Ken Thompson',
|
|
283
|
-
Profession: 'C developer',
|
|
284
|
-
Language: 'C',
|
|
285
|
-
DevId: 1122 },
|
|
286
|
-
{ fullName: 'Ron Wilson',
|
|
287
|
-
Profession: 'Ruby developer',
|
|
288
|
-
Language: 'Ruby',
|
|
289
|
-
DevId: 4433 },
|
|
290
|
-
{ fullName: 'Jeff Thomas',
|
|
291
|
-
Profession: 'Java developer',
|
|
292
|
-
Language: 'Java',
|
|
293
|
-
DevId: 8899 },
|
|
294
|
-
{ fullName: 'Rishikesh Agrawani',
|
|
295
|
-
Profession: 'Python developer',
|
|
296
|
-
Language: 'Python',
|
|
297
|
-
DevId: 6677 },
|
|
298
|
-
{ fullName: 'Kylie Dwine',
|
|
299
|
-
Profession: 'C++',
|
|
300
|
-
Language: 'C++ Developer',
|
|
301
|
-
DevId: 11 },
|
|
302
|
-
{ fullName: 'Briella Brown',
|
|
303
|
-
Profession: 'JavaScript developer',
|
|
304
|
-
Language: 'JavaScript',
|
|
305
|
-
DevId: 8844 } ],
|
|
313
|
+
index: [ 0, 1, 2, 3, 4, 5 ],
|
|
306
314
|
rows: 6,
|
|
307
315
|
cols: 4,
|
|
308
|
-
out: true
|
|
316
|
+
out: true
|
|
317
|
+
]
|
|
309
318
|
>
|
|
310
319
|
> df.cols
|
|
311
320
|
4
|
|
@@ -314,7 +323,7 @@ NodeDataFrame [
|
|
|
314
323
|
> df.columns
|
|
315
324
|
[ 'fullName', 'Profession', 'Language', 'DevId' ]
|
|
316
325
|
> df.index
|
|
317
|
-
[
|
|
326
|
+
[ 0, 1, 2, 3, 4, 5 ]
|
|
318
327
|
>
|
|
319
328
|
> df.toCsv("/Users/hygull/Desktop/newDevs.csv")
|
|
320
329
|
undefined
|
|
@@ -361,38 +370,300 @@ df.show // View DataFrame in tabular form
|
|
|
361
370
|
└─────────┴──────────────────────┴────────────────────────┴─────────────────┴───────┘
|
|
362
371
|
*/
|
|
363
372
|
|
|
364
|
-
console.log(df['fullName'])
|
|
373
|
+
console.log(df['fullName'])
|
|
365
374
|
/*
|
|
366
|
-
[
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
375
|
+
NodeSeries [
|
|
376
|
+
'Ken Thompson',
|
|
377
|
+
'Ron Wilson',
|
|
378
|
+
'Jeff Thomas',
|
|
379
|
+
'Rishikesh Agrawani',
|
|
380
|
+
'Kylie Dwine',
|
|
381
|
+
'Briella Brown'
|
|
373
382
|
]
|
|
374
383
|
*/
|
|
375
384
|
|
|
376
|
-
console.log(df.
|
|
385
|
+
console.log(df.DevId)
|
|
386
|
+
/*
|
|
387
|
+
NodeSeries [ 1122, 4433, 8899, 6677, 11, 8844 ]
|
|
388
|
+
*/
|
|
389
|
+
|
|
390
|
+
let languages = df.Language
|
|
391
|
+
console.log(languages)
|
|
377
392
|
/*
|
|
378
|
-
[
|
|
393
|
+
NodeSeries [
|
|
394
|
+
'C',
|
|
395
|
+
'Ruby',
|
|
396
|
+
'Java',
|
|
397
|
+
'Python',
|
|
398
|
+
'C++ Developer',
|
|
399
|
+
'JavaScript'
|
|
400
|
+
]
|
|
379
401
|
*/
|
|
380
402
|
|
|
381
|
-
console.log(
|
|
403
|
+
console.log(languages[0], '&', languages[1]) // C & Ruby
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
let professions = df.Profession
|
|
407
|
+
console.log(professions)
|
|
382
408
|
/*
|
|
383
|
-
[
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
409
|
+
NodeSeries [
|
|
410
|
+
'C developer',
|
|
411
|
+
'Ruby developer',
|
|
412
|
+
'Java developer',
|
|
413
|
+
'Python developer',
|
|
414
|
+
'C++',
|
|
415
|
+
'JavaScirpt developer'
|
|
390
416
|
]
|
|
391
417
|
*/
|
|
392
418
|
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
419
|
+
// Iterate like arrays
|
|
420
|
+
for(let profession of professions) {
|
|
421
|
+
console.log(profession)
|
|
422
|
+
}
|
|
423
|
+
/*
|
|
424
|
+
C developer
|
|
425
|
+
Ruby developer
|
|
426
|
+
Java developer
|
|
427
|
+
Python developer
|
|
428
|
+
C++
|
|
429
|
+
JavaScirpt developer
|
|
430
|
+
*/
|
|
431
|
+
```
|
|
432
|
+
|
|
433
|
+
<hr>
|
|
434
|
+
|
|
435
|
+
<h3 id='df-ex5'><code>Example 5 - Selecting specific columns using select()</code></h3>
|
|
436
|
+
|
|
437
|
+
> **Note:** The `select()` method returns a new DataFrame containing only the specified columns.
|
|
438
|
+
|
|
439
|
+
```javascript
|
|
440
|
+
const pd = require("node-pandas")
|
|
441
|
+
|
|
442
|
+
// Create a DataFrame with employee data
|
|
443
|
+
const df = pd.DataFrame([
|
|
444
|
+
['Rishikesh Agrawani', 32, 'Engineering'],
|
|
445
|
+
['Hemkesh Agrawani', 30, 'Marketing'],
|
|
446
|
+
['Malinikesh Agrawani', 28, 'Sales']
|
|
447
|
+
], ['name', 'age', 'department'])
|
|
448
|
+
|
|
449
|
+
df.show
|
|
450
|
+
/*
|
|
451
|
+
┌─────────┬──────────────────────┬─────┬──────────────┐
|
|
452
|
+
│ (index) │ name │ age │ department │
|
|
453
|
+
├─────────┼──────────────────────┼─────┼──────────────┤
|
|
454
|
+
│ 0 │ 'Rishikesh Agrawani' │ 32 │ 'Engineering'│
|
|
455
|
+
│ 1 │ 'Hemkesh Agrawani' │ 30 │ 'Marketing' │
|
|
456
|
+
│ 2 │ 'Malinikesh Agrawani'│ 28 │ 'Sales' │
|
|
457
|
+
└─────────┴──────────────────────┴─────┴──────────────┘
|
|
458
|
+
*/
|
|
459
|
+
|
|
460
|
+
// Select a single column
|
|
461
|
+
const nameOnly = df.select(['name'])
|
|
462
|
+
nameOnly.show
|
|
463
|
+
/*
|
|
464
|
+
┌─────────┬──────────────────────┐
|
|
465
|
+
│ (index) │ name │
|
|
466
|
+
├─────────┼──────────────────────┤
|
|
467
|
+
│ 0 │ 'Rishikesh Agrawani' │
|
|
468
|
+
│ 1 │ 'Hemkesh Agrawani' │
|
|
469
|
+
│ 2 │ 'Malinikesh Agrawani'│
|
|
470
|
+
└─────────┴──────────────────────┘
|
|
471
|
+
*/
|
|
472
|
+
|
|
473
|
+
// Select multiple columns
|
|
474
|
+
const nameAndAge = df.select(['name', 'age'])
|
|
475
|
+
nameAndAge.show
|
|
476
|
+
/*
|
|
477
|
+
┌─────────┬──────────────────────┬─────┐
|
|
478
|
+
│ (index) │ name │ age │
|
|
479
|
+
├─────────┼──────────────────────┼─────┤
|
|
480
|
+
│ 0 │ 'Rishikesh Agrawani' │ 32 │
|
|
481
|
+
│ 1 │ 'Hemkesh Agrawani' │ 30 │
|
|
482
|
+
│ 2 │ 'Malinikesh Agrawani'│ 28 │
|
|
483
|
+
└─────────┴──────────────────────┴─────┘
|
|
484
|
+
*/
|
|
485
|
+
|
|
486
|
+
// Original DataFrame remains unchanged
|
|
487
|
+
console.log(df.columns) // ['name', 'age', 'department']
|
|
488
|
+
```
|
|
489
|
+
|
|
490
|
+
<hr>
|
|
491
|
+
|
|
492
|
+
<h3 id='df-ex6'><code>Example 6 - Filtering DataFrame rows using filter()</code></h3>
|
|
493
|
+
|
|
494
|
+
> **Note:** The `filter()` method returns a new DataFrame containing only rows that match the condition. Multiple filters can be chained together.
|
|
495
|
+
|
|
496
|
+
```javascript
|
|
497
|
+
const pd = require("node-pandas")
|
|
498
|
+
|
|
499
|
+
// Create a DataFrame with employee data
|
|
500
|
+
const df = pd.DataFrame([
|
|
501
|
+
['Rishikesh Agrawani', 32, 'Engineering'],
|
|
502
|
+
['Hemkesh Agrawani', 30, 'Marketing'],
|
|
503
|
+
['Malinikesh Agrawani', 28, 'Sales']
|
|
504
|
+
], ['name', 'age', 'department'])
|
|
505
|
+
|
|
506
|
+
df.show
|
|
507
|
+
/*
|
|
508
|
+
┌─────────┬──────────────────────┬─────┬──────────────┐
|
|
509
|
+
│ (index) │ name │ age │ department │
|
|
510
|
+
├─────────┼──────────────────────┼─────┼──────────────┤
|
|
511
|
+
│ 0 │ 'Rishikesh Agrawani' │ 32 │ 'Engineering'│
|
|
512
|
+
│ 1 │ 'Hemkesh Agrawani' │ 30 │ 'Marketing' │
|
|
513
|
+
│ 2 │ 'Malinikesh Agrawani'│ 28 │ 'Sales' │
|
|
514
|
+
└─────────┴──────────────────────┴─────┴──────────────┘
|
|
515
|
+
*/
|
|
516
|
+
|
|
517
|
+
// Filter rows where age is greater than 28
|
|
518
|
+
const over28 = df.filter(row => row.age > 28)
|
|
519
|
+
over28.show
|
|
520
|
+
/*
|
|
521
|
+
┌─────────┬──────────────────────┬─────┬──────────────┐
|
|
522
|
+
│ (index) │ name │ age │ department │
|
|
523
|
+
├─────────┼──────────────────────┼─────┼──────────────┤
|
|
524
|
+
│ 0 │ 'Rishikesh Agrawani' │ 32 │ 'Engineering'│
|
|
525
|
+
│ 1 │ 'Hemkesh Agrawani' │ 30 │ 'Marketing' │
|
|
526
|
+
└─────────┴──────────────────────┴─────┴──────────────┘
|
|
527
|
+
*/
|
|
528
|
+
|
|
529
|
+
// Filter rows where department is 'Engineering'
|
|
530
|
+
const engineering = df.filter(row => row.department === 'Engineering')
|
|
531
|
+
engineering.show
|
|
532
|
+
/*
|
|
533
|
+
┌─────────┬──────────────────────┬─────┬──────────────┐
|
|
534
|
+
│ (index) │ name │ age │ department │
|
|
535
|
+
├─────────┼──────────────────────┼─────┼──────────────┤
|
|
536
|
+
│ 0 │ 'Rishikesh Agrawani' │ 32 │ 'Engineering'│
|
|
537
|
+
└─────────┴──────────────────────┴─────┴──────────────┘
|
|
538
|
+
*/
|
|
539
|
+
|
|
540
|
+
// Chain multiple filters together
|
|
541
|
+
const result = df
|
|
542
|
+
.filter(row => row.age > 28)
|
|
543
|
+
.filter(row => row.department !== 'Sales')
|
|
544
|
+
result.show
|
|
545
|
+
/*
|
|
546
|
+
┌─────────┬──────────────────────┬─────┬──────────────┐
|
|
547
|
+
│ (index) │ name │ age │ department │
|
|
548
|
+
├─────────┼──────────────────────┼─────┼──────────────┤
|
|
549
|
+
│ 0 │ 'Rishikesh Agrawani' │ 32 │ 'Engineering'│
|
|
550
|
+
│ 1 │ 'Hemkesh Agrawani' │ 30 │ 'Marketing' │
|
|
551
|
+
└─────────┴──────────────────────┴─────┴──────────────┘
|
|
552
|
+
*/
|
|
553
|
+
```
|
|
554
|
+
|
|
555
|
+
<hr>
|
|
556
|
+
|
|
557
|
+
<h3 id='df-ex7'><code>Example 7 - Grouping and aggregating data using groupBy()</code></h3>
|
|
558
|
+
|
|
559
|
+
> **Note:** The `groupBy()` method groups rows by one or more columns and allows aggregation using methods like `mean()`, `sum()`, `count()`, `min()`, and `max()`.
|
|
560
|
+
|
|
561
|
+
```javascript
|
|
562
|
+
const pd = require("node-pandas")
|
|
563
|
+
|
|
564
|
+
// Create a DataFrame with employee data including departments
|
|
565
|
+
const df = pd.DataFrame([
|
|
566
|
+
['Rishikesh Agrawani', 32, 'Engineering', 95000],
|
|
567
|
+
['Hemkesh Agrawani', 30, 'Marketing', 75000],
|
|
568
|
+
['Malinikesh Agrawani', 28, 'Sales', 65000],
|
|
569
|
+
['Alice Johnson', 29, 'Engineering', 92000],
|
|
570
|
+
['Bob Smith', 31, 'Marketing', 78000],
|
|
571
|
+
['Carol White', 27, 'Sales', 62000]
|
|
572
|
+
], ['name', 'age', 'department', 'salary'])
|
|
573
|
+
|
|
574
|
+
df.show
|
|
575
|
+
/*
|
|
576
|
+
┌─────────┬──────────────────────┬─────┬──────────────┬────────┐
|
|
577
|
+
│ (index) │ name │ age │ department │ salary │
|
|
578
|
+
├─────────┼──────────────────────┼─────┼──────────────┼────────┤
|
|
579
|
+
│ 0 │ 'Rishikesh Agrawani' │ 32 │ 'Engineering'│ 95000 │
|
|
580
|
+
│ 1 │ 'Hemkesh Agrawani' │ 30 │ 'Marketing' │ 75000 │
|
|
581
|
+
│ 2 │ 'Malinikesh Agrawani'│ 28 │ 'Sales' │ 65000 │
|
|
582
|
+
│ 3 │ 'Alice Johnson' │ 29 │ 'Engineering'│ 92000 │
|
|
583
|
+
│ 4 │ 'Bob Smith' │ 31 │ 'Marketing' │ 78000 │
|
|
584
|
+
│ 5 │ 'Carol White' │ 27 │ 'Sales' │ 62000 │
|
|
585
|
+
└─────────┴──────────────────────┴─────┴──────────────┴────────┘
|
|
586
|
+
*/
|
|
587
|
+
|
|
588
|
+
// Single-column grouping: Group by department and calculate mean salary
|
|
589
|
+
const avgSalaryByDept = df.groupBy('department').mean('salary')
|
|
590
|
+
avgSalaryByDept.show
|
|
591
|
+
/*
|
|
592
|
+
┌─────────┬──────────────┬──────────────┐
|
|
593
|
+
│ (index) │ department │ salary_mean │
|
|
594
|
+
├─────────┼──────────────┼──────────────┤
|
|
595
|
+
│ 0 │ 'Engineering'│ 93500 │
|
|
596
|
+
│ 1 │ 'Marketing' │ 76500 │
|
|
597
|
+
│ 2 │ 'Sales' │ 63500 │
|
|
598
|
+
└─────────┴──────────────┴──────────────┘
|
|
599
|
+
*/
|
|
600
|
+
|
|
601
|
+
// Group by department and calculate sum of salaries
|
|
602
|
+
const totalSalaryByDept = df.groupBy('department').sum('salary')
|
|
603
|
+
totalSalaryByDept.show
|
|
604
|
+
/*
|
|
605
|
+
┌─────────┬──────────────┬──────────────┐
|
|
606
|
+
│ (index) │ department │ salary_sum │
|
|
607
|
+
├─────────┼──────────────┼──────────────┤
|
|
608
|
+
│ 0 │ 'Engineering'│ 187000 │
|
|
609
|
+
│ 1 │ 'Marketing' │ 153000 │
|
|
610
|
+
│ 2 │ 'Sales' │ 127000 │
|
|
611
|
+
└─────────┴──────────────┴──────────────┘
|
|
612
|
+
*/
|
|
613
|
+
|
|
614
|
+
// Group by department and count employees
|
|
615
|
+
const countByDept = df.groupBy('department').count()
|
|
616
|
+
countByDept.show
|
|
617
|
+
/*
|
|
618
|
+
┌─────────┬──────────────┬───────┐
|
|
619
|
+
│ (index) │ department │ count │
|
|
620
|
+
├─────────┼──────────────┼───────┤
|
|
621
|
+
│ 0 │ 'Engineering'│ 2 │
|
|
622
|
+
│ 1 │ 'Marketing' │ 2 │
|
|
623
|
+
│ 2 │ 'Sales' │ 2 │
|
|
624
|
+
└─────────┴──────────────┴───────┘
|
|
625
|
+
*/
|
|
626
|
+
|
|
627
|
+
// Group by department and find minimum age
|
|
628
|
+
const minAgeByDept = df.groupBy('department').min('age')
|
|
629
|
+
minAgeByDept.show
|
|
630
|
+
/*
|
|
631
|
+
┌─────────┬──────────────┬──────────┐
|
|
632
|
+
│ (index) │ department │ age_min │
|
|
633
|
+
├─────────┼──────────────┼──────────┤
|
|
634
|
+
│ 0 │ 'Engineering'│ 29 │
|
|
635
|
+
│ 1 │ 'Marketing' │ 30 │
|
|
636
|
+
│ 2 │ 'Sales' │ 27 │
|
|
637
|
+
└─────────┴──────────────┴──────────┘
|
|
638
|
+
*/
|
|
639
|
+
|
|
640
|
+
// Group by department and find maximum age
|
|
641
|
+
const maxAgeByDept = df.groupBy('department').max('age')
|
|
642
|
+
maxAgeByDept.show
|
|
643
|
+
/*
|
|
644
|
+
┌─────────┬──────────────┬──────────┐
|
|
645
|
+
│ (index) │ department │ age_max │
|
|
646
|
+
├─────────┼──────────────┼──────────┤
|
|
647
|
+
│ 0 │ 'Engineering'│ 32 │
|
|
648
|
+
│ 1 │ 'Marketing' │ 31 │
|
|
649
|
+
│ 2 │ 'Sales' │ 28 │
|
|
650
|
+
└─────────┴──────────────┴──────────┘
|
|
651
|
+
*/
|
|
652
|
+
|
|
653
|
+
// Multi-column grouping: Group by department and age
|
|
654
|
+
const groupedByDeptAndAge = df.groupBy(['department', 'age']).count()
|
|
655
|
+
groupedByDeptAndAge.show
|
|
656
|
+
/*
|
|
657
|
+
┌─────────┬──────────────┬─────┬───────┐
|
|
658
|
+
│ (index) │ department │ age │ count │
|
|
659
|
+
├─────────┼──────────────┼─────┼───────┤
|
|
660
|
+
│ 0 │ 'Engineering'│ 29 │ 1 │
|
|
661
|
+
│ 1 │ 'Engineering'│ 32 │ 1 │
|
|
662
|
+
│ 2 │ 'Marketing' │ 30 │ 1 │
|
|
663
|
+
│ 3 │ 'Marketing' │ 31 │ 1 │
|
|
664
|
+
│ 4 │ 'Sales' │ 27 │ 1 │
|
|
665
|
+
│ 5 │ 'Sales' │ 28 │ 1 │
|
|
666
|
+
└─────────┴──────────────┴─────┴───────┘
|
|
396
667
|
*/
|
|
397
668
|
```
|
|
398
669
|
|
|
@@ -426,4 +697,5 @@ console.log(df.DevId)
|
|
|
426
697
|
|
|
427
698
|
+ [JavaScript getter](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Functions/get)
|
|
428
699
|
|
|
700
|
+
+ [JavaScript (enumerable, writable, configurable)](https://hashnode.com/post/what-are-enumerable-properties-in-javascript-ciljnbtqa000exx53n5nbkykx)
|
|
429
701
|
|