myawesomepkg 0.1.5__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1999 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 6,
6
+ "id": "4f69a04d-f81c-4de7-9fcd-9e8e53dde5d2",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "data": {
11
+ "text/plain": [
12
+ "(1035, 6)"
13
+ ]
14
+ },
15
+ "execution_count": 6,
16
+ "metadata": {},
17
+ "output_type": "execute_result"
18
+ }
19
+ ],
20
+ "source": [
21
+ " # Aggregation and Grouping\n",
22
+ " import seaborn as sns\n",
23
+ " import numpy as np\n",
24
+ " import pandas as pd\n",
25
+ "\n",
26
+ " planets = sns.load_dataset('planets')\n",
27
+ " planets.shape"
28
+ ]
29
+ },
30
+ {
31
+ "cell_type": "code",
32
+ "execution_count": 2,
33
+ "id": "d0a85e69-4dee-4a89-af0b-0b1619214246",
34
+ "metadata": {},
35
+ "outputs": [
36
+ {
37
+ "data": {
38
+ "text/html": [
39
+ "<div>\n",
40
+ "<style scoped>\n",
41
+ " .dataframe tbody tr th:only-of-type {\n",
42
+ " vertical-align: middle;\n",
43
+ " }\n",
44
+ "\n",
45
+ " .dataframe tbody tr th {\n",
46
+ " vertical-align: top;\n",
47
+ " }\n",
48
+ "\n",
49
+ " .dataframe thead th {\n",
50
+ " text-align: right;\n",
51
+ " }\n",
52
+ "</style>\n",
53
+ "<table border=\"1\" class=\"dataframe\">\n",
54
+ " <thead>\n",
55
+ " <tr style=\"text-align: right;\">\n",
56
+ " <th></th>\n",
57
+ " <th>method</th>\n",
58
+ " <th>number</th>\n",
59
+ " <th>orbital_period</th>\n",
60
+ " <th>mass</th>\n",
61
+ " <th>distance</th>\n",
62
+ " <th>year</th>\n",
63
+ " </tr>\n",
64
+ " </thead>\n",
65
+ " <tbody>\n",
66
+ " <tr>\n",
67
+ " <th>0</th>\n",
68
+ " <td>Radial Velocity</td>\n",
69
+ " <td>1</td>\n",
70
+ " <td>269.300</td>\n",
71
+ " <td>7.10</td>\n",
72
+ " <td>77.40</td>\n",
73
+ " <td>2006</td>\n",
74
+ " </tr>\n",
75
+ " <tr>\n",
76
+ " <th>1</th>\n",
77
+ " <td>Radial Velocity</td>\n",
78
+ " <td>1</td>\n",
79
+ " <td>874.774</td>\n",
80
+ " <td>2.21</td>\n",
81
+ " <td>56.95</td>\n",
82
+ " <td>2008</td>\n",
83
+ " </tr>\n",
84
+ " <tr>\n",
85
+ " <th>2</th>\n",
86
+ " <td>Radial Velocity</td>\n",
87
+ " <td>1</td>\n",
88
+ " <td>763.000</td>\n",
89
+ " <td>2.60</td>\n",
90
+ " <td>19.84</td>\n",
91
+ " <td>2011</td>\n",
92
+ " </tr>\n",
93
+ " <tr>\n",
94
+ " <th>3</th>\n",
95
+ " <td>Radial Velocity</td>\n",
96
+ " <td>1</td>\n",
97
+ " <td>326.030</td>\n",
98
+ " <td>19.40</td>\n",
99
+ " <td>110.62</td>\n",
100
+ " <td>2007</td>\n",
101
+ " </tr>\n",
102
+ " <tr>\n",
103
+ " <th>4</th>\n",
104
+ " <td>Radial Velocity</td>\n",
105
+ " <td>1</td>\n",
106
+ " <td>516.220</td>\n",
107
+ " <td>10.50</td>\n",
108
+ " <td>119.47</td>\n",
109
+ " <td>2009</td>\n",
110
+ " </tr>\n",
111
+ " </tbody>\n",
112
+ "</table>\n",
113
+ "</div>"
114
+ ],
115
+ "text/plain": [
116
+ " method number orbital_period mass distance year\n",
117
+ "0 Radial Velocity 1 269.300 7.10 77.40 2006\n",
118
+ "1 Radial Velocity 1 874.774 2.21 56.95 2008\n",
119
+ "2 Radial Velocity 1 763.000 2.60 19.84 2011\n",
120
+ "3 Radial Velocity 1 326.030 19.40 110.62 2007\n",
121
+ "4 Radial Velocity 1 516.220 10.50 119.47 2009"
122
+ ]
123
+ },
124
+ "execution_count": 2,
125
+ "metadata": {},
126
+ "output_type": "execute_result"
127
+ }
128
+ ],
129
+ "source": [
130
+ "planets.head()"
131
+ ]
132
+ },
133
+ {
134
+ "cell_type": "code",
135
+ "execution_count": 7,
136
+ "id": "a3af591e-4d36-421f-9193-a531fbf71daf",
137
+ "metadata": {},
138
+ "outputs": [
139
+ {
140
+ "data": {
141
+ "text/plain": [
142
+ "0 0.374540\n",
143
+ "1 0.950714\n",
144
+ "2 0.731994\n",
145
+ "3 0.598658\n",
146
+ "4 0.156019\n",
147
+ "dtype: float64"
148
+ ]
149
+ },
150
+ "execution_count": 7,
151
+ "metadata": {},
152
+ "output_type": "execute_result"
153
+ }
154
+ ],
155
+ "source": [
156
+ "rng = np.random.RandomState(42)\n",
157
+ "ser = pd.Series(rng.rand(5))\n",
158
+ "ser"
159
+ ]
160
+ },
161
+ {
162
+ "cell_type": "code",
163
+ "execution_count": 8,
164
+ "id": "e1551693-5459-4fda-ad83-6bf8983da04e",
165
+ "metadata": {},
166
+ "outputs": [
167
+ {
168
+ "data": {
169
+ "text/plain": [
170
+ "np.float64(2.811925491708157)"
171
+ ]
172
+ },
173
+ "execution_count": 8,
174
+ "metadata": {},
175
+ "output_type": "execute_result"
176
+ }
177
+ ],
178
+ "source": [
179
+ "ser.sum()"
180
+ ]
181
+ },
182
+ {
183
+ "cell_type": "code",
184
+ "execution_count": 9,
185
+ "id": "4229b069-a49f-4a35-a2a4-4c72665c3f15",
186
+ "metadata": {},
187
+ "outputs": [
188
+ {
189
+ "data": {
190
+ "text/plain": [
191
+ "np.float64(0.5623850983416314)"
192
+ ]
193
+ },
194
+ "execution_count": 9,
195
+ "metadata": {},
196
+ "output_type": "execute_result"
197
+ }
198
+ ],
199
+ "source": [
200
+ "ser.mean()"
201
+ ]
202
+ },
203
+ {
204
+ "cell_type": "code",
205
+ "execution_count": 11,
206
+ "id": "09eb4bfb-5496-4e30-9c93-c50663b5466d",
207
+ "metadata": {},
208
+ "outputs": [
209
+ {
210
+ "data": {
211
+ "text/html": [
212
+ "<div>\n",
213
+ "<style scoped>\n",
214
+ " .dataframe tbody tr th:only-of-type {\n",
215
+ " vertical-align: middle;\n",
216
+ " }\n",
217
+ "\n",
218
+ " .dataframe tbody tr th {\n",
219
+ " vertical-align: top;\n",
220
+ " }\n",
221
+ "\n",
222
+ " .dataframe thead th {\n",
223
+ " text-align: right;\n",
224
+ " }\n",
225
+ "</style>\n",
226
+ "<table border=\"1\" class=\"dataframe\">\n",
227
+ " <thead>\n",
228
+ " <tr style=\"text-align: right;\">\n",
229
+ " <th></th>\n",
230
+ " <th>A</th>\n",
231
+ " <th>B</th>\n",
232
+ " </tr>\n",
233
+ " </thead>\n",
234
+ " <tbody>\n",
235
+ " <tr>\n",
236
+ " <th>0</th>\n",
237
+ " <td>0.183405</td>\n",
238
+ " <td>0.611853</td>\n",
239
+ " </tr>\n",
240
+ " <tr>\n",
241
+ " <th>1</th>\n",
242
+ " <td>0.304242</td>\n",
243
+ " <td>0.139494</td>\n",
244
+ " </tr>\n",
245
+ " <tr>\n",
246
+ " <th>2</th>\n",
247
+ " <td>0.524756</td>\n",
248
+ " <td>0.292145</td>\n",
249
+ " </tr>\n",
250
+ " <tr>\n",
251
+ " <th>3</th>\n",
252
+ " <td>0.431945</td>\n",
253
+ " <td>0.366362</td>\n",
254
+ " </tr>\n",
255
+ " <tr>\n",
256
+ " <th>4</th>\n",
257
+ " <td>0.291229</td>\n",
258
+ " <td>0.456070</td>\n",
259
+ " </tr>\n",
260
+ " </tbody>\n",
261
+ "</table>\n",
262
+ "</div>"
263
+ ],
264
+ "text/plain": [
265
+ " A B\n",
266
+ "0 0.183405 0.611853\n",
267
+ "1 0.304242 0.139494\n",
268
+ "2 0.524756 0.292145\n",
269
+ "3 0.431945 0.366362\n",
270
+ "4 0.291229 0.456070"
271
+ ]
272
+ },
273
+ "execution_count": 11,
274
+ "metadata": {},
275
+ "output_type": "execute_result"
276
+ }
277
+ ],
278
+ "source": [
279
+ "df = pd.DataFrame({'A': rng.rand(5),\n",
280
+ " 'B': rng.rand(5)})\n",
281
+ "df"
282
+ ]
283
+ },
284
+ {
285
+ "cell_type": "code",
286
+ "execution_count": 12,
287
+ "id": "dd941315-2e2b-42a4-80a9-a50dc81a291d",
288
+ "metadata": {},
289
+ "outputs": [
290
+ {
291
+ "data": {
292
+ "text/plain": [
293
+ "A 0.347115\n",
294
+ "B 0.373185\n",
295
+ "dtype: float64"
296
+ ]
297
+ },
298
+ "execution_count": 12,
299
+ "metadata": {},
300
+ "output_type": "execute_result"
301
+ }
302
+ ],
303
+ "source": [
304
+ "df.mean()"
305
+ ]
306
+ },
307
+ {
308
+ "cell_type": "code",
309
+ "execution_count": 13,
310
+ "id": "b1c5221c-fd2f-44aa-a2a7-64cf11940039",
311
+ "metadata": {},
312
+ "outputs": [
313
+ {
314
+ "data": {
315
+ "text/plain": [
316
+ "0 0.397629\n",
317
+ "1 0.221868\n",
318
+ "2 0.408451\n",
319
+ "3 0.399153\n",
320
+ "4 0.373650\n",
321
+ "dtype: float64"
322
+ ]
323
+ },
324
+ "execution_count": 13,
325
+ "metadata": {},
326
+ "output_type": "execute_result"
327
+ }
328
+ ],
329
+ "source": [
330
+ "df.mean(axis='columns')"
331
+ ]
332
+ },
333
+ {
334
+ "cell_type": "code",
335
+ "execution_count": 14,
336
+ "id": "3c3c9815-5ff7-4522-9b78-f79b7d2749c2",
337
+ "metadata": {},
338
+ "outputs": [
339
+ {
340
+ "data": {
341
+ "text/html": [
342
+ "<div>\n",
343
+ "<style scoped>\n",
344
+ " .dataframe tbody tr th:only-of-type {\n",
345
+ " vertical-align: middle;\n",
346
+ " }\n",
347
+ "\n",
348
+ " .dataframe tbody tr th {\n",
349
+ " vertical-align: top;\n",
350
+ " }\n",
351
+ "\n",
352
+ " .dataframe thead th {\n",
353
+ " text-align: right;\n",
354
+ " }\n",
355
+ "</style>\n",
356
+ "<table border=\"1\" class=\"dataframe\">\n",
357
+ " <thead>\n",
358
+ " <tr style=\"text-align: right;\">\n",
359
+ " <th></th>\n",
360
+ " <th>number</th>\n",
361
+ " <th>orbital_period</th>\n",
362
+ " <th>mass</th>\n",
363
+ " <th>distance</th>\n",
364
+ " <th>year</th>\n",
365
+ " </tr>\n",
366
+ " </thead>\n",
367
+ " <tbody>\n",
368
+ " <tr>\n",
369
+ " <th>count</th>\n",
370
+ " <td>498.00000</td>\n",
371
+ " <td>498.000000</td>\n",
372
+ " <td>498.000000</td>\n",
373
+ " <td>498.000000</td>\n",
374
+ " <td>498.000000</td>\n",
375
+ " </tr>\n",
376
+ " <tr>\n",
377
+ " <th>mean</th>\n",
378
+ " <td>1.73494</td>\n",
379
+ " <td>835.778671</td>\n",
380
+ " <td>2.509320</td>\n",
381
+ " <td>52.068213</td>\n",
382
+ " <td>2007.377510</td>\n",
383
+ " </tr>\n",
384
+ " <tr>\n",
385
+ " <th>std</th>\n",
386
+ " <td>1.17572</td>\n",
387
+ " <td>1469.128259</td>\n",
388
+ " <td>3.636274</td>\n",
389
+ " <td>46.596041</td>\n",
390
+ " <td>4.167284</td>\n",
391
+ " </tr>\n",
392
+ " <tr>\n",
393
+ " <th>min</th>\n",
394
+ " <td>1.00000</td>\n",
395
+ " <td>1.328300</td>\n",
396
+ " <td>0.003600</td>\n",
397
+ " <td>1.350000</td>\n",
398
+ " <td>1989.000000</td>\n",
399
+ " </tr>\n",
400
+ " <tr>\n",
401
+ " <th>25%</th>\n",
402
+ " <td>1.00000</td>\n",
403
+ " <td>38.272250</td>\n",
404
+ " <td>0.212500</td>\n",
405
+ " <td>24.497500</td>\n",
406
+ " <td>2005.000000</td>\n",
407
+ " </tr>\n",
408
+ " <tr>\n",
409
+ " <th>50%</th>\n",
410
+ " <td>1.00000</td>\n",
411
+ " <td>357.000000</td>\n",
412
+ " <td>1.245000</td>\n",
413
+ " <td>39.940000</td>\n",
414
+ " <td>2009.000000</td>\n",
415
+ " </tr>\n",
416
+ " <tr>\n",
417
+ " <th>75%</th>\n",
418
+ " <td>2.00000</td>\n",
419
+ " <td>999.600000</td>\n",
420
+ " <td>2.867500</td>\n",
421
+ " <td>59.332500</td>\n",
422
+ " <td>2011.000000</td>\n",
423
+ " </tr>\n",
424
+ " <tr>\n",
425
+ " <th>max</th>\n",
426
+ " <td>6.00000</td>\n",
427
+ " <td>17337.500000</td>\n",
428
+ " <td>25.000000</td>\n",
429
+ " <td>354.000000</td>\n",
430
+ " <td>2014.000000</td>\n",
431
+ " </tr>\n",
432
+ " </tbody>\n",
433
+ "</table>\n",
434
+ "</div>"
435
+ ],
436
+ "text/plain": [
437
+ " number orbital_period mass distance year\n",
438
+ "count 498.00000 498.000000 498.000000 498.000000 498.000000\n",
439
+ "mean 1.73494 835.778671 2.509320 52.068213 2007.377510\n",
440
+ "std 1.17572 1469.128259 3.636274 46.596041 4.167284\n",
441
+ "min 1.00000 1.328300 0.003600 1.350000 1989.000000\n",
442
+ "25% 1.00000 38.272250 0.212500 24.497500 2005.000000\n",
443
+ "50% 1.00000 357.000000 1.245000 39.940000 2009.000000\n",
444
+ "75% 2.00000 999.600000 2.867500 59.332500 2011.000000\n",
445
+ "max 6.00000 17337.500000 25.000000 354.000000 2014.000000"
446
+ ]
447
+ },
448
+ "execution_count": 14,
449
+ "metadata": {},
450
+ "output_type": "execute_result"
451
+ }
452
+ ],
453
+ "source": [
454
+ "planets.dropna().describe()"
455
+ ]
456
+ },
457
+ {
458
+ "cell_type": "code",
459
+ "execution_count": 18,
460
+ "id": "443f472b-83a4-4468-a104-5fc39c327453",
461
+ "metadata": {},
462
+ "outputs": [
463
+ {
464
+ "name": "stdout",
465
+ "output_type": "stream",
466
+ "text": [
467
+ " A B\n",
468
+ "0 0.183405 0.611853\n",
469
+ "1 0.304242 0.139494\n",
470
+ "2 0.524756 0.292145\n",
471
+ "3 0.431945 0.366362\n",
472
+ "4 0.291229 0.456070\n"
473
+ ]
474
+ },
475
+ {
476
+ "ename": "KeyError",
477
+ "evalue": "'key'",
478
+ "output_type": "error",
479
+ "traceback": [
480
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
481
+ "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
482
+ "Cell \u001b[1;32mIn[18], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfilter_func\u001b[39m(x):\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m x[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata2\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mstd() \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m4\u001b[39m\n\u001b[1;32m----> 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(df); \u001b[38;5;28mprint\u001b[39m(df\u001b[38;5;241m.\u001b[39mgroupby(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mkey\u001b[39m\u001b[38;5;124m'\u001b[39m)\u001b[38;5;241m.\u001b[39mstd());\n\u001b[0;32m 4\u001b[0m \u001b[38;5;28mprint\u001b[39m(df\u001b[38;5;241m.\u001b[39mgroupby(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mkey\u001b[39m\u001b[38;5;124m'\u001b[39m)\u001b[38;5;241m.\u001b[39mfilter(filter_func))\n",
483
+ "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\pandas\\core\\frame.py:9183\u001b[0m, in \u001b[0;36mDataFrame.groupby\u001b[1;34m(self, by, axis, level, as_index, sort, group_keys, observed, dropna)\u001b[0m\n\u001b[0;32m 9180\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m level \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m by \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 9181\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou have to supply one of \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mby\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m and \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlevel\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m-> 9183\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mDataFrameGroupBy\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 9184\u001b[0m \u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 9185\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mby\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 9186\u001b[0m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 9187\u001b[0m \u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlevel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 9188\u001b[0m \u001b[43m \u001b[49m\u001b[43mas_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mas_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 9189\u001b[0m \u001b[43m \u001b[49m\u001b[43msort\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msort\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 9190\u001b[0m 
\u001b[43m \u001b[49m\u001b[43mgroup_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup_keys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 9191\u001b[0m \u001b[43m \u001b[49m\u001b[43mobserved\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mobserved\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 9192\u001b[0m \u001b[43m \u001b[49m\u001b[43mdropna\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdropna\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 9193\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
484
+ "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\pandas\\core\\groupby\\groupby.py:1329\u001b[0m, in \u001b[0;36mGroupBy.__init__\u001b[1;34m(self, obj, keys, axis, level, grouper, exclusions, selection, as_index, sort, group_keys, observed, dropna)\u001b[0m\n\u001b[0;32m 1326\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdropna \u001b[38;5;241m=\u001b[39m dropna\n\u001b[0;32m 1328\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m grouper \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m-> 1329\u001b[0m grouper, exclusions, obj \u001b[38;5;241m=\u001b[39m \u001b[43mget_grouper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 1330\u001b[0m \u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1331\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1332\u001b[0m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1333\u001b[0m \u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlevel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1334\u001b[0m \u001b[43m \u001b[49m\u001b[43msort\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msort\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1335\u001b[0m \u001b[43m \u001b[49m\u001b[43mobserved\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mobserved\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mno_default\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mobserved\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1336\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mdropna\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdropna\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1337\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1339\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m observed \u001b[38;5;129;01mis\u001b[39;00m lib\u001b[38;5;241m.\u001b[39mno_default:\n\u001b[0;32m 1340\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28many\u001b[39m(ping\u001b[38;5;241m.\u001b[39m_passed_categorical \u001b[38;5;28;01mfor\u001b[39;00m ping \u001b[38;5;129;01min\u001b[39;00m grouper\u001b[38;5;241m.\u001b[39mgroupings):\n",
485
+ "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\pandas\\core\\groupby\\grouper.py:1043\u001b[0m, in \u001b[0;36mget_grouper\u001b[1;34m(obj, key, axis, level, sort, observed, validate, dropna)\u001b[0m\n\u001b[0;32m 1041\u001b[0m in_axis, level, gpr \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m, gpr, \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 1042\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1043\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(gpr)\n\u001b[0;32m 1044\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(gpr, Grouper) \u001b[38;5;129;01mand\u001b[39;00m gpr\u001b[38;5;241m.\u001b[39mkey \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 1045\u001b[0m \u001b[38;5;66;03m# Add key to exclusions\u001b[39;00m\n\u001b[0;32m 1046\u001b[0m exclusions\u001b[38;5;241m.\u001b[39madd(gpr\u001b[38;5;241m.\u001b[39mkey)\n",
486
+ "\u001b[1;31mKeyError\u001b[0m: 'key'"
487
+ ]
488
+ }
489
+ ],
490
+ "source": [
491
+ " def filter_func(x):\n",
492
+ " return x['data2'].std() > 4\n",
493
+ " print(df); print(df.groupby('key').std());\n",
494
+ " print(df.groupby('key').filter(filter_func))\n",
495
+ " "
496
+ ]
497
+ },
498
+ {
499
+ "cell_type": "code",
500
+ "execution_count": 22,
501
+ "id": "40071505-de3c-4940-89ea-d38b0f1212b8",
502
+ "metadata": {},
503
+ "outputs": [
504
+ {
505
+ "data": {
506
+ "text/html": [
507
+ "<div>\n",
508
+ "<style scoped>\n",
509
+ " .dataframe tbody tr th:only-of-type {\n",
510
+ " vertical-align: middle;\n",
511
+ " }\n",
512
+ "\n",
513
+ " .dataframe tbody tr th {\n",
514
+ " vertical-align: top;\n",
515
+ " }\n",
516
+ "\n",
517
+ " .dataframe thead th {\n",
518
+ " text-align: right;\n",
519
+ " }\n",
520
+ "</style>\n",
521
+ "<table border=\"1\" class=\"dataframe\">\n",
522
+ " <thead>\n",
523
+ " <tr style=\"text-align: right;\">\n",
524
+ " <th></th>\n",
525
+ " <th>key</th>\n",
526
+ " <th>data1</th>\n",
527
+ " <th>data2</th>\n",
528
+ " </tr>\n",
529
+ " </thead>\n",
530
+ " <tbody>\n",
531
+ " <tr>\n",
532
+ " <th>0</th>\n",
533
+ " <td>A</td>\n",
534
+ " <td>0</td>\n",
535
+ " <td>5</td>\n",
536
+ " </tr>\n",
537
+ " <tr>\n",
538
+ " <th>1</th>\n",
539
+ " <td>B</td>\n",
540
+ " <td>1</td>\n",
541
+ " <td>0</td>\n",
542
+ " </tr>\n",
543
+ " <tr>\n",
544
+ " <th>2</th>\n",
545
+ " <td>C</td>\n",
546
+ " <td>2</td>\n",
547
+ " <td>3</td>\n",
548
+ " </tr>\n",
549
+ " <tr>\n",
550
+ " <th>3</th>\n",
551
+ " <td>A</td>\n",
552
+ " <td>3</td>\n",
553
+ " <td>3</td>\n",
554
+ " </tr>\n",
555
+ " <tr>\n",
556
+ " <th>4</th>\n",
557
+ " <td>B</td>\n",
558
+ " <td>4</td>\n",
559
+ " <td>7</td>\n",
560
+ " </tr>\n",
561
+ " <tr>\n",
562
+ " <th>5</th>\n",
563
+ " <td>C</td>\n",
564
+ " <td>5</td>\n",
565
+ " <td>9</td>\n",
566
+ " </tr>\n",
567
+ " </tbody>\n",
568
+ "</table>\n",
569
+ "</div>"
570
+ ],
571
+ "text/plain": [
572
+ " key data1 data2\n",
573
+ "0 A 0 5\n",
574
+ "1 B 1 0\n",
575
+ "2 C 2 3\n",
576
+ "3 A 3 3\n",
577
+ "4 B 4 7\n",
578
+ "5 C 5 9"
579
+ ]
580
+ },
581
+ "execution_count": 22,
582
+ "metadata": {},
583
+ "output_type": "execute_result"
584
+ }
585
+ ],
586
+ "source": [
587
+ "# Aggregate, filter, transform, apply\n",
588
+ "rng = np.random.RandomState(0)\n",
589
+ "df = pd.DataFrame({'key': ['A', 'B', 'C', 'A', 'B', 'C'],\n",
590
+ " 'data1': range(6),\n",
591
+ " 'data2': rng.randint(0, 10, 6)},\n",
592
+ " columns = ['key', 'data1', 'data2'])\n",
593
+ "df"
594
+ ]
595
+ },
596
+ {
597
+ "cell_type": "code",
598
+ "execution_count": 23,
599
+ "id": "2c3e28cc-a33e-4567-8b3c-84f243692d1c",
600
+ "metadata": {},
601
+ "outputs": [
602
+ {
603
+ "name": "stderr",
604
+ "output_type": "stream",
605
+ "text": [
606
+ "C:\\Users\\darsh\\AppData\\Local\\Temp\\ipykernel_1940\\968873422.py:1: FutureWarning: The provided callable <function median at 0x00000235EACE3060> is currently using SeriesGroupBy.median. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"median\" instead.\n",
607
+ " df.groupby('key').aggregate(['min', np.median, max])\n",
608
+ "C:\\Users\\darsh\\AppData\\Local\\Temp\\ipykernel_1940\\968873422.py:1: FutureWarning: The provided callable <built-in function max> is currently using SeriesGroupBy.max. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"max\" instead.\n",
609
+ " df.groupby('key').aggregate(['min', np.median, max])\n"
610
+ ]
611
+ },
612
+ {
613
+ "data": {
614
+ "text/html": [
615
+ "<div>\n",
616
+ "<style scoped>\n",
617
+ " .dataframe tbody tr th:only-of-type {\n",
618
+ " vertical-align: middle;\n",
619
+ " }\n",
620
+ "\n",
621
+ " .dataframe tbody tr th {\n",
622
+ " vertical-align: top;\n",
623
+ " }\n",
624
+ "\n",
625
+ " .dataframe thead tr th {\n",
626
+ " text-align: left;\n",
627
+ " }\n",
628
+ "\n",
629
+ " .dataframe thead tr:last-of-type th {\n",
630
+ " text-align: right;\n",
631
+ " }\n",
632
+ "</style>\n",
633
+ "<table border=\"1\" class=\"dataframe\">\n",
634
+ " <thead>\n",
635
+ " <tr>\n",
636
+ " <th></th>\n",
637
+ " <th colspan=\"3\" halign=\"left\">data1</th>\n",
638
+ " <th colspan=\"3\" halign=\"left\">data2</th>\n",
639
+ " </tr>\n",
640
+ " <tr>\n",
641
+ " <th></th>\n",
642
+ " <th>min</th>\n",
643
+ " <th>median</th>\n",
644
+ " <th>max</th>\n",
645
+ " <th>min</th>\n",
646
+ " <th>median</th>\n",
647
+ " <th>max</th>\n",
648
+ " </tr>\n",
649
+ " <tr>\n",
650
+ " <th>key</th>\n",
651
+ " <th></th>\n",
652
+ " <th></th>\n",
653
+ " <th></th>\n",
654
+ " <th></th>\n",
655
+ " <th></th>\n",
656
+ " <th></th>\n",
657
+ " </tr>\n",
658
+ " </thead>\n",
659
+ " <tbody>\n",
660
+ " <tr>\n",
661
+ " <th>A</th>\n",
662
+ " <td>0</td>\n",
663
+ " <td>1.5</td>\n",
664
+ " <td>3</td>\n",
665
+ " <td>3</td>\n",
666
+ " <td>4.0</td>\n",
667
+ " <td>5</td>\n",
668
+ " </tr>\n",
669
+ " <tr>\n",
670
+ " <th>B</th>\n",
671
+ " <td>1</td>\n",
672
+ " <td>2.5</td>\n",
673
+ " <td>4</td>\n",
674
+ " <td>0</td>\n",
675
+ " <td>3.5</td>\n",
676
+ " <td>7</td>\n",
677
+ " </tr>\n",
678
+ " <tr>\n",
679
+ " <th>C</th>\n",
680
+ " <td>2</td>\n",
681
+ " <td>3.5</td>\n",
682
+ " <td>5</td>\n",
683
+ " <td>3</td>\n",
684
+ " <td>6.0</td>\n",
685
+ " <td>9</td>\n",
686
+ " </tr>\n",
687
+ " </tbody>\n",
688
+ "</table>\n",
689
+ "</div>"
690
+ ],
691
+ "text/plain": [
692
+ " data1 data2 \n",
693
+ " min median max min median max\n",
694
+ "key \n",
695
+ "A 0 1.5 3 3 4.0 5\n",
696
+ "B 1 2.5 4 0 3.5 7\n",
697
+ "C 2 3.5 5 3 6.0 9"
698
+ ]
699
+ },
700
+ "execution_count": 23,
701
+ "metadata": {},
702
+ "output_type": "execute_result"
703
+ }
704
+ ],
705
+ "source": [
706
+ "df.groupby('key').aggregate(['min', np.median, max])"
707
+ ]
708
+ },
709
+ {
710
+ "cell_type": "code",
711
+ "execution_count": 24,
712
+ "id": "c05f9979-f316-4c52-8398-7b4db4623523",
713
+ "metadata": {},
714
+ "outputs": [
715
+ {
716
+ "data": {
717
+ "text/html": [
718
+ "<div>\n",
719
+ "<style scoped>\n",
720
+ " .dataframe tbody tr th:only-of-type {\n",
721
+ " vertical-align: middle;\n",
722
+ " }\n",
723
+ "\n",
724
+ " .dataframe tbody tr th {\n",
725
+ " vertical-align: top;\n",
726
+ " }\n",
727
+ "\n",
728
+ " .dataframe thead th {\n",
729
+ " text-align: right;\n",
730
+ " }\n",
731
+ "</style>\n",
732
+ "<table border=\"1\" class=\"dataframe\">\n",
733
+ " <thead>\n",
734
+ " <tr style=\"text-align: right;\">\n",
735
+ " <th></th>\n",
736
+ " <th>data1</th>\n",
737
+ " <th>data2</th>\n",
738
+ " </tr>\n",
739
+ " <tr>\n",
740
+ " <th>key</th>\n",
741
+ " <th></th>\n",
742
+ " <th></th>\n",
743
+ " </tr>\n",
744
+ " </thead>\n",
745
+ " <tbody>\n",
746
+ " <tr>\n",
747
+ " <th>A</th>\n",
748
+ " <td>0</td>\n",
749
+ " <td>5</td>\n",
750
+ " </tr>\n",
751
+ " <tr>\n",
752
+ " <th>B</th>\n",
753
+ " <td>1</td>\n",
754
+ " <td>7</td>\n",
755
+ " </tr>\n",
756
+ " <tr>\n",
757
+ " <th>C</th>\n",
758
+ " <td>2</td>\n",
759
+ " <td>9</td>\n",
760
+ " </tr>\n",
761
+ " </tbody>\n",
762
+ "</table>\n",
763
+ "</div>"
764
+ ],
765
+ "text/plain": [
766
+ " data1 data2\n",
767
+ "key \n",
768
+ "A 0 5\n",
769
+ "B 1 7\n",
770
+ "C 2 9"
771
+ ]
772
+ },
773
+ "execution_count": 24,
774
+ "metadata": {},
775
+ "output_type": "execute_result"
776
+ }
777
+ ],
778
+ "source": [
779
+ " df.groupby('key').aggregate({'data1': 'min',\n",
780
+ " 'data2': 'max'})"
781
+ ]
782
+ },
783
+ {
784
+ "cell_type": "code",
785
+ "execution_count": 25,
786
+ "id": "c065e650-3fe4-46a3-b1c0-a12c2b854bf3",
787
+ "metadata": {},
788
+ "outputs": [
789
+ {
790
+ "name": "stdout",
791
+ "output_type": "stream",
792
+ "text": [
793
+ " key data1 data2\n",
794
+ "0 A 0 5\n",
795
+ "1 B 1 0\n",
796
+ "2 C 2 3\n",
797
+ "3 A 3 3\n",
798
+ "4 B 4 7\n",
799
+ "5 C 5 9\n",
800
+ " data1 data2\n",
801
+ "key \n",
802
+ "A 2.12132 1.414214\n",
803
+ "B 2.12132 4.949747\n",
804
+ "C 2.12132 4.242641\n",
805
+ " key data1 data2\n",
806
+ "1 B 1 0\n",
807
+ "2 C 2 3\n",
808
+ "4 B 4 7\n",
809
+ "5 C 5 9\n"
810
+ ]
811
+ }
812
+ ],
813
+ "source": [
814
+ "# Filtering. \n",
815
+ "def filter_func(x):\n",
816
+ " return x['data2'].std() > 4\n",
817
+ "print(df); print(df.groupby('key').std());\n",
818
+ "print(df.groupby('key').filter(filter_func))"
819
+ ]
820
+ },
821
+ {
822
+ "cell_type": "code",
823
+ "execution_count": 28,
824
+ "id": "a0f958e6-d237-45df-b85b-483bcb13dfee",
825
+ "metadata": {},
826
+ "outputs": [
827
+ {
828
+ "data": {
829
+ "text/html": [
830
+ "<div>\n",
831
+ "<style scoped>\n",
832
+ " .dataframe tbody tr th:only-of-type {\n",
833
+ " vertical-align: middle;\n",
834
+ " }\n",
835
+ "\n",
836
+ " .dataframe tbody tr th {\n",
837
+ " vertical-align: top;\n",
838
+ " }\n",
839
+ "\n",
840
+ " .dataframe thead th {\n",
841
+ " text-align: right;\n",
842
+ " }\n",
843
+ "</style>\n",
844
+ "<table border=\"1\" class=\"dataframe\">\n",
845
+ " <thead>\n",
846
+ " <tr style=\"text-align: right;\">\n",
847
+ " <th></th>\n",
848
+ " <th>key</th>\n",
849
+ " <th>data1</th>\n",
850
+ " <th>data2</th>\n",
851
+ " </tr>\n",
852
+ " </thead>\n",
853
+ " <tbody>\n",
854
+ " <tr>\n",
855
+ " <th>1</th>\n",
856
+ " <td>B</td>\n",
857
+ " <td>1</td>\n",
858
+ " <td>0</td>\n",
859
+ " </tr>\n",
860
+ " <tr>\n",
861
+ " <th>2</th>\n",
862
+ " <td>C</td>\n",
863
+ " <td>2</td>\n",
864
+ " <td>3</td>\n",
865
+ " </tr>\n",
866
+ " <tr>\n",
867
+ " <th>4</th>\n",
868
+ " <td>B</td>\n",
869
+ " <td>4</td>\n",
870
+ " <td>7</td>\n",
871
+ " </tr>\n",
872
+ " <tr>\n",
873
+ " <th>5</th>\n",
874
+ " <td>C</td>\n",
875
+ " <td>5</td>\n",
876
+ " <td>9</td>\n",
877
+ " </tr>\n",
878
+ " </tbody>\n",
879
+ "</table>\n",
880
+ "</div>"
881
+ ],
882
+ "text/plain": [
883
+ " key data1 data2\n",
884
+ "1 B 1 0\n",
885
+ "2 C 2 3\n",
886
+ "4 B 4 7\n",
887
+ "5 C 5 9"
888
+ ]
889
+ },
890
+ "execution_count": 28,
891
+ "metadata": {},
892
+ "output_type": "execute_result"
893
+ }
894
+ ],
895
+ "source": [
896
+ "df.groupby('key').filter(filter_func)"
897
+ ]
898
+ },
899
+ {
900
+ "cell_type": "code",
901
+ "execution_count": 29,
902
+ "id": "6357cbab-c99d-470c-83b3-22a8edcbd7ac",
903
+ "metadata": {},
904
+ "outputs": [
905
+ {
906
+ "data": {
907
+ "text/html": [
908
+ "<div>\n",
909
+ "<style scoped>\n",
910
+ " .dataframe tbody tr th:only-of-type {\n",
911
+ " vertical-align: middle;\n",
912
+ " }\n",
913
+ "\n",
914
+ " .dataframe tbody tr th {\n",
915
+ " vertical-align: top;\n",
916
+ " }\n",
917
+ "\n",
918
+ " .dataframe thead th {\n",
919
+ " text-align: right;\n",
920
+ " }\n",
921
+ "</style>\n",
922
+ "<table border=\"1\" class=\"dataframe\">\n",
923
+ " <thead>\n",
924
+ " <tr style=\"text-align: right;\">\n",
925
+ " <th></th>\n",
926
+ " <th>data1</th>\n",
927
+ " <th>data2</th>\n",
928
+ " </tr>\n",
929
+ " </thead>\n",
930
+ " <tbody>\n",
931
+ " <tr>\n",
932
+ " <th>0</th>\n",
933
+ " <td>-1.5</td>\n",
934
+ " <td>1.0</td>\n",
935
+ " </tr>\n",
936
+ " <tr>\n",
937
+ " <th>1</th>\n",
938
+ " <td>-1.5</td>\n",
939
+ " <td>-3.5</td>\n",
940
+ " </tr>\n",
941
+ " <tr>\n",
942
+ " <th>2</th>\n",
943
+ " <td>-1.5</td>\n",
944
+ " <td>-3.0</td>\n",
945
+ " </tr>\n",
946
+ " <tr>\n",
947
+ " <th>3</th>\n",
948
+ " <td>1.5</td>\n",
949
+ " <td>-1.0</td>\n",
950
+ " </tr>\n",
951
+ " <tr>\n",
952
+ " <th>4</th>\n",
953
+ " <td>1.5</td>\n",
954
+ " <td>3.5</td>\n",
955
+ " </tr>\n",
956
+ " <tr>\n",
957
+ " <th>5</th>\n",
958
+ " <td>1.5</td>\n",
959
+ " <td>3.0</td>\n",
960
+ " </tr>\n",
961
+ " </tbody>\n",
962
+ "</table>\n",
963
+ "</div>"
964
+ ],
965
+ "text/plain": [
966
+ " data1 data2\n",
967
+ "0 -1.5 1.0\n",
968
+ "1 -1.5 -3.5\n",
969
+ "2 -1.5 -3.0\n",
970
+ "3 1.5 -1.0\n",
971
+ "4 1.5 3.5\n",
972
+ "5 1.5 3.0"
973
+ ]
974
+ },
975
+ "execution_count": 29,
976
+ "metadata": {},
977
+ "output_type": "execute_result"
978
+ }
979
+ ],
980
+ "source": [
981
+ " df.groupby('key').transform(lambda x: x - x.mean())"
982
+ ]
983
+ },
984
+ {
985
+ "cell_type": "code",
986
+ "execution_count": 30,
987
+ "id": "eace356a-087d-4376-84ad-70096afc5f8a",
988
+ "metadata": {},
989
+ "outputs": [
990
+ {
991
+ "name": "stdout",
992
+ "output_type": "stream",
993
+ "text": [
994
+ " key data1 data2\n",
995
+ "0 A 0 5\n",
996
+ "1 B 1 0\n",
997
+ "2 C 2 3\n",
998
+ "3 A 3 3\n",
999
+ "4 B 4 7\n",
1000
+ "5 C 5 9\n",
1001
+ " key data1 data2\n",
1002
+ "key \n",
1003
+ "A 0 A 0.000000 5\n",
1004
+ " 3 A 0.375000 3\n",
1005
+ "B 1 B 0.142857 0\n",
1006
+ " 4 B 0.571429 7\n",
1007
+ "C 2 C 0.166667 3\n",
1008
+ " 5 C 0.416667 9\n"
1009
+ ]
1010
+ },
1011
+ {
1012
+ "name": "stderr",
1013
+ "output_type": "stream",
1014
+ "text": [
1015
+ "C:\\Users\\darsh\\AppData\\Local\\Temp\\ipykernel_1940\\3133374050.py:6: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n",
1016
+ " print(df); print(df.groupby('key').apply(norm_by_data2))\n"
1017
+ ]
1018
+ }
1019
+ ],
1020
+ "source": [
1021
+ " # The apply() method.\n",
1022
+ " def norm_by_data2(x):\n",
1023
+ " # x is a DataFrame of group values\n",
1024
+ " x['data1'] /= x['data2'].sum()\n",
1025
+ " return x\n",
1026
+ " print(df); print(df.groupby('key').apply(norm_by_data2))"
1027
+ ]
1028
+ },
1029
+ {
1030
+ "cell_type": "code",
1031
+ "execution_count": 31,
1032
+ "id": "ba028303-402d-4141-b284-e15df4855ff3",
1033
+ "metadata": {},
1034
+ "outputs": [
1035
+ {
1036
+ "name": "stdout",
1037
+ "output_type": "stream",
1038
+ "text": [
1039
+ " key data1 data2\n",
1040
+ "0 A 0 5\n",
1041
+ "1 B 1 0\n",
1042
+ "2 C 2 3\n",
1043
+ "3 A 3 3\n",
1044
+ "4 B 4 7\n",
1045
+ "5 C 5 9\n",
1046
+ " key data1 data2\n",
1047
+ "0 ACC 7 17\n",
1048
+ "1 BA 4 3\n",
1049
+ "2 B 4 7\n"
1050
+ ]
1051
+ }
1052
+ ],
1053
+ "source": [
1054
+ "#Specifying the split key\n",
1055
+ "L = [0, 1, 0, 1, 2, 0]\n",
1056
+ "print(df); print(df.groupby(L).sum())"
1057
+ ]
1058
+ },
1059
+ {
1060
+ "cell_type": "code",
1061
+ "execution_count": 33,
1062
+ "id": "cd2e7ff6-2aaa-4fe4-9efd-d852e2de7cfd",
1063
+ "metadata": {},
1064
+ "outputs": [
1065
+ {
1066
+ "data": {
1067
+ "text/html": [
1068
+ "<div>\n",
1069
+ "<style scoped>\n",
1070
+ " .dataframe tbody tr th:only-of-type {\n",
1071
+ " vertical-align: middle;\n",
1072
+ " }\n",
1073
+ "\n",
1074
+ " .dataframe tbody tr th {\n",
1075
+ " vertical-align: top;\n",
1076
+ " }\n",
1077
+ "\n",
1078
+ " .dataframe thead th {\n",
1079
+ " text-align: right;\n",
1080
+ " }\n",
1081
+ "</style>\n",
1082
+ "<table border=\"1\" class=\"dataframe\">\n",
1083
+ " <thead>\n",
1084
+ " <tr style=\"text-align: right;\">\n",
1085
+ " <th>decade</th>\n",
1086
+ " <th>1980s</th>\n",
1087
+ " <th>1990s</th>\n",
1088
+ " <th>2000s</th>\n",
1089
+ " <th>2010s</th>\n",
1090
+ " </tr>\n",
1091
+ " <tr>\n",
1092
+ " <th>method</th>\n",
1093
+ " <th></th>\n",
1094
+ " <th></th>\n",
1095
+ " <th></th>\n",
1096
+ " <th></th>\n",
1097
+ " </tr>\n",
1098
+ " </thead>\n",
1099
+ " <tbody>\n",
1100
+ " <tr>\n",
1101
+ " <th>Astrometry</th>\n",
1102
+ " <td>0.0</td>\n",
1103
+ " <td>0.0</td>\n",
1104
+ " <td>0.0</td>\n",
1105
+ " <td>2.0</td>\n",
1106
+ " </tr>\n",
1107
+ " <tr>\n",
1108
+ " <th>Eclipse Timing Variations</th>\n",
1109
+ " <td>0.0</td>\n",
1110
+ " <td>0.0</td>\n",
1111
+ " <td>5.0</td>\n",
1112
+ " <td>10.0</td>\n",
1113
+ " </tr>\n",
1114
+ " <tr>\n",
1115
+ " <th>Imaging</th>\n",
1116
+ " <td>0.0</td>\n",
1117
+ " <td>0.0</td>\n",
1118
+ " <td>29.0</td>\n",
1119
+ " <td>21.0</td>\n",
1120
+ " </tr>\n",
1121
+ " <tr>\n",
1122
+ " <th>Microlensing</th>\n",
1123
+ " <td>0.0</td>\n",
1124
+ " <td>0.0</td>\n",
1125
+ " <td>12.0</td>\n",
1126
+ " <td>15.0</td>\n",
1127
+ " </tr>\n",
1128
+ " <tr>\n",
1129
+ " <th>Orbital Brightness Modulation</th>\n",
1130
+ " <td>0.0</td>\n",
1131
+ " <td>0.0</td>\n",
1132
+ " <td>0.0</td>\n",
1133
+ " <td>5.0</td>\n",
1134
+ " </tr>\n",
1135
+ " <tr>\n",
1136
+ " <th>Pulsar Timing</th>\n",
1137
+ " <td>0.0</td>\n",
1138
+ " <td>9.0</td>\n",
1139
+ " <td>1.0</td>\n",
1140
+ " <td>1.0</td>\n",
1141
+ " </tr>\n",
1142
+ " <tr>\n",
1143
+ " <th>Pulsation Timing Variations</th>\n",
1144
+ " <td>0.0</td>\n",
1145
+ " <td>0.0</td>\n",
1146
+ " <td>1.0</td>\n",
1147
+ " <td>0.0</td>\n",
1148
+ " </tr>\n",
1149
+ " <tr>\n",
1150
+ " <th>Radial Velocity</th>\n",
1151
+ " <td>1.0</td>\n",
1152
+ " <td>52.0</td>\n",
1153
+ " <td>475.0</td>\n",
1154
+ " <td>424.0</td>\n",
1155
+ " </tr>\n",
1156
+ " <tr>\n",
1157
+ " <th>Transit</th>\n",
1158
+ " <td>0.0</td>\n",
1159
+ " <td>0.0</td>\n",
1160
+ " <td>64.0</td>\n",
1161
+ " <td>712.0</td>\n",
1162
+ " </tr>\n",
1163
+ " <tr>\n",
1164
+ " <th>Transit Timing Variations</th>\n",
1165
+ " <td>0.0</td>\n",
1166
+ " <td>0.0</td>\n",
1167
+ " <td>0.0</td>\n",
1168
+ " <td>9.0</td>\n",
1169
+ " </tr>\n",
1170
+ " </tbody>\n",
1171
+ "</table>\n",
1172
+ "</div>"
1173
+ ],
1174
+ "text/plain": [
1175
+ "decade 1980s 1990s 2000s 2010s\n",
1176
+ "method \n",
1177
+ "Astrometry 0.0 0.0 0.0 2.0\n",
1178
+ "Eclipse Timing Variations 0.0 0.0 5.0 10.0\n",
1179
+ "Imaging 0.0 0.0 29.0 21.0\n",
1180
+ "Microlensing 0.0 0.0 12.0 15.0\n",
1181
+ "Orbital Brightness Modulation 0.0 0.0 0.0 5.0\n",
1182
+ "Pulsar Timing 0.0 9.0 1.0 1.0\n",
1183
+ "Pulsation Timing Variations 0.0 0.0 1.0 0.0\n",
1184
+ "Radial Velocity 1.0 52.0 475.0 424.0\n",
1185
+ "Transit 0.0 0.0 64.0 712.0\n",
1186
+ "Transit Timing Variations 0.0 0.0 0.0 9.0"
1187
+ ]
1188
+ },
1189
+ "execution_count": 33,
1190
+ "metadata": {},
1191
+ "output_type": "execute_result"
1192
+ }
1193
+ ],
1194
+ "source": [
1195
+ "# Grouping example\n",
1196
+ "decade = 10 * (planets['year'] // 10)\n",
1197
+ "decade = decade.astype(str) + 's'\n",
1198
+ "decade.name = 'decade'\n",
1199
+ "planets.groupby(['method', decade])['number'].sum().unstack().fillna(0)"
1200
+ ]
1201
+ },
1202
+ {
1203
+ "cell_type": "code",
1204
+ "execution_count": 34,
1205
+ "id": "68c8eaac-b321-4e5a-bfda-7366061c8ec3",
1206
+ "metadata": {},
1207
+ "outputs": [],
1208
+ "source": [
1209
+ "# Pivot Tables\n",
1210
+ "import numpy as np\n",
1211
+ "import pandas as pd\n",
1212
+ "import seaborn as sns\n",
1213
+ "titanic = sns.load_dataset('titanic')"
1214
+ ]
1215
+ },
1216
+ {
1217
+ "cell_type": "code",
1218
+ "execution_count": 35,
1219
+ "id": "fe9c52f9-e461-483f-bfd1-36400787c720",
1220
+ "metadata": {},
1221
+ "outputs": [
1222
+ {
1223
+ "data": {
1224
+ "text/html": [
1225
+ "<div>\n",
1226
+ "<style scoped>\n",
1227
+ " .dataframe tbody tr th:only-of-type {\n",
1228
+ " vertical-align: middle;\n",
1229
+ " }\n",
1230
+ "\n",
1231
+ " .dataframe tbody tr th {\n",
1232
+ " vertical-align: top;\n",
1233
+ " }\n",
1234
+ "\n",
1235
+ " .dataframe thead th {\n",
1236
+ " text-align: right;\n",
1237
+ " }\n",
1238
+ "</style>\n",
1239
+ "<table border=\"1\" class=\"dataframe\">\n",
1240
+ " <thead>\n",
1241
+ " <tr style=\"text-align: right;\">\n",
1242
+ " <th></th>\n",
1243
+ " <th>survived</th>\n",
1244
+ " <th>pclass</th>\n",
1245
+ " <th>sex</th>\n",
1246
+ " <th>age</th>\n",
1247
+ " <th>sibsp</th>\n",
1248
+ " <th>parch</th>\n",
1249
+ " <th>fare</th>\n",
1250
+ " <th>embarked</th>\n",
1251
+ " <th>class</th>\n",
1252
+ " <th>who</th>\n",
1253
+ " <th>adult_male</th>\n",
1254
+ " <th>deck</th>\n",
1255
+ " <th>embark_town</th>\n",
1256
+ " <th>alive</th>\n",
1257
+ " <th>alone</th>\n",
1258
+ " </tr>\n",
1259
+ " </thead>\n",
1260
+ " <tbody>\n",
1261
+ " <tr>\n",
1262
+ " <th>0</th>\n",
1263
+ " <td>0</td>\n",
1264
+ " <td>3</td>\n",
1265
+ " <td>male</td>\n",
1266
+ " <td>22.0</td>\n",
1267
+ " <td>1</td>\n",
1268
+ " <td>0</td>\n",
1269
+ " <td>7.2500</td>\n",
1270
+ " <td>S</td>\n",
1271
+ " <td>Third</td>\n",
1272
+ " <td>man</td>\n",
1273
+ " <td>True</td>\n",
1274
+ " <td>NaN</td>\n",
1275
+ " <td>Southampton</td>\n",
1276
+ " <td>no</td>\n",
1277
+ " <td>False</td>\n",
1278
+ " </tr>\n",
1279
+ " <tr>\n",
1280
+ " <th>1</th>\n",
1281
+ " <td>1</td>\n",
1282
+ " <td>1</td>\n",
1283
+ " <td>female</td>\n",
1284
+ " <td>38.0</td>\n",
1285
+ " <td>1</td>\n",
1286
+ " <td>0</td>\n",
1287
+ " <td>71.2833</td>\n",
1288
+ " <td>C</td>\n",
1289
+ " <td>First</td>\n",
1290
+ " <td>woman</td>\n",
1291
+ " <td>False</td>\n",
1292
+ " <td>C</td>\n",
1293
+ " <td>Cherbourg</td>\n",
1294
+ " <td>yes</td>\n",
1295
+ " <td>False</td>\n",
1296
+ " </tr>\n",
1297
+ " <tr>\n",
1298
+ " <th>2</th>\n",
1299
+ " <td>1</td>\n",
1300
+ " <td>3</td>\n",
1301
+ " <td>female</td>\n",
1302
+ " <td>26.0</td>\n",
1303
+ " <td>0</td>\n",
1304
+ " <td>0</td>\n",
1305
+ " <td>7.9250</td>\n",
1306
+ " <td>S</td>\n",
1307
+ " <td>Third</td>\n",
1308
+ " <td>woman</td>\n",
1309
+ " <td>False</td>\n",
1310
+ " <td>NaN</td>\n",
1311
+ " <td>Southampton</td>\n",
1312
+ " <td>yes</td>\n",
1313
+ " <td>True</td>\n",
1314
+ " </tr>\n",
1315
+ " <tr>\n",
1316
+ " <th>3</th>\n",
1317
+ " <td>1</td>\n",
1318
+ " <td>1</td>\n",
1319
+ " <td>female</td>\n",
1320
+ " <td>35.0</td>\n",
1321
+ " <td>1</td>\n",
1322
+ " <td>0</td>\n",
1323
+ " <td>53.1000</td>\n",
1324
+ " <td>S</td>\n",
1325
+ " <td>First</td>\n",
1326
+ " <td>woman</td>\n",
1327
+ " <td>False</td>\n",
1328
+ " <td>C</td>\n",
1329
+ " <td>Southampton</td>\n",
1330
+ " <td>yes</td>\n",
1331
+ " <td>False</td>\n",
1332
+ " </tr>\n",
1333
+ " <tr>\n",
1334
+ " <th>4</th>\n",
1335
+ " <td>0</td>\n",
1336
+ " <td>3</td>\n",
1337
+ " <td>male</td>\n",
1338
+ " <td>35.0</td>\n",
1339
+ " <td>0</td>\n",
1340
+ " <td>0</td>\n",
1341
+ " <td>8.0500</td>\n",
1342
+ " <td>S</td>\n",
1343
+ " <td>Third</td>\n",
1344
+ " <td>man</td>\n",
1345
+ " <td>True</td>\n",
1346
+ " <td>NaN</td>\n",
1347
+ " <td>Southampton</td>\n",
1348
+ " <td>no</td>\n",
1349
+ " <td>True</td>\n",
1350
+ " </tr>\n",
1351
+ " </tbody>\n",
1352
+ "</table>\n",
1353
+ "</div>"
1354
+ ],
1355
+ "text/plain": [
1356
+ " survived pclass sex age sibsp parch fare embarked class \\\n",
1357
+ "0 0 3 male 22.0 1 0 7.2500 S Third \n",
1358
+ "1 1 1 female 38.0 1 0 71.2833 C First \n",
1359
+ "2 1 3 female 26.0 0 0 7.9250 S Third \n",
1360
+ "3 1 1 female 35.0 1 0 53.1000 S First \n",
1361
+ "4 0 3 male 35.0 0 0 8.0500 S Third \n",
1362
+ "\n",
1363
+ " who adult_male deck embark_town alive alone \n",
1364
+ "0 man True NaN Southampton no False \n",
1365
+ "1 woman False C Cherbourg yes False \n",
1366
+ "2 woman False NaN Southampton yes True \n",
1367
+ "3 woman False C Southampton yes False \n",
1368
+ "4 man True NaN Southampton no True "
1369
+ ]
1370
+ },
1371
+ "execution_count": 35,
1372
+ "metadata": {},
1373
+ "output_type": "execute_result"
1374
+ }
1375
+ ],
1376
+ "source": [
1377
+ "titanic.head()"
1378
+ ]
1379
+ },
1380
+ {
1381
+ "cell_type": "code",
1382
+ "execution_count": 36,
1383
+ "id": "d9f3b1fc-5125-4058-8448-95f65d8decd6",
1384
+ "metadata": {},
1385
+ "outputs": [
1386
+ {
1387
+ "data": {
1388
+ "text/html": [
1389
+ "<div>\n",
1390
+ "<style scoped>\n",
1391
+ " .dataframe tbody tr th:only-of-type {\n",
1392
+ " vertical-align: middle;\n",
1393
+ " }\n",
1394
+ "\n",
1395
+ " .dataframe tbody tr th {\n",
1396
+ " vertical-align: top;\n",
1397
+ " }\n",
1398
+ "\n",
1399
+ " .dataframe thead th {\n",
1400
+ " text-align: right;\n",
1401
+ " }\n",
1402
+ "</style>\n",
1403
+ "<table border=\"1\" class=\"dataframe\">\n",
1404
+ " <thead>\n",
1405
+ " <tr style=\"text-align: right;\">\n",
1406
+ " <th></th>\n",
1407
+ " <th>survived</th>\n",
1408
+ " </tr>\n",
1409
+ " <tr>\n",
1410
+ " <th>sex</th>\n",
1411
+ " <th></th>\n",
1412
+ " </tr>\n",
1413
+ " </thead>\n",
1414
+ " <tbody>\n",
1415
+ " <tr>\n",
1416
+ " <th>female</th>\n",
1417
+ " <td>0.742038</td>\n",
1418
+ " </tr>\n",
1419
+ " <tr>\n",
1420
+ " <th>male</th>\n",
1421
+ " <td>0.188908</td>\n",
1422
+ " </tr>\n",
1423
+ " </tbody>\n",
1424
+ "</table>\n",
1425
+ "</div>"
1426
+ ],
1427
+ "text/plain": [
1428
+ " survived\n",
1429
+ "sex \n",
1430
+ "female 0.742038\n",
1431
+ "male 0.188908"
1432
+ ]
1433
+ },
1434
+ "execution_count": 36,
1435
+ "metadata": {},
1436
+ "output_type": "execute_result"
1437
+ }
1438
+ ],
1439
+ "source": [
1440
+ "titanic.groupby('sex')[['survived']].mean()"
1441
+ ]
1442
+ },
1443
+ {
1444
+ "cell_type": "code",
1445
+ "execution_count": 37,
1446
+ "id": "96fc78d1-878a-4418-8b2c-1e6bae80c1f7",
1447
+ "metadata": {},
1448
+ "outputs": [
1449
+ {
1450
+ "name": "stderr",
1451
+ "output_type": "stream",
1452
+ "text": [
1453
+ "C:\\Users\\darsh\\AppData\\Local\\Temp\\ipykernel_1940\\2603839867.py:1: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
1454
+ " titanic.groupby(['sex', 'class'])['survived'].aggregate('mean').unstack()\n"
1455
+ ]
1456
+ },
1457
+ {
1458
+ "data": {
1459
+ "text/html": [
1460
+ "<div>\n",
1461
+ "<style scoped>\n",
1462
+ " .dataframe tbody tr th:only-of-type {\n",
1463
+ " vertical-align: middle;\n",
1464
+ " }\n",
1465
+ "\n",
1466
+ " .dataframe tbody tr th {\n",
1467
+ " vertical-align: top;\n",
1468
+ " }\n",
1469
+ "\n",
1470
+ " .dataframe thead th {\n",
1471
+ " text-align: right;\n",
1472
+ " }\n",
1473
+ "</style>\n",
1474
+ "<table border=\"1\" class=\"dataframe\">\n",
1475
+ " <thead>\n",
1476
+ " <tr style=\"text-align: right;\">\n",
1477
+ " <th>class</th>\n",
1478
+ " <th>First</th>\n",
1479
+ " <th>Second</th>\n",
1480
+ " <th>Third</th>\n",
1481
+ " </tr>\n",
1482
+ " <tr>\n",
1483
+ " <th>sex</th>\n",
1484
+ " <th></th>\n",
1485
+ " <th></th>\n",
1486
+ " <th></th>\n",
1487
+ " </tr>\n",
1488
+ " </thead>\n",
1489
+ " <tbody>\n",
1490
+ " <tr>\n",
1491
+ " <th>female</th>\n",
1492
+ " <td>0.968085</td>\n",
1493
+ " <td>0.921053</td>\n",
1494
+ " <td>0.500000</td>\n",
1495
+ " </tr>\n",
1496
+ " <tr>\n",
1497
+ " <th>male</th>\n",
1498
+ " <td>0.368852</td>\n",
1499
+ " <td>0.157407</td>\n",
1500
+ " <td>0.135447</td>\n",
1501
+ " </tr>\n",
1502
+ " </tbody>\n",
1503
+ "</table>\n",
1504
+ "</div>"
1505
+ ],
1506
+ "text/plain": [
1507
+ "class First Second Third\n",
1508
+ "sex \n",
1509
+ "female 0.968085 0.921053 0.500000\n",
1510
+ "male 0.368852 0.157407 0.135447"
1511
+ ]
1512
+ },
1513
+ "execution_count": 37,
1514
+ "metadata": {},
1515
+ "output_type": "execute_result"
1516
+ }
1517
+ ],
1518
+ "source": [
1519
+ "titanic.groupby(['sex', 'class'])['survived'].aggregate('mean').unstack()"
1520
+ ]
1521
+ },
1522
+ {
1523
+ "cell_type": "code",
1524
+ "execution_count": 38,
1525
+ "id": "28b5cad3-2411-4cd7-aa24-4a423e03b00b",
1526
+ "metadata": {},
1527
+ "outputs": [
1528
+ {
1529
+ "data": {
1530
+ "text/plain": [
1531
+ "array([ 4, 6, 10, 14, 22, 26])"
1532
+ ]
1533
+ },
1534
+ "execution_count": 38,
1535
+ "metadata": {},
1536
+ "output_type": "execute_result"
1537
+ }
1538
+ ],
1539
+ "source": [
1540
+ "# Vectorized String Operations\n",
1541
+ "import numpy as np\n",
1542
+ "x = np.array([2, 3, 5, 7, 11, 13])\n",
1543
+ "x * 2"
1544
+ ]
1545
+ },
1546
+ {
1547
+ "cell_type": "code",
1548
+ "execution_count": 39,
1549
+ "id": "febdc11c-b284-4b43-a9c0-44001bc9dda3",
1550
+ "metadata": {},
1551
+ "outputs": [
1552
+ {
1553
+ "data": {
1554
+ "text/plain": [
1555
+ "['Peter', 'Paul', 'Mary', 'Guido']"
1556
+ ]
1557
+ },
1558
+ "execution_count": 39,
1559
+ "metadata": {},
1560
+ "output_type": "execute_result"
1561
+ }
1562
+ ],
1563
+ "source": [
1564
+ "data = ['peter', 'Paul', 'MARY', 'gUIDO']\n",
1565
+ "[s.capitalize() for s in data]"
1566
+ ]
1567
+ },
1568
+ {
1569
+ "cell_type": "code",
1570
+ "execution_count": 40,
1571
+ "id": "8d689e1a-ccdc-4916-b665-49cf7b21d02e",
1572
+ "metadata": {},
1573
+ "outputs": [
1574
+ {
1575
+ "ename": "AttributeError",
1576
+ "evalue": "'NoneType' object has no attribute 'capitalize'",
1577
+ "output_type": "error",
1578
+ "traceback": [
1579
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
1580
+ "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
1581
+ "Cell \u001b[1;32mIn[40], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m data \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpeter\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPaul\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mMARY\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mgUIDO\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m----> 2\u001b[0m [\u001b[43ms\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcapitalize\u001b[49m() \u001b[38;5;28;01mfor\u001b[39;00m s \u001b[38;5;129;01min\u001b[39;00m data]\n",
1582
+ "\u001b[1;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'capitalize'"
1583
+ ]
1584
+ }
1585
+ ],
1586
+ "source": [
1587
+ "data = ['peter', 'Paul', None, 'MARY', 'gUIDO']\n",
1588
+ "[s.capitalize() for s in data]"
1589
+ ]
1590
+ },
1591
+ {
1592
+ "cell_type": "code",
1593
+ "execution_count": 41,
1594
+ "id": "8c2280ab-a3f9-424b-af89-97823534801e",
1595
+ "metadata": {},
1596
+ "outputs": [
1597
+ {
1598
+ "data": {
1599
+ "text/plain": [
1600
+ "0 peter\n",
1601
+ "1 Paul\n",
1602
+ "2 None\n",
1603
+ "3 MARY\n",
1604
+ "4 gUIDO\n",
1605
+ "dtype: object"
1606
+ ]
1607
+ },
1608
+ "execution_count": 41,
1609
+ "metadata": {},
1610
+ "output_type": "execute_result"
1611
+ }
1612
+ ],
1613
+ "source": [
1614
+ "import pandas as pd\n",
1615
+ "names = pd.Series(data)\n",
1616
+ "names"
1617
+ ]
1618
+ },
1619
+ {
1620
+ "cell_type": "code",
1621
+ "execution_count": 42,
1622
+ "id": "c5271eae-68d8-4a39-8b78-e0509e0bdf6a",
1623
+ "metadata": {},
1624
+ "outputs": [
1625
+ {
1626
+ "data": {
1627
+ "text/plain": [
1628
+ "0 Peter\n",
1629
+ "1 Paul\n",
1630
+ "2 None\n",
1631
+ "3 Mary\n",
1632
+ "4 Guido\n",
1633
+ "dtype: object"
1634
+ ]
1635
+ },
1636
+ "execution_count": 42,
1637
+ "metadata": {},
1638
+ "output_type": "execute_result"
1639
+ }
1640
+ ],
1641
+ "source": [
1642
+ " names.str.capitalize()"
1643
+ ]
1644
+ },
1645
+ {
1646
+ "cell_type": "code",
1647
+ "execution_count": 44,
1648
+ "id": "77c5a86f-9cc8-4c2d-9aa2-3901a484c83c",
1649
+ "metadata": {},
1650
+ "outputs": [],
1651
+ "source": [
1652
+ "monte = pd.Series(['Graham Chapman', 'John Cleese', 'Terry Gilliam',\n",
1653
+ " 'Eric Idle', 'Terry Jones', 'Michael Palin'])"
1654
+ ]
1655
+ },
1656
+ {
1657
+ "cell_type": "code",
1658
+ "execution_count": 45,
1659
+ "id": "ce34f0ac-171b-4e08-a816-e39e348f2508",
1660
+ "metadata": {},
1661
+ "outputs": [
1662
+ {
1663
+ "data": {
1664
+ "text/plain": [
1665
+ "0 graham chapman\n",
1666
+ "1 john cleese\n",
1667
+ "2 terry gilliam\n",
1668
+ "3 eric idle\n",
1669
+ "4 terry jones\n",
1670
+ "5 michael palin\n",
1671
+ "dtype: object"
1672
+ ]
1673
+ },
1674
+ "execution_count": 45,
1675
+ "metadata": {},
1676
+ "output_type": "execute_result"
1677
+ }
1678
+ ],
1679
+ "source": [
1680
+ " monte.str.lower()"
1681
+ ]
1682
+ },
1683
+ {
1684
+ "cell_type": "code",
1685
+ "execution_count": 46,
1686
+ "id": "b5672c72-c2db-4425-8290-7944ec83c77c",
1687
+ "metadata": {},
1688
+ "outputs": [
1689
+ {
1690
+ "data": {
1691
+ "text/plain": [
1692
+ "0 14\n",
1693
+ "1 11\n",
1694
+ "2 13\n",
1695
+ "3 9\n",
1696
+ "4 11\n",
1697
+ "5 13\n",
1698
+ "dtype: int64"
1699
+ ]
1700
+ },
1701
+ "execution_count": 46,
1702
+ "metadata": {},
1703
+ "output_type": "execute_result"
1704
+ }
1705
+ ],
1706
+ "source": [
1707
+ " monte.str.len()"
1708
+ ]
1709
+ },
1710
+ {
1711
+ "cell_type": "code",
1712
+ "execution_count": 47,
1713
+ "id": "9c99eabc-b0f4-41d7-8de8-fd550a7e5448",
1714
+ "metadata": {},
1715
+ "outputs": [
1716
+ {
1717
+ "data": {
1718
+ "text/plain": [
1719
+ "0 [Graham, Chapman]\n",
1720
+ "1 [John, Cleese]\n",
1721
+ "2 [Terry, Gilliam]\n",
1722
+ "3 [Eric, Idle]\n",
1723
+ "4 [Terry, Jones]\n",
1724
+ "5 [Michael, Palin]\n",
1725
+ "dtype: object"
1726
+ ]
1727
+ },
1728
+ "execution_count": 47,
1729
+ "metadata": {},
1730
+ "output_type": "execute_result"
1731
+ }
1732
+ ],
1733
+ "source": [
1734
+ " monte.str.split()"
1735
+ ]
1736
+ },
1737
+ {
1738
+ "cell_type": "code",
1739
+ "execution_count": 48,
1740
+ "id": "e751ca4c-b6e3-4f50-a09e-10e1d1915eb4",
1741
+ "metadata": {},
1742
+ "outputs": [
1743
+ {
1744
+ "data": {
1745
+ "text/plain": [
1746
+ "0 Gra\n",
1747
+ "1 Joh\n",
1748
+ "2 Ter\n",
1749
+ "3 Eri\n",
1750
+ "4 Ter\n",
1751
+ "5 Mic\n",
1752
+ "dtype: object"
1753
+ ]
1754
+ },
1755
+ "execution_count": 48,
1756
+ "metadata": {},
1757
+ "output_type": "execute_result"
1758
+ }
1759
+ ],
1760
+ "source": [
1761
+ "#Vectorized item access and slicing\n",
1762
+ "monte.str[0:3]"
1763
+ ]
1764
+ },
1765
+ {
1766
+ "cell_type": "code",
1767
+ "execution_count": 49,
1768
+ "id": "6c7d9ce7-e394-4492-8ed5-8f7f3772bd0f",
1769
+ "metadata": {},
1770
+ "outputs": [
1771
+ {
1772
+ "data": {
1773
+ "text/plain": [
1774
+ "0 Chapman\n",
1775
+ "1 Cleese\n",
1776
+ "2 Gilliam\n",
1777
+ "3 Idle\n",
1778
+ "4 Jones\n",
1779
+ "5 Palin\n",
1780
+ "dtype: object"
1781
+ ]
1782
+ },
1783
+ "execution_count": 49,
1784
+ "metadata": {},
1785
+ "output_type": "execute_result"
1786
+ }
1787
+ ],
1788
+ "source": [
1789
+ "monte.str.split().str.get(-1)"
1790
+ ]
1791
+ },
1792
+ {
1793
+ "cell_type": "code",
1794
+ "execution_count": 50,
1795
+ "id": "82f5029b-0cc0-4122-ab88-db298650a5c2",
1796
+ "metadata": {},
1797
+ "outputs": [
1798
+ {
1799
+ "data": {
1800
+ "text/plain": [
1801
+ "datetime.datetime(2015, 7, 4, 0, 0)"
1802
+ ]
1803
+ },
1804
+ "execution_count": 50,
1805
+ "metadata": {},
1806
+ "output_type": "execute_result"
1807
+ }
1808
+ ],
1809
+ "source": [
1810
+ " # Working with Time Series\n",
1811
+ "from datetime import datetime\n",
1812
+ "datetime(year=2015, month=7, day=4)"
1813
+ ]
1814
+ },
1815
+ {
1816
+ "cell_type": "code",
1817
+ "execution_count": 51,
1818
+ "id": "60d56a09-8fd3-4168-89fb-04970425fee8",
1819
+ "metadata": {},
1820
+ "outputs": [
1821
+ {
1822
+ "data": {
1823
+ "text/plain": [
1824
+ "datetime.datetime(2015, 7, 4, 0, 0)"
1825
+ ]
1826
+ },
1827
+ "execution_count": 51,
1828
+ "metadata": {},
1829
+ "output_type": "execute_result"
1830
+ }
1831
+ ],
1832
+ "source": [
1833
+ " from dateutil import parser\n",
1834
+ " date = parser.parse(\"4th of July, 2015\")\n",
1835
+ " date"
1836
+ ]
1837
+ },
1838
+ {
1839
+ "cell_type": "code",
1840
+ "execution_count": 52,
1841
+ "id": "5eae15bf-1a75-4565-b1d0-2b10cef44d3b",
1842
+ "metadata": {},
1843
+ "outputs": [
1844
+ {
1845
+ "data": {
1846
+ "text/plain": [
1847
+ "'Saturday'"
1848
+ ]
1849
+ },
1850
+ "execution_count": 52,
1851
+ "metadata": {},
1852
+ "output_type": "execute_result"
1853
+ }
1854
+ ],
1855
+ "source": [
1856
+ "date.strftime('%A')"
1857
+ ]
1858
+ },
1859
+ {
1860
+ "cell_type": "code",
1861
+ "execution_count": 54,
1862
+ "id": "d6cc3148-aeb7-447e-83b6-66037aad2b88",
1863
+ "metadata": {},
1864
+ "outputs": [
1865
+ {
1866
+ "data": {
1867
+ "text/plain": [
1868
+ "array('2015-07-04', dtype='datetime64[D]')"
1869
+ ]
1870
+ },
1871
+ "execution_count": 54,
1872
+ "metadata": {},
1873
+ "output_type": "execute_result"
1874
+ }
1875
+ ],
1876
+ "source": [
1877
+ "import numpy as np\n",
1878
+ "date = np.array('2015-07-04', dtype=np.datetime64)\n",
1879
+ "date"
1880
+ ]
1881
+ },
1882
+ {
1883
+ "cell_type": "code",
1884
+ "execution_count": 55,
1885
+ "id": "a504a4a8-5a0a-4c62-8c17-80d6124e657a",
1886
+ "metadata": {},
1887
+ "outputs": [
1888
+ {
1889
+ "data": {
1890
+ "text/plain": [
1891
+ "array(['2015-07-04', '2015-07-05', '2015-07-06', '2015-07-07',\n",
1892
+ " '2015-07-08', '2015-07-09', '2015-07-10', '2015-07-11',\n",
1893
+ " '2015-07-12', '2015-07-13', '2015-07-14', '2015-07-15'],\n",
1894
+ " dtype='datetime64[D]')"
1895
+ ]
1896
+ },
1897
+ "execution_count": 55,
1898
+ "metadata": {},
1899
+ "output_type": "execute_result"
1900
+ }
1901
+ ],
1902
+ "source": [
1903
+ "date + np.arange(12)"
1904
+ ]
1905
+ },
1906
+ {
1907
+ "cell_type": "code",
1908
+ "execution_count": 56,
1909
+ "id": "1d245780-95e0-480c-bfd3-9d1e2a3673c3",
1910
+ "metadata": {},
1911
+ "outputs": [
1912
+ {
1913
+ "data": {
1914
+ "text/plain": [
1915
+ "np.datetime64('2015-07-04')"
1916
+ ]
1917
+ },
1918
+ "execution_count": 56,
1919
+ "metadata": {},
1920
+ "output_type": "execute_result"
1921
+ }
1922
+ ],
1923
+ "source": [
1924
+ "np.datetime64('2015-07-04')"
1925
+ ]
1926
+ },
1927
+ {
1928
+ "cell_type": "code",
1929
+ "execution_count": 57,
1930
+ "id": "3f94ee5e-43c8-4956-b004-3f9a850a9b19",
1931
+ "metadata": {},
1932
+ "outputs": [
1933
+ {
1934
+ "data": {
1935
+ "text/plain": [
1936
+ "np.datetime64('2015-07-04T12:00')"
1937
+ ]
1938
+ },
1939
+ "execution_count": 57,
1940
+ "metadata": {},
1941
+ "output_type": "execute_result"
1942
+ }
1943
+ ],
1944
+ "source": [
1945
+ "np.datetime64('2015-07-04 12:00')"
1946
+ ]
1947
+ },
1948
+ {
1949
+ "cell_type": "code",
1950
+ "execution_count": 58,
1951
+ "id": "e8b9830a-584d-41c6-b387-d8244e3a012e",
1952
+ "metadata": {},
1953
+ "outputs": [
1954
+ {
1955
+ "data": {
1956
+ "text/plain": [
1957
+ "np.datetime64('2015-07-04T12:59:59.500000000')"
1958
+ ]
1959
+ },
1960
+ "execution_count": 58,
1961
+ "metadata": {},
1962
+ "output_type": "execute_result"
1963
+ }
1964
+ ],
1965
+ "source": [
1966
+ " np.datetime64('2015-07-04 12:59:59.50', 'ns')"
1967
+ ]
1968
+ },
1969
+ {
1970
+ "cell_type": "code",
1971
+ "execution_count": null,
1972
+ "id": "d63b7366-07ee-4baf-84a3-295458571d0f",
1973
+ "metadata": {},
1974
+ "outputs": [],
1975
+ "source": []
1976
+ }
1977
+ ],
1978
+ "metadata": {
1979
+ "kernelspec": {
1980
+ "display_name": "Python 3 (ipykernel)",
1981
+ "language": "python",
1982
+ "name": "python3"
1983
+ },
1984
+ "language_info": {
1985
+ "codemirror_mode": {
1986
+ "name": "ipython",
1987
+ "version": 3
1988
+ },
1989
+ "file_extension": ".py",
1990
+ "mimetype": "text/x-python",
1991
+ "name": "python",
1992
+ "nbconvert_exporter": "python",
1993
+ "pygments_lexer": "ipython3",
1994
+ "version": "3.12.0"
1995
+ }
1996
+ },
1997
+ "nbformat": 4,
1998
+ "nbformat_minor": 5
1999
+ }