myawesomepkg 0.1.5__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3212 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "822792da-dbb3-4ec7-a179-480020ac004b",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "# Missing Data in Pandas\n",
11
+ "import numpy as np\n",
12
+ "import pandas as pd"
13
+ ]
14
+ },
15
+ {
16
+ "cell_type": "code",
17
+ "execution_count": 2,
18
+ "id": "a7aebbf3-0776-4371-9775-aaa82496ac2a",
19
+ "metadata": {},
20
+ "outputs": [
21
+ {
22
+ "data": {
23
+ "text/plain": [
24
+ "array([1, None, 3, 4], dtype=object)"
25
+ ]
26
+ },
27
+ "execution_count": 2,
28
+ "metadata": {},
29
+ "output_type": "execute_result"
30
+ }
31
+ ],
32
+ "source": [
33
+ "vals1 = np.array([1, None, 3, 4])\n",
34
+ "vals1"
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "code",
39
+ "execution_count": 3,
40
+ "id": "42e9d234-98ea-40ef-b808-e149dec83b46",
41
+ "metadata": {},
42
+ "outputs": [
43
+ {
44
+ "data": {
45
+ "text/plain": [
46
+ "0 False\n",
47
+ "1 True\n",
48
+ "2 False\n",
49
+ "3 True\n",
50
+ "dtype: bool"
51
+ ]
52
+ },
53
+ "execution_count": 3,
54
+ "metadata": {},
55
+ "output_type": "execute_result"
56
+ }
57
+ ],
58
+ "source": [
59
+ "# Detecting null values\n",
60
+ "data = pd.Series([1, np.nan, 'hello', None])\n",
61
+ "data.isnull()"
62
+ ]
63
+ },
64
+ {
65
+ "cell_type": "code",
66
+ "execution_count": 4,
67
+ "id": "d6078385-1b04-4c3b-b3c2-1f0cc7219a33",
68
+ "metadata": {},
69
+ "outputs": [
70
+ {
71
+ "data": {
72
+ "text/plain": [
73
+ "0 1\n",
74
+ "2 hello\n",
75
+ "dtype: object"
76
+ ]
77
+ },
78
+ "execution_count": 4,
79
+ "metadata": {},
80
+ "output_type": "execute_result"
81
+ }
82
+ ],
83
+ "source": [
84
+ "data[data.notnull()]"
85
+ ]
86
+ },
87
+ {
88
+ "cell_type": "code",
89
+ "execution_count": 5,
90
+ "id": "26ed180e-d636-4eda-bb6c-6aef597e3189",
91
+ "metadata": {},
92
+ "outputs": [
93
+ {
94
+ "name": "stdout",
95
+ "output_type": "stream",
96
+ "text": [
97
+ "dtype = object\n",
98
+ "120 ms ± 21.6 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
99
+ "\n",
100
+ "dtype = int\n",
101
+ "5.37 ms ± 184 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n",
102
+ "\n"
103
+ ]
104
+ }
105
+ ],
106
+ "source": [
107
+ "for dtype in ['object', 'int']:\n",
108
+ " print(\"dtype =\", dtype)\n",
109
+ " %timeit np.arange(1E6, dtype=dtype).sum()\n",
110
+ " print()"
111
+ ]
112
+ },
113
+ {
114
+ "cell_type": "code",
115
+ "execution_count": 7,
116
+ "id": "60a94109-d9bc-4781-9ef5-152f458c9133",
117
+ "metadata": {},
118
+ "outputs": [
119
+ {
120
+ "name": "stdout",
121
+ "output_type": "stream",
122
+ "text": [
123
+ "30.0\n"
124
+ ]
125
+ }
126
+ ],
127
+ "source": [
128
+ "vals1 = np.array([10, 20, np.nan])\n",
129
+ "print(np.nansum(vals1))"
130
+ ]
131
+ },
132
+ {
133
+ "cell_type": "code",
134
+ "execution_count": 8,
135
+ "id": "29e48dea-555c-4a3a-b7d1-6af63234b7a9",
136
+ "metadata": {},
137
+ "outputs": [
138
+ {
139
+ "data": {
140
+ "text/plain": [
141
+ "dtype('float64')"
142
+ ]
143
+ },
144
+ "execution_count": 8,
145
+ "metadata": {},
146
+ "output_type": "execute_result"
147
+ }
148
+ ],
149
+ "source": [
150
+ "vals2 = np.array([1, np.nan, 3, 4])\n",
151
+ "vals2.dtype"
152
+ ]
153
+ },
154
+ {
155
+ "cell_type": "code",
156
+ "execution_count": 9,
157
+ "id": "1c9053b8-8790-4bc8-b8f5-7617a1248b5e",
158
+ "metadata": {},
159
+ "outputs": [
160
+ {
161
+ "data": {
162
+ "text/plain": [
163
+ "nan"
164
+ ]
165
+ },
166
+ "execution_count": 9,
167
+ "metadata": {},
168
+ "output_type": "execute_result"
169
+ }
170
+ ],
171
+ "source": [
172
+ "1 + np.nan"
173
+ ]
174
+ },
175
+ {
176
+ "cell_type": "code",
177
+ "execution_count": 10,
178
+ "id": "4d26e974-d2f9-4aef-bf09-0961429ff7e0",
179
+ "metadata": {},
180
+ "outputs": [
181
+ {
182
+ "data": {
183
+ "text/plain": [
184
+ "nan"
185
+ ]
186
+ },
187
+ "execution_count": 10,
188
+ "metadata": {},
189
+ "output_type": "execute_result"
190
+ }
191
+ ],
192
+ "source": [
193
+ "0 * np.nan"
194
+ ]
195
+ },
196
+ {
197
+ "cell_type": "code",
198
+ "execution_count": 11,
199
+ "id": "ad69d303-24af-4718-8a99-2a063abaa8ce",
200
+ "metadata": {},
201
+ "outputs": [
202
+ {
203
+ "data": {
204
+ "text/plain": [
205
+ "(np.float64(nan), np.float64(nan), np.float64(nan))"
206
+ ]
207
+ },
208
+ "execution_count": 11,
209
+ "metadata": {},
210
+ "output_type": "execute_result"
211
+ }
212
+ ],
213
+ "source": [
214
+ "vals2.sum(), vals2.min(), vals2.max()"
215
+ ]
216
+ },
217
+ {
218
+ "cell_type": "code",
219
+ "execution_count": 12,
220
+ "id": "ae143434-f619-46cd-a43e-ff5a7daf2a28",
221
+ "metadata": {},
222
+ "outputs": [
223
+ {
224
+ "data": {
225
+ "text/plain": [
226
+ "(np.float64(8.0), np.float64(1.0), np.float64(4.0))"
227
+ ]
228
+ },
229
+ "execution_count": 12,
230
+ "metadata": {},
231
+ "output_type": "execute_result"
232
+ }
233
+ ],
234
+ "source": [
235
+ "np.nansum(vals2), np.nanmin(vals2), np.nanmax(vals2)"
236
+ ]
237
+ },
238
+ {
239
+ "cell_type": "code",
240
+ "execution_count": 13,
241
+ "id": "8533305f-78e0-4c3a-b122-23be6b5f73b4",
242
+ "metadata": {},
243
+ "outputs": [
244
+ {
245
+ "data": {
246
+ "text/plain": [
247
+ "0 1.0\n",
248
+ "1 NaN\n",
249
+ "2 2.0\n",
250
+ "3 NaN\n",
251
+ "dtype: float64"
252
+ ]
253
+ },
254
+ "execution_count": 13,
255
+ "metadata": {},
256
+ "output_type": "execute_result"
257
+ }
258
+ ],
259
+ "source": [
260
+ "pd.Series([1, np.nan, 2, None])"
261
+ ]
262
+ },
263
+ {
264
+ "cell_type": "code",
265
+ "execution_count": 14,
266
+ "id": "5317b0d7-a933-4b22-81d6-2fa38a853d5d",
267
+ "metadata": {},
268
+ "outputs": [
269
+ {
270
+ "data": {
271
+ "text/plain": [
272
+ "0 0\n",
273
+ "1 1\n",
274
+ "dtype: int64"
275
+ ]
276
+ },
277
+ "execution_count": 14,
278
+ "metadata": {},
279
+ "output_type": "execute_result"
280
+ }
281
+ ],
282
+ "source": [
283
+ "x = pd.Series(range(2), dtype=int)\n",
284
+ "x"
285
+ ]
286
+ },
287
+ {
288
+ "cell_type": "code",
289
+ "execution_count": 15,
290
+ "id": "10c2aa25-109e-4678-9f17-82965c0d8d2d",
291
+ "metadata": {},
292
+ "outputs": [
293
+ {
294
+ "data": {
295
+ "text/plain": [
296
+ "0 NaN\n",
297
+ "1 1.0\n",
298
+ "dtype: float64"
299
+ ]
300
+ },
301
+ "execution_count": 15,
302
+ "metadata": {},
303
+ "output_type": "execute_result"
304
+ }
305
+ ],
306
+ "source": [
307
+ "x[0] = None\n",
308
+ "x"
309
+ ]
310
+ },
311
+ {
312
+ "cell_type": "code",
313
+ "execution_count": 16,
314
+ "id": "97a73881-49ba-445a-b0f2-670a03f9b562",
315
+ "metadata": {},
316
+ "outputs": [
317
+ {
318
+ "data": {
319
+ "text/plain": [
320
+ "0 False\n",
321
+ "1 True\n",
322
+ "2 False\n",
323
+ "3 True\n",
324
+ "dtype: bool"
325
+ ]
326
+ },
327
+ "execution_count": 16,
328
+ "metadata": {},
329
+ "output_type": "execute_result"
330
+ }
331
+ ],
332
+ "source": [
333
+ "data = pd.Series([1, np.nan, 'hello', None])\n",
334
+ "data.isnull()"
335
+ ]
336
+ },
337
+ {
338
+ "cell_type": "code",
339
+ "execution_count": 17,
340
+ "id": "62c1d7ce-93e6-4fb2-acb2-ecc606a3b14d",
341
+ "metadata": {},
342
+ "outputs": [
343
+ {
344
+ "data": {
345
+ "text/plain": [
346
+ "0 1\n",
347
+ "2 hello\n",
348
+ "dtype: object"
349
+ ]
350
+ },
351
+ "execution_count": 17,
352
+ "metadata": {},
353
+ "output_type": "execute_result"
354
+ }
355
+ ],
356
+ "source": [
357
+ "data[data.notnull()]"
358
+ ]
359
+ },
360
+ {
361
+ "cell_type": "code",
362
+ "execution_count": 18,
363
+ "id": "ccdd04f5-7093-4761-97ed-523295ac42f5",
364
+ "metadata": {},
365
+ "outputs": [
366
+ {
367
+ "data": {
368
+ "text/plain": [
369
+ "0 1\n",
370
+ "2 hello\n",
371
+ "dtype: object"
372
+ ]
373
+ },
374
+ "execution_count": 18,
375
+ "metadata": {},
376
+ "output_type": "execute_result"
377
+ }
378
+ ],
379
+ "source": [
380
+ "data.dropna()"
381
+ ]
382
+ },
383
+ {
384
+ "cell_type": "code",
385
+ "execution_count": 19,
386
+ "id": "8411cb90-03c7-4c26-9fe9-d81b996d45cb",
387
+ "metadata": {},
388
+ "outputs": [
389
+ {
390
+ "data": {
391
+ "text/html": [
392
+ "<div>\n",
393
+ "<style scoped>\n",
394
+ " .dataframe tbody tr th:only-of-type {\n",
395
+ " vertical-align: middle;\n",
396
+ " }\n",
397
+ "\n",
398
+ " .dataframe tbody tr th {\n",
399
+ " vertical-align: top;\n",
400
+ " }\n",
401
+ "\n",
402
+ " .dataframe thead th {\n",
403
+ " text-align: right;\n",
404
+ " }\n",
405
+ "</style>\n",
406
+ "<table border=\"1\" class=\"dataframe\">\n",
407
+ " <thead>\n",
408
+ " <tr style=\"text-align: right;\">\n",
409
+ " <th></th>\n",
410
+ " <th>0</th>\n",
411
+ " <th>1</th>\n",
412
+ " <th>2</th>\n",
413
+ " </tr>\n",
414
+ " </thead>\n",
415
+ " <tbody>\n",
416
+ " <tr>\n",
417
+ " <th>0</th>\n",
418
+ " <td>1.0</td>\n",
419
+ " <td>NaN</td>\n",
420
+ " <td>2</td>\n",
421
+ " </tr>\n",
422
+ " <tr>\n",
423
+ " <th>1</th>\n",
424
+ " <td>2.0</td>\n",
425
+ " <td>3.0</td>\n",
426
+ " <td>5</td>\n",
427
+ " </tr>\n",
428
+ " <tr>\n",
429
+ " <th>2</th>\n",
430
+ " <td>NaN</td>\n",
431
+ " <td>4.0</td>\n",
432
+ " <td>6</td>\n",
433
+ " </tr>\n",
434
+ " </tbody>\n",
435
+ "</table>\n",
436
+ "</div>"
437
+ ],
438
+ "text/plain": [
439
+ " 0 1 2\n",
440
+ "0 1.0 NaN 2\n",
441
+ "1 2.0 3.0 5\n",
442
+ "2 NaN 4.0 6"
443
+ ]
444
+ },
445
+ "execution_count": 19,
446
+ "metadata": {},
447
+ "output_type": "execute_result"
448
+ }
449
+ ],
450
+ "source": [
451
+ "df = pd.DataFrame([[1, np.nan, 2],\n",
452
+ "[2, 3, 5],\n",
453
+ "[np.nan, 4, 6]])\n",
454
+ "df"
455
+ ]
456
+ },
457
+ {
458
+ "cell_type": "code",
459
+ "execution_count": 20,
460
+ "id": "db960192-bdc5-4ca1-8b25-d15a75038c09",
461
+ "metadata": {},
462
+ "outputs": [
463
+ {
464
+ "data": {
465
+ "text/html": [
466
+ "<div>\n",
467
+ "<style scoped>\n",
468
+ " .dataframe tbody tr th:only-of-type {\n",
469
+ " vertical-align: middle;\n",
470
+ " }\n",
471
+ "\n",
472
+ " .dataframe tbody tr th {\n",
473
+ " vertical-align: top;\n",
474
+ " }\n",
475
+ "\n",
476
+ " .dataframe thead th {\n",
477
+ " text-align: right;\n",
478
+ " }\n",
479
+ "</style>\n",
480
+ "<table border=\"1\" class=\"dataframe\">\n",
481
+ " <thead>\n",
482
+ " <tr style=\"text-align: right;\">\n",
483
+ " <th></th>\n",
484
+ " <th>0</th>\n",
485
+ " <th>1</th>\n",
486
+ " <th>2</th>\n",
487
+ " </tr>\n",
488
+ " </thead>\n",
489
+ " <tbody>\n",
490
+ " <tr>\n",
491
+ " <th>1</th>\n",
492
+ " <td>2.0</td>\n",
493
+ " <td>3.0</td>\n",
494
+ " <td>5</td>\n",
495
+ " </tr>\n",
496
+ " </tbody>\n",
497
+ "</table>\n",
498
+ "</div>"
499
+ ],
500
+ "text/plain": [
501
+ " 0 1 2\n",
502
+ "1 2.0 3.0 5"
503
+ ]
504
+ },
505
+ "execution_count": 20,
506
+ "metadata": {},
507
+ "output_type": "execute_result"
508
+ }
509
+ ],
510
+ "source": [
511
+ "df.dropna()"
512
+ ]
513
+ },
514
+ {
515
+ "cell_type": "code",
516
+ "execution_count": 21,
517
+ "id": "192591c6-cdf0-4d9b-9974-ecd6313b7a13",
518
+ "metadata": {},
519
+ "outputs": [
520
+ {
521
+ "data": {
522
+ "text/html": [
523
+ "<div>\n",
524
+ "<style scoped>\n",
525
+ " .dataframe tbody tr th:only-of-type {\n",
526
+ " vertical-align: middle;\n",
527
+ " }\n",
528
+ "\n",
529
+ " .dataframe tbody tr th {\n",
530
+ " vertical-align: top;\n",
531
+ " }\n",
532
+ "\n",
533
+ " .dataframe thead th {\n",
534
+ " text-align: right;\n",
535
+ " }\n",
536
+ "</style>\n",
537
+ "<table border=\"1\" class=\"dataframe\">\n",
538
+ " <thead>\n",
539
+ " <tr style=\"text-align: right;\">\n",
540
+ " <th></th>\n",
541
+ " <th>2</th>\n",
542
+ " </tr>\n",
543
+ " </thead>\n",
544
+ " <tbody>\n",
545
+ " <tr>\n",
546
+ " <th>0</th>\n",
547
+ " <td>2</td>\n",
548
+ " </tr>\n",
549
+ " <tr>\n",
550
+ " <th>1</th>\n",
551
+ " <td>5</td>\n",
552
+ " </tr>\n",
553
+ " <tr>\n",
554
+ " <th>2</th>\n",
555
+ " <td>6</td>\n",
556
+ " </tr>\n",
557
+ " </tbody>\n",
558
+ "</table>\n",
559
+ "</div>"
560
+ ],
561
+ "text/plain": [
562
+ " 2\n",
563
+ "0 2\n",
564
+ "1 5\n",
565
+ "2 6"
566
+ ]
567
+ },
568
+ "execution_count": 21,
569
+ "metadata": {},
570
+ "output_type": "execute_result"
571
+ }
572
+ ],
573
+ "source": [
574
+ "df.dropna(axis='columns')"
575
+ ]
576
+ },
577
+ {
578
+ "cell_type": "code",
579
+ "execution_count": 22,
580
+ "id": "7f404f2c-7825-4be1-9df0-6af94dc64127",
581
+ "metadata": {},
582
+ "outputs": [
583
+ {
584
+ "data": {
585
+ "text/html": [
586
+ "<div>\n",
587
+ "<style scoped>\n",
588
+ " .dataframe tbody tr th:only-of-type {\n",
589
+ " vertical-align: middle;\n",
590
+ " }\n",
591
+ "\n",
592
+ " .dataframe tbody tr th {\n",
593
+ " vertical-align: top;\n",
594
+ " }\n",
595
+ "\n",
596
+ " .dataframe thead th {\n",
597
+ " text-align: right;\n",
598
+ " }\n",
599
+ "</style>\n",
600
+ "<table border=\"1\" class=\"dataframe\">\n",
601
+ " <thead>\n",
602
+ " <tr style=\"text-align: right;\">\n",
603
+ " <th></th>\n",
604
+ " <th>0</th>\n",
605
+ " <th>1</th>\n",
606
+ " <th>2</th>\n",
607
+ " <th>3</th>\n",
608
+ " </tr>\n",
609
+ " </thead>\n",
610
+ " <tbody>\n",
611
+ " <tr>\n",
612
+ " <th>0</th>\n",
613
+ " <td>1.0</td>\n",
614
+ " <td>NaN</td>\n",
615
+ " <td>2</td>\n",
616
+ " <td>NaN</td>\n",
617
+ " </tr>\n",
618
+ " <tr>\n",
619
+ " <th>1</th>\n",
620
+ " <td>2.0</td>\n",
621
+ " <td>3.0</td>\n",
622
+ " <td>5</td>\n",
623
+ " <td>NaN</td>\n",
624
+ " </tr>\n",
625
+ " <tr>\n",
626
+ " <th>2</th>\n",
627
+ " <td>NaN</td>\n",
628
+ " <td>4.0</td>\n",
629
+ " <td>6</td>\n",
630
+ " <td>NaN</td>\n",
631
+ " </tr>\n",
632
+ " </tbody>\n",
633
+ "</table>\n",
634
+ "</div>"
635
+ ],
636
+ "text/plain": [
637
+ " 0 1 2 3\n",
638
+ "0 1.0 NaN 2 NaN\n",
639
+ "1 2.0 3.0 5 NaN\n",
640
+ "2 NaN 4.0 6 NaN"
641
+ ]
642
+ },
643
+ "execution_count": 22,
644
+ "metadata": {},
645
+ "output_type": "execute_result"
646
+ }
647
+ ],
648
+ "source": [
649
+ "df[3] = np.nan\n",
650
+ "df"
651
+ ]
652
+ },
653
+ {
654
+ "cell_type": "code",
655
+ "execution_count": 23,
656
+ "id": "bce0355e-bf7a-488b-9b83-7d8d3d6d11d1",
657
+ "metadata": {},
658
+ "outputs": [
659
+ {
660
+ "data": {
661
+ "text/html": [
662
+ "<div>\n",
663
+ "<style scoped>\n",
664
+ " .dataframe tbody tr th:only-of-type {\n",
665
+ " vertical-align: middle;\n",
666
+ " }\n",
667
+ "\n",
668
+ " .dataframe tbody tr th {\n",
669
+ " vertical-align: top;\n",
670
+ " }\n",
671
+ "\n",
672
+ " .dataframe thead th {\n",
673
+ " text-align: right;\n",
674
+ " }\n",
675
+ "</style>\n",
676
+ "<table border=\"1\" class=\"dataframe\">\n",
677
+ " <thead>\n",
678
+ " <tr style=\"text-align: right;\">\n",
679
+ " <th></th>\n",
680
+ " <th>0</th>\n",
681
+ " <th>1</th>\n",
682
+ " <th>2</th>\n",
683
+ " </tr>\n",
684
+ " </thead>\n",
685
+ " <tbody>\n",
686
+ " <tr>\n",
687
+ " <th>0</th>\n",
688
+ " <td>1.0</td>\n",
689
+ " <td>NaN</td>\n",
690
+ " <td>2</td>\n",
691
+ " </tr>\n",
692
+ " <tr>\n",
693
+ " <th>1</th>\n",
694
+ " <td>2.0</td>\n",
695
+ " <td>3.0</td>\n",
696
+ " <td>5</td>\n",
697
+ " </tr>\n",
698
+ " <tr>\n",
699
+ " <th>2</th>\n",
700
+ " <td>NaN</td>\n",
701
+ " <td>4.0</td>\n",
702
+ " <td>6</td>\n",
703
+ " </tr>\n",
704
+ " </tbody>\n",
705
+ "</table>\n",
706
+ "</div>"
707
+ ],
708
+ "text/plain": [
709
+ " 0 1 2\n",
710
+ "0 1.0 NaN 2\n",
711
+ "1 2.0 3.0 5\n",
712
+ "2 NaN 4.0 6"
713
+ ]
714
+ },
715
+ "execution_count": 23,
716
+ "metadata": {},
717
+ "output_type": "execute_result"
718
+ }
719
+ ],
720
+ "source": [
721
+ "df.dropna(axis='columns', how='all')"
722
+ ]
723
+ },
724
+ {
725
+ "cell_type": "code",
726
+ "execution_count": 24,
727
+ "id": "c1c69f9f-84d9-4259-9b82-bf19948374b0",
728
+ "metadata": {},
729
+ "outputs": [
730
+ {
731
+ "data": {
732
+ "text/html": [
733
+ "<div>\n",
734
+ "<style scoped>\n",
735
+ " .dataframe tbody tr th:only-of-type {\n",
736
+ " vertical-align: middle;\n",
737
+ " }\n",
738
+ "\n",
739
+ " .dataframe tbody tr th {\n",
740
+ " vertical-align: top;\n",
741
+ " }\n",
742
+ "\n",
743
+ " .dataframe thead th {\n",
744
+ " text-align: right;\n",
745
+ " }\n",
746
+ "</style>\n",
747
+ "<table border=\"1\" class=\"dataframe\">\n",
748
+ " <thead>\n",
749
+ " <tr style=\"text-align: right;\">\n",
750
+ " <th></th>\n",
751
+ " <th>0</th>\n",
752
+ " <th>1</th>\n",
753
+ " <th>2</th>\n",
754
+ " <th>3</th>\n",
755
+ " </tr>\n",
756
+ " </thead>\n",
757
+ " <tbody>\n",
758
+ " <tr>\n",
759
+ " <th>1</th>\n",
760
+ " <td>2.0</td>\n",
761
+ " <td>3.0</td>\n",
762
+ " <td>5</td>\n",
763
+ " <td>NaN</td>\n",
764
+ " </tr>\n",
765
+ " </tbody>\n",
766
+ "</table>\n",
767
+ "</div>"
768
+ ],
769
+ "text/plain": [
770
+ " 0 1 2 3\n",
771
+ "1 2.0 3.0 5 NaN"
772
+ ]
773
+ },
774
+ "execution_count": 24,
775
+ "metadata": {},
776
+ "output_type": "execute_result"
777
+ }
778
+ ],
779
+ "source": [
780
+ "df.dropna(axis='rows', thresh=3)\n"
781
+ ]
782
+ },
783
+ {
784
+ "cell_type": "code",
785
+ "execution_count": 25,
786
+ "id": "02274d77-117a-4346-b0cd-25a11e195ea1",
787
+ "metadata": {},
788
+ "outputs": [
789
+ {
790
+ "data": {
791
+ "text/plain": [
792
+ "a 1.0\n",
793
+ "b NaN\n",
794
+ "c 2.0\n",
795
+ "d NaN\n",
796
+ "e 3.0\n",
797
+ "dtype: float64"
798
+ ]
799
+ },
800
+ "execution_count": 25,
801
+ "metadata": {},
802
+ "output_type": "execute_result"
803
+ }
804
+ ],
805
+ "source": [
806
+ "#Filling null values\n",
807
+ "data = pd.Series([1, np.nan, 2, None, 3], index=list('abcde'))\n",
808
+ "data"
809
+ ]
810
+ },
811
+ {
812
+ "cell_type": "code",
813
+ "execution_count": 26,
814
+ "id": "ff7c66b4-8886-4d12-82a9-e033e13bf3f9",
815
+ "metadata": {},
816
+ "outputs": [
817
+ {
818
+ "data": {
819
+ "text/plain": [
820
+ "a 1.0\n",
821
+ "b 0.0\n",
822
+ "c 2.0\n",
823
+ "d 0.0\n",
824
+ "e 3.0\n",
825
+ "dtype: float64"
826
+ ]
827
+ },
828
+ "execution_count": 26,
829
+ "metadata": {},
830
+ "output_type": "execute_result"
831
+ }
832
+ ],
833
+ "source": [
834
+ "data.fillna(0)"
835
+ ]
836
+ },
837
+ {
838
+ "cell_type": "code",
839
+ "execution_count": 27,
840
+ "id": "855559ab-1428-4555-a2b0-6818f0e4be25",
841
+ "metadata": {},
842
+ "outputs": [
843
+ {
844
+ "name": "stderr",
845
+ "output_type": "stream",
846
+ "text": [
847
+ "C:\\Users\\darsh\\AppData\\Local\\Temp\\ipykernel_18940\\3316037056.py:2: FutureWarning: Series.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.\n",
848
+ " data.fillna(method='ffill')\n"
849
+ ]
850
+ },
851
+ {
852
+ "data": {
853
+ "text/plain": [
854
+ "a 1.0\n",
855
+ "b 1.0\n",
856
+ "c 2.0\n",
857
+ "d 2.0\n",
858
+ "e 3.0\n",
859
+ "dtype: float64"
860
+ ]
861
+ },
862
+ "execution_count": 27,
863
+ "metadata": {},
864
+ "output_type": "execute_result"
865
+ }
866
+ ],
867
+ "source": [
868
+ "# forward-fill\n",
869
+ "data.fillna(method='ffill')"
870
+ ]
871
+ },
872
+ {
873
+ "cell_type": "code",
874
+ "execution_count": 28,
875
+ "id": "cf65f8ea-cce8-4198-bce6-665ade36070c",
876
+ "metadata": {},
877
+ "outputs": [
878
+ {
879
+ "name": "stderr",
880
+ "output_type": "stream",
881
+ "text": [
882
+ "C:\\Users\\darsh\\AppData\\Local\\Temp\\ipykernel_18940\\2650514875.py:2: FutureWarning: Series.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.\n",
883
+ " data.fillna(method='bfill')\n"
884
+ ]
885
+ },
886
+ {
887
+ "data": {
888
+ "text/plain": [
889
+ "a 1.0\n",
890
+ "b 2.0\n",
891
+ "c 2.0\n",
892
+ "d 3.0\n",
893
+ "e 3.0\n",
894
+ "dtype: float64"
895
+ ]
896
+ },
897
+ "execution_count": 28,
898
+ "metadata": {},
899
+ "output_type": "execute_result"
900
+ }
901
+ ],
902
+ "source": [
903
+ "# back-fill\n",
904
+ "data.fillna(method='bfill')\n"
905
+ ]
906
+ },
907
+ {
908
+ "cell_type": "code",
909
+ "execution_count": 29,
910
+ "id": "242b6c7a-20b1-45ae-bcb4-f898d0f60213",
911
+ "metadata": {},
912
+ "outputs": [
913
+ {
914
+ "data": {
915
+ "text/html": [
916
+ "<div>\n",
917
+ "<style scoped>\n",
918
+ " .dataframe tbody tr th:only-of-type {\n",
919
+ " vertical-align: middle;\n",
920
+ " }\n",
921
+ "\n",
922
+ " .dataframe tbody tr th {\n",
923
+ " vertical-align: top;\n",
924
+ " }\n",
925
+ "\n",
926
+ " .dataframe thead th {\n",
927
+ " text-align: right;\n",
928
+ " }\n",
929
+ "</style>\n",
930
+ "<table border=\"1\" class=\"dataframe\">\n",
931
+ " <thead>\n",
932
+ " <tr style=\"text-align: right;\">\n",
933
+ " <th></th>\n",
934
+ " <th>0</th>\n",
935
+ " <th>1</th>\n",
936
+ " <th>2</th>\n",
937
+ " <th>3</th>\n",
938
+ " </tr>\n",
939
+ " </thead>\n",
940
+ " <tbody>\n",
941
+ " <tr>\n",
942
+ " <th>0</th>\n",
943
+ " <td>1.0</td>\n",
944
+ " <td>NaN</td>\n",
945
+ " <td>2</td>\n",
946
+ " <td>NaN</td>\n",
947
+ " </tr>\n",
948
+ " <tr>\n",
949
+ " <th>1</th>\n",
950
+ " <td>2.0</td>\n",
951
+ " <td>3.0</td>\n",
952
+ " <td>5</td>\n",
953
+ " <td>NaN</td>\n",
954
+ " </tr>\n",
955
+ " <tr>\n",
956
+ " <th>2</th>\n",
957
+ " <td>NaN</td>\n",
958
+ " <td>4.0</td>\n",
959
+ " <td>6</td>\n",
960
+ " <td>NaN</td>\n",
961
+ " </tr>\n",
962
+ " </tbody>\n",
963
+ "</table>\n",
964
+ "</div>"
965
+ ],
966
+ "text/plain": [
967
+ " 0 1 2 3\n",
968
+ "0 1.0 NaN 2 NaN\n",
969
+ "1 2.0 3.0 5 NaN\n",
970
+ "2 NaN 4.0 6 NaN"
971
+ ]
972
+ },
973
+ "execution_count": 29,
974
+ "metadata": {},
975
+ "output_type": "execute_result"
976
+ }
977
+ ],
978
+ "source": [
979
+ "df"
980
+ ]
981
+ },
982
+ {
983
+ "cell_type": "code",
984
+ "execution_count": 30,
985
+ "id": "fab1a27d-5762-4c61-ba7f-57eeb9577b99",
986
+ "metadata": {},
987
+ "outputs": [
988
+ {
989
+ "name": "stderr",
990
+ "output_type": "stream",
991
+ "text": [
992
+ "C:\\Users\\darsh\\AppData\\Local\\Temp\\ipykernel_18940\\3455056665.py:1: FutureWarning: DataFrame.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.\n",
993
+ " df.fillna(method='ffill', axis=1)\n"
994
+ ]
995
+ },
996
+ {
997
+ "data": {
998
+ "text/html": [
999
+ "<div>\n",
1000
+ "<style scoped>\n",
1001
+ " .dataframe tbody tr th:only-of-type {\n",
1002
+ " vertical-align: middle;\n",
1003
+ " }\n",
1004
+ "\n",
1005
+ " .dataframe tbody tr th {\n",
1006
+ " vertical-align: top;\n",
1007
+ " }\n",
1008
+ "\n",
1009
+ " .dataframe thead th {\n",
1010
+ " text-align: right;\n",
1011
+ " }\n",
1012
+ "</style>\n",
1013
+ "<table border=\"1\" class=\"dataframe\">\n",
1014
+ " <thead>\n",
1015
+ " <tr style=\"text-align: right;\">\n",
1016
+ " <th></th>\n",
1017
+ " <th>0</th>\n",
1018
+ " <th>1</th>\n",
1019
+ " <th>2</th>\n",
1020
+ " <th>3</th>\n",
1021
+ " </tr>\n",
1022
+ " </thead>\n",
1023
+ " <tbody>\n",
1024
+ " <tr>\n",
1025
+ " <th>0</th>\n",
1026
+ " <td>1.0</td>\n",
1027
+ " <td>1.0</td>\n",
1028
+ " <td>2.0</td>\n",
1029
+ " <td>2.0</td>\n",
1030
+ " </tr>\n",
1031
+ " <tr>\n",
1032
+ " <th>1</th>\n",
1033
+ " <td>2.0</td>\n",
1034
+ " <td>3.0</td>\n",
1035
+ " <td>5.0</td>\n",
1036
+ " <td>5.0</td>\n",
1037
+ " </tr>\n",
1038
+ " <tr>\n",
1039
+ " <th>2</th>\n",
1040
+ " <td>NaN</td>\n",
1041
+ " <td>4.0</td>\n",
1042
+ " <td>6.0</td>\n",
1043
+ " <td>6.0</td>\n",
1044
+ " </tr>\n",
1045
+ " </tbody>\n",
1046
+ "</table>\n",
1047
+ "</div>"
1048
+ ],
1049
+ "text/plain": [
1050
+ " 0 1 2 3\n",
1051
+ "0 1.0 1.0 2.0 2.0\n",
1052
+ "1 2.0 3.0 5.0 5.0\n",
1053
+ "2 NaN 4.0 6.0 6.0"
1054
+ ]
1055
+ },
1056
+ "execution_count": 30,
1057
+ "metadata": {},
1058
+ "output_type": "execute_result"
1059
+ }
1060
+ ],
1061
+ "source": [
1062
+ "\n",
1063
+ "df.fillna(method='ffill', axis=1)"
1064
+ ]
1065
+ },
1066
+ {
1067
+ "cell_type": "code",
1068
+ "execution_count": 31,
1069
+ "id": "721bd85e-4b3e-4b34-bcfc-ccc683be017c",
1070
+ "metadata": {},
1071
+ "outputs": [
1072
+ {
1073
+ "data": {
1074
+ "text/plain": [
1075
+ "(California, 2000) 33871648\n",
1076
+ "(California, 2010) 37253956\n",
1077
+ "(New York, 2000) 18976457\n",
1078
+ "(New York, 2010) 19378102\n",
1079
+ "(Texas, 2000) 20851820\n",
1080
+ "(Texas, 2010) 25145561\n",
1081
+ "dtype: int64"
1082
+ ]
1083
+ },
1084
+ "execution_count": 31,
1085
+ "metadata": {},
1086
+ "output_type": "execute_result"
1087
+ }
1088
+ ],
1089
+ "source": [
1090
+ "import pandas as pd\n",
1091
+ "import numpy as np\n",
1092
+ "index = [('California', 2000), ('California', 2010),\n",
1093
+ "('New York', 2000), ('New York', 2010),\n",
1094
+ "('Texas', 2000), ('Texas', 2010)]\n",
1095
+ "populations = [33871648, 37253956,\n",
1096
+ "18976457, 19378102,\n",
1097
+ "20851820, 25145561]\n",
1098
+ "pop = pd.Series(populations, index=index)\n",
1099
+ "pop"
1100
+ ]
1101
+ },
1102
+ {
1103
+ "cell_type": "code",
1104
+ "execution_count": 32,
1105
+ "id": "cd4522c5-a2de-473a-a2e0-509485b3941a",
1106
+ "metadata": {},
1107
+ "outputs": [
1108
+ {
1109
+ "data": {
1110
+ "text/plain": [
1111
+ "(California, 2010) 37253956\n",
1112
+ "(New York, 2000) 18976457\n",
1113
+ "(New York, 2010) 19378102\n",
1114
+ "(Texas, 2000) 20851820\n",
1115
+ "dtype: int64"
1116
+ ]
1117
+ },
1118
+ "execution_count": 32,
1119
+ "metadata": {},
1120
+ "output_type": "execute_result"
1121
+ }
1122
+ ],
1123
+ "source": [
1124
+ "pop[('California', 2010):('Texas', 2000)]"
1125
+ ]
1126
+ },
1127
+ {
1128
+ "cell_type": "code",
1129
+ "execution_count": 33,
1130
+ "id": "f42bb21b-6345-4084-a210-fa6464c084a4",
1131
+ "metadata": {},
1132
+ "outputs": [
1133
+ {
1134
+ "data": {
1135
+ "text/plain": [
1136
+ "MultiIndex([('California', 2000),\n",
1137
+ " ('California', 2010),\n",
1138
+ " ( 'New York', 2000),\n",
1139
+ " ( 'New York', 2010),\n",
1140
+ " ( 'Texas', 2000),\n",
1141
+ " ( 'Texas', 2010)],\n",
1142
+ " )"
1143
+ ]
1144
+ },
1145
+ "execution_count": 33,
1146
+ "metadata": {},
1147
+ "output_type": "execute_result"
1148
+ }
1149
+ ],
1150
+ "source": [
1151
+ "index = pd.MultiIndex.from_tuples(index)\n",
1152
+ "index"
1153
+ ]
1154
+ },
1155
+ {
1156
+ "cell_type": "code",
1157
+ "execution_count": 34,
1158
+ "id": "e1129e15-b746-4822-8f82-e95c1943fef5",
1159
+ "metadata": {},
1160
+ "outputs": [
1161
+ {
1162
+ "data": {
1163
+ "text/plain": [
1164
+ "California 2000 33871648\n",
1165
+ " 2010 37253956\n",
1166
+ "New York 2000 18976457\n",
1167
+ " 2010 19378102\n",
1168
+ "Texas 2000 20851820\n",
1169
+ " 2010 25145561\n",
1170
+ "dtype: int64"
1171
+ ]
1172
+ },
1173
+ "execution_count": 34,
1174
+ "metadata": {},
1175
+ "output_type": "execute_result"
1176
+ }
1177
+ ],
1178
+ "source": [
1179
+ "pop = pop.reindex(index)\n",
1180
+ "pop"
1181
+ ]
1182
+ },
1183
+ {
1184
+ "cell_type": "code",
1185
+ "execution_count": 35,
1186
+ "id": "c8e3ab93-a611-48de-b950-517b9b3af9ed",
1187
+ "metadata": {},
1188
+ "outputs": [
1189
+ {
1190
+ "data": {
1191
+ "text/plain": [
1192
+ "California 37253956\n",
1193
+ "New York 19378102\n",
1194
+ "Texas 25145561\n",
1195
+ "dtype: int64"
1196
+ ]
1197
+ },
1198
+ "execution_count": 35,
1199
+ "metadata": {},
1200
+ "output_type": "execute_result"
1201
+ }
1202
+ ],
1203
+ "source": [
1204
+ "pop[:, 2010]"
1205
+ ]
1206
+ },
1207
+ {
1208
+ "cell_type": "code",
1209
+ "execution_count": 36,
1210
+ "id": "cd13cdf1-25f0-4663-833a-210d91a985db",
1211
+ "metadata": {},
1212
+ "outputs": [
1213
+ {
1214
+ "data": {
1215
+ "text/html": [
1216
+ "<div>\n",
1217
+ "<style scoped>\n",
1218
+ " .dataframe tbody tr th:only-of-type {\n",
1219
+ " vertical-align: middle;\n",
1220
+ " }\n",
1221
+ "\n",
1222
+ " .dataframe tbody tr th {\n",
1223
+ " vertical-align: top;\n",
1224
+ " }\n",
1225
+ "\n",
1226
+ " .dataframe thead th {\n",
1227
+ " text-align: right;\n",
1228
+ " }\n",
1229
+ "</style>\n",
1230
+ "<table border=\"1\" class=\"dataframe\">\n",
1231
+ " <thead>\n",
1232
+ " <tr style=\"text-align: right;\">\n",
1233
+ " <th></th>\n",
1234
+ " <th>2000</th>\n",
1235
+ " <th>2010</th>\n",
1236
+ " </tr>\n",
1237
+ " </thead>\n",
1238
+ " <tbody>\n",
1239
+ " <tr>\n",
1240
+ " <th>California</th>\n",
1241
+ " <td>33871648</td>\n",
1242
+ " <td>37253956</td>\n",
1243
+ " </tr>\n",
1244
+ " <tr>\n",
1245
+ " <th>New York</th>\n",
1246
+ " <td>18976457</td>\n",
1247
+ " <td>19378102</td>\n",
1248
+ " </tr>\n",
1249
+ " <tr>\n",
1250
+ " <th>Texas</th>\n",
1251
+ " <td>20851820</td>\n",
1252
+ " <td>25145561</td>\n",
1253
+ " </tr>\n",
1254
+ " </tbody>\n",
1255
+ "</table>\n",
1256
+ "</div>"
1257
+ ],
1258
+ "text/plain": [
1259
+ " 2000 2010\n",
1260
+ "California 33871648 37253956\n",
1261
+ "New York 18976457 19378102\n",
1262
+ "Texas 20851820 25145561"
1263
+ ]
1264
+ },
1265
+ "execution_count": 36,
1266
+ "metadata": {},
1267
+ "output_type": "execute_result"
1268
+ }
1269
+ ],
1270
+ "source": [
1271
+ "pop_df = pop.unstack()\n",
1272
+ "pop_df"
1273
+ ]
1274
+ },
1275
+ {
1276
+ "cell_type": "code",
1277
+ "execution_count": 37,
1278
+ "id": "9155ecaf-2a85-4731-8f71-0353eb83a3fb",
1279
+ "metadata": {},
1280
+ "outputs": [
1281
+ {
1282
+ "data": {
1283
+ "text/plain": [
1284
+ "California 2000 33871648\n",
1285
+ " 2010 37253956\n",
1286
+ "New York 2000 18976457\n",
1287
+ " 2010 19378102\n",
1288
+ "Texas 2000 20851820\n",
1289
+ " 2010 25145561\n",
1290
+ "dtype: int64"
1291
+ ]
1292
+ },
1293
+ "execution_count": 37,
1294
+ "metadata": {},
1295
+ "output_type": "execute_result"
1296
+ }
1297
+ ],
1298
+ "source": [
1299
+ "pop_df.stack()"
1300
+ ]
1301
+ },
1302
+ {
1303
+ "cell_type": "code",
1304
+ "execution_count": 38,
1305
+ "id": "6be0213d-b75a-49fc-846b-082cf43be9e9",
1306
+ "metadata": {},
1307
+ "outputs": [
1308
+ {
1309
+ "data": {
1310
+ "text/html": [
1311
+ "<div>\n",
1312
+ "<style scoped>\n",
1313
+ " .dataframe tbody tr th:only-of-type {\n",
1314
+ " vertical-align: middle;\n",
1315
+ " }\n",
1316
+ "\n",
1317
+ " .dataframe tbody tr th {\n",
1318
+ " vertical-align: top;\n",
1319
+ " }\n",
1320
+ "\n",
1321
+ " .dataframe thead th {\n",
1322
+ " text-align: right;\n",
1323
+ " }\n",
1324
+ "</style>\n",
1325
+ "<table border=\"1\" class=\"dataframe\">\n",
1326
+ " <thead>\n",
1327
+ " <tr style=\"text-align: right;\">\n",
1328
+ " <th></th>\n",
1329
+ " <th></th>\n",
1330
+ " <th>total</th>\n",
1331
+ " <th>under18</th>\n",
1332
+ " </tr>\n",
1333
+ " </thead>\n",
1334
+ " <tbody>\n",
1335
+ " <tr>\n",
1336
+ " <th rowspan=\"2\" valign=\"top\">California</th>\n",
1337
+ " <th>2000</th>\n",
1338
+ " <td>33871648</td>\n",
1339
+ " <td>9267089</td>\n",
1340
+ " </tr>\n",
1341
+ " <tr>\n",
1342
+ " <th>2010</th>\n",
1343
+ " <td>37253956</td>\n",
1344
+ " <td>9284094</td>\n",
1345
+ " </tr>\n",
1346
+ " <tr>\n",
1347
+ " <th rowspan=\"2\" valign=\"top\">New York</th>\n",
1348
+ " <th>2000</th>\n",
1349
+ " <td>18976457</td>\n",
1350
+ " <td>4687374</td>\n",
1351
+ " </tr>\n",
1352
+ " <tr>\n",
1353
+ " <th>2010</th>\n",
1354
+ " <td>19378102</td>\n",
1355
+ " <td>4318033</td>\n",
1356
+ " </tr>\n",
1357
+ " <tr>\n",
1358
+ " <th rowspan=\"2\" valign=\"top\">Texas</th>\n",
1359
+ " <th>2000</th>\n",
1360
+ " <td>20851820</td>\n",
1361
+ " <td>5906301</td>\n",
1362
+ " </tr>\n",
1363
+ " <tr>\n",
1364
+ " <th>2010</th>\n",
1365
+ " <td>25145561</td>\n",
1366
+ " <td>6879014</td>\n",
1367
+ " </tr>\n",
1368
+ " </tbody>\n",
1369
+ "</table>\n",
1370
+ "</div>"
1371
+ ],
1372
+ "text/plain": [
1373
+ " total under18\n",
1374
+ "California 2000 33871648 9267089\n",
1375
+ " 2010 37253956 9284094\n",
1376
+ "New York 2000 18976457 4687374\n",
1377
+ " 2010 19378102 4318033\n",
1378
+ "Texas 2000 20851820 5906301\n",
1379
+ " 2010 25145561 6879014"
1380
+ ]
1381
+ },
1382
+ "execution_count": 38,
1383
+ "metadata": {},
1384
+ "output_type": "execute_result"
1385
+ }
1386
+ ],
1387
+ "source": [
1388
+ "pop_df = pd.DataFrame({'total': pop,\n",
1389
+ "'under18': [9267089, 9284094,\n",
1390
+ "4687374, 4318033,\n",
1391
+ "5906301, 6879014]})\n",
1392
+ "pop_df"
1393
+ ]
1394
+ },
1395
+ {
1396
+ "cell_type": "code",
1397
+ "execution_count": 39,
1398
+ "id": "37df56a1-bbb4-46aa-8508-e32a3e65304a",
1399
+ "metadata": {},
1400
+ "outputs": [
1401
+ {
1402
+ "data": {
1403
+ "text/html": [
1404
+ "<div>\n",
1405
+ "<style scoped>\n",
1406
+ " .dataframe tbody tr th:only-of-type {\n",
1407
+ " vertical-align: middle;\n",
1408
+ " }\n",
1409
+ "\n",
1410
+ " .dataframe tbody tr th {\n",
1411
+ " vertical-align: top;\n",
1412
+ " }\n",
1413
+ "\n",
1414
+ " .dataframe thead th {\n",
1415
+ " text-align: right;\n",
1416
+ " }\n",
1417
+ "</style>\n",
1418
+ "<table border=\"1\" class=\"dataframe\">\n",
1419
+ " <thead>\n",
1420
+ " <tr style=\"text-align: right;\">\n",
1421
+ " <th></th>\n",
1422
+ " <th>2000</th>\n",
1423
+ " <th>2010</th>\n",
1424
+ " </tr>\n",
1425
+ " </thead>\n",
1426
+ " <tbody>\n",
1427
+ " <tr>\n",
1428
+ " <th>California</th>\n",
1429
+ " <td>0.273594</td>\n",
1430
+ " <td>0.249211</td>\n",
1431
+ " </tr>\n",
1432
+ " <tr>\n",
1433
+ " <th>New York</th>\n",
1434
+ " <td>0.247010</td>\n",
1435
+ " <td>0.222831</td>\n",
1436
+ " </tr>\n",
1437
+ " <tr>\n",
1438
+ " <th>Texas</th>\n",
1439
+ " <td>0.283251</td>\n",
1440
+ " <td>0.273568</td>\n",
1441
+ " </tr>\n",
1442
+ " </tbody>\n",
1443
+ "</table>\n",
1444
+ "</div>"
1445
+ ],
1446
+ "text/plain": [
1447
+ " 2000 2010\n",
1448
+ "California 0.273594 0.249211\n",
1449
+ "New York 0.247010 0.222831\n",
1450
+ "Texas 0.283251 0.273568"
1451
+ ]
1452
+ },
1453
+ "execution_count": 39,
1454
+ "metadata": {},
1455
+ "output_type": "execute_result"
1456
+ }
1457
+ ],
1458
+ "source": [
1459
+ "f_u18 = pop_df['under18'] / pop_df['total']\n",
1460
+ "f_u18.unstack()"
1461
+ ]
1462
+ },
1463
+ {
1464
+ "cell_type": "code",
1465
+ "execution_count": 40,
1466
+ "id": "84c056c9-db9d-4f1c-bbe9-73770c491c33",
1467
+ "metadata": {},
1468
+ "outputs": [
1469
+ {
1470
+ "data": {
1471
+ "text/html": [
1472
+ "<div>\n",
1473
+ "<style scoped>\n",
1474
+ " .dataframe tbody tr th:only-of-type {\n",
1475
+ " vertical-align: middle;\n",
1476
+ " }\n",
1477
+ "\n",
1478
+ " .dataframe tbody tr th {\n",
1479
+ " vertical-align: top;\n",
1480
+ " }\n",
1481
+ "\n",
1482
+ " .dataframe thead th {\n",
1483
+ " text-align: right;\n",
1484
+ " }\n",
1485
+ "</style>\n",
1486
+ "<table border=\"1\" class=\"dataframe\">\n",
1487
+ " <thead>\n",
1488
+ " <tr style=\"text-align: right;\">\n",
1489
+ " <th></th>\n",
1490
+ " <th></th>\n",
1491
+ " <th>data1</th>\n",
1492
+ " <th>data2</th>\n",
1493
+ " </tr>\n",
1494
+ " </thead>\n",
1495
+ " <tbody>\n",
1496
+ " <tr>\n",
1497
+ " <th rowspan=\"2\" valign=\"top\">a</th>\n",
1498
+ " <th>1</th>\n",
1499
+ " <td>0.478627</td>\n",
1500
+ " <td>0.628762</td>\n",
1501
+ " </tr>\n",
1502
+ " <tr>\n",
1503
+ " <th>2</th>\n",
1504
+ " <td>0.327155</td>\n",
1505
+ " <td>0.747383</td>\n",
1506
+ " </tr>\n",
1507
+ " <tr>\n",
1508
+ " <th rowspan=\"2\" valign=\"top\">b</th>\n",
1509
+ " <th>1</th>\n",
1510
+ " <td>0.997522</td>\n",
1511
+ " <td>0.443755</td>\n",
1512
+ " </tr>\n",
1513
+ " <tr>\n",
1514
+ " <th>2</th>\n",
1515
+ " <td>0.428418</td>\n",
1516
+ " <td>0.415810</td>\n",
1517
+ " </tr>\n",
1518
+ " </tbody>\n",
1519
+ "</table>\n",
1520
+ "</div>"
1521
+ ],
1522
+ "text/plain": [
1523
+ " data1 data2\n",
1524
+ "a 1 0.478627 0.628762\n",
1525
+ " 2 0.327155 0.747383\n",
1526
+ "b 1 0.997522 0.443755\n",
1527
+ " 2 0.428418 0.415810"
1528
+ ]
1529
+ },
1530
+ "execution_count": 40,
1531
+ "metadata": {},
1532
+ "output_type": "execute_result"
1533
+ }
1534
+ ],
1535
+ "source": [
1536
+ "df = pd.DataFrame(np.random.rand(4, 2),\n",
1537
+ "index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],\n",
1538
+ "columns=['data1', 'data2'])\n",
1539
+ "df"
1540
+ ]
1541
+ },
1542
+ {
1543
+ "cell_type": "code",
1544
+ "execution_count": 41,
1545
+ "id": "87b94068-cad8-47df-a350-e67d083d60cd",
1546
+ "metadata": {},
1547
+ "outputs": [
1548
+ {
1549
+ "data": {
1550
+ "text/plain": [
1551
+ "California 2000 33871648\n",
1552
+ " 2010 37253956\n",
1553
+ "Texas 2000 20851820\n",
1554
+ " 2010 25145561\n",
1555
+ "New York 2000 18976457\n",
1556
+ " 2010 19378102\n",
1557
+ "dtype: int64"
1558
+ ]
1559
+ },
1560
+ "execution_count": 41,
1561
+ "metadata": {},
1562
+ "output_type": "execute_result"
1563
+ }
1564
+ ],
1565
+ "source": [
1566
+ "data = {('California', 2000): 33871648,\n",
1567
+ "('California', 2010): 37253956,\n",
1568
+ "('Texas', 2000): 20851820,\n",
1569
+ "('Texas', 2010): 25145561,\n",
1570
+ "('New York', 2000): 18976457,\n",
1571
+ "('New York', 2010): 19378102}\n",
1572
+ "pd.Series(data)"
1573
+ ]
1574
+ },
1575
+ {
1576
+ "cell_type": "code",
1577
+ "execution_count": 42,
1578
+ "id": "2357c39e-189d-49d8-a652-0cc60fd0549d",
1579
+ "metadata": {},
1580
+ "outputs": [
1581
+ {
1582
+ "data": {
1583
+ "text/plain": [
1584
+ "MultiIndex([('a', 1),\n",
1585
+ " ('a', 2),\n",
1586
+ " ('b', 1),\n",
1587
+ " ('b', 2)],\n",
1588
+ " )"
1589
+ ]
1590
+ },
1591
+ "execution_count": 42,
1592
+ "metadata": {},
1593
+ "output_type": "execute_result"
1594
+ }
1595
+ ],
1596
+ "source": [
1597
+ "pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'], [1, 2, 1, 2]])"
1598
+ ]
1599
+ },
1600
+ {
1601
+ "cell_type": "code",
1602
+ "execution_count": 43,
1603
+ "id": "1dc18ebb-fe8a-4bac-9fbf-ea38e7d11e16",
1604
+ "metadata": {},
1605
+ "outputs": [
1606
+ {
1607
+ "data": {
1608
+ "text/plain": [
1609
+ "MultiIndex([('a', 1),\n",
1610
+ " ('a', 2),\n",
1611
+ " ('b', 1),\n",
1612
+ " ('b', 2)],\n",
1613
+ " )"
1614
+ ]
1615
+ },
1616
+ "execution_count": 43,
1617
+ "metadata": {},
1618
+ "output_type": "execute_result"
1619
+ }
1620
+ ],
1621
+ "source": [
1622
+ "pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1), ('b', 2)])"
1623
+ ]
1624
+ },
1625
+ {
1626
+ "cell_type": "code",
1627
+ "execution_count": 44,
1628
+ "id": "13bff348-d67d-49ba-b16c-16f31032c1f2",
1629
+ "metadata": {},
1630
+ "outputs": [
1631
+ {
1632
+ "data": {
1633
+ "text/plain": [
1634
+ "MultiIndex([('a', 1),\n",
1635
+ " ('a', 2),\n",
1636
+ " ('b', 1),\n",
1637
+ " ('b', 2)],\n",
1638
+ " )"
1639
+ ]
1640
+ },
1641
+ "execution_count": 44,
1642
+ "metadata": {},
1643
+ "output_type": "execute_result"
1644
+ }
1645
+ ],
1646
+ "source": [
1647
+ "pd.MultiIndex.from_product([['a', 'b'], [1, 2]])"
1648
+ ]
1649
+ },
1650
+ {
1651
+ "cell_type": "code",
1652
+ "execution_count": 45,
1653
+ "id": "7ce4bd44-6979-4413-84cd-6195fd5440d1",
1654
+ "metadata": {},
1655
+ "outputs": [
1656
+ {
1657
+ "data": {
1658
+ "text/plain": [
1659
+ "MultiIndex([('a', 1),\n",
1660
+ " ('a', 2),\n",
1661
+ " ('b', 1),\n",
1662
+ " ('b', 2)],\n",
1663
+ " )"
1664
+ ]
1665
+ },
1666
+ "execution_count": 45,
1667
+ "metadata": {},
1668
+ "output_type": "execute_result"
1669
+ }
1670
+ ],
1671
+ "source": [
1672
+ "pd.MultiIndex(levels=[['a', 'b'], [1, 2]],\n",
1673
+ " codes=[[0, 0, 1, 1], [0, 1, 0, 1]])"
1674
+ ]
1675
+ },
1676
+ {
1677
+ "cell_type": "code",
1678
+ "execution_count": 46,
1679
+ "id": "745e2c13-b099-4901-b277-074796dcd5b0",
1680
+ "metadata": {},
1681
+ "outputs": [
1682
+ {
1683
+ "data": {
1684
+ "text/plain": [
1685
+ "state year\n",
1686
+ "California 2000 33871648\n",
1687
+ " 2010 37253956\n",
1688
+ "New York 2000 18976457\n",
1689
+ " 2010 19378102\n",
1690
+ "Texas 2000 20851820\n",
1691
+ " 2010 25145561\n",
1692
+ "dtype: int64"
1693
+ ]
1694
+ },
1695
+ "execution_count": 46,
1696
+ "metadata": {},
1697
+ "output_type": "execute_result"
1698
+ }
1699
+ ],
1700
+ "source": [
1701
+ "pop.index.names = ['state', 'year']\n",
1702
+ "pop"
1703
+ ]
1704
+ },
1705
+ {
1706
+ "cell_type": "code",
1707
+ "execution_count": 47,
1708
+ "id": "0676dea4-4f80-4165-81ea-3869b9ab1f15",
1709
+ "metadata": {},
1710
+ "outputs": [
1711
+ {
1712
+ "data": {
1713
+ "text/html": [
1714
+ "<div>\n",
1715
+ "<style scoped>\n",
1716
+ " .dataframe tbody tr th:only-of-type {\n",
1717
+ " vertical-align: middle;\n",
1718
+ " }\n",
1719
+ "\n",
1720
+ " .dataframe tbody tr th {\n",
1721
+ " vertical-align: top;\n",
1722
+ " }\n",
1723
+ "\n",
1724
+ " .dataframe thead tr th {\n",
1725
+ " text-align: left;\n",
1726
+ " }\n",
1727
+ "\n",
1728
+ " .dataframe thead tr:last-of-type th {\n",
1729
+ " text-align: right;\n",
1730
+ " }\n",
1731
+ "</style>\n",
1732
+ "<table border=\"1\" class=\"dataframe\">\n",
1733
+ " <thead>\n",
1734
+ " <tr>\n",
1735
+ " <th></th>\n",
1736
+ " <th>subject</th>\n",
1737
+ " <th colspan=\"2\" halign=\"left\">Bob</th>\n",
1738
+ " <th colspan=\"2\" halign=\"left\">Guido</th>\n",
1739
+ " <th colspan=\"2\" halign=\"left\">Sue</th>\n",
1740
+ " </tr>\n",
1741
+ " <tr>\n",
1742
+ " <th></th>\n",
1743
+ " <th>type</th>\n",
1744
+ " <th>HR</th>\n",
1745
+ " <th>Temp</th>\n",
1746
+ " <th>HR</th>\n",
1747
+ " <th>Temp</th>\n",
1748
+ " <th>HR</th>\n",
1749
+ " <th>Temp</th>\n",
1750
+ " </tr>\n",
1751
+ " <tr>\n",
1752
+ " <th>year</th>\n",
1753
+ " <th>visit</th>\n",
1754
+ " <th></th>\n",
1755
+ " <th></th>\n",
1756
+ " <th></th>\n",
1757
+ " <th></th>\n",
1758
+ " <th></th>\n",
1759
+ " <th></th>\n",
1760
+ " </tr>\n",
1761
+ " </thead>\n",
1762
+ " <tbody>\n",
1763
+ " <tr>\n",
1764
+ " <th rowspan=\"2\" valign=\"top\">2013</th>\n",
1765
+ " <th>1</th>\n",
1766
+ " <td>49.0</td>\n",
1767
+ " <td>38.5</td>\n",
1768
+ " <td>37.0</td>\n",
1769
+ " <td>38.2</td>\n",
1770
+ " <td>33.0</td>\n",
1771
+ " <td>38.3</td>\n",
1772
+ " </tr>\n",
1773
+ " <tr>\n",
1774
+ " <th>2</th>\n",
1775
+ " <td>43.0</td>\n",
1776
+ " <td>38.6</td>\n",
1777
+ " <td>23.0</td>\n",
1778
+ " <td>37.5</td>\n",
1779
+ " <td>42.0</td>\n",
1780
+ " <td>35.2</td>\n",
1781
+ " </tr>\n",
1782
+ " <tr>\n",
1783
+ " <th rowspan=\"2\" valign=\"top\">2014</th>\n",
1784
+ " <th>1</th>\n",
1785
+ " <td>22.0</td>\n",
1786
+ " <td>37.3</td>\n",
1787
+ " <td>46.0</td>\n",
1788
+ " <td>36.5</td>\n",
1789
+ " <td>35.0</td>\n",
1790
+ " <td>37.3</td>\n",
1791
+ " </tr>\n",
1792
+ " <tr>\n",
1793
+ " <th>2</th>\n",
1794
+ " <td>40.0</td>\n",
1795
+ " <td>36.9</td>\n",
1796
+ " <td>58.0</td>\n",
1797
+ " <td>37.1</td>\n",
1798
+ " <td>48.0</td>\n",
1799
+ " <td>35.8</td>\n",
1800
+ " </tr>\n",
1801
+ " </tbody>\n",
1802
+ "</table>\n",
1803
+ "</div>"
1804
+ ],
1805
+ "text/plain": [
1806
+ "subject Bob Guido Sue \n",
1807
+ "type HR Temp HR Temp HR Temp\n",
1808
+ "year visit \n",
1809
+ "2013 1 49.0 38.5 37.0 38.2 33.0 38.3\n",
1810
+ " 2 43.0 38.6 23.0 37.5 42.0 35.2\n",
1811
+ "2014 1 22.0 37.3 46.0 36.5 35.0 37.3\n",
1812
+ " 2 40.0 36.9 58.0 37.1 48.0 35.8"
1813
+ ]
1814
+ },
1815
+ "execution_count": 47,
1816
+ "metadata": {},
1817
+ "output_type": "execute_result"
1818
+ }
1819
+ ],
1820
+ "source": [
1821
+ "index = pd.MultiIndex.from_product([[2013, 2014], [1, 2]],\n",
1822
+ "names=['year', 'visit'])\n",
1823
+ "columns = pd.MultiIndex.from_product([['Bob', 'Guido', 'Sue'], ['HR', 'Temp']],\n",
1824
+ "names=['subject', 'type'])\n",
1825
+ "\n",
1826
+ "data = np.round(np.random.randn(4, 6), 1)\n",
1827
+ "data[:, ::2] *= 10\n",
1828
+ "data += 37\n",
1829
+ "\n",
1830
+ "health_data = pd.DataFrame(data, index=index, columns=columns)\n",
1831
+ "health_data"
1832
+ ]
1833
+ },
1834
+ {
1835
+ "cell_type": "code",
1836
+ "execution_count": 48,
1837
+ "id": "93319435-e56e-4939-b56c-eb8b3bd97163",
1838
+ "metadata": {},
1839
+ "outputs": [
1840
+ {
1841
+ "data": {
1842
+ "text/html": [
1843
+ "<div>\n",
1844
+ "<style scoped>\n",
1845
+ " .dataframe tbody tr th:only-of-type {\n",
1846
+ " vertical-align: middle;\n",
1847
+ " }\n",
1848
+ "\n",
1849
+ " .dataframe tbody tr th {\n",
1850
+ " vertical-align: top;\n",
1851
+ " }\n",
1852
+ "\n",
1853
+ " .dataframe thead th {\n",
1854
+ " text-align: right;\n",
1855
+ " }\n",
1856
+ "</style>\n",
1857
+ "<table border=\"1\" class=\"dataframe\">\n",
1858
+ " <thead>\n",
1859
+ " <tr style=\"text-align: right;\">\n",
1860
+ " <th></th>\n",
1861
+ " <th>type</th>\n",
1862
+ " <th>HR</th>\n",
1863
+ " <th>Temp</th>\n",
1864
+ " </tr>\n",
1865
+ " <tr>\n",
1866
+ " <th>year</th>\n",
1867
+ " <th>visit</th>\n",
1868
+ " <th></th>\n",
1869
+ " <th></th>\n",
1870
+ " </tr>\n",
1871
+ " </thead>\n",
1872
+ " <tbody>\n",
1873
+ " <tr>\n",
1874
+ " <th rowspan=\"2\" valign=\"top\">2013</th>\n",
1875
+ " <th>1</th>\n",
1876
+ " <td>37.0</td>\n",
1877
+ " <td>38.2</td>\n",
1878
+ " </tr>\n",
1879
+ " <tr>\n",
1880
+ " <th>2</th>\n",
1881
+ " <td>23.0</td>\n",
1882
+ " <td>37.5</td>\n",
1883
+ " </tr>\n",
1884
+ " <tr>\n",
1885
+ " <th rowspan=\"2\" valign=\"top\">2014</th>\n",
1886
+ " <th>1</th>\n",
1887
+ " <td>46.0</td>\n",
1888
+ " <td>36.5</td>\n",
1889
+ " </tr>\n",
1890
+ " <tr>\n",
1891
+ " <th>2</th>\n",
1892
+ " <td>58.0</td>\n",
1893
+ " <td>37.1</td>\n",
1894
+ " </tr>\n",
1895
+ " </tbody>\n",
1896
+ "</table>\n",
1897
+ "</div>"
1898
+ ],
1899
+ "text/plain": [
1900
+ "type HR Temp\n",
1901
+ "year visit \n",
1902
+ "2013 1 37.0 38.2\n",
1903
+ " 2 23.0 37.5\n",
1904
+ "2014 1 46.0 36.5\n",
1905
+ " 2 58.0 37.1"
1906
+ ]
1907
+ },
1908
+ "execution_count": 48,
1909
+ "metadata": {},
1910
+ "output_type": "execute_result"
1911
+ }
1912
+ ],
1913
+ "source": [
1914
+ "health_data['Guido']"
1915
+ ]
1916
+ },
1917
+ {
1918
+ "cell_type": "code",
1919
+ "execution_count": 49,
1920
+ "id": "aa5d6ddb-581f-4d7e-b148-602f94749e1b",
1921
+ "metadata": {},
1922
+ "outputs": [
1923
+ {
1924
+ "data": {
1925
+ "text/plain": [
1926
+ "state year\n",
1927
+ "California 2000 33871648\n",
1928
+ " 2010 37253956\n",
1929
+ "New York 2000 18976457\n",
1930
+ " 2010 19378102\n",
1931
+ "Texas 2000 20851820\n",
1932
+ " 2010 25145561\n",
1933
+ "dtype: int64"
1934
+ ]
1935
+ },
1936
+ "execution_count": 49,
1937
+ "metadata": {},
1938
+ "output_type": "execute_result"
1939
+ }
1940
+ ],
1941
+ "source": [
1942
+ "pop"
1943
+ ]
1944
+ },
1945
+ {
1946
+ "cell_type": "code",
1947
+ "execution_count": 50,
1948
+ "id": "6e2a0550-b4f7-4e1a-9c9e-e6a537554179",
1949
+ "metadata": {},
1950
+ "outputs": [
1951
+ {
1952
+ "data": {
1953
+ "text/plain": [
1954
+ "np.int64(33871648)"
1955
+ ]
1956
+ },
1957
+ "execution_count": 50,
1958
+ "metadata": {},
1959
+ "output_type": "execute_result"
1960
+ }
1961
+ ],
1962
+ "source": [
1963
+ "pop['California', 2000]"
1964
+ ]
1965
+ },
1966
+ {
1967
+ "cell_type": "code",
1968
+ "execution_count": 51,
1969
+ "id": "d07b7114-56b8-4609-b749-c1e0c8c9fa64",
1970
+ "metadata": {},
1971
+ "outputs": [
1972
+ {
1973
+ "data": {
1974
+ "text/plain": [
1975
+ "year\n",
1976
+ "2000 33871648\n",
1977
+ "2010 37253956\n",
1978
+ "dtype: int64"
1979
+ ]
1980
+ },
1981
+ "execution_count": 51,
1982
+ "metadata": {},
1983
+ "output_type": "execute_result"
1984
+ }
1985
+ ],
1986
+ "source": [
1987
+ "pop['California']"
1988
+ ]
1989
+ },
1990
+ {
1991
+ "cell_type": "code",
1992
+ "execution_count": 52,
1993
+ "id": "be5249e4-9244-4145-82a9-6ead97ef95ec",
1994
+ "metadata": {},
1995
+ "outputs": [
1996
+ {
1997
+ "data": {
1998
+ "text/plain": [
1999
+ "state year\n",
2000
+ "California 2000 33871648\n",
2001
+ " 2010 37253956\n",
2002
+ "New York 2000 18976457\n",
2003
+ " 2010 19378102\n",
2004
+ "dtype: int64"
2005
+ ]
2006
+ },
2007
+ "execution_count": 52,
2008
+ "metadata": {},
2009
+ "output_type": "execute_result"
2010
+ }
2011
+ ],
2012
+ "source": [
2013
+ "pop.loc['California':'New York']"
2014
+ ]
2015
+ },
2016
+ {
2017
+ "cell_type": "code",
2018
+ "execution_count": 53,
2019
+ "id": "3f02298e-195e-4f18-a8c6-5352e1fc5c14",
2020
+ "metadata": {},
2021
+ "outputs": [
2022
+ {
2023
+ "data": {
2024
+ "text/plain": [
2025
+ "state\n",
2026
+ "California 33871648\n",
2027
+ "New York 18976457\n",
2028
+ "Texas 20851820\n",
2029
+ "dtype: int64"
2030
+ ]
2031
+ },
2032
+ "execution_count": 53,
2033
+ "metadata": {},
2034
+ "output_type": "execute_result"
2035
+ }
2036
+ ],
2037
+ "source": [
2038
+ "pop[:, 2000]"
2039
+ ]
2040
+ },
2041
+ {
2042
+ "cell_type": "code",
2043
+ "execution_count": 54,
2044
+ "id": "a95754cd-af47-42df-b021-4a3f7acf8be0",
2045
+ "metadata": {},
2046
+ "outputs": [
2047
+ {
2048
+ "data": {
2049
+ "text/plain": [
2050
+ "state year\n",
2051
+ "California 2000 33871648\n",
2052
+ " 2010 37253956\n",
2053
+ "Texas 2010 25145561\n",
2054
+ "dtype: int64"
2055
+ ]
2056
+ },
2057
+ "execution_count": 54,
2058
+ "metadata": {},
2059
+ "output_type": "execute_result"
2060
+ }
2061
+ ],
2062
+ "source": [
2063
+ "pop[pop > 22000000]"
2064
+ ]
2065
+ },
2066
+ {
2067
+ "cell_type": "code",
2068
+ "execution_count": 55,
2069
+ "id": "ca7b8ea9-951b-46e5-bebf-d47ac2f15bf3",
2070
+ "metadata": {},
2071
+ "outputs": [
2072
+ {
2073
+ "data": {
2074
+ "text/plain": [
2075
+ "state year\n",
2076
+ "California 2000 33871648\n",
2077
+ " 2010 37253956\n",
2078
+ "Texas 2000 20851820\n",
2079
+ " 2010 25145561\n",
2080
+ "dtype: int64"
2081
+ ]
2082
+ },
2083
+ "execution_count": 55,
2084
+ "metadata": {},
2085
+ "output_type": "execute_result"
2086
+ }
2087
+ ],
2088
+ "source": [
2089
+ "pop[['California', 'Texas']]"
2090
+ ]
2091
+ },
2092
+ {
2093
+ "cell_type": "code",
2094
+ "execution_count": 56,
2095
+ "id": "3aefb9d1-98c2-4749-b926-a4691b41de28",
2096
+ "metadata": {},
2097
+ "outputs": [
2098
+ {
2099
+ "data": {
2100
+ "text/html": [
2101
+ "<div>\n",
2102
+ "<style scoped>\n",
2103
+ " .dataframe tbody tr th:only-of-type {\n",
2104
+ " vertical-align: middle;\n",
2105
+ " }\n",
2106
+ "\n",
2107
+ " .dataframe tbody tr th {\n",
2108
+ " vertical-align: top;\n",
2109
+ " }\n",
2110
+ "\n",
2111
+ " .dataframe thead tr th {\n",
2112
+ " text-align: left;\n",
2113
+ " }\n",
2114
+ "\n",
2115
+ " .dataframe thead tr:last-of-type th {\n",
2116
+ " text-align: right;\n",
2117
+ " }\n",
2118
+ "</style>\n",
2119
+ "<table border=\"1\" class=\"dataframe\">\n",
2120
+ " <thead>\n",
2121
+ " <tr>\n",
2122
+ " <th></th>\n",
2123
+ " <th>subject</th>\n",
2124
+ " <th colspan=\"2\" halign=\"left\">Bob</th>\n",
2125
+ " <th colspan=\"2\" halign=\"left\">Guido</th>\n",
2126
+ " <th colspan=\"2\" halign=\"left\">Sue</th>\n",
2127
+ " </tr>\n",
2128
+ " <tr>\n",
2129
+ " <th></th>\n",
2130
+ " <th>type</th>\n",
2131
+ " <th>HR</th>\n",
2132
+ " <th>Temp</th>\n",
2133
+ " <th>HR</th>\n",
2134
+ " <th>Temp</th>\n",
2135
+ " <th>HR</th>\n",
2136
+ " <th>Temp</th>\n",
2137
+ " </tr>\n",
2138
+ " <tr>\n",
2139
+ " <th>year</th>\n",
2140
+ " <th>visit</th>\n",
2141
+ " <th></th>\n",
2142
+ " <th></th>\n",
2143
+ " <th></th>\n",
2144
+ " <th></th>\n",
2145
+ " <th></th>\n",
2146
+ " <th></th>\n",
2147
+ " </tr>\n",
2148
+ " </thead>\n",
2149
+ " <tbody>\n",
2150
+ " <tr>\n",
2151
+ " <th rowspan=\"2\" valign=\"top\">2013</th>\n",
2152
+ " <th>1</th>\n",
2153
+ " <td>49.0</td>\n",
2154
+ " <td>38.5</td>\n",
2155
+ " <td>37.0</td>\n",
2156
+ " <td>38.2</td>\n",
2157
+ " <td>33.0</td>\n",
2158
+ " <td>38.3</td>\n",
2159
+ " </tr>\n",
2160
+ " <tr>\n",
2161
+ " <th>2</th>\n",
2162
+ " <td>43.0</td>\n",
2163
+ " <td>38.6</td>\n",
2164
+ " <td>23.0</td>\n",
2165
+ " <td>37.5</td>\n",
2166
+ " <td>42.0</td>\n",
2167
+ " <td>35.2</td>\n",
2168
+ " </tr>\n",
2169
+ " <tr>\n",
2170
+ " <th rowspan=\"2\" valign=\"top\">2014</th>\n",
2171
+ " <th>1</th>\n",
2172
+ " <td>22.0</td>\n",
2173
+ " <td>37.3</td>\n",
2174
+ " <td>46.0</td>\n",
2175
+ " <td>36.5</td>\n",
2176
+ " <td>35.0</td>\n",
2177
+ " <td>37.3</td>\n",
2178
+ " </tr>\n",
2179
+ " <tr>\n",
2180
+ " <th>2</th>\n",
2181
+ " <td>40.0</td>\n",
2182
+ " <td>36.9</td>\n",
2183
+ " <td>58.0</td>\n",
2184
+ " <td>37.1</td>\n",
2185
+ " <td>48.0</td>\n",
2186
+ " <td>35.8</td>\n",
2187
+ " </tr>\n",
2188
+ " </tbody>\n",
2189
+ "</table>\n",
2190
+ "</div>"
2191
+ ],
2192
+ "text/plain": [
2193
+ "subject Bob Guido Sue \n",
2194
+ "type HR Temp HR Temp HR Temp\n",
2195
+ "year visit \n",
2196
+ "2013 1 49.0 38.5 37.0 38.2 33.0 38.3\n",
2197
+ " 2 43.0 38.6 23.0 37.5 42.0 35.2\n",
2198
+ "2014 1 22.0 37.3 46.0 36.5 35.0 37.3\n",
2199
+ " 2 40.0 36.9 58.0 37.1 48.0 35.8"
2200
+ ]
2201
+ },
2202
+ "execution_count": 56,
2203
+ "metadata": {},
2204
+ "output_type": "execute_result"
2205
+ }
2206
+ ],
2207
+ "source": [
2208
+ "health_data"
2209
+ ]
2210
+ },
2211
+ {
2212
+ "cell_type": "code",
2213
+ "execution_count": 57,
2214
+ "id": "c7d0357c-1b12-4eed-8bb9-4f689921749e",
2215
+ "metadata": {},
2216
+ "outputs": [
2217
+ {
2218
+ "data": {
2219
+ "text/plain": [
2220
+ "year visit\n",
2221
+ "2013 1 37.0\n",
2222
+ " 2 23.0\n",
2223
+ "2014 1 46.0\n",
2224
+ " 2 58.0\n",
2225
+ "Name: (Guido, HR), dtype: float64"
2226
+ ]
2227
+ },
2228
+ "execution_count": 57,
2229
+ "metadata": {},
2230
+ "output_type": "execute_result"
2231
+ }
2232
+ ],
2233
+ "source": [
2234
+ "health_data['Guido', 'HR']"
2235
+ ]
2236
+ },
2237
+ {
2238
+ "cell_type": "code",
2239
+ "execution_count": 58,
2240
+ "id": "78e41138-b1f1-45de-9fd5-5d4e2434e5da",
2241
+ "metadata": {},
2242
+ "outputs": [
2243
+ {
2244
+ "data": {
2245
+ "text/html": [
2246
+ "<div>\n",
2247
+ "<style scoped>\n",
2248
+ " .dataframe tbody tr th:only-of-type {\n",
2249
+ " vertical-align: middle;\n",
2250
+ " }\n",
2251
+ "\n",
2252
+ " .dataframe tbody tr th {\n",
2253
+ " vertical-align: top;\n",
2254
+ " }\n",
2255
+ "\n",
2256
+ " .dataframe thead tr th {\n",
2257
+ " text-align: left;\n",
2258
+ " }\n",
2259
+ "\n",
2260
+ " .dataframe thead tr:last-of-type th {\n",
2261
+ " text-align: right;\n",
2262
+ " }\n",
2263
+ "</style>\n",
2264
+ "<table border=\"1\" class=\"dataframe\">\n",
2265
+ " <thead>\n",
2266
+ " <tr>\n",
2267
+ " <th></th>\n",
2268
+ " <th>subject</th>\n",
2269
+ " <th colspan=\"2\" halign=\"left\">Bob</th>\n",
2270
+ " </tr>\n",
2271
+ " <tr>\n",
2272
+ " <th></th>\n",
2273
+ " <th>type</th>\n",
2274
+ " <th>HR</th>\n",
2275
+ " <th>Temp</th>\n",
2276
+ " </tr>\n",
2277
+ " <tr>\n",
2278
+ " <th>year</th>\n",
2279
+ " <th>visit</th>\n",
2280
+ " <th></th>\n",
2281
+ " <th></th>\n",
2282
+ " </tr>\n",
2283
+ " </thead>\n",
2284
+ " <tbody>\n",
2285
+ " <tr>\n",
2286
+ " <th rowspan=\"2\" valign=\"top\">2013</th>\n",
2287
+ " <th>1</th>\n",
2288
+ " <td>49.0</td>\n",
2289
+ " <td>38.5</td>\n",
2290
+ " </tr>\n",
2291
+ " <tr>\n",
2292
+ " <th>2</th>\n",
2293
+ " <td>43.0</td>\n",
2294
+ " <td>38.6</td>\n",
2295
+ " </tr>\n",
2296
+ " </tbody>\n",
2297
+ "</table>\n",
2298
+ "</div>"
2299
+ ],
2300
+ "text/plain": [
2301
+ "subject Bob \n",
2302
+ "type HR Temp\n",
2303
+ "year visit \n",
2304
+ "2013 1 49.0 38.5\n",
2305
+ " 2 43.0 38.6"
2306
+ ]
2307
+ },
2308
+ "execution_count": 58,
2309
+ "metadata": {},
2310
+ "output_type": "execute_result"
2311
+ }
2312
+ ],
2313
+ "source": [
2314
+ "health_data.iloc[:2, :2]"
2315
+ ]
2316
+ },
2317
+ {
2318
+ "cell_type": "code",
2319
+ "execution_count": 59,
2320
+ "id": "b7b91354-56af-4970-b9eb-d7226b01eed8",
2321
+ "metadata": {},
2322
+ "outputs": [
2323
+ {
2324
+ "data": {
2325
+ "text/plain": [
2326
+ "year visit\n",
2327
+ "2013 1 49.0\n",
2328
+ " 2 43.0\n",
2329
+ "2014 1 22.0\n",
2330
+ " 2 40.0\n",
2331
+ "Name: (Bob, HR), dtype: float64"
2332
+ ]
2333
+ },
2334
+ "execution_count": 59,
2335
+ "metadata": {},
2336
+ "output_type": "execute_result"
2337
+ }
2338
+ ],
2339
+ "source": [
2340
+ "health_data.loc[:, ('Bob', 'HR')]"
2341
+ ]
2342
+ },
2343
+ {
2344
+ "cell_type": "code",
2345
+ "execution_count": 62,
2346
+ "id": "da061035-ee51-4c68-a801-c32ca6d40d8b",
2347
+ "metadata": {},
2348
+ "outputs": [
2349
+ {
2350
+ "data": {
2351
+ "text/html": [
2352
+ "<div>\n",
2353
+ "<style scoped>\n",
2354
+ " .dataframe tbody tr th:only-of-type {\n",
2355
+ " vertical-align: middle;\n",
2356
+ " }\n",
2357
+ "\n",
2358
+ " .dataframe tbody tr th {\n",
2359
+ " vertical-align: top;\n",
2360
+ " }\n",
2361
+ "\n",
2362
+ " .dataframe thead tr th {\n",
2363
+ " text-align: left;\n",
2364
+ " }\n",
2365
+ "\n",
2366
+ " .dataframe thead tr:last-of-type th {\n",
2367
+ " text-align: right;\n",
2368
+ " }\n",
2369
+ "</style>\n",
2370
+ "<table border=\"1\" class=\"dataframe\">\n",
2371
+ " <thead>\n",
2372
+ " <tr>\n",
2373
+ " <th></th>\n",
2374
+ " <th>subject</th>\n",
2375
+ " <th>Bob</th>\n",
2376
+ " <th>Guido</th>\n",
2377
+ " <th>Sue</th>\n",
2378
+ " </tr>\n",
2379
+ " <tr>\n",
2380
+ " <th></th>\n",
2381
+ " <th>type</th>\n",
2382
+ " <th>HR</th>\n",
2383
+ " <th>HR</th>\n",
2384
+ " <th>HR</th>\n",
2385
+ " </tr>\n",
2386
+ " <tr>\n",
2387
+ " <th>year</th>\n",
2388
+ " <th>visit</th>\n",
2389
+ " <th></th>\n",
2390
+ " <th></th>\n",
2391
+ " <th></th>\n",
2392
+ " </tr>\n",
2393
+ " </thead>\n",
2394
+ " <tbody>\n",
2395
+ " <tr>\n",
2396
+ " <th>2013</th>\n",
2397
+ " <th>1</th>\n",
2398
+ " <td>49.0</td>\n",
2399
+ " <td>37.0</td>\n",
2400
+ " <td>33.0</td>\n",
2401
+ " </tr>\n",
2402
+ " <tr>\n",
2403
+ " <th>2014</th>\n",
2404
+ " <th>1</th>\n",
2405
+ " <td>22.0</td>\n",
2406
+ " <td>46.0</td>\n",
2407
+ " <td>35.0</td>\n",
2408
+ " </tr>\n",
2409
+ " </tbody>\n",
2410
+ "</table>\n",
2411
+ "</div>"
2412
+ ],
2413
+ "text/plain": [
2414
+ "subject Bob Guido Sue\n",
2415
+ "type HR HR HR\n",
2416
+ "year visit \n",
2417
+ "2013 1 49.0 37.0 33.0\n",
2418
+ "2014 1 22.0 46.0 35.0"
2419
+ ]
2420
+ },
2421
+ "execution_count": 62,
2422
+ "metadata": {},
2423
+ "output_type": "execute_result"
2424
+ }
2425
+ ],
2426
+ "source": [
2427
+ "idx = pd.IndexSlice\n",
2428
+ "health_data.loc[idx[:, 1], idx[:, 'HR']]"
2429
+ ]
2430
+ },
2431
+ {
2432
+ "cell_type": "code",
2433
+ "execution_count": 63,
2434
+ "id": "6186ea7f-0b22-494e-a5bc-f897223b289a",
2435
+ "metadata": {},
2436
+ "outputs": [
2437
+ {
2438
+ "data": {
2439
+ "text/plain": [
2440
+ "char int\n",
2441
+ "a 1 0.617857\n",
2442
+ " 2 0.704381\n",
2443
+ "c 1 0.356883\n",
2444
+ " 2 0.141801\n",
2445
+ "b 1 0.622593\n",
2446
+ " 2 0.786514\n",
2447
+ "dtype: float64"
2448
+ ]
2449
+ },
2450
+ "execution_count": 63,
2451
+ "metadata": {},
2452
+ "output_type": "execute_result"
2453
+ }
2454
+ ],
2455
+ "source": [
2456
+ "index = pd.MultiIndex.from_product([['a', 'c', 'b'], [1, 2]])\n",
2457
+ "data = pd.Series(np.random.rand(6), index=index)\n",
2458
+ "data.index.names = ['char', 'int']\n",
2459
+ "data"
2460
+ ]
2461
+ },
2462
+ {
2463
+ "cell_type": "code",
2464
+ "execution_count": 64,
2465
+ "id": "ebd32db3-50dc-48e9-9715-6b4c36603daa",
2466
+ "metadata": {},
2467
+ "outputs": [
2468
+ {
2469
+ "name": "stdout",
2470
+ "output_type": "stream",
2471
+ "text": [
2472
+ "<class 'pandas.errors.UnsortedIndexError'>\n",
2473
+ "'Key length (1) was greater than MultiIndex lexsort depth (0)'\n"
2474
+ ]
2475
+ }
2476
+ ],
2477
+ "source": [
2478
+ "try:\n",
2479
+ " data['a':'b']\n",
2480
+ "except KeyError as e:\n",
2481
+ " print(type(e))\n",
2482
+ " print(e)"
2483
+ ]
2484
+ },
2485
+ {
2486
+ "cell_type": "code",
2487
+ "execution_count": 65,
2488
+ "id": "2591b9e1-398e-4ad8-b6c6-f4d2fc9998e3",
2489
+ "metadata": {},
2490
+ "outputs": [
2491
+ {
2492
+ "data": {
2493
+ "text/plain": [
2494
+ "char int\n",
2495
+ "a 1 0.617857\n",
2496
+ " 2 0.704381\n",
2497
+ "b 1 0.622593\n",
2498
+ " 2 0.786514\n",
2499
+ "c 1 0.356883\n",
2500
+ " 2 0.141801\n",
2501
+ "dtype: float64"
2502
+ ]
2503
+ },
2504
+ "execution_count": 65,
2505
+ "metadata": {},
2506
+ "output_type": "execute_result"
2507
+ }
2508
+ ],
2509
+ "source": [
2510
+ "data = data.sort_index()\n",
2511
+ "data"
2512
+ ]
2513
+ },
2514
+ {
2515
+ "cell_type": "code",
2516
+ "execution_count": 66,
2517
+ "id": "a476b952-f359-4e97-9161-3a663d50d376",
2518
+ "metadata": {},
2519
+ "outputs": [
2520
+ {
2521
+ "data": {
2522
+ "text/plain": [
2523
+ "char int\n",
2524
+ "a 1 0.617857\n",
2525
+ " 2 0.704381\n",
2526
+ "b 1 0.622593\n",
2527
+ " 2 0.786514\n",
2528
+ "dtype: float64"
2529
+ ]
2530
+ },
2531
+ "execution_count": 66,
2532
+ "metadata": {},
2533
+ "output_type": "execute_result"
2534
+ }
2535
+ ],
2536
+ "source": [
2537
+ "data['a':'b']"
2538
+ ]
2539
+ },
2540
+ {
2541
+ "cell_type": "code",
2542
+ "execution_count": 67,
2543
+ "id": "2ca63cfa-b281-436d-b2d2-5e1e3b649655",
2544
+ "metadata": {},
2545
+ "outputs": [
2546
+ {
2547
+ "data": {
2548
+ "text/html": [
2549
+ "<div>\n",
2550
+ "<style scoped>\n",
2551
+ " .dataframe tbody tr th:only-of-type {\n",
2552
+ " vertical-align: middle;\n",
2553
+ " }\n",
2554
+ "\n",
2555
+ " .dataframe tbody tr th {\n",
2556
+ " vertical-align: top;\n",
2557
+ " }\n",
2558
+ "\n",
2559
+ " .dataframe thead th {\n",
2560
+ " text-align: right;\n",
2561
+ " }\n",
2562
+ "</style>\n",
2563
+ "<table border=\"1\" class=\"dataframe\">\n",
2564
+ " <thead>\n",
2565
+ " <tr style=\"text-align: right;\">\n",
2566
+ " <th>state</th>\n",
2567
+ " <th>California</th>\n",
2568
+ " <th>New York</th>\n",
2569
+ " <th>Texas</th>\n",
2570
+ " </tr>\n",
2571
+ " <tr>\n",
2572
+ " <th>year</th>\n",
2573
+ " <th></th>\n",
2574
+ " <th></th>\n",
2575
+ " <th></th>\n",
2576
+ " </tr>\n",
2577
+ " </thead>\n",
2578
+ " <tbody>\n",
2579
+ " <tr>\n",
2580
+ " <th>2000</th>\n",
2581
+ " <td>33871648</td>\n",
2582
+ " <td>18976457</td>\n",
2583
+ " <td>20851820</td>\n",
2584
+ " </tr>\n",
2585
+ " <tr>\n",
2586
+ " <th>2010</th>\n",
2587
+ " <td>37253956</td>\n",
2588
+ " <td>19378102</td>\n",
2589
+ " <td>25145561</td>\n",
2590
+ " </tr>\n",
2591
+ " </tbody>\n",
2592
+ "</table>\n",
2593
+ "</div>"
2594
+ ],
2595
+ "text/plain": [
2596
+ "state California New York Texas\n",
2597
+ "year \n",
2598
+ "2000 33871648 18976457 20851820\n",
2599
+ "2010 37253956 19378102 25145561"
2600
+ ]
2601
+ },
2602
+ "execution_count": 67,
2603
+ "metadata": {},
2604
+ "output_type": "execute_result"
2605
+ }
2606
+ ],
2607
+ "source": [
2608
+ "pop.unstack(level=0)"
2609
+ ]
2610
+ },
2611
+ {
2612
+ "cell_type": "code",
2613
+ "execution_count": 68,
2614
+ "id": "8057f14d-7163-4e8b-8adb-020a9837577a",
2615
+ "metadata": {},
2616
+ "outputs": [
2617
+ {
2618
+ "data": {
2619
+ "text/html": [
2620
+ "<div>\n",
2621
+ "<style scoped>\n",
2622
+ " .dataframe tbody tr th:only-of-type {\n",
2623
+ " vertical-align: middle;\n",
2624
+ " }\n",
2625
+ "\n",
2626
+ " .dataframe tbody tr th {\n",
2627
+ " vertical-align: top;\n",
2628
+ " }\n",
2629
+ "\n",
2630
+ " .dataframe thead th {\n",
2631
+ " text-align: right;\n",
2632
+ " }\n",
2633
+ "</style>\n",
2634
+ "<table border=\"1\" class=\"dataframe\">\n",
2635
+ " <thead>\n",
2636
+ " <tr style=\"text-align: right;\">\n",
2637
+ " <th>year</th>\n",
2638
+ " <th>2000</th>\n",
2639
+ " <th>2010</th>\n",
2640
+ " </tr>\n",
2641
+ " <tr>\n",
2642
+ " <th>state</th>\n",
2643
+ " <th></th>\n",
2644
+ " <th></th>\n",
2645
+ " </tr>\n",
2646
+ " </thead>\n",
2647
+ " <tbody>\n",
2648
+ " <tr>\n",
2649
+ " <th>California</th>\n",
2650
+ " <td>33871648</td>\n",
2651
+ " <td>37253956</td>\n",
2652
+ " </tr>\n",
2653
+ " <tr>\n",
2654
+ " <th>New York</th>\n",
2655
+ " <td>18976457</td>\n",
2656
+ " <td>19378102</td>\n",
2657
+ " </tr>\n",
2658
+ " <tr>\n",
2659
+ " <th>Texas</th>\n",
2660
+ " <td>20851820</td>\n",
2661
+ " <td>25145561</td>\n",
2662
+ " </tr>\n",
2663
+ " </tbody>\n",
2664
+ "</table>\n",
2665
+ "</div>"
2666
+ ],
2667
+ "text/plain": [
2668
+ "year 2000 2010\n",
2669
+ "state \n",
2670
+ "California 33871648 37253956\n",
2671
+ "New York 18976457 19378102\n",
2672
+ "Texas 20851820 25145561"
2673
+ ]
2674
+ },
2675
+ "execution_count": 68,
2676
+ "metadata": {},
2677
+ "output_type": "execute_result"
2678
+ }
2679
+ ],
2680
+ "source": [
2681
+ "pop.unstack(level=1)"
2682
+ ]
2683
+ },
2684
+ {
2685
+ "cell_type": "code",
2686
+ "execution_count": 69,
2687
+ "id": "496a65b2-aa0a-4705-95ba-8b3e9cb7e673",
2688
+ "metadata": {},
2689
+ "outputs": [
2690
+ {
2691
+ "data": {
2692
+ "text/plain": [
2693
+ "state year\n",
2694
+ "California 2000 33871648\n",
2695
+ " 2010 37253956\n",
2696
+ "New York 2000 18976457\n",
2697
+ " 2010 19378102\n",
2698
+ "Texas 2000 20851820\n",
2699
+ " 2010 25145561\n",
2700
+ "dtype: int64"
2701
+ ]
2702
+ },
2703
+ "execution_count": 69,
2704
+ "metadata": {},
2705
+ "output_type": "execute_result"
2706
+ }
2707
+ ],
2708
+ "source": [
2709
+ "pop.unstack().stack()"
2710
+ ]
2711
+ },
2712
+ {
2713
+ "cell_type": "code",
2714
+ "execution_count": 70,
2715
+ "id": "0ba9ee6c-f39c-4f7a-b748-9c30b64d52e6",
2716
+ "metadata": {},
2717
+ "outputs": [
2718
+ {
2719
+ "data": {
2720
+ "text/html": [
2721
+ "<div>\n",
2722
+ "<style scoped>\n",
2723
+ " .dataframe tbody tr th:only-of-type {\n",
2724
+ " vertical-align: middle;\n",
2725
+ " }\n",
2726
+ "\n",
2727
+ " .dataframe tbody tr th {\n",
2728
+ " vertical-align: top;\n",
2729
+ " }\n",
2730
+ "\n",
2731
+ " .dataframe thead th {\n",
2732
+ " text-align: right;\n",
2733
+ " }\n",
2734
+ "</style>\n",
2735
+ "<table border=\"1\" class=\"dataframe\">\n",
2736
+ " <thead>\n",
2737
+ " <tr style=\"text-align: right;\">\n",
2738
+ " <th></th>\n",
2739
+ " <th>state</th>\n",
2740
+ " <th>year</th>\n",
2741
+ " <th>population</th>\n",
2742
+ " </tr>\n",
2743
+ " </thead>\n",
2744
+ " <tbody>\n",
2745
+ " <tr>\n",
2746
+ " <th>0</th>\n",
2747
+ " <td>California</td>\n",
2748
+ " <td>2000</td>\n",
2749
+ " <td>33871648</td>\n",
2750
+ " </tr>\n",
2751
+ " <tr>\n",
2752
+ " <th>1</th>\n",
2753
+ " <td>California</td>\n",
2754
+ " <td>2010</td>\n",
2755
+ " <td>37253956</td>\n",
2756
+ " </tr>\n",
2757
+ " <tr>\n",
2758
+ " <th>2</th>\n",
2759
+ " <td>New York</td>\n",
2760
+ " <td>2000</td>\n",
2761
+ " <td>18976457</td>\n",
2762
+ " </tr>\n",
2763
+ " <tr>\n",
2764
+ " <th>3</th>\n",
2765
+ " <td>New York</td>\n",
2766
+ " <td>2010</td>\n",
2767
+ " <td>19378102</td>\n",
2768
+ " </tr>\n",
2769
+ " <tr>\n",
2770
+ " <th>4</th>\n",
2771
+ " <td>Texas</td>\n",
2772
+ " <td>2000</td>\n",
2773
+ " <td>20851820</td>\n",
2774
+ " </tr>\n",
2775
+ " <tr>\n",
2776
+ " <th>5</th>\n",
2777
+ " <td>Texas</td>\n",
2778
+ " <td>2010</td>\n",
2779
+ " <td>25145561</td>\n",
2780
+ " </tr>\n",
2781
+ " </tbody>\n",
2782
+ "</table>\n",
2783
+ "</div>"
2784
+ ],
2785
+ "text/plain": [
2786
+ " state year population\n",
2787
+ "0 California 2000 33871648\n",
2788
+ "1 California 2010 37253956\n",
2789
+ "2 New York 2000 18976457\n",
2790
+ "3 New York 2010 19378102\n",
2791
+ "4 Texas 2000 20851820\n",
2792
+ "5 Texas 2010 25145561"
2793
+ ]
2794
+ },
2795
+ "execution_count": 70,
2796
+ "metadata": {},
2797
+ "output_type": "execute_result"
2798
+ }
2799
+ ],
2800
+ "source": [
2801
+ "pop_flat = pop.reset_index(name='population')\n",
2802
+ "pop_flat"
2803
+ ]
2804
+ },
2805
+ {
2806
+ "cell_type": "code",
2807
+ "execution_count": 71,
2808
+ "id": "4e67b8ea-8331-4ec5-96fd-03101e4a2abb",
2809
+ "metadata": {},
2810
+ "outputs": [
2811
+ {
2812
+ "data": {
2813
+ "text/html": [
2814
+ "<div>\n",
2815
+ "<style scoped>\n",
2816
+ " .dataframe tbody tr th:only-of-type {\n",
2817
+ " vertical-align: middle;\n",
2818
+ " }\n",
2819
+ "\n",
2820
+ " .dataframe tbody tr th {\n",
2821
+ " vertical-align: top;\n",
2822
+ " }\n",
2823
+ "\n",
2824
+ " .dataframe thead th {\n",
2825
+ " text-align: right;\n",
2826
+ " }\n",
2827
+ "</style>\n",
2828
+ "<table border=\"1\" class=\"dataframe\">\n",
2829
+ " <thead>\n",
2830
+ " <tr style=\"text-align: right;\">\n",
2831
+ " <th></th>\n",
2832
+ " <th></th>\n",
2833
+ " <th>population</th>\n",
2834
+ " </tr>\n",
2835
+ " <tr>\n",
2836
+ " <th>state</th>\n",
2837
+ " <th>year</th>\n",
2838
+ " <th></th>\n",
2839
+ " </tr>\n",
2840
+ " </thead>\n",
2841
+ " <tbody>\n",
2842
+ " <tr>\n",
2843
+ " <th rowspan=\"2\" valign=\"top\">California</th>\n",
2844
+ " <th>2000</th>\n",
2845
+ " <td>33871648</td>\n",
2846
+ " </tr>\n",
2847
+ " <tr>\n",
2848
+ " <th>2010</th>\n",
2849
+ " <td>37253956</td>\n",
2850
+ " </tr>\n",
2851
+ " <tr>\n",
2852
+ " <th rowspan=\"2\" valign=\"top\">New York</th>\n",
2853
+ " <th>2000</th>\n",
2854
+ " <td>18976457</td>\n",
2855
+ " </tr>\n",
2856
+ " <tr>\n",
2857
+ " <th>2010</th>\n",
2858
+ " <td>19378102</td>\n",
2859
+ " </tr>\n",
2860
+ " <tr>\n",
2861
+ " <th rowspan=\"2\" valign=\"top\">Texas</th>\n",
2862
+ " <th>2000</th>\n",
2863
+ " <td>20851820</td>\n",
2864
+ " </tr>\n",
2865
+ " <tr>\n",
2866
+ " <th>2010</th>\n",
2867
+ " <td>25145561</td>\n",
2868
+ " </tr>\n",
2869
+ " </tbody>\n",
2870
+ "</table>\n",
2871
+ "</div>"
2872
+ ],
2873
+ "text/plain": [
2874
+ " population\n",
2875
+ "state year \n",
2876
+ "California 2000 33871648\n",
2877
+ " 2010 37253956\n",
2878
+ "New York 2000 18976457\n",
2879
+ " 2010 19378102\n",
2880
+ "Texas 2000 20851820\n",
2881
+ " 2010 25145561"
2882
+ ]
2883
+ },
2884
+ "execution_count": 71,
2885
+ "metadata": {},
2886
+ "output_type": "execute_result"
2887
+ }
2888
+ ],
2889
+ "source": [
2890
+ "pop_flat.set_index(['state', 'year'])"
2891
+ ]
2892
+ },
2893
+ {
2894
+ "cell_type": "code",
2895
+ "execution_count": 72,
2896
+ "id": "76f64c1e-51ab-4675-af1c-e77ddcb160fb",
2897
+ "metadata": {},
2898
+ "outputs": [
2899
+ {
2900
+ "data": {
2901
+ "text/html": [
2902
+ "<div>\n",
2903
+ "<style scoped>\n",
2904
+ " .dataframe tbody tr th:only-of-type {\n",
2905
+ " vertical-align: middle;\n",
2906
+ " }\n",
2907
+ "\n",
2908
+ " .dataframe tbody tr th {\n",
2909
+ " vertical-align: top;\n",
2910
+ " }\n",
2911
+ "\n",
2912
+ " .dataframe thead tr th {\n",
2913
+ " text-align: left;\n",
2914
+ " }\n",
2915
+ "\n",
2916
+ " .dataframe thead tr:last-of-type th {\n",
2917
+ " text-align: right;\n",
2918
+ " }\n",
2919
+ "</style>\n",
2920
+ "<table border=\"1\" class=\"dataframe\">\n",
2921
+ " <thead>\n",
2922
+ " <tr>\n",
2923
+ " <th></th>\n",
2924
+ " <th>subject</th>\n",
2925
+ " <th colspan=\"2\" halign=\"left\">Bob</th>\n",
2926
+ " <th colspan=\"2\" halign=\"left\">Guido</th>\n",
2927
+ " <th colspan=\"2\" halign=\"left\">Sue</th>\n",
2928
+ " </tr>\n",
2929
+ " <tr>\n",
2930
+ " <th></th>\n",
2931
+ " <th>type</th>\n",
2932
+ " <th>HR</th>\n",
2933
+ " <th>Temp</th>\n",
2934
+ " <th>HR</th>\n",
2935
+ " <th>Temp</th>\n",
2936
+ " <th>HR</th>\n",
2937
+ " <th>Temp</th>\n",
2938
+ " </tr>\n",
2939
+ " <tr>\n",
2940
+ " <th>year</th>\n",
2941
+ " <th>visit</th>\n",
2942
+ " <th></th>\n",
2943
+ " <th></th>\n",
2944
+ " <th></th>\n",
2945
+ " <th></th>\n",
2946
+ " <th></th>\n",
2947
+ " <th></th>\n",
2948
+ " </tr>\n",
2949
+ " </thead>\n",
2950
+ " <tbody>\n",
2951
+ " <tr>\n",
2952
+ " <th rowspan=\"2\" valign=\"top\">2013</th>\n",
2953
+ " <th>1</th>\n",
2954
+ " <td>49.0</td>\n",
2955
+ " <td>38.5</td>\n",
2956
+ " <td>37.0</td>\n",
2957
+ " <td>38.2</td>\n",
2958
+ " <td>33.0</td>\n",
2959
+ " <td>38.3</td>\n",
2960
+ " </tr>\n",
2961
+ " <tr>\n",
2962
+ " <th>2</th>\n",
2963
+ " <td>43.0</td>\n",
2964
+ " <td>38.6</td>\n",
2965
+ " <td>23.0</td>\n",
2966
+ " <td>37.5</td>\n",
2967
+ " <td>42.0</td>\n",
2968
+ " <td>35.2</td>\n",
2969
+ " </tr>\n",
2970
+ " <tr>\n",
2971
+ " <th rowspan=\"2\" valign=\"top\">2014</th>\n",
2972
+ " <th>1</th>\n",
2973
+ " <td>22.0</td>\n",
2974
+ " <td>37.3</td>\n",
2975
+ " <td>46.0</td>\n",
2976
+ " <td>36.5</td>\n",
2977
+ " <td>35.0</td>\n",
2978
+ " <td>37.3</td>\n",
2979
+ " </tr>\n",
2980
+ " <tr>\n",
2981
+ " <th>2</th>\n",
2982
+ " <td>40.0</td>\n",
2983
+ " <td>36.9</td>\n",
2984
+ " <td>58.0</td>\n",
2985
+ " <td>37.1</td>\n",
2986
+ " <td>48.0</td>\n",
2987
+ " <td>35.8</td>\n",
2988
+ " </tr>\n",
2989
+ " </tbody>\n",
2990
+ "</table>\n",
2991
+ "</div>"
2992
+ ],
2993
+ "text/plain": [
2994
+ "subject Bob Guido Sue \n",
2995
+ "type HR Temp HR Temp HR Temp\n",
2996
+ "year visit \n",
2997
+ "2013 1 49.0 38.5 37.0 38.2 33.0 38.3\n",
2998
+ " 2 43.0 38.6 23.0 37.5 42.0 35.2\n",
2999
+ "2014 1 22.0 37.3 46.0 36.5 35.0 37.3\n",
3000
+ " 2 40.0 36.9 58.0 37.1 48.0 35.8"
3001
+ ]
3002
+ },
3003
+ "execution_count": 72,
3004
+ "metadata": {},
3005
+ "output_type": "execute_result"
3006
+ }
3007
+ ],
3008
+ "source": [
3009
+ "health_data"
3010
+ ]
3011
+ },
3012
+ {
3013
+ "cell_type": "code",
3014
+ "execution_count": 76,
3015
+ "id": "478121ee-78c0-4f0f-b226-d3c7d47e6eb8",
3016
+ "metadata": {},
3017
+ "outputs": [
3018
+ {
3019
+ "data": {
3020
+ "text/html": [
3021
+ "<div>\n",
3022
+ "<style scoped>\n",
3023
+ " .dataframe tbody tr th:only-of-type {\n",
3024
+ " vertical-align: middle;\n",
3025
+ " }\n",
3026
+ "\n",
3027
+ " .dataframe tbody tr th {\n",
3028
+ " vertical-align: top;\n",
3029
+ " }\n",
3030
+ "\n",
3031
+ " .dataframe thead tr th {\n",
3032
+ " text-align: left;\n",
3033
+ " }\n",
3034
+ "\n",
3035
+ " .dataframe thead tr:last-of-type th {\n",
3036
+ " text-align: right;\n",
3037
+ " }\n",
3038
+ "</style>\n",
3039
+ "<table border=\"1\" class=\"dataframe\">\n",
3040
+ " <thead>\n",
3041
+ " <tr>\n",
3042
+ " <th>subject</th>\n",
3043
+ " <th colspan=\"2\" halign=\"left\">Bob</th>\n",
3044
+ " <th colspan=\"2\" halign=\"left\">Guido</th>\n",
3045
+ " <th colspan=\"2\" halign=\"left\">Sue</th>\n",
3046
+ " </tr>\n",
3047
+ " <tr>\n",
3048
+ " <th>type</th>\n",
3049
+ " <th>HR</th>\n",
3050
+ " <th>Temp</th>\n",
3051
+ " <th>HR</th>\n",
3052
+ " <th>Temp</th>\n",
3053
+ " <th>HR</th>\n",
3054
+ " <th>Temp</th>\n",
3055
+ " </tr>\n",
3056
+ " <tr>\n",
3057
+ " <th>year</th>\n",
3058
+ " <th></th>\n",
3059
+ " <th></th>\n",
3060
+ " <th></th>\n",
3061
+ " <th></th>\n",
3062
+ " <th></th>\n",
3063
+ " <th></th>\n",
3064
+ " </tr>\n",
3065
+ " </thead>\n",
3066
+ " <tbody>\n",
3067
+ " <tr>\n",
3068
+ " <th>2013</th>\n",
3069
+ " <td>46.0</td>\n",
3070
+ " <td>38.55</td>\n",
3071
+ " <td>30.0</td>\n",
3072
+ " <td>37.85</td>\n",
3073
+ " <td>37.5</td>\n",
3074
+ " <td>36.75</td>\n",
3075
+ " </tr>\n",
3076
+ " <tr>\n",
3077
+ " <th>2014</th>\n",
3078
+ " <td>31.0</td>\n",
3079
+ " <td>37.10</td>\n",
3080
+ " <td>52.0</td>\n",
3081
+ " <td>36.80</td>\n",
3082
+ " <td>41.5</td>\n",
3083
+ " <td>36.55</td>\n",
3084
+ " </tr>\n",
3085
+ " </tbody>\n",
3086
+ "</table>\n",
3087
+ "</div>"
3088
+ ],
3089
+ "text/plain": [
3090
+ "subject Bob Guido Sue \n",
3091
+ "type HR Temp HR Temp HR Temp\n",
3092
+ "year \n",
3093
+ "2013 46.0 38.55 30.0 37.85 37.5 36.75\n",
3094
+ "2014 31.0 37.10 52.0 36.80 41.5 36.55"
3095
+ ]
3096
+ },
3097
+ "execution_count": 76,
3098
+ "metadata": {},
3099
+ "output_type": "execute_result"
3100
+ }
3101
+ ],
3102
+ "source": [
3103
+ "data_mean = health_data.groupby(level='year').mean()\n",
3104
+ "data_mean\n"
3105
+ ]
3106
+ },
3107
+ {
3108
+ "cell_type": "code",
3109
+ "execution_count": 78,
3110
+ "id": "02eea5df-9a32-451a-9e77-230df16e8dc4",
3111
+ "metadata": {},
3112
+ "outputs": [
3121
+ {
3122
+ "data": {
3123
+ "text/html": [
3124
+ "<div>\n",
3125
+ "<style scoped>\n",
3126
+ " .dataframe tbody tr th:only-of-type {\n",
3127
+ " vertical-align: middle;\n",
3128
+ " }\n",
3129
+ "\n",
3130
+ " .dataframe tbody tr th {\n",
3131
+ " vertical-align: top;\n",
3132
+ " }\n",
3133
+ "\n",
3134
+ " .dataframe thead th {\n",
3135
+ " text-align: right;\n",
3136
+ " }\n",
3137
+ "</style>\n",
3138
+ "<table border=\"1\" class=\"dataframe\">\n",
3139
+ " <thead>\n",
3140
+ " <tr style=\"text-align: right;\">\n",
3141
+ " <th>type</th>\n",
3142
+ " <th>HR</th>\n",
3143
+ " <th>Temp</th>\n",
3144
+ " </tr>\n",
3145
+ " <tr>\n",
3146
+ " <th>year</th>\n",
3147
+ " <th></th>\n",
3148
+ " <th></th>\n",
3149
+ " </tr>\n",
3150
+ " </thead>\n",
3151
+ " <tbody>\n",
3152
+ " <tr>\n",
3153
+ " <th>2013</th>\n",
3154
+ " <td>37.833333</td>\n",
3155
+ " <td>37.716667</td>\n",
3156
+ " </tr>\n",
3157
+ " <tr>\n",
3158
+ " <th>2014</th>\n",
3159
+ " <td>41.500000</td>\n",
3160
+ " <td>36.816667</td>\n",
3161
+ " </tr>\n",
3162
+ " </tbody>\n",
3163
+ "</table>\n",
3164
+ "</div>"
3165
+ ],
3166
+ "text/plain": [
3167
+ "type HR Temp\n",
3168
+ "year \n",
3169
+ "2013 37.833333 37.716667\n",
3170
+ "2014 41.500000 36.816667"
3171
+ ]
3172
+ },
3173
+ "execution_count": 78,
3174
+ "metadata": {},
3175
+ "output_type": "execute_result"
3176
+ }
3177
+ ],
3178
+ "source": [
3179
+ "data_mean.T.groupby(level='type').mean().T\n"
3180
+ ]
3181
+ },
3182
+ {
3183
+ "cell_type": "code",
3184
+ "execution_count": null,
3185
+ "id": "5e820d27-723f-4254-adfb-a55903baa80f",
3186
+ "metadata": {},
3187
+ "outputs": [],
3188
+ "source": []
3189
+ }
3190
+ ],
3191
+ "metadata": {
3192
+ "kernelspec": {
3193
+ "display_name": "Python 3 (ipykernel)",
3194
+ "language": "python",
3195
+ "name": "python3"
3196
+ },
3197
+ "language_info": {
3198
+ "codemirror_mode": {
3199
+ "name": "ipython",
3200
+ "version": 3
3201
+ },
3202
+ "file_extension": ".py",
3203
+ "mimetype": "text/x-python",
3204
+ "name": "python",
3205
+ "nbconvert_exporter": "python",
3206
+ "pygments_lexer": "ipython3",
3207
+ "version": "3.12.0"
3208
+ }
3209
+ },
3210
+ "nbformat": 4,
3211
+ "nbformat_minor": 5
3212
+ }