myawesomepkg 0.1.5__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1209 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "0277123f-b689-4974-9dd8-90f13e449430",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "# Combining Datasets: Merge and Join"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 3,
16
+ "id": "03dc4f77-84bf-49eb-a6bc-31dcec0e6571",
17
+ "metadata": {},
18
+ "outputs": [
19
+ {
20
+ "name": "stdout",
21
+ "output_type": "stream",
22
+ "text": [
23
+ " employee group\n",
24
+ "0 Bob Accounting\n",
25
+ "1 Jake Engineering\n",
26
+ "2 Lisa Engineering\n",
27
+ "3 Sue HR\n",
28
+ " employee hire_date\n",
29
+ "0 Lisa 2004\n",
30
+ "1 Bob 2008\n",
31
+ "2 Jake 2012\n",
32
+ "3 Sue 2014\n"
33
+ ]
34
+ }
35
+ ],
36
+ "source": [
37
+ "import pandas as pd\n",
38
+ "\n",
39
+ "df1 = pd.DataFrame({'employee': ['Bob', 'Jake', 'Lisa', 'Sue'],\n",
40
+ " 'group': ['Accounting', 'Engineering', 'Engineering', 'HR']})\n",
41
+ "df2 = pd.DataFrame({'employee': ['Lisa', 'Bob', 'Jake', 'Sue'],\n",
42
+ " 'hire_date': [2004, 2008, 2012, 2014]})\n",
43
+ "\n",
44
+ "print(df1)\n",
45
+ "print(df2)\n"
46
+ ]
47
+ },
48
+ {
49
+ "cell_type": "code",
50
+ "execution_count": 4,
51
+ "id": "3b7adc6d-2261-4c55-9834-e0b05e8d4691",
52
+ "metadata": {},
53
+ "outputs": [
54
+ {
55
+ "data": {
56
+ "text/html": [
57
+ "<div>\n",
58
+ "<style scoped>\n",
59
+ " .dataframe tbody tr th:only-of-type {\n",
60
+ " vertical-align: middle;\n",
61
+ " }\n",
62
+ "\n",
63
+ " .dataframe tbody tr th {\n",
64
+ " vertical-align: top;\n",
65
+ " }\n",
66
+ "\n",
67
+ " .dataframe thead th {\n",
68
+ " text-align: right;\n",
69
+ " }\n",
70
+ "</style>\n",
71
+ "<table border=\"1\" class=\"dataframe\">\n",
72
+ " <thead>\n",
73
+ " <tr style=\"text-align: right;\">\n",
74
+ " <th></th>\n",
75
+ " <th>employee</th>\n",
76
+ " <th>group</th>\n",
77
+ " <th>hire_date</th>\n",
78
+ " </tr>\n",
79
+ " </thead>\n",
80
+ " <tbody>\n",
81
+ " <tr>\n",
82
+ " <th>0</th>\n",
83
+ " <td>Bob</td>\n",
84
+ " <td>Accounting</td>\n",
85
+ " <td>2008</td>\n",
86
+ " </tr>\n",
87
+ " <tr>\n",
88
+ " <th>1</th>\n",
89
+ " <td>Jake</td>\n",
90
+ " <td>Engineering</td>\n",
91
+ " <td>2012</td>\n",
92
+ " </tr>\n",
93
+ " <tr>\n",
94
+ " <th>2</th>\n",
95
+ " <td>Lisa</td>\n",
96
+ " <td>Engineering</td>\n",
97
+ " <td>2004</td>\n",
98
+ " </tr>\n",
99
+ " <tr>\n",
100
+ " <th>3</th>\n",
101
+ " <td>Sue</td>\n",
102
+ " <td>HR</td>\n",
103
+ " <td>2014</td>\n",
104
+ " </tr>\n",
105
+ " </tbody>\n",
106
+ "</table>\n",
107
+ "</div>"
108
+ ],
109
+ "text/plain": [
110
+ " employee group hire_date\n",
111
+ "0 Bob Accounting 2008\n",
112
+ "1 Jake Engineering 2012\n",
113
+ "2 Lisa Engineering 2004\n",
114
+ "3 Sue HR 2014"
115
+ ]
116
+ },
117
+ "execution_count": 4,
118
+ "metadata": {},
119
+ "output_type": "execute_result"
120
+ }
121
+ ],
122
+ "source": [
123
+ " df3 = pd.merge(df1, df2)\n",
124
+ " df3"
125
+ ]
126
+ },
127
+ {
128
+ "cell_type": "code",
129
+ "execution_count": 6,
130
+ "id": "e593e641-83dc-41af-98f2-d492333be303",
131
+ "metadata": {},
132
+ "outputs": [
133
+ {
134
+ "name": "stdout",
135
+ "output_type": "stream",
136
+ "text": [
137
+ " employee group hire_date\n",
138
+ "0 Bob Accounting 2008\n",
139
+ "1 Jake Engineering 2012\n",
140
+ "2 Lisa Engineering 2004\n",
141
+ "3 Sue HR 2014\n",
142
+ " group supervisor\n",
143
+ "0 Accounting Carly\n",
144
+ "1 Engineering Guido\n",
145
+ "2 HR Steve\n",
146
+ " employee group hire_date supervisor\n",
147
+ "0 Bob Accounting 2008 Carly\n",
148
+ "1 Jake Engineering 2012 Guido\n",
149
+ "2 Lisa Engineering 2004 Guido\n",
150
+ "3 Sue HR 2014 Steve\n"
151
+ ]
152
+ }
153
+ ],
154
+ "source": [
155
+ "#Many-to-one joins\n",
156
+ "df4 = pd.DataFrame({'group': ['Accounting', 'Engineering', 'HR'],\n",
157
+ " 'supervisor': ['Carly', 'Guido', 'Steve']})\n",
158
+ "print(df3); print(df4); print(pd.merge(df3, df4))"
159
+ ]
160
+ },
161
+ {
162
+ "cell_type": "code",
163
+ "execution_count": 7,
164
+ "id": "01bddbb5-25d2-4bd5-bcb6-60b77dcf58b9",
165
+ "metadata": {},
166
+ "outputs": [
167
+ {
168
+ "data": {
169
+ "text/html": [
170
+ "<div>\n",
171
+ "<style scoped>\n",
172
+ " .dataframe tbody tr th:only-of-type {\n",
173
+ " vertical-align: middle;\n",
174
+ " }\n",
175
+ "\n",
176
+ " .dataframe tbody tr th {\n",
177
+ " vertical-align: top;\n",
178
+ " }\n",
179
+ "\n",
180
+ " .dataframe thead th {\n",
181
+ " text-align: right;\n",
182
+ " }\n",
183
+ "</style>\n",
184
+ "<table border=\"1\" class=\"dataframe\">\n",
185
+ " <thead>\n",
186
+ " <tr style=\"text-align: right;\">\n",
187
+ " <th></th>\n",
188
+ " <th>employee</th>\n",
189
+ " <th>group</th>\n",
190
+ " <th>hire_date</th>\n",
191
+ " <th>supervisor</th>\n",
192
+ " </tr>\n",
193
+ " </thead>\n",
194
+ " <tbody>\n",
195
+ " <tr>\n",
196
+ " <th>0</th>\n",
197
+ " <td>Bob</td>\n",
198
+ " <td>Accounting</td>\n",
199
+ " <td>2008</td>\n",
200
+ " <td>Carly</td>\n",
201
+ " </tr>\n",
202
+ " <tr>\n",
203
+ " <th>1</th>\n",
204
+ " <td>Jake</td>\n",
205
+ " <td>Engineering</td>\n",
206
+ " <td>2012</td>\n",
207
+ " <td>Guido</td>\n",
208
+ " </tr>\n",
209
+ " <tr>\n",
210
+ " <th>2</th>\n",
211
+ " <td>Lisa</td>\n",
212
+ " <td>Engineering</td>\n",
213
+ " <td>2004</td>\n",
214
+ " <td>Guido</td>\n",
215
+ " </tr>\n",
216
+ " <tr>\n",
217
+ " <th>3</th>\n",
218
+ " <td>Sue</td>\n",
219
+ " <td>HR</td>\n",
220
+ " <td>2014</td>\n",
221
+ " <td>Steve</td>\n",
222
+ " </tr>\n",
223
+ " </tbody>\n",
224
+ "</table>\n",
225
+ "</div>"
226
+ ],
227
+ "text/plain": [
228
+ " employee group hire_date supervisor\n",
229
+ "0 Bob Accounting 2008 Carly\n",
230
+ "1 Jake Engineering 2012 Guido\n",
231
+ "2 Lisa Engineering 2004 Guido\n",
232
+ "3 Sue HR 2014 Steve"
233
+ ]
234
+ },
235
+ "execution_count": 7,
236
+ "metadata": {},
237
+ "output_type": "execute_result"
238
+ }
239
+ ],
240
+ "source": [
241
+ "pd.merge(df3, df4)"
242
+ ]
243
+ },
244
+ {
245
+ "cell_type": "code",
246
+ "execution_count": 9,
247
+ "id": "b8ca21b1-4295-4219-91e8-ef4cdf46ea5d",
248
+ "metadata": {},
249
+ "outputs": [
250
+ {
251
+ "name": "stdout",
252
+ "output_type": "stream",
253
+ "text": [
254
+ " employee group\n",
255
+ "0 Bob Accounting\n",
256
+ "1 Jake Engineering\n",
257
+ "2 Lisa Engineering\n",
258
+ "3 Sue HR\n",
259
+ " group skills\n",
260
+ "0 Accounting math\n",
261
+ "1 Accounting spreadsheets\n",
262
+ "2 Engineering coding\n",
263
+ "3 Engineering linux\n",
264
+ "4 HR spreadsheets\n",
265
+ "5 HR organization\n",
266
+ " employee group skills\n",
267
+ "0 Bob Accounting math\n",
268
+ "1 Bob Accounting spreadsheets\n",
269
+ "2 Jake Engineering coding\n",
270
+ "3 Jake Engineering linux\n",
271
+ "4 Lisa Engineering coding\n",
272
+ "5 Lisa Engineering linux\n",
273
+ "6 Sue HR spreadsheets\n",
274
+ "7 Sue HR organization\n"
275
+ ]
276
+ }
277
+ ],
278
+ "source": [
279
+ "import pandas as pd\n",
280
+ "\n",
281
+ "# df1 from earlier\n",
282
+ "df1 = pd.DataFrame({'employee': ['Bob', 'Jake', 'Lisa', 'Sue'],\n",
283
+ " 'group': ['Accounting', 'Engineering', 'Engineering', 'HR']})\n",
284
+ "\n",
285
+ "# Corrected df5\n",
286
+ "df5 = pd.DataFrame({'group': ['Accounting', 'Accounting',\n",
287
+ " 'Engineering', 'Engineering',\n",
288
+ " 'HR', 'HR'],\n",
289
+ " 'skills': ['math', 'spreadsheets',\n",
290
+ " 'coding', 'linux',\n",
291
+ " 'spreadsheets', 'organization']})\n",
292
+ "\n",
293
+ "# Many-to-many merge\n",
294
+ "print(df1)\n",
295
+ "print(df5)\n",
296
+ "print(pd.merge(df1, df5))\n"
297
+ ]
298
+ },
299
+ {
300
+ "cell_type": "code",
301
+ "execution_count": 10,
302
+ "id": "1f2bef56-182a-4edf-a18a-98f2ebb4d65e",
303
+ "metadata": {},
304
+ "outputs": [
305
+ {
306
+ "data": {
307
+ "text/html": [
308
+ "<div>\n",
309
+ "<style scoped>\n",
310
+ " .dataframe tbody tr th:only-of-type {\n",
311
+ " vertical-align: middle;\n",
312
+ " }\n",
313
+ "\n",
314
+ " .dataframe tbody tr th {\n",
315
+ " vertical-align: top;\n",
316
+ " }\n",
317
+ "\n",
318
+ " .dataframe thead th {\n",
319
+ " text-align: right;\n",
320
+ " }\n",
321
+ "</style>\n",
322
+ "<table border=\"1\" class=\"dataframe\">\n",
323
+ " <thead>\n",
324
+ " <tr style=\"text-align: right;\">\n",
325
+ " <th></th>\n",
326
+ " <th>employee</th>\n",
327
+ " <th>group</th>\n",
328
+ " <th>skills</th>\n",
329
+ " </tr>\n",
330
+ " </thead>\n",
331
+ " <tbody>\n",
332
+ " <tr>\n",
333
+ " <th>0</th>\n",
334
+ " <td>Bob</td>\n",
335
+ " <td>Accounting</td>\n",
336
+ " <td>math</td>\n",
337
+ " </tr>\n",
338
+ " <tr>\n",
339
+ " <th>1</th>\n",
340
+ " <td>Bob</td>\n",
341
+ " <td>Accounting</td>\n",
342
+ " <td>spreadsheets</td>\n",
343
+ " </tr>\n",
344
+ " <tr>\n",
345
+ " <th>2</th>\n",
346
+ " <td>Jake</td>\n",
347
+ " <td>Engineering</td>\n",
348
+ " <td>coding</td>\n",
349
+ " </tr>\n",
350
+ " <tr>\n",
351
+ " <th>3</th>\n",
352
+ " <td>Jake</td>\n",
353
+ " <td>Engineering</td>\n",
354
+ " <td>linux</td>\n",
355
+ " </tr>\n",
356
+ " <tr>\n",
357
+ " <th>4</th>\n",
358
+ " <td>Lisa</td>\n",
359
+ " <td>Engineering</td>\n",
360
+ " <td>coding</td>\n",
361
+ " </tr>\n",
362
+ " <tr>\n",
363
+ " <th>5</th>\n",
364
+ " <td>Lisa</td>\n",
365
+ " <td>Engineering</td>\n",
366
+ " <td>linux</td>\n",
367
+ " </tr>\n",
368
+ " <tr>\n",
369
+ " <th>6</th>\n",
370
+ " <td>Sue</td>\n",
371
+ " <td>HR</td>\n",
372
+ " <td>spreadsheets</td>\n",
373
+ " </tr>\n",
374
+ " <tr>\n",
375
+ " <th>7</th>\n",
376
+ " <td>Sue</td>\n",
377
+ " <td>HR</td>\n",
378
+ " <td>organization</td>\n",
379
+ " </tr>\n",
380
+ " </tbody>\n",
381
+ "</table>\n",
382
+ "</div>"
383
+ ],
384
+ "text/plain": [
385
+ " employee group skills\n",
386
+ "0 Bob Accounting math\n",
387
+ "1 Bob Accounting spreadsheets\n",
388
+ "2 Jake Engineering coding\n",
389
+ "3 Jake Engineering linux\n",
390
+ "4 Lisa Engineering coding\n",
391
+ "5 Lisa Engineering linux\n",
392
+ "6 Sue HR spreadsheets\n",
393
+ "7 Sue HR organization"
394
+ ]
395
+ },
396
+ "execution_count": 10,
397
+ "metadata": {},
398
+ "output_type": "execute_result"
399
+ }
400
+ ],
401
+ "source": [
402
+ " pd.merge(df1, df5)"
403
+ ]
404
+ },
405
+ {
406
+ "cell_type": "code",
407
+ "execution_count": 11,
408
+ "id": "c1b26950-214d-4496-8cf9-81954ee85fca",
409
+ "metadata": {},
410
+ "outputs": [
411
+ {
412
+ "name": "stdout",
413
+ "output_type": "stream",
414
+ "text": [
415
+ " employee group\n",
416
+ "0 Bob Accounting\n",
417
+ "1 Jake Engineering\n",
418
+ "2 Lisa Engineering\n",
419
+ "3 Sue HR\n",
420
+ " employee hire_date\n",
421
+ "0 Lisa 2004\n",
422
+ "1 Bob 2008\n",
423
+ "2 Jake 2012\n",
424
+ "3 Sue 2014\n",
425
+ " employee group hire_date\n",
426
+ "0 Bob Accounting 2008\n",
427
+ "1 Jake Engineering 2012\n",
428
+ "2 Lisa Engineering 2004\n",
429
+ "3 Sue HR 2014\n"
430
+ ]
431
+ }
432
+ ],
433
+ "source": [
434
+ "#Specification of the Merge Key\n",
435
+ "#The on keyword\n",
436
+ "print(df1); print(df2); print(pd.merge(df1, df2, on='employee'))\n"
437
+ ]
438
+ },
439
+ {
440
+ "cell_type": "code",
441
+ "execution_count": 12,
442
+ "id": "45a089a4-f287-4003-abf9-966ad50ed071",
443
+ "metadata": {},
444
+ "outputs": [
445
+ {
446
+ "data": {
447
+ "text/html": [
448
+ "<div>\n",
449
+ "<style scoped>\n",
450
+ " .dataframe tbody tr th:only-of-type {\n",
451
+ " vertical-align: middle;\n",
452
+ " }\n",
453
+ "\n",
454
+ " .dataframe tbody tr th {\n",
455
+ " vertical-align: top;\n",
456
+ " }\n",
457
+ "\n",
458
+ " .dataframe thead th {\n",
459
+ " text-align: right;\n",
460
+ " }\n",
461
+ "</style>\n",
462
+ "<table border=\"1\" class=\"dataframe\">\n",
463
+ " <thead>\n",
464
+ " <tr style=\"text-align: right;\">\n",
465
+ " <th></th>\n",
466
+ " <th>employee</th>\n",
467
+ " <th>group</th>\n",
468
+ " <th>hire_date</th>\n",
469
+ " </tr>\n",
470
+ " </thead>\n",
471
+ " <tbody>\n",
472
+ " <tr>\n",
473
+ " <th>0</th>\n",
474
+ " <td>Bob</td>\n",
475
+ " <td>Accounting</td>\n",
476
+ " <td>2008</td>\n",
477
+ " </tr>\n",
478
+ " <tr>\n",
479
+ " <th>1</th>\n",
480
+ " <td>Jake</td>\n",
481
+ " <td>Engineering</td>\n",
482
+ " <td>2012</td>\n",
483
+ " </tr>\n",
484
+ " <tr>\n",
485
+ " <th>2</th>\n",
486
+ " <td>Lisa</td>\n",
487
+ " <td>Engineering</td>\n",
488
+ " <td>2004</td>\n",
489
+ " </tr>\n",
490
+ " <tr>\n",
491
+ " <th>3</th>\n",
492
+ " <td>Sue</td>\n",
493
+ " <td>HR</td>\n",
494
+ " <td>2014</td>\n",
495
+ " </tr>\n",
496
+ " </tbody>\n",
497
+ "</table>\n",
498
+ "</div>"
499
+ ],
500
+ "text/plain": [
501
+ " employee group hire_date\n",
502
+ "0 Bob Accounting 2008\n",
503
+ "1 Jake Engineering 2012\n",
504
+ "2 Lisa Engineering 2004\n",
505
+ "3 Sue HR 2014"
506
+ ]
507
+ },
508
+ "execution_count": 12,
509
+ "metadata": {},
510
+ "output_type": "execute_result"
511
+ }
512
+ ],
513
+ "source": [
514
+ "pd.merge(df1, df2, on='employee')"
515
+ ]
516
+ },
517
+ {
518
+ "cell_type": "code",
519
+ "execution_count": 13,
520
+ "id": "05617a31-e8b2-48e3-bc77-b1b965f3fcd5",
521
+ "metadata": {},
522
+ "outputs": [
523
+ {
524
+ "name": "stdout",
525
+ "output_type": "stream",
526
+ "text": [
527
+ " employee group\n",
528
+ "0 Bob Accounting\n",
529
+ "1 Jake Engineering\n",
530
+ "2 Lisa Engineering\n",
531
+ "3 Sue HR\n",
532
+ " name salary\n",
533
+ "0 Bob 70000\n",
534
+ "1 Jake 80000\n",
535
+ "2 Lisa 120000\n",
536
+ "3 Sue 90000\n",
537
+ " employee group name salary\n",
538
+ "0 Bob Accounting Bob 70000\n",
539
+ "1 Jake Engineering Jake 80000\n",
540
+ "2 Lisa Engineering Lisa 120000\n",
541
+ "3 Sue HR Sue 90000\n"
542
+ ]
543
+ }
544
+ ],
545
+ "source": [
546
+ "# The left_on and right_on keywords\n",
547
+ "df3 = pd.DataFrame({'name': ['Bob', 'Jake', 'Lisa', 'Sue'],\n",
548
+ " 'salary': [70000, 80000, 120000, 90000]})\n",
549
+ "print(df1); print(df3);\n",
550
+ "print(pd.merge(df1, df3, left_on=\"employee\", right_on=\"name\"))"
551
+ ]
552
+ },
553
+ {
554
+ "cell_type": "code",
555
+ "execution_count": 14,
556
+ "id": "7c8b5949-bf53-4971-b439-aaac86d3c312",
557
+ "metadata": {},
558
+ "outputs": [
559
+ {
560
+ "data": {
561
+ "text/html": [
562
+ "<div>\n",
563
+ "<style scoped>\n",
564
+ " .dataframe tbody tr th:only-of-type {\n",
565
+ " vertical-align: middle;\n",
566
+ " }\n",
567
+ "\n",
568
+ " .dataframe tbody tr th {\n",
569
+ " vertical-align: top;\n",
570
+ " }\n",
571
+ "\n",
572
+ " .dataframe thead th {\n",
573
+ " text-align: right;\n",
574
+ " }\n",
575
+ "</style>\n",
576
+ "<table border=\"1\" class=\"dataframe\">\n",
577
+ " <thead>\n",
578
+ " <tr style=\"text-align: right;\">\n",
579
+ " <th></th>\n",
580
+ " <th>employee</th>\n",
581
+ " <th>group</th>\n",
582
+ " <th>name</th>\n",
583
+ " <th>salary</th>\n",
584
+ " </tr>\n",
585
+ " </thead>\n",
586
+ " <tbody>\n",
587
+ " <tr>\n",
588
+ " <th>0</th>\n",
589
+ " <td>Bob</td>\n",
590
+ " <td>Accounting</td>\n",
591
+ " <td>Bob</td>\n",
592
+ " <td>70000</td>\n",
593
+ " </tr>\n",
594
+ " <tr>\n",
595
+ " <th>1</th>\n",
596
+ " <td>Jake</td>\n",
597
+ " <td>Engineering</td>\n",
598
+ " <td>Jake</td>\n",
599
+ " <td>80000</td>\n",
600
+ " </tr>\n",
601
+ " <tr>\n",
602
+ " <th>2</th>\n",
603
+ " <td>Lisa</td>\n",
604
+ " <td>Engineering</td>\n",
605
+ " <td>Lisa</td>\n",
606
+ " <td>120000</td>\n",
607
+ " </tr>\n",
608
+ " <tr>\n",
609
+ " <th>3</th>\n",
610
+ " <td>Sue</td>\n",
611
+ " <td>HR</td>\n",
612
+ " <td>Sue</td>\n",
613
+ " <td>90000</td>\n",
614
+ " </tr>\n",
615
+ " </tbody>\n",
616
+ "</table>\n",
617
+ "</div>"
618
+ ],
619
+ "text/plain": [
620
+ " employee group name salary\n",
621
+ "0 Bob Accounting Bob 70000\n",
622
+ "1 Jake Engineering Jake 80000\n",
623
+ "2 Lisa Engineering Lisa 120000\n",
624
+ "3 Sue HR Sue 90000"
625
+ ]
626
+ },
627
+ "execution_count": 14,
628
+ "metadata": {},
629
+ "output_type": "execute_result"
630
+ }
631
+ ],
632
+ "source": [
633
+ "pd.merge(df1, df3, left_on=\"employee\", right_on=\"name\")"
634
+ ]
635
+ },
636
+ {
637
+ "cell_type": "code",
638
+ "execution_count": 15,
639
+ "id": "97deb4d4-b7f3-4b7b-84ce-1ab024d22a1e",
640
+ "metadata": {},
641
+ "outputs": [
642
+ {
643
+ "data": {
644
+ "text/html": [
645
+ "<div>\n",
646
+ "<style scoped>\n",
647
+ " .dataframe tbody tr th:only-of-type {\n",
648
+ " vertical-align: middle;\n",
649
+ " }\n",
650
+ "\n",
651
+ " .dataframe tbody tr th {\n",
652
+ " vertical-align: top;\n",
653
+ " }\n",
654
+ "\n",
655
+ " .dataframe thead th {\n",
656
+ " text-align: right;\n",
657
+ " }\n",
658
+ "</style>\n",
659
+ "<table border=\"1\" class=\"dataframe\">\n",
660
+ " <thead>\n",
661
+ " <tr style=\"text-align: right;\">\n",
662
+ " <th></th>\n",
663
+ " <th>employee</th>\n",
664
+ " <th>group</th>\n",
665
+ " <th>salary</th>\n",
666
+ " </tr>\n",
667
+ " </thead>\n",
668
+ " <tbody>\n",
669
+ " <tr>\n",
670
+ " <th>0</th>\n",
671
+ " <td>Bob</td>\n",
672
+ " <td>Accounting</td>\n",
673
+ " <td>70000</td>\n",
674
+ " </tr>\n",
675
+ " <tr>\n",
676
+ " <th>1</th>\n",
677
+ " <td>Jake</td>\n",
678
+ " <td>Engineering</td>\n",
679
+ " <td>80000</td>\n",
680
+ " </tr>\n",
681
+ " <tr>\n",
682
+ " <th>2</th>\n",
683
+ " <td>Lisa</td>\n",
684
+ " <td>Engineering</td>\n",
685
+ " <td>120000</td>\n",
686
+ " </tr>\n",
687
+ " <tr>\n",
688
+ " <th>3</th>\n",
689
+ " <td>Sue</td>\n",
690
+ " <td>HR</td>\n",
691
+ " <td>90000</td>\n",
692
+ " </tr>\n",
693
+ " </tbody>\n",
694
+ "</table>\n",
695
+ "</div>"
696
+ ],
697
+ "text/plain": [
698
+ " employee group salary\n",
699
+ "0 Bob Accounting 70000\n",
700
+ "1 Jake Engineering 80000\n",
701
+ "2 Lisa Engineering 120000\n",
702
+ "3 Sue HR 90000"
703
+ ]
704
+ },
705
+ "execution_count": 15,
706
+ "metadata": {},
707
+ "output_type": "execute_result"
708
+ }
709
+ ],
710
+ "source": [
711
+ "pd.merge(df1, df3, left_on=\"employee\", right_on=\"name\").drop('name', axis=1)"
712
+ ]
713
+ },
714
+ {
715
+ "cell_type": "code",
716
+ "execution_count": 16,
717
+ "id": "a5cf7cf7-eee8-4c51-8406-c1ee6e1ddd2d",
718
+ "metadata": {},
719
+ "outputs": [
720
+ {
721
+ "name": "stdout",
722
+ "output_type": "stream",
723
+ "text": [
724
+ " group\n",
725
+ "employee \n",
726
+ "Bob Accounting\n",
727
+ "Jake Engineering\n",
728
+ "Lisa Engineering\n",
729
+ "Sue HR\n",
730
+ " hire_date\n",
731
+ "employee \n",
732
+ "Lisa 2004\n",
733
+ "Bob 2008\n",
734
+ "Jake 2012\n",
735
+ "Sue 2014\n"
736
+ ]
737
+ }
738
+ ],
739
+ "source": [
740
+ "#The left_index and right_index keywords\n",
741
+ "df1a = df1.set_index('employee')\n",
742
+ "df2a = df2.set_index('employee')\n",
743
+ "print(df1a); print(df2a)"
744
+ ]
745
+ },
746
+ {
747
+ "cell_type": "code",
748
+ "execution_count": 17,
749
+ "id": "585ba1a3-98fc-4866-baf4-2e4f5c2fcf7b",
750
+ "metadata": {},
751
+ "outputs": [
752
+ {
753
+ "name": "stdout",
754
+ "output_type": "stream",
755
+ "text": [
756
+ " group\n",
757
+ "employee \n",
758
+ "Bob Accounting\n",
759
+ "Jake Engineering\n",
760
+ "Lisa Engineering\n",
761
+ "Sue HR\n",
762
+ " hire_date\n",
763
+ "employee \n",
764
+ "Lisa 2004\n",
765
+ "Bob 2008\n",
766
+ "Jake 2012\n",
767
+ "Sue 2014\n",
768
+ " group hire_date\n",
769
+ "employee \n",
770
+ "Bob Accounting 2008\n",
771
+ "Jake Engineering 2012\n",
772
+ "Lisa Engineering 2004\n",
773
+ "Sue HR 2014\n"
774
+ ]
775
+ }
776
+ ],
777
+ "source": [
778
+ "print(df1a); print(df2a);\n",
779
+ "print(pd.merge(df1a, df2a, left_index=True, right_index=True))"
780
+ ]
781
+ },
782
+ {
783
+ "cell_type": "code",
784
+ "execution_count": 18,
785
+ "id": "ff04a499-75ef-4a2d-adc1-c891d87d0d39",
786
+ "metadata": {},
787
+ "outputs": [
788
+ {
789
+ "data": {
790
+ "text/html": [
791
+ "<div>\n",
792
+ "<style scoped>\n",
793
+ " .dataframe tbody tr th:only-of-type {\n",
794
+ " vertical-align: middle;\n",
795
+ " }\n",
796
+ "\n",
797
+ " .dataframe tbody tr th {\n",
798
+ " vertical-align: top;\n",
799
+ " }\n",
800
+ "\n",
801
+ " .dataframe thead th {\n",
802
+ " text-align: right;\n",
803
+ " }\n",
804
+ "</style>\n",
805
+ "<table border=\"1\" class=\"dataframe\">\n",
806
+ " <thead>\n",
807
+ " <tr style=\"text-align: right;\">\n",
808
+ " <th></th>\n",
809
+ " <th>group</th>\n",
810
+ " <th>hire_date</th>\n",
811
+ " </tr>\n",
812
+ " <tr>\n",
813
+ " <th>employee</th>\n",
814
+ " <th></th>\n",
815
+ " <th></th>\n",
816
+ " </tr>\n",
817
+ " </thead>\n",
818
+ " <tbody>\n",
819
+ " <tr>\n",
820
+ " <th>Bob</th>\n",
821
+ " <td>Accounting</td>\n",
822
+ " <td>2008</td>\n",
823
+ " </tr>\n",
824
+ " <tr>\n",
825
+ " <th>Jake</th>\n",
826
+ " <td>Engineering</td>\n",
827
+ " <td>2012</td>\n",
828
+ " </tr>\n",
829
+ " <tr>\n",
830
+ " <th>Lisa</th>\n",
831
+ " <td>Engineering</td>\n",
832
+ " <td>2004</td>\n",
833
+ " </tr>\n",
834
+ " <tr>\n",
835
+ " <th>Sue</th>\n",
836
+ " <td>HR</td>\n",
837
+ " <td>2014</td>\n",
838
+ " </tr>\n",
839
+ " </tbody>\n",
840
+ "</table>\n",
841
+ "</div>"
842
+ ],
843
+ "text/plain": [
844
+ " group hire_date\n",
845
+ "employee \n",
846
+ "Bob Accounting 2008\n",
847
+ "Jake Engineering 2012\n",
848
+ "Lisa Engineering 2004\n",
849
+ "Sue HR 2014"
850
+ ]
851
+ },
852
+ "execution_count": 18,
853
+ "metadata": {},
854
+ "output_type": "execute_result"
855
+ }
856
+ ],
857
+ "source": [
858
+ " pd.merge(df1a, df2a, left_index=True, right_index=True)"
859
+ ]
860
+ },
861
+ {
862
+ "cell_type": "code",
863
+ "execution_count": 19,
864
+ "id": "534ff9b9-8296-4003-8422-6dbc348adc6f",
865
+ "metadata": {},
866
+ "outputs": [
867
+ {
868
+ "name": "stdout",
869
+ "output_type": "stream",
870
+ "text": [
871
+ " group\n",
872
+ "employee \n",
873
+ "Bob Accounting\n",
874
+ "Jake Engineering\n",
875
+ "Lisa Engineering\n",
876
+ "Sue HR\n",
877
+ " hire_date\n",
878
+ "employee \n",
879
+ "Lisa 2004\n",
880
+ "Bob 2008\n",
881
+ "Jake 2012\n",
882
+ "Sue 2014\n",
883
+ " group hire_date\n",
884
+ "employee \n",
885
+ "Bob Accounting 2008\n",
886
+ "Jake Engineering 2012\n",
887
+ "Lisa Engineering 2004\n",
888
+ "Sue HR 2014\n"
889
+ ]
890
+ }
891
+ ],
892
+ "source": [
893
+ "print(df1a); print(df2a); print(df1a.join(df2a))"
894
+ ]
895
+ },
896
+ {
897
+ "cell_type": "code",
898
+ "execution_count": 20,
899
+ "id": "68e87dfd-ae69-4f5f-8a19-3422d4b0269b",
900
+ "metadata": {},
901
+ "outputs": [
902
+ {
903
+ "data": {
904
+ "text/html": [
905
+ "<div>\n",
906
+ "<style scoped>\n",
907
+ " .dataframe tbody tr th:only-of-type {\n",
908
+ " vertical-align: middle;\n",
909
+ " }\n",
910
+ "\n",
911
+ " .dataframe tbody tr th {\n",
912
+ " vertical-align: top;\n",
913
+ " }\n",
914
+ "\n",
915
+ " .dataframe thead th {\n",
916
+ " text-align: right;\n",
917
+ " }\n",
918
+ "</style>\n",
919
+ "<table border=\"1\" class=\"dataframe\">\n",
920
+ " <thead>\n",
921
+ " <tr style=\"text-align: right;\">\n",
922
+ " <th></th>\n",
923
+ " <th>group</th>\n",
924
+ " <th>hire_date</th>\n",
925
+ " </tr>\n",
926
+ " <tr>\n",
927
+ " <th>employee</th>\n",
928
+ " <th></th>\n",
929
+ " <th></th>\n",
930
+ " </tr>\n",
931
+ " </thead>\n",
932
+ " <tbody>\n",
933
+ " <tr>\n",
934
+ " <th>Bob</th>\n",
935
+ " <td>Accounting</td>\n",
936
+ " <td>2008</td>\n",
937
+ " </tr>\n",
938
+ " <tr>\n",
939
+ " <th>Jake</th>\n",
940
+ " <td>Engineering</td>\n",
941
+ " <td>2012</td>\n",
942
+ " </tr>\n",
943
+ " <tr>\n",
944
+ " <th>Lisa</th>\n",
945
+ " <td>Engineering</td>\n",
946
+ " <td>2004</td>\n",
947
+ " </tr>\n",
948
+ " <tr>\n",
949
+ " <th>Sue</th>\n",
950
+ " <td>HR</td>\n",
951
+ " <td>2014</td>\n",
952
+ " </tr>\n",
953
+ " </tbody>\n",
954
+ "</table>\n",
955
+ "</div>"
956
+ ],
957
+ "text/plain": [
958
+ " group hire_date\n",
959
+ "employee \n",
960
+ "Bob Accounting 2008\n",
961
+ "Jake Engineering 2012\n",
962
+ "Lisa Engineering 2004\n",
963
+ "Sue HR 2014"
964
+ ]
965
+ },
966
+ "execution_count": 20,
967
+ "metadata": {},
968
+ "output_type": "execute_result"
969
+ }
970
+ ],
971
+ "source": [
972
+ "df1a.join(df2a)"
973
+ ]
974
+ },
975
+ {
976
+ "cell_type": "code",
977
+ "execution_count": 21,
978
+ "id": "31a8304f-f143-4621-bbee-c774160dd570",
979
+ "metadata": {},
980
+ "outputs": [
981
+ {
982
+ "name": "stdout",
983
+ "output_type": "stream",
984
+ "text": [
985
+ " group\n",
986
+ "employee \n",
987
+ "Bob Accounting\n",
988
+ "Jake Engineering\n",
989
+ "Lisa Engineering\n",
990
+ "Sue HR\n",
991
+ " name salary\n",
992
+ "0 Bob 70000\n",
993
+ "1 Jake 80000\n",
994
+ "2 Lisa 120000\n",
995
+ "3 Sue 90000\n",
996
+ " group name salary\n",
997
+ "0 Accounting Bob 70000\n",
998
+ "1 Engineering Jake 80000\n",
999
+ "2 Engineering Lisa 120000\n",
1000
+ "3 HR Sue 90000\n"
1001
+ ]
1002
+ }
1003
+ ],
1004
+ "source": [
1005
+ "print(df1a); print(df3);\n",
1006
+ "print(pd.merge(df1a, df3, left_index=True, right_on='name'))"
1007
+ ]
1008
+ },
1009
+ {
1010
+ "cell_type": "code",
1011
+ "execution_count": 22,
1012
+ "id": "84466563-32ac-4a81-9726-0538ff300f58",
1013
+ "metadata": {},
1014
+ "outputs": [
1015
+ {
1016
+ "data": {
1017
+ "text/html": [
1018
+ "<div>\n",
1019
+ "<style scoped>\n",
1020
+ " .dataframe tbody tr th:only-of-type {\n",
1021
+ " vertical-align: middle;\n",
1022
+ " }\n",
1023
+ "\n",
1024
+ " .dataframe tbody tr th {\n",
1025
+ " vertical-align: top;\n",
1026
+ " }\n",
1027
+ "\n",
1028
+ " .dataframe thead th {\n",
1029
+ " text-align: right;\n",
1030
+ " }\n",
1031
+ "</style>\n",
1032
+ "<table border=\"1\" class=\"dataframe\">\n",
1033
+ " <thead>\n",
1034
+ " <tr style=\"text-align: right;\">\n",
1035
+ " <th></th>\n",
1036
+ " <th>group</th>\n",
1037
+ " <th>name</th>\n",
1038
+ " <th>salary</th>\n",
1039
+ " </tr>\n",
1040
+ " </thead>\n",
1041
+ " <tbody>\n",
1042
+ " <tr>\n",
1043
+ " <th>0</th>\n",
1044
+ " <td>Accounting</td>\n",
1045
+ " <td>Bob</td>\n",
1046
+ " <td>70000</td>\n",
1047
+ " </tr>\n",
1048
+ " <tr>\n",
1049
+ " <th>1</th>\n",
1050
+ " <td>Engineering</td>\n",
1051
+ " <td>Jake</td>\n",
1052
+ " <td>80000</td>\n",
1053
+ " </tr>\n",
1054
+ " <tr>\n",
1055
+ " <th>2</th>\n",
1056
+ " <td>Engineering</td>\n",
1057
+ " <td>Lisa</td>\n",
1058
+ " <td>120000</td>\n",
1059
+ " </tr>\n",
1060
+ " <tr>\n",
1061
+ " <th>3</th>\n",
1062
+ " <td>HR</td>\n",
1063
+ " <td>Sue</td>\n",
1064
+ " <td>90000</td>\n",
1065
+ " </tr>\n",
1066
+ " </tbody>\n",
1067
+ "</table>\n",
1068
+ "</div>"
1069
+ ],
1070
+ "text/plain": [
1071
+ " group name salary\n",
1072
+ "0 Accounting Bob 70000\n",
1073
+ "1 Engineering Jake 80000\n",
1074
+ "2 Engineering Lisa 120000\n",
1075
+ "3 HR Sue 90000"
1076
+ ]
1077
+ },
1078
+ "execution_count": 22,
1079
+ "metadata": {},
1080
+ "output_type": "execute_result"
1081
+ }
1082
+ ],
1083
+ "source": [
1084
+ "pd.merge(df1a, df3, left_index=True, right_on='name')"
1085
+ ]
1086
+ },
1087
+ {
1088
+ "cell_type": "code",
1089
+ "execution_count": 24,
1090
+ "id": "87a547fa-3fcb-4fd6-972e-9d589df3d8e3",
1091
+ "metadata": {},
1092
+ "outputs": [
1093
+ {
1094
+ "name": "stdout",
1095
+ "output_type": "stream",
1096
+ "text": [
1097
+ " name food\n",
1098
+ "0 Peter fish\n",
1099
+ "1 Paul beans\n",
1100
+ "2 Mary bread\n",
1101
+ " name drink\n",
1102
+ "0 Mary wine\n",
1103
+ "1 Joseph beer\n",
1104
+ " name food drink\n",
1105
+ "0 Mary bread wine\n"
1106
+ ]
1107
+ }
1108
+ ],
1109
+ "source": [
1110
+ "#Specifying Set Arithmetic for Joins\n",
1111
+ "df6 = pd.DataFrame({'name': ['Peter', 'Paul', 'Mary'],\n",
1112
+ " 'food': ['fish', 'beans', 'bread']},\n",
1113
+ " columns=['name', 'food'])\n",
1114
+ "df7 = pd.DataFrame({'name': ['Mary', 'Joseph'],\n",
1115
+ " 'drink': ['wine', 'beer']},\n",
1116
+ " columns=['name', 'drink'])\n",
1117
+ "print(df6); print(df7); print(pd.merge(df6, df7))\n"
1118
+ ]
1119
+ },
1120
+ {
1121
+ "cell_type": "code",
1122
+ "execution_count": 25,
1123
+ "id": "981e59f4-a9b0-43a8-93a2-0b3690232346",
1124
+ "metadata": {},
1125
+ "outputs": [
1126
+ {
1127
+ "name": "stdout",
1128
+ "output_type": "stream",
1129
+ "text": [
1130
+ " name food\n",
1131
+ "0 Peter fish\n",
1132
+ "1 Paul beans\n",
1133
+ "2 Mary bread\n",
1134
+ " name drink\n",
1135
+ "0 Mary wine\n",
1136
+ "1 Joseph beer\n",
1137
+ " name food drink\n",
1138
+ "0 Joseph NaN beer\n",
1139
+ "1 Mary bread wine\n",
1140
+ "2 Paul beans NaN\n",
1141
+ "3 Peter fish NaN\n"
1142
+ ]
1143
+ }
1144
+ ],
1145
+ "source": [
1146
+ "print(df6); print(df7); print(pd.merge(df6, df7, how='outer'))"
1147
+ ]
1148
+ },
1149
+ {
1150
+ "cell_type": "code",
1151
+ "execution_count": 26,
1152
+ "id": "200de952-7217-4b6a-8e88-bb8eaf3453b0",
1153
+ "metadata": {},
1154
+ "outputs": [
1155
+ {
1156
+ "name": "stdout",
1157
+ "output_type": "stream",
1158
+ "text": [
1159
+ " name food\n",
1160
+ "0 Peter fish\n",
1161
+ "1 Paul beans\n",
1162
+ "2 Mary bread\n",
1163
+ " name drink\n",
1164
+ "0 Mary wine\n",
1165
+ "1 Joseph beer\n",
1166
+ " name food drink\n",
1167
+ "0 Peter fish NaN\n",
1168
+ "1 Paul beans NaN\n",
1169
+ "2 Mary bread wine\n"
1170
+ ]
1171
+ }
1172
+ ],
1173
+ "source": [
1174
+ "print(df6); print(df7); print(pd.merge(df6, df7, how='left'))"
1175
+ ]
1176
+ },
1177
+ {
1178
+ "cell_type": "code",
1179
+ "execution_count": null,
1180
+ "id": "f72e5ddd-fc74-47f2-9aeb-5a15b610b669",
1181
+ "metadata": {},
1182
+ "outputs": [],
1183
+ "source": [
1184
+ "#Overlapping Column Names: The suffixes Keyword"
1185
+ ]
1186
+ }
1187
+ ],
1188
+ "metadata": {
1189
+ "kernelspec": {
1190
+ "display_name": "Python 3 (ipykernel)",
1191
+ "language": "python",
1192
+ "name": "python3"
1193
+ },
1194
+ "language_info": {
1195
+ "codemirror_mode": {
1196
+ "name": "ipython",
1197
+ "version": 3
1198
+ },
1199
+ "file_extension": ".py",
1200
+ "mimetype": "text/x-python",
1201
+ "name": "python",
1202
+ "nbconvert_exporter": "python",
1203
+ "pygments_lexer": "ipython3",
1204
+ "version": "3.12.0"
1205
+ }
1206
+ },
1207
+ "nbformat": 4,
1208
+ "nbformat_minor": 5
1209
+ }