noshot 0.9.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,633 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {
7
+ "id": "KLkDQCnJdSwP"
8
+ },
9
+ "outputs": [],
10
+ "source": [
11
+ "#EDA contd\n",
12
+ "import pandas as pd\n",
13
+ "chips = pd.read_csv('http://bit.ly/chiporders',sep='\\t')"
14
+ ]
15
+ },
16
+ {
17
+ "cell_type": "markdown",
18
+ "metadata": {
19
+ "id": "tqEc5RzBkTNc"
20
+ },
21
+ "source": [
22
+ "Order ID: A unique identifier for each order. Quantity: The number of items ordered. Item Name: The name of the menu item. Choice Description: Additional details or customization options for the ordered item. Item Price: The price of the individual item. Order Date: The date and time when the order was placed. Total: The total cost of the order."
23
+ ]
24
+ },
25
+ {
26
+ "cell_type": "code",
27
+ "execution_count": null,
28
+ "metadata": {
29
+ "id": "CpNMX3vamGxB"
30
+ },
31
+ "outputs": [],
32
+ "source": []
33
+ },
34
+ {
35
+ "cell_type": "code",
36
+ "execution_count": null,
37
+ "metadata": {
38
+ "colab": {
39
+ "base_uri": "https://localhost:8080/",
40
+ "height": 484
41
+ },
42
+ "id": "VdVdAHnMe2Wq",
43
+ "outputId": "484615aa-1583-47ee-93c4-b8e0457cb734"
44
+ },
45
+ "outputs": [],
46
+ "source": [
47
+ "chips.head(10)"
48
+ ]
49
+ },
50
+ {
51
+ "cell_type": "code",
52
+ "execution_count": null,
53
+ "metadata": {
54
+ "colab": {
55
+ "base_uri": "https://localhost:8080/"
56
+ },
57
+ "id": "cVA1ReI4hKAb",
58
+ "outputId": "05b9d9c8-8ea7-47e1-87ae-77d10ccd6d69"
59
+ },
60
+ "outputs": [],
61
+ "source": [
62
+ "chips.shape[0]"
63
+ ]
64
+ },
65
+ {
66
+ "cell_type": "code",
67
+ "execution_count": null,
68
+ "metadata": {
69
+ "colab": {
70
+ "base_uri": "https://localhost:8080/"
71
+ },
72
+ "id": "Qlnk_G60hYtg",
73
+ "outputId": "a43698cb-8c31-4791-c559-1b8e53446669"
74
+ },
75
+ "outputs": [],
76
+ "source": [
77
+ "chips['order_id'].value_counts().head(15) #each order id ordered how many different times\n"
78
+ ]
79
+ },
80
+ {
81
+ "cell_type": "code",
82
+ "execution_count": null,
83
+ "metadata": {
84
+ "colab": {
85
+ "base_uri": "https://localhost:8080/",
86
+ "height": 137
87
+ },
88
+ "id": "54oqEUsM2ok1",
89
+ "outputId": "7c7cc510-bd5e-4885-b100-fea12102301f"
90
+ },
91
+ "outputs": [],
92
+ "source": [
93
+ "# Extract all ordered items whose item_name contains 'Chicken' (substring match; == would only match names that are exactly 'Chicken'). na=False treats missing names as non-matches.\n",
94
+ "chips[chips['item_name'].str.contains('Chicken', na=False)]"
95
+ ]
96
+ },
97
+ {
98
+ "cell_type": "code",
99
+ "execution_count": null,
100
+ "metadata": {
101
+ "id": "hfgJ8K8x2wr1"
102
+ },
103
+ "outputs": [],
104
+ "source": [
105
+ "chk = chips[chips['item_name'].str.contains('Chicken')]"
106
+ ]
107
+ },
108
+ {
109
+ "cell_type": "code",
110
+ "execution_count": null,
111
+ "metadata": {
112
+ "colab": {
113
+ "base_uri": "https://localhost:8080/",
114
+ "height": 363
115
+ },
116
+ "id": "DjbRKWy92zJ2",
117
+ "outputId": "dcee2668-b95b-4bc4-caef-5de24985a126"
118
+ },
119
+ "outputs": [],
120
+ "source": [
121
+ "chk.head(10)"
122
+ ]
123
+ },
124
+ {
125
+ "cell_type": "code",
126
+ "execution_count": null,
127
+ "metadata": {
128
+ "colab": {
129
+ "base_uri": "https://localhost:8080/"
130
+ },
131
+ "id": "r7d8Fyvy25__",
132
+ "outputId": "ed07c163-98db-40b4-d68c-4101da97e754"
133
+ },
134
+ "outputs": [],
135
+ "source": [
136
+ "chips['item_price'].str.replace('$', '', regex=False).astype(float).max()  # regex=False: strip '$' as a literal character, not the regex end-of-string anchor"
137
+ ]
138
+ },
139
+ {
140
+ "cell_type": "code",
141
+ "execution_count": null,
142
+ "metadata": {
143
+ "colab": {
144
+ "base_uri": "https://localhost:8080/"
145
+ },
146
+ "id": "dCELCgWXj_l_",
147
+ "outputId": "84c811f1-12d4-473b-8c0a-8cd3088a523a"
148
+ },
149
+ "outputs": [],
150
+ "source": [
151
+ "c1=chips['item_price'].str.replace('$', '', regex=False)  # regex=False: strip '$' as a literal character, not the regex end-of-string anchor\n",
152
+ "c1.head(5)\n"
153
+ ]
154
+ },
155
+ {
156
+ "cell_type": "code",
157
+ "execution_count": null,
158
+ "metadata": {
159
+ "colab": {
160
+ "base_uri": "https://localhost:8080/",
161
+ "height": 35
162
+ },
163
+ "id": "bUSoL50c3A7X",
164
+ "outputId": "d99c9d2f-9bea-4906-80d8-a2eb91b770e5"
165
+ },
166
+ "outputs": [],
167
+ "source": [
168
+ "chips.loc[3598, 'item_name']  # one .loc lookup (row label, column) instead of chained indexing"
169
+ ]
170
+ },
171
+ {
172
+ "cell_type": "code",
173
+ "execution_count": null,
174
+ "metadata": {
175
+ "colab": {
176
+ "base_uri": "https://localhost:8080/"
177
+ },
178
+ "id": "jAH6N9vEjoRB",
179
+ "outputId": "ca0bf772-6c0a-4100-b930-5003d390c3f3"
180
+ },
181
+ "outputs": [],
182
+ "source": [
183
+ "# Explore order_id: how many items did each order contain?\n",
184
+ "chips.groupby('order_id')['quantity'].sum().head(20)"
185
+ ]
186
+ },
187
+ {
188
+ "cell_type": "markdown",
189
+ "metadata": {
190
+ "id": "_0MD-W-R40AO"
191
+ },
192
+ "source": [
193
+ "https://datascience.fm/pandas-for-simple-data-analysys/"
194
+ ]
195
+ },
196
+ {
197
+ "cell_type": "markdown",
198
+ "metadata": {
199
+ "id": "qJlmbndSyfW0"
200
+ },
201
+ "source": [
202
+ "- Order ID: A unique identifier for each order.\n",
203
+ "- Quantity: The number of items ordered.\n",
204
+ "- Item Name: The name of the menu item.\n",
205
+ "- Choice Description: Additional details or customization options for the ordered item.\n",
206
+ "- Item Price: The price of the individual item.\n",
207
+ "- Order Date: The date and time when the order was placed.\n",
208
+ "- Total: The total cost of the order."
209
+ ]
210
+ },
211
+ {
212
+ "cell_type": "code",
213
+ "execution_count": null,
214
+ "metadata": {
215
+ "colab": {
216
+ "base_uri": "https://localhost:8080/"
217
+ },
218
+ "id": "GBinyLCCygzd",
219
+ "outputId": "9f3fa4d1-9bf4-4673-e37e-c4136d81e7ea"
220
+ },
221
+ "outputs": [],
222
+ "source": [
223
+ "# Count the null (missing) values in each column of the data\n",
224
+ "chips.isnull().sum()"
225
+ ]
226
+ },
227
+ {
228
+ "cell_type": "code",
229
+ "execution_count": null,
230
+ "metadata": {
231
+ "colab": {
232
+ "base_uri": "https://localhost:8080/"
233
+ },
234
+ "id": "dM82Vf8ly2jL",
235
+ "outputId": "e91b122b-1b3e-401f-c4f8-9a9b1aa0d54b"
236
+ },
237
+ "outputs": [],
238
+ "source": [
239
+ "chips.index"
240
+ ]
241
+ },
242
+ {
243
+ "cell_type": "code",
244
+ "execution_count": null,
245
+ "metadata": {
246
+ "colab": {
247
+ "base_uri": "https://localhost:8080/",
248
+ "height": 449
249
+ },
250
+ "id": "1_kxS-uNy5jS",
251
+ "outputId": "6932e070-cfef-4c06-ed06-38896291c0f6"
252
+ },
253
+ "outputs": [],
254
+ "source": [
255
+ "#Which was the most ordered item? and How many items were ordered?\n",
256
+ "c = chips.groupby('item_name')\n",
257
+ "c = c.sum(numeric_only=True)  # sum numeric columns only; string columns such as item_price cannot be meaningfully summed\n",
258
+ "c = c.sort_values(['quantity'], ascending=False)\n",
259
+ "c.head(10)"
260
+ ]
261
+ },
262
+ {
263
+ "cell_type": "code",
264
+ "execution_count": null,
265
+ "metadata": {
266
+ "colab": {
267
+ "base_uri": "https://localhost:8080/",
268
+ "height": 363
269
+ },
270
+ "id": "nBP7cVNa0Dif",
271
+ "outputId": "ee795041-c866-49a2-8583-e3aa66dee4fa"
272
+ },
273
+ "outputs": [],
274
+ "source": [
275
+ "import pandas as pd\n",
276
+ "dt = pd.DataFrame({'id' : [1,2,2,2,3,3,3,4,5,6] ,\n",
277
+ " 'order count' : [1,2,1,3,1,1,2,1,2,1]})\n",
278
+ "dt"
279
+ ]
280
+ },
281
+ {
282
+ "cell_type": "code",
283
+ "execution_count": null,
284
+ "metadata": {
285
+ "colab": {
286
+ "base_uri": "https://localhost:8080/"
287
+ },
288
+ "id": "GzwktX2e2QW3",
289
+ "outputId": "96a044ff-6e27-41a4-9ef6-af3013117041"
290
+ },
291
+ "outputs": [],
292
+ "source": [
293
+ "dt['id'].value_counts()"
294
+ ]
295
+ },
296
+ {
297
+ "cell_type": "code",
298
+ "execution_count": null,
299
+ "metadata": {
300
+ "colab": {
301
+ "base_uri": "https://localhost:8080/"
302
+ },
303
+ "id": "1QDYXTOa2Ysh",
304
+ "outputId": "0e7c485a-b822-4dbc-a557-452b998a9bb5"
305
+ },
306
+ "outputs": [],
307
+ "source": [
308
+ "dt.groupby('id')['order count'].sum()"
309
+ ]
310
+ },
311
+ {
312
+ "cell_type": "code",
313
+ "execution_count": null,
314
+ "metadata": {
315
+ "id": "GCKreTOi_oFA"
316
+ },
317
+ "outputs": [],
318
+ "source": [
319
+ "# Reference: https://builtin.com/software-engineering-perspectives/pandas-iloc\n",
320
+ "\n",
321
+ "# Reference: https://www.geeksforgeeks.org/difference-between-loc-and-iloc-in-pandas-dataframe/"
322
+ ]
323
+ },
324
+ {
325
+ "cell_type": "code",
326
+ "execution_count": null,
327
+ "metadata": {
328
+ "id": "2UTru4cnmJFi"
329
+ },
330
+ "outputs": [],
331
+ "source": [
332
+ "TI=pd.read_csv('TRAIN1.csv')"
333
+ ]
334
+ },
335
+ {
336
+ "cell_type": "code",
337
+ "execution_count": null,
338
+ "metadata": {
339
+ "colab": {
340
+ "base_uri": "https://localhost:8080/",
341
+ "height": 503
342
+ },
343
+ "id": "Dlm6gTAEnKGO",
344
+ "outputId": "0efc16b3-96eb-4729-c4b3-9e19baee8e04"
345
+ },
346
+ "outputs": [],
347
+ "source": [
348
+ "TI.head(5)"
349
+ ]
350
+ },
351
+ {
352
+ "cell_type": "code",
353
+ "execution_count": null,
354
+ "metadata": {
355
+ "id": "q7XM07H2ned5"
356
+ },
357
+ "outputs": [],
358
+ "source": []
359
+ },
360
+ {
361
+ "cell_type": "code",
362
+ "execution_count": null,
363
+ "metadata": {
364
+ "colab": {
365
+ "base_uri": "https://localhost:8080/"
366
+ },
367
+ "id": "tMWfUl4uoeuA",
368
+ "outputId": "3252dae9-17c8-44d0-ccdf-1075cebc2097"
369
+ },
370
+ "outputs": [],
371
+ "source": [
372
+ "TI.info()"
373
+ ]
374
+ },
375
+ {
376
+ "cell_type": "code",
377
+ "execution_count": null,
378
+ "metadata": {
379
+ "colab": {
380
+ "base_uri": "https://localhost:8080/",
381
+ "height": 825
382
+ },
383
+ "id": "PFovvtAyojjo",
384
+ "outputId": "e984ce14-e828-45ec-e01c-b2f21b4cd7a4"
385
+ },
386
+ "outputs": [],
387
+ "source": [
388
+ "TI.loc[TI['Sex']=='male'] #to get list of male passengers"
389
+ ]
390
+ },
391
+ {
392
+ "cell_type": "code",
393
+ "execution_count": null,
394
+ "metadata": {
395
+ "colab": {
396
+ "base_uri": "https://localhost:8080/",
397
+ "height": 382
398
+ },
399
+ "id": "5fJ239izo6ib",
400
+ "outputId": "4128a4d6-f941-433a-d3cd-c40948043eb8"
401
+ },
402
+ "outputs": [],
403
+ "source": [
404
+ "titanic=TI\n",
405
+ "titanic.loc[(titanic['Sex']=='male') & (titanic['Embarked']=='S')].head()"
406
+ ]
407
+ },
408
+ {
409
+ "cell_type": "code",
410
+ "execution_count": null,
411
+ "metadata": {
412
+ "colab": {
413
+ "base_uri": "https://localhost:8080/",
414
+ "height": 237
415
+ },
416
+ "id": "DjAfl0z8qLt1",
417
+ "outputId": "dc93282a-9baf-4d9e-e5df-7877da0e16f7"
418
+ },
419
+ "outputs": [],
420
+ "source": [
421
+ "titanic.loc[0:5,['Sex','Age']]"
422
+ ]
423
+ },
424
+ {
425
+ "cell_type": "code",
426
+ "execution_count": null,
427
+ "metadata": {
428
+ "colab": {
429
+ "base_uri": "https://localhost:8080/",
430
+ "height": 174
431
+ },
432
+ "id": "CqicL_wcqc2M",
433
+ "outputId": "e2a2747a-b1f6-441e-c851-b501fa2d3859"
434
+ },
435
+ "outputs": [],
436
+ "source": [
437
+ "titanic.iloc[0:4,2:5]"
438
+ ]
439
+ },
440
+ {
441
+ "cell_type": "code",
442
+ "execution_count": null,
443
+ "metadata": {
444
+ "colab": {
445
+ "base_uri": "https://localhost:8080/",
446
+ "height": 331
447
+ },
448
+ "id": "He6iEhi2qhYm",
449
+ "outputId": "75b67880-3620-4a93-c36b-396f341ca199"
450
+ },
451
+ "outputs": [],
452
+ "source": [
453
+ "# importing the module\n",
454
+ "import pandas as pd\n",
455
+ "\n",
456
+ "# creating a sample dataframe\n",
457
+ "data = pd.DataFrame({'Brand': ['Maruti', 'Hyundai', 'Tata',\n",
458
+ "\t\t\t\t\t\t\t'Mahindra', 'Maruti', 'Hyundai',\n",
459
+ "\t\t\t\t\t\t\t'Renault', 'Tata', 'Maruti'],\n",
460
+ "\t\t\t\t\t'Year': [2012, 2014, 2011, 2015, 2012,\n",
461
+ "\t\t\t\t\t\t\t2016, 2014, 2018, 2019],\n",
462
+ "\t\t\t\t\t'Kms Driven': [50000, 30000, 60000,\n",
463
+ "\t\t\t\t\t\t\t\t\t25000, 10000, 46000,\n",
464
+ "\t\t\t\t\t\t\t\t\t31000, 15000, 12000],\n",
465
+ "\t\t\t\t\t'City': ['Gurgaon', 'Delhi', 'Mumbai',\n",
466
+ "\t\t\t\t\t\t\t'Delhi', 'Mumbai', 'Delhi',\n",
467
+ "\t\t\t\t\t\t\t'Mumbai', 'Chennai', 'Ghaziabad'],\n",
468
+ "\t\t\t\t\t'Mileage': [28, 27, 25, 26, 28,\n",
469
+ "\t\t\t\t\t\t\t\t29, 24, 21, 24]})\n",
470
+ "\n",
471
+ "# displaying the DataFrame\n",
472
+ "display(data)\n"
473
+ ]
474
+ },
475
+ {
476
+ "cell_type": "code",
477
+ "execution_count": null,
478
+ "metadata": {
479
+ "colab": {
480
+ "base_uri": "https://localhost:8080/",
481
+ "height": 112
482
+ },
483
+ "id": "M3Kx0Fc9q61W",
484
+ "outputId": "3693ebf1-957a-4604-f337-4d81f2808f1f"
485
+ },
486
+ "outputs": [],
487
+ "source": [
488
+ "# selecting cars with brand 'Maruti' and Mileage > 25\n",
489
+ "display(data.loc[(data.Brand == 'Maruti') & (data.Mileage > 25)])"
490
+ ]
491
+ },
492
+ {
493
+ "cell_type": "code",
494
+ "execution_count": null,
495
+ "metadata": {
496
+ "colab": {
497
+ "base_uri": "https://localhost:8080/",
498
+ "height": 174
499
+ },
500
+ "id": "tO82gnyMrI58",
501
+ "outputId": "10c47a2e-bf5a-4d8c-9017-a59b9c9da8cf"
502
+ },
503
+ "outputs": [],
504
+ "source": [
505
+ "# selecting range of rows from 2 to 5\n",
506
+ "display(data.loc[2: 5])"
507
+ ]
508
+ },
509
+ {
510
+ "cell_type": "code",
511
+ "execution_count": null,
512
+ "metadata": {
513
+ "colab": {
514
+ "base_uri": "https://localhost:8080/",
515
+ "height": 331
516
+ },
517
+ "id": "vOazh8vUrRTR",
518
+ "outputId": "ca85cb51-427a-4398-f0af-d12d40aa3a35"
519
+ },
520
+ "outputs": [],
521
+ "source": [
522
+ "# updating values of Mileage if Year < 2015\n",
523
+ "data.loc[(data.Year < 2015), ['Mileage']] = 22\n",
524
+ "display(data)"
525
+ ]
526
+ },
527
+ {
528
+ "cell_type": "code",
529
+ "execution_count": null,
530
+ "metadata": {
531
+ "colab": {
532
+ "base_uri": "https://localhost:8080/",
533
+ "height": 174
534
+ },
535
+ "id": "QMZcapyCrWWU",
536
+ "outputId": "32bd940a-938b-41fd-f4c5-4bed9ddfd9cc"
537
+ },
538
+ "outputs": [],
539
+ "source": [
540
+ "\n",
541
+ "# selecting 0th, 2nd, 4th, and 7th index rows\n",
542
+ "display(data.iloc[[0, 2, 4, 7]])"
543
+ ]
544
+ },
545
+ {
546
+ "cell_type": "code",
547
+ "execution_count": null,
548
+ "metadata": {
549
+ "colab": {
550
+ "base_uri": "https://localhost:8080/",
551
+ "height": 174
552
+ },
553
+ "id": "AnwKkMkvrZrt",
554
+ "outputId": "c183117f-5a13-4484-945f-cba21854d475"
555
+ },
556
+ "outputs": [],
557
+ "source": [
558
+ "# selecting rows from 1 to 4 and columns from 2 to 4\n",
559
+ "display(data.iloc[1: 5, 2: 5])"
560
+ ]
561
+ },
562
+ {
563
+ "cell_type": "code",
564
+ "execution_count": null,
565
+ "metadata": {
566
+ "colab": {
567
+ "base_uri": "https://localhost:8080/",
568
+ "height": 1000
569
+ },
570
+ "id": "YBHhMDsgsC4d",
571
+ "outputId": "8599d99a-9522-47a8-8adf-1f107303e673"
572
+ },
573
+ "outputs": [],
574
+ "source": [
575
+ "# Bar plot\n",
576
+ "import seaborn as sns\n",
577
+ "import matplotlib.pyplot as plt\n",
578
+ "\n",
579
+ "df=titanic\n",
580
+ "sns.countplot(x='Survived', data=df)\n",
581
+ "plt.xlabel('Survival Status')\n",
582
+ "plt.ylabel('Count')\n",
583
+ "plt.title('Survival Count')\n",
584
+ "plt.show()\n",
585
+ "\n",
586
+ "# Histogram\n",
587
+ "plt.hist(df['Age'], bins=10)\n",
588
+ "plt.xlabel('Age')\n",
589
+ "plt.ylabel('Frequency')\n",
590
+ "plt.title('Distribution of Age')\n",
591
+ "plt.show()\n",
592
+ "\n",
593
+ "# Scatter plot\n",
594
+ "plt.scatter(df['Age'], df['Fare'])\n",
595
+ "plt.xlabel('Age')\n",
596
+ "plt.ylabel('Fare')\n",
597
+ "plt.title('Age vs. Fare')\n",
598
+ "plt.show()\n",
599
+ "\n",
600
+ "# Box plot\n",
601
+ "sns.boxplot(x=df['Survived'], y=df['Fare'])\n",
602
+ "plt.xlabel('Survival Status')\n",
603
+ "plt.ylabel('Fare')\n",
604
+ "plt.title('Survival Status vs. Fare')\n",
605
+ "plt.show()"
606
+ ]
607
+ }
608
+ ],
609
+ "metadata": {
610
+ "colab": {
611
+ "provenance": []
612
+ },
613
+ "kernelspec": {
614
+ "display_name": "Python 3 (ipykernel)",
615
+ "language": "python",
616
+ "name": "python3"
617
+ },
618
+ "language_info": {
619
+ "codemirror_mode": {
620
+ "name": "ipython",
621
+ "version": 3
622
+ },
623
+ "file_extension": ".py",
624
+ "mimetype": "text/x-python",
625
+ "name": "python",
626
+ "nbconvert_exporter": "python",
627
+ "pygments_lexer": "ipython3",
628
+ "version": "3.12.4"
629
+ }
630
+ },
631
+ "nbformat": 4,
632
+ "nbformat_minor": 4
633
+ }