scypyy 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
scypyy-0.7.0/PKG-INFO ADDED
@@ -0,0 +1,32 @@
1
+ Metadata-Version: 2.4
2
+ Name: scypyy
3
+ Version: 0.7.0
4
+ Summary: A curated collection.
5
+ License-Expression: MIT
6
+ Project-URL: Homepage, https://google.com
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: Operating System :: OS Independent
9
+ Classifier: Intended Audience :: Science/Research
10
+ Classifier: Topic :: Scientific/Engineering
11
+ Requires-Python: >=3.8
12
+ Description-Content-Type: text/markdown
13
+ Dynamic: requires-python
14
+
15
+ # scypyy
16
+
17
+ A curated collection.
18
+ ## Installation
19
+
20
+ ```bash
21
+ pip install scypyy
22
+ ```
23
+
24
+ ## Usage
25
+
26
+ ```python
27
+ import scypyy
28
+
29
+ print(scypyy.get())
30
+ ```
31
+
32
+ This prints a help message.
scypyy-0.7.0/README.md ADDED
@@ -0,0 +1,18 @@
1
+ # scypyy
2
+
3
+ A curated collection.
4
+ ## Installation
5
+
6
+ ```bash
7
+ pip install scypyy
8
+ ```
9
+
10
+ ## Usage
11
+
12
+ ```python
13
+ import scypyy
14
+
15
+ print(scypyy.get())
16
+ ```
17
+
18
+ This prints a help message.
@@ -0,0 +1,20 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "scypyy"
7
+ version = "0.7.0"
8
+ description = "A curated collection."
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.8"
12
+ classifiers = [
13
+ "Programming Language :: Python :: 3",
14
+ "Operating System :: OS Independent",
15
+ "Intended Audience :: Science/Research",
16
+ "Topic :: Scientific/Engineering",
17
+ ]
18
+
19
+ [project.urls]
20
+ Homepage = "https://google.com"
@@ -0,0 +1,5 @@
1
+ """scypyy - A curated collection of common Python imports for data science."""
2
+
3
+ from .core import get
4
+
5
+ __all__ = ["get"]
@@ -0,0 +1,1109 @@
1
+ """Core module containing curated import strings and code templates by topic."""
2
+
3
+
4
+ class _Get:
5
+ """Namespace for categorized import and code helpers."""
6
+
7
def libs(self):
    """Print a copy-paste-ready block of the common data-science imports.

    Covers numpy/pandas, plotting, scipy distributions and optimization,
    statsmodels, and the usual sklearn model/preprocessing/metrics names,
    ending with a seaborn dataset-loading stub. Writes to stdout; returns None.
    """
    # Template is bound to a local first so the method reads as
    # "build text, then emit it"; content is unchanged.
    template = """\
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from scipy import stats
from scipy.stats import norm # Z-test (normal distribution)
from scipy.stats import t # t-test (t-distribution)
from scipy.stats import binom # binomial distribution
from scipy.stats import poisson # poisson distribution
from scipy.stats import bernoulli # bernoulli distribution
from scipy.optimize import minimize # optimization
from scipy.special import factorial # factorial function
from numpy import maximum # element-wise max

from statistics import stdev # sample standard deviation

import statsmodels.api as sm
import statsmodels.formula.api as smf # smf.ols(), smf.logit()
from statsmodels.formula.api import ols # for ANOVA
from statsmodels.stats.proportion import proportions_ztest # proportion z-test
from statsmodels.stats.outliers_influence import variance_inflation_factor # VIF
from statsmodels.tools.tools import add_constant
import statsmodels.stats.multicomp as mc # multiple comparison (Tukey)
from statsmodels.multivariate.manova import MANOVA # MANOVA

import sklearn.linear_model as lm
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.linear_model import Lasso, LassoCV
from sklearn.linear_model import LogisticRegression

from sklearn import preprocessing
from sklearn.preprocessing import scale
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

from sklearn.metrics import mean_squared_error
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

df = sns.load_dataset('dataset_Asked')"""
    print(template)
58
+
59
def hypothesis(self):
    """Print the full hypothesis-testing cheat sheet (20 worked templates).

    One-/two-sample Z and t tests for means and proportions, manual and
    built-in variants, plus paired tests. Writes to stdout; returns None.
    """
    # Bind the cheat sheet to a local before printing; the text itself
    # is reproduced unchanged.
    sheet = """\
## ============================================================
## ALL HYPOTHESIS TESTING CODES - LAB 4 TO LAB 7
## ============================================================

import numpy as np
from scipy import stats
from scipy.stats import norm
from scipy.stats import t
from statistics import stdev
from statsmodels.stats.proportion import proportions_ztest


## ============================================================
## 1. ONE-SAMPLE Z-TEST (TWO-SIDED)
## Population SD (sigma) KNOWN
## Keywords: "differ", "same", "equal", "changed"
## ============================================================

mu = 84
xbar = 81.5
sigma = 10
n = 75
alpha = 0.01

se = sigma / np.sqrt(n)
Z_cal = (xbar - mu) / se

Z_pos = norm.ppf(1 - alpha/2)
Z_neg = norm.ppf(alpha/2)

print("=== 1. One-Sample Z-Test (Two-Sided) ===")
print(f"Z_cal = {Z_cal}")
print(f"Z_table = [{Z_neg}, {Z_pos}]")
print(f"p-value = {norm.cdf(Z_cal)}")
print(f"CI = [{mu + Z_neg * se}, {mu + Z_pos * se}]")
# Accept H0 if |Z_cal| < |Z_table|
# Accept H0 if p-value > alpha
# Accept H0 if xbar inside CI


## ============================================================
## 2. ONE-SAMPLE Z-TEST (ONE-SIDED LEFT)
## Keywords: "less", "lower", "decreased"
## ============================================================

mu = 14500
xbar = 13000
sigma = 2100
n = 25
alpha = 0.01

se = sigma / np.sqrt(n)
Z_cal = (xbar - mu) / se

Z_neg = norm.ppf(alpha)

print("=== 2. One-Sample Z-Test (One-Sided Left) ===")
print(f"Z_cal = {Z_cal}")
print(f"Z_neg = {Z_neg}")
print(f"p-value = {norm.cdf(Z_cal)}")
print(f"Boundary = {mu + Z_neg * se}")
# Reject H0 if Z_cal < Z_neg
# Reject H0 if p-value < alpha
# Reject H0 if xbar < boundary


## ============================================================
## 3. ONE-SAMPLE Z-TEST (ONE-SIDED RIGHT)
## Keywords: "greater", "higher", "increased"
## ============================================================

mu = 50
xbar = 55
sigma = 10
n = 36
alpha = 0.05

se = sigma / np.sqrt(n)
Z_cal = (xbar - mu) / se

Z_pos = norm.ppf(1 - alpha)

print("=== 3. One-Sample Z-Test (One-Sided Right) ===")
print(f"Z_cal = {Z_cal}")
print(f"Z_pos = {Z_pos}")
print(f"p-value = {1 - norm.cdf(Z_cal)}")
print(f"Boundary = {mu + Z_pos * se}")
# Reject H0 if Z_cal > Z_pos
# Reject H0 if p-value < alpha
# Reject H0 if xbar > boundary


## ============================================================
## 4. ONE-SAMPLE t-TEST (TWO-SIDED)
## Population SD UNKNOWN, use sample SD
## Keywords: "differ", "same", "equal"
## ============================================================

mu = 100
xbar = 95.8
s = 17.5
n = 100
df = n - 1
alpha = 0.05

se = s / np.sqrt(n)
t_cal = (xbar - mu) / se

t_pos = t.ppf(1 - alpha/2, df)
t_neg = t.ppf(alpha/2, df)

print("=== 4. One-Sample t-Test (Two-Sided) ===")
print(f"t_cal = {t_cal}")
print(f"t_table = [{t_neg}, {t_pos}]")
print(f"p-value = {t.cdf(t_cal, df) * 2}")
print(f"CI = [{mu + t_neg * se}, {mu + t_pos * se}]")
# Reject H0 if |t_cal| > |t_table|
# Reject H0 if p-value < alpha
# Reject H0 if xbar outside CI


## ============================================================
## 5. ONE-SAMPLE t-TEST (ONE-SIDED LEFT)
## Keywords: "less", "lower", "decreased"
## ============================================================

mu = 100
xbar = 95.8
s = 17.5
n = 100
df = n - 1
alpha = 0.05

se = s / np.sqrt(n)
t_cal = (xbar - mu) / se

t_neg = t.ppf(alpha, df)

print("=== 5. One-Sample t-Test (One-Sided Left) ===")
print(f"t_cal = {t_cal}")
print(f"t_neg = {t_neg}")
print(f"p-value = {t.cdf(t_cal, df)}")
print(f"Boundary = {mu + t_neg * se}")
# Reject H0 if t_cal < t_neg
# Reject H0 if p-value < alpha
# Reject H0 if xbar < boundary


## ============================================================
## 6. ONE-SAMPLE t-TEST (ONE-SIDED RIGHT)
## Keywords: "greater", "higher", "increased"
## ============================================================

mu = 100
xbar = 105
s = 17.5
n = 100
df = n - 1
alpha = 0.05

se = s / np.sqrt(n)
t_cal = (xbar - mu) / se

t_pos = t.ppf(1 - alpha, df)

print("=== 6. One-Sample t-Test (One-Sided Right) ===")
print(f"t_cal = {t_cal}")
print(f"t_pos = {t_pos}")
print(f"p-value = {1 - t.cdf(t_cal, df)}")
print(f"Boundary = {mu + t_pos * se}")
# Reject H0 if t_cal > t_pos
# Reject H0 if p-value < alpha
# Reject H0 if xbar > boundary


## ============================================================
## 7. ONE-SAMPLE t-TEST USING BUILT-IN (ttest_1samp)
## When you have RAW DATA (not xbar and s)
## ============================================================

X = np.array([10, 12, 13, 14, 15, 2, 7, 8])

print("=== 7. ttest_1samp (Built-in) ===")
print("Two-sided:", stats.ttest_1samp(X, popmean=12, alternative='two-sided'))
print("Left: ", stats.ttest_1samp(X, popmean=12, alternative='less'))
print("Right: ", stats.ttest_1samp(X, popmean=12, alternative='greater'))
# Reject H0 if pvalue < alpha


## ============================================================
## 8. ONE-SAMPLE PROPORTION Z-TEST (TWO-SIDED)
## Keywords: "differ", "same", "equal"
## ============================================================

p = 0.15
pbar = 22 / 120
n = 120
alpha = 0.02

se = np.sqrt((p * (1 - p)) / n)
Z_cal = (pbar - p) / se

Z_pos = norm.ppf(1 - alpha/2)
Z_neg = norm.ppf(alpha/2)

print("=== 8. One-Sample Proportion Z-Test (Two-Sided) ===")
print(f"Z_cal = {Z_cal}")
print(f"Z_table = [{Z_neg}, {Z_pos}]")
print(f"p-value = {norm.cdf(Z_cal)}")
print(f"CI = [{p + Z_neg * se}, {p + Z_pos * se}]")
# Accept H0 if |Z_cal| < |Z_table|
# Accept H0 if p-value > alpha
# Accept H0 if pbar inside CI


## ============================================================
## 9. ONE-SAMPLE PROPORTION Z-TEST (ONE-SIDED RIGHT)
## Keywords: "increased", "greater", "more"
## ============================================================

p = 0.05
pbar = 335 / 6000
n = 6000
alpha = 0.02

se = np.sqrt((p * (1 - p)) / n)
Z_cal = (pbar - p) / se

Z_pos = norm.ppf(1 - alpha)

print("=== 9. One-Sample Proportion Z-Test (One-Sided Right) ===")
print(f"Z_cal = {Z_cal}")
print(f"Z_pos = {Z_pos}")
print(f"p-value = {1 - norm.cdf(Z_cal)}")
print(f"Boundary = {p + Z_pos * se}")
# Reject H0 if Z_cal > Z_pos
# Reject H0 if p-value < alpha
# Reject H0 if pbar > boundary


## ============================================================
## 10. ONE-SAMPLE PROPORTION USING BUILT-IN (proportions_ztest)
## ============================================================

print("=== 10. proportions_ztest (Built-in) ===")
print("Two-sided:", proportions_ztest(count=22, nobs=120, value=0.15, alternative='two-sided'))
print("Right: ", proportions_ztest(count=335, nobs=6000, value=0.05, alternative='larger'))
print("Left: ", proportions_ztest(count=22, nobs=120, value=0.15, alternative='smaller'))
# alternative: 'two-sided', 'smaller', 'larger'
# Reject H0 if pvalue < alpha


## ============================================================
## 11. TWO-SAMPLE Z-TEST FOR MEANS (TWO-SIDED)
## Both sigma known
## Keywords: "differ", "same", "equal"
## ============================================================

xbar1, xbar2 = 86, 82
sigma1, sigma2 = 6, 9
n1, n2 = 60, 75
alpha = 0.01

se = np.sqrt(sigma1**2/n1 + sigma2**2/n2)
Z_cal = ((xbar1 - xbar2) - 0) / se

Z_pos = norm.ppf(1 - alpha/2)
Z_neg = norm.ppf(alpha/2)

print("=== 11. Two-Sample Z-Test Means (Two-Sided) ===")
print(f"Z_cal = {Z_cal}")
print(f"Z_table = [{Z_neg}, {Z_pos}]")
print(f"p-value = {(1 - norm.cdf(Z_cal)) * 2}")
print(f"CI = [{0 + Z_neg * se}, {0 + Z_pos * se}]")
# Reject H0 if |Z_cal| > |Z_table|
# Reject H0 if p-value < alpha
# Reject H0 if (xbar1-xbar2) outside CI


## ============================================================
## 12. TWO-SAMPLE Z-TEST FOR MEANS (ONE-SIDED RIGHT)
## Keywords: "greater", "higher", "increased"
## ============================================================

xbar1, xbar2 = 13.8, 9.1
sigma1, sigma2 = 18.9, 8.7
n1, n2 = 41, 35
alpha = 0.10

se = np.sqrt(sigma1**2/n1 + sigma2**2/n2)
Z_cal = ((xbar1 - xbar2) - 0) / se

Z_pos = norm.ppf(1 - alpha)

print("=== 12. Two-Sample Z-Test Means (One-Sided Right) ===")
print(f"Z_cal = {Z_cal}")
print(f"Z_pos = {Z_pos}")
print(f"p-value = {1 - norm.cdf(Z_cal)}")
print(f"Boundary = {0 + Z_pos * se}")
# Reject H0 if Z_cal > Z_pos
# Reject H0 if p-value < alpha
# Reject H0 if (xbar1-xbar2) > boundary


## ============================================================
## 13. TWO-SAMPLE t-TEST EQUAL VARIANCE (TWO-SIDED)
## sigma unknown, use pooled SD
## Keywords: "differ", "same", "equal"
## ============================================================

X = np.array([10, 12, 13, 14, 15, 2, 7, 8])
# Using built-in
print("=== 13. Two-Sample t-Test Equal Var (Built-in) ===")
a = np.array([56, 128.6, 12, 123.8, 64.34, 78, 763.3])
b = np.array([1.1, 2.9, 4.2])
print("Equal var: ", stats.ttest_ind(a, b, equal_var=True, alternative='two-sided'))
print("Unequal var:", stats.ttest_ind(a, b, equal_var=False, alternative='two-sided'))
# Reject H0 if pvalue < alpha


## ============================================================
## 14. TWO-SAMPLE t-TEST EQUAL VARIANCE (MANUAL - ONE-SIDED)
## Pooled SD formula
## ============================================================

Bus = np.array([2.86, 2.77, 3.18, 2.80, 3.14, 2.87, 3.19, 3.24, 2.91, 3, 2.83])
As = np.array([3.35, 3.32, 3.36, 3.63, 3.41, 3.37, 3.45, 3.43, 3.44, 3.17, 3.26, 3.18, 3.41])

xbar1 = np.mean(Bus)
xbar2 = np.mean(As)
s1 = stdev(Bus)
s2 = stdev(As)
n1 = len(Bus)
n2 = len(As)
alpha = 0.02
hypothesized_diff = -0.25

# Pooled SD
sp = np.sqrt(((n1-1)*s1**2 + (n2-1)*s2**2) / (n1 + n2 - 2))
se = sp * np.sqrt(1/n1 + 1/n2)
df = n1 + n2 - 2

t_cal = ((xbar1 - xbar2) - hypothesized_diff) / se
t_neg = t.ppf(alpha, df)

print("=== 14. Two-Sample t-Test Pooled (One-Sided Left) ===")
print(f"Pooled SD = {sp}")
print(f"SE = {se}")
print(f"t_cal = {t_cal}")
print(f"t_neg = {t_neg}")
print(f"df = {df}")
print(f"p-value = {t.cdf(t_cal, df)}")
print(f"Boundary = {hypothesized_diff + t_neg * se}")
# Reject H0 if t_cal < t_neg
# Reject H0 if p-value < alpha


## ============================================================
## 15. TWO-SAMPLE t-TEST UNEQUAL VARIANCE (MANUAL - ONE-SIDED)
## ============================================================

xbar1, xbar2 = 27.2, 32.4
s1, s2 = 3.8, 4.3
n1, n2 = 12, 9
alpha = 0.01

sp = np.sqrt(((n1-1)*s1**2 + (n2-1)*s2**2) / (n1 + n2 - 2))
se = sp * np.sqrt(1/n1 + 1/n2)
df = n1 + n2 - 2

t_cal = ((xbar1 - xbar2) - 0) / se
t_neg = t.ppf(alpha, df)

print("=== 15. Two-Sample t-Test Unequal Var (One-Sided Left) ===")
print(f"t_cal = {t_cal}")
print(f"t_neg = {t_neg}")
print(f"p-value = {t.cdf(t_cal, df)}")
print(f"Boundary = {0 + t_neg * se}")
# Reject H0 if t_cal < t_neg
# Reject H0 if p-value < alpha


## ============================================================
## 16. TWO-SAMPLE t-TEST USING BUILT-IN (ttest_ind)
## ============================================================

a = np.array([56, 128.6, 12, 123.8, 64.34, 78, 763.3])
b = np.array([1.1, 2.9, 4.2])

print("=== 16. ttest_ind (Built-in) ===")
print("Two-sided equal: ", stats.ttest_ind(a, b, equal_var=True, alternative='two-sided'))
print("Two-sided unequal:", stats.ttest_ind(a, b, equal_var=False, alternative='two-sided'))
print("Left equal: ", stats.ttest_ind(a, b, equal_var=True, alternative='less'))
print("Right equal: ", stats.ttest_ind(a, b, equal_var=True, alternative='greater'))
# equal_var=True --> pooled (df = n1+n2-2)
# equal_var=False --> Welch's (fractional df)
# alternative: 'two-sided', 'less', 'greater'
# Reject H0 if pvalue < alpha


## ============================================================
## 17. TWO-SAMPLE PROPORTION Z-TEST (ONE-SIDED LEFT)
## Keywords: "increased" but p1(old) - p2(new) < 0
## ============================================================

n1, n2 = 400, 380
p1 = 166 / 400
p2 = 205 / 380
alpha = 0.01

phat = (n1*p1 + n2*p2) / (n1 + n2)
se = np.sqrt(phat * (1 - phat) * (1/n1 + 1/n2))
Z_cal = ((p1 - p2) - 0) / se

Z_neg = norm.ppf(alpha)

print("=== 17. Two-Sample Proportion Z-Test (One-Sided Left) ===")
print(f"Pooled proportion = {phat}")
print(f"Z_cal = {Z_cal}")
print(f"Z_neg = {Z_neg}")
print(f"p-value = {norm.cdf(Z_cal)}")
print(f"Boundary = {0 + Z_neg * se}")
# Reject H0 if Z_cal < Z_neg
# Reject H0 if p-value < alpha
# Reject H0 if (p1-p2) < boundary


## ============================================================
## 18. TWO-SAMPLE PROPORTION USING BUILT-IN (proportions_ztest)
## ============================================================

count = np.array([166, 205])
nobs = np.array([400, 380])

print("=== 18. Two-Sample proportions_ztest (Built-in) ===")
stat, pval = proportions_ztest(count, nobs)
print(f"Two-sided: stat={stat:.4f}, pval={pval:.4f}")
# Reject H0 if pval < alpha


## ============================================================
## 19. PAIRED t-TEST (DEPENDENT SAMPLES) - MANUAL
## Same subjects measured twice (before/after)
## ============================================================

a = np.array([56, 128, 12, 123, 64, 78, 763])
b = np.array([46, 100, 5, 121, 54, 80, 700])

c = a - b
xbar = np.mean(c)
sample_sd = stdev(c)
n = len(c)
df = n - 1
alpha = 0.05

se = sample_sd / np.sqrt(n)
t_cal = (xbar - 0) / se

t_pos = t.ppf(1 - alpha/2, df)
t_neg = t.ppf(alpha/2, df)

print("=== 19. Paired t-Test Manual (Two-Sided) ===")
print(f"Differences = {list(c)}")
print(f"Mean diff = {xbar}")
print(f"t_cal = {t_cal}")
print(f"t_table = [{t_neg}, {t_pos}]")
print(f"p-value = {(1 - t.cdf(t_cal, df)) * 2}")
# Accept H0 if |t_cal| < |t_table|
# Accept H0 if p-value > alpha


## ============================================================
## 20. PAIRED t-TEST USING BUILT-IN (ttest_rel)
## ============================================================

a = np.array([56, 128, 12, 123, 64, 78, 763])
b = np.array([46, 100, 5, 121, 54, 80, 700])

print("=== 20. ttest_rel (Built-in) ===")
print("Two-sided:", stats.ttest_rel(a, b, alternative='two-sided'))
print("Greater: ", stats.ttest_rel(a, b, alternative='greater'))
print("Less: ", stats.ttest_rel(a, b, alternative='less'))
# alternative: 'two-sided', 'less', 'greater'
# Reject H0 if pvalue < alpha"""
    print(sheet)
546
+
547
def mle(self):
    """Print maximum-likelihood-estimation templates — Normal, Binomial, Poisson.

    Writes a runnable code template to stdout; returns None.

    Fixes over the previous revision of the template:
    - ``minimize(nll, ...)`` and ``minimize(BLL, ...)`` referenced names that
      were never defined; they now call the functions the template actually
      defines (``normal_dist`` / ``binomial_dist``).
    - The Poisson loop used ``poisson.pmf(x[i], lam)`` while iterating the
      *values* of ``x`` — an indexing bug; it now evaluates the pmf at each
      observation directly.
    - The final grid search called an undefined ``pd(lam, data)``; it now
      calls ``poisson_dist`` on a defined sample.
    - ``data`` is now defined before use in sections 1 and 3 (placeholder
      samples the user is expected to replace).
    """
    print("""\
import numpy as np
from scipy.stats import norm, poisson
from scipy.optimize import minimize
from math import factorial


## ============================================================
## 1. MLE - NORMAL DISTRIBUTION
## ============================================================

data = np.array([9.2, 10.5, 8.7, 11.1, 10.2, 9.9])  # replace with your sample

def normal_dist(params, data):
    mu, sd = params
    log1 = norm.logpdf(data, mu, sd)
    lll = -np.sum(log1)  # negative log-likelihood (minimize this)
    return lll

initial_guess = [10, 3]
result = minimize(normal_dist, initial_guess, args=(data,), bounds=((None, None), (1e-5, None)))
print("mean", result.x[0])
print("sd", result.x[1])


## ============================================================
## 2. MLE - BINOMIAL DISTRIBUTION
## ============================================================

hospital_data = [
    (100, 85),
    (150, 120),
    (200, 160),
    (250, 210),
    (300, 260)
]

inta = [0.1]
def binomial_dist(p, n, k):
    a1 = factorial(n)/((factorial(n-k)*factorial(k)))
    a2 = (p**k)*((1-p)**(n-k))
    ll = a1*a2
    logll = np.log(ll)
    return -logll
for n, k in hospital_data:
    result = minimize(binomial_dist, inta, args=(n, k), bounds=[(0.00005, 0.99995)])
    print("prob", result.x[0])


## ============================================================
## 3. MLE - POISSON DISTRIBUTION
## ============================================================

data = np.array([4, 6, 3, 5, 7, 4, 5])  # replace with observed counts

def poisson_dist(lam, x):
    log = []
    for obs in x:
        logs = np.log(poisson.pmf(obs, lam))  # log-likelihood of each observation
        log.append(logs)
    return np.sum(log)
lams = np.arange(0, 12, 1)
ls = []
for lam in lams:
    lsa = poisson_dist(lam, data)
    ls.append(lsa)
bestll = lams[np.argmax(ls)]
print(bestll)""")
613
+
614
def nonpara(self):
    """Print skeletons for non-parametric tests.

    Mann-Whitney U, Kruskal-Wallis H, and the sign test, with empty
    arrays for the user to fill in. Writes to stdout; returns None.
    """
    # Assemble the snippet in a local, then emit it unchanged.
    snippet = """\
import numpy as np
from scipy.stats import mannwhitneyu
from scipy import stats


## ============================================================
## 1. MANN-WHITNEY U TEST
## ============================================================

a=np.array([])
b=np.array([])
u_stat,p_value=mannwhitneyu(a,b,alternative="")


## ============================================================
## 2. KRUSKAL-WALLIS H TEST
## ============================================================

a=np.array([])
b=np.array([])
results=stats.kruskal(a,b)


## ============================================================
## 3. SIGN TEST
## ============================================================

from statsmodels.stats.descriptivestats import sign_test
a=np.array([])
b=np.array([])
diff=a-b
stat_val,p_val=sign_test(diff)"""
    print(snippet)
649
+
650
def linearregression(self):
    """Print a simple-linear-regression walkthrough template.

    Covers EDA, VIF, sklearn fit, RSS, statsmodels OLS summary,
    prediction, train/test split, and test MSE. Writes to stdout;
    returns None.
    """
    # Local binding first, then a single print; text is unchanged.
    walkthrough = """\
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import (train_test_split,KFold,cross_val_score)
import statsmodels.formula.api as smf
from statsmodels.stats.outliers_influence import (variance_inflation_factor)
import statsmodels.api as sm

df = pd.read_csv('data.csv')
print(df.head())
print(df.info())
print(df.corr())
# Regplot
sns.regplot(x='FV1',y='targetvariable',data=df)
plt.show()

# SIMPLE LINEAR REGRESSION
lr = LinearRegression()
X = df['FV1'].values.reshape(-1,1)
Y = df['targetvariable']

#VIF
X_vif = pd.DataFrame(X,columns=['FV1'])
X_vif = sm.add_constant(X_vif)
vif = pd.DataFrame()
vif["Feature"] = X_vif.columns
vif["VIF"] = [variance_inflation_factor(X_vif.values,i)
for i in range(X_vif.shape[1])]
print(vif)

lr.fit(X,Y)
print("Intercept =", lr.intercept_)
print("Coefficient =", lr.coef_)
print(f"Equation: targetvariable = "f"{lr.intercept_:.4f} + "f"{lr.coef_[0]:.4f} * FV1")

# RSS
RSS = np.sum((lr.intercept_ +lr.coef_ * X -Y.values.reshape(-1,1))**2)
print("RSS =", RSS)

# SUMMARY
lm_fit = smf.ols('targetvariable ~ FV1',df).fit()
print(lm_fit.summary())

# PREDICTION
predictions = lm_fit.predict(pd.DataFrame({'FV1':[5,10,15]}))
print(predictions)

# TRAIN TEST SPLIT
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.20,random_state=123)

lr.fit(X_train,Y_train)
pred_test = lr.predict(X_test)

# TEST MSE
mse_test = np.mean((Y_test - pred_test)**2)
print("Test MSE =", mse_test)
print("R-Squared =", lm_fit.rsquared)"""
    print(walkthrough)
712
+
713
def multilr(self):
    """Print a multiple-linear-regression walkthrough template.

    Covers EDA, VIF, train/test split, sklearn fit/score, statsmodels OLS
    (additive and interaction models), scaling, and K-Fold CV. Writes to
    stdout; returns None.

    Fix over the previous revision: after fitting the interaction model
    ``lm_fit2``, the template printed ``lm_fit.rsquared`` (the additive
    model's R²) — a copy-paste bug; it now prints ``lm_fit2.rsquared``.
    NOTE(review): ``X_scaled`` is computed but the CV calls use the raw
    ``X`` — left as-is since OLS fit quality is scale-invariant, but worth
    confirming the intent.
    """
    print("""\
# MULTIPLE LINEAR REGRESSION
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import (train_test_split,KFold,cross_val_score)
from sklearn.preprocessing import StandardScaler
import statsmodels.formula.api as smf
from statsmodels.stats.outliers_influence import (variance_inflation_factor)
import statsmodels.api as sm

df = pd.read_csv('data.csv')
print(df.head())
print(df.info())
print(df.corr())

sns.regplot(x='FV1',y='targetvariable',data=df)
plt.show()
sns.regplot(x='FV2',y='targetvariable',data=df)
plt.show()
sns.regplot(x='FV3',y='targetvariable',data=df)
plt.show()

X = df[['FV1','FV2','FV3']]
Y = df['targetvariable']

X_vif = sm.add_constant(X)
vif = pd.DataFrame()
vif["Feature"] = X_vif.columns
vif["VIF"] = [variance_inflation_factor(X_vif.values,i)
for i in range(X_vif.shape[1])]
print(vif)

X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.20,random_state=123)
lr = LinearRegression()
lr.fit(X_train,Y_train)
print("Intercept =", lr.intercept_)
print("Coefficients =", lr.coef_)
pred_test = lr.predict(X_test)
print(pred_test)
mse = np.mean((Y_test - pred_test)**2)
print("MSE =", mse)
r2_score = lr.score(X_test,Y_test)
print("R-Squared =", r2_score)

#1
lm_fit = smf.ols('targetvariable ~ FV1 + FV2 + FV3',df).fit()
print(lm_fit.summary())
print("R-Squared =", lm_fit.rsquared)
#2
lm_fit2 = smf.ols('targetvariable ~ FV1 * FV2',df).fit()
print(lm_fit2.summary())
print("R-Squared =", lm_fit2.rsquared)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
model = LinearRegression()
kfold = KFold(10,random_state=0,shuffle=True)
mse_cv = cross_val_score(model,X,Y,cv=kfold,scoring='neg_mean_squared_error')
print("Average MSE =", np.mean(-mse_cv))
r2_cv = cross_val_score(model,X,Y,cv=kfold,scoring='r2')
print("R2 Score Per Fold =")
print(r2_cv)
print("Average Accuracy (R2 Score) =",np.mean(r2_cv))""")
781
+
782
def ridge(self):
    """Print a Ridge-regression template with RidgeCV alpha selection.

    Scaling, train/test split, a fixed-alpha fit, then a CV-selected
    best alpha refit with MSE/R². Writes to stdout; returns None.
    """
    # Build the snippet locally, then emit; content is unchanged.
    snippet = """\
import pandas as pd
import numpy as np
from sklearn.linear_model import (Ridge,RidgeCV)

from sklearn.model_selection import (train_test_split)

from sklearn.metrics import (mean_squared_error)

from sklearn.preprocessing import (StandardScaler)

df = pd.read_csv('data.csv')
print(df.head())
print(df.info())
print(df.corr())

X = df[['FV1','FV2','FV3','FV4']]
Y = df['targetvariable']
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, Y_train, Y_test = train_test_split(X_scaled,Y,test_size=0.20,random_state=123)
ridge = Ridge(alpha=10)
ridge.fit(X_train,Y_train)

print("Ridge Coefficients =")
print(pd.Series(ridge.coef_,index=X.columns))

pred = ridge.predict(X_test)
print(pred)

mse = mean_squared_error(Y_test,pred)
print("Test MSE =", mse)

r2 = ridge.score(X_test,Y_test)
print("R-Squared =", r2)

alphas = 10 ** np.linspace(10,-2,100) * 0.5

ridgecv = RidgeCV(alphas=alphas)

ridgecv.fit(X_scaled,Y)

print("Best Alpha =",ridgecv.alpha_)
ridge_best = Ridge(alpha=ridgecv.alpha_)

ridge_best.fit(X_train,Y_train)
pred_best = ridge_best.predict(X_test)

mse_best = mean_squared_error(Y_test,pred_best)

print("Best Model Test MSE =",mse_best)
r2_best = ridge_best.score(X_test,Y_test)

print("Best Model R-Squared =",r2_best)"""
    print(snippet)
839
+
840
def lasso(self):
    """Print a Lasso-regression template with LassoCV alpha selection.

    Scaling, train/test split, a fixed-alpha fit, then a CV-selected
    best alpha refit with coefficients, MSE, and R². Writes to stdout;
    returns None.
    """
    # Build the snippet locally, then emit; content is unchanged.
    snippet = """\
import pandas as pd
import numpy as np

from sklearn.linear_model import (Lasso,LassoCV)
from sklearn.model_selection import (train_test_split)
from sklearn.metrics import (
mean_squared_error)
from sklearn.preprocessing import (StandardScaler)

df = pd.read_csv('data.csv')
print(df.head())
print(df.info())
print(df.corr())

X = df[['FV1','FV2','FV3','FV4']]
Y = df['targetvariable']
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_train, X_test, Y_train, Y_test = train_test_split(X_scaled,Y,test_size=0.20,random_state=123)
lasso = Lasso(alpha=10)
lasso.fit(X_train,Y_train)

print("Lasso Coefficients =")
print(pd.Series(lasso.coef_,index=X.columns))

print("Notice: Lasso may set some coefficients to EXACTLY 0")
pred_lasso = lasso.predict(X_test)
print(pred_lasso)
mse = mean_squared_error(Y_test,pred_lasso)
print("Test MSE =", mse)
r2 = lasso.score(X_test,Y_test)
print("R-Squared =", r2)

alphas = 10 ** np.linspace(10,-2,100) * 0.5
lassocv = LassoCV(alphas=alphas)
lassocv.fit(X_train,Y_train)
print("Best Alpha =",lassocv.alpha_)
lasso_best = Lasso(alpha=lassocv.alpha_)

lasso_best.fit(X_train,Y_train)
pred_lasso_best = lasso_best.predict(X_test)
print("Lasso Best Model Coefficients =")
print(pd.Series(lasso_best.coef_,index=X.columns))
mse_best = mean_squared_error(Y_test,pred_lasso_best)
print("Best Model Test MSE =",mse_best)
r2_best = lasso_best.score(X_test,Y_test)
print("Best Model R-Squared =",r2_best)"""
    print(snippet)
890
+
891
+ def ridgelasso(self):
892
+ """Combined Ridge, Lasso, Linear Regression with VIF, OLS, and K-Fold."""
893
+ print("""\
894
+ import pandas as pd
895
+ import numpy as np
896
+ import seaborn as sns
897
+ import matplotlib.pyplot as plt
898
+
899
+ from sklearn.linear_model import (LinearRegression,Ridge,RidgeCV,Lasso,LassoCV)
900
+ from sklearn.model_selection import (train_test_split,KFold,cross_val_score)
901
+ from sklearn.metrics import (mean_squared_error)
902
+ from sklearn.preprocessing import (StandardScaler)
903
+ import statsmodels.formula.api as smf
904
+ from statsmodels.stats.outliers_influence import (variance_inflation_factor)
905
+ import statsmodels.api as sm
906
+ df = pd.read_csv('data.csv')
907
+ print(df.head())
908
+ print(df.info())
909
+ print(df.corr())
910
+
911
+ sns.regplot(x='FV1',y='targetvariable',data=df)
912
+ plt.show()
913
+ sns.regplot(x='FV2',y='targetvariable',data=df)
914
+ plt.show()
915
+ sns.regplot(x='FV3',y='targetvariable',data=df)
916
+ plt.show()
917
+
918
+ X = df[['FV1','FV2','FV3','FV4']]
919
+ Y = df['targetvariable']
920
+
921
+ X_vif = sm.add_constant(X)
922
+ vif = pd.DataFrame()
923
+ vif["Feature"] = X_vif.columns
924
+ vif["VIF"] = [variance_inflation_factor(X_vif.values,i)
925
+ for i in range(X_vif.shape[1])]
926
+ print(vif)
927
+
928
+ scaler = StandardScaler()
929
+ X_scaled = scaler.fit_transform(X)
930
+ X_train, X_test, Y_train, Y_test = train_test_split(X_scaled,Y,test_size=0.20,random_state=123)
931
+ lr = LinearRegression()
932
+ lr.fit(X_train,Y_train)
933
+ print("Intercept =", lr.intercept_)
934
+ print("Coefficients =")
935
+ print(pd.Series(lr.coef_,index=X.columns))
936
+ pred_lr = lr.predict(X_test)
937
+ mse_lr = mean_squared_error(Y_test,pred_lr)
938
+ print("Linear Regression MSE =", mse_lr)
939
+ r2_lr = lr.score(X_test,Y_test)
940
+ print("Linear Regression R-Squared =", r2_lr)
941
+
942
+ lm_fit = smf.ols('targetvariable ~ FV1 + FV2 + FV3 + FV4',df).fit()
943
+ print(lm_fit.summary())
944
+ print("R-Squared =", lm_fit.rsquared)
945
+ print("Adjusted R-Squared =",lm_fit.rsquared_adj)
946
+
947
+ ridge = Ridge(alpha=10)
948
+ ridge.fit(X_train,Y_train)
949
+ print("Ridge Coefficients =")
950
+
951
+ print(pd.Series(ridge.coef_,index=X.columns))
952
+ pred_ridge = ridge.predict(X_test)
953
+ mse_ridge = mean_squared_error(Y_test,pred_ridge)
954
+ print("Ridge Test MSE =", mse_ridge)
955
+ r2_ridge = ridge.score(X_test,Y_test)
956
+ print("Ridge R-Squared =", r2_ridge)
957
+ alphas = 10 ** np.linspace(10,-2,100) * 0.5
958
+ ridgecv = RidgeCV(alphas=alphas)
959
+ ridgecv.fit(X_scaled,Y)
960
+ print("Best Ridge Alpha =",ridgecv.alpha_)
961
+
962
+ lasso = Lasso(alpha=10)
963
+ lasso.fit(X_train,Y_train)
964
+ print("Lasso Coefficients =")
965
+ print(pd.Series(lasso.coef_,index=X.columns))
966
+ print("Notice: Lasso may set coefficients to EXACTLY 0")
967
+ pred_lasso = lasso.predict(X_test)
968
+ mse_lasso = mean_squared_error(Y_test,pred_lasso)
969
+ print("Lasso Test MSE =", mse_lasso)
970
+ r2_lasso = lasso.score(X_test,Y_test)
971
+ print("Lasso R-Squared =", r2_lasso)
972
+ model = LinearRegression()
973
+
974
+ kfold = KFold(10,random_state=0,shuffle=True)
975
+ r2_cv = cross_val_score(model,X_scaled,Y,cv=kfold,scoring='r2')
976
+ print("R2 Score Per Fold =")
977
+ print(r2_cv)
978
+ print("Average Accuracy (R2 Score) =",np.mean(r2_cv))""")
979
+
980
+ def logi(self):
981
+ """Logistic regression with VIF, confusion matrix, classification report, K-Fold."""
982
+ print("""\
983
+ import numpy as np
984
+ import pandas as pd
985
+ from sklearn.linear_model import LogisticRegression
986
+ from sklearn.model_selection import (train_test_split,KFold,cross_val_score)
987
+ from sklearn.metrics import (confusion_matrix,accuracy_score,classification_report)
988
+ from statsmodels.stats.outliers_influence import (variance_inflation_factor)
989
+ import statsmodels.api as sm
990
+
991
+ df = pd.read_csv('data.csv')
992
+
993
+ print(df.head())
994
+ print(df.info())
995
+
996
+ X = df[['FV1','FV2','FV3','FV4']]
997
+ Y = df['targetvariable']
998
+
999
+ X_vif = sm.add_constant(X)
1000
+ vif_data = pd.DataFrame()
1001
+ vif_data["Feature"] = X_vif.columns
1002
+ vif_data["VIF"] = [variance_inflation_factor(X_vif.values,i)
1003
+ for i in range(X_vif.shape[1])]
1004
+ print(vif_data)
1005
+ X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.20,random_state=123)
1006
+ print("Train Shape =", X_train.shape)
1007
+ print("Test Shape =", X_test.shape)
1008
+ logit = LogisticRegression()
1009
+ logit.fit(X_train, Y_train)
1010
+ print("Intercept =", logit.intercept_)
1011
+ print("Coefficients =", logit.coef_)
1012
+ Y_pred = logit.predict(X_test)
1013
+ print("Predictions =")
1014
+ print(Y_pred)
1015
+
1016
+ cm = confusion_matrix(Y_test,Y_pred)
1017
+ print("Confusion Matrix =")
1018
+ print(cm)
1019
+
1020
+ accuracy = accuracy_score(Y_test,Y_pred)
1021
+ print("Accuracy =", accuracy)
1022
+ print(classification_report(Y_test,Y_pred))
1023
+ K = 5
1024
+ kfold = KFold(K,random_state=0,shuffle=True)
1025
+
1026
+ accuracy_cv = cross_val_score(logit,X,Y,cv=kfold,scoring='accuracy')
1027
+ print("Accuracy Per Fold =")
1028
+ print(accuracy_cv)
1029
+ print("Average Accuracy =",np.mean(accuracy_cv))""")
1030
+
1031
+ def annova(self):
1032
+ """ANOVA — One-way, Two-way, and Tukey post-hoc."""
1033
+ print("""\
1034
+ import pandas as pd
1035
+ import statsmodels.api as sm
1036
+ from statsmodels.formula.api import ols
1037
+ from statsmodels.stats.multicomp import pairwise_tukeyhsd
1038
+
1039
+
1040
+ ## ============================================================
1041
+ ## 1. ONE-WAY ANOVA
1042
+ ## ============================================================
1043
+
1044
+ df = pd.read_csv('data.csv')
1045
+
1046
+ df['column_name'] = df['column_name'].astype('category')
1047
+
1048
+ df.head()
1049
+ df.info()
1050
+
1051
+ oneway_fit = ols('target ~ factor', data=df).fit()
1052
+ oneway_anova = sm.stats.anova_lm(oneway_fit, typ=1)
1053
+ print(oneway_anova)
1054
+
1055
+ # IF 2 Factor variables are given they are to be compared individually
1056
+
1057
+ # If the P value of anova is less than 0.05 then Use Tukey for the Factor Variable.
1058
+
1059
+
1060
+ ## ============================================================
1061
+ ## 2. TWO-WAY ANOVA
1062
+ ## ============================================================
1063
+
1064
+ df2 = pd.read_csv('cars.csv')
1065
+
1066
+ twoway_fit = ols('target ~ C(factor1)+C(factor2)+C(factor1):C(factor2)', data=df2).fit()
1067
+ twoway_anova = sm.stats.anova_lm(twoway_fit, typ=2)
1068
+
1069
+
1070
+ ## ============================================================
1071
+ ## 3. TUKEY POST-HOC TEST
1072
+ ## ============================================================
1073
+
1074
+ tukey = pairwise_tukeyhsd(df1["target"],groups=df1["factor"])
1075
+ tukey._results_table""")
1076
+
1077
+ def manova(self):
1078
+ """MANOVA — One-way and Two-way."""
1079
+ print("""\
1080
+ import pandas as pd
1081
+ from statsmodels.multivariate.manova import MANOVA
1082
+
1083
+
1084
+ ## ============================================================
1085
+ ## 1. ONE-WAY MANOVA
1086
+ ## ============================================================
1087
+
1088
+ url = 'https://vincentarelbundock.github.io/Rdatasets/csv/datasets/iris.csv'
1089
+ df = pd.read_csv(url)
1090
+ print(df.head())
1091
+ df.columns = df.columns.str.replace(".", "_")
1092
+ print(df.head())
1093
+
1094
+ maov = MANOVA.from_formula('target1 + target2 + target3 + ... ~ factor', data=df)
1095
+ print(maov.mv_test())
1096
+
1097
+
1098
+ ## ============================================================
1099
+ ## 2. TWO-WAY MANOVA
1100
+ ## ============================================================
1101
+
1102
+ maov = MANOVA.from_formula('target1 + target2 + target3 + ... ~ factor1 + factor2', data=df)
1103
+ print(maov.mv_test())
1104
+
1105
+
1106
+ # If p value > 0.05, we reject H0""")
1107
+
1108
+
1109
# Module-level singleton: users do `import scypyy` and call
# `scypyy.get.<topic>()` to print the corresponding snippet.
get = _Get()
@@ -0,0 +1,32 @@
1
+ Metadata-Version: 2.4
2
+ Name: scypyy
3
+ Version: 0.7.0
4
+ Summary: A curated collection.
5
+ License-Expression: MIT
6
+ Project-URL: Homepage, https://google.com
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: Operating System :: OS Independent
9
+ Classifier: Intended Audience :: Science/Research
10
+ Classifier: Topic :: Scientific/Engineering
11
+ Requires-Python: >=3.8
12
+ Description-Content-Type: text/markdown
13
+ Dynamic: requires-python
14
+
15
+ # scypyy
16
+
17
+ A curated collection.
18
+ ## Installation
19
+
20
+ ```bash
21
+ pip install scypyy
22
+ ```
23
+
24
+ ## Usage
25
+
26
+ ```python
27
+ import scypyy
28
+
29
+ print(scypyy.get())
30
+ ```
31
+
32
+ This prints a help message listing the available snippet topics.
@@ -0,0 +1,9 @@
1
+ README.md
2
+ pyproject.toml
3
+ setup.py
4
+ scypyy/__init__.py
5
+ scypyy/core.py
6
+ scypyy.egg-info/PKG-INFO
7
+ scypyy.egg-info/SOURCES.txt
8
+ scypyy.egg-info/dependency_links.txt
9
+ scypyy.egg-info/top_level.txt
@@ -0,0 +1 @@
1
+ scypyy
scypyy-0.7.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
scypyy-0.7.0/setup.py ADDED
@@ -0,0 +1,17 @@
1
from pathlib import Path

from setuptools import setup, find_packages

setup(
    name="scypyy",
    version="0.7.0",
    packages=find_packages(),
    description="A curated collection.",
    # Read the README explicitly as UTF-8 and without leaking a file
    # handle: bare open().read() depends on the locale encoding (it can
    # fail on non-UTF-8 Windows locales) and relies on GC to close the file.
    long_description=Path("README.md").read_text(encoding="utf-8"),
    long_description_content_type="text/markdown",
    python_requires=">=3.8",
    classifiers=[
        "Programming Language :: Python :: 3",
        "Operating System :: OS Independent",
        "Intended Audience :: Science/Research",
        "Topic :: Scientific/Engineering",
    ],
)