py2ls 0.2.4.16__py3-none-any.whl → 0.2.4.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
py2ls/.git/index
CHANGED
Binary file
|
py2ls/ips.py
CHANGED
py2ls/ml2ls.py
CHANGED
@@ -2048,6 +2048,7 @@ def predict(
|
|
2048
2048
|
y_train: pd.Series,
|
2049
2049
|
x_true: pd.DataFrame = None,
|
2050
2050
|
y_true: Optional[pd.Series] = None,
|
2051
|
+
backward:bool=False, # backward_regression
|
2051
2052
|
common_features: set = None,
|
2052
2053
|
purpose: str = "classification", # 'classification' or 'regression'
|
2053
2054
|
cls: Optional[Dict[str, Any]] = None,
|
@@ -2252,6 +2253,12 @@ def predict(
|
|
2252
2253
|
if is_binary:
|
2253
2254
|
y_train = ips.df_encoder(pd.DataFrame(y_train), method="label")
|
2254
2255
|
print('is_binary:',is_binary)
|
2256
|
+
|
2257
|
+
# Perform backward feature selection
|
2258
|
+
if backward:
|
2259
|
+
selected_features = backward_regression(x_train, y_train, threshold_out=0.05)
|
2260
|
+
x_train=x_train[selected_features]
|
2261
|
+
|
2255
2262
|
if x_true is None:
|
2256
2263
|
x_train, x_true, y_train, y_true = train_test_split(
|
2257
2264
|
x_train,
|
@@ -2286,10 +2293,12 @@ def predict(
|
|
2286
2293
|
|
2287
2294
|
# y_train=y_train.values.ravel() if y_train is not None else None
|
2288
2295
|
# y_true=y_true.values.ravel() if y_true is not None else None
|
2289
|
-
y_train
|
2290
|
-
y_train
|
2291
|
-
|
2292
|
-
|
2296
|
+
if y_train is not None:
|
2297
|
+
y_train = (
|
2298
|
+
y_train.ravel() if isinstance(y_train, np.ndarray) else y_train.values.ravel()
|
2299
|
+
)
|
2300
|
+
if y_true is not None:
|
2301
|
+
y_true = y_true.ravel() if isinstance(y_true, np.ndarray) else y_true.values.ravel()
|
2293
2302
|
# Ensure common features are selected
|
2294
2303
|
if common_features is not None:
|
2295
2304
|
x_train, x_true = x_train[common_features], x_true[common_features]
|
@@ -3071,6 +3080,14 @@ def predict(
|
|
3071
3080
|
}
|
3072
3081
|
|
3073
3082
|
else:
|
3083
|
+
validation_scores = cal_metrics(
|
3084
|
+
y_true,
|
3085
|
+
y_pred,
|
3086
|
+
y_pred_proba=y_pred_proba,
|
3087
|
+
is_binary=is_binary,
|
3088
|
+
purpose=purpose,
|
3089
|
+
average="weighted",
|
3090
|
+
)
|
3074
3091
|
results[name] = {
|
3075
3092
|
"best_clf": gs.best_estimator_,
|
3076
3093
|
"best_params": gs.best_params_,
|
@@ -3079,6 +3096,8 @@ def predict(
|
|
3079
3096
|
"predictions_proba": (
|
3080
3097
|
y_pred_proba.tolist() if y_pred_proba is not None else None
|
3081
3098
|
),
|
3099
|
+
"y_train":y_train if y_train is not None else [],
|
3100
|
+
"y_true": y_true if y_true is not None else []
|
3082
3101
|
}
|
3083
3102
|
|
3084
3103
|
# Convert results to DataFrame
|
@@ -3540,6 +3559,49 @@ def img_datasets_preprocessing(
|
|
3540
3559
|
display(df_img.head())
|
3541
3560
|
|
3542
3561
|
return df_img
|
3562
|
+
|
3563
|
+
|
3564
|
+
def backward_regression(X: pd.DataFrame, y: pd.Series, initial_list=None, threshold_out=0.05, verbose=True):
    """
    Select features by backward elimination on OLS p-values.

    Adapted from
    https://www.kaggle.com/code/adibouayjan/house-price-step-by-step-modeling

    Evaluates the p-values of all features, which represent the probability of
    observing a coefficient as extreme as the one calculated if the feature had
    no true effect on the target. On every pass an OLS model is fitted on the
    remaining features and the feature with the largest p-value is dropped,
    until no p-value exceeds ``threshold_out`` or no feature is left.

    Args:
        X -- features values
        y -- target variable; may also be the name of a column of ``X``, in
             which case that column is used as the target and removed from
             the features
        initial_list -- features header. Accepted for backward compatibility
             only: the elimination always starts from all columns of ``X``.
        threshold_out -- pvalue threshold of features to drop
        verbose -- true to log every removed feature

    Returns:
        list of selected features for modeling (column labels of ``X``).
        The final selection is always printed, regardless of ``verbose``.
    """
    if isinstance(y, str) and y in X.columns:
        y_col_name = y
        y = X[y_col_name]
        X = X.drop(y_col_name, axis=1)

    included = list(X.columns)
    if included:
        # Imported lazily: only needed when there is at least one model to fit.
        import statsmodels.api as sm

    while included:
        model = sm.OLS(y, sm.add_constant(X[included])).fit()
        # Exclude the intercept by label rather than by position: add_constant
        # skips adding "const" when X already contains a constant column, and a
        # positional slice would then discard a real feature's p-value.
        pvalues = model.pvalues.drop("const", errors="ignore")
        worst_pval = pvalues.max()
        # Written as "not >" so that a NaN maximum also stops the loop.
        if not worst_pval > threshold_out:
            break
        worst_feature = pvalues.idxmax()
        included.remove(worst_feature)
        if verbose:
            print(f"Removing feature '{worst_feature}' with p-value {worst_pval}")

    print(f"\nSelected Features:\n{included}")
    return included  # Returns the list of selected features
|
3603
|
+
|
3604
|
+
|
3543
3605
|
# Function to apply CLAHE (Contrast Limited Adaptive Histogram Equalization)
|
3544
3606
|
def apply_clahe(img):
|
3545
3607
|
import cv2
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: py2ls
|
3
|
-
Version: 0.2.4.16
|
3
|
+
Version: 0.2.4.18
|
4
4
|
Summary: py(thon)2(too)ls
|
5
5
|
Author: Jianfeng
|
6
6
|
Author-email: Jianfeng.Liu0413@gmail.com
|
@@ -200,6 +200,7 @@ Requires-Dist: selenium (>=4.23.1)
|
|
200
200
|
Requires-Dist: setuptools (>=70.3.0)
|
201
201
|
Requires-Dist: shellingham (>=1.5.4)
|
202
202
|
Requires-Dist: six (>=1.16.0)
|
203
|
+
Requires-Dist: skorch (>=1.0.0)
|
203
204
|
Requires-Dist: sniffio (>=1.3.1)
|
204
205
|
Requires-Dist: sortedcontainers (>=2.4.0)
|
205
206
|
Requires-Dist: soupsieve (>=2.5)
|
@@ -17,7 +17,7 @@ py2ls/.git/hooks/pre-receive.sample,sha256=pMPSuce7P9jRRBwxvU7nGlldZrRPz0ndsxAlI
|
|
17
17
|
py2ls/.git/hooks/prepare-commit-msg.sample,sha256=6d3KpBif3dJe2X_Ix4nsp7bKFjkLI5KuMnbwyOGqRhk,1492
|
18
18
|
py2ls/.git/hooks/push-to-checkout.sample,sha256=pT0HQXmLKHxt16-mSu5HPzBeZdP0lGO7nXQI7DsSv18,2783
|
19
19
|
py2ls/.git/hooks/update.sample,sha256=jV8vqD4QPPCLV-qmdSHfkZT0XL28s32lKtWGCXoU0QY,3650
|
20
|
-
py2ls/.git/index,sha256=
|
20
|
+
py2ls/.git/index,sha256=icdOlk1IdpGPwTdGS8m0F_bYf3XNjgzp1gMlKFKx8fU,4232
|
21
21
|
py2ls/.git/info/exclude,sha256=ZnH-g7egfIky7okWTR8nk7IxgFjri5jcXAbuClo7DsE,240
|
22
22
|
py2ls/.git/logs/HEAD,sha256=8ID7WuAe_TlO9g-ARxhIJYdgdL3u3m7-1qrOanaIUlA,3535
|
23
23
|
py2ls/.git/logs/refs/heads/main,sha256=8ID7WuAe_TlO9g-ARxhIJYdgdL3u3m7-1qrOanaIUlA,3535
|
@@ -234,9 +234,9 @@ py2ls/export_requirements.py,sha256=x2WgUF0jYKz9GfA1MVKN-MdsM-oQ8yUeC6Ua8oCymio,
|
|
234
234
|
py2ls/fetch_update.py,sha256=9LXj661GpCEFII2wx_99aINYctDiHni6DOruDs_fdt8,4752
|
235
235
|
py2ls/freqanalysis.py,sha256=F4218VSPbgL5tnngh6xNCYuNnfR-F_QjECUUxrPYZss,32594
|
236
236
|
py2ls/ich2ls.py,sha256=3E9R8oVpyYZXH5PiIQgT3CN5NxLe4Dwtm2LwaeacE6I,21381
|
237
|
-
py2ls/ips.py,sha256=
|
237
|
+
py2ls/ips.py,sha256=2TWuOSFquwhmPdxkmmvU_pcIbE5M0S9aRPtuQgs5B7A,297706
|
238
238
|
py2ls/ml2ls copy.py,sha256=iZJrFLIrdfTieAY2BDsxQFTm29smwnJh0aC4hRB9VGM,113314
|
239
|
-
py2ls/ml2ls.py,sha256=
|
239
|
+
py2ls/ml2ls.py,sha256=Mkf374TLsCdBVYtSYptFzegn8euda33TA-M73nGtzV0,144368
|
240
240
|
py2ls/mol.py,sha256=AZnHzarIk_MjueKdChqn1V6e4tUle3X1NnHSFA6n3Nw,10645
|
241
241
|
py2ls/netfinder.py,sha256=R70NkrnO8LlXjT1y7bf2TN-yE4yOeAYhb0jDBiNp8XA,57536
|
242
242
|
py2ls/ocr.py,sha256=5lhUbJufIKRSOL6wAWVLEo8TqMYSjoI_Q-IO-_4u3DE,31419
|
@@ -246,6 +246,6 @@ py2ls/sleep_events_detectors.py,sha256=bQA3HJqv5qnYKJJEIhCyhlDtkXQfIzqksnD0YRXso
|
|
246
246
|
py2ls/stats.py,sha256=qBn2rJmNa_QLLUqjwYqXUlGzqmW94sgA1bxJU2FC3r0,39175
|
247
247
|
py2ls/translator.py,sha256=77Tp_GjmiiwFbEIJD_q3VYpQ43XL9ZeJo6Mhl44mvh8,34284
|
248
248
|
py2ls/wb_detector.py,sha256=7y6TmBUj9exCZeIgBAJ_9hwuhkDh1x_-yg4dvNY1_GQ,6284
|
249
|
-
py2ls-0.2.4.
|
250
|
-
py2ls-0.2.4.
|
251
|
-
py2ls-0.2.4.
|
249
|
+
py2ls-0.2.4.18.dist-info/METADATA,sha256=q3L5q5BJ3olDRMxjimPsTEEE79pqmLwh93bxgsevPNU,20078
|
250
|
+
py2ls-0.2.4.18.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
|
251
|
+
py2ls-0.2.4.18.dist-info/RECORD,,
|
File without changes
|