chrom-qsar 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Your Name / Your Lab
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,30 @@
1
+ Metadata-Version: 2.4
2
+ Name: chrom-qsar
3
+ Version: 0.1.0
4
+ Summary: A streamlined machine learning pipeline for QSAR and regression tasks with SHAP interpretability.
5
+ Author-email: Your Name <your.email@university.edu>
6
+ License: MIT
7
+ Keywords: machine-learning,qsar,shap,regression,optuna
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Science/Research
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.8
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Requires-Python: >=3.8
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: pandas>=1.3.0
20
+ Requires-Dist: numpy>=1.21.0
21
+ Requires-Dist: scikit-learn>=1.0.0
22
+ Requires-Dist: xgboost>=1.5.0
23
+ Requires-Dist: lightgbm>=3.3.0
24
+ Requires-Dist: optuna>=3.0.0
25
+ Requires-Dist: shap>=0.41.0
26
+ Requires-Dist: matplotlib>=3.4.0
27
+ Requires-Dist: seaborn>=0.11.0
28
+ Requires-Dist: joblib>=1.1.0
29
+ Requires-Dist: openpyxl>=3.0.0
30
+ Dynamic: license-file
@@ -0,0 +1,8 @@
1
+ # chrom_qsar/__init__.py
2
+
3
+ from .pipeline import QSARModelTrainer, QSARModelAnalyzer
4
+
5
+ __version__ = "0.1.0"
6
+ __author__ = "Liu Xinye"
7
+
8
+ __all__ = ["QSARModelTrainer", "QSARModelAnalyzer", "__version__"]
@@ -0,0 +1,24 @@
1
+ import os
2
+ import pandas as pd
3
+
4
def load_and_clean_data(filepath: str):
    """Load a table and split it into numeric features and a numeric target.

    The first column is treated as the target and every remaining column as
    a feature. All values are coerced with ``pd.to_numeric(errors='coerce')``
    and any row that ends up with a missing value (in the target or in any
    feature) is discarded.

    Args:
        filepath: Path to the data file. If it does not exist, the same path
            is retried relative to the parent directory (``../filepath``).
            Files whose name ends in ``.csv`` are read with
            ``pd.read_csv``; everything else is read with ``pd.read_excel``.

    Returns:
        A ``(X_clean, y_clean)`` tuple: the feature DataFrame and the target
        Series (named ``'target'``), restricted to the fully numeric rows.

    Raises:
        FileNotFoundError: If neither ``filepath`` nor ``../filepath`` exists.
    """
    if not os.path.exists(filepath):
        parent_path = os.path.join('..', filepath)
        if not os.path.exists(parent_path):
            raise FileNotFoundError(f"找不到数据文件: {filepath}")
        filepath = parent_path

    if filepath.lower().endswith('.csv'):
        df = pd.read_csv(filepath)
    else:
        df = pd.read_excel(filepath)

    y = pd.to_numeric(df.iloc[:, 0], errors='coerce').rename('target')
    X = df.iloc[:, 1:].apply(pd.to_numeric, errors='coerce')

    # Filter with a row mask instead of concatenating and then dropping the
    # 'target' column by name: a feature that is itself called 'target'
    # would otherwise be dropped from X and turn y into a DataFrame.
    valid_rows = y.notna() & X.notna().all(axis=1)
    X_clean = X.loc[valid_rows]
    y_clean = y.loc[valid_rows]

    print(f"数据加载完成: 原始样本 {len(df)} → 有效样本 {len(X_clean)} | 特征维度: {X_clean.shape[1]}")
    return X_clean, y_clean
@@ -0,0 +1,57 @@
1
+ import os
2
+ import shap
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ from sklearn.pipeline import Pipeline
6
+
7
def run_comprehensive_shap_analysis(model, X_test, feature_names, model_name, out_dir):
    """Compute SHAP values for ``model`` on ``X_test`` and write plots and a CSV.

    Three files are written into ``out_dir`` (created if missing):
    ``shap_summary_<model_name>.png``, ``shap_importance_<model_name>.png``
    and ``shap_values_<model_name>.csv``.

    Args:
        model: Fitted estimator exposing ``predict``. ``sklearn`` ``Pipeline``
            objects always go through ``shap.PermutationExplainer``; other
            models try ``shap.Explainer`` first and fall back to the
            permutation explainer if that constructor raises.
        X_test: Samples to explain; must expose ``.shape``.
        feature_names: Column labels, indexable by integer position and usable
            as DataFrame column names.
        model_name: Label used in plot titles and output file names.
        out_dir: Directory that receives the output files.
    """
    print(f" ├─ 启动 SHAP 分析 ({model_name})...")
    os.makedirs(out_dir, exist_ok=True)

    def _permutation_explainer():
        # The permutation explainer needs at least 2 * n_features + 1 model
        # evaluations per row. The previous min(..., 1000) cap fell below
        # that minimum once there were 500 or more features, which made the
        # explainer fail.
        max_evals = 2 * X_test.shape[1] + 1
        return shap.PermutationExplainer(model.predict, X_test, max_evals=max_evals)

    if isinstance(model, Pipeline):
        explainer = _permutation_explainer()
    else:
        try:
            explainer = shap.Explainer(model, X_test)
        except Exception:
            # Model type not supported by the automatic explainer selection.
            explainer = _permutation_explainer()

    shap_output = explainer(X_test)
    shap_arr = shap_output.values if hasattr(shap_output, 'values') else np.array(shap_output)
    if shap_arr.ndim == 1:
        shap_arr = shap_arr.reshape(1, -1)

    # Rank features by mean absolute SHAP value, keeping the 15 largest.
    mean_abs_shap = np.mean(np.abs(shap_arr), axis=0)
    top_indices = np.argsort(mean_abs_shap)[::-1][:15]
    top_feats = [feature_names[i] for i in top_indices]

    # The summary plot is best-effort: a failure is reported and the rest of
    # the analysis continues. The figure is closed either way.
    try:
        plt.figure(figsize=(12, 8))
        if hasattr(shap.plots, 'beeswarm'):
            shap.plots.beeswarm(shap_output, max_display=15, show=False)
        else:
            shap.summary_plot(shap_output, X_test, feature_names=feature_names, max_display=15, show=False)
        plt.title(model_name, fontsize=16, fontweight='bold', pad=10)
        plt.tight_layout()
        plt.savefig(os.path.join(out_dir, f'shap_summary_{model_name}.png'), dpi=300, bbox_inches='tight')
    except Exception as e:
        print(f" └─ Summary Plot 失败: {e}")
    finally:
        plt.close()

    try:
        plt.figure(figsize=(10, 8))
        # Values and labels are both reversed so the most important feature
        # ends up at the top of the horizontal bar chart.
        plt.barh(range(len(top_feats)), mean_abs_shap[top_indices][::-1], color='#4C72B0', edgecolor='k', alpha=0.8)
        plt.yticks(range(len(top_feats)), top_feats[::-1], fontsize=12)
        plt.xlabel('Mean |SHAP Value|', fontsize=14, fontweight='bold')
        plt.title(model_name, fontsize=16, fontweight='bold', pad=10)
        plt.grid(axis='x', linestyle='--', alpha=0.4)
        plt.tight_layout()
        plt.savefig(os.path.join(out_dir, f'shap_importance_{model_name}.png'), dpi=300, bbox_inches='tight')
    finally:
        plt.close()

    import pandas as pd
    pd.DataFrame(shap_arr, columns=feature_names).to_csv(os.path.join(out_dir, f'shap_values_{model_name}.csv'), index=False)
    print(f" └─ SHAP 分析完成,结果已保存至 {out_dir}")
@@ -0,0 +1,37 @@
1
+ from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
2
+ from sklearn.cross_decomposition import PLSRegression
3
+ from sklearn.svm import SVR
4
+ from sklearn.tree import DecisionTreeRegressor
5
+ from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, ExtraTreesRegressor, HistGradientBoostingRegressor
6
+ import xgboost as xgb
7
+ import lightgbm as lgb
8
+
9
def get_model_configs():
    """Return the registry of regressors and their Optuna search spaces.

    Returns:
        A dict keyed by model name. Each value is a dict with two entries:
        ``'model'`` (the estimator class) and ``'space'`` (a callable that
        takes an Optuna trial and returns constructor keyword arguments, or
        ``None`` for ``LinearRegression``, which has nothing to tune).
    """
    def pls_space(trial):
        return {'n_components': trial.suggest_int('n_components', 2, 50)}

    def alpha_space(trial):
        # Shared by Ridge and Lasso: a single log-uniform penalty strength.
        return {'alpha': trial.suggest_float('alpha', 1e-3, 1e3, log=True)}

    def enet_space(trial):
        alpha = trial.suggest_float('alpha', 1e-3, 1e3, log=True)
        l1_ratio = trial.suggest_float('l1_ratio', 0.0, 1.0)
        return {'alpha': alpha, 'l1_ratio': l1_ratio}

    def svr_space(trial):
        c_value = trial.suggest_float('C', 0.1, 100.0, log=True)
        epsilon = trial.suggest_float('epsilon', 0.01, 0.5, log=True)
        kernel = trial.suggest_categorical('kernel', ['rbf', 'linear'])
        return {'C': c_value, 'epsilon': epsilon, 'kernel': kernel}

    def dt_space(trial):
        max_depth = trial.suggest_int('max_depth', 3, 20)
        min_split = trial.suggest_int('min_samples_split', 2, 10)
        min_leaf = trial.suggest_int('min_samples_leaf', 1, 8)
        return {'max_depth': max_depth, 'min_samples_split': min_split, 'min_samples_leaf': min_leaf}

    def forest_space(trial):
        # Shared by RandomForest and ExtraTrees, whose spaces are identical.
        n_estimators = trial.suggest_int('n_estimators', 200, 800, step=50)
        max_depth = trial.suggest_int('max_depth', 5, 30)
        min_split = trial.suggest_int('min_samples_split', 2, 10)
        min_leaf = trial.suggest_int('min_samples_leaf', 1, 8)
        max_features = trial.suggest_categorical('max_features', ['sqrt', 'log2', None])
        return {
            'n_estimators': n_estimators,
            'max_depth': max_depth,
            'min_samples_split': min_split,
            'min_samples_leaf': min_leaf,
            'max_features': max_features,
        }

    def xgb_space(trial):
        n_estimators = trial.suggest_int('n_estimators', 200, 1000, step=50)
        max_depth = trial.suggest_int('max_depth', 3, 12)
        learning_rate = trial.suggest_float('learning_rate', 1e-3, 0.3, log=True)
        subsample = trial.suggest_float('subsample', 0.6, 1.0)
        colsample = trial.suggest_float('colsample_bytree', 0.6, 1.0)
        reg_alpha = trial.suggest_float('reg_alpha', 1e-8, 10.0, log=True)
        reg_lambda = trial.suggest_float('reg_lambda', 1e-8, 10.0, log=True)
        return {
            'n_estimators': n_estimators,
            'max_depth': max_depth,
            'learning_rate': learning_rate,
            'subsample': subsample,
            'colsample_bytree': colsample,
            'reg_alpha': reg_alpha,
            'reg_lambda': reg_lambda,
        }

    def lgb_space(trial):
        n_estimators = trial.suggest_int('n_estimators', 200, 1000, step=50)
        max_depth = trial.suggest_int('max_depth', 3, 12)
        learning_rate = trial.suggest_float('learning_rate', 1e-3, 0.3, log=True)
        num_leaves = trial.suggest_int('num_leaves', 20, 100)
        subsample = trial.suggest_float('subsample', 0.6, 1.0)
        colsample = trial.suggest_float('colsample_bytree', 0.6, 1.0)
        reg_alpha = trial.suggest_float('reg_alpha', 1e-8, 10.0, log=True)
        reg_lambda = trial.suggest_float('reg_lambda', 1e-8, 10.0, log=True)
        return {
            'n_estimators': n_estimators,
            'max_depth': max_depth,
            'learning_rate': learning_rate,
            'num_leaves': num_leaves,
            'subsample': subsample,
            'colsample_bytree': colsample,
            'reg_alpha': reg_alpha,
            'reg_lambda': reg_lambda,
        }

    def ada_space(trial):
        n_estimators = trial.suggest_int('n_estimators', 50, 500, step=25)
        learning_rate = trial.suggest_float('learning_rate', 0.01, 1.0, log=True)
        loss = trial.suggest_categorical('loss', ['linear', 'square', 'exponential'])
        return {'n_estimators': n_estimators, 'learning_rate': learning_rate, 'loss': loss}

    def hgb_space(trial):
        max_iter = trial.suggest_int('max_iter', 200, 1000, step=50)
        max_depth = trial.suggest_int('max_depth', 3, 12)
        learning_rate = trial.suggest_float('learning_rate', 1e-3, 0.3, log=True)
        min_leaf = trial.suggest_int('min_samples_leaf', 10, 50)
        l2_reg = trial.suggest_float('l2_regularization', 1e-4, 10.0, log=True)
        return {
            'max_iter': max_iter,
            'max_depth': max_depth,
            'learning_rate': learning_rate,
            'min_samples_leaf': min_leaf,
            'l2_regularization': l2_reg,
        }

    # (name, estimator class, search space) in the order models are trained.
    entries = [
        ('PLSR', PLSRegression, pls_space),
        ('LinearRegression', LinearRegression, None),
        ('Ridge', Ridge, alpha_space),
        ('Lasso', Lasso, alpha_space),
        ('ElasticNet', ElasticNet, enet_space),
        ('SVR', SVR, svr_space),
        ('DecisionTree', DecisionTreeRegressor, dt_space),
        ('RandomForest', RandomForestRegressor, forest_space),
        ('XGBoost', xgb.XGBRegressor, xgb_space),
        ('LightGBM', lgb.LGBMRegressor, lgb_space),
        ('AdaBoost', AdaBoostRegressor, ada_space),
        ('ExtraTrees', ExtraTreesRegressor, forest_space),
        ('HistGradientBoosting', HistGradientBoostingRegressor, hgb_space),
    ]
    return {
        label: {'model': estimator_cls, 'space': space}
        for label, estimator_cls, space in entries
    }
@@ -0,0 +1,137 @@
1
+ import os
2
+ import glob
3
+ import joblib
4
+ import pandas as pd
5
+ import numpy as np
6
+ import matplotlib.pyplot as plt
7
+ from sklearn.model_selection import train_test_split
8
+ from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
9
+ from .utils import setup_environment, set_seed
10
+ from .data import load_and_clean_data
11
+ from .models import get_model_configs
12
+ from .trainer import optimize_model
13
+ from .interpreter import run_comprehensive_shap_analysis
14
+
15
class QSARModelTrainer:
    """Train, tune and explain every model returned by ``get_model_configs``.

    For each model the pipeline tunes hyper-parameters (when a search space
    exists), fits on an 80% training split, scores on the remaining 20%,
    saves the model and feature list with joblib, and runs the SHAP analysis.
    """

    def __init__(self, data_path: str, out_dir: str = 'training_results', n_trials: int = 100):
        """Configure the run and create the output directory.

        Args:
            data_path: Path of the dataset passed to ``load_and_clean_data``.
            out_dir: Directory that receives models, plots and the summary CSV.
            n_trials: Number of Optuna trials per tunable model.
        """
        setup_environment()
        set_seed(42)
        self.data_path = data_path
        self.out_dir = out_dir
        self.n_trials = n_trials
        os.makedirs(out_dir, exist_ok=True)

    def run(self):
        """Run the whole training workflow.

        Returns:
            A DataFrame with one row per model (columns ``Model``, ``R2``,
            ``RMSE``, ``MAE``) sorted by descending R2. The same table is
            written to ``model_comparison.csv`` in ``out_dir``.
        """
        rule = "=" * 60
        print(rule + "\n 开始自动化建模与优化流程\n" + rule)

        X, y = load_and_clean_data(self.data_path)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        feature_list = X.columns.tolist()

        results = []
        for name, cfg in get_model_configs().items():
            print(f"\n[ {name} ] 处理中...")

            if cfg['space'] is None:
                # Nothing to tune: use the estimator's defaults.
                model = cfg['model']()
            else:
                model, best_params = optimize_model(
                    cfg['model'], cfg['space'], X_train, y_train,
                    n_trials=self.n_trials, is_svr=(name == 'SVR')
                )
                print(f" ├─ 最优参数: {best_params}")

            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)

            r2 = r2_score(y_test, y_pred)
            rmse = np.sqrt(mean_squared_error(y_test, y_pred))
            mae = mean_absolute_error(y_test, y_pred)
            print(f" ├─ 测试集: R²={r2:.4f} | RMSE={rmse:.4f} | MAE={mae:.4f}")
            results.append({'Model': name, 'R2': r2, 'RMSE': rmse, 'MAE': mae})

            # Persist the fitted model together with its feature order.
            model_path = os.path.join(self.out_dir, f'model_{name}.pkl')
            features_path = os.path.join(self.out_dir, f'features_{name}.pkl')
            joblib.dump(model, model_path)
            joblib.dump(feature_list, features_path)

            # SHAP analysis on the held-out split.
            run_comprehensive_shap_analysis(model, X_test, feature_list, name, self.out_dir)

        # Summary table, best model first.
        res_df = pd.DataFrame(results).sort_values('R2', ascending=False)
        summary_path = os.path.join(self.out_dir, 'model_comparison.csv')
        res_df.to_csv(summary_path, index=False)
        print("\n训练流程结束,结果已保存至:", self.out_dir)
        return res_df
64
+
65
+
66
class QSARModelAnalyzer:
    """Re-evaluate saved ``model_*.pkl`` files and regenerate plots and SHAP output.

    The data is split with the same ``test_size=0.2, random_state=42``
    arguments the trainer passes to ``train_test_split``.
    """

    # File-name convention used to discover saved models.
    _MODEL_PREFIX = 'model_'
    _MODEL_SUFFIX = '.pkl'
    # Column order of the summary table (also used when it is empty).
    _METRIC_COLUMNS = ['Model', 'R2', 'RMSE', 'MAE']

    def __init__(self, data_path: str, model_dir: str = '.', out_dir: str = 'batch_analysis_results'):
        """Configure the analysis and create the output directory.

        Args:
            data_path: Path of the dataset passed to ``load_and_clean_data``.
            model_dir: Directory searched for ``model_*.pkl`` and the matching
                ``features_*.pkl`` files.
            out_dir: Directory that receives plots and the summary CSV.
        """
        setup_environment()
        set_seed(42)
        self.data_path = data_path
        self.model_dir = model_dir
        self.out_dir = out_dir
        os.makedirs(out_dir, exist_ok=True)

    def run(self):
        """Evaluate every saved model found in ``model_dir``.

        Returns:
            A DataFrame with columns ``Model``, ``R2``, ``RMSE``, ``MAE``
            sorted by descending R2 (possibly empty when every model was
            skipped), or ``None`` when no model file was found. The table is
            also written to ``model_summary_metrics.csv`` in ``out_dir``.
        """
        print("="*60 + f"\n 开始批量模型分析 (目录: {self.model_dir})\n" + "="*60)
        X, y = load_and_clean_data(self.data_path)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        pattern = os.path.join(self.model_dir, f'{self._MODEL_PREFIX}*{self._MODEL_SUFFIX}')
        model_files = sorted(glob.glob(pattern))
        if not model_files:
            print("未找到任何 model_*.pkl 文件!")
            return None

        results = []
        for mfile in model_files:
            # Strip only the leading prefix and trailing suffix. Chained
            # str.replace would also remove 'model_' / '.pkl' occurring
            # inside the model name itself.
            base = os.path.basename(mfile)
            name = base[len(self._MODEL_PREFIX):-len(self._MODEL_SUFFIX)]
            ffile = os.path.join(self.model_dir, f'features_{name}.pkl')

            if not os.path.exists(ffile):
                print(f"跳过 [{name}]: 缺少特征文件")
                continue

            print(f"\n[ {name} ] 分析中...")
            # NOTE(security): joblib.load unpickles arbitrary objects; only
            # point model_dir at files from a trusted source.
            model = joblib.load(mfile)
            feat_names = joblib.load(ffile)

            # Skip this model instead of aborting the whole batch when the
            # dataset lacks a column the model was trained on.
            missing = [col for col in feat_names if col not in X_test.columns]
            if missing:
                print(f"跳过 [{name}]: 数据中缺少特征列 {missing}")
                continue
            X_test_aligned = X_test[feat_names]

            # Held-out metrics.
            y_pred = model.predict(X_test_aligned)
            r2 = r2_score(y_test, y_pred)
            rmse = np.sqrt(mean_squared_error(y_test, y_pred))
            mae = mean_absolute_error(y_test, y_pred)
            results.append({'Model': name, 'R2': r2, 'RMSE': rmse, 'MAE': mae})

            # 1. True vs. predicted scatter plot.
            self._plot_true_vs_pred(model, X_train, X_test_aligned, y_train, y_test, feat_names, name)

            # 2. SHAP analysis.
            run_comprehensive_shap_analysis(model, X_test_aligned, feat_names, name, self.out_dir)

        # Passing the columns explicitly keeps sort_values('R2') valid even
        # when every model was skipped and ``results`` is empty.
        res_df = pd.DataFrame(results, columns=self._METRIC_COLUMNS).sort_values('R2', ascending=False)
        res_df.to_csv(os.path.join(self.out_dir, 'model_summary_metrics.csv'), index=False)
        print("\n批量分析结束,结果已保存至:", self.out_dir)
        return res_df

    def _plot_true_vs_pred(self, model, X_train, X_test, y_train, y_test, feat_names, name):
        """Save a true-vs-predicted scatter plot for the train and test splits.

        ``X_train`` is re-indexed with ``feat_names`` here; ``X_test`` is used
        as passed in. The figure is written to ``true_pred_<name>.png`` in
        ``out_dir``.
        """
        y_train_pred = model.predict(X_train[feat_names])
        y_test_pred = model.predict(X_test)
        r2_train = r2_score(y_train, y_train_pred)
        r2_test = r2_score(y_test, y_test_pred)

        plt.figure(figsize=(8, 8))
        try:
            plt.scatter(y_train, y_train_pred, c='#2E86AB', alpha=0.6, edgecolor='white', s=80, label=f'Train (R²={r2_train:.3f})')
            plt.scatter(y_test, y_test_pred, c='#E94F37', alpha=0.7, edgecolor='black', s=100, label=f'Test (R²={r2_test:.3f})')

            # Identity line spanning the observed target range with a 0.5 margin.
            lims = [min(y_train.min(), y_test.min()) - 0.5, max(y_train.max(), y_test.max()) + 0.5]
            plt.plot(lims, lims, 'k--', lw=2.0, zorder=0)

            plt.xlabel('True Adsorption Capacity', fontsize=14, fontweight='bold')
            plt.ylabel('Predicted Adsorption Capacity', fontsize=14, fontweight='bold')
            plt.title(name, fontsize=16, pad=10, fontweight='bold')
            plt.legend(fontsize=12, frameon=True, loc='upper left')
            plt.grid(axis='both', linestyle=':', alpha=0.4)
            plt.tight_layout()
            plt.savefig(os.path.join(self.out_dir, f'true_pred_{name}.png'), dpi=300, bbox_inches='tight')
        finally:
            # Release the figure even if plotting or saving fails.
            plt.close()
@@ -0,0 +1,37 @@
1
+ import optuna
2
+ from sklearn.model_selection import cross_val_score
3
+ from sklearn.pipeline import make_pipeline
4
+ from sklearn.preprocessing import StandardScaler
5
+
6
def get_safe_params(model_class, params: dict) -> dict:
    """Return a copy of ``params`` extended with the defaults the model accepts.

    ``random_state=42`` and ``n_jobs=-1`` are each probed by instantiating
    ``model_class`` with only that keyword. A keyword is added to the result
    only when the constructor does not raise ``TypeError``. The input dict is
    left unmodified.
    """
    safe_params = dict(params)
    for keyword, value in (('random_state', 42), ('n_jobs', -1)):
        try:
            model_class(**{keyword: value})
        except TypeError:
            # The constructor rejects this keyword; leave it out.
            continue
        safe_params[keyword] = value
    return safe_params
13
+
14
def optimize_model(model_class, space_func, X_train, y_train, n_trials: int = 100, is_svr: bool = False):
    """Tune ``model_class`` with Optuna and return an unfitted best estimator.

    Each trial is scored by the mean 10-fold cross-validated negative RMSE on
    the training data, and the study maximizes that value.

    Args:
        model_class: Estimator class to instantiate.
        space_func: Callable mapping an Optuna trial to constructor kwargs;
            a falsy value means no hyper-parameters are sampled.
        X_train: Training features.
        y_train: Training target.
        n_trials: Number of Optuna trials.
        is_svr: When true, the estimator is wrapped in a pipeline that
            applies ``StandardScaler`` first.

    Returns:
        ``(estimator, best_params)``: an unfitted estimator built from the
        best trial, and the raw parameters sampled in that trial.
    """
    def build_estimator(hyperparams):
        # Add random_state / n_jobs where supported, then optionally scale.
        estimator = model_class(**get_safe_params(model_class, hyperparams))
        if is_svr:
            return make_pipeline(StandardScaler(), estimator)
        return estimator

    def objective(trial):
        sampled = space_func(trial) if space_func else {}
        candidate = build_estimator(sampled)
        fold_scores = cross_val_score(
            candidate, X_train, y_train,
            cv=10, scoring='neg_root_mean_squared_error', n_jobs=-1,
        )
        return fold_scores.mean()

    sampler = optuna.samplers.TPESampler(seed=42)
    pruner = optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=3)
    study = optuna.create_study(direction='maximize', sampler=sampler, pruner=pruner)
    study.optimize(objective, n_trials=n_trials, show_progress_bar=True)

    return build_estimator(study.best_trial.params), study.best_trial.params
@@ -0,0 +1,21 @@
1
+ import warnings
2
+ import numpy as np
3
+ import random
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+
7
def setup_environment():
    """Silence warnings and apply the package-wide plotting defaults.

    Sets the matplotlib font family and font sizes, disables the Unicode
    minus sign, and switches seaborn to the ``whitegrid`` style.
    """
    warnings.filterwarnings('ignore')

    plot_defaults = {
        'font.sans-serif': ['SimHei', 'Arial'],
        'axes.unicode_minus': False,
        'font.size': 12,
        'axes.labelsize': 14,
        'xtick.labelsize': 12,
        'ytick.labelsize': 12,
        'legend.fontsize': 12,
        'axes.titlesize': 16,
    }
    for option, value in plot_defaults.items():
        plt.rcParams[option] = value

    sns.set_style('whitegrid')
18
+
19
def set_seed(seed: int = 42):
    """Seed Python's ``random`` module and NumPy's global random generator."""
    for seeder in (random.seed, np.random.seed):
        seeder(seed)
@@ -0,0 +1,30 @@
1
+ Metadata-Version: 2.4
2
+ Name: chrom-qsar
3
+ Version: 0.1.0
4
+ Summary: A streamlined machine learning pipeline for QSAR and regression tasks with SHAP interpretability.
5
+ Author-email: Your Name <your.email@university.edu>
6
+ License: MIT
7
+ Keywords: machine-learning,qsar,shap,regression,optuna
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Science/Research
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.8
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Requires-Python: >=3.8
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: pandas>=1.3.0
20
+ Requires-Dist: numpy>=1.21.0
21
+ Requires-Dist: scikit-learn>=1.0.0
22
+ Requires-Dist: xgboost>=1.5.0
23
+ Requires-Dist: lightgbm>=3.3.0
24
+ Requires-Dist: optuna>=3.0.0
25
+ Requires-Dist: shap>=0.41.0
26
+ Requires-Dist: matplotlib>=3.4.0
27
+ Requires-Dist: seaborn>=0.11.0
28
+ Requires-Dist: joblib>=1.1.0
29
+ Requires-Dist: openpyxl>=3.0.0
30
+ Dynamic: license-file
@@ -0,0 +1,14 @@
1
+ LICENSE
2
+ pyproject.toml
3
+ chrom_qsar/__init__.py
4
+ chrom_qsar/data.py
5
+ chrom_qsar/interpreter.py
6
+ chrom_qsar/models.py
7
+ chrom_qsar/pipeline.py
8
+ chrom_qsar/trainer.py
9
+ chrom_qsar/utils.py
10
+ chrom_qsar.egg-info/PKG-INFO
11
+ chrom_qsar.egg-info/SOURCES.txt
12
+ chrom_qsar.egg-info/dependency_links.txt
13
+ chrom_qsar.egg-info/requires.txt
14
+ chrom_qsar.egg-info/top_level.txt
@@ -0,0 +1,11 @@
1
+ pandas>=1.3.0
2
+ numpy>=1.21.0
3
+ scikit-learn>=1.0.0
4
+ xgboost>=1.5.0
5
+ lightgbm>=3.3.0
6
+ optuna>=3.0.0
7
+ shap>=0.41.0
8
+ matplotlib>=3.4.0
9
+ seaborn>=0.11.0
10
+ joblib>=1.1.0
11
+ openpyxl>=3.0.0
@@ -0,0 +1 @@
1
+ chrom_qsar
@@ -0,0 +1,38 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "chrom-qsar"
7
+ version = "0.1.0"
8
+ description = "A streamlined machine learning pipeline for QSAR and regression tasks with SHAP interpretability."
9
+ readme = "README.md"
10
+ requires-python = ">=3.8"
11
+ license = {text = "MIT"}
12
+ authors = [
13
+ {name = "Your Name", email = "your.email@university.edu"}
14
+ ]
15
+ keywords = ["machine-learning", "qsar", "shap", "regression", "optuna"]
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Intended Audience :: Science/Research",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.8",
22
+ "Programming Language :: Python :: 3.9",
23
+ "Programming Language :: Python :: 3.10",
24
+ "Programming Language :: Python :: 3.11",
25
+ ]
26
+ dependencies = [
27
+ "pandas>=1.3.0",
28
+ "numpy>=1.21.0",
29
+ "scikit-learn>=1.0.0",
30
+ "xgboost>=1.5.0",
31
+ "lightgbm>=3.3.0",
32
+ "optuna>=3.0.0",
33
+ "shap>=0.41.0",
34
+ "matplotlib>=3.4.0",
35
+ "seaborn>=0.11.0",
36
+ "joblib>=1.1.0",
37
+ "openpyxl>=3.0.0"
38
+ ]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+