jingwei 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
jingwei-0.0.1/PKG-INFO ADDED
@@ -0,0 +1,327 @@
1
+ Metadata-Version: 2.4
2
+ Name: jingwei
3
+ Version: 0.0.1
4
+ Summary: Proteomic data imputation framework with DMF and DCAE methods.
5
+ Author: JINGWEI Contributors
6
+ License: MIT
7
+ Keywords: imputation,proteomics,DMF,DCAE,deep-learning
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Science/Research
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
+ Requires-Python: >=3.12
16
+ Description-Content-Type: text/markdown
17
+ Requires-Dist: torch>=2.0.0
18
+ Requires-Dist: pytorch-lightning>=2.0.0
19
+ Requires-Dist: numpy>=1.24.0
20
+ Requires-Dist: pandas>=2.0.0
21
+ Requires-Dist: tensorboard>=2.13.0
22
+ Requires-Dist: scipy>=1.10.0
23
+ Requires-Dist: scikit-learn>=1.3.0
24
+ Provides-Extra: dev
25
+ Requires-Dist: pytest>=7.4.0; extra == "dev"
26
+ Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
27
+ Requires-Dist: black>=23.0.0; extra == "dev"
28
+ Requires-Dist: flake8>=6.0.0; extra == "dev"
29
+
30
+ # JINGWEI - Proteomic Data Imputation Framework
31
+
32
+ JINGWEI is a deep learning framework for missing proteomic data imputation, supporting both **DMF (Deep Matrix Factorization)** and **DCAE (Dilated Convolutional AutoEncoder)** methods.
33
+
34
+ ## Features
35
+
36
+ - **Multiple Imputation Methods**: Support for DMF and DCAE algorithms
37
+ - **Flexible Architecture**: Configurable network architectures and hyperparameters
38
+ - **GPU Acceleration**: CUDA support with specific GPU selection
39
+ - **Comprehensive Logging**: TensorBoard integration for training monitoring
40
+ - **Early Stopping**: Prevent overfitting with configurable patience
41
+ - **Batch Processing**: Efficient batch training with customizable batch sizes
42
+
43
+ ## Installation
44
+
45
+ ### Requirements
46
+
47
+ - Python 3.12+
48
+ - CUDA-capable GPU (optional, but recommended)
49
+ - Conda for managing the environment (optional, but recommended):
50
+ ```bash
51
+ conda create -n jingwei python=3.12
52
+ conda activate jingwei
53
+ ```
54
+
55
+ ### Dependencies
56
+
57
+ Install the package in editable mode (recommended for development):
58
+
59
+ ```bash
60
+ pip install -e .
61
+ ```
62
+
63
+ To install only the runtime dependencies without an editable install:
64
+
65
+ ```bash
66
+ pip install -r requirements.txt
67
+ ```
68
+
69
+ Optional dev dependencies:
70
+
71
+ ```bash
72
+ pip install -e ".[dev]"
73
+ ```
74
+
75
+ ## Usage
76
+
77
+ ### Quick Start
78
+
79
+ ```bash
80
+ # Basic usage with DMF method
81
+ python -m jingwei.train --data-path data/your_dataset.csv
82
+
83
+ # Use DCAE method with GPU 1
84
+ python -m jingwei.train --data-path data/Alzheimer.csv --method DCAE --device cuda --gpu-id 1
85
+
86
+ # Custom parameters with early stopping
87
+ python -m jingwei.train --data-path data/your_dataset.csv \
88
+ --method DMF \
89
+ --hidden-dims 512 256 128 \
90
+ --embedding-dim 128 \
91
+ --early-stopping \
92
+ --max-epochs 100
93
+ ```
94
+
95
+ ### Python API Example
96
+
97
+ ```python
98
+ import pytorch_lightning as pl
99
+
100
+ from jingwei import CSVDataset, DMFImputer, DCAEImputer
101
+
102
+ dataset = CSVDataset("data/Alzheimer.csv")
103
+
104
+ # Choose one of the two methods
105
+ model = DMFImputer(
106
+ full_data_tensor=dataset.data_normalized,
107
+ full_mask_tensor=dataset.mask,
108
+ embedding_dim=64,
109
+ hidden_dims=[256, 128],
110
+ batch_size=1024,
111
+ )
112
+
113
+ # Or use DCAE
114
+ # model = DCAEImputer(
115
+ # full_data_tensor=dataset.data_normalized,
116
+ # full_mask_tensor=dataset.mask,
117
+ # ae_dim=64,
118
+ # batch_size=1024,
119
+ # )
120
+
121
+ trainer = pl.Trainer(max_epochs=5)
122
+ trainer.fit(model)
123
+
124
+ imputed_normalized = model.get_imputed_data()
125
+ imputed = dataset.inverse_transform(imputed_normalized)
126
+ ```
127
+
128
+ ### Available Parameters
129
+
130
+ #### Required Arguments
131
+ - `--data-path PATH`: Path to input CSV file
132
+
133
+ #### Method Selection
134
+ - `--method {DMF,DCAE}`: Imputation method (default: DMF)
135
+
136
+ #### General Network Parameters
137
+ - `--hidden-dims DIMS`: Hidden layer dimensions, space-separated (default: "256 128")
138
+ - `--batch-size SIZE`: Batch size for training (default: 1024)
139
+ - `--learning-rate RATE`: Learning rate (default: 0.001)
140
+ - `--weight-decay DECAY`: Weight decay for optimizer (default: 0.00001)
141
+ - `--gradient-clip VALUE`: Gradient clipping value (default: 1.0)
142
+
143
+ #### DMF Specific Parameters
144
+ - `--embedding-dim DIM`: Embedding dimension (default: 64)
145
+
146
+ #### DCAE Specific Parameters
147
+ - `--latent-dim DIM`: Latent dimension (default: 64)
148
+ - `--num-encoder-blocks NUM`: Number of encoder blocks (default: 2)
149
+ - `--num-decoder-blocks NUM`: Number of decoder blocks (default: 2)
150
+ - `--dilation VALUE`: Dilation factor (default: 2)
151
+
152
+ #### Loss Weights
153
+ - `--mask-weight WEIGHT`: Weight for mask prediction loss (default: 0.5)
154
+ - `--reconstruction-weight WEIGHT`: Weight for reconstruction loss (default: 1.0)
155
+
156
+ #### Training Control
157
+ - `--max-epochs EPOCHS`: Maximum training epochs (default: 200)
158
+ - `--early-stopping`: Enable early stopping
159
+ - `--patience PATIENCE`: Patience for early stopping (default: 20)
160
+
161
+ #### Device Settings
162
+ - `--device {cpu,cuda,auto}`: Device to use (default: auto)
163
+ - `--gpu-id ID`: Specific GPU ID to use (0, 1, etc.)
164
+
165
+ #### Output Settings
166
+ - `--results-dir DIR`: Directory for saving results (default: ./results)
167
+ - `--log-interval INTERVAL`: Logging interval in steps (default: 50)
168
+ - `--progress-bar`: Show progress bar during training
169
+
170
+ ## Data Format
171
+
172
+ The input CSV file should have the following format:
173
+ - First row: Header (will be skipped)
174
+ - First column: Sample IDs/names (will be skipped)
175
+ - Remaining columns: Protein expression data
176
+ - Missing values: Use 0, negative values, or NaN
177
+
178
+ Example:
179
+ ```csv
180
+ Sample_ID,Protein_1,Protein_2,Protein_3,...
181
+ Sample_001,1.23,0.45,NaN,...
182
+ Sample_002,2.34,0,1.67,...
183
+ Sample_003,1.45,1.23,2.89,...
184
+ ```
185
+
186
+ ## Output Files
187
+
188
+ JINGWEI generates the following outputs in the results directory:
189
+
190
+ ```
191
+ results/
192
+ ├── checkpoints/ # Model checkpoints
193
+ ├── logs/ # TensorBoard logs
194
+ └── outputs/
195
+ └── {METHOD}_{DATASET}_{TIMESTAMP}/
196
+ ├── config.json # Training configuration
197
+ ├── imputed_data.csv # Imputed protein data
198
+ ├── training_metrics.csv # Training loss history
199
+ └── model_final.ckpt # Final trained model
200
+ ```
201
+
202
+ ## Examples
203
+
204
+ ### Example 1: DMF with Custom Architecture
205
+ ```bash
206
+ ./src/JINGWEI.sh --data-path data/Alzheimer.csv \
207
+ --method DMF \
208
+ --hidden-dims 512 256 128 64 \
209
+ --embedding-dim 128 \
210
+ --mask-weight 0.3 \
211
+ --learning-rate 0.0005 \
212
+ --max-epochs 150 \
213
+ --early-stopping \
214
+ --progress-bar
215
+ ```
216
+
217
+ ### Example 2: DCAE with GPU Acceleration
218
+ ```bash
219
+ ./src/JINGWEI.sh --data-path data/Alzheimer.csv \
220
+ --method DCAE \
221
+ --device cuda \
222
+ --gpu-id 1 \
223
+ --latent-dim 128 \
224
+ --num-encoder-blocks 3 \
225
+ --num-decoder-blocks 3 \
226
+ --dilation 4 \
227
+ --batch-size 512
228
+ ```
229
+
230
+ ### Example 3: CPU Training with Custom Output Directory
231
+ ```bash
232
+ ./src/JINGWEI.sh --data-path data/Alzheimer.csv \
233
+ --device cpu \
234
+ --results-dir ./my_results \
235
+ --max-epochs 50 \
236
+ --log-interval 10
237
+ ```
238
+
239
+ ## Method Descriptions
240
+
241
+ ### DMF (Deep Matrix Factorization)
242
+ - Uses row and column embeddings to capture latent patterns
243
+ - Suitable for collaborative filtering-style missing data
244
+ - Good for datasets with structured missing patterns
245
+
246
+ ### DCAE (Dilated Convolutional AutoEncoder)
247
+ - Uses dilated convolutions to capture long-range dependencies
248
+ - Suitable for sequential or structured protein data
249
+ - Better for complex missing data patterns
250
+
251
+ ## Monitoring Training
252
+
253
+ ### TensorBoard
254
+ ```bash
255
+ tensorboard --logdir results/logs
256
+ ```
257
+
258
+ ### Training Metrics
259
+ Monitor the following metrics:
260
+ - `train_loss`: Overall training loss
261
+ - `reconstruction_loss`: Data reconstruction quality
262
+ - `mask_loss`: Missing-data pattern prediction loss (lower is better)
263
+
264
+ ## Troubleshooting
265
+
266
+ ### Common Issues
267
+
268
+ 1. **CUDA Out of Memory**
269
+ - Reduce `--batch-size`
270
+ - Use `--device cpu` for CPU training
271
+
272
+ 2. **Shape Mismatch Errors**
273
+ - Check CSV format (the first column must contain sample IDs, because it is skipped on load)
274
+ - Verify data contains only numeric values
275
+
276
+ 3. **Slow Training**
277
+ - Use GPU acceleration with `--device cuda`
278
+ - Increase `--batch-size` if memory allows
279
+
280
+ 4. **Poor Performance**
281
+ - Adjust `--mask-weight` (try 0.1-0.8)
282
+ - Experiment with different `--hidden-dims`
283
+ - Enable `--early-stopping`
284
+
285
+ ### Getting Help
286
+
287
+ For help with parameters:
288
+ ```bash
289
+ python -m jingwei.train --help
290
+ ```
291
+
292
+ ## File Structure
293
+
294
+ ```
295
+ JINGWEI/
296
+ ├── README.md
297
+ ├── pyproject.toml
298
+ ├── requirements.txt
299
+ ├── src/
300
+ │ ├── JINGWEI.sh # Legacy training script (optional)
301
+ │ ├── train.py # Thin wrapper for package entry
302
+ │ ├── jingwei/
303
+ │ │ ├── __init__.py
304
+ │ │ ├── train.py # Package training entry
305
+ │ │ ├── datasets.py # Data loading utilities
306
+ │ │ ├── models.py # Model architectures (DMF/DCAE)
307
+ │ │ ├── dmf.py # DMF trainer
308
+ │ │ └── dcae.py # DCAE trainer
309
+ │ └── methods/ # Other baselines (not packaged)
310
+ └── data/
311
+ └── your_dataset.csv
312
+ ```
313
+
314
+
315
+ ## License
316
+
317
+ This project is licensed under the MIT License.
318
+
319
+
320
+ ## Changelog
321
+
322
+ ### Version 0.0.1
323
+ - Initial release
324
+ - Support for DMF and DCAE methods
325
+ - GPU acceleration
326
+ - Comprehensive parameter configuration
327
+ - TensorBoard integration
@@ -0,0 +1,298 @@
1
+ # JINGWEI - Proteomic Data Imputation Framework
2
+
3
+ JINGWEI is a deep learning framework for missing proteomic data imputation, supporting both **DMF (Deep Matrix Factorization)** and **DCAE (Dilated Convolutional AutoEncoder)** methods.
4
+
5
+ ## Features
6
+
7
+ - **Multiple Imputation Methods**: Support for DMF and DCAE algorithms
8
+ - **Flexible Architecture**: Configurable network architectures and hyperparameters
9
+ - **GPU Acceleration**: CUDA support with specific GPU selection
10
+ - **Comprehensive Logging**: TensorBoard integration for training monitoring
11
+ - **Early Stopping**: Prevent overfitting with configurable patience
12
+ - **Batch Processing**: Efficient batch training with customizable batch sizes
13
+
14
+ ## Installation
15
+
16
+ ### Requirements
17
+
18
+ - Python 3.12+
19
+ - CUDA-capable GPU (optional, but recommended)
20
+ - Conda for managing the environment (optional, but recommended):
21
+ ```bash
22
+ conda create -n jingwei python=3.12
23
+ conda activate jingwei
24
+ ```
25
+
26
+ ### Dependencies
27
+
28
+ Install the package in editable mode (recommended for development):
29
+
30
+ ```bash
31
+ pip install -e .
32
+ ```
33
+
34
+ To install only the runtime dependencies without an editable install:
35
+
36
+ ```bash
37
+ pip install -r requirements.txt
38
+ ```
39
+
40
+ Optional dev dependencies:
41
+
42
+ ```bash
43
+ pip install -e ".[dev]"
44
+ ```
45
+
46
+ ## Usage
47
+
48
+ ### Quick Start
49
+
50
+ ```bash
51
+ # Basic usage with DMF method
52
+ python -m jingwei.train --data-path data/your_dataset.csv
53
+
54
+ # Use DCAE method with GPU 1
55
+ python -m jingwei.train --data-path data/Alzheimer.csv --method DCAE --device cuda --gpu-id 1
56
+
57
+ # Custom parameters with early stopping
58
+ python -m jingwei.train --data-path data/your_dataset.csv \
59
+ --method DMF \
60
+ --hidden-dims 512 256 128 \
61
+ --embedding-dim 128 \
62
+ --early-stopping \
63
+ --max-epochs 100
64
+ ```
65
+
66
+ ### Python API Example
67
+
68
+ ```python
69
+ import pytorch_lightning as pl
70
+
71
+ from jingwei import CSVDataset, DMFImputer, DCAEImputer
72
+
73
+ dataset = CSVDataset("data/Alzheimer.csv")
74
+
75
+ # Choose one of the two methods
76
+ model = DMFImputer(
77
+ full_data_tensor=dataset.data_normalized,
78
+ full_mask_tensor=dataset.mask,
79
+ embedding_dim=64,
80
+ hidden_dims=[256, 128],
81
+ batch_size=1024,
82
+ )
83
+
84
+ # Or use DCAE
85
+ # model = DCAEImputer(
86
+ # full_data_tensor=dataset.data_normalized,
87
+ # full_mask_tensor=dataset.mask,
88
+ # ae_dim=64,
89
+ # batch_size=1024,
90
+ # )
91
+
92
+ trainer = pl.Trainer(max_epochs=5)
93
+ trainer.fit(model)
94
+
95
+ imputed_normalized = model.get_imputed_data()
96
+ imputed = dataset.inverse_transform(imputed_normalized)
97
+ ```
98
+
99
+ ### Available Parameters
100
+
101
+ #### Required Arguments
102
+ - `--data-path PATH`: Path to input CSV file
103
+
104
+ #### Method Selection
105
+ - `--method {DMF,DCAE}`: Imputation method (default: DMF)
106
+
107
+ #### General Network Parameters
108
+ - `--hidden-dims DIMS`: Hidden layer dimensions, space-separated (default: "256 128")
109
+ - `--batch-size SIZE`: Batch size for training (default: 1024)
110
+ - `--learning-rate RATE`: Learning rate (default: 0.001)
111
+ - `--weight-decay DECAY`: Weight decay for optimizer (default: 0.00001)
112
+ - `--gradient-clip VALUE`: Gradient clipping value (default: 1.0)
113
+
114
+ #### DMF Specific Parameters
115
+ - `--embedding-dim DIM`: Embedding dimension (default: 64)
116
+
117
+ #### DCAE Specific Parameters
118
+ - `--latent-dim DIM`: Latent dimension (default: 64)
119
+ - `--num-encoder-blocks NUM`: Number of encoder blocks (default: 2)
120
+ - `--num-decoder-blocks NUM`: Number of decoder blocks (default: 2)
121
+ - `--dilation VALUE`: Dilation factor (default: 2)
122
+
123
+ #### Loss Weights
124
+ - `--mask-weight WEIGHT`: Weight for mask prediction loss (default: 0.5)
125
+ - `--reconstruction-weight WEIGHT`: Weight for reconstruction loss (default: 1.0)
126
+
127
+ #### Training Control
128
+ - `--max-epochs EPOCHS`: Maximum training epochs (default: 200)
129
+ - `--early-stopping`: Enable early stopping
130
+ - `--patience PATIENCE`: Patience for early stopping (default: 20)
131
+
132
+ #### Device Settings
133
+ - `--device {cpu,cuda,auto}`: Device to use (default: auto)
134
+ - `--gpu-id ID`: Specific GPU ID to use (0, 1, etc.)
135
+
136
+ #### Output Settings
137
+ - `--results-dir DIR`: Directory for saving results (default: ./results)
138
+ - `--log-interval INTERVAL`: Logging interval in steps (default: 50)
139
+ - `--progress-bar`: Show progress bar during training
140
+
141
+ ## Data Format
142
+
143
+ The input CSV file should have the following format:
144
+ - First row: Header (will be skipped)
145
+ - First column: Sample IDs/names (will be skipped)
146
+ - Remaining columns: Protein expression data
147
+ - Missing values: Use 0, negative values, or NaN
148
+
149
+ Example:
150
+ ```csv
151
+ Sample_ID,Protein_1,Protein_2,Protein_3,...
152
+ Sample_001,1.23,0.45,NaN,...
153
+ Sample_002,2.34,0,1.67,...
154
+ Sample_003,1.45,1.23,2.89,...
155
+ ```
156
+
157
+ ## Output Files
158
+
159
+ JINGWEI generates the following outputs in the results directory:
160
+
161
+ ```
162
+ results/
163
+ ├── checkpoints/ # Model checkpoints
164
+ ├── logs/ # TensorBoard logs
165
+ └── outputs/
166
+ └── {METHOD}_{DATASET}_{TIMESTAMP}/
167
+ ├── config.json # Training configuration
168
+ ├── imputed_data.csv # Imputed protein data
169
+ ├── training_metrics.csv # Training loss history
170
+ └── model_final.ckpt # Final trained model
171
+ ```
172
+
173
+ ## Examples
174
+
175
+ ### Example 1: DMF with Custom Architecture
176
+ ```bash
177
+ ./src/JINGWEI.sh --data-path data/Alzheimer.csv \
178
+ --method DMF \
179
+ --hidden-dims 512 256 128 64 \
180
+ --embedding-dim 128 \
181
+ --mask-weight 0.3 \
182
+ --learning-rate 0.0005 \
183
+ --max-epochs 150 \
184
+ --early-stopping \
185
+ --progress-bar
186
+ ```
187
+
188
+ ### Example 2: DCAE with GPU Acceleration
189
+ ```bash
190
+ ./src/JINGWEI.sh --data-path data/Alzheimer.csv \
191
+ --method DCAE \
192
+ --device cuda \
193
+ --gpu-id 1 \
194
+ --latent-dim 128 \
195
+ --num-encoder-blocks 3 \
196
+ --num-decoder-blocks 3 \
197
+ --dilation 4 \
198
+ --batch-size 512
199
+ ```
200
+
201
+ ### Example 3: CPU Training with Custom Output Directory
202
+ ```bash
203
+ ./src/JINGWEI.sh --data-path data/Alzheimer.csv \
204
+ --device cpu \
205
+ --results-dir ./my_results \
206
+ --max-epochs 50 \
207
+ --log-interval 10
208
+ ```
209
+
210
+ ## Method Descriptions
211
+
212
+ ### DMF (Deep Matrix Factorization)
213
+ - Uses row and column embeddings to capture latent patterns
214
+ - Suitable for collaborative filtering-style missing data
215
+ - Good for datasets with structured missing patterns
216
+
217
+ ### DCAE (Dilated Convolutional AutoEncoder)
218
+ - Uses dilated convolutions to capture long-range dependencies
219
+ - Suitable for sequential or structured protein data
220
+ - Better for complex missing data patterns
221
+
222
+ ## Monitoring Training
223
+
224
+ ### TensorBoard
225
+ ```bash
226
+ tensorboard --logdir results/logs
227
+ ```
228
+
229
+ ### Training Metrics
230
+ Monitor the following metrics:
231
+ - `train_loss`: Overall training loss
232
+ - `reconstruction_loss`: Data reconstruction quality
233
+ - `mask_loss`: Missing-data pattern prediction loss (lower is better)
234
+
235
+ ## Troubleshooting
236
+
237
+ ### Common Issues
238
+
239
+ 1. **CUDA Out of Memory**
240
+ - Reduce `--batch-size`
241
+ - Use `--device cpu` for CPU training
242
+
243
+ 2. **Shape Mismatch Errors**
244
+ - Check CSV format (the first column must contain sample IDs, because it is skipped on load)
245
+ - Verify data contains only numeric values
246
+
247
+ 3. **Slow Training**
248
+ - Use GPU acceleration with `--device cuda`
249
+ - Increase `--batch-size` if memory allows
250
+
251
+ 4. **Poor Performance**
252
+ - Adjust `--mask-weight` (try 0.1-0.8)
253
+ - Experiment with different `--hidden-dims`
254
+ - Enable `--early-stopping`
255
+
256
+ ### Getting Help
257
+
258
+ For help with parameters:
259
+ ```bash
260
+ python -m jingwei.train --help
261
+ ```
262
+
263
+ ## File Structure
264
+
265
+ ```
266
+ JINGWEI/
267
+ ├── README.md
268
+ ├── pyproject.toml
269
+ ├── requirements.txt
270
+ ├── src/
271
+ │ ├── JINGWEI.sh # Legacy training script (optional)
272
+ │ ├── train.py # Thin wrapper for package entry
273
+ │ ├── jingwei/
274
+ │ │ ├── __init__.py
275
+ │ │ ├── train.py # Package training entry
276
+ │ │ ├── datasets.py # Data loading utilities
277
+ │ │ ├── models.py # Model architectures (DMF/DCAE)
278
+ │ │ ├── dmf.py # DMF trainer
279
+ │ │ └── dcae.py # DCAE trainer
280
+ │ └── methods/ # Other baselines (not packaged)
281
+ └── data/
282
+ └── your_dataset.csv
283
+ ```
284
+
285
+
286
+ ## License
287
+
288
+ This project is licensed under the MIT License.
289
+
290
+
291
+ ## Changelog
292
+
293
+ ### Version 0.0.1
294
+ - Initial release
295
+ - Support for DMF and DCAE methods
296
+ - GPU acceleration
297
+ - Comprehensive parameter configuration
298
+ - TensorBoard integration
@@ -0,0 +1,50 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "jingwei"
7
+ version = "0.0.1"
8
+ description = "Proteomic data imputation framework with DMF and DCAE methods."
9
+ readme = "README.md"
10
+ requires-python = ">=3.12"
11
+ license = { text = "MIT" }
12
+ authors = [
13
+ { name = "JINGWEI Contributors" }
14
+ ]
15
+ keywords = ["imputation", "proteomics", "DMF", "DCAE", "deep-learning"]
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Intended Audience :: Science/Research",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Topic :: Scientific/Engineering :: Bio-Informatics",
23
+ "Topic :: Scientific/Engineering :: Artificial Intelligence"
24
+ ]
25
+
26
+ dependencies = [
27
+ "torch>=2.0.0",
28
+ "pytorch-lightning>=2.0.0",
29
+ "numpy>=1.24.0",
30
+ "pandas>=2.0.0",
31
+ "tensorboard>=2.13.0",
32
+ "scipy>=1.10.0",
33
+ "scikit-learn>=1.3.0"
34
+ ]
35
+
36
+ [project.optional-dependencies]
37
+ dev = [
38
+ "pytest>=7.4.0",
39
+ "pytest-cov>=4.1.0",
40
+ "black>=23.0.0",
41
+ "flake8>=6.0.0"
42
+ ]
43
+
44
+ [tool.setuptools]
45
+ package-dir = {"" = "src"}
46
+
47
+ [tool.setuptools.packages.find]
48
+ where = ["src"]
49
+ include = ["jingwei*"]
50
+
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,21 @@
1
+ """JINGWEI package for proteomic data imputation."""
2
+
3
+ __version__ = "0.0.1"
4
+
5
+ from .datasets import CSVDataset
6
+ from .models import DCAE, DMF
7
+ from .dcae import DCAEImputer
8
+ from .dmf import DMFImputer
9
+ from .train import ImputationTrainer, parse_arguments, main as train_main
10
+
11
+ __all__ = [
12
+ "CSVDataset",
13
+ "DCAE",
14
+ "DMF",
15
+ "DCAEImputer",
16
+ "DMFImputer",
17
+ "ImputationTrainer",
18
+ "parse_arguments",
19
+ "train_main",
20
+ "__version__",
21
+ ]