pymethyl2sam 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pymethyl2sam-0.1.2/PKG-INFO +267 -0
- pymethyl2sam-0.1.2/README.md +229 -0
- pymethyl2sam-0.1.2/pyproject.toml +114 -0
- pymethyl2sam-0.1.2/setup.cfg +4 -0
- pymethyl2sam-0.1.2/src/pymethyl2sam/__init__.py +17 -0
- pymethyl2sam-0.1.2/src/pymethyl2sam/core/__init__.py +6 -0
- pymethyl2sam-0.1.2/src/pymethyl2sam/core/errors.py +201 -0
- pymethyl2sam-0.1.2/src/pymethyl2sam/core/genomics.py +116 -0
- pymethyl2sam-0.1.2/src/pymethyl2sam/core/reference_genome.py +87 -0
- pymethyl2sam-0.1.2/src/pymethyl2sam/core/sequencing.py +221 -0
- pymethyl2sam-0.1.2/src/pymethyl2sam/io/__init__.py +5 -0
- pymethyl2sam-0.1.2/src/pymethyl2sam/io/genome_loader.py +166 -0
- pymethyl2sam-0.1.2/src/pymethyl2sam/simulator/__init__.py +15 -0
- pymethyl2sam-0.1.2/src/pymethyl2sam/simulator/simulator.py +208 -0
- pymethyl2sam-0.1.2/src/pymethyl2sam/simulator/summary.py +67 -0
- pymethyl2sam-0.1.2/src/pymethyl2sam/utils/__init__.py +6 -0
- pymethyl2sam-0.1.2/src/pymethyl2sam/utils/constants.py +55 -0
- pymethyl2sam-0.1.2/src/pymethyl2sam/utils/logging.py +60 -0
- pymethyl2sam-0.1.2/src/pymethyl2sam/utils/pysam.py +96 -0
- pymethyl2sam-0.1.2/src/pymethyl2sam.egg-info/PKG-INFO +267 -0
- pymethyl2sam-0.1.2/src/pymethyl2sam.egg-info/SOURCES.txt +29 -0
- pymethyl2sam-0.1.2/src/pymethyl2sam.egg-info/dependency_links.txt +1 -0
- pymethyl2sam-0.1.2/src/pymethyl2sam.egg-info/requires.txt +15 -0
- pymethyl2sam-0.1.2/src/pymethyl2sam.egg-info/top_level.txt +1 -0
- pymethyl2sam-0.1.2/tests/test_bam_generation.py +228 -0
- pymethyl2sam-0.1.2/tests/test_core_errors.py +163 -0
- pymethyl2sam-0.1.2/tests/test_core_genomics.py +184 -0
- pymethyl2sam-0.1.2/tests/test_core_sequencing.py +246 -0
- pymethyl2sam-0.1.2/tests/test_genome_loader.py +136 -0
- pymethyl2sam-0.1.2/tests/test_logging.py +93 -0
- pymethyl2sam-0.1.2/tests/test_simulator_summary.py +124 -0
@@ -0,0 +1,267 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: pymethyl2sam
|
3
|
+
Version: 0.1.2
|
4
|
+
Summary: Simulate DNA methylation and generate synthetic NGS reads with methylation tags.
|
5
|
+
Author-email: Yoni Weissler <yoni.weissler@gmail.com>
|
6
|
+
License: MIT
|
7
|
+
Project-URL: Homepage, https://github.com/yoni-w/pymethyl2sam
|
8
|
+
Project-URL: Repository, https://github.com/yoni-w/pymethyl2sam
|
9
|
+
Project-URL: Documentation, https://pymethyl2sam.readthedocs.io
|
10
|
+
Project-URL: Issues, https://github.com/yoni-w/pymethyl2sam/issues
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
20
|
+
Classifier: Operating System :: POSIX :: Linux
|
21
|
+
Classifier: Operating System :: MacOS :: MacOS X
|
22
|
+
Requires-Python: >=3.9
|
23
|
+
Description-Content-Type: text/markdown
|
24
|
+
Requires-Dist: pysam>=0.21.0
|
25
|
+
Requires-Dist: numpy>=1.21.0
|
26
|
+
Requires-Dist: pyyaml>=6.0
|
27
|
+
Requires-Dist: click>=8.0.0
|
28
|
+
Requires-Dist: typing-extensions>=4.0.0
|
29
|
+
Requires-Dist: importlib-metadata>=8.7.0
|
30
|
+
Provides-Extra: dev
|
31
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
32
|
+
Requires-Dist: black>=23.0.0; extra == "dev"
|
33
|
+
Requires-Dist: pylint>=3.2.0; extra == "dev"
|
34
|
+
Requires-Dist: pre-commit>=3.2.0; extra == "dev"
|
35
|
+
Requires-Dist: sphinx>=6.0.0; extra == "dev"
|
36
|
+
Requires-Dist: sphinx-rtd-theme>=1.2.0; extra == "dev"
|
37
|
+
Requires-Dist: coverage[toml]>=7.0.0; extra == "dev"
|
38
|
+
|
39
|
+
# pymethyl2sam
|
40
|
+
|
41
|
+
A Python package for simulating DNA methylation and generating synthetic NGS reads with methylation tags (MM/ML) in SAM/BAM format.
|
42
|
+
|
43
|
+
## Overview
|
44
|
+
|
45
|
+
pymethyl2sam provides a comprehensive framework for simulating DNA methylation patterns and generating synthetic next-generation sequencing reads with proper methylation tags. The package supports both stochastic (random) and deterministic (pattern-based) read generation modes, making it suitable for testing methylation analysis pipelines and educational purposes.
|
46
|
+
|
47
|
+
## Features
|
48
|
+
|
49
|
+
- **Dual Read Generation Modes**: Random (stochastic) and pattern-based (deterministic) simulation
|
50
|
+
- **Flexible Methylation Modeling**: Support for fully, partially, or unmethylated sites
|
51
|
+
- **Valid SAM/BAM Output**: Generates properly formatted files with MM/ML methylation tags
|
52
|
+
- **Configurable Parameters**: Coverage, read length, error rates, strand bias
|
53
|
+
- **Zero-based, Half-open Coordinates**: All intervals are `[start, end)`, matching the BED/BAM convention
|
54
|
+
- **Extensible Architecture**: Modular, layered design for easy extension
|
55
|
+
|
56
|
+
## Installation
|
57
|
+
|
58
|
+
### From PyPI
|
59
|
+
```bash
|
60
|
+
pip install pymethyl2sam
|
61
|
+
```
|
62
|
+
|
63
|
+
### From Source
|
64
|
+
```bash
|
65
|
+
git clone https://github.com/yoni-w/pymethyl2sam.git
|
66
|
+
cd pymethyl2sam
|
67
|
+
pip install -e .
|
68
|
+
```
|
69
|
+
|
70
|
+
### Development Installation
|
71
|
+
```bash
|
72
|
+
git clone https://github.com/yoni-w/pymethyl2sam.git
|
73
|
+
cd pymethyl2sam
|
74
|
+
pip install -e ".[dev]"
|
75
|
+
```
|
76
|
+
|
77
|
+
## Quick Start
|
78
|
+
|
79
|
+
### Basic Usage (Random Mode)
|
80
|
+
|
81
|
+
```python
|
82
|
+
from pymethyl2sam import MethylationSimulator
|
83
|
+
from pymethyl2sam.core import MethylationSite
|
84
|
+
from pymethyl2sam.core.genomics import StrandOrientation
|
85
|
+
from pymethyl2sam.core.reference_genome import Hg38ReferenceGenome
|
86
|
+
from pymethyl2sam.core.sequencing import ReadGenerator, RandomStrategy
|
87
|
+
from pymethyl2sam.simulator import SequencedChromosome, SequencedRegion
|
88
|
+
|
89
|
+
# Create simulator with random read generation
|
90
|
+
simulator = MethylationSimulator(
|
91
|
+
chromosomes=[
|
92
|
+
SequencedChromosome(
|
93
|
+
name="chr1",
|
94
|
+
length=20_000,
|
95
|
+
regions=[
|
96
|
+
SequencedRegion(
|
97
|
+
start=10100,
|
98
|
+
end=10450,
|
99
|
+
read_generator=ReadGenerator(strategy=RandomStrategy()),
|
100
|
+
)
|
101
|
+
],
|
102
|
+
cpg_sites=[
|
103
|
+
MethylationSite(position=10100, methylation_prob=1.0),
|
104
|
+
MethylationSite(position=10149, methylation_prob=1.0),
|
105
|
+
MethylationSite(position=10155, methylation_prob=0.0),
|
106
|
+
MethylationSite(position=10200, methylation_prob=0.0),
|
107
|
+
MethylationSite(position=10220, methylation_prob=1.0),
|
108
|
+
],
|
109
|
+
)
|
110
|
+
],
|
111
|
+
reference_genome=Hg38ReferenceGenome(),
|
112
|
+
)
|
113
|
+
|
114
|
+
# Generate reads and write to BAM
|
115
|
+
simulator.simulate_reads("output.bam")
|
116
|
+
```
|
117
|
+
|
118
|
+
### Pattern-Based Simulation
|
119
|
+
|
120
|
+
```python
|
121
|
+
from pymethyl2sam import MethylationSimulator
|
122
|
+
from pymethyl2sam.core import MethylationSite
|
123
|
+
from pymethyl2sam.core.genomics import StrandOrientation
|
124
|
+
from pymethyl2sam.core.reference_genome import Hg38ReferenceGenome
|
125
|
+
from pymethyl2sam.core.sequencing import ReadGenerator, PatternStrategy
|
126
|
+
from pymethyl2sam.simulator import SequencedChromosome, SequencedRegion
|
127
|
+
|
128
|
+
# Create simulator with pattern-based read generation
|
129
|
+
simulator = MethylationSimulator(
|
130
|
+
chromosomes=[
|
131
|
+
SequencedChromosome(
|
132
|
+
name="chr1",
|
133
|
+
length=20_000,
|
134
|
+
regions=[
|
135
|
+
SequencedRegion(
|
136
|
+
start=10100,
|
137
|
+
end=10250,
|
138
|
+
read_generator=ReadGenerator(
|
139
|
+
read_length=150,
|
140
|
+
strategy=PatternStrategy.from_offsets(
|
141
|
+
offsets=[0] * 10,
|
142
|
+
orientation=StrandOrientation.RANDOM,
|
143
|
+
),
|
144
|
+
),
|
145
|
+
)
|
146
|
+
],
|
147
|
+
cpg_sites=[
|
148
|
+
MethylationSite(position=10100, methylation_prob=1.0),
|
149
|
+
MethylationSite(position=10149, methylation_prob=1.0),
|
150
|
+
MethylationSite(position=10155, methylation_prob=0.0),
|
151
|
+
MethylationSite(position=10200, methylation_prob=0.0),
|
152
|
+
MethylationSite(position=10220, methylation_prob=1.0),
|
153
|
+
],
|
154
|
+
)
|
155
|
+
],
|
156
|
+
reference_genome=Hg38ReferenceGenome(),
|
157
|
+
)
|
158
|
+
|
159
|
+
# Generate reads and write to BAM
|
160
|
+
simulator.simulate_reads("pattern_output.bam")
|
161
|
+
```
|
162
|
+
|
163
|
+
## Architecture
|
164
|
+
|
165
|
+
The package is organized in layers by functional responsibility:
|
166
|
+
|
167
|
+
- **core/**: Domain logic for methylation modeling, sequencing, and errors
|
168
|
+
- **simulator/**: Orchestration of read and methylation simulation
|
169
|
+
- **io/**: Parsers and I/O for FASTA, BED, YAML, JSON
|
170
|
+
- **utils/**: Logging, constants, shared helpers
|
171
|
+
|
172
|
+
### Core Components
|
173
|
+
|
174
|
+
- `MethylationSimulator`: Main class for orchestrating simulation
|
175
|
+
- `SequencedChromosome`: Represents a chromosome with defined regions and methylation sites
|
176
|
+
- `SequencedRegion`: Defines genomic regions where reads should be generated
|
177
|
+
- `ReadGenerator`: Handles read generation with different strategies (Random/Pattern)
|
178
|
+
- `MethylationSite`: Represents individual methylation sites with probabilities
|
179
|
+
- `ReferenceGenomeProvider`: Interface for accessing reference genome sequences
|
180
|
+
|
181
|
+
## Configuration Examples
|
182
|
+
|
183
|
+
### YAML Configuration Files
|
184
|
+
|
185
|
+
See `examples/config_random.yaml` and `examples/templates_pattern.yaml` for configuration file examples.
|
186
|
+
|
187
|
+
## Coordinate System
|
188
|
+
|
189
|
+
All genomic coordinates follow zero-based, half-open intervals `[start, end)`:
|
190
|
+
|
191
|
+
- A 5-base interval starting at position 100 is `[100, 105)`
|
192
|
+
- Read coordinates and methylation positions adhere to this system
|
193
|
+
- MM/ML tags follow strand-specific SAM/BAM conventions
|
194
|
+
|
195
|
+
## Testing
|
196
|
+
|
197
|
+
Run the test suite:
|
198
|
+
|
199
|
+
```bash
|
200
|
+
pytest
|
201
|
+
```
|
202
|
+
|
203
|
+
Run with coverage:
|
204
|
+
|
205
|
+
```bash
|
206
|
+
pytest --cov=pymethyl2sam --cov-report=html
|
207
|
+
```
|
208
|
+
|
209
|
+
## Development
|
210
|
+
|
211
|
+
### Code Formatting
|
212
|
+
|
213
|
+
```bash
|
214
|
+
black src/ tests/
|
215
|
+
```
|
216
|
+
|
217
|
+
### Linting
|
218
|
+
|
219
|
+
```bash
|
220
|
+
pylint src/pymethyl2sam/
|
221
|
+
```
|
222
|
+
|
223
|
+
### Pre-commit Hooks
|
224
|
+
|
225
|
+
```bash
|
226
|
+
pre-commit install
|
227
|
+
```
|
228
|
+
|
229
|
+
## Examples
|
230
|
+
|
231
|
+
See the `examples/` directory for complete working examples:
|
232
|
+
|
233
|
+
- `simulate_cpgs.py`: Demonstrates both random and pattern-based simulation
|
234
|
+
- `config_random.yaml`: YAML configuration for random mode
|
235
|
+
- `templates_pattern.yaml`: YAML template for pattern-based simulation
|
236
|
+
|
237
|
+
## Documentation
|
238
|
+
|
239
|
+
For detailed API documentation, visit: https://pymethyl2sam.readthedocs.io
|
240
|
+
|
241
|
+
## License
|
242
|
+
|
243
|
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
244
|
+
|
245
|
+
## Contributing
|
246
|
+
|
247
|
+
1. Fork the repository
|
248
|
+
2. Create a feature branch
|
249
|
+
3. Make your changes
|
250
|
+
4. Add tests for new functionality
|
251
|
+
5. Ensure all tests pass
|
252
|
+
6. Submit a pull request
|
253
|
+
|
254
|
+
## Citation
|
255
|
+
|
256
|
+
If you use pymethyl2sam in your research, please cite:
|
257
|
+
|
258
|
+
```
|
259
|
+
pymethyl2sam: A Python package for simulating DNA methylation and generating synthetic NGS reads
|
260
|
+
Yoni Weissler, 2025
|
261
|
+
```
|
262
|
+
|
263
|
+
## Support
|
264
|
+
|
265
|
+
- **Issues**: https://github.com/yoni-w/pymethyl2sam/issues
|
266
|
+
- **Documentation**: https://pymethyl2sam.readthedocs.io
|
267
|
+
- **Email**: yoni.weissler@gmail.com
|
@@ -0,0 +1,229 @@
|
|
1
|
+
# pymethyl2sam
|
2
|
+
|
3
|
+
A Python package for simulating DNA methylation and generating synthetic NGS reads with methylation tags (MM/ML) in SAM/BAM format.
|
4
|
+
|
5
|
+
## Overview
|
6
|
+
|
7
|
+
pymethyl2sam provides a comprehensive framework for simulating DNA methylation patterns and generating synthetic next-generation sequencing reads with proper methylation tags. The package supports both stochastic (random) and deterministic (pattern-based) read generation modes, making it suitable for testing methylation analysis pipelines and educational purposes.
|
8
|
+
|
9
|
+
## Features
|
10
|
+
|
11
|
+
- **Dual Read Generation Modes**: Random (stochastic) and pattern-based (deterministic) simulation
|
12
|
+
- **Flexible Methylation Modeling**: Support for fully, partially, or unmethylated sites
|
13
|
+
- **Valid SAM/BAM Output**: Generates properly formatted files with MM/ML methylation tags
|
14
|
+
- **Configurable Parameters**: Coverage, read length, error rates, strand bias
|
15
|
+
- **Zero-based, Half-open Coordinates**: All intervals are `[start, end)`, matching the BED/BAM convention
|
16
|
+
- **Extensible Architecture**: Modular, layered design for easy extension
|
17
|
+
|
18
|
+
## Installation
|
19
|
+
|
20
|
+
### From PyPI
|
21
|
+
```bash
|
22
|
+
pip install pymethyl2sam
|
23
|
+
```
|
24
|
+
|
25
|
+
### From Source
|
26
|
+
```bash
|
27
|
+
git clone https://github.com/yoni-w/pymethyl2sam.git
|
28
|
+
cd pymethyl2sam
|
29
|
+
pip install -e .
|
30
|
+
```
|
31
|
+
|
32
|
+
### Development Installation
|
33
|
+
```bash
|
34
|
+
git clone https://github.com/yoni-w/pymethyl2sam.git
|
35
|
+
cd pymethyl2sam
|
36
|
+
pip install -e ".[dev]"
|
37
|
+
```
|
38
|
+
|
39
|
+
## Quick Start
|
40
|
+
|
41
|
+
### Basic Usage (Random Mode)
|
42
|
+
|
43
|
+
```python
|
44
|
+
from pymethyl2sam import MethylationSimulator
|
45
|
+
from pymethyl2sam.core import MethylationSite
|
46
|
+
from pymethyl2sam.core.genomics import StrandOrientation
|
47
|
+
from pymethyl2sam.core.reference_genome import Hg38ReferenceGenome
|
48
|
+
from pymethyl2sam.core.sequencing import ReadGenerator, RandomStrategy
|
49
|
+
from pymethyl2sam.simulator import SequencedChromosome, SequencedRegion
|
50
|
+
|
51
|
+
# Create simulator with random read generation
|
52
|
+
simulator = MethylationSimulator(
|
53
|
+
chromosomes=[
|
54
|
+
SequencedChromosome(
|
55
|
+
name="chr1",
|
56
|
+
length=20_000,
|
57
|
+
regions=[
|
58
|
+
SequencedRegion(
|
59
|
+
start=10100,
|
60
|
+
end=10450,
|
61
|
+
read_generator=ReadGenerator(strategy=RandomStrategy()),
|
62
|
+
)
|
63
|
+
],
|
64
|
+
cpg_sites=[
|
65
|
+
MethylationSite(position=10100, methylation_prob=1.0),
|
66
|
+
MethylationSite(position=10149, methylation_prob=1.0),
|
67
|
+
MethylationSite(position=10155, methylation_prob=0.0),
|
68
|
+
MethylationSite(position=10200, methylation_prob=0.0),
|
69
|
+
MethylationSite(position=10220, methylation_prob=1.0),
|
70
|
+
],
|
71
|
+
)
|
72
|
+
],
|
73
|
+
reference_genome=Hg38ReferenceGenome(),
|
74
|
+
)
|
75
|
+
|
76
|
+
# Generate reads and write to BAM
|
77
|
+
simulator.simulate_reads("output.bam")
|
78
|
+
```
|
79
|
+
|
80
|
+
### Pattern-Based Simulation
|
81
|
+
|
82
|
+
```python
|
83
|
+
from pymethyl2sam import MethylationSimulator
|
84
|
+
from pymethyl2sam.core import MethylationSite
|
85
|
+
from pymethyl2sam.core.genomics import StrandOrientation
|
86
|
+
from pymethyl2sam.core.reference_genome import Hg38ReferenceGenome
|
87
|
+
from pymethyl2sam.core.sequencing import ReadGenerator, PatternStrategy
|
88
|
+
from pymethyl2sam.simulator import SequencedChromosome, SequencedRegion
|
89
|
+
|
90
|
+
# Create simulator with pattern-based read generation
|
91
|
+
simulator = MethylationSimulator(
|
92
|
+
chromosomes=[
|
93
|
+
SequencedChromosome(
|
94
|
+
name="chr1",
|
95
|
+
length=20_000,
|
96
|
+
regions=[
|
97
|
+
SequencedRegion(
|
98
|
+
start=10100,
|
99
|
+
end=10250,
|
100
|
+
read_generator=ReadGenerator(
|
101
|
+
read_length=150,
|
102
|
+
strategy=PatternStrategy.from_offsets(
|
103
|
+
offsets=[0] * 10,
|
104
|
+
orientation=StrandOrientation.RANDOM,
|
105
|
+
),
|
106
|
+
),
|
107
|
+
)
|
108
|
+
],
|
109
|
+
cpg_sites=[
|
110
|
+
MethylationSite(position=10100, methylation_prob=1.0),
|
111
|
+
MethylationSite(position=10149, methylation_prob=1.0),
|
112
|
+
MethylationSite(position=10155, methylation_prob=0.0),
|
113
|
+
MethylationSite(position=10200, methylation_prob=0.0),
|
114
|
+
MethylationSite(position=10220, methylation_prob=1.0),
|
115
|
+
],
|
116
|
+
)
|
117
|
+
],
|
118
|
+
reference_genome=Hg38ReferenceGenome(),
|
119
|
+
)
|
120
|
+
|
121
|
+
# Generate reads and write to BAM
|
122
|
+
simulator.simulate_reads("pattern_output.bam")
|
123
|
+
```
|
124
|
+
|
125
|
+
## Architecture
|
126
|
+
|
127
|
+
The package is organized in layers by functional responsibility:
|
128
|
+
|
129
|
+
- **core/**: Domain logic for methylation modeling, sequencing, and errors
|
130
|
+
- **simulator/**: Orchestration of read and methylation simulation
|
131
|
+
- **io/**: Parsers and I/O for FASTA, BED, YAML, JSON
|
132
|
+
- **utils/**: Logging, constants, shared helpers
|
133
|
+
|
134
|
+
### Core Components
|
135
|
+
|
136
|
+
- `MethylationSimulator`: Main class for orchestrating simulation
|
137
|
+
- `SequencedChromosome`: Represents a chromosome with defined regions and methylation sites
|
138
|
+
- `SequencedRegion`: Defines genomic regions where reads should be generated
|
139
|
+
- `ReadGenerator`: Handles read generation with different strategies (Random/Pattern)
|
140
|
+
- `MethylationSite`: Represents individual methylation sites with probabilities
|
141
|
+
- `ReferenceGenomeProvider`: Interface for accessing reference genome sequences
|
142
|
+
|
143
|
+
## Configuration Examples
|
144
|
+
|
145
|
+
### YAML Configuration Files
|
146
|
+
|
147
|
+
See `examples/config_random.yaml` and `examples/templates_pattern.yaml` for configuration file examples.
|
148
|
+
|
149
|
+
## Coordinate System
|
150
|
+
|
151
|
+
All genomic coordinates follow zero-based, half-open intervals `[start, end)`:
|
152
|
+
|
153
|
+
- A 5-base interval starting at position 100 is `[100, 105)`
|
154
|
+
- Read coordinates and methylation positions adhere to this system
|
155
|
+
- MM/ML tags follow strand-specific SAM/BAM conventions
|
156
|
+
|
157
|
+
## Testing
|
158
|
+
|
159
|
+
Run the test suite:
|
160
|
+
|
161
|
+
```bash
|
162
|
+
pytest
|
163
|
+
```
|
164
|
+
|
165
|
+
Run with coverage:
|
166
|
+
|
167
|
+
```bash
|
168
|
+
pytest --cov=pymethyl2sam --cov-report=html
|
169
|
+
```
|
170
|
+
|
171
|
+
## Development
|
172
|
+
|
173
|
+
### Code Formatting
|
174
|
+
|
175
|
+
```bash
|
176
|
+
black src/ tests/
|
177
|
+
```
|
178
|
+
|
179
|
+
### Linting
|
180
|
+
|
181
|
+
```bash
|
182
|
+
pylint src/pymethyl2sam/
|
183
|
+
```
|
184
|
+
|
185
|
+
### Pre-commit Hooks
|
186
|
+
|
187
|
+
```bash
|
188
|
+
pre-commit install
|
189
|
+
```
|
190
|
+
|
191
|
+
## Examples
|
192
|
+
|
193
|
+
See the `examples/` directory for complete working examples:
|
194
|
+
|
195
|
+
- `simulate_cpgs.py`: Demonstrates both random and pattern-based simulation
|
196
|
+
- `config_random.yaml`: YAML configuration for random mode
|
197
|
+
- `templates_pattern.yaml`: YAML template for pattern-based simulation
|
198
|
+
|
199
|
+
## Documentation
|
200
|
+
|
201
|
+
For detailed API documentation, visit: https://pymethyl2sam.readthedocs.io
|
202
|
+
|
203
|
+
## License
|
204
|
+
|
205
|
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
206
|
+
|
207
|
+
## Contributing
|
208
|
+
|
209
|
+
1. Fork the repository
|
210
|
+
2. Create a feature branch
|
211
|
+
3. Make your changes
|
212
|
+
4. Add tests for new functionality
|
213
|
+
5. Ensure all tests pass
|
214
|
+
6. Submit a pull request
|
215
|
+
|
216
|
+
## Citation
|
217
|
+
|
218
|
+
If you use pymethyl2sam in your research, please cite:
|
219
|
+
|
220
|
+
```
|
221
|
+
pymethyl2sam: A Python package for simulating DNA methylation and generating synthetic NGS reads
|
222
|
+
Yoni Weissler, 2025
|
223
|
+
```
|
224
|
+
|
225
|
+
## Support
|
226
|
+
|
227
|
+
- **Issues**: https://github.com/yoni-w/pymethyl2sam/issues
|
228
|
+
- **Documentation**: https://pymethyl2sam.readthedocs.io
|
229
|
+
- **Email**: yoni.weissler@gmail.com
|
@@ -0,0 +1,114 @@
|
|
1
|
+
[build-system]
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
3
|
+
build-backend = "setuptools.build_meta"
|
4
|
+
|
5
|
+
[project]
|
6
|
+
name = "pymethyl2sam"
|
7
|
+
version = "0.1.2"
|
8
|
+
description = "Simulate DNA methylation and generate synthetic NGS reads with methylation tags."
|
9
|
+
readme = "README.md"
|
10
|
+
license = { text = "MIT" }
|
11
|
+
authors = [
|
12
|
+
{ name = "Yoni Weissler", email = "yoni.weissler@gmail.com" }
|
13
|
+
]
|
14
|
+
classifiers = [
|
15
|
+
"Development Status :: 3 - Alpha",
|
16
|
+
"Intended Audience :: Science/Research",
|
17
|
+
"License :: OSI Approved :: MIT License",
|
18
|
+
"Programming Language :: Python :: 3",
|
19
|
+
"Programming Language :: Python :: 3.9",
|
20
|
+
"Programming Language :: Python :: 3.10",
|
21
|
+
"Programming Language :: Python :: 3.11",
|
22
|
+
"Programming Language :: Python :: 3.12",
|
23
|
+
"Topic :: Scientific/Engineering :: Bio-Informatics",
|
24
|
+
"Operating System :: POSIX :: Linux",
|
25
|
+
"Operating System :: MacOS :: MacOS X"
|
26
|
+
]
|
27
|
+
requires-python = ">=3.9"
|
28
|
+
dependencies = [
|
29
|
+
"pysam>=0.21.0",
|
30
|
+
"numpy>=1.21.0",
|
31
|
+
"pyyaml>=6.0",
|
32
|
+
"click>=8.0.0",
|
33
|
+
"typing-extensions>=4.0.0",
|
34
|
+
"importlib-metadata>=8.7.0"
|
35
|
+
]
|
36
|
+
|
37
|
+
[project.optional-dependencies]
|
38
|
+
dev = [
|
39
|
+
"pytest>=7.0.0",
|
40
|
+
"black>=23.0.0",
|
41
|
+
"pylint>=3.2.0",
|
42
|
+
"pre-commit>=3.2.0",
|
43
|
+
"sphinx>=6.0.0",
|
44
|
+
"sphinx-rtd-theme>=1.2.0",
|
45
|
+
"coverage[toml]>=7.0.0"
|
46
|
+
]
|
47
|
+
|
48
|
+
[project.scripts]
|
49
|
+
|
50
|
+
[project.urls]
|
51
|
+
Homepage = "https://github.com/yoni-w/pymethyl2sam"
|
52
|
+
Repository = "https://github.com/yoni-w/pymethyl2sam"
|
53
|
+
Documentation = "https://pymethyl2sam.readthedocs.io"
|
54
|
+
Issues = "https://github.com/yoni-w/pymethyl2sam/issues"
|
55
|
+
|
56
|
+
[tool.setuptools.packages.find]
|
57
|
+
where = ["src"]
|
58
|
+
|
59
|
+
[tool.black]
|
60
|
+
line-length = 88
|
61
|
+
# Keep Black's targets aligned with `requires-python = ">=3.9"` and the
# Python 3.9-3.12 classifiers declared in the [project] table.
target-version = ["py39", "py310", "py311", "py312"]
|
62
|
+
include = '\.pyi?$'
|
63
|
+
extend-exclude = '''
|
64
|
+
/(
|
65
|
+
\.eggs
|
66
|
+
| \.git
|
67
|
+
| \.mypy_cache
|
68
|
+
| \.tox
|
69
|
+
| \.venv
|
70
|
+
| build
|
71
|
+
| dist
|
72
|
+
)/
|
73
|
+
'''
|
74
|
+
|
75
|
+
[tool.pylint.messages_control]
|
76
|
+
disable = [
|
77
|
+
"C0114", # missing-module-docstring
|
78
|
+
"C0115", # missing-class-docstring
|
79
|
+
"C0116", # missing-function-docstring
|
80
|
+
"W1203" # logging-fstring-interpolation
|
81
|
+
]
|
82
|
+
|
83
|
+
[tool.pytest.ini_options]
|
84
|
+
testpaths = ["tests"]
|
85
|
+
python_files = ["test_*.py"]
|
86
|
+
python_classes = ["Test*"]
|
87
|
+
python_functions = ["test_*"]
|
88
|
+
addopts = [
|
89
|
+
"--strict-markers",
|
90
|
+
"--strict-config"
|
91
|
+
]
|
92
|
+
|
93
|
+
[tool.coverage.run]
|
94
|
+
branch = true
|
95
|
+
source = ["src/pymethyl2sam"]
|
96
|
+
omit = [
|
97
|
+
"setup_dev.py",
|
98
|
+
"examples/*",
|
99
|
+
"tests/*",
|
100
|
+
]
|
101
|
+
|
102
|
+
[tool.coverage.report]
|
103
|
+
fail_under = 90
|
104
|
+
show_missing = true
|
105
|
+
skip_covered = true
|
106
|
+
exclude_lines = [
|
107
|
+
"pragma: no cover",
|
108
|
+
"if __name__ == .__main__.:"
|
109
|
+
]
|
110
|
+
|
111
|
+
[tool.sphinx]
|
112
|
+
builder = "html"
|
113
|
+
source-dir = "docs"
|
114
|
+
build-dir = "docs/_build"
|
@@ -0,0 +1,17 @@
|
|
1
|
+
"""
|
2
|
+
PyMethyl2Sam - A Python library for methylation data processing and BAM file generation.
|
3
|
+
|
4
|
+
Copyright (c) 2025
|
5
|
+
"""
|
6
|
+
|
7
|
+
from importlib.metadata import version
|
8
|
+
|
9
|
+
# Resolve the version from the installed distribution's metadata. When the
# metadata is missing (for example, the package is imported from a source
# checkout that was never installed), fall back to a placeholder instead of
# letting the import of the whole package fail.
from importlib.metadata import PackageNotFoundError

try:
    __version__ = version("pymethyl2sam")
except PackageNotFoundError:
    # PEP 440 local-version placeholder: clearly not a real release number.
    __version__ = "0.0.0+unknown"
|
10
|
+
|
11
|
+
# Import core components
|
12
|
+
from .core.genomics import *
|
13
|
+
from .core.sequencing import *
|
14
|
+
from .core.errors import *
|
15
|
+
from .simulator import MethylationSimulator
|
16
|
+
|
17
|
+
# Names exported by `from pymethyl2sam import *`. Only the simulator entry
# point is listed; names pulled in by the wildcard imports above remain
# reachable as attributes of the package but are not star-exported.
__all__ = ["MethylationSimulator"]
|