pymethyl2sam 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,267 @@
1
+ Metadata-Version: 2.4
2
+ Name: pymethyl2sam
3
+ Version: 0.1.2
4
+ Summary: Simulate DNA methylation and generate synthetic NGS reads with methylation tags.
5
+ Author-email: Yoni Weissler <yoni.weissler@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/yoni-w/pymethyl2sam
8
+ Project-URL: Repository, https://github.com/yoni-w/pymethyl2sam
9
+ Project-URL: Documentation, https://pymethyl2sam.readthedocs.io
10
+ Project-URL: Issues, https://github.com/yoni-w/pymethyl2sam/issues
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
20
+ Classifier: Operating System :: POSIX :: Linux
21
+ Classifier: Operating System :: MacOS :: MacOS X
22
+ Requires-Python: >=3.9
23
+ Description-Content-Type: text/markdown
24
+ Requires-Dist: pysam>=0.21.0
25
+ Requires-Dist: numpy>=1.21.0
26
+ Requires-Dist: pyyaml>=6.0
27
+ Requires-Dist: click>=8.0.0
28
+ Requires-Dist: typing-extensions>=4.0.0
29
+ Requires-Dist: importlib-metadata>=8.7.0
30
+ Provides-Extra: dev
31
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
32
+ Requires-Dist: black>=23.0.0; extra == "dev"
33
+ Requires-Dist: pylint>=3.2.0; extra == "dev"
34
+ Requires-Dist: pre-commit>=3.2.0; extra == "dev"
35
+ Requires-Dist: sphinx>=6.0.0; extra == "dev"
36
+ Requires-Dist: sphinx-rtd-theme>=1.2.0; extra == "dev"
37
+ Requires-Dist: coverage[toml]>=7.0.0; extra == "dev"
38
+
39
+ # pymethyl2sam
40
+
41
+ A Python package for simulating DNA methylation and generating synthetic NGS reads with methylation tags (MM/ML) in SAM/BAM format.
42
+
43
+ ## Overview
44
+
45
+ pymethyl2sam provides a comprehensive framework for simulating DNA methylation patterns and generating synthetic next-generation sequencing (NGS) reads annotated with MM/ML methylation tags. The package supports both stochastic (random) and deterministic (pattern-based) read generation modes, making it suitable for testing methylation analysis pipelines and for educational purposes.
46
+
47
+ ## Features
48
+
49
+ - **Dual Read Generation Modes**: Random (stochastic) and pattern-based (deterministic) simulation
50
+ - **Flexible Methylation Modeling**: Support for fully methylated, partially methylated, and unmethylated sites
51
+ - **Valid SAM/BAM Output**: Generates properly formatted files with MM/ML methylation tags
52
+ - **Configurable Parameters**: Coverage, read length, error rates, strand bias
53
+ - **Zero-based Coordinates**: Zero-based, half-open intervals `[start, end)`, consistent with bioinformatics conventions
54
+ - **Extensible Architecture**: Modular, layered design for easy extension
55
+
56
+ ## Installation
57
+
58
+ ### From PyPI
59
+ ```bash
60
+ pip install pymethyl2sam
61
+ ```
62
+
63
+ ### From Source
64
+ ```bash
65
+ git clone https://github.com/yoni-w/pymethyl2sam.git
66
+ cd pymethyl2sam
67
+ pip install -e .
68
+ ```
69
+
70
+ ### Development Installation
71
+ ```bash
72
+ git clone https://github.com/yoni-w/pymethyl2sam.git
73
+ cd pymethyl2sam
74
+ pip install -e ".[dev]"
75
+ ```
76
+
77
+ ## Quick Start
78
+
79
+ ### Basic Usage (Random Mode)
80
+
81
+ ```python
82
+ from pymethyl2sam import MethylationSimulator
83
+ from pymethyl2sam.core import MethylationSite
84
+ from pymethyl2sam.core.genomics import StrandOrientation
85
+ from pymethyl2sam.core.reference_genome import Hg38ReferenceGenome
86
+ from pymethyl2sam.core.sequencing import ReadGenerator, RandomStrategy
87
+ from pymethyl2sam.simulator import SequencedChromosome, SequencedRegion
88
+
89
+ # Create simulator with random read generation
90
+ simulator = MethylationSimulator(
91
+ chromosomes=[
92
+ SequencedChromosome(
93
+ name="chr1",
94
+ length=20_000,
95
+ regions=[
96
+ SequencedRegion(
97
+ start=10100,
98
+ end=10450,
99
+ read_generator=ReadGenerator(strategy=RandomStrategy()),
100
+ )
101
+ ],
102
+ cpg_sites=[
103
+ MethylationSite(position=10100, methylation_prob=1.0),
104
+ MethylationSite(position=10149, methylation_prob=1.0),
105
+ MethylationSite(position=10155, methylation_prob=0.0),
106
+ MethylationSite(position=10200, methylation_prob=0.0),
107
+ MethylationSite(position=10220, methylation_prob=1.0),
108
+ ],
109
+ )
110
+ ],
111
+ reference_genome=Hg38ReferenceGenome(),
112
+ )
113
+
114
+ # Generate reads and write to BAM
115
+ simulator.simulate_reads("output.bam")
116
+ ```
117
+
118
+ ### Pattern-Based Simulation
119
+
120
+ ```python
121
+ from pymethyl2sam import MethylationSimulator
122
+ from pymethyl2sam.core import MethylationSite
123
+ from pymethyl2sam.core.genomics import StrandOrientation
124
+ from pymethyl2sam.core.reference_genome import Hg38ReferenceGenome
125
+ from pymethyl2sam.core.sequencing import ReadGenerator, PatternStrategy
126
+ from pymethyl2sam.simulator import SequencedChromosome, SequencedRegion
127
+
128
+ # Create simulator with pattern-based read generation
129
+ simulator = MethylationSimulator(
130
+ chromosomes=[
131
+ SequencedChromosome(
132
+ name="chr1",
133
+ length=20_000,
134
+ regions=[
135
+ SequencedRegion(
136
+ start=10100,
137
+ end=10250,
138
+ read_generator=ReadGenerator(
139
+ read_length=150,
140
+ strategy=PatternStrategy.from_offsets(
141
+ offsets=[0] * 10,
142
+ orientation=StrandOrientation.RANDOM,
143
+ ),
144
+ ),
145
+ )
146
+ ],
147
+ cpg_sites=[
148
+ MethylationSite(position=10100, methylation_prob=1.0),
149
+ MethylationSite(position=10149, methylation_prob=1.0),
150
+ MethylationSite(position=10155, methylation_prob=0.0),
151
+ MethylationSite(position=10200, methylation_prob=0.0),
152
+ MethylationSite(position=10220, methylation_prob=1.0),
153
+ ],
154
+ )
155
+ ],
156
+ reference_genome=Hg38ReferenceGenome(),
157
+ )
158
+
159
+ # Generate reads and write to BAM
160
+ simulator.simulate_reads("pattern_output.bam")
161
+ ```
162
+
163
+ ## Architecture
164
+
165
+ The package is organized in layers by functional responsibility:
166
+
167
+ - **core/**: Domain logic for methylation modeling, sequencing, and errors
168
+ - **simulator/**: Orchestration of read and methylation simulation
169
+ - **io/**: Parsers and I/O for FASTA, BED, YAML, JSON
170
+ - **utils/**: Logging, constants, shared helpers
171
+
172
+ ### Core Components
173
+
174
+ - `MethylationSimulator`: Main class for orchestrating simulation
175
+ - `SequencedChromosome`: Represents a chromosome with defined regions and methylation sites
176
+ - `SequencedRegion`: Defines genomic regions where reads should be generated
177
+ - `ReadGenerator`: Handles read generation with different strategies (Random/Pattern)
178
+ - `MethylationSite`: Represents individual methylation sites with probabilities
179
+ - `ReferenceGenomeProvider`: Interface for accessing reference genome sequences
180
+
181
+ ## Configuration Examples
182
+
183
+ ### YAML Configuration Files
184
+
185
+ See `examples/config_random.yaml` and `examples/templates_pattern.yaml` for configuration file examples.
186
+
187
+ ## Coordinate System
188
+
189
+ All genomic coordinates follow zero-based, half-open intervals `[start, end)`:
190
+
191
+ - A 5-base interval starting at position 100 is `[100, 105)`, covering positions 100 through 104
192
+ - Read coordinates and methylation positions adhere to this system
193
+ - MM/ML tags follow strand-specific SAM/BAM conventions
194
+
195
+ ## Testing
196
+
197
+ Run the test suite:
198
+
199
+ ```bash
200
+ pytest
201
+ ```
202
+
203
+ Run with coverage:
204
+
205
+ ```bash
206
+ coverage run --source=pymethyl2sam -m pytest && coverage html
207
+ ```
208
+
209
+ ## Development
210
+
211
+ ### Code Formatting
212
+
213
+ ```bash
214
+ black src/ tests/
215
+ ```
216
+
217
+ ### Linting
218
+
219
+ ```bash
220
+ pylint src/pymethyl2sam/
221
+ ```
222
+
223
+ ### Pre-commit Hooks
224
+
225
+ ```bash
226
+ pre-commit install
227
+ ```
228
+
229
+ ## Examples
230
+
231
+ See the `examples/` directory for complete working examples:
232
+
233
+ - `simulate_cpgs.py`: Demonstrates both random and pattern-based simulation
234
+ - `config_random.yaml`: YAML configuration for random mode
235
+ - `templates_pattern.yaml`: YAML template for pattern-based simulation
236
+
237
+ ## Documentation
238
+
239
+ For detailed API documentation, visit: https://pymethyl2sam.readthedocs.io
240
+
241
+ ## License
242
+
243
+ This project is licensed under the MIT License - see the LICENSE file for details.
244
+
245
+ ## Contributing
246
+
247
+ 1. Fork the repository
248
+ 2. Create a feature branch
249
+ 3. Make your changes
250
+ 4. Add tests for new functionality
251
+ 5. Ensure all tests pass
252
+ 6. Submit a pull request
253
+
254
+ ## Citation
255
+
256
+ If you use pymethyl2sam in your research, please cite:
257
+
258
+ ```
259
+ pymethyl2sam: A Python package for simulating DNA methylation and generating synthetic NGS reads
260
+ Yoni Weissler, 2025
261
+ ```
262
+
263
+ ## Support
264
+
265
+ - **Issues**: https://github.com/yoni-w/pymethyl2sam/issues
266
+ - **Documentation**: https://pymethyl2sam.readthedocs.io
267
+ - **Email**: yoni.weissler@gmail.com
@@ -0,0 +1,19 @@
1
+ pymethyl2sam/__init__.py,sha256=i77oQcOBi5jaBcdVUKWuWut75vIk2P_7f3Iu_tQ7ZbU,389
2
+ pymethyl2sam/core/__init__.py,sha256=S1iH9qNjQKZEDq4pZ_FaS7rv5HfngChS5VI8q28sj9U,189
3
+ pymethyl2sam/core/errors.py,sha256=H-_Vk9ke-YzAuHn2FTnRtoNYgfB0ctAraeAjSqMSyIY,7400
4
+ pymethyl2sam/core/genomics.py,sha256=CPtfUkmjow-xrgR6vViGJB1s5Vnn8z9MfsCB_ReiFcE,3325
5
+ pymethyl2sam/core/reference_genome.py,sha256=XJBfz004h0-EEOaW2rLIz3uN9wFATjMBv5txT7aSmyc,3209
6
+ pymethyl2sam/core/sequencing.py,sha256=yg4WZbNccwY66FYUjzD7aSIR85fk-W_3n6N176F5apI,6619
7
+ pymethyl2sam/io/__init__.py,sha256=49n5vuex_Y6jHo-qLnPnfDwt1wx4IX2JDc7NZyrbv3s,128
8
+ pymethyl2sam/io/genome_loader.py,sha256=YE2rXnX9-K57xkxRgtG9Tc_HnkNeC_E4Lq_fAVitPKc,5231
9
+ pymethyl2sam/simulator/__init__.py,sha256=ptElyO2y3E6-QIS8cV1WgKhR_Fq3TwJoBPj0zr1Uvgw,340
10
+ pymethyl2sam/simulator/simulator.py,sha256=IWWypLK9zKvS-yCtyukNd3iZ_P_kI0eQEXqllkZN5y0,7632
11
+ pymethyl2sam/simulator/summary.py,sha256=8X0QpqPt8yKICMBRmcdz8Lp2DYkvfGh1y_2zmZeq-5Q,2501
12
+ pymethyl2sam/utils/__init__.py,sha256=5MyXuQEcjk9RfU87MD2cgUtJEJxkTS8oylT6Yr47MGQ,132
13
+ pymethyl2sam/utils/constants.py,sha256=2ldM9q1tYX78zIylrirqHT1-GXlm7kuQfmm2T5fopE8,1256
14
+ pymethyl2sam/utils/logging.py,sha256=bDUxk6cdhgjmLHiL0mWOOq72lYtYHSFWjwl0Yduk37E,1520
15
+ pymethyl2sam/utils/pysam.py,sha256=4897IPyCn4BV49QtdjcpPKgff7DF9P1fZicErHwaDEk,2834
16
+ pymethyl2sam-0.1.2.dist-info/METADATA,sha256=AUV_3J2YvJISPD3qOEovURoFMUiJE64EJfu0ZF8whAk,8579
17
+ pymethyl2sam-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
18
+ pymethyl2sam-0.1.2.dist-info/top_level.txt,sha256=9LCoHbvPZhQO2KrjnRLsPf2TIzz5u9VPkr1YtbDQ_TY,13
19
+ pymethyl2sam-0.1.2.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ pymethyl2sam