pyrion 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyrion-0.1.0/API_REFERENCE.md +3418 -0
- pyrion-0.1.0/LICENSE +21 -0
- pyrion-0.1.0/MANIFEST.in +13 -0
- pyrion-0.1.0/PKG-INFO +79 -0
- pyrion-0.1.0/README.md +30 -0
- pyrion-0.1.0/csrc/bed12parser.c +379 -0
- pyrion-0.1.0/csrc/chainparser.c +470 -0
- pyrion-0.1.0/csrc/faiparser.c +386 -0
- pyrion-0.1.0/csrc/fastaparser.c +501 -0
- pyrion-0.1.0/csrc/gtfparser.c +441 -0
- pyrion-0.1.0/csrc/narrowbedparser.c +324 -0
- pyrion-0.1.0/pyproject.toml +201 -0
- pyrion-0.1.0/pyrion/__init__.py +160 -0
- pyrion-0.1.0/pyrion/_bed12parser.pyi +35 -0
- pyrion-0.1.0/pyrion/_chainparser.pyi +41 -0
- pyrion-0.1.0/pyrion/_faiparser.pyi +24 -0
- pyrion-0.1.0/pyrion/_fastaparser.pyi +25 -0
- pyrion-0.1.0/pyrion/_gtfparser.pyi +15 -0
- pyrion-0.1.0/pyrion/_narrowbedparser.pyi +37 -0
- pyrion-0.1.0/pyrion/_version.py +10 -0
- pyrion-0.1.0/pyrion/config.py +221 -0
- pyrion-0.1.0/pyrion/constants.py +13 -0
- pyrion-0.1.0/pyrion/core/__init__.py +13 -0
- pyrion-0.1.0/pyrion/core/amino_acid_auxiliary.py +53 -0
- pyrion-0.1.0/pyrion/core/amino_acid_sequences.py +107 -0
- pyrion-0.1.0/pyrion/core/canonizer.py +103 -0
- pyrion-0.1.0/pyrion/core/codons.py +289 -0
- pyrion-0.1.0/pyrion/core/fai.py +90 -0
- pyrion-0.1.0/pyrion/core/gene_data.py +108 -0
- pyrion-0.1.0/pyrion/core/genes.py +527 -0
- pyrion-0.1.0/pyrion/core/genes_auxiliary.py +290 -0
- pyrion-0.1.0/pyrion/core/genome_alignment.py +223 -0
- pyrion-0.1.0/pyrion/core/genome_alignment_auxiliary.py +17 -0
- pyrion-0.1.0/pyrion/core/intervals.py +313 -0
- pyrion-0.1.0/pyrion/core/intervals_auxiliary.py +38 -0
- pyrion-0.1.0/pyrion/core/nucleotide_sequences.py +130 -0
- pyrion-0.1.0/pyrion/core/sequences_auxiliary.py +74 -0
- pyrion-0.1.0/pyrion/core/strand.py +42 -0
- pyrion-0.1.0/pyrion/core/translation.py +185 -0
- pyrion-0.1.0/pyrion/core_types.py +22 -0
- pyrion-0.1.0/pyrion/io/__init__.py +34 -0
- pyrion-0.1.0/pyrion/io/bed.py +50 -0
- pyrion-0.1.0/pyrion/io/chain.py +26 -0
- pyrion-0.1.0/pyrion/io/fai.py +50 -0
- pyrion-0.1.0/pyrion/io/fasta.py +222 -0
- pyrion-0.1.0/pyrion/io/gene_data.py +111 -0
- pyrion-0.1.0/pyrion/io/genepred.py +143 -0
- pyrion-0.1.0/pyrion/io/gtf.py +134 -0
- pyrion-0.1.0/pyrion/io/twobit.py +120 -0
- pyrion-0.1.0/pyrion/ops/__init__.py +164 -0
- pyrion-0.1.0/pyrion/ops/chain_serialization.py +178 -0
- pyrion-0.1.0/pyrion/ops/chain_slicing.py +232 -0
- pyrion-0.1.0/pyrion/ops/chains.py +351 -0
- pyrion-0.1.0/pyrion/ops/data_consistency.py +111 -0
- pyrion-0.1.0/pyrion/ops/entity_ops.py +221 -0
- pyrion-0.1.0/pyrion/ops/genes.py +104 -0
- pyrion-0.1.0/pyrion/ops/interval_collection_ops.py +220 -0
- pyrion-0.1.0/pyrion/ops/interval_ops.py +237 -0
- pyrion-0.1.0/pyrion/ops/interval_serialization.py +49 -0
- pyrion-0.1.0/pyrion/ops/interval_slicing.py +235 -0
- pyrion-0.1.0/pyrion/ops/intervals.py +176 -0
- pyrion-0.1.0/pyrion/ops/sequence_serialization.py +96 -0
- pyrion-0.1.0/pyrion/ops/transcript_serialization.py +148 -0
- pyrion-0.1.0/pyrion/ops/transcript_slicing.py +76 -0
- pyrion-0.1.0/pyrion/py.typed +2 -0
- pyrion-0.1.0/pyrion/utils/__init__.py +42 -0
- pyrion-0.1.0/pyrion/utils/amino_acid_encoding.py +318 -0
- pyrion-0.1.0/pyrion/utils/encoding.py +189 -0
- pyrion-0.1.0/pyrion/utils/numpy_utils.py +8 -0
- pyrion-0.1.0/pyrion/visualization.py +769 -0
- pyrion-0.1.0/pyrion.egg-info/PKG-INFO +79 -0
- pyrion-0.1.0/pyrion.egg-info/SOURCES.txt +89 -0
- pyrion-0.1.0/pyrion.egg-info/dependency_links.txt +1 -0
- pyrion-0.1.0/pyrion.egg-info/requires.txt +21 -0
- pyrion-0.1.0/pyrion.egg-info/top_level.txt +1 -0
- pyrion-0.1.0/setup.cfg +4 -0
- pyrion-0.1.0/setup.py +60 -0
- pyrion-0.1.0/tests/test_chain_slicing.py +548 -0
- pyrion-0.1.0/tests/test_chains.py +424 -0
- pyrion-0.1.0/tests/test_chains_comprehensive.py +414 -0
- pyrion-0.1.0/tests/test_entity_ops.py +572 -0
- pyrion-0.1.0/tests/test_fasta_io.py +827 -0
- pyrion-0.1.0/tests/test_genes_and_consistency.py +476 -0
- pyrion-0.1.0/tests/test_high_priority_modules.py +556 -0
- pyrion-0.1.0/tests/test_interval_collection_ops.py +537 -0
- pyrion-0.1.0/tests/test_interval_ops.py +566 -0
- pyrion-0.1.0/tests/test_interval_slicing.py +508 -0
- pyrion-0.1.0/tests/test_intervals.py +488 -0
- pyrion-0.1.0/tests/test_runner.py +284 -0
- pyrion-0.1.0/tests/test_sequences.py +612 -0
- pyrion-0.1.0/tests/test_serialization.py +622 -0
|
@@ -0,0 +1,3418 @@
|
|
|
1
|
+
# Pyrion API Reference
|
|
2
|
+
|
|
3
|
+
**Generated:** 2025-08-02 19:53:00
|
|
4
|
+
|
|
5
|
+
Complete API reference with full docstrings and signatures.
|
|
6
|
+
|
|
7
|
+
## Table of Contents
|
|
8
|
+
|
|
9
|
+
- [pyrion](#pyrion)
|
|
10
|
+
- [cite](#cite)
|
|
11
|
+
- [get_version](#get_version)
|
|
12
|
+
- [get_version_info](#get_version_info)
|
|
13
|
+
- [quick_start](#quick_start)
|
|
14
|
+
- [pyrion._bed12parser](#pyrion_bed12parser)
|
|
15
|
+
- [pyrion._chainparser](#pyrion_chainparser)
|
|
16
|
+
- [pyrion._faiparser](#pyrion_faiparser)
|
|
17
|
+
- [pyrion._fastaparser](#pyrion_fastaparser)
|
|
18
|
+
- [pyrion._gtfparser](#pyrion_gtfparser)
|
|
19
|
+
- [pyrion._narrowbedparser](#pyrion_narrowbedparser)
|
|
20
|
+
- [pyrion._version](#pyrion_version)
|
|
21
|
+
- [pyrion.config](#pyrionconfig)
|
|
22
|
+
- [PyrionConfig](#pyrionconfig-1)
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
# pyrion
|
|
27
|
+
|
|
28
|
+
Pyrion: A Fast and Efficient Bioinformatics Library for Genomic Data Processing
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
## Functions
|
|
32
|
+
|
|
33
|
+
### cite
|
|
34
|
+
|
|
35
|
+
**Signature:** `()`
|
|
36
|
+
|
|
37
|
+
Get citation information.
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
### get_version
|
|
41
|
+
|
|
42
|
+
**Signature:** `()`
|
|
43
|
+
|
|
44
|
+
Get pyrion version.
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
### get_version_info
|
|
48
|
+
|
|
49
|
+
**Signature:** `()`
|
|
50
|
+
|
|
51
|
+
Get pyrion version as tuple.
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
### quick_start
|
|
55
|
+
|
|
56
|
+
**Signature:** `()`
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
---
|
|
60
|
+
|
|
61
|
+
# pyrion._bed12parser
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
---
|
|
65
|
+
|
|
66
|
+
# pyrion._chainparser
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
# pyrion._faiparser
|
|
72
|
+
|
|
73
|
+
Fast FASTA indexer for generating FAI entries - Production optimized
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
# pyrion._fastaparser
|
|
79
|
+
|
|
80
|
+
Fast FASTA file parser with numpy integration - Production optimized
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
---
|
|
84
|
+
|
|
85
|
+
# pyrion._gtfparser
|
|
86
|
+
|
|
87
|
+
High-performance GTF parser
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
# pyrion._narrowbedparser
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
---
|
|
96
|
+
|
|
97
|
+
# pyrion._version
|
|
98
|
+
|
|
99
|
+
Version information for pyrion.
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
# pyrion.config
|
|
105
|
+
|
|
106
|
+
Global configuration for pyrion library.
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
## Classes
|
|
110
|
+
|
|
111
|
+
### PyrionConfig
|
|
112
|
+
|
|
113
|
+
Global configuration for pyrion library.
|
|
114
|
+
|
|
115
|
+
Manages parallelization settings and other global options.
|
|
116
|
+
|
|
117
|
+
**Signature:** `(self)`
|
|
118
|
+
|
|
119
|
+
#### Methods
|
|
120
|
+
|
|
121
|
+
**__init__**
|
|
122
|
+
|
|
123
|
+
*Signature:* `(self)`
|
|
124
|
+
|
|
125
|
+
Initialize self. See help(type(self)) for accurate signature.
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
**disable_parallel**
|
|
129
|
+
|
|
130
|
+
*Signature:* `(self) -> None`
|
|
131
|
+
|
|
132
|
+
Disable all parallel processing by setting max_cores to 0.
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
**enable_parallel**
|
|
136
|
+
|
|
137
|
+
*Signature:* `(self, max_cores: Optional[int] = None) -> None`
|
|
138
|
+
|
|
139
|
+
Enable parallel processing.
|
|
140
|
+
|
|
141
|
+
Args:
|
|
142
|
+
max_cores: Maximum cores to use. If None, uses default (min(available, 8)).
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
**get_optimal_processes**
|
|
146
|
+
|
|
147
|
+
*Signature:* `(self, n_items: int, max_processes: Optional[int] = None) -> int`
|
|
148
|
+
|
|
149
|
+
Determine optimal number of processes based on data size and configuration.
|
|
150
|
+
|
|
151
|
+
Args:
|
|
152
|
+
n_items: Number of items to process
|
|
153
|
+
max_processes: Override max processes for this call
|
|
154
|
+
|
|
155
|
+
Returns:
|
|
156
|
+
Optimal number of processes (0 means use sequential processing)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
**summary**
|
|
160
|
+
|
|
161
|
+
*Signature:* `(self) -> dict`
|
|
162
|
+
|
|
163
|
+
Get a summary of current configuration.
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
#### Properties
|
|
167
|
+
|
|
168
|
+
**available_cores** -> `int`
|
|
169
|
+
|
|
170
|
+
Get the number of available CPU cores (read-only).
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
**max_cores** -> `int`
|
|
174
|
+
|
|
175
|
+
Get the maximum number of cores to use for parallel processing.
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
**min_items_for_parallel** -> `int`
|
|
179
|
+
|
|
180
|
+
Get the minimum number of items required to use parallel processing.
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
**multiprocessing_available** -> `bool`
|
|
184
|
+
|
|
185
|
+
Check if multiprocessing is available (read-only).
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
## Functions
|
|
190
|
+
|
|
191
|
+
### disable_parallel
|
|
192
|
+
|
|
193
|
+
**Signature:** `() -> None`
|
|
194
|
+
|
|
195
|
+
Disable all parallel processing.
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
### enable_parallel
|
|
199
|
+
|
|
200
|
+
**Signature:** `(max_cores: Optional[int] = None) -> None`
|
|
201
|
+
|
|
202
|
+
Enable parallel processing with optional core limit.
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
### get_available_cores
|
|
206
|
+
|
|
207
|
+
**Signature:** `() -> int`
|
|
208
|
+
|
|
209
|
+
Get the number of available CPU cores.
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
### get_config_summary
|
|
213
|
+
|
|
214
|
+
**Signature:** `() -> dict`
|
|
215
|
+
|
|
216
|
+
Get a summary of current configuration.
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
### get_max_cores
|
|
220
|
+
|
|
221
|
+
**Signature:** `() -> int`
|
|
222
|
+
|
|
223
|
+
Get the current maximum number of cores for parallel processing.
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
### get_min_items_for_parallel
|
|
227
|
+
|
|
228
|
+
**Signature:** `() -> int`
|
|
229
|
+
|
|
230
|
+
Get the minimum number of items required for parallel processing.
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
### is_multiprocessing_available
|
|
234
|
+
|
|
235
|
+
**Signature:** `() -> bool`
|
|
236
|
+
|
|
237
|
+
Check if multiprocessing is available.
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
### set_max_cores
|
|
241
|
+
|
|
242
|
+
**Signature:** `(cores: int) -> None`
|
|
243
|
+
|
|
244
|
+
Set the maximum number of cores to use for parallel processing.
|
|
245
|
+
|
|
246
|
+
Args:
|
|
247
|
+
cores: Number of cores to use (1 to available_cores, or 0 to disable)
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
### set_min_items_for_parallel
|
|
251
|
+
|
|
252
|
+
**Signature:** `(items: int) -> None`
|
|
253
|
+
|
|
254
|
+
Set the minimum number of items required for parallel processing.
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
---
|
|
258
|
+
|
|
259
|
+
# pyrion.constants
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
---
|
|
263
|
+
|
|
264
|
+
# pyrion.core
|
|
265
|
+
|
|
266
|
+
Core genomics data structures and types.
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
---
|
|
270
|
+
|
|
271
|
+
# pyrion.core.amino_acid_auxiliary
|
|
272
|
+
|
|
273
|
+
Auxiliary functions for amino acid sequence objects.
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
## Functions
|
|
277
|
+
|
|
278
|
+
### calculate_molecular_weight
|
|
279
|
+
|
|
280
|
+
**Signature:** `(sequence) -> float`
|
|
281
|
+
|
|
282
|
+
Calculate approximate molecular weight in Daltons.
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
### count_amino_acids_in_sequence
|
|
286
|
+
|
|
287
|
+
**Signature:** `(sequence) -> Dict[str, int]`
|
|
288
|
+
|
|
289
|
+
Count occurrences of each amino acid type (ignoring masking).
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
### get_amino_acid_composition
|
|
293
|
+
|
|
294
|
+
**Signature:** `(sequence) -> Dict[str, float]`
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
---
|
|
298
|
+
|
|
299
|
+
# pyrion.core.amino_acid_sequences
|
|
300
|
+
|
|
301
|
+
Amino acid sequence representations and storage.
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
## Classes
|
|
305
|
+
|
|
306
|
+
### AminoAcidSequence
|
|
307
|
+
|
|
308
|
+
AminoAcidSequence(data: 'np.ndarray', metadata: 'Optional[Metadata]' = None)
|
|
309
|
+
|
|
310
|
+
**Signature:** `(self, data: 'np.ndarray', metadata: 'Optional[Metadata]' = None) -> None`
|
|
311
|
+
|
|
312
|
+
#### Methods
|
|
313
|
+
|
|
314
|
+
**__init__**
|
|
315
|
+
|
|
316
|
+
*Signature:* `(self, data: 'np.ndarray', metadata: 'Optional[Metadata]' = None) -> None`
|
|
317
|
+
|
|
318
|
+
Initialize self. See help(type(self)) for accurate signature.
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
**__repr__**
|
|
322
|
+
|
|
323
|
+
*Signature:* `(self) -> 'str'`
|
|
324
|
+
|
|
325
|
+
Return repr(self).
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
**__str__**
|
|
329
|
+
|
|
330
|
+
*Signature:* `(self) -> 'str'`
|
|
331
|
+
|
|
332
|
+
Return str(self).
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
**apply_masking**
|
|
336
|
+
|
|
337
|
+
*Signature:* `(self) -> "'AminoAcidSequence'"`
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
**count_amino_acids**
|
|
341
|
+
|
|
342
|
+
*Signature:* `(self) -> 'dict'`
|
|
343
|
+
|
|
344
|
+
Count occurrences of each amino acid type (ignoring masking) using vectorized operations.
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
**find_stop_codons**
|
|
348
|
+
|
|
349
|
+
*Signature:* `(self) -> 'np.ndarray'`
|
|
350
|
+
|
|
351
|
+
Find positions of stop codons in the sequence.
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
**from_string**
|
|
355
|
+
|
|
356
|
+
*Signature:* `(sequence: 'str', metadata: 'Optional[Metadata]' = None) -> "'AminoAcidSequence'"`
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
**get_amino_acid_content**
|
|
360
|
+
|
|
361
|
+
*Signature:* `(self) -> 'dict'`
|
|
362
|
+
|
|
363
|
+
Get amino acid composition as percentages.
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
**get_gap_positions**
|
|
367
|
+
|
|
368
|
+
*Signature:* `(self) -> 'np.ndarray'`
|
|
369
|
+
|
|
370
|
+
Get boolean array indicating which positions are gaps.
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
**get_masked_positions**
|
|
374
|
+
|
|
375
|
+
*Signature:* `(self) -> 'np.ndarray'`
|
|
376
|
+
|
|
377
|
+
Get boolean array indicating which positions are masked.
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
**get_stop_positions**
|
|
381
|
+
|
|
382
|
+
*Signature:* `(self) -> 'np.ndarray'`
|
|
383
|
+
|
|
384
|
+
Get boolean array indicating which positions are stop codons.
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
**molecular_weight**
|
|
388
|
+
|
|
389
|
+
*Signature:* `(self) -> 'float'`
|
|
390
|
+
|
|
391
|
+
Calculate approximate molecular weight in Daltons.
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
**remove_gaps**
|
|
395
|
+
|
|
396
|
+
*Signature:* `(self) -> "'AminoAcidSequence'"`
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
**remove_masking**
|
|
400
|
+
|
|
401
|
+
*Signature:* `(self) -> "'AminoAcidSequence'"`
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
**reverse**
|
|
405
|
+
|
|
406
|
+
*Signature:* `(self) -> "'AminoAcidSequence'"`
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
**slice**
|
|
410
|
+
|
|
411
|
+
*Signature:* `(self, start: 'int', end: 'int') -> "'AminoAcidSequence'"`
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
**to_fasta_string**
|
|
415
|
+
|
|
416
|
+
*Signature:* `(self, width: 'int' = 80, header: 'Optional[str]' = None) -> 'str'`
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
**to_string**
|
|
420
|
+
|
|
421
|
+
*Signature:* `(self) -> 'str'`
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
---
|
|
426
|
+
|
|
427
|
+
# pyrion.core.canonizer
|
|
428
|
+
|
|
429
|
+
Canonizer functions for selecting canonical transcripts from transcript lists.
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
## Functions
|
|
433
|
+
|
|
434
|
+
### DEFAULT_CANONIZER
|
|
435
|
+
|
|
436
|
+
**Signature:** `(transcripts: List, **kwargs) -> Optional[str]`
|
|
437
|
+
|
|
438
|
+
Default canonizer that selects the transcript with the longest total exonic length.
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
### first_transcript_canonizer
|
|
442
|
+
|
|
443
|
+
**Signature:** `(transcripts: List, **kwargs) -> Optional[str]`
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
### longest_cds_canonizer
|
|
447
|
+
|
|
448
|
+
**Signature:** `(transcripts: List, **kwargs) -> Optional[str]`
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
### longest_isoform_canonizer
|
|
452
|
+
|
|
453
|
+
**Signature:** `(transcripts: List, **kwargs) -> Optional[str]`
|
|
454
|
+
|
|
455
|
+
Default canonizer that selects the transcript with the longest total exonic length.
|
|
456
|
+
|
|
457
|
+
|
|
458
|
+
### longest_transcript_span_canonizer
|
|
459
|
+
|
|
460
|
+
**Signature:** `(transcripts: List, **kwargs) -> Optional[str]`
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
### most_exons_canonizer
|
|
464
|
+
|
|
465
|
+
**Signature:** `(transcripts: List, **kwargs) -> Optional[str]`
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
---
|
|
469
|
+
|
|
470
|
+
# pyrion.core.codons
|
|
471
|
+
|
|
472
|
+
Codon and codon sequence representations for genomic analysis.
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
## Classes
|
|
476
|
+
|
|
477
|
+
### Codon
|
|
478
|
+
|
|
479
|
+
Codon representation holding 1-3 non-gap symbols (incomplete codons allowed).
|
|
480
|
+
|
|
481
|
+
**Signature:** `(self, symbols: numpy.ndarray, is_rna: bool = False) -> None`
|
|
482
|
+
|
|
483
|
+
#### Methods
|
|
484
|
+
|
|
485
|
+
**__init__**
|
|
486
|
+
|
|
487
|
+
*Signature:* `(self, symbols: numpy.ndarray, is_rna: bool = False) -> None`
|
|
488
|
+
|
|
489
|
+
Initialize self. See help(type(self)) for accurate signature.
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
**__repr__**
|
|
493
|
+
|
|
494
|
+
*Signature:* `(self) -> str`
|
|
495
|
+
|
|
496
|
+
Return repr(self).
|
|
497
|
+
|
|
498
|
+
|
|
499
|
+
**__str__**
|
|
500
|
+
|
|
501
|
+
*Signature:* `(self) -> str`
|
|
502
|
+
|
|
503
|
+
Return str(self).
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
**is_complete**
|
|
507
|
+
|
|
508
|
+
*Signature:* `(self) -> bool`
|
|
509
|
+
|
|
510
|
+
Check if codon has exactly 3 non-gap symbols.
|
|
511
|
+
|
|
512
|
+
|
|
513
|
+
**to_string**
|
|
514
|
+
|
|
515
|
+
*Signature:* `(self) -> str`
|
|
516
|
+
|
|
517
|
+
|
|
518
|
+
**translate**
|
|
519
|
+
|
|
520
|
+
*Signature:* `(self, translation_table=None) -> str`
|
|
521
|
+
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
### CodonSequence
|
|
525
|
+
|
|
526
|
+
Codon sequence wrapper around NucleotideSequence with codon-wise operations.
|
|
527
|
+
|
|
528
|
+
**Signature:** `(self, nucleotide_sequence)`
|
|
529
|
+
|
|
530
|
+
#### Methods
|
|
531
|
+
|
|
532
|
+
**__init__**
|
|
533
|
+
|
|
534
|
+
*Signature:* `(self, nucleotide_sequence)`
|
|
535
|
+
|
|
536
|
+
Initialize from a NucleotideSequence object.
|
|
537
|
+
|
|
538
|
+
|
|
539
|
+
**__repr__**
|
|
540
|
+
|
|
541
|
+
*Signature:* `(self) -> str`
|
|
542
|
+
|
|
543
|
+
Return repr(self).
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
**__str__**
|
|
547
|
+
|
|
548
|
+
*Signature:* `(self) -> str`
|
|
549
|
+
|
|
550
|
+
Return str(self).
|
|
551
|
+
|
|
552
|
+
|
|
553
|
+
**get_codons**
|
|
554
|
+
|
|
555
|
+
*Signature:* `(self, preserve_gaps: bool = False) -> List[pyrion.core.codons.Codon]`
|
|
556
|
+
|
|
557
|
+
|
|
558
|
+
**get_frameshift_positions**
|
|
559
|
+
|
|
560
|
+
*Signature:* `(self) -> List[Tuple[int, int]]`
|
|
561
|
+
|
|
562
|
+
|
|
563
|
+
**insert_frameshift**
|
|
564
|
+
|
|
565
|
+
*Signature:* `(self, position: int) -> None`
|
|
566
|
+
|
|
567
|
+
Insert frameshift after the Nth valid nucleotide (atgcnATGCN) (0-based).
|
|
568
|
+
|
|
569
|
+
|
|
570
|
+
**remove_frameshift**
|
|
571
|
+
|
|
572
|
+
*Signature:* `(self, position: int) -> None`
|
|
573
|
+
|
|
574
|
+
Remove frameshift after the Nth valid nucleotide (atgcnATGCN).
|
|
575
|
+
|
|
576
|
+
|
|
577
|
+
**remove_gaps**
|
|
578
|
+
|
|
579
|
+
*Signature:* `(self) -> None`
|
|
580
|
+
|
|
581
|
+
|
|
582
|
+
**to_fasta_string**
|
|
583
|
+
|
|
584
|
+
*Signature:* `(self, width: int = 80, header: Optional[str] = None) -> str`
|
|
585
|
+
|
|
586
|
+
|
|
587
|
+
**translate**
|
|
588
|
+
|
|
589
|
+
*Signature:* `(self, translation_table=None)`
|
|
590
|
+
|
|
591
|
+
|
|
592
|
+
#### Properties
|
|
593
|
+
|
|
594
|
+
**data** -> `ndarray`
|
|
595
|
+
|
|
596
|
+
Access to underlying data array.
|
|
597
|
+
|
|
598
|
+
|
|
599
|
+
|
|
600
|
+
---
|
|
601
|
+
|
|
602
|
+
# pyrion.core.fai
|
|
603
|
+
|
|
604
|
+
FASTA index (FAI) functionality for efficient random access to large FASTA files.
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
## Classes
|
|
608
|
+
|
|
609
|
+
### FaiEntry
|
|
610
|
+
|
|
611
|
+
FASTA index entry for a single sequence.
|
|
612
|
+
|
|
613
|
+
**Signature:** `(self, name: 'str', length: 'int', offset: 'int', line_bases: 'int', line_bytes: 'int') -> None`
|
|
614
|
+
|
|
615
|
+
#### Methods
|
|
616
|
+
|
|
617
|
+
**__init__**
|
|
618
|
+
|
|
619
|
+
*Signature:* `(self, name: 'str', length: 'int', offset: 'int', line_bases: 'int', line_bytes: 'int') -> None`
|
|
620
|
+
|
|
621
|
+
Initialize self. See help(type(self)) for accurate signature.
|
|
622
|
+
|
|
623
|
+
|
|
624
|
+
**__repr__**
|
|
625
|
+
|
|
626
|
+
*Signature:* `(self) -> 'str'`
|
|
627
|
+
|
|
628
|
+
Return repr(self).
|
|
629
|
+
|
|
630
|
+
|
|
631
|
+
**__str__**
|
|
632
|
+
|
|
633
|
+
*Signature:* `(self) -> 'str'`
|
|
634
|
+
|
|
635
|
+
Format as FAI file line.
|
|
636
|
+
|
|
637
|
+
|
|
638
|
+
**from_fai_line**
|
|
639
|
+
|
|
640
|
+
*Signature:* `(line: 'str') -> "'FaiEntry'"`
|
|
641
|
+
|
|
642
|
+
|
|
643
|
+
**get_sequence_end_offset**
|
|
644
|
+
|
|
645
|
+
*Signature:* `(self) -> 'int'`
|
|
646
|
+
|
|
647
|
+
|
|
648
|
+
|
|
649
|
+
### FaiStore
|
|
650
|
+
|
|
651
|
+
Container for FASTA index entries with dict interface.
|
|
652
|
+
|
|
653
|
+
**Signature:** `(self, entries: 'Optional[Dict[str, FaiEntry]]' = None)`
|
|
654
|
+
|
|
655
|
+
#### Methods
|
|
656
|
+
|
|
657
|
+
**__init__**
|
|
658
|
+
|
|
659
|
+
*Signature:* `(self, entries: 'Optional[Dict[str, FaiEntry]]' = None)`
|
|
660
|
+
|
|
661
|
+
Initialize self. See help(type(self)) for accurate signature.
|
|
662
|
+
|
|
663
|
+
|
|
664
|
+
**__repr__**
|
|
665
|
+
|
|
666
|
+
*Signature:* `(self) -> 'str'`
|
|
667
|
+
|
|
668
|
+
Return repr(self).
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
**get_total_bases**
|
|
672
|
+
|
|
673
|
+
*Signature:* `(self) -> 'int'`
|
|
674
|
+
|
|
675
|
+
|
|
676
|
+
**load_from_file**
|
|
677
|
+
|
|
678
|
+
*Signature:* `(filename: 'Union[str, Path]') -> "'FaiStore'"`
|
|
679
|
+
|
|
680
|
+
|
|
681
|
+
**save_to_file**
|
|
682
|
+
|
|
683
|
+
*Signature:* `(self, filename: 'Union[str, Path]') -> 'None'`
|
|
684
|
+
|
|
685
|
+
|
|
686
|
+
|
|
687
|
+
---
|
|
688
|
+
|
|
689
|
+
# pyrion.core.gene_data
|
|
690
|
+
|
|
691
|
+
Gene data storage with multiple mappings.
|
|
692
|
+
|
|
693
|
+
|
|
694
|
+
## Classes
|
|
695
|
+
|
|
696
|
+
### GeneData
|
|
697
|
+
|
|
698
|
+
Gene data container with optional mappings for gene-transcript relationships, biotypes, and names.
|
|
699
|
+
|
|
700
|
+
**Signature:** `(self, source_file: Optional[str] = None)`
|
|
701
|
+
|
|
702
|
+
#### Methods
|
|
703
|
+
|
|
704
|
+
**__init__**
|
|
705
|
+
|
|
706
|
+
*Signature:* `(self, source_file: Optional[str] = None)`
|
|
707
|
+
|
|
708
|
+
Initialize self. See help(type(self)) for accurate signature.
|
|
709
|
+
|
|
710
|
+
|
|
711
|
+
**__repr__**
|
|
712
|
+
|
|
713
|
+
*Signature:* `(self) -> str`
|
|
714
|
+
|
|
715
|
+
Return repr(self).
|
|
716
|
+
|
|
717
|
+
|
|
718
|
+
**add_gene_name**
|
|
719
|
+
|
|
720
|
+
*Signature:* `(self, gene_id: str, gene_name: str) -> None`
|
|
721
|
+
|
|
722
|
+
|
|
723
|
+
**add_gene_transcript_mapping**
|
|
724
|
+
|
|
725
|
+
*Signature:* `(self, gene_id: str, transcript_id: str) -> None`
|
|
726
|
+
|
|
727
|
+
|
|
728
|
+
**add_transcript_biotype**
|
|
729
|
+
|
|
730
|
+
*Signature:* `(self, transcript_id: str, biotype: str) -> None`
|
|
731
|
+
|
|
732
|
+
|
|
733
|
+
**get_biotype_count**
|
|
734
|
+
|
|
735
|
+
*Signature:* `(self) -> int`
|
|
736
|
+
|
|
737
|
+
|
|
738
|
+
**get_gene**
|
|
739
|
+
|
|
740
|
+
*Signature:* `(self, transcript_id: str) -> Optional[str]`
|
|
741
|
+
|
|
742
|
+
|
|
743
|
+
**get_gene_name**
|
|
744
|
+
|
|
745
|
+
*Signature:* `(self, gene_id: str) -> Optional[str]`
|
|
746
|
+
|
|
747
|
+
|
|
748
|
+
**get_gene_name_count**
|
|
749
|
+
|
|
750
|
+
*Signature:* `(self) -> int`
|
|
751
|
+
|
|
752
|
+
|
|
753
|
+
**get_gene_transcript_count**
|
|
754
|
+
|
|
755
|
+
*Signature:* `(self) -> int`
|
|
756
|
+
|
|
757
|
+
|
|
758
|
+
**get_genes_by_name**
|
|
759
|
+
|
|
760
|
+
*Signature:* `(self, gene_name: str) -> Set[str]`
|
|
761
|
+
|
|
762
|
+
|
|
763
|
+
**get_transcript_biotype**
|
|
764
|
+
|
|
765
|
+
*Signature:* `(self, transcript_id: str) -> Optional[str]`
|
|
766
|
+
|
|
767
|
+
|
|
768
|
+
**get_transcripts**
|
|
769
|
+
|
|
770
|
+
*Signature:* `(self, gene_id: str) -> Set[str]`
|
|
771
|
+
|
|
772
|
+
|
|
773
|
+
**has_biotype_mapping**
|
|
774
|
+
|
|
775
|
+
*Signature:* `(self) -> bool`
|
|
776
|
+
|
|
777
|
+
|
|
778
|
+
**has_gene**
|
|
779
|
+
|
|
780
|
+
*Signature:* `(self, gene_id: str) -> bool`
|
|
781
|
+
|
|
782
|
+
|
|
783
|
+
**has_gene_name_mapping**
|
|
784
|
+
|
|
785
|
+
*Signature:* `(self) -> bool`
|
|
786
|
+
|
|
787
|
+
|
|
788
|
+
**has_gene_transcript_mapping**
|
|
789
|
+
|
|
790
|
+
*Signature:* `(self) -> bool`
|
|
791
|
+
|
|
792
|
+
|
|
793
|
+
**has_transcript**
|
|
794
|
+
|
|
795
|
+
*Signature:* `(self, transcript_id: str) -> bool`
|
|
796
|
+
|
|
797
|
+
|
|
798
|
+
**summary**
|
|
799
|
+
|
|
800
|
+
*Signature:* `(self) -> str`
|
|
801
|
+
|
|
802
|
+
|
|
803
|
+
#### Properties
|
|
804
|
+
|
|
805
|
+
**gene_ids** -> `Set`
|
|
806
|
+
|
|
807
|
+
|
|
808
|
+
**transcript_ids** -> `Set`
|
|
809
|
+
|
|
810
|
+
|
|
811
|
+
|
|
812
|
+
---
|
|
813
|
+
|
|
814
|
+
# pyrion.core.genes
|
|
815
|
+
|
|
816
|
+
Gene and transcript representations.
|
|
817
|
+
|
|
818
|
+
|
|
819
|
+
## Classes
|
|
820
|
+
|
|
821
|
+
### Gene
|
|
822
|
+
|
|
823
|
+
Gene containing multiple transcripts with computed genomic bounds.
|
|
824
|
+
|
|
825
|
+
**Signature:** `(self, gene_id: str, transcripts: List[pyrion.core.genes.Transcript], gene_name: Optional[str] = None)`
|
|
826
|
+
|
|
827
|
+
#### Methods
|
|
828
|
+
|
|
829
|
+
**__init__**
|
|
830
|
+
|
|
831
|
+
*Signature:* `(self, gene_id: str, transcripts: List[pyrion.core.genes.Transcript], gene_name: Optional[str] = None)`
|
|
832
|
+
|
|
833
|
+
Initialize self. See help(type(self)) for accurate signature.
|
|
834
|
+
|
|
835
|
+
|
|
836
|
+
**__repr__**
|
|
837
|
+
|
|
838
|
+
*Signature:* `(self) -> str`
|
|
839
|
+
|
|
840
|
+
Return repr(self).
|
|
841
|
+
|
|
842
|
+
|
|
843
|
+
**apply_canonizer**
|
|
844
|
+
|
|
845
|
+
*Signature:* `(self, canonizer_func: Optional[Callable] = None, **kwargs) -> None`
|
|
846
|
+
|
|
847
|
+
Set the canonical transcript using a canonizer function.
|
|
848
|
+
|
|
849
|
+
Args:
|
|
850
|
+
canonizer_func: Function that takes transcripts list and returns canonical transcript ID.
|
|
851
|
+
If None, uses the default longest_isoform_canonizer.
|
|
852
|
+
**kwargs: Additional arguments passed to the canonizer function.
|
|
853
|
+
|
|
854
|
+
|
|
855
|
+
**clear_canonical_transcript**
|
|
856
|
+
|
|
857
|
+
*Signature:* `(self) -> None`
|
|
858
|
+
|
|
859
|
+
|
|
860
|
+
**get_transcript**
|
|
861
|
+
|
|
862
|
+
*Signature:* `(self, transcript_id: str) -> Optional[pyrion.core.genes.Transcript]`
|
|
863
|
+
|
|
864
|
+
|
|
865
|
+
**has_transcript**
|
|
866
|
+
|
|
867
|
+
*Signature:* `(self, transcript_id: str) -> bool`
|
|
868
|
+
|
|
869
|
+
|
|
870
|
+
**set_canonical_transcript**
|
|
871
|
+
|
|
872
|
+
*Signature:* `(self, transcript_id: str) -> None`
|
|
873
|
+
|
|
874
|
+
|
|
875
|
+
#### Properties
|
|
876
|
+
|
|
877
|
+
**canonical_transcript** -> `Optional`
|
|
878
|
+
|
|
879
|
+
|
|
880
|
+
**canonical_transcript_id** -> `Optional`
|
|
881
|
+
|
|
882
|
+
|
|
883
|
+
**chrom** -> `str`
|
|
884
|
+
|
|
885
|
+
|
|
886
|
+
**has_canonical_transcript** -> `bool`
|
|
887
|
+
|
|
888
|
+
|
|
889
|
+
**is_coding** -> `bool`
|
|
890
|
+
|
|
891
|
+
Check if gene has any coding transcripts.
|
|
892
|
+
|
|
893
|
+
|
|
894
|
+
**length** -> `int`
|
|
895
|
+
|
|
896
|
+
|
|
897
|
+
**strand** -> `Strand`
|
|
898
|
+
|
|
899
|
+
|
|
900
|
+
**transcript_ids** -> `Set`
|
|
901
|
+
|
|
902
|
+
|
|
903
|
+
**transcripts** -> `List`
|
|
904
|
+
|
|
905
|
+
|
|
906
|
+
|
|
907
|
+
### Transcript
|
|
908
|
+
|
|
909
|
+
Transcript(blocks: numpy.ndarray, strand: pyrion.core.strand.Strand, chrom: str, id: str, cds_start: Optional[int] = None, cds_end: Optional[int] = None, biotype: Optional[str] = None)
|
|
910
|
+
|
|
911
|
+
**Signature:** `(self, blocks: numpy.ndarray, strand: pyrion.core.strand.Strand, chrom: str, id: str, cds_start: Optional[int] = None, cds_end: Optional[int] = None, biotype: Optional[str] = None) -> None`
|
|
912
|
+
|
|
913
|
+
#### Methods
|
|
914
|
+
|
|
915
|
+
**__init__**
|
|
916
|
+
|
|
917
|
+
*Signature:* `(self, blocks: numpy.ndarray, strand: pyrion.core.strand.Strand, chrom: str, id: str, cds_start: Optional[int] = None, cds_end: Optional[int] = None, biotype: Optional[str] = None) -> None`
|
|
918
|
+
|
|
919
|
+
Initialize self. See help(type(self)) for accurate signature.
|
|
920
|
+
|
|
921
|
+
|
|
922
|
+
**__repr__**
|
|
923
|
+
|
|
924
|
+
*Signature:* `(self) -> str`
|
|
925
|
+
|
|
926
|
+
Return repr(self).
|
|
927
|
+
|
|
928
|
+
|
|
929
|
+
**__str__**
|
|
930
|
+
|
|
931
|
+
*Signature:* `(self) -> str`
|
|
932
|
+
|
|
933
|
+
Return str(self).
|
|
934
|
+
|
|
935
|
+
|
|
936
|
+
**compute_flanks**
|
|
937
|
+
|
|
938
|
+
*Signature:* `(self, flank_size: int, chrom_sizes: Dict[str, int]) -> Tuple[Optional[pyrion.core.intervals.GenomicInterval], Optional[pyrion.core.intervals.GenomicInterval]]`
|
|
939
|
+
|
|
940
|
+
|
|
941
|
+
**contains_interval**
|
|
942
|
+
|
|
943
|
+
*Signature:* `(self, interval: pyrion.core.intervals.GenomicInterval) -> bool`
|
|
944
|
+
|
|
945
|
+
|
|
946
|
+
**get_annotated_regions**
|
|
947
|
+
|
|
948
|
+
*Signature:* `(self, chrom_sizes: dict, flank_size: int = 5000) -> pyrion.core.intervals.AnnotatedIntervalSet`
|
|
949
|
+
|
|
950
|
+
|
|
951
|
+
**splice_junctions**
|
|
952
|
+
|
|
953
|
+
*Signature:* `(self)`
|
|
954
|
+
|
|
955
|
+
Generator yielding splice junction coordinates (donor, acceptor) for transcript.
|
|
956
|
+
|
|
957
|
+
|
|
958
|
+
#### Properties
|
|
959
|
+
|
|
960
|
+
**is_coding** -> `bool`
|
|
961
|
+
|
|
962
|
+
|
|
963
|
+
|
|
964
|
+
### TranscriptsCollection
|
|
965
|
+
|
|
966
|
+
Container for many transcripts.
|
|
967
|
+
|
|
968
|
+
**Signature:** `(self, transcripts: Optional[List[pyrion.core.genes.Transcript]] = None, source_file: Optional[str] = None)`
|
|
969
|
+
|
|
970
|
+
#### Methods
|
|
971
|
+
|
|
972
|
+
**__init__**
|
|
973
|
+
|
|
974
|
+
*Signature:* `(self, transcripts: Optional[List[pyrion.core.genes.Transcript]] = None, source_file: Optional[str] = None)`
|
|
975
|
+
|
|
976
|
+
Initialize self. See help(type(self)) for accurate signature.
|
|
977
|
+
|
|
978
|
+
|
|
979
|
+
**__repr__**
|
|
980
|
+
|
|
981
|
+
*Signature:* `(self) -> str`
|
|
982
|
+
|
|
983
|
+
Return repr(self).
|
|
984
|
+
|
|
985
|
+
|
|
986
|
+
**__str__**
|
|
987
|
+
|
|
988
|
+
*Signature:* `(self) -> str`
|
|
989
|
+
|
|
990
|
+
Return str(self).
|
|
991
|
+
|
|
992
|
+
|
|
993
|
+
**apply_gene_canonical_mapping**
|
|
994
|
+
|
|
995
|
+
*Signature:* `(self, gene_to_canonical: Dict[str, str]) -> None`
|
|
996
|
+
|
|
997
|
+
gene_to_canonical: Dictionary mapping gene IDs to canonical transcript IDs
|
|
998
|
+
|
|
999
|
+
|
|
1000
|
+
**bind_gene_data**
|
|
1001
|
+
|
|
1002
|
+
*Signature:* `(self, gene_data: 'GeneData') -> None`
|
|
1003
|
+
|
|
1004
|
+
|
|
1005
|
+
**canonize_transcripts**
|
|
1006
|
+
|
|
1007
|
+
*Signature:* `(self, canonizer_func: Optional[Callable] = None, **kwargs) -> None`
|
|
1008
|
+
|
|
1009
|
+
|
|
1010
|
+
**from_json**
|
|
1011
|
+
|
|
1012
|
+
*Signature:* `(file_path: Union[str, pathlib.Path]) -> 'TranscriptsCollection'`
|
|
1013
|
+
|
|
1014
|
+
|
|
1015
|
+
**get_all_chromosomes**
|
|
1016
|
+
|
|
1017
|
+
*Signature:* `(self) -> List[str]`
|
|
1018
|
+
|
|
1019
|
+
|
|
1020
|
+
**get_by_chrom**
|
|
1021
|
+
|
|
1022
|
+
*Signature:* `(self, chrom: str) -> List[pyrion.core.genes.Transcript]`
|
|
1023
|
+
|
|
1024
|
+
|
|
1025
|
+
**get_by_gene_name**
|
|
1026
|
+
|
|
1027
|
+
*Signature:* `(self, gene_name: str) -> List[pyrion.core.genes.Gene]`
|
|
1028
|
+
|
|
1029
|
+
Get Gene objects by gene name. Multiple genes can have the same name.
|
|
1030
|
+
|
|
1031
|
+
|
|
1032
|
+
**get_by_id**
|
|
1033
|
+
|
|
1034
|
+
*Signature:* `(self, transcript_id: str) -> Optional[pyrion.core.genes.Transcript]`
|
|
1035
|
+
|
|
1036
|
+
|
|
1037
|
+
**get_canonical_transcripts**
|
|
1038
|
+
|
|
1039
|
+
*Signature:* `(self) -> 'TranscriptsCollection'`
|
|
1040
|
+
|
|
1041
|
+
|
|
1042
|
+
**get_gene_by_id**
|
|
1043
|
+
|
|
1044
|
+
*Signature:* `(self, gene_id: str) -> Optional[pyrion.core.genes.Gene]`
|
|
1045
|
+
|
|
1046
|
+
|
|
1047
|
+
**get_gene_by_transcript_id**
|
|
1048
|
+
|
|
1049
|
+
*Signature:* `(self, transcript_id: str) -> Optional[pyrion.core.genes.Gene]`
|
|
1050
|
+
|
|
1051
|
+
|
|
1052
|
+
**get_genes_without_canonical_transcript**
|
|
1053
|
+
|
|
1054
|
+
*Signature:* `(self) -> List[pyrion.core.genes.Gene]`
|
|
1055
|
+
|
|
1056
|
+
|
|
1057
|
+
**get_transcript_ids_by_chrom**
|
|
1058
|
+
|
|
1059
|
+
*Signature:* `(self, chrom: str) -> List[str]`
|
|
1060
|
+
|
|
1061
|
+
|
|
1062
|
+
**get_transcripts_in_interval**
|
|
1063
|
+
|
|
1064
|
+
*Signature:* `(self, interval: pyrion.core.intervals.GenomicInterval, include_partial: bool = True) -> 'TranscriptsCollection'`
|
|
1065
|
+
|
|
1066
|
+
|
|
1067
|
+
**save_to_bed12**
|
|
1068
|
+
|
|
1069
|
+
*Signature:* `(self, file_path: Union[str, pathlib.Path]) -> None`
|
|
1070
|
+
|
|
1071
|
+
|
|
1072
|
+
**save_to_json**
|
|
1073
|
+
|
|
1074
|
+
*Signature:* `(self, file_path: Union[str, pathlib.Path]) -> None`
|
|
1075
|
+
|
|
1076
|
+
|
|
1077
|
+
**summary**
|
|
1078
|
+
|
|
1079
|
+
*Signature:* `(self) -> str`
|
|
1080
|
+
|
|
1081
|
+
|
|
1082
|
+
**to_bed12_string**
|
|
1083
|
+
|
|
1084
|
+
*Signature:* `(self) -> str`
|
|
1085
|
+
|
|
1086
|
+
|
|
1087
|
+
#### Properties
|
|
1088
|
+
|
|
1089
|
+
**applied_biotypes** -> `bool`
|
|
1090
|
+
|
|
1091
|
+
|
|
1092
|
+
**applied_gene_names** -> `bool`
|
|
1093
|
+
|
|
1094
|
+
|
|
1095
|
+
**available_data_mappings** -> `List`
|
|
1096
|
+
|
|
1097
|
+
|
|
1098
|
+
**gene_ids** -> `Set`
|
|
1099
|
+
|
|
1100
|
+
|
|
1101
|
+
**genes** -> `List`
|
|
1102
|
+
|
|
1103
|
+
|
|
1104
|
+
**has_gene_mapping** -> `bool`
|
|
1105
|
+
|
|
1106
|
+
|
|
1107
|
+
|
|
1108
|
+
---
|
|
1109
|
+
|
|
1110
|
+
# pyrion.core.genes_auxiliary
|
|
1111
|
+
|
|
1112
|
+
Auxiliary functions for gene and transcript objects.
|
|
1113
|
+
|
|
1114
|
+
|
|
1115
|
+
## Functions
|
|
1116
|
+
|
|
1117
|
+
### build_annotated_regions
|
|
1118
|
+
|
|
1119
|
+
**Signature:** `(transcript, chrom_sizes: dict, flank_size: int = 5000) -> pyrion.core.intervals.AnnotatedIntervalSet`
|
|
1120
|
+
|
|
1121
|
+
|
|
1122
|
+
### compute_flanks
|
|
1123
|
+
|
|
1124
|
+
**Signature:** `(transcript, flank_size: int, chrom_sizes: Dict[str, int]) -> Tuple[Optional[pyrion.core.intervals.GenomicInterval], Optional[pyrion.core.intervals.GenomicInterval]]`
|
|
1125
|
+
|
|
1126
|
+
Get flanking regions of specified size around a transcript.
|
|
1127
|
+
|
|
1128
|
+
|
|
1129
|
+
### filter_transcripts_in_interval
|
|
1130
|
+
|
|
1131
|
+
**Signature:** `(transcripts_collection, interval: pyrion.core.intervals.GenomicInterval, include_partial: bool = True)`
|
|
1132
|
+
|
|
1133
|
+
Filter transcripts that are within or intersect with a genomic interval.
|
|
1134
|
+
|
|
1135
|
+
|
|
1136
|
+
### get_canonical_transcripts_from_collection
|
|
1137
|
+
|
|
1138
|
+
**Signature:** `(transcripts_collection, canonizer_func: Optional[Callable] = None, **kwargs)`
|
|
1139
|
+
|
|
1140
|
+
Get a new collection containing only canonical transcripts.
|
|
1141
|
+
|
|
1142
|
+
|
|
1143
|
+
### get_canonical_transcripts_only_from_collection
|
|
1144
|
+
|
|
1145
|
+
**Signature:** `(transcripts_collection)`
|
|
1146
|
+
|
|
1147
|
+
Get a new collection containing only already-set canonical transcripts.
|
|
1148
|
+
|
|
1149
|
+
|
|
1150
|
+
### get_cds_blocks
|
|
1151
|
+
|
|
1152
|
+
**Signature:** `(transcript) -> numpy.ndarray`
|
|
1153
|
+
|
|
1154
|
+
Get CDS blocks from transcript using slice operations.
|
|
1155
|
+
|
|
1156
|
+
|
|
1157
|
+
### get_genes_with_canonical_transcripts_from_collection
|
|
1158
|
+
|
|
1159
|
+
**Signature:** `(transcripts_collection) -> List`
|
|
1160
|
+
|
|
1161
|
+
Get all genes that have canonical transcripts set.
|
|
1162
|
+
|
|
1163
|
+
|
|
1164
|
+
### get_left_utr_blocks
|
|
1165
|
+
|
|
1166
|
+
**Signature:** `(transcript) -> numpy.ndarray`
|
|
1167
|
+
|
|
1168
|
+
Get UTR blocks to the left of CDS (chromosomally before CDS start).
|
|
1169
|
+
|
|
1170
|
+
|
|
1171
|
+
### get_right_utr_blocks
|
|
1172
|
+
|
|
1173
|
+
**Signature:** `(transcript) -> numpy.ndarray`
|
|
1174
|
+
|
|
1175
|
+
Get UTR blocks to the right of CDS (chromosomally after CDS end).
|
|
1176
|
+
|
|
1177
|
+
|
|
1178
|
+
### get_transcript_cds_interval
|
|
1179
|
+
|
|
1180
|
+
**Signature:** `(transcript) -> Optional[pyrion.core.intervals.GenomicInterval]`
|
|
1181
|
+
|
|
1182
|
+
Get genomic interval spanning the CDS region.
|
|
1183
|
+
|
|
1184
|
+
|
|
1185
|
+
### get_transcript_interval
|
|
1186
|
+
|
|
1187
|
+
**Signature:** `(transcript) -> pyrion.core.intervals.GenomicInterval`
|
|
1188
|
+
|
|
1189
|
+
Get genomic interval spanning the entire transcript.
|
|
1190
|
+
|
|
1191
|
+
|
|
1192
|
+
### get_utr3_blocks
|
|
1193
|
+
|
|
1194
|
+
**Signature:** `(transcript) -> numpy.ndarray`
|
|
1195
|
+
|
|
1196
|
+
|
|
1197
|
+
### get_utr5_blocks
|
|
1198
|
+
|
|
1199
|
+
**Signature:** `(transcript) -> numpy.ndarray`
|
|
1200
|
+
|
|
1201
|
+
|
|
1202
|
+
### set_canonical_transcripts_for_collection
|
|
1203
|
+
|
|
1204
|
+
**Signature:** `(transcripts_collection, canonizer_func: Optional[Callable] = None, **kwargs) -> None`
|
|
1205
|
+
|
|
1206
|
+
Set canonical transcripts for all genes in a collection using a canonizer function.
|
|
1207
|
+
|
|
1208
|
+
|
|
1209
|
+
---
|
|
1210
|
+
|
|
1211
|
+
# pyrion.core.genome_alignment
|
|
1212
|
+
|
|
1213
|
+
|
|
1214
|
+
## Classes
|
|
1215
|
+
|
|
1216
|
+
### GenomeAlignment
|
|
1217
|
+
|
|
1218
|
+
GenomeAlignment(chain_id: int, score: int, t_chrom: str, t_strand: int, t_size: int, q_chrom: str, q_strand: int, q_size: int, blocks: numpy.ndarray, child_id: Optional[int] = None)
|
|
1219
|
+
|
|
1220
|
+
**Signature:** `(self, chain_id: int, score: int, t_chrom: str, t_strand: int, t_size: int, q_chrom: str, q_strand: int, q_size: int, blocks: numpy.ndarray, child_id: Optional[int] = None) -> None`
|
|
1221
|
+
|
|
1222
|
+
#### Methods
|
|
1223
|
+
|
|
1224
|
+
**__init__**
|
|
1225
|
+
|
|
1226
|
+
*Signature:* `(self, chain_id: int, score: int, t_chrom: str, t_strand: int, t_size: int, q_chrom: str, q_strand: int, q_size: int, blocks: numpy.ndarray, child_id: Optional[int] = None) -> None`
|
|
1227
|
+
|
|
1228
|
+
Initialize self. See help(type(self)) for accurate signature.
|
|
1229
|
+
|
|
1230
|
+
|
|
1231
|
+
**__repr__**
|
|
1232
|
+
|
|
1233
|
+
*Signature:* `(self) -> str`
|
|
1234
|
+
|
|
1235
|
+
Return repr(self).
|
|
1236
|
+
|
|
1237
|
+
|
|
1238
|
+
**aligned_length**
|
|
1239
|
+
|
|
1240
|
+
*Signature:* `(self) -> int`
|
|
1241
|
+
|
|
1242
|
+
|
|
1243
|
+
**blocks_in_query**
|
|
1244
|
+
|
|
1245
|
+
*Signature:* `(self) -> numpy.ndarray`
|
|
1246
|
+
|
|
1247
|
+
|
|
1248
|
+
**blocks_in_target**
|
|
1249
|
+
|
|
1250
|
+
*Signature:* `(self) -> numpy.ndarray`
|
|
1251
|
+
|
|
1252
|
+
|
|
1253
|
+
**q_length**
|
|
1254
|
+
|
|
1255
|
+
*Signature:* `(self) -> int`
|
|
1256
|
+
|
|
1257
|
+
|
|
1258
|
+
**t_length**
|
|
1259
|
+
|
|
1260
|
+
*Signature:* `(self) -> int`
|
|
1261
|
+
|
|
1262
|
+
|
|
1263
|
+
|
|
1264
|
+
### GenomeAlignmentsCollection
|
|
1265
|
+
|
|
1266
|
+
Container for many genome alignments.
|
|
1267
|
+
|
|
1268
|
+
**Signature:** `(self, alignments: Optional[List[pyrion.core.genome_alignment.GenomeAlignment]] = None, source_file: Optional[str] = None)`
|
|
1269
|
+
|
|
1270
|
+
#### Methods
|
|
1271
|
+
|
|
1272
|
+
**__init__**
|
|
1273
|
+
|
|
1274
|
+
*Signature:* `(self, alignments: Optional[List[pyrion.core.genome_alignment.GenomeAlignment]] = None, source_file: Optional[str] = None)`
|
|
1275
|
+
|
|
1276
|
+
Initialize self. See help(type(self)) for accurate signature.
|
|
1277
|
+
|
|
1278
|
+
|
|
1279
|
+
**__repr__**
|
|
1280
|
+
|
|
1281
|
+
*Signature:* `(self) -> str`
|
|
1282
|
+
|
|
1283
|
+
Return repr(self).
|
|
1284
|
+
|
|
1285
|
+
|
|
1286
|
+
**__str__**
|
|
1287
|
+
|
|
1288
|
+
*Signature:* `(self) -> str`
|
|
1289
|
+
|
|
1290
|
+
String representation with summary.
|
|
1291
|
+
|
|
1292
|
+
|
|
1293
|
+
**from_json**
|
|
1294
|
+
|
|
1295
|
+
*Signature:* `(file_path: Union[str, pathlib.Path]) -> 'GenomeAlignmentsCollection'`
|
|
1296
|
+
|
|
1297
|
+
|
|
1298
|
+
**get_alignments_fully_contained**
|
|
1299
|
+
|
|
1300
|
+
*Signature:* `(self, interval: 'GenomicInterval') -> List[pyrion.core.genome_alignment.GenomeAlignment]`
|
|
1301
|
+
|
|
1302
|
+
|
|
1303
|
+
**get_alignments_in_interval**
|
|
1304
|
+
|
|
1305
|
+
*Signature:* `(self, interval: 'GenomicInterval', include_partial: bool = True) -> List[pyrion.core.genome_alignment.GenomeAlignment]`
|
|
1306
|
+
|
|
1307
|
+
|
|
1308
|
+
**get_alignments_overlapping_query_interval**
|
|
1309
|
+
|
|
1310
|
+
*Signature:* `(self, interval: 'GenomicInterval', include_partial: bool = True) -> List[pyrion.core.genome_alignment.GenomeAlignment]`
|
|
1311
|
+
|
|
1312
|
+
|
|
1313
|
+
**get_alignments_overlapping_target_interval**
|
|
1314
|
+
|
|
1315
|
+
*Signature:* `(self, interval: 'GenomicInterval', include_partial: bool = True) -> List[pyrion.core.genome_alignment.GenomeAlignment]`
|
|
1316
|
+
|
|
1317
|
+
|
|
1318
|
+
**get_by_chain_id**
|
|
1319
|
+
|
|
1320
|
+
*Signature:* `(self, chain_id: int) -> Optional[pyrion.core.genome_alignment.GenomeAlignment]`
|
|
1321
|
+
|
|
1322
|
+
|
|
1323
|
+
**get_by_query_chrom**
|
|
1324
|
+
|
|
1325
|
+
*Signature:* `(self, chrom: str) -> List[pyrion.core.genome_alignment.GenomeAlignment]`
|
|
1326
|
+
|
|
1327
|
+
|
|
1328
|
+
**get_by_target_chrom**
|
|
1329
|
+
|
|
1330
|
+
*Signature:* `(self, chrom: str) -> List[pyrion.core.genome_alignment.GenomeAlignment]`
|
|
1331
|
+
|
|
1332
|
+
|
|
1333
|
+
**get_chain_ids_by_query_chrom**
|
|
1334
|
+
|
|
1335
|
+
*Signature:* `(self, chrom: str) -> List[int]`
|
|
1336
|
+
|
|
1337
|
+
|
|
1338
|
+
**get_chain_ids_by_target_chrom**
|
|
1339
|
+
|
|
1340
|
+
*Signature:* `(self, chrom: str) -> List[int]`
|
|
1341
|
+
|
|
1342
|
+
|
|
1343
|
+
**get_query_chromosomes**
|
|
1344
|
+
|
|
1345
|
+
*Signature:* `(self) -> List[str]`
|
|
1346
|
+
|
|
1347
|
+
|
|
1348
|
+
**get_reference_chromosomes**
|
|
1349
|
+
|
|
1350
|
+
*Signature:* `(self) -> List[str]`
|
|
1351
|
+
|
|
1352
|
+
|
|
1353
|
+
**save_to_chain**
|
|
1354
|
+
|
|
1355
|
+
*Signature:* `(self, file_path: Union[str, pathlib.Path]) -> None`
|
|
1356
|
+
|
|
1357
|
+
|
|
1358
|
+
**save_to_json**
|
|
1359
|
+
|
|
1360
|
+
*Signature:* `(self, file_path: Union[str, pathlib.Path]) -> None`
|
|
1361
|
+
|
|
1362
|
+
|
|
1363
|
+
**sort_by_score**
|
|
1364
|
+
|
|
1365
|
+
*Signature:* `(self, max_elems: Optional[int] = None) -> List[Tuple[int, int]]`
|
|
1366
|
+
|
|
1367
|
+
|
|
1368
|
+
**summary**
|
|
1369
|
+
|
|
1370
|
+
*Signature:* `(self) -> str`
|
|
1371
|
+
|
|
1372
|
+
|
|
1373
|
+
|
|
1374
|
+
---
|
|
1375
|
+
|
|
1376
|
+
# pyrion.core.genome_alignment_auxiliary
|
|
1377
|
+
|
|
1378
|
+
Auxiliary functions for genome alignment objects.
|
|
1379
|
+
|
|
1380
|
+
|
|
1381
|
+
## Functions
|
|
1382
|
+
|
|
1383
|
+
### sort_alignments_by_score
|
|
1384
|
+
|
|
1385
|
+
**Signature:** `(alignments_collection, max_elems: Optional[int] = None) -> List[Tuple[int, int]]`
|
|
1386
|
+
|
|
1387
|
+
|
|
1388
|
+
---
|
|
1389
|
+
|
|
1390
|
+
# pyrion.core.intervals
|
|
1391
|
+
|
|
1392
|
+
Genomic interval representations.
|
|
1393
|
+
|
|
1394
|
+
|
|
1395
|
+
## Classes
|
|
1396
|
+
|
|
1397
|
+
### AnnotatedIntervalSet
|
|
1398
|
+
|
|
1399
|
+
AnnotatedIntervalSet(intervals: numpy.ndarray, region_types: numpy.ndarray)
|
|
1400
|
+
|
|
1401
|
+
**Signature:** `(self, intervals: numpy.ndarray, region_types: numpy.ndarray) -> None`
|
|
1402
|
+
|
|
1403
|
+
#### Methods
|
|
1404
|
+
|
|
1405
|
+
**__init__**
|
|
1406
|
+
|
|
1407
|
+
*Signature:* `(self, intervals: numpy.ndarray, region_types: numpy.ndarray) -> None`
|
|
1408
|
+
|
|
1409
|
+
Initialize self. See help(type(self)) for accurate signature.
|
|
1410
|
+
|
|
1411
|
+
|
|
1412
|
+
**__repr__**
|
|
1413
|
+
|
|
1414
|
+
*Signature:* `(self) -> str`
|
|
1415
|
+
|
|
1416
|
+
Return repr(self).
|
|
1417
|
+
|
|
1418
|
+
|
|
1419
|
+
|
|
1420
|
+
### GenomicInterval
|
|
1421
|
+
|
|
1422
|
+
Single genomic interval with strand information and optional ID.
|
|
1423
|
+
|
|
1424
|
+
**Signature:** `(self, chrom: str, start: int, end: int, strand: pyrion.core.strand.Strand = <Strand.UNKNOWN: 0>, id: Optional[str] = None) -> None`
|
|
1425
|
+
|
|
1426
|
+
#### Methods
|
|
1427
|
+
|
|
1428
|
+
**__init__**
|
|
1429
|
+
|
|
1430
|
+
*Signature:* `(self, chrom: str, start: int, end: int, strand: pyrion.core.strand.Strand = <Strand.UNKNOWN: 0>, id: Optional[str] = None) -> None`
|
|
1431
|
+
|
|
1432
|
+
Initialize self. See help(type(self)) for accurate signature.
|
|
1433
|
+
|
|
1434
|
+
|
|
1435
|
+
**__repr__**
|
|
1436
|
+
|
|
1437
|
+
*Signature:* `(self) -> str`
|
|
1438
|
+
|
|
1439
|
+
Detailed representation for debugging.
|
|
1440
|
+
|
|
1441
|
+
|
|
1442
|
+
**__str__**
|
|
1443
|
+
|
|
1444
|
+
*Signature:* `(self) -> str`
|
|
1445
|
+
|
|
1446
|
+
Return str(self).
|
|
1447
|
+
|
|
1448
|
+
|
|
1449
|
+
**contains**
|
|
1450
|
+
|
|
1451
|
+
*Signature:* `(self, pos: int) -> bool`
|
|
1452
|
+
|
|
1453
|
+
|
|
1454
|
+
**flip_strand**
|
|
1455
|
+
|
|
1456
|
+
*Signature:* `(self) -> 'GenomicInterval'`
|
|
1457
|
+
|
|
1458
|
+
|
|
1459
|
+
**from_string**
|
|
1460
|
+
|
|
1461
|
+
*Signature:* `(interval_string: str, id: Optional[str] = None) -> 'GenomicInterval'`
|
|
1462
|
+
|
|
1463
|
+
Create GenomicInterval from string representation.
|
|
1464
|
+
|
|
1465
|
+
Supported formats:
|
|
1466
|
+
- "chr1:100-200" (no strand)
|
|
1467
|
+
- "chr1:100-200:+" (plus strand)
|
|
1468
|
+
- "chr1:100-200:-" (minus strand)
|
|
1469
|
+
- "chr1:1,000,000-2,000,000" (commas in numbers supported)
|
|
1470
|
+
- "chr11:118,300,000-118,400,000:+" (full example with commas)
|
|
1471
|
+
|
|
1472
|
+
|
|
1473
|
+
**intersects**
|
|
1474
|
+
|
|
1475
|
+
*Signature:* `(self, other: 'GenomicInterval') -> bool`
|
|
1476
|
+
|
|
1477
|
+
|
|
1478
|
+
**length**
|
|
1479
|
+
|
|
1480
|
+
*Signature:* `(self) -> int`
|
|
1481
|
+
|
|
1482
|
+
|
|
1483
|
+
**overlap**
|
|
1484
|
+
|
|
1485
|
+
*Signature:* `(self, other: 'GenomicInterval') -> int`
|
|
1486
|
+
|
|
1487
|
+
|
|
1488
|
+
**to_bed6_string**
|
|
1489
|
+
|
|
1490
|
+
*Signature:* `(self, score: int = 1000) -> str`
|
|
1491
|
+
|
|
1492
|
+
Convert to BED6 format string.
|
|
1493
|
+
|
|
1494
|
+
|
|
1495
|
+
**union**
|
|
1496
|
+
|
|
1497
|
+
*Signature:* `(self, other: 'GenomicInterval') -> Optional['GenomicInterval']`
|
|
1498
|
+
|
|
1499
|
+
|
|
1500
|
+
|
|
1501
|
+
### GenomicIntervalsCollection
|
|
1502
|
+
|
|
1503
|
+
GenomicIntervalsCollection(chrom: str, strand: pyrion.core.strand.Strand, array: numpy.ndarray, ids: numpy.ndarray)
|
|
1504
|
+
|
|
1505
|
+
**Signature:** `(self, chrom: str, strand: pyrion.core.strand.Strand, array: numpy.ndarray, ids: numpy.ndarray) -> None`
|
|
1506
|
+
|
|
1507
|
+
#### Methods
|
|
1508
|
+
|
|
1509
|
+
**__init__**
|
|
1510
|
+
|
|
1511
|
+
*Signature:* `(self, chrom: str, strand: pyrion.core.strand.Strand, array: numpy.ndarray, ids: numpy.ndarray) -> None`
|
|
1512
|
+
|
|
1513
|
+
Initialize self. See help(type(self)) for accurate signature.
|
|
1514
|
+
|
|
1515
|
+
|
|
1516
|
+
**__repr__**
|
|
1517
|
+
|
|
1518
|
+
*Signature:* `(self) -> str`
|
|
1519
|
+
|
|
1520
|
+
Return repr(self).
|
|
1521
|
+
|
|
1522
|
+
|
|
1523
|
+
**__str__**
|
|
1524
|
+
|
|
1525
|
+
*Signature:* `(self) -> str`
|
|
1526
|
+
|
|
1527
|
+
Return str(self).
|
|
1528
|
+
|
|
1529
|
+
|
|
1530
|
+
**filter_by**
|
|
1531
|
+
|
|
1532
|
+
*Signature:* `(self, predicate: Callable[[pyrion.core.intervals.GenomicInterval], bool]) -> 'GenomicIntervalsCollection'`
|
|
1533
|
+
|
|
1534
|
+
|
|
1535
|
+
**from_array**
|
|
1536
|
+
|
|
1537
|
+
*Signature:* `(array: numpy.ndarray, chrom: str, strand: Optional[pyrion.core.strand.Strand] = None, ids: Optional[List[str]] = None) -> 'GenomicIntervalsCollection'`
|
|
1538
|
+
|
|
1539
|
+
Create collection from numpy array.
|
|
1540
|
+
|
|
1541
|
+
|
|
1542
|
+
**from_intervals**
|
|
1543
|
+
|
|
1544
|
+
*Signature:* `(intervals: List[pyrion.core.intervals.GenomicInterval]) -> 'GenomicIntervalsCollection'`
|
|
1545
|
+
|
|
1546
|
+
Create collection from list of GenomicInterval objects.
|
|
1547
|
+
|
|
1548
|
+
|
|
1549
|
+
**from_strings**
|
|
1550
|
+
|
|
1551
|
+
*Signature:* `(interval_strings, ids: Optional[List[str]] = None) -> Dict[Tuple[str, pyrion.core.strand.Strand], 'GenomicIntervalsCollection']`
|
|
1552
|
+
|
|
1553
|
+
|
|
1554
|
+
**group_by_proximity**
|
|
1555
|
+
|
|
1556
|
+
*Signature:* `(self, max_gap: int) -> List['GenomicIntervalsCollection']`
|
|
1557
|
+
|
|
1558
|
+
|
|
1559
|
+
**intersect**
|
|
1560
|
+
|
|
1561
|
+
*Signature:* `(self, other: Union['GenomicIntervalsCollection', pyrion.core.intervals.GenomicInterval]) -> 'GenomicIntervalsCollection'`
|
|
1562
|
+
|
|
1563
|
+
|
|
1564
|
+
**is_empty**
|
|
1565
|
+
|
|
1566
|
+
*Signature:* `(self) -> bool`
|
|
1567
|
+
|
|
1568
|
+
|
|
1569
|
+
**merge_close**
|
|
1570
|
+
|
|
1571
|
+
*Signature:* `(self, max_gap: int = 0) -> 'GenomicIntervalsCollection'`
|
|
1572
|
+
|
|
1573
|
+
|
|
1574
|
+
**split_on_gaps**
|
|
1575
|
+
|
|
1576
|
+
*Signature:* `(self, min_gap: int) -> List['GenomicIntervalsCollection']`
|
|
1577
|
+
|
|
1578
|
+
|
|
1579
|
+
**to_bed6_string**
|
|
1580
|
+
|
|
1581
|
+
*Signature:* `(self, score: int = 1000) -> str`
|
|
1582
|
+
|
|
1583
|
+
|
|
1584
|
+
**to_intervals_list**
|
|
1585
|
+
|
|
1586
|
+
*Signature:* `(self) -> List[pyrion.core.intervals.GenomicInterval]`
|
|
1587
|
+
|
|
1588
|
+
|
|
1589
|
+
|
|
1590
|
+
### RegionType
|
|
1591
|
+
|
|
1592
|
+
An enumeration.
|
|
1593
|
+
|
|
1594
|
+
**Signature:** `(self, /, *args, **kwargs)`
|
|
1595
|
+
|
|
1596
|
+
|
|
1597
|
+
---
|
|
1598
|
+
|
|
1599
|
+
# pyrion.core.intervals_auxiliary
|
|
1600
|
+
|
|
1601
|
+
Auxiliary functions for interval objects.
|
|
1602
|
+
|
|
1603
|
+
|
|
1604
|
+
## Functions
|
|
1605
|
+
|
|
1606
|
+
### create_intervals_collections_from_strings
|
|
1607
|
+
|
|
1608
|
+
**Signature:** `(interval_strings, ids: Optional[List[str]] = None) -> Dict[Tuple[str, pyrion.core.strand.Strand], pyrion.core.intervals.GenomicIntervalsCollection]`
|
|
1609
|
+
|
|
1610
|
+
Create collections from iterable of string representations, grouped by chromosome and strand.
|
|
1611
|
+
|
|
1612
|
+
|
|
1613
|
+
---
|
|
1614
|
+
|
|
1615
|
+
# pyrion.core.nucleotide_sequences
|
|
1616
|
+
|
|
1617
|
+
Sequence representations and storage.
|
|
1618
|
+
|
|
1619
|
+
|
|
1620
|
+
## Classes
|
|
1621
|
+
|
|
1622
|
+
### NucleotideSequence
|
|
1623
|
+
|
|
1624
|
+
NucleotideSequence(data: 'np.ndarray', is_rna: 'bool' = False, metadata: 'Optional[Metadata]' = None)
|
|
1625
|
+
|
|
1626
|
+
**Signature:** `(self, data: 'np.ndarray', is_rna: 'bool' = False, metadata: 'Optional[Metadata]' = None) -> None`
|
|
1627
|
+
|
|
1628
|
+
#### Methods
|
|
1629
|
+
|
|
1630
|
+
**__init__**
|
|
1631
|
+
|
|
1632
|
+
*Signature:* `(self, data: 'np.ndarray', is_rna: 'bool' = False, metadata: 'Optional[Metadata]' = None) -> None`
|
|
1633
|
+
|
|
1634
|
+
Initialize self. See help(type(self)) for accurate signature.
|
|
1635
|
+
|
|
1636
|
+
|
|
1637
|
+
**__repr__**
|
|
1638
|
+
|
|
1639
|
+
*Signature:* `(self) -> 'str'`
|
|
1640
|
+
|
|
1641
|
+
Return repr(self).
|
|
1642
|
+
|
|
1643
|
+
|
|
1644
|
+
**__str__**
|
|
1645
|
+
|
|
1646
|
+
*Signature:* `(self) -> 'str'`
|
|
1647
|
+
|
|
1648
|
+
Return str(self).
|
|
1649
|
+
|
|
1650
|
+
|
|
1651
|
+
**complement**
|
|
1652
|
+
|
|
1653
|
+
*Signature:* `(self) -> 'NucleotideSequence'`
|
|
1654
|
+
|
|
1655
|
+
|
|
1656
|
+
**from_string**
|
|
1657
|
+
|
|
1658
|
+
*Signature:* `(sequence: 'str', is_rna: 'bool' = False, metadata: 'Optional[Metadata]' = None) -> 'NucleotideSequence'`
|
|
1659
|
+
|
|
1660
|
+
|
|
1661
|
+
**get_masked_positions**
|
|
1662
|
+
|
|
1663
|
+
*Signature:* `(self) -> 'np.ndarray'`
|
|
1664
|
+
|
|
1665
|
+
|
|
1666
|
+
**get_unmasked_positions**
|
|
1667
|
+
|
|
1668
|
+
*Signature:* `(self) -> 'np.ndarray'`
|
|
1669
|
+
|
|
1670
|
+
|
|
1671
|
+
**is_position_masked**
|
|
1672
|
+
|
|
1673
|
+
*Signature:* `(self, position: 'int') -> 'bool'`
|
|
1674
|
+
|
|
1675
|
+
|
|
1676
|
+
**mask**
|
|
1677
|
+
|
|
1678
|
+
*Signature:* `(self, start: 'Optional[int]' = None, end: 'Optional[int]' = None) -> 'NucleotideSequence'`
|
|
1679
|
+
|
|
1680
|
+
|
|
1681
|
+
**merge**
|
|
1682
|
+
|
|
1683
|
+
*Signature:* `(self, other: 'NucleotideSequence') -> 'NucleotideSequence'`
|
|
1684
|
+
|
|
1685
|
+
|
|
1686
|
+
**remove_gaps**
|
|
1687
|
+
|
|
1688
|
+
*Signature:* `(self) -> 'NucleotideSequence'`
|
|
1689
|
+
|
|
1690
|
+
|
|
1691
|
+
**reverse**
|
|
1692
|
+
|
|
1693
|
+
*Signature:* `(self) -> 'NucleotideSequence'`
|
|
1694
|
+
|
|
1695
|
+
|
|
1696
|
+
**reverse_complement**
|
|
1697
|
+
|
|
1698
|
+
*Signature:* `(self) -> 'NucleotideSequence'`
|
|
1699
|
+
|
|
1700
|
+
|
|
1701
|
+
**slice**
|
|
1702
|
+
|
|
1703
|
+
*Signature:* `(self, start: 'int', end: 'int') -> 'NucleotideSequence'`
|
|
1704
|
+
|
|
1705
|
+
|
|
1706
|
+
**to_amino_acids**
|
|
1707
|
+
|
|
1708
|
+
*Signature:* `(self, translation_table=None)`
|
|
1709
|
+
|
|
1710
|
+
|
|
1711
|
+
**to_codons**
|
|
1712
|
+
|
|
1713
|
+
*Signature:* `(self)`
|
|
1714
|
+
|
|
1715
|
+
|
|
1716
|
+
**to_fasta_string**
|
|
1717
|
+
|
|
1718
|
+
*Signature:* `(self, width: 'int' = 80, header: 'Optional[str]' = None) -> 'str'`
|
|
1719
|
+
|
|
1720
|
+
|
|
1721
|
+
**to_string**
|
|
1722
|
+
|
|
1723
|
+
*Signature:* `(self) -> 'str'`
|
|
1724
|
+
|
|
1725
|
+
|
|
1726
|
+
**unmask**
|
|
1727
|
+
|
|
1728
|
+
*Signature:* `(self, start: 'Optional[int]' = None, end: 'Optional[int]' = None) -> 'NucleotideSequence'`
|
|
1729
|
+
|
|
1730
|
+
|
|
1731
|
+
#### Properties
|
|
1732
|
+
|
|
1733
|
+
**masked_fraction** -> `float`
|
|
1734
|
+
|
|
1735
|
+
|
|
1736
|
+
|
|
1737
|
+
### SequenceType
|
|
1738
|
+
|
|
1739
|
+
Sequence type detection.
|
|
1740
|
+
|
|
1741
|
+
**Signature:** `(self, /, *args, **kwargs)`
|
|
1742
|
+
|
|
1743
|
+
|
|
1744
|
+
---
|
|
1745
|
+
|
|
1746
|
+
# pyrion.core.sequences_auxiliary
|
|
1747
|
+
|
|
1748
|
+
Auxiliary functions for sequences objects.
|
|
1749
|
+
|
|
1750
|
+
|
|
1751
|
+
## Functions
|
|
1752
|
+
|
|
1753
|
+
### mask_nucleotide_sequence_slice
|
|
1754
|
+
|
|
1755
|
+
**Signature:** `(sequence, start: Optional[int] = None, end: Optional[int] = None)`
|
|
1756
|
+
|
|
1757
|
+
|
|
1758
|
+
### merge_nucleotide_sequences
|
|
1759
|
+
|
|
1760
|
+
**Signature:** `(sequence1, sequence2)`
|
|
1761
|
+
|
|
1762
|
+
|
|
1763
|
+
### unmask_nucleotide_sequence_slice
|
|
1764
|
+
|
|
1765
|
+
**Signature:** `(sequence, start: Optional[int] = None, end: Optional[int] = None)`
|
|
1766
|
+
|
|
1767
|
+
|
|
1768
|
+
---
|
|
1769
|
+
|
|
1770
|
+
# pyrion.core.strand
|
|
1771
|
+
|
|
1772
|
+
|
|
1773
|
+
## Classes
|
|
1774
|
+
|
|
1775
|
+
### Strand
|
|
1776
|
+
|
|
1777
|
+
Enumeration of strand orientations (includes `UNKNOWN`, value 0).
|
|
1778
|
+
|
|
1779
|
+
**Signature:** `(self, /, *args, **kwargs)`
|
|
1780
|
+
|
|
1781
|
+
|
|
1782
|
+
---
|
|
1783
|
+
|
|
1784
|
+
# pyrion.core.translation
|
|
1785
|
+
|
|
1786
|
+
Translation tables for genetic code.
|
|
1787
|
+
|
|
1788
|
+
|
|
1789
|
+
## Classes
|
|
1790
|
+
|
|
1791
|
+
### TranslationTable
|
|
1792
|
+
|
|
1793
|
+
TranslationTable(table_id: int, name: str, codon_table: Dict[tuple, int], start_codons: set[tuple], stop_codons: set[tuple])
|
|
1794
|
+
|
|
1795
|
+
**Signature:** `(self, table_id: int, name: str, codon_table: Dict[tuple, int], start_codons: set[tuple], stop_codons: set[tuple]) -> None`
|
|
1796
|
+
|
|
1797
|
+
#### Methods
|
|
1798
|
+
|
|
1799
|
+
**__init__**
|
|
1800
|
+
|
|
1801
|
+
*Signature:* `(self, table_id: int, name: str, codon_table: Dict[tuple, int], start_codons: set[tuple], stop_codons: set[tuple]) -> None`
|
|
1802
|
+
|
|
1803
|
+
Initialize self. See help(type(self)) for accurate signature.
|
|
1804
|
+
|
|
1805
|
+
|
|
1806
|
+
**__repr__**
|
|
1807
|
+
|
|
1808
|
+
*Signature:* `(self) -> str`
|
|
1809
|
+
|
|
1810
|
+
Return repr(self).
|
|
1811
|
+
|
|
1812
|
+
|
|
1813
|
+
**is_start_codon**
|
|
1814
|
+
|
|
1815
|
+
*Signature:* `(self, codon_codes: Tuple[int, int, int]) -> bool`
|
|
1816
|
+
|
|
1817
|
+
|
|
1818
|
+
**is_stop_codon**
|
|
1819
|
+
|
|
1820
|
+
*Signature:* `(self, codon_codes: Tuple[int, int, int]) -> bool`
|
|
1821
|
+
|
|
1822
|
+
|
|
1823
|
+
**mitochondrial**
|
|
1824
|
+
|
|
1825
|
+
*Signature:* `() -> 'TranslationTable'`
|
|
1826
|
+
|
|
1827
|
+
Mitochondrial genetic code (NCBI table 2).
|
|
1828
|
+
|
|
1829
|
+
|
|
1830
|
+
**standard**
|
|
1831
|
+
|
|
1832
|
+
*Signature:* `() -> 'TranslationTable'`
|
|
1833
|
+
|
|
1834
|
+
Standard genetic code (NCBI table 1).
|
|
1835
|
+
|
|
1836
|
+
|
|
1837
|
+
**translate_codon**
|
|
1838
|
+
|
|
1839
|
+
*Signature:* `(self, codon_codes: Tuple[int, int, int]) -> int`
|
|
1840
|
+
|
|
1841
|
+
|
|
1842
|
+
|
|
1843
|
+
---
|
|
1844
|
+
|
|
1845
|
+
# pyrion.core_types
|
|
1846
|
+
|
|
1847
|
+
Shared types, enums, and protocols for pyrion.
|
|
1848
|
+
|
|
1849
|
+
|
|
1850
|
+
## Classes
|
|
1851
|
+
|
|
1852
|
+
### ExonType
|
|
1853
|
+
|
|
1854
|
+
Enumeration for exon types in genomic annotations.
|
|
1855
|
+
|
|
1856
|
+
**Signature:** `(self, /, *args, **kwargs)`
|
|
1857
|
+
|
|
1858
|
+
|
|
1859
|
+
---
|
|
1860
|
+
|
|
1861
|
+
# pyrion.io
|
|
1862
|
+
|
|
1863
|
+
I/O modules for various genomic file formats.
|
|
1864
|
+
|
|
1865
|
+
|
|
1866
|
+
---
|
|
1867
|
+
|
|
1868
|
+
# pyrion.io.bed
|
|
1869
|
+
|
|
1870
|
+
BED format I/O support.
|
|
1871
|
+
|
|
1872
|
+
|
|
1873
|
+
## Functions
|
|
1874
|
+
|
|
1875
|
+
### read_bed12_file
|
|
1876
|
+
|
|
1877
|
+
**Signature:** `(file_path: Union[str, pathlib.Path]) -> pyrion.core.genes.TranscriptsCollection`
|
|
1878
|
+
|
|
1879
|
+
Read BED12 file and return TranscriptsCollection.
|
|
1880
|
+
|
|
1881
|
+
|
|
1882
|
+
### read_narrow_bed_file
|
|
1883
|
+
|
|
1884
|
+
**Signature:** `(file_path: Union[str, pathlib.Path]) -> List[pyrion.core.intervals.GenomicInterval]`
|
|
1885
|
+
|
|
1886
|
+
Read a narrow BED file with 3-9 fields and return a list of GenomicInterval objects.
|
|
1887
|
+
|
|
1888
|
+
|
|
1889
|
+
---
|
|
1890
|
+
|
|
1891
|
+
# pyrion.io.chain
|
|
1892
|
+
|
|
1893
|
+
Chain format I/O support.
|
|
1894
|
+
|
|
1895
|
+
|
|
1896
|
+
## Functions
|
|
1897
|
+
|
|
1898
|
+
### read_chain_file
|
|
1899
|
+
|
|
1900
|
+
**Signature:** `(file_path: Union[str, pathlib.Path], min_score: Optional[int] = None) -> pyrion.core.genome_alignment.GenomeAlignmentsCollection`
|
|
1901
|
+
|
|
1902
|
+
Read chain file and return GenomeAlignmentsCollection.
|
|
1903
|
+
|
|
1904
|
+
|
|
1905
|
+
---
|
|
1906
|
+
|
|
1907
|
+
# pyrion.io.fai
|
|
1908
|
+
|
|
1909
|
+
FASTA indexing functionality using fast C extension.
|
|
1910
|
+
|
|
1911
|
+
|
|
1912
|
+
## Functions
|
|
1913
|
+
|
|
1914
|
+
### create_fasta_index
|
|
1915
|
+
|
|
1916
|
+
**Signature:** `(fasta_file: 'Union[str, Path]', fai_file: 'Optional[Union[str, Path]]' = None) -> 'FaiStore'`
|
|
1917
|
+
|
|
1918
|
+
|
|
1919
|
+
### get_or_create_fasta_index
|
|
1920
|
+
|
|
1921
|
+
**Signature:** `(fasta_file: 'Union[str, Path]', force_recreate: 'bool' = False) -> 'FaiStore'`
|
|
1922
|
+
|
|
1923
|
+
|
|
1924
|
+
### load_fasta_index
|
|
1925
|
+
|
|
1926
|
+
**Signature:** `(fai_file: 'Union[str, Path]') -> 'FaiStore'`
|
|
1927
|
+
|
|
1928
|
+
|
|
1929
|
+
---
|
|
1930
|
+
|
|
1931
|
+
# pyrion.io.fasta
|
|
1932
|
+
|
|
1933
|
+
FASTA I/O operations.
|
|
1934
|
+
|
|
1935
|
+
|
|
1936
|
+
## Classes
|
|
1937
|
+
|
|
1938
|
+
### FastaAccessor
|
|
1939
|
+
|
|
1940
|
+
**Signature:** `(self, fasta_file: 'Union[str, Path]', fai_store: 'FaiStore')`
|
|
1941
|
+
|
|
1942
|
+
#### Methods
|
|
1943
|
+
|
|
1944
|
+
**__init__**
|
|
1945
|
+
|
|
1946
|
+
*Signature:* `(self, fasta_file: 'Union[str, Path]', fai_store: 'FaiStore')`
|
|
1947
|
+
|
|
1948
|
+
Initialize FastaAccessor with FASTA file and index.
|
|
1949
|
+
|
|
1950
|
+
|
|
1951
|
+
**__repr__**
|
|
1952
|
+
|
|
1953
|
+
*Signature:* `(self) -> 'str'`
|
|
1954
|
+
|
|
1955
|
+
Return repr(self).
|
|
1956
|
+
|
|
1957
|
+
|
|
1958
|
+
**get_multiple_sequences**
|
|
1959
|
+
|
|
1960
|
+
*Signature:* `(self, regions: 'List[GenomicInterval]', is_rna: 'bool' = False) -> 'Dict[str, NucleotideSequence]'`
|
|
1961
|
+
|
|
1962
|
+
|
|
1963
|
+
**get_sequence**
|
|
1964
|
+
|
|
1965
|
+
*Signature:* `(self, region: 'GenomicInterval', is_rna: 'bool' = False) -> 'NucleotideSequence'`
|
|
1966
|
+
|
|
1967
|
+
|
|
1968
|
+
**get_sequence_length**
|
|
1969
|
+
|
|
1970
|
+
*Signature:* `(self, sequence_name: 'str') -> 'int'`
|
|
1971
|
+
|
|
1972
|
+
|
|
1973
|
+
**get_sequence_names**
|
|
1974
|
+
|
|
1975
|
+
*Signature:* `(self) -> 'List[str]'`
|
|
1976
|
+
|
|
1977
|
+
|
|
1978
|
+
**has_sequence**
|
|
1979
|
+
|
|
1980
|
+
*Signature:* `(self, sequence_name: 'str') -> 'bool'`
|
|
1981
|
+
|
|
1982
|
+
|
|
1983
|
+
|
|
1984
|
+
## Functions
|
|
1985
|
+
|
|
1986
|
+
### read_dna_fasta
|
|
1987
|
+
|
|
1988
|
+
**Signature:** `(filename: 'Union[str, Path]', **kwargs) -> 'Dict[str, NucleotideSequence]'`
|
|
1989
|
+
|
|
1990
|
+
|
|
1991
|
+
### read_fasta
|
|
1992
|
+
|
|
1993
|
+
**Signature:** `(filename: 'Union[str, Path]', sequence_type: 'SequenceType', return_dict: 'bool' = True) -> 'Union[Dict[str, Union[NucleotideSequence, AminoAcidSequence]], List[Union[NucleotideSequence, AminoAcidSequence]]]'`
|
|
1994
|
+
|
|
1995
|
+
|
|
1996
|
+
### read_protein_fasta
|
|
1997
|
+
|
|
1998
|
+
**Signature:** `(filename: 'Union[str, Path]', **kwargs) -> 'Dict[str, AminoAcidSequence]'`
|
|
1999
|
+
|
|
2000
|
+
Read protein sequences from FASTA file.
|
|
2001
|
+
|
|
2002
|
+
|
|
2003
|
+
### read_rna_fasta
|
|
2004
|
+
|
|
2005
|
+
**Signature:** `(filename: 'Union[str, Path]', **kwargs) -> 'Dict[str, NucleotideSequence]'`
|
|
2006
|
+
|
|
2007
|
+
|
|
2008
|
+
### write_fasta
|
|
2009
|
+
|
|
2010
|
+
**Signature:** `(sequences: 'Union[Dict[str, NucleotideSequence], List[NucleotideSequence]]', filename: 'Union[str, Path]', line_width: 'int' = 80) -> 'None'`
|
|
2011
|
+
|
|
2012
|
+
|
|
2013
|
+
---
|
|
2014
|
+
|
|
2015
|
+
# pyrion.io.gene_data
|
|
2016
|
+
|
|
2017
|
+
Gene data I/O support.
|
|
2018
|
+
|
|
2019
|
+
|
|
2020
|
+
## Functions
|
|
2021
|
+
|
|
2022
|
+
### read_gene_data
|
|
2023
|
+
|
|
2024
|
+
**Signature:** `(file_path: Union[str, pathlib.Path], gene_column: Union[int, str, NoneType] = None, transcript_id_column: Union[int, str, NoneType] = None, gene_name_column: Union[int, str, NoneType] = None, transcript_type_column: Union[int, str, NoneType] = None, separator: str = '\t', has_header: bool = True) -> pyrion.core.gene_data.GeneData`
|
|
2025
|
+
|
|
2026
|
+
Read gene data from TSV/CSV file and build mappings.
|
|
2027
|
+
|
|
2028
|
+
Args:
|
|
2029
|
+
file_path: Path to the data file
|
|
2030
|
+
gene_column: Gene ID column index (1-based) or name. Optional.
|
|
2031
|
+
transcript_id_column: Transcript ID column index (1-based) or name. Optional.
|
|
2032
|
+
gene_name_column: Gene name column index (1-based) or name. Optional.
|
|
2033
|
+
transcript_type_column: Transcript type/biotype column index (1-based) or name. Optional.
|
|
2034
|
+
separator: Column separator. Default: '\t' (tab)
|
|
2035
|
+
has_header: Whether the file has a header row. If False, columns can only be selected by numeric (1-based) index, because there are no column names to match.
|
|
2036
|
+
|
|
2037
|
+
Returns:
|
|
2038
|
+
GeneData object with available mappings built from the data
|
|
2039
|
+
|
|
2040
|
+
Examples:
|
|
2041
|
+
# Build all mappings from biomart TSV with header
|
|
2042
|
+
gene_data = read_gene_data(
|
|
2043
|
+
"transcripts.tsv",
|
|
2044
|
+
gene_column="Gene stable ID",
|
|
2045
|
+
transcript_id_column="Transcript stable ID",
|
|
2046
|
+
gene_name_column="Gene name",
|
|
2047
|
+
transcript_type_column="Transcript type",
|
|
2048
|
+
has_header=True
|
|
2049
|
+
)
|
|
2050
|
+
|
|
2051
|
+
# Build from file without header using column indices (1-based)
|
|
2052
|
+
gene_data = read_gene_data(
|
|
2053
|
+
"file.tsv",
|
|
2054
|
+
gene_column=1,
|
|
2055
|
+
transcript_id_column=2,
|
|
2056
|
+
gene_name_column=5,
|
|
2057
|
+
transcript_type_column=6,
|
|
2058
|
+
has_header=False
|
|
2059
|
+
)
|
|
2060
|
+
|
|
2061
|
+
# Build only transcript-biotype mapping
|
|
2062
|
+
gene_data = read_gene_data(
|
|
2063
|
+
"file.tsv",
|
|
2064
|
+
transcript_id_column="transcript_id",
|
|
2065
|
+
transcript_type_column="biotype"
|
|
2066
|
+
)
|
|
2067
|
+
|
|
2068
|
+
|
|
2069
|
+
### resolve_index
|
|
2070
|
+
|
|
2071
|
+
**Signature:** `(column_idx: Union[int, str], header: List[str]) -> int | None`
|
|
2072
|
+
|
|
2073
|
+
|
|
2074
|
+
---
|
|
2075
|
+
|
|
2076
|
+
# pyrion.io.genepred
|
|
2077
|
+
|
|
2078
|
+
GenePred format I/O support.
|
|
2079
|
+
|
|
2080
|
+
|
|
2081
|
+
## Functions
|
|
2082
|
+
|
|
2083
|
+
### read_genepred_file
|
|
2084
|
+
|
|
2085
|
+
**Signature:** `(file_path: Union[str, pathlib.Path], has_header: bool = False, extended: bool = False) -> pyrion.core.genes.TranscriptsCollection`
|
|
2086
|
+
|
|
2087
|
+
Read genePred file and return TranscriptsCollection.
|
|
2088
|
+
|
|
2089
|
+
|
|
2090
|
+
### read_refflat_file
|
|
2091
|
+
|
|
2092
|
+
**Signature:** `(file_path: Union[str, pathlib.Path], has_header: bool = False) -> pyrion.core.genes.TranscriptsCollection`
|
|
2093
|
+
|
|
2094
|
+
Read refFlat file and return TranscriptsCollection.
|
|
2095
|
+
|
|
2096
|
+
refFlat format is like genePred but with an additional first column for gene name:
|
|
2097
|
+
`geneName name chrom strand txStart txEnd cdsStart cdsEnd exonCount exonStarts exonEnds`
|
|
2098
|
+
|
|
2099
|
+
|
|
2100
|
+
---
|
|
2101
|
+
|
|
2102
|
+
# pyrion.io.gtf
|
|
2103
|
+
|
|
2104
|
+
High-performance GTF format I/O support using a C extension.
|
|
2105
|
+
|
|
2106
|
+
|
|
2107
|
+
## Classes
|
|
2108
|
+
|
|
2109
|
+
### GTFChunkReader
|
|
2110
|
+
|
|
2111
|
+
**Signature:** `(self, file_path: Union[str, pathlib.Path], chunk_size_mb: int = 512)`
|
|
2112
|
+
|
|
2113
|
+
#### Methods
|
|
2114
|
+
|
|
2115
|
+
**__init__**
|
|
2116
|
+
|
|
2117
|
+
*Signature:* `(self, file_path: Union[str, pathlib.Path], chunk_size_mb: int = 512)`
|
|
2118
|
+
|
|
2119
|
+
Initialize the reader with the path to a GTF file and the chunk size in megabytes (default: 512).
|
|
2120
|
+
|
|
2121
|
+
|
|
2122
|
+
**read_gene_chunks**
|
|
2123
|
+
|
|
2124
|
+
*Signature:* `(self) -> Iterator[List[str]]`
|
|
2125
|
+
|
|
2126
|
+
|
|
2127
|
+
|
|
2128
|
+
## Functions
|
|
2129
|
+
|
|
2130
|
+
### read_gtf
|
|
2131
|
+
|
|
2132
|
+
**Signature:** `(file_path: Union[str, pathlib.Path], chunk_size_mb: int = 512) -> pyrion.core.genes.TranscriptsCollection`
|
|
2133
|
+
|
|
2134
|
+
|
|
2135
|
+
---
|
|
2136
|
+
|
|
2137
|
+
# pyrion.io.twobit
|
|
2138
|
+
|
|
2139
|
+
2bit file format support.
|
|
2140
|
+
|
|
2141
|
+
|
|
2142
|
+
## Classes
|
|
2143
|
+
|
|
2144
|
+
### TwoBitAccessor
|
|
2145
|
+
|
|
2146
|
+
Access sequences from 2bit files using py2bit.
|
|
2147
|
+
|
|
2148
|
+
**Signature:** `(self, file_path: str)`
|
|
2149
|
+
|
|
2150
|
+
#### Methods
|
|
2151
|
+
|
|
2152
|
+
**__init__**
|
|
2153
|
+
|
|
2154
|
+
*Signature:* `(self, file_path: str)`
|
|
2155
|
+
|
|
2156
|
+
Initialize the accessor with the path to a 2bit file.
|
|
2157
|
+
|
|
2158
|
+
|
|
2159
|
+
**__repr__**
|
|
2160
|
+
|
|
2161
|
+
*Signature:* `(self) -> str`
|
|
2162
|
+
|
|
2163
|
+
Return repr(self).
|
|
2164
|
+
|
|
2165
|
+
|
|
2166
|
+
**chrom_names**
|
|
2167
|
+
|
|
2168
|
+
*Signature:* `(self) -> List[str]`
|
|
2169
|
+
|
|
2170
|
+
|
|
2171
|
+
**chrom_sizes**
|
|
2172
|
+
|
|
2173
|
+
*Signature:* `(self) -> Dict[str, int]`
|
|
2174
|
+
|
|
2175
|
+
|
|
2176
|
+
**close**
|
|
2177
|
+
|
|
2178
|
+
*Signature:* `(self)`
|
|
2179
|
+
|
|
2180
|
+
|
|
2181
|
+
**fetch**
|
|
2182
|
+
|
|
2183
|
+
*Signature:* `(self, chrom: str, start: int, end: int, strand: pyrion.core.strand.Strand = <Strand.PLUS: 1>) -> pyrion.core.nucleotide_sequences.NucleotideSequence`
|
|
2184
|
+
|
|
2185
|
+
|
|
2186
|
+
**fetch_interval**
|
|
2187
|
+
|
|
2188
|
+
*Signature:* `(self, interval: pyrion.core.intervals.GenomicInterval) -> pyrion.core.nucleotide_sequences.NucleotideSequence`
|
|
2189
|
+
|
|
2190
|
+
|
|
2191
|
+
**list_chromosomes**
|
|
2192
|
+
|
|
2193
|
+
*Signature:* `(self) -> None`
|
|
2194
|
+
|
|
2195
|
+
|
|
2196
|
+
**validate_interval**
|
|
2197
|
+
|
|
2198
|
+
*Signature:* `(self, chrom: str, start: int, end: int) -> bool`
|
|
2199
|
+
|
|
2200
|
+
|
|
2201
|
+
|
|
2202
|
+
---
|
|
2203
|
+
|
|
2204
|
+
# pyrion.ops
|
|
2205
|
+
|
|
2206
|
+
Pyrion operations module.
|
|
2207
|
+
|
|
2208
|
+
|
|
2209
|
+
---
|
|
2210
|
+
|
|
2211
|
+
# pyrion.ops.chain_serialization
|
|
2212
|
+
|
|
2213
|
+
Genome alignment serialization operations for chain format and JSON.
|
|
2214
|
+
|
|
2215
|
+
|
|
2216
|
+
## Functions
|
|
2217
|
+
|
|
2218
|
+
### genome_alignment_from_dict
|
|
2219
|
+
|
|
2220
|
+
**Signature:** `(data: Dict[str, Any]) -> pyrion.core.genome_alignment.GenomeAlignment`
|
|
2221
|
+
|
|
2222
|
+
|
|
2223
|
+
### genome_alignment_to_chain_string
|
|
2224
|
+
|
|
2225
|
+
**Signature:** `(alignment: pyrion.core.genome_alignment.GenomeAlignment) -> str`
|
|
2226
|
+
|
|
2227
|
+
Convert a single GenomeAlignment to chain format string.
|
|
2228
|
+
|
|
2229
|
+
Chain format:
|
|
2230
|
+
chain {score} {t_chrom} {t_size} {t_strand} {t_start} {t_end} {q_chrom} {q_size} {q_strand} {q_start} {q_end} {chain_id}
|
|
2231
|
+
{block_size} {dt} {dq}
|
|
2232
|
+
...
|
|
2233
|
+
{final_block_size}
|
|
2234
|
+
|
|
2235
|
+
|
|
2236
|
+
### genome_alignment_to_dict
|
|
2237
|
+
|
|
2238
|
+
**Signature:** `(alignment: pyrion.core.genome_alignment.GenomeAlignment) -> Dict[str, Any]`
|
|
2239
|
+
|
|
2240
|
+
|
|
2241
|
+
### genome_alignments_collection_from_dict
|
|
2242
|
+
|
|
2243
|
+
**Signature:** `(data: Dict[str, Any]) -> pyrion.core.genome_alignment.GenomeAlignmentsCollection`
|
|
2244
|
+
|
|
2245
|
+
|
|
2246
|
+
### genome_alignments_collection_summary_string
|
|
2247
|
+
|
|
2248
|
+
**Signature:** `(collection: pyrion.core.genome_alignment.GenomeAlignmentsCollection) -> str`
|
|
2249
|
+
|
|
2250
|
+
|
|
2251
|
+
### genome_alignments_collection_to_chain_string
|
|
2252
|
+
|
|
2253
|
+
**Signature:** `(collection: pyrion.core.genome_alignment.GenomeAlignmentsCollection) -> str`
|
|
2254
|
+
|
|
2255
|
+
|
|
2256
|
+
### genome_alignments_collection_to_dict
|
|
2257
|
+
|
|
2258
|
+
**Signature:** `(collection: pyrion.core.genome_alignment.GenomeAlignmentsCollection) -> Dict[str, Any]`
|
|
2259
|
+
|
|
2260
|
+
|
|
2261
|
+
### load_genome_alignments_collection_from_json
|
|
2262
|
+
|
|
2263
|
+
**Signature:** `(file_path: Union[str, pathlib.Path]) -> pyrion.core.genome_alignment.GenomeAlignmentsCollection`
|
|
2264
|
+
|
|
2265
|
+
|
|
2266
|
+
### save_genome_alignments_collection_to_chain
|
|
2267
|
+
|
|
2268
|
+
**Signature:** `(collection: pyrion.core.genome_alignment.GenomeAlignmentsCollection, file_path: Union[str, pathlib.Path]) -> None`
|
|
2269
|
+
|
|
2270
|
+
|
|
2271
|
+
### save_genome_alignments_collection_to_json
|
|
2272
|
+
|
|
2273
|
+
**Signature:** `(collection: pyrion.core.genome_alignment.GenomeAlignmentsCollection, file_path: Union[str, pathlib.Path]) -> None`
|
|
2274
|
+
|
|
2275
|
+
|
|
2276
|
+
---
|
|
2277
|
+
|
|
2278
|
+
# pyrion.ops.chain_slicing
|
|
2279
|
+
|
|
2280
|
+
Chain slicing operations with proper Q strand handling.
|
|
2281
|
+
|
|
2282
|
+
|
|
2283
|
+
## Functions
|
|
2284
|
+
|
|
2285
|
+
### remove_chain_region_target_space
|
|
2286
|
+
|
|
2287
|
+
**Signature:** `(chain: pyrion.core.genome_alignment.GenomeAlignment, start: int, end: int, use_numba: bool = True) -> pyrion.core.genome_alignment.GenomeAlignment`
|
|
2288
|
+
|
|
2289
|
+
|
|
2290
|
+
### slice_chain_query_space
|
|
2291
|
+
|
|
2292
|
+
**Signature:** `(chain: pyrion.core.genome_alignment.GenomeAlignment, start: int, end: int, use_numba: bool = True) -> pyrion.core.genome_alignment.GenomeAlignment`
|
|
2293
|
+
|
|
2294
|
+
|
|
2295
|
+
### slice_chain_target_space
|
|
2296
|
+
|
|
2297
|
+
**Signature:** `(chain: pyrion.core.genome_alignment.GenomeAlignment, start: int, end: int, use_numba: bool = True) -> pyrion.core.genome_alignment.GenomeAlignment`
|
|
2298
|
+
|
|
2299
|
+
|
|
2300
|
+
---
|
|
2301
|
+
|
|
2302
|
+
# pyrion.ops.chains
|
|
2303
|
+
|
|
2304
|
+
Chain alignment operations for projecting genomic intervals.
|
|
2305
|
+
|
|
2306
|
+
|
|
2307
|
+
## Functions
|
|
2308
|
+
|
|
2309
|
+
### get_chain_q_end
|
|
2310
|
+
|
|
2311
|
+
**Signature:** `(genome_alignment) -> int`
|
|
2312
|
+
|
|
2313
|
+
|
|
2314
|
+
### get_chain_q_start
|
|
2315
|
+
|
|
2316
|
+
**Signature:** `(genome_alignment) -> int`
|
|
2317
|
+
|
|
2318
|
+
|
|
2319
|
+
### get_chain_query_interval
|
|
2320
|
+
|
|
2321
|
+
**Signature:** `(genome_alignment) -> pyrion.core.intervals.GenomicInterval`
|
|
2322
|
+
|
|
2323
|
+
|
|
2324
|
+
### get_chain_t_end
|
|
2325
|
+
|
|
2326
|
+
**Signature:** `(genome_alignment) -> int`
|
|
2327
|
+
|
|
2328
|
+
|
|
2329
|
+
### get_chain_t_start
|
|
2330
|
+
|
|
2331
|
+
**Signature:** `(genome_alignment) -> int`
|
|
2332
|
+
|
|
2333
|
+
|
|
2334
|
+
### get_chain_target_interval
|
|
2335
|
+
|
|
2336
|
+
**Signature:** `(genome_alignment) -> pyrion.core.intervals.GenomicInterval`
|
|
2337
|
+
|
|
2338
|
+
|
|
2339
|
+
### project_intervals_through_chain
|
|
2340
|
+
|
|
2341
|
+
**Signature:** `(intervals: numpy.ndarray, chain_blocks: numpy.ndarray) -> List[numpy.ndarray]`
|
|
2342
|
+
|
|
2343
|
+
|
|
2344
|
+
### project_intervals_through_genome_alignment
|
|
2345
|
+
|
|
2346
|
+
**Signature:** `(intervals: numpy.ndarray, genome_alignment) -> List[numpy.ndarray]`
|
|
2347
|
+
|
|
2348
|
+
Convenience function to project intervals through a GenomeAlignment object.
|
|
2349
|
+
|
|
2350
|
+
|
|
2351
|
+
### project_intervals_through_genome_alignment_to_intervals
|
|
2352
|
+
|
|
2353
|
+
**Signature:** `(intervals: numpy.ndarray, genome_alignment, target_chrom: Optional[str] = None, target_strand: Optional[pyrion.core.strand.Strand] = None) -> List[pyrion.core.intervals.GenomicInterval]`
|
|
2354
|
+
|
|
2355
|
+
Project intervals through genome alignment and convert to GenomicInterval objects.
|
|
2356
|
+
|
|
2357
|
+
Args:
|
|
2358
|
+
intervals: Array of intervals to project, shape (N, 2)
|
|
2359
|
+
genome_alignment: GenomeAlignment object to project through
|
|
2360
|
+
target_chrom: Target chromosome name (auto-detected if None)
|
|
2361
|
+
target_strand: Target strand (auto-detected if None)
|
|
2362
|
+
|
|
2363
|
+
|
|
2364
|
+
### project_transcript_through_chain
|
|
2365
|
+
|
|
2366
|
+
**Signature:** `(transcript: pyrion.core.genes.Transcript, chain: pyrion.core.genome_alignment.GenomeAlignment, only_cds=False) -> pyrion.core.intervals.GenomicInterval | None`
|
|
2367
|
+
|
|
2368
|
+
|
|
2369
|
+
### split_genome_alignment
|
|
2370
|
+
|
|
2371
|
+
**Signature:** `(chain: pyrion.core.genome_alignment.GenomeAlignment, intersected_transcripts: List[pyrion.core.genes.Transcript], window_size: int = 1000000, intergenic_margin: int = 10000) -> Tuple[List[pyrion.core.genome_alignment.GenomeAlignment], Dict[int, List[str]]]`
|
|
2372
|
+
|
|
2373
|
+
|
|
2374
|
+
---
|
|
2375
|
+
|
|
2376
|
+
# pyrion.ops.data_consistency
|
|
2377
|
+
|
|
2378
|
+
Data consistency checking utilities.
|
|
2379
|
+
|
|
2380
|
+
|
|
2381
|
+
## Functions
|
|
2382
|
+
|
|
2383
|
+
### check_data_consistency
|
|
2384
|
+
|
|
2385
|
+
**Signature:** `(transcripts_collection: pyrion.core.genes.TranscriptsCollection, detailed: bool = False) -> str`
|
|
2386
|
+
|
|
2387
|
+
Check data consistency in a transcripts collection.
|
|
2388
|
+
|
|
2389
|
+
Analyzes the consistency of applied gene data mappings and reports issues:
|
|
2390
|
+
- Transcripts without gene IDs (if gene-transcript mapping was applied)
|
|
2391
|
+
- Transcripts without biotypes (if biotype mapping was applied)
|
|
2392
|
+
- Genes without names (if gene names were applied)
|
|
2393
|
+
|
|
2394
|
+
|
|
2395
|
+
---
|
|
2396
|
+
|
|
2397
|
+
# pyrion.ops.entity_ops
|
|
2398
|
+
|
|
2399
|
+
Entity-specific operations for Transcripts and GenomeAlignments using low-level interval operations.
|
|
2400
|
+
|
|
2401
|
+
|
|
2402
|
+
## Functions
|
|
2403
|
+
|
|
2404
|
+
### find_alignment_gaps
|
|
2405
|
+
|
|
2406
|
+
**Signature:** `(alignment: pyrion.core.genome_alignment.GenomeAlignment, space: str = 'target', use_numba: bool = True) -> numpy.ndarray`
|
|
2407
|
+
|
|
2408
|
+
Find gaps in a genome alignment.
|
|
2409
|
+
|
|
2410
|
+
Args:
|
|
2411
|
+
alignment: GenomeAlignment object
|
|
2412
|
+
space: "target" or "query" - which coordinate space to find gaps in
|
|
2413
|
+
use_numba: Whether to use numba-optimized operations
|
|
2414
|
+
|
|
2415
|
+
Returns:
|
|
2416
|
+
Array of gap intervals
|
|
2417
|
+
|
|
2418
|
+
|
|
2419
|
+
### find_transcript_overlaps
|
|
2420
|
+
|
|
2421
|
+
**Signature:** `(transcript1: pyrion.core.genes.Transcript, transcript2: pyrion.core.genes.Transcript, region_type: str = 'exon', use_numba: bool = True) -> numpy.ndarray`
|
|
2422
|
+
|
|
2423
|
+
Find overlaps between specific regions of two transcripts.
|
|
2424
|
+
|
|
2425
|
+
|
|
2426
|
+
### get_transcript_cds_in_range
|
|
2427
|
+
|
|
2428
|
+
**Signature:** `(transcript: pyrion.core.genes.Transcript, start: int, end: int, use_numba: bool = True) -> numpy.ndarray`
|
|
2429
|
+
|
|
2430
|
+
Get CDS blocks within a specific genomic range using slice operations.
|
|
2431
|
+
|
|
2432
|
+
|
|
2433
|
+
### get_transcript_introns_in_range
|
|
2434
|
+
|
|
2435
|
+
**Signature:** `(transcript: pyrion.core.genes.Transcript, start: int, end: int, use_numba: bool = True) -> numpy.ndarray`
|
|
2436
|
+
|
|
2437
|
+
Get intron blocks within a specific genomic range.
|
|
2438
|
+
|
|
2439
|
+
|
|
2440
|
+
### get_transcript_utrs_in_range
|
|
2441
|
+
|
|
2442
|
+
**Signature:** `(transcript: pyrion.core.genes.Transcript, start: int, end: int, utr_type: str = 'both', use_numba: bool = True) -> numpy.ndarray`
|
|
2443
|
+
|
|
2444
|
+
Get UTR blocks within a specific genomic range.
|
|
2445
|
+
|
|
2446
|
+
|
|
2447
|
+
### intersect_alignment_with_intervals
|
|
2448
|
+
|
|
2449
|
+
**Signature:** `(alignment: pyrion.core.genome_alignment.GenomeAlignment, intervals: numpy.ndarray, space: str = 'target', use_numba: bool = True) -> numpy.ndarray`
|
|
2450
|
+
|
|
2451
|
+
Find intersections between alignment blocks and given intervals.
|
|
2452
|
+
|
|
2453
|
+
Args:
|
|
2454
|
+
alignment: GenomeAlignment object
|
|
2455
|
+
intervals: Array of intervals to intersect with, shape (N, 2)
|
|
2456
|
+
space: "target" or "query" - which coordinate space to use
|
|
2457
|
+
use_numba: Whether to use numba-optimized operations
|
|
2458
|
+
|
|
2459
|
+
Returns:
|
|
2460
|
+
Array of intersection intervals
|
|
2461
|
+
|
|
2462
|
+
|
|
2463
|
+
### merge_genome_alignments
|
|
2464
|
+
|
|
2465
|
+
**Signature:** `(alignments: List[pyrion.core.genome_alignment.GenomeAlignment], space: str = 'target', use_numba: bool = True) -> numpy.ndarray`
|
|
2466
|
+
|
|
2467
|
+
Merge blocks from multiple genome alignments.
|
|
2468
|
+
|
|
2469
|
+
|
|
2470
|
+
### merge_transcript_cds
|
|
2471
|
+
|
|
2472
|
+
**Signature:** `(transcripts: List[pyrion.core.genes.Transcript], use_numba: bool = True) -> numpy.ndarray`
|
|
2473
|
+
|
|
2474
|
+
Merge CDS blocks from multiple transcripts.
|
|
2475
|
+
|
|
2476
|
+
|
|
2477
|
+
### merge_transcript_utrs
|
|
2478
|
+
|
|
2479
|
+
**Signature:** `(transcripts: List[pyrion.core.genes.Transcript], utr_type: str = 'both', use_numba: bool = True) -> numpy.ndarray`
|
|
2480
|
+
|
|
2481
|
+
Merge UTR blocks from multiple transcripts.
|
|
2482
|
+
|
|
2483
|
+
|
|
2484
|
+
### subtract_transcript_regions
|
|
2485
|
+
|
|
2486
|
+
**Signature:** `(transcript: pyrion.core.genes.Transcript, subtract_regions: numpy.ndarray, region_type: str = 'exon', use_numba: bool = True) -> numpy.ndarray`
|
|
2487
|
+
|
|
2488
|
+
Subtract regions from specific parts of a transcript.
|
|
2489
|
+
|
|
2490
|
+
|
|
2491
|
+
---
|
|
2492
|
+
|
|
2493
|
+
# pyrion.ops.genes
|
|
2494
|
+
|
|
2495
|
+
Gene and transcript operations.
|
|
2496
|
+
|
|
2497
|
+
|
|
2498
|
+
## Classes
|
|
2499
|
+
|
|
2500
|
+
### SequenceAccessor
|
|
2501
|
+
|
|
2502
|
+
Protocol for sequence accessors (TwoBitAccessor, FastaAccessor).
|
|
2503
|
+
|
|
2504
|
+
**Signature:** `(self, *args, **kwargs)`
|
|
2505
|
+
|
|
2506
|
+
#### Methods
|
|
2507
|
+
|
|
2508
|
+
**__init__**
|
|
2509
|
+
|
|
2510
|
+
*Signature:* `(self, *args, **kwargs)`
|
|
2511
|
+
|
|
2512
|
+
|
|
2513
|
+
**fetch**
|
|
2514
|
+
|
|
2515
|
+
*Signature:* `(self, chrom: str, start: int, end: int, strand: pyrion.core.strand.Strand) -> pyrion.core.nucleotide_sequences.NucleotideSequence`
|
|
2516
|
+
|
|
2517
|
+
Fetch sequence from chrom:start-end.
|
|
2518
|
+
|
|
2519
|
+
|
|
2520
|
+
|
|
2521
|
+
## Functions
|
|
2522
|
+
|
|
2523
|
+
### extract_cds_sequence
|
|
2524
|
+
|
|
2525
|
+
**Signature:** `(transcript: pyrion.core.genes.Transcript, accessor: pyrion.ops.genes.SequenceAccessor) -> pyrion.core.nucleotide_sequences.NucleotideSequence`
|
|
2526
|
+
|
|
2527
|
+
|
|
2528
|
+
### extract_exon_sequence
|
|
2529
|
+
|
|
2530
|
+
**Signature:** `(transcript: pyrion.core.genes.Transcript, accessor: pyrion.ops.genes.SequenceAccessor) -> pyrion.core.nucleotide_sequences.NucleotideSequence`
|
|
2531
|
+
|
|
2532
|
+
|
|
2533
|
+
### extract_utr3_sequence
|
|
2534
|
+
|
|
2535
|
+
**Signature:** `(transcript: pyrion.core.genes.Transcript, accessor: pyrion.ops.genes.SequenceAccessor) -> pyrion.core.nucleotide_sequences.NucleotideSequence`
|
|
2536
|
+
|
|
2537
|
+
|
|
2538
|
+
### extract_utr5_sequence
|
|
2539
|
+
|
|
2540
|
+
**Signature:** `(transcript: pyrion.core.genes.Transcript, accessor: pyrion.ops.genes.SequenceAccessor) -> pyrion.core.nucleotide_sequences.NucleotideSequence`
|
|
2541
|
+
|
|
2542
|
+
|
|
2543
|
+
### merge_transcript_intervals
|
|
2544
|
+
|
|
2545
|
+
**Signature:** `(transcripts: List[pyrion.core.genes.Transcript], cds_only: bool = False, use_numba: bool = True) -> List[pyrion.core.intervals.GenomicInterval]`
|
|
2546
|
+
|
|
2547
|
+
Merge overlapping or adjacent intervals from multiple transcripts.
|
|
2548
|
+
|
|
2549
|
+
|
|
2550
|
+
---
|
|
2551
|
+
|
|
2552
|
+
# pyrion.ops.interval_collection_ops
|
|
2553
|
+
|
|
2554
|
+
Bulk operations for GenomicIntervalsCollection using vectorized numpy operations.
|
|
2555
|
+
|
|
2556
|
+
|
|
2557
|
+
## Functions
|
|
2558
|
+
|
|
2559
|
+
### create_collections_from_mixed_intervals
|
|
2560
|
+
|
|
2561
|
+
**Signature:** `(intervals: List[pyrion.core.intervals.GenomicInterval], consider_strand: bool = False) -> List[pyrion.core.intervals.GenomicIntervalsCollection]`
|
|
2562
|
+
|
|
2563
|
+
|
|
2564
|
+
### filter_collection
|
|
2565
|
+
|
|
2566
|
+
**Signature:** `(collection: pyrion.core.intervals.GenomicIntervalsCollection, predicate: Callable[[pyrion.core.intervals.GenomicInterval], bool]) -> pyrion.core.intervals.GenomicIntervalsCollection`
|
|
2567
|
+
|
|
2568
|
+
|
|
2569
|
+
### group_intervals_by_proximity
|
|
2570
|
+
|
|
2571
|
+
**Signature:** `(collection: pyrion.core.intervals.GenomicIntervalsCollection, max_gap: int) -> List[pyrion.core.intervals.GenomicIntervalsCollection]`
|
|
2572
|
+
|
|
2573
|
+
|
|
2574
|
+
### intersect_collections
|
|
2575
|
+
|
|
2576
|
+
**Signature:** `(collection: pyrion.core.intervals.GenomicIntervalsCollection, other: Union[pyrion.core.intervals.GenomicIntervalsCollection, pyrion.core.intervals.GenomicInterval]) -> pyrion.core.intervals.GenomicIntervalsCollection`
|
|
2577
|
+
|
|
2578
|
+
|
|
2579
|
+
### merge_close_intervals
|
|
2580
|
+
|
|
2581
|
+
**Signature:** `(collection: pyrion.core.intervals.GenomicIntervalsCollection, max_gap: int = 0) -> pyrion.core.intervals.GenomicIntervalsCollection`
|
|
2582
|
+
|
|
2583
|
+
|
|
2584
|
+
### split_intervals_on_gaps
|
|
2585
|
+
|
|
2586
|
+
**Signature:** `(collection: pyrion.core.intervals.GenomicIntervalsCollection, min_gap: int) -> List[pyrion.core.intervals.GenomicIntervalsCollection]`
|
|
2587
|
+
|
|
2588
|
+
|
|
2589
|
+
---
|
|
2590
|
+
|
|
2591
|
+
# pyrion.ops.interval_ops
|
|
2592
|
+
|
|
2593
|
+
Additional low-level interval operations for merge, intersection, etc.
|
|
2594
|
+
|
|
2595
|
+
|
|
2596
|
+
## Functions
|
|
2597
|
+
|
|
2598
|
+
### intersect_intervals
|
|
2599
|
+
|
|
2600
|
+
**Signature:** `(intervals1: numpy.ndarray, intervals2: numpy.ndarray, use_numba: bool = True) -> numpy.ndarray`
|
|
2601
|
+
|
|
2602
|
+
|
|
2603
|
+
### intervals_union
|
|
2604
|
+
|
|
2605
|
+
**Signature:** `(intervals_list: List[numpy.ndarray], use_numba: bool = True) -> numpy.ndarray`
|
|
2606
|
+
|
|
2607
|
+
|
|
2608
|
+
### merge_intervals
|
|
2609
|
+
|
|
2610
|
+
**Signature:** `(intervals: numpy.ndarray, use_numba: bool = None) -> numpy.ndarray`
|
|
2611
|
+
|
|
2612
|
+
|
|
2613
|
+
### subtract_intervals
|
|
2614
|
+
|
|
2615
|
+
**Signature:** `(intervals1: numpy.ndarray, intervals2: numpy.ndarray, use_numba: bool = True) -> numpy.ndarray`
|
|
2616
|
+
|
|
2617
|
+
|
|
2618
|
+
---
|
|
2619
|
+
|
|
2620
|
+
# pyrion.ops.interval_serialization
|
|
2621
|
+
|
|
2622
|
+
Genomic interval serialization operations for BED6 format.
|
|
2623
|
+
|
|
2624
|
+
|
|
2625
|
+
## Functions
|
|
2626
|
+
|
|
2627
|
+
### genomic_interval_to_bed6_string
|
|
2628
|
+
|
|
2629
|
+
**Signature:** `(interval: pyrion.core.intervals.GenomicInterval, score: int = 1000) -> str`
|
|
2630
|
+
|
|
2631
|
+
Convert a single GenomicInterval to BED6 format string.
|
|
2632
|
+
|
|
2633
|
+
|
|
2634
|
+
### genomic_intervals_to_bed6_string
|
|
2635
|
+
|
|
2636
|
+
**Signature:** `(intervals: List[pyrion.core.intervals.GenomicInterval], score: int = 1000) -> str`
|
|
2637
|
+
|
|
2638
|
+
|
|
2639
|
+
### save_genomic_intervals_to_bed6
|
|
2640
|
+
|
|
2641
|
+
**Signature:** `(intervals: List[pyrion.core.intervals.GenomicInterval], file_path: Union[str, pathlib.Path], score: int = 1000) -> None`
|
|
2642
|
+
|
|
2643
|
+
|
|
2644
|
+
---
|
|
2645
|
+
|
|
2646
|
+
# pyrion.ops.interval_slicing
|
|
2647
|
+
|
|
2648
|
+
Low-level interval slicing and manipulation operations.
|
|
2649
|
+
|
|
2650
|
+
|
|
2651
|
+
## Functions
|
|
2652
|
+
|
|
2653
|
+
### invert_intervals
|
|
2654
|
+
|
|
2655
|
+
**Signature:** `(intervals: numpy.ndarray, span_start: int, span_end: int, use_numba: bool = None) -> numpy.ndarray`
|
|
2656
|
+
|
|
2657
|
+
Get the inverse (gaps) of intervals within a given span.
|
|
2658
|
+
|
|
2659
|
+
Example:
|
|
2660
|
+
exons = [[100, 150], [200, 210], [400, 600]]
|
|
2661
|
+
invert_intervals(exons, 50, 700) -> [[50, 100], [150, 200], [210, 400], [600, 700]]
|
|
2662
|
+
|
|
2663
|
+
|
|
2664
|
+
### remove_intervals
|
|
2665
|
+
|
|
2666
|
+
**Signature:** `(intervals: numpy.ndarray, remove_start: int, remove_end: int, use_numba: bool = None) -> numpy.ndarray`
|
|
2667
|
+
|
|
2668
|
+
Remove a region from intervals, potentially splitting them.
|
|
2669
|
+
|
|
2670
|
+
Example:
|
|
2671
|
+
blocks = [[10, 100], [150, 300]]
|
|
2672
|
+
remove_intervals(blocks, 50, 200) -> [[10, 50], [200, 300]]
|
|
2673
|
+
|
|
2674
|
+
|
|
2675
|
+
### slice_intervals
|
|
2676
|
+
|
|
2677
|
+
**Signature:** `(intervals: numpy.ndarray, slice_start: int, slice_end: int, use_numba: bool = None) -> numpy.ndarray`
|
|
2678
|
+
|
|
2679
|
+
Slice intervals to get only parts that intersect with [slice_start, slice_end).
|
|
2680
|
+
|
|
2681
|
+
Example:
|
|
2682
|
+
blocks = [[10, 30], [100, 150], [200, 210], [400, 600]]
|
|
2683
|
+
slice_intervals(blocks, 40, 450) -> [[100, 150], [200, 210], [400, 450]]
|
|
2684
|
+
|
|
2685
|
+
|
|
2686
|
+
---
|
|
2687
|
+
|
|
2688
|
+
# pyrion.ops.intervals
|
|
2689
|
+
|
|
2690
|
+
Interval operations for pyrion.
|
|
2691
|
+
|
|
2692
|
+
|
|
2693
|
+
## Functions
|
|
2694
|
+
|
|
2695
|
+
### array_to_intervals
|
|
2696
|
+
|
|
2697
|
+
**Signature:** `(array: numpy.ndarray, chrom: str) -> List`
|
|
2698
|
+
|
|
2699
|
+
Convert 2D numpy array of [start, end] pairs to list of GenomicInterval objects.
|
|
2700
|
+
|
|
2701
|
+
|
|
2702
|
+
### chains_to_arrays
|
|
2703
|
+
|
|
2704
|
+
**Signature:** `(chains: List, for_q: bool = False) -> Tuple[numpy.ndarray, numpy.ndarray]`
|
|
2705
|
+
|
|
2706
|
+
|
|
2707
|
+
### compute_overlap_size
|
|
2708
|
+
|
|
2709
|
+
**Signature:** `(start1: int, end1: int, start2: int, end2: int) -> int`
|
|
2710
|
+
|
|
2711
|
+
|
|
2712
|
+
### find_intersections
|
|
2713
|
+
|
|
2714
|
+
**Signature:** `(arr1: numpy.ndarray, arr2: numpy.ndarray, ids1: Optional[List] = None, ids2: Optional[List] = None) -> Dict[Any, List]`
|
|
2715
|
+
|
|
2716
|
+
Find intersections between two arrays of intervals.
|
|
2717
|
+
|
|
2718
|
+
|
|
2719
|
+
### intervals_to_array
|
|
2720
|
+
|
|
2721
|
+
**Signature:** `(intervals: List) -> numpy.ndarray`
|
|
2722
|
+
|
|
2723
|
+
Convert list of GenomicInterval objects to 2D numpy array of [start, end] pairs.
|
|
2724
|
+
|
|
2725
|
+
|
|
2726
|
+
### projected_intervals_to_genomic_intervals
|
|
2727
|
+
|
|
2728
|
+
**Signature:** `(projected_arrays: List[numpy.ndarray], target_chrom: str, target_strand: pyrion.core.strand.Strand = <Strand.UNKNOWN: 0>, ids: Optional[List[str]] = None) -> List[List[pyrion.core.intervals.GenomicInterval]]`
|
|
2729
|
+
|
|
2730
|
+
Convert projected interval arrays to GenomicInterval objects.
|
|
2731
|
+
|
|
2732
|
+
Convenience function to convert the output of project_intervals_through_genome_alignment
|
|
2733
|
+
into GenomicInterval objects.
|
|
2734
|
+
|
|
2735
|
+
|
|
2736
|
+
### transcripts_to_arrays
|
|
2737
|
+
|
|
2738
|
+
**Signature:** `(transcripts: List) -> Tuple[numpy.ndarray, numpy.ndarray]`
|
|
2739
|
+
|
|
2740
|
+
|
|
2741
|
+
---
|
|
2742
|
+
|
|
2743
|
+
# pyrion.ops.sequence_serialization
|
|
2744
|
+
|
|
2745
|
+
Sequence serialization operations for FASTA format.
|
|
2746
|
+
|
|
2747
|
+
|
|
2748
|
+
## Functions
|
|
2749
|
+
|
|
2750
|
+
### amino_acid_sequence_to_fasta_string
|
|
2751
|
+
|
|
2752
|
+
**Signature:** `(sequence: pyrion.core.amino_acid_sequences.AminoAcidSequence, width: int = 80, header: Optional[str] = None) -> str`
|
|
2753
|
+
|
|
2754
|
+
|
|
2755
|
+
### codon_sequence_to_fasta_string
|
|
2756
|
+
|
|
2757
|
+
**Signature:** `(sequence: pyrion.core.codons.CodonSequence, width: int = 80, header: Optional[str] = None) -> str`
|
|
2758
|
+
|
|
2759
|
+
|
|
2760
|
+
### format_fasta_sequence
|
|
2761
|
+
|
|
2762
|
+
**Signature:** `(sequence_string: str, width: int = 80) -> str`
|
|
2763
|
+
|
|
2764
|
+
Format sequence string with specified line width.
|
|
2765
|
+
|
|
2766
|
+
|
|
2767
|
+
### get_sequence_header
|
|
2768
|
+
|
|
2769
|
+
**Signature:** `(sequence: Any, index: Optional[int] = None) -> str`
|
|
2770
|
+
|
|
2771
|
+
Extract or generate FASTA header for a sequence object.
|
|
2772
|
+
|
|
2773
|
+
|
|
2774
|
+
### nucleotide_sequence_to_fasta_string
|
|
2775
|
+
|
|
2776
|
+
**Signature:** `(sequence: pyrion.core.nucleotide_sequences.NucleotideSequence, width: int = 80, header: Optional[str] = None) -> str`
|
|
2777
|
+
|
|
2778
|
+
|
|
2779
|
+
### save_sequences_to_fasta
|
|
2780
|
+
|
|
2781
|
+
**Signature:** `(sequences: List[Any], file_path: Union[str, pathlib.Path], width: int = 80) -> None`
|
|
2782
|
+
|
|
2783
|
+
|
|
2784
|
+
### sequence_to_fasta_string
|
|
2785
|
+
|
|
2786
|
+
**Signature:** `(sequence: Any, width: int = 80, header: Optional[str] = None) -> str`
|
|
2787
|
+
|
|
2788
|
+
|
|
2789
|
+
### sequences_to_fasta_string
|
|
2790
|
+
|
|
2791
|
+
**Signature:** `(sequences: List[Any], width: int = 80) -> str`
|
|
2792
|
+
|
|
2793
|
+
|
|
2794
|
+
---
|
|
2795
|
+
|
|
2796
|
+
# pyrion.ops.transcript_serialization
|
|
2797
|
+
|
|
2798
|
+
Transcript serialization operations for BED12 and JSON formats.
|
|
2799
|
+
|
|
2800
|
+
|
|
2801
|
+
## Functions
|
|
2802
|
+
|
|
2803
|
+
### load_transcripts_collection_from_json
|
|
2804
|
+
|
|
2805
|
+
**Signature:** `(file_path: Union[str, pathlib.Path]) -> pyrion.core.genes.TranscriptsCollection`
|
|
2806
|
+
|
|
2807
|
+
|
|
2808
|
+
### save_transcripts_collection_to_bed12
|
|
2809
|
+
|
|
2810
|
+
**Signature:** `(collection: pyrion.core.genes.TranscriptsCollection, file_path: Union[str, pathlib.Path]) -> None`
|
|
2811
|
+
|
|
2812
|
+
|
|
2813
|
+
### save_transcripts_collection_to_json
|
|
2814
|
+
|
|
2815
|
+
**Signature:** `(collection: pyrion.core.genes.TranscriptsCollection, file_path: Union[str, pathlib.Path]) -> None`
|
|
2816
|
+
|
|
2817
|
+
|
|
2818
|
+
### transcript_from_dict
|
|
2819
|
+
|
|
2820
|
+
**Signature:** `(data: Dict[str, Any]) -> pyrion.core.genes.Transcript`
|
|
2821
|
+
|
|
2822
|
+
|
|
2823
|
+
### transcript_to_bed12_string
|
|
2824
|
+
|
|
2825
|
+
**Signature:** `(transcript: pyrion.core.genes.Transcript) -> str`
|
|
2826
|
+
|
|
2827
|
+
|
|
2828
|
+
### transcript_to_dict
|
|
2829
|
+
|
|
2830
|
+
**Signature:** `(transcript: pyrion.core.genes.Transcript) -> Dict[str, Any]`
|
|
2831
|
+
|
|
2832
|
+
|
|
2833
|
+
### transcripts_collection_from_dict
|
|
2834
|
+
|
|
2835
|
+
**Signature:** `(data: Dict[str, Any]) -> pyrion.core.genes.TranscriptsCollection`
|
|
2836
|
+
|
|
2837
|
+
|
|
2838
|
+
### transcripts_collection_summary_string
|
|
2839
|
+
|
|
2840
|
+
**Signature:** `(collection: pyrion.core.genes.TranscriptsCollection) -> str`
|
|
2841
|
+
|
|
2842
|
+
|
|
2843
|
+
### transcripts_collection_to_bed12_string
|
|
2844
|
+
|
|
2845
|
+
**Signature:** `(collection: pyrion.core.genes.TranscriptsCollection) -> str`
|
|
2846
|
+
|
|
2847
|
+
|
|
2848
|
+
### transcripts_collection_to_dict
|
|
2849
|
+
|
|
2850
|
+
**Signature:** `(collection: pyrion.core.genes.TranscriptsCollection) -> Dict[str, Any]`
|
|
2851
|
+
|
|
2852
|
+
|
|
2853
|
+
---
|
|
2854
|
+
|
|
2855
|
+
# pyrion.ops.transcript_slicing
|
|
2856
|
+
|
|
2857
|
+
Transcript slicing operations.
|
|
2858
|
+
|
|
2859
|
+
|
|
2860
|
+
## Functions
|
|
2861
|
+
|
|
2862
|
+
### get_transcript_introns
|
|
2863
|
+
|
|
2864
|
+
**Signature:** `(transcript: pyrion.core.genes.Transcript, use_numba: bool = True) -> numpy.ndarray`
|
|
2865
|
+
|
|
2866
|
+
Get intron blocks (gaps between exons) within transcript span.
|
|
2867
|
+
|
|
2868
|
+
|
|
2869
|
+
### remove_transcript_region
|
|
2870
|
+
|
|
2871
|
+
**Signature:** `(transcript: pyrion.core.genes.Transcript, start: int, end: int, use_numba: bool = True) -> pyrion.core.genes.Transcript`
|
|
2872
|
+
|
|
2873
|
+
Remove a region from transcript, potentially splitting blocks.
|
|
2874
|
+
|
|
2875
|
+
Args:
|
|
2876
|
+
transcript: Transcript object
|
|
2877
|
+
start: Start position to remove (inclusive)
|
|
2878
|
+
end: End position to remove (exclusive)
|
|
2879
|
+
use_numba: Whether to use numba-optimized operations
|
|
2880
|
+
|
|
2881
|
+
Returns:
|
|
2882
|
+
New Transcript with region removed
|
|
2883
|
+
|
|
2884
|
+
|
|
2885
|
+
### slice_transcript
|
|
2886
|
+
|
|
2887
|
+
**Signature:** `(transcript: pyrion.core.genes.Transcript, start: int, end: int, use_numba: bool = True) -> pyrion.core.genes.Transcript`
|
|
2888
|
+
|
|
2889
|
+
Slice transcript to get only blocks that intersect with [start, end).
|
|
2890
|
+
|
|
2891
|
+
|
|
2892
|
+
---
|
|
2893
|
+
|
|
2894
|
+
# pyrion.utils
|
|
2895
|
+
|
|
2896
|
+
Utility modules for pyrion.
|
|
2897
|
+
|
|
2898
|
+
|
|
2899
|
+
---
|
|
2900
|
+
|
|
2901
|
+
# pyrion.utils.amino_acid_encoding
|
|
2902
|
+
|
|
2903
|
+
Amino acid encoding utilities using prime-based multiplicative semantics.
|
|
2904
|
+
|
|
2905
|
+
|
|
2906
|
+
## Functions
|
|
2907
|
+
|
|
2908
|
+
### apply_masking_aa
|
|
2909
|
+
|
|
2910
|
+
**Signature:** `(encoded: numpy.ndarray) -> numpy.ndarray`
|
|
2911
|
+
|
|
2912
|
+
Apply masking using multiplicative semantics (multiply by -1, gaps stay 0).
|
|
2913
|
+
|
|
2914
|
+
|
|
2915
|
+
### decode_amino_acids
|
|
2916
|
+
|
|
2917
|
+
**Signature:** `(encoded: numpy.ndarray) -> str`
|
|
2918
|
+
|
|
2919
|
+
Decode int8 array to amino acid sequence using prime-based multiplicative semantics.
|
|
2920
|
+
|
|
2921
|
+
|
|
2922
|
+
### encode_amino_acids
|
|
2923
|
+
|
|
2924
|
+
**Signature:** `(sequence: str) -> numpy.ndarray`
|
|
2925
|
+
|
|
2926
|
+
Encode amino acid sequence to int8 array using prime-based multiplicative semantics.
|
|
2927
|
+
|
|
2928
|
+
|
|
2929
|
+
### get_masking_status_aa
|
|
2930
|
+
|
|
2931
|
+
**Signature:** `(encoded: numpy.ndarray) -> numpy.ndarray`
|
|
2932
|
+
|
|
2933
|
+
Get boolean array indicating which positions are masked.
|
|
2934
|
+
|
|
2935
|
+
|
|
2936
|
+
### is_gap
|
|
2937
|
+
|
|
2938
|
+
**Signature:** `(code: int) -> bool`
|
|
2939
|
+
|
|
2940
|
+
Check if code represents a gap.
|
|
2941
|
+
|
|
2942
|
+
|
|
2943
|
+
### is_masked
|
|
2944
|
+
|
|
2945
|
+
**Signature:** `(code: int) -> bool`
|
|
2946
|
+
|
|
2947
|
+
Check if an amino acid code is masked, i.e. negative (the gap code 0 is never masked).
|
|
2948
|
+
|
|
2949
|
+
|
|
2950
|
+
### is_stop
|
|
2951
|
+
|
|
2952
|
+
**Signature:** `(code: int) -> bool`
|
|
2953
|
+
|
|
2954
|
+
Check if code represents a stop codon.
|
|
2955
|
+
|
|
2956
|
+
|
|
2957
|
+
### is_unknown
|
|
2958
|
+
|
|
2959
|
+
**Signature:** `(code: int) -> bool`
|
|
2960
|
+
|
|
2961
|
+
Check if code represents an unknown amino acid.
|
|
2962
|
+
|
|
2963
|
+
|
|
2964
|
+
### mask
|
|
2965
|
+
|
|
2966
|
+
**Signature:** `(code: int) -> int`
|
|
2967
|
+
|
|
2968
|
+
Apply masking by multiplying by -1.
|
|
2969
|
+
|
|
2970
|
+
|
|
2971
|
+
### remove_masking_aa
|
|
2972
|
+
|
|
2973
|
+
**Signature:** `(encoded: numpy.ndarray) -> numpy.ndarray`
|
|
2974
|
+
|
|
2975
|
+
Remove masking using multiplicative semantics (take absolute value).
|
|
2976
|
+
|
|
2977
|
+
|
|
2978
|
+
### unmask
|
|
2979
|
+
|
|
2980
|
+
**Signature:** `(code: int) -> int`
|
|
2981
|
+
|
|
2982
|
+
Remove masking by taking absolute value.
|
|
2983
|
+
|
|
2984
|
+
|
|
2985
|
+
---
|
|
2986
|
+
|
|
2987
|
+
# pyrion.utils.encoding
|
|
2988
|
+
|
|
2989
|
+
Encoding utilities for nucleotides using multiplicative semantics.
|
|
2990
|
+
|
|
2991
|
+
|
|
2992
|
+
## Functions
|
|
2993
|
+
|
|
2994
|
+
### apply_complement
|
|
2995
|
+
|
|
2996
|
+
**Signature:** `(encoded: numpy.ndarray) -> numpy.ndarray`
|
|
2997
|
+
|
|
2998
|
+
Apply complement using multiplicative semantics (multiply by -1).
|
|
2999
|
+
|
|
3000
|
+
|
|
3001
|
+
### apply_masking
|
|
3002
|
+
|
|
3003
|
+
**Signature:** `(encoded: numpy.ndarray) -> numpy.ndarray`
|
|
3004
|
+
|
|
3005
|
+
Apply masking using multiplicative semantics (multiply by 5, gaps stay 0).
|
|
3006
|
+
|
|
3007
|
+
|
|
3008
|
+
### complement
|
|
3009
|
+
|
|
3010
|
+
**Signature:** `(code: int) -> int`
|
|
3011
|
+
|
|
3012
|
+
Get complement by multiplying by -1.
|
|
3013
|
+
|
|
3014
|
+
|
|
3015
|
+
### decode_nucleotides
|
|
3016
|
+
|
|
3017
|
+
**Signature:** `(encoded: numpy.ndarray, is_rna: bool = False) -> str`
|
|
3018
|
+
|
|
3019
|
+
Decode int8 array to nucleotide sequence using multiplicative semantics.
|
|
3020
|
+
|
|
3021
|
+
|
|
3022
|
+
### encode_nucleotides
|
|
3023
|
+
|
|
3024
|
+
**Signature:** `(sequence: str) -> numpy.ndarray`
|
|
3025
|
+
|
|
3026
|
+
Encode nucleotide sequence to int8 array using multiplicative semantics.
|
|
3027
|
+
|
|
3028
|
+
|
|
3029
|
+
### get_masking_status
|
|
3030
|
+
|
|
3031
|
+
**Signature:** `(encoded: numpy.ndarray) -> numpy.ndarray`
|
|
3032
|
+
|
|
3033
|
+
Get boolean array indicating which positions are masked.
|
|
3034
|
+
|
|
3035
|
+
|
|
3036
|
+
### is_frameshift
|
|
3037
|
+
|
|
3038
|
+
**Signature:** `(code: int) -> bool`
|
|
3039
|
+
|
|
3040
|
+
Check if code represents a frameshift.
|
|
3041
|
+
|
|
3042
|
+
|
|
3043
|
+
### is_gap
|
|
3044
|
+
|
|
3045
|
+
**Signature:** `(code: int) -> bool`
|
|
3046
|
+
|
|
3047
|
+
Check if code represents a gap.
|
|
3048
|
+
|
|
3049
|
+
|
|
3050
|
+
### is_masked
|
|
3051
|
+
|
|
3052
|
+
**Signature:** `(code: int) -> bool`
|
|
3053
|
+
|
|
3054
|
+
Check if nucleotide is masked using multiplicative test.
|
|
3055
|
+
|
|
3056
|
+
|
|
3057
|
+
### mask
|
|
3058
|
+
|
|
3059
|
+
**Signature:** `(code: int) -> int`
|
|
3060
|
+
|
|
3061
|
+
Apply masking by multiplying by 5.
|
|
3062
|
+
|
|
3063
|
+
|
|
3064
|
+
### remove_masking
|
|
3065
|
+
|
|
3066
|
+
**Signature:** `(encoded: numpy.ndarray) -> numpy.ndarray`
|
|
3067
|
+
|
|
3068
|
+
Remove masking using multiplicative semantics (the inverse of `apply_masking`).
|
|
3069
|
+
|
|
3070
|
+
|
|
3071
|
+
### unmask
|
|
3072
|
+
|
|
3073
|
+
**Signature:** `(code: int) -> int`
|
|
3074
|
+
|
|
3075
|
+
Remove masking by dividing by 5.
|
|
3076
|
+
|
|
3077
|
+
|
|
3078
|
+
---
|
|
3079
|
+
|
|
3080
|
+
# pyrion.utils.numpy_utils
|
|
3081
|
+
|
|
3082
|
+
|
|
3083
|
+
---
|
|
3084
|
+
|
|
3085
|
+
# pyrion.visualization
|
|
3086
|
+
|
|
3087
|
+
The visualization module was quickly prototyped in Cursor without thorough design.
|
|
3088
|
+
|
|
3089
|
+
A cleaner architecture and improved visuals are planned for a future release.
|
|
3090
|
+
|
|
3091
|
+
|
|
3092
|
+
## Classes
|
|
3093
|
+
|
|
3094
|
+
### AlignmentFeature
|
|
3095
|
+
|
|
3096
|
+
Wrapper for GenomeAlignment.
|
|
3097
|
+
|
|
3098
|
+
**Signature:** `(self, alignment: pyrion.core.genome_alignment.GenomeAlignment)`
|
|
3099
|
+
|
|
3100
|
+
#### Methods
|
|
3101
|
+
|
|
3102
|
+
**__init__**
|
|
3103
|
+
|
|
3104
|
+
*Signature:* `(self, alignment: pyrion.core.genome_alignment.GenomeAlignment)`
|
|
3105
|
+
|
|
3106
|
+
Initialize the feature wrapper with the given `GenomeAlignment`.
|
|
3107
|
+
|
|
3108
|
+
|
|
3109
|
+
#### Properties
|
|
3110
|
+
|
|
3111
|
+
**end** -> `int`
|
|
3112
|
+
|
|
3113
|
+
|
|
3114
|
+
**length** -> `int`
|
|
3115
|
+
|
|
3116
|
+
|
|
3117
|
+
**start** -> `int`
|
|
3118
|
+
|
|
3119
|
+
|
|
3120
|
+
|
|
3121
|
+
### Band
|
|
3122
|
+
|
|
3123
|
+
Holds non-overlapping features placed in same vertical band.
|
|
3124
|
+
|
|
3125
|
+
**Signature:** `(self, index: int)`
|
|
3126
|
+
|
|
3127
|
+
#### Methods
|
|
3128
|
+
|
|
3129
|
+
**__init__**
|
|
3130
|
+
|
|
3131
|
+
*Signature:* `(self, index: int)`
|
|
3132
|
+
|
|
3133
|
+
Initialize the band with the given index.
|
|
3134
|
+
|
|
3135
|
+
|
|
3136
|
+
**add_feature**
|
|
3137
|
+
|
|
3138
|
+
*Signature:* `(self, feature: pyrion.visualization.GenomicFeature)`
|
|
3139
|
+
|
|
3140
|
+
Add feature to this band.
|
|
3141
|
+
|
|
3142
|
+
|
|
3143
|
+
**can_add_feature**
|
|
3144
|
+
|
|
3145
|
+
*Signature:* `(self, feature: pyrion.visualization.GenomicFeature) -> bool`
|
|
3146
|
+
|
|
3147
|
+
Check if feature can be added without overlap.
|
|
3148
|
+
|
|
3149
|
+
|
|
3150
|
+
|
|
3151
|
+
### GenomicFeature
|
|
3152
|
+
|
|
3153
|
+
Abstract base class for genomic features with start/end coordinates.
|
|
3154
|
+
|
|
3155
|
+
**Signature:** `(self, /, *args, **kwargs)`
|
|
3156
|
+
|
|
3157
|
+
#### Properties
|
|
3158
|
+
|
|
3159
|
+
**end** -> `int`
|
|
3160
|
+
|
|
3161
|
+
|
|
3162
|
+
**length** -> `int`
|
|
3163
|
+
|
|
3164
|
+
|
|
3165
|
+
**start** -> `int`
|
|
3166
|
+
|
|
3167
|
+
|
|
3168
|
+
|
|
3169
|
+
### GenomicRuler
|
|
3170
|
+
|
|
3171
|
+
Renders genomic coordinate ruler with fixed height.
|
|
3172
|
+
|
|
3173
|
+
**Signature:** `(self, interval: pyrion.core.intervals.GenomicInterval, tick_count: int = 10, inverted: bool = False, height: float = 0.15)`
|
|
3174
|
+
|
|
3175
|
+
#### Methods
|
|
3176
|
+
|
|
3177
|
+
**__init__**
|
|
3178
|
+
|
|
3179
|
+
*Signature:* `(self, interval: pyrion.core.intervals.GenomicInterval, tick_count: int = 10, inverted: bool = False, height: float = 0.15)`
|
|
3180
|
+
|
|
3181
|
+
Initialize the ruler for the given interval, with the requested tick count, orientation (`inverted`) and height.
|
|
3182
|
+
|
|
3183
|
+
|
|
3184
|
+
**draw**
|
|
3185
|
+
|
|
3186
|
+
*Signature:* `(self, ax, y: float = 0.0)`
|
|
3187
|
+
|
|
3188
|
+
Draw ruler at specified y position.
|
|
3189
|
+
|
|
3190
|
+
|
|
3191
|
+
|
|
3192
|
+
### IntervalFeature
|
|
3193
|
+
|
|
3194
|
+
Wrapper for GenomicInterval.
|
|
3195
|
+
|
|
3196
|
+
**Signature:** `(self, interval: pyrion.core.intervals.GenomicInterval)`
|
|
3197
|
+
|
|
3198
|
+
#### Methods
|
|
3199
|
+
|
|
3200
|
+
**__init__**
|
|
3201
|
+
|
|
3202
|
+
*Signature:* `(self, interval: pyrion.core.intervals.GenomicInterval)`
|
|
3203
|
+
|
|
3204
|
+
Initialize the feature wrapper with the given `GenomicInterval`.
|
|
3205
|
+
|
|
3206
|
+
|
|
3207
|
+
#### Properties
|
|
3208
|
+
|
|
3209
|
+
**end** -> `int`
|
|
3210
|
+
|
|
3211
|
+
|
|
3212
|
+
**length** -> `int`
|
|
3213
|
+
|
|
3214
|
+
|
|
3215
|
+
**start** -> `int`
|
|
3216
|
+
|
|
3217
|
+
|
|
3218
|
+
|
|
3219
|
+
### LayoutManager
|
|
3220
|
+
|
|
3221
|
+
Assigns tracks to levels and produces packed band layout.
|
|
3222
|
+
|
|
3223
|
+
**Signature:** `(self)`
|
|
3224
|
+
|
|
3225
|
+
#### Methods
|
|
3226
|
+
|
|
3227
|
+
**__init__**
|
|
3228
|
+
|
|
3229
|
+
*Signature:* `(self)`
|
|
3230
|
+
|
|
3231
|
+
Initialize the layout manager; takes no arguments.
|
|
3232
|
+
|
|
3233
|
+
|
|
3234
|
+
**add_track**
|
|
3235
|
+
|
|
3236
|
+
*Signature:* `(self, track: 'Track')`
|
|
3237
|
+
|
|
3238
|
+
Assign track to correct level based on type.
|
|
3239
|
+
|
|
3240
|
+
|
|
3241
|
+
**compute_layout**
|
|
3242
|
+
|
|
3243
|
+
*Signature:* `(self)`
|
|
3244
|
+
|
|
3245
|
+
Run layout logic for all levels.
|
|
3246
|
+
|
|
3247
|
+
|
|
3248
|
+
**get_total_bands**
|
|
3249
|
+
|
|
3250
|
+
*Signature:* `(self) -> int`
|
|
3251
|
+
|
|
3252
|
+
Get total number of bands across all levels.
|
|
3253
|
+
|
|
3254
|
+
|
|
3255
|
+
|
|
3256
|
+
### Level
|
|
3257
|
+
|
|
3258
|
+
Contains features of one type, produces non-overlapping bands.
|
|
3259
|
+
|
|
3260
|
+
**Signature:** `(self)`
|
|
3261
|
+
|
|
3262
|
+
#### Methods
|
|
3263
|
+
|
|
3264
|
+
**__init__**
|
|
3265
|
+
|
|
3266
|
+
*Signature:* `(self)`
|
|
3267
|
+
|
|
3268
|
+
Initialize the level; takes no arguments.
|
|
3269
|
+
|
|
3270
|
+
|
|
3271
|
+
**add_features**
|
|
3272
|
+
|
|
3273
|
+
*Signature:* `(self, features: List[pyrion.visualization.GenomicFeature])`
|
|
3274
|
+
|
|
3275
|
+
Add features to this level.
|
|
3276
|
+
|
|
3277
|
+
|
|
3278
|
+
**compute_bands**
|
|
3279
|
+
|
|
3280
|
+
*Signature:* `(self)`
|
|
3281
|
+
|
|
3282
|
+
Greedy algorithm to assign features to non-overlapping bands.
|
|
3283
|
+
|
|
3284
|
+
|
|
3285
|
+
|
|
3286
|
+
### Track
|
|
3287
|
+
|
|
3288
|
+
Logical unit of data belonging to one category.
|
|
3289
|
+
|
|
3290
|
+
**Signature:** `(self, name: str, features: List[pyrion.visualization.GenomicFeature], track_type: pyrion.visualization.TrackType)`
|
|
3291
|
+
|
|
3292
|
+
#### Methods
|
|
3293
|
+
|
|
3294
|
+
**__init__**
|
|
3295
|
+
|
|
3296
|
+
*Signature:* `(self, name: str, features: List[pyrion.visualization.GenomicFeature], track_type: pyrion.visualization.TrackType)`
|
|
3297
|
+
|
|
3298
|
+
Initialize the track with its name, its list of features and its track type.
|
|
3299
|
+
|
|
3300
|
+
|
|
3301
|
+
|
|
3302
|
+
### TrackType
|
|
3303
|
+
|
|
3304
|
+
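Enumeration of track types (the `track_type` of a `Track`, used by `LayoutManager.add_track` to pick the track's level).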
|
|
3305
|
+
|
|
3306
|
+
**Signature:** `(self, /, *args, **kwargs)`
|
|
3307
|
+
|
|
3308
|
+
|
|
3309
|
+
### TranscriptFeature
|
|
3310
|
+
|
|
3311
|
+
Wrapper for Transcript.
|
|
3312
|
+
|
|
3313
|
+
**Signature:** `(self, transcript: pyrion.core.genes.Transcript)`
|
|
3314
|
+
|
|
3315
|
+
#### Methods
|
|
3316
|
+
|
|
3317
|
+
**__init__**
|
|
3318
|
+
|
|
3319
|
+
*Signature:* `(self, transcript: pyrion.core.genes.Transcript)`
|
|
3320
|
+
|
|
3321
|
+
Initialize the feature wrapper with the given `Transcript`.
|
|
3322
|
+
|
|
3323
|
+
|
|
3324
|
+
#### Properties
|
|
3325
|
+
|
|
3326
|
+
**end** -> `int`
|
|
3327
|
+
|
|
3328
|
+
|
|
3329
|
+
**length** -> `int`
|
|
3330
|
+
|
|
3331
|
+
|
|
3332
|
+
**start** -> `int`
|
|
3333
|
+
|
|
3334
|
+
|
|
3335
|
+
|
|
3336
|
+
### VisualizationWindow
|
|
3337
|
+
|
|
3338
|
+
Entry point for rendering genomic data visualization.
|
|
3339
|
+
|
|
3340
|
+
**Signature:** `(self, interval: pyrion.core.intervals.GenomicInterval, height: Optional[float] = None, band_height: float = 0.35, band_spacing: float = 0.05, level_spacing: float = 0.2, ruler_height: float = 0.4, label_height: float = 0.15, show_labels: bool = True, left_padding_width: int = 15000, show_feature_labels: bool = True)`
|
|
3341
|
+
|
|
3342
|
+
#### Methods
|
|
3343
|
+
|
|
3344
|
+
**__init__**
|
|
3345
|
+
|
|
3346
|
+
*Signature:* `(self, interval: pyrion.core.intervals.GenomicInterval, height: Optional[float] = None, band_height: float = 0.35, band_spacing: float = 0.05, level_spacing: float = 0.2, ruler_height: float = 0.4, label_height: float = 0.15, show_labels: bool = True, left_padding_width: int = 15000, show_feature_labels: bool = True)`
|
|
3347
|
+
|
|
3348
|
+
Initialize the window for the given interval, with the band, level, ruler and label layout options listed in the signature.
|
|
3349
|
+
|
|
3350
|
+
|
|
3351
|
+
**add_track**
|
|
3352
|
+
|
|
3353
|
+
*Signature:* `(self, track: pyrion.visualization.Track)`
|
|
3354
|
+
|
|
3355
|
+
Add a track to the window.
|
|
3356
|
+
|
|
3357
|
+
|
|
3358
|
+
**show**
|
|
3359
|
+
|
|
3360
|
+
*Signature:* `(self, figsize: tuple = (12, 6))`
|
|
3361
|
+
|
|
3362
|
+
Trigger layout and rendering.
|
|
3363
|
+
|
|
3364
|
+
|
|
3365
|
+
|
|
3366
|
+
## Functions
|
|
3367
|
+
|
|
3368
|
+
### create_alignment_track
|
|
3369
|
+
|
|
3370
|
+
**Signature:** `(name: str, alignments: List[pyrion.core.genome_alignment.GenomeAlignment]) -> pyrion.visualization.Track`
|
|
3371
|
+
|
|
3372
|
+
Create an alignment track from a list of GenomeAlignment objects.
|
|
3373
|
+
|
|
3374
|
+
|
|
3375
|
+
### create_interval_track
|
|
3376
|
+
|
|
3377
|
+
**Signature:** `(name: str, intervals: List[pyrion.core.intervals.GenomicInterval]) -> pyrion.visualization.Track`
|
|
3378
|
+
|
|
3379
|
+
Create an interval track from a list of GenomicInterval objects.
|
|
3380
|
+
|
|
3381
|
+
|
|
3382
|
+
### create_transcript_track
|
|
3383
|
+
|
|
3384
|
+
**Signature:** `(name: str, transcripts: List[pyrion.core.genes.Transcript]) -> pyrion.visualization.Track`
|
|
3385
|
+
|
|
3386
|
+
Create a transcript track from a list of Transcript objects.
|
|
3387
|
+
|
|
3388
|
+
|
|
3389
|
+
### create_window_for_region
|
|
3390
|
+
|
|
3391
|
+
**Signature:** `(chrom: str, start: int, end: int, **kwargs) -> pyrion.visualization.VisualizationWindow`
|
|
3392
|
+
|
|
3393
|
+
Create a VisualizationWindow for a specific genomic region.
|
|
3394
|
+
|
|
3395
|
+
|
|
3396
|
+
### visualize_alignments
|
|
3397
|
+
|
|
3398
|
+
**Signature:** `(alignments: List[pyrion.core.genome_alignment.GenomeAlignment], window_interval: pyrion.core.intervals.GenomicInterval = None, track_name: str = 'Alignments', band_height: float = 0.35, **kwargs)`
|
|
3399
|
+
|
|
3400
|
+
Quick function to visualize a list of alignments.
|
|
3401
|
+
|
|
3402
|
+
|
|
3403
|
+
### visualize_intervals
|
|
3404
|
+
|
|
3405
|
+
**Signature:** `(intervals: List[pyrion.core.intervals.GenomicInterval], window_interval: pyrion.core.intervals.GenomicInterval = None, track_name: str = 'Intervals', band_height: float = 0.35, **kwargs)`
|
|
3406
|
+
|
|
3407
|
+
Quick function to visualize a list of intervals.
|
|
3408
|
+
|
|
3409
|
+
|
|
3410
|
+
### visualize_transcripts
|
|
3411
|
+
|
|
3412
|
+
**Signature:** `(transcripts: List[pyrion.core.genes.Transcript], window_interval: pyrion.core.intervals.GenomicInterval = None, track_name: str = 'Transcripts', band_height: float = 0.35, **kwargs)`
|
|
3413
|
+
|
|
3414
|
+
Quick function to visualize a list of transcripts.
|
|
3415
|
+
|
|
3416
|
+
|
|
3417
|
+
---
|
|
3418
|
+
|