geney 1.4.40__py3-none-any.whl → 1.4.41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geney/__init__.py +18 -5
- geney/engines.py +313 -204
- geney/pipelines.py +88 -46
- geney/splice_graph.py +213 -7
- geney/transcripts.py +1 -1
- {geney-1.4.40.dist-info → geney-1.4.41.dist-info}/METADATA +2 -1
- geney-1.4.41.dist-info/RECORD +11 -0
- geney/samples.py +0 -3
- geney/splicing_table.py +0 -142
- geney/utils.py +0 -254
- geney-1.4.40.dist-info/RECORD +0 -14
- {geney-1.4.40.dist-info → geney-1.4.41.dist-info}/WHEEL +0 -0
- {geney-1.4.40.dist-info → geney-1.4.41.dist-info}/top_level.txt +0 -0
geney/utils.py
DELETED
|
@@ -1,254 +0,0 @@
|
|
|
1
|
-
__all__ = ['is_monotonic', 'contains', 'unload_json', 'unload_pickle', 'dump_json', 'dump_pickle', 'generate_random_nucleotide_sequences', 'generate_random_sequence', 'short_hash_of_list']
|
|
2
|
-
|
|
3
|
-
import pickle
|
|
4
|
-
import json
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
from bisect import bisect_left
|
|
7
|
-
import hashlib
|
|
8
|
-
import random
|
|
9
|
-
from typing import Any, List, Sequence, Union
|
|
10
|
-
|
|
11
|
-
# def is_monotonic(A):
|
|
12
|
-
# x, y = [], []
|
|
13
|
-
# x.extend(A)
|
|
14
|
-
# y.extend(A)
|
|
15
|
-
# x.sort()
|
|
16
|
-
# y.sort(reverse=True)
|
|
17
|
-
# if (x == A or y == A):
|
|
18
|
-
# return True
|
|
19
|
-
# return False
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
# def available_genes(organism='hg38'):
|
|
23
|
-
# from geney import config
|
|
24
|
-
# annotation_path = config[organism]['MRNA_PATH'] / 'protein_coding'
|
|
25
|
-
# return sorted(list(set([m.stem.split('_')[-1] for m in annotation_path.glob('*')])))
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
def contains(a: Sequence[Any], x: Any) -> bool:
|
|
29
|
-
"""Check if sorted sequence contains value using binary search.
|
|
30
|
-
|
|
31
|
-
Args:
|
|
32
|
-
a: Sorted sequence to search in
|
|
33
|
-
x: Value to search for
|
|
34
|
-
|
|
35
|
-
Returns:
|
|
36
|
-
True if value is found, False otherwise
|
|
37
|
-
|
|
38
|
-
Raises:
|
|
39
|
-
TypeError: If sequence is not sortable
|
|
40
|
-
"""
|
|
41
|
-
if not hasattr(a, '__len__') or not hasattr(a, '__getitem__'):
|
|
42
|
-
raise TypeError("First argument must be a sequence")
|
|
43
|
-
|
|
44
|
-
try:
|
|
45
|
-
i = bisect_left(a, x)
|
|
46
|
-
return i != len(a) and a[i] == x
|
|
47
|
-
except TypeError as e:
|
|
48
|
-
raise TypeError(f"Cannot compare types in sequence: {e}") from e
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
def unload_json(file_path: Union[str, Path]) -> Any:
|
|
52
|
-
"""Load data from JSON file.
|
|
53
|
-
|
|
54
|
-
Args:
|
|
55
|
-
file_path: Path to JSON file
|
|
56
|
-
|
|
57
|
-
Returns:
|
|
58
|
-
Loaded data structure
|
|
59
|
-
|
|
60
|
-
Raises:
|
|
61
|
-
FileNotFoundError: If file doesn't exist
|
|
62
|
-
JSONDecodeError: If file contains invalid JSON
|
|
63
|
-
"""
|
|
64
|
-
file_path = Path(file_path)
|
|
65
|
-
|
|
66
|
-
if not file_path.exists():
|
|
67
|
-
raise FileNotFoundError(f"JSON file not found: {file_path}")
|
|
68
|
-
|
|
69
|
-
try:
|
|
70
|
-
with open(file_path, 'r', encoding='utf-8') as f:
|
|
71
|
-
data = json.load(f)
|
|
72
|
-
return data
|
|
73
|
-
except json.JSONDecodeError as e:
|
|
74
|
-
raise json.JSONDecodeError(f"Invalid JSON in file {file_path}: {e.msg}", e.doc, e.pos) from e
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
def dump_json(file_path: Union[str, Path], payload: Any, indent: int = 2) -> None:
|
|
78
|
-
"""Save data to JSON file.
|
|
79
|
-
|
|
80
|
-
Args:
|
|
81
|
-
file_path: Path to output JSON file
|
|
82
|
-
payload: Data to save
|
|
83
|
-
indent: JSON indentation level
|
|
84
|
-
|
|
85
|
-
Raises:
|
|
86
|
-
TypeError: If payload is not JSON serializable
|
|
87
|
-
PermissionError: If cannot write to file
|
|
88
|
-
"""
|
|
89
|
-
file_path = Path(file_path)
|
|
90
|
-
|
|
91
|
-
# Create parent directory if it doesn't exist
|
|
92
|
-
file_path.parent.mkdir(parents=True, exist_ok=True)
|
|
93
|
-
|
|
94
|
-
try:
|
|
95
|
-
with open(file_path, 'w', encoding='utf-8') as f:
|
|
96
|
-
json.dump(payload, f, indent=indent, ensure_ascii=False)
|
|
97
|
-
except TypeError as e:
|
|
98
|
-
raise TypeError(f"Cannot serialize data to JSON: {e}") from e
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
def unload_pickle(file_path: Union[str, Path]) -> Any:
|
|
102
|
-
"""Load data from pickle file.
|
|
103
|
-
|
|
104
|
-
Args:
|
|
105
|
-
file_path: Path to pickle file
|
|
106
|
-
|
|
107
|
-
Returns:
|
|
108
|
-
Loaded data structure
|
|
109
|
-
|
|
110
|
-
Raises:
|
|
111
|
-
FileNotFoundError: If file doesn't exist
|
|
112
|
-
pickle.UnpicklingError: If file contains invalid pickle data
|
|
113
|
-
"""
|
|
114
|
-
file_path = Path(file_path)
|
|
115
|
-
|
|
116
|
-
if not file_path.exists():
|
|
117
|
-
raise FileNotFoundError(f"Pickle file not found: {file_path}")
|
|
118
|
-
|
|
119
|
-
try:
|
|
120
|
-
with open(file_path, 'rb') as f:
|
|
121
|
-
data = pickle.load(f)
|
|
122
|
-
return data
|
|
123
|
-
except pickle.UnpicklingError as e:
|
|
124
|
-
raise pickle.UnpicklingError(f"Invalid pickle data in file {file_path}: {e}") from e
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
def dump_pickle(file_path: Union[str, Path], payload: Any) -> None:
|
|
128
|
-
"""Save data to pickle file.
|
|
129
|
-
|
|
130
|
-
Args:
|
|
131
|
-
file_path: Path to output pickle file
|
|
132
|
-
payload: Data to save
|
|
133
|
-
|
|
134
|
-
Raises:
|
|
135
|
-
PermissionError: If cannot write to file
|
|
136
|
-
"""
|
|
137
|
-
file_path = Path(file_path)
|
|
138
|
-
|
|
139
|
-
# Create parent directory if it doesn't exist
|
|
140
|
-
file_path.parent.mkdir(parents=True, exist_ok=True)
|
|
141
|
-
|
|
142
|
-
try:
|
|
143
|
-
with open(file_path, 'wb') as f:
|
|
144
|
-
pickle.dump(payload, f)
|
|
145
|
-
except Exception as e:
|
|
146
|
-
raise RuntimeError(f"Failed to save pickle file {file_path}: {e}") from e
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
def is_monotonic(A: Sequence[Any]) -> bool:
|
|
151
|
-
"""Check if sequence is monotonic (non-decreasing or non-increasing).
|
|
152
|
-
|
|
153
|
-
Args:
|
|
154
|
-
A: Sequence to check
|
|
155
|
-
|
|
156
|
-
Returns:
|
|
157
|
-
True if sequence is monotonic, False otherwise
|
|
158
|
-
|
|
159
|
-
Raises:
|
|
160
|
-
TypeError: If sequence elements are not comparable
|
|
161
|
-
"""
|
|
162
|
-
if not hasattr(A, '__len__') or len(A) < 2:
|
|
163
|
-
return True
|
|
164
|
-
|
|
165
|
-
try:
|
|
166
|
-
return (all(x <= y for x, y in zip(A, A[1:])) or
|
|
167
|
-
all(x >= y for x, y in zip(A, A[1:])))
|
|
168
|
-
except TypeError as e:
|
|
169
|
-
raise TypeError(f"Cannot compare sequence elements: {e}") from e
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
def generate_random_sequence(length: int) -> str:
|
|
173
|
-
"""Generate a random DNA sequence of given length.
|
|
174
|
-
|
|
175
|
-
Args:
|
|
176
|
-
length: Length of sequence to generate
|
|
177
|
-
|
|
178
|
-
Returns:
|
|
179
|
-
Random DNA sequence containing only A, C, G, T
|
|
180
|
-
|
|
181
|
-
Raises:
|
|
182
|
-
ValueError: If length is not positive
|
|
183
|
-
"""
|
|
184
|
-
if not isinstance(length, int):
|
|
185
|
-
raise TypeError(f"Length must be integer, got {type(length).__name__}")
|
|
186
|
-
|
|
187
|
-
if length <= 0:
|
|
188
|
-
raise ValueError(f"Length must be positive, got {length}")
|
|
189
|
-
|
|
190
|
-
return ''.join(random.choices('ACGT', k=length))
|
|
191
|
-
|
|
192
|
-
def generate_random_nucleotide_sequences(num_sequences: int, min_len: int = 3, max_len: int = 10) -> List[str]:
|
|
193
|
-
"""
|
|
194
|
-
Generate random DNA sequences of variable lengths.
|
|
195
|
-
|
|
196
|
-
Args:
|
|
197
|
-
num_sequences: Number of sequences to generate
|
|
198
|
-
min_len: Minimum sequence length
|
|
199
|
-
max_len: Maximum sequence length
|
|
200
|
-
|
|
201
|
-
Returns:
|
|
202
|
-
List of random nucleotide sequences
|
|
203
|
-
|
|
204
|
-
Raises:
|
|
205
|
-
ValueError: If parameters are invalid
|
|
206
|
-
"""
|
|
207
|
-
if not isinstance(num_sequences, int) or num_sequences <= 0:
|
|
208
|
-
raise ValueError(f"num_sequences must be positive integer, got {num_sequences}")
|
|
209
|
-
|
|
210
|
-
if not isinstance(min_len, int) or min_len <= 0:
|
|
211
|
-
raise ValueError(f"min_len must be positive integer, got {min_len}")
|
|
212
|
-
|
|
213
|
-
if not isinstance(max_len, int) or max_len <= 0:
|
|
214
|
-
raise ValueError(f"max_len must be positive integer, got {max_len}")
|
|
215
|
-
|
|
216
|
-
if min_len > max_len:
|
|
217
|
-
raise ValueError(f"min_len ({min_len}) cannot be greater than max_len ({max_len})")
|
|
218
|
-
|
|
219
|
-
nucleotides = ['A', 'C', 'G', 'T']
|
|
220
|
-
lengths = list(range(min_len, max_len + 1))
|
|
221
|
-
|
|
222
|
-
sequences = [
|
|
223
|
-
''.join(random.choices(nucleotides, k=random.choice(lengths)))
|
|
224
|
-
for _ in range(num_sequences)
|
|
225
|
-
]
|
|
226
|
-
return sequences
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
def short_hash_of_list(numbers: List[Any], length: int = 5) -> str:
|
|
231
|
-
"""Generate a short hash string from a list of numbers.
|
|
232
|
-
|
|
233
|
-
Args:
|
|
234
|
-
numbers: List of values to hash
|
|
235
|
-
length: Length of output hash string
|
|
236
|
-
|
|
237
|
-
Returns:
|
|
238
|
-
Short hash string
|
|
239
|
-
|
|
240
|
-
Raises:
|
|
241
|
-
ValueError: If length is not positive
|
|
242
|
-
"""
|
|
243
|
-
if not isinstance(length, int) or length <= 0:
|
|
244
|
-
raise ValueError(f"Length must be positive integer, got {length}")
|
|
245
|
-
|
|
246
|
-
if length > 64: # SHA256 hex digest is 64 characters
|
|
247
|
-
raise ValueError(f"Length cannot exceed 64, got {length}")
|
|
248
|
-
|
|
249
|
-
try:
|
|
250
|
-
encoded = repr(numbers).encode('utf-8')
|
|
251
|
-
full_hash = hashlib.sha256(encoded).hexdigest()
|
|
252
|
-
return full_hash[:length]
|
|
253
|
-
except Exception as e:
|
|
254
|
-
raise RuntimeError(f"Failed to generate hash: {e}") from e
|
geney-1.4.40.dist-info/RECORD
DELETED
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
geney/__init__.py,sha256=1V1SxqcLFPxRJOqr4VmGillv1r4_azJtbmNtf0pZ18I,684
|
|
2
|
-
geney/engines.py,sha256=ZK6x0YdY8_yPRTUmhwL8GWcuS3U5OotqMJBKPE-z7cE,10548
|
|
3
|
-
geney/oncosplice.py,sha256=eGQQl9ftmoFENMYBWoJtenKWmzyxR9N1of5cZst_bHQ,18014
|
|
4
|
-
geney/pipelines.py,sha256=zK1zDFFAxElnxgXWeM_xZqEZtwxyF7CwmtQLCkKOq2w,3356
|
|
5
|
-
geney/samples.py,sha256=3KrWNILHYql-vPC_TidkzqDuFaLx3JSJZbUoVW2RTlo,92
|
|
6
|
-
geney/splice_graph.py,sha256=wCStApnnrwbej_yhk_s39p5sQatRtqg9Ve8GqH2ZfGA,14849
|
|
7
|
-
geney/splicing_table.py,sha256=mXDXUr4h_q7grYQpmXO5Ex15Mt7BchieWF9lawd6src,5412
|
|
8
|
-
geney/transcripts.py,sha256=I6NmBcW9QG5XtRumn6i0TeT8tKECHQycsbSSZ7e8LZo,2601
|
|
9
|
-
geney/utils.py,sha256=pv4_LPIzjYAxwUgmufZJL6UhVVq2SllpF90ix_uH_-Q,7627
|
|
10
|
-
geney/variants.py,sha256=vjbiBH-duZ4TJZyXwXbQ_VmJxCFafjeDwLNTZg3ubSc,11832
|
|
11
|
-
geney-1.4.40.dist-info/METADATA,sha256=BiZJ2yQaYrHybVewBIQ2Cdw_qKNENiHoIEiFPp29xs8,952
|
|
12
|
-
geney-1.4.40.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
13
|
-
geney-1.4.40.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
|
|
14
|
-
geney-1.4.40.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|