geney 1.4.40__py3-none-any.whl → 1.4.41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
geney/utils.py DELETED
@@ -1,254 +0,0 @@
1
- __all__ = ['is_monotonic', 'contains', 'unload_json', 'unload_pickle', 'dump_json', 'dump_pickle', 'generate_random_nucleotide_sequences', 'generate_random_sequence', 'short_hash_of_list']
2
-
3
- import pickle
4
- import json
5
- from pathlib import Path
6
- from bisect import bisect_left
7
- import hashlib
8
- import random
9
- from typing import Any, List, Sequence, Union
10
-
11
- # def is_monotonic(A):
12
- # x, y = [], []
13
- # x.extend(A)
14
- # y.extend(A)
15
- # x.sort()
16
- # y.sort(reverse=True)
17
- # if (x == A or y == A):
18
- # return True
19
- # return False
20
-
21
-
22
- # def available_genes(organism='hg38'):
23
- # from geney import config
24
- # annotation_path = config[organism]['MRNA_PATH'] / 'protein_coding'
25
- # return sorted(list(set([m.stem.split('_')[-1] for m in annotation_path.glob('*')])))
26
-
27
-
28
def contains(a: Sequence[Any], x: Any) -> bool:
    """Report whether ``x`` occurs in the sorted sequence ``a``.

    The lookup is a binary search (``bisect_left``), so ``a`` must already
    be sorted in ascending order for the answer to be meaningful.

    Args:
        a: Sorted sequence to search in; must support ``len()`` and indexing.
        x: Value to look for.

    Returns:
        False when the insertion point is past the end of ``a``; otherwise
        the result of comparing the element at the insertion point with ``x``.

    Raises:
        TypeError: If ``a`` lacks ``__len__``/``__getitem__`` or its elements
            cannot be compared with ``x``.
    """
    looks_like_sequence = hasattr(a, '__len__') and hasattr(a, '__getitem__')
    if not looks_like_sequence:
        raise TypeError("First argument must be a sequence")

    try:
        position = bisect_left(a, x)
        if position == len(a):
            # Insertion point is past the end: x is larger than everything.
            return False
        return a[position] == x
    except TypeError as e:
        raise TypeError(f"Cannot compare types in sequence: {e}") from e
49
-
50
-
51
def unload_json(file_path: Union[str, Path]) -> Any:
    """Load and return the data stored in a JSON file.

    The file is opened directly rather than probed with ``exists()`` first,
    so a file that disappears between a check and the read is still reported
    with the same ``FileNotFoundError`` message.

    Args:
        file_path: Path to the JSON file; it is read as UTF-8.

    Returns:
        The decoded JSON document.

    Raises:
        FileNotFoundError: If the file doesn't exist.
        json.JSONDecodeError: If the file contains invalid JSON.
    """
    file_path = Path(file_path)

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError as e:
        raise FileNotFoundError(f"JSON file not found: {file_path}") from e
    except json.JSONDecodeError as e:
        # Rebuild the error so the message names the offending file while
        # keeping the original document and position.
        raise json.JSONDecodeError(f"Invalid JSON in file {file_path}: {e.msg}", e.doc, e.pos) from e
75
-
76
-
77
def dump_json(file_path: Union[str, Path], payload: Any, indent: int = 2) -> None:
    """Save data to a JSON file.

    The payload is serialized to a string *before* the target is opened, so
    a payload that cannot be serialized never truncates or partially
    overwrites an existing file.

    Args:
        file_path: Path to the output JSON file; missing parent directories
            are created.
        payload: Data to save.
        indent: JSON indentation level.

    Raises:
        TypeError: If payload is not JSON serializable.
        OSError: If the directory or file cannot be written
            (for example ``PermissionError``).
    """
    file_path = Path(file_path)

    try:
        text = json.dumps(payload, indent=indent, ensure_ascii=False)
    except TypeError as e:
        raise TypeError(f"Cannot serialize data to JSON: {e}") from e

    # Create parent directory if it doesn't exist
    file_path.parent.mkdir(parents=True, exist_ok=True)

    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(text)
99
-
100
-
101
def unload_pickle(file_path: Union[str, Path]) -> Any:
    """Load and return the object stored in a pickle file.

    SECURITY: unpickling can execute arbitrary code. Only call this on files
    that come from a trusted source.

    The file is opened directly rather than probed with ``exists()`` first,
    so a file that disappears between a check and the read is still reported
    with the same ``FileNotFoundError`` message.

    Args:
        file_path: Path to the pickle file.

    Returns:
        The unpickled object.

    Raises:
        FileNotFoundError: If the file doesn't exist.
        pickle.UnpicklingError: If the file contains invalid pickle data.
        EOFError: If the file is empty or truncated (propagated unchanged
            from ``pickle.load``).
    """
    file_path = Path(file_path)

    try:
        with open(file_path, 'rb') as f:
            return pickle.load(f)
    except FileNotFoundError as e:
        raise FileNotFoundError(f"Pickle file not found: {file_path}") from e
    except pickle.UnpicklingError as e:
        raise pickle.UnpicklingError(f"Invalid pickle data in file {file_path}: {e}") from e
125
-
126
-
127
def dump_pickle(file_path: Union[str, Path], payload: Any) -> None:
    """Save data to a pickle file.

    The payload is pickled in memory *before* the target is opened, so an
    object that cannot be pickled never truncates or partially overwrites an
    existing file.

    Args:
        file_path: Path to the output pickle file; missing parent directories
            are created.
        payload: Data to save.

    Raises:
        RuntimeError: If the payload cannot be pickled or the file cannot be
            written; the underlying error is attached as ``__cause__``.
        OSError: If the parent directory cannot be created (raised by
            ``mkdir`` and not wrapped).
    """
    file_path = Path(file_path)

    try:
        blob = pickle.dumps(payload)
    except Exception as e:
        raise RuntimeError(f"Failed to save pickle file {file_path}: {e}") from e

    # Create parent directory if it doesn't exist
    file_path.parent.mkdir(parents=True, exist_ok=True)

    try:
        with open(file_path, 'wb') as f:
            f.write(blob)
    except Exception as e:
        raise RuntimeError(f"Failed to save pickle file {file_path}: {e}") from e
147
-
148
-
149
-
150
def is_monotonic(A: Sequence[Any]) -> bool:
    """Check if a sequence is monotonic (non-decreasing or non-increasing).

    The elements are walked once, pair by pair, without slicing or copying
    the input. Any iterable is accepted, including iterators and containers
    that do not support slicing.

    Args:
        A: Sequence (or any iterable) to check.

    Returns:
        True if the elements are entirely non-decreasing or entirely
        non-increasing, False otherwise. Inputs with fewer than two elements
        are monotonic. Objects that are not iterable at all also yield True,
        matching the earlier behaviour for un-sized arguments.

    Raises:
        TypeError: If sequence elements are not comparable.
    """
    try:
        iterator = iter(A)
    except TypeError:
        # Not iterable: nothing to compare.
        return True

    exhausted = object()
    previous = next(iterator, exhausted)
    if previous is exhausted:
        return True

    non_decreasing = non_increasing = True
    try:
        for current in iterator:
            # Only keep testing a direction that has not been ruled out yet.
            if non_decreasing and not previous <= current:
                non_decreasing = False
            if non_increasing and not previous >= current:
                non_increasing = False
            if not (non_decreasing or non_increasing):
                return False
            previous = current
    except TypeError as e:
        raise TypeError(f"Cannot compare sequence elements: {e}") from e
    return True
170
-
171
-
172
def generate_random_sequence(length: int) -> str:
    """Build a random DNA string of the requested length.

    Args:
        length: Number of bases to generate.

    Returns:
        A string of ``length`` characters drawn from A, C, G and T.

    Raises:
        TypeError: If ``length`` is not an integer.
        ValueError: If ``length`` is not positive.
    """
    if isinstance(length, int):
        if length > 0:
            bases = random.choices('ACGT', k=length)
            return ''.join(bases)
        raise ValueError(f"Length must be positive, got {length}")
    raise TypeError(f"Length must be integer, got {type(length).__name__}")
191
-
192
def generate_random_nucleotide_sequences(num_sequences: int, min_len: int = 3, max_len: int = 10) -> List[str]:
    """
    Generate random DNA sequences of variable lengths.

    Each sequence length is drawn from ``min_len..max_len`` (inclusive) by
    sampling the ``range`` object directly, so no list of candidate lengths
    is built even when the bounds are far apart.

    Args:
        num_sequences: Number of sequences to generate
        min_len: Minimum sequence length
        max_len: Maximum sequence length

    Returns:
        List of random nucleotide sequences made of A, C, G and T

    Raises:
        ValueError: If any argument is not a positive integer, or if
            ``min_len`` is greater than ``max_len``
    """
    if not isinstance(num_sequences, int) or num_sequences <= 0:
        raise ValueError(f"num_sequences must be positive integer, got {num_sequences}")

    if not isinstance(min_len, int) or min_len <= 0:
        raise ValueError(f"min_len must be positive integer, got {min_len}")

    if not isinstance(max_len, int) or max_len <= 0:
        raise ValueError(f"max_len must be positive integer, got {max_len}")

    if min_len > max_len:
        raise ValueError(f"min_len ({min_len}) cannot be greater than max_len ({max_len})")

    # A range is a sequence, so random.choice can index it lazily.
    candidate_lengths = range(min_len, max_len + 1)

    return [
        ''.join(random.choices('ACGT', k=random.choice(candidate_lengths)))
        for _ in range(num_sequences)
    ]
227
-
228
-
229
-
230
def short_hash_of_list(numbers: List[Any], length: int = 5) -> str:
    """Derive a short hexadecimal fingerprint from a list of values.

    The fingerprint is a prefix of the SHA-256 hex digest of the UTF-8
    encoded ``repr`` of ``numbers``.

    Args:
        numbers: List of values to hash.
        length: Number of leading hex characters to keep (1 to 64).

    Returns:
        The first ``length`` characters of the hex digest.

    Raises:
        ValueError: If ``length`` is not a positive integer or exceeds 64.
        RuntimeError: If computing the hash fails for any reason.
    """
    length_is_valid = isinstance(length, int) and length > 0
    if not length_is_valid:
        raise ValueError(f"Length must be positive integer, got {length}")

    # A SHA-256 hex digest is 64 characters long, so nothing longer exists.
    if length > 64:
        raise ValueError(f"Length cannot exceed 64, got {length}")

    try:
        digest = hashlib.sha256(repr(numbers).encode('utf-8')).hexdigest()
        return digest[:length]
    except Exception as e:
        raise RuntimeError(f"Failed to generate hash: {e}") from e
@@ -1,14 +0,0 @@
1
- geney/__init__.py,sha256=1V1SxqcLFPxRJOqr4VmGillv1r4_azJtbmNtf0pZ18I,684
2
- geney/engines.py,sha256=ZK6x0YdY8_yPRTUmhwL8GWcuS3U5OotqMJBKPE-z7cE,10548
3
- geney/oncosplice.py,sha256=eGQQl9ftmoFENMYBWoJtenKWmzyxR9N1of5cZst_bHQ,18014
4
- geney/pipelines.py,sha256=zK1zDFFAxElnxgXWeM_xZqEZtwxyF7CwmtQLCkKOq2w,3356
5
- geney/samples.py,sha256=3KrWNILHYql-vPC_TidkzqDuFaLx3JSJZbUoVW2RTlo,92
6
- geney/splice_graph.py,sha256=wCStApnnrwbej_yhk_s39p5sQatRtqg9Ve8GqH2ZfGA,14849
7
- geney/splicing_table.py,sha256=mXDXUr4h_q7grYQpmXO5Ex15Mt7BchieWF9lawd6src,5412
8
- geney/transcripts.py,sha256=I6NmBcW9QG5XtRumn6i0TeT8tKECHQycsbSSZ7e8LZo,2601
9
- geney/utils.py,sha256=pv4_LPIzjYAxwUgmufZJL6UhVVq2SllpF90ix_uH_-Q,7627
10
- geney/variants.py,sha256=vjbiBH-duZ4TJZyXwXbQ_VmJxCFafjeDwLNTZg3ubSc,11832
11
- geney-1.4.40.dist-info/METADATA,sha256=BiZJ2yQaYrHybVewBIQ2Cdw_qKNENiHoIEiFPp29xs8,952
12
- geney-1.4.40.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
13
- geney-1.4.40.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
14
- geney-1.4.40.dist-info/RECORD,,
File without changes