geney 1.2.54-py2.py3-none-any.whl → 1.2.56-py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of geney might be problematic.

geney/oncosplice.py CHANGED

@@ -9,7 +9,6 @@ from .seqmat_utils import *
 from .mutation_utils import *
 from .tis_utils import find_tis
 
-### Scoring
 def find_continuous_gaps(sequence):
     """Find continuous gap sequences in an alignment."""
     return [(m.start(), m.end()) for m in re.finditer(r'-+', sequence)]
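
For reference, find_continuous_gaps (unchanged here apart from the deleted `### Scoring` banner) maps every run of `-` characters in an alignment string to a half-open (start, end) span. A quick, self-contained check:

    import re

    def find_continuous_gaps(sequence):
        """Find continuous gap sequences in an alignment."""
        return [(m.start(), m.end()) for m in re.finditer(r'-+', sequence)]

    print(find_continuous_gaps('MK--LV---P'))  # -> [(2, 4), (6, 9)]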
@@ -121,43 +120,6 @@ def transform_conservation_vector(conservation_vector, window=13, factor=4):
     return exp_factors
 
 
-# def find_modified_positions(sequence_length, deletions, insertions, reach_limit=16):
-#     """
-#     Identify unmodified positions in a sequence given deletions and insertions.
-#
-#     :param sequence_length: Length of the sequence.
-#     :param deletions: Dictionary of deletions.
-#     :param insertions: Dictionary of insertions.
-#     :param reach_limit: Limit for considering the effect of insertions/deletions.
-#     :return: Array indicating unmodified positions.
-#     """
-#     unmodified_positions = np.zeros(sequence_length, dtype=float)
-#
-#     for pos, insertion in insertions.items():
-#         # if pos >= sequence_length:
-#         #     pos = sequence_length - 1
-#         #     add_factor = 1
-#
-#         reach = min(len(insertion) // 2, reach_limit)
-#         front_end, back_end = max(0, pos - reach), min(sequence_length - 1, pos + reach)
-#         len_start, len_end = pos - front_end, back_end - pos
-#         try:
-#             gradient_front = np.linspace(0, 1, len_start, endpoint=False)
-#             gradient_back = np.linspace(0, 1, len_end, endpoint=True)[::-1]
-#             combined_gradient = np.concatenate([gradient_front, np.array([1]), gradient_back])
-#             unmodified_positions[front_end:back_end + 1] = combined_gradient
-#
-#         except ValueError as e:
-#             print(
-#                 f"Error: {e} | Lengths: unmodified_positions_slice={back_end - front_end}.")
-#             unmodified_positions[front_end:back_end] = np.zeros(back_end - front_end)
-#
-#     for pos, deletion in deletions.items():
-#         deletion_length = len(deletion)
-#         unmodified_positions[pos:pos + deletion_length] = 1
-#
-#     return unmodified_positions
-
 def find_modified_positions(sequence_length, deletions, insertions, reach_limit=16):
     """
     Identify unmodified positions in a sequence given deletions and insertions.
@@ -251,12 +213,7 @@ def moving_average_conv(vector, window_size, factor=1):
 
     return np.convolve(vector, np.ones(window_size), mode='same') / window_size
 
-
-
-
-
 def find_splice_site_proximity(pos, transcript):
-
     for i, (ex_start, ex_end) in enumerate(transcript.exons):
         if min(ex_start, ex_end) <= pos <= max(ex_start, ex_end):
             return i + 1, None, abs(pos - ex_start), abs(pos - ex_end)
@@ -323,7 +280,7 @@ def summarize_missplicing_event(pes, pir, es, ne, ir):
 
 # Annotating
 def OncospliceAnnotator(reference_transcript, variant_transcript, mut):
-    affected_exon, affected_intron, distance_from_5, distance_from_3 = find_splice_site_proximity(mut.indices[0],
+    affected_exon, affected_intron, distance_from_5, distance_from_3 = find_splice_site_proximity(np.floor(mut.indices[0]),
                                                                                                   reference_transcript)
 
     report = {}
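
The new np.floor call suggests that mut.indices[0] can be fractional, a common convention for insertions that land between two reference bases (this reading is an inference, not stated in the diff). Flooring maps such an index back to an integer coordinate that the exon-bound comparisons in find_splice_site_proximity can handle:

    import numpy as np

    # Assumed convention: an insertion between reference bases 1042 and 1043 is indexed 1042.5.
    mut_index = 1042.5
    pos = np.floor(mut_index)  # -> 1042.0, comparable against integer exon boundaries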
@@ -361,19 +318,17 @@ def OncospliceAnnotator(reference_transcript, variant_transcript, mut):
     return report
 
 
-def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, primary_transcript=False, window_length=13, organism='hg38', engine='spliceai', domains=None):
+def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, cons_required=False, primary_transcript=False, window_length=13, organism='hg38', engine='spliceai', domains=None):
     gene = Gene(mut_id.split(':')[0], organism=organism)
     reference_gene_proteins = {tid: transcript.generate_pre_mrna().generate_mature_mrna().generate_protein() for tid, transcript in gene.run_transcripts(protein_coding=True)}
-
     mutations = [get_mutation(m, rev=gene.rev) for m in mut_id.split('|')]
 
     results = []
     for tid, transcript in gene.run_transcripts(protein_coding=protein_coding, primary_transcript=primary_transcript):
-        if not transcript.cons_available:
+        if cons_required and not transcript.cons_available:
             continue
 
         if all(mutation not in transcript for mutation in mutations):
-            results.append({'transcript_id': transcript.transcript_id})
             continue
 
         transcript.generate_pre_mrna()
@@ -413,15 +368,15 @@ def oncosplice(mut_id, splicing_threshold=0.5, protein_coding=True, primary_tran
         report['reference_resemblance'] = reference_gene_proteins.get(transcript.protein, None)
         results.append(report)
 
-    report = pd.DataFrame(results)
-    return report
-
+    if len(results) == 0:
+        return None
 
-import asyncio
+    return pd.DataFrame(results)
 
 
+import asyncio
 async def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=True, primary_transcript=False,
-                               window_length=13, organism='hg38', engine='spliceai', use_cons=True):
+                               window_length=13, organism='hg38', engine='spliceai', use_cons=True, require_cons=False):
     import sys, os
     needed_file1 = config[organism]['yoram_path'] / 'rest_api_utils.py'
     needed_file2 = config[organism]['yoram_path'] / 'uniprot_utils.py'
@@ -452,20 +407,17 @@ async def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=Tr
 
     gene = Gene(mut_id.split(':')[0], organism=organism)
     reference_gene_proteins = {tid: transcript.generate_pre_mrna().generate_mature_mrna().generate_protein() for tid, transcript in gene.run_transcripts(protein_coding=True)}
-
     mutations = [get_mutation(mut_id, rev=gene.rev) for mut_id in mut_id.split('|')]
-
     results = []
     for tid, transcript in gene.run_transcripts(protein_coding=protein_coding, primary_transcript=primary_transcript):
-        if not transcript.cons_available:
+        if require_cons and not transcript.cons_available:
            continue
 
         if all(mutation not in transcript for mutation in mutations):
-            results.append({'transcript_id': transcript.transcript_id})
+            # results.append({'transcript_id': transcript.transcript_id})
            continue
 
         task1 = asyncio.create_task(background_request(tid))
-
         transcript.generate_pre_mrna()
         transcript.cons_vector = transform_conservation_vector(transcript.cons_vector, window=window_length)
         transcript.generate_mature_mrna().generate_protein(inplace=True)
@@ -475,7 +427,7 @@ async def oncosplice_prototype(mut_id, splicing_threshold=0.5, protein_coding=Tr
            cons_vector = np.ones(len(ref_protein))
 
         if sum(cons_vector) == 0:
-            cons_vector = np.ones(len(ref_protein))/len(ref_protein)
+            cons_vector = np.ones(len(ref_protein))  # /len(ref_protein)
 
         reference_transcript = copy.deepcopy(transcript)
 
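Taken together, the oncosplice changes alter the calling contract in two visible ways: conservation data is now opt-in via the new cons_required flag, and a run in which no transcript survives the filters returns None instead of a DataFrame padded with bare transcript-ID rows. A minimal sketch of a caller adapted to 1.2.56; the mutation payload after the gene symbol is elided because its format is not part of this diff:

    df = oncosplice('KRAS:...', cons_required=False)  # transcripts lacking conservation vectors are now kept
    if df is None:
        print('no transcripts passed the filters')    # new in 1.2.56
    else:
        print(df['reference_resemblance'].head())
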
geney/pangolin_utils.py CHANGED

@@ -52,12 +52,10 @@ def pangolin_predict_probs(true_seq, models):
     model_nums = [0, 1, 2, 3, 4, 5, 6]
     INDEX_MAP = {0: 1, 1: 2, 2: 4, 3: 5, 4: 7, 5: 8, 6: 10, 7: 11}
 
-    # seq = 'N'*5000 + true_seq + 'N'*5000
     seq = true_seq
     true_seq = true_seq[5000:-5000]
-    acceptor_dinucleotide = np.array([true_seq[i - 2:i] == 'AG' for i in range(len(true_seq))])
-    # donor_dinucleotide = np.array([true_seq[i + 1:i + 3] == 'GT' for i in range(len(true_seq))])
-    donor_dinucleotide = np.array([true_seq[i -2:i] == 'GT' for i in range(len(true_seq))])
+    acceptor_dinucleotide = np.array([true_seq[i - 2:i] == 'AG' for i in range(len(true_seq))])  # np.ones(len(true_seq)) #
+    donor_dinucleotide = np.array([true_seq[i+1:i+3] == 'GT' for i in range(len(true_seq))])  # np.ones(len(true_seq)) #
 
     seq = pang_one_hot_encode(seq).T
     seq = torch.from_numpy(np.expand_dims(seq, axis=0)).float()

@@ -78,4 +76,4 @@ def pangolin_predict_probs(true_seq, models):
     splicing_pred = np.array(scores).max(axis=0)
     donor_probs = [splicing_pred[i] * donor_dinucleotide[i] for i in range(len(true_seq))]
     acceptor_probs = [splicing_pred[i] * acceptor_dinucleotide[i] for i in range(len(true_seq))]
-    return donor_probs, acceptor_probs
+    return donor_probs, acceptor_probs
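
The substantive fix here is the donor mask: 1.2.54 tested for `GT` at true_seq[i - 2:i], i.e. upstream of position i the way an acceptor's `AG` is tested, while 1.2.56 restores the check to the two intronic bases immediately downstream (i + 1:i + 3), matching the canonical GT donor dinucleotide. (The final return line changing to an identical line is most likely a trailing-newline fix.) A toy check of the corrected mask:

    import numpy as np

    # Position i can be a donor only if the two bases just downstream of it are 'GT'.
    true_seq = 'AAGGTAAGT'
    donor_mask = np.array([true_seq[i + 1:i + 3] == 'GT' for i in range(len(true_seq))])
    print(np.where(donor_mask)[0])  # -> [2 6], the last exonic base before each 'GT'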
geney/spliceai_utils.py CHANGED

@@ -12,8 +12,8 @@ if tf.config.list_physical_devices('GPU'):
 else:
     print("Running on CPU.")
 
-# tf.config.threading.set_intra_op_parallelism_threads(1)
-# tf.config.threading.set_inter_op_parallelism_threads(1)
+tf.config.threading.set_intra_op_parallelism_threads(1)
+tf.config.threading.set_inter_op_parallelism_threads(1)
 
 sai_paths = ('models/spliceai{}.h5'.format(x) for x in range(1, 6))
 sai_models = [load_model(resource_filename('spliceai', x)) for x in sai_paths]
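
Enabling the two threading calls pins TensorFlow to one intra-op and one inter-op thread, presumably so that SpliceAI inference does not oversubscribe cores when several worker processes each load the five models. One caveat worth noting: these setters raise RuntimeError once the TF runtime has initialized, which is why they only work this early in the module. A defensive sketch:

    import tensorflow as tf

    # Pin TF to a single thread per op queue; guard against the case where the
    # runtime was already initialized (the setters then raise RuntimeError).
    try:
        tf.config.threading.set_intra_op_parallelism_threads(1)
        tf.config.threading.set_inter_op_parallelism_threads(1)
    except RuntimeError:
        pass  # too late to change threading; configure before any TF op runs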
geney/splicing_utils.py CHANGED

@@ -1,128 +1,125 @@
-import networkx as nx
 import numpy as np
 from .mutation_utils import get_mutation
 from .seqmat_utils import Gene
 
-
-class SpliceSite:
-    def __init__(self, pos, ss_type, prob):
-        self.pos = pos
-        self.ss_type = ss_type  # 0 for donors, 1 for acceptors
-        self.prob = prob
-
-class SpliceSiteFactory:
-    @staticmethod
-    def create_splice_site(pos, ss_type, prob):
-        return SpliceSite(pos, ss_type, prob)
-
-def compute_paths_sequential(G, transcript, exon_starts, exon_ends):
-    """
-    Compute paths from start to end and from end to start sequentially, then return the paths with their probabilities.
-    """
-    new_paths = {}
-    prob_sum = 0
-
-    # Combine paths in both directions
-    all_paths = list(nx.all_simple_paths(G, transcript.transcript_start, transcript.transcript_end)) + \
-                list(nx.all_simple_paths(G, transcript.transcript_end, transcript.transcript_start))
-
-    # Compute the probabilities of each path sequentially
-    path_probs = [path_weight_mult(G, path, 'weight') for path in all_paths]
-
-    # Populate new_paths dictionary with computed paths and probabilities
-    for i, (path, curr_prob) in enumerate(zip(all_paths, path_probs)):
-        prob_sum += curr_prob
-        new_paths[i] = {
-            'acceptors': sorted([p for p in path if p in exon_starts and p != transcript.transcript_start], reverse=transcript.rev),
-            'donors': sorted([p for p in path if p in exon_ends and p != transcript.transcript_end], reverse=transcript.rev),
-            'path_weight': curr_prob
-        }
-    return new_paths, prob_sum
+from collections import defaultdict
+
+
+def generate_adjacency_list(acceptors, donors, transcript_start, transcript_end, max_distance=50, rev=False):
+    # Append the transcript end to donors to allow connection to the end point
+    donors.append((transcript_end, 1))
+    acceptors = sorted(acceptors, key=lambda x: (x[0], x[1] if not rev else -x[1]), reverse=rev)
+    donors = sorted(donors, key=lambda x: (x[0], x[1] if not rev else -x[1]), reverse=rev)
+
+    # Initialize adjacency list to store downstream connections
+    adjacency_list = defaultdict(list)
+
+    # Connect each donor to the nearest acceptor(s) within the distance threshold
+    for d_pos, d_prob in donors:
+        running_prob = 1
+        for a_pos, a_prob in acceptors:
+            correct_orientation = (a_pos > d_pos and not rev) or (a_pos < d_pos and rev)
+            distance_valid = abs(a_pos - d_pos) <= max_distance
+            if correct_orientation and distance_valid:
+                in_between_acceptors = sum([d_pos < a < a_pos for a, _ in acceptors]) if not rev else sum([a_pos < a < d_pos for a, _ in acceptors])
+                in_between_donors = sum([d_pos < d < a_pos for d, _ in donors]) if not rev else sum([a_pos < d < d_pos for d, _ in donors])
+                in_between_naturals = 0
+                if in_between_donors == 0 or in_between_acceptors == 0:
+                    adjacency_list[(d_pos, 'donor')].append((a_pos, 'acceptor', a_prob))
+                    running_prob -= a_prob
+
+                else:
+                    if running_prob > 0:
+                        adjacency_list[(d_pos, 'donor')].append((a_pos, 'acceptor', a_prob*running_prob))
+                        running_prob -= a_prob
+                    else:
+                        break
+
+    # Connect each acceptor to the nearest donor(s) within the distance threshold
+    for a_pos, a_prob in acceptors:
+        running_prob = 1
+        for d_pos, d_prob in donors:
+            correct_orientation = (d_pos > a_pos and not rev) or (d_pos < a_pos and rev)
+            distance_valid = abs(d_pos - a_pos) <= max_distance
+            if correct_orientation and distance_valid:
+                in_between_acceptors = sum([a_pos < a < d_pos for a, _ in acceptors]) if not rev else sum([d_pos < a < a_pos for a, _ in acceptors])
+                in_between_donors = sum([a_pos < d < d_pos for d, _ in donors]) if not rev else sum([d_pos < d < a_pos for d, _ in donors])
+                in_between_naturals = 0
+                tag = 'donor' if d_pos != transcript_end else 'transcript_end'
+
+                if in_between_acceptors == 0:
+                    adjacency_list[(a_pos, 'acceptor')].append((d_pos, tag, d_prob))
+                    running_prob -= d_prob
+                else:
+                    if running_prob > 0:
+                        adjacency_list[(a_pos, 'acceptor')].append((d_pos, tag, d_prob*running_prob))
+                        running_prob -= d_prob
+                    else:
+                        break
+
+    # Connect the transcript start to the nearest donor(s) within the distance threshold
+    running_prob = 1
+    for d_pos, d_prob in donors:
+        if ((d_pos > transcript_start and not rev) or (d_pos < transcript_start and rev)) and abs(
+                d_pos - transcript_start) <= max_distance:
+            adjacency_list[(transcript_start, 'transcript_start')].append((d_pos, 'donor', d_prob))
+            running_prob -= d_prob
+            if running_prob <= 0:
+                break
+
+    # Normalize probabilities to ensure they sum up to 1 for each list of connections
+    for k, next_nodes in adjacency_list.items():
+        prob_sum = sum([c for a, b, c in next_nodes])
+        adjacency_list[k] = [(a, b, round(c / prob_sum, 3)) for a, b, c in next_nodes] if prob_sum > 0 else next_nodes
+
+    return adjacency_list
+
+
+def find_all_paths(graph, start, end, path=[], probability=1.0):
+    path = path + [start]  # Add current node to the path
+    if start == end:
+        yield path, probability  # If end is reached, yield the path and its cumulative probability
+        return
+    if start not in graph:
+        return  # If the start node has no outgoing edges, return
+
+    for (next_node, node_type, prob) in graph[start]:
+        # Recur for each connected node, updating the probability
+        yield from find_all_paths(graph, (next_node, node_type), end, path, probability * prob)
+
+
+def prepare_splice_sites(acceptors, donors, aberrant_splicing):
+    acceptors = {p: 1 for p in acceptors}
+    donors = {p: 1 for p in donors}
+
+    for p, v in aberrant_splicing[f'missed_donors'].items():
+        donors[p] = v['absolute']
+
+    for p, v in aberrant_splicing[f'discovered_donors'].items():
+        donors[p] = v['absolute']
+
+    for p, v in aberrant_splicing[f'missed_acceptors'].items():
+        acceptors[p] = v['absolute']
+
+    for p, v in aberrant_splicing[f'discovered_acceptors'].items():
+        acceptors[p] = v['absolute']
+
+    acceptors = {int(k): v for k, v in acceptors.items()}
+    donors = {int(k): v for k, v in donors.items()}
+    return list(acceptors.items()), list(donors.items())
 
 
 def develop_aberrant_splicing(transcript, aberrant_splicing):
-    # Prepare exon start and end dictionaries
-    exon_starts = prepare_splice_sites(transcript.acceptors, transcript.transcript_start, aberrant_splicing, 'acceptors')
-    exon_ends = prepare_splice_sites(transcript.donors, transcript.transcript_end, aberrant_splicing, 'donors')
-
-    # Create SpliceSite nodes and filter based on probability > 0
-    nodes = [
-        SpliceSiteFactory.create_splice_site(pos, 0, prob) for pos, prob in exon_ends.items()
-    ] + [
-        SpliceSiteFactory.create_splice_site(pos, 1, prob) for pos, prob in exon_starts.items()
-    ]
-    nodes = [s for s in nodes if s.prob > 0]
-
-    # Sort nodes based on position, respecting transcript direction
-    nodes.sort(key=lambda x: x.pos, reverse=transcript.rev)
-
-    # Create the directed graph
-    G = create_splice_graph(nodes, transcript.rev)
-
-    # Compute new paths and their probabilities sequentially
-    new_paths, prob_sum = compute_paths_sequential(G, transcript, exon_starts, exon_ends)
-
-    # Normalize probabilities and filter based on threshold
-    new_paths = normalize_and_filter_paths(new_paths, prob_sum)
-
-    return list(new_paths.values())
-
-
-def prepare_splice_sites(transcript_sites, transcript_boundary, aberrant_splicing, site_type):
-    """
-    Prepare and return a dictionary of splice sites (acceptors or donors) including transcript boundaries
-    and aberrant splicing information.
-    """
-    site_dict = {v: 1 for v in transcript_sites}
-    site_dict.update({transcript_boundary: 1})
-    site_dict.update({s: v['absolute'] for s, v in aberrant_splicing[f'missed_{site_type}'].items()})
-    site_dict.update({s: v['absolute'] for s, v in aberrant_splicing[f'discovered_{site_type}'].items()})
-    return site_dict
-
-
-def create_splice_graph(nodes, reverse_direction):
-    """
-    Create and return a directed graph with splice sites as nodes and edges based on splice site type
-    and probability of occurrence.
-    """
-    G = nx.DiGraph()
-    G.add_nodes_from([n.pos for n in nodes])
-
-    for i in range(len(nodes)):
-        trailing_prob = 0
-        in_between = set()
-        curr_node = nodes[i]
-
-        for j in range(i + 1, len(nodes)):
-            next_node = nodes[j]
-            in_between.add(next_node.ss_type)
-
-            if curr_node.ss_type != next_node.ss_type:
-                new_prob = next_node.prob - trailing_prob
-                if new_prob > 0:
-                    G.add_edge(curr_node.pos, next_node.pos, weight=new_prob)
-                trailing_prob += next_node.prob
-    return G
-
-
-def normalize_and_filter_paths(new_paths, prob_sum):
-    """
-    Normalize path probabilities and filter out paths with a probability less than 0.01.
-    """
-    for i, d in new_paths.items():
-        d['path_weight'] = round(d['path_weight'] / prob_sum, 3)
-    new_paths = {k: v for k, v in new_paths.items() if v['path_weight'] > 0.00001}
-    return new_paths
-
-
-def path_weight_mult(G, path, weight):
-    """
-    Calculate the multiplicative weight of the path.
-    """
-    cost = 1
-    for node, nbr in zip(path[:-1], path[1:]):
-        cost *= G[node][nbr][weight]
-    return cost
+    all_acceptors, all_donors = prepare_splice_sites(transcript.acceptors, transcript.donors, aberrant_splicing)
+    adj_list = generate_adjacency_list(all_acceptors, all_donors, transcript_start=transcript.transcript_start,
+                                       transcript_end=transcript.transcript_end, rev=transcript.rev,
+                                       max_distance=100000)
+    end_node = (transcript.transcript_end, 'transcript_end')
+    start_node = (transcript.transcript_start, 'transcript_start')
+    for path, prob in find_all_paths(adj_list, start_node, end_node):
+        yield {'acceptors': [p[0] for p in path if p[1] == 'acceptor'],
+               'donors': [p[0] for p in path if p[1] == 'donor'], 'path_weight': prob}
+
 
 
 # Missplicing Detection
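
The rewrite drops the networkx dependency entirely: splice sites become keys of a plain adjacency dict mapping (position, kind) nodes to (position, kind, probability) edges, and find_all_paths enumerates candidate isoforms depth-first, multiplying edge probabilities along each path. Note also that develop_aberrant_splicing is now a generator (it yields isoform dicts), so callers needing len() or indexing must wrap it in list(). A toy forward-strand run with one donor and one acceptor, using the functions exactly as added above:

    adj = generate_adjacency_list(acceptors=[(40, 1.0)],   # (genomic position, probability)
                                  donors=[(20, 1.0)],
                                  transcript_start=0, transcript_end=100,
                                  max_distance=1000)
    start, end = (0, 'transcript_start'), (100, 'transcript_end')
    for path, prob in find_all_paths(adj, start, end):
        print(path, prob)
    # [(0, 'transcript_start'), (20, 'donor'), (40, 'acceptor'), (100, 'transcript_end')] 1.0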
geney/tis_utils.py CHANGED

@@ -28,26 +28,13 @@ def find_tis(ref_seq, mut_seq, left_context=100, right_context=102):
                                              right_context=right_context,
                                              padding='$')
 
-    # 3. If condition 2 is not met, we perform a TIS reaquisition. If condition 2 is met, then we return the reference TIS to be used in the mutated sequence
     if context_conserved:
-        return tis_coords[0]
-
-    # 4. Reaquisition of TIS follows:
-    #### The logic:
-    # a. We need to find all possible start codon candidates as relative indices
-    # b. We need to find what proteins each alternative start codon would create
-    # c. We need to make sure we are only looking at a region around a mutation
-    # d. We need the titer score rank relative to all titer score reference ranks and relative to the reference score
+        return [(tis_coords[0], 1, 'canonical')]
 
     sc_table = pd.read_pickle(config['titer_path'] / 'titer_tis_scores.pickle')
-    # target_transcript = sc_table[sc_table.transcript_id == ref_id]
-    # if len(target_transcript) == 0:
-    ### reaquire TIS score for ref
-    #     pass
-
     ref_seq_tis_context = ref_seq.asymmetric_subseq(tis_coords[0], left_context=left_context,
                                                     right_context=right_context, padding='$')
-    # target_ref_titer_score = target_transcript.tis_score
+
     ref_titer_score = retrieve_titer_score(ref_seq_tis_context)
     ref_titer_rank = percentileofscore(sc_table['tis_score'], ref_titer_score)
     ref_protein = ref_seq.translate(tis_coords[0])

@@ -56,7 +43,8 @@ def find_tis(ref_seq, mut_seq, left_context=100, right_context=102):
     candidate_positions = np.array(
         [p.align(ref_protein, mut_seq.translate(mut_seq.seqmat[1, i])).score if candidate_positions[i] == True else 0
          for i in range(len(ref_seq.seq))])
-    candidate_positions = candidate_positions > sorted(candidate_positions)[-5]
+
+    candidate_positions = candidate_positions > sorted(candidate_positions)[-5]  # implement correct logic
     candidate_positions = np.array([retrieve_titer_score(
         mut_seq.asymmetric_subseq(tis_coords[0], left_context=left_context, right_context=right_context,
                                   padding='$')) if candidate_positions[i] > 0 else False for i in

@@ -66,7 +54,7 @@ def find_tis(ref_seq, mut_seq, left_context=100, right_context=102):
                                     in range(len(ref_seq.seq))])
     best_position = np.where(candidate_positions == min(candidate_positions))[0][0]
     out = mut_seq.seqmat[1, best_position]
-    return out
+    return out  # output: [(genomic_coord1, probability, filter_tag), (genomic_coord2, probability, filter_tag)]
 
 
 def seq_matrix(seq_list):
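
A contract wrinkle worth flagging: only the conserved-context branch of find_tis was updated to return [(coords, 1, 'canonical')]; the reacquisition branch still returns a bare coordinate, with the tuple-list format promised only in a trailing comment. Until that branch is aligned, a caller may want to normalize defensively (the 'reacquired' tag below is a hypothetical placeholder):

    tis = find_tis(ref_seq, mut_seq)  # ref_seq / mut_seq come from the surrounding pipeline
    candidates = tis if isinstance(tis, list) else [(tis, 1.0, 'reacquired')]
    for coord, prob, tag in candidates:
        print(coord, prob, tag)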
geney-1.2.56.dist-info/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geney
-Version: 1.2.54
+Version: 1.2.56
 Summary: A Python package for gene expression modeling.
 Home-page: https://github.com/nicolaslynn/geney
 Author: Nicolas Lynn
geney-1.2.56.dist-info/RECORD CHANGED

@@ -6,21 +6,21 @@ geney/graphic_utils.py,sha256=oMsBpB9YeEn96gGpKh4MmtagJffWZbk-xPrIwHvkFhA,11016
 geney/gtex_utils.py,sha256=asL2lHyU5KsbWpV096vkf1Ka7hSo_RRfZqw7p5nERmE,1919
 geney/immune_utils.py,sha256=ZRni5ttrhpYBnmNr0d0ZatIbNPYs4nmQuoUO00SpsS4,5271
 geney/mutation_utils.py,sha256=C_kv2MB_L8LlhX3W2ooXjJ3uDoJ8zX1WeDtZKoBZJkI,1547
-geney/oncosplice.py,sha256=hPmB9sEPs9lr22BlPGKpQUOd59vUjAttXZ6QKf4A-kg,23534
-geney/pangolin_utils.py,sha256=rVi_U23nhw6wCc44fBeD3sv-FshLTGE1UMMtIYwgr9U,2967
+geney/oncosplice.py,sha256=eWgY2Lcj894UBFnIVhbxiVz5oqASHg-Ot1wFbjlJbI8,21857
+geney/pangolin_utils.py,sha256=NJEdY43L_2lielY1hZOjlak0baHqXTa1ITrvx8Tkg5o,2878
 geney/power_utils.py,sha256=MehZFUdkJ2EFUot709yPEDxSkXmH5XevMebX2HD768A,7330
 geney/seqmat_utils.py,sha256=2cRXT_Ox4IdzCM8x3H2HexxFZzjo5WHs0HZiUQv8fBM,18347
-geney/spliceai_utils.py,sha256=gIGPC8u3J15A7EQrk2Elho5PbF9MmUUNopGGH-eEV8s,1873
-geney/splicing_utils.py,sha256=8qbj81ESNxu9-clEjBV-2xj41p1Wly6NFiGlm0etSeg,15564
+geney/spliceai_utils.py,sha256=21_TaiLW3faRuPegMgsVvIf1G1a03penZSiydQ-hOTA,1869
+geney/splicing_utils.py,sha256=t0vE5KTAdYOYJLa9wjaSJ1jqiHhsDxZs64OxrgR-Sqc,16811
 geney/survival_utils.py,sha256=KnAzEviMuXh6SnVXId9PgsFLSbgkduTvYoIthxN7FPA,6886
 geney/tcga_utils.py,sha256=D_BNHm-D_K408dlcJm3hzH2c6QNFjQsKvUcOPiQRk7g,17612
-geney/tis_utils.py,sha256=vA2ci4gNfwwQZlCjPpO5ehvL2NRVeM7lHI_VyfT-_10,8049
+geney/tis_utils.py,sha256=2makfGfVlDFVIbxzXE85AY9jmAjcNmxyIAxjvkRA5LY,7396
 geney/utils.py,sha256=EsKvBM-Nz2a3_4ZAhF4Dxd4PwT7_6YYKpxEN4LLgg10,2174
 geney/translation_initiation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4woLFSHroWIETc,4448
 geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
 geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
-geney-1.2.54.dist-info/METADATA,sha256=C4gCBwzsS6uD4ZqO1lE5Me2BAldGdvBk50hQkp5C528,948
-geney-1.2.54.dist-info/WHEEL,sha256=fS9sRbCBHs7VFcwJLnLXN1MZRR0_TVTxvXKzOnaSFs8,110
-geney-1.2.54.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
-geney-1.2.54.dist-info/RECORD,,
+geney-1.2.56.dist-info/METADATA,sha256=tHCFJyD9OKjk7GnQToKesLQZyzy0dtO9oBsr0Bjz6rI,948
+geney-1.2.56.dist-info/WHEEL,sha256=fS9sRbCBHs7VFcwJLnLXN1MZRR0_TVTxvXKzOnaSFs8,110
+geney-1.2.56.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
+geney-1.2.56.dist-info/RECORD,,
All other files in the wheel are unchanged; their sha256 entries in RECORD are identical across the two versions.