@datagrok/bio 2.4.15 → 2.4.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/105.js +2 -0
- package/dist/105.js.map +1 -0
- package/dist/367.js +2 -0
- package/dist/367.js.map +1 -0
- package/dist/864.js +2 -0
- package/dist/864.js.map +1 -0
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/package.json +3 -3
- package/scripts/sequence_generator.py +289 -0
- package/src/analysis/sequence-activity-cliffs.ts +2 -2
- package/src/analysis/sequence-diversity-viewer.ts +7 -4
- package/src/analysis/sequence-similarity-viewer.ts +7 -2
- package/src/analysis/sequence-space.ts +18 -0
- package/src/demo/bio01-similarity-diversity.ts +19 -4
- package/src/demo/bio01a-hierarchical-clustering-and-sequence-space.ts +3 -0
- package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +3 -0
- package/src/demo/bio05-helm-msa-sequence-space.ts +13 -9
- package/src/package.ts +21 -5
- package/src/tests/checkInputColumn-tests.ts +2 -2
- package/src/tests/msa-tests.ts +1 -1
- package/src/tests/renderers-test.ts +2 -2
- package/src/utils/cell-renderer.ts +4 -4
- package/src/utils/multiple-sequence-alignment-ui.ts +19 -18
- package/dist/153.js +0 -2
- package/dist/153.js.map +0 -1
- package/scripts/motif_generator.py +0 -119
|
@@ -1,119 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
|
|
3
|
-
import random
|
|
4
|
-
from math import sqrt
|
|
5
|
-
import argparse
|
|
6
|
-
import sys
|
|
7
|
-
|
|
8
|
-
from typing import List, Tuple
|
|
9
|
-
|
|
10
|
-
# One motif position: the list of letters allowed at that position.
# A list containing '?' stands for "any symbol of the alphabet".
letter_choice_type = List[str]
# A motif template: one letter-choice list per motif position, in order.
motif_template_type = List[letter_choice_type]

# Default value of the --alphabet option: 20 comma-separated single-letter
# codes (presumably the standard amino acids -- confirm with the author).
default_alphabet = 'A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y'
|
|
14
|
-
|
|
15
|
-
def meanrange(mean: int, disp: int) -> int:
    """Return a random integer drawn uniformly around ``mean``.

    Args:
        mean: Centre of the interval.
        disp: Half-width of the interval. A negative value is treated as its
            magnitude so the interval is never empty.

    Returns:
        An integer ``r`` with ``mean - abs(disp) <= r <= mean + abs(disp)``.
    """
    # random.randint raises ValueError when the lower bound exceeds the upper
    # one, which is what a negative dispersion used to produce.
    spread = abs(disp)
    return random.randint(mean - spread, mean + spread)
|
|
17
|
-
|
|
18
|
-
def generate_modif_template(motif_length: int, alphabet: List[str], max_variants_cluster: int, prob_any: float = 0.2) -> motif_template_type:
    """Build a random template from which concrete motifs can be generated.

    Every position of the template is a list of allowed letters. Inner
    positions become the wildcard ``['?']`` ("any symbol") with probability
    ``prob_any``; the first and last positions are never wildcards. Every
    other position receives between 1 and ``max_variants_cluster`` *distinct*
    letters from ``alphabet`` (never more than the alphabet can supply).

    Args:
        motif_length: Number of positions in the template.
        alphabet: Letters that may appear in the motif.
        max_variants_cluster: Maximum number of different letters allowed at
            one position.
        prob_any: Probability that an inner position is a wildcard.

    Returns:
        A list of ``motif_length`` letter-choice lists.

    Raises:
        ValueError: If ``alphabet`` is empty while ``motif_length`` is
            positive, or (from ``random.randrange``) if
            ``max_variants_cluster`` is not positive.
    """
    # Drop repeated letters while keeping their order, so that sampling below
    # really yields distinct letters.
    distinct_letters = list(dict.fromkeys(alphabet))
    if motif_length > 0 and not distinct_letters:
        raise ValueError("alphabet must contain at least one letter")

    motif_template = []
    last_position = motif_length - 1
    for position in range(motif_length):
        is_inner = 0 < position < last_position
        if is_inner and random.random() <= prob_any:
            letters = ['?']  # this stands for any symbol
        else:
            n_variants = random.randrange(max_variants_cluster) + 1
            # Sample without replacement: drawing with replacement could
            # yield e.g. ['A', 'A'], which is shown as "[AA]" and skews the
            # letter frequencies of the generated motifs.
            n_variants = min(n_variants, len(distinct_letters))
            letters = random.sample(distinct_letters, n_variants)
        motif_template.append(letters)
    return motif_template
|
|
29
|
-
|
|
30
|
-
def generate_motif(template: motif_template_type, alphabet: List[str]) -> str:
    """Draw one concrete motif from ``template``.

    For each template position one letter is picked at random: from the
    position's own letters, or from the whole ``alphabet`` when the position
    contains the wildcard '?'.

    Args:
        template: Letter choices for every motif position.
        alphabet: Letters substituted for wildcard positions.

    Returns:
        The generated motif, one picked letter per template position.
    """
    picked = []
    for options in template:
        # A wildcard position may take any letter of the alphabet
        pool = alphabet if '?' in options else options
        picked.append(random.choice(pool))
    return ''.join(picked)
|
|
34
|
-
|
|
35
|
-
def motif_notation(motif_template: motif_template_type) -> str:
    """Render a motif template as a compact string.

    A position with exactly one letter is written as that letter (so the
    wildcard position ``['?']`` appears as ``?``); any other position is
    written as its letters joined together inside square brackets.

    Args:
        motif_template: Letter choices for every motif position.

    Returns:
        The concatenated notation of all positions.
    """
    pieces = []
    for letter_choice in motif_template:
        if len(letter_choice) == 1:
            pieces.append(letter_choice[0])
        else:
            pieces.append(f"[{''.join(letter_choice)}]")
    return ''.join(pieces)
|
|
43
|
-
|
|
44
|
-
def generate_random(n: int, alphabet: List[str]) -> str:
    """Return ``n`` letters picked independently at random from ``alphabet``.

    Args:
        n: Number of letters to draw; a non-positive value gives ''.
        alphabet: Letters to draw from.

    Returns:
        The drawn letters concatenated into one string.
    """
    return ''.join(random.choice(alphabet) for _ in range(n))
|
|
46
|
-
|
|
47
|
-
def make_cliff(motif_template: motif_template_type, alphabet: List[str], motif: str) -> str:
    """Return a copy of ``motif`` with one conserved position mutated.

    A start position is drawn at random and the template is scanned
    cyclically from there for the first position that is not a wildcard
    ('?') and for which ``alphabet`` offers at least one letter outside the
    position's allowed letters. The character of ``motif`` at that position
    is replaced by a randomly chosen outside letter.

    Args:
        motif_template: Letter choices for every motif position.
        alphabet: All letters available for sequence generation.
        motif: Concrete motif generated from ``motif_template``.

    Returns:
        ``motif`` with exactly one position replaced.

    Raises:
        ValueError: If the template is empty or no position can be mutated
            (every position is a wildcard or already allows the whole
            alphabet).
    """
    n_positions = len(motif_template)
    if n_positions == 0:
        raise ValueError("cannot make a cliff from an empty motif template")

    start = random.randrange(n_positions)
    # Bounded scan: visiting each position at most once cannot loop forever,
    # even when every position of the template is a wildcard.
    for offset in range(n_positions):
        pos = (start + offset) % n_positions
        allowed = motif_template[pos]
        if '?' in allowed:
            continue
        # Keep alphabet order instead of iterating a set: set order of strings
        # changes between interpreter runs (hash randomisation), which would
        # make the output irreproducible even with a fixed random seed.
        outlier_letters = list(dict.fromkeys(
            letter for letter in alphabet if letter not in allowed))
        if outlier_letters:
            return motif[:pos] + random.choice(outlier_letters) + motif[pos + 1:]

    raise ValueError("motif template has no position that can be mutated")
|
|
54
|
-
|
|
55
|
-
# ====================================================================================
|
|
56
|
-
|
|
57
|
-
# Command-line interface of the generator. All options are optional.
parser = argparse.ArgumentParser(
    prog='MotifSequencesGenerator',
    description='The program generates set of sequences containing sequence motifs for SAR functionality testing',
    epilog='Utility support: Gennadii Zakharov ')

parser.add_argument("-a", "--alphabet", type=str, default=default_alphabet, help="Alphabet to generate sequences, separated by comma")
parser.add_argument("-c", "--clusters", type=int, default=1, help="Number of clusters")
parser.add_argument("-s", "--sequences", type=int, default=500, help="Number of sequences in each cluster")
# The short flags below used to be spelled "-m,", "-r," and "-d," (comma
# inside the option string), so "-m" only worked through prefix matching and
# the help output advertised the flags with a trailing comma.
parser.add_argument("-m", "--motif", type=int, default=12, help="Average length of motif")
parser.add_argument("-r", "--random", type=int, default=4, help="Average length of random sequence parts before and after motif")
parser.add_argument("-d", "--dispersion", type=int, default=2, help="Variation of total sequence lengths")

parser.add_argument("--max-variants-position", type=int, default=3, help="maximum number of different letters in motif position")
parser.add_argument("--cliff-probability", type=float, default=0.01, help="Probability to make activity cliff of a sequence")
parser.add_argument("--cliff-strength", type=float, default=4.0, help="Strength of cliff")

# Parsed at import time: the file is a flat script, not an importable module
args = parser.parse_args()
|
|
73
|
-
|
|
74
|
-
# The alphabet arrives as one comma-separated string
alphabet:List[str] = args.alphabet.split(',')

# Header of the tab-separated table written to stdout; progress and cliff
# details go to stderr so they do not pollute the table.
print('cluster\tsequence_id\tsequence\tactivity\tis_cliff')

# Running counter over all printed sequences; it is part of each sequence id
line_number = 0

for n_cluster in range(args.clusters):
    # Every cluster gets its own activity distribution
    activity_average = random.random() * 10
    activity_dispersion = random.random()

    # Generating motif template for cluster.
    # A large --dispersion could make the drawn length zero or negative, which
    # gives an empty template and a crash on the first cliff; keep at least
    # one motif position.
    motif_length = max(1, meanrange(args.motif, args.dispersion))
    motif_template = generate_modif_template(motif_length, alphabet, args.max_variants_position)
    sys.stderr.write(f"Cluster {n_cluster:2} motif template: {motif_notation(motif_template)}\n")

    # Lengths of the random parts around the motif are fixed per cluster.
    # Clamp them at zero so the pieces always add up to total_length.
    total_length = meanrange(args.random * 2, args.dispersion) + motif_length
    prefix_length = max(0, meanrange(args.random, args.dispersion//2))
    suffix_length = max(0, total_length - motif_length - prefix_length)

    for n_seq in range(args.sequences):
        line_number +=1
        activity = random.gauss(activity_average, activity_dispersion)

        motif = generate_motif(motif_template, alphabet)
        prefix = generate_random(prefix_length, alphabet)
        suffix = generate_random(suffix_length, alphabet)
        seq = prefix + motif + suffix

        is_cliff = random.random() <= args.cliff_probability
        if is_cliff:
            # Making activity cliff: a second sequence that differs from seq
            # in a single conserved motif position
            cliff_motif = make_cliff(motif_template, alphabet, motif)
            cliff_seq = prefix + cliff_motif + suffix
            # Recalculating activity: the pair is placed symmetrically below
            # and above the cluster average
            cliff_disp = activity_dispersion * args.cliff_strength * (0.5 + random.random())
            activity = activity_average - cliff_disp
            cliff_activity = activity_average + cliff_disp

            sys.stderr.write(f"Cliff for sequence #{line_number:4}, cluster {n_cluster} \n")
            sys.stderr.write(f"{activity_average}\t{motif}\t{activity}\n")
            sys.stderr.write(f"{activity_average}\t{cliff_motif}\t{cliff_activity}\n")
            # The mutated sequence is printed first and takes the current id;
            # the original sequence then gets the next one
            print(f"{n_cluster}\tc{n_cluster}_seq{line_number}\t{cliff_seq}\t{cliff_activity:5.2f}\t{is_cliff}")
            line_number +=1
        print(f"{n_cluster}\tc{n_cluster}_seq{line_number}\t{seq}\t{activity:5.2f}\t{is_cliff}")
|
|
119
|
-
|