@datagrok/bio 2.4.15 → 2.4.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/package.json +2 -2
- package/scripts/sequence_generator.py +289 -0
- package/src/analysis/sequence-diversity-viewer.ts +7 -4
- package/src/analysis/sequence-similarity-viewer.ts +7 -2
- package/src/demo/bio01-similarity-diversity.ts +19 -4
- package/src/demo/bio01a-hierarchical-clustering-and-sequence-space.ts +3 -0
- package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +3 -0
- package/src/demo/bio05-helm-msa-sequence-space.ts +13 -9
- package/src/package.ts +13 -1
- package/src/tests/checkInputColumn-tests.ts +2 -2
- package/src/tests/msa-tests.ts +1 -1
- package/src/tests/renderers-test.ts +2 -2
- package/src/utils/cell-renderer.ts +4 -4
- package/src/utils/multiple-sequence-alignment-ui.ts +19 -18
- package/scripts/motif_generator.py +0 -119
|
@@ -14,14 +14,19 @@ export class MsaWarning extends Error {
|
|
|
14
14
|
super(message, options);
|
|
15
15
|
}
|
|
16
16
|
}
|
|
17
|
+
type multipleSequenceAlginmentUIOptions = {col?: DG.Column<string> | null, clustersCol?: DG.Column | null,
|
|
18
|
+
pepsea?: {method?: typeof pepseaMethods[number], gapOpen?: number, gapExtend?: number}};
|
|
17
19
|
|
|
18
|
-
export async function multipleSequenceAlignmentUI(
|
|
19
|
-
col: DG.Column<string> | null = null,
|
|
20
|
-
pepseaMethod: typeof pepseaMethods[number] = pepseaMethods[0]
|
|
21
|
-
): Promise<DG.Column> {
|
|
20
|
+
export async function multipleSequenceAlignmentUI(options: multipleSequenceAlginmentUIOptions = {}): Promise<DG.Column> {
|
|
22
21
|
return new Promise(async (resolve, reject) => {
|
|
23
|
-
|
|
24
|
-
|
|
22
|
+
options.clustersCol ??= null;
|
|
23
|
+
options.pepsea ??= {};
|
|
24
|
+
options.pepsea.method ??= pepseaMethods[0];
|
|
25
|
+
options.pepsea.gapOpen ??= 1.53;
|
|
26
|
+
options.pepsea.gapExtend ??= 0;
|
|
27
|
+
|
|
28
|
+
const table = options.col?.dataFrame ?? grok.shell.t;
|
|
29
|
+
const seqCol = options.col ?? table.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
|
|
25
30
|
if (seqCol == null) {
|
|
26
31
|
const errMsg = `MSAError: dataset doesn't conain any Macromolecule column`;
|
|
27
32
|
grok.shell.warning(errMsg);
|
|
@@ -29,11 +34,11 @@ export async function multipleSequenceAlignmentUI(
|
|
|
29
34
|
}
|
|
30
35
|
|
|
31
36
|
// UI
|
|
32
|
-
const methodInput = ui.choiceInput('Method',
|
|
37
|
+
const methodInput = ui.choiceInput('Method', options.pepsea.method, pepseaMethods);
|
|
33
38
|
methodInput.setTooltip('Alignment method');
|
|
34
|
-
const gapOpenInput = ui.floatInput('Gap open',
|
|
39
|
+
const gapOpenInput = ui.floatInput('Gap open', options.pepsea.gapOpen);
|
|
35
40
|
gapOpenInput.setTooltip('Gap opening penalty at group-to-group alignment');
|
|
36
|
-
const gapExtendInput = ui.floatInput('Gap extend',
|
|
41
|
+
const gapExtendInput = ui.floatInput('Gap extend', options.pepsea.gapExtend);
|
|
37
42
|
gapExtendInput.setTooltip('Gap extension penalty to skip the alignment');
|
|
38
43
|
const inputRootStyles = [methodInput.root.style, gapOpenInput.root.style, gapExtendInput.root.style];
|
|
39
44
|
let performAlignment: (() => Promise<DG.Column<string>>) | undefined;
|
|
@@ -41,19 +46,17 @@ export async function multipleSequenceAlignmentUI(
|
|
|
41
46
|
// TODO: allow only macromolecule columns to be chosen
|
|
42
47
|
const colInput = ui.columnInput('Sequence', table, seqCol, () => {
|
|
43
48
|
performAlignment = onColInputChange(
|
|
44
|
-
colInput.value, table, inputRootStyles, methodInput, clustersColInput, gapOpenInput, gapExtendInput
|
|
45
|
-
);
|
|
49
|
+
colInput.value, table, inputRootStyles, methodInput, clustersColInput, gapOpenInput, gapExtendInput);
|
|
46
50
|
}
|
|
47
51
|
) as DG.InputBase<DG.Column<string>>;
|
|
48
52
|
colInput.setTooltip('Sequences column to use for alignment');
|
|
49
|
-
const clustersColInput = ui.columnInput('Clusters', table,
|
|
53
|
+
const clustersColInput = ui.columnInput('Clusters', table, options.clustersCol);
|
|
50
54
|
clustersColInput.nullable = true;
|
|
51
55
|
colInput.fireChanged();
|
|
52
56
|
//if column is specified (from tests), run alignment and resolve with the result
|
|
53
|
-
if (col) {
|
|
57
|
+
if (options.col) {
|
|
54
58
|
performAlignment = onColInputChange(
|
|
55
|
-
col, table, inputRootStyles, methodInput, clustersColInput, gapOpenInput, gapExtendInput
|
|
56
|
-
);
|
|
59
|
+
options.col, table, inputRootStyles, methodInput, clustersColInput, gapOpenInput, gapExtendInput);
|
|
57
60
|
|
|
58
61
|
await onDialogOk(colInput, table, performAlignment, resolve, reject);
|
|
59
62
|
return;
|
|
@@ -64,9 +67,7 @@ export async function multipleSequenceAlignmentUI(
|
|
|
64
67
|
.add(methodInput)
|
|
65
68
|
.add(gapOpenInput)
|
|
66
69
|
.add(gapExtendInput)
|
|
67
|
-
.onOK(async () => {
|
|
68
|
-
await onDialogOk(colInput, table, performAlignment, resolve, reject);
|
|
69
|
-
})
|
|
70
|
+
.onOK(async () => {await onDialogOk(colInput, table, performAlignment, resolve, reject)})
|
|
70
71
|
.show();
|
|
71
72
|
});
|
|
72
73
|
}
|
|
@@ -1,119 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
|
|
3
|
-
import random
|
|
4
|
-
from math import sqrt
|
|
5
|
-
import argparse
|
|
6
|
-
import sys
|
|
7
|
-
|
|
8
|
-
from typing import List, Tuple
|
|
9
|
-
|
|
10
|
-
letter_choice_type = List[str]
|
|
11
|
-
motif_template_type = List[letter_choice_type]
|
|
12
|
-
|
|
13
|
-
default_alphabet = 'A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y'
|
|
14
|
-
|
|
15
|
-
def meanrange(mean:int,disp:int) -> int:
|
|
16
|
-
return random.randint(mean - disp, mean + disp)
|
|
17
|
-
|
|
18
|
-
def generate_modif_template(motif_length:int, alphabet:List[str], max_variants_cluster:int, prob_any:float=0.2) -> motif_template_type: # Making a template to generate from it some random motifs
|
|
19
|
-
motif_template = []
|
|
20
|
-
for position in range(motif_length):
|
|
21
|
-
# Selecting letters for position i
|
|
22
|
-
if (0 < position < motif_length-1) and (random.random() <= prob_any):
|
|
23
|
-
letters = ['?'] # this stands for any symbol
|
|
24
|
-
else:
|
|
25
|
-
n_variants = random.randrange(max_variants_cluster) + 1
|
|
26
|
-
letters = [ random.choice(alphabet) for i in range(n_variants)]
|
|
27
|
-
motif_template.append(letters)
|
|
28
|
-
return motif_template
|
|
29
|
-
|
|
30
|
-
def generate_motif(template: motif_template_type, alphabet:List[str]) -> str:
|
|
31
|
-
# Substituting the ? in the template with any letter
|
|
32
|
-
template_with_any = [ (letters if not '?' in letters else alphabet) for letters in template ]
|
|
33
|
-
return ''.join([ random.choice(letters) for letters in template_with_any ])
|
|
34
|
-
|
|
35
|
-
def motif_notation(motif_template: motif_template_type) -> str:
|
|
36
|
-
def motif_notation_code(letter_choice:letter_choice_type) -> str:
|
|
37
|
-
if len(letter_choice) == 1:
|
|
38
|
-
return(letter_choice[0])
|
|
39
|
-
else:
|
|
40
|
-
return f"[{''.join(letter_choice)}]"
|
|
41
|
-
|
|
42
|
-
return ''.join([ motif_notation_code(letter_choice) for letter_choice in motif_template])
|
|
43
|
-
|
|
44
|
-
def generate_random(n:int, alphabet:List[str]) -> str:
|
|
45
|
-
return ''.join([ random.choice(alphabet) for i in range(n) ])
|
|
46
|
-
|
|
47
|
-
def make_cliff(motif_template:motif_template_type, alphabet:List[str] , motif:str) -> str:
|
|
48
|
-
# Selecting conservative letter in motif
|
|
49
|
-
pos = random.randrange(len(motif_template))
|
|
50
|
-
while '?' in motif_template[pos]:
|
|
51
|
-
pos = (pos + 1) % len(motif_template) # always will find letters since ends of motif can't be any symbol
|
|
52
|
-
outlier_letters = list(set(alphabet) - set (motif_template[pos]))
|
|
53
|
-
return motif[:pos] + random.choice(outlier_letters) + motif[pos+1:]
|
|
54
|
-
|
|
55
|
-
# ====================================================================================
|
|
56
|
-
|
|
57
|
-
parser = argparse.ArgumentParser(prog='MotifSequencesGenerator',
|
|
58
|
-
description='The program generates set of sequences containing sequence motifs for SAR fucntionality testing',
|
|
59
|
-
epilog='Unitity support: Gennadii Zakharov ')
|
|
60
|
-
|
|
61
|
-
parser.add_argument("-a", "--alphabet", type=str, default=default_alphabet, help="Alphabet to generate sequences, separated by comma",)
|
|
62
|
-
parser.add_argument("-c", "--clusters", type=int, default=1, help="Number of clusters")
|
|
63
|
-
parser.add_argument("-s", "--sequences", type=int, default=500, help="Number of sequences in each cluster",)
|
|
64
|
-
parser.add_argument("-m,", "--motif", type=int, default=12, help="Average length of motif",)
|
|
65
|
-
parser.add_argument("-r,", "--random", type=int, default=4, help="Average length of random sequence parts before and after motif",)
|
|
66
|
-
parser.add_argument("-d,", "--dispersion", type=int, default=2, help="Variation of total sequence lengths",)
|
|
67
|
-
|
|
68
|
-
parser.add_argument("--max-variants-position", type=int, default=3, help="maximum number of different letters in motif position",)
|
|
69
|
-
parser.add_argument("--cliff-probability", type=float, default=0.01, help="Probabaility to make activity cliff of a sequence",)
|
|
70
|
-
parser.add_argument("--cliff-strength", type=float, default=4.0, help="Strength of cliff",)
|
|
71
|
-
|
|
72
|
-
args = parser.parse_args()
|
|
73
|
-
|
|
74
|
-
alphabet:List[str] = args.alphabet.split(',')
|
|
75
|
-
|
|
76
|
-
print('cluster\tsequence_id\tsequence\tactivity\tis_cliff')
|
|
77
|
-
|
|
78
|
-
line_number = 0
|
|
79
|
-
|
|
80
|
-
for n_cluster in range(args.clusters):
|
|
81
|
-
activity_average = random.random() * 10
|
|
82
|
-
activity_dispersion = random.random()
|
|
83
|
-
|
|
84
|
-
# Generating motif template for cluster
|
|
85
|
-
motif_length = meanrange(args.motif, args.dispersion)
|
|
86
|
-
motif_template = generate_modif_template(motif_length, alphabet, args.max_variants_position)
|
|
87
|
-
sys.stderr.write(f"Cluster {n_cluster:2} motif template: {motif_notation(motif_template)}\n")
|
|
88
|
-
|
|
89
|
-
total_length = meanrange(args.random * 2, args.dispersion) + motif_length
|
|
90
|
-
prefix_length = meanrange(args.random, args.dispersion//2)
|
|
91
|
-
suffix_length = total_length - motif_length - prefix_length
|
|
92
|
-
|
|
93
|
-
cliff_made = False
|
|
94
|
-
for n_seq in range(args.sequences):
|
|
95
|
-
line_number +=1
|
|
96
|
-
activity = random.gauss(activity_average, activity_dispersion)
|
|
97
|
-
|
|
98
|
-
motif = generate_motif(motif_template, alphabet)
|
|
99
|
-
prefix = generate_random(prefix_length, alphabet)
|
|
100
|
-
suffix = generate_random(suffix_length, alphabet)
|
|
101
|
-
seq = prefix + motif + suffix
|
|
102
|
-
|
|
103
|
-
is_cliff = random.random() <= args.cliff_probability
|
|
104
|
-
if is_cliff:
|
|
105
|
-
# Making activity cliff
|
|
106
|
-
cliff_motif = make_cliff(motif_template, alphabet, motif)
|
|
107
|
-
cliff_seq = prefix + cliff_motif + suffix
|
|
108
|
-
# Recalculating activity
|
|
109
|
-
cliff_disp = activity_dispersion * args.cliff_strength * (0.5 + random.random())
|
|
110
|
-
activity = activity_average - cliff_disp
|
|
111
|
-
cliff_activity = activity_average + cliff_disp
|
|
112
|
-
|
|
113
|
-
sys.stderr.write(f"Cliff for sequence #{line_number:4}, cluster {n_cluster} \n")
|
|
114
|
-
sys.stderr.write(f"{activity_average}\t{motif}\t{activity}\n")
|
|
115
|
-
sys.stderr.write(f"{activity_average}\t{cliff_motif}\t{cliff_activity}\n")
|
|
116
|
-
print(f"{n_cluster}\tc{n_cluster}_seq{line_number}\t{cliff_seq}\t{cliff_activity:5.2f}\t{is_cliff}")
|
|
117
|
-
line_number +=1
|
|
118
|
-
print(f"{n_cluster}\tc{n_cluster}_seq{line_number}\t{seq}\t{activity:5.2f}\t{is_cliff}")
|
|
119
|
-
|