@datagrok/bio 2.4.14 → 2.4.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,14 +14,19 @@ export class MsaWarning extends Error {
14
14
  super(message, options);
15
15
  }
16
16
  }
17
+ type multipleSequenceAlginmentUIOptions = {col?: DG.Column<string> | null, clustersCol?: DG.Column | null,
18
+ pepsea?: {method?: typeof pepseaMethods[number], gapOpen?: number, gapExtend?: number}};
17
19
 
18
- export async function multipleSequenceAlignmentUI(
19
- col: DG.Column<string> | null = null,
20
- pepseaMethod: typeof pepseaMethods[number] = pepseaMethods[0]
21
- ): Promise<DG.Column> {
20
+ export async function multipleSequenceAlignmentUI(options: multipleSequenceAlginmentUIOptions = {}): Promise<DG.Column> {
22
21
  return new Promise(async (resolve, reject) => {
23
- const table = col?.dataFrame ?? grok.shell.t;
24
- const seqCol = col ?? table.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
22
+ options.clustersCol ??= null;
23
+ options.pepsea ??= {};
24
+ options.pepsea.method ??= pepseaMethods[0];
25
+ options.pepsea.gapOpen ??= 1.53;
26
+ options.pepsea.gapExtend ??= 0;
27
+
28
+ const table = options.col?.dataFrame ?? grok.shell.t;
29
+ const seqCol = options.col ?? table.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
25
30
  if (seqCol == null) {
26
31
  const errMsg = `MSAError: dataset doesn't conain any Macromolecule column`;
27
32
  grok.shell.warning(errMsg);
@@ -29,11 +34,11 @@ export async function multipleSequenceAlignmentUI(
29
34
  }
30
35
 
31
36
  // UI
32
- const methodInput = ui.choiceInput('Method', pepseaMethod, pepseaMethods);
37
+ const methodInput = ui.choiceInput('Method', options.pepsea.method, pepseaMethods);
33
38
  methodInput.setTooltip('Alignment method');
34
- const gapOpenInput = ui.floatInput('Gap open', 1.53);
39
+ const gapOpenInput = ui.floatInput('Gap open', options.pepsea.gapOpen);
35
40
  gapOpenInput.setTooltip('Gap opening penalty at group-to-group alignment');
36
- const gapExtendInput = ui.floatInput('Gap extend', 0);
41
+ const gapExtendInput = ui.floatInput('Gap extend', options.pepsea.gapExtend);
37
42
  gapExtendInput.setTooltip('Gap extension penalty to skip the alignment');
38
43
  const inputRootStyles = [methodInput.root.style, gapOpenInput.root.style, gapExtendInput.root.style];
39
44
  let performAlignment: (() => Promise<DG.Column<string>>) | undefined;
@@ -41,19 +46,17 @@ export async function multipleSequenceAlignmentUI(
41
46
  // TODO: allow only macromolecule colums to be chosen
42
47
  const colInput = ui.columnInput('Sequence', table, seqCol, () => {
43
48
  performAlignment = onColInputChange(
44
- colInput.value, table, inputRootStyles, methodInput, clustersColInput, gapOpenInput, gapExtendInput
45
- );
49
+ colInput.value, table, inputRootStyles, methodInput, clustersColInput, gapOpenInput, gapExtendInput);
46
50
  }
47
51
  ) as DG.InputBase<DG.Column<string>>;
48
52
  colInput.setTooltip('Sequences column to use for alignment');
49
- const clustersColInput = ui.columnInput('Clusters', table, null);
53
+ const clustersColInput = ui.columnInput('Clusters', table, options.clustersCol);
50
54
  clustersColInput.nullable = true;
51
55
  colInput.fireChanged();
52
56
  //if column is specified (from tests), run alignment and resolve with the result
53
- if (col) {
57
+ if (options.col) {
54
58
  performAlignment = onColInputChange(
55
- col, table, inputRootStyles, methodInput, clustersColInput, gapOpenInput, gapExtendInput
56
- );
59
+ options.col, table, inputRootStyles, methodInput, clustersColInput, gapOpenInput, gapExtendInput);
57
60
 
58
61
  await onDialogOk(colInput, table, performAlignment, resolve, reject);
59
62
  return;
@@ -64,9 +67,7 @@ export async function multipleSequenceAlignmentUI(
64
67
  .add(methodInput)
65
68
  .add(gapOpenInput)
66
69
  .add(gapExtendInput)
67
- .onOK(async () => {
68
- await onDialogOk(colInput, table, performAlignment, resolve, reject);
69
- })
70
+ .onOK(async () => {await onDialogOk(colInput, table, performAlignment, resolve, reject)})
70
71
  .show();
71
72
  });
72
73
  }
@@ -1,119 +0,0 @@
1
- #!/usr/bin/env python3
2
-
3
- import random
4
- from math import sqrt
5
- import argparse
6
- import sys
7
-
8
- from typing import List, Tuple
9
-
10
- letter_choice_type = List[str]
11
- motif_template_type = List[letter_choice_type]
12
-
13
- default_alphabet = 'A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y'
14
-
15
- def meanrange(mean:int,disp:int) -> int:
16
- return random.randint(mean - disp, mean + disp)
17
-
18
- def generate_modif_template(motif_length:int, alphabet:List[str], max_variants_cluster:int, prob_any:float=0.2) -> motif_template_type: # Making a template to generate from it some random motifs
19
- motif_template = []
20
- for position in range(motif_length):
21
- # Selecting letters for position i
22
- if (0 < position < motif_length-1) and (random.random() <= prob_any):
23
- letters = ['?'] # this stands for any symbol
24
- else:
25
- n_variants = random.randrange(max_variants_cluster) + 1
26
- letters = [ random.choice(alphabet) for i in range(n_variants)]
27
- motif_template.append(letters)
28
- return motif_template
29
-
30
- def generate_motif(template: motif_template_type, alphabet:List[str]) -> str:
31
- # Sunbtituting the ? in template for any letter
32
- template_with_any = [ (letters if not '?' in letters else alphabet) for letters in template ]
33
- return ''.join([ random.choice(letters) for letters in template_with_any ])
34
-
35
- def motif_notation(motif_template: motif_template_type) -> str:
36
- def motif_notation_code(letter_choice:letter_choice_type) -> str:
37
- if len(letter_choice) == 1:
38
- return(letter_choice[0])
39
- else:
40
- return f"[{''.join(letter_choice)}]"
41
-
42
- return ''.join([ motif_notation_code(letter_choice) for letter_choice in motif_template])
43
-
44
- def generate_random(n:int, alphabet:List[str]) -> str:
45
- return ''.join([ random.choice(alphabet) for i in range(n) ])
46
-
47
- def make_cliff(motif_template:motif_template_type, alphabet:List[str] , motif:str) -> str:
48
- # Selecting conservative letter in motif
49
- pos = random.randrange(len(motif_template))
50
- while '?' in motif_template[pos]:
51
- pos = (pos + 1) % len(motif_template) # always will find letters since ends of motif can't be any symbol
52
- outlier_letters = list(set(alphabet) - set (motif_template[pos]))
53
- return motif[:pos] + random.choice(outlier_letters) + motif[pos+1:]
54
-
55
- # ====================================================================================
56
-
57
- parser = argparse.ArgumentParser(prog='MotifSequencesGenerator',
58
- description='The program generates set of sequences containing sequence motifs for SAR fucntionality testing',
59
- epilog='Unitity support: Gennadii Zakharov ')
60
-
61
- parser.add_argument("-a", "--alphabet", type=str, default=default_alphabet, help="Alphabet to generate sequences, separated by comma",)
62
- parser.add_argument("-c", "--clusters", type=int, default=1, help="Number of clusters")
63
- parser.add_argument("-s", "--sequences", type=int, default=500, help="Number of sequences in each cluster",)
64
- parser.add_argument("-m,", "--motif", type=int, default=12, help="Average length of motif",)
65
- parser.add_argument("-r,", "--random", type=int, default=4, help="Average length of random sequence parts before and after motif",)
66
- parser.add_argument("-d,", "--dispersion", type=int, default=2, help="Variation of total sequence lengths",)
67
-
68
- parser.add_argument("--max-variants-position", type=int, default=3, help="maximum number of different letters in motif position",)
69
- parser.add_argument("--cliff-probability", type=float, default=0.01, help="Probabaility to make activity cliff of a sequence",)
70
- parser.add_argument("--cliff-strength", type=float, default=4.0, help="Strength of cliff",)
71
-
72
- args = parser.parse_args()
73
-
74
- alphabet:List[str] = args.alphabet.split(',')
75
-
76
- print('cluster\tsequence_id\tsequence\tactivity\tis_cliff')
77
-
78
- line_number = 0
79
-
80
- for n_cluster in range(args.clusters):
81
- activity_average = random.random() * 10
82
- activity_dispersion = random.random()
83
-
84
- # Generatin motif template for cluster
85
- motif_length = meanrange(args.motif, args.dispersion)
86
- motif_template = generate_modif_template(motif_length, alphabet, args.max_variants_position)
87
- sys.stderr.write(f"Cluster {n_cluster:2} motif template: {motif_notation(motif_template)}\n")
88
-
89
- total_length = meanrange(args.random * 2, args.dispersion) + motif_length
90
- prefix_length = meanrange(args.random, args.dispersion//2)
91
- suffix_length = total_length - motif_length - prefix_length
92
-
93
- cliff_made = False
94
- for n_seq in range(args.sequences):
95
- line_number +=1
96
- activity = random.gauss(activity_average, activity_dispersion)
97
-
98
- motif = generate_motif(motif_template, alphabet)
99
- prefix = generate_random(prefix_length, alphabet)
100
- suffix = generate_random(suffix_length, alphabet)
101
- seq = prefix + motif + suffix
102
-
103
- is_cliff = random.random() <= args.cliff_probability
104
- if is_cliff:
105
- # Making activity cliff
106
- cliff_motif = make_cliff(motif_template, alphabet, motif)
107
- cliff_seq = prefix + cliff_motif + suffix
108
- # Recalculating activity
109
- cliff_disp = activity_dispersion * args.cliff_strength * (0.5 + random.random())
110
- activity = activity_average - cliff_disp
111
- cliff_activity = activity_average + cliff_disp
112
-
113
- sys.stderr.write(f"Cliff for sequence #{line_number:4}, cluster {n_cluster} \n")
114
- sys.stderr.write(f"{activity_average}\t{motif}\t{activity}\n")
115
- sys.stderr.write(f"{activity_average}\t{cliff_motif}\t{cliff_activity}\n")
116
- print(f"{n_cluster}\tc{n_cluster}_seq{line_number}\t{cliff_seq}\t{cliff_activity:5.2f}\t{is_cliff}")
117
- line_number +=1
118
- print(f"{n_cluster}\tc{n_cluster}_seq{line_number}\t{seq}\t{activity:5.2f}\t{is_cliff}")
119
-