@datagrok/bio 1.5.9 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +17 -5
- package/dist/package-test.js +616 -496
- package/dist/package.js +404 -473
- package/files/samples/sample_FASTA.csv +0 -1
- package/files/samples/sample_FASTA.fasta +0 -3
- package/files/samples/sample_FASTA_DNA.csv +101 -0
- package/files/samples/sample_FASTA_PT.csv +101 -0
- package/files/samples/sample_FASTA_RNA.csv +101 -0
- package/files/samples/testSmiles2.csv +12248 -0
- package/package.json +2 -2
- package/scripts/generate_fasta_csv_for_alphabets.R +70 -0
- package/src/package-test.ts +1 -0
- package/src/package.ts +105 -20
- package/src/tests/convert-test.ts +8 -8
- package/src/tests/detectors-test.ts +15 -3
- package/src/tests/renderer-test.ts +40 -18
- package/src/utils/cell-renderer.ts +47 -75
- package/src/utils/convert.ts +10 -14
- package/src/utils/multiple-sequence-alignment.ts +4 -2
- package/src/utils/notation-converter.ts +215 -55
- package/{test-Bio-34f75e5127b8-b47d4664.html → test-Bio-34f75e5127b8-7af21e5d.html} +17 -21
- package/src/utils/chem-palette.ts +0 -280
- package/src/utils/misc.ts +0 -29
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "@datagrok/bio",
|
|
3
3
|
"beta": false,
|
|
4
4
|
"friendlyName": "Bio",
|
|
5
|
-
"version": "1.
|
|
5
|
+
"version": "1.7.0",
|
|
6
6
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
7
7
|
"repository": {
|
|
8
8
|
"type": "git",
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
},
|
|
12
12
|
"dependencies": {
|
|
13
13
|
"@biowasm/aioli": ">=2.4.0",
|
|
14
|
-
"@datagrok-libraries/bio": "^2.
|
|
14
|
+
"@datagrok-libraries/bio": "^2.5.0",
|
|
15
15
|
"@datagrok-libraries/utils": "^1.0.0",
|
|
16
16
|
"@datagrok-libraries/ml": "^2.0.9",
|
|
17
17
|
"cash-dom": "latest",
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# Generates the sample FASTA CSV files (DNA, RNA and peptide alphabets).
# A random 35-letter seed sequence is built per alphabet, then 100 randomly
# mutated variants of it are written out as an (id, sequence) table.

library(tidyverse)  # provides str_length (stringr) and write_csv (readr)
library(random)     # NOTE(review): nothing below appears to call into this package - confirm before removing

alphabetDna <- c('A', 'C', 'G', 'T')
alphabetRna <- c('A', 'C', 'G', 'U')
alphabetPt <- c('G', 'L', 'Y', 'S', 'E', 'Q', 'D', 'N', 'F', 'A',
                'K', 'R', 'H', 'C', 'V', 'P', 'W', 'I', 'M', 'T')

# Directory the CSV files are written to. Defaults to the original author's
# location; set the BIO_SAMPLES_DIR environment variable to write elsewhere.
outDir <- Sys.getenv('BIO_SAMPLES_DIR',
                     unset = 'D:/HOME/atanas/Datagrok/projs/public/packages/Bio/files/samples')

# Maps a vector of 1-based indices v to letters of alph and joins them into one string.
toAlphabet <- function(v, alph){
  paste(alph[v], collapse = '')
}

# Unused placeholder kept from the original script; returns NULL.
mutateString <- function(s, p){

}

# Seed sequences, 35 letters each.
# The original also built an unused `seq` from an undefined `alphabet`,
# which aborted the script and shadowed base::seq; it is dropped here.
seqPt <- toAlphabet(sample.int(length(alphabetPt), 35, replace = TRUE), alphabetPt)
seqDna <- toAlphabet(sample.int(length(alphabetDna), 35, replace = TRUE), alphabetDna)
seqRna <- toAlphabet(sample.int(length(alphabetRna), 35, replace = TRUE), alphabetRna)

# probability to mutate, one entry per position of the seed sequence
seq_p1 <- c(100, 100, 100, 100, 100, 5, 2, 2, 50, 3,
            100, 100, 7, 2, 2, 7, 2, 33, 100, 100,
            100, 100, 100, 100, 100, 100, 100, 100, 100, 2,
            100, 100, 100, 100, 100)
seq_p2 <- c(100, 100, 7, 2, 2, 7, 2, 33, 100, 100,
            100, 100, 100, 100, 100, 100, 100, 100, 100, 2,
            100, 100, 100, 100, 100, 5, 2, 2, 50, 3,
            100, 100, 100, 100, 100)

# Mutates string s using per-position probabilities p and letters of alphabet.
# Runs 2 * nchar(s) attempts; each attempt picks a random position and, if the
# draw is below that position's probability, applies an insertion (cast 1-2),
# a deletion (cast 3-4) or a replacement (cast 5-100).
# Returns the mutated string.
seq_mutate <- function(s, p, alphabet){
  # p is indexed by string position, so both must start with the same length
  stopifnot(length(p) == str_length(s))

  res_s <- s
  res_p <- p
  # Draw from the whole alphabet (the original always sampled from the first 4 letters).
  randomLetter <- function() alphabet[sample.int(length(alphabet), 1)]

  for (i in 1:(str_length(s) * 2)) {
    len <- str_length(res_s)
    pos <- sample.int(len, 1)
    if (sample.int(100, 1) < res_p[pos]) {
      cast <- sample.int(100, 1) # mutation type probability
      if (cast <= 2) {
        # insertion after pos; the new letter gets probability 100
        res_s <- paste0(substr(res_s, 1, pos), randomLetter(), substr(res_s, pos + 1, len))
        # append() is safe at the right boundary, unlike (pos+1):length(res_p)
        res_p <- append(res_p, 100, after = pos)
      } else if (cast <= 4) {
        # deletion; never remove the last remaining letter,
        # otherwise sample.int(0, 1) fails on the next attempt
        if (len > 1) {
          res_s <- paste0(substr(res_s, 1, pos - 1), substr(res_s, pos + 1, len))
          # negative indexing is safe at both boundaries, unlike 1:(pos-1)
          res_p <- res_p[-pos]
        }
      } else {
        # replace
        res_s <- paste0(substr(res_s, 1, pos - 1), randomLetter(), substr(res_s, pos + 1, len))
      }
    }
  }
  res_s
}

# Builds 100 mutated variants of seedSeq and writes them to fileName inside outDir.
# Returns the generated data frame.
writeSample <- function(seedSeq, p, alphabet, fileName){
  df <- data.frame(id = 1:100,
                   sequence = sapply(1:100, function(id){ seq_mutate(seedSeq, p, alphabet) }))
  write_csv(df, file.path(outDir, fileName))
  df
}

fastaDna_df <- writeSample(seqDna, seq_p1, alphabetDna, 'sample_FASTA_DNA.csv')

fastaRna_df <- writeSample(seqRna, seq_p2, alphabetRna, 'sample_FASTA_RNA.csv')

fastaPt_df <- writeSample(seqPt, seq_p2, alphabetPt, 'sample_FASTA_PT.csv')
|
package/src/package-test.ts
CHANGED
package/src/package.ts
CHANGED
|
@@ -19,6 +19,10 @@ import {sequenceGetSimilarities, drawTooltip} from './utils/sequence-activity-cl
|
|
|
19
19
|
import {getMolfilesFromSeq, HELM_CORE_LIB_FILENAME} from './utils/utils';
|
|
20
20
|
import {getMacroMol} from './utils/atomic-works';
|
|
21
21
|
import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
|
|
22
|
+
import {Column} from 'datagrok-api/dg';
|
|
23
|
+
import {SEM_TYPES} from './utils/constants';
|
|
24
|
+
import { delay } from '@datagrok-libraries/utils/src/test';
|
|
25
|
+
import { TableView } from 'datagrok-api/dg';
|
|
22
26
|
|
|
23
27
|
//tags: init
|
|
24
28
|
export async function initBio(): Promise<void> {
|
|
@@ -38,6 +42,29 @@ export function macromoleculeSequenceCellRenderer(): MacromoleculeSequenceCellRe
|
|
|
38
42
|
return new MacromoleculeSequenceCellRenderer();
|
|
39
43
|
}
|
|
40
44
|
|
|
45
|
+
/**
 * Checks that a column is acceptable input for the named analysis and shows
 * a shell warning when it is not.
 *
 * The column's units tag is compared case-insensitively: it must start with
 * one of `allowedNotations` and end with one of `allowedAlphabets`.
 * An empty list means "no restriction" for that part.
 *
 * @param col column to validate
 * @param name analysis name used in the warning text
 * @param allowedNotations accepted notation prefixes of the units tag
 * @param allowedAlphabets accepted alphabet suffixes of the units tag
 * @returns true when the column passes all checks, false otherwise
 */
function checkInputColumn(col: DG.Column, name: string,
  allowedNotations: string[] = [], allowedAlphabets: string[] = []): boolean {
  if (col.semType !== DG.SEMTYPE.MACROMOLECULE) {
    grok.shell.warning(name + ' analysis is allowed for Macromolecules semantic type');
    return false;
  }

  // The units tag may be missing; treat it as empty so the checks fail cleanly instead of throwing.
  const units: string = (col.getTag(DG.TAGS.UNITS) ?? '').toUpperCase();

  const alphabetOk = allowedAlphabets.length === 0 ||
    allowedAlphabets.some((a) => units.endsWith(a.toUpperCase()));
  const notationOk = allowedNotations.length === 0 ||
    allowedNotations.some((n) => units.startsWith(n.toUpperCase()));
  if (alphabetOk && notationOk)
    return true;

  const notationAdd = allowedNotations.length === 0 ? 'any notation' :
    (`notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')} `);
  // Keyed on allowedAlphabets (the original tested allowedNotations here by mistake).
  const alphabetAdd = allowedAlphabets.length === 0 ? 'any alphabet' :
    (`alphabet${allowedAlphabets.length > 1 ? 's' : ''} ${allowedAlphabets.map((a) => `"${a}"`).join(', ')}.`);

  grok.shell.warning(name + ' analysis is allowed for Macromolecules with ' + notationAdd + ' and ' + alphabetAdd);
  return false;
}
|
|
41
68
|
|
|
42
69
|
//name: sequenceAlignment
|
|
43
70
|
//input: string alignType {choices: ['Local alignment', 'Global alignment']}
|
|
@@ -73,20 +100,23 @@ export function vdRegionViewer() {
|
|
|
73
100
|
//name: Sequence Activity Cliffs
|
|
74
101
|
//description: detect activity cliffs
|
|
75
102
|
//input: dataframe table [Input data table]
|
|
76
|
-
//input: column
|
|
103
|
+
//input: column macroMolecule {semType: Macromolecule}
|
|
77
104
|
//input: column activities
|
|
78
105
|
//input: double similarity = 80 [Similarity cutoff]
|
|
79
106
|
//input: string methodName { choices:["UMAP", "t-SNE", "SPE"] }
|
|
80
|
-
export async function activityCliffs(df: DG.DataFrame,
|
|
107
|
+
export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column, activities: DG.Column,
|
|
81
108
|
similarity: number, methodName: string): Promise<void> {
|
|
109
|
+
if (!checkInputColumn(macroMolecule, 'Activity Cliffs'))
|
|
110
|
+
return;
|
|
111
|
+
|
|
82
112
|
const axesNames = getEmbeddingColsNames(df);
|
|
83
113
|
const options = {
|
|
84
114
|
'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
|
|
85
115
|
};
|
|
86
|
-
const units =
|
|
116
|
+
const units = macroMolecule!.tags[DG.TAGS.UNITS];
|
|
87
117
|
await getActivityCliffs(
|
|
88
118
|
df,
|
|
89
|
-
|
|
119
|
+
macroMolecule,
|
|
90
120
|
axesNames,
|
|
91
121
|
'Activity cliffs',
|
|
92
122
|
activities,
|
|
@@ -110,6 +140,9 @@ export async function activityCliffs(df: DG.DataFrame, sequence: DG.Column, acti
|
|
|
110
140
|
//input: bool plotEmbeddings = true
|
|
111
141
|
export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: DG.Column, methodName: string,
|
|
112
142
|
similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean): Promise<void> {
|
|
143
|
+
if (!checkInputColumn(macroMolecule, 'Activity Cliffs'))
|
|
144
|
+
return;
|
|
145
|
+
|
|
113
146
|
const embedColsNames = getEmbeddingColsNames(table);
|
|
114
147
|
const chemSpaceParams = {
|
|
115
148
|
seqCol: macroMolecule,
|
|
@@ -133,17 +166,40 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
133
166
|
//name: To Atomic Level
|
|
134
167
|
//description: returns molfiles for each monomer from HELM library
|
|
135
168
|
//input: dataframe df [Input data table]
|
|
136
|
-
//input: column
|
|
137
|
-
export async function toAtomicLevel(df: DG.DataFrame,
|
|
169
|
+
//input: column macroMolecule {semType: Macromolecule}
|
|
170
|
+
export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column): Promise<void> {
  // Builds molfiles for every sequence in macroMolecule from the HELM core
  // monomer library and adds them to df as a new 'regenerated' molecule column.
  if (DG.Func.find({package: 'Chem', name: 'getRdKitModule'}).length === 0) {
    grok.shell.warning('Transformation to atomic level requires package "Chem" installed.');
    return;
  }
  if (!checkInputColumn(macroMolecule, 'To Atomic Level'))
    return;

  // Find the table view showing df (matched by name; the last match wins).
  let currentView: TableView | undefined;
  for (const view of grok.shell.tableViews) {
    if (df.name === view.name)
      currentView = view;
  }

  // Briefly open a sample table, then close it and return to the original view.
  // NOTE(review): presumably a workaround that forces something to initialise - confirm the intent.
  const file = await _package.files.readAsText('samples/sar-small.csv');
  const df2 = DG.DataFrame.fromCsv(file);
  const v = grok.shell.addTableView(df2);
  setTimeout(() => {
    grok.shell.closeTable(df2);
    v.close();
    // Only restore a view that was actually found; df may not be shown in any table view.
    if (currentView)
      grok.shell.v = currentView;
  }, 100);

  const monomersLibFile = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
  const monomersLibObject: any[] = JSON.parse(monomersLibFile);
  const atomicCodes = getMolfilesFromSeq(macroMolecule, monomersLibObject);
  const result = await getMacroMol(atomicCodes!);

  const col = DG.Column.fromStrings('regenerated', result);
  col.semType = DG.SEMTYPE.MOLECULE;
  col.tags[DG.TAGS.UNITS] = 'molblock';
  df.columns.add(col, true);
}
|
|
148
204
|
|
|
149
205
|
|
|
@@ -152,7 +208,10 @@ export async function toAtomicLevel(df: DG.DataFrame, sequence: DG.Column): Prom
|
|
|
152
208
|
//input: dataframe table
|
|
153
209
|
//input: column sequence { semType: Macromolecule }
|
|
154
210
|
//output: column result
|
|
155
|
-
export async function multipleSequenceAlignmentAny(table: DG.DataFrame, col: DG.Column): Promise<DG.Column> {
|
|
211
|
+
export async function multipleSequenceAlignmentAny(table: DG.DataFrame, col: DG.Column): Promise<DG.Column | null> {
|
|
212
|
+
if (!checkInputColumn(col, 'MSA', ['fasta'], ['DNA', 'RNA', 'PT']))
|
|
213
|
+
return null;
|
|
214
|
+
|
|
156
215
|
const msaCol = await runKalign(col, false);
|
|
157
216
|
table.columns.add(msaCol);
|
|
158
217
|
|
|
@@ -171,19 +230,24 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
171
230
|
// Higher priority for columns with MSA data to show with WebLogo.
|
|
172
231
|
const tv = grok.shell.tv;
|
|
173
232
|
const df = tv.dataFrame;
|
|
174
|
-
const semTypeColList = df.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);
|
|
175
|
-
let col: DG.Column | undefined = semTypeColList.find((col) => {
|
|
176
|
-
const units = col.getTag(DG.TAGS.UNITS);
|
|
177
|
-
return units ? units.indexOf('MSA') !== -1 : false;
|
|
178
|
-
});
|
|
179
|
-
if (!col)
|
|
180
|
-
col = semTypeColList[0];
|
|
181
233
|
|
|
234
|
+
const col: DG.Column | null = WebLogo.pickUpSeqCol2(df);
|
|
182
235
|
if (!col) {
|
|
183
236
|
grok.shell.error('Current table does not contain sequences');
|
|
184
237
|
return;
|
|
185
238
|
}
|
|
186
239
|
|
|
240
|
+
if (!checkInputColumn(col, 'Composition'))
|
|
241
|
+
return;
|
|
242
|
+
|
|
243
|
+
const allowedNotations: string[] = ['fasta', 'separator'];
|
|
244
|
+
const units = col.getTag(DG.TAGS.UNITS);
|
|
245
|
+
if (!allowedNotations.some((n) => units.toUpperCase().startsWith(n.toUpperCase()))) {
|
|
246
|
+
grok.shell.warning('Composition analysis is allowed for ' +
|
|
247
|
+
`notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')}.`);
|
|
248
|
+
return;
|
|
249
|
+
}
|
|
250
|
+
|
|
187
251
|
tv.addViewer('WebLogo', {sequenceColumnName: col.name});
|
|
188
252
|
}
|
|
189
253
|
|
|
@@ -201,7 +265,7 @@ function parseMacromolecule(
|
|
|
201
265
|
//name: importFasta
|
|
202
266
|
//description: Opens FASTA file
|
|
203
267
|
//tags: file-handler
|
|
204
|
-
//meta.ext: fasta, fna, ffn, faa, frn, fa
|
|
268
|
+
//meta.ext: fasta, fna, ffn, faa, frn, fa, fst
|
|
205
269
|
//input: string fileContent
|
|
206
270
|
//output: list tables
|
|
207
271
|
export function importFasta(fileContent: string): DG.DataFrame [] {
|
|
@@ -221,13 +285,34 @@ export function importFasta(fileContent: string): DG.DataFrame [] {
|
|
|
221
285
|
const descriptionsArrayCol = DG.Column.fromStrings('description', descriptionsArray);
|
|
222
286
|
const sequenceCol = DG.Column.fromStrings('sequence', sequencesArray);
|
|
223
287
|
sequenceCol.semType = 'Macromolecule';
|
|
224
|
-
|
|
225
288
|
const stats: SeqColStats = WebLogo.getStats(sequenceCol, 5, WebLogo.splitterAsFasta);
|
|
226
289
|
const seqType = stats.sameLength ? 'SEQ.MSA' : 'SEQ';
|
|
290
|
+
|
|
291
|
+
const PeptideFastaAlphabet = new Set([
|
|
292
|
+
'G', 'L', 'Y', 'S', 'E', 'Q', 'D', 'N', 'F', 'A',
|
|
293
|
+
'K', 'R', 'H', 'C', 'V', 'P', 'W', 'I', 'M', 'T',
|
|
294
|
+
]);
|
|
295
|
+
|
|
296
|
+
const DnaFastaAlphabet = new Set(['A', 'C', 'G', 'T']);
|
|
297
|
+
|
|
298
|
+
const RnaFastaAlphabet = new Set(['A', 'C', 'G', 'U']);
|
|
299
|
+
|
|
300
|
+
//const SmilesRawAlphabet = new Set([
|
|
301
|
+
// 'O', 'C', 'c', 'N', 'S', 'F', '(', ')',
|
|
302
|
+
// '1', '2', '3', '4', '5', '6', '7',
|
|
303
|
+
// '+', '-', '@', '[', ']', '/', '\\', '#', '=']);
|
|
304
|
+
|
|
227
305
|
const alphabetCandidates: [string, Set<string>][] = [
|
|
228
|
-
['
|
|
229
|
-
['
|
|
306
|
+
['PT', PeptideFastaAlphabet],
|
|
307
|
+
['DNA', DnaFastaAlphabet],
|
|
308
|
+
['RNA', RnaFastaAlphabet],
|
|
230
309
|
];
|
|
310
|
+
|
|
311
|
+
//const alphabetCandidates: [string, Set<string>][] = [
|
|
312
|
+
// ['NT', new Set(Object.keys(Nucleotides.Names))],
|
|
313
|
+
// ['PT', new Set(Object.keys(Aminoacids.Names))],
|
|
314
|
+
//];
|
|
315
|
+
|
|
231
316
|
// Calculate likelihoods for alphabet_candidates
|
|
232
317
|
const alphabetCandidatesSim: number[] = alphabetCandidates.map(
|
|
233
318
|
(c) => WebLogo.getAlphabetSimilarity(stats.freq, c[1]));
|
|
@@ -44,7 +44,7 @@ PEPTIDE1{M.K.P.S.E.Y.V}$$$
|
|
|
44
44
|
ACGTC
|
|
45
45
|
CAGTGT
|
|
46
46
|
TTCAAC
|
|
47
|
-
|
|
47
|
+
`,
|
|
48
48
|
separatorDna: `seq
|
|
49
49
|
A/C/G/T/C
|
|
50
50
|
C/A/G/T/G/T
|
|
@@ -59,7 +59,7 @@ DNA1{D(T)P.D(T)P.D(C)P.D(A)P.D(A)P.D(C)P}$$$
|
|
|
59
59
|
ACGUC
|
|
60
60
|
CAGUGU
|
|
61
61
|
UUCAAC
|
|
62
|
-
|
|
62
|
+
`,
|
|
63
63
|
separatorRna: `seq
|
|
64
64
|
A*C*G*U*C
|
|
65
65
|
C*A*G*U*G*U
|
|
@@ -90,10 +90,10 @@ RNA1{R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P}$$$
|
|
|
90
90
|
return _csvDfs[key];
|
|
91
91
|
};
|
|
92
92
|
|
|
93
|
-
function converter(tgtNotation: NOTATION,
|
|
93
|
+
/**
 * Creates a conversion function for the tests: given a source column it
 * returns that column converted to `tgtNotation`, passing `tgtSeparator`
 * through to the conversion.
 */
function converter(tgtNotation: NOTATION, tgtSeparator: string | null = null): ConverterFunc {
  return (srcCol: DG.Column): DG.Column => {
    const notationConverter = new NotationConverter(srcCol);
    return notationConverter.convert(tgtNotation, tgtSeparator);
  };
};
|
|
@@ -127,7 +127,7 @@ RNA1{R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P}$$$
|
|
|
127
127
|
await _testConvert(Samples.fastaDna, converter(NOTATION.HELM), Samples.helmDna);
|
|
128
128
|
});
|
|
129
129
|
test('testFastaRnaToHelm', async () => {
|
|
130
|
-
await _testConvert(Samples.
|
|
130
|
+
await _testConvert(Samples.fastaRna, converter(NOTATION.HELM), Samples.helmRna);
|
|
131
131
|
});
|
|
132
132
|
|
|
133
133
|
test('testSeparatorPtToFasta', async () => {
|
|
@@ -136,15 +136,15 @@ RNA1{R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P}$$$
|
|
|
136
136
|
test('testSeparatorDnaToFasta', async () => {
|
|
137
137
|
await _testConvert(Samples.separatorDna, converter(NOTATION.FASTA), Samples.fastaDna);
|
|
138
138
|
});
|
|
139
|
-
test('
|
|
139
|
+
test('testSeparatorRnaToFasta', async () => {
|
|
140
140
|
await _testConvert(Samples.separatorRna, converter(NOTATION.FASTA), Samples.fastaRna);
|
|
141
141
|
});
|
|
142
142
|
|
|
143
143
|
test('testSeparatorPtToHelm', async () => {
|
|
144
|
-
await _testConvert(Samples.
|
|
144
|
+
await _testConvert(Samples.separatorPt, converter(NOTATION.HELM), Samples.helmPt);
|
|
145
145
|
});
|
|
146
146
|
test('testSeparatorDnaToHelm', async () => {
|
|
147
|
-
await _testConvert(Samples.
|
|
147
|
+
await _testConvert(Samples.separatorDna, converter(NOTATION.HELM), Samples.helmDna);
|
|
148
148
|
});
|
|
149
149
|
test('testSeparatorRnaToHelm', async () => {
|
|
150
150
|
await _testConvert(Samples.separatorRna, converter(NOTATION.HELM), Samples.helmRna);
|
|
@@ -104,24 +104,28 @@ MWRSWY-CKHP
|
|
|
104
104
|
peptidesComplex = 'peptidesComplex',
|
|
105
105
|
fastaCsv = 'fastaCsv',
|
|
106
106
|
fastaFasta = 'fastaFasta',
|
|
107
|
+
fastaPtCsv = 'fastaPtCsv',
|
|
107
108
|
msaComplex = 'msaComplex',
|
|
108
109
|
helmCsv = 'helmCsv',
|
|
110
|
+
testDemogCsv = 'testDemogCsv',
|
|
111
|
+
testHelmCsv = 'testHelmCsv',
|
|
109
112
|
testIdCsv = 'testIdCsv',
|
|
110
113
|
testSmilesCsv = 'testSmilesCsv',
|
|
111
|
-
|
|
112
|
-
testDemogCsv = 'testDemogCsv',
|
|
114
|
+
testSmiles2Csv = 'testSmiles2Csv',
|
|
113
115
|
}
|
|
114
116
|
|
|
115
117
|
const samples: { [key: string]: string } = {
|
|
116
118
|
'peptidesComplex': 'System:AppData/Bio/samples/peptides_complex_msa.csv',
|
|
117
119
|
'fastaCsv': 'System:AppData/Bio/samples/sample_FASTA.csv',
|
|
118
120
|
'fastaFasta': 'System:AppData/Bio/samples/sample_FASTA.fasta',
|
|
121
|
+
'fastaPtCsv': 'System:AppData/Bio/samples/sample_FASTA_PT.csv',
|
|
119
122
|
'msaComplex': 'System:AppData/Bio/samples/sample_MSA.csv',
|
|
120
123
|
'helmCsv': 'System:AppData/Bio/samples/sample_HELM.csv',
|
|
121
124
|
'testDemogCsv': 'System:AppData/Bio/samples/testDemog.csv',
|
|
122
|
-
'testIdCsv': 'System:AppData/Bio/samples/id.csv',
|
|
123
125
|
'testHelmCsv': 'System:AppData/Bio/samples/testHelm.csv',
|
|
126
|
+
'testIdCsv': 'System:AppData/Bio/samples/id.csv',
|
|
124
127
|
'testSmilesCsv': 'System:AppData/Bio/samples/testSmiles.csv',
|
|
128
|
+
'testSmiles2Csv': 'System:AppData/Bio/samples/testSmiles2.csv',
|
|
125
129
|
};
|
|
126
130
|
|
|
127
131
|
const _samplesDfs: { [key: string]: Promise<DG.DataFrame> } = {};
|
|
@@ -288,6 +292,14 @@ MWRSWY-CKHP
|
|
|
288
292
|
for (const col of df.columns.toList())
|
|
289
293
|
await _testNeg(dfFunc, col.name);
|
|
290
294
|
});
|
|
295
|
+
|
|
296
|
+
test('samplesTestSmiles2NegativeSmiles', async () => {
|
|
297
|
+
await _testNeg(readSamples(Samples.testSmiles2Csv), 'SMILES');
|
|
298
|
+
});
|
|
299
|
+
|
|
300
|
+
test('samplesFastaPtPosSequence', async () => {
|
|
301
|
+
await (_testPos(readSamples(Samples.fastaPtCsv), 'sequence', 'fasta:SEQ:PT'));
|
|
302
|
+
});
|
|
291
303
|
});
|
|
292
304
|
|
|
293
305
|
export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
@@ -6,31 +6,53 @@ import * as DG from 'datagrok-api/dg';
|
|
|
6
6
|
import {importFasta, multipleSequenceAlignmentAny} from '../package';
|
|
7
7
|
|
|
8
8
|
category('renderers', () => {
  // Table views opened by the tests in this category; closed in after().
  let tvList: DG.TableView[];

  before(async () => {
    tvList = [];
  });

  after(async () => {
    tvList.forEach((tv: DG.TableView) => tv.close());
  });

  test('afterMsa', async () => {
    await _testAfterMsa();
  });

  /**
   * Imports the sample FASTA file, checks the semantic type, units and cell
   * renderer of the source sequence column, runs multiple sequence alignment
   * on it and checks the same tags on the resulting column.
   */
  async function _testAfterMsa() {
    const fastaTxt: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA.fasta');
    const df: DG.DataFrame = importFasta(fastaTxt)[0];

    const srcSeqCol: DG.Column | null = df.col('sequence');
    expect(srcSeqCol !== null, true);
    console.log('Bio: tests/renderers/afterMsa, src data loaded');

    const tv: DG.TableView = grok.shell.addTableView(df);
    // Register the view right away so after() closes it even if an expectation below throws.
    tvList.push(tv);
    console.log('Bio: tests/renderers/afterMsa, table view');

    await grok.data.detectSemanticTypes(df);
    console.log('Bio: tests/renderers/afterMsa, detectSemanticTypes');

    console.log('Bio: tests/renderers/afterMsa, src before test semType' +
      `semType="${srcSeqCol!.semType}", units="${srcSeqCol!.getTag(DG.TAGS.UNITS)}", ` +
      `cell.renderer="${srcSeqCol!.getTag('cell.renderer')}"`);
    expect(srcSeqCol!.semType, DG.SEMTYPE.MACROMOLECULE);
    expect(srcSeqCol!.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
    expect(srcSeqCol!.getTag('cell.renderer'), 'Macromolecule');
    console.log('Bio: tests/renderers/afterMsa, src semType tested');

    const msaSeqCol: DG.Column | null = await multipleSequenceAlignmentAny(df, srcSeqCol!);
    // The alignment returns null when the input column is rejected; fail with a clear expectation.
    expect(msaSeqCol !== null, true);
    console.log('Bio: tests/renderers/afterMsa, msaSeqCol created');

    tv.grid.invalidate();
    console.log('Bio: tests/renderers/afterMsa, tv.grid invalidated');

    expect(msaSeqCol!.semType, DG.SEMTYPE.MACROMOLECULE);
    expect(msaSeqCol!.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:PT');
    expect(msaSeqCol!.getTag('cell.renderer'), 'Macromolecule');
    console.log('Bio: tests/renderers/afterMsa, msa semType tested');
  }
});
|