@datagrok/bio 1.5.10 → 1.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +1 -1
- package/dist/package-test.js +522 -475
- package/dist/package.js +312 -469
- package/files/samples/sample_FASTA.csv +0 -1
- package/files/samples/sample_FASTA.fasta +0 -3
- package/files/samples/sample_FASTA_DNA.csv +101 -0
- package/files/samples/sample_FASTA_PT.csv +101 -0
- package/files/samples/sample_FASTA_RNA.csv +101 -0
- package/files/{samples → tests}/peptides_complex_msa.csv +0 -0
- package/files/{samples → tests}/peptides_simple_msa.csv +0 -0
- package/files/{samples/testSmiles.csv → tests/sar-small.csv} +0 -0
- package/files/{samples → tests}/testDemog.csv +0 -0
- package/files/{samples → tests}/testHelm.csv +0 -0
- package/files/{samples → tests}/testId.csv +0 -0
- package/files/tests/testSmiles.csv +201 -0
- package/files/{samples → tests}/testSmiles2.csv +0 -0
- package/package.json +2 -2
- package/scripts/generate_fasta_csv_for_alphabets.R +70 -0
- package/src/package-test.ts +1 -0
- package/src/package.ts +89 -27
- package/src/tests/convert-test.ts +49 -8
- package/src/tests/detectors-test.ts +12 -6
- package/src/utils/cell-renderer.ts +58 -91
- package/src/utils/convert.ts +10 -14
- package/src/utils/multiple-sequence-alignment.ts +0 -1
- package/src/utils/notation-converter.ts +178 -65
- package/{test-Bio-34f75e5127b8-936bf89b.html → test-Bio-34f75e5127b8-726a0649.html} +2 -2
- package/src/utils/chem-palette.ts +0 -280
- package/src/utils/misc.ts +0 -29
package/src/package.ts
CHANGED
|
@@ -19,6 +19,10 @@ import {sequenceGetSimilarities, drawTooltip} from './utils/sequence-activity-cl
|
|
|
19
19
|
import {getMolfilesFromSeq, HELM_CORE_LIB_FILENAME} from './utils/utils';
|
|
20
20
|
import {getMacroMol} from './utils/atomic-works';
|
|
21
21
|
import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
|
|
22
|
+
import {Column} from 'datagrok-api/dg';
|
|
23
|
+
import {SEM_TYPES} from './utils/constants';
|
|
24
|
+
import { delay } from '@datagrok-libraries/utils/src/test';
|
|
25
|
+
import { TableView } from 'datagrok-api/dg';
|
|
22
26
|
|
|
23
27
|
//tags: init
|
|
24
28
|
export async function initBio(): Promise<void> {
|
|
@@ -38,6 +42,29 @@ export function macromoleculeSequenceCellRenderer(): MacromoleculeSequenceCellRe
|
|
|
38
42
|
return new MacromoleculeSequenceCellRenderer();
|
|
39
43
|
}
|
|
40
44
|
|
|
45
|
+
function checkInputColumn(col: DG.Column, name: string,
|
|
46
|
+
allowedNotations: string[] = [], allowedAlphabets: string[] = []): boolean {
|
|
47
|
+
const units: string = col.getTag(DG.TAGS.UNITS);
|
|
48
|
+
if (col.semType !== DG.SEMTYPE.MACROMOLECULE) {
|
|
49
|
+
grok.shell.warning(name + ' analysis is allowed for Macromolecules semantic type');
|
|
50
|
+
return false;
|
|
51
|
+
} else if (
|
|
52
|
+
(allowedAlphabets.length > 0 &&
|
|
53
|
+
!allowedAlphabets.some((a) => units.toUpperCase().endsWith(a.toUpperCase()))) ||
|
|
54
|
+
(allowedNotations.length > 0 &&
|
|
55
|
+
!allowedNotations.some((n) => units.toUpperCase().startsWith(n.toUpperCase())))
|
|
56
|
+
) {
|
|
57
|
+
const notationAdd = allowedNotations.length == 0 ? 'any notation' :
|
|
58
|
+
(`notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')} `);
|
|
59
|
+
const alphabetAdd = allowedNotations.length == 0 ? 'any alphabet' :
|
|
60
|
+
(`alphabet${allowedAlphabets.length > 1 ? 's' : ''} ${allowedAlphabets.map((a) => `"${a}"`).join(', ')}.`);
|
|
61
|
+
|
|
62
|
+
grok.shell.warning(name + ' analysis is allowed for Macromolecules with ' + notationAdd + ' and ' + alphabetAdd);
|
|
63
|
+
return false;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
return true;
|
|
67
|
+
}
|
|
41
68
|
|
|
42
69
|
//name: sequenceAlignment
|
|
43
70
|
//input: string alignType {choices: ['Local alignment', 'Global alignment']}
|
|
@@ -73,20 +100,23 @@ export function vdRegionViewer() {
|
|
|
73
100
|
//name: Sequence Activity Cliffs
|
|
74
101
|
//description: detect activity cliffs
|
|
75
102
|
//input: dataframe table [Input data table]
|
|
76
|
-
//input: column
|
|
103
|
+
//input: column macroMolecule {semType: Macromolecule}
|
|
77
104
|
//input: column activities
|
|
78
105
|
//input: double similarity = 80 [Similarity cutoff]
|
|
79
106
|
//input: string methodName { choices:["UMAP", "t-SNE", "SPE"] }
|
|
80
|
-
export async function activityCliffs(df: DG.DataFrame,
|
|
107
|
+
export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column, activities: DG.Column,
|
|
81
108
|
similarity: number, methodName: string): Promise<void> {
|
|
109
|
+
if (!checkInputColumn(macroMolecule, 'Activity Cliffs'))
|
|
110
|
+
return;
|
|
111
|
+
|
|
82
112
|
const axesNames = getEmbeddingColsNames(df);
|
|
83
113
|
const options = {
|
|
84
114
|
'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
|
|
85
115
|
};
|
|
86
|
-
const units =
|
|
116
|
+
const units = macroMolecule!.tags[DG.TAGS.UNITS];
|
|
87
117
|
await getActivityCliffs(
|
|
88
118
|
df,
|
|
89
|
-
|
|
119
|
+
macroMolecule,
|
|
90
120
|
axesNames,
|
|
91
121
|
'Activity cliffs',
|
|
92
122
|
activities,
|
|
@@ -110,6 +140,9 @@ export async function activityCliffs(df: DG.DataFrame, sequence: DG.Column, acti
|
|
|
110
140
|
//input: bool plotEmbeddings = true
|
|
111
141
|
export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: DG.Column, methodName: string,
|
|
112
142
|
similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean): Promise<void> {
|
|
143
|
+
if (!checkInputColumn(macroMolecule, 'Activity Cliffs'))
|
|
144
|
+
return;
|
|
145
|
+
|
|
113
146
|
const embedColsNames = getEmbeddingColsNames(table);
|
|
114
147
|
const chemSpaceParams = {
|
|
115
148
|
seqCol: macroMolecule,
|
|
@@ -133,22 +166,40 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
133
166
|
//name: To Atomic Level
|
|
134
167
|
//description: returns molfiles for each monomer from HELM library
|
|
135
168
|
//input: dataframe df [Input data table]
|
|
136
|
-
//input: column
|
|
137
|
-
export async function toAtomicLevel(df: DG.DataFrame,
|
|
138
|
-
if (DG.Func.find({package: 'Chem', name: 'getRdKitModule'})) {
|
|
169
|
+
//input: column macroMolecule {semType: Macromolecule}
|
|
170
|
+
export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column): Promise<void> {
|
|
171
|
+
if (DG.Func.find({package: 'Chem', name: 'getRdKitModule'}).length === 0) {
|
|
139
172
|
grok.shell.warning('Transformation to atomic level requires package "Chem" installed.');
|
|
140
173
|
return;
|
|
141
174
|
}
|
|
175
|
+
if (!checkInputColumn(macroMolecule, 'To Atomic Level'))
|
|
176
|
+
return;
|
|
142
177
|
|
|
178
|
+
let currentView: TableView;
|
|
179
|
+
for (let view of grok.shell.tableViews) {
|
|
180
|
+
if (df.name === view.name) {
|
|
181
|
+
currentView = view;
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
const file = await _package.files.readAsText('tests/sar-small.csv');
|
|
185
|
+
const df2 = DG.DataFrame.fromCsv(file);
|
|
186
|
+
const v = grok.shell.addTableView(df2);
|
|
187
|
+
setTimeout(()=> {
|
|
188
|
+
grok.shell.closeTable(df2);
|
|
189
|
+
v.close();
|
|
190
|
+
grok.shell.v = currentView;
|
|
191
|
+
}, 100);
|
|
192
|
+
|
|
143
193
|
const monomersLibFile = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
|
|
144
194
|
const monomersLibObject: any[] = JSON.parse(monomersLibFile);
|
|
145
|
-
const atomicCodes = getMolfilesFromSeq(
|
|
195
|
+
const atomicCodes = getMolfilesFromSeq(macroMolecule, monomersLibObject);
|
|
146
196
|
const result = await getMacroMol(atomicCodes!);
|
|
147
197
|
|
|
148
198
|
const col = DG.Column.fromStrings('regenerated', result);
|
|
149
199
|
col.semType = DG.SEMTYPE.MOLECULE;
|
|
150
200
|
col.tags[DG.TAGS.UNITS] = 'molblock';
|
|
151
|
-
df.columns.add(col);
|
|
201
|
+
df.columns.add(col, true);
|
|
202
|
+
|
|
152
203
|
}
|
|
153
204
|
|
|
154
205
|
|
|
@@ -158,21 +209,8 @@ export async function toAtomicLevel(df: DG.DataFrame, sequence: DG.Column): Prom
|
|
|
158
209
|
//input: column sequence { semType: Macromolecule }
|
|
159
210
|
//output: column result
|
|
160
211
|
export async function multipleSequenceAlignmentAny(table: DG.DataFrame, col: DG.Column): Promise<DG.Column | null> {
|
|
161
|
-
if (col
|
|
162
|
-
grok.shell.warning(`MSA analysis is allowed for semantic type "${DG.SEMTYPE.MACROMOLECULE}" data only.`);
|
|
212
|
+
if (!checkInputColumn(col, 'MSA', ['fasta'], ['DNA', 'RNA', 'PT']))
|
|
163
213
|
return null;
|
|
164
|
-
}
|
|
165
|
-
const units: string = col.getTag(DG.TAGS.UNITS);
|
|
166
|
-
const allowedAlphabets = ['DNA', 'RNA', 'PT'];
|
|
167
|
-
const allowedNotations = ['fasta'];
|
|
168
|
-
if (!allowedAlphabets.some((a) => units.toUpperCase().endsWith(a.toUpperCase())) ||
|
|
169
|
-
!allowedNotations.some((n) => units.toUpperCase().startsWith(n.toUpperCase()))) {
|
|
170
|
-
grok.shell.warning('MSA analysis is allowed for ' +
|
|
171
|
-
`notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')} ` +
|
|
172
|
-
'and ' +
|
|
173
|
-
`alphabet${allowedAlphabets.length > 1 ? 's' : ''} ${allowedAlphabets.map((a) => `"${a}"`).join(', ')}.`);
|
|
174
|
-
return null;
|
|
175
|
-
}
|
|
176
214
|
|
|
177
215
|
const msaCol = await runKalign(col, false);
|
|
178
216
|
table.columns.add(msaCol);
|
|
@@ -199,6 +237,9 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
199
237
|
return;
|
|
200
238
|
}
|
|
201
239
|
|
|
240
|
+
if (!checkInputColumn(col, 'Composition'))
|
|
241
|
+
return;
|
|
242
|
+
|
|
202
243
|
const allowedNotations: string[] = ['fasta', 'separator'];
|
|
203
244
|
const units = col.getTag(DG.TAGS.UNITS);
|
|
204
245
|
if (!allowedNotations.some((n) => units.toUpperCase().startsWith(n.toUpperCase()))) {
|
|
@@ -224,7 +265,7 @@ function parseMacromolecule(
|
|
|
224
265
|
//name: importFasta
|
|
225
266
|
//description: Opens FASTA file
|
|
226
267
|
//tags: file-handler
|
|
227
|
-
//meta.ext: fasta, fna, ffn, faa, frn, fa
|
|
268
|
+
//meta.ext: fasta, fna, ffn, faa, frn, fa, fst
|
|
228
269
|
//input: string fileContent
|
|
229
270
|
//output: list tables
|
|
230
271
|
export function importFasta(fileContent: string): DG.DataFrame [] {
|
|
@@ -244,13 +285,34 @@ export function importFasta(fileContent: string): DG.DataFrame [] {
|
|
|
244
285
|
const descriptionsArrayCol = DG.Column.fromStrings('description', descriptionsArray);
|
|
245
286
|
const sequenceCol = DG.Column.fromStrings('sequence', sequencesArray);
|
|
246
287
|
sequenceCol.semType = 'Macromolecule';
|
|
247
|
-
|
|
248
288
|
const stats: SeqColStats = WebLogo.getStats(sequenceCol, 5, WebLogo.splitterAsFasta);
|
|
249
289
|
const seqType = stats.sameLength ? 'SEQ.MSA' : 'SEQ';
|
|
290
|
+
|
|
291
|
+
const PeptideFastaAlphabet = new Set([
|
|
292
|
+
'G', 'L', 'Y', 'S', 'E', 'Q', 'D', 'N', 'F', 'A',
|
|
293
|
+
'K', 'R', 'H', 'C', 'V', 'P', 'W', 'I', 'M', 'T',
|
|
294
|
+
]);
|
|
295
|
+
|
|
296
|
+
const DnaFastaAlphabet = new Set(['A', 'C', 'G', 'T']);
|
|
297
|
+
|
|
298
|
+
const RnaFastaAlphabet = new Set(['A', 'C', 'G', 'U']);
|
|
299
|
+
|
|
300
|
+
//const SmilesRawAlphabet = new Set([
|
|
301
|
+
// 'O', 'C', 'c', 'N', 'S', 'F', '(', ')',
|
|
302
|
+
// '1', '2', '3', '4', '5', '6', '7',
|
|
303
|
+
// '+', '-', '@', '[', ']', '/', '\\', '#', '=']);
|
|
304
|
+
|
|
250
305
|
const alphabetCandidates: [string, Set<string>][] = [
|
|
251
|
-
['
|
|
252
|
-
['
|
|
306
|
+
['PT', PeptideFastaAlphabet],
|
|
307
|
+
['DNA', DnaFastaAlphabet],
|
|
308
|
+
['RNA', RnaFastaAlphabet],
|
|
253
309
|
];
|
|
310
|
+
|
|
311
|
+
//const alphabetCandidates: [string, Set<string>][] = [
|
|
312
|
+
// ['NT', new Set(Object.keys(Nucleotides.Names))],
|
|
313
|
+
// ['PT', new Set(Object.keys(Aminoacids.Names))],
|
|
314
|
+
//];
|
|
315
|
+
|
|
254
316
|
// Calculate likelihoods for alphabet_candidates
|
|
255
317
|
const alphabetCandidatesSim: number[] = alphabetCandidates.map(
|
|
256
318
|
(c) => WebLogo.getAlphabetSimilarity(stats.freq, c[1]));
|
|
@@ -22,6 +22,10 @@ category('converters', () => {
|
|
|
22
22
|
fastaRna = 'fastaRna',
|
|
23
23
|
separatorRna = 'separatorRna',
|
|
24
24
|
helmRna = 'helmRna',
|
|
25
|
+
|
|
26
|
+
fastaGaps = 'fastaGaps',
|
|
27
|
+
separatorGaps = 'separatorGaps',
|
|
28
|
+
helmGaps = 'helmGaps'
|
|
25
29
|
}
|
|
26
30
|
|
|
27
31
|
const _csvTxts: { [key: string]: string } = {
|
|
@@ -44,7 +48,7 @@ PEPTIDE1{M.K.P.S.E.Y.V}$$$
|
|
|
44
48
|
ACGTC
|
|
45
49
|
CAGTGT
|
|
46
50
|
TTCAAC
|
|
47
|
-
|
|
51
|
+
`,
|
|
48
52
|
separatorDna: `seq
|
|
49
53
|
A/C/G/T/C
|
|
50
54
|
C/A/G/T/G/T
|
|
@@ -59,7 +63,7 @@ DNA1{D(T)P.D(T)P.D(C)P.D(A)P.D(A)P.D(C)P}$$$
|
|
|
59
63
|
ACGUC
|
|
60
64
|
CAGUGU
|
|
61
65
|
UUCAAC
|
|
62
|
-
|
|
66
|
+
`,
|
|
63
67
|
separatorRna: `seq
|
|
64
68
|
A*C*G*U*C
|
|
65
69
|
C*A*G*U*G*U
|
|
@@ -69,6 +73,24 @@ U*U*C*A*A*C
|
|
|
69
73
|
RNA1{R(A)P.R(C)P.R(G)P.R(U)P.R(C)P}$$$
|
|
70
74
|
RNA1{R(C)P.R(A)P.R(G)P.R(U)P.R(G)P.R(U)P}$$$
|
|
71
75
|
RNA1{R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P}$$$
|
|
76
|
+
`,
|
|
77
|
+
|
|
78
|
+
fastaGaps: `seq
|
|
79
|
+
FW-PH-EY
|
|
80
|
+
-YNRQWYV-
|
|
81
|
+
MKP---SEYV
|
|
82
|
+
`,
|
|
83
|
+
|
|
84
|
+
separatorGaps: `seq
|
|
85
|
+
F/W//P/H//E/Y
|
|
86
|
+
//Y/N/R/Q/W/Y/V//
|
|
87
|
+
M/K/P////S/E/Y/V
|
|
88
|
+
`,
|
|
89
|
+
|
|
90
|
+
helmGaps: `seq
|
|
91
|
+
PEPTIDE1{F.W.*.P.H.*.E.Y}$$$
|
|
92
|
+
PEPTIDE1{*.Y.N.R.Q.W.Y.V.*}$$$
|
|
93
|
+
PEPTIDE1{M.K.P.*.*.*.S.E.Y.V}$$$
|
|
72
94
|
`,
|
|
73
95
|
};
|
|
74
96
|
|
|
@@ -90,10 +112,10 @@ RNA1{R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P}$$$
|
|
|
90
112
|
return _csvDfs[key];
|
|
91
113
|
};
|
|
92
114
|
|
|
93
|
-
function converter(tgtNotation: NOTATION,
|
|
115
|
+
function converter(tgtNotation: NOTATION, tgtSeparator: string | null = null): ConverterFunc {
|
|
94
116
|
return function(srcCol: DG.Column): DG.Column {
|
|
95
117
|
const converter = new NotationConverter(srcCol);
|
|
96
|
-
const resCol = converter.convert(
|
|
118
|
+
const resCol = converter.convert(tgtNotation, tgtSeparator);
|
|
97
119
|
return resCol;
|
|
98
120
|
};
|
|
99
121
|
};
|
|
@@ -110,6 +132,7 @@ RNA1{R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P}$$$
|
|
|
110
132
|
expectArray(resCol.toList(), tgtCol.toList());
|
|
111
133
|
}
|
|
112
134
|
|
|
135
|
+
// FASTA tests
|
|
113
136
|
test('testFastaPtToSeparator', async () => {
|
|
114
137
|
await _testConvert(Samples.fastaPt, converter(NOTATION.SEPARATOR, '-'), Samples.separatorPt);
|
|
115
138
|
});
|
|
@@ -127,26 +150,44 @@ RNA1{R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P}$$$
|
|
|
127
150
|
await _testConvert(Samples.fastaDna, converter(NOTATION.HELM), Samples.helmDna);
|
|
128
151
|
});
|
|
129
152
|
test('testFastaRnaToHelm', async () => {
|
|
130
|
-
await _testConvert(Samples.
|
|
153
|
+
await _testConvert(Samples.fastaRna, converter(NOTATION.HELM), Samples.helmRna);
|
|
131
154
|
});
|
|
132
155
|
|
|
156
|
+
test('testFastaGapsToSeparator', async () => {
|
|
157
|
+
await _testConvert(Samples.fastaGaps, converter(NOTATION.SEPARATOR, '/'), Samples.separatorGaps);
|
|
158
|
+
});
|
|
159
|
+
test('testFastaGapsToHelm', async () => {
|
|
160
|
+
await _testConvert(Samples.fastaGaps, converter(NOTATION.SEPARATOR), Samples.helmGaps);
|
|
161
|
+
});
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
// SEPARATOR tests
|
|
133
165
|
test('testSeparatorPtToFasta', async () => {
|
|
134
166
|
await _testConvert(Samples.separatorPt, converter(NOTATION.FASTA), Samples.fastaPt);
|
|
135
167
|
});
|
|
136
168
|
test('testSeparatorDnaToFasta', async () => {
|
|
137
169
|
await _testConvert(Samples.separatorDna, converter(NOTATION.FASTA), Samples.fastaDna);
|
|
138
170
|
});
|
|
139
|
-
test('
|
|
171
|
+
test('testSeparatorRnaToFasta', async () => {
|
|
140
172
|
await _testConvert(Samples.separatorRna, converter(NOTATION.FASTA), Samples.fastaRna);
|
|
141
173
|
});
|
|
142
174
|
|
|
143
175
|
test('testSeparatorPtToHelm', async () => {
|
|
144
|
-
await _testConvert(Samples.
|
|
176
|
+
await _testConvert(Samples.separatorPt, converter(NOTATION.HELM), Samples.helmPt);
|
|
145
177
|
});
|
|
146
178
|
test('testSeparatorDnaToHelm', async () => {
|
|
147
|
-
await _testConvert(Samples.
|
|
179
|
+
await _testConvert(Samples.separatorDna, converter(NOTATION.HELM), Samples.helmDna);
|
|
148
180
|
});
|
|
149
181
|
test('testSeparatorRnaToHelm', async () => {
|
|
150
182
|
await _testConvert(Samples.separatorRna, converter(NOTATION.HELM), Samples.helmRna);
|
|
151
183
|
});
|
|
184
|
+
test('testSeparatorGapsToFasta', async () => {
|
|
185
|
+
await _testConvert(Samples.separatorGaps, converter(NOTATION.FASTA), Samples.fastaGaps);
|
|
186
|
+
});
|
|
187
|
+
test('testSeparatorGapsToHelm', async () => {
|
|
188
|
+
await _testConvert(Samples.separatorGaps, converter(NOTATION.HELM), Samples.helmGaps);
|
|
189
|
+
});
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
// HELM tests: TODO
|
|
152
193
|
});
|
|
@@ -104,6 +104,7 @@ MWRSWY-CKHP
|
|
|
104
104
|
peptidesComplex = 'peptidesComplex',
|
|
105
105
|
fastaCsv = 'fastaCsv',
|
|
106
106
|
fastaFasta = 'fastaFasta',
|
|
107
|
+
fastaPtCsv = 'fastaPtCsv',
|
|
107
108
|
msaComplex = 'msaComplex',
|
|
108
109
|
helmCsv = 'helmCsv',
|
|
109
110
|
testDemogCsv = 'testDemogCsv',
|
|
@@ -114,16 +115,17 @@ MWRSWY-CKHP
|
|
|
114
115
|
}
|
|
115
116
|
|
|
116
117
|
const samples: { [key: string]: string } = {
|
|
117
|
-
'peptidesComplex': 'System:AppData/Bio/
|
|
118
|
+
'peptidesComplex': 'System:AppData/Bio/tests/peptides_complex_msa.csv',
|
|
118
119
|
'fastaCsv': 'System:AppData/Bio/samples/sample_FASTA.csv',
|
|
119
120
|
'fastaFasta': 'System:AppData/Bio/samples/sample_FASTA.fasta',
|
|
121
|
+
'fastaPtCsv': 'System:AppData/Bio/samples/sample_FASTA_PT.csv',
|
|
120
122
|
'msaComplex': 'System:AppData/Bio/samples/sample_MSA.csv',
|
|
121
123
|
'helmCsv': 'System:AppData/Bio/samples/sample_HELM.csv',
|
|
122
|
-
'testDemogCsv': 'System:AppData/Bio/
|
|
123
|
-
'testHelmCsv': 'System:AppData/Bio/
|
|
124
|
-
'testIdCsv': 'System:AppData/Bio/
|
|
125
|
-
'testSmilesCsv': 'System:AppData/Bio/
|
|
126
|
-
'testSmiles2Csv': 'System:AppData/Bio/
|
|
124
|
+
'testDemogCsv': 'System:AppData/Bio/tests/testDemog.csv',
|
|
125
|
+
'testHelmCsv': 'System:AppData/Bio/tests/testHelm.csv',
|
|
126
|
+
'testIdCsv': 'System:AppData/Bio/tests/id.csv',
|
|
127
|
+
'testSmilesCsv': 'System:AppData/Bio/tests/testSmiles.csv',
|
|
128
|
+
'testSmiles2Csv': 'System:AppData/Bio/tests/testSmiles2.csv',
|
|
127
129
|
};
|
|
128
130
|
|
|
129
131
|
const _samplesDfs: { [key: string]: Promise<DG.DataFrame> } = {};
|
|
@@ -294,6 +296,10 @@ MWRSWY-CKHP
|
|
|
294
296
|
test('samplesTestSmiles2NegativeSmiles', async () => {
|
|
295
297
|
await _testNeg(readSamples(Samples.testSmiles2Csv), 'SMILES');
|
|
296
298
|
});
|
|
299
|
+
|
|
300
|
+
test('samplesFastaPtPosSequence', async () => {
|
|
301
|
+
await (_testPos(readSamples(Samples.fastaPtCsv), 'sequence', 'fasta:SEQ:PT'));
|
|
302
|
+
});
|
|
297
303
|
});
|
|
298
304
|
|
|
299
305
|
export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
@@ -1,33 +1,36 @@
|
|
|
1
|
-
import * as C from
|
|
2
|
-
import {getSeparator} from "./misc";
|
|
3
|
-
import {ChemPalette} from "./chem-palette";
|
|
1
|
+
import * as C from './constants';
|
|
4
2
|
import * as DG from 'datagrok-api/dg';
|
|
5
|
-
import {AminoacidsPalettes} from
|
|
6
|
-
import {NucleotidesPalettes} from
|
|
7
|
-
import {UnknownSeqPalettes} from
|
|
8
|
-
import {SplitterFunc, WebLogo} from
|
|
9
|
-
import {SeqPalette} from
|
|
3
|
+
import {AminoacidsPalettes} from '@datagrok-libraries/bio/src/aminoacids';
|
|
4
|
+
import {NucleotidesPalettes} from '@datagrok-libraries/bio/src/nucleotides';
|
|
5
|
+
import {UnknownSeqPalettes} from '@datagrok-libraries/bio/src/unknown';
|
|
6
|
+
import {SplitterFunc, WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
7
|
+
import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
|
|
10
8
|
import * as ui from 'datagrok-api/ui';
|
|
11
9
|
|
|
12
10
|
const lru = new DG.LruCache<any, any>();
|
|
11
|
+
const undefinedColor = 'rgb(100,100,100)';
|
|
13
12
|
|
|
14
|
-
function getPalleteByType(paletteType: string): SeqPalette
|
|
13
|
+
function getPalleteByType(paletteType: string): SeqPalette {
|
|
15
14
|
switch (paletteType) {
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
15
|
+
case 'PT':
|
|
16
|
+
return AminoacidsPalettes.GrokGroups;
|
|
17
|
+
case 'NT':
|
|
18
|
+
return NucleotidesPalettes.Chromatogram;
|
|
19
|
+
case 'DNA':
|
|
20
|
+
return NucleotidesPalettes.Chromatogram;
|
|
21
|
+
case 'RNA':
|
|
22
|
+
return NucleotidesPalettes.Chromatogram;
|
|
23
|
+
// other
|
|
24
|
+
default:
|
|
25
|
+
return UnknownSeqPalettes.Color;
|
|
23
26
|
}
|
|
24
27
|
}
|
|
25
28
|
|
|
26
29
|
export function processSequence(subParts: string[]): [string[], boolean] {
|
|
27
30
|
const simplified = !subParts.some((amino, index) =>
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
+
amino.length > 1 &&
|
|
32
|
+
index != 0 &&
|
|
33
|
+
index != subParts.length - 1);
|
|
31
34
|
|
|
32
35
|
const text: string[] = [];
|
|
33
36
|
const gap = simplified ? '' : ' ';
|
|
@@ -39,6 +42,7 @@ export function processSequence(subParts: string[]): [string[], boolean] {
|
|
|
39
42
|
});
|
|
40
43
|
return [text, simplified];
|
|
41
44
|
}
|
|
45
|
+
|
|
42
46
|
/**
|
|
43
47
|
* A function that prints a string aligned to left or centered.
|
|
44
48
|
*
|
|
@@ -48,21 +52,25 @@ export function processSequence(subParts: string[]): [string[], boolean] {
|
|
|
48
52
|
* @param {number} h Height.
|
|
49
53
|
* @param {CanvasRenderingContext2D} g Canvas rendering context.
|
|
50
54
|
* @param {string} s String to print.
|
|
51
|
-
* @param {string} [color=
|
|
55
|
+
* @param {string} [color=undefinedColor] String color.
|
|
52
56
|
* @param {number} [pivot=0] Pivot.
|
|
53
57
|
* @param {boolean} [left=false] Is left aligned.
|
|
54
|
-
* @param {boolean} [hideMod=false] Hide amino acid redidue modifications.
|
|
55
58
|
* @param {number} [transparencyRate=1.0] Transparency rate where 1.0 is fully transparent
|
|
59
|
+
* @param {string} [separator=''] Separator between sequence elements.
|
|
60
|
+
* @param {boolean} [last=false] Whether this is the last element; if so, no separator is appended after it.
|
|
56
61
|
* @return {number} x coordinate to start printing at.
|
|
57
62
|
*/
|
|
58
63
|
function printLeftOrCentered(
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
64
|
+
x: number, y: number, w: number, h: number,
|
|
65
|
+
g: CanvasRenderingContext2D, s: string, color = undefinedColor,
|
|
66
|
+
pivot: number = 0, left = false, transparencyRate: number = 1.0,
|
|
67
|
+
separator: string = '', last: boolean = false): number {
|
|
63
68
|
g.textAlign = 'start';
|
|
64
|
-
|
|
69
|
+
const colorPart = s.substring(0);
|
|
65
70
|
let grayPart = separator;
|
|
71
|
+
if (last) {
|
|
72
|
+
grayPart = '';
|
|
73
|
+
}
|
|
66
74
|
const textSize = g.measureText(colorPart + grayPart);
|
|
67
75
|
const indent = 5;
|
|
68
76
|
|
|
@@ -87,54 +95,12 @@ function printLeftOrCentered(
|
|
|
87
95
|
return x + dx + colorTextSize.width;
|
|
88
96
|
}
|
|
89
97
|
}
|
|
90
|
-
function renderSequense(
|
|
91
|
-
g: CanvasRenderingContext2D, x: number, y: number, w: number, h: number, gridCell: DG.GridCell,
|
|
92
|
-
cellStyle: DG.GridCellStyle,
|
|
93
|
-
): void {
|
|
94
|
-
const grid = gridCell.grid;
|
|
95
|
-
const cell = gridCell.cell;
|
|
96
|
-
const [type, subtype, paletteType] = gridCell.cell.column.getTag(DG.TAGS.UNITS).split(":");
|
|
97
|
-
w = grid ? Math.min(grid.canvas.width - x, w) : g.canvas.width - x;
|
|
98
|
-
g.save();
|
|
99
|
-
g.beginPath();
|
|
100
|
-
g.rect(x, y, w, h);
|
|
101
|
-
g.clip();
|
|
102
|
-
g.font = '12px monospace';
|
|
103
|
-
g.textBaseline = 'top';
|
|
104
|
-
const s: string = cell.value ?? '';
|
|
105
|
-
|
|
106
|
-
//TODO: can this be replaced/merged with splitSequence?
|
|
107
|
-
const units = gridCell.cell.column.getTag(DG.TAGS.UNITS);
|
|
108
|
-
|
|
109
|
-
const palette = getPalleteByType(paletteType);
|
|
110
|
-
|
|
111
|
-
const separator = gridCell.cell.column.getTag('separator') ?? '';
|
|
112
|
-
const splitterFunc: SplitterFunc = WebLogo.getSplitter(units, gridCell.cell.column.getTag('separator') );// splitter,
|
|
113
|
-
|
|
114
|
-
const subParts:string[] = splitterFunc(cell.value);
|
|
115
|
-
|
|
116
|
-
const textSize = g.measureText(subParts.join(''));
|
|
117
|
-
let x1 = Math.max(x, x + (w - textSize.width) / 2);
|
|
118
|
-
|
|
119
|
-
subParts.forEach((amino, index) => {
|
|
120
|
-
let [color, outerAmino,, pivot] = ChemPalette.getColorAAPivot(amino);
|
|
121
|
-
color = palette.get(amino);
|
|
122
|
-
g.fillStyle = ChemPalette.undefinedColor;
|
|
123
|
-
x1 = printLeftOrCentered(x1, y, w, h, g, amino, color, pivot, true, false, 1.0, separator);
|
|
124
|
-
});
|
|
125
|
-
|
|
126
|
-
g.restore();
|
|
127
|
-
}
|
|
128
98
|
|
|
129
99
|
export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
130
|
-
|
|
131
|
-
get
|
|
132
|
-
|
|
133
|
-
get
|
|
134
|
-
|
|
135
|
-
get defaultHeight(): number {return 30;}
|
|
136
|
-
|
|
137
|
-
get defaultWidth(): number {return 230;}
|
|
100
|
+
get name(): string { return 'macromoleculeSequence'; }
|
|
101
|
+
get cellType(): string { return C.SEM_TYPES.Macro_Molecule; }
|
|
102
|
+
get defaultHeight(): number { return 30; }
|
|
103
|
+
get defaultWidth(): number { return 230; }
|
|
138
104
|
|
|
139
105
|
/**
|
|
140
106
|
* Cell renderer function.
|
|
@@ -149,32 +115,32 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
149
115
|
* @memberof MacromoleculeSequenceCellRenderer
|
|
150
116
|
*/
|
|
151
117
|
render(
|
|
152
|
-
|
|
153
|
-
|
|
118
|
+
g: CanvasRenderingContext2D, x: number, y: number, w: number, h: number, gridCell: DG.GridCell,
|
|
119
|
+
cellStyle: DG.GridCellStyle
|
|
154
120
|
): void {
|
|
155
121
|
const grid = gridCell.grid;
|
|
156
122
|
const cell = gridCell.cell;
|
|
157
123
|
const tag = gridCell.cell.column.getTag(DG.TAGS.UNITS);
|
|
158
124
|
if (tag === 'HELM') {
|
|
159
|
-
|
|
125
|
+
const host = ui.div([], {style: {width: `${w}px`, height: `${h}px`}});
|
|
160
126
|
host.setAttribute('dataformat', 'helm');
|
|
161
127
|
host.setAttribute('data', gridCell.cell.value);
|
|
162
128
|
gridCell.element = host;
|
|
163
129
|
//@ts-ignore
|
|
164
|
-
|
|
165
|
-
|
|
130
|
+
const canvas = new JSDraw2.Editor(host, {width: w, height: h, skin: 'w8', viewonly: true});
|
|
131
|
+
const formula = canvas.getFormula(true);
|
|
166
132
|
if (!formula) {
|
|
167
133
|
gridCell.element = ui.divText(gridCell.cell.value, {style: {color: 'red'}});
|
|
168
134
|
} else {
|
|
169
135
|
gridCell.element = host;
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
136
|
+
const molWeight = Math.round(canvas.getMolWeight() * 100) / 100;
|
|
137
|
+
const coef = Math.round(canvas.getExtinctionCoefficient(true) * 100) / 100;
|
|
138
|
+
const molfile = canvas.getMolfile();
|
|
139
|
+
const result = formula + ', ' + molWeight + ', ' + coef + ', ' + molfile;
|
|
174
140
|
lru.set(gridCell.cell.value, result);
|
|
175
141
|
}
|
|
176
142
|
} else {
|
|
177
|
-
const [type, subtype, paletteType] =
|
|
143
|
+
const [type, subtype, paletteType] = gridCell.cell.column.getTag(DG.TAGS.UNITS).split(':');
|
|
178
144
|
w = grid ? Math.min(grid.canvas.width - x, w) : g.canvas.width - x;
|
|
179
145
|
g.save();
|
|
180
146
|
g.beginPath();
|
|
@@ -190,19 +156,20 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
190
156
|
const palette = getPalleteByType(paletteType);
|
|
191
157
|
|
|
192
158
|
const separator = gridCell.cell.column.getTag('separator') ?? '';
|
|
193
|
-
const splitterFunc: SplitterFunc = WebLogo.getSplitter(units, gridCell.cell.column.getTag('separator')
|
|
159
|
+
const splitterFunc: SplitterFunc = WebLogo.getSplitter(units, gridCell.cell.column.getTag('separator'));
|
|
194
160
|
|
|
195
|
-
const subParts:string[] =
|
|
161
|
+
const subParts: string[] = splitterFunc(cell.value);
|
|
196
162
|
// console.log(subParts);
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
let x1 = Math.max(x, x + (w - textSize.width) / 2);
|
|
200
|
-
|
|
163
|
+
let x1 = x;
|
|
164
|
+
let color = undefinedColor;
|
|
201
165
|
subParts.forEach((amino, index) => {
|
|
202
|
-
let [color, outerAmino,, pivot] = ChemPalette.getColorAAPivot(amino);
|
|
203
166
|
color = palette.get(amino);
|
|
204
|
-
g.fillStyle =
|
|
205
|
-
|
|
167
|
+
g.fillStyle = undefinedColor;
|
|
168
|
+
let last = false;
|
|
169
|
+
if (index === subParts.length - 1) {
|
|
170
|
+
last = true;
|
|
171
|
+
}
|
|
172
|
+
x1 = printLeftOrCentered(x1, y, w, h, g, amino, color, 0, true, 1.0, separator, last);
|
|
206
173
|
});
|
|
207
174
|
|
|
208
175
|
g.restore();
|
package/src/utils/convert.ts
CHANGED
|
@@ -16,26 +16,22 @@ export function convert(col: DG.Column): void {
|
|
|
16
16
|
NOTATION.SEPARATOR,
|
|
17
17
|
NOTATION.HELM
|
|
18
18
|
];
|
|
19
|
-
const
|
|
20
|
-
const
|
|
19
|
+
const separatorArray = ['-', '.', '/'];
|
|
20
|
+
const filteredNotations = notations.filter((e) => e !== current);
|
|
21
|
+
const targetNotationInput = ui.choiceInput('Convert to', filteredNotations[0], filteredNotations);
|
|
21
22
|
|
|
22
|
-
const separatorInput = ui.choiceInput('separator',
|
|
23
|
+
const separatorInput = ui.choiceInput('Choose separator', separatorArray[0], separatorArray);
|
|
23
24
|
|
|
24
|
-
ui.dialog('Convert sequence')
|
|
25
|
+
ui.dialog('Convert sequence notation')
|
|
25
26
|
.add(ui.div([
|
|
26
|
-
ui.h1('
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
.add(ui.div([
|
|
31
|
-
ui.h1('Separator'),
|
|
32
|
-
separatorInput,
|
|
33
|
-
|
|
27
|
+
ui.h1('Current notation: ' + current),
|
|
28
|
+
targetNotationInput.root,
|
|
29
|
+
// TODO: conditional separator input
|
|
30
|
+
separatorInput.root
|
|
34
31
|
]))
|
|
35
32
|
.onOK(() => {
|
|
36
33
|
//TODO: create new converted column
|
|
37
|
-
|
|
38
|
-
const targetNotation: NOTATION = targetNotationInput.value as NOTATION;
|
|
34
|
+
const targetNotation = targetNotationInput.value as NOTATION;
|
|
39
35
|
const separator = separatorInput.value!;
|
|
40
36
|
const newColumn = converter.convert(targetNotation, separator);
|
|
41
37
|
col.dataFrame.columns.add(newColumn);
|