@datagrok/bio 1.5.8 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +24 -12
- package/dist/package-test.js +627 -500
- package/dist/package.js +385 -474
- package/files/samples/sample_FASTA.csv +0 -1
- package/files/samples/sample_FASTA.fasta +0 -3
- package/files/samples/testDemog.csv +5851 -0
- package/files/samples/testHelm.csv +6 -0
- package/files/samples/{id.csv → testId.csv} +0 -0
- package/files/samples/{sar-small.csv → testSmiles.csv} +0 -0
- package/files/samples/testSmiles2.csv +12248 -0
- package/package.json +2 -2
- package/src/package-test.ts +1 -0
- package/src/package.ts +86 -19
- package/src/tests/convert-test.ts +8 -8
- package/src/tests/detectors-test.ts +48 -6
- package/src/tests/renderer-test.ts +40 -18
- package/src/utils/cell-renderer.ts +24 -60
- package/src/utils/convert.ts +10 -14
- package/src/utils/multiple-sequence-alignment.ts +4 -2
- package/src/utils/notation-converter.ts +215 -55
- package/{test-Bio-34f75e5127b8-c4c5a3dc.html → test-Bio-34f75e5127b8-95c6fae9.html} +17 -20
- package/src/utils/chem-palette.ts +0 -280
- package/src/utils/misc.ts +0 -29
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "@datagrok/bio",
|
|
3
3
|
"beta": false,
|
|
4
4
|
"friendlyName": "Bio",
|
|
5
|
-
"version": "1.
|
|
5
|
+
"version": "1.6.0",
|
|
6
6
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
7
7
|
"repository": {
|
|
8
8
|
"type": "git",
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
},
|
|
12
12
|
"dependencies": {
|
|
13
13
|
"@biowasm/aioli": ">=2.4.0",
|
|
14
|
-
"@datagrok-libraries/bio": "^2.
|
|
14
|
+
"@datagrok-libraries/bio": "^2.5.0",
|
|
15
15
|
"@datagrok-libraries/utils": "^1.0.0",
|
|
16
16
|
"@datagrok-libraries/ml": "^2.0.9",
|
|
17
17
|
"cash-dom": "latest",
|
package/src/package-test.ts
CHANGED
package/src/package.ts
CHANGED
|
@@ -19,6 +19,8 @@ import {sequenceGetSimilarities, drawTooltip} from './utils/sequence-activity-cl
|
|
|
19
19
|
import {getMolfilesFromSeq, HELM_CORE_LIB_FILENAME} from './utils/utils';
|
|
20
20
|
import {getMacroMol} from './utils/atomic-works';
|
|
21
21
|
import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
|
|
22
|
+
import {Column} from 'datagrok-api/dg';
|
|
23
|
+
import {SEM_TYPES} from './utils/constants';
|
|
22
24
|
|
|
23
25
|
//tags: init
|
|
24
26
|
export async function initBio(): Promise<void> {
|
|
@@ -38,6 +40,29 @@ export function macromoleculeSequenceCellRenderer(): MacromoleculeSequenceCellRe
|
|
|
38
40
|
return new MacromoleculeSequenceCellRenderer();
|
|
39
41
|
}
|
|
40
42
|
|
|
43
|
+
/**
 * Checks whether a column is acceptable input for a macromolecule analysis and
 * shows a shell warning explaining the restriction when it is not.
 *
 * The notation is matched against the start of the column's units tag and the
 * alphabet against its end, both case-insensitively. An empty list means
 * "no restriction" for that aspect.
 *
 * @param col Column to validate.
 * @param name Analysis name used as the prefix of the warning message.
 * @param allowedNotations Accepted notation prefixes of the units tag; empty allows any.
 * @param allowedAlphabets Accepted alphabet suffixes of the units tag; empty allows any.
 * @returns `true` when the column passes every check, `false` after a warning was shown.
 */
function checkInputColumn(col: DG.Column, name: string,
  allowedNotations: string[] = [], allowedAlphabets: string[] = []): boolean {
  if (col.semType !== DG.SEMTYPE.MACROMOLECULE) {
    grok.shell.warning(name + ' analysis is allowed for Macromolecules semantic type');
    return false;
  }

  // The units tag may be missing on a column (other code in this package guards
  // for that); treat it as an empty string so a restricted check fails with a
  // warning instead of throwing on `.toUpperCase()`.
  const units: string = (col.getTag(DG.TAGS.UNITS) ?? '').toUpperCase();

  const notationAllowed = allowedNotations.length === 0 ||
    allowedNotations.some((n) => units.startsWith(n.toUpperCase()));
  const alphabetAllowed = allowedAlphabets.length === 0 ||
    allowedAlphabets.some((a) => units.endsWith(a.toUpperCase()));
  if (notationAllowed && alphabetAllowed)
    return true;

  const quoteAll = (items: string[]): string => items.map((item) => `"${item}"`).join(', ');
  const notationAdd = allowedNotations.length === 0 ? 'any notation' :
    `notation${allowedNotations.length > 1 ? 's' : ''} ${quoteAll(allowedNotations)}`;
  // Each description is keyed on its own list: the alphabet text must depend on
  // allowedAlphabets, not on allowedNotations.
  const alphabetAdd = allowedAlphabets.length === 0 ? 'any alphabet' :
    `alphabet${allowedAlphabets.length > 1 ? 's' : ''} ${quoteAll(allowedAlphabets)}`;

  grok.shell.warning(`${name} analysis is allowed for Macromolecules with ${notationAdd} and ${alphabetAdd}.`);
  return false;
}
|
|
41
66
|
|
|
42
67
|
//name: sequenceAlignment
|
|
43
68
|
//input: string alignType {choices: ['Local alignment', 'Global alignment']}
|
|
@@ -73,20 +98,23 @@ export function vdRegionViewer() {
|
|
|
73
98
|
//name: Sequence Activity Cliffs
|
|
74
99
|
//description: detect activity cliffs
|
|
75
100
|
//input: dataframe table [Input data table]
|
|
76
|
-
//input: column
|
|
101
|
+
//input: column macroMolecule {semType: Macromolecule}
|
|
77
102
|
//input: column activities
|
|
78
103
|
//input: double similarity = 80 [Similarity cutoff]
|
|
79
104
|
//input: string methodName { choices:["UMAP", "t-SNE", "SPE"] }
|
|
80
|
-
export async function activityCliffs(df: DG.DataFrame,
|
|
105
|
+
export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column, activities: DG.Column,
|
|
81
106
|
similarity: number, methodName: string): Promise<void> {
|
|
107
|
+
if (!checkInputColumn(macroMolecule, 'Activity Cliffs'))
|
|
108
|
+
return;
|
|
109
|
+
|
|
82
110
|
const axesNames = getEmbeddingColsNames(df);
|
|
83
111
|
const options = {
|
|
84
112
|
'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
|
|
85
113
|
};
|
|
86
|
-
const units =
|
|
114
|
+
const units = macroMolecule!.tags[DG.TAGS.UNITS];
|
|
87
115
|
await getActivityCliffs(
|
|
88
116
|
df,
|
|
89
|
-
|
|
117
|
+
macroMolecule,
|
|
90
118
|
axesNames,
|
|
91
119
|
'Activity cliffs',
|
|
92
120
|
activities,
|
|
@@ -110,6 +138,9 @@ export async function activityCliffs(df: DG.DataFrame, sequence: DG.Column, acti
|
|
|
110
138
|
//input: bool plotEmbeddings = true
|
|
111
139
|
export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: DG.Column, methodName: string,
|
|
112
140
|
similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean): Promise<void> {
|
|
141
|
+
if (!checkInputColumn(macroMolecule, 'Activity Cliffs'))
|
|
142
|
+
return;
|
|
143
|
+
|
|
113
144
|
const embedColsNames = getEmbeddingColsNames(table);
|
|
114
145
|
const chemSpaceParams = {
|
|
115
146
|
seqCol: macroMolecule,
|
|
@@ -133,11 +164,18 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
133
164
|
//name: To Atomic Level
|
|
134
165
|
//description: returns molfiles for each monomer from HELM library
|
|
135
166
|
//input: dataframe df [Input data table]
|
|
136
|
-
//input: column
|
|
137
|
-
export async function toAtomicLevel(df: DG.DataFrame,
|
|
167
|
+
//input: column macroMolecule {semType: Macromolecule}
|
|
168
|
+
export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column): Promise<void> {
|
|
169
|
+
if (DG.Func.find({package: 'Chem', name: 'getRdKitModule'}).length === 0) {
|
|
170
|
+
grok.shell.warning('Transformation to atomic level requires package "Chem" installed.');
|
|
171
|
+
return;
|
|
172
|
+
}
|
|
173
|
+
if (!checkInputColumn(macroMolecule, 'To Atomic Level'))
|
|
174
|
+
return;
|
|
175
|
+
|
|
138
176
|
const monomersLibFile = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
|
|
139
177
|
const monomersLibObject: any[] = JSON.parse(monomersLibFile);
|
|
140
|
-
const atomicCodes = getMolfilesFromSeq(
|
|
178
|
+
const atomicCodes = getMolfilesFromSeq(macroMolecule, monomersLibObject);
|
|
141
179
|
const result = await getMacroMol(atomicCodes!);
|
|
142
180
|
|
|
143
181
|
const col = DG.Column.fromStrings('regenerated', result);
|
|
@@ -152,7 +190,10 @@ export async function toAtomicLevel(df: DG.DataFrame, sequence: DG.Column): Prom
|
|
|
152
190
|
//input: dataframe table
|
|
153
191
|
//input: column sequence { semType: Macromolecule }
|
|
154
192
|
//output: column result
|
|
155
|
-
export async function multipleSequenceAlignmentAny(table: DG.DataFrame, col: DG.Column): Promise<DG.Column> {
|
|
193
|
+
export async function multipleSequenceAlignmentAny(table: DG.DataFrame, col: DG.Column): Promise<DG.Column | null> {
|
|
194
|
+
if (!checkInputColumn(col, 'MSA', ['fasta'], ['DNA', 'RNA', 'PT']))
|
|
195
|
+
return null;
|
|
196
|
+
|
|
156
197
|
const msaCol = await runKalign(col, false);
|
|
157
198
|
table.columns.add(msaCol);
|
|
158
199
|
|
|
@@ -171,19 +212,24 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
171
212
|
// Higher priority for columns with MSA data to show with WebLogo.
|
|
172
213
|
const tv = grok.shell.tv;
|
|
173
214
|
const df = tv.dataFrame;
|
|
174
|
-
const semTypeColList = df.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);
|
|
175
|
-
let col: DG.Column | undefined = semTypeColList.find((col) => {
|
|
176
|
-
const units = col.getTag(DG.TAGS.UNITS);
|
|
177
|
-
return units ? units.indexOf('MSA') !== -1 : false;
|
|
178
|
-
});
|
|
179
|
-
if (!col)
|
|
180
|
-
col = semTypeColList[0];
|
|
181
215
|
|
|
216
|
+
const col: DG.Column | null = WebLogo.pickUpSeqCol2(df);
|
|
182
217
|
if (!col) {
|
|
183
218
|
grok.shell.error('Current table does not contain sequences');
|
|
184
219
|
return;
|
|
185
220
|
}
|
|
186
221
|
|
|
222
|
+
if (!checkInputColumn(col, 'Composition'))
|
|
223
|
+
return;
|
|
224
|
+
|
|
225
|
+
const allowedNotations: string[] = ['fasta', 'separator'];
|
|
226
|
+
const units = col.getTag(DG.TAGS.UNITS);
|
|
227
|
+
if (!allowedNotations.some((n) => units.toUpperCase().startsWith(n.toUpperCase()))) {
|
|
228
|
+
grok.shell.warning('Composition analysis is allowed for ' +
|
|
229
|
+
`notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')}.`);
|
|
230
|
+
return;
|
|
231
|
+
}
|
|
232
|
+
|
|
187
233
|
tv.addViewer('WebLogo', {sequenceColumnName: col.name});
|
|
188
234
|
}
|
|
189
235
|
|
|
@@ -201,7 +247,7 @@ function parseMacromolecule(
|
|
|
201
247
|
//name: importFasta
|
|
202
248
|
//description: Opens FASTA file
|
|
203
249
|
//tags: file-handler
|
|
204
|
-
//meta.ext: fasta, fna, ffn, faa, frn, fa
|
|
250
|
+
//meta.ext: fasta, fna, ffn, faa, frn, fa, fst
|
|
205
251
|
//input: string fileContent
|
|
206
252
|
//output: list tables
|
|
207
253
|
export function importFasta(fileContent: string): DG.DataFrame [] {
|
|
@@ -221,13 +267,34 @@ export function importFasta(fileContent: string): DG.DataFrame [] {
|
|
|
221
267
|
const descriptionsArrayCol = DG.Column.fromStrings('description', descriptionsArray);
|
|
222
268
|
const sequenceCol = DG.Column.fromStrings('sequence', sequencesArray);
|
|
223
269
|
sequenceCol.semType = 'Macromolecule';
|
|
224
|
-
|
|
225
270
|
const stats: SeqColStats = WebLogo.getStats(sequenceCol, 5, WebLogo.splitterAsFasta);
|
|
226
271
|
const seqType = stats.sameLength ? 'SEQ.MSA' : 'SEQ';
|
|
272
|
+
|
|
273
|
+
const PeptideFastaAlphabet = new Set([
|
|
274
|
+
'G', 'L', 'Y', 'S', 'E', 'Q', 'D', 'N', 'F', 'A',
|
|
275
|
+
'K', 'R', 'H', 'C', 'V', 'P', 'W', 'I', 'M', 'T',
|
|
276
|
+
]);
|
|
277
|
+
|
|
278
|
+
const DnaFastaAlphabet = new Set(['A', 'C', 'G', 'T']);
|
|
279
|
+
|
|
280
|
+
const RnaFastaAlphabet = new Set(['A', 'C', 'G', 'U']);
|
|
281
|
+
|
|
282
|
+
//const SmilesRawAlphabet = new Set([
|
|
283
|
+
// 'O', 'C', 'c', 'N', 'S', 'F', '(', ')',
|
|
284
|
+
// '1', '2', '3', '4', '5', '6', '7',
|
|
285
|
+
// '+', '-', '@', '[', ']', '/', '\\', '#', '=']);
|
|
286
|
+
|
|
227
287
|
const alphabetCandidates: [string, Set<string>][] = [
|
|
228
|
-
['
|
|
229
|
-
['
|
|
288
|
+
['PT', PeptideFastaAlphabet],
|
|
289
|
+
['DNA', DnaFastaAlphabet],
|
|
290
|
+
['RNA', RnaFastaAlphabet],
|
|
230
291
|
];
|
|
292
|
+
|
|
293
|
+
//const alphabetCandidates: [string, Set<string>][] = [
|
|
294
|
+
// ['NT', new Set(Object.keys(Nucleotides.Names))],
|
|
295
|
+
// ['PT', new Set(Object.keys(Aminoacids.Names))],
|
|
296
|
+
//];
|
|
297
|
+
|
|
231
298
|
// Calculate likelihoods for alphabet_candidates
|
|
232
299
|
const alphabetCandidatesSim: number[] = alphabetCandidates.map(
|
|
233
300
|
(c) => WebLogo.getAlphabetSimilarity(stats.freq, c[1]));
|
|
@@ -44,7 +44,7 @@ PEPTIDE1{M.K.P.S.E.Y.V}$$$
|
|
|
44
44
|
ACGTC
|
|
45
45
|
CAGTGT
|
|
46
46
|
TTCAAC
|
|
47
|
-
|
|
47
|
+
`,
|
|
48
48
|
separatorDna: `seq
|
|
49
49
|
A/C/G/T/C
|
|
50
50
|
C/A/G/T/G/T
|
|
@@ -59,7 +59,7 @@ DNA1{D(T)P.D(T)P.D(C)P.D(A)P.D(A)P.D(C)P}$$$
|
|
|
59
59
|
ACGUC
|
|
60
60
|
CAGUGU
|
|
61
61
|
UUCAAC
|
|
62
|
-
|
|
62
|
+
`,
|
|
63
63
|
separatorRna: `seq
|
|
64
64
|
A*C*G*U*C
|
|
65
65
|
C*A*G*U*G*U
|
|
@@ -90,10 +90,10 @@ RNA1{R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P}$$$
|
|
|
90
90
|
return _csvDfs[key];
|
|
91
91
|
};
|
|
92
92
|
|
|
93
|
-
function converter(tgtNotation: NOTATION,
|
|
93
|
+
/**
 * Creates a column-conversion callback bound to a target notation.
 *
 * @param tgtNotation Notation the returned callback converts columns to.
 * @param tgtSeparator Separator forwarded to the conversion; `null` by default.
 * @returns A function that wraps its source column in a `NotationConverter`
 *   and returns the result of `convert(tgtNotation, tgtSeparator)`.
 */
function converter(tgtNotation: NOTATION, tgtSeparator: string | null = null): ConverterFunc {
  return (srcCol: DG.Column): DG.Column => {
    // A fresh converter is created per source column, as each one wraps exactly one column.
    const notationConverter = new NotationConverter(srcCol);
    return notationConverter.convert(tgtNotation, tgtSeparator);
  };
}
|
|
@@ -127,7 +127,7 @@ RNA1{R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P}$$$
|
|
|
127
127
|
await _testConvert(Samples.fastaDna, converter(NOTATION.HELM), Samples.helmDna);
|
|
128
128
|
});
|
|
129
129
|
test('testFastaRnaToHelm', async () => {
|
|
130
|
-
await _testConvert(Samples.
|
|
130
|
+
await _testConvert(Samples.fastaRna, converter(NOTATION.HELM), Samples.helmRna);
|
|
131
131
|
});
|
|
132
132
|
|
|
133
133
|
test('testSeparatorPtToFasta', async () => {
|
|
@@ -136,15 +136,15 @@ RNA1{R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P}$$$
|
|
|
136
136
|
test('testSeparatorDnaToFasta', async () => {
|
|
137
137
|
await _testConvert(Samples.separatorDna, converter(NOTATION.FASTA), Samples.fastaDna);
|
|
138
138
|
});
|
|
139
|
-
test('
|
|
139
|
+
test('testSeparatorRnaToFasta', async () => {
|
|
140
140
|
await _testConvert(Samples.separatorRna, converter(NOTATION.FASTA), Samples.fastaRna);
|
|
141
141
|
});
|
|
142
142
|
|
|
143
143
|
test('testSeparatorPtToHelm', async () => {
|
|
144
|
-
await _testConvert(Samples.
|
|
144
|
+
await _testConvert(Samples.separatorPt, converter(NOTATION.HELM), Samples.helmPt);
|
|
145
145
|
});
|
|
146
146
|
test('testSeparatorDnaToHelm', async () => {
|
|
147
|
-
await _testConvert(Samples.
|
|
147
|
+
await _testConvert(Samples.separatorDna, converter(NOTATION.HELM), Samples.helmDna);
|
|
148
148
|
});
|
|
149
149
|
test('testSeparatorRnaToHelm', async () => {
|
|
150
150
|
await _testConvert(Samples.separatorRna, converter(NOTATION.HELM), Samples.helmRna);
|
|
@@ -105,9 +105,12 @@ MWRSWY-CKHP
|
|
|
105
105
|
fastaCsv = 'fastaCsv',
|
|
106
106
|
fastaFasta = 'fastaFasta',
|
|
107
107
|
msaComplex = 'msaComplex',
|
|
108
|
-
idCsv = 'idCsv',
|
|
109
|
-
sarSmallCsv = 'sarSmallCsv',
|
|
110
108
|
helmCsv = 'helmCsv',
|
|
109
|
+
testDemogCsv = 'testDemogCsv',
|
|
110
|
+
testHelmCsv = 'testHelmCsv',
|
|
111
|
+
testIdCsv = 'testIdCsv',
|
|
112
|
+
testSmilesCsv = 'testSmilesCsv',
|
|
113
|
+
testSmiles2Csv = 'testSmiles2Csv',
|
|
111
114
|
}
|
|
112
115
|
|
|
113
116
|
const samples: { [key: string]: string } = {
|
|
@@ -115,9 +118,12 @@ MWRSWY-CKHP
|
|
|
115
118
|
'fastaCsv': 'System:AppData/Bio/samples/sample_FASTA.csv',
|
|
116
119
|
'fastaFasta': 'System:AppData/Bio/samples/sample_FASTA.fasta',
|
|
117
120
|
'msaComplex': 'System:AppData/Bio/samples/sample_MSA.csv',
|
|
118
|
-
'idCsv': 'System:AppData/Bio/samples/id.csv',
|
|
119
|
-
'sarSmallCsv': 'System:AppData/Bio/samples/sar-small.csv',
|
|
120
121
|
'helmCsv': 'System:AppData/Bio/samples/sample_HELM.csv',
|
|
122
|
+
'testDemogCsv': 'System:AppData/Bio/samples/testDemog.csv',
|
|
123
|
+
'testHelmCsv': 'System:AppData/Bio/samples/testHelm.csv',
|
|
124
|
+
'testIdCsv': 'System:AppData/Bio/samples/id.csv',
|
|
125
|
+
'testSmilesCsv': 'System:AppData/Bio/samples/testSmiles.csv',
|
|
126
|
+
'testSmiles2Csv': 'System:AppData/Bio/samples/testSmiles2.csv',
|
|
121
127
|
};
|
|
122
128
|
|
|
123
129
|
const _samplesDfs: { [key: string]: Promise<DG.DataFrame> } = {};
|
|
@@ -238,11 +244,11 @@ MWRSWY-CKHP
|
|
|
238
244
|
});
|
|
239
245
|
|
|
240
246
|
test('samplesIdCsvNegativeID', async () => {
|
|
241
|
-
await _testNeg(readSamples(Samples.
|
|
247
|
+
await _testNeg(readSamples(Samples.testIdCsv), 'ID');
|
|
242
248
|
});
|
|
243
249
|
|
|
244
250
|
test('samplesSarSmallCsvNegativeSmiles', async () => {
|
|
245
|
-
await _testNeg(readSamples(Samples.
|
|
251
|
+
await _testNeg(readSamples(Samples.testSmilesCsv), 'smiles');
|
|
246
252
|
});
|
|
247
253
|
|
|
248
254
|
test('samplesHelmCsvHELM', async () => {
|
|
@@ -252,6 +258,42 @@ MWRSWY-CKHP
|
|
|
252
258
|
test('samplesHelmCsvNegativeActivity', async () => {
|
|
253
259
|
await _testNeg(readSamples(Samples.helmCsv), 'Activity');
|
|
254
260
|
});
|
|
261
|
+
|
|
262
|
+
// testHelm.csv
|
|
263
|
+
// columns: ID,Test type,HELM string,Valid?,Mol Weight,Mol Formula,SMILES
|
|
264
|
+
test('samplesTestHelmNegativeID', async () => {
|
|
265
|
+
await _testNeg(readSamples(Samples.testHelmCsv), 'ID');
|
|
266
|
+
});
|
|
267
|
+
test('samplesTestHelmNegativeTestType', async () => {
|
|
268
|
+
await _testNeg(readSamples(Samples.testHelmCsv), 'Test type');
|
|
269
|
+
});
|
|
270
|
+
test('samplesTestHelmPositiveHelmString', async () => {
|
|
271
|
+
await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', 'HELM');
|
|
272
|
+
});
|
|
273
|
+
test('samplesTestHelmNegativeValid', async () => {
|
|
274
|
+
await _testNeg(readSamples(Samples.testHelmCsv), 'Valid?');
|
|
275
|
+
});
|
|
276
|
+
test('samplesTestHelmNegativeMolWeight', async () => {
|
|
277
|
+
await _testNeg(readSamples(Samples.testHelmCsv), 'Mol Weight');
|
|
278
|
+
});
|
|
279
|
+
test('samplesTestHelmNegativeMolFormula', async () => {
|
|
280
|
+
await _testNeg(readSamples(Samples.testHelmCsv), 'Mol Formula');
|
|
281
|
+
});
|
|
282
|
+
test('samplesTestHelmNegativeSmiles', async () => {
|
|
283
|
+
await _testNeg(readSamples(Samples.testHelmCsv), 'Smiles');
|
|
284
|
+
});
|
|
285
|
+
|
|
286
|
+
test('samplesTestDemogNegativeAll', async () => {
|
|
287
|
+
const dfFunc: DfReaderFunc = readSamples(Samples.testDemogCsv);
|
|
288
|
+
const df: DG.DataFrame = await dfFunc();
|
|
289
|
+
|
|
290
|
+
for (const col of df.columns.toList())
|
|
291
|
+
await _testNeg(dfFunc, col.name);
|
|
292
|
+
});
|
|
293
|
+
|
|
294
|
+
test('samplesTestSmiles2NegativeSmiles', async () => {
|
|
295
|
+
await _testNeg(readSamples(Samples.testSmiles2Csv), 'SMILES');
|
|
296
|
+
});
|
|
255
297
|
});
|
|
256
298
|
|
|
257
299
|
export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
@@ -6,31 +6,53 @@ import * as DG from 'datagrok-api/dg';
|
|
|
6
6
|
import {importFasta, multipleSequenceAlignmentAny} from '../package';
|
|
7
7
|
|
|
8
8
|
/**
 * Renderer tests: checks that the Macromolecule semantic type, units tag and
 * cell renderer tag are set on a FASTA-imported sequence column and on the
 * column produced from it by multiple sequence alignment.
 */
category('renderers', () => {
  // Table views opened by the tests; closed in after() so they do not pile up in the shell.
  let tvList: DG.TableView[];

  before(async () => {
    tvList = [];
  });

  after(async () => {
    tvList.forEach((tv: DG.TableView) => tv.close());
  });

  test('afterMsa', async () => {
    await _testAfterMsa();
  });

  /** Imports the sample FASTA file, runs MSA on its sequence column and checks both columns' tags. */
  async function _testAfterMsa() {
    const fastaTxt: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA.fasta');
    const df: DG.DataFrame = importFasta(fastaTxt)[0];

    const srcSeqCol: DG.Column | null = df.col('sequence');
    expect(srcSeqCol !== null, true);
    console.log('Bio: tests/renderers/afterMsa, src data loaded');

    const tv: DG.TableView = grok.shell.addTableView(df);
    // Register the view right away: if any expectation below fails, after() still closes it.
    tvList.push(tv);
    console.log('Bio: tests/renderers/afterMsa, table view');

    await grok.data.detectSemanticTypes(df);
    console.log('Bio: tests/renderers/afterMsa, detectSemanticTypes');

    console.log('Bio: tests/renderers/afterMsa, src before test semType' +
      `semType="${srcSeqCol!.semType}", units="${srcSeqCol!.getTag(DG.TAGS.UNITS)}", ` +
      `cell.renderer="${srcSeqCol!.getTag('cell.renderer')}"`);
    expect(srcSeqCol!.semType, DG.SEMTYPE.MACROMOLECULE);
    expect(srcSeqCol!.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
    expect(srcSeqCol!.getTag('cell.renderer'), 'Macromolecule');
    console.log('Bio: tests/renderers/afterMsa, src semType tested');

    const msaSeqCol: DG.Column | null = await multipleSequenceAlignmentAny(df, srcSeqCol!);
    // multipleSequenceAlignmentAny is typed to return DG.Column | null; fail with a
    // clear expectation here rather than a TypeError on the dereferences below.
    expect(msaSeqCol !== null, true);
    console.log('Bio: tests/renderers/afterMsa, msaSeqCol created');

    tv.grid.invalidate();
    console.log('Bio: tests/renderers/afterMsa, tv.grid invalidated');

    expect(msaSeqCol!.semType, DG.SEMTYPE.MACROMOLECULE);
    expect(msaSeqCol!.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:PT');
    expect(msaSeqCol!.getTag('cell.renderer'), 'Macromolecule');
    console.log('Bio: tests/renderers/afterMsa, msa semType tested');
  }
});
|
|
@@ -1,6 +1,4 @@
|
|
|
1
1
|
import * as C from "./constants";
|
|
2
|
-
import {getSeparator} from "./misc";
|
|
3
|
-
import {ChemPalette} from "./chem-palette";
|
|
4
2
|
import * as DG from 'datagrok-api/dg';
|
|
5
3
|
import {AminoacidsPalettes} from "@datagrok-libraries/bio/src/aminoacids";
|
|
6
4
|
import {NucleotidesPalettes} from "@datagrok-libraries/bio/src/nucleotides";
|
|
@@ -9,7 +7,8 @@ import {SplitterFunc, WebLogo} from "@datagrok-libraries/bio/src/viewers/web-log
|
|
|
9
7
|
import {SeqPalette} from "@datagrok-libraries/bio/src/seq-palettes";
|
|
10
8
|
import * as ui from 'datagrok-api/ui';
|
|
11
9
|
|
|
12
|
-
const lru = new DG.LruCache<any, any>();
|
|
10
|
+
const lru = new DG.LruCache<any, any>();
|
|
11
|
+
const undefinedColor = 'rgb(100,100,100)';
|
|
13
12
|
|
|
14
13
|
function getPalleteByType(paletteType: string): SeqPalette {
|
|
15
14
|
switch (paletteType) {
|
|
@@ -48,7 +47,7 @@ export function processSequence(subParts: string[]): [string[], boolean] {
|
|
|
48
47
|
* @param {number} h Height.
|
|
49
48
|
* @param {CanvasRenderingContext2D} g Canvas rendering context.
|
|
50
49
|
* @param {string} s String to print.
|
|
51
|
-
* @param {string} [color=
|
|
50
|
+
* @param {string} [color=undefinedColor] String color.
|
|
52
51
|
* @param {number} [pivot=0] Pivot.
|
|
53
52
|
* @param {boolean} [left=false] Is left aligned.
|
|
54
53
|
* @param {boolean} [hideMod=false] Hide amino acid residue modifications.
|
|
@@ -57,12 +56,15 @@ export function processSequence(subParts: string[]): [string[], boolean] {
|
|
|
57
56
|
*/
|
|
58
57
|
function printLeftOrCentered(
|
|
59
58
|
x: number, y: number, w: number, h: number,
|
|
60
|
-
g: CanvasRenderingContext2D, s: string, color =
|
|
59
|
+
g: CanvasRenderingContext2D, s: string, color = undefinedColor,
|
|
61
60
|
pivot: number = 0, left = false, hideMod = false, transparencyRate: number = 1.0,
|
|
62
|
-
separator: string = ''): number {
|
|
61
|
+
separator: string = '', last:boolean = false): number {
|
|
63
62
|
g.textAlign = 'start';
|
|
64
63
|
let colorPart = s.substring(0);
|
|
65
64
|
let grayPart = separator;
|
|
65
|
+
if (last) {
|
|
66
|
+
grayPart = '';
|
|
67
|
+
}
|
|
66
68
|
const textSize = g.measureText(colorPart + grayPart);
|
|
67
69
|
const indent = 5;
|
|
68
70
|
|
|
@@ -87,49 +89,8 @@ function printLeftOrCentered(
|
|
|
87
89
|
return x + dx + colorTextSize.width;
|
|
88
90
|
}
|
|
89
91
|
}
|
|
90
|
-
function renderSequense(
|
|
91
|
-
g: CanvasRenderingContext2D, x: number, y: number, w: number, h: number, gridCell: DG.GridCell,
|
|
92
|
-
cellStyle: DG.GridCellStyle,
|
|
93
|
-
): void {
|
|
94
|
-
const grid = gridCell.grid;
|
|
95
|
-
const cell = gridCell.cell;
|
|
96
|
-
const [type, subtype, paletteType] = gridCell.cell.column.getTag(DG.TAGS.UNITS).split(":");
|
|
97
|
-
w = grid ? Math.min(grid.canvas.width - x, w) : g.canvas.width - x;
|
|
98
|
-
g.save();
|
|
99
|
-
g.beginPath();
|
|
100
|
-
g.rect(x, y, w, h);
|
|
101
|
-
g.clip();
|
|
102
|
-
g.font = '12px monospace';
|
|
103
|
-
g.textBaseline = 'top';
|
|
104
|
-
const s: string = cell.value ?? '';
|
|
105
|
-
|
|
106
|
-
//TODO: can this be replaced/merged with splitSequence?
|
|
107
|
-
const units = gridCell.cell.column.getTag(DG.TAGS.UNITS);
|
|
108
|
-
|
|
109
|
-
const palette = getPalleteByType(paletteType);
|
|
110
|
-
|
|
111
|
-
const separator = gridCell.cell.column.getTag('separator') ?? '';
|
|
112
|
-
const splitterFunc: SplitterFunc = WebLogo.getSplitter(units, gridCell.cell.column.getTag('separator') );// splitter,
|
|
113
|
-
|
|
114
|
-
const subParts:string[] = splitterFunc(cell.value);
|
|
115
|
-
|
|
116
|
-
const textSize = g.measureText(subParts.join(''));
|
|
117
|
-
let x1 = Math.max(x, x + (w - textSize.width) / 2);
|
|
118
|
-
|
|
119
|
-
subParts.forEach((amino, index) => {
|
|
120
|
-
let [color, outerAmino,, pivot] = ChemPalette.getColorAAPivot(amino);
|
|
121
|
-
color = palette.get(amino);
|
|
122
|
-
g.fillStyle = ChemPalette.undefinedColor;
|
|
123
|
-
x1 = printLeftOrCentered(x1, y, w, h, g, amino, color, pivot, true, false, 1.0, separator);
|
|
124
|
-
});
|
|
125
|
-
|
|
126
|
-
g.restore();
|
|
127
|
-
}
|
|
128
92
|
|
|
129
93
|
export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
130
|
-
constructor() {
|
|
131
|
-
super();
|
|
132
|
-
}
|
|
133
94
|
|
|
134
95
|
get name(): string {return 'macromoleculeSequence';}
|
|
135
96
|
|
|
@@ -162,7 +123,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
162
123
|
let host = ui.div([], { style: { width: `${w}px`, height: `${h}px`}});
|
|
163
124
|
host.setAttribute('dataformat', 'helm');
|
|
164
125
|
host.setAttribute('data', gridCell.cell.value);
|
|
165
|
-
|
|
126
|
+
gridCell.element = host;
|
|
166
127
|
//@ts-ignore
|
|
167
128
|
var canvas = new JSDraw2.Editor(host, { width: w, height: h, skin: "w8", viewonly: true });
|
|
168
129
|
var formula = canvas.getFormula(true);
|
|
@@ -186,28 +147,31 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
186
147
|
g.font = '12px monospace';
|
|
187
148
|
g.textBaseline = 'top';
|
|
188
149
|
const s: string = cell.value ?? '';
|
|
189
|
-
|
|
150
|
+
|
|
190
151
|
//TODO: can this be replaced/merged with splitSequence?
|
|
191
152
|
const units = gridCell.cell.column.getTag(DG.TAGS.UNITS);
|
|
192
|
-
|
|
153
|
+
|
|
193
154
|
const palette = getPalleteByType(paletteType);
|
|
194
|
-
|
|
155
|
+
|
|
195
156
|
const separator = gridCell.cell.column.getTag('separator') ?? '';
|
|
196
157
|
const splitterFunc: SplitterFunc = WebLogo.getSplitter(units, gridCell.cell.column.getTag('separator') );// splitter,
|
|
197
|
-
|
|
158
|
+
|
|
198
159
|
const subParts:string[] = splitterFunc(cell.value);
|
|
199
|
-
console.log(subParts);
|
|
200
|
-
|
|
160
|
+
// console.log(subParts);
|
|
161
|
+
|
|
201
162
|
const textSize = g.measureText(subParts.join(''));
|
|
202
|
-
let x1 =
|
|
203
|
-
|
|
163
|
+
let x1 = x;
|
|
164
|
+
let color = undefinedColor;
|
|
204
165
|
subParts.forEach((amino, index) => {
|
|
205
|
-
let [color, outerAmino,, pivot] = ChemPalette.getColorAAPivot(amino);
|
|
206
166
|
color = palette.get(amino);
|
|
207
|
-
g.fillStyle =
|
|
208
|
-
|
|
167
|
+
g.fillStyle = undefinedColor;
|
|
168
|
+
let last = false;
|
|
169
|
+
if (index === subParts.length - 1) {
|
|
170
|
+
last = true;
|
|
171
|
+
}
|
|
172
|
+
x1 = printLeftOrCentered(x1, y, w, h, g, amino, color, 0, true, false, 1.0, separator, last);
|
|
209
173
|
});
|
|
210
|
-
|
|
174
|
+
|
|
211
175
|
g.restore();
|
|
212
176
|
}
|
|
213
177
|
}
|
package/src/utils/convert.ts
CHANGED
|
@@ -16,26 +16,22 @@ export function convert(col: DG.Column): void {
|
|
|
16
16
|
NOTATION.SEPARATOR,
|
|
17
17
|
NOTATION.HELM
|
|
18
18
|
];
|
|
19
|
-
const
|
|
20
|
-
const
|
|
19
|
+
const separatorArray = ['-', '.', '/'];
|
|
20
|
+
const filteredNotations = notations.filter((e) => e !== current);
|
|
21
|
+
const targetNotationInput = ui.choiceInput('Convert to', filteredNotations[0], filteredNotations);
|
|
21
22
|
|
|
22
|
-
const separatorInput = ui.choiceInput('separator',
|
|
23
|
+
const separatorInput = ui.choiceInput('Choose separator', separatorArray[0], separatorArray);
|
|
23
24
|
|
|
24
|
-
ui.dialog('Convert sequence')
|
|
25
|
+
ui.dialog('Convert sequence notation')
|
|
25
26
|
.add(ui.div([
|
|
26
|
-
ui.h1('
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
.add(ui.div([
|
|
31
|
-
ui.h1('Separator'),
|
|
32
|
-
separatorInput,
|
|
33
|
-
|
|
27
|
+
ui.h1('Current notation: ' + current),
|
|
28
|
+
targetNotationInput.root,
|
|
29
|
+
// TODO: conditional separator input
|
|
30
|
+
separatorInput.root
|
|
34
31
|
]))
|
|
35
32
|
.onOK(() => {
|
|
36
33
|
//TODO: create new converted column
|
|
37
|
-
|
|
38
|
-
const targetNotation: NOTATION = targetNotationInput.value as NOTATION;
|
|
34
|
+
const targetNotation = targetNotationInput.value as NOTATION;
|
|
39
35
|
const separator = separatorInput.value!;
|
|
40
36
|
const newColumn = converter.convert(targetNotation, separator);
|
|
41
37
|
col.dataFrame.columns.add(newColumn);
|
|
@@ -50,10 +50,12 @@ export async function runKalign(srcCol: DG.Column, isAligned = false): Promise<D
|
|
|
50
50
|
|
|
51
51
|
await CLI.fs.writeFile('input.fa', fasta);
|
|
52
52
|
const output = await CLI.exec('kalign input.fa -f fasta -o result.fasta');
|
|
53
|
-
const buf = await CLI.cat('result.fasta');
|
|
54
|
-
|
|
55
53
|
console.warn(output);
|
|
56
54
|
|
|
55
|
+
const buf = await CLI.cat('result.fasta');
|
|
56
|
+
if (!buf)
|
|
57
|
+
throw new Error(`kalign output no result`);
|
|
58
|
+
|
|
57
59
|
const aligned = _fastaToStrings(buf).slice(0, sequences.length);
|
|
58
60
|
const tgtCol = DG.Column.fromStrings(`msa(${srcCol.name})`, aligned);
|
|
59
61
|
|