@datagrok/bio 2.0.5 → 2.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-test.js +143 -127
- package/dist/package.js +10 -106
- package/package.json +1 -1
- package/src/calculations/{fingerprints.ts → monomerLevelMols.ts} +12 -15
- package/src/package.ts +10 -18
- package/src/tests/renderers-test.ts +123 -15
- package/src/tests/splitters-test.ts +26 -2
- package/src/utils/convert.ts +1 -0
- package/test-Bio-7770371320b2-4674dcdc.html +0 -363
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.0.5",
|
|
8
|
+
"version": "2.0.7",
|
|
9
9
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -2,29 +2,24 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
+
import { getHelmMonomers } from '../package'
|
|
6
|
+
|
|
5
7
|
const V2000_ATOM_NAME_POS = 31;
|
|
6
8
|
|
|
7
|
-
export async function
|
|
8
|
-
const
|
|
9
|
-
|
|
9
|
+
export async function getMonomericMols(mcol: DG.Column, pattern: boolean = false): Promise<DG.Column> {
|
|
10
|
+
const monomers = getHelmMonomers(mcol);
|
|
11
|
+
let mols = await grok.functions.call('HELM:getMolFiles', {mcol: mcol});
|
|
10
12
|
|
|
11
13
|
let dict = new Map();
|
|
12
14
|
for(let i = 0; i < monomers.length; i++)
|
|
13
|
-
dict.set(monomers[i],
|
|
14
|
-
|
|
15
|
-
mols = changeToV3000(mols, dict);
|
|
15
|
+
dict.set(monomers[i], `${i + 1}`);
|
|
16
16
|
|
|
17
|
-
|
|
18
|
-
const mol = mod.get_mol(mols[i]);
|
|
19
|
-
const fp = mol.get_pattern_fp_as_uint8array();
|
|
20
|
-
fps.push(fp);
|
|
21
|
-
mol?.delete();
|
|
22
|
-
}
|
|
17
|
+
mols = changeToV3000(mols, dict, pattern);
|
|
23
18
|
|
|
24
|
-
return
|
|
19
|
+
return DG.Column.fromStrings('monomericMols', mols);
|
|
25
20
|
}
|
|
26
21
|
|
|
27
|
-
function changeToV3000(mols: Array<string>, dict: Map<string, string
|
|
22
|
+
function changeToV3000(mols: Array<string>, dict: Map<string, string>, pattern: boolean = false): Array<string> {
|
|
28
23
|
for (let i = 0; i < mols.length; i++) {
|
|
29
24
|
let curPos = 0;
|
|
30
25
|
let endPos = 0;
|
|
@@ -50,7 +45,9 @@ M V30 BEGIN CTAB
|
|
|
50
45
|
curPos = mol.indexOf('\n', curPos) + 1 + V2000_ATOM_NAME_POS;
|
|
51
46
|
endPos = mol.indexOf(' ', curPos);
|
|
52
47
|
const monomerName: string = mol.substring(curPos, endPos);
|
|
53
|
-
molV3000 +=
|
|
48
|
+
molV3000 += pattern ?
|
|
49
|
+
`M V30 ${atomRowI + 1} R${dict.get(monomerName)} 0.000 0.000 0 0\n` :
|
|
50
|
+
`M V30 ${atomRowI + 1} At 0.000 0.000 0 0 MASS=${dict.get(monomerName)}\n`;
|
|
54
51
|
}
|
|
55
52
|
|
|
56
53
|
molV3000 += 'M V30 END ATOM\n';
|
package/src/package.ts
CHANGED
|
@@ -32,7 +32,6 @@ import {
|
|
|
32
32
|
|
|
33
33
|
import {splitAlignedSequences} from '@datagrok-libraries/bio/src/utils/splitter';
|
|
34
34
|
import * as C from './utils/constants';
|
|
35
|
-
import {getFingerprints} from './calculations/fingerprints';
|
|
36
35
|
|
|
37
36
|
//tags: init
|
|
38
37
|
export async function initBio() {
|
|
@@ -56,6 +55,16 @@ export function separatorSequenceCellRenderer(): MacromoleculeSequenceCellRender
|
|
|
56
55
|
return new MacromoleculeSequenceCellRenderer();
|
|
57
56
|
}
|
|
58
57
|
|
|
58
|
+
//name: MacromoleculeDifferenceCellRenderer
|
|
59
|
+
//tags: cellRenderer
|
|
60
|
+
//meta.cellType: MacromoleculeDifference
|
|
61
|
+
//meta.columnTags: quality=MacromoleculeDifference
|
|
62
|
+
//output: grid_cell_renderer result
|
|
63
|
+
export function macromoleculeDifferenceCellRenderer(): MacromoleculeDifferenceCellRenderer {
|
|
64
|
+
return new MacromoleculeDifferenceCellRenderer();
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
|
|
59
68
|
function checkInputColumnUi(
|
|
60
69
|
col: DG.Column, name: string, allowedNotations: string[] = [], allowedAlphabets: string[] = []
|
|
61
70
|
): boolean {
|
|
@@ -376,15 +385,6 @@ export function monomerCellRenderer(): MonomerCellRenderer {
|
|
|
376
385
|
return new MonomerCellRenderer();
|
|
377
386
|
}
|
|
378
387
|
|
|
379
|
-
//name: MacromoleculeDifferenceCellRenderer
|
|
380
|
-
//tags: cellRenderer
|
|
381
|
-
//meta.cellType: MacromoleculeDifference
|
|
382
|
-
//meta.columnTags: quality=MacromoleculeDifference
|
|
383
|
-
//output: grid_cell_renderer result
|
|
384
|
-
export function macromoleculeDifferenceCellRenderer(): MacromoleculeDifferenceCellRenderer {
|
|
385
|
-
return new MacromoleculeDifferenceCellRenderer();
|
|
386
|
-
}
|
|
387
|
-
|
|
388
388
|
//name: testDetectMacromolecule
|
|
389
389
|
//input: string path {choices: ['Demo:Files/', 'System:AppData/']}
|
|
390
390
|
//output: dataframe result
|
|
@@ -460,11 +460,3 @@ export function getHelmMonomers(seqCol: DG.Column<string>): string[] {
|
|
|
460
460
|
const stats = WebLogo.getStats(seqCol, 1, WebLogo.splitterAsHelm);
|
|
461
461
|
return Object.keys(stats.freq);
|
|
462
462
|
}
|
|
463
|
-
|
|
464
|
-
export async function macromoleculesFingerprints(mcol: DG.Column): Promise<Uint8Array[]> {
|
|
465
|
-
grok.functions.call('Chem:getRdKitModule');
|
|
466
|
-
const monomers = getHelmMonomers(mcol);
|
|
467
|
-
const mols = await grok.functions.call('HELM:getMolFiles', {mcol: mcol});
|
|
468
|
-
|
|
469
|
-
return getFingerprints(mols.toList(), monomers);
|
|
470
|
-
}
|
|
@@ -31,6 +31,18 @@ category('renderers', () => {
|
|
|
31
31
|
performanceTest(generateManySequences, 'Many sequences');
|
|
32
32
|
});
|
|
33
33
|
|
|
34
|
+
test('rendererMacromoleculeFasta', async () => {
|
|
35
|
+
await _rendererMacromoleculeFasta();
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
test('rendererMacromoleculeSeparator', async () => {
|
|
39
|
+
await _rendererMacromoleculeSeparator();
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
test('rendererMacromoleculeDifference', async () => {
|
|
43
|
+
await _rendererMacromoleculeDifference();
|
|
44
|
+
});
|
|
45
|
+
|
|
34
46
|
test('afterMsa', async () => {
|
|
35
47
|
await _testAfterMsa();
|
|
36
48
|
});
|
|
@@ -39,10 +51,73 @@ category('renderers', () => {
|
|
|
39
51
|
await _testAfterConvert();
|
|
40
52
|
});
|
|
41
53
|
|
|
42
|
-
test('
|
|
54
|
+
test('selectRendererBySemType', async () => {
|
|
55
|
+
await _selectRendererBySemType();
|
|
56
|
+
});
|
|
57
|
+
|
|
58
|
+
test('setRendererManually', async () => {
|
|
43
59
|
await _setRendererManually();
|
|
44
60
|
});
|
|
45
61
|
|
|
62
|
+
async function _rendererMacromoleculeFasta() {
|
|
63
|
+
const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA.csv');
|
|
64
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
65
|
+
|
|
66
|
+
const seqCol = df.getCol('Sequence');
|
|
67
|
+
const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
|
|
68
|
+
if (semType)
|
|
69
|
+
seqCol.semType = semType;
|
|
70
|
+
|
|
71
|
+
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
72
|
+
// call to calculate 'cell.renderer' tag
|
|
73
|
+
await grok.data.detectSemanticTypes(df);
|
|
74
|
+
|
|
75
|
+
dfList.push(df);
|
|
76
|
+
tvList.push(tv);
|
|
77
|
+
|
|
78
|
+
const resCellRenderer = seqCol.getTag(DG.TAGS.CELL_RENDERER);
|
|
79
|
+
expect(resCellRenderer, 'sequence');
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
async function _rendererMacromoleculeSeparator() {
|
|
83
|
+
const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_SEPARATOR_PT.csv');
|
|
84
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
85
|
+
|
|
86
|
+
const seqCol = df.getCol('sequence');
|
|
87
|
+
const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
|
|
88
|
+
if (semType)
|
|
89
|
+
seqCol.semType = semType;
|
|
90
|
+
|
|
91
|
+
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
92
|
+
// call to calculate 'cell.renderer' tag
|
|
93
|
+
await grok.data.detectSemanticTypes(df);
|
|
94
|
+
|
|
95
|
+
dfList.push(df);
|
|
96
|
+
tvList.push(tv);
|
|
97
|
+
|
|
98
|
+
const resCellRenderer = seqCol.getTag(DG.TAGS.CELL_RENDERER);
|
|
99
|
+
expect(resCellRenderer, 'sequence');
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
async function _rendererMacromoleculeDifference() {
|
|
103
|
+
const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
|
|
104
|
+
['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
|
|
105
|
+
seqDiffCol.tags[DG.TAGS.UNITS] = 'separator';
|
|
106
|
+
seqDiffCol.tags[TAGS.SEPARATOR] = '/';
|
|
107
|
+
seqDiffCol.semType = SEM_TYPES.MACROMOLECULE_DIFFERENCE;
|
|
108
|
+
const df = DG.DataFrame.fromColumns([seqDiffCol]);
|
|
109
|
+
|
|
110
|
+
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
111
|
+
// call to calculate 'cell.renderer' tag
|
|
112
|
+
await grok.data.detectSemanticTypes(df);
|
|
113
|
+
|
|
114
|
+
dfList.push(df);
|
|
115
|
+
tvList.push(tv);
|
|
116
|
+
|
|
117
|
+
const resCellRenderer = seqDiffCol.getTag(DG.TAGS.CELL_RENDERER);
|
|
118
|
+
expect(resCellRenderer, 'MacromoleculeDifference');
|
|
119
|
+
}
|
|
120
|
+
|
|
46
121
|
async function _testAfterMsa() {
|
|
47
122
|
const fastaTxt: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA.fasta');
|
|
48
123
|
const df: DG.DataFrame = importFasta(fastaTxt)[0];
|
|
@@ -86,36 +161,69 @@ category('renderers', () => {
|
|
|
86
161
|
async function _testAfterConvert() {
|
|
87
162
|
const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA_PT.csv');
|
|
88
163
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
89
|
-
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
90
164
|
|
|
91
165
|
const srcCol: DG.Column = df.col('sequence')!;
|
|
92
|
-
// await grok.data.detectSemanticTypes(df);
|
|
93
166
|
const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: srcCol});
|
|
94
167
|
if (semType)
|
|
95
168
|
srcCol.semType = semType;
|
|
169
|
+
|
|
170
|
+
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
171
|
+
// call to calculate 'cell.renderer' tag
|
|
96
172
|
await grok.data.detectSemanticTypes(df);
|
|
97
173
|
|
|
174
|
+
tvList.push(tv);
|
|
175
|
+
dfList.push(df);
|
|
176
|
+
|
|
98
177
|
const tgtCol: DG.Column = await convertDo(srcCol, NOTATION.SEPARATOR, '/');
|
|
99
|
-
|
|
178
|
+
|
|
179
|
+
const resCellRenderer = tgtCol.getTag(DG.TAGS.CELL_RENDERER);
|
|
180
|
+
expect(resCellRenderer, 'sequence');
|
|
100
181
|
|
|
101
182
|
// check tgtCol with UnitsHandler constructor
|
|
102
183
|
const uh: UnitsHandler = new UnitsHandler(tgtCol);
|
|
184
|
+
}
|
|
103
185
|
|
|
104
|
-
|
|
186
|
+
async function _selectRendererBySemType() {
|
|
187
|
+
/* There are renderers for semType Macromolecule and MacromoleculeDifference.
|
|
188
|
+
Misbehavior was by selecting Macromolecule renderers for MacromoleculeDifference semType column
|
|
189
|
+
/**/
|
|
190
|
+
const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
|
|
191
|
+
['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
|
|
192
|
+
seqDiffCol.tags[DG.TAGS.UNITS] = 'separator';
|
|
193
|
+
seqDiffCol.tags[TAGS.SEPARATOR] = '/';
|
|
194
|
+
seqDiffCol.semType = SEM_TYPES.MACROMOLECULE_DIFFERENCE;
|
|
195
|
+
const df = DG.DataFrame.fromColumns([seqDiffCol]);
|
|
196
|
+
const tv = grok.shell.addTableView(df);
|
|
105
197
|
dfList.push(df);
|
|
198
|
+
tvList.push(tv);
|
|
199
|
+
|
|
200
|
+
await delay(100);
|
|
201
|
+
const renderer = seqDiffCol.getTag(DG.TAGS.CELL_RENDERER);
|
|
202
|
+
if (renderer !== 'MacromoleculeDifference') // this is value of MacromoleculeDifferenceCR.cellType
|
|
203
|
+
throw new Error(`Units 'separator', separator '/' and semType 'MacromoleculeDifference' ` +
|
|
204
|
+
`have been manually set on column but after df was added as table, ` +
|
|
205
|
+
`view renderer has set to '${renderer}' instead of correct 'MacromoleculeDifference'.`);
|
|
106
206
|
}
|
|
107
207
|
|
|
108
208
|
async function _setRendererManually() {
|
|
109
|
-
const
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
const
|
|
209
|
+
const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
|
|
210
|
+
['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
|
|
211
|
+
seqDiffCol.tags[DG.TAGS.UNITS] = 'separator';
|
|
212
|
+
seqDiffCol.tags[TAGS.SEPARATOR] = '/';
|
|
213
|
+
seqDiffCol.semType = SEM_TYPES.MACROMOLECULE;
|
|
214
|
+
const tgtCellRenderer = 'MacromoleculeDifference';
|
|
215
|
+
seqDiffCol.setTag(DG.TAGS.CELL_RENDERER, tgtCellRenderer);
|
|
216
|
+
const df = DG.DataFrame.fromColumns([seqDiffCol]);
|
|
217
|
+
await grok.data.detectSemanticTypes(df);
|
|
218
|
+
const tv = grok.shell.addTableView(df);
|
|
219
|
+
dfList.push(df);
|
|
220
|
+
tvList.push(tv);
|
|
221
|
+
|
|
115
222
|
await delay(100);
|
|
116
|
-
const
|
|
117
|
-
if (
|
|
118
|
-
throw new Error(`
|
|
119
|
-
`
|
|
223
|
+
const resCellRenderer = seqDiffCol.getTag(DG.TAGS.CELL_RENDERER);
|
|
224
|
+
if (resCellRenderer !== tgtCellRenderer) // this is value of MacromoleculeDifferenceCR.cellType
|
|
225
|
+
throw new Error(`Tag 'cell.renderer' has been manually set to '${tgtCellRenderer}' for column ` +
|
|
226
|
+
`but after df was added as table, tag 'cell.renderer' has reset to '${resCellRenderer}' ` +
|
|
227
|
+
`instead of manual '${tgtCellRenderer}'.`);
|
|
120
228
|
}
|
|
121
229
|
});
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
import {after, before, category, test, expect, expectArray} from '@datagrok-libraries/utils/src/test';
|
|
1
|
+
import {after, before, category, test, expect, expectArray, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
2
2
|
|
|
3
3
|
import * as grok from 'datagrok-api/grok';
|
|
4
4
|
import * as ui from 'datagrok-api/ui';
|
|
5
5
|
import * as DG from 'datagrok-api/dg';
|
|
6
6
|
import {WebLogo, SplitterFunc} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
7
|
-
import {splitToMonomers, _package} from '../package';
|
|
7
|
+
import {splitToMonomers, _package, getHelmMonomers} from '../package';
|
|
8
8
|
import * as C from '../utils/constants';
|
|
9
9
|
|
|
10
10
|
category('splitters', () => {
|
|
@@ -87,6 +87,30 @@ category('splitters', () => {
|
|
|
87
87
|
splitToMonomers(seqCol);
|
|
88
88
|
expect(df.columns.names().includes('17'), true);
|
|
89
89
|
});
|
|
90
|
+
|
|
91
|
+
test('getHelmMonomers', async () => {
|
|
92
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(
|
|
93
|
+
`HELM,Activity
|
|
94
|
+
PEPTIDE1{hHis.N.T}$$$,5.30751
|
|
95
|
+
PEPTIDE1{hHis.Aca.Cys_SEt}$$$,5.72388
|
|
96
|
+
`);
|
|
97
|
+
const expectedMonomerList = ['hHis', 'Aca', 'Cys_SEt', 'N', 'T'];
|
|
98
|
+
|
|
99
|
+
const helmCol: DG.Column = df.getCol('HELM');
|
|
100
|
+
const res = getHelmMonomers(helmCol);
|
|
101
|
+
|
|
102
|
+
const missed = expectedMonomerList.filter((m) => !res.includes(m));
|
|
103
|
+
const unexpected = res.filter((m) => !expectedMonomerList.includes(m));
|
|
104
|
+
if (missed.length > 0 || unexpected.length) {
|
|
105
|
+
const msgs = [];
|
|
106
|
+
if (missed.length > 0)
|
|
107
|
+
msgs.push(`Missed monomers ${JSON.stringify(missed)}.`);
|
|
108
|
+
if (unexpected.length > 0)
|
|
109
|
+
msgs.push(`Unexpected monomers ${JSON.stringify(unexpected)}.`);
|
|
110
|
+
|
|
111
|
+
throw new Error(msgs.join(' '));
|
|
112
|
+
}
|
|
113
|
+
});
|
|
90
114
|
});
|
|
91
115
|
|
|
92
116
|
export async function _testHelmSplitter(src: string, tgt: string[]) {
|
package/src/utils/convert.ts
CHANGED
|
@@ -79,6 +79,7 @@ export function convert(col: DG.Column): void {
|
|
|
79
79
|
}
|
|
80
80
|
}
|
|
81
81
|
|
|
82
|
+
/** Creates a new column with converted sequences and detects its semantic type */
|
|
82
83
|
export async function convertDo(
|
|
83
84
|
srcCol: DG.Column, targetNotation: NOTATION, separator: string | null
|
|
84
85
|
): Promise<DG.Column> {
|