@datagrok/bio 1.8.2 → 1.10.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +37 -0
- package/dist/package-test.js +557 -5547
- package/dist/package.js +513 -5517
- package/package.json +7 -5
- package/src/package.ts +29 -14
- package/src/tests/WebLogo-positions-test.ts +17 -13
- package/src/tests/detectors-test.ts +35 -20
- package/src/tests/msa-tests.ts +2 -1
- package/src/tests/test-sequnces-generators.ts +45 -0
- package/src/utils/atomic-works.ts +2 -0
- package/src/utils/cell-renderer.ts +35 -96
- package/src/utils/sequence-activity-cliffs.ts +5 -21
- package/src/utils/sequence-space.ts +5 -3
- package/src/widgets/representations.ts +1 -1
- package/{test-Bio-dc07f068a0b2-3cd5a505.html → test-Bio-eb4783c07294-0aa1538b.html} +2 -2
- package/webpack.config.js +1 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "1.
|
|
8
|
+
"version": "1.10.2",
|
|
9
9
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -14,16 +14,17 @@
|
|
|
14
14
|
},
|
|
15
15
|
"dependencies": {
|
|
16
16
|
"@biowasm/aioli": ">=2.4.0",
|
|
17
|
-
"@datagrok-libraries/bio": "^
|
|
18
|
-
"@datagrok-libraries/
|
|
17
|
+
"@datagrok-libraries/bio": "^4.1.0",
|
|
18
|
+
"@datagrok-libraries/chem-meta": "1.0.0",
|
|
19
|
+
"@datagrok-libraries/ml": "^4.0.0",
|
|
19
20
|
"@datagrok-libraries/utils": "^1.6.2",
|
|
20
21
|
"cash-dom": "latest",
|
|
21
22
|
"datagrok-api": "^1.5.5",
|
|
22
23
|
"dayjs": "^1.11.4",
|
|
23
24
|
"openchemlib": "6.0.1",
|
|
24
25
|
"rxjs": "^6.5.5",
|
|
25
|
-
"
|
|
26
|
-
"
|
|
26
|
+
"typescript": "^4.4.2",
|
|
27
|
+
"wu": "latest"
|
|
27
28
|
},
|
|
28
29
|
"devDependencies": {
|
|
29
30
|
"@types/jest": "^27.0.0",
|
|
@@ -35,6 +36,7 @@
|
|
|
35
36
|
"jest-html-reporter": "^3.6.0",
|
|
36
37
|
"puppeteer": "^13.7.0",
|
|
37
38
|
"ts-jest": "^27.0.0",
|
|
39
|
+
"ts-loader": "^9.2.5",
|
|
38
40
|
"webpack": "latest",
|
|
39
41
|
"webpack-cli": "^4.10.0",
|
|
40
42
|
"@types/js-yaml": "^4.0.5",
|
package/src/package.ts
CHANGED
|
@@ -15,7 +15,7 @@ import {Aminoacids} from '@datagrok-libraries/bio/src/aminoacids';
|
|
|
15
15
|
import {getEmbeddingColsNames, sequenceSpace} from './utils/sequence-space';
|
|
16
16
|
import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
17
17
|
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
18
|
-
import {
|
|
18
|
+
import {drawSequences, sequenceGetSimilarities} from './utils/sequence-activity-cliffs';
|
|
19
19
|
import {createJsonMonomerLibFromSdf, encodeMonomers, getMolfilesFromSeq, HELM_CORE_LIB_FILENAME} from './utils/utils';
|
|
20
20
|
import {getMacroMol} from './utils/atomic-works';
|
|
21
21
|
import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
|
|
@@ -23,13 +23,28 @@ import {convert} from './utils/convert';
|
|
|
23
23
|
import {representationsWidget} from './widgets/representations';
|
|
24
24
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
25
25
|
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
26
|
-
import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils'
|
|
27
|
-
|
|
26
|
+
import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils';
|
|
27
|
+
import {
|
|
28
|
+
generateManySequences,
|
|
29
|
+
generateLongSequence,
|
|
30
|
+
performanceTest
|
|
31
|
+
} from './tests/test-sequnces-generators';
|
|
28
32
|
|
|
29
33
|
//tags: init
|
|
30
34
|
export async function initBio() {
|
|
31
35
|
}
|
|
32
36
|
|
|
37
|
+
//name: testManySequencesPerformance
|
|
38
|
+
export function testManySequencesPerformance(): void {
|
|
39
|
+
performanceTest(generateManySequences, 'Many sequences');
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
//name: testLongSequencesPerformance
|
|
43
|
+
export function testLongSequencesPerformance(): void {
|
|
44
|
+
performanceTest(generateLongSequence, 'Long sequences');
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
|
|
33
48
|
//name: fastaSequenceCellRenderer
|
|
34
49
|
//tags: cellRenderer
|
|
35
50
|
//meta.cellType: Sequence
|
|
@@ -51,7 +66,7 @@ export function separatorSequenceCellRenderer(): MacromoleculeSequenceCellRender
|
|
|
51
66
|
function checkInputColumn(col: DG.Column, name: string,
|
|
52
67
|
allowedNotations: string[] = [], allowedAlphabets: string[] = []): boolean {
|
|
53
68
|
const notation: string = col.getTag(DG.TAGS.UNITS);
|
|
54
|
-
const alphabet: string = col.getTag('alphabet')
|
|
69
|
+
const alphabet: string = col.getTag('alphabet');
|
|
55
70
|
if (col.semType !== DG.SEMTYPE.MACROMOLECULE) {
|
|
56
71
|
grok.shell.warning(name + ' analysis is allowed for Macromolecules semantic type');
|
|
57
72
|
return false;
|
|
@@ -127,7 +142,7 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
127
142
|
'aligned': macroMolecule.tags['aligned'],
|
|
128
143
|
'separator': macroMolecule.tags['separator'],
|
|
129
144
|
'alphabet': macroMolecule.tags['alphabet'],
|
|
130
|
-
}
|
|
145
|
+
};
|
|
131
146
|
const sp = await getActivityCliffs(
|
|
132
147
|
df,
|
|
133
148
|
macroMolecule,
|
|
@@ -142,9 +157,9 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
142
157
|
tags,
|
|
143
158
|
sequenceSpace,
|
|
144
159
|
sequenceGetSimilarities,
|
|
145
|
-
|
|
160
|
+
drawSequences,
|
|
146
161
|
(options as any)[methodName]);
|
|
147
|
-
|
|
162
|
+
return sp;
|
|
148
163
|
}
|
|
149
164
|
|
|
150
165
|
//top-menu: Bio | Sequence Space...
|
|
@@ -155,18 +170,18 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
155
170
|
//input: string similarityMetric { choices:["Levenshtein", "Tanimoto"] }
|
|
156
171
|
//input: bool plotEmbeddings = true
|
|
157
172
|
export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: DG.Column, methodName: string,
|
|
158
|
-
similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean): Promise<DG.Viewer|undefined> {
|
|
173
|
+
similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean): Promise<DG.Viewer | undefined> {
|
|
159
174
|
if (!checkInputColumn(macroMolecule, 'Activity Cliffs'))
|
|
160
175
|
return;
|
|
161
176
|
const encodedCol = encodeMonomers(macroMolecule);
|
|
162
177
|
if (!encodedCol)
|
|
163
178
|
return;
|
|
164
179
|
const embedColsNames = getEmbeddingColsNames(table);
|
|
165
|
-
const withoutEmptyValues = DG.DataFrame.fromColumns([
|
|
180
|
+
const withoutEmptyValues = DG.DataFrame.fromColumns([encodedCol]).clone();
|
|
166
181
|
const emptyValsIdxs = removeEmptyStringRows(withoutEmptyValues, encodedCol);
|
|
167
182
|
|
|
168
183
|
const chemSpaceParams = {
|
|
169
|
-
seqCol: withoutEmptyValues.col(
|
|
184
|
+
seqCol: withoutEmptyValues.col(encodedCol.name)!,
|
|
170
185
|
methodName: methodName,
|
|
171
186
|
similarityMetric: similarityMetric,
|
|
172
187
|
embedAxesNames: embedColsNames
|
|
@@ -174,11 +189,11 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
174
189
|
const sequenceSpaceRes = await sequenceSpace(chemSpaceParams);
|
|
175
190
|
const embeddings = sequenceSpaceRes.coordinates;
|
|
176
191
|
for (const col of embeddings) {
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
192
|
+
const listValues = col.toList();
|
|
193
|
+
emptyValsIdxs.forEach((ind: number) => listValues.splice(ind, 0, null));
|
|
194
|
+
table.columns.add(DG.Column.fromList('double', col.name, listValues));
|
|
180
195
|
}
|
|
181
|
-
let sp;
|
|
196
|
+
let sp;
|
|
182
197
|
if (plotEmbeddings) {
|
|
183
198
|
for (const v of grok.shell.views) {
|
|
184
199
|
if (v.name === table.name)
|
|
@@ -12,11 +12,11 @@ category('WebLogo-positions', () => {
|
|
|
12
12
|
let currentView: DG.View;
|
|
13
13
|
|
|
14
14
|
const csvDf1 = `seq
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
15
|
+
ATC-G-TTGC--
|
|
16
|
+
ATC-G-TTGC--
|
|
17
|
+
-TC-G-TTGC--
|
|
18
|
+
-TC-GCTTGC--
|
|
19
|
+
-TC-GCTTGC--`;
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
before(async () => {
|
|
@@ -34,8 +34,10 @@ category('WebLogo-positions', () => {
|
|
|
34
34
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf1);
|
|
35
35
|
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
36
36
|
|
|
37
|
-
df.getCol('seq')
|
|
38
|
-
|
|
37
|
+
const seqCol: DG.Column = df.getCol('seq');
|
|
38
|
+
seqCol.semType = 'Macromolecule';
|
|
39
|
+
seqCol.setTag('units', 'fasta');
|
|
40
|
+
seqCol.setTag('alphabet', 'DNA');
|
|
39
41
|
|
|
40
42
|
const wlViewer: WebLogo = await df.plot.fromType('WebLogo') as unknown as WebLogo;
|
|
41
43
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
@@ -81,10 +83,11 @@ category('WebLogo-positions', () => {
|
|
|
81
83
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf2);
|
|
82
84
|
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
83
85
|
|
|
84
|
-
df.getCol('seq')
|
|
85
|
-
|
|
86
|
+
const seqCol: DG.Column = df.getCol('seq');
|
|
87
|
+
seqCol.semType = 'Macromolecule';
|
|
88
|
+
seqCol.setTag('units', 'fasta');
|
|
89
|
+
seqCol.setTag('alphabet', 'DNA');
|
|
86
90
|
|
|
87
|
-
let seq: Column = df.getCol('seq');
|
|
88
91
|
df.filter.init((i) => {
|
|
89
92
|
return i > 2;
|
|
90
93
|
});
|
|
@@ -125,8 +128,10 @@ category('WebLogo-positions', () => {
|
|
|
125
128
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf1);
|
|
126
129
|
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
127
130
|
|
|
128
|
-
df.getCol('seq')
|
|
129
|
-
|
|
131
|
+
const seqCol: DG.Column = df.getCol('seq');
|
|
132
|
+
seqCol.semType = 'Macromolecule';
|
|
133
|
+
seqCol.setTag('units', 'fasta');
|
|
134
|
+
seqCol.setTag('alphabet', 'DNA');
|
|
130
135
|
|
|
131
136
|
const wlViewer: WebLogo = await df.plot.fromType('WebLogo', {'skipEmptyPositions': true}) as unknown as WebLogo;
|
|
132
137
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
@@ -156,7 +161,6 @@ category('WebLogo-positions', () => {
|
|
|
156
161
|
expect(positions[i].freq[key].count, resAllDf1[i].freq[key].count);
|
|
157
162
|
}
|
|
158
163
|
}
|
|
159
|
-
|
|
160
164
|
});
|
|
161
165
|
|
|
162
166
|
});
|
|
@@ -5,6 +5,7 @@ import * as ui from 'datagrok-api/ui';
|
|
|
5
5
|
import * as DG from 'datagrok-api/dg';
|
|
6
6
|
|
|
7
7
|
import {importFasta} from '../package';
|
|
8
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
8
9
|
|
|
9
10
|
type DfReaderFunc = () => Promise<DG.DataFrame>;
|
|
10
11
|
|
|
@@ -198,44 +199,44 @@ MWRSWY-CKHP
|
|
|
198
199
|
test('NegativeSmiles', async () => { await _testNeg(readCsv('csvDfSmiles', csvDfSmiles), 'col1'); });
|
|
199
200
|
|
|
200
201
|
test('Dna1', async () => {
|
|
201
|
-
await _testPos(readCsv('csvDfDna1', csvDfDna1), 'seq', 'fasta', 'SEQ', 'DNA');
|
|
202
|
+
await _testPos(readCsv('csvDfDna1', csvDfDna1), 'seq', 'fasta', 'SEQ', 'DNA', 4, false);
|
|
202
203
|
});
|
|
203
204
|
test('Rna1', async () => {
|
|
204
|
-
await _testPos(readCsv('csvDfRna1', csvDfRna1), 'seq', 'fasta', 'SEQ', 'RNA');
|
|
205
|
+
await _testPos(readCsv('csvDfRna1', csvDfRna1), 'seq', 'fasta', 'SEQ', 'RNA', 4, false);
|
|
205
206
|
});
|
|
206
207
|
test('AA1', async () => {
|
|
207
|
-
await _testPos(readCsv('csvDfPt1', csvDfPt1), 'seq', 'fasta', 'SEQ', 'PT');
|
|
208
|
+
await _testPos(readCsv('csvDfPt1', csvDfPt1), 'seq', 'fasta', 'SEQ', 'PT', 20, false);
|
|
208
209
|
});
|
|
209
210
|
test('MsaDna1', async () => {
|
|
210
|
-
await _testPos(readCsv('csvDfMsaDna1', csvDfMsaDna1), 'seq', 'fasta', 'SEQ.MSA', 'DNA');
|
|
211
|
+
await _testPos(readCsv('csvDfMsaDna1', csvDfMsaDna1), 'seq', 'fasta', 'SEQ.MSA', 'DNA', 4, false);
|
|
211
212
|
});
|
|
212
213
|
|
|
213
214
|
test('MsaAA1', async () => {
|
|
214
|
-
await _testPos(readCsv('csvDfMsaPt1', csvDfMsaPt1), 'seq', 'fasta', 'SEQ.MSA', 'PT');
|
|
215
|
+
await _testPos(readCsv('csvDfMsaPt1', csvDfMsaPt1), 'seq', 'fasta', 'SEQ.MSA', 'PT', 20, false);
|
|
215
216
|
});
|
|
216
217
|
|
|
217
218
|
test('SepDna', async () => {
|
|
218
|
-
await _testPos(readCsv('csvDfSepDna', csvDfSepDna), 'seq', 'separator', 'SEQ', 'DNA', '*');
|
|
219
|
+
await _testPos(readCsv('csvDfSepDna', csvDfSepDna), 'seq', 'separator', 'SEQ', 'DNA', 4, false, '*');
|
|
219
220
|
});
|
|
220
221
|
test('SepRna', async () => {
|
|
221
|
-
await _testPos(readCsv('csvDfSepRna', csvDfSepRna), 'seq', 'separator', 'SEQ', 'RNA', '*');
|
|
222
|
+
await _testPos(readCsv('csvDfSepRna', csvDfSepRna), 'seq', 'separator', 'SEQ', 'RNA', 4, false, '*');
|
|
222
223
|
});
|
|
223
224
|
test('SepPt', async () => {
|
|
224
|
-
await _testPos(readCsv('csvDfSepPt', csvDfSepPt), 'seq', 'separator', 'SEQ', 'PT', '-');
|
|
225
|
+
await _testPos(readCsv('csvDfSepPt', csvDfSepPt), 'seq', 'separator', 'SEQ', 'PT', 20, false, '-');
|
|
225
226
|
});
|
|
226
227
|
test('SepUn1', async () => {
|
|
227
|
-
await _testPos(readCsv('csvDfSepUn1', csvDfSepUn1), 'seq', 'separator', 'SEQ', 'UN', '-');
|
|
228
|
+
await _testPos(readCsv('csvDfSepUn1', csvDfSepUn1), 'seq', 'separator', 'SEQ', 'UN', 8, true, '-');
|
|
228
229
|
});
|
|
229
230
|
test('SepUn2', async () => {
|
|
230
|
-
await _testPos(readCsv('csvDfSepUn2', csvDfSepUn2), 'seq', 'separator', 'SEQ', 'UN', '/');
|
|
231
|
+
await _testPos(readCsv('csvDfSepUn2', csvDfSepUn2), 'seq', 'separator', 'SEQ', 'UN', 9, true, '/');
|
|
231
232
|
});
|
|
232
233
|
|
|
233
234
|
test('SepMsaN1', async () => {
|
|
234
|
-
await _testPos(readCsv('csvDfSepMsaDna1', csvDfSepMsaDna1), 'seq', 'separator', 'SEQ.MSA', 'DNA', '-');
|
|
235
|
+
await _testPos(readCsv('csvDfSepMsaDna1', csvDfSepMsaDna1), 'seq', 'separator', 'SEQ.MSA', 'DNA', 4, false, '-');
|
|
235
236
|
});
|
|
236
237
|
|
|
237
238
|
test('SamplesFastaCsvPt', async () => {
|
|
238
|
-
await _testPos(readSamples(Samples.fastaCsv), 'sequence', 'fasta', 'SEQ', 'PT');
|
|
239
|
+
await _testPos(readSamples(Samples.fastaCsv), 'sequence', 'fasta', 'SEQ', 'PT', 20, false);
|
|
239
240
|
});
|
|
240
241
|
test('SamplesFastaCsvNegativeEntry', async () => {
|
|
241
242
|
await _testNeg(readSamples(Samples.fastaCsv), 'Entry');
|
|
@@ -248,7 +249,7 @@ MWRSWY-CKHP
|
|
|
248
249
|
});
|
|
249
250
|
|
|
250
251
|
test('SamplesFastaFastaPt', async () => {
|
|
251
|
-
await _testPos(readSamples(Samples.fastaFasta, readFileFasta), 'sequence', 'fasta', 'SEQ', 'PT');
|
|
252
|
+
await _testPos(readSamples(Samples.fastaFasta, readFileFasta), 'sequence', 'fasta', 'SEQ', 'PT', 20, false);
|
|
252
253
|
});
|
|
253
254
|
|
|
254
255
|
// peptidesComplex contains monomers with spaces in AlignedSequence columns, which are forbidden
|
|
@@ -266,7 +267,7 @@ MWRSWY-CKHP
|
|
|
266
267
|
});
|
|
267
268
|
|
|
268
269
|
test('samplesMsaComplexUn', async () => {
|
|
269
|
-
await _testPos(readSamples(Samples.msaComplex), 'MSA', 'separator', 'SEQ.MSA', 'UN', '/');
|
|
270
|
+
await _testPos(readSamples(Samples.msaComplex), 'MSA', 'separator', 'SEQ.MSA', 'UN', 161, true, '/');
|
|
270
271
|
});
|
|
271
272
|
test('samplesMsaComplexNegativeActivity', async () => {
|
|
272
273
|
await _testNeg(readSamples(Samples.msaComplex), 'Activity');
|
|
@@ -281,7 +282,7 @@ MWRSWY-CKHP
|
|
|
281
282
|
});
|
|
282
283
|
|
|
283
284
|
test('samplesHelmCsvHELM', async () => {
|
|
284
|
-
await _testPos(readSamples(Samples.helmCsv), 'HELM', 'helm', null, null, null);
|
|
285
|
+
await _testPos(readSamples(Samples.helmCsv), 'HELM', 'helm', null, null, 160, true, null);
|
|
285
286
|
});
|
|
286
287
|
|
|
287
288
|
test('samplesHelmCsvNegativeActivity', async () => {
|
|
@@ -297,7 +298,7 @@ MWRSWY-CKHP
|
|
|
297
298
|
await _testNeg(readSamples(Samples.testHelmCsv), 'Test type');
|
|
298
299
|
});
|
|
299
300
|
test('samplesTestHelmPositiveHelmString', async () => {
|
|
300
|
-
await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', 'helm', null, null, null);
|
|
301
|
+
await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', 'helm', null, null, 9, true, null);
|
|
301
302
|
});
|
|
302
303
|
test('samplesTestHelmNegativeValid', async () => {
|
|
303
304
|
await _testNeg(readSamples(Samples.testHelmCsv), 'Valid?');
|
|
@@ -329,7 +330,7 @@ MWRSWY-CKHP
|
|
|
329
330
|
});
|
|
330
331
|
|
|
331
332
|
test('samplesFastaPtPosSequence', async () => {
|
|
332
|
-
await _testPos(readSamples(Samples.fastaPtCsv), 'sequence', 'fasta', 'SEQ', 'PT');
|
|
333
|
+
await _testPos(readSamples(Samples.fastaPtCsv), 'sequence', 'fasta', 'SEQ', 'PT', 20, false);
|
|
333
334
|
});
|
|
334
335
|
|
|
335
336
|
test('samplesTestCerealNegativeCerealName', async () => {
|
|
@@ -371,7 +372,8 @@ MWRSWY-CKHP
|
|
|
371
372
|
export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
372
373
|
const df: DG.DataFrame = await readDf();
|
|
373
374
|
const col: DG.Column = df.col(colName)!;
|
|
374
|
-
const semType: string = await grok.functions
|
|
375
|
+
const semType: string = await grok.functions
|
|
376
|
+
.call('Bio:detectMacromolecule', {col: col}) as unknown as string;
|
|
375
377
|
if (semType)
|
|
376
378
|
col.semType = semType;
|
|
377
379
|
|
|
@@ -384,10 +386,15 @@ export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
|
384
386
|
}
|
|
385
387
|
}
|
|
386
388
|
|
|
387
|
-
export async function _testPos(
|
|
389
|
+
export async function _testPos(
|
|
390
|
+
readDf: DfReaderFunc, colName: string, units: string,
|
|
391
|
+
aligned: string | null, alphabet: string | null, alphabetSize: number, alphabetIsMultichar: boolean,
|
|
392
|
+
separator: string | null = null
|
|
393
|
+
) {
|
|
388
394
|
const df: DG.DataFrame = await readDf();
|
|
389
395
|
const col: DG.Column = df.col(colName)!;
|
|
390
|
-
const semType: string = await grok.functions
|
|
396
|
+
const semType: string = await grok.functions
|
|
397
|
+
.call('Bio:detectMacromolecule', {col: col}) as unknown as string;
|
|
391
398
|
if (semType)
|
|
392
399
|
col.semType = semType;
|
|
393
400
|
|
|
@@ -397,5 +404,13 @@ export async function _testPos(readDf: DfReaderFunc, colName: string, units: str
|
|
|
397
404
|
expect(col.getTag('alphabet'), alphabet);
|
|
398
405
|
if (separator)
|
|
399
406
|
expect(col.getTag('separator'), separator);
|
|
407
|
+
|
|
408
|
+
const uh = new UnitsHandler(col);
|
|
409
|
+
expect(uh.getAlphabetSize(), alphabetSize);
|
|
410
|
+
expect(uh.getAlphabetIsMultichar(), alphabetIsMultichar);
|
|
411
|
+
if (!uh.isHelm()) {
|
|
412
|
+
expect(uh.aligned, aligned);
|
|
413
|
+
expect(uh.alphabet, alphabet);
|
|
414
|
+
}
|
|
400
415
|
}
|
|
401
416
|
|
package/src/tests/msa-tests.ts
CHANGED
|
@@ -49,7 +49,8 @@ async function _testMsaIsCorrect(srcCsv: string, tgtCsv: string): Promise<void>
|
|
|
49
49
|
const tgtDf: DG.DataFrame = DG.DataFrame.fromCsv(tgtCsv);
|
|
50
50
|
|
|
51
51
|
const srcCol: DG.Column = srcDf.getCol('seq')!;
|
|
52
|
-
const semType: string = await grok.functions
|
|
52
|
+
const semType: string = await grok.functions
|
|
53
|
+
.call('Bio:detectMacromolecule', {col: srcCol}) as unknown as string;
|
|
53
54
|
if (semType)
|
|
54
55
|
srcCol.semType = semType;
|
|
55
56
|
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
import * as grok from 'datagrok-api/grok';
|
|
3
|
+
|
|
4
|
+
export function generateManySequences(): string {
|
|
5
|
+
let csvData = `MSA,Activity
|
|
6
|
+
meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me,5.30751`;
|
|
7
|
+
for (let i = 0; i < 10 ** 6; i++) {
|
|
8
|
+
csvData += `\n meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me,5.30751`;
|
|
9
|
+
}
|
|
10
|
+
return csvData;
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
export function generateLongSequence(): string {
|
|
14
|
+
let longSequence = `meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr`;
|
|
15
|
+
for (let i = 0; i < 10 ** 5; i++) {
|
|
16
|
+
longSequence += `/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/dv`;
|
|
17
|
+
}
|
|
18
|
+
longSequence += `//Phe_4Me,5.30751`;
|
|
19
|
+
let csvData = `MSA,Activity `;
|
|
20
|
+
for (let i = 0; i <= 10 ** 1 * 4; i++) {
|
|
21
|
+
csvData += `\n ${longSequence}`;
|
|
22
|
+
}
|
|
23
|
+
return csvData;
|
|
24
|
+
}
|
|
25
|
+
export function setTagsMacromolecule(col: DG.Column) {
|
|
26
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
27
|
+
col.setTag('units', 'separator');
|
|
28
|
+
col.setTag('aligned', 'SEQ.MSA');
|
|
29
|
+
col.setTag('alphabet', 'UN');
|
|
30
|
+
col.setTag('separator', '/');
|
|
31
|
+
return col;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
export function performanceTest(generateFunc: () => string,testName: string) {
|
|
35
|
+
const startTime: number = Date.now();
|
|
36
|
+
const csv = generateFunc();
|
|
37
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
38
|
+
const col: DG.Column = df.columns.byName('MSA');
|
|
39
|
+
setTagsMacromolecule(col);
|
|
40
|
+
grok.shell.addTableView(df);
|
|
41
|
+
|
|
42
|
+
const endTime: number = Date.now();
|
|
43
|
+
const elapsedTime: number = endTime - startTime;
|
|
44
|
+
console.log(`Performance test: ${testName}: ${elapsedTime}ms`);
|
|
45
|
+
}
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import * as OCL from 'openchemlib/full.js';
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
3
|
|
|
4
|
+
import {RDModule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
|
|
5
|
+
|
|
4
6
|
export async function getMacroMol(monomers: any[][]): Promise<string[]> {
|
|
5
7
|
let result: string[] = [];
|
|
6
8
|
const moduleRdkit = await grok.functions.call('Chem:getRdKitModule');
|
|
@@ -6,9 +6,12 @@ import {UnknownSeqPalette, UnknownSeqPalettes} from '@datagrok-libraries/bio/src
|
|
|
6
6
|
import {SplitterFunc, WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
7
7
|
import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
|
|
8
8
|
import * as ui from 'datagrok-api/ui';
|
|
9
|
+
import {printLeftOrCentered, DrawStyle} from '@datagrok-libraries/bio/src/utils/cell-renderer';
|
|
9
10
|
|
|
10
11
|
const undefinedColor = 'rgb(100,100,100)';
|
|
11
|
-
const
|
|
12
|
+
const monomerToShortFunction: (amino: string, maxLengthOfMonomer: number) => string = WebLogo.monomerToShort;
|
|
13
|
+
const gapRenderer = 5;
|
|
14
|
+
|
|
12
15
|
|
|
13
16
|
function getPalleteByType(paletteType: string): SeqPalette {
|
|
14
17
|
switch (paletteType) {
|
|
@@ -44,81 +47,6 @@ export function processSequence(subParts: string[]): [string[], boolean] {
|
|
|
44
47
|
}
|
|
45
48
|
|
|
46
49
|
|
|
47
|
-
/**
|
|
48
|
-
* A function that prints a string aligned to left or centered.
|
|
49
|
-
*
|
|
50
|
-
* @param {number} x x coordinate.
|
|
51
|
-
* @param {number} y y coordinate.
|
|
52
|
-
* @param {number} w Width.
|
|
53
|
-
* @param {number} h Height.
|
|
54
|
-
* @param {CanvasRenderingContext2D} g Canvas rendering context.
|
|
55
|
-
* @param {string} s String to print.
|
|
56
|
-
* @param {string} [color=undefinedColor] String color.
|
|
57
|
-
* @param {number} [pivot=0] Pirvot.
|
|
58
|
-
* @param {boolean} [left=false] Is left aligned.
|
|
59
|
-
* @param {number} [transparencyRate=0.0] Transparency rate where 1.0 is fully transparent
|
|
60
|
-
* @param {string} [separator=''] Is separator for sequence.
|
|
61
|
-
* @param {boolean} [last=false] Is checker if element last or not.
|
|
62
|
-
* @return {number} x coordinate to start printing at.
|
|
63
|
-
*/
|
|
64
|
-
export function printLeftOrCentered(
|
|
65
|
-
x: number, y: number, w: number, h: number,
|
|
66
|
-
g: CanvasRenderingContext2D, s: string, color = undefinedColor,
|
|
67
|
-
pivot: number = 0, left = false, transparencyRate: number = 1.0,
|
|
68
|
-
separator: string = '', last: boolean = false, drawStyle: string = 'classic', maxWord: any = {}, maxWordIdx: number = 0, gridCell: any = {}): number {
|
|
69
|
-
g.textAlign = 'start';
|
|
70
|
-
const colorPart = s.substring(0);
|
|
71
|
-
let grayPart = last ? '' : separator;
|
|
72
|
-
if (drawStyle === 'msa') {
|
|
73
|
-
grayPart = '';
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
let textSize: any = g.measureText(colorPart + grayPart);
|
|
77
|
-
const indent = 5;
|
|
78
|
-
|
|
79
|
-
let maxColorTextSize = g.measureText(colorPart).width;
|
|
80
|
-
let colorTextSize = g.measureText(colorPart).width;
|
|
81
|
-
const dy = (textSize.fontBoundingBoxAscent + textSize.fontBoundingBoxDescent) / 2;
|
|
82
|
-
textSize = textSize.width;
|
|
83
|
-
if (drawStyle === 'msa') {
|
|
84
|
-
maxColorTextSize = maxWord[maxWordIdx];
|
|
85
|
-
textSize = maxWord[maxWordIdx];
|
|
86
|
-
if (maxColorTextSize > maxWord) {
|
|
87
|
-
maxWord[maxWordIdx] = maxColorTextSize;
|
|
88
|
-
gridCell.cell.column.temp = maxWord;
|
|
89
|
-
}
|
|
90
|
-
if (maxWordIdx > (maxWord['bio-maxIndex'] ?? 0)) {
|
|
91
|
-
maxWord['bio-maxIndex'] = maxWordIdx;
|
|
92
|
-
gridCell.cell.column.temp = maxWord;
|
|
93
|
-
}
|
|
94
|
-
}
|
|
95
|
-
|
|
96
|
-
function draw(dx1: number, dx2: number): void {
|
|
97
|
-
g.fillStyle = color;
|
|
98
|
-
g.globalAlpha = transparencyRate;
|
|
99
|
-
if (drawStyle === 'classic') {
|
|
100
|
-
g.fillText(colorPart, x + dx1, y + dy);
|
|
101
|
-
g.fillStyle = grayColor;
|
|
102
|
-
g.fillText(grayPart, x + dx2, y + dy);
|
|
103
|
-
}
|
|
104
|
-
if (drawStyle === 'msa') {
|
|
105
|
-
g.fillStyle = color;
|
|
106
|
-
g.fillText(colorPart, x + dx1 + ((maxWord[maxWordIdx] - colorTextSize) / 2), y + dy);
|
|
107
|
-
}
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
if (left || textSize > w) {
|
|
111
|
-
draw(indent, indent + maxColorTextSize);
|
|
112
|
-
return x + maxColorTextSize + g.measureText(grayPart).width;
|
|
113
|
-
|
|
114
|
-
} else {
|
|
115
|
-
const dx = (w - textSize) / 2;
|
|
116
|
-
draw(dx, dx + maxColorTextSize);
|
|
117
|
-
return x + dx + maxColorTextSize;
|
|
118
|
-
}
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
|
|
122
50
|
export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
123
51
|
get name(): string { return 'sequence'; }
|
|
124
52
|
|
|
@@ -133,12 +61,9 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
133
61
|
return;
|
|
134
62
|
}
|
|
135
63
|
const maxLengthWordsSum = gridCell.cell.column.temp['bio-sum-maxLengthWords'];
|
|
136
|
-
if (maxLengthWordsSum == null) {
|
|
137
|
-
gridCell.cell.column.setTag('.calculatedCellRender', 'unexist');
|
|
138
|
-
}
|
|
139
64
|
const maxIndex = gridCell.cell.column.temp['bio-maxIndex'];
|
|
140
65
|
//@ts-ignore
|
|
141
|
-
const argsX = e.layerX - gridCell.gridColumn.left
|
|
66
|
+
const argsX = e.layerX - gridCell.gridColumn.left + (gridCell.gridColumn.left - gridCell.bounds.x);
|
|
142
67
|
let left = 0;
|
|
143
68
|
let right = maxIndex;
|
|
144
69
|
let found = false;
|
|
@@ -164,7 +89,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
164
89
|
const separator = gridCell.cell.column.getTag('separator') ?? '';
|
|
165
90
|
const splitterFunc: SplitterFunc = WebLogo.getSplitter('separator', separator);
|
|
166
91
|
const subParts: string[] = splitterFunc(gridCell.cell.value);
|
|
167
|
-
ui.tooltip.show(ui.div(subParts[left]), e.x + 16, e.y + 16);
|
|
92
|
+
(((subParts[left]?.length ?? 0) > 0)) ? ui.tooltip.show(ui.div(subParts[left]), e.x + 16, e.y + 16) : ui.tooltip.hide();
|
|
168
93
|
}
|
|
169
94
|
|
|
170
95
|
/**
|
|
@@ -186,6 +111,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
186
111
|
const grid = gridCell.gridRow !== -1 ? gridCell.grid : undefined;
|
|
187
112
|
const cell = gridCell.cell;
|
|
188
113
|
const [type, subtype, paletteType] = gridCell.cell.column.getTag(DG.TAGS.UNITS).split(':');
|
|
114
|
+
const minDistanceRenderer = 50;
|
|
189
115
|
w = grid ? Math.min(grid.canvas.width - x, w) : g.canvas.width - x;
|
|
190
116
|
g.save();
|
|
191
117
|
g.beginPath();
|
|
@@ -201,25 +127,34 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
201
127
|
const palette = getPalleteByType(paletteType);
|
|
202
128
|
|
|
203
129
|
const separator = gridCell.cell.column.getTag('separator') ?? '';
|
|
204
|
-
const
|
|
130
|
+
const splitLimit = gridCell.bounds.width / 5;
|
|
131
|
+
const splitterFunc: SplitterFunc = WebLogo.getSplitter(units, separator, gridCell.bounds.width / 5);
|
|
132
|
+
|
|
205
133
|
|
|
206
|
-
const
|
|
207
|
-
let monomerToShortFunction: (amino: string, maxLengthOfMonomer: number) => string = WebLogo.monomerToShort;
|
|
208
|
-
let maxLengthOfMonomer = 8;
|
|
134
|
+
const maxLengthOfMonomer = 8;
|
|
209
135
|
|
|
210
136
|
let maxLengthWords: any = {};
|
|
211
|
-
if (gridCell.cell.column.getTag('.calculatedCellRender') !==
|
|
212
|
-
|
|
213
|
-
|
|
137
|
+
if (gridCell.cell.column.getTag('.calculatedCellRender') !== splitLimit.toString()) {
|
|
138
|
+
let samples = 0;
|
|
139
|
+
while (samples < Math.min(gridCell.cell.column.length, 100)) {
|
|
140
|
+
let column = gridCell.cell.column.get(samples);
|
|
141
|
+
let subParts: string[] = splitterFunc(column);
|
|
214
142
|
subParts.forEach((amino, index) => {
|
|
215
|
-
let
|
|
216
|
-
if (
|
|
217
|
-
maxLengthWords[index] =
|
|
143
|
+
let textSize = monomerToShortFunction(amino, maxLengthOfMonomer).length * 7 + gapRenderer;
|
|
144
|
+
if (textSize > (maxLengthWords[index] ?? 0)) {
|
|
145
|
+
maxLengthWords[index] = textSize;
|
|
218
146
|
}
|
|
219
147
|
if (index > (maxLengthWords['bio-maxIndex'] ?? 0)) {
|
|
220
148
|
maxLengthWords['bio-maxIndex'] = index;
|
|
221
149
|
}
|
|
222
150
|
});
|
|
151
|
+
samples += 1;
|
|
152
|
+
}
|
|
153
|
+
let minLength = 3 * 7;
|
|
154
|
+
for (let i = 0; i <= maxLengthWords['bio-maxIndex']; i++) {
|
|
155
|
+
if (maxLengthWords[i] < minLength) {
|
|
156
|
+
maxLengthWords[i] = minLength;
|
|
157
|
+
}
|
|
223
158
|
}
|
|
224
159
|
let maxLengthWordSum: any = {};
|
|
225
160
|
maxLengthWordSum[0] = maxLengthWords[0];
|
|
@@ -231,7 +166,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
231
166
|
'bio-maxIndex': maxLengthWords['bio-maxIndex'],
|
|
232
167
|
'bio-maxLengthWords': maxLengthWords
|
|
233
168
|
};
|
|
234
|
-
gridCell.cell.column.setTag('.calculatedCellRender',
|
|
169
|
+
gridCell.cell.column.setTag('.calculatedCellRender', splitLimit.toString());
|
|
235
170
|
} else {
|
|
236
171
|
maxLengthWords = gridCell.cell.column.temp['bio-maxLengthWords'];
|
|
237
172
|
}
|
|
@@ -239,15 +174,19 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
239
174
|
const subParts: string[] = splitterFunc(cell.value);
|
|
240
175
|
let x1 = x;
|
|
241
176
|
let color = undefinedColor;
|
|
242
|
-
let drawStyle =
|
|
243
|
-
if (gridCell.cell.column.getTag('aligned').includes('MSA')) {
|
|
244
|
-
drawStyle =
|
|
177
|
+
let drawStyle = DrawStyle.classic;
|
|
178
|
+
if (gridCell.cell.column.getTag('aligned').includes('MSA') && gridCell.cell.column.getTag('units') === 'separator') {
|
|
179
|
+
drawStyle = DrawStyle.MSA;
|
|
245
180
|
}
|
|
246
|
-
subParts.
|
|
181
|
+
subParts.every((amino, index) => {
|
|
247
182
|
color = palette.get(amino);
|
|
248
183
|
g.fillStyle = undefinedColor;
|
|
249
184
|
let last = index === subParts.length - 1;
|
|
250
185
|
x1 = printLeftOrCentered(x1, y, w, h, g, monomerToShortFunction(amino, maxLengthOfMonomer), color, 0, true, 1.0, separator, last, drawStyle, maxLengthWords, index, gridCell);
|
|
186
|
+
if (x1 - minDistanceRenderer - gridCell.gridColumn.left + (gridCell.gridColumn.left - gridCell.bounds.x) > gridCell.bounds.width) {
|
|
187
|
+
return false;
|
|
188
|
+
}
|
|
189
|
+
return true;
|
|
251
190
|
});
|
|
252
191
|
|
|
253
192
|
g.restore();
|