@datagrok/bio 1.7.21 → 1.7.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +24 -12
- package/dist/package-test.js +415 -59
- package/dist/package.js +206 -58
- package/files/tests/testAlertCollection.csv +1252 -0
- package/package.json +8 -5
- package/src/__jest__/remote.test.ts +2 -2
- package/src/package-test.ts +1 -0
- package/src/package.ts +27 -12
- package/src/tests/WebLogo-positions-test.ts +78 -6
- package/src/tests/WebLogo-test.ts +1 -0
- package/src/tests/detectors-test.ts +9 -0
- package/src/tests/fasta-handler-test.ts +141 -0
- package/src/utils/cell-renderer.ts +77 -29
- package/src/utils/constants.ts +3 -4
- package/src/utils/sequence-activity-cliffs.ts +1 -1
- package/src/viewers/vd-regions-viewer.ts +17 -0
- package/test-Bio-4f0c8bae6479-367602e1.html +358 -0
- package/test-Bio-4f0c8bae6479-9ad1eb12.html +0 -346
package/package.json
CHANGED
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@datagrok/bio",
|
|
3
|
-
"beta": false,
|
|
4
3
|
"friendlyName": "Bio",
|
|
5
|
-
"version": "1.7.21",
|
|
4
|
+
"author": {
|
|
5
|
+
"name": "Leonid Stolbov",
|
|
6
|
+
"email": "lstolbov@datagrok.ai"
|
|
7
|
+
},
|
|
8
|
+
"version": "1.7.24",
|
|
6
9
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
7
10
|
"repository": {
|
|
8
11
|
"type": "git",
|
|
@@ -11,9 +14,9 @@
|
|
|
11
14
|
},
|
|
12
15
|
"dependencies": {
|
|
13
16
|
"@biowasm/aioli": ">=2.4.0",
|
|
14
|
-
"@datagrok-libraries/bio": "^3.0.
|
|
15
|
-
"@datagrok-libraries/ml": "^3.0.
|
|
16
|
-
"@datagrok-libraries/utils": "^1.4
|
|
17
|
+
"@datagrok-libraries/bio": "^3.0.3",
|
|
18
|
+
"@datagrok-libraries/ml": "^3.0.3",
|
|
19
|
+
"@datagrok-libraries/utils": "^1.5.4",
|
|
17
20
|
"cash-dom": "latest",
|
|
18
21
|
"datagrok-api": "^1.5.0",
|
|
19
22
|
"dayjs": "^1.11.4",
|
|
@@ -51,10 +51,10 @@ it('TEST', async () => {
|
|
|
51
51
|
let failReport = '';
|
|
52
52
|
for (let i = 0; i < df.rowCount; i++) {
|
|
53
53
|
if (cStatus.get(i)) {
|
|
54
|
-
passReport += `Test result : ${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
|
|
54
|
+
passReport += `Test result : Success : ${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
|
|
55
55
|
} else {
|
|
56
56
|
failed = true;
|
|
57
|
-
failReport += `Test result : ${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
|
|
57
|
+
failReport += `Test result : Failed : ${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
|
|
58
58
|
}
|
|
59
59
|
}
|
|
60
60
|
resolve({failReport, passReport, failed});
|
package/src/package-test.ts
CHANGED
|
@@ -11,6 +11,7 @@ import './tests/activity-cliffs-tests';
|
|
|
11
11
|
import './tests/splitters-test';
|
|
12
12
|
import './tests/renderers-test';
|
|
13
13
|
import './tests/convert-test';
|
|
14
|
+
import './tests/fasta-handler-test';
|
|
14
15
|
import './tests/WebLogo-positions-test';
|
|
15
16
|
|
|
16
17
|
export const _package = new DG.Package();
|
package/src/package.ts
CHANGED
|
@@ -5,7 +5,7 @@ import * as DG from 'datagrok-api/dg';
|
|
|
5
5
|
|
|
6
6
|
export const _package = new DG.Package();
|
|
7
7
|
|
|
8
|
-
import {
|
|
8
|
+
import {MacromoleculeDifferenceCellRenderer, MonomerCellRenderer} from './utils/cell-renderer';
|
|
9
9
|
import {WebLogo, SeqColStats} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
10
10
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
11
11
|
import {runKalign, testMSAEnoughMemory} from './utils/multiple-sequence-alignment';
|
|
@@ -24,6 +24,7 @@ import {lru} from './utils/cell-renderer';
|
|
|
24
24
|
import {representationsWidget} from './widgets/representations';
|
|
25
25
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
26
26
|
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
27
|
+
import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils'
|
|
27
28
|
|
|
28
29
|
|
|
29
30
|
//tags: init
|
|
@@ -161,16 +162,22 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
161
162
|
if (!encodedCol)
|
|
162
163
|
return;
|
|
163
164
|
const embedColsNames = getEmbeddingColsNames(table);
|
|
165
|
+
const withoutEmptyValues = DG.DataFrame.fromColumns([macroMolecule]).clone();
|
|
166
|
+
const emptyValsIdxs = removeEmptyStringRows(withoutEmptyValues, encodedCol);
|
|
167
|
+
|
|
164
168
|
const chemSpaceParams = {
|
|
165
|
-
seqCol:
|
|
169
|
+
seqCol: withoutEmptyValues.col(macroMolecule.name)!,
|
|
166
170
|
methodName: methodName,
|
|
167
171
|
similarityMetric: similarityMetric,
|
|
168
172
|
embedAxesNames: embedColsNames
|
|
169
173
|
};
|
|
170
174
|
const sequenceSpaceRes = await sequenceSpace(chemSpaceParams);
|
|
171
175
|
const embeddings = sequenceSpaceRes.coordinates;
|
|
172
|
-
for (const col of embeddings)
|
|
173
|
-
|
|
176
|
+
for (const col of embeddings) {
|
|
177
|
+
const listValues = col.toList();
|
|
178
|
+
emptyValsIdxs.forEach((ind: number) => listValues.splice(ind, 0, null));
|
|
179
|
+
table.columns.add(DG.Column.fromFloat32Array(col.name, listValues));
|
|
180
|
+
}
|
|
174
181
|
if (plotEmbeddings) {
|
|
175
182
|
for (const v of grok.shell.views) {
|
|
176
183
|
if (v.name === table.name)
|
|
@@ -241,6 +248,14 @@ export async function multipleSequenceAlignmentAny(table: DG.DataFrame, col: DG.
|
|
|
241
248
|
return msaCol;
|
|
242
249
|
}
|
|
243
250
|
|
|
251
|
+
//name: Bio | MSA
|
|
252
|
+
//tags: bio, panel
|
|
253
|
+
//input: column sequence { semType: Macromolecule }
|
|
254
|
+
//output: column result
|
|
255
|
+
export async function panelMSA(col: DG.Column): Promise<DG.Column | null> {
|
|
256
|
+
return multipleSequenceAlignmentAny(col.dataFrame, col);
|
|
257
|
+
}
|
|
258
|
+
|
|
244
259
|
//name: Composition Analysis
|
|
245
260
|
//top-menu: Bio | Composition Analysis
|
|
246
261
|
//output: viewer result
|
|
@@ -334,20 +349,20 @@ export function convertPanel(col: DG.Column): void {
|
|
|
334
349
|
convert(col);
|
|
335
350
|
}
|
|
336
351
|
|
|
337
|
-
//name:
|
|
352
|
+
//name: monomerCellRenderer
|
|
338
353
|
//tags: cellRenderer
|
|
339
|
-
//meta.cellType:
|
|
354
|
+
//meta.cellType: Monomer
|
|
340
355
|
//output: grid_cell_renderer result
|
|
341
|
-
export function
|
|
342
|
-
return new
|
|
356
|
+
export function monomerCellRenderer(): MonomerCellRenderer {
|
|
357
|
+
return new MonomerCellRenderer();
|
|
343
358
|
}
|
|
344
359
|
|
|
345
|
-
//name:
|
|
360
|
+
//name: MacromoleculeDifferenceCellRenderer
|
|
346
361
|
//tags: cellRenderer
|
|
347
|
-
//meta.cellType:
|
|
362
|
+
//meta.cellType: MacromoleculeDifference
|
|
348
363
|
//output: grid_cell_renderer result
|
|
349
|
-
export function
|
|
350
|
-
return new
|
|
364
|
+
export function macromoleculeDifferenceCellRenderer(): MacromoleculeDifferenceCellRenderer {
|
|
365
|
+
return new MacromoleculeDifferenceCellRenderer();
|
|
351
366
|
}
|
|
352
367
|
|
|
353
368
|
//name: testDetectMacromolecule
|
|
@@ -1,13 +1,14 @@
|
|
|
1
|
+
|
|
1
2
|
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
2
3
|
|
|
3
4
|
import * as grok from 'datagrok-api/grok';
|
|
4
5
|
import * as ui from 'datagrok-api/ui';
|
|
5
6
|
import * as DG from 'datagrok-api/dg';
|
|
6
7
|
import {PositionInfo, PositionMonomerInfo, WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
7
|
-
|
|
8
8
|
category('WebLogo-positions', () => {
|
|
9
9
|
let tvList: DG.TableView[];
|
|
10
10
|
let dfList: DG.DataFrame[];
|
|
11
|
+
let currentView: DG.View;
|
|
11
12
|
|
|
12
13
|
const csvDf1 = `seq
|
|
13
14
|
ATC-G-TTGC--
|
|
@@ -22,13 +23,14 @@ category('WebLogo-positions', () => {
|
|
|
22
23
|
before(async () => {
|
|
23
24
|
tvList = [];
|
|
24
25
|
dfList = [];
|
|
26
|
+
currentView = grok.shell.tv;
|
|
25
27
|
});
|
|
26
28
|
|
|
27
29
|
after(async () => {
|
|
28
|
-
dfList.forEach((df: DG.DataFrame) => { grok.shell.closeTable(df);
|
|
30
|
+
dfList.forEach((df: DG.DataFrame) => { grok.shell.closeTable(df);});
|
|
29
31
|
tvList.forEach((tv: DG.TableView) => tv.close());
|
|
32
|
+
currentView = grok.shell.tv;
|
|
30
33
|
});
|
|
31
|
-
|
|
32
34
|
test('allPositions', async () => {
|
|
33
35
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf1);
|
|
34
36
|
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
@@ -55,12 +57,82 @@ category('WebLogo-positions', () => {
|
|
|
55
57
|
new PositionInfo('11', {'-': new PositionMonomerInfo(5)}),
|
|
56
58
|
new PositionInfo('12', {'-': new PositionMonomerInfo(5)})
|
|
57
59
|
];
|
|
60
|
+
console.log(positions);
|
|
61
|
+
expect(positions.length,resAllDf1.length);
|
|
58
62
|
// check all positions are equal resAllDf1
|
|
59
63
|
for (let i = 0; i < positions.length; i++) {
|
|
60
64
|
expect(positions[i].name, resAllDf1[i].name);
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
65
|
+
for (const key in positions[i].freq) {
|
|
66
|
+
expect(positions[i].freq[key].count, resAllDf1[i].freq[key].count);
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
});
|
|
71
|
+
test('positions with shrinkEmptyTail option', async () => {
|
|
72
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf1);
|
|
73
|
+
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
74
|
+
|
|
75
|
+
const wlViewer: WebLogo = await df.plot.fromType('WebLogo', {'shrinkEmptyTail': true}) as unknown as WebLogo;
|
|
76
|
+
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
77
|
+
|
|
78
|
+
tvList.push(tv);
|
|
79
|
+
dfList.push(df);
|
|
80
|
+
|
|
81
|
+
const positions: PositionInfo[] = wlViewer['positions'];
|
|
82
|
+
|
|
83
|
+
const resAllDf1: PositionInfo[] = [
|
|
84
|
+
new PositionInfo('1', {'A': new PositionMonomerInfo(2), '-': new PositionMonomerInfo(3)}),
|
|
85
|
+
new PositionInfo('2', {'T': new PositionMonomerInfo(5)}),
|
|
86
|
+
new PositionInfo('3', {'C': new PositionMonomerInfo(5)}),
|
|
87
|
+
new PositionInfo('4', {'-': new PositionMonomerInfo(5)}),
|
|
88
|
+
new PositionInfo('5', {'G': new PositionMonomerInfo(5)}),
|
|
89
|
+
new PositionInfo('6', {'-': new PositionMonomerInfo(3), 'C': new PositionMonomerInfo(2)}),
|
|
90
|
+
new PositionInfo('7', {'T': new PositionMonomerInfo(5)}),
|
|
91
|
+
new PositionInfo('8', {'T': new PositionMonomerInfo(5)}),
|
|
92
|
+
new PositionInfo('9', {'G': new PositionMonomerInfo(5)}),
|
|
93
|
+
new PositionInfo('10', {'C': new PositionMonomerInfo(5)})
|
|
94
|
+
];
|
|
95
|
+
|
|
96
|
+
console.log(positions);
|
|
97
|
+
for (let i = 0; i < positions.length; i++) {
|
|
98
|
+
expect(positions[i].name, resAllDf1[i].name);
|
|
99
|
+
for (const key in positions[i].freq) {
|
|
100
|
+
expect(positions[i].freq[key].count, resAllDf1[i].freq[key].count);
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
});
|
|
105
|
+
|
|
106
|
+
test('positions with skipEmptyPositions option', async () => {
|
|
107
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf1);
|
|
108
|
+
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
109
|
+
|
|
110
|
+
const wlViewer: WebLogo = await df.plot.fromType('WebLogo', {'skipEmptyPositions': false}) as unknown as WebLogo;
|
|
111
|
+
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
112
|
+
|
|
113
|
+
tvList.push(tv);
|
|
114
|
+
dfList.push(df);
|
|
115
|
+
|
|
116
|
+
const positions: PositionInfo[] = wlViewer['positions'];
|
|
117
|
+
|
|
118
|
+
const resAllDf1: PositionInfo[] = [
|
|
119
|
+
new PositionInfo('1', {'A': new PositionMonomerInfo(2), '-': new PositionMonomerInfo(3)}),
|
|
120
|
+
new PositionInfo('2', {'T': new PositionMonomerInfo(5)}),
|
|
121
|
+
new PositionInfo('3', {'C': new PositionMonomerInfo(5)}),
|
|
122
|
+
new PositionInfo('5', {'G': new PositionMonomerInfo(5)}),
|
|
123
|
+
new PositionInfo('6', {'-': new PositionMonomerInfo(3), 'C': new PositionMonomerInfo(2)}),
|
|
124
|
+
new PositionInfo('7', {'T': new PositionMonomerInfo(5)}),
|
|
125
|
+
new PositionInfo('8', {'T': new PositionMonomerInfo(5)}),
|
|
126
|
+
new PositionInfo('9', {'G': new PositionMonomerInfo(5)}),
|
|
127
|
+
new PositionInfo('10', {'C': new PositionMonomerInfo(5)})
|
|
128
|
+
];
|
|
129
|
+
console.log(positions);
|
|
130
|
+
|
|
131
|
+
for (let i = 0; i < positions.length; i++) {
|
|
132
|
+
expect(positions[i].name, resAllDf1[i].name);
|
|
133
|
+
for (const key in positions[i].freq) {
|
|
134
|
+
expect(positions[i].freq[key].count, resAllDf1[i].freq[key].count);
|
|
135
|
+
}
|
|
64
136
|
}
|
|
65
137
|
|
|
66
138
|
});
|
|
@@ -125,6 +125,7 @@ MWRSWY-CKHP
|
|
|
125
125
|
testSpgi100 = 'testSpgi100',
|
|
126
126
|
testUnichemSources = 'testUnichemSources',
|
|
127
127
|
testDmvOffices = 'testDmvOffices',
|
|
128
|
+
testAlertCollection = 'testAlertCollection',
|
|
128
129
|
}
|
|
129
130
|
|
|
130
131
|
const samples: { [key: string]: string } = {
|
|
@@ -145,6 +146,7 @@ MWRSWY-CKHP
|
|
|
145
146
|
'testSpgi100': 'System:AppData/Bio/tests/testSpgi100.csv',
|
|
146
147
|
'testUnichemSources': 'System:AppData/Bio/tests/testUnichemSources.csv',
|
|
147
148
|
'testDmvOffices': 'System:AppData/Bio/tests/testDmvOffices.csv',
|
|
149
|
+
'testAlertCollection': 'System:AppData/Bio/tests/testAlertCollection.csv',
|
|
148
150
|
};
|
|
149
151
|
|
|
150
152
|
const _samplesDfs: { [key: string]: Promise<DG.DataFrame> } = {};
|
|
@@ -342,6 +344,9 @@ MWRSWY-CKHP
|
|
|
342
344
|
test('samplesTestSpgi100NegativePrimaryScaffoldName', async () => {
|
|
343
345
|
await _testNeg(readSamples(Samples.testSpgi100), 'Primary Scaffold Name');
|
|
344
346
|
});
|
|
347
|
+
test('samplesTestSpgi100NegativeSampleName', async () => {
|
|
348
|
+
await _testNeg(readSamples(Samples.testSpgi100), 'Sample Name');
|
|
349
|
+
});
|
|
345
350
|
|
|
346
351
|
test('samplesTestUnichemSourcesNegativeSrcUrl', async () => {
|
|
347
352
|
await _testNeg(readSamples(Samples.testUnichemSources), 'src_url');
|
|
@@ -356,6 +361,10 @@ MWRSWY-CKHP
|
|
|
356
361
|
test('samplesTestDmvOfficesNegativeCity', async () => {
|
|
357
362
|
await _testNeg(readSamples(Samples.testDmvOffices), 'City');
|
|
358
363
|
});
|
|
364
|
+
|
|
365
|
+
test('samplesTestAlertCollectionNegativeSmarts', async () => {
|
|
366
|
+
await _testNeg(readSamples(Samples.testAlertCollection), 'smarts');
|
|
367
|
+
});
|
|
359
368
|
});
|
|
360
369
|
|
|
361
370
|
export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
/* Do not change these import lines to match external modules in webpack configuration */
|
|
2
|
+
import * as grok from 'datagrok-api/grok';
|
|
3
|
+
import * as ui from 'datagrok-api/ui';
|
|
4
|
+
import * as DG from 'datagrok-api/dg';
|
|
5
|
+
|
|
6
|
+
import {category, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
7
|
+
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
8
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
category('fastaFileHandler', () => {
|
|
12
|
+
const fastaNormalFormatting = `>description:1
|
|
13
|
+
MDYKETLLMPKTDFPMRGGLPNKEPQIQEKW
|
|
14
|
+
|
|
15
|
+
>description:2
|
|
16
|
+
MIEVFLFGIVLGLIPITLAGLFVTAYLQYRRGDQLDL
|
|
17
|
+
|
|
18
|
+
>description:3
|
|
19
|
+
MMELVLKTIIGPIVVGVVLRIVDKWLNKDK
|
|
20
|
+
|
|
21
|
+
>description:4
|
|
22
|
+
MDRTDEVSNHTHDKPTLTWFEEIFEEYHSPFHN
|
|
23
|
+
`;
|
|
24
|
+
|
|
25
|
+
const fastaExtraSpaces = `>description:1
|
|
26
|
+
MDYKETLLMPKTDFPMRGGLPNKEPQIQEKW
|
|
27
|
+
|
|
28
|
+
>description:2
|
|
29
|
+
MI EVF LFGIVLGLI PITLAGLFVTAY LQYRRGDQLDL
|
|
30
|
+
|
|
31
|
+
>description:3
|
|
32
|
+
M MELVLKTI IGPI VVGVVLR IVDKWLNKDK
|
|
33
|
+
|
|
34
|
+
>description:4
|
|
35
|
+
MDR TDEVSNHTHDKP TLTWFEEIFEEYHSPFHN
|
|
36
|
+
`;
|
|
37
|
+
|
|
38
|
+
const fastaExtraNewlines = `>description:1
|
|
39
|
+
|
|
40
|
+
MDYKETLLMPKTDFPMRGGLPNKEPQIQEKW
|
|
41
|
+
|
|
42
|
+
>description:2
|
|
43
|
+
MIEVF
|
|
44
|
+
LFGIVLGLI
|
|
45
|
+
PITLAGLFVTA
|
|
46
|
+
YLQYRRGDQLDL
|
|
47
|
+
|
|
48
|
+
>description:3
|
|
49
|
+
M
|
|
50
|
+
ME
|
|
51
|
+
|
|
52
|
+
LVLKTIIG
|
|
53
|
+
|
|
54
|
+
PIVVGVVLRI
|
|
55
|
+
VDKWLNKDK
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
>description:4
|
|
59
|
+
|
|
60
|
+
MDRT
|
|
61
|
+
|
|
62
|
+
DEVSNHTHDKP
|
|
63
|
+
|
|
64
|
+
TLTWFEEIFEE
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
YHSPFHN
|
|
69
|
+
`;
|
|
70
|
+
// a "broken" fasta file
|
|
71
|
+
// const fastaBroken = `
|
|
72
|
+
|
|
73
|
+
// >description:1
|
|
74
|
+
// MDYKETLLM
|
|
75
|
+
// PKTDFPMRGGLPN
|
|
76
|
+
// KEPQIQEKW
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
// >description:2
|
|
81
|
+
// MIEVFL FGIVLGLIPI TLAGLFVTAYLQYRRGDQLDL
|
|
82
|
+
|
|
83
|
+
// >description:3
|
|
84
|
+
|
|
85
|
+
// M
|
|
86
|
+
// MELVLKTIIGP
|
|
87
|
+
// IVVGVVLR
|
|
88
|
+
// IVDKWLNKD
|
|
89
|
+
|
|
90
|
+
// K
|
|
91
|
+
|
|
92
|
+
// >description:4
|
|
93
|
+
// MDRTDEV
|
|
94
|
+
|
|
95
|
+
// SNHTHDKP
|
|
96
|
+
// TLTWFEEI
|
|
97
|
+
// FEE
|
|
98
|
+
|
|
99
|
+
// YHSPFHN
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
// `;
|
|
103
|
+
|
|
104
|
+
const descriptionsArray = [
|
|
105
|
+
'description:1', 'description:2', 'description:3', 'description:4',
|
|
106
|
+
];
|
|
107
|
+
const descriptionCol = DG.Column.fromStrings('description', descriptionsArray);
|
|
108
|
+
|
|
109
|
+
const sequencesArray = [
|
|
110
|
+
'MDYKETLLMPKTDFPMRGGLPNKEPQIQEKW',
|
|
111
|
+
'MIEVFLFGIVLGLIPITLAGLFVTAYLQYRRGDQLDL',
|
|
112
|
+
'MMELVLKTIIGPIVVGVVLRIVDKWLNKDK',
|
|
113
|
+
'MDRTDEVSNHTHDKPTLTWFEEIFEEYHSPFHN',
|
|
114
|
+
];
|
|
115
|
+
const sequencesCol = DG.Column.fromStrings('sequence', sequencesArray);
|
|
116
|
+
sequencesCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
117
|
+
UnitsHandler.setUnitsToFastaColumn(sequencesCol);
|
|
118
|
+
|
|
119
|
+
const fastaDf = DG.DataFrame.fromColumns([descriptionCol, sequencesCol]);
|
|
120
|
+
|
|
121
|
+
function _testColumnsParser(inputFasta: string) {
|
|
122
|
+
const ffh = new FastaFileHandler(inputFasta);
|
|
123
|
+
const parsedDescriptionsArray = ffh.descriptionsArray;
|
|
124
|
+
const parsedSequencesArray = ffh.sequencesArray;
|
|
125
|
+
expectArray(
|
|
126
|
+
[parsedDescriptionsArray, parsedSequencesArray],
|
|
127
|
+
[descriptionsArray, sequencesArray]
|
|
128
|
+
);
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
// test parser
|
|
132
|
+
test('testNormalFormatting', async () => {
|
|
133
|
+
_testColumnsParser(fastaNormalFormatting);
|
|
134
|
+
});
|
|
135
|
+
test('testExtraSpaces', async () => {
|
|
136
|
+
_testColumnsParser(fastaExtraSpaces);
|
|
137
|
+
});
|
|
138
|
+
test('testExtraNewlines', async () => {
|
|
139
|
+
_testColumnsParser(fastaExtraNewlines);
|
|
140
|
+
});
|
|
141
|
+
});
|
|
@@ -2,7 +2,7 @@ import * as C from './constants';
|
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
3
|
import {AminoacidsPalettes} from '@datagrok-libraries/bio/src/aminoacids';
|
|
4
4
|
import {NucleotidesPalettes} from '@datagrok-libraries/bio/src/nucleotides';
|
|
5
|
-
import {UnknownSeqPalettes} from '@datagrok-libraries/bio/src/unknown';
|
|
5
|
+
import {UnknownSeqPalette, UnknownSeqPalettes} from '@datagrok-libraries/bio/src/unknown';
|
|
6
6
|
import {SplitterFunc, WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
7
7
|
import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
|
|
8
8
|
import * as ui from 'datagrok-api/ui';
|
|
@@ -44,6 +44,7 @@ export function processSequence(subParts: string[]): [string[], boolean] {
|
|
|
44
44
|
return [text, simplified];
|
|
45
45
|
}
|
|
46
46
|
|
|
47
|
+
|
|
47
48
|
/**
|
|
48
49
|
* A function that prints a string aligned to left or centered.
|
|
49
50
|
*
|
|
@@ -65,33 +66,46 @@ function printLeftOrCentered(
|
|
|
65
66
|
x: number, y: number, w: number, h: number,
|
|
66
67
|
g: CanvasRenderingContext2D, s: string, color = undefinedColor,
|
|
67
68
|
pivot: number = 0, left = false, transparencyRate: number = 1.0,
|
|
68
|
-
separator: string = '', last: boolean = false): number {
|
|
69
|
+
separator: string = '', last: boolean = false, drawStyle: string = 'classic', maxWord: any = {}, maxWordIdx: number = 0, gridCell: any = {}): number {
|
|
69
70
|
g.textAlign = 'start';
|
|
70
71
|
const colorPart = s.substring(0);
|
|
71
|
-
let grayPart =
|
|
72
|
+
let grayPart = last ? '' : separator;
|
|
73
|
+
if (drawStyle === 'msa') {
|
|
74
|
+
grayPart = ' ';
|
|
75
|
+
}
|
|
72
76
|
|
|
73
|
-
|
|
77
|
+
let textSize: any = g.measureText(colorPart + grayPart);
|
|
74
78
|
const indent = 5;
|
|
75
79
|
|
|
76
|
-
|
|
80
|
+
let colorTextSize = g.measureText(colorPart).width;
|
|
77
81
|
const dy = (textSize.fontBoundingBoxAscent + textSize.fontBoundingBoxDescent) / 2;
|
|
82
|
+
textSize = textSize.width;
|
|
83
|
+
if (drawStyle === 'msa') {
|
|
84
|
+
if (colorTextSize > maxWord) {
|
|
85
|
+
maxWord[maxWordIdx] = colorTextSize;
|
|
86
|
+
gridCell.cell.column.temp = maxWord;
|
|
87
|
+
}
|
|
88
|
+
colorTextSize = maxWord[maxWordIdx];
|
|
89
|
+
textSize = maxWord[maxWordIdx];
|
|
90
|
+
}
|
|
78
91
|
|
|
79
92
|
function draw(dx1: number, dx2: number): void {
|
|
80
93
|
g.fillStyle = color;
|
|
81
94
|
g.globalAlpha = transparencyRate;
|
|
82
95
|
g.fillText(colorPart, x + dx1, y + dy);
|
|
83
|
-
|
|
84
|
-
|
|
96
|
+
if (drawStyle === 'classic') {
|
|
97
|
+
g.fillStyle = grayColor;
|
|
98
|
+
g.fillText(grayPart, x + dx2, y + dy);
|
|
99
|
+
}
|
|
85
100
|
}
|
|
86
101
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
return x + colorTextSize.width + g.measureText(grayPart).width;
|
|
102
|
+
if (left || textSize > w) {
|
|
103
|
+
draw(indent, indent + colorTextSize);
|
|
104
|
+
return x + colorTextSize + g.measureText(grayPart).width;
|
|
91
105
|
} else {
|
|
92
|
-
const dx = (w - textSize.width) / 2;
|
|
93
|
-
draw(dx, dx + colorTextSize.width);
|
|
94
|
-
return x + dx + colorTextSize.width;
|
|
106
|
+
const dx = (w - textSize) / 2;
|
|
107
|
+
draw(dx, dx + colorTextSize);
|
|
108
|
+
return x + dx + colorTextSize;
|
|
95
109
|
}
|
|
96
110
|
}
|
|
97
111
|
|
|
@@ -114,7 +128,7 @@ function findMonomers(helmString: string) {
|
|
|
114
128
|
export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
115
129
|
get name(): string { return 'macromoleculeSequence'; }
|
|
116
130
|
|
|
117
|
-
get cellType(): string { return C.SEM_TYPES.
|
|
131
|
+
get cellType(): string { return C.SEM_TYPES.MACROMOLECULE; }
|
|
118
132
|
|
|
119
133
|
get defaultHeight(): number { return 30; }
|
|
120
134
|
|
|
@@ -198,15 +212,45 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
198
212
|
const separator = gridCell.cell.column.getTag('separator') ?? '';
|
|
199
213
|
const splitterFunc: SplitterFunc = WebLogo.getSplitter(units, gridCell.cell.column.getTag('separator'));
|
|
200
214
|
|
|
215
|
+
const columns = gridCell.cell.column.categories;
|
|
216
|
+
let monomerToShortFunction: (amino: string, maxLengthOfMonomer: number) => string = WebLogo.monomerToShort;
|
|
217
|
+
let maxLengthOfMonomer = 8;
|
|
218
|
+
|
|
219
|
+
let maxLengthWords = {};
|
|
220
|
+
// check if gridCell.cell.column.temp is array
|
|
221
|
+
if (gridCell.cell.column.getTag('.calculatedCellRender') !== 'exist') {
|
|
222
|
+
for (let i = 0; i < columns.length; i++) {
|
|
223
|
+
let subParts: string[] = splitterFunc(columns[i]);
|
|
224
|
+
subParts.forEach((amino, index) => {
|
|
225
|
+
//@ts-ignore
|
|
226
|
+
let textSizeWidth = g.measureText(monomerToShortFunction(amino, maxLengthOfMonomer));
|
|
227
|
+
//@ts-ignore
|
|
228
|
+
if (textSizeWidth.width > (maxLengthWords[index] ?? 0)) {
|
|
229
|
+
//@ts-ignore
|
|
230
|
+
maxLengthWords[index] = textSizeWidth.width;
|
|
231
|
+
}
|
|
232
|
+
});
|
|
233
|
+
}
|
|
234
|
+
gridCell.cell.column.temp = maxLengthWords;
|
|
235
|
+
gridCell.cell.column.setTag('.calculatedCellRender', 'exist');
|
|
236
|
+
} else {
|
|
237
|
+
maxLengthWords = gridCell.cell.column.temp;
|
|
238
|
+
}
|
|
239
|
+
|
|
201
240
|
const subParts: string[] = splitterFunc(cell.value);
|
|
202
|
-
// console.log(subParts);
|
|
203
241
|
let x1 = x;
|
|
204
242
|
let color = undefinedColor;
|
|
243
|
+
// get max length word in subParts
|
|
244
|
+
let tagUnits = gridCell.cell.column.getTag(DG.TAGS.UNITS);
|
|
245
|
+
let drawStyle = 'classic';
|
|
246
|
+
if (tagUnits.includes('MSA')) {
|
|
247
|
+
drawStyle = 'msa';
|
|
248
|
+
}
|
|
205
249
|
subParts.forEach((amino, index) => {
|
|
206
250
|
color = palette.get(amino);
|
|
207
251
|
g.fillStyle = undefinedColor;
|
|
208
252
|
let last = index === subParts.length - 1;
|
|
209
|
-
x1 = printLeftOrCentered(x1, y, w, h, g, amino, color, 0, true, 1.0, separator, last);
|
|
253
|
+
x1 = printLeftOrCentered(x1, y, w, h, g, monomerToShortFunction(amino, maxLengthOfMonomer), color, 0, true, 1.0, separator, last, drawStyle, maxLengthWords, index, gridCell);
|
|
210
254
|
});
|
|
211
255
|
|
|
212
256
|
g.restore();
|
|
@@ -215,11 +259,10 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
215
259
|
}
|
|
216
260
|
}
|
|
217
261
|
|
|
262
|
+
export class MonomerCellRenderer extends DG.GridCellRenderer {
|
|
263
|
+
get name(): string {return 'MonomerCR';}
|
|
218
264
|
|
|
219
|
-
|
|
220
|
-
get name(): string {return 'aminoAcidsCR';}
|
|
221
|
-
|
|
222
|
-
get cellType(): string {return C.SEM_TYPES.AMINO_ACIDS;}
|
|
265
|
+
get cellType(): string {return C.SEM_TYPES.MONOMER;}
|
|
223
266
|
|
|
224
267
|
get defaultHeight(): number {return 15;}
|
|
225
268
|
|
|
@@ -256,10 +299,10 @@ export class AminoAcidsCellRenderer extends DG.GridCellRenderer {
|
|
|
256
299
|
}
|
|
257
300
|
}
|
|
258
301
|
|
|
259
|
-
export class AlignedSequenceDifferenceCellRenderer extends DG.GridCellRenderer {
|
|
260
|
-
get name(): string {return '
|
|
302
|
+
export class MacromoleculeDifferenceCellRenderer extends DG.GridCellRenderer {
|
|
303
|
+
get name(): string {return 'MacromoleculeDifferenceCR';}
|
|
261
304
|
|
|
262
|
-
get cellType(): string {return C.SEM_TYPES.
|
|
305
|
+
get cellType(): string {return C.SEM_TYPES.MACROMOLECULE_DIFFERENCE;}
|
|
263
306
|
|
|
264
307
|
get defaultHeight(): number {return 30;}
|
|
265
308
|
|
|
@@ -295,23 +338,28 @@ export class AlignedSequenceDifferenceCellRenderer extends DG.GridCellRenderer {
|
|
|
295
338
|
//TODO: can this be replaced/merged with splitSequence?
|
|
296
339
|
const [s1, s2] = s.split('#');
|
|
297
340
|
const separator = gridCell.tableColumn!.tags[C.TAGS.SEPARATOR];
|
|
298
|
-
const
|
|
299
|
-
const
|
|
341
|
+
const units: string = gridCell.tableColumn!.tags[DG.TAGS.UNITS];
|
|
342
|
+
const splitter = WebLogo.getSplitter(units, separator);
|
|
343
|
+
const subParts1 = splitter(s1);
|
|
344
|
+
const subParts2 = splitter(s2);
|
|
300
345
|
const [text] = processSequence(subParts1);
|
|
301
346
|
const textSize = g.measureText(text.join(''));
|
|
302
347
|
let updatedX = Math.max(x, x + (w - (textSize.width + subParts1.length * 4)) / 2);
|
|
303
348
|
// 28 is the height of the two substitutions on top of each other + space
|
|
304
349
|
const updatedY = Math.max(y, y + (h - 28) / 2);
|
|
305
350
|
|
|
306
|
-
|
|
351
|
+
let palette: SeqPalette = UnknownSeqPalettes.Color;
|
|
352
|
+
if (units != 'HELM')
|
|
353
|
+
palette = getPalleteByType(units.substring(units.length - 2));
|
|
354
|
+
|
|
355
|
+
const vShift = 7;
|
|
307
356
|
for (let i = 0; i < subParts1.length; i++) {
|
|
308
357
|
const amino1 = subParts1[i];
|
|
309
358
|
const amino2 = subParts2[i];
|
|
310
359
|
const color1 = palette.get(amino1);
|
|
311
|
-
const color2 = palette.get(amino2);
|
|
312
360
|
|
|
313
361
|
if (amino1 != amino2) {
|
|
314
|
-
const
|
|
362
|
+
const color2 = palette.get(amino2);
|
|
315
363
|
const subX0 = printLeftOrCentered(updatedX, updatedY - vShift, w, h, g, amino1, color1, 0, true);
|
|
316
364
|
const subX1 = printLeftOrCentered(updatedX, updatedY + vShift, w, h, g, amino2, color2, 0, true);
|
|
317
365
|
updatedX = Math.max(subX1, subX0);
|
package/src/utils/constants.ts
CHANGED
|
@@ -23,12 +23,11 @@ export enum TAGS {
|
|
|
23
23
|
}
|
|
24
24
|
|
|
25
25
|
export enum SEM_TYPES {
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
ALIGNED_SEQUENCE_DIFFERENCE = 'alignedSequenceDifference',
|
|
26
|
+
MONOMER = 'Monomer',
|
|
27
|
+
MACROMOLECULE_DIFFERENCE = 'MacromoleculeDifference',
|
|
29
28
|
ACTIVITY = 'activity',
|
|
30
29
|
ACTIVITY_SCALED = 'activityScaled',
|
|
31
|
-
|
|
30
|
+
MACROMOLECULE = 'Macromolecule',
|
|
32
31
|
}
|
|
33
32
|
|
|
34
33
|
export const STATS = 'stats';
|
|
@@ -8,7 +8,7 @@ export async function sequenceGetSimilarities(col: DG.Column, seq: string): Prom
|
|
|
8
8
|
const stringArray = col.toList();
|
|
9
9
|
const distances = new Array(stringArray.length).fill(0.0);
|
|
10
10
|
for (let i = 0; i < stringArray.length; ++i)
|
|
11
|
-
distances[i] = getSimilarityFromDistance(AvailableMetrics['String']['Levenshtein'](stringArray[i], seq));
|
|
11
|
+
distances[i] = stringArray[i] ? getSimilarityFromDistance(AvailableMetrics['String']['Levenshtein'](stringArray[i], seq)) : 0;
|
|
12
12
|
return DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, 'distances', distances);
|
|
13
13
|
}
|
|
14
14
|
|