@datagrok/bio 2.4.31 → 2.4.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +6 -8
- package/README.md +22 -7
- package/css/msa.css +3 -0
- package/detectors.js +21 -12
- package/dist/1.js +2 -0
- package/dist/1.js.map +1 -0
- package/dist/18.js +2 -0
- package/dist/18.js.map +1 -0
- package/dist/190.js +2 -0
- package/dist/190.js.map +1 -0
- package/dist/452.js +2 -0
- package/dist/452.js.map +1 -0
- package/dist/729.js +2 -0
- package/dist/729.js.map +1 -0
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/files/libraries/broken-lib.sdf +136 -0
- package/files/libraries/group1/mock-lib-3.json +74 -0
- package/files/libraries/mock-lib-2.json +48 -0
- package/files/tests/100_3_clustests.csv +100 -0
- package/files/tests/100_3_clustests_empty_vals.csv +100 -0
- package/files/tests/peptides_motif-with-random_10000.csv +9998 -0
- package/package.json +4 -4
- package/scripts/sequence_generator.py +164 -48
- package/src/analysis/sequence-activity-cliffs.ts +7 -9
- package/src/analysis/sequence-diversity-viewer.ts +8 -3
- package/src/analysis/sequence-search-base-viewer.ts +4 -3
- package/src/analysis/sequence-similarity-viewer.ts +13 -7
- package/src/analysis/sequence-space.ts +15 -12
- package/src/analysis/workers/mm-distance-array-service.ts +48 -0
- package/src/analysis/workers/mm-distance-array-worker.ts +29 -0
- package/src/analysis/workers/mm-distance-worker-creator.ts +6 -9
- package/src/apps/web-logo-app.ts +34 -0
- package/src/calculations/monomerLevelMols.ts +10 -12
- package/src/demo/bio01-similarity-diversity.ts +4 -5
- package/src/demo/bio01a-hierarchical-clustering-and-sequence-space.ts +6 -7
- package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +7 -8
- package/src/demo/bio03-atomic-level.ts +1 -4
- package/src/demo/bio05-helm-msa-sequence-space.ts +6 -4
- package/src/demo/utils.ts +3 -4
- package/src/package-test.ts +1 -2
- package/src/package.ts +135 -82
- package/src/seq_align.ts +482 -483
- package/src/substructure-search/substructure-search.ts +3 -3
- package/src/tests/Palettes-test.ts +1 -1
- package/src/tests/WebLogo-positions-test.ts +12 -35
- package/src/tests/_first-tests.ts +1 -1
- package/src/tests/activity-cliffs-tests.ts +10 -7
- package/src/tests/activity-cliffs-utils.ts +6 -5
- package/src/tests/bio-tests.ts +20 -25
- package/src/tests/checkInputColumn-tests.ts +5 -11
- package/src/tests/converters-test.ts +19 -37
- package/src/tests/detectors-benchmark-tests.ts +35 -37
- package/src/tests/detectors-tests.ts +29 -34
- package/src/tests/detectors-weak-and-likely-tests.ts +11 -21
- package/src/tests/fasta-export-tests.ts +3 -3
- package/src/tests/fasta-handler-test.ts +2 -3
- package/src/tests/lib-tests.ts +2 -4
- package/src/tests/mm-distance-tests.ts +25 -17
- package/src/tests/monomer-libraries-tests.ts +1 -1
- package/src/tests/msa-tests.ts +12 -9
- package/src/tests/pepsea-tests.ts +6 -3
- package/src/tests/renderers-test.ts +13 -11
- package/src/tests/sequence-space-test.ts +10 -8
- package/src/tests/sequence-space-utils.ts +6 -4
- package/src/tests/similarity-diversity-tests.ts +47 -61
- package/src/tests/splitters-test.ts +14 -20
- package/src/tests/to-atomic-level-tests.ts +9 -17
- package/src/tests/units-handler-splitted-tests.ts +106 -0
- package/src/tests/units-handler-tests.ts +22 -26
- package/src/tests/utils/sequences-generators.ts +6 -2
- package/src/tests/utils.ts +10 -4
- package/src/tests/viewers.ts +1 -1
- package/src/utils/atomic-works.ts +49 -57
- package/src/utils/cell-renderer.ts +25 -8
- package/src/utils/check-input-column.ts +19 -4
- package/src/utils/constants.ts +3 -3
- package/src/utils/convert.ts +56 -23
- package/src/utils/monomer-lib.ts +83 -64
- package/src/utils/multiple-sequence-alignment-ui.ts +35 -21
- package/src/utils/multiple-sequence-alignment.ts +2 -2
- package/src/utils/pepsea.ts +17 -7
- package/src/utils/save-as-fasta.ts +11 -4
- package/src/utils/ui-utils.ts +1 -1
- package/src/viewers/vd-regions-viewer.ts +21 -22
- package/src/viewers/web-logo-viewer.ts +189 -154
- package/src/widgets/bio-substructure-filter.ts +9 -6
- package/src/widgets/representations.ts +11 -12
- package/tsconfig.json +1 -1
- package/dist/258.js +0 -2
- package/dist/258.js.map +0 -1
- package/dist/457.js +0 -2
- package/dist/457.js.map +0 -1
- package/dist/562.js +0 -2
- package/dist/562.js.map +0 -1
- package/dist/925.js +0 -2
- package/dist/925.js.map +0 -1
- package/src/analysis/workers/mm-distance-worker.ts +0 -16
|
@@ -39,7 +39,7 @@ export function substructureSearchDialog(col: DG.Column<string>): void {
|
|
|
39
39
|
});
|
|
40
40
|
|
|
41
41
|
const df = DG.DataFrame.create(1);
|
|
42
|
-
df.columns.addNewString(SUBSTR_HELM_COL_NAME).init((
|
|
42
|
+
df.columns.addNewString(SUBSTR_HELM_COL_NAME).init((_i) => '');
|
|
43
43
|
df.col(SUBSTR_HELM_COL_NAME)!.semType = col.semType;
|
|
44
44
|
df.col(SUBSTR_HELM_COL_NAME)!.setTag(DG.TAGS.UNITS, NOTATION.HELM);
|
|
45
45
|
const grid = df.plot.grid();
|
|
@@ -56,7 +56,7 @@ export function substructureSearchDialog(col: DG.Column<string>): void {
|
|
|
56
56
|
ui.dialog('Substructure Search')
|
|
57
57
|
.add(ui.divV([
|
|
58
58
|
ui.divText(`Notation: ${units}`),
|
|
59
|
-
inputsDiv
|
|
59
|
+
inputsDiv,
|
|
60
60
|
]))
|
|
61
61
|
.onOK(async () => {
|
|
62
62
|
let substructure = units === NOTATION.HELM ? df.get(SUBSTR_HELM_COL_NAME, 0) : substructureInput.value;
|
|
@@ -104,7 +104,7 @@ function prepareSubstructureRegex(substructure: string, separator: string) {
|
|
|
104
104
|
export async function helmSubstructureSearch(substructure: string, col: DG.Column<string>): Promise<DG.BitSet> {
|
|
105
105
|
if (col.version !== col.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION])
|
|
106
106
|
await invalidateMols(col, true);
|
|
107
|
-
const substructureCol: DG.Column<string> = DG.Column.string('helm', 1).init((
|
|
107
|
+
const substructureCol: DG.Column<string> = DG.Column.string('helm', 1).init((_i) => substructure);
|
|
108
108
|
substructureCol.setTag(DG.TAGS.UNITS, NOTATION.HELM);
|
|
109
109
|
const substructureMolsCol =
|
|
110
110
|
await getMonomericMols(substructureCol, true, col.temp[MONOMERIC_COL_TAGS.MONOMERS_DICT]);
|
|
@@ -2,7 +2,7 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
-
import {
|
|
5
|
+
import {category, test, expect} from '@datagrok-libraries/utils/src/test';
|
|
6
6
|
|
|
7
7
|
import {_testPaletteN, _testPaletteAA} from '@datagrok-libraries/bio/src/tests/palettes-tests';
|
|
8
8
|
import {AminoacidsPalettes} from '@datagrok-libraries/bio/src/aminoacids';
|
|
@@ -7,14 +7,11 @@ import {
|
|
|
7
7
|
countForMonomerAtPosition,
|
|
8
8
|
PositionInfo as PI,
|
|
9
9
|
PositionMonomerInfo as PMI,
|
|
10
|
-
WebLogoViewer
|
|
10
|
+
WebLogoViewer,
|
|
11
11
|
} from '../viewers/web-logo-viewer';
|
|
12
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
12
13
|
|
|
13
14
|
category('WebLogo-positions', () => {
|
|
14
|
-
let tvList: DG.TableView[];
|
|
15
|
-
let dfList: DG.DataFrame[];
|
|
16
|
-
let currentView: DG.ViewBase;
|
|
17
|
-
|
|
18
15
|
const csvDf1 = `seq
|
|
19
16
|
ATC-G-TTGC--
|
|
20
17
|
ATC-G-TTGC--
|
|
@@ -24,16 +21,10 @@ ATC-G-TTGC--
|
|
|
24
21
|
|
|
25
22
|
|
|
26
23
|
before(async () => {
|
|
27
|
-
tvList = [];
|
|
28
|
-
dfList = [];
|
|
29
|
-
// currentView = grok.shell.v;
|
|
30
24
|
});
|
|
31
25
|
|
|
32
26
|
after(async () => {
|
|
33
27
|
// Closing opened views causes the error 'Cannot read properties of null (reading 'f')'
|
|
34
|
-
// dfList.forEach((df: DG.DataFrame) => { grok.shell.closeTable(df); });
|
|
35
|
-
// tvList.forEach((tv: DG.TableView) => tv.close());
|
|
36
|
-
// grok.shell.v = currentView;
|
|
37
28
|
});
|
|
38
29
|
|
|
39
30
|
test('allPositions', async () => {
|
|
@@ -49,9 +40,6 @@ ATC-G-TTGC--
|
|
|
49
40
|
const wlViewer: WebLogoViewer = (await df.plot.fromType('WebLogo')) as WebLogoViewer;
|
|
50
41
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
51
42
|
|
|
52
|
-
tvList.push(tv);
|
|
53
|
-
dfList.push(df);
|
|
54
|
-
|
|
55
43
|
const positions: PI[] = wlViewer['positions'];
|
|
56
44
|
|
|
57
45
|
const resAllDf1: PI[] = [
|
|
@@ -73,11 +61,10 @@ ATC-G-TTGC--
|
|
|
73
61
|
|
|
74
62
|
for (let i = 0; i < positions.length; i++) {
|
|
75
63
|
expect(positions[i].name, resAllDf1[i].name);
|
|
76
|
-
for (const key in positions[i].freq)
|
|
64
|
+
for (const key in positions[i].freq)
|
|
77
65
|
expect(positions[i].freq[key].count, resAllDf1[i].freq[key].count);
|
|
78
|
-
}
|
|
79
66
|
}
|
|
80
|
-
});
|
|
67
|
+
}, {skipReason: 'GROK-13300'});
|
|
81
68
|
|
|
82
69
|
test('positions with shrinkEmptyTail option true (filtered)', async () => {
|
|
83
70
|
const csvDf2 = `seq
|
|
@@ -104,9 +91,6 @@ ATC-G-TTGC--
|
|
|
104
91
|
{'shrinkEmptyTail': true})) as WebLogoViewer;
|
|
105
92
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
106
93
|
|
|
107
|
-
tvList.push(tv);
|
|
108
|
-
dfList.push(df);
|
|
109
|
-
|
|
110
94
|
const positions: PI[] = wlViewer['positions'];
|
|
111
95
|
|
|
112
96
|
const resAllDf1: PI[] = [
|
|
@@ -125,11 +109,10 @@ ATC-G-TTGC--
|
|
|
125
109
|
|
|
126
110
|
for (let i = 0; i < positions.length; i++) {
|
|
127
111
|
expect(positions[i].name, resAllDf1[i].name);
|
|
128
|
-
for (const key in positions[i].freq)
|
|
112
|
+
for (const key in positions[i].freq)
|
|
129
113
|
expect(positions[i].freq[key].count, resAllDf1[i].freq[key].count);
|
|
130
|
-
}
|
|
131
114
|
}
|
|
132
|
-
});
|
|
115
|
+
}, {skipReason: 'GROK-13300'});
|
|
133
116
|
|
|
134
117
|
test('positions with skipEmptyPositions option', async () => {
|
|
135
118
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf1);
|
|
@@ -145,9 +128,6 @@ ATC-G-TTGC--
|
|
|
145
128
|
{'skipEmptyPositions': true})) as WebLogoViewer;
|
|
146
129
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
147
130
|
|
|
148
|
-
tvList.push(tv);
|
|
149
|
-
dfList.push(df);
|
|
150
|
-
|
|
151
131
|
const resPosList: PI[] = wlViewer['positions'];
|
|
152
132
|
|
|
153
133
|
const tgtPosList: PI[] = [
|
|
@@ -159,7 +139,7 @@ ATC-G-TTGC--
|
|
|
159
139
|
new PI(6, '7', {'T': new PMI(5)}),
|
|
160
140
|
new PI(7, '8', {'T': new PMI(5)}),
|
|
161
141
|
new PI(8, '9', {'G': new PMI(5)}),
|
|
162
|
-
new PI(9, '10', {'C': new PMI(5)})
|
|
142
|
+
new PI(9, '10', {'C': new PMI(5)}),
|
|
163
143
|
];
|
|
164
144
|
|
|
165
145
|
expect(resPosList.length, tgtPosList.length);
|
|
@@ -168,7 +148,7 @@ ATC-G-TTGC--
|
|
|
168
148
|
const tgtPos = tgtPosList[posI];
|
|
169
149
|
expectPositionInfo(resPos, tgtPos);
|
|
170
150
|
}
|
|
171
|
-
});
|
|
151
|
+
}, {skipReason: 'GROK-13300'});
|
|
172
152
|
|
|
173
153
|
test('count sequences for monomer at position', async () => {
|
|
174
154
|
const df: DG.DataFrame = buildDfWithSeqCol(csvDf1, NOTATION.FASTA, ALPHABET.DNA, 'SEQ.MSA');
|
|
@@ -179,13 +159,10 @@ ATC-G-TTGC--
|
|
|
179
159
|
const wlViewer: WebLogoViewer = (await df.plot.fromType('WebLogo', {
|
|
180
160
|
startPositionName: '3',
|
|
181
161
|
endPositionName: '7',
|
|
182
|
-
skipEmptyPositions: true
|
|
162
|
+
skipEmptyPositions: true,
|
|
183
163
|
})) as WebLogoViewer;
|
|
184
164
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
185
165
|
|
|
186
|
-
tvList.push(tv);
|
|
187
|
-
dfList.push(df);
|
|
188
|
-
|
|
189
166
|
const resPosList: PI[] = wlViewer['positions'];
|
|
190
167
|
const tgtPosList: PI[] = [
|
|
191
168
|
new PI(2, '3', {'C': new PMI(5)}),
|
|
@@ -201,11 +178,11 @@ ATC-G-TTGC--
|
|
|
201
178
|
expectPositionInfo(resPos, tgtPos);
|
|
202
179
|
}
|
|
203
180
|
|
|
204
|
-
const splitter: SplitterFunc = wlViewer['splitter']!;
|
|
205
181
|
const atPI1: PI = resPosList[1];
|
|
206
|
-
const
|
|
182
|
+
const uh = UnitsHandler.getOrCreate(seqCol);
|
|
183
|
+
const countAt1 = countForMonomerAtPosition(df, uh, df.filter, 'G', atPI1);
|
|
207
184
|
expect(countAt1, 5);
|
|
208
|
-
});
|
|
185
|
+
}, {skipReason: 'GROK-13300'});
|
|
209
186
|
});
|
|
210
187
|
|
|
211
188
|
function expectPositionInfo(actualPos: PI, expectedPos: PI): void {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
|
|
3
|
-
import {category,
|
|
3
|
+
import {category, test} from '@datagrok-libraries/utils/src/test';
|
|
4
4
|
|
|
5
5
|
category('_first', () => {
|
|
6
6
|
/** The first test of the package to evaluate problems with imports. */
|
|
@@ -6,7 +6,7 @@ import {after, before, category, test} from '@datagrok-libraries/utils/src/test'
|
|
|
6
6
|
|
|
7
7
|
import {readDataframe} from './utils';
|
|
8
8
|
import {_testActivityCliffsOpen} from './activity-cliffs-utils';
|
|
9
|
-
import {
|
|
9
|
+
import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
category('activityCliffs', async () => {
|
|
@@ -29,20 +29,23 @@ category('activityCliffs', async () => {
|
|
|
29
29
|
});
|
|
30
30
|
|
|
31
31
|
test('activityCliffsOpens', async () => {
|
|
32
|
-
actCliffsDf = await readDataframe(
|
|
32
|
+
actCliffsDf = await readDataframe(
|
|
33
|
+
DG.Test.isInBenchmark ? 'test/peptides_motif-with-random_10000.csv' : 'tests/100_3_clustests.csv',
|
|
34
|
+
);
|
|
33
35
|
dfList.push(actCliffsDf);
|
|
34
36
|
actCliffsTableView = grok.shell.addTableView(actCliffsDf);
|
|
35
37
|
viewList.push(actCliffsTableView);
|
|
38
|
+
const cliffsNum = DG.Test.isInBenchmark ? 6 : 3;
|
|
36
39
|
|
|
37
|
-
await _testActivityCliffsOpen(actCliffsDf,
|
|
38
|
-
}
|
|
40
|
+
await _testActivityCliffsOpen(actCliffsDf, cliffsNum, DimReductionMethods.UMAP, 'sequence');
|
|
41
|
+
});
|
|
39
42
|
|
|
40
43
|
test('activityCliffsWithEmptyRows', async () => {
|
|
41
|
-
actCliffsDfWithEmptyRows = await readDataframe('tests/
|
|
44
|
+
actCliffsDfWithEmptyRows = await readDataframe('tests/100_3_clustests_empty_vals.csv');
|
|
42
45
|
dfList.push(actCliffsDfWithEmptyRows);
|
|
43
46
|
actCliffsTableViewWithEmptyRows = grok.shell.addTableView(actCliffsDfWithEmptyRows);
|
|
44
47
|
viewList.push(actCliffsTableViewWithEmptyRows);
|
|
45
48
|
|
|
46
|
-
await _testActivityCliffsOpen(actCliffsDfWithEmptyRows,
|
|
47
|
-
}
|
|
49
|
+
await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, 3, DimReductionMethods.UMAP, 'sequence');
|
|
50
|
+
});
|
|
48
51
|
});
|
|
@@ -1,15 +1,16 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
3
|
|
|
4
|
-
import {
|
|
4
|
+
import {expect} from '@datagrok-libraries/utils/src/test';
|
|
5
5
|
import {activityCliffs} from '../package';
|
|
6
|
-
import {
|
|
6
|
+
import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
7
7
|
|
|
8
|
-
export async function _testActivityCliffsOpen(df: DG.DataFrame, numberCliffs: number, method: DimReductionMethods,
|
|
8
|
+
export async function _testActivityCliffsOpen(df: DG.DataFrame, numberCliffs: number, method: DimReductionMethods,
|
|
9
|
+
colName: string) {
|
|
9
10
|
await grok.data.detectSemanticTypes(df);
|
|
10
11
|
const scatterPlot = await activityCliffs(
|
|
11
|
-
df, df.getCol(colName), df.getCol('
|
|
12
|
-
|
|
12
|
+
df, df.getCol(colName), df.getCol('activity'),
|
|
13
|
+
90, method);
|
|
13
14
|
// const scatterPlot = (await grok.functions.call('Bio:activityCliffs', {
|
|
14
15
|
// table: df, molecules: df.getCol(colName), activities: df.getCol('Activity'),
|
|
15
16
|
// similarity: 50, methodName: method
|
package/src/tests/bio-tests.ts
CHANGED
|
@@ -2,10 +2,9 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
-
import {
|
|
5
|
+
import {category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
6
6
|
import {
|
|
7
7
|
getAlphabetSimilarity,
|
|
8
|
-
getStats,
|
|
9
8
|
monomerToShort,
|
|
10
9
|
pickUpPalette,
|
|
11
10
|
splitterAsFasta,
|
|
@@ -14,13 +13,14 @@ import {
|
|
|
14
13
|
import {Nucleotides, NucleotidesPalettes} from '@datagrok-libraries/bio/src/nucleotides';
|
|
15
14
|
import {AminoacidsPalettes} from '@datagrok-libraries/bio/src/aminoacids';
|
|
16
15
|
import {UnknownSeqPalette} from '@datagrok-libraries/bio/src/unknown';
|
|
16
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
17
|
+
import {getStatsForCol} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
|
|
17
18
|
|
|
18
19
|
category('bio', () => {
|
|
19
20
|
const csvDfN1: string = `seq
|
|
20
21
|
ACGTCT
|
|
21
22
|
CAGTGT
|
|
22
|
-
TTCAAC
|
|
23
|
-
`;
|
|
23
|
+
TTCAAC`;
|
|
24
24
|
|
|
25
25
|
/** 2 - is an error monomer
|
|
26
26
|
* This sequence set should be classified as nucleotides sequences.
|
|
@@ -29,44 +29,39 @@ TTCAAC
|
|
|
29
29
|
const csvDfN1e: string = `seq
|
|
30
30
|
ACGTAT
|
|
31
31
|
CAGTTG
|
|
32
|
-
TTCG2C
|
|
33
|
-
`;
|
|
32
|
+
TTCG2C`;
|
|
34
33
|
|
|
35
34
|
/** Pure amino acids sequence */
|
|
36
35
|
const csvDfAA1: string = `seq
|
|
37
36
|
FWPHEYV
|
|
38
37
|
YNRQWYV
|
|
39
|
-
MKPSEYV
|
|
40
|
-
`;
|
|
38
|
+
MKPSEYV`;
|
|
41
39
|
|
|
42
40
|
/** A - alanine, G - glycine, T - threonine, C - cysteine, W - tryptophan
|
|
43
41
|
* This sequence set should be detected as amino acids more than nucleotides.
|
|
44
42
|
*/
|
|
45
|
-
const
|
|
43
|
+
const _csvDfAA2: string = `seq
|
|
46
44
|
AGTCAT
|
|
47
45
|
AGTCGC
|
|
48
|
-
AGTCATW
|
|
49
|
-
`;
|
|
46
|
+
AGTCATW`;
|
|
50
47
|
|
|
51
48
|
/** This sequence set should be recognized as unknown. */
|
|
52
49
|
const csvDfX: string = `seq
|
|
53
50
|
XZJ{}2
|
|
54
51
|
5Z4733
|
|
55
52
|
3Z6></
|
|
56
|
-
675687
|
|
57
|
-
`;
|
|
53
|
+
675687`;
|
|
58
54
|
|
|
59
55
|
// anonymous functions specified in test() registering must return Promise<any>
|
|
60
56
|
test('testGetStatsHelm1', async () => {
|
|
61
57
|
const csv = `seq
|
|
62
|
-
PEPTIDE1{meI}
|
|
63
|
-
`;
|
|
58
|
+
PEPTIDE1{meI}$$$$`;
|
|
64
59
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
65
60
|
const seqCol: DG.Column = df.getCol('seq')!;
|
|
66
|
-
const stats =
|
|
61
|
+
const stats = getStatsForCol(seqCol, 1, splitterAsHelm);
|
|
67
62
|
|
|
68
63
|
expectObject(stats.freq, {
|
|
69
|
-
'meI': 1
|
|
64
|
+
'meI': 1,
|
|
70
65
|
});
|
|
71
66
|
expect(stats.sameLength, true);
|
|
72
67
|
});
|
|
@@ -82,19 +77,19 @@ PEPTIDE1{meI}$$$$
|
|
|
82
77
|
|
|
83
78
|
category('WebLogo.monomerToShort', () => {
|
|
84
79
|
test('longMonomerSingle', async () => {
|
|
85
|
-
|
|
80
|
+
expect(monomerToShort('S', 5), 'S');
|
|
86
81
|
});
|
|
87
82
|
test('longMonomerShort', async () => {
|
|
88
|
-
|
|
83
|
+
expect(monomerToShort('Short', 5), 'Short');
|
|
89
84
|
});
|
|
90
85
|
test('longMonomerLong56', async () => {
|
|
91
|
-
|
|
86
|
+
expect(monomerToShort('Long56', 5), 'Long5…');
|
|
92
87
|
});
|
|
93
88
|
test('longMonomerComplexFirstPartShort', async () => {
|
|
94
|
-
|
|
89
|
+
expect(monomerToShort('Long-long', 5), 'Long…');
|
|
95
90
|
});
|
|
96
91
|
test('longMonomerComplexFirstPartLong56', async () => {
|
|
97
|
-
|
|
92
|
+
expect(monomerToShort('Long56-long', 5), 'Long5…');
|
|
98
93
|
});
|
|
99
94
|
});
|
|
100
95
|
|
|
@@ -102,13 +97,13 @@ category('WebLogo.monomerToShort', () => {
|
|
|
102
97
|
export async function _testGetStats(csvDfN1: string) {
|
|
103
98
|
const dfN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
|
|
104
99
|
const seqCol: DG.Column = dfN1.col('seq')!;
|
|
105
|
-
const stats =
|
|
100
|
+
const stats = getStatsForCol(seqCol, 5, splitterAsFasta);
|
|
106
101
|
|
|
107
102
|
expectObject(stats.freq, {
|
|
108
103
|
'A': 4,
|
|
109
104
|
'C': 5,
|
|
110
105
|
'G': 3,
|
|
111
|
-
'T': 6
|
|
106
|
+
'T': 6,
|
|
112
107
|
});
|
|
113
108
|
expect(stats.sameLength, true);
|
|
114
109
|
}
|
|
@@ -119,7 +114,7 @@ export async function _testGetAlphabetSimilarity() {
|
|
|
119
114
|
'C': 3015,
|
|
120
115
|
'G': 3015,
|
|
121
116
|
'T': 2048,
|
|
122
|
-
'-': 1000
|
|
117
|
+
'-': 1000,
|
|
123
118
|
};
|
|
124
119
|
const alphabet: Set<string> = new Set(Object.keys(Nucleotides.Names));
|
|
125
120
|
const res = getAlphabetSimilarity(freq, alphabet);
|
|
@@ -2,7 +2,7 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
-
import {
|
|
5
|
+
import {category, test, expect} from '@datagrok-libraries/utils/src/test';
|
|
6
6
|
|
|
7
7
|
import {ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
8
|
import {checkInputColumn} from '../utils/check-input-column';
|
|
@@ -15,11 +15,6 @@ seq3,
|
|
|
15
15
|
seq4`;
|
|
16
16
|
|
|
17
17
|
test('testMsaPos', async () => {
|
|
18
|
-
const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentDialog'})[0];
|
|
19
|
-
const funcInputColumnProperty: DG.Property = func.inputs.find((i) => i.name == 'sequence')!;
|
|
20
|
-
|
|
21
|
-
const k = 11;
|
|
22
|
-
|
|
23
18
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
24
19
|
const col: DG.Column = df.getCol('seq');
|
|
25
20
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
@@ -27,7 +22,7 @@ seq4`;
|
|
|
27
22
|
col.setTag(bioTAGS.alphabet, ALPHABET.DNA);
|
|
28
23
|
col.setTag(bioTAGS.aligned, 'SEQ');
|
|
29
24
|
|
|
30
|
-
const [res,
|
|
25
|
+
const [res, _msg]: [boolean, string] = checkInputColumn(
|
|
31
26
|
col, 'Test', [NOTATION.FASTA],
|
|
32
27
|
[ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT]);
|
|
33
28
|
|
|
@@ -42,7 +37,7 @@ seq4`;
|
|
|
42
37
|
// col.setTag(bio.TAGS.alphabetSize, '11');
|
|
43
38
|
col.setTag(bioTAGS.alphabetIsMultichar, 'true');
|
|
44
39
|
|
|
45
|
-
const [res,
|
|
40
|
+
const [res, _msg]: [boolean, string] = checkInputColumn(
|
|
46
41
|
col, 'Test', [NOTATION.FASTA],
|
|
47
42
|
[ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT]);
|
|
48
43
|
|
|
@@ -59,7 +54,7 @@ seq4`;
|
|
|
59
54
|
col.setTag(bioTAGS.alphabetIsMultichar, 'true');
|
|
60
55
|
col.setTag(bioTAGS.aligned, 'SEQ');
|
|
61
56
|
|
|
62
|
-
const [res,
|
|
57
|
+
const [res, _msg]: [boolean, string] = checkInputColumn(
|
|
63
58
|
col, 'Test', [NOTATION.FASTA],
|
|
64
59
|
[ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT]);
|
|
65
60
|
|
|
@@ -68,7 +63,6 @@ seq4`;
|
|
|
68
63
|
|
|
69
64
|
test('testGetActionFunctionMeta', async () => {
|
|
70
65
|
const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentDialog'})[0];
|
|
71
|
-
const
|
|
72
|
-
const k = 11;
|
|
66
|
+
const _sequenceInput: DG.Property = func.inputs.find((i) => i.name == 'sequence')!;
|
|
73
67
|
});
|
|
74
68
|
});
|
|
@@ -45,94 +45,76 @@ category('converters', () => {
|
|
|
45
45
|
fastaPt: `seq
|
|
46
46
|
FWPHEY
|
|
47
47
|
YNRQWYV
|
|
48
|
-
MKPSEYV
|
|
49
|
-
`,
|
|
48
|
+
MKPSEYV`,
|
|
50
49
|
separatorPt: `seq
|
|
51
50
|
F-W-P-H-E-Y
|
|
52
51
|
Y-N-R-Q-W-Y-V
|
|
53
|
-
M-K-P-S-E-Y-V
|
|
54
|
-
`,
|
|
52
|
+
M-K-P-S-E-Y-V`,
|
|
55
53
|
helmPt: `seq
|
|
56
54
|
PEPTIDE1{F.W.P.H.E.Y}$$$$
|
|
57
55
|
PEPTIDE1{Y.N.R.Q.W.Y.V}$$$$
|
|
58
|
-
PEPTIDE1{M.K.P.S.E.Y.V}
|
|
59
|
-
`,
|
|
56
|
+
PEPTIDE1{M.K.P.S.E.Y.V}$$$$`,
|
|
60
57
|
fastaDna: `seq
|
|
61
58
|
ACGTC
|
|
62
59
|
CAGTGT
|
|
63
|
-
TTCAAC
|
|
64
|
-
`,
|
|
60
|
+
TTCAAC`,
|
|
65
61
|
separatorDna: `seq
|
|
66
62
|
A/C/G/T/C
|
|
67
63
|
C/A/G/T/G/T
|
|
68
|
-
T/T/C/A/A/C
|
|
69
|
-
`,
|
|
64
|
+
T/T/C/A/A/C`,
|
|
70
65
|
helmDna: `seq
|
|
71
66
|
DNA1{D(A)P.D(C)P.D(G)P.D(T)P.D(C)P}$$$$
|
|
72
67
|
DNA1{D(C)P.D(A)P.D(G)P.D(T)P.D(G)P.D(T)P}$$$$
|
|
73
|
-
DNA1{D(T)P.D(T)P.D(C)P.D(A)P.D(A)P.D(C)P}
|
|
74
|
-
`,
|
|
68
|
+
DNA1{D(T)P.D(T)P.D(C)P.D(A)P.D(A)P.D(C)P}$$$$`,
|
|
75
69
|
fastaRna: `seq
|
|
76
70
|
ACGUC
|
|
77
71
|
CAGUGU
|
|
78
|
-
UUCAAC
|
|
79
|
-
`,
|
|
72
|
+
UUCAAC`,
|
|
80
73
|
separatorRna: `seq
|
|
81
74
|
A*C*G*U*C
|
|
82
75
|
C*A*G*U*G*U
|
|
83
|
-
U*U*C*A*A*C
|
|
84
|
-
`,
|
|
76
|
+
U*U*C*A*A*C`,
|
|
85
77
|
helmRna: `seq
|
|
86
78
|
RNA1{R(A)P.R(C)P.R(G)P.R(U)P.R(C)P}$$$$
|
|
87
79
|
RNA1{R(C)P.R(A)P.R(G)P.R(U)P.R(G)P.R(U)P}$$$$
|
|
88
|
-
RNA1{R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P}
|
|
89
|
-
`,
|
|
80
|
+
RNA1{R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P}$$$$`,
|
|
90
81
|
fastaGaps: `seq
|
|
91
82
|
FW-PH-EYY
|
|
92
83
|
FYNRQWYV-
|
|
93
|
-
FKP-Q-SEYV
|
|
94
|
-
`,
|
|
84
|
+
FKP-Q-SEYV`,
|
|
95
85
|
separatorGaps: `seq
|
|
96
86
|
F/W//P/H//E/Y/Y
|
|
97
87
|
F/Y/N/R/Q/W/Y/V/
|
|
98
|
-
F/K/P//Q//S/E/Y/V
|
|
99
|
-
`,
|
|
88
|
+
F/K/P//Q//S/E/Y/V`,
|
|
100
89
|
helmGaps: `seq
|
|
101
90
|
PEPTIDE1{F.W.*.P.H.*.E.Y.Y}$$$$
|
|
102
91
|
PEPTIDE1{F.Y.N.R.Q.W.Y.V.*}$$$$
|
|
103
|
-
PEPTIDE1{F.K.P.*.Q.*.S.E.Y.V}
|
|
104
|
-
`,
|
|
92
|
+
PEPTIDE1{F.K.P.*.Q.*.S.E.Y.V}$$$$`,
|
|
105
93
|
|
|
106
94
|
fastaUn: `seq
|
|
107
95
|
[meI][hHis][Aca]NT[dE][Thr_PO3H2][Aca]D
|
|
108
96
|
[meI][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]
|
|
109
|
-
[Lys_Boc][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]
|
|
110
|
-
`,
|
|
97
|
+
[Lys_Boc][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]`,
|
|
111
98
|
separatorUn: `seq
|
|
112
99
|
meI-hHis-Aca-N-T-dE-Thr_PO3H2-Aca-D
|
|
113
100
|
meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
|
|
114
|
-
Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
|
|
115
|
-
`,
|
|
101
|
+
Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2`,
|
|
116
102
|
helmUn: `seq
|
|
117
103
|
PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D}$$$$
|
|
118
104
|
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$$
|
|
119
|
-
PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}
|
|
120
|
-
`,
|
|
105
|
+
PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$$`,
|
|
121
106
|
helmLoneDeoxyribose: `seq
|
|
122
107
|
DNA1{D(A).D(C).D(G).D(T).D(C)}$$$$
|
|
123
108
|
DNA1{D(C).D(A).D(G).D(T).D(G).D(T)P}$$$$
|
|
124
|
-
DNA1{D(T).D(T).D(C).D(A).D(A).D(C)P}
|
|
125
|
-
`,
|
|
109
|
+
DNA1{D(T).D(T).D(C).D(A).D(A).D(C)P}$$$$`,
|
|
126
110
|
helmLoneRibose: `seq
|
|
127
111
|
RNA1{R(A).R(C).R(G).R(U).R(C)}$$$$
|
|
128
112
|
RNA1{R(C).R(A).R(G).R(U).R(G).R(U)P}$$$$
|
|
129
|
-
RNA1{R(U).R(U).R(C).R(A).R(A).R(C)P}
|
|
130
|
-
`,
|
|
113
|
+
RNA1{R(U).R(U).R(C).R(A).R(A).R(C)P}$$$$`,
|
|
131
114
|
helmLonePhosphorus: `seq
|
|
132
115
|
RNA1{P.P.R(A)P.R(C)P.R(G)P.R(U)P.R(C)P}$$$$
|
|
133
116
|
RNA1{P.P.R(C)P.R(A)P.P.R(G)P.R(U)P.R(G)P.R(U)P}$$$$
|
|
134
|
-
RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}
|
|
135
|
-
`,
|
|
117
|
+
RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$$`,
|
|
136
118
|
};
|
|
137
119
|
|
|
138
120
|
/** Also detects semantic types
|
|
@@ -171,7 +153,7 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$$
|
|
|
171
153
|
const tgtCol: DG.Column = tgtDf.getCol('seq');
|
|
172
154
|
|
|
173
155
|
expectArray(resCol.toList(), tgtCol.toList());
|
|
174
|
-
const
|
|
156
|
+
const _uh: UnitsHandler = UnitsHandler.getOrCreate(resCol);
|
|
175
157
|
}
|
|
176
158
|
|
|
177
159
|
// FASTA tests
|