@datagrok/bio 2.0.24 → 2.0.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +7 -2
- package/dist/package-test.js +2057 -839
- package/dist/package.js +2166 -684
- package/files/tests/nucleotidePairs.csv +146690 -0
- package/files/tests/peptidePairs.csv +103685 -0
- package/files/tests/toAtomicLevelTest.csv +7 -0
- package/package.json +6 -8
- package/setup.sh +1 -1
- package/src/calculations/monomerLevelMols.ts +7 -4
- package/src/package-test.ts +5 -5
- package/src/package.ts +13 -18
- package/src/substructure-search/substructure-search.ts +40 -13
- package/src/tests/{WebLogo-test.ts → bio-tests.ts} +16 -2
- package/src/tests/checkInputColumn-tests.ts +6 -7
- package/src/tests/detectors-test.ts +27 -19
- package/src/tests/renderers-test.ts +9 -5
- package/src/tests/test-sequnces-generators.ts +7 -4
- package/src/utils/cell-renderer.ts +5 -2
- package/src/widgets/bio-substructure-filter.ts +24 -5
- package/src/widgets/representations.ts +3 -2
- package/src/const.ts +0 -30
- package/src/monomer-library.ts +0 -199
- package/src/utils/utils.ts +0 -135
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.0.
|
|
8
|
+
"version": "2.0.26",
|
|
9
9
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -14,15 +14,10 @@
|
|
|
14
14
|
},
|
|
15
15
|
"dependencies": {
|
|
16
16
|
"@biowasm/aioli": "^3.1.0",
|
|
17
|
-
"@datagrok-libraries/bio": "^5.
|
|
17
|
+
"@datagrok-libraries/bio": "^5.4.1",
|
|
18
18
|
"@datagrok-libraries/chem-meta": "1.0.1",
|
|
19
19
|
"@datagrok-libraries/ml": "^6.2.0",
|
|
20
|
-
"@datagrok-libraries/utils": "^1.11.
|
|
21
|
-
"@deck.gl/core": "^8.7.5",
|
|
22
|
-
"@deck.gl/layers": "^8.7.5",
|
|
23
|
-
"@luma.gl/constants": "^8.5.10",
|
|
24
|
-
"@luma.gl/core": "^8.5.10",
|
|
25
|
-
"@phylocanvas/phylocanvas.gl": "^1.44.0",
|
|
20
|
+
"@datagrok-libraries/utils": "^1.11.1",
|
|
26
21
|
"cash-dom": "^8.0.0",
|
|
27
22
|
"datagrok-api": "^1.7.0",
|
|
28
23
|
"dayjs": "^1.11.4",
|
|
@@ -51,6 +46,9 @@
|
|
|
51
46
|
"webpack": "^5.64.1",
|
|
52
47
|
"webpack-cli": "^4.6.0"
|
|
53
48
|
},
|
|
49
|
+
"grokDependencies": {
|
|
50
|
+
"@datagrok/chem": "1.3.16"
|
|
51
|
+
},
|
|
54
52
|
"scripts": {
|
|
55
53
|
"link-api": "npm link datagrok-api",
|
|
56
54
|
"link-bio": "npm link @datagrok-libraries/bio",
|
package/setup.sh
CHANGED
|
@@ -8,17 +8,20 @@ import {getHelmMonomers} from '../package';
|
|
|
8
8
|
|
|
9
9
|
const V2000_ATOM_NAME_POS = 31;
|
|
10
10
|
|
|
11
|
-
export async function getMonomericMols(mcol: DG.Column,
|
|
11
|
+
export async function getMonomericMols(mcol: DG.Column,
|
|
12
|
+
pattern: boolean = false, monomersDict?: Map<string, string>): Promise<DG.Column> {
|
|
12
13
|
const separator: string = mcol.tags[C.TAGS.SEPARATOR];
|
|
13
14
|
const units: string = mcol.tags[DG.TAGS.UNITS];
|
|
14
15
|
const splitter = bio.getSplitter(units, separator);
|
|
15
16
|
let molV3000Array;
|
|
16
|
-
|
|
17
|
+
monomersDict ??= new Map();
|
|
17
18
|
const monomers = units === 'helm' ?
|
|
18
19
|
getHelmMonomers(mcol) : Object.keys(bio.getStats(mcol, 0, splitter).freq).filter((it) => it !== '');
|
|
19
20
|
|
|
20
|
-
for (let i = 0; i < monomers.length; i++)
|
|
21
|
-
monomersDict.
|
|
21
|
+
for (let i = 0; i < monomers.length; i++) {
|
|
22
|
+
if (!monomersDict.has(monomers[i]))
|
|
23
|
+
monomersDict.set(monomers[i], `${monomersDict.size + 1}`);
|
|
24
|
+
}
|
|
22
25
|
|
|
23
26
|
if (units === 'helm') {
|
|
24
27
|
molV3000Array = await grok.functions.call('HELM:getMolFiles', {col: mcol});
|
package/src/package-test.ts
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
|
|
3
|
-
import {runTests, tests} from '@datagrok-libraries/utils/src/test';
|
|
3
|
+
import {runTests, TestContext, tests} from '@datagrok-libraries/utils/src/test';
|
|
4
4
|
|
|
5
|
-
import './tests/WebLogo-test';
|
|
6
5
|
import './tests/Palettes-test';
|
|
7
6
|
import './tests/detectors-test';
|
|
8
7
|
import './tests/msa-tests';
|
|
@@ -13,6 +12,7 @@ import './tests/renderers-test';
|
|
|
13
12
|
import './tests/convert-test';
|
|
14
13
|
import './tests/fasta-handler-test';
|
|
15
14
|
import './tests/fasta-export-tests';
|
|
15
|
+
import './tests/bio-tests';
|
|
16
16
|
import './tests/WebLogo-positions-test';
|
|
17
17
|
import './tests/checkInputColumn-tests';
|
|
18
18
|
import './tests/similarity-diversity-tests';
|
|
@@ -24,9 +24,9 @@ export {tests};
|
|
|
24
24
|
//name: test
|
|
25
25
|
//input: string category {optional: true}
|
|
26
26
|
//input: string test {optional: true}
|
|
27
|
-
//input:
|
|
27
|
+
//input: object testContext {optional: true}
|
|
28
28
|
//output: dataframe result
|
|
29
|
-
export async function test(category: string, test: string,
|
|
30
|
-
const data = await runTests({category, test,
|
|
29
|
+
export async function test(category: string, test: string, testContext: TestContext): Promise<DG.DataFrame> {
|
|
30
|
+
const data = await runTests({category, test, testContext});
|
|
31
31
|
return DG.DataFrame.fromObjects(data)!;
|
|
32
32
|
}
|
package/src/package.ts
CHANGED
|
@@ -13,11 +13,17 @@ import {SequenceAlignment, Aligned} from './seq_align';
|
|
|
13
13
|
import {getEmbeddingColsNames, sequenceSpace} from './analysis/sequence-space';
|
|
14
14
|
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
15
15
|
import {createPropPanelElement, createTooltipElement, getSimilaritiesMarix} from './analysis/sequence-activity-cliffs';
|
|
16
|
-
import {createJsonMonomerLibFromSdf, encodeMonomers, getMolfilesFromSeq
|
|
16
|
+
import {createJsonMonomerLibFromSdf, encodeMonomers, getMolfilesFromSeq} from '@datagrok-libraries/bio/src/utils/monomer-utils';
|
|
17
|
+
import {HELM_CORE_LIB_FILENAME} from '@datagrok-libraries/bio/src/utils/const';
|
|
17
18
|
import {getMacroMol} from './utils/atomic-works';
|
|
18
19
|
import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
|
|
19
20
|
import {convert} from './utils/convert';
|
|
20
21
|
import {getMacroMolColumnPropertyPanel, representationsWidget} from './widgets/representations';
|
|
22
|
+
import {UnitsHandler, ALIGNMENT} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
23
|
+
import {TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
24
|
+
import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule'
|
|
25
|
+
import {_toAtomicLevel} from '@datagrok-libraries/bio/src/utils/to-atomic-level';
|
|
26
|
+
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
21
27
|
import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils';
|
|
22
28
|
import {
|
|
23
29
|
generateManySequences,
|
|
@@ -99,7 +105,7 @@ export function checkInputColumn(
|
|
|
99
105
|
) {
|
|
100
106
|
const notationAdd = allowedNotations.length == 0 ? 'any notation' :
|
|
101
107
|
(`notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')} `);
|
|
102
|
-
msg = `${name} analysis is allowed for Macromolecules with ${notationAdd}.`;
|
|
108
|
+
msg = `${name} + ' analysis is allowed for Macromolecules with notation ${notationAdd}.`;
|
|
103
109
|
res = false;
|
|
104
110
|
} else if (!uh.isHelm()) {
|
|
105
111
|
// alphabet is not specified for 'helm' notation
|
|
@@ -169,10 +175,10 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
169
175
|
'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
|
|
170
176
|
};
|
|
171
177
|
const tags = {
|
|
172
|
-
'units': macroMolecule.
|
|
173
|
-
'aligned': macroMolecule.
|
|
174
|
-
'separator': macroMolecule.
|
|
175
|
-
'alphabet': macroMolecule.
|
|
178
|
+
'units': macroMolecule.getTag(DG.TAGS.UNITS),
|
|
179
|
+
'aligned': macroMolecule.getTag(TAGS.aligned),
|
|
180
|
+
'separator': macroMolecule.getTag(TAGS.separator),
|
|
181
|
+
'alphabet': macroMolecule.getTag(TAGS.alphabet),
|
|
176
182
|
};
|
|
177
183
|
const sp = await getActivityCliffs(
|
|
178
184
|
df,
|
|
@@ -247,20 +253,11 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
|
|
|
247
253
|
}
|
|
248
254
|
if (!checkInputColumnUi(macroMolecule, 'To Atomic Level'))
|
|
249
255
|
return;
|
|
250
|
-
|
|
251
256
|
const monomersLibFile = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
|
|
252
257
|
const monomersLibObject: any[] = JSON.parse(monomersLibFile);
|
|
253
|
-
|
|
254
|
-
const result = await getMacroMol(atomicCodes!);
|
|
255
|
-
|
|
256
|
-
const col = DG.Column.fromStrings('regenerated', result);
|
|
257
|
-
col.semType = DG.SEMTYPE.MOLECULE;
|
|
258
|
-
col.tags[DG.TAGS.UNITS] = 'molblock';
|
|
259
|
-
df.columns.add(col, true);
|
|
260
|
-
await grok.data.detectSemanticTypes(df);
|
|
258
|
+
_toAtomicLevel(df, macroMolecule, monomersLibObject);
|
|
261
259
|
}
|
|
262
260
|
|
|
263
|
-
|
|
264
261
|
//top-menu: Bio | MSA...
|
|
265
262
|
//name: MSA
|
|
266
263
|
//input: dataframe table
|
|
@@ -528,5 +525,3 @@ export function saveAsFasta() {
|
|
|
528
525
|
export function bioSubstructureFilter(): BioSubstructureFilter {
|
|
529
526
|
return new BioSubstructureFilter();
|
|
530
527
|
}
|
|
531
|
-
|
|
532
|
-
|
|
@@ -8,6 +8,14 @@ import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
|
8
8
|
import {BitSet} from 'datagrok-api/dg';
|
|
9
9
|
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
10
10
|
|
|
11
|
+
export const MONOMER_MOLS_COL = 'monomeric-mols';
|
|
12
|
+
|
|
13
|
+
const enum MONOMERIC_COL_TAGS{
|
|
14
|
+
MONOMERIC_MOLS = 'monomeric-mols',
|
|
15
|
+
LAST_INVALIDATED_VERSION = 'last-invalidated-version',
|
|
16
|
+
MONOMERS_DICT = 'monomers-dict'
|
|
17
|
+
}
|
|
18
|
+
|
|
11
19
|
/**
|
|
12
20
|
* Searches substructure in each row of Macromolecule column
|
|
13
21
|
*
|
|
@@ -67,29 +75,48 @@ export function substructureSearchDialog(col: DG.Column): void {
|
|
|
67
75
|
.show();
|
|
68
76
|
}
|
|
69
77
|
|
|
70
|
-
export function linearSubstructureSearch(substructure: string, col: DG.Column): DG.BitSet {
|
|
71
|
-
const
|
|
78
|
+
export function linearSubstructureSearch(substructure: string, col: DG.Column, separator?: string): DG.BitSet {
|
|
79
|
+
const re = separator ? prepareSubstructureRegex(substructure, separator) : substructure;
|
|
72
80
|
const resultArray = DG.BitSet.create(col.length);
|
|
73
81
|
for (let i = 0; i < col.length; i++) {
|
|
74
|
-
const macromolecule = col.get(i)
|
|
75
|
-
if (macromolecule.
|
|
82
|
+
const macromolecule = col.get(i);
|
|
83
|
+
if (macromolecule.match(re) || macromolecule === substructure)
|
|
76
84
|
resultArray.set(i, true, false);
|
|
77
85
|
}
|
|
78
86
|
return resultArray;
|
|
79
87
|
}
|
|
80
88
|
|
|
89
|
+
function prepareSubstructureRegex(substructure: string, separator: string) {
|
|
90
|
+
const char = `${separator}`.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, '\\$&');
|
|
91
|
+
const startsWithSep = substructure.charAt(0) === separator;
|
|
92
|
+
const endsWithSep = substructure.charAt(substructure.length - 1) === separator;
|
|
93
|
+
const substrWithoutSep = substructure.replace(new RegExp(`^${char}|${char}$`, 'g'), '');
|
|
94
|
+
const re = startsWithSep ? endsWithSep ? `${char}${substrWithoutSep}${char}` :
|
|
95
|
+
`${char}${substrWithoutSep}${char}|${char}${substrWithoutSep}$` :
|
|
96
|
+
endsWithSep ? `^${substrWithoutSep}${char}|${char}${substrWithoutSep}${char}` :
|
|
97
|
+
`^${substrWithoutSep}${char}|${char}${substrWithoutSep}${char}|${char}${substrWithoutSep}$`;
|
|
98
|
+
return re;
|
|
99
|
+
}
|
|
100
|
+
|
|
81
101
|
export async function helmSubstructureSearch(substructure: string, col: DG.Column): Promise<BitSet> {
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
const
|
|
87
|
-
|
|
88
|
-
monomericMolsDf.rows.removeAt(col.length);
|
|
102
|
+
if (col.version !== col.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION])
|
|
103
|
+
await invalidateHelmMols(col);
|
|
104
|
+
const substructureCol = DG.Column.string('helm', 1).init((i) => substructure);
|
|
105
|
+
substructureCol.setTag(DG.TAGS.UNITS, bio.NOTATION.HELM);
|
|
106
|
+
const substructureMolsCol =
|
|
107
|
+
await getMonomericMols(substructureCol, true, col.temp[MONOMERIC_COL_TAGS.MONOMERS_DICT]);
|
|
89
108
|
const matchesCol = await grok.functions.call('Chem:searchSubstructure', {
|
|
90
|
-
molStringsColumn:
|
|
91
|
-
molString:
|
|
109
|
+
molStringsColumn: col.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS],
|
|
110
|
+
molString: substructureMolsCol.get(0),
|
|
92
111
|
molBlockFailover: '',
|
|
93
112
|
});
|
|
94
113
|
return matchesCol.get(0);
|
|
95
114
|
}
|
|
115
|
+
|
|
116
|
+
export async function invalidateHelmMols(col: DG.Column) {
|
|
117
|
+
const monomersDict = new Map();
|
|
118
|
+
const monomericMolsCol = await getMonomericMols(col, true, monomersDict);
|
|
119
|
+
col.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS] = monomericMolsCol;
|
|
120
|
+
col.temp[MONOMERIC_COL_TAGS.MONOMERS_DICT] = monomersDict;
|
|
121
|
+
col.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION] = col.version;
|
|
122
|
+
}
|
|
@@ -5,7 +5,7 @@ import * as bio from '@datagrok-libraries/bio';
|
|
|
5
5
|
|
|
6
6
|
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
7
7
|
|
|
8
|
-
category('
|
|
8
|
+
category('bio', () => {
|
|
9
9
|
const csvDfN1: string = `seq
|
|
10
10
|
ACGTCT
|
|
11
11
|
CAGTGT
|
|
@@ -47,7 +47,21 @@ XZJ{}2
|
|
|
47
47
|
`;
|
|
48
48
|
|
|
49
49
|
// anonymous functions specified in test() registering must return Promise<any>
|
|
50
|
-
test('
|
|
50
|
+
test('testGetStatsHelm1', async () => {
|
|
51
|
+
const csv = `seq
|
|
52
|
+
PEPTIDE1{meI}$$$$
|
|
53
|
+
`;
|
|
54
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
55
|
+
const seqCol: DG.Column = df.getCol('seq')!;
|
|
56
|
+
const stats = bio.getStats(seqCol, 1, bio.splitterAsHelm);
|
|
57
|
+
|
|
58
|
+
expectObject(stats.freq, {
|
|
59
|
+
'meI': 1
|
|
60
|
+
});
|
|
61
|
+
expect(stats.sameLength, true);
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
test('testGetStatsN1', async () => { await _testGetStats(csvDfN1); });
|
|
51
65
|
test('testGetAlphabetSimilarity', async () => { await _testGetAlphabetSimilarity(); });
|
|
52
66
|
|
|
53
67
|
test('testPickupPaletteN1', async () => { await _testPickupPaletteN1(csvDfN1); });
|
|
@@ -8,10 +8,9 @@ import {after, before, category, test, expect, expectArray} from '@datagrok-libr
|
|
|
8
8
|
|
|
9
9
|
import {checkInputColumn, multipleSequenceAlignmentAny} from '../package';
|
|
10
10
|
import {UNITS} from 'datagrok-api/dg';
|
|
11
|
-
|
|
11
|
+
import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
12
12
|
|
|
13
13
|
category('checkInputColumn', () => {
|
|
14
|
-
|
|
15
14
|
const csv = `seq
|
|
16
15
|
seq1,
|
|
17
16
|
seq2,
|
|
@@ -22,7 +21,7 @@ seq4`;
|
|
|
22
21
|
const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
|
|
23
22
|
const funcInputColumnProperty: DG.Property = func.inputs.find((i) => i.name == 'sequence')!;
|
|
24
23
|
|
|
25
|
-
|
|
24
|
+
const k = 11;
|
|
26
25
|
|
|
27
26
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
28
27
|
const col: DG.Column = df.getCol('seq');
|
|
@@ -42,7 +41,7 @@ seq4`;
|
|
|
42
41
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
43
42
|
const col: DG.Column = df.getCol('seq');
|
|
44
43
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
45
|
-
col.setTag(DG.TAGS.UNITS,
|
|
44
|
+
col.setTag(DG.TAGS.UNITS, NOTATION.HELM);
|
|
46
45
|
col.setTag(bio.TAGS.alphabetSize, '11');
|
|
47
46
|
col.setTag(bio.TAGS.alphabetIsMultichar, 'true');
|
|
48
47
|
|
|
@@ -57,7 +56,7 @@ seq4`;
|
|
|
57
56
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
58
57
|
const col: DG.Column = df.getCol('seq');
|
|
59
58
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
60
|
-
col.setTag(DG.TAGS.UNITS,
|
|
59
|
+
col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
61
60
|
col.setTag(bio.TAGS.alphabet, 'UN');
|
|
62
61
|
col.setTag(bio.TAGS.alphabetSize, '11');
|
|
63
62
|
col.setTag(bio.TAGS.alphabetIsMultichar, 'true');
|
|
@@ -73,6 +72,6 @@ seq4`;
|
|
|
73
72
|
test('testGetActionFunctionMeta', async () => {
|
|
74
73
|
const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
|
|
75
74
|
const sequenceInput: DG.Property = func.inputs.find((i) => i.name == 'sequence')!;
|
|
76
|
-
|
|
75
|
+
const k = 11;
|
|
77
76
|
});
|
|
78
|
-
});
|
|
77
|
+
});
|
|
@@ -6,7 +6,8 @@ import * as bio from '@datagrok-libraries/bio';
|
|
|
6
6
|
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
7
7
|
|
|
8
8
|
import {importFasta} from '../package';
|
|
9
|
-
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
9
|
+
import {UnitsHandler, ALIGNMENT} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
10
|
+
import {NOTATION, ALPHABET} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
10
11
|
|
|
11
12
|
type DfReaderFunc = () => Promise<DG.DataFrame>;
|
|
12
13
|
|
|
@@ -200,44 +201,49 @@ MWRSWY-CKHP
|
|
|
200
201
|
test('NegativeSmiles', async () => { await _testNeg(readCsv('csvDfSmiles', csvDfSmiles), 'col1'); });
|
|
201
202
|
|
|
202
203
|
test('Dna1', async () => {
|
|
203
|
-
await _testPos(readCsv('csvDfDna1', csvDfDna1), 'seq',
|
|
204
|
+
await _testPos(readCsv('csvDfDna1', csvDfDna1), 'seq', NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.DNA, 4, false);
|
|
204
205
|
});
|
|
205
206
|
test('Rna1', async () => {
|
|
206
|
-
await _testPos(readCsv('csvDfRna1', csvDfRna1), 'seq',
|
|
207
|
+
await _testPos(readCsv('csvDfRna1', csvDfRna1), 'seq', NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.RNA, 4, false);
|
|
207
208
|
});
|
|
208
209
|
test('AA1', async () => {
|
|
209
|
-
await _testPos(readCsv('csvDfPt1', csvDfPt1), 'seq',
|
|
210
|
+
await _testPos(readCsv('csvDfPt1', csvDfPt1), 'seq', NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false);
|
|
210
211
|
});
|
|
211
212
|
test('MsaDna1', async () => {
|
|
212
|
-
await _testPos(readCsv('csvDfMsaDna1', csvDfMsaDna1), 'seq',
|
|
213
|
+
await _testPos(readCsv('csvDfMsaDna1', csvDfMsaDna1), 'seq', NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.DNA, 4, false);
|
|
213
214
|
});
|
|
214
215
|
|
|
215
216
|
test('MsaAA1', async () => {
|
|
216
|
-
await _testPos(readCsv('csvDfMsaPt1', csvDfMsaPt1), 'seq',
|
|
217
|
+
await _testPos(readCsv('csvDfMsaPt1', csvDfMsaPt1), 'seq', NOTATION.FASTA,
|
|
218
|
+
ALIGNMENT.SEQ_MSA, ALPHABET.PT, 20, false);
|
|
217
219
|
});
|
|
218
220
|
|
|
219
221
|
test('SepDna', async () => {
|
|
220
|
-
await _testPos(readCsv('csvDfSepDna', csvDfSepDna), 'seq',
|
|
222
|
+
await _testPos(readCsv('csvDfSepDna', csvDfSepDna), 'seq', NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.DNA, 4, false, '*');
|
|
221
223
|
});
|
|
222
224
|
test('SepRna', async () => {
|
|
223
|
-
await _testPos(readCsv('csvDfSepRna', csvDfSepRna), 'seq',
|
|
225
|
+
await _testPos(readCsv('csvDfSepRna', csvDfSepRna), 'seq', NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.RNA, 4, false, '*');
|
|
224
226
|
});
|
|
225
227
|
test('SepPt', async () => {
|
|
226
|
-
await _testPos(readCsv('csvDfSepPt', csvDfSepPt), 'seq',
|
|
228
|
+
await _testPos(readCsv('csvDfSepPt', csvDfSepPt), 'seq',
|
|
229
|
+
NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.PT, 20, false, '-');
|
|
227
230
|
});
|
|
228
231
|
test('SepUn1', async () => {
|
|
229
|
-
await _testPos(readCsv('csvDfSepUn1', csvDfSepUn1), 'seq',
|
|
232
|
+
await _testPos(readCsv('csvDfSepUn1', csvDfSepUn1), 'seq',
|
|
233
|
+
NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.UN, 8, true, '-');
|
|
230
234
|
});
|
|
231
235
|
test('SepUn2', async () => {
|
|
232
|
-
await _testPos(readCsv('csvDfSepUn2', csvDfSepUn2), 'seq',
|
|
236
|
+
await _testPos(readCsv('csvDfSepUn2', csvDfSepUn2), 'seq',
|
|
237
|
+
NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.UN, 9, true, '/');
|
|
233
238
|
});
|
|
234
239
|
|
|
235
240
|
test('SepMsaN1', async () => {
|
|
236
|
-
await _testPos(readCsv('csvDfSepMsaDna1', csvDfSepMsaDna1), 'seq',
|
|
241
|
+
await _testPos(readCsv('csvDfSepMsaDna1', csvDfSepMsaDna1), 'seq',
|
|
242
|
+
NOTATION.SEPARATOR, ALIGNMENT.SEQ_MSA, ALPHABET.DNA, 4, false, '-');
|
|
237
243
|
});
|
|
238
244
|
|
|
239
245
|
test('SamplesFastaCsvPt', async () => {
|
|
240
|
-
await _testPos(readSamples(Samples.fastaCsv), 'sequence',
|
|
246
|
+
await _testPos(readSamples(Samples.fastaCsv), 'sequence', NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false);
|
|
241
247
|
});
|
|
242
248
|
test('SamplesFastaCsvNegativeEntry', async () => {
|
|
243
249
|
await _testNeg(readSamples(Samples.fastaCsv), 'Entry');
|
|
@@ -250,7 +256,8 @@ MWRSWY-CKHP
|
|
|
250
256
|
});
|
|
251
257
|
|
|
252
258
|
test('SamplesFastaFastaPt', async () => {
|
|
253
|
-
await _testPos(readSamples(Samples.fastaFasta, readFileFasta),
|
|
259
|
+
await _testPos(readSamples(Samples.fastaFasta, readFileFasta),
|
|
260
|
+
'sequence', NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false);
|
|
254
261
|
});
|
|
255
262
|
|
|
256
263
|
// peptidesComplex contains monomers with spaces in AlignedSequence columns, which are forbidden
|
|
@@ -268,7 +275,8 @@ MWRSWY-CKHP
|
|
|
268
275
|
});
|
|
269
276
|
|
|
270
277
|
test('samplesMsaComplexUn', async () => {
|
|
271
|
-
await _testPos(readSamples(Samples.msaComplex), 'MSA',
|
|
278
|
+
await _testPos(readSamples(Samples.msaComplex), 'MSA',
|
|
279
|
+
NOTATION.SEPARATOR, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 161, true, '/');
|
|
272
280
|
});
|
|
273
281
|
test('samplesMsaComplexNegativeActivity', async () => {
|
|
274
282
|
await _testNeg(readSamples(Samples.msaComplex), 'Activity');
|
|
@@ -283,7 +291,7 @@ MWRSWY-CKHP
|
|
|
283
291
|
});
|
|
284
292
|
|
|
285
293
|
test('samplesHelmCsvHELM', async () => {
|
|
286
|
-
await _testPos(readSamples(Samples.helmCsv), 'HELM',
|
|
294
|
+
await _testPos(readSamples(Samples.helmCsv), 'HELM', NOTATION.HELM, null, null, 160, true, null);
|
|
287
295
|
});
|
|
288
296
|
|
|
289
297
|
test('samplesHelmCsvNegativeActivity', async () => {
|
|
@@ -299,7 +307,7 @@ MWRSWY-CKHP
|
|
|
299
307
|
await _testNeg(readSamples(Samples.testHelmCsv), 'Test type');
|
|
300
308
|
});
|
|
301
309
|
test('samplesTestHelmPositiveHelmString', async () => {
|
|
302
|
-
await _testPos(readSamples(Samples.testHelmCsv), 'HELM string',
|
|
310
|
+
await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', NOTATION.HELM, null, null, 9, true, null);
|
|
303
311
|
});
|
|
304
312
|
test('samplesTestHelmNegativeValid', async () => {
|
|
305
313
|
await _testNeg(readSamples(Samples.testHelmCsv), 'Valid?');
|
|
@@ -331,7 +339,7 @@ MWRSWY-CKHP
|
|
|
331
339
|
});
|
|
332
340
|
|
|
333
341
|
test('samplesFastaPtPosSequence', async () => {
|
|
334
|
-
await _testPos(readSamples(Samples.fastaPtCsv), 'sequence',
|
|
342
|
+
await _testPos(readSamples(Samples.fastaPtCsv), 'sequence', NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false);
|
|
335
343
|
});
|
|
336
344
|
|
|
337
345
|
test('samplesTestCerealNegativeCerealName', async () => {
|
|
@@ -383,7 +391,7 @@ export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
|
383
391
|
throw new Error(msg);
|
|
384
392
|
// col.semType = '';
|
|
385
393
|
// col.setTag(DG.TAGS.UNITS, '');
|
|
386
|
-
// col.setTag(
|
|
394
|
+
// col.setTag(NOTATION.SEPARATOR, '');
|
|
387
395
|
}
|
|
388
396
|
}
|
|
389
397
|
|
|
@@ -6,6 +6,8 @@ import {after, before, category, delay, expect, test} from '@datagrok-libraries/
|
|
|
6
6
|
|
|
7
7
|
import {importFasta, multipleSequenceAlignmentAny} from '../package';
|
|
8
8
|
import {convertDo} from '../utils/convert';
|
|
9
|
+
import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
10
|
+
import {UnitsHandler, ALIGNMENT} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
9
11
|
import {SEM_TYPES, TAGS} from '../utils/constants';
|
|
10
12
|
import {generateLongSequence, generateManySequences, performanceTest} from './test-sequnces-generators';
|
|
11
13
|
|
|
@@ -103,7 +105,7 @@ category('renderers', () => {
|
|
|
103
105
|
async function _rendererMacromoleculeDifference() {
|
|
104
106
|
const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
|
|
105
107
|
['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
|
|
106
|
-
seqDiffCol.tags[DG.TAGS.UNITS] =
|
|
108
|
+
seqDiffCol.tags[DG.TAGS.UNITS] = NOTATION.SEPARATOR;
|
|
107
109
|
seqDiffCol.tags[TAGS.SEPARATOR] = '/';
|
|
108
110
|
seqDiffCol.semType = SEM_TYPES.MACROMOLECULE_DIFFERENCE;
|
|
109
111
|
const df = DG.DataFrame.fromColumns([seqDiffCol]);
|
|
@@ -139,7 +141,7 @@ category('renderers', () => {
|
|
|
139
141
|
`cell.renderer="${srcSeqCol!.getTag(DG.TAGS.CELL_RENDERER)}"`);
|
|
140
142
|
expect(srcSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
141
143
|
expect(srcSeqCol.getTag(DG.TAGS.UNITS), bio.NOTATION.FASTA);
|
|
142
|
-
expect(srcSeqCol.getTag(bio.TAGS.aligned),
|
|
144
|
+
expect(srcSeqCol.getTag(bio.TAGS.aligned), ALIGNMENT.SEQ);
|
|
143
145
|
expect(srcSeqCol.getTag(bio.TAGS.alphabet), bio.ALPHABET.PT);
|
|
144
146
|
expect(srcSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
145
147
|
|
|
@@ -148,7 +150,7 @@ category('renderers', () => {
|
|
|
148
150
|
|
|
149
151
|
expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
150
152
|
expect(msaSeqCol.getTag(DG.TAGS.UNITS), bio.NOTATION.FASTA);
|
|
151
|
-
expect(msaSeqCol.getTag(bio.TAGS.aligned),
|
|
153
|
+
expect(msaSeqCol.getTag(bio.TAGS.aligned), ALIGNMENT.SEQ_MSA);
|
|
152
154
|
expect(msaSeqCol.getTag(bio.TAGS.alphabet), bio.ALPHABET.PT);
|
|
153
155
|
expect(msaSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
154
156
|
|
|
@@ -190,7 +192,7 @@ category('renderers', () => {
|
|
|
190
192
|
/**/
|
|
191
193
|
const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
|
|
192
194
|
['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
|
|
193
|
-
seqDiffCol.tags[DG.TAGS.UNITS] =
|
|
195
|
+
seqDiffCol.tags[DG.TAGS.UNITS] = NOTATION.SEPARATOR;
|
|
194
196
|
seqDiffCol.tags[TAGS.SEPARATOR] = '/';
|
|
195
197
|
seqDiffCol.semType = SEM_TYPES.MACROMOLECULE_DIFFERENCE;
|
|
196
198
|
const df = DG.DataFrame.fromColumns([seqDiffCol]);
|
|
@@ -206,10 +208,12 @@ category('renderers', () => {
|
|
|
206
208
|
`view renderer has set to '${renderer}' instead of correct 'MacromoleculeDifference'.`);
|
|
207
209
|
}
|
|
208
210
|
|
|
211
|
+
/** GROK-11212 Do not overwrite / recalculate 'cell.renderer' tag that has been set programmatically
|
|
212
|
+
* https://reddata.atlassian.net/browse/GROK-11212 */
|
|
209
213
|
async function _setRendererManually() {
|
|
210
214
|
const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
|
|
211
215
|
['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
|
|
212
|
-
seqDiffCol.tags[DG.TAGS.UNITS] =
|
|
216
|
+
seqDiffCol.tags[DG.TAGS.UNITS] = NOTATION.SEPARATOR;
|
|
213
217
|
seqDiffCol.tags[TAGS.SEPARATOR] = '/';
|
|
214
218
|
seqDiffCol.semType = SEM_TYPES.MACROMOLECULE;
|
|
215
219
|
const tgtCellRenderer = 'MacromoleculeDifference';
|
|
@@ -2,6 +2,9 @@ import * as DG from 'datagrok-api/dg';
|
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
3
|
import {DataFrame} from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
+
import {ALIGNMENT, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
6
|
+
import {NOTATION, ALPHABET, TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
7
|
+
|
|
5
8
|
export function generateManySequences(): DG.Column[] {
|
|
6
9
|
let columns: DG.Column[] = [];
|
|
7
10
|
columns.push(DG.Column.fromList('string', 'MSA', new Array(10 ** 6).fill('meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me')));
|
|
@@ -19,10 +22,10 @@ export function generateLongSequence(): DG.Column[] {
|
|
|
19
22
|
|
|
20
23
|
export function setTagsMacromolecule(col: DG.Column) {
|
|
21
24
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
22
|
-
col.setTag(
|
|
23
|
-
col.setTag(
|
|
24
|
-
col.setTag(
|
|
25
|
-
col.setTag(
|
|
25
|
+
col.setTag(DG.TAGS.UNITS, NOTATION.SEPARATOR);
|
|
26
|
+
col.setTag(TAGS.aligned, ALIGNMENT.SEQ_MSA);
|
|
27
|
+
col.setTag(TAGS.alphabet, ALPHABET.UN);
|
|
28
|
+
col.setTag(TAGS.separator, '/');
|
|
26
29
|
return col;
|
|
27
30
|
}
|
|
28
31
|
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
|
-
import * as ui from 'datagrok-api/ui';
|
|
3
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import * as ui from 'datagrok-api/ui';
|
|
4
|
+
|
|
5
|
+
import {printLeftOrCentered, DrawStyle} from '@datagrok-libraries/bio/src/utils/cell-renderer';
|
|
6
|
+
import {ALIGNMENT} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
4
7
|
import * as bio from '@datagrok-libraries/bio';
|
|
5
8
|
import * as C from './constants';
|
|
6
9
|
|
|
@@ -44,7 +47,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
44
47
|
}
|
|
45
48
|
|
|
46
49
|
onMouseMove(gridCell: DG.GridCell, e: MouseEvent): void {
|
|
47
|
-
if (gridCell.cell.column.getTag(bio.TAGS.aligned) !==
|
|
50
|
+
if (gridCell.cell.column.getTag(bio.TAGS.aligned) !== ALIGNMENT.SEQ_MSA)
|
|
48
51
|
return;
|
|
49
52
|
|
|
50
53
|
const maxLengthWordsSum = gridCell.cell.column.temp['bio-sum-maxLengthWords'];
|
|
@@ -14,6 +14,7 @@ import {Subject, Subscription} from 'rxjs';
|
|
|
14
14
|
import * as C from '../utils/constants';
|
|
15
15
|
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
16
16
|
import {NOTATION} from '@datagrok-libraries/bio';
|
|
17
|
+
import { delay } from '@datagrok-libraries/utils/src/test';
|
|
17
18
|
|
|
18
19
|
export class BioSubstructureFilter extends DG.Filter {
|
|
19
20
|
bioFilter: FastaFilter | SeparatorFilter | HelmFilter | null = null;
|
|
@@ -102,9 +103,7 @@ export class BioSubstructureFilter extends DG.Filter {
|
|
|
102
103
|
} else {
|
|
103
104
|
this.calculating = true;
|
|
104
105
|
try {
|
|
105
|
-
this.bitset = this.
|
|
106
|
-
await helmSubstructureSearch(this.bioFilter!.substructure, this.column!) :
|
|
107
|
-
linearSubstructureSearch(this.bioFilter!.substructure, this.column!);
|
|
106
|
+
this.bitset = await this.bioFilter?.substrucrureSearch(this.column!)!;
|
|
108
107
|
this.calculating = false;
|
|
109
108
|
this.dataFrame?.rows.requestFilter();
|
|
110
109
|
} finally {
|
|
@@ -127,6 +126,10 @@ abstract class BioFilterBase {
|
|
|
127
126
|
|
|
128
127
|
set substructure(s: string) {
|
|
129
128
|
}
|
|
129
|
+
|
|
130
|
+
async substrucrureSearch(column: DG.Column): Promise<DG.BitSet | null> {
|
|
131
|
+
return null;
|
|
132
|
+
}
|
|
130
133
|
}
|
|
131
134
|
|
|
132
135
|
class FastaFilter extends BioFilterBase {
|
|
@@ -149,6 +152,10 @@ class FastaFilter extends BioFilterBase {
|
|
|
149
152
|
set substructure(s: string) {
|
|
150
153
|
this.substructureInput.value = s;
|
|
151
154
|
}
|
|
155
|
+
|
|
156
|
+
async substrucrureSearch(column: DG.Column): Promise<DG.BitSet | null> {
|
|
157
|
+
return await linearSubstructureSearch(this.substructure, column);
|
|
158
|
+
}
|
|
152
159
|
}
|
|
153
160
|
|
|
154
161
|
class SeparatorFilter extends FastaFilter {
|
|
@@ -179,6 +186,10 @@ class SeparatorFilter extends FastaFilter {
|
|
|
179
186
|
set substructure(s: string) {
|
|
180
187
|
this.substructureInput.value = s;
|
|
181
188
|
}
|
|
189
|
+
|
|
190
|
+
async substrucrureSearch(column: DG.Column): Promise<DG.BitSet | null> {
|
|
191
|
+
return await linearSubstructureSearch(this.substructure, column, this.colSeparator);
|
|
192
|
+
}
|
|
182
193
|
}
|
|
183
194
|
|
|
184
195
|
class HelmFilter extends BioFilterBase {
|
|
@@ -202,9 +213,9 @@ class HelmFilter extends BioFilterBase {
|
|
|
202
213
|
.onOK(() => {
|
|
203
214
|
const helmString = this.helmEditor
|
|
204
215
|
.webEditor.canvas.getHelm(true).replace(/<\/span>/g, '').replace(/<span style='background:#bbf;'>/g, '');
|
|
205
|
-
this.updateFilterPanel(helmString);
|
|
206
216
|
this.helmSubstructure = helmString;
|
|
207
|
-
this.
|
|
217
|
+
this.updateFilterPanel(this.substructure);
|
|
218
|
+
setTimeout(() => { this.onChanged.next(); }, 10);
|
|
208
219
|
}).show({modal: true, fullScreen: true});
|
|
209
220
|
});
|
|
210
221
|
ui.onSizeChanged(this._filterPanel).subscribe((_) => {
|
|
@@ -247,4 +258,12 @@ class HelmFilter extends BioFilterBase {
|
|
|
247
258
|
this.helmEditor.resizeEditor(width, height);
|
|
248
259
|
}
|
|
249
260
|
}
|
|
261
|
+
|
|
262
|
+
async substrucrureSearch(column: DG.Column): Promise<DG.BitSet | null> {
|
|
263
|
+
ui.setUpdateIndicator(this._filterPanel, true);
|
|
264
|
+
await delay(10);
|
|
265
|
+
const res = await helmSubstructureSearch(this.substructure, column);
|
|
266
|
+
ui.setUpdateIndicator(this._filterPanel, false);
|
|
267
|
+
return res;
|
|
268
|
+
}
|
|
250
269
|
}
|