@datagrok/bio 2.1.10 → 2.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +1 -0
- package/detectors.js +5 -4
- package/dist/package-test.js +2182 -57664
- package/dist/package.js +1773 -57252
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +301 -250
- package/package.json +10 -12
- package/src/analysis/sequence-activity-cliffs.ts +5 -5
- package/src/analysis/sequence-diversity-viewer.ts +3 -2
- package/src/analysis/sequence-search-base-viewer.ts +2 -1
- package/src/analysis/sequence-similarity-viewer.ts +2 -2
- package/src/calculations/monomerLevelMols.ts +3 -3
- package/src/package.ts +28 -25
- package/src/substructure-search/substructure-search.ts +9 -9
- package/src/tests/Palettes-test.ts +9 -9
- package/src/tests/WebLogo-positions-test.ts +64 -57
- package/src/tests/bio-tests.ts +31 -21
- package/src/tests/checkInputColumn-tests.ts +17 -17
- package/src/tests/converters-test.ts +30 -30
- package/src/tests/detectors-benchmark-tests.ts +15 -16
- package/src/tests/detectors-tests.ts +31 -24
- package/src/tests/fasta-export-tests.ts +2 -3
- package/src/tests/renderers-test.ts +17 -15
- package/src/tests/splitters-test.ts +3 -3
- package/src/tests/utils/sequences-generators.ts +7 -7
- package/src/utils/cell-renderer.ts +33 -24
- package/src/utils/convert.ts +10 -10
- package/src/utils/multiple-sequence-alignment.ts +6 -7
- package/src/utils/save-as-fasta.ts +8 -8
- package/src/viewers/vd-regions-viewer.ts +15 -14
- package/tsconfig.json +1 -1
- package/jest.config.js +0 -33
- package/src/__jest__/remote.test.ts +0 -77
- package/src/__jest__/test-node.ts +0 -98
- package/test-Bio-62cc009524f3-e7a922ae.html +0 -392
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.1.
|
|
8
|
+
"version": "2.1.12",
|
|
9
9
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -14,19 +14,18 @@
|
|
|
14
14
|
},
|
|
15
15
|
"dependencies": {
|
|
16
16
|
"@biowasm/aioli": "^3.1.0",
|
|
17
|
-
"@datagrok-libraries/bio": "^5.
|
|
17
|
+
"@datagrok-libraries/bio": "^5.16.0",
|
|
18
18
|
"@datagrok-libraries/chem-meta": "^1.0.1",
|
|
19
|
-
"@datagrok-libraries/ml": "^6.
|
|
20
|
-
"@datagrok-libraries/utils": "^1.
|
|
19
|
+
"@datagrok-libraries/ml": "^6.3.3",
|
|
20
|
+
"@datagrok-libraries/utils": "^1.19.4",
|
|
21
21
|
"cash-dom": "^8.0.0",
|
|
22
|
-
"datagrok-api": "^1.
|
|
22
|
+
"datagrok-api": "^1.11.1",
|
|
23
23
|
"dayjs": "^1.11.4",
|
|
24
24
|
"openchemlib": "6.0.1",
|
|
25
25
|
"rxjs": "^6.5.5",
|
|
26
26
|
"wu": "latest"
|
|
27
27
|
},
|
|
28
28
|
"devDependencies": {
|
|
29
|
-
"@types/jest": "^27.5.1",
|
|
30
29
|
"@types/js-yaml": "^4.0.5",
|
|
31
30
|
"@types/node": "^17.0.24",
|
|
32
31
|
"@types/node-fetch": "^2.6.2",
|
|
@@ -35,15 +34,13 @@
|
|
|
35
34
|
"@typescript-eslint/parser": "^4.20.0",
|
|
36
35
|
"eslint": "^7.23.0",
|
|
37
36
|
"eslint-config-google": "latest",
|
|
38
|
-
"jest": "^27.5.1",
|
|
39
|
-
"jest-html-reporter": "^3.6.0",
|
|
40
37
|
"js-yaml": "^4.1.0",
|
|
41
38
|
"node-fetch": "^2.6.7",
|
|
42
39
|
"puppeteer": "^13.7.0",
|
|
43
|
-
"ts-jest": "^27.0.0",
|
|
44
40
|
"ts-loader": "^9.2.5",
|
|
45
41
|
"typescript": "^4.2.3",
|
|
46
42
|
"webpack": "^5.64.1",
|
|
43
|
+
"webpack-bundle-analyzer": "latest",
|
|
47
44
|
"webpack-cli": "^4.6.0"
|
|
48
45
|
},
|
|
49
46
|
"grokDependencies": {
|
|
@@ -68,9 +65,10 @@
|
|
|
68
65
|
"release-sequences1-local": "grok publish local --release",
|
|
69
66
|
"lint": "eslint \"./src/**/*.ts\"",
|
|
70
67
|
"lint-fix": "eslint \"./src/**/*.ts\" --fix",
|
|
71
|
-
"test": "
|
|
72
|
-
"test-local": "
|
|
73
|
-
"build-bio-local": "npm --prefix ./../../js-api run build && npm --prefix ./../../libraries/utils run build && npm --prefix ./../../libraries/ml run build && npm run build && npm --prefix ./../../libraries/bio run build && npm run build"
|
|
68
|
+
"test": "grok test",
|
|
69
|
+
"test-local": "grok test --host localhost",
|
|
70
|
+
"build-bio-local": "npm --prefix ./../../js-api run build && npm --prefix ./../../libraries/utils run build && npm --prefix ./../../libraries/ml run build && npm run build && npm --prefix ./../../libraries/bio run build && npm run build",
|
|
71
|
+
"analyze": "webpack --profile --json > ./stats.json && npx webpack-bundle-analyzer ./stats.json"
|
|
74
72
|
},
|
|
75
73
|
"canEdit": [
|
|
76
74
|
"Developers"
|
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
import * as bio from '@datagrok-libraries/bio';
|
|
5
4
|
|
|
6
5
|
import {ITooltipAndPanelParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
7
|
-
import {getSimilarityFromDistance} from '@datagrok-libraries/
|
|
6
|
+
import {getSimilarityFromDistance} from '@datagrok-libraries/ml/src/distance-metrics-methods';
|
|
8
7
|
import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
9
8
|
import {TAGS} from '../utils/constants';
|
|
10
9
|
import {drawMoleculeDifferenceOnCanvas} from '../utils/cell-renderer';
|
|
11
10
|
import * as C from '../utils/constants';
|
|
12
|
-
import {
|
|
13
|
-
import {
|
|
11
|
+
import {GridColumn} from 'datagrok-api/dg';
|
|
12
|
+
import {invalidateMols, MONOMERIC_COL_TAGS} from '../substructure-search/substructure-search';
|
|
13
|
+
import {getSplitter} from '@datagrok-libraries/bio';
|
|
14
14
|
|
|
15
15
|
export async function getDistances(col: DG.Column, seq: string): Promise<Array<number>> {
|
|
16
16
|
const stringArray = col.toList();
|
|
@@ -105,7 +105,7 @@ export function createPropPanelElement(params: ITooltipAndPanelParams): HTMLDivE
|
|
|
105
105
|
const molDifferences: { [key: number]: HTMLCanvasElement } = {};
|
|
106
106
|
const units = params.seqCol.getTag(DG.TAGS.UNITS);
|
|
107
107
|
const separator = params.seqCol.getTag(TAGS.SEPARATOR);
|
|
108
|
-
const splitter =
|
|
108
|
+
const splitter = getSplitter(units, separator);
|
|
109
109
|
const subParts1 = splitter(sequencesArray[0]);
|
|
110
110
|
const subParts2 = splitter(sequencesArray[1]);
|
|
111
111
|
const canvas = createDifferenceCanvas(subParts1, subParts2, units, molDifferences);
|
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
import * as ui from 'datagrok-api/ui';
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
3
|
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
|
|
4
5
|
import BitArray from '@datagrok-libraries/utils/src/bit-array';
|
|
5
|
-
import {
|
|
6
|
+
import {getDiverseSubset} from '@datagrok-libraries/utils/src/similarity-metrics';
|
|
6
7
|
import $ from 'cash-dom';
|
|
7
8
|
import {ArrayUtils} from '@datagrok-libraries/utils/src/array-utils';
|
|
8
9
|
import {SequenceSearchBaseViewer} from './sequence-search-base-viewer';
|
|
9
10
|
import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
10
11
|
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
11
|
-
import {
|
|
12
|
+
import {Subject} from 'rxjs';
|
|
12
13
|
|
|
13
14
|
export class SequenceDiversityViewer extends SequenceSearchBaseViewer {
|
|
14
15
|
renderMolIds: number[] | null = null;
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import * as ui from 'datagrok-api/ui';
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
3
|
import * as grok from 'datagrok-api/grok';
|
|
4
|
-
|
|
4
|
+
|
|
5
|
+
import {CHEM_SIMILARITY_METRICS} from '@datagrok-libraries/ml/src/distance-metrics-methods';
|
|
5
6
|
import * as C from '../utils/constants';
|
|
6
7
|
|
|
7
8
|
export class SequenceSearchBaseViewer extends DG.JsViewer {
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
import * as bio from '@datagrok-libraries/bio';
|
|
5
4
|
|
|
6
5
|
import {SequenceSearchBaseViewer} from './sequence-search-base-viewer';
|
|
7
6
|
import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
@@ -9,6 +8,7 @@ import * as C from '../utils/constants';
|
|
|
9
8
|
import {createDifferenceCanvas, createDifferencesWithPositions} from './sequence-activity-cliffs';
|
|
10
9
|
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
11
10
|
import {Subject} from 'rxjs';
|
|
11
|
+
import {getSplitter} from '@datagrok-libraries/bio';
|
|
12
12
|
|
|
13
13
|
export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
14
14
|
hotSearch: boolean;
|
|
@@ -84,7 +84,7 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
84
84
|
const molDifferences: { [key: number]: HTMLCanvasElement } = {};
|
|
85
85
|
const units = resDf.col('sequence')!.getTag(DG.TAGS.UNITS);
|
|
86
86
|
const separator = resDf.col('sequence')!.getTag(C.TAGS.SEPARATOR);
|
|
87
|
-
const splitter =
|
|
87
|
+
const splitter = getSplitter(units, separator);
|
|
88
88
|
const subParts1 = splitter(this.moleculeColumn!.get(this.targetMoleculeIdx));
|
|
89
89
|
const subParts2 = splitter(resDf.get('sequence', resDf.currentRowIdx));
|
|
90
90
|
const canvas = createDifferenceCanvas(subParts1, subParts2, units, molDifferences);
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
import * as bio from '@datagrok-libraries/bio';
|
|
5
4
|
|
|
6
5
|
import * as C from '../utils/constants';
|
|
7
6
|
import {getHelmMonomers} from '../package';
|
|
7
|
+
import {getSplitter, getStats} from '@datagrok-libraries/bio';
|
|
8
8
|
|
|
9
9
|
const V2000_ATOM_NAME_POS = 31;
|
|
10
10
|
|
|
@@ -12,11 +12,11 @@ export async function getMonomericMols(mcol: DG.Column,
|
|
|
12
12
|
pattern: boolean = false, monomersDict?: Map<string, string>): Promise<DG.Column> {
|
|
13
13
|
const separator: string = mcol.tags[C.TAGS.SEPARATOR];
|
|
14
14
|
const units: string = mcol.tags[DG.TAGS.UNITS];
|
|
15
|
-
const splitter =
|
|
15
|
+
const splitter = getSplitter(units, separator);
|
|
16
16
|
let molV3000Array;
|
|
17
17
|
monomersDict ??= new Map();
|
|
18
18
|
const monomers = units === 'helm' ?
|
|
19
|
-
getHelmMonomers(mcol) : Object.keys(
|
|
19
|
+
getHelmMonomers(mcol) : Object.keys(getStats(mcol, 0, splitter).freq).filter((it) => it !== '');
|
|
20
20
|
|
|
21
21
|
for (let i = 0; i < monomers.length; i++) {
|
|
22
22
|
if (!monomersDict.has(monomers[i]))
|
package/src/package.ts
CHANGED
|
@@ -43,7 +43,7 @@ import {getMonomericMols} from './calculations/monomerLevelMols';
|
|
|
43
43
|
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
44
44
|
import {from, Observable, Subject} from 'rxjs';
|
|
45
45
|
import {
|
|
46
|
-
TAGS as
|
|
46
|
+
TAGS as bioTAGS,
|
|
47
47
|
Monomer, IMonomerLib, MonomerWorks, MonomerLib, readLibrary,
|
|
48
48
|
SeqPalette, UnitsHandler, WebLogoViewer, getStats, splitterAsHelm
|
|
49
49
|
} from '@datagrok-libraries/bio';
|
|
@@ -70,13 +70,13 @@ export class SeqPaletteCustom implements SeqPalette {
|
|
|
70
70
|
//tags: init
|
|
71
71
|
export async function initBio() {
|
|
72
72
|
await loadLibraries();
|
|
73
|
-
|
|
74
|
-
|
|
73
|
+
const monomers: string[] = [];
|
|
74
|
+
const logPs: number[] = [];
|
|
75
75
|
const module = await grok.functions.call('Chem:getRdKitModule');
|
|
76
76
|
|
|
77
77
|
|
|
78
78
|
const series = monomerLib!.getMonomerMolsByType('PEPTIDE')!;
|
|
79
|
-
Object.keys(series).forEach(symbol => {
|
|
79
|
+
Object.keys(series).forEach((symbol) => {
|
|
80
80
|
monomers.push(symbol);
|
|
81
81
|
const block = series[symbol].replaceAll('#R', 'O ');
|
|
82
82
|
const mol = module.get_mol(block);
|
|
@@ -88,16 +88,17 @@ export async function initBio() {
|
|
|
88
88
|
const sum = logPs.reduce((a, b) => a + b, 0);
|
|
89
89
|
const avg = (sum / logPs.length) || 0;
|
|
90
90
|
|
|
91
|
-
|
|
92
|
-
for (let i = 0; i < monomers.length; i++)
|
|
91
|
+
const palette: { [monomer: string]: string } = {};
|
|
92
|
+
for (let i = 0; i < monomers.length; i++)
|
|
93
93
|
palette[monomers[i]] = logPs[i] < avg ? '#4682B4' : '#DC143C';
|
|
94
|
-
}
|
|
95
94
|
|
|
96
95
|
hydrophobPalette = new SeqPaletteCustom(palette);
|
|
97
96
|
}
|
|
98
97
|
|
|
99
98
|
async function loadLibraries() {
|
|
100
|
-
|
|
99
|
+
//TODO handle if files are in place
|
|
100
|
+
|
|
101
|
+
const uploadedLibraries: string[] = Object.values(await grok.dapi.userDataStorage.get(STORAGE_NAME, true));
|
|
101
102
|
if (uploadedLibraries.length == 0 && monomerLib == null)
|
|
102
103
|
monomerLib = new MonomerLib({});
|
|
103
104
|
for (let i = 0; i < uploadedLibraries.length; ++i)
|
|
@@ -109,9 +110,8 @@ async function loadLibraries() {
|
|
|
109
110
|
export async function monomerManager(value: string) {
|
|
110
111
|
if (monomerLib == null)
|
|
111
112
|
monomerLib = await readLibrary(LIB_PATH, value);
|
|
112
|
-
else
|
|
113
|
+
else
|
|
113
114
|
monomerLib!.update(await readLibrary(LIB_PATH, value));
|
|
114
|
-
}
|
|
115
115
|
}
|
|
116
116
|
|
|
117
117
|
//name: getBioLib
|
|
@@ -135,20 +135,21 @@ export async function manageFiles() {
|
|
|
135
135
|
//output: widget result
|
|
136
136
|
export async function libraryPanel(seqColumn: DG.Column): Promise<DG.Widget> {
|
|
137
137
|
//@ts-ignore
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
138
|
+
const filesButton: HTMLButtonElement = ui.button('Manage', manageFiles);
|
|
139
|
+
const divInputs: HTMLDivElement = ui.div();
|
|
140
|
+
const librariesList: string[] = (await _package.files.list(`${LIBS_PATH}`, false, ''))
|
|
141
|
+
.map((it) => it.fileName);
|
|
142
|
+
const uploadedLibraries: string[] = Object.values(await grok.dapi.userDataStorage.get(STORAGE_NAME, true));
|
|
142
143
|
for (let i = 0; i < uploadedLibraries.length; ++i) {
|
|
143
|
-
|
|
144
|
+
const libraryName: string = uploadedLibraries[i];
|
|
144
145
|
divInputs.append(ui.boolInput(libraryName, true, async () => {
|
|
145
146
|
grok.dapi.userDataStorage.remove(STORAGE_NAME, libraryName, true);
|
|
146
147
|
await loadLibraries();
|
|
147
148
|
}).root);
|
|
148
149
|
}
|
|
149
|
-
|
|
150
|
+
const unusedLibraries: string[] = librariesList.filter((x) => !uploadedLibraries.includes(x));
|
|
150
151
|
for (let i = 0; i < unusedLibraries.length; ++i) {
|
|
151
|
-
|
|
152
|
+
const libraryName: string = unusedLibraries[i];
|
|
152
153
|
divInputs.append(ui.boolInput(libraryName, false, () => {
|
|
153
154
|
monomerManager(libraryName);
|
|
154
155
|
grok.dapi.userDataStorage.postValue(STORAGE_NAME, libraryName, libraryName, true);
|
|
@@ -290,9 +291,9 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
290
291
|
};
|
|
291
292
|
const tags = {
|
|
292
293
|
'units': macroMolecule.getTag(DG.TAGS.UNITS),
|
|
293
|
-
'aligned': macroMolecule.getTag(
|
|
294
|
-
'separator': macroMolecule.getTag(
|
|
295
|
-
'alphabet': macroMolecule.getTag(
|
|
294
|
+
'aligned': macroMolecule.getTag(bioTAGS.aligned),
|
|
295
|
+
'separator': macroMolecule.getTag(bioTAGS.separator),
|
|
296
|
+
'alphabet': macroMolecule.getTag(bioTAGS.alphabet),
|
|
296
297
|
};
|
|
297
298
|
const sp = await getActivityCliffs(
|
|
298
299
|
df,
|
|
@@ -324,7 +325,8 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
324
325
|
//input: bool plotEmbeddings = true
|
|
325
326
|
export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: DG.Column, methodName: string,
|
|
326
327
|
similarityMetric: string = 'Tanimoto', plotEmbeddings: boolean): Promise<DG.Viewer | undefined> {
|
|
327
|
-
//
|
|
328
|
+
// Delay is required for initial function dialog to close before starting invalidating of molfiles.
|
|
329
|
+
// Otherwise, dialog is freezing
|
|
328
330
|
await delay(10);
|
|
329
331
|
if (!checkInputColumnUi(macroMolecule, 'Sequence space'))
|
|
330
332
|
return;
|
|
@@ -344,14 +346,13 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
344
346
|
for (const col of embeddings) {
|
|
345
347
|
const listValues = col.toList();
|
|
346
348
|
emptyValsIdxs.forEach((ind: number) => listValues.splice(ind, 0, null));
|
|
347
|
-
table.columns.add(DG.Column.
|
|
349
|
+
table.columns.add(DG.Column.float(col.name, table.rowCount).init((i) => listValues[i]));
|
|
348
350
|
}
|
|
349
351
|
if (plotEmbeddings) {
|
|
350
352
|
return grok.shell
|
|
351
353
|
.tableView(table.name)
|
|
352
354
|
.scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
|
|
353
355
|
}
|
|
354
|
-
;
|
|
355
356
|
|
|
356
357
|
/* const encodedCol = encodeMonomers(macroMolecule);
|
|
357
358
|
if (!encodedCol)
|
|
@@ -396,7 +397,7 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
|
|
|
396
397
|
return;
|
|
397
398
|
const monomersLibFile = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
|
|
398
399
|
const monomersLibObject: any[] = JSON.parse(monomersLibFile);
|
|
399
|
-
_toAtomicLevel(df, macroMolecule, monomersLibObject);
|
|
400
|
+
await _toAtomicLevel(df, macroMolecule, monomersLibObject);
|
|
400
401
|
}
|
|
401
402
|
|
|
402
403
|
//top-menu: Bio | MSA...
|
|
@@ -404,7 +405,9 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
|
|
|
404
405
|
//input: dataframe table
|
|
405
406
|
//input: column sequence { semType: Macromolecule, units: ['fasta'], alphabet: ['DNA', 'RNA', 'PT'] }
|
|
406
407
|
//output: column result
|
|
407
|
-
export async function multipleSequenceAlignmentAny(
|
|
408
|
+
export async function multipleSequenceAlignmentAny(
|
|
409
|
+
table: DG.DataFrame, sequence: DG.Column
|
|
410
|
+
): Promise<DG.Column | null> {
|
|
408
411
|
const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
|
|
409
412
|
|
|
410
413
|
if (!checkInputColumnUi(sequence, 'MSA', ['fasta'], ['DNA', 'RNA', 'PT']))
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
import * as bio from '@datagrok-libraries/bio';
|
|
5
4
|
|
|
6
5
|
import * as C from '../utils/constants';
|
|
7
6
|
import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
8
7
|
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
9
|
-
import {
|
|
8
|
+
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
9
|
+
import {NOTATION} from '@datagrok-libraries/bio';
|
|
10
10
|
|
|
11
11
|
export const MONOMER_MOLS_COL = 'monomeric-mols';
|
|
12
12
|
|
|
@@ -39,14 +39,14 @@ export function substructureSearchDialog(col: DG.Column): void {
|
|
|
39
39
|
const df = DG.DataFrame.create(1);
|
|
40
40
|
df.columns.addNewString('substr_helm').init((i) => '');
|
|
41
41
|
df.col('substr_helm')!.semType = col.semType;
|
|
42
|
-
df.col('substr_helm')!.setTag(DG.TAGS.UNITS,
|
|
42
|
+
df.col('substr_helm')!.setTag(DG.TAGS.UNITS, NOTATION.HELM);
|
|
43
43
|
const grid = df.plot.grid();
|
|
44
44
|
const separatorInput = ui.textInput('Separator', separator);
|
|
45
45
|
|
|
46
46
|
const inputsDiv = ui.div();
|
|
47
47
|
|
|
48
|
-
const inputs = units ===
|
|
49
|
-
units ===
|
|
48
|
+
const inputs = units === NOTATION.HELM ? ui.divV([editHelmLink]) :
|
|
49
|
+
units === NOTATION.SEPARATOR ? ui.inputs([substructureInput, separatorInput]) :
|
|
50
50
|
ui.inputs([substructureInput]);
|
|
51
51
|
|
|
52
52
|
updateDivInnerHTML(inputsDiv, inputs);
|
|
@@ -57,15 +57,15 @@ export function substructureSearchDialog(col: DG.Column): void {
|
|
|
57
57
|
inputsDiv
|
|
58
58
|
]))
|
|
59
59
|
.onOK(async () => {
|
|
60
|
-
let substructure = units ===
|
|
61
|
-
if (units ===
|
|
60
|
+
let substructure = units === NOTATION.HELM ? df.get('substr_helm', 0) : substructureInput.value;
|
|
61
|
+
if (units === NOTATION.SEPARATOR && separatorInput.value !== separator && separatorInput.value !== '')
|
|
62
62
|
substructure = substructure.replaceAll(separatorInput.value, separator);
|
|
63
63
|
const matchesColName = `Matches: ${substructure}`;
|
|
64
64
|
const colExists = col.dataFrame.columns.names()
|
|
65
65
|
.filter((it) => it.toLocaleLowerCase() === matchesColName.toLocaleLowerCase()).length > 0;
|
|
66
66
|
if (!colExists) {
|
|
67
67
|
let matches: DG.BitSet;
|
|
68
|
-
if (units ===
|
|
68
|
+
if (units === NOTATION.HELM)
|
|
69
69
|
matches = await helmSubstructureSearch(substructure, col);
|
|
70
70
|
else
|
|
71
71
|
matches = linearSubstructureSearch(substructure, col);
|
|
@@ -102,7 +102,7 @@ export async function helmSubstructureSearch(substructure: string, col: DG.Colum
|
|
|
102
102
|
if (col.version !== col.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION])
|
|
103
103
|
await invalidateMols(col, true);
|
|
104
104
|
const substructureCol = DG.Column.string('helm', 1).init((i) => substructure);
|
|
105
|
-
substructureCol.setTag(DG.TAGS.UNITS,
|
|
105
|
+
substructureCol.setTag(DG.TAGS.UNITS, NOTATION.HELM);
|
|
106
106
|
const substructureMolsCol =
|
|
107
107
|
await getMonomericMols(substructureCol, true, col.temp[MONOMERIC_COL_TAGS.MONOMERS_DICT]);
|
|
108
108
|
const matchesCol = await grok.functions.call('Chem:searchSubstructure', {
|
|
@@ -1,26 +1,26 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
import * as bio from '@datagrok-libraries/bio';
|
|
5
4
|
|
|
6
5
|
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
7
6
|
|
|
8
7
|
import {_testPaletteN, _testPaletteAA} from '@datagrok-libraries/bio/src/tests/palettes-tests';
|
|
8
|
+
import { AminoacidsPalettes } from '@datagrok-libraries/bio';
|
|
9
9
|
|
|
10
10
|
category('Palettes', () => {
|
|
11
11
|
test('testPaletteN', async () => { await _testPaletteN(); });
|
|
12
12
|
test('testPaletteAA', async () => { await _testPaletteAA(); });
|
|
13
13
|
|
|
14
14
|
test('testPalettePtMe', async () => {
|
|
15
|
-
const colorMeNle =
|
|
16
|
-
const colorMeA =
|
|
17
|
-
const colorMeG =
|
|
18
|
-
const colorMeF =
|
|
15
|
+
const colorMeNle = AminoacidsPalettes.GrokGroups.get('MeNle');
|
|
16
|
+
const colorMeA = AminoacidsPalettes.GrokGroups.get('MeA');
|
|
17
|
+
const colorMeG = AminoacidsPalettes.GrokGroups.get('MeG');
|
|
18
|
+
const colorMeF = AminoacidsPalettes.GrokGroups.get('MeF');
|
|
19
19
|
|
|
20
|
-
const colorL =
|
|
21
|
-
const colorA =
|
|
22
|
-
const colorG =
|
|
23
|
-
const colorF =
|
|
20
|
+
const colorL = AminoacidsPalettes.GrokGroups.get('L');
|
|
21
|
+
const colorA = AminoacidsPalettes.GrokGroups.get('A');
|
|
22
|
+
const colorG = AminoacidsPalettes.GrokGroups.get('G');
|
|
23
|
+
const colorF = AminoacidsPalettes.GrokGroups.get('F');
|
|
24
24
|
|
|
25
25
|
expect(colorMeNle, colorL);
|
|
26
26
|
expect(colorMeA, colorA);
|
|
@@ -1,9 +1,16 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
import * as bio from '@datagrok-libraries/bio';
|
|
5
4
|
|
|
6
5
|
import {after, before, category, test, expect, expectObject, delay} from '@datagrok-libraries/utils/src/test';
|
|
6
|
+
import {
|
|
7
|
+
ALPHABET,
|
|
8
|
+
NOTATION,
|
|
9
|
+
PositionInfo,
|
|
10
|
+
PositionMonomerInfo,
|
|
11
|
+
TAGS as bioTAGS,
|
|
12
|
+
WebLogoViewer
|
|
13
|
+
} from '@datagrok-libraries/bio';
|
|
7
14
|
|
|
8
15
|
category('WebLogo-positions', () => {
|
|
9
16
|
let tvList: DG.TableView[];
|
|
@@ -25,7 +32,7 @@ ATC-G-TTGC--
|
|
|
25
32
|
});
|
|
26
33
|
|
|
27
34
|
after(async () => {
|
|
28
|
-
dfList.forEach((df: DG.DataFrame) => { grok.shell.closeTable(df);});
|
|
35
|
+
dfList.forEach((df: DG.DataFrame) => { grok.shell.closeTable(df); });
|
|
29
36
|
tvList.forEach((tv: DG.TableView) => tv.close());
|
|
30
37
|
grok.shell.v = currentView;
|
|
31
38
|
});
|
|
@@ -36,31 +43,31 @@ ATC-G-TTGC--
|
|
|
36
43
|
|
|
37
44
|
const seqCol: DG.Column = df.getCol('seq');
|
|
38
45
|
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
39
|
-
seqCol.setTag(DG.TAGS.UNITS,
|
|
40
|
-
seqCol.setTag(
|
|
41
|
-
seqCol.setTag(
|
|
46
|
+
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
47
|
+
seqCol.setTag(bioTAGS.alphabet, ALPHABET.DNA);
|
|
48
|
+
seqCol.setTag(bioTAGS.aligned, 'SEQ.MSA');
|
|
42
49
|
|
|
43
|
-
const wlViewer:
|
|
50
|
+
const wlViewer: WebLogoViewer = (await df.plot.fromType('WebLogo')) as WebLogoViewer;
|
|
44
51
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
45
52
|
|
|
46
53
|
tvList.push(tv);
|
|
47
54
|
dfList.push(df);
|
|
48
55
|
|
|
49
|
-
const positions:
|
|
50
|
-
|
|
51
|
-
const resAllDf1:
|
|
52
|
-
new
|
|
53
|
-
new
|
|
54
|
-
new
|
|
55
|
-
new
|
|
56
|
-
new
|
|
57
|
-
new
|
|
58
|
-
new
|
|
59
|
-
new
|
|
60
|
-
new
|
|
61
|
-
new
|
|
62
|
-
new
|
|
63
|
-
new
|
|
56
|
+
const positions: PositionInfo[] = wlViewer['positions'];
|
|
57
|
+
|
|
58
|
+
const resAllDf1: PositionInfo[] = [
|
|
59
|
+
new PositionInfo('1', {'A': new PositionMonomerInfo(2), '-': new PositionMonomerInfo(3)}),
|
|
60
|
+
new PositionInfo('2', {'T': new PositionMonomerInfo(5)}),
|
|
61
|
+
new PositionInfo('3', {'C': new PositionMonomerInfo(5)}),
|
|
62
|
+
new PositionInfo('4', {'-': new PositionMonomerInfo(5)}),
|
|
63
|
+
new PositionInfo('5', {'G': new PositionMonomerInfo(5)}),
|
|
64
|
+
new PositionInfo('6', {'-': new PositionMonomerInfo(3), 'C': new PositionMonomerInfo(2)}),
|
|
65
|
+
new PositionInfo('7', {'T': new PositionMonomerInfo(5)}),
|
|
66
|
+
new PositionInfo('8', {'T': new PositionMonomerInfo(5)}),
|
|
67
|
+
new PositionInfo('9', {'G': new PositionMonomerInfo(5)}),
|
|
68
|
+
new PositionInfo('10', {'C': new PositionMonomerInfo(5)}),
|
|
69
|
+
new PositionInfo('11', {'-': new PositionMonomerInfo(5)}),
|
|
70
|
+
new PositionInfo('12', {'-': new PositionMonomerInfo(5)}),
|
|
64
71
|
];
|
|
65
72
|
|
|
66
73
|
expect(positions.length, resAllDf1.length);
|
|
@@ -73,8 +80,8 @@ ATC-G-TTGC--
|
|
|
73
80
|
}
|
|
74
81
|
});
|
|
75
82
|
|
|
76
|
-
test('positions with shrinkEmptyTail option true (
|
|
77
|
-
|
|
83
|
+
test('positions with shrinkEmptyTail option true (filtered)', async () => {
|
|
84
|
+
const csvDf2 = `seq
|
|
78
85
|
-TC-G-TTGC--
|
|
79
86
|
-TC-GCTTGC--
|
|
80
87
|
-T--C-GT-
|
|
@@ -86,33 +93,33 @@ ATC-G-TTGC--
|
|
|
86
93
|
|
|
87
94
|
const seqCol: DG.Column = df.getCol('seq');
|
|
88
95
|
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
89
|
-
seqCol.setTag(DG.TAGS.UNITS,
|
|
90
|
-
seqCol.setTag(
|
|
91
|
-
seqCol.setTag(
|
|
96
|
+
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
97
|
+
seqCol.setTag(bioTAGS.alphabet, ALPHABET.DNA);
|
|
98
|
+
seqCol.setTag(bioTAGS.aligned, 'SEQ');
|
|
92
99
|
|
|
93
100
|
df.filter.init((i) => {
|
|
94
101
|
return i > 2;
|
|
95
102
|
});
|
|
96
103
|
df.filter.fireChanged();
|
|
97
|
-
const wlViewer:
|
|
98
|
-
{'shrinkEmptyTail': true})) as
|
|
104
|
+
const wlViewer: WebLogoViewer = (await df.plot.fromType('WebLogo',
|
|
105
|
+
{'shrinkEmptyTail': true})) as WebLogoViewer;
|
|
99
106
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
100
107
|
|
|
101
108
|
tvList.push(tv);
|
|
102
109
|
dfList.push(df);
|
|
103
110
|
|
|
104
|
-
const positions:
|
|
105
|
-
|
|
106
|
-
const resAllDf1:
|
|
107
|
-
new
|
|
108
|
-
new
|
|
109
|
-
new
|
|
110
|
-
new
|
|
111
|
-
new
|
|
112
|
-
new
|
|
113
|
-
new
|
|
114
|
-
new
|
|
115
|
-
new
|
|
111
|
+
const positions: PositionInfo[] = wlViewer['positions'];
|
|
112
|
+
|
|
113
|
+
const resAllDf1: PositionInfo[] = [
|
|
114
|
+
new PositionInfo('1', {'-': new PositionMonomerInfo(3)}),
|
|
115
|
+
new PositionInfo('2', {'T': new PositionMonomerInfo(3)}),
|
|
116
|
+
new PositionInfo('3', {'-': new PositionMonomerInfo(3)}),
|
|
117
|
+
new PositionInfo('4', {'-': new PositionMonomerInfo(3)}),
|
|
118
|
+
new PositionInfo('5', {'C': new PositionMonomerInfo(3)}),
|
|
119
|
+
new PositionInfo('6', {'-': new PositionMonomerInfo(2), 'C': new PositionMonomerInfo(1)}),
|
|
120
|
+
new PositionInfo('7', {'G': new PositionMonomerInfo(3)}),
|
|
121
|
+
new PositionInfo('8', {'T': new PositionMonomerInfo(3)}),
|
|
122
|
+
new PositionInfo('9', {'-': new PositionMonomerInfo(3)}),
|
|
116
123
|
];
|
|
117
124
|
|
|
118
125
|
expect(positions.length, resAllDf1.length);
|
|
@@ -131,29 +138,29 @@ ATC-G-TTGC--
|
|
|
131
138
|
|
|
132
139
|
const seqCol: DG.Column = df.getCol('seq');
|
|
133
140
|
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
134
|
-
seqCol.setTag(DG.TAGS.UNITS,
|
|
135
|
-
seqCol.setTag(
|
|
136
|
-
seqCol.setTag(
|
|
141
|
+
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
142
|
+
seqCol.setTag(bioTAGS.alphabet, ALPHABET.DNA);
|
|
143
|
+
seqCol.setTag(bioTAGS.aligned, 'SEQ.MSA');
|
|
137
144
|
|
|
138
|
-
const wlViewer:
|
|
139
|
-
{'skipEmptyPositions': true})) as
|
|
145
|
+
const wlViewer: WebLogoViewer = (await df.plot.fromType('WebLogo',
|
|
146
|
+
{'skipEmptyPositions': true})) as WebLogoViewer;
|
|
140
147
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
141
148
|
|
|
142
149
|
tvList.push(tv);
|
|
143
150
|
dfList.push(df);
|
|
144
151
|
|
|
145
|
-
const positions:
|
|
146
|
-
|
|
147
|
-
const resAllDf1:
|
|
148
|
-
new
|
|
149
|
-
new
|
|
150
|
-
new
|
|
151
|
-
new
|
|
152
|
-
new
|
|
153
|
-
new
|
|
154
|
-
new
|
|
155
|
-
new
|
|
156
|
-
new
|
|
152
|
+
const positions: PositionInfo[] = wlViewer['positions'];
|
|
153
|
+
|
|
154
|
+
const resAllDf1: PositionInfo[] = [
|
|
155
|
+
new PositionInfo('1', {'A': new PositionMonomerInfo(2), '-': new PositionMonomerInfo(3)}),
|
|
156
|
+
new PositionInfo('2', {'T': new PositionMonomerInfo(5)}),
|
|
157
|
+
new PositionInfo('3', {'C': new PositionMonomerInfo(5)}),
|
|
158
|
+
new PositionInfo('5', {'G': new PositionMonomerInfo(5)}),
|
|
159
|
+
new PositionInfo('6', {'-': new PositionMonomerInfo(3), 'C': new PositionMonomerInfo(2)}),
|
|
160
|
+
new PositionInfo('7', {'T': new PositionMonomerInfo(5)}),
|
|
161
|
+
new PositionInfo('8', {'T': new PositionMonomerInfo(5)}),
|
|
162
|
+
new PositionInfo('9', {'G': new PositionMonomerInfo(5)}),
|
|
163
|
+
new PositionInfo('10', {'C': new PositionMonomerInfo(5)})
|
|
157
164
|
];
|
|
158
165
|
|
|
159
166
|
expect(positions.length, resAllDf1.length);
|