@datagrok/bio 2.1.11 → 2.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-test.js +2180 -57663
- package/dist/package.js +1771 -57251
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +301 -250
- package/package.json +10 -12
- package/src/analysis/sequence-activity-cliffs.ts +5 -5
- package/src/analysis/sequence-diversity-viewer.ts +3 -2
- package/src/analysis/sequence-search-base-viewer.ts +2 -1
- package/src/analysis/sequence-similarity-viewer.ts +2 -2
- package/src/calculations/monomerLevelMols.ts +3 -3
- package/src/package.ts +25 -24
- package/src/substructure-search/substructure-search.ts +9 -9
- package/src/tests/Palettes-test.ts +9 -9
- package/src/tests/WebLogo-positions-test.ts +64 -57
- package/src/tests/bio-tests.ts +31 -21
- package/src/tests/checkInputColumn-tests.ts +17 -17
- package/src/tests/converters-test.ts +30 -30
- package/src/tests/detectors-benchmark-tests.ts +15 -16
- package/src/tests/detectors-tests.ts +31 -24
- package/src/tests/fasta-export-tests.ts +2 -3
- package/src/tests/renderers-test.ts +17 -15
- package/src/tests/splitters-test.ts +3 -3
- package/src/tests/utils/sequences-generators.ts +7 -7
- package/src/utils/cell-renderer.ts +33 -24
- package/src/utils/convert.ts +10 -10
- package/src/utils/multiple-sequence-alignment.ts +6 -7
- package/src/utils/save-as-fasta.ts +8 -8
- package/src/viewers/vd-regions-viewer.ts +15 -14
- package/tsconfig.json +1 -1
- package/jest.config.js +0 -33
- package/src/__jest__/remote.test.ts +0 -77
- package/src/__jest__/test-node.ts +0 -98
- package/test-Bio-91c83d8913ff-bb573307.html +0 -392
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.1.11",
|
|
8
|
+
"version": "2.1.12",
|
|
9
9
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -14,19 +14,18 @@
|
|
|
14
14
|
},
|
|
15
15
|
"dependencies": {
|
|
16
16
|
"@biowasm/aioli": "^3.1.0",
|
|
17
|
-
"@datagrok-libraries/bio": "^5.
|
|
17
|
+
"@datagrok-libraries/bio": "^5.16.0",
|
|
18
18
|
"@datagrok-libraries/chem-meta": "^1.0.1",
|
|
19
|
-
"@datagrok-libraries/ml": "^6.
|
|
20
|
-
"@datagrok-libraries/utils": "^1.
|
|
19
|
+
"@datagrok-libraries/ml": "^6.3.3",
|
|
20
|
+
"@datagrok-libraries/utils": "^1.19.4",
|
|
21
21
|
"cash-dom": "^8.0.0",
|
|
22
|
-
"datagrok-api": "^1.
|
|
22
|
+
"datagrok-api": "^1.11.1",
|
|
23
23
|
"dayjs": "^1.11.4",
|
|
24
24
|
"openchemlib": "6.0.1",
|
|
25
25
|
"rxjs": "^6.5.5",
|
|
26
26
|
"wu": "latest"
|
|
27
27
|
},
|
|
28
28
|
"devDependencies": {
|
|
29
|
-
"@types/jest": "^27.5.1",
|
|
30
29
|
"@types/js-yaml": "^4.0.5",
|
|
31
30
|
"@types/node": "^17.0.24",
|
|
32
31
|
"@types/node-fetch": "^2.6.2",
|
|
@@ -35,15 +34,13 @@
|
|
|
35
34
|
"@typescript-eslint/parser": "^4.20.0",
|
|
36
35
|
"eslint": "^7.23.0",
|
|
37
36
|
"eslint-config-google": "latest",
|
|
38
|
-
"jest": "^27.5.1",
|
|
39
|
-
"jest-html-reporter": "^3.6.0",
|
|
40
37
|
"js-yaml": "^4.1.0",
|
|
41
38
|
"node-fetch": "^2.6.7",
|
|
42
39
|
"puppeteer": "^13.7.0",
|
|
43
|
-
"ts-jest": "^27.0.0",
|
|
44
40
|
"ts-loader": "^9.2.5",
|
|
45
41
|
"typescript": "^4.2.3",
|
|
46
42
|
"webpack": "^5.64.1",
|
|
43
|
+
"webpack-bundle-analyzer": "latest",
|
|
47
44
|
"webpack-cli": "^4.6.0"
|
|
48
45
|
},
|
|
49
46
|
"grokDependencies": {
|
|
@@ -68,9 +65,10 @@
|
|
|
68
65
|
"release-sequences1-local": "grok publish local --release",
|
|
69
66
|
"lint": "eslint \"./src/**/*.ts\"",
|
|
70
67
|
"lint-fix": "eslint \"./src/**/*.ts\" --fix",
|
|
71
|
-
"test": "
|
|
72
|
-
"test-local": "
|
|
73
|
-
"build-bio-local": "npm --prefix ./../../js-api run build && npm --prefix ./../../libraries/utils run build && npm --prefix ./../../libraries/ml run build && npm run build && npm --prefix ./../../libraries/bio run build && npm run build"
|
|
68
|
+
"test": "grok test",
|
|
69
|
+
"test-local": "grok test --host localhost",
|
|
70
|
+
"build-bio-local": "npm --prefix ./../../js-api run build && npm --prefix ./../../libraries/utils run build && npm --prefix ./../../libraries/ml run build && npm run build && npm --prefix ./../../libraries/bio run build && npm run build",
|
|
71
|
+
"analyze": "webpack --profile --json > ./stats.json && npx webpack-bundle-analyzer ./stats.json"
|
|
74
72
|
},
|
|
75
73
|
"canEdit": [
|
|
76
74
|
"Developers"
|
|
package/src/analysis/sequence-activity-cliffs.ts
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
import * as bio from '@datagrok-libraries/bio';
|
|
5
4
|
|
|
6
5
|
import {ITooltipAndPanelParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
7
|
-
import {getSimilarityFromDistance} from '@datagrok-libraries/
|
|
6
|
+
import {getSimilarityFromDistance} from '@datagrok-libraries/ml/src/distance-metrics-methods';
|
|
8
7
|
import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
9
8
|
import {TAGS} from '../utils/constants';
|
|
10
9
|
import {drawMoleculeDifferenceOnCanvas} from '../utils/cell-renderer';
|
|
11
10
|
import * as C from '../utils/constants';
|
|
12
|
-
import {
|
|
13
|
-
import {
|
|
11
|
+
import {GridColumn} from 'datagrok-api/dg';
|
|
12
|
+
import {invalidateMols, MONOMERIC_COL_TAGS} from '../substructure-search/substructure-search';
|
|
13
|
+
import {getSplitter} from '@datagrok-libraries/bio';
|
|
14
14
|
|
|
15
15
|
export async function getDistances(col: DG.Column, seq: string): Promise<Array<number>> {
|
|
16
16
|
const stringArray = col.toList();
|
|
@@ -105,7 +105,7 @@ export function createPropPanelElement(params: ITooltipAndPanelParams): HTMLDivE
|
|
|
105
105
|
const molDifferences: { [key: number]: HTMLCanvasElement } = {};
|
|
106
106
|
const units = params.seqCol.getTag(DG.TAGS.UNITS);
|
|
107
107
|
const separator = params.seqCol.getTag(TAGS.SEPARATOR);
|
|
108
|
-
const splitter =
|
|
108
|
+
const splitter = getSplitter(units, separator);
|
|
109
109
|
const subParts1 = splitter(sequencesArray[0]);
|
|
110
110
|
const subParts2 = splitter(sequencesArray[1]);
|
|
111
111
|
const canvas = createDifferenceCanvas(subParts1, subParts2, units, molDifferences);
|
|
package/src/analysis/sequence-diversity-viewer.ts
CHANGED
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
import * as ui from 'datagrok-api/ui';
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
3
|
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
|
|
4
5
|
import BitArray from '@datagrok-libraries/utils/src/bit-array';
|
|
5
|
-
import {
|
|
6
|
+
import {getDiverseSubset} from '@datagrok-libraries/utils/src/similarity-metrics';
|
|
6
7
|
import $ from 'cash-dom';
|
|
7
8
|
import {ArrayUtils} from '@datagrok-libraries/utils/src/array-utils';
|
|
8
9
|
import {SequenceSearchBaseViewer} from './sequence-search-base-viewer';
|
|
9
10
|
import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
10
11
|
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
11
|
-
import {
|
|
12
|
+
import {Subject} from 'rxjs';
|
|
12
13
|
|
|
13
14
|
export class SequenceDiversityViewer extends SequenceSearchBaseViewer {
|
|
14
15
|
renderMolIds: number[] | null = null;
|
|
package/src/analysis/sequence-search-base-viewer.ts
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import * as ui from 'datagrok-api/ui';
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
3
|
import * as grok from 'datagrok-api/grok';
|
|
4
|
-
|
|
4
|
+
|
|
5
|
+
import {CHEM_SIMILARITY_METRICS} from '@datagrok-libraries/ml/src/distance-metrics-methods';
|
|
5
6
|
import * as C from '../utils/constants';
|
|
6
7
|
|
|
7
8
|
export class SequenceSearchBaseViewer extends DG.JsViewer {
|
|
package/src/analysis/sequence-similarity-viewer.ts
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
import * as bio from '@datagrok-libraries/bio';
|
|
5
4
|
|
|
6
5
|
import {SequenceSearchBaseViewer} from './sequence-search-base-viewer';
|
|
7
6
|
import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
@@ -9,6 +8,7 @@ import * as C from '../utils/constants';
|
|
|
9
8
|
import {createDifferenceCanvas, createDifferencesWithPositions} from './sequence-activity-cliffs';
|
|
10
9
|
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
11
10
|
import {Subject} from 'rxjs';
|
|
11
|
+
import {getSplitter} from '@datagrok-libraries/bio';
|
|
12
12
|
|
|
13
13
|
export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
14
14
|
hotSearch: boolean;
|
|
@@ -84,7 +84,7 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
84
84
|
const molDifferences: { [key: number]: HTMLCanvasElement } = {};
|
|
85
85
|
const units = resDf.col('sequence')!.getTag(DG.TAGS.UNITS);
|
|
86
86
|
const separator = resDf.col('sequence')!.getTag(C.TAGS.SEPARATOR);
|
|
87
|
-
const splitter =
|
|
87
|
+
const splitter = getSplitter(units, separator);
|
|
88
88
|
const subParts1 = splitter(this.moleculeColumn!.get(this.targetMoleculeIdx));
|
|
89
89
|
const subParts2 = splitter(resDf.get('sequence', resDf.currentRowIdx));
|
|
90
90
|
const canvas = createDifferenceCanvas(subParts1, subParts2, units, molDifferences);
|
|
package/src/calculations/monomerLevelMols.ts
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
import * as bio from '@datagrok-libraries/bio';
|
|
5
4
|
|
|
6
5
|
import * as C from '../utils/constants';
|
|
7
6
|
import {getHelmMonomers} from '../package';
|
|
7
|
+
import {getSplitter, getStats} from '@datagrok-libraries/bio';
|
|
8
8
|
|
|
9
9
|
const V2000_ATOM_NAME_POS = 31;
|
|
10
10
|
|
|
@@ -12,11 +12,11 @@ export async function getMonomericMols(mcol: DG.Column,
|
|
|
12
12
|
pattern: boolean = false, monomersDict?: Map<string, string>): Promise<DG.Column> {
|
|
13
13
|
const separator: string = mcol.tags[C.TAGS.SEPARATOR];
|
|
14
14
|
const units: string = mcol.tags[DG.TAGS.UNITS];
|
|
15
|
-
const splitter =
|
|
15
|
+
const splitter = getSplitter(units, separator);
|
|
16
16
|
let molV3000Array;
|
|
17
17
|
monomersDict ??= new Map();
|
|
18
18
|
const monomers = units === 'helm' ?
|
|
19
|
-
getHelmMonomers(mcol) : Object.keys(
|
|
19
|
+
getHelmMonomers(mcol) : Object.keys(getStats(mcol, 0, splitter).freq).filter((it) => it !== '');
|
|
20
20
|
|
|
21
21
|
for (let i = 0; i < monomers.length; i++) {
|
|
22
22
|
if (!monomersDict.has(monomers[i]))
|
package/src/package.ts
CHANGED
|
@@ -43,7 +43,7 @@ import {getMonomericMols} from './calculations/monomerLevelMols';
|
|
|
43
43
|
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
44
44
|
import {from, Observable, Subject} from 'rxjs';
|
|
45
45
|
import {
|
|
46
|
-
TAGS as
|
|
46
|
+
TAGS as bioTAGS,
|
|
47
47
|
Monomer, IMonomerLib, MonomerWorks, MonomerLib, readLibrary,
|
|
48
48
|
SeqPalette, UnitsHandler, WebLogoViewer, getStats, splitterAsHelm
|
|
49
49
|
} from '@datagrok-libraries/bio';
|
|
@@ -70,13 +70,13 @@ export class SeqPaletteCustom implements SeqPalette {
|
|
|
70
70
|
//tags: init
|
|
71
71
|
export async function initBio() {
|
|
72
72
|
await loadLibraries();
|
|
73
|
-
|
|
74
|
-
|
|
73
|
+
const monomers: string[] = [];
|
|
74
|
+
const logPs: number[] = [];
|
|
75
75
|
const module = await grok.functions.call('Chem:getRdKitModule');
|
|
76
76
|
|
|
77
77
|
|
|
78
78
|
const series = monomerLib!.getMonomerMolsByType('PEPTIDE')!;
|
|
79
|
-
Object.keys(series).forEach(symbol => {
|
|
79
|
+
Object.keys(series).forEach((symbol) => {
|
|
80
80
|
monomers.push(symbol);
|
|
81
81
|
const block = series[symbol].replaceAll('#R', 'O ');
|
|
82
82
|
const mol = module.get_mol(block);
|
|
@@ -88,10 +88,9 @@ export async function initBio() {
|
|
|
88
88
|
const sum = logPs.reduce((a, b) => a + b, 0);
|
|
89
89
|
const avg = (sum / logPs.length) || 0;
|
|
90
90
|
|
|
91
|
-
|
|
92
|
-
for (let i = 0; i < monomers.length; i++)
|
|
91
|
+
const palette: { [monomer: string]: string } = {};
|
|
92
|
+
for (let i = 0; i < monomers.length; i++)
|
|
93
93
|
palette[monomers[i]] = logPs[i] < avg ? '#4682B4' : '#DC143C';
|
|
94
|
-
}
|
|
95
94
|
|
|
96
95
|
hydrophobPalette = new SeqPaletteCustom(palette);
|
|
97
96
|
}
|
|
@@ -99,7 +98,7 @@ export async function initBio() {
|
|
|
99
98
|
async function loadLibraries() {
|
|
100
99
|
//TODO handle if files are in place
|
|
101
100
|
|
|
102
|
-
|
|
101
|
+
const uploadedLibraries: string[] = Object.values(await grok.dapi.userDataStorage.get(STORAGE_NAME, true));
|
|
103
102
|
if (uploadedLibraries.length == 0 && monomerLib == null)
|
|
104
103
|
monomerLib = new MonomerLib({});
|
|
105
104
|
for (let i = 0; i < uploadedLibraries.length; ++i)
|
|
@@ -111,9 +110,8 @@ async function loadLibraries() {
|
|
|
111
110
|
export async function monomerManager(value: string) {
|
|
112
111
|
if (monomerLib == null)
|
|
113
112
|
monomerLib = await readLibrary(LIB_PATH, value);
|
|
114
|
-
else
|
|
113
|
+
else
|
|
115
114
|
monomerLib!.update(await readLibrary(LIB_PATH, value));
|
|
116
|
-
}
|
|
117
115
|
}
|
|
118
116
|
|
|
119
117
|
//name: getBioLib
|
|
@@ -137,20 +135,21 @@ export async function manageFiles() {
|
|
|
137
135
|
//output: widget result
|
|
138
136
|
export async function libraryPanel(seqColumn: DG.Column): Promise<DG.Widget> {
|
|
139
137
|
//@ts-ignore
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
138
|
+
const filesButton: HTMLButtonElement = ui.button('Manage', manageFiles);
|
|
139
|
+
const divInputs: HTMLDivElement = ui.div();
|
|
140
|
+
const librariesList: string[] = (await _package.files.list(`${LIBS_PATH}`, false, ''))
|
|
141
|
+
.map((it) => it.fileName);
|
|
142
|
+
const uploadedLibraries: string[] = Object.values(await grok.dapi.userDataStorage.get(STORAGE_NAME, true));
|
|
144
143
|
for (let i = 0; i < uploadedLibraries.length; ++i) {
|
|
145
|
-
|
|
144
|
+
const libraryName: string = uploadedLibraries[i];
|
|
146
145
|
divInputs.append(ui.boolInput(libraryName, true, async () => {
|
|
147
146
|
grok.dapi.userDataStorage.remove(STORAGE_NAME, libraryName, true);
|
|
148
147
|
await loadLibraries();
|
|
149
148
|
}).root);
|
|
150
149
|
}
|
|
151
|
-
|
|
150
|
+
const unusedLibraries: string[] = librariesList.filter((x) => !uploadedLibraries.includes(x));
|
|
152
151
|
for (let i = 0; i < unusedLibraries.length; ++i) {
|
|
153
|
-
|
|
152
|
+
const libraryName: string = unusedLibraries[i];
|
|
154
153
|
divInputs.append(ui.boolInput(libraryName, false, () => {
|
|
155
154
|
monomerManager(libraryName);
|
|
156
155
|
grok.dapi.userDataStorage.postValue(STORAGE_NAME, libraryName, libraryName, true);
|
|
@@ -292,9 +291,9 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
292
291
|
};
|
|
293
292
|
const tags = {
|
|
294
293
|
'units': macroMolecule.getTag(DG.TAGS.UNITS),
|
|
295
|
-
'aligned': macroMolecule.getTag(
|
|
296
|
-
'separator': macroMolecule.getTag(
|
|
297
|
-
'alphabet': macroMolecule.getTag(
|
|
294
|
+
'aligned': macroMolecule.getTag(bioTAGS.aligned),
|
|
295
|
+
'separator': macroMolecule.getTag(bioTAGS.separator),
|
|
296
|
+
'alphabet': macroMolecule.getTag(bioTAGS.alphabet),
|
|
298
297
|
};
|
|
299
298
|
const sp = await getActivityCliffs(
|
|
300
299
|
df,
|
|
@@ -326,7 +325,8 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
326
325
|
//input: bool plotEmbeddings = true
|
|
327
326
|
export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: DG.Column, methodName: string,
|
|
328
327
|
similarityMetric: string = 'Tanimoto', plotEmbeddings: boolean): Promise<DG.Viewer | undefined> {
|
|
329
|
-
//
|
|
328
|
+
// Delay is required for initial function dialog to close before starting invalidating of molfiles.
|
|
329
|
+
// Otherwise, dialog is freezing
|
|
330
330
|
await delay(10);
|
|
331
331
|
if (!checkInputColumnUi(macroMolecule, 'Sequence space'))
|
|
332
332
|
return;
|
|
@@ -346,14 +346,13 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
346
346
|
for (const col of embeddings) {
|
|
347
347
|
const listValues = col.toList();
|
|
348
348
|
emptyValsIdxs.forEach((ind: number) => listValues.splice(ind, 0, null));
|
|
349
|
-
table.columns.add(DG.Column.
|
|
349
|
+
table.columns.add(DG.Column.float(col.name, table.rowCount).init((i) => listValues[i]));
|
|
350
350
|
}
|
|
351
351
|
if (plotEmbeddings) {
|
|
352
352
|
return grok.shell
|
|
353
353
|
.tableView(table.name)
|
|
354
354
|
.scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
|
|
355
355
|
}
|
|
356
|
-
;
|
|
357
356
|
|
|
358
357
|
/* const encodedCol = encodeMonomers(macroMolecule);
|
|
359
358
|
if (!encodedCol)
|
|
@@ -406,7 +405,9 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
|
|
|
406
405
|
//input: dataframe table
|
|
407
406
|
//input: column sequence { semType: Macromolecule, units: ['fasta'], alphabet: ['DNA', 'RNA', 'PT'] }
|
|
408
407
|
//output: column result
|
|
409
|
-
export async function multipleSequenceAlignmentAny(
|
|
408
|
+
export async function multipleSequenceAlignmentAny(
|
|
409
|
+
table: DG.DataFrame, sequence: DG.Column
|
|
410
|
+
): Promise<DG.Column | null> {
|
|
410
411
|
const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
|
|
411
412
|
|
|
412
413
|
if (!checkInputColumnUi(sequence, 'MSA', ['fasta'], ['DNA', 'RNA', 'PT']))
|
|
package/src/substructure-search/substructure-search.ts
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
import * as bio from '@datagrok-libraries/bio';
|
|
5
4
|
|
|
6
5
|
import * as C from '../utils/constants';
|
|
7
6
|
import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
8
7
|
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
9
|
-
import {
|
|
8
|
+
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
9
|
+
import {NOTATION} from '@datagrok-libraries/bio';
|
|
10
10
|
|
|
11
11
|
export const MONOMER_MOLS_COL = 'monomeric-mols';
|
|
12
12
|
|
|
@@ -39,14 +39,14 @@ export function substructureSearchDialog(col: DG.Column): void {
|
|
|
39
39
|
const df = DG.DataFrame.create(1);
|
|
40
40
|
df.columns.addNewString('substr_helm').init((i) => '');
|
|
41
41
|
df.col('substr_helm')!.semType = col.semType;
|
|
42
|
-
df.col('substr_helm')!.setTag(DG.TAGS.UNITS,
|
|
42
|
+
df.col('substr_helm')!.setTag(DG.TAGS.UNITS, NOTATION.HELM);
|
|
43
43
|
const grid = df.plot.grid();
|
|
44
44
|
const separatorInput = ui.textInput('Separator', separator);
|
|
45
45
|
|
|
46
46
|
const inputsDiv = ui.div();
|
|
47
47
|
|
|
48
|
-
const inputs = units ===
|
|
49
|
-
units ===
|
|
48
|
+
const inputs = units === NOTATION.HELM ? ui.divV([editHelmLink]) :
|
|
49
|
+
units === NOTATION.SEPARATOR ? ui.inputs([substructureInput, separatorInput]) :
|
|
50
50
|
ui.inputs([substructureInput]);
|
|
51
51
|
|
|
52
52
|
updateDivInnerHTML(inputsDiv, inputs);
|
|
@@ -57,15 +57,15 @@ export function substructureSearchDialog(col: DG.Column): void {
|
|
|
57
57
|
inputsDiv
|
|
58
58
|
]))
|
|
59
59
|
.onOK(async () => {
|
|
60
|
-
let substructure = units ===
|
|
61
|
-
if (units ===
|
|
60
|
+
let substructure = units === NOTATION.HELM ? df.get('substr_helm', 0) : substructureInput.value;
|
|
61
|
+
if (units === NOTATION.SEPARATOR && separatorInput.value !== separator && separatorInput.value !== '')
|
|
62
62
|
substructure = substructure.replaceAll(separatorInput.value, separator);
|
|
63
63
|
const matchesColName = `Matches: ${substructure}`;
|
|
64
64
|
const colExists = col.dataFrame.columns.names()
|
|
65
65
|
.filter((it) => it.toLocaleLowerCase() === matchesColName.toLocaleLowerCase()).length > 0;
|
|
66
66
|
if (!colExists) {
|
|
67
67
|
let matches: DG.BitSet;
|
|
68
|
-
if (units ===
|
|
68
|
+
if (units === NOTATION.HELM)
|
|
69
69
|
matches = await helmSubstructureSearch(substructure, col);
|
|
70
70
|
else
|
|
71
71
|
matches = linearSubstructureSearch(substructure, col);
|
|
@@ -102,7 +102,7 @@ export async function helmSubstructureSearch(substructure: string, col: DG.Colum
|
|
|
102
102
|
if (col.version !== col.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION])
|
|
103
103
|
await invalidateMols(col, true);
|
|
104
104
|
const substructureCol = DG.Column.string('helm', 1).init((i) => substructure);
|
|
105
|
-
substructureCol.setTag(DG.TAGS.UNITS,
|
|
105
|
+
substructureCol.setTag(DG.TAGS.UNITS, NOTATION.HELM);
|
|
106
106
|
const substructureMolsCol =
|
|
107
107
|
await getMonomericMols(substructureCol, true, col.temp[MONOMERIC_COL_TAGS.MONOMERS_DICT]);
|
|
108
108
|
const matchesCol = await grok.functions.call('Chem:searchSubstructure', {
|
|
package/src/tests/Palettes-test.ts
CHANGED
|
@@ -1,26 +1,26 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
import * as bio from '@datagrok-libraries/bio';
|
|
5
4
|
|
|
6
5
|
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
7
6
|
|
|
8
7
|
import {_testPaletteN, _testPaletteAA} from '@datagrok-libraries/bio/src/tests/palettes-tests';
|
|
8
|
+
import { AminoacidsPalettes } from '@datagrok-libraries/bio';
|
|
9
9
|
|
|
10
10
|
category('Palettes', () => {
|
|
11
11
|
test('testPaletteN', async () => { await _testPaletteN(); });
|
|
12
12
|
test('testPaletteAA', async () => { await _testPaletteAA(); });
|
|
13
13
|
|
|
14
14
|
test('testPalettePtMe', async () => {
|
|
15
|
-
const colorMeNle =
|
|
16
|
-
const colorMeA =
|
|
17
|
-
const colorMeG =
|
|
18
|
-
const colorMeF =
|
|
15
|
+
const colorMeNle = AminoacidsPalettes.GrokGroups.get('MeNle');
|
|
16
|
+
const colorMeA = AminoacidsPalettes.GrokGroups.get('MeA');
|
|
17
|
+
const colorMeG = AminoacidsPalettes.GrokGroups.get('MeG');
|
|
18
|
+
const colorMeF = AminoacidsPalettes.GrokGroups.get('MeF');
|
|
19
19
|
|
|
20
|
-
const colorL =
|
|
21
|
-
const colorA =
|
|
22
|
-
const colorG =
|
|
23
|
-
const colorF =
|
|
20
|
+
const colorL = AminoacidsPalettes.GrokGroups.get('L');
|
|
21
|
+
const colorA = AminoacidsPalettes.GrokGroups.get('A');
|
|
22
|
+
const colorG = AminoacidsPalettes.GrokGroups.get('G');
|
|
23
|
+
const colorF = AminoacidsPalettes.GrokGroups.get('F');
|
|
24
24
|
|
|
25
25
|
expect(colorMeNle, colorL);
|
|
26
26
|
expect(colorMeA, colorA);
|
|
package/src/tests/WebLogo-positions-test.ts
CHANGED
|
@@ -1,9 +1,16 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
import * as bio from '@datagrok-libraries/bio';
|
|
5
4
|
|
|
6
5
|
import {after, before, category, test, expect, expectObject, delay} from '@datagrok-libraries/utils/src/test';
|
|
6
|
+
import {
|
|
7
|
+
ALPHABET,
|
|
8
|
+
NOTATION,
|
|
9
|
+
PositionInfo,
|
|
10
|
+
PositionMonomerInfo,
|
|
11
|
+
TAGS as bioTAGS,
|
|
12
|
+
WebLogoViewer
|
|
13
|
+
} from '@datagrok-libraries/bio';
|
|
7
14
|
|
|
8
15
|
category('WebLogo-positions', () => {
|
|
9
16
|
let tvList: DG.TableView[];
|
|
@@ -25,7 +32,7 @@ ATC-G-TTGC--
|
|
|
25
32
|
});
|
|
26
33
|
|
|
27
34
|
after(async () => {
|
|
28
|
-
dfList.forEach((df: DG.DataFrame) => { grok.shell.closeTable(df);});
|
|
35
|
+
dfList.forEach((df: DG.DataFrame) => { grok.shell.closeTable(df); });
|
|
29
36
|
tvList.forEach((tv: DG.TableView) => tv.close());
|
|
30
37
|
grok.shell.v = currentView;
|
|
31
38
|
});
|
|
@@ -36,31 +43,31 @@ ATC-G-TTGC--
|
|
|
36
43
|
|
|
37
44
|
const seqCol: DG.Column = df.getCol('seq');
|
|
38
45
|
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
39
|
-
seqCol.setTag(DG.TAGS.UNITS,
|
|
40
|
-
seqCol.setTag(
|
|
41
|
-
seqCol.setTag(
|
|
46
|
+
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
47
|
+
seqCol.setTag(bioTAGS.alphabet, ALPHABET.DNA);
|
|
48
|
+
seqCol.setTag(bioTAGS.aligned, 'SEQ.MSA');
|
|
42
49
|
|
|
43
|
-
const wlViewer:
|
|
50
|
+
const wlViewer: WebLogoViewer = (await df.plot.fromType('WebLogo')) as WebLogoViewer;
|
|
44
51
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
45
52
|
|
|
46
53
|
tvList.push(tv);
|
|
47
54
|
dfList.push(df);
|
|
48
55
|
|
|
49
|
-
const positions:
|
|
50
|
-
|
|
51
|
-
const resAllDf1:
|
|
52
|
-
new
|
|
53
|
-
new
|
|
54
|
-
new
|
|
55
|
-
new
|
|
56
|
-
new
|
|
57
|
-
new
|
|
58
|
-
new
|
|
59
|
-
new
|
|
60
|
-
new
|
|
61
|
-
new
|
|
62
|
-
new
|
|
63
|
-
new
|
|
56
|
+
const positions: PositionInfo[] = wlViewer['positions'];
|
|
57
|
+
|
|
58
|
+
const resAllDf1: PositionInfo[] = [
|
|
59
|
+
new PositionInfo('1', {'A': new PositionMonomerInfo(2), '-': new PositionMonomerInfo(3)}),
|
|
60
|
+
new PositionInfo('2', {'T': new PositionMonomerInfo(5)}),
|
|
61
|
+
new PositionInfo('3', {'C': new PositionMonomerInfo(5)}),
|
|
62
|
+
new PositionInfo('4', {'-': new PositionMonomerInfo(5)}),
|
|
63
|
+
new PositionInfo('5', {'G': new PositionMonomerInfo(5)}),
|
|
64
|
+
new PositionInfo('6', {'-': new PositionMonomerInfo(3), 'C': new PositionMonomerInfo(2)}),
|
|
65
|
+
new PositionInfo('7', {'T': new PositionMonomerInfo(5)}),
|
|
66
|
+
new PositionInfo('8', {'T': new PositionMonomerInfo(5)}),
|
|
67
|
+
new PositionInfo('9', {'G': new PositionMonomerInfo(5)}),
|
|
68
|
+
new PositionInfo('10', {'C': new PositionMonomerInfo(5)}),
|
|
69
|
+
new PositionInfo('11', {'-': new PositionMonomerInfo(5)}),
|
|
70
|
+
new PositionInfo('12', {'-': new PositionMonomerInfo(5)}),
|
|
64
71
|
];
|
|
65
72
|
|
|
66
73
|
expect(positions.length, resAllDf1.length);
|
|
@@ -73,8 +80,8 @@ ATC-G-TTGC--
|
|
|
73
80
|
}
|
|
74
81
|
});
|
|
75
82
|
|
|
76
|
-
test('positions with shrinkEmptyTail option true (
|
|
77
|
-
|
|
83
|
+
test('positions with shrinkEmptyTail option true (filtered)', async () => {
|
|
84
|
+
const csvDf2 = `seq
|
|
78
85
|
-TC-G-TTGC--
|
|
79
86
|
-TC-GCTTGC--
|
|
80
87
|
-T--C-GT-
|
|
@@ -86,33 +93,33 @@ ATC-G-TTGC--
|
|
|
86
93
|
|
|
87
94
|
const seqCol: DG.Column = df.getCol('seq');
|
|
88
95
|
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
89
|
-
seqCol.setTag(DG.TAGS.UNITS,
|
|
90
|
-
seqCol.setTag(
|
|
91
|
-
seqCol.setTag(
|
|
96
|
+
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
97
|
+
seqCol.setTag(bioTAGS.alphabet, ALPHABET.DNA);
|
|
98
|
+
seqCol.setTag(bioTAGS.aligned, 'SEQ');
|
|
92
99
|
|
|
93
100
|
df.filter.init((i) => {
|
|
94
101
|
return i > 2;
|
|
95
102
|
});
|
|
96
103
|
df.filter.fireChanged();
|
|
97
|
-
const wlViewer:
|
|
98
|
-
{'shrinkEmptyTail': true})) as
|
|
104
|
+
const wlViewer: WebLogoViewer = (await df.plot.fromType('WebLogo',
|
|
105
|
+
{'shrinkEmptyTail': true})) as WebLogoViewer;
|
|
99
106
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
100
107
|
|
|
101
108
|
tvList.push(tv);
|
|
102
109
|
dfList.push(df);
|
|
103
110
|
|
|
104
|
-
const positions:
|
|
105
|
-
|
|
106
|
-
const resAllDf1:
|
|
107
|
-
new
|
|
108
|
-
new
|
|
109
|
-
new
|
|
110
|
-
new
|
|
111
|
-
new
|
|
112
|
-
new
|
|
113
|
-
new
|
|
114
|
-
new
|
|
115
|
-
new
|
|
111
|
+
const positions: PositionInfo[] = wlViewer['positions'];
|
|
112
|
+
|
|
113
|
+
const resAllDf1: PositionInfo[] = [
|
|
114
|
+
new PositionInfo('1', {'-': new PositionMonomerInfo(3)}),
|
|
115
|
+
new PositionInfo('2', {'T': new PositionMonomerInfo(3)}),
|
|
116
|
+
new PositionInfo('3', {'-': new PositionMonomerInfo(3)}),
|
|
117
|
+
new PositionInfo('4', {'-': new PositionMonomerInfo(3)}),
|
|
118
|
+
new PositionInfo('5', {'C': new PositionMonomerInfo(3)}),
|
|
119
|
+
new PositionInfo('6', {'-': new PositionMonomerInfo(2), 'C': new PositionMonomerInfo(1)}),
|
|
120
|
+
new PositionInfo('7', {'G': new PositionMonomerInfo(3)}),
|
|
121
|
+
new PositionInfo('8', {'T': new PositionMonomerInfo(3)}),
|
|
122
|
+
new PositionInfo('9', {'-': new PositionMonomerInfo(3)}),
|
|
116
123
|
];
|
|
117
124
|
|
|
118
125
|
expect(positions.length, resAllDf1.length);
|
|
@@ -131,29 +138,29 @@ ATC-G-TTGC--
|
|
|
131
138
|
|
|
132
139
|
const seqCol: DG.Column = df.getCol('seq');
|
|
133
140
|
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
134
|
-
seqCol.setTag(DG.TAGS.UNITS,
|
|
135
|
-
seqCol.setTag(
|
|
136
|
-
seqCol.setTag(
|
|
141
|
+
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
142
|
+
seqCol.setTag(bioTAGS.alphabet, ALPHABET.DNA);
|
|
143
|
+
seqCol.setTag(bioTAGS.aligned, 'SEQ.MSA');
|
|
137
144
|
|
|
138
|
-
const wlViewer:
|
|
139
|
-
{'skipEmptyPositions': true})) as
|
|
145
|
+
const wlViewer: WebLogoViewer = (await df.plot.fromType('WebLogo',
|
|
146
|
+
{'skipEmptyPositions': true})) as WebLogoViewer;
|
|
140
147
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
141
148
|
|
|
142
149
|
tvList.push(tv);
|
|
143
150
|
dfList.push(df);
|
|
144
151
|
|
|
145
|
-
const positions:
|
|
146
|
-
|
|
147
|
-
const resAllDf1:
|
|
148
|
-
new
|
|
149
|
-
new
|
|
150
|
-
new
|
|
151
|
-
new
|
|
152
|
-
new
|
|
153
|
-
new
|
|
154
|
-
new
|
|
155
|
-
new
|
|
156
|
-
new
|
|
152
|
+
const positions: PositionInfo[] = wlViewer['positions'];
|
|
153
|
+
|
|
154
|
+
const resAllDf1: PositionInfo[] = [
|
|
155
|
+
new PositionInfo('1', {'A': new PositionMonomerInfo(2), '-': new PositionMonomerInfo(3)}),
|
|
156
|
+
new PositionInfo('2', {'T': new PositionMonomerInfo(5)}),
|
|
157
|
+
new PositionInfo('3', {'C': new PositionMonomerInfo(5)}),
|
|
158
|
+
new PositionInfo('5', {'G': new PositionMonomerInfo(5)}),
|
|
159
|
+
new PositionInfo('6', {'-': new PositionMonomerInfo(3), 'C': new PositionMonomerInfo(2)}),
|
|
160
|
+
new PositionInfo('7', {'T': new PositionMonomerInfo(5)}),
|
|
161
|
+
new PositionInfo('8', {'T': new PositionMonomerInfo(5)}),
|
|
162
|
+
new PositionInfo('9', {'G': new PositionMonomerInfo(5)}),
|
|
163
|
+
new PositionInfo('10', {'C': new PositionMonomerInfo(5)})
|
|
157
164
|
];
|
|
158
165
|
|
|
159
166
|
expect(positions.length, resAllDf1.length);
|