@datagrok/bio 2.0.13 → 2.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-test.js +1862 -1169
- package/dist/package.js +4025 -3457
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +135 -152
- package/files/data/sample_HELM_empty_vals.csv +537 -537
- package/files/samples/sample_HELM.csv +540 -540
- package/package.json +19 -12
- package/scripts/read-tree-pkl.py +215 -0
- package/src/__jest__/remote.test.ts +6 -4
- package/src/__jest__/test-node.ts +1 -1
- package/src/{utils → analysis}/sequence-activity-cliffs.ts +33 -30
- package/src/analysis/sequence-diversity-viewer.ts +48 -0
- package/src/analysis/sequence-search-base-viewer.ts +81 -0
- package/src/analysis/sequence-similarity-viewer.ts +107 -0
- package/src/{utils → analysis}/sequence-space.ts +0 -0
- package/src/calculations/monomerLevelMols.ts +64 -20
- package/src/package-test.ts +2 -1
- package/src/package.ts +57 -3
- package/src/substructure-search/substructure-search.ts +65 -0
- package/src/tests/WebLogo-test.ts +6 -6
- package/src/tests/activity-cliffs-tests.ts +8 -16
- package/src/tests/activity-cliffs-utils.ts +3 -2
- package/src/tests/sequence-space-test.ts +7 -14
- package/src/tests/similarity-diversity-tests.ts +78 -0
- package/src/utils/cell-renderer.ts +81 -68
- package/src/utils/ui-utils.ts +4 -0
- package/src/viewers/vd-regions-viewer.ts +2 -1
- package/src/widgets/representations.ts +58 -0
- package/test-Bio-7770371320b2-f8f97c6e.html +0 -374
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
import {SequenceSearchBaseViewer} from './sequence-search-base-viewer';
|
|
5
|
+
import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
6
|
+
import * as C from '../utils/constants';
|
|
7
|
+
import {createDifferenceCanvas, createDifferencesWithPositions} from './sequence-activity-cliffs';
|
|
8
|
+
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
9
|
+
import {WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
10
|
+
import {TableView} from 'datagrok-api/dg';
|
|
11
|
+
import { Subject } from 'rxjs';
|
|
12
|
+
|
|
13
|
+
/**
 * Viewer that shows, in a grid, the sequences of the attached table that are similar to a target
 * sequence (the table's current row at the moment the search is computed).
 *
 * Sequences are converted to monomer-level molecules with `getMonomericMols`; the search itself is
 * delegated to the `Chem:callChemSimilaritySearch` function.
 */
export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
  hotSearch: boolean;
  sketchedMolecule: string = '';
  /** Current row of the source table as of the last render (0 when there is no current row). */
  curIdx: number = 0;
  /** Result column with the found sequences. */
  molCol: DG.Column | null = null;
  /** Result column with the source-table row indexes of the found sequences. */
  idxs: DG.Column | null = null;
  /** Result column with the similarity scores. */
  scores: DG.Column | null = null;
  /** Passed to the search function as `minScore`. */
  cutoff: number;
  /** True after a row was picked in the results grid; suppresses recomputation until the table grid is clicked. */
  gridSelect: boolean = false;
  /** Source-table row index of the sequence the results are compared against. */
  targetMoleculeIdx: number = 0;
  /** Emits `true` every time a search has been computed and rendered. */
  computeCompleted = new Subject<boolean>();

  /** Table-grid root that currently carries `onTableGridClick`, if any. */
  private hookedGridRoot: HTMLElement | null = null;
  /** Stable handler reference, so re-rendering never stacks up duplicate listeners. */
  private readonly onTableGridClick = (): void => {
    this.gridSelect = false;
  };

  constructor() {
    super('similarity');
    this.cutoff = this.float('cutoff', 0.01, {min: 0, max: 1});
    this.hotSearch = this.bool('hotSearch', true);
  }

  init(): void {
    this.hotSearch = true;
    this.initialized = true;
  }

  /**
   * Recomputes the similarity search (unless suppressed) and replaces the viewer content with the results grid.
   *
   * @param computeData when false, only `curIdx` is refreshed and nothing is recomputed
   */
  async render(computeData = true): Promise<void> {
    if (!this.beforeRender())
      return;
    if (!this.moleculeColumn)
      return;

    const currentRowIdx = this.dataFrame!.currentRowIdx;
    this.curIdx = currentRowIdx == -1 ? 0 : currentRowIdx;
    if (!computeData || this.gridSelect)
      return;

    this.targetMoleculeIdx = this.curIdx;
    const monomericMols = await getMonomericMols(this.moleculeColumn);
    // The column is attached to a data frame because, per the original note, that is needed
    // to calculate fingerprints; the frame itself is not used afterwards.
    DG.DataFrame.fromColumns([monomericMols]);
    const df = await grok.functions.call('Chem:callChemSimilaritySearch', {
      df: this.dataFrame,
      col: monomericMols,
      molecule: monomericMols.get(this.targetMoleculeIdx),
      metricName: this.distanceMetric,
      limit: this.limit,
      minScore: this.cutoff,
      fingerprint: this.fingerprint
    });

    const idxs: DG.Column = df.getCol('indexes');
    const scores: DG.Column = df.getCol('score');
    const molCol = DG.Column.string('sequence', idxs.length)
      .init((i) => this.moleculeColumn?.get(idxs.get(i)));
    molCol.semType = DG.SEMTYPE.MACROMOLECULE;
    this.tags.forEach((tag) => molCol.setTag(tag, this.moleculeColumn!.getTag(tag)));
    this.idxs = idxs;
    this.scores = scores;
    this.molCol = molCol;

    const resDf = DG.DataFrame.fromColumns([idxs, molCol, scores]);
    resDf.onCurrentRowChanged.subscribe((_) => {
      // Mirror the row picked in the results grid onto the source table.
      this.dataFrame!.currentRowIdx = resDf.col('indexes')!.get(resDf.currentRowIdx);
      this.createPropertyPanel(resDf);
      this.gridSelect = true;
    });

    const grid = resDf.plot.grid();
    grid.col('indexes')!.visible = false;

    // Blank the score of the target sequence itself. The target may be absent from the results,
    // in which case findIndex yields -1 and there is no cell to blank.
    const targetMolRow = idxs.getRawData().findIndex((it) => it == this.targetMoleculeIdx);
    if (targetMolRow >= 0)
      grid.cell('score', targetMolRow).cell.value = null;

    this.hookTableGridClick();
    updateDivInnerHTML(this.root, grid.root);
    this.computeCompleted.next(true);
  }

  /** Makes a click on the current table view's grid re-enable recomputation; the listener is attached at most once per grid. */
  private hookTableGridClick(): void {
    const tableGridRoot: HTMLElement | undefined = (grok.shell.v as TableView)?.grid?.root;
    if (!tableGridRoot || tableGridRoot === this.hookedGridRoot)
      return;
    this.hookedGridRoot?.removeEventListener('click', this.onTableGridClick);
    tableGridRoot.addEventListener('click', this.onTableGridClick);
    this.hookedGridRoot = tableGridRoot;
  }

  /**
   * Builds the "Similarity search" accordion that compares the target sequence with the sequence in the
   * current row of the results, and assigns it to `grok.shell.o`.
   *
   * @param resDf results data frame with a 'sequence' column
   */
  createPropertyPanel(resDf: DG.DataFrame): void {
    const propPanel = ui.div();
    const molDifferences: { [key: number]: HTMLCanvasElement } = {};
    const sequenceCol = resDf.col('sequence')!;
    const units = sequenceCol.getTag(DG.TAGS.UNITS);
    const separator = sequenceCol.getTag(C.TAGS.SEPARATOR);
    const splitter = WebLogo.getSplitter(units, separator);
    const subParts1 = splitter(this.moleculeColumn!.get(this.targetMoleculeIdx));
    const subParts2 = splitter(resDf.get('sequence', resDf.currentRowIdx));
    const canvas = createDifferenceCanvas(subParts1, subParts2, units, molDifferences);
    propPanel.append(ui.div(canvas, {style: {width: '300px', overflow: 'scroll'}}));
    if (subParts1.length !== subParts2.length) {
      propPanel.append(ui.divV([
        ui.divText(`Different sequence length:`, {style: {fontWeight: 'bold'}}),
        ui.divText(`target: ${subParts1.length} monomers`),
        ui.divText(`selected: ${subParts2.length} monomers`)
      ], {style: {paddingBottom: '10px'}}));
    }
    propPanel.append(createDifferencesWithPositions(molDifferences));

    const acc = ui.accordion();
    const accIcon = ui.element('i');
    accIcon.className = 'grok-icon svg-icon svg-view-layout';
    acc.addTitle(ui.span([accIcon, ui.label(`Similarity search`)]));
    acc.addPane('Differences', () => propPanel, true);
    grok.shell.o = acc.root;
  }
}
|
|
File without changes
|
|
@@ -1,25 +1,69 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
|
-
import * as ui from 'datagrok-api/ui';
|
|
3
2
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
|
|
5
|
-
import {
|
|
3
|
+
import * as C from '../utils/constants';
|
|
4
|
+
import {getHelmMonomers} from '../package';
|
|
5
|
+
import {WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
6
6
|
|
|
7
7
|
const V2000_ATOM_NAME_POS = 31;
|
|
8
8
|
|
|
9
9
|
/**
 * Converts a macromolecule column into a string column named 'monomericMols' holding one V3000 molblock
 * per row, in which every monomer is represented by a single pseudo-atom.
 *
 * Each distinct monomer of the column is given a 1-based ordinal (as a string); that ordinal is what
 * the molblock builders write for the monomer.
 *
 * @param mcol    column with sequences; its units and separator tags select the splitter
 * @param pattern forwarded unchanged to the molblock builders
 * @returns       column of molblocks, one per row of `mcol`
 */
export async function getMonomericMols(mcol: DG.Column, pattern: boolean = false): Promise<DG.Column> {
  const units: string = mcol.tags[DG.TAGS.UNITS];
  const separator: string = mcol.tags[C.TAGS.SEPARATOR];
  const splitter = WebLogo.getSplitter(units, separator);
  const isHelm = units === 'helm';

  // Distinct monomers of the whole column; empty names are dropped for non-HELM notations.
  const monomerNames = isHelm ?
    getHelmMonomers(mcol) :
    Object.keys(WebLogo.getStats(mcol, 0, splitter).freq).filter((name) => name !== '');

  const monomersDict = new Map<string, string>();
  monomerNames.forEach((name, ordinal) => monomersDict.set(name, `${ordinal + 1}`));

  let molblocks: Array<string>;
  if (isHelm) {
    // HELM rows are first turned into molfiles by the HELM package, then rewritten as V3000.
    const helmMolfiles = await grok.functions.call('HELM:getMolFiles', {col: mcol});
    molblocks = changeV2000ToV3000(helmMolfiles, monomersDict, pattern);
  } else {
    molblocks = Array.from({length: mcol.length}, (_, rowIdx) => {
      const rowMonomers = splitter(mcol.get(rowIdx)).filter((name) => name !== '');
      return molV3000FromNonHelmSequence(rowMonomers, monomersDict, pattern);
    });
  }

  return DG.Column.fromStrings('monomericMols', molblocks);
}
|
|
34
|
+
|
|
35
|
+
/**
 * Builds a V3000 molblock for a linear sequence: one atom line per monomer and one bond line
 * between every pair of neighbouring monomers.
 *
 * @param monomers     monomers of the sequence, in order
 * @param monomersDict maps a monomer name to the value written for it (after `R` in pattern mode,
 *                     after `MASS=` otherwise); a monomer missing from the map is written as `undefined`
 * @param pattern      when true atoms are written as `R<value>`, otherwise as `At ... MASS=<value>`
 * @returns            the molblock text
 */
function molV3000FromNonHelmSequence(
  monomers: Array<string>, monomersDict: Map<string, string>, pattern: boolean = false): string {
  const atomCount = monomers.length;
  // A chain of n atoms has n - 1 bonds; clamp at 0 so that an empty sequence
  // does not produce a negative bond count in the COUNTS line.
  const bondCount = Math.max(atomCount - 1, 0);

  let molV3000 = `
Datagrok macromolecule handler

0 0 0 0 0 0 999 V3000
M V30 BEGIN CTAB
`;

  molV3000 += `M V30 COUNTS ${atomCount} ${bondCount} 0 0 0\n`;
  molV3000 += 'M V30 BEGIN ATOM\n';

  for (let atomRowI = 0; atomRowI < atomCount; atomRowI++) {
    const monomerCode = monomersDict.get(monomers[atomRowI]);
    molV3000 += pattern ?
      `M V30 ${atomRowI + 1} R${monomerCode} 0.000 0.000 0 0\n` :
      `M V30 ${atomRowI + 1} At 0.000 0.000 0 0 MASS=${monomerCode}\n`;
  }

  molV3000 += 'M V30 END ATOM\n';
  molV3000 += 'M V30 BEGIN BOND\n';

  // Bond i connects atom i with atom i + 1 (1-based), bond type 1.
  for (let bondRowI = 0; bondRowI < bondCount; bondRowI++)
    molV3000 += `M V30 ${bondRowI + 1} 1 ${bondRowI + 1} ${bondRowI + 2}\n`;

  molV3000 += 'M V30 END BOND\n';
  molV3000 += 'M V30 END CTAB\n';
  molV3000 += 'M END';
  return molV3000;
}
|
|
21
64
|
|
|
22
|
-
function
|
|
65
|
+
function changeV2000ToV3000(mols: DG.Column, dict: Map<string, string>, pattern: boolean = false): Array<string> {
|
|
66
|
+
const molsArray = new Array<string>(mols.length);
|
|
23
67
|
for (let i = 0; i < mols.length; i++) {
|
|
24
68
|
let curPos = 0;
|
|
25
69
|
let endPos = 0;
|
|
@@ -30,7 +74,7 @@ function changeToV3000(mols: Array<string>, dict: Map<string, string>, pattern:
|
|
|
30
74
|
M V30 BEGIN CTAB
|
|
31
75
|
`;
|
|
32
76
|
|
|
33
|
-
const mol = mols
|
|
77
|
+
const mol = mols.get(i);
|
|
34
78
|
curPos = mol.indexOf('\n', curPos) + 1;
|
|
35
79
|
curPos = mol.indexOf('\n', curPos) + 1;
|
|
36
80
|
curPos = mol.indexOf('\n', curPos) + 1;
|
|
@@ -45,10 +89,10 @@ M V30 BEGIN CTAB
|
|
|
45
89
|
curPos = mol.indexOf('\n', curPos) + 1 + V2000_ATOM_NAME_POS;
|
|
46
90
|
endPos = mol.indexOf(' ', curPos);
|
|
47
91
|
const monomerName: string = mol.substring(curPos, endPos);
|
|
48
|
-
molV3000 += pattern ?
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
}
|
|
92
|
+
molV3000 += pattern ?
|
|
93
|
+
`M V30 ${atomRowI + 1} R${dict.get(monomerName)} 0.000 0.000 0 0\n` :
|
|
94
|
+
`M V30 ${atomRowI + 1} At 0.000 0.000 0 0 MASS=${dict.get(monomerName)}\n`;
|
|
95
|
+
}
|
|
52
96
|
|
|
53
97
|
molV3000 += 'M V30 END ATOM\n';
|
|
54
98
|
molV3000 += 'M V30 BEGIN BOND\n';
|
|
@@ -60,13 +104,13 @@ M V30 BEGIN CTAB
|
|
|
60
104
|
const order = parseInt(mol.substring(curPos + 6, curPos + 9).trim());
|
|
61
105
|
|
|
62
106
|
molV3000 += `M V30 ${bondRowI + 1} ${order} ${firstMonomer} ${secondMonomer}\n`;
|
|
63
|
-
}
|
|
107
|
+
}
|
|
64
108
|
|
|
65
109
|
molV3000 += 'M V30 END BOND\n';
|
|
66
110
|
molV3000 += 'M V30 END CTAB\n';
|
|
67
111
|
molV3000 += 'M END';
|
|
68
|
-
|
|
112
|
+
molsArray[i] = molV3000;
|
|
69
113
|
}
|
|
70
114
|
|
|
71
|
-
return
|
|
72
|
-
}
|
|
115
|
+
return molsArray;
|
|
116
|
+
}
|
package/src/package-test.ts
CHANGED
|
@@ -13,7 +13,8 @@ import './tests/renderers-test';
|
|
|
13
13
|
import './tests/convert-test';
|
|
14
14
|
import './tests/fasta-handler-test';
|
|
15
15
|
import './tests/WebLogo-positions-test';
|
|
16
|
-
import './tests/checkInputColumn-tests'
|
|
16
|
+
import './tests/checkInputColumn-tests';
|
|
17
|
+
import './tests/similarity-diversity-tests';
|
|
17
18
|
|
|
18
19
|
export const _package = new DG.Package();
|
|
19
20
|
export {tests};
|
package/src/package.ts
CHANGED
|
@@ -12,15 +12,15 @@ import {runKalign, testMSAEnoughMemory} from './utils/multiple-sequence-alignmen
|
|
|
12
12
|
import {SequenceAlignment, Aligned} from './seq_align';
|
|
13
13
|
import {Nucleotides} from '@datagrok-libraries/bio/src/nucleotides';
|
|
14
14
|
import {Aminoacids} from '@datagrok-libraries/bio/src/aminoacids';
|
|
15
|
-
import {getEmbeddingColsNames, sequenceSpace} from './
|
|
15
|
+
import {getEmbeddingColsNames, sequenceSpace} from './analysis/sequence-space';
|
|
16
16
|
import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
17
17
|
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
18
|
-
import {createPropPanelElement, createTooltipElement, getSimilaritiesMarix} from './
|
|
18
|
+
import {createPropPanelElement, createTooltipElement, getSimilaritiesMarix} from './analysis/sequence-activity-cliffs';
|
|
19
19
|
import {createJsonMonomerLibFromSdf, encodeMonomers, getMolfilesFromSeq, HELM_CORE_LIB_FILENAME} from './utils/utils';
|
|
20
20
|
import {getMacroMol} from './utils/atomic-works';
|
|
21
21
|
import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
|
|
22
22
|
import {convert} from './utils/convert';
|
|
23
|
-
import {representationsWidget} from './widgets/representations';
|
|
23
|
+
import {getMacroMolColumnPropertyPanel, representationsWidget} from './widgets/representations';
|
|
24
24
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
25
25
|
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
26
26
|
import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils';
|
|
@@ -32,6 +32,9 @@ import {
|
|
|
32
32
|
|
|
33
33
|
import {splitAlignedSequences} from '@datagrok-libraries/bio/src/utils/splitter';
|
|
34
34
|
import * as C from './utils/constants';
|
|
35
|
+
import {SequenceSimilarityViewer} from './analysis/sequence-similarity-viewer';
|
|
36
|
+
import {SequenceDiversityViewer} from './analysis/sequence-diversity-viewer';
|
|
37
|
+
import { substructureSearchDialog } from './substructure-search/substructure-search';
|
|
35
38
|
|
|
36
39
|
//tags: init
|
|
37
40
|
export async function initBio() {
|
|
@@ -46,6 +49,14 @@ export function fastaSequenceCellRenderer(): MacromoleculeSequenceCellRenderer {
|
|
|
46
49
|
return new MacromoleculeSequenceCellRenderer();
|
|
47
50
|
}
|
|
48
51
|
|
|
52
|
+
//name: Sequence Renderer
|
|
53
|
+
//input: column molColumn {semType: Macromolecule}
|
|
54
|
+
//tags: panel
|
|
55
|
+
//output: widget result
|
|
56
|
+
export function macroMolColumnPropertyPanel(molColumn: DG.Column): DG.Widget {
  // Thin package entry point: the widget is built by getMacroMolColumnPropertyPanel
  // from './widgets/representations'.
  const panelWidget = getMacroMolColumnPropertyPanel(molColumn);
  return panelWidget;
}
|
|
59
|
+
|
|
49
60
|
//name: separatorSequenceCellRenderer
|
|
50
61
|
//tags: cellRenderer
|
|
51
62
|
//meta.cellType: sequence
|
|
@@ -462,3 +473,46 @@ export function getHelmMonomers(seqCol: DG.Column<string>): string[] {
|
|
|
462
473
|
const stats = WebLogo.getStats(seqCol, 1, WebLogo.splitterAsHelm);
|
|
463
474
|
return Object.keys(stats.freq);
|
|
464
475
|
}
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
//name: SequenceSimilaritySearchViewer
|
|
479
|
+
//tags: viewer
|
|
480
|
+
//output: viewer result
|
|
481
|
+
export function similaritySearchViewer(): SequenceSimilarityViewer {
  // Factory for the viewer; a fresh instance is created on every call.
  const similarityViewer = new SequenceSimilarityViewer();
  return similarityViewer;
}
|
|
484
|
+
|
|
485
|
+
//top-menu: Bio | Similarity Search...
|
|
486
|
+
//name: similaritySearch
|
|
487
|
+
//description: finds the most similar sequence
|
|
488
|
+
//output: viewer result
|
|
489
|
+
export function similaritySearchTopMenu(): void {
  // Adds a 'SequenceSimilaritySearchViewer' to the shell's current view (treated as a table view)
  // and docks it with the 'down' dock type.
  const tableView = grok.shell.v as DG.TableView;
  const similarityViewer = tableView.addViewer('SequenceSimilaritySearchViewer');
  tableView.dockManager.dock(similarityViewer, 'down');
}
|
|
494
|
+
|
|
495
|
+
//name: SequenceDiversitySearchViewer
|
|
496
|
+
//tags: viewer
|
|
497
|
+
//output: viewer result
|
|
498
|
+
export function diversitySearchViewer(): SequenceDiversityViewer {
  // Factory for the viewer; a fresh instance is created on every call.
  const diversityViewer = new SequenceDiversityViewer();
  return diversityViewer;
}
|
|
501
|
+
|
|
502
|
+
//top-menu: Bio | Diversity Search...
|
|
503
|
+
//name: diversitySearch
|
|
504
|
+
//description: finds the most diverse molecules
|
|
505
|
+
//output: viewer result
|
|
506
|
+
export function diversitySearchTopMenu() {
  // Adds a 'SequenceDiversitySearchViewer' to the shell's current view (treated as a table view)
  // and docks it with the 'down' dock type.
  const tableView = grok.shell.v as DG.TableView;
  const diversityViewer = tableView.addViewer('SequenceDiversitySearchViewer');
  tableView.dockManager.dock(diversityViewer, 'down');
}
|
|
511
|
+
|
|
512
|
+
//name: Bio | Substructure search ...
|
|
513
|
+
//tags: panel, bio
|
|
514
|
+
//input: column col {semType: Macromolecule}
|
|
515
|
+
export function bioSubstructureSearch(col: DG.Column): void {
  // Panel entry point: opens the substructure search dialog for the given column.
  const macromoleculeCol = col;
  substructureSearchDialog(macromoleculeCol);
}
|
|
518
|
+
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
import {NOTATION} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
5
|
+
import * as C from '../utils/constants';
|
|
6
|
+
|
|
7
|
+
/**
 * Shows a dialog that searches for a substructure in each row of a Macromolecule column.
 *
 * On OK the entered substructure is rewritten to use the column's own separator (when it was typed
 * with a different one) and a boolean column named `Matches: <substructure>` is added to the column's
 * data frame. If a column with that name (compared case-insensitively) already exists, a warning is
 * shown instead and nothing is added.
 *
 * @param {DG.Column} col Column with 'Macromolecule' semantic type
 */
export function substructureSearchDialog(col: DG.Column): void {
  const units = col.getTag(DG.TAGS.UNITS);
  const separator = col.getTag(C.TAGS.SEPARATOR);
  const notations = [NOTATION.FASTA, NOTATION.SEPARATOR];

  const substructureInput = ui.textInput('Substructure', '');
  const notationInput = ui.choiceInput('Notation', units, notations);
  const separatorInput = ui.textInput('Separator', separator);

  // hide the separator input for non-SEPARATOR target notations
  const toggleSeparator = (): void => {
    separatorInput.root.hidden = notationInput.value !== NOTATION.SEPARATOR;
  };

  toggleSeparator();
  notationInput.onChanged(() => toggleSeparator());

  ui.dialog('Substructure search')
    .add(ui.inputs([
      substructureInput,
      notationInput,
      separatorInput
    ]))
    .onOK(() => {
      let substructure = substructureInput.value;
      const enteredSeparator = separatorInput.value;
      // Translate only when a separator was actually entered: an empty search string in replaceAll
      // would insert the replacement between every character. A column without a separator tag
      // gets the separators removed rather than replaced by the text "null"/"undefined".
      if (notationInput.value !== NOTATION.FASTA && enteredSeparator && enteredSeparator !== separator)
        substructure = substructure.replaceAll(enteredSeparator, separator ?? '');

      const matchesColName = `Matches: ${substructure}`;
      const matchesColNameLower = matchesColName.toLocaleLowerCase();
      const colExists = col.dataFrame.columns.names()
        .some((it) => it.toLocaleLowerCase() === matchesColNameLower);
      if (colExists) {
        grok.shell.warning(`Search ${substructure} is already performed`);
        return;
      }

      const matches = substructureSearch(substructure, col);
      col.dataFrame.columns.add(DG.Column.fromBitSet(matchesColName, matches));
    })
    .show();
}
|
|
55
|
+
|
|
56
|
+
/**
 * Case-insensitive plain-text search of `substructure` in every value of `col`.
 *
 * @param substructure text to look for
 * @param col          column whose values are searched
 * @returns            bit set of `col.length` bits; bit i is set when row i contains the text
 */
export function substructureSearch(substructure: string, col: DG.Column): DG.BitSet {
  const needle = substructure.toLowerCase();
  const rowCount = col.length;
  const matches = DG.BitSet.create(rowCount);
  for (let rowIdx = 0; rowIdx < rowCount; rowIdx++) {
    const haystack = col.get(rowIdx).toLowerCase();
    if (haystack.includes(needle))
      matches.set(rowIdx, true, false);
  }
  return matches;
}
|
|
@@ -61,21 +61,21 @@ XZJ{}2
|
|
|
61
61
|
test('testPickupPaletteX', async () => { await _testPickupPaletteX(csvDfX); });
|
|
62
62
|
});
|
|
63
63
|
|
|
64
|
-
category('WebLogo.
|
|
64
|
+
category('WebLogo.monomerToShort', () => {
  // [test name, monomer passed to monomerToShort, expected result]; every case passes 5 as the second argument.
  const cases: Array<[string, string, string]> = [
    ['longMonomerSingle', 'S', 'S'],
    ['longMonomerShort', 'Short', 'Short'],
    ['longMonomerLong56', 'Long56', 'Long5…'],
    ['longMonomerComplexFirstPartShort', 'Long-long', 'Long…'],
    ['longMonomerComplexFirstPartLong56', 'Long56-long', 'Long5…'],
  ];

  for (const [testName, monomer, expected] of cases) {
    test(testName, async () => {
      await expect(WebLogo.monomerToShort(monomer, 5), expected);
    });
  }
});
|
|
81
81
|
|
|
@@ -4,7 +4,7 @@ import * as DG from 'datagrok-api/dg';
|
|
|
4
4
|
import * as grok from 'datagrok-api/grok';
|
|
5
5
|
|
|
6
6
|
import {readDataframe} from './utils';
|
|
7
|
-
import {
|
|
7
|
+
import {_testActivityCliffsOpen} from './activity-cliffs-utils';
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
category('activityCliffs', async () => {
|
|
@@ -12,28 +12,20 @@ category('activityCliffs', async () => {
|
|
|
12
12
|
let actCliffsDf: DG.DataFrame;
|
|
13
13
|
let actCliffsTableViewWithEmptyRows: DG.TableView;
|
|
14
14
|
let actCliffsDfWithEmptyRows: DG.DataFrame;
|
|
15
|
-
|
|
16
15
|
|
|
17
|
-
|
|
16
|
+
test('activityCliffsOpens', async () => {
|
|
18
17
|
actCliffsDf = await readDataframe('tests/sample_MSA_data.csv');
|
|
19
18
|
actCliffsTableView = grok.shell.addTableView(actCliffsDf);
|
|
20
|
-
|
|
21
|
-
actCliffsTableViewWithEmptyRows = grok.shell.addTableView(actCliffsDfWithEmptyRows);
|
|
22
|
-
});
|
|
23
|
-
|
|
24
|
-
after(async () => {
|
|
19
|
+
await _testActivityCliffsOpen(actCliffsDf, 102, 'UMAP', 'MSA');
|
|
25
20
|
grok.shell.closeTable(actCliffsDf);
|
|
26
21
|
actCliffsTableView.close();
|
|
27
|
-
grok.shell.closeTable(actCliffsDfWithEmptyRows);
|
|
28
|
-
actCliffsTableViewWithEmptyRows.close();
|
|
29
22
|
});
|
|
30
23
|
|
|
31
|
-
test('
|
|
32
|
-
await
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
test('activityCliffsOpenWithEmptyRows', async () => {
|
|
24
|
+
test('activityCliffsWithEmptyRows', async () => {
|
|
25
|
+
actCliffsDfWithEmptyRows = await readDataframe('tests/sample_MSA_data_empty_vals.csv');
|
|
26
|
+
actCliffsTableViewWithEmptyRows = grok.shell.addTableView(actCliffsDfWithEmptyRows);
|
|
36
27
|
await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, 103, 'UMAP', 'MSA');
|
|
28
|
+
grok.shell.closeTable(actCliffsDfWithEmptyRows);
|
|
29
|
+
actCliffsTableViewWithEmptyRows.close();
|
|
37
30
|
});
|
|
38
|
-
|
|
39
31
|
});
|
|
@@ -2,10 +2,11 @@ import * as DG from 'datagrok-api/dg';
|
|
|
2
2
|
import {delay, expect} from '@datagrok-libraries/utils/src/test';
|
|
3
3
|
import {_package} from '../package-test';
|
|
4
4
|
import { activityCliffs } from '../package';
|
|
5
|
-
|
|
5
|
+
import * as grok from 'datagrok-api/grok';
|
|
6
6
|
|
|
7
7
|
export async function _testActivityCliffsOpen(df: DG.DataFrame, numberCliffs: number, method: string, colName: string) {
|
|
8
|
-
|
|
8
|
+
await grok.data.detectSemanticTypes(df);
|
|
9
|
+
const scatterPlot = await activityCliffs(
|
|
9
10
|
df,
|
|
10
11
|
df.col(colName)!,
|
|
11
12
|
df.col('Activity')!,
|
|
@@ -10,26 +10,19 @@ category('sequenceSpace', async () => {
|
|
|
10
10
|
let testHelmWithEmptyRows: DG.DataFrame;
|
|
11
11
|
let testHelmWithEmptyRowsTableView: DG.TableView;
|
|
12
12
|
|
|
13
|
-
|
|
13
|
+
test('sequenceSpaceOpens', async () => {
|
|
14
14
|
testFastaDf = await readDataframe('samples/sample_FASTA.csv');
|
|
15
15
|
testFastaTableView = grok.shell.addTableView(testFastaDf);
|
|
16
|
-
|
|
17
|
-
testHelmWithEmptyRowsTableView = grok.shell.addTableView(testHelmWithEmptyRows);
|
|
18
|
-
});
|
|
19
|
-
|
|
20
|
-
after(async () => {
|
|
16
|
+
await _testSequenceSpaceReturnsResult(testFastaDf, 'UMAP', 'Sequence');
|
|
21
17
|
grok.shell.closeTable(testFastaDf);
|
|
22
18
|
testFastaTableView.close();
|
|
23
|
-
grok.shell.closeTable(testHelmWithEmptyRows);
|
|
24
|
-
testHelmWithEmptyRowsTableView.close();
|
|
25
19
|
});
|
|
26
20
|
|
|
27
|
-
test('
|
|
28
|
-
await
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
test('sequenceSpaceOpensWithEmptyRows', async () => {
|
|
21
|
+
test('sequenceSpaceWithEmptyRows', async () => {
|
|
22
|
+
testHelmWithEmptyRows = await readDataframe('data/sample_HELM_empty_vals.csv');
|
|
23
|
+
testHelmWithEmptyRowsTableView = grok.shell.addTableView(testHelmWithEmptyRows);
|
|
32
24
|
await _testSequenceSpaceReturnsResult(testHelmWithEmptyRows, 'UMAP', 'HELM');
|
|
25
|
+
grok.shell.closeTable(testHelmWithEmptyRows);
|
|
26
|
+
testHelmWithEmptyRowsTableView.close();
|
|
33
27
|
});
|
|
34
|
-
|
|
35
28
|
});
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import {after, before, category, test, expect, delay} from '@datagrok-libraries/utils/src/test';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import {createTableView, readDataframe} from './utils';
|
|
4
|
+
import * as grok from 'datagrok-api/grok';
|
|
5
|
+
import {SequenceSimilarityViewer} from '../analysis/sequence-similarity-viewer';
|
|
6
|
+
|
|
7
|
+
// Registers one test per search viewer; the test bodies live in the helper functions of this file.
category('similarity/diversity', async () => {
  test('similaritySearchViewer', async () => _testSimilaritySearchViewer());
  test('diversitySearchViewer', async () => _testDiversitySearchViewer());
});
|
|
15
|
+
|
|
16
|
+
/**
 * Opens 'samples/sample_MSA.csv', adds the similarity search viewer and checks its defaults and the
 * first results for target rows 0 and 1. The viewer and the view are closed even when a check fails.
 */
async function _testSimilaritySearchViewer() {
  const molecules = await createTableView('samples/sample_MSA.csv');
  let similaritySearchviewer;
  try {
    const viewer = molecules.addViewer('SequenceSimilaritySearchViewer');
    await delay(100);
    similaritySearchviewer = getSearchViewer(viewer, 'SequenceSimilaritySearchViewer');
    if (!similaritySearchviewer)
      throw new Error('SequenceSimilaritySearchViewer was not found among the viewers of the view');

    // molCol is only set once a search has been rendered.
    if (!similaritySearchviewer.molCol)
      await waitForCompute(similaritySearchviewer);
    expect(similaritySearchviewer.fingerprint, 'Morgan');
    expect(similaritySearchviewer.distanceMetric, 'Tanimoto');
    expect(similaritySearchviewer.scores!.get(0), DG.FLOAT_NULL);
    expect(similaritySearchviewer.idxs!.get(0), 0);
    expect(similaritySearchviewer.molCol!.get(0),
      'meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me');
    expect(similaritySearchviewer.scores!.get(1), 0.6603773832321167);
    expect(similaritySearchviewer.idxs!.get(1), 100);
    expect(similaritySearchviewer.molCol!.get(1),
      'meI/hHis/Aca/N/T/W/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me');

    // Subscribe BEFORE changing the row: computeCompleted does not replay, so a completion that
    // fires before the subscription exists would be missed and the test would wait forever.
    const recomputed = waitForCompute(similaritySearchviewer);
    molecules.dataFrame.currentRowIdx = 1;
    await delay(100);
    await recomputed;
    expect(similaritySearchviewer.targetMoleculeIdx, 1);
    expect(similaritySearchviewer.molCol!.get(0),
      'meI/hHis/Aca/Cys_SEt/T/dK/Thr_PO3H2/Aca/Tyr_PO3H2/D-Chg/dV/Phe_ab-dehydro/N/D-Orn/D-aThr//Phe_4Me');
  } finally {
    similaritySearchviewer?.close();
    molecules.close();
  }
}
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
/**
 * Opens 'samples/sample_MSA.csv', adds the diversity search viewer and checks its defaults and that it
 * produced at least one result. The viewer and the view are closed even when a check fails.
 */
async function _testDiversitySearchViewer() {
  const molecules = await createTableView('samples/sample_MSA.csv');
  let diversitySearchviewer;
  try {
    const viewer = molecules.addViewer('SequenceDiversitySearchViewer');
    await delay(10);
    diversitySearchviewer = getSearchViewer(viewer, 'SequenceDiversitySearchViewer');
    if (!diversitySearchviewer)
      throw new Error('SequenceDiversitySearchViewer was not found among the viewers of the view');

    if (!diversitySearchviewer.renderMolIds)
      await waitForCompute(diversitySearchviewer);
    expect(diversitySearchviewer.fingerprint, 'Morgan');
    expect(diversitySearchviewer.distanceMetric, 'Tanimoto');
    expect(diversitySearchviewer.initialized, true);
    expect(diversitySearchviewer.renderMolIds.length > 0, true);
  } finally {
    diversitySearchviewer?.close();
    molecules.close();
  }
}
|
|
58
|
+
|
|
59
|
+
/**
 * Returns the first viewer of `viewer`'s view whose `type` equals `name`,
 * or `undefined` when the view has no such viewer.
 */
function getSearchViewer(viewer: DG.Viewer, name: string) {
  let match;
  for (const candidate of viewer.view.viewers) {
    if (candidate.type !== name)
      continue;
    match = candidate;
    break;
  }
  return match;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Resolves on the next emission of the viewer's `computeCompleted` stream.
 *
 * The subscription is made synchronously when this function is called, so it can be started before
 * the action that triggers the computation; it is released as soon as the first emission arrives.
 */
async function waitForCompute(viewer: SequenceSimilarityViewer) {
  await new Promise<void>((resolve) => {
    const subscription = viewer.computeCompleted.subscribe(() => {
      subscription.unsubscribe();
      resolve();
    });
  });
}
|