@datagrok/bio 2.0.13 → 2.0.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-test.js +2178 -1236
- package/dist/package.js +4196 -3513
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +135 -152
- package/files/data/sample_HELM_empty_vals.csv +537 -537
- package/files/samples/sample_HELM.csv +540 -540
- package/package.json +22 -15
- package/scripts/read-tree-pkl.py +215 -0
- package/src/__jest__/remote.test.ts +6 -4
- package/src/__jest__/test-node.ts +1 -1
- package/src/{utils → analysis}/sequence-activity-cliffs.ts +33 -30
- package/src/analysis/sequence-diversity-viewer.ts +48 -0
- package/src/analysis/sequence-search-base-viewer.ts +81 -0
- package/src/analysis/sequence-similarity-viewer.ts +107 -0
- package/src/{utils → analysis}/sequence-space.ts +0 -0
- package/src/calculations/monomerLevelMols.ts +64 -20
- package/src/package-test.ts +3 -1
- package/src/package.ts +64 -3
- package/src/substructure-search/substructure-search.ts +65 -0
- package/src/tests/WebLogo-positions-test.ts +5 -4
- package/src/tests/WebLogo-test.ts +6 -6
- package/src/tests/activity-cliffs-tests.ts +8 -16
- package/src/tests/activity-cliffs-utils.ts +3 -2
- package/src/tests/convert-test.ts +5 -3
- package/src/tests/fasta-export-tests.ts +110 -0
- package/src/tests/sequence-space-test.ts +7 -14
- package/src/tests/similarity-diversity-tests.ts +78 -0
- package/src/tests/splitters-test.ts +13 -0
- package/src/utils/cell-renderer.ts +81 -68
- package/src/utils/convert.ts +1 -1
- package/src/utils/multiple-sequence-alignment.ts +4 -5
- package/src/utils/save-as-fasta.ts +109 -0
- package/src/utils/ui-utils.ts +4 -0
- package/src/viewers/vd-regions-viewer.ts +6 -3
- package/src/widgets/representations.ts +58 -0
- package/test-Bio-7770371320b2-f8f97c6e.html +0 -374
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
import {SequenceSearchBaseViewer} from './sequence-search-base-viewer';
|
|
5
|
+
import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
6
|
+
import * as C from '../utils/constants';
|
|
7
|
+
import {createDifferenceCanvas, createDifferencesWithPositions} from './sequence-activity-cliffs';
|
|
8
|
+
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
9
|
+
import {WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
10
|
+
import {TableView} from 'datagrok-api/dg';
|
|
11
|
+
import { Subject } from 'rxjs';
|
|
12
|
+
|
|
13
|
+
/**
 * Viewer that runs a similarity search for the current row's sequence and shows the
 * hits (sequence + score) in a grid placed into the viewer root.
 *
 * Flow, as implemented below:
 *  - `render()` converts the sequence column to monomer-level molblocks via `getMonomericMols`
 *    and passes them to the `Chem:callChemSimilaritySearch` function;
 *  - selecting a row in the result grid moves the current row of the source dataframe and
 *    shows a differences panel (`createPropertyPanel`);
 *  - `gridSelect` is raised by that selection so the search is not recomputed for it, and is
 *    lowered again by a click on the main table grid.
 */
export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
  hotSearch: boolean;
  sketchedMolecule: string = '';
  curIdx: number = 0;
  molCol: DG.Column | null = null;
  idxs: DG.Column | null = null;
  scores: DG.Column | null = null;
  cutoff: number;
  gridSelect: boolean = false;
  targetMoleculeIdx: number = 0;
  /** Emits `true` each time a similarity search has been computed and rendered. */
  computeCompleted = new Subject<boolean>();

  /**
   * Stable click handler for the main table grid. A single function instance is kept so that
   * registering it on every recompute does not pile up listeners: `addEventListener` ignores a
   * listener that is already registered with the same callback and options.
   */
  private readonly resetGridSelect = (): void => {
    this.gridSelect = false;
  };

  constructor() {
    super('similarity');
    this.cutoff = this.float('cutoff', 0.01, {min: 0, max: 1});
    this.hotSearch = this.bool('hotSearch', true);
  }

  /** Marks the viewer as initialized; `hotSearch` is forced to `true` here. */
  init(): void {
    this.hotSearch = true;
    this.initialized = true;
  }

  /**
   * Recomputes the similarity search and rebuilds the result grid.
   *
   * Nothing is recomputed when `beforeRender()` returns false, when there is no molecule
   * column, when `computeData` is false, or while `gridSelect` is set.
   *
   * @param computeData when false, only `curIdx` is refreshed
   */
  async render(computeData = true): Promise<void> {
    if (!this.beforeRender())
      return;
    if (!this.moleculeColumn)
      return;

    // A current row index of -1 falls back to the first row.
    const currentRowIdx = this.dataFrame!.currentRowIdx;
    this.curIdx = currentRowIdx == -1 ? 0 : currentRowIdx;
    if (!computeData || this.gridSelect)
      return;

    this.targetMoleculeIdx = this.curIdx;
    const monomericMols = await getMonomericMols(this.moleculeColumn);
    // The column has to belong to a dataframe for fingerprints to be calculated
    // (per the original author's note), hence the otherwise unused dataframe.
    DG.DataFrame.fromColumns([monomericMols]);
    const df = await grok.functions.call('Chem:callChemSimilaritySearch', {
      df: this.dataFrame,
      col: monomericMols,
      molecule: monomericMols.get(this.targetMoleculeIdx),
      metricName: this.distanceMetric,
      limit: this.limit,
      minScore: this.cutoff,
      fingerprint: this.fingerprint
    });
    this.idxs = df.getCol('indexes');
    this.scores = df.getCol('score');

    // Result sequences are looked up in the source column by the returned row indexes.
    this.molCol = DG.Column.string('sequence', this.idxs!.length)
      .init((i) => this.moleculeColumn?.get(this.idxs?.get(i)));
    this.molCol.semType = DG.SEMTYPE.MACROMOLECULE;
    this.tags.forEach((tag) => this.molCol!.setTag(tag, this.moleculeColumn!.getTag(tag)));

    const resDf = DG.DataFrame.fromColumns([this.idxs!, this.molCol!, this.scores!]);
    resDf.onCurrentRowChanged.subscribe((_) => {
      // Statement order is kept as in the original: move the source row, show the panel,
      // then raise the flag that suppresses recomputation.
      this.dataFrame!.currentRowIdx = resDf.col('indexes')!.get(resDf.currentRowIdx);
      this.createPropertyPanel(resDf);
      this.gridSelect = true;
    });

    const grid = resDf.plot.grid();
    grid.col('indexes')!.visible = false;

    // Blank the score of the target sequence itself. The target may be absent from the
    // result (findIndex returns -1), in which case there is no cell to blank.
    const targetMolRow = this.idxs?.getRawData().findIndex((it) => it == this.targetMoleculeIdx);
    if (targetMolRow !== undefined && targetMolRow >= 0)
      grid.cell('score', targetMolRow).cell.value = null;

    (grok.shell.v as TableView).grid.root.addEventListener('click', this.resetGridSelect);
    updateDivInnerHTML(this.root, grid.root);
    this.computeCompleted.next(true);
  }

  /**
   * Builds the "Similarity search" accordion comparing the target sequence with the sequence
   * in the current row of `resDf`, and assigns it to `grok.shell.o`.
   *
   * @param resDf result dataframe with a 'sequence' column carrying units/separator tags
   */
  createPropertyPanel(resDf: DG.DataFrame): void {
    const propPanel = ui.div();
    const molDifferences: { [key: number]: HTMLCanvasElement } = {};
    const sequenceCol = resDf.col('sequence')!;
    const units = sequenceCol.getTag(DG.TAGS.UNITS);
    const separator = sequenceCol.getTag(C.TAGS.SEPARATOR);
    const splitter = WebLogo.getSplitter(units, separator);
    const subParts1 = splitter(this.moleculeColumn!.get(this.targetMoleculeIdx));
    const subParts2 = splitter(resDf.get('sequence', resDf.currentRowIdx));

    // createDifferenceCanvas receives molDifferences and the same object is handed to
    // createDifferencesWithPositions below (presumably filled by the former — confirm in
    // './sequence-activity-cliffs').
    const canvas = createDifferenceCanvas(subParts1, subParts2, units, molDifferences);
    propPanel.append(ui.div(canvas, {style: {width: '300px', overflow: 'scroll'}}));

    if (subParts1.length !== subParts2.length) {
      propPanel.append(ui.divV([
        ui.divText(`Different sequence length:`, {style: {fontWeight: 'bold'}}),
        ui.divText(`target: ${subParts1.length} monomers`),
        ui.divText(`selected: ${subParts2.length} monomers`)
      ], {style: {paddingBottom: '10px'}}));
    }
    propPanel.append(createDifferencesWithPositions(molDifferences));

    const acc = ui.accordion();
    const accIcon = ui.element('i');
    accIcon.className = 'grok-icon svg-icon svg-view-layout';
    acc.addTitle(ui.span([accIcon, ui.label(`Similarity search`)]));
    acc.addPane('Differences', () => propPanel, true);
    grok.shell.o = acc.root;
  }
}
|
|
File without changes
|
|
@@ -1,25 +1,69 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
|
-
import * as ui from 'datagrok-api/ui';
|
|
3
2
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
|
|
5
|
-
import {
|
|
3
|
+
import * as C from '../utils/constants';
|
|
4
|
+
import {getHelmMonomers} from '../package';
|
|
5
|
+
import {WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
6
6
|
|
|
7
7
|
const V2000_ATOM_NAME_POS = 31;
|
|
8
8
|
|
|
9
9
|
/**
 * Produces a string column named 'monomericMols' holding one V3000 molblock per row of `mcol`.
 *
 * Every distinct monomer of the column gets a 1-based numeric code (as a string). For 'helm'
 * units the molfiles come from the `HELM:getMolFiles` function and are rewritten by
 * `changeV2000ToV3000`; for any other units each sequence is split with the column's splitter
 * and passed to `molV3000FromNonHelmSequence`.
 *
 * @param mcol    macromolecule column; its units and separator tags are read
 * @param pattern forwarded unchanged to the molblock builders
 * @returns a new column created with `DG.Column.fromStrings`
 */
export async function getMonomericMols(mcol: DG.Column, pattern: boolean = false): Promise<DG.Column> {
  const separator: string = mcol.tags[C.TAGS.SEPARATOR];
  const units: string = mcol.tags[DG.TAGS.UNITS];
  const splitter = WebLogo.getSplitter(units, separator);
  const isHelm = units === 'helm';

  // Distinct monomers of the whole column; empty names are dropped for non-HELM notations.
  const monomers = isHelm ?
    getHelmMonomers(mcol) :
    Object.keys(WebLogo.getStats(mcol, 0, splitter).freq).filter((it) => it !== '');

  // Monomer name -> 1-based code.
  const monomersDict = new Map();
  monomers.forEach((monomer, idx) => monomersDict.set(monomer, `${idx + 1}`));

  let molV3000Array;
  if (isHelm) {
    const helmMolFiles = await grok.functions.call('HELM:getMolFiles', {col: mcol});
    molV3000Array = changeV2000ToV3000(helmMolFiles, monomersDict, pattern);
  } else {
    const rowCount = mcol.length;
    molV3000Array = new Array<string>(rowCount);
    for (let rowIdx = 0; rowIdx < rowCount; rowIdx++) {
      const sequenceMonomers = splitter(mcol.get(rowIdx)).filter((it) => it !== '');
      molV3000Array[rowIdx] = molV3000FromNonHelmSequence(sequenceMonomers, monomersDict, pattern);
    }
  }

  return DG.Column.fromStrings('monomericMols', molV3000Array);
}
|
|
34
|
+
|
|
35
|
+
/**
 * Builds a V3000 molblock in which every monomer of a sequence is one atom and consecutive
 * atoms are joined by a bond of order 1.
 *
 * @param monomers     monomers of one sequence, in order
 * @param monomersDict monomer name -> numeric code (string); a monomer missing from the map
 *                     is written as `undefined`
 * @param pattern      when true atoms are written as `R<code>`; otherwise as `At` with
 *                     `MASS=<code>`
 * @returns the molblock text, terminated by `M END` without a trailing newline
 */
function molV3000FromNonHelmSequence(
  monomers: Array<string>, monomersDict: Map<string, string>, pattern: boolean = false): string {
  // NOTE(review): whitespace inside these literals is reproduced exactly as seen in the
  // reviewed text, where runs of spaces may have been collapsed — verify column spacing
  // (e.g. the `M  V30` prefix) against the CTfile V3000 specification.
  const atomCount = monomers.length;
  // An empty sequence has no bonds; without the clamp the COUNTS line would carry -1.
  const bondCount = Math.max(atomCount - 1, 0);

  let molV3000 = `
Datagrok macromolecule handler

0 0 0 0 0 0 999 V3000
M V30 BEGIN CTAB
`;

  molV3000 += `M V30 COUNTS ${atomCount} ${bondCount} 0 0 0\n`;
  molV3000 += 'M V30 BEGIN ATOM\n';

  for (let atomRowI = 0; atomRowI < atomCount; atomRowI++) {
    const code = monomersDict.get(monomers[atomRowI]);
    molV3000 += pattern ?
      `M V30 ${atomRowI + 1} R${code} 0.000 0.000 0 0\n` :
      `M V30 ${atomRowI + 1} At 0.000 0.000 0 0 MASS=${code}\n`;
  }

  molV3000 += 'M V30 END ATOM\n';
  molV3000 += 'M V30 BEGIN BOND\n';

  // Bond i joins atom i with atom i + 1 (1-based).
  for (let bondRowI = 0; bondRowI < bondCount; bondRowI++)
    molV3000 += `M V30 ${bondRowI + 1} 1 ${bondRowI + 1} ${bondRowI + 2}\n`;

  molV3000 += 'M V30 END BOND\n';
  molV3000 += 'M V30 END CTAB\n';
  molV3000 += 'M END';
  return molV3000;
}
|
|
21
64
|
|
|
22
|
-
function
|
|
65
|
+
function changeV2000ToV3000(mols: DG.Column, dict: Map<string, string>, pattern: boolean = false): Array<string> {
|
|
66
|
+
const molsArray = new Array<string>(mols.length);
|
|
23
67
|
for (let i = 0; i < mols.length; i++) {
|
|
24
68
|
let curPos = 0;
|
|
25
69
|
let endPos = 0;
|
|
@@ -30,7 +74,7 @@ function changeToV3000(mols: Array<string>, dict: Map<string, string>, pattern:
|
|
|
30
74
|
M V30 BEGIN CTAB
|
|
31
75
|
`;
|
|
32
76
|
|
|
33
|
-
const mol = mols
|
|
77
|
+
const mol = mols.get(i);
|
|
34
78
|
curPos = mol.indexOf('\n', curPos) + 1;
|
|
35
79
|
curPos = mol.indexOf('\n', curPos) + 1;
|
|
36
80
|
curPos = mol.indexOf('\n', curPos) + 1;
|
|
@@ -45,10 +89,10 @@ M V30 BEGIN CTAB
|
|
|
45
89
|
curPos = mol.indexOf('\n', curPos) + 1 + V2000_ATOM_NAME_POS;
|
|
46
90
|
endPos = mol.indexOf(' ', curPos);
|
|
47
91
|
const monomerName: string = mol.substring(curPos, endPos);
|
|
48
|
-
molV3000 += pattern ?
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
}
|
|
92
|
+
molV3000 += pattern ?
|
|
93
|
+
`M V30 ${atomRowI + 1} R${dict.get(monomerName)} 0.000 0.000 0 0\n` :
|
|
94
|
+
`M V30 ${atomRowI + 1} At 0.000 0.000 0 0 MASS=${dict.get(monomerName)}\n`;
|
|
95
|
+
}
|
|
52
96
|
|
|
53
97
|
molV3000 += 'M V30 END ATOM\n';
|
|
54
98
|
molV3000 += 'M V30 BEGIN BOND\n';
|
|
@@ -60,13 +104,13 @@ M V30 BEGIN CTAB
|
|
|
60
104
|
const order = parseInt(mol.substring(curPos + 6, curPos + 9).trim());
|
|
61
105
|
|
|
62
106
|
molV3000 += `M V30 ${bondRowI + 1} ${order} ${firstMonomer} ${secondMonomer}\n`;
|
|
63
|
-
}
|
|
107
|
+
}
|
|
64
108
|
|
|
65
109
|
molV3000 += 'M V30 END BOND\n';
|
|
66
110
|
molV3000 += 'M V30 END CTAB\n';
|
|
67
111
|
molV3000 += 'M END';
|
|
68
|
-
|
|
112
|
+
molsArray[i] = molV3000;
|
|
69
113
|
}
|
|
70
114
|
|
|
71
|
-
return
|
|
72
|
-
}
|
|
115
|
+
return molsArray;
|
|
116
|
+
}
|
package/src/package-test.ts
CHANGED
|
@@ -12,8 +12,10 @@ import './tests/splitters-test';
|
|
|
12
12
|
import './tests/renderers-test';
|
|
13
13
|
import './tests/convert-test';
|
|
14
14
|
import './tests/fasta-handler-test';
|
|
15
|
+
import './tests/fasta-export-tests';
|
|
15
16
|
import './tests/WebLogo-positions-test';
|
|
16
|
-
import './tests/checkInputColumn-tests'
|
|
17
|
+
import './tests/checkInputColumn-tests';
|
|
18
|
+
import './tests/similarity-diversity-tests';
|
|
17
19
|
|
|
18
20
|
export const _package = new DG.Package();
|
|
19
21
|
export {tests};
|
package/src/package.ts
CHANGED
|
@@ -12,15 +12,15 @@ import {runKalign, testMSAEnoughMemory} from './utils/multiple-sequence-alignmen
|
|
|
12
12
|
import {SequenceAlignment, Aligned} from './seq_align';
|
|
13
13
|
import {Nucleotides} from '@datagrok-libraries/bio/src/nucleotides';
|
|
14
14
|
import {Aminoacids} from '@datagrok-libraries/bio/src/aminoacids';
|
|
15
|
-
import {getEmbeddingColsNames, sequenceSpace} from './
|
|
15
|
+
import {getEmbeddingColsNames, sequenceSpace} from './analysis/sequence-space';
|
|
16
16
|
import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
17
17
|
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
18
|
-
import {createPropPanelElement, createTooltipElement, getSimilaritiesMarix} from './
|
|
18
|
+
import {createPropPanelElement, createTooltipElement, getSimilaritiesMarix} from './analysis/sequence-activity-cliffs';
|
|
19
19
|
import {createJsonMonomerLibFromSdf, encodeMonomers, getMolfilesFromSeq, HELM_CORE_LIB_FILENAME} from './utils/utils';
|
|
20
20
|
import {getMacroMol} from './utils/atomic-works';
|
|
21
21
|
import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
|
|
22
22
|
import {convert} from './utils/convert';
|
|
23
|
-
import {representationsWidget} from './widgets/representations';
|
|
23
|
+
import {getMacroMolColumnPropertyPanel, representationsWidget} from './widgets/representations';
|
|
24
24
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
25
25
|
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
26
26
|
import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils';
|
|
@@ -32,6 +32,10 @@ import {
|
|
|
32
32
|
|
|
33
33
|
import {splitAlignedSequences} from '@datagrok-libraries/bio/src/utils/splitter';
|
|
34
34
|
import * as C from './utils/constants';
|
|
35
|
+
import {SequenceSimilarityViewer} from './analysis/sequence-similarity-viewer';
|
|
36
|
+
import {SequenceDiversityViewer} from './analysis/sequence-diversity-viewer';
|
|
37
|
+
import {substructureSearchDialog} from './substructure-search/substructure-search';
|
|
38
|
+
import {saveAsFastaUI} from './utils/save-as-fasta';
|
|
35
39
|
|
|
36
40
|
//tags: init
|
|
37
41
|
export async function initBio() {
|
|
@@ -46,6 +50,14 @@ export function fastaSequenceCellRenderer(): MacromoleculeSequenceCellRenderer {
|
|
|
46
50
|
return new MacromoleculeSequenceCellRenderer();
|
|
47
51
|
}
|
|
48
52
|
|
|
53
|
+
//name: Sequence Renderer
//input: column molColumn {semType: Macromolecule}
//tags: panel
//output: widget result
export function macroMolColumnPropertyPanel(molColumn: DG.Column): DG.Widget {
  // Thin registration wrapper: the widget is built by getMacroMolColumnPropertyPanel,
  // imported from './widgets/representations'.
  const panel = getMacroMolColumnPropertyPanel(molColumn);
  return panel;
}
|
|
60
|
+
|
|
49
61
|
//name: separatorSequenceCellRenderer
|
|
50
62
|
//tags: cellRenderer
|
|
51
63
|
//meta.cellType: sequence
|
|
@@ -462,3 +474,52 @@ export function getHelmMonomers(seqCol: DG.Column<string>): string[] {
|
|
|
462
474
|
const stats = WebLogo.getStats(seqCol, 1, WebLogo.splitterAsHelm);
|
|
463
475
|
return Object.keys(stats.freq);
|
|
464
476
|
}
|
|
477
|
+
|
|
478
|
+
|
|
479
|
+
//name: SequenceSimilaritySearchViewer
//tags: viewer
//output: viewer result
export function similaritySearchViewer(): SequenceSimilarityViewer {
  // Factory for the viewer registered under the name given in the header above.
  const viewer = new SequenceSimilarityViewer();
  return viewer;
}
|
|
485
|
+
|
|
486
|
+
//top-menu: Bio | Similarity Search...
//name: similaritySearch
//description: finds the most similar sequence
//output: viewer result
export function similaritySearchTopMenu(): void {
  // Adds the similarity viewer to the current view and docks it at the bottom.
  // The current shell view is cast to a table view without a runtime check.
  const tableView = grok.shell.v as DG.TableView;
  const similarityViewer = tableView.addViewer('SequenceSimilaritySearchViewer');
  tableView.dockManager.dock(similarityViewer, 'down');
}
|
|
495
|
+
|
|
496
|
+
//name: SequenceDiversitySearchViewer
//tags: viewer
//output: viewer result
export function diversitySearchViewer(): SequenceDiversityViewer {
  // Factory for the viewer registered under the name given in the header above.
  const viewer = new SequenceDiversityViewer();
  return viewer;
}
|
|
502
|
+
|
|
503
|
+
//top-menu: Bio | Diversity Search...
//name: diversitySearch
//description: finds the most diverse molecules
//output: viewer result
export function diversitySearchTopMenu(): void {
  // Adds the diversity viewer to the current view and docks it at the bottom.
  // The current shell view is cast to a table view without a runtime check.
  const tableView = grok.shell.v as DG.TableView;
  const diversityViewer = tableView.addViewer('SequenceDiversitySearchViewer');
  tableView.dockManager.dock(diversityViewer, 'down');
}
|
|
512
|
+
|
|
513
|
+
//name: Bio | Substructure search ...
//tags: panel, bio
//input: column col {semType: Macromolecule}
export function bioSubstructureSearch(col: DG.Column): void {
  // Thin registration wrapper: opens the dialog implemented in
  // './substructure-search/substructure-search' for the given column.
  substructureSearchDialog(col);
}
|
|
519
|
+
|
|
520
|
+
//name: saveAsFasta
//description: As FASTA...
//tags: fileExporter
export function saveAsFasta(): void {
  // Thin registration wrapper around saveAsFastaUI from './utils/save-as-fasta'.
  saveAsFastaUI();
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
import {NOTATION} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
5
|
+
import * as C from '../utils/constants';
|
|
6
|
+
|
|
7
|
+
/**
 * Shows a dialog that searches a substructure in each row of a Macromolecule column.
 *
 * On OK the typed substructure is (optionally) re-delimited with the column's own separator,
 * searched with `substructureSearch`, and the result is added to the column's dataframe as a
 * column named `Matches: <substructure>`. If a column with that name (case-insensitive)
 * already exists, a warning is shown instead.
 *
 * @param {DG.Column} col Column with 'Macromolecule' semantic type
 */
export function substructureSearchDialog(col: DG.Column): void {
  const units = col.getTag(DG.TAGS.UNITS);
  const separator = col.getTag(C.TAGS.SEPARATOR);
  const notations = [NOTATION.FASTA, NOTATION.SEPARATOR];

  const substructureInput = ui.textInput('Substructure', '');
  const notationInput = ui.choiceInput('Notation', units, notations);
  const separatorInput = ui.textInput('Separator', separator);

  // hide the separator input for non-SEPARATOR target notations
  const toggleSeparator = () => {
    separatorInput.root.hidden = notationInput.value !== NOTATION.SEPARATOR;
  };

  toggleSeparator();

  notationInput.onChanged(() => {
    toggleSeparator();
  });

  ui.dialog('Substructure search')
    .add(ui.inputs([
      substructureInput,
      notationInput,
      separatorInput
    ]))
    .onOK(() => {
      let substructure = substructureInput.value;
      const typedSeparator = separatorInput.value;
      // Re-delimit only when the user actually typed a separator that differs from the
      // column's one. An empty search string would make replaceAll insert the replacement
      // between every character, and a missing column separator would otherwise be
      // stringified into the substructure ("null"/"undefined").
      if (notationInput.value !== NOTATION.FASTA && typedSeparator && typedSeparator !== separator)
        substructure = substructure.replaceAll(typedSeparator, separator ?? '');

      const matchesColName = `Matches: ${substructure}`;
      const lowerMatchesColName = matchesColName.toLocaleLowerCase();
      const colExists = col.dataFrame.columns.names()
        .some((it) => it.toLocaleLowerCase() === lowerMatchesColName);
      if (!colExists) {
        const matches = substructureSearch(substructure, col);
        col.dataFrame.columns.add(DG.Column.fromBitSet(matchesColName, matches));
      } else { grok.shell.warning(`Search ${substructure} is already performed`); }
    })
    .show();
}
|
|
55
|
+
|
|
56
|
+
/**
 * Case-insensitive plain-text search of `substructure` in every value of `col`.
 *
 * @param substructure text to look for; compared after lower-casing
 * @param col          column whose values are searched
 * @returns a bit set of `col.length` bits with a bit set for every row whose value contains
 *          the text; rows with a null/undefined value never match
 */
export function substructureSearch(substructure: string, col: DG.Column): DG.BitSet {
  const lowerCaseSubstr = substructure.toLowerCase();
  const resultArray = DG.BitSet.create(col.length);
  for (let rowIdx = 0; rowIdx < col.length; rowIdx++) {
    const macromolecule: string | null | undefined = col.get(rowIdx);
    // Missing values are skipped instead of throwing on `.toLowerCase()`.
    if (macromolecule == null)
      continue;
    if (macromolecule.toLowerCase().includes(lowerCaseSubstr))
      resultArray.set(rowIdx, true, false); // third argument kept as in the original (presumably "notify" — confirm)
  }
  return resultArray;
}
|
|
@@ -41,7 +41,7 @@ ATC-G-TTGC--
|
|
|
41
41
|
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
42
42
|
seqCol.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
|
|
43
43
|
|
|
44
|
-
const wlViewer: WebLogo = await df.plot.fromType('WebLogo') as
|
|
44
|
+
const wlViewer: WebLogo = (await df.plot.fromType('WebLogo')) as WebLogo;
|
|
45
45
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
46
46
|
|
|
47
47
|
tvList.push(tv);
|
|
@@ -94,8 +94,8 @@ ATC-G-TTGC--
|
|
|
94
94
|
return i > 2;
|
|
95
95
|
});
|
|
96
96
|
df.filter.fireChanged();
|
|
97
|
-
const wlViewer: WebLogo = await df.plot.fromType('WebLogo',
|
|
98
|
-
|
|
97
|
+
const wlViewer: WebLogo = (await df.plot.fromType('WebLogo',
|
|
98
|
+
{'shrinkEmptyTail': true})) as WebLogo;
|
|
99
99
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
100
100
|
|
|
101
101
|
tvList.push(tv);
|
|
@@ -134,7 +134,8 @@ ATC-G-TTGC--
|
|
|
134
134
|
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
135
135
|
seqCol.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
|
|
136
136
|
|
|
137
|
-
const wlViewer: WebLogo = await df.plot.fromType('WebLogo',
|
|
137
|
+
const wlViewer: WebLogo = (await df.plot.fromType('WebLogo',
|
|
138
|
+
{'skipEmptyPositions': true})) as WebLogo;
|
|
138
139
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
139
140
|
|
|
140
141
|
tvList.push(tv);
|
|
@@ -61,21 +61,21 @@ XZJ{}2
|
|
|
61
61
|
test('testPickupPaletteX', async () => { await _testPickupPaletteX(csvDfX); });
|
|
62
62
|
});
|
|
63
63
|
|
|
64
|
-
category('WebLogo.
|
|
64
|
+
// Checks WebLogo.monomerToShort with a limit of 5 for each [test name, monomer, expected] triple.
category('WebLogo.monomerToShort', () => {
  const cases: [string, string, string][] = [
    ['longMonomerSingle', 'S', 'S'],
    ['longMonomerShort', 'Short', 'Short'],
    ['longMonomerLong56', 'Long56', 'Long5…'],
    ['longMonomerComplexFirstPartShort', 'Long-long', 'Long…'],
    ['longMonomerComplexFirstPartLong56', 'Long56-long', 'Long5…'],
  ];

  // Tests are registered in the order listed above.
  for (const [testName, monomer, expected] of cases) {
    test(testName, async () => {
      await expect(WebLogo.monomerToShort(monomer, 5), expected);
    });
  }
});
|
|
81
81
|
|
|
@@ -4,7 +4,7 @@ import * as DG from 'datagrok-api/dg';
|
|
|
4
4
|
import * as grok from 'datagrok-api/grok';
|
|
5
5
|
|
|
6
6
|
import {readDataframe} from './utils';
|
|
7
|
-
import {
|
|
7
|
+
import {_testActivityCliffsOpen} from './activity-cliffs-utils';
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
category('activityCliffs', async () => {
|
|
@@ -12,28 +12,20 @@ category('activityCliffs', async () => {
|
|
|
12
12
|
let actCliffsDf: DG.DataFrame;
|
|
13
13
|
let actCliffsTableViewWithEmptyRows: DG.TableView;
|
|
14
14
|
let actCliffsDfWithEmptyRows: DG.DataFrame;
|
|
15
|
-
|
|
16
15
|
|
|
17
|
-
|
|
16
|
+
test('activityCliffsOpens', async () => {
|
|
18
17
|
actCliffsDf = await readDataframe('tests/sample_MSA_data.csv');
|
|
19
18
|
actCliffsTableView = grok.shell.addTableView(actCliffsDf);
|
|
20
|
-
|
|
21
|
-
actCliffsTableViewWithEmptyRows = grok.shell.addTableView(actCliffsDfWithEmptyRows);
|
|
22
|
-
});
|
|
23
|
-
|
|
24
|
-
after(async () => {
|
|
19
|
+
await _testActivityCliffsOpen(actCliffsDf, 102, 'UMAP', 'MSA');
|
|
25
20
|
grok.shell.closeTable(actCliffsDf);
|
|
26
21
|
actCliffsTableView.close();
|
|
27
|
-
grok.shell.closeTable(actCliffsDfWithEmptyRows);
|
|
28
|
-
actCliffsTableViewWithEmptyRows.close();
|
|
29
22
|
});
|
|
30
23
|
|
|
31
|
-
test('
|
|
32
|
-
await
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
test('activityCliffsOpenWithEmptyRows', async () => {
|
|
24
|
+
test('activityCliffsWithEmptyRows', async () => {
|
|
25
|
+
actCliffsDfWithEmptyRows = await readDataframe('tests/sample_MSA_data_empty_vals.csv');
|
|
26
|
+
actCliffsTableViewWithEmptyRows = grok.shell.addTableView(actCliffsDfWithEmptyRows);
|
|
36
27
|
await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, 103, 'UMAP', 'MSA');
|
|
28
|
+
grok.shell.closeTable(actCliffsDfWithEmptyRows);
|
|
29
|
+
actCliffsTableViewWithEmptyRows.close();
|
|
37
30
|
});
|
|
38
|
-
|
|
39
31
|
});
|
|
@@ -2,10 +2,11 @@ import * as DG from 'datagrok-api/dg';
|
|
|
2
2
|
import {delay, expect} from '@datagrok-libraries/utils/src/test';
|
|
3
3
|
import {_package} from '../package-test';
|
|
4
4
|
import { activityCliffs } from '../package';
|
|
5
|
-
|
|
5
|
+
import * as grok from 'datagrok-api/grok';
|
|
6
6
|
|
|
7
7
|
export async function _testActivityCliffsOpen(df: DG.DataFrame, numberCliffs: number, method: string, colName: string) {
|
|
8
|
-
|
|
8
|
+
await grok.data.detectSemanticTypes(df);
|
|
9
|
+
const scatterPlot = await activityCliffs(
|
|
9
10
|
df,
|
|
10
11
|
df.col(colName)!,
|
|
11
12
|
df.col('Activity')!,
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
import {category, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
2
|
-
|
|
3
|
-
import * as grok from 'datagrok-api/grok';
|
|
4
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
|
|
5
|
+
import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
5
6
|
|
|
6
7
|
import {ConverterFunc} from './types';
|
|
7
8
|
import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
|
|
@@ -139,6 +140,7 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$
|
|
|
139
140
|
return function(srcCol: DG.Column): DG.Column {
|
|
140
141
|
const converter = new NotationConverter(srcCol);
|
|
141
142
|
const resCol = converter.convert(tgtNotation, tgtSeparator);
|
|
143
|
+
expect(resCol.getTag('units'), tgtNotation);
|
|
142
144
|
return resCol;
|
|
143
145
|
};
|
|
144
146
|
};
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
|
|
5
|
+
import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
6
|
+
import {saveAsFastaDo, wrapSequence} from '../utils/save-as-fasta';
|
|
7
|
+
import {WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
8
|
+
|
|
9
|
+
/** Input CSV, column selection, line width and expected FASTA text of one save-as-FASTA test case. */
type SaveAsFastaTestArgs = {
  srcCsv: string,
  idCols: string[],
  seqCol: string,
  lineWidth: number,
  tgtFasta: string,
};
|
|
10
|
+
|
|
11
|
+
// Tests for FASTA export: line wrapping of a sequence (wrapSequence) and building the
// FASTA text from id columns and a sequence column (saveAsFastaDo).
category('fastaExport', () => {
  enum WrapDataTest {
    single = 'single',
    multi = 'multi'
  }

  // Source sequences and the expected wrapped lines for a line width of 10 monomers.
  const wrapData: { [key: string]: { src: string, tgt: string[] } } = {
    [WrapDataTest.single]: {
      src: 'MDYKETLLMPKTDFPMRGGLP',
      tgt: ['MDYKETLLMP', 'KTDFPMRGGL', 'P'],
    },
    [WrapDataTest.multi]: {
      src: 'M[MeI]YKETLL[MeF]PKTDFPMRGGL[MeA]',
      tgt: ['M[MeI]YKETLL[MeF]P', 'KTDFPMRGGL', '[MeA]'],
    },
  };

  enum SaveAsFastaTests {
    test1 = 'test1',
    test2 = 'test2'
  }

  // Template literals below are compared verbatim; their line breaks are significant.
  const saveAsFastaData: {
    [key: string]: SaveAsFastaTestArgs
  } = {
    [SaveAsFastaTests.test1]: {
      srcCsv: `id,seq
1,MDYKETLLMP
2,KTDFPMRGGL
3,P`,
      idCols: ['id'],
      seqCol: 'seq',
      lineWidth: 10,
      tgtFasta: `>1
MDYKETLLMP
>2
KTDFPMRGGL
>3
P
`
    },
    [SaveAsFastaTests.test2]: {
      srcCsv: `id,id2,seq
1,seqA,M[MeI]YKETLL[MeF]P
2,seqB,KTDFPMRGGL
3,seqC,[MeA]
`,
      idCols: ['id2', 'id'],
      seqCol: 'seq',
      lineWidth: 5,
      tgtFasta: `>seqA|1
M[MeI]YKE
TLL[MeF]P
>seqB|2
KTDFP
MRGGL
>seqC|3
[MeA]
`
    }
  };

  test('wrapSequenceSingle', async () => {
    _testWrapSequence(WrapDataTest.single, 10);
  });

  test('wrapSequenceMulti', async () => {
    _testWrapSequence(WrapDataTest.multi, 10);
  });

  // _testSaveAsFasta is async: it must be awaited, otherwise a failed expectation only
  // rejects a dropped promise and the test is reported as passed.
  test('saveAsFastaTest1', async () => {
    await _testSaveAsFasta(saveAsFastaData[SaveAsFastaTests.test1]);
  });

  test('saveAsFastaTest2', async () => {
    await _testSaveAsFasta(saveAsFastaData[SaveAsFastaTests.test2]);
  });

  /** Wraps `wrapData[testKey].src` with the FASTA splitter and compares with the expected lines. */
  function _testWrapSequence(testKey: string, lineWidth: number = 10) {
    const splitter = WebLogo.splitterAsFasta;

    const srcSeq: string = wrapData[testKey].src;
    const wrapRes: string[] = wrapSequence(srcSeq, splitter, lineWidth);
    const wrapTgt: string[] = wrapData[testKey].tgt;

    expectArray(wrapRes, wrapTgt);
  }

  /** Parses the CSV, builds the FASTA text with saveAsFastaDo and compares it with `args.tgtFasta`. */
  async function _testSaveAsFasta(args: SaveAsFastaTestArgs): Promise<void> {
    const df: DG.DataFrame = DG.DataFrame.fromCsv(args.srcCsv);

    const seqCol: DG.Column = df.getCol(args.seqCol);
    const idCols: DG.Column[] = args.idCols.map((colName) => df.getCol(colName));

    const fastaRes: string = saveAsFastaDo(idCols, seqCol, args.lineWidth);
    expect(fastaRes, args.tgtFasta);
  }
});
|
|
110
|
+
|