@datagrok/bio 2.0.17 → 2.0.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-test.js +933 -622
- package/dist/package.js +771 -460
- package/package.json +9 -9
- package/src/analysis/sequence-activity-cliffs.ts +10 -10
- package/src/analysis/sequence-similarity-viewer.ts +4 -3
- package/src/calculations/monomerLevelMols.ts +5 -3
- package/src/package.ts +20 -14
- package/src/substructure-search/substructure-search.ts +51 -22
- package/src/tests/WebLogo-positions-test.ts +47 -47
- package/src/tests/WebLogo-test.ts +14 -14
- package/src/tests/fasta-export-tests.ts +2 -2
- package/src/tests/splitters-test.ts +7 -6
- package/src/utils/cell-renderer.ts +28 -31
- package/src/utils/convert.ts +10 -11
- package/src/utils/save-as-fasta.ts +5 -5
- package/src/utils/utils.ts +7 -5
- package/src/viewers/vd-regions-viewer.ts +12 -15
- package/src/widgets/bio-substructure-filter.ts +151 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.0.17",
|
|
8
|
+
"version": "2.0.19",
|
|
9
9
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
},
|
|
15
15
|
"dependencies": {
|
|
16
16
|
"@biowasm/aioli": "^3.1.0",
|
|
17
|
-
"@datagrok-libraries/bio": "^
|
|
17
|
+
"@datagrok-libraries/bio": "^5.0.0",
|
|
18
18
|
"@datagrok-libraries/chem-meta": "1.0.1",
|
|
19
19
|
"@datagrok-libraries/ml": "^6.2.0",
|
|
20
20
|
"@datagrok-libraries/utils": "^1.10.1",
|
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
"@luma.gl/constants": "^8.5.10",
|
|
24
24
|
"@luma.gl/core": "^8.5.10",
|
|
25
25
|
"@phylocanvas/phylocanvas.gl": "^1.44.0",
|
|
26
|
-
"cash-dom": "
|
|
26
|
+
"cash-dom": "^8.0.0",
|
|
27
27
|
"datagrok-api": "^1.7.0",
|
|
28
28
|
"dayjs": "^1.11.4",
|
|
29
29
|
"openchemlib": "6.0.1",
|
|
@@ -36,9 +36,9 @@
|
|
|
36
36
|
"@types/node": "^17.0.24",
|
|
37
37
|
"@types/node-fetch": "^2.6.2",
|
|
38
38
|
"@types/wu": "latest",
|
|
39
|
-
"@typescript-eslint/eslint-plugin": "
|
|
40
|
-
"@typescript-eslint/parser": "
|
|
41
|
-
"eslint": "
|
|
39
|
+
"@typescript-eslint/eslint-plugin": "^4.20.0",
|
|
40
|
+
"@typescript-eslint/parser": "^4.20.0",
|
|
41
|
+
"eslint": "^7.23.0",
|
|
42
42
|
"eslint-config-google": "latest",
|
|
43
43
|
"jest": "^27.5.1",
|
|
44
44
|
"jest-html-reporter": "^3.6.0",
|
|
@@ -47,9 +47,9 @@
|
|
|
47
47
|
"puppeteer": "^13.7.0",
|
|
48
48
|
"ts-jest": "^27.0.0",
|
|
49
49
|
"ts-loader": "^9.2.5",
|
|
50
|
-
"typescript": "^4.
|
|
51
|
-
"webpack": "
|
|
52
|
-
"webpack-cli": "^4.
|
|
50
|
+
"typescript": "^4.2.3",
|
|
51
|
+
"webpack": "^5.64.1",
|
|
52
|
+
"webpack-cli": "^4.6.0"
|
|
53
53
|
},
|
|
54
54
|
"scripts": {
|
|
55
55
|
"link-api": "npm link datagrok-api",
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
import
|
|
2
|
-
import * as DG from 'datagrok-api/dg';
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
3
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
5
|
+
|
|
6
|
+
import {ITooltipAndPanelParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
4
7
|
import {getSimilarityFromDistance} from '@datagrok-libraries/utils/src/similarity-metrics';
|
|
5
8
|
import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
6
|
-
import
|
|
7
|
-
import {SplitterFunc, WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
8
|
-
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
9
|
-
import {SEM_TYPES, TAGS} from '../utils/constants';
|
|
9
|
+
import {TAGS} from '../utils/constants';
|
|
10
10
|
import {drawMoleculeDifferenceOnCanvas} from '../utils/cell-renderer';
|
|
11
11
|
|
|
12
12
|
export async function getDistances(col: DG.Column, seq: string): Promise<Array<number>> {
|
|
@@ -14,7 +14,7 @@ export async function getDistances(col: DG.Column, seq: string): Promise<Array<n
|
|
|
14
14
|
const distances = new Array(stringArray.length).fill(0);
|
|
15
15
|
for (let i = 0; i < stringArray.length; ++i) {
|
|
16
16
|
const distance = stringArray[i] ? AvailableMetrics['String']['Levenshtein'](stringArray[i], seq) : null;
|
|
17
|
-
distances[i] = distance ? distance/Math.max((stringArray[i] as string).length, seq.length) : null;
|
|
17
|
+
distances[i] = distance ? distance / Math.max((stringArray[i] as string).length, seq.length) : null;
|
|
18
18
|
}
|
|
19
19
|
return distances;
|
|
20
20
|
}
|
|
@@ -61,7 +61,7 @@ export function createTooltipElement(params: ITooltipAndPanelParams): HTMLDivEle
|
|
|
61
61
|
}
|
|
62
62
|
|
|
63
63
|
function moleculeInfo(df: DG.DataFrame, idx: number, seqColName: string): HTMLElement {
|
|
64
|
-
const dict: {[key: string]: string} = {};
|
|
64
|
+
const dict: { [key: string]: string } = {};
|
|
65
65
|
for (const col of df.columns) {
|
|
66
66
|
if (col.name !== seqColName)
|
|
67
67
|
dict[col.name] = df.get(col.name, idx);
|
|
@@ -82,10 +82,10 @@ export function createPropPanelElement(params: ITooltipAndPanelParams): HTMLDivE
|
|
|
82
82
|
activitiesArray[idx] = params.activityCol.get(molIdx);
|
|
83
83
|
});
|
|
84
84
|
|
|
85
|
-
const molDifferences: {[key: number]: HTMLCanvasElement} = {};
|
|
85
|
+
const molDifferences: { [key: number]: HTMLCanvasElement } = {};
|
|
86
86
|
const units = params.seqCol.getTag(DG.TAGS.UNITS);
|
|
87
87
|
const separator = params.seqCol.getTag(TAGS.SEPARATOR);
|
|
88
|
-
const splitter =
|
|
88
|
+
const splitter = bio.getSplitter(units, separator);
|
|
89
89
|
const subParts1 = splitter(sequencesArray[0]);
|
|
90
90
|
const subParts2 = splitter(sequencesArray[1]);
|
|
91
91
|
const canvas = createDifferenceCanvas(subParts1, subParts2, units, molDifferences);
|
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
5
|
+
|
|
4
6
|
import {SequenceSearchBaseViewer} from './sequence-search-base-viewer';
|
|
5
7
|
import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
6
8
|
import * as C from '../utils/constants';
|
|
7
9
|
import {createDifferenceCanvas, createDifferencesWithPositions} from './sequence-activity-cliffs';
|
|
8
10
|
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
9
|
-
import {WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
10
11
|
import {TableView} from 'datagrok-api/dg';
|
|
11
|
-
import {
|
|
12
|
+
import {Subject} from 'rxjs';
|
|
12
13
|
|
|
13
14
|
export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
14
15
|
hotSearch: boolean;
|
|
@@ -84,7 +85,7 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
84
85
|
const molDifferences: { [key: number]: HTMLCanvasElement } = {};
|
|
85
86
|
const units = resDf.col('sequence')!.getTag(DG.TAGS.UNITS);
|
|
86
87
|
const separator = resDf.col('sequence')!.getTag(C.TAGS.SEPARATOR);
|
|
87
|
-
const splitter =
|
|
88
|
+
const splitter = bio.getSplitter(units, separator);
|
|
88
89
|
const subParts1 = splitter(this.moleculeColumn!.get(this.targetMoleculeIdx));
|
|
89
90
|
const subParts2 = splitter(resDf.get('sequence', resDf.currentRowIdx));
|
|
90
91
|
const canvas = createDifferenceCanvas(subParts1, subParts2, units, molDifferences);
|
|
@@ -1,19 +1,21 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
2
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
5
|
+
|
|
3
6
|
import * as C from '../utils/constants';
|
|
4
7
|
import {getHelmMonomers} from '../package';
|
|
5
|
-
import {WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
6
8
|
|
|
7
9
|
const V2000_ATOM_NAME_POS = 31;
|
|
8
10
|
|
|
9
11
|
export async function getMonomericMols(mcol: DG.Column, pattern: boolean = false): Promise<DG.Column> {
|
|
10
12
|
const separator: string = mcol.tags[C.TAGS.SEPARATOR];
|
|
11
13
|
const units: string = mcol.tags[DG.TAGS.UNITS];
|
|
12
|
-
const splitter =
|
|
14
|
+
const splitter = bio.getSplitter(units, separator);
|
|
13
15
|
let molV3000Array;
|
|
14
16
|
const monomersDict = new Map();
|
|
15
17
|
const monomers = units === 'helm' ?
|
|
16
|
-
getHelmMonomers(mcol) : Object.keys(
|
|
18
|
+
getHelmMonomers(mcol) : Object.keys(bio.getStats(mcol, 0, splitter).freq).filter((it) => it !== '');
|
|
17
19
|
|
|
18
20
|
for (let i = 0; i < monomers.length; i++)
|
|
19
21
|
monomersDict.set(monomers[i], `${i + 1}`);
|
package/src/package.ts
CHANGED
|
@@ -2,18 +2,15 @@
|
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
import * as DG from 'datagrok-api/dg';
|
|
5
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
5
6
|
|
|
6
7
|
export const _package = new DG.Package();
|
|
7
8
|
|
|
8
9
|
import {MacromoleculeDifferenceCellRenderer, MonomerCellRenderer} from './utils/cell-renderer';
|
|
9
|
-
import {WebLogo, SeqColStats} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
10
10
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
11
11
|
import {runKalign, testMSAEnoughMemory} from './utils/multiple-sequence-alignment';
|
|
12
12
|
import {SequenceAlignment, Aligned} from './seq_align';
|
|
13
|
-
import {Nucleotides} from '@datagrok-libraries/bio/src/nucleotides';
|
|
14
|
-
import {Aminoacids} from '@datagrok-libraries/bio/src/aminoacids';
|
|
15
13
|
import {getEmbeddingColsNames, sequenceSpace} from './analysis/sequence-space';
|
|
16
|
-
import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
17
14
|
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
18
15
|
import {createPropPanelElement, createTooltipElement, getSimilaritiesMarix} from './analysis/sequence-activity-cliffs';
|
|
19
16
|
import {createJsonMonomerLibFromSdf, encodeMonomers, getMolfilesFromSeq, HELM_CORE_LIB_FILENAME} from './utils/utils';
|
|
@@ -21,8 +18,6 @@ import {getMacroMol} from './utils/atomic-works';
|
|
|
21
18
|
import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
|
|
22
19
|
import {convert} from './utils/convert';
|
|
23
20
|
import {getMacroMolColumnPropertyPanel, representationsWidget} from './widgets/representations';
|
|
24
|
-
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
25
|
-
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
26
21
|
import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils';
|
|
27
22
|
import {
|
|
28
23
|
generateManySequences,
|
|
@@ -36,6 +31,7 @@ import {SequenceSimilarityViewer} from './analysis/sequence-similarity-viewer';
|
|
|
36
31
|
import {SequenceDiversityViewer} from './analysis/sequence-diversity-viewer';
|
|
37
32
|
import {substructureSearchDialog} from './substructure-search/substructure-search';
|
|
38
33
|
import {saveAsFastaUI} from './utils/save-as-fasta';
|
|
34
|
+
import {BioSubstructureFilter} from './widgets/bio-substructure-filter';
|
|
39
35
|
|
|
40
36
|
//tags: init
|
|
41
37
|
export async function initBio() {
|
|
@@ -92,7 +88,7 @@ export function checkInputColumn(
|
|
|
92
88
|
let res: boolean = true;
|
|
93
89
|
let msg: string = '';
|
|
94
90
|
|
|
95
|
-
const uh = new UnitsHandler(col);
|
|
91
|
+
const uh = new bio.UnitsHandler(col);
|
|
96
92
|
if (col.semType !== DG.SEMTYPE.MACROMOLECULE) {
|
|
97
93
|
grok.shell.warning(name + ' analysis is allowed for Macromolecules semantic type');
|
|
98
94
|
res = false;
|
|
@@ -142,7 +138,7 @@ export function sequenceAlignment(alignType: string, alignTable: string, gap: nu
|
|
|
142
138
|
//tags: viewer, panel
|
|
143
139
|
//output: viewer result
|
|
144
140
|
export function webLogoViewer() {
|
|
145
|
-
return new WebLogo();
|
|
141
|
+
return new bio.WebLogo();
|
|
146
142
|
}
|
|
147
143
|
|
|
148
144
|
//name: VdRegions
|
|
@@ -308,7 +304,7 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
308
304
|
if (col.semType != DG.SEMTYPE.MACROMOLECULE)
|
|
309
305
|
return false;
|
|
310
306
|
|
|
311
|
-
const colUH = new UnitsHandler(col);
|
|
307
|
+
const colUH = new bio.UnitsHandler(col);
|
|
312
308
|
// TODO: prevent for cyclic, branched or multiple chains in Helm
|
|
313
309
|
return true;
|
|
314
310
|
});
|
|
@@ -327,7 +323,7 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
327
323
|
return;
|
|
328
324
|
} else if (colList.length > 1) {
|
|
329
325
|
const colListNames: string [] = colList.map((col) => col.name);
|
|
330
|
-
const selectedCol = colList.find((c) => { return (new UnitsHandler(c)).isMsa(); });
|
|
326
|
+
const selectedCol = colList.find((c) => { return (new bio.UnitsHandler(c)).isMsa(); });
|
|
331
327
|
const colInput: DG.InputBase = ui.choiceInput(
|
|
332
328
|
'Column', selectedCol ? selectedCol.name : colListNames[0], colListNames);
|
|
333
329
|
ui.dialog({
|
|
@@ -379,7 +375,7 @@ export async function peptideMolecule(macroMolecule: DG.Cell): Promise<DG.Widget
|
|
|
379
375
|
//input: string fileContent
|
|
380
376
|
//output: list tables
|
|
381
377
|
export function importFasta(fileContent: string): DG.DataFrame [] {
|
|
382
|
-
const ffh = new FastaFileHandler(fileContent);
|
|
378
|
+
const ffh = new bio.FastaFileHandler(fileContent);
|
|
383
379
|
return ffh.importFasta();
|
|
384
380
|
}
|
|
385
381
|
|
|
@@ -454,7 +450,7 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
|
|
|
454
450
|
//tags: panel, bio
|
|
455
451
|
//input: column col {semType: Macromolecule}
|
|
456
452
|
export function splitToMonomers(col: DG.Column<string>): void {
|
|
457
|
-
if (!col.getTag(UnitsHandler.TAGS.aligned).includes(C.MSA))
|
|
453
|
+
if (!col.getTag(bio.UnitsHandler.TAGS.aligned).includes(C.MSA))
|
|
458
454
|
return grok.shell.error('Splitting is applicable only for aligned sequences');
|
|
459
455
|
|
|
460
456
|
const tempDf = splitAlignedSequences(col);
|
|
@@ -471,7 +467,7 @@ export function splitToMonomers(col: DG.Column<string>): void {
|
|
|
471
467
|
//name: Bio: getHelmMonomers
|
|
472
468
|
//input: column col {semType: Macromolecule}
|
|
473
469
|
export function getHelmMonomers(seqCol: DG.Column<string>): string[] {
|
|
474
|
-
const stats =
|
|
470
|
+
const stats = bio.getStats(seqCol, 1, bio.splitterAsHelm);
|
|
475
471
|
return Object.keys(stats.freq);
|
|
476
472
|
}
|
|
477
473
|
|
|
@@ -522,4 +518,14 @@ export function bioSubstructureSearch(col: DG.Column): void {
|
|
|
522
518
|
//tags: fileExporter
|
|
523
519
|
export function saveAsFasta() {
|
|
524
520
|
saveAsFastaUI();
|
|
525
|
-
}
|
|
521
|
+
}
|
|
522
|
+
//name: BioSubstructureFilter
|
|
523
|
+
//description: Substructure filter for linear macromolecules
|
|
524
|
+
//tags: filter
|
|
525
|
+
//output: filter result
|
|
526
|
+
//meta.semType: Macromolecule
|
|
527
|
+
export function bioSubstructureFilter(): BioSubstructureFilter {
|
|
528
|
+
return new BioSubstructureFilter();
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
|
|
@@ -3,6 +3,9 @@ import * as ui from 'datagrok-api/ui';
|
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
import {NOTATION} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
5
5
|
import * as C from '../utils/constants';
|
|
6
|
+
import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
7
|
+
import {BitSet} from 'datagrok-api/dg';
|
|
8
|
+
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
6
9
|
|
|
7
10
|
/**
|
|
8
11
|
* Searches substructure in each row of Macromolecule column
|
|
@@ -12,48 +15,58 @@ import * as C from '../utils/constants';
|
|
|
12
15
|
export function substructureSearchDialog(col: DG.Column): void {
|
|
13
16
|
const units = col.getTag(DG.TAGS.UNITS);
|
|
14
17
|
const separator = col.getTag(C.TAGS.SEPARATOR);
|
|
15
|
-
const notations = [NOTATION.FASTA, NOTATION.SEPARATOR];
|
|
18
|
+
// const notations = [NOTATION.FASTA, NOTATION.SEPARATOR, NOTATION.HELM];
|
|
16
19
|
|
|
17
20
|
const substructureInput = ui.textInput('Substructure', '');
|
|
18
|
-
|
|
21
|
+
|
|
22
|
+
const editHelmLink = ui.link('Edit helm', async () => {
|
|
23
|
+
updateDivInnerHTML(inputsDiv, grid.root);
|
|
24
|
+
await ui.tools.waitForElementInDom(grid.root);
|
|
25
|
+
setTimeout(() => {
|
|
26
|
+
grid.cell('substr_helm', 0).element.children[0].dispatchEvent(new KeyboardEvent('keydown', {key: 'Enter'}));
|
|
27
|
+
}, 100);
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
const df = DG.DataFrame.create(1);
|
|
31
|
+
df.columns.addNewString('substr_helm').init((i) => '');
|
|
32
|
+
df.col('substr_helm')!.semType = col.semType;
|
|
33
|
+
df.col('substr_helm')!.setTag(DG.TAGS.UNITS, NOTATION.HELM);
|
|
34
|
+
const grid = df.plot.grid();
|
|
19
35
|
const separatorInput = ui.textInput('Separator', separator);
|
|
20
36
|
|
|
21
|
-
|
|
22
|
-
const toggleSeparator = () => {
|
|
23
|
-
if (notationInput.value !== NOTATION.SEPARATOR)
|
|
24
|
-
separatorInput.root.hidden = true;
|
|
25
|
-
else
|
|
26
|
-
separatorInput.root.hidden = false;
|
|
27
|
-
};
|
|
37
|
+
const inputsDiv = ui.div();
|
|
28
38
|
|
|
29
|
-
|
|
39
|
+
const inputs = units === NOTATION.HELM ? ui.divV([editHelmLink]) :
|
|
40
|
+
units === NOTATION.SEPARATOR ? ui.inputs([substructureInput, separatorInput]) :
|
|
41
|
+
ui.inputs([substructureInput]);
|
|
30
42
|
|
|
31
|
-
|
|
32
|
-
toggleSeparator();
|
|
33
|
-
});
|
|
43
|
+
updateDivInnerHTML(inputsDiv, inputs);
|
|
34
44
|
|
|
35
45
|
ui.dialog('Substructure search')
|
|
36
|
-
.add(ui.
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
separatorInput
|
|
46
|
+
.add(ui.divV([
|
|
47
|
+
ui.divText(`Notation: ${units}`),
|
|
48
|
+
inputsDiv
|
|
40
49
|
]))
|
|
41
|
-
.onOK(() => {
|
|
42
|
-
let substructure = substructureInput.value;
|
|
43
|
-
if (
|
|
50
|
+
.onOK(async () => {
|
|
51
|
+
let substructure = units === NOTATION.HELM ? df.get('substr_helm', 0) : substructureInput.value;
|
|
52
|
+
if (units === NOTATION.SEPARATOR && separatorInput.value !== separator && separatorInput.value !== '')
|
|
44
53
|
substructure = substructure.replaceAll(separatorInput.value, separator);
|
|
45
54
|
const matchesColName = `Matches: ${substructure}`;
|
|
46
55
|
const colExists = col.dataFrame.columns.names()
|
|
47
56
|
.filter((it) => it.toLocaleLowerCase() === matchesColName.toLocaleLowerCase()).length > 0;
|
|
48
57
|
if (!colExists) {
|
|
49
|
-
|
|
58
|
+
let matches: BitSet;
|
|
59
|
+
if (units === NOTATION.HELM)
|
|
60
|
+
matches = await helmSubstructureSearch(substructure, col);
|
|
61
|
+
else
|
|
62
|
+
matches = linearSubstructureSearch(substructure, col);
|
|
50
63
|
col.dataFrame.columns.add(DG.Column.fromBitSet(matchesColName, matches));
|
|
51
64
|
} else { grok.shell.warning(`Search ${substructure} is already performed`); }
|
|
52
65
|
})
|
|
53
66
|
.show();
|
|
54
67
|
}
|
|
55
68
|
|
|
56
|
-
export function substructureSearch(substructure: string, col: DG.Column): DG.BitSet {
|
|
69
|
+
export function linearSubstructureSearch(substructure: string, col: DG.Column): DG.BitSet {
|
|
57
70
|
const lowerCaseSubstr = substructure.toLowerCase();
|
|
58
71
|
const resultArray = DG.BitSet.create(col.length);
|
|
59
72
|
for (let i = 0; i < col.length; i++) {
|
|
@@ -63,3 +76,19 @@ export function substructureSearch(substructure: string, col: DG.Column): DG.Bit
|
|
|
63
76
|
}
|
|
64
77
|
return resultArray;
|
|
65
78
|
}
|
|
79
|
+
|
|
80
|
+
async function helmSubstructureSearch(substructure: string, col: DG.Column): Promise<BitSet> {
|
|
81
|
+
const helmColWithSubstructure = DG.Column.string('helm', col.length + 1)
|
|
82
|
+
.init((i) => i === col.length ? substructure : col.get(i));
|
|
83
|
+
helmColWithSubstructure.setTag(DG.TAGS.UNITS, NOTATION.HELM);
|
|
84
|
+
const monomericMolsCol = await getMonomericMols(helmColWithSubstructure, true);
|
|
85
|
+
const molSubstructure = monomericMolsCol.get(col.length);
|
|
86
|
+
const monomericMolsDf = DG.DataFrame.fromColumns([monomericMolsCol]);
|
|
87
|
+
monomericMolsDf.rows.removeAt(col.length);
|
|
88
|
+
const matchesCol = await grok.functions.call('Chem:searchSubstructure', {
|
|
89
|
+
molStringsColumn: monomericMolsDf.columns.byIndex(0),
|
|
90
|
+
molString: molSubstructure,
|
|
91
|
+
molBlockFailover: '',
|
|
92
|
+
});
|
|
93
|
+
return matchesCol.get(0);
|
|
94
|
+
}
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import {after, before, category, test, expect, expectObject, delay} from '@datagrok-libraries/utils/src/test';
|
|
2
|
-
|
|
3
1
|
import * as grok from 'datagrok-api/grok';
|
|
4
2
|
import * as ui from 'datagrok-api/ui';
|
|
5
3
|
import * as DG from 'datagrok-api/dg';
|
|
6
|
-
import
|
|
4
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
5
|
+
|
|
6
|
+
import {after, before, category, test, expect, expectObject, delay} from '@datagrok-libraries/utils/src/test';
|
|
7
7
|
import {Column} from 'datagrok-api/dg';
|
|
8
8
|
import {ALPHABET, NOTATION, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
9
9
|
|
|
@@ -41,27 +41,27 @@ ATC-G-TTGC--
|
|
|
41
41
|
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
42
42
|
seqCol.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
|
|
43
43
|
|
|
44
|
-
const wlViewer: WebLogo = (await df.plot.fromType('WebLogo')) as WebLogo;
|
|
44
|
+
const wlViewer: bio.WebLogo = (await df.plot.fromType('WebLogo')) as bio.WebLogo;
|
|
45
45
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
46
46
|
|
|
47
47
|
tvList.push(tv);
|
|
48
48
|
dfList.push(df);
|
|
49
49
|
|
|
50
|
-
const positions: PositionInfo[] = wlViewer['positions'];
|
|
51
|
-
|
|
52
|
-
const resAllDf1: PositionInfo[] = [
|
|
53
|
-
new PositionInfo('1', {'A': new PositionMonomerInfo(2), '-': new PositionMonomerInfo(3)}),
|
|
54
|
-
new PositionInfo('2', {'T': new PositionMonomerInfo(5)}),
|
|
55
|
-
new PositionInfo('3', {'C': new PositionMonomerInfo(5)}),
|
|
56
|
-
new PositionInfo('4', {'-': new PositionMonomerInfo(5)}),
|
|
57
|
-
new PositionInfo('5', {'G': new PositionMonomerInfo(5)}),
|
|
58
|
-
new PositionInfo('6', {'-': new PositionMonomerInfo(3), 'C': new PositionMonomerInfo(2)}),
|
|
59
|
-
new PositionInfo('7', {'T': new PositionMonomerInfo(5)}),
|
|
60
|
-
new PositionInfo('8', {'T': new PositionMonomerInfo(5)}),
|
|
61
|
-
new PositionInfo('9', {'G': new PositionMonomerInfo(5)}),
|
|
62
|
-
new PositionInfo('10', {'C': new PositionMonomerInfo(5)}),
|
|
63
|
-
new PositionInfo('11', {'-': new PositionMonomerInfo(5)}),
|
|
64
|
-
new PositionInfo('12', {'-': new PositionMonomerInfo(5)})
|
|
50
|
+
const positions: bio.PositionInfo[] = wlViewer['positions'];
|
|
51
|
+
|
|
52
|
+
const resAllDf1: bio.PositionInfo[] = [
|
|
53
|
+
new bio.PositionInfo('1', {'A': new bio.PositionMonomerInfo(2), '-': new bio.PositionMonomerInfo(3)}),
|
|
54
|
+
new bio.PositionInfo('2', {'T': new bio.PositionMonomerInfo(5)}),
|
|
55
|
+
new bio.PositionInfo('3', {'C': new bio.PositionMonomerInfo(5)}),
|
|
56
|
+
new bio.PositionInfo('4', {'-': new bio.PositionMonomerInfo(5)}),
|
|
57
|
+
new bio.PositionInfo('5', {'G': new bio.PositionMonomerInfo(5)}),
|
|
58
|
+
new bio.PositionInfo('6', {'-': new bio.PositionMonomerInfo(3), 'C': new bio.PositionMonomerInfo(2)}),
|
|
59
|
+
new bio.PositionInfo('7', {'T': new bio.PositionMonomerInfo(5)}),
|
|
60
|
+
new bio.PositionInfo('8', {'T': new bio.PositionMonomerInfo(5)}),
|
|
61
|
+
new bio.PositionInfo('9', {'G': new bio.PositionMonomerInfo(5)}),
|
|
62
|
+
new bio.PositionInfo('10', {'C': new bio.PositionMonomerInfo(5)}),
|
|
63
|
+
new bio.PositionInfo('11', {'-': new bio.PositionMonomerInfo(5)}),
|
|
64
|
+
new bio.PositionInfo('12', {'-': new bio.PositionMonomerInfo(5)}),
|
|
65
65
|
];
|
|
66
66
|
|
|
67
67
|
expect(positions.length, resAllDf1.length);
|
|
@@ -94,25 +94,25 @@ ATC-G-TTGC--
|
|
|
94
94
|
return i > 2;
|
|
95
95
|
});
|
|
96
96
|
df.filter.fireChanged();
|
|
97
|
-
const wlViewer: WebLogo = (await df.plot.fromType('WebLogo',
|
|
98
|
-
{'shrinkEmptyTail': true})) as WebLogo;
|
|
97
|
+
const wlViewer: bio.WebLogo = (await df.plot.fromType('WebLogo',
|
|
98
|
+
{'shrinkEmptyTail': true})) as bio.WebLogo;
|
|
99
99
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
100
100
|
|
|
101
101
|
tvList.push(tv);
|
|
102
102
|
dfList.push(df);
|
|
103
103
|
|
|
104
|
-
const positions: PositionInfo[] = wlViewer['positions'];
|
|
105
|
-
|
|
106
|
-
const resAllDf1: PositionInfo[] = [
|
|
107
|
-
new PositionInfo('1', {'-': new PositionMonomerInfo(3)}),
|
|
108
|
-
new PositionInfo('2', {'T': new PositionMonomerInfo(3)}),
|
|
109
|
-
new PositionInfo('3', {'-': new PositionMonomerInfo(3)}),
|
|
110
|
-
new PositionInfo('4', {'-': new PositionMonomerInfo(3)}),
|
|
111
|
-
new PositionInfo('5', {'C': new PositionMonomerInfo(3)}),
|
|
112
|
-
new PositionInfo('6', {'-': new PositionMonomerInfo(2), 'C': new PositionMonomerInfo(1)}),
|
|
113
|
-
new PositionInfo('7', {'G': new PositionMonomerInfo(3)}),
|
|
114
|
-
new PositionInfo('8', {'T': new PositionMonomerInfo(3)}),
|
|
115
|
-
new PositionInfo('9', {'-': new PositionMonomerInfo(3)}),
|
|
104
|
+
const positions: bio.PositionInfo[] = wlViewer['positions'];
|
|
105
|
+
|
|
106
|
+
const resAllDf1: bio.PositionInfo[] = [
|
|
107
|
+
new bio.PositionInfo('1', {'-': new bio.PositionMonomerInfo(3)}),
|
|
108
|
+
new bio.PositionInfo('2', {'T': new bio.PositionMonomerInfo(3)}),
|
|
109
|
+
new bio.PositionInfo('3', {'-': new bio.PositionMonomerInfo(3)}),
|
|
110
|
+
new bio.PositionInfo('4', {'-': new bio.PositionMonomerInfo(3)}),
|
|
111
|
+
new bio.PositionInfo('5', {'C': new bio.PositionMonomerInfo(3)}),
|
|
112
|
+
new bio.PositionInfo('6', {'-': new bio.PositionMonomerInfo(2), 'C': new bio.PositionMonomerInfo(1)}),
|
|
113
|
+
new bio.PositionInfo('7', {'G': new bio.PositionMonomerInfo(3)}),
|
|
114
|
+
new bio.PositionInfo('8', {'T': new bio.PositionMonomerInfo(3)}),
|
|
115
|
+
new bio.PositionInfo('9', {'-': new bio.PositionMonomerInfo(3)}),
|
|
116
116
|
];
|
|
117
117
|
|
|
118
118
|
expect(positions.length, resAllDf1.length);
|
|
@@ -134,25 +134,25 @@ ATC-G-TTGC--
|
|
|
134
134
|
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
135
135
|
seqCol.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
|
|
136
136
|
|
|
137
|
-
const wlViewer: WebLogo = (await df.plot.fromType('WebLogo',
|
|
138
|
-
{'skipEmptyPositions': true})) as WebLogo;
|
|
137
|
+
const wlViewer: bio.WebLogo = (await df.plot.fromType('WebLogo',
|
|
138
|
+
{'skipEmptyPositions': true})) as bio.WebLogo;
|
|
139
139
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
140
140
|
|
|
141
141
|
tvList.push(tv);
|
|
142
142
|
dfList.push(df);
|
|
143
143
|
|
|
144
|
-
const positions: PositionInfo[] = wlViewer['positions'];
|
|
145
|
-
|
|
146
|
-
const resAllDf1: PositionInfo[] = [
|
|
147
|
-
new PositionInfo('1', {'A': new PositionMonomerInfo(2), '-': new PositionMonomerInfo(3)}),
|
|
148
|
-
new PositionInfo('2', {'T': new PositionMonomerInfo(5)}),
|
|
149
|
-
new PositionInfo('3', {'C': new PositionMonomerInfo(5)}),
|
|
150
|
-
new PositionInfo('5', {'G': new PositionMonomerInfo(5)}),
|
|
151
|
-
new PositionInfo('6', {'-': new PositionMonomerInfo(3), 'C': new PositionMonomerInfo(2)}),
|
|
152
|
-
new PositionInfo('7', {'T': new PositionMonomerInfo(5)}),
|
|
153
|
-
new PositionInfo('8', {'T': new PositionMonomerInfo(5)}),
|
|
154
|
-
new PositionInfo('9', {'G': new PositionMonomerInfo(5)}),
|
|
155
|
-
new PositionInfo('10', {'C': new PositionMonomerInfo(5)})
|
|
144
|
+
const positions: bio.PositionInfo[] = wlViewer['positions'];
|
|
145
|
+
|
|
146
|
+
const resAllDf1: bio.PositionInfo[] = [
|
|
147
|
+
new bio.PositionInfo('1', {'A': new bio.PositionMonomerInfo(2), '-': new bio.PositionMonomerInfo(3)}),
|
|
148
|
+
new bio.PositionInfo('2', {'T': new bio.PositionMonomerInfo(5)}),
|
|
149
|
+
new bio.PositionInfo('3', {'C': new bio.PositionMonomerInfo(5)}),
|
|
150
|
+
new bio.PositionInfo('5', {'G': new bio.PositionMonomerInfo(5)}),
|
|
151
|
+
new bio.PositionInfo('6', {'-': new bio.PositionMonomerInfo(3), 'C': new bio.PositionMonomerInfo(2)}),
|
|
152
|
+
new bio.PositionInfo('7', {'T': new bio.PositionMonomerInfo(5)}),
|
|
153
|
+
new bio.PositionInfo('8', {'T': new bio.PositionMonomerInfo(5)}),
|
|
154
|
+
new bio.PositionInfo('9', {'G': new bio.PositionMonomerInfo(5)}),
|
|
155
|
+
new bio.PositionInfo('10', {'C': new bio.PositionMonomerInfo(5)})
|
|
156
156
|
];
|
|
157
157
|
|
|
158
158
|
expect(positions.length, resAllDf1.length);
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
2
|
-
|
|
3
1
|
import * as grok from 'datagrok-api/grok';
|
|
4
2
|
import * as ui from 'datagrok-api/ui';
|
|
5
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
6
5
|
|
|
6
|
+
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
7
7
|
import {Nucleotides, NucleotidesPalettes} from '@datagrok-libraries/bio/src/nucleotides';
|
|
8
8
|
import {Aminoacids, AminoacidsPalettes} from '@datagrok-libraries/bio/src/aminoacids';
|
|
9
9
|
import {WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
@@ -63,19 +63,19 @@ XZJ{}2
|
|
|
63
63
|
|
|
64
64
|
category('WebLogo.monomerToShort', () => {
|
|
65
65
|
test('longMonomerSingle', async () => {
|
|
66
|
-
await expect(
|
|
66
|
+
await expect(bio.monomerToShort('S', 5), 'S');
|
|
67
67
|
});
|
|
68
68
|
test('longMonomerShort', async () => {
|
|
69
|
-
await expect(
|
|
69
|
+
await expect(bio.monomerToShort('Short', 5), 'Short');
|
|
70
70
|
});
|
|
71
71
|
test('longMonomerLong56', async () => {
|
|
72
|
-
await expect(
|
|
72
|
+
await expect(bio.monomerToShort('Long56', 5), 'Long5…');
|
|
73
73
|
});
|
|
74
74
|
test('longMonomerComplexFirstPartShort', async () => {
|
|
75
|
-
await expect(
|
|
75
|
+
await expect(bio.monomerToShort('Long-long', 5), 'Long…');
|
|
76
76
|
});
|
|
77
77
|
test('longMonomerComplexFirstPartLong56', async () => {
|
|
78
|
-
await expect(
|
|
78
|
+
await expect(bio.monomerToShort('Long56-long', 5), 'Long5…');
|
|
79
79
|
});
|
|
80
80
|
});
|
|
81
81
|
|
|
@@ -83,7 +83,7 @@ category('WebLogo.monomerToShort', () => {
|
|
|
83
83
|
export async function _testGetStats(csvDfN1: string) {
|
|
84
84
|
const dfN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
|
|
85
85
|
const seqCol: DG.Column = dfN1.col('seq')!;
|
|
86
|
-
const stats =
|
|
86
|
+
const stats = bio.getStats(seqCol, 5, bio.splitterAsFasta);
|
|
87
87
|
|
|
88
88
|
expectObject(stats.freq, {
|
|
89
89
|
'A': 4,
|
|
@@ -103,7 +103,7 @@ export async function _testGetAlphabetSimilarity() {
|
|
|
103
103
|
'-': 1000
|
|
104
104
|
};
|
|
105
105
|
const alphabet: Set<string> = new Set(Object.keys(Nucleotides.Names));
|
|
106
|
-
const res =
|
|
106
|
+
const res = bio.getAlphabetSimilarity(freq, alphabet);
|
|
107
107
|
|
|
108
108
|
expect(res > 0.6, true);
|
|
109
109
|
}
|
|
@@ -111,7 +111,7 @@ export async function _testGetAlphabetSimilarity() {
|
|
|
111
111
|
export async function _testPickupPaletteN1(csvDfN1: string) {
|
|
112
112
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
|
|
113
113
|
const col: DG.Column = df.col('seq')!;
|
|
114
|
-
const cp =
|
|
114
|
+
const cp = bio.pickUpPalette(col);
|
|
115
115
|
|
|
116
116
|
expect(cp instanceof NucleotidesPalettes, true);
|
|
117
117
|
}
|
|
@@ -119,7 +119,7 @@ export async function _testPickupPaletteN1(csvDfN1: string) {
|
|
|
119
119
|
export async function _testPickupPaletteN1e(csvDfN1e: string) {
|
|
120
120
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1e);
|
|
121
121
|
const col: DG.Column = df.col('seq')!;
|
|
122
|
-
const cp =
|
|
122
|
+
const cp = bio.pickUpPalette(col);
|
|
123
123
|
|
|
124
124
|
expect(cp instanceof NucleotidesPalettes, true);
|
|
125
125
|
}
|
|
@@ -127,7 +127,7 @@ export async function _testPickupPaletteN1e(csvDfN1e: string) {
|
|
|
127
127
|
export async function _testPickupPaletteAA1(csvDfAA1: string) {
|
|
128
128
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfAA1);
|
|
129
129
|
const col: DG.Column = df.col('seq')!;
|
|
130
|
-
const cp =
|
|
130
|
+
const cp = bio.pickUpPalette(col);
|
|
131
131
|
|
|
132
132
|
expect(cp instanceof AminoacidsPalettes, true);
|
|
133
133
|
}
|
|
@@ -135,14 +135,14 @@ export async function _testPickupPaletteAA1(csvDfAA1: string) {
|
|
|
135
135
|
export async function _testPickupPaletteX(csvDfX: string) {
|
|
136
136
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfX);
|
|
137
137
|
const col: DG.Column = df.col('seq')!;
|
|
138
|
-
const cp =
|
|
138
|
+
const cp = bio.pickUpPalette(col);
|
|
139
139
|
|
|
140
140
|
expect(cp instanceof UnknownSeqPalette, true);
|
|
141
141
|
}
|
|
142
142
|
|
|
143
143
|
export async function _testPickupPaletteAA2(dfAA2: DG.DataFrame) {
|
|
144
144
|
const seqCol: DG.Column = dfAA2.col('seq')!;
|
|
145
|
-
const cp =
|
|
145
|
+
const cp = bio.pickUpPalette(seqCol);
|
|
146
146
|
|
|
147
147
|
expect(cp instanceof AminoacidsPalettes, true);
|
|
148
148
|
}
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
4
5
|
|
|
5
6
|
import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
6
7
|
import {saveAsFastaDo, wrapSequence} from '../utils/save-as-fasta';
|
|
7
|
-
import {WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
8
8
|
|
|
9
9
|
type SaveAsFastaTestArgs = { srcCsv: string, idCols: string [], seqCol: string, lineWidth: number, tgtFasta: string };
|
|
10
10
|
|
|
@@ -88,7 +88,7 @@ MRGGL
|
|
|
88
88
|
});
|
|
89
89
|
|
|
90
90
|
function _testWrapSequence(testKey: string, lineWidth: number = 10) {
|
|
91
|
-
const splitter =
|
|
91
|
+
const splitter = bio.splitterAsFasta;
|
|
92
92
|
|
|
93
93
|
const srcSeq: string = wrapData[testKey].src;
|
|
94
94
|
const wrapRes: string[] = wrapSequence(srcSeq, splitter, lineWidth);
|