@datagrok/bio 2.0.16 → 2.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-test.js +1058 -718
- package/dist/package.js +741 -535
- package/package.json +14 -14
- package/src/analysis/sequence-activity-cliffs.ts +10 -10
- package/src/analysis/sequence-similarity-viewer.ts +4 -3
- package/src/calculations/monomerLevelMols.ts +5 -3
- package/src/package-test.ts +1 -0
- package/src/package.ts +16 -14
- package/src/tests/WebLogo-positions-test.ts +47 -46
- package/src/tests/WebLogo-test.ts +14 -14
- package/src/tests/convert-test.ts +5 -3
- package/src/tests/fasta-export-tests.ts +110 -0
- package/src/tests/splitters-test.ts +19 -5
- package/src/utils/cell-renderer.ts +24 -29
- package/src/utils/convert.ts +11 -12
- package/src/utils/multiple-sequence-alignment.ts +4 -5
- package/src/utils/save-as-fasta.ts +109 -0
- package/src/utils/utils.ts +7 -5
- package/src/viewers/vd-regions-viewer.ts +16 -17
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.0.
|
|
8
|
+
"version": "2.0.18",
|
|
9
9
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -13,18 +13,18 @@
|
|
|
13
13
|
"directory": "packages/Bio"
|
|
14
14
|
},
|
|
15
15
|
"dependencies": {
|
|
16
|
-
"@biowasm/aioli": "
|
|
17
|
-
"@datagrok-libraries/bio": "^
|
|
18
|
-
"@datagrok-libraries/chem-meta": "1.0.
|
|
16
|
+
"@biowasm/aioli": "^3.1.0",
|
|
17
|
+
"@datagrok-libraries/bio": "^5.0.0",
|
|
18
|
+
"@datagrok-libraries/chem-meta": "1.0.1",
|
|
19
19
|
"@datagrok-libraries/ml": "^6.2.0",
|
|
20
|
-
"@datagrok-libraries/utils": "^1.
|
|
20
|
+
"@datagrok-libraries/utils": "^1.10.1",
|
|
21
21
|
"@deck.gl/core": "^8.7.5",
|
|
22
22
|
"@deck.gl/layers": "^8.7.5",
|
|
23
23
|
"@luma.gl/constants": "^8.5.10",
|
|
24
24
|
"@luma.gl/core": "^8.5.10",
|
|
25
|
-
"@phylocanvas/phylocanvas.gl": "^1.
|
|
26
|
-
"cash-dom": "
|
|
27
|
-
"datagrok-api": "^1.
|
|
25
|
+
"@phylocanvas/phylocanvas.gl": "^1.44.0",
|
|
26
|
+
"cash-dom": "^8.0.0",
|
|
27
|
+
"datagrok-api": "^1.7.0",
|
|
28
28
|
"dayjs": "^1.11.4",
|
|
29
29
|
"openchemlib": "6.0.1",
|
|
30
30
|
"rxjs": "^6.5.5",
|
|
@@ -36,9 +36,9 @@
|
|
|
36
36
|
"@types/node": "^17.0.24",
|
|
37
37
|
"@types/node-fetch": "^2.6.2",
|
|
38
38
|
"@types/wu": "latest",
|
|
39
|
-
"@typescript-eslint/eslint-plugin": "
|
|
40
|
-
"@typescript-eslint/parser": "
|
|
41
|
-
"eslint": "
|
|
39
|
+
"@typescript-eslint/eslint-plugin": "^4.20.0",
|
|
40
|
+
"@typescript-eslint/parser": "^4.20.0",
|
|
41
|
+
"eslint": "^7.23.0",
|
|
42
42
|
"eslint-config-google": "latest",
|
|
43
43
|
"jest": "^27.5.1",
|
|
44
44
|
"jest-html-reporter": "^3.6.0",
|
|
@@ -47,9 +47,9 @@
|
|
|
47
47
|
"puppeteer": "^13.7.0",
|
|
48
48
|
"ts-jest": "^27.0.0",
|
|
49
49
|
"ts-loader": "^9.2.5",
|
|
50
|
-
"typescript": "^4.
|
|
51
|
-
"webpack": "
|
|
52
|
-
"webpack-cli": "^4.
|
|
50
|
+
"typescript": "^4.2.3",
|
|
51
|
+
"webpack": "^5.64.1",
|
|
52
|
+
"webpack-cli": "^4.6.0"
|
|
53
53
|
},
|
|
54
54
|
"scripts": {
|
|
55
55
|
"link-api": "npm link datagrok-api",
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
import
|
|
2
|
-
import * as DG from 'datagrok-api/dg';
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
3
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
5
|
+
|
|
6
|
+
import {ITooltipAndPanelParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
4
7
|
import {getSimilarityFromDistance} from '@datagrok-libraries/utils/src/similarity-metrics';
|
|
5
8
|
import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
6
|
-
import
|
|
7
|
-
import {SplitterFunc, WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
8
|
-
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
9
|
-
import {SEM_TYPES, TAGS} from '../utils/constants';
|
|
9
|
+
import {TAGS} from '../utils/constants';
|
|
10
10
|
import {drawMoleculeDifferenceOnCanvas} from '../utils/cell-renderer';
|
|
11
11
|
|
|
12
12
|
export async function getDistances(col: DG.Column, seq: string): Promise<Array<number>> {
|
|
@@ -14,7 +14,7 @@ export async function getDistances(col: DG.Column, seq: string): Promise<Array<n
|
|
|
14
14
|
const distances = new Array(stringArray.length).fill(0);
|
|
15
15
|
for (let i = 0; i < stringArray.length; ++i) {
|
|
16
16
|
const distance = stringArray[i] ? AvailableMetrics['String']['Levenshtein'](stringArray[i], seq) : null;
|
|
17
|
-
distances[i] = distance ? distance/Math.max((stringArray[i] as string).length, seq.length) : null;
|
|
17
|
+
distances[i] = distance ? distance / Math.max((stringArray[i] as string).length, seq.length) : null;
|
|
18
18
|
}
|
|
19
19
|
return distances;
|
|
20
20
|
}
|
|
@@ -61,7 +61,7 @@ export function createTooltipElement(params: ITooltipAndPanelParams): HTMLDivEle
|
|
|
61
61
|
}
|
|
62
62
|
|
|
63
63
|
function moleculeInfo(df: DG.DataFrame, idx: number, seqColName: string): HTMLElement {
|
|
64
|
-
const dict: {[key: string]: string} = {};
|
|
64
|
+
const dict: { [key: string]: string } = {};
|
|
65
65
|
for (const col of df.columns) {
|
|
66
66
|
if (col.name !== seqColName)
|
|
67
67
|
dict[col.name] = df.get(col.name, idx);
|
|
@@ -82,10 +82,10 @@ export function createPropPanelElement(params: ITooltipAndPanelParams): HTMLDivE
|
|
|
82
82
|
activitiesArray[idx] = params.activityCol.get(molIdx);
|
|
83
83
|
});
|
|
84
84
|
|
|
85
|
-
const molDifferences: {[key: number]: HTMLCanvasElement} = {};
|
|
85
|
+
const molDifferences: { [key: number]: HTMLCanvasElement } = {};
|
|
86
86
|
const units = params.seqCol.getTag(DG.TAGS.UNITS);
|
|
87
87
|
const separator = params.seqCol.getTag(TAGS.SEPARATOR);
|
|
88
|
-
const splitter =
|
|
88
|
+
const splitter = bio.getSplitter(units, separator);
|
|
89
89
|
const subParts1 = splitter(sequencesArray[0]);
|
|
90
90
|
const subParts2 = splitter(sequencesArray[1]);
|
|
91
91
|
const canvas = createDifferenceCanvas(subParts1, subParts2, units, molDifferences);
|
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
5
|
+
|
|
4
6
|
import {SequenceSearchBaseViewer} from './sequence-search-base-viewer';
|
|
5
7
|
import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
6
8
|
import * as C from '../utils/constants';
|
|
7
9
|
import {createDifferenceCanvas, createDifferencesWithPositions} from './sequence-activity-cliffs';
|
|
8
10
|
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
9
|
-
import {WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
10
11
|
import {TableView} from 'datagrok-api/dg';
|
|
11
|
-
import {
|
|
12
|
+
import {Subject} from 'rxjs';
|
|
12
13
|
|
|
13
14
|
export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
14
15
|
hotSearch: boolean;
|
|
@@ -84,7 +85,7 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
|
|
|
84
85
|
const molDifferences: { [key: number]: HTMLCanvasElement } = {};
|
|
85
86
|
const units = resDf.col('sequence')!.getTag(DG.TAGS.UNITS);
|
|
86
87
|
const separator = resDf.col('sequence')!.getTag(C.TAGS.SEPARATOR);
|
|
87
|
-
const splitter =
|
|
88
|
+
const splitter = bio.getSplitter(units, separator);
|
|
88
89
|
const subParts1 = splitter(this.moleculeColumn!.get(this.targetMoleculeIdx));
|
|
89
90
|
const subParts2 = splitter(resDf.get('sequence', resDf.currentRowIdx));
|
|
90
91
|
const canvas = createDifferenceCanvas(subParts1, subParts2, units, molDifferences);
|
|
@@ -1,19 +1,21 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
2
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
5
|
+
|
|
3
6
|
import * as C from '../utils/constants';
|
|
4
7
|
import {getHelmMonomers} from '../package';
|
|
5
|
-
import {WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
6
8
|
|
|
7
9
|
const V2000_ATOM_NAME_POS = 31;
|
|
8
10
|
|
|
9
11
|
export async function getMonomericMols(mcol: DG.Column, pattern: boolean = false): Promise<DG.Column> {
|
|
10
12
|
const separator: string = mcol.tags[C.TAGS.SEPARATOR];
|
|
11
13
|
const units: string = mcol.tags[DG.TAGS.UNITS];
|
|
12
|
-
const splitter =
|
|
14
|
+
const splitter = bio.getSplitter(units, separator);
|
|
13
15
|
let molV3000Array;
|
|
14
16
|
const monomersDict = new Map();
|
|
15
17
|
const monomers = units === 'helm' ?
|
|
16
|
-
getHelmMonomers(mcol) : Object.keys(
|
|
18
|
+
getHelmMonomers(mcol) : Object.keys(bio.getStats(mcol, 0, splitter).freq).filter((it) => it !== '');
|
|
17
19
|
|
|
18
20
|
for (let i = 0; i < monomers.length; i++)
|
|
19
21
|
monomersDict.set(monomers[i], `${i + 1}`);
|
package/src/package-test.ts
CHANGED
|
@@ -12,6 +12,7 @@ import './tests/splitters-test';
|
|
|
12
12
|
import './tests/renderers-test';
|
|
13
13
|
import './tests/convert-test';
|
|
14
14
|
import './tests/fasta-handler-test';
|
|
15
|
+
import './tests/fasta-export-tests';
|
|
15
16
|
import './tests/WebLogo-positions-test';
|
|
16
17
|
import './tests/checkInputColumn-tests';
|
|
17
18
|
import './tests/similarity-diversity-tests';
|
package/src/package.ts
CHANGED
|
@@ -2,18 +2,15 @@
|
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
import * as DG from 'datagrok-api/dg';
|
|
5
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
5
6
|
|
|
6
7
|
export const _package = new DG.Package();
|
|
7
8
|
|
|
8
9
|
import {MacromoleculeDifferenceCellRenderer, MonomerCellRenderer} from './utils/cell-renderer';
|
|
9
|
-
import {WebLogo, SeqColStats} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
10
10
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
11
11
|
import {runKalign, testMSAEnoughMemory} from './utils/multiple-sequence-alignment';
|
|
12
12
|
import {SequenceAlignment, Aligned} from './seq_align';
|
|
13
|
-
import {Nucleotides} from '@datagrok-libraries/bio/src/nucleotides';
|
|
14
|
-
import {Aminoacids} from '@datagrok-libraries/bio/src/aminoacids';
|
|
15
13
|
import {getEmbeddingColsNames, sequenceSpace} from './analysis/sequence-space';
|
|
16
|
-
import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
17
14
|
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
18
15
|
import {createPropPanelElement, createTooltipElement, getSimilaritiesMarix} from './analysis/sequence-activity-cliffs';
|
|
19
16
|
import {createJsonMonomerLibFromSdf, encodeMonomers, getMolfilesFromSeq, HELM_CORE_LIB_FILENAME} from './utils/utils';
|
|
@@ -21,8 +18,6 @@ import {getMacroMol} from './utils/atomic-works';
|
|
|
21
18
|
import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
|
|
22
19
|
import {convert} from './utils/convert';
|
|
23
20
|
import {getMacroMolColumnPropertyPanel, representationsWidget} from './widgets/representations';
|
|
24
|
-
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
25
|
-
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
26
21
|
import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils';
|
|
27
22
|
import {
|
|
28
23
|
generateManySequences,
|
|
@@ -34,7 +29,8 @@ import {splitAlignedSequences} from '@datagrok-libraries/bio/src/utils/splitter'
|
|
|
34
29
|
import * as C from './utils/constants';
|
|
35
30
|
import {SequenceSimilarityViewer} from './analysis/sequence-similarity-viewer';
|
|
36
31
|
import {SequenceDiversityViewer} from './analysis/sequence-diversity-viewer';
|
|
37
|
-
import {
|
|
32
|
+
import {substructureSearchDialog} from './substructure-search/substructure-search';
|
|
33
|
+
import {saveAsFastaUI} from './utils/save-as-fasta';
|
|
38
34
|
|
|
39
35
|
//tags: init
|
|
40
36
|
export async function initBio() {
|
|
@@ -91,7 +87,7 @@ export function checkInputColumn(
|
|
|
91
87
|
let res: boolean = true;
|
|
92
88
|
let msg: string = '';
|
|
93
89
|
|
|
94
|
-
const uh = new UnitsHandler(col);
|
|
90
|
+
const uh = new bio.UnitsHandler(col);
|
|
95
91
|
if (col.semType !== DG.SEMTYPE.MACROMOLECULE) {
|
|
96
92
|
grok.shell.warning(name + ' analysis is allowed for Macromolecules semantic type');
|
|
97
93
|
res = false;
|
|
@@ -141,7 +137,7 @@ export function sequenceAlignment(alignType: string, alignTable: string, gap: nu
|
|
|
141
137
|
//tags: viewer, panel
|
|
142
138
|
//output: viewer result
|
|
143
139
|
export function webLogoViewer() {
|
|
144
|
-
return new WebLogo();
|
|
140
|
+
return new bio.WebLogo();
|
|
145
141
|
}
|
|
146
142
|
|
|
147
143
|
//name: VdRegions
|
|
@@ -307,7 +303,7 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
307
303
|
if (col.semType != DG.SEMTYPE.MACROMOLECULE)
|
|
308
304
|
return false;
|
|
309
305
|
|
|
310
|
-
const colUH = new UnitsHandler(col);
|
|
306
|
+
const colUH = new bio.UnitsHandler(col);
|
|
311
307
|
// TODO: prevent for cyclic, branched or multiple chains in Helm
|
|
312
308
|
return true;
|
|
313
309
|
});
|
|
@@ -326,7 +322,7 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
326
322
|
return;
|
|
327
323
|
} else if (colList.length > 1) {
|
|
328
324
|
const colListNames: string [] = colList.map((col) => col.name);
|
|
329
|
-
const selectedCol = colList.find((c) => { return (new UnitsHandler(c)).isMsa(); });
|
|
325
|
+
const selectedCol = colList.find((c) => { return (new bio.UnitsHandler(c)).isMsa(); });
|
|
330
326
|
const colInput: DG.InputBase = ui.choiceInput(
|
|
331
327
|
'Column', selectedCol ? selectedCol.name : colListNames[0], colListNames);
|
|
332
328
|
ui.dialog({
|
|
@@ -378,7 +374,7 @@ export async function peptideMolecule(macroMolecule: DG.Cell): Promise<DG.Widget
|
|
|
378
374
|
//input: string fileContent
|
|
379
375
|
//output: list tables
|
|
380
376
|
export function importFasta(fileContent: string): DG.DataFrame [] {
|
|
381
|
-
const ffh = new FastaFileHandler(fileContent);
|
|
377
|
+
const ffh = new bio.FastaFileHandler(fileContent);
|
|
382
378
|
return ffh.importFasta();
|
|
383
379
|
}
|
|
384
380
|
|
|
@@ -453,7 +449,7 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
|
|
|
453
449
|
//tags: panel, bio
|
|
454
450
|
//input: column col {semType: Macromolecule}
|
|
455
451
|
export function splitToMonomers(col: DG.Column<string>): void {
|
|
456
|
-
if (!col.getTag(UnitsHandler.TAGS.aligned).includes(C.MSA))
|
|
452
|
+
if (!col.getTag(bio.UnitsHandler.TAGS.aligned).includes(C.MSA))
|
|
457
453
|
return grok.shell.error('Splitting is applicable only for aligned sequences');
|
|
458
454
|
|
|
459
455
|
const tempDf = splitAlignedSequences(col);
|
|
@@ -470,7 +466,7 @@ export function splitToMonomers(col: DG.Column<string>): void {
|
|
|
470
466
|
//name: Bio: getHelmMonomers
|
|
471
467
|
//input: column col {semType: Macromolecule}
|
|
472
468
|
export function getHelmMonomers(seqCol: DG.Column<string>): string[] {
|
|
473
|
-
const stats =
|
|
469
|
+
const stats = bio.getStats(seqCol, 1, bio.splitterAsHelm);
|
|
474
470
|
return Object.keys(stats.freq);
|
|
475
471
|
}
|
|
476
472
|
|
|
@@ -516,3 +512,9 @@ export function bioSubstructureSearch(col: DG.Column): void {
|
|
|
516
512
|
substructureSearchDialog(col);
|
|
517
513
|
}
|
|
518
514
|
|
|
515
|
+
//name: saveAsFasta
|
|
516
|
+
//description: As FASTA...
|
|
517
|
+
//tags: fileExporter
|
|
518
|
+
export function saveAsFasta() {
|
|
519
|
+
saveAsFastaUI();
|
|
520
|
+
}
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import {after, before, category, test, expect, expectObject, delay} from '@datagrok-libraries/utils/src/test';
|
|
2
|
-
|
|
3
1
|
import * as grok from 'datagrok-api/grok';
|
|
4
2
|
import * as ui from 'datagrok-api/ui';
|
|
5
3
|
import * as DG from 'datagrok-api/dg';
|
|
6
|
-
import
|
|
4
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
5
|
+
|
|
6
|
+
import {after, before, category, test, expect, expectObject, delay} from '@datagrok-libraries/utils/src/test';
|
|
7
7
|
import {Column} from 'datagrok-api/dg';
|
|
8
8
|
import {ALPHABET, NOTATION, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
9
9
|
|
|
@@ -41,27 +41,27 @@ ATC-G-TTGC--
|
|
|
41
41
|
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
42
42
|
seqCol.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
|
|
43
43
|
|
|
44
|
-
const wlViewer: WebLogo = await df.plot.fromType('WebLogo') as
|
|
44
|
+
const wlViewer: bio.WebLogo = (await df.plot.fromType('WebLogo')) as bio.WebLogo;
|
|
45
45
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
46
46
|
|
|
47
47
|
tvList.push(tv);
|
|
48
48
|
dfList.push(df);
|
|
49
49
|
|
|
50
|
-
const positions: PositionInfo[] = wlViewer['positions'];
|
|
51
|
-
|
|
52
|
-
const resAllDf1: PositionInfo[] = [
|
|
53
|
-
new PositionInfo('1', {'A': new PositionMonomerInfo(2), '-': new PositionMonomerInfo(3)}),
|
|
54
|
-
new PositionInfo('2', {'T': new PositionMonomerInfo(5)}),
|
|
55
|
-
new PositionInfo('3', {'C': new PositionMonomerInfo(5)}),
|
|
56
|
-
new PositionInfo('4', {'-': new PositionMonomerInfo(5)}),
|
|
57
|
-
new PositionInfo('5', {'G': new PositionMonomerInfo(5)}),
|
|
58
|
-
new PositionInfo('6', {'-': new PositionMonomerInfo(3), 'C': new PositionMonomerInfo(2)}),
|
|
59
|
-
new PositionInfo('7', {'T': new PositionMonomerInfo(5)}),
|
|
60
|
-
new PositionInfo('8', {'T': new PositionMonomerInfo(5)}),
|
|
61
|
-
new PositionInfo('9', {'G': new PositionMonomerInfo(5)}),
|
|
62
|
-
new PositionInfo('10', {'C': new PositionMonomerInfo(5)}),
|
|
63
|
-
new PositionInfo('11', {'-': new PositionMonomerInfo(5)}),
|
|
64
|
-
new PositionInfo('12', {'-': new PositionMonomerInfo(5)})
|
|
50
|
+
const positions: bio.PositionInfo[] = wlViewer['positions'];
|
|
51
|
+
|
|
52
|
+
const resAllDf1: bio.PositionInfo[] = [
|
|
53
|
+
new bio.PositionInfo('1', {'A': new bio.PositionMonomerInfo(2), '-': new bio.PositionMonomerInfo(3)}),
|
|
54
|
+
new bio.PositionInfo('2', {'T': new bio.PositionMonomerInfo(5)}),
|
|
55
|
+
new bio.PositionInfo('3', {'C': new bio.PositionMonomerInfo(5)}),
|
|
56
|
+
new bio.PositionInfo('4', {'-': new bio.PositionMonomerInfo(5)}),
|
|
57
|
+
new bio.PositionInfo('5', {'G': new bio.PositionMonomerInfo(5)}),
|
|
58
|
+
new bio.PositionInfo('6', {'-': new bio.PositionMonomerInfo(3), 'C': new bio.PositionMonomerInfo(2)}),
|
|
59
|
+
new bio.PositionInfo('7', {'T': new bio.PositionMonomerInfo(5)}),
|
|
60
|
+
new bio.PositionInfo('8', {'T': new bio.PositionMonomerInfo(5)}),
|
|
61
|
+
new bio.PositionInfo('9', {'G': new bio.PositionMonomerInfo(5)}),
|
|
62
|
+
new bio.PositionInfo('10', {'C': new bio.PositionMonomerInfo(5)}),
|
|
63
|
+
new bio.PositionInfo('11', {'-': new bio.PositionMonomerInfo(5)}),
|
|
64
|
+
new bio.PositionInfo('12', {'-': new bio.PositionMonomerInfo(5)}),
|
|
65
65
|
];
|
|
66
66
|
|
|
67
67
|
expect(positions.length, resAllDf1.length);
|
|
@@ -94,25 +94,25 @@ ATC-G-TTGC--
|
|
|
94
94
|
return i > 2;
|
|
95
95
|
});
|
|
96
96
|
df.filter.fireChanged();
|
|
97
|
-
const wlViewer: WebLogo = await df.plot.fromType('WebLogo',
|
|
98
|
-
|
|
97
|
+
const wlViewer: bio.WebLogo = (await df.plot.fromType('WebLogo',
|
|
98
|
+
{'shrinkEmptyTail': true})) as bio.WebLogo;
|
|
99
99
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
100
100
|
|
|
101
101
|
tvList.push(tv);
|
|
102
102
|
dfList.push(df);
|
|
103
103
|
|
|
104
|
-
const positions: PositionInfo[] = wlViewer['positions'];
|
|
105
|
-
|
|
106
|
-
const resAllDf1: PositionInfo[] = [
|
|
107
|
-
new PositionInfo('1', {'-': new PositionMonomerInfo(3)}),
|
|
108
|
-
new PositionInfo('2', {'T': new PositionMonomerInfo(3)}),
|
|
109
|
-
new PositionInfo('3', {'-': new PositionMonomerInfo(3)}),
|
|
110
|
-
new PositionInfo('4', {'-': new PositionMonomerInfo(3)}),
|
|
111
|
-
new PositionInfo('5', {'C': new PositionMonomerInfo(3)}),
|
|
112
|
-
new PositionInfo('6', {'-': new PositionMonomerInfo(2), 'C': new PositionMonomerInfo(1)}),
|
|
113
|
-
new PositionInfo('7', {'G': new PositionMonomerInfo(3)}),
|
|
114
|
-
new PositionInfo('8', {'T': new PositionMonomerInfo(3)}),
|
|
115
|
-
new PositionInfo('9', {'-': new PositionMonomerInfo(3)}),
|
|
104
|
+
const positions: bio.PositionInfo[] = wlViewer['positions'];
|
|
105
|
+
|
|
106
|
+
const resAllDf1: bio.PositionInfo[] = [
|
|
107
|
+
new bio.PositionInfo('1', {'-': new bio.PositionMonomerInfo(3)}),
|
|
108
|
+
new bio.PositionInfo('2', {'T': new bio.PositionMonomerInfo(3)}),
|
|
109
|
+
new bio.PositionInfo('3', {'-': new bio.PositionMonomerInfo(3)}),
|
|
110
|
+
new bio.PositionInfo('4', {'-': new bio.PositionMonomerInfo(3)}),
|
|
111
|
+
new bio.PositionInfo('5', {'C': new bio.PositionMonomerInfo(3)}),
|
|
112
|
+
new bio.PositionInfo('6', {'-': new bio.PositionMonomerInfo(2), 'C': new bio.PositionMonomerInfo(1)}),
|
|
113
|
+
new bio.PositionInfo('7', {'G': new bio.PositionMonomerInfo(3)}),
|
|
114
|
+
new bio.PositionInfo('8', {'T': new bio.PositionMonomerInfo(3)}),
|
|
115
|
+
new bio.PositionInfo('9', {'-': new bio.PositionMonomerInfo(3)}),
|
|
116
116
|
];
|
|
117
117
|
|
|
118
118
|
expect(positions.length, resAllDf1.length);
|
|
@@ -134,24 +134,25 @@ ATC-G-TTGC--
|
|
|
134
134
|
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
135
135
|
seqCol.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
|
|
136
136
|
|
|
137
|
-
const wlViewer: WebLogo = await df.plot.fromType('WebLogo',
|
|
137
|
+
const wlViewer: bio.WebLogo = (await df.plot.fromType('WebLogo',
|
|
138
|
+
{'skipEmptyPositions': true})) as bio.WebLogo;
|
|
138
139
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
139
140
|
|
|
140
141
|
tvList.push(tv);
|
|
141
142
|
dfList.push(df);
|
|
142
143
|
|
|
143
|
-
const positions: PositionInfo[] = wlViewer['positions'];
|
|
144
|
-
|
|
145
|
-
const resAllDf1: PositionInfo[] = [
|
|
146
|
-
new PositionInfo('1', {'A': new PositionMonomerInfo(2), '-': new PositionMonomerInfo(3)}),
|
|
147
|
-
new PositionInfo('2', {'T': new PositionMonomerInfo(5)}),
|
|
148
|
-
new PositionInfo('3', {'C': new PositionMonomerInfo(5)}),
|
|
149
|
-
new PositionInfo('5', {'G': new PositionMonomerInfo(5)}),
|
|
150
|
-
new PositionInfo('6', {'-': new PositionMonomerInfo(3), 'C': new PositionMonomerInfo(2)}),
|
|
151
|
-
new PositionInfo('7', {'T': new PositionMonomerInfo(5)}),
|
|
152
|
-
new PositionInfo('8', {'T': new PositionMonomerInfo(5)}),
|
|
153
|
-
new PositionInfo('9', {'G': new PositionMonomerInfo(5)}),
|
|
154
|
-
new PositionInfo('10', {'C': new PositionMonomerInfo(5)})
|
|
144
|
+
const positions: bio.PositionInfo[] = wlViewer['positions'];
|
|
145
|
+
|
|
146
|
+
const resAllDf1: bio.PositionInfo[] = [
|
|
147
|
+
new bio.PositionInfo('1', {'A': new bio.PositionMonomerInfo(2), '-': new bio.PositionMonomerInfo(3)}),
|
|
148
|
+
new bio.PositionInfo('2', {'T': new bio.PositionMonomerInfo(5)}),
|
|
149
|
+
new bio.PositionInfo('3', {'C': new bio.PositionMonomerInfo(5)}),
|
|
150
|
+
new bio.PositionInfo('5', {'G': new bio.PositionMonomerInfo(5)}),
|
|
151
|
+
new bio.PositionInfo('6', {'-': new bio.PositionMonomerInfo(3), 'C': new bio.PositionMonomerInfo(2)}),
|
|
152
|
+
new bio.PositionInfo('7', {'T': new bio.PositionMonomerInfo(5)}),
|
|
153
|
+
new bio.PositionInfo('8', {'T': new bio.PositionMonomerInfo(5)}),
|
|
154
|
+
new bio.PositionInfo('9', {'G': new bio.PositionMonomerInfo(5)}),
|
|
155
|
+
new bio.PositionInfo('10', {'C': new bio.PositionMonomerInfo(5)})
|
|
155
156
|
];
|
|
156
157
|
|
|
157
158
|
expect(positions.length, resAllDf1.length);
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
2
|
-
|
|
3
1
|
import * as grok from 'datagrok-api/grok';
|
|
4
2
|
import * as ui from 'datagrok-api/ui';
|
|
5
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
6
5
|
|
|
6
|
+
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
7
7
|
import {Nucleotides, NucleotidesPalettes} from '@datagrok-libraries/bio/src/nucleotides';
|
|
8
8
|
import {Aminoacids, AminoacidsPalettes} from '@datagrok-libraries/bio/src/aminoacids';
|
|
9
9
|
import {WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
@@ -63,19 +63,19 @@ XZJ{}2
|
|
|
63
63
|
|
|
64
64
|
category('WebLogo.monomerToShort', () => {
|
|
65
65
|
test('longMonomerSingle', async () => {
|
|
66
|
-
await expect(
|
|
66
|
+
await expect(bio.monomerToShort('S', 5), 'S');
|
|
67
67
|
});
|
|
68
68
|
test('longMonomerShort', async () => {
|
|
69
|
-
await expect(
|
|
69
|
+
await expect(bio.monomerToShort('Short', 5), 'Short');
|
|
70
70
|
});
|
|
71
71
|
test('longMonomerLong56', async () => {
|
|
72
|
-
await expect(
|
|
72
|
+
await expect(bio.monomerToShort('Long56', 5), 'Long5…');
|
|
73
73
|
});
|
|
74
74
|
test('longMonomerComplexFirstPartShort', async () => {
|
|
75
|
-
await expect(
|
|
75
|
+
await expect(bio.monomerToShort('Long-long', 5), 'Long…');
|
|
76
76
|
});
|
|
77
77
|
test('longMonomerComplexFirstPartLong56', async () => {
|
|
78
|
-
await expect(
|
|
78
|
+
await expect(bio.monomerToShort('Long56-long', 5), 'Long5…');
|
|
79
79
|
});
|
|
80
80
|
});
|
|
81
81
|
|
|
@@ -83,7 +83,7 @@ category('WebLogo.monomerToShort', () => {
|
|
|
83
83
|
export async function _testGetStats(csvDfN1: string) {
|
|
84
84
|
const dfN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
|
|
85
85
|
const seqCol: DG.Column = dfN1.col('seq')!;
|
|
86
|
-
const stats =
|
|
86
|
+
const stats = bio.getStats(seqCol, 5, bio.splitterAsFasta);
|
|
87
87
|
|
|
88
88
|
expectObject(stats.freq, {
|
|
89
89
|
'A': 4,
|
|
@@ -103,7 +103,7 @@ export async function _testGetAlphabetSimilarity() {
|
|
|
103
103
|
'-': 1000
|
|
104
104
|
};
|
|
105
105
|
const alphabet: Set<string> = new Set(Object.keys(Nucleotides.Names));
|
|
106
|
-
const res =
|
|
106
|
+
const res = bio.getAlphabetSimilarity(freq, alphabet);
|
|
107
107
|
|
|
108
108
|
expect(res > 0.6, true);
|
|
109
109
|
}
|
|
@@ -111,7 +111,7 @@ export async function _testGetAlphabetSimilarity() {
|
|
|
111
111
|
export async function _testPickupPaletteN1(csvDfN1: string) {
|
|
112
112
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
|
|
113
113
|
const col: DG.Column = df.col('seq')!;
|
|
114
|
-
const cp =
|
|
114
|
+
const cp = bio.pickUpPalette(col);
|
|
115
115
|
|
|
116
116
|
expect(cp instanceof NucleotidesPalettes, true);
|
|
117
117
|
}
|
|
@@ -119,7 +119,7 @@ export async function _testPickupPaletteN1(csvDfN1: string) {
|
|
|
119
119
|
export async function _testPickupPaletteN1e(csvDfN1e: string) {
|
|
120
120
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1e);
|
|
121
121
|
const col: DG.Column = df.col('seq')!;
|
|
122
|
-
const cp =
|
|
122
|
+
const cp = bio.pickUpPalette(col);
|
|
123
123
|
|
|
124
124
|
expect(cp instanceof NucleotidesPalettes, true);
|
|
125
125
|
}
|
|
@@ -127,7 +127,7 @@ export async function _testPickupPaletteN1e(csvDfN1e: string) {
|
|
|
127
127
|
export async function _testPickupPaletteAA1(csvDfAA1: string) {
|
|
128
128
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfAA1);
|
|
129
129
|
const col: DG.Column = df.col('seq')!;
|
|
130
|
-
const cp =
|
|
130
|
+
const cp = bio.pickUpPalette(col);
|
|
131
131
|
|
|
132
132
|
expect(cp instanceof AminoacidsPalettes, true);
|
|
133
133
|
}
|
|
@@ -135,14 +135,14 @@ export async function _testPickupPaletteAA1(csvDfAA1: string) {
|
|
|
135
135
|
export async function _testPickupPaletteX(csvDfX: string) {
|
|
136
136
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfX);
|
|
137
137
|
const col: DG.Column = df.col('seq')!;
|
|
138
|
-
const cp =
|
|
138
|
+
const cp = bio.pickUpPalette(col);
|
|
139
139
|
|
|
140
140
|
expect(cp instanceof UnknownSeqPalette, true);
|
|
141
141
|
}
|
|
142
142
|
|
|
143
143
|
export async function _testPickupPaletteAA2(dfAA2: DG.DataFrame) {
|
|
144
144
|
const seqCol: DG.Column = dfAA2.col('seq')!;
|
|
145
|
-
const cp =
|
|
145
|
+
const cp = bio.pickUpPalette(seqCol);
|
|
146
146
|
|
|
147
147
|
expect(cp instanceof AminoacidsPalettes, true);
|
|
148
148
|
}
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
import {category, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
2
|
-
|
|
3
|
-
import * as grok from 'datagrok-api/grok';
|
|
4
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
|
|
5
|
+
import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
5
6
|
|
|
6
7
|
import {ConverterFunc} from './types';
|
|
7
8
|
import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
|
|
@@ -139,6 +140,7 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$
|
|
|
139
140
|
return function(srcCol: DG.Column): DG.Column {
|
|
140
141
|
const converter = new NotationConverter(srcCol);
|
|
141
142
|
const resCol = converter.convert(tgtNotation, tgtSeparator);
|
|
143
|
+
expect(resCol.getTag('units'), tgtNotation);
|
|
142
144
|
return resCol;
|
|
143
145
|
};
|
|
144
146
|
};
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
5
|
+
|
|
6
|
+
import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
7
|
+
import {saveAsFastaDo, wrapSequence} from '../utils/save-as-fasta';
|
|
8
|
+
|
|
9
|
+
type SaveAsFastaTestArgs = { srcCsv: string, idCols: string [], seqCol: string, lineWidth: number, tgtFasta: string };
|
|
10
|
+
|
|
11
|
+
category('fastaExport', () => {
|
|
12
|
+
|
|
13
|
+
enum WrapDataTest {
|
|
14
|
+
single = 'single',
|
|
15
|
+
multi = 'multi'
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
const wrapData: { [key: string]: { src: string, tgt: string[] } } = {
|
|
19
|
+
[WrapDataTest.single]: {
|
|
20
|
+
src: 'MDYKETLLMPKTDFPMRGGLP',
|
|
21
|
+
tgt: ['MDYKETLLMP', 'KTDFPMRGGL', 'P'],
|
|
22
|
+
},
|
|
23
|
+
[WrapDataTest.multi]: {
|
|
24
|
+
src: 'M[MeI]YKETLL[MeF]PKTDFPMRGGL[MeA]',
|
|
25
|
+
tgt: ['M[MeI]YKETLL[MeF]P', 'KTDFPMRGGL', '[MeA]'],
|
|
26
|
+
},
|
|
27
|
+
};
|
|
28
|
+
|
|
29
|
+
enum SaveAsFastaTests {
|
|
30
|
+
test1 = 'test1',
|
|
31
|
+
test2 = 'test2'
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
const saveAsFastaData: {
|
|
35
|
+
[key: string]: SaveAsFastaTestArgs
|
|
36
|
+
} = {
|
|
37
|
+
[SaveAsFastaTests.test1]: {
|
|
38
|
+
srcCsv: `id,seq
|
|
39
|
+
1,MDYKETLLMP
|
|
40
|
+
2,KTDFPMRGGL
|
|
41
|
+
3,P`,
|
|
42
|
+
idCols: ['id'],
|
|
43
|
+
seqCol: 'seq',
|
|
44
|
+
lineWidth: 10,
|
|
45
|
+
tgtFasta: `>1
|
|
46
|
+
MDYKETLLMP
|
|
47
|
+
>2
|
|
48
|
+
KTDFPMRGGL
|
|
49
|
+
>3
|
|
50
|
+
P
|
|
51
|
+
`
|
|
52
|
+
},
|
|
53
|
+
[SaveAsFastaTests.test2]: {
|
|
54
|
+
srcCsv: `id,id2,seq
|
|
55
|
+
1,seqA,M[MeI]YKETLL[MeF]P
|
|
56
|
+
2,seqB,KTDFPMRGGL
|
|
57
|
+
3,seqC,[MeA]
|
|
58
|
+
`,
|
|
59
|
+
idCols: ['id2', 'id'],
|
|
60
|
+
seqCol: 'seq',
|
|
61
|
+
lineWidth: 5,
|
|
62
|
+
tgtFasta: `>seqA|1
|
|
63
|
+
M[MeI]YKE
|
|
64
|
+
TLL[MeF]P
|
|
65
|
+
>seqB|2
|
|
66
|
+
KTDFP
|
|
67
|
+
MRGGL
|
|
68
|
+
>seqC|3
|
|
69
|
+
[MeA]
|
|
70
|
+
`
|
|
71
|
+
}
|
|
72
|
+
};
|
|
73
|
+
|
|
74
|
+
test('wrapSequenceSingle', async () => {
|
|
75
|
+
_testWrapSequence(WrapDataTest.single, 10);
|
|
76
|
+
});
|
|
77
|
+
|
|
78
|
+
test('wrapSequenceMulti', async () => {
|
|
79
|
+
_testWrapSequence(WrapDataTest.multi, 10);
|
|
80
|
+
});
|
|
81
|
+
|
|
82
|
+
test('saveAsFastaTest1', async () => {
|
|
83
|
+
_testSaveAsFasta(saveAsFastaData[SaveAsFastaTests.test1]);
|
|
84
|
+
});
|
|
85
|
+
|
|
86
|
+
test('saveAsFastaTest2', async () => {
|
|
87
|
+
_testSaveAsFasta(saveAsFastaData[SaveAsFastaTests.test2]);
|
|
88
|
+
});
|
|
89
|
+
|
|
90
|
+
function _testWrapSequence(testKey: string, lineWidth: number = 10) {
|
|
91
|
+
const splitter = bio.splitterAsFasta;
|
|
92
|
+
|
|
93
|
+
const srcSeq: string = wrapData[testKey].src;
|
|
94
|
+
const wrapRes: string[] = wrapSequence(srcSeq, splitter, lineWidth);
|
|
95
|
+
const wrapTgt: string[] = wrapData[testKey].tgt;
|
|
96
|
+
|
|
97
|
+
expectArray(wrapRes, wrapTgt);
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
async function _testSaveAsFasta(args: SaveAsFastaTestArgs) {
|
|
101
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(args.srcCsv);
|
|
102
|
+
|
|
103
|
+
const seqCol: DG.Column = df.getCol(args.seqCol);
|
|
104
|
+
const idCols: DG.Column[] = args.idCols.map((colName) => df.getCol(colName));
|
|
105
|
+
|
|
106
|
+
const fastaRes: string = saveAsFastaDo(idCols, seqCol, args.lineWidth);
|
|
107
|
+
expect(fastaRes, args.tgtFasta);
|
|
108
|
+
}
|
|
109
|
+
});
|
|
110
|
+
|