@datagrok/bio 2.0.16 → 2.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.0.16",
8
+ "version": "2.0.17",
9
9
  "description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
10
10
  "repository": {
11
11
  "type": "git",
@@ -13,18 +13,18 @@
13
13
  "directory": "packages/Bio"
14
14
  },
15
15
  "dependencies": {
16
- "@biowasm/aioli": ">=2.4.0",
17
- "@datagrok-libraries/bio": "^4.4.3",
18
- "@datagrok-libraries/chem-meta": "1.0.0",
16
+ "@biowasm/aioli": "^3.1.0",
17
+ "@datagrok-libraries/bio": "^4.4.7",
18
+ "@datagrok-libraries/chem-meta": "1.0.1",
19
19
  "@datagrok-libraries/ml": "^6.2.0",
20
- "@datagrok-libraries/utils": "^1.6.2",
20
+ "@datagrok-libraries/utils": "^1.10.1",
21
21
  "@deck.gl/core": "^8.7.5",
22
22
  "@deck.gl/layers": "^8.7.5",
23
23
  "@luma.gl/constants": "^8.5.10",
24
24
  "@luma.gl/core": "^8.5.10",
25
- "@phylocanvas/phylocanvas.gl": "^1.43.0",
25
+ "@phylocanvas/phylocanvas.gl": "^1.44.0",
26
26
  "cash-dom": "latest",
27
- "datagrok-api": "^1.6.12",
27
+ "datagrok-api": "^1.7.0",
28
28
  "dayjs": "^1.11.4",
29
29
  "openchemlib": "6.0.1",
30
30
  "rxjs": "^6.5.5",
@@ -12,6 +12,7 @@ import './tests/splitters-test';
12
12
  import './tests/renderers-test';
13
13
  import './tests/convert-test';
14
14
  import './tests/fasta-handler-test';
15
+ import './tests/fasta-export-tests';
15
16
  import './tests/WebLogo-positions-test';
16
17
  import './tests/checkInputColumn-tests';
17
18
  import './tests/similarity-diversity-tests';
package/src/package.ts CHANGED
@@ -34,7 +34,8 @@ import {splitAlignedSequences} from '@datagrok-libraries/bio/src/utils/splitter'
34
34
  import * as C from './utils/constants';
35
35
  import {SequenceSimilarityViewer} from './analysis/sequence-similarity-viewer';
36
36
  import {SequenceDiversityViewer} from './analysis/sequence-diversity-viewer';
37
- import { substructureSearchDialog } from './substructure-search/substructure-search';
37
+ import {substructureSearchDialog} from './substructure-search/substructure-search';
38
+ import {saveAsFastaUI} from './utils/save-as-fasta';
38
39
 
39
40
  //tags: init
40
41
  export async function initBio() {
@@ -516,3 +517,9 @@ export function bioSubstructureSearch(col: DG.Column): void {
516
517
  substructureSearchDialog(col);
517
518
  }
518
519
 
520
+ //name: saveAsFasta
521
+ //description: As FASTA...
522
+ //tags: fileExporter
523
+ export function saveAsFasta() {
524
+ saveAsFastaUI();
525
+ }
@@ -41,7 +41,7 @@ ATC-G-TTGC--
41
41
  seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
42
42
  seqCol.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
43
43
 
44
- const wlViewer: WebLogo = await df.plot.fromType('WebLogo') as unknown as WebLogo;
44
+ const wlViewer: WebLogo = (await df.plot.fromType('WebLogo')) as WebLogo;
45
45
  tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
46
46
 
47
47
  tvList.push(tv);
@@ -94,8 +94,8 @@ ATC-G-TTGC--
94
94
  return i > 2;
95
95
  });
96
96
  df.filter.fireChanged();
97
- const wlViewer: WebLogo = await df.plot.fromType('WebLogo', {'shrinkEmptyTail': true}) as unknown as WebLogo;
98
-
97
+ const wlViewer: WebLogo = (await df.plot.fromType('WebLogo',
98
+ {'shrinkEmptyTail': true})) as WebLogo;
99
99
  tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
100
100
 
101
101
  tvList.push(tv);
@@ -134,7 +134,8 @@ ATC-G-TTGC--
134
134
  seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
135
135
  seqCol.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
136
136
 
137
- const wlViewer: WebLogo = await df.plot.fromType('WebLogo', {'skipEmptyPositions': true}) as unknown as WebLogo;
137
+ const wlViewer: WebLogo = (await df.plot.fromType('WebLogo',
138
+ {'skipEmptyPositions': true})) as WebLogo;
138
139
  tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
139
140
 
140
141
  tvList.push(tv);
@@ -1,7 +1,8 @@
1
- import {category, expectArray, test} from '@datagrok-libraries/utils/src/test';
2
-
3
- import * as grok from 'datagrok-api/grok';
4
1
  import * as DG from 'datagrok-api/dg';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as grok from 'datagrok-api/grok';
4
+
5
+ import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
5
6
 
6
7
  import {ConverterFunc} from './types';
7
8
  import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
@@ -139,6 +140,7 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$
139
140
  return function(srcCol: DG.Column): DG.Column {
140
141
  const converter = new NotationConverter(srcCol);
141
142
  const resCol = converter.convert(tgtNotation, tgtSeparator);
143
+ expect(resCol.getTag('units'), tgtNotation);
142
144
  return resCol;
143
145
  };
144
146
  };
@@ -0,0 +1,110 @@
1
+ import * as DG from 'datagrok-api/dg';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as grok from 'datagrok-api/grok';
4
+
5
+ import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
6
+ import {saveAsFastaDo, wrapSequence} from '../utils/save-as-fasta';
7
+ import {WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
8
+
9
+ type SaveAsFastaTestArgs = { srcCsv: string, idCols: string [], seqCol: string, lineWidth: number, tgtFasta: string };
10
+
11
+ category('fastaExport', () => {
12
+
13
+ enum WrapDataTest {
14
+ single = 'single',
15
+ multi = 'multi'
16
+ }
17
+
18
+ const wrapData: { [key: string]: { src: string, tgt: string[] } } = {
19
+ [WrapDataTest.single]: {
20
+ src: 'MDYKETLLMPKTDFPMRGGLP',
21
+ tgt: ['MDYKETLLMP', 'KTDFPMRGGL', 'P'],
22
+ },
23
+ [WrapDataTest.multi]: {
24
+ src: 'M[MeI]YKETLL[MeF]PKTDFPMRGGL[MeA]',
25
+ tgt: ['M[MeI]YKETLL[MeF]P', 'KTDFPMRGGL', '[MeA]'],
26
+ },
27
+ };
28
+
29
+ enum SaveAsFastaTests {
30
+ test1 = 'test1',
31
+ test2 = 'test2'
32
+ }
33
+
34
+ const saveAsFastaData: {
35
+ [key: string]: SaveAsFastaTestArgs
36
+ } = {
37
+ [SaveAsFastaTests.test1]: {
38
+ srcCsv: `id,seq
39
+ 1,MDYKETLLMP
40
+ 2,KTDFPMRGGL
41
+ 3,P`,
42
+ idCols: ['id'],
43
+ seqCol: 'seq',
44
+ lineWidth: 10,
45
+ tgtFasta: `>1
46
+ MDYKETLLMP
47
+ >2
48
+ KTDFPMRGGL
49
+ >3
50
+ P
51
+ `
52
+ },
53
+ [SaveAsFastaTests.test2]: {
54
+ srcCsv: `id,id2,seq
55
+ 1,seqA,M[MeI]YKETLL[MeF]P
56
+ 2,seqB,KTDFPMRGGL
57
+ 3,seqC,[MeA]
58
+ `,
59
+ idCols: ['id2', 'id'],
60
+ seqCol: 'seq',
61
+ lineWidth: 5,
62
+ tgtFasta: `>seqA|1
63
+ M[MeI]YKE
64
+ TLL[MeF]P
65
+ >seqB|2
66
+ KTDFP
67
+ MRGGL
68
+ >seqC|3
69
+ [MeA]
70
+ `
71
+ }
72
+ };
73
+
74
+ test('wrapSequenceSingle', async () => {
75
+ _testWrapSequence(WrapDataTest.single, 10);
76
+ });
77
+
78
+ test('wrapSequenceMulti', async () => {
79
+ _testWrapSequence(WrapDataTest.multi, 10);
80
+ });
81
+
82
+ test('saveAsFastaTest1', async () => {
83
+ _testSaveAsFasta(saveAsFastaData[SaveAsFastaTests.test1]);
84
+ });
85
+
86
+ test('saveAsFastaTest2', async () => {
87
+ _testSaveAsFasta(saveAsFastaData[SaveAsFastaTests.test2]);
88
+ });
89
+
90
+ function _testWrapSequence(testKey: string, lineWidth: number = 10) {
91
+ const splitter = WebLogo.splitterAsFasta;
92
+
93
+ const srcSeq: string = wrapData[testKey].src;
94
+ const wrapRes: string[] = wrapSequence(srcSeq, splitter, lineWidth);
95
+ const wrapTgt: string[] = wrapData[testKey].tgt;
96
+
97
+ expectArray(wrapRes, wrapTgt);
98
+ }
99
+
100
+ async function _testSaveAsFasta(args: SaveAsFastaTestArgs) {
101
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(args.srcCsv);
102
+
103
+ const seqCol: DG.Column = df.getCol(args.seqCol);
104
+ const idCols: DG.Column[] = args.idCols.map((colName) => df.getCol(colName));
105
+
106
+ const fastaRes: string = saveAsFastaDo(idCols, seqCol, args.lineWidth);
107
+ expect(fastaRes, args.tgtFasta);
108
+ }
109
+ });
110
+
@@ -27,6 +27,11 @@ category('splitters', () => {
27
27
  const helm2 = 'PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.N.meK}$$$';
28
28
 
29
29
  const data: { [key: string]: [string, string[]] } = {
30
+ fastaMulti: [
31
+ 'M[MeI]YKETLL[MeF]PKTDFPMRGGL[MeA]',
32
+ ['M', 'MeI', 'Y', 'K', 'E', 'T', 'L', 'L', 'MeF', 'P',
33
+ 'K', 'T', 'D', 'F', 'P', 'M', 'R', 'G', 'G', 'L', 'MeA']
34
+ ],
30
35
  helm1: [
31
36
  'PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$',
32
37
  ['meI', 'hHis', 'Aca', 'N', 'T', 'dE', 'Thr_PO3H2', 'Aca', 'D-Tyr_Et',
@@ -59,6 +64,8 @@ category('splitters', () => {
59
64
  ],
60
65
  };
61
66
 
67
+ test('fastaMulti', async () => { await _testFastaSplitter(data.fastaMulti[0], data.fastaMulti[1]); });
68
+
62
69
  test('helm1', async () => { await _testHelmSplitter(data.helm1[0], data.helm1[1]); });
63
70
  test('helm2', async () => { await _testHelmSplitter(data.helm2[0], data.helm2[1]); });
64
71
  test('helm3-multichar', async () => { await _testHelmSplitter(data.helm3[0], data.helm3[1]); });
@@ -113,6 +120,12 @@ PEPTIDE1{hHis.Aca.Cys_SEt}$$$,5.72388
113
120
  });
114
121
  });
115
122
 
123
+ export async function _testFastaSplitter(src: string, tgt: string[]) {
124
+ const res: string[] = WebLogo.splitterAsFasta(src);
125
+ console.debug(`Bio: tests: splitters: src=${JSON.stringify(src)}, res=${JSON.stringify(res)} .`);
126
+ expectArray(res, tgt);
127
+ }
128
+
116
129
  export async function _testHelmSplitter(src: string, tgt: string[]) {
117
130
  const res: string[] = WebLogo.splitterAsHelm(src);
118
131
  console.debug(`Bio: tests: splitters: src=${JSON.stringify(src)}, res=${JSON.stringify(res)} .`);
@@ -1,8 +1,8 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as grok from 'datagrok-api/grok';
4
- import $ from 'cash-dom';
5
4
 
5
+ import $ from 'cash-dom';
6
6
  import {Subscription} from 'rxjs';
7
7
  import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
8
8
  import {NOTATION, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
@@ -36,11 +36,10 @@ export async function runKalign(srcCol: DG.Column, isAligned = false, unUsedName
36
36
  sequences = sequences.map((v: string, _) => AlignedSequenceEncoder.clean(v).replace(/\-/g, ''));
37
37
 
38
38
  const fasta = _stringsToFasta(sequences);
39
- const CLI = await new Aioli({
40
- tool: 'kalign',
41
- version: '3.3.1',
42
- reinit: true,
43
- });
39
+ const CLI = await new Aioli([
40
+ 'base/1.0.0',
41
+ {tool: 'kalign', version: '3.3.1', reinit: true,}
42
+ ]);
44
43
 
45
44
  console.log(['fasta.length =', fasta.length]);
46
45
 
@@ -0,0 +1,109 @@
1
+ import * as DG from 'datagrok-api/dg';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as grok from 'datagrok-api/grok';
4
+ import {SplitterFunc, WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
5
+ import wu from 'wu';
6
+ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
7
+
8
+ const FASTA_LINE_WIDTH = 60;
9
+
10
+ /** Shows dialog to select id columns list and seq column, builds and downloads FASTA content */
11
+ export function saveAsFastaUI() {
12
+ // Use grid for column order adjusted by user
13
+ let grid: DG.Grid = grok.shell.tv.grid;
14
+
15
+ const idGColList: DG.GridColumn[] = wu.count(0).take(grid.columns.length)
16
+ .map((colI: number) => grid.columns.byIndex(colI)!)
17
+ .filter((gcol: DG.GridColumn) => gcol.column ? gcol.column.semType !== DG.SEMTYPE.MACROMOLECULE : false).toArray();
18
+ const defaultIdGCol: DG.GridColumn | undefined = idGColList
19
+ .find((gcol: DG.GridColumn) => gcol.name.toLowerCase().indexOf('id') !== -1);
20
+ const idDefaultValue = defaultIdGCol ? [defaultIdGCol.name] : [];
21
+
22
+ const idGColListInput = ui.multiChoiceInput('Seq id columns', idDefaultValue,
23
+ idGColList.map((gcol: DG.GridColumn) => gcol.name));
24
+
25
+ const seqGColList: DG.GridColumn[] = wu.count(0).take(grid.columns.length)/* range rom 0 to grid.columns.length */
26
+ .map((colI: number) => grid.columns.byIndex(colI)!)
27
+ .filter((gc: DG.GridColumn) => {
28
+ const col: DG.Column | null = gc.column;
29
+ if (col && col.semType === DG.SEMTYPE.MACROMOLECULE) {
30
+ const uh = new UnitsHandler(col);
31
+ return uh.isFasta();
32
+ }
33
+ return false;
34
+ }).toArray();
35
+
36
+ const seqDefaultValue = seqGColList.length > 0 ? seqGColList[0].name : [];
37
+ const seqColInput = ui.choiceInput('Seq column', seqDefaultValue,
38
+ seqGColList.map((gCol: DG.GridColumn) => gCol.name));
39
+
40
+ const lineWidthInput = ui.intInput('FASTA line width', FASTA_LINE_WIDTH);
41
+
42
+ ui.dialog({title: 'Save as FASTA',})
43
+ .add(ui.inputs([
44
+ idGColListInput,
45
+ seqColInput,
46
+ lineWidthInput
47
+ ]))
48
+ .onOK(() => {
49
+ const valueIdColList: DG.Column[] = idGColListInput.value ?
50
+ idGColListInput.value.map((colName: string) => grid.columns.byName(colName)!.column!) : [];
51
+ const valueSeqCol: DG.Column | null = seqColInput.value ?
52
+ grid.columns.byName(seqColInput.value as string)!.column : null;
53
+ const valueLineWidth = lineWidthInput.value ?? FASTA_LINE_WIDTH;
54
+
55
+ if (!valueSeqCol)
56
+ grok.shell.warning(`Seq column is mandatory to save as FASTA.`);
57
+
58
+ const resFastaTxt: string = saveAsFastaDo(valueIdColList, valueSeqCol!, valueLineWidth);
59
+
60
+ const aEl: HTMLAnchorElement = document.createElement('a',);
61
+ aEl.setAttribute('href', `data:text/plain;charset=utf-8,${encodeURIComponent(resFastaTxt)}`);
62
+ aEl.setAttribute('download', `${grid.dataFrame.name}.fasta`);
63
+ aEl.click();
64
+ })
65
+ .show();
66
+ }
67
+
68
+ /** */
69
+ export function saveAsFastaDo(
70
+ idColList: DG.Column[], seqCol: DG.Column, lineWidth: number = FASTA_LINE_WIDTH, lineSeparator: string = '\n'
71
+ ): string {
72
+ const splitter: SplitterFunc = WebLogo.splitterAsFasta;
73
+
74
+ const fastaLines: string[] = [];
75
+
76
+ for (let rowI: number = 0; rowI < seqCol.length; rowI++) {
77
+ // multiple identifiers separated by vertical bars
78
+ // https://en.wikipedia.org/wiki/FASTA_format
79
+
80
+ const seqId: string = idColList.map((col) => col.get(rowI).toString()).join('|');
81
+ const seq: string = seqCol.get(rowI);
82
+ const seqLineList: string[] = wrapSequence(seq, splitter, lineWidth);
83
+
84
+ fastaLines.push(`>${seqId}${lineSeparator}`);
85
+ for (const line of seqLineList)
86
+ fastaLines.push(`${line}${lineSeparator}`);
87
+ }
88
+
89
+ //return fastaLines.join(lineSeparator);
90
+ return ''.concat(...fastaLines);
91
+ }
92
+
93
+ /* split sequence for monomers to prevent wrapping monomer partially */
94
+ export function wrapSequence(seq: string, splitter: SplitterFunc, lineWidth: number = FASTA_LINE_WIDTH): string[] {
95
+ const seqMonomerList = splitter(seq);
96
+ let seqPos: number = 0;
97
+ const seqLength: number = seqMonomerList.length;
98
+
99
+ const seqLineList: string[] = [];
100
+ while (seqPos < seqLength) {
101
+ /* join sliced monomer into line */
102
+ const seqLine: string[] = seqMonomerList.slice(seqPos, seqPos + lineWidth);
103
+ const seqLineTxt: string = seqLine.map((m) => m.length > 1 ? `[${m}]` : m).join('');
104
+ seqLineList.push(seqLineTxt);
105
+ seqPos += seqLine.length;
106
+ }
107
+
108
+ return seqLineList;
109
+ }
@@ -113,8 +113,10 @@ export class VdRegionsViewer extends DG.JsViewer implements IVdRegionsViewer {
113
113
  await this.buildView();
114
114
  }
115
115
 
116
- public override async onTableAttached() {
117
- await this.init();
116
+ public override onTableAttached() {
117
+ window.setTimeout(async () => {
118
+ await this.init();
119
+ }, 0 /* next event cycle */);
118
120
  }
119
121
 
120
122
  public override async onPropertyChanged(property: DG.Property | null) {