@datagrok/bio 2.4.26 → 2.4.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.4.26",
8
+ "version": "2.4.28",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -53,7 +53,7 @@ category('renderers', () => {
53
53
 
54
54
  test('afterConvert', async () => {
55
55
  await _testAfterConvert();
56
- }, {skipReason: 'GROK-12765'});
56
+ });
57
57
 
58
58
  test('selectRendererBySemType', async () => {
59
59
  await _selectRendererBySemType();
@@ -2,9 +2,9 @@ import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
- import {after, before, category, test, expect, expectArray, expectObject} from '@datagrok-libraries/utils/src/test';
5
+ import {after, before, category, test, expect, expectArray, expectObject, delay} from '@datagrok-libraries/utils/src/test';
6
6
  import * as C from '../utils/constants';
7
- import {splitToMonomers, _package, getHelmMonomers} from '../package';
7
+ import {_package, getHelmMonomers} from '../package';
8
8
  import {errorToConsole} from '@datagrok-libraries/utils/src/to-console';
9
9
  import {TAGS as bioTAGS, splitterAsFasta, splitterAsHelm} from '@datagrok-libraries/bio/src/utils/macromolecule';
10
10
 
@@ -89,12 +89,9 @@ category('splitters', () => {
89
89
  // call to calculate 'cell.renderer' tag
90
90
  await grok.data.detectSemanticTypes(df);
91
91
 
92
- dfList.push(df);
93
- tvList.push(tv);
94
-
95
- splitToMonomers();
92
+ await grok.functions.call('Bio:splitToMonomers');
96
93
  expect(df.columns.names().includes('17'), true);
97
- }, {skipReason: 'GROK-12766'});
94
+ });
98
95
 
99
96
  test('getHelmMonomers', async () => {
100
97
  const df: DG.DataFrame = DG.DataFrame.fromCsv(
@@ -1,3 +1,5 @@
1
+ import {pepseaMethods} from './pepsea';
2
+
1
3
  export enum COLUMNS_NAMES {
2
4
  SPLIT_COL = '~split',
3
5
  ACTIVITY = '~activity',
@@ -59,3 +61,17 @@ export namespace PEPSEA {
59
61
  export const SEPARATOR = '.';
60
62
  }
61
63
 
64
+ export const kalignVersion = '3.3.1';
65
+
66
+ export const msaDefaultOptions = {
67
+ pepsea: {
68
+ gapOpen: 1.53,
69
+ gapExtend: 0,
70
+ method: pepseaMethods[0],
71
+ },
72
+ kalign: {
73
+ gapOpen: null,
74
+ gapExtend: null,
75
+ terminalGap: null,
76
+ },
77
+ } as const;
@@ -8,22 +8,24 @@ import {pepseaMethods, runPepsea} from './pepsea';
8
8
  import {checkInputColumnUI} from './check-input-column';
9
9
  import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
10
10
  import {_package} from '../package';
11
-
11
+ import {multipleSequenceAlginmentUIOptions} from './types';
12
+ import {kalignVersion, msaDefaultOptions} from './constants';
13
+ import '../../css/msa.css';
12
14
  export class MsaWarning extends Error {
13
15
  constructor(message: string, options?: ErrorOptions) {
14
16
  super(message, options);
15
17
  }
16
18
  }
17
- type multipleSequenceAlginmentUIOptions = {col?: DG.Column<string> | null, clustersCol?: DG.Column | null,
18
- pepsea?: {method?: typeof pepseaMethods[number], gapOpen?: number, gapExtend?: number}};
19
19
 
20
- export async function multipleSequenceAlignmentUI(options: multipleSequenceAlginmentUIOptions = {}): Promise<DG.Column> {
20
+ export async function multipleSequenceAlignmentUI(
21
+ options: multipleSequenceAlginmentUIOptions = {}
22
+ ): Promise<DG.Column> {
21
23
  return new Promise(async (resolve, reject) => {
22
24
  options.clustersCol ??= null;
23
25
  options.pepsea ??= {};
24
- options.pepsea.method ??= pepseaMethods[0];
25
- options.pepsea.gapOpen ??= 1.53;
26
- options.pepsea.gapExtend ??= 0;
26
+ options.pepsea.method ??= msaDefaultOptions.pepsea.method;
27
+ options.pepsea.gapOpen ??= msaDefaultOptions.pepsea.gapOpen;
28
+ options.pepsea.gapExtend ??= msaDefaultOptions.pepsea.gapExtend;
27
29
 
28
30
  const table = options.col?.dataFrame ?? grok.shell.t;
29
31
  const seqCol = options.col ?? table.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
@@ -33,20 +35,32 @@ export async function multipleSequenceAlignmentUI(options: multipleSequenceAlgin
33
35
  reject(new MsaWarning(errMsg));
34
36
  }
35
37
 
36
- // UI
38
+ // UI for PepSea alignment
37
39
  const methodInput = ui.choiceInput('Method', options.pepsea.method, pepseaMethods);
38
40
  methodInput.setTooltip('Alignment method');
41
+
42
+ // UI for Kalign alignment
43
+ const terminalGapInput = ui.floatInput('Terminal gap', options?.kalign?.terminalGap ?? null);
44
+ terminalGapInput.setTooltip('Penalty for opening a gap at the beginning or end of the sequence');
45
+ const kalignVersionDiv = ui.p(`Kalign version: ${kalignVersion}`, 'kalign-version');
46
+
47
+ // shared UI
39
48
  const gapOpenInput = ui.floatInput('Gap open', options.pepsea.gapOpen);
40
49
  gapOpenInput.setTooltip('Gap opening penalty at group-to-group alignment');
41
50
  const gapExtendInput = ui.floatInput('Gap extend', options.pepsea.gapExtend);
42
51
  gapExtendInput.setTooltip('Gap extension penalty to skip the alignment');
43
- const inputRootStyles = [methodInput.root.style, gapOpenInput.root.style, gapExtendInput.root.style];
52
+
53
+ const pepseaInputRootStyles: CSSStyleDeclaration[] = [methodInput.root.style];
54
+ const kalignInputRootStyles: CSSStyleDeclaration[] = [terminalGapInput.root.style, kalignVersionDiv.style];
55
+
44
56
  let performAlignment: (() => Promise<DG.Column<string>>) | undefined;
45
57
 
46
58
  // TODO: allow only macromolecule colums to be chosen
47
59
  const colInput = ui.columnInput('Sequence', table, seqCol, async () => {
48
60
  performAlignment = await onColInputChange(
49
- colInput.value, table, inputRootStyles, methodInput, clustersColInput, gapOpenInput, gapExtendInput);
61
+ colInput.value, table, pepseaInputRootStyles, kalignInputRootStyles,
62
+ methodInput, clustersColInput, gapOpenInput, gapExtendInput, terminalGapInput
63
+ );
50
64
  }
51
65
  ) as DG.InputBase<DG.Column<string>>;
52
66
  colInput.setTooltip('Sequences column to use for alignment');
@@ -56,7 +70,9 @@ export async function multipleSequenceAlignmentUI(options: multipleSequenceAlgin
56
70
  //if column is specified (from tests), run alignment and resolve with the result
57
71
  if (options.col) {
58
72
  performAlignment = await onColInputChange(
59
- options.col, table, inputRootStyles, methodInput, clustersColInput, gapOpenInput, gapExtendInput);
73
+ options.col, table, pepseaInputRootStyles, kalignInputRootStyles,
74
+ methodInput, clustersColInput, gapOpenInput, gapExtendInput, terminalGapInput
75
+ );
60
76
 
61
77
  await onDialogOk(colInput, table, performAlignment, resolve, reject);
62
78
  return;
@@ -67,7 +83,9 @@ export async function multipleSequenceAlignmentUI(options: multipleSequenceAlgin
67
83
  .add(methodInput)
68
84
  .add(gapOpenInput)
69
85
  .add(gapExtendInput)
70
- .onOK(async () => {await onDialogOk(colInput, table, performAlignment, resolve, reject)})
86
+ .add(terminalGapInput)
87
+ .add(kalignVersionDiv)
88
+ .onOK(async () => { await onDialogOk(colInput, table, performAlignment, resolve, reject); })
71
89
  .show();
72
90
  });
73
91
  }
@@ -106,13 +124,11 @@ async function onDialogOk(
106
124
 
107
125
 
108
126
  async function onColInputChange(
109
- col: DG.Column<string>,
110
- table: DG.DataFrame,
111
- inputRootStyles: CSSStyleDeclaration[],
112
- methodInput: DG.InputBase<string | null>,
113
- clustersColInput: DG.InputBase<DG.Column<any> | null>,
114
- gapOpenInput: DG.InputBase<number | null>,
115
- gapExtendInput: DG.InputBase<number | null>
127
+ col: DG.Column<string>, table: DG.DataFrame,
128
+ pepseaInputRootStyles: CSSStyleDeclaration[], kalignInputRootStyles: CSSStyleDeclaration[],
129
+ methodInput: DG.InputBase<string | null>, clustersColInput: DG.InputBase<DG.Column<any> | null>,
130
+ gapOpenInput: DG.InputBase<number | null>, gapExtendInput: DG.InputBase<number | null>,
131
+ terminalGapInput: DG.InputBase<number | null>
116
132
  ): Promise<(() => Promise<DG.Column<string>>) | undefined> {
117
133
  try {
118
134
  if (col.semType !== DG.SEMTYPE.MACROMOLECULE)
@@ -122,8 +138,10 @@ async function onColInputChange(
122
138
  if (checkInputColumnUI(col, col.name,
123
139
  [NOTATION.FASTA, NOTATION.SEPARATOR], [ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT], false)
124
140
  ) { // Kalign - natural alphabets. if the notation is separator, convert to fasta and then run kalign
125
- for (const inputRootStyle of inputRootStyles)
126
- inputRootStyle.display = 'none';
141
+ switchDialog(pepseaInputRootStyles, kalignInputRootStyles, 'kalign');
142
+ gapOpenInput.value = null;
143
+ gapExtendInput.value = null;
144
+ terminalGapInput.value = null;
127
145
  const potentialColNC = new NotationConverter(col);
128
146
  const performCol: DG.Column<string> = potentialColNC.isFasta() ? col :
129
147
  potentialColNC.convert(NOTATION.FASTA);
@@ -131,28 +149,27 @@ async function onColInputChange(
131
149
  } else if (checkInputColumnUI(col, col.name,
132
150
  [NOTATION.HELM], [], false)
133
151
  ) { // PepSeA branch - Helm notation or separator notation with unknown alphabets
134
- for (const inputRootStyle of inputRootStyles)
135
- inputRootStyle.removeProperty('display');
152
+ switchDialog(pepseaInputRootStyles, kalignInputRootStyles, 'pepsea');
153
+ gapOpenInput.value = msaDefaultOptions.pepsea.gapOpen;
154
+ gapExtendInput.value = msaDefaultOptions.pepsea.gapExtend;
136
155
 
137
156
  return async () => await runPepsea(col, unusedName, methodInput.value!,
138
157
  gapOpenInput.value!, gapExtendInput.value!, clustersColInput.value);
139
- } else if (checkInputColumnUI(col, col.name, [NOTATION.SEPARATOR], [ALPHABET.UN], false)) {
140
- //if the column is separator with unknown alphabet, it might be helm. check if it can be converted to helm
141
- const potentialColNC = new NotationConverter(col);
142
- if (!await potentialColNC.checkHelmCompatibility())
143
- return;
144
- const helmCol = potentialColNC.convert(NOTATION.HELM);
145
- for (const inputRootStyle of inputRootStyles)
146
- inputRootStyle.removeProperty('display');
147
- console.log(helmCol.toList());
148
- // convert to helm and assign alignment function to PepSea
149
-
150
- return async () => await runPepsea(helmCol, unusedName, methodInput.value!,
158
+ } else if (checkInputColumnUI(col, col.name, [NOTATION.SEPARATOR], [ALPHABET.UN], false)) {
159
+ //if the column is separator with unknown alphabet, it might be helm. check if it can be converted to helm
160
+ const potentialColNC = new NotationConverter(col);
161
+ if (!await potentialColNC.checkHelmCompatibility())
162
+ return;
163
+ const helmCol = potentialColNC.convert(NOTATION.HELM);
164
+ switchDialog(pepseaInputRootStyles, kalignInputRootStyles, 'pepsea');
165
+ gapOpenInput.value = msaDefaultOptions.pepsea.gapOpen;
166
+ gapExtendInput.value = msaDefaultOptions.pepsea.gapExtend;
167
+ // convert to helm and assign alignment function to PepSea
168
+
169
+ return async () => await runPepsea(helmCol, unusedName, methodInput.value!,
151
170
  gapOpenInput.value!, gapExtendInput.value!, clustersColInput.value);
152
171
  } else {
153
- for (const inputRootStyle of inputRootStyles)
154
- inputRootStyle.display = 'none';
155
-
172
+ switchDialog(pepseaInputRootStyles, kalignInputRootStyles, 'kalign');
156
173
  return;
157
174
  }
158
175
  } catch (err: any) {
@@ -161,3 +178,21 @@ async function onColInputChange(
161
178
  _package.logger.error(errMsg);
162
179
  }
163
180
  }
181
+
182
+ type MSADialogType = 'kalign' | 'pepsea';
183
+
184
+ function switchDialog(
185
+ pepseaInputRootStyles: CSSStyleDeclaration[], kalignInputRootStyles: CSSStyleDeclaration[], dialogType: MSADialogType
186
+ ) {
187
+ if (dialogType === 'kalign') {
188
+ for (const inputRootStyle of pepseaInputRootStyles)
189
+ inputRootStyle.display = 'none';
190
+ for (const inputRootStyle of kalignInputRootStyles)
191
+ inputRootStyle.removeProperty('display');
192
+ } else {
193
+ for (const inputRootStyle of kalignInputRootStyles)
194
+ inputRootStyle.display = 'none';
195
+ for (const inputRootStyle of pepseaInputRootStyles)
196
+ inputRootStyle.removeProperty('display');
197
+ }
198
+ }
@@ -7,6 +7,7 @@ import {ALIGNMENT, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macr
7
7
  import Aioli from '@biowasm/aioli';
8
8
 
9
9
  import {AlignedSequenceEncoder} from '@datagrok-libraries/bio/src/sequence-encoder';
10
+ import {kalignVersion} from './constants';
10
11
  const fastaInputFilename = 'input.fa';
11
12
  const fastaOutputFilename = 'result.fasta';
12
13
 
@@ -27,10 +28,14 @@ function _stringsToFasta(sequences: string[]): string {
27
28
  * @param {boolean} isAligned Whether the column is aligned.
28
29
  * @param {string | undefined} unUsedName
29
30
  * @param {DG.Column | null} clustersCol Column with clusters.
31
+ * @param {number | undefined} gapOpen Gap open penalty.
32
+ * @param {number | undefined} gapExtend Gap extend penalty.
33
+ * @param {number | undefined} terminalGap Terminal gap penalty.
30
34
  * @return {Promise<DG.Column>} Aligned sequences.
31
35
  */
32
36
  export async function runKalign(srcCol: DG.Column<string>, isAligned: boolean = false, unUsedName: string = '',
33
- clustersCol: DG.Column | null = null): Promise<DG.Column> {
37
+ clustersCol: DG.Column | null = null, gapOpen?: number, gapExtend?: number, terminalGap?: number
38
+ ): Promise<DG.Column> {
34
39
  let sequences: string[] = srcCol.toList();
35
40
 
36
41
  if (isAligned)
@@ -55,7 +60,7 @@ export async function runKalign(srcCol: DG.Column<string>, isAligned: boolean =
55
60
 
56
61
  const CLI = await new Aioli([
57
62
  'base/1.0.0',
58
- {tool: 'kalign', version: '3.3.1', reinit: true}
63
+ {tool: 'kalign', version: kalignVersion, reinit: true}
59
64
  ]);
60
65
  const tgtCol = DG.Column.string(unUsedName, sequencesLength);
61
66
 
@@ -64,7 +69,12 @@ export async function runKalign(srcCol: DG.Column<string>, isAligned: boolean =
64
69
  const fasta = _stringsToFasta(clusterSequences);
65
70
 
66
71
  await CLI.fs.writeFile(fastaInputFilename, fasta);
67
- const output = await CLI.exec(`kalign ${fastaInputFilename} -f fasta -o ${fastaOutputFilename}`);
72
+ const gapOpenCommand = `${gapOpen !== undefined ? ` --gpo ${gapOpen}` : ''}`;
73
+ const gapExtendCommand = `${gapExtend !== undefined ? ` --gpe ${gapExtend}` : ''}`;
74
+ const terminalGapCommand = `${terminalGap !== undefined ? ` --tgpe ${terminalGap}` : ''}`;
75
+ const extraParams = `${gapOpenCommand}${gapExtendCommand}${terminalGapCommand}`;
76
+
77
+ const output = await CLI.exec(`kalign ${fastaInputFilename} -f fasta -o ${fastaOutputFilename}${extraParams}`);
68
78
  console.warn(output);
69
79
 
70
80
  const buf = await CLI.cat(fastaOutputFilename);
@@ -1,5 +1,5 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
-
2
+ import {pepseaMethods} from './pepsea';
3
3
  export type DataFrameDict = {[key: string]: DG.DataFrame};
4
4
 
5
5
  export namespace BarChart {
@@ -11,3 +11,9 @@ export type UTypedArray = Uint8Array | Uint16Array | Uint32Array;
11
11
  //AAR: (Position: (index: indexList))
12
12
  export type SubstitutionsInfo = Map<string, Map<string, Map<number, number[] | UTypedArray>>>;
13
13
  export type SelectionObject = {[postiton: string]: string[]};
14
+
15
+ export type multipleSequenceAlginmentUIOptions = {
16
+ col?: DG.Column<string> | null, clustersCol?: DG.Column | null,
17
+ pepsea?: {method?: typeof pepseaMethods[number], gapOpen?: number, gapExtend?: number},
18
+ kalign?: {gapOpen?: number, gapExtend?: number, terminalGap?: number}
19
+ };