@datagrok/bio 2.22.1 → 2.22.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,6 @@
1
+ /* eslint-disable max-len */
2
+ /* eslint-disable max-params */
3
+ /* eslint-disable max-lines-per-function */
1
4
  import * as grok from 'datagrok-api/grok';
2
5
  import * as DG from 'datagrok-api/dg';
3
6
  import * as ui from 'datagrok-api/ui';
@@ -27,10 +30,15 @@ export async function multipleSequenceAlignmentUI(
27
30
  options.pepsea.gapExtend ??= msaDefaultOptions.pepsea.gapExtend;
28
31
 
29
32
  const table = options.col?.dataFrame ?? grok.shell.t;
33
+ if (!table) {
34
+ const errMsg: string = `Multiple Sequence Alignment analysis requires a dataset with a macromolecule column.`;
35
+ reject(new MsaWarning(ui.divText(errMsg)));
36
+ return; // Prevents creating the MSA dialog
37
+ }
38
+
30
39
  const seqCol = options.col ?? table.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
31
40
  if (seqCol == null) {
32
41
  const errMsg: string = `Multiple Sequence Alignment analysis requires a dataset with a macromolecule column.`;
33
- grok.shell.warning(errMsg);
34
42
  reject(new MsaWarning(ui.divText(errMsg)));
35
43
  return; // Prevents creating the MSA dialog
36
44
  }
@@ -50,6 +58,8 @@ export async function multipleSequenceAlignmentUI(
50
58
  const gapExtendInput = ui.input.float('Gap extend', {value: options.pepsea.gapExtend});
51
59
  gapExtendInput.setTooltip('Gap extension penalty to skip the alignment');
52
60
 
61
+ const onlySelectedRowsInput = ui.input.bool('Selected Rows Only', {value: false});
62
+
53
63
  const msaParamsDiv = ui.inputs([gapOpenInput, gapExtendInput, terminalGapInput]);
54
64
  const msaParamsButton = ui.button('Alignment parameters', () => {
55
65
  msaParamsDiv.hidden = !msaParamsDiv.hidden;
@@ -80,7 +90,7 @@ export async function multipleSequenceAlignmentUI(
80
90
  okBtn.disabled = false;
81
91
  performAlignment = await onColInputChange(
82
92
  colInput.value, table, seqHelper, pepseaInputRootStyles, kalignInputRootStyles,
83
- methodInput, clustersColInput, gapOpenInput, gapExtendInput, terminalGapInput,
93
+ methodInput, clustersColInput, gapOpenInput, gapExtendInput, terminalGapInput, onlySelectedRowsInput
84
94
  );
85
95
  }, filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE
86
96
  } as ColumnInputOptions
@@ -95,6 +105,7 @@ export async function multipleSequenceAlignmentUI(
95
105
  .add(methodInput)
96
106
  .add(msaParamsDiv)
97
107
  .add(msaParamsButton)
108
+ .add(onlySelectedRowsInput)
98
109
  .add(kalignVersionDiv)
99
110
  .onOK(async () => { await onDialogOk(colInput, table, performAlignment, resolve, reject); });
100
111
  const okBtn = dlg.getButton('OK');
@@ -104,7 +115,7 @@ export async function multipleSequenceAlignmentUI(
104
115
  if (options.col) {
105
116
  performAlignment = await onColInputChange(
106
117
  options.col, table, seqHelper, pepseaInputRootStyles, kalignInputRootStyles,
107
- methodInput, clustersColInput, gapOpenInput, gapExtendInput, terminalGapInput,
118
+ methodInput, clustersColInput, gapOpenInput, gapExtendInput, terminalGapInput, onlySelectedRowsInput
108
119
  );
109
120
  await onDialogOk(colInput, table, performAlignment, resolve, reject);
110
121
  return; // Prevents showing the dialog
@@ -150,7 +161,7 @@ async function onColInputChange(
150
161
  pepseaInputRootStyles: CSSStyleDeclaration[], kalignInputRootStyles: CSSStyleDeclaration[],
151
162
  methodInput: DG.InputBase<string | null>, clustersColInput: DG.InputBase<DG.Column<any> | null>,
152
163
  gapOpenInput: DG.InputBase<number | null>, gapExtendInput: DG.InputBase<number | null>,
153
- terminalGapInput: DG.InputBase<number | null>,
164
+ terminalGapInput: DG.InputBase<number | null>, selectedRowsOnlyInput: DG.InputBase<boolean>,
154
165
  ): Promise<(() => Promise<DG.Column<string> | null>) | undefined> {
155
166
  try {
156
167
  if (col.semType !== DG.SEMTYPE.MACROMOLECULE)
@@ -167,7 +178,7 @@ async function onColInputChange(
167
178
  const potentialColSh = seqHelper.getSeqHandler(col);
168
179
  const performCol: DG.Column<string> = potentialColSh.isFasta() ? col :
169
180
  potentialColSh.convert(NOTATION.FASTA);
170
- return async () => await runKalign(performCol, false, unusedName, clustersColInput.value);
181
+ return async () => await runKalign(table, performCol, false, unusedName, clustersColInput.value, undefined, undefined, undefined, selectedRowsOnlyInput.value);
171
182
  } else if (checkInputColumn(col, col.name, seqHelper, [NOTATION.HELM], [])[0]) {
172
183
  // PepSeA branch - Helm notation or separator notation with unknown alphabets
173
184
  switchDialog(pepseaInputRootStyles, kalignInputRootStyles, 'pepsea');
@@ -175,8 +186,8 @@ async function onColInputChange(
175
186
  gapExtendInput.value ??= msaDefaultOptions.pepsea.gapExtend;
176
187
 
177
188
  return async () => {
178
- return runPepsea(col, unusedName, methodInput.value!,
179
- gapOpenInput.value!, gapExtendInput.value!, clustersColInput.value);
189
+ return runPepsea(table, col, unusedName, methodInput.value!,
190
+ gapOpenInput.value!, gapExtendInput.value!, clustersColInput.value, undefined, selectedRowsOnlyInput.value);
180
191
  };
181
192
  } else if (checkInputColumn(col, col.name, seqHelper, [NOTATION.SEPARATOR], [ALPHABET.UN])[0]) {
182
193
  //if the column is separator with unknown alphabet, it might be helm. check if it can be converted to helm
@@ -188,8 +199,8 @@ async function onColInputChange(
188
199
  // convert to helm and assign alignment function to PepSea
189
200
 
190
201
  return async () => {
191
- return runPepsea(helmCol, unusedName, methodInput.value!,
192
- gapOpenInput.value!, gapExtendInput.value!, clustersColInput.value);
202
+ return runPepsea(table, helmCol, unusedName, methodInput.value!,
203
+ gapOpenInput.value!, gapExtendInput.value!, clustersColInput.value, undefined, selectedRowsOnlyInput.value);
193
204
  };
194
205
  } else {
195
206
  gapOpenInput.value = null;
@@ -1,3 +1,5 @@
1
+ /* eslint-disable max-params */
2
+ /* eslint-disable max-len */
1
3
  import * as grok from 'datagrok-api/grok';
2
4
  import * as ui from 'datagrok-api/ui';
3
5
  import * as DG from 'datagrok-api/dg';
@@ -44,8 +46,8 @@ function _stringsToFasta(sequences: string[]): string {
44
46
  * @param {number | undefined} terminalGap Terminal gap penalty.
45
47
  * @return {Promise<DG.Column>} Aligned sequences.
46
48
  */
47
- export async function runKalign(srcCol: DG.Column<string>, isAligned: boolean = false, unUsedName: string = '',
48
- clustersCol: DG.Column | null = null, gapOpen?: number, gapExtend?: number, terminalGap?: number,
49
+ export async function runKalign(table: DG.DataFrame, srcCol: DG.Column<string>, isAligned: boolean = false, unUsedName: string = '',
50
+ clustersCol: DG.Column | null = null, gapOpen?: number, gapExtend?: number, terminalGap?: number, onlySelected?: boolean
49
51
  ): Promise<DG.Column> {
50
52
  let sequences: string[] = srcCol.toList();
51
53
 
@@ -63,10 +65,21 @@ export async function runKalign(srcCol: DG.Column<string>, isAligned: boolean =
63
65
  const clustersColData = clustersCol.getRawData();
64
66
  const fastaSequences: string[][] = new Array(clustersColCategories.length);
65
67
  const clusterIndexes: number[][] = new Array(clustersColCategories.length);
66
- for (let rowIdx = 0; rowIdx < sequencesLength; ++rowIdx) {
67
- const clusterCategoryIdx = clustersColData[rowIdx];
68
- (fastaSequences[clusterCategoryIdx] ??= []).push(sequences[rowIdx]);
69
- (clusterIndexes[clusterCategoryIdx] ??= []).push(rowIdx);
68
+ if (!onlySelected) {
69
+ for (let rowIdx = 0; rowIdx < sequencesLength; ++rowIdx) {
70
+ const clusterCategoryIdx = clustersColData[rowIdx];
71
+ (fastaSequences[clusterCategoryIdx] ??= []).push(sequences[rowIdx]);
72
+ (clusterIndexes[clusterCategoryIdx] ??= []).push(rowIdx);
73
+ }
74
+ } else {
75
+ const selection = table.selection;
76
+ if (selection.length === 0)
77
+ throw new Error('No selected rows in the table.');
78
+ for (let rowIdx = -1; (rowIdx = selection.findNext(rowIdx, true)) !== -1;) {
79
+ const clusterCategoryIdx = clustersColData[rowIdx];
80
+ (fastaSequences[clusterCategoryIdx] ??= []).push(sequences[rowIdx]);
81
+ (clusterIndexes[clusterCategoryIdx] ??= []).push(rowIdx);
82
+ }
70
83
  }
71
84
  checkForSingleSeqClusters(clusterIndexes, clustersColCategories);
72
85
 
@@ -78,6 +91,8 @@ export async function runKalign(srcCol: DG.Column<string>, isAligned: boolean =
78
91
 
79
92
  for (let clusterIdx = 0; clusterIdx < clustersColCategories.length; ++clusterIdx) {
80
93
  const clusterSequences = fastaSequences[clusterIdx];
94
+ if (!clusterSequences || clusterSequences.length === 0)
95
+ continue; // skip empty clusters
81
96
  const fasta = _stringsToFasta(clusterSequences);
82
97
 
83
98
  await CLI.fs.writeFile(fastaInputFilename, fasta);
@@ -122,7 +137,7 @@ export async function testMSAEnoughMemory(col: DG.Column<string>): Promise<void>
122
137
 
123
138
  for (let i = delta; i < sequencesCount; i += delta) {
124
139
  try {
125
- await runKalign(DG.Column.fromStrings(col.name, col.toList().slice(0, Math.round(i))));
140
+ await runKalign(col.dataFrame, DG.Column.fromStrings(col.name, col.toList().slice(0, Math.round(i))));
126
141
  console.log(`runKalign succeeded on ${i}`);
127
142
  } catch (error) {
128
143
  console.log(`runKalign failed on ${i} with '${error}'`);
@@ -1,13 +1,10 @@
1
+ /* eslint-disable max-params */
1
2
  /* Do not change these import lines to match external modules in webpack configuration */
2
3
  import * as grok from 'datagrok-api/grok';
3
4
  import * as ui from 'datagrok-api/ui';
4
5
  import * as DG from 'datagrok-api/dg';
5
6
 
6
- import {Subject} from 'rxjs';
7
-
8
- import {testEvent} from '@datagrok-libraries/utils/src/test';
9
7
  import {NOTATION, TAGS as bioTAGS, ALIGNMENT, ALPHABET} from '@datagrok-libraries/bio/src/utils/macromolecule';
10
- import {fetchWrapper} from '@datagrok-libraries/utils/src/fetch-utils';
11
8
  import {ILogger} from '@datagrok-libraries/bio/src/utils/logger';
12
9
 
13
10
  import {checkForSingleSeqClusters} from './multiple-sequence-alignment';
@@ -44,9 +41,9 @@ type PepseaBodyUnit = { ID: string, HELM: string };
44
41
  * @param {DG.Column} clustersCol - The column containing the clusters of the sequences.
45
42
  * @param logger {ILogger} Logger
46
43
  */
47
- export async function runPepsea(srcCol: DG.Column<string>, unUsedName: string,
44
+ export async function runPepsea(table: DG.DataFrame, srcCol: DG.Column<string>, unUsedName: string,
48
45
  method: typeof pepseaMethods[number] = 'ginsi', gapOpen: number = 1.53, gapExtend: number = 0.0,
49
- clustersCol: DG.Column<string | number> | null = null, logger?: ILogger
46
+ clustersCol: DG.Column<string | number> | null = null, logger?: ILogger, onlySelected: boolean = false
50
47
  ): Promise<DG.Column<string>> {
51
48
  const pepseaContainer = await Pepsea.getDockerContainer();
52
49
  const peptideCount = srcCol.length;
@@ -60,23 +57,42 @@ export async function runPepsea(srcCol: DG.Column<string>, unUsedName: string,
60
57
  const clusterIndexes: number[][] = new Array(clustersColCategories.length);
61
58
 
62
59
  // Grouping data by clusters
63
- for (let rowIndex = 0; rowIndex < peptideCount; ++rowIndex) {
64
- const clusterCategoryIdx = clustersColData[rowIndex];
65
- const cluster = clustersColCategories[clusterCategoryIdx];
66
- if (cluster === '')
67
- continue;
68
-
69
- const clusterId = clustersColCategories.indexOf(cluster);
70
- const helmSeq = srcCol.get(rowIndex);
71
- if (helmSeq) {
72
- (bodies[clusterId] ??= []).push({ID: rowIndex.toString(), HELM: helmSeq});
73
- (clusterIndexes[clusterCategoryIdx] ??= []).push(rowIndex);
60
+ if (!onlySelected) {
61
+ for (let rowIndex = 0; rowIndex < peptideCount; ++rowIndex) {
62
+ const clusterCategoryIdx = clustersColData[rowIndex];
63
+ const cluster = clustersColCategories[clusterCategoryIdx];
64
+ if (!cluster)
65
+ continue;
66
+
67
+ const clusterId = clusterCategoryIdx;
68
+ const helmSeq = srcCol.get(rowIndex);
69
+ if (helmSeq) {
70
+ (bodies[clusterId] ??= []).push({ID: rowIndex.toString(), HELM: helmSeq});
71
+ (clusterIndexes[clusterCategoryIdx] ??= []).push(rowIndex);
72
+ }
73
+ }
74
+ } else {
75
+ const selection = table.selection;
76
+ for (let rowIndex = -1; (rowIndex = selection.findNext(rowIndex, true)) !== -1;) {
77
+ const clusterCategoryIdx = clustersColData[rowIndex];
78
+ const cluster = clustersColCategories[clusterCategoryIdx];
79
+ if (!cluster)
80
+ continue;
81
+
82
+ const clusterId = clusterCategoryIdx;
83
+ const helmSeq = srcCol.get(rowIndex);
84
+ if (helmSeq) {
85
+ (bodies[clusterId] ??= []).push({ID: rowIndex.toString(), HELM: helmSeq});
86
+ (clusterIndexes[clusterCategoryIdx] ??= []).push(rowIndex);
87
+ }
74
88
  }
75
89
  }
76
90
  checkForSingleSeqClusters(clusterIndexes, clustersColCategories);
77
91
 
78
- const alignedSequences: string[] = new Array(peptideCount);
92
+ const alignedSequences: string[] = new Array(peptideCount).fill(null);
79
93
  for (const body of bodies) { // getting aligned sequences for each cluster
94
+ if (!body || body.length === 0)
95
+ continue;
80
96
  const alignedObject = await requestAlignedObjects(pepseaContainer.id, body, method, gapOpen, gapExtend, logger);
81
97
  const alignments = alignedObject.Alignment;
82
98
 
@@ -6,11 +6,9 @@ import * as DG from 'datagrok-api/dg';
6
6
  import {TAGS as mmcrTAGS} from '@datagrok-libraries/bio/src/utils/cell-renderer';
7
7
 
8
8
  import {MmcrTemps, rendererSettingsChangedState} from '@datagrok-libraries/bio/src/utils/cell-renderer-consts';
9
- import {ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
10
- import {getMolfilesFromSingleSeq} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
11
9
 
12
10
  import {_package} from '../package';
13
-
11
+ import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
14
12
 
15
13
  /**
16
14
  * @export
@@ -107,67 +105,42 @@ export function getMacromoleculeColumnPropertyPanel(col: DG.Column): DG.Widget {
107
105
  tooltipText: 'When on, all sequences get rendered in the "diff" mode'
108
106
  });
109
107
 
110
- const sequenceConfigInputs = ui.inputs([
108
+ const shouldShowMultilineToggle = (): boolean => {
109
+ const units = col.meta.units;
110
+
111
+ // Don't show for formats that have their own complex renderers (like Helm).
112
+ if (units === NOTATION.HELM || units === NOTATION.CUSTOM)
113
+ return false;
114
+
115
+ // For all other cases, including 'UN' (non-canonical), 'fasta', and 'separator', show the multiline toggle.
116
+ return true;
117
+ };
118
+
119
+ let renderMultilineInput = null;
120
+ if (shouldShowMultilineToggle()) {
121
+ renderMultilineInput = ui.input.bool('Multiline Rendering', {
122
+ value: col.getTag('renderMultiline') === 'true',
123
+ onValueChanged: (value) => {
124
+ col.tags['renderMultiline'] = value ? 'true' : 'false';
125
+ col.dataFrame.fireValuesChanged();
126
+ },
127
+ tooltipText: 'Render sequences across multiple lines when they exceed cell width'
128
+ });
129
+ }
130
+
131
+ const inputsArray = [
111
132
  fontSizeInput,
112
133
  maxMonomerLengthInput,
113
134
  gapLengthInput,
114
135
  referenceSequenceInput,
115
136
  colorCodeInput,
116
137
  compareWithCurrentInput,
117
- ]);
138
+ ];
118
139
 
119
- return new DG.Widget(sequenceConfigInputs);
120
- }
140
+ if (renderMultilineInput)
141
+ inputsArray.push(renderMultilineInput);
121
142
 
122
- /**
123
- * 3D representation widget of macromolecule.
124
- *
125
- * @export
126
- * @param {DG.Cell} macroMolecule macromolecule cell.
127
- * @param {any[]} monomersLibObject
128
- * @param {ISeqHelper} seqHelper
129
- * @return {Promise<DG.Widget>} Widget.
130
- */
131
- export async function representationsWidget(
132
- macroMolecule: DG.Cell, monomersLibObject: any[], seqHelper: ISeqHelper
133
- ): Promise<DG.Widget> {
134
- const pi = DG.TaskBarProgressIndicator.create('Creating 3D view');
135
-
136
- let widgetHost;
137
- let molBlock3D = '';
138
- try {
139
- try {
140
- const _atomicCodes = getMolfilesFromSingleSeq(macroMolecule, monomersLibObject, seqHelper);
141
- const result = '';//await getMacroMol(atomicCodes!);
142
- const molBlock2D = result[0];
143
- molBlock3D = (await grok.functions.call('Bio:Embed', {molBlock2D})) as unknown as string;
144
- } catch (e) {
145
- console.warn(e);
146
- }
147
-
148
- try {
149
- molBlock3D = molBlock3D.replaceAll('\\n', '\n');
150
- const stringBlob = new Blob([molBlock3D], {type: 'text/plain'});
151
- const nglHost = ui.div([], {classes: 'd4-ngl-viewer', id: 'ngl-3d-host'});
152
-
153
- //@ts-ignore
154
- const stage = new NGL.Stage(nglHost, {backgroundColor: 'white'});
155
- //@ts-ignore
156
- stage.loadFile(stringBlob, {ext: 'sdf'}).then(function(comp: NGL.StructureComponent) {
157
- stage.setSize(300, 300);
158
- comp.addRepresentation('ball+stick');
159
- comp.autoView();
160
- });
161
- const sketch = grok.chem.svgMol(molBlock3D);
162
- const panel = ui.divH([sketch]);
163
-
164
- widgetHost = ui.div([panel, nglHost]);
165
- } catch (e) {
166
- widgetHost = ui.divText('Couldn\'t get peptide structure');
167
- }
168
- } catch (e) {
169
- widgetHost = ui.divText('Couldn\'t get peptide structure');
170
- }
171
- pi.close();
172
- return new DG.Widget(widgetHost);
143
+
144
+ const sequenceConfigInputs = ui.inputs(inputsArray);
145
+ return new DG.Widget(sequenceConfigInputs);
173
146
  }