@datagrok/bio 2.23.0 → 2.23.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Davit Rizhinashvili",
6
6
  "email": "drizhinashvili@datagrok.ai"
7
7
  },
8
- "version": "2.23.0",
8
+ "version": "2.23.2",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -44,12 +44,12 @@
44
44
  ],
45
45
  "dependencies": {
46
46
  "@biowasm/aioli": "^3.1.0",
47
- "@datagrok-libraries/bio": "^5.55.0",
48
- "@datagrok-libraries/chem-meta": "^1.2.7",
47
+ "@datagrok-libraries/bio": "^5.55.1",
48
+ "@datagrok-libraries/chem-meta": "^1.2.9",
49
49
  "@datagrok-libraries/math": "^1.2.6",
50
50
  "@datagrok-libraries/ml": "^6.10.6",
51
- "@datagrok-libraries/tutorials": "^1.6.1",
52
- "@datagrok-libraries/utils": "^4.6.5",
51
+ "@datagrok-libraries/tutorials": "^1.7.4",
52
+ "@datagrok-libraries/utils": "^4.6.9",
53
53
  "@webgpu/types": "^0.1.40",
54
54
  "ajv": "^8.12.0",
55
55
  "ajv-errors": "^3.0.0",
@@ -65,7 +65,7 @@
65
65
  "wu": "^2.1.0"
66
66
  },
67
67
  "devDependencies": {
68
- "@datagrok-libraries/helm-web-editor": "^1.1.14",
68
+ "@datagrok-libraries/helm-web-editor": "^1.1.16",
69
69
  "@datagrok-libraries/js-draw-lite": "^0.0.10",
70
70
  "@datagrok/chem": "^1.15.0",
71
71
  "@datagrok/dendrogram": "^1.2.33",
@@ -98,6 +98,14 @@ export namespace funcs {
98
98
  return await grok.functions.call('Bio:SeparatorSequenceCellRenderer', {});
99
99
  }
100
100
 
101
+ export async function bilnSequenceCellRenderer(): Promise<any> {
102
+ return await grok.functions.call('Bio:BilnSequenceCellRenderer', {});
103
+ }
104
+
105
+ export async function refineNotationProviderForBiln(col: DG.Column , stats: any , separator?: string | null): Promise<boolean> {
106
+ return await grok.functions.call('Bio:RefineNotationProviderForBiln', { col, stats, separator });
107
+ }
108
+
101
109
  export async function macroMolColumnPropertyPanel(molColumn: DG.Column ): Promise<any> {
102
110
  return await grok.functions.call('Bio:MacroMolColumnPropertyPanel', { molColumn });
103
111
  }
@@ -35,7 +35,7 @@ category('PepSeA', () => {
35
35
  const tgtMsaCol = df.getCol('MSA');
36
36
  for (let i = 0; i < resMsaCol!.length; ++i)
37
37
  expect(resMsaCol!.get(i) == tgtMsaCol.get(i), true);
38
- }, {timeout: 60000 /* docker */, stressTest: true, skipReason: 'Fails in docker'});
38
+ }, {timeout: 60000 /* docker */, skipReason: 'Fails in docker'});
39
39
 
40
40
  test('stderr', async () => {
41
41
  const logger = new TestLogger();
@@ -45,7 +45,7 @@ category('PepSeA', () => {
45
45
  const tgtMsaCol = df.getCol('MSA');
46
46
  expectArray(resMsaCol!.toList(), tgtMsaCol.toList());
47
47
  expect(logger.warningList[0].message, pepseaStderrWarningList);
48
- }, {timeout: 60000 /* docker */, stressTest: true, skipReason: 'Fails in docker'});
48
+ }, {timeout: 60000 /* docker */, skipReason: 'Fails in docker'});
49
49
 
50
50
  test('error', async () => {
51
51
  const logger = new TestLogger();
@@ -48,7 +48,7 @@ export function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelpe
48
48
  if (currentNotation === NOTATION.HELM)
49
49
  separatorInput.value = '/'; // helm monomers can have - in the name like D-aThr;
50
50
  dialogHeader.textContent = 'Current notation: ' + currentNotation;
51
- filteredNotations = notations;//.filter((e) => e !== currentNotation); TEMPORARY DO NOT FORGET TO UNCOMMENT
51
+ filteredNotations = notations.filter((e) => e !== currentNotation);
52
52
  targetNotationInput = ui.input.choice('Convert to', {
53
53
  value: filteredNotations[0], items: filteredNotations,
54
54
  onValueChanged: toggleSeparator
@@ -71,7 +71,7 @@ export function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelpe
71
71
  });
72
72
 
73
73
  const separatorArray = ['-', '.', '/'];
74
- let filteredNotations = notations;//.filter((e) => e !== currentNotation); // TEMPORARY DO NOT FORGET TO UNCOMMENT
74
+ let filteredNotations = notations.filter((e) => e !== currentNotation);
75
75
 
76
76
  const separatorInput = ui.input.choice('Separator', {value: separatorArray[0], items: separatorArray});
77
77
 
@@ -1,3 +1,4 @@
1
+ /* eslint-disable max-len */
1
2
  import {HELM_POLYMER_TYPE} from '@datagrok-libraries/bio/src/utils/const';
2
3
  import {HELM_ITEM_SEPARATOR} from './const';
3
4
  import {Bond} from './types';
@@ -12,7 +13,7 @@ export class ConnectionList {
12
13
  public connectionItems: string[];
13
14
 
14
15
  private validateConnectionItem(connectionItem: string): void {
15
- const allowedType = `(${HELM_POLYMER_TYPE.PEPTIDE}|${HELM_POLYMER_TYPE.RNA})`;
16
+ const allowedType = `(${HELM_POLYMER_TYPE.PEPTIDE}|${HELM_POLYMER_TYPE.RNA}|${HELM_POLYMER_TYPE.CHEM}|${HELM_POLYMER_TYPE.BLOB})`;
16
17
  const regex = new RegExp(`${allowedType}[0-9]+,${allowedType}[0-9]+,[0-9]+:R[0-9]+-[0-9]+:R[0-9]+`, 'g');
17
18
  if (!connectionItem.match(regex))
18
19
  throw new Error(`Cannot parse connection item from ${connectionItem}`);
@@ -55,7 +55,7 @@ export class Helm {
55
55
  private getSimplePolymerByMonomerIdx(monomerGlobalIdx: number): SimplePolymer {
56
56
  const shifts = this.getMonomerIdxShifts();
57
57
  const shiftValues = Object.values(shifts);
58
- const lowerBound = shiftValues.sort((a, b) => a - b).find(
58
+ const lowerBound = shiftValues.sort((a, b) => b - a).find( // find the largest shift not exceeding monomerGlobalIdx
59
59
  (shift) => monomerGlobalIdx >= shift
60
60
  );
61
61
  if (lowerBound === undefined)
@@ -28,7 +28,7 @@ export class SimplePolymer {
28
28
 
29
29
  private getPolymerType(): string {
30
30
  const regex = new RegExp(
31
- `(${HELM_POLYMER_TYPE.PEPTIDE}|${HELM_POLYMER_TYPE.RNA})[0-9]+{`
31
+ `(${HELM_POLYMER_TYPE.PEPTIDE}|${HELM_POLYMER_TYPE.RNA}|${HELM_POLYMER_TYPE.CHEM}|${HELM_POLYMER_TYPE.BLOB})[0-9]+{`
32
32
  );
33
33
  const match = this.simplePolymer.match(regex);
34
34
  if (!match)
@@ -11,6 +11,10 @@ import {_package} from '../package';
11
11
 
12
12
  /** Used in Macromolecule column tooltip */
13
13
  export class MacromoleculeColumnWidget extends DG.Widget {
14
+ get type(): string {
15
+ return 'MacromoleculeColumnWidget';
16
+ }
17
+
14
18
  private viewed: boolean = false;
15
19
 
16
20
  private wlViewer: WebLogoViewer | null = null;
@@ -3,14 +3,15 @@ import * as DG from 'datagrok-api/dg';
3
3
  import {GridCell} from 'datagrok-api/dg';
4
4
  import * as ui from 'datagrok-api/ui';
5
5
  import {ALPHABET, monomerToShort} from '@datagrok-libraries/bio/src/utils/macromolecule';
6
- import {GAP_SYMBOL, MONOMER_MOTIF_SPLITTER, TAGS as bioTAGS,} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
6
+ import {BioTags, GAP_SYMBOL, MONOMER_MOTIF_SPLITTER, TAGS as bioTAGS,} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
7
7
  import {MONOMER_RENDERER_TAGS} from '@datagrok-libraries/bio/src/utils/cell-renderer';
8
8
  import {getGridCellColTemp} from '@datagrok-libraries/bio/src/utils/cell-renderer-back-base';
9
9
 
10
10
  import {CellRendererWithMonomerLibBackBase} from './monomer-cell-renderer-base';
11
11
  import * as C from './constants';
12
12
  import {undefinedColor} from '@datagrok-libraries/bio/src/utils/cell-renderer-monomer-placer';
13
- import {HelmTypes} from '@datagrok-libraries/js-draw-lite/src/types/org';
13
+ import {HelmType, HelmTypes, PolymerType, PolymerTypes} from '@datagrok-libraries/js-draw-lite/src/types/org';
14
+ import {polymerTypeToHelmType} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
14
15
 
15
16
  const Tags = new class {
16
17
  tooltipHandlerTemp = 'tooltip-handler.Monomer';
@@ -54,15 +55,17 @@ export class MonomerCellRendererBack extends CellRendererWithMonomerLibBackBase
54
55
  const biotype = alphabet === ALPHABET.RNA || alphabet === ALPHABET.DNA ? HelmTypes.NUCLEOTIDE : HelmTypes.AA;
55
56
  for (let i = 0; i < shortSymbols.length; i++) {
56
57
  const symbol: string = symbols[i];
58
+ const actBioType: HelmType = this.getHelmType(gridCell, biotype);
59
+
57
60
  let textcolor = undefinedColor;
58
61
  let backgroundcolor = 'rgb(255, 255, 255)';
59
62
  if (this.monomerLib) {
60
63
  if (applyToBackground) {
61
- const colors = this.monomerLib.getMonomerColors(biotype, symbol);
64
+ const colors = this.monomerLib.getMonomerColors(actBioType, symbol);
62
65
  textcolor = colors?.textcolor ?? textcolor;
63
66
  backgroundcolor = colors?.backgroundcolor ?? backgroundcolor;
64
67
  } else
65
- textcolor = this.monomerLib.getMonomerTextColor(biotype, symbol);
68
+ textcolor = this.monomerLib.getMonomerTextColor(actBioType, symbol);
66
69
  }
67
70
  if (applyToBackground && symbols.length == 1) {
68
71
  g.fillStyle = backgroundcolor;
@@ -101,7 +104,12 @@ export class MonomerCellRendererBack extends CellRendererWithMonomerLibBackBase
101
104
 
102
105
  const biotype = alphabet === ALPHABET.RNA || alphabet === ALPHABET.DNA ? HelmTypes.NUCLEOTIDE : HelmTypes.AA;
103
106
  const tooltipEls = monomerName.split(MONOMER_MOTIF_SPLITTER)
104
- .map((s) => !s || s === GAP_SYMBOL || s === DASH_GAP_SYMBOL ? ui.divText('gap') : this.monomerLib!.getTooltip(biotype, s));
107
+ .map((s) => {
108
+ if (!s || s === GAP_SYMBOL || s === DASH_GAP_SYMBOL)
109
+ return ui.divText('gap');
110
+ const actBioType: HelmType = this.getHelmType(gridCell, biotype);
111
+ return this.monomerLib!.getTooltip(actBioType, s);
112
+ });
105
113
  const tooltipEl = ui.divH(tooltipEls, {style: {alignItems: 'top'}});
106
114
  // tooltip max width is 600px, so we need to shrink the canvases a bit if needed. by default, it is 250px
107
115
  const canvases = Array.from(tooltipEl.querySelectorAll('canvas'));
@@ -117,6 +125,20 @@ export class MonomerCellRendererBack extends CellRendererWithMonomerLibBackBase
117
125
  return true; // To prevent default tooltip behaviour
118
126
  }
119
127
 
128
+ private getHelmType(gridCell: GridCell, defaultType: HelmType): HelmType {
129
+ let biotype = defaultType;
130
+ if ((gridCell.tableRowIndex ?? -1) > -1 && gridCell.tableColumn?.getTag(BioTags.polymerTypeColumnName)) {
131
+ const ptColName = gridCell.tableColumn.getTag(BioTags.polymerTypeColumnName);
132
+ const ptCol = gridCell.tableColumn.dataFrame?.col(ptColName);
133
+ if (ptCol) {
134
+ const ptrString = ptCol.get(gridCell.tableRowIndex!);
135
+ if (ptrString && [PolymerTypes.BLOB, PolymerTypes.CHEM, PolymerTypes.G, PolymerTypes.PEPTIDE, PolymerTypes.RNA].includes(ptrString))
136
+ biotype = polymerTypeToHelmType(ptrString as PolymerType);
137
+ }
138
+ }
139
+ return biotype;
140
+ }
141
+
120
142
  override async awaitRendered(timeout: number = 10000, reason: string = `${timeout} timeout`): Promise<void> {
121
143
  return Promise.resolve();
122
144
  }
@@ -162,5 +184,6 @@ export class MonomerCellRenderer extends DG.GridCellRenderer {
162
184
  onMouseMove(gridCell: GridCell, e: MouseEvent) {
163
185
  const back = MonomerCellRendererBack.getOrCreate(gridCell);
164
186
  back.onMouseMove(gridCell, e);
187
+ e.preventDefault();
165
188
  }
166
189
  }
@@ -304,7 +304,7 @@ export class MonomerLibBase implements IMonomerLibBase {
304
304
  }
305
305
 
306
306
  const naSymbol: string | undefined = monomer[OPT.NATURAL_ANALOG];
307
- if (!res && naSymbol)
307
+ if (!res && naSymbol && naSymbol !== monomer.symbol)
308
308
  return this.getMonomerColors(biotype, naSymbol);
309
309
  }
310
310
 
@@ -17,6 +17,7 @@ import {LIB_PATH} from '../consts';
17
17
 
18
18
  import '../../../../css/monomer-manager.css';
19
19
  import {MONOMER_RENDERER_TAGS} from '@datagrok-libraries/bio/src/utils/cell-renderer';
20
+ import {BioTags} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
20
21
 
21
22
  // columns of monomers dataframe, note that rgroups is hidden and will be displayed as separate columns
22
23
  export enum MONOMER_DF_COLUMN_NAMES {
@@ -50,7 +51,7 @@ export const MONOMER_DF_COLUMNS = {
50
51
  } as const;
51
52
 
52
53
  export async function standardiseMonomers(monomers: Monomer[]) {
53
- const df = getMonomersDataFrame(monomers);
54
+ const df = await getMonomersDataFrame(monomers);
54
55
  if (monomers.length !== df.rowCount)
55
56
  throw new Error(`Monomers length ${monomers.length} does not match dataframe row count ${df.rowCount}`);
56
57
  const fixedMonomers = await Promise.all(new Array(monomers.length).fill(null).map(async (_, i) => monomerFromDfRow(df.rows.get(i))));
@@ -117,7 +118,7 @@ export async function standardizeMonomerLibrary(libraryString: string) {
117
118
  return libraryStringFixed;
118
119
  }
119
120
 
120
- export function getMonomersDataFrame(monomers: Monomer[]) {
121
+ export async function getMonomersDataFrame(monomers: Monomer[]) {
121
122
  try {
122
123
  const df = DG.DataFrame.create(monomers.length);
123
124
 
@@ -136,11 +137,14 @@ export function getMonomersDataFrame(monomers: Monomer[]) {
136
137
  df.columns.addNew(rgroupName, DG.COLUMN_TYPE.STRING);
137
138
  }
138
139
  }
139
- df.col(MONOMER_DF_COLUMN_NAMES.SYMBOL)!.semType = 'Monomer';
140
- df.col(MONOMER_DF_COLUMN_NAMES.SYMBOL)!.setTag(MONOMER_RENDERER_TAGS.applyToBackground, 'true');
141
-
142
-
143
- for (let i = 0; i < monomers.length; i++) {
140
+ df.col(MONOMER_DF_COLUMN_NAMES.SYMBOL)!.semType = 'Monomer';
141
+ df.col(MONOMER_DF_COLUMN_NAMES.SYMBOL)!.setTag(MONOMER_RENDERER_TAGS.applyToBackground, 'true');
142
+ df.col(MONOMER_DF_COLUMN_NAMES.SYMBOL)!.setTag(BioTags.polymerTypeColumnName, MONOMER_DF_COLUMN_NAMES.POLYMER_TYPE);
143
+ const pg = DG.TaskBarProgressIndicator.create('Creating Monomers DataFrame...');
144
+ for (let i = 0; i < monomers.length; i++) {
145
+ if (i % 20 === 0)
146
+ pg.update(((i + 1) / monomers.length) * 100, 'Creating Monomers DataFrame...');
147
+ const doFill = () => {
144
148
  let molSmiles = getCorrectedSmiles(monomers[i].rgroups, monomers[i].smiles, monomers[i].molfile);
145
149
  molSmiles = fixRGroupsAsElementsSmiles(molSmiles);
146
150
  // r-groups here might be broken, so need to make sure they are correct
@@ -182,12 +186,33 @@ export function getMonomersDataFrame(monomers: Monomer[]) {
182
186
  } catch (e) {
183
187
  console.error(`Error setting date ${monomers[i].createDate}`, e);
184
188
  }
185
- }
186
- df.col(MONOMER_DF_COLUMN_NAMES.MONOMER)!.semType = DG.SEMTYPE.MOLECULE;
187
- uniqueRgroupNames.forEach((rgName) => {
188
- df.col(rgName)!.semType = DG.SEMTYPE.MOLECULE;
189
+ };
190
+ await new Promise<void>((resolve) => {
191
+ // this is done not to block the UI thread for too long
192
+ const inProm = () => {
193
+ try {
194
+ doFill();
195
+ resolve();
196
+ } catch (e) {
197
+ console.error('Error in doFill', e);
198
+ resolve();
199
+ }
200
+ };
201
+ if (i % 20 === 0) {
202
+ setTimeout(() => {
203
+ inProm();
204
+ });
205
+ } else
206
+ inProm();
189
207
  });
190
- return df;
208
+ }
209
+ pg.close();
210
+ df.col(MONOMER_DF_COLUMN_NAMES.MONOMER)!.semType = DG.SEMTYPE.MOLECULE;
211
+
212
+ uniqueRgroupNames.forEach((rgName) => {
213
+ df.col(rgName)!.semType = DG.SEMTYPE.MOLECULE;
214
+ });
215
+ return df;
191
216
  } catch (e) {
192
217
  grok.shell.error('Error creating monomers dataframe');
193
218
  console.error(e);
@@ -506,7 +531,7 @@ export class MonomerManager implements IMonomerManager {
506
531
  return this.activeMonomerLib!.getMonomer(polymerType, symbol)!;
507
532
  });
508
533
  });
509
- const df = getMonomersDataFrame(monomers);
534
+ const df = await getMonomersDataFrame(monomers);
510
535
  return df;
511
536
  } catch (e) {
512
537
  grok.shell.error('Error creating monomers dataframe');
@@ -8,11 +8,12 @@ import {TAGS as bioTAGS, ALPHABET} from '@datagrok-libraries/bio/src/utils/macro
8
8
  import {GAP_SYMBOL} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
9
9
  import {IMonomerLibBase} from '@datagrok-libraries/bio/src/types';
10
10
  import {HelmType} from '@datagrok-libraries/bio/src/helm/types';
11
- import {HelmTypes} from '@datagrok-libraries/bio/src/helm/consts';
11
+ import {HelmTypes, PolymerTypes} from '@datagrok-libraries/bio/src/helm/consts';
12
12
 
13
- import '../../css/composition-analysis.css';
14
13
  import {ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
15
14
  import {buildCompositionTable} from '@datagrok-libraries/bio/src/utils/composition-table';
15
+ import '../../css/composition-analysis.css';
16
+ import {polymerTypeToHelmType} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
16
17
 
17
18
  export function getCompositionAnalysisWidget(
18
19
  val: DG.SemanticValue, monomerLib: IMonomerLibBase, seqHelper: ISeqHelper
@@ -26,14 +27,22 @@ export function getCompositionAnalysisWidget(
26
27
  const sh = seqHelper.getSeqHandler(val.cell.column as DG.Column<string>);
27
28
  const rowIdx = val.cell.rowIndex;
28
29
  const seqSS = sh.getSplitted(rowIdx);
30
+ // in case of HELM, there might be multiple biotypes in one sequence
31
+ const bioTypes: {[symbol: string]: HelmType} = {};
29
32
  wu.count(0).take(seqSS.length).filter((posIdx) => !seqSS.isGap(posIdx)).forEach((posIdx) => {
30
33
  let cm = seqSS.getCanonical(posIdx);
31
34
  if (biotype === HelmTypes.NUCLEOTIDE && sh.isHelm() && cm[1] === '(' && cm[cm.length - 2] === ')')
32
35
  cm = cm.substring(2, cm.length - 2);
33
36
  const count = counts[cm] || 0;
34
37
  counts[cm] = count + 1;
38
+ if (!bioTypes[cm] && seqSS.graphInfo?.polymerTypes) {
39
+ const polymerType = seqSS.graphInfo.polymerTypes[posIdx];
40
+ bioTypes[cm] = polymerTypeToHelmType(polymerType);
41
+ }
35
42
  });
36
- const table = buildCompositionTable(counts, biotype, monomerLib);
43
+
44
+
45
+ const table = buildCompositionTable(counts, biotype, monomerLib, Object.keys(bioTypes).length ? bioTypes : undefined);
37
46
  Array.from(table.rows).forEach((row) => {
38
47
  const barCol = (row.getElementsByClassName('macromolecule-cell-comp-analysis-bar')[0] as HTMLDivElement)
39
48
  .style.backgroundColor;
@@ -5,6 +5,10 @@ import {_package} from '../package';
5
5
 
6
6
 
7
7
  export class PackageSettingsEditorWidget extends DG.Widget {
8
+ get type(): string {
9
+ return 'PackageSettingsEditorWidget';
10
+ }
11
+
8
12
  maxMonomerLengthProp: DG.Property;
9
13
  tooltipWebLogo: DG.Property;
10
14
  defaultSeparator: DG.Property;
@@ -45,9 +45,13 @@ export async function toAtomicLevelSingle(sequence: DG.SemanticValue): Promise<{
45
45
  if (sequence.cell.column.temp[SeqTemps.notationProvider])
46
46
  singleValCol.temp[SeqTemps.notationProvider] = sequence.cell.column.temp[SeqTemps.notationProvider];
47
47
  // helm and biln will have cyclization marks, so we need to use POM to convert them
48
- const shouldUsePOM = (seqSh.getSplitted(sequence.cell.rowIndex).graphInfo?.connections?.length ?? 0) > 0;
48
+ const seqSplitted = seqSh.getSplitted(sequence.cell.rowIndex);
49
+ const shouldUsePOM = (seqSplitted.graphInfo?.connections?.length ?? 0) > 0;
50
+ const isHelmWithMultiplePolymerTypes = seqSh.isHelm() &&
51
+ (new Set((seqSplitted.graphInfo?.polymerTypes ?? []))).size > 1;
52
+
49
53
  await PackageFunctions.toAtomicLevel(sDf, singleValCol,
50
- shouldUsePOM, false);
54
+ shouldUsePOM || isHelmWithMultiplePolymerTypes, false);
51
55
  if (sDf.columns.length < 2) {
52
56
  errorText = 'No structure generated';
53
57
  return {errorText, mol: ''};