@datagrok/bio 1.9.0 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,8 @@ import * as grok from 'datagrok-api/grok';
4
4
  import * as ui from 'datagrok-api/ui';
5
5
  import * as DG from 'datagrok-api/dg';
6
6
  import {WebLogo, SplitterFunc} from '@datagrok-libraries/bio/src/viewers/web-logo';
7
+ import {splitToMonomers, _package} from '../package';
8
+ import * as C from '../utils/constants';
7
9
 
8
10
  category('splitters', () => {
9
11
  const helm1 = 'PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$';
@@ -51,6 +53,19 @@ category('splitters', () => {
51
53
  test('testHelm1', async () => { await _testHelmSplitter(data.testHelm1[0], data.testHelm1[1]); });
52
54
  test('testHelm2', async () => { await _testHelmSplitter(data.testHelm2[0], data.testHelm2[1]); });
53
55
  test('testHelm3', async () => { await _testHelmSplitter(data.testHelm3[0], data.testHelm3[1]); });
56
+
57
+ test('splitToMonomers', async () => {
58
+ const df: DG.DataFrame = await grok.dapi.files.readCsv('System:AppData/Bio/samples/sample_MSA.csv');
59
+
60
+ const seqCol = df.getCol('MSA');
61
+ const semType = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
62
+ if (semType)
63
+ seqCol.semType = semType;
64
+ seqCol.setTag(C.TAGS.ALIGNED, C.MSA);
65
+
66
+ splitToMonomers(seqCol);
67
+ expect(df.columns.names().includes('17'), true);
68
+ });
54
69
  });
55
70
 
56
71
  export async function _testHelmSplitter(src: string, tgt: string[]) {
@@ -0,0 +1,45 @@
1
+ import * as DG from 'datagrok-api/dg';
2
+ import * as grok from 'datagrok-api/grok';
3
+
4
/**
 * Builds CSV text with an `MSA,Activity` header followed by many copies of one
 * '/'-separated sequence row.
 *
 * The result holds one seed row directly after the header plus `extraRows`
 * further rows. Each further row is introduced by a newline and a single space,
 * exactly as the previous hard-coded loop produced them, so the default output
 * is unchanged.
 *
 * @param extraRows Number of rows appended after the seed row. Must be a
 *   non-negative integer. Defaults to 10 ** 6, the value that used to be
 *   hard-coded; pass a smaller value to get a cheap fixture.
 * @returns The generated CSV text.
 * @throws RangeError If `extraRows` is negative or not an integer.
 */
export function generateManySequences(extraRows: number = 10 ** 6): string {
  if (!Number.isInteger(extraRows) || extraRows < 0)
    throw new RangeError(`extraRows must be a non-negative integer, got ${extraRows}`);

  const row = 'meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me,5.30751';

  // One repeat() call replaces a million incremental string concatenations.
  return `MSA,Activity\n${row}` + `\n ${row}`.repeat(extraRows);
}
12
+
13
/**
 * Builds CSV text whose rows all contain the same very long '/'-separated
 * sequence.
 *
 * The sequence is a fixed 15-monomer prefix, then `repeatCount` copies of a
 * 10-monomer fragment, then the `//Phe_4Me,5.30751` tail (which also carries
 * the activity value). The header is `MSA,Activity ` (trailing space kept from
 * the original) and every row is introduced by a newline and a single space.
 *
 * @param repeatCount How many times the 10-monomer fragment is appended to the
 *   prefix. Must be a non-negative integer. Defaults to 10 ** 5, the value that
 *   used to be hard-coded.
 * @param rowCount Number of data rows. Must be a non-negative integer.
 *   Defaults to 41: the original loop ran `i <= 10 ** 1 * 4`, i.e. 0..40
 *   inclusive. NOTE(review): 40 may have been intended — confirm before changing.
 * @returns The generated CSV text.
 * @throws RangeError If either argument is negative or not an integer.
 */
export function generateLongSequence(repeatCount: number = 10 ** 5, rowCount: number = 41): string {
  if (!Number.isInteger(repeatCount) || repeatCount < 0)
    throw new RangeError(`repeatCount must be a non-negative integer, got ${repeatCount}`);
  if (!Number.isInteger(rowCount) || rowCount < 0)
    throw new RangeError(`rowCount must be a non-negative integer, got ${rowCount}`);

  const prefix = 'meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr';
  const fragment = '/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/dv';
  const longSequence = `${prefix}${fragment.repeat(repeatCount)}//Phe_4Me,5.30751`;

  // repeat() builds the body in one step instead of growing the string per row.
  return 'MSA,Activity ' + `\n ${longSequence}`.repeat(rowCount);
}
25
/**
 * Marks a column as a macromolecule column and applies a fixed set of tags.
 *
 * Sets `semType` to `DG.SEMTYPE.MACROMOLECULE`, then sets the tags
 * `units` = 'separator', `aligned` = 'SEQ.MSA', `alphabet` = 'UN' and
 * `separator` = '/', in that order.
 *
 * @param col Column to modify in place.
 * @returns The same column instance that was passed in.
 */
export function setTagsMacromolecule(col: DG.Column) {
  // Tag name/value pairs, applied in the listed order.
  const tagValues: [string, string][] = [
    ['units', 'separator'],
    ['aligned', 'SEQ.MSA'],
    ['alphabet', 'UN'],
    ['separator', '/'],
  ];

  col.semType = DG.SEMTYPE.MACROMOLECULE;
  for (const [tagName, tagValue] of tagValues)
    col.setTag(tagName, tagValue);

  return col;
}
33
+
34
/**
 * Times an end-to-end run and logs the elapsed wall-clock time.
 *
 * The timed span covers generating the CSV text, parsing it into a data frame,
 * tagging the `MSA` column via `setTagsMacromolecule`, and adding the frame to
 * the shell as a table view.
 *
 * @param generateFunc Produces the CSV text to load; called exactly once.
 * @param testName Label included in the logged message.
 */
export function performanceTest(generateFunc: () => string, testName: string) {
  const startedAt = Date.now();

  const table = DG.DataFrame.fromCsv(generateFunc());
  setTagsMacromolecule(table.columns.byName('MSA'));
  grok.shell.addTableView(table);

  const elapsedMs = Date.now() - startedAt;
  console.log(`Performance test: ${testName}: ${elapsedMs}ms`);
}
@@ -1,6 +1,8 @@
1
1
  import * as OCL from 'openchemlib/full.js';
2
2
  import * as grok from 'datagrok-api/grok';
3
3
 
4
+ import {RDModule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
5
+
4
6
  export async function getMacroMol(monomers: any[][]): Promise<string[]> {
5
7
  let result: string[] = [];
6
8
  const moduleRdkit = await grok.functions.call('Chem:getRdKitModule');
@@ -6,9 +6,13 @@ import {UnknownSeqPalette, UnknownSeqPalettes} from '@datagrok-libraries/bio/src
6
6
  import {SplitterFunc, WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
7
7
  import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
8
8
  import * as ui from 'datagrok-api/ui';
9
- import {printLeftOrCentered} from '@datagrok-libraries/bio/src/utils/cell-renderer';
9
+ import {printLeftOrCentered, DrawStyle} from '@datagrok-libraries/bio/src/utils/cell-renderer';
10
+ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
10
11
 
11
12
  const undefinedColor = 'rgb(100,100,100)';
13
+ const monomerToShortFunction: (amino: string, maxLengthOfMonomer: number) => string = WebLogo.monomerToShort;
14
+ const gapRenderer = 5;
15
+
12
16
 
13
17
  function getPalleteByType(paletteType: string): SeqPalette {
14
18
  switch (paletteType) {
@@ -54,16 +58,13 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
54
58
  get defaultWidth(): number { return 230; }
55
59
 
56
60
  onMouseMove(gridCell: DG.GridCell, e: MouseEvent): void {
57
- if (gridCell.cell.column.getTag('aligned') !== 'SEQ.MSA') {
61
+ if (gridCell.cell.column.getTag(UnitsHandler.TAGS.aligned) !== 'SEQ.MSA') {
58
62
  return;
59
63
  }
60
64
  const maxLengthWordsSum = gridCell.cell.column.temp['bio-sum-maxLengthWords'];
61
- if (maxLengthWordsSum == null) {
62
- gridCell.cell.column.setTag('.calculatedCellRender', 'unexist');
63
- }
64
65
  const maxIndex = gridCell.cell.column.temp['bio-maxIndex'];
65
66
  //@ts-ignore
66
- const argsX = e.layerX - gridCell.gridColumn.left - ((gridCell.bounds.x<0) ? gridCell.bounds.x : 0);
67
+ const argsX = e.layerX - gridCell.gridColumn.left + (gridCell.gridColumn.left - gridCell.bounds.x);
67
68
  let left = 0;
68
69
  let right = maxIndex;
69
70
  let found = false;
@@ -89,7 +90,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
89
90
  const separator = gridCell.cell.column.getTag('separator') ?? '';
90
91
  const splitterFunc: SplitterFunc = WebLogo.getSplitter('separator', separator);
91
92
  const subParts: string[] = splitterFunc(gridCell.cell.value);
92
- ui.tooltip.show(ui.div(subParts[left]), e.x + 16, e.y + 16);
93
+ (((subParts[left]?.length ?? 0) > 0)) ? ui.tooltip.show(ui.div(subParts[left]), e.x + 16, e.y + 16) : ui.tooltip.hide();
93
94
  }
94
95
 
95
96
  /**
@@ -111,6 +112,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
111
112
  const grid = gridCell.gridRow !== -1 ? gridCell.grid : undefined;
112
113
  const cell = gridCell.cell;
113
114
  const [type, subtype, paletteType] = gridCell.cell.column.getTag(DG.TAGS.UNITS).split(':');
115
+ const minDistanceRenderer = 50;
114
116
  w = grid ? Math.min(grid.canvas.width - x, w) : g.canvas.width - x;
115
117
  g.save();
116
118
  g.beginPath();
@@ -126,25 +128,34 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
126
128
  const palette = getPalleteByType(paletteType);
127
129
 
128
130
  const separator = gridCell.cell.column.getTag('separator') ?? '';
129
- const splitterFunc: SplitterFunc = WebLogo.getSplitter(units, separator);
131
+ const splitLimit = gridCell.bounds.width / 5;
132
+ const splitterFunc: SplitterFunc = WebLogo.getSplitter(units, separator, gridCell.bounds.width / 5);
133
+
130
134
 
131
- const columns = gridCell.cell.column.categories;
132
- let monomerToShortFunction: (amino: string, maxLengthOfMonomer: number) => string = WebLogo.monomerToShort;
133
- let maxLengthOfMonomer = 8;
135
+ const maxLengthOfMonomer = 8;
134
136
 
135
137
  let maxLengthWords: any = {};
136
- if (gridCell.cell.column.getTag('.calculatedCellRender') !== 'exist') {
137
- for (let i = 0; i < columns.length; i++) {
138
- let subParts: string[] = splitterFunc(columns[i]);
138
+ if (gridCell.cell.column.getTag('.calculatedCellRender') !== splitLimit.toString()) {
139
+ let samples = 0;
140
+ while (samples < Math.min(gridCell.cell.column.length, 100)) {
141
+ let column = gridCell.cell.column.get(samples);
142
+ let subParts: string[] = splitterFunc(column);
139
143
  subParts.forEach((amino, index) => {
140
- let textSizeWidth = g.measureText(monomerToShortFunction(amino, maxLengthOfMonomer));
141
- if (textSizeWidth.width > (maxLengthWords[index] ?? 0)) {
142
- maxLengthWords[index] = textSizeWidth.width;
144
+ let textSize = monomerToShortFunction(amino, maxLengthOfMonomer).length * 7 + gapRenderer;
145
+ if (textSize > (maxLengthWords[index] ?? 0)) {
146
+ maxLengthWords[index] = textSize;
143
147
  }
144
148
  if (index > (maxLengthWords['bio-maxIndex'] ?? 0)) {
145
149
  maxLengthWords['bio-maxIndex'] = index;
146
150
  }
147
151
  });
152
+ samples += 1;
153
+ }
154
+ let minLength = 3 * 7;
155
+ for (let i = 0; i <= maxLengthWords['bio-maxIndex']; i++) {
156
+ if (maxLengthWords[i] < minLength) {
157
+ maxLengthWords[i] = minLength;
158
+ }
148
159
  }
149
160
  let maxLengthWordSum: any = {};
150
161
  maxLengthWordSum[0] = maxLengthWords[0];
@@ -156,7 +167,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
156
167
  'bio-maxIndex': maxLengthWords['bio-maxIndex'],
157
168
  'bio-maxLengthWords': maxLengthWords
158
169
  };
159
- gridCell.cell.column.setTag('.calculatedCellRender', 'exist');
170
+ gridCell.cell.column.setTag('.calculatedCellRender', splitLimit.toString());
160
171
  } else {
161
172
  maxLengthWords = gridCell.cell.column.temp['bio-maxLengthWords'];
162
173
  }
@@ -164,15 +175,19 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
164
175
  const subParts: string[] = splitterFunc(cell.value);
165
176
  let x1 = x;
166
177
  let color = undefinedColor;
167
- let drawStyle = 'classic';
168
- if (gridCell.cell.column.getTag('aligned').includes('MSA')) {
169
- drawStyle = 'msa';
178
+ let drawStyle = DrawStyle.classic;
179
+ if (gridCell.cell.column.getTag('aligned').includes('MSA') && gridCell.cell.column.getTag('units') === 'separator') {
180
+ drawStyle = DrawStyle.MSA;
170
181
  }
171
- subParts.forEach((amino, index) => {
182
+ subParts.every((amino, index) => {
172
183
  color = palette.get(amino);
173
184
  g.fillStyle = undefinedColor;
174
185
  let last = index === subParts.length - 1;
175
186
  x1 = printLeftOrCentered(x1, y, w, h, g, monomerToShortFunction(amino, maxLengthOfMonomer), color, 0, true, 1.0, separator, last, drawStyle, maxLengthWords, index, gridCell);
187
+ if (x1 - minDistanceRenderer - gridCell.gridColumn.left + (gridCell.gridColumn.left - gridCell.bounds.x) > gridCell.bounds.width) {
188
+ return false;
189
+ }
190
+ return true;
176
191
  });
177
192
 
178
193
  g.restore();
@@ -269,8 +284,9 @@ export class MacromoleculeDifferenceCellRenderer extends DG.GridCellRenderer {
269
284
  // 28 is the height of the two substitutions on top of each other + space
270
285
  const updatedY = Math.max(y, y + (h - 28) / 2);
271
286
 
272
- let palette: SeqPalette = units == 'HELM' ? UnknownSeqPalettes.Color :
273
- getPalleteByType(gridCell.tableColumn!.tags[C.TAGS.ALPHABET]);
287
+ let palette: SeqPalette = UnknownSeqPalettes.Color;
288
+ if (units != 'HELM')
289
+ palette = getPalleteByType(units.substring(units.length - 2));
274
290
 
275
291
  const vShift = 7;
276
292
  for (let i = 0; i < subParts1.length; i++) {
@@ -9,17 +9,13 @@ export enum COLUMNS_NAMES {
9
9
  MEAN_DIFFERENCE = 'Mean difference',
10
10
  }
11
11
 
12
- export enum CATEGORIES {
13
- OTHER = 'Other',
14
- ALL = 'All',
15
- }
16
-
17
12
  /**
   * String constants used as column tag names. Elsewhere in this diff,
   * `TAGS.ALIGNED` is passed to `setTag` and `TAGS.ALPHABET` indexes a
   * column's `tags`.
   * NOTE(review): how AAR, POSITION, SEPARATOR and SELECTION are used is not
   * visible here — confirm against their call sites.
   */
  export enum TAGS {
18
13
  AAR = 'AAR',
19
14
  POSITION = 'Pos',
20
15
  SEPARATOR = 'separator',
21
16
  SELECTION = 'selection',
22
17
  ALPHABET = 'alphabet',
18
+ ALIGNED = 'aligned',
23
19
  }
24
20
 
25
21
  export enum SEM_TYPES {
@@ -30,6 +26,8 @@ export enum SEM_TYPES {
30
26
  MACROMOLECULE = 'Macromolecule',
31
27
  }
32
28
 
29
+ export const MSA = 'MSA';
30
+
33
31
  export const STATS = 'stats';
34
32
 
35
33
  export const EMBEDDING_STATUS = 'embeddingStatus';
@@ -42,7 +42,7 @@ export function convert(col: DG.Column): void {
42
42
  // set correct visibility on init
43
43
  toggleSeparator();
44
44
 
45
- targetNotationInput.onChanged( () => {
45
+ targetNotationInput.onChanged(() => {
46
46
  toggleSeparator();
47
47
  });
48
48
 
@@ -9,6 +9,7 @@ import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler'
9
9
  import Aioli from '@biowasm/aioli';
10
10
 
11
11
  import {AlignedSequenceEncoder} from '@datagrok-libraries/bio/src/sequence-encoder';
12
+ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
12
13
 
13
14
  /**
14
15
  * Converts array of sequences into simple fasta string.
@@ -58,14 +59,14 @@ export async function runKalign(srcCol: DG.Column, isAligned = false, unUsedName
58
59
  // units
59
60
  const srcUnits = srcCol.getTag(DG.TAGS.UNITS);
60
61
  //aligned
61
- const srcAligned = srcCol.getTag('aligned');
62
+ const srcAligned = srcCol.getTag(UnitsHandler.TAGS.aligned);
62
63
  const tgtAligned = srcAligned + '.MSA';
63
64
  //alphabet
64
- const srcAlphabet = srcCol.getTag('alphabet');
65
+ const srcAlphabet = srcCol.getTag(UnitsHandler.TAGS.alphabet);
65
66
 
66
67
  tgtCol.setTag(DG.TAGS.UNITS, srcUnits);
67
- tgtCol.setTag('aligned', tgtAligned);
68
- tgtCol.setTag('alphabet', srcAlphabet);
68
+ tgtCol.setTag(UnitsHandler.TAGS.aligned, tgtAligned);
69
+ tgtCol.setTag(UnitsHandler.TAGS.alphabet, srcAlphabet);
69
70
  tgtCol.semType = DG.SEMTYPE.MACROMOLECULE;
70
71
  return tgtCol;
71
72
  }
@@ -12,9 +12,11 @@ export interface ISequenceSpaceResult {
12
12
  }
13
13
 
14
14
  export async function sequenceSpace(spaceParams: ISequenceSpaceParams): Promise<ISequenceSpaceResult> {
15
- let preparedData: any;
15
+
16
+ // code deprecated since seqCol is encoded
17
+ /* let preparedData: any;
16
18
  if (!(spaceParams.seqCol!.tags[DG.TAGS.UNITS] === 'HELM')) {
17
- const sep = spaceParams.seqCol.getTag('separator');
19
+ const sep = spaceParams.seqCol.getTag(UnitsHandler.TAGS.separator);
18
20
  const sepFinal = sep ? sep === '.' ? '\\\.' : sep : '-';
19
21
  const regex = new RegExp(sepFinal, 'g');
20
22
  if (Object.keys(AvailableMetrics['String']).includes(spaceParams.similarityMetric))
@@ -23,10 +25,10 @@ export async function sequenceSpace(spaceParams: ISequenceSpaceParams): Promise<
23
25
  preparedData = spaceParams.seqCol.toList().map((v: string) => v.replace(regex, '')) as string[];
24
26
  } else {
25
27
  preparedData = spaceParams.seqCol.toList();
26
- }
28
+ } */
27
29
 
28
30
  const sequenceSpaceResult = await reduceDimensinalityWithNormalization(
29
- preparedData,
31
+ spaceParams.seqCol.toList(),
30
32
  spaceParams.methodName,
31
33
  spaceParams.similarityMetric as StringMetrics | BitArrayMetrics,
32
34
  spaceParams.options);
@@ -5,6 +5,7 @@ import {
5
5
  CAP_GROUP_NAME, CAP_GROUP_SMILES, jsonSdfMonomerLibDict, MONOMER_ENCODE_MAX, MONOMER_ENCODE_MIN, MONOMER_SYMBOL,
6
6
  RGROUP_ALTER_ID, RGROUP_FIELD, RGROUP_LABEL, SDF_MONOMER_NAME
7
7
  } from '../const';
8
+ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
8
9
 
9
10
  export const HELM_CORE_LIB_FILENAME = '/samples/HELMCoreLibrary.json';
10
11
  export const HELM_CORE_LIB_MONOMER_SYMBOL = 'symbol';
@@ -16,7 +17,7 @@ export function encodeMonomers(col: DG.Column): DG.Column | null {
16
17
  let encodeSymbol = MONOMER_ENCODE_MIN;
17
18
  const monomerSymbolDict: { [key: string]: number } = {};
18
19
  const units = col.tags[DG.TAGS.UNITS];
19
- const sep = col.getTag('separator');
20
+ const sep = col.getTag(UnitsHandler.TAGS.separator);
20
21
  const splitterFunc: SplitterFunc = WebLogo.getSplitter(units, sep);
21
22
  const encodedStringArray = [];
22
23
  for (let i = 0; i < col.length; ++i) {
@@ -25,7 +26,7 @@ export function encodeMonomers(col: DG.Column): DG.Column | null {
25
26
  monomers.forEach((m) => {
26
27
  if (!monomerSymbolDict[m]) {
27
28
  if (encodeSymbol > MONOMER_ENCODE_MAX) {
28
- grok.shell.error(`Not enougth symbols to encode monomers`);
29
+ grok.shell.error(`Not enough symbols to encode monomers`);
29
30
  return null;
30
31
  }
31
32
  monomerSymbolDict[m] = encodeSymbol;
@@ -22,7 +22,7 @@ export async function representationsWidget(macroMolecule: DG.Cell, monomersLibO
22
22
  const atomicCodes = getMolfilesFromSingleSeq(macroMolecule, monomersLibObject);
23
23
  const result = await getMacroMol(atomicCodes!);
24
24
  const molBlock2D = result[0];
25
- molBlock3D = (await grok.functions.call('Bio:Embed', {molBlock2D})) as string;
25
+ molBlock3D = (await grok.functions.call('Bio:Embed', {molBlock2D})) as unknown as string;
26
26
  } catch (e) {
27
27
  console.warn(e);
28
28
  }