@datagrok/bio 2.1.11 → 2.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/README.md +11 -12
  2. package/css/helm.css +10 -0
  3. package/detectors.js +83 -59
  4. package/dist/package-test.js +2 -68651
  5. package/dist/package-test.js.map +1 -0
  6. package/dist/package.js +2 -66040
  7. package/dist/package.js.map +1 -0
  8. package/dockerfiles/Dockerfile +86 -0
  9. package/files/icons/composition-analysis.svg +17 -0
  10. package/files/icons/sequence-diversity-viewer.svg +4 -0
  11. package/files/icons/sequence-similarity-viewer.svg +4 -0
  12. package/files/icons/vdregions-viewer.svg +22 -0
  13. package/files/icons/weblogo-viewer.svg +7 -0
  14. package/files/tests/testUrl.csv +11 -0
  15. package/files/tests/toAtomicLevelTest.csv +4 -0
  16. package/package.json +29 -32
  17. package/src/analysis/sequence-activity-cliffs.ts +15 -13
  18. package/src/analysis/sequence-diversity-viewer.ts +3 -2
  19. package/src/analysis/sequence-search-base-viewer.ts +4 -2
  20. package/src/analysis/sequence-similarity-viewer.ts +4 -4
  21. package/src/analysis/sequence-space.ts +2 -1
  22. package/src/calculations/monomerLevelMols.ts +6 -6
  23. package/src/package-test.ts +9 -2
  24. package/src/package.ts +230 -145
  25. package/src/substructure-search/substructure-search.ts +25 -22
  26. package/src/tests/Palettes-test.ts +9 -9
  27. package/src/tests/WebLogo-positions-test.ts +131 -68
  28. package/src/tests/_first-tests.ts +9 -0
  29. package/src/tests/activity-cliffs-tests.ts +8 -7
  30. package/src/tests/activity-cliffs-utils.ts +17 -9
  31. package/src/tests/bio-tests.ts +30 -21
  32. package/src/tests/checkInputColumn-tests.ts +17 -17
  33. package/src/tests/converters-test.ts +81 -46
  34. package/src/tests/detectors-benchmark-tests.ts +17 -17
  35. package/src/tests/detectors-tests.ts +190 -178
  36. package/src/tests/fasta-export-tests.ts +2 -3
  37. package/src/tests/monomer-libraries-tests.ts +34 -0
  38. package/src/tests/pepsea-tests.ts +21 -0
  39. package/src/tests/renderers-test.ts +33 -29
  40. package/src/tests/sequence-space-test.ts +6 -4
  41. package/src/tests/similarity-diversity-tests.ts +4 -4
  42. package/src/tests/splitters-test.ts +6 -7
  43. package/src/tests/substructure-filters-tests.ts +23 -1
  44. package/src/tests/utils/sequences-generators.ts +7 -7
  45. package/src/tests/utils.ts +2 -1
  46. package/src/tests/viewers.ts +16 -0
  47. package/src/utils/cell-renderer.ts +116 -54
  48. package/src/utils/constants.ts +7 -6
  49. package/src/utils/convert.ts +17 -11
  50. package/src/utils/monomer-lib.ts +174 -0
  51. package/src/utils/multiple-sequence-alignment.ts +49 -26
  52. package/src/utils/pepsea.ts +78 -0
  53. package/src/utils/save-as-fasta.ts +9 -8
  54. package/src/utils/ui-utils.ts +15 -3
  55. package/src/viewers/vd-regions-viewer.ts +125 -83
  56. package/src/viewers/web-logo-viewer.ts +1031 -0
  57. package/src/widgets/bio-substructure-filter.ts +38 -24
  58. package/tsconfig.json +71 -72
  59. package/webpack.config.js +4 -11
  60. package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +0 -8988
  61. package/jest.config.js +0 -33
  62. package/src/__jest__/remote.test.ts +0 -77
  63. package/src/__jest__/test-node.ts +0 -98
  64. package/test-Bio-91c83d8913ff-bb573307.html +0 -392
@@ -3,11 +3,32 @@ import * as DG from 'datagrok-api/dg';
3
3
  import * as ui from 'datagrok-api/ui';
4
4
 
5
5
  import {printLeftOrCentered, DrawStyle} from '@datagrok-libraries/bio/src/utils/cell-renderer';
6
- import * as bio from '@datagrok-libraries/bio';
7
6
  import * as C from './constants';
7
+ import {
8
+ ALIGNMENT,
9
+ getPaletteByType,
10
+ getSplitter,
11
+ monomerToShort,
12
+ NOTATION,
13
+ SplitterFunc,
14
+ TAGS as bioTAGS,
15
+ } from '@datagrok-libraries/bio/src/utils/macromolecule';
16
+ import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
17
+ import {UnknownSeqPalettes} from '@datagrok-libraries/bio/src/unknown';
18
+
19
+ const enum tempTAGS {
20
+ referenceSequence = 'reference-sequence',
21
+ currentWord = 'current-word',
22
+ monomerWidth = 'monomer-width',
23
+ bioSumMaxLengthWords = 'bio-sum-maxLengthWords',
24
+ bioMaxIndex = 'bio-maxIndex',
25
+ bioMaxLengthWords = 'bio-maxLengthWords',
26
+ }
27
+
28
+ type TempType = { [tagName: string]: any };
8
29
 
9
30
  const undefinedColor = 'rgb(100,100,100)';
10
- const monomerToShortFunction: (amino: string, maxLengthOfMonomer: number) => string = bio.monomerToShort;
31
+ const monomerToShortFunction: (amino: string, maxLengthOfMonomer: number) => string = monomerToShort;
11
32
 
12
33
  function getUpdatedWidth(grid: DG.Grid | null, g: CanvasRenderingContext2D, x: number, w: number): number {
13
34
  return grid ? Math.min(grid.canvas.width - x, w) : g.canvas.width - x;
@@ -41,16 +62,18 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
41
62
  get defaultWidth(): number { return 230; }
42
63
 
43
64
  onClick(gridCell: DG.GridCell, e: MouseEvent): void {
44
- gridCell.cell.column.temp['current-word'] = gridCell.cell.value;
65
+ const colTemp: TempType = gridCell.cell.column.temp;
66
+ colTemp[tempTAGS.currentWord] = gridCell.cell.value;
45
67
  gridCell.grid.invalidate();
46
68
  }
47
69
 
48
70
  onMouseMove(gridCell: DG.GridCell, e: MouseEvent): void {
49
- if (gridCell.cell.column.getTag(bio.TAGS.aligned) !== bio.ALIGNMENT.SEQ_MSA)
71
+ if (gridCell.cell.column.getTag(bioTAGS.aligned) !== ALIGNMENT.SEQ_MSA)
50
72
  return;
51
73
 
52
- const maxLengthWordsSum = gridCell.cell.column.temp['bio-sum-maxLengthWords'];
53
- const maxIndex = gridCell.cell.column.temp['bio-maxIndex'];
74
+ const colTemp: TempType = gridCell.cell.column.temp;
75
+ const maxLengthWordsSum = colTemp[tempTAGS.bioSumMaxLengthWords];
76
+ const maxIndex = colTemp[tempTAGS.bioMaxIndex];
54
77
  const argsX = e.offsetX - gridCell.gridColumn.left + (gridCell.gridColumn.left - gridCell.bounds.x);
55
78
  let left = 0;
56
79
  let right = maxIndex;
@@ -74,7 +97,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
74
97
  }
75
98
  left = (argsX >= maxLengthWordsSum[left]) ? left + 1 : left;
76
99
  const separator = gridCell.cell.column.getTag('separator') ?? '';
77
- const splitterFunc: bio.SplitterFunc = bio.getSplitter('separator', separator);
100
+ const splitterFunc: SplitterFunc = getSplitter('separator', separator);
78
101
  const subParts: string[] = splitterFunc(gridCell.cell.value);
79
102
  (((subParts[left]?.length ?? 0) > 0)) ?
80
103
  ui.tooltip.show(ui.div(subParts[left]), e.x + 16, e.y + 16) : ui.tooltip.hide();
@@ -95,10 +118,10 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
95
118
  render(
96
119
  g: CanvasRenderingContext2D, x: number, y: number, w: number, h: number, gridCell: DG.GridCell,
97
120
  cellStyle: DG.GridCellStyle
98
- ): void {
121
+ ) {
99
122
  const grid = gridCell.gridRow !== -1 ? gridCell.grid : null;
100
123
  const cell = gridCell.cell;
101
- const paletteType = gridCell.cell.column.getTag(C.TAGS.ALPHABET);
124
+ const paletteType = gridCell.cell.column.getTag(bioTAGS.alphabet);
102
125
  const minDistanceRenderer = 50;
103
126
  w = getUpdatedWidth(grid, g, x, w);
104
127
  g.save();
@@ -110,19 +133,28 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
110
133
 
111
134
  //TODO: can this be replaced/merged with splitSequence?
112
135
  const units = gridCell.cell.column.getTag(DG.TAGS.UNITS);
136
+ const aligned: string = gridCell.cell.column.getTag(bioTAGS.aligned);
113
137
 
114
- const palette = bio.getPaletteByType(paletteType);
138
+ const palette = getPaletteByType(paletteType);
115
139
 
116
- const separator = gridCell.cell.column.getTag('separator') ?? '';
117
- const splitLimit = gridCell.bounds.width / 5;
118
- const splitterFunc: bio.SplitterFunc = bio.getSplitter(units, separator, splitLimit);
119
- const referenceSequence: string[] = splitterFunc(((gridCell.cell.column?.temp['reference-sequence'] != null) && (gridCell.cell.column?.temp['reference-sequence'] != ''))
120
- ? gridCell.cell.column.temp['reference-sequence'] : gridCell.cell.column.temp['current-word'] ?? '');
121
- const monomerWidth = (gridCell.cell.column?.temp['monomer-width'] != null) ? gridCell.cell.column.temp['monomer-width'] : 'short';
122
- let gapRenderer = 5;
140
+ const separator = gridCell.cell.column.getTag(bioTAGS.separator) ?? '';
141
+ const splitLimit = w / 5;
142
+ const splitterFunc: SplitterFunc = getSplitter(units, separator, splitLimit);
123
143
 
124
- let maxIndex = 0;
144
+ // TODO: Store temp data to GridColumn
145
+ // Now the renderer requires data frame table Column underlying GridColumn
146
+ const colTemp: TempType = gridCell.cell.column.temp;
147
+
148
+ const tempReferenceSequence: string | null = colTemp[tempTAGS.referenceSequence];
149
+ const tempCurrentWord: string | null = colTemp[tempTAGS.currentWord];
150
+ const tempMonomerWidth: string | null = colTemp[tempTAGS.monomerWidth];
151
+ const referenceSequence: string[] = splitterFunc(
152
+ ((tempReferenceSequence != null) && (tempReferenceSequence != '')) ?
153
+ tempReferenceSequence : tempCurrentWord ?? '');
154
+ const monomerWidth: string = (tempMonomerWidth != null) ? tempMonomerWidth : 'short';
125
155
 
156
+ let gapRenderer = 5;
157
+ let maxIndex = 0;
126
158
  let maxLengthOfMonomer = 8;
127
159
 
128
160
  if (monomerWidth === 'short') {
@@ -140,44 +172,42 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
140
172
  const textSize = monomerToShortFunction(amino, maxLengthOfMonomer).length * 7 + gapRenderer;
141
173
  if (textSize > (maxLengthWords[index] ?? 0))
142
174
  maxLengthWords[index] = textSize;
143
- if (index > maxIndex) {
144
- maxIndex = index;
145
- }
175
+ if (index > maxIndex) maxIndex = index;
146
176
  });
147
177
  samples += 1;
148
178
  }
149
- let minLength = 3 * 7;
179
+ const minLength = 3 * 7;
150
180
  for (let i = 0; i <= maxIndex; i++) {
151
- if (maxLengthWords[i] < minLength) {
152
- maxLengthWords[i] = minLength;
153
- }
181
+ if (maxLengthWords[i] < minLength) maxLengthWords[i] = minLength;
154
182
  const maxLengthWordSum: any = {};
155
183
  maxLengthWordSum[0] = maxLengthWords[0];
156
- for (let i = 1; i <= maxIndex; i++) {
157
- maxLengthWordSum[i] = maxLengthWordSum[i - 1] + maxLengthWords[i];
158
- }
159
- gridCell.cell.column.temp['bio-sum-maxLengthWords'] = maxLengthWordSum;
160
- gridCell.cell.column.temp['bio-maxIndex'] = maxIndex;
161
- gridCell.cell.column.temp['bio-maxLengthWords'] = maxLengthWords;
184
+ for (let i = 1; i <= maxIndex; i++) maxLengthWordSum[i] = maxLengthWordSum[i - 1] + maxLengthWords[i];
185
+ colTemp[tempTAGS.bioSumMaxLengthWords] = maxLengthWordSum;
186
+ colTemp[tempTAGS.bioMaxIndex] = maxIndex;
187
+ colTemp[tempTAGS.bioMaxLengthWords] = maxLengthWords;
162
188
  gridCell.cell.column.setTag('.calculatedCellRender', splitLimit.toString());
163
189
  }
164
190
  } else {
165
- maxLengthWords = gridCell.cell.column.temp['bio-maxLengthWords'];
191
+ maxLengthWords = colTemp[tempTAGS.bioMaxLengthWords];
166
192
  }
167
193
 
168
194
  const subParts: string[] = splitterFunc(cell.value);
169
195
  let x1 = x;
170
196
  let color = undefinedColor;
171
- let drawStyle = bio.DrawStyle.classic;
172
- if (gridCell.cell.column.getTag('aligned').includes('MSA') && gridCell.cell.column.getTag('units') === 'separator')
173
- drawStyle = bio.DrawStyle.MSA;
197
+ let drawStyle = DrawStyle.classic;
198
+
199
+
200
+ if (aligned && aligned.includes('MSA') && units == NOTATION.SEPARATOR)
201
+ drawStyle = DrawStyle.MSA;
174
202
 
175
203
  subParts.every((amino, index) => {
176
204
  color = palette.get(amino);
177
205
  g.fillStyle = undefinedColor;
178
- let last = index === subParts.length - 1;
179
- x1 = bio.printLeftOrCentered(x1, y, w, h, g, amino, color, 0, true, 1.0, separator, last, drawStyle, maxLengthWords, index, gridCell, referenceSequence, maxLengthOfMonomer);
180
- return x1 - minDistanceRenderer - gridCell.gridColumn.left + (gridCell.gridColumn.left - gridCell.bounds.x) <= gridCell.bounds.width;
206
+ const last = index === subParts.length - 1;
207
+ x1 = printLeftOrCentered(x1, y, w, h,
208
+ g, amino, color, 0, true, 1.0, separator, last, drawStyle,
209
+ maxLengthWords, index, gridCell, referenceSequence, maxLengthOfMonomer);
210
+ return minDistanceRenderer <= w;
181
211
  });
182
212
 
183
213
  g.restore();
@@ -212,14 +242,14 @@ export class MonomerCellRenderer extends DG.GridCellRenderer {
212
242
  g.textBaseline = 'middle';
213
243
  g.textAlign = 'center';
214
244
 
215
- const palette = bio.getPaletteByType(gridCell.cell.column.getTag(C.TAGS.ALPHABET));
245
+ const palette = getPaletteByType(gridCell.cell.column.getTag(bioTAGS.alphabet));
216
246
  const s: string = gridCell.cell.value;
217
247
  if (!s)
218
248
  return;
219
249
  const color = palette.get(s);
220
250
 
221
251
  g.fillStyle = color;
222
- g.fillText(s, x + (w / 2), y + (h / 2), w);
252
+ g.fillText(monomerToShort(s, 3), x + (w / 2), y + (h / 2), w);
223
253
  }
224
254
  }
225
255
 
@@ -250,12 +280,12 @@ export class MacromoleculeDifferenceCellRenderer extends DG.GridCellRenderer {
250
280
  const grid = gridCell.grid;
251
281
  const cell = gridCell.cell;
252
282
  const s: string = cell.value ?? '';
253
- const separator = gridCell.tableColumn!.tags[C.TAGS.SEPARATOR];
283
+ const separator = gridCell.tableColumn!.tags[bioTAGS.separator];
254
284
  const units: string = gridCell.tableColumn!.tags[DG.TAGS.UNITS];
255
285
  w = getUpdatedWidth(grid, g, x, w);
256
286
  //TODO: can this be replaced/merged with splitSequence?
257
287
  const [s1, s2] = s.split('#');
258
- const splitter = bio.getSplitter(units, separator);
288
+ const splitter = getSplitter(units, separator);
259
289
  const subParts1 = splitter(s1);
260
290
  const subParts2 = splitter(s2);
261
291
  drawMoleculeDifferenceOnCanvas(g, x, y, w, h, subParts1, subParts2, units);
@@ -272,12 +302,12 @@ export function drawMoleculeDifferenceOnCanvas(
272
302
  subParts2: string [],
273
303
  units: string,
274
304
  fullStringLength?: boolean,
275
- molDifferences?: { [key: number]: HTMLCanvasElement }) {
276
-
305
+ molDifferences?: { [key: number]: HTMLCanvasElement }
306
+ ): void {
277
307
  if (subParts1.length !== subParts2.length) {
278
- const emptyMonomersArray = new Array<string>(Math.abs(subParts1.length - subParts2.length)).fill('');
279
- subParts1.length > subParts2.length ?
280
- subParts2 = subParts2.concat(emptyMonomersArray) : subParts1 = subParts1.concat(emptyMonomersArray);
308
+ const sequences: IComparedSequences = fillShorterSequence(subParts1, subParts2);
309
+ subParts1 = sequences.subParts1;
310
+ subParts2 = sequences.subParts2;
281
311
  }
282
312
  const textSize1 = g.measureText(processSequence(subParts1).join(''));
283
313
  const textSize2 = g.measureText(processSequence(subParts2).join(''));
@@ -297,9 +327,9 @@ export function drawMoleculeDifferenceOnCanvas(
297
327
  g.font = '12px monospace';
298
328
  g.textBaseline = 'top';
299
329
 
300
- let palette: bio.SeqPalette = bio.UnknownSeqPalettes.Color;
330
+ let palette: SeqPalette = UnknownSeqPalettes.Color;
301
331
  if (units != 'HELM')
302
- palette = bio.getPaletteByType(units.substring(units.length - 2));
332
+ palette = getPaletteByType(units.substring(units.length - 2));
303
333
 
304
334
  const vShift = 7;
305
335
  for (let i = 0; i < subParts1.length; i++) {
@@ -309,17 +339,22 @@ export function drawMoleculeDifferenceOnCanvas(
309
339
 
310
340
  if (amino1 != amino2) {
311
341
  const color2 = palette.get(amino2);
312
- const subX0 = bio.printLeftOrCentered(updatedX, updatedY - vShift, w, h, g, amino1, color1, 0, true);
313
- const subX1 = bio.printLeftOrCentered(updatedX, updatedY + vShift, w, h, g, amino2, color2, 0, true);
342
+ const subX0 = printLeftOrCentered(updatedX, updatedY - vShift, w, h, g, amino1, color1, 0, true);
343
+ const subX1 = printLeftOrCentered(updatedX, updatedY + vShift, w, h, g, amino2, color2, 0, true);
314
344
  updatedX = Math.max(subX1, subX0);
315
345
  if (molDifferences)
316
346
  molDifferences[i] = createDifferenceCanvas(amino1, amino2, color1, color2, updatedY, vShift, h);
317
- } else { updatedX = bio.printLeftOrCentered(updatedX, updatedY, w, h, g, amino1, color1, 0, true, 0.5); }
347
+ } else { updatedX = printLeftOrCentered(updatedX, updatedY, w, h, g, amino1, color1, 0, true, 0.5); }
318
348
  updatedX += 4;
319
349
  }
320
350
  g.restore();
321
351
  }
322
352
 
353
+ interface IComparedSequences{
354
+ subParts1: string[];
355
+ subParts2: string[];
356
+ }
357
+
323
358
  function createDifferenceCanvas(
324
359
  amino1: string,
325
360
  amino2: string,
@@ -338,7 +373,34 @@ function createDifferenceCanvas(
338
373
  canvas.width = width + 4;
339
374
  context.font = '12px monospace';
340
375
  context.textBaseline = 'top';
341
- bio.printLeftOrCentered(0, y - shift, width, h, context, amino1, color1, 0, true);
342
- bio.printLeftOrCentered(0, y + shift, width, h, context, amino2, color2, 0, true);
376
+ printLeftOrCentered(0, y - shift, width, h, context, amino1, color1, 0, true);
377
+ printLeftOrCentered(0, y + shift, width, h, context, amino2, color2, 0, true);
343
378
  return canvas;
344
379
  }
380
+
381
/**
 * Pads the shorter of two monomer sequences with empty monomers ('') so that
 * both sequences have the same length.
 *
 * The padding is appended when the sequences agree in more positions aligned by
 * their starts, and prepended otherwise (ties included), so that matching
 * monomers end up at the same positions.
 *
 * @param {string[]} subParts1 Monomers of the first sequence
 * @param {string[]} subParts2 Monomers of the second sequence
 * @return Both sequences with equal length (structurally an IComparedSequences);
 * the input arrays are not modified
 */
function fillShorterSequence(
  subParts1: string[], subParts2: string[]
): { subParts1: string[], subParts2: string[] } {
  const firstIsLonger = subParts1.length > subParts2.length;
  const longerSeq = firstIsLonger ? subParts1 : subParts2;
  const shorterSeq = firstIsLonger ? subParts2 : subParts1;
  const lengthDiff = longerSeq.length - shorterSeq.length;

  let numIdenticalStart = 0;
  let numIdenticalEnd = 0;
  // Every monomer of the shorter sequence takes part in both comparisons,
  // including the first one for the end-aligned case.
  for (let i = 0; i < shorterSeq.length; i++) {
    // Sequences aligned by their first monomers
    if (longerSeq[i] === shorterSeq[i])
      numIdenticalStart++;
    // Sequences aligned by their last monomers
    if (longerSeq[i + lengthDiff] === shorterSeq[i])
      numIdenticalEnd++;
  }

  const emptyMonomersArray = new Array<string>(lengthDiff).fill('');
  const paddedShorterSeq = numIdenticalStart > numIdenticalEnd ?
    shorterSeq.concat(emptyMonomersArray) : emptyMonomersArray.concat(shorterSeq);

  return firstIsLonger ?
    {subParts1: subParts1, subParts2: paddedShorterSeq} :
    {subParts1: paddedShorterSeq, subParts2: subParts2};
}
@@ -12,18 +12,14 @@ export enum COLUMNS_NAMES {
12
12
  export enum TAGS {
13
13
  AAR = 'AAR',
14
14
  POSITION = 'Pos',
15
- SEPARATOR = 'separator',
16
15
  SELECTION = 'selection',
17
- ALPHABET = 'alphabet',
18
- ALIGNED = 'aligned',
19
16
  }
20
17
 
21
18
  export enum SEM_TYPES {
22
19
  MONOMER = 'Monomer',
23
20
  MACROMOLECULE_DIFFERENCE = 'MacromoleculeDifference',
24
21
  ACTIVITY = 'activity',
25
- ACTIVITY_SCALED = 'activityScaled',
26
- MACROMOLECULE = 'Macromolecule',
22
+ ACTIVITY_SCALED = 'activityScaled'
27
23
  }
28
24
 
29
25
  export const MSA = 'MSA';
@@ -47,7 +43,7 @@ export const aarGroups = {
47
43
  '-': '-',
48
44
  };
49
45
 
50
- export const groupDescription: {[key: string]: {'description': string, aminoAcids: string[]}} = {
46
+ export const groupDescription: { [key: string]: { 'description': string, aminoAcids: string[] } } = {
51
47
  'PC': {'description': 'Positive Amino Acids, with Electrically Charged Side Chains', 'aminoAcids': ['R', 'H', 'K']},
52
48
  'NC': {'description': 'Negative Amino Acids, with Electrically Charged Side Chains', 'aminoAcids': ['D', 'E']},
53
49
  'U': {'description': 'Amino Acids with Polar Uncharged Side Chains', 'aminoAcids': ['S', 'T', 'N', 'Q']},
@@ -58,3 +54,8 @@ export const groupDescription: {[key: string]: {'description': string, aminoAcid
58
54
  },
59
55
  '-': {'description': 'Unknown Amino Acid', 'aminoAcids': ['-']},
60
56
  };
57
+
58
+ export namespace PEPSEA {
59
+ export const SEPARATOR = '.';
60
+ }
61
+
@@ -1,10 +1,11 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as grok from 'datagrok-api/grok';
4
- import * as bio from '@datagrok-libraries/bio';
5
4
 
6
5
  import $ from 'cash-dom';
7
6
  import {Subscription} from 'rxjs';
7
+ import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
8
+ import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
8
9
 
9
10
 
10
11
  let convertDialog: DG.Dialog | null = null;
@@ -16,13 +17,13 @@ let convertDialogSubs: Subscription[] = [];
16
17
  * @param {DG.column} col Column with 'Macromolecule' semantic type
17
18
  */
18
19
  export function convert(col: DG.Column): void {
19
- const converter = new bio.NotationConverter(col);
20
- const currentNotation: bio.NOTATION = converter.notation;
20
+ const converter = new NotationConverter(col);
21
+ const currentNotation: NOTATION = converter.notation;
21
22
  //TODO: read all notations
22
23
  const notations = [
23
- bio.NOTATION.FASTA,
24
- bio.NOTATION.SEPARATOR,
25
- bio.NOTATION.HELM
24
+ NOTATION.FASTA,
25
+ NOTATION.SEPARATOR,
26
+ NOTATION.HELM
26
27
  ];
27
28
  const separatorArray = ['-', '.', '/'];
28
29
  const filteredNotations = notations.filter((e) => e !== currentNotation);
@@ -32,7 +33,7 @@ export function convert(col: DG.Column): void {
32
33
 
33
34
  // hide the separator input for non-SEPARATOR target notations
34
35
  const toggleSeparator = () => {
35
- if (targetNotationInput.value !== bio.NOTATION.SEPARATOR)
36
+ if (targetNotationInput.value !== NOTATION.SEPARATOR)
36
37
  $(separatorInput.root).hide();
37
38
  else
38
39
  $(separatorInput.root).show();
@@ -46,7 +47,7 @@ export function convert(col: DG.Column): void {
46
47
  });
47
48
 
48
49
  if (convertDialog == null) {
49
- convertDialog = ui.dialog('Convert sequence notation')
50
+ convertDialog = ui.dialog('Convert Sequence Notation')
50
51
  .add(ui.div([
51
52
  ui.divText(
52
53
  'Current notation: ' + currentNotation,
@@ -63,7 +64,7 @@ export function convert(col: DG.Column): void {
63
64
  separatorInput.root
64
65
  ]))
65
66
  .onOK(async () => {
66
- const targetNotation = targetNotationInput.value as bio.NOTATION;
67
+ const targetNotation = targetNotationInput.value as NOTATION;
67
68
  const separator: string | null = separatorInput.value;
68
69
 
69
70
  await convertDo(col, targetNotation, separator);
@@ -80,12 +81,17 @@ export function convert(col: DG.Column): void {
80
81
 
81
82
  /** Creates a new column with converted sequences and detects its semantic type */
82
83
  export async function convertDo(
83
- srcCol: DG.Column, targetNotation: bio.NOTATION, separator: string | null
84
+ srcCol: DG.Column, targetNotation: NOTATION, separator: string | null
84
85
  ): Promise<DG.Column> {
85
- const converter = new bio.NotationConverter(srcCol);
86
+ const converter = new NotationConverter(srcCol);
86
87
  const newColumn = converter.convert(targetNotation, separator);
87
88
  srcCol.dataFrame.columns.add(newColumn);
88
89
 
90
+ // Call detector directly to escape some error on detectSemanticTypes
91
+ const semType = await grok.functions.call('Bio:detectMacromolecule', {col: newColumn});
92
+ if (semType)
93
+ newColumn.semType = semType;
94
+
89
95
  // call to calculate 'cell.renderer' tag
90
96
  await grok.data.detectSemanticTypes(srcCol.dataFrame);
91
97
 
@@ -0,0 +1,174 @@
1
+ // import * as ui from 'datagrok-api/ui';
2
+ import * as DG from 'datagrok-api/dg';
3
+ import * as grok from 'datagrok-api/grok';
4
+
5
+ import {Observable, Subject} from 'rxjs';
6
+ import {IMonomerLib, Monomer} from '@datagrok-libraries/bio/src/types/index';
7
+ import {
8
+ createJsonMonomerLibFromSdf,
9
+ expectedMonomerData,
10
+ IMonomerLibHelper
11
+ } from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
12
+
13
+ // -- Monomer libraries --
14
+ export const LIB_STORAGE_NAME = 'Libraries';
15
+ export const LIB_PATH = 'System:AppData/Bio/libraries/';
16
+ export const LIB_DEFAULT: { [fileName: string]: string } = {'HELMCoreLibrary.json': 'HELMCoreLibrary.json'};
17
+
18
/**
 * In-memory monomer library: monomers are grouped by type and then keyed by
 * monomer name within the type. Subscribers of {@link onChanged} are notified
 * after the content is updated or cleared.
 */
export class MonomerLib implements IMonomerLib {
  private _monomers: { [type: string]: { [name: string]: Monomer } } = {};
  private _onChanged = new Subject<any>();

  /** @param monomers Initial content: monomers keyed by type, then by name */
  constructor(monomers: { [type: string]: { [name: string]: Monomer } }) {
    this._monomers = monomers;
  }

  /** Own-property check; unlike the `in` operator it ignores keys inherited
   * from Object.prototype (e.g. 'toString', 'constructor'). */
  private static hasOwn(obj: object, key: string): boolean {
    return Object.prototype.hasOwnProperty.call(obj, key);
  }

  /** Gets the monomer of the specified type by its name.
   * @param {string} monomerType Type the monomer is registered under
   * @param {string} monomerName Name (key) of the monomer within the type
   * @return The monomer, or null if the type or the name is not in the library
   */
  getMonomer(monomerType: string, monomerName: string): Monomer | null {
    if (!MonomerLib.hasOwn(this._monomers, monomerType))
      return null;

    const monomersOfType = this._monomers[monomerType];
    return MonomerLib.hasOwn(monomersOfType, monomerName) ? monomersOfType[monomerName] : null;
  }

  /** @return Types of the monomers present in the library */
  getTypes(): string[] {
    return Object.keys(this._monomers);
  }

  /** Gets molfiles of all monomers of the specified type.
   * @param {string} type Monomer type
   * @return Molfiles keyed by monomer symbol; empty for an unknown type
   */
  getMonomerMolsByType(type: string): { [symbol: string]: string } {
    const res: { [symbol: string]: string } = {};
    if (!MonomerLib.hasOwn(this._monomers, type))
      return res;

    const monomersOfType = this._monomers[type];
    for (const monomerSymbol of Object.keys(monomersOfType))
      res[monomerSymbol] = monomersOfType[monomerSymbol].molfile;

    return res;
  }

  /** Gets names of all monomers of the specified type.
   * @param {string} type Monomer type
   * @return Monomer names; empty for an unknown type
   */
  getMonomerNamesByType(type: string): string[] {
    return MonomerLib.hasOwn(this._monomers, type) ? Object.keys(this._monomers[type]) : [];
  }

  /** Emits after the library content is changed by update, updateLibs or clear */
  get onChanged(): Observable<any> {
    return this._onChanged;
  }

  /** Merges monomers of {@link lib} into this library without notification.
   * A monomer that is already present under the same type and name is overwritten.
   */
  private _updateInt(lib: IMonomerLib): void {
    for (const type of lib.getTypes()) {
      //could possibly rewrite -> TODO: check duplicated monomer symbol

      // Checked against the live content (not a snapshot), so a type listed
      // twice by the source library does not reset already merged monomers.
      if (!MonomerLib.hasOwn(this._monomers, type))
        this._monomers[type] = {};

      for (const monomerName of lib.getMonomerNamesByType(type)) {
        const monomer = lib.getMonomer(type, monomerName);
        // Do not store a null entry if the source library fails to resolve its own name
        if (monomer !== null)
          this._monomers[type][monomerName] = monomer;
      }
    }
  }

  /** Merges monomers of {@link lib} into this library and notifies subscribers */
  public update(lib: IMonomerLib): void {
    this._updateInt(lib);
    this._onChanged.next();
  }

  /** Merges all libraries of the list and notifies subscribers once.
   * @param {IMonomerLib[]} libList Libraries to merge, later ones overwrite earlier ones
   * @param {boolean} reload Drop the current content before merging [false]
   */
  public updateLibs(libList: IMonomerLib[], reload: boolean = false): void {
    if (reload) this._monomers = {};
    for (const lib of libList) this._updateInt(lib);
    this._onChanged.next();
  }

  /** Removes all monomers and notifies subscribers */
  public clear(): void {
    this._monomers = {};
    this._onChanged.next();
  }
}
88
+
89
/**
 * Singleton helper that owns the shared monomer library and fills it from
 * library files. Use {@link MonomerLibHelper.instance} to get the helper.
 */
export class MonomerLibHelper implements IMonomerLibHelper {
  private readonly _monomerLib: MonomerLib = new MonomerLib({});

  /** Protect constructor to prevent multiple instantiation. */
  protected constructor() {}

  /** Singleton monomer library */
  getBioLib(): IMonomerLib {
    return this._monomerLib;
  }

  /** Tail of the queue of library loads, used to run the loads one after another */
  private loadLibrariesPromise: Promise<void> = Promise.resolve();

  /** Loads libraries based on settings in user storage {@link LIB_STORAGE_NAME}
   * Loads are queued: a call starts after the previous one has settled.
   * @param {boolean} reload Clean the monomer library before load libraries [false]
   * @return Promise that settles when this load is finished; rejects if it fails
   */
  async loadLibraries(reload: boolean = false): Promise<void> {
    // A failure of the previous load is reported to its own caller; swallow it
    // here so that it does not reject every subsequent load without running it.
    const previousLoad: Promise<void> = this.loadLibrariesPromise.catch(() => undefined);

    this.loadLibrariesPromise = previousLoad.then(async () => {
      const settings = await grok.dapi.userDataStorage.get(LIB_STORAGE_NAME, true);
      // Nothing stored yet is treated as an empty list of libraries
      const userLibrariesSettings: string[] = Object.keys(settings ?? {});
      const libs: IMonomerLib[] = await Promise.all(userLibrariesSettings.map((libFileName) => {
        //TODO handle whether files are in place
        return this.readLibrary(LIB_PATH, libFileName);
      }));
      this._monomerLib.updateLibs(libs, reload);
    });
    return this.loadLibrariesPromise;
  }

  /** Reads library from file shares, handles .json and .sdf
   * @param {string} path Folder of the file share with library files
   * @param {string} fileName Library file name; '.sdf' is imported with the Chem package,
   * any other file is parsed as JSON
   * @return Library with the monomers of the file grouped by polymer type;
   * empty for an .sdf file when Chem:importSdf is not available
   */
  async readLibrary(path: string, fileName: string): Promise<IMonomerLib> {
    let data: any[] = [];
    const fileSource = new DG.FileSource(path);
    if (fileName.endsWith('.sdf')) {
      const funcList: DG.Func[] = DG.Func.find({package: 'Chem', name: 'importSdf'});
      if (funcList.length === 1) {
        const bytes = await fileSource.readAsBytes(fileName);
        const dfSdf = await grok.functions.call('Chem:importSdf', {bytes: bytes});
        data = createJsonMonomerLibFromSdf(dfSdf[0]);
      } else {
        grok.shell.warning('Chem package is not installed');
      }
    } else {
      const text = await fileSource.readAsText(fileName);
      const parsed: unknown = JSON.parse(text);
      // Fail with a message naming the file instead of an obscure TypeError below
      if (!Array.isArray(parsed))
        throw new Error(`Monomer library file '${fileName}' must contain a JSON array of monomers.`);
      data = parsed;
    }

    const monomers: { [type: string]: { [name: string]: Monomer } } = {};
    //group monomers by their type
    data.forEach((monomer) => {
      const monomerAdd: Monomer = {
        'symbol': monomer['symbol'],
        'name': monomer['name'],
        'naturalAnalog': monomer['naturalAnalog'],
        'molfile': monomer['molfile'],
        'rgroups': monomer['rgroups'],
        'polymerType': monomer['polymerType'],
        'monomerType': monomer['monomerType'],
        'data': {}
      };

      // Properties outside of the expected set are kept in 'data'
      Object.keys(monomer).forEach((prop) => {
        if (!expectedMonomerData.includes(prop))
          monomerAdd.data[prop] = monomer[prop];
      });

      const polymerType: string = monomer['polymerType'];
      if (!Object.prototype.hasOwnProperty.call(monomers, polymerType))
        monomers[polymerType] = {};

      monomers[polymerType][monomer['symbol']] = monomerAdd;
    });

    return new MonomerLib(monomers);
  }

  // -- Instance singleton --
  private static _instance: MonomerLibHelper | null = null;

  /** The only instance of the helper, created on first access */
  public static get instance(): MonomerLibHelper {
    if (!MonomerLibHelper._instance) MonomerLibHelper._instance = new MonomerLibHelper();
    return MonomerLibHelper._instance;
  }
}