@datagrok/bio 1.7.23 → 1.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,30 +1,35 @@
1
- import {after, before, category, test, expect} from '@datagrok-libraries/utils/src/test';
1
+ import {after, before, category, test, expect, delay} from '@datagrok-libraries/utils/src/test';
2
2
  import * as DG from 'datagrok-api/dg';
3
- import {sequenceSpace} from '../utils/sequence-space';
4
3
  import {readDataframe} from './utils';
5
4
  import * as grok from 'datagrok-api/grok';
5
+ import { _testSequenceSpaceReturnsResult } from './sequence-space-utils';
6
6
 
7
7
category('sequenceSpace', async () => {
  // Fixtures shared by the tests below; assigned in `before`, released in `after`.
  let fastaDf: DG.DataFrame;
  let fastaView: DG.TableView;
  let helmWithEmptyRowsDf: DG.DataFrame;
  let helmWithEmptyRowsView: DG.TableView;

  // Load both sample tables and open each one in its own table view.
  before(async () => {
    fastaDf = await readDataframe('samples/sample_FASTA.csv');
    fastaView = grok.shell.addTableView(fastaDf);
    helmWithEmptyRowsDf = await readDataframe('samples/sample_HELM_empty_vals.csv');
    helmWithEmptyRowsView = grok.shell.addTableView(helmWithEmptyRowsDf);
  });

  // Close each table and then its view, in the order they were opened.
  after(async () => {
    grok.shell.closeTable(fastaDf);
    fastaView.close();
    grok.shell.closeTable(helmWithEmptyRowsDf);
    helmWithEmptyRowsView.close();
  });

  // Sequence space on the FASTA sample must return a result.
  test('sequenceSpaceOpens', async () => {
    await _testSequenceSpaceReturnsResult(fastaDf, 'UMAP', 'Sequence');
  });

  // Same check on the HELM sample read from 'sample_HELM_empty_vals.csv'.
  test('sequenceSpaceOpensWithEmptyRows', async () => {
    await _testSequenceSpaceReturnsResult(helmWithEmptyRowsDf, 'UMAP', 'HELM');
  });
});
@@ -0,0 +1,10 @@
1
+ import * as DG from 'datagrok-api/dg';
2
+ import * as grok from 'datagrok-api/grok';
3
+ import { expect } from '@datagrok-libraries/utils/src/test';
4
+ import { sequenceSpaceTopMenu } from '../package';
5
+
6
/**
 * Runs sequence space on one column of `df` and asserts that a non-null result comes back.
 *
 * Semantic types are detected on `df` first, then `sequenceSpaceTopMenu` is invoked with the
 * 'Levenshtein' metric.
 *
 * @param df Table that contains the sequence column.
 * @param algorithm Passed through unchanged as the third argument of `sequenceSpaceTopMenu`.
 * @param colName Name of the column to run sequence space on.
 * @throws Error when `df` has no column named `colName`.
 */
export async function _testSequenceSpaceReturnsResult(
  df: DG.DataFrame, algorithm: string, colName: string): Promise<void> {
  await grok.data.detectSemanticTypes(df);

  // Fail with a readable message instead of handing a null column to sequenceSpaceTopMenu.
  const seqCol = df.col(colName);
  if (seqCol == null) {
    throw new Error(`Column '${colName}' not found in the test data frame`);
  }

  const sp = await sequenceSpaceTopMenu(df, seqCol, algorithm, 'Levenshtein', true);
  expect(sp != null, true);
}
@@ -7,7 +7,6 @@ import {SplitterFunc, WebLogo} from '@datagrok-libraries/bio/src/viewers/web-log
7
7
  import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
8
8
  import * as ui from 'datagrok-api/ui';
9
9
 
10
- export const lru = new DG.LruCache<any, any>();
11
10
  const undefinedColor = 'rgb(100,100,100)';
12
11
  const grayColor = '#808080';
13
12
 
@@ -62,78 +61,112 @@ export function processSequence(subParts: string[]): [string[], boolean] {
62
61
  * @param {boolean} [last=false] Is checker if element last or not.
63
62
  * @return {number} x coordinate to start printing at.
64
63
  */
65
export function printLeftOrCentered(
  x: number, y: number, w: number, h: number,
  g: CanvasRenderingContext2D, s: string, color = undefinedColor,
  pivot: number = 0, left = false, transparencyRate: number = 1.0,
  separator: string = '', last: boolean = false, drawStyle: string = 'classic',
  maxWord: any = {}, maxWordIdx: number = 0, gridCell: any = {}): number {
  // `h`, `pivot` and `gridCell` are not read by the body; they stay in the signature so that
  // existing call sites keep working.
  //
  // `maxWord` maps a monomer position to the widest text measured at that position (plus the
  // 'bio-maxIndex' key). It is updated IN PLACE. It is deliberately not assigned to
  // `gridCell.cell.column.temp`: the renderer keeps a composite cache object there, and
  // overwriting it would drop the sibling entries (and would throw for the default `gridCell`).
  g.textAlign = 'start';
  const isMsa = drawStyle === 'msa';
  const colorPart = s;
  // No separator is drawn after the last element, nor in 'msa' style.
  const grayPart = (isMsa || last) ? '' : separator;

  const metrics = g.measureText(colorPart + grayPart);
  const indent = 5;
  const dy = (metrics.fontBoundingBoxAscent + metrics.fontBoundingBoxDescent) / 2;
  const colorTextSize = g.measureText(colorPart).width;

  // Width reserved for this element: its own text width in 'classic' style, the recorded
  // per-position maximum in 'msa' style.
  let slotWidth = colorTextSize;
  let textSize = metrics.width;
  if (isMsa) {
    const recorded: number = maxWord[maxWordIdx] ?? 0;
    // Never reserve less than the text actually needs; a missing record must not yield NaN.
    slotWidth = Math.max(recorded, colorTextSize);
    textSize = slotWidth;
    maxWord[maxWordIdx] = slotWidth;
    if (maxWordIdx > (maxWord['bio-maxIndex'] ?? 0)) {
      maxWord['bio-maxIndex'] = maxWordIdx;
    }
  }

  function draw(dx1: number, dx2: number): void {
    g.fillStyle = color;
    g.globalAlpha = transparencyRate;
    if (drawStyle === 'classic') {
      g.fillText(colorPart, x + dx1, y + dy);
      g.fillStyle = grayColor;
      g.fillText(grayPart, x + dx2, y + dy);
    } else if (isMsa) {
      // Center the text inside the slot reserved for this position.
      g.fillText(colorPart, x + dx1 + (slotWidth - colorTextSize) / 2, y + dy);
    }
  }

  if (left || textSize > w) {
    draw(indent, indent + slotWidth);
    return x + slotWidth + g.measureText(grayPart).width;
  }

  const dx = (w - textSize) / 2;
  draw(dx, dx + slotWidth);
  return x + dx + slotWidth;
}
111
120
 
112
- function findMonomers(helmString: string) {
113
- //@ts-ignore
114
- const types = Object.keys(org.helm.webeditor.monomerTypeList());
115
- const monomers: any = [];
116
- const monomer_names: any = [];
117
- for (var i = 0; i < types.length; i++) {
118
- //@ts-ignore
119
- monomers.push(new scil.helm.Monomers.getMonomerSet(types[i]));
120
- Object.keys(monomers[i]).forEach(k => {
121
- monomer_names.push(monomers[i][k].id);
122
- });
123
- }
124
- const split_string = WebLogo.splitterAsHelm(helmString);
125
- return new Set(split_string.filter(val => !monomer_names.includes(val)));
126
- }
127
121
 
128
122
  export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
129
- get name(): string { return 'macromoleculeSequence'; }
123
+ get name(): string { return 'sequence'; }
130
124
 
131
- get cellType(): string { return C.SEM_TYPES.MACROMOLECULE; }
125
+ get cellType(): string { return 'sequence'; }
132
126
 
133
127
  get defaultHeight(): number { return 30; }
134
128
 
135
129
  get defaultWidth(): number { return 230; }
136
130
 
131
+ onMouseMove(gridCell: DG.GridCell, e: MouseEvent): void {
132
+ if (gridCell.cell.column.getTag('aligned') !== 'SEQ.MSA') {
133
+ return;
134
+ }
135
+ const maxLengthWordsSum = gridCell.cell.column.temp['bio-sum-maxLengthWords'];
136
+ if (maxLengthWordsSum == null) {
137
+ gridCell.cell.column.setTag('.calculatedCellRender', 'unexist');
138
+ }
139
+ const maxIndex = gridCell.cell.column.temp['bio-maxIndex'];
140
+ //@ts-ignore
141
+ const argsX = e.layerX - gridCell.gridColumn.left - ((gridCell.bounds.x<0) ? gridCell.bounds.x : 0);
142
+ let left = 0;
143
+ let right = maxIndex;
144
+ let found = false;
145
+ maxLengthWordsSum[maxIndex + 1] = argsX + 1;
146
+ let mid = 0;
147
+ if (argsX > maxLengthWordsSum[0]) {
148
+ while (!found) {
149
+ mid = Math.floor((right + left) / 2);
150
+ if (argsX >= maxLengthWordsSum[mid] && argsX <= maxLengthWordsSum[mid + 1]) {
151
+ left = mid;
152
+ found = true;
153
+ } else if (argsX < maxLengthWordsSum[mid]) {
154
+ right = mid - 1;
155
+ } else if (argsX > maxLengthWordsSum[mid + 1]) {
156
+ left = mid + 1;
157
+ }
158
+ if (left == right) {
159
+ found = true;
160
+ }
161
+ }
162
+ }
163
+ left = (argsX >= maxLengthWordsSum[left]) ? left + 1 : left;
164
+ const separator = gridCell.cell.column.getTag('separator') ?? '';
165
+ const splitterFunc: SplitterFunc = WebLogo.getSplitter('separator', separator);
166
+ const subParts: string[] = splitterFunc(gridCell.cell.value);
167
+ ui.tooltip.show(ui.div(subParts[left]), e.x + 16, e.y + 16);
168
+ }
169
+
137
170
  /**
138
171
  * Cell renderer function.
139
172
  *
@@ -152,116 +185,73 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
152
185
  ): void {
153
186
  const grid = gridCell.gridRow !== -1 ? gridCell.grid : undefined;
154
187
  const cell = gridCell.cell;
155
- const tag = gridCell.cell.column.getTag(DG.TAGS.UNITS);
156
- if (tag === 'HELM') {
157
- const monomers = findMonomers(cell.value);
158
- if (monomers.size == 0) {
159
- const host = ui.div([], {style: {width: `${w}px`, height: `${h}px`}});
160
- host.setAttribute('dataformat', 'helm');
161
- host.setAttribute('data', gridCell.cell.value);
162
- gridCell.element = host;
163
- //@ts-ignore
164
- const canvas = new JSDraw2.Editor(host, {width: w, height: h, skin: 'w8', viewonly: true});
165
- const formula = canvas.getFormula(true);
166
- if (!formula) {
167
- gridCell.element = ui.divText(gridCell.cell.value, {style: {color: 'red'}});
168
- }
169
- const molWeight = Math.round(canvas.getMolWeight() * 100) / 100;
170
- const coef = Math.round(canvas.getExtinctionCoefficient(true) * 100) / 100;
171
- const molfile = canvas.getMolfile();
172
- const result = formula + ', ' + molWeight + ', ' + coef + ', ' + molfile;
173
- lru.set(gridCell.cell.value, result);
174
- return;
175
- }
176
- if (monomers.size > 0) {
177
- w = grid ? Math.min(grid.canvas.width - x, w) : g.canvas.width - x;
178
- g.save();
179
- g.beginPath();
180
- g.rect(x, y, w, h);
181
- g.clip();
182
- g.font = '12px monospace';
183
- g.textBaseline = 'top';
184
- let x1 = x;
185
- const s: string = cell.value ?? '';
186
- let subParts: string[] = WebLogo.splitterAsHelm(s);
188
+ const [type, subtype, paletteType] = gridCell.cell.column.getTag(DG.TAGS.UNITS).split(':');
189
+ w = grid ? Math.min(grid.canvas.width - x, w) : g.canvas.width - x;
190
+ g.save();
191
+ g.beginPath();
192
+ g.rect(x, y, w, h);
193
+ g.clip();
194
+ g.font = '12px monospace';
195
+ g.textBaseline = 'top';
196
+ const s: string = cell.value ?? '';
197
+
198
+ //TODO: can this be replaced/merged with splitSequence?
199
+ const units = gridCell.cell.column.getTag(DG.TAGS.UNITS);
200
+
201
+ const palette = getPalleteByType(paletteType);
202
+
203
+ const separator = gridCell.cell.column.getTag('separator') ?? '';
204
+ const splitterFunc: SplitterFunc = WebLogo.getSplitter(units, separator);
205
+
206
+ const columns = gridCell.cell.column.categories;
207
+ let monomerToShortFunction: (amino: string, maxLengthOfMonomer: number) => string = WebLogo.monomerToShort;
208
+ let maxLengthOfMonomer = 8;
209
+
210
+ let maxLengthWords: any = {};
211
+ if (gridCell.cell.column.getTag('.calculatedCellRender') !== 'exist') {
212
+ for (let i = 0; i < columns.length; i++) {
213
+ let subParts: string[] = splitterFunc(columns[i]);
187
214
  subParts.forEach((amino, index) => {
188
- let color = monomers.has(amino) ? 'red' : grayColor;
189
- g.fillStyle = undefinedColor;
190
- let last = index === subParts.length - 1;
191
- x1 = printLeftOrCentered(x1, y, w, h, g, amino, color, 0, true, 1.0, '/', last);
215
+ let textSizeWidth = g.measureText(monomerToShortFunction(amino, maxLengthOfMonomer));
216
+ if (textSizeWidth.width > (maxLengthWords[index] ?? 0)) {
217
+ maxLengthWords[index] = textSizeWidth.width;
218
+ }
219
+ if (index > (maxLengthWords['bio-maxIndex'] ?? 0)) {
220
+ maxLengthWords['bio-maxIndex'] = index;
221
+ }
192
222
  });
193
- g.restore();
194
- return;
195
223
  }
196
- } else {
197
- const [type, subtype, paletteType] = gridCell.cell.column.getTag(DG.TAGS.UNITS).split(':');
198
- w = grid ? Math.min(grid.canvas.width - x, w) : g.canvas.width - x;
199
- g.save();
200
- g.beginPath();
201
- g.rect(x, y, w, h);
202
- g.clip();
203
- g.font = '12px monospace';
204
- g.textBaseline = 'top';
205
- const s: string = cell.value ?? '';
206
-
207
- //TODO: can this be replaced/merged with splitSequence?
208
- const units = gridCell.cell.column.getTag(DG.TAGS.UNITS);
209
-
210
- const palette = getPalleteByType(paletteType);
211
-
212
- const separator = gridCell.cell.column.getTag('separator') ?? '';
213
- const splitterFunc: SplitterFunc = WebLogo.getSplitter(units, gridCell.cell.column.getTag('separator'));
214
-
215
- // обработка новых елементов
216
- const columns = gridCell.cell.column.categories;
217
- let maxLengthWords = {};
218
- // check if gridCell.cell.column.temp is array
219
- if (gridCell.cell.column.getTag('.calculatedCellRender') !== 'exist') {
220
- for (let i = 0; i < columns.length; i++) {
221
- let subParts: string[] = splitterFunc(columns[i]);
222
- subParts.forEach((amino, index) => {
223
- //@ts-ignore
224
- let textSizeWidth = g.measureText(WebLogo.monomerToText(amino) + ' ');
225
- //@ts-ignore
226
- if (textSizeWidth.width > (maxLengthWords[index] ?? 0)) {
227
- //@ts-ignore
228
- maxLengthWords[index] = textSizeWidth.width;
229
- }
230
- });
231
- }
232
- gridCell.cell.column.temp = maxLengthWords;
233
- gridCell.cell.column.setTag('.calculatedCellRender', 'exist');
234
- } else {
235
- maxLengthWords = gridCell.cell.column.temp;
224
+ let maxLengthWordSum: any = {};
225
+ maxLengthWordSum[0] = maxLengthWords[0];
226
+ for (let i = 1; i <= maxLengthWords['bio-maxIndex']; i++) {
227
+ maxLengthWordSum[i] = maxLengthWordSum[i - 1] + maxLengthWords[i];
236
228
  }
229
+ gridCell.cell.column.temp = {
230
+ 'bio-sum-maxLengthWords': maxLengthWordSum,
231
+ 'bio-maxIndex': maxLengthWords['bio-maxIndex'],
232
+ 'bio-maxLengthWords': maxLengthWords
233
+ };
234
+ gridCell.cell.column.setTag('.calculatedCellRender', 'exist');
235
+ } else {
236
+ maxLengthWords = gridCell.cell.column.temp['bio-maxLengthWords'];
237
+ }
237
238
 
238
- const subParts: string[] = splitterFunc(cell.value);
239
- let x1 = x;
240
- let color = undefinedColor;
241
- // get max length word in subParts
242
- let tagUnits = gridCell.cell.column.getTag(DG.TAGS.UNITS);
243
- let maxLength = 0;
244
- let maxWord = '';
245
- let drawStyle = 'classic';
246
- if (tagUnits.includes('MSA')) {
247
- subParts.forEach(part => {
248
- if (part.length > maxLength) {
249
- maxLength = part.length;
250
- maxWord = part;
251
- drawStyle = 'msa';
252
- }
253
- });
254
- }
255
- subParts.forEach((amino, index) => {
256
- color = palette.get(amino);
257
- g.fillStyle = undefinedColor;
258
- let last = index === subParts.length - 1;
259
- x1 = printLeftOrCentered(x1, y, w, h, g, WebLogo.monomerToText(amino), color, 0, true, 1.0, separator, last, drawStyle, maxLengthWords, index, gridCell);
260
- });
261
-
262
- g.restore();
263
- return;
239
+ const subParts: string[] = splitterFunc(cell.value);
240
+ let x1 = x;
241
+ let color = undefinedColor;
242
+ let drawStyle = 'classic';
243
+ if (gridCell.cell.column.getTag('aligned').includes('MSA')) {
244
+ drawStyle = 'msa';
264
245
  }
246
+ subParts.forEach((amino, index) => {
247
+ color = palette.get(amino);
248
+ g.fillStyle = undefinedColor;
249
+ let last = index === subParts.length - 1;
250
+ x1 = printLeftOrCentered(x1, y, w, h, g, monomerToShortFunction(amino, maxLengthOfMonomer), color, 0, true, 1.0, separator, last, drawStyle, maxLengthWords, index, gridCell);
251
+ });
252
+
253
+ g.restore();
254
+ return;
265
255
  }
266
256
  }
267
257
 
@@ -49,7 +49,17 @@ export function convert(col: DG.Column): void {
49
49
  if (convertDialog == null) {
50
50
  convertDialog = ui.dialog('Convert sequence notation')
51
51
  .add(ui.div([
52
- ui.h1('Current notation: ' + currentNotation),
52
+ ui.divText(
53
+ 'Current notation: ' + currentNotation,
54
+ {
55
+ style: {
56
+ 'text-align': 'center',
57
+ 'font-weight': 'bold',
58
+ 'font-size': '14px',
59
+ 'padding': '5px',
60
+ }
61
+ }
62
+ ),
53
63
  targetNotationInput.root,
54
64
  separatorInput.root
55
65
  ]))
@@ -59,7 +69,7 @@ export function convert(col: DG.Column): void {
59
69
 
60
70
  await convertDo(col, targetNotation, separator);
61
71
  })
62
- .show();
72
+ .show({x: 350, y: 100});
63
73
 
64
74
  convertDialogSubs.push(convertDialog.onClose.subscribe((value) => {
65
75
  convertDialogSubs.forEach((s) => { s.unsubscribe(); });
@@ -57,9 +57,15 @@ export async function runKalign(srcCol: DG.Column, isAligned = false, unUsedName
57
57
 
58
58
  // units
59
59
  const srcUnits = srcCol.getTag(DG.TAGS.UNITS);
60
- const tgtUnits = srcUnits.split(':').map((p, i) => i == 1 ? p + '.MSA' : p).join(':');
60
+ //aligned
61
+ const srcAligned = srcCol.getTag('aligned');
62
+ const tgtAligned = srcAligned + '.MSA';
63
+ //alphabet
64
+ const srcAlphabet = srcCol.getTag('alphabet');
61
65
 
62
- tgtCol.setTag(DG.TAGS.UNITS, tgtUnits);
66
+ tgtCol.setTag(DG.TAGS.UNITS, srcUnits);
67
+ tgtCol.setTag('aligned', tgtAligned);
68
+ tgtCol.setTag('alphabet', srcAlphabet);
63
69
  tgtCol.semType = DG.SEMTYPE.MACROMOLECULE;
64
70
  return tgtCol;
65
71
  }
@@ -8,7 +8,7 @@ export async function sequenceGetSimilarities(col: DG.Column, seq: string): Prom
8
8
  const stringArray = col.toList();
9
9
  const distances = new Array(stringArray.length).fill(0.0);
10
10
  for (let i = 0; i < stringArray.length; ++i)
11
- distances[i] = getSimilarityFromDistance(AvailableMetrics['String']['Levenshtein'](stringArray[i], seq));
11
+ distances[i] = stringArray[i] ? getSimilarityFromDistance(AvailableMetrics['String']['Levenshtein'](stringArray[i], seq)) : 0;
12
12
  return DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, 'distances', distances);
13
13
  }
14
14
 
@@ -51,6 +51,7 @@ export class VdRegionsViewer extends DG.JsViewer {
51
51
  public sequenceColumnNamePostfix: string;
52
52
 
53
53
  public skipEmptyPositions: boolean;
54
+ public positionWidth: number;
54
55
 
55
56
 
56
57
  public get df(): DG.DataFrame {
@@ -77,6 +78,7 @@ export class VdRegionsViewer extends DG.JsViewer {
77
78
  this.sequenceColumnNamePostfix = this.string('sequenceColumnNamePostfix', 'chain sequence');
78
79
 
79
80
  this.skipEmptyPositions = this.bool('skipEmptyPositions', false);
81
+ this.positionWidth = this.float('positionWidth', 16);
80
82
  }
81
83
 
82
84
  public async init() {
@@ -135,6 +137,10 @@ export class VdRegionsViewer extends DG.JsViewer {
135
137
  await this.destroyView();
136
138
  await this.buildView();
137
139
  break;
140
+ case 'positionWidth':
141
+ await this.destroyView();
142
+ await this.buildView();
143
+ break;
138
144
  }
139
145
  }
140
146
  }
@@ -205,6 +211,7 @@ export class VdRegionsViewer extends DG.JsViewer {
205
211
  endPositionName: region!.positionEndName,
206
212
  fixWidth: true,
207
213
  skipEmptyPositions: this.skipEmptyPositions,
214
+ positionWidth: this.positionWidth,
208
215
  })) as unknown as WebLogo;
209
216
  }
210
217
  // WebLogo creation fires onRootSizeChanged event even before control being added to this.logos