@datagrok/bio 1.8.2 → 1.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "1.8.2",
8
+ "version": "1.10.2",
9
9
  "description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
10
10
  "repository": {
11
11
  "type": "git",
@@ -14,16 +14,17 @@
14
14
  },
15
15
  "dependencies": {
16
16
  "@biowasm/aioli": ">=2.4.0",
17
- "@datagrok-libraries/bio": "^3.1.0",
18
- "@datagrok-libraries/ml": "^3.1.0",
17
+ "@datagrok-libraries/bio": "^4.1.0",
18
+ "@datagrok-libraries/chem-meta": "1.0.0",
19
+ "@datagrok-libraries/ml": "^4.0.0",
19
20
  "@datagrok-libraries/utils": "^1.6.2",
20
21
  "cash-dom": "latest",
21
22
  "datagrok-api": "^1.5.5",
22
23
  "dayjs": "^1.11.4",
23
24
  "openchemlib": "6.0.1",
24
25
  "rxjs": "^6.5.5",
25
- "ts-loader": "^9.2.5",
26
- "typescript": "^4.4.2"
26
+ "typescript": "^4.4.2",
27
+ "wu": "latest"
27
28
  },
28
29
  "devDependencies": {
29
30
  "@types/jest": "^27.0.0",
@@ -35,6 +36,7 @@
35
36
  "jest-html-reporter": "^3.6.0",
36
37
  "puppeteer": "^13.7.0",
37
38
  "ts-jest": "^27.0.0",
39
+ "ts-loader": "^9.2.5",
38
40
  "webpack": "latest",
39
41
  "webpack-cli": "^4.10.0",
40
42
  "@types/js-yaml": "^4.0.5",
package/src/package.ts CHANGED
@@ -15,7 +15,7 @@ import {Aminoacids} from '@datagrok-libraries/bio/src/aminoacids';
15
15
  import {getEmbeddingColsNames, sequenceSpace} from './utils/sequence-space';
16
16
  import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
17
17
  import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
18
- import {sequenceGetSimilarities, drawTooltip} from './utils/sequence-activity-cliffs';
18
+ import {drawSequences, sequenceGetSimilarities} from './utils/sequence-activity-cliffs';
19
19
  import {createJsonMonomerLibFromSdf, encodeMonomers, getMolfilesFromSeq, HELM_CORE_LIB_FILENAME} from './utils/utils';
20
20
  import {getMacroMol} from './utils/atomic-works';
21
21
  import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
@@ -23,13 +23,28 @@ import {convert} from './utils/convert';
23
23
  import {representationsWidget} from './widgets/representations';
24
24
  import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
25
25
  import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
26
- import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils'
27
-
26
+ import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils';
27
+ import {
28
+ generateManySequences,
29
+ generateLongSequence,
30
+ performanceTest
31
+ } from './tests/test-sequnces-generators';
28
32
 
29
33
  //tags: init
30
34
  export async function initBio() {
  // The header annotation tags this function as `init`; the body is empty,
  // so no initialization work is performed here.
31
35
  }
32
36
 
37
+ //name: testManySequencesPerformance
38
export function testManySequencesPerformance(): void {
  // Hands the "many rows" CSV generator to the shared timing helper,
  // which logs the elapsed time under this label.
  const label = 'Many sequences';
  performanceTest(generateManySequences, label);
}
41
+
42
+ //name: testLongSequencesPerformance
43
export function testLongSequencesPerformance(): void {
  // Hands the "long rows" CSV generator to the shared timing helper,
  // which logs the elapsed time under this label.
  const label = 'Long sequences';
  performanceTest(generateLongSequence, label);
}
46
+
47
+
33
48
  //name: fastaSequenceCellRenderer
34
49
  //tags: cellRenderer
35
50
  //meta.cellType: Sequence
@@ -51,7 +66,7 @@ export function separatorSequenceCellRenderer(): MacromoleculeSequenceCellRender
51
66
  function checkInputColumn(col: DG.Column, name: string,
52
67
  allowedNotations: string[] = [], allowedAlphabets: string[] = []): boolean {
53
68
  const notation: string = col.getTag(DG.TAGS.UNITS);
54
- const alphabet: string = col.getTag('alphabet')
69
+ const alphabet: string = col.getTag('alphabet');
55
70
  if (col.semType !== DG.SEMTYPE.MACROMOLECULE) {
56
71
  grok.shell.warning(name + ' analysis is allowed for Macromolecules semantic type');
57
72
  return false;
@@ -127,7 +142,7 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
127
142
  'aligned': macroMolecule.tags['aligned'],
128
143
  'separator': macroMolecule.tags['separator'],
129
144
  'alphabet': macroMolecule.tags['alphabet'],
130
- }
145
+ };
131
146
  const sp = await getActivityCliffs(
132
147
  df,
133
148
  macroMolecule,
@@ -142,9 +157,9 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
142
157
  tags,
143
158
  sequenceSpace,
144
159
  sequenceGetSimilarities,
145
- drawTooltip,
160
+ drawSequences,
146
161
  (options as any)[methodName]);
147
- return sp;
162
+ return sp;
148
163
  }
149
164
 
150
165
  //top-menu: Bio | Sequence Space...
@@ -155,18 +170,18 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
155
170
  //input: string similarityMetric { choices:["Levenshtein", "Tanimoto"] }
156
171
  //input: bool plotEmbeddings = true
157
172
  export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: DG.Column, methodName: string,
158
- similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean): Promise<DG.Viewer|undefined> {
173
+ similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean): Promise<DG.Viewer | undefined> {
159
174
  if (!checkInputColumn(macroMolecule, 'Activity Cliffs'))
160
175
  return;
161
176
  const encodedCol = encodeMonomers(macroMolecule);
162
177
  if (!encodedCol)
163
178
  return;
164
179
  const embedColsNames = getEmbeddingColsNames(table);
165
- const withoutEmptyValues = DG.DataFrame.fromColumns([macroMolecule]).clone();
180
+ const withoutEmptyValues = DG.DataFrame.fromColumns([encodedCol]).clone();
166
181
  const emptyValsIdxs = removeEmptyStringRows(withoutEmptyValues, encodedCol);
167
182
 
168
183
  const chemSpaceParams = {
169
- seqCol: withoutEmptyValues.col(macroMolecule.name)!,
184
+ seqCol: withoutEmptyValues.col(encodedCol.name)!,
170
185
  methodName: methodName,
171
186
  similarityMetric: similarityMetric,
172
187
  embedAxesNames: embedColsNames
@@ -174,11 +189,11 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
174
189
  const sequenceSpaceRes = await sequenceSpace(chemSpaceParams);
175
190
  const embeddings = sequenceSpaceRes.coordinates;
176
191
  for (const col of embeddings) {
177
- const listValues = col.toList();
178
- emptyValsIdxs.forEach((ind: number) => listValues.splice(ind, 0, null));
179
- table.columns.add(DG.Column.fromList('double', col.name, listValues));
192
+ const listValues = col.toList();
193
+ emptyValsIdxs.forEach((ind: number) => listValues.splice(ind, 0, null));
194
+ table.columns.add(DG.Column.fromList('double', col.name, listValues));
180
195
  }
181
- let sp;
196
+ let sp;
182
197
  if (plotEmbeddings) {
183
198
  for (const v of grok.shell.views) {
184
199
  if (v.name === table.name)
@@ -12,11 +12,11 @@ category('WebLogo-positions', () => {
12
12
  let currentView: DG.View;
13
13
 
14
14
  const csvDf1 = `seq
15
- ATC-G-TTGC--
16
- ATC-G-TTGC--
17
- -TC-G-TTGC--
18
- -TC-GCTTGC--
19
- -TC-GCTTGC--`;
15
+ ATC-G-TTGC--
16
+ ATC-G-TTGC--
17
+ -TC-G-TTGC--
18
+ -TC-GCTTGC--
19
+ -TC-GCTTGC--`;
20
20
 
21
21
 
22
22
  before(async () => {
@@ -34,8 +34,10 @@ category('WebLogo-positions', () => {
34
34
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf1);
35
35
  const tv: DG.TableView = grok.shell.addTableView(df);
36
36
 
37
- df.getCol('seq').semType = 'Macromolecule';
38
- df.getCol('seq').setTag('units', 'fasta:SEQ.MSA:DNA');
37
+ const seqCol: DG.Column = df.getCol('seq');
38
+ seqCol.semType = 'Macromolecule';
39
+ seqCol.setTag('units', 'fasta');
40
+ seqCol.setTag('alphabet', 'DNA');
39
41
 
40
42
  const wlViewer: WebLogo = await df.plot.fromType('WebLogo') as unknown as WebLogo;
41
43
  tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
@@ -81,10 +83,11 @@ category('WebLogo-positions', () => {
81
83
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf2);
82
84
  const tv: DG.TableView = grok.shell.addTableView(df);
83
85
 
84
- df.getCol('seq').semType = 'Macromolecule';
85
- df.getCol('seq').setTag('units', 'fasta:SEQ.MSA:DNA');
86
+ const seqCol: DG.Column = df.getCol('seq');
87
+ seqCol.semType = 'Macromolecule';
88
+ seqCol.setTag('units', 'fasta');
89
+ seqCol.setTag('alphabet', 'DNA');
86
90
 
87
- let seq: Column = df.getCol('seq');
88
91
  df.filter.init((i) => {
89
92
  return i > 2;
90
93
  });
@@ -125,8 +128,10 @@ category('WebLogo-positions', () => {
125
128
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf1);
126
129
  const tv: DG.TableView = grok.shell.addTableView(df);
127
130
 
128
- df.getCol('seq').semType = 'Macromolecule';
129
- df.getCol('seq').setTag('units', 'fasta:SEQ.MSA:DNA');
131
+ const seqCol: DG.Column = df.getCol('seq');
132
+ seqCol.semType = 'Macromolecule';
133
+ seqCol.setTag('units', 'fasta');
134
+ seqCol.setTag('alphabet', 'DNA');
130
135
 
131
136
  const wlViewer: WebLogo = await df.plot.fromType('WebLogo', {'skipEmptyPositions': true}) as unknown as WebLogo;
132
137
  tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
@@ -156,7 +161,6 @@ category('WebLogo-positions', () => {
156
161
  expect(positions[i].freq[key].count, resAllDf1[i].freq[key].count);
157
162
  }
158
163
  }
159
-
160
164
  });
161
165
 
162
166
  });
@@ -5,6 +5,7 @@ import * as ui from 'datagrok-api/ui';
5
5
  import * as DG from 'datagrok-api/dg';
6
6
 
7
7
  import {importFasta} from '../package';
8
+ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
8
9
 
9
10
  type DfReaderFunc = () => Promise<DG.DataFrame>;
10
11
 
@@ -198,44 +199,44 @@ MWRSWY-CKHP
198
199
  test('NegativeSmiles', async () => { await _testNeg(readCsv('csvDfSmiles', csvDfSmiles), 'col1'); });
199
200
 
200
201
  test('Dna1', async () => {
201
- await _testPos(readCsv('csvDfDna1', csvDfDna1), 'seq', 'fasta', 'SEQ', 'DNA');
202
+ await _testPos(readCsv('csvDfDna1', csvDfDna1), 'seq', 'fasta', 'SEQ', 'DNA', 4, false);
202
203
  });
203
204
  test('Rna1', async () => {
204
- await _testPos(readCsv('csvDfRna1', csvDfRna1), 'seq', 'fasta', 'SEQ', 'RNA');
205
+ await _testPos(readCsv('csvDfRna1', csvDfRna1), 'seq', 'fasta', 'SEQ', 'RNA', 4, false);
205
206
  });
206
207
  test('AA1', async () => {
207
- await _testPos(readCsv('csvDfPt1', csvDfPt1), 'seq', 'fasta', 'SEQ', 'PT');
208
+ await _testPos(readCsv('csvDfPt1', csvDfPt1), 'seq', 'fasta', 'SEQ', 'PT', 20, false);
208
209
  });
209
210
  test('MsaDna1', async () => {
210
- await _testPos(readCsv('csvDfMsaDna1', csvDfMsaDna1), 'seq', 'fasta', 'SEQ.MSA', 'DNA');
211
+ await _testPos(readCsv('csvDfMsaDna1', csvDfMsaDna1), 'seq', 'fasta', 'SEQ.MSA', 'DNA', 4, false);
211
212
  });
212
213
 
213
214
  test('MsaAA1', async () => {
214
- await _testPos(readCsv('csvDfMsaPt1', csvDfMsaPt1), 'seq', 'fasta', 'SEQ.MSA', 'PT');
215
+ await _testPos(readCsv('csvDfMsaPt1', csvDfMsaPt1), 'seq', 'fasta', 'SEQ.MSA', 'PT', 20, false);
215
216
  });
216
217
 
217
218
  test('SepDna', async () => {
218
- await _testPos(readCsv('csvDfSepDna', csvDfSepDna), 'seq', 'separator', 'SEQ', 'DNA', '*');
219
+ await _testPos(readCsv('csvDfSepDna', csvDfSepDna), 'seq', 'separator', 'SEQ', 'DNA', 4, false, '*');
219
220
  });
220
221
  test('SepRna', async () => {
221
- await _testPos(readCsv('csvDfSepRna', csvDfSepRna), 'seq', 'separator', 'SEQ', 'RNA', '*');
222
+ await _testPos(readCsv('csvDfSepRna', csvDfSepRna), 'seq', 'separator', 'SEQ', 'RNA', 4, false, '*');
222
223
  });
223
224
  test('SepPt', async () => {
224
- await _testPos(readCsv('csvDfSepPt', csvDfSepPt), 'seq', 'separator', 'SEQ', 'PT', '-');
225
+ await _testPos(readCsv('csvDfSepPt', csvDfSepPt), 'seq', 'separator', 'SEQ', 'PT', 20, false, '-');
225
226
  });
226
227
  test('SepUn1', async () => {
227
- await _testPos(readCsv('csvDfSepUn1', csvDfSepUn1), 'seq', 'separator', 'SEQ', 'UN', '-');
228
+ await _testPos(readCsv('csvDfSepUn1', csvDfSepUn1), 'seq', 'separator', 'SEQ', 'UN', 8, true, '-');
228
229
  });
229
230
  test('SepUn2', async () => {
230
- await _testPos(readCsv('csvDfSepUn2', csvDfSepUn2), 'seq', 'separator', 'SEQ', 'UN', '/');
231
+ await _testPos(readCsv('csvDfSepUn2', csvDfSepUn2), 'seq', 'separator', 'SEQ', 'UN', 9, true, '/');
231
232
  });
232
233
 
233
234
  test('SepMsaN1', async () => {
234
- await _testPos(readCsv('csvDfSepMsaDna1', csvDfSepMsaDna1), 'seq', 'separator', 'SEQ.MSA', 'DNA', '-');
235
+ await _testPos(readCsv('csvDfSepMsaDna1', csvDfSepMsaDna1), 'seq', 'separator', 'SEQ.MSA', 'DNA', 4, false, '-');
235
236
  });
236
237
 
237
238
  test('SamplesFastaCsvPt', async () => {
238
- await _testPos(readSamples(Samples.fastaCsv), 'sequence', 'fasta', 'SEQ', 'PT');
239
+ await _testPos(readSamples(Samples.fastaCsv), 'sequence', 'fasta', 'SEQ', 'PT', 20, false);
239
240
  });
240
241
  test('SamplesFastaCsvNegativeEntry', async () => {
241
242
  await _testNeg(readSamples(Samples.fastaCsv), 'Entry');
@@ -248,7 +249,7 @@ MWRSWY-CKHP
248
249
  });
249
250
 
250
251
  test('SamplesFastaFastaPt', async () => {
251
- await _testPos(readSamples(Samples.fastaFasta, readFileFasta), 'sequence', 'fasta', 'SEQ', 'PT');
252
+ await _testPos(readSamples(Samples.fastaFasta, readFileFasta), 'sequence', 'fasta', 'SEQ', 'PT', 20, false);
252
253
  });
253
254
 
254
255
  // peptidesComplex contains monomers with spaces in AlignedSequence columns, which are forbidden
@@ -266,7 +267,7 @@ MWRSWY-CKHP
266
267
  });
267
268
 
268
269
  test('samplesMsaComplexUn', async () => {
269
- await _testPos(readSamples(Samples.msaComplex), 'MSA', 'separator', 'SEQ.MSA', 'UN', '/');
270
+ await _testPos(readSamples(Samples.msaComplex), 'MSA', 'separator', 'SEQ.MSA', 'UN', 161, true, '/');
270
271
  });
271
272
  test('samplesMsaComplexNegativeActivity', async () => {
272
273
  await _testNeg(readSamples(Samples.msaComplex), 'Activity');
@@ -281,7 +282,7 @@ MWRSWY-CKHP
281
282
  });
282
283
 
283
284
  test('samplesHelmCsvHELM', async () => {
284
- await _testPos(readSamples(Samples.helmCsv), 'HELM', 'helm', null, null, null);
285
+ await _testPos(readSamples(Samples.helmCsv), 'HELM', 'helm', null, null, 160, true, null);
285
286
  });
286
287
 
287
288
  test('samplesHelmCsvNegativeActivity', async () => {
@@ -297,7 +298,7 @@ MWRSWY-CKHP
297
298
  await _testNeg(readSamples(Samples.testHelmCsv), 'Test type');
298
299
  });
299
300
  test('samplesTestHelmPositiveHelmString', async () => {
300
- await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', 'helm', null, null, null);
301
+ await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', 'helm', null, null, 9, true, null);
301
302
  });
302
303
  test('samplesTestHelmNegativeValid', async () => {
303
304
  await _testNeg(readSamples(Samples.testHelmCsv), 'Valid?');
@@ -329,7 +330,7 @@ MWRSWY-CKHP
329
330
  });
330
331
 
331
332
  test('samplesFastaPtPosSequence', async () => {
332
- await _testPos(readSamples(Samples.fastaPtCsv), 'sequence', 'fasta', 'SEQ', 'PT');
333
+ await _testPos(readSamples(Samples.fastaPtCsv), 'sequence', 'fasta', 'SEQ', 'PT', 20, false);
333
334
  });
334
335
 
335
336
  test('samplesTestCerealNegativeCerealName', async () => {
@@ -371,7 +372,8 @@ MWRSWY-CKHP
371
372
  export async function _testNeg(readDf: DfReaderFunc, colName: string) {
372
373
  const df: DG.DataFrame = await readDf();
373
374
  const col: DG.Column = df.col(colName)!;
374
- const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: col});
375
+ const semType: string = await grok.functions
376
+ .call('Bio:detectMacromolecule', {col: col}) as unknown as string;
375
377
  if (semType)
376
378
  col.semType = semType;
377
379
 
@@ -384,10 +386,15 @@ export async function _testNeg(readDf: DfReaderFunc, colName: string) {
384
386
  }
385
387
  }
386
388
 
387
- export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, aligned: string | null, alphabet: string | null, separator: string | null = null) {
389
+ export async function _testPos(
390
+ readDf: DfReaderFunc, colName: string, units: string,
391
+ aligned: string | null, alphabet: string | null, alphabetSize: number, alphabetIsMultichar: boolean,
392
+ separator: string | null = null
393
+ ) {
388
394
  const df: DG.DataFrame = await readDf();
389
395
  const col: DG.Column = df.col(colName)!;
390
- const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: col});
396
+ const semType: string = await grok.functions
397
+ .call('Bio:detectMacromolecule', {col: col}) as unknown as string;
391
398
  if (semType)
392
399
  col.semType = semType;
393
400
 
@@ -397,5 +404,13 @@ export async function _testPos(readDf: DfReaderFunc, colName: string, units: str
397
404
  expect(col.getTag('alphabet'), alphabet);
398
405
  if (separator)
399
406
  expect(col.getTag('separator'), separator);
407
+
408
+ const uh = new UnitsHandler(col);
409
+ expect(uh.getAlphabetSize(), alphabetSize);
410
+ expect(uh.getAlphabetIsMultichar(), alphabetIsMultichar);
411
+ if (!uh.isHelm()) {
412
+ expect(uh.aligned, aligned);
413
+ expect(uh.alphabet, alphabet);
414
+ }
400
415
  }
401
416
 
@@ -49,7 +49,8 @@ async function _testMsaIsCorrect(srcCsv: string, tgtCsv: string): Promise<void>
49
49
  const tgtDf: DG.DataFrame = DG.DataFrame.fromCsv(tgtCsv);
50
50
 
51
51
  const srcCol: DG.Column = srcDf.getCol('seq')!;
52
- const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: srcCol});
52
+ const semType: string = await grok.functions
53
+ .call('Bio:detectMacromolecule', {col: srcCol}) as unknown as string;
53
54
  if (semType)
54
55
  srcCol.semType = semType;
55
56
 
@@ -0,0 +1,45 @@
1
+ import * as DG from 'datagrok-api/dg';
2
+ import * as grok from 'datagrok-api/grok';
3
+
4
/**
 * Builds CSV text with an `MSA,Activity` header and the same
 * separator-notation sequence repeated on every data row.
 *
 * The output always contains one data row directly after the header, followed
 * by `rowCount` further rows. Each of those further rows starts with a single
 * space after the newline, exactly as the original loop emitted them.
 *
 * @param rowCount Number of rows appended after the first data row. Defaults
 *   to one million, the size used by the performance test. Negative or
 *   fractional values are clamped/truncated to a non-negative integer.
 * @returns The generated CSV text.
 */
export function generateManySequences(rowCount: number = 10 ** 6): string {
  const row =
    'meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me,5.30751';
  // `repeat` throws on negative counts, whereas the original loop simply ran zero times.
  const extraRows = Math.max(0, Math.trunc(rowCount));
  // A single `repeat` avoids a million incremental string concatenations.
  return `MSA,Activity\n${row}` + `\n ${row}`.repeat(extraRows);
}
12
+
13
/**
 * Builds CSV text whose data rows each hold one very long separator-notation
 * sequence followed by an activity value.
 *
 * The sequence is a fixed prefix, then `repeatCount` copies of a fixed monomer
 * run, then the `//Phe_4Me,5.30751` tail. The header is `MSA,Activity ` (with
 * the trailing space the original emitted) and every data row starts with a
 * single space after the newline.
 *
 * @param repeatCount How many times the monomer run is appended to the prefix.
 *   Defaults to 10^5, the size used by the performance test.
 * @param rowCount Number of data rows. Defaults to 41, which is what the
 *   original inclusive loop bound `i <= 10 ** 1 * 4` produced.
 * @returns The generated CSV text.
 */
export function generateLongSequence(repeatCount: number = 10 ** 5, rowCount: number = 10 ** 1 * 4 + 1): string {
  const prefix = 'meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr';
  const repeatedRun = '/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/dv';
  const tail = '//Phe_4Me,5.30751';

  // `repeat` throws on negative counts, whereas the original loops simply ran zero times.
  const runs = Math.max(0, Math.trunc(repeatCount));
  const rows = Math.max(0, Math.trunc(rowCount));

  const longSequence = prefix + repeatedRun.repeat(runs) + tail;
  return 'MSA,Activity ' + `\n ${longSequence}`.repeat(rows);
}
25
/**
 * Marks a column as a macromolecule column and assigns the tags used by the
 * generated test data: separator notation, `SEQ.MSA`, alphabet `UN`, and `/`
 * as the separator.
 *
 * @param col Column to tag; it is modified in place.
 * @returns The same column instance that was passed in.
 */
export function setTagsMacromolecule(col: DG.Column) {
  const tagPairs: Array<[string, string]> = [
    ['units', 'separator'],
    ['aligned', 'SEQ.MSA'],
    ['alphabet', 'UN'],
    ['separator', '/'],
  ];

  col.semType = DG.SEMTYPE.MACROMOLECULE;
  for (const [tagName, tagValue] of tagPairs)
    col.setTag(tagName, tagValue);

  return col;
}
33
+
34
/**
 * Times one end-to-end run: generate CSV text, parse it into a data frame,
 * tag the `MSA` column as a macromolecule, and add a table view for it.
 * The elapsed wall-clock time is written to the console.
 *
 * @param generateFunc Produces the CSV text; its run time is included in the measurement.
 * @param testName Label included in the logged message.
 */
export function performanceTest(generateFunc: () => string, testName: string) {
  const startedAt: number = Date.now();

  const df: DG.DataFrame = DG.DataFrame.fromCsv(generateFunc());
  setTagsMacromolecule(df.columns.byName('MSA'));
  grok.shell.addTableView(df);

  const elapsedMs: number = Date.now() - startedAt;
  console.log(`Performance test: ${testName}: ${elapsedMs}ms`);
}
@@ -1,6 +1,8 @@
1
1
  import * as OCL from 'openchemlib/full.js';
2
2
  import * as grok from 'datagrok-api/grok';
3
3
 
4
+ import {RDModule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
5
+
4
6
  export async function getMacroMol(monomers: any[][]): Promise<string[]> {
5
7
  let result: string[] = [];
6
8
  const moduleRdkit = await grok.functions.call('Chem:getRdKitModule');
@@ -6,9 +6,12 @@ import {UnknownSeqPalette, UnknownSeqPalettes} from '@datagrok-libraries/bio/src
6
6
  import {SplitterFunc, WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
7
7
  import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
8
8
  import * as ui from 'datagrok-api/ui';
9
+ import {printLeftOrCentered, DrawStyle} from '@datagrok-libraries/bio/src/utils/cell-renderer';
9
10
 
10
11
  const undefinedColor = 'rgb(100,100,100)';
11
- const grayColor = '#808080';
12
+ const monomerToShortFunction: (amino: string, maxLengthOfMonomer: number) => string = WebLogo.monomerToShort;
13
+ const gapRenderer = 5;
14
+
12
15
 
13
16
  function getPalleteByType(paletteType: string): SeqPalette {
14
17
  switch (paletteType) {
@@ -44,81 +47,6 @@ export function processSequence(subParts: string[]): [string[], boolean] {
44
47
  }
45
48
 
46
49
 
47
- /**
48
- * A function that prints a string aligned to left or centered.
49
- *
50
- * @param {number} x x coordinate.
51
- * @param {number} y y coordinate.
52
- * @param {number} w Width.
53
- * @param {number} h Height.
54
- * @param {CanvasRenderingContext2D} g Canvas rendering context.
55
- * @param {string} s String to print.
56
- * @param {string} [color=undefinedColor] String color.
57
- * @param {number} [pivot=0] Pirvot.
58
- * @param {boolean} [left=false] Is left aligned.
59
- * @param {number} [transparencyRate=0.0] Transparency rate where 1.0 is fully transparent
60
- * @param {string} [separator=''] Is separator for sequence.
61
- * @param {boolean} [last=false] Is checker if element last or not.
62
- * @return {number} x coordinate to start printing at.
63
- */
64
- export function printLeftOrCentered(
65
- x: number, y: number, w: number, h: number,
66
- g: CanvasRenderingContext2D, s: string, color = undefinedColor,
67
- pivot: number = 0, left = false, transparencyRate: number = 1.0,
68
- separator: string = '', last: boolean = false, drawStyle: string = 'classic', maxWord: any = {}, maxWordIdx: number = 0, gridCell: any = {}): number {
69
- g.textAlign = 'start';
70
- const colorPart = s.substring(0);
71
- let grayPart = last ? '' : separator;
72
- if (drawStyle === 'msa') {
73
- grayPart = '';
74
- }
75
-
76
- let textSize: any = g.measureText(colorPart + grayPart);
77
- const indent = 5;
78
-
79
- let maxColorTextSize = g.measureText(colorPart).width;
80
- let colorTextSize = g.measureText(colorPart).width;
81
- const dy = (textSize.fontBoundingBoxAscent + textSize.fontBoundingBoxDescent) / 2;
82
- textSize = textSize.width;
83
- if (drawStyle === 'msa') {
84
- maxColorTextSize = maxWord[maxWordIdx];
85
- textSize = maxWord[maxWordIdx];
86
- if (maxColorTextSize > maxWord) {
87
- maxWord[maxWordIdx] = maxColorTextSize;
88
- gridCell.cell.column.temp = maxWord;
89
- }
90
- if (maxWordIdx > (maxWord['bio-maxIndex'] ?? 0)) {
91
- maxWord['bio-maxIndex'] = maxWordIdx;
92
- gridCell.cell.column.temp = maxWord;
93
- }
94
- }
95
-
96
- function draw(dx1: number, dx2: number): void {
97
- g.fillStyle = color;
98
- g.globalAlpha = transparencyRate;
99
- if (drawStyle === 'classic') {
100
- g.fillText(colorPart, x + dx1, y + dy);
101
- g.fillStyle = grayColor;
102
- g.fillText(grayPart, x + dx2, y + dy);
103
- }
104
- if (drawStyle === 'msa') {
105
- g.fillStyle = color;
106
- g.fillText(colorPart, x + dx1 + ((maxWord[maxWordIdx] - colorTextSize) / 2), y + dy);
107
- }
108
- }
109
-
110
- if (left || textSize > w) {
111
- draw(indent, indent + maxColorTextSize);
112
- return x + maxColorTextSize + g.measureText(grayPart).width;
113
-
114
- } else {
115
- const dx = (w - textSize) / 2;
116
- draw(dx, dx + maxColorTextSize);
117
- return x + dx + maxColorTextSize;
118
- }
119
- }
120
-
121
-
122
50
  export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
123
51
  get name(): string { return 'sequence'; }
124
52
 
@@ -133,12 +61,9 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
133
61
  return;
134
62
  }
135
63
  const maxLengthWordsSum = gridCell.cell.column.temp['bio-sum-maxLengthWords'];
136
- if (maxLengthWordsSum == null) {
137
- gridCell.cell.column.setTag('.calculatedCellRender', 'unexist');
138
- }
139
64
  const maxIndex = gridCell.cell.column.temp['bio-maxIndex'];
140
65
  //@ts-ignore
141
- const argsX = e.layerX - gridCell.gridColumn.left - ((gridCell.bounds.x<0) ? gridCell.bounds.x : 0);
66
+ const argsX = e.layerX - gridCell.gridColumn.left + (gridCell.gridColumn.left - gridCell.bounds.x);
142
67
  let left = 0;
143
68
  let right = maxIndex;
144
69
  let found = false;
@@ -164,7 +89,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
164
89
  const separator = gridCell.cell.column.getTag('separator') ?? '';
165
90
  const splitterFunc: SplitterFunc = WebLogo.getSplitter('separator', separator);
166
91
  const subParts: string[] = splitterFunc(gridCell.cell.value);
167
- ui.tooltip.show(ui.div(subParts[left]), e.x + 16, e.y + 16);
92
+ (((subParts[left]?.length ?? 0) > 0)) ? ui.tooltip.show(ui.div(subParts[left]), e.x + 16, e.y + 16) : ui.tooltip.hide();
168
93
  }
169
94
 
170
95
  /**
@@ -186,6 +111,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
186
111
  const grid = gridCell.gridRow !== -1 ? gridCell.grid : undefined;
187
112
  const cell = gridCell.cell;
188
113
  const [type, subtype, paletteType] = gridCell.cell.column.getTag(DG.TAGS.UNITS).split(':');
114
+ const minDistanceRenderer = 50;
189
115
  w = grid ? Math.min(grid.canvas.width - x, w) : g.canvas.width - x;
190
116
  g.save();
191
117
  g.beginPath();
@@ -201,25 +127,34 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
201
127
  const palette = getPalleteByType(paletteType);
202
128
 
203
129
  const separator = gridCell.cell.column.getTag('separator') ?? '';
204
- const splitterFunc: SplitterFunc = WebLogo.getSplitter(units, separator);
130
+ const splitLimit = gridCell.bounds.width / 5;
131
+ const splitterFunc: SplitterFunc = WebLogo.getSplitter(units, separator, gridCell.bounds.width / 5);
132
+
205
133
 
206
- const columns = gridCell.cell.column.categories;
207
- let monomerToShortFunction: (amino: string, maxLengthOfMonomer: number) => string = WebLogo.monomerToShort;
208
- let maxLengthOfMonomer = 8;
134
+ const maxLengthOfMonomer = 8;
209
135
 
210
136
  let maxLengthWords: any = {};
211
- if (gridCell.cell.column.getTag('.calculatedCellRender') !== 'exist') {
212
- for (let i = 0; i < columns.length; i++) {
213
- let subParts: string[] = splitterFunc(columns[i]);
137
+ if (gridCell.cell.column.getTag('.calculatedCellRender') !== splitLimit.toString()) {
138
+ let samples = 0;
139
+ while (samples < Math.min(gridCell.cell.column.length, 100)) {
140
+ let column = gridCell.cell.column.get(samples);
141
+ let subParts: string[] = splitterFunc(column);
214
142
  subParts.forEach((amino, index) => {
215
- let textSizeWidth = g.measureText(monomerToShortFunction(amino, maxLengthOfMonomer));
216
- if (textSizeWidth.width > (maxLengthWords[index] ?? 0)) {
217
- maxLengthWords[index] = textSizeWidth.width;
143
+ let textSize = monomerToShortFunction(amino, maxLengthOfMonomer).length * 7 + gapRenderer;
144
+ if (textSize > (maxLengthWords[index] ?? 0)) {
145
+ maxLengthWords[index] = textSize;
218
146
  }
219
147
  if (index > (maxLengthWords['bio-maxIndex'] ?? 0)) {
220
148
  maxLengthWords['bio-maxIndex'] = index;
221
149
  }
222
150
  });
151
+ samples += 1;
152
+ }
153
+ let minLength = 3 * 7;
154
+ for (let i = 0; i <= maxLengthWords['bio-maxIndex']; i++) {
155
+ if (maxLengthWords[i] < minLength) {
156
+ maxLengthWords[i] = minLength;
157
+ }
223
158
  }
224
159
  let maxLengthWordSum: any = {};
225
160
  maxLengthWordSum[0] = maxLengthWords[0];
@@ -231,7 +166,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
231
166
  'bio-maxIndex': maxLengthWords['bio-maxIndex'],
232
167
  'bio-maxLengthWords': maxLengthWords
233
168
  };
234
- gridCell.cell.column.setTag('.calculatedCellRender', 'exist');
169
+ gridCell.cell.column.setTag('.calculatedCellRender', splitLimit.toString());
235
170
  } else {
236
171
  maxLengthWords = gridCell.cell.column.temp['bio-maxLengthWords'];
237
172
  }
@@ -239,15 +174,19 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
239
174
  const subParts: string[] = splitterFunc(cell.value);
240
175
  let x1 = x;
241
176
  let color = undefinedColor;
242
- let drawStyle = 'classic';
243
- if (gridCell.cell.column.getTag('aligned').includes('MSA')) {
244
- drawStyle = 'msa';
177
+ let drawStyle = DrawStyle.classic;
178
+ if (gridCell.cell.column.getTag('aligned').includes('MSA') && gridCell.cell.column.getTag('units') === 'separator') {
179
+ drawStyle = DrawStyle.MSA;
245
180
  }
246
- subParts.forEach((amino, index) => {
181
+ subParts.every((amino, index) => {
247
182
  color = palette.get(amino);
248
183
  g.fillStyle = undefinedColor;
249
184
  let last = index === subParts.length - 1;
250
185
  x1 = printLeftOrCentered(x1, y, w, h, g, monomerToShortFunction(amino, maxLengthOfMonomer), color, 0, true, 1.0, separator, last, drawStyle, maxLengthWords, index, gridCell);
186
+ if (x1 - minDistanceRenderer - gridCell.gridColumn.left + (gridCell.gridColumn.left - gridCell.bounds.x) > gridCell.bounds.width) {
187
+ return false;
188
+ }
189
+ return true;
251
190
  });
252
191
 
253
192
  g.restore();