@datagrok/bio 1.10.2 → 1.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "1.10.2",
8
+ "version": "1.11.2",
9
9
  "description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
10
10
  "repository": {
11
11
  "type": "git",
@@ -14,12 +14,12 @@
14
14
  },
15
15
  "dependencies": {
16
16
  "@biowasm/aioli": ">=2.4.0",
17
- "@datagrok-libraries/bio": "^4.1.0",
17
+ "@datagrok-libraries/bio": "^4.2.1",
18
18
  "@datagrok-libraries/chem-meta": "1.0.0",
19
- "@datagrok-libraries/ml": "^4.0.0",
19
+ "@datagrok-libraries/ml": "^6.0.0",
20
20
  "@datagrok-libraries/utils": "^1.6.2",
21
21
  "cash-dom": "latest",
22
- "datagrok-api": "^1.5.5",
22
+ "datagrok-api": "^1.6.7",
23
23
  "dayjs": "^1.11.4",
24
24
  "openchemlib": "6.0.1",
25
25
  "rxjs": "^6.5.5",
@@ -48,6 +48,7 @@
48
48
  "link-api": "npm link datagrok-api",
49
49
  "link-bio": "npm link @datagrok-libraries/bio",
50
50
  "link-ml": "npm link @datagrok-libraries/ml",
51
+ "link-utils": "npm link @datagrok-libraries/utils",
51
52
  "link-all": "npm link datagrok-api @datagrok-libraries/utils @datagrok-libraries/bio @datagrok-libraries/ml",
52
53
  "debug-sequences1": "grok publish",
53
54
  "release-sequences1": "grok publish --release",
@@ -13,6 +13,7 @@ import './tests/renderers-test';
13
13
  import './tests/convert-test';
14
14
  import './tests/fasta-handler-test';
15
15
  import './tests/WebLogo-positions-test';
16
+ import './tests/checkInputColumn-tests'
16
17
 
17
18
  export const _package = new DG.Package();
18
19
  export {tests};
package/src/package.ts CHANGED
@@ -15,7 +15,7 @@ import {Aminoacids} from '@datagrok-libraries/bio/src/aminoacids';
15
15
  import {getEmbeddingColsNames, sequenceSpace} from './utils/sequence-space';
16
16
  import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
17
17
  import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
18
- import {drawSequences, sequenceGetSimilarities} from './utils/sequence-activity-cliffs';
18
+ import {createPropPanelElement, createTooltipElement, getSimilaritiesMarix} from './utils/sequence-activity-cliffs';
19
19
  import {createJsonMonomerLibFromSdf, encodeMonomers, getMolfilesFromSeq, HELM_CORE_LIB_FILENAME} from './utils/utils';
20
20
  import {getMacroMol} from './utils/atomic-works';
21
21
  import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
@@ -30,6 +30,9 @@ import {
30
30
  performanceTest
31
31
  } from './tests/test-sequnces-generators';
32
32
 
33
+ import {splitAlignedSequences} from '@datagrok-libraries/bio/src/utils/splitter';
34
+ import * as C from './utils/constants';
35
+
33
36
  //tags: init
34
37
  export async function initBio() {
35
38
  }
@@ -63,29 +66,50 @@ export function separatorSequenceCellRenderer(): MacromoleculeSequenceCellRender
63
66
  return new MacromoleculeSequenceCellRenderer();
64
67
  }
65
68
 
66
- function checkInputColumn(col: DG.Column, name: string,
67
- allowedNotations: string[] = [], allowedAlphabets: string[] = []): boolean {
68
- const notation: string = col.getTag(DG.TAGS.UNITS);
69
- const alphabet: string = col.getTag('alphabet');
69
+ function checkInputColumnUi(
70
+ col: DG.Column, name: string, allowedNotations: string[] = [], allowedAlphabets: string[] = []
71
+ ): boolean {
72
+ const [res, msg]: [boolean, string] = checkInputColumn(col, name, allowedNotations, allowedAlphabets);
73
+ if (!res)
74
+ grok.shell.warning(msg);
75
+ return res;
76
+ }
77
+
78
+ export function checkInputColumn(
79
+ col: DG.Column, name: string, allowedNotations: string[] = [], allowedAlphabets: string[] = []
80
+ ): [boolean, string] {
81
+ let res: boolean = true;
82
+ let msg: string = '';
83
+
84
+ const uh = new UnitsHandler(col);
70
85
  if (col.semType !== DG.SEMTYPE.MACROMOLECULE) {
71
86
  grok.shell.warning(name + ' analysis is allowed for Macromolecules semantic type');
72
- return false;
73
- } else if (
74
- (allowedAlphabets.length > 0 &&
75
- !allowedAlphabets.some((a) => alphabet.toUpperCase() == (a.toUpperCase()))) ||
76
- (allowedNotations.length > 0 &&
77
- !allowedNotations.some((n) => notation.toUpperCase() == (n.toUpperCase())))
78
- ) {
79
- const notationAdd = allowedNotations.length == 0 ? 'any notation' :
80
- (`notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')} `);
81
- const alphabetAdd = allowedNotations.length == 0 ? 'any alphabet' :
82
- (`alphabet${allowedAlphabets.length > 1 ? 's' : ''} ${allowedAlphabets.map((a) => `"${a}"`).join(', ')}.`);
83
-
84
- grok.shell.warning(name + ' analysis is allowed for Macromolecules with ' + notationAdd + ' and ' + alphabetAdd);
85
- return false;
87
+ res = false;
88
+ } else {
89
+ const notation: string = uh.notation;
90
+ if (allowedNotations.length > 0 &&
91
+ !allowedNotations.some((n) => notation.toUpperCase() == (n.toUpperCase()))
92
+ ) {
93
+ const notationAdd = allowedNotations.length == 0 ? 'any notation' :
94
+ (`notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')} `);
95
+ msg = `${name} + ' analysis is allowed for Macromolecules with notation ${notationAdd}.`;
96
+ res = false;
97
+ } else if (!uh.isHelm()) {
98
+ // alphabet is not specified for 'helm' notation
99
+ const alphabet: string = uh.alphabet;
100
+ if (
101
+ allowedAlphabets.length > 0 &&
102
+ !allowedAlphabets.some((a) => alphabet.toUpperCase() == (a.toUpperCase()))
103
+ ) {
104
+ const alphabetAdd = allowedAlphabets.length == 0 ? 'any alphabet' :
105
+ (`alphabet${allowedAlphabets.length > 1 ? 's' : ''} ${allowedAlphabets.map((a) => `"${a}"`).join(', ')}.`);
106
+ msg = `${name} + ' analysis is allowed for Macromolecules with alphabet ${alphabetAdd}.`;
107
+ res = false;
108
+ }
109
+ }
86
110
  }
87
111
 
88
- return true;
112
+ return [res, msg];
89
113
  }
90
114
 
91
115
  //name: sequenceAlignment
@@ -128,7 +152,7 @@ export function vdRegionViewer() {
128
152
  //input: string methodName { choices:["UMAP", "t-SNE", "SPE"] }
129
153
  export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column, activities: DG.Column,
130
154
  similarity: number, methodName: string): Promise<DG.Viewer | undefined> {
131
- if (!checkInputColumn(macroMolecule, 'Activity Cliffs'))
155
+ if (!checkInputColumnUi(macroMolecule, 'Activity Cliffs'))
132
156
  return;
133
157
  const encodedCol = encodeMonomers(macroMolecule);
134
158
  if (!encodedCol)
@@ -156,8 +180,9 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
156
180
  DG.SEMTYPE.MACROMOLECULE,
157
181
  tags,
158
182
  sequenceSpace,
159
- sequenceGetSimilarities,
160
- drawSequences,
183
+ getSimilaritiesMarix,
184
+ createTooltipElement,
185
+ createPropPanelElement,
161
186
  (options as any)[methodName]);
162
187
  return sp;
163
188
  }
@@ -171,7 +196,7 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
171
196
  //input: bool plotEmbeddings = true
172
197
  export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: DG.Column, methodName: string,
173
198
  similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean): Promise<DG.Viewer | undefined> {
174
- if (!checkInputColumn(macroMolecule, 'Activity Cliffs'))
199
+ if (!checkInputColumnUi(macroMolecule, 'Activity Cliffs'))
175
200
  return;
176
201
  const encodedCol = encodeMonomers(macroMolecule);
177
202
  if (!encodedCol)
@@ -213,7 +238,7 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
213
238
  grok.shell.warning('Transformation to atomic level requires package "Chem" installed.');
214
239
  return;
215
240
  }
216
- if (!checkInputColumn(macroMolecule, 'To Atomic Level'))
241
+ if (!checkInputColumnUi(macroMolecule, 'To Atomic Level'))
217
242
  return;
218
243
 
219
244
  const monomersLibFile = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
@@ -232,10 +257,12 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
232
257
  //top-menu: Bio | MSA...
233
258
  //name: MSA
234
259
  //input: dataframe table
235
- //input: column sequence { semType: Macromolecule }
260
+ //input: column sequence { semType: Macromolecule, units: ['fasta'], alphabet: ['DNA', 'RNA', 'PT'] }
236
261
  //output: column result
237
262
  export async function multipleSequenceAlignmentAny(table: DG.DataFrame, col: DG.Column): Promise<DG.Column | null> {
238
- if (!checkInputColumn(col, 'MSA', ['fasta'], ['DNA', 'RNA', 'PT']))
263
+ const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
264
+
265
+ if (!checkInputColumnUi(col, 'MSA', ['fasta'], ['DNA', 'RNA', 'PT']))
239
266
  return null;
240
267
 
241
268
  const unUsedName = table.columns.getUnusedName(`msa(${col.name})`);
@@ -276,7 +303,7 @@ export async function compositionAnalysis(): Promise<void> {
276
303
  });
277
304
 
278
305
  const handler = async (col: DG.Column) => {
279
- if (!checkInputColumn(col, 'Composition'))
306
+ if (!checkInputColumnUi(col, 'Composition'))
280
307
  return;
281
308
 
282
309
  const wlViewer = tv.addViewer('WebLogo', {sequenceColumnName: col.name});
@@ -391,11 +418,11 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
391
418
  //console.warn(`file: ${fileInfo.path}, column: ${col.name}, ` +
392
419
  // `semType: ${semType}, units: ${col.getTag(DG.TAGS.UNITS)}`);
393
420
  // console.warn('file: "' + fileInfo.path + '", semType: "' + semType + '", ' +
394
- // 'units: "' + col.getTag('units') + '"');
421
+ // 'units: "' + col.getTag(DG.TAGS.UNITS) + '"');
395
422
 
396
423
  res.push({
397
424
  file: fileInfo.path, result: 'detected', column: col.name,
398
- message: `units: ${col.getTag('units')}`
425
+ message: `units: ${col.getTag(DG.TAGS.UNITS)}`
399
426
  });
400
427
  }
401
428
  }
@@ -418,3 +445,19 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
418
445
  return resDf;
419
446
  }
420
447
 
448
+ //name: Bio | Split to monomers
449
+ //tags: panel, bio
450
+ //input: column col {semType: Macromolecule}
451
+ export function splitToMonomers(col: DG.Column<string>): void {
452
+ if (!col.getTag(UnitsHandler.TAGS.aligned).includes(C.MSA))
453
+ return grok.shell.error('Splitting is applicable only for aligned sequences');
454
+
455
+ const tempDf = splitAlignedSequences(col);
456
+ const originalDf = col.dataFrame;
457
+ for (const tempCol of tempDf.columns) {
458
+ const newCol = originalDf.columns.add(tempCol);
459
+ newCol.semType = C.SEM_TYPES.MONOMER;
460
+ // newCol.setTag(DG.TAGS.CELL_RENDERER, C.SEM_TYPES.MONOMER);
461
+ newCol.setTag(C.TAGS.ALPHABET, col.getTag(C.TAGS.ALPHABET));
462
+ }
463
+ }
@@ -5,6 +5,7 @@ import * as ui from 'datagrok-api/ui';
5
5
  import * as DG from 'datagrok-api/dg';
6
6
  import {PositionInfo, PositionMonomerInfo, WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
7
7
  import {Column} from 'datagrok-api/dg';
8
+ import {ALPHABET, NOTATION, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
8
9
 
9
10
  category('WebLogo-positions', () => {
10
11
  let tvList: DG.TableView[];
@@ -35,9 +36,9 @@ ATC-G-TTGC--
35
36
  const tv: DG.TableView = grok.shell.addTableView(df);
36
37
 
37
38
  const seqCol: DG.Column = df.getCol('seq');
38
- seqCol.semType = 'Macromolecule';
39
- seqCol.setTag('units', 'fasta');
40
- seqCol.setTag('alphabet', 'DNA');
39
+ seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
40
+ seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
41
+ seqCol.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
41
42
 
42
43
  const wlViewer: WebLogo = await df.plot.fromType('WebLogo') as unknown as WebLogo;
43
44
  tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
@@ -84,9 +85,9 @@ ATC-G-TTGC--
84
85
  const tv: DG.TableView = grok.shell.addTableView(df);
85
86
 
86
87
  const seqCol: DG.Column = df.getCol('seq');
87
- seqCol.semType = 'Macromolecule';
88
- seqCol.setTag('units', 'fasta');
89
- seqCol.setTag('alphabet', 'DNA');
88
+ seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
89
+ seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
90
+ seqCol.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
90
91
 
91
92
  df.filter.init((i) => {
92
93
  return i > 2;
@@ -129,9 +130,9 @@ ATC-G-TTGC--
129
130
  const tv: DG.TableView = grok.shell.addTableView(df);
130
131
 
131
132
  const seqCol: DG.Column = df.getCol('seq');
132
- seqCol.semType = 'Macromolecule';
133
- seqCol.setTag('units', 'fasta');
134
- seqCol.setTag('alphabet', 'DNA');
133
+ seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
134
+ seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
135
+ seqCol.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
135
136
 
136
137
  const wlViewer: WebLogo = await df.plot.fromType('WebLogo', {'skipEmptyPositions': true}) as unknown as WebLogo;
137
138
  tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
@@ -0,0 +1,72 @@
1
+ import {after, before, category, test, expect, expectArray} from '@datagrok-libraries/utils/src/test';
2
+
3
+ import * as grok from 'datagrok-api/grok';
4
+ import * as ui from 'datagrok-api/ui';
5
+ import * as DG from 'datagrok-api/dg';
6
+
7
+ import {checkInputColumn, multipleSequenceAlignmentAny} from '../package';
8
+ import {UNITS} from 'datagrok-api/dg';
9
+ import {ALPHABET, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
10
+
11
+
12
+ category('checkInputColumn', () => {
13
+
14
+ const csv = `seq
15
+ seq1,
16
+ seq2,
17
+ seq3,
18
+ seq4`;
19
+
20
+ test('testMsaPos', async () => {
21
+ const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
22
+ const funcInputColumnProperty: DG.Property = func.inputs.find((i) => i.name == 'sequence')!;
23
+
24
+ let k = 11;
25
+
26
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
27
+ const col: DG.Column = df.getCol('seq');
28
+ col.semType = DG.SEMTYPE.MACROMOLECULE;
29
+ col.setTag(DG.TAGS.UNITS, 'fasta');
30
+ col.setTag(UnitsHandler.TAGS.alphabet, ALPHABET.DNA);
31
+
32
+ const [res, msg]: [boolean, string] = checkInputColumn(
33
+ col, 'Test', ['fasta',], ['DNA', 'RNA', 'PT']);
34
+
35
+ expect(res, true);
36
+ });
37
+
38
+ test('testMsaNegHelm', async () => {
39
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
40
+ const col: DG.Column = df.getCol('seq');
41
+ col.semType = DG.SEMTYPE.MACROMOLECULE;
42
+ col.setTag(DG.TAGS.UNITS, 'helm');
43
+ col.setTag(UnitsHandler.TAGS.alphabetSize, '11');
44
+ col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, 'true');
45
+
46
+ const [res, msg]: [boolean, string] = checkInputColumn(
47
+ col, 'Test', ['fasta',], ['DNA', 'RNA', 'PT']);
48
+
49
+ expect(res, false);
50
+ });
51
+
52
+ test('testMsaNegUN', async () => {
53
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
54
+ const col: DG.Column = df.getCol('seq');
55
+ col.semType = DG.SEMTYPE.MACROMOLECULE;
56
+ col.setTag(DG.TAGS.UNITS, 'fasta');
57
+ col.setTag(UnitsHandler.TAGS.alphabet, 'UN');
58
+ col.setTag(UnitsHandler.TAGS.alphabetSize, '11');
59
+ col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, 'true');
60
+
61
+ const [res, msg]: [boolean, string] = checkInputColumn(
62
+ col, 'Test', ['fasta',], ['DNA', 'RNA', 'PT']);
63
+
64
+ expect(res, false);
65
+ });
66
+
67
+ test('testGetActionFunctionMeta', async () => {
68
+ const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
69
+ const sequenceInput: DG.Property = func.inputs.find((i) => i.name == 'sequence')!;
70
+ let k = 11;
71
+ });
72
+ });
@@ -5,7 +5,7 @@ import * as DG from 'datagrok-api/dg';
5
5
 
6
6
  import {ConverterFunc} from './types';
7
7
  import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
8
- import {NOTATION} from '@datagrok-libraries/bio/src/utils/units-handler';
8
+ import {NOTATION, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
9
9
 
10
10
  // import {mmSemType} from '../const';
11
11
  // import {importFasta} from '../package';
@@ -145,14 +145,17 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$
145
145
 
146
146
  async function _testConvert(srcKey: string, converter: ConverterFunc, tgtKey: string) {
147
147
  const srcDf: DG.DataFrame = await readCsv(srcKey);
148
- const srcCol: DG.Column = srcDf.col('seq')!;
148
+ const srcCol: DG.Column = srcDf.getCol('seq');
149
149
 
150
+ // conversion results
150
151
  const resCol: DG.Column = converter(srcCol);
151
152
 
153
+ // The correct reference data to compare conversion results with.
152
154
  const tgtDf: DG.DataFrame = await readCsv(tgtKey);
153
- const tgtCol: DG.Column = tgtDf.col('seq')!;
155
+ const tgtCol: DG.Column = tgtDf.getCol('seq');
154
156
 
155
157
  expectArray(resCol.toList(), tgtCol.toList());
158
+ const uh: UnitsHandler = new UnitsHandler(resCol);
156
159
  }
157
160
 
158
161
  // FASTA tests
@@ -400,10 +400,10 @@ export async function _testPos(
400
400
 
401
401
  expect(col.semType === DG.SEMTYPE.MACROMOLECULE, true);
402
402
  expect(col.getTag(DG.TAGS.UNITS), units);
403
- expect(col.getTag('aligned'), aligned);
404
- expect(col.getTag('alphabet'), alphabet);
403
+ expect(col.getTag(UnitsHandler.TAGS.aligned), aligned);
404
+ expect(col.getTag(UnitsHandler.TAGS.alphabet), alphabet);
405
405
  if (separator)
406
- expect(col.getTag('separator'), separator);
406
+ expect(col.getTag(UnitsHandler.TAGS.separator), separator);
407
407
 
408
408
  const uh = new UnitsHandler(col);
409
409
  expect(uh.getAlphabetSize(), alphabetSize);
@@ -3,9 +3,9 @@ import {after, before, category, delay, expect, test} from '@datagrok-libraries/
3
3
  import * as grok from 'datagrok-api/grok';
4
4
  import * as DG from 'datagrok-api/dg';
5
5
  import {importFasta, multipleSequenceAlignmentAny} from '../package';
6
- import {readDataframe} from './utils';
7
6
  import {convertDo} from '../utils/convert';
8
- import {NOTATION} from '@datagrok-libraries/bio/src/utils/units-handler';
7
+ import {ALPHABET, NOTATION, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
8
+ import {SEM_TYPES, TAGS} from '../utils/constants';
9
9
 
10
10
  category('renderers', () => {
11
11
  let tvList: DG.TableView[];
@@ -30,33 +30,45 @@ category('renderers', () => {
30
30
  await _testAfterConvert();
31
31
  });
32
32
 
33
+ test('setRenderer', async () => {
34
+ await _setRendererManually();
35
+ });
36
+
33
37
  async function _testAfterMsa() {
34
38
  const fastaTxt: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA.fasta');
35
39
  const df: DG.DataFrame = importFasta(fastaTxt)[0];
40
+
41
+ const srcSeqCol: DG.Column = df.getCol('sequence');
42
+ const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: srcSeqCol});
43
+ if (semType)
44
+ srcSeqCol.semType = semType;
45
+
36
46
  const tv: DG.TableView = grok.shell.addTableView(df);
47
+ // call to calculate 'cell.renderer' tag
37
48
  await grok.data.detectSemanticTypes(df);
38
- console.log('Bio: tests/renderers/afterMsa, table view');
39
49
 
40
- const srcSeqCol: DG.Column | null = df.col('sequence');
41
- expect(srcSeqCol !== null, true);
50
+ console.log('Bio: tests/renderers/afterMsa, table view');
42
51
 
43
52
  console.log('Bio: tests/renderers/afterMsa, src before test ' +
44
53
  `semType="${srcSeqCol!.semType}", units="${srcSeqCol!.getTag(DG.TAGS.UNITS)}", ` +
45
- `cell.renderer="${srcSeqCol!.getTag('cell.renderer')}"`);
46
- expect(srcSeqCol!.semType, DG.SEMTYPE.MACROMOLECULE);
47
- expect(srcSeqCol!.getTag(DG.TAGS.UNITS), 'fasta');
48
- expect(srcSeqCol!.getTag('aligned'), 'SEQ');
49
- expect(srcSeqCol!.getTag('alphabet'), 'PT');
50
- expect(srcSeqCol!.getTag('cell.renderer'), 'sequence');
51
-
52
- const msaSeqCol: DG.Column | null = await multipleSequenceAlignmentAny(df, srcSeqCol!);
54
+ `cell.renderer="${srcSeqCol!.getTag(DG.TAGS.CELL_RENDERER)}"`);
55
+ expect(srcSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
56
+ expect(srcSeqCol.getTag(DG.TAGS.UNITS), NOTATION.FASTA);
57
+ expect(srcSeqCol.getTag(UnitsHandler.TAGS.aligned), 'SEQ');
58
+ expect(srcSeqCol.getTag(UnitsHandler.TAGS.alphabet), ALPHABET.PT);
59
+ expect(srcSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
60
+
61
+ const msaSeqCol: DG.Column = (await multipleSequenceAlignmentAny(df, srcSeqCol!))!;
53
62
  tv.grid.invalidate();
54
-
55
- expect(msaSeqCol!.semType, DG.SEMTYPE.MACROMOLECULE);
56
- expect(msaSeqCol!.getTag(DG.TAGS.UNITS), 'fasta');
57
- expect(msaSeqCol!.getTag('aligned'), 'SEQ.MSA');
58
- expect(msaSeqCol!.getTag('alphabet'), 'PT');
59
- expect(msaSeqCol!.getTag('cell.renderer'), 'sequence');
63
+
64
+ expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
65
+ expect(msaSeqCol.getTag(DG.TAGS.UNITS), NOTATION.FASTA);
66
+ expect(msaSeqCol.getTag(UnitsHandler.TAGS.aligned), 'SEQ.MSA');
67
+ expect(msaSeqCol.getTag(UnitsHandler.TAGS.alphabet), ALPHABET.PT);
68
+ expect(msaSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
69
+
70
+ // check newColumn with UnitsHandler constructor
71
+ const uh: UnitsHandler = new UnitsHandler(msaSeqCol);
60
72
 
61
73
  dfList.push(df);
62
74
  tvList.push(tv);
@@ -66,13 +78,35 @@ category('renderers', () => {
66
78
  const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA_PT.csv');
67
79
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
68
80
  const tv: DG.TableView = grok.shell.addTableView(df);
69
- await grok.data.detectSemanticTypes(df);
70
81
 
71
82
  const srcCol: DG.Column = df.col('sequence')!;
83
+ // await grok.data.detectSemanticTypes(df);
84
+ const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: srcCol});
85
+ if (semType)
86
+ srcCol.semType = semType;
87
+ await grok.data.detectSemanticTypes(df);
88
+
72
89
  const tgtCol: DG.Column = await convertDo(srcCol, NOTATION.SEPARATOR, '/');
73
- expect(tgtCol.getTag('cell.renderer'), 'sequence');
90
+ expect(tgtCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
91
+
92
+ // check tgtCol with UnitsHandler constructor
93
+ const uh: UnitsHandler = new UnitsHandler(tgtCol);
74
94
 
75
95
  tvList.push(tv);
76
96
  dfList.push(df);
77
- };
97
+ }
98
+
99
+ async function _setRendererManually() {
100
+ const df = DG.DataFrame.fromColumns([DG.Column.fromStrings(
101
+ 'SequencesDiff', ['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV'])]);
102
+ df.col('SequencesDiff')!.tags[DG.TAGS.UNITS] = 'separator';
103
+ df.col('SequencesDiff')!.tags[TAGS.SEPARATOR] = '/';
104
+ df.col('SequencesDiff')!.semType = SEM_TYPES.MACROMOLECULE_DIFFERENCE;
105
+ const tw = grok.shell.addTableView(df);
106
+ await delay(100);
107
+ const renderer = tw.dataFrame.col('SequencesDiff')?.getTag(DG.TAGS.CELL_RENDERER);
108
+ if (renderer !== 'MacromoleculeDifferenceCR')
109
+ throw new Error(`Units 'separator', separator '/' and semType 'MacromoleculeDifference' have been ` +
110
+ `manually set on column but after df aws added as table view renderer has been reset to '${renderer}'`);
111
+ }
78
112
  });
@@ -1,10 +1,15 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
2
  import * as grok from 'datagrok-api/grok';
3
- import { expect } from '@datagrok-libraries/utils/src/test';
4
- import { sequenceSpaceTopMenu } from '../package';
3
+ import {expect} from '@datagrok-libraries/utils/src/test';
4
+ import {sequenceSpaceTopMenu} from '../package';
5
5
 
6
6
  export async function _testSequenceSpaceReturnsResult(df: DG.DataFrame, algorithm: string, colName: string) {
7
- await grok.data.detectSemanticTypes(df);
7
+ // await grok.data.detectSemanticTypes(df);
8
+ const col: DG.Column = df.getCol(colName);
9
+ const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: col});
10
+ if (semType)
11
+ col.semType = semType;
12
+
8
13
  const sp = await sequenceSpaceTopMenu(df, df.col(colName)!, algorithm, 'Levenshtein', true);
9
14
  expect(sp != null, true);
10
15
  }
@@ -4,6 +4,8 @@ import * as grok from 'datagrok-api/grok';
4
4
  import * as ui from 'datagrok-api/ui';
5
5
  import * as DG from 'datagrok-api/dg';
6
6
  import {WebLogo, SplitterFunc} from '@datagrok-libraries/bio/src/viewers/web-logo';
7
+ import {splitToMonomers, _package} from '../package';
8
+ import * as C from '../utils/constants';
7
9
 
8
10
  category('splitters', () => {
9
11
  const helm1 = 'PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$';
@@ -51,6 +53,19 @@ category('splitters', () => {
51
53
  test('testHelm1', async () => { await _testHelmSplitter(data.testHelm1[0], data.testHelm1[1]); });
52
54
  test('testHelm2', async () => { await _testHelmSplitter(data.testHelm2[0], data.testHelm2[1]); });
53
55
  test('testHelm3', async () => { await _testHelmSplitter(data.testHelm3[0], data.testHelm3[1]); });
56
+
57
+ test('splitToMonomers', async () => {
58
+ const df: DG.DataFrame = await grok.dapi.files.readCsv('System:AppData/Bio/samples/sample_MSA.csv');
59
+
60
+ const seqCol = df.getCol('MSA');
61
+ const semType = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
62
+ if (semType)
63
+ seqCol.semType = semType;
64
+ seqCol.setTag(C.TAGS.ALIGNED, C.MSA);
65
+
66
+ splitToMonomers(seqCol);
67
+ expect(df.columns.names().includes('17'), true);
68
+ });
54
69
  });
55
70
 
56
71
  export async function _testHelmSplitter(src: string, tgt: string[]) {
@@ -1,27 +1,22 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
2
  import * as grok from 'datagrok-api/grok';
3
+ import {DataFrame} from 'datagrok-api/dg';
3
4
 
4
- export function generateManySequences(): string {
5
- let csvData = `MSA,Activity
6
- meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me,5.30751`;
7
- for (let i = 0; i < 10 ** 6; i++) {
8
- csvData += `\n meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me,5.30751`;
9
- }
10
- return csvData;
5
+ export function generateManySequences(): DG.Column[] {
6
+ let columns: DG.Column[] = [];
7
+ columns.push(DG.Column.fromList('string', 'MSA', new Array(10 ** 6).fill('meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me')));
8
+ columns.push(DG.Column.fromList('string', 'Activity', new Array(10 ** 6).fill('5.30751')));
9
+ return columns;
11
10
  }
12
11
 
13
- export function generateLongSequence(): string {
14
- let longSequence = `meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr`;
15
- for (let i = 0; i < 10 ** 5; i++) {
16
- longSequence += `/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/dv`;
17
- }
18
- longSequence += `//Phe_4Me,5.30751`;
19
- let csvData = `MSA,Activity `;
20
- for (let i = 0; i <= 10 ** 1 * 4; i++) {
21
- csvData += `\n ${longSequence}`;
22
- }
23
- return csvData;
12
+ export function generateLongSequence(): DG.Column[] {
13
+ let columns: DG.Column[] = [];
14
+ const longSequence = `meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr`.repeat(10 ** 5);
15
+ columns.push(DG.Column.fromList('string', 'MSA', new Array(10 ** 2).fill(longSequence)));
16
+ columns.push(DG.Column.fromList('string', 'Activity', new Array(10 ** 2).fill('7.30751')));
17
+ return columns;
24
18
  }
19
+
25
20
  export function setTagsMacromolecule(col: DG.Column) {
26
21
  col.semType = DG.SEMTYPE.MACROMOLECULE;
27
22
  col.setTag('units', 'separator');
@@ -31,10 +26,10 @@ export function setTagsMacromolecule(col: DG.Column) {
31
26
  return col;
32
27
  }
33
28
 
34
- export function performanceTest(generateFunc: () => string,testName: string) {
29
+ export function performanceTest(generateFunc: () => DG.Column[], testName: string) {
30
+ const columns = generateFunc();
31
+ const df: DG.DataFrame = DG.DataFrame.fromColumns(columns);
35
32
  const startTime: number = Date.now();
36
- const csv = generateFunc();
37
- const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
38
33
  const col: DG.Column = df.columns.byName('MSA');
39
34
  setTagsMacromolecule(col);
40
35
  grok.shell.addTableView(df);