@datagrok/bio 2.0.5 → 2.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.0.5",
8
+ "version": "2.0.7",
9
9
  "description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
10
10
  "repository": {
11
11
  "type": "git",
@@ -2,29 +2,24 @@ import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
+ import { getHelmMonomers } from '../package'
6
+
5
7
  const V2000_ATOM_NAME_POS = 31;
6
8
 
7
- export async function getFingerprints(mols: Array<string>, monomers: Array<string>): Promise<Uint8Array[]> {
8
- const mod = await grok.functions.call('Chem:getRdKitModule');
9
- const fps: Uint8Array[] = [];
9
+ export async function getMonomericMols(mcol: DG.Column, pattern: boolean = false): Promise<DG.Column> {
10
+ const monomers = getHelmMonomers(mcol);
11
+ let mols = await grok.functions.call('HELM:getMolFiles', {mcol: mcol});
10
12
 
11
13
  let dict = new Map();
12
14
  for(let i = 0; i < monomers.length; i++)
13
- dict.set(monomers[i], `R${Math.pow(10,(i + 1))}`);
14
-
15
- mols = changeToV3000(mols, dict);
15
+ dict.set(monomers[i], `${i + 1}`);
16
16
 
17
- for(let i = 0; i< mols.length; i++) {
18
- const mol = mod.get_mol(mols[i]);
19
- const fp = mol.get_pattern_fp_as_uint8array();
20
- fps.push(fp);
21
- mol?.delete();
22
- }
17
+ mols = changeToV3000(mols, dict, pattern);
23
18
 
24
- return fps;
19
+ return DG.Column.fromStrings('monomericMols', mols);
25
20
  }
26
21
 
27
- function changeToV3000(mols: Array<string>, dict: Map<string, string>): Array<string> {
22
+ function changeToV3000(mols: Array<string>, dict: Map<string, string>, pattern: boolean = false): Array<string> {
28
23
  for (let i = 0; i < mols.length; i++) {
29
24
  let curPos = 0;
30
25
  let endPos = 0;
@@ -50,7 +45,9 @@ M V30 BEGIN CTAB
50
45
  curPos = mol.indexOf('\n', curPos) + 1 + V2000_ATOM_NAME_POS;
51
46
  endPos = mol.indexOf(' ', curPos);
52
47
  const monomerName: string = mol.substring(curPos, endPos);
53
- molV3000 += `M V30 ${atomRowI + 1} ${dict.get(monomerName)} 0.000 0.000 0 0\n`;
48
+ molV3000 += pattern ?
49
+ `M V30 ${atomRowI + 1} R${dict.get(monomerName)} 0.000 0.000 0 0\n` :
50
+ `M V30 ${atomRowI + 1} At 0.000 0.000 0 0 MASS=${dict.get(monomerName)}\n`;
54
51
  }
55
52
 
56
53
  molV3000 += 'M V30 END ATOM\n';
package/src/package.ts CHANGED
@@ -32,7 +32,6 @@ import {
32
32
 
33
33
  import {splitAlignedSequences} from '@datagrok-libraries/bio/src/utils/splitter';
34
34
  import * as C from './utils/constants';
35
- import {getFingerprints} from './calculations/fingerprints';
36
35
 
37
36
  //tags: init
38
37
  export async function initBio() {
@@ -56,6 +55,16 @@ export function separatorSequenceCellRenderer(): MacromoleculeSequenceCellRender
56
55
  return new MacromoleculeSequenceCellRenderer();
57
56
  }
58
57
 
58
+ //name: MacromoleculeDifferenceCellRenderer
59
+ //tags: cellRenderer
60
+ //meta.cellType: MacromoleculeDifference
61
+ //meta.columnTags: quality=MacromoleculeDifference
62
+ //output: grid_cell_renderer result
63
+ export function macromoleculeDifferenceCellRenderer(): MacromoleculeDifferenceCellRenderer {
64
+ return new MacromoleculeDifferenceCellRenderer();
65
+ }
66
+
67
+
59
68
  function checkInputColumnUi(
60
69
  col: DG.Column, name: string, allowedNotations: string[] = [], allowedAlphabets: string[] = []
61
70
  ): boolean {
@@ -376,15 +385,6 @@ export function monomerCellRenderer(): MonomerCellRenderer {
376
385
  return new MonomerCellRenderer();
377
386
  }
378
387
 
379
- //name: MacromoleculeDifferenceCellRenderer
380
- //tags: cellRenderer
381
- //meta.cellType: MacromoleculeDifference
382
- //meta.columnTags: quality=MacromoleculeDifference
383
- //output: grid_cell_renderer result
384
- export function macromoleculeDifferenceCellRenderer(): MacromoleculeDifferenceCellRenderer {
385
- return new MacromoleculeDifferenceCellRenderer();
386
- }
387
-
388
388
  //name: testDetectMacromolecule
389
389
  //input: string path {choices: ['Demo:Files/', 'System:AppData/']}
390
390
  //output: dataframe result
@@ -460,11 +460,3 @@ export function getHelmMonomers(seqCol: DG.Column<string>): string[] {
460
460
  const stats = WebLogo.getStats(seqCol, 1, WebLogo.splitterAsHelm);
461
461
  return Object.keys(stats.freq);
462
462
  }
463
-
464
- export async function macromoleculesFingerprints(mcol: DG.Column): Promise<Uint8Array[]> {
465
- grok.functions.call('Chem:getRdKitModule');
466
- const monomers = getHelmMonomers(mcol);
467
- const mols = await grok.functions.call('HELM:getMolFiles', {mcol: mcol});
468
-
469
- return getFingerprints(mols.toList(), monomers);
470
- }
@@ -31,6 +31,18 @@ category('renderers', () => {
31
31
  performanceTest(generateManySequences, 'Many sequences');
32
32
  });
33
33
 
34
+ test('rendererMacromoleculeFasta', async () => {
35
+ await _rendererMacromoleculeFasta();
36
+ });
37
+
38
+ test('rendererMacromoleculeSeparator', async () => {
39
+ await _rendererMacromoleculeSeparator();
40
+ });
41
+
42
+ test('rendererMacromoleculeDifference', async () => {
43
+ await _rendererMacromoleculeDifference();
44
+ });
45
+
34
46
  test('afterMsa', async () => {
35
47
  await _testAfterMsa();
36
48
  });
@@ -39,10 +51,73 @@ category('renderers', () => {
39
51
  await _testAfterConvert();
40
52
  });
41
53
 
42
- test('setRenderer', async () => {
54
+ test('selectRendererBySemType', async () => {
55
+ await _selectRendererBySemType();
56
+ });
57
+
58
+ test('setRendererManually', async () => {
43
59
  await _setRendererManually();
44
60
  });
45
61
 
62
+ async function _rendererMacromoleculeFasta() {
63
+ const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA.csv');
64
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
65
+
66
+ const seqCol = df.getCol('Sequence');
67
+ const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
68
+ if (semType)
69
+ seqCol.semType = semType;
70
+
71
+ const tv: DG.TableView = grok.shell.addTableView(df);
72
+ // call to calculate 'cell.renderer' tag
73
+ await grok.data.detectSemanticTypes(df);
74
+
75
+ dfList.push(df);
76
+ tvList.push(tv);
77
+
78
+ const resCellRenderer = seqCol.getTag(DG.TAGS.CELL_RENDERER);
79
+ expect(resCellRenderer, 'sequence');
80
+ }
81
+
82
+ async function _rendererMacromoleculeSeparator() {
83
+ const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_SEPARATOR_PT.csv');
84
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
85
+
86
+ const seqCol = df.getCol('sequence');
87
+ const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
88
+ if (semType)
89
+ seqCol.semType = semType;
90
+
91
+ const tv: DG.TableView = grok.shell.addTableView(df);
92
+ // call to calculate 'cell.renderer' tag
93
+ await grok.data.detectSemanticTypes(df);
94
+
95
+ dfList.push(df);
96
+ tvList.push(tv);
97
+
98
+ const resCellRenderer = seqCol.getTag(DG.TAGS.CELL_RENDERER);
99
+ expect(resCellRenderer, 'sequence');
100
+ }
101
+
102
+ async function _rendererMacromoleculeDifference() {
103
+ const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
104
+ ['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
105
+ seqDiffCol.tags[DG.TAGS.UNITS] = 'separator';
106
+ seqDiffCol.tags[TAGS.SEPARATOR] = '/';
107
+ seqDiffCol.semType = SEM_TYPES.MACROMOLECULE_DIFFERENCE;
108
+ const df = DG.DataFrame.fromColumns([seqDiffCol]);
109
+
110
+ const tv: DG.TableView = grok.shell.addTableView(df);
111
+ // call to calculate 'cell.renderer' tag
112
+ await grok.data.detectSemanticTypes(df);
113
+
114
+ dfList.push(df);
115
+ tvList.push(tv);
116
+
117
+ const resCellRenderer = seqDiffCol.getTag(DG.TAGS.CELL_RENDERER);
118
+ expect(resCellRenderer, 'MacromoleculeDifference');
119
+ }
120
+
46
121
  async function _testAfterMsa() {
47
122
  const fastaTxt: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA.fasta');
48
123
  const df: DG.DataFrame = importFasta(fastaTxt)[0];
@@ -86,36 +161,69 @@ category('renderers', () => {
86
161
  async function _testAfterConvert() {
87
162
  const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA_PT.csv');
88
163
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
89
- const tv: DG.TableView = grok.shell.addTableView(df);
90
164
 
91
165
  const srcCol: DG.Column = df.col('sequence')!;
92
- // await grok.data.detectSemanticTypes(df);
93
166
  const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: srcCol});
94
167
  if (semType)
95
168
  srcCol.semType = semType;
169
+
170
+ const tv: DG.TableView = grok.shell.addTableView(df);
171
+ // call to calculate 'cell.renderer' tag
96
172
  await grok.data.detectSemanticTypes(df);
97
173
 
174
+ tvList.push(tv);
175
+ dfList.push(df);
176
+
98
177
  const tgtCol: DG.Column = await convertDo(srcCol, NOTATION.SEPARATOR, '/');
99
- expect(tgtCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
178
+
179
+ const resCellRenderer = tgtCol.getTag(DG.TAGS.CELL_RENDERER);
180
+ expect(resCellRenderer, 'sequence');
100
181
 
101
182
  // check tgtCol with UnitsHandler constructor
102
183
  const uh: UnitsHandler = new UnitsHandler(tgtCol);
184
+ }
103
185
 
104
- tvList.push(tv);
186
+ async function _selectRendererBySemType() {
187
+ /* There are renderers for semType Macromolecule and MacromoleculeDifference.
188
+ Misbehavior was by selecting Macromolecule renderers for MacromoleculeDifference semType column
189
+ /**/
190
+ const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
191
+ ['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
192
+ seqDiffCol.tags[DG.TAGS.UNITS] = 'separator';
193
+ seqDiffCol.tags[TAGS.SEPARATOR] = '/';
194
+ seqDiffCol.semType = SEM_TYPES.MACROMOLECULE_DIFFERENCE;
195
+ const df = DG.DataFrame.fromColumns([seqDiffCol]);
196
+ const tv = grok.shell.addTableView(df);
105
197
  dfList.push(df);
198
+ tvList.push(tv);
199
+
200
+ await delay(100);
201
+ const renderer = seqDiffCol.getTag(DG.TAGS.CELL_RENDERER);
202
+ if (renderer !== 'MacromoleculeDifference') // this is value of MacromoleculeDifferenceCR.cellType
203
+ throw new Error(`Units 'separator', separator '/' and semType 'MacromoleculeDifference' ` +
204
+ `have been manually set on column but after df was added as table, ` +
205
+ `view renderer has set to '${renderer}' instead of correct 'MacromoleculeDifference'.`);
106
206
  }
107
207
 
108
208
  async function _setRendererManually() {
109
- const df = DG.DataFrame.fromColumns([DG.Column.fromStrings(
110
- 'SequencesDiff', ['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV'])]);
111
- df.col('SequencesDiff')!.tags[DG.TAGS.UNITS] = 'separator';
112
- df.col('SequencesDiff')!.tags[TAGS.SEPARATOR] = '/';
113
- df.col('SequencesDiff')!.semType = SEM_TYPES.MACROMOLECULE_DIFFERENCE;
114
- const tw = grok.shell.addTableView(df);
209
+ const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
210
+ ['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
211
+ seqDiffCol.tags[DG.TAGS.UNITS] = 'separator';
212
+ seqDiffCol.tags[TAGS.SEPARATOR] = '/';
213
+ seqDiffCol.semType = SEM_TYPES.MACROMOLECULE;
214
+ const tgtCellRenderer = 'MacromoleculeDifference';
215
+ seqDiffCol.setTag(DG.TAGS.CELL_RENDERER, tgtCellRenderer);
216
+ const df = DG.DataFrame.fromColumns([seqDiffCol]);
217
+ await grok.data.detectSemanticTypes(df);
218
+ const tv = grok.shell.addTableView(df);
219
+ dfList.push(df);
220
+ tvList.push(tv);
221
+
115
222
  await delay(100);
116
- const renderer = tw.dataFrame.col('SequencesDiff')?.getTag(DG.TAGS.CELL_RENDERER);
117
- if (renderer !== 'MacromoleculeDifference') // this is value of MacromoleculeDifferenceCR.cellType
118
- throw new Error(`Units 'separator', separator '/' and semType 'MacromoleculeDifference' have been ` +
119
- `manually set on column but after df aws added as table view renderer has been reset to '${renderer}'`);
223
+ const resCellRenderer = seqDiffCol.getTag(DG.TAGS.CELL_RENDERER);
224
+ if (resCellRenderer !== tgtCellRenderer) // this is value of MacromoleculeDifferenceCR.cellType
225
+ throw new Error(`Tag 'cell.renderer' has been manually set to '${tgtCellRenderer}' for column ` +
226
+ `but after df was added as table, tag 'cell.renderer' has reset to '${resCellRenderer}' ` +
227
+ `instead of manual '${tgtCellRenderer}'.`);
120
228
  }
121
229
  });
@@ -1,10 +1,10 @@
1
- import {after, before, category, test, expect, expectArray} from '@datagrok-libraries/utils/src/test';
1
+ import {after, before, category, test, expect, expectArray, expectObject} from '@datagrok-libraries/utils/src/test';
2
2
 
3
3
  import * as grok from 'datagrok-api/grok';
4
4
  import * as ui from 'datagrok-api/ui';
5
5
  import * as DG from 'datagrok-api/dg';
6
6
  import {WebLogo, SplitterFunc} from '@datagrok-libraries/bio/src/viewers/web-logo';
7
- import {splitToMonomers, _package} from '../package';
7
+ import {splitToMonomers, _package, getHelmMonomers} from '../package';
8
8
  import * as C from '../utils/constants';
9
9
 
10
10
  category('splitters', () => {
@@ -87,6 +87,30 @@ category('splitters', () => {
87
87
  splitToMonomers(seqCol);
88
88
  expect(df.columns.names().includes('17'), true);
89
89
  });
90
+
91
+ test('getHelmMonomers', async () => {
92
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(
93
+ `HELM,Activity
94
+ PEPTIDE1{hHis.N.T}$$$,5.30751
95
+ PEPTIDE1{hHis.Aca.Cys_SEt}$$$,5.72388
96
+ `);
97
+ const expectedMonomerList = ['hHis', 'Aca', 'Cys_SEt', 'N', 'T'];
98
+
99
+ const helmCol: DG.Column = df.getCol('HELM');
100
+ const res = getHelmMonomers(helmCol);
101
+
102
+ const missed = expectedMonomerList.filter((m) => !res.includes(m));
103
+ const unexpected = res.filter((m) => !expectedMonomerList.includes(m));
104
+ if (missed.length > 0 || unexpected.length) {
105
+ const msgs = [];
106
+ if (missed.length > 0)
107
+ msgs.push(`Missed monomers ${JSON.stringify(missed)}.`);
108
+ if (unexpected.length > 0)
109
+ msgs.push(`Unexpected monomers ${JSON.stringify(unexpected)}.`);
110
+
111
+ throw new Error(msgs.join(' '));
112
+ }
113
+ });
90
114
  });
91
115
 
92
116
  export async function _testHelmSplitter(src: string, tgt: string[]) {
@@ -79,6 +79,7 @@ export function convert(col: DG.Column): void {
79
79
  }
80
80
  }
81
81
 
82
+ /** Creates a new column with converted sequences and detects its semantic type */
82
83
  export async function convertDo(
83
84
  srcCol: DG.Column, targetNotation: NOTATION, separator: string | null
84
85
  ): Promise<DG.Column> {