@datagrok/bio 1.7.2 → 1.7.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,13 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
2
  import * as ui from 'datagrok-api/ui';
3
- import {NotationConverter, NOTATION} from './notation-converter';
3
+ import * as grok from 'datagrok-api/grok';
4
+
5
+ import {Subscription} from 'rxjs';
6
+ import {NotationConverter, NOTATION} from '@datagrok-libraries/bio/src/utils/notation-converter';
7
+
8
+
9
+ let convertDialog: DG.Dialog | null = null;
10
+ let convertDialogSubs: Subscription[] = [];
4
11
 
5
12
  /**
6
13
  * Converts notations of a Macromolecule column
@@ -22,19 +29,35 @@ export function convert(col: DG.Column): void {
22
29
 
23
30
  const separatorInput = ui.choiceInput('Choose separator', separatorArray[0], separatorArray);
24
31
 
25
- ui.dialog('Convert sequence notation')
26
- .add(ui.div([
27
- ui.h1('Current notation: ' + current),
28
- targetNotationInput.root,
29
- // TODO: conditional separator input
30
- separatorInput.root
31
- ]))
32
- .onOK(() => {
33
- //TODO: create new converted column
34
- const targetNotation = targetNotationInput.value as NOTATION;
35
- const separator = separatorInput.value!;
36
- const newColumn = converter.convert(targetNotation, separator);
37
- col.dataFrame.columns.add(newColumn);
38
- })
39
- .show();
32
+ if (convertDialog == null) {
33
+ convertDialog = ui.dialog('Convert sequence notation')
34
+ .add(ui.div([
35
+ ui.h1('Current notation: ' + current),
36
+ targetNotationInput.root,
37
+ // TODO: conditional separator input
38
+ separatorInput.root
39
+ ]))
40
+ .onOK(async () => {
41
+ const targetNotation = targetNotationInput.value as NOTATION;
42
+ const separator: string | null = separatorInput.value;
43
+
44
+ await convertDo(col, targetNotation, separator);
45
+ })
46
+ .show();
47
+
48
+ convertDialogSubs.push(convertDialog.onClose.subscribe((value) => {
49
+ convertDialogSubs.forEach((s) => {s.unsubscribe(); });
50
+ convertDialogSubs = [];
51
+ convertDialog = null;
52
+ }));
53
+ }
54
+ }
55
+
56
+ export async function convertDo(srcCol: DG.Column, targetNotation: NOTATION, separator: string | null): Promise<DG.Column> {
57
+ const converter = new NotationConverter(srcCol);
58
+ const newColumn = converter.convert(targetNotation, separator);
59
+ srcCol.dataFrame.columns.add(newColumn);
60
+ await grok.data.detectSemanticTypes(srcCol.dataFrame);
61
+ return newColumn;
40
62
  }
63
+
@@ -1,6 +1,7 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
2
  import {WebLogo, SplitterFunc} from '@datagrok-libraries/bio/src/viewers/web-logo';
3
3
  import * as grok from 'datagrok-api/grok';
4
+ import { CAP_GROUP_NAME, CAP_GROUP_SMILES, jsonSdfMonomerLibDict, MONOMER_SYMBOL, RGROUP_ALTER_ID, RGROUP_FIELD, RGROUP_LABEL, SDF_MONOMER_NAME } from '../const';
4
5
 
5
6
  export const HELM_CORE_LIB_FILENAME = '/samples/HELMCoreLibrary.json';
6
7
  export const HELM_CORE_LIB_MONOMER_SYMBOL = 'symbol';
@@ -43,3 +44,37 @@ export function createMomomersMolDict(lib: any[]): { [key: string]: string | any
43
44
  });
44
45
  return dict;
45
46
  }
47
+
48
+
49
+ export function createJsonMonomerLibFromSdf(table: DG.DataFrame): any {
50
+ const resultLib = [];
51
+ for (let i = 0; i < table.rowCount; i++) {
52
+ const monomer: { [key: string]: string | any } = {};
53
+ Object.keys(jsonSdfMonomerLibDict).forEach(key => {
54
+ if (key === MONOMER_SYMBOL) {
55
+ const monomerSymbol = table.get(jsonSdfMonomerLibDict[key], i);
56
+ monomer[key] = monomerSymbol === '.' ? table.get(SDF_MONOMER_NAME, i) : monomerSymbol;
57
+ } else if (key === RGROUP_FIELD) {
58
+ const rgroups = table.get(jsonSdfMonomerLibDict[key], i).split('\n');
59
+ const jsonRgroups: any[] = [];
60
+ rgroups.forEach((g: string) => {
61
+ const rgroup: { [key: string]: string | any } = {};
62
+ const altAtom = g.substring(g.lastIndexOf("]") + 1);
63
+ let radicalNum = g.match(/\[R(\d+)\]/)![1];
64
+ rgroup[CAP_GROUP_SMILES] = altAtom === 'H' ? `[*:${radicalNum}][H]` : `O[*:${radicalNum}]`;
65
+ rgroup[RGROUP_ALTER_ID] = altAtom === 'H' ? `R${radicalNum}-H` : `R${radicalNum}-OH`;
66
+ rgroup[CAP_GROUP_NAME] = altAtom === 'H' ? `H` : `OH`;
67
+ rgroup[RGROUP_LABEL] = `R${radicalNum}`;
68
+ jsonRgroups.push(rgroup);
69
+ })
70
+ monomer[key] = jsonRgroups;
71
+ } else {
72
+ if((jsonSdfMonomerLibDict as { [key: string]: string | any })[key]) {
73
+ monomer[key] = table.get((jsonSdfMonomerLibDict as { [key: string]: string | any })[key], i);
74
+ }
75
+ }
76
+ })
77
+ resultLib.push(monomer);
78
+ }
79
+ return resultLib;
80
+ }
@@ -1,4 +1,4 @@
1
- <html><head><meta charset="utf-8"/><title>Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=34f75e5127b8. Commit ac96da52.</title><style type="text/css">html,
1
+ <html><head><meta charset="utf-8"/><title>Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=34f75e5127b8. Commit 0507068d.</title><style type="text/css">html,
2
2
  body {
3
3
  font-family: Arial, Helvetica, sans-serif;
4
4
  font-size: 1rem;
@@ -229,7 +229,7 @@ header {
229
229
  font-size: 1rem;
230
230
  padding: 0 0.5rem;
231
231
  }
232
- </style></head><body><div id="jesthtml-content"><header><h1 id="title">Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=34f75e5127b8. Commit ac96da52.</h1></header><div id="metadata-container"><div id="timestamp">Started: 2022-07-13 09:24:48</div><div id="summary"><div id="suite-summary"><div class="summary-total">Suites (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div><div id="test-summary"><div class="summary-total">Tests (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div></div></div><div id="suite-1" class="suite-container"><div class="suite-info"><div class="suite-path">/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts</div><div class="suite-time warn">110.354s</div></div><div class="suite-tests"><div class="test-result failed"><div class="test-info"><div class="test-suitename"> </div><div class="test-title">TEST</div><div class="test-status">failed</div><div class="test-duration">100.001s</div></div><div class="failureMessages"> <pre class="failureMsg">Error: thrown: "Exceeded timeout of 100000 ms for a test.
232
+ </style></head><body><div id="jesthtml-content"><header><h1 id="title">Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=34f75e5127b8. Commit 0507068d.</h1></header><div id="metadata-container"><div id="timestamp">Started: 2022-07-14 14:09:37</div><div id="summary"><div id="suite-summary"><div class="summary-total">Suites (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div><div id="test-summary"><div class="summary-total">Tests (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div></div></div><div id="suite-1" class="suite-container"><div class="suite-info"><div class="suite-path">/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts</div><div class="suite-time warn">110.386s</div></div><div class="suite-tests"><div class="test-result failed"><div class="test-info"><div class="test-suitename"> </div><div class="test-title">TEST</div><div class="test-status">failed</div><div class="test-duration">100.001s</div></div><div class="failureMessages"> <pre class="failureMsg">Error: thrown: "Exceeded timeout of 100000 ms for a test.
233
233
  Use jest.setTimeout(newTimeout) to increase the timeout value, if this is a long-running test."
234
234
  at Object.&lt;anonymous&gt; (/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:22:1)
235
235
  at Runtime._execModule (/home/runner/work/public/public/packages/Bio/node_modules/jest-runtime/build/index.js:1646:24)
@@ -1,22 +0,0 @@
1
- import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
2
-
3
- import * as grok from 'datagrok-api/grok';
4
- import * as ui from 'datagrok-api/ui';
5
- import * as DG from 'datagrok-api/dg';
6
- import {WebLogo, SplitterFunc} from '@datagrok-libraries/bio/src/viewers/web-logo';
7
-
8
- category('splitter', () => {
9
- const helm1 = 'PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$';
10
- const helm2 = 'PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.N.meK}$$$';
11
-
12
- test('helm1', async () => { await _testHelmSplitter(helm1); });
13
- test('helm2', async () => { await _testHelmSplitter(helm2); });
14
- });
15
-
16
- export async function _testHelmSplitter(txt: string) {
17
- // const splitter: SplitterFunc = WebLogo.getSplitterAsHelm();
18
- //
19
- // const mList: string[] = splitter(txt);
20
- // expect(mList.length, 12);
21
- }
22
-
@@ -1,280 +0,0 @@
1
- import * as DG from 'datagrok-api/dg';
2
- import {WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
3
-
4
- /** enum type to simplify setting "user-friendly" notation if necessary */
5
- export const enum NOTATION {
6
- FASTA = 'FASTA',
7
- SEPARATOR = 'SEPARATOR',
8
- HELM = 'HELM'
9
- }
10
-
11
- /** Class for handling conversion of notation systems in Macromolecule columns */
12
- export class NotationConverter {
13
- private _sourceColumn: DG.Column; // the column to be converted
14
- private _sourceUnits: string; // units, of the form fasta:SEQ:NT, etc.
15
- private _sourceNotation: NOTATION; // current notation (without :SEQ:NT, etc.)
16
- private _defaultGapSymbol: string;
17
- private _defaultGapSymbolsDict = {
18
- helm: '*',
19
- separator: '',
20
- fasta: '-',
21
- };
22
-
23
- private get sourceUnits(): string { return this._sourceUnits; }
24
-
25
- private get sourceColumn(): DG.Column { return this._sourceColumn; }
26
-
27
- public get sourceNotation(): NOTATION { return this._sourceNotation; }
28
-
29
- public get defaultGapSymbol(): string { return this._defaultGapSymbol; }
30
-
31
- public get separator(): string {
32
- const separator = this.sourceColumn.getTag('separator');
33
- if (separator !== null)
34
- return separator;
35
- else
36
- throw new Error('Separator not set');
37
- }
38
-
39
- public isFasta(): boolean { return this.sourceNotation === NOTATION.FASTA; }
40
-
41
- public isSeparator(): boolean { return this.sourceNotation === NOTATION.SEPARATOR; }
42
-
43
- public isHelm(): boolean { return this.sourceNotation === NOTATION.HELM; }
44
-
45
- public toFasta(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.FASTA; }
46
-
47
- public toSeparator(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.SEPARATOR; }
48
-
49
- public toHelm(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.HELM; }
50
-
51
- public isRna(): boolean { return this.sourceUnits.toLowerCase().endsWith('rna'); }
52
-
53
- public isDna(): boolean { return this.sourceUnits.toLowerCase().endsWith('dna'); }
54
-
55
- public isPeptide(): boolean { return this.sourceUnits.toLowerCase().endsWith('pt'); }
56
-
57
- /** Associate notation types with the corresponding units */
58
- /**
59
- * @return {NOTATION} Notation associated with the units type
60
- */
61
- private getSourceNotation(): NOTATION {
62
- if (this.sourceUnits.toLowerCase().startsWith('fasta'))
63
- return NOTATION.FASTA;
64
- else if (this.sourceUnits.toLowerCase().startsWith('separator'))
65
- return NOTATION.SEPARATOR;
66
- else if (this.sourceUnits.toLowerCase().startsWith('helm'))
67
- return NOTATION.HELM;
68
- else
69
- throw new Error('The column has units that do not correspond to any notation');
70
- }
71
-
72
- /**
73
- * Create a new empty column of the specified notation type and the same
74
- * length as sourceColumn
75
- *
76
- * @param {NOTATION} targetNotation
77
- * @return {DG.Column}
78
- */
79
- private getNewColumn(targetNotation: NOTATION): DG.Column {
80
- const col = this.sourceColumn;
81
- const len = col.length;
82
- const name = targetNotation + '(' + col.name + ')';
83
- const newColName = col.dataFrame.columns.getUnusedName(name);
84
- // dummy code
85
- const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill(''));
86
- newColumn.semType = DG.SEMTYPE.MACROMOLECULE;
87
- newColumn.setTag(
88
- DG.TAGS.UNITS,
89
- this.sourceUnits.replace(
90
- this.sourceNotation.toLowerCase().toString(),
91
- targetNotation.toLowerCase().toString()
92
- )
93
- );
94
- // TODO: specify cell renderers for all cases
95
- if (this.toFasta(targetNotation)) {
96
- newColumn.setTag(
97
- DG.TAGS.CELL_RENDERER,
98
- // TODO: replace by the enumeration value
99
- 'Macromolecule');
100
- }
101
- return newColumn;
102
- }
103
-
104
- /**
105
- * Convert a Macromolecule column from FASTA to SEPARATOR notation
106
- *
107
- * @param {string} separator A specific separator to be used
108
- * @param {string} gapSymbol Gap symbol in FASTA, '-' by default
109
- * @return {DG.Column} A new column in SEPARATOR notation
110
- */
111
- private convertFastaToSeparator(separator: string, gapSymbol: string = '-'): DG.Column {
112
- // a function splitting FASTA sequence into an array of monomers:
113
- const splitterAsFasta = WebLogo.splitterAsFasta;
114
-
115
- const newColumn = this.getNewColumn(NOTATION.SEPARATOR);
116
- // assign the values to the newly created empty column
117
- newColumn.init((idx: number) => {
118
- const fastaPolymer = this.sourceColumn.get(idx);
119
- const fastaMonomersArray = splitterAsFasta(fastaPolymer);
120
- for (let i = 0; i < fastaMonomersArray.length; i++) {
121
- if (fastaMonomersArray[i] === gapSymbol)
122
- fastaMonomersArray[i] = '';
123
- }
124
- return fastaMonomersArray.join(separator);
125
- });
126
- return newColumn;
127
- }
128
-
129
- private convertToHelm(sourceGapSymbol: string | null = null) {
130
- if (sourceGapSymbol === null)
131
- sourceGapSymbol = this.defaultGapSymbol;
132
- // A function splitting a sequence into an array of monomers according to
133
- // its notation
134
- const splitter = WebLogo.getSplitterForColumn(this.sourceColumn);
135
-
136
- const prefix = (this.isDna()) ? 'DNA1{' :
137
- (this.isRna()) ? 'RNA1{' :
138
- (this.isPeptide()) ? 'PEPTIDE1{' :
139
- 'Unknown'; // this case should be handled as exceptional
140
-
141
- if (prefix === 'Unknown')
142
- throw new Error('Neither peptide, nor nucleotide');
143
-
144
- const postfix = '}$$$';
145
- const leftWrapper = (this.isDna()) ? 'D(' :
146
- (this.isRna()) ? 'R(' : ''; // no wrapper for peptides
147
- const rightWrapper = (this.isDna() || this.isRna()) ? ')P' : ''; // no wrapper for peptides
148
-
149
- const newColumn = this.getNewColumn(NOTATION.HELM);
150
- // assign the values to the empty column
151
- newColumn.init((idx: number) => {
152
- const sourcePolymer = this.sourceColumn.get(idx);
153
- const sourceMonomersArray = splitter(sourcePolymer);
154
- const helmArray = [prefix];
155
- let firstIteration = true;
156
- for (let i = 0; i < sourceMonomersArray.length; i++) {
157
- const dot = firstIteration ? '' : '.';
158
- let token = sourceMonomersArray[i];
159
- if (token === sourceGapSymbol)
160
- token = this._defaultGapSymbolsDict.helm;
161
- const item = [dot, leftWrapper, token, rightWrapper];
162
- helmArray.push(item.join(''));
163
- firstIteration = false;
164
- }
165
- helmArray.push(postfix);
166
- return helmArray.join('');
167
- });
168
- return newColumn;
169
- }
170
-
171
- private handleSeparatorItemForFasta(
172
- idx: number,
173
- separatorItemsArray: string[],
174
- separator: string,
175
- gapSymbol: string,
176
- fastaMonomersArray: string[]
177
- ): void {
178
- const item = separatorItemsArray[idx];
179
- if (item.length > 1) {
180
- // the case of a multi-character monomer
181
- const monomer = '[' + item + ']';
182
- fastaMonomersArray.push(monomer);
183
- }
184
- if (item === separator) {
185
- if (idx !== 0 && separatorItemsArray[idx - 1] === separator)
186
- fastaMonomersArray.push(gapSymbol);
187
- }
188
- }
189
-
190
- private convertSeparatorToFasta(
191
- separator: string | null = null,
192
- gapSymbol: string = '-'
193
- ): DG.Column {
194
- // TODO: implementation
195
- // * similarly to fasta2separator, divide string into monomers
196
- // * adjacent separators is a gap (symbol to be specified)
197
- // * the monomers MUST be single-character onles, otherwise forbid
198
- // * NO, they can be multi-characters
199
- // conversion
200
- // * consider automatic determining the separator
201
-
202
- if (separator === null)
203
- separator = this.separator;
204
-
205
- // a function splitting FASTA sequence into an array of monomers
206
- //const splitterAsSeparator = WebLogo.getSplitterWithSeparator(separator);
207
- const splitter = WebLogo.getSplitterForColumn(this.sourceColumn);
208
-
209
- const newColumn = this.getNewColumn(NOTATION.FASTA);
210
- // assign the values to the empty column
211
- newColumn.init((idx: number) => {
212
- const separatorPolymer = this.sourceColumn.get(idx);
213
- // items can be monomers or separators
214
- const separatorItemsArray = splitter(separatorPolymer);
215
- const fastaMonomersArray: string[] = [];
216
- for (let i = 0; i < separatorItemsArray.length; i++) {
217
- const item = separatorItemsArray[i];
218
- if (item.length === 0) {
219
- fastaMonomersArray.push(gapSymbol);
220
- } else if (item.length > 1) {
221
- // the case of a multi-character monomer
222
- const monomer = '[' + item + ']';
223
- fastaMonomersArray.push(monomer);
224
- } else {
225
- fastaMonomersArray.push(item);
226
- }
227
- }
228
- return fastaMonomersArray.join('');
229
- });
230
- return newColumn;
231
- }
232
-
233
- private convertHelmToFasta(): DG.Column {
234
- // TODO: implementation
235
- return this.getNewColumn(NOTATION.FASTA);
236
- }
237
-
238
- private convertHelmToSeparator(): DG.Column {
239
- // TODO: implementatioreturn this.getNewColumn();
240
- return this.getNewColumn(NOTATION.SEPARATOR);
241
- }
242
-
243
- /** Dispatcher method for notation conversion
244
- *
245
- * @param {NOTATION} targetNotation Notation we want to convert to
246
- * @param {string | null} tgtSeparator Possible separator
247
- * @return {DG.Column} Converted column
248
- */
249
- public convert(targetNotation: NOTATION, tgtSeparator: string | null = null): DG.Column {
250
- // possible exceptions
251
- if (this.sourceNotation === targetNotation)
252
- throw new Error('Target notation is invalid');
253
- if (this.toSeparator(targetNotation) && tgtSeparator === null)
254
- throw new Error('Target separator is not specified');
255
-
256
- if (this.isFasta() && this.toSeparator(targetNotation) && tgtSeparator !== null)
257
- return this.convertFastaToSeparator(tgtSeparator);
258
- else if ((this.isFasta() || this.isSeparator()) && this.toHelm(targetNotation))
259
- return this.convertToHelm();
260
- else if (this.isSeparator() && this.toFasta(targetNotation))
261
- return this.convertSeparatorToFasta(tgtSeparator!);
262
- else if (this.isHelm() && this.toFasta(targetNotation))
263
- return this.convertHelmToFasta();
264
- else
265
- return this.convertHelmToSeparator();
266
- }
267
-
268
- public constructor(col: DG.Column) {
269
- this._sourceColumn = col;
270
- const units = this._sourceColumn.tags[DG.TAGS.UNITS];
271
- if (units !== null)
272
- this._sourceUnits = units;
273
- else
274
- throw new Error('Units are not specified in column');
275
- this._sourceNotation = this.getSourceNotation();
276
- this._defaultGapSymbol = (this.isFasta()) ? this._defaultGapSymbolsDict.fasta :
277
- (this.isHelm()) ? this._defaultGapSymbolsDict.helm :
278
- this._defaultGapSymbolsDict.separator;
279
- }
280
- }