@datagrok/bio 1.7.0 → 1.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-test.js +104 -113
- package/dist/package.js +65 -107
- package/files/{samples → tests}/peptides_complex_msa.csv +0 -0
- package/files/{samples → tests}/peptides_simple_msa.csv +0 -0
- package/files/{samples/testSmiles.csv → tests/sar-small.csv} +0 -0
- package/files/{samples → tests}/testDemog.csv +0 -0
- package/files/{samples → tests}/testHelm.csv +0 -0
- package/files/{samples → tests}/testId.csv +0 -0
- package/files/tests/testSmiles.csv +201 -0
- package/files/{samples → tests}/testSmiles2.csv +0 -0
- package/package.json +1 -1
- package/src/package.ts +1 -1
- package/src/tests/convert-test.ts +41 -0
- package/src/tests/detectors-test.ts +6 -6
- package/src/utils/cell-renderer.ts +18 -23
- package/src/utils/multiple-sequence-alignment.ts +0 -1
- package/src/utils/notation-converter.ts +51 -98
- package/{test-Bio-34f75e5127b8-7af21e5d.html → test-Bio-34f75e5127b8-726a0649.html} +2 -2
|
@@ -13,6 +13,12 @@ export class NotationConverter {
|
|
|
13
13
|
private _sourceColumn: DG.Column; // the column to be converted
|
|
14
14
|
private _sourceUnits: string; // units, of the form fasta:SEQ:NT, etc.
|
|
15
15
|
private _sourceNotation: NOTATION; // current notation (without :SEQ:NT, etc.)
|
|
16
|
+
private _defaultGapSymbol: string;
|
|
17
|
+
private _defaultGapSymbolsDict = {
|
|
18
|
+
helm: '*',
|
|
19
|
+
separator: '',
|
|
20
|
+
fasta: '-',
|
|
21
|
+
};
|
|
16
22
|
|
|
17
23
|
private get sourceUnits(): string { return this._sourceUnits; }
|
|
18
24
|
|
|
@@ -20,6 +26,16 @@ export class NotationConverter {
|
|
|
20
26
|
|
|
21
27
|
public get sourceNotation(): NOTATION { return this._sourceNotation; }
|
|
22
28
|
|
|
29
|
+
public get defaultGapSymbol(): string { return this._defaultGapSymbol; }
|
|
30
|
+
|
|
31
|
+
public get separator(): string {
|
|
32
|
+
const separator = this.sourceColumn.getTag('separator');
|
|
33
|
+
if (separator !== null)
|
|
34
|
+
return separator;
|
|
35
|
+
else
|
|
36
|
+
throw new Error('Separator not set');
|
|
37
|
+
}
|
|
38
|
+
|
|
23
39
|
public isFasta(): boolean { return this.sourceNotation === NOTATION.FASTA; }
|
|
24
40
|
|
|
25
41
|
public isSeparator(): boolean { return this.sourceNotation === NOTATION.SEPARATOR; }
|
|
@@ -42,34 +58,15 @@ export class NotationConverter {
|
|
|
42
58
|
/**
|
|
43
59
|
* @return {NOTATION} Notation associated with the units type
|
|
44
60
|
*/
|
|
45
|
-
private
|
|
61
|
+
private getSourceNotation(): NOTATION {
|
|
46
62
|
if (this.sourceUnits.toLowerCase().startsWith('fasta'))
|
|
47
63
|
return NOTATION.FASTA;
|
|
48
64
|
else if (this.sourceUnits.toLowerCase().startsWith('separator'))
|
|
49
65
|
return NOTATION.SEPARATOR;
|
|
50
|
-
else
|
|
51
|
-
// TODO: handle possible exceptions
|
|
66
|
+
else if (this.sourceUnits.toLowerCase().startsWith('helm'))
|
|
52
67
|
return NOTATION.HELM;
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
/**
|
|
56
|
-
* Determine the separator used in SEPARATOR column
|
|
57
|
-
*
|
|
58
|
-
* @return {string} The detected separator
|
|
59
|
-
*/
|
|
60
|
-
private determineSeparator(): string {
|
|
61
|
-
// TODO: figure out how to determine the separator efficiently
|
|
62
|
-
const col = this.sourceColumn;
|
|
63
|
-
let i = 0;
|
|
64
|
-
const re = /[^a-z]/;
|
|
65
|
-
while (i < col.length) {
|
|
66
|
-
const molecule = col.get(i);
|
|
67
|
-
const foundSeparator = molecule.toLowerCase().match(re);
|
|
68
|
-
if (foundSeparator)
|
|
69
|
-
return foundSeparator[0];
|
|
70
|
-
i++;
|
|
71
|
-
}
|
|
72
|
-
throw new Error('No separators found');
|
|
68
|
+
else
|
|
69
|
+
throw new Error('The column has units that do not correspond to any notation');
|
|
73
70
|
}
|
|
74
71
|
|
|
75
72
|
/**
|
|
@@ -86,7 +83,7 @@ export class NotationConverter {
|
|
|
86
83
|
const newColName = col.dataFrame.columns.getUnusedName(name);
|
|
87
84
|
// dummy code
|
|
88
85
|
const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill(''));
|
|
89
|
-
newColumn.semType =
|
|
86
|
+
newColumn.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
90
87
|
newColumn.setTag(
|
|
91
88
|
DG.TAGS.UNITS,
|
|
92
89
|
this.sourceUnits.replace(
|
|
@@ -98,6 +95,7 @@ export class NotationConverter {
|
|
|
98
95
|
if (this.toFasta(targetNotation)) {
|
|
99
96
|
newColumn.setTag(
|
|
100
97
|
DG.TAGS.CELL_RENDERER,
|
|
98
|
+
// TODO: replace by the enumeration value
|
|
101
99
|
'Macromolecule');
|
|
102
100
|
}
|
|
103
101
|
return newColumn;
|
|
@@ -128,19 +126,12 @@ export class NotationConverter {
|
|
|
128
126
|
return newColumn;
|
|
129
127
|
}
|
|
130
128
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
*/
|
|
138
|
-
private convertFastaToHelm(
|
|
139
|
-
fastaGapSymbol: string = '-',
|
|
140
|
-
helmGapSymbol: string = '*'
|
|
141
|
-
): DG.Column {
|
|
142
|
-
// a function splitting FASTA sequence into an array of monomers
|
|
143
|
-
const splitterAsFasta = WebLogo.splitterAsFasta;
|
|
129
|
+
private convertToHelm(sourceGapSymbol: string | null = null) {
|
|
130
|
+
if (sourceGapSymbol === null)
|
|
131
|
+
sourceGapSymbol = this.defaultGapSymbol;
|
|
132
|
+
// A function splitting a sequence into an array of monomers according to
|
|
133
|
+
// its notation
|
|
134
|
+
const splitter = WebLogo.getSplitterForColumn(this.sourceColumn);
|
|
144
135
|
|
|
145
136
|
const prefix = (this.isDna()) ? 'DNA1{' :
|
|
146
137
|
(this.isRna()) ? 'RNA1{' :
|
|
@@ -158,19 +149,17 @@ export class NotationConverter {
|
|
|
158
149
|
const newColumn = this.getNewColumn(NOTATION.HELM);
|
|
159
150
|
// assign the values to the empty column
|
|
160
151
|
newColumn.init((idx: number) => {
|
|
161
|
-
const
|
|
162
|
-
const
|
|
152
|
+
const sourcePolymer = this.sourceColumn.get(idx);
|
|
153
|
+
const sourceMonomersArray = splitter(sourcePolymer);
|
|
163
154
|
const helmArray = [prefix];
|
|
164
155
|
let firstIteration = true;
|
|
165
|
-
for (let i = 0; i <
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
helmArray.push(item.join(''));
|
|
173
|
-
}
|
|
156
|
+
for (let i = 0; i < sourceMonomersArray.length; i++) {
|
|
157
|
+
const dot = firstIteration ? '' : '.';
|
|
158
|
+
let token = sourceMonomersArray[i];
|
|
159
|
+
if (token === sourceGapSymbol)
|
|
160
|
+
token = this._defaultGapSymbolsDict.helm;
|
|
161
|
+
const item = [dot, leftWrapper, token, rightWrapper];
|
|
162
|
+
helmArray.push(item.join(''));
|
|
174
163
|
firstIteration = false;
|
|
175
164
|
}
|
|
176
165
|
helmArray.push(postfix);
|
|
@@ -210,12 +199,12 @@ export class NotationConverter {
|
|
|
210
199
|
// conversion
|
|
211
200
|
// * consider automatic determining the separator
|
|
212
201
|
|
|
213
|
-
|
|
214
|
-
|
|
202
|
+
if (separator === null)
|
|
203
|
+
separator = this.separator;
|
|
215
204
|
|
|
216
205
|
// a function splitting FASTA sequence into an array of monomers
|
|
217
206
|
//const splitterAsSeparator = WebLogo.getSplitterWithSeparator(separator);
|
|
218
|
-
const splitter = WebLogo.getSplitterForColumn(this.
|
|
207
|
+
const splitter = WebLogo.getSplitterForColumn(this.sourceColumn);
|
|
219
208
|
|
|
220
209
|
const newColumn = this.getNewColumn(NOTATION.FASTA);
|
|
221
210
|
// assign the values to the empty column
|
|
@@ -241,47 +230,6 @@ export class NotationConverter {
|
|
|
241
230
|
return newColumn;
|
|
242
231
|
}
|
|
243
232
|
|
|
244
|
-
private convertSeparatorToHelm(fastaGapSymbol: string = '-', helmGapSymbol: string = '*'): DG.Column {
|
|
245
|
-
// a function splitting FASTA sequence into an array of monomers
|
|
246
|
-
const splitter = WebLogo.getSplitterForColumn(this._sourceColumn);
|
|
247
|
-
|
|
248
|
-
const prefix = (this.isDna()) ? 'DNA1{' :
|
|
249
|
-
(this.isRna()) ? 'RNA1{' :
|
|
250
|
-
(this.isPeptide()) ? 'PEPTIDE1{' :
|
|
251
|
-
'Unknown'; // this case should be handled as exceptional
|
|
252
|
-
|
|
253
|
-
if (prefix === 'Unknown')
|
|
254
|
-
throw new Error('Neither peptide, nor nucleotide');
|
|
255
|
-
|
|
256
|
-
const postfix = '}$$$';
|
|
257
|
-
const leftWrapper = (this.isDna()) ? 'D(' :
|
|
258
|
-
(this.isRna()) ? 'R(' : ''; // no wrapper for peptides
|
|
259
|
-
const rightWrapper = (this.isDna() || this.isRna()) ? ')P' : ''; // no wrapper for peptides
|
|
260
|
-
|
|
261
|
-
const newColumn = this.getNewColumn(NOTATION.HELM);
|
|
262
|
-
// assign the values to the empty column
|
|
263
|
-
newColumn.init((idx: number) => {
|
|
264
|
-
const fastaPolymer = this.sourceColumn.get(idx);
|
|
265
|
-
const fastaMonomersArray = splitter(fastaPolymer);
|
|
266
|
-
const helmArray = [prefix];
|
|
267
|
-
let firstIteration = true;
|
|
268
|
-
for (let i = 0; i < fastaMonomersArray.length; i++) {
|
|
269
|
-
if (fastaMonomersArray[i] === fastaGapSymbol) {
|
|
270
|
-
// TODO: verify the correctness of gap symbols handling
|
|
271
|
-
helmArray.push(helmGapSymbol);
|
|
272
|
-
} else {
|
|
273
|
-
const dot = firstIteration ? '' : '.';
|
|
274
|
-
const item = [dot, leftWrapper, fastaMonomersArray[i], rightWrapper];
|
|
275
|
-
helmArray.push(item.join(''));
|
|
276
|
-
}
|
|
277
|
-
firstIteration = false;
|
|
278
|
-
}
|
|
279
|
-
helmArray.push(postfix);
|
|
280
|
-
return helmArray.join('');
|
|
281
|
-
});
|
|
282
|
-
return newColumn;
|
|
283
|
-
}
|
|
284
|
-
|
|
285
233
|
private convertHelmToFasta(): DG.Column {
|
|
286
234
|
// TODO: implementation
|
|
287
235
|
return this.getNewColumn(NOTATION.FASTA);
|
|
@@ -307,12 +255,10 @@ export class NotationConverter {
|
|
|
307
255
|
|
|
308
256
|
if (this.isFasta() && this.toSeparator(targetNotation) && tgtSeparator !== null)
|
|
309
257
|
return this.convertFastaToSeparator(tgtSeparator);
|
|
310
|
-
else if (this.isFasta() && this.toHelm(targetNotation))
|
|
311
|
-
return this.
|
|
258
|
+
else if ((this.isFasta() || this.isSeparator()) && this.toHelm(targetNotation))
|
|
259
|
+
return this.convertToHelm();
|
|
312
260
|
else if (this.isSeparator() && this.toFasta(targetNotation))
|
|
313
261
|
return this.convertSeparatorToFasta(tgtSeparator!);
|
|
314
|
-
else if (this.isSeparator() && this.toHelm(targetNotation))
|
|
315
|
-
return this.convertSeparatorToHelm();
|
|
316
262
|
else if (this.isHelm() && this.toFasta(targetNotation))
|
|
317
263
|
return this.convertHelmToFasta();
|
|
318
264
|
else
|
|
@@ -321,7 +267,14 @@ export class NotationConverter {
|
|
|
321
267
|
|
|
322
268
|
public constructor(col: DG.Column) {
|
|
323
269
|
this._sourceColumn = col;
|
|
324
|
-
|
|
325
|
-
|
|
270
|
+
const units = this._sourceColumn.tags[DG.TAGS.UNITS];
|
|
271
|
+
if (units !== null)
|
|
272
|
+
this._sourceUnits = units;
|
|
273
|
+
else
|
|
274
|
+
throw new Error('Units are not specified in column');
|
|
275
|
+
this._sourceNotation = this.getSourceNotation();
|
|
276
|
+
this._defaultGapSymbol = (this.isFasta()) ? this._defaultGapSymbolsDict.fasta :
|
|
277
|
+
(this.isHelm()) ? this._defaultGapSymbolsDict.helm :
|
|
278
|
+
this._defaultGapSymbolsDict.separator;
|
|
326
279
|
}
|
|
327
280
|
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
<html><head><meta charset="utf-8"/><title>Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=34f75e5127b8. Commit
|
|
1
|
+
<html><head><meta charset="utf-8"/><title>Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=34f75e5127b8. Commit 726a0649.</title><style type="text/css">html,
|
|
2
2
|
body {
|
|
3
3
|
font-family: Arial, Helvetica, sans-serif;
|
|
4
4
|
font-size: 1rem;
|
|
@@ -229,7 +229,7 @@ header {
|
|
|
229
229
|
font-size: 1rem;
|
|
230
230
|
padding: 0 0.5rem;
|
|
231
231
|
}
|
|
232
|
-
</style></head><body><div id="jesthtml-content"><header><h1 id="title">Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=34f75e5127b8. Commit
|
|
232
|
+
</style></head><body><div id="jesthtml-content"><header><h1 id="title">Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=34f75e5127b8. Commit 726a0649.</h1></header><div id="metadata-container"><div id="timestamp">Started: 2022-07-13 08:17:30</div><div id="summary"><div id="suite-summary"><div class="summary-total">Suites (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div><div id="test-summary"><div class="summary-total">Tests (1)</div><div class="summary-passed summary-empty">0 passed</div><div class="summary-failed">1 failed</div><div class="summary-pending summary-empty">0 pending</div></div></div></div><div id="suite-1" class="suite-container"><div class="suite-info"><div class="suite-path">/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts</div><div class="suite-time warn">111.144s</div></div><div class="suite-tests"><div class="test-result failed"><div class="test-info"><div class="test-suitename"> </div><div class="test-title">TEST</div><div class="test-status">failed</div><div class="test-duration">100.002s</div></div><div class="failureMessages"> <pre class="failureMsg">Error: thrown: "Exceeded timeout of 100000 ms for a test.
|
|
233
233
|
Use jest.setTimeout(newTimeout) to increase the timeout value, if this is a long-running test."
|
|
234
234
|
at Object.<anonymous> (/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:22:1)
|
|
235
235
|
at Runtime._execModule (/home/runner/work/public/public/packages/Bio/node_modules/jest-runtime/build/index.js:1646:24)
|