@datagrok/bio 1.7.2 → 1.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +2 -1
- package/dist/package-test.js +380 -309
- package/dist/package.js +295 -276
- package/package.json +3 -6
- package/src/package-test.ts +1 -1
- package/src/package.ts +13 -16
- package/src/tests/WebLogo-test.ts +18 -0
- package/src/tests/activity-cliffs-tests.ts +14 -9
- package/src/tests/convert-test.ts +1 -1
- package/src/tests/detectors-test.ts +8 -2
- package/src/tests/renderer-test.ts +8 -2
- package/src/tests/sequence-space-test.ts +7 -3
- package/src/tests/splitters-test.ts +35 -0
- package/src/utils/cell-renderer.ts +8 -5
- package/src/utils/convert.ts +1 -1
- package/{test-Bio-34f75e5127b8-ac96da52.html → test-Bio-34f75e5127b8-303cfb72.html} +2 -2
- package/src/tests/splitter-test.ts +0 -22
- package/src/utils/notation-converter.ts +0 -280
|
@@ -1,280 +0,0 @@
|
|
|
1
|
-
import * as DG from 'datagrok-api/dg';
|
|
2
|
-
import {WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
3
|
-
|
|
4
|
-
/**
 * Notation systems a Macromolecule column can be written in.
 *
 * Each member's value equals its name; the lower-cased value is the prefix of
 * the column units string (e.g. `fasta:SEQ:NT`).
 */
export const enum NOTATION {
  /** Units starting with `fasta`. */
  FASTA = 'FASTA',
  /** Units starting with `separator`. */
  SEPARATOR = 'SEPARATOR',
  /** Units starting with `helm`. */
  HELM = 'HELM',
}
|
|
10
|
-
|
|
11
|
-
/**
 * Class for handling conversion of notation systems in Macromolecule columns.
 *
 * The source notation is derived from the column's units tag (of the form
 * `fasta:SEQ:NT`, etc.). Every conversion writes its result into a newly
 * created column whose units carry the target notation.
 */
export class NotationConverter {
  private readonly _sourceColumn: DG.Column; // the column to be converted
  private readonly _sourceUnits: string; // units, of the form fasta:SEQ:NT, etc.
  private readonly _sourceNotation: NOTATION; // current notation (without :SEQ:NT, etc.)
  private readonly _defaultGapSymbol: string; // gap symbol of the source notation
  /** Gap symbol used by default in each notation. */
  private readonly _defaultGapSymbolsDict = {
    helm: '*',
    separator: '',
    fasta: '-',
  };

  private get sourceUnits(): string { return this._sourceUnits; }

  private get sourceColumn(): DG.Column { return this._sourceColumn; }

  public get sourceNotation(): NOTATION { return this._sourceNotation; }

  public get defaultGapSymbol(): string { return this._defaultGapSymbol; }

  /**
   * Separator stored in the 'separator' tag of the source column.
   *
   * @return {string} The separator
   * @throws {Error} 'Separator not set' when the tag value is null
   */
  public get separator(): string {
    const separator = this.sourceColumn.getTag('separator');
    if (separator === null)
      throw new Error('Separator not set');
    return separator;
  }

  public isFasta(): boolean { return this.sourceNotation === NOTATION.FASTA; }

  public isSeparator(): boolean { return this.sourceNotation === NOTATION.SEPARATOR; }

  public isHelm(): boolean { return this.sourceNotation === NOTATION.HELM; }

  public toFasta(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.FASTA; }

  public toSeparator(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.SEPARATOR; }

  public toHelm(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.HELM; }

  // The three checks below look at the (case-insensitive) ending of the units string.
  public isRna(): boolean { return this.sourceUnits.toLowerCase().endsWith('rna'); }

  public isDna(): boolean { return this.sourceUnits.toLowerCase().endsWith('dna'); }

  public isPeptide(): boolean { return this.sourceUnits.toLowerCase().endsWith('pt'); }

  /**
   * Associate notation types with the corresponding units.
   * The comparison ignores the letter case of the units.
   *
   * @return {NOTATION} Notation associated with the units type
   * @throws {Error} When the units start with none of the known notation names
   */
  private getSourceNotation(): NOTATION {
    const units = this.sourceUnits.toLowerCase();
    if (units.startsWith('fasta'))
      return NOTATION.FASTA;
    else if (units.startsWith('separator'))
      return NOTATION.SEPARATOR;
    else if (units.startsWith('helm'))
      return NOTATION.HELM;
    else
      throw new Error('The column has units that do not correspond to any notation');
  }

  /**
   * Create a new column of the specified notation type, filled with empty
   * strings and of the same length as sourceColumn.
   *
   * @param {NOTATION} targetNotation
   * @return {DG.Column}
   */
  private getNewColumn(targetNotation: NOTATION): DG.Column {
    const col = this.sourceColumn;
    const name = targetNotation + '(' + col.name + ')';
    const newColName = col.dataFrame.columns.getUnusedName(name);
    // placeholder values, overwritten by the converters through init()
    const newColumn = DG.Column.fromList('string', newColName, new Array(col.length).fill(''));
    newColumn.semType = DG.SEMTYPE.MACROMOLECULE;
    // getSourceNotation() accepts the notation prefix in any letter case, so the
    // prefix is swapped by position; a case-sensitive replace() would leave
    // units such as 'FASTA:SEQ:NT' unchanged.
    const unitsTail = this.sourceUnits.substring(this.sourceNotation.length);
    newColumn.setTag(DG.TAGS.UNITS, targetNotation.toLowerCase() + unitsTail);
    // TODO: specify cell renderers for all cases
    if (this.toFasta(targetNotation)) {
      newColumn.setTag(
        DG.TAGS.CELL_RENDERER,
        // TODO: replace by the enumeration value
        'Macromolecule');
    }
    return newColumn;
  }

  /**
   * Convert a Macromolecule column from FASTA to SEPARATOR notation
   *
   * @param {string} separator A specific separator to be used
   * @param {string} gapSymbol Gap symbol in FASTA, '-' by default
   * @return {DG.Column} A new column in SEPARATOR notation
   */
  private convertFastaToSeparator(separator: string, gapSymbol: string = '-'): DG.Column {
    // a function splitting FASTA sequence into an array of monomers:
    const splitterAsFasta = WebLogo.splitterAsFasta;

    const newColumn = this.getNewColumn(NOTATION.SEPARATOR);
    // assign the values to the newly created empty column
    newColumn.init((idx: number) => {
      const fastaMonomers: string[] = splitterAsFasta(this.sourceColumn.get(idx));
      // a gap becomes an empty item, i.e. two adjacent separators in the output
      return fastaMonomers
        .map((monomer) => (monomer === gapSymbol ? '' : monomer))
        .join(separator);
    });
    return newColumn;
  }

  /**
   * Convert a Macromolecule column from FASTA or SEPARATOR to HELM notation
   *
   * @param {string | null} sourceGapSymbol Gap symbol of the source column,
   * the default one of the source notation when null
   * @return {DG.Column} A new column in HELM notation
   * @throws {Error} When the units denote neither DNA, RNA nor peptide
   */
  private convertToHelm(sourceGapSymbol: string | null = null): DG.Column {
    const gapSymbol = (sourceGapSymbol === null) ? this.defaultGapSymbol : sourceGapSymbol;
    // A function splitting a sequence into an array of monomers according to
    // its notation
    const splitter = WebLogo.getSplitterForColumn(this.sourceColumn);

    const isDna = this.isDna();
    const isRna = this.isRna();

    let prefix: string;
    if (isDna)
      prefix = 'DNA1{';
    else if (isRna)
      prefix = 'RNA1{';
    else if (this.isPeptide())
      prefix = 'PEPTIDE1{';
    else
      throw new Error('Neither peptide, nor nucleotide');

    const postfix = '}$$$';
    // nucleotides are wrapped as D(x)P / R(x)P, no wrapper for peptides
    const leftWrapper = isDna ? 'D(' : (isRna ? 'R(' : '');
    const rightWrapper = (isDna || isRna) ? ')P' : '';
    const helmGapSymbol = this._defaultGapSymbolsDict.helm;

    const newColumn = this.getNewColumn(NOTATION.HELM);
    // assign the values to the empty column
    newColumn.init((idx: number) => {
      const sourceMonomers: string[] = splitter(this.sourceColumn.get(idx));
      const helmItems = sourceMonomers.map((monomer) => {
        const token = (monomer === gapSymbol) ? helmGapSymbol : monomer;
        return leftWrapper + token + rightWrapper;
      });
      return prefix + helmItems.join('.') + postfix;
    });
    return newColumn;
  }

  /**
   * Convert a Macromolecule column from SEPARATOR to FASTA notation.
   * Empty items (adjacent separators) become gaps, multi-character monomers
   * are enclosed in square brackets.
   *
   * @param {string | null} separator Separator of the source column, taken
   * from the column tag when null
   * @param {string} gapSymbol Gap symbol in FASTA, '-' by default
   * @return {DG.Column} A new column in FASTA notation
   */
  private convertSeparatorToFasta(
    separator: string | null = null,
    gapSymbol: string = '-'
  ): DG.Column {
    // TODO: consider automatic determining the separator

    // Only checks that a separator is available (the getter throws otherwise);
    // the splitter below is obtained from the source column itself.
    if (separator === null)
      separator = this.separator;

    // a function splitting the sequence into an array of monomers
    const splitter = WebLogo.getSplitterForColumn(this.sourceColumn);

    const newColumn = this.getNewColumn(NOTATION.FASTA);
    // assign the values to the empty column
    newColumn.init((idx: number) => {
      const separatorItems: string[] = splitter(this.sourceColumn.get(idx));
      const fastaMonomers = separatorItems.map((item) => {
        if (item.length === 0)
          return gapSymbol;
        // the case of a multi-character monomer
        return (item.length > 1) ? '[' + item + ']' : item;
      });
      return fastaMonomers.join('');
    });
    return newColumn;
  }

  /**
   * HELM to FASTA conversion.
   * NOTE: not implemented yet, the returned column contains empty strings only.
   *
   * @return {DG.Column} A new column in FASTA notation
   */
  private convertHelmToFasta(): DG.Column {
    // TODO: implementation
    return this.getNewColumn(NOTATION.FASTA);
  }

  /**
   * HELM to SEPARATOR conversion.
   * NOTE: not implemented yet, the returned column contains empty strings only.
   *
   * @return {DG.Column} A new column in SEPARATOR notation
   */
  private convertHelmToSeparator(): DG.Column {
    // TODO: implementation
    return this.getNewColumn(NOTATION.SEPARATOR);
  }

  /** Dispatcher method for notation conversion
   *
   * @param {NOTATION} targetNotation Notation we want to convert to
   * @param {string | null} tgtSeparator Possible separator
   * @return {DG.Column} Converted column
   * @throws {Error} When the target notation equals the source one, or when
   * converting to SEPARATOR without a target separator
   */
  public convert(targetNotation: NOTATION, tgtSeparator: string | null = null): DG.Column {
    // possible exceptions
    if (this.sourceNotation === targetNotation)
      throw new Error('Target notation is invalid');
    if (this.toSeparator(targetNotation) && tgtSeparator === null)
      throw new Error('Target separator is not specified');

    if (this.isFasta() && this.toSeparator(targetNotation) && tgtSeparator !== null)
      return this.convertFastaToSeparator(tgtSeparator);
    if ((this.isFasta() || this.isSeparator()) && this.toHelm(targetNotation))
      return this.convertToHelm();
    if (this.isSeparator() && this.toFasta(targetNotation))
      return this.convertSeparatorToFasta(tgtSeparator);
    if (this.isHelm() && this.toFasta(targetNotation))
      return this.convertHelmToFasta();
    // the only remaining combination is HELM -> SEPARATOR
    return this.convertHelmToSeparator();
  }

  /**
   * @param {DG.Column} col The Macromolecule column to be converted
   * @throws {Error} When the column has no units, or the units correspond to
   * no known notation
   */
  public constructor(col: DG.Column) {
    this._sourceColumn = col;
    const units = this._sourceColumn.tags[DG.TAGS.UNITS];
    // '== null' rejects undefined as well, so a missing tag is reported with
    // the message below instead of failing later on units.toLowerCase()
    if (units == null)
      throw new Error('Units are not specified in column');
    this._sourceUnits = units;
    this._sourceNotation = this.getSourceNotation();
    this._defaultGapSymbol = (this.isFasta()) ? this._defaultGapSymbolsDict.fasta :
      (this.isHelm()) ? this._defaultGapSymbolsDict.helm :
        this._defaultGapSymbolsDict.separator;
  }
}
|