@datagrok/bio 1.5.3 → 1.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +11 -0
- package/dist/package-test.js +548 -86
- package/dist/package.js +520 -67
- package/files/samples/sample_HELM.csv +540 -540
- package/files/samples/sample_MSA.csv +540 -540
- package/files/samples/sar-small.csv +201 -0
- package/package.json +5 -4
- package/src/const.ts +0 -1
- package/src/package.ts +37 -14
- package/src/tests/activity-cliffs-tests.ts +1 -0
- package/src/tests/convert-test.ts +31 -0
- package/src/tests/detectors-test.ts +30 -15
- package/src/utils/atomic-works.ts +252 -0
- package/src/utils/convert.ts +15 -3
- package/src/utils/multiple-sequence-alignment.ts +13 -8
- package/src/utils/notation-converter.ts +131 -0
- package/src/utils/sequence-space.ts +4 -4
- package/src/utils/split-to-monomers.ts +8 -0
- package/src/utils/utils.ts +40 -0
- package/tsconfig.json +1 -1
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
import * as OCL from 'openchemlib/full.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Builds one V3000 macromolecule molblock out of a sequence of monomer molblocks.
 *
 * Every monomer is first passed through rotateBackboneV3000 and the resulting
 * blocks are then concatenated by linkV3000.
 *
 * @param {string[]} molBlocks Monomer molfiles, in sequence order. The array is not modified.
 * @return {string} The molblock produced by linkV3000.
 */
export function getMacroMol(molBlocks: string[]): string {
  // Work on a fresh array so the caller's molfiles are left untouched
  // (the same library molfile may be reused for other sequences).
  const rotated = molBlocks.map((block) => rotateBackboneV3000(block));

  return linkV3000(rotated);
}
|
|
9
|
+
|
|
10
|
+
/**
 * Re-orients a monomer so that its backbone lies along the x axis.
 *
 * The molfile is converted with OpenChemLib's `toMolfileV3()`. The atom with index 1
 * (treated as the 5' end) and the atom with the highest index (treated as the 3' end)
 * define the backbone: the molecule is translated so their midpoint is the origin,
 * rotated so that both lie on the x axis with the 5' atom on the negative side, and
 * finally shifted so the 5' atom has x = 0. Only the x and y fields of the atom
 * lines are rewritten; everything else in the molblock is kept as produced by OCL.
 *
 * @param {string} molecule Molfile of a single monomer.
 * @return {string} V3000 molblock with updated x/y coordinates.
 */
function rotateBackboneV3000(molecule: string): string {
  let molBlock = OCL.Molecule.fromMolfile(molecule).toMolfileV3();
  const coordinates = extractAtomDataV3000(molBlock);
  const natom = coordinates.atomIndex.length;

  const indexFivePrime = coordinates.atomIndex.indexOf(1);
  const indexThreePrime = coordinates.atomIndex.indexOf(natom);

  const xCenter = (coordinates.x[indexThreePrime] + coordinates.x[indexFivePrime])/2;
  const yCenter = (coordinates.y[indexThreePrime] + coordinates.y[indexFivePrime])/2;

  //place to center
  for (let i = 0; i < natom; i++) {
    coordinates.x[i] -= xCenter;
    coordinates.y[i] -= yCenter;
  }

  // After centering, the 3' atom is the mirror image of the 5' atom, so it is enough
  // to send the 5' atom to polar angle PI (negative x axis). atan2 handles all four
  // quadrants and both axes; it gives the same angles as the former explicit
  // x == 0 / y == 0 branches and fixes the general case, where atan(y/x) lost the
  // quadrant and, for negative slopes, had the wrong sign.
  // (If both atoms coincide the direction is undefined and the rotation is arbitrary.)
  const angle = Math.PI - Math.atan2(coordinates.y[indexFivePrime], coordinates.x[indexFivePrime]);

  const cos = Math.cos(angle);
  const sin = Math.sin(angle);

  // counter-clockwise rotation by `angle` around the origin
  for (let i = 0; i < natom; i++) {
    const xAdd = coordinates.x[i];
    coordinates.x[i] = xAdd*cos - coordinates.y[i]*sin;
    coordinates.y[i] = xAdd*sin + coordinates.y[i]*cos;
  }

  //place to right
  const xShift = coordinates.x[indexFivePrime];
  for (let i = 0; i < natom; i++)
    coordinates.x[i] -= xShift;

  //rewrite molBlock
  let index = molBlock.indexOf('M  V30 BEGIN ATOM'); // V3000 index for atoms coordinates
  index = molBlock.indexOf('\n', index);
  let indexEnd = index;
  for (let i = 0; i < natom; i++) {
    // atom line layout: "M  V30 <index> <type> <x> <y> ..."
    index = molBlock.indexOf('V30', index) + 4; // start of <index>
    index = molBlock.indexOf(' ', index) + 1; // start of <type>
    index = molBlock.indexOf(' ', index) + 1; // start of <x>
    indexEnd = molBlock.indexOf(' ', index) + 1; // start of <y>
    indexEnd = molBlock.indexOf(' ', indexEnd); // end of <y>

    molBlock = molBlock.slice(0, index) +
      coordinates.x[i] + ' ' + coordinates.y[i] +
      molBlock.slice(indexEnd);

    index = molBlock.indexOf('\n', index) + 1;
  }

  return molBlock;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Reads the atom block of a V3000 molblock.
 *
 * The number of atoms is taken from the COUNTS line (extractAtomsBondsNumbersV3000);
 * that many lines following 'M  V30 BEGIN ATOM' are then parsed as
 * "M  V30 <index> <type> <x> <y> ...", with fields separated by single spaces.
 *
 * @param {string} molBlock V3000 molblock.
 * @return {object} Parallel arrays, in file order: atomIndex, atomType, x and y.
 */
function extractAtomDataV3000(
  molBlock: string): {atomIndex: number[], atomType: string[], x: number[], y: number[]} {
  const numbers = extractAtomsBondsNumbersV3000(molBlock);
  // V3000 lines start with 'M', two spaces, 'V30'
  let index = molBlock.indexOf('M  V30 BEGIN ATOM'); // V3000 index for atoms coordinates
  index = molBlock.indexOf('\n', index);
  let indexEnd = index;

  const indexes: number[] = new Array(numbers.natom);
  const types: string[] = new Array(numbers.natom);
  const x: number[] = new Array(numbers.natom);
  const y: number[] = new Array(numbers.natom);

  for (let i = 0; i < numbers.natom; i++) {
    // atom index: first field after the "V30 " prefix
    index = molBlock.indexOf('V30', index) + 4;
    indexEnd = molBlock.indexOf(' ', index);
    indexes[i] = parseInt(molBlock.substring(index, indexEnd));

    // atom type
    index = indexEnd + 1;
    indexEnd = molBlock.indexOf(' ', index);
    types[i] = molBlock.substring(index, indexEnd);

    // x coordinate
    index = indexEnd + 1;
    indexEnd = molBlock.indexOf(' ', index);
    x[i] = parseFloat(molBlock.substring(index, indexEnd));

    // y coordinate
    index = indexEnd + 1;
    indexEnd = molBlock.indexOf(' ', index);
    y[i] = parseFloat(molBlock.substring(index, indexEnd));

    // jump to the beginning of the next line
    index = molBlock.indexOf('\n', index) + 1;
  }

  return {atomIndex: indexes, atomType: types, x: x, y: y};
}
|
|
105
|
+
|
|
106
|
+
/**
 * Concatenates monomer V3000 molblocks into one V3000 macromolecule molblock.
 *
 * For every block, in order:
 *  - continuation lines ("-\nM  V30 ") are joined;
 *  - atom and bond numbers are offset by the running totals;
 *  - in every block but the first, the line of the atom with index 1 is dropped;
 *    because the atom offset grows by (natom - 1) per block, references to that
 *    atom resolve to the number given to the previous block's highest-numbered atom;
 *  - x coordinates are shifted by the accumulated x extent of the previous blocks,
 *    and x/y are rounded to 4 decimals;
 *  - atoms listed in MDLV30/STEABS collections are gathered (with the atom offset).
 *
 * The array passed in is not modified.
 *
 * @param {string[]} molBlocks V3000 molblocks, in sequence order.
 * @return {string} The combined V3000 molblock.
 */
function linkV3000(molBlocks: string[]): string {
  const steabsPrefix = 'M  V30 MDLV30/STEABS ATOMS=(';

  let macroMolBlock = '\nDatagrok macromolecule handler\n\n';
  macroMolBlock += '  0  0  0  0  0  0            999 V3000\n';
  macroMolBlock += 'M  V30 BEGIN CTAB\n';
  let atomBlock = '';
  let bondBlock = '';
  let collectionBlock = '';
  const collection: number[] = [];
  let natom = 0; // offset added to atom numbers of the current block
  let nbond = 0; // offset added to bond numbers of the current block
  let xShift = 0; // accumulated x extent of the blocks processed so far

  for (let i = 0; i < molBlocks.length; i++) {
    // join continuation lines so that every entry sits on a single line
    let block = molBlocks[i].replaceAll('(-\nM  V30 ', '(')
      .replaceAll('-\nM  V30 ', '').replaceAll(' )', ')');
    const numbers = extractAtomsBondsNumbersV3000(block);
    const coordinates = extractAtomDataV3000(block);
    const totalShift = xShift - coordinates.x[0];

    let indexAtoms = block.indexOf('M  V30 BEGIN ATOM'); // V3000 index for atoms coordinates
    indexAtoms = block.indexOf('\n', indexAtoms);
    let index = indexAtoms;
    let indexEnd = indexAtoms;

    for (let j = 0; j < numbers.natom; j++) {
      if (coordinates.atomIndex[j] != 1 || i == 0) {
        //rewrite atom number
        index = block.indexOf('V30', index) + 4;
        indexEnd = block.indexOf(' ', index);
        const atomNumber = parseInt(block.substring(index, indexEnd)) + natom;
        block = block.slice(0, index) + atomNumber + block.slice(indexEnd);

        //rewrite coordinates
        index = block.indexOf(' ', index) + 1; // start of atom type
        index = block.indexOf(' ', index) + 1; // start of x
        indexEnd = block.indexOf(' ', index);

        let coordinate = Math.round(10000*(parseFloat(block.substring(index, indexEnd)) + totalShift))/10000;
        block = block.slice(0, index) + coordinate + block.slice(indexEnd);

        index = block.indexOf(' ', index) + 1; // start of y
        indexEnd = block.indexOf(' ', index);
        coordinate = Math.round(10000*(parseFloat(block.substring(index, indexEnd))))/10000;
        block = block.slice(0, index) + coordinate + block.slice(indexEnd);

        index = block.indexOf('\n', index) + 1;
      } else {
        // drop the whole line (including the newline that precedes it)
        index = block.indexOf('M  V30', index) - 1;
        indexEnd = block.indexOf('\n', index + 1);
        block = block.slice(0, index) + block.slice(indexEnd);
      }
    }

    const indexAtomsEnd = block.indexOf('M  V30 END ATOM');
    atomBlock += block.substring(indexAtoms + 1, indexAtomsEnd);

    let indexBonds = block.indexOf('M  V30 BEGIN BOND'); // V3000 index for bonds
    indexBonds = block.indexOf('\n', indexBonds);
    index = indexBonds;
    indexEnd = indexBonds;

    for (let j = 0; j < numbers.nbond; j++) {
      //rewrite bond number
      index = block.indexOf('V30', index) + 4;
      indexEnd = block.indexOf(' ', index);
      const bondNumber = parseInt(block.substring(index, indexEnd)) + nbond;
      block = block.slice(0, index) + bondNumber + block.slice(indexEnd);

      //rewrite atom pair in bond
      index = block.indexOf(' ', index) + 1; // start of bond type
      index = block.indexOf(' ', index) + 1; // start of first atom
      indexEnd = block.indexOf(' ', index);
      let atomNumber = parseInt(block.substring(index, indexEnd)) + natom;
      block = block.slice(0, index) + atomNumber + block.slice(indexEnd);
      index = block.indexOf(' ', index) + 1; // start of second atom
      // the second atom may be the last field on the line
      indexEnd = Math.min(block.indexOf('\n', index), block.indexOf(' ', index));
      atomNumber = parseInt(block.substring(index, indexEnd)) + natom;
      block = block.slice(0, index) + atomNumber + block.slice(indexEnd);

      index = block.indexOf('\n', index) + 1;
    }

    const indexBondEnd = block.indexOf('M  V30 END BOND');
    bondBlock += block.substring(indexBonds + 1, indexBondEnd);

    let indexCollection = block.indexOf(steabsPrefix); // V3000 index for collections

    while (indexCollection != -1) {
      indexCollection += steabsPrefix.length;
      const collectionEnd = block.indexOf(')', indexCollection);
      // the first entry inside the parentheses is the number of atoms - skip it
      const collectionEntries = block.substring(indexCollection, collectionEnd).split(' ').slice(1);
      collectionEntries.forEach((e) => {
        collection.push(parseInt(e) + natom);
      });
      indexCollection = block.indexOf(steabsPrefix, collectionEnd);
    }

    natom += numbers.natom - 1;
    nbond += numbers.nbond;
    xShift += coordinates.x[numbers.natom - 1] - coordinates.x[0];
  }

  // Emit the collected atoms, 4 per continuation line. Nothing is written when no
  // atoms were collected: a bare "ATOMS=(0 -" line would be left unterminated.
  if (collection.length > 0) {
    const entries = 4;
    collectionBlock += steabsPrefix + collection.length + ' -\n';
    for (let start = 0; start < collection.length; start += entries) {
      const isLastLine = start + entries >= collection.length;
      collectionBlock += 'M  V30 ' + collection.slice(start, start + entries).join(' ') +
        (isLastLine ? ')\n' : ' -\n');
    }
  }

  //generate file
  // the offset counts (natom - 1) per block; add back the first block's atom 1
  if (molBlocks.length > 0)
    natom++;
  macroMolBlock += 'M  V30 COUNTS ' + natom + ' ' + nbond + ' 0 0 0\n';
  macroMolBlock += 'M  V30 BEGIN ATOM\n';
  macroMolBlock += atomBlock;
  macroMolBlock += 'M  V30 END ATOM\n';
  macroMolBlock += 'M  V30 BEGIN BOND\n';
  macroMolBlock += bondBlock;
  macroMolBlock += 'M  V30 END BOND\n';
  if (collectionBlock !== '') {
    macroMolBlock += 'M  V30 BEGIN COLLECTION\n';
    macroMolBlock += collectionBlock;
    macroMolBlock += 'M  V30 END COLLECTION\n';
  }
  macroMolBlock += 'M  V30 END CTAB\n';
  macroMolBlock += 'M  END\n';

  return macroMolBlock;
}
|
|
239
|
+
|
|
240
|
+
/**
 * Reads the atom and bond counts from the COUNTS line of a V3000 molblock.
 *
 * The first occurrence of "COUNTS <natom> <nbond>" is used. Both LF and CRLF
 * line endings are accepted.
 *
 * @param {string} molBlock V3000 molblock.
 * @return {{natom: number, nbond: number}} Number of atoms and number of bonds.
 * @throws {Error} If the molblock has no well-formed COUNTS line.
 */
function extractAtomsBondsNumbersV3000(molBlock: string): {natom: number, nbond: number} {
  const counts = /COUNTS\s+(\d+)\s+(\d+)/.exec(molBlock);
  // Fail here with a clear message instead of returning NaN, which would only
  // surface later as an unrelated error (e.g. an invalid array length).
  if (counts === null)
    throw new Error('V3000 molblock has no valid COUNTS line');

  return {natom: parseInt(counts[1], 10), nbond: parseInt(counts[2], 10)};
}
|
|
252
|
+
|
package/src/utils/convert.ts
CHANGED
|
@@ -1,11 +1,21 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import {NotationConverter} from './notation-converter';
|
|
3
4
|
|
|
5
|
+
/**
|
|
6
|
+
* Converts notations of a Macromolecule column
|
|
7
|
+
*
|
|
8
|
+
* @param {DG.Column} col Column with 'Macromolecule' semantic type
|
|
9
|
+
*/
|
|
4
10
|
export function convert(col: DG.Column): void {
|
|
5
11
|
const current = col.tags[DG.TAGS.UNITS];
|
|
6
12
|
//TODO: read all notations
|
|
7
|
-
const
|
|
8
|
-
|
|
13
|
+
const units = [
|
|
14
|
+
'fasta',
|
|
15
|
+
'separator',
|
|
16
|
+
'HELM'
|
|
17
|
+
];
|
|
18
|
+
const choices = ui.choiceInput('convert to', '', units.filter((e) => e !== current));
|
|
9
19
|
|
|
10
20
|
ui.dialog('Convert sequence')
|
|
11
21
|
.add(
|
|
@@ -17,7 +27,9 @@ export function convert(col: DG.Column): void {
|
|
|
17
27
|
)
|
|
18
28
|
.onOK(() => {
|
|
19
29
|
//TODO: create new converted column
|
|
20
|
-
|
|
30
|
+
const converter = new NotationConverter(col, choices.value!);
|
|
31
|
+
const newColumn = converter.convert();
|
|
32
|
+
col.dataFrame.columns.add(newColumn);
|
|
21
33
|
})
|
|
22
34
|
.show();
|
|
23
35
|
}
|
|
@@ -29,12 +29,12 @@ function _fastaToStrings(fasta: string): string[] {
|
|
|
29
29
|
/**
|
|
30
30
|
* Runs Aioli environment with kalign tool.
|
|
31
31
|
*
|
|
32
|
-
* @param {DG.Column}
|
|
32
|
+
* @param {DG.Column} srcCol Column with sequences.
|
|
33
33
|
* @param {boolean} isAligned Whether the column is aligned.
|
|
34
34
|
* @return {Promise<DG.Column>} Aligned sequences.
|
|
35
35
|
*/
|
|
36
|
-
export async function runKalign(
|
|
37
|
-
let sequences =
|
|
36
|
+
export async function runKalign(srcCol: DG.Column, isAligned = false): Promise<DG.Column> {
|
|
37
|
+
let sequences = srcCol.toList();
|
|
38
38
|
|
|
39
39
|
if (isAligned)
|
|
40
40
|
sequences = sequences.map((v: string, _) => AlignedSequenceEncoder.clean(v).replace(/\-/g, ''));
|
|
@@ -55,15 +55,20 @@ export async function runKalign(col: DG.Column, isAligned = false) : Promise<DG.
|
|
|
55
55
|
console.warn(output);
|
|
56
56
|
|
|
57
57
|
const aligned = _fastaToStrings(buf).slice(0, sequences.length);
|
|
58
|
-
const
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
58
|
+
const tgtCol = DG.Column.fromStrings(`msa(${srcCol.name})`, aligned);
|
|
59
|
+
|
|
60
|
+
// units
|
|
61
|
+
const srcUnits = srcCol.getTag(DG.TAGS.UNITS);
|
|
62
|
+
const tgtUnits = srcUnits.split(':').map((p, i) => i == 1 ? p + '.MSA' : p).join(':');
|
|
63
|
+
|
|
64
|
+
tgtCol.setTag(DG.TAGS.UNITS, tgtUnits);
|
|
65
|
+
tgtCol.semType = C.SEM_TYPES.Macro_Molecule;
|
|
66
|
+
return tgtCol;
|
|
62
67
|
}
|
|
63
68
|
|
|
64
69
|
export async function testMSAEnoughMemory(col: DG.Column): Promise<void> {
|
|
65
70
|
const sequencesCount = col.length;
|
|
66
|
-
const delta = sequencesCount/100;
|
|
71
|
+
const delta = sequencesCount / 100;
|
|
67
72
|
|
|
68
73
|
for (let i = delta; i < sequencesCount; i += delta) {
|
|
69
74
|
try {
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
|
|
3
|
+
// export const enum NOTATION {
|
|
4
|
+
// // these values can be changed to "user-friendly" ones later on
|
|
5
|
+
// FASTA = 'fasta',
|
|
6
|
+
// SEPARATOR = 'separator',
|
|
7
|
+
// HELM = 'helm'
|
|
8
|
+
// }
|
|
9
|
+
|
|
10
|
+
/**
 * Converts a Macromolecule column from one notation (fasta, separator, HELM)
 * to another.
 *
 * The source notation is derived from the column's units tag; the target
 * notation is supplied by the caller. Notation names are compared
 * case-insensitively, so both 'HELM' and 'helm' are accepted.
 *
 * NOTE: the individual converters are still placeholders (see the TODOs): they
 * return a column named 'converted' filled with a marker string.
 */
export class NotationConverter {
  private _sourceColumn: DG.Column; // the column to be converted
  private _currentUnits: string; // units of the form fasta:SEQ:NT, etc.
  private _sourceNotation: string; // current notation (without :SEQ:NT, etc.), lower case
  private _targetNotation: string; // requested notation, lower case

  private get sourceColumn(): DG.Column { return this._sourceColumn; }
  private get currentUnits(): string { return this._currentUnits; }
  private get sourceNotation(): string { return this._sourceNotation; }
  private get targetNotation(): string { return this._targetNotation; }

  // these values can be changed to "user-friendly" ones later on;
  // they must stay lower case because notations are normalised before comparison
  private _fasta = 'fasta';
  private _separator = 'separator';
  private _helm = 'helm';

  /** @return {boolean} Whether the source column is in fasta notation. */
  public isFasta(): boolean { return this.sourceNotation == this._fasta; }

  /** @return {boolean} Whether the source column is in separator notation. */
  public isSeparator(): boolean { return this.sourceNotation == this._separator; }

  /** @return {boolean} Whether the source column is in HELM notation. */
  public isHelm(): boolean { return this.sourceNotation == this._helm; }

  /**
   * Derives the notation from the units tag: units starting with 'fasta' or
   * 'separator' (any case) map to those notations, anything else to HELM.
   *
   * @return {string} One of the notation constants of this class.
   */
  private determineSourceNotation(): string {
    const units = this.currentUnits.toLowerCase();
    if (units.startsWith('fasta'))
      return this._fasta;
    else if (units.startsWith('separator'))
      return this._separator;
    else
      // TODO: handle possible exceptions
      return this._helm;
  }

  /**
   * Creates the placeholder result shared by all not-yet-implemented converters.
   *
   * @param {string} marker Value every row of the new column is filled with.
   * @return {DG.Column} String column named 'converted' with the Macromolecule semantic type.
   */
  private createPlaceholderColumn(marker: string): DG.Column {
    const len = this.sourceColumn.length;
    const newColName = 'converted';
    const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill(marker));
    newColumn.semType = 'Macromolecule';
    return newColumn;
  }

  private convertFastaToSeparator(): DG.Column {
    // TODO: implementation
    return this.createPlaceholderColumn('fasta2sep');
  }

  private convertFastaToHelm(): DG.Column {
    // TODO: implementation
    return this.createPlaceholderColumn('fasta2helm');
  }

  private convertSeparatorToFasta(): DG.Column {
    // TODO: implementation
    return this.createPlaceholderColumn('sep2fasta');
  }

  private convertSeparatorToHelm(): DG.Column {
    // TODO: implementation
    return this.createPlaceholderColumn('sep2helm');
  }

  private convertHelmToFasta(): DG.Column {
    // TODO: implementation
    return this.createPlaceholderColumn('helm2fasta');
  }

  private convertHelmToSeparator(): DG.Column {
    // TODO: implementation
    return this.createPlaceholderColumn('helm2sep');
  }

  // TODO: write the bodies of converter methods
  /**
   * Converts the source column to the target notation.
   *
   * @return {DG.Column} New column holding the converted sequences.
   * @throws {Error} If the source/target pair is not a supported conversion
   * (unknown target notation, or target equal to the source notation).
   */
  public convert(): DG.Column {
    const source = this.sourceNotation;
    const target = this.targetNotation;

    if (source == this._fasta && target == this._separator)
      return this.convertFastaToSeparator();
    else if (source == this._fasta && target == this._helm)
      return this.convertFastaToHelm();
    else if (source == this._separator && target == this._fasta)
      return this.convertSeparatorToFasta();
    else if (source == this._separator && target == this._helm)
      return this.convertSeparatorToHelm();
    else if (source == this._helm && target == this._fasta)
      return this.convertHelmToFasta();
    else if (source == this._helm && target == this._separator)
      return this.convertHelmToSeparator();

    // Previously every unmatched pair silently ran the HELM -> separator converter.
    throw new Error(`Unsupported notation conversion: ${source} -> ${target}`);
  }

  /**
   * @param {DG.Column} col Column to be converted; its units tag determines the source notation.
   * @param {string} target Target notation: 'fasta', 'separator' or 'HELM' (case-insensitive).
   */
  public constructor(col: DG.Column, target: string) {
    this._sourceColumn = col;
    // a column without a units tag is handled like any other unrecognised units value
    this._currentUnits = this._sourceColumn.tags[DG.TAGS.UNITS] ?? '';
    this._sourceNotation = this.determineSourceNotation();
    // callers pass 'HELM' in upper case while the constants are lower case
    this._targetNotation = target.toLowerCase();
  }
}
|
|
@@ -18,9 +18,9 @@ export async function sequenceSpace(spaceParams: ISequenceSpaceParams): Promise<
|
|
|
18
18
|
const sepFinal = sep ? sep === '.' ? '\\\.' : sep : '-';
|
|
19
19
|
const regex = new RegExp(sepFinal, 'g');
|
|
20
20
|
if (Object.keys(AvailableMetrics['String']).includes(spaceParams.similarityMetric))
|
|
21
|
-
preparedData = spaceParams.seqCol.toList().map((v) => v.replace(regex, '')) as string[];
|
|
21
|
+
preparedData = spaceParams.seqCol.toList().map((v: string) => v.replace(regex, '')) as string[];
|
|
22
22
|
else
|
|
23
|
-
preparedData = spaceParams.seqCol.toList().map((v) => v.replace(regex, '')) as string[];
|
|
23
|
+
preparedData = spaceParams.seqCol.toList().map((v: string) => v.replace(regex, '')) as string[];
|
|
24
24
|
} else {
|
|
25
25
|
preparedData = spaceParams.seqCol.toList();
|
|
26
26
|
}
|
|
@@ -31,13 +31,13 @@ export async function sequenceSpace(spaceParams: ISequenceSpaceParams): Promise<
|
|
|
31
31
|
spaceParams.similarityMetric as StringMetrics | BitArrayMetrics,
|
|
32
32
|
spaceParams.options);
|
|
33
33
|
const cols: DG.Column[] = spaceParams.embedAxesNames.map(
|
|
34
|
-
(name, index) => DG.Column.fromFloat32Array(name, sequenceSpaceResult.embedding[index]));
|
|
34
|
+
(name: string, index: number) => DG.Column.fromFloat32Array(name, sequenceSpaceResult.embedding[index]));
|
|
35
35
|
return {distance: sequenceSpaceResult.distance, coordinates: new DG.ColumnList(cols)};
|
|
36
36
|
}
|
|
37
37
|
|
|
38
38
|
|
|
39
39
|
/**
 * Proposes names for a new pair of embedding columns: 'Embed_X_<n>' and 'Embed_Y_<n>'.
 *
 * <n> starts at one plus the number of existing columns whose name contains
 * 'Embed_X' and is increased until neither of the two names is already used
 * in the dataframe.
 *
 * @param {DG.DataFrame} df Dataframe the columns are going to be added to.
 * @return {string[]} The two column names, X first.
 */
export function getEmbeddingColsNames(df: DG.DataFrame): string[] {
  const axes = ['Embed_X', 'Embed_Y'];
  const existing: string[] = df.columns.names();

  let colNameInd = existing.filter((it: string) => it.includes(axes[0])).length + 1;
  // The count alone can point at a name that is already taken, e.g. when an
  // earlier embedding pair was removed; skip forward to the first free suffix.
  while (axes.some((it) => existing.includes(`${it}_${colNameInd}`)))
    ++colNameInd;

  return axes.map((it) => `${it}_${colNameInd}`);
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
import { WebLogo, SplitterFunc } from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
|
|
5
|
+
export const HELM_CORE_LIB_MONOMER_COL = 'symbol';
|
|
6
|
+
export const HELM_CORE_LIB_MOLFILE_COL = 'molfile';
|
|
7
|
+
export const HELM_CORE_LIB_FILENAME = '/samples/HELMCoreLibrary.json';
|
|
8
|
+
|
|
9
|
+
/**
 * Looks up the molfile of every monomer of every sequence in a column.
 *
 * Sequences are split with the splitter WebLogo provides for the column's units
 * and 'separator' tags; empty monomers are skipped. As soon as a monomer without
 * a molfile in the library is met, a warning is shown and null is returned for
 * the whole column.
 *
 * @param {DG.Column} col Column with sequences.
 * @param {DG.DataFrame} monomersLib Monomer library (see createMomomersMolDict).
 * @return {string[][] | null} One array of molfiles per row, or null if a monomer is missing.
 */
export function getMolfilesFromSeq(col: DG.Column, monomersLib: DG.DataFrame): string[][] | null {
  const splitSequence: SplitterFunc = WebLogo.getSplitter(col.tags[DG.TAGS.UNITS], col.getTag('separator'));
  const molfileBySymbol = createMomomersMolDict(monomersLib);

  const result: string[][] = [];
  for (let row = 0; row < col.length; ++row) {
    const rowMolfiles: string[] = [];
    for (const symbol of splitSequence(col.get(row))) {
      // gaps / empty monomers carry no structure
      if (!symbol)
        continue;

      const molfile = molfileBySymbol[symbol];
      if (!molfile) {
        grok.shell.warning(`Monomer ${symbol} is missing in HELM library. Structure cannot be created`);
        return null;
      }
      rowMolfiles.push(molfile);
    }
    result.push(rowMolfiles);
  }
  return result;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Builds a monomer symbol -> molfile dictionary from a monomer library dataframe.
 *
 * Symbols are read from the HELM_CORE_LIB_MONOMER_COL column and molfiles from the
 * HELM_CORE_LIB_MOLFILE_COL column; if a symbol occurs more than once, the last
 * row wins.
 *
 * @param {DG.DataFrame} lib Monomer library.
 * @return {{[key: string]: string}} Molfile for every monomer symbol of the library.
 * @throws {Error} If the library lacks one of the two columns.
 */
export function createMomomersMolDict(lib: DG.DataFrame): {[key: string]: string} {
  const monomersCol = lib.col(HELM_CORE_LIB_MONOMER_COL);
  const molCol = lib.col(HELM_CORE_LIB_MOLFILE_COL);
  // Report a malformed library explicitly rather than failing on a null dereference.
  if (monomersCol == null || molCol == null) {
    throw new Error(`Monomer library must contain columns '${HELM_CORE_LIB_MONOMER_COL}' ` +
      `and '${HELM_CORE_LIB_MOLFILE_COL}'`);
  }

  const dict: {[key: string]: string} = {};
  for (let i = 0; i < lib.rowCount; ++i)
    dict[monomersCol.get(i)] = molCol.get(i);

  return dict;
}
|
package/tsconfig.json
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
// "incremental": true, /* Enable incremental compilation */
|
|
7
7
|
"target": "es6", /* Specify ECMAScript target version: 'ES3' (default), 'ES5', 'ES2015', 'ES2016', 'ES2017', 'ES2018', 'ES2019', 'ES2020', or 'ESNEXT'. */
|
|
8
8
|
"module": "es2020", /* Specify module code generation: 'none', 'commonjs', 'amd', 'system', 'umd', 'es2015', 'es2020', or 'ESNext'. */
|
|
9
|
-
"lib": ["es2020", "dom"], /* Specify library files to be included in the compilation. */
|
|
9
|
+
"lib": ["es2020", "dom", "ES2021.String"], /* Specify library files to be included in the compilation. */
|
|
10
10
|
// "allowJs": true, /* Allow javascript files to be compiled. */
|
|
11
11
|
// "checkJs": true, /* Report errors in .js files. */
|
|
12
12
|
// "jsx": "preserve", /* Specify JSX code generation: 'preserve', 'react-native', 'react', 'react-jsx' or 'react-jsxdev'. */
|