@datagrok/bio 1.4.2 → 1.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +17 -4
- package/dist/package-test.js +862 -635
- package/dist/package.js +664 -584
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +1665 -1651
- package/files/sample_MSA.csv +541 -0
- package/files/samples/id.csv +313 -0
- package/package.json +7 -6
- package/setup.cmd +10 -1
- package/src/package-test.ts +1 -0
- package/src/package.ts +70 -25
- package/src/tests/activity-cliffs-tests.ts +49 -0
- package/src/tests/detectors-test.ts +138 -34
- package/src/tests/sequence-space-test.ts +21 -19
- package/src/tests/utils.ts +9 -3
- package/src/utils/convert.ts +8 -9
- package/src/utils/multiple-sequence-alignment.ts +1 -1
- package/src/utils/sequence-activity-cliffs.ts +36 -0
- package/src/utils/sequence-space.ts +30 -30
|
@@ -7,6 +7,8 @@ import * as DG from 'datagrok-api/dg';
|
|
|
7
7
|
import {mmSemType} from '../const';
|
|
8
8
|
import {importFasta} from '../package';
|
|
9
9
|
|
|
10
|
+
type DfReaderFunc = () => Promise<DG.DataFrame>;
|
|
11
|
+
|
|
10
12
|
category('detectors', () => {
|
|
11
13
|
const csvDf1: string = `col1
|
|
12
14
|
1
|
|
@@ -87,36 +89,126 @@ YNR-WYV-KHP
|
|
|
87
89
|
MWRSWY-CKHP
|
|
88
90
|
`;
|
|
89
91
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
92
|
+
const enum Samples {
|
|
93
|
+
peptidesComplex = 'PeptidesComplex',
|
|
94
|
+
fastaCsv = 'FastaCsv',
|
|
95
|
+
msaComplex = 'MsaComplex',
|
|
96
|
+
idCsv = 'IdCsv',
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
const samples: { [key: string]: string } = {
|
|
100
|
+
'PeptidesComplex': 'System:AppData/Bio/samples/peptides_complex_msa.csv',
|
|
101
|
+
'FastaCsv': 'System:AppData/Bio/samples/sample_FASTA.csv',
|
|
102
|
+
'MsaComplex': 'System:AppData/Bio/samples/sample_MSA.csv',
|
|
103
|
+
'IdCsv': 'System:AppData/Bio/samples/id.csv',
|
|
104
|
+
};
|
|
105
|
+
|
|
106
|
+
const _samplesDfs: { [key: string]: Promise<DG.DataFrame> } = {};
|
|
107
|
+
const readSamplesCsv: (key: string) => DfReaderFunc = (key: string) => {
|
|
108
|
+
return async () => {
|
|
109
|
+
if (!(key in _samplesDfs)) {
|
|
110
|
+
_samplesDfs[key] = (async (): Promise<DG.DataFrame> => {
|
|
111
|
+
const csv: string = await grok.dapi.files.readAsText(samples[key]);
|
|
112
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
113
|
+
await grok.data.detectSemanticTypes(df);
|
|
114
|
+
return df;
|
|
115
|
+
})();
|
|
116
|
+
}
|
|
117
|
+
return _samplesDfs[key];
|
|
118
|
+
};
|
|
119
|
+
};
|
|
120
|
+
|
|
121
|
+
const _csvDfs: { [key: string]: Promise<DG.DataFrame> } = {};
|
|
122
|
+
const readCsv: (key: string, csv: string) => DfReaderFunc = (key: string, csv: string) => {
|
|
123
|
+
return async () => {
|
|
124
|
+
if (!(key in _csvDfs)) {
|
|
125
|
+
_csvDfs[key] = (async (): Promise<DG.DataFrame> => {
|
|
126
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
127
|
+
await grok.data.detectSemanticTypes(df);
|
|
128
|
+
return df;
|
|
129
|
+
})();
|
|
130
|
+
}
|
|
131
|
+
return _csvDfs[key];
|
|
132
|
+
};
|
|
133
|
+
};
|
|
134
|
+
|
|
135
|
+
test('Negative1', async () => { await _testNeg(readCsv('csvDf1', csvDf1), 'col1'); });
|
|
136
|
+
test('Negative2', async () => { await _testNeg(readCsv('csvDf2', csvDf2), 'col1'); });
|
|
137
|
+
test('Negative3', async () => { await _testNeg(readCsv('csvDf3', csvDf3), 'col1'); });
|
|
138
|
+
test('NegativeSmiles', async () => { await _testNeg(readCsv('csvDfSmiles', csvDfSmiles), 'col1'); });
|
|
139
|
+
|
|
140
|
+
test('N1', async () => { await _testN1(csvDfN1); });
|
|
141
|
+
test('AA1', async () => { await _testAA1(csvDfAA1); });
|
|
142
|
+
test('MsaN1', async () => { await _testMsaN1(csvDfMsaN1); });
|
|
143
|
+
test('MsaAA1', async () => { await _testMsaAA1(csvDfMsaAA1); });
|
|
144
|
+
|
|
145
|
+
test('SepNt', async () => { await _testSepNt(csvDfSepNt, '*'); });
|
|
146
|
+
test('SepPt', async () => { await _testSepPt(csvDfSepPt, '-'); });
|
|
147
|
+
test('SepUn1', async () => { await _testSepUn(csvDfSepUn1, '-'); });
|
|
148
|
+
test('SepUn2', async () => { await _testSepUn(csvDfSepUn2, '/'); });
|
|
149
|
+
|
|
150
|
+
test('SepMsaN1', async () => { await _testSepMsaN1(csvDfSepMsaN1); });
|
|
151
|
+
|
|
152
|
+
test('SamplesFastaCsvPt', async () => {
|
|
153
|
+
await _testSamplesFastaCsvPt();
|
|
154
|
+
});
|
|
155
|
+
test('SamplesFastaCsvNegativeEntry', async () => {
|
|
156
|
+
await _testNeg(readSamplesCsv(Samples.fastaCsv), 'Entry');
|
|
157
|
+
});
|
|
158
|
+
test('SamplesFastaCsvNegativeLength', async () => {
|
|
159
|
+
await _testNeg(readSamplesCsv(Samples.fastaCsv), 'Length');
|
|
160
|
+
});
|
|
161
|
+
test('SamplesFastaCsvNegativeUniProtKB', async () => {
|
|
162
|
+
await _testNeg(readSamplesCsv(Samples.fastaCsv), 'UniProtKB');
|
|
163
|
+
});
|
|
164
|
+
|
|
165
|
+
test('SamplesFastaFastaPt', async () => { await _testSamplesFastaFastaPt(); });
|
|
166
|
+
|
|
167
|
+
// System:AppData/Bio/samples/peptides_complex_align.csv contains monomers with spaces
|
|
168
|
+
// test('SamplesPeptidesComplexUn', async () => {
|
|
169
|
+
// await _testSamplesPeptidesComplexUn();
|
|
170
|
+
// });
|
|
171
|
+
|
|
172
|
+
test('samplesPeptidesComplexNegativeID', async () => {
|
|
173
|
+
await _testNeg(readSamplesCsv(Samples.peptidesComplex), 'ID');
|
|
174
|
+
});
|
|
175
|
+
test('SamplesPeptidesComplexNegativeMeasured', async () => {
|
|
176
|
+
await _testNeg(readSamplesCsv(Samples.peptidesComplex), 'Measured');
|
|
177
|
+
});
|
|
178
|
+
test('SamplesPeptidesComplexNegativeValue', async () => {
|
|
179
|
+
await _testNeg(readSamplesCsv(Samples.peptidesComplex), 'Value');
|
|
180
|
+
});
|
|
181
|
+
|
|
182
|
+
test('samplesMsaComplexUn', async () => {
|
|
183
|
+
await _testPos(readSamplesCsv(Samples.msaComplex), 'MSA', 'separator:SEQ.MSA:UN', '/');
|
|
184
|
+
});
|
|
185
|
+
test('samplesMsaComplexNegativeActivity', async () => {
|
|
186
|
+
await _testNeg(readSamplesCsv(Samples.msaComplex), 'Activity');
|
|
187
|
+
});
|
|
188
|
+
|
|
189
|
+
test('samplesIdCsvNegativeID', async () => {
|
|
190
|
+
await _testNeg(readSamplesCsv(Samples.idCsv), 'ID');
|
|
191
|
+
});
|
|
192
|
+
});
|
|
104
193
|
|
|
105
|
-
|
|
194
|
+
export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
195
|
+
const df: DG.DataFrame = await readDf();
|
|
106
196
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
}
|
|
197
|
+
const col: DG.Column = df.col(colName)!;
|
|
198
|
+
expect(col.semType === mmSemType, false);
|
|
199
|
+
}
|
|
110
200
|
|
|
111
|
-
export async function
|
|
112
|
-
const df: DG.DataFrame =
|
|
113
|
-
await grok.data.detectSemanticTypes(df);
|
|
201
|
+
export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, separator: string) {
|
|
202
|
+
const df: DG.DataFrame = await readDf();
|
|
114
203
|
|
|
115
|
-
const
|
|
116
|
-
expect(
|
|
204
|
+
const col: DG.Column = df.col(colName)!;
|
|
205
|
+
expect(col.semType === mmSemType, true);
|
|
206
|
+
expect(col.getTag(DG.TAGS.UNITS), units);
|
|
207
|
+
if (separator)
|
|
208
|
+
expect(col.getTag('separator'), separator);
|
|
117
209
|
}
|
|
118
210
|
|
|
119
|
-
export async function
|
|
211
|
+
export async function _testN1(csvDfN1: string) {
|
|
120
212
|
const dfN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
|
|
121
213
|
await grok.data.detectSemanticTypes(dfN1);
|
|
122
214
|
|
|
@@ -125,7 +217,7 @@ export async function _testDetectorsN1(csvDfN1: string) {
|
|
|
125
217
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:NT');
|
|
126
218
|
}
|
|
127
219
|
|
|
128
|
-
export async function
|
|
220
|
+
export async function _testAA1(csvDfAA1: string) {
|
|
129
221
|
const dfAA1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfAA1);
|
|
130
222
|
await grok.data.detectSemanticTypes(dfAA1);
|
|
131
223
|
|
|
@@ -134,7 +226,7 @@ export async function _testDetectorsAA1(csvDfAA1: string) {
|
|
|
134
226
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
|
|
135
227
|
}
|
|
136
228
|
|
|
137
|
-
export async function
|
|
229
|
+
export async function _testMsaN1(csvDfMsaN1: string) {
|
|
138
230
|
const dfMsaN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfMsaN1);
|
|
139
231
|
await grok.data.detectSemanticTypes(dfMsaN1);
|
|
140
232
|
|
|
@@ -143,7 +235,7 @@ export async function _testDetectorsMsaN1(csvDfMsaN1: string) {
|
|
|
143
235
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:NT');
|
|
144
236
|
}
|
|
145
237
|
|
|
146
|
-
export async function
|
|
238
|
+
export async function _testMsaAA1(csvDfMsaAA1: string) {
|
|
147
239
|
const dfMsaAA1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfMsaAA1);
|
|
148
240
|
await grok.data.detectSemanticTypes(dfMsaAA1);
|
|
149
241
|
|
|
@@ -152,7 +244,7 @@ export async function _testDetectorsMsaAA1(csvDfMsaAA1: string) {
|
|
|
152
244
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:PT');
|
|
153
245
|
}
|
|
154
246
|
|
|
155
|
-
export async function
|
|
247
|
+
export async function _testSepNt(csv: string, separator: string) {
|
|
156
248
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
157
249
|
await grok.data.detectSemanticTypes(df);
|
|
158
250
|
|
|
@@ -162,7 +254,7 @@ export async function _testDetectorsSepNt(csv: string, separator: string) {
|
|
|
162
254
|
expect(col.getTag('separator'), separator);
|
|
163
255
|
}
|
|
164
256
|
|
|
165
|
-
export async function
|
|
257
|
+
export async function _testSepPt(csv: string, separator: string) {
|
|
166
258
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
167
259
|
await grok.data.detectSemanticTypes(df);
|
|
168
260
|
|
|
@@ -172,7 +264,7 @@ export async function _testDetectorsSepPt(csv: string, separator: string) {
|
|
|
172
264
|
expect(col.getTag('separator'), separator);
|
|
173
265
|
}
|
|
174
266
|
|
|
175
|
-
export async function
|
|
267
|
+
export async function _testSepUn(csv: string, separator: string) {
|
|
176
268
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
177
269
|
await grok.data.detectSemanticTypes(df);
|
|
178
270
|
|
|
@@ -182,7 +274,7 @@ export async function _testDetectorsSepUn(csv: string, separator: string) {
|
|
|
182
274
|
expect(col.getTag('separator'), separator);
|
|
183
275
|
}
|
|
184
276
|
|
|
185
|
-
export async function
|
|
277
|
+
export async function _testSepMsaN1(csvDfSepMsaN1: string) {
|
|
186
278
|
const dfSepMsaN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfSepMsaN1);
|
|
187
279
|
await grok.data.detectSemanticTypes(dfSepMsaN1);
|
|
188
280
|
|
|
@@ -191,7 +283,7 @@ export async function _testDetectorsSepMsaN1(csvDfSepMsaN1: string) {
|
|
|
191
283
|
expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ.MSA:NT');
|
|
192
284
|
}
|
|
193
285
|
|
|
194
|
-
export async function
|
|
286
|
+
export async function _testSamplesFastaCsvPt() {
|
|
195
287
|
const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA.csv');
|
|
196
288
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
197
289
|
await grok.data.detectSemanticTypes(df);
|
|
@@ -202,7 +294,7 @@ export async function _testDetectorsSamplesFastaCsvPt() {
|
|
|
202
294
|
expect(col.getTag('separator'), null);
|
|
203
295
|
}
|
|
204
296
|
|
|
205
|
-
export async function
|
|
297
|
+
export async function _testSamplesFastaFastaPt() {
|
|
206
298
|
const fasta: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA.fasta');
|
|
207
299
|
const df: DG.DataFrame = importFasta(fasta)[0];
|
|
208
300
|
|
|
@@ -210,4 +302,16 @@ export async function _testDetectorsSamplesFastaFastaPt() {
|
|
|
210
302
|
expect(col.semType, mmSemType);
|
|
211
303
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
|
|
212
304
|
expect(col.getTag('separator'), null);
|
|
213
|
-
}
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
export async function _testSamplesPeptidesComplexUn() {
|
|
308
|
+
const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/peptides_complex_aligned.csv');
|
|
309
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
310
|
+
await grok.data.detectSemanticTypes(df);
|
|
311
|
+
|
|
312
|
+
const col: DG.Column = df.col('AlignedSequence')!;
|
|
313
|
+
expect(col.semType, mmSemType);
|
|
314
|
+
expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ.MSA:UN');
|
|
315
|
+
expect(col.getTag('separator'), '-');
|
|
316
|
+
}
|
|
317
|
+
|
|
@@ -1,24 +1,26 @@
|
|
|
1
1
|
import {before, category, test, expect} from '@datagrok-libraries/utils/src/test';
|
|
2
|
-
import * as DG from
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import {sequenceSpace} from '../utils/sequence-space';
|
|
4
|
+
import {readDataframe} from './utils';
|
|
5
5
|
//import * as grok from 'datagrok-api/grok';
|
|
6
6
|
|
|
7
7
|
category('sequenceSpace', async () => {
|
|
8
|
+
let testFastaDf: DG.DataFrame;
|
|
8
9
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
});
|
|
10
|
+
before(async () => {
|
|
11
|
+
testFastaDf = await readDataframe('sample_FASTA.csv');
|
|
12
|
+
});
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
test('sequenceSpaceOpens', async () => {
|
|
16
|
+
const sequenceSpaceParams = {
|
|
17
|
+
seqCol: testFastaDf.col('Sequence')!,
|
|
18
|
+
methodName: 't-SNE',
|
|
19
|
+
similarityMetric: 'Levenshtein',
|
|
20
|
+
embedAxesNames: ['Embed_X', 'Embed_Y']
|
|
21
|
+
};
|
|
22
|
+
const res = await sequenceSpace(sequenceSpaceParams);
|
|
23
|
+
expect(res.coordinates != undefined, true);
|
|
24
|
+
expect(res.distance != undefined, true);
|
|
25
|
+
});
|
|
26
|
+
});
|
package/src/tests/utils.ts
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
|
-
import * as grok from
|
|
2
|
+
import * as grok from 'datagrok-api/grok';
|
|
3
3
|
import {expect} from '@datagrok-libraries/utils/src/test';
|
|
4
4
|
import {runKalign} from '../utils/multiple-sequence-alignment';
|
|
5
|
-
import {
|
|
5
|
+
import {_package} from '../package-test';
|
|
6
6
|
|
|
7
7
|
export async function loadFileAsText(name: string): Promise<string> {
|
|
8
8
|
return await _package.files.readAsText(name);
|
|
@@ -15,6 +15,13 @@ export async function readDataframe(tableName: string): Promise<DG.DataFrame> {
|
|
|
15
15
|
return df;
|
|
16
16
|
}
|
|
17
17
|
|
|
18
|
+
export async function createTableView(tableName: string): Promise<DG.TableView> {
|
|
19
|
+
const df = await readDataframe(tableName);
|
|
20
|
+
df.name = tableName.replace('.csv', '');
|
|
21
|
+
const view = grok.shell.addTableView(df);
|
|
22
|
+
return view;
|
|
23
|
+
}
|
|
24
|
+
|
|
18
25
|
|
|
19
26
|
/**
|
|
20
27
|
* Tests if a table has non zero rows and columns.
|
|
@@ -35,5 +42,4 @@ export function _testTableIsNotEmpty(table: DG.DataFrame): void {
|
|
|
35
42
|
export async function _testMSAIsCorrect(col: DG.Column): Promise<void> {
|
|
36
43
|
const msaCol = await runKalign(col, true);
|
|
37
44
|
expect(msaCol.toList().every((v, i) => (v == col.get(i) || v == null)), true);
|
|
38
|
-
|
|
39
45
|
}
|
package/src/utils/convert.ts
CHANGED
|
@@ -2,23 +2,22 @@ import * as DG from 'datagrok-api/dg';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
|
|
4
4
|
export function convert(col: DG.Column): void {
|
|
5
|
-
|
|
6
5
|
const current = col.tags[DG.TAGS.UNITS];
|
|
7
6
|
//TODO: read all notations
|
|
8
7
|
const notations = ['fasta:SEQ:NT', 'fasta:SEQ:PT', 'fasta:SEQ.MSA:NT', 'fasta:SEQ.MSA:PT', 'HELM'];
|
|
9
|
-
const choices = ui.choiceInput(
|
|
8
|
+
const choices = ui.choiceInput('convert to', '', notations.filter((e) => e !== current));
|
|
10
9
|
|
|
11
10
|
ui.dialog('Convert sequence')
|
|
12
|
-
|
|
11
|
+
.add(
|
|
13
12
|
ui.div([
|
|
14
13
|
ui.h1('current notation'),
|
|
15
14
|
ui.div(current),
|
|
16
15
|
choices.root
|
|
17
16
|
])
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
17
|
+
)
|
|
18
|
+
.onOK(() => {
|
|
19
|
+
//TODO: create new converted column
|
|
20
|
+
//col.dataFrame.columns.add();
|
|
21
|
+
})
|
|
22
|
+
.show();
|
|
24
23
|
}
|
|
@@ -56,7 +56,7 @@ export async function runKalign(col: DG.Column, isAligned = false) : Promise<DG.
|
|
|
56
56
|
|
|
57
57
|
const aligned = _fastaToStrings(buf).slice(0, sequences.length);
|
|
58
58
|
const alignedCol = DG.Column.fromStrings(`msa(${col.name})`, aligned);
|
|
59
|
-
alignedCol.setTag(DG.TAGS.UNITS, '');
|
|
59
|
+
alignedCol.setTag(DG.TAGS.UNITS, '');
|
|
60
60
|
alignedCol.semType = C.SEM_TYPES.Macro_Molecule;
|
|
61
61
|
return alignedCol;
|
|
62
62
|
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import {IDrawTooltipParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import * as ui from 'datagrok-api/ui';
|
|
4
|
+
import {getSimilarityFromDistance} from '@datagrok-libraries/utils/src/similarity-metrics';
|
|
5
|
+
import { AvailableMetrics } from '@datagrok-libraries/ml/src/typed-metrics';
|
|
6
|
+
|
|
7
|
+
export async function sequenceGetSimilarities(col: DG.Column, seq: string): Promise<DG.Column | null>{
|
|
8
|
+
const stringArray = col.toList();
|
|
9
|
+
const distances = new Array(stringArray.length).fill(0.0);
|
|
10
|
+
for (let i = 0; i < stringArray.length; ++i)
|
|
11
|
+
distances[i] = getSimilarityFromDistance(AvailableMetrics['String']['Levenshtein'](stringArray[i], seq));
|
|
12
|
+
return DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, 'distances', distances);
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
export function drawTooltip(params: IDrawTooltipParams) {
|
|
16
|
+
params.tooltips[params.line.id] = ui.divH([]);
|
|
17
|
+
const columnNames = ui.divV([
|
|
18
|
+
ui.divText('sequense'),
|
|
19
|
+
ui.divText(params.activity.name),
|
|
20
|
+
]);
|
|
21
|
+
columnNames.style.fontWeight = 'bold';
|
|
22
|
+
columnNames.style.display = 'flex';
|
|
23
|
+
columnNames.style.justifyContent = 'space-between';
|
|
24
|
+
params.tooltips[params.line.id].append(columnNames);
|
|
25
|
+
params.line.mols.forEach((mol: number) => {
|
|
26
|
+
const seq = ui.divText(params.df.get(params.seqCol.name, mol));
|
|
27
|
+
const activity = ui.divText(params.df.get(params.activity.name, mol).toFixed(2));
|
|
28
|
+
activity.style.display = 'flex';
|
|
29
|
+
activity.style.justifyContent = 'left';
|
|
30
|
+
activity.style.paddingLeft = '30px';
|
|
31
|
+
params.tooltips[params.line.id].append(ui.divV([
|
|
32
|
+
seq,
|
|
33
|
+
activity,
|
|
34
|
+
], {style: {paddingLeft: '5px'}}));
|
|
35
|
+
});
|
|
36
|
+
}
|
|
@@ -1,43 +1,43 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
|
-
import {
|
|
2
|
+
import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
3
3
|
import {reduceDimensinalityWithNormalization} from '@datagrok-libraries/ml/src/sequence-space';
|
|
4
4
|
import {BitArrayMetrics, StringMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
5
|
-
import {
|
|
5
|
+
import {Matrix} from '@datagrok-libraries/utils/src/type-declarations';
|
|
6
6
|
import BitArray from '@datagrok-libraries/utils/src/bit-array';
|
|
7
|
+
import {ISequenceSpaceParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
7
8
|
|
|
8
9
|
export interface ISequenceSpaceResult {
|
|
9
10
|
distance: Matrix;
|
|
10
11
|
coordinates: DG.ColumnList;
|
|
11
12
|
}
|
|
12
13
|
|
|
13
|
-
export async function sequenceSpace(
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
} else {
|
|
26
|
-
preparedData = molColumn.toList();
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
const sequenceSpaceResult = await reduceDimensinalityWithNormalization(
|
|
30
|
-
preparedData,
|
|
31
|
-
methodName,
|
|
32
|
-
similarityMetric as StringMetrics|BitArrayMetrics,
|
|
33
|
-
options);
|
|
34
|
-
const cols: DG.Column[] = axes.map((name, index) => DG.Column.fromFloat32Array(name, sequenceSpaceResult.embedding[index]))
|
|
35
|
-
return {distance: sequenceSpaceResult.distance, coordinates: new DG.ColumnList(cols)};
|
|
14
|
+
export async function sequenceSpace(spaceParams: ISequenceSpaceParams): Promise<ISequenceSpaceResult> {
|
|
15
|
+
let preparedData: any;
|
|
16
|
+
if (!(spaceParams.seqCol!.tags[DG.TAGS.UNITS] === 'HELM')) {
|
|
17
|
+
const sep = spaceParams.seqCol.getTag('separator');
|
|
18
|
+
const sepFinal = sep ? sep === '.' ? '\\\.' : sep : '-';
|
|
19
|
+
const regex = new RegExp(sepFinal, 'g');
|
|
20
|
+
if (Object.keys(AvailableMetrics['String']).includes(spaceParams.similarityMetric))
|
|
21
|
+
preparedData = spaceParams.seqCol.toList().map((v) => v.replace(regex, '')) as string[];
|
|
22
|
+
else
|
|
23
|
+
preparedData = spaceParams.seqCol.toList().map((v) => v.replace(regex, '')) as string[];
|
|
24
|
+
} else {
|
|
25
|
+
preparedData = spaceParams.seqCol.toList();
|
|
36
26
|
}
|
|
37
27
|
|
|
28
|
+
const sequenceSpaceResult = await reduceDimensinalityWithNormalization(
|
|
29
|
+
preparedData,
|
|
30
|
+
spaceParams.methodName,
|
|
31
|
+
spaceParams.similarityMetric as StringMetrics | BitArrayMetrics,
|
|
32
|
+
spaceParams.options);
|
|
33
|
+
const cols: DG.Column[] = spaceParams.embedAxesNames.map(
|
|
34
|
+
(name, index) => DG.Column.fromFloat32Array(name, sequenceSpaceResult.embedding[index]));
|
|
35
|
+
return {distance: sequenceSpaceResult.distance, coordinates: new DG.ColumnList(cols)};
|
|
36
|
+
}
|
|
37
|
+
|
|
38
38
|
|
|
39
|
-
export function getEmbeddingColsNames(df: DG.DataFrame){
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
39
|
+
export function getEmbeddingColsNames(df: DG.DataFrame) {
|
|
40
|
+
const axes = ['Embed_X', 'Embed_Y'];
|
|
41
|
+
const colNameInd = df.columns.names().filter((it) => it.includes(axes[0])).length + 1;
|
|
42
|
+
return axes.map((it) => `${it}_${colNameInd}`);
|
|
43
|
+
}
|