@datagrok/bio 1.4.2 → 1.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,8 @@ import * as DG from 'datagrok-api/dg';
7
7
  import {mmSemType} from '../const';
8
8
  import {importFasta} from '../package';
9
9
 
10
+ type DfReaderFunc = () => Promise<DG.DataFrame>;
11
+
10
12
  category('detectors', () => {
11
13
  const csvDf1: string = `col1
12
14
  1
@@ -87,36 +89,126 @@ YNR-WYV-KHP
87
89
  MWRSWY-CKHP
88
90
  `;
89
91
 
90
- test('testDetectorsNegative1', async () => { await _testDetectorsNegative(csvDf1); });
91
- test('testDetectorsNegative2', async () => { await _testDetectorsNegative(csvDf2); });
92
- test('testDetectorsNegative3', async () => { await _testDetectorsNegative(csvDf3); });
93
- test('testDetectorsNegativeSmiles', async () => { await _testDetectorsNegative(csvDfSmiles); });
94
-
95
- test('testDetectorsN1', async () => { await _testDetectorsN1(csvDfN1); });
96
- test('testDetectorsAA1', async () => { await _testDetectorsAA1(csvDfAA1); });
97
- test('testDetectorsMsaN1', async () => { await _testDetectorsMsaN1(csvDfMsaN1); });
98
- test('testDetectorsMsaAA1', async () => { await _testDetectorsMsaAA1(csvDfMsaAA1); });
99
-
100
- test('testDetectorsSepNt', async () => { await _testDetectorsSepNt(csvDfSepNt, '*'); });
101
- test('testDetectorsSepPt', async () => { await _testDetectorsSepPt(csvDfSepPt, '-'); });
102
- test('testDetectorsSepUn1', async () => { await _testDetectorsSepUn(csvDfSepUn1, '-'); });
103
- test('testDetectorsSepUn2', async () => { await _testDetectorsSepUn(csvDfSepUn2, '/'); });
92
+ const enum Samples {
93
+ peptidesComplex = 'PeptidesComplex',
94
+ fastaCsv = 'FastaCsv',
95
+ msaComplex = 'MsaComplex',
96
+ idCsv = 'IdCsv',
97
+ }
98
+
99
+ const samples: { [key: string]: string } = {
100
+ 'PeptidesComplex': 'System:AppData/Bio/samples/peptides_complex_msa.csv',
101
+ 'FastaCsv': 'System:AppData/Bio/samples/sample_FASTA.csv',
102
+ 'MsaComplex': 'System:AppData/Bio/samples/sample_MSA.csv',
103
+ 'IdCsv': 'System:AppData/Bio/samples/id.csv',
104
+ };
105
+
106
+ const _samplesDfs: { [key: string]: Promise<DG.DataFrame> } = {};
107
+ const readSamplesCsv: (key: string) => DfReaderFunc = (key: string) => {
108
+ return async () => {
109
+ if (!(key in _samplesDfs)) {
110
+ _samplesDfs[key] = (async (): Promise<DG.DataFrame> => {
111
+ const csv: string = await grok.dapi.files.readAsText(samples[key]);
112
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
113
+ await grok.data.detectSemanticTypes(df);
114
+ return df;
115
+ })();
116
+ }
117
+ return _samplesDfs[key];
118
+ };
119
+ };
120
+
121
+ const _csvDfs: { [key: string]: Promise<DG.DataFrame> } = {};
122
+ const readCsv: (key: string, csv: string) => DfReaderFunc = (key: string, csv: string) => {
123
+ return async () => {
124
+ if (!(key in _csvDfs)) {
125
+ _csvDfs[key] = (async (): Promise<DG.DataFrame> => {
126
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
127
+ await grok.data.detectSemanticTypes(df);
128
+ return df;
129
+ })();
130
+ }
131
+ return _csvDfs[key];
132
+ };
133
+ };
134
+
135
+ test('Negative1', async () => { await _testNeg(readCsv('csvDf1', csvDf1), 'col1'); });
136
+ test('Negative2', async () => { await _testNeg(readCsv('csvDf2', csvDf2), 'col1'); });
137
+ test('Negative3', async () => { await _testNeg(readCsv('csvDf3', csvDf3), 'col1'); });
138
+ test('NegativeSmiles', async () => { await _testNeg(readCsv('csvDfSmiles', csvDfSmiles), 'col1'); });
139
+
140
+ test('N1', async () => { await _testN1(csvDfN1); });
141
+ test('AA1', async () => { await _testAA1(csvDfAA1); });
142
+ test('MsaN1', async () => { await _testMsaN1(csvDfMsaN1); });
143
+ test('MsaAA1', async () => { await _testMsaAA1(csvDfMsaAA1); });
144
+
145
+ test('SepNt', async () => { await _testSepNt(csvDfSepNt, '*'); });
146
+ test('SepPt', async () => { await _testSepPt(csvDfSepPt, '-'); });
147
+ test('SepUn1', async () => { await _testSepUn(csvDfSepUn1, '-'); });
148
+ test('SepUn2', async () => { await _testSepUn(csvDfSepUn2, '/'); });
149
+
150
+ test('SepMsaN1', async () => { await _testSepMsaN1(csvDfSepMsaN1); });
151
+
152
+ test('SamplesFastaCsvPt', async () => {
153
+ await _testSamplesFastaCsvPt();
154
+ });
155
+ test('SamplesFastaCsvNegativeEntry', async () => {
156
+ await _testNeg(readSamplesCsv(Samples.fastaCsv), 'Entry');
157
+ });
158
+ test('SamplesFastaCsvNegativeLength', async () => {
159
+ await _testNeg(readSamplesCsv(Samples.fastaCsv), 'Length');
160
+ });
161
+ test('SamplesFastaCsvNegativeUniProtKB', async () => {
162
+ await _testNeg(readSamplesCsv(Samples.fastaCsv), 'UniProtKB');
163
+ });
164
+
165
+ test('SamplesFastaFastaPt', async () => { await _testSamplesFastaFastaPt(); });
166
+
167
+ // System:AppData/Bio/samples/peptides_complex_align.csv contains monomers with spaces
168
+ // test('SamplesPeptidesComplexUn', async () => {
169
+ // await _testSamplesPeptidesComplexUn();
170
+ // });
171
+
172
+ test('samplesPeptidesComplexNegativeID', async () => {
173
+ await _testNeg(readSamplesCsv(Samples.peptidesComplex), 'ID');
174
+ });
175
+ test('SamplesPeptidesComplexNegativeMeasured', async () => {
176
+ await _testNeg(readSamplesCsv(Samples.peptidesComplex), 'Measured');
177
+ });
178
+ test('SamplesPeptidesComplexNegativeValue', async () => {
179
+ await _testNeg(readSamplesCsv(Samples.peptidesComplex), 'Value');
180
+ });
181
+
182
+ test('samplesMsaComplexUn', async () => {
183
+ await _testPos(readSamplesCsv(Samples.msaComplex), 'MSA', 'separator:SEQ.MSA:UN', '/');
184
+ });
185
+ test('samplesMsaComplexNegativeActivity', async () => {
186
+ await _testNeg(readSamplesCsv(Samples.msaComplex), 'Activity');
187
+ });
188
+
189
+ test('samplesIdCsvNegativeID', async () => {
190
+ await _testNeg(readSamplesCsv(Samples.idCsv), 'ID');
191
+ });
192
+ });
104
193
 
105
- test('testDetectorsSepMsaN1', async () => { await _testDetectorsSepMsaN1(csvDfSepMsaN1); });
194
+ export async function _testNeg(readDf: DfReaderFunc, colName: string) {
195
+ const df: DG.DataFrame = await readDf();
106
196
 
107
- test('testDetectorsSamplesFastaCsvPt', async () => { await _testDetectorsSamplesFastaCsvPt(); });
108
- test('testDetectorsSamplesFastaFastaPt', async () => { await _testDetectorsSamplesFastaFastaPt(); });
109
- });
197
+ const col: DG.Column = df.col(colName)!;
198
+ expect(col.semType === mmSemType, false);
199
+ }
110
200
 
111
- export async function _testDetectorsNegative(csvDf: string) {
112
- const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf);
113
- await grok.data.detectSemanticTypes(df);
201
+ export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, separator: string) {
202
+ const df: DG.DataFrame = await readDf();
114
203
 
115
- const col1: DG.Column = df.col('col1')!;
116
- expect(col1.semType == mmSemType, false);
204
+ const col: DG.Column = df.col(colName)!;
205
+ expect(col.semType === mmSemType, true);
206
+ expect(col.getTag(DG.TAGS.UNITS), units);
207
+ if (separator)
208
+ expect(col.getTag('separator'), separator);
117
209
  }
118
210
 
119
- export async function _testDetectorsN1(csvDfN1: string) {
211
+ export async function _testN1(csvDfN1: string) {
120
212
  const dfN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
121
213
  await grok.data.detectSemanticTypes(dfN1);
122
214
 
@@ -125,7 +217,7 @@ export async function _testDetectorsN1(csvDfN1: string) {
125
217
  expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:NT');
126
218
  }
127
219
 
128
- export async function _testDetectorsAA1(csvDfAA1: string) {
220
+ export async function _testAA1(csvDfAA1: string) {
129
221
  const dfAA1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfAA1);
130
222
  await grok.data.detectSemanticTypes(dfAA1);
131
223
 
@@ -134,7 +226,7 @@ export async function _testDetectorsAA1(csvDfAA1: string) {
134
226
  expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
135
227
  }
136
228
 
137
- export async function _testDetectorsMsaN1(csvDfMsaN1: string) {
229
+ export async function _testMsaN1(csvDfMsaN1: string) {
138
230
  const dfMsaN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfMsaN1);
139
231
  await grok.data.detectSemanticTypes(dfMsaN1);
140
232
 
@@ -143,7 +235,7 @@ export async function _testDetectorsMsaN1(csvDfMsaN1: string) {
143
235
  expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:NT');
144
236
  }
145
237
 
146
- export async function _testDetectorsMsaAA1(csvDfMsaAA1: string) {
238
+ export async function _testMsaAA1(csvDfMsaAA1: string) {
147
239
  const dfMsaAA1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfMsaAA1);
148
240
  await grok.data.detectSemanticTypes(dfMsaAA1);
149
241
 
@@ -152,7 +244,7 @@ export async function _testDetectorsMsaAA1(csvDfMsaAA1: string) {
152
244
  expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:PT');
153
245
  }
154
246
 
155
- export async function _testDetectorsSepNt(csv: string, separator: string) {
247
+ export async function _testSepNt(csv: string, separator: string) {
156
248
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
157
249
  await grok.data.detectSemanticTypes(df);
158
250
 
@@ -162,7 +254,7 @@ export async function _testDetectorsSepNt(csv: string, separator: string) {
162
254
  expect(col.getTag('separator'), separator);
163
255
  }
164
256
 
165
- export async function _testDetectorsSepPt(csv: string, separator: string) {
257
+ export async function _testSepPt(csv: string, separator: string) {
166
258
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
167
259
  await grok.data.detectSemanticTypes(df);
168
260
 
@@ -172,7 +264,7 @@ export async function _testDetectorsSepPt(csv: string, separator: string) {
172
264
  expect(col.getTag('separator'), separator);
173
265
  }
174
266
 
175
- export async function _testDetectorsSepUn(csv: string, separator: string) {
267
+ export async function _testSepUn(csv: string, separator: string) {
176
268
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
177
269
  await grok.data.detectSemanticTypes(df);
178
270
 
@@ -182,7 +274,7 @@ export async function _testDetectorsSepUn(csv: string, separator: string) {
182
274
  expect(col.getTag('separator'), separator);
183
275
  }
184
276
 
185
- export async function _testDetectorsSepMsaN1(csvDfSepMsaN1: string) {
277
+ export async function _testSepMsaN1(csvDfSepMsaN1: string) {
186
278
  const dfSepMsaN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfSepMsaN1);
187
279
  await grok.data.detectSemanticTypes(dfSepMsaN1);
188
280
 
@@ -191,7 +283,7 @@ export async function _testDetectorsSepMsaN1(csvDfSepMsaN1: string) {
191
283
  expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ.MSA:NT');
192
284
  }
193
285
 
194
- export async function _testDetectorsSamplesFastaCsvPt() {
286
+ export async function _testSamplesFastaCsvPt() {
195
287
  const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA.csv');
196
288
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
197
289
  await grok.data.detectSemanticTypes(df);
@@ -202,7 +294,7 @@ export async function _testDetectorsSamplesFastaCsvPt() {
202
294
  expect(col.getTag('separator'), null);
203
295
  }
204
296
 
205
- export async function _testDetectorsSamplesFastaFastaPt() {
297
+ export async function _testSamplesFastaFastaPt() {
206
298
  const fasta: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA.fasta');
207
299
  const df: DG.DataFrame = importFasta(fasta)[0];
208
300
 
@@ -210,4 +302,16 @@ export async function _testDetectorsSamplesFastaFastaPt() {
210
302
  expect(col.semType, mmSemType);
211
303
  expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
212
304
  expect(col.getTag('separator'), null);
213
- }
305
+ }
306
+
307
+ export async function _testSamplesPeptidesComplexUn() {
308
+ const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/peptides_complex_aligned.csv');
309
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
310
+ await grok.data.detectSemanticTypes(df);
311
+
312
+ const col: DG.Column = df.col('AlignedSequence')!;
313
+ expect(col.semType, mmSemType);
314
+ expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ.MSA:UN');
315
+ expect(col.getTag('separator'), '-');
316
+ }
317
+
@@ -1,24 +1,26 @@
1
1
  import {before, category, test, expect} from '@datagrok-libraries/utils/src/test';
2
- import * as DG from "datagrok-api/dg";
3
- import { sequenceSpace } from '../utils/sequence-space';
4
- import { readDataframe } from './utils';
2
+ import * as DG from 'datagrok-api/dg';
3
+ import {sequenceSpace} from '../utils/sequence-space';
4
+ import {readDataframe} from './utils';
5
5
  //import * as grok from 'datagrok-api/grok';
6
6
 
7
7
  category('sequenceSpace', async () => {
8
+ let testFastaDf: DG.DataFrame;
8
9
 
9
- let testFastaDf: DG.DataFrame;
10
-
11
- before(async () => {
12
- //@ts-ignore
13
- testFastaDf = await readDataframe('sample_FASTA.csv');
14
- });
15
-
16
-
17
- test('sequenceSpaceOpens', async () => {
18
- //@ts-ignore
19
- const res = await sequenceSpace(testFastaDf.col('Sequence')!, 't-SNE', 'Levenshtein', ['Embed_X', 'Embed_Y']);
20
- expect(res.coordinates != undefined, true);
21
- expect(res.distance != undefined, true);
22
- });
23
-
24
- });
10
+ before(async () => {
11
+ testFastaDf = await readDataframe('sample_FASTA.csv');
12
+ });
13
+
14
+
15
+ test('sequenceSpaceOpens', async () => {
16
+ const sequenceSpaceParams = {
17
+ seqCol: testFastaDf.col('Sequence')!,
18
+ methodName: 't-SNE',
19
+ similarityMetric: 'Levenshtein',
20
+ embedAxesNames: ['Embed_X', 'Embed_Y']
21
+ };
22
+ const res = await sequenceSpace(sequenceSpaceParams);
23
+ expect(res.coordinates != undefined, true);
24
+ expect(res.distance != undefined, true);
25
+ });
26
+ });
@@ -1,8 +1,8 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
- import * as grok from "datagrok-api/grok";
2
+ import * as grok from 'datagrok-api/grok';
3
3
  import {expect} from '@datagrok-libraries/utils/src/test';
4
4
  import {runKalign} from '../utils/multiple-sequence-alignment';
5
- import { _package} from '../package-test';
5
+ import {_package} from '../package-test';
6
6
 
7
7
  export async function loadFileAsText(name: string): Promise<string> {
8
8
  return await _package.files.readAsText(name);
@@ -15,6 +15,13 @@ export async function readDataframe(tableName: string): Promise<DG.DataFrame> {
15
15
  return df;
16
16
  }
17
17
 
18
+ export async function createTableView(tableName: string): Promise<DG.TableView> {
19
+ const df = await readDataframe(tableName);
20
+ df.name = tableName.replace('.csv', '');
21
+ const view = grok.shell.addTableView(df);
22
+ return view;
23
+ }
24
+
18
25
 
19
26
  /**
20
27
  * Tests if a table has non zero rows and columns.
@@ -35,5 +42,4 @@ export function _testTableIsNotEmpty(table: DG.DataFrame): void {
35
42
  export async function _testMSAIsCorrect(col: DG.Column): Promise<void> {
36
43
  const msaCol = await runKalign(col, true);
37
44
  expect(msaCol.toList().every((v, i) => (v == col.get(i) || v == null)), true);
38
-
39
45
  }
@@ -2,23 +2,22 @@ import * as DG from 'datagrok-api/dg';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
 
4
4
  export function convert(col: DG.Column): void {
5
-
6
5
  const current = col.tags[DG.TAGS.UNITS];
7
6
  //TODO: read all notations
8
7
  const notations = ['fasta:SEQ:NT', 'fasta:SEQ:PT', 'fasta:SEQ.MSA:NT', 'fasta:SEQ.MSA:PT', 'HELM'];
9
- const choices = ui.choiceInput("convert to", "", notations.filter(e => e !== current));
8
+ const choices = ui.choiceInput('convert to', '', notations.filter((e) => e !== current));
10
9
 
11
10
  ui.dialog('Convert sequence')
12
- .add(
11
+ .add(
13
12
  ui.div([
14
13
  ui.h1('current notation'),
15
14
  ui.div(current),
16
15
  choices.root
17
16
  ])
18
- )
19
- .onOK(() => {
20
- //TODO: create new converted column
21
- //col.dataFrame.columns.add();
22
- })
23
- .show();
17
+ )
18
+ .onOK(() => {
19
+ //TODO: create new converted column
20
+ //col.dataFrame.columns.add();
21
+ })
22
+ .show();
24
23
  }
@@ -56,7 +56,7 @@ export async function runKalign(col: DG.Column, isAligned = false) : Promise<DG.
56
56
 
57
57
  const aligned = _fastaToStrings(buf).slice(0, sequences.length);
58
58
  const alignedCol = DG.Column.fromStrings(`msa(${col.name})`, aligned);
59
- alignedCol.setTag(DG.TAGS.UNITS, '');
59
+ alignedCol.setTag(DG.TAGS.UNITS, '');
60
60
  alignedCol.semType = C.SEM_TYPES.Macro_Molecule;
61
61
  return alignedCol;
62
62
  }
@@ -0,0 +1,36 @@
1
+ import {IDrawTooltipParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
2
+ import * as DG from 'datagrok-api/dg';
3
+ import * as ui from 'datagrok-api/ui';
4
+ import {getSimilarityFromDistance} from '@datagrok-libraries/utils/src/similarity-metrics';
5
+ import { AvailableMetrics } from '@datagrok-libraries/ml/src/typed-metrics';
6
+
7
+ export async function sequenceGetSimilarities(col: DG.Column, seq: string): Promise<DG.Column | null>{
8
+ const stringArray = col.toList();
9
+ const distances = new Array(stringArray.length).fill(0.0);
10
+ for (let i = 0; i < stringArray.length; ++i)
11
+ distances[i] = getSimilarityFromDistance(AvailableMetrics['String']['Levenshtein'](stringArray[i], seq));
12
+ return DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, 'distances', distances);
13
+ }
14
+
15
+ export function drawTooltip(params: IDrawTooltipParams) {
16
+ params.tooltips[params.line.id] = ui.divH([]);
17
+ const columnNames = ui.divV([
18
+ ui.divText('sequense'),
19
+ ui.divText(params.activity.name),
20
+ ]);
21
+ columnNames.style.fontWeight = 'bold';
22
+ columnNames.style.display = 'flex';
23
+ columnNames.style.justifyContent = 'space-between';
24
+ params.tooltips[params.line.id].append(columnNames);
25
+ params.line.mols.forEach((mol: number) => {
26
+ const seq = ui.divText(params.df.get(params.seqCol.name, mol));
27
+ const activity = ui.divText(params.df.get(params.activity.name, mol).toFixed(2));
28
+ activity.style.display = 'flex';
29
+ activity.style.justifyContent = 'left';
30
+ activity.style.paddingLeft = '30px';
31
+ params.tooltips[params.line.id].append(ui.divV([
32
+ seq,
33
+ activity,
34
+ ], {style: {paddingLeft: '5px'}}));
35
+ });
36
+ }
@@ -1,43 +1,43 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
- import { AvailableMetrics } from '@datagrok-libraries/ml/src/typed-metrics';
2
+ import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
3
3
  import {reduceDimensinalityWithNormalization} from '@datagrok-libraries/ml/src/sequence-space';
4
4
  import {BitArrayMetrics, StringMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
5
- import { Matrix } from '@datagrok-libraries/utils/src/type-declarations';
5
+ import {Matrix} from '@datagrok-libraries/utils/src/type-declarations';
6
6
  import BitArray from '@datagrok-libraries/utils/src/bit-array';
7
+ import {ISequenceSpaceParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
7
8
 
8
9
  export interface ISequenceSpaceResult {
9
10
  distance: Matrix;
10
11
  coordinates: DG.ColumnList;
11
12
  }
12
13
 
13
- export async function sequenceSpace(molColumn: DG.Column, methodName: string, similarityMetric: string,
14
- axes: string[], options?: any): Promise<ISequenceSpaceResult> {
15
- let preparedData: any;
16
- if (!(molColumn!.tags[DG.TAGS.UNITS] === 'HELM')) {
17
- const sep = molColumn.getTag('separator');
18
- const sepFinal = sep ? sep === '.' ? '\\\.' : sep: '-';
19
- var regex = new RegExp(sepFinal, "g");
20
- if (Object.keys(AvailableMetrics['String']).includes(similarityMetric)) {
21
- preparedData = molColumn.toList().map((v) => v.replace(regex, '')) as string[];
22
- } else {
23
- preparedData = molColumn.toList().map((v) => v.replace(regex, '')) as string[];
24
- }
25
- } else {
26
- preparedData = molColumn.toList();
27
- }
28
-
29
- const sequenceSpaceResult = await reduceDimensinalityWithNormalization(
30
- preparedData,
31
- methodName,
32
- similarityMetric as StringMetrics|BitArrayMetrics,
33
- options);
34
- const cols: DG.Column[] = axes.map((name, index) => DG.Column.fromFloat32Array(name, sequenceSpaceResult.embedding[index]))
35
- return {distance: sequenceSpaceResult.distance, coordinates: new DG.ColumnList(cols)};
14
+ export async function sequenceSpace(spaceParams: ISequenceSpaceParams): Promise<ISequenceSpaceResult> {
15
+ let preparedData: any;
16
+ if (!(spaceParams.seqCol!.tags[DG.TAGS.UNITS] === 'HELM')) {
17
+ const sep = spaceParams.seqCol.getTag('separator');
18
+ const sepFinal = sep ? sep === '.' ? '\\\.' : sep : '-';
19
+ const regex = new RegExp(sepFinal, 'g');
20
+ if (Object.keys(AvailableMetrics['String']).includes(spaceParams.similarityMetric))
21
+ preparedData = spaceParams.seqCol.toList().map((v) => v.replace(regex, '')) as string[];
22
+ else
23
+ preparedData = spaceParams.seqCol.toList().map((v) => v.replace(regex, '')) as string[];
24
+ } else {
25
+ preparedData = spaceParams.seqCol.toList();
36
26
  }
37
27
 
28
+ const sequenceSpaceResult = await reduceDimensinalityWithNormalization(
29
+ preparedData,
30
+ spaceParams.methodName,
31
+ spaceParams.similarityMetric as StringMetrics | BitArrayMetrics,
32
+ spaceParams.options);
33
+ const cols: DG.Column[] = spaceParams.embedAxesNames.map(
34
+ (name, index) => DG.Column.fromFloat32Array(name, sequenceSpaceResult.embedding[index]));
35
+ return {distance: sequenceSpaceResult.distance, coordinates: new DG.ColumnList(cols)};
36
+ }
37
+
38
38
 
39
- export function getEmbeddingColsNames(df: DG.DataFrame){
40
- const axes = ['Embed_X', 'Embed_Y'];
41
- const colNameInd = df.columns.names().filter((it) => it.includes(axes[0])).length + 1;
42
- return axes.map((it) => `${it}_${colNameInd}`);
43
- }
39
+ export function getEmbeddingColsNames(df: DG.DataFrame) {
40
+ const axes = ['Embed_X', 'Embed_Y'];
41
+ const colNameInd = df.columns.names().filter((it) => it.includes(axes[0])).length + 1;
42
+ return axes.map((it) => `${it}_${colNameInd}`);
43
+ }