@datagrok/bio 1.4.2 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "@datagrok/bio",
3
3
  "beta": false,
4
4
  "friendlyName": "Bio",
5
- "version": "1.4.2",
5
+ "version": "1.5.1",
6
6
  "description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
7
7
  "repository": {
8
8
  "type": "git",
@@ -11,11 +11,11 @@
11
11
  },
12
12
  "dependencies": {
13
13
  "@biowasm/aioli": ">=2.4.0",
14
- "@datagrok-libraries/bio": "^2.2.0",
15
- "@datagrok-libraries/utils": "^0.4.2",
16
- "@datagrok-libraries/ml": "^2.0.4",
14
+ "@datagrok-libraries/bio": "^2.3.1",
15
+ "@datagrok-libraries/utils": "^1.0.0",
16
+ "@datagrok-libraries/ml": "^2.0.8",
17
17
  "cash-dom": "latest",
18
- "datagrok-api": "^1.4.11",
18
+ "datagrok-api": "^1.4.12",
19
19
  "dayjs": "latest",
20
20
  "ts-loader": "^9.2.5",
21
21
  "typescript": "^4.4.2"
@@ -41,7 +41,8 @@
41
41
  "debug-sequences1": "grok publish --rebuild",
42
42
  "release-sequences1": "grok publish --rebuild --release",
43
43
  "build-sequences1": "webpack",
44
- "local-bio": "grok publish local",
44
+ "debug-local": "grok publish local",
45
+ "release-local": "grok publish local --release",
45
46
  "build": "webpack",
46
47
  "debug-sequences1-public": "grok publish public --rebuild",
47
48
  "release-sequences1-public": "grok publish public --rebuild --release",
@@ -7,6 +7,7 @@ import './tests/Palettes-test';
7
7
  import './tests/detectors-test';
8
8
  import './tests/msa-tests';
9
9
  import './tests/sequence-space-test';
10
+ import './tests/activity-cliffs-tests';
10
11
 
11
12
  export const _package = new DG.Package();
12
13
  export {tests};
package/src/package.ts CHANGED
@@ -2,17 +2,21 @@
2
2
  import * as grok from 'datagrok-api/grok';
3
3
  import * as ui from 'datagrok-api/ui';
4
4
  import * as DG from 'datagrok-api/dg';
5
- import {SequenceAlignment, Aligned} from './seq_align';
6
5
 
7
6
  export const _package = new DG.Package();
8
7
 
9
- import {WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
8
+ import {mmSemType} from './const';
9
+ import {WebLogo, SeqColStats} from '@datagrok-libraries/bio/src/viewers/web-logo';
10
10
  import {VdRegionsViewer} from './viewers/vd-regions-viewer';
11
11
  import {runKalign, testMSAEnoughMemory} from './utils/multiple-sequence-alignment';
12
+ import {SequenceAlignment, Aligned} from './seq_align';
13
+ import {Nucleotides} from '@datagrok-libraries/bio/src/nucleotides';
14
+ import {Aminoacids} from '@datagrok-libraries/bio/src/aminoacids';
12
15
  import {convert} from './utils/convert';
13
- import {TableView} from 'datagrok-api/dg';
14
- import { getEmbeddingColsNames, sequenceSpace } from './utils/sequence-space';
15
- import { AvailableMetrics } from '@datagrok-libraries/ml/src/typed-metrics';
16
+ import {getEmbeddingColsNames, sequenceSpace} from './utils/sequence-space';
17
+ import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
18
+ import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
19
+ import {sequenceGetSimilarities, drawTooltip} from './utils/sequence-activity-cliffs';
16
20
 
17
21
  //name: sequenceAlignment
18
22
  //input: string alignType {choices: ['Local alignment', 'Global alignment']}
@@ -48,12 +52,31 @@ export function vdRegionViewer() {
48
52
  //name: Activity Cliffs
49
53
  //description: detect activity cliffs
50
54
  //input: dataframe df [Input data table]
51
- //input: column smiles {type:categorical; semType: Macromolecule}
55
+ //input: column sequence {semType: Macromolecule}
52
56
  //input: column activities
53
57
  //input: double similarity = 80 [Similarity cutoff]
54
58
  //input: string methodName { choices:["UMAP", "t-SNE", "SPE"] }
55
- export async function activityCliffs(df: DG.DataFrame, smiles: DG.Column, activities: DG.Column,
59
+ export async function activityCliffs(df: DG.DataFrame, sequence: DG.Column, activities: DG.Column,
56
60
  similarity: number, methodName: string): Promise<void> {
61
+ const axesNames = getEmbeddingColsNames(df);
62
+ const options = {
63
+ 'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
64
+ };
65
+ const units = sequence!.tags[DG.TAGS.UNITS];
66
+ await getActivityCliffs(
67
+ df,
68
+ sequence,
69
+ axesNames,
70
+ activities,
71
+ similarity,
72
+ 'Levenshtein',
73
+ methodName,
74
+ DG.SEMTYPE.MACROMOLECULE,
75
+ units,
76
+ sequenceSpace,
77
+ sequenceGetSimilarities,
78
+ drawTooltip,
79
+ (options as any)[methodName]);
57
80
  }
58
81
 
59
82
  //top-menu: Bio | Sequence Space...
@@ -64,18 +87,24 @@ export async function activityCliffs(df: DG.DataFrame, smiles: DG.Column, activi
64
87
  //input: string similarityMetric { choices:["Levenshtein", "Tanimoto"] }
65
88
  //input: bool plotEmbeddings = true
66
89
  export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: DG.Column, methodName: string,
67
- similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean) : Promise<void> {
68
- const embedColsNames = getEmbeddingColsNames(table);
69
- const sequenceSpaceRes = await sequenceSpace(macroMolecule, methodName, similarityMetric, embedColsNames);
70
- const embeddings = sequenceSpaceRes.coordinates;
71
- for (const col of embeddings)
72
- table.columns.add(col);
73
- if (plotEmbeddings) {
74
- for (let v of grok.shell.views) {
75
- if (v.name === table.name)
76
- (v as DG.TableView).scatterPlot({x: embedColsNames[0], y: embedColsNames[1]});
77
- }
78
- }
90
+ similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean): Promise<void> {
91
+ const embedColsNames = getEmbeddingColsNames(table);
92
+ const chemSpaceParams = {
93
+ seqCol: macroMolecule,
94
+ methodName: methodName,
95
+ similarityMetric: similarityMetric,
96
+ embedAxesNames: embedColsNames
97
+ };
98
+ const sequenceSpaceRes = await sequenceSpace(chemSpaceParams);
99
+ const embeddings = sequenceSpaceRes.coordinates;
100
+ for (const col of embeddings)
101
+ table.columns.add(col);
102
+ if (plotEmbeddings) {
103
+ for (const v of grok.shell.views) {
104
+ if (v.name === table.name)
105
+ (v as DG.TableView).scatterPlot({x: embedColsNames[0], y: embedColsNames[1]});
106
+ }
107
+ }
79
108
  };
80
109
 
81
110
  //top-menu: Bio | MSA...
@@ -100,7 +129,7 @@ export async function compositionAnalysis(): Promise<void> {
100
129
  const wl = await col.dataFrame.plot.fromType('WebLogo', {});
101
130
 
102
131
  for (const v of grok.shell.views) {
103
- if (v instanceof TableView && (v as DG.TableView).dataFrame.name === col.dataFrame.name) {
132
+ if (v instanceof DG.TableView && (v as DG.TableView).dataFrame.name === col.dataFrame.name) {
104
133
  (v as DG.TableView).dockManager.dock(wl.root, 'down');
105
134
  break;
106
135
  }
@@ -122,10 +151,10 @@ function parseMacromolecule(
122
151
  //description: Opens FASTA file
123
152
  //tags: file-handler
124
153
  //meta.ext: fasta, fna, ffn, faa, frn, fa
125
- //input: string content
154
+ //input: string fileContent
126
155
  //output: list tables
127
156
  export function importFasta(fileContent: string): DG.DataFrame [] {
128
- const regex = /^>(.*)$/gm; // match the line starting with >
157
+ const regex = /^>(.*)$/gm; // match lines starting with >
129
158
  const descriptionsArray = [];
130
159
  const sequencesArray: string[] = [];
131
160
  let startOfSequence = 0;
@@ -141,6 +170,22 @@ export function importFasta(fileContent: string): DG.DataFrame [] {
141
170
  const descriptionsArrayCol = DG.Column.fromStrings('description', descriptionsArray);
142
171
  const sequenceCol = DG.Column.fromStrings('sequence', sequencesArray);
143
172
  sequenceCol.semType = 'Macromolecule';
173
+
174
+ const stats: SeqColStats = WebLogo.getStats(sequenceCol, 5, WebLogo.splitterAsFasta);
175
+ const seqType = stats.sameLength ? 'SEQ.MSA' : 'SEQ';
176
+ const alphabetCandidates: [string, Set<string>][] = [
177
+ ['NT', new Set(Object.keys(Nucleotides.Names))],
178
+ ['PT', new Set(Object.keys(Aminoacids.Names))],
179
+ ];
180
+ // Calculate likelihoods for alphabet_candidates
181
+ const alphabetCandidatesSim: number[] = alphabetCandidates.map(
182
+ (c) => WebLogo.getAlphabetSimilarity(stats.freq, c[1]));
183
+ const maxCos = Math.max(...alphabetCandidatesSim);
184
+ const alphabet = maxCos > 0.65 ? alphabetCandidates[alphabetCandidatesSim.indexOf(maxCos)][0] : 'UN';
185
+ sequenceCol.semType = mmSemType;
186
+ const units: string = `fasta:${seqType}:${alphabet}`;
187
+ sequenceCol.setTag(DG.TAGS.UNITS, units);
188
+
144
189
  return [DG.DataFrame.fromColumns([
145
190
  descriptionsArrayCol,
146
191
  sequenceCol,
@@ -153,4 +198,4 @@ export function importFasta(fileContent: string): DG.DataFrame [] {
153
198
  //input: column col {semType: Macromolecule}
154
199
  export function convertPanel(col: DG.Column): void {
155
200
  convert(col);
156
- }
201
+ }
@@ -0,0 +1,49 @@
1
+ import {after, before, category, expect, expectFloat, test} from '@datagrok-libraries/utils/src/test';
2
+ import * as DG from 'datagrok-api/dg';
3
+ import {createTableView, readDataframe} from './utils';
4
+ import {_package} from '../package-test';
5
+ import {getEmbeddingColsNames, sequenceSpace} from '../utils/sequence-space';
6
+ import {drawTooltip, sequenceGetSimilarities} from '../utils/sequence-activity-cliffs';
7
+ import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
8
+
9
+
10
+ category('activityCliffs', async () => {
11
+ let actCliffsTableView: DG.TableView;
12
+ let actCliffsDf: DG.DataFrame;
13
+
14
+ before(async () => {
15
+ actCliffsTableView = await createTableView('sample_MSA.csv');
16
+ actCliffsDf = await readDataframe('sample_MSA.csv');
17
+ });
18
+
19
+ test('activityCliffsOpen', async () => {
20
+ const axesNames = getEmbeddingColsNames(actCliffsDf);
21
+ const units = actCliffsDf.col('MSA')!.tags[DG.TAGS.UNITS];
22
+ const options = {
23
+ 'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
24
+ };
25
+ const scatterPlot = await getActivityCliffs(
26
+ actCliffsDf,
27
+ actCliffsDf.col('MSA')!,
28
+ axesNames,
29
+ actCliffsDf.col('Activity')!,
30
+ 50,
31
+ 'Levenshtein',
32
+ 't-SNE',
33
+ DG.SEMTYPE.MACROMOLECULE,
34
+ units,
35
+ sequenceSpace,
36
+ sequenceGetSimilarities,
37
+ drawTooltip);
38
+
39
+ expect(scatterPlot != null, true);
40
+
41
+ const cliffsLink = (Array.from(scatterPlot.root.children) as Element[])
42
+ .filter((it) => it.className === 'ui-btn ui-btn-ok');
43
+ expect((cliffsLink[0] as HTMLElement).innerText, '101 cliffs');
44
+ });
45
+
46
+ after(async () => {
47
+ actCliffsTableView.close();
48
+ });
49
+ });
@@ -7,6 +7,8 @@ import * as DG from 'datagrok-api/dg';
7
7
  import {mmSemType} from '../const';
8
8
  import {importFasta} from '../package';
9
9
 
10
+ type DfReaderFunc = () => Promise<DG.DataFrame>;
11
+
10
12
  category('detectors', () => {
11
13
  const csvDf1: string = `col1
12
14
  1
@@ -87,36 +89,120 @@ YNR-WYV-KHP
87
89
  MWRSWY-CKHP
88
90
  `;
89
91
 
90
- test('testDetectorsNegative1', async () => { await _testDetectorsNegative(csvDf1); });
91
- test('testDetectorsNegative2', async () => { await _testDetectorsNegative(csvDf2); });
92
- test('testDetectorsNegative3', async () => { await _testDetectorsNegative(csvDf3); });
93
- test('testDetectorsNegativeSmiles', async () => { await _testDetectorsNegative(csvDfSmiles); });
94
-
95
- test('testDetectorsN1', async () => { await _testDetectorsN1(csvDfN1); });
96
- test('testDetectorsAA1', async () => { await _testDetectorsAA1(csvDfAA1); });
97
- test('testDetectorsMsaN1', async () => { await _testDetectorsMsaN1(csvDfMsaN1); });
98
- test('testDetectorsMsaAA1', async () => { await _testDetectorsMsaAA1(csvDfMsaAA1); });
99
-
100
- test('testDetectorsSepNt', async () => { await _testDetectorsSepNt(csvDfSepNt, '*'); });
101
- test('testDetectorsSepPt', async () => { await _testDetectorsSepPt(csvDfSepPt, '-'); });
102
- test('testDetectorsSepUn1', async () => { await _testDetectorsSepUn(csvDfSepUn1, '-'); });
103
- test('testDetectorsSepUn2', async () => { await _testDetectorsSepUn(csvDfSepUn2, '/'); });
92
+ const enum Samples {
93
+ peptidesComplex = 'PeptidesComplex',
94
+ fastaCsv = 'FastaCsv',
95
+ msaComplex = 'MsaComplex',
96
+ }
97
+
98
+ const samples: { [key: string]: string } = {
99
+ 'PeptidesComplex': 'System:AppData/Bio/samples/peptides_complex_aligned.csv',
100
+ 'FastaCsv': 'System:AppData/Bio/samples/sample_FASTA.csv',
101
+ 'MsaComplex': 'System:AppData/Bio/samples/sample_MSA.csv',
102
+ };
103
+
104
+ const _samplesDfs: { [key: string]: Promise<DG.DataFrame> } = {};
105
+ const readSamplesCsv: (key: string) => DfReaderFunc = (key: string) => {
106
+ return async () => {
107
+ if (!(key in _samplesDfs)) {
108
+ _samplesDfs[key] = (async (): Promise<DG.DataFrame> => {
109
+ const csv: string = await grok.dapi.files.readAsText(samples[key]);
110
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
111
+ await grok.data.detectSemanticTypes(df);
112
+ return df;
113
+ })();
114
+ }
115
+ return _samplesDfs[key];
116
+ };
117
+ };
118
+
119
+ const _csvDfs: { [key: string]: Promise<DG.DataFrame> } = {};
120
+ const readCsv: (key: string, csv: string) => DfReaderFunc = (key: string, csv: string) => {
121
+ return async () => {
122
+ if (!(key in _csvDfs)) {
123
+ _csvDfs[key] = (async (): Promise<DG.DataFrame> => {
124
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
125
+ await grok.data.detectSemanticTypes(df);
126
+ return df;
127
+ })();
128
+ }
129
+ return _csvDfs[key];
130
+ };
131
+ };
132
+
133
+ test('Negative1', async () => { await _testNeg(readCsv('csvDf1', csvDf1), 'col1'); });
134
+ test('Negative2', async () => { await _testNeg(readCsv('csvDf2', csvDf2), 'col1'); });
135
+ test('Negative3', async () => { await _testNeg(readCsv('csvDf3', csvDf3), 'col1'); });
136
+ test('NegativeSmiles', async () => { await _testNeg(readCsv('csvDfSmiles', csvDfSmiles), 'col1'); });
137
+
138
+ test('N1', async () => { await _testN1(csvDfN1); });
139
+ test('AA1', async () => { await _testAA1(csvDfAA1); });
140
+ test('MsaN1', async () => { await _testMsaN1(csvDfMsaN1); });
141
+ test('MsaAA1', async () => { await _testMsaAA1(csvDfMsaAA1); });
142
+
143
+ test('SepNt', async () => { await _testSepNt(csvDfSepNt, '*'); });
144
+ test('SepPt', async () => { await _testSepPt(csvDfSepPt, '-'); });
145
+ test('SepUn1', async () => { await _testSepUn(csvDfSepUn1, '-'); });
146
+ test('SepUn2', async () => { await _testSepUn(csvDfSepUn2, '/'); });
147
+
148
+ test('SepMsaN1', async () => { await _testSepMsaN1(csvDfSepMsaN1); });
149
+
150
+ test('SamplesFastaCsvPt', async () => {
151
+ await _testSamplesFastaCsvPt();
152
+ });
153
+ test('SamplesFastaCsvNegativeEntry', async () => {
154
+ await _testNeg(readSamplesCsv(Samples.fastaCsv), 'Entry');
155
+ });
156
+ test('SamplesFastaCsvNegativeLength', async () => {
157
+ await _testNeg(readSamplesCsv(Samples.fastaCsv), 'Length');
158
+ });
159
+ test('SamplesFastaCsvNegativeUniProtKB', async () => {
160
+ await _testNeg(readSamplesCsv(Samples.fastaCsv), 'UniProtKB');
161
+ });
162
+
163
+ test('SamplesFastaFastaPt', async () => { await _testSamplesFastaFastaPt(); });
164
+
165
+ // System:AppData/Bio/samples/peptides_complex_align.csv contains monomers with spaces
166
+ // test('SamplesPeptidesComplexUn', async () => {
167
+ // await _testSamplesPeptidesComplexUn();
168
+ // });
169
+
170
+ test('samplesPeptidesComplexNegativeID', async () => {
171
+ await _testNeg(readSamplesCsv(Samples.peptidesComplex), 'ID');
172
+ });
173
+ test('SamplesPeptidesComplexNegativeMeasured', async () => {
174
+ await _testNeg(readSamplesCsv(Samples.peptidesComplex), 'Measured');
175
+ });
176
+ test('SamplesPeptidesComplexNegativeValue', async () => {
177
+ await _testNeg(readSamplesCsv(Samples.peptidesComplex), 'Value');
178
+ });
179
+
180
+ test('samplesMsaComplexUn', async () => {
181
+ await _testPos(readSamplesCsv(Samples.msaComplex), 'MSA', 'separator:SEQ.MSA:UN', '/');
182
+ });
183
+ test('samplesMsaComplexNegativeActivity', async () => {
184
+ await _testNeg(readSamplesCsv(Samples.msaComplex), 'Activity');
185
+ });
186
+ });
104
187
 
105
- test('testDetectorsSepMsaN1', async () => { await _testDetectorsSepMsaN1(csvDfSepMsaN1); });
188
+ export async function _testNeg(readDf: DfReaderFunc, colName: string) {
189
+ const df: DG.DataFrame = await readDf();
106
190
 
107
- test('testDetectorsSamplesFastaCsvPt', async () => { await _testDetectorsSamplesFastaCsvPt(); });
108
- test('testDetectorsSamplesFastaFastaPt', async () => { await _testDetectorsSamplesFastaFastaPt(); });
109
- });
191
+ const col: DG.Column = df.col(colName)!;
192
+ expect(col.semType === mmSemType, false);
193
+ }
110
194
 
111
- export async function _testDetectorsNegative(csvDf: string) {
112
- const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf);
113
- await grok.data.detectSemanticTypes(df);
195
+ export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, separator: string) {
196
+ const df: DG.DataFrame = await readDf();
114
197
 
115
- const col1: DG.Column = df.col('col1')!;
116
- expect(col1.semType == mmSemType, false);
198
+ const col: DG.Column = df.col(colName)!;
199
+ expect(col.semType === mmSemType, true);
200
+ expect(col.getTag(DG.TAGS.UNITS), units);
201
+ if (separator)
202
+ expect(col.getTag('separator'), separator);
117
203
  }
118
204
 
119
- export async function _testDetectorsN1(csvDfN1: string) {
205
+ export async function _testN1(csvDfN1: string) {
120
206
  const dfN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
121
207
  await grok.data.detectSemanticTypes(dfN1);
122
208
 
@@ -125,7 +211,7 @@ export async function _testDetectorsN1(csvDfN1: string) {
125
211
  expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:NT');
126
212
  }
127
213
 
128
- export async function _testDetectorsAA1(csvDfAA1: string) {
214
+ export async function _testAA1(csvDfAA1: string) {
129
215
  const dfAA1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfAA1);
130
216
  await grok.data.detectSemanticTypes(dfAA1);
131
217
 
@@ -134,7 +220,7 @@ export async function _testDetectorsAA1(csvDfAA1: string) {
134
220
  expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
135
221
  }
136
222
 
137
- export async function _testDetectorsMsaN1(csvDfMsaN1: string) {
223
+ export async function _testMsaN1(csvDfMsaN1: string) {
138
224
  const dfMsaN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfMsaN1);
139
225
  await grok.data.detectSemanticTypes(dfMsaN1);
140
226
 
@@ -143,7 +229,7 @@ export async function _testDetectorsMsaN1(csvDfMsaN1: string) {
143
229
  expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:NT');
144
230
  }
145
231
 
146
- export async function _testDetectorsMsaAA1(csvDfMsaAA1: string) {
232
+ export async function _testMsaAA1(csvDfMsaAA1: string) {
147
233
  const dfMsaAA1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfMsaAA1);
148
234
  await grok.data.detectSemanticTypes(dfMsaAA1);
149
235
 
@@ -152,7 +238,7 @@ export async function _testDetectorsMsaAA1(csvDfMsaAA1: string) {
152
238
  expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:PT');
153
239
  }
154
240
 
155
- export async function _testDetectorsSepNt(csv: string, separator: string) {
241
+ export async function _testSepNt(csv: string, separator: string) {
156
242
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
157
243
  await grok.data.detectSemanticTypes(df);
158
244
 
@@ -162,7 +248,7 @@ export async function _testDetectorsSepNt(csv: string, separator: string) {
162
248
  expect(col.getTag('separator'), separator);
163
249
  }
164
250
 
165
- export async function _testDetectorsSepPt(csv: string, separator: string) {
251
+ export async function _testSepPt(csv: string, separator: string) {
166
252
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
167
253
  await grok.data.detectSemanticTypes(df);
168
254
 
@@ -172,7 +258,7 @@ export async function _testDetectorsSepPt(csv: string, separator: string) {
172
258
  expect(col.getTag('separator'), separator);
173
259
  }
174
260
 
175
- export async function _testDetectorsSepUn(csv: string, separator: string) {
261
+ export async function _testSepUn(csv: string, separator: string) {
176
262
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
177
263
  await grok.data.detectSemanticTypes(df);
178
264
 
@@ -182,7 +268,7 @@ export async function _testDetectorsSepUn(csv: string, separator: string) {
182
268
  expect(col.getTag('separator'), separator);
183
269
  }
184
270
 
185
- export async function _testDetectorsSepMsaN1(csvDfSepMsaN1: string) {
271
+ export async function _testSepMsaN1(csvDfSepMsaN1: string) {
186
272
  const dfSepMsaN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfSepMsaN1);
187
273
  await grok.data.detectSemanticTypes(dfSepMsaN1);
188
274
 
@@ -191,7 +277,7 @@ export async function _testDetectorsSepMsaN1(csvDfSepMsaN1: string) {
191
277
  expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ.MSA:NT');
192
278
  }
193
279
 
194
- export async function _testDetectorsSamplesFastaCsvPt() {
280
+ export async function _testSamplesFastaCsvPt() {
195
281
  const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA.csv');
196
282
  const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
197
283
  await grok.data.detectSemanticTypes(df);
@@ -202,7 +288,7 @@ export async function _testDetectorsSamplesFastaCsvPt() {
202
288
  expect(col.getTag('separator'), null);
203
289
  }
204
290
 
205
- export async function _testDetectorsSamplesFastaFastaPt() {
291
+ export async function _testSamplesFastaFastaPt() {
206
292
  const fasta: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA.fasta');
207
293
  const df: DG.DataFrame = importFasta(fasta)[0];
208
294
 
@@ -210,4 +296,16 @@ export async function _testDetectorsSamplesFastaFastaPt() {
210
296
  expect(col.semType, mmSemType);
211
297
  expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
212
298
  expect(col.getTag('separator'), null);
213
- }
299
+ }
300
+
301
+ export async function _testSamplesPeptidesComplexUn() {
302
+ const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/peptides_complex_aligned.csv');
303
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
304
+ await grok.data.detectSemanticTypes(df);
305
+
306
+ const col: DG.Column = df.col('AlignedSequence')!;
307
+ expect(col.semType, mmSemType);
308
+ expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ.MSA:UN');
309
+ expect(col.getTag('separator'), '-');
310
+ }
311
+
@@ -1,24 +1,26 @@
1
1
  import {before, category, test, expect} from '@datagrok-libraries/utils/src/test';
2
- import * as DG from "datagrok-api/dg";
3
- import { sequenceSpace } from '../utils/sequence-space';
4
- import { readDataframe } from './utils';
2
+ import * as DG from 'datagrok-api/dg';
3
+ import {sequenceSpace} from '../utils/sequence-space';
4
+ import {readDataframe} from './utils';
5
5
  //import * as grok from 'datagrok-api/grok';
6
6
 
7
7
  category('sequenceSpace', async () => {
8
+ let testFastaDf: DG.DataFrame;
8
9
 
9
- let testFastaDf: DG.DataFrame;
10
-
11
- before(async () => {
12
- //@ts-ignore
13
- testFastaDf = await readDataframe('sample_FASTA.csv');
14
- });
15
-
16
-
17
- test('sequenceSpaceOpens', async () => {
18
- //@ts-ignore
19
- const res = await sequenceSpace(testFastaDf.col('Sequence')!, 't-SNE', 'Levenshtein', ['Embed_X', 'Embed_Y']);
20
- expect(res.coordinates != undefined, true);
21
- expect(res.distance != undefined, true);
22
- });
23
-
24
- });
10
+ before(async () => {
11
+ testFastaDf = await readDataframe('sample_FASTA.csv');
12
+ });
13
+
14
+
15
+ test('sequenceSpaceOpens', async () => {
16
+ const sequenceSpaceParams = {
17
+ seqCol: testFastaDf.col('Sequence')!,
18
+ methodName: 't-SNE',
19
+ similarityMetric: 'Levenshtein',
20
+ embedAxesNames: ['Embed_X', 'Embed_Y']
21
+ };
22
+ const res = await sequenceSpace(sequenceSpaceParams);
23
+ expect(res.coordinates != undefined, true);
24
+ expect(res.distance != undefined, true);
25
+ });
26
+ });
@@ -1,8 +1,8 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
- import * as grok from "datagrok-api/grok";
2
+ import * as grok from 'datagrok-api/grok';
3
3
  import {expect} from '@datagrok-libraries/utils/src/test';
4
4
  import {runKalign} from '../utils/multiple-sequence-alignment';
5
- import { _package} from '../package-test';
5
+ import {_package} from '../package-test';
6
6
 
7
7
  export async function loadFileAsText(name: string): Promise<string> {
8
8
  return await _package.files.readAsText(name);
@@ -15,6 +15,13 @@ export async function readDataframe(tableName: string): Promise<DG.DataFrame> {
15
15
  return df;
16
16
  }
17
17
 
18
+ export async function createTableView(tableName: string): Promise<DG.TableView> {
19
+ const df = await readDataframe(tableName);
20
+ df.name = tableName.replace('.csv', '');
21
+ const view = grok.shell.addTableView(df);
22
+ return view;
23
+ }
24
+
18
25
 
19
26
  /**
20
27
  * Tests if a table has non zero rows and columns.
@@ -35,5 +42,4 @@ export function _testTableIsNotEmpty(table: DG.DataFrame): void {
35
42
  export async function _testMSAIsCorrect(col: DG.Column): Promise<void> {
36
43
  const msaCol = await runKalign(col, true);
37
44
  expect(msaCol.toList().every((v, i) => (v == col.get(i) || v == null)), true);
38
-
39
45
  }
@@ -2,23 +2,22 @@ import * as DG from 'datagrok-api/dg';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
 
4
4
  export function convert(col: DG.Column): void {
5
-
6
5
  const current = col.tags[DG.TAGS.UNITS];
7
6
  //TODO: read all notations
8
7
  const notations = ['fasta:SEQ:NT', 'fasta:SEQ:PT', 'fasta:SEQ.MSA:NT', 'fasta:SEQ.MSA:PT', 'HELM'];
9
- const choices = ui.choiceInput("convert to", "", notations.filter(e => e !== current));
8
+ const choices = ui.choiceInput('convert to', '', notations.filter((e) => e !== current));
10
9
 
11
10
  ui.dialog('Convert sequence')
12
- .add(
11
+ .add(
13
12
  ui.div([
14
13
  ui.h1('current notation'),
15
14
  ui.div(current),
16
15
  choices.root
17
16
  ])
18
- )
19
- .onOK(() => {
20
- //TODO: create new converted column
21
- //col.dataFrame.columns.add();
22
- })
23
- .show();
17
+ )
18
+ .onOK(() => {
19
+ //TODO: create new converted column
20
+ //col.dataFrame.columns.add();
21
+ })
22
+ .show();
24
23
  }
@@ -56,7 +56,7 @@ export async function runKalign(col: DG.Column, isAligned = false) : Promise<DG.
56
56
 
57
57
  const aligned = _fastaToStrings(buf).slice(0, sequences.length);
58
58
  const alignedCol = DG.Column.fromStrings(`msa(${col.name})`, aligned);
59
- alignedCol.setTag(DG.TAGS.UNITS, '');
59
+ alignedCol.setTag(DG.TAGS.UNITS, '');
60
60
  alignedCol.semType = C.SEM_TYPES.Macro_Molecule;
61
61
  return alignedCol;
62
62
  }
@@ -0,0 +1,30 @@
1
+ import {IDrawTooltipParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
2
+ import * as DG from 'datagrok-api/dg';
3
+ import * as ui from 'datagrok-api/ui';
4
+
5
+ export async function sequenceGetSimilarities(col: DG.Column, seq: string): Promise<DG.Column | null> {
6
+ return null;
7
+ }
8
+
9
+ export function drawTooltip(params: IDrawTooltipParams) {
10
+ params.tooltips[params.line.id] = ui.divH([]);
11
+ const columnNames = ui.divV([
12
+ ui.divText('sequense'),
13
+ ui.divText(params.activity.name),
14
+ ]);
15
+ columnNames.style.fontWeight = 'bold';
16
+ columnNames.style.display = 'flex';
17
+ columnNames.style.justifyContent = 'space-between';
18
+ params.tooltips[params.line.id].append(columnNames);
19
+ params.line.mols.forEach((mol: number) => {
20
+ const seq = ui.divText(params.df.get(params.seqCol.name, mol));
21
+ const activity = ui.divText(params.df.get(params.activity.name, mol).toFixed(2));
22
+ activity.style.display = 'flex';
23
+ activity.style.justifyContent = 'left';
24
+ activity.style.paddingLeft = '30px';
25
+ params.tooltips[params.line.id].append(ui.divV([
26
+ seq,
27
+ activity,
28
+ ], {style: {paddingLeft: '5px'}}));
29
+ });
30
+ }