@datagrok/bio 1.4.1 → 1.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +17 -4
- package/dist/package-test.js +859 -636
- package/dist/package.js +664 -584
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +1665 -1651
- package/files/sample_MSA.csv +541 -0
- package/package.json +7 -6
- package/setup.cmd +10 -1
- package/src/package-test.ts +1 -0
- package/src/package.ts +70 -25
- package/src/tests/activity-cliffs-tests.ts +49 -0
- package/src/tests/detectors-test.ts +132 -34
- package/src/tests/sequence-space-test.ts +21 -19
- package/src/tests/utils.ts +9 -3
- package/src/utils/convert.ts +8 -9
- package/src/utils/multiple-sequence-alignment.ts +1 -1
- package/src/utils/sequence-activity-cliffs.ts +36 -0
- package/src/utils/sequence-space.ts +30 -30
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "@datagrok/bio",
|
|
3
3
|
"beta": false,
|
|
4
4
|
"friendlyName": "Bio",
|
|
5
|
-
"version": "1.4.1",
|
|
5
|
+
"version": "1.5.2",
|
|
6
6
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
7
7
|
"repository": {
|
|
8
8
|
"type": "git",
|
|
@@ -11,11 +11,11 @@
|
|
|
11
11
|
},
|
|
12
12
|
"dependencies": {
|
|
13
13
|
"@biowasm/aioli": ">=2.4.0",
|
|
14
|
-
"@datagrok-libraries/bio": "^2.
|
|
15
|
-
"@datagrok-libraries/utils": "^0.
|
|
16
|
-
"@datagrok-libraries/ml": "^2.0.
|
|
14
|
+
"@datagrok-libraries/bio": "^2.3.1",
|
|
15
|
+
"@datagrok-libraries/utils": "^1.0.0",
|
|
16
|
+
"@datagrok-libraries/ml": "^2.0.8",
|
|
17
17
|
"cash-dom": "latest",
|
|
18
|
-
"datagrok-api": "^1.4.
|
|
18
|
+
"datagrok-api": "^1.4.12",
|
|
19
19
|
"dayjs": "latest",
|
|
20
20
|
"ts-loader": "^9.2.5",
|
|
21
21
|
"typescript": "^4.4.2"
|
|
@@ -41,7 +41,8 @@
|
|
|
41
41
|
"debug-sequences1": "grok publish --rebuild",
|
|
42
42
|
"release-sequences1": "grok publish --rebuild --release",
|
|
43
43
|
"build-sequences1": "webpack",
|
|
44
|
-
"local
|
|
44
|
+
"debug-local": "grok publish local",
|
|
45
|
+
"release-local": "grok publish local --release",
|
|
45
46
|
"build": "webpack",
|
|
46
47
|
"debug-sequences1-public": "grok publish public --rebuild",
|
|
47
48
|
"release-sequences1-public": "grok publish public --rebuild --release",
|
package/setup.cmd
CHANGED
|
@@ -1,10 +1,19 @@
|
|
|
1
1
|
cd ../../js-api
|
|
2
2
|
call npm install
|
|
3
3
|
call npm link
|
|
4
|
+
cd ../libraries/utils
|
|
5
|
+
call npm install
|
|
6
|
+
call npm link
|
|
7
|
+
call npm link datagrok-api
|
|
8
|
+
cd ../libraries/ml
|
|
9
|
+
call npm install
|
|
10
|
+
call npm link
|
|
11
|
+
call npm link @datagrok-libraries/utils
|
|
4
12
|
cd ../libraries/bio
|
|
5
13
|
call npm install
|
|
6
14
|
call npm link
|
|
15
|
+
call npm link @datagrok-libraries/utils
|
|
7
16
|
cd ../../packages/Bio
|
|
8
17
|
call npm install
|
|
9
|
-
call npm link datagrok-api @datagrok-libraries/bio
|
|
18
|
+
call npm link datagrok-api @datagrok-libraries/bio @datagrok-libraries/utils @datagrok-libraries/ml
|
|
10
19
|
webpack
|
package/src/package-test.ts
CHANGED
package/src/package.ts
CHANGED
|
@@ -2,17 +2,21 @@
|
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
import * as DG from 'datagrok-api/dg';
|
|
5
|
-
import {SequenceAlignment, Aligned} from './seq_align';
|
|
6
5
|
|
|
7
6
|
export const _package = new DG.Package();
|
|
8
7
|
|
|
9
|
-
import {
|
|
8
|
+
import {mmSemType} from './const';
|
|
9
|
+
import {WebLogo, SeqColStats} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
10
10
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
11
11
|
import {runKalign, testMSAEnoughMemory} from './utils/multiple-sequence-alignment';
|
|
12
|
+
import {SequenceAlignment, Aligned} from './seq_align';
|
|
13
|
+
import {Nucleotides} from '@datagrok-libraries/bio/src/nucleotides';
|
|
14
|
+
import {Aminoacids} from '@datagrok-libraries/bio/src/aminoacids';
|
|
12
15
|
import {convert} from './utils/convert';
|
|
13
|
-
import {
|
|
14
|
-
import {
|
|
15
|
-
import {
|
|
16
|
+
import {getEmbeddingColsNames, sequenceSpace} from './utils/sequence-space';
|
|
17
|
+
import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
18
|
+
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
19
|
+
import {sequenceGetSimilarities, drawTooltip} from './utils/sequence-activity-cliffs';
|
|
16
20
|
|
|
17
21
|
//name: sequenceAlignment
|
|
18
22
|
//input: string alignType {choices: ['Local alignment', 'Global alignment']}
|
|
@@ -44,16 +48,35 @@ export function vdRegionViewer() {
|
|
|
44
48
|
return new VdRegionsViewer();
|
|
45
49
|
}
|
|
46
50
|
|
|
47
|
-
//top-menu: Bio | Activity Cliffs...
|
|
48
|
-
//name: Activity Cliffs
|
|
51
|
+
//top-menu: Bio | Sequence Activity Cliffs...
|
|
52
|
+
//name: Sequence Activity Cliffs
|
|
49
53
|
//description: detect activity cliffs
|
|
50
54
|
//input: dataframe df [Input data table]
|
|
51
|
-
//input: column
|
|
55
|
+
//input: column sequence {semType: Macromolecule}
|
|
52
56
|
//input: column activities
|
|
53
57
|
//input: double similarity = 80 [Similarity cutoff]
|
|
54
58
|
//input: string methodName { choices:["UMAP", "t-SNE", "SPE"] }
|
|
55
|
-
export async function activityCliffs(df: DG.DataFrame, smiles: DG.Column, activities: DG.Column,
|
|
59
|
+
export async function activityCliffs(df: DG.DataFrame, sequence: DG.Column, activities: DG.Column,
|
|
56
60
|
similarity: number, methodName: string): Promise<void> {
|
|
61
|
+
const axesNames = getEmbeddingColsNames(df);
|
|
62
|
+
const options = {
|
|
63
|
+
'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
|
|
64
|
+
};
|
|
65
|
+
const units = sequence!.tags[DG.TAGS.UNITS];
|
|
66
|
+
await getActivityCliffs(
|
|
67
|
+
df,
|
|
68
|
+
sequence,
|
|
69
|
+
axesNames,
|
|
70
|
+
activities,
|
|
71
|
+
similarity,
|
|
72
|
+
'Levenshtein',
|
|
73
|
+
methodName,
|
|
74
|
+
DG.SEMTYPE.MACROMOLECULE,
|
|
75
|
+
units,
|
|
76
|
+
sequenceSpace,
|
|
77
|
+
sequenceGetSimilarities,
|
|
78
|
+
drawTooltip,
|
|
79
|
+
(options as any)[methodName]);
|
|
57
80
|
}
|
|
58
81
|
|
|
59
82
|
//top-menu: Bio | Sequence Space...
|
|
@@ -64,18 +87,24 @@ export async function activityCliffs(df: DG.DataFrame, smiles: DG.Column, activi
|
|
|
64
87
|
//input: string similarityMetric { choices:["Levenshtein", "Tanimoto"] }
|
|
65
88
|
//input: bool plotEmbeddings = true
|
|
66
89
|
export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: DG.Column, methodName: string,
|
|
67
|
-
similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean)
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
90
|
+
similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean): Promise<void> {
|
|
91
|
+
const embedColsNames = getEmbeddingColsNames(table);
|
|
92
|
+
const chemSpaceParams = {
|
|
93
|
+
seqCol: macroMolecule,
|
|
94
|
+
methodName: methodName,
|
|
95
|
+
similarityMetric: similarityMetric,
|
|
96
|
+
embedAxesNames: embedColsNames
|
|
97
|
+
};
|
|
98
|
+
const sequenceSpaceRes = await sequenceSpace(chemSpaceParams);
|
|
99
|
+
const embeddings = sequenceSpaceRes.coordinates;
|
|
100
|
+
for (const col of embeddings)
|
|
101
|
+
table.columns.add(col);
|
|
102
|
+
if (plotEmbeddings) {
|
|
103
|
+
for (const v of grok.shell.views) {
|
|
104
|
+
if (v.name === table.name)
|
|
105
|
+
(v as DG.TableView).scatterPlot({x: embedColsNames[0], y: embedColsNames[1]});
|
|
106
|
+
}
|
|
107
|
+
}
|
|
79
108
|
};
|
|
80
109
|
|
|
81
110
|
//top-menu: Bio | MSA...
|
|
@@ -100,7 +129,7 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
100
129
|
const wl = await col.dataFrame.plot.fromType('WebLogo', {});
|
|
101
130
|
|
|
102
131
|
for (const v of grok.shell.views) {
|
|
103
|
-
if (v instanceof TableView && (v as DG.TableView).dataFrame.name === col.dataFrame.name) {
|
|
132
|
+
if (v instanceof DG.TableView && (v as DG.TableView).dataFrame.name === col.dataFrame.name) {
|
|
104
133
|
(v as DG.TableView).dockManager.dock(wl.root, 'down');
|
|
105
134
|
break;
|
|
106
135
|
}
|
|
@@ -122,10 +151,10 @@ function parseMacromolecule(
|
|
|
122
151
|
//description: Opens FASTA file
|
|
123
152
|
//tags: file-handler
|
|
124
153
|
//meta.ext: fasta, fna, ffn, faa, frn, fa
|
|
125
|
-
//input: string
|
|
154
|
+
//input: string fileContent
|
|
126
155
|
//output: list tables
|
|
127
156
|
export function importFasta(fileContent: string): DG.DataFrame [] {
|
|
128
|
-
const regex = /^>(.*)$/gm; // match
|
|
157
|
+
const regex = /^>(.*)$/gm; // match lines starting with >
|
|
129
158
|
const descriptionsArray = [];
|
|
130
159
|
const sequencesArray: string[] = [];
|
|
131
160
|
let startOfSequence = 0;
|
|
@@ -141,6 +170,22 @@ export function importFasta(fileContent: string): DG.DataFrame [] {
|
|
|
141
170
|
const descriptionsArrayCol = DG.Column.fromStrings('description', descriptionsArray);
|
|
142
171
|
const sequenceCol = DG.Column.fromStrings('sequence', sequencesArray);
|
|
143
172
|
sequenceCol.semType = 'Macromolecule';
|
|
173
|
+
|
|
174
|
+
const stats: SeqColStats = WebLogo.getStats(sequenceCol, 5, WebLogo.splitterAsFasta);
|
|
175
|
+
const seqType = stats.sameLength ? 'SEQ.MSA' : 'SEQ';
|
|
176
|
+
const alphabetCandidates: [string, Set<string>][] = [
|
|
177
|
+
['NT', new Set(Object.keys(Nucleotides.Names))],
|
|
178
|
+
['PT', new Set(Object.keys(Aminoacids.Names))],
|
|
179
|
+
];
|
|
180
|
+
// Calculate likelihoods for alphabet_candidates
|
|
181
|
+
const alphabetCandidatesSim: number[] = alphabetCandidates.map(
|
|
182
|
+
(c) => WebLogo.getAlphabetSimilarity(stats.freq, c[1]));
|
|
183
|
+
const maxCos = Math.max(...alphabetCandidatesSim);
|
|
184
|
+
const alphabet = maxCos > 0.65 ? alphabetCandidates[alphabetCandidatesSim.indexOf(maxCos)][0] : 'UN';
|
|
185
|
+
sequenceCol.semType = mmSemType;
|
|
186
|
+
const units: string = `fasta:${seqType}:${alphabet}`;
|
|
187
|
+
sequenceCol.setTag(DG.TAGS.UNITS, units);
|
|
188
|
+
|
|
144
189
|
return [DG.DataFrame.fromColumns([
|
|
145
190
|
descriptionsArrayCol,
|
|
146
191
|
sequenceCol,
|
|
@@ -153,4 +198,4 @@ export function importFasta(fileContent: string): DG.DataFrame [] {
|
|
|
153
198
|
//input: column col {semType: Macromolecule}
|
|
154
199
|
export function convertPanel(col: DG.Column): void {
|
|
155
200
|
convert(col);
|
|
156
|
-
}
|
|
201
|
+
}
|
|
package/src/tests/activity-cliffs-tests.ts
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import {after, before, category, expect, expectFloat, test} from '@datagrok-libraries/utils/src/test';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import {createTableView, readDataframe} from './utils';
|
|
4
|
+
import {_package} from '../package-test';
|
|
5
|
+
import {getEmbeddingColsNames, sequenceSpace} from '../utils/sequence-space';
|
|
6
|
+
import {drawTooltip, sequenceGetSimilarities} from '../utils/sequence-activity-cliffs';
|
|
7
|
+
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
category('activityCliffs', async () => {
|
|
11
|
+
let actCliffsTableView: DG.TableView;
|
|
12
|
+
let actCliffsDf: DG.DataFrame;
|
|
13
|
+
|
|
14
|
+
before(async () => {
|
|
15
|
+
actCliffsTableView = await createTableView('sample_MSA.csv');
|
|
16
|
+
actCliffsDf = await readDataframe('sample_MSA.csv');
|
|
17
|
+
});
|
|
18
|
+
|
|
19
|
+
test('activityCliffsOpen', async () => {
|
|
20
|
+
const axesNames = getEmbeddingColsNames(actCliffsDf);
|
|
21
|
+
const units = actCliffsDf.col('MSA')!.tags[DG.TAGS.UNITS];
|
|
22
|
+
const options = {
|
|
23
|
+
'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
|
|
24
|
+
};
|
|
25
|
+
const scatterPlot = await getActivityCliffs(
|
|
26
|
+
actCliffsDf,
|
|
27
|
+
actCliffsDf.col('MSA')!,
|
|
28
|
+
axesNames,
|
|
29
|
+
actCliffsDf.col('Activity')!,
|
|
30
|
+
50,
|
|
31
|
+
'Levenshtein',
|
|
32
|
+
't-SNE',
|
|
33
|
+
DG.SEMTYPE.MACROMOLECULE,
|
|
34
|
+
units,
|
|
35
|
+
sequenceSpace,
|
|
36
|
+
sequenceGetSimilarities,
|
|
37
|
+
drawTooltip);
|
|
38
|
+
|
|
39
|
+
expect(scatterPlot != null, true);
|
|
40
|
+
|
|
41
|
+
const cliffsLink = (Array.from(scatterPlot.root.children) as Element[])
|
|
42
|
+
.filter((it) => it.className === 'ui-btn ui-btn-ok');
|
|
43
|
+
expect((cliffsLink[0] as HTMLElement).innerText, '101 cliffs');
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
after(async () => {
|
|
47
|
+
actCliffsTableView.close();
|
|
48
|
+
});
|
|
49
|
+
});
|
|
package/src/tests/detectors-test.ts
CHANGED
|
@@ -7,6 +7,8 @@ import * as DG from 'datagrok-api/dg';
|
|
|
7
7
|
import {mmSemType} from '../const';
|
|
8
8
|
import {importFasta} from '../package';
|
|
9
9
|
|
|
10
|
+
type DfReaderFunc = () => Promise<DG.DataFrame>;
|
|
11
|
+
|
|
10
12
|
category('detectors', () => {
|
|
11
13
|
const csvDf1: string = `col1
|
|
12
14
|
1
|
|
@@ -87,36 +89,120 @@ YNR-WYV-KHP
|
|
|
87
89
|
MWRSWY-CKHP
|
|
88
90
|
`;
|
|
89
91
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
92
|
+
const enum Samples {
|
|
93
|
+
peptidesComplex = 'PeptidesComplex',
|
|
94
|
+
fastaCsv = 'FastaCsv',
|
|
95
|
+
msaComplex = 'MsaComplex',
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
const samples: { [key: string]: string } = {
|
|
99
|
+
'PeptidesComplex': 'System:AppData/Bio/samples/peptides_complex_aligned.csv',
|
|
100
|
+
'FastaCsv': 'System:AppData/Bio/samples/sample_FASTA.csv',
|
|
101
|
+
'MsaComplex': 'System:AppData/Bio/samples/sample_MSA.csv',
|
|
102
|
+
};
|
|
103
|
+
|
|
104
|
+
const _samplesDfs: { [key: string]: Promise<DG.DataFrame> } = {};
|
|
105
|
+
const readSamplesCsv: (key: string) => DfReaderFunc = (key: string) => {
|
|
106
|
+
return async () => {
|
|
107
|
+
if (!(key in _samplesDfs)) {
|
|
108
|
+
_samplesDfs[key] = (async (): Promise<DG.DataFrame> => {
|
|
109
|
+
const csv: string = await grok.dapi.files.readAsText(samples[key]);
|
|
110
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
111
|
+
await grok.data.detectSemanticTypes(df);
|
|
112
|
+
return df;
|
|
113
|
+
})();
|
|
114
|
+
}
|
|
115
|
+
return _samplesDfs[key];
|
|
116
|
+
};
|
|
117
|
+
};
|
|
118
|
+
|
|
119
|
+
const _csvDfs: { [key: string]: Promise<DG.DataFrame> } = {};
|
|
120
|
+
const readCsv: (key: string, csv: string) => DfReaderFunc = (key: string, csv: string) => {
|
|
121
|
+
return async () => {
|
|
122
|
+
if (!(key in _csvDfs)) {
|
|
123
|
+
_csvDfs[key] = (async (): Promise<DG.DataFrame> => {
|
|
124
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
125
|
+
await grok.data.detectSemanticTypes(df);
|
|
126
|
+
return df;
|
|
127
|
+
})();
|
|
128
|
+
}
|
|
129
|
+
return _csvDfs[key];
|
|
130
|
+
};
|
|
131
|
+
};
|
|
132
|
+
|
|
133
|
+
test('Negative1', async () => { await _testNeg(readCsv('csvDf1', csvDf1), 'col1'); });
|
|
134
|
+
test('Negative2', async () => { await _testNeg(readCsv('csvDf2', csvDf2), 'col1'); });
|
|
135
|
+
test('Negative3', async () => { await _testNeg(readCsv('csvDf3', csvDf3), 'col1'); });
|
|
136
|
+
test('NegativeSmiles', async () => { await _testNeg(readCsv('csvDfSmiles', csvDfSmiles), 'col1'); });
|
|
137
|
+
|
|
138
|
+
test('N1', async () => { await _testN1(csvDfN1); });
|
|
139
|
+
test('AA1', async () => { await _testAA1(csvDfAA1); });
|
|
140
|
+
test('MsaN1', async () => { await _testMsaN1(csvDfMsaN1); });
|
|
141
|
+
test('MsaAA1', async () => { await _testMsaAA1(csvDfMsaAA1); });
|
|
142
|
+
|
|
143
|
+
test('SepNt', async () => { await _testSepNt(csvDfSepNt, '*'); });
|
|
144
|
+
test('SepPt', async () => { await _testSepPt(csvDfSepPt, '-'); });
|
|
145
|
+
test('SepUn1', async () => { await _testSepUn(csvDfSepUn1, '-'); });
|
|
146
|
+
test('SepUn2', async () => { await _testSepUn(csvDfSepUn2, '/'); });
|
|
147
|
+
|
|
148
|
+
test('SepMsaN1', async () => { await _testSepMsaN1(csvDfSepMsaN1); });
|
|
149
|
+
|
|
150
|
+
test('SamplesFastaCsvPt', async () => {
|
|
151
|
+
await _testSamplesFastaCsvPt();
|
|
152
|
+
});
|
|
153
|
+
test('SamplesFastaCsvNegativeEntry', async () => {
|
|
154
|
+
await _testNeg(readSamplesCsv(Samples.fastaCsv), 'Entry');
|
|
155
|
+
});
|
|
156
|
+
test('SamplesFastaCsvNegativeLength', async () => {
|
|
157
|
+
await _testNeg(readSamplesCsv(Samples.fastaCsv), 'Length');
|
|
158
|
+
});
|
|
159
|
+
test('SamplesFastaCsvNegativeUniProtKB', async () => {
|
|
160
|
+
await _testNeg(readSamplesCsv(Samples.fastaCsv), 'UniProtKB');
|
|
161
|
+
});
|
|
162
|
+
|
|
163
|
+
test('SamplesFastaFastaPt', async () => { await _testSamplesFastaFastaPt(); });
|
|
164
|
+
|
|
165
|
+
// System:AppData/Bio/samples/peptides_complex_align.csv contains monomers with spaces
|
|
166
|
+
// test('SamplesPeptidesComplexUn', async () => {
|
|
167
|
+
// await _testSamplesPeptidesComplexUn();
|
|
168
|
+
// });
|
|
169
|
+
|
|
170
|
+
test('samplesPeptidesComplexNegativeID', async () => {
|
|
171
|
+
await _testNeg(readSamplesCsv(Samples.peptidesComplex), 'ID');
|
|
172
|
+
});
|
|
173
|
+
test('SamplesPeptidesComplexNegativeMeasured', async () => {
|
|
174
|
+
await _testNeg(readSamplesCsv(Samples.peptidesComplex), 'Measured');
|
|
175
|
+
});
|
|
176
|
+
test('SamplesPeptidesComplexNegativeValue', async () => {
|
|
177
|
+
await _testNeg(readSamplesCsv(Samples.peptidesComplex), 'Value');
|
|
178
|
+
});
|
|
179
|
+
|
|
180
|
+
test('samplesMsaComplexUn', async () => {
|
|
181
|
+
await _testPos(readSamplesCsv(Samples.msaComplex), 'MSA', 'separator:SEQ.MSA:UN', '/');
|
|
182
|
+
});
|
|
183
|
+
test('samplesMsaComplexNegativeActivity', async () => {
|
|
184
|
+
await _testNeg(readSamplesCsv(Samples.msaComplex), 'Activity');
|
|
185
|
+
});
|
|
186
|
+
});
|
|
104
187
|
|
|
105
|
-
|
|
188
|
+
export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
189
|
+
const df: DG.DataFrame = await readDf();
|
|
106
190
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
}
|
|
191
|
+
const col: DG.Column = df.col(colName)!;
|
|
192
|
+
expect(col.semType === mmSemType, false);
|
|
193
|
+
}
|
|
110
194
|
|
|
111
|
-
export async function
|
|
112
|
-
const df: DG.DataFrame =
|
|
113
|
-
await grok.data.detectSemanticTypes(df);
|
|
195
|
+
export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, separator: string) {
|
|
196
|
+
const df: DG.DataFrame = await readDf();
|
|
114
197
|
|
|
115
|
-
const
|
|
116
|
-
expect(
|
|
198
|
+
const col: DG.Column = df.col(colName)!;
|
|
199
|
+
expect(col.semType === mmSemType, true);
|
|
200
|
+
expect(col.getTag(DG.TAGS.UNITS), units);
|
|
201
|
+
if (separator)
|
|
202
|
+
expect(col.getTag('separator'), separator);
|
|
117
203
|
}
|
|
118
204
|
|
|
119
|
-
export async function _testDetectorsN1(csvDfN1: string) {
|
|
205
|
+
export async function _testN1(csvDfN1: string) {
|
|
120
206
|
const dfN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
|
|
121
207
|
await grok.data.detectSemanticTypes(dfN1);
|
|
122
208
|
|
|
@@ -125,7 +211,7 @@ export async function _testDetectorsN1(csvDfN1: string) {
|
|
|
125
211
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:NT');
|
|
126
212
|
}
|
|
127
213
|
|
|
128
|
-
export async function _testDetectorsAA1(csvDfAA1: string) {
|
|
214
|
+
export async function _testAA1(csvDfAA1: string) {
|
|
129
215
|
const dfAA1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfAA1);
|
|
130
216
|
await grok.data.detectSemanticTypes(dfAA1);
|
|
131
217
|
|
|
@@ -134,7 +220,7 @@ export async function _testDetectorsAA1(csvDfAA1: string) {
|
|
|
134
220
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
|
|
135
221
|
}
|
|
136
222
|
|
|
137
|
-
export async function _testDetectorsMsaN1(csvDfMsaN1: string) {
|
|
223
|
+
export async function _testMsaN1(csvDfMsaN1: string) {
|
|
138
224
|
const dfMsaN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfMsaN1);
|
|
139
225
|
await grok.data.detectSemanticTypes(dfMsaN1);
|
|
140
226
|
|
|
@@ -143,7 +229,7 @@ export async function _testDetectorsMsaN1(csvDfMsaN1: string) {
|
|
|
143
229
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:NT');
|
|
144
230
|
}
|
|
145
231
|
|
|
146
|
-
export async function _testDetectorsMsaAA1(csvDfMsaAA1: string) {
|
|
232
|
+
export async function _testMsaAA1(csvDfMsaAA1: string) {
|
|
147
233
|
const dfMsaAA1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfMsaAA1);
|
|
148
234
|
await grok.data.detectSemanticTypes(dfMsaAA1);
|
|
149
235
|
|
|
@@ -152,7 +238,7 @@ export async function _testDetectorsMsaAA1(csvDfMsaAA1: string) {
|
|
|
152
238
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:PT');
|
|
153
239
|
}
|
|
154
240
|
|
|
155
|
-
export async function _testDetectorsSepNt(csv: string, separator: string) {
|
|
241
|
+
export async function _testSepNt(csv: string, separator: string) {
|
|
156
242
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
157
243
|
await grok.data.detectSemanticTypes(df);
|
|
158
244
|
|
|
@@ -162,7 +248,7 @@ export async function _testDetectorsSepNt(csv: string, separator: string) {
|
|
|
162
248
|
expect(col.getTag('separator'), separator);
|
|
163
249
|
}
|
|
164
250
|
|
|
165
|
-
export async function _testDetectorsSepPt(csv: string, separator: string) {
|
|
251
|
+
export async function _testSepPt(csv: string, separator: string) {
|
|
166
252
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
167
253
|
await grok.data.detectSemanticTypes(df);
|
|
168
254
|
|
|
@@ -172,7 +258,7 @@ export async function _testDetectorsSepPt(csv: string, separator: string) {
|
|
|
172
258
|
expect(col.getTag('separator'), separator);
|
|
173
259
|
}
|
|
174
260
|
|
|
175
|
-
export async function _testDetectorsSepUn(csv: string, separator: string) {
|
|
261
|
+
export async function _testSepUn(csv: string, separator: string) {
|
|
176
262
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
177
263
|
await grok.data.detectSemanticTypes(df);
|
|
178
264
|
|
|
@@ -182,7 +268,7 @@ export async function _testDetectorsSepUn(csv: string, separator: string) {
|
|
|
182
268
|
expect(col.getTag('separator'), separator);
|
|
183
269
|
}
|
|
184
270
|
|
|
185
|
-
export async function _testDetectorsSepMsaN1(csvDfSepMsaN1: string) {
|
|
271
|
+
export async function _testSepMsaN1(csvDfSepMsaN1: string) {
|
|
186
272
|
const dfSepMsaN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfSepMsaN1);
|
|
187
273
|
await grok.data.detectSemanticTypes(dfSepMsaN1);
|
|
188
274
|
|
|
@@ -191,7 +277,7 @@ export async function _testDetectorsSepMsaN1(csvDfSepMsaN1: string) {
|
|
|
191
277
|
expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ.MSA:NT');
|
|
192
278
|
}
|
|
193
279
|
|
|
194
|
-
export async function _testDetectorsSamplesFastaCsvPt() {
|
|
280
|
+
export async function _testSamplesFastaCsvPt() {
|
|
195
281
|
const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA.csv');
|
|
196
282
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
197
283
|
await grok.data.detectSemanticTypes(df);
|
|
@@ -202,7 +288,7 @@ export async function _testDetectorsSamplesFastaCsvPt() {
|
|
|
202
288
|
expect(col.getTag('separator'), null);
|
|
203
289
|
}
|
|
204
290
|
|
|
205
|
-
export async function _testDetectorsSamplesFastaFastaPt() {
|
|
291
|
+
export async function _testSamplesFastaFastaPt() {
|
|
206
292
|
const fasta: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA.fasta');
|
|
207
293
|
const df: DG.DataFrame = importFasta(fasta)[0];
|
|
208
294
|
|
|
@@ -210,4 +296,16 @@ export async function _testDetectorsSamplesFastaFastaPt() {
|
|
|
210
296
|
expect(col.semType, mmSemType);
|
|
211
297
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
|
|
212
298
|
expect(col.getTag('separator'), null);
|
|
213
|
-
}
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
export async function _testSamplesPeptidesComplexUn() {
|
|
302
|
+
const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/peptides_complex_aligned.csv');
|
|
303
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
304
|
+
await grok.data.detectSemanticTypes(df);
|
|
305
|
+
|
|
306
|
+
const col: DG.Column = df.col('AlignedSequence')!;
|
|
307
|
+
expect(col.semType, mmSemType);
|
|
308
|
+
expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ.MSA:UN');
|
|
309
|
+
expect(col.getTag('separator'), '-');
|
|
310
|
+
}
|
|
311
|
+
|
|
package/src/tests/sequence-space-test.ts
CHANGED
|
@@ -1,24 +1,26 @@
|
|
|
1
1
|
import {before, category, test, expect} from '@datagrok-libraries/utils/src/test';
|
|
2
|
-
import * as DG from
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import {sequenceSpace} from '../utils/sequence-space';
|
|
4
|
+
import {readDataframe} from './utils';
|
|
5
5
|
//import * as grok from 'datagrok-api/grok';
|
|
6
6
|
|
|
7
7
|
category('sequenceSpace', async () => {
|
|
8
|
+
let testFastaDf: DG.DataFrame;
|
|
8
9
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
});
|
|
10
|
+
before(async () => {
|
|
11
|
+
testFastaDf = await readDataframe('sample_FASTA.csv');
|
|
12
|
+
});
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
test('sequenceSpaceOpens', async () => {
|
|
16
|
+
const sequenceSpaceParams = {
|
|
17
|
+
seqCol: testFastaDf.col('Sequence')!,
|
|
18
|
+
methodName: 't-SNE',
|
|
19
|
+
similarityMetric: 'Levenshtein',
|
|
20
|
+
embedAxesNames: ['Embed_X', 'Embed_Y']
|
|
21
|
+
};
|
|
22
|
+
const res = await sequenceSpace(sequenceSpaceParams);
|
|
23
|
+
expect(res.coordinates != undefined, true);
|
|
24
|
+
expect(res.distance != undefined, true);
|
|
25
|
+
});
|
|
26
|
+
});
|
package/src/tests/utils.ts
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
|
-
import * as grok from
|
|
2
|
+
import * as grok from 'datagrok-api/grok';
|
|
3
3
|
import {expect} from '@datagrok-libraries/utils/src/test';
|
|
4
4
|
import {runKalign} from '../utils/multiple-sequence-alignment';
|
|
5
|
-
import {
|
|
5
|
+
import {_package} from '../package-test';
|
|
6
6
|
|
|
7
7
|
export async function loadFileAsText(name: string): Promise<string> {
|
|
8
8
|
return await _package.files.readAsText(name);
|
|
@@ -15,6 +15,13 @@ export async function readDataframe(tableName: string): Promise<DG.DataFrame> {
|
|
|
15
15
|
return df;
|
|
16
16
|
}
|
|
17
17
|
|
|
18
|
+
export async function createTableView(tableName: string): Promise<DG.TableView> {
|
|
19
|
+
const df = await readDataframe(tableName);
|
|
20
|
+
df.name = tableName.replace('.csv', '');
|
|
21
|
+
const view = grok.shell.addTableView(df);
|
|
22
|
+
return view;
|
|
23
|
+
}
|
|
24
|
+
|
|
18
25
|
|
|
19
26
|
/**
|
|
20
27
|
* Tests if a table has non zero rows and columns.
|
|
@@ -35,5 +42,4 @@ export function _testTableIsNotEmpty(table: DG.DataFrame): void {
|
|
|
35
42
|
export async function _testMSAIsCorrect(col: DG.Column): Promise<void> {
|
|
36
43
|
const msaCol = await runKalign(col, true);
|
|
37
44
|
expect(msaCol.toList().every((v, i) => (v == col.get(i) || v == null)), true);
|
|
38
|
-
|
|
39
45
|
}
|
package/src/utils/convert.ts
CHANGED
|
@@ -2,23 +2,22 @@ import * as DG from 'datagrok-api/dg';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
|
|
4
4
|
export function convert(col: DG.Column): void {
|
|
5
|
-
|
|
6
5
|
const current = col.tags[DG.TAGS.UNITS];
|
|
7
6
|
//TODO: read all notations
|
|
8
7
|
const notations = ['fasta:SEQ:NT', 'fasta:SEQ:PT', 'fasta:SEQ.MSA:NT', 'fasta:SEQ.MSA:PT', 'HELM'];
|
|
9
|
-
const choices = ui.choiceInput(
|
|
8
|
+
const choices = ui.choiceInput('convert to', '', notations.filter((e) => e !== current));
|
|
10
9
|
|
|
11
10
|
ui.dialog('Convert sequence')
|
|
12
|
-
|
|
11
|
+
.add(
|
|
13
12
|
ui.div([
|
|
14
13
|
ui.h1('current notation'),
|
|
15
14
|
ui.div(current),
|
|
16
15
|
choices.root
|
|
17
16
|
])
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
17
|
+
)
|
|
18
|
+
.onOK(() => {
|
|
19
|
+
//TODO: create new converted column
|
|
20
|
+
//col.dataFrame.columns.add();
|
|
21
|
+
})
|
|
22
|
+
.show();
|
|
24
23
|
}
|
|
package/src/utils/multiple-sequence-alignment.ts
CHANGED
|
@@ -56,7 +56,7 @@ export async function runKalign(col: DG.Column, isAligned = false) : Promise<DG.
|
|
|
56
56
|
|
|
57
57
|
const aligned = _fastaToStrings(buf).slice(0, sequences.length);
|
|
58
58
|
const alignedCol = DG.Column.fromStrings(`msa(${col.name})`, aligned);
|
|
59
|
-
alignedCol.setTag(DG.TAGS.UNITS, '');
|
|
59
|
+
alignedCol.setTag(DG.TAGS.UNITS, '');
|
|
60
60
|
alignedCol.semType = C.SEM_TYPES.Macro_Molecule;
|
|
61
61
|
return alignedCol;
|
|
62
62
|
}
|