@datagrok/bio 1.4.0 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +17 -4
- package/dist/package-test.js +1577 -160
- package/dist/package.js +1248 -60
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +8989 -0
- package/files/sample_FASTA.csv +66 -0
- package/files/sample_FASTA_with_activities.csv +66 -0
- package/files/sample_MSA.csv +541 -0
- package/files/samples/peptides_complex_msa.csv +10275 -0
- package/files/samples/peptides_simple_msa.csv +648 -0
- package/files/samples/sample_HELM.csv +541 -0
- package/files/samples/sample_MSA.csv +541 -0
- package/package.json +11 -7
- package/src/package-test.ts +3 -1
- package/src/package.ts +94 -33
- package/src/tests/activity-cliffs-tests.ts +49 -0
- package/src/tests/detectors-test.ts +132 -34
- package/src/tests/sequence-space-test.ts +26 -0
- package/src/tests/utils.ts +21 -2
- package/src/utils/convert.ts +23 -0
- package/src/utils/multiple-sequence-alignment.ts +2 -33
- package/src/utils/sequence-activity-cliffs.ts +30 -0
- package/src/utils/sequence-space.ts +43 -0
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "@datagrok/bio",
|
|
3
3
|
"beta": false,
|
|
4
4
|
"friendlyName": "Bio",
|
|
5
|
-
"version": "1.
|
|
5
|
+
"version": "1.5.1",
|
|
6
6
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
7
7
|
"repository": {
|
|
8
8
|
"type": "git",
|
|
@@ -11,10 +11,11 @@
|
|
|
11
11
|
},
|
|
12
12
|
"dependencies": {
|
|
13
13
|
"@biowasm/aioli": ">=2.4.0",
|
|
14
|
-
"@datagrok-libraries/bio": "^2.
|
|
15
|
-
"@datagrok-libraries/utils": "^0.
|
|
14
|
+
"@datagrok-libraries/bio": "^2.3.1",
|
|
15
|
+
"@datagrok-libraries/utils": "^1.0.0",
|
|
16
|
+
"@datagrok-libraries/ml": "^2.0.8",
|
|
16
17
|
"cash-dom": "latest",
|
|
17
|
-
"datagrok-api": "^1.4.
|
|
18
|
+
"datagrok-api": "^1.4.12",
|
|
18
19
|
"dayjs": "latest",
|
|
19
20
|
"ts-loader": "^9.2.5",
|
|
20
21
|
"typescript": "^4.4.2"
|
|
@@ -35,11 +36,13 @@
|
|
|
35
36
|
"scripts": {
|
|
36
37
|
"link-api": "npm link datagrok-api",
|
|
37
38
|
"link-bio": "npm link @datagrok-libraries/bio",
|
|
38
|
-
"link-
|
|
39
|
+
"link-ml": "npm link @datagrok-libraries/ml",
|
|
40
|
+
"link-all": "npm link datagrok-api @datagrok-libraries/utils @datagrok-libraries/bio @datagrok-libraries/ml",
|
|
39
41
|
"debug-sequences1": "grok publish --rebuild",
|
|
40
42
|
"release-sequences1": "grok publish --rebuild --release",
|
|
41
43
|
"build-sequences1": "webpack",
|
|
42
|
-
"local
|
|
44
|
+
"debug-local": "grok publish local",
|
|
45
|
+
"release-local": "grok publish local --release",
|
|
43
46
|
"build": "webpack",
|
|
44
47
|
"debug-sequences1-public": "grok publish public --rebuild",
|
|
45
48
|
"release-sequences1-public": "grok publish public --rebuild --release",
|
|
@@ -48,7 +51,8 @@
|
|
|
48
51
|
"lint": "eslint \"./src/**/*.ts\"",
|
|
49
52
|
"lint-fix": "eslint \"./src/**/*.ts\" --fix",
|
|
50
53
|
"test": "jest",
|
|
51
|
-
"test-local": "set HOST=localhost && jest"
|
|
54
|
+
"test-local": "set HOST=localhost && jest",
|
|
55
|
+
"build-bio-local": "npm --prefix ./../../js-api run build && npm --prefix ./../../libraries/utils run build && npm --prefix ./../../libraries/ml run build && npm run build && npm --prefix ./../../libraries/bio run build && npm run build"
|
|
52
56
|
},
|
|
53
57
|
"canEdit": [
|
|
54
58
|
"Developers"
|
package/src/package-test.ts
CHANGED
|
@@ -6,8 +6,10 @@ import './tests/WebLogo-test';
|
|
|
6
6
|
import './tests/Palettes-test';
|
|
7
7
|
import './tests/detectors-test';
|
|
8
8
|
import './tests/msa-tests';
|
|
9
|
+
import './tests/sequence-space-test';
|
|
10
|
+
import './tests/activity-cliffs-tests';
|
|
9
11
|
|
|
10
|
-
export const
|
|
12
|
+
export const _package = new DG.Package();
|
|
11
13
|
export {tests};
|
|
12
14
|
|
|
13
15
|
/** For the 'test' function argument names are fixed as 'category' and 'test' because of way it is called. */
|
package/src/package.ts
CHANGED
|
@@ -3,17 +3,20 @@ import * as grok from 'datagrok-api/grok';
|
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
import * as DG from 'datagrok-api/dg';
|
|
5
5
|
|
|
6
|
-
import {SequenceAlignment, Aligned} from './seq_align';
|
|
7
|
-
|
|
8
6
|
export const _package = new DG.Package();
|
|
9
7
|
|
|
10
|
-
import {
|
|
8
|
+
import {mmSemType} from './const';
|
|
9
|
+
import {WebLogo, SeqColStats} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
11
10
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
12
11
|
import {runKalign, testMSAEnoughMemory} from './utils/multiple-sequence-alignment';
|
|
13
|
-
import {
|
|
14
|
-
import {mmSemType} from './const';
|
|
12
|
+
import {SequenceAlignment, Aligned} from './seq_align';
|
|
15
13
|
import {Nucleotides} from '@datagrok-libraries/bio/src/nucleotides';
|
|
16
14
|
import {Aminoacids} from '@datagrok-libraries/bio/src/aminoacids';
|
|
15
|
+
import {convert} from './utils/convert';
|
|
16
|
+
import {getEmbeddingColsNames, sequenceSpace} from './utils/sequence-space';
|
|
17
|
+
import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
18
|
+
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
19
|
+
import {sequenceGetSimilarities, drawTooltip} from './utils/sequence-activity-cliffs';
|
|
17
20
|
|
|
18
21
|
//name: sequenceAlignment
|
|
19
22
|
//input: string alignType {choices: ['Local alignment', 'Global alignment']}
|
|
@@ -49,23 +52,59 @@ export function vdRegionViewer() {
|
|
|
49
52
|
//name: Activity Cliffs
|
|
50
53
|
//description: detect activity cliffs
|
|
51
54
|
//input: dataframe df [Input data table]
|
|
52
|
-
//input: column
|
|
55
|
+
//input: column sequence {semType: Macromolecule}
|
|
53
56
|
//input: column activities
|
|
54
57
|
//input: double similarity = 80 [Similarity cutoff]
|
|
55
58
|
//input: string methodName { choices:["UMAP", "t-SNE", "SPE"] }
|
|
56
|
-
export async function activityCliffs(df: DG.DataFrame,
|
|
59
|
+
export async function activityCliffs(df: DG.DataFrame, sequence: DG.Column, activities: DG.Column,
|
|
57
60
|
similarity: number, methodName: string): Promise<void> {
|
|
61
|
+
const axesNames = getEmbeddingColsNames(df);
|
|
62
|
+
const options = {
|
|
63
|
+
'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
|
|
64
|
+
};
|
|
65
|
+
const units = sequence!.tags[DG.TAGS.UNITS];
|
|
66
|
+
await getActivityCliffs(
|
|
67
|
+
df,
|
|
68
|
+
sequence,
|
|
69
|
+
axesNames,
|
|
70
|
+
activities,
|
|
71
|
+
similarity,
|
|
72
|
+
'Levenshtein',
|
|
73
|
+
methodName,
|
|
74
|
+
DG.SEMTYPE.MACROMOLECULE,
|
|
75
|
+
units,
|
|
76
|
+
sequenceSpace,
|
|
77
|
+
sequenceGetSimilarities,
|
|
78
|
+
drawTooltip,
|
|
79
|
+
(options as any)[methodName]);
|
|
58
80
|
}
|
|
59
81
|
|
|
60
82
|
//top-menu: Bio | Sequence Space...
|
|
61
83
|
//name: Sequence Space
|
|
62
84
|
//input: dataframe table
|
|
63
|
-
//input: column
|
|
64
|
-
//input: string methodName { choices:["UMAP", "t-SNE", "SPE"
|
|
65
|
-
//input: string similarityMetric { choices:["
|
|
85
|
+
//input: column macroMolecule { semType: Macromolecule }
|
|
86
|
+
//input: string methodName { choices:["UMAP", "t-SNE", "SPE"] }
|
|
87
|
+
//input: string similarityMetric { choices:["Levenshtein", "Tanimoto"] }
|
|
66
88
|
//input: bool plotEmbeddings = true
|
|
67
|
-
export async function
|
|
68
|
-
similarityMetric: string = '
|
|
89
|
+
export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: DG.Column, methodName: string,
|
|
90
|
+
similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean): Promise<void> {
|
|
91
|
+
const embedColsNames = getEmbeddingColsNames(table);
|
|
92
|
+
const chemSpaceParams = {
|
|
93
|
+
seqCol: macroMolecule,
|
|
94
|
+
methodName: methodName,
|
|
95
|
+
similarityMetric: similarityMetric,
|
|
96
|
+
embedAxesNames: embedColsNames
|
|
97
|
+
};
|
|
98
|
+
const sequenceSpaceRes = await sequenceSpace(chemSpaceParams);
|
|
99
|
+
const embeddings = sequenceSpaceRes.coordinates;
|
|
100
|
+
for (const col of embeddings)
|
|
101
|
+
table.columns.add(col);
|
|
102
|
+
if (plotEmbeddings) {
|
|
103
|
+
for (const v of grok.shell.views) {
|
|
104
|
+
if (v.name === table.name)
|
|
105
|
+
(v as DG.TableView).scatterPlot({x: embedColsNames[0], y: embedColsNames[1]});
|
|
106
|
+
}
|
|
107
|
+
}
|
|
69
108
|
};
|
|
70
109
|
|
|
71
110
|
//top-menu: Bio | MSA...
|
|
@@ -90,40 +129,53 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
90
129
|
const wl = await col.dataFrame.plot.fromType('WebLogo', {});
|
|
91
130
|
|
|
92
131
|
for (const v of grok.shell.views) {
|
|
93
|
-
if (v instanceof TableView && (v as DG.TableView).dataFrame.name === col.dataFrame.name) {
|
|
132
|
+
if (v instanceof DG.TableView && (v as DG.TableView).dataFrame.name === col.dataFrame.name) {
|
|
94
133
|
(v as DG.TableView).dockManager.dock(wl.root, 'down');
|
|
95
134
|
break;
|
|
96
135
|
}
|
|
97
136
|
}
|
|
98
137
|
}
|
|
99
138
|
|
|
139
|
+
// helper function for importFasta
|
|
140
|
+
function parseMacromolecule(
|
|
141
|
+
fileContent: string,
|
|
142
|
+
startOfSequence: number,
|
|
143
|
+
endOfSequence: number
|
|
144
|
+
): string {
|
|
145
|
+
const seq = fileContent.slice(startOfSequence, endOfSequence);
|
|
146
|
+
const seqArray = seq.split(/\s/);
|
|
147
|
+
return seqArray.join('');
|
|
148
|
+
}
|
|
149
|
+
|
|
100
150
|
//name: importFasta
|
|
101
151
|
//description: Opens FASTA file
|
|
102
152
|
//tags: file-handler
|
|
103
153
|
//meta.ext: fasta, fna, ffn, faa, frn, fa
|
|
104
|
-
//input: string
|
|
154
|
+
//input: string fileContent
|
|
105
155
|
//output: list tables
|
|
106
|
-
export function importFasta(
|
|
107
|
-
const regex = /^>(.*)$/gm;
|
|
108
|
-
const
|
|
109
|
-
const
|
|
110
|
-
let
|
|
111
|
-
let match;
|
|
112
|
-
while (match = regex.exec(
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
156
|
+
export function importFasta(fileContent: string): DG.DataFrame [] {
|
|
157
|
+
const regex = /^>(.*)$/gm; // match lines starting with >
|
|
158
|
+
const descriptionsArray = [];
|
|
159
|
+
const sequencesArray: string[] = [];
|
|
160
|
+
let startOfSequence = 0;
|
|
161
|
+
let match; // match.index is the beginning of the matched line
|
|
162
|
+
while (match = regex.exec(fileContent)) {
|
|
163
|
+
const description = fileContent.substring(match.index + 1, regex.lastIndex);
|
|
164
|
+
descriptionsArray.push(description);
|
|
165
|
+
if (startOfSequence !== 0)
|
|
166
|
+
sequencesArray.push(parseMacromolecule(fileContent, startOfSequence, match.index));
|
|
167
|
+
startOfSequence = regex.lastIndex + 1;
|
|
117
168
|
}
|
|
118
|
-
|
|
119
|
-
const
|
|
120
|
-
const sequenceCol = DG.Column.fromStrings('sequence',
|
|
169
|
+
sequencesArray.push(parseMacromolecule(fileContent, startOfSequence, -1));
|
|
170
|
+
const descriptionsArrayCol = DG.Column.fromStrings('description', descriptionsArray);
|
|
171
|
+
const sequenceCol = DG.Column.fromStrings('sequence', sequencesArray);
|
|
172
|
+
sequenceCol.semType = 'Macromolecule';
|
|
121
173
|
|
|
122
|
-
const stats:
|
|
174
|
+
const stats: SeqColStats = WebLogo.getStats(sequenceCol, 5, WebLogo.splitterAsFasta);
|
|
123
175
|
const seqType = stats.sameLength ? 'SEQ.MSA' : 'SEQ';
|
|
124
176
|
const alphabetCandidates: [string, Set<string>][] = [
|
|
125
|
-
['NT', new Set(Object.keys(Nucleotides.Names))
|
|
126
|
-
['PT', new Set(Object.keys(Aminoacids.Names))
|
|
177
|
+
['NT', new Set(Object.keys(Nucleotides.Names))],
|
|
178
|
+
['PT', new Set(Object.keys(Aminoacids.Names))],
|
|
127
179
|
];
|
|
128
180
|
// Calculate likelihoods for alphabet_candidates
|
|
129
181
|
const alphabetCandidatesSim: number[] = alphabetCandidates.map(
|
|
@@ -131,10 +183,19 @@ export function importFasta(content: string): DG.DataFrame [] {
|
|
|
131
183
|
const maxCos = Math.max(...alphabetCandidatesSim);
|
|
132
184
|
const alphabet = maxCos > 0.65 ? alphabetCandidates[alphabetCandidatesSim.indexOf(maxCos)][0] : 'UN';
|
|
133
185
|
sequenceCol.semType = mmSemType;
|
|
134
|
-
|
|
186
|
+
const units: string = `fasta:${seqType}:${alphabet}`;
|
|
187
|
+
sequenceCol.setTag(DG.TAGS.UNITS, units);
|
|
135
188
|
|
|
136
189
|
return [DG.DataFrame.fromColumns([
|
|
137
|
-
|
|
190
|
+
descriptionsArrayCol,
|
|
138
191
|
sequenceCol,
|
|
139
192
|
])];
|
|
140
193
|
}
|
|
194
|
+
|
|
195
|
+
//name: Bio | Convert
|
|
196
|
+
//friendly-name: Bio | Convert
|
|
197
|
+
//tags: panel, bio
|
|
198
|
+
//input: column col {semType: Macromolecule}
|
|
199
|
+
export function convertPanel(col: DG.Column): void {
|
|
200
|
+
convert(col);
|
|
201
|
+
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import {after, before, category, expect, expectFloat, test} from '@datagrok-libraries/utils/src/test';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import {createTableView, readDataframe} from './utils';
|
|
4
|
+
import {_package} from '../package-test';
|
|
5
|
+
import {getEmbeddingColsNames, sequenceSpace} from '../utils/sequence-space';
|
|
6
|
+
import {drawTooltip, sequenceGetSimilarities} from '../utils/sequence-activity-cliffs';
|
|
7
|
+
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
category('activityCliffs', async () => {
|
|
11
|
+
let actCliffsTableView: DG.TableView;
|
|
12
|
+
let actCliffsDf: DG.DataFrame;
|
|
13
|
+
|
|
14
|
+
before(async () => {
|
|
15
|
+
actCliffsTableView = await createTableView('sample_MSA.csv');
|
|
16
|
+
actCliffsDf = await readDataframe('sample_MSA.csv');
|
|
17
|
+
});
|
|
18
|
+
|
|
19
|
+
test('activityCliffsOpen', async () => {
|
|
20
|
+
const axesNames = getEmbeddingColsNames(actCliffsDf);
|
|
21
|
+
const units = actCliffsDf.col('MSA')!.tags[DG.TAGS.UNITS];
|
|
22
|
+
const options = {
|
|
23
|
+
'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
|
|
24
|
+
};
|
|
25
|
+
const scatterPlot = await getActivityCliffs(
|
|
26
|
+
actCliffsDf,
|
|
27
|
+
actCliffsDf.col('MSA')!,
|
|
28
|
+
axesNames,
|
|
29
|
+
actCliffsDf.col('Activity')!,
|
|
30
|
+
50,
|
|
31
|
+
'Levenshtein',
|
|
32
|
+
't-SNE',
|
|
33
|
+
DG.SEMTYPE.MACROMOLECULE,
|
|
34
|
+
units,
|
|
35
|
+
sequenceSpace,
|
|
36
|
+
sequenceGetSimilarities,
|
|
37
|
+
drawTooltip);
|
|
38
|
+
|
|
39
|
+
expect(scatterPlot != null, true);
|
|
40
|
+
|
|
41
|
+
const cliffsLink = (Array.from(scatterPlot.root.children) as Element[])
|
|
42
|
+
.filter((it) => it.className === 'ui-btn ui-btn-ok');
|
|
43
|
+
expect((cliffsLink[0] as HTMLElement).innerText, '101 cliffs');
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
after(async () => {
|
|
47
|
+
actCliffsTableView.close();
|
|
48
|
+
});
|
|
49
|
+
});
|
|
@@ -7,6 +7,8 @@ import * as DG from 'datagrok-api/dg';
|
|
|
7
7
|
import {mmSemType} from '../const';
|
|
8
8
|
import {importFasta} from '../package';
|
|
9
9
|
|
|
10
|
+
type DfReaderFunc = () => Promise<DG.DataFrame>;
|
|
11
|
+
|
|
10
12
|
category('detectors', () => {
|
|
11
13
|
const csvDf1: string = `col1
|
|
12
14
|
1
|
|
@@ -87,36 +89,120 @@ YNR-WYV-KHP
|
|
|
87
89
|
MWRSWY-CKHP
|
|
88
90
|
`;
|
|
89
91
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
92
|
+
const enum Samples {
|
|
93
|
+
peptidesComplex = 'PeptidesComplex',
|
|
94
|
+
fastaCsv = 'FastaCsv',
|
|
95
|
+
msaComplex = 'MsaComplex',
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
const samples: { [key: string]: string } = {
|
|
99
|
+
'PeptidesComplex': 'System:AppData/Bio/samples/peptides_complex_aligned.csv',
|
|
100
|
+
'FastaCsv': 'System:AppData/Bio/samples/sample_FASTA.csv',
|
|
101
|
+
'MsaComplex': 'System:AppData/Bio/samples/sample_MSA.csv',
|
|
102
|
+
};
|
|
103
|
+
|
|
104
|
+
const _samplesDfs: { [key: string]: Promise<DG.DataFrame> } = {};
|
|
105
|
+
const readSamplesCsv: (key: string) => DfReaderFunc = (key: string) => {
|
|
106
|
+
return async () => {
|
|
107
|
+
if (!(key in _samplesDfs)) {
|
|
108
|
+
_samplesDfs[key] = (async (): Promise<DG.DataFrame> => {
|
|
109
|
+
const csv: string = await grok.dapi.files.readAsText(samples[key]);
|
|
110
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
111
|
+
await grok.data.detectSemanticTypes(df);
|
|
112
|
+
return df;
|
|
113
|
+
})();
|
|
114
|
+
}
|
|
115
|
+
return _samplesDfs[key];
|
|
116
|
+
};
|
|
117
|
+
};
|
|
118
|
+
|
|
119
|
+
const _csvDfs: { [key: string]: Promise<DG.DataFrame> } = {};
|
|
120
|
+
const readCsv: (key: string, csv: string) => DfReaderFunc = (key: string, csv: string) => {
|
|
121
|
+
return async () => {
|
|
122
|
+
if (!(key in _csvDfs)) {
|
|
123
|
+
_csvDfs[key] = (async (): Promise<DG.DataFrame> => {
|
|
124
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
125
|
+
await grok.data.detectSemanticTypes(df);
|
|
126
|
+
return df;
|
|
127
|
+
})();
|
|
128
|
+
}
|
|
129
|
+
return _csvDfs[key];
|
|
130
|
+
};
|
|
131
|
+
};
|
|
132
|
+
|
|
133
|
+
test('Negative1', async () => { await _testNeg(readCsv('csvDf1', csvDf1), 'col1'); });
|
|
134
|
+
test('Negative2', async () => { await _testNeg(readCsv('csvDf2', csvDf2), 'col1'); });
|
|
135
|
+
test('Negative3', async () => { await _testNeg(readCsv('csvDf3', csvDf3), 'col1'); });
|
|
136
|
+
test('NegativeSmiles', async () => { await _testNeg(readCsv('csvDfSmiles', csvDfSmiles), 'col1'); });
|
|
137
|
+
|
|
138
|
+
test('N1', async () => { await _testN1(csvDfN1); });
|
|
139
|
+
test('AA1', async () => { await _testAA1(csvDfAA1); });
|
|
140
|
+
test('MsaN1', async () => { await _testMsaN1(csvDfMsaN1); });
|
|
141
|
+
test('MsaAA1', async () => { await _testMsaAA1(csvDfMsaAA1); });
|
|
142
|
+
|
|
143
|
+
test('SepNt', async () => { await _testSepNt(csvDfSepNt, '*'); });
|
|
144
|
+
test('SepPt', async () => { await _testSepPt(csvDfSepPt, '-'); });
|
|
145
|
+
test('SepUn1', async () => { await _testSepUn(csvDfSepUn1, '-'); });
|
|
146
|
+
test('SepUn2', async () => { await _testSepUn(csvDfSepUn2, '/'); });
|
|
147
|
+
|
|
148
|
+
test('SepMsaN1', async () => { await _testSepMsaN1(csvDfSepMsaN1); });
|
|
149
|
+
|
|
150
|
+
test('SamplesFastaCsvPt', async () => {
|
|
151
|
+
await _testSamplesFastaCsvPt();
|
|
152
|
+
});
|
|
153
|
+
test('SamplesFastaCsvNegativeEntry', async () => {
|
|
154
|
+
await _testNeg(readSamplesCsv(Samples.fastaCsv), 'Entry');
|
|
155
|
+
});
|
|
156
|
+
test('SamplesFastaCsvNegativeLength', async () => {
|
|
157
|
+
await _testNeg(readSamplesCsv(Samples.fastaCsv), 'Length');
|
|
158
|
+
});
|
|
159
|
+
test('SamplesFastaCsvNegativeUniProtKB', async () => {
|
|
160
|
+
await _testNeg(readSamplesCsv(Samples.fastaCsv), 'UniProtKB');
|
|
161
|
+
});
|
|
162
|
+
|
|
163
|
+
test('SamplesFastaFastaPt', async () => { await _testSamplesFastaFastaPt(); });
|
|
164
|
+
|
|
165
|
+
// System:AppData/Bio/samples/peptides_complex_align.csv contains monomers with spaces
|
|
166
|
+
// test('SamplesPeptidesComplexUn', async () => {
|
|
167
|
+
// await _testSamplesPeptidesComplexUn();
|
|
168
|
+
// });
|
|
169
|
+
|
|
170
|
+
test('samplesPeptidesComplexNegativeID', async () => {
|
|
171
|
+
await _testNeg(readSamplesCsv(Samples.peptidesComplex), 'ID');
|
|
172
|
+
});
|
|
173
|
+
test('SamplesPeptidesComplexNegativeMeasured', async () => {
|
|
174
|
+
await _testNeg(readSamplesCsv(Samples.peptidesComplex), 'Measured');
|
|
175
|
+
});
|
|
176
|
+
test('SamplesPeptidesComplexNegativeValue', async () => {
|
|
177
|
+
await _testNeg(readSamplesCsv(Samples.peptidesComplex), 'Value');
|
|
178
|
+
});
|
|
179
|
+
|
|
180
|
+
test('samplesMsaComplexUn', async () => {
|
|
181
|
+
await _testPos(readSamplesCsv(Samples.msaComplex), 'MSA', 'separator:SEQ.MSA:UN', '/');
|
|
182
|
+
});
|
|
183
|
+
test('samplesMsaComplexNegativeActivity', async () => {
|
|
184
|
+
await _testNeg(readSamplesCsv(Samples.msaComplex), 'Activity');
|
|
185
|
+
});
|
|
186
|
+
});
|
|
104
187
|
|
|
105
|
-
|
|
188
|
+
export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
189
|
+
const df: DG.DataFrame = await readDf();
|
|
106
190
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
}
|
|
191
|
+
const col: DG.Column = df.col(colName)!;
|
|
192
|
+
expect(col.semType === mmSemType, false);
|
|
193
|
+
}
|
|
110
194
|
|
|
111
|
-
export async function
|
|
112
|
-
const df: DG.DataFrame =
|
|
113
|
-
await grok.data.detectSemanticTypes(df);
|
|
195
|
+
export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, separator: string) {
|
|
196
|
+
const df: DG.DataFrame = await readDf();
|
|
114
197
|
|
|
115
|
-
const
|
|
116
|
-
expect(
|
|
198
|
+
const col: DG.Column = df.col(colName)!;
|
|
199
|
+
expect(col.semType === mmSemType, true);
|
|
200
|
+
expect(col.getTag(DG.TAGS.UNITS), units);
|
|
201
|
+
if (separator)
|
|
202
|
+
expect(col.getTag('separator'), separator);
|
|
117
203
|
}
|
|
118
204
|
|
|
119
|
-
export async function
|
|
205
|
+
export async function _testN1(csvDfN1: string) {
|
|
120
206
|
const dfN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
|
|
121
207
|
await grok.data.detectSemanticTypes(dfN1);
|
|
122
208
|
|
|
@@ -125,7 +211,7 @@ export async function _testDetectorsN1(csvDfN1: string) {
|
|
|
125
211
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:NT');
|
|
126
212
|
}
|
|
127
213
|
|
|
128
|
-
export async function
|
|
214
|
+
export async function _testAA1(csvDfAA1: string) {
|
|
129
215
|
const dfAA1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfAA1);
|
|
130
216
|
await grok.data.detectSemanticTypes(dfAA1);
|
|
131
217
|
|
|
@@ -134,7 +220,7 @@ export async function _testDetectorsAA1(csvDfAA1: string) {
|
|
|
134
220
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
|
|
135
221
|
}
|
|
136
222
|
|
|
137
|
-
export async function
|
|
223
|
+
export async function _testMsaN1(csvDfMsaN1: string) {
|
|
138
224
|
const dfMsaN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfMsaN1);
|
|
139
225
|
await grok.data.detectSemanticTypes(dfMsaN1);
|
|
140
226
|
|
|
@@ -143,7 +229,7 @@ export async function _testDetectorsMsaN1(csvDfMsaN1: string) {
|
|
|
143
229
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:NT');
|
|
144
230
|
}
|
|
145
231
|
|
|
146
|
-
export async function
|
|
232
|
+
export async function _testMsaAA1(csvDfMsaAA1: string) {
|
|
147
233
|
const dfMsaAA1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfMsaAA1);
|
|
148
234
|
await grok.data.detectSemanticTypes(dfMsaAA1);
|
|
149
235
|
|
|
@@ -152,7 +238,7 @@ export async function _testDetectorsMsaAA1(csvDfMsaAA1: string) {
|
|
|
152
238
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:PT');
|
|
153
239
|
}
|
|
154
240
|
|
|
155
|
-
export async function
|
|
241
|
+
export async function _testSepNt(csv: string, separator: string) {
|
|
156
242
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
157
243
|
await grok.data.detectSemanticTypes(df);
|
|
158
244
|
|
|
@@ -162,7 +248,7 @@ export async function _testDetectorsSepNt(csv: string, separator: string) {
|
|
|
162
248
|
expect(col.getTag('separator'), separator);
|
|
163
249
|
}
|
|
164
250
|
|
|
165
|
-
export async function
|
|
251
|
+
export async function _testSepPt(csv: string, separator: string) {
|
|
166
252
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
167
253
|
await grok.data.detectSemanticTypes(df);
|
|
168
254
|
|
|
@@ -172,7 +258,7 @@ export async function _testDetectorsSepPt(csv: string, separator: string) {
|
|
|
172
258
|
expect(col.getTag('separator'), separator);
|
|
173
259
|
}
|
|
174
260
|
|
|
175
|
-
export async function
|
|
261
|
+
export async function _testSepUn(csv: string, separator: string) {
|
|
176
262
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
177
263
|
await grok.data.detectSemanticTypes(df);
|
|
178
264
|
|
|
@@ -182,7 +268,7 @@ export async function _testDetectorsSepUn(csv: string, separator: string) {
|
|
|
182
268
|
expect(col.getTag('separator'), separator);
|
|
183
269
|
}
|
|
184
270
|
|
|
185
|
-
export async function
|
|
271
|
+
export async function _testSepMsaN1(csvDfSepMsaN1: string) {
|
|
186
272
|
const dfSepMsaN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfSepMsaN1);
|
|
187
273
|
await grok.data.detectSemanticTypes(dfSepMsaN1);
|
|
188
274
|
|
|
@@ -191,7 +277,7 @@ export async function _testDetectorsSepMsaN1(csvDfSepMsaN1: string) {
|
|
|
191
277
|
expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ.MSA:NT');
|
|
192
278
|
}
|
|
193
279
|
|
|
194
|
-
export async function
|
|
280
|
+
export async function _testSamplesFastaCsvPt() {
|
|
195
281
|
const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA.csv');
|
|
196
282
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
197
283
|
await grok.data.detectSemanticTypes(df);
|
|
@@ -202,7 +288,7 @@ export async function _testDetectorsSamplesFastaCsvPt() {
|
|
|
202
288
|
expect(col.getTag('separator'), null);
|
|
203
289
|
}
|
|
204
290
|
|
|
205
|
-
export async function
|
|
291
|
+
export async function _testSamplesFastaFastaPt() {
|
|
206
292
|
const fasta: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/sample_FASTA.fasta');
|
|
207
293
|
const df: DG.DataFrame = importFasta(fasta)[0];
|
|
208
294
|
|
|
@@ -210,4 +296,16 @@ export async function _testDetectorsSamplesFastaFastaPt() {
|
|
|
210
296
|
expect(col.semType, mmSemType);
|
|
211
297
|
expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
|
|
212
298
|
expect(col.getTag('separator'), null);
|
|
213
|
-
}
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
export async function _testSamplesPeptidesComplexUn() {
|
|
302
|
+
const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/peptides_complex_aligned.csv');
|
|
303
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
304
|
+
await grok.data.detectSemanticTypes(df);
|
|
305
|
+
|
|
306
|
+
const col: DG.Column = df.col('AlignedSequence')!;
|
|
307
|
+
expect(col.semType, mmSemType);
|
|
308
|
+
expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ.MSA:UN');
|
|
309
|
+
expect(col.getTag('separator'), '-');
|
|
310
|
+
}
|
|
311
|
+
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import {before, category, test, expect} from '@datagrok-libraries/utils/src/test';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import {sequenceSpace} from '../utils/sequence-space';
|
|
4
|
+
import {readDataframe} from './utils';
|
|
5
|
+
//import * as grok from 'datagrok-api/grok';
|
|
6
|
+
|
|
7
|
+
category('sequenceSpace', async () => {
|
|
8
|
+
let testFastaDf: DG.DataFrame;
|
|
9
|
+
|
|
10
|
+
before(async () => {
|
|
11
|
+
testFastaDf = await readDataframe('sample_FASTA.csv');
|
|
12
|
+
});
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
test('sequenceSpaceOpens', async () => {
|
|
16
|
+
const sequenceSpaceParams = {
|
|
17
|
+
seqCol: testFastaDf.col('Sequence')!,
|
|
18
|
+
methodName: 't-SNE',
|
|
19
|
+
similarityMetric: 'Levenshtein',
|
|
20
|
+
embedAxesNames: ['Embed_X', 'Embed_Y']
|
|
21
|
+
};
|
|
22
|
+
const res = await sequenceSpace(sequenceSpaceParams);
|
|
23
|
+
expect(res.coordinates != undefined, true);
|
|
24
|
+
expect(res.distance != undefined, true);
|
|
25
|
+
});
|
|
26
|
+
});
|
package/src/tests/utils.ts
CHANGED
|
@@ -1,7 +1,27 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
|
-
|
|
2
|
+
import * as grok from 'datagrok-api/grok';
|
|
3
3
|
import {expect} from '@datagrok-libraries/utils/src/test';
|
|
4
4
|
import {runKalign} from '../utils/multiple-sequence-alignment';
|
|
5
|
+
import {_package} from '../package-test';
|
|
6
|
+
|
|
7
|
+
export async function loadFileAsText(name: string): Promise<string> {
|
|
8
|
+
return await _package.files.readAsText(name);
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
export async function readDataframe(tableName: string): Promise<DG.DataFrame> {
|
|
12
|
+
const file = await loadFileAsText(tableName);
|
|
13
|
+
const df = DG.DataFrame.fromCsv(file);
|
|
14
|
+
df.name = tableName.replace('.csv', '');
|
|
15
|
+
return df;
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
export async function createTableView(tableName: string): Promise<DG.TableView> {
|
|
19
|
+
const df = await readDataframe(tableName);
|
|
20
|
+
df.name = tableName.replace('.csv', '');
|
|
21
|
+
const view = grok.shell.addTableView(df);
|
|
22
|
+
return view;
|
|
23
|
+
}
|
|
24
|
+
|
|
5
25
|
|
|
6
26
|
/**
|
|
7
27
|
* Tests if a table has non zero rows and columns.
|
|
@@ -22,5 +42,4 @@ export function _testTableIsNotEmpty(table: DG.DataFrame): void {
|
|
|
22
42
|
export async function _testMSAIsCorrect(col: DG.Column): Promise<void> {
|
|
23
43
|
const msaCol = await runKalign(col, true);
|
|
24
44
|
expect(msaCol.toList().every((v, i) => (v == col.get(i) || v == null)), true);
|
|
25
|
-
|
|
26
45
|
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
|
|
4
|
+
export function convert(col: DG.Column): void {
|
|
5
|
+
const current = col.tags[DG.TAGS.UNITS];
|
|
6
|
+
//TODO: read all notations
|
|
7
|
+
const notations = ['fasta:SEQ:NT', 'fasta:SEQ:PT', 'fasta:SEQ.MSA:NT', 'fasta:SEQ.MSA:PT', 'HELM'];
|
|
8
|
+
const choices = ui.choiceInput('convert to', '', notations.filter((e) => e !== current));
|
|
9
|
+
|
|
10
|
+
ui.dialog('Convert sequence')
|
|
11
|
+
.add(
|
|
12
|
+
ui.div([
|
|
13
|
+
ui.h1('current notation'),
|
|
14
|
+
ui.div(current),
|
|
15
|
+
choices.root
|
|
16
|
+
])
|
|
17
|
+
)
|
|
18
|
+
.onOK(() => {
|
|
19
|
+
//TODO: create new converted column
|
|
20
|
+
//col.dataFrame.columns.add();
|
|
21
|
+
})
|
|
22
|
+
.show();
|
|
23
|
+
}
|