@datagrok/bio 1.2.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/package.ts CHANGED
@@ -2,12 +2,18 @@
2
2
  import * as grok from 'datagrok-api/grok';
3
3
  import * as ui from 'datagrok-api/ui';
4
4
  import * as DG from 'datagrok-api/dg';
5
+
5
6
  import {SequenceAlignment, Aligned} from './seq_align';
6
7
 
7
8
  export const _package = new DG.Package();
8
9
 
9
10
  import {WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
10
11
  import {VdRegionsViewer} from './viewers/vd-regions-viewer';
12
+ import {runKalign, testMSAEnoughMemory} from './utils/multiple-sequence-alignment';
13
+ import {TableView} from 'datagrok-api/dg';
14
+ import {mmSemType} from './const';
15
+ import {Nucleotides} from '@datagrok-libraries/bio/src/nucleotides';
16
+ import {Aminoacids} from '@datagrok-libraries/bio/src/aminoacids';
11
17
 
12
18
  //name: sequenceAlignment
13
19
  //input: string alignType {choices: ['Local alignment', 'Global alignment']}
@@ -38,3 +44,97 @@ export function webLogoViewer() {
38
44
  export function vdRegionViewer() {
39
45
  return new VdRegionsViewer();
40
46
  }
47
+
48
+ //top-menu: Bio | Activity Cliffs...
49
+ //name: Activity Cliffs
50
+ //description: detect activity cliffs
51
+ //input: dataframe df [Input data table]
52
+ //input: column smiles {type:categorical; semType: Macromolecule}
53
+ //input: column activities
54
+ //input: double similarity = 80 [Similarity cutoff]
55
+ //input: string methodName { choices:["UMAP", "t-SNE", "SPE"] }
56
+ export async function activityCliffs(df: DG.DataFrame, smiles: DG.Column, activities: DG.Column,
57
+ similarity: number, methodName: string): Promise<void> {
58
+ }
59
+
60
+ //top-menu: Bio | Sequence Space...
61
+ //name: Sequence Space
62
+ //input: dataframe table
63
+ //input: column smiles { semType: Macromolecule }
64
+ //input: string methodName { choices:["UMAP", "t-SNE", "SPE", "pSPE", "OriginalSPE"] }
65
+ //input: string similarityMetric { choices:["Tanimoto", "Asymmetric", "Cosine", "Sokal"] }
66
+ //input: bool plotEmbeddings = true
67
+ export async function chemSpaceTopMenu(table: DG.DataFrame, smiles: DG.Column, methodName: string,
68
+ similarityMetric: string = 'Tanimoto', plotEmbeddings: boolean): Promise<void> {
69
+ };
70
+
71
+ //top-menu: Bio | MSA...
72
+ //name: MSA
73
+ //input: dataframe table
74
+ //input: column sequence { semType: Macromolecule }
75
export async function multipleSequenceAlignmentAny(table: DG.DataFrame, col: DG.Column): Promise<void> {
  // Run Kalign over the chosen column, then append the column it yields to the same table.
  // NOTE(review): the meaning of the `false` flag is defined by runKalign, which is not visible here.
  const alignedColumn = await runKalign(col, false);
  const tableColumns = table.columns;
  tableColumns.add(alignedColumn);
}
79
+
80
/**
 * Builds a 'WebLogo' viewer for the first column of the current table whose semantic type is
 * 'Macromolecule' and docks it below the first table view whose data frame has the same name.
 *
 * Reports an error through `grok.shell.error` and does nothing else when there is no current
 * table or the current table has no such column.
 */
//name: Composition Analysis
//top-menu: Bio | Composition Analysis
//output: viewer result
export async function compositionAnalysis(): Promise<void> {
  // The menu item can be invoked with no table open; guard the dereference instead of throwing.
  const col = grok.shell.t?.columns.bySemType('Macromolecule');
  if (col == null) {
    grok.shell.error('Current table does not contain sequences');
    return;
  }

  const wl = await col.dataFrame.plot.fromType('WebLogo', {});

  for (const v of grok.shell.views) {
    // Views are matched by data frame name; only the first match receives the viewer.
    if (v instanceof TableView && v.dataFrame.name === col.dataFrame.name) {
      v.dockManager.dock(wl.root, 'down');
      break;
    }
  }
}
99
+
100
/**
 * File handler for FASTA text: builds one table with a `description` column (header lines
 * without the leading `>`) and a `sequence` column (the text that follows each header).
 *
 * The sequence column gets the `mmSemType` semantic type and a units tag of the form
 * `fasta:<SEQ|SEQ.MSA>:<NT|PT|UN>`:
 *  - `SEQ.MSA` when `WebLogo.getStats` reports `sameLength`, otherwise `SEQ`;
 *  - the candidate alphabet with the highest similarity score when that score is above 0.65,
 *    otherwise `UN`.
 *
 * @param {string} content Raw text of the FASTA file.
 * @return {DG.DataFrame[]} Single-element list holding the resulting table.
 */
//name: importFasta
//description: Opens FASTA file
//tags: file-handler
//meta.ext: fasta, fna, ffn, faa, frn, fa
//input: string content
//output: list tables
export function importFasta(content: string): DG.DataFrame [] {
  const {descriptions, sequences} = parseFastaRecords(content);
  const descriptionsCol = DG.Column.fromStrings('description', descriptions);
  const sequenceCol = DG.Column.fromStrings('sequence', sequences);

  const stats: { freq: { [m: string]: number }, sameLength: boolean } =
    WebLogo.getStats(sequenceCol, 5, WebLogo.splitterAsFasta);
  const seqType = stats.sameLength ? 'SEQ.MSA' : 'SEQ';
  const alphabetCandidates: [string, Set<string>][] = [
    ['NT', new Set(Object.keys(Nucleotides.Names))],
    ['PT', new Set(Object.keys(Aminoacids.Names))],
  ];
  // Score every candidate alphabet against the observed monomer frequencies.
  const alphabetCandidatesSim: number[] = alphabetCandidates.map(
    (c) => WebLogo.getAlphabetSimilarity(stats.freq, c[1]));
  const maxCos = Math.max(...alphabetCandidatesSim);
  const alphabet = maxCos > 0.65 ? alphabetCandidates[alphabetCandidatesSim.indexOf(maxCos)][0] : 'UN';
  sequenceCol.semType = mmSemType;
  sequenceCol.setTag(DG.TAGS.UNITS, `fasta:${seqType}:${alphabet}`);

  return [DG.DataFrame.fromColumns([
    descriptionsCol,
    sequenceCol,
  ])];
}

/**
 * Splits FASTA text into parallel arrays of descriptions and sequences.
 *
 * - A record starts at every line beginning with `>`; the rest of that line is the description.
 * - A record's sequence is everything up to the start of the next header (or the end of the
 *   text), with CR/LF characters removed so wrapped sequences become one string.
 * - Text before the first header is ignored.
 * - Non-empty text with no header at all is returned as a single record with an empty
 *   description; empty text yields no records. Both arrays always have the same length.
 *
 * @param {string} content Raw FASTA text.
 * @return {{descriptions: string[], sequences: string[]}} Parallel arrays, one entry per record.
 */
function parseFastaRecords(content: string): {descriptions: string[], sequences: string[]} {
  const headerRe = /^>(.*)$/gm;
  const lineBreakRe = /[\r\n]/g;
  const descriptions: string[] = [];
  const sequences: string[] = [];
  // Offset where the pending record's sequence starts; null until the first header is seen.
  let seqStart: number | null = null;
  let match: RegExpExecArray | null;
  while ((match = headerRe.exec(content)) !== null) {
    // The previous sequence ends where this header begins (match.index), not at the end of
    // this header line (lastIndex) - otherwise the next header leaks into the sequence.
    if (seqStart !== null)
      sequences.push(content.substring(seqStart, match.index).replace(lineBreakRe, ''));
    descriptions.push(content.substring(match.index + 1, headerRe.lastIndex));
    // Skip the line terminator right after the header line.
    seqStart = headerRe.lastIndex + 1;
  }

  if (seqStart !== null) {
    sequences.push(content.substring(seqStart).replace(lineBreakRe, ''));
  } else {
    const bare = content.replace(lineBreakRe, '');
    if (bare.length > 0) {
      descriptions.push('');
      sequences.push(bare);
    }
  }
  return {descriptions, sequences};
}
@@ -7,6 +7,6 @@ import * as DG from 'datagrok-api/dg';
7
7
  import {_testPaletteN, _testPaletteAA} from '@datagrok-libraries/bio/src/tests/palettes.test';
8
8
 
9
9
  category('Palettes', () => {
10
- test('testPaletteN', async () => { _testPaletteN(); });
11
- test('testPaletteAA', async () => { _testPaletteAA(); });
10
+ test('testPaletteN', async () => { await _testPaletteN(); });
11
+ test('testPaletteAA', async () => { await _testPaletteAA(); });
12
12
  });
@@ -0,0 +1,130 @@
1
+ import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
2
+
3
+ import * as grok from 'datagrok-api/grok';
4
+ import * as ui from 'datagrok-api/ui';
5
+ import * as DG from 'datagrok-api/dg';
6
+
7
+ import {Nucleotides, NucleotidesPalettes} from '@datagrok-libraries/bio/src/nucleotides';
8
+ import {Aminoacids, AminoacidsPalettes} from '@datagrok-libraries/bio/src/aminoacids';
9
+ import {WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
10
+ import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
11
+ import {UnknownSeqPalette} from '@datagrok-libraries/bio/src/unknown';
12
+
13
+ category('WebLogo', () => {
14
+ const csvDfN1: string = `seq
15
+ ACGTCT
16
+ CAGTGT
17
+ TTCAAC
18
+ `;
19
+
20
+ /** 2 - is an error monomer
21
+ * This sequence set should be classified as nucleotides sequences.
22
+ * Small error, not similar to amino acids.
23
+ */
24
+ const csvDfN1e: string = `seq
25
+ ACGTAT
26
+ CAGTTG
27
+ TTCG2C
28
+ `;
29
+
30
+ /** Pure amino acids sequence */
31
+ const csvDfAA1: string = `seq
32
+ FWPHEYV
33
+ YNRQWYV
34
+ MKPSEYV
35
+ `;
36
+
37
+ /** A - alanine, G - glycine, T - threonine, C - cysteine, W - tryptophan
38
+ * This sequence set should be detected as amino acids more than nucleotides.
39
+ */
40
+ const csvDfAA2: string = `seq
41
+ AGTCAT
42
+ AGTCGC
43
+ AGTCATW
44
+ `;
45
+
46
+ /** This sequence set should be recognized as unknown. */
47
+ const csvDfX: string = `seq
48
+ XZJ{}2
49
+ 5Z4733
50
+ 3Z6></
51
+ 675687
52
+ `;
53
+
54
+ // anonymous functions specified in test() registering must return Promise<any>
55
+ test('testGetStats', async () => { await _testGetStats(csvDfN1); });
56
+ test('testGetAlphabetSimilarity', async () => { await _testGetAlphabetSimilarity(); });
57
+
58
+ test('testPickupPaletteN1', async () => { await _testPickupPaletteN1(csvDfN1); });
59
+ test('testPickupPaletteN1e', async () => { await _testPickupPaletteN1e(csvDfN1e); });
60
+ test('testPickupPaletteAA1', async () => { await _testPickupPaletteAA1(csvDfAA1); });
61
+ test('testPickupPaletteX', async () => { await _testPickupPaletteX(csvDfX); });
62
+ });
63
+
64
+
65
/** Checks the per-monomer counts and the `sameLength` flag returned by WebLogo.getStats for the given CSV. */
export async function _testGetStats(csvDfN1: string) {
  const expectedFreq = {
    'A': 4,
    'C': 5,
    'G': 3,
    'T': 6
  };
  const seqCol: DG.Column = DG.DataFrame.fromCsv(csvDfN1).col('seq')!;
  const stats = WebLogo.getStats(seqCol, 5, WebLogo.splitterAsFasta);

  expectObject(stats.freq, expectedFreq);
  expect(stats.sameLength, true);
}
78
+
79
/** Checks that a fixed A/C/G/T/gap frequency table scores above 0.6 against the nucleotide alphabet. */
export async function _testGetAlphabetSimilarity() {
  const monomerCounts: { [m: string]: number } = {
    'A': 2041,
    'C': 3015,
    'G': 3015,
    'T': 2048,
    '-': 1000
  };
  const nucleotideAlphabet: Set<string> = new Set(Object.keys(Nucleotides.Names));
  const similarity = WebLogo.getAlphabetSimilarity(monomerCounts, nucleotideAlphabet);

  const aboveThreshold = similarity > 0.6;
  expect(aboveThreshold, true);
}
92
+
93
/** Expects WebLogo.pickUpPalette to return a NucleotidesPalettes instance for the `seq` column of the CSV. */
export async function _testPickupPaletteN1(csvDfN1: string) {
  const seqCol: DG.Column = DG.DataFrame.fromCsv(csvDfN1).col('seq')!;
  const palette = WebLogo.pickUpPalette(seqCol);

  const isNucleotidePalette = palette instanceof NucleotidesPalettes;
  expect(isNucleotidePalette, true);
}
100
+
101
/** Same expectation as the N1 case (a NucleotidesPalettes instance) for the CSV passed by the caller. */
export async function _testPickupPaletteN1e(csvDfN1e: string) {
  const seqCol: DG.Column = DG.DataFrame.fromCsv(csvDfN1e).col('seq')!;
  const palette = WebLogo.pickUpPalette(seqCol);

  const isNucleotidePalette = palette instanceof NucleotidesPalettes;
  expect(isNucleotidePalette, true);
}
108
+
109
/** Expects WebLogo.pickUpPalette to return an AminoacidsPalettes instance for the `seq` column of the CSV. */
export async function _testPickupPaletteAA1(csvDfAA1: string) {
  const seqCol: DG.Column = DG.DataFrame.fromCsv(csvDfAA1).col('seq')!;
  const palette = WebLogo.pickUpPalette(seqCol);

  const isAminoAcidPalette = palette instanceof AminoacidsPalettes;
  expect(isAminoAcidPalette, true);
}
116
+
117
/** Expects WebLogo.pickUpPalette to return an UnknownSeqPalette instance for the `seq` column of the CSV. */
export async function _testPickupPaletteX(csvDfX: string) {
  const seqCol: DG.Column = DG.DataFrame.fromCsv(csvDfX).col('seq')!;
  const palette = WebLogo.pickUpPalette(seqCol);

  const isUnknownPalette = palette instanceof UnknownSeqPalette;
  expect(isUnknownPalette, true);
}
124
+
125
/**
 * Expects WebLogo.pickUpPalette to return an AminoacidsPalettes instance for the `seq` column.
 * Unlike the sibling helpers, this one receives an already-built data frame rather than CSV text.
 */
export async function _testPickupPaletteAA2(dfAA2: DG.DataFrame) {
  const palette = WebLogo.pickUpPalette(dfAA2.col('seq')!);

  const isAminoAcidPalette = palette instanceof AminoacidsPalettes;
  expect(isAminoAcidPalette, true);
}
@@ -0,0 +1,213 @@
1
+ import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
2
+
3
+ import * as grok from 'datagrok-api/grok';
4
+ import * as ui from 'datagrok-api/ui';
5
+ import * as DG from 'datagrok-api/dg';
6
+
7
+ import {mmSemType} from '../const';
8
+ import {importFasta} from '../package';
9
+
10
+ category('detectors', () => {
11
+ const csvDf1: string = `col1
12
+ 1
13
+ 2
14
+ 3`;
15
+
16
+ const csvDf2: string = `col1
17
+ 4
18
+ 5
19
+ 6
20
+ 7`;
21
+
22
+ const csvDf3: string = `col1
23
+ 8
24
+ 9
25
+ 10
26
+ 11
27
+ 12`;
28
+
29
+ const csvDfSmiles: string = `col1
30
+ CCCCN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
31
+ C1CCCCC1
32
+ CCCCCC
33
+ `;
34
+
35
+ const csvDfN1: string = `seq
36
+ ACGTC
37
+ CAGTGT
38
+ TTCAAC
39
+ `;
40
+
41
+ /** Pure amino acids sequence */
42
+ const csvDfAA1: string = `seq
43
+ FWPHEY
44
+ YNRQWYV
45
+ MKPSEYV
46
+ `;
47
+
48
+ const csvDfSepNt: string = `seq
49
+ A*C*G*T*C
50
+ C*A*G*T*G*T
51
+ T*T*C*A*A*C
52
+ `;
53
+
54
+ const csvDfSepPt: string = `seq
55
+ F-W-P-H-E-Y
56
+ Y-N-R-Q-W-Y-V
57
+ M-K-P-S-E-Y-V
58
+ `;
59
+
60
+ const csvDfSepUn1: string = `seq
61
+ abc-dfgg-abc1-cfr3-rty-wert
62
+ rut12-her2-rty-wert-abc-abc1-dfgg
63
+ rut12-rty-her2-abc-cfr3-wert-rut12
64
+ `;
65
+
66
+ const csvDfSepUn2: string = `seq
67
+ abc/dfgg/abc1/cfr3/rty/wert
68
+ rut12/her2/rty/wert//abc/abc1/dfgg
69
+ rut12/rty/her2/abc/cfr3//wert/rut12
70
+ `;
71
+
72
+ const csvDfSepMsaN1: string = `seq
73
+ A-C--G-T--C-T
74
+ C-A-C--T--G-T
75
+ A-C-C-G-T-A-C-T
76
+ `;
77
+
78
+ const csvDfMsaN1: string = `seq
79
+ AC-GT-CT
80
+ CAC-T-GT
81
+ ACCGTACT
82
+ `;
83
+
84
+ const csvDfMsaAA1: string = `seq
85
+ FWR-WYV-KHP
86
+ YNR-WYV-KHP
87
+ MWRSWY-CKHP
88
+ `;
89
+
90
+ test('testDetectorsNegative1', async () => { await _testDetectorsNegative(csvDf1); });
91
+ test('testDetectorsNegative2', async () => { await _testDetectorsNegative(csvDf2); });
92
+ test('testDetectorsNegative3', async () => { await _testDetectorsNegative(csvDf3); });
93
+ test('testDetectorsNegativeSmiles', async () => { await _testDetectorsNegative(csvDfSmiles); });
94
+
95
+ test('testDetectorsN1', async () => { await _testDetectorsN1(csvDfN1); });
96
+ test('testDetectorsAA1', async () => { await _testDetectorsAA1(csvDfAA1); });
97
+ test('testDetectorsMsaN1', async () => { await _testDetectorsMsaN1(csvDfMsaN1); });
98
+ test('testDetectorsMsaAA1', async () => { await _testDetectorsMsaAA1(csvDfMsaAA1); });
99
+
100
+ test('testDetectorsSepNt', async () => { await _testDetectorsSepNt(csvDfSepNt, '*'); });
101
+ test('testDetectorsSepPt', async () => { await _testDetectorsSepPt(csvDfSepPt, '-'); });
102
+ test('testDetectorsSepUn1', async () => { await _testDetectorsSepUn(csvDfSepUn1, '-'); });
103
+ test('testDetectorsSepUn2', async () => { await _testDetectorsSepUn(csvDfSepUn2, '/'); });
104
+
105
+ test('testDetectorsSepMsaN1', async () => { await _testDetectorsSepMsaN1(csvDfSepMsaN1); });
106
+
107
+ test('testDetectorsSamplesFastaCsvPt', async () => { await _testDetectorsSamplesFastaCsvPt(); });
108
+ test('testDetectorsSamplesFastaFastaPt', async () => { await _testDetectorsSamplesFastaFastaPt(); });
109
+ });
110
+
111
/** Runs semantic type detection on the CSV and expects `col1` NOT to end up with the `mmSemType` type. */
export async function _testDetectorsNegative(csvDf: string) {
  const table: DG.DataFrame = DG.DataFrame.fromCsv(csvDf);
  await grok.data.detectSemanticTypes(table);

  const target: DG.Column = table.col('col1')!;
  const detectedAsMacromolecule = target.semType == mmSemType;
  expect(detectedAsMacromolecule, false);
}
118
+
119
/** After detection, expects `seq` to have the `mmSemType` type and the units tag 'fasta:SEQ:NT'. */
export async function _testDetectorsN1(csvDfN1: string) {
  const table: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
  await grok.data.detectSemanticTypes(table);

  const seqCol: DG.Column = table.col('seq')!;
  expect(seqCol.semType, mmSemType);
  const units = seqCol.getTag(DG.TAGS.UNITS);
  expect(units, 'fasta:SEQ:NT');
}
127
+
128
/** After detection, expects `seq` to have the `mmSemType` type and the units tag 'fasta:SEQ:PT'. */
export async function _testDetectorsAA1(csvDfAA1: string) {
  const table: DG.DataFrame = DG.DataFrame.fromCsv(csvDfAA1);
  await grok.data.detectSemanticTypes(table);

  const seqCol: DG.Column = table.col('seq')!;
  expect(seqCol.semType, mmSemType);
  const units = seqCol.getTag(DG.TAGS.UNITS);
  expect(units, 'fasta:SEQ:PT');
}
136
+
137
/** After detection, expects `seq` to have the `mmSemType` type and the units tag 'fasta:SEQ.MSA:NT'. */
export async function _testDetectorsMsaN1(csvDfMsaN1: string) {
  const table: DG.DataFrame = DG.DataFrame.fromCsv(csvDfMsaN1);
  await grok.data.detectSemanticTypes(table);

  const seqCol: DG.Column = table.col('seq')!;
  expect(seqCol.semType, mmSemType);
  const units = seqCol.getTag(DG.TAGS.UNITS);
  expect(units, 'fasta:SEQ.MSA:NT');
}
145
+
146
/** After detection, expects `seq` to have the `mmSemType` type and the units tag 'fasta:SEQ.MSA:PT'. */
export async function _testDetectorsMsaAA1(csvDfMsaAA1: string) {
  const table: DG.DataFrame = DG.DataFrame.fromCsv(csvDfMsaAA1);
  await grok.data.detectSemanticTypes(table);

  const seqCol: DG.Column = table.col('seq')!;
  expect(seqCol.semType, mmSemType);
  const units = seqCol.getTag(DG.TAGS.UNITS);
  expect(units, 'fasta:SEQ.MSA:PT');
}
154
+
155
/**
 * After detection, expects `seq` to have the `mmSemType` type, the units tag 'separator:SEQ:NT'
 * and a 'separator' tag equal to the separator passed in.
 */
export async function _testDetectorsSepNt(csv: string, separator: string) {
  const table: DG.DataFrame = DG.DataFrame.fromCsv(csv);
  await grok.data.detectSemanticTypes(table);

  const seqCol: DG.Column = table.col('seq')!;
  expect(seqCol.semType, mmSemType);
  const units = seqCol.getTag(DG.TAGS.UNITS);
  expect(units, 'separator:SEQ:NT');
  const detectedSeparator = seqCol.getTag('separator');
  expect(detectedSeparator, separator);
}
164
+
165
/**
 * After detection, expects `seq` to have the `mmSemType` type, the units tag 'separator:SEQ:PT'
 * and a 'separator' tag equal to the separator passed in.
 */
export async function _testDetectorsSepPt(csv: string, separator: string) {
  const table: DG.DataFrame = DG.DataFrame.fromCsv(csv);
  await grok.data.detectSemanticTypes(table);

  const seqCol: DG.Column = table.col('seq')!;
  expect(seqCol.semType, mmSemType);
  const units = seqCol.getTag(DG.TAGS.UNITS);
  expect(units, 'separator:SEQ:PT');
  const detectedSeparator = seqCol.getTag('separator');
  expect(detectedSeparator, separator);
}
174
+
175
/**
 * After detection, expects `seq` to have the `mmSemType` type, the units tag 'separator:SEQ:UN'
 * and a 'separator' tag equal to the separator passed in.
 */
export async function _testDetectorsSepUn(csv: string, separator: string) {
  const table: DG.DataFrame = DG.DataFrame.fromCsv(csv);
  await grok.data.detectSemanticTypes(table);

  const seqCol: DG.Column = table.col('seq')!;
  expect(seqCol.semType, mmSemType);
  const units = seqCol.getTag(DG.TAGS.UNITS);
  expect(units, 'separator:SEQ:UN');
  const detectedSeparator = seqCol.getTag('separator');
  expect(detectedSeparator, separator);
}
184
+
185
/** After detection, expects `seq` to have the `mmSemType` type and the units tag 'separator:SEQ.MSA:NT'. */
export async function _testDetectorsSepMsaN1(csvDfSepMsaN1: string) {
  const table: DG.DataFrame = DG.DataFrame.fromCsv(csvDfSepMsaN1);
  await grok.data.detectSemanticTypes(table);

  const seqCol: DG.Column = table.col('seq')!;
  expect(seqCol.semType, mmSemType);
  const units = seqCol.getTag(DG.TAGS.UNITS);
  expect(units, 'separator:SEQ.MSA:NT');
}
193
+
194
/**
 * Reads the sample CSV from the Bio app data folder, runs detection and expects the `sequence`
 * column to have the `mmSemType` type, the units tag 'fasta:SEQ:PT' and no 'separator' tag.
 */
export async function _testDetectorsSamplesFastaCsvPt() {
  const samplePath = 'System:AppData/Bio/samples/sample_FASTA.csv';
  const csvText: string = await grok.dapi.files.readAsText(samplePath);
  const table: DG.DataFrame = DG.DataFrame.fromCsv(csvText);
  await grok.data.detectSemanticTypes(table);

  const seqCol: DG.Column = table.col('sequence')!;
  expect(seqCol.semType, mmSemType);
  const units = seqCol.getTag(DG.TAGS.UNITS);
  expect(units, 'fasta:SEQ:PT');
  const separatorTag = seqCol.getTag('separator');
  expect(separatorTag, null);
}
204
+
205
/**
 * Reads the sample FASTA file from the Bio app data folder, loads it through importFasta and
 * expects the `sequence` column of the first returned table to have the `mmSemType` type, the
 * units tag 'fasta:SEQ:PT' and no 'separator' tag.
 */
export async function _testDetectorsSamplesFastaFastaPt() {
  const samplePath = 'System:AppData/Bio/samples/sample_FASTA.fasta';
  const fastaText: string = await grok.dapi.files.readAsText(samplePath);
  const tables = importFasta(fastaText);
  const table: DG.DataFrame = tables[0];

  const seqCol: DG.Column = table.col('sequence')!;
  expect(seqCol.semType, mmSemType);
  const units = seqCol.getTag(DG.TAGS.UNITS);
  expect(units, 'fasta:SEQ:PT');
  const separatorTag = seqCol.getTag('separator');
  expect(separatorTag, null);
}
@@ -0,0 +1,34 @@
1
+ import {category, test} from '@datagrok-libraries/utils/src/test';
2
+ import {
3
+ _testMSAIsCorrect,
4
+ _testTableIsNotEmpty,
5
+ } from './utils';
6
+
7
+ import * as DG from 'datagrok-api/dg';
8
+ //import * as grok from 'datagrok-api/grok';
9
+
10
+ export const _package = new DG.Package();
11
+
12
+
13
+ category('MSA', async () => {
14
+ //table = await grok.data.files.openTable('Demo:Files/bio/peptides.csv');
15
+ const fromCsv = `seq
16
+ FWRWYVKHP
17
+ YNRWYVKHP
18
+ MWRSWYCKHP`;
19
+ const toCsv = `seq
20
+ -F-W-R--W-Y-V-K-H-P
21
+ -Y-N-R--W-Y-V-K-H-P
22
+ -M-W-R-S-W-Y-C-K-H-P`;
23
+ const table: DG.DataFrame = DG.DataFrame.fromCsv(fromCsv);
24
+ const toTable: DG.DataFrame = DG.DataFrame.fromCsv(toCsv);
25
+ const alignedSequencesColumn = toTable.getCol('seq');
26
+
27
+ test('test_table.is_not_empty', async () => {
28
+ await _testTableIsNotEmpty(table);
29
+ });
30
+
31
+ test('is_correct', async () => {
32
+ await _testMSAIsCorrect(alignedSequencesColumn);
33
+ });
34
+ });
@@ -0,0 +1,26 @@
1
+ import * as DG from 'datagrok-api/dg';
2
+
3
+ import {expect} from '@datagrok-libraries/utils/src/test';
4
+ import {runKalign} from '../utils/multiple-sequence-alignment';
5
+
6
/**
 * Asserts that the table has at least one column and at least one row.
 *
 * @param {DG.DataFrame} table Table to check.
 */
export function _testTableIsNotEmpty(table: DG.DataFrame): void {
  const isPopulated = table.columns.length > 0 && table.rowCount > 0;
  expect(isPopulated, true);
}
14
+
15
+
16
/**
 * Runs Kalign on the column and asserts that every produced value either equals the value at
 * the same index of the input column or is null/undefined.
 *
 * @param {DG.Column} col Column of already aligned sequences used both as input and as reference.
 */
export async function _testMSAIsCorrect(col: DG.Column): Promise<void> {
  const alignedCol = await runKalign(col, true);
  const alignedValues = alignedCol.toList();

  let allConsistent = true;
  for (let idx = 0; idx < alignedValues.length; idx++) {
    const value = alignedValues[idx];
    // A value is acceptable when it matches the reference or is missing.
    if (value != col.get(idx) && value != null) {
      allConsistent = false;
      break;
    }
  }
  expect(allConsistent, true);
}
@@ -0,0 +1,62 @@
1
+ export enum COLUMNS_NAMES {
2
+ SPLIT_COL = '~split',
3
+ ACTIVITY = '~activity',
4
+ ACTIVITY_SCALED = 'activity_scaled',
5
+ ALIGNED_SEQUENCE = '~aligned_sequence',
6
+ AMINO_ACID_RESIDUE = 'AAR',
7
+ POSITION = 'Pos',
8
+ P_VALUE = 'pValue',
9
+ MEAN_DIFFERENCE = 'Mean difference',
10
+ }
11
+
12
+ export enum CATEGORIES {
13
+ OTHER = 'Other',
14
+ ALL = 'All',
15
+ }
16
+
17
+ export enum TAGS {
18
+ AAR = 'AAR',
19
+ POSITION = 'Pos',
20
+ SEPARATOR = 'monomer-separator',
21
+ SELECTION = 'selection',
22
+ }
23
+
24
+ export enum SEM_TYPES {
25
+ AMINO_ACIDS = 'aminoAcids',
26
+ ALIGNED_SEQUENCE = 'alignedSequence',
27
+ ALIGNED_SEQUENCE_DIFFERENCE = 'alignedSequenceDifference',
28
+ ACTIVITY = 'activity',
29
+ ACTIVITY_SCALED = 'activityScaled',
30
+ Macro_Molecule = 'Macromolecule',
31
+ }
32
+
33
+ export const STATS = 'stats';
34
+
35
+ export const EMBEDDING_STATUS = 'embeddingStatus';
36
+
37
+ export const PEPTIDES_ANALYSIS = 'isPeptidesAnalysis';
38
+
39
+ export enum FLAGS {
40
+ CELL_CHANGING = 'isCellChanging',
41
+ }
42
+
43
+ export const aarGroups = {
44
+ 'R': 'PC', 'H': 'PC', 'K': 'PC',
45
+ 'D': 'NC', 'E': 'NC',
46
+ 'S': 'U', 'T': 'U', 'N': 'U', 'Q': 'U',
47
+ 'C': 'SC', 'U': 'SC', 'G': 'SC', 'P': 'SC',
48
+ 'A': 'H', 'V': 'H', 'I': 'H', 'L': 'H', 'M': 'H', 'F': 'H', 'Y': 'H', 'W': 'H',
49
+ '-': '-',
50
+ };
51
+
52
+ export const groupDescription: {[key: string]: {'description': string, aminoAcids: string[]}} = {
53
+ 'PC': {'description': 'Positive Amino Acids, with Electrically Charged Side Chains', 'aminoAcids': ['R', 'H', 'K']},
54
+ 'NC': {'description': 'Negative Amino Acids, with Electrically Charged Side Chains', 'aminoAcids': ['D', 'E']},
55
+ 'U': {'description': 'Amino Acids with Polar Uncharged Side Chains', 'aminoAcids': ['S', 'T', 'N', 'Q']},
56
+ 'SC': {'description': 'Special Cases', 'aminoAcids': ['C', 'U', 'G', 'P']},
57
+ 'H': {
58
+ 'description': 'Amino Acids with Hydrophobic Side Chain',
59
+ 'aminoAcids': ['A', 'V', 'I', 'L', 'M', 'F', 'Y', 'W'],
60
+ },
61
+ '-': {'description': 'Unknown Amino Acid', 'aminoAcids': ['-']},
62
+ };