@datagrok/bio 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "@datagrok/bio",
3
3
  "beta": false,
4
4
  "friendlyName": "Bio",
5
- "version": "1.2.1",
5
+ "version": "1.3.0",
6
6
  "description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
7
7
  "repository": {
8
8
  "type": "git",
@@ -10,7 +10,7 @@
10
10
  "directory": "packages/Bio"
11
11
  },
12
12
  "dependencies": {
13
- "@datagrok-libraries/bio": "^2.1.0",
13
+ "@datagrok-libraries/bio": "^2.1.1",
14
14
  "@datagrok-libraries/utils": "^0.4.2",
15
15
  "cash-dom": "latest",
16
16
  "datagrok-api": "^1.3.5",
@@ -2,17 +2,20 @@ import * as DG from 'datagrok-api/dg';
2
2
 
3
3
  import {runTests, tests} from '@datagrok-libraries/utils/src/test';
4
4
 
5
- import './tests/WebLogo.test';
5
+ import './tests/WebLogo-test';
6
+ import './tests/Palettes-test';
7
+ import './tests/detectors-test';
6
8
 
7
9
  export const _packageTest = new DG.Package();
8
10
  export {tests};
9
11
 
12
+ /** For the 'test' function, the argument names are fixed as 'category' and 'test' because of the way it is called. */
10
13
  //name: test
11
14
  //input: string category {optional: true}
12
- //input: string t {optional: true}
15
+ //input: string test {optional: true}
13
16
  //output: dataframe result
14
17
  //top-menu: Tools | Dev | JS API Tests
15
- export async function test(category: string, t: string): Promise<DG.DataFrame> {
16
- const data = await runTests({category, test: t});
18
+ export async function test(category: string, test: string): Promise<DG.DataFrame> {
19
+ const data = await runTests({category, test});
17
20
  return DG.DataFrame.fromObjects(data)!;
18
21
  }
@@ -7,6 +7,6 @@ import * as DG from 'datagrok-api/dg';
7
7
  import {_testPaletteN, _testPaletteAA} from '@datagrok-libraries/bio/src/tests/palettes.test';
8
8
 
9
9
  category('Palettes', () => {
10
- test('testPaletteN', async () => { _testPaletteN(); });
11
- test('testPaletteAA', async () => { _testPaletteAA(); });
10
+ test('testPaletteN', async () => { await _testPaletteN(); });
11
+ test('testPaletteAA', async () => { await _testPaletteAA(); });
12
12
  });
@@ -0,0 +1,127 @@
1
+ import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
2
+
3
+ import * as grok from 'datagrok-api/grok';
4
+ import * as ui from 'datagrok-api/ui';
5
+ import * as DG from 'datagrok-api/dg';
6
+
7
+ import {Nucleotides, NucleotidesPalettes} from '@datagrok-libraries/bio/src/nucleotides';
8
+ import {Aminoacids, AminoacidsPalettes} from '@datagrok-libraries/bio/src/aminoacids';
9
+ import {WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
10
+ import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
11
+ import {UnknownSeqPalette} from '@datagrok-libraries/bio/src/unknown';
12
+
13
+ category('WebLogo', () => {
14
+ const csvDfN1: string = `seq
15
+ ACGTCT
16
+ CAGTGT
17
+ TTCAAC
18
+ `;
19
+
20
+ /** 2 - is an error monomer
21
+ * This sequence set should be classified as nucleotides sequences.
22
+ * Small error, not similar to amino acids.
23
+ */
24
+ const csvDfN1e: string = `seq
25
+ ACGTAT
26
+ CAGTTG
27
+ TTCG2C
28
+ `;
29
+
30
+ /** Pure amino acids sequence */
31
+ const csvDfAA1: string = `seq
32
+ FWPHEYV
33
+ YNRQWYV
34
+ MKPSEYV
35
+ `;
36
+
37
+ /** A - alanine, G - glycine, T - threonine, C - cysteine, W - tryptophan
38
+ * This sequence set should be detected as amino acids more than nucleotides.
39
+ */
40
+ const csvDfAA2: string = `seq
41
+ AGTCAT
42
+ AGTCGC
43
+ AGTCATW
44
+ `;
45
+
46
+ /** This sequence set should be recognized as unknown. */
47
+ const csvDfX: string = `seq
48
+ XZJ{}2
49
+ 5Z4733
50
+ 3Z6></
51
+ 675687
52
+ `;
53
+
54
+ // Anonymous functions passed to test() at registration must return Promise<any>
55
+ test('testGetAlphabetSimilarity', async () => { await _testGetAlphabetSimilarity(); });
56
+
57
+ test('testPickupPaletteN1', async () => { await _testPickupPaletteN1(csvDfN1); });
58
+ test('testPickupPaletteN1e', async () => { await _testPickupPaletteN1e(csvDfN1e); });
59
+ test('testPickupPaletteAA1', async () => { await _testPickupPaletteAA1(csvDfAA1); });
60
+ test('testPickupPaletteX', async () => { await _testPickupPaletteX(csvDfX); });
61
+ });
62
+
63
+
64
+ export async function _testGetAlphabetFreqs(dfN1: DG.DataFrame) {
65
+ const seqCol: DG.Column = dfN1.col('seq')!;
66
+ const mFreq = WebLogo.getAlphabetFreqs(seqCol);
67
+
68
+ expectObject(mFreq, {
69
+ 'A': 4,
70
+ 'C': 5,
71
+ 'G': 3,
72
+ 'T': 6
73
+ });
74
+ }
75
+
76
+ export async function _testGetAlphabetSimilarity() {
77
+ const freq: { [m: string]: number } = {
78
+ 'A': 2041,
79
+ 'C': 3015,
80
+ 'G': 3015,
81
+ 'T': 2048,
82
+ '-': 1000
83
+ };
84
+ const alphabet: Set<string> = new Set(Object.keys(Nucleotides.Names));
85
+ const res = WebLogo.getAlphabetSimilarity(freq, alphabet);
86
+
87
+ expect(res > 0.6, true);
88
+ }
89
+
90
+ export async function _testPickupPaletteN1(csvDfN1: string) {
91
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
92
+ const col: DG.Column = df.col('seq')!;
93
+ const cp = WebLogo.pickUpPalette(col);
94
+
95
+ expect(cp instanceof NucleotidesPalettes, true);
96
+ }
97
+
98
+ export async function _testPickupPaletteN1e(csvDfN1e: string) {
99
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1e);
100
+ const col: DG.Column = df.col('seq')!;
101
+ const cp = WebLogo.pickUpPalette(col);
102
+
103
+ expect(cp instanceof NucleotidesPalettes, true);
104
+ }
105
+
106
+ export async function _testPickupPaletteAA1(csvDfAA1: string) {
107
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfAA1);
108
+ const col: DG.Column = df.col('seq')!;
109
+ const cp = WebLogo.pickUpPalette(col);
110
+
111
+ expect(cp instanceof AminoacidsPalettes, true);
112
+ }
113
+
114
+ export async function _testPickupPaletteX(csvDfX: string) {
115
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfX);
116
+ const col: DG.Column = df.col('seq')!;
117
+ const cp = WebLogo.pickUpPalette(col);
118
+
119
+ expect(cp instanceof UnknownSeqPalette, true);
120
+ }
121
+
122
+ export async function _testPickupPaletteAA2(dfAA2: DG.DataFrame) {
123
+ const seqCol: DG.Column = dfAA2.col('seq')!;
124
+ const cp = WebLogo.pickUpPalette(seqCol);
125
+
126
+ expect(cp instanceof AminoacidsPalettes, true);
127
+ }
@@ -0,0 +1,135 @@
1
+ import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
2
+
3
+ import * as grok from 'datagrok-api/grok';
4
+ import * as ui from 'datagrok-api/ui';
5
+ import * as DG from 'datagrok-api/dg';
6
+
7
+ category('detectors', () => {
8
+ const csvDfN1: string = `seq
9
+ ACGTC
10
+ CAGTGT
11
+ TTCAAC
12
+ `;
13
+
14
+ /** Pure amino acids sequence */
15
+ const csvDfAA1: string = `seq
16
+ FWPHEY
17
+ YNRQWYV
18
+ MKPSEYV
19
+ `;
20
+
21
+ const csvDfSepNt: string = `seq
22
+ A*C*G*T*C
23
+ C*A*G*T*G*T
24
+ T*T*C*A*A*C
25
+ `;
26
+
27
+ const csvDfSepPt: string = `seq
28
+ F-W-P-H-E-Y
29
+ Y-N-R-Q-W-Y-V
30
+ M-K-P-S-E-Y-V
31
+ `;
32
+
33
+ const csvDfSepUn1: string = `seq
34
+ abc-dfgg-abc1-cfr3-rty-wert
35
+ rut12-her2-rty-wert-abc-abc1-dfgg
36
+ rut12-rty-her2-abc-cfr3-wert-rut12
37
+ `;
38
+
39
+ const csvDfSepUn2: string = `seq
40
+ abc/dfgg/abc1/cfr3/rty/wert
41
+ rut12/her2/rty/wert//abc/abc1/dfgg
42
+ rut12/rty/her2/abc/cfr3//wert/rut12
43
+ `;
44
+
45
+ const csvDfSepMsaN1: string = `seq
46
+ A-C--G-T--C-T
47
+ C-A-C--T--G-T
48
+ A-C-C-G-T-A-C-T
49
+ `;
50
+
51
+ const csvDfMsaN1: string = `seq
52
+ AC-GT-CT
53
+ CAC-T-GT
54
+ ACCGTACT
55
+ `;
56
+
57
+ const csvDfMsaAA1: string = `seq
58
+ FWR-WYV-KHP
59
+ YNR-WYV-KHP
60
+ MWRSWY-CKHP
61
+ `;
62
+
63
+ test('testDetectorsN1', async () => { await _testDetectorsN1(csvDfN1); });
64
+ test('testDetectorsAA1', async () => { await _testDetectorsAA1(csvDfAA1); });
65
+ test('testDetectorsMsaN1', async () => { await _testDetectorsMsaN1(csvDfMsaN1); });
66
+ test('testDetectorsMsaAA1', async () => { await _testDetectorsMsaAA1(csvDfMsaAA1); });
67
+
68
+ test('testDetectorsSepUn1', async () => { await _testDetectorsSepUn1(csvDfSepUn1); });
69
+ test('testDetectorsSepUn2', async () => { await _testDetectorsSepUn2(csvDfSepUn2); });
70
+
71
+ test('testDetectorsSepMsaN1', async () => { await _testDetectorsSepMsaN1(csvDfSepMsaN1); });
72
+ });
73
+
74
+ export async function _testDetectorsN1(csvDfN1: string) {
75
+ const dfN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
76
+ await grok.data.detectSemanticTypes(dfN1);
77
+
78
+ const col: DG.Column = dfN1.col('seq')!;
79
+ expect(col.semType, 'MACROMOLECULE');
80
+ expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:NT');
81
+ }
82
+
83
+ export async function _testDetectorsAA1(csvDfAA1: string) {
84
+ const dfAA1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfAA1);
85
+ await grok.data.detectSemanticTypes(dfAA1);
86
+
87
+ const col: DG.Column = dfAA1.col('seq')!;
88
+ expect(col.semType, 'MACROMOLECULE');
89
+ expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ:PT');
90
+ }
91
+
92
+ export async function _testDetectorsMsaN1(csvDfMsaN1: string) {
93
+ const dfMsaN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfMsaN1);
94
+ await grok.data.detectSemanticTypes(dfMsaN1);
95
+
96
+ const col: DG.Column = dfMsaN1.col('seq')!;
97
+ expect(col.semType, 'MACROMOLECULE');
98
+ expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:NT');
99
+ }
100
+
101
+ export async function _testDetectorsMsaAA1(csvDfMsaAA1: string) {
102
+ const dfMsaAA1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfMsaAA1);
103
+ await grok.data.detectSemanticTypes(dfMsaAA1);
104
+
105
+ const col: DG.Column = dfMsaAA1.col('seq')!;
106
+ expect(col.semType, 'MACROMOLECULE');
107
+ expect(col.getTag(DG.TAGS.UNITS), 'fasta:SEQ.MSA:PT');
108
+ }
109
+
110
+ export async function _testDetectorsSepUn1(csvDfSepUn1: string) {
111
+ const dfSepUn1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfSepUn1);
112
+ await grok.data.detectSemanticTypes(dfSepUn1);
113
+
114
+ const col: DG.Column = dfSepUn1.col('seq')!;
115
+ expect(col.semType, 'MACROMOLECULE');
116
+ expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ:UN');
117
+ }
118
+
119
+ export async function _testDetectorsSepUn2(csvDfSepUn2: string) {
120
+ const dfSepUn2: DG.DataFrame = DG.DataFrame.fromCsv(csvDfSepUn2);
121
+ await grok.data.detectSemanticTypes(dfSepUn2);
122
+
123
+ const col: DG.Column = dfSepUn2.col('seq')!;
124
+ expect(col.semType, 'MACROMOLECULE');
125
+ expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ:UN');
126
+ }
127
+
128
+ export async function _testDetectorsSepMsaN1(csvDfSepMsaN1: string) {
129
+ const dfSepMsaN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfSepMsaN1);
130
+ await grok.data.detectSemanticTypes(dfSepMsaN1);
131
+
132
+ const col: DG.Column = dfSepMsaN1.col('seq')!;
133
+ expect(col.semType, 'MACROMOLECULE');
134
+ expect(col.getTag(DG.TAGS.UNITS), 'separator:SEQ.MSA:NT');
135
+ }
@@ -1,4 +1,4 @@
1
- <html><head><meta charset="utf-8"/><title>Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=3109311545e4. Commit 270baeb2.</title><style type="text/css">html,
1
+ <html><head><meta charset="utf-8"/><title>Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=ea88b3c1054c. Commit d589d38a.</title><style type="text/css">html,
2
2
  body {
3
3
  font-family: Arial, Helvetica, sans-serif;
4
4
  font-size: 1rem;
@@ -229,7 +229,7 @@ header {
229
229
  font-size: 1rem;
230
230
  padding: 0 0.5rem;
231
231
  }
232
- </style></head><body><div id="jesthtml-content"><header><h1 id="title">Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=3109311545e4. Commit 270baeb2.</h1></header><div id="metadata-container"><div id="timestamp">Started: 2022-06-23 09:29:26</div><div id="summary"><div id="suite-summary"><div class="summary-total">Suites (1)</div><div class="summary-passed">1 passed</div><div class="summary-failed summary-empty">0 failed</div><div class="summary-pending summary-empty">0 pending</div></div><div id="test-summary"><div class="summary-total">Tests (1)</div><div class="summary-passed">1 passed</div><div class="summary-failed summary-empty">0 failed</div><div class="summary-pending summary-empty">0 pending</div></div></div></div><div id="suite-1" class="suite-container"><div class="suite-info"><div class="suite-path">/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts</div><div class="suite-time warn">17.389s</div></div><div class="suite-tests"><div class="test-result passed"><div class="test-info"><div class="test-suitename"> </div><div class="test-title">TEST</div><div class="test-status">passed</div><div class="test-duration">6.514s</div></div></div></div><div class="suite-consolelog"><div class="suite-consolelog-header">Console Log</div><div class="suite-consolelog-item"><pre class="suite-consolelog-item-origin"> at Object.&lt;anonymous&gt; (/home/runner/work/public/public/packages/Bio/src/__jest__/test-node.ts:63:11)
232
+ </style></head><body><div id="jesthtml-content"><header><h1 id="title">Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=ea88b3c1054c. Commit d589d38a.</h1></header><div id="metadata-container"><div id="timestamp">Started: 2022-06-28 11:54:32</div><div id="summary"><div id="suite-summary"><div class="summary-total">Suites (1)</div><div class="summary-passed">1 passed</div><div class="summary-failed summary-empty">0 failed</div><div class="summary-pending summary-empty">0 pending</div></div><div id="test-summary"><div class="summary-total">Tests (1)</div><div class="summary-passed">1 passed</div><div class="summary-failed summary-empty">0 failed</div><div class="summary-pending summary-empty">0 pending</div></div></div></div><div id="suite-1" class="suite-container"><div class="suite-info"><div class="suite-path">/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts</div><div class="suite-time warn">40.832s</div></div><div class="suite-tests"><div class="test-result passed"><div class="test-info"><div class="test-suitename"> </div><div class="test-title">TEST</div><div class="test-status">passed</div><div class="test-duration">28.874s</div></div></div></div><div class="suite-consolelog"><div class="suite-consolelog-header">Console Log</div><div class="suite-consolelog-item"><pre class="suite-consolelog-item-origin"> at Object.&lt;anonymous&gt; (/home/runner/work/public/public/packages/Bio/src/__jest__/test-node.ts:63:11)
233
233
  at Generator.next (&lt;anonymous&gt;)
234
234
  at fulfilled (/home/runner/work/public/public/packages/Bio/src/__jest__/test-node.ts:28:58)
235
235
  at processTicksAndRejections (internal/process/task_queues.js:97:5)</pre><pre class="suite-consolelog-item-message">Using web root: http://localhost:8080</pre></div><div class="suite-consolelog-item"><pre class="suite-consolelog-item-origin"> at /home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:24:11
@@ -1,132 +0,0 @@
1
- import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
2
-
3
- import * as grok from 'datagrok-api/grok';
4
- import * as ui from 'datagrok-api/ui';
5
- import * as DG from 'datagrok-api/dg';
6
-
7
- import {Nucleotides, NucleotidesPalettes} from '@datagrok-libraries/bio/src/nucleotides';
8
- import {Aminoacids, AminoacidsPalettes} from '@datagrok-libraries/bio/src/aminoacids';
9
- import {WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
10
- import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
11
- import {UnknownSeqPalette} from '@datagrok-libraries/bio/src/unknown';
12
-
13
- category('WebLogo', () => {
14
- test('testGetAlphabetSimilarity', async () => { _testGetAlphabetSimilarity(); });
15
-
16
- test('testPickupPaletteN1', async () => { _testPickupPaletteN1(); });
17
- test('testPickupPaletteAA1', async () => { _testPickupPaletteAA1(); });
18
-
19
- // dfAA2 is too similar to nucleotides
20
- // test('testPickupPaletteAA2', async () => {
21
- // _testPickupPaletteAA2();
22
- // });
23
- });
24
-
25
- const dfN1: DG.DataFrame = DG.DataFrame.fromCsv(
26
- `seq
27
- ACGTCT
28
- CAGTGT
29
- TTCAAC
30
- `);
31
-
32
- /** 2 - is an error monomer
33
- * This sequence set should be classified as nucleotides sequences.
34
- * Small error, not similar to amino acids.
35
- */
36
- const dfN1e: DG.DataFrame = DG.DataFrame.fromCsv(
37
- `seq
38
- ACGTAT
39
- CAGTTG
40
- TTCG2C
41
- `);
42
-
43
- /** Pure amino acids sequence */
44
- const dfAA1: DG.DataFrame = DG.DataFrame.fromCsv(
45
- `seq
46
- FWPHEYV
47
- YNRQWYV
48
- MKPSEYV
49
- `);
50
-
51
- /** A - alanine, G - glycine, T -= threonine, C - cysteine, W - tryptophan
52
- * This sequence set should be detected as amino acids more than nucleotides.
53
- */
54
- const dfAA2: DG.DataFrame = DG.DataFrame.fromCsv(
55
- `seq
56
- AGTCAT
57
- AGTCGC
58
- AGTCATW
59
- `);
60
-
61
- /** This sequence set should be recognized as unknown. */
62
- const dfX: DG.DataFrame = DG.DataFrame.fromCsv(
63
- `seq
64
- XZJ{}2
65
- 5Z4733
66
- 3Z6></
67
- 675687
68
- `);
69
-
70
- export function _testGetAlphabetFreqs() {
71
- const seqCol: DG.Column = dfN1.col('seq')!;
72
- const mFreq = WebLogo.getAlphabetFreqs(seqCol);
73
-
74
- expectObject(mFreq, {
75
- 'A': 4,
76
- 'C': 5,
77
- 'G': 3,
78
- 'T': 6
79
- });
80
- }
81
-
82
- export function _testGetAlphabetSimilarity() {
83
- const freq: { [m: string]: number } = {
84
- 'A': 2041,
85
- 'C': 3015,
86
- 'G': 3015,
87
- 'T': 2048,
88
- '-': 1000
89
- };
90
- const alphabet: Set<string> = new Set(Object.keys(Nucleotides.Names));
91
- const res = WebLogo.getAlphabetSimilarity(freq, alphabet);
92
-
93
- expect(res > 0.6, true);
94
- }
95
-
96
- export function _testPickupPaletteN1() {
97
- const seqCol: DG.Column = dfN1.col('seq')!;
98
- const cp = WebLogo.pickUpPalette(seqCol);
99
-
100
- expect(cp instanceof NucleotidesPalettes, true);
101
- }
102
-
103
- export function _testPickupPaletteAA1() {
104
- const seqCol: DG.Column = dfAA1.col('seq')!;
105
- const cp = WebLogo.pickUpPalette(seqCol);
106
-
107
- expect(cp instanceof AminoacidsPalettes, true);
108
- }
109
-
110
- export function _testPickupPaletteAA2() {
111
- const seqCol: DG.Column = dfAA2.col('seq')!;
112
- const cp = WebLogo.pickUpPalette(seqCol);
113
-
114
- expect(cp instanceof AminoacidsPalettes, true);
115
- }
116
-
117
- export function _testPickupPaletteAll() {
118
- const seqColN1: DG.Column = dfN1.col('seq')!;
119
- const seqColAA1: DG.Column = dfAA1.col('seq')!;
120
- const seqColAA2: DG.Column = dfAA2.col('seq')!;
121
- const seqColX: DG.Column = dfX.col('seq')!;
122
-
123
- const cpN1: SeqPalette = WebLogo.pickUpPalette(seqColN1);
124
- const cpAA1: SeqPalette = WebLogo.pickUpPalette(seqColAA1);
125
- const cpAA2: SeqPalette = WebLogo.pickUpPalette(seqColAA2);
126
- const cpX: SeqPalette = WebLogo.pickUpPalette(seqColX);
127
-
128
- expect(cpN1 instanceof NucleotidesPalettes, true);
129
- expect(cpAA1 instanceof AminoacidsPalettes, true);
130
- expect(cpAA2 instanceof AminoacidsPalettes, true);
131
- expect(cpX instanceof UnknownSeqPalette, true);
132
- }