@datagrok/bio 1.2.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,107 @@
1
+ import * as DG from 'datagrok-api/dg';
2
+
3
+ //@ts-ignore
4
+ import Aioli from '@biowasm/aioli';
5
+
6
+ import {AlignedSequenceEncoder} from '@datagrok-libraries/bio/src/sequence-encoder';
7
+ import * as C from './constants';
8
+
9
/**
 * Builds a minimal FASTA document out of a list of sequences.
 *
 * Every sequence becomes one record whose header is `>sample<N>`,
 * where N is the 1-based position of the sequence in the input list.
 *
 * @param {string[]} sequences Input list of sequences.
 * @return {string} Fasta-formatted string (empty for an empty list).
 */
function _stringsToFasta(sequences: string[]): string {
  const records = sequences.map((seq, idx) => `>sample${idx + 1}\n${seq}\n`);
  return records.join('');
}
18
+
19
/**
 * Extracts the list of sequences from a FASTA string.
 *
 * A line starting with `>` opens a new record; all following lines up to the next
 * header are concatenated into a single sequence. This keeps a sequence intact when
 * the FASTA writer wraps it over several lines. LF, CRLF and CR line endings are
 * all accepted, and no trailing empty element is produced for the final newline.
 * Lines that appear before the first header are ignored.
 *
 * @param {string} fasta Fasta-formatted string.
 * @return {string[]} Output list of sequences, one item per FASTA record, in file order.
 */
function _fastaToStrings(fasta: string): string[] {
  const sequences: string[] = [];
  // `null` means that no header has been met yet.
  let current: string | null = null;

  for (const rawLine of fasta.split(/\r\n|\r|\n/)) {
    const line = rawLine.trim();

    if (line.startsWith('>')) {
      // A new header closes the record collected so far.
      if (current !== null)
        sequences.push(current);
      current = '';
    } else if (current !== null) {
      // Continuation line of a (possibly wrapped) sequence.
      current += line;
    }
  }

  if (current !== null)
    sequences.push(current);

  return sequences;
}
28
+
29
/**
 * Rewrites an aligned sequence into the '-'-delimited form used by the semantic type.
 *
 * Each character other than '-' is emitted with a leading '-';
 * a '-' character (gap) is emitted unchanged.
 *
 * @param {string} seq Source sequence.
 * @return {string} Formatted sequence.
 */
function _castAligned(seq: string): string {
  // Array.from walks the string the same way `for...of` does (by code point).
  const parts = Array.from(seq, (symbol) => {
    if (symbol == '-')
      return symbol;
    return `-${symbol}`;
  });
  return parts.join('');
}
43
+
44
/**
 * Applies `_castAligned` to every sequence of an alignment.
 *
 * @param {string[]} alignment List of aligned sequences.
 * @return {string[]} New list of the same length holding the formatted sequences.
 */
function _stringsToAligned(alignment: string[]): string[] {
  return Array.from(alignment, (seq) => _castAligned(seq));
}
59
+
60
/**
 * Aligns the sequences of a column with the kalign tool executed through Aioli.
 *
 * The sequences are serialized to FASTA, written to `input.fa` through the Aioli
 * file system API, aligned by the `kalign` command, and read back from `result.fasta`.
 * The result is returned as a new column named `msa(<source column name>)` with an
 * empty units tag and the macromolecule semantic type.
 *
 * @param {DG.Column} col Column with sequences.
 * @param {boolean} isAligned Whether the column is already aligned; if so, each value is
 *   passed through `AlignedSequenceEncoder.clean` and stripped of '-' before alignment.
 * @return {Promise<DG.Column>} Aligned sequences.
 */
export async function runKalign(col: DG.Column, isAligned = false) : Promise<DG.Column> {
  const rawValues = col.toList();
  const sequences = isAligned ?
    rawValues.map((value: string) => AlignedSequenceEncoder.clean(value).replace(/\-/g, '')) :
    rawValues;

  const fasta = _stringsToFasta(sequences);
  const aioliConfig = {
    tool: 'kalign',
    version: '3.3.1',
    reinit: true,
  };
  const CLI = await new Aioli(aioliConfig);

  console.log(['fasta.length =', fasta.length]);

  await CLI.fs.writeFile('input.fa', fasta);
  const output = await CLI.exec('kalign input.fa -f fasta -o result.fasta');
  const buf = await CLI.cat('result.fasta');

  console.warn(output);

  // Keep at most as many parsed items as there were input sequences.
  const parsed = _fastaToStrings(buf);
  const aligned = parsed.slice(0, sequences.length);

  const alignedCol = DG.Column.fromStrings(`msa(${col.name})`, _stringsToAligned(aligned));
  alignedCol.setTag(DG.TAGS.UNITS, '');
  alignedCol.semType = C.SEM_TYPES.Macro_Molecule;
  return alignedCol;
}
94
+
95
/**
 * Probes how large an input `runKalign` can process by aligning growing prefixes of a column.
 *
 * Prefix sizes are taken at 1%, 2%, ..., 99% of the column length, rounded to whole rows.
 * Empty prefixes and sizes that repeat after rounding (columns shorter than 100 rows)
 * are skipped, so every attempted size is distinct and non-empty.
 * The outcome of each attempt is written to the console; a failure is caught
 * so that probing continues with the next size.
 *
 * @param {DG.Column} col Column with sequences.
 * @return {Promise<void>} Resolves once all prefix sizes have been tried.
 */
export async function testMSAEnoughMemory(col: DG.Column): Promise<void> {
  const stepsCount = 100;
  const sequencesCount = col.length;
  // The column is not modified between attempts, so its values are read only once.
  const allSequences = col.toList();

  let previousSize = 0;
  for (let step = 1; step < stepsCount; ++step) {
    // Multiplying (instead of accumulating a fractional delta) avoids floating-point drift.
    const size = Math.round(step * sequencesCount / stepsCount);

    // Sizes never decrease, so comparing with the previous one filters out
    // both the empty prefix and duplicates produced by rounding.
    if (size === previousSize)
      continue;
    previousSize = size;

    try {
      await runKalign(DG.Column.fromStrings(col.name, allSequences.slice(0, size)));
      console.log(`runKalign succeeded on ${size}`);
    } catch (error) {
      console.log(`runKalign failed on ${size} with '${error}'`);
    }
  }
}
@@ -1,132 +0,0 @@
1
- import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
2
-
3
- import * as grok from 'datagrok-api/grok';
4
- import * as ui from 'datagrok-api/ui';
5
- import * as DG from 'datagrok-api/dg';
6
-
7
- import {Nucleotides, NucleotidesPalettes} from '@datagrok-libraries/bio/src/nucleotides';
8
- import {Aminoacids, AminoacidsPalettes} from '@datagrok-libraries/bio/src/aminoacids';
9
- import {WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
10
- import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
11
- import {UnknownSeqPalette} from '@datagrok-libraries/bio/src/unknown';
12
-
13
- category('WebLogo', () => {
14
- test('testGetAlphabetSimilarity', async () => { _testGetAlphabetSimilarity(); });
15
-
16
- test('testPickupPaletteN1', async () => { _testPickupPaletteN1(); });
17
- test('testPickupPaletteAA1', async () => { _testPickupPaletteAA1(); });
18
-
19
- // dfAA2 is too similar to nucleotides
20
- // test('testPickupPaletteAA2', async () => {
21
- // _testPickupPaletteAA2();
22
- // });
23
- });
24
-
25
- const dfN1: DG.DataFrame = DG.DataFrame.fromCsv(
26
- `seq
27
- ACGTCT
28
- CAGTGT
29
- TTCAAC
30
- `);
31
-
32
- /** 2 - is an error monomer
33
- * This sequence set should be classified as nucleotides sequences.
34
- * Small error, not similar to amino acids.
35
- */
36
- const dfN1e: DG.DataFrame = DG.DataFrame.fromCsv(
37
- `seq
38
- ACGTAT
39
- CAGTTG
40
- TTCG2C
41
- `);
42
-
43
- /** Pure amino acids sequence */
44
- const dfAA1: DG.DataFrame = DG.DataFrame.fromCsv(
45
- `seq
46
- FWPHEYV
47
- YNRQWYV
48
- MKPSEYV
49
- `);
50
-
51
- /** A - alanine, G - glycine, T - threonine, C - cysteine, W - tryptophan
52
- * This sequence set should be detected as amino acids more than nucleotides.
53
- */
54
- const dfAA2: DG.DataFrame = DG.DataFrame.fromCsv(
55
- `seq
56
- AGTCAT
57
- AGTCGC
58
- AGTCATW
59
- `);
60
-
61
- /** This sequence set should be recognized as unknown. */
62
- const dfX: DG.DataFrame = DG.DataFrame.fromCsv(
63
- `seq
64
- XZJ{}2
65
- 5Z4733
66
- 3Z6></
67
- 675687
68
- `);
69
-
70
- export function _testGetAlphabetFreqs() {
71
- const seqCol: DG.Column = dfN1.col('seq')!;
72
- const mFreq = WebLogo.getAlphabetFreqs(seqCol);
73
-
74
- expectObject(mFreq, {
75
- 'A': 4,
76
- 'C': 5,
77
- 'G': 3,
78
- 'T': 6
79
- });
80
- }
81
-
82
- export function _testGetAlphabetSimilarity() {
83
- const freq: { [m: string]: number } = {
84
- 'A': 2041,
85
- 'C': 3015,
86
- 'G': 3015,
87
- 'T': 2048,
88
- '-': 1000
89
- };
90
- const alphabet: Set<string> = new Set(Object.keys(Nucleotides.Names));
91
- const res = WebLogo.getAlphabetSimilarity(freq, alphabet);
92
-
93
- expect(res > 0.6, true);
94
- }
95
-
96
- export function _testPickupPaletteN1() {
97
- const seqCol: DG.Column = dfN1.col('seq')!;
98
- const cp = WebLogo.pickUpPalette(seqCol);
99
-
100
- expect(cp instanceof NucleotidesPalettes, true);
101
- }
102
-
103
- export function _testPickupPaletteAA1() {
104
- const seqCol: DG.Column = dfAA1.col('seq')!;
105
- const cp = WebLogo.pickUpPalette(seqCol);
106
-
107
- expect(cp instanceof AminoacidsPalettes, true);
108
- }
109
-
110
- export function _testPickupPaletteAA2() {
111
- const seqCol: DG.Column = dfAA2.col('seq')!;
112
- const cp = WebLogo.pickUpPalette(seqCol);
113
-
114
- expect(cp instanceof AminoacidsPalettes, true);
115
- }
116
-
117
- export function _testPickupPaletteAll() {
118
- const seqColN1: DG.Column = dfN1.col('seq')!;
119
- const seqColAA1: DG.Column = dfAA1.col('seq')!;
120
- const seqColAA2: DG.Column = dfAA2.col('seq')!;
121
- const seqColX: DG.Column = dfX.col('seq')!;
122
-
123
- const cpN1: SeqPalette = WebLogo.pickUpPalette(seqColN1);
124
- const cpAA1: SeqPalette = WebLogo.pickUpPalette(seqColAA1);
125
- const cpAA2: SeqPalette = WebLogo.pickUpPalette(seqColAA2);
126
- const cpX: SeqPalette = WebLogo.pickUpPalette(seqColX);
127
-
128
- expect(cpN1 instanceof NucleotidesPalettes, true);
129
- expect(cpAA1 instanceof AminoacidsPalettes, true);
130
- expect(cpAA2 instanceof AminoacidsPalettes, true);
131
- expect(cpX instanceof UnknownSeqPalette, true);
132
- }
@@ -1,245 +0,0 @@
1
- <html><head><meta charset="utf-8"/><title>Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=3109311545e4. Commit 270baeb2.</title><style type="text/css">html,
2
- body {
3
- font-family: Arial, Helvetica, sans-serif;
4
- font-size: 1rem;
5
- margin: 0;
6
- padding: 0;
7
- color: #333;
8
- }
9
- body {
10
- padding: 2rem 1rem;
11
- font-size: 0.85rem;
12
- }
13
- #jesthtml-content {
14
- margin: 0 auto;
15
- max-width: 70rem;
16
- }
17
- header {
18
- display: flex;
19
- align-items: center;
20
- }
21
- #title {
22
- margin: 0;
23
- flex-grow: 1;
24
- }
25
- #logo {
26
- height: 4rem;
27
- }
28
- #timestamp {
29
- color: #777;
30
- margin-top: 0.5rem;
31
- }
32
-
33
- /** SUMMARY */
34
- #summary {
35
- color: #333;
36
- margin: 2rem 0;
37
- display: flex;
38
- font-family: monospace;
39
- font-size: 1rem;
40
- }
41
- #summary > div {
42
- margin-right: 2rem;
43
- background: #eee;
44
- padding: 1rem;
45
- min-width: 15rem;
46
- }
47
- #summary > div:last-child {
48
- margin-right: 0;
49
- }
50
- @media only screen and (max-width: 720px) {
51
- #summary {
52
- flex-direction: column;
53
- }
54
- #summary > div {
55
- margin-right: 0;
56
- margin-top: 2rem;
57
- }
58
- #summary > div:first-child {
59
- margin-top: 0;
60
- }
61
- }
62
-
63
- .summary-total {
64
- font-weight: bold;
65
- margin-bottom: 0.5rem;
66
- }
67
- .summary-passed {
68
- color: #4f8a10;
69
- border-left: 0.4rem solid #4f8a10;
70
- padding-left: 0.5rem;
71
- }
72
- .summary-failed,
73
- .summary-obsolete-snapshots {
74
- color: #d8000c;
75
- border-left: 0.4rem solid #d8000c;
76
- padding-left: 0.5rem;
77
- }
78
- .summary-pending {
79
- color: #9f6000;
80
- border-left: 0.4rem solid #9f6000;
81
- padding-left: 0.5rem;
82
- }
83
- .summary-empty {
84
- color: #999;
85
- border-left: 0.4rem solid #999;
86
- }
87
-
88
- .test-result {
89
- padding: 1rem;
90
- margin-bottom: 0.25rem;
91
- }
92
- .test-result:last-child {
93
- border: 0;
94
- }
95
- .test-result.passed {
96
- background-color: #dff2bf;
97
- color: #4f8a10;
98
- }
99
- .test-result.failed {
100
- background-color: #ffbaba;
101
- color: #d8000c;
102
- }
103
- .test-result.pending {
104
- background-color: #ffdf61;
105
- color: #9f6000;
106
- }
107
-
108
- .test-info {
109
- display: flex;
110
- justify-content: space-between;
111
- }
112
- .test-suitename {
113
- width: 20%;
114
- text-align: left;
115
- font-weight: bold;
116
- word-break: break-word;
117
- }
118
- .test-title {
119
- width: 40%;
120
- text-align: left;
121
- font-style: italic;
122
- }
123
- .test-status {
124
- width: 20%;
125
- text-align: right;
126
- }
127
- .test-duration {
128
- width: 10%;
129
- text-align: right;
130
- font-size: 0.75rem;
131
- }
132
-
133
- .failureMessages {
134
- padding: 0 1rem;
135
- margin-top: 1rem;
136
- border-top: 1px dashed #d8000c;
137
- }
138
- .failureMessages.suiteFailure {
139
- border-top: none;
140
- }
141
- .failureMsg {
142
- white-space: pre-wrap;
143
- white-space: -moz-pre-wrap;
144
- white-space: -pre-wrap;
145
- white-space: -o-pre-wrap;
146
- word-wrap: break-word;
147
- }
148
-
149
- .suite-container {
150
- margin-bottom: 2rem;
151
- }
152
- .suite-info {
153
- padding: 1rem;
154
- background-color: #eee;
155
- color: #777;
156
- display: flex;
157
- align-items: center;
158
- margin-bottom: 0.25rem;
159
- }
160
- .suite-info .suite-path {
161
- word-break: break-all;
162
- flex-grow: 1;
163
- font-family: monospace;
164
- font-size: 1rem;
165
- }
166
- .suite-info .suite-time {
167
- margin-left: 0.5rem;
168
- padding: 0.2rem 0.3rem;
169
- font-size: 0.75rem;
170
- }
171
- .suite-info .suite-time.warn {
172
- background-color: #d8000c;
173
- color: #fff;
174
- }
175
-
176
- /* CONSOLE LOGS */
177
- .suite-consolelog {
178
- margin-bottom: 0.25rem;
179
- padding: 1rem;
180
- background-color: #efefef;
181
- }
182
- .suite-consolelog-header {
183
- font-weight: bold;
184
- }
185
- .suite-consolelog-item {
186
- padding: 0.5rem;
187
- }
188
- .suite-consolelog-item pre {
189
- margin: 0.5rem 0;
190
- white-space: pre-wrap;
191
- white-space: -moz-pre-wrap;
192
- white-space: -pre-wrap;
193
- white-space: -o-pre-wrap;
194
- word-wrap: break-word;
195
- }
196
- .suite-consolelog-item-origin {
197
- color: #777;
198
- font-weight: bold;
199
- }
200
- .suite-consolelog-item-message {
201
- color: #000;
202
- font-size: 1rem;
203
- padding: 0 0.5rem;
204
- }
205
-
206
- /* OBSOLETE SNAPSHOTS */
207
- .suite-obsolete-snapshots {
208
- margin-bottom: 0.25rem;
209
- padding: 1rem;
210
- background-color: #ffbaba;
211
- color: #d8000c;
212
- }
213
- .suite-obsolete-snapshots-header {
214
- font-weight: bold;
215
- }
216
- .suite-obsolete-snapshots-item {
217
- padding: 0.5rem;
218
- }
219
- .suite-obsolete-snapshots-item pre {
220
- margin: 0.5rem 0;
221
- white-space: pre-wrap;
222
- white-space: -moz-pre-wrap;
223
- white-space: -pre-wrap;
224
- white-space: -o-pre-wrap;
225
- word-wrap: break-word;
226
- }
227
- .suite-obsolete-snapshots-item-message {
228
- color: #000;
229
- font-size: 1rem;
230
- padding: 0 0.5rem;
231
- }
232
- </style></head><body><div id="jesthtml-content"><header><h1 id="title">Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=3109311545e4. Commit 270baeb2.</h1></header><div id="metadata-container"><div id="timestamp">Started: 2022-06-23 09:29:26</div><div id="summary"><div id="suite-summary"><div class="summary-total">Suites (1)</div><div class="summary-passed">1 passed</div><div class="summary-failed summary-empty">0 failed</div><div class="summary-pending summary-empty">0 pending</div></div><div id="test-summary"><div class="summary-total">Tests (1)</div><div class="summary-passed">1 passed</div><div class="summary-failed summary-empty">0 failed</div><div class="summary-pending summary-empty">0 pending</div></div></div></div><div id="suite-1" class="suite-container"><div class="suite-info"><div class="suite-path">/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts</div><div class="suite-time warn">17.389s</div></div><div class="suite-tests"><div class="test-result passed"><div class="test-info"><div class="test-suitename"> </div><div class="test-title">TEST</div><div class="test-status">passed</div><div class="test-duration">6.514s</div></div></div></div><div class="suite-consolelog"><div class="suite-consolelog-header">Console Log</div><div class="suite-consolelog-item"><pre class="suite-consolelog-item-origin"> at Object.&lt;anonymous&gt; (/home/runner/work/public/public/packages/Bio/src/__jest__/test-node.ts:63:11)
233
- at Generator.next (&lt;anonymous&gt;)
234
- at fulfilled (/home/runner/work/public/public/packages/Bio/src/__jest__/test-node.ts:28:58)
235
- at processTicksAndRejections (internal/process/task_queues.js:97:5)</pre><pre class="suite-consolelog-item-message">Using web root: http://localhost:8080</pre></div><div class="suite-consolelog-item"><pre class="suite-consolelog-item-origin"> at /home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:24:11
236
- at Generator.next (&lt;anonymous&gt;)
237
- at /home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:34:71
238
- at new Promise (&lt;anonymous&gt;)
239
- at Object.&lt;anonymous&gt;.__awaiter (/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:30:12)
240
- at Object.&lt;anonymous&gt; (/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:22:23)
241
- at Promise.then.completed (/home/runner/work/public/public/packages/Bio/node_modules/jest-circus/build/utils.js:391:28)
242
- at new Promise (&lt;anonymous&gt;)</pre><pre class="suite-consolelog-item-message">Testing Bio package</pre></div><div class="suite-consolelog-item"><pre class="suite-consolelog-item-origin"> at /home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:47:11
243
- at Generator.next (&lt;anonymous&gt;)
244
- at fulfilled (/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:31:58)
245
- at processTicksAndRejections (internal/process/task_queues.js:97:5)</pre><pre class="suite-consolelog-item-message"/></div></div></div></div></body></html>