@datagrok/bio 1.3.1 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,76 @@
1
+ import * as DG from 'datagrok-api/dg';
2
+
3
+ //@ts-ignore
4
+ import Aioli from '@biowasm/aioli';
5
+
6
+ import {AlignedSequenceEncoder} from '@datagrok-libraries/bio/src/sequence-encoder';
7
+ import * as C from './constants';
8
+
9
/**
 * Serialises a list of sequences as a minimal FASTA document.
 *
 * The record at zero-based position `i` is written as a `>sample{i + 1}` header line
 * followed by the sequence on one line; every line, including the last, ends with `\n`.
 *
 * @param {string[]} sequences Sequences to serialise, in output order.
 * @return {string} FASTA text; the empty string when `sequences` is empty.
 */
function _stringsToFasta(sequences: string[]): string {
  const records = sequences.map((seq, idx) => `>sample${idx + 1}\n${seq}\n`);
  return records.join('');
}
18
+
19
/**
 * Extracts the sequences from a FASTA string, one entry per record.
 *
 * Any line starting with `>` opens a new record. All following lines up to the next
 * header (or the end of the text) are concatenated, so sequences wrapped over several
 * lines come back as a single string. `\n`, `\r\n` and `\r` line endings are accepted.
 * Text that precedes the first header is ignored.
 *
 * @param {string} fasta Fasta-formatted string.
 * @return {string[]} Sequences in record order; an empty array when there is no record.
 */
function _fastaToStrings(fasta: string): string[] {
  const sequences: string[] = [];
  // Lines of the record being read; null until the first header has been seen.
  let current: string[] | null = null;

  for (const line of fasta.split(/\r\n|\r|\n/)) {
    if (line.startsWith('>')) {
      if (current !== null)
        sequences.push(current.join(''));
      current = [];
    } else if (current !== null) {
      current.push(line);
    }
  }

  // Flush the last record, which has no following header to terminate it.
  if (current !== null)
    sequences.push(current.join(''));

  return sequences;
}
28
+
29
/**
 * Runs Aioli environment with kalign tool.
 *
 * The column values are written as FASTA to `input.fa` in the Aioli file system, kalign
 * 3.3.1 is executed on that file, and `result.fasta` is read back and converted into a
 * new column named `msa(<source column name>)`.
 *
 * @param {DG.Column} col Column with sequences.
 * @param {boolean} isAligned Whether the column is aligned. When true, every value is
 *   passed through `AlignedSequenceEncoder.clean` and has its `-` characters removed
 *   before being sent to kalign.
 * @return {Promise<DG.Column>} Aligned sequences.
 * @throws {Error} When the kalign output contains fewer sequences than the input column.
 */
export async function runKalign(col: DG.Column, isAligned = false): Promise<DG.Column> {
  let sequences = col.toList();

  if (isAligned)
    sequences = sequences.map((v: string) => AlignedSequenceEncoder.clean(v).replace(/-/g, ''));

  const fasta = _stringsToFasta(sequences);
  const CLI = await new Aioli({
    tool: 'kalign',
    version: '3.3.1',
    reinit: true,
  });

  console.log(['fasta.length =', fasta.length]);

  await CLI.fs.writeFile('input.fa', fasta);
  const output = await CLI.exec('kalign input.fa -f fasta -o result.fasta');
  const buf = await CLI.cat('result.fasta');

  console.warn(output);

  const aligned = _fastaToStrings(buf).slice(0, sequences.length);
  // A short result means kalign did not produce an alignment for every input row.
  // Fail here instead of returning a column whose row count differs from the source.
  if (aligned.length !== sequences.length)
    throw new Error(`kalign returned ${aligned.length} aligned sequences, expected ${sequences.length}`);

  const alignedCol = DG.Column.fromStrings(`msa(${col.name})`, aligned);
  alignedCol.setTag(DG.TAGS.UNITS, '');
  alignedCol.semType = C.SEM_TYPES.Macro_Molecule;
  return alignedCol;
}
63
+
64
/**
 * Diagnostic helper: runs kalign on growing prefixes of a column and logs, for each
 * prefix size, whether the run succeeded or failed.
 *
 * Prefix sizes are 1%, 2%, ..., 99% of the column length, rounded to whole rows. Sizes
 * that round to zero or repeat the previous size are skipped, so short columns are not
 * aligned on an empty or duplicate prefix. Failures are logged, not rethrown.
 *
 * @param {DG.Column} col Column with sequences.
 * @return {Promise<void>} Resolves once every prefix size has been tried.
 */
export async function testMSAEnoughMemory(col: DG.Column): Promise<void> {
  const sequencesCount = col.length;
  const stepsCount = 100;
  // Materialise the values once; every run only needs a prefix of the same list.
  const allSequences = col.toList();

  // Starts at 0 so that a prefix size of zero is treated as already visited.
  let previousSize = 0;
  for (let step = 1; step < stepsCount; ++step) {
    // Derived from the integer step to avoid accumulating floating point error.
    const size = Math.round(step * sequencesCount / stepsCount);
    if (size === previousSize)
      continue;
    previousSize = size;

    try {
      await runKalign(DG.Column.fromStrings(col.name, allSequences.slice(0, size)));
      console.log(`runKalign succeeded on ${size}`);
    } catch (error) {
      console.log(`runKalign failed on ${size} with '${error}'`);
    }
  }
}
@@ -0,0 +1,43 @@
1
+ import * as DG from 'datagrok-api/dg';
2
+ import { AvailableMetrics } from '@datagrok-libraries/ml/src/typed-metrics';
3
+ import {reduceDimensinalityWithNormalization} from '@datagrok-libraries/ml/src/sequence-space';
4
+ import {BitArrayMetrics, StringMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
5
+ import { Matrix } from '@datagrok-libraries/utils/src/type-declarations';
6
+ import BitArray from '@datagrok-libraries/utils/src/bit-array';
7
+
8
/** Result of {@link sequenceSpace}. */
export interface ISequenceSpaceResult {
  /** Distance matrix returned by the dimensionality reduction. */
  distance: Matrix;
  /** One column per requested axis, built from the corresponding embedding row. */
  coordinates: DG.ColumnList;
}

/**
 * Embeds the sequences of a column into a low-dimensional space.
 *
 * Unless the column's units tag is `HELM`, every occurrence of the column's separator
 * (the `separator` tag, `-` when the tag is missing or empty) is removed from each value
 * before the values are passed to `reduceDimensinalityWithNormalization`. The separator
 * is matched literally, so characters such as `|`, `*` or `+` are safe to use.
 *
 * @param {DG.Column} molColumn Column with sequences.
 * @param {string} methodName Dimensionality reduction method, forwarded as is.
 * @param {string} similarityMetric Similarity metric name, forwarded as is.
 * @param {string[]} axes Names of the output coordinate columns; the i-th name is paired
 *   with the i-th row of the embedding.
 * @param {any} options Extra options forwarded to the dimensionality reduction.
 * @return {Promise<ISequenceSpaceResult>} Distance matrix and coordinate columns.
 */
export async function sequenceSpace(molColumn: DG.Column, methodName: string, similarityMetric: string,
  axes: string[], options?: any): Promise<ISequenceSpaceResult> {
  // Kept as `any` because the parameter type expected by the reduction routine is
  // declared outside this file.
  let preparedData: any;
  if (molColumn.tags[DG.TAGS.UNITS] === 'HELM') {
    preparedData = molColumn.toList();
  } else {
    const separator: string = molColumn.getTag('separator') || '-';
    // split/join removes every literal occurrence without building a regular expression,
    // so separators that are regex metacharacters need no escaping.
    preparedData = molColumn.toList().map((v: string) => v.split(separator).join(''));
  }

  const sequenceSpaceResult = await reduceDimensinalityWithNormalization(
    preparedData,
    methodName,
    similarityMetric as StringMetrics|BitArrayMetrics,
    options);
  const cols: DG.Column[] = axes.map(
    (name, index) => DG.Column.fromFloat32Array(name, sequenceSpaceResult.embedding[index]));
  return {distance: sequenceSpaceResult.distance, coordinates: new DG.ColumnList(cols)};
}
37
+
38
+
39
/**
 * Proposes names for a new pair of embedding columns that are not yet used in `df`.
 *
 * The numeric suffix starts at one more than the number of existing columns whose name
 * contains `Embed_X`, and is increased until neither `Embed_X_<n>` nor `Embed_Y_<n>` is
 * already a column name. This avoids collisions after earlier embedding columns have
 * been deleted.
 *
 * @param {DG.DataFrame} df Data frame the embedding columns are going to be added to.
 * @return {string[]} Two names, `Embed_X_<n>` and `Embed_Y_<n>`, sharing the same suffix.
 */
export function getEmbeddingColsNames(df: DG.DataFrame): string[] {
  const axes = ['Embed_X', 'Embed_Y'];
  const existingNames: string[] = df.columns.names();
  const taken = new Set(existingNames);

  let suffix = existingNames.filter((name) => name.includes(axes[0])).length + 1;
  while (axes.some((axis) => taken.has(`${axis}_${suffix}`)))
    ++suffix;

  return axes.map((axis) => `${axis}_${suffix}`);
}
@@ -1,245 +0,0 @@
1
- <html><head><meta charset="utf-8"/><title>Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=69a4761f6044. Commit 51a4ab35.</title><style type="text/css">html,
2
- body {
3
- font-family: Arial, Helvetica, sans-serif;
4
- font-size: 1rem;
5
- margin: 0;
6
- padding: 0;
7
- color: #333;
8
- }
9
- body {
10
- padding: 2rem 1rem;
11
- font-size: 0.85rem;
12
- }
13
- #jesthtml-content {
14
- margin: 0 auto;
15
- max-width: 70rem;
16
- }
17
- header {
18
- display: flex;
19
- align-items: center;
20
- }
21
- #title {
22
- margin: 0;
23
- flex-grow: 1;
24
- }
25
- #logo {
26
- height: 4rem;
27
- }
28
- #timestamp {
29
- color: #777;
30
- margin-top: 0.5rem;
31
- }
32
-
33
- /** SUMMARY */
34
- #summary {
35
- color: #333;
36
- margin: 2rem 0;
37
- display: flex;
38
- font-family: monospace;
39
- font-size: 1rem;
40
- }
41
- #summary > div {
42
- margin-right: 2rem;
43
- background: #eee;
44
- padding: 1rem;
45
- min-width: 15rem;
46
- }
47
- #summary > div:last-child {
48
- margin-right: 0;
49
- }
50
- @media only screen and (max-width: 720px) {
51
- #summary {
52
- flex-direction: column;
53
- }
54
- #summary > div {
55
- margin-right: 0;
56
- margin-top: 2rem;
57
- }
58
- #summary > div:first-child {
59
- margin-top: 0;
60
- }
61
- }
62
-
63
- .summary-total {
64
- font-weight: bold;
65
- margin-bottom: 0.5rem;
66
- }
67
- .summary-passed {
68
- color: #4f8a10;
69
- border-left: 0.4rem solid #4f8a10;
70
- padding-left: 0.5rem;
71
- }
72
- .summary-failed,
73
- .summary-obsolete-snapshots {
74
- color: #d8000c;
75
- border-left: 0.4rem solid #d8000c;
76
- padding-left: 0.5rem;
77
- }
78
- .summary-pending {
79
- color: #9f6000;
80
- border-left: 0.4rem solid #9f6000;
81
- padding-left: 0.5rem;
82
- }
83
- .summary-empty {
84
- color: #999;
85
- border-left: 0.4rem solid #999;
86
- }
87
-
88
- .test-result {
89
- padding: 1rem;
90
- margin-bottom: 0.25rem;
91
- }
92
- .test-result:last-child {
93
- border: 0;
94
- }
95
- .test-result.passed {
96
- background-color: #dff2bf;
97
- color: #4f8a10;
98
- }
99
- .test-result.failed {
100
- background-color: #ffbaba;
101
- color: #d8000c;
102
- }
103
- .test-result.pending {
104
- background-color: #ffdf61;
105
- color: #9f6000;
106
- }
107
-
108
- .test-info {
109
- display: flex;
110
- justify-content: space-between;
111
- }
112
- .test-suitename {
113
- width: 20%;
114
- text-align: left;
115
- font-weight: bold;
116
- word-break: break-word;
117
- }
118
- .test-title {
119
- width: 40%;
120
- text-align: left;
121
- font-style: italic;
122
- }
123
- .test-status {
124
- width: 20%;
125
- text-align: right;
126
- }
127
- .test-duration {
128
- width: 10%;
129
- text-align: right;
130
- font-size: 0.75rem;
131
- }
132
-
133
- .failureMessages {
134
- padding: 0 1rem;
135
- margin-top: 1rem;
136
- border-top: 1px dashed #d8000c;
137
- }
138
- .failureMessages.suiteFailure {
139
- border-top: none;
140
- }
141
- .failureMsg {
142
- white-space: pre-wrap;
143
- white-space: -moz-pre-wrap;
144
- white-space: -pre-wrap;
145
- white-space: -o-pre-wrap;
146
- word-wrap: break-word;
147
- }
148
-
149
- .suite-container {
150
- margin-bottom: 2rem;
151
- }
152
- .suite-info {
153
- padding: 1rem;
154
- background-color: #eee;
155
- color: #777;
156
- display: flex;
157
- align-items: center;
158
- margin-bottom: 0.25rem;
159
- }
160
- .suite-info .suite-path {
161
- word-break: break-all;
162
- flex-grow: 1;
163
- font-family: monospace;
164
- font-size: 1rem;
165
- }
166
- .suite-info .suite-time {
167
- margin-left: 0.5rem;
168
- padding: 0.2rem 0.3rem;
169
- font-size: 0.75rem;
170
- }
171
- .suite-info .suite-time.warn {
172
- background-color: #d8000c;
173
- color: #fff;
174
- }
175
-
176
- /* CONSOLE LOGS */
177
- .suite-consolelog {
178
- margin-bottom: 0.25rem;
179
- padding: 1rem;
180
- background-color: #efefef;
181
- }
182
- .suite-consolelog-header {
183
- font-weight: bold;
184
- }
185
- .suite-consolelog-item {
186
- padding: 0.5rem;
187
- }
188
- .suite-consolelog-item pre {
189
- margin: 0.5rem 0;
190
- white-space: pre-wrap;
191
- white-space: -moz-pre-wrap;
192
- white-space: -pre-wrap;
193
- white-space: -o-pre-wrap;
194
- word-wrap: break-word;
195
- }
196
- .suite-consolelog-item-origin {
197
- color: #777;
198
- font-weight: bold;
199
- }
200
- .suite-consolelog-item-message {
201
- color: #000;
202
- font-size: 1rem;
203
- padding: 0 0.5rem;
204
- }
205
-
206
- /* OBSOLETE SNAPSHOTS */
207
- .suite-obsolete-snapshots {
208
- margin-bottom: 0.25rem;
209
- padding: 1rem;
210
- background-color: #ffbaba;
211
- color: #d8000c;
212
- }
213
- .suite-obsolete-snapshots-header {
214
- font-weight: bold;
215
- }
216
- .suite-obsolete-snapshots-item {
217
- padding: 0.5rem;
218
- }
219
- .suite-obsolete-snapshots-item pre {
220
- margin: 0.5rem 0;
221
- white-space: pre-wrap;
222
- white-space: -moz-pre-wrap;
223
- white-space: -pre-wrap;
224
- white-space: -o-pre-wrap;
225
- word-wrap: break-word;
226
- }
227
- .suite-obsolete-snapshots-item-message {
228
- color: #000;
229
- font-size: 1rem;
230
- padding: 0 0.5rem;
231
- }
232
- </style></head><body><div id="jesthtml-content"><header><h1 id="title">Bio Test Report. Datagrok version datagrok/datagrok:latest SHA=69a4761f6044. Commit 51a4ab35.</h1></header><div id="metadata-container"><div id="timestamp">Started: 2022-07-01 14:55:40</div><div id="summary"><div id="suite-summary"><div class="summary-total">Suites (1)</div><div class="summary-passed">1 passed</div><div class="summary-failed summary-empty">0 failed</div><div class="summary-pending summary-empty">0 pending</div></div><div id="test-summary"><div class="summary-total">Tests (1)</div><div class="summary-passed">1 passed</div><div class="summary-failed summary-empty">0 failed</div><div class="summary-pending summary-empty">0 pending</div></div></div></div><div id="suite-1" class="suite-container"><div class="suite-info"><div class="suite-path">/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts</div><div class="suite-time warn">39.007s</div></div><div class="suite-tests"><div class="test-result passed"><div class="test-info"><div class="test-suitename"> </div><div class="test-title">TEST</div><div class="test-status">passed</div><div class="test-duration">28.74s</div></div></div></div><div class="suite-consolelog"><div class="suite-consolelog-header">Console Log</div><div class="suite-consolelog-item"><pre class="suite-consolelog-item-origin"> at Object.&lt;anonymous&gt; (/home/runner/work/public/public/packages/Bio/src/__jest__/test-node.ts:63:11)
233
- at Generator.next (&lt;anonymous&gt;)
234
- at fulfilled (/home/runner/work/public/public/packages/Bio/src/__jest__/test-node.ts:28:58)
235
- at processTicksAndRejections (internal/process/task_queues.js:97:5)</pre><pre class="suite-consolelog-item-message">Using web root: http://localhost:8080</pre></div><div class="suite-consolelog-item"><pre class="suite-consolelog-item-origin"> at /home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:24:11
236
- at Generator.next (&lt;anonymous&gt;)
237
- at /home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:34:71
238
- at new Promise (&lt;anonymous&gt;)
239
- at Object.&lt;anonymous&gt;.__awaiter (/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:30:12)
240
- at Object.&lt;anonymous&gt; (/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:22:23)
241
- at Promise.then.completed (/home/runner/work/public/public/packages/Bio/node_modules/jest-circus/build/utils.js:391:28)
242
- at new Promise (&lt;anonymous&gt;)</pre><pre class="suite-consolelog-item-message">Testing Bio package</pre></div><div class="suite-consolelog-item"><pre class="suite-consolelog-item-origin"> at /home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:47:11
243
- at Generator.next (&lt;anonymous&gt;)
244
- at fulfilled (/home/runner/work/public/public/packages/Bio/src/__jest__/remote.test.ts:31:58)
245
- at processTicksAndRejections (internal/process/task_queues.js:97:5)</pre><pre class="suite-consolelog-item-message"/></div></div></div></div></body></html>