@datagrok/bio 2.1.4 → 2.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,320 @@
1
+ vals
2
+ "6.3
3
+
4
+ 43.0
5
+
6
+
7
+ 561.0
8
+
9
+
10
+ 24.0"
11
+ "80.0
12
+
13
+ 421.0
14
+
15
+
16
+ 4.3
17
+ 6.6"
18
+ "231.0
19
+
20
+
21
+ 225.0
22
+
23
+
24
+
25
+
26
+
27
+
28
+ 43.0
29
+
30
+
31
+
32
+
33
+
34
+
35
+
36
+
37
+
38
+
39
+
40
+
41
+
42
+ 3.9"
43
+ "348.0
44
+
45
+
46
+
47
+ 26.0"
48
+ "40.2
49
+
50
+
51
+ 364.0"
52
+ "25.0
53
+
54
+
55
+
56
+ 215.0
57
+
58
+
59
+
60
+
61
+ 69.0
62
+
63
+
64
+ 3.2
65
+ 6.9"
66
+ "12.0
67
+
68
+
69
+
70
+
71
+
72
+ 1392.0
73
+
74
+
75
+
76
+
77
+
78
+
79
+
80
+ 6.0"
81
+ "367.0
82
+
83
+
84
+
85
+
86
+
87
+
88
+
89
+
90
+
91
+
92
+
93
+
94
+
95
+
96
+
97
+
98
+
99
+
100
+
101
+
102
+
103
+
104
+
105
+
106
+ 284.0"
107
+ "118.0
108
+ 79.0
109
+
110
+
111
+
112
+ 161.0
113
+
114
+
115
+ 3.0
116
+
117
+
118
+
119
+ 24.0
120
+
121
+
122
+
123
+ 8.0"
124
+ "149.0
125
+
126
+
127
+
128
+
129
+
130
+
131
+
132
+ 95.7
133
+
134
+
135
+ 564.0
136
+ 153.0"
137
+ "126.0
138
+
139
+
140
+
141
+
142
+
143
+
144
+
145
+
146
+
147
+
148
+
149
+
150
+
151
+
152
+
153
+
154
+
155
+
156
+
157
+
158
+
159
+
160
+ 89.7"
161
+ "294.0
162
+ 60.0
163
+
164
+
165
+
166
+
167
+
168
+
169
+
170
+
171
+
172
+
173
+ 594.0
174
+
175
+
176
+
177
+ 190.0
178
+
179
+
180
+
181
+ 533.0
182
+
183
+
184
+
185
+
186
+
187
+ 671.0
188
+ 304.0"
189
+ "2.0
190
+
191
+
192
+
193
+
194
+
195
+
196
+
197
+
198
+
199
+
200
+ 24.0
201
+ 89.0
202
+
203
+
204
+
205
+
206
+ 2443.0
207
+
208
+
209
+
210
+
211
+
212
+ 108.0
213
+
214
+
215
+
216
+
217
+
218
+
219
+
220
+
221
+
222
+ 119.0"
223
+ "240.0
224
+
225
+
226
+
227
+ 2.0
228
+
229
+
230
+
231
+
232
+
233
+
234
+ 36.0
235
+
236
+
237
+
238
+
239
+ 8.0"
240
+ "223.0
241
+
242
+
243
+
244
+ 372.0
245
+ 93.0
246
+
247
+
248
+
249
+
250
+ 30.0"
251
+ "30.0
252
+
253
+
254
+
255
+
256
+ 5955.0
257
+ 137.0
258
+
259
+
260
+
261
+ 2.0
262
+
263
+
264
+
265
+
266
+
267
+
268
+
269
+
270
+
271
+ 26.0"
272
+ "37.5
273
+
274
+
275
+
276
+ 166.0"
277
+ "135.0
278
+
279
+ 17.0"
280
+ "1173.0
281
+ 539.0"
282
+ "2860.0
283
+
284
+
285
+ 3940.0"
286
+ "763.0
287
+ 1547.0"
288
+ "139.0
289
+
290
+ 426.0"
291
+ "307.0
292
+
293
+
294
+ 86.3"
295
+ 865
296
+ 605
297
+ 226
298
+ 8
299
+ 231
300
+ "54.0
301
+
302
+ 43.3
303
+
304
+
305
+
306
+
307
+
308
+
309
+
310
+
311
+
312
+ 90.9
313
+
314
+
315
+
316
+ 687.0"
317
+ "4.0
318
+
319
+
320
+ 9.0"
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.1.4",
8
+ "version": "2.1.8",
9
9
  "description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
10
10
  "repository": {
11
11
  "type": "git",
@@ -14,8 +14,8 @@
14
14
  },
15
15
  "dependencies": {
16
16
  "@biowasm/aioli": "^3.1.0",
17
- "@datagrok-libraries/bio": "^5.9.15",
18
- "@datagrok-libraries/chem-meta": "1.0.1",
17
+ "@datagrok-libraries/bio": "^5.10.0",
18
+ "@datagrok-libraries/chem-meta": "^1.0.1",
19
19
  "@datagrok-libraries/ml": "^6.2.2",
20
20
  "@datagrok-libraries/utils": "^1.15.5",
21
21
  "cash-dom": "^8.0.0",
@@ -4,17 +4,12 @@ library(random)
4
4
  alphabetDna <- c('A','C','G','T')
5
5
  alphabetRna <- c('A','C','G','U')
6
6
  alphabetPt <- c('G', 'L', 'Y', 'S', 'E', 'Q', 'D', 'N', 'F', 'A',
7
- 'K', 'R', 'H', 'C', 'V', 'P', 'W', 'I', 'M', 'T',)
7
+ 'K', 'R', 'H', 'C', 'V', 'P', 'W', 'I', 'M', 'T')
8
8
 
9
- toAlphabet <- function(v, a;ph){
9
+ toAlphabet <- function(v, alph){
10
10
  paste(sapply(v, function(ci){ alph[ci]; }), collapse = '')
11
11
  }
12
12
 
13
- mutateString <- function(s, p){
14
-
15
- }
16
-
17
- seq <- toAlphabet(sample.int(4, 35, replace=TRUE), alphabet);
18
13
  seqPt <- toAlphabet(sample.int(20, 35, replace=TRUE), alphabetPt);
19
14
  seqDna <- toAlphabet(sample.int(4, 35, replace=TRUE), alphabetDna);
20
15
  seqRna <- toAlphabet(sample.int(4, 35, replace=TRUE), alphabetRna);
@@ -60,8 +55,10 @@ seq_mutate <- function(s, p, alphabet){
60
55
  res_s;
61
56
  }
62
57
 
63
- fastaDna_df <- data.frame(id = 1:100, sequence = sapply(1:100, function(id){ seq_mutate(seqDna, seq_p1, alphabetDna)}));
64
- write_csv(fastaDna_df, 'D:/HOME/atanas/Datagrok/projs/public/packages/Bio/files/samples/sample_FASTA_DNA.csv');
58
+ for (n in c(100,1000,10000, 100000, 1000000)){
59
+ fastaDna_df <- data.frame(id = 1:n, sequence = sapply(1:n, function(id){ seq_mutate(seqDna, seq_p1, alphabetDna)}));
60
+ write_csv(fastaDna_df, sprintf('../files/data/sample_FASTA_DNA-%d.csv', n));
61
+ }
65
62
 
66
63
  fastaRna_df <- data.frame(id = 1:100, sequence = sapply(1:100, function(id){ seq_mutate(seqRna, seq_p2, alphabetRna)}));
67
64
  write_csv(fastaRna_df, 'D:/HOME/atanas/Datagrok/projs/public/packages/Bio/files/samples/sample_FASTA_RNA.csv');
@@ -39,33 +39,39 @@ it('TEST', async () => {
39
39
  const targetPackage: string = process.env.TARGET_PACKAGE ?? 'Bio';
40
40
  console.log(`Testing ${targetPackage} package`);
41
41
 
42
- const r = await page.evaluate((targetPackage): Promise<object> => {
42
+ let r = await page.evaluate((targetPackage):Promise<object> => {
43
43
  return new Promise<object>((resolve, reject) => {
44
44
  (<any>window).grok.functions.eval(targetPackage + ':test()').then((df: any) => {
45
45
  const cStatus = df.columns.byName('success');
46
+ const cSkipped = df.columns.byName('skipped');
46
47
  const cMessage = df.columns.byName('result');
47
48
  const cCat = df.columns.byName('category');
48
49
  const cName = df.columns.byName('name');
49
50
  const cTime = df.columns.byName('ms');
50
51
  let failed = false;
52
+ let skipReport = '';
51
53
  let passReport = '';
52
54
  let failReport = '';
53
55
  for (let i = 0; i < df.rowCount; i++) {
54
56
  if (cStatus.get(i)) {
55
- passReport += `Test result : Success : ${cTime.get(i)} : ` +
56
- `${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
57
+ if (cSkipped.get(i)) {
58
+ skipReport += `Test result : Skipped : ${cTime.get(i)} : ${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
59
+ } else {
60
+ passReport += `Test result : Success : ${cTime.get(i)} : ${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
61
+ }
57
62
  } else {
58
63
  failed = true;
59
- failReport += `Test result : Failed : ${cTime.get(i)} : ` +
60
- `${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
64
+ failReport += `Test result : Failed : ${cTime.get(i)} : ${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
61
65
  }
62
66
  }
63
- resolve({failReport, passReport, failed});
67
+ resolve({failReport, skipReport, passReport, failed});
64
68
  }).catch((e: any) => reject(e));
65
69
  });
66
70
  }, targetPackage);
67
71
  // @ts-ignore
68
72
  console.log(r.passReport);
69
73
  // @ts-ignore
74
+ console.log(r.skipReport);
75
+ // @ts-ignore
70
76
  expect(r.failed).checkOutput(false, r.failReport);
71
- }, 3600000);
77
+ }, 7200000);
@@ -3,20 +3,21 @@ import * as DG from 'datagrok-api/dg';
3
3
  import {runTests, TestContext, tests} from '@datagrok-libraries/utils/src/test';
4
4
 
5
5
  import './tests/Palettes-test';
6
- import './tests/detectors-test';
6
+ import './tests/detectors-tests';
7
+ import './tests/detectors-benchmark-tests';
7
8
  import './tests/msa-tests';
8
9
  import './tests/sequence-space-test';
9
10
  import './tests/activity-cliffs-tests';
10
11
  import './tests/splitters-test';
11
12
  import './tests/renderers-test';
12
- import './tests/convert-test';
13
+ import './tests/converters-test';
13
14
  import './tests/fasta-handler-test';
14
15
  import './tests/fasta-export-tests';
15
16
  import './tests/bio-tests';
16
17
  import './tests/WebLogo-positions-test';
17
18
  import './tests/checkInputColumn-tests';
18
19
  import './tests/similarity-diversity-tests';
19
- import './tests/substructure-filter-tests';
20
+ import './tests/substructure-filters-tests';
20
21
 
21
22
  export const _package = new DG.Package();
22
23
  export {tests};
package/src/package.ts CHANGED
@@ -11,23 +11,26 @@ import {runKalign, testMSAEnoughMemory} from './utils/multiple-sequence-alignmen
11
11
  import {SequenceAlignment, Aligned} from './seq_align';
12
12
  import {getEmbeddingColsNames, sequenceSpace, sequenceSpaceByFingerprints} from './analysis/sequence-space';
13
13
  import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
14
- import {createLinesGrid, createPropPanelElement, createTooltipElement, getChemSimilaritiesMarix, getSimilaritiesMarix} from './analysis/sequence-activity-cliffs';
15
- import {createJsonMonomerLibFromSdf, encodeMonomers, getMolfilesFromSeq} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
14
+ import {
15
+ createLinesGrid,
16
+ createPropPanelElement,
17
+ createTooltipElement,
18
+ getChemSimilaritiesMarix,
19
+ getSimilaritiesMarix
20
+ } from './analysis/sequence-activity-cliffs';
21
+ import {
22
+ createJsonMonomerLibFromSdf,
23
+ encodeMonomers,
24
+ getMolfilesFromSeq
25
+ } from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
16
26
  import {HELM_CORE_LIB_FILENAME} from '@datagrok-libraries/bio/src/utils/const';
17
27
  import {getMacroMol} from './utils/atomic-works';
18
28
  import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
19
29
  import {convert} from './utils/convert';
20
30
  import {getMacroMolColumnPropertyPanel, representationsWidget} from './widgets/representations';
21
- import {MonomerFreqs, TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
22
- import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule'
23
31
  import {_toAtomicLevel} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level';
24
32
  import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
25
33
  import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils';
26
- import {
27
- generateManySequences,
28
- generateLongSequence,
29
- performanceTest
30
- } from './tests/test-sequnces-generators';
31
34
 
32
35
  import {splitAlignedSequences} from '@datagrok-libraries/bio/src/utils/splitter';
33
36
  import * as C from './utils/constants';
@@ -36,11 +39,14 @@ import {SequenceDiversityViewer} from './analysis/sequence-diversity-viewer';
36
39
  import {invalidateMols, MONOMERIC_COL_TAGS, substructureSearchDialog} from './substructure-search/substructure-search';
37
40
  import {saveAsFastaUI} from './utils/save-as-fasta';
38
41
  import {BioSubstructureFilter} from './widgets/bio-substructure-filter';
39
- import { getMonomericMols } from './calculations/monomerLevelMols';
40
- import { delay } from '@datagrok-libraries/utils/src/test';
42
+ import {getMonomericMols} from './calculations/monomerLevelMols';
43
+ import {delay} from '@datagrok-libraries/utils/src/test';
41
44
  import {from, Observable, Subject} from 'rxjs';
42
- import {Monomer, IMonomerLib, MonomerWorks, MonomerLib, readLibrary,
43
- SeqPalette, UnitsHandler, WebLogoViewer, getStats, splitterAsHelm} from '@datagrok-libraries/bio';
45
+ import {
46
+ TAGS as bio_TAGS,
47
+ Monomer, IMonomerLib, MonomerWorks, MonomerLib, readLibrary,
48
+ SeqPalette, UnitsHandler, WebLogoViewer, getStats, splitterAsHelm
49
+ } from '@datagrok-libraries/bio';
44
50
 
45
51
  const STORAGE_NAME = 'Libraries';
46
52
  const LIB_PATH = 'System:AppData/Bio/libraries';
@@ -51,6 +57,7 @@ export let hydrophobPalette: SeqPaletteCustom | null = null;
51
57
 
52
58
  export class SeqPaletteCustom implements SeqPalette {
53
59
  private readonly _palette: { [m: string]: string };
60
+
54
61
  constructor(palette: { [m: string]: string }) {
55
62
  this._palette = palette;
56
63
  }
@@ -67,7 +74,7 @@ export async function initBio() {
67
74
  let logPs: number[] = [];
68
75
  const module = await grok.functions.call('Chem:getRdKitModule');
69
76
 
70
-
77
+
71
78
  const series = monomerLib!.getMonomerMolsByType('PEPTIDE')!;
72
79
  Object.keys(series).forEach(symbol => {
73
80
  monomers.push(symbol);
@@ -81,7 +88,7 @@ export async function initBio() {
81
88
  const sum = logPs.reduce((a, b) => a + b, 0);
82
89
  const avg = (sum / logPs.length) || 0;
83
90
 
84
- let palette: {[monomer: string]: string} = {};
91
+ let palette: { [monomer: string]: string } = {};
85
92
  for (let i = 0; i < monomers.length; i++) {
86
93
  palette[monomers[i]] = logPs[i] < avg ? '#4682B4' : '#DC143C';
87
94
  }
@@ -134,7 +141,7 @@ export async function libraryPanel(seqColumn: DG.Column): Promise<DG.Widget> {
134
141
  let uploadedLibraries: string[] = Object.values(await grok.dapi.userDataStorage.get(STORAGE_NAME, true));
135
142
  for (let i = 0; i < uploadedLibraries.length; ++i) {
136
143
  let libraryName: string = uploadedLibraries[i];
137
- divInputs.append(ui.boolInput(libraryName, true, async() => {
144
+ divInputs.append(ui.boolInput(libraryName, true, async () => {
138
145
  grok.dapi.userDataStorage.remove(STORAGE_NAME, libraryName, true);
139
146
  await loadLibraries();
140
147
  }).root);
@@ -283,9 +290,9 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
283
290
  };
284
291
  const tags = {
285
292
  'units': macroMolecule.getTag(DG.TAGS.UNITS),
286
- 'aligned': macroMolecule.getTag(TAGS.aligned),
287
- 'separator': macroMolecule.getTag(TAGS.separator),
288
- 'alphabet': macroMolecule.getTag(TAGS.alphabet),
293
+ 'aligned': macroMolecule.getTag(bio_TAGS.aligned),
294
+ 'separator': macroMolecule.getTag(bio_TAGS.separator),
295
+ 'alphabet': macroMolecule.getTag(bio_TAGS.alphabet),
289
296
  };
290
297
  const sp = await getActivityCliffs(
291
298
  df,
@@ -343,7 +350,8 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
343
350
  return grok.shell
344
351
  .tableView(table.name)
345
352
  .scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
346
- };
353
+ }
354
+ ;
347
355
 
348
356
  /* const encodedCol = encodeMonomers(macroMolecule);
349
357
  if (!encodedCol)
@@ -580,7 +588,7 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
580
588
  //tags: panel, bio
581
589
  //input: column col {semType: Macromolecule}
582
590
  export function splitToMonomers(col: DG.Column<string>): void {
583
- if (!col.getTag(TAGS.aligned).includes(C.MSA))
591
+ if (!col.getTag(bio_TAGS.aligned).includes(C.MSA))
584
592
  return grok.shell.error('Splitting is applicable only for aligned sequences');
585
593
 
586
594
  const tempDf = splitAlignedSequences(col);