@datagrok/bio 2.1.2 → 2.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +206 -147
- package/dist/package-test.js +323 -141
- package/dist/package.js +65 -32
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +3 -3
- package/package.json +5 -5
- package/scripts/generate_fasta_csv_for_alphabets.R +6 -9
- package/src/__jest__/remote.test.ts +13 -7
- package/src/package-test.ts +4 -3
- package/src/package.ts +31 -21
- package/src/tests/checkInputColumn-tests.ts +1 -1
- package/src/tests/{convert-test.ts → converters-test.ts} +0 -0
- package/src/tests/detectors-benchmark-tests.ts +165 -0
- package/src/tests/{detectors-test.ts → detectors-tests.ts} +18 -18
- package/src/tests/renderers-test.ts +2 -2
- package/src/tests/splitters-test.ts +1 -1
- package/src/tests/{substructure-filter-tests.ts → substructure-filters-tests.ts} +1 -1
- package/src/tests/{test-sequnces-generators.ts → utils/sequences-generators.ts} +0 -0
- package/{test-Bio-62cc009524f3-73ccfff9.html → test-Bio-62cc009524f3-6c978eb5.html} +114 -113
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.1.2",
|
|
8
|
+
"version": "2.1.7",
|
|
9
9
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -14,12 +14,12 @@
|
|
|
14
14
|
},
|
|
15
15
|
"dependencies": {
|
|
16
16
|
"@biowasm/aioli": "^3.1.0",
|
|
17
|
-
"@datagrok-libraries/bio": "^5.
|
|
18
|
-
"@datagrok-libraries/chem-meta": "1.0.1",
|
|
17
|
+
"@datagrok-libraries/bio": "^5.10.0",
|
|
18
|
+
"@datagrok-libraries/chem-meta": "^1.0.1",
|
|
19
19
|
"@datagrok-libraries/ml": "^6.2.2",
|
|
20
|
-
"@datagrok-libraries/utils": "^1.
|
|
20
|
+
"@datagrok-libraries/utils": "^1.15.5",
|
|
21
21
|
"cash-dom": "^8.0.0",
|
|
22
|
-
"datagrok-api": "^1.8.
|
|
22
|
+
"datagrok-api": "^1.8.2",
|
|
23
23
|
"dayjs": "^1.11.4",
|
|
24
24
|
"openchemlib": "6.0.1",
|
|
25
25
|
"rxjs": "^6.5.5",
|
|
@@ -4,17 +4,12 @@ library(random)
|
|
|
4
4
|
alphabetDna <- c('A','C','G','T')
|
|
5
5
|
alphabetRna <- c('A','C','G','U')
|
|
6
6
|
alphabetPt <- c('G', 'L', 'Y', 'S', 'E', 'Q', 'D', 'N', 'F', 'A',
|
|
7
|
-
'K', 'R', 'H', 'C', 'V', 'P', 'W', 'I', 'M', 'T'
|
|
7
|
+
'K', 'R', 'H', 'C', 'V', 'P', 'W', 'I', 'M', 'T')
|
|
8
8
|
|
|
9
|
-
toAlphabet <- function(v,
|
|
9
|
+
toAlphabet <- function(v, alph){
|
|
10
10
|
paste(sapply(v, function(ci){ alph[ci]; }), collapse = '')
|
|
11
11
|
}
|
|
12
12
|
|
|
13
|
-
mutateString <- function(s, p){
|
|
14
|
-
|
|
15
|
-
}
|
|
16
|
-
|
|
17
|
-
seq <- toAlphabet(sample.int(4, 35, replace=TRUE), alphabet);
|
|
18
13
|
seqPt <- toAlphabet(sample.int(20, 35, replace=TRUE), alphabetPt);
|
|
19
14
|
seqDna <- toAlphabet(sample.int(4, 35, replace=TRUE), alphabetDna);
|
|
20
15
|
seqRna <- toAlphabet(sample.int(4, 35, replace=TRUE), alphabetRna);
|
|
@@ -60,8 +55,10 @@ seq_mutate <- function(s, p, alphabet){
|
|
|
60
55
|
res_s;
|
|
61
56
|
}
|
|
62
57
|
|
|
63
|
-
|
|
64
|
-
|
|
58
|
+
for (n in c(100,1000,10000, 100000, 1000000)){
|
|
59
|
+
fastaDna_df <- data.frame(id = 1:n, sequence = sapply(1:n, function(id){ seq_mutate(seqDna, seq_p1, alphabetDna)}));
|
|
60
|
+
write_csv(fastaDna_df, sprintf('../files/data/sample_FASTA_DNA-%d.csv', n));
|
|
61
|
+
}
|
|
65
62
|
|
|
66
63
|
fastaRna_df <- data.frame(id = 1:100, sequence = sapply(1:100, function(id){ seq_mutate(seqRna, seq_p2, alphabetRna)}));
|
|
67
64
|
write_csv(fastaRna_df, 'D:/HOME/atanas/Datagrok/projs/public/packages/Bio/files/samples/sample_FASTA_RNA.csv');
|
|
@@ -39,33 +39,39 @@ it('TEST', async () => {
|
|
|
39
39
|
const targetPackage: string = process.env.TARGET_PACKAGE ?? 'Bio';
|
|
40
40
|
console.log(`Testing ${targetPackage} package`);
|
|
41
41
|
|
|
42
|
-
|
|
42
|
+
let r = await page.evaluate((targetPackage):Promise<object> => {
|
|
43
43
|
return new Promise<object>((resolve, reject) => {
|
|
44
44
|
(<any>window).grok.functions.eval(targetPackage + ':test()').then((df: any) => {
|
|
45
45
|
const cStatus = df.columns.byName('success');
|
|
46
|
+
const cSkipped = df.columns.byName('skipped');
|
|
46
47
|
const cMessage = df.columns.byName('result');
|
|
47
48
|
const cCat = df.columns.byName('category');
|
|
48
49
|
const cName = df.columns.byName('name');
|
|
49
50
|
const cTime = df.columns.byName('ms');
|
|
50
51
|
let failed = false;
|
|
52
|
+
let skipReport = '';
|
|
51
53
|
let passReport = '';
|
|
52
54
|
let failReport = '';
|
|
53
55
|
for (let i = 0; i < df.rowCount; i++) {
|
|
54
56
|
if (cStatus.get(i)) {
|
|
55
|
-
|
|
56
|
-
|
|
57
|
+
if (cSkipped.get(i)) {
|
|
58
|
+
skipReport += `Test result : Skipped : ${cTime.get(i)} : ${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
|
|
59
|
+
} else {
|
|
60
|
+
passReport += `Test result : Success : ${cTime.get(i)} : ${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
|
|
61
|
+
}
|
|
57
62
|
} else {
|
|
58
63
|
failed = true;
|
|
59
|
-
failReport += `Test result : Failed : ${cTime.get(i)} :
|
|
60
|
-
`${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
|
|
64
|
+
failReport += `Test result : Failed : ${cTime.get(i)} : ${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
|
|
61
65
|
}
|
|
62
66
|
}
|
|
63
|
-
resolve({failReport, passReport, failed});
|
|
67
|
+
resolve({failReport, skipReport, passReport, failed});
|
|
64
68
|
}).catch((e: any) => reject(e));
|
|
65
69
|
});
|
|
66
70
|
}, targetPackage);
|
|
67
71
|
// @ts-ignore
|
|
68
72
|
console.log(r.passReport);
|
|
69
73
|
// @ts-ignore
|
|
74
|
+
console.log(r.skipReport);
|
|
75
|
+
// @ts-ignore
|
|
70
76
|
expect(r.failed).checkOutput(false, r.failReport);
|
|
71
|
-
},
|
|
77
|
+
}, 7200000);
|
package/src/package-test.ts
CHANGED
|
@@ -3,20 +3,21 @@ import * as DG from 'datagrok-api/dg';
|
|
|
3
3
|
import {runTests, TestContext, tests} from '@datagrok-libraries/utils/src/test';
|
|
4
4
|
|
|
5
5
|
import './tests/Palettes-test';
|
|
6
|
-
import './tests/detectors-test';
|
|
6
|
+
import './tests/detectors-tests';
|
|
7
|
+
import './tests/detectors-benchmark-tests';
|
|
7
8
|
import './tests/msa-tests';
|
|
8
9
|
import './tests/sequence-space-test';
|
|
9
10
|
import './tests/activity-cliffs-tests';
|
|
10
11
|
import './tests/splitters-test';
|
|
11
12
|
import './tests/renderers-test';
|
|
12
|
-
import './tests/convert-test';
|
|
13
|
+
import './tests/converters-test';
|
|
13
14
|
import './tests/fasta-handler-test';
|
|
14
15
|
import './tests/fasta-export-tests';
|
|
15
16
|
import './tests/bio-tests';
|
|
16
17
|
import './tests/WebLogo-positions-test';
|
|
17
18
|
import './tests/checkInputColumn-tests';
|
|
18
19
|
import './tests/similarity-diversity-tests';
|
|
19
|
-
import './tests/substructure-filter-tests';
|
|
20
|
+
import './tests/substructure-filters-tests';
|
|
20
21
|
|
|
21
22
|
export const _package = new DG.Package();
|
|
22
23
|
export {tests};
|
package/src/package.ts
CHANGED
|
@@ -11,23 +11,26 @@ import {runKalign, testMSAEnoughMemory} from './utils/multiple-sequence-alignmen
|
|
|
11
11
|
import {SequenceAlignment, Aligned} from './seq_align';
|
|
12
12
|
import {getEmbeddingColsNames, sequenceSpace, sequenceSpaceByFingerprints} from './analysis/sequence-space';
|
|
13
13
|
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
14
|
-
import {
|
|
15
|
-
|
|
14
|
+
import {
|
|
15
|
+
createLinesGrid,
|
|
16
|
+
createPropPanelElement,
|
|
17
|
+
createTooltipElement,
|
|
18
|
+
getChemSimilaritiesMarix,
|
|
19
|
+
getSimilaritiesMarix
|
|
20
|
+
} from './analysis/sequence-activity-cliffs';
|
|
21
|
+
import {
|
|
22
|
+
createJsonMonomerLibFromSdf,
|
|
23
|
+
encodeMonomers,
|
|
24
|
+
getMolfilesFromSeq
|
|
25
|
+
} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
16
26
|
import {HELM_CORE_LIB_FILENAME} from '@datagrok-libraries/bio/src/utils/const';
|
|
17
27
|
import {getMacroMol} from './utils/atomic-works';
|
|
18
28
|
import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
|
|
19
29
|
import {convert} from './utils/convert';
|
|
20
30
|
import {getMacroMolColumnPropertyPanel, representationsWidget} from './widgets/representations';
|
|
21
|
-
import {MonomerFreqs, TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
22
|
-
import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule'
|
|
23
31
|
import {_toAtomicLevel} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level';
|
|
24
32
|
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
25
33
|
import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils';
|
|
26
|
-
import {
|
|
27
|
-
generateManySequences,
|
|
28
|
-
generateLongSequence,
|
|
29
|
-
performanceTest
|
|
30
|
-
} from './tests/test-sequnces-generators';
|
|
31
34
|
|
|
32
35
|
import {splitAlignedSequences} from '@datagrok-libraries/bio/src/utils/splitter';
|
|
33
36
|
import * as C from './utils/constants';
|
|
@@ -36,11 +39,14 @@ import {SequenceDiversityViewer} from './analysis/sequence-diversity-viewer';
|
|
|
36
39
|
import {invalidateMols, MONOMERIC_COL_TAGS, substructureSearchDialog} from './substructure-search/substructure-search';
|
|
37
40
|
import {saveAsFastaUI} from './utils/save-as-fasta';
|
|
38
41
|
import {BioSubstructureFilter} from './widgets/bio-substructure-filter';
|
|
39
|
-
import {
|
|
40
|
-
import {
|
|
42
|
+
import {getMonomericMols} from './calculations/monomerLevelMols';
|
|
43
|
+
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
41
44
|
import {from, Observable, Subject} from 'rxjs';
|
|
42
|
-
import {
|
|
43
|
-
|
|
45
|
+
import {
|
|
46
|
+
TAGS as bio_TAGS,
|
|
47
|
+
Monomer, IMonomerLib, MonomerWorks, MonomerLib, readLibrary,
|
|
48
|
+
SeqPalette, UnitsHandler, WebLogoViewer, getStats, splitterAsHelm
|
|
49
|
+
} from '@datagrok-libraries/bio';
|
|
44
50
|
|
|
45
51
|
const STORAGE_NAME = 'Libraries';
|
|
46
52
|
const LIB_PATH = 'System:AppData/Bio/libraries';
|
|
@@ -51,6 +57,7 @@ export let hydrophobPalette: SeqPaletteCustom | null = null;
|
|
|
51
57
|
|
|
52
58
|
export class SeqPaletteCustom implements SeqPalette {
|
|
53
59
|
private readonly _palette: { [m: string]: string };
|
|
60
|
+
|
|
54
61
|
constructor(palette: { [m: string]: string }) {
|
|
55
62
|
this._palette = palette;
|
|
56
63
|
}
|
|
@@ -67,7 +74,7 @@ export async function initBio() {
|
|
|
67
74
|
let logPs: number[] = [];
|
|
68
75
|
const module = await grok.functions.call('Chem:getRdKitModule');
|
|
69
76
|
|
|
70
|
-
|
|
77
|
+
|
|
71
78
|
const series = monomerLib!.getMonomerMolsByType('PEPTIDE')!;
|
|
72
79
|
Object.keys(series).forEach(symbol => {
|
|
73
80
|
monomers.push(symbol);
|
|
@@ -81,7 +88,7 @@ export async function initBio() {
|
|
|
81
88
|
const sum = logPs.reduce((a, b) => a + b, 0);
|
|
82
89
|
const avg = (sum / logPs.length) || 0;
|
|
83
90
|
|
|
84
|
-
let palette: {[monomer: string]: string} = {};
|
|
91
|
+
let palette: { [monomer: string]: string } = {};
|
|
85
92
|
for (let i = 0; i < monomers.length; i++) {
|
|
86
93
|
palette[monomers[i]] = logPs[i] < avg ? '#4682B4' : '#DC143C';
|
|
87
94
|
}
|
|
@@ -91,6 +98,8 @@ export async function initBio() {
|
|
|
91
98
|
|
|
92
99
|
async function loadLibraries() {
|
|
93
100
|
let uploadedLibraries: string[] = Object.values(await grok.dapi.userDataStorage.get(STORAGE_NAME, true));
|
|
101
|
+
if (uploadedLibraries.length == 0 && monomerLib == null)
|
|
102
|
+
monomerLib = new MonomerLib({});
|
|
94
103
|
for (let i = 0; i < uploadedLibraries.length; ++i)
|
|
95
104
|
await monomerManager(uploadedLibraries[i]);
|
|
96
105
|
}
|
|
@@ -132,7 +141,7 @@ export async function libraryPanel(seqColumn: DG.Column): Promise<DG.Widget> {
|
|
|
132
141
|
let uploadedLibraries: string[] = Object.values(await grok.dapi.userDataStorage.get(STORAGE_NAME, true));
|
|
133
142
|
for (let i = 0; i < uploadedLibraries.length; ++i) {
|
|
134
143
|
let libraryName: string = uploadedLibraries[i];
|
|
135
|
-
divInputs.append(ui.boolInput(libraryName, true, async() => {
|
|
144
|
+
divInputs.append(ui.boolInput(libraryName, true, async () => {
|
|
136
145
|
grok.dapi.userDataStorage.remove(STORAGE_NAME, libraryName, true);
|
|
137
146
|
await loadLibraries();
|
|
138
147
|
}).root);
|
|
@@ -281,9 +290,9 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
281
290
|
};
|
|
282
291
|
const tags = {
|
|
283
292
|
'units': macroMolecule.getTag(DG.TAGS.UNITS),
|
|
284
|
-
'aligned': macroMolecule.getTag(
|
|
285
|
-
'separator': macroMolecule.getTag(
|
|
286
|
-
'alphabet': macroMolecule.getTag(
|
|
293
|
+
'aligned': macroMolecule.getTag(bio_TAGS.aligned),
|
|
294
|
+
'separator': macroMolecule.getTag(bio_TAGS.separator),
|
|
295
|
+
'alphabet': macroMolecule.getTag(bio_TAGS.alphabet),
|
|
287
296
|
};
|
|
288
297
|
const sp = await getActivityCliffs(
|
|
289
298
|
df,
|
|
@@ -341,7 +350,8 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
341
350
|
return grok.shell
|
|
342
351
|
.tableView(table.name)
|
|
343
352
|
.scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
|
|
344
|
-
}
|
|
353
|
+
}
|
|
354
|
+
;
|
|
345
355
|
|
|
346
356
|
/* const encodedCol = encodeMonomers(macroMolecule);
|
|
347
357
|
if (!encodedCol)
|
|
@@ -578,7 +588,7 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
|
|
|
578
588
|
//tags: panel, bio
|
|
579
589
|
//input: column col {semType: Macromolecule}
|
|
580
590
|
export function splitToMonomers(col: DG.Column<string>): void {
|
|
581
|
-
if (!col.getTag(
|
|
591
|
+
if (!col.getTag(bio_TAGS.aligned).includes(C.MSA))
|
|
582
592
|
return grok.shell.error('Splitting is applicable only for aligned sequences');
|
|
583
593
|
|
|
584
594
|
const tempDf = splitAlignedSequences(col);
|
|
@@ -40,7 +40,7 @@ seq4`;
|
|
|
40
40
|
const col: DG.Column = df.getCol('seq');
|
|
41
41
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
42
42
|
col.setTag(DG.TAGS.UNITS, bio.NOTATION.HELM);
|
|
43
|
-
col.setTag(bio.TAGS.alphabetSize, '11');
|
|
43
|
+
// col.setTag(bio.TAGS.alphabetSize, '11');
|
|
44
44
|
col.setTag(bio.TAGS.alphabetIsMultichar, 'true');
|
|
45
45
|
|
|
46
46
|
const [res, msg]: [boolean, string] = checkInputColumn(
|
|
File without changes
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
5
|
+
|
|
6
|
+
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
7
|
+
import {UnitsHandler} from '@datagrok-libraries/bio';
|
|
8
|
+
import {Column} from 'datagrok-api/dg';
|
|
9
|
+
|
|
10
|
+
category('detectorsBenchmark', () => {
|
|
11
|
+
|
|
12
|
+
let detectFunc: DG.Func;
|
|
13
|
+
|
|
14
|
+
before(async () => {
|
|
15
|
+
const funcList: DG.Func[] = DG.Func.find({package: 'Bio', name: 'detectMacromolecule'});
|
|
16
|
+
detectFunc = funcList[0];
|
|
17
|
+
|
|
18
|
+
// warm up the detector function
|
|
19
|
+
const col: DG.Column = DG.Column.fromStrings('seq', ['ACGT', 'ACGT', 'ACGT']);
|
|
20
|
+
await detectFunc.prepare({col: col}).call();
|
|
21
|
+
});
|
|
22
|
+
|
|
23
|
+
// -- fasta --
|
|
24
|
+
|
|
25
|
+
test('fastaDnaShorts50Few50', async () => {
|
|
26
|
+
const et: number = await detectMacromoleculeBenchmark(10, bio.NOTATION.FASTA, bio.ALPHABET.DNA, 50, 50);
|
|
27
|
+
},
|
|
28
|
+
{skipReason: '#1192'});
|
|
29
|
+
|
|
30
|
+
test('fastaDnaShorts50Many1E6', async () => {
|
|
31
|
+
const et: number = await detectMacromoleculeBenchmark(10, bio.NOTATION.FASTA, bio.ALPHABET.DNA, 50, 1E6);
|
|
32
|
+
},
|
|
33
|
+
{skipReason: '#1192'});
|
|
34
|
+
|
|
35
|
+
test('fastaDnaLong1e6Few50', async () => {
|
|
36
|
+
const et: number = await detectMacromoleculeBenchmark(10, bio.NOTATION.FASTA, bio.ALPHABET.DNA, 1E6, 50);
|
|
37
|
+
},
|
|
38
|
+
{skipReason: '#1192'});
|
|
39
|
+
|
|
40
|
+
// -- separator --
|
|
41
|
+
|
|
42
|
+
test('separatorDnaShorts50Few50', async () => {
|
|
43
|
+
const et: number = await detectMacromoleculeBenchmark(10, bio.NOTATION.SEPARATOR, bio.ALPHABET.DNA, 50, 50, '/');
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
test('separatorDnaShorts50Many1E6', async () => {
|
|
47
|
+
const et: number = await detectMacromoleculeBenchmark(10, bio.NOTATION.SEPARATOR, bio.ALPHABET.DNA, 50, 1E6, '/');
|
|
48
|
+
},
|
|
49
|
+
{ /* skipReason: 'slow transmit large dataset to detector' */});
|
|
50
|
+
|
|
51
|
+
test('separatorDnaLong1e6Few50', async () => {
|
|
52
|
+
const et: number = await detectMacromoleculeBenchmark(10, bio.NOTATION.SEPARATOR, bio.ALPHABET.DNA, 1E6, 50, '/');
|
|
53
|
+
},
|
|
54
|
+
{skipReason: '#1192'});
|
|
55
|
+
|
|
56
|
+
async function detectMacromoleculeBenchmark(
|
|
57
|
+
maxET: number, notation: bio.NOTATION, alphabet: bio.ALPHABET, length: number, count: number, separator?: string
|
|
58
|
+
): Promise<number> {
|
|
59
|
+
return await benchmark<DG.FuncCall, DG.Column>(10,
|
|
60
|
+
(): DG.FuncCall => {
|
|
61
|
+
const col: DG.Column = generate(notation, [...bio.getAlphabet(alphabet)], length, count, separator);
|
|
62
|
+
const funcCall: DG.FuncCall = detectFunc.prepare({col: col});
|
|
63
|
+
return funcCall;
|
|
64
|
+
},
|
|
65
|
+
async (funcCall: DG.FuncCall): Promise<DG.Column> => {
|
|
66
|
+
return testDetector(funcCall);
|
|
67
|
+
},
|
|
68
|
+
(col: DG.Column) => {
|
|
69
|
+
checkDetectorRes(col, {
|
|
70
|
+
semType: DG.SEMTYPE.MACROMOLECULE,
|
|
71
|
+
notation: notation,
|
|
72
|
+
alphabet: alphabet,
|
|
73
|
+
separator: separator
|
|
74
|
+
});
|
|
75
|
+
});
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
function generate(notation: bio.NOTATION, alphabet: string[], length: number, count: number, separator?: string): DG.Column {
|
|
79
|
+
let seqMerger: (seqMList: string[], separator?: string) => string;
|
|
80
|
+
|
|
81
|
+
switch (notation) {
|
|
82
|
+
case bio.NOTATION.FASTA:
|
|
83
|
+
seqMerger = (seqMList: string[]): string => {
|
|
84
|
+
let res: string = '';
|
|
85
|
+
for (let j = 0; j < seqMList.length; j++) {
|
|
86
|
+
const m = seqMList[j];
|
|
87
|
+
res += m.length == 1 ? m : `[${m}]`;
|
|
88
|
+
}
|
|
89
|
+
return res;
|
|
90
|
+
};
|
|
91
|
+
break;
|
|
92
|
+
case bio.NOTATION.SEPARATOR:
|
|
93
|
+
seqMerger = (seqMList: string[], separator?: string): string => {
|
|
94
|
+
return seqMList.join(separator);
|
|
95
|
+
};
|
|
96
|
+
break;
|
|
97
|
+
default:
|
|
98
|
+
throw new Error(`Not supported notation '${notation}'.`);
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
const buildSeq = (alphabet: string[], length: number): string => {
|
|
102
|
+
const seqMList = new Array<string>(length);
|
|
103
|
+
for (let j = 0; j < length; j++) {
|
|
104
|
+
seqMList[j] = alphabet[Math.floor(Math.random() * alphabet.length)];
|
|
105
|
+
}
|
|
106
|
+
return seqMerger(seqMList, separator);
|
|
107
|
+
};
|
|
108
|
+
|
|
109
|
+
const seqList: string[] = Array(count);
|
|
110
|
+
for (let i = 0; i < count; i++) {
|
|
111
|
+
seqList[i] = buildSeq(alphabet, length);
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
return DG.Column.fromStrings('seq', seqList);
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
type TgtType = { semType: string, notation: bio.NOTATION, alphabet: bio.ALPHABET, separator?: string };
|
|
118
|
+
|
|
119
|
+
function testDetector(funcCall: DG.FuncCall): DG.Column {
|
|
120
|
+
//const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: col});
|
|
121
|
+
funcCall.callSync();
|
|
122
|
+
const semType = funcCall.getOutputParamValue();
|
|
123
|
+
|
|
124
|
+
const col: DG.Column = funcCall.inputs.col;
|
|
125
|
+
if (semType) col.semType = semType;
|
|
126
|
+
return col;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
function checkDetectorRes(col: DG.Column, tgt: TgtType): void {
|
|
130
|
+
const uh = new UnitsHandler(col);
|
|
131
|
+
expect(col.semType, tgt.semType);
|
|
132
|
+
expect(uh.notation, tgt.notation);
|
|
133
|
+
expect(uh.alphabet, tgt.alphabet);
|
|
134
|
+
expect(uh.separator, tgt.separator);
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
});
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
/** Returns ET [ms] of test() */
|
|
141
|
+
async function benchmark<TData, TRes>(
|
|
142
|
+
maxET: number, prepare: () => TData, test: (data: TData) => Promise<TRes>, check: (res: TRes) => void
|
|
143
|
+
): Promise<number> {
|
|
144
|
+
const data: TData = prepare();
|
|
145
|
+
|
|
146
|
+
const t1: number = Date.now();
|
|
147
|
+
// console.profile();
|
|
148
|
+
const res: TRes = await test(data);
|
|
149
|
+
//console.profileEnd();
|
|
150
|
+
const t2: number = Date.now();
|
|
151
|
+
|
|
152
|
+
check(res);
|
|
153
|
+
|
|
154
|
+
const resET: number = t2 - t1;
|
|
155
|
+
if (resET > maxET) {
|
|
156
|
+
const errMsg = `ET ${resET} ms is more than max allowed ${maxET} ms.`;
|
|
157
|
+
console.error(errMsg);
|
|
158
|
+
throw new Error(errMsg);
|
|
159
|
+
} else {
|
|
160
|
+
console.log(`ET ${resET} ms is OK.`);
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
return resET;
|
|
164
|
+
}
|
|
165
|
+
|
|
@@ -130,24 +130,24 @@ MWRSWY-CKHP
|
|
|
130
130
|
}
|
|
131
131
|
|
|
132
132
|
const samples: { [key: string]: string } = {
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
133
|
+
[Samples.fastaFasta]: 'System:AppData/Bio/data/sample_FASTA.fasta',
|
|
134
|
+
[Samples.fastaPtCsv]: 'System:AppData/Bio/data/sample_FASTA_PT.csv',
|
|
135
|
+
[Samples.msaComplex]: 'System:AppData/Bio/samples/sample_MSA.csv',
|
|
136
|
+
[Samples.fastaCsv]: 'System:AppData/Bio/samples/sample_FASTA.csv',
|
|
137
|
+
[Samples.helmCsv]: 'System:AppData/Bio/samples/sample_HELM.csv',
|
|
138
|
+
[Samples.peptidesComplex]: 'System:AppData/Bio/tests/peptides_complex_msa.csv',
|
|
139
|
+
[Samples.peptidesSimple]: 'System:AppData/Bio/tests/peptides_simple_msa.csv',
|
|
140
|
+
[Samples.testDemogCsv]: 'System:AppData/Bio/tests/testDemog.csv',
|
|
141
|
+
[Samples.testHelmCsv]: 'System:AppData/Bio/tests/testHelm.csv',
|
|
142
|
+
[Samples.testIdCsv]: 'System:AppData/Bio/tests/testId.csv',
|
|
143
|
+
[Samples.testSmilesCsv]: 'System:AppData/Bio/tests/testSmiles.csv',
|
|
144
|
+
[Samples.testSmiles2Csv]: 'System:AppData/Bio/tests/testSmiles2.csv',
|
|
145
|
+
[Samples.testActivityCliffsCsv]: 'System:AppData/Bio/tests/testActivityCliffs.csv', // smiles
|
|
146
|
+
[Samples.testCerealCsv]: 'System:AppData/Bio/tests/testCereal.csv',
|
|
147
|
+
[Samples.testSpgi100]: 'System:AppData/Bio/tests/testSpgi100.csv',
|
|
148
|
+
[Samples.testUnichemSources]: 'System:AppData/Bio/tests/testUnichemSources.csv',
|
|
149
|
+
[Samples.testDmvOffices]: 'System:AppData/Bio/tests/testDmvOffices.csv',
|
|
150
|
+
[Samples.testAlertCollection]: 'System:AppData/Bio/tests/testAlertCollection.csv',
|
|
151
151
|
};
|
|
152
152
|
|
|
153
153
|
const _samplesDfs: { [key: string]: Promise<DG.DataFrame> } = {};
|
|
@@ -7,14 +7,14 @@ import {after, before, category, delay, expect, test} from '@datagrok-libraries/
|
|
|
7
7
|
import {importFasta, multipleSequenceAlignmentAny} from '../package';
|
|
8
8
|
import {convertDo} from '../utils/convert';
|
|
9
9
|
import {SEM_TYPES, TAGS} from '../utils/constants';
|
|
10
|
-
import {generateLongSequence, generateManySequences, performanceTest} from './test-sequnces-generators';
|
|
10
|
+
import {generateLongSequence, generateManySequences, performanceTest} from './utils/sequences-generators';
|
|
11
|
+
import {errorToConsole} from '@datagrok-libraries/utils/src/to-console';
|
|
11
12
|
|
|
12
13
|
category('renderers', () => {
|
|
13
14
|
let tvList: DG.TableView[];
|
|
14
15
|
let dfList: DG.DataFrame[];
|
|
15
16
|
|
|
16
17
|
before(async () => {
|
|
17
|
-
await grok.functions.call('Bio:initBio');
|
|
18
18
|
tvList = [];
|
|
19
19
|
dfList = [];
|
|
20
20
|
});
|
|
@@ -6,6 +6,7 @@ import * as bio from '@datagrok-libraries/bio';
|
|
|
6
6
|
import {after, before, category, test, expect, expectArray, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
7
7
|
import * as C from '../utils/constants';
|
|
8
8
|
import {splitToMonomers, _package, getHelmMonomers} from '../package';
|
|
9
|
+
import {errorToConsole} from '@datagrok-libraries/utils/src/to-console';
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
category('splitters', () => {
|
|
@@ -13,7 +14,6 @@ category('splitters', () => {
|
|
|
13
14
|
let dfList: DG.DataFrame[];
|
|
14
15
|
|
|
15
16
|
before(async () => {
|
|
16
|
-
await grok.functions.call('Bio:initBio');
|
|
17
17
|
tvList = [];
|
|
18
18
|
dfList = [];
|
|
19
19
|
});
|
|
File without changes
|