@datagrok/bio 2.4.45 → 2.4.47
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/79.js +2 -0
- package/dist/79.js.map +1 -0
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/package.json +10 -2
- package/scripts/sequence_generator.py +8 -0
- package/src/package-types.ts +40 -0
- package/src/package.ts +34 -6
- package/src/tests/WebLogo-positions-test.ts +7 -7
- package/src/tests/msa-tests.ts +0 -3
- package/src/utils/cell-renderer-consts.ts +27 -0
- package/src/utils/cell-renderer.ts +7 -6
- package/src/viewers/web-logo-viewer.ts +382 -273
- package/src/widgets/package-settings-editor-widget.ts +28 -0
- package/src/widgets/representations.ts +18 -3
- /package/files/{libraries → tests/libraries}/broken-lib.sdf +0 -0
- /package/files/{libraries → tests/libraries}/group1/mock-lib-3.json +0 -0
- /package/files/{libraries → tests/libraries}/mock-lib-2.json +0 -0
package/package.json
CHANGED
|
@@ -5,16 +5,24 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.4.
|
|
8
|
+
"version": "2.4.47",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
12
12
|
"url": "https://github.com/datagrok-ai/public.git",
|
|
13
13
|
"directory": "packages/Bio"
|
|
14
14
|
},
|
|
15
|
+
"properties": [
|
|
16
|
+
{
|
|
17
|
+
"name": "MaxMonomerLength",
|
|
18
|
+
"propertyType": "int",
|
|
19
|
+
"defaultValue": 3,
|
|
20
|
+
"nullable": false
|
|
21
|
+
}
|
|
22
|
+
],
|
|
15
23
|
"dependencies": {
|
|
16
24
|
"@biowasm/aioli": "^3.1.0",
|
|
17
|
-
"@datagrok-libraries/bio": "^5.32.
|
|
25
|
+
"@datagrok-libraries/bio": "^5.32.5",
|
|
18
26
|
"@datagrok-libraries/chem-meta": "^1.0.1",
|
|
19
27
|
"@datagrok-libraries/ml": "^6.3.39",
|
|
20
28
|
"@datagrok-libraries/tutorials": "^1.3.2",
|
|
@@ -16,6 +16,14 @@
|
|
|
16
16
|
# input: double fasta_separator = '' [Separator for a FASTA notation]
|
|
17
17
|
# output: dataframe sequences
|
|
18
18
|
|
|
19
|
+
"""
|
|
20
|
+
The simplest set of options for running from the command line:
|
|
21
|
+
python sequence_generator.py -c 4 -s 50 > output_file.tsv
|
|
22
|
+
Basic options:
|
|
23
|
+
-c   number of clusters
|
|
24
|
+
-s cluster size (number of sequences per cluster)
|
|
25
|
+
"""
|
|
26
|
+
|
|
19
27
|
import random
|
|
20
28
|
import argparse
|
|
21
29
|
import sys
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import * as ui from 'datagrok-api/ui';
|
|
4
|
+
|
|
5
|
+
import {Observable, Subject} from 'rxjs';
|
|
6
|
+
import {ObjectPropertyBag} from 'datagrok-api/dg';
|
|
7
|
+
|
|
8
|
+
/** Names of package properties/settings declared in properties section of {@link './package.json'} */
|
|
9
|
+
export const enum BioPackagePropertiesNames {
|
|
10
|
+
MaxMonomerLength = 'MaxMonomerLength',
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
export class BioPackageProperties extends Map<string, any> {
|
|
15
|
+
|
|
16
|
+
private _onPropertyChanged: Subject<string> = new Subject<string>();
|
|
17
|
+
public get onPropertyChanged(): Observable<string> { return this._onPropertyChanged; }
|
|
18
|
+
|
|
19
|
+
/** Monomer name maximum length displayed in short mode. */
|
|
20
|
+
public get maxMonomerLength(): number {
|
|
21
|
+
return super.get(BioPackagePropertiesNames.MaxMonomerLength) as unknown as number;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
public set maxMonomerLength(value: number) {
|
|
25
|
+
super.set(BioPackagePropertiesNames.MaxMonomerLength, value as unknown as object);
|
|
26
|
+
this._onPropertyChanged.next(BioPackagePropertiesNames.MaxMonomerLength);
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
constructor(source: any) {
|
|
30
|
+
super(Object.entries(source));
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
export class BioPackage extends DG.Package {
|
|
35
|
+
private _properties: BioPackageProperties;
|
|
36
|
+
/** Package properties/settings declared in properties section of {@link './package.json'} */
|
|
37
|
+
public get properties(): BioPackageProperties { return this._properties; };
|
|
38
|
+
|
|
39
|
+
public set properties(value: BioPackageProperties) { this._properties = value; }
|
|
40
|
+
}
|
package/src/package.ts
CHANGED
|
@@ -14,7 +14,7 @@ import {
|
|
|
14
14
|
createLinesGrid, createPropPanelElement, createTooltipElement, getChemSimilaritiesMatrix,
|
|
15
15
|
} from './analysis/sequence-activity-cliffs';
|
|
16
16
|
import {convert} from './utils/convert';
|
|
17
|
-
import {
|
|
17
|
+
import {getMacromoleculeColumnPropertyPanel} from './widgets/representations';
|
|
18
18
|
import {_toAtomicLevel} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level';
|
|
19
19
|
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
20
20
|
import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils';
|
|
@@ -53,8 +53,12 @@ import {WebLogoApp} from './apps/web-logo-app';
|
|
|
53
53
|
import {SplitToMonomersFunctionEditor} from './function-edtiors/split-to-monomers-editor';
|
|
54
54
|
import {splitToMonomersUI} from './utils/split-to-monomers';
|
|
55
55
|
import {MonomerCellRenderer} from './utils/monomer-cell-renderer';
|
|
56
|
+
import {BioPackage, BioPackageProperties} from './package-types';
|
|
57
|
+
import {RDModule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
|
|
58
|
+
import {ObjectPropertyBag} from 'datagrok-api/dg';
|
|
59
|
+
import {PackageSettingsEditorWidget} from './widgets/package-settings-editor-widget';
|
|
56
60
|
|
|
57
|
-
export const _package = new
|
|
61
|
+
export const _package = new BioPackage();
|
|
58
62
|
|
|
59
63
|
// /** Avoid reassigning {@link monomerLib} because consumers subscribe to {@link IMonomerLib.onChanged} event */
|
|
60
64
|
// let monomerLib: MonomerLib | null = null;
|
|
@@ -82,11 +86,20 @@ export class SeqPaletteCustom implements SeqPalette {
|
|
|
82
86
|
|
|
83
87
|
//tags: init
|
|
84
88
|
export async function initBio() {
|
|
85
|
-
|
|
89
|
+
let module: RDModule;
|
|
90
|
+
await Promise.all([
|
|
91
|
+
(async () => { await MonomerLibHelper.instance.loadLibraries(); })(),
|
|
92
|
+
(async () => { module = await grok.functions.call('Chem:getRdKitModule'); })(),
|
|
93
|
+
(async () => {
|
|
94
|
+
const pkgProps = await _package.getProperties();
|
|
95
|
+
const bioPkgProps = new BioPackageProperties(pkgProps);
|
|
96
|
+
_package.properties = bioPkgProps;
|
|
97
|
+
})(),
|
|
98
|
+
]);
|
|
99
|
+
|
|
86
100
|
const monomerLib = MonomerLibHelper.instance.getBioLib();
|
|
87
101
|
const monomers: string[] = [];
|
|
88
102
|
const logPs: number[] = [];
|
|
89
|
-
const module = await grok.functions.call('Chem:getRdKitModule');
|
|
90
103
|
|
|
91
104
|
const series = monomerLib!.getMonomerMolsByPolymerType('PEPTIDE')!;
|
|
92
105
|
Object.keys(series).forEach((symbol) => {
|
|
@@ -173,6 +186,21 @@ export async function libraryPanel(_seqColumn: DG.Column): Promise<DG.Widget> {
|
|
|
173
186
|
return new DG.Widget(ui.divV([inputsForm, ui.div(filesButton)]));
|
|
174
187
|
}
|
|
175
188
|
|
|
189
|
+
// -- Package settings editor --
|
|
190
|
+
|
|
191
|
+
//name: packageSettingsEditor
|
|
192
|
+
//description: Editor widget for the package settings
|
|
193
|
+
//tags: packageSettingsEditor
|
|
194
|
+
//input: object propList
|
|
195
|
+
//output: widget result
|
|
196
|
+
export function packageSettingsEditor(propList: DG.Property[]): DG.Widget {
|
|
197
|
+
const widget = new PackageSettingsEditorWidget(propList);
|
|
198
|
+
widget.init().then(); // Ignore promise returned
|
|
199
|
+
return widget;
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
// -- Cell renderers --
|
|
203
|
+
|
|
176
204
|
//name: fastaSequenceCellRenderer
|
|
177
205
|
//tags: cellRenderer
|
|
178
206
|
//meta.cellType: sequence
|
|
@@ -187,7 +215,7 @@ export function fastaSequenceCellRenderer(): MacromoleculeSequenceCellRenderer {
|
|
|
187
215
|
//tags: panel
|
|
188
216
|
//output: widget result
|
|
189
217
|
export function macroMolColumnPropertyPanel(molColumn: DG.Column): DG.Widget {
|
|
190
|
-
return
|
|
218
|
+
return getMacromoleculeColumnPropertyPanel(molColumn);
|
|
191
219
|
}
|
|
192
220
|
|
|
193
221
|
//name: separatorSequenceCellRenderer
|
|
@@ -743,7 +771,7 @@ export async function webLogoLargeApp(): Promise<void> {
|
|
|
743
771
|
const pi = DG.TaskBarProgressIndicator.create('WebLogo');
|
|
744
772
|
try {
|
|
745
773
|
const app = new WebLogoApp();
|
|
746
|
-
const df: DG.DataFrame = await _package.files.readCsv('data/
|
|
774
|
+
const df: DG.DataFrame = await _package.files.readCsv('data/sample_PT_100000x5.csv');
|
|
747
775
|
await grok.data.detectSemanticTypes(df);
|
|
748
776
|
await app.init(df, 'webLogoLargeApp');
|
|
749
777
|
} finally {
|
|
@@ -61,8 +61,8 @@ ATC-G-TTGC--
|
|
|
61
61
|
|
|
62
62
|
for (let i = 0; i < positions.length; i++) {
|
|
63
63
|
expect(positions[i].name, resAllDf1[i].name);
|
|
64
|
-
for (const
|
|
65
|
-
expect(positions[i].
|
|
64
|
+
for (const m of positions[i].getMonomers())
|
|
65
|
+
expect(positions[i].getFreq(m).count, resAllDf1[i].getFreq(m).count);
|
|
66
66
|
}
|
|
67
67
|
}, {skipReason: 'GROK-13300'});
|
|
68
68
|
|
|
@@ -109,8 +109,8 @@ ATC-G-TTGC--
|
|
|
109
109
|
|
|
110
110
|
for (let i = 0; i < positions.length; i++) {
|
|
111
111
|
expect(positions[i].name, resAllDf1[i].name);
|
|
112
|
-
for (const
|
|
113
|
-
expect(positions[i].
|
|
112
|
+
for (const m of positions[i].getMonomers())
|
|
113
|
+
expect(positions[i].getFreq(m).count, resAllDf1[i].getFreq(m).count);
|
|
114
114
|
}
|
|
115
115
|
}, {skipReason: 'GROK-13300'});
|
|
116
116
|
|
|
@@ -187,10 +187,10 @@ ATC-G-TTGC--
|
|
|
187
187
|
|
|
188
188
|
function expectPositionInfo(actualPos: PI, expectedPos: PI): void {
|
|
189
189
|
expect(actualPos.name, expectedPos.name);
|
|
190
|
-
expectArray(
|
|
191
|
-
for (const key
|
|
190
|
+
expectArray(actualPos.getMonomers(), expectedPos.getMonomers());
|
|
191
|
+
for (const key of actualPos.getMonomers()) {
|
|
192
192
|
//
|
|
193
|
-
expect(actualPos.
|
|
193
|
+
expect(actualPos.getFreq(key).count, expectedPos.getFreq(key).count);
|
|
194
194
|
}
|
|
195
195
|
}
|
|
196
196
|
|
package/src/tests/msa-tests.ts
CHANGED
|
@@ -9,9 +9,6 @@ import {multipleSequenceAlignmentUI} from '../utils/multiple-sequence-alignment-
|
|
|
9
9
|
import {awaitContainerStart} from './utils';
|
|
10
10
|
//import * as grok from 'datagrok-api/grok';
|
|
11
11
|
|
|
12
|
-
export const _package = new DG.Package();
|
|
13
|
-
|
|
14
|
-
|
|
15
12
|
category('MSA', async () => {
|
|
16
13
|
//table = await grok.data.files.openTable('Demo:Files/bio/peptides.csv');
|
|
17
14
|
const fromCsv = `seq
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
export enum MonomerWidthMode {
|
|
6
|
+
long = 'long',
|
|
7
|
+
short = 'short',
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
export const enum Tags {
|
|
11
|
+
calculated = '.mm.cellRenderer.calculated',
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
export const enum Temps {
|
|
15
|
+
monomerWidth = '.mm.cellRenderer.monomerWidth',
|
|
16
|
+
maxMonomerLength = '.mm.cellRenderer.maxMonomerLength',
|
|
17
|
+
colorCode = '.mm.cellRenderer.colorCode',
|
|
18
|
+
compareWithCurrent = '.mm.cellRenderer.compareWithCurrent',
|
|
19
|
+
highlightDifference = '.mm.cellRenderer.highlightDifference',
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
// export const MacromoleculeCellRendererDefaults = new class {
|
|
23
|
+
// monomerWidth: MonomerWidthMode = MonomerWidthMode.short;
|
|
24
|
+
// maxMonomerLength: number = 3;
|
|
25
|
+
// colorCode: boolean = true;
|
|
26
|
+
// compareWithCurrent: boolean = true;
|
|
27
|
+
// }();
|
|
@@ -15,9 +15,10 @@ import {
|
|
|
15
15
|
import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
|
|
16
16
|
import {UnknownSeqPalettes} from '@datagrok-libraries/bio/src/unknown';
|
|
17
17
|
import {MonomerWorks} from '@datagrok-libraries/bio/src/monomer-works/monomer-works';
|
|
18
|
+
import {Tags as mmcrTags, Temps as mmcrTemps} from '../utils/cell-renderer-consts';
|
|
18
19
|
|
|
19
|
-
import {_package, getMonomerLibHelper} from '../package';
|
|
20
20
|
import * as C from './constants';
|
|
21
|
+
import {_package} from '../package';
|
|
21
22
|
|
|
22
23
|
const enum tempTAGS {
|
|
23
24
|
referenceSequence = 'reference-sequence',
|
|
@@ -154,19 +155,19 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
154
155
|
const referenceSequence: string[] = splitterFunc(
|
|
155
156
|
((tempReferenceSequence != null) && (tempReferenceSequence != '')) ?
|
|
156
157
|
tempReferenceSequence : tempCurrentWord ?? '');
|
|
157
|
-
const monomerWidth: string =
|
|
158
|
+
const monomerWidth: string = tempMonomerWidth ?? 'short';
|
|
158
159
|
|
|
159
160
|
let gapRenderer = 5;
|
|
160
161
|
let maxIndex = 0;
|
|
161
|
-
let maxLengthOfMonomer = 8;
|
|
162
|
+
let maxLengthOfMonomer: number = 8;
|
|
162
163
|
|
|
163
164
|
if (monomerWidth === 'short') {
|
|
164
165
|
gapRenderer = 12;
|
|
165
|
-
maxLengthOfMonomer =
|
|
166
|
+
maxLengthOfMonomer = colTemp[mmcrTemps.maxMonomerLength] ?? _package.properties.maxMonomerLength;
|
|
166
167
|
}
|
|
167
168
|
|
|
168
169
|
let maxLengthWords: any = {};
|
|
169
|
-
if (gridCell.cell.column.getTag(
|
|
170
|
+
if (gridCell.cell.column.getTag(mmcrTags.calculated) !== splitLimit.toString()) {
|
|
170
171
|
let samples = 0;
|
|
171
172
|
while (samples < Math.min(gridCell.cell.column.length, 100)) {
|
|
172
173
|
const column = gridCell.cell.column.get(samples);
|
|
@@ -188,7 +189,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
188
189
|
colTemp[tempTAGS.bioSumMaxLengthWords] = maxLengthWordSum;
|
|
189
190
|
colTemp[tempTAGS.bioMaxIndex] = maxIndex;
|
|
190
191
|
colTemp[tempTAGS.bioMaxLengthWords] = maxLengthWords;
|
|
191
|
-
gridCell.cell.column.setTag(
|
|
192
|
+
gridCell.cell.column.setTag(mmcrTags.calculated, splitLimit.toString());
|
|
192
193
|
}
|
|
193
194
|
} else {
|
|
194
195
|
maxLengthWords = colTemp[tempTAGS.bioMaxLengthWords];
|