@datagrok/bio 2.13.3 → 2.13.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +1 -1
- package/CHANGELOG.md +23 -0
- package/detectors.js +52 -38
- package/dist/111.js +2 -0
- package/dist/111.js.map +1 -0
- package/dist/234.js +2 -0
- package/dist/234.js.map +1 -0
- package/dist/242.js +2 -0
- package/dist/242.js.map +1 -0
- package/dist/{286.js → 248.js} +1 -1
- package/dist/248.js.map +1 -0
- package/dist/284.js +3 -0
- package/dist/284.js.map +1 -0
- package/dist/317.js +2 -0
- package/dist/317.js.map +1 -0
- package/dist/589.js +2 -0
- package/dist/589.js.map +1 -0
- package/dist/603.js +2 -0
- package/dist/603.js.map +1 -0
- package/dist/682.js +2 -0
- package/dist/682.js.map +1 -0
- package/dist/705.js +2 -0
- package/dist/705.js.map +1 -0
- package/dist/{590.js → 731.js} +2 -2
- package/dist/731.js.map +1 -0
- package/dist/778.js +2 -0
- package/dist/778.js.map +1 -0
- package/dist/793.js +2 -0
- package/dist/793.js.map +1 -0
- package/dist/950.js +2 -0
- package/dist/950.js.map +1 -0
- package/dist/package-test.js +6 -7
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +6 -7
- package/dist/package.js.map +1 -1
- package/files/cache_config.json +7 -0
- package/package.json +12 -11
- package/src/analysis/sequence-activity-cliffs.ts +1 -1
- package/src/function-edtiors/split-to-monomers-editor.ts +6 -7
- package/src/package-types.ts +14 -7
- package/src/package.ts +6 -6
- package/src/substructure-search/substructure-search.ts +9 -10
- package/src/tests/WebLogo-positions-test.ts +6 -6
- package/src/tests/activity-cliffs-tests.ts +5 -2
- package/src/tests/bio-tests.ts +6 -6
- package/src/tests/checkInputColumn-tests.ts +3 -3
- package/src/tests/converters-test.ts +1 -1
- package/src/tests/detectors-tests.ts +25 -13
- package/src/tests/fasta-export-tests.ts +2 -2
- package/src/tests/mm-distance-tests.ts +1 -1
- package/src/tests/msa-tests.ts +2 -2
- package/src/tests/renderers-test.ts +5 -5
- package/src/tests/scoring.ts +26 -5
- package/src/tests/seq-handler-get-region.ts +4 -4
- package/src/tests/sequence-space-test.ts +1 -1
- package/src/tests/substructure-filters-tests.ts +4 -1
- package/src/tests/to-atomic-level-tests.ts +1 -1
- package/src/utils/cell-renderer.ts +4 -4
- package/src/utils/context-menu.ts +1 -1
- package/src/utils/convert.ts +7 -4
- package/src/utils/get-region-func-editor.ts +11 -16
- package/src/utils/get-region.ts +5 -5
- package/src/utils/macromolecule-column-widget.ts +1 -1
- package/src/utils/monomer-lib/lib-manager.ts +20 -8
- package/src/utils/monomer-lib/library-file-manager/file-manager.ts +28 -24
- package/src/utils/monomer-lib/library-file-manager/file-validator.ts +2 -1
- package/src/utils/monomer-lib/library-file-manager/ui.ts +3 -6
- package/src/utils/multiple-sequence-alignment-ui.ts +10 -11
- package/src/utils/multiple-sequence-alignment.ts +2 -2
- package/src/utils/pepsea.ts +1 -1
- package/src/utils/save-as-fasta.ts +5 -5
- package/src/viewers/vd-regions-viewer.ts +2 -2
- package/src/widgets/bio-substructure-filter.ts +7 -7
- package/src/widgets/package-settings-editor-widget.ts +6 -6
- package/src/widgets/representations.ts +1 -1
- package/tsconfig.json +4 -4
- package/dist/23.js +0 -2
- package/dist/23.js.map +0 -1
- package/dist/231.js +0 -2
- package/dist/231.js.map +0 -1
- package/dist/282.js +0 -2
- package/dist/282.js.map +0 -1
- package/dist/286.js.map +0 -1
- package/dist/356.js +0 -2
- package/dist/356.js.map +0 -1
- package/dist/36.js +0 -2
- package/dist/36.js.map +0 -1
- package/dist/40.js +0 -2
- package/dist/40.js.map +0 -1
- package/dist/413.js +0 -2
- package/dist/413.js.map +0 -1
- package/dist/42.js +0 -2
- package/dist/42.js.map +0 -1
- package/dist/427.js +0 -2
- package/dist/427.js.map +0 -1
- package/dist/545.js +0 -3
- package/dist/545.js.map +0 -1
- package/dist/590.js.map +0 -1
- package/dist/65.js +0 -2
- package/dist/65.js.map +0 -1
- package/dist/796.js +0 -2
- package/dist/796.js.map +0 -1
- package/dist/package-test.js.LICENSE.txt +0 -1
- package/dist/package.js.LICENSE.txt +0 -1
- /package/dist/{545.js.LICENSE.txt → 284.js.LICENSE.txt} +0 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Aleksandr Tanas",
|
|
6
6
|
"email": "atanas@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.13.3",
|
|
8
|
+
"version": "2.13.6",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -37,18 +37,18 @@
|
|
|
37
37
|
],
|
|
38
38
|
"dependencies": {
|
|
39
39
|
"@biowasm/aioli": "^3.1.0",
|
|
40
|
-
"@datagrok-libraries/bio": "^5.42.
|
|
40
|
+
"@datagrok-libraries/bio": "^5.42.6",
|
|
41
41
|
"@datagrok-libraries/chem-meta": "^1.2.5",
|
|
42
42
|
"@datagrok-libraries/math": "^1.1.5",
|
|
43
43
|
"@datagrok-libraries/ml": "^6.6.12",
|
|
44
44
|
"@datagrok-libraries/tutorials": "^1.3.12",
|
|
45
|
-
"@datagrok-libraries/utils": "^4.2.
|
|
45
|
+
"@datagrok-libraries/utils": "^4.2.12",
|
|
46
46
|
"@webgpu/types": "^0.1.40",
|
|
47
47
|
"ajv": "^8.12.0",
|
|
48
48
|
"ajv-errors": "^3.0.0",
|
|
49
49
|
"cash-dom": "^8.0.0",
|
|
50
50
|
"css-loader": "^6.7.3",
|
|
51
|
-
"datagrok-api": "^1.
|
|
51
|
+
"datagrok-api": "^1.20.0",
|
|
52
52
|
"dayjs": "^1.11.4",
|
|
53
53
|
"fastest-levenshtein": "^1.0.16",
|
|
54
54
|
"openchemlib": "^7.2.3",
|
|
@@ -58,8 +58,8 @@
|
|
|
58
58
|
"wu": "latest"
|
|
59
59
|
},
|
|
60
60
|
"devDependencies": {
|
|
61
|
-
"@datagrok-libraries/helm-web-editor": "^1.1.
|
|
62
|
-
"@datagrok-libraries/js-draw-lite": "^0.0.
|
|
61
|
+
"@datagrok-libraries/helm-web-editor": "^1.1.6",
|
|
62
|
+
"@datagrok-libraries/js-draw-lite": "^0.0.4",
|
|
63
63
|
"@datagrok/chem": "^1.9.2",
|
|
64
64
|
"@datagrok/dendrogram": "^1.2.29",
|
|
65
65
|
"@datagrok/helm": "^2.2.1",
|
|
@@ -70,12 +70,13 @@
|
|
|
70
70
|
"datagrok-tools": "latest",
|
|
71
71
|
"eslint": "latest",
|
|
72
72
|
"eslint-config-google": "latest",
|
|
73
|
-
"
|
|
74
|
-
"
|
|
75
|
-
"
|
|
76
|
-
"
|
|
73
|
+
"eslint-plugin-rxjs": "latest",
|
|
74
|
+
"source-map-loader": "latest",
|
|
75
|
+
"ts-loader": "^9.5.1",
|
|
76
|
+
"typescript": "^5.5.3",
|
|
77
|
+
"webpack": "^5.92.1",
|
|
77
78
|
"webpack-bundle-analyzer": "latest",
|
|
78
|
-
"webpack-cli": "^
|
|
79
|
+
"webpack-cli": "^5.1.4"
|
|
79
80
|
},
|
|
80
81
|
"scripts": {
|
|
81
82
|
"link-api": "npm link datagrok-api",
|
|
@@ -164,7 +164,7 @@ export function createLinesGrid(df: DG.DataFrame, colNames: string[]): DG.Grid {
|
|
|
164
164
|
const seqDiffCol = DG.Column.string('seq_diff', df.rowCount)
|
|
165
165
|
.init((i) => `${df.get(colNames[0], i)}#${df.get(colNames[1], i)}`);
|
|
166
166
|
seqDiffCol.semType = 'MacromoleculeDifference';
|
|
167
|
-
seqDiffCol.
|
|
167
|
+
seqDiffCol.meta.units = df.col(colNames[0])!.meta.units;
|
|
168
168
|
seqDiffCol.setTag(bioTAGS.separator, df.col(colNames[0])!.getTag(bioTAGS.separator));
|
|
169
169
|
df.columns.add(seqDiffCol);
|
|
170
170
|
const grid = df.plot.grid();
|
|
@@ -20,14 +20,13 @@ export class SplitToMonomersFunctionEditor {
|
|
|
20
20
|
}
|
|
21
21
|
|
|
22
22
|
constructor() {
|
|
23
|
-
this.tableInput = ui.
|
|
23
|
+
this.tableInput = ui.input.table('Table', {value: grok.shell.tv.dataFrame, onValueChanged: () => {
|
|
24
24
|
this.onTableInputChanged();
|
|
25
|
-
});
|
|
25
|
+
}});
|
|
26
26
|
//TODO: remove when the new version of datagrok-api is available
|
|
27
27
|
const seqColValue = this.tableInput.value!.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
this.seqColInput = ui.columnInput('Sequence', this.tableInput.value!, seqColValue, null, seqColOptions);
|
|
28
|
+
this.seqColInput = ui.input.column('Sequence', {table: this.tableInput.value!, value: seqColValue,
|
|
29
|
+
filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE});
|
|
31
30
|
|
|
32
31
|
this.funcParamsDiv = ui.inputs([
|
|
33
32
|
this.tableInput,
|
|
@@ -36,7 +35,7 @@ export class SplitToMonomersFunctionEditor {
|
|
|
36
35
|
}
|
|
37
36
|
|
|
38
37
|
onTableInputChanged(): void {
|
|
39
|
-
this.seqColInput = ui.
|
|
40
|
-
this.tableInput.value!.columns.bySemType(DG.SEMTYPE.MACROMOLECULE));
|
|
38
|
+
this.seqColInput = ui.input.column('Sequence', {table: this.tableInput.value!,
|
|
39
|
+
value: this.tableInput.value!.columns.bySemType(DG.SEMTYPE.MACROMOLECULE)});
|
|
41
40
|
}
|
|
42
41
|
}
|
package/src/package-types.ts
CHANGED
|
@@ -3,8 +3,9 @@ import * as DG from 'datagrok-api/dg';
|
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
|
|
5
5
|
import {Observable, Subject} from 'rxjs';
|
|
6
|
+
|
|
6
7
|
import {errInfo} from '@datagrok-libraries/bio/src/utils/err-info';
|
|
7
|
-
import {
|
|
8
|
+
import {LoggerWrapper} from '@datagrok-libraries/bio/src/utils/logger';
|
|
8
9
|
|
|
9
10
|
/** Names of package properties/settings declared in properties section of {@link './package.json'} */
|
|
10
11
|
export const enum BioPackagePropertiesNames {
|
|
@@ -20,31 +21,31 @@ export class BioPackageProperties extends Map<string, any> {
|
|
|
20
21
|
public get onPropertyChanged(): Observable<string> { return this._onPropertyChanged; }
|
|
21
22
|
|
|
22
23
|
/** Monomer symbol maximum length displayed, null for unlimited. */
|
|
23
|
-
public get
|
|
24
|
+
public get maxMonomerLength(): number | null {
|
|
24
25
|
const vs = super.get(BioPackagePropertiesNames.MaxMonomerLength);
|
|
25
26
|
return vs === 'long' ? null : parseInt(vs);
|
|
26
27
|
}
|
|
27
28
|
|
|
28
|
-
public set
|
|
29
|
+
public set maxMonomerLength(value: number | null) {
|
|
29
30
|
const vs = value === null ? 'long' : value.toString();
|
|
30
31
|
super.set(BioPackagePropertiesNames.MaxMonomerLength, vs);
|
|
31
32
|
this._onPropertyChanged.next(BioPackagePropertiesNames.MaxMonomerLength);
|
|
32
33
|
}
|
|
33
34
|
|
|
34
|
-
public get
|
|
35
|
+
public get tooltipWebLogo(): boolean {
|
|
35
36
|
return super.get(BioPackagePropertiesNames.TooltipWebLogo) as boolean;
|
|
36
37
|
}
|
|
37
38
|
|
|
38
|
-
public set
|
|
39
|
+
public set tooltipWebLogo(value: boolean) {
|
|
39
40
|
super.set(BioPackagePropertiesNames.TooltipWebLogo, value);
|
|
40
41
|
this._onPropertyChanged.next(BioPackagePropertiesNames.TooltipWebLogo);
|
|
41
42
|
}
|
|
42
43
|
|
|
43
|
-
public get
|
|
44
|
+
public get defaultSeparator(): string {
|
|
44
45
|
return super.get(BioPackagePropertiesNames.DefaultSeparator) as string;
|
|
45
46
|
}
|
|
46
47
|
|
|
47
|
-
public set
|
|
48
|
+
public set defaultSeparator(value: string) {
|
|
48
49
|
if (value.length !== 1) throw new Error('The separator must be of length one.');
|
|
49
50
|
super.set(BioPackagePropertiesNames.DefaultSeparator, value);
|
|
50
51
|
this._onPropertyChanged.next(BioPackagePropertiesNames.DefaultSeparator);
|
|
@@ -66,6 +67,12 @@ export class BioPackage extends DG.Package {
|
|
|
66
67
|
|
|
67
68
|
public get initialized(): boolean { return this._initialized; }
|
|
68
69
|
|
|
70
|
+
constructor(opts: { debug: boolean } = {debug: false}) {
|
|
71
|
+
super();
|
|
72
|
+
// @ts-ignore
|
|
73
|
+
super._logger = new LoggerWrapper(super.logger, opts.debug);
|
|
74
|
+
}
|
|
75
|
+
|
|
69
76
|
public completeInit(): void { this._initialized = true; }
|
|
70
77
|
|
|
71
78
|
handleErrorUI(err: any) {
|
package/src/package.ts
CHANGED
|
@@ -447,7 +447,7 @@ export async function activityCliffs(table: DG.DataFrame, molecules: DG.Column<s
|
|
|
447
447
|
return;
|
|
448
448
|
const axesNames = getEmbeddingColsNames(table);
|
|
449
449
|
const tags = {
|
|
450
|
-
'units': molecules.
|
|
450
|
+
'units': molecules.meta.units!,
|
|
451
451
|
'aligned': molecules.getTag(bioTAGS.aligned),
|
|
452
452
|
'separator': molecules.getTag(bioTAGS.separator),
|
|
453
453
|
'alphabet': molecules.getTag(bioTAGS.alphabet),
|
|
@@ -665,8 +665,8 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
665
665
|
} else if (colList.length > 1) {
|
|
666
666
|
const colListNames: string [] = colList.map((col) => col.name);
|
|
667
667
|
const selectedCol = colList.find((c) => { return SeqHandler.forColumn(c).isMsa(); });
|
|
668
|
-
const colInput: DG.InputBase = ui.
|
|
669
|
-
'Column', selectedCol ? selectedCol.name : colListNames[0], colListNames);
|
|
668
|
+
const colInput: DG.InputBase = ui.input.choice(
|
|
669
|
+
'Column', {value: selectedCol ? selectedCol.name : colListNames[0], items: colListNames});
|
|
670
670
|
ui.dialog({
|
|
671
671
|
title: 'Composition Analysis',
|
|
672
672
|
helpUrl: 'https://datagrok.ai/help/datagrok/solutions/domains/bio/#sequence-composition',
|
|
@@ -750,13 +750,13 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
|
|
|
750
750
|
const semType = await grok.functions.call('Bio:detectMacromolecule', {col: col});
|
|
751
751
|
if (semType === DG.SEMTYPE.MACROMOLECULE) {
|
|
752
752
|
//console.warn(`file: ${fileInfo.path}, column: ${col.name}, ` +
|
|
753
|
-
// `semType: ${semType}, units: ${col.
|
|
753
|
+
// `semType: ${semType}, units: ${col.meta.units}`);
|
|
754
754
|
// console.warn('file: "' + fileInfo.path + '", semType: "' + semType + '", ' +
|
|
755
|
-
// 'units: "' + col.
|
|
755
|
+
// 'units: "' + col.meta.units + '"');
|
|
756
756
|
|
|
757
757
|
res.push({
|
|
758
758
|
file: fileInfo.path, result: 'detected', column: col.name,
|
|
759
|
-
message: `units: ${col.
|
|
759
|
+
message: `units: ${col.meta.units}`,
|
|
760
760
|
});
|
|
761
761
|
}
|
|
762
762
|
}
|
|
@@ -54,7 +54,7 @@ export class SubstructureSearchDialog {
|
|
|
54
54
|
}
|
|
55
55
|
|
|
56
56
|
updateNotationDiv(): void {
|
|
57
|
-
this.units = this.col.
|
|
57
|
+
this.units = this.col.meta.units!;
|
|
58
58
|
this.separator = this.col.getTag(bioTAGS.separator);
|
|
59
59
|
const notationDiv = this.dialog.root.getElementsByClassName('notation-text')[0];
|
|
60
60
|
if (notationDiv)
|
|
@@ -63,26 +63,25 @@ export class SubstructureSearchDialog {
|
|
|
63
63
|
|
|
64
64
|
createUI(): void {
|
|
65
65
|
const dataframe = grok.shell.tv.dataFrame;
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
this.col = column;
|
|
66
|
+
this.columnsInput = ui.input.column('Column', {table: dataframe, value: this.col, onValueChanged: (input) => {
|
|
67
|
+
this.col = input.value;
|
|
69
68
|
this.updateNotationDiv();
|
|
70
69
|
this.updateInputs();
|
|
71
|
-
},
|
|
70
|
+
}, filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE});
|
|
72
71
|
|
|
73
|
-
this.substructureInput = ui.
|
|
72
|
+
this.substructureInput = ui.input.string('Substructure', {value: ''});
|
|
74
73
|
|
|
75
74
|
this.editHelmLink = ui.link('Edit helm', () => this.editHelmLinkAction(), undefined, {style: {position: 'relative', left: '95px'}});
|
|
76
75
|
|
|
77
76
|
const df = DG.DataFrame.create(1);
|
|
78
77
|
df.columns.addNewString(SUBSTR_HELM_COL_NAME).init((_i) => '');
|
|
79
78
|
df.col(SUBSTR_HELM_COL_NAME)!.semType = this.col.semType;
|
|
80
|
-
df.col(SUBSTR_HELM_COL_NAME)!.
|
|
79
|
+
df.col(SUBSTR_HELM_COL_NAME)!.meta.units = NOTATION.HELM;
|
|
81
80
|
this.grid = df.plot.grid();
|
|
82
|
-
this.separatorInput = ui.
|
|
81
|
+
this.separatorInput = ui.input.string('Separator', {value: this.separator});
|
|
83
82
|
|
|
84
83
|
this.inputsDiv = ui.div();
|
|
85
|
-
this.units = this.col.
|
|
84
|
+
this.units = this.col.meta.units!;
|
|
86
85
|
this.separator = this.col.getTag(bioTAGS.separator);
|
|
87
86
|
this.updateInputs();
|
|
88
87
|
|
|
@@ -135,7 +134,7 @@ export async function helmSubstructureSearch(substructure: string, col: DG.Colum
|
|
|
135
134
|
await invalidateMols(col, true);
|
|
136
135
|
const substructureCol: DG.Column<string> = DG.Column.string('helm', 1).init((_i) => substructure);
|
|
137
136
|
substructureCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
138
|
-
substructureCol.
|
|
137
|
+
substructureCol.meta.units = NOTATION.HELM;
|
|
139
138
|
const substructureMolsCol =
|
|
140
139
|
await getMonomericMols(substructureCol, true, col.temp[MONOMERIC_COL_TAGS.MONOMERS_DICT]);
|
|
141
140
|
const matchesCol = await grok.functions.call('Chem:searchSubstructure', {
|
|
@@ -29,7 +29,7 @@ ATC-G-TTGC--
|
|
|
29
29
|
|
|
30
30
|
const seqCol: DG.Column = df.getCol('seq');
|
|
31
31
|
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
32
|
-
seqCol.
|
|
32
|
+
seqCol.meta.units = NOTATION.FASTA;
|
|
33
33
|
seqCol.setTag(bioTAGS.alphabet, ALPHABET.DNA);
|
|
34
34
|
seqCol.setTag(bioTAGS.aligned, 'SEQ.MSA');
|
|
35
35
|
|
|
@@ -77,7 +77,7 @@ ATC-G-TTGC--
|
|
|
77
77
|
|
|
78
78
|
const seqCol: DG.Column = df.getCol('seq');
|
|
79
79
|
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
80
|
-
seqCol.
|
|
80
|
+
seqCol.meta.units = NOTATION.FASTA;
|
|
81
81
|
seqCol.setTag(bioTAGS.alphabet, ALPHABET.DNA);
|
|
82
82
|
seqCol.setTag(bioTAGS.aligned, 'SEQ');
|
|
83
83
|
|
|
@@ -120,7 +120,7 @@ ATC-G-TTGC--
|
|
|
120
120
|
|
|
121
121
|
const seqCol: DG.Column = df.getCol('seq');
|
|
122
122
|
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
123
|
-
seqCol.
|
|
123
|
+
seqCol.meta.units = NOTATION.FASTA;
|
|
124
124
|
seqCol.setTag(bioTAGS.alphabet, ALPHABET.DNA);
|
|
125
125
|
seqCol.setTag(bioTAGS.aligned, 'SEQ.MSA');
|
|
126
126
|
|
|
@@ -191,8 +191,8 @@ ATC-G-TTGC--
|
|
|
191
191
|
test('empty', async () => {
|
|
192
192
|
const df: DG.DataFrame = DG.DataFrame.fromColumns([(() => {
|
|
193
193
|
const col = DG.Column.fromStrings('seq', []);
|
|
194
|
-
col.
|
|
195
|
-
col.
|
|
194
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
195
|
+
col.meta.units = NOTATION.FASTA;
|
|
196
196
|
col.setTag(bioTAGS.alphabet, ALPHABET.DNA);
|
|
197
197
|
return col;
|
|
198
198
|
})()]);
|
|
@@ -222,7 +222,7 @@ function buildDfWithSeqCol(csv: string, notation: NOTATION, alphabet: ALPHABET,
|
|
|
222
222
|
|
|
223
223
|
const seqCol: DG.Column = df.getCol('seq');
|
|
224
224
|
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
225
|
-
seqCol.
|
|
225
|
+
seqCol.meta.units = notation;
|
|
226
226
|
seqCol.setTag(bioTAGS.alphabet, alphabet);
|
|
227
227
|
seqCol.setTag(bioTAGS.aligned, aligned);
|
|
228
228
|
|
|
@@ -13,21 +13,24 @@ import {
|
|
|
13
13
|
getUserLibSettings, setUserLibSettings, setUserLibSettingsForTests
|
|
14
14
|
} from '@datagrok-libraries/bio/src/monomer-works/lib-settings';
|
|
15
15
|
import {UserLibSettings} from '@datagrok-libraries/bio/src/monomer-works/types';
|
|
16
|
+
import {DimReductionMethods} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/types';
|
|
17
|
+
import {getHelmHelper, IHelmHelper} from '@datagrok-libraries/bio/src/helm/helm-helper';
|
|
16
18
|
|
|
17
19
|
import {_package} from '../package-test';
|
|
18
|
-
import {DimReductionMethods} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/types';
|
|
19
20
|
|
|
20
21
|
|
|
21
22
|
category('activityCliffs', async () => {
|
|
22
23
|
let viewList: DG.ViewBase[] = [];
|
|
23
24
|
let dfList: DG.DataFrame[] = [];
|
|
24
25
|
|
|
26
|
+
let helmHelper: IHelmHelper;
|
|
25
27
|
let monomerLibHelper: IMonomerLibHelper;
|
|
26
28
|
/** Backup actual user's monomer libraries settings */
|
|
27
29
|
let userLibSettings: UserLibSettings;
|
|
28
30
|
const seqEncodingFunc = DG.Func.find({name: 'macromoleculePreprocessingFunction', package: 'Bio'})[0];
|
|
29
31
|
const helmEncodingFunc = DG.Func.find({name: 'helmPreprocessingFunction', package: 'Bio'})[0];
|
|
30
32
|
before(async () => {
|
|
33
|
+
helmHelper = await getHelmHelper(); // init Helm package
|
|
31
34
|
monomerLibHelper = await getMonomerLibHelper();
|
|
32
35
|
userLibSettings = await getUserLibSettings();
|
|
33
36
|
|
|
@@ -59,7 +62,7 @@ category('activityCliffs', async () => {
|
|
|
59
62
|
|
|
60
63
|
await _testActivityCliffsOpen(actCliffsDf, DimReductionMethods.UMAP,
|
|
61
64
|
'sequence', 'Activity', 90, cliffsNum, MmDistanceFunctionsNames.LEVENSHTEIN, seqEncodingFunc);
|
|
62
|
-
});
|
|
65
|
+
}, {benchmark: true});
|
|
63
66
|
|
|
64
67
|
test('activityCliffsWithEmptyRows', async () => {
|
|
65
68
|
const actCliffsDfWithEmptyRows = await readDataframe('tests/100_3_clustests_empty_vals.csv');
|
package/src/tests/bio-tests.ts
CHANGED
|
@@ -58,7 +58,7 @@ PEPTIDE1{meI}$$$$`;
|
|
|
58
58
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
59
59
|
const seqCol: DG.Column = df.getCol('seq')!;
|
|
60
60
|
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
61
|
-
seqCol.
|
|
61
|
+
seqCol.meta.units = NOTATION.HELM;
|
|
62
62
|
const stats = getStatsForCol(seqCol, 1, splitterAsHelm);
|
|
63
63
|
|
|
64
64
|
expectObject(stats.freq, {
|
|
@@ -129,7 +129,7 @@ export async function _testGetStats(csvDfN1: string) {
|
|
|
129
129
|
const dfN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
|
|
130
130
|
const seqCol: DG.Column = dfN1.col('seq')!;
|
|
131
131
|
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
132
|
-
seqCol.
|
|
132
|
+
seqCol.meta.units = NOTATION.FASTA;
|
|
133
133
|
const stats = getStatsForCol(seqCol, 5, splitterAsFasta);
|
|
134
134
|
|
|
135
135
|
expectObject(stats.freq, {
|
|
@@ -159,7 +159,7 @@ export async function _testPickupPaletteN1(csvDfN1: string) {
|
|
|
159
159
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
|
|
160
160
|
const col: DG.Column = df.col('seq')!;
|
|
161
161
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
162
|
-
col.
|
|
162
|
+
col.meta.units = NOTATION.FASTA;
|
|
163
163
|
const cp = pickUpPalette(col);
|
|
164
164
|
|
|
165
165
|
expect(cp instanceof NucleotidesPalettes, true);
|
|
@@ -169,7 +169,7 @@ export async function _testPickupPaletteN1e(csvDfN1e: string) {
|
|
|
169
169
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1e);
|
|
170
170
|
const col: DG.Column = df.col('seq')!;
|
|
171
171
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
172
|
-
col.
|
|
172
|
+
col.meta.units = NOTATION.FASTA;
|
|
173
173
|
const cp = pickUpPalette(col);
|
|
174
174
|
|
|
175
175
|
expect(cp instanceof NucleotidesPalettes, true);
|
|
@@ -179,7 +179,7 @@ export async function _testPickupPaletteAA1(csvDfAA1: string) {
|
|
|
179
179
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfAA1);
|
|
180
180
|
const col: DG.Column = df.col('seq')!;
|
|
181
181
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
182
|
-
col.
|
|
182
|
+
col.meta.units = NOTATION.FASTA;
|
|
183
183
|
const cp = pickUpPalette(col);
|
|
184
184
|
|
|
185
185
|
expect(cp instanceof AminoacidsPalettes, true);
|
|
@@ -189,7 +189,7 @@ export async function _testPickupPaletteX(csvDfX: string) {
|
|
|
189
189
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfX);
|
|
190
190
|
const col: DG.Column = df.col('seq')!;
|
|
191
191
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
192
|
-
col.
|
|
192
|
+
col.meta.units = NOTATION.FASTA;
|
|
193
193
|
const cp = pickUpPalette(col);
|
|
194
194
|
|
|
195
195
|
expect(cp instanceof UnknownSeqPalette, true);
|
|
@@ -18,7 +18,7 @@ seq4`;
|
|
|
18
18
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
19
19
|
const col: DG.Column = df.getCol('seq');
|
|
20
20
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
21
|
-
col.
|
|
21
|
+
col.meta.units = NOTATION.FASTA;
|
|
22
22
|
col.setTag(bioTAGS.alphabet, ALPHABET.DNA);
|
|
23
23
|
col.setTag(bioTAGS.aligned, 'SEQ');
|
|
24
24
|
|
|
@@ -33,7 +33,7 @@ seq4`;
|
|
|
33
33
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
34
34
|
const col: DG.Column = df.getCol('seq');
|
|
35
35
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
36
|
-
col.
|
|
36
|
+
col.meta.units = NOTATION.HELM;
|
|
37
37
|
// col.setTag(bio.TAGS.alphabetSize, '11');
|
|
38
38
|
col.setTag(bioTAGS.alphabetIsMultichar, 'true');
|
|
39
39
|
|
|
@@ -48,7 +48,7 @@ seq4`;
|
|
|
48
48
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
49
49
|
const col: DG.Column = df.getCol('seq');
|
|
50
50
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
51
|
-
col.
|
|
51
|
+
col.meta.units = NOTATION.FASTA;
|
|
52
52
|
col.setTag(bioTAGS.alphabet, 'UN');
|
|
53
53
|
col.setTag(bioTAGS.alphabetSize, '11');
|
|
54
54
|
col.setTag(bioTAGS.alphabetIsMultichar, 'true');
|
|
@@ -135,7 +135,7 @@ RNA1{p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.p
|
|
|
135
135
|
return function(srcCol: DG.Column): DG.Column {
|
|
136
136
|
const converterSh = SeqHandler.forColumn(srcCol);
|
|
137
137
|
const resCol = converterSh.convert(tgtNotation, tgtSeparator);
|
|
138
|
-
expect(resCol.
|
|
138
|
+
expect(resCol.meta.units, tgtNotation);
|
|
139
139
|
return resCol;
|
|
140
140
|
};
|
|
141
141
|
}
|
|
@@ -99,6 +99,15 @@ C1CCCCC1
|
|
|
99
99
|
CCCCCC`,
|
|
100
100
|
neg: ['col1'],
|
|
101
101
|
},
|
|
102
|
+
'negFastaUnSingleChar': {
|
|
103
|
+
csv: `col1
|
|
104
|
+
Alanine
|
|
105
|
+
Cysteine
|
|
106
|
+
Aspartic acid
|
|
107
|
+
Glutamic acid
|
|
108
|
+
Phenylalanine`,
|
|
109
|
+
neg: ['col1']
|
|
110
|
+
},
|
|
102
111
|
|
|
103
112
|
// Same length
|
|
104
113
|
'fastaMsaSameLength': {
|
|
@@ -122,30 +131,33 @@ YN[Re]VYNR[Ac]WYV
|
|
|
122
131
|
[Me]EYVMPSFW[Me]H`,
|
|
123
132
|
pos: {'seq': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 14, true, undefined)},
|
|
124
133
|
},
|
|
134
|
+
'fastaMsaExtManyMinus': {
|
|
135
|
+
csv: `seq
|
|
136
|
+
[D-Tic]-------[D-Tyr_Et][Tyr_ab-dehydroMe][dV][Cys_SEt]N[D-Orn][D-aThr]-[Phe_4Me]
|
|
137
|
+
[Phe_2F]--------[Tyr_ab-dehydroMe][dV][Aca]N[D-Orn][D-aThr]-[Phe_4Me]
|
|
138
|
+
[D-Tic]-[Hcy]QTWQ[Phe_4NH2][D-Tyr_Et][Tyr_ab-dehydroMe][dV][Cys_SEt]----[Phe_4Me]`,
|
|
139
|
+
pos: {'seq': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 17, true, undefined)}
|
|
140
|
+
},
|
|
125
141
|
'sepSameLength': {
|
|
126
142
|
csv: `seq
|
|
127
143
|
Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2
|
|
128
144
|
Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2
|
|
129
|
-
Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2`,
|
|
130
|
-
|
|
131
|
-
}
|
|
145
|
+
Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2`,
|
|
146
|
+
pos: {'seq': new PosCol(NOTATION.SEPARATOR, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 5, true, '-')}
|
|
132
147
|
},
|
|
133
148
|
'sepMsaSameLength': {
|
|
134
149
|
csv: `seq
|
|
135
150
|
Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2
|
|
136
151
|
Ac(1)-A-A(2)-A-A-A-C(2)-A-A-A-A-C(1)-G-NH2
|
|
137
|
-
Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2`,
|
|
138
|
-
|
|
139
|
-
}
|
|
152
|
+
Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2`,
|
|
153
|
+
pos: {'seq': new PosCol(NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.UN, 5, true, '-')}
|
|
140
154
|
},
|
|
141
155
|
'helmSameLength': {
|
|
142
156
|
csv: `seq
|
|
143
157
|
PEPTIDE1{Ac(1).A.A.A.A.A.A.A.A.A.A.A.A.A.C(1).G.NH2}$$$$
|
|
144
158
|
PEPTIDE1{Ab(1).Y.V.K.H.P.F.W.R.W.Y.A.A.A.C(1).G.NH2}$$$$
|
|
145
159
|
PEPTIDE1{Ad(1).S.W.Y.C.K.H.P.M.W.A.A.A.A.C(1)-G-NH2}$$$$`,
|
|
146
|
-
pos: {
|
|
147
|
-
'seq': new PosCol(NOTATION.HELM, null, null, 19, undefined, undefined)
|
|
148
|
-
}
|
|
160
|
+
pos: {'seq': new PosCol(NOTATION.HELM, null, null, 19, undefined, undefined)}
|
|
149
161
|
},
|
|
150
162
|
};
|
|
151
163
|
|
|
@@ -493,7 +505,7 @@ export async function _testNegList(list: string[]): Promise<void> {
|
|
|
493
505
|
const col: DG.Column = DG.Column.fromList(DG.TYPE.STRING, 'col1', list);
|
|
494
506
|
const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: col});
|
|
495
507
|
if (col.semType === DG.SEMTYPE.MACROMOLECULE) {
|
|
496
|
-
const msg = `Negative test detected semType='${col.semType}', units='${col.
|
|
508
|
+
const msg = `Negative test detected semType='${col.semType}', units='${col.meta.units}'.`;
|
|
497
509
|
throw new Error(msg);
|
|
498
510
|
}
|
|
499
511
|
}
|
|
@@ -507,7 +519,7 @@ export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
|
507
519
|
col.semType = semType;
|
|
508
520
|
|
|
509
521
|
if (col.semType === DG.SEMTYPE.MACROMOLECULE) {
|
|
510
|
-
const msg = `Negative test detected semType='${col.semType}', units='${col.
|
|
522
|
+
const msg = `Negative test detected semType='${col.semType}', units='${col.meta.units}'.`;
|
|
511
523
|
throw new Error(msg);
|
|
512
524
|
}
|
|
513
525
|
}
|
|
@@ -522,7 +534,7 @@ export async function _testPosList(list: string[], units: NOTATION,
|
|
|
522
534
|
col.semType = semType;
|
|
523
535
|
|
|
524
536
|
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
525
|
-
expect(col.
|
|
537
|
+
expect(col.meta.units, units);
|
|
526
538
|
expect(col.getTag(bioTAGS.aligned), aligned);
|
|
527
539
|
expect(col.getTag(bioTAGS.alphabet), alphabet);
|
|
528
540
|
if (separator)
|
|
@@ -550,7 +562,7 @@ export async function _testPos(
|
|
|
550
562
|
col.semType = semType;
|
|
551
563
|
|
|
552
564
|
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
553
|
-
expect(col.
|
|
565
|
+
expect(col.meta.units, units);
|
|
554
566
|
expect(col.getTag(bioTAGS.aligned), aligned);
|
|
555
567
|
expect(col.getTag(bioTAGS.alphabet), alphabet);
|
|
556
568
|
if (separator)
|
|
@@ -91,7 +91,7 @@ MRGGL
|
|
|
91
91
|
const srcSeq: string = wrapData[testKey].src;
|
|
92
92
|
const col = DG.Column.fromStrings('src', [srcSeq]);
|
|
93
93
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
94
|
-
col.
|
|
94
|
+
col.meta.units = NOTATION.FASTA;
|
|
95
95
|
const sh = SeqHandler.forColumn(col);
|
|
96
96
|
const srcSS = sh.getSplitted(0);
|
|
97
97
|
const wrapRes: string[] = wrapSequence(srcSS, lineWidth);
|
|
@@ -105,7 +105,7 @@ MRGGL
|
|
|
105
105
|
|
|
106
106
|
const seqCol: DG.Column = df.getCol(args.seqCol);
|
|
107
107
|
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
108
|
-
seqCol.
|
|
108
|
+
seqCol.meta.units = NOTATION.FASTA;
|
|
109
109
|
const idCols: DG.Column[] = args.idCols.map((colName) => df.getCol(colName));
|
|
110
110
|
|
|
111
111
|
const fastaRes: string = saveAsFastaDo(idCols, seqCol, args.lineWidth);
|
|
@@ -122,7 +122,7 @@ ATCGAATCGA`;
|
|
|
122
122
|
const seq2 = Array(10000).fill('FYWRRY').join('');
|
|
123
123
|
_testDistance(seq1, seq2, df, 0.667);
|
|
124
124
|
} else { _testDistance(prot5, prot6, df, 1.143); }
|
|
125
|
-
});
|
|
125
|
+
}, {benchmark: true});
|
|
126
126
|
});
|
|
127
127
|
|
|
128
128
|
async function _initMacromoleculeColumn(csv: string): Promise<SeqHandler> {
|
package/src/tests/msa-tests.ts
CHANGED
|
@@ -125,14 +125,14 @@ async function _testMSAOnColumn(
|
|
|
125
125
|
const tgtCol = tgtDf.getCol('seq')!;
|
|
126
126
|
const srcCol: DG.Column = srcDf.getCol('seq')!;
|
|
127
127
|
expect(srcCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
128
|
-
expect(srcCol.
|
|
128
|
+
expect(srcCol.meta.units, srcNotation);
|
|
129
129
|
if (alphabet)
|
|
130
130
|
expect(srcCol.getTag(bioTAGS.alphabet), alphabet);
|
|
131
131
|
|
|
132
132
|
const msaSeqCol = await multipleSequenceAlignmentUI({col: srcCol, pepsea: {method: pepseaMethod}});
|
|
133
133
|
expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
134
134
|
expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
135
|
-
expect(msaSeqCol.
|
|
135
|
+
expect(msaSeqCol.meta.units, tgtNotation);
|
|
136
136
|
expect(msaSeqCol.getTag(bioTAGS.aligned), ALIGNMENT.SEQ_MSA);
|
|
137
137
|
if (alphabet)
|
|
138
138
|
expect(msaSeqCol.getTag(bioTAGS.alphabet), alphabet);
|
|
@@ -103,7 +103,7 @@ category('renderers', () => {
|
|
|
103
103
|
async function _rendererMacromoleculeDifference() {
|
|
104
104
|
const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
|
|
105
105
|
['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
|
|
106
|
-
seqDiffCol.
|
|
106
|
+
seqDiffCol.meta.units = NOTATION.SEPARATOR;
|
|
107
107
|
seqDiffCol.setTag(bioTAGS.separator, '/');
|
|
108
108
|
seqDiffCol.setTag(bioTAGS.aligned, 'SEQ');
|
|
109
109
|
seqDiffCol.setTag(bioTAGS.alphabet, 'UN');
|
|
@@ -140,10 +140,10 @@ category('renderers', () => {
|
|
|
140
140
|
expect(tv.grid.dataFrame.id, df.id);
|
|
141
141
|
|
|
142
142
|
console.log('Bio: tests/renderers/afterMsa, src before test ' +
|
|
143
|
-
`semType="${srcSeqCol!.semType}", units="${srcSeqCol!.
|
|
143
|
+
`semType="${srcSeqCol!.semType}", units="${srcSeqCol!.meta.units}", ` +
|
|
144
144
|
`cell.renderer="${srcSeqCol!.getTag(DG.TAGS.CELL_RENDERER)}"`);
|
|
145
145
|
expect(srcSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
146
|
-
expect(srcSeqCol.
|
|
146
|
+
expect(srcSeqCol.meta.units, NOTATION.FASTA);
|
|
147
147
|
expect(srcSeqCol.getTag(bioTAGS.aligned), ALIGNMENT.SEQ);
|
|
148
148
|
expect(srcSeqCol.getTag(bioTAGS.alphabet), ALPHABET.PT);
|
|
149
149
|
expect(srcSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
@@ -153,7 +153,7 @@ category('renderers', () => {
|
|
|
153
153
|
expect(tv.grid.dataFrame.id, df.id);
|
|
154
154
|
|
|
155
155
|
expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
156
|
-
expect(msaSeqCol.
|
|
156
|
+
expect(msaSeqCol.meta.units, NOTATION.FASTA);
|
|
157
157
|
expect(msaSeqCol.getTag(bioTAGS.aligned), ALIGNMENT.SEQ_MSA);
|
|
158
158
|
expect(msaSeqCol.getTag(bioTAGS.alphabet), ALPHABET.PT);
|
|
159
159
|
expect(msaSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
@@ -205,7 +205,7 @@ category('renderers', () => {
|
|
|
205
205
|
/**/
|
|
206
206
|
const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
|
|
207
207
|
['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
|
|
208
|
-
seqDiffCol.
|
|
208
|
+
seqDiffCol.meta.units = NOTATION.SEPARATOR;
|
|
209
209
|
seqDiffCol.setTag(bioTAGS.separator, '/');
|
|
210
210
|
seqDiffCol.setTag(bioTAGS.aligned, 'SEQ');
|
|
211
211
|
seqDiffCol.setTag(bioTAGS.alphabet, 'UN');
|