@datagrok/bio 2.18.3 → 2.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/dist/455.js +1 -1
- package/dist/455.js.map +1 -1
- package/dist/package-test.js +3 -3
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +2 -2
- package/dist/package.js.map +1 -1
- package/files/monomer-libraries/HELMCoreLibrary.json +18926 -18215
- package/files/tests/to-atomic-level-dna-fasta-output.csv +15077 -15077
- package/files/tests/to-atomic-level-msa-fasta-output.csv +1903 -1903
- package/files/tests/to-atomic-level-msa-separator-output.csv +3236 -3236
- package/files/tests/to-atomic-level-peptides-fasta-output.csv +32262 -32262
- package/files/tests/to-atomic-level-pt-fasta-2.mol +29 -29
- package/package.json +3 -3
- package/src/analysis/sequence-search-base-viewer.ts +1 -1
- package/src/package.ts +22 -29
- package/src/tests/scoring.ts +2 -2
- package/src/tests/seq-handler-get-region-tests.ts +2 -1
- package/src/utils/helm-to-molfile/converter/converter.ts +21 -9
- package/src/utils/helm-to-molfile/converter/simple-polymer.ts +18 -9
- package/src/utils/monomer-lib/monomer-lib-base.ts +4 -4
- package/src/utils/monomer-lib/monomer-manager/monomer-manager.ts +19 -1
- package/src/utils/seq-helper/seq-handler.ts +12 -3
- package/src/utils/seq-helper/seq-helper.ts +69 -33
- package/src/utils/sequence-to-mol.ts +1 -1
- package/src/widgets/composition-analysis-widget.ts +3 -1
- package/test-console-output-1.log +886 -892
- package/test-record-1.mp4 +0 -0
|
@@ -5,41 +5,41 @@ Datagrok macromolecule handler
|
|
|
5
5
|
M V30 BEGIN CTAB
|
|
6
6
|
M V30 COUNTS 17 16 0 0 0
|
|
7
7
|
M V30 BEGIN ATOM
|
|
8
|
-
M V30 1 C
|
|
9
|
-
M V30 2 C
|
|
8
|
+
M V30 1 C 1.2991 -2.25 0.000000 0
|
|
9
|
+
M V30 2 C 1.2991 -0.75 0.000000 0
|
|
10
10
|
M V30 3 N 0 0 0.000000 0
|
|
11
|
-
M V30 4 C
|
|
12
|
-
M V30 5 O
|
|
13
|
-
M V30 6 N
|
|
14
|
-
M V30 7 C
|
|
15
|
-
M V30 8 N
|
|
16
|
-
M V30 9 N
|
|
17
|
-
M V30 10 C
|
|
18
|
-
M V30 11 C
|
|
19
|
-
M V30 12 C
|
|
20
|
-
M V30 13 C
|
|
21
|
-
M V30 14 N
|
|
22
|
-
M V30 15 C
|
|
23
|
-
M V30 16 O
|
|
24
|
-
M V30 17 O
|
|
11
|
+
M V30 4 C 2.5981 0 0.000000 0
|
|
12
|
+
M V30 5 O 3.8971 -0.75 0.000000 0
|
|
13
|
+
M V30 6 N 7.7941 9 0.000000 0
|
|
14
|
+
M V30 7 C 6.4951 8.2501 0.000000 0
|
|
15
|
+
M V30 8 N 5.196 9 0.000000 0
|
|
16
|
+
M V30 9 N 6.4951 6.7501 0.000000 0
|
|
17
|
+
M V30 10 C 5.1961 6 0.000000 0
|
|
18
|
+
M V30 11 C 5.1961 4.5 0.000000 0
|
|
19
|
+
M V30 12 C 3.8971 3.75 0.000000 0
|
|
20
|
+
M V30 13 C 3.8971 2.25 0.000000 0
|
|
21
|
+
M V30 14 N 2.5981 1.5 0.000000 0
|
|
22
|
+
M V30 15 C 5.1962 1.5 0.000000 0
|
|
23
|
+
M V30 16 O 6.4952 2.25 0.000000 0
|
|
24
|
+
M V30 17 O 5.1962 -0.0001 0.000000 0
|
|
25
25
|
M V30 END ATOM
|
|
26
26
|
M V30 BEGIN BOND
|
|
27
|
-
M V30 1 1 2 1 CFG=3
|
|
28
|
-
M V30 2 1 2 3
|
|
27
|
+
M V30 1 1 2 1 CFG=3
|
|
28
|
+
M V30 2 1 2 3
|
|
29
29
|
M V30 3 1 2 4
|
|
30
|
-
M V30 4 2 4 5
|
|
30
|
+
M V30 4 2 4 5
|
|
31
31
|
M V30 5 1 4 14
|
|
32
|
-
M V30 6
|
|
33
|
-
M V30 7
|
|
34
|
-
M V30 8 1 7 9
|
|
35
|
-
M V30 9 1 9 10
|
|
36
|
-
M V30 10 1 10 11
|
|
37
|
-
M V30 11 1 11 12
|
|
38
|
-
M V30 12 1 13 12 CFG=1
|
|
39
|
-
M V30 13 1 13 14
|
|
32
|
+
M V30 6 2 6 7
|
|
33
|
+
M V30 7 1 7 8
|
|
34
|
+
M V30 8 1 7 9
|
|
35
|
+
M V30 9 1 9 10
|
|
36
|
+
M V30 10 1 10 11
|
|
37
|
+
M V30 11 1 11 12
|
|
38
|
+
M V30 12 1 13 12 CFG=1
|
|
39
|
+
M V30 13 1 13 14
|
|
40
40
|
M V30 14 1 13 15
|
|
41
|
-
M V30 15 2 15 16
|
|
41
|
+
M V30 15 2 15 16
|
|
42
42
|
M V30 16 1 15 17
|
|
43
43
|
M V30 END BOND
|
|
44
44
|
M V30 END CTAB
|
|
45
|
-
M END
|
|
45
|
+
M END
|
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.18.3",
|
|
8
|
+
"version": "2.19.0",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -44,13 +44,13 @@
|
|
|
44
44
|
],
|
|
45
45
|
"dependencies": {
|
|
46
46
|
"@biowasm/aioli": "^3.1.0",
|
|
47
|
-
"@datagrok-libraries/bio": "^5.
|
|
47
|
+
"@datagrok-libraries/bio": "^5.50.1",
|
|
48
48
|
"@datagrok-libraries/chem-meta": "^1.2.7",
|
|
49
49
|
"@datagrok-libraries/math": "^1.2.4",
|
|
50
50
|
"@datagrok-libraries/ml": "^6.7.6",
|
|
51
51
|
"@datagrok-libraries/tutorials": "^1.4.3",
|
|
52
52
|
"@datagrok-libraries/utils": "^4.4.0",
|
|
53
|
-
"datagrok-api": "^1.
|
|
53
|
+
"datagrok-api": "^1.24.0",
|
|
54
54
|
"@webgpu/types": "^0.1.40",
|
|
55
55
|
"ajv": "^8.12.0",
|
|
56
56
|
"ajv-errors": "^3.0.0",
|
|
@@ -70,7 +70,7 @@ export class SequenceSearchBaseViewer extends DG.JsViewer {
|
|
|
70
70
|
this.render();
|
|
71
71
|
}
|
|
72
72
|
|
|
73
|
-
/** For tests */ public computeRequested: boolean;
|
|
73
|
+
/** For tests */ public computeRequested: boolean = false;
|
|
74
74
|
public renderPromise: Promise<void> = Promise.resolve();
|
|
75
75
|
|
|
76
76
|
protected render(computeData = true): void {
|
package/src/package.ts
CHANGED
|
@@ -422,7 +422,7 @@ export function getRegion(
|
|
|
422
422
|
start ?? null, end ?? null, name ?? null);
|
|
423
423
|
}
|
|
424
424
|
|
|
425
|
-
//top-menu: Bio |
|
|
425
|
+
//top-menu: Bio | Calculate | Get Region...
|
|
426
426
|
//name: Get Region Top Menu
|
|
427
427
|
//description: Get sequences for a region specified from a Macromolecule
|
|
428
428
|
//input: dataframe table [Input data table]
|
|
@@ -607,14 +607,13 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, molecules: DG.Co
|
|
|
607
607
|
return res;
|
|
608
608
|
}
|
|
609
609
|
|
|
610
|
-
//top-menu: Bio |
|
|
610
|
+
//top-menu: Bio | Transform | To Atomic Level...
|
|
611
611
|
//name: To Atomic Level
|
|
612
612
|
//description: Converts sequences to molblocks
|
|
613
613
|
//input: dataframe table [Input data table]
|
|
614
614
|
//input: column seqCol {caption: Sequence; semType: Macromolecule}
|
|
615
615
|
//input: bool nonlinear =false {caption: Non-linear; description: Slower mode for cycling/branching HELM structures}
|
|
616
616
|
//input: bool highlight =false {caption: Highlight monomers; description: Highlight monomers' substructures of the molecule }
|
|
617
|
-
//output:
|
|
618
617
|
export async function toAtomicLevel(
|
|
619
618
|
table: DG.DataFrame, seqCol: DG.Column, nonlinear: boolean, highlight: boolean = false
|
|
620
619
|
): Promise<void> {
|
|
@@ -630,6 +629,17 @@ export async function toAtomicLevel(
|
|
|
630
629
|
}
|
|
631
630
|
}
|
|
632
631
|
|
|
632
|
+
//name: To Atomic Level...
|
|
633
|
+
//input: column seqCol {semType: Macromolecule}
|
|
634
|
+
//meta.action: to atomic level
|
|
635
|
+
export async function toAtomicLevelAction(seqCol: DG.Column) {
|
|
636
|
+
if (!seqCol?.dataFrame)
|
|
637
|
+
throw new Error('Sequence column is not found or its data frame is not empty');
|
|
638
|
+
const func = DG.Func.find({name: 'toAtomicLevel', package: 'Bio'})[0];
|
|
639
|
+
if (!func) throw new Error('To Atomic level Function not found');
|
|
640
|
+
func.prepare({table: seqCol.dataFrame, seqCol: seqCol}).edit();
|
|
641
|
+
}
|
|
642
|
+
|
|
633
643
|
//top-menu: Bio | Analyze | MSA...
|
|
634
644
|
//name: MSA
|
|
635
645
|
//description: Performs multiple sequence alignment
|
|
@@ -743,38 +753,20 @@ export function importBam(fileContent: string): DG.DataFrame [] {
|
|
|
743
753
|
return [];
|
|
744
754
|
}
|
|
745
755
|
|
|
746
|
-
//top-menu: Bio |
|
|
756
|
+
//top-menu: Bio | Transform | Convert Notation...
|
|
747
757
|
//name: convertDialog
|
|
748
758
|
export function convertDialog() {
|
|
749
759
|
const col: DG.Column<string> | undefined = getMacromoleculeColumns()[0];
|
|
750
760
|
convert(col, _package.seqHelper);
|
|
751
761
|
}
|
|
752
762
|
|
|
753
|
-
//
|
|
754
|
-
//
|
|
755
|
-
//
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
//input: string separator
|
|
759
|
-
//output: string result
|
|
760
|
-
export async function convertSeqNotation(sequence: string, targetNotation: NOTATION, separator?: string): Promise<string | undefined | null> {
|
|
761
|
-
try {
|
|
762
|
-
const col = DG.Column.fromStrings('sequence', [sequence]);
|
|
763
|
-
const _df = DG.DataFrame.fromColumns([col]);
|
|
764
|
-
const semType = await grok.functions.call('Bio:detectMacromolecule', {col: col});
|
|
765
|
-
if (semType)
|
|
766
|
-
col.semType = semType;
|
|
767
|
-
const converterSh = _package.seqHelper.getSeqHandler(col);
|
|
768
|
-
const newColumn = converterSh.convert(targetNotation, separator);
|
|
769
|
-
return newColumn.get(0);
|
|
770
|
-
} catch (err: any) {
|
|
771
|
-
const [errMsg, errStack] = errInfo(err);
|
|
772
|
-
_package.logger.error(errMsg, undefined, errStack);
|
|
773
|
-
throw err;
|
|
774
|
-
}
|
|
763
|
+
//name: Convert Notation...
|
|
764
|
+
//input: column col {semType: Macromolecule}
|
|
765
|
+
//meta.action: Convert Notation...
|
|
766
|
+
export function convertColumnAction(col: DG.Column) {
|
|
767
|
+
convert(col, _package.seqHelper);
|
|
775
768
|
}
|
|
776
769
|
|
|
777
|
-
|
|
778
770
|
//name: monomerCellRenderer
|
|
779
771
|
//tags: cellRenderer
|
|
780
772
|
//meta.cellType: Monomer
|
|
@@ -835,7 +827,7 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
|
|
|
835
827
|
return resDf;
|
|
836
828
|
}
|
|
837
829
|
|
|
838
|
-
//top-menu: Bio |
|
|
830
|
+
//top-menu: Bio | Transform | Split to Monomers...
|
|
839
831
|
//name: Split to Monomers
|
|
840
832
|
//input: dataframe table
|
|
841
833
|
//input: column sequence { semType: Macromolecule }
|
|
@@ -979,7 +971,8 @@ export async function manageLibrariesApp(): Promise<DG.View> {
|
|
|
979
971
|
export async function manageLibrariesAppTreeBrowser(treeNode: DG.TreeViewGroup, browseView: DG.BrowseView) {
|
|
980
972
|
const libraries = (await (await MonomerLibManager.getInstance()).getFileManager()).getValidLibraryPaths();
|
|
981
973
|
libraries.forEach((libName) => {
|
|
982
|
-
const
|
|
974
|
+
const nodeName = libName.endsWith('.json') ? libName.substring(0, libName.length - 5) : libName;
|
|
975
|
+
const libNode = treeNode.item(nodeName);
|
|
983
976
|
// eslint-disable-next-line rxjs/no-ignored-subscription, rxjs/no-async-subscribe
|
|
984
977
|
libNode.onSelected.subscribe(async () => {
|
|
985
978
|
const monomerManager = await MonomerManager.getNewInstance();
|
package/src/tests/scoring.ts
CHANGED
|
@@ -21,8 +21,8 @@ category('Scoring', () => {
|
|
|
21
21
|
/* eslint-disable max-len */
|
|
22
22
|
const table = DG.DataFrame.fromCsv(`${sequence},${expectedSimilarity},${expectedIdentity}
|
|
23
23
|
PEPTIDE1{Aca.Orn.gGlu.Pqa.D-His_1Bn.dH.hHis.4Abz.D-Tic.D-Dap.Y.Iva.meS.F.P.F.D-1Nal}$$$$,1.0,1.0
|
|
24
|
-
PEPTIDE1{Iva.Gly_allyl.gGlu.Pqa.D-Dip.dH.hHis.4Abz.D-aHyp.D-Dap.Y.Iva.I.Tyr_26diMe.P.Asu.meC}$$$$,0.
|
|
25
|
-
PEPTIDE1{[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal]}$$$$V2.0,0.
|
|
24
|
+
PEPTIDE1{Iva.Gly_allyl.gGlu.Pqa.D-Dip.dH.hHis.4Abz.D-aHyp.D-Dap.Y.Iva.I.Tyr_26diMe.P.Asu.meC}$$$$,0.691,0.53
|
|
25
|
+
PEPTIDE1{[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal]}$$$$V2.0,0.37,0.0`
|
|
26
26
|
);
|
|
27
27
|
const seqCol: DG.Column<string> = table.getCol(sequence);
|
|
28
28
|
seqCol.meta.units = NOTATION.HELM;
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
/* eslint-disable max-lines-per-function */
|
|
1
2
|
import * as grok from 'datagrok-api/grok';
|
|
2
3
|
import * as DG from 'datagrok-api/dg';
|
|
3
4
|
|
|
@@ -68,7 +69,7 @@ PEPTIDE1{N.T.[dE].[Thr_PO3H2]}$$$$
|
|
|
68
69
|
PEPTIDE1{[Cys_SEt].T.[dK].[Thr_PO3H2]}$$$$
|
|
69
70
|
PEPTIDE1{[Cys_SEt].T.*.*}$$$$`,
|
|
70
71
|
units: NOTATION.HELM,
|
|
71
|
-
alphabet:
|
|
72
|
+
alphabet: ALPHABET.UN,
|
|
72
73
|
|
|
73
74
|
positionNames: {tag: null, start: '4', end: '7'}
|
|
74
75
|
}
|
|
@@ -4,9 +4,8 @@ import * as OCL from 'openchemlib/full';
|
|
|
4
4
|
|
|
5
5
|
import {errInfo} from '@datagrok-libraries/bio/src/utils/err-info';
|
|
6
6
|
import {RDModule, RDMol} from '@datagrok-libraries/chem-meta/src/rdkit-api';
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
9
|
-
import {getHelmHelper, IHelmHelper} from '@datagrok-libraries/bio/src/helm/helm-helper';
|
|
7
|
+
import {IMonomerLibBase} from '@datagrok-libraries/bio/src/types/index';
|
|
8
|
+
import {IHelmHelper} from '@datagrok-libraries/bio/src/helm/helm-helper';
|
|
10
9
|
import {MolfileWithMap, MonomerMap} from '@datagrok-libraries/bio/src/monomer-works/types';
|
|
11
10
|
|
|
12
11
|
import {Polymer} from './polymer';
|
|
@@ -14,9 +13,10 @@ import {GlobalMonomerPositionHandler} from './position-handler';
|
|
|
14
13
|
|
|
15
14
|
import {_package} from '../../../package';
|
|
16
15
|
import {getUnusedColName} from '@datagrok-libraries/bio/src/monomer-works/utils';
|
|
16
|
+
import {IHelmToMolfileConverter} from '@datagrok-libraries/bio/src/utils/seq-helper';
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
export class HelmToMolfileConverter {
|
|
19
|
+
export class HelmToMolfileConverter implements IHelmToMolfileConverter {
|
|
20
20
|
constructor(
|
|
21
21
|
private readonly helmHelper: IHelmHelper,
|
|
22
22
|
private readonly rdKitModule: RDModule,
|
|
@@ -41,6 +41,20 @@ export class HelmToMolfileConverter {
|
|
|
41
41
|
return smiles;
|
|
42
42
|
}
|
|
43
43
|
|
|
44
|
+
public molV3KtoMolV3KOCL(molV3k: string): string {
|
|
45
|
+
try {
|
|
46
|
+
if (!molV3k)
|
|
47
|
+
return '';
|
|
48
|
+
const oclMolecule = OCL.Molecule.fromMolfile(molV3k);
|
|
49
|
+
const molV3000 = oclMolecule.toMolfileV3();
|
|
50
|
+
return molV3000.replace('STERAC1', 'STEABS');
|
|
51
|
+
} catch (err) {
|
|
52
|
+
const [errMsg, errStack] = errInfo(err);
|
|
53
|
+
_package.logger.error(errMsg, undefined, errStack);
|
|
54
|
+
return '';
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
|
|
44
58
|
public getMolV3000ViaOCL(beautifiedMols: (RDMol | null)[], columnName: string): DG.Column<string> {
|
|
45
59
|
const beautifiedMolV2000 = beautifiedMols.map((mol) => {
|
|
46
60
|
if (mol === null)
|
|
@@ -52,9 +66,7 @@ export class HelmToMolfileConverter {
|
|
|
52
66
|
const molv3000Arr = new Array<string>(beautifiedMolV2000.length);
|
|
53
67
|
const chiralityPb = DG.TaskBarProgressIndicator.create(`Handling chirality...`);
|
|
54
68
|
for (let i = 0; i < beautifiedMolV2000.length; i++) {
|
|
55
|
-
|
|
56
|
-
const molV3000 = oclMolecule.toMolfileV3();
|
|
57
|
-
molv3000Arr[i] = molV3000.replace('STERAC1', 'STEABS');
|
|
69
|
+
molv3000Arr[i] = this.molV3KtoMolV3KOCL(beautifiedMolV2000[i]);
|
|
58
70
|
const progress = i / beautifiedMolV2000.length * 100;
|
|
59
71
|
chiralityPb.update(progress, `${progress?.toFixed(2)}% of molecules completed`);
|
|
60
72
|
}
|
|
@@ -136,9 +148,9 @@ export class HelmToMolfileConverter {
|
|
|
136
148
|
const woGapsRes = this.helmHelper.removeGaps(helm);
|
|
137
149
|
const woGapsHelm = woGapsRes.resHelm;
|
|
138
150
|
const woGapsReverseMap = new Map<number, number>();
|
|
139
|
-
for (const [orgPosIdx, woGapsPosIdx] of (woGapsRes.monomerMap?.entries() ?? []))
|
|
151
|
+
for (const [orgPosIdx, woGapsPosIdx] of (woGapsRes.monomerMap?.entries() ?? []))
|
|
140
152
|
woGapsReverseMap.set(woGapsPosIdx, orgPosIdx);
|
|
141
|
-
|
|
153
|
+
|
|
142
154
|
const pseudoMolfile = this.helmHelper.getMolfiles([woGapsHelm])[0];
|
|
143
155
|
const globalPositionHandler = new GlobalMonomerPositionHandler(pseudoMolfile);
|
|
144
156
|
const woGapsPolymer = new Polymer(woGapsHelm, this.rdKitModule, this.monomerLib);
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
/* eslint-disable max-len */
|
|
1
2
|
import {HELM_MONOMER_TYPE, HELM_POLYMER_TYPE} from '@datagrok-libraries/bio/src/utils/const';
|
|
2
3
|
import {cleanupHelmSymbol} from '@datagrok-libraries/bio/src/helm/utils';
|
|
3
4
|
|
|
@@ -5,8 +6,10 @@ import {Bond} from './types';
|
|
|
5
6
|
|
|
6
7
|
/** Wrapper over simple polymer substring of HELM, like RNA1{d(A)p} */
|
|
7
8
|
export class SimplePolymer {
|
|
9
|
+
private isNucleotideSequence = false;
|
|
8
10
|
constructor(private simplePolymer: string) {
|
|
9
11
|
this.polymerType = this.getPolymerType();
|
|
12
|
+
this.isNucleotideSequence = this.polymerType === HELM_POLYMER_TYPE.RNA;
|
|
10
13
|
this.idx = this.getIdx();
|
|
11
14
|
const {monomers, monomerTypes} = this.getMonomerSymbolsAndTypes();
|
|
12
15
|
this.monomers = monomers;
|
|
@@ -49,17 +52,23 @@ export class SimplePolymer {
|
|
|
49
52
|
const monomerList: string[] = [];
|
|
50
53
|
const monomerTypeList: HELM_MONOMER_TYPE[] = [];
|
|
51
54
|
monomerGroups.forEach((monomerGroup) => {
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
+
if (!this.isNucleotideSequence) {
|
|
56
|
+
// const splitted = monomerGroup.split(/\(|\)/).map((el) => el.replace(/[\[\]]/g, ''));
|
|
57
|
+
// monomerList.push(...splitted);
|
|
58
|
+
// WARNING: only the groups of the form r(A)p, as in RNA, are supported
|
|
55
59
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
+
monomerList.push(cleanupHelmSymbol(monomerGroup));
|
|
61
|
+
// const monomerTypes = splitted.map(
|
|
62
|
+
// (_, idx) => (idx % 2 === 0) ? HELM_MONOMER_TYPE.BACKBONE : HELM_MONOMER_TYPE.BRANCH
|
|
63
|
+
// );
|
|
60
64
|
|
|
61
|
-
|
|
62
|
-
|
|
65
|
+
// monomerTypeList.push(...monomerTypes);
|
|
66
|
+
monomerTypeList.push(HELM_MONOMER_TYPE.BACKBONE);
|
|
67
|
+
} else {
|
|
68
|
+
const splitted = monomerGroup.split(/\(|\)/).filter((el) => !!el).map((el) => cleanupHelmSymbol(el));
|
|
69
|
+
monomerList.push(...splitted);
|
|
70
|
+
splitted.forEach((_, i) => monomerTypeList.push(i % 3 === 1 ? HELM_MONOMER_TYPE.BRANCH : HELM_MONOMER_TYPE.BACKBONE));
|
|
71
|
+
}
|
|
63
72
|
});
|
|
64
73
|
return {monomers: monomerList, monomerTypes: monomerTypeList};
|
|
65
74
|
}
|
|
@@ -177,13 +177,13 @@ export class MonomerLibBase implements IMonomerLibBase {
|
|
|
177
177
|
// Symbol & Name
|
|
178
178
|
const symbol = monomer[REQ.SYMBOL];
|
|
179
179
|
const _name = monomer[REQ.NAME];
|
|
180
|
-
|
|
181
|
-
|
|
180
|
+
const [color, backgroundColor, lineColor] =
|
|
181
|
+
wem ? [wem.textcolor, wem.backgroundcolor, wem.linecolor] : ['#202020', '#A0A0A0', '#202020'];
|
|
182
182
|
res.append(ui.divH([
|
|
183
183
|
ui.div([symbol], {
|
|
184
184
|
style: {
|
|
185
185
|
/* fontWeight: 'bolder', */ textWrap: 'nowrap', marginLeft: '4px', marginRight: '4px',
|
|
186
|
-
color:
|
|
186
|
+
color: color, backgroundColor: backgroundColor, borderColor: lineColor,
|
|
187
187
|
borderWidth: '1px', borderStyle: 'solid', borderRadius: '2px', padding: '3px',
|
|
188
188
|
minWidth: '24px', textAlign: 'center',
|
|
189
189
|
}
|
|
@@ -192,7 +192,7 @@ export class MonomerLibBase implements IMonomerLibBase {
|
|
|
192
192
|
], {style: {display: 'flex', flexDirection: 'row', justifyContent: 'left'}}));
|
|
193
193
|
|
|
194
194
|
// Structure
|
|
195
|
-
const chemOptions = {autoCrop: true, autoCropMargin: 0, suppressChiralText: true};
|
|
195
|
+
//const chemOptions = {autoCrop: true, autoCropMargin: 0, suppressChiralText: true};
|
|
196
196
|
let structureEl: HTMLElement;
|
|
197
197
|
if (monomer.molfile)
|
|
198
198
|
structureEl = drawMoleculeCall(monomer.molfile);
|
|
@@ -1028,6 +1028,7 @@ function getCorrectedMolBlock(molBlock: string) {
|
|
|
1028
1028
|
// 2. RGP field is present in the correct format
|
|
1029
1029
|
// 3. R group labels are written as R# and not just R
|
|
1030
1030
|
// 4. there is no ISO field in the molblock. if there is, it needs to be substituted with RGP field and thats it.
|
|
1031
|
+
// 5. make sure that R groups have no metadata in the atomblocks
|
|
1031
1032
|
|
|
1032
1033
|
const lines = molBlock.split('\n');
|
|
1033
1034
|
|
|
@@ -1055,7 +1056,7 @@ function getCorrectedMolBlock(molBlock: string) {
|
|
|
1055
1056
|
rgroupLineNumbers[atomI - molStartIdx] = rgroupNum;
|
|
1056
1057
|
}
|
|
1057
1058
|
|
|
1058
|
-
const rgroupLineNums = Object.
|
|
1059
|
+
const rgroupLineNums = Object.keys(rgroupLineNumbers);
|
|
1059
1060
|
// find and possibly add rgp field
|
|
1060
1061
|
|
|
1061
1062
|
const rgpLineIdx = lines.findIndex((line) => line.startsWith('M') && line.includes('RGP'));
|
|
@@ -1066,6 +1067,23 @@ function getCorrectedMolBlock(molBlock: string) {
|
|
|
1066
1067
|
const mEndIdx = lines.findIndex((line) => line.startsWith('M') && line.includes('END'));
|
|
1067
1068
|
lines.splice(mEndIdx, 0, rgpLine);
|
|
1068
1069
|
}
|
|
1070
|
+
|
|
1071
|
+
//make sure that R# lines do not have any metadata that can be interpreted as isotopes or anything else
|
|
1072
|
+
//for example, following line could be interpreted as isotope with mass 2 in some cases
|
|
1073
|
+
//" 3.9970 0.3462 0.0000 R# 0 0 0 0 0 1 0 0 0 0 2 0"
|
|
1074
|
+
const rGroupActualLines = rgroupLineNums.filter((rLine) => !!Number.parseInt(rLine)).map((atomLine) => Number.parseInt(atomLine) + molStartIdx);
|
|
1075
|
+
rGroupActualLines.forEach((lineIdx) => {
|
|
1076
|
+
const splitLine = lines[lineIdx].split(' ');
|
|
1077
|
+
const rIdx = splitLine.findIndex((s) => s === 'R#');
|
|
1078
|
+
if (rIdx === -1)
|
|
1079
|
+
return;
|
|
1080
|
+
for (let i = rIdx + 1; i < splitLine.length; i++) {
|
|
1081
|
+
if (!!splitLine[i] && splitLine[i].length == 1 && (Number.parseInt(splitLine[i]) ?? 0) > 0)
|
|
1082
|
+
splitLine[i] = '0';
|
|
1083
|
+
}
|
|
1084
|
+
lines[lineIdx] = splitLine.join(' ');
|
|
1085
|
+
});
|
|
1086
|
+
|
|
1069
1087
|
return lines.join('\n');
|
|
1070
1088
|
}
|
|
1071
1089
|
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
/* eslint-disable max-lines */
|
|
2
|
+
/* eslint-disable @typescript-eslint/no-unused-vars */
|
|
1
3
|
import * as DG from 'datagrok-api/dg';
|
|
2
4
|
|
|
3
5
|
import wu from 'wu';
|
|
@@ -5,7 +7,7 @@ import wu from 'wu';
|
|
|
5
7
|
/* eslint-disable max-len */
|
|
6
8
|
import {ALIGNMENT, ALPHABET, candidateAlphabets, getSplitterWithSeparator, NOTATION, positionSeparator, splitterAsFasta, splitterAsHelm, TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule/index';
|
|
7
9
|
import {INotationProvider, ISeqSplitted, SeqColStats, SplitterFunc,} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
8
|
-
import {detectAlphabet, splitterAsFastaSimple, StringListSeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
|
|
10
|
+
import {detectAlphabet, detectHelmAlphabet, splitterAsFastaSimple, StringListSeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
|
|
9
11
|
import {mmDistanceFunctions, MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
10
12
|
import {mmDistanceFunctionType} from '@datagrok-libraries/ml/src/macromolecule-distance-functions/types';
|
|
11
13
|
import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
@@ -123,6 +125,12 @@ export class SeqHandler implements ISeqHandler {
|
|
|
123
125
|
uh.column.setTag(TAGS.alphabetSize, alphabetSize.toString());
|
|
124
126
|
uh.column.setTag(TAGS.alphabetIsMultichar, alphabetIsMultichar ? 'true' : 'false');
|
|
125
127
|
}
|
|
128
|
+
} else if (units === NOTATION.HELM) {
|
|
129
|
+
let alphabet = uh.column.getTag(TAGS.alphabet);
|
|
130
|
+
if (alphabet === null) {
|
|
131
|
+
alphabet = detectHelmAlphabet(uh.stats.freq, candidateAlphabets, uh.defaultGapOriginal);
|
|
132
|
+
uh.column.setTag(TAGS.alphabet, alphabet);
|
|
133
|
+
}
|
|
126
134
|
}
|
|
127
135
|
}
|
|
128
136
|
|
|
@@ -539,6 +547,7 @@ export class SeqHandler implements ISeqHandler {
|
|
|
539
547
|
// convert the peptides list to a set for faster lookup
|
|
540
548
|
const peptidesSet = new Set(peptides);
|
|
541
549
|
// get splitter for given separator and check if all monomers are in the lib
|
|
550
|
+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
542
551
|
const splitterFunc = getSplitterWithSeparator(this.separator!);
|
|
543
552
|
// iterate over the columns, split them and check if all monomers are in the lib
|
|
544
553
|
//TODO maybe add missing threshold so that if there are not too many missing monomers
|
|
@@ -596,12 +605,12 @@ export class SeqHandler implements ISeqHandler {
|
|
|
596
605
|
|
|
597
606
|
const isNucleotide = srcSeq.startsWith('RNA');
|
|
598
607
|
// items can be monomers or helms
|
|
599
|
-
const helmItemsArray =
|
|
608
|
+
const helmItemsArray = splitterAsHelm(srcSeq);
|
|
600
609
|
const tgtMonomersArray: string[] = [];
|
|
601
610
|
for (let posIdx = 0; posIdx < helmItemsArray.length; ++posIdx) {
|
|
602
611
|
let om: string = helmItemsArray.getOriginal(posIdx);
|
|
603
612
|
if (isNucleotide)
|
|
604
|
-
om = om.replace(HELM_WRAPPERS_REGEXP, '');
|
|
613
|
+
om = om.replace(HELM_WRAPPERS_REGEXP, '$1');
|
|
605
614
|
if (om === GapOriginals[NOTATION.HELM])
|
|
606
615
|
tgtMonomersArray.push(tgtGapOriginal);
|
|
607
616
|
else if (this.toFasta(tgtNotation as NOTATION) && om.length > 1) {
|
|
@@ -9,9 +9,8 @@ import {RDModule, RDMol} from '@datagrok-libraries/chem-meta/src/rdkit-api';
|
|
|
9
9
|
import {IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
10
10
|
import {getHelmHelper, IHelmHelper} from '@datagrok-libraries/bio/src/helm/helm-helper';
|
|
11
11
|
import {MolfileWithMap} from '@datagrok-libraries/bio/src/monomer-works/types';
|
|
12
|
-
import {getMolColName
|
|
12
|
+
import {getMolColName} from '@datagrok-libraries/bio/src/monomer-works/utils';
|
|
13
13
|
import {ChemTags} from '@datagrok-libraries/chem-meta/src/consts';
|
|
14
|
-
import {getMolHighlight} from '@datagrok-libraries/bio/src/monomer-works/seq-to-molfile';
|
|
15
14
|
import {IMonomerLibBase} from '@datagrok-libraries/bio/src/types/index';
|
|
16
15
|
|
|
17
16
|
import {HelmToMolfileConverter} from '../helm-to-molfile/converter';
|
|
@@ -19,6 +18,8 @@ import {ISeqHandler} from '@datagrok-libraries/bio/src/utils/macromolecule/seq-h
|
|
|
19
18
|
import {SeqHandler} from './seq-handler';
|
|
20
19
|
import {Column} from 'datagrok-api/dg';
|
|
21
20
|
import {NOTATION, TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
21
|
+
import {errInfo} from '@datagrok-libraries/bio/src/utils/err-info';
|
|
22
|
+
import {_package} from '../../package';
|
|
22
23
|
|
|
23
24
|
type SeqHelperWindowType = Window & { $seqHelperPromise?: Promise<SeqHelper> };
|
|
24
25
|
declare const window: SeqHelperWindowType;
|
|
@@ -44,6 +45,37 @@ export class SeqHelper implements ISeqHelper {
|
|
|
44
45
|
return new HelmToMolfileConverter(helmHelper, this.rdKitModule, monomerLib);
|
|
45
46
|
}
|
|
46
47
|
|
|
48
|
+
helmToAtomicLevelSingle(
|
|
49
|
+
helm: string, converter: HelmToMolfileConverter, chiralityEngine?: boolean, beautifyMol: boolean = true) {
|
|
50
|
+
if (!helm)
|
|
51
|
+
return MolfileWithMap.createEmpty();
|
|
52
|
+
const molfileV3k = converter.convertToMolfileV3K([helm])[0];
|
|
53
|
+
if (!molfileV3k || !molfileV3k.molfile)
|
|
54
|
+
return MolfileWithMap.createEmpty();
|
|
55
|
+
let mol: RDMol | null = null;
|
|
56
|
+
try {
|
|
57
|
+
let v3k = molfileV3k.molfile;
|
|
58
|
+
if (beautifyMol) {
|
|
59
|
+
mol = this.rdKitModule.get_mol(v3k);
|
|
60
|
+
if (!mol)
|
|
61
|
+
return MolfileWithMap.createEmpty();
|
|
62
|
+
mol.set_new_coords();
|
|
63
|
+
mol.normalize_depiction(1);
|
|
64
|
+
mol.straighten_depiction(true);
|
|
65
|
+
v3k = mol.get_v3Kmolblock();
|
|
66
|
+
}
|
|
67
|
+
if (chiralityEngine)
|
|
68
|
+
v3k = converter.molV3KtoMolV3KOCL(v3k);
|
|
69
|
+
return new MolfileWithMap(v3k, molfileV3k.monomers);
|
|
70
|
+
} catch (err) {
|
|
71
|
+
const [errMsg, errStack] = errInfo(err);
|
|
72
|
+
_package.logger.error(errMsg, undefined, errStack);
|
|
73
|
+
return MolfileWithMap.createEmpty();
|
|
74
|
+
} finally {
|
|
75
|
+
mol?.delete();
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
|
|
47
79
|
async helmToAtomicLevel(
|
|
48
80
|
helmCol: DG.Column<string>, chiralityEngine?: boolean, highlight?: boolean, overrideMonomerLib?: IMonomerLibBase
|
|
49
81
|
): Promise<ToAtomicLevelRes> {
|
|
@@ -54,40 +86,44 @@ export class SeqHelper implements ISeqHelper {
|
|
|
54
86
|
|
|
55
87
|
const converter = await this.getHelmToMolfileConverter(monomerLib);
|
|
56
88
|
|
|
57
|
-
//#region From HelmToMolfileConverter.convertToRdKitBeautifiedMolfileColumn
|
|
58
|
-
|
|
59
|
-
const molfilesV3K = converter.convertToMolfileV3K(helmCol.toList());
|
|
60
|
-
|
|
61
|
-
const beautifiedMolList: (RDMol | null)[] = molfilesV3K.map((item) => {
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
});
|
|
73
|
-
|
|
74
|
-
let molList: string[];
|
|
75
|
-
if (chiralityEngine)
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
}
|
|
89
|
+
// //#region From HelmToMolfileConverter.convertToRdKitBeautifiedMolfileColumn
|
|
90
|
+
|
|
91
|
+
// const molfilesV3K = converter.convertToMolfileV3K(helmCol.toList());
|
|
92
|
+
|
|
93
|
+
// const beautifiedMolList: (RDMol | null)[] = molfilesV3K.map((item) => {
|
|
94
|
+
// const molfile = item.molfile;
|
|
95
|
+
// if (molfile === '')
|
|
96
|
+
// return null;
|
|
97
|
+
// const mol = this.rdKitModule.get_mol(molfile);
|
|
98
|
+
// if (!mol)
|
|
99
|
+
// return null;
|
|
100
|
+
// mol.set_new_coords();
|
|
101
|
+
// mol.normalize_depiction(1);
|
|
102
|
+
// mol.straighten_depiction(true);
|
|
103
|
+
// return mol;
|
|
104
|
+
// });
|
|
105
|
+
|
|
106
|
+
// let molList: string[];
|
|
107
|
+
// if (chiralityEngine)// also creates progress indicator
|
|
108
|
+
// molList = converter.getMolV3000ViaOCL(beautifiedMolList, molColName).toList();
|
|
109
|
+
// // TODO: Cleanup mol objects
|
|
110
|
+
// else {
|
|
111
|
+
// molList = beautifiedMolList.map((mol) => {
|
|
112
|
+
// if (mol === null)
|
|
113
|
+
// return '';
|
|
114
|
+
// const molBlock = mol.get_v3Kmolblock();
|
|
115
|
+
// mol!.delete();
|
|
116
|
+
// return molBlock;
|
|
117
|
+
// });
|
|
118
|
+
// }
|
|
87
119
|
|
|
88
120
|
//#endregion From HelmToMolfileConverter
|
|
121
|
+
const helmList = helmCol.toList();
|
|
122
|
+
const molList = new Array<string>(helmCol.length);
|
|
123
|
+
for (let i = 0; i < helmCol.length; i++)
|
|
124
|
+
molList[i] = (await this.helmToAtomicLevelSingle(helmList[i], converter, chiralityEngine)).molfile;
|
|
89
125
|
|
|
90
|
-
const molHlList = molfilesV3K.map((item: MolfileWithMap) => getMolHighlight(item.monomers.values(), monomerLib));
|
|
126
|
+
//const molHlList = molfilesV3K.map((item: MolfileWithMap) => getMolHighlight(item.monomers.values(), monomerLib));
|
|
91
127
|
|
|
92
128
|
const molCol = DG.Column.fromStrings(molColName, molList);
|
|
93
129
|
molCol.semType = DG.SEMTYPE.MOLECULE;
|
|
@@ -53,7 +53,7 @@ export async function sequenceToMolfile(
|
|
|
53
53
|
res.molCol.name = molColName;
|
|
54
54
|
df.columns.add(res.molCol, true);
|
|
55
55
|
|
|
56
|
-
buildMonomerHoverLink(macroMolecule, res.molCol, monomerLib, seqHelper, rdKitModule);
|
|
56
|
+
await buildMonomerHoverLink(macroMolecule, res.molCol, monomerLib, seqHelper, rdKitModule, nonlinear);
|
|
57
57
|
res.molCol.setTag(ChemTags.SEQUENCE_SRC_HL_MONOMERS, String(highlight));
|
|
58
58
|
await grok.data.detectSemanticTypes(df);
|
|
59
59
|
}
|
|
@@ -26,7 +26,9 @@ export function getCompositionAnalysisWidget(
|
|
|
26
26
|
const rowIdx = val.cell.rowIndex;
|
|
27
27
|
const seqSS = sh.getSplitted(rowIdx);
|
|
28
28
|
wu.count(0).take(seqSS.length).filter((posIdx) => !seqSS.isGap(posIdx)).forEach((posIdx) => {
|
|
29
|
-
|
|
29
|
+
let cm = seqSS.getCanonical(posIdx);
|
|
30
|
+
if (biotype === HelmTypes.NUCLEOTIDE && sh.isHelm() && cm[1] === '(' && cm[cm.length - 2] === ')')
|
|
31
|
+
cm = cm.substring(2, cm.length - 2);
|
|
30
32
|
const count = counts[cm] || 0;
|
|
31
33
|
counts[cm] = count + 1;
|
|
32
34
|
});
|