@datagrok/bio 2.18.4 → 2.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +5 -0
- package/dist/package-test.js +2 -2
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +2 -2
- package/dist/package.js.map +1 -1
- package/files/monomer-libraries/HELMCoreLibrary.json +18926 -18215
- package/files/tests/to-atomic-level-dna-fasta-output.csv +15077 -15077
- package/files/tests/to-atomic-level-msa-fasta-output.csv +1903 -1903
- package/files/tests/to-atomic-level-msa-separator-output.csv +3236 -3236
- package/files/tests/to-atomic-level-peptides-fasta-output.csv +32262 -32262
- package/files/tests/to-atomic-level-pt-fasta-2.mol +29 -29
- package/package.json +3 -3
- package/src/package.ts +20 -28
- package/src/tests/scoring.ts +2 -2
- package/src/utils/helm-to-molfile/converter/converter.ts +21 -9
- package/src/utils/helm-to-molfile/converter/simple-polymer.ts +18 -9
- package/src/utils/monomer-lib/monomer-lib-base.ts +4 -4
- package/src/utils/seq-helper/seq-handler.ts +2 -2
- package/src/utils/seq-helper/seq-helper.ts +68 -32
- package/src/utils/sequence-to-mol.ts +1 -1
- package/test-console-output-1.log +895 -886
- package/test-record-1.mp4 +0 -0
|
@@ -5,41 +5,41 @@ Datagrok macromolecule handler
|
|
|
5
5
|
M V30 BEGIN CTAB
|
|
6
6
|
M V30 COUNTS 17 16 0 0 0
|
|
7
7
|
M V30 BEGIN ATOM
|
|
8
|
-
M V30 1 C
|
|
9
|
-
M V30 2 C
|
|
8
|
+
M V30 1 C 1.2991 -2.25 0.000000 0
|
|
9
|
+
M V30 2 C 1.2991 -0.75 0.000000 0
|
|
10
10
|
M V30 3 N 0 0 0.000000 0
|
|
11
|
-
M V30 4 C
|
|
12
|
-
M V30 5 O
|
|
13
|
-
M V30 6 N
|
|
14
|
-
M V30 7 C
|
|
15
|
-
M V30 8 N
|
|
16
|
-
M V30 9 N
|
|
17
|
-
M V30 10 C
|
|
18
|
-
M V30 11 C
|
|
19
|
-
M V30 12 C
|
|
20
|
-
M V30 13 C
|
|
21
|
-
M V30 14 N
|
|
22
|
-
M V30 15 C
|
|
23
|
-
M V30 16 O
|
|
24
|
-
M V30 17 O
|
|
11
|
+
M V30 4 C 2.5981 0 0.000000 0
|
|
12
|
+
M V30 5 O 3.8971 -0.75 0.000000 0
|
|
13
|
+
M V30 6 N 7.7941 9 0.000000 0
|
|
14
|
+
M V30 7 C 6.4951 8.2501 0.000000 0
|
|
15
|
+
M V30 8 N 5.196 9 0.000000 0
|
|
16
|
+
M V30 9 N 6.4951 6.7501 0.000000 0
|
|
17
|
+
M V30 10 C 5.1961 6 0.000000 0
|
|
18
|
+
M V30 11 C 5.1961 4.5 0.000000 0
|
|
19
|
+
M V30 12 C 3.8971 3.75 0.000000 0
|
|
20
|
+
M V30 13 C 3.8971 2.25 0.000000 0
|
|
21
|
+
M V30 14 N 2.5981 1.5 0.000000 0
|
|
22
|
+
M V30 15 C 5.1962 1.5 0.000000 0
|
|
23
|
+
M V30 16 O 6.4952 2.25 0.000000 0
|
|
24
|
+
M V30 17 O 5.1962 -0.0001 0.000000 0
|
|
25
25
|
M V30 END ATOM
|
|
26
26
|
M V30 BEGIN BOND
|
|
27
|
-
M V30 1 1 2 1 CFG=3
|
|
28
|
-
M V30 2 1 2 3
|
|
27
|
+
M V30 1 1 2 1 CFG=3
|
|
28
|
+
M V30 2 1 2 3
|
|
29
29
|
M V30 3 1 2 4
|
|
30
|
-
M V30 4 2 4 5
|
|
30
|
+
M V30 4 2 4 5
|
|
31
31
|
M V30 5 1 4 14
|
|
32
|
-
M V30 6
|
|
33
|
-
M V30 7
|
|
34
|
-
M V30 8 1 7 9
|
|
35
|
-
M V30 9 1 9 10
|
|
36
|
-
M V30 10 1 10 11
|
|
37
|
-
M V30 11 1 11 12
|
|
38
|
-
M V30 12 1 13 12 CFG=1
|
|
39
|
-
M V30 13 1 13 14
|
|
32
|
+
M V30 6 2 6 7
|
|
33
|
+
M V30 7 1 7 8
|
|
34
|
+
M V30 8 1 7 9
|
|
35
|
+
M V30 9 1 9 10
|
|
36
|
+
M V30 10 1 10 11
|
|
37
|
+
M V30 11 1 11 12
|
|
38
|
+
M V30 12 1 13 12 CFG=1
|
|
39
|
+
M V30 13 1 13 14
|
|
40
40
|
M V30 14 1 13 15
|
|
41
|
-
M V30 15 2 15 16
|
|
41
|
+
M V30 15 2 15 16
|
|
42
42
|
M V30 16 1 15 17
|
|
43
43
|
M V30 END BOND
|
|
44
44
|
M V30 END CTAB
|
|
45
|
-
M END
|
|
45
|
+
M END
|
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.18.4",
|
|
8
|
+
"version": "2.19.0",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -44,13 +44,13 @@
|
|
|
44
44
|
],
|
|
45
45
|
"dependencies": {
|
|
46
46
|
"@biowasm/aioli": "^3.1.0",
|
|
47
|
-
"@datagrok-libraries/bio": "^5.
|
|
47
|
+
"@datagrok-libraries/bio": "^5.50.1",
|
|
48
48
|
"@datagrok-libraries/chem-meta": "^1.2.7",
|
|
49
49
|
"@datagrok-libraries/math": "^1.2.4",
|
|
50
50
|
"@datagrok-libraries/ml": "^6.7.6",
|
|
51
51
|
"@datagrok-libraries/tutorials": "^1.4.3",
|
|
52
52
|
"@datagrok-libraries/utils": "^4.4.0",
|
|
53
|
-
"datagrok-api": "^1.
|
|
53
|
+
"datagrok-api": "^1.24.0",
|
|
54
54
|
"@webgpu/types": "^0.1.40",
|
|
55
55
|
"ajv": "^8.12.0",
|
|
56
56
|
"ajv-errors": "^3.0.0",
|
package/src/package.ts
CHANGED
|
@@ -422,7 +422,7 @@ export function getRegion(
|
|
|
422
422
|
start ?? null, end ?? null, name ?? null);
|
|
423
423
|
}
|
|
424
424
|
|
|
425
|
-
//top-menu: Bio |
|
|
425
|
+
//top-menu: Bio | Calculate | Get Region...
|
|
426
426
|
//name: Get Region Top Menu
|
|
427
427
|
//description: Get sequences for a region specified from a Macromolecule
|
|
428
428
|
//input: dataframe table [Input data table]
|
|
@@ -607,14 +607,13 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, molecules: DG.Co
|
|
|
607
607
|
return res;
|
|
608
608
|
}
|
|
609
609
|
|
|
610
|
-
//top-menu: Bio |
|
|
610
|
+
//top-menu: Bio | Transform | To Atomic Level...
|
|
611
611
|
//name: To Atomic Level
|
|
612
612
|
//description: Converts sequences to molblocks
|
|
613
613
|
//input: dataframe table [Input data table]
|
|
614
614
|
//input: column seqCol {caption: Sequence; semType: Macromolecule}
|
|
615
615
|
//input: bool nonlinear =false {caption: Non-linear; description: Slower mode for cycling/branching HELM structures}
|
|
616
616
|
//input: bool highlight =false {caption: Highlight monomers; description: Highlight monomers' substructures of the molecule }
|
|
617
|
-
//output:
|
|
618
617
|
export async function toAtomicLevel(
|
|
619
618
|
table: DG.DataFrame, seqCol: DG.Column, nonlinear: boolean, highlight: boolean = false
|
|
620
619
|
): Promise<void> {
|
|
@@ -630,6 +629,17 @@ export async function toAtomicLevel(
|
|
|
630
629
|
}
|
|
631
630
|
}
|
|
632
631
|
|
|
632
|
+
//name: To Atomic Level...
|
|
633
|
+
//input: column seqCol {semType: Macromolecule}
|
|
634
|
+
//meta.action: to atomic level
|
|
635
|
+
export async function toAtomicLevelAction(seqCol: DG.Column) {
|
|
636
|
+
if (!seqCol?.dataFrame)
|
|
637
|
+
throw new Error('Sequence column is not found or its data frame is not empty');
|
|
638
|
+
const func = DG.Func.find({name: 'toAtomicLevel', package: 'Bio'})[0];
|
|
639
|
+
if (!func) throw new Error('To Atomic level Function not found');
|
|
640
|
+
func.prepare({table: seqCol.dataFrame, seqCol: seqCol}).edit();
|
|
641
|
+
}
|
|
642
|
+
|
|
633
643
|
//top-menu: Bio | Analyze | MSA...
|
|
634
644
|
//name: MSA
|
|
635
645
|
//description: Performs multiple sequence alignment
|
|
@@ -743,38 +753,20 @@ export function importBam(fileContent: string): DG.DataFrame [] {
|
|
|
743
753
|
return [];
|
|
744
754
|
}
|
|
745
755
|
|
|
746
|
-
//top-menu: Bio |
|
|
756
|
+
//top-menu: Bio | Transform | Convert Notation...
|
|
747
757
|
//name: convertDialog
|
|
748
758
|
export function convertDialog() {
|
|
749
759
|
const col: DG.Column<string> | undefined = getMacromoleculeColumns()[0];
|
|
750
760
|
convert(col, _package.seqHelper);
|
|
751
761
|
}
|
|
752
762
|
|
|
753
|
-
//
|
|
754
|
-
//
|
|
755
|
-
//
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
//input: string separator
|
|
759
|
-
//output: string result
|
|
760
|
-
export async function convertSeqNotation(sequence: string, targetNotation: NOTATION, separator?: string): Promise<string | undefined | null> {
|
|
761
|
-
try {
|
|
762
|
-
const col = DG.Column.fromStrings('sequence', [sequence]);
|
|
763
|
-
const _df = DG.DataFrame.fromColumns([col]);
|
|
764
|
-
const semType = await grok.functions.call('Bio:detectMacromolecule', {col: col});
|
|
765
|
-
if (semType)
|
|
766
|
-
col.semType = semType;
|
|
767
|
-
const converterSh = _package.seqHelper.getSeqHandler(col);
|
|
768
|
-
const newColumn = converterSh.convert(targetNotation, separator);
|
|
769
|
-
return newColumn.get(0);
|
|
770
|
-
} catch (err: any) {
|
|
771
|
-
const [errMsg, errStack] = errInfo(err);
|
|
772
|
-
_package.logger.error(errMsg, undefined, errStack);
|
|
773
|
-
throw err;
|
|
774
|
-
}
|
|
763
|
+
//name: Convert Notation...
|
|
764
|
+
//input: column col {semType: Macromolecule}
|
|
765
|
+
//meta.action: Convert Notation...
|
|
766
|
+
export function convertColumnAction(col: DG.Column) {
|
|
767
|
+
convert(col, _package.seqHelper);
|
|
775
768
|
}
|
|
776
769
|
|
|
777
|
-
|
|
778
770
|
//name: monomerCellRenderer
|
|
779
771
|
//tags: cellRenderer
|
|
780
772
|
//meta.cellType: Monomer
|
|
@@ -835,7 +827,7 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
|
|
|
835
827
|
return resDf;
|
|
836
828
|
}
|
|
837
829
|
|
|
838
|
-
//top-menu: Bio |
|
|
830
|
+
//top-menu: Bio | Transform | Split to Monomers...
|
|
839
831
|
//name: Split to Monomers
|
|
840
832
|
//input: dataframe table
|
|
841
833
|
//input: column sequence { semType: Macromolecule }
|
package/src/tests/scoring.ts
CHANGED
|
@@ -21,8 +21,8 @@ category('Scoring', () => {
|
|
|
21
21
|
/* eslint-disable max-len */
|
|
22
22
|
const table = DG.DataFrame.fromCsv(`${sequence},${expectedSimilarity},${expectedIdentity}
|
|
23
23
|
PEPTIDE1{Aca.Orn.gGlu.Pqa.D-His_1Bn.dH.hHis.4Abz.D-Tic.D-Dap.Y.Iva.meS.F.P.F.D-1Nal}$$$$,1.0,1.0
|
|
24
|
-
PEPTIDE1{Iva.Gly_allyl.gGlu.Pqa.D-Dip.dH.hHis.4Abz.D-aHyp.D-Dap.Y.Iva.I.Tyr_26diMe.P.Asu.meC}$$$$,0.
|
|
25
|
-
PEPTIDE1{[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal]}$$$$V2.0,0.
|
|
24
|
+
PEPTIDE1{Iva.Gly_allyl.gGlu.Pqa.D-Dip.dH.hHis.4Abz.D-aHyp.D-Dap.Y.Iva.I.Tyr_26diMe.P.Asu.meC}$$$$,0.691,0.53
|
|
25
|
+
PEPTIDE1{[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal]}$$$$V2.0,0.37,0.0`
|
|
26
26
|
);
|
|
27
27
|
const seqCol: DG.Column<string> = table.getCol(sequence);
|
|
28
28
|
seqCol.meta.units = NOTATION.HELM;
|
|
@@ -4,9 +4,8 @@ import * as OCL from 'openchemlib/full';
|
|
|
4
4
|
|
|
5
5
|
import {errInfo} from '@datagrok-libraries/bio/src/utils/err-info';
|
|
6
6
|
import {RDModule, RDMol} from '@datagrok-libraries/chem-meta/src/rdkit-api';
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
9
|
-
import {getHelmHelper, IHelmHelper} from '@datagrok-libraries/bio/src/helm/helm-helper';
|
|
7
|
+
import {IMonomerLibBase} from '@datagrok-libraries/bio/src/types/index';
|
|
8
|
+
import {IHelmHelper} from '@datagrok-libraries/bio/src/helm/helm-helper';
|
|
10
9
|
import {MolfileWithMap, MonomerMap} from '@datagrok-libraries/bio/src/monomer-works/types';
|
|
11
10
|
|
|
12
11
|
import {Polymer} from './polymer';
|
|
@@ -14,9 +13,10 @@ import {GlobalMonomerPositionHandler} from './position-handler';
|
|
|
14
13
|
|
|
15
14
|
import {_package} from '../../../package';
|
|
16
15
|
import {getUnusedColName} from '@datagrok-libraries/bio/src/monomer-works/utils';
|
|
16
|
+
import {IHelmToMolfileConverter} from '@datagrok-libraries/bio/src/utils/seq-helper';
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
export class HelmToMolfileConverter {
|
|
19
|
+
export class HelmToMolfileConverter implements IHelmToMolfileConverter {
|
|
20
20
|
constructor(
|
|
21
21
|
private readonly helmHelper: IHelmHelper,
|
|
22
22
|
private readonly rdKitModule: RDModule,
|
|
@@ -41,6 +41,20 @@ export class HelmToMolfileConverter {
|
|
|
41
41
|
return smiles;
|
|
42
42
|
}
|
|
43
43
|
|
|
44
|
+
public molV3KtoMolV3KOCL(molV3k: string): string {
|
|
45
|
+
try {
|
|
46
|
+
if (!molV3k)
|
|
47
|
+
return '';
|
|
48
|
+
const oclMolecule = OCL.Molecule.fromMolfile(molV3k);
|
|
49
|
+
const molV3000 = oclMolecule.toMolfileV3();
|
|
50
|
+
return molV3000.replace('STERAC1', 'STEABS');
|
|
51
|
+
} catch (err) {
|
|
52
|
+
const [errMsg, errStack] = errInfo(err);
|
|
53
|
+
_package.logger.error(errMsg, undefined, errStack);
|
|
54
|
+
return '';
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
|
|
44
58
|
public getMolV3000ViaOCL(beautifiedMols: (RDMol | null)[], columnName: string): DG.Column<string> {
|
|
45
59
|
const beautifiedMolV2000 = beautifiedMols.map((mol) => {
|
|
46
60
|
if (mol === null)
|
|
@@ -52,9 +66,7 @@ export class HelmToMolfileConverter {
|
|
|
52
66
|
const molv3000Arr = new Array<string>(beautifiedMolV2000.length);
|
|
53
67
|
const chiralityPb = DG.TaskBarProgressIndicator.create(`Handling chirality...`);
|
|
54
68
|
for (let i = 0; i < beautifiedMolV2000.length; i++) {
|
|
55
|
-
|
|
56
|
-
const molV3000 = oclMolecule.toMolfileV3();
|
|
57
|
-
molv3000Arr[i] = molV3000.replace('STERAC1', 'STEABS');
|
|
69
|
+
molv3000Arr[i] = this.molV3KtoMolV3KOCL(beautifiedMolV2000[i]);
|
|
58
70
|
const progress = i / beautifiedMolV2000.length * 100;
|
|
59
71
|
chiralityPb.update(progress, `${progress?.toFixed(2)}% of molecules completed`);
|
|
60
72
|
}
|
|
@@ -136,9 +148,9 @@ export class HelmToMolfileConverter {
|
|
|
136
148
|
const woGapsRes = this.helmHelper.removeGaps(helm);
|
|
137
149
|
const woGapsHelm = woGapsRes.resHelm;
|
|
138
150
|
const woGapsReverseMap = new Map<number, number>();
|
|
139
|
-
for (const [orgPosIdx, woGapsPosIdx] of (woGapsRes.monomerMap?.entries() ?? []))
|
|
151
|
+
for (const [orgPosIdx, woGapsPosIdx] of (woGapsRes.monomerMap?.entries() ?? []))
|
|
140
152
|
woGapsReverseMap.set(woGapsPosIdx, orgPosIdx);
|
|
141
|
-
|
|
153
|
+
|
|
142
154
|
const pseudoMolfile = this.helmHelper.getMolfiles([woGapsHelm])[0];
|
|
143
155
|
const globalPositionHandler = new GlobalMonomerPositionHandler(pseudoMolfile);
|
|
144
156
|
const woGapsPolymer = new Polymer(woGapsHelm, this.rdKitModule, this.monomerLib);
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
/* eslint-disable max-len */
|
|
1
2
|
import {HELM_MONOMER_TYPE, HELM_POLYMER_TYPE} from '@datagrok-libraries/bio/src/utils/const';
|
|
2
3
|
import {cleanupHelmSymbol} from '@datagrok-libraries/bio/src/helm/utils';
|
|
3
4
|
|
|
@@ -5,8 +6,10 @@ import {Bond} from './types';
|
|
|
5
6
|
|
|
6
7
|
/** Wrapper over simple polymer substring of HELM, like RNA1{d(A)p} */
|
|
7
8
|
export class SimplePolymer {
|
|
9
|
+
private isNucleotideSequence = false;
|
|
8
10
|
constructor(private simplePolymer: string) {
|
|
9
11
|
this.polymerType = this.getPolymerType();
|
|
12
|
+
this.isNucleotideSequence = this.polymerType === HELM_POLYMER_TYPE.RNA;
|
|
10
13
|
this.idx = this.getIdx();
|
|
11
14
|
const {monomers, monomerTypes} = this.getMonomerSymbolsAndTypes();
|
|
12
15
|
this.monomers = monomers;
|
|
@@ -49,17 +52,23 @@ export class SimplePolymer {
|
|
|
49
52
|
const monomerList: string[] = [];
|
|
50
53
|
const monomerTypeList: HELM_MONOMER_TYPE[] = [];
|
|
51
54
|
monomerGroups.forEach((monomerGroup) => {
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
+
if (!this.isNucleotideSequence) {
|
|
56
|
+
// const splitted = monomerGroup.split(/\(|\)/).map((el) => el.replace(/[\[\]]/g, ''));
|
|
57
|
+
// monomerList.push(...splitted);
|
|
58
|
+
// WARNING: only the groups of the form r(A)p, as in RNA, are supported
|
|
55
59
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
+
monomerList.push(cleanupHelmSymbol(monomerGroup));
|
|
61
|
+
// const monomerTypes = splitted.map(
|
|
62
|
+
// (_, idx) => (idx % 2 === 0) ? HELM_MONOMER_TYPE.BACKBONE : HELM_MONOMER_TYPE.BRANCH
|
|
63
|
+
// );
|
|
60
64
|
|
|
61
|
-
|
|
62
|
-
|
|
65
|
+
// monomerTypeList.push(...monomerTypes);
|
|
66
|
+
monomerTypeList.push(HELM_MONOMER_TYPE.BACKBONE);
|
|
67
|
+
} else {
|
|
68
|
+
const splitted = monomerGroup.split(/\(|\)/).filter((el) => !!el).map((el) => cleanupHelmSymbol(el));
|
|
69
|
+
monomerList.push(...splitted);
|
|
70
|
+
splitted.forEach((_, i) => monomerTypeList.push(i % 3 === 1 ? HELM_MONOMER_TYPE.BRANCH : HELM_MONOMER_TYPE.BACKBONE));
|
|
71
|
+
}
|
|
63
72
|
});
|
|
64
73
|
return {monomers: monomerList, monomerTypes: monomerTypeList};
|
|
65
74
|
}
|
|
@@ -177,13 +177,13 @@ export class MonomerLibBase implements IMonomerLibBase {
|
|
|
177
177
|
// Symbol & Name
|
|
178
178
|
const symbol = monomer[REQ.SYMBOL];
|
|
179
179
|
const _name = monomer[REQ.NAME];
|
|
180
|
-
|
|
181
|
-
|
|
180
|
+
const [color, backgroundColor, lineColor] =
|
|
181
|
+
wem ? [wem.textcolor, wem.backgroundcolor, wem.linecolor] : ['#202020', '#A0A0A0', '#202020'];
|
|
182
182
|
res.append(ui.divH([
|
|
183
183
|
ui.div([symbol], {
|
|
184
184
|
style: {
|
|
185
185
|
/* fontWeight: 'bolder', */ textWrap: 'nowrap', marginLeft: '4px', marginRight: '4px',
|
|
186
|
-
color:
|
|
186
|
+
color: color, backgroundColor: backgroundColor, borderColor: lineColor,
|
|
187
187
|
borderWidth: '1px', borderStyle: 'solid', borderRadius: '2px', padding: '3px',
|
|
188
188
|
minWidth: '24px', textAlign: 'center',
|
|
189
189
|
}
|
|
@@ -192,7 +192,7 @@ export class MonomerLibBase implements IMonomerLibBase {
|
|
|
192
192
|
], {style: {display: 'flex', flexDirection: 'row', justifyContent: 'left'}}));
|
|
193
193
|
|
|
194
194
|
// Structure
|
|
195
|
-
const chemOptions = {autoCrop: true, autoCropMargin: 0, suppressChiralText: true};
|
|
195
|
+
//const chemOptions = {autoCrop: true, autoCropMargin: 0, suppressChiralText: true};
|
|
196
196
|
let structureEl: HTMLElement;
|
|
197
197
|
if (monomer.molfile)
|
|
198
198
|
structureEl = drawMoleculeCall(monomer.molfile);
|
|
@@ -605,12 +605,12 @@ export class SeqHandler implements ISeqHandler {
|
|
|
605
605
|
|
|
606
606
|
const isNucleotide = srcSeq.startsWith('RNA');
|
|
607
607
|
// items can be monomers or helms
|
|
608
|
-
const helmItemsArray =
|
|
608
|
+
const helmItemsArray = splitterAsHelm(srcSeq);
|
|
609
609
|
const tgtMonomersArray: string[] = [];
|
|
610
610
|
for (let posIdx = 0; posIdx < helmItemsArray.length; ++posIdx) {
|
|
611
611
|
let om: string = helmItemsArray.getOriginal(posIdx);
|
|
612
612
|
if (isNucleotide)
|
|
613
|
-
om = om.replace(HELM_WRAPPERS_REGEXP, '');
|
|
613
|
+
om = om.replace(HELM_WRAPPERS_REGEXP, '$1');
|
|
614
614
|
if (om === GapOriginals[NOTATION.HELM])
|
|
615
615
|
tgtMonomersArray.push(tgtGapOriginal);
|
|
616
616
|
else if (this.toFasta(tgtNotation as NOTATION) && om.length > 1) {
|
|
@@ -9,9 +9,8 @@ import {RDModule, RDMol} from '@datagrok-libraries/chem-meta/src/rdkit-api';
|
|
|
9
9
|
import {IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
10
10
|
import {getHelmHelper, IHelmHelper} from '@datagrok-libraries/bio/src/helm/helm-helper';
|
|
11
11
|
import {MolfileWithMap} from '@datagrok-libraries/bio/src/monomer-works/types';
|
|
12
|
-
import {getMolColName
|
|
12
|
+
import {getMolColName} from '@datagrok-libraries/bio/src/monomer-works/utils';
|
|
13
13
|
import {ChemTags} from '@datagrok-libraries/chem-meta/src/consts';
|
|
14
|
-
import {getMolHighlight} from '@datagrok-libraries/bio/src/monomer-works/seq-to-molfile';
|
|
15
14
|
import {IMonomerLibBase} from '@datagrok-libraries/bio/src/types/index';
|
|
16
15
|
|
|
17
16
|
import {HelmToMolfileConverter} from '../helm-to-molfile/converter';
|
|
@@ -19,6 +18,8 @@ import {ISeqHandler} from '@datagrok-libraries/bio/src/utils/macromolecule/seq-h
|
|
|
19
18
|
import {SeqHandler} from './seq-handler';
|
|
20
19
|
import {Column} from 'datagrok-api/dg';
|
|
21
20
|
import {NOTATION, TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
21
|
+
import {errInfo} from '@datagrok-libraries/bio/src/utils/err-info';
|
|
22
|
+
import {_package} from '../../package';
|
|
22
23
|
|
|
23
24
|
type SeqHelperWindowType = Window & { $seqHelperPromise?: Promise<SeqHelper> };
|
|
24
25
|
declare const window: SeqHelperWindowType;
|
|
@@ -44,6 +45,37 @@ export class SeqHelper implements ISeqHelper {
|
|
|
44
45
|
return new HelmToMolfileConverter(helmHelper, this.rdKitModule, monomerLib);
|
|
45
46
|
}
|
|
46
47
|
|
|
48
|
+
helmToAtomicLevelSingle(
|
|
49
|
+
helm: string, converter: HelmToMolfileConverter, chiralityEngine?: boolean, beautifyMol: boolean = true) {
|
|
50
|
+
if (!helm)
|
|
51
|
+
return MolfileWithMap.createEmpty();
|
|
52
|
+
const molfileV3k = converter.convertToMolfileV3K([helm])[0];
|
|
53
|
+
if (!molfileV3k || !molfileV3k.molfile)
|
|
54
|
+
return MolfileWithMap.createEmpty();
|
|
55
|
+
let mol: RDMol | null = null;
|
|
56
|
+
try {
|
|
57
|
+
let v3k = molfileV3k.molfile;
|
|
58
|
+
if (beautifyMol) {
|
|
59
|
+
mol = this.rdKitModule.get_mol(v3k);
|
|
60
|
+
if (!mol)
|
|
61
|
+
return MolfileWithMap.createEmpty();
|
|
62
|
+
mol.set_new_coords();
|
|
63
|
+
mol.normalize_depiction(1);
|
|
64
|
+
mol.straighten_depiction(true);
|
|
65
|
+
v3k = mol.get_v3Kmolblock();
|
|
66
|
+
}
|
|
67
|
+
if (chiralityEngine)
|
|
68
|
+
v3k = converter.molV3KtoMolV3KOCL(v3k);
|
|
69
|
+
return new MolfileWithMap(v3k, molfileV3k.monomers);
|
|
70
|
+
} catch (err) {
|
|
71
|
+
const [errMsg, errStack] = errInfo(err);
|
|
72
|
+
_package.logger.error(errMsg, undefined, errStack);
|
|
73
|
+
return MolfileWithMap.createEmpty();
|
|
74
|
+
} finally {
|
|
75
|
+
mol?.delete();
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
|
|
47
79
|
async helmToAtomicLevel(
|
|
48
80
|
helmCol: DG.Column<string>, chiralityEngine?: boolean, highlight?: boolean, overrideMonomerLib?: IMonomerLibBase
|
|
49
81
|
): Promise<ToAtomicLevelRes> {
|
|
@@ -54,38 +86,42 @@ export class SeqHelper implements ISeqHelper {
|
|
|
54
86
|
|
|
55
87
|
const converter = await this.getHelmToMolfileConverter(monomerLib);
|
|
56
88
|
|
|
57
|
-
//#region From HelmToMolfileConverter.convertToRdKitBeautifiedMolfileColumn
|
|
58
|
-
|
|
59
|
-
const molfilesV3K = converter.convertToMolfileV3K(helmCol.toList());
|
|
60
|
-
|
|
61
|
-
const beautifiedMolList: (RDMol | null)[] = molfilesV3K.map((item) => {
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
});
|
|
73
|
-
|
|
74
|
-
let molList: string[];
|
|
75
|
-
if (chiralityEngine)
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
}
|
|
89
|
+
// //#region From HelmToMolfileConverter.convertToRdKitBeautifiedMolfileColumn
|
|
90
|
+
|
|
91
|
+
// const molfilesV3K = converter.convertToMolfileV3K(helmCol.toList());
|
|
92
|
+
|
|
93
|
+
// const beautifiedMolList: (RDMol | null)[] = molfilesV3K.map((item) => {
|
|
94
|
+
// const molfile = item.molfile;
|
|
95
|
+
// if (molfile === '')
|
|
96
|
+
// return null;
|
|
97
|
+
// const mol = this.rdKitModule.get_mol(molfile);
|
|
98
|
+
// if (!mol)
|
|
99
|
+
// return null;
|
|
100
|
+
// mol.set_new_coords();
|
|
101
|
+
// mol.normalize_depiction(1);
|
|
102
|
+
// mol.straighten_depiction(true);
|
|
103
|
+
// return mol;
|
|
104
|
+
// });
|
|
105
|
+
|
|
106
|
+
// let molList: string[];
|
|
107
|
+
// if (chiralityEngine)// also creates progress indicator
|
|
108
|
+
// molList = converter.getMolV3000ViaOCL(beautifiedMolList, molColName).toList();
|
|
109
|
+
// // TODO: Cleanup mol objects
|
|
110
|
+
// else {
|
|
111
|
+
// molList = beautifiedMolList.map((mol) => {
|
|
112
|
+
// if (mol === null)
|
|
113
|
+
// return '';
|
|
114
|
+
// const molBlock = mol.get_v3Kmolblock();
|
|
115
|
+
// mol!.delete();
|
|
116
|
+
// return molBlock;
|
|
117
|
+
// });
|
|
118
|
+
// }
|
|
87
119
|
|
|
88
120
|
//#endregion From HelmToMolfileConverter
|
|
121
|
+
const helmList = helmCol.toList();
|
|
122
|
+
const molList = new Array<string>(helmCol.length);
|
|
123
|
+
for (let i = 0; i < helmCol.length; i++)
|
|
124
|
+
molList[i] = (await this.helmToAtomicLevelSingle(helmList[i], converter, chiralityEngine)).molfile;
|
|
89
125
|
|
|
90
126
|
//const molHlList = molfilesV3K.map((item: MolfileWithMap) => getMolHighlight(item.monomers.values(), monomerLib));
|
|
91
127
|
|
|
@@ -53,7 +53,7 @@ export async function sequenceToMolfile(
|
|
|
53
53
|
res.molCol.name = molColName;
|
|
54
54
|
df.columns.add(res.molCol, true);
|
|
55
55
|
|
|
56
|
-
buildMonomerHoverLink(macroMolecule, res.molCol, monomerLib, seqHelper, rdKitModule);
|
|
56
|
+
await buildMonomerHoverLink(macroMolecule, res.molCol, monomerLib, seqHelper, rdKitModule, nonlinear);
|
|
57
57
|
res.molCol.setTag(ChemTags.SEQUENCE_SRC_HL_MONOMERS, String(highlight));
|
|
58
58
|
await grok.data.detectSemanticTypes(df);
|
|
59
59
|
}
|