@datagrok/bio 2.18.3 → 2.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,41 +5,41 @@ Datagrok macromolecule handler
5
5
  M V30 BEGIN CTAB
6
6
  M V30 COUNTS 17 16 0 0 0
7
7
  M V30 BEGIN ATOM
8
- M V30 1 C 0.7144 -1.2375 0.000000 0
9
- M V30 2 C 0.7144 -0.4125 0.000000 0
8
+ M V30 1 C 1.2991 -2.25 0.000000 0
9
+ M V30 2 C 1.2991 -0.75 0.000000 0
10
10
  M V30 3 N 0 0 0.000000 0
11
- M V30 4 C 1.4289 0 0.000000 0
12
- M V30 5 O 2.1434 -0.4126 0.000000 0
13
- M V30 6 N 4.2867 4.9501 0.000000 0
14
- M V30 7 C 3.5722 4.5376 0.000000 0
15
- M V30 8 N 2.8578 4.95 0.000000 0
16
- M V30 9 N 3.5723 3.7126 0.000000 0
17
- M V30 10 C 2.8578 3.3001 0.000000 0
18
- M V30 11 C 2.8578 2.475 0.000000 0
19
- M V30 12 C 2.1433 2.0626 0.000000 0
20
- M V30 13 C 2.1433 1.2375 0.000000 0
21
- M V30 14 N 1.4289 0.825 0.000000 0
22
- M V30 15 C 2.8578 0.8251 0.000000 0
23
- M V30 16 O 3.5723 1.2376 0.000000 0
24
- M V30 17 O 2.8578 0 0.000000 0
11
+ M V30 4 C 2.5981 0 0.000000 0
12
+ M V30 5 O 3.8971 -0.75 0.000000 0
13
+ M V30 6 N 7.7941 9 0.000000 0
14
+ M V30 7 C 6.4951 8.2501 0.000000 0
15
+ M V30 8 N 5.196 9 0.000000 0
16
+ M V30 9 N 6.4951 6.7501 0.000000 0
17
+ M V30 10 C 5.1961 6 0.000000 0
18
+ M V30 11 C 5.1961 4.5 0.000000 0
19
+ M V30 12 C 3.8971 3.75 0.000000 0
20
+ M V30 13 C 3.8971 2.25 0.000000 0
21
+ M V30 14 N 2.5981 1.5 0.000000 0
22
+ M V30 15 C 5.1962 1.5 0.000000 0
23
+ M V30 16 O 6.4952 2.25 0.000000 0
24
+ M V30 17 O 5.1962 -0.0001 0.000000 0
25
25
  M V30 END ATOM
26
26
  M V30 BEGIN BOND
27
- M V30 1 1 2 1 CFG=3
28
- M V30 2 1 2 3
27
+ M V30 1 1 2 1 CFG=3
28
+ M V30 2 1 2 3
29
29
  M V30 3 1 2 4
30
- M V30 4 2 4 5
30
+ M V30 4 2 4 5
31
31
  M V30 5 1 4 14
32
- M V30 6 1 6 7
33
- M V30 7 2 7 8
34
- M V30 8 1 7 9
35
- M V30 9 1 9 10
36
- M V30 10 1 10 11
37
- M V30 11 1 11 12
38
- M V30 12 1 13 12 CFG=1
39
- M V30 13 1 13 14
32
+ M V30 6 2 6 7
33
+ M V30 7 1 7 8
34
+ M V30 8 1 7 9
35
+ M V30 9 1 9 10
36
+ M V30 10 1 10 11
37
+ M V30 11 1 11 12
38
+ M V30 12 1 13 12 CFG=1
39
+ M V30 13 1 13 14
40
40
  M V30 14 1 13 15
41
- M V30 15 2 15 16
41
+ M V30 15 2 15 16
42
42
  M V30 16 1 15 17
43
43
  M V30 END BOND
44
44
  M V30 END CTAB
45
- M END
45
+ M END
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.18.3",
8
+ "version": "2.19.0",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -44,13 +44,13 @@
44
44
  ],
45
45
  "dependencies": {
46
46
  "@biowasm/aioli": "^3.1.0",
47
- "@datagrok-libraries/bio": "^5.48.1",
47
+ "@datagrok-libraries/bio": "^5.50.1",
48
48
  "@datagrok-libraries/chem-meta": "^1.2.7",
49
49
  "@datagrok-libraries/math": "^1.2.4",
50
50
  "@datagrok-libraries/ml": "^6.7.6",
51
51
  "@datagrok-libraries/tutorials": "^1.4.3",
52
52
  "@datagrok-libraries/utils": "^4.4.0",
53
- "datagrok-api": "^1.23.0",
53
+ "datagrok-api": "^1.24.0",
54
54
  "@webgpu/types": "^0.1.40",
55
55
  "ajv": "^8.12.0",
56
56
  "ajv-errors": "^3.0.0",
@@ -70,7 +70,7 @@ export class SequenceSearchBaseViewer extends DG.JsViewer {
70
70
  this.render();
71
71
  }
72
72
 
73
- /** For tests */ public computeRequested: boolean;
73
+ /** For tests */ public computeRequested: boolean = false;
74
74
  public renderPromise: Promise<void> = Promise.resolve();
75
75
 
76
76
  protected render(computeData = true): void {
package/src/package.ts CHANGED
@@ -422,7 +422,7 @@ export function getRegion(
422
422
  start ?? null, end ?? null, name ?? null);
423
423
  }
424
424
 
425
- //top-menu: Bio | Convert | Get Region...
425
+ //top-menu: Bio | Calculate | Get Region...
426
426
  //name: Get Region Top Menu
427
427
  //description: Get sequences for a region specified from a Macromolecule
428
428
  //input: dataframe table [Input data table]
@@ -607,14 +607,13 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, molecules: DG.Co
607
607
  return res;
608
608
  }
609
609
 
610
- //top-menu: Bio | Convert | To Atomic Level...
610
+ //top-menu: Bio | Transform | To Atomic Level...
611
611
  //name: To Atomic Level
612
612
  //description: Converts sequences to molblocks
613
613
  //input: dataframe table [Input data table]
614
614
  //input: column seqCol {caption: Sequence; semType: Macromolecule}
615
615
  //input: bool nonlinear =false {caption: Non-linear; description: Slower mode for cycling/branching HELM structures}
616
616
  //input: bool highlight =false {caption: Highlight monomers; description: Highlight monomers' substructures of the molecule }
617
- //output:
618
617
  export async function toAtomicLevel(
619
618
  table: DG.DataFrame, seqCol: DG.Column, nonlinear: boolean, highlight: boolean = false
620
619
  ): Promise<void> {
@@ -630,6 +629,17 @@ export async function toAtomicLevel(
630
629
  }
631
630
  }
632
631
 
632
+ //name: To Atomic Level...
633
+ //input: column seqCol {semType: Macromolecule}
634
+ //meta.action: to atomic level
635
+ export async function toAtomicLevelAction(seqCol: DG.Column) {
636
+ if (!seqCol?.dataFrame)
637
+ throw new Error('Sequence column is not found or its data frame is not empty');
638
+ const func = DG.Func.find({name: 'toAtomicLevel', package: 'Bio'})[0];
639
+ if (!func) throw new Error('To Atomic level Function not found');
640
+ func.prepare({table: seqCol.dataFrame, seqCol: seqCol}).edit();
641
+ }
642
+
633
643
  //top-menu: Bio | Analyze | MSA...
634
644
  //name: MSA
635
645
  //description: Performs multiple sequence alignment
@@ -743,38 +753,20 @@ export function importBam(fileContent: string): DG.DataFrame [] {
743
753
  return [];
744
754
  }
745
755
 
746
- //top-menu: Bio | Convert | Notation...
756
+ //top-menu: Bio | Transform | Convert Notation...
747
757
  //name: convertDialog
748
758
  export function convertDialog() {
749
759
  const col: DG.Column<string> | undefined = getMacromoleculeColumns()[0];
750
760
  convert(col, _package.seqHelper);
751
761
  }
752
762
 
753
- //top-menu: Bio | Convert | TestConvert
754
- //name: convertSeqNotation
755
- //description: RDKit-based conversion for SMILES, SMARTS, InChi, Molfile V2000 and Molfile V3000
756
- //input: string sequence {semType: Macromolecule}
757
- //input: string targetNotation
758
- //input: string separator
759
- //output: string result
760
- export async function convertSeqNotation(sequence: string, targetNotation: NOTATION, separator?: string): Promise<string | undefined | null> {
761
- try {
762
- const col = DG.Column.fromStrings('sequence', [sequence]);
763
- const _df = DG.DataFrame.fromColumns([col]);
764
- const semType = await grok.functions.call('Bio:detectMacromolecule', {col: col});
765
- if (semType)
766
- col.semType = semType;
767
- const converterSh = _package.seqHelper.getSeqHandler(col);
768
- const newColumn = converterSh.convert(targetNotation, separator);
769
- return newColumn.get(0);
770
- } catch (err: any) {
771
- const [errMsg, errStack] = errInfo(err);
772
- _package.logger.error(errMsg, undefined, errStack);
773
- throw err;
774
- }
763
+ //name: Convert Notation...
764
+ //input: column col {semType: Macromolecule}
765
+ //meta.action: Convert Notation...
766
+ export function convertColumnAction(col: DG.Column) {
767
+ convert(col, _package.seqHelper);
775
768
  }
776
769
 
777
-
778
770
  //name: monomerCellRenderer
779
771
  //tags: cellRenderer
780
772
  //meta.cellType: Monomer
@@ -835,7 +827,7 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
835
827
  return resDf;
836
828
  }
837
829
 
838
- //top-menu: Bio | Convert | Split to Monomers...
830
+ //top-menu: Bio | Transform | Split to Monomers...
839
831
  //name: Split to Monomers
840
832
  //input: dataframe table
841
833
  //input: column sequence { semType: Macromolecule }
@@ -979,7 +971,8 @@ export async function manageLibrariesApp(): Promise<DG.View> {
979
971
  export async function manageLibrariesAppTreeBrowser(treeNode: DG.TreeViewGroup, browseView: DG.BrowseView) {
980
972
  const libraries = (await (await MonomerLibManager.getInstance()).getFileManager()).getValidLibraryPaths();
981
973
  libraries.forEach((libName) => {
982
- const libNode = treeNode.item(libName);
974
+ const nodeName = libName.endsWith('.json') ? libName.substring(0, libName.length - 5) : libName;
975
+ const libNode = treeNode.item(nodeName);
983
976
  // eslint-disable-next-line rxjs/no-ignored-subscription, rxjs/no-async-subscribe
984
977
  libNode.onSelected.subscribe(async () => {
985
978
  const monomerManager = await MonomerManager.getNewInstance();
@@ -21,8 +21,8 @@ category('Scoring', () => {
21
21
  /* eslint-disable max-len */
22
22
  const table = DG.DataFrame.fromCsv(`${sequence},${expectedSimilarity},${expectedIdentity}
23
23
  PEPTIDE1{Aca.Orn.gGlu.Pqa.D-His_1Bn.dH.hHis.4Abz.D-Tic.D-Dap.Y.Iva.meS.F.P.F.D-1Nal}$$$$,1.0,1.0
24
- PEPTIDE1{Iva.Gly_allyl.gGlu.Pqa.D-Dip.dH.hHis.4Abz.D-aHyp.D-Dap.Y.Iva.I.Tyr_26diMe.P.Asu.meC}$$$$,0.68,0.53
25
- PEPTIDE1{[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal]}$$$$V2.0,0.34,0.0`
24
+ PEPTIDE1{Iva.Gly_allyl.gGlu.Pqa.D-Dip.dH.hHis.4Abz.D-aHyp.D-Dap.Y.Iva.I.Tyr_26diMe.P.Asu.meC}$$$$,0.691,0.53
25
+ PEPTIDE1{[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal]}$$$$V2.0,0.37,0.0`
26
26
  );
27
27
  const seqCol: DG.Column<string> = table.getCol(sequence);
28
28
  seqCol.meta.units = NOTATION.HELM;
@@ -1,3 +1,4 @@
1
+ /* eslint-disable max-lines-per-function */
1
2
  import * as grok from 'datagrok-api/grok';
2
3
  import * as DG from 'datagrok-api/dg';
3
4
 
@@ -68,7 +69,7 @@ PEPTIDE1{N.T.[dE].[Thr_PO3H2]}$$$$
68
69
  PEPTIDE1{[Cys_SEt].T.[dK].[Thr_PO3H2]}$$$$
69
70
  PEPTIDE1{[Cys_SEt].T.*.*}$$$$`,
70
71
  units: NOTATION.HELM,
71
- alphabet: null,
72
+ alphabet: ALPHABET.UN,
72
73
 
73
74
  positionNames: {tag: null, start: '4', end: '7'}
74
75
  }
@@ -4,9 +4,8 @@ import * as OCL from 'openchemlib/full';
4
4
 
5
5
  import {errInfo} from '@datagrok-libraries/bio/src/utils/err-info';
6
6
  import {RDModule, RDMol} from '@datagrok-libraries/chem-meta/src/rdkit-api';
7
- import {IMonomerLib, IMonomerLibBase} from '@datagrok-libraries/bio/src/types/index';
8
- import {IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
9
- import {getHelmHelper, IHelmHelper} from '@datagrok-libraries/bio/src/helm/helm-helper';
7
+ import {IMonomerLibBase} from '@datagrok-libraries/bio/src/types/index';
8
+ import {IHelmHelper} from '@datagrok-libraries/bio/src/helm/helm-helper';
10
9
  import {MolfileWithMap, MonomerMap} from '@datagrok-libraries/bio/src/monomer-works/types';
11
10
 
12
11
  import {Polymer} from './polymer';
@@ -14,9 +13,10 @@ import {GlobalMonomerPositionHandler} from './position-handler';
14
13
 
15
14
  import {_package} from '../../../package';
16
15
  import {getUnusedColName} from '@datagrok-libraries/bio/src/monomer-works/utils';
16
+ import {IHelmToMolfileConverter} from '@datagrok-libraries/bio/src/utils/seq-helper';
17
17
 
18
18
 
19
- export class HelmToMolfileConverter {
19
+ export class HelmToMolfileConverter implements IHelmToMolfileConverter {
20
20
  constructor(
21
21
  private readonly helmHelper: IHelmHelper,
22
22
  private readonly rdKitModule: RDModule,
@@ -41,6 +41,20 @@ export class HelmToMolfileConverter {
41
41
  return smiles;
42
42
  }
43
43
 
44
+ public molV3KtoMolV3KOCL(molV3k: string): string {
45
+ try {
46
+ if (!molV3k)
47
+ return '';
48
+ const oclMolecule = OCL.Molecule.fromMolfile(molV3k);
49
+ const molV3000 = oclMolecule.toMolfileV3();
50
+ return molV3000.replace('STERAC1', 'STEABS');
51
+ } catch (err) {
52
+ const [errMsg, errStack] = errInfo(err);
53
+ _package.logger.error(errMsg, undefined, errStack);
54
+ return '';
55
+ }
56
+ }
57
+
44
58
  public getMolV3000ViaOCL(beautifiedMols: (RDMol | null)[], columnName: string): DG.Column<string> {
45
59
  const beautifiedMolV2000 = beautifiedMols.map((mol) => {
46
60
  if (mol === null)
@@ -52,9 +66,7 @@ export class HelmToMolfileConverter {
52
66
  const molv3000Arr = new Array<string>(beautifiedMolV2000.length);
53
67
  const chiralityPb = DG.TaskBarProgressIndicator.create(`Handling chirality...`);
54
68
  for (let i = 0; i < beautifiedMolV2000.length; i++) {
55
- const oclMolecule = OCL.Molecule.fromMolfile(beautifiedMolV2000[i]);
56
- const molV3000 = oclMolecule.toMolfileV3();
57
- molv3000Arr[i] = molV3000.replace('STERAC1', 'STEABS');
69
+ molv3000Arr[i] = this.molV3KtoMolV3KOCL(beautifiedMolV2000[i]);
58
70
  const progress = i / beautifiedMolV2000.length * 100;
59
71
  chiralityPb.update(progress, `${progress?.toFixed(2)}% of molecules completed`);
60
72
  }
@@ -136,9 +148,9 @@ export class HelmToMolfileConverter {
136
148
  const woGapsRes = this.helmHelper.removeGaps(helm);
137
149
  const woGapsHelm = woGapsRes.resHelm;
138
150
  const woGapsReverseMap = new Map<number, number>();
139
- for (const [orgPosIdx, woGapsPosIdx] of (woGapsRes.monomerMap?.entries() ?? [])) {
151
+ for (const [orgPosIdx, woGapsPosIdx] of (woGapsRes.monomerMap?.entries() ?? []))
140
152
  woGapsReverseMap.set(woGapsPosIdx, orgPosIdx);
141
- }
153
+
142
154
  const pseudoMolfile = this.helmHelper.getMolfiles([woGapsHelm])[0];
143
155
  const globalPositionHandler = new GlobalMonomerPositionHandler(pseudoMolfile);
144
156
  const woGapsPolymer = new Polymer(woGapsHelm, this.rdKitModule, this.monomerLib);
@@ -1,3 +1,4 @@
1
+ /* eslint-disable max-len */
1
2
  import {HELM_MONOMER_TYPE, HELM_POLYMER_TYPE} from '@datagrok-libraries/bio/src/utils/const';
2
3
  import {cleanupHelmSymbol} from '@datagrok-libraries/bio/src/helm/utils';
3
4
 
@@ -5,8 +6,10 @@ import {Bond} from './types';
5
6
 
6
7
  /** Wrapper over simple polymer substring of HELM, like RNA1{d(A)p} */
7
8
  export class SimplePolymer {
9
+ private isNucleotideSequence = false;
8
10
  constructor(private simplePolymer: string) {
9
11
  this.polymerType = this.getPolymerType();
12
+ this.isNucleotideSequence = this.polymerType === HELM_POLYMER_TYPE.RNA;
10
13
  this.idx = this.getIdx();
11
14
  const {monomers, monomerTypes} = this.getMonomerSymbolsAndTypes();
12
15
  this.monomers = monomers;
@@ -49,17 +52,23 @@ export class SimplePolymer {
49
52
  const monomerList: string[] = [];
50
53
  const monomerTypeList: HELM_MONOMER_TYPE[] = [];
51
54
  monomerGroups.forEach((monomerGroup) => {
52
- // const splitted = monomerGroup.split(/\(|\)/).map((el) => el.replace(/[\[\]]/g, ''));
53
- // monomerList.push(...splitted);
54
- // WARNING: only the groups of the form r(A)p, as in RNA, are supported
55
+ if (!this.isNucleotideSequence) {
56
+ // const splitted = monomerGroup.split(/\(|\)/).map((el) => el.replace(/[\[\]]/g, ''));
57
+ // monomerList.push(...splitted);
58
+ // WARNING: only the groups of the form r(A)p, as in RNA, are supported
55
59
 
56
- monomerList.push(cleanupHelmSymbol(monomerGroup));
57
- // const monomerTypes = splitted.map(
58
- // (_, idx) => (idx % 2 === 0) ? HELM_MONOMER_TYPE.BACKBONE : HELM_MONOMER_TYPE.BRANCH
59
- // );
60
+ monomerList.push(cleanupHelmSymbol(monomerGroup));
61
+ // const monomerTypes = splitted.map(
62
+ // (_, idx) => (idx % 2 === 0) ? HELM_MONOMER_TYPE.BACKBONE : HELM_MONOMER_TYPE.BRANCH
63
+ // );
60
64
 
61
- // monomerTypeList.push(...monomerTypes);
62
- monomerTypeList.push(HELM_MONOMER_TYPE.BACKBONE);
65
+ // monomerTypeList.push(...monomerTypes);
66
+ monomerTypeList.push(HELM_MONOMER_TYPE.BACKBONE);
67
+ } else {
68
+ const splitted = monomerGroup.split(/\(|\)/).filter((el) => !!el).map((el) => cleanupHelmSymbol(el));
69
+ monomerList.push(...splitted);
70
+ splitted.forEach((_, i) => monomerTypeList.push(i % 3 === 1 ? HELM_MONOMER_TYPE.BRANCH : HELM_MONOMER_TYPE.BACKBONE));
71
+ }
63
72
  });
64
73
  return {monomers: monomerList, monomerTypes: monomerTypeList};
65
74
  }
@@ -177,13 +177,13 @@ export class MonomerLibBase implements IMonomerLibBase {
177
177
  // Symbol & Name
178
178
  const symbol = monomer[REQ.SYMBOL];
179
179
  const _name = monomer[REQ.NAME];
180
-
181
- const htmlColor = wem.backgroundcolor;
180
+ const [color, backgroundColor, lineColor] =
181
+ wem ? [wem.textcolor, wem.backgroundcolor, wem.linecolor] : ['#202020', '#A0A0A0', '#202020'];
182
182
  res.append(ui.divH([
183
183
  ui.div([symbol], {
184
184
  style: {
185
185
  /* fontWeight: 'bolder', */ textWrap: 'nowrap', marginLeft: '4px', marginRight: '4px',
186
- color: wem.textcolor, backgroundColor: wem.backgroundcolor, borderColor: wem.linecolor,
186
+ color: color, backgroundColor: backgroundColor, borderColor: lineColor,
187
187
  borderWidth: '1px', borderStyle: 'solid', borderRadius: '2px', padding: '3px',
188
188
  minWidth: '24px', textAlign: 'center',
189
189
  }
@@ -192,7 +192,7 @@ export class MonomerLibBase implements IMonomerLibBase {
192
192
  ], {style: {display: 'flex', flexDirection: 'row', justifyContent: 'left'}}));
193
193
 
194
194
  // Structure
195
- const chemOptions = {autoCrop: true, autoCropMargin: 0, suppressChiralText: true};
195
+ //const chemOptions = {autoCrop: true, autoCropMargin: 0, suppressChiralText: true};
196
196
  let structureEl: HTMLElement;
197
197
  if (monomer.molfile)
198
198
  structureEl = drawMoleculeCall(monomer.molfile);
@@ -1028,6 +1028,7 @@ function getCorrectedMolBlock(molBlock: string) {
1028
1028
  // 2. RGP field is present in the correct format
1029
1029
  // 3. R group labels are written as R# and not just R
1030
1030
  // 4. there is no ISO field in the molblock. if there is, it needs to be substituted with RGP field and thats it.
1031
+ // 5. make sure that R groups have no metadata in the atomblocks
1031
1032
 
1032
1033
  const lines = molBlock.split('\n');
1033
1034
 
@@ -1055,7 +1056,7 @@ function getCorrectedMolBlock(molBlock: string) {
1055
1056
  rgroupLineNumbers[atomI - molStartIdx] = rgroupNum;
1056
1057
  }
1057
1058
 
1058
- const rgroupLineNums = Object.values(rgroupLineNumbers);
1059
+ const rgroupLineNums = Object.keys(rgroupLineNumbers);
1059
1060
  // find and possibly add rgp field
1060
1061
 
1061
1062
  const rgpLineIdx = lines.findIndex((line) => line.startsWith('M') && line.includes('RGP'));
@@ -1066,6 +1067,23 @@ function getCorrectedMolBlock(molBlock: string) {
1066
1067
  const mEndIdx = lines.findIndex((line) => line.startsWith('M') && line.includes('END'));
1067
1068
  lines.splice(mEndIdx, 0, rgpLine);
1068
1069
  }
1070
+
1071
+ //make sure that R# lines do not have any metadata that can be interpreted as isotopes or anything else
1072
+ //for example, following line could be interpreted as isotope with mass 2 in some cases
1073
+ //" 3.9970 0.3462 0.0000 R# 0 0 0 0 0 1 0 0 0 0 2 0"
1074
+ const rGroupActualLines = rgroupLineNums.filter((rLine) => !!Number.parseInt(rLine)).map((atomLine) => Number.parseInt(atomLine) + molStartIdx);
1075
+ rGroupActualLines.forEach((lineIdx) => {
1076
+ const splitLine = lines[lineIdx].split(' ');
1077
+ const rIdx = splitLine.findIndex((s) => s === 'R#');
1078
+ if (rIdx === -1)
1079
+ return;
1080
+ for (let i = rIdx + 1; i < splitLine.length; i++) {
1081
+ if (!!splitLine[i] && splitLine[i].length == 1 && (Number.parseInt(splitLine[i]) ?? 0) > 0)
1082
+ splitLine[i] = '0';
1083
+ }
1084
+ lines[lineIdx] = splitLine.join(' ');
1085
+ });
1086
+
1069
1087
  return lines.join('\n');
1070
1088
  }
1071
1089
 
@@ -1,3 +1,5 @@
1
+ /* eslint-disable max-lines */
2
+ /* eslint-disable @typescript-eslint/no-unused-vars */
1
3
  import * as DG from 'datagrok-api/dg';
2
4
 
3
5
  import wu from 'wu';
@@ -5,7 +7,7 @@ import wu from 'wu';
5
7
  /* eslint-disable max-len */
6
8
  import {ALIGNMENT, ALPHABET, candidateAlphabets, getSplitterWithSeparator, NOTATION, positionSeparator, splitterAsFasta, splitterAsHelm, TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule/index';
7
9
  import {INotationProvider, ISeqSplitted, SeqColStats, SplitterFunc,} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
8
- import {detectAlphabet, splitterAsFastaSimple, StringListSeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
10
+ import {detectAlphabet, detectHelmAlphabet, splitterAsFastaSimple, StringListSeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
9
11
  import {mmDistanceFunctions, MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
10
12
  import {mmDistanceFunctionType} from '@datagrok-libraries/ml/src/macromolecule-distance-functions/types';
11
13
  import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
@@ -123,6 +125,12 @@ export class SeqHandler implements ISeqHandler {
123
125
  uh.column.setTag(TAGS.alphabetSize, alphabetSize.toString());
124
126
  uh.column.setTag(TAGS.alphabetIsMultichar, alphabetIsMultichar ? 'true' : 'false');
125
127
  }
128
+ } else if (units === NOTATION.HELM) {
129
+ let alphabet = uh.column.getTag(TAGS.alphabet);
130
+ if (alphabet === null) {
131
+ alphabet = detectHelmAlphabet(uh.stats.freq, candidateAlphabets, uh.defaultGapOriginal);
132
+ uh.column.setTag(TAGS.alphabet, alphabet);
133
+ }
126
134
  }
127
135
  }
128
136
 
@@ -539,6 +547,7 @@ export class SeqHandler implements ISeqHandler {
539
547
  // convert the peptides list to a set for faster lookup
540
548
  const peptidesSet = new Set(peptides);
541
549
  // get splitter for given separator and check if all monomers are in the lib
550
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
542
551
  const splitterFunc = getSplitterWithSeparator(this.separator!);
543
552
  // iterate over the columns, split them and check if all monomers are in the lib
544
553
  //TODO maybe add missing threshold so that if there are not too many missing monomers
@@ -596,12 +605,12 @@ export class SeqHandler implements ISeqHandler {
596
605
 
597
606
  const isNucleotide = srcSeq.startsWith('RNA');
598
607
  // items can be monomers or helms
599
- const helmItemsArray = this.splitter(srcSeq);
608
+ const helmItemsArray = splitterAsHelm(srcSeq);
600
609
  const tgtMonomersArray: string[] = [];
601
610
  for (let posIdx = 0; posIdx < helmItemsArray.length; ++posIdx) {
602
611
  let om: string = helmItemsArray.getOriginal(posIdx);
603
612
  if (isNucleotide)
604
- om = om.replace(HELM_WRAPPERS_REGEXP, '');
613
+ om = om.replace(HELM_WRAPPERS_REGEXP, '$1');
605
614
  if (om === GapOriginals[NOTATION.HELM])
606
615
  tgtMonomersArray.push(tgtGapOriginal);
607
616
  else if (this.toFasta(tgtNotation as NOTATION) && om.length > 1) {
@@ -9,9 +9,8 @@ import {RDModule, RDMol} from '@datagrok-libraries/chem-meta/src/rdkit-api';
9
9
  import {IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
10
10
  import {getHelmHelper, IHelmHelper} from '@datagrok-libraries/bio/src/helm/helm-helper';
11
11
  import {MolfileWithMap} from '@datagrok-libraries/bio/src/monomer-works/types';
12
- import {getMolColName, hexToPercentRgb} from '@datagrok-libraries/bio/src/monomer-works/utils';
12
+ import {getMolColName} from '@datagrok-libraries/bio/src/monomer-works/utils';
13
13
  import {ChemTags} from '@datagrok-libraries/chem-meta/src/consts';
14
- import {getMolHighlight} from '@datagrok-libraries/bio/src/monomer-works/seq-to-molfile';
15
14
  import {IMonomerLibBase} from '@datagrok-libraries/bio/src/types/index';
16
15
 
17
16
  import {HelmToMolfileConverter} from '../helm-to-molfile/converter';
@@ -19,6 +18,8 @@ import {ISeqHandler} from '@datagrok-libraries/bio/src/utils/macromolecule/seq-h
19
18
  import {SeqHandler} from './seq-handler';
20
19
  import {Column} from 'datagrok-api/dg';
21
20
  import {NOTATION, TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
21
+ import {errInfo} from '@datagrok-libraries/bio/src/utils/err-info';
22
+ import {_package} from '../../package';
22
23
 
23
24
  type SeqHelperWindowType = Window & { $seqHelperPromise?: Promise<SeqHelper> };
24
25
  declare const window: SeqHelperWindowType;
@@ -44,6 +45,37 @@ export class SeqHelper implements ISeqHelper {
44
45
  return new HelmToMolfileConverter(helmHelper, this.rdKitModule, monomerLib);
45
46
  }
46
47
 
48
+ helmToAtomicLevelSingle(
49
+ helm: string, converter: HelmToMolfileConverter, chiralityEngine?: boolean, beautifyMol: boolean = true) {
50
+ if (!helm)
51
+ return MolfileWithMap.createEmpty();
52
+ const molfileV3k = converter.convertToMolfileV3K([helm])[0];
53
+ if (!molfileV3k || !molfileV3k.molfile)
54
+ return MolfileWithMap.createEmpty();
55
+ let mol: RDMol | null = null;
56
+ try {
57
+ let v3k = molfileV3k.molfile;
58
+ if (beautifyMol) {
59
+ mol = this.rdKitModule.get_mol(v3k);
60
+ if (!mol)
61
+ return MolfileWithMap.createEmpty();
62
+ mol.set_new_coords();
63
+ mol.normalize_depiction(1);
64
+ mol.straighten_depiction(true);
65
+ v3k = mol.get_v3Kmolblock();
66
+ }
67
+ if (chiralityEngine)
68
+ v3k = converter.molV3KtoMolV3KOCL(v3k);
69
+ return new MolfileWithMap(v3k, molfileV3k.monomers);
70
+ } catch (err) {
71
+ const [errMsg, errStack] = errInfo(err);
72
+ _package.logger.error(errMsg, undefined, errStack);
73
+ return MolfileWithMap.createEmpty();
74
+ } finally {
75
+ mol?.delete();
76
+ }
77
+ }
78
+
47
79
  async helmToAtomicLevel(
48
80
  helmCol: DG.Column<string>, chiralityEngine?: boolean, highlight?: boolean, overrideMonomerLib?: IMonomerLibBase
49
81
  ): Promise<ToAtomicLevelRes> {
@@ -54,40 +86,44 @@ export class SeqHelper implements ISeqHelper {
54
86
 
55
87
  const converter = await this.getHelmToMolfileConverter(monomerLib);
56
88
 
57
- //#region From HelmToMolfileConverter.convertToRdKitBeautifiedMolfileColumn
58
-
59
- const molfilesV3K = converter.convertToMolfileV3K(helmCol.toList());
60
-
61
- const beautifiedMolList: (RDMol | null)[] = molfilesV3K.map((item) => {
62
- const molfile = item.molfile;
63
- if (molfile === '')
64
- return null;
65
- const mol = this.rdKitModule.get_mol(molfile);
66
- if (!mol)
67
- return null;
68
- mol.set_new_coords();
69
- mol.normalize_depiction(1);
70
- mol.straighten_depiction(true);
71
- return mol;
72
- });
73
-
74
- let molList: string[];
75
- if (chiralityEngine) {
76
- molList = converter.getMolV3000ViaOCL(beautifiedMolList, molColName).toList();
77
- // TODO: Cleanup mol objects
78
- } else {
79
- molList = beautifiedMolList.map((mol) => {
80
- if (mol === null)
81
- return '';
82
- const molBlock = mol.get_v3Kmolblock();
83
- mol!.delete();
84
- return molBlock;
85
- });
86
- }
89
+ // //#region From HelmToMolfileConverter.convertToRdKitBeautifiedMolfileColumn
90
+
91
+ // const molfilesV3K = converter.convertToMolfileV3K(helmCol.toList());
92
+
93
+ // const beautifiedMolList: (RDMol | null)[] = molfilesV3K.map((item) => {
94
+ // const molfile = item.molfile;
95
+ // if (molfile === '')
96
+ // return null;
97
+ // const mol = this.rdKitModule.get_mol(molfile);
98
+ // if (!mol)
99
+ // return null;
100
+ // mol.set_new_coords();
101
+ // mol.normalize_depiction(1);
102
+ // mol.straighten_depiction(true);
103
+ // return mol;
104
+ // });
105
+
106
+ // let molList: string[];
107
+ // if (chiralityEngine)// also creates progress indicator
108
+ // molList = converter.getMolV3000ViaOCL(beautifiedMolList, molColName).toList();
109
+ // // TODO: Cleanup mol objects
110
+ // else {
111
+ // molList = beautifiedMolList.map((mol) => {
112
+ // if (mol === null)
113
+ // return '';
114
+ // const molBlock = mol.get_v3Kmolblock();
115
+ // mol!.delete();
116
+ // return molBlock;
117
+ // });
118
+ // }
87
119
 
88
120
  //#endregion From HelmToMolfileConverter
121
+ const helmList = helmCol.toList();
122
+ const molList = new Array<string>(helmCol.length);
123
+ for (let i = 0; i < helmCol.length; i++)
124
+ molList[i] = (await this.helmToAtomicLevelSingle(helmList[i], converter, chiralityEngine)).molfile;
89
125
 
90
- const molHlList = molfilesV3K.map((item: MolfileWithMap) => getMolHighlight(item.monomers.values(), monomerLib));
126
+ //const molHlList = molfilesV3K.map((item: MolfileWithMap) => getMolHighlight(item.monomers.values(), monomerLib));
91
127
 
92
128
  const molCol = DG.Column.fromStrings(molColName, molList);
93
129
  molCol.semType = DG.SEMTYPE.MOLECULE;
@@ -53,7 +53,7 @@ export async function sequenceToMolfile(
53
53
  res.molCol.name = molColName;
54
54
  df.columns.add(res.molCol, true);
55
55
 
56
- buildMonomerHoverLink(macroMolecule, res.molCol, monomerLib, seqHelper, rdKitModule);
56
+ await buildMonomerHoverLink(macroMolecule, res.molCol, monomerLib, seqHelper, rdKitModule, nonlinear);
57
57
  res.molCol.setTag(ChemTags.SEQUENCE_SRC_HL_MONOMERS, String(highlight));
58
58
  await grok.data.detectSemanticTypes(df);
59
59
  }
@@ -26,7 +26,9 @@ export function getCompositionAnalysisWidget(
26
26
  const rowIdx = val.cell.rowIndex;
27
27
  const seqSS = sh.getSplitted(rowIdx);
28
28
  wu.count(0).take(seqSS.length).filter((posIdx) => !seqSS.isGap(posIdx)).forEach((posIdx) => {
29
- const cm = seqSS.getCanonical(posIdx);
29
+ let cm = seqSS.getCanonical(posIdx);
30
+ if (biotype === HelmTypes.NUCLEOTIDE && sh.isHelm() && cm[1] === '(' && cm[cm.length - 2] === ')')
31
+ cm = cm.substring(2, cm.length - 2);
30
32
  const count = counts[cm] || 0;
31
33
  counts[cm] = count + 1;
32
34
  });