@datagrok/bio 2.18.2 → 2.18.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.18.2",
8
+ "version": "2.18.4",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -44,7 +44,7 @@
44
44
  ],
45
45
  "dependencies": {
46
46
  "@biowasm/aioli": "^3.1.0",
47
- "@datagrok-libraries/bio": "^5.48.1",
47
+ "@datagrok-libraries/bio": "^5.49.1",
48
48
  "@datagrok-libraries/chem-meta": "^1.2.7",
49
49
  "@datagrok-libraries/math": "^1.2.4",
50
50
  "@datagrok-libraries/ml": "^6.7.6",
@@ -70,7 +70,7 @@ export class SequenceSearchBaseViewer extends DG.JsViewer {
70
70
  this.render();
71
71
  }
72
72
 
73
- /** For tests */ public computeRequested: boolean;
73
+ /** For tests */ public computeRequested: boolean = false;
74
74
  public renderPromise: Promise<void> = Promise.resolve();
75
75
 
76
76
  protected render(computeData = true): void {
package/src/package.ts CHANGED
@@ -979,7 +979,8 @@ export async function manageLibrariesApp(): Promise<DG.View> {
979
979
  export async function manageLibrariesAppTreeBrowser(treeNode: DG.TreeViewGroup, browseView: DG.BrowseView) {
980
980
  const libraries = (await (await MonomerLibManager.getInstance()).getFileManager()).getValidLibraryPaths();
981
981
  libraries.forEach((libName) => {
982
- const libNode = treeNode.item(libName);
982
+ const nodeName = libName.endsWith('.json') ? libName.substring(0, libName.length - 5) : libName;
983
+ const libNode = treeNode.item(nodeName);
983
984
  // eslint-disable-next-line rxjs/no-ignored-subscription, rxjs/no-async-subscribe
984
985
  libNode.onSelected.subscribe(async () => {
985
986
  const monomerManager = await MonomerManager.getNewInstance();
@@ -1,3 +1,4 @@
1
+ /* eslint-disable max-lines-per-function */
1
2
  import * as grok from 'datagrok-api/grok';
2
3
  import * as DG from 'datagrok-api/dg';
3
4
 
@@ -68,7 +69,7 @@ PEPTIDE1{N.T.[dE].[Thr_PO3H2]}$$$$
68
69
  PEPTIDE1{[Cys_SEt].T.[dK].[Thr_PO3H2]}$$$$
69
70
  PEPTIDE1{[Cys_SEt].T.*.*}$$$$`,
70
71
  units: NOTATION.HELM,
71
- alphabet: null,
72
+ alphabet: ALPHABET.UN,
72
73
 
73
74
  positionNames: {tag: null, start: '4', end: '7'}
74
75
  }
@@ -97,6 +97,8 @@ export class DuplicateMonomerManager {
97
97
  DuplicateMonomerManager._instance = new DuplicateMonomerManager();
98
98
  await DuplicateMonomerManager._instance.refresh();
99
99
  const libManager = await MonomerLibManager.getInstance();
100
+ // reason: subscription happens only once, and is needed throughout the lifetime of the app
101
+ // eslint-disable-next-line rxjs/no-async-subscribe, rxjs/no-ignored-subscription
100
102
  libManager.getMonomerLib().onChanged.subscribe(async () => await DuplicateMonomerManager._instance.refresh());
101
103
  }
102
104
  DuplicateMonomerManager._instance.refresh();
@@ -1028,6 +1028,7 @@ function getCorrectedMolBlock(molBlock: string) {
1028
1028
  // 2. RGP field is present in the correct format
1029
1029
  // 3. R group labels are written as R# and not just R
1030
1030
  // 4. there is no ISO field in the molblock. If there is, it needs to be substituted with an RGP field, and that's it.
1031
+ // 5. make sure that R groups have no metadata in the atomblocks
1031
1032
 
1032
1033
  const lines = molBlock.split('\n');
1033
1034
 
@@ -1055,7 +1056,7 @@ function getCorrectedMolBlock(molBlock: string) {
1055
1056
  rgroupLineNumbers[atomI - molStartIdx] = rgroupNum;
1056
1057
  }
1057
1058
 
1058
- const rgroupLineNums = Object.values(rgroupLineNumbers);
1059
+ const rgroupLineNums = Object.keys(rgroupLineNumbers);
1059
1060
  // find and possibly add rgp field
1060
1061
 
1061
1062
  const rgpLineIdx = lines.findIndex((line) => line.startsWith('M') && line.includes('RGP'));
@@ -1066,6 +1067,23 @@ function getCorrectedMolBlock(molBlock: string) {
1066
1067
  const mEndIdx = lines.findIndex((line) => line.startsWith('M') && line.includes('END'));
1067
1068
  lines.splice(mEndIdx, 0, rgpLine);
1068
1069
  }
1070
+
1071
+ //make sure that R# lines do not have any metadata that can be interpreted as isotopes or anything else
1072
+ //for example, following line could be interpreted as isotope with mass 2 in some cases
1073
+ //" 3.9970 0.3462 0.0000 R# 0 0 0 0 0 1 0 0 0 0 2 0"
1074
+ const rGroupActualLines = rgroupLineNums.filter((rLine) => !!Number.parseInt(rLine)).map((atomLine) => Number.parseInt(atomLine) + molStartIdx);
1075
+ rGroupActualLines.forEach((lineIdx) => {
1076
+ const splitLine = lines[lineIdx].split(' ');
1077
+ const rIdx = splitLine.findIndex((s) => s === 'R#');
1078
+ if (rIdx === -1)
1079
+ return;
1080
+ for (let i = rIdx + 1; i < splitLine.length; i++) {
1081
+ if (!!splitLine[i] && splitLine[i].length == 1 && (Number.parseInt(splitLine[i]) ?? 0) > 0)
1082
+ splitLine[i] = '0';
1083
+ }
1084
+ lines[lineIdx] = splitLine.join(' ');
1085
+ });
1086
+
1069
1087
  return lines.join('\n');
1070
1088
  }
1071
1089
 
@@ -1087,12 +1105,24 @@ function monomerFromDfRow(dfRow: DG.Row): Monomer {
1087
1105
  if (typeof metaJSON[key] === 'object')
1088
1106
  metaJSON[key] = JSON.stringify(metaJSON[key]);
1089
1107
  }
1108
+ const smiles = dfRow.get(MONOMER_DF_COLUMN_NAMES.MONOMER);
1109
+ if (!smiles)
1110
+ throw new Error('Monomer SMILES is empty');
1111
+ let molfile = '';
1112
+
1113
+ try {
1114
+ molfile = grok.chem.convert(smiles, DG.chem.Notation.Smiles, DG.chem.Notation.MolBlock);
1115
+ molfile = getCorrectedMolBlock(molfile);
1116
+ } catch (e) {
1117
+ grok.shell.error(`Error converting SMILES to molfile, \n ${smiles}`);
1118
+ console.error(e);
1119
+ }
1090
1120
 
1091
1121
  return {
1092
1122
  symbol: dfRow.get(MONOMER_DF_COLUMN_NAMES.SYMBOL),
1093
1123
  name: dfRow.get(MONOMER_DF_COLUMN_NAMES.NAME),
1094
- molfile: '',
1095
- smiles: dfRow.get(MONOMER_DF_COLUMN_NAMES.MONOMER),
1124
+ molfile: molfile,
1125
+ smiles: smiles,
1096
1126
  polymerType: dfRow.get(MONOMER_DF_COLUMN_NAMES.POLYMER_TYPE),
1097
1127
  monomerType: dfRow.get(MONOMER_DF_COLUMN_NAMES.MONOMER_TYPE),
1098
1128
  naturalAnalog: dfRow.get(MONOMER_DF_COLUMN_NAMES.NATURAL_ANALOG),
@@ -1,3 +1,5 @@
1
+ /* eslint-disable max-lines */
2
+ /* eslint-disable @typescript-eslint/no-unused-vars */
1
3
  import * as DG from 'datagrok-api/dg';
2
4
 
3
5
  import wu from 'wu';
@@ -5,7 +7,7 @@ import wu from 'wu';
5
7
  /* eslint-disable max-len */
6
8
  import {ALIGNMENT, ALPHABET, candidateAlphabets, getSplitterWithSeparator, NOTATION, positionSeparator, splitterAsFasta, splitterAsHelm, TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule/index';
7
9
  import {INotationProvider, ISeqSplitted, SeqColStats, SplitterFunc,} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
8
- import {detectAlphabet, splitterAsFastaSimple, StringListSeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
10
+ import {detectAlphabet, detectHelmAlphabet, splitterAsFastaSimple, StringListSeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
9
11
  import {mmDistanceFunctions, MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
10
12
  import {mmDistanceFunctionType} from '@datagrok-libraries/ml/src/macromolecule-distance-functions/types';
11
13
  import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
@@ -123,6 +125,12 @@ export class SeqHandler implements ISeqHandler {
123
125
  uh.column.setTag(TAGS.alphabetSize, alphabetSize.toString());
124
126
  uh.column.setTag(TAGS.alphabetIsMultichar, alphabetIsMultichar ? 'true' : 'false');
125
127
  }
128
+ } else if (units === NOTATION.HELM) {
129
+ let alphabet = uh.column.getTag(TAGS.alphabet);
130
+ if (alphabet === null) {
131
+ alphabet = detectHelmAlphabet(uh.stats.freq, candidateAlphabets, uh.defaultGapOriginal);
132
+ uh.column.setTag(TAGS.alphabet, alphabet);
133
+ }
126
134
  }
127
135
  }
128
136
 
@@ -539,6 +547,7 @@ export class SeqHandler implements ISeqHandler {
539
547
  // convert the peptides list to a set for faster lookup
540
548
  const peptidesSet = new Set(peptides);
541
549
  // get splitter for given separator and check if all monomers are in the lib
550
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
542
551
  const splitterFunc = getSplitterWithSeparator(this.separator!);
543
552
  // iterate over the columns, split them and check if all monomers are in the lib
544
553
  //TODO maybe add missing threshold so that if there are not too many missing monomers
@@ -87,7 +87,7 @@ export class SeqHelper implements ISeqHelper {
87
87
 
88
88
  //#endregion From HelmToMolfileConverter
89
89
 
90
- const molHlList = molfilesV3K.map((item: MolfileWithMap) => getMolHighlight(item.monomers.values(), monomerLib));
90
+ //const molHlList = molfilesV3K.map((item: MolfileWithMap) => getMolHighlight(item.monomers.values(), monomerLib));
91
91
 
92
92
  const molCol = DG.Column.fromStrings(molColName, molList);
93
93
  molCol.semType = DG.SEMTYPE.MOLECULE;
@@ -26,7 +26,9 @@ export function getCompositionAnalysisWidget(
26
26
  const rowIdx = val.cell.rowIndex;
27
27
  const seqSS = sh.getSplitted(rowIdx);
28
28
  wu.count(0).take(seqSS.length).filter((posIdx) => !seqSS.isGap(posIdx)).forEach((posIdx) => {
29
- const cm = seqSS.getCanonical(posIdx);
29
+ let cm = seqSS.getCanonical(posIdx);
30
+ if (biotype === HelmTypes.NUCLEOTIDE && sh.isHelm() && cm[1] === '(' && cm[cm.length - 2] === ')')
31
+ cm = cm.substring(2, cm.length - 2);
30
32
  const count = counts[cm] || 0;
31
33
  counts[cm] = count + 1;
32
34
  });