@datagrok/bio 2.20.4 → 2.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -85,12 +85,10 @@
85
85
  "symbol",
86
86
  "name",
87
87
  "molfile",
88
- "author",
89
88
  "id",
90
89
  "rgroups",
91
90
  "smiles",
92
91
  "polymerType",
93
- "monomerType",
94
- "createDate"
92
+ "monomerType"
95
93
  ]
96
94
  }
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.20.4",
8
+ "version": "2.21.0",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -65,7 +65,7 @@
65
65
  "wu": "^2.1.0"
66
66
  },
67
67
  "devDependencies": {
68
- "@datagrok-libraries/helm-web-editor": "^1.1.13",
68
+ "@datagrok-libraries/helm-web-editor": "^1.1.14",
69
69
  "@datagrok-libraries/js-draw-lite": "^0.0.10",
70
70
  "@datagrok/chem": "^1.13.0",
71
71
  "@datagrok/dendrogram": "^1.2.33",
package/src/package.ts CHANGED
@@ -543,7 +543,6 @@ export async function macromoleculePreprocessingFunction(
543
543
  }
544
544
 
545
545
  //name: Helm Fingerprints
546
- //tags: dim-red-preprocessing-function
547
546
  //meta.supportedSemTypes: Macromolecule
548
547
  //meta.supportedTypes: string
549
548
  //meta.supportedUnits: helm
@@ -581,7 +580,6 @@ export async function helmPreprocessingFunction(
581
580
  //input: object options {optional: true}
582
581
  //input: bool clusterEmbeddings = true { optional: true }
583
582
  //input: bool isDemo {optional: true}
584
- //output: viewer result
585
583
  //editor: Bio:SequenceSpaceEditor
586
584
  export async function sequenceSpaceTopMenu(table: DG.DataFrame, molecules: DG.Column,
587
585
  methodName: DimReductionMethods, similarityMetric: BitArrayMetrics | MmDistanceFunctionsNames,
@@ -11,6 +11,7 @@ export async function _testSequenceSpaceReturnsResult(
11
11
  ) {
12
12
  // await grok.data.detectSemanticTypes(df);
13
13
  const col: DG.Column = df.getCol(colName);
14
+ df.name = 'seqSpaceDf';
14
15
  const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: col});
15
16
  if (semType)
16
17
  col.semType = semType;
@@ -18,12 +19,14 @@ export async function _testSequenceSpaceReturnsResult(
18
19
  const preprocessingFunc = DG.Func.find({package: 'Bio', name: 'macromoleculePreprocessingFunction'})[0];
19
20
  if (!preprocessingFunc)
20
21
  throw new Error('Preprocessing function not found');
21
- const sp = await grok.functions.call('Bio:sequenceSpaceTopMenu', {
22
+ await grok.functions.call('Bio:sequenceSpaceTopMenu', {
22
23
  table: df, molecules: df.col(colName)!,
23
24
  methodName: algorithm, similarityMetric: MmDistanceFunctionsNames.LEVENSHTEIN,
24
25
  plotEmbeddings: true, preprocessingFunction: preprocessingFunc, options: {[BYPASS_LARGE_DATA_WARNING]: true}
25
26
  });
26
27
  // const sp = await sequenceSpaceTopMenu(df, df.col(colName)!, algorithm, MmDistanceFunctionsNames.LEVENSHTEIN, true,
27
28
  // preprocessingFunc, {[BYPASS_LARGE_DATA_WARNING]: true});
28
- expect(sp != null, true);
29
+ const tv = grok.shell.tableView(df.name);
30
+ const sp = Array.from(tv?.viewers ?? [])[1];
31
+ expect(sp != null);
29
32
  }
@@ -31,6 +31,7 @@ import {CellRendererWithMonomerLibBackBase} from './monomer-cell-renderer-base';
31
31
  import * as C from './constants';
32
32
 
33
33
  import {_package} from '../package';
34
+ import {SeqTemps} from '@datagrok-libraries/bio/src/utils/macromolecule/seq-handler';
34
35
 
35
36
  type TempType = { [tagName: string]: any };
36
37
 
@@ -221,7 +222,7 @@ export class MacromoleculeDifferenceCellRendererBack extends CellRendererWithMon
221
222
  w = getUpdatedWidth(grid, g, x, w, dpr);
222
223
  //TODO: can this be replaced/merged with splitSequence?
223
224
  const [s1, s2] = s.split('#');
224
- const splitter = getSplitter(units, separator);
225
+ const splitter = this.tableCol.temp[SeqTemps.notationProvider]?.separatorSplitter ?? getSplitter(units, separator);
225
226
  const s1SS = splitter(s1);
226
227
  const s2SS = splitter(s2);
227
228
  const subParts1 = wu.count(0).take(s1SS.length).map((posIdx) => s1SS.getCanonical(posIdx)).toArray();
@@ -109,7 +109,7 @@ export class HelmToMolfileConverter implements IHelmToMolfileConverter {
109
109
  public convertToMolfileV3KColumn(helmCol: DG.Column<string>): DG.Column<string> {
110
110
  const df = helmCol.dataFrame;
111
111
  const molfileList = this.convertToMolfileV3K(helmCol.toList()).map((mwm) => mwm.molfile);
112
- const molColName = getUnusedColName(df, `molfileV2K(${helmCol.name})`);
112
+ const molColName = getUnusedColName(df, `molfileV3K(${helmCol.name})`);
113
113
  const molfileColumn = DG.Column.fromList('string', molColName, molfileList);
114
114
  return molfileColumn;
115
115
  }
@@ -54,7 +54,7 @@ export class MonomerLibFileValidator {
54
54
  `Bio: Monomer Library File Validator file ${fileName}, monomer '${name}' violating JSON schema:`,
55
55
  monomer,
56
56
  '\nError reason: ',
57
- this.validateMonomerSchema.errors,
57
+ JSON.stringify(this.validateMonomerSchema.errors ?? {}),
58
58
  `\nThere may be other errors in ${fileName} since the validation is stopped after the first error.`,
59
59
  ' Please, verify that the monomer library file satisfies the JSON schema'
60
60
  );
@@ -54,7 +54,7 @@ export class SeqHandler implements ISeqHandler {
54
54
  const defaultGapOriginal = this.isFasta() ? GapOriginals[NOTATION.FASTA] :
55
55
  this.isSeparator() ? GapOriginals[NOTATION.SEPARATOR] :
56
56
  this.isHelm() ? GapOriginals[NOTATION.HELM] :
57
- this.isCustom() ? this.notationProvider.defaultGapOriginal :
57
+ this.isCustom() ? (this.notationProvider?.defaultGapOriginal ?? GapOriginals[NOTATION.SEPARATOR]) :
58
58
  undefined;
59
59
  if (defaultGapOriginal == undefined)
60
60
  throw new Error(`Unexpected defaultGapOriginal for notation '${this.notation}'`);
@@ -97,6 +97,72 @@ export class SeqHandler implements ISeqHandler {
97
97
  }
98
98
 
99
99
  this.columnVersion = this.column.version;
100
+ // refine separator only at this stage
101
+ if (this.isSeparator() && (!this.isCustom() || !this.notationProvider) && !col.temp['seqHandlerRefined']) {
102
+ this.refineSeparatorNotation();
103
+ col.temp['seqHandlerRefined'] = true;
104
+ }
105
+ }
106
+
107
+ private async refineSeparatorNotation() {
108
+ // from detectors.
109
+ const SEQ_SAMPLE_LIMIT = 100; const SEQ_SAMPLE_LENGTH_LIMIT = 100;
110
+ const categoriesSample = [...new Set((this.column.length < SEQ_SAMPLE_LIMIT ?
111
+ wu.count(0).take(Math.min(SEQ_SAMPLE_LIMIT, this.column.length)).map((rowI) => this.column.get(rowI)) :
112
+ this.column.categories.slice(0, SEQ_SAMPLE_LIMIT))
113
+ .map((seq) => !!seq ? seq.substring(0, SEQ_SAMPLE_LENGTH_LIMIT * 5) : '')
114
+ .filter((seq) => seq.length !== 0/* skip empty values for detector */),
115
+ )];
116
+
117
+ const getStats = (values: string[], minLength: number, splitter: (s: string) => string[]) => {
118
+ const freq:{[key: string]: number} = {};
119
+ let sameLength = true;
120
+ let firstLength = null;
121
+
122
+ for (const seq of values) {
123
+ const mSeq = !!seq ? splitter(seq) : [];
124
+
125
+ if (firstLength === null)
126
+ firstLength = mSeq.length;
127
+ else if (mSeq.length !== firstLength)
128
+ sameLength = false;
129
+
130
+ if (mSeq.length >= minLength) {
131
+ for (const m of mSeq) {
132
+ if (!(m in freq)) freq[m] = 0;
133
+ freq[m] += 1;
134
+ }
135
+ }
136
+ }
137
+ return {freq: freq, sameLength: sameLength};
138
+ };
139
+
140
+ const stats = getStats(categoriesSample, 3, (s) => s.split(this.separator!));
141
+ let invalidateRequired = false;
142
+
143
+ const refinerList = [
144
+ {package: 'SequenceTranslator', name: 'refineNotationProviderForHarmonizedSequence'},
145
+ ];
146
+
147
+ for (const refineFuncFind of refinerList) {
148
+ try {
149
+ const funcList = DG.Func.find(refineFuncFind);
150
+ if (funcList.length === 0) continue;
151
+
152
+ const funcFc = funcList[0].prepare({col: this.column, stats: stats, separator: this.separator});
153
+ const refineRes = (await funcFc.call()).getOutputParamValue();
154
+ invalidateRequired ||= refineRes;
155
+ } catch (err) {
156
+ console.error(err);
157
+ }
158
+ }
159
+
160
+ if (invalidateRequired) {
161
+ // Applying custom notation provider MUST invalidate SeqHandler
162
+ delete this.column.temp[SeqTemps.seqHandler];
163
+
164
+ this.column.fireValuesChanged();
165
+ }
100
166
  }
101
167
 
102
168
  /** From detectMacromolecule */
@@ -89,11 +89,16 @@ export class SeqHelper implements ISeqHelper {
89
89
  //#endregion From HelmToMolfileConverter
90
90
  const helmList = helmCol.toList();
91
91
  const molList = new Array<string>(helmCol.length);
92
+ // this function is paralelized and in threads, so will not block the UI. OFC, we prefer to use it.
93
+ // if not found, we will use the default one running in main thread...
92
94
  const beautifyMolsChemFunc = DG.Func.find({package: 'Chem', name: 'beautifyMols'})[0];
93
- // both options set to false, ugly molfiles
95
+ // similarly, OCL Function is also paralelized and in threads, so will not block the UI.
96
+ const OCLFunc = DG.Func.find({package: 'Chem', name: 'convertToV3KViaOCL'})[0];
97
+
98
+ // depending on the function found, we will use it or not. if not, use internal OCL and beautification
94
99
  for (let i = 0; i < helmCol.length; i++) {
95
100
  molList[i] = (this.helmToAtomicLevelSingle(helmList[i], converter,
96
- chiralityEngine, !beautifyMolsChemFunc)).molfile;
101
+ chiralityEngine && !OCLFunc, !beautifyMolsChemFunc)).molfile;
97
102
  }
98
103
  // need to beautify the molfiles
99
104
  if (beautifyMolsChemFunc) {
@@ -103,6 +108,15 @@ export class SeqHelper implements ISeqHelper {
103
108
  beautifiedMols[i] && (molList[i] = beautifiedMols[i]);
104
109
  }
105
110
  }
111
+ // handle OCL
112
+ if (chiralityEngine && OCLFunc) {
113
+ const oclMols = await OCLFunc.apply({mols: molList});
114
+ if (oclMols && Array.isArray(oclMols) && oclMols.length === helmCol.length) {
115
+ for (let i = 0; i < helmCol.length; i++)
116
+ oclMols[i] && (molList[i] = oclMols[i]);
117
+ } else
118
+ grok.shell.warning('OCL function returned an unexpected result');
119
+ }
106
120
  //const molHlList = molfilesV3K.map((item: MolfileWithMap) => getMolHighlight(item.monomers.values(), monomerLib));
107
121
 
108
122
  const molCol = DG.Column.fromStrings(molColName, molList);