@datagrok/bio 2.20.5 → 2.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.20.5",
8
+ "version": "2.21.0",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -31,6 +31,7 @@ import {CellRendererWithMonomerLibBackBase} from './monomer-cell-renderer-base';
31
31
  import * as C from './constants';
32
32
 
33
33
  import {_package} from '../package';
34
+ import {SeqTemps} from '@datagrok-libraries/bio/src/utils/macromolecule/seq-handler';
34
35
 
35
36
  type TempType = { [tagName: string]: any };
36
37
 
@@ -221,7 +222,7 @@ export class MacromoleculeDifferenceCellRendererBack extends CellRendererWithMon
221
222
  w = getUpdatedWidth(grid, g, x, w, dpr);
222
223
  //TODO: can this be replaced/merged with splitSequence?
223
224
  const [s1, s2] = s.split('#');
224
- const splitter = getSplitter(units, separator);
225
+ const splitter = this.tableCol.temp[SeqTemps.notationProvider]?.separatorSplitter ?? getSplitter(units, separator);
225
226
  const s1SS = splitter(s1);
226
227
  const s2SS = splitter(s2);
227
228
  const subParts1 = wu.count(0).take(s1SS.length).map((posIdx) => s1SS.getCanonical(posIdx)).toArray();
@@ -54,7 +54,7 @@ export class SeqHandler implements ISeqHandler {
54
54
  const defaultGapOriginal = this.isFasta() ? GapOriginals[NOTATION.FASTA] :
55
55
  this.isSeparator() ? GapOriginals[NOTATION.SEPARATOR] :
56
56
  this.isHelm() ? GapOriginals[NOTATION.HELM] :
57
- this.isCustom() ? this.notationProvider.defaultGapOriginal :
57
+ this.isCustom() ? (this.notationProvider?.defaultGapOriginal ?? GapOriginals[NOTATION.SEPARATOR]) :
58
58
  undefined;
59
59
  if (defaultGapOriginal == undefined)
60
60
  throw new Error(`Unexpected defaultGapOriginal for notation '${this.notation}'`);
@@ -97,6 +97,72 @@ export class SeqHandler implements ISeqHandler {
97
97
  }
98
98
 
99
99
  this.columnVersion = this.column.version;
100
+ // refine separator only at this stage
101
+ if (this.isSeparator() && (!this.isCustom() || !this.notationProvider) && !col.temp['seqHandlerRefined']) {
102
+ this.refineSeparatorNotation();
103
+ col.temp['seqHandlerRefined'] = true;
104
+ }
105
+ }
106
+
107
/**
 * Gives external refiner functions a chance to refine the notation of this separator column.
 *
 * Takes a sample of the column values, computes per-token frequencies for it and passes
 * the column, the stats and the separator to every refiner function that can be found.
 * When at least one refiner reports a truthy result, the SeqHandler cached in the column
 * temp is dropped and a values-changed event is fired on the column.
 *
 * The visible call site invokes this method without awaiting it, so the returned promise
 * never rejects: every failure is reported through `console.error` instead.
 */
private async refineSeparatorNotation(): Promise<void> {
  try {
    // Sampling limits, taken from detectors.
    const SEQ_SAMPLE_LIMIT = 100;
    const SEQ_SAMPLE_LENGTH_LIMIT = 100;

    // Small columns are sampled row by row, larger ones through their first categories.
    const rawSample = this.column.length < SEQ_SAMPLE_LIMIT ?
      wu.count(0).take(this.column.length).map((rowI) => this.column.get(rowI)) :
      this.column.categories.slice(0, SEQ_SAMPLE_LIMIT);

    // A Set keeps first-seen order while removing duplicates.
    const uniqueSample = new Set<string>();
    for (const seq of rawSample) {
      const head: string = !!seq ? seq.substring(0, SEQ_SAMPLE_LENGTH_LIMIT * 5) : '';
      if (head.length !== 0) // skip empty values for detector
        uniqueSample.add(head);
    }
    const categoriesSample = [...uniqueSample];

    /**
     * Counts token occurrences over all values having at least `minLength` tokens and
     * reports whether every value splits into the same number of tokens.
     */
    const getStats = (values: string[], minLength: number, splitter: (s: string) => string[]) => {
      const freq: { [key: string]: number } = {};
      let sameLength = true;
      let firstLength: number | null = null;

      for (const seq of values) {
        const mSeq = !!seq ? splitter(seq) : [];

        if (firstLength === null)
          firstLength = mSeq.length;
        else if (mSeq.length !== firstLength)
          sameLength = false;

        if (mSeq.length < minLength) continue;

        for (const m of mSeq) {
          // Own-property check: `in` would also match inherited keys such as 'constructor'.
          const seen = Object.prototype.hasOwnProperty.call(freq, m) ? freq[m] : 0;
          freq[m] = seen + 1;
        }
      }
      return {freq: freq, sameLength: sameLength};
    };

    const separator = this.separator;
    const stats = getStats(categoriesSample, 3, (s) => s.split(separator!));
    let invalidateRequired = false;

    const refinerList = [
      {package: 'SequenceTranslator', name: 'refineNotationProviderForHarmonizedSequence'},
    ];

    for (const refineFuncFind of refinerList) {
      // A failing or missing refiner must not prevent the remaining ones from running.
      try {
        const funcList = DG.Func.find(refineFuncFind);
        if (funcList.length === 0) continue;

        const funcFc = funcList[0].prepare({col: this.column, stats: stats, separator: separator});
        const refineRes = (await funcFc.call()).getOutputParamValue();
        if (refineRes) invalidateRequired = true;
      } catch (err) {
        console.error(err);
      }
    }

    if (invalidateRequired) {
      // Applying custom notation provider MUST invalidate SeqHandler
      delete this.column.temp[SeqTemps.seqHandler];

      this.column.fireValuesChanged();
    }
  } catch (err) {
    // Nobody awaits this method; log instead of leaving an unhandled promise rejection.
    console.error(err);
  }
}
101
167
 
102
168
  /** From detectMacromolecule */