@datagrok/bio 2.20.5 → 2.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +5 -0
- package/detectors.js +1 -33
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/package.json +1 -1
- package/src/utils/cell-renderer.ts +2 -1
- package/src/utils/seq-helper/seq-handler.ts +67 -1
- package/test-console-output-1.log +344 -336
- package/test-record-1.mp4 +0 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.20.5",
|
|
8
|
+
"version": "2.21.0",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -31,6 +31,7 @@ import {CellRendererWithMonomerLibBackBase} from './monomer-cell-renderer-base';
|
|
|
31
31
|
import * as C from './constants';
|
|
32
32
|
|
|
33
33
|
import {_package} from '../package';
|
|
34
|
+
import {SeqTemps} from '@datagrok-libraries/bio/src/utils/macromolecule/seq-handler';
|
|
34
35
|
|
|
35
36
|
type TempType = { [tagName: string]: any };
|
|
36
37
|
|
|
@@ -221,7 +222,7 @@ export class MacromoleculeDifferenceCellRendererBack extends CellRendererWithMon
|
|
|
221
222
|
w = getUpdatedWidth(grid, g, x, w, dpr);
|
|
222
223
|
//TODO: can this be replaced/merged with splitSequence?
|
|
223
224
|
const [s1, s2] = s.split('#');
|
|
224
|
-
const splitter = getSplitter(units, separator);
|
|
225
|
+
const splitter = this.tableCol.temp[SeqTemps.notationProvider]?.separatorSplitter ?? getSplitter(units, separator);
|
|
225
226
|
const s1SS = splitter(s1);
|
|
226
227
|
const s2SS = splitter(s2);
|
|
227
228
|
const subParts1 = wu.count(0).take(s1SS.length).map((posIdx) => s1SS.getCanonical(posIdx)).toArray();
|
|
@@ -54,7 +54,7 @@ export class SeqHandler implements ISeqHandler {
|
|
|
54
54
|
const defaultGapOriginal = this.isFasta() ? GapOriginals[NOTATION.FASTA] :
|
|
55
55
|
this.isSeparator() ? GapOriginals[NOTATION.SEPARATOR] :
|
|
56
56
|
this.isHelm() ? GapOriginals[NOTATION.HELM] :
|
|
57
|
-
this.isCustom() ? this.notationProvider
|
|
57
|
+
this.isCustom() ? (this.notationProvider?.defaultGapOriginal ?? GapOriginals[NOTATION.SEPARATOR]) :
|
|
58
58
|
undefined;
|
|
59
59
|
if (defaultGapOriginal == undefined)
|
|
60
60
|
throw new Error(`Unexpected defaultGapOriginal for notation '${this.notation}'`);
|
|
@@ -97,6 +97,72 @@ export class SeqHandler implements ISeqHandler {
|
|
|
97
97
|
}
|
|
98
98
|
|
|
99
99
|
this.columnVersion = this.column.version;
|
|
100
|
+
// refine separator only at this stage
|
|
101
|
+
if (this.isSeparator() && (!this.isCustom() || !this.notationProvider) && !col.temp['seqHandlerRefined']) {
|
|
102
|
+
this.refineSeparatorNotation();
|
|
103
|
+
col.temp['seqHandlerRefined'] = true;
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
private async refineSeparatorNotation() {
|
|
108
|
+
// from detectors.
|
|
109
|
+
const SEQ_SAMPLE_LIMIT = 100; const SEQ_SAMPLE_LENGTH_LIMIT = 100;
|
|
110
|
+
const categoriesSample = [...new Set((this.column.length < SEQ_SAMPLE_LIMIT ?
|
|
111
|
+
wu.count(0).take(Math.min(SEQ_SAMPLE_LIMIT, this.column.length)).map((rowI) => this.column.get(rowI)) :
|
|
112
|
+
this.column.categories.slice(0, SEQ_SAMPLE_LIMIT))
|
|
113
|
+
.map((seq) => !!seq ? seq.substring(0, SEQ_SAMPLE_LENGTH_LIMIT * 5) : '')
|
|
114
|
+
.filter((seq) => seq.length !== 0/* skip empty values for detector */),
|
|
115
|
+
)];
|
|
116
|
+
|
|
117
|
+
const getStats = (values: string[], minLength: number, splitter: (s: string) => string[]) => {
|
|
118
|
+
const freq:{[key: string]: number} = {};
|
|
119
|
+
let sameLength = true;
|
|
120
|
+
let firstLength = null;
|
|
121
|
+
|
|
122
|
+
for (const seq of values) {
|
|
123
|
+
const mSeq = !!seq ? splitter(seq) : [];
|
|
124
|
+
|
|
125
|
+
if (firstLength === null)
|
|
126
|
+
firstLength = mSeq.length;
|
|
127
|
+
else if (mSeq.length !== firstLength)
|
|
128
|
+
sameLength = false;
|
|
129
|
+
|
|
130
|
+
if (mSeq.length >= minLength) {
|
|
131
|
+
for (const m of mSeq) {
|
|
132
|
+
if (!(m in freq)) freq[m] = 0;
|
|
133
|
+
freq[m] += 1;
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
return {freq: freq, sameLength: sameLength};
|
|
138
|
+
};
|
|
139
|
+
|
|
140
|
+
const stats = getStats(categoriesSample, 3, (s) => s.split(this.separator!));
|
|
141
|
+
let invalidateRequired = false;
|
|
142
|
+
|
|
143
|
+
const refinerList = [
|
|
144
|
+
{package: 'SequenceTranslator', name: 'refineNotationProviderForHarmonizedSequence'},
|
|
145
|
+
];
|
|
146
|
+
|
|
147
|
+
for (const refineFuncFind of refinerList) {
|
|
148
|
+
try {
|
|
149
|
+
const funcList = DG.Func.find(refineFuncFind);
|
|
150
|
+
if (funcList.length === 0) continue;
|
|
151
|
+
|
|
152
|
+
const funcFc = funcList[0].prepare({col: this.column, stats: stats, separator: this.separator});
|
|
153
|
+
const refineRes = (await funcFc.call()).getOutputParamValue();
|
|
154
|
+
invalidateRequired ||= refineRes;
|
|
155
|
+
} catch (err) {
|
|
156
|
+
console.error(err);
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
if (invalidateRequired) {
|
|
161
|
+
// Applying custom notation provider MUST invalidate SeqHandler
|
|
162
|
+
delete this.column.temp[SeqTemps.seqHandler];
|
|
163
|
+
|
|
164
|
+
this.column.fireValuesChanged();
|
|
165
|
+
}
|
|
100
166
|
}
|
|
101
167
|
|
|
102
168
|
/** From detectMacromolecule */
|