@datagrok/bio 2.11.42 → 2.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/README.md +1 -1
- package/detectors.js +11 -11
- package/dist/36.js +1 -1
- package/dist/36.js.map +1 -1
- package/dist/413.js +1 -1
- package/dist/413.js.map +1 -1
- package/dist/590.js +1 -1
- package/dist/590.js.map +1 -1
- package/dist/709.js +1 -1
- package/dist/709.js.map +1 -1
- package/dist/895.js +1 -1
- package/dist/895.js.map +1 -1
- package/dist/package-test.js +2 -2
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +2 -2
- package/dist/package.js.map +1 -1
- package/package.json +10 -10
- package/src/analysis/sequence-activity-cliffs.ts +9 -9
- package/src/analysis/sequence-diversity-viewer.ts +3 -3
- package/src/analysis/sequence-search-base-viewer.ts +2 -2
- package/src/analysis/sequence-similarity-viewer.ts +10 -10
- package/src/analysis/sequence-space.ts +26 -23
- package/src/calculations/monomerLevelMols.ts +13 -11
- package/src/package.ts +8 -8
- package/src/tests/WebLogo-layout-tests.ts +5 -2
- package/src/tests/WebLogo-positions-test.ts +5 -5
- package/src/tests/bio-tests.ts +13 -6
- package/src/tests/converters-test.ts +4 -4
- package/src/tests/detectors-benchmark-tests.ts +5 -5
- package/src/tests/detectors-tests.ts +13 -13
- package/src/tests/fasta-export-tests.ts +10 -4
- package/src/tests/mm-distance-tests.ts +10 -10
- package/src/tests/msa-tests.ts +8 -15
- package/src/tests/renderers-monomer-placer.ts +3 -3
- package/src/tests/renderers-test.ts +6 -8
- package/src/tests/splitters-test.ts +14 -13
- package/src/tests/to-atomic-level-tests.ts +2 -2
- package/src/tests/units-handler-get-region.ts +4 -4
- package/src/tests/units-handler-splitted-tests.ts +19 -17
- package/src/tests/units-handler-tests.ts +32 -32
- package/src/utils/cell-renderer.ts +40 -34
- package/src/utils/check-input-column.ts +5 -5
- package/src/utils/context-menu.ts +9 -6
- package/src/utils/convert.ts +9 -9
- package/src/utils/get-region-func-editor.ts +11 -11
- package/src/utils/get-region.ts +10 -12
- package/src/utils/macromolecule-column-widget.ts +4 -3
- package/src/utils/monomer-lib/library-file-manager/event-manager.ts +1 -1
- package/src/utils/multiple-sequence-alignment-ui.ts +6 -6
- package/src/utils/pepsea.ts +1 -0
- package/src/utils/poly-tool/transformation.ts +3 -3
- package/src/utils/save-as-fasta.ts +14 -15
- package/src/utils/sequence-to-mol.ts +4 -4
- package/src/viewers/web-logo-viewer.ts +46 -54
- package/src/widgets/bio-substructure-filter.ts +3 -3
- package/src/widgets/composition-analysis-widget.ts +8 -8
|
@@ -16,7 +16,10 @@ import {
|
|
|
16
16
|
} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
17
17
|
import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
|
|
18
18
|
import {UnknownSeqPalettes} from '@datagrok-libraries/bio/src/unknown';
|
|
19
|
-
import {
|
|
19
|
+
import {GapOriginals, SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
20
|
+
import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
21
|
+
import {getSplitter} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
|
|
22
|
+
import {errInfo} from '@datagrok-libraries/bio/src/utils/err-info';
|
|
20
23
|
|
|
21
24
|
import {
|
|
22
25
|
Temps as mmcrTemps, Tags as mmcrTags,
|
|
@@ -25,10 +28,6 @@ import {
|
|
|
25
28
|
import * as C from './constants';
|
|
26
29
|
|
|
27
30
|
import {_package, getBioLib} from '../package';
|
|
28
|
-
import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
29
|
-
import {getSplitter} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
|
|
30
|
-
import {errInfo} from '@datagrok-libraries/bio/src/utils/err-info';
|
|
31
|
-
|
|
32
31
|
|
|
33
32
|
type TempType = { [tagName: string]: any };
|
|
34
33
|
|
|
@@ -39,7 +38,7 @@ function getUpdatedWidth(grid: DG.Grid | null, g: CanvasRenderingContext2D, x: n
|
|
|
39
38
|
return !!grid ? Math.max(Math.min(grid.canvas.width / dpr - x, w)) : Math.max(g.canvas.width / dpr - x, 0);
|
|
40
39
|
}
|
|
41
40
|
|
|
42
|
-
export function processSequence(subParts:
|
|
41
|
+
export function processSequence(subParts: string[]): [string[], boolean] {
|
|
43
42
|
const simplified = !wu.enumerate(subParts).some(([amino, index]) =>
|
|
44
43
|
amino.length > 1 &&
|
|
45
44
|
index != 0 &&
|
|
@@ -99,6 +98,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
99
98
|
const monomerSymbol: string = seqMonList[left];
|
|
100
99
|
const tooltipElements: HTMLElement[] = [ui.div(monomerSymbol)];
|
|
101
100
|
if (seqColTemp._monomerStructureMap[monomerSymbol]) {
|
|
101
|
+
//
|
|
102
102
|
tooltipElements.push(seqColTemp._monomerStructureMap[monomerSymbol]);
|
|
103
103
|
} else {
|
|
104
104
|
const monomer = seqColTemp.getMonomer(monomerSymbol);
|
|
@@ -111,6 +111,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
111
111
|
}
|
|
112
112
|
ui.tooltip.show(ui.divV(tooltipElements), e.x + 16, e.y + 16);
|
|
113
113
|
} else {
|
|
114
|
+
//
|
|
114
115
|
ui.tooltip.hide();
|
|
115
116
|
}
|
|
116
117
|
}
|
|
@@ -163,10 +164,10 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
163
164
|
if (!seqColTemp) {
|
|
164
165
|
seqColTemp = new MonomerPlacer(grid, tableCol,
|
|
165
166
|
() => {
|
|
166
|
-
const
|
|
167
|
+
const sh = SeqHandler.forColumn(tableCol);
|
|
167
168
|
return {
|
|
168
|
-
|
|
169
|
-
monomerCharWidth: 7, separatorWidth: !
|
|
169
|
+
seqHandler: sh,
|
|
170
|
+
monomerCharWidth: 7, separatorWidth: !sh.isMsa() ? gapLength : msaGapLength,
|
|
170
171
|
monomerToShort: monomerToShortFunction, monomerLengthLimit: maxLengthOfMonomer,
|
|
171
172
|
monomerLib: getBioLib()
|
|
172
173
|
};
|
|
@@ -193,6 +194,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
193
194
|
const dpr = window.devicePixelRatio;
|
|
194
195
|
const grid = gridCell.gridRow !== -1 ? gridCell.grid : null;
|
|
195
196
|
const value: any = gridCell.cell.value;
|
|
197
|
+
const rowIdx = gridCell.cell.rowIndex;
|
|
196
198
|
const paletteType = tableCol.getTag(bioTAGS.alphabet);
|
|
197
199
|
const minDistanceRenderer = 50;
|
|
198
200
|
w = getUpdatedWidth(grid, g, x, w, dpr);
|
|
@@ -210,18 +212,22 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
210
212
|
|
|
211
213
|
const separator = tableCol.getTag(bioTAGS.separator) ?? '';
|
|
212
214
|
const splitLimit = w / 5;
|
|
213
|
-
const
|
|
214
|
-
const splitterFunc: SplitterFunc = uh.getSplitter(splitLimit);
|
|
215
|
+
const sh = SeqHandler.forColumn(tableCol);
|
|
215
216
|
|
|
216
217
|
const tempReferenceSequence: string | null = tableColTemp[tempTAGS.referenceSequence];
|
|
217
218
|
const tempCurrentWord: string | null = tableColTemp[tempTAGS.currentWord];
|
|
218
219
|
if (tempCurrentWord && tableCol?.dataFrame?.currentRowIdx === -1)
|
|
219
220
|
tableColTemp[tempTAGS.currentWord] = null;
|
|
220
|
-
const referenceSequence: ISeqSplitted = splitterFunc(
|
|
221
|
-
((tempReferenceSequence != null) && (tempReferenceSequence != '')) ?
|
|
222
|
-
tempReferenceSequence : tempCurrentWord ?? '');
|
|
223
221
|
|
|
224
|
-
const
|
|
222
|
+
const referenceSequence: string[] = (() => {
|
|
223
|
+
// @ts-ignore
|
|
224
|
+
const splitterFunc: SplitterFunc = sh.getSplitter(splitLimit);
|
|
225
|
+
return wu(splitterFunc(
|
|
226
|
+
((tempReferenceSequence != null) && (tempReferenceSequence != '')) ?
|
|
227
|
+
tempReferenceSequence : tempCurrentWord ?? '').originals).toArray();
|
|
228
|
+
})();
|
|
229
|
+
|
|
230
|
+
const subParts: ISeqSplitted = sh.getSplitted(rowIdx);
|
|
225
231
|
/* let x1 = x; */
|
|
226
232
|
let color = undefinedColor;
|
|
227
233
|
let drawStyle = DrawStyle.classic;
|
|
@@ -229,14 +235,15 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
229
235
|
if (aligned && aligned.includes('MSA') && units == NOTATION.SEPARATOR)
|
|
230
236
|
drawStyle = DrawStyle.MSA;
|
|
231
237
|
|
|
232
|
-
for (
|
|
238
|
+
for (let posIdx: number = 0; posIdx < subParts.length; ++posIdx) {
|
|
239
|
+
const amino: string = subParts.getOriginal(posIdx);
|
|
233
240
|
color = palette.get(amino);
|
|
234
241
|
g.fillStyle = undefinedColor;
|
|
235
|
-
const last =
|
|
242
|
+
const last = posIdx === subParts.length - 1;
|
|
236
243
|
/*x1 = */
|
|
237
244
|
printLeftOrCentered(x + this.padding, y, w, h,
|
|
238
245
|
g, amino, color, 0, true, 1.0, separator, last, drawStyle,
|
|
239
|
-
maxLengthWordsSum,
|
|
246
|
+
maxLengthWordsSum, posIdx, gridCell, referenceSequence, maxLengthOfMonomer, seqColTemp._monomerLengthMap);
|
|
240
247
|
if (minDistanceRenderer > w) break;
|
|
241
248
|
}
|
|
242
249
|
} catch (err: any) {
|
|
@@ -285,8 +292,8 @@ export class MacromoleculeDifferenceCellRenderer extends DG.GridCellRenderer {
|
|
|
285
292
|
//TODO: can this be replaced/merged with splitSequence?
|
|
286
293
|
const [s1, s2] = s.split('#');
|
|
287
294
|
const splitter = getSplitter(units, separator);
|
|
288
|
-
const subParts1 = splitter(s1);
|
|
289
|
-
const subParts2 = splitter(s2);
|
|
295
|
+
const subParts1 = wu(splitter(s1).canonicals).toArray();
|
|
296
|
+
const subParts2 = wu(splitter(s2).canonicals).toArray();
|
|
290
297
|
drawMoleculeDifferenceOnCanvas(g, x, y, w, h, subParts1, subParts2, units);
|
|
291
298
|
}
|
|
292
299
|
}
|
|
@@ -297,14 +304,14 @@ export function drawMoleculeDifferenceOnCanvas(
|
|
|
297
304
|
y: number,
|
|
298
305
|
w: number,
|
|
299
306
|
h: number,
|
|
300
|
-
subParts1:
|
|
301
|
-
subParts2:
|
|
307
|
+
subParts1: string[],
|
|
308
|
+
subParts2: string[],
|
|
302
309
|
units: string,
|
|
303
310
|
fullStringLength?: boolean,
|
|
304
311
|
molDifferences?: { [key: number]: HTMLCanvasElement },
|
|
305
312
|
): void {
|
|
306
313
|
if (subParts1.length !== subParts2.length) {
|
|
307
|
-
const sequences: IComparedSequences = fillShorterSequence(
|
|
314
|
+
const sequences: IComparedSequences = fillShorterSequence(subParts1, subParts2);
|
|
308
315
|
subParts1 = sequences.subParts1;
|
|
309
316
|
subParts2 = sequences.subParts2;
|
|
310
317
|
}
|
|
@@ -343,7 +350,10 @@ export function drawMoleculeDifferenceOnCanvas(
|
|
|
343
350
|
updatedX = Math.max(subX1, subX0);
|
|
344
351
|
if (molDifferences)
|
|
345
352
|
molDifferences[i] = createDifferenceCanvas(amino1, amino2, color1, color2, updatedY, vShift, h);
|
|
346
|
-
} else {
|
|
353
|
+
} else {
|
|
354
|
+
//
|
|
355
|
+
updatedX = printLeftOrCentered(updatedX, updatedY, w, h, g, amino1, color1, 0, true, 0.5);
|
|
356
|
+
}
|
|
347
357
|
updatedX += 4;
|
|
348
358
|
}
|
|
349
359
|
g.restore();
|
|
@@ -354,14 +364,9 @@ interface IComparedSequences {
|
|
|
354
364
|
subParts2: string[];
|
|
355
365
|
}
|
|
356
366
|
|
|
357
|
-
function createDifferenceCanvas(
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
color1: string,
|
|
361
|
-
color2: string,
|
|
362
|
-
y: number,
|
|
363
|
-
shift: number,
|
|
364
|
-
h: number): HTMLCanvasElement {
|
|
367
|
+
function createDifferenceCanvas(amino1: string, amino2: string, color1: string, color2: string,
|
|
368
|
+
y: number, shift: number, h: number
|
|
369
|
+
): HTMLCanvasElement {
|
|
365
370
|
const canvas = document.createElement('canvas');
|
|
366
371
|
const context = canvas.getContext('2d')!;
|
|
367
372
|
context.font = '12px monospace';
|
|
@@ -394,7 +399,8 @@ function fillShorterSequence(subParts1: string[], subParts2: string[]): ICompare
|
|
|
394
399
|
numIdenticalEnd++;
|
|
395
400
|
}
|
|
396
401
|
|
|
397
|
-
const emptyMonomersArray = new Array<string>(Math.abs(subParts1.length - subParts2.length))
|
|
402
|
+
const emptyMonomersArray = new Array<string>(Math.abs(subParts1.length - subParts2.length))
|
|
403
|
+
.fill(GapOriginals[NOTATION.FASTA]);
|
|
398
404
|
|
|
399
405
|
function concatWithEmptyVals(subparts: string[]): string[] {
|
|
400
406
|
return numIdenticalStart > numIdenticalEnd ?
|
|
@@ -402,6 +408,6 @@ function fillShorterSequence(subParts1: string[], subParts2: string[]): ICompare
|
|
|
402
408
|
}
|
|
403
409
|
|
|
404
410
|
subParts1.length > subParts2.length ?
|
|
405
|
-
subParts2 = concatWithEmptyVals(subParts2) : subParts1 = concatWithEmptyVals(subParts1);
|
|
411
|
+
subParts2 = concatWithEmptyVals(wu(subParts2).toArray()) : subParts1 = concatWithEmptyVals(wu(subParts1).toArray());
|
|
406
412
|
return {subParts1: subParts1, subParts2: subParts2};
|
|
407
413
|
}
|
|
@@ -2,7 +2,7 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
|
|
5
|
-
import {
|
|
5
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
6
6
|
|
|
7
7
|
/**
|
|
8
8
|
* Checks if the column is suitable for the analysis.
|
|
@@ -35,12 +35,12 @@ export function checkInputColumn(
|
|
|
35
35
|
let res: boolean = true;
|
|
36
36
|
let msg: string = '';
|
|
37
37
|
|
|
38
|
-
const
|
|
38
|
+
const sh = SeqHandler.forColumn(col);
|
|
39
39
|
if (col.semType !== DG.SEMTYPE.MACROMOLECULE) {
|
|
40
40
|
grok.shell.warning(name + ' analysis is allowed for Macromolecules semantic type');
|
|
41
41
|
res = false;
|
|
42
42
|
} else {
|
|
43
|
-
const notation: string =
|
|
43
|
+
const notation: string = sh.notation;
|
|
44
44
|
if (allowedNotations.length > 0 &&
|
|
45
45
|
!allowedNotations.some((n) => notation.toUpperCase() == (n.toUpperCase()))
|
|
46
46
|
) {
|
|
@@ -48,9 +48,9 @@ export function checkInputColumn(
|
|
|
48
48
|
(`notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')} `);
|
|
49
49
|
msg = `${name} + ' analysis is allowed for Macromolecules with notation ${notationAdd}.`;
|
|
50
50
|
res = false;
|
|
51
|
-
} else if (!
|
|
51
|
+
} else if (!sh.isHelm()) {
|
|
52
52
|
// alphabet is not specified for 'helm' notation
|
|
53
|
-
const alphabet: string =
|
|
53
|
+
const alphabet: string = sh.alphabet;
|
|
54
54
|
if (
|
|
55
55
|
allowedAlphabets.length > 0 &&
|
|
56
56
|
!allowedAlphabets.some((a) => alphabet.toUpperCase() == (a.toUpperCase()))
|
|
@@ -2,22 +2,25 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
|
|
5
|
-
import {
|
|
5
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
6
6
|
import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
7
7
|
|
|
8
8
|
import {_package} from '../package';
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
export function addCopyMenuUI(cell: DG.Cell, menu: DG.Menu): void {
|
|
12
|
-
const
|
|
13
|
-
const tgtNotationList: string[] = Object.values(NOTATION).filter((v) => v !==
|
|
12
|
+
const sh = SeqHandler.forColumn(cell.column);
|
|
13
|
+
const tgtNotationList: string[] = Object.values(NOTATION).filter((v) => v !== sh.units);
|
|
14
14
|
|
|
15
15
|
menu.group('Copy')
|
|
16
16
|
.items(tgtNotationList, (tgtNotation) => {
|
|
17
|
-
const
|
|
17
|
+
const srcCol = cell.column;
|
|
18
|
+
const srcRowIdx = cell.rowIndex;
|
|
19
|
+
const srcSh = SeqHandler.forColumn(srcCol);
|
|
18
20
|
const separator = tgtNotation === NOTATION.SEPARATOR ? _package.properties.DefaultSeparator : undefined;
|
|
19
|
-
const
|
|
20
|
-
const
|
|
21
|
+
const joiner = srcSh.getJoiner({notation: tgtNotation as NOTATION, separator});
|
|
22
|
+
const srcSS = srcSh.getSplitted(srcRowIdx);
|
|
23
|
+
const tgtSeq = joiner(srcSS);
|
|
21
24
|
|
|
22
25
|
if (!navigator.clipboard) {
|
|
23
26
|
grok.shell.warning('The clipboard functionality requires a secure origin — either HTTPS or localhost');
|
package/src/utils/convert.ts
CHANGED
|
@@ -4,9 +4,9 @@ import * as grok from 'datagrok-api/grok';
|
|
|
4
4
|
|
|
5
5
|
import $ from 'cash-dom';
|
|
6
6
|
import {Subscription} from 'rxjs';
|
|
7
|
-
|
|
8
|
-
import {
|
|
9
|
-
import {
|
|
7
|
+
|
|
8
|
+
import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
9
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
let convertDialog: DG.Dialog | null = null;
|
|
@@ -21,8 +21,8 @@ export function convert(col?: DG.Column): void {
|
|
|
21
21
|
let tgtCol = col ?? grok.shell.t.columns.bySemType('Macromolecule')!;
|
|
22
22
|
if (!tgtCol)
|
|
23
23
|
throw new Error('No column with Macromolecule semantic type found');
|
|
24
|
-
let
|
|
25
|
-
let currentNotation: NOTATION =
|
|
24
|
+
let converterSh = SeqHandler.forColumn(tgtCol);
|
|
25
|
+
let currentNotation: NOTATION = converterSh.notation;
|
|
26
26
|
const dialogHeader = ui.divText(
|
|
27
27
|
'Current notation: ' + currentNotation,
|
|
28
28
|
{
|
|
@@ -46,8 +46,8 @@ export function convert(col?: DG.Column): void {
|
|
|
46
46
|
}
|
|
47
47
|
|
|
48
48
|
tgtCol = newCol;
|
|
49
|
-
|
|
50
|
-
currentNotation =
|
|
49
|
+
converterSh = SeqHandler.forColumn(tgtCol);
|
|
50
|
+
currentNotation = converterSh.notation;
|
|
51
51
|
if (currentNotation === NOTATION.HELM)
|
|
52
52
|
separatorInput.value = '/'; // helm monomers can have - in the name like D-aThr;
|
|
53
53
|
dialogHeader.textContent = 'Current notation: ' + currentNotation;
|
|
@@ -116,8 +116,8 @@ export function convert(col?: DG.Column): void {
|
|
|
116
116
|
* @param {string | null} separator Separator for SEPARATOR notation
|
|
117
117
|
*/
|
|
118
118
|
export async function convertDo(srcCol: DG.Column, targetNotation: NOTATION, separator?: string): Promise<DG.Column> {
|
|
119
|
-
const
|
|
120
|
-
const newColumn =
|
|
119
|
+
const converterSh = SeqHandler.forColumn(srcCol);
|
|
120
|
+
const newColumn = converterSh.convert(targetNotation, separator);
|
|
121
121
|
srcCol.dataFrame.columns.add(newColumn);
|
|
122
122
|
|
|
123
123
|
// Call detector directly to escape some error on detectSemanticTypes
|
|
@@ -3,6 +3,9 @@ import * as ui from 'datagrok-api/ui';
|
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
5
|
import {TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
6
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
7
|
+
|
|
8
|
+
import {_package} from '../package';
|
|
6
9
|
|
|
7
10
|
export interface GetRegionParams {
|
|
8
11
|
table: DG.DataFrame,
|
|
@@ -12,9 +15,6 @@ export interface GetRegionParams {
|
|
|
12
15
|
/** Name for the column with sequence of the region */ name: string | null,
|
|
13
16
|
}
|
|
14
17
|
|
|
15
|
-
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
16
|
-
import {_package} from '../package';
|
|
17
|
-
|
|
18
18
|
export interface SeqRegion {
|
|
19
19
|
name: string,
|
|
20
20
|
description: string,
|
|
@@ -70,7 +70,7 @@ export class GetRegionFuncEditor {
|
|
|
70
70
|
|
|
71
71
|
private sequenceInputChanged(): void {
|
|
72
72
|
const seqCol = this.inputs.sequence.value;
|
|
73
|
-
const
|
|
73
|
+
const sh = seqCol ? SeqHandler.forColumn(seqCol) : null;
|
|
74
74
|
this.updateRegionItems();
|
|
75
75
|
this.updateStartEndInputItems();
|
|
76
76
|
this.updateRegion(true);
|
|
@@ -89,9 +89,9 @@ export class GetRegionFuncEditor {
|
|
|
89
89
|
this.inputs.start.value = reg?.start;
|
|
90
90
|
this.inputs.end.value = reg?.end;
|
|
91
91
|
} else {
|
|
92
|
-
const
|
|
93
|
-
this.inputs.start.value =
|
|
94
|
-
this.inputs.end.value =
|
|
92
|
+
const sh = SeqHandler.forColumn(this.inputs.sequence.value!);
|
|
93
|
+
this.inputs.start.value = sh.posList[0];
|
|
94
|
+
this.inputs.end.value = sh.posList[sh.posList.length - 1];
|
|
95
95
|
}
|
|
96
96
|
} finally {
|
|
97
97
|
this.fixRegion = false;
|
|
@@ -122,13 +122,13 @@ export class GetRegionFuncEditor {
|
|
|
122
122
|
|
|
123
123
|
private updateStartEndInputItems(): void {
|
|
124
124
|
const seqCol = this.inputs.sequence.value;
|
|
125
|
-
const
|
|
125
|
+
const sh = seqCol ? SeqHandler.forColumn(seqCol) : null;
|
|
126
126
|
|
|
127
127
|
const startSE = (this.inputs.start.input as HTMLSelectElement);
|
|
128
128
|
const endSE = (this.inputs.end.input as HTMLSelectElement);
|
|
129
129
|
for (let i = startSE.options.length - 1; i >= 0; --i) startSE.options.remove(i);
|
|
130
130
|
for (let i = endSE.options.length - 1; i >= 0; --i) endSE.options.remove(i);
|
|
131
|
-
for (const pos of
|
|
131
|
+
for (const pos of sh?.posList ?? []) {
|
|
132
132
|
const startPosOE = document.createElement('option');
|
|
133
133
|
const endPosOE = document.createElement('option');
|
|
134
134
|
startPosOE.text = endPosOE.text = pos;
|
|
@@ -136,8 +136,8 @@ export class GetRegionFuncEditor {
|
|
|
136
136
|
startSE.options.add(startPosOE);
|
|
137
137
|
endSE.options.add(endPosOE);
|
|
138
138
|
}
|
|
139
|
-
startSE.value =
|
|
140
|
-
endSE.value =
|
|
139
|
+
startSE.value = sh?.posList[0] ?? '';
|
|
140
|
+
endSE.value = sh?.posList[sh?.posList.length - 1] ?? '';
|
|
141
141
|
}
|
|
142
142
|
|
|
143
143
|
private updateRegionItems(): void {
|
package/src/utils/get-region.ts
CHANGED
|
@@ -2,18 +2,16 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
-
import {
|
|
6
|
-
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
7
|
-
import {getRegion} from '../package';
|
|
5
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
8
6
|
import {TaskBarProgressIndicator} from 'datagrok-api/dg';
|
|
9
7
|
|
|
10
8
|
export function getRegionUI(col: DG.Column<string>): void {
|
|
11
|
-
const
|
|
9
|
+
const sh = SeqHandler.forColumn(col);
|
|
12
10
|
|
|
13
11
|
const nameInput = ui.stringInput('Name', '');
|
|
14
|
-
const startPositionInput = ui.choiceInput('Start Position',
|
|
12
|
+
const startPositionInput = ui.choiceInput('Start Position', sh.posList[0], sh.posList,
|
|
15
13
|
() => { /* TODO: update name placeholder with getDefaultName() */ });
|
|
16
|
-
const endPositionInput = ui.choiceInput('End Position',
|
|
14
|
+
const endPositionInput = ui.choiceInput('End Position', sh.posList[sh.posList.length], sh.posList,
|
|
17
15
|
() => { /* TODO: update name placeholder with getDefaultName() */ });
|
|
18
16
|
|
|
19
17
|
const getDefaultName = (): string => {
|
|
@@ -41,25 +39,25 @@ export function getRegionUI(col: DG.Column<string>): void {
|
|
|
41
39
|
export function getRegionDo(
|
|
42
40
|
col: DG.Column<string>, startPosName: string | null, endPosName: string | null, name: string | null
|
|
43
41
|
): DG.Column<string> {
|
|
44
|
-
const
|
|
42
|
+
const sh = SeqHandler.forColumn(col);
|
|
45
43
|
|
|
46
44
|
let startPosIdx: number | null = null;
|
|
47
45
|
let endPosIdx: number | null = null;
|
|
48
46
|
|
|
49
|
-
for (let posJ: number = 0; posJ <
|
|
50
|
-
if (
|
|
51
|
-
if (
|
|
47
|
+
for (let posJ: number = 0; posJ < sh.posList.length; ++posJ) {
|
|
48
|
+
if (sh.posList[posJ] == startPosName) startPosIdx = posJ;
|
|
49
|
+
if (sh.posList[posJ] == endPosName) endPosIdx = posJ;
|
|
52
50
|
}
|
|
53
51
|
if (startPosIdx === null && startPosName !== null)
|
|
54
52
|
throw new Error(`Start position ${startPosName} not found.`);
|
|
55
53
|
if (endPosIdx === null && endPosName !== null)
|
|
56
54
|
throw new Error(`End position ${endPosName} not found.`);
|
|
57
55
|
|
|
58
|
-
if (
|
|
56
|
+
if (sh.posList.length < endPosIdx!)
|
|
59
57
|
throw new Error(`End position ${endPosIdx} exceeds positions length`);
|
|
60
58
|
|
|
61
59
|
const regColName: string = !!name ? name : `${col.name}: (${startPosName ?? ''}-${endPosName ?? ''})`;
|
|
62
60
|
|
|
63
|
-
const regCol =
|
|
61
|
+
const regCol = sh.getRegion(startPosIdx, endPosIdx, regColName);
|
|
64
62
|
return regCol;
|
|
65
63
|
}
|
|
@@ -3,10 +3,11 @@ import * as ui from 'datagrok-api/ui';
|
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
5
|
import {TAGS as wlTAGS} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
6
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
7
|
+
|
|
6
8
|
import {WebLogoViewer} from '../viewers/web-logo-viewer';
|
|
7
9
|
|
|
8
10
|
import {_package} from '../package';
|
|
9
|
-
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
10
11
|
|
|
11
12
|
/** Used in Macromolecule column tooltip */
|
|
12
13
|
export class MacromoleculeColumnWidget extends DG.Widget {
|
|
@@ -23,7 +24,7 @@ export class MacromoleculeColumnWidget extends DG.Widget {
|
|
|
23
24
|
}
|
|
24
25
|
|
|
25
26
|
async init(): Promise<void> {
|
|
26
|
-
const
|
|
27
|
+
const sh = SeqHandler.forColumn(this.seqCol);
|
|
27
28
|
const pkgTooltipWebLogo = _package.properties.TooltipWebLogo;
|
|
28
29
|
const colTooltipWebLogo = this.seqCol.getTag(wlTAGS.tooltipWebLogo);
|
|
29
30
|
|
|
@@ -32,7 +33,7 @@ export class MacromoleculeColumnWidget extends DG.Widget {
|
|
|
32
33
|
sequenceColumnName: this.seqCol.name,
|
|
33
34
|
backgroundColor: 0x00000000,
|
|
34
35
|
positionHeight: 'Entropy',
|
|
35
|
-
positionWidth: (
|
|
36
|
+
positionWidth: (sh.getAlphabetIsMultichar() ? 24 : 16),
|
|
36
37
|
fixWidth: true,
|
|
37
38
|
fitArea: false,
|
|
38
39
|
// maxHeight: 100,
|
|
@@ -26,7 +26,7 @@ export class MonomerLibFileEventManager {
|
|
|
26
26
|
// WARNING: a temporary solution
|
|
27
27
|
async getValidLibraryPathsAsynchronously(): Promise<string[]> {
|
|
28
28
|
return new Promise((resolve) => {
|
|
29
|
-
this._libraryFilesUpdateSubject$.pipe(
|
|
29
|
+
this._libraryFilesUpdateSubject$.pipe<string[]>(
|
|
30
30
|
skip(1)
|
|
31
31
|
).subscribe((fileNames) => {
|
|
32
32
|
resolve(fileNames);
|
|
@@ -3,7 +3,7 @@ import * as DG from 'datagrok-api/dg';
|
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
|
|
5
5
|
import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
6
|
-
import {
|
|
6
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
7
7
|
import {ColumnInputOptions} from '@datagrok-libraries/utils/src/type-declarations';
|
|
8
8
|
|
|
9
9
|
import {runKalign} from './multiple-sequence-alignment';
|
|
@@ -160,9 +160,9 @@ async function onColInputChange(
|
|
|
160
160
|
gapOpenInput.value = null;
|
|
161
161
|
gapExtendInput.value = null;
|
|
162
162
|
terminalGapInput.value = null;
|
|
163
|
-
const
|
|
164
|
-
const performCol: DG.Column<string> =
|
|
165
|
-
|
|
163
|
+
const potentialColSh = SeqHandler.forColumn(col);
|
|
164
|
+
const performCol: DG.Column<string> = potentialColSh.isFasta() ? col :
|
|
165
|
+
potentialColSh.convert(NOTATION.FASTA);
|
|
166
166
|
return async () => await runKalign(performCol, false, unusedName, clustersColInput.value);
|
|
167
167
|
} else if (checkInputColumnUI(col, col.name,
|
|
168
168
|
[NOTATION.HELM], [], false)
|
|
@@ -178,8 +178,8 @@ async function onColInputChange(
|
|
|
178
178
|
};
|
|
179
179
|
} else if (checkInputColumnUI(col, col.name, [NOTATION.SEPARATOR], [ALPHABET.UN], false)) {
|
|
180
180
|
//if the column is separator with unknown alphabet, it might be helm. check if it can be converted to helm
|
|
181
|
-
const
|
|
182
|
-
const helmCol =
|
|
181
|
+
const potentialColSh = SeqHandler.forColumn(col);
|
|
182
|
+
const helmCol = potentialColSh.convert(NOTATION.HELM);
|
|
183
183
|
switchDialog(pepseaInputRootStyles, kalignInputRootStyles, 'pepsea');
|
|
184
184
|
gapOpenInput.value ??= msaDefaultOptions.pepsea.gapOpen;
|
|
185
185
|
gapExtendInput.value ??= msaDefaultOptions.pepsea.gapExtend;
|
package/src/utils/pepsea.ts
CHANGED
|
@@ -132,6 +132,7 @@ async function requestAlignedObjects(
|
|
|
132
132
|
}
|
|
133
133
|
responseObj = await response.json();
|
|
134
134
|
} else {
|
|
135
|
+
// @ts-ignore
|
|
135
136
|
const responseStr = await grok.dapi.docker.dockerContainers.request(dockerfileId, path, params)!;
|
|
136
137
|
if (!responseStr)
|
|
137
138
|
throw new Error('Empty response');
|
|
@@ -3,7 +3,7 @@ import * as ui from 'datagrok-api/ui';
|
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
5
|
import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
6
|
-
import {
|
|
6
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
7
7
|
import {ALIGNMENT, ALPHABET} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
8
|
|
|
9
9
|
import {HELM_WRAPPER} from './const';
|
|
@@ -224,8 +224,8 @@ export async function addTransformedColumn(
|
|
|
224
224
|
molColumn: DG.Column<string>, addHelm: boolean, ruleFiles: string[], chiralityEngine?: boolean
|
|
225
225
|
): Promise<void> {
|
|
226
226
|
const df = molColumn.dataFrame;
|
|
227
|
-
const
|
|
228
|
-
const sourceHelmCol =
|
|
227
|
+
const sh = SeqHandler.forColumn(molColumn);
|
|
228
|
+
const sourceHelmCol = sh.convert(NOTATION.HELM);
|
|
229
229
|
const pt = PolymerTransformation.getInstance(sourceHelmCol);
|
|
230
230
|
const fileSource = new DG.FileSource(RULES_PATH);
|
|
231
231
|
|
|
@@ -3,8 +3,8 @@ import * as ui from 'datagrok-api/ui';
|
|
|
3
3
|
import * as grok from 'datagrok-api/grok';
|
|
4
4
|
|
|
5
5
|
import wu from 'wu';
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
6
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
7
|
+
import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
8
8
|
|
|
9
9
|
const FASTA_LINE_WIDTH = 60;
|
|
10
10
|
|
|
@@ -28,8 +28,8 @@ export function saveAsFastaUI() {
|
|
|
28
28
|
.filter((gc: DG.GridColumn) => {
|
|
29
29
|
const col: DG.Column | null = gc.column;
|
|
30
30
|
if (col && col.semType === DG.SEMTYPE.MACROMOLECULE) {
|
|
31
|
-
const
|
|
32
|
-
return
|
|
31
|
+
const sh = SeqHandler.forColumn(col);
|
|
32
|
+
return sh.isFasta();
|
|
33
33
|
}
|
|
34
34
|
return false;
|
|
35
35
|
}).toArray();
|
|
@@ -77,17 +77,16 @@ export function saveAsFastaUI() {
|
|
|
77
77
|
export function saveAsFastaDo(
|
|
78
78
|
idColList: DG.Column[], seqCol: DG.Column, lineWidth: number = FASTA_LINE_WIDTH, lineSeparator: string = '\n',
|
|
79
79
|
): string {
|
|
80
|
-
const
|
|
81
|
-
|
|
80
|
+
const sh = SeqHandler.forColumn(seqCol);
|
|
82
81
|
const fastaLines: string[] = [];
|
|
83
82
|
|
|
84
|
-
for (let
|
|
83
|
+
for (let rowIdx: number = 0; rowIdx < seqCol.length; rowIdx++) {
|
|
85
84
|
// multiple identifiers separated by vertical bars
|
|
86
85
|
// https://en.wikipedia.org/wiki/FASTA_format
|
|
87
86
|
|
|
88
|
-
const seqId: string = idColList.map((col) => col.get(
|
|
89
|
-
const
|
|
90
|
-
const seqLineList: string[] = wrapSequence(
|
|
87
|
+
const seqId: string = idColList.map((col) => col.get(rowIdx).toString()).join('|');
|
|
88
|
+
const srcSS = sh.getSplitted(rowIdx);
|
|
89
|
+
const seqLineList: string[] = wrapSequence(srcSS, lineWidth);
|
|
91
90
|
|
|
92
91
|
fastaLines.push(`>${seqId}${lineSeparator}`);
|
|
93
92
|
for (const line of seqLineList)
|
|
@@ -99,16 +98,16 @@ export function saveAsFastaDo(
|
|
|
99
98
|
}
|
|
100
99
|
|
|
101
100
|
/* split sequence for monomers to prevent wrapping monomer partially */
|
|
102
|
-
export function wrapSequence(
|
|
103
|
-
const seqMonomerList = splitter(seq);
|
|
101
|
+
export function wrapSequence(srcSS: ISeqSplitted, lineWidth: number = FASTA_LINE_WIDTH): string[] {
|
|
104
102
|
let seqPos: number = 0;
|
|
105
|
-
const seqLength: number =
|
|
103
|
+
const seqLength: number = srcSS.length;
|
|
106
104
|
|
|
107
105
|
const seqLineList: string[] = [];
|
|
108
106
|
while (seqPos < seqLength) {
|
|
109
107
|
/* join sliced monomer into line */
|
|
110
|
-
const seqLine
|
|
111
|
-
const seqLineTxt: string = seqLine.map((
|
|
108
|
+
const seqLine = wu(srcSS.originals).slice(seqPos, seqPos + lineWidth).toArray();
|
|
109
|
+
const seqLineTxt: string = seqLine.map((om) => om.length > 1 ? `[${om}]` : om)
|
|
110
|
+
.reduce((a, b) => a + b, '');
|
|
112
111
|
seqLineList.push(seqLineTxt);
|
|
113
112
|
seqPos += seqLine.length;
|
|
114
113
|
}
|
|
@@ -8,7 +8,7 @@ import {helm2mol} from './helm-to-molfile';
|
|
|
8
8
|
import {IMonomerLib} from '@datagrok-libraries/bio/src/types';
|
|
9
9
|
import {checkInputColumnUI} from './check-input-column';
|
|
10
10
|
import {getMonomerLibHelper} from '../package';
|
|
11
|
-
import {
|
|
11
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
12
12
|
import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
13
13
|
|
|
14
14
|
export async function sequenceToMolfile(df: DG.DataFrame, macroMolecule: DG.Column, nonlinear: boolean): Promise<void> {
|
|
@@ -17,9 +17,9 @@ export async function sequenceToMolfile(df: DG.DataFrame, macroMolecule: DG.Colu
|
|
|
17
17
|
return;
|
|
18
18
|
}
|
|
19
19
|
if (nonlinear) {
|
|
20
|
-
const
|
|
21
|
-
if (!
|
|
22
|
-
macroMolecule =
|
|
20
|
+
const seqSh = SeqHandler.forColumn(macroMolecule);
|
|
21
|
+
if (!seqSh.isHelm())
|
|
22
|
+
macroMolecule = seqSh.convert(NOTATION.HELM);
|
|
23
23
|
helm2mol(df, macroMolecule);
|
|
24
24
|
return;
|
|
25
25
|
}
|