@datagrok/sequence-translator 1.0.17 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +4 -3
- package/CHANGELOG.md +36 -0
- package/detectors.js +8 -0
- package/dist/package-test.js +2 -73079
- package/dist/package-test.js.map +1 -0
- package/dist/package.js +2 -72284
- package/dist/package.js.map +1 -0
- package/files/axolabs-style.json +97 -0
- package/files/codes-to-symbols.json +67 -0
- package/files/formats-to-helm.json +63 -0
- package/files/linkers.json +22 -0
- package/files/monomer-lib.json +1142 -0
- package/link-bio +7 -0
- package/package.json +30 -31
- package/scripts/build-monomer-lib.py +391 -122
- package/src/demo/demo-st-ui.ts +71 -0
- package/src/demo/handle-error.ts +12 -0
- package/src/model/axolabs/axolabs-tab.ts +111 -0
- package/src/model/axolabs/const.ts +33 -0
- package/src/{axolabs-tab → model/axolabs}/draw-svg.ts +1 -1
- package/src/{axolabs-tab → model/axolabs}/helpers.ts +7 -5
- package/src/model/const.ts +18 -0
- package/src/model/data-loading-utils/const.ts +8 -0
- package/src/model/data-loading-utils/json-loader.ts +38 -0
- package/src/model/data-loading-utils/types.ts +30 -0
- package/src/model/format-translation/const.ts +8 -0
- package/src/model/format-translation/conversion-utils.ts +49 -0
- package/src/model/format-translation/format-converter.ts +109 -0
- package/src/model/helpers.ts +12 -0
- package/src/model/monomer-lib/const.ts +3 -0
- package/src/model/monomer-lib/lib-wrapper.ts +119 -0
- package/src/model/parsing-validation/format-detector.ts +57 -0
- package/src/model/parsing-validation/sequence-validator.ts +52 -0
- package/src/model/sequence-to-structure-utils/const.ts +1 -0
- package/src/{utils/structures-works → model/sequence-to-structure-utils}/mol-transformations.ts +33 -41
- package/src/model/sequence-to-structure-utils/monomer-code-parser.ts +92 -0
- package/src/model/sequence-to-structure-utils/sdf-tab.ts +97 -0
- package/src/model/sequence-to-structure-utils/sequence-to-molfile.ts +409 -0
- package/src/package-test.ts +3 -1
- package/src/package.ts +113 -91
- package/src/tests/const.ts +24 -0
- package/src/tests/formats-support.ts +40 -0
- package/src/tests/formats-to-helm.ts +53 -0
- package/src/tests/helm-to-nucleotides.ts +28 -0
- package/src/view/const/main-tab.ts +3 -0
- package/src/view/const/view.ts +10 -0
- package/src/view/css/axolabs-tab.css +1 -0
- package/src/view/css/colored-text-input.css +27 -0
- package/src/view/css/main-tab.css +46 -0
- package/src/view/css/sdf-tab.css +39 -0
- package/src/view/monomer-lib-viewer/viewer.ts +22 -0
- package/src/view/tabs/axolabs.ts +719 -0
- package/src/view/tabs/main.ts +174 -0
- package/src/view/tabs/sdf.ts +193 -0
- package/src/view/utils/app-info-dialog.ts +18 -0
- package/src/view/utils/colored-input/colored-text-input.ts +56 -0
- package/src/view/utils/colored-input/input-painters.ts +44 -0
- package/src/view/utils/draw-molecule.ts +86 -0
- package/src/view/utils/molecule-img.ts +106 -0
- package/src/view/view.ts +127 -0
- package/tsconfig.json +12 -18
- package/webpack.config.js +17 -4
- package/README.md +0 -84
- package/css/style.css +0 -18
- package/img/Sequence Translator Axolabs.png +0 -0
- package/jest.config.js +0 -33
- package/setup-unlink-clean.cmd +0 -14
- package/setup-unlink-clean.sh +0 -21
- package/setup.cmd +0 -14
- package/setup.sh +0 -37
- package/src/__jest__/remote.test.ts +0 -77
- package/src/__jest__/test-node.ts +0 -97
- package/src/apps/oligo-sd-file-app.ts +0 -58
- package/src/autostart/calculations.ts +0 -40
- package/src/autostart/constants.ts +0 -37
- package/src/autostart/registration.ts +0 -306
- package/src/axolabs-tab/axolabs-tab.ts +0 -873
- package/src/axolabs-tab/define-pattern.ts +0 -874
- package/src/hardcode-to-be-eliminated/ICDs.ts +0 -3
- package/src/hardcode-to-be-eliminated/IDPs.ts +0 -3
- package/src/hardcode-to-be-eliminated/const.ts +0 -5
- package/src/hardcode-to-be-eliminated/constants.ts +0 -101
- package/src/hardcode-to-be-eliminated/converters.ts +0 -323
- package/src/hardcode-to-be-eliminated/map.ts +0 -720
- package/src/hardcode-to-be-eliminated/salts.ts +0 -2
- package/src/hardcode-to-be-eliminated/sources.ts +0 -3
- package/src/hardcode-to-be-eliminated/users.ts +0 -3
- package/src/main-tab/main-tab.ts +0 -210
- package/src/sdf-tab/sdf-tab.ts +0 -163
- package/src/sdf-tab/sequence-codes-tools.ts +0 -347
- package/src/tests/smiles-tests.ts +0 -458
- package/src/utils/const.ts +0 -0
- package/src/utils/helpers.ts +0 -28
- package/src/utils/parse.ts +0 -27
- package/src/utils/sdf-add-columns.ts +0 -118
- package/src/utils/sdf-save-table.ts +0 -56
- package/src/utils/structures-works/draw-molecule.ts +0 -84
- package/src/utils/structures-works/from-monomers.ts +0 -266
- package/test-SequenceTranslator-6288c2fbe346-695b7b55.html +0 -259
- package/vendors/openchemlib-full.js +0 -293
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
// @ts-ignore
|
|
5
|
+
import * as svg from 'save-svg-as-png';
|
|
6
|
+
import $ from 'cash-dom';
|
|
7
|
+
|
|
8
|
+
import {isOverhang} from './helpers';
|
|
9
|
+
import {axolabsStyleMap} from '../data-loading-utils/json-loader';
|
|
10
|
+
|
|
11
|
+
/**
 * Builds an example sequence by cycling through the symbols registered for
 * `sequenceBasis` in the Axolabs style map.
 *
 * The symbols are joined into one string, repeated `floor(sequenceLength / 4)`
 * times and topped up with the first `sequenceLength % 4` characters of that
 * string. The divisor 4 is hard-coded, so the result is exactly
 * `sequenceLength` characters long only when the joined symbols are four
 * characters long.
 *
 * @param sequenceLength desired length of the example
 * @param sequenceBasis key of the Axolabs style map whose symbols are used
 * @returns the generated example string
 */
export function generateExample(sequenceLength: number, sequenceBasis: string): string {
  const alphabet = axolabsStyleMap[sequenceBasis].symbols.join('');
  const fullRepeats = Math.floor(sequenceLength / 4);
  const remainder = sequenceLength % 4;
  return alphabet.repeat(fullRepeats) + alphabet.slice(0, remainder);
}
|
|
16
|
+
|
|
17
|
+
/**
 * Returns the distinct values that occur more than once in `data`, in order of
 * first appearance.
 *
 * Runs in a single counting pass instead of scanning the array twice for
 * every unique value. NaN is never reported: it does not compare equal to
 * itself, so an equality-based search never finds a second occurrence.
 *
 * @param data typed array to inspect; it is not modified
 * @returns values that appear at least twice
 */
export function findDuplicates(data: Int32Array | Float32Array | Float64Array | Uint32Array): number[] {
  const occurrences = new Map<number, number>();
  for (let i = 0; i < data.length; i++) {
    const value = data[i];
    if (Number.isNaN(value))
      continue;
    occurrences.set(value, (occurrences.get(value) ?? 0) + 1);
  }

  const duplicates: number[] = [];
  occurrences.forEach((count, value) => {
    if (count > 1)
      duplicates.push(value);
  });
  return duplicates;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Compares the current platform user with the author name embedded in
 * `patternName`.
 *
 * Resolves to `true` when the current user's first name OR last name differs
 * from the name parsed out of the pattern name, and to `false` when both match.
 * NOTE(review): the function name suggests the opposite polarity — confirm
 * against the callers before relying on it or changing it.
 *
 * @param patternName pattern name that may carry an author suffix
 */
export async function isCurrentUserCreatedThisPattern(patternName: string): Promise<boolean> {
  const currentUser = await grok.dapi.users.current();
  const [authorFirstName, authorLastName] = getUserName(patternName);
  const sameAuthor = currentUser.firstName === authorFirstName && currentUser.lastName === authorLastName;
  return !sameAuthor;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Returns the part of `patternName` that precedes the first `'('`, also
 * dropping the single character right before the parenthesis.
 *
 * When there is no `'('`, the name is returned unchanged.
 *
 * @param patternName full pattern name
 * @returns the shortened name
 */
export function getShortName(patternName: string): string {
  const parenIdx = patternName.indexOf('(');
  const cutAt = (parenIdx === -1) ? patternName.length : parenIdx - 1;
  return patternName.slice(0, cutAt);
}
|
|
38
|
+
|
|
39
|
+
/**
 * Extracts the words of the author clause from a pattern name.
 *
 * The clause is the text starting 9 characters after the first `'('` and
 * ending before the last character of the name; it is split on spaces and the
 * first word is dropped. For a name such as `"X (created by John Doe)"` this
 * yields `['John', 'Doe']`.
 *
 * @param patternName full pattern name
 * @returns the remaining words, or `['', '']` when the name has no `'('`
 */
function getUserName(patternName: string): string[] {
  const parenIdx = patternName.indexOf('(');
  if (parenIdx === -1)
    return ['', ''];
  const authorClause = patternName.slice(parenIdx + 9, patternName.length - 1);
  return authorClause.split(' ').slice(1);
}
|
|
49
|
+
|
|
50
|
+
/**
 * Translates a raw nucleotide sequence into its modified representation.
 *
 * Every `A`, `U`, `G` or `C` in `sequence` is replaced, in order, using the
 * modification chosen in `bases[k]` for the k-th matched nucleotide:
 *  - the replacement symbol is taken from the Axolabs style map at the same
 *    position the nucleotide has in the `'RNA'` symbol list;
 *  - for an overhang modification that borders a non-overhang neighbour, the
 *    plain nucleotide followed by `'f'` is attached on the inner side;
 *  - `'s'` is appended when the matching `ptoLinkages[k]` input is truthy.
 *
 * Afterwards a leading or trailing pair of `mU` found in the first 5 / last 7
 * characters is collapsed into `'(uu)'`, and the result is wrapped with the
 * start and end modification values.
 *
 * @param sequence raw sequence
 * @param bases per-nucleotide modification inputs
 * @param ptoLinkages per-nucleotide inputs marking a trailing `'s'`
 * @param startModification input whose value is prepended
 * @param endModification input whose value is appended
 * @param firstPtoExist whether `'s'` follows the start modification
 * @returns the translated sequence
 */
export function translateSequence(
  sequence: string,
  bases: DG.InputBase[],
  ptoLinkages: DG.InputBase[],
  startModification: DG.InputBase,
  endModification: DG.InputBase,
  firstPtoExist: boolean): string {
  // Counts matched nucleotides; this is not the character offset in `sequence`.
  let position = -1;
  let mainSequence = sequence.replace(/[AUGC]/g, (nucleotide: string) => {
    position++;
    const styleMap = axolabsStyleMap;
    const rnaIndex = styleMap['RNA']['symbols'].indexOf(nucleotide);
    const modification = bases[position].value;
    let symbol = styleMap[modification]['symbols'][rnaIndex];

    if (isOverhang(modification)) {
      const middle = sequence.length / 2;
      if (position < middle && !isOverhang(bases[position + 1].value))
        symbol = symbol + nucleotide + 'f';
      else if (position > middle && !isOverhang(bases[position - 1].value))
        symbol = nucleotide + 'f' + symbol;
    }

    if (ptoLinkages[position].value)
      return symbol + 's';
    return symbol;
  });

  // Splitting on 'mU' yields three parts exactly when it occurs twice.
  const head = mainSequence.slice(0, 5);
  if (head.split('mU').length === 3)
    mainSequence = '(uu)' + mainSequence.slice(4);

  const tail = mainSequence.slice(mainSequence.length - 7);
  if (tail.split('mU').length === 3)
    mainSequence = mainSequence.slice(0, mainSequence.length - 4) + '(uu)';

  const ptoPrefix = firstPtoExist ? 's' : '';
  return startModification.value + ptoPrefix + mainSequence + endModification.value;
}
|
|
80
|
+
|
|
81
|
+
/**
 * Adds (or replaces) the string column `'ID ' + patternName` in the named
 * table.
 *
 * Each cell is the value of `columnName` in the same row, followed by `'_'`
 * and `patternName`; rows whose source string is empty stay empty.
 *
 * @param tableName name of the table to modify
 * @param columnName name of the column the ids are taken from
 * @param patternName suffix appended to every id
 * @returns the newly added column
 */
export function addColumnWithIds(tableName: string, columnName: string, patternName: string) {
  const tableColumns = grok.shell.table(tableName).columns;
  const idColumnName = 'ID ' + patternName;

  // Drop a stale column left over from a previous run.
  if (tableColumns.contains(idColumnName))
    tableColumns.remove(idColumnName);

  const sourceColumn = tableColumns.byName(columnName);
  return tableColumns.addNewString(idColumnName).init((rowIdx: number) => {
    if (sourceColumn.getString(rowIdx) === '')
      return '';
    return sourceColumn.get(rowIdx) + '_' + patternName;
  });
}
|
|
91
|
+
|
|
92
|
+
/**
 * Adds (or replaces) the string column `'Axolabs ' + columnName` in the named
 * table, filled with the translation of each value of `columnName`.
 *
 * Empty source strings stay empty; all other rows are passed through
 * `translateSequence` with the supplied inputs.
 *
 * @param tableName name of the table to modify
 * @param columnName name of the column holding the input sequences
 * @param bases per-nucleotide modification inputs
 * @param ptoLinkages per-nucleotide linkage inputs
 * @param startModification input whose value is prepended
 * @param endModification input whose value is appended
 * @param firstPtoExist whether `'s'` follows the start modification
 * @returns the newly added column
 */
export function addColumnWithTranslatedSequences(
  tableName: string,
  columnName: string,
  bases: DG.InputBase[],
  ptoLinkages: DG.InputBase[],
  startModification: DG.InputBase,
  endModification: DG.InputBase,
  firstPtoExist: boolean) {
  const tableColumns = grok.shell.table(tableName).columns;
  const translatedColumnName = 'Axolabs ' + columnName;

  // Drop a stale column left over from a previous run.
  if (tableColumns.contains(translatedColumnName))
    tableColumns.remove(translatedColumnName);

  const inputColumn = tableColumns.byName(columnName);
  return tableColumns.addNewString(translatedColumnName).init((rowIdx: number) => {
    if (inputColumn.getString(rowIdx) === '')
      return '';
    return translateSequence(
      inputColumn.getString(rowIdx), bases, ptoLinkages, startModification, endModification, firstPtoExist);
  });
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
// Defaults and limits for the Axolabs tab.
// NOTE(review): the meanings below are inferred from the names only — confirm against the view code.
export const DEFAULT_PTO: boolean = true;
|
|
2
|
+
export const DEFAULT_SEQUENCE_LENGTH: number = 23;
|
|
3
|
+
export const MAX_SEQUENCE_LENGTH: number = 35;
|
|
4
|
+
// Presumably the key under which patterns are kept in user storage — TODO confirm.
export const USER_STORAGE_KEY: string = 'SequenceTranslator';
|
|
5
|
+
export const EXAMPLE_MIN_WIDTH: string = '400px';
|
|
6
|
+
|
|
7
|
+
/**
 * String keys for the fields of a pattern object.
 * NOTE(review): SS/AS presumably stand for sense/antisense strand and 3/5 for
 * the 3'/5' end modifications — confirm against the code that reads them.
 */
export const enum JSON_FIELD {
|
|
8
|
+
SS_BASES = 'ssBases',
|
|
9
|
+
AS_BASES = 'asBases',
|
|
10
|
+
SS_PTO = 'ssPtoLinkages',
|
|
11
|
+
AS_PTO = 'asPtoLinkages',
|
|
12
|
+
SS_3 = 'ssThreeModification',
|
|
13
|
+
SS_5 = 'ssFiveModification',
|
|
14
|
+
AS_3 = 'asThreeModification',
|
|
15
|
+
AS_5 = 'asFiveModification',
|
|
16
|
+
COMMENT = 'comment',
|
|
17
|
+
};
|
|
18
|
+
|
|
19
|
+
// Strand identifiers, typed as string literals via `as const`.
export const SS = 'SS' as const;
|
|
20
|
+
export const AS = 'AS' as const;
|
|
21
|
+
// Both strand identifiers, in SS, AS order.
export const STRANDS = [SS, AS];
|
|
22
|
+
// Display name for each strand identifier.
export const STRAND_NAME = {
|
|
23
|
+
[SS]: 'Sense strand',
|
|
24
|
+
[AS]: 'Anti sense',
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
// Terminal identifiers, typed as string literals via `as const`.
export const THREE_PRIME = 'THREE_PRIME' as const;
|
|
28
|
+
export const FIVE_PRIME = 'FIVE_PRIME' as const;
|
|
29
|
+
// Both terminal identifiers, in THREE_PRIME, FIVE_PRIME order.
export const TERMINAL_KEYS = [THREE_PRIME, FIVE_PRIME];
|
|
30
|
+
// Numeric label (3 or 5) for each terminal identifier.
export const TERMINAL = {
|
|
31
|
+
[THREE_PRIME]: 3,
|
|
32
|
+
[FIVE_PRIME]: 5,
|
|
33
|
+
}
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
1
|
+
import {NUCLEOTIDES} from '../const';
|
|
2
|
+
import {axolabsStyleMap} from '../data-loading-utils/json-loader';
|
|
3
3
|
|
|
4
4
|
/**
 * Tells whether a modification name denotes an overhang, i.e. whether it ends
 * with the three characters `(o)`.
 *
 * @param modification modification name
 * @returns `true` when the name ends with `(o)`
 */
export function isOverhang(modification: string): boolean {
  const overhangSuffix = '(o)';
  return modification.endsWith(overhangSuffix);
}
|
|
7
7
|
|
|
8
8
|
export function isOneDigitNumber(n: number): boolean {
|
|
@@ -21,7 +21,7 @@ export function getPointsToDrawStar(centerX: number, centerY: number): string {
|
|
|
21
21
|
|
|
22
22
|
let points = '';
|
|
23
23
|
for (let i = 0; i < totalNumberOfPoints; i++) {
|
|
24
|
-
const r = (i % 2
|
|
24
|
+
const r = (i % 2 === 0) ? outerRadius : innerRadius;
|
|
25
25
|
const currentX = centerX + Math.cos(i * angle + angleOffsetToCenterStar) * r;
|
|
26
26
|
const currentY = centerY + Math.sin(i * angle + angleOffsetToCenterStar) * r;
|
|
27
27
|
points += `${currentX},${currentY} `;
|
|
@@ -33,7 +33,7 @@ export function countOverhangsOnTheRightEdge(modifications: string[]): number {
|
|
|
33
33
|
let i = 0;
|
|
34
34
|
while (i < modifications.length && isOverhang(modifications[i]))
|
|
35
35
|
i++;
|
|
36
|
-
return (i
|
|
36
|
+
return (i === modifications.length - 1) ? 0 : i;
|
|
37
37
|
}
|
|
38
38
|
|
|
39
39
|
export function textWidth(text: string, font: number): number {
|
|
@@ -49,11 +49,13 @@ export function textInsideCircle(bases: string[], index: number): string {
|
|
|
49
49
|
}
|
|
50
50
|
|
|
51
51
|
/**
 * Picks a font colour that stays readable on the background colour registered
 * for `base` in the Axolabs style map.
 *
 * The first three numbers found in the colour string are combined as
 * `0.299 * R + 0.587 * G + 0.114 * B`; above 186 the background is treated as
 * light and a dark font is returned, otherwise a white one.
 *
 * The dark colour used to be written `'#33333'`, a five-digit value that is
 * not a valid hex colour; it is now `'#333333'`.
 *
 * @param base key of the Axolabs style map
 * @returns `'#333333'` for light backgrounds, `'#ffffff'` for dark ones
 */
export function fontColorVisibleOnBackground(base: string): string {
  const AXOLABS_MAP = axolabsStyleMap;
  const [red, green, blue] = AXOLABS_MAP[base].color.match(/\d+/g)!.map((e) => Number(e));
  const brightness = red * 0.299 + green * 0.587 + blue * 0.114;
  return brightness > 186 ? '#333333' : '#ffffff';
}
|
|
55
56
|
|
|
56
57
|
/**
 * Looks up the colour registered for `base` in the Axolabs style map.
 *
 * @param base key of the Axolabs style map
 * @returns the `color` string stored for that key
 */
export function baseColor(base: string): string {
  const {color} = axolabsStyleMap[base];
  return color;
}
|
|
59
61
|
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/* Do not change these import lines to match external modules in webpack configuration */
|
|
2
|
+
import * as grok from 'datagrok-api/grok';
|
|
3
|
+
import * as ui from 'datagrok-api/ui';
|
|
4
|
+
import * as DG from 'datagrok-api/dg';
|
|
5
|
+
|
|
6
|
+
// Single-letter nucleotide symbols.
export const NUCLEOTIDES = ['A', 'G', 'C', 'U', 'T'];
|
|
7
|
+
|
|
8
|
+
// Names of the technologies the package distinguishes, keyed by an upper-case identifier.
export const TECHNOLOGIES = {
|
|
9
|
+
DNA: 'DNA',
|
|
10
|
+
RNA: 'RNA',
|
|
11
|
+
ASO_GAPMERS: 'ASOGapmers',
|
|
12
|
+
SI_RNA: 'siRNA',
|
|
13
|
+
};
|
|
14
|
+
|
|
15
|
+
// Format names that are referenced directly in code.
export enum DEFAULT_FORMATS {
|
|
16
|
+
HELM = 'HELM',
|
|
17
|
+
AXOLABS = 'Axolabs',
|
|
18
|
+
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
// Location and default file name of the monomer library.
export const LIB_PATH = 'System:AppData/SequenceTranslator';
|
|
2
|
+
export const DEFAULT_LIB_FILENAME = 'monomer-lib.json';
|
|
3
|
+
|
|
4
|
+
// Location of the package's JSON data files (currently the same value as LIB_PATH).
export const APP_PATH = 'System:AppData/SequenceTranslator';
|
|
5
|
+
// File names of the JSON data files.
export const AXOLABS_STYLE_FILENAME = 'axolabs-style.json';
|
|
6
|
+
export const CODES_TO_HELM_DICT_FILENAME = 'formats-to-helm.json';
|
|
7
|
+
export const CODES_TO_SYMBOLS_FILENAME = 'codes-to-symbols.json';
|
|
8
|
+
export const MONOMERS_WITH_PHOSPHATE_LINKERS = 'linkers.json';
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/* Do not change these import lines to match external modules in webpack configuration */
|
|
2
|
+
import * as grok from 'datagrok-api/grok';
|
|
3
|
+
import * as ui from 'datagrok-api/ui';
|
|
4
|
+
import * as DG from 'datagrok-api/dg';
|
|
5
|
+
|
|
6
|
+
import {APP_PATH, AXOLABS_STYLE_FILENAME, CODES_TO_HELM_DICT_FILENAME, CODES_TO_SYMBOLS_FILENAME, MONOMERS_WITH_PHOSPHATE_LINKERS} from './const';
|
|
7
|
+
import {AxolabsStyle, FormatToHELMDict, CodeToSymbol} from './types';
|
|
8
|
+
|
|
9
|
+
// File source rooted at APP_PATH; parse() reads the JSON files through it.
const fileSource = new DG.FileSource(APP_PATH);
|
|
10
|
+
|
|
11
|
+
// Module-level data holders. They are undefined until getJsonData() has
// assigned them, so importers must not read them before that call completes.
export let axolabsStyleMap: AxolabsStyle;
|
|
12
|
+
export let codesToHelmDictionary: FormatToHELMDict;
|
|
13
|
+
export let codesToSymbolsDictionary: CodeToSymbol;
|
|
14
|
+
export let monomersWithPhosphateLinkers: {[key: string]: string[]};
|
|
15
|
+
|
|
16
|
+
/**
 * Loads the package's JSON data files into the module-level holders
 * (`axolabsStyleMap`, `codesToHelmDictionary`, `codesToSymbolsDictionary`,
 * `monomersWithPhosphateLinkers`).
 *
 * Does nothing when all four holders are already defined. The files do not
 * depend on each other, so they are read concurrently. If any read or parse
 * fails the promise rejects and no holder is assigned, so a later call
 * retries the whole load.
 *
 * @throws Error when a file cannot be read or parsed
 */
export async function getJsonData(): Promise<void> {
  const holders = [axolabsStyleMap, codesToHelmDictionary, codesToSymbolsDictionary, monomersWithPhosphateLinkers];
  if (holders.every((item) => item !== undefined))
    return;

  const [styleMap, codesToHelm, codesToSymbols, linkers] = await Promise.all([
    parse(AXOLABS_STYLE_FILENAME),
    parse(CODES_TO_HELM_DICT_FILENAME),
    parse(CODES_TO_SYMBOLS_FILENAME),
    parse(MONOMERS_WITH_PHOSPHATE_LINKERS),
  ]);

  axolabsStyleMap = styleMap;
  codesToHelmDictionary = codesToHelm;
  codesToSymbolsDictionary = codesToSymbols;
  monomersWithPhosphateLinkers = linkers;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Reads the file at `path` through the module's file source and parses it as
 * JSON.
 *
 * The parsed value is returned as is; it is not validated against any type.
 *
 * @param path file path relative to the file source root
 * @returns the parsed JSON value
 * @throws Error whose message names `path` and carries the underlying message
 */
async function parse(path: string): Promise<any> {
  try {
    return JSON.parse(await fileSource.readAsText(path));
  } catch (err: unknown) {
    // A rejection value may be anything, including null or a plain string,
    // so narrow it before reading `message`.
    const hasMessage = typeof err === 'object' && err !== null && 'message' in err;
    const errMsg: string = hasMessage ? String((err as {message: unknown}).message) : String(err);
    throw new Error(`Error loading json from ${path}:` + errMsg);
  }
}
|
|
38
|
+
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/** Flat string-to-string dictionary. */
type KeyToValue = Record<string, string>;

/** Two-level string dictionary. */
export type Edges = Record<string, KeyToValue>;

/** One entry of the Axolabs style map. */
type AxolabsStyleEntry = {
  fullName: string,
  symbols: string[],
  color: string,
};

/** Axolabs style map: entry name to its full name, symbols and colour. */
export type AxolabsStyle = Record<string, AxolabsStyleEntry>;

/** Codes of one format, grouped by kind (nucleoside or phosphate); each group maps code to a string. */
export type CodesInfo = Record<string, Record<string, string>>;

/** Per-format code tables, keyed by source format name. */
export type FormatToHELMDict = Record<string, CodesInfo>;

/** Per-format dictionary mapping a code to a string. */
export type CodeToSymbol = Record<string, Record<string, string>>;
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import {DEFAULT_FORMATS, NUCLEOTIDES} from '../const';
|
|
2
|
+
import {UNKNOWN_SYMBOL} from './const';
|
|
3
|
+
import {FormatConverter} from './format-converter';
|
|
4
|
+
import {codesToHelmDictionary} from '../data-loading-utils/json-loader';
|
|
5
|
+
import {MonomerLibWrapper} from '../monomer-lib/lib-wrapper';
|
|
6
|
+
|
|
7
|
+
/**
 * Translates `sequence` from `sourceFormat` into every other supported format.
 *
 * Supported formats are the keys of the codes-to-HELM dictionary plus HELM.
 * A target whose conversion throws or yields an empty value is left out of
 * the result. When a HELM representation is available (the input itself, or
 * a successful conversion), a `'Nucleotides'` entry derived from it is added.
 *
 * If no HELM representation could be produced the nucleotides step is now
 * skipped; previously `undefined` was passed on and caused a TypeError.
 *
 * @param sequence input sequence
 * @param indexOfFirstInvalidChar -1 when the sequence is valid; any other
 *   value makes non-HELM input return an empty result
 * @param sourceFormat format of `sequence`
 * @returns translations keyed by format name; empty for empty or invalid input
 * @throws Error when `sourceFormat` is not supported
 */
export function getTranslatedSequences(sequence: string, indexOfFirstInvalidChar: number, sourceFormat: string): {[key: string]: string} {
  const supportedFormats = Object.keys(codesToHelmDictionary).concat([DEFAULT_FORMATS.HELM]) as string[];

  if (!sequence || (indexOfFirstInvalidChar !== -1 && sourceFormat !== DEFAULT_FORMATS.HELM))
    return {};

  if (!supportedFormats.includes(sourceFormat))
    throw new Error(`${sourceFormat} format is not supported by SequenceTranslator`);

  const outputFormats = supportedFormats
    .filter((el) => el !== sourceFormat)
    .sort((a, b) => a.localeCompare(b));
  const converter = new FormatConverter(sequence, sourceFormat);

  const result: {[key: string]: string} = {};
  for (const format of outputFormats) {
    let translation: string | null;
    try {
      translation = converter.convertTo(format);
    } catch {
      // Best effort: a format that cannot be produced is omitted.
      translation = null;
    }
    if (translation)
      result[format] = translation;
  }

  const helm: string | undefined = (sourceFormat === DEFAULT_FORMATS.HELM) ? sequence : result[DEFAULT_FORMATS.HELM];
  if (helm !== undefined) {
    const nucleotides = getNucleotidesSequence(helm, MonomerLibWrapper.getInstance());
    if (nucleotides)
      result['Nucleotides'] = nucleotides;
  }
  return result;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Derives a plain nucleotide string from a HELM string.
 *
 * Every innermost parenthesised group is taken, its brackets and parentheses
 * are removed, and the remaining symbol is kept when it is one of
 * `NUCLEOTIDES`; otherwise it is replaced by the natural analog reported by
 * the monomer library. A falsy analog becomes `UNKNOWN_SYMBOL`.
 *
 * @param helmString HELM string to read
 * @param monomerLib monomer library wrapper used to resolve natural analogs
 * @returns concatenated nucleotides, or `null` when there is no parenthesised group
 */
export function getNucleotidesSequence(helmString: string, monomerLib: MonomerLibWrapper): string | null {
  const branches = helmString.match(/\([^()]*\)/g);
  if (branches === null)
    return null;

  let nucleotides = '';
  for (const branch of branches) {
    const symbol = branch.replace(/[\[\]()]/g, '');
    const analog = NUCLEOTIDES.includes(symbol) ? symbol : monomerLib.getNaturalAnalogBySymbol(symbol);
    nucleotides += analog ? analog : UNKNOWN_SYMBOL;
  }
  return nucleotides;
}
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
import {DEFAULT_FORMATS} from '../const';
|
|
3
|
+
import {GROUP_TYPE, PHOSPHATE_SYMBOL, UNKNOWN_SYMBOL} from './const';
|
|
4
|
+
import {CodesInfo} from '../data-loading-utils/types';
|
|
5
|
+
import {codesToHelmDictionary} from '../data-loading-utils/json-loader';
|
|
6
|
+
|
|
7
|
+
// Prefix and suffix placed around the monomer chain in formatToHelm() and
// removed again in helmToFormat().
const HELM_WRAPPER = {
|
|
8
|
+
LEFT: 'RNA1{',
|
|
9
|
+
RIGHT: '}$$$$',
|
|
10
|
+
};
|
|
11
|
+
|
|
12
|
+
/**
 * Converts one sequence between HELM and the formats listed in the
 * codes-to-HELM dictionary.
 */
export class FormatConverter {
  /**
   * @param sequence sequence to convert
   * @param sourceFormat format `sequence` is written in
   */
  constructor(private readonly sequence: string, private readonly sourceFormat: string) { }

  /**
   * Converts the sequence to `targetFormat`.
   *
   * HELM to a dictionary format and a dictionary format to HELM are converted
   * directly; a conversion between two dictionary formats goes through HELM.
   *
   * @param targetFormat name of the format to produce
   * @returns the converted sequence
   * @throws Error for any other combination of source and target format
   */
  convertTo(targetFormat: string): string {
    const knownFormats = Object.keys(codesToHelmDictionary);
    const sourceIsKnown = knownFormats.includes(this.sourceFormat);
    const targetIsKnown = knownFormats.includes(targetFormat);

    if (this.sourceFormat === DEFAULT_FORMATS.HELM && targetIsKnown)
      return helmToFormat(this.sequence, targetFormat);

    if (sourceIsKnown && targetFormat === DEFAULT_FORMATS.HELM)
      return formatToHelm(this.sequence, this.sourceFormat);

    if (sourceIsKnown && targetIsKnown) {
      const intermediateHelm = formatToHelm(this.sequence, this.sourceFormat);
      return helmToFormat(intermediateHelm, targetFormat);
    }

    throw new Error (`ST: unsupported translation direction ${this.sourceFormat} -> ${targetFormat}`);
  }
}
|
|
31
|
+
|
|
32
|
+
/**
 * Builds a regular-expression source that matches any of the given strings
 * literally.
 *
 * Each string is escaped. A string without parentheses is additionally
 * wrapped in a negative lookbehind and a negative lookahead so that it does
 * not match inside a parenthesised group. The alternatives are joined with
 * `|` in the order given.
 *
 * @param arr strings to match
 * @returns pattern source to pass to `new RegExp`
 */
function getRegExpPattern(arr: string[]): string {
  const notAfterOpeningParen = '(?<!\\([^()]*)'; // no '(' + non-parentheses right before
  const notBeforeClosingParen = '(?![^()]*\\))'; // no non-parentheses + ')' right after

  const alternatives: string[] = [];
  for (const item of arr) {
    const escaped = item.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const hasParenthesis = escaped.includes('(') || escaped.includes(')');
    alternatives.push(hasParenthesis ? escaped : `${notAfterOpeningParen}${escaped}${notBeforeClosingParen}`);
  }
  return alternatives.join('|');
}
|
|
44
|
+
|
|
45
|
+
/** Comparator for `Array.prototype.sort` that puts longer strings first. */
function sortCallback(a: string, b: string) {
  const lengthDifference = b.length - a.length;
  return lengthDifference;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Inverts a format's code tables into a HELM-to-code dictionary.
 *
 * The key is the HELM value with every `)p` reduced to `)` and every `]p`
 * reduced to `]`. When several codes share a key, the longest code wins; among
 * equally long codes the one encountered first is kept.
 *
 * @param infoObj code tables of one format, grouped by kind
 * @returns dictionary from normalised HELM string to the chosen code
 */
function getHelmToCodeDict(infoObj: CodesInfo) {
  const longestCodeByHelm: {[key: string]: string} = {};
  for (const codeToHelm of Object.values(infoObj)) {
    for (const [code, helm] of Object.entries(codeToHelm)) {
      const key = helm.replace(/\)p/g, ')').replace(/\]p/g, ']');
      const current = longestCodeByHelm[key];
      // Strictly longer only, so that ties keep the earliest code.
      if (current === undefined || code.length > current.length)
        longestCodeByHelm[key] = code;
    }
  }
  return longestCodeByHelm;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Translates a HELM string into `targetFormat`.
 *
 * Steps: strip the HELM wrapper; replace every known HELM code (longest
 * first) with its format code while keeping `p` and `.` and marking anything
 * else with `?`; collapse runs of `?` into `UNKNOWN_SYMBOL`; remove `p.` and
 * `.` separators; remove `<empty>` placeholders; collapse `//` into `/`.
 *
 * @param helmSequence HELM string to translate
 * @param targetFormat key of the codes-to-HELM dictionary
 * @returns the sequence written in `targetFormat`
 */
function helmToFormat(helmSequence: string, targetFormat: string): string {
  const dict = getHelmToCodeDict(codesToHelmDictionary[targetFormat] as CodesInfo);
  const helmCodes = Object.keys(dict).sort(sortCallback);

  const wrapperRegExp = new RegExp(getRegExpPattern(Object.values(HELM_WRAPPER)), 'g');
  const unwrapped = helmSequence.replace(wrapperRegExp, '');

  const helmRegExp = new RegExp(getRegExpPattern(helmCodes) + '|.', 'g');
  const translated = unwrapped.replace(helmRegExp, (match) => {
    if (helmCodes.includes(match))
      return dict[match];
    if (match === 'p' || match === '.')
      return match;
    return '?';
  });

  return translated
    .replace(/\?+/g, UNKNOWN_SYMBOL)
    .replace(/p\.|\./g, '')
    .replace(/<empty>/g, '')
    // remove double slash in LCMS codes
    .replace(/\/\//g, '/');
}
|
|
84
|
+
|
|
85
|
+
/**
 * Translates a sequence written in `sourceFormat` into a HELM string.
 *
 * Every known code (longest first) is replaced with its HELM value followed
 * by `.`; parenthesised groups and any other character become `?`, and runs
 * of `?` collapse into `UNKNOWN_SYMBOL`. The trailing dot and a trailing
 * phosphate symbol are stripped, a phosphate symbol directly followed by
 * `.` and a linkage code is merged into that linkage code, `<empty>`
 * placeholders are removed and the result is put into the HELM wrapper.
 *
 * The dot in the phosphate pattern is now escaped as `\\.`. It used to be
 * written `\.` inside a template literal, which produces a bare `.` that
 * matches any character.
 *
 * @param sequence sequence to translate
 * @param sourceFormat key of the codes-to-HELM dictionary
 * @returns the HELM string
 */
function formatToHelm(sequence: string, sourceFormat: string): string {
  const codesInfoObject = codesToHelmDictionary[sourceFormat] as CodesInfo;
  const dict = Object.assign({}, ...Object.values(codesInfoObject)) as {[code: string]: string};

  const formatCodes = Object.keys(dict).sort(sortCallback);
  // the group added before '|.' avoids mismatches inside parentheses
  const formatRegExp = new RegExp(getRegExpPattern(formatCodes) + '|\\([^()]*\\)|.', 'g');

  const phosphateHELMCodes = Array.from(
    new Set(Object.values(codesInfoObject[GROUP_TYPE.LINKAGE]))
  ).sort(sortCallback);
  const phosphateHELMPattern = getRegExpPattern(phosphateHELMCodes);
  const phosphateRegExp = new RegExp(`${PHOSPHATE_SYMBOL}\\.(${phosphateHELMPattern})`, 'g');

  let helm = sequence.replace(formatRegExp, (match) => {
    return formatCodes.includes(match) ? dict[match] + '.' : '?';
  });
  helm = helm.replace(/\?+/g, `${UNKNOWN_SYMBOL}.`);
  helm = helm.slice(0, -1); // strip last dot
  if (helm[helm.length - 1] === PHOSPHATE_SYMBOL)
    helm = helm.slice(0, -1);
  helm = helm.replace(phosphateRegExp, (match, group) => group);
  helm = helm.replace(/<empty>/g, '');
  return `${HELM_WRAPPER.LEFT + helm + HELM_WRAPPER.RIGHT}`;
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
|
|
3
|
+
/**
 * Sorts strings from longest to shortest.
 *
 * The array is sorted in place and the same array instance is returned.
 *
 * @param array strings to sort; modified by the call
 * @returns `array`, sorted
 */
export function sortByReverseLength(array: string[]): string[] {
  const longestFirst = (left: string, right: string): number => right.length - left.length;
  array.sort(longestFirst);
  return array;
}
|
|
6
|
+
|
|
7
|
+
/**
 * Triggers a browser download of text content.
 *
 * Creates a detached `<a>` element whose `href` is a `data:text/plain` URI
 * ending with `href`, sets its `download` attribute to `name` and clicks it.
 * `href` is appended verbatim, so the caller must pass text that is already
 * valid inside a data URI.
 *
 * @param name file name offered to the user
 * @param href content appended to the data-URI prefix
 */
export function download(name: string, href: string): void {
  const link = document.createElement('a');
  link.setAttribute('download', name);
  link.setAttribute('href', 'data:text/plain;charset=utf-8,' + href);
  link.click();
}
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
/* Do not change these import lines to match external modules in webpack configuration */
|
|
2
|
+
import * as grok from 'datagrok-api/grok';
|
|
3
|
+
import * as ui from 'datagrok-api/ui';
|
|
4
|
+
import * as DG from 'datagrok-api/dg';
|
|
5
|
+
|
|
6
|
+
import {_package} from '../../package';
|
|
7
|
+
import {DEFAULT_FORMATS} from '../const';
|
|
8
|
+
|
|
9
|
+
import {IMonomerLib, Monomer} from '@datagrok-libraries/bio/src/types';
|
|
10
|
+
|
|
11
|
+
import {HELM_REQUIRED_FIELDS as REQ, HELM_OPTIONAL_FIELDS as OPT} from '@datagrok-libraries/bio/src/utils/const';
|
|
12
|
+
import {META_FIELDS as MET} from './const';
|
|
13
|
+
import {codesToSymbolsDictionary} from '../../model/data-loading-utils/json-loader';
|
|
14
|
+
|
|
15
|
+
/**
 * Singleton wrapper around the package-level monomer library
 * (`_package.monomerLib`) and the codes-to-symbols dictionary.
 *
 * Obtain the instance with {@link MonomerLibWrapper.getInstance}; the
 * constructor is private.
 */
export class MonomerLibWrapper {
  private static instance?: MonomerLibWrapper;
  private lib: IMonomerLib;
  private allMonomers: Monomer[];

  /**
   * Captures the package monomer library and caches all its monomers.
   *
   * @throws Error when the package has no monomer library (null or undefined)
   */
  private constructor() {
    const lib = _package.monomerLib;
    // `== null` also rejects undefined, which would otherwise fail later
    // with an unrelated TypeError.
    if (lib == null)
      throw new Error('SequenceTranslator: monomer library is null');
    this.lib = lib;
    this.allMonomers = this.getAllMonomers();
  }

  /**
   * Flattens a monomer into the row shown by the library viewer: name,
   * symbol, molfile, and for every non-HELM format a comma-separated list of
   * the codes that map to the monomer's symbol.
   */
  private formatMonomerForViewer(sourceObj: Monomer): {[key: string]: string} {
    const formattedObject: {[key: string]: string} = {};
    // NOTE(review): the name field is filled from the monomer SYMBOL, not its
    // NAME — confirm this is intended.
    formattedObject[REQ.NAME] = sourceObj[REQ.SYMBOL];
    formattedObject[REQ.SYMBOL] = sourceObj[REQ.SYMBOL];
    formattedObject[REQ.MOLFILE] = sourceObj[REQ.MOLFILE];

    for (const format of this.getAllFormats()) {
      if (format === DEFAULT_FORMATS.HELM)
        continue;
      const map = codesToSymbolsDictionary[format];
      const codes = Object.keys(map).filter((code) => map[code] === sourceObj.symbol);
      formattedObject[format] = codes.join(', ');
    }

    return formattedObject;
  }

  /** Collects the monomers of every polymer type, skipping symbols the library resolves to null. */
  private getAllMonomers(): Monomer[] {
    const polymerTypes = this.lib.getPolymerTypes();
    let result: Monomer[] = [];
    for (const polymerType of polymerTypes) {
      const monomerSymbols = this.lib.getMonomerSymbolsByType(polymerType);
      const monomersByType: Monomer[] = monomerSymbols
        .map((monomerSymbol) => this.lib.getMonomer(polymerType, monomerSymbol))
        .filter((monomer): monomer is Monomer => monomer !== null);
      result = result.concat(monomersByType);
    }
    return result;
  }

  /**
   * Looks up an RNA monomer by symbol.
   *
   * @throws Error when the library has no such monomer
   */
  private getMonomer(monomerSymbol: string): Monomer {
    const monomer = this.lib.getMonomer('RNA', monomerSymbol);
    // The library may answer null for a missing monomer (getAllMonomers
    // filters null from the same call), so test for null and undefined.
    if (monomer == null)
      throw new Error(`SequenceTranslator: no monomer with symbol ${monomerSymbol}`);
    return monomer;
  }

  /** Returns the shared instance, creating it on first use. */
  static getInstance(): MonomerLibWrapper {
    if (MonomerLibWrapper.instance === undefined)
      MonomerLibWrapper.instance = new MonomerLibWrapper();
    return MonomerLibWrapper.instance;
  }

  /**
   * @returns the molfile of the RNA monomer with the given symbol
   * @throws Error when the monomer is missing
   */
  getMolfileBySymbol(monomerSymbol: string): string {
    return this.getMonomer(monomerSymbol).molfile;
  }

  /**
   * @returns the natural analog of the RNA monomer with the given symbol
   * @throws Error when the monomer is missing or has no natural analog
   */
  getNaturalAnalogBySymbol(monomerSymbol: string): string {
    const naturalAnalog = this.getMonomer(monomerSymbol).naturalAnalog;
    if (!naturalAnalog)
      throw new Error(`ST: no natural analog for ${monomerSymbol}`);
    return naturalAnalog;
  }

  // todo: a better criterion
  /** Treats a monomer as a modification when its molfile contains the text 'MODIFICATION'. */
  isModification(monomerSymbol: string): boolean {
    return this.getMolfileBySymbol(monomerSymbol).includes('MODIFICATION');
  }

  /** @returns the code-to-symbol entries of `format` as a Map */
  getCodeToSymbolMap(format: string): Map<string, string> {
    return new Map<string, string>(Object.entries(codesToSymbolsDictionary[format]));
  }

  /** @returns all codes defined for `format` */
  getCodesByFormat(format: string): string[] {
    return Object.keys(codesToSymbolsDictionary[format]);
  }

  /** @returns the names of all formats in the codes-to-symbols dictionary */
  getAllFormats(): string[] {
    return Object.keys(codesToSymbolsDictionary);
  }

  /** Builds a data frame with one row per cached monomer, for the library viewer. */
  getTableForViewer(): DG.DataFrame {
    const formattedObjects = this.allMonomers.map((monomer) => this.formatMonomerForViewer(monomer));
    return DG.DataFrame.fromObjects(formattedObjects)!;
  }

  /**
   * Maps every code of every format to the molecular weight stored in the
   * monomer's meta data.
   *
   * A code that occurs in several formats keeps the value written last.
   *
   * @throws Error when a code refers to a missing monomer
   */
  getCodesToWeightsMap(): Map<string, number> {
    const codesToWeightsMap = new Map<string, number>();
    for (const dict of Object.values(codesToSymbolsDictionary)) {
      for (const [code, monomerSymbol] of Object.entries(dict)) {
        const monomer = this.getMonomer(monomerSymbol);
        const weight = monomer[OPT.META]?.[MET.MOLWEIGHT];
        codesToWeightsMap.set(code, weight);
      }
    }
    return codesToWeightsMap;
  }
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/* Do not change these import lines to match external modules in webpack configuration */
|
|
2
|
+
import * as grok from 'datagrok-api/grok';
|
|
3
|
+
import * as ui from 'datagrok-api/ui';
|
|
4
|
+
import * as DG from 'datagrok-api/dg';
|
|
5
|
+
|
|
6
|
+
import {sortByReverseLength} from '../helpers';
|
|
7
|
+
import {DEFAULT_FORMATS} from '../const';
|
|
8
|
+
import {MonomerLibWrapper} from '../monomer-lib/lib-wrapper';
|
|
9
|
+
import {codesToHelmDictionary} from '../data-loading-utils/json-loader';
|
|
10
|
+
import {SequenceValidator} from './sequence-validator';
|
|
11
|
+
|
|
12
|
+
/**
 * Guesses which format a sequence is written in.
 */
export class FormatDetector {
  private libWrapper: MonomerLibWrapper;
  private formats: string[];

  /**
   * @param sequence sequence whose format is to be detected
   */
  constructor(private sequence: string) {
    this.libWrapper = MonomerLibWrapper.getInstance();
    this.formats = Object.keys(codesToHelmDictionary);
  }

  /**
   * Detects the format of the sequence.
   *
   * A sequence starting with `'RNA'` is reported as HELM. Otherwise every
   * candidate format is run through `SequenceValidator.getInvalidCodeIndex`:
   * the first candidate that yields -1 wins; if none does, the candidate with
   * the largest returned index is chosen.
   * NOTE(review): -1 presumably means "no invalid code found" — confirm in
   * SequenceValidator.
   *
   * @returns the format name, or `null` when no format matches the start of
   *   the sequence
   */
  getFormat(): string | null {
    // todo: reliable criterion
    if (this.sequence.startsWith('RNA'))
      return DEFAULT_FORMATS.HELM;

    const candidates = this.getListOfPossibleSynthesizersByFirstMatchedCode();
    if (candidates.length === 0)
      return null;

    const validator = new SequenceValidator(this.sequence);
    const invalidIndices: number[] = [];
    for (const format of candidates)
      invalidIndices.push(validator.getInvalidCodeIndex(format));

    const chosenValue = invalidIndices.includes(-1) ? -1 : Math.max(...invalidIndices);
    return candidates[invalidIndices.indexOf(chosenValue)];
  }

  // todo: rename
  /**
   * Lists the formats that have a code matching the sequence at the probe
   * position.
   *
   * The probe position is right after the first `')'` when that parenthesis
   * is not the last character of the sequence, and 0 otherwise.
   */
  private getListOfPossibleSynthesizersByFirstMatchedCode(): string[] {
    const sequence = this.sequence;
    const firstClosing = sequence.indexOf(')');
    const start = (firstClosing !== -1 && firstClosing !== sequence.length - 1) ? firstClosing + 1 : 0;

    return this.formats.filter((format) => {
      const codes = sortByReverseLength(this.libWrapper.getCodesByFormat(format));
      return codes.some((code: string) => sequence.startsWith(code, start));
    });
  }
}
|