@datagrok/sequence-translator 1.0.17 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/.eslintrc.json +4 -3
  2. package/CHANGELOG.md +3 -0
  3. package/detectors.js +8 -0
  4. package/dist/package-test.js +2 -73079
  5. package/dist/package-test.js.map +1 -0
  6. package/dist/package.js +2 -72284
  7. package/dist/package.js.map +1 -0
  8. package/files/axolabs-style.json +97 -0
  9. package/files/codes-to-symbols.json +66 -0
  10. package/files/formats-to-helm.json +59 -0
  11. package/files/linkers.json +22 -0
  12. package/files/monomer-lib.json +1094 -0
  13. package/link-bio +7 -0
  14. package/package.json +30 -28
  15. package/scripts/build-monomer-lib.py +391 -122
  16. package/src/demo/demo-st-ui.ts +71 -0
  17. package/src/demo/handle-error.ts +12 -0
  18. package/src/model/axolabs/axolabs-tab.ts +111 -0
  19. package/src/model/axolabs/const.ts +33 -0
  20. package/src/{axolabs-tab → model/axolabs}/draw-svg.ts +1 -1
  21. package/src/{axolabs-tab → model/axolabs}/helpers.ts +7 -5
  22. package/src/model/const.ts +19 -0
  23. package/src/model/data-loading-utils/const.ts +8 -0
  24. package/src/model/data-loading-utils/json-loader.ts +38 -0
  25. package/src/model/data-loading-utils/types.ts +30 -0
  26. package/src/model/format-translation/const.ts +8 -0
  27. package/src/model/format-translation/conversion-utils.ts +48 -0
  28. package/src/model/format-translation/format-converter.ts +107 -0
  29. package/src/model/helpers.ts +12 -0
  30. package/src/model/monomer-lib/const.ts +3 -0
  31. package/src/model/monomer-lib/lib-wrapper.ts +106 -0
  32. package/src/model/parsing-validation/format-detector.ts +57 -0
  33. package/src/model/parsing-validation/sequence-validator.ts +52 -0
  34. package/src/model/sequence-to-structure-utils/const.ts +1 -0
  35. package/src/{utils/structures-works → model/sequence-to-structure-utils}/mol-transformations.ts +33 -41
  36. package/src/model/sequence-to-structure-utils/monomer-code-parser.ts +92 -0
  37. package/src/model/sequence-to-structure-utils/sdf-tab.ts +94 -0
  38. package/src/model/sequence-to-structure-utils/sequence-to-molfile.ts +409 -0
  39. package/src/package.ts +104 -92
  40. package/src/tests/const.ts +17 -0
  41. package/src/tests/smiles-tests.ts +32 -457
  42. package/src/view/const/main-tab.ts +3 -0
  43. package/src/view/const/view.ts +10 -0
  44. package/src/view/css/axolabs-tab.css +1 -0
  45. package/src/view/css/colored-text-input.css +27 -0
  46. package/src/view/css/main-tab.css +46 -0
  47. package/src/view/css/sdf-tab.css +39 -0
  48. package/src/view/monomer-lib-viewer/viewer.ts +22 -0
  49. package/src/view/tabs/axolabs.ts +720 -0
  50. package/src/view/tabs/main.ts +174 -0
  51. package/src/view/tabs/sdf.ts +173 -0
  52. package/src/view/utils/app-info-dialog.ts +18 -0
  53. package/src/view/utils/colored-input/colored-text-input.ts +56 -0
  54. package/src/view/utils/colored-input/input-painters.ts +44 -0
  55. package/src/view/utils/draw-molecule.ts +86 -0
  56. package/src/view/utils/molecule-img.ts +106 -0
  57. package/src/view/view.ts +129 -0
  58. package/tsconfig.json +12 -18
  59. package/webpack.config.js +17 -4
  60. package/README.md +0 -84
  61. package/css/style.css +0 -18
  62. package/img/Sequence Translator Axolabs.png +0 -0
  63. package/jest.config.js +0 -33
  64. package/setup-unlink-clean.cmd +0 -14
  65. package/setup-unlink-clean.sh +0 -21
  66. package/setup.cmd +0 -14
  67. package/setup.sh +0 -37
  68. package/src/__jest__/remote.test.ts +0 -77
  69. package/src/__jest__/test-node.ts +0 -97
  70. package/src/apps/oligo-sd-file-app.ts +0 -58
  71. package/src/autostart/calculations.ts +0 -40
  72. package/src/autostart/constants.ts +0 -37
  73. package/src/autostart/registration.ts +0 -306
  74. package/src/axolabs-tab/axolabs-tab.ts +0 -873
  75. package/src/axolabs-tab/define-pattern.ts +0 -874
  76. package/src/hardcode-to-be-eliminated/ICDs.ts +0 -3
  77. package/src/hardcode-to-be-eliminated/IDPs.ts +0 -3
  78. package/src/hardcode-to-be-eliminated/const.ts +0 -5
  79. package/src/hardcode-to-be-eliminated/constants.ts +0 -101
  80. package/src/hardcode-to-be-eliminated/converters.ts +0 -323
  81. package/src/hardcode-to-be-eliminated/map.ts +0 -720
  82. package/src/hardcode-to-be-eliminated/salts.ts +0 -2
  83. package/src/hardcode-to-be-eliminated/sources.ts +0 -3
  84. package/src/hardcode-to-be-eliminated/users.ts +0 -3
  85. package/src/main-tab/main-tab.ts +0 -210
  86. package/src/sdf-tab/sdf-tab.ts +0 -163
  87. package/src/sdf-tab/sequence-codes-tools.ts +0 -347
  88. package/src/utils/const.ts +0 -0
  89. package/src/utils/helpers.ts +0 -28
  90. package/src/utils/parse.ts +0 -27
  91. package/src/utils/sdf-add-columns.ts +0 -118
  92. package/src/utils/sdf-save-table.ts +0 -56
  93. package/src/utils/structures-works/draw-molecule.ts +0 -84
  94. package/src/utils/structures-works/from-monomers.ts +0 -266
  95. package/test-SequenceTranslator-6288c2fbe346-695b7b55.html +0 -259
  96. package/vendors/openchemlib-full.js +0 -293
@@ -0,0 +1,71 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+
5
+ import {handleError} from './handle-error';
6
+
7
+ import {delay} from '@datagrok-libraries/utils/src/test';
8
+ import {getJsonData} from '../model/data-loading-utils/json-loader';
9
+ import {SequenceTranslatorUI} from '../view/view';
10
+ import {_package} from '../package';
11
+
12
/**
 * Demo entry point: opens Sequence Translator and selects the pane with index 0.
 * Any failure while opening is forwarded to `handleError` rather than thrown.
 */
export async function demoTranslateSequenceUI() {
  try {
    // Must be awaited: otherwise a rejection bypasses this try/catch
    // and surfaces as an unhandled promise rejection.
    await openSequenceTranslatorOnPane(0);
  } catch (err: any) {
    handleError(err);
  }
}
19
+
20
/**
 * Demo entry point: opens Sequence Translator on the pane with index 1 and
 * emulates a user changing several `.st-pattern-choice-input` selects.
 *
 * All emulated inputs are scheduled at once (each waits 3 seconds before
 * acting), and the function resolves only after every one of them has run.
 * Any failure is forwarded to `handleError` rather than thrown.
 */
export async function demoDesignPatternUI() {
  /**
   * Waits, then sets `value` on one of the choice inputs and fires an `input` event.
   * @param value new value of the select element
   * @param idx ordinal of the emulated input within its group
   * @param idxUpdate maps `idx` (and the current number of selects) to the position
   *   of the target select element
   */
  async function emulateUserInput(
    value: string, idx: number, idxUpdate: (idx: number, total: number) => number,
  ): Promise<void> {
    await delay(3000);

    // warning: the elements are re-queried on every call because
    // the ids of the elements can dynamically change
    const choiceInputs: NodeListOf<HTMLSelectElement> = document.querySelectorAll('.st-pattern-choice-input > select');
    const targetIdx = idxUpdate(idx, choiceInputs.length);
    const selectElement = choiceInputs[targetIdx];
    if (!selectElement)
      throw new Error(`ST demo: no pattern choice input at position ${targetIdx}`);
    selectElement.value = value;
    selectElement.dispatchEvent(new Event('input'));
  }

  try {
    // The UI has to exist before the inputs are looked up
    await openSequenceTranslatorOnPane(1);

    const ssNewValues = ['DNA', 'invAb', 'Z-New'];
    const asNewValues = ['2\'-O-Methyl', '2\'-Fluoro', '2\'-O-MOE'];

    // Promise.all (instead of forEach with async callbacks) keeps the original
    // concurrent scheduling but lets rejections reach the catch below.
    await Promise.all([
      ...ssNewValues.map((value, idx) => emulateUserInput(value, idx, (i) => 2 * i)),
      ...asNewValues.map((value, idx) => emulateUserInput(value, idx, (i, total) => total - 2 - 2 * i)),
    ]);
  } catch (err: any) {
    handleError(err);
  }
}
52
+
53
/**
 * Demo entry point: opens Sequence Translator and selects the pane with index 2.
 * A failure while opening is reported through `handleError`.
 */
export async function demoVisualizeDuplexUI() {
  await openSequenceTranslatorOnPane(2).catch((err: any) => handleError(err));
}
60
+
61
/**
 * Loads the JSON dictionaries and the monomer library, builds the
 * Sequence Translator layout and makes the requested pane current.
 * @param paneNumber index into the tab control's `panes` array
 */
async function openSequenceTranslatorOnPane(paneNumber: number): Promise<void> {
  // Data has to be in place before the view is constructed
  await getJsonData();
  await _package.initMonomerLib();

  const view = new SequenceTranslatorUI();
  await view.createLayout();

  const control: DG.TabControl = await view.tabs.getControl();
  const availablePanes: DG.TabPane[] = control.panes;
  control.currentPane = availablePanes[paneNumber];
}
@@ -0,0 +1,12 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as DG from 'datagrok-api/dg';
3
+ import * as ui from 'datagrok-api/ui';
4
+
5
+ import {_package} from '../package';
6
+
7
/**
 * Reports an error both to the user (`grok.shell.error`) and to the package logger.
 *
 * Accepts any thrown value: `Error` instances contribute their message and stack,
 * everything else (strings, `null`, `undefined`, plain objects) is stringified.
 * @param err the caught value
 */
export function handleError(err: any): void {
  const isError = err instanceof Error;
  // String() is used instead of err.toString() so that null/undefined do not throw here
  const errMsg: string = isError ? err.message : String(err);
  const stack: string | undefined = isError ? err.stack : undefined;
  grok.shell.error(errMsg);
  // Log the same text that was shown; err.message is undefined for non-Error values
  _package.logger.error(errMsg, undefined, stack);
}
@@ -0,0 +1,111 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+ // @ts-ignore
5
+ import * as svg from 'save-svg-as-png';
6
+ import $ from 'cash-dom';
7
+
8
+ import {isOverhang} from './helpers';
9
+ import {axolabsStyleMap} from '../data-loading-utils/json-loader';
10
+
11
/**
 * Builds an example sequence for the given basis by cycling through the
 * basis' symbols (joined into one string).
 * @param sequenceLength requested length; the joined symbols are repeated
 *   `floor(sequenceLength / 4)` times and then padded with their first
 *   `sequenceLength % 4` characters
 * @param sequenceBasis key of `axolabsStyleMap` whose `symbols` are used
 */
export function generateExample(sequenceLength: number, sequenceBasis: string): string {
  const alphabet = axolabsStyleMap[sequenceBasis].symbols.join('');
  const fullRepeats = Math.floor(sequenceLength / 4);
  const remainder = sequenceLength % 4;
  return alphabet.repeat(fullRepeats) + alphabet.slice(0, remainder);
}
16
+
17
/**
 * Returns the values that occur more than once in `data`, each listed once,
 * in order of first occurrence.
 *
 * Runs in a single counting pass instead of an `indexOf`/`lastIndexOf` scan
 * per unique value.
 * @param data typed array to inspect
 */
export function findDuplicates(data: Int32Array | Float32Array | Float64Array | Uint32Array): number[] {
  const occurrences = new Map<number, number>();
  for (let i = 0; i < data.length; i++) {
    const value = data[i];
    occurrences.set(value, (occurrences.get(value) ?? 0) + 1);
  }

  const duplicates: number[] = [];
  occurrences.forEach((count, value) => {
    // NaN is never reported: it does not compare equal to itself,
    // which is also how the indexOf-based check treated it.
    if (count > 1 && !Number.isNaN(value))
      duplicates.push(value);
  });
  return duplicates;
}
20
+
21
/**
 * Compares the current user with the author name embedded in `patternName`.
 *
 * Resolves to `true` when the current user's first or last name differs from
 * the parsed author name, and to `false` when both match.
 * NOTE(review): the function name reads as the opposite of what is returned —
 * confirm against callers before changing either.
 * @param patternName pattern name, expected to carry the author in parentheses
 */
export async function isCurrentUserCreatedThisPattern(patternName: string): Promise<boolean> {
  const currentUser = await grok.dapi.users.current();
  const [authorFirstName, authorLastName] = getUserName(patternName);
  const sameAuthor = currentUser.firstName === authorFirstName && currentUser.lastName === authorLastName;
  return !sameAuthor;
}
27
+
28
/**
 * Returns the part of `patternName` that precedes the first `(`, without the
 * single character right before the parenthesis (presumably a separating space).
 * A name without `(` is returned unchanged.
 * @param patternName full pattern name, e.g. `name (…)`
 */
export function getShortName(patternName: string): string {
  const parenIdx = patternName.indexOf('(');
  if (parenIdx === -1)
    return patternName;
  // Clamp to 0: with '(' at index 0 a negative end index would make slice()
  // count from the end and return almost the whole string.
  return patternName.slice(0, Math.max(parenIdx - 1, 0));
}
38
+
39
/**
 * Extracts the author name parts from a pattern name.
 *
 * Takes the text starting 9 characters after the first `(` and ending before
 * the last character, splits it on spaces and drops the first word.
 * Returns `['', '']` when the name contains no `(`.
 * @param patternName pattern name, presumably of the form `name (created by First Last)`
 */
function getUserName(patternName: string): string[] {
  const parenIdx = patternName.indexOf('(');
  if (parenIdx === -1)
    return ['', ''];
  const insideParens = patternName.slice(parenIdx + 9, patternName.length - 1);
  const [, ...nameParts] = insideParens.split(' ');
  return nameParts;
}
49
+
50
/**
 * Translates a sequence of `A`/`U`/`G`/`C` characters into Axolabs notation
 * according to the per-position inputs.
 *
 * For the i-th matched character the symbol is taken from the `symbols` list of
 * the basis selected in `bases[i]`, at the position the character has in the
 * `RNA` symbols list. Characters other than A, U, G, C are left as they are.
 * @param sequence input sequence
 * @param bases per-position basis inputs; `value` is a key of `axolabsStyleMap`
 * @param ptoLinkages per-position inputs; a truthy `value` appends `s` to the symbol
 * @param startModification input whose `value` is prepended to the result
 * @param endModification input whose `value` is appended to the result
 * @param firstPtoExist when true, `s` is inserted right after the start modification
 * @returns the translated sequence
 */
export function translateSequence(
  sequence: string,
  bases: DG.InputBase[],
  ptoLinkages: DG.InputBase[],
  startModification: DG.InputBase,
  endModification: DG.InputBase,
  firstPtoExist: boolean): string {
  // Counts matched characters, i.e. the position within bases / ptoLinkages
  let i: number = -1;
  let mainSequence = sequence.replace(/[AUGC]/g, function(x: string) {
    i++;
    const AXOLABS_MAP = axolabsStyleMap;

    const indexOfSymbol = AXOLABS_MAP['RNA']['symbols'].indexOf(x);
    let symbol = AXOLABS_MAP[bases[i].value]['symbols'][indexOfSymbol];
    if (isOverhang(bases[i].value)) {
      // At the border between an overhang and a non-overhang position the
      // original character followed by 'f' is attached on the inner side
      if (i < sequence.length / 2 && !isOverhang(bases[i + 1].value))
        symbol = symbol + x + 'f';
      else if (i > sequence.length / 2 && !isOverhang(bases[i - 1].value))
        symbol = x + 'f' + symbol;
    }
    return (ptoLinkages[i].value) ? symbol + 's' : symbol;
  });
  // Two 'mU' within the first 5 characters: the first 4 characters become '(uu)'
  if (mainSequence.slice(0, 5).split('mU').length === 3)
    mainSequence = '(uu)' + mainSequence.slice(4);
  // Two 'mU' within the last 7 characters: the last 4 characters become '(uu)'
  if (mainSequence.slice(mainSequence.length - 7).split('mU').length === 3)
    mainSequence = mainSequence.slice(0, mainSequence.length - 4) + '(uu)';
  return startModification.value + (firstPtoExist ? 's' : '') + mainSequence + endModification.value;
}
80
+
81
/**
 * Adds (or replaces) the string column `ID <patternName>` in the named table.
 *
 * Each cell is the source column's value followed by `_<patternName>`;
 * rows whose source value is an empty string stay empty.
 * @param tableName name of the table in the shell
 * @param columnName name of the source column
 * @param patternName pattern name used in the new column's name and values
 */
export function addColumnWithIds(tableName: string, columnName: string, patternName: string) {
  const idColumnName = 'ID ' + patternName;
  const tableColumns = grok.shell.table(tableName).columns;
  // Replace a column left over from a previous run
  if (tableColumns.contains(idColumnName))
    tableColumns.remove(idColumnName);

  const sourceColumn = tableColumns.byName(columnName);
  const makeId = (i: number) => {
    if (sourceColumn.getString(i) === '')
      return '';
    return sourceColumn.get(i) + '_' + patternName;
  };
  return tableColumns.addNewString(idColumnName).init(makeId);
}
91
+
92
/**
 * Adds (or replaces) the string column `Axolabs <columnName>` in the named table,
 * filled with `translateSequence` applied to every non-empty source value.
 * @param tableName name of the table in the shell
 * @param columnName name of the column holding the input sequences
 * @param bases passed through to `translateSequence`
 * @param ptoLinkages passed through to `translateSequence`
 * @param startModification passed through to `translateSequence`
 * @param endModification passed through to `translateSequence`
 * @param firstPtoExist passed through to `translateSequence`
 */
export function addColumnWithTranslatedSequences(
  tableName: string,
  columnName: string,
  bases: DG.InputBase[],
  ptoLinkages: DG.InputBase[],
  startModification: DG.InputBase,
  endModification: DG.InputBase,
  firstPtoExist: boolean) {
  const translatedColumnName = 'Axolabs ' + columnName;
  const tableColumns = grok.shell.table(tableName).columns;
  // Replace a column left over from a previous run
  if (tableColumns.contains(translatedColumnName))
    tableColumns.remove(translatedColumnName);

  const sourceColumn = tableColumns.byName(columnName);
  const translateRow = (i: number) => {
    if (sourceColumn.getString(i) === '')
      return '';
    return translateSequence(
      sourceColumn.getString(i), bases, ptoLinkages, startModification, endModification, firstPtoExist);
  };
  return tableColumns.addNewString(translatedColumnName).init(translateRow);
}
@@ -0,0 +1,33 @@
1
+ export const DEFAULT_PTO: boolean = true;
2
+ export const DEFAULT_SEQUENCE_LENGTH: number = 23;
3
+ export const MAX_SEQUENCE_LENGTH: number = 35;
4
+ export const USER_STORAGE_KEY: string = 'SequenceTranslator';
5
+ export const EXAMPLE_MIN_WIDTH: string = '400px';
6
+
7
+ export const enum JSON_FIELD {
8
+ SS_BASES = 'ssBases',
9
+ AS_BASES = 'asBases',
10
+ SS_PTO = 'ssPtoLinkages',
11
+ AS_PTO = 'asPtoLinkages',
12
+ SS_3 = 'ssThreeModification',
13
+ SS_5 = 'ssFiveModification',
14
+ AS_3 = 'asThreeModification',
15
+ AS_5 = 'asFiveModification',
16
+ COMMENT = 'comment',
17
+ };
18
+
19
+ export const SS = 'SS' as const;
20
+ export const AS = 'AS' as const;
21
+ export const STRANDS = [SS, AS];
22
+ export const STRAND_NAME = {
23
+ [SS]: 'Sense Strand',
24
+ [AS]: 'Antisense Strand',
25
+ }
26
+
27
+ export const THREE_PRIME = 'THREE_PRIME' as const;
28
+ export const FIVE_PRIME = 'FIVE_PRIME' as const;
29
+ export const TERMINAL_KEYS = [THREE_PRIME, FIVE_PRIME];
30
+ export const TERMINAL = {
31
+ [THREE_PRIME]: 3,
32
+ [FIVE_PRIME]: 5,
33
+ }
@@ -1,4 +1,4 @@
1
- import {NUCLEOTIDES} from '../hardcode-to-be-eliminated/map';
1
+ import {NUCLEOTIDES} from '../const';
2
2
  import {isOverhang, svg, textWidth, countOverhangsOnTheRightEdge, baseColor, textInsideCircle,
3
3
  fontColorVisibleOnBackground, isOneDigitNumber} from './helpers';
4
4
 
@@ -1,8 +1,8 @@
1
- import {AXOLABS_MAP} from '../hardcode-to-be-eliminated/constants';
2
- import {NUCLEOTIDES} from '../hardcode-to-be-eliminated/map';
1
+ import {NUCLEOTIDES} from '../const';
2
+ import {axolabsStyleMap} from '../data-loading-utils/json-loader';
3
3
 
4
4
  export function isOverhang(modification: string): boolean {
5
- return modification.slice(-3) == '(o)';
5
+ return modification.slice(-3) === '(o)';
6
6
  }
7
7
 
8
8
  export function isOneDigitNumber(n: number): boolean {
@@ -21,7 +21,7 @@ export function getPointsToDrawStar(centerX: number, centerY: number): string {
21
21
 
22
22
  let points = '';
23
23
  for (let i = 0; i < totalNumberOfPoints; i++) {
24
- const r = (i % 2 == 0) ? outerRadius : innerRadius;
24
+ const r = (i % 2 === 0) ? outerRadius : innerRadius;
25
25
  const currentX = centerX + Math.cos(i * angle + angleOffsetToCenterStar) * r;
26
26
  const currentY = centerY + Math.sin(i * angle + angleOffsetToCenterStar) * r;
27
27
  points += `${currentX},${currentY} `;
@@ -33,7 +33,7 @@ export function countOverhangsOnTheRightEdge(modifications: string[]): number {
33
33
  let i = 0;
34
34
  while (i < modifications.length && isOverhang(modifications[i]))
35
35
  i++;
36
- return (i == modifications.length - 1) ? 0 : i;
36
+ return (i === modifications.length - 1) ? 0 : i;
37
37
  }
38
38
 
39
39
  export function textWidth(text: string, font: number): number {
@@ -49,11 +49,13 @@ export function textInsideCircle(bases: string[], index: number): string {
49
49
  }
50
50
 
51
51
  export function fontColorVisibleOnBackground(base: string): string {
52
+ const AXOLABS_MAP = axolabsStyleMap;
52
53
  const rgbIntList = AXOLABS_MAP[base].color.match(/\d+/g)!.map((e) => Number(e));
53
54
  return (rgbIntList[0] * 0.299 + rgbIntList[1] * 0.587 + rgbIntList[2] * 0.114) > 186 ? '#33333' : '#ffffff';
54
55
  }
55
56
 
56
57
  export function baseColor(base: string): string {
58
+ const AXOLABS_MAP = axolabsStyleMap;
57
59
  return AXOLABS_MAP[base].color;
58
60
  }
59
61
 
@@ -0,0 +1,19 @@
1
+ /* Do not change these import lines to match external modules in webpack configuration */
2
+ import * as grok from 'datagrok-api/grok';
3
+ import * as ui from 'datagrok-api/ui';
4
+ import * as DG from 'datagrok-api/dg';
5
+
6
+ export const DELIMITER = ';'; // what is the need for this?
7
+ export const NUCLEOTIDES = ['A', 'G', 'C', 'U', 'T'];
8
+
9
+ export const TECHNOLOGIES = {
10
+ DNA: 'DNA',
11
+ RNA: 'RNA',
12
+ ASO_GAPMERS: 'ASOGapmers',
13
+ SI_RNA: 'siRNA',
14
+ };
15
+
16
+ export enum DEFAULT_FORMATS {
17
+ HELM = 'HELM',
18
+ AXOLABS = 'Axolabs',
19
+ }
@@ -0,0 +1,8 @@
1
+ export const LIB_PATH = 'System:AppData/SequenceTranslator';
2
+ export const DEFAULT_LIB_FILENAME = 'monomer-lib.json';
3
+
4
+ export const APP_PATH = 'System:AppData/SequenceTranslator';
5
+ export const AXOLABS_STYLE_FILENAME = 'axolabs-style.json';
6
+ export const CODES_TO_HELM_DICT_FILENAME = 'formats-to-helm.json';
7
+ export const CODES_TO_SYMBOLS_FILENAME = 'codes-to-symbols.json';
8
+ export const MONOMERS_WITH_PHOSPHATE_LINKERS = 'linkers.json';
@@ -0,0 +1,38 @@
1
+ /* Do not change these import lines to match external modules in webpack configuration */
2
+ import * as grok from 'datagrok-api/grok';
3
+ import * as ui from 'datagrok-api/ui';
4
+ import * as DG from 'datagrok-api/dg';
5
+
6
+ import {APP_PATH, AXOLABS_STYLE_FILENAME, CODES_TO_HELM_DICT_FILENAME, CODES_TO_SYMBOLS_FILENAME, MONOMERS_WITH_PHOSPHATE_LINKERS} from './const';
7
+ import {AxolabsStyle, FormatToHELMDict, CodeToSymbol} from './types';
8
+
9
+ const fileSource = new DG.FileSource(APP_PATH);
10
+
11
+ export let axolabsStyleMap: AxolabsStyle;
12
+ export let codesToHelmDictionary: FormatToHELMDict;
13
+ export let codesToSymbolsDictionary: CodeToSymbol;
14
+ export let monomersWithPhosphateLinkers: {[key: string]: string[]};
15
+
16
/**
 * Loads the four JSON dictionaries used by the package into the module-level
 * variables. Does nothing when all of them are already set.
 *
 * The files are read in parallel and assigned together, so a failed load
 * leaves none of the variables changed.
 * @throws Error from `parse` when any file cannot be read or parsed
 */
export async function getJsonData(): Promise<void> {
  const alreadyLoaded =
    [axolabsStyleMap, codesToHelmDictionary, codesToSymbolsDictionary, monomersWithPhosphateLinkers]
      .every((item) => item !== undefined);
  if (alreadyLoaded)
    return;

  // The reads are independent of each other, no need to wait for them one by one
  const [styleMap, codesToHelm, codesToSymbols, linkers] = await Promise.all([
    parse(AXOLABS_STYLE_FILENAME),
    parse(CODES_TO_HELM_DICT_FILENAME),
    parse(CODES_TO_SYMBOLS_FILENAME),
    parse(MONOMERS_WITH_PHOSPHATE_LINKERS),
  ]);
  axolabsStyleMap = styleMap;
  codesToHelmDictionary = codesToHelm;
  codesToSymbolsDictionary = codesToSymbols;
  monomersWithPhosphateLinkers = linkers;
}
27
+
28
/**
 * Reads the file at `path` through `fileSource` and parses it as JSON.
 * @param path file path passed to `fileSource.readAsText`
 * @returns the parsed JSON value (not validated against any schema)
 * @throws Error prefixed with `Error loading json from <path>:` when reading
 *   or parsing fails
 */
async function parse(path: string): Promise<any> {
  try {
    return JSON.parse(await fileSource.readAsText(path));
  } catch (err: any) {
    // Thrown values are not guaranteed to be objects: guard before probing for a message
    const hasMessage = err !== null && err !== undefined &&
      Object.prototype.hasOwnProperty.call(err, 'message');
    const errMsg: string = hasMessage ? err.message : String(err);
    throw new Error(`Error loading json from ${path}:` + errMsg);
  }
}
38
+
@@ -0,0 +1,30 @@
1
+ type KeyToValue = {[key: string]: string};
2
+
3
+ export type Edges = {
4
+ [key: string]: KeyToValue
5
+ }
6
+
7
+ export type AxolabsStyle = {
8
+ [index: string]: {
9
+ fullName: string,
10
+ symbols: string[],
11
+ color: string,
12
+ }
13
+ };
14
+
15
+ export type CodesInfo = {
16
+ [key: string]: { // nucleoside or phosphate
17
+ [code: string]: string
18
+ }
19
+ }
20
+
21
+
22
+ export type FormatToHELMDict = {
23
+ [sourceFormat: string]: CodesInfo
24
+ }
25
+
26
+ export type CodeToSymbol = {
27
+ [format: string]: {
28
+ [code: string]: string
29
+ }
30
+ }
@@ -0,0 +1,8 @@
1
+ export const GROUP_TYPE = {
2
+ NUCLEOSIDE: 'nucleoside',
3
+ LINKAGE: 'phosphateBackbone',
4
+ } as const;
5
+
6
+ export const PHOSPHATE_SYMBOL = 'p';
7
+
8
+ export const UNKNOWN_SYMBOL = '<?>';
@@ -0,0 +1,48 @@
1
+ import {DEFAULT_FORMATS, NUCLEOTIDES} from '../const';
2
+ import {UNKNOWN_SYMBOL} from './const';
3
+ import {FormatConverter} from './format-converter';
4
+ import {codesToHelmDictionary} from '../data-loading-utils/json-loader';
5
+ import {MonomerLibWrapper} from '../monomer-lib/lib-wrapper';
6
+
7
/**
 * Translates `sequence` from `sourceFormat` into every other supported format.
 *
 * Supported formats are the keys of `codesToHelmDictionary` plus HELM.
 * Formats whose conversion throws or yields an empty string are left out of
 * the result. When a HELM representation is available (the input itself for a
 * HELM source, otherwise the converted one), a `Nucleotides` entry is added.
 * @param sequence sequence to translate
 * @param indexOfFirstInvalidChar -1 when the sequence is valid; any other value
 *   yields an empty result unless the source format is HELM
 * @param sourceFormat format of `sequence`
 * @returns map from target format name to the translated sequence
 * @throws Error when `sourceFormat` is not supported
 */
export function getTranslatedSequences(sequence: string, indexOfFirstInvalidChar: number, sourceFormat: string): {[key: string]: string} {
  const supportedFormats = Object.keys(codesToHelmDictionary).concat([DEFAULT_FORMATS.HELM]) as string[];

  if (!sequence || (indexOfFirstInvalidChar !== -1 && sourceFormat !== DEFAULT_FORMATS.HELM))
    return {};

  if (!supportedFormats.includes(sourceFormat))
    throw new Error(`${sourceFormat} format is not supported by SequenceTranslator`);

  const outputFormats = supportedFormats.filter((el) => el !== sourceFormat)
    .sort((a, b) => a.localeCompare(b));
  const converter = new FormatConverter(sequence, sourceFormat);

  const result: {[key: string]: string} = {};
  for (const format of outputFormats) {
    try {
      const translation = converter.convertTo(format);
      if (translation)
        result[format] = translation;
    } catch {
      // Best effort: a format that cannot be produced is simply omitted
    }
  }

  // For a HELM source there is no HELM entry in the result (it is excluded from
  // the output formats), so the input itself is the HELM string to analyse.
  const helm = (sourceFormat === DEFAULT_FORMATS.HELM) ? sequence : result[DEFAULT_FORMATS.HELM];
  if (helm) {
    const nucleotides = getNucleotidesSequence(helm, MonomerLibWrapper.getInstance());
    if (nucleotides)
      result['Nucleotides'] = nucleotides;
  }
  return result;
}
35
+
36
/**
 * Derives a plain nucleotide string from a HELM string.
 *
 * Every innermost parenthesised group is stripped of brackets and parentheses;
 * a symbol listed in `NUCLEOTIDES` is used as is, any other symbol is replaced
 * by its natural analog from the monomer library. A falsy analog becomes
 * `UNKNOWN_SYMBOL`.
 * @param helmString HELM string to analyse
 * @param monomerLib source of natural analogs
 * @returns the concatenated nucleotides, or `null` when the string has no
 *   parenthesised group
 */
function getNucleotidesSequence(helmString: string, monomerLib: MonomerLibWrapper): string | null {
  const branches = helmString.match(/\([^()]*\)/g);
  if (branches === null)
    return null;

  let nucleotides = '';
  for (const branch of branches) {
    const symbol = branch.replace(/[\[\]()]/g, '');
    const analog = NUCLEOTIDES.includes(symbol) ? symbol : monomerLib.getNaturalAnalogBySymbol(symbol);
    nucleotides += analog ? analog : UNKNOWN_SYMBOL;
  }
  return nucleotides;
}
@@ -0,0 +1,107 @@
1
+ import * as DG from 'datagrok-api/dg';
2
+ import {DEFAULT_FORMATS} from '../const';
3
+ import {GROUP_TYPE, PHOSPHATE_SYMBOL, UNKNOWN_SYMBOL} from './const';
4
+ import {CodesInfo} from '../data-loading-utils/types';
5
+ import {codesToHelmDictionary} from '../data-loading-utils/json-loader';
6
+
7
+ const HELM_WRAPPER = {
8
+ LEFT: 'RNA1{',
9
+ RIGHT: '}$$$$',
10
+ };
11
+
12
/**
 * Converts one sequence between HELM and the formats listed in
 * `codesToHelmDictionary`. Conversions between two non-HELM formats go
 * through HELM as an intermediate step.
 */
export class FormatConverter {
  /**
   * @param sequence sequence to convert
   * @param sourceFormat format of `sequence`
   */
  constructor(private readonly sequence: string, private readonly sourceFormat: string) { }

  /**
   * Converts the sequence to `targetFormat`.
   * @throws Error when the source/target pair is not a supported direction
   *   (this includes HELM to HELM)
   */
  convertTo(targetFormat: string): string {
    const formats = Object.keys(codesToHelmDictionary);
    const sourceIsHelm = this.sourceFormat === DEFAULT_FORMATS.HELM;
    const targetIsHelm = targetFormat === DEFAULT_FORMATS.HELM;
    const sourceIsKnown = formats.includes(this.sourceFormat);
    const targetIsKnown = formats.includes(targetFormat);

    if (sourceIsHelm && targetIsKnown)
      return helmToFormat(this.sequence, targetFormat);

    if (sourceIsKnown && targetIsHelm)
      return formatToHelm(this.sequence, this.sourceFormat);

    if (sourceIsKnown && targetIsKnown) {
      const intermediateHelm = formatToHelm(this.sequence, this.sourceFormat);
      return helmToFormat(intermediateHelm, targetFormat);
    }

    throw new Error(`ST: unsupported translation direction ${this.sourceFormat} -> ${targetFormat}`);
  }
}
31
+
32
/**
 * Builds a regular-expression source that matches any of the given strings.
 *
 * Each string is escaped for literal matching. Strings that contain no
 * parenthesis are additionally wrapped in lookarounds so that they do not
 * match inside a parenthesised group. Alternatives are joined with `|` in the
 * order given.
 * @param arr strings to match
 */
function getRegExpPattern(arr: string[]): string {
  const notAfterOpenParen = '(?<!\\([^()]*)'; // not '(' followed by non-parenths
  const notBeforeCloseParen = '(?![^()]*\\))'; // not ')' preceded by non-parenths

  const alternatives: string[] = [];
  for (const item of arr) {
    const escapedItem = item.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const hasParens = escapedItem.includes('(') || escapedItem.includes(')');
    alternatives.push(hasParens ? escapedItem : `${notAfterOpenParen}${escapedItem}${notBeforeCloseParen}`);
  }
  return alternatives.join('|');
}
44
+
45
/** Comparator for `Array.prototype.sort` that puts longer strings first. */
function sortCallback(a: string, b: string) {
  return b.length - a.length;
}
46
+
47
/**
 * Inverts a code-to-HELM dictionary into a HELM-to-code dictionary.
 *
 * The groups of `infoObj` are merged. In every HELM value each `)p` is
 * replaced by `)` before it is used as a key. When several codes map to the
 * same key, the longest code wins (the first one among equally long codes).
 * @param infoObj code-to-HELM dictionaries, one per group
 */
function getHelmToCodeDict(infoObj: CodesInfo) {
  // First pass: collect every code that maps to the same HELM key
  const codesByHelm: {[helm: string]: string[]} = {};
  for (const group of Object.values(infoObj)) {
    for (const [code, helm] of Object.entries(group)) {
      const key = helm.replace(/\)p/g, ')');
      if (codesByHelm[key] === undefined)
        codesByHelm[key] = [code];
      else
        codesByHelm[key].push(code);
    }
  }

  // Second pass: keep only the longest code per key
  const result: {[key: string]: string} = {};
  for (const [helm, codes] of Object.entries(codesByHelm))
    result[helm] = codes.sort(sortCallback)[0];
  return result;
}
65
+
66
/**
 * Converts a HELM string into `targetFormat`.
 *
 * The HELM wrapper is removed, every known HELM code is replaced by its
 * format code, and characters that are neither a known code, `p` nor `.` are
 * turned into `UNKNOWN_SYMBOL` (one per run). Finally `p.` pairs and remaining
 * dots are dropped and double slashes are collapsed.
 * @param helmSequence HELM string
 * @param targetFormat key of `codesToHelmDictionary`
 */
function helmToFormat(helmSequence: string, targetFormat: string): string {
  const dict = getHelmToCodeDict(codesToHelmDictionary[targetFormat] as CodesInfo);
  // Longest codes first, so that the alternation prefers them
  const helmCodes = Object.keys(dict).sort(sortCallback);

  const wrapperRegExp = new RegExp(getRegExpPattern(Object.values(HELM_WRAPPER)), 'g');
  const unwrapped = helmSequence.replace(wrapperRegExp, '');

  const helmRegExp = new RegExp(getRegExpPattern(helmCodes) + '|.', 'g');
  const substitute = (match: string): string => {
    if (helmCodes.includes(match))
      return dict[match];
    if (match === 'p' || match === '.')
      return match;
    return '?';
  };

  return unwrapped
    .replace(helmRegExp, substitute)
    .replace(/\?+/g, UNKNOWN_SYMBOL)
    .replace(/p\.|\./g, '')
    // remove double slash in LCMS codes
    .replace(/\/\//g, '/');
}
83
+
84
/**
 * Converts a sequence written in `sourceFormat` into a HELM string.
 *
 * Every known code is replaced by its HELM counterpart followed by a dot;
 * anything else becomes `UNKNOWN_SYMBOL` (one per run of unknown matches).
 * The trailing dot and a trailing phosphate are removed, and a phosphate
 * that is followed by `.` and a linkage code is replaced by that linkage.
 * @param sequence sequence in the source format
 * @param sourceFormat key of `codesToHelmDictionary`
 * @returns HELM string wrapped in `HELM_WRAPPER.LEFT` / `HELM_WRAPPER.RIGHT`
 */
function formatToHelm(sequence: string, sourceFormat: string): string {
  const codesInfoObject = codesToHelmDictionary[sourceFormat] as CodesInfo;
  const dict = Object.assign({}, ...Object.values(codesInfoObject)) as {[code: string]: string};

  const formatCodes = Object.keys(dict).sort(sortCallback);
  // the added group before '|.' is to avoid mismatch inside parenths
  const formatRegExp = new RegExp(getRegExpPattern(formatCodes) + '|\\([^()]*\\)|.', 'g');

  const phosphateHELMCodes = Array.from(
    new Set(Object.values(codesInfoObject[GROUP_TYPE.LINKAGE]))
  ).sort(sortCallback);
  const phosphateHELMPattern = getRegExpPattern(phosphateHELMCodes);
  // The backslash is doubled on purpose: inside a template literal '\.' collapses
  // to '.', which the RegExp would read as "any character" instead of a literal dot.
  const phosphateRegExp = new RegExp(`${PHOSPHATE_SYMBOL}\\.(${phosphateHELMPattern})`, 'g');

  let helm = sequence.replace(formatRegExp, (match) => {
    return formatCodes.includes(match) ? dict[match] + '.' : '?';
  });
  helm = helm.replace(/\?+/g, `${UNKNOWN_SYMBOL}.`);
  helm = helm.slice(0, -1); // strip last dot
  if (helm[helm.length - 1] === PHOSPHATE_SYMBOL)
    helm = helm.slice(0, -1);
  helm = helm.replace(phosphateRegExp, (match, group) => group);
  return `${HELM_WRAPPER.LEFT + helm + HELM_WRAPPER.RIGHT}`;
}
@@ -0,0 +1,12 @@
1
+ import * as DG from 'datagrok-api/dg';
2
+
3
/**
 * Sorts the strings from longest to shortest.
 * The array is sorted in place and the same array instance is returned.
 * @param array strings to sort
 */
export function sortByReverseLength(array: string[]): string[] {
  const longestFirst = (left: string, right: string): number => right.length - left.length;
  array.sort(longestFirst);
  return array;
}
6
+
7
/**
 * Triggers a browser download of a text file by clicking a temporary link.
 * @param name file name offered to the user (the link's `download` attribute)
 * @param href content appended verbatim to the `data:text/plain` URI —
 *   NOTE(review): it is not encoded here, presumably callers pass
 *   already URI-encoded text; confirm against call sites
 */
export function download(name: string, href: string): void {
  const link = document.createElement('a');
  const attributes: [string, string][] = [
    ['href', 'data:text/plain;charset=utf-8,' + href],
    ['download', name],
  ];
  for (const [attribute, value] of attributes)
    link.setAttribute(attribute, value);
  link.click();
}
@@ -0,0 +1,3 @@
1
+ export const enum META_FIELDS {
2
+ MOLWEIGHT = 'molecularWeight',
3
+ }
@@ -0,0 +1,106 @@
1
+ /* Do not change these import lines to match external modules in webpack configuration */
2
+ import * as grok from 'datagrok-api/grok';
3
+ import * as ui from 'datagrok-api/ui';
4
+ import * as DG from 'datagrok-api/dg';
5
+
6
+ import {_package} from '../../package';
7
+
8
+ import {IMonomerLib, Monomer} from '@datagrok-libraries/bio/src/types';
9
+
10
+ import {HELM_REQUIRED_FIELDS as REQ, HELM_OPTIONAL_FIELDS as OPT} from '@datagrok-libraries/bio/src/utils/const';
11
+ import {META_FIELDS as MET} from './const';
12
+ import {codesToSymbolsDictionary} from '../../model/data-loading-utils/json-loader';
13
+
14
/**
 * Singleton wrapper around the package's monomer library (`_package.monomerLib`).
 * Obtain it through `getInstance()`; the constructor is private.
 */
export class MonomerLibWrapper {
  private static instance?: MonomerLibWrapper;
  private lib: IMonomerLib;
  /** Monomers of all polymer types, collected once at construction time */
  private allMonomers: Monomer[];

  /** @throws Error when the package's monomer library is `null` */
  private constructor() {
    const lib = _package.monomerLib;
    if (lib === null)
      throw new Error('SequenceTranslator: monomer library is null');
    this.lib = lib!;
    this.allMonomers = this.getAllMonomers();
  }

  /**
   * Reduces a monomer to the string fields shown in the viewer table.
   * NOTE(review): the name field is filled from the monomer's symbol, not from
   * its name — confirm that this is intended.
   */
  private formatMonomerForViewer(sourceObj: Monomer): {[key: string]: string} {
    const formattedObject: {[key: string]: string} = {};
    formattedObject[REQ.NAME] = sourceObj[REQ.SYMBOL];
    formattedObject[REQ.SYMBOL] = sourceObj[REQ.SYMBOL];
    formattedObject[REQ.MOLFILE] = sourceObj[REQ.MOLFILE];

    return formattedObject;
  }

  /** Collects the monomers of every polymer type, skipping `null` lookups. */
  private getAllMonomers(): Monomer[] {
    const polymerTypes = this.lib.getPolymerTypes();
    let result: Monomer[] = [];
    for (const polymerType of polymerTypes) {
      const monomerSymbols = this.lib.getMonomerSymbolsByType(polymerType);
      const monomersByType: Monomer[] = monomerSymbols
        .map((monomerSymbol) => this.lib.getMonomer(polymerType, monomerSymbol))
        .filter((monomer): monomer is Monomer => monomer !== null);
      result = result.concat(monomersByType);
    }
    return result;
  }

  /**
   * Looks up a monomer of polymer type `RNA` by symbol.
   * @throws Error when the library has no such monomer
   */
  private getMonomer(monomerSymbol: string): Monomer {
    const monomer = this.lib.getMonomer('RNA', monomerSymbol);
    // A failed lookup can be null (see the filter in getAllMonomers), not only
    // undefined; both must produce the descriptive error below.
    if (monomer === null || monomer === undefined)
      throw new Error(`SequenceTranslator: no monomer with symbol ${monomerSymbol}`);
    return monomer;
  }

  /** Returns the shared instance, creating it on first use. */
  static getInstance(): MonomerLibWrapper {
    if (MonomerLibWrapper.instance === undefined)
      MonomerLibWrapper.instance = new MonomerLibWrapper();
    return MonomerLibWrapper.instance!;
  }

  /**
   * Returns the molfile of the monomer with the given symbol.
   * @throws Error when the library has no such monomer
   */
  getMolfileBySymbol(monomerSymbol: string): string {
    const monomer = this.getMonomer(monomerSymbol);
    return monomer.molfile;
  }

  /**
   * Returns the natural analog of the monomer with the given symbol.
   * @throws Error when the monomer is missing or has no natural analog
   */
  getNaturalAnalogBySymbol(monomerSymbol: string): string {
    const monomer = this.getMonomer(monomerSymbol);
    const naturalAnalog = monomer.naturalAnalog;
    if (!naturalAnalog)
      throw new Error(`ST: no natural analog for ${monomerSymbol}`);
    return naturalAnalog!;
  }

  /**
   * Tells whether the monomer's molfile contains the text `MODIFICATION`.
   * @throws Error when the library has no such monomer
   */
  // todo: a better criterion
  isModification(monomerSymbol: string): boolean {
    const molfile = this.getMolfileBySymbol(monomerSymbol);
    return molfile.includes('MODIFICATION');
  }

  /** Returns the code-to-symbol entries of `format` as a `Map`. */
  getCodeToSymbolMap(format: string): Map<string, string> {
    return new Map<string, string>(Object.entries(codesToSymbolsDictionary[format]));
  }

  /** Returns the codes defined for `format` in `codesToSymbolsDictionary`. */
  getCodesByFormat(format: string): string[] {
    return Object.keys(codesToSymbolsDictionary[format]);
  }

  /** Builds a data frame with one row per monomer, formatted for the viewer. */
  getTableForViewer(): DG.DataFrame {
    const formattedObjects = this.allMonomers.map((monomer) => this.formatMonomerForViewer(monomer));
    const df = DG.DataFrame.fromObjects(formattedObjects)!;
    return df;
  }

  /**
   * Maps every code of every format to the molecular weight stored in the
   * metadata of its monomer. A code present in several formats keeps the
   * weight of the last one processed.
   * NOTE(review): the weight is `undefined` when the monomer has no such
   * metadata — confirm whether consumers expect that.
   * @throws Error when a code refers to a monomer missing from the library
   */
  getCodesToWeightsMap(): Map<string, number> {
    const codesToWeightsMap = new Map<string, number>();
    Object.entries(codesToSymbolsDictionary).forEach(([_, dict]) => {
      Object.entries(dict).forEach(([code, monomerSymbol]) => {
        const monomer = this.getMonomer(monomerSymbol);
        const weight = monomer[OPT.META]?.[MET.MOLWEIGHT];
        codesToWeightsMap.set(code, weight);
      });
    });
    return codesToWeightsMap;
  }
}