@datagrok/bio 1.5.7 → 1.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/css/helm.css +3 -0
  2. package/detectors.js +9 -10
  3. package/dist/package-test.js +1095 -416
  4. package/dist/package.js +898 -250
  5. package/files/samples/sample_FASTA.csv +66 -66
  6. package/helm/JSDraw/Pistoia.HELM-uncompressed.js +9694 -0
  7. package/helm/JSDraw/Pistoia.HELM.js +27 -0
  8. package/helm/JSDraw/ReadMe.txt +8 -0
  9. package/helm/JSDraw/Scilligence.JSDraw2.Lite-uncompressed.js +31126 -0
  10. package/helm/JSDraw/Scilligence.JSDraw2.Lite.js +12 -0
  11. package/helm/JSDraw/Scilligence.JSDraw2.Resources.js +762 -0
  12. package/helm/JSDraw/dojo.js +250 -0
  13. package/helm/JSDraw/test.html +21 -0
  14. package/package.json +8 -1
  15. package/src/monomer-library.ts +199 -0
  16. package/src/package-test.ts +2 -0
  17. package/src/package.ts +41 -13
  18. package/src/tests/convert-test.ts +143 -22
  19. package/src/tests/detectors-test.ts +97 -156
  20. package/src/tests/renderer-test.ts +36 -0
  21. package/src/tests/splitter-test.ts +22 -0
  22. package/src/tests/types.ts +7 -0
  23. package/src/utils/atomic-works.ts +218 -97
  24. package/src/utils/cell-renderer.ts +214 -0
  25. package/src/utils/chem-palette.ts +280 -0
  26. package/src/utils/convert.ts +25 -16
  27. package/src/utils/misc.ts +29 -0
  28. package/src/utils/multiple-sequence-alignment.ts +1 -1
  29. package/src/utils/notation-converter.ts +120 -84
  30. package/src/utils/sequence-activity-cliffs.ts +2 -2
  31. package/src/utils/types.ts +13 -0
  32. package/src/utils/utils.ts +35 -30
  33. package/test-Bio-34f75e5127b8-c4c5a3dc.html +259 -0
  34. package/files/sample_FASTA.csv +0 -66
  35. package/files/sample_FASTA_with_activities.csv +0 -66
  36. package/files/sample_MSA.csv +0 -541
@@ -0,0 +1,199 @@
1
/** Attachment points of a monomer, keyed by link name. */
export type LinkData = { [link: string]: { atomNumber: number, type: string } };

/** A single monomer record: its molblock, type, natural analogue code and attachment points. */
export type MonomerEntry = {
  mol: string,
  type: string,
  analogueCode: string,
  linkages: LinkData
};

/** Monomer records keyed by monomer name. */
export type MonomerEntries = { [name: string]: MonomerEntry };
9
+
10
/**
 * HELM associated sdf libraries with monomer processing.
 *
 * The library is built from the text of an SDF file. Every record has to carry all the
 * fields listed in `monomerFields`. A record is registered under its `MonomerCode`, or
 * under its `MonomerName` when the code is `'.'`.
 */
export class MonomerLibrary {
  static libName = 'monomerLibrary';

  /** SDF fields that must be present, once per record, for the library to be built. */
  private monomerFields: string[] = [
    'molecule', 'MonomerType', 'MonomerNaturalAnalogCode', 'MonomerName', 'MonomerCode', 'MonomerCaps', 'BranchMonomer',
  ];

  /** Entries keyed by the registered monomer name. */
  private library: MonomerEntries = {};

  /** Registered monomer names in the order the records were read. */
  private monomers: string[] = [];

  /** Reads the SDF text and registers one entry per record.
   * @param {string} sdf content of the SDF file
   * @throws {Error} when a required field is absent, is not given for each record,
   * or a record has no usable R-group data
   */
  constructor(sdf: string) {
    const sdfReader = new SDFReader();
    const data = sdfReader.getColls(sdf);
    this.monomerFields.forEach((f) => {
      if (!(f in data))
        throw new Error(`Monomer library was not compiled: ${f} field is absent in provided file`);

      if (data[f].length !== data.molecule.length)
        throw new Error(`Monomer library was not compiled: ${f} field is not presented for each monomer`);
    });

    for (let i = 0; i < data.molecule.length; i++) {
      const linkData = this.getLinkData(data.molecule[i], data.MonomerCaps[i], data.MonomerName[i]);
      // NOTE(review): type is always 'Peptide'; the MonomerType field is validated above
      // but not used here -- confirm this is intended.
      const entry = {
        mol: data.molecule[i],
        type: 'Peptide',
        code: data.MonomerCode[i],
        analogueCode: data.MonomerNaturalAnalogCode[i],
        linkages: linkData,
      };

      // A '.' code means "no code": fall back to the monomer name.
      const name = data.MonomerCode[i] !== '.' ? data.MonomerCode[i] : data.MonomerName[i];
      this.library[name] = entry;
      this.monomers.push(name);
    }
  }

  /** getting full monomer information from monomer library
   * @param {string} name
   * @return {MonomerEntry}
   * @throws {Error} when the monomer is not registered
   */
  public getMonomerEntry(name: string): MonomerEntry {
    if (!this.monomers.includes(name))
      throw new Error(`Monomer library do not contain ${name} monomer`);

    return this.library[name];
  }

  /** getting mol as string for monomer
   *
   * The first `M RGP` line is removed and every `R#` placeholder is replaced, one per
   * linkage, with the cap type of that linkage followed by a space.
   * @param {string} name
   * @return {string}
   * @throws {Error} when the monomer is not registered
   */
  public getMonomerMol(name: string): string {
    const entry = this.getMonomerEntry(name);
    let monomerMol = entry.mol.replace(/M RGP .+\n/, '');

    //order matters: each pass replaces the first remaining 'R#'
    for (const link of Object.keys(entry.linkages))
      monomerMol = monomerMol.replace('R#', entry.linkages[link].type + ' ');

    return monomerMol;
  }

  /** getting the list of the monomers available in library (the internal array, not a copy)*/
  get monomerNames(): string[] {
    return this.monomers;
  }

  static get id(): string {
    return MonomerLibrary.libName;
  }

  /** Maps an R-group code to the name its linkage is stored under.
   * Codes other than 1, 2 and 3 get a distinct `R<code>` name so that several of them
   * do not overwrite each other.
   * @param {number} code
   * @return {string}
   */
  private static linkName(code: number): string {
    switch (code) {
    case 1:
      return 'N-terminal';
    case 2:
      return 'C-terminal';
    case 3:
      return 'branch';
    default:
      return `R${code}`;
    }
  }

  /** Collects the attachment points of one record.
   * @param {string} mol molblock of the record; must contain an `M RGP` line
   * @param {string} caps newline separated caps, each holding a code and, after `]`, a cap type
   * @param {string} name monomer name, used in error messages only
   * @return {LinkData}
   * @throws {Error} when the RGP line is missing or malformed, or a cap line cannot be parsed
   */
  private getLinkData(mol: string, caps: string, name: string): LinkData {
    const rawData = mol.match(/M RGP .+/);
    if (rawData === null)
      throw new Error(`Monomer library was not compiled: ${name} entry has no RGP`);

    const types: { [code: string]: string } = {};
    for (const line of caps.split('\n')) {
      // Blank lines (e.g. a trailing newline) carry no cap.
      if (line.trim() === '')
        continue;

      const code = line.match(/\d+/);
      const cap = line.match(/(?<=\])\w+/);
      if (code === null || cap === null)
        throw new Error(`Monomer library was not compiled: ${name} entry has malformed cap '${line}'`);

      types[code[0]] = cap[0];
    }

    // After the prefix the line holds a count followed by (atom number, R-group code) pairs.
    const data = rawData[0].replace('M RGP ', '').split(/\s+/);
    const count = parseInt(data[0]);
    const res: LinkData = {};
    for (let i = 0; i < count; i++) {
      const atomNumber = parseInt(data[2 * i + 1]);
      const code = parseInt(data[2 * i + 2]);
      if (isNaN(atomNumber) || isNaN(code))
        throw new Error(`Monomer library was not compiled: ${name} entry has malformed RGP`);

      res[MonomerLibrary.linkName(code)] = {atomNumber: atomNumber, type: types[code]};
    }

    return res;
  }
}
124
+
125
+ //TODO: merge with Chem version
126
/**
 * Minimal SDF reader: splits the text into records on `$$$$` and collects the molblock
 * and the `> <name>` properties of every record into per-name string arrays.
 */
class SDFReader {
  /** Parsed columns: 'molecule' plus one array per property name met in the content. */
  dataColls: { [_: string]: string [] };

  constructor() {
    this.dataColls = {'molecule': []};
  }

  /** Reads the content and returns the collected columns.
   * @param {string} content text of the SDF file
   * @return {Object} the `dataColls` object of this reader
   */
  getColls(content: string): { [_: string]: string[] } {
    this.read(content);
    return this.dataColls;
  }

  /** Parses the first record, which (re)creates the property columns, then the remaining ones.
   * @param {string} content text of the SDF file
   */
  read(content: string): void {
    content = content.replace(/\r/g, ''); //equalize old and new sdf standards
    let startIndex = content.indexOf('$$$$', 0);
    this.parse(content, 0, startIndex, (name: string, val: string): void => {
      this.dataColls[name] = [val];
    });
    // Skip the '$$$$' delimiter and the newline after it.
    startIndex += 5;
    while (startIndex > -1 && startIndex < content.length)
      startIndex = this.readNext(content, startIndex);
  }

  /** Parses the record that starts at `startIndex`.
   * @param {string} content text of the SDF file
   * @param {number} startIndex position of the first character of the record
   * @return {number} position right after the record delimiter, or -1 when no delimiter follows
   */
  readNext(content: string, startIndex: number): number {
    const nextStartIndex = content.indexOf('$$$$', startIndex);
    if (nextStartIndex === -1)
      return -1;

    this.parse(content, startIndex, nextStartIndex, (name: string, val: string): void => {
      // A property absent from the first record has no column yet: create it instead of
      // failing on `undefined.push`.
      if (!(name in this.dataColls))
        this.dataColls[name] = [];
      this.dataColls[name].push(val);
    });

    return nextStartIndex + 5;
  }

  /** Appends the molblock of one record to the 'molecule' column and reports every
   * property of the record to `handler`.
   * @param {string} content text of the SDF file
   * @param {number} start position of the first character of the record
   * @param {number} end position of the record delimiter
   * @param {Function} handler receives the name and the value of each property
   */
  parse(content: string, start: number, end: number, handler: (name: string, val: string) => void): void {
    // NOTE(review): the offset 7 does not match the 6 characters of the literal as shown here;
    // it would match a terminator written with two spaces. Confirm the literal's whitespace
    // against the real file before touching either value.
    const molEnd = content.indexOf('M END\n', start) + 7;
    this.dataColls['molecule'].push(content.substring(start, molEnd));

    let localEnd = start;
    while (localEnd < end) {
      let nameStart = content.indexOf('> <', localEnd);
      // No header left, or the next header already belongs to a following record.
      if (nameStart === -1 || nameStart >= end)
        return;

      nameStart += 3;
      const nameEnd = content.indexOf('>\n', nameStart);
      if (nameEnd === -1)
        return;

      const propertyName = content.substring(nameStart, nameEnd);
      const valueStart = nameEnd + 2;

      // The value ends at the first newline; when that newline is not followed by an empty
      // line the value is taken up to the next newline instead.
      let valueEnd = content.indexOf('\n', valueStart);
      if (valueEnd !== -1 && content[valueEnd + 1] != '\n')
        valueEnd = content.indexOf('\n', valueEnd + 1);
      // Without a closing newline the value runs to the end of the record.
      if (valueEnd === -1)
        valueEnd = end;

      handler(propertyName, content.substring(valueStart, valueEnd));
      localEnd = valueEnd + 2;
    }
  }
}
@@ -8,6 +8,8 @@ import './tests/detectors-test';
8
8
  import './tests/msa-tests';
9
9
  import './tests/sequence-space-test';
10
10
  import './tests/activity-cliffs-tests';
11
+ import './tests/splitter-test';
12
+ import './tests/renderer-test';
11
13
 
12
14
  export const _package = new DG.Package();
13
15
  export {tests};
package/src/package.ts CHANGED
@@ -16,8 +16,28 @@ import {getEmbeddingColsNames, sequenceSpace} from './utils/sequence-space';
16
16
  import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
17
17
  import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
18
18
  import {sequenceGetSimilarities, drawTooltip} from './utils/sequence-activity-cliffs';
19
- import { getMolfilesFromSeq, HELM_CORE_LIB_FILENAME } from './utils/utils';
19
+ import {getMolfilesFromSeq, HELM_CORE_LIB_FILENAME} from './utils/utils';
20
20
  import {getMacroMol} from './utils/atomic-works';
21
+ import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
22
+
23
+ //tags: init
24
export async function initBio(): Promise<void> {
  // apparently HELMWebEditor requires dojo to be initialized first
  // The promise settles only through dojo's ready callback; it never rejects.
  return new Promise<void>((resolve) => {
    // `dojo` has no declaration visible to TypeScript here, hence the suppression.
    // @ts-ignore
    dojo.ready(() => resolve());
  });
}
31
+
32
+
33
+ //name: macromoleculeSequenceCellRenderer
34
+ //tags: cellRenderer
35
+ //meta.cellType: Macromolecule
36
+ //output: grid_cell_renderer result
37
export function macromoleculeSequenceCellRenderer(): MacromoleculeSequenceCellRenderer {
  // A new renderer instance is created on every call.
  const renderer = new MacromoleculeSequenceCellRenderer();
  return renderer;
}
40
+
21
41
 
22
42
  //name: sequenceAlignment
23
43
  //input: string alignType {choices: ['Local alignment', 'Global alignment']}
@@ -109,21 +129,21 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
109
129
  }
110
130
  };
111
131
 
112
- //top-menu: Bio | Molfiles From HELM...
113
- //name: Molfiles From HELM
132
+ //top-menu: Bio | To Atomic Level...
133
+ //name: To Atomic Level
114
134
  //description: returns molfiles for each monomer from HELM library
115
135
  //input: dataframe df [Input data table]
116
136
  //input: column sequence {semType: Macromolecule}
117
- export async function molfilesFromHELM(df: DG.DataFrame, sequence: DG.Column): Promise<void> {
137
export async function toAtomicLevel(df: DG.DataFrame, sequence: DG.Column): Promise<void> {
  // Read the HELM core library through the package files and parse it as JSON.
  const libText = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
  const libObjects: any[] = JSON.parse(libText);

  // Per-sequence monomer molfiles, then one assembled molecule per sequence.
  const monomerMolfiles = getMolfilesFromSeq(sequence, libObjects);
  const molblocks = await getMacroMol(monomerMolfiles!);

  // The result is added to the input table as a molecule column in molblock units.
  const regenerated = DG.Column.fromStrings('regenerated', molblocks);
  regenerated.semType = DG.SEMTYPE.MOLECULE;
  regenerated.tags[DG.TAGS.UNITS] = 'molblock';
  df.columns.add(regenerated);
}
128
148
 
129
149
 
@@ -131,9 +151,17 @@ export async function molfilesFromHELM(df: DG.DataFrame, sequence: DG.Column): P
131
151
  //name: MSA
132
152
  //input: dataframe table
133
153
  //input: column sequence { semType: Macromolecule }
134
- export async function multipleSequenceAlignmentAny(table: DG.DataFrame, col: DG.Column): Promise<void> {
154
+ //output: column result
155
export async function multipleSequenceAlignmentAny(table: DG.DataFrame, col: DG.Column): Promise<DG.Column> {
  // Align the sequences and attach the aligned column to the input table.
  const alignedCol = await runKalign(col, false);
  table.columns.add(alignedCol);

  // This call is required to enable cell renderer activation
  await grok.data.detectSemanticTypes(table);

  return alignedCol;
}
138
166
 
139
167
  //name: Composition Analysis
@@ -1,31 +1,152 @@
1
- import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
1
+ import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
2
2
 
3
3
  import * as grok from 'datagrok-api/grok';
4
- import * as ui from 'datagrok-api/ui';
5
4
  import * as DG from 'datagrok-api/dg';
6
5
 
6
+ import {ConverterFunc, DfReaderFunc} from './types';
7
+ import {NOTATION, NotationConverter} from '../utils/notation-converter';
8
+
7
9
  // import {mmSemType} from '../const';
8
10
  // import {importFasta} from '../package';
9
11
 
10
12
  category('converters', () => {
11
- // test('a', async () => {await _a();});
12
- // test('b', async () => {await _b();});
13
- test('testFastaToSeparator', async () => { await _testFastaToSeparator(); });
14
- test('testSeparatorToFasta', async () => { await _testSeparatorToFasta(); });
15
- });
13
+ enum Samples {
14
+ fastaPt = 'fastaPt',
15
+ separatorPt = 'separatorPt',
16
+ helmPt = 'helmPt',
17
+
18
+ fastaDna = 'fastaDna',
19
+ separatorDna = 'separatorDna',
20
+ helmDna = 'helmDna',
21
+
22
+ fastaRna = 'fastaRna',
23
+ separatorRna = 'separatorRna',
24
+ helmRna = 'helmRna',
25
+ }
26
+
27
+ const _csvTxts: { [key: string]: string } = {
28
+ fastaPt: `seq
29
+ FWPHEY
30
+ YNRQWYV
31
+ MKPSEYV
32
+ `,
33
+ separatorPt: `seq
34
+ F-W-P-H-E-Y
35
+ Y-N-R-Q-W-Y-V
36
+ M-K-P-S-E-Y-V
37
+ `,
38
+ helmPt: `seq
39
+ PEPTIDE1{F.W.P.H.E.Y}$$$
40
+ PEPTIDE1{Y.N.R.Q.W.Y.V}$$$
41
+ PEPTIDE1{M.K.P.S.E.Y.V}$$$
42
+ `,
43
+ fastaDna: `seq
44
+ ACGTC
45
+ CAGTGT
46
+ TTCAAC
47
+ `,
48
+ separatorDna: `seq
49
+ A/C/G/T/C
50
+ C/A/G/T/G/T
51
+ T/T/C/A/A/C
52
+ `,
53
+ helmDna: `seq
54
+ DNA1{D(A)P.D(C)P.D(G)P.D(T)P.D(C)P}$$$
55
+ DNA1{D(C)P.D(A)P.D(G)P.D(T)P.D(G)P.D(T)P}$$$
56
+ DNA1{D(T)P.D(T)P.D(C)P.D(A)P.D(A)P.D(C)P}$$$
57
+ `,
58
+ fastaRna: `seq
59
+ ACGUC
60
+ CAGUGU
61
+ UUCAAC
62
+ `,
63
+ separatorRna: `seq
64
+ A*C*G*U*C
65
+ C*A*G*U*G*U
66
+ U*U*C*A*A*C
67
+ `,
68
+ helmRna: `seq
69
+ RNA1{R(A)P.R(C)P.R(G)P.R(U)P.R(C)P}$$$
70
+ RNA1{R(C)P.R(A)P.R(G)P.R(U)P.R(G)P.R(U)P}$$$
71
+ RNA1{R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P}$$$
72
+ `,
73
+ };
74
+
75
+ const _csvDfs: { [key: string]: Promise<DG.DataFrame> } = {};
76
+
77
+ /** Also detects semantic types
78
+ * @param {string} key
79
+ * @return {Promise<DG.DataFrame>}
80
+ */
81
+ function readCsv(key: string): Promise<DG.DataFrame> {
82
+ if (!(key in _csvDfs)) {
83
+ _csvDfs[key] = (async (): Promise<DG.DataFrame> => {
84
+ const csv: string = _csvTxts[key];
85
+ const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
86
+ await grok.data.detectSemanticTypes(df);
87
+ return df;
88
+ })();
89
+ }
90
+ return _csvDfs[key];
91
+ };
92
+
93
+ function converter(tgtNotation: NOTATION, separator: string | null = null): ConverterFunc {
94
+ return function(srcCol: DG.Column): DG.Column {
95
+ const converter = new NotationConverter(srcCol);
96
+ const resCol = converter.convert(NOTATION.SEPARATOR, separator);
97
+ return resCol;
98
+ };
99
+ };
16
100
 
17
- // export async function _a() {
18
- // expect(1, 1);
19
- // }
20
- //
21
- // export async function _b() {
22
- // expect(1, 2);
23
- // }
24
-
25
- export async function _testFastaToSeparator() {
26
- expect(1, 1);
27
- }
28
-
29
- export async function _testSeparatorToFasta() {
30
- expect(1, 2);
31
- }
101
+ async function _testConvert(srcKey: string, converter: ConverterFunc, tgtKey: string) {
102
+ const srcDf: DG.DataFrame = await readCsv(srcKey);
103
+ const srcCol: DG.Column = srcDf.col('seq')!;
104
+
105
+ const resCol: DG.Column = converter(srcCol);
106
+
107
+ const tgtDf: DG.DataFrame = await readCsv(tgtKey);
108
+ const tgtCol: DG.Column = tgtDf.col('seq')!;
109
+
110
+ expectArray(resCol.toList(), tgtCol.toList());
111
+ }
112
+
113
+ test('testFastaPtToSeparator', async () => {
114
+ await _testConvert(Samples.fastaPt, converter(NOTATION.SEPARATOR, '-'), Samples.separatorPt);
115
+ });
116
+ test('testFastaDnaToSeparator', async () => {
117
+ await _testConvert(Samples.fastaDna, converter(NOTATION.SEPARATOR, '/'), Samples.separatorDna);
118
+ });
119
+ test('testFastaRnaToSeparator', async () => {
120
+ await _testConvert(Samples.fastaRna, converter(NOTATION.SEPARATOR, '*'), Samples.separatorRna);
121
+ });
122
+
123
+ test('testFastaPtToHelm', async () => {
124
+ await _testConvert(Samples.fastaPt, converter(NOTATION.HELM), Samples.helmPt);
125
+ });
126
+ test('testFastaDnaToHelm', async () => {
127
+ await _testConvert(Samples.fastaDna, converter(NOTATION.HELM), Samples.helmDna);
128
+ });
129
+ test('testFastaRnaToHelm', async () => {
130
+ await _testConvert(Samples.fastaDna, converter(NOTATION.HELM), Samples.helmRna);
131
+ });
132
+
133
+ test('testSeparatorPtToFasta', async () => {
134
+ await _testConvert(Samples.separatorPt, converter(NOTATION.FASTA), Samples.fastaPt);
135
+ });
136
+ test('testSeparatorDnaToFasta', async () => {
137
+ await _testConvert(Samples.separatorDna, converter(NOTATION.FASTA), Samples.fastaDna);
138
+ });
139
+ test('testSeparatorDnaToFasta', async () => {
140
+ await _testConvert(Samples.separatorRna, converter(NOTATION.FASTA), Samples.fastaRna);
141
+ });
142
+
143
+ test('testSeparatorPtToHelm', async () => {
144
+ await _testConvert(Samples.separatorRna, converter(NOTATION.HELM), Samples.helmPt);
145
+ });
146
+ test('testSeparatorDnaToHelm', async () => {
147
+ await _testConvert(Samples.separatorRna, converter(NOTATION.HELM), Samples.helmDna);
148
+ });
149
+ test('testSeparatorRnaToHelm', async () => {
150
+ await _testConvert(Samples.separatorRna, converter(NOTATION.HELM), Samples.helmRna);
151
+ });
152
+ });