@datagrok/bio 1.5.10 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-test.js +483 -449
- package/dist/package.js +316 -449
- package/files/samples/sample_FASTA.csv +0 -1
- package/files/samples/sample_FASTA.fasta +0 -3
- package/package.json +2 -2
- package/src/package-test.ts +1 -0
- package/src/package.ts +70 -26
- package/src/tests/convert-test.ts +8 -8
- package/src/utils/cell-renderer.ts +15 -48
- package/src/utils/convert.ts +10 -14
- package/src/utils/notation-converter.ts +215 -55
- package/{test-Bio-34f75e5127b8-936bf89b.html → test-Bio-34f75e5127b8-95c6fae9.html} +2 -2
- package/src/utils/chem-palette.ts +0 -280
- package/src/utils/misc.ts +0 -29
|
@@ -63,4 +63,3 @@ UPI0001614A89,37,A0A0A8VBX4; A0A0H3NXY0; A0A0G2N6K7.1 (obsolete); A0A0M1V224.1 (
|
|
|
63
63
|
UPI000169E485,35,A0A153KQI0.1 (obsolete); A0A1J0NK41.1 (obsolete); A0A1V2DF85.1 (obsolete); K7QCB5.1 (obsolete),MHERFLRENRRSPSRVGGAIDAMPAGCGGMVVIGM,-1.5044359483896845
|
|
64
64
|
UPI000169FCE1,39,A0A656IDX3; A0A6C7A0R4; C0Q053; E8XEF8; G5LRF0; G5NFQ1; M7RCS7; A0A021WPB8.1 (obsolete); A0A068IN21.1 (obsolete); A0A068PK03.1 (obsolete); A0A077YJZ3.1 (obsolete); A0A078P440.1 (obsolete); A0A078PGS7.1 (obsolete); A0A080YVC6.1 (obsolete); A0A098GTT7.1 (obsolete); A0A0A8SUG6.1 (obsolete); A0A0B7QMT4.1 (obsolete); A0A0B7QXI3.1 (obsolete); A0A0B7QYQ1.1 (obsolete); A0A0B7RM41.1 (obsolete); A0A0B7RSH2.1 (obsolete); A0A0B7RYE2.1 (obsolete); A0A0B7SSM9.1 (obsolete); A0A0B7STM0.1 (obsolete); A0A0B7T296.1 (obsolete); A0A0B7TX24.1 (obsolete); A0A0B7U1H7.1 (obsolete); A0A0B7U264.1 (obsolete); A0A0B7UC41.1 (obsolete); A0A0B7UME8.1 (obsolete); A0A0B7V6B1.1 (obsolete); A0A0B7VLT0.1 (obsolete); A0A0B7VRU2.1 (obsolete); A0A0B7W472.1 (obsolete); A0A0B7W5J9.1 (obsolete); A0A0B7WUQ0.1 (obsolete); A0A0B8K7B9.1 (obsolete); A0A0C5TXP2.1 (obsolete); A0A0E1Q441.1 (obsolete); A0A0E2MH47.1 (obsolete); A0A0F6DRT0.1 (obsolete); A0A0G2LKZ8.1 (obsolete); A0A0G2NQ03.1 (obsolete); A0A0H3RKU6.1 (obsolete); A0A0N2BI82.1 (obsolete); A0A0N2BNW2.1 (obsolete); A0A0S2WD32.1 (obsolete); A0A1M3ZBY0.1 (obsolete); A0A1M3ZIA1.1 (obsolete); A0A1M4A4L9.1 (obsolete); A0A1M4A739.1 (obsolete); A0A1W2L8M5.1 (obsolete); A0A221AY29.1 (obsolete); A0A486X7G3.1 (obsolete); A0A518UNK4.1 (obsolete); A0A5E4DW86.1 (obsolete); B5C6Q5.1 (obsolete); B5FPF5.1 (obsolete); B5MIR4.1 (obsolete); B5N1F1.1 (obsolete); E7V3X0.1 (obsolete); E8NPX9.1 (obsolete); F2FBP0.1 (obsolete); F2FSL8.1 (obsolete); G5MZY4.1 (obsolete); G7T2W4.1 (obsolete); J1I084.1 (obsolete); J1IMV6.1 (obsolete); J1J8I1.1 (obsolete); J1KGX6.1 (obsolete); J1L3A6.1 (obsolete); J1L833.1 (obsolete); J1LPB8.1 (obsolete); J1NKU7.1 (obsolete); J1PPY6.1 (obsolete); J1S5B6.1 (obsolete); J1VRL7.1 (obsolete); J1WIA4.1 (obsolete); J1WZQ3.1 (obsolete); J2AWK9.1 (obsolete); J2B6K2.1 (obsolete); J2DH55.1 (obsolete); J2DI78.1 (obsolete); J2ENS4.1 (obsolete); J2FR89.1 (obsolete); J2HHJ4.1 (obsolete); J2I9P0.1 (obsolete); K8S348.1 (obsolete); K8S5C1.1 (obsolete); 
K8S896.1 (obsolete); K8T7U3.1 (obsolete); K8TAY6.1 (obsolete); K8TJL1.1 (obsolete); K8UBJ3.1 (obsolete); K8ULX9.1 (obsolete); K8V3L9.1 (obsolete); K8VAJ8.1 (obsolete); K8VPG1.1 (obsolete); L5W4S4.1 (obsolete); L5WC19.1 (obsolete); L5WHP1.1 (obsolete); L5WP78.1 (obsolete); L5X276.1 (obsolete); L5XDS4.1 (obsolete); L5YA20.1 (obsolete); L5YPV3.1 (obsolete); L5YVT4.1 (obsolete); L5ZH95.1 (obsolete); L5ZKS5.1 (obsolete); L6A2S2.1 (obsolete); L6A7R6.1 (obsolete); L6APT2.1 (obsolete); L6B959.1 (obsolete); L6BV30.1 (obsolete); L6C2V7.1 (obsolete); L6CB61.1 (obsolete); L6CX95.1 (obsolete); L6D2R2.1 (obsolete); L6DYK1.1 (obsolete); L6E2G1.1 (obsolete); L6ECX2.1 (obsolete); L6F2X8.1 (obsolete); L6FBL9.1 (obsolete); L6FXC0.1 (obsolete); L6GAQ7.1 (obsolete); L6GDF3.1 (obsolete); L6GQ18.1 (obsolete); L6GUK0.1 (obsolete); L6HEM7.1 (obsolete); L6HZ42.1 (obsolete); L6IJA8.1 (obsolete); L6IKQ8.1 (obsolete); L6JAX7.1 (obsolete); L6JQW5.1 (obsolete); L6JTZ9.1 (obsolete); L6KHV2.1 (obsolete); L6KJ96.1 (obsolete); L6KPE7.1 (obsolete); L6LIS9.1 (obsolete); L6LRL1.1 (obsolete); L6M479.1 (obsolete); L6MJ49.1 (obsolete); L6MRV4.1 (obsolete); L6N4S1.1 (obsolete); L6NJT7.1 (obsolete); L6NQD7.1 (obsolete); L6NYY9.1 (obsolete); L6PTJ6.1 (obsolete); L6Q5G9.1 (obsolete); L6QG28.1 (obsolete); L6RD28.1 (obsolete); L6RT68.1 (obsolete); L6S8W2.1 (obsolete); L6SG06.1 (obsolete); L6ST86.1 (obsolete); L6TMF4.1 (obsolete); L6TTR9.1 (obsolete); L6U0S5.1 (obsolete); L6UCM0.1 (obsolete); L6UY97.1 (obsolete); L6V2H8.1 (obsolete); L6VEW0.1 (obsolete); L6VGW5.1 (obsolete); L6VM83.1 (obsolete); L6W988.1 (obsolete); L6WWB6.1 (obsolete); L6WY56.1 (obsolete); L6X469.1 (obsolete); L6Y0M1.1 (obsolete); L6YE57.1 (obsolete); L6YI25.1 (obsolete); L6Z0U5.1 (obsolete); L6ZMA5.1 (obsolete); L6ZXN5.1 (obsolete); L9Q2Z1.1 (obsolete); L9Q651.1 (obsolete); L9QWK6.1 (obsolete); L9RTY2.1 (obsolete); L9RZ23.1 (obsolete); L9RZV7.1 (obsolete); L9SPL8.1 (obsolete); L9SUR8.1 (obsolete); L9T0I1.1 (obsolete); L9T3F0.1 (obsolete); 
M9XM34.1 (obsolete); R7RAX9.1 (obsolete); S4HPD6.1 (obsolete); S4IA20.1 (obsolete); S4IYB2.1 (obsolete); S4JUW3.1 (obsolete); S4KAX3.1 (obsolete); S4KSA9.1 (obsolete); S4KVH5.1 (obsolete); S4LU50.1 (obsolete); S5HC55.1 (obsolete); S5IK76.1 (obsolete); S5V9L5.1 (obsolete); T1YEZ9.1 (obsolete); T2PZY7.1 (obsolete); T2QEG8.1 (obsolete); U1J3E4.1 (obsolete); U1RUM6.1 (obsolete); U3SIJ4.1 (obsolete); U6QI94.1 (obsolete); U6QL14.1 (obsolete); U6VL57.1 (obsolete); U6W0G5.1 (obsolete); U6WAX9.1 (obsolete); U6X5N2.1 (obsolete); U6XU02.1 (obsolete); U6XUE1.1 (obsolete); U6Z0B9.1 (obsolete); V0GUR1.1 (obsolete); V0JCU6.1 (obsolete); V1FK33.1 (obsolete); V1G2K9.1 (obsolete); V1GCR6.1 (obsolete); V1IGN7.1 (obsolete); V1IMQ3.1 (obsolete); V1JBW5.1 (obsolete); V1K0S1.1 (obsolete); V1KS42.1 (obsolete); V1LZZ7.1 (obsolete); V1NPU7.1 (obsolete); V1NX60.1 (obsolete); V1Q9V0.1 (obsolete); V1QHX1.1 (obsolete); V1RCP8.1 (obsolete); V1YX44.1 (obsolete); V2CQS1.1 (obsolete); V2D2U7.1 (obsolete); V2DLF9.1 (obsolete); V2F5F1.1 (obsolete); V2F5Q4.1 (obsolete); V2MGV5.1 (obsolete); V2NI96.1 (obsolete); V2NYL4.1 (obsolete); V2PHB9.1 (obsolete); V5KNP2.1 (obsolete); V5ZLN4.1 (obsolete); V7RZQ1.1 (obsolete); V7WUI4.1 (obsolete); V7X4V6.1 (obsolete); V8M2J1.1 (obsolete); V8ML76.1 (obsolete); X4RML9.1 (obsolete); X5NKI0.1 (obsolete),MFFTQNLWVERQQLIKPFILNVNIIYLKNIIIFFIIGGY,-1.194574836126887
|
|
65
65
|
UPI00016B0142,38,NA,MRDTQCHSADFPVYPNVLAVGVRRALWPLPAQDANRTQ,0.44996359010503817
|
|
66
|
-
UPI000173114C,30,B2L4W4,PADDMPTXRNGPSTTNDSDDFVDPWTVQTG,1.3068624916447211
|
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "@datagrok/bio",
|
|
3
3
|
"beta": false,
|
|
4
4
|
"friendlyName": "Bio",
|
|
5
|
-
"version": "1.
|
|
5
|
+
"version": "1.6.0",
|
|
6
6
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
7
7
|
"repository": {
|
|
8
8
|
"type": "git",
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
},
|
|
12
12
|
"dependencies": {
|
|
13
13
|
"@biowasm/aioli": ">=2.4.0",
|
|
14
|
-
"@datagrok-libraries/bio": "^2.
|
|
14
|
+
"@datagrok-libraries/bio": "^2.5.0",
|
|
15
15
|
"@datagrok-libraries/utils": "^1.0.0",
|
|
16
16
|
"@datagrok-libraries/ml": "^2.0.9",
|
|
17
17
|
"cash-dom": "latest",
|
package/src/package-test.ts
CHANGED
package/src/package.ts
CHANGED
|
@@ -19,6 +19,8 @@ import {sequenceGetSimilarities, drawTooltip} from './utils/sequence-activity-cl
|
|
|
19
19
|
import {getMolfilesFromSeq, HELM_CORE_LIB_FILENAME} from './utils/utils';
|
|
20
20
|
import {getMacroMol} from './utils/atomic-works';
|
|
21
21
|
import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
|
|
22
|
+
import {Column} from 'datagrok-api/dg';
|
|
23
|
+
import {SEM_TYPES} from './utils/constants';
|
|
22
24
|
|
|
23
25
|
//tags: init
|
|
24
26
|
export async function initBio(): Promise<void> {
|
|
@@ -38,6 +40,29 @@ export function macromoleculeSequenceCellRenderer(): MacromoleculeSequenceCellRe
|
|
|
38
40
|
return new MacromoleculeSequenceCellRenderer();
|
|
39
41
|
}
|
|
40
42
|
|
|
43
|
+
function checkInputColumn(col: DG.Column, name: string,
|
|
44
|
+
allowedNotations: string[] = [], allowedAlphabets: string[] = []): boolean {
|
|
45
|
+
const units: string = col.getTag(DG.TAGS.UNITS);
|
|
46
|
+
if (col.semType !== DG.SEMTYPE.MACROMOLECULE) {
|
|
47
|
+
grok.shell.warning(name + ' analysis is allowed for Macromolecules semantic type');
|
|
48
|
+
return false;
|
|
49
|
+
} else if (
|
|
50
|
+
(allowedAlphabets.length > 0 &&
|
|
51
|
+
!allowedAlphabets.some((a) => units.toUpperCase().endsWith(a.toUpperCase()))) ||
|
|
52
|
+
(allowedNotations.length > 0 &&
|
|
53
|
+
!allowedNotations.some((n) => units.toUpperCase().startsWith(n.toUpperCase())))
|
|
54
|
+
) {
|
|
55
|
+
const notationAdd = allowedNotations.length == 0 ? 'any notation' :
|
|
56
|
+
(`notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')} `);
|
|
57
|
+
// An empty allowedAlphabets list means there is no alphabet restriction to report.
const alphabetAdd = allowedAlphabets.length == 0 ? 'any alphabet' :
|
|
58
|
+
(`alphabet${allowedAlphabets.length > 1 ? 's' : ''} ${allowedAlphabets.map((a) => `"${a}"`).join(', ')}.`);
|
|
59
|
+
|
|
60
|
+
grok.shell.warning(name + ' analysis is allowed for Macromolecules with ' + notationAdd + ' and ' + alphabetAdd);
|
|
61
|
+
return false;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
return true;
|
|
65
|
+
}
|
|
41
66
|
|
|
42
67
|
//name: sequenceAlignment
|
|
43
68
|
//input: string alignType {choices: ['Local alignment', 'Global alignment']}
|
|
@@ -73,20 +98,23 @@ export function vdRegionViewer() {
|
|
|
73
98
|
//name: Sequence Activity Cliffs
|
|
74
99
|
//description: detect activity cliffs
|
|
75
100
|
//input: dataframe table [Input data table]
|
|
76
|
-
//input: column
|
|
101
|
+
//input: column macroMolecule {semType: Macromolecule}
|
|
77
102
|
//input: column activities
|
|
78
103
|
//input: double similarity = 80 [Similarity cutoff]
|
|
79
104
|
//input: string methodName { choices:["UMAP", "t-SNE", "SPE"] }
|
|
80
|
-
export async function activityCliffs(df: DG.DataFrame,
|
|
105
|
+
export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column, activities: DG.Column,
|
|
81
106
|
similarity: number, methodName: string): Promise<void> {
|
|
107
|
+
if (!checkInputColumn(macroMolecule, 'Activity Cliffs'))
|
|
108
|
+
return;
|
|
109
|
+
|
|
82
110
|
const axesNames = getEmbeddingColsNames(df);
|
|
83
111
|
const options = {
|
|
84
112
|
'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
|
|
85
113
|
};
|
|
86
|
-
const units =
|
|
114
|
+
const units = macroMolecule!.tags[DG.TAGS.UNITS];
|
|
87
115
|
await getActivityCliffs(
|
|
88
116
|
df,
|
|
89
|
-
|
|
117
|
+
macroMolecule,
|
|
90
118
|
axesNames,
|
|
91
119
|
'Activity cliffs',
|
|
92
120
|
activities,
|
|
@@ -110,6 +138,9 @@ export async function activityCliffs(df: DG.DataFrame, sequence: DG.Column, acti
|
|
|
110
138
|
//input: bool plotEmbeddings = true
|
|
111
139
|
export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: DG.Column, methodName: string,
|
|
112
140
|
similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean): Promise<void> {
|
|
141
|
+
// The name passed here is shown in the user-facing warning, so it must identify this analysis.
if (!checkInputColumn(macroMolecule, 'Sequence Space'))
|
|
142
|
+
return;
|
|
143
|
+
|
|
113
144
|
const embedColsNames = getEmbeddingColsNames(table);
|
|
114
145
|
const chemSpaceParams = {
|
|
115
146
|
seqCol: macroMolecule,
|
|
@@ -133,16 +164,18 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
133
164
|
//name: To Atomic Level
|
|
134
165
|
//description: returns molfiles for each monomer from HELM library
|
|
135
166
|
//input: dataframe df [Input data table]
|
|
136
|
-
//input: column
|
|
137
|
-
export async function toAtomicLevel(df: DG.DataFrame,
|
|
138
|
-
if (DG.Func.find({package: 'Chem', name: 'getRdKitModule'})) {
|
|
167
|
+
//input: column macroMolecule {semType: Macromolecule}
|
|
168
|
+
export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column): Promise<void> {
|
|
169
|
+
if (DG.Func.find({package: 'Chem', name: 'getRdKitModule'}).length === 0) {
|
|
139
170
|
grok.shell.warning('Transformation to atomic level requires package "Chem" installed.');
|
|
140
171
|
return;
|
|
141
172
|
}
|
|
173
|
+
if (!checkInputColumn(macroMolecule, 'To Atomic Level'))
|
|
174
|
+
return;
|
|
142
175
|
|
|
143
176
|
const monomersLibFile = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
|
|
144
177
|
const monomersLibObject: any[] = JSON.parse(monomersLibFile);
|
|
145
|
-
const atomicCodes = getMolfilesFromSeq(
|
|
178
|
+
const atomicCodes = getMolfilesFromSeq(macroMolecule, monomersLibObject);
|
|
146
179
|
const result = await getMacroMol(atomicCodes!);
|
|
147
180
|
|
|
148
181
|
const col = DG.Column.fromStrings('regenerated', result);
|
|
@@ -158,21 +191,8 @@ export async function toAtomicLevel(df: DG.DataFrame, sequence: DG.Column): Prom
|
|
|
158
191
|
//input: column sequence { semType: Macromolecule }
|
|
159
192
|
//output: column result
|
|
160
193
|
export async function multipleSequenceAlignmentAny(table: DG.DataFrame, col: DG.Column): Promise<DG.Column | null> {
|
|
161
|
-
if (col
|
|
162
|
-
grok.shell.warning(`MSA analysis is allowed for semantic type "${DG.SEMTYPE.MACROMOLECULE}" data only.`);
|
|
163
|
-
return null;
|
|
164
|
-
}
|
|
165
|
-
const units: string = col.getTag(DG.TAGS.UNITS);
|
|
166
|
-
const allowedAlphabets = ['DNA', 'RNA', 'PT'];
|
|
167
|
-
const allowedNotations = ['fasta'];
|
|
168
|
-
if (!allowedAlphabets.some((a) => units.toUpperCase().endsWith(a.toUpperCase())) ||
|
|
169
|
-
!allowedNotations.some((n) => units.toUpperCase().startsWith(n.toUpperCase()))) {
|
|
170
|
-
grok.shell.warning('MSA analysis is allowed for ' +
|
|
171
|
-
`notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')} ` +
|
|
172
|
-
'and ' +
|
|
173
|
-
`alphabet${allowedAlphabets.length > 1 ? 's' : ''} ${allowedAlphabets.map((a) => `"${a}"`).join(', ')}.`);
|
|
194
|
+
if (!checkInputColumn(col, 'MSA', ['fasta'], ['DNA', 'RNA', 'PT']))
|
|
174
195
|
return null;
|
|
175
|
-
}
|
|
176
196
|
|
|
177
197
|
const msaCol = await runKalign(col, false);
|
|
178
198
|
table.columns.add(msaCol);
|
|
@@ -199,6 +219,9 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
199
219
|
return;
|
|
200
220
|
}
|
|
201
221
|
|
|
222
|
+
if (!checkInputColumn(col, 'Composition'))
|
|
223
|
+
return;
|
|
224
|
+
|
|
202
225
|
const allowedNotations: string[] = ['fasta', 'separator'];
|
|
203
226
|
const units = col.getTag(DG.TAGS.UNITS);
|
|
204
227
|
if (!allowedNotations.some((n) => units.toUpperCase().startsWith(n.toUpperCase()))) {
|
|
@@ -224,7 +247,7 @@ function parseMacromolecule(
|
|
|
224
247
|
//name: importFasta
|
|
225
248
|
//description: Opens FASTA file
|
|
226
249
|
//tags: file-handler
|
|
227
|
-
//meta.ext: fasta, fna, ffn, faa, frn, fa
|
|
250
|
+
//meta.ext: fasta, fna, ffn, faa, frn, fa, fst
|
|
228
251
|
//input: string fileContent
|
|
229
252
|
//output: list tables
|
|
230
253
|
export function importFasta(fileContent: string): DG.DataFrame [] {
|
|
@@ -244,13 +267,34 @@ export function importFasta(fileContent: string): DG.DataFrame [] {
|
|
|
244
267
|
const descriptionsArrayCol = DG.Column.fromStrings('description', descriptionsArray);
|
|
245
268
|
const sequenceCol = DG.Column.fromStrings('sequence', sequencesArray);
|
|
246
269
|
sequenceCol.semType = 'Macromolecule';
|
|
247
|
-
|
|
248
270
|
const stats: SeqColStats = WebLogo.getStats(sequenceCol, 5, WebLogo.splitterAsFasta);
|
|
249
271
|
const seqType = stats.sameLength ? 'SEQ.MSA' : 'SEQ';
|
|
272
|
+
|
|
273
|
+
const PeptideFastaAlphabet = new Set([
|
|
274
|
+
'G', 'L', 'Y', 'S', 'E', 'Q', 'D', 'N', 'F', 'A',
|
|
275
|
+
'K', 'R', 'H', 'C', 'V', 'P', 'W', 'I', 'M', 'T',
|
|
276
|
+
]);
|
|
277
|
+
|
|
278
|
+
const DnaFastaAlphabet = new Set(['A', 'C', 'G', 'T']);
|
|
279
|
+
|
|
280
|
+
const RnaFastaAlphabet = new Set(['A', 'C', 'G', 'U']);
|
|
281
|
+
|
|
282
|
+
//const SmilesRawAlphabet = new Set([
|
|
283
|
+
// 'O', 'C', 'c', 'N', 'S', 'F', '(', ')',
|
|
284
|
+
// '1', '2', '3', '4', '5', '6', '7',
|
|
285
|
+
// '+', '-', '@', '[', ']', '/', '\\', '#', '=']);
|
|
286
|
+
|
|
250
287
|
const alphabetCandidates: [string, Set<string>][] = [
|
|
251
|
-
['
|
|
252
|
-
['
|
|
288
|
+
['PT', PeptideFastaAlphabet],
|
|
289
|
+
['DNA', DnaFastaAlphabet],
|
|
290
|
+
['RNA', RnaFastaAlphabet],
|
|
253
291
|
];
|
|
292
|
+
|
|
293
|
+
//const alphabetCandidates: [string, Set<string>][] = [
|
|
294
|
+
// ['NT', new Set(Object.keys(Nucleotides.Names))],
|
|
295
|
+
// ['PT', new Set(Object.keys(Aminoacids.Names))],
|
|
296
|
+
//];
|
|
297
|
+
|
|
254
298
|
// Calculate likelihoods for alphabet_candidates
|
|
255
299
|
const alphabetCandidatesSim: number[] = alphabetCandidates.map(
|
|
256
300
|
(c) => WebLogo.getAlphabetSimilarity(stats.freq, c[1]));
|
|
@@ -44,7 +44,7 @@ PEPTIDE1{M.K.P.S.E.Y.V}$$$
|
|
|
44
44
|
ACGTC
|
|
45
45
|
CAGTGT
|
|
46
46
|
TTCAAC
|
|
47
|
-
|
|
47
|
+
`,
|
|
48
48
|
separatorDna: `seq
|
|
49
49
|
A/C/G/T/C
|
|
50
50
|
C/A/G/T/G/T
|
|
@@ -59,7 +59,7 @@ DNA1{D(T)P.D(T)P.D(C)P.D(A)P.D(A)P.D(C)P}$$$
|
|
|
59
59
|
ACGUC
|
|
60
60
|
CAGUGU
|
|
61
61
|
UUCAAC
|
|
62
|
-
|
|
62
|
+
`,
|
|
63
63
|
separatorRna: `seq
|
|
64
64
|
A*C*G*U*C
|
|
65
65
|
C*A*G*U*G*U
|
|
@@ -90,10 +90,10 @@ RNA1{R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P}$$$
|
|
|
90
90
|
return _csvDfs[key];
|
|
91
91
|
};
|
|
92
92
|
|
|
93
|
-
function converter(tgtNotation: NOTATION,
|
|
93
|
+
function converter(tgtNotation: NOTATION, tgtSeparator: string | null = null): ConverterFunc {
|
|
94
94
|
return function(srcCol: DG.Column): DG.Column {
|
|
95
95
|
const converter = new NotationConverter(srcCol);
|
|
96
|
-
const resCol = converter.convert(
|
|
96
|
+
const resCol = converter.convert(tgtNotation, tgtSeparator);
|
|
97
97
|
return resCol;
|
|
98
98
|
};
|
|
99
99
|
};
|
|
@@ -127,7 +127,7 @@ RNA1{R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P}$$$
|
|
|
127
127
|
await _testConvert(Samples.fastaDna, converter(NOTATION.HELM), Samples.helmDna);
|
|
128
128
|
});
|
|
129
129
|
test('testFastaRnaToHelm', async () => {
|
|
130
|
-
await _testConvert(Samples.
|
|
130
|
+
await _testConvert(Samples.fastaRna, converter(NOTATION.HELM), Samples.helmRna);
|
|
131
131
|
});
|
|
132
132
|
|
|
133
133
|
test('testSeparatorPtToFasta', async () => {
|
|
@@ -136,15 +136,15 @@ RNA1{R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P}$$$
|
|
|
136
136
|
test('testSeparatorDnaToFasta', async () => {
|
|
137
137
|
await _testConvert(Samples.separatorDna, converter(NOTATION.FASTA), Samples.fastaDna);
|
|
138
138
|
});
|
|
139
|
-
test('
|
|
139
|
+
test('testSeparatorRnaToFasta', async () => {
|
|
140
140
|
await _testConvert(Samples.separatorRna, converter(NOTATION.FASTA), Samples.fastaRna);
|
|
141
141
|
});
|
|
142
142
|
|
|
143
143
|
test('testSeparatorPtToHelm', async () => {
|
|
144
|
-
await _testConvert(Samples.
|
|
144
|
+
await _testConvert(Samples.separatorPt, converter(NOTATION.HELM), Samples.helmPt);
|
|
145
145
|
});
|
|
146
146
|
test('testSeparatorDnaToHelm', async () => {
|
|
147
|
-
await _testConvert(Samples.
|
|
147
|
+
await _testConvert(Samples.separatorDna, converter(NOTATION.HELM), Samples.helmDna);
|
|
148
148
|
});
|
|
149
149
|
test('testSeparatorRnaToHelm', async () => {
|
|
150
150
|
await _testConvert(Samples.separatorRna, converter(NOTATION.HELM), Samples.helmRna);
|
|
@@ -1,6 +1,4 @@
|
|
|
1
1
|
import * as C from "./constants";
|
|
2
|
-
import {getSeparator} from "./misc";
|
|
3
|
-
import {ChemPalette} from "./chem-palette";
|
|
4
2
|
import * as DG from 'datagrok-api/dg';
|
|
5
3
|
import {AminoacidsPalettes} from "@datagrok-libraries/bio/src/aminoacids";
|
|
6
4
|
import {NucleotidesPalettes} from "@datagrok-libraries/bio/src/nucleotides";
|
|
@@ -10,6 +8,7 @@ import {SeqPalette} from "@datagrok-libraries/bio/src/seq-palettes";
|
|
|
10
8
|
import * as ui from 'datagrok-api/ui';
|
|
11
9
|
|
|
12
10
|
const lru = new DG.LruCache<any, any>();
|
|
11
|
+
const undefinedColor = 'rgb(100,100,100)';
|
|
13
12
|
|
|
14
13
|
function getPalleteByType(paletteType: string): SeqPalette {
|
|
15
14
|
switch (paletteType) {
|
|
@@ -48,7 +47,7 @@ export function processSequence(subParts: string[]): [string[], boolean] {
|
|
|
48
47
|
* @param {number} h Height.
|
|
49
48
|
* @param {CanvasRenderingContext2D} g Canvas rendering context.
|
|
50
49
|
* @param {string} s String to print.
|
|
51
|
-
* @param {string} [color=
|
|
50
|
+
* @param {string} [color=undefinedColor] String color.
|
|
52
51
|
* @param {number} [pivot=0] Pivot.
|
|
53
52
|
* @param {boolean} [left=false] Is left aligned.
|
|
54
53
|
* @param {boolean} [hideMod=false] Hide amino acid residue modifications.
|
|
@@ -57,12 +56,15 @@ export function processSequence(subParts: string[]): [string[], boolean] {
|
|
|
57
56
|
*/
|
|
58
57
|
function printLeftOrCentered(
|
|
59
58
|
x: number, y: number, w: number, h: number,
|
|
60
|
-
g: CanvasRenderingContext2D, s: string, color =
|
|
59
|
+
g: CanvasRenderingContext2D, s: string, color = undefinedColor,
|
|
61
60
|
pivot: number = 0, left = false, hideMod = false, transparencyRate: number = 1.0,
|
|
62
|
-
separator: string = ''): number {
|
|
61
|
+
separator: string = '', last:boolean = false): number {
|
|
63
62
|
g.textAlign = 'start';
|
|
64
63
|
let colorPart = s.substring(0);
|
|
65
64
|
let grayPart = separator;
|
|
65
|
+
if (last) {
|
|
66
|
+
grayPart = '';
|
|
67
|
+
}
|
|
66
68
|
const textSize = g.measureText(colorPart + grayPart);
|
|
67
69
|
const indent = 5;
|
|
68
70
|
|
|
@@ -87,44 +89,6 @@ function printLeftOrCentered(
|
|
|
87
89
|
return x + dx + colorTextSize.width;
|
|
88
90
|
}
|
|
89
91
|
}
|
|
90
|
-
function renderSequense(
|
|
91
|
-
g: CanvasRenderingContext2D, x: number, y: number, w: number, h: number, gridCell: DG.GridCell,
|
|
92
|
-
cellStyle: DG.GridCellStyle,
|
|
93
|
-
): void {
|
|
94
|
-
const grid = gridCell.grid;
|
|
95
|
-
const cell = gridCell.cell;
|
|
96
|
-
const [type, subtype, paletteType] = gridCell.cell.column.getTag(DG.TAGS.UNITS).split(":");
|
|
97
|
-
w = grid ? Math.min(grid.canvas.width - x, w) : g.canvas.width - x;
|
|
98
|
-
g.save();
|
|
99
|
-
g.beginPath();
|
|
100
|
-
g.rect(x, y, w, h);
|
|
101
|
-
g.clip();
|
|
102
|
-
g.font = '12px monospace';
|
|
103
|
-
g.textBaseline = 'top';
|
|
104
|
-
const s: string = cell.value ?? '';
|
|
105
|
-
|
|
106
|
-
//TODO: can this be replaced/merged with splitSequence?
|
|
107
|
-
const units = gridCell.cell.column.getTag(DG.TAGS.UNITS);
|
|
108
|
-
|
|
109
|
-
const palette = getPalleteByType(paletteType);
|
|
110
|
-
|
|
111
|
-
const separator = gridCell.cell.column.getTag('separator') ?? '';
|
|
112
|
-
const splitterFunc: SplitterFunc = WebLogo.getSplitter(units, gridCell.cell.column.getTag('separator') );// splitter,
|
|
113
|
-
|
|
114
|
-
const subParts:string[] = splitterFunc(cell.value);
|
|
115
|
-
|
|
116
|
-
const textSize = g.measureText(subParts.join(''));
|
|
117
|
-
let x1 = Math.max(x, x + (w - textSize.width) / 2);
|
|
118
|
-
|
|
119
|
-
subParts.forEach((amino, index) => {
|
|
120
|
-
let [color, outerAmino,, pivot] = ChemPalette.getColorAAPivot(amino);
|
|
121
|
-
color = palette.get(amino);
|
|
122
|
-
g.fillStyle = ChemPalette.undefinedColor;
|
|
123
|
-
x1 = printLeftOrCentered(x1, y, w, h, g, amino, color, pivot, true, false, 1.0, separator);
|
|
124
|
-
});
|
|
125
|
-
|
|
126
|
-
g.restore();
|
|
127
|
-
}
|
|
128
92
|
|
|
129
93
|
export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
130
94
|
|
|
@@ -196,13 +160,16 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
196
160
|
// console.log(subParts);
|
|
197
161
|
|
|
198
162
|
const textSize = g.measureText(subParts.join(''));
|
|
199
|
-
let x1 =
|
|
200
|
-
|
|
163
|
+
let x1 = x;
|
|
164
|
+
let color = undefinedColor;
|
|
201
165
|
subParts.forEach((amino, index) => {
|
|
202
|
-
let [color, outerAmino,, pivot] = ChemPalette.getColorAAPivot(amino);
|
|
203
166
|
color = palette.get(amino);
|
|
204
|
-
g.fillStyle =
|
|
205
|
-
|
|
167
|
+
g.fillStyle = undefinedColor;
|
|
168
|
+
let last = false;
|
|
169
|
+
if (index === subParts.length - 1) {
|
|
170
|
+
last = true;
|
|
171
|
+
}
|
|
172
|
+
x1 = printLeftOrCentered(x1, y, w, h, g, amino, color, 0, true, false, 1.0, separator, last);
|
|
206
173
|
});
|
|
207
174
|
|
|
208
175
|
g.restore();
|
package/src/utils/convert.ts
CHANGED
|
@@ -16,26 +16,22 @@ export function convert(col: DG.Column): void {
|
|
|
16
16
|
NOTATION.SEPARATOR,
|
|
17
17
|
NOTATION.HELM
|
|
18
18
|
];
|
|
19
|
-
const
|
|
20
|
-
const
|
|
19
|
+
const separatorArray = ['-', '.', '/'];
|
|
20
|
+
const filteredNotations = notations.filter((e) => e !== current);
|
|
21
|
+
const targetNotationInput = ui.choiceInput('Convert to', filteredNotations[0], filteredNotations);
|
|
21
22
|
|
|
22
|
-
const separatorInput = ui.choiceInput('separator',
|
|
23
|
+
const separatorInput = ui.choiceInput('Choose separator', separatorArray[0], separatorArray);
|
|
23
24
|
|
|
24
|
-
ui.dialog('Convert sequence')
|
|
25
|
+
ui.dialog('Convert sequence notation')
|
|
25
26
|
.add(ui.div([
|
|
26
|
-
ui.h1('
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
.add(ui.div([
|
|
31
|
-
ui.h1('Separator'),
|
|
32
|
-
separatorInput,
|
|
33
|
-
|
|
27
|
+
ui.h1('Current notation: ' + current),
|
|
28
|
+
targetNotationInput.root,
|
|
29
|
+
// TODO: conditional separator input
|
|
30
|
+
separatorInput.root
|
|
34
31
|
]))
|
|
35
32
|
.onOK(() => {
|
|
36
33
|
//TODO: create new converted column
|
|
37
|
-
|
|
38
|
-
const targetNotation: NOTATION = targetNotationInput.value as NOTATION;
|
|
34
|
+
const targetNotation = targetNotationInput.value as NOTATION;
|
|
39
35
|
const separator = separatorInput.value!;
|
|
40
36
|
const newColumn = converter.convert(targetNotation, separator);
|
|
41
37
|
col.dataFrame.columns.add(newColumn);
|