@datagrok/bio 2.22.12 → 2.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +4 -0
- package/detectors.js +14 -0
- package/dist/455.js +1 -1
- package/dist/455.js.map +1 -1
- package/dist/package-test.js +2 -2
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +2 -2
- package/dist/package.js.map +1 -1
- package/files/samples/BILN.csv +625 -0
- package/files/samples/BILN_W_HELM.csv +5114 -0
- package/package.json +4 -4
- package/src/package-api.ts +7 -0
- package/src/package.g.ts +18 -1
- package/src/package.ts +46 -2
- package/src/tests/biln-tests.ts +167 -0
- package/src/tests/converters-test.ts +14 -0
- package/src/tests/detectors-tests.ts +7 -0
- package/src/utils/biln.ts +69 -0
- package/src/utils/cell-renderer.ts +7 -11
- package/src/utils/convert.ts +3 -2
- package/src/utils/seq-helper/seq-handler.ts +139 -33
- package/src/utils/seq-helper/seq-helper.ts +1 -1
- package/src/widgets/representations.ts +1 -1
- package/src/widgets/to-atomic-level-widget.ts +12 -4
- package/test-console-output-1.log +442 -378
- package/test-record-1.mp4 +0 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Davit Rizhinashvili",
|
|
6
6
|
"email": "drizhinashvili@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.
|
|
8
|
+
"version": "2.23.0",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -44,10 +44,10 @@
|
|
|
44
44
|
],
|
|
45
45
|
"dependencies": {
|
|
46
46
|
"@biowasm/aioli": "^3.1.0",
|
|
47
|
-
"@datagrok-libraries/bio": "^5.
|
|
47
|
+
"@datagrok-libraries/bio": "^5.55.0",
|
|
48
48
|
"@datagrok-libraries/chem-meta": "^1.2.7",
|
|
49
|
-
"@datagrok-libraries/math": "^1.2.
|
|
50
|
-
"@datagrok-libraries/ml": "^6.10.
|
|
49
|
+
"@datagrok-libraries/math": "^1.2.6",
|
|
50
|
+
"@datagrok-libraries/ml": "^6.10.6",
|
|
51
51
|
"@datagrok-libraries/tutorials": "^1.6.1",
|
|
52
52
|
"@datagrok-libraries/utils": "^4.6.5",
|
|
53
53
|
"@webgpu/types": "^0.1.40",
|
package/src/package-api.ts
CHANGED
|
@@ -44,6 +44,13 @@ export namespace funcs {
|
|
|
44
44
|
return await grok.functions.call('Bio:StandardiseMonomerLibrary', { library });
|
|
45
45
|
}
|
|
46
46
|
|
|
47
|
+
/**
 * Matches molecules in a column with monomers from the selected library(s).
 *
 * Forwards `table`, `molecules` and `polymerType` to the `Bio:MatchWithMonomerLibrary`
 * function through `grok.functions.call`.
 */
export async function matchWithMonomerLibrary(table: DG.DataFrame , molecules: DG.Column , polymerType: string ): Promise<void> {
  const callArgs = {table, molecules, polymerType};
  return await grok.functions.call('Bio:MatchWithMonomerLibrary', callArgs);
}
|
|
53
|
+
|
|
47
54
|
export async function getBioLib(): Promise<any> {
|
|
48
55
|
return await grok.functions.call('Bio:GetBioLib', {});
|
|
49
56
|
}
|
package/src/package.g.ts
CHANGED
|
@@ -110,6 +110,23 @@ export function separatorSequenceCellRenderer() : any {
|
|
|
110
110
|
return PackageFunctions.separatorSequenceCellRenderer();
|
|
111
111
|
}
|
|
112
112
|
|
|
113
|
+
//tags: cellRenderer
//output: grid_cell_renderer result
//meta.cellType: sequence
//meta.columnTags: quality=Macromolecule, units=biln
export function bilnSequenceCellRenderer() : any {
  // Delegates to the static implementation on PackageFunctions.
  const renderer = PackageFunctions.bilnSequenceCellRenderer();
  return renderer;
}
|
|
120
|
+
|
|
121
|
+
//tags: notationRefiner
//input: column col
//input: object stats
//input: string separator { nullable: true; optional: true }
//output: bool result
export function refineNotationProviderForBiln(col: DG.Column<any>, stats: any, separator: any) : boolean {
  // Delegates to the static implementation on PackageFunctions, passing arguments through unchanged.
  const refined = PackageFunctions.refineNotationProviderForBiln(col, stats, separator);
  return refined;
}
|
|
129
|
+
|
|
113
130
|
//name: Bioinformatics | Sequence Renderer
|
|
114
131
|
//tags: panel
|
|
115
132
|
//input: column molColumn { semType: Macromolecule }
|
|
@@ -327,7 +344,7 @@ export function importBam(fileContent: string) : any {
|
|
|
327
344
|
return PackageFunctions.importBam(fileContent);
|
|
328
345
|
}
|
|
329
346
|
|
|
330
|
-
//top-menu: Bio | Transform | Convert Notation...
|
|
347
|
+
//top-menu: Bio | Transform | Convert Sequence Notation...
|
|
331
348
|
export function convertDialog() : void {
|
|
332
349
|
PackageFunctions.convertDialog();
|
|
333
350
|
}
|
package/src/package.ts
CHANGED
|
@@ -30,7 +30,7 @@ import {getUserLibSettings, setUserLibSettings} from '@datagrok-libraries/bio/sr
|
|
|
30
30
|
import {ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
|
|
31
31
|
import {RDModule as _RDMoule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
|
|
32
32
|
import {getRdKitModule} from '@datagrok-libraries/bio/src/chem/rdkit-module';
|
|
33
|
-
import {ISeqHandler} from '@datagrok-libraries/bio/src/utils/macromolecule/seq-handler';
|
|
33
|
+
import {ISeqHandler, SeqTemps} from '@datagrok-libraries/bio/src/utils/macromolecule/seq-handler';
|
|
34
34
|
import {MmcrTemps} from '@datagrok-libraries/bio/src/utils/cell-renderer-consts';
|
|
35
35
|
|
|
36
36
|
import {getMacromoleculeColumns} from './utils/ui-utils';
|
|
@@ -77,6 +77,7 @@ import {_toAtomicLevel} from '@datagrok-libraries/bio/src/monomer-works/to-atomi
|
|
|
77
77
|
import {molecular3DStructureWidget, toAtomicLevelWidget} from './widgets/to-atomic-level-widget';
|
|
78
78
|
import {handleSequenceHeaderRendering} from './widgets/sequence-scrolling-widget';
|
|
79
79
|
import {PolymerType} from '@datagrok-libraries/js-draw-lite/src/types/org';
|
|
80
|
+
import {BilnNotationProvider} from './utils/biln';
|
|
80
81
|
export const _package = new BioPackage(/*{debug: true}/**/);
|
|
81
82
|
export * from './package.g';
|
|
82
83
|
|
|
@@ -297,6 +298,49 @@ export class PackageFunctions {
|
|
|
297
298
|
return new MacromoleculeSequenceCellRenderer();
|
|
298
299
|
}
|
|
299
300
|
|
|
301
|
+
/**
 * Cell renderer factory registered for cells of type 'sequence' in columns tagged
 * `quality=Macromolecule, units=biln`.
 *
 * @returns a new MacromoleculeSequenceCellRenderer instance on every call
 */
@grok.decorators.func({
  name: 'bilnSequenceCellRenderer',
  tags: ['cellRenderer'],
  meta: {cellType: 'sequence', columnTags: 'quality=Macromolecule, units=biln'},
  outputs: [{type: 'grid_cell_renderer', name: 'result'}]
})
static bilnSequenceCellRenderer(): MacromoleculeSequenceCellRenderer {
  const renderer = new MacromoleculeSequenceCellRenderer();
  return renderer;
}
|
|
313
|
+
|
|
314
|
+
/**
 * Notation refiner for BILN: decides whether a column should be treated as BILN and, if so,
 * tags it and attaches a BilnNotationProvider to the column temp storage.
 *
 * The column qualifies when the separator is '-' and at least one key of `stats.freq` either
 * ends with a connection suffix such as `(1,3)` or contains a square bracket.
 *
 * @param col column to refine; its tags, units and temp notation provider are set on success
 * @param stats monomer statistics; only the keys of `freq` are inspected here
 * @param separator separator detected for the column
 * @returns true when the column was marked as BILN, false when it was left untouched
 */
@grok.decorators.func({
  name: 'refineNotationProviderForBiln',
  tags: ['notationRefiner'],
  outputs: [{type: 'bool', name: 'result'}]
})
static refineNotationProviderForBiln(
  @grok.decorators.param({type: 'column'}) col: DG.Column<string>,
  @grok.decorators.param({type: 'object'}) stats: {freq: { [key: string]: number; }, sameLength: boolean},
  @grok.decorators.param({type: 'string', options: {nullable: true, optional: true}}) separator: string | null
): boolean {
  // biln uses '-' as a separator
  if (separator !== '-')
    return false;

  const monomerKeys = Object.keys(stats.freq);
  const connectionSuffix = /^.+\(\d{1,2},\d{1,2}\)$/;
  const hasConnections = monomerKeys.some((key) => connectionSuffix.test(key));

  // biln might also encode monomers with hyphens in names encoded by [];
  // the bracket check only runs when no key carries a connection like (1,2)
  const looksLikeBiln = hasConnections ||
    monomerKeys.some((key) => key.includes('[') || key.includes(']'));
  if (!looksLikeBiln)
    return false;

  // refine the notation provider
  col.setTag('aligned', 'SEQ');
  col.setTag('alphabet', 'UN');
  col.setTag('.alphabetIsMultichar', 'true');
  col.meta.units = NOTATION.BILN;
  col.temp[SeqTemps.notationProvider] = new BilnNotationProvider(separator, _package.seqHelper, col);
  return true;
}
|
|
343
|
+
|
|
300
344
|
// // -- Property panels --
|
|
301
345
|
|
|
302
346
|
@grok.decorators.panel({name: 'Bioinformatics | Sequence Renderer'})
|
|
@@ -757,7 +801,7 @@ export class PackageFunctions {
|
|
|
757
801
|
|
|
758
802
|
@grok.decorators.func({
|
|
759
803
|
name: 'convertDialog',
|
|
760
|
-
'top-menu': 'Bio | Transform | Convert Notation...'
|
|
804
|
+
'top-menu': 'Bio | Transform | Convert Sequence Notation...'
|
|
761
805
|
})
|
|
762
806
|
static convertDialog() {
|
|
763
807
|
const col: DG.Column<string> | undefined = getMacromoleculeColumns()[0];
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
/* eslint-disable max-len */
|
|
2
|
+
/* eslint-disable max-lines */
|
|
3
|
+
/* eslint-disable max-lines-per-function */
|
|
4
|
+
import * as grok from 'datagrok-api/grok';
|
|
5
|
+
import * as ui from 'datagrok-api/ui';
|
|
6
|
+
import * as DG from 'datagrok-api/dg';
|
|
7
|
+
|
|
8
|
+
import {category, test, expect, before} from '@datagrok-libraries/utils/src/test';
|
|
9
|
+
|
|
10
|
+
import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
11
|
+
import {ISeqHelper, getSeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
|
|
12
|
+
|
|
13
|
+
import {_testNeg, _testPos, DetectorTestData, DfReaderFunc, PosCol} from './utils/detectors-utils';
|
|
14
|
+
import {SeqTemps} from '@datagrok-libraries/bio/src/utils/macromolecule/seq-handler';
|
|
15
|
+
|
|
16
|
+
/**
 * Builds a string column named 'seq' from `list`, runs `Bio:detectMacromolecule` on it,
 * asserts that it is detected as a Macromolecule and waits for the notation refiner.
 *
 * @param list sequences to put into the column
 * @param seqHelper helper used to obtain the sequence handler for the column
 * @returns the prepared column
 */
export async function prepareBiln(list: string[], seqHelper: ISeqHelper): Promise<DG.Column> {
  const seqCol: DG.Column = DG.Column.fromList(DG.TYPE.STRING, 'seq', list);
  const detected: string = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
  if (detected)
    seqCol.semType = detected;
  expect(seqCol.semType, DG.SEMTYPE.MACROMOLECULE);

  // wait for refiner to finish
  await seqHelper.getSeqHandler(seqCol).refinerPromise;
  // NOTE(review): the handler is requested once more after refinement and the result is unused;
  // presumably this refreshes the handler for the refined notation — confirm before removing.
  seqHelper.getSeqHandler(seqCol);
  return seqCol;
}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
/**
 * Checks whether a list of sequences is (or, for a negative test, is not) detected as BILN.
 *
 * Both the column units and the presence of a notation provider in the column temp are compared
 * against the expected outcome.
 *
 * @param list sequences to test
 * @param seqHelper helper passed through to prepareBiln
 * @param negativeTest when true, the column is expected NOT to be detected as BILN
 */
export async function _testBilnDetection(list: string[], seqHelper: ISeqHelper, negativeTest = false): Promise<void> {
  const seqCol = await prepareBiln(list, seqHelper);

  const unitsAreBiln = seqCol.meta.units === NOTATION.BILN;
  expect(unitsAreBiln, !negativeTest, `Incorrectly detected as ${seqCol.meta.units}`);

  const hasProvider = seqCol.temp[SeqTemps.notationProvider] != null;
  expect(hasProvider, !negativeTest, `No notation provider for BILN`);
}
|
|
34
|
+
|
|
35
|
+
/**
 * Verifies BILN to HELM conversion in two passes: first through the handler's per-row `getHelm`,
 * then through the converter obtained with `getConverter(NOTATION.HELM)`.
 *
 * @param list BILN sequences
 * @param expectedHelm expected HELM strings, index-aligned with `list`
 * @param seqHelper helper used to obtain the sequence handler
 */
export async function _testBilnToHelm(list: string[], expectedHelm: string[], seqHelper: ISeqHelper): Promise<void> {
  const seqCol = await prepareBiln(list, seqHelper);
  const handler = seqHelper.getSeqHandler(seqCol);

  // shared comparison so both passes report failures with the same message
  const checkHelm = (idx: number, actual: string): void => {
    expect(actual === expectedHelm[idx], true, `Incorrect HELM conversion for ${list[idx]}: Expected ${expectedHelm[idx]} \n Got ${actual}`);
  };

  list.forEach((_seq, idx) => checkHelm(idx, handler.getHelm(idx)));

  // also test through converter
  const toHelm = handler.getConverter(NOTATION.HELM);
  list.forEach((seq, idx) => checkHelm(idx, toHelm(seq)));
}
|
|
49
|
+
|
|
50
|
+
/**
 * Verifies HELM to BILN conversion through the converter obtained with `getConverter(NOTATION.BILN)`.
 *
 * The HELM strings are placed in a column named 'helm' inside a data frame, semantic types are
 * detected, and the column is asserted to be a HELM Macromolecule before converting.
 *
 * @param helmList HELM sequences
 * @param expectedBiln expected BILN strings, index-aligned with `helmList`
 * @param seqHelper helper used to obtain the sequence handler
 */
export async function _testHelmToBiln(helmList: string[], expectedBiln: string[], seqHelper: ISeqHelper): Promise<void> {
  const helmCol: DG.Column = DG.Column.fromList(DG.TYPE.STRING, 'helm', helmList);
  const table = DG.DataFrame.fromColumns([helmCol]);
  await table.meta.detectSemanticTypes();
  await grok.data.detectSemanticTypes(table);

  expect(helmCol.semType === DG.SEMTYPE.MACROMOLECULE, true, `Incorrectly detected as ${helmCol.semType}`);
  expect(helmCol.meta.units === NOTATION.HELM, true, `Incorrectly detected as ${helmCol.meta.units}`);

  const toBiln = seqHelper.getSeqHandler(helmCol).getConverter(NOTATION.BILN);
  helmList.forEach((helm, idx) => {
    const actual = toBiln(helm);
    expect(actual === expectedBiln[idx], true, `Incorrect BILN conversion for ${helm}: Expected ${expectedBiln[idx]} \n Got ${actual}`);
  });
}
|
|
64
|
+
|
|
65
|
+
export const detectorTestsDataForBiln: {name: string, seqs: string[], negative: boolean}[] = [
|
|
66
|
+
{
|
|
67
|
+
name: 'Valid Biln',
|
|
68
|
+
seqs: [
|
|
69
|
+
'A-C(1,3)-G-G-H-A-V-E-A-K-Y-L-V-C(3,3)-S.G-I-V-E-A-C(2,3)-C(1,3)-T-S-I-C(2,3)-S-L-Y-Q-L-E-N-Y-C(3,3)-Y',
|
|
70
|
+
'A-C(1,3)-G-G-H-A-V-E-A-K-Y-L-V-C(3,3)-S.G-I-V-E-A-C(2,3)-C(1,3)-T-S-I-C(2,3)-S-L-Y-Q-L-E-N-Y-C(3,3)-Y',
|
|
71
|
+
'C-C(1,3)-S-W-P-A-R-C(2,3)-L-H-Q-D-L-C(3,3)-NH2.C(1,1)(2,2)(3,3)',
|
|
72
|
+
'C-C(1,3)-S-W-P-A-R-C(2,3)-L-H-Q-D-L-C(3,3)-NH2.[C](1,1)(2,2)(3,3)',
|
|
73
|
+
'D-T-H-F-P-I-C(1,3)-I-F-C(2,3)-C(3,3)-G-C(2,3)-C(4,3)-H-R-S-K-C(3,3)-G-M-C(4,3)-C(1,3)-K-T'
|
|
74
|
+
], negative: false
|
|
75
|
+
}, {
|
|
76
|
+
name: 'Simple Separator Neg',
|
|
77
|
+
seqs: [
|
|
78
|
+
'meI/hHis/Aca/N/T/dE/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me',
|
|
79
|
+
'meI/hHis/Aca/Cys_SEt/T/dK/Thr_PO3H2/Aca/Tyr_PO3H2/D-Chg/dV/Phe_ab-dehydro/N/D-Orn/D-aThr//Phe_4Me',
|
|
80
|
+
'Lys_Boc/hHis/Aca/Cys_SEt/T/dK/Thr_PO3H2/Aca/Tyr_PO3H2/D-Chg/dV/Thr_PO3H2/N/D-Orn/D-aThr//Phe_4Me',
|
|
81
|
+
'meI/hHis/Aca/Cys_SEt/T/dK/Thr_PO3H2/Aca/Tyr_PO3H2/D-Chg/dV/Thr_PO3H2/N/D-Orn/D-aThr//Phe_4Me',
|
|
82
|
+
'meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca/D-Tyr_Et/Tyr_ab-dehydroMe/dV/Chg/N/D-Orn/D-aThr//Phe_4Me'
|
|
83
|
+
], negative: true
|
|
84
|
+
}, {
|
|
85
|
+
name: 'Valid Biln without cyclization',
|
|
86
|
+
seqs: [
|
|
87
|
+
'meI-hHis-Aca-N-T-dE-Thr_PO3H2-Aca-[D-Tyr_Et]-[Tyr_ab-dehydroMe]-dV-E-N-[D-Orn]-[D-aThr]--Phe_4Me',
|
|
88
|
+
'meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2-[D-Chg]-dV-[Phe_ab-dehydro]-N-[D-Orn]-[D-aThr]--Phe_4Me',
|
|
89
|
+
'Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2-[D-Chg]-dV-Thr_PO3H2-N-[D-Orn]-[D-aThr]--Phe_4Me',
|
|
90
|
+
'meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2-[D-Chg]-dV-Thr_PO3H2-N-[D-Orn]-[D-aThr]--Phe_4Me',
|
|
91
|
+
'meI-hHis-Aca-N-T-dK-Thr_PO3H2-Aca-[D-Tyr_Et]-[Tyr_ab-dehydroMe]-dV-Chg-N-[D-Orn]-[D-aThr]--Phe_4Me',
|
|
92
|
+
'meI-hHis-Aca-N-T-dK-Thr_PO3H2-Aca-[D-Tyr_Et]-Tyr_Bn-dV-E-N-dV---Phe_4Me',
|
|
93
|
+
'meI-hHis-Aca-N-T-dK-Thr_PO3H2-Aca-[D-Tyr_Et]-Aze-dV-E-N-dV---Phe_4Me',
|
|
94
|
+
'meI-hHis-Aca-N-T-dK-Thr_PO3H2-Aca-[D-Tyr_Et]-meQ-dV-E-N-dV---Phe_4Me'
|
|
95
|
+
], negative: false
|
|
96
|
+
}
|
|
97
|
+
];
|
|
98
|
+
|
|
99
|
+
export const bilnToHelmTestsData: {name: string, biln: string[], helm: string[]}[] = [
|
|
100
|
+
{
|
|
101
|
+
name: 'Linear',
|
|
102
|
+
biln: ['meI-hHis-Aca-N-T-dE-Thr_PO3H2-Aca-[D-Tyr_Et]-[Tyr_ab-dehydroMe]-dV-E-N-[D-Orn]-[D-aThr]--Phe_4Me',
|
|
103
|
+
'meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2-[D-Chg]-dV-[Phe_ab-dehydro]-N-[D-Orn]-[D-aThr]--Phe_4Me',
|
|
104
|
+
'Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2-[D-Chg]-dV-Thr_PO3H2-N-[D-Orn]-[D-aThr]--Phe_4Me',
|
|
105
|
+
'meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2-[D-Chg]-dV-Thr_PO3H2-N-[D-Orn]-[D-aThr]--Phe_4Me',
|
|
106
|
+
'meI-hHis-Aca-N-T-dK-Thr_PO3H2-Aca-[D-Tyr_Et]-[Tyr_ab-dehydroMe]-dV-Chg-N-[D-Orn]-[D-aThr]--Phe_4Me'],
|
|
107
|
+
helm: [
|
|
108
|
+
'PEPTIDE1{[meI].[hHis].[Aca].N.T.[dE].[Thr_PO3H2].[Aca].[D-Tyr_Et].[Tyr_ab-dehydroMe].[dV].E.N.[D-Orn].[D-aThr].*.[Phe_4Me]}$$$$',
|
|
109
|
+
'PEPTIDE1{[meI].[hHis].[Aca].[Cys_SEt].T.[dK].[Thr_PO3H2].[Aca].[Tyr_PO3H2].[D-Chg].[dV].[Phe_ab-dehydro].N.[D-Orn].[D-aThr].*.[Phe_4Me]}$$$$',
|
|
110
|
+
'PEPTIDE1{[Lys_Boc].[hHis].[Aca].[Cys_SEt].T.[dK].[Thr_PO3H2].[Aca].[Tyr_PO3H2].[D-Chg].[dV].[Thr_PO3H2].N.[D-Orn].[D-aThr].*.[Phe_4Me]}$$$$',
|
|
111
|
+
'PEPTIDE1{[meI].[hHis].[Aca].[Cys_SEt].T.[dK].[Thr_PO3H2].[Aca].[Tyr_PO3H2].[D-Chg].[dV].[Thr_PO3H2].N.[D-Orn].[D-aThr].*.[Phe_4Me]}$$$$',
|
|
112
|
+
'PEPTIDE1{[meI].[hHis].[Aca].N.T.[dK].[Thr_PO3H2].[Aca].[D-Tyr_Et].[Tyr_ab-dehydroMe].[dV].[Chg].N.[D-Orn].[D-aThr].*.[Phe_4Me]}$$$$'
|
|
113
|
+
]
|
|
114
|
+
}, {
|
|
115
|
+
name: 'Cyclic',
|
|
116
|
+
biln: [
|
|
117
|
+
'C-C(1,3)-S-W-P-A-R-C(2,3)-L-H-Q-D-L-C(3,3)-NH2.[C](1,1)(2,2)(3,3)',
|
|
118
|
+
'D-T-H-F-P-I-C(1,3)-I-F-C(2,3)-C(3,3)-G-C(2,3)-C(4,3)-H-R-S-K-C(3,3)-G-M-C(4,3)-C(1,3)-K-T',
|
|
119
|
+
'L-C(1,3)-G-S-H-L-V-E-A-L-Y-L-V-C(2,3)-G.G-I-V-E-Q-C(3,3)-C(1,3)-T-S-I-C(3,3)-S-L-Y-Q-L-E-N-Y-C(2,3)-N',
|
|
120
|
+
'H-Aib-E-G-T-F-T-S-D(2,3)-V-S-S-Y-L-E-G-Q-A-A-K(1,3)-E-F-I-A-W-L-V-R-G-R-G.C(2,3)-gGlu-G-G(1,2)',
|
|
121
|
+
'F(4,2).dI(1,1)(3,3)-Trp_Ome-Asp_OMe-Cys_Bn-meG-Phe_3Cl-dD-T-dI(4,3)-T-dK-aG-3Pal-xiIle-meD-Ala_tBu(1,2).L(2,1)-Pro_4Me3OH-S-NMe2Abz-Q-3Pal-xiIle-D-Hyp-Ala_tBu-dI(3,3)-Trp_Ome-Asp_OMe-N-meG-Phe_34diCl-Phe_34diCl(2,2)'
|
|
122
|
+
],
|
|
123
|
+
helm: [
|
|
124
|
+
'PEPTIDE1{C.C.S.W.P.A.R.C.L.H.Q.D.L.C.[NH2]}|PEPTIDE2{C}$PEPTIDE1,PEPTIDE2,14:R3-1:R3|PEPTIDE1,PEPTIDE2,8:R3-1:R2|PEPTIDE1,PEPTIDE2,2:R3-1:R1$$$V2.0',
|
|
125
|
+
'PEPTIDE1{D.T.H.F.P.I.C.I.F.C.C.G.C.C.H.R.S.K.C.G.M.C.C.K.T}$PEPTIDE1,PEPTIDE1,10:R3-13:R3|PEPTIDE1,PEPTIDE1,11:R3-19:R3|PEPTIDE1,PEPTIDE1,14:R3-22:R3|PEPTIDE1,PEPTIDE1,7:R3-23:R3$$$V2.0',
|
|
126
|
+
'PEPTIDE1{L.C.G.S.H.L.V.E.A.L.Y.L.V.C.G}|PEPTIDE2{G.I.V.E.Q.C.C.T.S.I.C.S.L.Y.Q.L.E.N.Y.C.N}$PEPTIDE1,PEPTIDE2,2:R3-7:R3|PEPTIDE2,PEPTIDE2,6:R3-11:R3|PEPTIDE1,PEPTIDE2,14:R3-20:R3$$$V2.0',
|
|
127
|
+
'PEPTIDE1{H.[Aib].E.G.T.F.T.S.D.V.S.S.Y.L.E.G.Q.A.A.K.E.F.I.A.W.L.V.R.G.R.G}|PEPTIDE2{C.[gGlu].G.G}$PEPTIDE1,PEPTIDE2,9:R3-1:R3|PEPTIDE1,PEPTIDE2,20:R3-4:R2$$$V2.0',
|
|
128
|
+
'PEPTIDE1{F}|PEPTIDE2{[dI].[Trp_Ome].[Asp_OMe].[Cys_Bn].[meG].[Phe_3Cl].[dD].T.[dI].T.[dK].[aG].[3Pal].[xiIle].[meD].[Ala_tBu]}|PEPTIDE3{L.[Pro_4Me3OH].S.[NMe2Abz].Q.[3Pal].[xiIle].D.[Hyp].[Ala_tBu].[dI].[Trp_Ome].[Asp_OMe].N.[meG].[Phe_34diCl].[Phe_34diCl]}$PEPTIDE1,PEPTIDE2,1:R2-9:R3|PEPTIDE2,PEPTIDE2,1:R1-16:R2|PEPTIDE2,PEPTIDE3,1:R3-11:R3|PEPTIDE3,PEPTIDE3,1:R1-17:R2$$$V2.0'
|
|
129
|
+
]
|
|
130
|
+
}
|
|
131
|
+
];
|
|
132
|
+
|
|
133
|
+
export const helmToBilnTestsData: {name: string, helm: string[], biln: string[]}[] = [
|
|
134
|
+
{
|
|
135
|
+
name: 'Linear',
|
|
136
|
+
biln: [
|
|
137
|
+
'meI-hHis-Aca-N-T-dE-Thr_PO3H2-Aca-[D-Tyr_Et]-[Tyr_ab-dehydroMe]-dV-E-N-[D-Orn]-[D-aThr]--Phe_4Me',
|
|
138
|
+
'meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2-[D-Chg]-dV-[Phe_ab-dehydro]-N-[D-Orn]-[D-aThr]--Phe_4Me',
|
|
139
|
+
'Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2-[D-Chg]-dV-Thr_PO3H2-N-[D-Orn]-[D-aThr]--Phe_4Me',
|
|
140
|
+
'meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2-[D-Chg]-dV-Thr_PO3H2-N-[D-Orn]-[D-aThr]--Phe_4Me',
|
|
141
|
+
'meI-hHis-Aca-N-T-dK-Thr_PO3H2-Aca-[D-Tyr_Et]-[Tyr_ab-dehydroMe]-dV-Chg-N-[D-Orn]-[D-aThr]--Phe_4Me'
|
|
142
|
+
],
|
|
143
|
+
helm: [
|
|
144
|
+
'PEPTIDE1{[meI].[hHis].[Aca].N.T.[dE].[Thr_PO3H2].[Aca].[D-Tyr_Et].[Tyr_ab-dehydroMe].[dV].E.N.[D-Orn].[D-aThr].*.[Phe_4Me]}$$$$',
|
|
145
|
+
'PEPTIDE1{[meI].[hHis].[Aca].[Cys_SEt].T.[dK].[Thr_PO3H2].[Aca].[Tyr_PO3H2].[D-Chg].[dV].[Phe_ab-dehydro].N.[D-Orn].[D-aThr].*.[Phe_4Me]}$$$$',
|
|
146
|
+
'PEPTIDE1{[Lys_Boc].[hHis].[Aca].[Cys_SEt].T.[dK].[Thr_PO3H2].[Aca].[Tyr_PO3H2].[D-Chg].[dV].[Thr_PO3H2].N.[D-Orn].[D-aThr].*.[Phe_4Me]}$$$$',
|
|
147
|
+
'PEPTIDE1{[meI].[hHis].[Aca].[Cys_SEt].T.[dK].[Thr_PO3H2].[Aca].[Tyr_PO3H2].[D-Chg].[dV].[Thr_PO3H2].N.[D-Orn].[D-aThr].*.[Phe_4Me]}$$$$',
|
|
148
|
+
'PEPTIDE1{[meI].[hHis].[Aca].N.T.[dK].[Thr_PO3H2].[Aca].[D-Tyr_Et].[Tyr_ab-dehydroMe].[dV].[Chg].N.[D-Orn].[D-aThr].*.[Phe_4Me]}$$$$'
|
|
149
|
+
]
|
|
150
|
+
}, {
|
|
151
|
+
name: 'Cyclic',
|
|
152
|
+
biln: [
|
|
153
|
+
'C-C(3,3)-S-W-P-A-R-C(2,3)-L-H-Q-D-L-C(1,3)-NH2.C(1,3)(2,2)(3,1)',
|
|
154
|
+
'D-T-H-F-P-I-C(4,3)-I-F-C(1,3)-C(2,3)-G-C(1,3)-C(3,3)-H-R-S-K-C(2,3)-G-M-C(3,3)-C(4,3)-K-T',
|
|
155
|
+
'L-C(1,3)-G-S-H-L-V-E-A-L-Y-L-V-C(3,3)-G.G-I-V-E-Q-C(2,3)-C(1,3)-T-S-I-C(2,3)-S-L-Y-Q-L-E-N-Y-C(3,3)-N',
|
|
156
|
+
'H-Aib-E-G-T-F-T-S-D(1,3)-V-S-S-Y-L-E-G-Q-A-A-K(2,3)-E-F-I-A-W-L-V-R-G-R-G.C(1,3)-gGlu-G-G(2,2)',
|
|
157
|
+
'F(1,2).dI(2,1)(3,3)-Trp_Ome-Asp_OMe-Cys_Bn-meG-Phe_3Cl-dD-T-dI(1,3)-T-dK-aG-3Pal-xiIle-meD-Ala_tBu(2,2).L(4,1)-Pro_4Me3OH-S-NMe2Abz-Q-3Pal-xiIle-D-Hyp-Ala_tBu-dI(3,3)-Trp_Ome-Asp_OMe-N-meG-Phe_34diCl-Phe_34diCl(4,2)'
|
|
158
|
+
],
|
|
159
|
+
helm: [
|
|
160
|
+
'PEPTIDE1{C.C.S.W.P.A.R.C.L.H.Q.D.L.C.[NH2]}|PEPTIDE2{C}$PEPTIDE1,PEPTIDE2,14:R3-1:R3|PEPTIDE1,PEPTIDE2,8:R3-1:R2|PEPTIDE1,PEPTIDE2,2:R3-1:R1$$$V2.0',
|
|
161
|
+
'PEPTIDE1{D.T.H.F.P.I.C.I.F.C.C.G.C.C.H.R.S.K.C.G.M.C.C.K.T}$PEPTIDE1,PEPTIDE1,10:R3-13:R3|PEPTIDE1,PEPTIDE1,11:R3-19:R3|PEPTIDE1,PEPTIDE1,14:R3-22:R3|PEPTIDE1,PEPTIDE1,7:R3-23:R3$$$V2.0',
|
|
162
|
+
'PEPTIDE1{L.C.G.S.H.L.V.E.A.L.Y.L.V.C.G}|PEPTIDE2{G.I.V.E.Q.C.C.T.S.I.C.S.L.Y.Q.L.E.N.Y.C.N}$PEPTIDE1,PEPTIDE2,2:R3-7:R3|PEPTIDE2,PEPTIDE2,6:R3-11:R3|PEPTIDE1,PEPTIDE2,14:R3-20:R3$$$V2.0',
|
|
163
|
+
'PEPTIDE1{H.[Aib].E.G.T.F.T.S.D.V.S.S.Y.L.E.G.Q.A.A.K.E.F.I.A.W.L.V.R.G.R.G}|PEPTIDE2{C.[gGlu].G.G}$PEPTIDE1,PEPTIDE2,9:R3-1:R3|PEPTIDE1,PEPTIDE2,20:R3-4:R2$$$V2.0',
|
|
164
|
+
'PEPTIDE1{F}|PEPTIDE2{[dI].[Trp_Ome].[Asp_OMe].[Cys_Bn].[meG].[Phe_3Cl].[dD].T.[dI].T.[dK].[aG].[3Pal].[xiIle].[meD].[Ala_tBu]}|PEPTIDE3{L.[Pro_4Me3OH].S.[NMe2Abz].Q.[3Pal].[xiIle].D.[Hyp].[Ala_tBu].[dI].[Trp_Ome].[Asp_OMe].N.[meG].[Phe_34diCl].[Phe_34diCl]}$PEPTIDE1,PEPTIDE2,1:R2-9:R3|PEPTIDE2,PEPTIDE2,1:R1-16:R2|PEPTIDE2,PEPTIDE3,1:R3-11:R3|PEPTIDE3,PEPTIDE3,1:R1-17:R2$$$V2.0'
|
|
165
|
+
]
|
|
166
|
+
}
|
|
167
|
+
];
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
/* eslint-disable max-lines */
|
|
2
|
+
/* eslint-disable max-lines-per-function */
|
|
1
3
|
import * as DG from 'datagrok-api/dg';
|
|
2
4
|
import * as grok from 'datagrok-api/grok';
|
|
3
5
|
|
|
@@ -7,6 +9,7 @@ import {ISeqHelper, getSeqHelper} from '@datagrok-libraries/bio/src/utils/seq-he
|
|
|
7
9
|
import {ISeqHandler} from '@datagrok-libraries/bio/src/utils/macromolecule/seq-handler';
|
|
8
10
|
|
|
9
11
|
import {ConverterFunc} from './types';
|
|
12
|
+
import {_testBilnToHelm, _testHelmToBiln, bilnToHelmTestsData, helmToBilnTestsData} from './biln-tests';
|
|
10
13
|
|
|
11
14
|
|
|
12
15
|
category('converters', () => {
|
|
@@ -287,4 +290,15 @@ RNA1{p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.p
|
|
|
287
290
|
test('testHelmLonePhosphorus', async () => {
|
|
288
291
|
await _testConvert(Samples.helmLonePhosphorus, converter(NOTATION.FASTA), Samples.fastaRna);
|
|
289
292
|
});
|
|
293
|
+
|
|
294
|
+
for (const sample of bilnToHelmTestsData) {
|
|
295
|
+
test(`testBilnToHelm_${sample.name}`, async () => {
|
|
296
|
+
await _testBilnToHelm(sample.biln, sample.helm, seqHelper);
|
|
297
|
+
});
|
|
298
|
+
}
|
|
299
|
+
for (const sample of helmToBilnTestsData) {
|
|
300
|
+
test(`testHelmToBiln_${sample.name}`, async () => {
|
|
301
|
+
await _testHelmToBiln(sample.helm, sample.biln, seqHelper);
|
|
302
|
+
});
|
|
303
|
+
}
|
|
290
304
|
});
|
|
@@ -10,6 +10,8 @@ import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-librarie
|
|
|
10
10
|
import {ISeqHelper, getSeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
|
|
11
11
|
|
|
12
12
|
import {_testNeg, _testPos, DetectorTestData, DfReaderFunc, PosCol} from './utils/detectors-utils';
|
|
13
|
+
import { SeqTemps } from '@datagrok-libraries/bio/src/utils/macromolecule/seq-handler';
|
|
14
|
+
import { _testBilnDetection, detectorTestsDataForBiln } from './biln-tests';
|
|
13
15
|
|
|
14
16
|
/*
|
|
15
17
|
// snippet to list df columns of semType='Macromolecule' (false positive)
|
|
@@ -418,6 +420,11 @@ MWRSWY-CKHPMWRSWY-CKHP`;
|
|
|
418
420
|
}, seqHelper);
|
|
419
421
|
});
|
|
420
422
|
|
|
423
|
+
for (const bilnT of detectorTestsDataForBiln) {
|
|
424
|
+
test(bilnT.name, async () => {
|
|
425
|
+
await _testBilnDetection(bilnT.seqs, seqHelper, bilnT.negative);
|
|
426
|
+
});
|
|
427
|
+
}
|
|
421
428
|
// test('samplesFastaFasta', async () => {
|
|
422
429
|
// await _testDf(readSamples(Samples.fastaFasta), {
|
|
423
430
|
// 'sequence': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false),
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/* eslint-disable max-len */
|
|
2
|
+
/* eslint-disable max-len */
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
import * as ui from 'datagrok-api/ui';
|
|
5
|
+
import * as DG from 'datagrok-api/dg';
|
|
6
|
+
|
|
7
|
+
/* eslint-disable max-len */
|
|
8
|
+
import {ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
|
|
9
|
+
import {INotationProvider, SplitterFunc} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
10
|
+
import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
|
|
11
|
+
import {CellRendererBackBase} from '@datagrok-libraries/bio/src/utils/cell-renderer-back-base';
|
|
12
|
+
import {MonomerPlacer} from '@datagrok-libraries/bio/src/utils/cell-renderer-monomer-placer';
|
|
13
|
+
import {monomerToShort, splitterAsBiln} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
|
|
14
|
+
import {_package} from '../package';
|
|
15
|
+
/* eslint-enable max-len */
|
|
16
|
+
|
|
17
|
+
/**
 * Notation provider for BILN sequence columns.
 *
 * Supplies the BILN splitter, converts a BILN string to HELM via the column's sequence handler,
 * and creates the cell renderer back-end used to draw BILN cells.
 */
export class BilnNotationProvider implements INotationProvider {
  /** Splitter for BILN strings: `splitterAsBiln` bound to this provider. */
  public readonly splitter: SplitterFunc;

  /** Gap symbol in the original notation; this provider reports an empty string. */
  get defaultGapOriginal(): string { return ''; }

  /**
   * @param separator separator used by the column
   * @param seqHelper helper used to obtain sequence handlers
   * @param seqCol column this provider is attached to
   */
  constructor(
    public readonly separator: string,
    public readonly seqHelper: ISeqHelper,
    public readonly seqCol: DG.Column
  ) {
    this.splitter = splitterAsBiln.bind(this);
  }

  /** Intentionally a no-op in this provider. */
  setUnits(): void {}

  /**
   * Generates HELM from a BILN string.
   *
   * @param seq BILN sequence
   * @param _options unused
   * @returns the sequence joined in HELM notation by the column's sequence handler
   */
  public getHelm(seq: string, _options?: any): string {
    const seqSplitted = this.splitter(seq);
    const sh = this.seqHelper.getSeqHandler(this.seqCol);
    return sh.getJoiner({notation: NOTATION.HELM})(seqSplitted);
  }

  /**
   * Creates the renderer back-end for a BILN column and starts its asynchronous initialization.
   *
   * @param gridCol grid column being rendered, if any
   * @param tableCol table column holding the sequences
   * @returns the back-end; its `init()` may still be in progress when this method returns
   */
  public createCellRendererBack(gridCol: DG.GridColumn | null, tableCol: DG.Column<string>):
    CellRendererBackBase<string> {
    const maxLengthOfMonomer = _package.properties.maxMonomerLength || 4;
    const back = new BilnCellRendererBack(gridCol, tableCol,
      maxLengthOfMonomer, this.seqHelper);

    // init() is not awaited; log a failure instead of leaving the rejection unhandled
    back.init().catch((err: unknown) => {
      const message = err instanceof Error ? err.message : String(err);
      _package.logger.error(`BILN cell renderer initialization failed: ${message}`);
    });
    return back;
  }
}
|
|
51
|
+
|
|
52
|
+
/**
 * Cell renderer back-end for BILN columns, built on MonomerPlacer.
 *
 * The properties callback passed to the base class resolves the column's sequence handler and
 * font settings each time it is invoked, and reports a separator width of 0.
 */
export class BilnCellRendererBack extends MonomerPlacer {
  /**
   * @param gridCol grid column being rendered, if any
   * @param tableCol table column holding the sequences
   * @param maxLengthOfMonomer maximum monomer length passed to the base placer
   * @param seqHelper helper used to obtain the column's sequence handler
   */
  constructor(
    gridCol: DG.GridColumn | null, tableCol: DG.Column,
    maxLengthOfMonomer: number, seqHelper: ISeqHelper
  ) {
    super(gridCol, tableCol, _package.logger, maxLengthOfMonomer, () => {
      const seqHandler = seqHelper.getSeqHandler(tableCol);
      const fontSettings = MonomerPlacer.getFontSettings(tableCol);
      return {
        seqHandler,
        font: fontSettings.font,
        fontCharWidth: fontSettings.fontWidth,
        separatorWidth: 0,
        monomerToShort,
      };
    });
  }
}
|
|
@@ -81,10 +81,9 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
81
81
|
|
|
82
82
|
getRendererBack(gridCell: DG.GridCell): CellRendererBackBase<string> | null {
|
|
83
83
|
const [gridCol, tableCol, _temp] = getGridCellColTemp<string, any>(gridCell);
|
|
84
|
-
if (
|
|
84
|
+
if (_temp.rendererBack)
|
|
85
85
|
return _temp.rendererBack;
|
|
86
86
|
let back: CellRendererBackBase<string> | null = null;
|
|
87
|
-
|
|
88
87
|
if (this.seqHelper) {
|
|
89
88
|
const sh = this.seqHelper.getSeqHandler(tableCol);
|
|
90
89
|
back = sh.getRendererBack(gridCol, tableCol);
|
|
@@ -96,10 +95,8 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
96
95
|
const colTemp: TempType = gridCell.cell.column.temp;
|
|
97
96
|
colTemp[tempTAGS.currentWord] = gridCell.cell.value;
|
|
98
97
|
gridCell.grid.invalidate();
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
back?.onClick(gridCell, _e);
|
|
102
|
-
}
|
|
98
|
+
const back = this.getRendererBack(gridCell);
|
|
99
|
+
back?.onClick(gridCell, _e);
|
|
103
100
|
}
|
|
104
101
|
|
|
105
102
|
override onMouseEnter(gridCell: DG.GridCell, e: MouseEvent) {
|
|
@@ -195,12 +192,11 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
195
192
|
}
|
|
196
193
|
|
|
197
194
|
override render(g: CanvasRenderingContext2D, x: number, y: number, w: number, h: number, gridCell: DG.GridCell, cellStyle: DG.GridCellStyle): void {
|
|
198
|
-
|
|
199
|
-
|
|
195
|
+
const back = this.getRendererBack(gridCell);
|
|
196
|
+
if (back)
|
|
200
197
|
back?.render(g, x, y, w, h, gridCell, cellStyle);
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
this.renderInt(g, x, y, w, h, gridCell, cellStyle);
|
|
198
|
+
else
|
|
199
|
+
this.renderInt(g, x, y, w, h, gridCell, cellStyle);
|
|
204
200
|
}
|
|
205
201
|
}
|
|
206
202
|
|
package/src/utils/convert.ts
CHANGED
|
@@ -39,6 +39,7 @@ export function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelpe
|
|
|
39
39
|
NOTATION.FASTA,
|
|
40
40
|
NOTATION.SEPARATOR,
|
|
41
41
|
NOTATION.HELM,
|
|
42
|
+
NOTATION.BILN
|
|
42
43
|
];
|
|
43
44
|
const toggleColumn = (newCol: DG.Column) => {
|
|
44
45
|
srcCol = newCol;
|
|
@@ -47,7 +48,7 @@ export function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelpe
|
|
|
47
48
|
if (currentNotation === NOTATION.HELM)
|
|
48
49
|
separatorInput.value = '/'; // helm monomers can have - in the name like D-aThr;
|
|
49
50
|
dialogHeader.textContent = 'Current notation: ' + currentNotation;
|
|
50
|
-
filteredNotations = notations.filter((e) => e !== currentNotation);
|
|
51
|
+
filteredNotations = notations;//.filter((e) => e !== currentNotation); TEMPORARY DO NOT FORGET TO UNCOMMENT
|
|
51
52
|
targetNotationInput = ui.input.choice('Convert to', {
|
|
52
53
|
value: filteredNotations[0], items: filteredNotations,
|
|
53
54
|
onValueChanged: toggleSeparator
|
|
@@ -70,7 +71,7 @@ export function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelpe
|
|
|
70
71
|
});
|
|
71
72
|
|
|
72
73
|
const separatorArray = ['-', '.', '/'];
|
|
73
|
-
let filteredNotations = notations.filter((e) => e !== currentNotation);
|
|
74
|
+
let filteredNotations = notations;//.filter((e) => e !== currentNotation); // TEMPORARY DO NOT FORGET TO UNCOMMENT
|
|
74
75
|
|
|
75
76
|
const separatorInput = ui.input.choice('Separator', {value: separatorArray[0], items: separatorArray});
|
|
76
77
|
|