@datagrok/bio 2.22.4 → 2.22.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/455.js +1 -1
- package/dist/455.js.map +1 -1
- package/dist/package-test.js +2 -2
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +3 -3
- package/dist/package.js.map +1 -1
- package/files/tests/to-atomic-level-dna-fasta-output.csv +12765 -12725
- package/files/tests/to-atomic-level-msa-fasta-output.csv +1904 -1868
- package/files/tests/to-atomic-level-msa-separator-output.csv +3167 -3123
- package/files/tests/to-atomic-level-peptides-fasta-output.csv +30795 -30539
- package/files/tests/to-atomic-level-pt-fasta-2.mol +34 -31
- package/package.json +3 -3
- package/src/tests/to-atomic-level-tests.ts +4 -0
- package/src/utils/monomer-cell-renderer.ts +52 -27
- package/test-console-output-1.log +360 -356
- package/test-record-1.mp4 +0 -0
package/files/tests/to-atomic-level-pt-fasta-2.mol
CHANGED
|
@@ -1,45 +1,48 @@
|
|
|
1
1
|
|
|
2
|
-
|
|
2
|
+
Actelion Java MolfileCreator 2.0
|
|
3
3
|
|
|
4
|
-
0 0 0 0 0 0
|
|
4
|
+
0 0 0 0 0 0 0 V3000
|
|
5
5
|
M V30 BEGIN CTAB
|
|
6
6
|
M V30 COUNTS 17 16 0 0 0
|
|
7
7
|
M V30 BEGIN ATOM
|
|
8
|
-
M V30 1 C 1.2991 -2.25
|
|
9
|
-
M V30 2 C 1.2991 -0.75
|
|
10
|
-
M V30 3 N 0 0
|
|
11
|
-
M V30 4 C 2.5981 0
|
|
12
|
-
M V30 5 O 3.8971 -0.75
|
|
13
|
-
M V30 6 N 7.7941 9
|
|
14
|
-
M V30 7 C 6.4951 8.2501
|
|
15
|
-
M V30 8 N 5.196 9
|
|
16
|
-
M V30 9 N 6.4951 6.7501
|
|
17
|
-
M V30 10 C 5.1961 6
|
|
18
|
-
M V30 11 C 5.1961 4.5
|
|
19
|
-
M V30 12 C 3.8971 3.75
|
|
20
|
-
M V30 13 C 3.8971 2.25
|
|
21
|
-
M V30 14 N 2.5981 1.5
|
|
22
|
-
M V30 15 C 5.1962 1.5
|
|
23
|
-
M V30 16 O 6.4952 2.25
|
|
24
|
-
M V30 17 O 5.1962 -0.0001 0
|
|
8
|
+
M V30 1 C 1.2991 -2.25 0 0
|
|
9
|
+
M V30 2 C 1.2991 -0.75 0 0 CFG=2
|
|
10
|
+
M V30 3 N 0 0 0 0
|
|
11
|
+
M V30 4 C 2.5981 0 0 0
|
|
12
|
+
M V30 5 O 3.8971 -0.75 0 0
|
|
13
|
+
M V30 6 N 7.7941 9 0 0
|
|
14
|
+
M V30 7 C 6.4951 8.2501 0 0
|
|
15
|
+
M V30 8 N 5.196 9 0 0
|
|
16
|
+
M V30 9 N 6.4951 6.7501 0 0
|
|
17
|
+
M V30 10 C 5.1961 6 0 0
|
|
18
|
+
M V30 11 C 5.1961 4.5 0 0
|
|
19
|
+
M V30 12 C 3.8971 3.75 0 0
|
|
20
|
+
M V30 13 C 3.8971 2.25 0 0 CFG=2
|
|
21
|
+
M V30 14 N 2.5981 1.5 0 0
|
|
22
|
+
M V30 15 C 5.1962 1.5 0 0
|
|
23
|
+
M V30 16 O 6.4952 2.25 0 0
|
|
24
|
+
M V30 17 O 5.1962 -0.0001 0 0
|
|
25
25
|
M V30 END ATOM
|
|
26
26
|
M V30 BEGIN BOND
|
|
27
|
-
M V30 1 1 2 1 CFG=3
|
|
28
|
-
M V30 2 1 2 3
|
|
27
|
+
M V30 1 1 2 1 CFG=3
|
|
28
|
+
M V30 2 1 2 3
|
|
29
29
|
M V30 3 1 2 4
|
|
30
|
-
M V30 4 2 4 5
|
|
30
|
+
M V30 4 2 4 5
|
|
31
31
|
M V30 5 1 4 14
|
|
32
|
-
M V30 6 2 6 7
|
|
33
|
-
M V30 7 1 7 8
|
|
34
|
-
M V30 8 1 7 9
|
|
35
|
-
M V30 9 1 9 10
|
|
36
|
-
M V30 10 1 10 11
|
|
37
|
-
M V30 11 1 11 12
|
|
38
|
-
M V30 12 1 13 12 CFG=1
|
|
39
|
-
M V30 13 1 13 14
|
|
32
|
+
M V30 6 2 6 7
|
|
33
|
+
M V30 7 1 7 8
|
|
34
|
+
M V30 8 1 7 9
|
|
35
|
+
M V30 9 1 9 10
|
|
36
|
+
M V30 10 1 10 11
|
|
37
|
+
M V30 11 1 11 12
|
|
38
|
+
M V30 12 1 13 12 CFG=1
|
|
39
|
+
M V30 13 1 13 14
|
|
40
40
|
M V30 14 1 13 15
|
|
41
|
-
M V30 15 2 15 16
|
|
41
|
+
M V30 15 2 15 16
|
|
42
42
|
M V30 16 1 15 17
|
|
43
43
|
M V30 END BOND
|
|
44
|
+
M V30 BEGIN COLLECTION
|
|
45
|
+
M V30 MDLV30/STEABS ATOMS=(2 2 13)
|
|
46
|
+
M V30 END COLLECTION
|
|
44
47
|
M V30 END CTAB
|
|
45
48
|
M END
|
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Davit Rizhinashvili",
|
|
6
6
|
"email": "drizhinashvili@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.22.4",
|
|
8
|
+
"version": "2.22.6",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -44,7 +44,7 @@
|
|
|
44
44
|
],
|
|
45
45
|
"dependencies": {
|
|
46
46
|
"@biowasm/aioli": "^3.1.0",
|
|
47
|
-
"@datagrok-libraries/bio": "^5.54.
|
|
47
|
+
"@datagrok-libraries/bio": "^5.54.6",
|
|
48
48
|
"@datagrok-libraries/chem-meta": "^1.2.7",
|
|
49
49
|
"@datagrok-libraries/math": "^1.2.4",
|
|
50
50
|
"@datagrok-libraries/ml": "^6.10.4",
|
|
@@ -73,7 +73,7 @@
|
|
|
73
73
|
"@types/wu": "^2.1.44",
|
|
74
74
|
"@typescript-eslint/eslint-plugin": "^8.8.1",
|
|
75
75
|
"@typescript-eslint/parser": "^8.8.1",
|
|
76
|
-
"datagrok-tools": "^4.14.
|
|
76
|
+
"datagrok-tools": "^4.14.40",
|
|
77
77
|
"eslint": "^8.57.1",
|
|
78
78
|
"eslint-config-google": "^0.14.0",
|
|
79
79
|
"eslint-plugin-rxjs": "^5.0.3",
|
package/src/tests/to-atomic-level-tests.ts
CHANGED
|
@@ -78,6 +78,7 @@ category('toAtomicLevel', async () => {
|
|
|
78
78
|
const inputPath = testData.inPath;
|
|
79
79
|
|
|
80
80
|
sourceDf[testName] = DG.DataFrame.fromCsv((await fileSource.readAsText(testData.inPath)).replace(/\n$/, ''));
|
|
81
|
+
sourceDf[testName].name = testData.inPath.split('/').pop()!;
|
|
81
82
|
await grok.data.detectSemanticTypes(sourceDf[testName]);
|
|
82
83
|
targetDf[testName] = DG.DataFrame.fromCsv((await fileSource.readAsText(testData.outPath)).replace(/\n$/, ''));
|
|
83
84
|
}
|
|
@@ -93,6 +94,7 @@ category('toAtomicLevel', async () => {
|
|
|
93
94
|
// await toAtomicLevel(source, inputCol, false);
|
|
94
95
|
await grok.functions.call('Bio:toAtomicLevel', {table: source, seqCol: inputCol, nonlinear: false});
|
|
95
96
|
const obtainedCol = source.getCol(outputColName);
|
|
97
|
+
// DG.Utils.download(source.name.endsWith('.csv') ? source.name : source.name + '.csv', source.toCsv());
|
|
96
98
|
const expectedCol = target.getCol(outputColName);
|
|
97
99
|
const obtainedArray: string[] = wu(obtainedCol.values()).map((mol) => polishMolfile(mol)).toArray();
|
|
98
100
|
const expectedArray: string[] = wu(expectedCol.values()).map((mol) => polishMolfile(mol)).toArray();
|
|
@@ -218,6 +220,8 @@ PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.Thr_PO3H2.Aca.Tyr
|
|
|
218
220
|
seqCol.setTag(bioTAGS.alphabet, ALPHABET.PT);
|
|
219
221
|
const sh = seqHelper.getSeqHandler(seqCol);
|
|
220
222
|
const resCol = (await _testToAtomicLevel(srcDf, 'seq', monomerLibHelper))!;
|
|
223
|
+
// DG.Utils.download('molfile.mol', polishMolfile(resCol.get(0)));
|
|
224
|
+
|
|
221
225
|
expect(polishMolfile(resCol.get(0)), polishMolfile(tgtMol));
|
|
222
226
|
});
|
|
223
227
|
|
package/src/utils/monomer-cell-renderer.ts
CHANGED
|
@@ -3,7 +3,7 @@ import * as DG from 'datagrok-api/dg';
|
|
|
3
3
|
import {GridCell} from 'datagrok-api/dg';
|
|
4
4
|
import * as ui from 'datagrok-api/ui';
|
|
5
5
|
import {ALPHABET, monomerToShort} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
6
|
-
import {GAP_SYMBOL, TAGS as bioTAGS,} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
|
|
6
|
+
import {GAP_SYMBOL, MONOMER_MOTIF_SPLITTER, TAGS as bioTAGS,} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
|
|
7
7
|
import {MONOMER_RENDERER_TAGS} from '@datagrok-libraries/bio/src/utils/cell-renderer';
|
|
8
8
|
import {getGridCellColTemp} from '@datagrok-libraries/bio/src/utils/cell-renderer-back-base';
|
|
9
9
|
|
|
@@ -16,6 +16,8 @@ const Tags = new class {
|
|
|
16
16
|
tooltipHandlerTemp = 'tooltip-handler.Monomer';
|
|
17
17
|
}();
|
|
18
18
|
|
|
19
|
+
const DASH_GAP_SYMBOL = '-';
|
|
20
|
+
|
|
19
21
|
export class MonomerCellRendererBack extends CellRendererWithMonomerLibBackBase {
|
|
20
22
|
constructor(gridCol: DG.GridColumn | null, tableCol: DG.Column) {
|
|
21
23
|
super(gridCol, tableCol);
|
|
@@ -25,39 +27,51 @@ export class MonomerCellRendererBack extends CellRendererWithMonomerLibBackBase
|
|
|
25
27
|
x: number, y: number, w: number, h: number, gridCell: DG.GridCell, cellStyle: DG.GridCellStyle
|
|
26
28
|
): void {
|
|
27
29
|
g.save();
|
|
30
|
+
// clip the cell
|
|
31
|
+
g.beginPath();
|
|
32
|
+
g.rect(x, y, w, h);
|
|
33
|
+
g.clip();
|
|
28
34
|
try {
|
|
29
35
|
if (!gridCell.isTableCell) return;
|
|
30
36
|
const applyToBackground = gridCell.cell?.column && gridCell.cell.column?.dart && gridCell.cell.column.getTag(MONOMER_RENDERER_TAGS.applyToBackground) === 'true';
|
|
31
37
|
|
|
32
38
|
g.font = `12px monospace`;
|
|
33
39
|
g.textBaseline = 'middle';
|
|
34
|
-
g.textAlign = '
|
|
35
|
-
|
|
36
|
-
const symbol: string = gridCell.cell.value;
|
|
37
|
-
if (!symbol || symbol == GAP_SYMBOL) return;
|
|
38
|
-
|
|
39
|
-
let textcolor = undefinedColor;
|
|
40
|
-
let backgroundcolor = 'rgb(255, 255, 255)';
|
|
41
|
-
if (this.monomerLib) {
|
|
42
|
-
const alphabet = this.tableCol.getTag(bioTAGS.alphabet);
|
|
43
|
-
const biotype = alphabet === ALPHABET.RNA || alphabet === ALPHABET.DNA ? HelmTypes.NUCLEOTIDE : HelmTypes.AA;
|
|
44
|
-
if (applyToBackground) {
|
|
45
|
-
const colors = this.monomerLib.getMonomerColors(biotype, symbol);
|
|
46
|
-
textcolor = colors?.textcolor ?? textcolor;
|
|
47
|
-
backgroundcolor = colors?.backgroundcolor ?? backgroundcolor;
|
|
48
|
-
} else
|
|
49
|
-
textcolor = this.monomerLib.getMonomerTextColor(biotype, symbol);
|
|
50
|
-
}
|
|
40
|
+
g.textAlign = 'left';
|
|
51
41
|
|
|
42
|
+
let value: string = gridCell.cell.value;
|
|
43
|
+
if (!value || value === GAP_SYMBOL)
|
|
44
|
+
value = DASH_GAP_SYMBOL;
|
|
45
|
+
const symbols = value.split(MONOMER_MOTIF_SPLITTER).map((s) => !s || s === GAP_SYMBOL ? DASH_GAP_SYMBOL : s.trim());
|
|
52
46
|
//cell width of monomer should dictate how many characters can be displayed
|
|
53
47
|
// for width 40, 6 characters can be displayed (0.15 is 6 / 40)
|
|
54
|
-
const
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
48
|
+
const shortSymbols = symbols.map((s) => monomerToShort(s, Math.max(2, Math.floor(w * 0.15 / symbols.length))));
|
|
49
|
+
const symbolWidths = shortSymbols.map((s) => g.measureText(s).width);
|
|
50
|
+
const totalWidth = symbolWidths.reduce((a, b) => a + b, 0);
|
|
51
|
+
const xOffset = (w - totalWidth) / 2;
|
|
52
|
+
let xPos = x + xOffset;
|
|
53
|
+
const alphabet = this.tableCol.getTag(bioTAGS.alphabet);
|
|
54
|
+
const biotype = alphabet === ALPHABET.RNA || alphabet === ALPHABET.DNA ? HelmTypes.NUCLEOTIDE : HelmTypes.AA;
|
|
55
|
+
for (let i = 0; i < shortSymbols.length; i++) {
|
|
56
|
+
const symbol: string = symbols[i];
|
|
57
|
+
let textcolor = undefinedColor;
|
|
58
|
+
let backgroundcolor = 'rgb(255, 255, 255)';
|
|
59
|
+
if (this.monomerLib) {
|
|
60
|
+
if (applyToBackground) {
|
|
61
|
+
const colors = this.monomerLib.getMonomerColors(biotype, symbol);
|
|
62
|
+
textcolor = colors?.textcolor ?? textcolor;
|
|
63
|
+
backgroundcolor = colors?.backgroundcolor ?? backgroundcolor;
|
|
64
|
+
} else
|
|
65
|
+
textcolor = this.monomerLib.getMonomerTextColor(biotype, symbol);
|
|
66
|
+
}
|
|
67
|
+
if (applyToBackground && symbols.length == 1) {
|
|
68
|
+
g.fillStyle = backgroundcolor;
|
|
69
|
+
g.fillRect(x, y, w, h);
|
|
70
|
+
}
|
|
71
|
+
g.fillStyle = textcolor;
|
|
72
|
+
g.fillText(shortSymbols[i], xPos, y + (h / 2), w);
|
|
73
|
+
xPos += symbolWidths[i];
|
|
58
74
|
}
|
|
59
|
-
g.fillStyle = textcolor;
|
|
60
|
-
g.fillText(monomerToShort(symbol, maxChars), x + (w / 2), y + (h / 2), w);
|
|
61
75
|
} finally {
|
|
62
76
|
g.restore();
|
|
63
77
|
}
|
|
@@ -70,12 +84,12 @@ export class MonomerCellRendererBack extends CellRendererWithMonomerLibBackBase
|
|
|
70
84
|
) return false;
|
|
71
85
|
|
|
72
86
|
const alphabet = gridCell.tableColumn.getTag(bioTAGS.alphabet) as ALPHABET;
|
|
73
|
-
const monomerName = gridCell.cell.value;
|
|
87
|
+
const monomerName: string = gridCell.cell.value;
|
|
74
88
|
const canvasClientRect = gridCell.grid.canvas.getBoundingClientRect();
|
|
75
89
|
const x1 = gridCell.bounds.right + canvasClientRect.left - 4;
|
|
76
90
|
const y1 = gridCell.bounds.bottom + canvasClientRect.top - 4;
|
|
77
91
|
|
|
78
|
-
if (monomerName == GAP_SYMBOL) {
|
|
92
|
+
if (!monomerName || monomerName == GAP_SYMBOL || monomerName == DASH_GAP_SYMBOL) {
|
|
79
93
|
ui.tooltip.show(ui.divText('gap'), x1, y1);
|
|
80
94
|
return true;
|
|
81
95
|
}
|
|
@@ -86,7 +100,18 @@ export class MonomerCellRendererBack extends CellRendererWithMonomerLibBackBase
|
|
|
86
100
|
}
|
|
87
101
|
|
|
88
102
|
const biotype = alphabet === ALPHABET.RNA || alphabet === ALPHABET.DNA ? HelmTypes.NUCLEOTIDE : HelmTypes.AA;
|
|
89
|
-
const
|
|
103
|
+
const tooltipEls = monomerName.split(MONOMER_MOTIF_SPLITTER)
|
|
104
|
+
.map((s) => !s || s === GAP_SYMBOL || s === DASH_GAP_SYMBOL ? ui.divText('gap') : this.monomerLib!.getTooltip(biotype, s));
|
|
105
|
+
const tooltipEl = ui.divH(tooltipEls, {style: {alignItems: 'top'}});
|
|
106
|
+
// tooltip max width is 600px, so we need to shrink the canvases a bit if needed. by default, it is 250px
|
|
107
|
+
const canvases = Array.from(tooltipEl.querySelectorAll('canvas'));
|
|
108
|
+
if (canvases.length > 2) {
|
|
109
|
+
const side = Math.floor(550 / canvases.length);
|
|
110
|
+
canvases.forEach((c) => {
|
|
111
|
+
c.style.setProperty('max-width', `${side}px`, 'important');
|
|
112
|
+
c.style.setProperty('max-height', `${side}px`, 'important');
|
|
113
|
+
});
|
|
114
|
+
}
|
|
90
115
|
ui.tooltip.show(tooltipEl, x1, y1);
|
|
91
116
|
|
|
92
117
|
return true; // To prevent default tooltip behaviour
|