@datagrok/bio 2.27.1 → 2.27.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/422.js +1 -1
- package/dist/package-test.js +2 -2
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +2 -2
- package/dist/package.js.map +1 -1
- package/package.json +3 -3
- package/src/package.g.ts +7 -0
- package/src/package.ts +10 -0
- package/src/utils/compare-sequences.ts +104 -0
- package/test-console-output-1.log +530 -552
- package/test-record-1.mp4 +0 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Davit Rizhinashvili",
|
|
6
6
|
"email": "drizhinashvili@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.27.1",
|
|
8
|
+
"version": "2.27.2",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -44,7 +44,7 @@
|
|
|
44
44
|
],
|
|
45
45
|
"dependencies": {
|
|
46
46
|
"@biowasm/aioli": "^3.1.0",
|
|
47
|
-
"@datagrok-libraries/bio": "^5.63.
|
|
47
|
+
"@datagrok-libraries/bio": "^5.63.7",
|
|
48
48
|
"@datagrok-libraries/chem-meta": "^1.2.9",
|
|
49
49
|
"@datagrok-libraries/math": "^1.2.6",
|
|
50
50
|
"@datagrok-libraries/ml": "^6.10.11",
|
|
@@ -69,7 +69,7 @@
|
|
|
69
69
|
"@datagrok-libraries/helm-web-editor": "^1.1.16",
|
|
70
70
|
"@datagrok-libraries/js-draw-lite": "^0.0.10",
|
|
71
71
|
"@datagrok/chem": "^1.17.1",
|
|
72
|
-
"@datagrok/dendrogram": "^1.
|
|
72
|
+
"@datagrok/dendrogram": "^1.4.11",
|
|
73
73
|
"@datagrok/eda": "^1.4.13",
|
|
74
74
|
"@datagrok/helm": "^2.13.1",
|
|
75
75
|
"@datagrok/peptides": "^1.27.4",
|
package/src/package.g.ts
CHANGED
|
@@ -456,6 +456,13 @@ export async function pepseaMsa(sequenceCol: DG.Column<any>, method: string, gap
|
|
|
456
456
|
return await PackageFunctions.pepseaMsa(sequenceCol, method, gapOpen, gapExtend);
|
|
457
457
|
}
|
|
458
458
|
|
|
459
|
+
//name: Compare Sequences
|
|
460
|
+
//description: Builds a MacromoleculeDifference column from two sequence columns (seq1#seq2)
|
|
461
|
+
//top-menu: Bio | Analyze | Compare sequences...
|
|
462
|
+
export function compareSequences() : void {
// Thin wrapper: forwards to the static PackageFunctions.compareSequences, which takes no arguments and returns nothing.
|
|
463
|
+
PackageFunctions.compareSequences();
|
|
464
|
+
}
|
|
465
|
+
|
|
459
466
|
//name: Composition Analysis
|
|
460
467
|
//description: Visualizes sequence composition on a WebLogo plot
|
|
461
468
|
//output: viewer result
|
package/src/package.ts
CHANGED
|
@@ -44,6 +44,7 @@ import {SequenceSimilarityViewer} from './analysis/sequence-similarity-viewer';
|
|
|
44
44
|
import {SequenceDiversityViewer} from './analysis/sequence-diversity-viewer';
|
|
45
45
|
import {invalidateMols, MONOMERIC_COL_TAGS, SubstructureSearchDialog} from './substructure-search/substructure-search';
|
|
46
46
|
import {convert} from './utils/convert';
|
|
47
|
+
import {compareSequencesUI} from './utils/compare-sequences';
|
|
47
48
|
import {getMacromoleculeColumnPropertyPanel} from './widgets/representations';
|
|
48
49
|
import {getMonomerInfoWidget} from './widgets/monomer-info-widget';
|
|
49
50
|
import {saveAsFastaUI} from './utils/save-as-fasta';
|
|
@@ -992,6 +993,15 @@ export class PackageFunctions {
|
|
|
992
993
|
return alignWithPepsea(sequenceCol, method, gapOpen, gapExtend);
|
|
993
994
|
}
|
|
994
995
|
|
|
996
|
+
@grok.decorators.func({
|
|
997
|
+
name: 'Compare Sequences',
|
|
998
|
+
description: 'Builds a MacromoleculeDifference column from two sequence columns (seq1#seq2)',
|
|
999
|
+
'top-menu': 'Bio | Analyze | Compare sequences...',
|
|
1000
|
+
})
|
|
1001
|
+
static compareSequences(): void {
// Delegates to compareSequencesUI (imported from ./utils/compare-sequences); this method adds no logic of its own.
|
|
1002
|
+
compareSequencesUI();
|
|
1003
|
+
}
|
|
1004
|
+
|
|
995
1005
|
@grok.decorators.func({
|
|
996
1006
|
name: 'Composition Analysis',
|
|
997
1007
|
description: 'Visualizes sequence composition on a WebLogo plot',
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
import {TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
6
|
+
import {SeqTemps} from '@datagrok-libraries/bio/src/utils/macromolecule/seq-handler';
|
|
7
|
+
|
|
8
|
+
import * as C from './constants';
|
|
9
|
+
import {getMacromoleculeColumns} from './ui-utils';
|
|
10
|
+
|
|
11
|
+
/** Builds a `MacromoleculeDifference` column from two sequence columns by encoding each row as
 * `seq1#seq2`; a missing (null/undefined) value on either side is encoded as an empty string.
 *
 * Metadata is taken from `seqCol1` only: the separator, units (notation), aligned and alphabet
 * tags are copied when present, and so is the notation provider kept in `temp`. Callers are
 * responsible for ensuring the two input columns are compatible.
 *
 * @param seqCol1 First sequence column; also the source of all copied metadata.
 * @param seqCol2 Second sequence column.
 * @param resultName Name for the resulting column. When omitted, empty or whitespace-only,
 *   the name defaults to `<seqCol1.name> vs <seqCol2.name>`.
 * @returns A new column whose length is the shorter of the two input lengths. The column is
 *   returned to the caller; it is not added to any dataframe here. */
export function compareSequencesDo(
  seqCol1: DG.Column<string>, seqCol2: DG.Column<string>, resultName?: string,
): DG.Column<string> {
  const rowCount = Math.min(seqCol1.length, seqCol2.length);
  const values: string[] = Array.from({length: rowCount},
    (_, i) => `${seqCol1.get(i) ?? ''}#${seqCol2.get(i) ?? ''}`);

  // A blank name would create an unnamed column, so treat it the same as "not provided".
  const defaultName = `${seqCol1.name} vs ${seqCol2.name}`;
  const name = resultName != null && resultName.trim() !== '' ? resultName : defaultName;

  const diffCol = DG.Column.fromStrings(name, values);
  diffCol.semType = C.SEM_TYPES.MACROMOLECULE_DIFFERENCE;

  // Copy only the tags that are actually set on the first column.
  for (const tag of [bioTAGS.separator, DG.TAGS.UNITS, bioTAGS.aligned, bioTAGS.alphabet]) {
    const tagValue = seqCol1.getTag(tag);
    if (tagValue != null) diffCol.tags[tag] = tagValue;
  }

  diffCol.tags[DG.TAGS.CELL_RENDERER] = C.SEM_TYPES.MACROMOLECULE_DIFFERENCE;

  // Same rule as for the tags: do not write an absent notation provider into `temp`.
  const notationProvider = seqCol1.temp[SeqTemps.notationProvider];
  if (notationProvider != null) diffCol.temp[SeqTemps.notationProvider] = notationProvider;

  return diffCol;
}
|
|
43
|
+
|
|
44
|
+
/** Checks whether two columns can be paired for difference rendering.
 *
 * The checks run in order and the first failure wins: the columns must be two distinct objects,
 * both must have the Macromolecule semantic type, their units (notation) tags must be equal, and
 * their separator and alphabet tags must be equal (for these two a missing tag is treated as an
 * empty string). The `aligned` tag is not compared.
 *
 * @returns The error message of the first failed check, or `null` when every check passes. */
function checkColumnsCompatible(c1: DG.Column, c2: DG.Column): string | null {
  const isMacromolecule = (col: DG.Column): boolean => col.semType === DG.SEMTYPE.MACROMOLECULE;
  const tagOrEmpty = (col: DG.Column, tag: string): string => col.getTag(tag) ?? '';

  if (c1 === c2)
    return 'Please choose two distinct columns';

  if (!(isMacromolecule(c1) && isMacromolecule(c2)))
    return 'Both columns must be Macromolecule semantic type';

  // Units are compared exactly as returned by getTag, without the empty-string fallback used below.
  const sameUnits = c1.getTag(DG.TAGS.UNITS) === c2.getTag(DG.TAGS.UNITS);
  if (!sameUnits)
    return 'Columns must use the same notation (units)';

  const sameSeparator = tagOrEmpty(c1, bioTAGS.separator) === tagOrEmpty(c2, bioTAGS.separator);
  if (!sameSeparator)
    return 'Columns must use the same separator';

  const sameAlphabet = tagOrEmpty(c1, bioTAGS.alphabet) === tagOrEmpty(c2, bioTAGS.alphabet);
  if (!sameAlphabet)
    return 'Columns must use the same alphabet';

  return null;
}
|
|
58
|
+
|
|
59
|
+
/** Handler behind the `Bio | Analyze | Compare sequences` top-menu item.
 *
 * Reports an error and returns when there is no active table view, or when
 * `getMacromoleculeColumns()` yields fewer than two columns. Otherwise shows a dialog with two
 * column choosers (pre-selected to the first two columns) and an optional result name. On OK the
 * chosen columns are looked up in the view's dataframe and validated with
 * `checkColumnsCompatible`; if valid, the column built by `compareSequencesDo` is added to that
 * dataframe under a name that is not yet used. Any failure is reported via `grok.shell.error`. */
export function compareSequencesUI(): void {
  const view = grok.shell.tv;
  if (!view || !view.dataFrame) {
    grok.shell.error('No active table');
    return;
  }
  const table = view.dataFrame;

  const seqCols: DG.Column[] = getMacromoleculeColumns();
  if (seqCols.length < 2) {
    grok.shell.error('Current table needs at least two Macromolecule columns');
    return;
  }

  const colNames = seqCols.map((col) => col.name);
  const firstInput = ui.input.choice('Sequence column 1', {value: colNames[0], items: colNames});
  const secondInput = ui.input.choice('Sequence column 2', {value: colNames[1], items: colNames});
  const nameInput = ui.input.string('Result column name', {value: ''});
  nameInput.setTooltip('Leave empty to auto-generate from the chosen columns');

  // Runs when the dialog is confirmed: resolve, validate, then build and add the column.
  const addDifferenceColumn = (): void => {
    const first = table.col(firstInput.value ?? '');
    const second = table.col(secondInput.value ?? '');
    if (!first || !second) {
      grok.shell.error('Could not resolve chosen columns');
      return;
    }

    const problem = checkColumnsCompatible(first, second);
    if (problem != null) {
      grok.shell.error(problem);
      return;
    }

    // An empty or whitespace-only name falls back to the generated "<col1> vs <col2>".
    const typedName = (nameInput.value ?? '').trim();
    const requestedName = typedName || `${first.name} vs ${second.name}`;
    const uniqueName = table.columns.getUnusedName(requestedName);
    table.columns.add(compareSequencesDo(
      first as DG.Column<string>, second as DG.Column<string>, uniqueName));
  };

  ui.dialog({title: 'Compare Sequences'})
    .add(firstInput)
    .add(secondInput)
    .add(nameInput)
    .onOK(addDifferenceColumn)
    .show();
}
|