@datagrok/bio 1.5.1 → 1.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-test.js +230 -92
- package/dist/package.js +206 -74
- package/files/samples/id.csv +313 -0
- package/files/samples/sample_HELM.csv +540 -540
- package/files/samples/sample_MSA.csv +540 -540
- package/files/samples/sar-small.csv +201 -0
- package/package.json +1 -1
- package/setup.cmd +10 -1
- package/src/const.ts +0 -1
- package/src/package.ts +16 -14
- package/src/tests/convert-test.ts +31 -0
- package/src/tests/detectors-test.ts +26 -15
- package/src/utils/convert.ts +15 -3
- package/src/utils/multiple-sequence-alignment.ts +13 -8
- package/src/utils/notation-converter.ts +131 -0
- package/src/utils/sequence-activity-cliffs.ts +8 -2
- package/src/utils/sequence-space.ts +4 -4
- package/src/utils/split-to-monomers.ts +8 -0
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
|
|
3
|
+
// export const enum NOTATION {
|
|
4
|
+
// // these values can be changed to "user-friendly" ones later on
|
|
5
|
+
// FASTA = 'fasta',
|
|
6
|
+
// SEPARATOR = 'separator',
|
|
7
|
+
// HELM = 'helm'
|
|
8
|
+
// }
|
|
9
|
+
|
|
10
|
+
/**
 * Converts a macromolecule column between the 'fasta', 'separator' and 'helm' notations.
 *
 * The source notation is derived from the column's units tag (read via `DG.TAGS.UNITS`);
 * the target notation is supplied by the caller. The individual converters are still
 * placeholders: each returns a string column named 'converted' whose every cell holds a
 * marker such as 'fasta2sep'.
 */
export class NotationConverter {
  private readonly _sourceColumn: DG.Column; // the column to be converted
  private readonly _currentUnits: string; // units of the form fasta:SEQ:NT, etc.
  private readonly _sourceNotation: string; // current notation (without :SEQ:NT, etc.)
  private readonly _targetNotation: string;

  private get sourceColumn(): DG.Column { return this._sourceColumn; }
  private get currentUnits(): string { return this._currentUnits; }
  private get sourceNotation(): string { return this._sourceNotation; }
  private get targetNotation(): string { return this._targetNotation; }

  // these values can be changed to "user-friendly" ones later on
  private readonly _fasta = 'fasta';
  private readonly _separator = 'separator';
  private readonly _helm = 'helm';

  /** @returns true when the source column's notation was detected as fasta. */
  public isFasta(): boolean { return this.sourceNotation === this._fasta; }

  /** @returns true when the source column's notation was detected as separator. */
  public isSeparator(): boolean { return this.sourceNotation === this._separator; }

  /** @returns true when the source column's notation was detected as helm. */
  public isHelm(): boolean { return this.sourceNotation === this._helm; }

  /**
   * Derives the notation from the (case-insensitive) prefix of the units string.
   * Returns the same constants the rest of the class compares against, so that
   * isHelm() and convert() agree with the detected value (previously the helm branch
   * returned 'HELM', which never matched the 'helm' constant).
   */
  private determineSourceNotation(): string {
    const units = this.currentUnits.toLowerCase();
    if (units.startsWith('fasta'))
      return this._fasta;
    if (units.startsWith('separator'))
      return this._separator;
    // TODO: handle possible exceptions
    // Anything that is neither fasta nor separator is treated as helm.
    return this._helm;
  }

  /**
   * Builds the placeholder result shared by all converter stubs: a string column named
   * 'converted', as long as the source column, filled with `marker`.
   */
  private placeholderColumn(marker: string): DG.Column {
    const len = this.sourceColumn.length;
    const newColName = 'converted';
    const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill(marker));
    newColumn.semType = 'Macromolecule';
    return newColumn;
  }

  private convertFastaToSeparator(): DG.Column {
    // TODO: implementation
    return this.placeholderColumn('fasta2sep');
  }

  private convertFastaToHelm(): DG.Column {
    // TODO: implementation
    return this.placeholderColumn('fasta2helm');
  }

  private convertSeparatorToFasta(): DG.Column {
    // TODO: implementation
    return this.placeholderColumn('sep2fasta');
  }

  private convertSeparatorToHelm(): DG.Column {
    // TODO: implementation
    return this.placeholderColumn('sep2helm');
  }

  private convertHelmToFasta(): DG.Column {
    // TODO: implementation
    return this.placeholderColumn('helm2fasta');
  }

  private convertHelmToSeparator(): DG.Column {
    // TODO: implementation
    return this.placeholderColumn('helm2sep');
  }

  // TODO: write the bodies of converter methods
  /**
   * Dispatches to the converter matching the (source, target) notation pair.
   *
   * NOTE(review): every pair not listed explicitly — including a request whose target
   * equals the source, or an unrecognized target — still falls through to the
   * helm-to-separator converter, exactly as before. Confirm whether that should throw.
   *
   * @returns a new column produced by the selected converter
   */
  public convert(): DG.Column {
    const source = this.sourceNotation;
    const target = this.targetNotation;

    if (source === this._fasta && target === this._separator)
      return this.convertFastaToSeparator();
    if (source === this._fasta && target === this._helm)
      return this.convertFastaToHelm();
    if (source === this._separator && target === this._fasta)
      return this.convertSeparatorToFasta();
    if (source === this._separator && target === this._helm)
      return this.convertSeparatorToHelm();
    if (source === this._helm && target === this._fasta)
      return this.convertHelmToFasta();
    return this.convertHelmToSeparator();
  }

  /**
   * @param col the column to convert; its units tag must be set
   * @param target the notation to convert to ('fasta', 'separator' or 'helm')
   * @throws TypeError when the column carries no units tag, since the source notation
   *   cannot be determined without it
   */
  public constructor(col: DG.Column, target: string) {
    this._sourceColumn = col;
    const units: string | undefined | null = this._sourceColumn.tags[DG.TAGS.UNITS];
    if (units == null)
      throw new TypeError('NotationConverter: the source column has no units tag');
    this._currentUnits = units;
    this._sourceNotation = this.determineSourceNotation();
    this._targetNotation = target;
  }
}
|
|
@@ -1,9 +1,15 @@
|
|
|
1
1
|
import {IDrawTooltipParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
|
+
import {getSimilarityFromDistance} from '@datagrok-libraries/utils/src/similarity-metrics';
|
|
5
|
+
import { AvailableMetrics } from '@datagrok-libraries/ml/src/typed-metrics';
|
|
4
6
|
|
|
5
|
-
export async function sequenceGetSimilarities(col: DG.Column, seq: string): Promise<DG.Column | null>
|
|
6
|
-
|
|
7
|
+
/**
 * Scores every value of `col` against the reference sequence `seq`.
 *
 * For each value, the 'Levenshtein' entry of `AvailableMetrics['String']` is applied to
 * the pair (value, seq) and its result is passed through `getSimilarityFromDistance`.
 *
 * @param col column whose values are compared with `seq`
 * @param seq the reference sequence
 * @returns a float column with one score per input row; note that the column is named
 *   'distances' although it holds the converted values. This implementation never
 *   resolves to null.
 */
export async function sequenceGetSimilarities(col: DG.Column, seq: string): Promise<DG.Column | null> {
  const levenshtein = AvailableMetrics['String']['Levenshtein'];
  const scores = col.toList().map(
    (value: string) => getSimilarityFromDistance(levenshtein(value, seq)));
  return DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, 'distances', scores);
}
|
|
8
14
|
|
|
9
15
|
export function drawTooltip(params: IDrawTooltipParams) {
|
|
@@ -18,9 +18,9 @@ export async function sequenceSpace(spaceParams: ISequenceSpaceParams): Promise<
|
|
|
18
18
|
const sepFinal = sep ? sep === '.' ? '\\\.' : sep : '-';
|
|
19
19
|
const regex = new RegExp(sepFinal, 'g');
|
|
20
20
|
if (Object.keys(AvailableMetrics['String']).includes(spaceParams.similarityMetric))
|
|
21
|
-
preparedData = spaceParams.seqCol.toList().map((v) => v.replace(regex, '')) as string[];
|
|
21
|
+
preparedData = spaceParams.seqCol.toList().map((v: string) => v.replace(regex, '')) as string[];
|
|
22
22
|
else
|
|
23
|
-
preparedData = spaceParams.seqCol.toList().map((v) => v.replace(regex, '')) as string[];
|
|
23
|
+
preparedData = spaceParams.seqCol.toList().map((v: string) => v.replace(regex, '')) as string[];
|
|
24
24
|
} else {
|
|
25
25
|
preparedData = spaceParams.seqCol.toList();
|
|
26
26
|
}
|
|
@@ -31,13 +31,13 @@ export async function sequenceSpace(spaceParams: ISequenceSpaceParams): Promise<
|
|
|
31
31
|
spaceParams.similarityMetric as StringMetrics | BitArrayMetrics,
|
|
32
32
|
spaceParams.options);
|
|
33
33
|
const cols: DG.Column[] = spaceParams.embedAxesNames.map(
|
|
34
|
-
(name, index) => DG.Column.fromFloat32Array(name, sequenceSpaceResult.embedding[index]));
|
|
34
|
+
(name: string, index: number) => DG.Column.fromFloat32Array(name, sequenceSpaceResult.embedding[index]));
|
|
35
35
|
return {distance: sequenceSpaceResult.distance, coordinates: new DG.ColumnList(cols)};
|
|
36
36
|
}
|
|
37
37
|
|
|
38
38
|
|
|
39
39
|
/**
 * Proposes names for a new pair of embedding columns, `Embed_X_<n>` and `Embed_Y_<n>`.
 *
 * The starting index is the number of existing columns whose name contains 'Embed_X',
 * plus one. Counting alone can collide with an existing column when earlier embedding
 * columns were removed or renamed (e.g. only `Embed_X_1` and `Embed_X_3` remain), so the
 * index is advanced until neither generated name is already present in `df`.
 *
 * @param df the dataframe the new columns are going to be added to
 * @returns the two column names, X axis first
 */
export function getEmbeddingColsNames(df: DG.DataFrame): string[] {
  const axes = ['Embed_X', 'Embed_Y'];
  const existingNames: string[] = df.columns.names();
  const taken = new Set<string>(existingNames);
  const namesFor = (index: number): string[] => axes.map((it) => `${it}_${index}`);

  let colNameInd = existingNames.filter((it: string) => it.includes(axes[0])).length + 1;
  let names = namesFor(colNameInd);
  // Skip indices that are already in use so the result never clashes with a column.
  while (names.some((name) => taken.has(name)))
    names = namesFor(++colNameInd);
  return names;
}
|