@datagrok/bio 1.4.2 → 1.5.1
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/detectors.js +17 -4
- package/dist/package-test.js +851 -639
- package/dist/package.js +656 -587
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +1665 -1651
- package/files/sample_MSA.csv +541 -0
- package/package.json +7 -6
- package/src/package-test.ts +1 -0
- package/src/package.ts +68 -23
- package/src/tests/activity-cliffs-tests.ts +49 -0
- package/src/tests/detectors-test.ts +132 -34
- package/src/tests/sequence-space-test.ts +21 -19
- package/src/tests/utils.ts +9 -3
- package/src/utils/convert.ts +8 -9
- package/src/utils/multiple-sequence-alignment.ts +1 -1
- package/src/utils/sequence-activity-cliffs.ts +30 -0
- package/src/utils/sequence-space.ts +30 -30
|
@@ -1,43 +1,43 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
|
-
import {
|
|
2
|
+
import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
3
3
|
import {reduceDimensinalityWithNormalization} from '@datagrok-libraries/ml/src/sequence-space';
|
|
4
4
|
import {BitArrayMetrics, StringMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
5
|
-
import {
|
|
5
|
+
import {Matrix} from '@datagrok-libraries/utils/src/type-declarations';
|
|
6
6
|
import BitArray from '@datagrok-libraries/utils/src/bit-array';
|
|
7
|
+
import {ISequenceSpaceParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
7
8
|
|
|
8
9
|
export interface ISequenceSpaceResult {
|
|
9
10
|
distance: Matrix;
|
|
10
11
|
coordinates: DG.ColumnList;
|
|
11
12
|
}
|
|
12
13
|
|
|
13
|
-
export async function sequenceSpace(
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
} else {
|
|
26
|
-
preparedData = molColumn.toList();
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
const sequenceSpaceResult = await reduceDimensinalityWithNormalization(
|
|
30
|
-
preparedData,
|
|
31
|
-
methodName,
|
|
32
|
-
similarityMetric as StringMetrics|BitArrayMetrics,
|
|
33
|
-
options);
|
|
34
|
-
const cols: DG.Column[] = axes.map((name, index) => DG.Column.fromFloat32Array(name, sequenceSpaceResult.embedding[index]))
|
|
35
|
-
return {distance: sequenceSpaceResult.distance, coordinates: new DG.ColumnList(cols)};
|
|
14
|
+
export async function sequenceSpace(spaceParams: ISequenceSpaceParams): Promise<ISequenceSpaceResult> {
|
|
15
|
+
let preparedData: any;
|
|
16
|
+
if (!(spaceParams.seqCol!.tags[DG.TAGS.UNITS] === 'HELM')) {
|
|
17
|
+
const sep = spaceParams.seqCol.getTag('separator');
|
|
18
|
+
const sepFinal = sep ? sep === '.' ? '\\\.' : sep : '-';
|
|
19
|
+
const regex = new RegExp(sepFinal, 'g');
|
|
20
|
+
if (Object.keys(AvailableMetrics['String']).includes(spaceParams.similarityMetric))
|
|
21
|
+
preparedData = spaceParams.seqCol.toList().map((v) => v.replace(regex, '')) as string[];
|
|
22
|
+
else
|
|
23
|
+
preparedData = spaceParams.seqCol.toList().map((v) => v.replace(regex, '')) as string[];
|
|
24
|
+
} else {
|
|
25
|
+
preparedData = spaceParams.seqCol.toList();
|
|
36
26
|
}
|
|
37
27
|
|
|
28
|
+
const sequenceSpaceResult = await reduceDimensinalityWithNormalization(
|
|
29
|
+
preparedData,
|
|
30
|
+
spaceParams.methodName,
|
|
31
|
+
spaceParams.similarityMetric as StringMetrics | BitArrayMetrics,
|
|
32
|
+
spaceParams.options);
|
|
33
|
+
const cols: DG.Column[] = spaceParams.embedAxesNames.map(
|
|
34
|
+
(name, index) => DG.Column.fromFloat32Array(name, sequenceSpaceResult.embedding[index]));
|
|
35
|
+
return {distance: sequenceSpaceResult.distance, coordinates: new DG.ColumnList(cols)};
|
|
36
|
+
}
|
|
37
|
+
|
|
38
38
|
|
|
39
|
-
export function getEmbeddingColsNames(df: DG.DataFrame){
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
39
|
+
export function getEmbeddingColsNames(df: DG.DataFrame) {
|
|
40
|
+
const axes = ['Embed_X', 'Embed_Y'];
|
|
41
|
+
const colNameInd = df.columns.names().filter((it) => it.includes(axes[0])).length + 1;
|
|
42
|
+
return axes.map((it) => `${it}_${colNameInd}`);
|
|
43
|
+
}
|