@datagrok/bio 1.4.0 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +17 -4
- package/dist/package-test.js +1577 -160
- package/dist/package.js +1248 -60
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +8989 -0
- package/files/sample_FASTA.csv +66 -0
- package/files/sample_FASTA_with_activities.csv +66 -0
- package/files/sample_MSA.csv +541 -0
- package/files/samples/peptides_complex_msa.csv +10275 -0
- package/files/samples/peptides_simple_msa.csv +648 -0
- package/files/samples/sample_HELM.csv +541 -0
- package/files/samples/sample_MSA.csv +541 -0
- package/package.json +11 -7
- package/src/package-test.ts +3 -1
- package/src/package.ts +94 -33
- package/src/tests/activity-cliffs-tests.ts +49 -0
- package/src/tests/detectors-test.ts +132 -34
- package/src/tests/sequence-space-test.ts +26 -0
- package/src/tests/utils.ts +21 -2
- package/src/utils/convert.ts +23 -0
- package/src/utils/multiple-sequence-alignment.ts +2 -33
- package/src/utils/sequence-activity-cliffs.ts +30 -0
- package/src/utils/sequence-space.ts +43 -0
|
@@ -26,37 +26,6 @@ function _fastaToStrings(fasta: string): string[] {
|
|
|
26
26
|
return fasta.replace(/>sample\d+(\r\n|\r|\n)/g, '').split('\n');
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
-
/**
 * Rewrites an aligned sequence so that every non-gap symbol is preceded by '-'.
 * Gap symbols ('-') already present in the input are copied through unchanged.
 *
 * @param {string} seq Source sequence.
 * @return {string} Formatted sequence.
 */
function _castAligned(seq: string): string {
  // Array.from walks the string the same way `for...of` does.
  const pieces = Array.from(seq, (symbol) => (symbol === '-' ? symbol : `-${symbol}`));
  return pieces.join('');
}
|
|
43
|
-
|
|
44
|
-
/**
 * Applies `_castAligned` to every sequence of an alignment.
 *
 * @param {string[]} alignment List of aligned sequences.
 * @return {string[]} New array of the same length holding the formatted sequences.
 */
function _stringsToAligned(alignment: string[]): string[] {
  // Array.from visits every index from 0 to length - 1, like the indexed loop it replaces.
  return Array.from(alignment, (sequence) => _castAligned(sequence));
}
|
|
59
|
-
|
|
60
29
|
/**
|
|
61
30
|
* Runs Aioli environment with kalign tool.
|
|
62
31
|
*
|
|
@@ -86,8 +55,8 @@ export async function runKalign(col: DG.Column, isAligned = false) : Promise<DG.
|
|
|
86
55
|
console.warn(output);
|
|
87
56
|
|
|
88
57
|
const aligned = _fastaToStrings(buf).slice(0, sequences.length);
|
|
89
|
-
const alignedCol = DG.Column.fromStrings(`msa(${col.name})`,
|
|
90
|
-
alignedCol.setTag(DG.TAGS.UNITS, '');
|
|
58
|
+
const alignedCol = DG.Column.fromStrings(`msa(${col.name})`, aligned);
|
|
59
|
+
alignedCol.setTag(DG.TAGS.UNITS, '');
|
|
91
60
|
alignedCol.semType = C.SEM_TYPES.Macro_Molecule;
|
|
92
61
|
return alignedCol;
|
|
93
62
|
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import {IDrawTooltipParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import * as ui from 'datagrok-api/ui';
|
|
4
|
+
|
|
5
|
+
/**
 * Placeholder: neither argument is read and the returned promise always resolves to null.
 *
 * @param {DG.Column} col Sequence column (currently unused).
 * @param {string} seq Reference sequence (currently unused).
 * @return {Promise<DG.Column | null>} Always resolves to null.
 */
export async function sequenceGetSimilarities(col: DG.Column, seq: string): Promise<DG.Column | null> {
  const similarities: DG.Column | null = null;
  return similarities;
}
|
|
8
|
+
|
|
9
|
+
/**
 * Builds the tooltip element for one line and stores it in `params.tooltips`
 * under the key `params.line.id`.
 *
 * The tooltip is a horizontal container. Its first child is a bold column with two
 * labels: the text 'sequence' and the name of the activity column. It is followed by
 * one vertical block per row index in `params.line.mols`, showing that row's value
 * from the sequence column and its activity formatted with two decimals.
 *
 * @param {IDrawTooltipParams} params Supplies the `tooltips` map to write into, the
 *   `line` (its `id` and `mols` row indices), the data frame `df`, and the `seqCol`
 *   and `activity` columns whose names are used for the lookups.
 */
export function drawTooltip(params: IDrawTooltipParams) {
  // Keep a direct reference instead of re-reading the map entry for every append.
  const tooltip = ui.divH([]);
  params.tooltips[params.line.id] = tooltip;

  const columnNames = ui.divV([
    ui.divText('sequence'),
    ui.divText(params.activity.name),
  ]);
  columnNames.style.fontWeight = 'bold';
  columnNames.style.display = 'flex';
  columnNames.style.justifyContent = 'space-between';
  tooltip.append(columnNames);

  params.line.mols.forEach((mol: number) => {
    const seq = ui.divText(params.df.get(params.seqCol.name, mol));

    const activityValue = params.df.get(params.activity.name, mol);
    // Calling `.toFixed` on null/undefined would throw, so such values are
    // rendered as an empty cell instead.
    const activity = ui.divText(activityValue == null ? '' : activityValue.toFixed(2));
    activity.style.display = 'flex';
    activity.style.justifyContent = 'left';
    activity.style.paddingLeft = '30px';

    tooltip.append(ui.divV([
      seq,
      activity,
    ], {style: {paddingLeft: '5px'}}));
  });
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
3
|
+
import {reduceDimensinalityWithNormalization} from '@datagrok-libraries/ml/src/sequence-space';
|
|
4
|
+
import {BitArrayMetrics, StringMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
5
|
+
import {Matrix} from '@datagrok-libraries/utils/src/type-declarations';
|
|
6
|
+
import BitArray from '@datagrok-libraries/utils/src/bit-array';
|
|
7
|
+
import {ISequenceSpaceParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
8
|
+
|
|
9
|
+
/** Value resolved by `sequenceSpace`. */
export interface ISequenceSpaceResult {
  /** The `distance` field of the dimensionality-reduction result, passed through as is. */
  distance: Matrix;
  /** Columns created from the embedding, one per requested axis name. */
  coordinates: DG.ColumnList;
}
|
|
13
|
+
|
|
14
|
+
/**
 * Runs dimensionality reduction over the values of `spaceParams.seqCol`.
 *
 * When the column's units tag is not 'HELM', every occurrence of the separator is
 * removed from each value first. The separator is the column's 'separator' tag, or
 * '-' when the tag is absent or empty. Columns tagged 'HELM' are passed on unchanged.
 *
 * @param {ISequenceSpaceParams} spaceParams Sequence column, reduction method name,
 *   similarity metric, method options and the names of the embedding axes.
 * @return {Promise<ISequenceSpaceResult>} The reducer's distance matrix plus one
 *   float column per entry of `spaceParams.embedAxesNames`.
 */
export async function sequenceSpace(spaceParams: ISequenceSpaceParams): Promise<ISequenceSpaceResult> {
  const seqCol = spaceParams.seqCol;

  let preparedData: string[];
  if (seqCol.tags[DG.TAGS.UNITS] === 'HELM') {
    preparedData = seqCol.toList();
  } else {
    // `||` on purpose: an empty-string tag falls back to '-' as well.
    const separator: string = seqCol.getTag('separator') || '-';
    // split/join treats the separator as literal text, so separators that are regex
    // metacharacters ('.', '|', '*', '+', ...) are stripped correctly without escaping.
    // NOTE(review): the previous code branched on whether the metric is a String
    // metric, but both branches did this same transformation. Presumably a different
    // encoding was planned for the other metrics; confirm before adding it back.
    preparedData = seqCol.toList().map((value: string) => value.split(separator).join(''));
  }

  const sequenceSpaceResult = await reduceDimensinalityWithNormalization(
    preparedData,
    spaceParams.methodName,
    spaceParams.similarityMetric as StringMetrics | BitArrayMetrics,
    spaceParams.options);

  // The i-th embedding array becomes the column named by the i-th axis name.
  const cols: DG.Column[] = spaceParams.embedAxesNames.map(
    (name, index) => DG.Column.fromFloat32Array(name, sequenceSpaceResult.embedding[index]));
  return {distance: sequenceSpaceResult.distance, coordinates: new DG.ColumnList(cols)};
}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
/**
 * Produces the names for a new pair of embedding columns.
 *
 * The numeric suffix is one more than the number of columns in `df` whose name
 * contains 'Embed_X'.
 *
 * @param {DG.DataFrame} df Data frame whose column names are inspected.
 * @return {string[]} `['Embed_X_<n>', 'Embed_Y_<n>']`.
 */
export function getEmbeddingColsNames(df: DG.DataFrame) {
  const [xAxis, yAxis] = ['Embed_X', 'Embed_Y'];

  let existing = 0;
  for (const colName of df.columns.names()) {
    if (colName.includes(xAxis))
      existing++;
  }

  const suffix = existing + 1;
  return [`${xAxis}_${suffix}`, `${yAxis}_${suffix}`];
}
|