@datagrok/bio 2.4.28 → 2.4.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/package.json +1 -1
- package/src/analysis/sequence-space.ts +17 -5
- package/src/package.ts +15 -5
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.4.28",
|
|
8
|
+
"version": "2.4.29",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -8,6 +8,9 @@ import {ISequenceSpaceParams} from '@datagrok-libraries/ml/src/viewers/activity-
|
|
|
8
8
|
import {invalidateMols, MONOMERIC_COL_TAGS} from '../substructure-search/substructure-search';
|
|
9
9
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
10
10
|
import * as grok from 'datagrok-api/grok';
|
|
11
|
+
import { NotationConverter } from '@datagrok-libraries/bio/src/utils/notation-converter';
|
|
12
|
+
import { ALPHABET, NOTATION } from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
13
|
+
import { MmDistanceFunctionsNames } from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
11
14
|
|
|
12
15
|
export interface ISequenceSpaceResult {
|
|
13
16
|
distance: Matrix;
|
|
@@ -55,15 +58,24 @@ export async function sequenceSpaceByFingerprints(spaceParams: ISequenceSpacePar
|
|
|
55
58
|
}
|
|
56
59
|
|
|
57
60
|
export async function getSequenceSpace(spaceParams: ISequenceSpaceParams): Promise<ISequenceSpaceResult> {
|
|
58
|
-
const
|
|
59
|
-
if (
|
|
60
|
-
|
|
61
|
+
const nc = new NotationConverter(spaceParams.seqCol);
|
|
62
|
+
if (nc.isFasta() || (nc.isSeparator() && nc.alphabet && nc.alphabet !== ALPHABET.UN)) {
|
|
63
|
+
let distanceFName = MmDistanceFunctionsNames.LEVENSHTEIN;
|
|
64
|
+
let seqList = spaceParams.seqCol.toList();
|
|
65
|
+
if (nc.isSeparator()) {
|
|
66
|
+
const fastaCol = nc.convert(NOTATION.FASTA);
|
|
67
|
+
seqList = fastaCol.toList();
|
|
68
|
+
const uh = new UnitsHandler(fastaCol);
|
|
69
|
+
distanceFName = uh.getDistanceFunctionName();
|
|
70
|
+
}
|
|
71
|
+
else {
|
|
72
|
+
distanceFName = nc.getDistanceFunctionName();
|
|
73
|
+
}
|
|
61
74
|
const sequenceSpaceResult = await reduceDimensinalityWithNormalization(
|
|
62
|
-
|
|
75
|
+
seqList,
|
|
63
76
|
spaceParams.methodName,
|
|
64
77
|
distanceFName,
|
|
65
78
|
spaceParams.options);
|
|
66
|
-
console.log(sequenceSpaceResult);
|
|
67
79
|
const cols: DG.Column[] = spaceParams.embedAxesNames.map(
|
|
68
80
|
(name: string, index: number) => DG.Column.fromFloat32Array(name, sequenceSpaceResult.embedding[index]));
|
|
69
81
|
return {distance: sequenceSpaceResult.distance, coordinates: new DG.ColumnList(cols)};
|
package/src/package.ts
CHANGED
|
@@ -32,7 +32,7 @@ import {substructureSearchDialog} from './substructure-search/substructure-searc
|
|
|
32
32
|
import {saveAsFastaUI} from './utils/save-as-fasta';
|
|
33
33
|
import {BioSubstructureFilter} from './widgets/bio-substructure-filter';
|
|
34
34
|
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
35
|
-
import {getStats, splitterAsHelm, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
35
|
+
import {getStats, splitterAsHelm, TAGS as bioTAGS, ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
36
36
|
import {IMonomerLib} from '@datagrok-libraries/bio/src/types';
|
|
37
37
|
import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
|
|
38
38
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
@@ -53,6 +53,7 @@ import {demoBio03UI} from './demo/bio03-atomic-level';
|
|
|
53
53
|
import {demoBio05UI} from './demo/bio05-helm-msa-sequence-space';
|
|
54
54
|
import {checkInputColumnUI} from './utils/check-input-column';
|
|
55
55
|
import {multipleSequenceAlignmentUI} from './utils/multiple-sequence-alignment-ui';
|
|
56
|
+
import { NotationConverter } from '@datagrok-libraries/bio/src/utils/notation-converter';
|
|
56
57
|
|
|
57
58
|
export const _package = new DG.Package();
|
|
58
59
|
|
|
@@ -290,13 +291,22 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
290
291
|
'separator': macroMolecule.getTag(bioTAGS.separator),
|
|
291
292
|
'alphabet': macroMolecule.getTag(bioTAGS.alphabet),
|
|
292
293
|
};
|
|
293
|
-
const
|
|
294
|
+
const nc = new NotationConverter(macroMolecule);
|
|
294
295
|
let columnDistanceMetric = 'Tanimoto';
|
|
295
|
-
|
|
296
|
-
|
|
296
|
+
let seqCol = macroMolecule;
|
|
297
|
+
if (nc.isFasta() || (nc.isSeparator() && nc.alphabet && nc.alphabet !== ALPHABET.UN)){
|
|
298
|
+
if (nc.isFasta()){
|
|
299
|
+
columnDistanceMetric = nc.getDistanceFunctionName();
|
|
300
|
+
} else {
|
|
301
|
+
seqCol = nc.convert(NOTATION.FASTA);
|
|
302
|
+
const uh = new UnitsHandler(seqCol);
|
|
303
|
+
columnDistanceMetric = uh.getDistanceFunctionName();
|
|
304
|
+
tags.units = NOTATION.FASTA;
|
|
305
|
+
}
|
|
306
|
+
}
|
|
297
307
|
const sp = await getActivityCliffs(
|
|
298
308
|
df,
|
|
299
|
-
|
|
309
|
+
seqCol,
|
|
300
310
|
null,
|
|
301
311
|
axesNames,
|
|
302
312
|
'Activity cliffs', //scatterTitle
|