@datagrok/bio 2.4.30 → 2.4.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/258.js.map +1 -1
- package/dist/457.js +2 -0
- package/dist/457.js.map +1 -0
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/package.json +3 -3
- package/scripts/sequence_generator.py +34 -13
- package/src/analysis/sequence-activity-cliffs.ts +2 -2
- package/src/analysis/sequence-space.ts +1 -1
- package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +2 -1
- package/src/demo/bio05-helm-msa-sequence-space.ts +4 -3
- package/src/demo/utils.ts +3 -1
- package/src/package.ts +9 -7
- package/src/tests/activity-cliffs-tests.ts +3 -2
- package/src/tests/activity-cliffs-utils.ts +2 -1
- package/src/tests/sequence-space-test.ts +3 -2
- package/src/tests/sequence-space-utils.ts +4 -2
- package/dist/705.js +0 -2
- package/dist/705.js.map +0 -1
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.4.30",
|
|
8
|
+
"version": "2.4.31",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -16,9 +16,9 @@
|
|
|
16
16
|
"@biowasm/aioli": "^3.1.0",
|
|
17
17
|
"@datagrok-libraries/bio": "^5.30.0",
|
|
18
18
|
"@datagrok-libraries/chem-meta": "^1.0.1",
|
|
19
|
-
"@datagrok-libraries/ml": "^6.3.
|
|
19
|
+
"@datagrok-libraries/ml": "^6.3.27",
|
|
20
20
|
"@datagrok-libraries/tutorials": "^1.3.2",
|
|
21
|
-
"@datagrok-libraries/utils": "^4.0.
|
|
21
|
+
"@datagrok-libraries/utils": "^4.0.8",
|
|
22
22
|
"cash-dom": "^8.0.0",
|
|
23
23
|
"css-loader": "^6.7.3",
|
|
24
24
|
"datagrok-api": "^1.13.3",
|
|
@@ -3,8 +3,8 @@
|
|
|
3
3
|
# description: Create the model peptides/DNA sequences with peptides data
|
|
4
4
|
# language: python
|
|
5
5
|
# tags: template, demo
|
|
6
|
-
# input: int clusters =
|
|
7
|
-
# input: int num_sequences =
|
|
6
|
+
# input: int clusters = 5 [Number of superclusters]
|
|
7
|
+
# input: int num_sequences = 50 [Number of sequences in each supercluster]
|
|
8
8
|
# input: int motif_length = 12 [Average length of motif]
|
|
9
9
|
# input: int max_variants_position = 3 [Maximum number of different letters in conservative position in motif]
|
|
10
10
|
# input: int random_length = 3 [Average length of random sequence parts before and after motif]
|
|
@@ -59,7 +59,9 @@ def generate_motif_template(
|
|
|
59
59
|
|
|
60
60
|
|
|
61
61
|
def generate_motif(template: motif_template_type, alphabet: alphabet_type) -> str:
|
|
62
|
-
template_with_any = [
|
|
62
|
+
template_with_any = [
|
|
63
|
+
(letters if not "?" in letters else alphabet) for letters in template
|
|
64
|
+
]
|
|
63
65
|
return "".join([random.choice(letters) for letters in template_with_any])
|
|
64
66
|
|
|
65
67
|
|
|
@@ -70,18 +72,24 @@ def motif_notation(motif_template: motif_template_type) -> str:
|
|
|
70
72
|
else:
|
|
71
73
|
return f"[{''.join(letter_choice)}]"
|
|
72
74
|
|
|
73
|
-
return "".join(
|
|
75
|
+
return "".join(
|
|
76
|
+
[motif_notation_code(letter_choice) for letter_choice in motif_template]
|
|
77
|
+
)
|
|
74
78
|
|
|
75
79
|
|
|
76
80
|
def generate_random(n: int, alphabet: alphabet_type) -> str:
|
|
77
81
|
return "".join([random.choice(alphabet) for i in range(n)])
|
|
78
82
|
|
|
79
83
|
|
|
80
|
-
def make_cliff(
|
|
84
|
+
def make_cliff(
|
|
85
|
+
motif_template: motif_template_type, alphabet: alphabet_type, motif: str
|
|
86
|
+
) -> str:
|
|
81
87
|
# Mutate conservative letter in motif
|
|
82
88
|
pos = random.randrange(len(motif_template))
|
|
83
89
|
while "?" in motif_template[pos]:
|
|
84
|
-
pos = (pos + 1) % len(
|
|
90
|
+
pos = (pos + 1) % len(
|
|
91
|
+
motif_template
|
|
92
|
+
) # always will find letters since ends of motif can't be any symbol
|
|
85
93
|
outlier_letters = list(set(alphabet) - set(motif_template[pos]))
|
|
86
94
|
return motif[:pos] + random.choice(outlier_letters) + motif[pos + 1 :]
|
|
87
95
|
|
|
@@ -97,7 +105,9 @@ def generate_cluster(
|
|
|
97
105
|
cliff_probability: float,
|
|
98
106
|
cliff_strength: float,
|
|
99
107
|
) -> Iterator[sequence_record_type]:
|
|
100
|
-
motif_template = generate_motif_template(
|
|
108
|
+
motif_template = generate_motif_template(
|
|
109
|
+
motif_length, alphabet, max_variants_position
|
|
110
|
+
)
|
|
101
111
|
|
|
102
112
|
activity_average = random.random() * 10
|
|
103
113
|
activity_dispersion = random.random()
|
|
@@ -166,7 +176,9 @@ def generate_sequences(
|
|
|
166
176
|
cliff_probability,
|
|
167
177
|
cliff_strength,
|
|
168
178
|
):
|
|
169
|
-
sequences.append(
|
|
179
|
+
sequences.append(
|
|
180
|
+
(n_cluster, f"c{n_cluster}_s{n_seq}", seq, activity, is_cliff)
|
|
181
|
+
)
|
|
170
182
|
return headers, sequences
|
|
171
183
|
|
|
172
184
|
|
|
@@ -178,15 +190,19 @@ def parse_command_line_args() -> Any:
|
|
|
178
190
|
epilog="Utility support: Gennadii Zakharov",
|
|
179
191
|
)
|
|
180
192
|
|
|
181
|
-
parser.add_argument(
|
|
193
|
+
parser.add_argument(
|
|
194
|
+
"-c", "--clusters", type=int, default=5, help="Number of superclusters"
|
|
195
|
+
)
|
|
182
196
|
parser.add_argument(
|
|
183
197
|
"-s",
|
|
184
198
|
"--sequences",
|
|
185
199
|
type=int,
|
|
186
|
-
default=
|
|
200
|
+
default=50,
|
|
187
201
|
help="Number of sequences in each supercluster",
|
|
188
202
|
)
|
|
189
|
-
parser.add_argument(
|
|
203
|
+
parser.add_argument(
|
|
204
|
+
"-m,", "--motif-length", type=int, default=12, help="Average length of motif"
|
|
205
|
+
)
|
|
190
206
|
|
|
191
207
|
parser.add_argument(
|
|
192
208
|
"-r,",
|
|
@@ -208,7 +224,8 @@ def parse_command_line_args() -> Any:
|
|
|
208
224
|
"--alphabet",
|
|
209
225
|
type=str,
|
|
210
226
|
default=list(alphabets.keys())[0],
|
|
211
|
-
help=f"Sequence alphabet: {available_alphabets}. Custom alphabet is a list of values separated "
|
|
227
|
+
help=f"Sequence alphabet: {available_alphabets}. Custom alphabet is a list of values separated "
|
|
228
|
+
f"by comma",
|
|
212
229
|
)
|
|
213
230
|
parser.add_argument(
|
|
214
231
|
"--max-variants-position",
|
|
@@ -258,7 +275,11 @@ if not grok:
|
|
|
258
275
|
cliff_probability = args.cliff_probability
|
|
259
276
|
cliff_strength = args.cliff_strength
|
|
260
277
|
|
|
261
|
-
alphabet: alphabet_type =
|
|
278
|
+
alphabet: alphabet_type = (
|
|
279
|
+
alphabets[alphabet_key].split(",")
|
|
280
|
+
if alphabet_key in alphabets
|
|
281
|
+
else alphabet_key.split(",")
|
|
282
|
+
)
|
|
262
283
|
|
|
263
284
|
# Running sequence generator
|
|
264
285
|
header, data = generate_sequences(
|
|
@@ -4,7 +4,7 @@ import * as DG from 'datagrok-api/dg';
|
|
|
4
4
|
|
|
5
5
|
import {ITooltipAndPanelParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
6
6
|
import {getSimilarityFromDistance} from '@datagrok-libraries/ml/src/distance-metrics-methods';
|
|
7
|
-
import {AvailableMetrics,
|
|
7
|
+
import {AvailableMetrics, DistanceMetricsSubjects, StringMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
8
8
|
import {drawMoleculeDifferenceOnCanvas} from '../utils/cell-renderer';
|
|
9
9
|
import * as C from '../utils/constants';
|
|
10
10
|
import {GridColumn} from 'datagrok-api/dg';
|
|
@@ -15,7 +15,7 @@ export async function getDistances(col: DG.Column, seq: string): Promise<Array<n
|
|
|
15
15
|
const stringArray = col.toList();
|
|
16
16
|
const distances = new Array(stringArray.length).fill(0);
|
|
17
17
|
const distanceMethod: (x: string, y: string) => number =
|
|
18
|
-
AvailableMetrics[
|
|
18
|
+
AvailableMetrics[DistanceMetricsSubjects.String][StringMetricsNames.Levenshtein];
|
|
19
19
|
for (let i = 0; i < stringArray.length; ++i) {
|
|
20
20
|
const distance = stringArray[i] ? distanceMethod(stringArray[i], seq) : null;
|
|
21
21
|
distances[i] = distance ? distance / Math.max((stringArray[i] as string).length, seq.length) : null;
|
|
@@ -44,7 +44,7 @@ export async function sequenceSpace(spaceParams: ISequenceSpaceParams): Promise<
|
|
|
44
44
|
|
|
45
45
|
export async function sequenceSpaceByFingerprints(spaceParams: ISequenceSpaceParams): Promise<ISequenceSpaceResult> {
|
|
46
46
|
if (spaceParams.seqCol.version !== spaceParams.seqCol.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION])
|
|
47
|
-
await invalidateMols(spaceParams.seqCol
|
|
47
|
+
await invalidateMols(spaceParams.seqCol as unknown as DG.Column<string>, false); //we expect only string columns here
|
|
48
48
|
|
|
49
49
|
const result = await grok.functions.call('Chem:getChemSpaceEmbeddings', {
|
|
50
50
|
col: spaceParams.seqCol.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS],
|
|
@@ -12,6 +12,7 @@ import {getTreeHelper, ITreeHelper} from '@datagrok-libraries/bio/src/trees/tree
|
|
|
12
12
|
import {getDendrogramService, IDendrogramService} from '@datagrok-libraries/bio/src/trees/dendrogram';
|
|
13
13
|
import {handleError} from './utils';
|
|
14
14
|
import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
15
|
+
import { DimReductionMethods } from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
15
16
|
|
|
16
17
|
const dataFn: string = 'data/sample_FASTA_PT_activity.csv';
|
|
17
18
|
|
|
@@ -23,7 +24,7 @@ export async function demoBio01bUI() {
|
|
|
23
24
|
let view: DG.TableView;
|
|
24
25
|
let activityCliffsViewer: DG.ScatterPlotViewer;
|
|
25
26
|
|
|
26
|
-
const dimRedMethod:
|
|
27
|
+
const dimRedMethod: DimReductionMethods = DimReductionMethods.UMAP;
|
|
27
28
|
const idRows: { [id: number]: number } = {};
|
|
28
29
|
|
|
29
30
|
try {
|
|
@@ -7,8 +7,9 @@ import {handleError} from './utils';
|
|
|
7
7
|
|
|
8
8
|
import {IWebLogoViewer} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
9
9
|
import {pepseaMethods, runPepsea} from '../utils/pepsea';
|
|
10
|
-
import {StringMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
11
10
|
import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
11
|
+
import { DimReductionMethods } from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
12
|
+
import { MmDistanceFunctionsNames } from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
12
13
|
|
|
13
14
|
const helmFn: string = 'samples/sample_HELM.csv';
|
|
14
15
|
|
|
@@ -22,7 +23,7 @@ export async function demoBio05UI(): Promise<void> {
|
|
|
22
23
|
|
|
23
24
|
const helmColName: string = 'HELM';
|
|
24
25
|
const msaHelmColName: string = 'msa(HELM)';
|
|
25
|
-
const dimRedMethod:
|
|
26
|
+
const dimRedMethod: DimReductionMethods = DimReductionMethods.UMAP;
|
|
26
27
|
|
|
27
28
|
try {
|
|
28
29
|
const demoScript = new DemoScript(
|
|
@@ -52,7 +53,7 @@ export async function demoBio05UI(): Promise<void> {
|
|
|
52
53
|
})
|
|
53
54
|
.step('Build sequence space', async () => {
|
|
54
55
|
ssViewer = (await sequenceSpaceTopMenu(df, msaHelmCol,
|
|
55
|
-
dimRedMethod,
|
|
56
|
+
dimRedMethod, MmDistanceFunctionsNames.LEVENSHTEIN, true)) as DG.ScatterPlotViewer;
|
|
56
57
|
view.dockManager.dock(ssViewer, DG.DOCK_TYPE.RIGHT, null, 'Sequence Space', 0.35);
|
|
57
58
|
}, {
|
|
58
59
|
description: 'Reduce sequence space dimensionality to display on 2D representation.',
|
package/src/demo/utils.ts
CHANGED
|
@@ -6,6 +6,8 @@ import {_package, sequenceSpaceTopMenu} from '../package';
|
|
|
6
6
|
import {reduceDimensinalityWithNormalization} from '@datagrok-libraries/ml/src/sequence-space';
|
|
7
7
|
import {StringMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
8
8
|
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
9
|
+
import { DimReductionMethods } from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
10
|
+
import { MmDistanceFunctionsNames } from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
9
11
|
|
|
10
12
|
enum EMBED_COL_NAMES {
|
|
11
13
|
X = 'Embed_X',
|
|
@@ -63,7 +65,7 @@ export async function demoSequenceSpace(
|
|
|
63
65
|
})) as DG.ScatterPlotViewer;
|
|
64
66
|
} else {
|
|
65
67
|
resSpaceViewer = (await sequenceSpaceTopMenu(df, df.getCol(colName),
|
|
66
|
-
|
|
68
|
+
DimReductionMethods.UMAP, MmDistanceFunctionsNames.LEVENSHTEIN, true)) as DG.ScatterPlotViewer;
|
|
67
69
|
}
|
|
68
70
|
view.dockManager.dock(resSpaceViewer!, DG.DOCK_TYPE.RIGHT, null, 'Sequence Space', 0.35);
|
|
69
71
|
return resSpaceViewer;
|
package/src/package.ts
CHANGED
|
@@ -10,7 +10,7 @@ import {
|
|
|
10
10
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
11
11
|
import {SequenceAlignment} from './seq_align';
|
|
12
12
|
import {getEmbeddingColsNames, sequenceSpaceByFingerprints, getSequenceSpace} from './analysis/sequence-space';
|
|
13
|
-
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
13
|
+
import {ISequenceSpaceParams, getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
14
14
|
import {
|
|
15
15
|
createLinesGrid,
|
|
16
16
|
createPropPanelElement,
|
|
@@ -43,7 +43,7 @@ import {
|
|
|
43
43
|
LIB_STORAGE_NAME, LibSettings, getUserLibSettings, setUserLibSetting, getLibFileNameList
|
|
44
44
|
} from './utils/monomer-lib';
|
|
45
45
|
import {getMacromoleculeColumn} from './utils/ui-utils';
|
|
46
|
-
import {ITSNEOptions, IUMAPOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
46
|
+
import {DimReductionMethods, ITSNEOptions, IUMAPOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
47
47
|
import {SequenceSpaceFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/seq-space-editor';
|
|
48
48
|
import {ActivityCliffsFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/activity-cliffs-editor';
|
|
49
49
|
import {demoBio01UI} from './demo/bio01-similarity-diversity';
|
|
@@ -53,6 +53,8 @@ import {demoBio03UI} from './demo/bio03-atomic-level';
|
|
|
53
53
|
import {demoBio05UI} from './demo/bio05-helm-msa-sequence-space';
|
|
54
54
|
import {checkInputColumnUI} from './utils/check-input-column';
|
|
55
55
|
import {multipleSequenceAlignmentUI} from './utils/multiple-sequence-alignment-ui';
|
|
56
|
+
import { MmDistanceFunctionsNames } from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
57
|
+
import { BitArrayMetrics, BitArrayMetricsNames, StringMetricsNames } from '@datagrok-libraries/ml/src/typed-metrics';
|
|
56
58
|
import { NotationConverter } from '@datagrok-libraries/bio/src/utils/notation-converter';
|
|
57
59
|
|
|
58
60
|
export const _package = new DG.Package();
|
|
@@ -280,7 +282,7 @@ export function SeqActivityCliffsEditor(call: DG.FuncCall) {
|
|
|
280
282
|
//output: viewer result
|
|
281
283
|
//editor: Bio:SeqActivityCliffsEditor
|
|
282
284
|
export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column, activities: DG.Column,
|
|
283
|
-
similarity: number, methodName:
|
|
285
|
+
similarity: number, methodName: DimReductionMethods, options?: IUMAPOptions | ITSNEOptions
|
|
284
286
|
): Promise<DG.Viewer | undefined> {
|
|
285
287
|
if (!checkInputColumnUI(macroMolecule, 'Activity Cliffs'))
|
|
286
288
|
return;
|
|
@@ -292,7 +294,7 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
292
294
|
'alphabet': macroMolecule.getTag(bioTAGS.alphabet),
|
|
293
295
|
};
|
|
294
296
|
const nc = new NotationConverter(macroMolecule);
|
|
295
|
-
let columnDistanceMetric =
|
|
297
|
+
let columnDistanceMetric: BitArrayMetricsNames | MmDistanceFunctionsNames = BitArrayMetricsNames.Tanimoto;
|
|
296
298
|
let seqCol = macroMolecule;
|
|
297
299
|
if (nc.isFasta() || (nc.isSeparator() && nc.alphabet && nc.alphabet !== ALPHABET.UN)){
|
|
298
300
|
if (nc.isFasta()){
|
|
@@ -347,8 +349,8 @@ export function SequenceSpaceEditor(call: DG.FuncCall) {
|
|
|
347
349
|
//input: bool plotEmbeddings = true
|
|
348
350
|
//input: object options {optional: true}
|
|
349
351
|
//editor: Bio:SequenceSpaceEditor
|
|
350
|
-
export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: DG.Column, methodName:
|
|
351
|
-
similarityMetric:
|
|
352
|
+
export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: DG.Column, methodName: DimReductionMethods,
|
|
353
|
+
similarityMetric: BitArrayMetrics | MmDistanceFunctionsNames = BitArrayMetricsNames.Tanimoto, plotEmbeddings: boolean, options?: IUMAPOptions | ITSNEOptions
|
|
352
354
|
): Promise<DG.Viewer | undefined> {
|
|
353
355
|
// Delay is required for initial function dialog to close before starting invalidating of molfiles.
|
|
354
356
|
// Otherwise, dialog is freezing
|
|
@@ -360,7 +362,7 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
360
362
|
const withoutEmptyValues = DG.DataFrame.fromColumns([macroMolecule]).clone();
|
|
361
363
|
const emptyValsIdxs = removeEmptyStringRows(withoutEmptyValues, macroMolecule);
|
|
362
364
|
|
|
363
|
-
const chemSpaceParams = {
|
|
365
|
+
const chemSpaceParams: ISequenceSpaceParams = {
|
|
364
366
|
seqCol: withoutEmptyValues.col(macroMolecule.name)!,
|
|
365
367
|
methodName: methodName,
|
|
366
368
|
similarityMetric: similarityMetric,
|
|
@@ -6,6 +6,7 @@ import {after, before, category, test} from '@datagrok-libraries/utils/src/test'
|
|
|
6
6
|
|
|
7
7
|
import {readDataframe} from './utils';
|
|
8
8
|
import {_testActivityCliffsOpen} from './activity-cliffs-utils';
|
|
9
|
+
import { DimReductionMethods } from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
category('activityCliffs', async () => {
|
|
@@ -33,7 +34,7 @@ category('activityCliffs', async () => {
|
|
|
33
34
|
actCliffsTableView = grok.shell.addTableView(actCliffsDf);
|
|
34
35
|
viewList.push(actCliffsTableView);
|
|
35
36
|
|
|
36
|
-
await _testActivityCliffsOpen(actCliffsDf, 57,
|
|
37
|
+
await _testActivityCliffsOpen(actCliffsDf, 57, DimReductionMethods.UMAP, 'MSA');
|
|
37
38
|
}, {skipReason: 'GROK-12774'});
|
|
38
39
|
|
|
39
40
|
test('activityCliffsWithEmptyRows', async () => {
|
|
@@ -42,6 +43,6 @@ category('activityCliffs', async () => {
|
|
|
42
43
|
actCliffsTableViewWithEmptyRows = grok.shell.addTableView(actCliffsDfWithEmptyRows);
|
|
43
44
|
viewList.push(actCliffsTableViewWithEmptyRows);
|
|
44
45
|
|
|
45
|
-
await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, 57,
|
|
46
|
+
await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, 57, DimReductionMethods.UMAP, 'MSA');
|
|
46
47
|
}, {skipReason: 'GROK-12774'});
|
|
47
48
|
});
|
|
@@ -3,8 +3,9 @@ import * as grok from 'datagrok-api/grok';
|
|
|
3
3
|
|
|
4
4
|
import {delay, expect} from '@datagrok-libraries/utils/src/test';
|
|
5
5
|
import {activityCliffs} from '../package';
|
|
6
|
+
import { DimReductionMethods } from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
6
7
|
|
|
7
|
-
export async function _testActivityCliffsOpen(df: DG.DataFrame, numberCliffs: number, method:
|
|
8
|
+
export async function _testActivityCliffsOpen(df: DG.DataFrame, numberCliffs: number, method: DimReductionMethods, colName: string) {
|
|
8
9
|
await grok.data.detectSemanticTypes(df);
|
|
9
10
|
const scatterPlot = await activityCliffs(
|
|
10
11
|
df, df.getCol(colName), df.getCol('Activity'),
|
|
@@ -5,6 +5,7 @@ import * as DG from 'datagrok-api/dg';
|
|
|
5
5
|
import {after, before, category, test, expect, delay} from '@datagrok-libraries/utils/src/test';
|
|
6
6
|
import {readDataframe} from './utils';
|
|
7
7
|
import {_testSequenceSpaceReturnsResult} from './sequence-space-utils';
|
|
8
|
+
import { DimReductionMethods } from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
8
9
|
|
|
9
10
|
category('sequenceSpace', async () => {
|
|
10
11
|
let testFastaDf: DG.DataFrame;
|
|
@@ -15,7 +16,7 @@ category('sequenceSpace', async () => {
|
|
|
15
16
|
test('sequenceSpaceOpens', async () => {
|
|
16
17
|
testFastaDf = await readDataframe('tests/sample_MSA_data.csv');
|
|
17
18
|
testFastaTableView = grok.shell.addTableView(testFastaDf);
|
|
18
|
-
await _testSequenceSpaceReturnsResult(testFastaDf,
|
|
19
|
+
await _testSequenceSpaceReturnsResult(testFastaDf, DimReductionMethods.UMAP, 'MSA');
|
|
19
20
|
grok.shell.closeTable(testFastaDf);
|
|
20
21
|
testFastaTableView.close();
|
|
21
22
|
}, {skipReason: 'GROK-12775'});
|
|
@@ -23,7 +24,7 @@ category('sequenceSpace', async () => {
|
|
|
23
24
|
test('sequenceSpaceWithEmptyRows', async () => {
|
|
24
25
|
testHelmWithEmptyRows = await readDataframe('tests/sample_MSA_data_empty_vals.csv');
|
|
25
26
|
testHelmWithEmptyRowsTableView = grok.shell.addTableView(testHelmWithEmptyRows);
|
|
26
|
-
await _testSequenceSpaceReturnsResult(testHelmWithEmptyRows,
|
|
27
|
+
await _testSequenceSpaceReturnsResult(testHelmWithEmptyRows, DimReductionMethods.UMAP, 'MSA');
|
|
27
28
|
grok.shell.closeTable(testHelmWithEmptyRows);
|
|
28
29
|
testHelmWithEmptyRowsTableView.close();
|
|
29
30
|
}, {skipReason: 'GROK-12775'});
|
|
@@ -2,14 +2,16 @@ import * as DG from 'datagrok-api/dg';
|
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
3
|
import {expect} from '@datagrok-libraries/utils/src/test';
|
|
4
4
|
import {sequenceSpaceTopMenu} from '../package';
|
|
5
|
+
import { MmDistanceFunctionsNames } from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
6
|
+
import { DimReductionMethods } from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
5
7
|
|
|
6
|
-
export async function _testSequenceSpaceReturnsResult(df: DG.DataFrame, algorithm:
|
|
8
|
+
export async function _testSequenceSpaceReturnsResult(df: DG.DataFrame, algorithm: DimReductionMethods, colName: string) {
|
|
7
9
|
// await grok.data.detectSemanticTypes(df);
|
|
8
10
|
const col: DG.Column = df.getCol(colName);
|
|
9
11
|
const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: col});
|
|
10
12
|
if (semType)
|
|
11
13
|
col.semType = semType;
|
|
12
14
|
|
|
13
|
-
const sp = await sequenceSpaceTopMenu(df, df.col(colName)!, algorithm,
|
|
15
|
+
const sp = await sequenceSpaceTopMenu(df, df.col(colName)!, algorithm, MmDistanceFunctionsNames.LEVENSHTEIN, true);
|
|
14
16
|
expect(sp != null, true);
|
|
15
17
|
}
|