@datagrok/bio 2.11.3 → 2.11.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +9 -0
- package/detectors.js +99 -48
- package/dist/196.js +1 -1
- package/dist/196.js.map +1 -1
- package/dist/361.js +1 -1
- package/dist/361.js.map +1 -1
- package/dist/381.js +1 -1
- package/dist/381.js.map +1 -1
- package/dist/770.js +1 -1
- package/dist/770.js.map +1 -1
- package/dist/79.js.map +1 -1
- package/dist/868.js +1 -1
- package/dist/868.js.map +1 -1
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/package.json +3 -3
- package/src/analysis/sequence-space.ts +34 -12
- package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +2 -1
- package/src/package.ts +51 -29
- package/src/tests/activity-cliffs-tests.ts +5 -3
- package/src/tests/activity-cliffs-utils.ts +5 -2
- package/src/tests/converters-test.ts +72 -72
- package/src/tests/detectors-benchmark-tests.ts +2 -2
- package/src/tests/detectors-tests.ts +36 -36
- package/src/tests/detectors-weak-and-likely-tests.ts +24 -24
- package/src/tests/mm-distance-tests.ts +10 -9
- package/src/tests/units-handler-splitted-tests.ts +33 -36
- package/src/tests/units-handler-tests.ts +9 -9
- package/src/utils/detect-macromolecule-probe.ts +44 -0
- package/src/utils/monomer-lib.ts +4 -9
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.11.
|
|
8
|
+
"version": "2.11.6",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -34,9 +34,9 @@
|
|
|
34
34
|
],
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@biowasm/aioli": "^3.1.0",
|
|
37
|
-
"@datagrok-libraries/bio": "^5.39.
|
|
37
|
+
"@datagrok-libraries/bio": "^5.39.1",
|
|
38
38
|
"@datagrok-libraries/chem-meta": "^1.0.1",
|
|
39
|
-
"@datagrok-libraries/ml": "^6.3.
|
|
39
|
+
"@datagrok-libraries/ml": "^6.3.53",
|
|
40
40
|
"@datagrok-libraries/tutorials": "^1.3.6",
|
|
41
41
|
"@datagrok-libraries/utils": "^4.0.17",
|
|
42
42
|
"cash-dom": "^8.0.0",
|
|
@@ -55,21 +55,19 @@ export async function sequenceSpaceByFingerprints(spaceParams: ISequenceSpacePar
|
|
|
55
55
|
return result;
|
|
56
56
|
}
|
|
57
57
|
|
|
58
|
-
export async function
|
|
59
|
-
|
|
60
|
-
): Promise<
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
const seqList = spaceParams.seqCol.toList();
|
|
65
|
-
|
|
58
|
+
export async function getEncodedSeqSpaceCol(
|
|
59
|
+
seqCol: DG.Column, similarityMetric: BitArrayMetrics | MmDistanceFunctionsNames
|
|
60
|
+
): Promise<{seqList:string[], options: {[_:string]: any}}> {
|
|
61
|
+
// encodes sequences using utf charachters to also support multichar and non fasta sequences
|
|
62
|
+
const ncUH = UnitsHandler.getOrCreate(seqCol);
|
|
63
|
+
const seqList = seqCol.toList();
|
|
66
64
|
const splitter = ncUH.getSplitter();
|
|
67
65
|
const seqColLength = seqList.length;
|
|
68
66
|
let charCodeCounter = 36;
|
|
69
67
|
const charCodeMap = new Map<string, string>();
|
|
70
68
|
for (let i = 0; i < seqColLength; i++) {
|
|
71
69
|
const seq = seqList[i];
|
|
72
|
-
if (seqList[i] === null ||
|
|
70
|
+
if (seqList[i] === null || seqCol.isNone(i)) {
|
|
73
71
|
seqList[i] = null;
|
|
74
72
|
continue;
|
|
75
73
|
}
|
|
@@ -84,8 +82,8 @@ export async function getSequenceSpace(spaceParams: ISequenceSpaceParams,
|
|
|
84
82
|
seqList[i] += charCodeMap.get(char)!;
|
|
85
83
|
}
|
|
86
84
|
}
|
|
87
|
-
|
|
88
|
-
if (
|
|
85
|
+
let options = {};
|
|
86
|
+
if (similarityMetric === MmDistanceFunctionsNames.MONOMER_CHEMICAL_DISTANCE) {
|
|
89
87
|
const monomers = Array.from(charCodeMap.keys());
|
|
90
88
|
const monomerRes = await calculateMonomerSimilarity(monomers);
|
|
91
89
|
// the susbstitution matrix contains similarity, but we need distances
|
|
@@ -98,10 +96,34 @@ export async function getSequenceSpace(spaceParams: ISequenceSpaceParams,
|
|
|
98
96
|
Object.entries(monomerRes.alphabetIndexes).forEach(([key, value]) => {
|
|
99
97
|
monomerHashToMatrixMap[charCodeMap.get(key)!] = value;
|
|
100
98
|
});
|
|
101
|
-
|
|
99
|
+
// sets distance function args in place.
|
|
100
|
+
options = {scoringMatrix: monomerRes.scoringMatrix,
|
|
102
101
|
alphabetIndexes: monomerHashToMatrixMap} satisfies mmDistanceFunctionArgs;
|
|
103
102
|
}
|
|
103
|
+
// else if (similarityMetric === MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH) {
|
|
104
|
+
// const alphabetIndexes: any = {};
|
|
105
|
+
// let i = 0;
|
|
106
|
+
// charCodeMap.forEach((value) => {
|
|
107
|
+
// alphabetIndexes[value] = i;
|
|
108
|
+
// i++;
|
|
109
|
+
// });
|
|
110
|
+
// options = {alphabetIndexes};
|
|
111
|
+
// }
|
|
112
|
+
return {seqList, options};
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
export async function getSequenceSpace(spaceParams: ISequenceSpaceParams,
|
|
116
|
+
progressFunc?: (epochNum: number, epochsLength: number, embedding: number[][]) => void
|
|
117
|
+
): Promise<ISequenceSpaceResult> {
|
|
118
|
+
const ncUH = UnitsHandler.getOrCreate(spaceParams.seqCol);
|
|
119
|
+
if (ncUH.isHelm())
|
|
120
|
+
return await sequenceSpaceByFingerprints(spaceParams);
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
const {seqList, options} = await getEncodedSeqSpaceCol(spaceParams.seqCol, spaceParams.similarityMetric);
|
|
104
124
|
|
|
125
|
+
spaceParams.options = spaceParams.options ?? {};
|
|
126
|
+
spaceParams.options.distanceFnArgs = options;
|
|
105
127
|
const sequenceSpaceResult = await reduceDimensinalityWithNormalization(
|
|
106
128
|
seqList,
|
|
107
129
|
spaceParams.methodName,
|
|
@@ -13,6 +13,7 @@ import {getDendrogramService, IDendrogramService} from '@datagrok-libraries/bio/
|
|
|
13
13
|
import {handleError} from './utils';
|
|
14
14
|
import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
15
15
|
import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
16
|
+
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
16
17
|
|
|
17
18
|
const dataFn: string = 'data/sample_FASTA_PT_activity.csv';
|
|
18
19
|
|
|
@@ -53,7 +54,7 @@ export async function demoBio01bUI() {
|
|
|
53
54
|
.step('Find activity cliffs', async () => {
|
|
54
55
|
activityCliffsViewer = (await activityCliffs(
|
|
55
56
|
df, df.getCol('Sequence'), df.getCol('Activity'),
|
|
56
|
-
80, dimRedMethod)) as DG.ScatterPlotViewer;
|
|
57
|
+
80, dimRedMethod, MmDistanceFunctionsNames.LEVENSHTEIN)) as DG.ScatterPlotViewer;
|
|
57
58
|
view.dockManager.dock(activityCliffsViewer, DG.DOCK_TYPE.RIGHT, null, 'Activity Cliffs', 0.35);
|
|
58
59
|
|
|
59
60
|
// Show grid viewer with the cliffs
|
package/src/package.ts
CHANGED
|
@@ -11,10 +11,10 @@ import {DimReductionMethods, ITSNEOptions, IUMAPOptions} from '@datagrok-librari
|
|
|
11
11
|
import {SequenceSpaceFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/seq-space-editor';
|
|
12
12
|
import {ActivityCliffsFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/activity-cliffs-editor';
|
|
13
13
|
import {
|
|
14
|
-
ISequenceSpaceParams, getActivityCliffs, SequenceSpaceFunc
|
|
14
|
+
ISequenceSpaceParams, getActivityCliffs, SequenceSpaceFunc, CLIFFS_COL_ENCODE_FN
|
|
15
15
|
} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
16
16
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
17
|
-
import {BitArrayMetrics
|
|
17
|
+
import {BitArrayMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
18
18
|
import {
|
|
19
19
|
TAGS as bioTAGS, ALPHABET, NOTATION,
|
|
20
20
|
} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
@@ -35,14 +35,14 @@ import {
|
|
|
35
35
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
36
36
|
import {SequenceAlignment} from './seq_align';
|
|
37
37
|
import {
|
|
38
|
-
ISequenceSpaceResult, getEmbeddingColsNames, getSequenceSpace, sequenceSpaceByFingerprints
|
|
38
|
+
ISequenceSpaceResult, getEmbeddingColsNames, getEncodedSeqSpaceCol, getSequenceSpace, sequenceSpaceByFingerprints
|
|
39
39
|
} from './analysis/sequence-space';
|
|
40
40
|
import {
|
|
41
41
|
createLinesGrid, createPropPanelElement, createTooltipElement, getChemSimilaritiesMatrix,
|
|
42
42
|
} from './analysis/sequence-activity-cliffs';
|
|
43
43
|
import {SequenceSimilarityViewer} from './analysis/sequence-similarity-viewer';
|
|
44
44
|
import {SequenceDiversityViewer} from './analysis/sequence-diversity-viewer';
|
|
45
|
-
import {SubstructureSearchDialog} from './substructure-search/substructure-search';
|
|
45
|
+
import {MONOMERIC_COL_TAGS, SubstructureSearchDialog, invalidateMols} from './substructure-search/substructure-search';
|
|
46
46
|
import {convert} from './utils/convert';
|
|
47
47
|
import {getMacromoleculeColumnPropertyPanel} from './widgets/representations';
|
|
48
48
|
import {saveAsFastaUI} from './utils/save-as-fasta';
|
|
@@ -75,10 +75,12 @@ import {GetRegionApp} from './apps/get-region-app';
|
|
|
75
75
|
import {GetRegionFuncEditor} from './utils/get-region-func-editor';
|
|
76
76
|
import {sequenceToMolfile} from './utils/sequence-to-mol';
|
|
77
77
|
import {errInfo} from './utils/err-info';
|
|
78
|
+
import {detectMacromoleculeProbeDo} from './utils/detect-macromolecule-probe';
|
|
78
79
|
|
|
79
80
|
import {SHOW_SCATTERPLOT_PROGRESS} from '@datagrok-libraries/ml/src/functionEditors/seq-space-base-editor';
|
|
80
81
|
import {DIMENSIONALITY_REDUCER_TERMINATE_EVENT}
|
|
81
82
|
from '@datagrok-libraries/ml/src/workers/dimensionality-reducing-worker-creator';
|
|
83
|
+
import BitArray from '@datagrok-libraries/utils/src/bit-array';
|
|
82
84
|
|
|
83
85
|
export const _package = new BioPackage();
|
|
84
86
|
|
|
@@ -389,12 +391,14 @@ export async function getRegionTopMenu(
|
|
|
389
391
|
//input: column activities
|
|
390
392
|
//input: double similarity = 80 [Similarity cutoff]
|
|
391
393
|
//input: string methodName { choices:["UMAP", "t-SNE"] }
|
|
394
|
+
//input: string similarityMetric { choices:["Hamming", "Levenshtein", "Monomer chemical distance"] }
|
|
392
395
|
//input: object options {optional: true}
|
|
393
396
|
//output: viewer result
|
|
394
397
|
//editor: Bio:SeqActivityCliffsEditor
|
|
395
398
|
export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column<string>, activities: DG.Column,
|
|
396
|
-
similarity: number, methodName: DimReductionMethods,
|
|
397
|
-
|
|
399
|
+
similarity: number, methodName: DimReductionMethods,
|
|
400
|
+
similarityMetric: MmDistanceFunctionsNames | BitArrayMetrics,
|
|
401
|
+
options?: (IUMAPOptions | ITSNEOptions) & Options): Promise<DG.Viewer | undefined> {
|
|
398
402
|
if (!checkInputColumnUI(macroMolecule, 'Activity Cliffs'))
|
|
399
403
|
return;
|
|
400
404
|
const axesNames = getEmbeddingColsNames(df);
|
|
@@ -404,21 +408,26 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column<
|
|
|
404
408
|
'separator': macroMolecule.getTag(bioTAGS.separator),
|
|
405
409
|
'alphabet': macroMolecule.getTag(bioTAGS.alphabet),
|
|
406
410
|
};
|
|
411
|
+
let cliffsEncodeFunction: (seqCol: DG.Column, similarityMetric: MmDistanceFunctionsNames | BitArrayMetrics) => any =
|
|
412
|
+
getEncodedSeqSpaceCol;
|
|
407
413
|
const ncUH = UnitsHandler.getOrCreate(macroMolecule);
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
seqCol
|
|
416
|
-
const
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
414
|
+
const columnDistanceMetric: MmDistanceFunctionsNames | BitArrayMetrics = similarityMetric;
|
|
415
|
+
const seqCol = macroMolecule;
|
|
416
|
+
|
|
417
|
+
let sequenceSpaceFunc: SequenceSpaceFunc = getSequenceSpace;
|
|
418
|
+
if (ncUH.isHelm()) {
|
|
419
|
+
sequenceSpaceFunc = sequenceSpaceByFingerprints;
|
|
420
|
+
cliffsEncodeFunction = async (seqCol: DG.Column, similarityMetric: MmDistanceFunctionsNames | BitArrayMetrics) => {
|
|
421
|
+
await invalidateMols(seqCol, false);
|
|
422
|
+
const molecularCol = seqCol.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS];
|
|
423
|
+
const fingerPrints: DG.Column =
|
|
424
|
+
await grok.functions.call('Chem:getMorganFingerprints', {molColumn: molecularCol});
|
|
425
|
+
const fingerPrintsBitArray = fingerPrints.toList().map((f: DG.BitSet) =>
|
|
426
|
+
BitArray.fromUint32Array(f.length, new Uint32Array(f.getBuffer().buffer)));
|
|
427
|
+
return {seqList: fingerPrintsBitArray, options: {}};
|
|
428
|
+
};
|
|
421
429
|
}
|
|
430
|
+
|
|
422
431
|
const runCliffs = async () => {
|
|
423
432
|
const sp = await getActivityCliffs(
|
|
424
433
|
df,
|
|
@@ -437,25 +446,26 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column<
|
|
|
437
446
|
createTooltipElement,
|
|
438
447
|
createPropPanelElement,
|
|
439
448
|
createLinesGrid,
|
|
440
|
-
options);
|
|
449
|
+
{...(options ?? {}), [CLIFFS_COL_ENCODE_FN]: cliffsEncodeFunction});
|
|
441
450
|
return sp;
|
|
442
451
|
};
|
|
443
452
|
|
|
444
|
-
const allowedRowCount =
|
|
445
|
-
const fastRowCount = methodName === DimReductionMethods.UMAP ?
|
|
453
|
+
const allowedRowCount = methodName === DimReductionMethods.UMAP ? 200_000 : 20_000;
|
|
454
|
+
const fastRowCount = methodName === DimReductionMethods.UMAP ? 5_000 : 2_000;
|
|
446
455
|
if (df.rowCount > allowedRowCount) {
|
|
447
456
|
grok.shell.warning(`Too many rows, maximum for sequence activity cliffs is ${allowedRowCount}`);
|
|
448
457
|
return;
|
|
449
458
|
}
|
|
450
459
|
|
|
451
|
-
|
|
460
|
+
const pi = DG.TaskBarProgressIndicator.create(`Running sequence activity cliffs ...`);
|
|
461
|
+
return new Promise<DG.Viewer | undefined>((resolve, reject) => {
|
|
452
462
|
if (df.rowCount > fastRowCount && !options?.[BYPASS_LARGE_DATA_WARNING]) {
|
|
453
463
|
ui.dialog().add(ui.divText(`Activity cliffs analysis might take several minutes.
|
|
454
464
|
Do you want to continue?`))
|
|
455
465
|
.onOK(async () => {
|
|
456
|
-
|
|
457
|
-
runCliffs().then((res) => resolve(res)).catch((err) => reject(err)).finally(() => { progressBar.close(); });
|
|
466
|
+
runCliffs().then((res) => resolve(res)).catch((err) => reject(err));
|
|
458
467
|
})
|
|
468
|
+
.onCancel(() => { resolve(undefined); })
|
|
459
469
|
.show();
|
|
460
470
|
} else {
|
|
461
471
|
runCliffs().then((res) => resolve(res)).catch((err) => reject(err));
|
|
@@ -464,7 +474,7 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column<
|
|
|
464
474
|
const [errMsg, errStack] = errInfo(err);
|
|
465
475
|
_package.logger.error(errMsg, undefined, errStack);
|
|
466
476
|
throw err;
|
|
467
|
-
});
|
|
477
|
+
}).finally(() => { pi.close(); });
|
|
468
478
|
}
|
|
469
479
|
|
|
470
480
|
//top-menu: Bio | Analyze | Sequence Space...
|
|
@@ -516,6 +526,7 @@ export async function sequenceSpaceTopMenu(
|
|
|
516
526
|
const progress = (_nEpoch / epochsLength * 100);
|
|
517
527
|
pg.update(progress, `Running sequence space ... ${progress.toFixed(0)}%`);
|
|
518
528
|
}
|
|
529
|
+
|
|
519
530
|
const embedColsNames = getEmbeddingColsNames(table);
|
|
520
531
|
const withoutEmptyValues = DG.DataFrame.fromColumns([macroMolecule]).clone();
|
|
521
532
|
const emptyValsIdxs = removeEmptyStringRows(withoutEmptyValues, macroMolecule);
|
|
@@ -525,7 +536,7 @@ export async function sequenceSpaceTopMenu(
|
|
|
525
536
|
methodName: methodName,
|
|
526
537
|
similarityMetric: similarityMetric,
|
|
527
538
|
embedAxesNames: embedColsNames,
|
|
528
|
-
options: {...options, sparseMatrixThreshold: sparseMatrixThreshold ?? 0.
|
|
539
|
+
options: {...options, sparseMatrixThreshold: sparseMatrixThreshold ?? 0.5,
|
|
529
540
|
usingSparseMatrix: table.rowCount > 20000},
|
|
530
541
|
};
|
|
531
542
|
|
|
@@ -1084,10 +1095,10 @@ export async function demoBioHelmMsaSequenceSpace(): Promise<void> {
|
|
|
1084
1095
|
await demoBio05UI();
|
|
1085
1096
|
}
|
|
1086
1097
|
|
|
1087
|
-
//name:
|
|
1098
|
+
//name: polyToolColumnChoice
|
|
1088
1099
|
//input: dataframe df [Input data table]
|
|
1089
1100
|
//input: column macroMolecule
|
|
1090
|
-
export async function
|
|
1101
|
+
export async function polyToolColumnChoice(df: DG.DataFrame, macroMolecule: DG.Column): Promise<void> {
|
|
1091
1102
|
_setPeptideColumn(macroMolecule);
|
|
1092
1103
|
await grok.data.detectSemanticTypes(df);
|
|
1093
1104
|
}
|
|
@@ -1099,3 +1110,14 @@ export async function sdfToJsonLib(table: DG.DataFrame) {
|
|
|
1099
1110
|
const jsonMonomerLibrary = JSON.stringify(_jsonMonomerLibrary);
|
|
1100
1111
|
DG.Utils.download(`${table.name}.json`, jsonMonomerLibrary);
|
|
1101
1112
|
}
|
|
1113
|
+
|
|
1114
|
+
// -- Utils --
|
|
1115
|
+
|
|
1116
|
+
//name: detectMacromoleculeProbe
|
|
1117
|
+
//input: file file
|
|
1118
|
+
//input: string colName = ''
|
|
1119
|
+
//input: int probeCount = 100
|
|
1120
|
+
export async function detectMacromoleculeProbe(file: DG.FileInfo, colName: string, probeCount: number): Promise<void> {
|
|
1121
|
+
const csv: string = await file.readAsString();
|
|
1122
|
+
await detectMacromoleculeProbeDo(csv, colName, probeCount);
|
|
1123
|
+
}
|
|
@@ -8,6 +8,8 @@ import {_testActivityCliffsOpen} from './activity-cliffs-utils';
|
|
|
8
8
|
import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
9
9
|
|
|
10
10
|
import {_package} from '../package-test';
|
|
11
|
+
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
12
|
+
import {BitArrayMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
11
13
|
|
|
12
14
|
|
|
13
15
|
category('activityCliffs', async () => {
|
|
@@ -39,7 +41,7 @@ category('activityCliffs', async () => {
|
|
|
39
41
|
const cliffsNum = DG.Test.isInBenchmark ? 6 : 3;
|
|
40
42
|
|
|
41
43
|
await _testActivityCliffsOpen(actCliffsDf, DimReductionMethods.UMAP,
|
|
42
|
-
'sequence', 'Activity', 90, cliffsNum);
|
|
44
|
+
'sequence', 'Activity', 90, cliffsNum, MmDistanceFunctionsNames.LEVENSHTEIN);
|
|
43
45
|
});
|
|
44
46
|
|
|
45
47
|
test('activityCliffsWithEmptyRows', async () => {
|
|
@@ -49,7 +51,7 @@ category('activityCliffs', async () => {
|
|
|
49
51
|
viewList.push(actCliffsTableViewWithEmptyRows);
|
|
50
52
|
|
|
51
53
|
await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, DimReductionMethods.UMAP,
|
|
52
|
-
'sequence', 'Activity', 90, 3);
|
|
54
|
+
'sequence', 'Activity', 90, 3, MmDistanceFunctionsNames.LEVENSHTEIN);
|
|
53
55
|
});
|
|
54
56
|
|
|
55
57
|
test('Helm', async () => {
|
|
@@ -57,6 +59,6 @@ category('activityCliffs', async () => {
|
|
|
57
59
|
const view = grok.shell.addTableView(df);
|
|
58
60
|
|
|
59
61
|
await _testActivityCliffsOpen(df, DimReductionMethods.UMAP,
|
|
60
|
-
'HELM', 'Activity', 90, 53);
|
|
62
|
+
'HELM', 'Activity', 90, 53, BitArrayMetricsNames.Tanimoto);
|
|
61
63
|
});
|
|
62
64
|
});
|
|
@@ -4,14 +4,17 @@ import * as grok from 'datagrok-api/grok';
|
|
|
4
4
|
import {expect} from '@datagrok-libraries/utils/src/test';
|
|
5
5
|
import {activityCliffs, BYPASS_LARGE_DATA_WARNING} from '../package';
|
|
6
6
|
import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
7
|
+
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
8
|
+
import {BitArrayMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
7
9
|
|
|
8
10
|
export async function _testActivityCliffsOpen(df: DG.DataFrame, drMethod: DimReductionMethods,
|
|
9
|
-
seqColName: string, activityColName: string, similarityThr: number, tgtNumberCliffs: number
|
|
11
|
+
seqColName: string, activityColName: string, similarityThr: number, tgtNumberCliffs: number,
|
|
12
|
+
similarityMetric: MmDistanceFunctionsNames | BitArrayMetrics
|
|
10
13
|
): Promise<void> {
|
|
11
14
|
await grok.data.detectSemanticTypes(df);
|
|
12
15
|
const scatterPlot = await activityCliffs(
|
|
13
16
|
df, df.getCol(seqColName), df.getCol(activityColName),
|
|
14
|
-
similarityThr, drMethod, {[`${BYPASS_LARGE_DATA_WARNING}`]: true});
|
|
17
|
+
similarityThr, drMethod, similarityMetric, {[`${BYPASS_LARGE_DATA_WARNING}`]: true});
|
|
15
18
|
// const scatterPlot = (await grok.functions.call('Bio:activityCliffs', {
|
|
16
19
|
// table: df, molecules: df.getCol(colName), activities: df.getCol('Activity'),
|
|
17
20
|
// similarity: 50, methodName: method
|
|
@@ -41,79 +41,79 @@ category('converters', () => {
|
|
|
41
41
|
}
|
|
42
42
|
|
|
43
43
|
const _csvTxts: { [key: string]: string } = {
|
|
44
|
-
fastaPt: `seq
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
separatorPt: `seq
|
|
49
|
-
F-W-P-H-E-Y
|
|
50
|
-
Y-N-R-Q-W-Y-V
|
|
51
|
-
M-K-P-S-E-Y-V`,
|
|
52
|
-
helmPt: `seq
|
|
53
|
-
PEPTIDE1{F.W.P.H.E.Y}$$$$
|
|
54
|
-
PEPTIDE1{Y.N.R.Q.W.Y.V}$$$$
|
|
55
|
-
PEPTIDE1{M.K.P.S.E.Y.V}$$$$`,
|
|
56
|
-
fastaDna: `seq
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
separatorDna: `seq
|
|
61
|
-
A/C/G/T/C
|
|
62
|
-
C/A/G/T/G/T
|
|
63
|
-
T/T/C/A/A/C`,
|
|
64
|
-
helmDna: `seq
|
|
65
|
-
RNA1{d(A)p.d(C)p.d(G)p.d(T)p.d(C)p}$$$$
|
|
66
|
-
RNA1{d(C)p.d(A)p.d(G)p.d(T)p.d(G)p.d(T)p}$$$$
|
|
67
|
-
RNA1{d(T)p.d(T)p.d(C)p.d(A)p.d(A)p.d(C)p}$$$$`,
|
|
68
|
-
fastaRna: `seq
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
separatorRna: `seq
|
|
73
|
-
A*C*G*U*C
|
|
74
|
-
C*A*G*U*G*U
|
|
75
|
-
U*U*C*A*A*C`,
|
|
76
|
-
helmRna: `seq
|
|
77
|
-
RNA1{r(A)p.r(C)p.r(G)p.r(U)p.r(C)p}$$$$
|
|
78
|
-
RNA1{r(C)p.r(A)p.r(G)p.r(U)p.r(G)p.r(U)p}$$$$
|
|
79
|
-
RNA1{r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p}$$$$`,
|
|
80
|
-
fastaGaps: `seq
|
|
81
|
-
FW-PH-EYY
|
|
82
|
-
FYNRQWYV-
|
|
83
|
-
FKP-Q-SEYV`,
|
|
84
|
-
separatorGaps: `seq
|
|
85
|
-
F/W//P/H//E/Y/Y
|
|
86
|
-
F/Y/N/R/Q/W/Y/V/
|
|
87
|
-
F/K/P//Q//S/E/Y/V`,
|
|
88
|
-
helmGaps: `seq
|
|
89
|
-
PEPTIDE1{F.W.*.P.H.*.E.Y.Y}$$$$
|
|
90
|
-
PEPTIDE1{F.Y.N.R.Q.W.Y.V.*}$$$$
|
|
91
|
-
PEPTIDE1{F.K.P.*.Q.*.S.E.Y.V}$$$$`,
|
|
44
|
+
[Samples.fastaPt]: `seq
|
|
45
|
+
FWPHEYFWPHEY
|
|
46
|
+
YNRQWYVYNRQWYV
|
|
47
|
+
MKPSEYVMKPSEYV`,
|
|
48
|
+
[Samples.separatorPt]: `seq
|
|
49
|
+
F-W-P-H-E-Y-F-W-P-H-E-Y
|
|
50
|
+
Y-N-R-Q-W-Y-V-Y-N-R-Q-W-Y-V
|
|
51
|
+
M-K-P-S-E-Y-V-M-K-P-S-E-Y-V`,
|
|
52
|
+
[Samples.helmPt]: `seq
|
|
53
|
+
PEPTIDE1{F.W.P.H.E.Y.F.W.P.H.E.Y}$$$$
|
|
54
|
+
PEPTIDE1{Y.N.R.Q.W.Y.V.Y.N.R.Q.W.Y.V}$$$$
|
|
55
|
+
PEPTIDE1{M.K.P.S.E.Y.V.M.K.P.S.E.Y.V}$$$$`,
|
|
56
|
+
[Samples.fastaDna]: `seq
|
|
57
|
+
ACGTCACGTC
|
|
58
|
+
CAGTGTCAGTGT
|
|
59
|
+
TTCAACTTCAAC`,
|
|
60
|
+
[Samples.separatorDna]: `seq
|
|
61
|
+
A/C/G/T/C/A/C/G/T/C
|
|
62
|
+
C/A/G/T/G/T/C/A/G/T/G/T
|
|
63
|
+
T/T/C/A/A/C/T/T/C/A/A/C`,
|
|
64
|
+
[Samples.helmDna]: `seq
|
|
65
|
+
RNA1{d(A)p.d(C)p.d(G)p.d(T)p.d(C)p.d(A)p.d(C)p.d(G)p.d(T)p.d(C)p}$$$$
|
|
66
|
+
RNA1{d(C)p.d(A)p.d(G)p.d(T)p.d(G)p.d(T)p.d(C)p.d(A)p.d(G)p.d(T)p.d(G)p.d(T)p}$$$$
|
|
67
|
+
RNA1{d(T)p.d(T)p.d(C)p.d(A)p.d(A)p.d(C)p.d(T)p.d(T)p.d(C)p.d(A)p.d(A)p.d(C)p}$$$$`,
|
|
68
|
+
[Samples.fastaRna]: `seq
|
|
69
|
+
ACGUCACGUC
|
|
70
|
+
CAGUGUCAGUGU
|
|
71
|
+
UUCAACUUCAAC`,
|
|
72
|
+
[Samples.separatorRna]: `seq
|
|
73
|
+
A*C*G*U*C*A*C*G*U*C
|
|
74
|
+
C*A*G*U*G*U*C*A*G*U*G*U
|
|
75
|
+
U*U*C*A*A*C*U*U*C*A*A*C`,
|
|
76
|
+
[Samples.helmRna]: `seq
|
|
77
|
+
RNA1{r(A)p.r(C)p.r(G)p.r(U)p.r(C)p.r(A)p.r(C)p.r(G)p.r(U)p.r(C)p}$$$$
|
|
78
|
+
RNA1{r(C)p.r(A)p.r(G)p.r(U)p.r(G)p.r(U)p.r(C)p.r(A)p.r(G)p.r(U)p.r(G)p.r(U)p}$$$$
|
|
79
|
+
RNA1{r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p}$$$$`,
|
|
80
|
+
[Samples.fastaGaps]: `seq
|
|
81
|
+
FW-PH-EYYFW-PH-EYY
|
|
82
|
+
FYNRQWYV-FYNRQWYV-
|
|
83
|
+
FKP-Q-SEYVFKP-Q-SEYV`,
|
|
84
|
+
[Samples.separatorGaps]: `seq
|
|
85
|
+
F/W//P/H//E/Y/Y/F/W//P/H//E/Y/Y
|
|
86
|
+
F/Y/N/R/Q/W/Y/V//F/Y/N/R/Q/W/Y/V/
|
|
87
|
+
F/K/P//Q//S/E/Y/V/F/K/P//Q//S/E/Y/V`,
|
|
88
|
+
[Samples.helmGaps]: `seq
|
|
89
|
+
PEPTIDE1{F.W.*.P.H.*.E.Y.Y.F.W.*.P.H.*.E.Y.Y}$$$$
|
|
90
|
+
PEPTIDE1{F.Y.N.R.Q.W.Y.V.*.F.Y.N.R.Q.W.Y.V.*}$$$$
|
|
91
|
+
PEPTIDE1{F.K.P.*.Q.*.S.E.Y.V.F.K.P.*.Q.*.S.E.Y.V}$$$$`,
|
|
92
92
|
|
|
93
|
-
fastaUn: `seq
|
|
94
|
-
[meI][hHis][Aca]NT[dE][Thr_PO3H2][Aca]D
|
|
95
|
-
[meI][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]
|
|
96
|
-
[Lys_Boc][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]`,
|
|
97
|
-
separatorUn: `seq
|
|
98
|
-
meI-hHis-Aca-N-T-dE-Thr_PO3H2-Aca-D
|
|
99
|
-
meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
|
|
100
|
-
Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2`,
|
|
101
|
-
helmUn: `seq
|
|
102
|
-
PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D}$$$$
|
|
103
|
-
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$$
|
|
104
|
-
PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$$`,
|
|
105
|
-
helmLoneDeoxyribose: `seq
|
|
106
|
-
RNA1{d(A).d(C).d(G).d(T).d(C)}$$$$
|
|
107
|
-
RNA1{d(C).d(A).d(G).d(T).d(G).d(T)p}$$$$
|
|
108
|
-
RNA1{d(T).d(T).d(C).d(A).d(A).d(C)p}$$$$`,
|
|
109
|
-
helmLoneRibose: `seq
|
|
110
|
-
RNA1{r(A).r(C).r(G).r(U).r(C)}$$$$
|
|
111
|
-
RNA1{r(C).r(A).r(G).r(U).r(G).r(U)p}$$$$
|
|
112
|
-
RNA1{r(U).r(U).r(C).r(A).r(A).r(C)p}$$$$`,
|
|
113
|
-
helmLonePhosphorus: `seq
|
|
114
|
-
RNA1{p.p.r(A)p.r(C)p.r(G)p.r(U)p.r(C)p}$$$$
|
|
115
|
-
RNA1{p.p.r(C)p.r(A)p.p.r(G)p.r(U)p.r(G)p.r(U)p}$$$$
|
|
116
|
-
RNA1{p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.p.p}$$$$`,
|
|
93
|
+
[Samples.fastaUn]: `seq
|
|
94
|
+
[meI][hHis][Aca]NT[dE][Thr_PO3H2][Aca]D[meI][hHis][Aca]NT[dE][Thr_PO3H2][Aca]D
|
|
95
|
+
[meI][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2][meI][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]
|
|
96
|
+
[Lys_Boc][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2][Lys_Boc][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca]`,
|
|
97
|
+
[Samples.separatorUn]: `seq
|
|
98
|
+
meI-hHis-Aca-N-T-dE-Thr_PO3H2-Aca-D-meI-hHis-Aca-N-T-dE-Thr_PO3H2-Aca-D
|
|
99
|
+
meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2-meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
|
|
100
|
+
Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2-Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca`,
|
|
101
|
+
[Samples.helmUn]: `seq
|
|
102
|
+
PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D.meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D}$$$$
|
|
103
|
+
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$$
|
|
104
|
+
PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca}$$$$`,
|
|
105
|
+
[Samples.helmLoneDeoxyribose]: `seq
|
|
106
|
+
RNA1{d(A).d(C).d(G).d(T).d(C).d(A).d(C).d(G).d(T).d(C)}$$$$
|
|
107
|
+
RNA1{d(C).d(A).d(G).d(T).d(G).d(T)p.d(C).d(A).d(G).d(T).d(G).d(T)p}$$$$
|
|
108
|
+
RNA1{d(T).d(T).d(C).d(A).d(A).d(C)p.d(T).d(T).d(C).d(A).d(A).d(C)p}$$$$`,
|
|
109
|
+
[Samples.helmLoneRibose]: `seq
|
|
110
|
+
RNA1{r(A).r(C).r(G).r(U).r(C).r(A).r(C).r(G).r(U).r(C)}$$$$
|
|
111
|
+
RNA1{r(C).r(A).r(G).r(U).r(G).r(U)p.r(C).r(A).r(G).r(U).r(G).r(U)p}$$$$
|
|
112
|
+
RNA1{r(U).r(U).r(C).r(A).r(A).r(C)p.r(U).r(U).r(C).r(A).r(A).r(C)p}$$$$`,
|
|
113
|
+
[Samples.helmLonePhosphorus]: `seq
|
|
114
|
+
RNA1{p.p.r(A)p.r(C)p.r(G)p.r(U)p.r(C)p.r(A)p.r(C)p.r(G)p.r(U)p.r(C)p}$$$$
|
|
115
|
+
RNA1{p.p.r(C)p.r(A)p.p.r(G)p.r(U)p.r(G)p.r(U)p.r(C)p.r(A)p.p.r(G)p.r(U)p.r(G)p.r(U)p}$$$$
|
|
116
|
+
RNA1{p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.p.p}$$$$`,
|
|
117
117
|
};
|
|
118
118
|
|
|
119
119
|
/** Also detects semantic types
|
|
@@ -39,11 +39,11 @@ category('detectorsBenchmark', () => {
|
|
|
39
39
|
});
|
|
40
40
|
|
|
41
41
|
test('separatorDnaShorts50Many1E6', async () => {
|
|
42
|
-
await detectMacromoleculeBenchmark(
|
|
42
|
+
await detectMacromoleculeBenchmark(20, NOTATION.SEPARATOR, ALPHABET.DNA, 50, 1E6, '/');
|
|
43
43
|
});
|
|
44
44
|
|
|
45
45
|
test('separatorDnaLong1e6Few50', async () => {
|
|
46
|
-
await detectMacromoleculeBenchmark(
|
|
46
|
+
await detectMacromoleculeBenchmark(20, NOTATION.SEPARATOR, ALPHABET.DNA, 1E6, 50, '/');
|
|
47
47
|
});
|
|
48
48
|
|
|
49
49
|
async function detectMacromoleculeBenchmark(
|
|
@@ -71,63 +71,63 @@ CCCCN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
|
|
|
71
71
|
C1CCCCC1
|
|
72
72
|
CCCCCC`;
|
|
73
73
|
[csvTests.fastaDna1]: string = `seq
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
74
|
+
ACGTCACGTC
|
|
75
|
+
CAGTGTCAGTGT
|
|
76
|
+
TTCAACTTCAAC`;
|
|
77
77
|
[csvTests.fastaRna1]: string = `seq
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
78
|
+
ACGUCACGUC
|
|
79
|
+
CAGUGUCAGUGU
|
|
80
|
+
UUCAACUUCAAC`;
|
|
81
81
|
/** Pure amino acids sequence */
|
|
82
82
|
[csvTests.fastaPt1]: string = `seq
|
|
83
83
|
FWPHEY
|
|
84
84
|
YNRQWYV
|
|
85
85
|
MKPSEYV`;
|
|
86
86
|
[csvTests.fastaUn]: string = `seq
|
|
87
|
-
[meI][hHis][Aca]NT[dE][Thr_PO3H2][Aca]
|
|
88
|
-
[meI][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]
|
|
89
|
-
[Lys_Boc][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]`;
|
|
87
|
+
[meI][hHis][Aca]NT[dE][Thr_PO3H2][Aca]DN
|
|
88
|
+
[meI][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2][Aca]
|
|
89
|
+
[Lys_Boc][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2][Aca]`;
|
|
90
90
|
[csvTests.sepDna]: string = `seq
|
|
91
|
-
A*C*G*T*C
|
|
92
|
-
C*A*G*T*G*T
|
|
93
|
-
T*T*C*A*A*C`;
|
|
91
|
+
A*C*G*T*C*A*C*G*T*C
|
|
92
|
+
C*A*G*T*G*T*C*A*G*T*G*T
|
|
93
|
+
T*T*C*A*A*C*T*T*C*A*A*C`;
|
|
94
94
|
[csvTests.sepRna]: string = `seq
|
|
95
|
-
A*C*G*U*C
|
|
96
|
-
C*A*G*U*G*U
|
|
97
|
-
U*U*C*A*A*C`;
|
|
95
|
+
A*C*G*U*C*A*C*G*U*C
|
|
96
|
+
C*A*G*U*G*U*C*A*G*U*G*U
|
|
97
|
+
U*U*C*A*A*C*U*U*C*A*A*C`;
|
|
98
98
|
[csvTests.sepPt]: string = `seq
|
|
99
|
-
F-W-P-H-E-Y
|
|
100
|
-
Y-N-R-Q-W-Y-V
|
|
101
|
-
M-K-P-S-E-Y-V`;
|
|
99
|
+
F-W-P-H-E-Y-F-W-P-H-E-Y
|
|
100
|
+
Y-N-R-Q-W-Y-V-Y-N-R-Q-W-Y-V
|
|
101
|
+
M-K-P-S-E-Y-V-M-K-P-S-E-Y-V`;
|
|
102
102
|
[csvTests.sepUn1]: string = `seq
|
|
103
|
-
abc-dfgg-abc1-cfr3-rty-wert
|
|
104
|
-
rut12-her2-rty-wert-abc-abc1-dfgg
|
|
105
|
-
rut12-rty-her2-abc-cfr3-wert-rut12`;
|
|
103
|
+
abc-dfgg-abc1-cfr3-rty-wert-cfr3-rty-wert
|
|
104
|
+
rut12-her2-rty-wert-abc-abc1-dfgg-abc-abc1-dfgg
|
|
105
|
+
rut12-rty-her2-abc-cfr3-wert-rut12-cfr3-wert-rut12`;
|
|
106
106
|
[csvTests.sepUn2]: string = `seq
|
|
107
|
-
abc/dfgg/abc1/cfr3/rty/wert
|
|
108
|
-
rut12/her2/rty/wert//abc/abc1/dfgg
|
|
109
|
-
rut12/rty/her2/abc/cfr3//wert/rut12`;
|
|
107
|
+
abc/dfgg/abc1/cfr3/rty/wert/abc/dfgg/abc1/cfr3/rty/wert
|
|
108
|
+
rut12/her2/rty/wert//abc/abc1/dfgg/rut12/her2/rty/wert//abc/abc1/dfgg
|
|
109
|
+
rut12/rty/her2/abc/cfr3//wert/rut12/rut12/rty/her2/abc/cfr3//wert/rut12`;
|
|
110
110
|
[csvTests.sepMsaDna1]: string = `seq
|
|
111
|
-
A-C--G-T--C-T
|
|
112
|
-
C-A-C--T--G-T
|
|
113
|
-
A-C-C-G-T-A-C-T`;
|
|
111
|
+
A-C--G-T--C-T-A-C--G-T--C-T
|
|
112
|
+
C-A-C--T--G-T-C-A-C--T--G-T
|
|
113
|
+
A-C-C-G-T-A-C-T-A-C-C-G-T-A-C-T`;
|
|
114
114
|
[csvTests.sepMsaUnWEmpty]: string = `seq
|
|
115
|
-
m1-M-m3-mon4-mon5-N-T-MON8-N9
|
|
116
|
-
m1-mon2-m3-mon4-mon5-Num--MON8-N9
|
|
115
|
+
m1-M-m3-mon4-mon5-N-T-MON8-N9-m1-M-m3-mon4-mon5-N-T-MON8-N9
|
|
116
|
+
m1-mon2-m3-mon4-mon5-Num--MON8-N9-m1-mon2-m3-mon4-mon5-Num--MON8-N9
|
|
117
117
|
|
|
118
|
-
mon1-M-mon3-mon4-mon5---MON8-N9`;
|
|
118
|
+
mon1-M-mon3-mon4-mon5---MON8-N9-mon1-M-mon3-mon4-mon5---MON8-N9`;
|
|
119
119
|
[csvTests.sepComplex]: string = `seq
|
|
120
120
|
Ac(1)-F-K(AEEA-AEEA-R-Ac)-L-mF-V-Y-mNle-D-W-N-mF-C(1)-G-NH2
|
|
121
121
|
Ac(1)-F-K(AEEA-ARRA-W-Ac)-L-mF-V-Y-mNle-D-W-N-mF-C(1)-G-NH2
|
|
122
122
|
Ac(1)-F-K(AEEA-AEEA-Ac)-L-mF-V-Y-mNle-D-W-N-mF-C(1)-G-NH2`;
|
|
123
123
|
[csvTests.fastaMsaDna1]: string = `seq
|
|
124
|
-
AC-GT-CT
|
|
125
|
-
CAC-T-GT
|
|
126
|
-
|
|
124
|
+
AC-GT-CTAC-GT-CT
|
|
125
|
+
CAC-T-GTCAC-T-GT
|
|
126
|
+
ACCGTACTACCGTACT`;
|
|
127
127
|
[csvTests.fastaMsaPt1]: string = `seq
|
|
128
|
-
FWR-WYV-KHP
|
|
129
|
-
YNR-WYV-KHP
|
|
130
|
-
MWRSWY-CKHP`;
|
|
128
|
+
FWR-WYV-KHPFWR-WYV-KHP
|
|
129
|
+
YNR-WYV-KHPYNR-WYV-KHP
|
|
130
|
+
MWRSWY-CKHPMWRSWY-CKHP`;
|
|
131
131
|
}();
|
|
132
132
|
|
|
133
133
|
const enum Samples {
|