@teselagen/sequence-utils 0.3.9 → 0.3.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bioData.d.ts +1 -1
- package/filterSequenceString.d.ts +2 -4
- package/index.js +15 -17
- package/index.mjs +15 -17
- package/index.umd.js +15 -17
- package/package.json +1 -1
- package/src/bioData.js +2 -3
- package/src/filterSequenceString.js +7 -11
- package/src/filterSequenceString.test.js +24 -8
- package/src/insertSequenceDataAtPositionOrRange.test.js +2 -2
- package/src/tidyUpSequenceData.js +2 -3
- package/src/tidyUpSequenceData.test.js +25 -82
package/bioData.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
export const protein_letters: "ACDEFGHIKLMNPQRSTVWY";
|
|
2
2
|
export const protein_letters_withUandX: "ACDEFGHIKLMNPQRSTVWYUX";
|
|
3
|
-
export const extended_protein_letters: "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
|
|
3
|
+
export const extended_protein_letters: "ACDEFGHIKLMNPQRSTVWYBXZJUO";
|
|
4
4
|
export const ambiguous_dna_letters: "GATCRYWSMKHBVDN";
|
|
5
5
|
export const unambiguous_dna_letters: "GATC";
|
|
6
6
|
export const ambiguous_rna_letters: "GAUCRYWSMKHBVDN";
|
package/filterSequenceString.d.ts
CHANGED
|
@@ -1,18 +1,16 @@
|
|
|
1
|
-
export default function filterSequenceString(sequenceString: any, { additionalValidChars, isOligo, name, isProtein, isRna, isMixedRnaAndDna, includeStopCodon }?: {
|
|
1
|
+
export default function filterSequenceString(sequenceString: any, { additionalValidChars, isOligo, name, isProtein, isRna, isMixedRnaAndDna }?: {
|
|
2
2
|
additionalValidChars?: string | undefined;
|
|
3
3
|
isOligo: any;
|
|
4
4
|
name: any;
|
|
5
5
|
isProtein: any;
|
|
6
6
|
isRna: any;
|
|
7
7
|
isMixedRnaAndDna: any;
|
|
8
|
-
includeStopCodon: any;
|
|
9
8
|
}): (string | string[])[];
|
|
10
|
-
export function getAcceptedChars({ isOligo, isProtein, isRna, isMixedRnaAndDna, includeStopCodon }?: {
|
|
9
|
+
export function getAcceptedChars({ isOligo, isProtein, isRna, isMixedRnaAndDna }?: {
|
|
11
10
|
isOligo: any;
|
|
12
11
|
isProtein: any;
|
|
13
12
|
isRna: any;
|
|
14
13
|
isMixedRnaAndDna: any;
|
|
15
|
-
includeStopCodon: any;
|
|
16
14
|
}): string;
|
|
17
15
|
export function getReplaceChars({ isOligo, isProtein, isRna, isMixedRnaAndDna }?: {
|
|
18
16
|
isOligo: any;
|
package/index.js
CHANGED
|
@@ -6000,7 +6000,7 @@ lodash.exports;
|
|
|
6000
6000
|
var lodashExports = lodash.exports;
|
|
6001
6001
|
const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
|
|
6002
6002
|
const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
|
|
6003
|
-
const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
|
|
6003
|
+
const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO";
|
|
6004
6004
|
const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
|
|
6005
6005
|
const unambiguous_dna_letters = "GATC";
|
|
6006
6006
|
const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
|
|
@@ -6056,7 +6056,7 @@ const extended_protein_values = {
|
|
|
6056
6056
|
Y: "Y",
|
|
6057
6057
|
Z: "QE",
|
|
6058
6058
|
"*": "\\*\\.",
|
|
6059
|
-
".": "
|
|
6059
|
+
".": "\\.",
|
|
6060
6060
|
"-": "\\-"
|
|
6061
6061
|
};
|
|
6062
6062
|
const bioData = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
|
|
@@ -12329,15 +12329,13 @@ function filterSequenceString(sequenceString, {
|
|
|
12329
12329
|
name,
|
|
12330
12330
|
isProtein,
|
|
12331
12331
|
isRna,
|
|
12332
|
-
isMixedRnaAndDna,
|
|
12333
|
-
includeStopCodon
|
|
12332
|
+
isMixedRnaAndDna
|
|
12334
12333
|
} = {}) {
|
|
12335
12334
|
const acceptedChars = getAcceptedChars({
|
|
12336
12335
|
isOligo,
|
|
12337
12336
|
isProtein,
|
|
12338
12337
|
isRna,
|
|
12339
|
-
isMixedRnaAndDna,
|
|
12340
|
-
includeStopCodon
|
|
12338
|
+
isMixedRnaAndDna
|
|
12341
12339
|
});
|
|
12342
12340
|
const replaceChars = getReplaceChars({
|
|
12343
12341
|
isOligo,
|
|
@@ -12387,10 +12385,9 @@ function getAcceptedChars({
|
|
|
12387
12385
|
isOligo,
|
|
12388
12386
|
isProtein,
|
|
12389
12387
|
isRna,
|
|
12390
|
-
isMixedRnaAndDna,
|
|
12391
|
-
includeStopCodon
|
|
12388
|
+
isMixedRnaAndDna
|
|
12392
12389
|
} = {}) {
|
|
12393
|
-
return isProtein ? `${
|
|
12390
|
+
return isProtein ? `${extended_protein_letters.toLowerCase()}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
|
|
12394
12391
|
//just plain old dna
|
|
12395
12392
|
ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
|
|
12396
12393
|
);
|
|
@@ -12402,9 +12399,12 @@ function getReplaceChars({
|
|
|
12402
12399
|
isRna,
|
|
12403
12400
|
isMixedRnaAndDna
|
|
12404
12401
|
} = {}) {
|
|
12405
|
-
return isProtein ? {} :
|
|
12406
|
-
//
|
|
12407
|
-
{}
|
|
12402
|
+
return isProtein ? {} : (
|
|
12403
|
+
// {".": "*"}
|
|
12404
|
+
isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
|
|
12405
|
+
//just plain old dna
|
|
12406
|
+
{}
|
|
12407
|
+
)
|
|
12408
12408
|
);
|
|
12409
12409
|
}
|
|
12410
12410
|
__name(getReplaceChars, "getReplaceChars");
|
|
@@ -12544,7 +12544,7 @@ function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
12544
12544
|
const {
|
|
12545
12545
|
annotationsAsObjects,
|
|
12546
12546
|
logMessages,
|
|
12547
|
-
|
|
12547
|
+
doNotRemoveInvalidChars,
|
|
12548
12548
|
additionalValidChars,
|
|
12549
12549
|
noTranslationData,
|
|
12550
12550
|
doNotProvideIdsForAnnotations,
|
|
@@ -12578,11 +12578,9 @@ function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
12578
12578
|
if (seqData.isRna) {
|
|
12579
12579
|
seqData.sequence = seqData.sequence.replace(/t/gi, "u");
|
|
12580
12580
|
}
|
|
12581
|
-
if (
|
|
12581
|
+
if (!doNotRemoveInvalidChars) {
|
|
12582
12582
|
if (seqData.isProtein) {
|
|
12583
|
-
const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
|
|
12584
|
-
includeStopCodon: true
|
|
12585
|
-
}, topLevelSeqData || seqData));
|
|
12583
|
+
const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData));
|
|
12586
12584
|
seqData.proteinSequence = newSeq;
|
|
12587
12585
|
} else {
|
|
12588
12586
|
const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
|
package/index.mjs
CHANGED
|
@@ -5998,7 +5998,7 @@ lodash.exports;
|
|
|
5998
5998
|
var lodashExports = lodash.exports;
|
|
5999
5999
|
const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
|
|
6000
6000
|
const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
|
|
6001
|
-
const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
|
|
6001
|
+
const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO";
|
|
6002
6002
|
const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
|
|
6003
6003
|
const unambiguous_dna_letters = "GATC";
|
|
6004
6004
|
const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
|
|
@@ -6054,7 +6054,7 @@ const extended_protein_values = {
|
|
|
6054
6054
|
Y: "Y",
|
|
6055
6055
|
Z: "QE",
|
|
6056
6056
|
"*": "\\*\\.",
|
|
6057
|
-
".": "
|
|
6057
|
+
".": "\\.",
|
|
6058
6058
|
"-": "\\-"
|
|
6059
6059
|
};
|
|
6060
6060
|
const bioData = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
|
|
@@ -12327,15 +12327,13 @@ function filterSequenceString(sequenceString, {
|
|
|
12327
12327
|
name,
|
|
12328
12328
|
isProtein,
|
|
12329
12329
|
isRna,
|
|
12330
|
-
isMixedRnaAndDna,
|
|
12331
|
-
includeStopCodon
|
|
12330
|
+
isMixedRnaAndDna
|
|
12332
12331
|
} = {}) {
|
|
12333
12332
|
const acceptedChars = getAcceptedChars({
|
|
12334
12333
|
isOligo,
|
|
12335
12334
|
isProtein,
|
|
12336
12335
|
isRna,
|
|
12337
|
-
isMixedRnaAndDna,
|
|
12338
|
-
includeStopCodon
|
|
12336
|
+
isMixedRnaAndDna
|
|
12339
12337
|
});
|
|
12340
12338
|
const replaceChars = getReplaceChars({
|
|
12341
12339
|
isOligo,
|
|
@@ -12385,10 +12383,9 @@ function getAcceptedChars({
|
|
|
12385
12383
|
isOligo,
|
|
12386
12384
|
isProtein,
|
|
12387
12385
|
isRna,
|
|
12388
|
-
isMixedRnaAndDna,
|
|
12389
|
-
includeStopCodon
|
|
12386
|
+
isMixedRnaAndDna
|
|
12390
12387
|
} = {}) {
|
|
12391
|
-
return isProtein ? `${
|
|
12388
|
+
return isProtein ? `${extended_protein_letters.toLowerCase()}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
|
|
12392
12389
|
//just plain old dna
|
|
12393
12390
|
ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
|
|
12394
12391
|
);
|
|
@@ -12400,9 +12397,12 @@ function getReplaceChars({
|
|
|
12400
12397
|
isRna,
|
|
12401
12398
|
isMixedRnaAndDna
|
|
12402
12399
|
} = {}) {
|
|
12403
|
-
return isProtein ? {} :
|
|
12404
|
-
//
|
|
12405
|
-
{}
|
|
12400
|
+
return isProtein ? {} : (
|
|
12401
|
+
// {".": "*"}
|
|
12402
|
+
isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
|
|
12403
|
+
//just plain old dna
|
|
12404
|
+
{}
|
|
12405
|
+
)
|
|
12406
12406
|
);
|
|
12407
12407
|
}
|
|
12408
12408
|
__name(getReplaceChars, "getReplaceChars");
|
|
@@ -12542,7 +12542,7 @@ function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
12542
12542
|
const {
|
|
12543
12543
|
annotationsAsObjects,
|
|
12544
12544
|
logMessages,
|
|
12545
|
-
|
|
12545
|
+
doNotRemoveInvalidChars,
|
|
12546
12546
|
additionalValidChars,
|
|
12547
12547
|
noTranslationData,
|
|
12548
12548
|
doNotProvideIdsForAnnotations,
|
|
@@ -12576,11 +12576,9 @@ function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
12576
12576
|
if (seqData.isRna) {
|
|
12577
12577
|
seqData.sequence = seqData.sequence.replace(/t/gi, "u");
|
|
12578
12578
|
}
|
|
12579
|
-
if (
|
|
12579
|
+
if (!doNotRemoveInvalidChars) {
|
|
12580
12580
|
if (seqData.isProtein) {
|
|
12581
|
-
const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
|
|
12582
|
-
includeStopCodon: true
|
|
12583
|
-
}, topLevelSeqData || seqData));
|
|
12581
|
+
const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData));
|
|
12584
12582
|
seqData.proteinSequence = newSeq;
|
|
12585
12583
|
} else {
|
|
12586
12584
|
const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
|
package/index.umd.js
CHANGED
|
@@ -6002,7 +6002,7 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
|
|
|
6002
6002
|
var lodashExports = lodash.exports;
|
|
6003
6003
|
const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
|
|
6004
6004
|
const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
|
|
6005
|
-
const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
|
|
6005
|
+
const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO";
|
|
6006
6006
|
const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
|
|
6007
6007
|
const unambiguous_dna_letters = "GATC";
|
|
6008
6008
|
const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
|
|
@@ -6058,7 +6058,7 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
|
|
|
6058
6058
|
Y: "Y",
|
|
6059
6059
|
Z: "QE",
|
|
6060
6060
|
"*": "\\*\\.",
|
|
6061
|
-
".": "
|
|
6061
|
+
".": "\\.",
|
|
6062
6062
|
"-": "\\-"
|
|
6063
6063
|
};
|
|
6064
6064
|
const bioData = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
|
|
@@ -12331,15 +12331,13 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
|
|
|
12331
12331
|
name,
|
|
12332
12332
|
isProtein,
|
|
12333
12333
|
isRna,
|
|
12334
|
-
isMixedRnaAndDna,
|
|
12335
|
-
includeStopCodon
|
|
12334
|
+
isMixedRnaAndDna
|
|
12336
12335
|
} = {}) {
|
|
12337
12336
|
const acceptedChars = getAcceptedChars({
|
|
12338
12337
|
isOligo,
|
|
12339
12338
|
isProtein,
|
|
12340
12339
|
isRna,
|
|
12341
|
-
isMixedRnaAndDna,
|
|
12342
|
-
includeStopCodon
|
|
12340
|
+
isMixedRnaAndDna
|
|
12343
12341
|
});
|
|
12344
12342
|
const replaceChars = getReplaceChars({
|
|
12345
12343
|
isOligo,
|
|
@@ -12389,10 +12387,9 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
|
|
|
12389
12387
|
isOligo,
|
|
12390
12388
|
isProtein,
|
|
12391
12389
|
isRna,
|
|
12392
|
-
isMixedRnaAndDna,
|
|
12393
|
-
includeStopCodon
|
|
12390
|
+
isMixedRnaAndDna
|
|
12394
12391
|
} = {}) {
|
|
12395
|
-
return isProtein ? `${
|
|
12392
|
+
return isProtein ? `${extended_protein_letters.toLowerCase()}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
|
|
12396
12393
|
//just plain old dna
|
|
12397
12394
|
ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
|
|
12398
12395
|
);
|
|
@@ -12404,9 +12401,12 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
|
|
|
12404
12401
|
isRna,
|
|
12405
12402
|
isMixedRnaAndDna
|
|
12406
12403
|
} = {}) {
|
|
12407
|
-
return isProtein ? {} :
|
|
12408
|
-
//
|
|
12409
|
-
{}
|
|
12404
|
+
return isProtein ? {} : (
|
|
12405
|
+
// {".": "*"}
|
|
12406
|
+
isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
|
|
12407
|
+
//just plain old dna
|
|
12408
|
+
{}
|
|
12409
|
+
)
|
|
12410
12410
|
);
|
|
12411
12411
|
}
|
|
12412
12412
|
__name(getReplaceChars, "getReplaceChars");
|
|
@@ -12546,7 +12546,7 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
|
|
|
12546
12546
|
const {
|
|
12547
12547
|
annotationsAsObjects,
|
|
12548
12548
|
logMessages,
|
|
12549
|
-
|
|
12549
|
+
doNotRemoveInvalidChars,
|
|
12550
12550
|
additionalValidChars,
|
|
12551
12551
|
noTranslationData,
|
|
12552
12552
|
doNotProvideIdsForAnnotations,
|
|
@@ -12580,11 +12580,9 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
|
|
|
12580
12580
|
if (seqData.isRna) {
|
|
12581
12581
|
seqData.sequence = seqData.sequence.replace(/t/gi, "u");
|
|
12582
12582
|
}
|
|
12583
|
-
if (
|
|
12583
|
+
if (!doNotRemoveInvalidChars) {
|
|
12584
12584
|
if (seqData.isProtein) {
|
|
12585
|
-
const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
|
|
12586
|
-
includeStopCodon: true
|
|
12587
|
-
}, topLevelSeqData || seqData));
|
|
12585
|
+
const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData));
|
|
12588
12586
|
seqData.proteinSequence = newSeq;
|
|
12589
12587
|
} else {
|
|
12590
12588
|
const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
|
package/package.json
CHANGED
package/src/bioData.js
CHANGED
|
@@ -2,8 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
export const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
|
|
4
4
|
export const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
|
|
5
|
-
|
|
6
|
-
export const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
|
|
5
|
+
export const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO";
|
|
7
6
|
export const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
|
|
8
7
|
export const unambiguous_dna_letters = "GATC";
|
|
9
8
|
export const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
|
|
@@ -61,6 +60,6 @@ export const extended_protein_values = {
|
|
|
61
60
|
Y: "Y",
|
|
62
61
|
Z: "QE",
|
|
63
62
|
"*": "\\*\\.",
|
|
64
|
-
".": "
|
|
63
|
+
".": "\\.",
|
|
65
64
|
"-": "\\-"
|
|
66
65
|
};
|
package/src/filterSequenceString.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import {
|
|
2
2
|
ambiguous_dna_letters,
|
|
3
3
|
ambiguous_rna_letters,
|
|
4
|
-
|
|
4
|
+
extended_protein_letters
|
|
5
5
|
} from "./bioData";
|
|
6
6
|
|
|
7
7
|
export default function filterSequenceString(
|
|
@@ -12,16 +12,14 @@ export default function filterSequenceString(
|
|
|
12
12
|
name,
|
|
13
13
|
isProtein,
|
|
14
14
|
isRna,
|
|
15
|
-
isMixedRnaAndDna,
|
|
16
|
-
includeStopCodon
|
|
15
|
+
isMixedRnaAndDna
|
|
17
16
|
} = {}
|
|
18
17
|
) {
|
|
19
18
|
const acceptedChars = getAcceptedChars({
|
|
20
19
|
isOligo,
|
|
21
20
|
isProtein,
|
|
22
21
|
isRna,
|
|
23
|
-
isMixedRnaAndDna,
|
|
24
|
-
includeStopCodon
|
|
22
|
+
isMixedRnaAndDna
|
|
25
23
|
});
|
|
26
24
|
const replaceChars = getReplaceChars({
|
|
27
25
|
isOligo,
|
|
@@ -82,13 +80,10 @@ export function getAcceptedChars({
|
|
|
82
80
|
isOligo,
|
|
83
81
|
isProtein,
|
|
84
82
|
isRna,
|
|
85
|
-
isMixedRnaAndDna,
|
|
86
|
-
includeStopCodon
|
|
83
|
+
isMixedRnaAndDna
|
|
87
84
|
} = {}) {
|
|
88
85
|
return isProtein
|
|
89
|
-
? `${
|
|
90
|
-
includeStopCodon ? "*." : ""
|
|
91
|
-
}}`
|
|
86
|
+
? `${extended_protein_letters.toLowerCase()}}`
|
|
92
87
|
: isOligo
|
|
93
88
|
? ambiguous_rna_letters.toLowerCase() + "t"
|
|
94
89
|
: isRna
|
|
@@ -106,7 +101,8 @@ export function getReplaceChars({
|
|
|
106
101
|
} = {}) {
|
|
107
102
|
return isProtein
|
|
108
103
|
? {}
|
|
109
|
-
:
|
|
104
|
+
: // {".": "*"}
|
|
105
|
+
isOligo
|
|
110
106
|
? {}
|
|
111
107
|
: isRna
|
|
112
108
|
? { t: "u" }
|
package/src/filterSequenceString.test.js
CHANGED
|
@@ -49,10 +49,11 @@ describe("filterSequenceString", () => {
|
|
|
49
49
|
isProtein: true
|
|
50
50
|
}
|
|
51
51
|
);
|
|
52
|
+
// expect(warnings[0]).toBe(`Replaced "." with "*" 2 times`);
|
|
52
53
|
expect(warnings[0]).toBe(
|
|
53
|
-
'Invalid character(s) detected and removed:
|
|
54
|
+
'Invalid character(s) detected and removed: 3, 4, 2, ", ", ", ,, ,, ., ., / '
|
|
54
55
|
);
|
|
55
|
-
expect(str).toBe("
|
|
56
|
+
expect(str).toBe("bbbxtgalmfwkqespvicyhrnd");
|
|
56
57
|
});
|
|
57
58
|
it("when isProtein: true, should handle upper case letters", () => {
|
|
58
59
|
const [str, warnings] = filterSequenceString("xtgalmfWKQEspvicyhrnd", {
|
|
@@ -61,12 +62,27 @@ describe("filterSequenceString", () => {
|
|
|
61
62
|
expect(warnings.length).toBe(0);
|
|
62
63
|
expect(str).toBe("xtgalmfWKQEspvicyhrnd");
|
|
63
64
|
});
|
|
64
|
-
it("when isProtein: true, should handle the option to includeStopCodon by allowing periods", () => {
|
|
65
|
-
const [str] = filterSequenceString('bbb342"""xtgalmfwkqespvicyhrnd,,../', {
|
|
66
|
-
isProtein: true,
|
|
67
|
-
includeStopCodon: true
|
|
68
|
-
});
|
|
69
65
|
|
|
70
|
-
|
|
66
|
+
it("when isProtein: true it should not filter this aa seq", () => {
|
|
67
|
+
const [str] = filterSequenceString(
|
|
68
|
+
"mhhhhhhgsgsmledlkrqvleanlalpkhnlasgssghvsavdrergvfviapsgvdfrimtaddmvvvsietgevvegekppaedtpthrllyqafpsiggivhthsrhatiwaqagqsipatgtthadhfygtipctrkmtdaeingeyewetgnvivetfekqgidaaqmpgvlvhshgpfawgknaedavhnaivleevaymgifcrqlapqlpdmqqtllnkhylrkhgakayygq",
|
|
69
|
+
{
|
|
70
|
+
isProtein: true
|
|
71
|
+
}
|
|
72
|
+
);
|
|
73
|
+
|
|
74
|
+
expect(str).toBe(
|
|
75
|
+
`mhhhhhhgsgsmledlkrqvleanlalpkhnlasgssghvsavdrergvfviapsgvdfrimtaddmvvvsietgevvegekppaedtpthrllyqafpsiggivhthsrhatiwaqagqsipatgtthadhfygtipctrkmtdaeingeyewetgnvivetfekqgidaaqmpgvlvhshgpfawgknaedavhnaivleevaymgifcrqlapqlpdmqqtllnkhylrkhgakayygq`
|
|
76
|
+
);
|
|
77
|
+
});
|
|
78
|
+
it("when isProtein: true, it should convert . to *", () => {
|
|
79
|
+
const [str] = filterSequenceString(
|
|
80
|
+
'BXZJUO*bbb342"""xtgalbmfwkqespvicyhrnd,,../',
|
|
81
|
+
{
|
|
82
|
+
isProtein: true
|
|
83
|
+
}
|
|
84
|
+
);
|
|
85
|
+
|
|
86
|
+
expect(str).toBe("BXZJUObbbxtgalbmfwkqespvicyhrnd");
|
|
71
87
|
});
|
|
72
88
|
});
|
package/src/insertSequenceDataAtPositionOrRange.test.js
CHANGED
|
@@ -135,7 +135,7 @@ describe("insertSequenceData", () => {
|
|
|
135
135
|
});
|
|
136
136
|
it("inserts characters at correct origin spanning range with {maintainOriginSplit: true} option", () => {
|
|
137
137
|
const sequenceToInsert = {
|
|
138
|
-
sequence: "
|
|
138
|
+
sequence: "crrrrry",
|
|
139
139
|
// fffffff
|
|
140
140
|
features: [{ name: "feat1", start: 0, end: 6 }]
|
|
141
141
|
};
|
|
@@ -154,7 +154,7 @@ describe("insertSequenceData", () => {
|
|
|
154
154
|
maintainOriginSplit: true
|
|
155
155
|
}
|
|
156
156
|
);
|
|
157
|
-
postInsertSeq.sequence.should.equal("
|
|
157
|
+
postInsertSeq.sequence.should.equal("rrrryagagacr");
|
|
158
158
|
// fffff fff ff
|
|
159
159
|
postInsertSeq.features.should.containSubset([
|
|
160
160
|
{ name: "feat1", start: 10, end: 4 },
|
package/src/tidyUpSequenceData.js
CHANGED
|
@@ -13,7 +13,7 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
13
13
|
const {
|
|
14
14
|
annotationsAsObjects,
|
|
15
15
|
logMessages,
|
|
16
|
-
|
|
16
|
+
doNotRemoveInvalidChars,
|
|
17
17
|
additionalValidChars,
|
|
18
18
|
noTranslationData,
|
|
19
19
|
doNotProvideIdsForAnnotations,
|
|
@@ -52,10 +52,9 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
52
52
|
//flip all t's to u's
|
|
53
53
|
seqData.sequence = seqData.sequence.replace(/t/gi, "u");
|
|
54
54
|
}
|
|
55
|
-
if (
|
|
55
|
+
if (!doNotRemoveInvalidChars) {
|
|
56
56
|
if (seqData.isProtein) {
|
|
57
57
|
const [newSeq] = filterSequenceString(seqData.proteinSequence, {
|
|
58
|
-
includeStopCodon: true,
|
|
59
58
|
...(topLevelSeqData || seqData)
|
|
60
59
|
});
|
|
61
60
|
seqData.proteinSequence = newSeq;
|
package/src/tidyUpSequenceData.test.js
CHANGED
|
@@ -5,29 +5,26 @@ import chaiSubset from "chai-subset";
|
|
|
5
5
|
chai.use(chaiSubset);
|
|
6
6
|
chai.should();
|
|
7
7
|
describe("tidyUpSequenceData", () => {
|
|
8
|
-
it("should remove
|
|
9
|
-
const res = tidyUpSequenceData(
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
},
|
|
29
|
-
{ removeUnwantedChars: true }
|
|
30
|
-
);
|
|
8
|
+
it("should remove invalid chars by default, while handling annotation start,end (and location start,end) truncation correctly", () => {
|
|
9
|
+
const res = tidyUpSequenceData({
|
|
10
|
+
sequence: "http://localhost:3344/Standalone",
|
|
11
|
+
features: [
|
|
12
|
+
{
|
|
13
|
+
start: 3,
|
|
14
|
+
end: 20,
|
|
15
|
+
locations: [
|
|
16
|
+
{
|
|
17
|
+
start: "3", //this should be converted to an int :)
|
|
18
|
+
end: 5
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
start: 10,
|
|
22
|
+
end: 20
|
|
23
|
+
}
|
|
24
|
+
]
|
|
25
|
+
}
|
|
26
|
+
]
|
|
27
|
+
});
|
|
31
28
|
res.should.containSubset({
|
|
32
29
|
sequence: "httcahstStandan",
|
|
33
30
|
circular: false,
|
|
@@ -49,15 +46,6 @@ describe("tidyUpSequenceData", () => {
|
|
|
49
46
|
]
|
|
50
47
|
});
|
|
51
48
|
});
|
|
52
|
-
// const res = tidyUpSequenceData(
|
|
53
|
-
// {
|
|
54
|
-
// isProtein: true,
|
|
55
|
-
// circular: true,
|
|
56
|
-
// proteinSequence: "gagiuhwgagalasjglj*.",
|
|
57
|
-
// features: [{ start: 3, end: 10 }, { start: 10, end: 20 }]
|
|
58
|
-
// },
|
|
59
|
-
// { convertAnnotationsFromAAIndices: true, removeUnwantedChars: true }
|
|
60
|
-
// );
|
|
61
49
|
|
|
62
50
|
it("should handle a protein sequence being passed in with isProtein set to true", () => {
|
|
63
51
|
const res = tidyUpSequenceData(
|
|
@@ -71,61 +59,16 @@ describe("tidyUpSequenceData", () => {
|
|
|
71
59
|
{ name: "iDon'tFit", start: 25, end: 35 }
|
|
72
60
|
]
|
|
73
61
|
},
|
|
74
|
-
{ convertAnnotationsFromAAIndices: true
|
|
62
|
+
{ convertAnnotationsFromAAIndices: true }
|
|
75
63
|
);
|
|
64
|
+
|
|
76
65
|
res.should.containSubset({
|
|
77
|
-
aminoAcidDataForEachBaseOfDNA: [
|
|
78
|
-
{
|
|
79
|
-
aminoAcid: {
|
|
80
|
-
value: ".",
|
|
81
|
-
name: "Gap",
|
|
82
|
-
threeLettersName: "Gap"
|
|
83
|
-
},
|
|
84
|
-
positionInCodon: 0,
|
|
85
|
-
aminoAcidIndex: 17,
|
|
86
|
-
sequenceIndex: 51,
|
|
87
|
-
codonRange: {
|
|
88
|
-
start: 51,
|
|
89
|
-
end: 53
|
|
90
|
-
},
|
|
91
|
-
fullCodon: true
|
|
92
|
-
},
|
|
93
|
-
{
|
|
94
|
-
aminoAcid: {
|
|
95
|
-
value: ".",
|
|
96
|
-
name: "Gap",
|
|
97
|
-
threeLettersName: "Gap"
|
|
98
|
-
},
|
|
99
|
-
positionInCodon: 1,
|
|
100
|
-
aminoAcidIndex: 17,
|
|
101
|
-
sequenceIndex: 52,
|
|
102
|
-
codonRange: {
|
|
103
|
-
start: 51,
|
|
104
|
-
end: 53
|
|
105
|
-
},
|
|
106
|
-
fullCodon: true
|
|
107
|
-
},
|
|
108
|
-
{
|
|
109
|
-
aminoAcid: {
|
|
110
|
-
value: ".",
|
|
111
|
-
name: "Gap",
|
|
112
|
-
threeLettersName: "Gap"
|
|
113
|
-
},
|
|
114
|
-
positionInCodon: 2,
|
|
115
|
-
aminoAcidIndex: 17,
|
|
116
|
-
sequenceIndex: 53,
|
|
117
|
-
codonRange: {
|
|
118
|
-
start: 51,
|
|
119
|
-
end: 53
|
|
120
|
-
},
|
|
121
|
-
fullCodon: true
|
|
122
|
-
}
|
|
123
|
-
],
|
|
66
|
+
aminoAcidDataForEachBaseOfDNA: [],
|
|
124
67
|
isProtein: true,
|
|
125
68
|
size: 54, //size should refer to the DNA length
|
|
126
69
|
proteinSize: 18, //proteinSize should refer to the amino acid length
|
|
127
|
-
sequence: "
|
|
128
|
-
proteinSequence: "
|
|
70
|
+
sequence: "ggngcnggnathtgacaytggggngcnggngcnytngcnwsnhtnggnytnhtn", //degenerate sequence
|
|
71
|
+
proteinSequence: "gagiuhwgagalasjglj",
|
|
129
72
|
circular: false,
|
|
130
73
|
features: [
|
|
131
74
|
{ start: 9, end: 32, forward: true },
|