@teselagen/sequence-utils 0.3.8 → 0.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bioData.d.ts +1 -0
- package/filterSequenceString.d.ts +26 -1
- package/index.d.ts +0 -1
- package/index.js +93 -31
- package/index.mjs +93 -31
- package/index.umd.js +93 -31
- package/package.json +1 -1
- package/src/bioData.js +1 -1
- package/src/filterSequenceString.js +110 -17
- package/src/filterSequenceString.test.js +62 -3
- package/src/index.js +0 -1
- package/src/proteinAlphabet.js +2 -5
- package/src/tidyUpSequenceData.js +12 -15
- package/filterAminoAcidSequenceString.d.ts +0 -1
- package/filterAminoAcidSequenceString.test.d.ts +0 -1
- package/src/filterAminoAcidSequenceString.js +0 -10
- package/src/filterAminoAcidSequenceString.test.js +0 -24
package/bioData.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
export const protein_letters: "ACDEFGHIKLMNPQRSTVWY";
|
|
2
|
+
export const protein_letters_withUandX: "ACDEFGHIKLMNPQRSTVWYUX";
|
|
2
3
|
export const extended_protein_letters: "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
|
|
3
4
|
export const ambiguous_dna_letters: "GATCRYWSMKHBVDN";
|
|
4
5
|
export const unambiguous_dna_letters: "GATC";
|
|
@@ -1 +1,26 @@
|
|
|
1
|
-
export default function filterSequenceString(sequenceString: any, additionalValidChars
|
|
1
|
+
export default function filterSequenceString(sequenceString: any, { additionalValidChars, isOligo, name, isProtein, isRna, isMixedRnaAndDna, includeStopCodon }?: {
|
|
2
|
+
additionalValidChars?: string | undefined;
|
|
3
|
+
isOligo: any;
|
|
4
|
+
name: any;
|
|
5
|
+
isProtein: any;
|
|
6
|
+
isRna: any;
|
|
7
|
+
isMixedRnaAndDna: any;
|
|
8
|
+
includeStopCodon: any;
|
|
9
|
+
}): (string | string[])[];
|
|
10
|
+
export function getAcceptedChars({ isOligo, isProtein, isRna, isMixedRnaAndDna, includeStopCodon }?: {
|
|
11
|
+
isOligo: any;
|
|
12
|
+
isProtein: any;
|
|
13
|
+
isRna: any;
|
|
14
|
+
isMixedRnaAndDna: any;
|
|
15
|
+
includeStopCodon: any;
|
|
16
|
+
}): string;
|
|
17
|
+
export function getReplaceChars({ isOligo, isProtein, isRna, isMixedRnaAndDna }?: {
|
|
18
|
+
isOligo: any;
|
|
19
|
+
isProtein: any;
|
|
20
|
+
isRna: any;
|
|
21
|
+
isMixedRnaAndDna: any;
|
|
22
|
+
}): {
|
|
23
|
+
t?: undefined;
|
|
24
|
+
} | {
|
|
25
|
+
t: string;
|
|
26
|
+
};
|
package/index.d.ts
CHANGED
|
@@ -20,7 +20,6 @@ export { default as aliasedEnzymesByName } from "./aliasedEnzymesByName";
|
|
|
20
20
|
export { default as defaultEnzymesByName } from "./defaultEnzymesByName";
|
|
21
21
|
export { default as generateSequenceData } from "./generateSequenceData";
|
|
22
22
|
export { default as generateAnnotations } from "./generateAnnotations";
|
|
23
|
-
export { default as filterAminoAcidSequenceString } from "./filterAminoAcidSequenceString";
|
|
24
23
|
export { default as filterSequenceString } from "./filterSequenceString";
|
|
25
24
|
export { default as findNearestRangeOfSequenceOverlapToPosition } from "./findNearestRangeOfSequenceOverlapToPosition";
|
|
26
25
|
export { default as findOrfsInPlasmid } from "./findOrfsInPlasmid";
|
package/index.js
CHANGED
|
@@ -5999,6 +5999,7 @@ lodash.exports;
|
|
|
5999
5999
|
})(lodash, lodash.exports);
|
|
6000
6000
|
var lodashExports = lodash.exports;
|
|
6001
6001
|
const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
|
|
6002
|
+
const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
|
|
6002
6003
|
const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
|
|
6003
6004
|
const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
|
|
6004
6005
|
const unambiguous_dna_letters = "GATC";
|
|
@@ -6067,6 +6068,7 @@ const bioData = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.definePrope
|
|
|
6067
6068
|
extended_protein_letters,
|
|
6068
6069
|
extended_protein_values,
|
|
6069
6070
|
protein_letters,
|
|
6071
|
+
protein_letters_withUandX,
|
|
6070
6072
|
unambiguous_dna_letters,
|
|
6071
6073
|
unambiguous_rna_letters
|
|
6072
6074
|
}, Symbol.toStringTag, { value: "Module" }));
|
|
@@ -12321,20 +12323,91 @@ const modifiableTypes = [
|
|
|
12321
12323
|
"primers",
|
|
12322
12324
|
"guides"
|
|
12323
12325
|
];
|
|
12324
|
-
function filterSequenceString(sequenceString,
|
|
12325
|
-
|
|
12326
|
-
|
|
12327
|
-
|
|
12328
|
-
|
|
12329
|
-
|
|
12330
|
-
|
|
12331
|
-
|
|
12326
|
+
function filterSequenceString(sequenceString, {
|
|
12327
|
+
additionalValidChars = "",
|
|
12328
|
+
isOligo,
|
|
12329
|
+
name,
|
|
12330
|
+
isProtein,
|
|
12331
|
+
isRna,
|
|
12332
|
+
isMixedRnaAndDna,
|
|
12333
|
+
includeStopCodon
|
|
12334
|
+
} = {}) {
|
|
12335
|
+
const acceptedChars = getAcceptedChars({
|
|
12336
|
+
isOligo,
|
|
12337
|
+
isProtein,
|
|
12338
|
+
isRna,
|
|
12339
|
+
isMixedRnaAndDna,
|
|
12340
|
+
includeStopCodon
|
|
12341
|
+
});
|
|
12342
|
+
const replaceChars = getReplaceChars({
|
|
12343
|
+
isOligo,
|
|
12344
|
+
isProtein,
|
|
12345
|
+
isRna,
|
|
12346
|
+
isMixedRnaAndDna
|
|
12347
|
+
});
|
|
12348
|
+
let sanitizedVal = "";
|
|
12349
|
+
const invalidChars = [];
|
|
12350
|
+
const chars = `${acceptedChars}${additionalValidChars.split("").join("\\")}`;
|
|
12351
|
+
const warnings = [];
|
|
12352
|
+
const replaceCount = {};
|
|
12353
|
+
sequenceString.split("").forEach((letter) => {
|
|
12354
|
+
const lowerLetter = letter.toLowerCase();
|
|
12355
|
+
if (replaceChars && replaceChars[lowerLetter]) {
|
|
12356
|
+
if (!replaceCount[lowerLetter]) {
|
|
12357
|
+
replaceCount[lowerLetter] = 0;
|
|
12358
|
+
}
|
|
12359
|
+
replaceCount[lowerLetter]++;
|
|
12360
|
+
const isUpper = lowerLetter !== letter;
|
|
12361
|
+
sanitizedVal += isUpper ? replaceChars[lowerLetter].toUpperCase() : replaceChars[lowerLetter];
|
|
12362
|
+
} else if (chars.includes(lowerLetter)) {
|
|
12363
|
+
sanitizedVal += letter;
|
|
12364
|
+
} else {
|
|
12365
|
+
invalidChars.push(letter);
|
|
12366
|
+
}
|
|
12367
|
+
});
|
|
12368
|
+
Object.keys(replaceCount).forEach((letter) => {
|
|
12369
|
+
warnings.push(
|
|
12370
|
+
`Replaced "${letter}" with "${replaceChars[letter]}"${replaceCount[letter] > 1 ? ` ${replaceCount[letter]} times` : ""}`
|
|
12332
12371
|
);
|
|
12333
|
-
}
|
|
12334
|
-
|
|
12372
|
+
});
|
|
12373
|
+
if (sequenceString.length !== sanitizedVal.length) {
|
|
12374
|
+
warnings.push(
|
|
12375
|
+
`${name ? `Sequence ${name}: ` : ""}Invalid character(s) detected and removed: ${invalidChars.slice(0, 100).join(", ")} `
|
|
12376
|
+
);
|
|
12377
|
+
}
|
|
12378
|
+
if (typeof window !== "undefined" && window.toastr && warnings.length) {
|
|
12379
|
+
warnings.forEach((warning) => {
|
|
12380
|
+
window.toastr.warning(warning);
|
|
12381
|
+
});
|
|
12335
12382
|
}
|
|
12383
|
+
return [sanitizedVal, warnings];
|
|
12336
12384
|
}
|
|
12337
12385
|
__name(filterSequenceString, "filterSequenceString");
|
|
12386
|
+
function getAcceptedChars({
|
|
12387
|
+
isOligo,
|
|
12388
|
+
isProtein,
|
|
12389
|
+
isRna,
|
|
12390
|
+
isMixedRnaAndDna,
|
|
12391
|
+
includeStopCodon
|
|
12392
|
+
} = {}) {
|
|
12393
|
+
return isProtein ? `${protein_letters_withUandX.toLowerCase()}${includeStopCodon ? "*." : ""}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
|
|
12394
|
+
//just plain old dna
|
|
12395
|
+
ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
|
|
12396
|
+
);
|
|
12397
|
+
}
|
|
12398
|
+
__name(getAcceptedChars, "getAcceptedChars");
|
|
12399
|
+
function getReplaceChars({
|
|
12400
|
+
isOligo,
|
|
12401
|
+
isProtein,
|
|
12402
|
+
isRna,
|
|
12403
|
+
isMixedRnaAndDna
|
|
12404
|
+
} = {}) {
|
|
12405
|
+
return isProtein ? {} : isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
|
|
12406
|
+
//just plain old dna
|
|
12407
|
+
{}
|
|
12408
|
+
);
|
|
12409
|
+
}
|
|
12410
|
+
__name(getReplaceChars, "getReplaceChars");
|
|
12338
12411
|
function tidyUpAnnotation(_annotation, {
|
|
12339
12412
|
sequenceData = {},
|
|
12340
12413
|
convertAnnotationsFromAAIndices,
|
|
@@ -12463,14 +12536,6 @@ function coerceLocation({
|
|
|
12463
12536
|
}
|
|
12464
12537
|
}
|
|
12465
12538
|
__name(coerceLocation, "coerceLocation");
|
|
12466
|
-
function filterAminoAcidSequenceString(sequenceString, options) {
|
|
12467
|
-
options = options || {};
|
|
12468
|
-
if (options.includeStopCodon) {
|
|
12469
|
-
return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu.*]/gi, "");
|
|
12470
|
-
}
|
|
12471
|
-
return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu]/gi, "");
|
|
12472
|
-
}
|
|
12473
|
-
__name(filterAminoAcidSequenceString, "filterAminoAcidSequenceString");
|
|
12474
12539
|
function getDegenerateDnaStringFromAAString(aaString) {
|
|
12475
12540
|
return aaString.split("").map((char) => aminoAcidToDegenerateDnaMap[char.toLowerCase()] || "nnn").join("");
|
|
12476
12541
|
}
|
|
@@ -12482,11 +12547,10 @@ function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
12482
12547
|
removeUnwantedChars,
|
|
12483
12548
|
additionalValidChars,
|
|
12484
12549
|
noTranslationData,
|
|
12485
|
-
charOverrides,
|
|
12486
12550
|
doNotProvideIdsForAnnotations,
|
|
12487
|
-
proteinFilterOptions,
|
|
12488
12551
|
noCdsTranslations,
|
|
12489
|
-
convertAnnotationsFromAAIndices
|
|
12552
|
+
convertAnnotationsFromAAIndices,
|
|
12553
|
+
topLevelSeqData
|
|
12490
12554
|
} = options;
|
|
12491
12555
|
let seqData = lodashExports.cloneDeep(pSeqData);
|
|
12492
12556
|
const response = {
|
|
@@ -12516,16 +12580,15 @@ function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
12516
12580
|
}
|
|
12517
12581
|
if (removeUnwantedChars) {
|
|
12518
12582
|
if (seqData.isProtein) {
|
|
12519
|
-
seqData.proteinSequence
|
|
12520
|
-
|
|
12521
|
-
|
|
12522
|
-
|
|
12583
|
+
const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
|
|
12584
|
+
includeStopCodon: true
|
|
12585
|
+
}, topLevelSeqData || seqData));
|
|
12586
|
+
seqData.proteinSequence = newSeq;
|
|
12523
12587
|
} else {
|
|
12524
|
-
|
|
12525
|
-
|
|
12526
|
-
|
|
12527
|
-
|
|
12528
|
-
);
|
|
12588
|
+
const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
|
|
12589
|
+
additionalValidChars
|
|
12590
|
+
}, topLevelSeqData || seqData));
|
|
12591
|
+
seqData.sequence = newSeq;
|
|
12529
12592
|
}
|
|
12530
12593
|
}
|
|
12531
12594
|
if (seqData.isProtein) {
|
|
@@ -22665,7 +22728,6 @@ exports.degenerateRnaToAminoAcidMap = degenerateRnaToAminoAcidMap;
|
|
|
22665
22728
|
exports.deleteSequenceDataAtRange = deleteSequenceDataAtRange;
|
|
22666
22729
|
exports.doesEnzymeChopOutsideOfRecognitionSite = doesEnzymeChopOutsideOfRecognitionSite;
|
|
22667
22730
|
exports.featureColors = featureColors;
|
|
22668
|
-
exports.filterAminoAcidSequenceString = filterAminoAcidSequenceString;
|
|
22669
22731
|
exports.filterSequenceString = filterSequenceString;
|
|
22670
22732
|
exports.findNearestRangeOfSequenceOverlapToPosition = findNearestRangeOfSequenceOverlapToPosition;
|
|
22671
22733
|
exports.findOrfsInPlasmid = findOrfsInPlasmid;
|
package/index.mjs
CHANGED
|
@@ -5997,6 +5997,7 @@ lodash.exports;
|
|
|
5997
5997
|
})(lodash, lodash.exports);
|
|
5998
5998
|
var lodashExports = lodash.exports;
|
|
5999
5999
|
const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
|
|
6000
|
+
const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
|
|
6000
6001
|
const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
|
|
6001
6002
|
const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
|
|
6002
6003
|
const unambiguous_dna_letters = "GATC";
|
|
@@ -6065,6 +6066,7 @@ const bioData = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.definePrope
|
|
|
6065
6066
|
extended_protein_letters,
|
|
6066
6067
|
extended_protein_values,
|
|
6067
6068
|
protein_letters,
|
|
6069
|
+
protein_letters_withUandX,
|
|
6068
6070
|
unambiguous_dna_letters,
|
|
6069
6071
|
unambiguous_rna_letters
|
|
6070
6072
|
}, Symbol.toStringTag, { value: "Module" }));
|
|
@@ -12319,20 +12321,91 @@ const modifiableTypes = [
|
|
|
12319
12321
|
"primers",
|
|
12320
12322
|
"guides"
|
|
12321
12323
|
];
|
|
12322
|
-
function filterSequenceString(sequenceString,
|
|
12323
|
-
|
|
12324
|
-
|
|
12325
|
-
|
|
12326
|
-
|
|
12327
|
-
|
|
12328
|
-
|
|
12329
|
-
|
|
12324
|
+
function filterSequenceString(sequenceString, {
|
|
12325
|
+
additionalValidChars = "",
|
|
12326
|
+
isOligo,
|
|
12327
|
+
name,
|
|
12328
|
+
isProtein,
|
|
12329
|
+
isRna,
|
|
12330
|
+
isMixedRnaAndDna,
|
|
12331
|
+
includeStopCodon
|
|
12332
|
+
} = {}) {
|
|
12333
|
+
const acceptedChars = getAcceptedChars({
|
|
12334
|
+
isOligo,
|
|
12335
|
+
isProtein,
|
|
12336
|
+
isRna,
|
|
12337
|
+
isMixedRnaAndDna,
|
|
12338
|
+
includeStopCodon
|
|
12339
|
+
});
|
|
12340
|
+
const replaceChars = getReplaceChars({
|
|
12341
|
+
isOligo,
|
|
12342
|
+
isProtein,
|
|
12343
|
+
isRna,
|
|
12344
|
+
isMixedRnaAndDna
|
|
12345
|
+
});
|
|
12346
|
+
let sanitizedVal = "";
|
|
12347
|
+
const invalidChars = [];
|
|
12348
|
+
const chars = `${acceptedChars}${additionalValidChars.split("").join("\\")}`;
|
|
12349
|
+
const warnings = [];
|
|
12350
|
+
const replaceCount = {};
|
|
12351
|
+
sequenceString.split("").forEach((letter) => {
|
|
12352
|
+
const lowerLetter = letter.toLowerCase();
|
|
12353
|
+
if (replaceChars && replaceChars[lowerLetter]) {
|
|
12354
|
+
if (!replaceCount[lowerLetter]) {
|
|
12355
|
+
replaceCount[lowerLetter] = 0;
|
|
12356
|
+
}
|
|
12357
|
+
replaceCount[lowerLetter]++;
|
|
12358
|
+
const isUpper = lowerLetter !== letter;
|
|
12359
|
+
sanitizedVal += isUpper ? replaceChars[lowerLetter].toUpperCase() : replaceChars[lowerLetter];
|
|
12360
|
+
} else if (chars.includes(lowerLetter)) {
|
|
12361
|
+
sanitizedVal += letter;
|
|
12362
|
+
} else {
|
|
12363
|
+
invalidChars.push(letter);
|
|
12364
|
+
}
|
|
12365
|
+
});
|
|
12366
|
+
Object.keys(replaceCount).forEach((letter) => {
|
|
12367
|
+
warnings.push(
|
|
12368
|
+
`Replaced "${letter}" with "${replaceChars[letter]}"${replaceCount[letter] > 1 ? ` ${replaceCount[letter]} times` : ""}`
|
|
12330
12369
|
);
|
|
12331
|
-
}
|
|
12332
|
-
|
|
12370
|
+
});
|
|
12371
|
+
if (sequenceString.length !== sanitizedVal.length) {
|
|
12372
|
+
warnings.push(
|
|
12373
|
+
`${name ? `Sequence ${name}: ` : ""}Invalid character(s) detected and removed: ${invalidChars.slice(0, 100).join(", ")} `
|
|
12374
|
+
);
|
|
12375
|
+
}
|
|
12376
|
+
if (typeof window !== "undefined" && window.toastr && warnings.length) {
|
|
12377
|
+
warnings.forEach((warning) => {
|
|
12378
|
+
window.toastr.warning(warning);
|
|
12379
|
+
});
|
|
12333
12380
|
}
|
|
12381
|
+
return [sanitizedVal, warnings];
|
|
12334
12382
|
}
|
|
12335
12383
|
__name(filterSequenceString, "filterSequenceString");
|
|
12384
|
+
function getAcceptedChars({
|
|
12385
|
+
isOligo,
|
|
12386
|
+
isProtein,
|
|
12387
|
+
isRna,
|
|
12388
|
+
isMixedRnaAndDna,
|
|
12389
|
+
includeStopCodon
|
|
12390
|
+
} = {}) {
|
|
12391
|
+
return isProtein ? `${protein_letters_withUandX.toLowerCase()}${includeStopCodon ? "*." : ""}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
|
|
12392
|
+
//just plain old dna
|
|
12393
|
+
ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
|
|
12394
|
+
);
|
|
12395
|
+
}
|
|
12396
|
+
__name(getAcceptedChars, "getAcceptedChars");
|
|
12397
|
+
function getReplaceChars({
|
|
12398
|
+
isOligo,
|
|
12399
|
+
isProtein,
|
|
12400
|
+
isRna,
|
|
12401
|
+
isMixedRnaAndDna
|
|
12402
|
+
} = {}) {
|
|
12403
|
+
return isProtein ? {} : isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
|
|
12404
|
+
//just plain old dna
|
|
12405
|
+
{}
|
|
12406
|
+
);
|
|
12407
|
+
}
|
|
12408
|
+
__name(getReplaceChars, "getReplaceChars");
|
|
12336
12409
|
function tidyUpAnnotation(_annotation, {
|
|
12337
12410
|
sequenceData = {},
|
|
12338
12411
|
convertAnnotationsFromAAIndices,
|
|
@@ -12461,14 +12534,6 @@ function coerceLocation({
|
|
|
12461
12534
|
}
|
|
12462
12535
|
}
|
|
12463
12536
|
__name(coerceLocation, "coerceLocation");
|
|
12464
|
-
function filterAminoAcidSequenceString(sequenceString, options) {
|
|
12465
|
-
options = options || {};
|
|
12466
|
-
if (options.includeStopCodon) {
|
|
12467
|
-
return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu.*]/gi, "");
|
|
12468
|
-
}
|
|
12469
|
-
return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu]/gi, "");
|
|
12470
|
-
}
|
|
12471
|
-
__name(filterAminoAcidSequenceString, "filterAminoAcidSequenceString");
|
|
12472
12537
|
function getDegenerateDnaStringFromAAString(aaString) {
|
|
12473
12538
|
return aaString.split("").map((char) => aminoAcidToDegenerateDnaMap[char.toLowerCase()] || "nnn").join("");
|
|
12474
12539
|
}
|
|
@@ -12480,11 +12545,10 @@ function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
12480
12545
|
removeUnwantedChars,
|
|
12481
12546
|
additionalValidChars,
|
|
12482
12547
|
noTranslationData,
|
|
12483
|
-
charOverrides,
|
|
12484
12548
|
doNotProvideIdsForAnnotations,
|
|
12485
|
-
proteinFilterOptions,
|
|
12486
12549
|
noCdsTranslations,
|
|
12487
|
-
convertAnnotationsFromAAIndices
|
|
12550
|
+
convertAnnotationsFromAAIndices,
|
|
12551
|
+
topLevelSeqData
|
|
12488
12552
|
} = options;
|
|
12489
12553
|
let seqData = lodashExports.cloneDeep(pSeqData);
|
|
12490
12554
|
const response = {
|
|
@@ -12514,16 +12578,15 @@ function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
12514
12578
|
}
|
|
12515
12579
|
if (removeUnwantedChars) {
|
|
12516
12580
|
if (seqData.isProtein) {
|
|
12517
|
-
seqData.proteinSequence
|
|
12518
|
-
|
|
12519
|
-
|
|
12520
|
-
|
|
12581
|
+
const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
|
|
12582
|
+
includeStopCodon: true
|
|
12583
|
+
}, topLevelSeqData || seqData));
|
|
12584
|
+
seqData.proteinSequence = newSeq;
|
|
12521
12585
|
} else {
|
|
12522
|
-
|
|
12523
|
-
|
|
12524
|
-
|
|
12525
|
-
|
|
12526
|
-
);
|
|
12586
|
+
const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
|
|
12587
|
+
additionalValidChars
|
|
12588
|
+
}, topLevelSeqData || seqData));
|
|
12589
|
+
seqData.sequence = newSeq;
|
|
12527
12590
|
}
|
|
12528
12591
|
}
|
|
12529
12592
|
if (seqData.isProtein) {
|
|
@@ -22664,7 +22727,6 @@ export {
|
|
|
22664
22727
|
deleteSequenceDataAtRange,
|
|
22665
22728
|
doesEnzymeChopOutsideOfRecognitionSite,
|
|
22666
22729
|
featureColors,
|
|
22667
|
-
filterAminoAcidSequenceString,
|
|
22668
22730
|
filterSequenceString,
|
|
22669
22731
|
findNearestRangeOfSequenceOverlapToPosition,
|
|
22670
22732
|
findOrfsInPlasmid,
|
package/index.umd.js
CHANGED
|
@@ -6001,6 +6001,7 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
|
|
|
6001
6001
|
})(lodash, lodash.exports);
|
|
6002
6002
|
var lodashExports = lodash.exports;
|
|
6003
6003
|
const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
|
|
6004
|
+
const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
|
|
6004
6005
|
const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
|
|
6005
6006
|
const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
|
|
6006
6007
|
const unambiguous_dna_letters = "GATC";
|
|
@@ -6069,6 +6070,7 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
|
|
|
6069
6070
|
extended_protein_letters,
|
|
6070
6071
|
extended_protein_values,
|
|
6071
6072
|
protein_letters,
|
|
6073
|
+
protein_letters_withUandX,
|
|
6072
6074
|
unambiguous_dna_letters,
|
|
6073
6075
|
unambiguous_rna_letters
|
|
6074
6076
|
}, Symbol.toStringTag, { value: "Module" }));
|
|
@@ -12323,20 +12325,91 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
|
|
|
12323
12325
|
"primers",
|
|
12324
12326
|
"guides"
|
|
12325
12327
|
];
|
|
12326
|
-
function filterSequenceString(sequenceString,
|
|
12327
|
-
|
|
12328
|
-
|
|
12329
|
-
|
|
12330
|
-
|
|
12331
|
-
|
|
12332
|
-
|
|
12333
|
-
|
|
12328
|
+
function filterSequenceString(sequenceString, {
|
|
12329
|
+
additionalValidChars = "",
|
|
12330
|
+
isOligo,
|
|
12331
|
+
name,
|
|
12332
|
+
isProtein,
|
|
12333
|
+
isRna,
|
|
12334
|
+
isMixedRnaAndDna,
|
|
12335
|
+
includeStopCodon
|
|
12336
|
+
} = {}) {
|
|
12337
|
+
const acceptedChars = getAcceptedChars({
|
|
12338
|
+
isOligo,
|
|
12339
|
+
isProtein,
|
|
12340
|
+
isRna,
|
|
12341
|
+
isMixedRnaAndDna,
|
|
12342
|
+
includeStopCodon
|
|
12343
|
+
});
|
|
12344
|
+
const replaceChars = getReplaceChars({
|
|
12345
|
+
isOligo,
|
|
12346
|
+
isProtein,
|
|
12347
|
+
isRna,
|
|
12348
|
+
isMixedRnaAndDna
|
|
12349
|
+
});
|
|
12350
|
+
let sanitizedVal = "";
|
|
12351
|
+
const invalidChars = [];
|
|
12352
|
+
const chars = `${acceptedChars}${additionalValidChars.split("").join("\\")}`;
|
|
12353
|
+
const warnings = [];
|
|
12354
|
+
const replaceCount = {};
|
|
12355
|
+
sequenceString.split("").forEach((letter) => {
|
|
12356
|
+
const lowerLetter = letter.toLowerCase();
|
|
12357
|
+
if (replaceChars && replaceChars[lowerLetter]) {
|
|
12358
|
+
if (!replaceCount[lowerLetter]) {
|
|
12359
|
+
replaceCount[lowerLetter] = 0;
|
|
12360
|
+
}
|
|
12361
|
+
replaceCount[lowerLetter]++;
|
|
12362
|
+
const isUpper = lowerLetter !== letter;
|
|
12363
|
+
sanitizedVal += isUpper ? replaceChars[lowerLetter].toUpperCase() : replaceChars[lowerLetter];
|
|
12364
|
+
} else if (chars.includes(lowerLetter)) {
|
|
12365
|
+
sanitizedVal += letter;
|
|
12366
|
+
} else {
|
|
12367
|
+
invalidChars.push(letter);
|
|
12368
|
+
}
|
|
12369
|
+
});
|
|
12370
|
+
Object.keys(replaceCount).forEach((letter) => {
|
|
12371
|
+
warnings.push(
|
|
12372
|
+
`Replaced "${letter}" with "${replaceChars[letter]}"${replaceCount[letter] > 1 ? ` ${replaceCount[letter]} times` : ""}`
|
|
12334
12373
|
);
|
|
12335
|
-
}
|
|
12336
|
-
|
|
12374
|
+
});
|
|
12375
|
+
if (sequenceString.length !== sanitizedVal.length) {
|
|
12376
|
+
warnings.push(
|
|
12377
|
+
`${name ? `Sequence ${name}: ` : ""}Invalid character(s) detected and removed: ${invalidChars.slice(0, 100).join(", ")} `
|
|
12378
|
+
);
|
|
12379
|
+
}
|
|
12380
|
+
if (typeof window !== "undefined" && window.toastr && warnings.length) {
|
|
12381
|
+
warnings.forEach((warning) => {
|
|
12382
|
+
window.toastr.warning(warning);
|
|
12383
|
+
});
|
|
12337
12384
|
}
|
|
12385
|
+
return [sanitizedVal, warnings];
|
|
12338
12386
|
}
|
|
12339
12387
|
__name(filterSequenceString, "filterSequenceString");
|
|
12388
|
+
function getAcceptedChars({
|
|
12389
|
+
isOligo,
|
|
12390
|
+
isProtein,
|
|
12391
|
+
isRna,
|
|
12392
|
+
isMixedRnaAndDna,
|
|
12393
|
+
includeStopCodon
|
|
12394
|
+
} = {}) {
|
|
12395
|
+
return isProtein ? `${protein_letters_withUandX.toLowerCase()}${includeStopCodon ? "*." : ""}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
|
|
12396
|
+
//just plain old dna
|
|
12397
|
+
ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
|
|
12398
|
+
);
|
|
12399
|
+
}
|
|
12400
|
+
__name(getAcceptedChars, "getAcceptedChars");
|
|
12401
|
+
function getReplaceChars({
|
|
12402
|
+
isOligo,
|
|
12403
|
+
isProtein,
|
|
12404
|
+
isRna,
|
|
12405
|
+
isMixedRnaAndDna
|
|
12406
|
+
} = {}) {
|
|
12407
|
+
return isProtein ? {} : isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
|
|
12408
|
+
//just plain old dna
|
|
12409
|
+
{}
|
|
12410
|
+
);
|
|
12411
|
+
}
|
|
12412
|
+
__name(getReplaceChars, "getReplaceChars");
|
|
12340
12413
|
function tidyUpAnnotation(_annotation, {
|
|
12341
12414
|
sequenceData = {},
|
|
12342
12415
|
convertAnnotationsFromAAIndices,
|
|
@@ -12465,14 +12538,6 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
|
|
|
12465
12538
|
}
|
|
12466
12539
|
}
|
|
12467
12540
|
__name(coerceLocation, "coerceLocation");
|
|
12468
|
-
function filterAminoAcidSequenceString(sequenceString, options) {
|
|
12469
|
-
options = options || {};
|
|
12470
|
-
if (options.includeStopCodon) {
|
|
12471
|
-
return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu.*]/gi, "");
|
|
12472
|
-
}
|
|
12473
|
-
return sequenceString == null ? void 0 : sequenceString.replace(/[^xtgalmfwkqespvicyhrndu]/gi, "");
|
|
12474
|
-
}
|
|
12475
|
-
__name(filterAminoAcidSequenceString, "filterAminoAcidSequenceString");
|
|
12476
12541
|
function getDegenerateDnaStringFromAAString(aaString) {
|
|
12477
12542
|
return aaString.split("").map((char) => aminoAcidToDegenerateDnaMap[char.toLowerCase()] || "nnn").join("");
|
|
12478
12543
|
}
|
|
@@ -12484,11 +12549,10 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
|
|
|
12484
12549
|
removeUnwantedChars,
|
|
12485
12550
|
additionalValidChars,
|
|
12486
12551
|
noTranslationData,
|
|
12487
|
-
charOverrides,
|
|
12488
12552
|
doNotProvideIdsForAnnotations,
|
|
12489
|
-
proteinFilterOptions,
|
|
12490
12553
|
noCdsTranslations,
|
|
12491
|
-
convertAnnotationsFromAAIndices
|
|
12554
|
+
convertAnnotationsFromAAIndices,
|
|
12555
|
+
topLevelSeqData
|
|
12492
12556
|
} = options;
|
|
12493
12557
|
let seqData = lodashExports.cloneDeep(pSeqData);
|
|
12494
12558
|
const response = {
|
|
@@ -12518,16 +12582,15 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
|
|
|
12518
12582
|
}
|
|
12519
12583
|
if (removeUnwantedChars) {
|
|
12520
12584
|
if (seqData.isProtein) {
|
|
12521
|
-
seqData.proteinSequence
|
|
12522
|
-
|
|
12523
|
-
|
|
12524
|
-
|
|
12585
|
+
const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
|
|
12586
|
+
includeStopCodon: true
|
|
12587
|
+
}, topLevelSeqData || seqData));
|
|
12588
|
+
seqData.proteinSequence = newSeq;
|
|
12525
12589
|
} else {
|
|
12526
|
-
|
|
12527
|
-
|
|
12528
|
-
|
|
12529
|
-
|
|
12530
|
-
);
|
|
12590
|
+
const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
|
|
12591
|
+
additionalValidChars
|
|
12592
|
+
}, topLevelSeqData || seqData));
|
|
12593
|
+
seqData.sequence = newSeq;
|
|
12531
12594
|
}
|
|
12532
12595
|
}
|
|
12533
12596
|
if (seqData.isProtein) {
|
|
@@ -22667,7 +22730,6 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
|
|
|
22667
22730
|
exports2.deleteSequenceDataAtRange = deleteSequenceDataAtRange;
|
|
22668
22731
|
exports2.doesEnzymeChopOutsideOfRecognitionSite = doesEnzymeChopOutsideOfRecognitionSite;
|
|
22669
22732
|
exports2.featureColors = featureColors;
|
|
22670
|
-
exports2.filterAminoAcidSequenceString = filterAminoAcidSequenceString;
|
|
22671
22733
|
exports2.filterSequenceString = filterSequenceString;
|
|
22672
22734
|
exports2.findNearestRangeOfSequenceOverlapToPosition = findNearestRangeOfSequenceOverlapToPosition;
|
|
22673
22735
|
exports2.findOrfsInPlasmid = findOrfsInPlasmid;
|
package/package.json
CHANGED
package/src/bioData.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
//Adapted from biopython. Check the BIOPYTHON_LICENSE for licensing info
|
|
2
2
|
|
|
3
3
|
export const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
|
|
4
|
+
export const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
|
|
4
5
|
|
|
5
6
|
export const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
|
|
6
7
|
export const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
|
|
@@ -29,7 +30,6 @@ export const ambiguous_dna_values = {
|
|
|
29
30
|
N: "GATC"
|
|
30
31
|
};
|
|
31
32
|
|
|
32
|
-
|
|
33
33
|
export const extended_protein_values = {
|
|
34
34
|
A: "A",
|
|
35
35
|
B: "ND",
|
|
@@ -1,24 +1,117 @@
|
|
|
1
|
-
|
|
1
|
+
import {
|
|
2
|
+
ambiguous_dna_letters,
|
|
3
|
+
ambiguous_rna_letters,
|
|
4
|
+
protein_letters_withUandX
|
|
5
|
+
} from "./bioData";
|
|
2
6
|
|
|
3
|
-
//
|
|
4
7
|
export default function filterSequenceString(
|
|
5
8
|
sequenceString,
|
|
6
|
-
|
|
7
|
-
|
|
9
|
+
{
|
|
10
|
+
additionalValidChars = "",
|
|
11
|
+
isOligo,
|
|
12
|
+
name,
|
|
13
|
+
isProtein,
|
|
14
|
+
isRna,
|
|
15
|
+
isMixedRnaAndDna,
|
|
16
|
+
includeStopCodon
|
|
17
|
+
} = {}
|
|
8
18
|
) {
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
19
|
+
const acceptedChars = getAcceptedChars({
|
|
20
|
+
isOligo,
|
|
21
|
+
isProtein,
|
|
22
|
+
isRna,
|
|
23
|
+
isMixedRnaAndDna,
|
|
24
|
+
includeStopCodon
|
|
25
|
+
});
|
|
26
|
+
const replaceChars = getReplaceChars({
|
|
27
|
+
isOligo,
|
|
28
|
+
isProtein,
|
|
29
|
+
isRna,
|
|
30
|
+
isMixedRnaAndDna
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
let sanitizedVal = "";
|
|
34
|
+
const invalidChars = [];
|
|
35
|
+
const chars = `${acceptedChars}${additionalValidChars.split("").join("\\")}`;
|
|
36
|
+
const warnings = [];
|
|
37
|
+
const replaceCount = {};
|
|
38
|
+
sequenceString.split("").forEach(letter => {
|
|
39
|
+
const lowerLetter = letter.toLowerCase();
|
|
40
|
+
if (replaceChars && replaceChars[lowerLetter]) {
|
|
41
|
+
if (!replaceCount[lowerLetter]) {
|
|
42
|
+
replaceCount[lowerLetter] = 0;
|
|
43
|
+
}
|
|
44
|
+
replaceCount[lowerLetter]++;
|
|
45
|
+
const isUpper = lowerLetter !== letter;
|
|
46
|
+
sanitizedVal += isUpper
|
|
47
|
+
? replaceChars[lowerLetter].toUpperCase()
|
|
48
|
+
: replaceChars[lowerLetter];
|
|
49
|
+
} else if (chars.includes(lowerLetter)) {
|
|
50
|
+
sanitizedVal += letter;
|
|
51
|
+
} else {
|
|
52
|
+
invalidChars.push(letter);
|
|
53
|
+
}
|
|
54
|
+
});
|
|
55
|
+
//add replace count warnings
|
|
56
|
+
Object.keys(replaceCount).forEach(letter => {
|
|
57
|
+
warnings.push(
|
|
58
|
+
`Replaced "${letter}" with "${replaceChars[letter]}"${
|
|
59
|
+
replaceCount[letter] > 1 ? ` ${replaceCount[letter]} times` : ""
|
|
60
|
+
}`
|
|
61
|
+
);
|
|
62
|
+
});
|
|
63
|
+
if (sequenceString.length !== sanitizedVal.length) {
|
|
64
|
+
warnings.push(
|
|
65
|
+
`${
|
|
66
|
+
name ? `Sequence ${name}: ` : ""
|
|
67
|
+
}Invalid character(s) detected and removed: ${invalidChars
|
|
68
|
+
.slice(0, 100)
|
|
69
|
+
.join(", ")} `
|
|
20
70
|
);
|
|
21
|
-
} else {
|
|
22
|
-
return sequenceString;
|
|
23
71
|
}
|
|
72
|
+
if (typeof window !== "undefined" && window.toastr && warnings.length) {
|
|
73
|
+
warnings.forEach(warning => {
|
|
74
|
+
window.toastr.warning(warning);
|
|
75
|
+
});
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
return [sanitizedVal, warnings];
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
export function getAcceptedChars({
|
|
82
|
+
isOligo,
|
|
83
|
+
isProtein,
|
|
84
|
+
isRna,
|
|
85
|
+
isMixedRnaAndDna,
|
|
86
|
+
includeStopCodon
|
|
87
|
+
} = {}) {
|
|
88
|
+
return isProtein
|
|
89
|
+
? `${protein_letters_withUandX.toLowerCase()}${
|
|
90
|
+
includeStopCodon ? "*." : ""
|
|
91
|
+
}}`
|
|
92
|
+
: isOligo
|
|
93
|
+
? ambiguous_rna_letters.toLowerCase() + "t"
|
|
94
|
+
: isRna
|
|
95
|
+
? ambiguous_rna_letters.toLowerCase() + "t"
|
|
96
|
+
: isMixedRnaAndDna
|
|
97
|
+
? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
|
|
98
|
+
: //just plain old dna
|
|
99
|
+
ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase();
|
|
100
|
+
}
|
|
101
|
+
export function getReplaceChars({
|
|
102
|
+
isOligo,
|
|
103
|
+
isProtein,
|
|
104
|
+
isRna,
|
|
105
|
+
isMixedRnaAndDna
|
|
106
|
+
} = {}) {
|
|
107
|
+
return isProtein
|
|
108
|
+
? {}
|
|
109
|
+
: isOligo
|
|
110
|
+
? {}
|
|
111
|
+
: isRna
|
|
112
|
+
? { t: "u" }
|
|
113
|
+
: isMixedRnaAndDna
|
|
114
|
+
? {}
|
|
115
|
+
: //just plain old dna
|
|
116
|
+
{};
|
|
24
117
|
}
|
|
@@ -1,13 +1,72 @@
|
|
|
1
1
|
import filterSequenceString from "./filterSequenceString";
|
|
2
|
+
import { expect } from "vitest";
|
|
2
3
|
|
|
3
4
|
// Test suite for filterSequenceString. Every call returns a
// [sanitizedString, warnings] tuple; the tests below pin both parts.
describe("filterSequenceString", () => {
  it("should keep both t's and u's (no t->u conversion) for isOligo=true seqs", () => {
    const [str, warnings] = filterSequenceString("tatuuag--a", {
      isOligo: true
    });
    expect(str).toBe("tatuuaga");
    // no 'Replaced "t" with "u"' warning is expected for oligos; the only
    // warning is about the removed "-" characters
    expect(warnings[0]).toBe(
      "Invalid character(s) detected and removed: -, - "
    );
  });
  it("should not convert u's to t's for isDna (default isDna=true) seqs", () => {
    const [str, warnings] = filterSequenceString("tatuuag--a", {});
    // no 'Replaced "u" with "t"' warning is expected for dna; the only
    // warning is about the removed "-" characters
    expect(warnings[0]).toBe(
      "Invalid character(s) detected and removed: -, - "
    );
    expect(str).toBe("tatuuaga");
  });
  it("should filter out unwanted chars", () => {
    const [str, warnings] = filterSequenceString("tatag--a");
    expect(warnings[0]).toBe(
      "Invalid character(s) detected and removed: -, - "
    );
    expect(str).toBe("tataga");
  });
  it("should handle additional chars option", () => {
    const [str, warnings] = filterSequenceString("tatag--a", {
      additionalValidChars: "-"
    });
    expect(warnings.length).toBe(0);
    expect(str).toBe("tatag--a");
  });
  it("should handle additional chars option with multiple chars", () => {
    const [str, warnings] = filterSequenceString("tatag--a", {
      additionalValidChars: "f-q"
    });
    expect(warnings.length).toBe(0);
    expect(str).toBe("tatag--a");
  });

  it("when isProtein: true, should filter only valid amino acids by default", () => {
    const [str, warnings] = filterSequenceString(
      'bbb342"""xtgalmfwkqespvicyhrnd,,../',
      {
        isProtein: true
      }
    );
    expect(warnings[0]).toBe(
      'Invalid character(s) detected and removed: b, b, b, 3, 4, 2, ", ", ", ,, ,, ., ., / '
    );
    expect(str).toBe("xtgalmfwkqespvicyhrnd");
  });
  it("when isProtein: true, should handle upper case letters", () => {
    const [str, warnings] = filterSequenceString("xtgalmfWKQEspvicyhrnd", {
      isProtein: true
    });
    expect(warnings.length).toBe(0);
    expect(str).toBe("xtgalmfWKQEspvicyhrnd");
  });
  it("when isProtein: true, should handle the option to includeStopCodon by allowing periods", () => {
    const [str] = filterSequenceString('bbb342"""xtgalmfwkqespvicyhrnd,,../', {
      isProtein: true,
      includeStopCodon: true
    });

    expect(str).toBe("xtgalmfwkqespvicyhrnd..");
  });
});
|
package/src/index.js
CHANGED
|
@@ -51,7 +51,6 @@ export { default as aliasedEnzymesByName } from "./aliasedEnzymesByName";
|
|
|
51
51
|
export { default as defaultEnzymesByName } from "./defaultEnzymesByName";
|
|
52
52
|
export { default as generateSequenceData } from "./generateSequenceData";
|
|
53
53
|
export { default as generateAnnotations } from "./generateAnnotations";
|
|
54
|
-
export { default as filterAminoAcidSequenceString } from "./filterAminoAcidSequenceString";
|
|
55
54
|
export { default as filterSequenceString } from "./filterSequenceString";
|
|
56
55
|
export { default as findNearestRangeOfSequenceOverlapToPosition } from "./findNearestRangeOfSequenceOverlapToPosition";
|
|
57
56
|
export { default as findOrfsInPlasmid } from "./findOrfsInPlasmid";
|
package/src/proteinAlphabet.js
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
|
|
2
1
|
const proteinAlphabet = {
|
|
3
2
|
A: {
|
|
4
3
|
value: "A",
|
|
@@ -18,7 +17,7 @@ const proteinAlphabet = {
|
|
|
18
17
|
color: "hsl(258.1, 100%, 69%)",
|
|
19
18
|
mass: 156.18568
|
|
20
19
|
},
|
|
21
|
-
|
|
20
|
+
|
|
22
21
|
N: {
|
|
23
22
|
value: "N",
|
|
24
23
|
name: "Asparagine",
|
|
@@ -46,7 +45,7 @@ const proteinAlphabet = {
|
|
|
46
45
|
color: "hsl(335.1, 100%, 69%)",
|
|
47
46
|
mass: 103.1429
|
|
48
47
|
},
|
|
49
|
-
|
|
48
|
+
|
|
50
49
|
E: {
|
|
51
50
|
value: "E",
|
|
52
51
|
name: "Glutamic acid",
|
|
@@ -75,7 +74,6 @@ const proteinAlphabet = {
|
|
|
75
74
|
mass: 57.05132
|
|
76
75
|
},
|
|
77
76
|
|
|
78
|
-
|
|
79
77
|
H: {
|
|
80
78
|
value: "H",
|
|
81
79
|
name: "Histidine",
|
|
@@ -114,7 +112,6 @@ const proteinAlphabet = {
|
|
|
114
112
|
mass: 128.17228
|
|
115
113
|
},
|
|
116
114
|
|
|
117
|
-
|
|
118
115
|
M: {
|
|
119
116
|
value: "M",
|
|
120
117
|
name: "Methionine",
|
|
@@ -6,7 +6,6 @@ import { cloneDeep, flatMap } from "lodash";
|
|
|
6
6
|
import { annotationTypes } from "./annotationTypes";
|
|
7
7
|
import filterSequenceString from "./filterSequenceString";
|
|
8
8
|
import tidyUpAnnotation from "./tidyUpAnnotation";
|
|
9
|
-
import filterAminoAcidSequenceString from "./filterAminoAcidSequenceString";
|
|
10
9
|
import getDegenerateDnaStringFromAaString from "./getDegenerateDnaStringFromAAString";
|
|
11
10
|
import { getFeatureTypes } from "./featureTypesAndColors";
|
|
12
11
|
|
|
@@ -17,11 +16,10 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
17
16
|
removeUnwantedChars,
|
|
18
17
|
additionalValidChars,
|
|
19
18
|
noTranslationData,
|
|
20
|
-
charOverrides,
|
|
21
19
|
doNotProvideIdsForAnnotations,
|
|
22
|
-
proteinFilterOptions,
|
|
23
20
|
noCdsTranslations,
|
|
24
|
-
convertAnnotationsFromAAIndices
|
|
21
|
+
convertAnnotationsFromAAIndices,
|
|
22
|
+
topLevelSeqData
|
|
25
23
|
} = options;
|
|
26
24
|
let seqData = cloneDeep(pSeqData); //sequence is usually immutable, so we clone it and return it
|
|
27
25
|
const response = {
|
|
@@ -56,18 +54,17 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
56
54
|
}
|
|
57
55
|
if (removeUnwantedChars) {
|
|
58
56
|
if (seqData.isProtein) {
|
|
59
|
-
seqData.proteinSequence
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
);
|
|
57
|
+
const [newSeq] = filterSequenceString(seqData.proteinSequence, {
|
|
58
|
+
includeStopCodon: true,
|
|
59
|
+
...(topLevelSeqData || seqData)
|
|
60
|
+
});
|
|
61
|
+
seqData.proteinSequence = newSeq;
|
|
63
62
|
} else {
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
charOverrides
|
|
70
|
-
);
|
|
63
|
+
const [newSeq] = filterSequenceString(seqData.sequence, {
|
|
64
|
+
additionalValidChars,
|
|
65
|
+
...(topLevelSeqData || seqData)
|
|
66
|
+
});
|
|
67
|
+
seqData.sequence = newSeq;
|
|
71
68
|
}
|
|
72
69
|
}
|
|
73
70
|
if (seqData.isProtein) {
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export default function filterAminoAcidSequenceString(sequenceString: any, options: any): any;
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export {};
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
//
|
|
2
|
-
export default function filterAminoAcidSequenceString(sequenceString, options) {
|
|
3
|
-
options = options || {};
|
|
4
|
-
if (options.includeStopCodon) {
|
|
5
|
-
//tnrtodo this maybe needs the stop codon char in it?
|
|
6
|
-
return sequenceString?.replace(/[^xtgalmfwkqespvicyhrndu.*]/gi, "");
|
|
7
|
-
}
|
|
8
|
-
// ac.throw(ac.string, sequenceString);
|
|
9
|
-
return sequenceString?.replace(/[^xtgalmfwkqespvicyhrndu]/gi, "");
|
|
10
|
-
}
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
import assert from "assert";
|
|
2
|
-
import filterAminoAcidSequenceString from "./filterAminoAcidSequenceString";
|
|
3
|
-
describe("filterAminoAcidSequenceString", () => {
|
|
4
|
-
it("should filter only valid amino acids by default", () => {
|
|
5
|
-
const filteredString = filterAminoAcidSequenceString(
|
|
6
|
-
'bbb342"""xtgalmfwkqespvicyhrnd,,../'
|
|
7
|
-
);
|
|
8
|
-
assert.equal(filteredString, "xtgalmfwkqespvicyhrnd");
|
|
9
|
-
});
|
|
10
|
-
it("should handle upper case letters", () => {
|
|
11
|
-
const filteredString = filterAminoAcidSequenceString(
|
|
12
|
-
"xtgalmfWKQEspvicyhrnd"
|
|
13
|
-
);
|
|
14
|
-
assert.equal(filteredString, "xtgalmfWKQEspvicyhrnd");
|
|
15
|
-
});
|
|
16
|
-
it("should handle the option to includeStopCodon by allowing periods", () => {
|
|
17
|
-
const options = { includeStopCodon: true };
|
|
18
|
-
const filteredString = filterAminoAcidSequenceString(
|
|
19
|
-
'bbb342"""xtgalmfwkqespvicyhrnd,,../',
|
|
20
|
-
options
|
|
21
|
-
);
|
|
22
|
-
assert.equal(filteredString, "xtgalmfwkqespvicyhrnd..");
|
|
23
|
-
});
|
|
24
|
-
});
|