@teselagen/bio-parsers 0.4.1 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +16 -20
- package/index.mjs +16 -20
- package/index.umd.js +16 -20
- package/package.json +2 -2
- package/src/utils/validateSequence.js +2 -4
package/index.js
CHANGED
|
@@ -6170,7 +6170,7 @@ lodash.exports;
|
|
|
6170
6170
|
})(lodash, lodash.exports);
|
|
6171
6171
|
var lodashExports = lodash.exports;
|
|
6172
6172
|
const _ = /* @__PURE__ */ getDefaultExportFromCjs(lodashExports);
|
|
6173
|
-
const
|
|
6173
|
+
const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO";
|
|
6174
6174
|
const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
|
|
6175
6175
|
const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
|
|
6176
6176
|
const aminoAcidToDegenerateDnaMap = {
|
|
@@ -11281,15 +11281,13 @@ function filterSequenceString(sequenceString, {
|
|
|
11281
11281
|
name,
|
|
11282
11282
|
isProtein,
|
|
11283
11283
|
isRna,
|
|
11284
|
-
isMixedRnaAndDna
|
|
11285
|
-
includeStopCodon
|
|
11284
|
+
isMixedRnaAndDna
|
|
11286
11285
|
} = {}) {
|
|
11287
11286
|
const acceptedChars = getAcceptedChars({
|
|
11288
11287
|
isOligo,
|
|
11289
11288
|
isProtein,
|
|
11290
11289
|
isRna,
|
|
11291
|
-
isMixedRnaAndDna
|
|
11292
|
-
includeStopCodon
|
|
11290
|
+
isMixedRnaAndDna
|
|
11293
11291
|
});
|
|
11294
11292
|
const replaceChars = getReplaceChars({
|
|
11295
11293
|
isOligo,
|
|
@@ -11339,10 +11337,9 @@ function getAcceptedChars({
|
|
|
11339
11337
|
isOligo,
|
|
11340
11338
|
isProtein,
|
|
11341
11339
|
isRna,
|
|
11342
|
-
isMixedRnaAndDna
|
|
11343
|
-
includeStopCodon
|
|
11340
|
+
isMixedRnaAndDna
|
|
11344
11341
|
} = {}) {
|
|
11345
|
-
return isProtein ? `${
|
|
11342
|
+
return isProtein ? `${extended_protein_letters.toLowerCase()}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
|
|
11346
11343
|
//just plain old dna
|
|
11347
11344
|
ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
|
|
11348
11345
|
);
|
|
@@ -11354,9 +11351,12 @@ function getReplaceChars({
|
|
|
11354
11351
|
isRna,
|
|
11355
11352
|
isMixedRnaAndDna
|
|
11356
11353
|
} = {}) {
|
|
11357
|
-
return isProtein ? {} :
|
|
11358
|
-
//
|
|
11359
|
-
{}
|
|
11354
|
+
return isProtein ? {} : (
|
|
11355
|
+
// {".": "*"}
|
|
11356
|
+
isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
|
|
11357
|
+
//just plain old dna
|
|
11358
|
+
{}
|
|
11359
|
+
)
|
|
11360
11360
|
);
|
|
11361
11361
|
}
|
|
11362
11362
|
__name(getReplaceChars, "getReplaceChars");
|
|
@@ -11496,7 +11496,7 @@ function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
11496
11496
|
const {
|
|
11497
11497
|
annotationsAsObjects,
|
|
11498
11498
|
logMessages,
|
|
11499
|
-
|
|
11499
|
+
doNotRemoveInvalidChars,
|
|
11500
11500
|
additionalValidChars,
|
|
11501
11501
|
noTranslationData,
|
|
11502
11502
|
doNotProvideIdsForAnnotations,
|
|
@@ -11530,11 +11530,9 @@ function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
11530
11530
|
if (seqData.isRna) {
|
|
11531
11531
|
seqData.sequence = seqData.sequence.replace(/t/gi, "u");
|
|
11532
11532
|
}
|
|
11533
|
-
if (
|
|
11533
|
+
if (!doNotRemoveInvalidChars) {
|
|
11534
11534
|
if (seqData.isProtein) {
|
|
11535
|
-
const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
|
|
11536
|
-
includeStopCodon: true
|
|
11537
|
-
}, topLevelSeqData || seqData));
|
|
11535
|
+
const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData));
|
|
11538
11536
|
seqData.proteinSequence = newSeq;
|
|
11539
11537
|
} else {
|
|
11540
11538
|
const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
|
|
@@ -19375,8 +19373,7 @@ function validateSequence(sequence, options = {}) {
|
|
|
19375
19373
|
inclusive1BasedEnd,
|
|
19376
19374
|
additionalValidChars,
|
|
19377
19375
|
allowOverflowAnnotations,
|
|
19378
|
-
coerceFeatureTypes
|
|
19379
|
-
includeStopCodon
|
|
19376
|
+
coerceFeatureTypes
|
|
19380
19377
|
} = options;
|
|
19381
19378
|
[
|
|
19382
19379
|
"isDNA",
|
|
@@ -19436,8 +19433,7 @@ function validateSequence(sequence, options = {}) {
|
|
|
19436
19433
|
const [validChars, warnings] = filterSequenceString(sequence.sequence, {
|
|
19437
19434
|
name: sequence.name,
|
|
19438
19435
|
isProtein: true,
|
|
19439
|
-
additionalValidChars
|
|
19440
|
-
includeStopCodon
|
|
19436
|
+
additionalValidChars
|
|
19441
19437
|
});
|
|
19442
19438
|
if (validChars !== sequence.sequence) {
|
|
19443
19439
|
sequence.sequence = validChars;
|
package/index.mjs
CHANGED
|
@@ -6168,7 +6168,7 @@ lodash.exports;
|
|
|
6168
6168
|
})(lodash, lodash.exports);
|
|
6169
6169
|
var lodashExports = lodash.exports;
|
|
6170
6170
|
const _ = /* @__PURE__ */ getDefaultExportFromCjs(lodashExports);
|
|
6171
|
-
const
|
|
6171
|
+
const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO";
|
|
6172
6172
|
const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
|
|
6173
6173
|
const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
|
|
6174
6174
|
const aminoAcidToDegenerateDnaMap = {
|
|
@@ -11279,15 +11279,13 @@ function filterSequenceString(sequenceString, {
|
|
|
11279
11279
|
name,
|
|
11280
11280
|
isProtein,
|
|
11281
11281
|
isRna,
|
|
11282
|
-
isMixedRnaAndDna
|
|
11283
|
-
includeStopCodon
|
|
11282
|
+
isMixedRnaAndDna
|
|
11284
11283
|
} = {}) {
|
|
11285
11284
|
const acceptedChars = getAcceptedChars({
|
|
11286
11285
|
isOligo,
|
|
11287
11286
|
isProtein,
|
|
11288
11287
|
isRna,
|
|
11289
|
-
isMixedRnaAndDna
|
|
11290
|
-
includeStopCodon
|
|
11288
|
+
isMixedRnaAndDna
|
|
11291
11289
|
});
|
|
11292
11290
|
const replaceChars = getReplaceChars({
|
|
11293
11291
|
isOligo,
|
|
@@ -11337,10 +11335,9 @@ function getAcceptedChars({
|
|
|
11337
11335
|
isOligo,
|
|
11338
11336
|
isProtein,
|
|
11339
11337
|
isRna,
|
|
11340
|
-
isMixedRnaAndDna
|
|
11341
|
-
includeStopCodon
|
|
11338
|
+
isMixedRnaAndDna
|
|
11342
11339
|
} = {}) {
|
|
11343
|
-
return isProtein ? `${
|
|
11340
|
+
return isProtein ? `${extended_protein_letters.toLowerCase()}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
|
|
11344
11341
|
//just plain old dna
|
|
11345
11342
|
ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
|
|
11346
11343
|
);
|
|
@@ -11352,9 +11349,12 @@ function getReplaceChars({
|
|
|
11352
11349
|
isRna,
|
|
11353
11350
|
isMixedRnaAndDna
|
|
11354
11351
|
} = {}) {
|
|
11355
|
-
return isProtein ? {} :
|
|
11356
|
-
//
|
|
11357
|
-
{}
|
|
11352
|
+
return isProtein ? {} : (
|
|
11353
|
+
// {".": "*"}
|
|
11354
|
+
isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
|
|
11355
|
+
//just plain old dna
|
|
11356
|
+
{}
|
|
11357
|
+
)
|
|
11358
11358
|
);
|
|
11359
11359
|
}
|
|
11360
11360
|
__name(getReplaceChars, "getReplaceChars");
|
|
@@ -11494,7 +11494,7 @@ function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
11494
11494
|
const {
|
|
11495
11495
|
annotationsAsObjects,
|
|
11496
11496
|
logMessages,
|
|
11497
|
-
|
|
11497
|
+
doNotRemoveInvalidChars,
|
|
11498
11498
|
additionalValidChars,
|
|
11499
11499
|
noTranslationData,
|
|
11500
11500
|
doNotProvideIdsForAnnotations,
|
|
@@ -11528,11 +11528,9 @@ function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
11528
11528
|
if (seqData.isRna) {
|
|
11529
11529
|
seqData.sequence = seqData.sequence.replace(/t/gi, "u");
|
|
11530
11530
|
}
|
|
11531
|
-
if (
|
|
11531
|
+
if (!doNotRemoveInvalidChars) {
|
|
11532
11532
|
if (seqData.isProtein) {
|
|
11533
|
-
const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
|
|
11534
|
-
includeStopCodon: true
|
|
11535
|
-
}, topLevelSeqData || seqData));
|
|
11533
|
+
const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData));
|
|
11536
11534
|
seqData.proteinSequence = newSeq;
|
|
11537
11535
|
} else {
|
|
11538
11536
|
const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
|
|
@@ -19373,8 +19371,7 @@ function validateSequence(sequence, options = {}) {
|
|
|
19373
19371
|
inclusive1BasedEnd,
|
|
19374
19372
|
additionalValidChars,
|
|
19375
19373
|
allowOverflowAnnotations,
|
|
19376
|
-
coerceFeatureTypes
|
|
19377
|
-
includeStopCodon
|
|
19374
|
+
coerceFeatureTypes
|
|
19378
19375
|
} = options;
|
|
19379
19376
|
[
|
|
19380
19377
|
"isDNA",
|
|
@@ -19434,8 +19431,7 @@ function validateSequence(sequence, options = {}) {
|
|
|
19434
19431
|
const [validChars, warnings] = filterSequenceString(sequence.sequence, {
|
|
19435
19432
|
name: sequence.name,
|
|
19436
19433
|
isProtein: true,
|
|
19437
|
-
additionalValidChars
|
|
19438
|
-
includeStopCodon
|
|
19434
|
+
additionalValidChars
|
|
19439
19435
|
});
|
|
19440
19436
|
if (validChars !== sequence.sequence) {
|
|
19441
19437
|
sequence.sequence = validChars;
|
package/index.umd.js
CHANGED
|
@@ -6172,7 +6172,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
6172
6172
|
})(lodash, lodash.exports);
|
|
6173
6173
|
var lodashExports = lodash.exports;
|
|
6174
6174
|
const _ = /* @__PURE__ */ getDefaultExportFromCjs(lodashExports);
|
|
6175
|
-
const
|
|
6175
|
+
const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO";
|
|
6176
6176
|
const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
|
|
6177
6177
|
const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
|
|
6178
6178
|
const aminoAcidToDegenerateDnaMap = {
|
|
@@ -11283,15 +11283,13 @@ var __async = (__this, __arguments, generator) => {
|
|
|
11283
11283
|
name: name2,
|
|
11284
11284
|
isProtein,
|
|
11285
11285
|
isRna,
|
|
11286
|
-
isMixedRnaAndDna
|
|
11287
|
-
includeStopCodon
|
|
11286
|
+
isMixedRnaAndDna
|
|
11288
11287
|
} = {}) {
|
|
11289
11288
|
const acceptedChars = getAcceptedChars({
|
|
11290
11289
|
isOligo,
|
|
11291
11290
|
isProtein,
|
|
11292
11291
|
isRna,
|
|
11293
|
-
isMixedRnaAndDna
|
|
11294
|
-
includeStopCodon
|
|
11292
|
+
isMixedRnaAndDna
|
|
11295
11293
|
});
|
|
11296
11294
|
const replaceChars = getReplaceChars({
|
|
11297
11295
|
isOligo,
|
|
@@ -11341,10 +11339,9 @@ var __async = (__this, __arguments, generator) => {
|
|
|
11341
11339
|
isOligo,
|
|
11342
11340
|
isProtein,
|
|
11343
11341
|
isRna,
|
|
11344
|
-
isMixedRnaAndDna
|
|
11345
|
-
includeStopCodon
|
|
11342
|
+
isMixedRnaAndDna
|
|
11346
11343
|
} = {}) {
|
|
11347
|
-
return isProtein ? `${
|
|
11344
|
+
return isProtein ? `${extended_protein_letters.toLowerCase()}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
|
|
11348
11345
|
//just plain old dna
|
|
11349
11346
|
ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
|
|
11350
11347
|
);
|
|
@@ -11356,9 +11353,12 @@ var __async = (__this, __arguments, generator) => {
|
|
|
11356
11353
|
isRna,
|
|
11357
11354
|
isMixedRnaAndDna
|
|
11358
11355
|
} = {}) {
|
|
11359
|
-
return isProtein ? {} :
|
|
11360
|
-
//
|
|
11361
|
-
{}
|
|
11356
|
+
return isProtein ? {} : (
|
|
11357
|
+
// {".": "*"}
|
|
11358
|
+
isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
|
|
11359
|
+
//just plain old dna
|
|
11360
|
+
{}
|
|
11361
|
+
)
|
|
11362
11362
|
);
|
|
11363
11363
|
}
|
|
11364
11364
|
__name(getReplaceChars, "getReplaceChars");
|
|
@@ -11498,7 +11498,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
11498
11498
|
const {
|
|
11499
11499
|
annotationsAsObjects,
|
|
11500
11500
|
logMessages,
|
|
11501
|
-
|
|
11501
|
+
doNotRemoveInvalidChars,
|
|
11502
11502
|
additionalValidChars,
|
|
11503
11503
|
noTranslationData,
|
|
11504
11504
|
doNotProvideIdsForAnnotations,
|
|
@@ -11532,11 +11532,9 @@ var __async = (__this, __arguments, generator) => {
|
|
|
11532
11532
|
if (seqData.isRna) {
|
|
11533
11533
|
seqData.sequence = seqData.sequence.replace(/t/gi, "u");
|
|
11534
11534
|
}
|
|
11535
|
-
if (
|
|
11535
|
+
if (!doNotRemoveInvalidChars) {
|
|
11536
11536
|
if (seqData.isProtein) {
|
|
11537
|
-
const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
|
|
11538
|
-
includeStopCodon: true
|
|
11539
|
-
}, topLevelSeqData || seqData));
|
|
11537
|
+
const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData));
|
|
11540
11538
|
seqData.proteinSequence = newSeq;
|
|
11541
11539
|
} else {
|
|
11542
11540
|
const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
|
|
@@ -19377,8 +19375,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
19377
19375
|
inclusive1BasedEnd,
|
|
19378
19376
|
additionalValidChars,
|
|
19379
19377
|
allowOverflowAnnotations,
|
|
19380
|
-
coerceFeatureTypes
|
|
19381
|
-
includeStopCodon
|
|
19378
|
+
coerceFeatureTypes
|
|
19382
19379
|
} = options;
|
|
19383
19380
|
[
|
|
19384
19381
|
"isDNA",
|
|
@@ -19438,8 +19435,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
19438
19435
|
const [validChars, warnings] = filterSequenceString(sequence.sequence, {
|
|
19439
19436
|
name: sequence.name,
|
|
19440
19437
|
isProtein: true,
|
|
19441
|
-
additionalValidChars
|
|
19442
|
-
includeStopCodon
|
|
19438
|
+
additionalValidChars
|
|
19443
19439
|
});
|
|
19444
19440
|
if (validChars !== sequence.sequence) {
|
|
19445
19441
|
sequence.sequence = validChars;
|
package/package.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@teselagen/bio-parsers",
|
|
3
|
-
"version": "0.4.1",
|
|
3
|
+
"version": "0.4.3",
|
|
4
4
|
"dependencies": {
|
|
5
|
-
"@teselagen/sequence-utils": "0.3.
|
|
5
|
+
"@teselagen/sequence-utils": "0.3.11",
|
|
6
6
|
"@teselagen/range-utils": "0.3.7",
|
|
7
7
|
"@gmod/gff": "^1.2.1",
|
|
8
8
|
"buffer": "^6.0.3",
|
|
package/src/utils/validateSequence.js
CHANGED
|
@@ -29,8 +29,7 @@ export default function validateSequence(sequence, options = {}) {
|
|
|
29
29
|
inclusive1BasedEnd,
|
|
30
30
|
additionalValidChars,
|
|
31
31
|
allowOverflowAnnotations,
|
|
32
|
-
coerceFeatureTypes
|
|
33
|
-
includeStopCodon
|
|
32
|
+
coerceFeatureTypes
|
|
34
33
|
} = options;
|
|
35
34
|
[
|
|
36
35
|
"isDNA",
|
|
@@ -96,8 +95,7 @@ export default function validateSequence(sequence, options = {}) {
|
|
|
96
95
|
const [validChars, warnings] = filterSequenceString(sequence.sequence, {
|
|
97
96
|
name: sequence.name,
|
|
98
97
|
isProtein: true,
|
|
99
|
-
additionalValidChars
|
|
100
|
-
includeStopCodon
|
|
98
|
+
additionalValidChars
|
|
101
99
|
});
|
|
102
100
|
if (validChars !== sequence.sequence) {
|
|
103
101
|
sequence.sequence = validChars;
|