@teselagen/bio-parsers 0.4.1 → 0.4.2

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/index.js CHANGED
@@ -6170,7 +6170,7 @@ lodash.exports;
6170
6170
  })(lodash, lodash.exports);
6171
6171
  var lodashExports = lodash.exports;
6172
6172
  const _ = /* @__PURE__ */ getDefaultExportFromCjs(lodashExports);
6173
- const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
6173
+ const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO*";
6174
6174
  const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
6175
6175
  const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
6176
6176
  const aminoAcidToDegenerateDnaMap = {
@@ -11281,15 +11281,13 @@ function filterSequenceString(sequenceString, {
11281
11281
  name,
11282
11282
  isProtein,
11283
11283
  isRna,
11284
- isMixedRnaAndDna,
11285
- includeStopCodon
11284
+ isMixedRnaAndDna
11286
11285
  } = {}) {
11287
11286
  const acceptedChars = getAcceptedChars({
11288
11287
  isOligo,
11289
11288
  isProtein,
11290
11289
  isRna,
11291
- isMixedRnaAndDna,
11292
- includeStopCodon
11290
+ isMixedRnaAndDna
11293
11291
  });
11294
11292
  const replaceChars = getReplaceChars({
11295
11293
  isOligo,
@@ -11339,10 +11337,9 @@ function getAcceptedChars({
11339
11337
  isOligo,
11340
11338
  isProtein,
11341
11339
  isRna,
11342
- isMixedRnaAndDna,
11343
- includeStopCodon
11340
+ isMixedRnaAndDna
11344
11341
  } = {}) {
11345
- return isProtein ? `${protein_letters_withUandX.toLowerCase()}${includeStopCodon ? "*." : ""}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
11342
+ return isProtein ? `${extended_protein_letters.toLowerCase()}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
11346
11343
  //just plain old dna
11347
11344
  ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
11348
11345
  );
@@ -11496,7 +11493,7 @@ function tidyUpSequenceData(pSeqData, options = {}) {
11496
11493
  const {
11497
11494
  annotationsAsObjects,
11498
11495
  logMessages,
11499
- removeUnwantedChars,
11496
+ doNotRemoveInvalidChars,
11500
11497
  additionalValidChars,
11501
11498
  noTranslationData,
11502
11499
  doNotProvideIdsForAnnotations,
@@ -11530,11 +11527,9 @@ function tidyUpSequenceData(pSeqData, options = {}) {
11530
11527
  if (seqData.isRna) {
11531
11528
  seqData.sequence = seqData.sequence.replace(/t/gi, "u");
11532
11529
  }
11533
- if (removeUnwantedChars) {
11530
+ if (!doNotRemoveInvalidChars) {
11534
11531
  if (seqData.isProtein) {
11535
- const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
11536
- includeStopCodon: true
11537
- }, topLevelSeqData || seqData));
11532
+ const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData));
11538
11533
  seqData.proteinSequence = newSeq;
11539
11534
  } else {
11540
11535
  const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
@@ -19375,8 +19370,7 @@ function validateSequence(sequence, options = {}) {
19375
19370
  inclusive1BasedEnd,
19376
19371
  additionalValidChars,
19377
19372
  allowOverflowAnnotations,
19378
- coerceFeatureTypes,
19379
- includeStopCodon
19373
+ coerceFeatureTypes
19380
19374
  } = options;
19381
19375
  [
19382
19376
  "isDNA",
@@ -19436,8 +19430,7 @@ function validateSequence(sequence, options = {}) {
19436
19430
  const [validChars, warnings] = filterSequenceString(sequence.sequence, {
19437
19431
  name: sequence.name,
19438
19432
  isProtein: true,
19439
- additionalValidChars,
19440
- includeStopCodon
19433
+ additionalValidChars
19441
19434
  });
19442
19435
  if (validChars !== sequence.sequence) {
19443
19436
  sequence.sequence = validChars;
package/index.mjs CHANGED
@@ -6168,7 +6168,7 @@ lodash.exports;
6168
6168
  })(lodash, lodash.exports);
6169
6169
  var lodashExports = lodash.exports;
6170
6170
  const _ = /* @__PURE__ */ getDefaultExportFromCjs(lodashExports);
6171
- const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
6171
+ const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO*";
6172
6172
  const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
6173
6173
  const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
6174
6174
  const aminoAcidToDegenerateDnaMap = {
@@ -11279,15 +11279,13 @@ function filterSequenceString(sequenceString, {
11279
11279
  name,
11280
11280
  isProtein,
11281
11281
  isRna,
11282
- isMixedRnaAndDna,
11283
- includeStopCodon
11282
+ isMixedRnaAndDna
11284
11283
  } = {}) {
11285
11284
  const acceptedChars = getAcceptedChars({
11286
11285
  isOligo,
11287
11286
  isProtein,
11288
11287
  isRna,
11289
- isMixedRnaAndDna,
11290
- includeStopCodon
11288
+ isMixedRnaAndDna
11291
11289
  });
11292
11290
  const replaceChars = getReplaceChars({
11293
11291
  isOligo,
@@ -11337,10 +11335,9 @@ function getAcceptedChars({
11337
11335
  isOligo,
11338
11336
  isProtein,
11339
11337
  isRna,
11340
- isMixedRnaAndDna,
11341
- includeStopCodon
11338
+ isMixedRnaAndDna
11342
11339
  } = {}) {
11343
- return isProtein ? `${protein_letters_withUandX.toLowerCase()}${includeStopCodon ? "*." : ""}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
11340
+ return isProtein ? `${extended_protein_letters.toLowerCase()}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
11344
11341
  //just plain old dna
11345
11342
  ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
11346
11343
  );
@@ -11494,7 +11491,7 @@ function tidyUpSequenceData(pSeqData, options = {}) {
11494
11491
  const {
11495
11492
  annotationsAsObjects,
11496
11493
  logMessages,
11497
- removeUnwantedChars,
11494
+ doNotRemoveInvalidChars,
11498
11495
  additionalValidChars,
11499
11496
  noTranslationData,
11500
11497
  doNotProvideIdsForAnnotations,
@@ -11528,11 +11525,9 @@ function tidyUpSequenceData(pSeqData, options = {}) {
11528
11525
  if (seqData.isRna) {
11529
11526
  seqData.sequence = seqData.sequence.replace(/t/gi, "u");
11530
11527
  }
11531
- if (removeUnwantedChars) {
11528
+ if (!doNotRemoveInvalidChars) {
11532
11529
  if (seqData.isProtein) {
11533
- const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
11534
- includeStopCodon: true
11535
- }, topLevelSeqData || seqData));
11530
+ const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData));
11536
11531
  seqData.proteinSequence = newSeq;
11537
11532
  } else {
11538
11533
  const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
@@ -19373,8 +19368,7 @@ function validateSequence(sequence, options = {}) {
19373
19368
  inclusive1BasedEnd,
19374
19369
  additionalValidChars,
19375
19370
  allowOverflowAnnotations,
19376
- coerceFeatureTypes,
19377
- includeStopCodon
19371
+ coerceFeatureTypes
19378
19372
  } = options;
19379
19373
  [
19380
19374
  "isDNA",
@@ -19434,8 +19428,7 @@ function validateSequence(sequence, options = {}) {
19434
19428
  const [validChars, warnings] = filterSequenceString(sequence.sequence, {
19435
19429
  name: sequence.name,
19436
19430
  isProtein: true,
19437
- additionalValidChars,
19438
- includeStopCodon
19431
+ additionalValidChars
19439
19432
  });
19440
19433
  if (validChars !== sequence.sequence) {
19441
19434
  sequence.sequence = validChars;
package/index.umd.js CHANGED
@@ -6172,7 +6172,7 @@ var __async = (__this, __arguments, generator) => {
6172
6172
  })(lodash, lodash.exports);
6173
6173
  var lodashExports = lodash.exports;
6174
6174
  const _ = /* @__PURE__ */ getDefaultExportFromCjs(lodashExports);
6175
- const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
6175
+ const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO*";
6176
6176
  const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
6177
6177
  const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
6178
6178
  const aminoAcidToDegenerateDnaMap = {
@@ -11283,15 +11283,13 @@ var __async = (__this, __arguments, generator) => {
11283
11283
  name: name2,
11284
11284
  isProtein,
11285
11285
  isRna,
11286
- isMixedRnaAndDna,
11287
- includeStopCodon
11286
+ isMixedRnaAndDna
11288
11287
  } = {}) {
11289
11288
  const acceptedChars = getAcceptedChars({
11290
11289
  isOligo,
11291
11290
  isProtein,
11292
11291
  isRna,
11293
- isMixedRnaAndDna,
11294
- includeStopCodon
11292
+ isMixedRnaAndDna
11295
11293
  });
11296
11294
  const replaceChars = getReplaceChars({
11297
11295
  isOligo,
@@ -11341,10 +11339,9 @@ var __async = (__this, __arguments, generator) => {
11341
11339
  isOligo,
11342
11340
  isProtein,
11343
11341
  isRna,
11344
- isMixedRnaAndDna,
11345
- includeStopCodon
11342
+ isMixedRnaAndDna
11346
11343
  } = {}) {
11347
- return isProtein ? `${protein_letters_withUandX.toLowerCase()}${includeStopCodon ? "*." : ""}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
11344
+ return isProtein ? `${extended_protein_letters.toLowerCase()}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
11348
11345
  //just plain old dna
11349
11346
  ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
11350
11347
  );
@@ -11498,7 +11495,7 @@ var __async = (__this, __arguments, generator) => {
11498
11495
  const {
11499
11496
  annotationsAsObjects,
11500
11497
  logMessages,
11501
- removeUnwantedChars,
11498
+ doNotRemoveInvalidChars,
11502
11499
  additionalValidChars,
11503
11500
  noTranslationData,
11504
11501
  doNotProvideIdsForAnnotations,
@@ -11532,11 +11529,9 @@ var __async = (__this, __arguments, generator) => {
11532
11529
  if (seqData.isRna) {
11533
11530
  seqData.sequence = seqData.sequence.replace(/t/gi, "u");
11534
11531
  }
11535
- if (removeUnwantedChars) {
11532
+ if (!doNotRemoveInvalidChars) {
11536
11533
  if (seqData.isProtein) {
11537
- const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
11538
- includeStopCodon: true
11539
- }, topLevelSeqData || seqData));
11534
+ const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData));
11540
11535
  seqData.proteinSequence = newSeq;
11541
11536
  } else {
11542
11537
  const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
@@ -19377,8 +19372,7 @@ var __async = (__this, __arguments, generator) => {
19377
19372
  inclusive1BasedEnd,
19378
19373
  additionalValidChars,
19379
19374
  allowOverflowAnnotations,
19380
- coerceFeatureTypes,
19381
- includeStopCodon
19375
+ coerceFeatureTypes
19382
19376
  } = options;
19383
19377
  [
19384
19378
  "isDNA",
@@ -19438,8 +19432,7 @@ var __async = (__this, __arguments, generator) => {
19438
19432
  const [validChars, warnings] = filterSequenceString(sequence.sequence, {
19439
19433
  name: sequence.name,
19440
19434
  isProtein: true,
19441
- additionalValidChars,
19442
- includeStopCodon
19435
+ additionalValidChars
19443
19436
  });
19444
19437
  if (validChars !== sequence.sequence) {
19445
19438
  sequence.sequence = validChars;
package/package.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "name": "@teselagen/bio-parsers",
3
- "version": "0.4.1",
3
+ "version": "0.4.2",
4
4
  "dependencies": {
5
- "@teselagen/sequence-utils": "0.3.9",
5
+ "@teselagen/sequence-utils": "0.3.10",
6
6
  "@teselagen/range-utils": "0.3.7",
7
7
  "@gmod/gff": "^1.2.1",
8
8
  "buffer": "^6.0.3",
@@ -30,7 +30,6 @@ export default function validateSequence(sequence, options = {}) {
30
30
  additionalValidChars,
31
31
  allowOverflowAnnotations,
32
32
  coerceFeatureTypes,
33
- includeStopCodon
34
33
  } = options;
35
34
  [
36
35
  "isDNA",
@@ -97,7 +96,6 @@ export default function validateSequence(sequence, options = {}) {
97
96
  name: sequence.name,
98
97
  isProtein: true,
99
98
  additionalValidChars,
100
- includeStopCodon
101
99
  });
102
100
  if (validChars !== sequence.sequence) {
103
101
  sequence.sequence = validChars;