@teselagen/bio-parsers 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -6170,7 +6170,7 @@ lodash.exports;
6170
6170
  })(lodash, lodash.exports);
6171
6171
  var lodashExports = lodash.exports;
6172
6172
  const _ = /* @__PURE__ */ getDefaultExportFromCjs(lodashExports);
6173
- const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
6173
+ const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO";
6174
6174
  const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
6175
6175
  const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
6176
6176
  const aminoAcidToDegenerateDnaMap = {
@@ -11281,15 +11281,13 @@ function filterSequenceString(sequenceString, {
11281
11281
  name,
11282
11282
  isProtein,
11283
11283
  isRna,
11284
- isMixedRnaAndDna,
11285
- includeStopCodon
11284
+ isMixedRnaAndDna
11286
11285
  } = {}) {
11287
11286
  const acceptedChars = getAcceptedChars({
11288
11287
  isOligo,
11289
11288
  isProtein,
11290
11289
  isRna,
11291
- isMixedRnaAndDna,
11292
- includeStopCodon
11290
+ isMixedRnaAndDna
11293
11291
  });
11294
11292
  const replaceChars = getReplaceChars({
11295
11293
  isOligo,
@@ -11339,10 +11337,9 @@ function getAcceptedChars({
11339
11337
  isOligo,
11340
11338
  isProtein,
11341
11339
  isRna,
11342
- isMixedRnaAndDna,
11343
- includeStopCodon
11340
+ isMixedRnaAndDna
11344
11341
  } = {}) {
11345
- return isProtein ? `${protein_letters_withUandX.toLowerCase()}${includeStopCodon ? "*." : ""}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
11342
+ return isProtein ? `${extended_protein_letters.toLowerCase()}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
11346
11343
  //just plain old dna
11347
11344
  ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
11348
11345
  );
@@ -11354,9 +11351,12 @@ function getReplaceChars({
11354
11351
  isRna,
11355
11352
  isMixedRnaAndDna
11356
11353
  } = {}) {
11357
- return isProtein ? {} : isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
11358
- //just plain old dna
11359
- {}
11354
+ return isProtein ? {} : (
11355
+ // {".": "*"}
11356
+ isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
11357
+ //just plain old dna
11358
+ {}
11359
+ )
11360
11360
  );
11361
11361
  }
11362
11362
  __name(getReplaceChars, "getReplaceChars");
@@ -11496,7 +11496,7 @@ function tidyUpSequenceData(pSeqData, options = {}) {
11496
11496
  const {
11497
11497
  annotationsAsObjects,
11498
11498
  logMessages,
11499
- removeUnwantedChars,
11499
+ doNotRemoveInvalidChars,
11500
11500
  additionalValidChars,
11501
11501
  noTranslationData,
11502
11502
  doNotProvideIdsForAnnotations,
@@ -11530,11 +11530,9 @@ function tidyUpSequenceData(pSeqData, options = {}) {
11530
11530
  if (seqData.isRna) {
11531
11531
  seqData.sequence = seqData.sequence.replace(/t/gi, "u");
11532
11532
  }
11533
- if (removeUnwantedChars) {
11533
+ if (!doNotRemoveInvalidChars) {
11534
11534
  if (seqData.isProtein) {
11535
- const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
11536
- includeStopCodon: true
11537
- }, topLevelSeqData || seqData));
11535
+ const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData));
11538
11536
  seqData.proteinSequence = newSeq;
11539
11537
  } else {
11540
11538
  const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
@@ -19375,8 +19373,7 @@ function validateSequence(sequence, options = {}) {
19375
19373
  inclusive1BasedEnd,
19376
19374
  additionalValidChars,
19377
19375
  allowOverflowAnnotations,
19378
- coerceFeatureTypes,
19379
- includeStopCodon
19376
+ coerceFeatureTypes
19380
19377
  } = options;
19381
19378
  [
19382
19379
  "isDNA",
@@ -19436,8 +19433,7 @@ function validateSequence(sequence, options = {}) {
19436
19433
  const [validChars, warnings] = filterSequenceString(sequence.sequence, {
19437
19434
  name: sequence.name,
19438
19435
  isProtein: true,
19439
- additionalValidChars,
19440
- includeStopCodon
19436
+ additionalValidChars
19441
19437
  });
19442
19438
  if (validChars !== sequence.sequence) {
19443
19439
  sequence.sequence = validChars;
package/index.mjs CHANGED
@@ -6168,7 +6168,7 @@ lodash.exports;
6168
6168
  })(lodash, lodash.exports);
6169
6169
  var lodashExports = lodash.exports;
6170
6170
  const _ = /* @__PURE__ */ getDefaultExportFromCjs(lodashExports);
6171
- const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
6171
+ const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO";
6172
6172
  const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
6173
6173
  const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
6174
6174
  const aminoAcidToDegenerateDnaMap = {
@@ -11279,15 +11279,13 @@ function filterSequenceString(sequenceString, {
11279
11279
  name,
11280
11280
  isProtein,
11281
11281
  isRna,
11282
- isMixedRnaAndDna,
11283
- includeStopCodon
11282
+ isMixedRnaAndDna
11284
11283
  } = {}) {
11285
11284
  const acceptedChars = getAcceptedChars({
11286
11285
  isOligo,
11287
11286
  isProtein,
11288
11287
  isRna,
11289
- isMixedRnaAndDna,
11290
- includeStopCodon
11288
+ isMixedRnaAndDna
11291
11289
  });
11292
11290
  const replaceChars = getReplaceChars({
11293
11291
  isOligo,
@@ -11337,10 +11335,9 @@ function getAcceptedChars({
11337
11335
  isOligo,
11338
11336
  isProtein,
11339
11337
  isRna,
11340
- isMixedRnaAndDna,
11341
- includeStopCodon
11338
+ isMixedRnaAndDna
11342
11339
  } = {}) {
11343
- return isProtein ? `${protein_letters_withUandX.toLowerCase()}${includeStopCodon ? "*." : ""}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
11340
+ return isProtein ? `${extended_protein_letters.toLowerCase()}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
11344
11341
  //just plain old dna
11345
11342
  ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
11346
11343
  );
@@ -11352,9 +11349,12 @@ function getReplaceChars({
11352
11349
  isRna,
11353
11350
  isMixedRnaAndDna
11354
11351
  } = {}) {
11355
- return isProtein ? {} : isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
11356
- //just plain old dna
11357
- {}
11352
+ return isProtein ? {} : (
11353
+ // {".": "*"}
11354
+ isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
11355
+ //just plain old dna
11356
+ {}
11357
+ )
11358
11358
  );
11359
11359
  }
11360
11360
  __name(getReplaceChars, "getReplaceChars");
@@ -11494,7 +11494,7 @@ function tidyUpSequenceData(pSeqData, options = {}) {
11494
11494
  const {
11495
11495
  annotationsAsObjects,
11496
11496
  logMessages,
11497
- removeUnwantedChars,
11497
+ doNotRemoveInvalidChars,
11498
11498
  additionalValidChars,
11499
11499
  noTranslationData,
11500
11500
  doNotProvideIdsForAnnotations,
@@ -11528,11 +11528,9 @@ function tidyUpSequenceData(pSeqData, options = {}) {
11528
11528
  if (seqData.isRna) {
11529
11529
  seqData.sequence = seqData.sequence.replace(/t/gi, "u");
11530
11530
  }
11531
- if (removeUnwantedChars) {
11531
+ if (!doNotRemoveInvalidChars) {
11532
11532
  if (seqData.isProtein) {
11533
- const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
11534
- includeStopCodon: true
11535
- }, topLevelSeqData || seqData));
11533
+ const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData));
11536
11534
  seqData.proteinSequence = newSeq;
11537
11535
  } else {
11538
11536
  const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
@@ -19373,8 +19371,7 @@ function validateSequence(sequence, options = {}) {
19373
19371
  inclusive1BasedEnd,
19374
19372
  additionalValidChars,
19375
19373
  allowOverflowAnnotations,
19376
- coerceFeatureTypes,
19377
- includeStopCodon
19374
+ coerceFeatureTypes
19378
19375
  } = options;
19379
19376
  [
19380
19377
  "isDNA",
@@ -19434,8 +19431,7 @@ function validateSequence(sequence, options = {}) {
19434
19431
  const [validChars, warnings] = filterSequenceString(sequence.sequence, {
19435
19432
  name: sequence.name,
19436
19433
  isProtein: true,
19437
- additionalValidChars,
19438
- includeStopCodon
19434
+ additionalValidChars
19439
19435
  });
19440
19436
  if (validChars !== sequence.sequence) {
19441
19437
  sequence.sequence = validChars;
package/index.umd.js CHANGED
@@ -6172,7 +6172,7 @@ var __async = (__this, __arguments, generator) => {
6172
6172
  })(lodash, lodash.exports);
6173
6173
  var lodashExports = lodash.exports;
6174
6174
  const _ = /* @__PURE__ */ getDefaultExportFromCjs(lodashExports);
6175
- const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
6175
+ const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO";
6176
6176
  const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
6177
6177
  const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
6178
6178
  const aminoAcidToDegenerateDnaMap = {
@@ -11283,15 +11283,13 @@ var __async = (__this, __arguments, generator) => {
11283
11283
  name: name2,
11284
11284
  isProtein,
11285
11285
  isRna,
11286
- isMixedRnaAndDna,
11287
- includeStopCodon
11286
+ isMixedRnaAndDna
11288
11287
  } = {}) {
11289
11288
  const acceptedChars = getAcceptedChars({
11290
11289
  isOligo,
11291
11290
  isProtein,
11292
11291
  isRna,
11293
- isMixedRnaAndDna,
11294
- includeStopCodon
11292
+ isMixedRnaAndDna
11295
11293
  });
11296
11294
  const replaceChars = getReplaceChars({
11297
11295
  isOligo,
@@ -11341,10 +11339,9 @@ var __async = (__this, __arguments, generator) => {
11341
11339
  isOligo,
11342
11340
  isProtein,
11343
11341
  isRna,
11344
- isMixedRnaAndDna,
11345
- includeStopCodon
11342
+ isMixedRnaAndDna
11346
11343
  } = {}) {
11347
- return isProtein ? `${protein_letters_withUandX.toLowerCase()}${includeStopCodon ? "*." : ""}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
11344
+ return isProtein ? `${extended_protein_letters.toLowerCase()}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
11348
11345
  //just plain old dna
11349
11346
  ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
11350
11347
  );
@@ -11356,9 +11353,12 @@ var __async = (__this, __arguments, generator) => {
11356
11353
  isRna,
11357
11354
  isMixedRnaAndDna
11358
11355
  } = {}) {
11359
- return isProtein ? {} : isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
11360
- //just plain old dna
11361
- {}
11356
+ return isProtein ? {} : (
11357
+ // {".": "*"}
11358
+ isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
11359
+ //just plain old dna
11360
+ {}
11361
+ )
11362
11362
  );
11363
11363
  }
11364
11364
  __name(getReplaceChars, "getReplaceChars");
@@ -11498,7 +11498,7 @@ var __async = (__this, __arguments, generator) => {
11498
11498
  const {
11499
11499
  annotationsAsObjects,
11500
11500
  logMessages,
11501
- removeUnwantedChars,
11501
+ doNotRemoveInvalidChars,
11502
11502
  additionalValidChars,
11503
11503
  noTranslationData,
11504
11504
  doNotProvideIdsForAnnotations,
@@ -11532,11 +11532,9 @@ var __async = (__this, __arguments, generator) => {
11532
11532
  if (seqData.isRna) {
11533
11533
  seqData.sequence = seqData.sequence.replace(/t/gi, "u");
11534
11534
  }
11535
- if (removeUnwantedChars) {
11535
+ if (!doNotRemoveInvalidChars) {
11536
11536
  if (seqData.isProtein) {
11537
- const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
11538
- includeStopCodon: true
11539
- }, topLevelSeqData || seqData));
11537
+ const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData));
11540
11538
  seqData.proteinSequence = newSeq;
11541
11539
  } else {
11542
11540
  const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
@@ -19377,8 +19375,7 @@ var __async = (__this, __arguments, generator) => {
19377
19375
  inclusive1BasedEnd,
19378
19376
  additionalValidChars,
19379
19377
  allowOverflowAnnotations,
19380
- coerceFeatureTypes,
19381
- includeStopCodon
19378
+ coerceFeatureTypes
19382
19379
  } = options;
19383
19380
  [
19384
19381
  "isDNA",
@@ -19438,8 +19435,7 @@ var __async = (__this, __arguments, generator) => {
19438
19435
  const [validChars, warnings] = filterSequenceString(sequence.sequence, {
19439
19436
  name: sequence.name,
19440
19437
  isProtein: true,
19441
- additionalValidChars,
19442
- includeStopCodon
19438
+ additionalValidChars
19443
19439
  });
19444
19440
  if (validChars !== sequence.sequence) {
19445
19441
  sequence.sequence = validChars;
package/package.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "name": "@teselagen/bio-parsers",
3
- "version": "0.4.1",
3
+ "version": "0.4.3",
4
4
  "dependencies": {
5
- "@teselagen/sequence-utils": "0.3.9",
5
+ "@teselagen/sequence-utils": "0.3.11",
6
6
  "@teselagen/range-utils": "0.3.7",
7
7
  "@gmod/gff": "^1.2.1",
8
8
  "buffer": "^6.0.3",
@@ -29,8 +29,7 @@ export default function validateSequence(sequence, options = {}) {
29
29
  inclusive1BasedEnd,
30
30
  additionalValidChars,
31
31
  allowOverflowAnnotations,
32
- coerceFeatureTypes,
33
- includeStopCodon
32
+ coerceFeatureTypes
34
33
  } = options;
35
34
  [
36
35
  "isDNA",
@@ -96,8 +95,7 @@ export default function validateSequence(sequence, options = {}) {
96
95
  const [validChars, warnings] = filterSequenceString(sequence.sequence, {
97
96
  name: sequence.name,
98
97
  isProtein: true,
99
- additionalValidChars,
100
- includeStopCodon
98
+ additionalValidChars
101
99
  });
102
100
  if (validChars !== sequence.sequence) {
103
101
  sequence.sequence = validChars;