@teselagen/bio-parsers 0.4.15 → 0.4.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.mjs CHANGED
@@ -11504,6 +11504,26 @@ function getDegenerateDnaStringFromAAString(aaString) {
11504
11504
  return aaString.split("").map((char) => aminoAcidToDegenerateDnaMap[char.toLowerCase()] || "nnn").join("");
11505
11505
  }
11506
11506
  __name(getDegenerateDnaStringFromAAString, "getDegenerateDnaStringFromAAString");
11507
/**
 * Builds an amino-acid string from a sequence string.
 *
 * The per-base records come from
 * `getAminoAcidDataForEachBaseOfDna(sequenceString, true)`; each record's
 * `aminoAcid.value` is written into the slot given by its `aminoAcidIndex`,
 * and the slots are then joined into one string.
 *
 * @param {string} sequenceString - sequence handed to getAminoAcidDataForEachBaseOfDna.
 * @param {Object} [options]
 * @param {boolean} [options.doNotExcludeAsterisk] - when truthy, a "*" found
 *   within the last three per-base records is kept instead of dropped.
 * @returns {string} the joined amino-acid values.
 */
function getAminoAcidStringFromSequenceString(sequenceString, { doNotExcludeAsterisk } = {}) {
  // NOTE(review): the second argument (`true`) is passed through unchanged;
  // its meaning is defined by getAminoAcidDataForEachBaseOfDna — confirm there.
  const perBaseRecords = getAminoAcidDataForEachBaseOfDna(sequenceString, true);
  const tailStart = perBaseRecords.length - 3;
  const residues = [];

  perBaseRecords.forEach((record, baseIndex) => {
    // Records that are not part of a full codon contribute nothing.
    if (!record.fullCodon) {
      return;
    }
    const residue = record.aminoAcid.value;
    const isTrailingAsterisk = baseIndex >= tailStart && residue === "*";
    if (isTrailingAsterisk && !doNotExcludeAsterisk) {
      return;
    }
    // Every base of a codon carries the same aminoAcidIndex, so repeated
    // writes to the same slot are harmless.
    residues[record.aminoAcidIndex] = residue;
  });

  // Unassigned slots (if any) join as empty strings.
  return residues.join("");
}
11526
+ __name(getAminoAcidStringFromSequenceString, "getAminoAcidStringFromSequenceString");
11507
11527
  function tidyUpSequenceData(pSeqData, options = {}) {
11508
11528
  const {
11509
11529
  annotationsAsObjects,
@@ -11511,6 +11531,7 @@ function tidyUpSequenceData(pSeqData, options = {}) {
11511
11531
  doNotRemoveInvalidChars,
11512
11532
  additionalValidChars,
11513
11533
  noTranslationData,
11534
+ includeProteinSequence,
11514
11535
  doNotProvideIdsForAnnotations,
11515
11536
  noCdsTranslations,
11516
11537
  convertAnnotationsFromAAIndices,
@@ -11544,7 +11565,9 @@ function tidyUpSequenceData(pSeqData, options = {}) {
11544
11565
  }
11545
11566
  if (!doNotRemoveInvalidChars) {
11546
11567
  if (seqData.isProtein) {
11547
- const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData));
11568
+ const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadProps(__spreadValues({}, topLevelSeqData || seqData), {
11569
+ isProtein: true
11570
+ }));
11548
11571
  seqData.proteinSequence = newSeq;
11549
11572
  } else {
11550
11573
  const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
@@ -11565,6 +11588,10 @@ function tidyUpSequenceData(pSeqData, options = {}) {
11565
11588
  null,
11566
11589
  true
11567
11590
  );
11591
+ } else if (includeProteinSequence) {
11592
+ seqData.proteinSequence = getAminoAcidStringFromSequenceString(
11593
+ seqData.sequence
11594
+ );
11568
11595
  }
11569
11596
  seqData.size = seqData.noSequence ? seqData.size : seqData.sequence.length;
11570
11597
  seqData.proteinSize = seqData.noSequence ? seqData.proteinSize : seqData.proteinSequence.length;
@@ -19785,19 +19812,38 @@ function flattenSequenceArray(parsingResultArray, opts) {
19785
19812
  return parsingResultArray;
19786
19813
  }
19787
19814
  __name(flattenSequenceArray, "flattenSequenceArray");
19815
/**
 * Merges adjacent location pairs that together span the origin.
 *
 * Works on shallow copies of the input locations. Whenever one location ends
 * on the last position of the sequence and the next one starts on the first
 * position, the two are replaced by a single `{ start, end }` that runs from
 * the first location's start to the second location's end.
 *
 * @param {Array<{start: number, end: number}>} locArrayInput - locations to scan; not mutated.
 * @param {number} sequenceLength - length of the sequence the locations refer to.
 * @param {boolean} inclusive1BasedStart - truthy when starts are 1-based (first position is 1, otherwise 0).
 * @param {boolean} inclusive1BasedEnd - truthy when ends are 1-based (last position is sequenceLength, otherwise sequenceLength - 1).
 * @returns {Array<{start: number, end: number}>} a new array with origin-spanning pairs merged.
 */
function wrapOriginSpanningFeatures(locArrayInput, sequenceLength, inclusive1BasedStart, inclusive1BasedEnd) {
  const lastPosition = sequenceLength - (inclusive1BasedEnd ? 0 : 1);
  const firstPosition = 1 - (inclusive1BasedStart ? 0 : 1);
  const wrapped = locArrayInput.map((loc) => __spreadValues({}, loc));

  let cursor = 0;
  while (cursor < wrapped.length - 1) {
    const current = wrapped[cursor];
    const following = wrapped[cursor + 1];
    const endsAtSequenceEnd = current.end === lastPosition;
    const restartsAtOrigin = following.start === firstPosition;
    if (endsAtSequenceEnd && restartsAtOrigin) {
      // Replace the pair with one location; only start/end are carried over.
      wrapped.splice(cursor, 2, {
        start: current.start,
        end: following.end
      });
    }
    cursor += 1;
  }

  return wrapped;
}
19830
+ __name(wrapOriginSpanningFeatures, "wrapOriginSpanningFeatures");
19788
19831
  function parseFeatureLocation(locStr, isProtein, inclusive1BasedStart, inclusive1BasedEnd, isCircular, sequenceLength) {
19789
19832
  locStr = locStr.trim();
19790
- const locArr = [];
19791
- locStr.replace(/(\d+)/g, function(string, match) {
19792
- locArr.push(match);
19833
+ const positionsArray = [];
19834
+ const locationParts = locStr.split(",");
19835
+ locationParts.forEach((locPart) => {
19836
+ const extractedPositions = locPart.match(/(\d+)/g);
19837
+ if (extractedPositions === null) {
19838
+ return;
19839
+ }
19840
+ positionsArray.push(extractedPositions[0]);
19841
+ positionsArray.push(extractedPositions[1] || extractedPositions[0]);
19793
19842
  });
19794
19843
  const locArray = [];
19795
- for (let i = 0; i < locArr.length; i += 2) {
19796
- const start = parseInt(locArr[i], 10) - (inclusive1BasedStart ? 0 : 1);
19797
- let end = parseInt(locArr[i + 1], 10) - (inclusive1BasedEnd ? 0 : 1);
19798
- if (isNaN(end)) {
19799
- end = start;
19800
- }
19844
+ for (let i = 0; i < positionsArray.length; i += 2) {
19845
+ const start = parseInt(positionsArray[i], 10) - (inclusive1BasedStart ? 0 : 1);
19846
+ const end = parseInt(positionsArray[i + 1], 10) - (inclusive1BasedEnd ? 0 : 1);
19801
19847
  const location = {
19802
19848
  start,
19803
19849
  end
@@ -19806,20 +19852,16 @@ function parseFeatureLocation(locStr, isProtein, inclusive1BasedStart, inclusive
19806
19852
  isProtein ? convertAACaretPositionOrRangeToDna(location) : location
19807
19853
  );
19808
19854
  }
19809
- if (isCircular) {
19810
- for (let i = 0; i < locArray.length; i += 2) {
19811
- const firstFeature = locArray[i];
19812
- const secondFeature = locArray[i + 1];
19813
- if (firstFeature.end === sequenceLength - (inclusive1BasedEnd ? 0 : 1) && secondFeature.start === 1 - (inclusive1BasedStart ? 0 : 1)) {
19814
- locArray[i] = {
19815
- start: firstFeature.start,
19816
- end: secondFeature.end
19817
- };
19818
- locArray.splice(i + 1, 1);
19819
- }
19820
- }
19855
+ if (isCircular && sequenceLength) {
19856
+ return wrapOriginSpanningFeatures(
19857
+ locArray,
19858
+ sequenceLength,
19859
+ inclusive1BasedStart,
19860
+ inclusive1BasedEnd
19861
+ );
19862
+ } else {
19863
+ return locArray;
19821
19864
  }
19822
- return locArray;
19823
19865
  }
19824
19866
  __name(parseFeatureLocation, "parseFeatureLocation");
19825
19867
  function genbankToJson(string, options = {}) {
@@ -19916,7 +19958,7 @@ function genbankToJson(string, options = {}) {
19916
19958
  parseOrigin(line, key);
19917
19959
  break;
19918
19960
  case genbankAnnotationKey.END_SEQUENCE_TAG:
19919
- endSeq();
19961
+ endSeq(options);
19920
19962
  break;
19921
19963
  case genbankAnnotationKey.DEFINITION_TAG:
19922
19964
  line = line.replace(/DEFINITION/, "");
@@ -20021,9 +20063,9 @@ function genbankToJson(string, options = {}) {
20021
20063
  }
20022
20064
  });
20023
20065
  return results;
20024
/**
 * Closes out the record currently being parsed: clears the locus flag,
 * post-processes the current sequence with the given options, and appends
 * the result to resultsArray (or a `{ success: false }` placeholder when
 * there is no result).
 *
 * @param {Object} options2 - forwarded unchanged to postProcessCurSeq.
 */
function endSeq(options2) {
  hasFoundLocus = false;
  postProcessCurSeq(options2);
  // Read `result` only after post-processing, as the push did originally.
  const finishedRecord = result ? result : { success: false };
  resultsArray.push(finishedRecord);
}
20029
20071
  __name(endSeq, "endSeq");
@@ -20037,11 +20079,13 @@ function genbankToJson(string, options = {}) {
20037
20079
  }
20038
20080
  }
20039
20081
  __name(addMessage, "addMessage");
20040
- function postProcessCurSeq() {
20082
+ function postProcessCurSeq(options2) {
20041
20083
  if (result && result.parsedSequence && result.parsedSequence.features) {
20042
20084
  for (let i = 0; i < result.parsedSequence.features.length; i++) {
20043
20085
  result.parsedSequence.features[i] = postProcessGenbankFeature(
20044
- result.parsedSequence.features[i]
20086
+ result.parsedSequence.features[i],
20087
+ result.parsedSequence,
20088
+ options2
20045
20089
  );
20046
20090
  }
20047
20091
  }
@@ -20287,7 +20331,7 @@ function genbankToJson(string, options = {}) {
20287
20331
  return runon;
20288
20332
  }
20289
20333
  __name(isKeywordRunon, "isKeywordRunon");
20290
- function postProcessGenbankFeature(feat) {
20334
+ function postProcessGenbankFeature(feat, parsedSequence, options2) {
20291
20335
  if (feat.notes.label) {
20292
20336
  feat.name = feat.notes.label[0];
20293
20337
  } else if (feat.notes.gene) {
@@ -20320,6 +20364,15 @@ function genbankToJson(string, options = {}) {
20320
20364
  feat.arrowheadType = feat.notes.direction[0].toUpperCase() === "BOTH" ? "BOTH" : feat.notes.direction[0].toUpperCase() === "NONE" ? "NONE" : void 0;
20321
20365
  delete feat.notes.direction;
20322
20366
  }
20367
+ if (parsedSequence.circular) {
20368
+ const { inclusive1BasedStart: inclusive1BasedStart2, inclusive1BasedEnd: inclusive1BasedEnd2 } = options2;
20369
+ feat.locations = wrapOriginSpanningFeatures(
20370
+ feat.locations,
20371
+ parsedSequence.sequence.length,
20372
+ inclusive1BasedStart2,
20373
+ inclusive1BasedEnd2
20374
+ );
20375
+ }
20323
20376
  return feat;
20324
20377
  }
20325
20378
  __name(postProcessGenbankFeature, "postProcessGenbankFeature");