@teselagen/bio-parsers 0.4.15 → 0.4.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/genbankToJson.d.ts +1 -1
- package/index.cjs +28840 -0
- package/index.d.ts +16 -16
- package/index.js +2204 -6183
- package/index.mjs +82 -29
- package/index.umd.cjs +28844 -0
- package/index.umd.js +82 -29
- package/package.json +1 -1
- package/src/genbankToJson.js +94 -43
- package/src/geneiousXmlToJson.js +2 -2
- package/src/gffToJson.js +1 -1
- package/src/jbeiXmlToJson.js +1 -1
- package/src/jsonToGenbank.js +3 -3
- package/src/jsonToJsonString.js +1 -1
- package/src/sbolXmlToJson.js +1 -1
- package/src/snapgeneToJson.js +1 -1
- package/src/utils/cleanUpTeselagenJsonForExport.js +1 -1
- package/src/utils/searchWholeObjByName.js +1 -1
- package/src/utils/validateSequence.js +3 -3
package/index.mjs
CHANGED
|
@@ -11504,6 +11504,26 @@ function getDegenerateDnaStringFromAAString(aaString) {
|
|
|
11504
11504
|
return aaString.split("").map((char) => aminoAcidToDegenerateDnaMap[char.toLowerCase()] || "nnn").join("");
|
|
11505
11505
|
}
|
|
11506
11506
|
__name(getDegenerateDnaStringFromAAString, "getDegenerateDnaStringFromAAString");
|
|
11507
|
+
/**
 * Builds an amino-acid string from a sequence string.
 *
 * The per-base data comes from getAminoAcidDataForEachBaseOfDna(sequenceString, true)
 * (NOTE(review): the meaning of the `true` flag is not visible here — confirm
 * against that helper). Entries whose `fullCodon` flag is falsy are ignored.
 * Each remaining entry writes `aminoAcid.value` into the slot given by its
 * `aminoAcidIndex`, and the slots are joined without a separator.
 *
 * @param {string} sequenceString - Sequence handed to getAminoAcidDataForEachBaseOfDna.
 * @param {Object} [options]
 * @param {boolean} [options.doNotExcludeAsterisk] - When falsy, a "*" value found
 *   on any of the last three per-base entries is left out of the result.
 * @returns {string} The joined amino-acid values.
 */
function getAminoAcidStringFromSequenceString(sequenceString, { doNotExcludeAsterisk } = {}) {
  const perBaseEntries = getAminoAcidDataForEachBaseOfDna(sequenceString, true);
  // Index of the first of the last three per-base entries.
  const tailStart = perBaseEntries.length - 3;
  const residues = [];
  perBaseEntries.forEach((entry, position) => {
    if (!entry.fullCodon) {
      return;
    }
    const isDroppedTrailingAsterisk =
      !doNotExcludeAsterisk && position >= tailStart && entry.aminoAcid.value === "*";
    if (!isDroppedTrailingAsterisk) {
      // Several bases share one aminoAcidIndex; each write stores the same slot.
      residues[entry.aminoAcidIndex] = entry.aminoAcid.value;
    }
  });
  return residues.join("");
}
|
|
11526
|
+
__name(getAminoAcidStringFromSequenceString, "getAminoAcidStringFromSequenceString");
|
|
11507
11527
|
function tidyUpSequenceData(pSeqData, options = {}) {
|
|
11508
11528
|
const {
|
|
11509
11529
|
annotationsAsObjects,
|
|
@@ -11511,6 +11531,7 @@ function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
11511
11531
|
doNotRemoveInvalidChars,
|
|
11512
11532
|
additionalValidChars,
|
|
11513
11533
|
noTranslationData,
|
|
11534
|
+
includeProteinSequence,
|
|
11514
11535
|
doNotProvideIdsForAnnotations,
|
|
11515
11536
|
noCdsTranslations,
|
|
11516
11537
|
convertAnnotationsFromAAIndices,
|
|
@@ -11544,7 +11565,9 @@ function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
11544
11565
|
}
|
|
11545
11566
|
if (!doNotRemoveInvalidChars) {
|
|
11546
11567
|
if (seqData.isProtein) {
|
|
11547
|
-
const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData)
|
|
11568
|
+
const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadProps(__spreadValues({}, topLevelSeqData || seqData), {
|
|
11569
|
+
isProtein: true
|
|
11570
|
+
}));
|
|
11548
11571
|
seqData.proteinSequence = newSeq;
|
|
11549
11572
|
} else {
|
|
11550
11573
|
const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
|
|
@@ -11565,6 +11588,10 @@ function tidyUpSequenceData(pSeqData, options = {}) {
|
|
|
11565
11588
|
null,
|
|
11566
11589
|
true
|
|
11567
11590
|
);
|
|
11591
|
+
} else if (includeProteinSequence) {
|
|
11592
|
+
seqData.proteinSequence = getAminoAcidStringFromSequenceString(
|
|
11593
|
+
seqData.sequence
|
|
11594
|
+
);
|
|
11568
11595
|
}
|
|
11569
11596
|
seqData.size = seqData.noSequence ? seqData.size : seqData.sequence.length;
|
|
11570
11597
|
seqData.proteinSize = seqData.noSequence ? seqData.proteinSize : seqData.proteinSequence.length;
|
|
@@ -19785,19 +19812,38 @@ function flattenSequenceArray(parsingResultArray, opts) {
|
|
|
19785
19812
|
return parsingResultArray;
|
|
19786
19813
|
}
|
|
19787
19814
|
__name(flattenSequenceArray, "flattenSequenceArray");
|
|
19815
|
+
/**
 * Joins neighbouring locations that meet at the sequence origin.
 *
 * Works on shallow copies, so neither the input array nor its location objects
 * are modified. Walking the list once from left to right, whenever a location
 * ends on the last position of the sequence and the next one starts on the
 * first position, the pair is replaced by a single `{ start, end }` object
 * (start of the first, end of the second; any other properties of the pair are
 * not carried over). The walk always advances by one after each comparison,
 * so a freshly merged entry is not compared against the entry that follows it.
 *
 * @param {Array<{start: number, end: number}>} locArrayInput - Locations to inspect.
 * @param {number} sequenceLength - Length of the sequence the locations refer to.
 * @param {boolean} inclusive1BasedStart - Truthy when `start` values are 1-based
 *   (first position is 1); otherwise the first position is 0.
 * @param {boolean} inclusive1BasedEnd - Truthy when `end` values are 1-based
 *   (last position is sequenceLength); otherwise it is sequenceLength - 1.
 * @returns {Array<Object>} New array with origin-spanning pairs merged.
 */
function wrapOriginSpanningFeatures(locArrayInput, sequenceLength, inclusive1BasedStart, inclusive1BasedEnd) {
  const merged = locArrayInput.map((loc) => Object.assign({}, loc));
  const lastPosition = sequenceLength - (inclusive1BasedEnd ? 0 : 1);
  const firstPosition = inclusive1BasedStart ? 1 : 0;
  let idx = 0;
  while (idx < merged.length - 1) {
    const current = merged[idx];
    const following = merged[idx + 1];
    const meetsAtOrigin = current.end === lastPosition && following.start === firstPosition;
    if (meetsAtOrigin) {
      // Swap the two entries for one that runs across the origin.
      merged.splice(idx, 2, { start: current.start, end: following.end });
    }
    idx += 1;
  }
  return merged;
}
|
|
19830
|
+
__name(wrapOriginSpanningFeatures, "wrapOriginSpanningFeatures");
|
|
19788
19831
|
function parseFeatureLocation(locStr, isProtein, inclusive1BasedStart, inclusive1BasedEnd, isCircular, sequenceLength) {
|
|
19789
19832
|
locStr = locStr.trim();
|
|
19790
|
-
const
|
|
19791
|
-
locStr.
|
|
19792
|
-
|
|
19833
|
+
const positionsArray = [];
|
|
19834
|
+
const locationParts = locStr.split(",");
|
|
19835
|
+
locationParts.forEach((locPart) => {
|
|
19836
|
+
const extractedPositions = locPart.match(/(\d+)/g);
|
|
19837
|
+
if (extractedPositions === null) {
|
|
19838
|
+
return;
|
|
19839
|
+
}
|
|
19840
|
+
positionsArray.push(extractedPositions[0]);
|
|
19841
|
+
positionsArray.push(extractedPositions[1] || extractedPositions[0]);
|
|
19793
19842
|
});
|
|
19794
19843
|
const locArray = [];
|
|
19795
|
-
for (let i = 0; i <
|
|
19796
|
-
const start = parseInt(
|
|
19797
|
-
|
|
19798
|
-
if (isNaN(end)) {
|
|
19799
|
-
end = start;
|
|
19800
|
-
}
|
|
19844
|
+
for (let i = 0; i < positionsArray.length; i += 2) {
|
|
19845
|
+
const start = parseInt(positionsArray[i], 10) - (inclusive1BasedStart ? 0 : 1);
|
|
19846
|
+
const end = parseInt(positionsArray[i + 1], 10) - (inclusive1BasedEnd ? 0 : 1);
|
|
19801
19847
|
const location = {
|
|
19802
19848
|
start,
|
|
19803
19849
|
end
|
|
@@ -19806,20 +19852,16 @@ function parseFeatureLocation(locStr, isProtein, inclusive1BasedStart, inclusive
|
|
|
19806
19852
|
isProtein ? convertAACaretPositionOrRangeToDna(location) : location
|
|
19807
19853
|
);
|
|
19808
19854
|
}
|
|
19809
|
-
if (isCircular) {
|
|
19810
|
-
|
|
19811
|
-
|
|
19812
|
-
|
|
19813
|
-
|
|
19814
|
-
|
|
19815
|
-
|
|
19816
|
-
|
|
19817
|
-
|
|
19818
|
-
locArray.splice(i + 1, 1);
|
|
19819
|
-
}
|
|
19820
|
-
}
|
|
19855
|
+
if (isCircular && sequenceLength) {
|
|
19856
|
+
return wrapOriginSpanningFeatures(
|
|
19857
|
+
locArray,
|
|
19858
|
+
sequenceLength,
|
|
19859
|
+
inclusive1BasedStart,
|
|
19860
|
+
inclusive1BasedEnd
|
|
19861
|
+
);
|
|
19862
|
+
} else {
|
|
19863
|
+
return locArray;
|
|
19821
19864
|
}
|
|
19822
|
-
return locArray;
|
|
19823
19865
|
}
|
|
19824
19866
|
__name(parseFeatureLocation, "parseFeatureLocation");
|
|
19825
19867
|
function genbankToJson(string, options = {}) {
|
|
@@ -19916,7 +19958,7 @@ function genbankToJson(string, options = {}) {
|
|
|
19916
19958
|
parseOrigin(line, key);
|
|
19917
19959
|
break;
|
|
19918
19960
|
case genbankAnnotationKey.END_SEQUENCE_TAG:
|
|
19919
|
-
endSeq();
|
|
19961
|
+
endSeq(options);
|
|
19920
19962
|
break;
|
|
19921
19963
|
case genbankAnnotationKey.DEFINITION_TAG:
|
|
19922
19964
|
line = line.replace(/DEFINITION/, "");
|
|
@@ -20021,9 +20063,9 @@ function genbankToJson(string, options = {}) {
|
|
|
20021
20063
|
}
|
|
20022
20064
|
});
|
|
20023
20065
|
return results;
|
|
20024
|
-
function endSeq() {
|
|
20066
|
+
// Closes the record currently being parsed: clears hasFoundLocus, runs
// postProcessCurSeq, then appends `result` (or `{ success: false }` when
// `result` is falsy) to resultsArray.
// In this diff view the `-` line is the 0.4.15 call without arguments and the
// `+` line is the 0.4.17 call that forwards `options2` to postProcessCurSeq.
function endSeq(options2) {
|
|
20025
20067
|
hasFoundLocus = false;
|
|
20026
|
-
postProcessCurSeq();
|
|
20068
|
+
postProcessCurSeq(options2);
|
|
20027
20069
|
resultsArray.push(result || { success: false });
|
|
20028
20070
|
}
|
|
20029
20071
|
__name(endSeq, "endSeq");
|
|
@@ -20037,11 +20079,13 @@ function genbankToJson(string, options = {}) {
|
|
|
20037
20079
|
}
|
|
20038
20080
|
}
|
|
20039
20081
|
__name(addMessage, "addMessage");
|
|
20040
|
-
function postProcessCurSeq() {
|
|
20082
|
+
function postProcessCurSeq(options2) {
|
|
20041
20083
|
if (result && result.parsedSequence && result.parsedSequence.features) {
|
|
20042
20084
|
for (let i = 0; i < result.parsedSequence.features.length; i++) {
|
|
20043
20085
|
result.parsedSequence.features[i] = postProcessGenbankFeature(
|
|
20044
|
-
result.parsedSequence.features[i]
|
|
20086
|
+
result.parsedSequence.features[i],
|
|
20087
|
+
result.parsedSequence,
|
|
20088
|
+
options2
|
|
20045
20089
|
);
|
|
20046
20090
|
}
|
|
20047
20091
|
}
|
|
@@ -20287,7 +20331,7 @@ function genbankToJson(string, options = {}) {
|
|
|
20287
20331
|
return runon;
|
|
20288
20332
|
}
|
|
20289
20333
|
__name(isKeywordRunon, "isKeywordRunon");
|
|
20290
|
-
function postProcessGenbankFeature(feat) {
|
|
20334
|
+
function postProcessGenbankFeature(feat, parsedSequence, options2) {
|
|
20291
20335
|
if (feat.notes.label) {
|
|
20292
20336
|
feat.name = feat.notes.label[0];
|
|
20293
20337
|
} else if (feat.notes.gene) {
|
|
@@ -20320,6 +20364,15 @@ function genbankToJson(string, options = {}) {
|
|
|
20320
20364
|
feat.arrowheadType = feat.notes.direction[0].toUpperCase() === "BOTH" ? "BOTH" : feat.notes.direction[0].toUpperCase() === "NONE" ? "NONE" : void 0;
|
|
20321
20365
|
delete feat.notes.direction;
|
|
20322
20366
|
}
|
|
20367
|
+
if (parsedSequence.circular) {
|
|
20368
|
+
const { inclusive1BasedStart: inclusive1BasedStart2, inclusive1BasedEnd: inclusive1BasedEnd2 } = options2;
|
|
20369
|
+
feat.locations = wrapOriginSpanningFeatures(
|
|
20370
|
+
feat.locations,
|
|
20371
|
+
parsedSequence.sequence.length,
|
|
20372
|
+
inclusive1BasedStart2,
|
|
20373
|
+
inclusive1BasedEnd2
|
|
20374
|
+
);
|
|
20375
|
+
}
|
|
20323
20376
|
return feat;
|
|
20324
20377
|
}
|
|
20325
20378
|
__name(postProcessGenbankFeature, "postProcessGenbankFeature");
|