@teselagen/bio-parsers 0.3.4 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -19285,9 +19285,7 @@ const reformatName = /* @__PURE__ */ __name(function(pName) {
19285
19285
  return pName.toString().replace(/ /g, "_");
19286
19286
  }, "reformatName");
19287
19287
  function validateSequence(sequence, options = {}) {
19288
- let {
19289
- isProtein,
19290
- isOligo,
19288
+ const {
19291
19289
  guessIfProtein,
19292
19290
  guessIfProteinOptions,
19293
19291
  reformatSeqName,
@@ -19297,6 +19295,19 @@ function validateSequence(sequence, options = {}) {
19297
19295
  allowOverflowAnnotations,
19298
19296
  coerceFeatureTypes
19299
19297
  } = options;
19298
+ [
19299
+ "isDNA",
19300
+ "isOligo",
19301
+ "isRNA",
19302
+ "isDoubleStrandedDNA",
19303
+ "isSingleStrandedDNA",
19304
+ "isDoubleStrandedRNA",
19305
+ "isProtein"
19306
+ ].forEach((k) => {
19307
+ if (options[k] !== void 0 && sequence[k] === void 0) {
19308
+ sequence[k] = options[k];
19309
+ }
19310
+ });
19300
19311
  const response = {
19301
19312
  validatedAndCleanedSequence: {},
19302
19313
  messages: []
@@ -19333,13 +19344,13 @@ function validateSequence(sequence, options = {}) {
19333
19344
  sequence.sequence = "";
19334
19345
  }
19335
19346
  let validChars;
19336
- if (isProtein === void 0 && guessIfProtein) {
19337
- isProtein = !guessIfSequenceIsDnaAndNotProtein(
19347
+ if (sequence.isProtein === void 0 && guessIfProtein) {
19348
+ sequence.isProtein = !guessIfSequenceIsDnaAndNotProtein(
19338
19349
  sequence.sequence,
19339
19350
  guessIfProteinOptions
19340
19351
  );
19341
19352
  }
19342
- if (isProtein) {
19353
+ if (sequence.isProtein) {
19343
19354
  validChars = filterAminoAcidSequenceString(sequence.sequence);
19344
19355
  if (validChars !== sequence.sequence) {
19345
19356
  sequence.sequence = validChars;
@@ -19355,13 +19366,13 @@ function validateSequence(sequence, options = {}) {
19355
19366
  sequence.proteinSize = sequence.proteinSequence.length;
19356
19367
  } else {
19357
19368
  const temp = sequence.sequence;
19358
- if (!isOligo) {
19369
+ if (!sequence.isOligo) {
19359
19370
  sequence.sequence = sequence.sequence.replace(
19360
19371
  /u/gi,
19361
19372
  (u) => u === "U" ? "T" : "t"
19362
19373
  );
19363
19374
  }
19364
- if (temp !== sequence.sequence) {
19375
+ if (temp !== sequence.sequence && !sequence.isDNA && !sequence.isProtein) {
19365
19376
  sequence.type = "RNA";
19366
19377
  sequence.sequence = temp;
19367
19378
  } else {
@@ -19376,7 +19387,7 @@ function validateSequence(sequence, options = {}) {
19376
19387
  }
19377
19388
  }
19378
19389
  if (!sequence.size) {
19379
- sequence.size = isProtein ? sequence.proteinSequence.length * 3 : sequence.sequence.length;
19390
+ sequence.size = sequence.isProtein ? sequence.proteinSequence.length * 3 : sequence.sequence.length;
19380
19391
  }
19381
19392
  let circularityExplicitlyDefined;
19382
19393
  if (sequence.circular === false || sequence.circular === "false" || sequence.circular === -1) {
@@ -19949,10 +19960,13 @@ function genbankToJson(string, options = {}) {
19949
19960
  }
19950
19961
  options.sequenceTypeFromLocus = item;
19951
19962
  if (item.match(/ss-dna/i)) {
19963
+ options.isDNA = true;
19952
19964
  options.isSingleStrandedDNA = true;
19953
- }
19954
- if (item.match(/rna/i)) {
19965
+ } else if (item.match(/rna/i)) {
19955
19966
  options.isRna = true;
19967
+ } else if (item.match(/ds-dna/i) || item.match(/dna/i)) {
19968
+ options.isDNA = true;
19969
+ options.isDoubleStrandedDNA = true;
19956
19970
  }
19957
19971
  if (item.match(/rna/i) && !item.match(/ss-rna/i)) {
19958
19972
  options.isDoubleStrandedRNA = true;
@@ -19967,6 +19981,8 @@ function genbankToJson(string, options = {}) {
19967
19981
  }
19968
19982
  result.parsedSequence.gbDivision = gbDivision;
19969
19983
  result.parsedSequence.sequenceTypeFromLocus = options.sequenceTypeFromLocus;
19984
+ result.parsedSequence.isDNA = options.isDNA;
19985
+ result.parsedSequence.isDoubleStrandedDNA = options.isDoubleStrandedDNA;
19970
19986
  result.parsedSequence.isSingleStrandedDNA = options.isSingleStrandedDNA;
19971
19987
  result.parsedSequence.isRna = options.isRna;
19972
19988
  result.parsedSequence.isDoubleStrandedRNA = options.isDoubleStrandedRNA;
package/index.mjs CHANGED
@@ -19283,9 +19283,7 @@ const reformatName = /* @__PURE__ */ __name(function(pName) {
19283
19283
  return pName.toString().replace(/ /g, "_");
19284
19284
  }, "reformatName");
19285
19285
  function validateSequence(sequence, options = {}) {
19286
- let {
19287
- isProtein,
19288
- isOligo,
19286
+ const {
19289
19287
  guessIfProtein,
19290
19288
  guessIfProteinOptions,
19291
19289
  reformatSeqName,
@@ -19295,6 +19293,19 @@ function validateSequence(sequence, options = {}) {
19295
19293
  allowOverflowAnnotations,
19296
19294
  coerceFeatureTypes
19297
19295
  } = options;
19296
+ [
19297
+ "isDNA",
19298
+ "isOligo",
19299
+ "isRNA",
19300
+ "isDoubleStrandedDNA",
19301
+ "isSingleStrandedDNA",
19302
+ "isDoubleStrandedRNA",
19303
+ "isProtein"
19304
+ ].forEach((k) => {
19305
+ if (options[k] !== void 0 && sequence[k] === void 0) {
19306
+ sequence[k] = options[k];
19307
+ }
19308
+ });
19298
19309
  const response = {
19299
19310
  validatedAndCleanedSequence: {},
19300
19311
  messages: []
@@ -19331,13 +19342,13 @@ function validateSequence(sequence, options = {}) {
19331
19342
  sequence.sequence = "";
19332
19343
  }
19333
19344
  let validChars;
19334
- if (isProtein === void 0 && guessIfProtein) {
19335
- isProtein = !guessIfSequenceIsDnaAndNotProtein(
19345
+ if (sequence.isProtein === void 0 && guessIfProtein) {
19346
+ sequence.isProtein = !guessIfSequenceIsDnaAndNotProtein(
19336
19347
  sequence.sequence,
19337
19348
  guessIfProteinOptions
19338
19349
  );
19339
19350
  }
19340
- if (isProtein) {
19351
+ if (sequence.isProtein) {
19341
19352
  validChars = filterAminoAcidSequenceString(sequence.sequence);
19342
19353
  if (validChars !== sequence.sequence) {
19343
19354
  sequence.sequence = validChars;
@@ -19353,13 +19364,13 @@ function validateSequence(sequence, options = {}) {
19353
19364
  sequence.proteinSize = sequence.proteinSequence.length;
19354
19365
  } else {
19355
19366
  const temp = sequence.sequence;
19356
- if (!isOligo) {
19367
+ if (!sequence.isOligo) {
19357
19368
  sequence.sequence = sequence.sequence.replace(
19358
19369
  /u/gi,
19359
19370
  (u) => u === "U" ? "T" : "t"
19360
19371
  );
19361
19372
  }
19362
- if (temp !== sequence.sequence) {
19373
+ if (temp !== sequence.sequence && !sequence.isDNA && !sequence.isProtein) {
19363
19374
  sequence.type = "RNA";
19364
19375
  sequence.sequence = temp;
19365
19376
  } else {
@@ -19374,7 +19385,7 @@ function validateSequence(sequence, options = {}) {
19374
19385
  }
19375
19386
  }
19376
19387
  if (!sequence.size) {
19377
- sequence.size = isProtein ? sequence.proteinSequence.length * 3 : sequence.sequence.length;
19388
+ sequence.size = sequence.isProtein ? sequence.proteinSequence.length * 3 : sequence.sequence.length;
19378
19389
  }
19379
19390
  let circularityExplicitlyDefined;
19380
19391
  if (sequence.circular === false || sequence.circular === "false" || sequence.circular === -1) {
@@ -19947,10 +19958,13 @@ function genbankToJson(string, options = {}) {
19947
19958
  }
19948
19959
  options.sequenceTypeFromLocus = item;
19949
19960
  if (item.match(/ss-dna/i)) {
19961
+ options.isDNA = true;
19950
19962
  options.isSingleStrandedDNA = true;
19951
- }
19952
- if (item.match(/rna/i)) {
19963
+ } else if (item.match(/rna/i)) {
19953
19964
  options.isRna = true;
19965
+ } else if (item.match(/ds-dna/i) || item.match(/dna/i)) {
19966
+ options.isDNA = true;
19967
+ options.isDoubleStrandedDNA = true;
19954
19968
  }
19955
19969
  if (item.match(/rna/i) && !item.match(/ss-rna/i)) {
19956
19970
  options.isDoubleStrandedRNA = true;
@@ -19965,6 +19979,8 @@ function genbankToJson(string, options = {}) {
19965
19979
  }
19966
19980
  result.parsedSequence.gbDivision = gbDivision;
19967
19981
  result.parsedSequence.sequenceTypeFromLocus = options.sequenceTypeFromLocus;
19982
+ result.parsedSequence.isDNA = options.isDNA;
19983
+ result.parsedSequence.isDoubleStrandedDNA = options.isDoubleStrandedDNA;
19968
19984
  result.parsedSequence.isSingleStrandedDNA = options.isSingleStrandedDNA;
19969
19985
  result.parsedSequence.isRna = options.isRna;
19970
19986
  result.parsedSequence.isDoubleStrandedRNA = options.isDoubleStrandedRNA;
package/index.umd.js CHANGED
@@ -19287,9 +19287,7 @@ var __async = (__this, __arguments, generator) => {
19287
19287
  return pName.toString().replace(/ /g, "_");
19288
19288
  }, "reformatName");
19289
19289
  function validateSequence(sequence, options = {}) {
19290
- let {
19291
- isProtein,
19292
- isOligo,
19290
+ const {
19293
19291
  guessIfProtein,
19294
19292
  guessIfProteinOptions,
19295
19293
  reformatSeqName,
@@ -19299,6 +19297,19 @@ var __async = (__this, __arguments, generator) => {
19299
19297
  allowOverflowAnnotations,
19300
19298
  coerceFeatureTypes
19301
19299
  } = options;
19300
+ [
19301
+ "isDNA",
19302
+ "isOligo",
19303
+ "isRNA",
19304
+ "isDoubleStrandedDNA",
19305
+ "isSingleStrandedDNA",
19306
+ "isDoubleStrandedRNA",
19307
+ "isProtein"
19308
+ ].forEach((k) => {
19309
+ if (options[k] !== void 0 && sequence[k] === void 0) {
19310
+ sequence[k] = options[k];
19311
+ }
19312
+ });
19302
19313
  const response = {
19303
19314
  validatedAndCleanedSequence: {},
19304
19315
  messages: []
@@ -19335,13 +19346,13 @@ var __async = (__this, __arguments, generator) => {
19335
19346
  sequence.sequence = "";
19336
19347
  }
19337
19348
  let validChars;
19338
- if (isProtein === void 0 && guessIfProtein) {
19339
- isProtein = !guessIfSequenceIsDnaAndNotProtein(
19349
+ if (sequence.isProtein === void 0 && guessIfProtein) {
19350
+ sequence.isProtein = !guessIfSequenceIsDnaAndNotProtein(
19340
19351
  sequence.sequence,
19341
19352
  guessIfProteinOptions
19342
19353
  );
19343
19354
  }
19344
- if (isProtein) {
19355
+ if (sequence.isProtein) {
19345
19356
  validChars = filterAminoAcidSequenceString(sequence.sequence);
19346
19357
  if (validChars !== sequence.sequence) {
19347
19358
  sequence.sequence = validChars;
@@ -19357,13 +19368,13 @@ var __async = (__this, __arguments, generator) => {
19357
19368
  sequence.proteinSize = sequence.proteinSequence.length;
19358
19369
  } else {
19359
19370
  const temp = sequence.sequence;
19360
- if (!isOligo) {
19371
+ if (!sequence.isOligo) {
19361
19372
  sequence.sequence = sequence.sequence.replace(
19362
19373
  /u/gi,
19363
19374
  (u) => u === "U" ? "T" : "t"
19364
19375
  );
19365
19376
  }
19366
- if (temp !== sequence.sequence) {
19377
+ if (temp !== sequence.sequence && !sequence.isDNA && !sequence.isProtein) {
19367
19378
  sequence.type = "RNA";
19368
19379
  sequence.sequence = temp;
19369
19380
  } else {
@@ -19378,7 +19389,7 @@ var __async = (__this, __arguments, generator) => {
19378
19389
  }
19379
19390
  }
19380
19391
  if (!sequence.size) {
19381
- sequence.size = isProtein ? sequence.proteinSequence.length * 3 : sequence.sequence.length;
19392
+ sequence.size = sequence.isProtein ? sequence.proteinSequence.length * 3 : sequence.sequence.length;
19382
19393
  }
19383
19394
  let circularityExplicitlyDefined;
19384
19395
  if (sequence.circular === false || sequence.circular === "false" || sequence.circular === -1) {
@@ -19951,10 +19962,13 @@ var __async = (__this, __arguments, generator) => {
19951
19962
  }
19952
19963
  options.sequenceTypeFromLocus = item;
19953
19964
  if (item.match(/ss-dna/i)) {
19965
+ options.isDNA = true;
19954
19966
  options.isSingleStrandedDNA = true;
19955
- }
19956
- if (item.match(/rna/i)) {
19967
+ } else if (item.match(/rna/i)) {
19957
19968
  options.isRna = true;
19969
+ } else if (item.match(/ds-dna/i) || item.match(/dna/i)) {
19970
+ options.isDNA = true;
19971
+ options.isDoubleStrandedDNA = true;
19958
19972
  }
19959
19973
  if (item.match(/rna/i) && !item.match(/ss-rna/i)) {
19960
19974
  options.isDoubleStrandedRNA = true;
@@ -19969,6 +19983,8 @@ var __async = (__this, __arguments, generator) => {
19969
19983
  }
19970
19984
  result.parsedSequence.gbDivision = gbDivision;
19971
19985
  result.parsedSequence.sequenceTypeFromLocus = options.sequenceTypeFromLocus;
19986
+ result.parsedSequence.isDNA = options.isDNA;
19987
+ result.parsedSequence.isDoubleStrandedDNA = options.isDoubleStrandedDNA;
19972
19988
  result.parsedSequence.isSingleStrandedDNA = options.isSingleStrandedDNA;
19973
19989
  result.parsedSequence.isRna = options.isRna;
19974
19990
  result.parsedSequence.isDoubleStrandedRNA = options.isDoubleStrandedRNA;
package/package.json CHANGED
@@ -1,10 +1,10 @@
1
1
  {
2
2
  "name": "@teselagen/bio-parsers",
3
- "version": "0.3.4",
3
+ "version": "0.3.5",
4
4
  "type": "commonjs",
5
5
  "dependencies": {
6
- "@teselagen/sequence-utils": "0.3.4",
7
- "@teselagen/range-utils": "0.3.4",
6
+ "@teselagen/sequence-utils": "0.3.5",
7
+ "@teselagen/range-utils": "0.3.5",
8
8
  "@gmod/gff": "^1.2.1",
9
9
  "buffer": "^6.0.3",
10
10
  "bufferpack": "^0.0.6",
@@ -1,7 +1,7 @@
1
1
  /* eslint-disable no-var*/
2
2
  import { convertAACaretPositionOrRangeToDna } from "@teselagen/sequence-utils";
3
3
 
4
- import { gbDivisions, untitledSequenceName } from "./utils/constants";
4
+ import { gbDivisions, untitledSequenceName } from "./utils/constants";
5
5
  import flattenSequenceArray from "./utils/flattenSequenceArray";
6
6
  import validateSequenceArray from "./utils/validateSequenceArray";
7
7
  import splitStringIntoLines from "./utils/splitStringIntoLines.js";
@@ -11,7 +11,7 @@ import createInitialSequence from "./utils/createInitialSequence";
11
11
  function genbankToJson(string, options = {}) {
12
12
  const {
13
13
  inclusive1BasedStart,
14
- inclusive1BasedEnd,
14
+ inclusive1BasedEnd
15
15
  //these are also valid options:
16
16
  // primersAsFeatures,
17
17
  // sequenceTypeFromLocus,
@@ -43,7 +43,7 @@ function genbankToJson(string, options = {}) {
43
43
  BASE_COUNT_TAG: "BASE COUNT",
44
44
  //CONTIG_TAG: "CONTIG"
45
45
  ORIGIN_TAG: "ORIGIN",
46
- END_SEQUENCE_TAG: "//",
46
+ END_SEQUENCE_TAG: "//"
47
47
  };
48
48
  let hasFoundLocus = false;
49
49
  let featureLocationIndentation;
@@ -214,7 +214,7 @@ function genbankToJson(string, options = {}) {
214
214
  console.error("Error trying to parse file as .gb:", e);
215
215
  result = {
216
216
  success: false,
217
- messages: ["Import Error: Invalid File"],
217
+ messages: ["Import Error: Invalid File"]
218
218
  };
219
219
  }
220
220
 
@@ -338,18 +338,25 @@ function genbankToJson(string, options = {}) {
338
338
 
339
339
  if (
340
340
  j === 4 &&
341
- (item.match(/ds-dna/i) || item.match(/ss-dna/i) || item.match(/dna/i) || item.match(/rna/i))
341
+ (item.match(/ds-dna/i) ||
342
+ item.match(/ss-dna/i) ||
343
+ item.match(/dna/i) ||
344
+ item.match(/rna/i))
342
345
  ) {
343
346
  if (options.isProtein === undefined) {
344
347
  options.isProtein = false;
345
348
  }
346
349
  options.sequenceTypeFromLocus = item;
347
350
  if (item.match(/ss-dna/i)) {
351
+ options.isDNA = true;
348
352
  options.isSingleStrandedDNA = true;
349
- }
350
- if (item.match(/rna/i)) {
353
+ } else if (item.match(/rna/i)) {
351
354
  options.isRna = true;
355
+ } else if (item.match(/ds-dna/i) || item.match(/dna/i)) {
356
+ options.isDNA = true;
357
+ options.isDoubleStrandedDNA = true;
352
358
  }
359
+
353
360
  if (item.match(/rna/i) && !item.match(/ss-rna/i)) {
354
361
  options.isDoubleStrandedRNA = true;
355
362
  }
@@ -373,6 +380,8 @@ function genbankToJson(string, options = {}) {
373
380
  }
374
381
  result.parsedSequence.gbDivision = gbDivision;
375
382
  result.parsedSequence.sequenceTypeFromLocus = options.sequenceTypeFromLocus;
383
+ result.parsedSequence.isDNA = options.isDNA;
384
+ result.parsedSequence.isDoubleStrandedDNA = options.isDoubleStrandedDNA;
376
385
  result.parsedSequence.isSingleStrandedDNA = options.isSingleStrandedDNA;
377
386
  result.parsedSequence.isRna = options.isRna;
378
387
  result.parsedSequence.isDoubleStrandedRNA = options.isDoubleStrandedRNA;
@@ -466,7 +475,7 @@ function genbankToJson(string, options = {}) {
466
475
  function newFeature() {
467
476
  result.parsedSequence.features.push({
468
477
  locations: [],
469
- notes: {},
478
+ notes: {}
470
479
  });
471
480
  }
472
481
 
@@ -504,7 +513,7 @@ function genbankToJson(string, options = {}) {
504
513
  }
505
514
  const location = {
506
515
  start: start,
507
- end: end,
516
+ end: end
508
517
  };
509
518
  const feat = getCurrentFeature();
510
519
  feat.locations.push(
@@ -3,7 +3,7 @@ import { getFeatureTypes } from "@teselagen/sequence-utils";
3
3
  import {
4
4
  filterAminoAcidSequenceString,
5
5
  filterSequenceString,
6
- guessIfSequenceIsDnaAndNotProtein,
6
+ guessIfSequenceIsDnaAndNotProtein
7
7
  } from "@teselagen/sequence-utils";
8
8
  import { filter, some, upperFirst } from "lodash";
9
9
  import pragmasAndTypes from "./pragmasAndTypes.js";
@@ -22,9 +22,7 @@ import { reformatName } from "./NameUtils.js";
22
22
  };
23
23
  */
24
24
  export default function validateSequence(sequence, options = {}) {
25
- let {
26
- isProtein,
27
- isOligo,
25
+ const {
28
26
  guessIfProtein,
29
27
  guessIfProteinOptions,
30
28
  reformatSeqName,
@@ -32,11 +30,25 @@ export default function validateSequence(sequence, options = {}) {
32
30
  inclusive1BasedEnd,
33
31
  additionalValidChars,
34
32
  allowOverflowAnnotations,
35
- coerceFeatureTypes,
33
+ coerceFeatureTypes
36
34
  } = options;
35
+ [
36
+ "isDNA",
37
+ "isOligo",
38
+ "isRNA",
39
+ "isDoubleStrandedDNA",
40
+ "isSingleStrandedDNA",
41
+ "isDoubleStrandedRNA",
42
+ "isProtein"
43
+ ].forEach((k) => {
44
+ if (options[k] !== undefined && sequence[k] === undefined) {
45
+ sequence[k] = options[k];
46
+ }
47
+ });
48
+
37
49
  const response = {
38
50
  validatedAndCleanedSequence: {},
39
- messages: [],
51
+ messages: []
40
52
  };
41
53
  if (!sequence || typeof sequence !== "object") {
42
54
  throw new Error("Invalid sequence");
@@ -73,13 +85,13 @@ export default function validateSequence(sequence, options = {}) {
73
85
  sequence.sequence = "";
74
86
  }
75
87
  let validChars;
76
- if (isProtein === undefined && guessIfProtein) {
77
- isProtein = !guessIfSequenceIsDnaAndNotProtein(
88
+ if (sequence.isProtein === undefined && guessIfProtein) {
89
+ sequence.isProtein = !guessIfSequenceIsDnaAndNotProtein(
78
90
  sequence.sequence,
79
91
  guessIfProteinOptions
80
92
  );
81
93
  }
82
- if (isProtein) {
94
+ if (sequence.isProtein) {
83
95
  //tnr: add code to strip invalid protein data..
84
96
  validChars = filterAminoAcidSequenceString(sequence.sequence);
85
97
  if (validChars !== sequence.sequence) {
@@ -97,12 +109,12 @@ export default function validateSequence(sequence, options = {}) {
97
109
  } else {
98
110
  //todo: this logic won't catch every case of RNA, so we should probably handle RNA conversion at another level..
99
111
  const temp = sequence.sequence;
100
- if (!isOligo) {
112
+ if (!sequence.isOligo) {
101
113
  sequence.sequence = sequence.sequence.replace(/u/gi, (u) =>
102
114
  u === "U" ? "T" : "t"
103
115
  );
104
116
  }
105
- if (temp !== sequence.sequence) {
117
+ if (temp !== sequence.sequence && !sequence.isDNA && !sequence.isProtein) {
106
118
  sequence.type = "RNA";
107
119
  sequence.sequence = temp;
108
120
  } else {
@@ -119,7 +131,7 @@ export default function validateSequence(sequence, options = {}) {
119
131
  }
120
132
 
121
133
  if (!sequence.size) {
122
- sequence.size = isProtein
134
+ sequence.size = sequence.isProtein
123
135
  ? sequence.proteinSequence.length * 3
124
136
  : sequence.sequence.length;
125
137
  }