@teselagen/bio-parsers 0.3.3 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -19285,9 +19285,7 @@ const reformatName = /* @__PURE__ */ __name(function(pName) {
19285
19285
  return pName.toString().replace(/ /g, "_");
19286
19286
  }, "reformatName");
19287
19287
  function validateSequence(sequence, options = {}) {
19288
- let {
19289
- isProtein,
19290
- isOligo,
19288
+ const {
19291
19289
  guessIfProtein,
19292
19290
  guessIfProteinOptions,
19293
19291
  reformatSeqName,
@@ -19297,6 +19295,19 @@ function validateSequence(sequence, options = {}) {
19297
19295
  allowOverflowAnnotations,
19298
19296
  coerceFeatureTypes
19299
19297
  } = options;
19298
+ [
19299
+ "isDNA",
19300
+ "isOligo",
19301
+ "isRNA",
19302
+ "isDoubleStrandedDNA",
19303
+ "isSingleStrandedDNA",
19304
+ "isDoubleStrandedRNA",
19305
+ "isProtein"
19306
+ ].forEach((k) => {
19307
+ if (options[k] !== void 0 && sequence[k] === void 0) {
19308
+ sequence[k] = options[k];
19309
+ }
19310
+ });
19300
19311
  const response = {
19301
19312
  validatedAndCleanedSequence: {},
19302
19313
  messages: []
@@ -19333,13 +19344,13 @@ function validateSequence(sequence, options = {}) {
19333
19344
  sequence.sequence = "";
19334
19345
  }
19335
19346
  let validChars;
19336
- if (isProtein === void 0 && guessIfProtein) {
19337
- isProtein = !guessIfSequenceIsDnaAndNotProtein(
19347
+ if (sequence.isProtein === void 0 && guessIfProtein) {
19348
+ sequence.isProtein = !guessIfSequenceIsDnaAndNotProtein(
19338
19349
  sequence.sequence,
19339
19350
  guessIfProteinOptions
19340
19351
  );
19341
19352
  }
19342
- if (isProtein) {
19353
+ if (sequence.isProtein) {
19343
19354
  validChars = filterAminoAcidSequenceString(sequence.sequence);
19344
19355
  if (validChars !== sequence.sequence) {
19345
19356
  sequence.sequence = validChars;
@@ -19355,14 +19366,15 @@ function validateSequence(sequence, options = {}) {
19355
19366
  sequence.proteinSize = sequence.proteinSequence.length;
19356
19367
  } else {
19357
19368
  const temp = sequence.sequence;
19358
- if (!isOligo) {
19369
+ if (!sequence.isOligo) {
19359
19370
  sequence.sequence = sequence.sequence.replace(
19360
19371
  /u/gi,
19361
19372
  (u) => u === "U" ? "T" : "t"
19362
19373
  );
19363
19374
  }
19364
- if (temp !== sequence.sequence) {
19375
+ if (temp !== sequence.sequence && !sequence.isDNA && !sequence.isProtein) {
19365
19376
  sequence.type = "RNA";
19377
+ sequence.sequence = temp;
19366
19378
  } else {
19367
19379
  sequence.type = "DNA";
19368
19380
  }
@@ -19375,7 +19387,7 @@ function validateSequence(sequence, options = {}) {
19375
19387
  }
19376
19388
  }
19377
19389
  if (!sequence.size) {
19378
- sequence.size = isProtein ? sequence.proteinSequence.length * 3 : sequence.sequence.length;
19390
+ sequence.size = sequence.isProtein ? sequence.proteinSequence.length * 3 : sequence.sequence.length;
19379
19391
  }
19380
19392
  let circularityExplicitlyDefined;
19381
19393
  if (sequence.circular === false || sequence.circular === "false" || sequence.circular === -1) {
@@ -19948,7 +19960,13 @@ function genbankToJson(string, options = {}) {
19948
19960
  }
19949
19961
  options.sequenceTypeFromLocus = item;
19950
19962
  if (item.match(/ss-dna/i)) {
19963
+ options.isDNA = true;
19951
19964
  options.isSingleStrandedDNA = true;
19965
+ } else if (item.match(/rna/i)) {
19966
+ options.isRna = true;
19967
+ } else if (item.match(/ds-dna/i) || item.match(/dna/i)) {
19968
+ options.isDNA = true;
19969
+ options.isDoubleStrandedDNA = true;
19952
19970
  }
19953
19971
  if (item.match(/rna/i) && !item.match(/ss-rna/i)) {
19954
19972
  options.isDoubleStrandedRNA = true;
@@ -19963,7 +19981,10 @@ function genbankToJson(string, options = {}) {
19963
19981
  }
19964
19982
  result.parsedSequence.gbDivision = gbDivision;
19965
19983
  result.parsedSequence.sequenceTypeFromLocus = options.sequenceTypeFromLocus;
19984
+ result.parsedSequence.isDNA = options.isDNA;
19985
+ result.parsedSequence.isDoubleStrandedDNA = options.isDoubleStrandedDNA;
19966
19986
  result.parsedSequence.isSingleStrandedDNA = options.isSingleStrandedDNA;
19987
+ result.parsedSequence.isRna = options.isRna;
19967
19988
  result.parsedSequence.isDoubleStrandedRNA = options.isDoubleStrandedRNA;
19968
19989
  result.parsedSequence.date = date;
19969
19990
  result.parsedSequence.circular = circular;
package/index.mjs CHANGED
@@ -19283,9 +19283,7 @@ const reformatName = /* @__PURE__ */ __name(function(pName) {
19283
19283
  return pName.toString().replace(/ /g, "_");
19284
19284
  }, "reformatName");
19285
19285
  function validateSequence(sequence, options = {}) {
19286
- let {
19287
- isProtein,
19288
- isOligo,
19286
+ const {
19289
19287
  guessIfProtein,
19290
19288
  guessIfProteinOptions,
19291
19289
  reformatSeqName,
@@ -19295,6 +19293,19 @@ function validateSequence(sequence, options = {}) {
19295
19293
  allowOverflowAnnotations,
19296
19294
  coerceFeatureTypes
19297
19295
  } = options;
19296
+ [
19297
+ "isDNA",
19298
+ "isOligo",
19299
+ "isRNA",
19300
+ "isDoubleStrandedDNA",
19301
+ "isSingleStrandedDNA",
19302
+ "isDoubleStrandedRNA",
19303
+ "isProtein"
19304
+ ].forEach((k) => {
19305
+ if (options[k] !== void 0 && sequence[k] === void 0) {
19306
+ sequence[k] = options[k];
19307
+ }
19308
+ });
19298
19309
  const response = {
19299
19310
  validatedAndCleanedSequence: {},
19300
19311
  messages: []
@@ -19331,13 +19342,13 @@ function validateSequence(sequence, options = {}) {
19331
19342
  sequence.sequence = "";
19332
19343
  }
19333
19344
  let validChars;
19334
- if (isProtein === void 0 && guessIfProtein) {
19335
- isProtein = !guessIfSequenceIsDnaAndNotProtein(
19345
+ if (sequence.isProtein === void 0 && guessIfProtein) {
19346
+ sequence.isProtein = !guessIfSequenceIsDnaAndNotProtein(
19336
19347
  sequence.sequence,
19337
19348
  guessIfProteinOptions
19338
19349
  );
19339
19350
  }
19340
- if (isProtein) {
19351
+ if (sequence.isProtein) {
19341
19352
  validChars = filterAminoAcidSequenceString(sequence.sequence);
19342
19353
  if (validChars !== sequence.sequence) {
19343
19354
  sequence.sequence = validChars;
@@ -19353,14 +19364,15 @@ function validateSequence(sequence, options = {}) {
19353
19364
  sequence.proteinSize = sequence.proteinSequence.length;
19354
19365
  } else {
19355
19366
  const temp = sequence.sequence;
19356
- if (!isOligo) {
19367
+ if (!sequence.isOligo) {
19357
19368
  sequence.sequence = sequence.sequence.replace(
19358
19369
  /u/gi,
19359
19370
  (u) => u === "U" ? "T" : "t"
19360
19371
  );
19361
19372
  }
19362
- if (temp !== sequence.sequence) {
19373
+ if (temp !== sequence.sequence && !sequence.isDNA && !sequence.isProtein) {
19363
19374
  sequence.type = "RNA";
19375
+ sequence.sequence = temp;
19364
19376
  } else {
19365
19377
  sequence.type = "DNA";
19366
19378
  }
@@ -19373,7 +19385,7 @@ function validateSequence(sequence, options = {}) {
19373
19385
  }
19374
19386
  }
19375
19387
  if (!sequence.size) {
19376
- sequence.size = isProtein ? sequence.proteinSequence.length * 3 : sequence.sequence.length;
19388
+ sequence.size = sequence.isProtein ? sequence.proteinSequence.length * 3 : sequence.sequence.length;
19377
19389
  }
19378
19390
  let circularityExplicitlyDefined;
19379
19391
  if (sequence.circular === false || sequence.circular === "false" || sequence.circular === -1) {
@@ -19946,7 +19958,13 @@ function genbankToJson(string, options = {}) {
19946
19958
  }
19947
19959
  options.sequenceTypeFromLocus = item;
19948
19960
  if (item.match(/ss-dna/i)) {
19961
+ options.isDNA = true;
19949
19962
  options.isSingleStrandedDNA = true;
19963
+ } else if (item.match(/rna/i)) {
19964
+ options.isRna = true;
19965
+ } else if (item.match(/ds-dna/i) || item.match(/dna/i)) {
19966
+ options.isDNA = true;
19967
+ options.isDoubleStrandedDNA = true;
19950
19968
  }
19951
19969
  if (item.match(/rna/i) && !item.match(/ss-rna/i)) {
19952
19970
  options.isDoubleStrandedRNA = true;
@@ -19961,7 +19979,10 @@ function genbankToJson(string, options = {}) {
19961
19979
  }
19962
19980
  result.parsedSequence.gbDivision = gbDivision;
19963
19981
  result.parsedSequence.sequenceTypeFromLocus = options.sequenceTypeFromLocus;
19982
+ result.parsedSequence.isDNA = options.isDNA;
19983
+ result.parsedSequence.isDoubleStrandedDNA = options.isDoubleStrandedDNA;
19964
19984
  result.parsedSequence.isSingleStrandedDNA = options.isSingleStrandedDNA;
19985
+ result.parsedSequence.isRna = options.isRna;
19965
19986
  result.parsedSequence.isDoubleStrandedRNA = options.isDoubleStrandedRNA;
19966
19987
  result.parsedSequence.date = date;
19967
19988
  result.parsedSequence.circular = circular;
package/index.umd.js CHANGED
@@ -19287,9 +19287,7 @@ var __async = (__this, __arguments, generator) => {
19287
19287
  return pName.toString().replace(/ /g, "_");
19288
19288
  }, "reformatName");
19289
19289
  function validateSequence(sequence, options = {}) {
19290
- let {
19291
- isProtein,
19292
- isOligo,
19290
+ const {
19293
19291
  guessIfProtein,
19294
19292
  guessIfProteinOptions,
19295
19293
  reformatSeqName,
@@ -19299,6 +19297,19 @@ var __async = (__this, __arguments, generator) => {
19299
19297
  allowOverflowAnnotations,
19300
19298
  coerceFeatureTypes
19301
19299
  } = options;
19300
+ [
19301
+ "isDNA",
19302
+ "isOligo",
19303
+ "isRNA",
19304
+ "isDoubleStrandedDNA",
19305
+ "isSingleStrandedDNA",
19306
+ "isDoubleStrandedRNA",
19307
+ "isProtein"
19308
+ ].forEach((k) => {
19309
+ if (options[k] !== void 0 && sequence[k] === void 0) {
19310
+ sequence[k] = options[k];
19311
+ }
19312
+ });
19302
19313
  const response = {
19303
19314
  validatedAndCleanedSequence: {},
19304
19315
  messages: []
@@ -19335,13 +19346,13 @@ var __async = (__this, __arguments, generator) => {
19335
19346
  sequence.sequence = "";
19336
19347
  }
19337
19348
  let validChars;
19338
- if (isProtein === void 0 && guessIfProtein) {
19339
- isProtein = !guessIfSequenceIsDnaAndNotProtein(
19349
+ if (sequence.isProtein === void 0 && guessIfProtein) {
19350
+ sequence.isProtein = !guessIfSequenceIsDnaAndNotProtein(
19340
19351
  sequence.sequence,
19341
19352
  guessIfProteinOptions
19342
19353
  );
19343
19354
  }
19344
- if (isProtein) {
19355
+ if (sequence.isProtein) {
19345
19356
  validChars = filterAminoAcidSequenceString(sequence.sequence);
19346
19357
  if (validChars !== sequence.sequence) {
19347
19358
  sequence.sequence = validChars;
@@ -19357,14 +19368,15 @@ var __async = (__this, __arguments, generator) => {
19357
19368
  sequence.proteinSize = sequence.proteinSequence.length;
19358
19369
  } else {
19359
19370
  const temp = sequence.sequence;
19360
- if (!isOligo) {
19371
+ if (!sequence.isOligo) {
19361
19372
  sequence.sequence = sequence.sequence.replace(
19362
19373
  /u/gi,
19363
19374
  (u) => u === "U" ? "T" : "t"
19364
19375
  );
19365
19376
  }
19366
- if (temp !== sequence.sequence) {
19377
+ if (temp !== sequence.sequence && !sequence.isDNA && !sequence.isProtein) {
19367
19378
  sequence.type = "RNA";
19379
+ sequence.sequence = temp;
19368
19380
  } else {
19369
19381
  sequence.type = "DNA";
19370
19382
  }
@@ -19377,7 +19389,7 @@ var __async = (__this, __arguments, generator) => {
19377
19389
  }
19378
19390
  }
19379
19391
  if (!sequence.size) {
19380
- sequence.size = isProtein ? sequence.proteinSequence.length * 3 : sequence.sequence.length;
19392
+ sequence.size = sequence.isProtein ? sequence.proteinSequence.length * 3 : sequence.sequence.length;
19381
19393
  }
19382
19394
  let circularityExplicitlyDefined;
19383
19395
  if (sequence.circular === false || sequence.circular === "false" || sequence.circular === -1) {
@@ -19950,7 +19962,13 @@ var __async = (__this, __arguments, generator) => {
19950
19962
  }
19951
19963
  options.sequenceTypeFromLocus = item;
19952
19964
  if (item.match(/ss-dna/i)) {
19965
+ options.isDNA = true;
19953
19966
  options.isSingleStrandedDNA = true;
19967
+ } else if (item.match(/rna/i)) {
19968
+ options.isRna = true;
19969
+ } else if (item.match(/ds-dna/i) || item.match(/dna/i)) {
19970
+ options.isDNA = true;
19971
+ options.isDoubleStrandedDNA = true;
19954
19972
  }
19955
19973
  if (item.match(/rna/i) && !item.match(/ss-rna/i)) {
19956
19974
  options.isDoubleStrandedRNA = true;
@@ -19965,7 +19983,10 @@ var __async = (__this, __arguments, generator) => {
19965
19983
  }
19966
19984
  result.parsedSequence.gbDivision = gbDivision;
19967
19985
  result.parsedSequence.sequenceTypeFromLocus = options.sequenceTypeFromLocus;
19986
+ result.parsedSequence.isDNA = options.isDNA;
19987
+ result.parsedSequence.isDoubleStrandedDNA = options.isDoubleStrandedDNA;
19968
19988
  result.parsedSequence.isSingleStrandedDNA = options.isSingleStrandedDNA;
19989
+ result.parsedSequence.isRna = options.isRna;
19969
19990
  result.parsedSequence.isDoubleStrandedRNA = options.isDoubleStrandedRNA;
19970
19991
  result.parsedSequence.date = date;
19971
19992
  result.parsedSequence.circular = circular;
package/package.json CHANGED
@@ -1,10 +1,10 @@
1
1
  {
2
2
  "name": "@teselagen/bio-parsers",
3
- "version": "0.3.3",
3
+ "version": "0.3.5",
4
4
  "type": "commonjs",
5
5
  "dependencies": {
6
- "@teselagen/sequence-utils": "0.3.3",
7
- "@teselagen/range-utils": "0.3.3",
6
+ "@teselagen/sequence-utils": "0.3.5",
7
+ "@teselagen/range-utils": "0.3.5",
8
8
  "@gmod/gff": "^1.2.1",
9
9
  "buffer": "^6.0.3",
10
10
  "bufferpack": "^0.0.6",
@@ -1,7 +1,7 @@
1
1
  /* eslint-disable no-var*/
2
2
  import { convertAACaretPositionOrRangeToDna } from "@teselagen/sequence-utils";
3
3
 
4
- import { gbDivisions, untitledSequenceName } from "./utils/constants";
4
+ import { gbDivisions, untitledSequenceName } from "./utils/constants";
5
5
  import flattenSequenceArray from "./utils/flattenSequenceArray";
6
6
  import validateSequenceArray from "./utils/validateSequenceArray";
7
7
  import splitStringIntoLines from "./utils/splitStringIntoLines.js";
@@ -11,7 +11,7 @@ import createInitialSequence from "./utils/createInitialSequence";
11
11
  function genbankToJson(string, options = {}) {
12
12
  const {
13
13
  inclusive1BasedStart,
14
- inclusive1BasedEnd,
14
+ inclusive1BasedEnd
15
15
  //these are also valid options:
16
16
  // primersAsFeatures,
17
17
  // sequenceTypeFromLocus,
@@ -43,7 +43,7 @@ function genbankToJson(string, options = {}) {
43
43
  BASE_COUNT_TAG: "BASE COUNT",
44
44
  //CONTIG_TAG: "CONTIG"
45
45
  ORIGIN_TAG: "ORIGIN",
46
- END_SEQUENCE_TAG: "//",
46
+ END_SEQUENCE_TAG: "//"
47
47
  };
48
48
  let hasFoundLocus = false;
49
49
  let featureLocationIndentation;
@@ -214,7 +214,7 @@ function genbankToJson(string, options = {}) {
214
214
  console.error("Error trying to parse file as .gb:", e);
215
215
  result = {
216
216
  success: false,
217
- messages: ["Import Error: Invalid File"],
217
+ messages: ["Import Error: Invalid File"]
218
218
  };
219
219
  }
220
220
 
@@ -338,15 +338,25 @@ function genbankToJson(string, options = {}) {
338
338
 
339
339
  if (
340
340
  j === 4 &&
341
- (item.match(/ds-dna/i) || item.match(/ss-dna/i) || item.match(/dna/i) || item.match(/rna/i))
341
+ (item.match(/ds-dna/i) ||
342
+ item.match(/ss-dna/i) ||
343
+ item.match(/dna/i) ||
344
+ item.match(/rna/i))
342
345
  ) {
343
346
  if (options.isProtein === undefined) {
344
347
  options.isProtein = false;
345
348
  }
346
349
  options.sequenceTypeFromLocus = item;
347
350
  if (item.match(/ss-dna/i)) {
351
+ options.isDNA = true;
348
352
  options.isSingleStrandedDNA = true;
353
+ } else if (item.match(/rna/i)) {
354
+ options.isRna = true;
355
+ } else if (item.match(/ds-dna/i) || item.match(/dna/i)) {
356
+ options.isDNA = true;
357
+ options.isDoubleStrandedDNA = true;
349
358
  }
359
+
350
360
  if (item.match(/rna/i) && !item.match(/ss-rna/i)) {
351
361
  options.isDoubleStrandedRNA = true;
352
362
  }
@@ -370,7 +380,10 @@ function genbankToJson(string, options = {}) {
370
380
  }
371
381
  result.parsedSequence.gbDivision = gbDivision;
372
382
  result.parsedSequence.sequenceTypeFromLocus = options.sequenceTypeFromLocus;
383
+ result.parsedSequence.isDNA = options.isDNA;
384
+ result.parsedSequence.isDoubleStrandedDNA = options.isDoubleStrandedDNA;
373
385
  result.parsedSequence.isSingleStrandedDNA = options.isSingleStrandedDNA;
386
+ result.parsedSequence.isRna = options.isRna;
374
387
  result.parsedSequence.isDoubleStrandedRNA = options.isDoubleStrandedRNA;
375
388
  result.parsedSequence.date = date;
376
389
  result.parsedSequence.circular = circular;
@@ -462,7 +475,7 @@ function genbankToJson(string, options = {}) {
462
475
  function newFeature() {
463
476
  result.parsedSequence.features.push({
464
477
  locations: [],
465
- notes: {},
478
+ notes: {}
466
479
  });
467
480
  }
468
481
 
@@ -500,7 +513,7 @@ function genbankToJson(string, options = {}) {
500
513
  }
501
514
  const location = {
502
515
  start: start,
503
- end: end,
516
+ end: end
504
517
  };
505
518
  const feat = getCurrentFeature();
506
519
  feat.locations.push(
@@ -3,7 +3,7 @@ import { getFeatureTypes } from "@teselagen/sequence-utils";
3
3
  import {
4
4
  filterAminoAcidSequenceString,
5
5
  filterSequenceString,
6
- guessIfSequenceIsDnaAndNotProtein,
6
+ guessIfSequenceIsDnaAndNotProtein
7
7
  } from "@teselagen/sequence-utils";
8
8
  import { filter, some, upperFirst } from "lodash";
9
9
  import pragmasAndTypes from "./pragmasAndTypes.js";
@@ -22,9 +22,7 @@ import { reformatName } from "./NameUtils.js";
22
22
  };
23
23
  */
24
24
  export default function validateSequence(sequence, options = {}) {
25
- let {
26
- isProtein,
27
- isOligo,
25
+ const {
28
26
  guessIfProtein,
29
27
  guessIfProteinOptions,
30
28
  reformatSeqName,
@@ -32,11 +30,25 @@ export default function validateSequence(sequence, options = {}) {
32
30
  inclusive1BasedEnd,
33
31
  additionalValidChars,
34
32
  allowOverflowAnnotations,
35
- coerceFeatureTypes,
33
+ coerceFeatureTypes
36
34
  } = options;
35
+ [
36
+ "isDNA",
37
+ "isOligo",
38
+ "isRNA",
39
+ "isDoubleStrandedDNA",
40
+ "isSingleStrandedDNA",
41
+ "isDoubleStrandedRNA",
42
+ "isProtein"
43
+ ].forEach((k) => {
44
+ if (options[k] !== undefined && sequence[k] === undefined) {
45
+ sequence[k] = options[k];
46
+ }
47
+ });
48
+
37
49
  const response = {
38
50
  validatedAndCleanedSequence: {},
39
- messages: [],
51
+ messages: []
40
52
  };
41
53
  if (!sequence || typeof sequence !== "object") {
42
54
  throw new Error("Invalid sequence");
@@ -73,13 +85,13 @@ export default function validateSequence(sequence, options = {}) {
73
85
  sequence.sequence = "";
74
86
  }
75
87
  let validChars;
76
- if (isProtein === undefined && guessIfProtein) {
77
- isProtein = !guessIfSequenceIsDnaAndNotProtein(
88
+ if (sequence.isProtein === undefined && guessIfProtein) {
89
+ sequence.isProtein = !guessIfSequenceIsDnaAndNotProtein(
78
90
  sequence.sequence,
79
91
  guessIfProteinOptions
80
92
  );
81
93
  }
82
- if (isProtein) {
94
+ if (sequence.isProtein) {
83
95
  //tnr: add code to strip invalid protein data..
84
96
  validChars = filterAminoAcidSequenceString(sequence.sequence);
85
97
  if (validChars !== sequence.sequence) {
@@ -97,13 +109,14 @@ export default function validateSequence(sequence, options = {}) {
97
109
  } else {
98
110
  //todo: this logic won't catch every case of RNA, so we should probably handle RNA conversion at another level..
99
111
  const temp = sequence.sequence;
100
- if (!isOligo) {
112
+ if (!sequence.isOligo) {
101
113
  sequence.sequence = sequence.sequence.replace(/u/gi, (u) =>
102
114
  u === "U" ? "T" : "t"
103
115
  );
104
116
  }
105
- if (temp !== sequence.sequence) {
117
+ if (temp !== sequence.sequence && !sequence.isDNA && !sequence.isProtein) {
106
118
  sequence.type = "RNA";
119
+ sequence.sequence = temp;
107
120
  } else {
108
121
  sequence.type = "DNA";
109
122
  }
@@ -118,7 +131,7 @@ export default function validateSequence(sequence, options = {}) {
118
131
  }
119
132
 
120
133
  if (!sequence.size) {
121
- sequence.size = isProtein
134
+ sequence.size = sequence.isProtein
122
135
  ? sequence.proteinSequence.length * 3
123
136
  : sequence.sequence.length;
124
137
  }