@teselagen/bio-parsers 0.4.9 → 0.4.10

This diff shows the differences between two publicly available versions of this package as released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in the public registry.
@@ -1,3 +1,3 @@
1
- export function parseFeatureLocation(locStr: any, isProtein: any, inclusive1BasedStart: any, inclusive1BasedEnd: any): any[];
1
+ export function parseFeatureLocation(locStr: any, isProtein: any, inclusive1BasedStart: any, inclusive1BasedEnd: any, isCircular: any, sequenceLength: any): any[];
2
2
  export default genbankToJson;
3
3
  declare function genbankToJson(string: any, options?: {}): any;
package/index.js CHANGED
@@ -19772,7 +19772,7 @@ function flattenSequenceArray(parsingResultArray, opts) {
19772
19772
  return parsingResultArray;
19773
19773
  }
19774
19774
  __name(flattenSequenceArray, "flattenSequenceArray");
19775
- function parseFeatureLocation(locStr, isProtein, inclusive1BasedStart, inclusive1BasedEnd) {
19775
+ function parseFeatureLocation(locStr, isProtein, inclusive1BasedStart, inclusive1BasedEnd, isCircular, sequenceLength) {
19776
19776
  locStr = locStr.trim();
19777
19777
  const locArr = [];
19778
19778
  locStr.replace(/(\d+)/g, function(string, match) {
@@ -19793,6 +19793,19 @@ function parseFeatureLocation(locStr, isProtein, inclusive1BasedStart, inclusive
19793
19793
  isProtein ? convertAACaretPositionOrRangeToDna(location) : location
19794
19794
  );
19795
19795
  }
19796
+ if (isCircular) {
19797
+ for (let i = 0; i < locArray.length; i += 2) {
19798
+ const firstFeature = locArray[i];
19799
+ const secondFeature = locArray[i + 1];
19800
+ if (firstFeature.end === sequenceLength - (inclusive1BasedEnd ? 0 : 1) && secondFeature.start === 1 - (inclusive1BasedStart ? 0 : 1)) {
19801
+ locArray[i] = {
19802
+ start: firstFeature.start,
19803
+ end: secondFeature.end
19804
+ };
19805
+ locArray.splice(i + 1, 1);
19806
+ }
19807
+ }
19808
+ }
19796
19809
  return locArray;
19797
19810
  }
19798
19811
  __name(parseFeatureLocation, "parseFeatureLocation");
@@ -20126,7 +20139,9 @@ function genbankToJson(string, options = {}) {
20126
20139
  line.trim(),
20127
20140
  options.isProtein,
20128
20141
  inclusive1BasedStart,
20129
- inclusive1BasedEnd
20142
+ inclusive1BasedEnd,
20143
+ result.parsedSequence.circular,
20144
+ result.parsedSequence.sequence.length
20130
20145
  )
20131
20146
  );
20132
20147
  lastLineWasLocation = true;
@@ -20159,7 +20174,9 @@ function genbankToJson(string, options = {}) {
20159
20174
  val2,
20160
20175
  options.isProtein,
20161
20176
  inclusive1BasedStart,
20162
- inclusive1BasedEnd
20177
+ inclusive1BasedEnd,
20178
+ result.parsedSequence.circular,
20179
+ result.parsedSequence.sequence.length
20163
20180
  )
20164
20181
  );
20165
20182
  lastLineWasLocation = true;
package/index.mjs CHANGED
@@ -19770,7 +19770,7 @@ function flattenSequenceArray(parsingResultArray, opts) {
19770
19770
  return parsingResultArray;
19771
19771
  }
19772
19772
  __name(flattenSequenceArray, "flattenSequenceArray");
19773
- function parseFeatureLocation(locStr, isProtein, inclusive1BasedStart, inclusive1BasedEnd) {
19773
+ function parseFeatureLocation(locStr, isProtein, inclusive1BasedStart, inclusive1BasedEnd, isCircular, sequenceLength) {
19774
19774
  locStr = locStr.trim();
19775
19775
  const locArr = [];
19776
19776
  locStr.replace(/(\d+)/g, function(string, match) {
@@ -19791,6 +19791,19 @@ function parseFeatureLocation(locStr, isProtein, inclusive1BasedStart, inclusive
19791
19791
  isProtein ? convertAACaretPositionOrRangeToDna(location) : location
19792
19792
  );
19793
19793
  }
19794
+ if (isCircular) {
19795
+ for (let i = 0; i < locArray.length; i += 2) {
19796
+ const firstFeature = locArray[i];
19797
+ const secondFeature = locArray[i + 1];
19798
+ if (firstFeature.end === sequenceLength - (inclusive1BasedEnd ? 0 : 1) && secondFeature.start === 1 - (inclusive1BasedStart ? 0 : 1)) {
19799
+ locArray[i] = {
19800
+ start: firstFeature.start,
19801
+ end: secondFeature.end
19802
+ };
19803
+ locArray.splice(i + 1, 1);
19804
+ }
19805
+ }
19806
+ }
19794
19807
  return locArray;
19795
19808
  }
19796
19809
  __name(parseFeatureLocation, "parseFeatureLocation");
@@ -20124,7 +20137,9 @@ function genbankToJson(string, options = {}) {
20124
20137
  line.trim(),
20125
20138
  options.isProtein,
20126
20139
  inclusive1BasedStart,
20127
- inclusive1BasedEnd
20140
+ inclusive1BasedEnd,
20141
+ result.parsedSequence.circular,
20142
+ result.parsedSequence.sequence.length
20128
20143
  )
20129
20144
  );
20130
20145
  lastLineWasLocation = true;
@@ -20157,7 +20172,9 @@ function genbankToJson(string, options = {}) {
20157
20172
  val2,
20158
20173
  options.isProtein,
20159
20174
  inclusive1BasedStart,
20160
- inclusive1BasedEnd
20175
+ inclusive1BasedEnd,
20176
+ result.parsedSequence.circular,
20177
+ result.parsedSequence.sequence.length
20161
20178
  )
20162
20179
  );
20163
20180
  lastLineWasLocation = true;
package/index.umd.js CHANGED
@@ -19774,7 +19774,7 @@ var __async = (__this, __arguments, generator) => {
19774
19774
  return parsingResultArray;
19775
19775
  }
19776
19776
  __name(flattenSequenceArray, "flattenSequenceArray");
19777
- function parseFeatureLocation(locStr, isProtein, inclusive1BasedStart, inclusive1BasedEnd) {
19777
+ function parseFeatureLocation(locStr, isProtein, inclusive1BasedStart, inclusive1BasedEnd, isCircular, sequenceLength) {
19778
19778
  locStr = locStr.trim();
19779
19779
  const locArr = [];
19780
19780
  locStr.replace(/(\d+)/g, function(string, match) {
@@ -19795,6 +19795,19 @@ var __async = (__this, __arguments, generator) => {
19795
19795
  isProtein ? convertAACaretPositionOrRangeToDna(location) : location
19796
19796
  );
19797
19797
  }
19798
+ if (isCircular) {
19799
+ for (let i2 = 0; i2 < locArray.length; i2 += 2) {
19800
+ const firstFeature = locArray[i2];
19801
+ const secondFeature = locArray[i2 + 1];
19802
+ if (firstFeature.end === sequenceLength - (inclusive1BasedEnd ? 0 : 1) && secondFeature.start === 1 - (inclusive1BasedStart ? 0 : 1)) {
19803
+ locArray[i2] = {
19804
+ start: firstFeature.start,
19805
+ end: secondFeature.end
19806
+ };
19807
+ locArray.splice(i2 + 1, 1);
19808
+ }
19809
+ }
19810
+ }
19798
19811
  return locArray;
19799
19812
  }
19800
19813
  __name(parseFeatureLocation, "parseFeatureLocation");
@@ -20128,7 +20141,9 @@ var __async = (__this, __arguments, generator) => {
20128
20141
  line.trim(),
20129
20142
  options.isProtein,
20130
20143
  inclusive1BasedStart,
20131
- inclusive1BasedEnd
20144
+ inclusive1BasedEnd,
20145
+ result.parsedSequence.circular,
20146
+ result.parsedSequence.sequence.length
20132
20147
  )
20133
20148
  );
20134
20149
  lastLineWasLocation = true;
@@ -20161,7 +20176,9 @@ var __async = (__this, __arguments, generator) => {
20161
20176
  val2,
20162
20177
  options.isProtein,
20163
20178
  inclusive1BasedStart,
20164
- inclusive1BasedEnd
20179
+ inclusive1BasedEnd,
20180
+ result.parsedSequence.circular,
20181
+ result.parsedSequence.sequence.length
20165
20182
  )
20166
20183
  );
20167
20184
  lastLineWasLocation = true;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@teselagen/bio-parsers",
3
- "version": "0.4.9",
3
+ "version": "0.4.10",
4
4
  "dependencies": {
5
5
  "@teselagen/sequence-utils": "0.3.15",
6
6
  "@teselagen/range-utils": "0.3.7",
@@ -12,7 +12,9 @@ export function parseFeatureLocation(
12
12
  locStr,
13
13
  isProtein,
14
14
  inclusive1BasedStart,
15
- inclusive1BasedEnd
15
+ inclusive1BasedEnd,
16
+ isCircular,
17
+ sequenceLength
16
18
  ) {
17
19
  locStr = locStr.trim();
18
20
  const locArr = [];
@@ -39,6 +41,30 @@ export function parseFeatureLocation(
39
41
  isProtein ? convertAACaretPositionOrRangeToDna(location) : location
40
42
  );
41
43
  }
44
+ // In genbank files, origin-spanning features are represented as follows:
45
+ // complement(join(490883..490885,1..879)) (for a circular sequence of length 490885)
46
+ // Then, for locations in locArray we check if there is a feature that ends at sequenceLength
47
+ // joined with a feature that starts at 1. If so, we merge them into a single feature.
48
+ // (see https://github.com/TeselaGen/tg-oss/issues/35)
49
+
50
+ if (isCircular) {
51
+ // Iterate by pairs of features
52
+ for (let i = 0; i < locArray.length; i += 2) {
53
+ const firstFeature = locArray[i];
54
+ const secondFeature = locArray[i + 1];
55
+ if (
56
+ firstFeature.end === sequenceLength - (inclusive1BasedEnd ? 0 : 1) &&
57
+ secondFeature.start === 1 - (inclusive1BasedStart ? 0 : 1)
58
+ ) {
59
+ // Merge the two features
60
+ locArray[i] = {
61
+ start: firstFeature.start,
62
+ end: secondFeature.end
63
+ };
64
+ locArray.splice(i + 1, 1);
65
+ }
66
+ }
67
+ }
42
68
  return locArray;
43
69
  }
44
70
 
@@ -468,7 +494,9 @@ function genbankToJson(string, options = {}) {
468
494
  line.trim(),
469
495
  options.isProtein,
470
496
  inclusive1BasedStart,
471
- inclusive1BasedEnd
497
+ inclusive1BasedEnd,
498
+ result.parsedSequence.circular,
499
+ result.parsedSequence.sequence.length
472
500
  )
473
501
  );
474
502
  lastLineWasLocation = true;
@@ -513,7 +541,9 @@ function genbankToJson(string, options = {}) {
513
541
  val,
514
542
  options.isProtein,
515
543
  inclusive1BasedStart,
516
- inclusive1BasedEnd
544
+ inclusive1BasedEnd,
545
+ result.parsedSequence.circular,
546
+ result.parsedSequence.sequence.length
517
547
  )
518
548
  );
519
549
  lastLineWasLocation = true;