@teselagen/bio-parsers 0.4.9 → 0.4.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/genbankToJson.d.ts +1 -1
- package/index.js +20 -3
- package/index.mjs +20 -3
- package/index.umd.js +20 -3
- package/package.json +1 -1
- package/src/genbankToJson.js +33 -3
package/genbankToJson.d.ts
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
export function parseFeatureLocation(locStr: any, isProtein: any, inclusive1BasedStart: any, inclusive1BasedEnd: any): any[];
|
|
1
|
+
export function parseFeatureLocation(locStr: any, isProtein: any, inclusive1BasedStart: any, inclusive1BasedEnd: any, isCircular: any, sequenceLength: any): any[];
|
|
2
2
|
export default genbankToJson;
|
|
3
3
|
declare function genbankToJson(string: any, options?: {}): any;
|
package/index.js
CHANGED
|
@@ -19772,7 +19772,7 @@ function flattenSequenceArray(parsingResultArray, opts) {
|
|
|
19772
19772
|
return parsingResultArray;
|
|
19773
19773
|
}
|
|
19774
19774
|
__name(flattenSequenceArray, "flattenSequenceArray");
|
|
19775
|
-
function parseFeatureLocation(locStr, isProtein, inclusive1BasedStart, inclusive1BasedEnd) {
|
|
19775
|
+
function parseFeatureLocation(locStr, isProtein, inclusive1BasedStart, inclusive1BasedEnd, isCircular, sequenceLength) {
|
|
19776
19776
|
locStr = locStr.trim();
|
|
19777
19777
|
const locArr = [];
|
|
19778
19778
|
locStr.replace(/(\d+)/g, function(string, match) {
|
|
@@ -19793,6 +19793,19 @@ function parseFeatureLocation(locStr, isProtein, inclusive1BasedStart, inclusive
|
|
|
19793
19793
|
isProtein ? convertAACaretPositionOrRangeToDna(location) : location
|
|
19794
19794
|
);
|
|
19795
19795
|
}
|
|
19796
|
+
if (isCircular) {
|
|
19797
|
+
for (let i = 0; i < locArray.length; i += 2) {
|
|
19798
|
+
const firstFeature = locArray[i];
|
|
19799
|
+
const secondFeature = locArray[i + 1];
|
|
19800
|
+
if (firstFeature.end === sequenceLength - (inclusive1BasedEnd ? 0 : 1) && secondFeature.start === 1 - (inclusive1BasedStart ? 0 : 1)) {
|
|
19801
|
+
locArray[i] = {
|
|
19802
|
+
start: firstFeature.start,
|
|
19803
|
+
end: secondFeature.end
|
|
19804
|
+
};
|
|
19805
|
+
locArray.splice(i + 1, 1);
|
|
19806
|
+
}
|
|
19807
|
+
}
|
|
19808
|
+
}
|
|
19796
19809
|
return locArray;
|
|
19797
19810
|
}
|
|
19798
19811
|
__name(parseFeatureLocation, "parseFeatureLocation");
|
|
@@ -20126,7 +20139,9 @@ function genbankToJson(string, options = {}) {
|
|
|
20126
20139
|
line.trim(),
|
|
20127
20140
|
options.isProtein,
|
|
20128
20141
|
inclusive1BasedStart,
|
|
20129
|
-
inclusive1BasedEnd
|
|
20142
|
+
inclusive1BasedEnd,
|
|
20143
|
+
result.parsedSequence.circular,
|
|
20144
|
+
result.parsedSequence.sequence.length
|
|
20130
20145
|
)
|
|
20131
20146
|
);
|
|
20132
20147
|
lastLineWasLocation = true;
|
|
@@ -20159,7 +20174,9 @@ function genbankToJson(string, options = {}) {
|
|
|
20159
20174
|
val2,
|
|
20160
20175
|
options.isProtein,
|
|
20161
20176
|
inclusive1BasedStart,
|
|
20162
|
-
inclusive1BasedEnd
|
|
20177
|
+
inclusive1BasedEnd,
|
|
20178
|
+
result.parsedSequence.circular,
|
|
20179
|
+
result.parsedSequence.sequence.length
|
|
20163
20180
|
)
|
|
20164
20181
|
);
|
|
20165
20182
|
lastLineWasLocation = true;
|
package/index.mjs
CHANGED
|
@@ -19770,7 +19770,7 @@ function flattenSequenceArray(parsingResultArray, opts) {
|
|
|
19770
19770
|
return parsingResultArray;
|
|
19771
19771
|
}
|
|
19772
19772
|
__name(flattenSequenceArray, "flattenSequenceArray");
|
|
19773
|
-
function parseFeatureLocation(locStr, isProtein, inclusive1BasedStart, inclusive1BasedEnd) {
|
|
19773
|
+
function parseFeatureLocation(locStr, isProtein, inclusive1BasedStart, inclusive1BasedEnd, isCircular, sequenceLength) {
|
|
19774
19774
|
locStr = locStr.trim();
|
|
19775
19775
|
const locArr = [];
|
|
19776
19776
|
locStr.replace(/(\d+)/g, function(string, match) {
|
|
@@ -19791,6 +19791,19 @@ function parseFeatureLocation(locStr, isProtein, inclusive1BasedStart, inclusive
|
|
|
19791
19791
|
isProtein ? convertAACaretPositionOrRangeToDna(location) : location
|
|
19792
19792
|
);
|
|
19793
19793
|
}
|
|
19794
|
+
if (isCircular) {
|
|
19795
|
+
for (let i = 0; i < locArray.length; i += 2) {
|
|
19796
|
+
const firstFeature = locArray[i];
|
|
19797
|
+
const secondFeature = locArray[i + 1];
|
|
19798
|
+
if (firstFeature.end === sequenceLength - (inclusive1BasedEnd ? 0 : 1) && secondFeature.start === 1 - (inclusive1BasedStart ? 0 : 1)) {
|
|
19799
|
+
locArray[i] = {
|
|
19800
|
+
start: firstFeature.start,
|
|
19801
|
+
end: secondFeature.end
|
|
19802
|
+
};
|
|
19803
|
+
locArray.splice(i + 1, 1);
|
|
19804
|
+
}
|
|
19805
|
+
}
|
|
19806
|
+
}
|
|
19794
19807
|
return locArray;
|
|
19795
19808
|
}
|
|
19796
19809
|
__name(parseFeatureLocation, "parseFeatureLocation");
|
|
@@ -20124,7 +20137,9 @@ function genbankToJson(string, options = {}) {
|
|
|
20124
20137
|
line.trim(),
|
|
20125
20138
|
options.isProtein,
|
|
20126
20139
|
inclusive1BasedStart,
|
|
20127
|
-
inclusive1BasedEnd
|
|
20140
|
+
inclusive1BasedEnd,
|
|
20141
|
+
result.parsedSequence.circular,
|
|
20142
|
+
result.parsedSequence.sequence.length
|
|
20128
20143
|
)
|
|
20129
20144
|
);
|
|
20130
20145
|
lastLineWasLocation = true;
|
|
@@ -20157,7 +20172,9 @@ function genbankToJson(string, options = {}) {
|
|
|
20157
20172
|
val2,
|
|
20158
20173
|
options.isProtein,
|
|
20159
20174
|
inclusive1BasedStart,
|
|
20160
|
-
inclusive1BasedEnd
|
|
20175
|
+
inclusive1BasedEnd,
|
|
20176
|
+
result.parsedSequence.circular,
|
|
20177
|
+
result.parsedSequence.sequence.length
|
|
20161
20178
|
)
|
|
20162
20179
|
);
|
|
20163
20180
|
lastLineWasLocation = true;
|
package/index.umd.js
CHANGED
|
@@ -19774,7 +19774,7 @@ var __async = (__this, __arguments, generator) => {
|
|
|
19774
19774
|
return parsingResultArray;
|
|
19775
19775
|
}
|
|
19776
19776
|
__name(flattenSequenceArray, "flattenSequenceArray");
|
|
19777
|
-
function parseFeatureLocation(locStr, isProtein, inclusive1BasedStart, inclusive1BasedEnd) {
|
|
19777
|
+
function parseFeatureLocation(locStr, isProtein, inclusive1BasedStart, inclusive1BasedEnd, isCircular, sequenceLength) {
|
|
19778
19778
|
locStr = locStr.trim();
|
|
19779
19779
|
const locArr = [];
|
|
19780
19780
|
locStr.replace(/(\d+)/g, function(string, match) {
|
|
@@ -19795,6 +19795,19 @@ var __async = (__this, __arguments, generator) => {
|
|
|
19795
19795
|
isProtein ? convertAACaretPositionOrRangeToDna(location) : location
|
|
19796
19796
|
);
|
|
19797
19797
|
}
|
|
19798
|
+
if (isCircular) {
|
|
19799
|
+
for (let i2 = 0; i2 < locArray.length; i2 += 2) {
|
|
19800
|
+
const firstFeature = locArray[i2];
|
|
19801
|
+
const secondFeature = locArray[i2 + 1];
|
|
19802
|
+
if (firstFeature.end === sequenceLength - (inclusive1BasedEnd ? 0 : 1) && secondFeature.start === 1 - (inclusive1BasedStart ? 0 : 1)) {
|
|
19803
|
+
locArray[i2] = {
|
|
19804
|
+
start: firstFeature.start,
|
|
19805
|
+
end: secondFeature.end
|
|
19806
|
+
};
|
|
19807
|
+
locArray.splice(i2 + 1, 1);
|
|
19808
|
+
}
|
|
19809
|
+
}
|
|
19810
|
+
}
|
|
19798
19811
|
return locArray;
|
|
19799
19812
|
}
|
|
19800
19813
|
__name(parseFeatureLocation, "parseFeatureLocation");
|
|
@@ -20128,7 +20141,9 @@ var __async = (__this, __arguments, generator) => {
|
|
|
20128
20141
|
line.trim(),
|
|
20129
20142
|
options.isProtein,
|
|
20130
20143
|
inclusive1BasedStart,
|
|
20131
|
-
inclusive1BasedEnd
|
|
20144
|
+
inclusive1BasedEnd,
|
|
20145
|
+
result.parsedSequence.circular,
|
|
20146
|
+
result.parsedSequence.sequence.length
|
|
20132
20147
|
)
|
|
20133
20148
|
);
|
|
20134
20149
|
lastLineWasLocation = true;
|
|
@@ -20161,7 +20176,9 @@ var __async = (__this, __arguments, generator) => {
|
|
|
20161
20176
|
val2,
|
|
20162
20177
|
options.isProtein,
|
|
20163
20178
|
inclusive1BasedStart,
|
|
20164
|
-
inclusive1BasedEnd
|
|
20179
|
+
inclusive1BasedEnd,
|
|
20180
|
+
result.parsedSequence.circular,
|
|
20181
|
+
result.parsedSequence.sequence.length
|
|
20165
20182
|
)
|
|
20166
20183
|
);
|
|
20167
20184
|
lastLineWasLocation = true;
|
package/package.json
CHANGED
package/src/genbankToJson.js
CHANGED
|
@@ -12,7 +12,9 @@ export function parseFeatureLocation(
|
|
|
12
12
|
locStr,
|
|
13
13
|
isProtein,
|
|
14
14
|
inclusive1BasedStart,
|
|
15
|
-
inclusive1BasedEnd
|
|
15
|
+
inclusive1BasedEnd,
|
|
16
|
+
isCircular,
|
|
17
|
+
sequenceLength
|
|
16
18
|
) {
|
|
17
19
|
locStr = locStr.trim();
|
|
18
20
|
const locArr = [];
|
|
@@ -39,6 +41,30 @@ export function parseFeatureLocation(
|
|
|
39
41
|
isProtein ? convertAACaretPositionOrRangeToDna(location) : location
|
|
40
42
|
);
|
|
41
43
|
}
|
|
44
|
+
// In GenBank files, origin-spanning features are represented as follows:
|
|
45
|
+
// complement(join(490883..490885,1..879)) (for a circular sequence of length 490885)
|
|
46
|
+
// Then, for each pair of locations in locArray, we check whether a location that ends at sequenceLength
|
|
47
|
+
// is followed by a location that starts at 1. If so, we merge the two into a single location.
|
|
48
|
+
// (see https://github.com/TeselaGen/tg-oss/issues/35)
|
|
49
|
+
|
|
50
|
+
if (isCircular) {
|
|
51
|
+
// Iterate by pairs of features
|
|
52
|
+
for (let i = 0; i < locArray.length; i += 2) {
|
|
53
|
+
const firstFeature = locArray[i];
|
|
54
|
+
const secondFeature = locArray[i + 1];
|
|
55
|
+
if (
|
|
56
|
+
firstFeature.end === sequenceLength - (inclusive1BasedEnd ? 0 : 1) &&
|
|
57
|
+
secondFeature.start === 1 - (inclusive1BasedStart ? 0 : 1)
|
|
58
|
+
) {
|
|
59
|
+
// Merge the two features
|
|
60
|
+
locArray[i] = {
|
|
61
|
+
start: firstFeature.start,
|
|
62
|
+
end: secondFeature.end
|
|
63
|
+
};
|
|
64
|
+
locArray.splice(i + 1, 1);
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
}
|
|
42
68
|
return locArray;
|
|
43
69
|
}
|
|
44
70
|
|
|
@@ -468,7 +494,9 @@ function genbankToJson(string, options = {}) {
|
|
|
468
494
|
line.trim(),
|
|
469
495
|
options.isProtein,
|
|
470
496
|
inclusive1BasedStart,
|
|
471
|
-
inclusive1BasedEnd
|
|
497
|
+
inclusive1BasedEnd,
|
|
498
|
+
result.parsedSequence.circular,
|
|
499
|
+
result.parsedSequence.sequence.length
|
|
472
500
|
)
|
|
473
501
|
);
|
|
474
502
|
lastLineWasLocation = true;
|
|
@@ -513,7 +541,9 @@ function genbankToJson(string, options = {}) {
|
|
|
513
541
|
val,
|
|
514
542
|
options.isProtein,
|
|
515
543
|
inclusive1BasedStart,
|
|
516
|
-
inclusive1BasedEnd
|
|
544
|
+
inclusive1BasedEnd,
|
|
545
|
+
result.parsedSequence.circular,
|
|
546
|
+
result.parsedSequence.sequence.length
|
|
517
547
|
)
|
|
518
548
|
);
|
|
519
549
|
lastLineWasLocation = true;
|