@teselagen/sequence-utils 0.3.13 → 0.3.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/getAminoAcidStringFromSequenceString.d.ts +3 -1
- package/index.js +13 -6
- package/index.mjs +13 -6
- package/index.umd.js +13 -6
- package/package.json +1 -1
- package/src/findSequenceMatches.js +7 -3
- package/src/findSequenceMatches.test.js +6 -6
- package/src/getAminoAcidStringFromSequenceString.js +6 -2
- package/src/getAminoAcidStringFromSequenceString.test.js +1 -0
package/index.js
CHANGED
|
@@ -21349,17 +21349,20 @@ function escapeStringRegexp(string) {
|
|
|
21349
21349
|
return string.replace(/[|\\{}()[\]^$+*?.]/g, "\\$&").replace(/-/g, "\\x2d");
|
|
21350
21350
|
}
|
|
21351
21351
|
__name(escapeStringRegexp, "escapeStringRegexp");
|
|
21352
|
-
function getAminoAcidStringFromSequenceString(sequenceString) {
|
|
21352
|
+
function getAminoAcidStringFromSequenceString(sequenceString, { doNotExcludeAsterisk } = {}) {
|
|
21353
21353
|
const aminoAcidsPerBase = getAminoAcidDataForEachBaseOfDna(
|
|
21354
21354
|
sequenceString,
|
|
21355
21355
|
true
|
|
21356
21356
|
);
|
|
21357
21357
|
const aaArray = [];
|
|
21358
21358
|
let aaString = "";
|
|
21359
|
-
aminoAcidsPerBase.forEach((aa) => {
|
|
21359
|
+
aminoAcidsPerBase.forEach((aa, index) => {
|
|
21360
21360
|
if (!aa.fullCodon) {
|
|
21361
21361
|
return;
|
|
21362
21362
|
}
|
|
21363
|
+
if (!doNotExcludeAsterisk && index >= aminoAcidsPerBase.length - 3 && aa.aminoAcid.value === "*") {
|
|
21364
|
+
return;
|
|
21365
|
+
}
|
|
21363
21366
|
aaArray[aa.aminoAcidIndex] = aa.aminoAcid.value;
|
|
21364
21367
|
});
|
|
21365
21368
|
aaString = aaArray.join("");
|
|
@@ -21414,18 +21417,22 @@ function findSequenceMatchesTopStrand(sequence, searchString, options = {}) {
|
|
|
21414
21417
|
if (isProteinSearch) {
|
|
21415
21418
|
sequencesToCheck = [
|
|
21416
21419
|
{
|
|
21417
|
-
seqToCheck: getAminoAcidStringFromSequenceString(sequenceToUse
|
|
21420
|
+
seqToCheck: getAminoAcidStringFromSequenceString(sequenceToUse, {
|
|
21421
|
+
doNotExcludeAsterisk: true
|
|
21422
|
+
}),
|
|
21418
21423
|
offset: 0
|
|
21419
21424
|
},
|
|
21420
21425
|
{
|
|
21421
21426
|
seqToCheck: getAminoAcidStringFromSequenceString(
|
|
21422
|
-
sequenceToUse.substr(1)
|
|
21427
|
+
sequenceToUse.substr(1),
|
|
21428
|
+
{ doNotExcludeAsterisk: true }
|
|
21423
21429
|
),
|
|
21424
21430
|
offset: 1
|
|
21425
21431
|
},
|
|
21426
21432
|
{
|
|
21427
21433
|
seqToCheck: getAminoAcidStringFromSequenceString(
|
|
21428
|
-
sequenceToUse.substr(2)
|
|
21434
|
+
sequenceToUse.substr(2),
|
|
21435
|
+
{ doNotExcludeAsterisk: true }
|
|
21429
21436
|
),
|
|
21430
21437
|
offset: 2
|
|
21431
21438
|
}
|
|
@@ -21519,7 +21526,7 @@ function getCodonRangeForAASliver(aminoAcidPositionInSequence, aminoAcidSliver,
|
|
|
21519
21526
|
}
|
|
21520
21527
|
__name(getCodonRangeForAASliver, "getCodonRangeForAASliver");
|
|
21521
21528
|
function getComplementAminoAcidStringFromSequenceString(sequenceString) {
|
|
21522
|
-
const aaString = getAminoAcidStringFromSequenceString(sequenceString);
|
|
21529
|
+
const aaString = getAminoAcidStringFromSequenceString(sequenceString, true);
|
|
21523
21530
|
return aaString.split("").reverse().join("");
|
|
21524
21531
|
}
|
|
21525
21532
|
__name(getComplementAminoAcidStringFromSequenceString, "getComplementAminoAcidStringFromSequenceString");
|
package/index.mjs
CHANGED
|
@@ -21347,17 +21347,20 @@ function escapeStringRegexp(string) {
|
|
|
21347
21347
|
return string.replace(/[|\\{}()[\]^$+*?.]/g, "\\$&").replace(/-/g, "\\x2d");
|
|
21348
21348
|
}
|
|
21349
21349
|
__name(escapeStringRegexp, "escapeStringRegexp");
|
|
21350
|
-
function getAminoAcidStringFromSequenceString(sequenceString) {
|
|
21350
|
+
function getAminoAcidStringFromSequenceString(sequenceString, { doNotExcludeAsterisk } = {}) {
|
|
21351
21351
|
const aminoAcidsPerBase = getAminoAcidDataForEachBaseOfDna(
|
|
21352
21352
|
sequenceString,
|
|
21353
21353
|
true
|
|
21354
21354
|
);
|
|
21355
21355
|
const aaArray = [];
|
|
21356
21356
|
let aaString = "";
|
|
21357
|
-
aminoAcidsPerBase.forEach((aa) => {
|
|
21357
|
+
aminoAcidsPerBase.forEach((aa, index) => {
|
|
21358
21358
|
if (!aa.fullCodon) {
|
|
21359
21359
|
return;
|
|
21360
21360
|
}
|
|
21361
|
+
if (!doNotExcludeAsterisk && index >= aminoAcidsPerBase.length - 3 && aa.aminoAcid.value === "*") {
|
|
21362
|
+
return;
|
|
21363
|
+
}
|
|
21361
21364
|
aaArray[aa.aminoAcidIndex] = aa.aminoAcid.value;
|
|
21362
21365
|
});
|
|
21363
21366
|
aaString = aaArray.join("");
|
|
@@ -21412,18 +21415,22 @@ function findSequenceMatchesTopStrand(sequence, searchString, options = {}) {
|
|
|
21412
21415
|
if (isProteinSearch) {
|
|
21413
21416
|
sequencesToCheck = [
|
|
21414
21417
|
{
|
|
21415
|
-
seqToCheck: getAminoAcidStringFromSequenceString(sequenceToUse
|
|
21418
|
+
seqToCheck: getAminoAcidStringFromSequenceString(sequenceToUse, {
|
|
21419
|
+
doNotExcludeAsterisk: true
|
|
21420
|
+
}),
|
|
21416
21421
|
offset: 0
|
|
21417
21422
|
},
|
|
21418
21423
|
{
|
|
21419
21424
|
seqToCheck: getAminoAcidStringFromSequenceString(
|
|
21420
|
-
sequenceToUse.substr(1)
|
|
21425
|
+
sequenceToUse.substr(1),
|
|
21426
|
+
{ doNotExcludeAsterisk: true }
|
|
21421
21427
|
),
|
|
21422
21428
|
offset: 1
|
|
21423
21429
|
},
|
|
21424
21430
|
{
|
|
21425
21431
|
seqToCheck: getAminoAcidStringFromSequenceString(
|
|
21426
|
-
sequenceToUse.substr(2)
|
|
21432
|
+
sequenceToUse.substr(2),
|
|
21433
|
+
{ doNotExcludeAsterisk: true }
|
|
21427
21434
|
),
|
|
21428
21435
|
offset: 2
|
|
21429
21436
|
}
|
|
@@ -21517,7 +21524,7 @@ function getCodonRangeForAASliver(aminoAcidPositionInSequence, aminoAcidSliver,
|
|
|
21517
21524
|
}
|
|
21518
21525
|
__name(getCodonRangeForAASliver, "getCodonRangeForAASliver");
|
|
21519
21526
|
function getComplementAminoAcidStringFromSequenceString(sequenceString) {
|
|
21520
|
-
const aaString = getAminoAcidStringFromSequenceString(sequenceString);
|
|
21527
|
+
const aaString = getAminoAcidStringFromSequenceString(sequenceString, true);
|
|
21521
21528
|
return aaString.split("").reverse().join("");
|
|
21522
21529
|
}
|
|
21523
21530
|
__name(getComplementAminoAcidStringFromSequenceString, "getComplementAminoAcidStringFromSequenceString");
|
package/index.umd.js
CHANGED
|
@@ -21351,17 +21351,20 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
|
|
|
21351
21351
|
return string.replace(/[|\\{}()[\]^$+*?.]/g, "\\$&").replace(/-/g, "\\x2d");
|
|
21352
21352
|
}
|
|
21353
21353
|
__name(escapeStringRegexp, "escapeStringRegexp");
|
|
21354
|
-
function getAminoAcidStringFromSequenceString(sequenceString) {
|
|
21354
|
+
function getAminoAcidStringFromSequenceString(sequenceString, { doNotExcludeAsterisk } = {}) {
|
|
21355
21355
|
const aminoAcidsPerBase = getAminoAcidDataForEachBaseOfDna(
|
|
21356
21356
|
sequenceString,
|
|
21357
21357
|
true
|
|
21358
21358
|
);
|
|
21359
21359
|
const aaArray = [];
|
|
21360
21360
|
let aaString = "";
|
|
21361
|
-
aminoAcidsPerBase.forEach((aa) => {
|
|
21361
|
+
aminoAcidsPerBase.forEach((aa, index) => {
|
|
21362
21362
|
if (!aa.fullCodon) {
|
|
21363
21363
|
return;
|
|
21364
21364
|
}
|
|
21365
|
+
if (!doNotExcludeAsterisk && index >= aminoAcidsPerBase.length - 3 && aa.aminoAcid.value === "*") {
|
|
21366
|
+
return;
|
|
21367
|
+
}
|
|
21365
21368
|
aaArray[aa.aminoAcidIndex] = aa.aminoAcid.value;
|
|
21366
21369
|
});
|
|
21367
21370
|
aaString = aaArray.join("");
|
|
@@ -21416,18 +21419,22 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
|
|
|
21416
21419
|
if (isProteinSearch) {
|
|
21417
21420
|
sequencesToCheck = [
|
|
21418
21421
|
{
|
|
21419
|
-
seqToCheck: getAminoAcidStringFromSequenceString(sequenceToUse
|
|
21422
|
+
seqToCheck: getAminoAcidStringFromSequenceString(sequenceToUse, {
|
|
21423
|
+
doNotExcludeAsterisk: true
|
|
21424
|
+
}),
|
|
21420
21425
|
offset: 0
|
|
21421
21426
|
},
|
|
21422
21427
|
{
|
|
21423
21428
|
seqToCheck: getAminoAcidStringFromSequenceString(
|
|
21424
|
-
sequenceToUse.substr(1)
|
|
21429
|
+
sequenceToUse.substr(1),
|
|
21430
|
+
{ doNotExcludeAsterisk: true }
|
|
21425
21431
|
),
|
|
21426
21432
|
offset: 1
|
|
21427
21433
|
},
|
|
21428
21434
|
{
|
|
21429
21435
|
seqToCheck: getAminoAcidStringFromSequenceString(
|
|
21430
|
-
sequenceToUse.substr(2)
|
|
21436
|
+
sequenceToUse.substr(2),
|
|
21437
|
+
{ doNotExcludeAsterisk: true }
|
|
21431
21438
|
),
|
|
21432
21439
|
offset: 2
|
|
21433
21440
|
}
|
|
@@ -21521,7 +21528,7 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
|
|
|
21521
21528
|
}
|
|
21522
21529
|
__name(getCodonRangeForAASliver, "getCodonRangeForAASliver");
|
|
21523
21530
|
function getComplementAminoAcidStringFromSequenceString(sequenceString) {
|
|
21524
|
-
const aaString = getAminoAcidStringFromSequenceString(sequenceString);
|
|
21531
|
+
const aaString = getAminoAcidStringFromSequenceString(sequenceString, true);
|
|
21525
21532
|
return aaString.split("").reverse().join("");
|
|
21526
21533
|
}
|
|
21527
21534
|
__name(getComplementAminoAcidStringFromSequenceString, "getComplementAminoAcidStringFromSequenceString");
|
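package/package.json
CHANGED
|
[diff body for package.json (+1 -1) is not present in this capture]
|
package/src/findSequenceMatches.js
CHANGED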
|
@@ -64,18 +64,22 @@ function findSequenceMatchesTopStrand(sequence, searchString, options = {}) {
|
|
|
64
64
|
if (isProteinSearch) {
|
|
65
65
|
sequencesToCheck = [
|
|
66
66
|
{
|
|
67
|
-
seqToCheck: getAminoAcidStringFromSequenceString(sequenceToUse
|
|
67
|
+
seqToCheck: getAminoAcidStringFromSequenceString(sequenceToUse, {
|
|
68
|
+
doNotExcludeAsterisk: true
|
|
69
|
+
}),
|
|
68
70
|
offset: 0
|
|
69
71
|
},
|
|
70
72
|
{
|
|
71
73
|
seqToCheck: getAminoAcidStringFromSequenceString(
|
|
72
|
-
sequenceToUse.substr(1)
|
|
74
|
+
sequenceToUse.substr(1),
|
|
75
|
+
{ doNotExcludeAsterisk: true }
|
|
73
76
|
),
|
|
74
77
|
offset: 1
|
|
75
78
|
},
|
|
76
79
|
{
|
|
77
80
|
seqToCheck: getAminoAcidStringFromSequenceString(
|
|
78
|
-
sequenceToUse.substr(2)
|
|
81
|
+
sequenceToUse.substr(2),
|
|
82
|
+
{ doNotExcludeAsterisk: true }
|
|
79
83
|
),
|
|
80
84
|
offset: 2
|
|
81
85
|
}
|
|
package/src/findSequenceMatches.test.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import findSequenceMatches from "./findSequenceMatches";
|
|
2
2
|
|
|
3
3
|
describe("findSequenceMatches", () => {
|
|
4
|
-
it(
|
|
4
|
+
it('ambiguous protein sequence with asterisk as stop codon', () => {
|
|
5
5
|
expect(
|
|
6
6
|
findSequenceMatches("mmhlrl*", "Mxxlrl*", {
|
|
7
7
|
isAmbiguous: true,
|
|
@@ -29,7 +29,7 @@ describe("findSequenceMatches", () => {
|
|
|
29
29
|
}
|
|
30
30
|
]);
|
|
31
31
|
});
|
|
32
|
-
it(
|
|
32
|
+
it('protein sequence with asterisk as stop codon', () => {
|
|
33
33
|
expect(
|
|
34
34
|
findSequenceMatches("mmhlrl*", "mMh", {
|
|
35
35
|
isProteinSequence: true /* isProteinSearch: true */
|
|
@@ -132,11 +132,11 @@ describe("findSequenceMatches", () => {
|
|
|
132
132
|
const matches = findSequenceMatches("atg", "*", { isAmbiguous: true });
|
|
133
133
|
expect(matches).toEqual([]);
|
|
134
134
|
});
|
|
135
|
-
it(
|
|
135
|
+
it('ambiguous, dna searches with asterisk', () => {
|
|
136
136
|
const matches = findSequenceMatches("atg", "", { isAmbiguous: true });
|
|
137
137
|
expect(matches).toEqual([]);
|
|
138
138
|
});
|
|
139
|
-
it(
|
|
139
|
+
it('AA with asterisk as stop codon in atgtaa', () => {
|
|
140
140
|
expect(
|
|
141
141
|
findSequenceMatches("atgtaa", "M*", { isProteinSearch: true })
|
|
142
142
|
).toEqual([
|
|
@@ -146,7 +146,7 @@ describe("findSequenceMatches", () => {
|
|
|
146
146
|
}
|
|
147
147
|
]);
|
|
148
148
|
});
|
|
149
|
-
it(
|
|
149
|
+
it('AA with asterisk as stop codon in atgtaaccc', () => {
|
|
150
150
|
expect(
|
|
151
151
|
findSequenceMatches("atgtaaccc", "M**", { isProteinSearch: true })
|
|
152
152
|
).toEqual([]);
|
|
@@ -164,7 +164,7 @@ describe("findSequenceMatches", () => {
|
|
|
164
164
|
}
|
|
165
165
|
]);
|
|
166
166
|
});
|
|
167
|
-
it(
|
|
167
|
+
it('works with ambiguous AA with asterisk in search string', () => {
|
|
168
168
|
expect(
|
|
169
169
|
findSequenceMatches("atgtaa", "M*", {
|
|
170
170
|
isProteinSearch: true,
|
|
package/src/getAminoAcidStringFromSequenceString.js
CHANGED
|
@@ -1,16 +1,20 @@
|
|
|
1
1
|
import getAminoAcidDataForEachBaseOfDna from "./getAminoAcidDataForEachBaseOfDna";
|
|
2
2
|
|
|
3
|
-
export default function getAminoAcidStringFromSequenceString(sequenceString) {
|
|
3
|
+
export default function getAminoAcidStringFromSequenceString(sequenceString, { doNotExcludeAsterisk } = {}) {
|
|
4
4
|
const aminoAcidsPerBase = getAminoAcidDataForEachBaseOfDna(
|
|
5
5
|
sequenceString,
|
|
6
6
|
true
|
|
7
7
|
);
|
|
8
8
|
const aaArray = [];
|
|
9
9
|
let aaString = "";
|
|
10
|
-
aminoAcidsPerBase.forEach(aa => {
|
|
10
|
+
aminoAcidsPerBase.forEach((aa, index) => {
|
|
11
11
|
if (!aa.fullCodon) {
|
|
12
12
|
return;
|
|
13
13
|
}
|
|
14
|
+
// Check if the current amino acid is the last in the sequence and is a stop codon
|
|
15
|
+
if (!doNotExcludeAsterisk && index >= aminoAcidsPerBase.length - 3 && aa.aminoAcid.value === '*') {
|
|
16
|
+
return;
|
|
17
|
+
}
|
|
14
18
|
aaArray[aa.aminoAcidIndex] = aa.aminoAcid.value;
|
|
15
19
|
});
|
|
16
20
|
aaString = aaArray.join("");
|
|
package/src/getAminoAcidStringFromSequenceString.test.js
CHANGED
|
@@ -14,5 +14,6 @@ describe("getAminoAcidStringFromSequenceString", () => {
|
|
|
14
14
|
assert.equal("MM", getAminoAcidStringFromSequenceString("atgatg"));
|
|
15
15
|
assert.equal("M--", getAminoAcidStringFromSequenceString("atg------"));
|
|
16
16
|
assert.equal("", getAminoAcidStringFromSequenceString("at"));
|
|
17
|
+
assert.equal("MTNYNQKNEN", getAminoAcidStringFromSequenceString("atgactaattataatcaaaaaaatgaaaattaa"));
|
|
17
18
|
});
|
|
18
19
|
});
|