@teselagen/sequence-utils 0.3.13 → 0.3.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1,3 @@
1
/**
 * Translates a nucleotide sequence string into its amino-acid string.
 *
 * @param sequenceString - The sequence to translate.
 * @param options - Optional settings; may be omitted entirely (the
 *   implementation defaults it to `{}`).
 * @param options.doNotExcludeAsterisk - When truthy, a stop codon (`*`) found
 *   in the final three base positions is kept in the result; otherwise it is
 *   left out.
 * @returns The amino-acid letters joined into one string.
 */
export default function getAminoAcidStringFromSequenceString(sequenceString: any, options?: {
    doNotExcludeAsterisk?: boolean;
}): string;
package/index.js CHANGED
@@ -21349,17 +21349,20 @@ function escapeStringRegexp(string) {
21349
21349
  return string.replace(/[|\\{}()[\]^$+*?.]/g, "\\$&").replace(/-/g, "\\x2d");
21350
21350
  }
21351
21351
  __name(escapeStringRegexp, "escapeStringRegexp");
21352
- function getAminoAcidStringFromSequenceString(sequenceString) {
21352
+ function getAminoAcidStringFromSequenceString(sequenceString, { doNotExcludeAsterisk } = {}) {
21353
21353
  const aminoAcidsPerBase = getAminoAcidDataForEachBaseOfDna(
21354
21354
  sequenceString,
21355
21355
  true
21356
21356
  );
21357
21357
  const aaArray = [];
21358
21358
  let aaString = "";
21359
- aminoAcidsPerBase.forEach((aa) => {
21359
+ aminoAcidsPerBase.forEach((aa, index) => {
21360
21360
  if (!aa.fullCodon) {
21361
21361
  return;
21362
21362
  }
21363
+ if (!doNotExcludeAsterisk && index >= aminoAcidsPerBase.length - 3 && aa.aminoAcid.value === "*") {
21364
+ return;
21365
+ }
21363
21366
  aaArray[aa.aminoAcidIndex] = aa.aminoAcid.value;
21364
21367
  });
21365
21368
  aaString = aaArray.join("");
@@ -21414,18 +21417,22 @@ function findSequenceMatchesTopStrand(sequence, searchString, options = {}) {
21414
21417
  if (isProteinSearch) {
21415
21418
  sequencesToCheck = [
21416
21419
  {
21417
- seqToCheck: getAminoAcidStringFromSequenceString(sequenceToUse),
21420
+ seqToCheck: getAminoAcidStringFromSequenceString(sequenceToUse, {
21421
+ doNotExcludeAsterisk: true
21422
+ }),
21418
21423
  offset: 0
21419
21424
  },
21420
21425
  {
21421
21426
  seqToCheck: getAminoAcidStringFromSequenceString(
21422
- sequenceToUse.substr(1)
21427
+ sequenceToUse.substr(1),
21428
+ { doNotExcludeAsterisk: true }
21423
21429
  ),
21424
21430
  offset: 1
21425
21431
  },
21426
21432
  {
21427
21433
  seqToCheck: getAminoAcidStringFromSequenceString(
21428
- sequenceToUse.substr(2)
21434
+ sequenceToUse.substr(2),
21435
+ { doNotExcludeAsterisk: true }
21429
21436
  ),
21430
21437
  offset: 2
21431
21438
  }
@@ -21519,7 +21526,7 @@ function getCodonRangeForAASliver(aminoAcidPositionInSequence, aminoAcidSliver,
21519
21526
  }
21520
21527
  __name(getCodonRangeForAASliver, "getCodonRangeForAASliver");
21521
21528
  function getComplementAminoAcidStringFromSequenceString(sequenceString) {
21522
- const aaString = getAminoAcidStringFromSequenceString(sequenceString);
21529
+ const aaString = getAminoAcidStringFromSequenceString(sequenceString, true);
21523
21530
  return aaString.split("").reverse().join("");
21524
21531
  }
21525
21532
  __name(getComplementAminoAcidStringFromSequenceString, "getComplementAminoAcidStringFromSequenceString");
package/index.mjs CHANGED
@@ -21347,17 +21347,20 @@ function escapeStringRegexp(string) {
21347
21347
  return string.replace(/[|\\{}()[\]^$+*?.]/g, "\\$&").replace(/-/g, "\\x2d");
21348
21348
  }
21349
21349
  __name(escapeStringRegexp, "escapeStringRegexp");
21350
- function getAminoAcidStringFromSequenceString(sequenceString) {
21350
+ function getAminoAcidStringFromSequenceString(sequenceString, { doNotExcludeAsterisk } = {}) {
21351
21351
  const aminoAcidsPerBase = getAminoAcidDataForEachBaseOfDna(
21352
21352
  sequenceString,
21353
21353
  true
21354
21354
  );
21355
21355
  const aaArray = [];
21356
21356
  let aaString = "";
21357
- aminoAcidsPerBase.forEach((aa) => {
21357
+ aminoAcidsPerBase.forEach((aa, index) => {
21358
21358
  if (!aa.fullCodon) {
21359
21359
  return;
21360
21360
  }
21361
+ if (!doNotExcludeAsterisk && index >= aminoAcidsPerBase.length - 3 && aa.aminoAcid.value === "*") {
21362
+ return;
21363
+ }
21361
21364
  aaArray[aa.aminoAcidIndex] = aa.aminoAcid.value;
21362
21365
  });
21363
21366
  aaString = aaArray.join("");
@@ -21412,18 +21415,22 @@ function findSequenceMatchesTopStrand(sequence, searchString, options = {}) {
21412
21415
  if (isProteinSearch) {
21413
21416
  sequencesToCheck = [
21414
21417
  {
21415
- seqToCheck: getAminoAcidStringFromSequenceString(sequenceToUse),
21418
+ seqToCheck: getAminoAcidStringFromSequenceString(sequenceToUse, {
21419
+ doNotExcludeAsterisk: true
21420
+ }),
21416
21421
  offset: 0
21417
21422
  },
21418
21423
  {
21419
21424
  seqToCheck: getAminoAcidStringFromSequenceString(
21420
- sequenceToUse.substr(1)
21425
+ sequenceToUse.substr(1),
21426
+ { doNotExcludeAsterisk: true }
21421
21427
  ),
21422
21428
  offset: 1
21423
21429
  },
21424
21430
  {
21425
21431
  seqToCheck: getAminoAcidStringFromSequenceString(
21426
- sequenceToUse.substr(2)
21432
+ sequenceToUse.substr(2),
21433
+ { doNotExcludeAsterisk: true }
21427
21434
  ),
21428
21435
  offset: 2
21429
21436
  }
@@ -21517,7 +21524,7 @@ function getCodonRangeForAASliver(aminoAcidPositionInSequence, aminoAcidSliver,
21517
21524
  }
21518
21525
  __name(getCodonRangeForAASliver, "getCodonRangeForAASliver");
21519
21526
  function getComplementAminoAcidStringFromSequenceString(sequenceString) {
21520
- const aaString = getAminoAcidStringFromSequenceString(sequenceString);
21527
+ const aaString = getAminoAcidStringFromSequenceString(sequenceString, true);
21521
21528
  return aaString.split("").reverse().join("");
21522
21529
  }
21523
21530
  __name(getComplementAminoAcidStringFromSequenceString, "getComplementAminoAcidStringFromSequenceString");
package/index.umd.js CHANGED
@@ -21351,17 +21351,20 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
21351
21351
  return string.replace(/[|\\{}()[\]^$+*?.]/g, "\\$&").replace(/-/g, "\\x2d");
21352
21352
  }
21353
21353
  __name(escapeStringRegexp, "escapeStringRegexp");
21354
- function getAminoAcidStringFromSequenceString(sequenceString) {
21354
+ function getAminoAcidStringFromSequenceString(sequenceString, { doNotExcludeAsterisk } = {}) {
21355
21355
  const aminoAcidsPerBase = getAminoAcidDataForEachBaseOfDna(
21356
21356
  sequenceString,
21357
21357
  true
21358
21358
  );
21359
21359
  const aaArray = [];
21360
21360
  let aaString = "";
21361
- aminoAcidsPerBase.forEach((aa) => {
21361
+ aminoAcidsPerBase.forEach((aa, index) => {
21362
21362
  if (!aa.fullCodon) {
21363
21363
  return;
21364
21364
  }
21365
+ if (!doNotExcludeAsterisk && index >= aminoAcidsPerBase.length - 3 && aa.aminoAcid.value === "*") {
21366
+ return;
21367
+ }
21365
21368
  aaArray[aa.aminoAcidIndex] = aa.aminoAcid.value;
21366
21369
  });
21367
21370
  aaString = aaArray.join("");
@@ -21416,18 +21419,22 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
21416
21419
  if (isProteinSearch) {
21417
21420
  sequencesToCheck = [
21418
21421
  {
21419
- seqToCheck: getAminoAcidStringFromSequenceString(sequenceToUse),
21422
+ seqToCheck: getAminoAcidStringFromSequenceString(sequenceToUse, {
21423
+ doNotExcludeAsterisk: true
21424
+ }),
21420
21425
  offset: 0
21421
21426
  },
21422
21427
  {
21423
21428
  seqToCheck: getAminoAcidStringFromSequenceString(
21424
- sequenceToUse.substr(1)
21429
+ sequenceToUse.substr(1),
21430
+ { doNotExcludeAsterisk: true }
21425
21431
  ),
21426
21432
  offset: 1
21427
21433
  },
21428
21434
  {
21429
21435
  seqToCheck: getAminoAcidStringFromSequenceString(
21430
- sequenceToUse.substr(2)
21436
+ sequenceToUse.substr(2),
21437
+ { doNotExcludeAsterisk: true }
21431
21438
  ),
21432
21439
  offset: 2
21433
21440
  }
@@ -21521,7 +21528,7 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
21521
21528
  }
21522
21529
  __name(getCodonRangeForAASliver, "getCodonRangeForAASliver");
21523
21530
  function getComplementAminoAcidStringFromSequenceString(sequenceString) {
21524
- const aaString = getAminoAcidStringFromSequenceString(sequenceString);
21531
+ const aaString = getAminoAcidStringFromSequenceString(sequenceString, true);
21525
21532
  return aaString.split("").reverse().join("");
21526
21533
  }
21527
21534
  __name(getComplementAminoAcidStringFromSequenceString, "getComplementAminoAcidStringFromSequenceString");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@teselagen/sequence-utils",
3
- "version": "0.3.13",
3
+ "version": "0.3.15",
4
4
  "dependencies": {
5
5
  "@teselagen/range-utils": "0.3.7",
6
6
  "bson-objectid": "^2.0.4",
@@ -64,18 +64,22 @@ function findSequenceMatchesTopStrand(sequence, searchString, options = {}) {
64
64
  if (isProteinSearch) {
65
65
  sequencesToCheck = [
66
66
  {
67
- seqToCheck: getAminoAcidStringFromSequenceString(sequenceToUse),
67
+ seqToCheck: getAminoAcidStringFromSequenceString(sequenceToUse, {
68
+ doNotExcludeAsterisk: true
69
+ }),
68
70
  offset: 0
69
71
  },
70
72
  {
71
73
  seqToCheck: getAminoAcidStringFromSequenceString(
72
- sequenceToUse.substr(1)
74
+ sequenceToUse.substr(1),
75
+ { doNotExcludeAsterisk: true }
73
76
  ),
74
77
  offset: 1
75
78
  },
76
79
  {
77
80
  seqToCheck: getAminoAcidStringFromSequenceString(
78
- sequenceToUse.substr(2)
81
+ sequenceToUse.substr(2),
82
+ { doNotExcludeAsterisk: true }
79
83
  ),
80
84
  offset: 2
81
85
  }
@@ -1,7 +1,7 @@
1
1
  import findSequenceMatches from "./findSequenceMatches";
2
2
 
3
3
  describe("findSequenceMatches", () => {
4
- it("ambiguous protein sequence with * as stop codon", () => {
4
+ it('ambiguous protein sequence with asterisk as stop codon', () => {
5
5
  expect(
6
6
  findSequenceMatches("mmhlrl*", "Mxxlrl*", {
7
7
  isAmbiguous: true,
@@ -29,7 +29,7 @@ describe("findSequenceMatches", () => {
29
29
  }
30
30
  ]);
31
31
  });
32
- it("protein sequence with * as stop codon", () => {
32
+ it('protein sequence with asterisk as stop codon', () => {
33
33
  expect(
34
34
  findSequenceMatches("mmhlrl*", "mMh", {
35
35
  isProteinSequence: true /* isProteinSearch: true */
@@ -132,11 +132,11 @@ describe("findSequenceMatches", () => {
132
132
  const matches = findSequenceMatches("atg", "*", { isAmbiguous: true });
133
133
  expect(matches).toEqual([]);
134
134
  });
135
- it("ambiguous, dna searches with *", () => {
135
+ it('ambiguous, dna searches with asterisk', () => {
136
136
  const matches = findSequenceMatches("atg", "", { isAmbiguous: true });
137
137
  expect(matches).toEqual([]);
138
138
  });
139
- it(" AA with * as stop codon", () => {
139
+ it('AA with asterisk as stop codon in atgtaa', () => {
140
140
  expect(
141
141
  findSequenceMatches("atgtaa", "M*", { isProteinSearch: true })
142
142
  ).toEqual([
@@ -146,7 +146,7 @@ describe("findSequenceMatches", () => {
146
146
  }
147
147
  ]);
148
148
  });
149
- it(" AA with * as stop codon", () => {
149
+ it('AA with asterisk as stop codon in atgtaaccc', () => {
150
150
  expect(
151
151
  findSequenceMatches("atgtaaccc", "M**", { isProteinSearch: true })
152
152
  ).toEqual([]);
@@ -164,7 +164,7 @@ describe("findSequenceMatches", () => {
164
164
  }
165
165
  ]);
166
166
  });
167
- it("works with ambiguous AA with * in search string", () => {
167
+ it('works with ambiguous AA with asterisk in search string', () => {
168
168
  expect(
169
169
  findSequenceMatches("atgtaa", "M*", {
170
170
  isProteinSearch: true,
@@ -1,16 +1,20 @@
1
1
  import getAminoAcidDataForEachBaseOfDna from "./getAminoAcidDataForEachBaseOfDna";
2
2
 
3
- export default function getAminoAcidStringFromSequenceString(sequenceString) {
3
+ export default function getAminoAcidStringFromSequenceString(sequenceString, { doNotExcludeAsterisk } = {}) {
4
4
  const aminoAcidsPerBase = getAminoAcidDataForEachBaseOfDna(
5
5
  sequenceString,
6
6
  true
7
7
  );
8
8
  const aaArray = [];
9
9
  let aaString = "";
10
- aminoAcidsPerBase.forEach(aa => {
10
+ aminoAcidsPerBase.forEach((aa, index) => {
11
11
  if (!aa.fullCodon) {
12
12
  return;
13
13
  }
14
+ // Check if the current amino acid is the last in the sequence and is a stop codon
15
+ if (!doNotExcludeAsterisk && index >= aminoAcidsPerBase.length - 3 && aa.aminoAcid.value === '*') {
16
+ return;
17
+ }
14
18
  aaArray[aa.aminoAcidIndex] = aa.aminoAcid.value;
15
19
  });
16
20
  aaString = aaArray.join("");
@@ -14,5 +14,6 @@ describe("getAminoAcidStringFromSequenceString", () => {
14
14
  assert.equal("MM", getAminoAcidStringFromSequenceString("atgatg"));
15
15
  assert.equal("M--", getAminoAcidStringFromSequenceString("atg------"));
16
16
  assert.equal("", getAminoAcidStringFromSequenceString("at"));
17
+ assert.equal("MTNYNQKNEN", getAminoAcidStringFromSequenceString("atgactaattataatcaaaaaaatgaaaattaa"));
17
18
  });
18
19
  });