@teselagen/sequence-utils 0.3.9 → 0.3.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bioData.d.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  export const protein_letters: "ACDEFGHIKLMNPQRSTVWY";
2
2
  export const protein_letters_withUandX: "ACDEFGHIKLMNPQRSTVWYUX";
3
- export const extended_protein_letters: "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
3
+ export const extended_protein_letters: "ACDEFGHIKLMNPQRSTVWYBXZJUO";
4
4
  export const ambiguous_dna_letters: "GATCRYWSMKHBVDN";
5
5
  export const unambiguous_dna_letters: "GATC";
6
6
  export const ambiguous_rna_letters: "GAUCRYWSMKHBVDN";
@@ -1,18 +1,16 @@
1
- export default function filterSequenceString(sequenceString: any, { additionalValidChars, isOligo, name, isProtein, isRna, isMixedRnaAndDna, includeStopCodon }?: {
1
+ export default function filterSequenceString(sequenceString: any, { additionalValidChars, isOligo, name, isProtein, isRna, isMixedRnaAndDna }?: {
2
2
  additionalValidChars?: string | undefined;
3
3
  isOligo: any;
4
4
  name: any;
5
5
  isProtein: any;
6
6
  isRna: any;
7
7
  isMixedRnaAndDna: any;
8
- includeStopCodon: any;
9
8
  }): (string | string[])[];
10
- export function getAcceptedChars({ isOligo, isProtein, isRna, isMixedRnaAndDna, includeStopCodon }?: {
9
+ export function getAcceptedChars({ isOligo, isProtein, isRna, isMixedRnaAndDna }?: {
11
10
  isOligo: any;
12
11
  isProtein: any;
13
12
  isRna: any;
14
13
  isMixedRnaAndDna: any;
15
- includeStopCodon: any;
16
14
  }): string;
17
15
  export function getReplaceChars({ isOligo, isProtein, isRna, isMixedRnaAndDna }?: {
18
16
  isOligo: any;
package/index.js CHANGED
@@ -6000,7 +6000,7 @@ lodash.exports;
6000
6000
  var lodashExports = lodash.exports;
6001
6001
  const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
6002
6002
  const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
6003
- const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
6003
+ const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO";
6004
6004
  const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
6005
6005
  const unambiguous_dna_letters = "GATC";
6006
6006
  const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
@@ -6056,7 +6056,7 @@ const extended_protein_values = {
6056
6056
  Y: "Y",
6057
6057
  Z: "QE",
6058
6058
  "*": "\\*\\.",
6059
- ".": "\\.\\.",
6059
+ ".": "\\.",
6060
6060
  "-": "\\-"
6061
6061
  };
6062
6062
  const bioData = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
@@ -12329,15 +12329,13 @@ function filterSequenceString(sequenceString, {
12329
12329
  name,
12330
12330
  isProtein,
12331
12331
  isRna,
12332
- isMixedRnaAndDna,
12333
- includeStopCodon
12332
+ isMixedRnaAndDna
12334
12333
  } = {}) {
12335
12334
  const acceptedChars = getAcceptedChars({
12336
12335
  isOligo,
12337
12336
  isProtein,
12338
12337
  isRna,
12339
- isMixedRnaAndDna,
12340
- includeStopCodon
12338
+ isMixedRnaAndDna
12341
12339
  });
12342
12340
  const replaceChars = getReplaceChars({
12343
12341
  isOligo,
@@ -12387,10 +12385,9 @@ function getAcceptedChars({
12387
12385
  isOligo,
12388
12386
  isProtein,
12389
12387
  isRna,
12390
- isMixedRnaAndDna,
12391
- includeStopCodon
12388
+ isMixedRnaAndDna
12392
12389
  } = {}) {
12393
- return isProtein ? `${protein_letters_withUandX.toLowerCase()}${includeStopCodon ? "*." : ""}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
12390
+ return isProtein ? `${extended_protein_letters.toLowerCase()}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
12394
12391
  //just plain old dna
12395
12392
  ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
12396
12393
  );
@@ -12402,9 +12399,12 @@ function getReplaceChars({
12402
12399
  isRna,
12403
12400
  isMixedRnaAndDna
12404
12401
  } = {}) {
12405
- return isProtein ? {} : isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
12406
- //just plain old dna
12407
- {}
12402
+ return isProtein ? {} : (
12403
+ // {".": "*"}
12404
+ isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
12405
+ //just plain old dna
12406
+ {}
12407
+ )
12408
12408
  );
12409
12409
  }
12410
12410
  __name(getReplaceChars, "getReplaceChars");
@@ -12544,7 +12544,7 @@ function tidyUpSequenceData(pSeqData, options = {}) {
12544
12544
  const {
12545
12545
  annotationsAsObjects,
12546
12546
  logMessages,
12547
- removeUnwantedChars,
12547
+ doNotRemoveInvalidChars,
12548
12548
  additionalValidChars,
12549
12549
  noTranslationData,
12550
12550
  doNotProvideIdsForAnnotations,
@@ -12578,11 +12578,9 @@ function tidyUpSequenceData(pSeqData, options = {}) {
12578
12578
  if (seqData.isRna) {
12579
12579
  seqData.sequence = seqData.sequence.replace(/t/gi, "u");
12580
12580
  }
12581
- if (removeUnwantedChars) {
12581
+ if (!doNotRemoveInvalidChars) {
12582
12582
  if (seqData.isProtein) {
12583
- const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
12584
- includeStopCodon: true
12585
- }, topLevelSeqData || seqData));
12583
+ const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData));
12586
12584
  seqData.proteinSequence = newSeq;
12587
12585
  } else {
12588
12586
  const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
package/index.mjs CHANGED
@@ -5998,7 +5998,7 @@ lodash.exports;
5998
5998
  var lodashExports = lodash.exports;
5999
5999
  const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
6000
6000
  const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
6001
- const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
6001
+ const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO";
6002
6002
  const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
6003
6003
  const unambiguous_dna_letters = "GATC";
6004
6004
  const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
@@ -6054,7 +6054,7 @@ const extended_protein_values = {
6054
6054
  Y: "Y",
6055
6055
  Z: "QE",
6056
6056
  "*": "\\*\\.",
6057
- ".": "\\.\\.",
6057
+ ".": "\\.",
6058
6058
  "-": "\\-"
6059
6059
  };
6060
6060
  const bioData = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
@@ -12327,15 +12327,13 @@ function filterSequenceString(sequenceString, {
12327
12327
  name,
12328
12328
  isProtein,
12329
12329
  isRna,
12330
- isMixedRnaAndDna,
12331
- includeStopCodon
12330
+ isMixedRnaAndDna
12332
12331
  } = {}) {
12333
12332
  const acceptedChars = getAcceptedChars({
12334
12333
  isOligo,
12335
12334
  isProtein,
12336
12335
  isRna,
12337
- isMixedRnaAndDna,
12338
- includeStopCodon
12336
+ isMixedRnaAndDna
12339
12337
  });
12340
12338
  const replaceChars = getReplaceChars({
12341
12339
  isOligo,
@@ -12385,10 +12383,9 @@ function getAcceptedChars({
12385
12383
  isOligo,
12386
12384
  isProtein,
12387
12385
  isRna,
12388
- isMixedRnaAndDna,
12389
- includeStopCodon
12386
+ isMixedRnaAndDna
12390
12387
  } = {}) {
12391
- return isProtein ? `${protein_letters_withUandX.toLowerCase()}${includeStopCodon ? "*." : ""}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
12388
+ return isProtein ? `${extended_protein_letters.toLowerCase()}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
12392
12389
  //just plain old dna
12393
12390
  ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
12394
12391
  );
@@ -12400,9 +12397,12 @@ function getReplaceChars({
12400
12397
  isRna,
12401
12398
  isMixedRnaAndDna
12402
12399
  } = {}) {
12403
- return isProtein ? {} : isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
12404
- //just plain old dna
12405
- {}
12400
+ return isProtein ? {} : (
12401
+ // {".": "*"}
12402
+ isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
12403
+ //just plain old dna
12404
+ {}
12405
+ )
12406
12406
  );
12407
12407
  }
12408
12408
  __name(getReplaceChars, "getReplaceChars");
@@ -12542,7 +12542,7 @@ function tidyUpSequenceData(pSeqData, options = {}) {
12542
12542
  const {
12543
12543
  annotationsAsObjects,
12544
12544
  logMessages,
12545
- removeUnwantedChars,
12545
+ doNotRemoveInvalidChars,
12546
12546
  additionalValidChars,
12547
12547
  noTranslationData,
12548
12548
  doNotProvideIdsForAnnotations,
@@ -12576,11 +12576,9 @@ function tidyUpSequenceData(pSeqData, options = {}) {
12576
12576
  if (seqData.isRna) {
12577
12577
  seqData.sequence = seqData.sequence.replace(/t/gi, "u");
12578
12578
  }
12579
- if (removeUnwantedChars) {
12579
+ if (!doNotRemoveInvalidChars) {
12580
12580
  if (seqData.isProtein) {
12581
- const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
12582
- includeStopCodon: true
12583
- }, topLevelSeqData || seqData));
12581
+ const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData));
12584
12582
  seqData.proteinSequence = newSeq;
12585
12583
  } else {
12586
12584
  const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
package/index.umd.js CHANGED
@@ -6002,7 +6002,7 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
6002
6002
  var lodashExports = lodash.exports;
6003
6003
  const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
6004
6004
  const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
6005
- const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
6005
+ const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO";
6006
6006
  const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
6007
6007
  const unambiguous_dna_letters = "GATC";
6008
6008
  const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
@@ -6058,7 +6058,7 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
6058
6058
  Y: "Y",
6059
6059
  Z: "QE",
6060
6060
  "*": "\\*\\.",
6061
- ".": "\\.\\.",
6061
+ ".": "\\.",
6062
6062
  "-": "\\-"
6063
6063
  };
6064
6064
  const bioData = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
@@ -12331,15 +12331,13 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
12331
12331
  name,
12332
12332
  isProtein,
12333
12333
  isRna,
12334
- isMixedRnaAndDna,
12335
- includeStopCodon
12334
+ isMixedRnaAndDna
12336
12335
  } = {}) {
12337
12336
  const acceptedChars = getAcceptedChars({
12338
12337
  isOligo,
12339
12338
  isProtein,
12340
12339
  isRna,
12341
- isMixedRnaAndDna,
12342
- includeStopCodon
12340
+ isMixedRnaAndDna
12343
12341
  });
12344
12342
  const replaceChars = getReplaceChars({
12345
12343
  isOligo,
@@ -12389,10 +12387,9 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
12389
12387
  isOligo,
12390
12388
  isProtein,
12391
12389
  isRna,
12392
- isMixedRnaAndDna,
12393
- includeStopCodon
12390
+ isMixedRnaAndDna
12394
12391
  } = {}) {
12395
- return isProtein ? `${protein_letters_withUandX.toLowerCase()}${includeStopCodon ? "*." : ""}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
12392
+ return isProtein ? `${extended_protein_letters.toLowerCase()}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
12396
12393
  //just plain old dna
12397
12394
  ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
12398
12395
  );
@@ -12404,9 +12401,12 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
12404
12401
  isRna,
12405
12402
  isMixedRnaAndDna
12406
12403
  } = {}) {
12407
- return isProtein ? {} : isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
12408
- //just plain old dna
12409
- {}
12404
+ return isProtein ? {} : (
12405
+ // {".": "*"}
12406
+ isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
12407
+ //just plain old dna
12408
+ {}
12409
+ )
12410
12410
  );
12411
12411
  }
12412
12412
  __name(getReplaceChars, "getReplaceChars");
@@ -12546,7 +12546,7 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
12546
12546
  const {
12547
12547
  annotationsAsObjects,
12548
12548
  logMessages,
12549
- removeUnwantedChars,
12549
+ doNotRemoveInvalidChars,
12550
12550
  additionalValidChars,
12551
12551
  noTranslationData,
12552
12552
  doNotProvideIdsForAnnotations,
@@ -12580,11 +12580,9 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
12580
12580
  if (seqData.isRna) {
12581
12581
  seqData.sequence = seqData.sequence.replace(/t/gi, "u");
12582
12582
  }
12583
- if (removeUnwantedChars) {
12583
+ if (!doNotRemoveInvalidChars) {
12584
12584
  if (seqData.isProtein) {
12585
- const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
12586
- includeStopCodon: true
12587
- }, topLevelSeqData || seqData));
12585
+ const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData));
12588
12586
  seqData.proteinSequence = newSeq;
12589
12587
  } else {
12590
12588
  const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@teselagen/sequence-utils",
3
- "version": "0.3.9",
3
+ "version": "0.3.11",
4
4
  "dependencies": {
5
5
  "@teselagen/range-utils": "0.3.7",
6
6
  "bson-objectid": "^2.0.4",
package/src/bioData.js CHANGED
@@ -2,8 +2,7 @@
2
2
 
3
3
  export const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
4
4
  export const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
5
-
6
- export const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
5
+ export const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO";
7
6
  export const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
8
7
  export const unambiguous_dna_letters = "GATC";
9
8
  export const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
@@ -61,6 +60,6 @@ export const extended_protein_values = {
61
60
  Y: "Y",
62
61
  Z: "QE",
63
62
  "*": "\\*\\.",
64
- ".": "\\.\\.",
63
+ ".": "\\.",
65
64
  "-": "\\-"
66
65
  };
@@ -1,7 +1,7 @@
1
1
  import {
2
2
  ambiguous_dna_letters,
3
3
  ambiguous_rna_letters,
4
- protein_letters_withUandX
4
+ extended_protein_letters
5
5
  } from "./bioData";
6
6
 
7
7
  export default function filterSequenceString(
@@ -12,16 +12,14 @@ export default function filterSequenceString(
12
12
  name,
13
13
  isProtein,
14
14
  isRna,
15
- isMixedRnaAndDna,
16
- includeStopCodon
15
+ isMixedRnaAndDna
17
16
  } = {}
18
17
  ) {
19
18
  const acceptedChars = getAcceptedChars({
20
19
  isOligo,
21
20
  isProtein,
22
21
  isRna,
23
- isMixedRnaAndDna,
24
- includeStopCodon
22
+ isMixedRnaAndDna
25
23
  });
26
24
  const replaceChars = getReplaceChars({
27
25
  isOligo,
@@ -82,13 +80,10 @@ export function getAcceptedChars({
82
80
  isOligo,
83
81
  isProtein,
84
82
  isRna,
85
- isMixedRnaAndDna,
86
- includeStopCodon
83
+ isMixedRnaAndDna
87
84
  } = {}) {
88
85
  return isProtein
89
- ? `${protein_letters_withUandX.toLowerCase()}${
90
- includeStopCodon ? "*." : ""
91
- }}`
86
+ ? `${extended_protein_letters.toLowerCase()}}`
92
87
  : isOligo
93
88
  ? ambiguous_rna_letters.toLowerCase() + "t"
94
89
  : isRna
@@ -106,7 +101,8 @@ export function getReplaceChars({
106
101
  } = {}) {
107
102
  return isProtein
108
103
  ? {}
109
- : isOligo
104
+ : // {".": "*"}
105
+ isOligo
110
106
  ? {}
111
107
  : isRna
112
108
  ? { t: "u" }
@@ -49,10 +49,11 @@ describe("filterSequenceString", () => {
49
49
  isProtein: true
50
50
  }
51
51
  );
52
+ // expect(warnings[0]).toBe(`Replaced "." with "*" 2 times`);
52
53
  expect(warnings[0]).toBe(
53
- 'Invalid character(s) detected and removed: b, b, b, 3, 4, 2, ", ", ", ,, ,, ., ., / '
54
+ 'Invalid character(s) detected and removed: 3, 4, 2, ", ", ", ,, ,, ., ., / '
54
55
  );
55
- expect(str).toBe("xtgalmfwkqespvicyhrnd");
56
+ expect(str).toBe("bbbxtgalmfwkqespvicyhrnd");
56
57
  });
57
58
  it("when isProtein: true, should handle upper case letters", () => {
58
59
  const [str, warnings] = filterSequenceString("xtgalmfWKQEspvicyhrnd", {
@@ -61,12 +62,27 @@ describe("filterSequenceString", () => {
61
62
  expect(warnings.length).toBe(0);
62
63
  expect(str).toBe("xtgalmfWKQEspvicyhrnd");
63
64
  });
64
- it("when isProtein: true, should handle the option to includeStopCodon by allowing periods", () => {
65
- const [str] = filterSequenceString('bbb342"""xtgalmfwkqespvicyhrnd,,../', {
66
- isProtein: true,
67
- includeStopCodon: true
68
- });
69
65
 
70
- expect(str).toBe("xtgalmfwkqespvicyhrnd..");
66
+ it("when isProtein: true it should not filter this aa seq", () => {
67
+ const [str] = filterSequenceString(
68
+ "mhhhhhhgsgsmledlkrqvleanlalpkhnlasgssghvsavdrergvfviapsgvdfrimtaddmvvvsietgevvegekppaedtpthrllyqafpsiggivhthsrhatiwaqagqsipatgtthadhfygtipctrkmtdaeingeyewetgnvivetfekqgidaaqmpgvlvhshgpfawgknaedavhnaivleevaymgifcrqlapqlpdmqqtllnkhylrkhgakayygq",
69
+ {
70
+ isProtein: true
71
+ }
72
+ );
73
+
74
+ expect(str).toBe(
75
+ `mhhhhhhgsgsmledlkrqvleanlalpkhnlasgssghvsavdrergvfviapsgvdfrimtaddmvvvsietgevvegekppaedtpthrllyqafpsiggivhthsrhatiwaqagqsipatgtthadhfygtipctrkmtdaeingeyewetgnvivetfekqgidaaqmpgvlvhshgpfawgknaedavhnaivleevaymgifcrqlapqlpdmqqtllnkhylrkhgakayygq`
76
+ );
77
+ });
78
+ it("when isProtein: true, it should convert . to *", () => {
79
+ const [str] = filterSequenceString(
80
+ 'BXZJUO*bbb342"""xtgalbmfwkqespvicyhrnd,,../',
81
+ {
82
+ isProtein: true
83
+ }
84
+ );
85
+
86
+ expect(str).toBe("BXZJUObbbxtgalbmfwkqespvicyhrnd");
71
87
  });
72
88
  });
@@ -135,7 +135,7 @@ describe("insertSequenceData", () => {
135
135
  });
136
136
  it("inserts characters at correct origin spanning range with {maintainOriginSplit: true} option", () => {
137
137
  const sequenceToInsert = {
138
- sequence: "xrrrrry",
138
+ sequence: "crrrrry",
139
139
  // fffffff
140
140
  features: [{ name: "feat1", start: 0, end: 6 }]
141
141
  };
@@ -154,7 +154,7 @@ describe("insertSequenceData", () => {
154
154
  maintainOriginSplit: true
155
155
  }
156
156
  );
157
- postInsertSeq.sequence.should.equal("rrrryagagaxr");
157
+ postInsertSeq.sequence.should.equal("rrrryagagacr");
158
158
  // fffff fff ff
159
159
  postInsertSeq.features.should.containSubset([
160
160
  { name: "feat1", start: 10, end: 4 },
@@ -13,7 +13,7 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
13
13
  const {
14
14
  annotationsAsObjects,
15
15
  logMessages,
16
- removeUnwantedChars,
16
+ doNotRemoveInvalidChars,
17
17
  additionalValidChars,
18
18
  noTranslationData,
19
19
  doNotProvideIdsForAnnotations,
@@ -52,10 +52,9 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
52
52
  //flip all t's to u's
53
53
  seqData.sequence = seqData.sequence.replace(/t/gi, "u");
54
54
  }
55
- if (removeUnwantedChars) {
55
+ if (!doNotRemoveInvalidChars) {
56
56
  if (seqData.isProtein) {
57
57
  const [newSeq] = filterSequenceString(seqData.proteinSequence, {
58
- includeStopCodon: true,
59
58
  ...(topLevelSeqData || seqData)
60
59
  });
61
60
  seqData.proteinSequence = newSeq;
@@ -5,29 +5,26 @@ import chaiSubset from "chai-subset";
5
5
  chai.use(chaiSubset);
6
6
  chai.should();
7
7
  describe("tidyUpSequenceData", () => {
8
- it("should remove unwanted chars if passed that option, while handling annotation start,end (and location start,end) truncation correctly", () => {
9
- const res = tidyUpSequenceData(
10
- {
11
- sequence: "http://localhost:3344/Standalone",
12
- features: [
13
- {
14
- start: 3,
15
- end: 20,
16
- locations: [
17
- {
18
- start: "3", //this should be converted to an int :)
19
- end: 5
20
- },
21
- {
22
- start: 10,
23
- end: 20
24
- }
25
- ]
26
- }
27
- ]
28
- },
29
- { removeUnwantedChars: true }
30
- );
8
+ it("should remove invalid chars by default, while handling annotation start,end (and location start,end) truncation correctly", () => {
9
+ const res = tidyUpSequenceData({
10
+ sequence: "http://localhost:3344/Standalone",
11
+ features: [
12
+ {
13
+ start: 3,
14
+ end: 20,
15
+ locations: [
16
+ {
17
+ start: "3", //this should be converted to an int :)
18
+ end: 5
19
+ },
20
+ {
21
+ start: 10,
22
+ end: 20
23
+ }
24
+ ]
25
+ }
26
+ ]
27
+ });
31
28
  res.should.containSubset({
32
29
  sequence: "httcahstStandan",
33
30
  circular: false,
@@ -49,15 +46,6 @@ describe("tidyUpSequenceData", () => {
49
46
  ]
50
47
  });
51
48
  });
52
- // const res = tidyUpSequenceData(
53
- // {
54
- // isProtein: true,
55
- // circular: true,
56
- // proteinSequence: "gagiuhwgagalasjglj*.",
57
- // features: [{ start: 3, end: 10 }, { start: 10, end: 20 }]
58
- // },
59
- // { convertAnnotationsFromAAIndices: true, removeUnwantedChars: true }
60
- // );
61
49
 
62
50
  it("should handle a protein sequence being passed in with isProtein set to true", () => {
63
51
  const res = tidyUpSequenceData(
@@ -71,61 +59,16 @@ describe("tidyUpSequenceData", () => {
71
59
  { name: "iDon'tFit", start: 25, end: 35 }
72
60
  ]
73
61
  },
74
- { convertAnnotationsFromAAIndices: true, removeUnwantedChars: true }
62
+ { convertAnnotationsFromAAIndices: true }
75
63
  );
64
+
76
65
  res.should.containSubset({
77
- aminoAcidDataForEachBaseOfDNA: [
78
- {
79
- aminoAcid: {
80
- value: ".",
81
- name: "Gap",
82
- threeLettersName: "Gap"
83
- },
84
- positionInCodon: 0,
85
- aminoAcidIndex: 17,
86
- sequenceIndex: 51,
87
- codonRange: {
88
- start: 51,
89
- end: 53
90
- },
91
- fullCodon: true
92
- },
93
- {
94
- aminoAcid: {
95
- value: ".",
96
- name: "Gap",
97
- threeLettersName: "Gap"
98
- },
99
- positionInCodon: 1,
100
- aminoAcidIndex: 17,
101
- sequenceIndex: 52,
102
- codonRange: {
103
- start: 51,
104
- end: 53
105
- },
106
- fullCodon: true
107
- },
108
- {
109
- aminoAcid: {
110
- value: ".",
111
- name: "Gap",
112
- threeLettersName: "Gap"
113
- },
114
- positionInCodon: 2,
115
- aminoAcidIndex: 17,
116
- sequenceIndex: 53,
117
- codonRange: {
118
- start: 51,
119
- end: 53
120
- },
121
- fullCodon: true
122
- }
123
- ],
66
+ aminoAcidDataForEachBaseOfDNA: [],
124
67
  isProtein: true,
125
68
  size: 54, //size should refer to the DNA length
126
69
  proteinSize: 18, //proteinSize should refer to the amino acid length
127
- sequence: "ggngcnggnathtgacaytggggngcnggngcnytngcnwsnggnytntrr...", //degenerate sequence
128
- proteinSequence: "gagiuhwgagalasgl*.",
70
+ sequence: "ggngcnggnathtgacaytggggngcnggngcnytngcnwsnhtnggnytnhtn", //degenerate sequence
71
+ proteinSequence: "gagiuhwgagalasjglj",
129
72
  circular: false,
130
73
  features: [
131
74
  { start: 9, end: 32, forward: true },