@teselagen/sequence-utils 0.3.10 → 0.3.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bioData.d.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  export const protein_letters: "ACDEFGHIKLMNPQRSTVWY";
2
2
  export const protein_letters_withUandX: "ACDEFGHIKLMNPQRSTVWYUX";
3
- export const extended_protein_letters: "ACDEFGHIKLMNPQRSTVWYBXZJUO*";
3
+ export const extended_protein_letters: "ACDEFGHIKLMNPQRSTVWYBXZJUO";
4
4
  export const ambiguous_dna_letters: "GATCRYWSMKHBVDN";
5
5
  export const unambiguous_dna_letters: "GATC";
6
6
  export const ambiguous_rna_letters: "GAUCRYWSMKHBVDN";
@@ -1,4 +1,4 @@
1
- export default function filterSequenceString(sequenceString: any, { additionalValidChars, isOligo, name, isProtein, isRna, isMixedRnaAndDna, }?: {
1
+ export default function filterSequenceString(sequenceString: any, { additionalValidChars, isOligo, name, isProtein, isRna, isMixedRnaAndDna }?: {
2
2
  additionalValidChars?: string | undefined;
3
3
  isOligo: any;
4
4
  name: any;
@@ -6,7 +6,7 @@ export default function filterSequenceString(sequenceString: any, { additionalVa
6
6
  isRna: any;
7
7
  isMixedRnaAndDna: any;
8
8
  }): (string | string[])[];
9
- export function getAcceptedChars({ isOligo, isProtein, isRna, isMixedRnaAndDna, }?: {
9
+ export function getAcceptedChars({ isOligo, isProtein, isRna, isMixedRnaAndDna }?: {
10
10
  isOligo: any;
11
11
  isProtein: any;
12
12
  isRna: any;
package/index.js CHANGED
@@ -6000,7 +6000,7 @@ lodash.exports;
6000
6000
  var lodashExports = lodash.exports;
6001
6001
  const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
6002
6002
  const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
6003
- const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO*";
6003
+ const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO";
6004
6004
  const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
6005
6005
  const unambiguous_dna_letters = "GATC";
6006
6006
  const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
@@ -6056,7 +6056,7 @@ const extended_protein_values = {
6056
6056
  Y: "Y",
6057
6057
  Z: "QE",
6058
6058
  "*": "\\*\\.",
6059
- ".": "\\.\\.",
6059
+ ".": "\\.",
6060
6060
  "-": "\\-"
6061
6061
  };
6062
6062
  const bioData = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
@@ -12399,9 +12399,12 @@ function getReplaceChars({
12399
12399
  isRna,
12400
12400
  isMixedRnaAndDna
12401
12401
  } = {}) {
12402
- return isProtein ? {} : isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
12403
- //just plain old dna
12404
- {}
12402
+ return isProtein ? {} : (
12403
+ // {".": "*"}
12404
+ isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
12405
+ //just plain old dna
12406
+ {}
12407
+ )
12405
12408
  );
12406
12409
  }
12407
12410
  __name(getReplaceChars, "getReplaceChars");
package/index.mjs CHANGED
@@ -5998,7 +5998,7 @@ lodash.exports;
5998
5998
  var lodashExports = lodash.exports;
5999
5999
  const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
6000
6000
  const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
6001
- const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO*";
6001
+ const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO";
6002
6002
  const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
6003
6003
  const unambiguous_dna_letters = "GATC";
6004
6004
  const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
@@ -6054,7 +6054,7 @@ const extended_protein_values = {
6054
6054
  Y: "Y",
6055
6055
  Z: "QE",
6056
6056
  "*": "\\*\\.",
6057
- ".": "\\.\\.",
6057
+ ".": "\\.",
6058
6058
  "-": "\\-"
6059
6059
  };
6060
6060
  const bioData = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
@@ -12397,9 +12397,12 @@ function getReplaceChars({
12397
12397
  isRna,
12398
12398
  isMixedRnaAndDna
12399
12399
  } = {}) {
12400
- return isProtein ? {} : isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
12401
- //just plain old dna
12402
- {}
12400
+ return isProtein ? {} : (
12401
+ // {".": "*"}
12402
+ isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
12403
+ //just plain old dna
12404
+ {}
12405
+ )
12403
12406
  );
12404
12407
  }
12405
12408
  __name(getReplaceChars, "getReplaceChars");
package/index.umd.js CHANGED
@@ -6002,7 +6002,7 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
6002
6002
  var lodashExports = lodash.exports;
6003
6003
  const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
6004
6004
  const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
6005
- const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO*";
6005
+ const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO";
6006
6006
  const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
6007
6007
  const unambiguous_dna_letters = "GATC";
6008
6008
  const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
@@ -6058,7 +6058,7 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
6058
6058
  Y: "Y",
6059
6059
  Z: "QE",
6060
6060
  "*": "\\*\\.",
6061
- ".": "\\.\\.",
6061
+ ".": "\\.",
6062
6062
  "-": "\\-"
6063
6063
  };
6064
6064
  const bioData = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
@@ -12401,9 +12401,12 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
12401
12401
  isRna,
12402
12402
  isMixedRnaAndDna
12403
12403
  } = {}) {
12404
- return isProtein ? {} : isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
12405
- //just plain old dna
12406
- {}
12404
+ return isProtein ? {} : (
12405
+ // {".": "*"}
12406
+ isOligo ? {} : isRna ? { t: "u" } : isMixedRnaAndDna ? {} : (
12407
+ //just plain old dna
12408
+ {}
12409
+ )
12407
12410
  );
12408
12411
  }
12409
12412
  __name(getReplaceChars, "getReplaceChars");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@teselagen/sequence-utils",
3
- "version": "0.3.10",
3
+ "version": "0.3.11",
4
4
  "dependencies": {
5
5
  "@teselagen/range-utils": "0.3.7",
6
6
  "bson-objectid": "^2.0.4",
package/src/bioData.js CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  export const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
4
4
  export const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
5
- export const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO*";
5
+ export const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO";
6
6
  export const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
7
7
  export const unambiguous_dna_letters = "GATC";
8
8
  export const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
@@ -60,6 +60,6 @@ export const extended_protein_values = {
60
60
  Y: "Y",
61
61
  Z: "QE",
62
62
  "*": "\\*\\.",
63
- ".": "\\.\\.",
63
+ ".": "\\.",
64
64
  "-": "\\-"
65
65
  };
@@ -1,7 +1,7 @@
1
1
  import {
2
2
  ambiguous_dna_letters,
3
3
  ambiguous_rna_letters,
4
- extended_protein_letters,
4
+ extended_protein_letters
5
5
  } from "./bioData";
6
6
 
7
7
  export default function filterSequenceString(
@@ -12,14 +12,14 @@ export default function filterSequenceString(
12
12
  name,
13
13
  isProtein,
14
14
  isRna,
15
- isMixedRnaAndDna,
15
+ isMixedRnaAndDna
16
16
  } = {}
17
17
  ) {
18
18
  const acceptedChars = getAcceptedChars({
19
19
  isOligo,
20
20
  isProtein,
21
21
  isRna,
22
- isMixedRnaAndDna,
22
+ isMixedRnaAndDna
23
23
  });
24
24
  const replaceChars = getReplaceChars({
25
25
  isOligo,
@@ -80,7 +80,7 @@ export function getAcceptedChars({
80
80
  isOligo,
81
81
  isProtein,
82
82
  isRna,
83
- isMixedRnaAndDna,
83
+ isMixedRnaAndDna
84
84
  } = {}) {
85
85
  return isProtein
86
86
  ? `${extended_protein_letters.toLowerCase()}}`
@@ -101,8 +101,8 @@ export function getReplaceChars({
101
101
  } = {}) {
102
102
  return isProtein
103
103
  ? {}
104
- // {".": "*"}
105
- : isOligo
104
+ : // {".": "*"}
105
+ isOligo
106
106
  ? {}
107
107
  : isRna
108
108
  ? { t: "u" }
@@ -50,7 +50,9 @@ describe("filterSequenceString", () => {
50
50
  }
51
51
  );
52
52
  // expect(warnings[0]).toBe(`Replaced "." with "*" 2 times`);
53
- expect(warnings[0]).toBe( 'Invalid character(s) detected and removed: 3, 4, 2, ", ", ", ,, ,, ., ., / ');
53
+ expect(warnings[0]).toBe(
54
+ 'Invalid character(s) detected and removed: 3, 4, 2, ", ", ", ,, ,, ., ., / '
55
+ );
54
56
  expect(str).toBe("bbbxtgalmfwkqespvicyhrnd");
55
57
  });
56
58
  it("when isProtein: true, should handle upper case letters", () => {
@@ -60,11 +62,27 @@ describe("filterSequenceString", () => {
60
62
  expect(warnings.length).toBe(0);
61
63
  expect(str).toBe("xtgalmfWKQEspvicyhrnd");
62
64
  });
65
+
66
+ it("when isProtein: true it should not filter this aa seq", () => {
67
+ const [str] = filterSequenceString(
68
+ "mhhhhhhgsgsmledlkrqvleanlalpkhnlasgssghvsavdrergvfviapsgvdfrimtaddmvvvsietgevvegekppaedtpthrllyqafpsiggivhthsrhatiwaqagqsipatgtthadhfygtipctrkmtdaeingeyewetgnvivetfekqgidaaqmpgvlvhshgpfawgknaedavhnaivleevaymgifcrqlapqlpdmqqtllnkhylrkhgakayygq",
69
+ {
70
+ isProtein: true
71
+ }
72
+ );
73
+
74
+ expect(str).toBe(
75
+ `mhhhhhhgsgsmledlkrqvleanlalpkhnlasgssghvsavdrergvfviapsgvdfrimtaddmvvvsietgevvegekppaedtpthrllyqafpsiggivhthsrhatiwaqagqsipatgtthadhfygtipctrkmtdaeingeyewetgnvivetfekqgidaaqmpgvlvhshgpfawgknaedavhnaivleevaymgifcrqlapqlpdmqqtllnkhylrkhgakayygq`
76
+ );
77
+ });
63
78
  it("when isProtein: true, it should convert . to *", () => {
64
- const [str] = filterSequenceString('BXZJUO*bbb342"""xtgalbmfwkqespvicyhrnd,,../', {
65
- isProtein: true,
66
- });
79
+ const [str] = filterSequenceString(
80
+ 'BXZJUO*bbb342"""xtgalbmfwkqespvicyhrnd,,../',
81
+ {
82
+ isProtein: true
83
+ }
84
+ );
67
85
 
68
- expect(str).toBe("BXZJUO*bbbxtgalbmfwkqespvicyhrnd");
86
+ expect(str).toBe("BXZJUObbbxtgalbmfwkqespvicyhrnd");
69
87
  });
70
88
  });
@@ -135,7 +135,7 @@ describe("insertSequenceData", () => {
135
135
  });
136
136
  it("inserts characters at correct origin spanning range with {maintainOriginSplit: true} option", () => {
137
137
  const sequenceToInsert = {
138
- sequence: "xrrrrry",
138
+ sequence: "crrrrry",
139
139
  // fffffff
140
140
  features: [{ name: "feat1", start: 0, end: 6 }]
141
141
  };
@@ -154,7 +154,7 @@ describe("insertSequenceData", () => {
154
154
  maintainOriginSplit: true
155
155
  }
156
156
  );
157
- postInsertSeq.sequence.should.equal("rrrryagagaxr");
157
+ postInsertSeq.sequence.should.equal("rrrryagagacr");
158
158
  // fffff fff ff
159
159
  postInsertSeq.features.should.containSubset([
160
160
  { name: "feat1", start: 10, end: 4 },
@@ -6,28 +6,25 @@ chai.use(chaiSubset);
6
6
  chai.should();
7
7
  describe("tidyUpSequenceData", () => {
8
8
  it("should remove invalid chars by default, while handling annotation start,end (and location start,end) truncation correctly", () => {
9
- const res = tidyUpSequenceData(
10
- {
11
- sequence: "http://localhost:3344/Standalone",
12
- features: [
13
- {
14
- start: 3,
15
- end: 20,
16
- locations: [
17
- {
18
- start: "3", //this should be converted to an int :)
19
- end: 5
20
- },
21
- {
22
- start: 10,
23
- end: 20
24
- }
25
- ]
26
- }
27
- ]
28
- },
29
-
30
- );
9
+ const res = tidyUpSequenceData({
10
+ sequence: "http://localhost:3344/Standalone",
11
+ features: [
12
+ {
13
+ start: 3,
14
+ end: 20,
15
+ locations: [
16
+ {
17
+ start: "3", //this should be converted to an int :)
18
+ end: 5
19
+ },
20
+ {
21
+ start: 10,
22
+ end: 20
23
+ }
24
+ ]
25
+ }
26
+ ]
27
+ });
31
28
  res.should.containSubset({
32
29
  sequence: "httcahstStandan",
33
30
  circular: false,
@@ -64,22 +61,22 @@ describe("tidyUpSequenceData", () => {
64
61
  },
65
62
  { convertAnnotationsFromAAIndices: true }
66
63
  );
67
-
64
+
68
65
  res.should.containSubset({
69
66
  aminoAcidDataForEachBaseOfDNA: [],
70
67
  isProtein: true,
71
- size: 57, //size should refer to the DNA length
72
- proteinSize: 19, //proteinSize should refer to the amino acid length
73
- sequence: "ggngcnggnathtgacaytggggngcnggngcnytngcnwsnhtnggnytnhtntrr", //degenerate sequence
74
- proteinSequence: "gagiuhwgagalasjglj*",
68
+ size: 54, //size should refer to the DNA length
69
+ proteinSize: 18, //proteinSize should refer to the amino acid length
70
+ sequence: "ggngcnggnathtgacaytggggngcnggngcnytngcnwsnhtnggnytnhtn", //degenerate sequence
71
+ proteinSequence: "gagiuhwgagalasjglj",
75
72
  circular: false,
76
73
  features: [
77
74
  { start: 9, end: 32, forward: true },
78
- { start: 30, end: 56, forward: true },
75
+ { start: 30, end: 53, forward: true },
79
76
  {
80
77
  name: "iDon'tFit",
81
- start: 54,
82
- end: 56,
78
+ start: 51,
79
+ end: 53,
83
80
  forward: true
84
81
  }
85
82
  ]