@teselagen/sequence-utils 0.3.9 → 0.3.10

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/bioData.d.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  export const protein_letters: "ACDEFGHIKLMNPQRSTVWY";
2
2
  export const protein_letters_withUandX: "ACDEFGHIKLMNPQRSTVWYUX";
3
- export const extended_protein_letters: "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
3
+ export const extended_protein_letters: "ACDEFGHIKLMNPQRSTVWYBXZJUO*";
4
4
  export const ambiguous_dna_letters: "GATCRYWSMKHBVDN";
5
5
  export const unambiguous_dna_letters: "GATC";
6
6
  export const ambiguous_rna_letters: "GAUCRYWSMKHBVDN";
@@ -1,18 +1,16 @@
1
- export default function filterSequenceString(sequenceString: any, { additionalValidChars, isOligo, name, isProtein, isRna, isMixedRnaAndDna, includeStopCodon }?: {
1
+ export default function filterSequenceString(sequenceString: any, { additionalValidChars, isOligo, name, isProtein, isRna, isMixedRnaAndDna, }?: {
2
2
  additionalValidChars?: string | undefined;
3
3
  isOligo: any;
4
4
  name: any;
5
5
  isProtein: any;
6
6
  isRna: any;
7
7
  isMixedRnaAndDna: any;
8
- includeStopCodon: any;
9
8
  }): (string | string[])[];
10
- export function getAcceptedChars({ isOligo, isProtein, isRna, isMixedRnaAndDna, includeStopCodon }?: {
9
+ export function getAcceptedChars({ isOligo, isProtein, isRna, isMixedRnaAndDna, }?: {
11
10
  isOligo: any;
12
11
  isProtein: any;
13
12
  isRna: any;
14
13
  isMixedRnaAndDna: any;
15
- includeStopCodon: any;
16
14
  }): string;
17
15
  export function getReplaceChars({ isOligo, isProtein, isRna, isMixedRnaAndDna }?: {
18
16
  isOligo: any;
package/index.js CHANGED
@@ -6000,7 +6000,7 @@ lodash.exports;
6000
6000
  var lodashExports = lodash.exports;
6001
6001
  const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
6002
6002
  const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
6003
- const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
6003
+ const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO*";
6004
6004
  const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
6005
6005
  const unambiguous_dna_letters = "GATC";
6006
6006
  const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
@@ -12329,15 +12329,13 @@ function filterSequenceString(sequenceString, {
12329
12329
  name,
12330
12330
  isProtein,
12331
12331
  isRna,
12332
- isMixedRnaAndDna,
12333
- includeStopCodon
12332
+ isMixedRnaAndDna
12334
12333
  } = {}) {
12335
12334
  const acceptedChars = getAcceptedChars({
12336
12335
  isOligo,
12337
12336
  isProtein,
12338
12337
  isRna,
12339
- isMixedRnaAndDna,
12340
- includeStopCodon
12338
+ isMixedRnaAndDna
12341
12339
  });
12342
12340
  const replaceChars = getReplaceChars({
12343
12341
  isOligo,
@@ -12387,10 +12385,9 @@ function getAcceptedChars({
12387
12385
  isOligo,
12388
12386
  isProtein,
12389
12387
  isRna,
12390
- isMixedRnaAndDna,
12391
- includeStopCodon
12388
+ isMixedRnaAndDna
12392
12389
  } = {}) {
12393
- return isProtein ? `${protein_letters_withUandX.toLowerCase()}${includeStopCodon ? "*." : ""}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
12390
+ return isProtein ? `${extended_protein_letters.toLowerCase()}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
12394
12391
  //just plain old dna
12395
12392
  ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
12396
12393
  );
@@ -12544,7 +12541,7 @@ function tidyUpSequenceData(pSeqData, options = {}) {
12544
12541
  const {
12545
12542
  annotationsAsObjects,
12546
12543
  logMessages,
12547
- removeUnwantedChars,
12544
+ doNotRemoveInvalidChars,
12548
12545
  additionalValidChars,
12549
12546
  noTranslationData,
12550
12547
  doNotProvideIdsForAnnotations,
@@ -12578,11 +12575,9 @@ function tidyUpSequenceData(pSeqData, options = {}) {
12578
12575
  if (seqData.isRna) {
12579
12576
  seqData.sequence = seqData.sequence.replace(/t/gi, "u");
12580
12577
  }
12581
- if (removeUnwantedChars) {
12578
+ if (!doNotRemoveInvalidChars) {
12582
12579
  if (seqData.isProtein) {
12583
- const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
12584
- includeStopCodon: true
12585
- }, topLevelSeqData || seqData));
12580
+ const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData));
12586
12581
  seqData.proteinSequence = newSeq;
12587
12582
  } else {
12588
12583
  const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
package/index.mjs CHANGED
@@ -5998,7 +5998,7 @@ lodash.exports;
5998
5998
  var lodashExports = lodash.exports;
5999
5999
  const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
6000
6000
  const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
6001
- const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
6001
+ const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO*";
6002
6002
  const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
6003
6003
  const unambiguous_dna_letters = "GATC";
6004
6004
  const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
@@ -12327,15 +12327,13 @@ function filterSequenceString(sequenceString, {
12327
12327
  name,
12328
12328
  isProtein,
12329
12329
  isRna,
12330
- isMixedRnaAndDna,
12331
- includeStopCodon
12330
+ isMixedRnaAndDna
12332
12331
  } = {}) {
12333
12332
  const acceptedChars = getAcceptedChars({
12334
12333
  isOligo,
12335
12334
  isProtein,
12336
12335
  isRna,
12337
- isMixedRnaAndDna,
12338
- includeStopCodon
12336
+ isMixedRnaAndDna
12339
12337
  });
12340
12338
  const replaceChars = getReplaceChars({
12341
12339
  isOligo,
@@ -12385,10 +12383,9 @@ function getAcceptedChars({
12385
12383
  isOligo,
12386
12384
  isProtein,
12387
12385
  isRna,
12388
- isMixedRnaAndDna,
12389
- includeStopCodon
12386
+ isMixedRnaAndDna
12390
12387
  } = {}) {
12391
- return isProtein ? `${protein_letters_withUandX.toLowerCase()}${includeStopCodon ? "*." : ""}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
12388
+ return isProtein ? `${extended_protein_letters.toLowerCase()}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
12392
12389
  //just plain old dna
12393
12390
  ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
12394
12391
  );
@@ -12542,7 +12539,7 @@ function tidyUpSequenceData(pSeqData, options = {}) {
12542
12539
  const {
12543
12540
  annotationsAsObjects,
12544
12541
  logMessages,
12545
- removeUnwantedChars,
12542
+ doNotRemoveInvalidChars,
12546
12543
  additionalValidChars,
12547
12544
  noTranslationData,
12548
12545
  doNotProvideIdsForAnnotations,
@@ -12576,11 +12573,9 @@ function tidyUpSequenceData(pSeqData, options = {}) {
12576
12573
  if (seqData.isRna) {
12577
12574
  seqData.sequence = seqData.sequence.replace(/t/gi, "u");
12578
12575
  }
12579
- if (removeUnwantedChars) {
12576
+ if (!doNotRemoveInvalidChars) {
12580
12577
  if (seqData.isProtein) {
12581
- const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
12582
- includeStopCodon: true
12583
- }, topLevelSeqData || seqData));
12578
+ const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData));
12584
12579
  seqData.proteinSequence = newSeq;
12585
12580
  } else {
12586
12581
  const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
package/index.umd.js CHANGED
@@ -6002,7 +6002,7 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
6002
6002
  var lodashExports = lodash.exports;
6003
6003
  const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
6004
6004
  const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
6005
- const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
6005
+ const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO*";
6006
6006
  const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
6007
6007
  const unambiguous_dna_letters = "GATC";
6008
6008
  const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
@@ -12331,15 +12331,13 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
12331
12331
  name,
12332
12332
  isProtein,
12333
12333
  isRna,
12334
- isMixedRnaAndDna,
12335
- includeStopCodon
12334
+ isMixedRnaAndDna
12336
12335
  } = {}) {
12337
12336
  const acceptedChars = getAcceptedChars({
12338
12337
  isOligo,
12339
12338
  isProtein,
12340
12339
  isRna,
12341
- isMixedRnaAndDna,
12342
- includeStopCodon
12340
+ isMixedRnaAndDna
12343
12341
  });
12344
12342
  const replaceChars = getReplaceChars({
12345
12343
  isOligo,
@@ -12389,10 +12387,9 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
12389
12387
  isOligo,
12390
12388
  isProtein,
12391
12389
  isRna,
12392
- isMixedRnaAndDna,
12393
- includeStopCodon
12390
+ isMixedRnaAndDna
12394
12391
  } = {}) {
12395
- return isProtein ? `${protein_letters_withUandX.toLowerCase()}${includeStopCodon ? "*." : ""}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
12392
+ return isProtein ? `${extended_protein_letters.toLowerCase()}}` : isOligo ? ambiguous_rna_letters.toLowerCase() + "t" : isRna ? ambiguous_rna_letters.toLowerCase() + "t" : isMixedRnaAndDna ? ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase() : (
12396
12393
  //just plain old dna
12397
12394
  ambiguous_rna_letters.toLowerCase() + ambiguous_dna_letters.toLowerCase()
12398
12395
  );
@@ -12546,7 +12543,7 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
12546
12543
  const {
12547
12544
  annotationsAsObjects,
12548
12545
  logMessages,
12549
- removeUnwantedChars,
12546
+ doNotRemoveInvalidChars,
12550
12547
  additionalValidChars,
12551
12548
  noTranslationData,
12552
12549
  doNotProvideIdsForAnnotations,
@@ -12580,11 +12577,9 @@ var __name = (target, value) => __defProp(target, "name", { value, configurable:
12580
12577
  if (seqData.isRna) {
12581
12578
  seqData.sequence = seqData.sequence.replace(/t/gi, "u");
12582
12579
  }
12583
- if (removeUnwantedChars) {
12580
+ if (!doNotRemoveInvalidChars) {
12584
12581
  if (seqData.isProtein) {
12585
- const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({
12586
- includeStopCodon: true
12587
- }, topLevelSeqData || seqData));
12582
+ const [newSeq] = filterSequenceString(seqData.proteinSequence, __spreadValues({}, topLevelSeqData || seqData));
12588
12583
  seqData.proteinSequence = newSeq;
12589
12584
  } else {
12590
12585
  const [newSeq] = filterSequenceString(seqData.sequence, __spreadValues({
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@teselagen/sequence-utils",
3
- "version": "0.3.9",
3
+ "version": "0.3.10",
4
4
  "dependencies": {
5
5
  "@teselagen/range-utils": "0.3.7",
6
6
  "bson-objectid": "^2.0.4",
package/src/bioData.js CHANGED
@@ -2,8 +2,7 @@
2
2
 
3
3
  export const protein_letters = "ACDEFGHIKLMNPQRSTVWY";
4
4
  export const protein_letters_withUandX = "ACDEFGHIKLMNPQRSTVWYUX";
5
-
6
- export const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO.*-";
5
+ export const extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO*";
7
6
  export const ambiguous_dna_letters = "GATCRYWSMKHBVDN";
8
7
  export const unambiguous_dna_letters = "GATC";
9
8
  export const ambiguous_rna_letters = "GAUCRYWSMKHBVDN";
@@ -1,7 +1,7 @@
1
1
  import {
2
2
  ambiguous_dna_letters,
3
3
  ambiguous_rna_letters,
4
- protein_letters_withUandX
4
+ extended_protein_letters,
5
5
  } from "./bioData";
6
6
 
7
7
  export default function filterSequenceString(
@@ -13,7 +13,6 @@ export default function filterSequenceString(
13
13
  isProtein,
14
14
  isRna,
15
15
  isMixedRnaAndDna,
16
- includeStopCodon
17
16
  } = {}
18
17
  ) {
19
18
  const acceptedChars = getAcceptedChars({
@@ -21,7 +20,6 @@ export default function filterSequenceString(
21
20
  isProtein,
22
21
  isRna,
23
22
  isMixedRnaAndDna,
24
- includeStopCodon
25
23
  });
26
24
  const replaceChars = getReplaceChars({
27
25
  isOligo,
@@ -83,12 +81,9 @@ export function getAcceptedChars({
83
81
  isProtein,
84
82
  isRna,
85
83
  isMixedRnaAndDna,
86
- includeStopCodon
87
84
  } = {}) {
88
85
  return isProtein
89
- ? `${protein_letters_withUandX.toLowerCase()}${
90
- includeStopCodon ? "*." : ""
91
- }}`
86
+ ? `${extended_protein_letters.toLowerCase()}}`
92
87
  : isOligo
93
88
  ? ambiguous_rna_letters.toLowerCase() + "t"
94
89
  : isRna
@@ -106,6 +101,7 @@ export function getReplaceChars({
106
101
  } = {}) {
107
102
  return isProtein
108
103
  ? {}
104
+ // {".": "*"}
109
105
  : isOligo
110
106
  ? {}
111
107
  : isRna
@@ -49,10 +49,9 @@ describe("filterSequenceString", () => {
49
49
  isProtein: true
50
50
  }
51
51
  );
52
- expect(warnings[0]).toBe(
53
- 'Invalid character(s) detected and removed: b, b, b, 3, 4, 2, ", ", ", ,, ,, ., ., / '
54
- );
55
- expect(str).toBe("xtgalmfwkqespvicyhrnd");
52
+ // expect(warnings[0]).toBe(`Replaced "." with "*" 2 times`);
53
+ expect(warnings[0]).toBe( 'Invalid character(s) detected and removed: 3, 4, 2, ", ", ", ,, ,, ., ., / ');
54
+ expect(str).toBe("bbbxtgalmfwkqespvicyhrnd");
56
55
  });
57
56
  it("when isProtein: true, should handle upper case letters", () => {
58
57
  const [str, warnings] = filterSequenceString("xtgalmfWKQEspvicyhrnd", {
@@ -61,12 +60,11 @@ describe("filterSequenceString", () => {
61
60
  expect(warnings.length).toBe(0);
62
61
  expect(str).toBe("xtgalmfWKQEspvicyhrnd");
63
62
  });
64
- it("when isProtein: true, should handle the option to includeStopCodon by allowing periods", () => {
65
- const [str] = filterSequenceString('bbb342"""xtgalmfwkqespvicyhrnd,,../', {
63
+ it("when isProtein: true, it should convert . to *", () => {
64
+ const [str] = filterSequenceString('BXZJUO*bbb342"""xtgalbmfwkqespvicyhrnd,,../', {
66
65
  isProtein: true,
67
- includeStopCodon: true
68
66
  });
69
67
 
70
- expect(str).toBe("xtgalmfwkqespvicyhrnd..");
68
+ expect(str).toBe("BXZJUO*bbbxtgalbmfwkqespvicyhrnd");
71
69
  });
72
70
  });
@@ -13,7 +13,7 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
13
13
  const {
14
14
  annotationsAsObjects,
15
15
  logMessages,
16
- removeUnwantedChars,
16
+ doNotRemoveInvalidChars,
17
17
  additionalValidChars,
18
18
  noTranslationData,
19
19
  doNotProvideIdsForAnnotations,
@@ -52,10 +52,9 @@ export default function tidyUpSequenceData(pSeqData, options = {}) {
52
52
  //flip all t's to u's
53
53
  seqData.sequence = seqData.sequence.replace(/t/gi, "u");
54
54
  }
55
- if (removeUnwantedChars) {
55
+ if (!doNotRemoveInvalidChars) {
56
56
  if (seqData.isProtein) {
57
57
  const [newSeq] = filterSequenceString(seqData.proteinSequence, {
58
- includeStopCodon: true,
59
58
  ...(topLevelSeqData || seqData)
60
59
  });
61
60
  seqData.proteinSequence = newSeq;
@@ -5,7 +5,7 @@ import chaiSubset from "chai-subset";
5
5
  chai.use(chaiSubset);
6
6
  chai.should();
7
7
  describe("tidyUpSequenceData", () => {
8
- it("should remove unwanted chars if passed that option, while handling annotation start,end (and location start,end) truncation correctly", () => {
8
+ it("should remove invalid chars by default, while handling annotation start,end (and location start,end) truncation correctly", () => {
9
9
  const res = tidyUpSequenceData(
10
10
  {
11
11
  sequence: "http://localhost:3344/Standalone",
@@ -26,7 +26,7 @@ describe("tidyUpSequenceData", () => {
26
26
  }
27
27
  ]
28
28
  },
29
- { removeUnwantedChars: true }
29
+
30
30
  );
31
31
  res.should.containSubset({
32
32
  sequence: "httcahstStandan",
@@ -49,15 +49,6 @@ describe("tidyUpSequenceData", () => {
49
49
  ]
50
50
  });
51
51
  });
52
- // const res = tidyUpSequenceData(
53
- // {
54
- // isProtein: true,
55
- // circular: true,
56
- // proteinSequence: "gagiuhwgagalasjglj*.",
57
- // features: [{ start: 3, end: 10 }, { start: 10, end: 20 }]
58
- // },
59
- // { convertAnnotationsFromAAIndices: true, removeUnwantedChars: true }
60
- // );
61
52
 
62
53
  it("should handle a protein sequence being passed in with isProtein set to true", () => {
63
54
  const res = tidyUpSequenceData(
@@ -71,69 +62,24 @@ describe("tidyUpSequenceData", () => {
71
62
  { name: "iDon'tFit", start: 25, end: 35 }
72
63
  ]
73
64
  },
74
- { convertAnnotationsFromAAIndices: true, removeUnwantedChars: true }
65
+ { convertAnnotationsFromAAIndices: true }
75
66
  );
67
+
76
68
  res.should.containSubset({
77
- aminoAcidDataForEachBaseOfDNA: [
78
- {
79
- aminoAcid: {
80
- value: ".",
81
- name: "Gap",
82
- threeLettersName: "Gap"
83
- },
84
- positionInCodon: 0,
85
- aminoAcidIndex: 17,
86
- sequenceIndex: 51,
87
- codonRange: {
88
- start: 51,
89
- end: 53
90
- },
91
- fullCodon: true
92
- },
93
- {
94
- aminoAcid: {
95
- value: ".",
96
- name: "Gap",
97
- threeLettersName: "Gap"
98
- },
99
- positionInCodon: 1,
100
- aminoAcidIndex: 17,
101
- sequenceIndex: 52,
102
- codonRange: {
103
- start: 51,
104
- end: 53
105
- },
106
- fullCodon: true
107
- },
108
- {
109
- aminoAcid: {
110
- value: ".",
111
- name: "Gap",
112
- threeLettersName: "Gap"
113
- },
114
- positionInCodon: 2,
115
- aminoAcidIndex: 17,
116
- sequenceIndex: 53,
117
- codonRange: {
118
- start: 51,
119
- end: 53
120
- },
121
- fullCodon: true
122
- }
123
- ],
69
+ aminoAcidDataForEachBaseOfDNA: [],
124
70
  isProtein: true,
125
- size: 54, //size should refer to the DNA length
126
- proteinSize: 18, //proteinSize should refer to the amino acid length
127
- sequence: "ggngcnggnathtgacaytggggngcnggngcnytngcnwsnggnytntrr...", //degenerate sequence
128
- proteinSequence: "gagiuhwgagalasgl*.",
71
+ size: 57, //size should refer to the DNA length
72
+ proteinSize: 19, //proteinSize should refer to the amino acid length
73
+ sequence: "ggngcnggnathtgacaytggggngcnggngcnytngcnwsnhtnggnytnhtntrr", //degenerate sequence
74
+ proteinSequence: "gagiuhwgagalasjglj*",
129
75
  circular: false,
130
76
  features: [
131
77
  { start: 9, end: 32, forward: true },
132
- { start: 30, end: 53, forward: true },
78
+ { start: 30, end: 56, forward: true },
133
79
  {
134
80
  name: "iDon'tFit",
135
- start: 51,
136
- end: 53,
81
+ start: 54,
82
+ end: 56,
137
83
  forward: true
138
84
  }
139
85
  ]