@teselagen/sequence-utils 0.3.32 → 0.3.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,10 +1,10 @@
1
1
  {
2
2
  "name": "@teselagen/sequence-utils",
3
- "version": "0.3.32",
3
+ "version": "0.3.35",
4
4
  "type": "module",
5
5
  "dependencies": {
6
6
  "escape-string-regexp": "5.0.0",
7
- "jsondiffpatch": "0.4.1",
7
+ "jsondiffpatch": "0.7.3",
8
8
  "string-splice": "^1.3.0",
9
9
  "lodash-es": "^4.17.21",
10
10
  "shortid": "2.2.16",
@@ -108,6 +108,14 @@ declare const proteinAlphabet: {
108
108
  color: string;
109
109
  mass: number;
110
110
  };
111
+ O: {
112
+ value: string;
113
+ name: string;
114
+ threeLettersName: string;
115
+ colorByFamily: string;
116
+ color: string;
117
+ mass: number;
118
+ };
111
119
  M: {
112
120
  value: string;
113
121
  name: string;
package/src/diffUtils.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import { cloneDeep, forEach } from "lodash-es";
2
- import { diff, patch, reverse } from "jsondiffpatch/dist/jsondiffpatch.umd";
2
+ import { diff, patch, reverse } from "jsondiffpatch";
3
3
 
4
4
  import tidyUpSequenceData from "./tidyUpSequenceData";
5
5
 
@@ -103,7 +103,7 @@ export function getAcceptedChars({
103
103
  isMixedRnaAndDna
104
104
  } = {}) {
105
105
  return isProtein
106
- ? `${extended_protein_letters.toLowerCase()}}`
106
+ ? `${extended_protein_letters.toLowerCase()}`
107
107
  : isOligo
108
108
  ? ambiguous_rna_letters.toLowerCase() + "t"
109
109
  : isRna
@@ -186,6 +186,9 @@ export default function getAminoAcidDataForEachBaseOfDna(
186
186
  optionalSubrangeRange,
187
187
  isProteinSequence
188
188
  ) {
189
+ if (!originalSequenceString) {
190
+ return [];
191
+ }
189
192
  // Obtain derived properties, see getTranslatedSequenceProperties
190
193
  const {
191
194
  sequenceString,
@@ -205,7 +208,7 @@ export default function getAminoAcidDataForEachBaseOfDna(
205
208
  // Iterate over the DNA sequence length in increments of 3
206
209
  for (let index = 0; index < sequenceStringLength; index += 3) {
207
210
  let aminoAcid;
208
- const aminoAcidIndex = index / 3;
211
+ const aminoAcidIndex = Math.floor(index / 3);
209
212
  let codonPositionsInCDS;
210
213
  let basesRead;
211
214
 
@@ -225,22 +225,14 @@ describe("getSequenceDataBetweenRange", () => {
225
225
  end: 3
226
226
  }
227
227
  );
228
- res.should.containSubset({
229
- sequence: "gc",
230
- features: [
231
- {
232
- start: 0,
233
- end: 1,
234
- locations: [
235
- {
236
- start: 0,
237
- end: 1
238
- }
239
- ],
240
- name: "happy"
241
- }
242
- ]
243
- });
228
+ res.features.should.containSubset([
229
+ {
230
+ start: 0,
231
+ end: 1,
232
+ name: "happy"
233
+ }
234
+ ]);
235
+ res.sequence.should.equal("gc");
244
236
  });
245
237
  it("feature with locations, non circular enclosing range", () => {
246
238
  const res = getSequenceDataBetweenRange(
@@ -283,7 +275,7 @@ describe("getSequenceDataBetweenRange", () => {
283
275
  ]
284
276
  });
285
277
  });
286
- it.only("feature with locations, non circular, non-fully enclosing range - it should trim the start/end correctly to match the location", () => {
278
+ it("feature with locations, non circular, non-fully enclosing range - it should trim the start/end correctly to match the location", () => {
287
279
  const res = getSequenceDataBetweenRange(
288
280
  {
289
281
  sequence: "gggatgcatgca",
@@ -27,9 +27,10 @@ export default function insertSequenceDataAtPositionOrRange(
27
27
  ...options
28
28
  });
29
29
  const newSequenceData = cloneDeep(existingSequenceData);
30
- const insertLength = sequenceDataToInsert.proteinSequence
31
- ? sequenceDataToInsert.proteinSequence.length * 3
32
- : sequenceDataToInsert.sequence.length;
30
+ const insertLength =
31
+ sequenceDataToInsert.isProtein && sequenceDataToInsert.proteinSequence
32
+ ? sequenceDataToInsert.proteinSequence.length * 3
33
+ : sequenceDataToInsert.sequence.length;
33
34
  let caretPosition = caretPositionOrRange;
34
35
 
35
36
  const isInsertSameLengthAsSelection =
@@ -187,7 +188,7 @@ function adjustAnnotationsToDelete(annotationsToBeAdjusted, range, maxLength) {
187
188
  ...newRange,
188
189
  start: newLocations[0].start,
189
190
  end: newLocations[newLocations.length - 1].end,
190
- ...(newLocations.length > 1 && { locations: newLocations })
191
+ ...(newLocations.length > 0 && { locations: newLocations })
191
192
  };
192
193
  } else {
193
194
  return newRange;
@@ -112,6 +112,15 @@ const proteinAlphabet = {
112
112
  mass: 128.17228
113
113
  },
114
114
 
115
+ O: {
116
+ value: "O",
117
+ name: "Pyrrolysine",
118
+ threeLettersName: "Pyl",
119
+ colorByFamily: "#FFC0CB",
120
+ color: "hsl(264.7, 100%, 69%)",
121
+ mass: 255.313
122
+ },
123
+
115
124
  M: {
116
125
  value: "M",
117
126
  name: "Methionine",
@@ -1,6 +1,6 @@
1
1
  import proteinAlphabet from "./proteinAlphabet";
2
2
 
3
- const threeLetterSequenceStringToAminoAcidMap = {
3
+ const initThreeLetterSequenceStringToAminoAcidMap = {
4
4
  gct: proteinAlphabet.A,
5
5
  gcc: proteinAlphabet.A,
6
6
  gca: proteinAlphabet.A,
@@ -99,8 +99,80 @@ const threeLetterSequenceStringToAminoAcidMap = {
99
99
  taa: proteinAlphabet["*"],
100
100
  tag: proteinAlphabet["*"],
101
101
  tga: proteinAlphabet["*"],
102
+ uaa: proteinAlphabet["*"],
103
+ uag: proteinAlphabet["*"],
104
+ uga: proteinAlphabet["*"],
102
105
  "...": proteinAlphabet["."],
103
106
  "---": proteinAlphabet["-"]
104
107
  };
105
108
 
109
+ // IUPAC nucleotide codes (DNA/RNA) with U awareness
110
+ const IUPAC = {
111
+ A: ["A"],
112
+ C: ["C"],
113
+ G: ["G"],
114
+ T: ["T"],
115
+ U: ["U"],
116
+
117
+ R: ["A", "G"],
118
+ Y: ["C", "T", "U"],
119
+ K: ["G", "T", "U"],
120
+ M: ["A", "C"],
121
+ S: ["G", "C"],
122
+ W: ["A", "T", "U"],
123
+ B: ["C", "G", "T", "U"],
124
+ D: ["A", "G", "T", "U"],
125
+ H: ["A", "C", "T", "U"],
126
+ V: ["A", "C", "G"],
127
+ N: ["A", "C", "G", "T", "U"],
128
+ X: ["A", "C", "G", "T", "U"]
129
+ };
130
+
131
+
132
+ function expandAndResolve(threeLetterCodon) {
133
+ const chars = threeLetterCodon.toUpperCase().split("");
134
+ const picks = chars.map((c) => IUPAC[c] || [c]);
135
+
136
+ let allPossibleThreeLetterCodons = [""];
137
+ for (const set of picks) {
138
+ const next = [];
139
+ for (const prefix of allPossibleThreeLetterCodons) for (const b of set) next.push(prefix + b);
140
+ allPossibleThreeLetterCodons = next;
141
+ }
142
+ let foundAminoAcid = null;
143
+ for (const codon of allPossibleThreeLetterCodons) {
144
+ const lowerCodon = codon.toLowerCase();
145
+ const aminoAcidObj = initThreeLetterSequenceStringToAminoAcidMap[lowerCodon] ?? initThreeLetterSequenceStringToAminoAcidMap[lowerCodon.replace(/u/g, "t")] ?? initThreeLetterSequenceStringToAminoAcidMap[lowerCodon.replace(/t/g, "u")];
146
+ if (aminoAcidObj) {
147
+ if (!foundAminoAcid) {
148
+ foundAminoAcid = aminoAcidObj;
149
+ } else if (foundAminoAcid.value !== aminoAcidObj.value ) {
150
+ return null
151
+ }
152
+ } else {
153
+ return null;
154
+ }
155
+ }
156
+ return foundAminoAcid;
157
+ }
158
+
159
+ function getCodonToAminoAcidMap() {
160
+ const map = initThreeLetterSequenceStringToAminoAcidMap;
161
+ // generate all IUPAC 3-mers
162
+ const codes = Object.keys(IUPAC);
163
+ for (const a of codes)
164
+ for (const b of codes)
165
+ for (const c of codes) {
166
+ const codon = a + b + c;
167
+ const lowerCodon = codon.toLowerCase();
168
+ if (map[lowerCodon]) continue;
169
+ const aminoAcidObj = expandAndResolve(codon);
170
+ if (aminoAcidObj) map[lowerCodon] = aminoAcidObj;
171
+ }
172
+
173
+ return map;
174
+ }
175
+
176
+ const threeLetterSequenceStringToAminoAcidMap = getCodonToAminoAcidMap();
177
+
106
178
  export default threeLetterSequenceStringToAminoAcidMap;
@@ -879,6 +879,30 @@ declare const threeLetterSequenceStringToAminoAcidMap: {
879
879
  color: string;
880
880
  mass: number;
881
881
  };
882
+ uaa: {
883
+ value: string;
884
+ name: string;
885
+ threeLettersName: string;
886
+ colorByFamily: string;
887
+ color: string;
888
+ mass: number;
889
+ };
890
+ uag: {
891
+ value: string;
892
+ name: string;
893
+ threeLettersName: string;
894
+ colorByFamily: string;
895
+ color: string;
896
+ mass: number;
897
+ };
898
+ uga: {
899
+ value: string;
900
+ name: string;
901
+ threeLettersName: string;
902
+ colorByFamily: string;
903
+ color: string;
904
+ mass: number;
905
+ };
882
906
  "...": {
883
907
  value: string;
884
908
  name: string;