@teselagen/sequence-utils 0.3.38-beta.3 → 0.3.42

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172)
  1. package/DNAComplementMap.d.ts +1 -1
  2. package/README.md +2 -8
  3. package/addGapsToSeqReads.d.ts +3 -16
  4. package/adjustAnnotationsToInsert.d.ts +1 -2
  5. package/adjustBpsToReplaceOrInsert.d.ts +1 -2
  6. package/aliasedEnzymesByName.d.ts +1 -37
  7. package/aminoAcidToDegenerateDnaMap.d.ts +31 -1
  8. package/aminoAcidToDegenerateRnaMap.d.ts +1 -1
  9. package/annotateSingleSeq.d.ts +4 -5
  10. package/annotationTypes.d.ts +2 -2
  11. package/autoAnnotate.d.ts +8 -17
  12. package/bioData.d.ts +58 -10
  13. package/calculateEndStability.d.ts +1 -1
  14. package/calculateNebTa.d.ts +1 -6
  15. package/calculateNebTm.d.ts +4 -6
  16. package/calculatePercentGC.d.ts +1 -1
  17. package/calculateSantaLuciaTm.d.ts +114 -28
  18. package/calculateTm.d.ts +1 -13
  19. package/computeDigestFragments.d.ts +24 -30
  20. package/condensePairwiseAlignmentDifferences.d.ts +1 -1
  21. package/convertAACaretPositionOrRangeToDna.d.ts +1 -2
  22. package/convertDnaCaretPositionOrRangeToAA.d.ts +1 -2
  23. package/cutSequenceByRestrictionEnzyme.d.ts +1 -2
  24. package/defaultEnzymesByName.d.ts +1 -2
  25. package/degenerateDnaToAminoAcidMap.d.ts +1 -1
  26. package/degenerateRnaToAminoAcidMap.d.ts +1 -1
  27. package/deleteSequenceDataAtRange.d.ts +1 -2
  28. package/diffUtils.d.ts +7 -9
  29. package/doesEnzymeChopOutsideOfRecognitionSite.d.ts +1 -2
  30. package/featureTypesAndColors.d.ts +6 -19
  31. package/filterSequenceString.d.ts +10 -14
  32. package/findApproxMatches.d.ts +1 -7
  33. package/findNearestRangeOfSequenceOverlapToPosition.d.ts +1 -2
  34. package/findOrfsInPlasmid.d.ts +11 -2
  35. package/findSequenceMatches.d.ts +1 -11
  36. package/generateAnnotations.d.ts +1 -2
  37. package/generateSequenceData.d.ts +13 -8
  38. package/getAllInsertionsInSeqReads.d.ts +1 -11
  39. package/getAminoAcidDataForEachBaseOfDna.d.ts +5 -6
  40. package/getAminoAcidFromSequenceTriplet.d.ts +1 -1
  41. package/getAminoAcidStringFromSequenceString.d.ts +1 -3
  42. package/getCodonRangeForAASliver.d.ts +4 -3
  43. package/getComplementAminoAcidStringFromSequenceString.d.ts +1 -1
  44. package/getComplementSequenceAndAnnotations.d.ts +1 -5
  45. package/getComplementSequenceString.d.ts +1 -1
  46. package/getCutsiteType.d.ts +1 -2
  47. package/getCutsitesFromSequence.d.ts +1 -2
  48. package/getDegenerateDnaStringFromAAString.d.ts +1 -1
  49. package/getDegenerateRnaStringFromAAString.d.ts +1 -1
  50. package/getDigestFragmentsForCutsites.d.ts +1 -4
  51. package/getDigestFragmentsForRestrictionEnzymes.d.ts +1 -8
  52. package/getInsertBetweenVals.d.ts +1 -2
  53. package/getLeftAndRightOfSequenceInRangeGivenPosition.d.ts +1 -2
  54. package/getOrfsFromSequence.d.ts +11 -17
  55. package/getOverlapBetweenTwoSequences.d.ts +1 -2
  56. package/getPossiblePartsFromSequenceAndEnzymes.d.ts +1 -18
  57. package/getReverseAminoAcidStringFromSequenceString.d.ts +1 -1
  58. package/getReverseComplementAminoAcidStringFromSequenceString.d.ts +1 -1
  59. package/getReverseComplementAnnotation.d.ts +1 -11
  60. package/getReverseComplementSequenceAndAnnotations.d.ts +1 -5
  61. package/getReverseComplementSequenceString.d.ts +1 -1
  62. package/getReverseSequenceString.d.ts +1 -1
  63. package/getSequenceDataBetweenRange.d.ts +1 -9
  64. package/getVirtualDigest.d.ts +10 -11
  65. package/guessIfSequenceIsDnaAndNotProtein.d.ts +1 -5
  66. package/index.cjs +503 -734
  67. package/index.d.ts +5 -8
  68. package/index.js +503 -734
  69. package/index.umd.cjs +503 -734
  70. package/insertGapsIntoRefSeq.d.ts +1 -2
  71. package/insertSequenceDataAtPositionOrRange.d.ts +1 -10
  72. package/isEnzymeType2S.d.ts +1 -2
  73. package/mapAnnotationsToRows.d.ts +1 -9
  74. package/package.json +9 -12
  75. package/prepareCircularViewData.d.ts +1 -2
  76. package/prepareRowData.d.ts +3 -7
  77. package/proteinAlphabet.d.ts +1 -1
  78. package/rotateBpsToPosition.d.ts +1 -1
  79. package/rotateSequenceDataToPosition.d.ts +1 -3
  80. package/shiftAnnotationsByLen.d.ts +3 -4
  81. package/src/autoAnnotate.test.js +1 -0
  82. package/src/filterSequenceString.js +7 -4
  83. package/src/getSequenceDataBetweenRange.js +11 -2
  84. package/src/getSequenceDataBetweenRange.test.js +42 -0
  85. package/src/prepareRowData_output1.json +0 -1
  86. package/src/tidyUpSequenceData.js +5 -2
  87. package/threeLetterSequenceStringToAminoAcidMap.d.ts +921 -11
  88. package/tidyUpAnnotation.d.ts +11 -13
  89. package/tidyUpSequenceData.d.ts +1 -15
  90. package/src/DNAComplementMap.ts +0 -32
  91. package/src/addGapsToSeqReads.ts +0 -436
  92. package/src/adjustAnnotationsToInsert.ts +0 -20
  93. package/src/adjustBpsToReplaceOrInsert.ts +0 -73
  94. package/src/aliasedEnzymesByName.ts +0 -7366
  95. package/src/aminoAcidToDegenerateDnaMap.ts +0 -32
  96. package/src/aminoAcidToDegenerateRnaMap.ts +0 -32
  97. package/src/annotateSingleSeq.ts +0 -37
  98. package/src/annotationTypes.ts +0 -23
  99. package/src/autoAnnotate.ts +0 -290
  100. package/src/bioData.ts +0 -65
  101. package/src/calculateEndStability.ts +0 -91
  102. package/src/calculateNebTa.ts +0 -46
  103. package/src/calculateNebTm.ts +0 -132
  104. package/src/calculatePercentGC.ts +0 -3
  105. package/src/calculateSantaLuciaTm.ts +0 -184
  106. package/src/calculateTm.ts +0 -242
  107. package/src/computeDigestFragments.ts +0 -238
  108. package/src/condensePairwiseAlignmentDifferences.ts +0 -85
  109. package/src/convertAACaretPositionOrRangeToDna.ts +0 -28
  110. package/src/convertDnaCaretPositionOrRangeToAA.ts +0 -28
  111. package/src/cutSequenceByRestrictionEnzyme.ts +0 -345
  112. package/src/defaultEnzymesByName.ts +0 -280
  113. package/src/degenerateDnaToAminoAcidMap.ts +0 -5
  114. package/src/degenerateRnaToAminoAcidMap.ts +0 -5
  115. package/src/deleteSequenceDataAtRange.ts +0 -13
  116. package/src/diffUtils.ts +0 -80
  117. package/src/doesEnzymeChopOutsideOfRecognitionSite.ts +0 -16
  118. package/src/featureTypesAndColors.ts +0 -167
  119. package/src/filterSequenceString.ts +0 -153
  120. package/src/findApproxMatches.ts +0 -58
  121. package/src/findNearestRangeOfSequenceOverlapToPosition.ts +0 -43
  122. package/src/findOrfsInPlasmid.ts +0 -31
  123. package/src/findSequenceMatches.ts +0 -154
  124. package/src/generateAnnotations.ts +0 -39
  125. package/src/generateSequenceData.ts +0 -212
  126. package/src/getAllInsertionsInSeqReads.ts +0 -100
  127. package/src/getAminoAcidDataForEachBaseOfDna.ts +0 -305
  128. package/src/getAminoAcidFromSequenceTriplet.ts +0 -27
  129. package/src/getAminoAcidStringFromSequenceString.ts +0 -36
  130. package/src/getCodonRangeForAASliver.ts +0 -73
  131. package/src/getComplementAminoAcidStringFromSequenceString.ts +0 -10
  132. package/src/getComplementSequenceAndAnnotations.ts +0 -25
  133. package/src/getComplementSequenceString.ts +0 -23
  134. package/src/getCutsiteType.ts +0 -18
  135. package/src/getCutsitesFromSequence.ts +0 -22
  136. package/src/getDegenerateDnaStringFromAAString.ts +0 -15
  137. package/src/getDegenerateRnaStringFromAAString.ts +0 -15
  138. package/src/getDigestFragmentsForCutsites.ts +0 -126
  139. package/src/getDigestFragmentsForRestrictionEnzymes.ts +0 -50
  140. package/src/getInsertBetweenVals.ts +0 -31
  141. package/src/getLeftAndRightOfSequenceInRangeGivenPosition.ts +0 -40
  142. package/src/getMassOfAaString.ts +0 -29
  143. package/src/getOrfsFromSequence.ts +0 -132
  144. package/src/getOverlapBetweenTwoSequences.ts +0 -30
  145. package/src/getPossiblePartsFromSequenceAndEnzymes.ts +0 -149
  146. package/src/getReverseAminoAcidStringFromSequenceString.ts +0 -22
  147. package/src/getReverseComplementAminoAcidStringFromSequenceString.ts +0 -10
  148. package/src/getReverseComplementAnnotation.ts +0 -33
  149. package/src/getReverseComplementSequenceAndAnnotations.ts +0 -46
  150. package/src/getReverseComplementSequenceString.ts +0 -18
  151. package/src/getReverseSequenceString.ts +0 -12
  152. package/src/getSequenceDataBetweenRange.ts +0 -154
  153. package/src/getVirtualDigest.ts +0 -139
  154. package/src/guessIfSequenceIsDnaAndNotProtein.ts +0 -39
  155. package/src/index.test.ts +0 -43
  156. package/src/index.ts +0 -111
  157. package/src/insertGapsIntoRefSeq.ts +0 -43
  158. package/src/insertSequenceDataAtPosition.ts +0 -2
  159. package/src/insertSequenceDataAtPositionOrRange.ts +0 -328
  160. package/src/isEnzymeType2S.ts +0 -5
  161. package/src/mapAnnotationsToRows.ts +0 -256
  162. package/src/prepareCircularViewData.ts +0 -24
  163. package/src/prepareRowData.ts +0 -61
  164. package/src/proteinAlphabet.ts +0 -271
  165. package/src/rotateBpsToPosition.ts +0 -12
  166. package/src/rotateSequenceDataToPosition.ts +0 -54
  167. package/src/shiftAnnotationsByLen.ts +0 -24
  168. package/src/threeLetterSequenceStringToAminoAcidMap.ts +0 -198
  169. package/src/tidyUpAnnotation.ts +0 -205
  170. package/src/tidyUpSequenceData.ts +0 -213
  171. package/src/types.ts +0 -109
  172. package/types.d.ts +0 -105
@@ -1,154 +0,0 @@
1
- import {
2
- modulateRangeBySequenceLength,
3
- flipContainedRange,
4
- Range
5
- } from "@teselagen/range-utils";
6
- import { reduce, uniqBy } from "lodash-es";
7
- import escapeStringRegexp from "escape-string-regexp";
8
- import getAminoAcidStringFromSequenceString from "./getAminoAcidStringFromSequenceString";
9
- import { ambiguous_dna_values, extended_protein_values } from "./bioData";
10
- import getReverseComplementSequenceString from "./getReverseComplementSequenceString";
11
-
12
/**
 * Options accepted by `findSequenceMatches`.
 */
export interface FindSequenceMatchesOptions {
  /** Also search the reverse complement of the sequence and report those hits with `bottomStrand: true`. */
  searchReverseStrand?: boolean;
  /** Search the sequence concatenated with itself so hits that run past the end of the sequence are found. */
  isCircular?: boolean;
  /** Expand each search character through the ambiguity table (DNA or protein) before matching. */
  isAmbiguous?: boolean;
  /** With `isAmbiguous`, selects the protein ambiguity table instead of the DNA one. */
  isProteinSequence?: boolean;
  /** Translate the sequence in three reading frames and search the amino-acid strings; also selects the protein ambiguity table. */
  isProteinSearch?: boolean;
}
19
-
20
/**
 * Finds the ranges at which `searchString` occurs in `sequence`.
 *
 * The top strand is always searched. When `options.searchReverseStrand` is
 * set, the reverse complement of `sequence` is searched with the same options
 * and each hit is flipped back into top-strand coordinates and tagged with
 * `bottomStrand: true`.
 *
 * @param sequence - The sequence to search in.
 * @param searchString - The text to look for.
 * @param options - See `FindSequenceMatchesOptions`.
 * @returns Top-strand matches followed by any bottom-strand matches.
 */
export default function findSequenceMatches(
  sequence: string,
  searchString: string,
  options: FindSequenceMatchesOptions = {}
): (Range & { bottomStrand?: boolean })[] {
  const topStrandMatches = findSequenceMatchesTopStrand(
    sequence,
    searchString,
    options
  );
  if (!options.searchReverseStrand) {
    return topStrandMatches;
  }

  const sequenceLength = sequence.length;
  const reverseSeq = getReverseComplementSequenceString(sequence);
  const bottomStrandMatches = findSequenceMatchesTopStrand(
    reverseSeq,
    searchString,
    options
  ).map(range => ({
    // Hits were found in reverse-complement coordinates; flip each one within
    // the full-length range so it is expressed against the original sequence.
    ...flipContainedRange(
      range,
      { start: 0, end: sequenceLength - 1 },
      sequenceLength
    ),
    bottomStrand: true
  }));

  return [...topStrandMatches, ...bottomStrandMatches];
}
50
-
51
/**
 * Searches a single strand of `sequence` for `searchString`.
 *
 * - The search string is regex-escaped, then (with `isAmbiguous`) expanded
 *   through the protein or DNA ambiguity table.
 * - With `isCircular` the sequence is doubled before searching; every hit is
 *   passed through `modulateRangeBySequenceLength` with the original length.
 * - With `isProteinSearch` the sequence is translated in reading frames 0, 1
 *   and 2 and hits are converted from amino-acid indices back to base-pair
 *   indices.
 * - Matching is case-insensitive and overlapping hits are reported.
 *
 * @returns The matching ranges, de-duplicated by their `start`/`end` pair.
 */
function findSequenceMatchesTopStrand(
  sequence: string,
  searchString: string,
  options: FindSequenceMatchesOptions = {}
): Range[] {
  const { isCircular, isAmbiguous, isProteinSequence, isProteinSearch } =
    options;

  let searchStringToUse = escapeStringRegexp(searchString);
  if (isAmbiguous) {
    // NOTE(review): the ambiguity expansion runs on the already-escaped
    // string, so any letters introduced by the escaping itself would be
    // treated as ambiguity codes — confirm against the escape-string-regexp
    // version in use.
    searchStringToUse = convertAmbiguousStringToRegex(
      searchStringToUse,
      Boolean(isProteinSearch || isProteinSequence)
    );
  }
  // Short circuit if nothing is actually being searched for (e.g. an
  // ambiguous search for "%%" expands to an empty pattern).
  if (!searchStringToUse) return [];

  const sequenceToUse = isCircular ? sequence + sequence : sequence;

  const sequencesToCheck = isProteinSearch
    ? [0, 1, 2].map(offset => ({
        // One translation per reading frame; `offset` is the number of
        // leading bases skipped before translating.
        seqToCheck: getAminoAcidStringFromSequenceString(
          sequenceToUse.slice(offset),
          { doNotExcludeAsterisk: true }
        ),
        offset
      }))
    : [{ seqToCheck: sequenceToUse, offset: 0 }];

  const ranges: Range[] = [];
  for (const { seqToCheck, offset } of sequencesToCheck) {
    const reg = new RegExp(searchStringToUse, "ig");
    let match = reg.exec(seqToCheck);
    while (match !== null) {
      const range: Range = {
        start: match.index,
        // The length of the original search string is used here, not the
        // length of the escaped/expanded pattern.
        end: match.index + searchString.length - 1
      };
      if (isProteinSearch) {
        // Convert amino-acid indices to base-pair indices within this frame.
        range.start = range.start * 3 + offset;
        range.end = range.end * 3 + 2 + offset;
      }
      ranges.push(modulateRangeBySequenceLength(range, sequence.length));
      // Resume one character after the start of this hit so overlapping
      // matches are also found.
      reg.lastIndex = match.index + 1;
      match = reg.exec(seqToCheck);
    }
  }

  return uniqBy(ranges, range => range.start + "-" + range.end);
}
131
-
132
/**
 * Builds a regular-expression source string from a search string by expanding
 * each character through an ambiguity table.
 *
 * Each character is upper-cased and looked up in `extended_protein_values`
 * (when `isProtein`) or `ambiguous_dna_values`. Characters with no (or an
 * empty) table entry are dropped. A one-character entry is appended as is; a
 * longer entry is wrapped in `[...]` so it matches any one of its characters
 * (e.g. an entry of "AG" becomes "[AG]").
 *
 * @param string - The search text to expand.
 * @param isProtein - Selects the protein table instead of the DNA table.
 * @returns The regex source; may be empty when no character is recognised.
 */
function convertAmbiguousStringToRegex(
  string: string,
  isProtein: boolean
): string {
  const lookup = isProtein
    ? (extended_protein_values as Record<string, string>)
    : (ambiguous_dna_values as Record<string, string>);

  let pattern = "";
  for (let i = 0; i < string.length; i++) {
    const value = lookup[string.charAt(i).toUpperCase()];
    if (!value) continue;
    pattern += value.length === 1 ? value : `[${value}]`;
  }
  return pattern;
}
@@ -1,39 +0,0 @@
1
- import { generateRandomRange } from "@teselagen/range-utils";
2
- import shortid from "shortid";
3
- import { Annotation } from "./types";
4
-
5
/**
 * Generates a list of random annotations.
 *
 * @param numberOfAnnotationsToGenerate - How many annotations to create.
 * @param start - Passed to `generateRandomRange` as the range start bound.
 * @param end - Passed to `generateRandomRange` as the range end bound.
 * @param maxLength - Passed to `generateRandomRange` as the maximum length.
 * @returns An array with one random annotation per requested item.
 */
function generateAnnotations(
  numberOfAnnotationsToGenerate: number,
  start: number,
  end: number,
  maxLength: number
): Annotation[] {
  const result: Annotation[] = [];
  for (let i = 0; i < numberOfAnnotationsToGenerate; i++) {
    result.push(generateAnnotation(start, end, maxLength));
  }
  return result;
}
18
-
19
/**
 * Generates one random `misc_feature` annotation.
 *
 * The range comes from `generateRandomRange(start, end, maxLength)`; the name
 * is a random integer in [0, 100000) rendered as a string, the id comes from
 * `shortid()`, the direction is a coin flip, and `notes` is empty.
 */
function generateAnnotation(
  start: number,
  end: number,
  maxLength: number
): Annotation {
  const range = generateRandomRange(start, end, maxLength);
  return {
    ...range,
    name: getRandomInt(0, 100000).toString(),
    type: "misc_feature",
    id: shortid(),
    forward: Math.random() > 0.5,
    notes: {}
  };
}
34
-
35
/**
 * Returns `Math.floor(Math.random() * (max - min)) + min`.
 *
 * For integer bounds with `min < max` this is a random integer in the
 * half-open interval [min, max): `min` can be returned, `max` cannot.
 * When `min === max` the result is always `min`.
 */
function getRandomInt(min: number, max: number): number {
  const span = max - min;
  return Math.floor(Math.random() * span) + min;
}
38
-
39
- export default generateAnnotations;
@@ -1,212 +0,0 @@
1
- import generateAnnotations from "./generateAnnotations";
2
- import { SequenceData } from "./types";
3
-
4
/**
 * Generates random sequence data, e.g. for tests and demos.
 *
 * For a protein (`isProtein`), `proteinSequence` is filled, `sequence` is
 * empty, `circular` is false, and no translations or primers are generated.
 * Otherwise `sequence` is filled, `proteinSequence` is empty and `circular`
 * is a coin flip.
 *
 * Annotation counts default to 10 features, 10 parts, 10 primers and
 * 5 translations. A default is applied only when the count is `undefined` or
 * `null`, so an explicit `0` yields no annotations of that kind.
 *
 * @param options.isProtein - Generate a protein instead of DNA.
 * @param options.sequenceLength - Length of the generated sequence (default 1000).
 * @param options.numFeatures - Number of features to generate.
 * @param options.numParts - Number of parts to generate.
 * @param options.numPrimers - Number of primers to generate (DNA only).
 * @param options.numTranslations - Number of translations to generate (DNA only).
 */
export default function generateSequenceData({
  isProtein,
  sequenceLength = 1000,
  numFeatures,
  numParts,
  numPrimers,
  numTranslations
}: {
  isProtein?: boolean;
  sequenceLength?: number;
  numFeatures?: number;
  numParts?: number;
  numPrimers?: number;
  numTranslations?: number;
} = {}): SequenceData {
  const proteinSequence = isProtein
    ? generateSequence(sequenceLength, true)
    : "";
  const sequence = !isProtein ? generateSequence(sequenceLength) : "";

  // All annotation kinds span the whole sequence; only count and maximum
  // length differ.
  const randomAnnotations = (count: number, maxLength: number) =>
    generateAnnotations(count, 0, sequenceLength - 1, maxLength);

  return {
    circular: isProtein ? false : Math.random() > 0.5,
    name: "p-" + Math.floor(Math.random() * 100),
    description: "",
    isProtein: !!isProtein,
    sequence,
    proteinSequence,
    translations: isProtein
      ? []
      : randomAnnotations(numTranslations ?? 5, sequenceLength / 3),
    features: randomAnnotations(numFeatures ?? 10, sequenceLength / 3),
    primers: isProtein ? [] : randomAnnotations(numPrimers ?? 10, 50),
    parts: randomAnnotations(numParts ?? 10, sequenceLength / 3)
  };
}
56
-
57
- // export default tidyUpSequenceData(exampleData)
58
-
59
/**
 * Builds a random sequence string of `m` characters.
 *
 * @param m - Number of characters to generate (default 9).
 * @param isProtein - When truthy, characters are drawn from the 20
 *   amino-acid letters "ACDEFGHIKLMNPQRSTVWY"; otherwise from the lowercase
 *   DNA bases "gatc".
 */
function generateSequence(m = 9, isProtein?: boolean): string {
  const alphabet = isProtein ? "ACDEFGHIKLMNPQRSTVWY" : "gatc";
  let result = "";
  for (let i = 0; i < m; i++) {
    result += alphabet.charAt(Math.floor(Math.random() * alphabet.length));
  }
  return result;
}
67
-
68
- // tnr: this is used to generate a very large, multi-featured sequence
69
- // var string = "ggggcccccgggggccc";
70
- // var reallyLongFakeSequence = "";
71
- // for (var i = 1; i < 100000; i++) {
72
- // reallyLongFakeSequence += string;
73
- // if (i % 100 === 0) {
74
- // reallyLongFakeSequence += 'taafatg';
75
- // sequenceData.features.push({
76
- // id: i,
77
- // start: parseInt(i * 10),
78
- // end: parseInt(i * 10 + 100),
79
- // name: 'cooljim',
80
- // color: 'green',
81
- // forward: true,
82
- // annotationType: "feature"
83
- // });
84
- // }
85
- // }
86
- // sequenceData.sequence += reallyLongFakeSequence;
87
- //
88
- // export default function() {
89
- // var baseSeqData = {
90
- //
91
- // }
92
- // function seqGen() {
93
- //
94
- // }
95
- // }
96
- // "features" : [
97
- // {
98
- // "name" : "1",
99
- // "type" : "misc_feature",
100
- // "start" : 1,
101
- // "end" : 1,
102
- // "strand" : 1,
103
- // "notes" : [],
104
- // "color": 'blue'
105
- // },
106
- // {
107
- // "name" : "2",
108
- // "type" : "misc_feature",
109
- // "start" : 1,
110
- // "end" : 1,
111
- // "strand" : 1,
112
- // "notes" : [],
113
- // "color": 'blue'
114
- // },
115
- // {
116
- // "name" : "3",
117
- // "type" : "misc_feature",
118
- // "start" : 1,
119
- // "end" : 1,
120
- // "strand" : 1,
121
- // "notes" : [],
122
- // "color": 'blue'
123
- // },
124
- // {
125
- // "name" : "4",
126
- // "type" : "misc_feature",
127
- // "start" : 1,
128
- // "end" : 14,
129
- // "strand" : 1,
130
- // "notes" : [],
131
- // "color": 'blue'
132
- // },
133
- // {
134
- // "name" : "5",
135
- // "type" : "misc_feature",
136
- // "start" : 1,
137
- // "end" : 1,
138
- // "strand" : 1,
139
- // "notes" : [],
140
- // "color": 'blue'
141
- // },
142
- // {
143
- // "name" : "6",
144
- // "type" : "misc_feature",
145
- // "id" : "5590c1978fafgw979df000a4f02c7a",
146
- // "start" : 4,
147
- // "end" : 6,
148
- // "strand" : 1,
149
- // "notes" : [],
150
- // "color": 'orange'
151
- // },
152
- // {
153
- // "name" : "housemouserousepouse",
154
- // "type" : "misc_feature",
155
- // "id" : "5590c197897fs9df000a4f02c7a",
156
- // "start" : 4,
157
- // "end" : 6,
158
- // "strand" : 1,
159
- // "notes" : [],
160
- // "color": 'orange'
161
- // },
162
- // {
163
- // "name" : "housemouserousepouse",
164
- // "type" : "misc_feature",
165
- // "id" : "5590c1978979dasdfaf000a4f02c7a",
166
- // "start" : 4,
167
- // "end" : 6,
168
- // "strand" : 1,
169
- // "notes" : [],
170
- // "color": 'orange'
171
- // },
172
- // {
173
- // "name" : "housemouserousepouse",
174
- // "type" : "misc_feature",
175
- // "id" : "5590c197faas8979df000a4f02c7a",
176
- // "start" : 4,
177
- // "end" : 6,
178
- // "strand" : 1,
179
- // "notes" : [],
180
- // "color": 'orange'
181
- // },
182
- // {
183
- // "name" : "housemouserousepouse",
184
- // "type" : "misc_feature",
185
- // "id" : "5590c1978979df000a4f02c7aasd",
186
- // "start" : 4,
187
- // "end" : 6,
188
- // "strand" : 1,
189
- // "notes" : [],
190
- // "color": 'orange'
191
- // },
192
- // {
193
- // "name" : "house",
194
- // "type" : "misc_feature",
195
- // "id" : "5590c1978979df000a4f02c7b",
196
- // "start" : 70,
197
- // "end" : 90,
198
- // "strand" : 1,
199
- // "notes" : [],
200
- // "color": 'green'
201
- // },
202
- // {
203
- // "name" : "weer",
204
- // "type" : "misc_feature",
205
- // "id" : "5590c1d88979df000a4f02f5c",
206
- // "start" : 3,
207
- // "end" : 69,
208
- // "strand" : 1,
209
- // "notes" : [],
210
- // "color": 'red'
211
- // }
212
- // ],
@@ -1,100 +0,0 @@
1
- // seqReads should be an array of objects [{name, seq, pos, cigar}, {name, seq, pos, cigar}, ...]
2
-
3
- export interface SeqRead {
4
- name: string;
5
- seq: string;
6
- pos: number;
7
- cigar: string;
8
- }
9
-
10
- export interface InsertionInfo {
11
- bpPos: number;
12
- number: number;
13
- }
14
-
15
- export default function getAllInsertionsInSeqReads(
16
- seqReads: SeqRead[]
17
- ): InsertionInfo[] {
18
- const allInsertionsInSeqReads: InsertionInfo[] = [];
19
- seqReads.forEach(seqRead => {
20
- // split cigar string at M, D, or I (match, deletion, or insertion), e.g. ["2M", "3I", "39M", "3D"...]
21
- const splitSeqRead = seqRead.cigar.match(/([0-9]*[MDI])/g);
22
-
23
- if (!splitSeqRead) return;
24
-
25
- for (let componentI = 0; componentI < splitSeqRead.length; componentI++) {
26
- if (splitSeqRead[componentI].slice(-1) === "I") {
27
- let bpPosOfInsertion = seqRead.pos;
28
- const numberOfInsertions = Number(
29
- splitSeqRead[componentI].slice(0, -1)
30
- );
31
- for (let i = 0; i < componentI; i++) {
32
- if (splitSeqRead[i].slice(-1) !== "I") {
33
- const previousComponentNumber = Number(
34
- splitSeqRead[i].slice(0, -1)
35
- );
36
- bpPosOfInsertion += previousComponentNumber;
37
- }
38
- }
39
- const insertionInfo = {
40
- // keeping bpPos 1-based
41
- bpPos: bpPosOfInsertion,
42
- number: numberOfInsertions
43
- };
44
- allInsertionsInSeqReads.push(insertionInfo);
45
- }
46
- }
47
- });
48
- // sort insertions by ascending bp pos
49
- const sortedInsertions = allInsertionsInSeqReads.sort((a, b) => {
50
- return a.bpPos - b.bpPos;
51
- });
52
- // combine duplicate or overlapping insertions from seq reads
53
- for (let i = 0; i < sortedInsertions.length - 1; i++) {
54
- if (sortedInsertions[i].bpPos === sortedInsertions[i + 1].bpPos) {
55
- if (sortedInsertions[i].number > sortedInsertions[i + 1].number) {
56
- // remove the one with fewer number of gaps from array
57
- sortedInsertions.splice(i + 1, 1);
58
- i--;
59
- } else if (sortedInsertions[i].number < sortedInsertions[i + 1].number) {
60
- sortedInsertions.splice(i, 1);
61
- i--;
62
- } else if (
63
- sortedInsertions[i].number === sortedInsertions[i + 1].number
64
- ) {
65
- sortedInsertions.splice(i, 1);
66
- i--;
67
- }
68
- }
69
- }
70
- // sortedInsertions is an array of objects [{bpPos: bp pos of insertion, number: # of insertions}, {bpPos, number}, ...]
71
- return sortedInsertions;
72
- }
73
-
74
- // function getAllInsertionsInSeqReads(seqReads) {
75
- // let allInsertionBpPosInSeqReads = [];
76
- // seqReads.forEach(seqRead => {
77
- // // split cigar string at M, D, or I (match, deletion, or insertion)
78
- // // ["2M", "3I", "39M", "3D"...]
79
- // const splitSeqRead = seqRead.cigar.match(/([0-9]*[MDI])/g)
80
- // splitSeqRead.forEach(component => {
81
- // // keeping bpPos 1-based
82
- // let bpPosOfInsertion = seqRead.pos;
83
- // if (component.slice(-1) === "I") {
84
- // const numberOfInsertions = Number(component.slice(0, -1));
85
- // const componentIndex = splitSeqRead.indexOf(component);
86
- // for (let i = 0; i < componentIndex; i++) {
87
- // const previousComponentNumber = Number(splitSeqRead[i].slice(0, -1));
88
- // bpPosOfInsertion += previousComponentNumber;
89
- // }
90
- // for (let i = 1; i <= numberOfInsertions; i++) {
91
- // allInsertionBpPosInSeqReads.push(bpPosOfInsertion - i);
92
- // }
93
- // }
94
- // });
95
- // });
96
- // // allInsertionBpPosInSeqReads should be an array of bp pos [6, 15, 9, 2, 23...]
97
- // // remove duplicates, organize in ascending order
98
- // const uniqueInsertionBpPos = [...new Set(allInsertionBpPosInSeqReads)].sort(function(a, b) { return a - b });
99
- // return uniqueInsertionBpPos;
100
- // }