@teselagen/sequence-utils 0.3.38-beta.2 → 0.3.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. package/DNAComplementMap.d.ts +1 -1
  2. package/addGapsToSeqReads.d.ts +3 -16
  3. package/adjustAnnotationsToInsert.d.ts +1 -2
  4. package/adjustBpsToReplaceOrInsert.d.ts +1 -2
  5. package/aliasedEnzymesByName.d.ts +1 -37
  6. package/aminoAcidToDegenerateDnaMap.d.ts +31 -1
  7. package/aminoAcidToDegenerateRnaMap.d.ts +1 -1
  8. package/annotateSingleSeq.d.ts +4 -5
  9. package/annotationTypes.d.ts +2 -2
  10. package/autoAnnotate.d.ts +8 -17
  11. package/bioData.d.ts +58 -10
  12. package/calculateEndStability.d.ts +1 -1
  13. package/calculateNebTa.d.ts +1 -6
  14. package/calculateNebTm.d.ts +4 -6
  15. package/calculatePercentGC.d.ts +1 -1
  16. package/calculateSantaLuciaTm.d.ts +114 -28
  17. package/calculateTm.d.ts +1 -13
  18. package/computeDigestFragments.d.ts +24 -30
  19. package/condensePairwiseAlignmentDifferences.d.ts +1 -1
  20. package/convertAACaretPositionOrRangeToDna.d.ts +1 -2
  21. package/convertDnaCaretPositionOrRangeToAA.d.ts +1 -2
  22. package/cutSequenceByRestrictionEnzyme.d.ts +1 -2
  23. package/defaultEnzymesByName.d.ts +1 -2
  24. package/degenerateDnaToAminoAcidMap.d.ts +1 -1
  25. package/degenerateRnaToAminoAcidMap.d.ts +1 -1
  26. package/deleteSequenceDataAtRange.d.ts +1 -2
  27. package/diffUtils.d.ts +7 -9
  28. package/doesEnzymeChopOutsideOfRecognitionSite.d.ts +1 -2
  29. package/featureTypesAndColors.d.ts +6 -19
  30. package/filterSequenceString.d.ts +10 -14
  31. package/findApproxMatches.d.ts +1 -7
  32. package/findNearestRangeOfSequenceOverlapToPosition.d.ts +1 -2
  33. package/findOrfsInPlasmid.d.ts +11 -2
  34. package/findSequenceMatches.d.ts +1 -11
  35. package/generateAnnotations.d.ts +1 -2
  36. package/generateSequenceData.d.ts +13 -8
  37. package/getAllInsertionsInSeqReads.d.ts +1 -11
  38. package/getAminoAcidDataForEachBaseOfDna.d.ts +5 -6
  39. package/getAminoAcidFromSequenceTriplet.d.ts +1 -1
  40. package/getAminoAcidStringFromSequenceString.d.ts +1 -3
  41. package/getCodonRangeForAASliver.d.ts +4 -3
  42. package/getComplementAminoAcidStringFromSequenceString.d.ts +1 -1
  43. package/getComplementSequenceAndAnnotations.d.ts +1 -5
  44. package/getComplementSequenceString.d.ts +1 -1
  45. package/getCutsiteType.d.ts +1 -2
  46. package/getCutsitesFromSequence.d.ts +1 -2
  47. package/getDegenerateDnaStringFromAAString.d.ts +1 -1
  48. package/getDegenerateRnaStringFromAAString.d.ts +1 -1
  49. package/getDigestFragmentsForCutsites.d.ts +1 -4
  50. package/getDigestFragmentsForRestrictionEnzymes.d.ts +1 -8
  51. package/getInsertBetweenVals.d.ts +1 -2
  52. package/getLeftAndRightOfSequenceInRangeGivenPosition.d.ts +1 -2
  53. package/getOrfsFromSequence.d.ts +11 -17
  54. package/getOverlapBetweenTwoSequences.d.ts +1 -2
  55. package/getPossiblePartsFromSequenceAndEnzymes.d.ts +1 -18
  56. package/getReverseAminoAcidStringFromSequenceString.d.ts +1 -1
  57. package/getReverseComplementAminoAcidStringFromSequenceString.d.ts +1 -1
  58. package/getReverseComplementAnnotation.d.ts +1 -11
  59. package/getReverseComplementSequenceAndAnnotations.d.ts +1 -5
  60. package/getReverseComplementSequenceString.d.ts +1 -1
  61. package/getReverseSequenceString.d.ts +1 -1
  62. package/getSequenceDataBetweenRange.d.ts +1 -9
  63. package/getVirtualDigest.d.ts +10 -11
  64. package/guessIfSequenceIsDnaAndNotProtein.d.ts +1 -5
  65. package/index.cjs +491 -728
  66. package/index.d.ts +5 -8
  67. package/index.js +491 -728
  68. package/index.umd.cjs +491 -728
  69. package/insertGapsIntoRefSeq.d.ts +1 -2
  70. package/insertSequenceDataAtPositionOrRange.d.ts +1 -10
  71. package/isEnzymeType2S.d.ts +1 -2
  72. package/mapAnnotationsToRows.d.ts +1 -9
  73. package/package.json +9 -12
  74. package/prepareCircularViewData.d.ts +1 -2
  75. package/prepareRowData.d.ts +3 -7
  76. package/proteinAlphabet.d.ts +1 -1
  77. package/rotateBpsToPosition.d.ts +1 -1
  78. package/rotateSequenceDataToPosition.d.ts +1 -3
  79. package/shiftAnnotationsByLen.d.ts +3 -4
  80. package/src/autoAnnotate.test.js +1 -0
  81. package/src/getSequenceDataBetweenRange.js +11 -2
  82. package/src/getSequenceDataBetweenRange.test.js +42 -0
  83. package/src/prepareRowData_output1.json +0 -1
  84. package/threeLetterSequenceStringToAminoAcidMap.d.ts +921 -11
  85. package/tidyUpAnnotation.d.ts +11 -13
  86. package/tidyUpSequenceData.d.ts +1 -15
  87. package/src/DNAComplementMap.ts +0 -32
  88. package/src/addGapsToSeqReads.ts +0 -436
  89. package/src/adjustAnnotationsToInsert.ts +0 -20
  90. package/src/adjustBpsToReplaceOrInsert.ts +0 -73
  91. package/src/aliasedEnzymesByName.ts +0 -7366
  92. package/src/aminoAcidToDegenerateDnaMap.ts +0 -32
  93. package/src/aminoAcidToDegenerateRnaMap.ts +0 -32
  94. package/src/annotateSingleSeq.ts +0 -37
  95. package/src/annotationTypes.ts +0 -23
  96. package/src/autoAnnotate.ts +0 -290
  97. package/src/bioData.ts +0 -65
  98. package/src/calculateEndStability.ts +0 -91
  99. package/src/calculateNebTa.ts +0 -46
  100. package/src/calculateNebTm.ts +0 -132
  101. package/src/calculatePercentGC.ts +0 -3
  102. package/src/calculateSantaLuciaTm.ts +0 -184
  103. package/src/calculateTm.ts +0 -242
  104. package/src/computeDigestFragments.ts +0 -238
  105. package/src/condensePairwiseAlignmentDifferences.ts +0 -85
  106. package/src/convertAACaretPositionOrRangeToDna.ts +0 -28
  107. package/src/convertDnaCaretPositionOrRangeToAA.ts +0 -28
  108. package/src/cutSequenceByRestrictionEnzyme.ts +0 -345
  109. package/src/defaultEnzymesByName.ts +0 -280
  110. package/src/degenerateDnaToAminoAcidMap.ts +0 -5
  111. package/src/degenerateRnaToAminoAcidMap.ts +0 -5
  112. package/src/deleteSequenceDataAtRange.ts +0 -13
  113. package/src/diffUtils.ts +0 -80
  114. package/src/doesEnzymeChopOutsideOfRecognitionSite.ts +0 -16
  115. package/src/featureTypesAndColors.ts +0 -167
  116. package/src/filterSequenceString.ts +0 -153
  117. package/src/findApproxMatches.ts +0 -58
  118. package/src/findNearestRangeOfSequenceOverlapToPosition.ts +0 -43
  119. package/src/findOrfsInPlasmid.ts +0 -31
  120. package/src/findSequenceMatches.ts +0 -154
  121. package/src/generateAnnotations.ts +0 -39
  122. package/src/generateSequenceData.ts +0 -212
  123. package/src/getAllInsertionsInSeqReads.ts +0 -100
  124. package/src/getAminoAcidDataForEachBaseOfDna.ts +0 -305
  125. package/src/getAminoAcidFromSequenceTriplet.ts +0 -27
  126. package/src/getAminoAcidStringFromSequenceString.ts +0 -36
  127. package/src/getCodonRangeForAASliver.ts +0 -73
  128. package/src/getComplementAminoAcidStringFromSequenceString.ts +0 -10
  129. package/src/getComplementSequenceAndAnnotations.ts +0 -25
  130. package/src/getComplementSequenceString.ts +0 -23
  131. package/src/getCutsiteType.ts +0 -18
  132. package/src/getCutsitesFromSequence.ts +0 -22
  133. package/src/getDegenerateDnaStringFromAAString.ts +0 -15
  134. package/src/getDegenerateRnaStringFromAAString.ts +0 -15
  135. package/src/getDigestFragmentsForCutsites.ts +0 -126
  136. package/src/getDigestFragmentsForRestrictionEnzymes.ts +0 -50
  137. package/src/getInsertBetweenVals.ts +0 -31
  138. package/src/getLeftAndRightOfSequenceInRangeGivenPosition.ts +0 -40
  139. package/src/getMassOfAaString.ts +0 -29
  140. package/src/getOrfsFromSequence.ts +0 -132
  141. package/src/getOverlapBetweenTwoSequences.ts +0 -30
  142. package/src/getPossiblePartsFromSequenceAndEnzymes.ts +0 -149
  143. package/src/getReverseAminoAcidStringFromSequenceString.ts +0 -22
  144. package/src/getReverseComplementAminoAcidStringFromSequenceString.ts +0 -10
  145. package/src/getReverseComplementAnnotation.ts +0 -33
  146. package/src/getReverseComplementSequenceAndAnnotations.ts +0 -46
  147. package/src/getReverseComplementSequenceString.ts +0 -18
  148. package/src/getReverseSequenceString.ts +0 -12
  149. package/src/getSequenceDataBetweenRange.ts +0 -154
  150. package/src/getVirtualDigest.ts +0 -139
  151. package/src/guessIfSequenceIsDnaAndNotProtein.ts +0 -39
  152. package/src/index.test.ts +0 -43
  153. package/src/index.ts +0 -111
  154. package/src/insertGapsIntoRefSeq.ts +0 -43
  155. package/src/insertSequenceDataAtPosition.ts +0 -2
  156. package/src/insertSequenceDataAtPositionOrRange.ts +0 -328
  157. package/src/isEnzymeType2S.ts +0 -5
  158. package/src/mapAnnotationsToRows.ts +0 -256
  159. package/src/prepareCircularViewData.ts +0 -24
  160. package/src/prepareRowData.ts +0 -61
  161. package/src/proteinAlphabet.ts +0 -271
  162. package/src/rotateBpsToPosition.ts +0 -12
  163. package/src/rotateSequenceDataToPosition.ts +0 -54
  164. package/src/shiftAnnotationsByLen.ts +0 -24
  165. package/src/threeLetterSequenceStringToAminoAcidMap.ts +0 -198
  166. package/src/tidyUpAnnotation.ts +0 -205
  167. package/src/tidyUpSequenceData.ts +0 -213
  168. package/src/types.ts +0 -109
  169. package/types.d.ts +0 -105
@@ -1,154 +0,0 @@
1
- import {
2
- modulateRangeBySequenceLength,
3
- flipContainedRange,
4
- Range
5
- } from "@teselagen/range-utils";
6
- import { reduce, uniqBy } from "lodash-es";
7
- import escapeStringRegexp from "escape-string-regexp";
8
- import getAminoAcidStringFromSequenceString from "./getAminoAcidStringFromSequenceString";
9
- import { ambiguous_dna_values, extended_protein_values } from "./bioData";
10
- import getReverseComplementSequenceString from "./getReverseComplementSequenceString";
11
-
12
- export interface FindSequenceMatchesOptions {
13
- searchReverseStrand?: boolean;
14
- isCircular?: boolean;
15
- isAmbiguous?: boolean;
16
- isProteinSequence?: boolean;
17
- isProteinSearch?: boolean;
18
- }
19
-
20
/**
 * Finds every occurrence of `searchString` in `sequence`.
 *
 * The top strand is always searched. When `options.searchReverseStrand` is
 * set, the reverse complement of `sequence` is searched as well; each of those
 * hits is passed through `flipContainedRange` against the full-length range
 * `{ start: 0, end: sequence.length - 1 }` (presumably mapping it back into
 * top-strand coordinates — confirm against `@teselagen/range-utils`) and is
 * tagged with `bottomStrand: true`.
 *
 * @param sequence - The sequence to search in.
 * @param searchString - The text to look for.
 * @param options - See {@link FindSequenceMatchesOptions}.
 * @returns Top-strand hits followed by any bottom-strand hits.
 */
export default function findSequenceMatches(
  sequence: string,
  searchString: string,
  options: FindSequenceMatchesOptions = {}
): (Range & { bottomStrand?: boolean })[] {
  const topStrandHits = findSequenceMatchesTopStrand(
    sequence,
    searchString,
    options
  );

  // Nothing more to do unless the caller asked for the other strand too.
  if (!options.searchReverseStrand) {
    return topStrandHits;
  }

  const seqLen = sequence.length;
  const reverseComplement = getReverseComplementSequenceString(sequence);
  const bottomStrandHits = findSequenceMatchesTopStrand(
    reverseComplement,
    searchString,
    options
  ).map(hit => ({
    ...flipContainedRange(hit, { start: 0, end: seqLen - 1 }, seqLen),
    bottomStrand: true
  }));

  return [...topStrandHits, ...bottomStrandHits];
}
50
-
51
/**
 * Finds every (possibly overlapping) occurrence of `searchString` in
 * `sequence`, reading `sequence` as given (no reverse-complementing).
 *
 * Steps:
 *  1. The search string is regex-escaped; with `isAmbiguous` it is then
 *     expanded through `convertAmbiguousStringToRegex` (protein table when
 *     `isProteinSearch` or `isProteinSequence` is set, DNA table otherwise).
 *  2. With `isCircular` the sequence is doubled so hits can span the origin.
 *  3. With `isProteinSearch` the (possibly doubled) sequence is translated in
 *     the three forward reading frames and the amino-acid strings are
 *     searched; hit coordinates are converted back to DNA positions.
 *  4. Each hit is normalised with `modulateRangeBySequenceLength` against the
 *     original, undoubled length (presumably wrapping coordinates that run
 *     past the end — confirm against `@teselagen/range-utils`), and hits with
 *     identical start/end are de-duplicated.
 *
 * @param sequence - The sequence to search in.
 * @param searchString - The text to look for; matching is case-insensitive.
 * @param options - See {@link FindSequenceMatchesOptions}.
 * @returns The unique hit ranges, with inclusive `start`/`end`.
 */
function findSequenceMatchesTopStrand(
  sequence: string,
  searchString: string,
  options: FindSequenceMatchesOptions = {}
): Range[] {
  const { isCircular, isAmbiguous, isProteinSequence, isProteinSearch } =
    options;

  let searchStringToUse = escapeStringRegexp(searchString);
  if (isAmbiguous) {
    searchStringToUse = convertAmbiguousStringToRegex(
      searchStringToUse,
      Boolean(isProteinSearch || isProteinSequence)
    );
  }
  // Short circuit when nothing is left to search for, e.g. an ambiguous
  // search whose characters all lack an entry in the ambiguity table.
  if (!searchStringToUse) return [];

  const sequenceToUse = isCircular ? sequence + sequence : sequence;

  // For a protein search, translate the three forward reading frames; `offset`
  // is the number of leading bases skipped before translating that frame.
  const sequencesToCheck = isProteinSearch
    ? [0, 1, 2].map(offset => ({
        seqToCheck: getAminoAcidStringFromSequenceString(
          offset === 0 ? sequenceToUse : sequenceToUse.slice(offset),
          { doNotExcludeAsterisk: true }
        ),
        offset
      }))
    : [{ seqToCheck: sequenceToUse, offset: 0 }];

  const ranges: Range[] = [];
  for (const { seqToCheck, offset } of sequencesToCheck) {
    const reg = new RegExp(searchStringToUse, "ig");
    let match: RegExpExecArray | null;
    /* eslint-disable no-cond-assign*/
    while ((match = reg.exec(seqToCheck)) !== null) {
      // Use the length of the text that actually matched. The raw
      // `searchString` can be longer than the match when ambiguous conversion
      // dropped characters, and the regex source can be longer than the match
      // because of escapes and character classes.
      const range: Range = {
        start: match.index,
        end: match.index + match[0].length - 1
      };
      if (isProteinSearch) {
        // Amino-acid index -> DNA index: three bases per residue, shifted by
        // the reading-frame offset; the end covers the residue's last base.
        range.start = range.start * 3 + offset;
        range.end = range.end * 3 + 2 + offset;
      }
      ranges.push(modulateRangeBySequenceLength(range, sequence.length));
      // Resume one character after this hit's start so overlapping hits are
      // found as well.
      reg.lastIndex = match.index + 1;
    }
    /* eslint-enable no-cond-assign*/
  }

  // Doubling a circular sequence reports most hits twice; keep one of each.
  return uniqBy(ranges, e => {
    return e.start + "-" + e.end;
  });
}
131
-
132
/**
 * Builds a regex source string from `string` by looking each character up
 * (upper-cased) in an ambiguity table: `extended_protein_values` when
 * `isProtein` is true, `ambiguous_dna_values` otherwise (the original comment
 * gives "N = A or T or C or G, R = A or G" as examples of DNA entries).
 *
 * - A character whose table entry is a single character contributes that
 *   character as-is.
 * - A character whose entry is longer contributes a character class `[...]`.
 * - A character with no (or an empty) entry is dropped.
 *
 * @param string - The search text, one table lookup per character.
 * @param isProtein - Selects the protein table instead of the DNA table.
 * @returns The assembled regex source; empty if every character was dropped.
 */
function convertAmbiguousStringToRegex(string: string, isProtein: boolean) {
  let pattern = "";
  for (let i = 0; i < string.length; i++) {
    const key = string.charAt(i).toUpperCase();
    const expansion = isProtein
      ? (extended_protein_values as Record<string, string>)[key]
      : (ambiguous_dna_values as Record<string, string>)[key];
    if (!expansion) {
      continue;
    }
    pattern += expansion.length === 1 ? expansion : `[${expansion}]`;
  }
  return pattern;
}
@@ -1,39 +0,0 @@
1
- import { generateRandomRange } from "@teselagen/range-utils";
2
- import shortid from "shortid";
3
- import { Annotation } from "./types";
4
-
5
/**
 * Creates a list of randomly generated annotations.
 *
 * @param numberOfAnnotationsToGenerate - How many annotations to create.
 * @param start - Forwarded to `generateAnnotation` as the range start bound.
 * @param end - Forwarded to `generateAnnotation` as the range end bound.
 * @param maxLength - Forwarded to `generateAnnotation` as the length bound.
 * @returns The generated annotations, in creation order.
 */
function generateAnnotations(
  numberOfAnnotationsToGenerate: number,
  start: number,
  end: number,
  maxLength: number
): Annotation[] {
  const annotations: Annotation[] = [];
  // One annotation is appended per pass, so the list length doubles as the
  // loop counter.
  while (annotations.length < numberOfAnnotationsToGenerate) {
    annotations.push(generateAnnotation(start, end, maxLength));
  }
  return annotations;
}
18
-
19
/**
 * Creates one random annotation: a range from `generateRandomRange`, a numeric
 * name in [0, 100000), type "misc_feature", a fresh `shortid` id, a random
 * direction and empty notes.
 *
 * @param start - Passed to `generateRandomRange`.
 * @param end - Passed to `generateRandomRange`.
 * @param maxLength - Passed to `generateRandomRange`.
 * @returns The generated annotation.
 */
function generateAnnotation(
  start: number,
  end: number,
  maxLength: number
): Annotation {
  // The random sources are consumed in the same order as the fields appear in
  // the returned object.
  const randomRange = generateRandomRange(start, end, maxLength);
  const name = getRandomInt(0, 100000).toString();
  const id = shortid();
  const forward = Math.random() > 0.5;

  return {
    ...randomRange,
    name,
    type: "misc_feature",
    id,
    forward,
    notes: {}
  };
}
34
-
35
/**
 * Returns a random integer offset from `min`: `min` plus the floor of a
 * uniform random fraction of `(max - min)`. For `min < max` the result lies
 * in `[min, max)`; for `min === max` it is `min`.
 *
 * @param min - Lower bound (inclusive).
 * @param max - Upper bound (exclusive when greater than `min`).
 */
function getRandomInt(min: number, max: number): number {
  const span = max - min;
  const step = Math.floor(Math.random() * span);
  return step + min;
}
38
-
39
- export default generateAnnotations;
@@ -1,212 +0,0 @@
1
- import generateAnnotations from "./generateAnnotations";
2
- import { SequenceData } from "./types";
3
-
4
/**
 * Generates random sequence data for tests and demos.
 *
 * For a DNA sequence (`isProtein` falsy) `sequence` is filled with random
 * bases, `proteinSequence` is empty and circularity is chosen at random. For a
 * protein, `proteinSequence` is filled instead, `sequence` is empty, the
 * result is never circular and no translations or primers are generated.
 *
 * Annotation ranges are generated within `0 .. sequenceLength - 1`. Features,
 * parts and translations are bounded in length by `sequenceLength / 3`,
 * primers by 50.
 *
 * @param options.isProtein - Generate a protein instead of DNA.
 * @param options.sequenceLength - Sequence length; defaults to 1000.
 * @param options.numFeatures - Number of features; defaults to 10.
 * @param options.numParts - Number of parts; defaults to 10.
 * @param options.numPrimers - Number of primers (DNA only); defaults to 10.
 * @param options.numTranslations - Number of translations (DNA only);
 *   defaults to 5.
 * @returns The generated sequence data.
 */
export default function generateSequenceData({
  isProtein,
  sequenceLength = 1000,
  numFeatures,
  numParts,
  numPrimers,
  numTranslations
}: {
  isProtein?: boolean;
  sequenceLength?: number;
  numFeatures?: number;
  numParts?: number;
  numPrimers?: number;
  numTranslations?: number;
} = {}): SequenceData {
  const proteinSequence = isProtein
    ? generateSequence(sequenceLength, true)
    : "";
  const sequence = !isProtein ? generateSequence(sequenceLength) : "";

  const lastIndex = sequenceLength - 1;
  const defaultMaxLength = sequenceLength / 3;

  // `??` rather than `||` so that an explicit count of 0 is honoured instead
  // of silently falling back to the default.
  return {
    circular: isProtein ? false : Math.random() > 0.5,
    name: "p-" + Math.floor(Math.random() * 100),
    description: "",
    isProtein: !!isProtein,
    sequence,
    proteinSequence,
    translations: isProtein
      ? []
      : generateAnnotations(
          numTranslations ?? 5,
          0,
          lastIndex,
          defaultMaxLength
        ),
    features: generateAnnotations(
      numFeatures ?? 10,
      0,
      lastIndex,
      defaultMaxLength
    ),
    primers: isProtein
      ? []
      : generateAnnotations(numPrimers ?? 10, 0, lastIndex, 50),
    parts: generateAnnotations(numParts ?? 10, 0, lastIndex, defaultMaxLength)
  };
}
56
-
57
- // export default tidyUpSequenceData(exampleData)
58
-
59
/**
 * Builds a random string of `m` characters drawn uniformly from an alphabet:
 * the 20 amino-acid letters "ACDEFGHIKLMNPQRSTVWY" when `isProtein` is truthy,
 * the lowercase bases "gatc" otherwise.
 *
 * @param m - Number of characters to generate; defaults to 9.
 * @param isProtein - Use the amino-acid alphabet instead of DNA bases.
 * @returns The generated string.
 */
function generateSequence(m = 9, isProtein?: boolean): string {
  const alphabet = isProtein ? "ACDEFGHIKLMNPQRSTVWY" : "gatc";
  let out = "";
  // Exactly one character is appended per pass, so the output length is the
  // loop counter.
  while (out.length < m) {
    const pick = Math.floor(Math.random() * alphabet.length);
    out += alphabet.charAt(pick);
  }
  return out;
}
67
-
68
- // tnr: this is used to generate a very large, multi-featured sequence
69
- // var string = "ggggcccccgggggccc";
70
- // var reallyLongFakeSequence = "";
71
- // for (var i = 1; i < 100000; i++) {
72
- // reallyLongFakeSequence += string;
73
- // if (i % 100 === 0) {
74
- // reallyLongFakeSequence += 'taafatg';
75
- // sequenceData.features.push({
76
- // id: i,
77
- // start: parseInt(i * 10),
78
- // end: parseInt(i * 10 + 100),
79
- // name: 'cooljim',
80
- // color: 'green',
81
- // forward: true,
82
- // annotationType: "feature"
83
- // });
84
- // }
85
- // }
86
- // sequenceData.sequence += reallyLongFakeSequence;
87
- //
88
- // export default function() {
89
- // var baseSeqData = {
90
- //
91
- // }
92
- // function seqGen() {
93
- //
94
- // }
95
- // }
96
- // "features" : [
97
- // {
98
- // "name" : "1",
99
- // "type" : "misc_feature",
100
- // "start" : 1,
101
- // "end" : 1,
102
- // "strand" : 1,
103
- // "notes" : [],
104
- // "color": 'blue'
105
- // },
106
- // {
107
- // "name" : "2",
108
- // "type" : "misc_feature",
109
- // "start" : 1,
110
- // "end" : 1,
111
- // "strand" : 1,
112
- // "notes" : [],
113
- // "color": 'blue'
114
- // },
115
- // {
116
- // "name" : "3",
117
- // "type" : "misc_feature",
118
- // "start" : 1,
119
- // "end" : 1,
120
- // "strand" : 1,
121
- // "notes" : [],
122
- // "color": 'blue'
123
- // },
124
- // {
125
- // "name" : "4",
126
- // "type" : "misc_feature",
127
- // "start" : 1,
128
- // "end" : 14,
129
- // "strand" : 1,
130
- // "notes" : [],
131
- // "color": 'blue'
132
- // },
133
- // {
134
- // "name" : "5",
135
- // "type" : "misc_feature",
136
- // "start" : 1,
137
- // "end" : 1,
138
- // "strand" : 1,
139
- // "notes" : [],
140
- // "color": 'blue'
141
- // },
142
- // {
143
- // "name" : "6",
144
- // "type" : "misc_feature",
145
- // "id" : "5590c1978fafgw979df000a4f02c7a",
146
- // "start" : 4,
147
- // "end" : 6,
148
- // "strand" : 1,
149
- // "notes" : [],
150
- // "color": 'orange'
151
- // },
152
- // {
153
- // "name" : "housemouserousepouse",
154
- // "type" : "misc_feature",
155
- // "id" : "5590c197897fs9df000a4f02c7a",
156
- // "start" : 4,
157
- // "end" : 6,
158
- // "strand" : 1,
159
- // "notes" : [],
160
- // "color": 'orange'
161
- // },
162
- // {
163
- // "name" : "housemouserousepouse",
164
- // "type" : "misc_feature",
165
- // "id" : "5590c1978979dasdfaf000a4f02c7a",
166
- // "start" : 4,
167
- // "end" : 6,
168
- // "strand" : 1,
169
- // "notes" : [],
170
- // "color": 'orange'
171
- // },
172
- // {
173
- // "name" : "housemouserousepouse",
174
- // "type" : "misc_feature",
175
- // "id" : "5590c197faas8979df000a4f02c7a",
176
- // "start" : 4,
177
- // "end" : 6,
178
- // "strand" : 1,
179
- // "notes" : [],
180
- // "color": 'orange'
181
- // },
182
- // {
183
- // "name" : "housemouserousepouse",
184
- // "type" : "misc_feature",
185
- // "id" : "5590c1978979df000a4f02c7aasd",
186
- // "start" : 4,
187
- // "end" : 6,
188
- // "strand" : 1,
189
- // "notes" : [],
190
- // "color": 'orange'
191
- // },
192
- // {
193
- // "name" : "house",
194
- // "type" : "misc_feature",
195
- // "id" : "5590c1978979df000a4f02c7b",
196
- // "start" : 70,
197
- // "end" : 90,
198
- // "strand" : 1,
199
- // "notes" : [],
200
- // "color": 'green'
201
- // },
202
- // {
203
- // "name" : "weer",
204
- // "type" : "misc_feature",
205
- // "id" : "5590c1d88979df000a4f02f5c",
206
- // "start" : 3,
207
- // "end" : 69,
208
- // "strand" : 1,
209
- // "notes" : [],
210
- // "color": 'red'
211
- // }
212
- // ],
@@ -1,100 +0,0 @@
1
- // seqReads should be an array of objects [{name, seq, pos, cigar}, {name, seq, pos, cigar}, ...]
2
-
3
/** One aligned sequencing read. */
export interface SeqRead {
  /** Read name. */
  name: string;
  /** Read sequence. */
  seq: string;
  /** Position at which the read's alignment starts (the original comments describe positions as 1-based). */
  pos: number;
  /** CIGAR string; only M, D and I components are interpreted here. */
  cigar: string;
}

/** An insertion relative to the reference. */
export interface InsertionInfo {
  /** Position of the insertion, in the same coordinate system as `SeqRead.pos`. */
  bpPos: number;
  /** Number of inserted bases. */
  number: number;
}

/**
 * Collects the insertions described by the CIGAR strings of `seqReads`.
 *
 * Each CIGAR string is split into its M (match), D (deletion) and I
 * (insertion) components, e.g. ["2M", "3I", "39M", "3D"]. An insertion's
 * position is the read's `pos` plus the lengths of all M and D components
 * that precede it in that read; earlier insertions do not advance it.
 *
 * Insertions are returned sorted by ascending `bpPos`. When several reads
 * report an insertion at the same position, only the largest one is kept.
 *
 * @param seqReads - Reads shaped like `[{ name, seq, pos, cigar }, ...]`.
 * @returns One `{ bpPos, number }` entry per distinct insertion position.
 */
export default function getAllInsertionsInSeqReads(
  seqReads: SeqRead[]
): InsertionInfo[] {
  const found: InsertionInfo[] = [];

  seqReads.forEach(({ pos, cigar }) => {
    const components = cigar.match(/([0-9]*[MDI])/g);
    if (!components) return;

    // Running position: advanced by every M/D component, left untouched by I.
    let cursor = pos;
    for (const component of components) {
      const count = Number(component.slice(0, -1));
      if (component.slice(-1) === "I") {
        found.push({ bpPos: cursor, number: count });
      } else {
        cursor += count;
      }
    }
  });

  // Sort by ascending position so that duplicates become neighbours.
  found.sort((a, b) => a.bpPos - b.bpPos);

  // Collapse runs that share a position, keeping the largest insertion.
  const merged: InsertionInfo[] = [];
  for (const insertion of found) {
    const lastIndex = merged.length - 1;
    const last = lastIndex >= 0 ? merged[lastIndex] : undefined;
    if (last !== undefined && last.bpPos === insertion.bpPos) {
      if (insertion.number > last.number) {
        merged[lastIndex] = insertion;
      }
    } else {
      merged.push(insertion);
    }
  }
  return merged;
}
73
-
74
- // function getAllInsertionsInSeqReads(seqReads) {
75
- // let allInsertionBpPosInSeqReads = [];
76
- // seqReads.forEach(seqRead => {
77
- // // split cigar string at M, D, or I (match, deletion, or insertion)
78
- // // ["2M", "3I", "39M", "3D"...]
79
- // const splitSeqRead = seqRead.cigar.match(/([0-9]*[MDI])/g)
80
- // splitSeqRead.forEach(component => {
81
- // // keeping bpPos 1-based
82
- // let bpPosOfInsertion = seqRead.pos;
83
- // if (component.slice(-1) === "I") {
84
- // const numberOfInsertions = Number(component.slice(0, -1));
85
- // const componentIndex = splitSeqRead.indexOf(component);
86
- // for (let i = 0; i < componentIndex; i++) {
87
- // const previousComponentNumber = Number(splitSeqRead[i].slice(0, -1));
88
- // bpPosOfInsertion += previousComponentNumber;
89
- // }
90
- // for (let i = 1; i <= numberOfInsertions; i++) {
91
- // allInsertionBpPosInSeqReads.push(bpPosOfInsertion - i);
92
- // }
93
- // }
94
- // });
95
- // });
96
- // // allInsertionBpPosInSeqReads should be an array of bp pos [6, 15, 9, 2, 23...]
97
- // // remove duplicates, organize in ascending order
98
- // const uniqueInsertionBpPos = [...new Set(allInsertionBpPosInSeqReads)].sort(function(a, b) { return a - b });
99
- // return uniqueInsertionBpPos;
100
- // }