@teselagen/bio-parsers 0.4.27 → 0.4.29-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,19 +1,17 @@
1
1
  {
2
2
  "name": "@teselagen/bio-parsers",
3
- "version": "0.4.27",
3
+ "version": "0.4.29-beta.1",
4
4
  "type": "module",
5
5
  "dependencies": {
6
6
  "@gmod/gff": "^1.2.1",
7
- "buffer": "5.7.1",
8
7
  "bufferpack": "^0.0.6",
9
8
  "color": "3.2.1",
10
9
  "fast-xml-parser": "^4.2.5",
11
10
  "fflate": "^0.8.0",
12
11
  "lodash-es": "^4.17.21",
13
- "string_decoder": "^1.3.0",
14
12
  "validate.io-nonnegative-integer-array": "^1.0.1",
15
- "@teselagen/sequence-utils": "0.3.30",
16
- "@teselagen/range-utils": "0.3.13"
13
+ "@teselagen/sequence-utils": "0.3.32-beta.1",
14
+ "@teselagen/range-utils": "0.3.14-beta.1"
17
15
  },
18
16
  "exports": {
19
17
  ".": {
package/src/ab1ToJson.js CHANGED
@@ -121,50 +121,45 @@ const tagDict = {
121
121
  colorDataC: { tagName: "DATA", tagNum: 12, typeToReturn: "getShort" }
122
122
  };
123
123
 
124
- const correctionAmount = 3;
125
124
  // tnr: this function takes in chromData which has 4 traces and a basePos (which describes where in the trace the base call lands)
126
125
  // It "normalizes" that data into a baseTraces array so that each base has its own set of that data (having a per-base trace makes insertion/deletion/copy/paste actions all easier)
127
126
  function convertBasePosTraceToPerBpTrace(chromData) {
128
- const { basePos, aTrace } = chromData;
129
- const traceLength = aTrace.length;
130
- let startPos = 0;
131
- let nextBasePos = basePos[1];
132
- let endPos;
133
- function setEndPos() {
134
- if (nextBasePos) {
135
- endPos = startPos + Math.ceil((nextBasePos - startPos) / 2);
136
- } else {
137
- endPos = traceLength;
138
- }
127
+ const { basePos } = chromData;
128
+
129
+ const peakEdges = [0];
130
+ for (let i = 0; i < basePos.length - 1; i++) {
131
+ peakEdges.push(Math.ceil((basePos[i] + basePos[i + 1]) / 2));
139
132
  }
140
- setEndPos();
133
+ peakEdges.push(chromData.aTrace.length);
134
+
135
+ // Trim edges of trace so that the first and last peak traces are roughly symmetric
136
+ // around the peak
137
+ const firstBinWidth = peakEdges[1] - peakEdges[0];
138
+ const secondBinWidth = peakEdges[2] - peakEdges[1];
139
+ if (firstBinWidth > secondBinWidth) {
140
+ peakEdges[0] = peakEdges[1] - secondBinWidth;
141
+ }
142
+
143
+ const lastBinWidth =
144
+ peakEdges[peakEdges.length - 1] - peakEdges[peakEdges.length - 2];
145
+ const secondLastBinWidth =
146
+ peakEdges[peakEdges.length - 2] - peakEdges[peakEdges.length - 3];
147
+ if (lastBinWidth > secondLastBinWidth) {
148
+ peakEdges[peakEdges.length - 1] =
149
+ peakEdges[peakEdges.length - 2] + secondLastBinWidth + 1;
150
+ }
151
+
141
152
  const baseTraces = [];
142
- for (let i = 0; i < basePos.length; i++) {
153
+ for (let i = 0; i < peakEdges.length - 1; i++) {
154
+ const start = peakEdges[i];
155
+ const end = peakEdges[i + 1];
143
156
  const tracesForType = {
144
- aTrace: [],
145
- tTrace: [],
146
- gTrace: [],
147
- cTrace: []
157
+ aTrace: chromData.aTrace.slice(start, end),
158
+ tTrace: chromData.tTrace.slice(start, end),
159
+ gTrace: chromData.gTrace.slice(start, end),
160
+ cTrace: chromData.cTrace.slice(start, end)
148
161
  };
149
- baseTraces[i] = tracesForType;
150
- [
151
- "aTrace",
152
- "tTrace",
153
- "gTrace",
154
- "cTrace"
155
- // eslint-disable-next-line no-loop-func
156
- ].forEach(type => {
157
- const traceForType = tracesForType[type];
158
- const traceData = chromData[type];
159
- for (let j = startPos; j < endPos + correctionAmount; j++) {
160
- traceForType.push(traceData[j] || 0);
161
- }
162
- });
163
- if (i !== basePos.length - 1) {
164
- startPos = endPos + correctionAmount;
165
- nextBasePos = basePos[i + 2];
166
- setEndPos();
167
- }
162
+ baseTraces.push(tracesForType);
168
163
  }
169
164
 
170
165
  return {
package/src/anyToJson.js CHANGED
@@ -10,7 +10,7 @@ import { tidyUpSequenceData } from "@teselagen/sequence-utils";
10
10
  import geneiousXmlToJson from "./geneiousXmlToJson";
11
11
  import jbeiXmlToJson from "./jbeiXmlToJson";
12
12
  import { unzipSync } from "fflate";
13
-
13
+ import fastqToJson from "./fastqToJson";
14
14
  /**
15
15
  * takes in file content string and its file name and determines what parser it needs to be sent to.
16
16
  * The file is parsed to our old JSON schema and after it goes through an intermediate step where we convert that json to our new schema
@@ -74,6 +74,9 @@ async function anyToJson(fileContentStringOrFileObj, options) {
74
74
  if (/^(fasta|fas|fa|fna|ffn|faa)$/.test(ext)) {
75
75
  // FASTA
76
76
  return fastaToJson(fileContentString, options);
77
+ } else if (/^(fastq)$/.test(ext)) {
78
+ // FASTQ
79
+ return fastqToJson(fileContentString, options);
77
80
  } else if (/^(gb|gbk)$/.test(ext)) {
78
81
  // GENBANK
79
82
  return genbankToJson(fileContentString, options);
@@ -182,8 +185,8 @@ function getUtf8StringFromFile(file, { emulateBrowser } = {}) {
182
185
  return Buffer.isBuffer(file)
183
186
  ? file.toString("utf-8")
184
187
  : Buffer.isBuffer(file.buffer)
185
- ? file.buffer.toString("utf-8")
186
- : file;
188
+ ? file.buffer.toString("utf-8")
189
+ : file;
187
190
  }
188
191
  const reader = new window.FileReader();
189
192
  reader.readAsText(file, "UTF-8");
@@ -204,8 +207,8 @@ function getUint8ArrayFromFile(file, { emulateBrowser } = {}) {
204
207
  return Buffer.isBuffer(file)
205
208
  ? new Uint8Array(file)
206
209
  : Buffer.isBuffer(file.buffer)
207
- ? new Uint8Array(file.buffer)
208
- : file;
210
+ ? new Uint8Array(file.buffer)
211
+ : file;
209
212
  }
210
213
  const reader = new window.FileReader();
211
214
  // reader.readAsText(file, "UTF-8");
@@ -62,14 +62,7 @@ function fastaToJson(fileString, options = {}) {
62
62
  if (!result) {
63
63
  result = createInitialSequence(options);
64
64
  }
65
- if ("*" === line[line.length - 1]) {
66
- //some resultArray are ended with an asterisk
67
- parseSequenceLine(line.substring(0, line.length - 1));
68
- resultArray.push(result);
69
- result = null;
70
- } else {
71
- parseSequenceLine(line);
72
- }
65
+ parseSequenceLine(line);
73
66
  }
74
67
  if (options && options.parseFastaAsCircular) {
75
68
  result.parsedSequence.circular = true;
@@ -0,0 +1,80 @@
1
+ import { convertBasePosTraceToPerBpTrace } from "./ab1ToJson.js";
2
+ import splitStringIntoLines from "./utils/splitStringIntoLines.js";
3
+ import validateSequenceArray from "./utils/validateSequenceArray";
4
+
5
+ /**
6
 + * parses a FASTQ file that may or may not contain multiple sequence records
7
 + * @param {[string]} fileString [string representation of file contents]
8
 + * @param {[object]} options [parser options]
9
+ * @author Joshua P Nixon
10
+ */
11
+
12
+ function validateFastqSet(header, sequence, plusSign, quality) {
13
+ if (header[0] !== "@") {
14
+ throw new Error("Invalid FASTQ format: header must start with @");
15
+ }
16
+ if (plusSign !== "+") {
17
+ throw new Error("Invalid FASTQ format: plus sign must be +");
18
+ }
19
+ if (quality.length !== sequence.length) {
20
+ throw new Error(
21
+ "Invalid FASTQ format: quality and sequence must be the same length"
22
+ );
23
+ }
24
+ if (quality.split("").some(char => char < "!")) {
25
+ throw new Error("Invalid FASTQ format: quality must be at least !");
26
+ }
27
+ if (!/^[acgt]+$/i.test(sequence)) {
28
+ throw new Error("Invalid FASTQ format: sequence must only contain ACGT");
29
+ }
30
+ }
31
+
32
+ function fastqToJson(fileString, options = {}) {
33
+ options.isProtein = false;
34
+
35
+ const lines = splitStringIntoLines(fileString);
36
+ const resultArray = [];
37
+ // We could check if the number of lines is divisible by 4,
38
+ // but maybe the file is not properly terminated.
39
+ for (let i = 0; i + 3 < lines.length; i += 4) {
40
+ const header = lines[i];
41
+ const sequence = lines[i + 1];
42
+ const plusSign = lines[i + 2];
43
+ const quality = lines[i + 3];
44
+
45
+ validateFastqSet(header, sequence, plusSign, quality);
46
+
47
+ const newChromatogramData = convertBasePosTraceToPerBpTrace({
48
+ aTrace: [],
49
+ tTrace: [],
50
+ gTrace: [],
51
+ cTrace: [],
52
+ basePos: [],
53
+ baseCalls: sequence.split(""),
54
+ baseTraces: sequence.split("").map(() => ({
55
+ aTrace: [],
56
+ tTrace: [],
57
+ gTrace: [],
58
+ cTrace: []
59
+ })),
60
+ qualNums: quality.split("").map(char => char.charCodeAt(0) - 33)
61
+ });
62
+
63
+ const result = {
64
+ success: true,
65
+ messages: [],
66
+ parsedSequence: {
67
+ name: header.slice(1),
68
+ sequence: sequence,
69
+ circular: false,
70
+ description: "",
71
+ chromatogramData: newChromatogramData
72
+ }
73
+ };
74
+ resultArray.push(result);
75
+ }
76
+
77
+ return validateSequenceArray(resultArray, options);
78
+ }
79
+
80
+ export default fastqToJson;
@@ -3,7 +3,7 @@
3
3
 
4
4
  import bufferpack from "bufferpack";
5
5
  import { StringDecoder } from "string_decoder";
6
- import buffer from "buffer";
6
+ import { Buffer } from "buffer";
7
7
 
8
8
  import getArrayBufferFromFile from "./utils/getArrayBufferFromFile";
9
9
  import createInitialSequence from "./utils/createInitialSequence";
@@ -13,8 +13,6 @@ import { get } from "lodash-es";
13
13
  import { XMLParser } from "fast-xml-parser";
14
14
  import extractFileExtension from "./utils/extractFileExtension";
15
15
 
16
- const Buffer = buffer.Buffer;
17
-
18
16
  async function snapgeneToJson(fileObj, options = {}) {
19
17
  try {
20
18
  const returnVal = createInitialSequence(options);
@@ -1,5 +1,2 @@
1
1
  export function unmangleUrls(str: any): any;
2
- export function mangleOrStripUrls(str: any, { mangleUrls, doNotMangleOrStripUrls }?: {
3
- mangleUrls: any;
4
- doNotMangleOrStripUrls: any;
5
- }): any;
2
+ export function mangleOrStripUrls(str: any, { mangleUrls, doNotMangleOrStripUrls }?: {}): any;