@teselagen/bio-parsers 0.4.26 → 0.4.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/fastqToJson.d.ts +2 -0
- package/index.cjs +10359 -10407
- package/index.js +10359 -10407
- package/index.umd.cjs +10592 -10640
- package/package.json +3 -3
- package/src/ab1ToJson.js +32 -37
- package/src/anyToJson.js +8 -5
- package/src/fastaToJson.js +1 -8
- package/src/fastqToJson.js +80 -0
- package/utils/unmangleUrls.d.ts +1 -4
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@teselagen/bio-parsers",
|
|
3
|
-
"version": "0.4.26",
|
|
3
|
+
"version": "0.4.28",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"dependencies": {
|
|
6
6
|
"@gmod/gff": "^1.2.1",
|
|
@@ -12,8 +12,8 @@
|
|
|
12
12
|
"lodash-es": "^4.17.21",
|
|
13
13
|
"string_decoder": "^1.3.0",
|
|
14
14
|
"validate.io-nonnegative-integer-array": "^1.0.1",
|
|
15
|
-
"@teselagen/sequence-utils": "0.3.
|
|
16
|
-
"@teselagen/range-utils": "0.3.
|
|
15
|
+
"@teselagen/sequence-utils": "0.3.31",
|
|
16
|
+
"@teselagen/range-utils": "0.3.13"
|
|
17
17
|
},
|
|
18
18
|
"exports": {
|
|
19
19
|
".": {
|
package/src/ab1ToJson.js
CHANGED
|
@@ -121,50 +121,45 @@ const tagDict = {
|
|
|
121
121
|
colorDataC: { tagName: "DATA", tagNum: 12, typeToReturn: "getShort" }
|
|
122
122
|
};
|
|
123
123
|
|
|
124
|
-
const correctionAmount = 3;
|
|
125
124
|
// tnr: this function takes in chromData which has 4 traces and a basePos (which describes where in the trace the base call lands)
|
|
126
125
|
// It "normalizes" that data into a baseTraces array so that each base has its own set of that data (having a per-base trace makes insertion/deletion/copy/paste actions all easier)
|
|
127
126
|
function convertBasePosTraceToPerBpTrace(chromData) {
|
|
128
|
-
const { basePos
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
let
|
|
132
|
-
|
|
133
|
-
function setEndPos() {
|
|
134
|
-
if (nextBasePos) {
|
|
135
|
-
endPos = startPos + Math.ceil((nextBasePos - startPos) / 2);
|
|
136
|
-
} else {
|
|
137
|
-
endPos = traceLength;
|
|
138
|
-
}
|
|
127
|
+
const { basePos } = chromData;
|
|
128
|
+
|
|
129
|
+
const peakEdges = [0];
|
|
130
|
+
for (let i = 0; i < basePos.length - 1; i++) {
|
|
131
|
+
peakEdges.push(Math.ceil((basePos[i] + basePos[i + 1]) / 2));
|
|
139
132
|
}
|
|
140
|
-
|
|
133
|
+
peakEdges.push(chromData.aTrace.length);
|
|
134
|
+
|
|
135
|
+
// Trim edges of trace so that the first and last peak traces are roughly symmetric
|
|
136
|
+
// around the peak
|
|
137
|
+
const firstBinWidth = peakEdges[1] - peakEdges[0];
|
|
138
|
+
const secondBinWidth = peakEdges[2] - peakEdges[1];
|
|
139
|
+
if (firstBinWidth > secondBinWidth) {
|
|
140
|
+
peakEdges[0] = peakEdges[1] - secondBinWidth;
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
const lastBinWidth =
|
|
144
|
+
peakEdges[peakEdges.length - 1] - peakEdges[peakEdges.length - 2];
|
|
145
|
+
const secondLastBinWidth =
|
|
146
|
+
peakEdges[peakEdges.length - 2] - peakEdges[peakEdges.length - 3];
|
|
147
|
+
if (lastBinWidth > secondLastBinWidth) {
|
|
148
|
+
peakEdges[peakEdges.length - 1] =
|
|
149
|
+
peakEdges[peakEdges.length - 2] + secondLastBinWidth + 1;
|
|
150
|
+
}
|
|
151
|
+
|
|
141
152
|
const baseTraces = [];
|
|
142
|
-
for (let i = 0; i <
|
|
153
|
+
for (let i = 0; i < peakEdges.length - 1; i++) {
|
|
154
|
+
const start = peakEdges[i];
|
|
155
|
+
const end = peakEdges[i + 1];
|
|
143
156
|
const tracesForType = {
|
|
144
|
-
aTrace:
|
|
145
|
-
tTrace:
|
|
146
|
-
gTrace:
|
|
147
|
-
cTrace:
|
|
157
|
+
aTrace: chromData.aTrace.slice(start, end),
|
|
158
|
+
tTrace: chromData.tTrace.slice(start, end),
|
|
159
|
+
gTrace: chromData.gTrace.slice(start, end),
|
|
160
|
+
cTrace: chromData.cTrace.slice(start, end)
|
|
148
161
|
};
|
|
149
|
-
baseTraces
|
|
150
|
-
[
|
|
151
|
-
"aTrace",
|
|
152
|
-
"tTrace",
|
|
153
|
-
"gTrace",
|
|
154
|
-
"cTrace"
|
|
155
|
-
// eslint-disable-next-line no-loop-func
|
|
156
|
-
].forEach(type => {
|
|
157
|
-
const traceForType = tracesForType[type];
|
|
158
|
-
const traceData = chromData[type];
|
|
159
|
-
for (let j = startPos; j < endPos + correctionAmount; j++) {
|
|
160
|
-
traceForType.push(traceData[j] || 0);
|
|
161
|
-
}
|
|
162
|
-
});
|
|
163
|
-
if (i !== basePos.length - 1) {
|
|
164
|
-
startPos = endPos + correctionAmount;
|
|
165
|
-
nextBasePos = basePos[i + 2];
|
|
166
|
-
setEndPos();
|
|
167
|
-
}
|
|
162
|
+
baseTraces.push(tracesForType);
|
|
168
163
|
}
|
|
169
164
|
|
|
170
165
|
return {
|
package/src/anyToJson.js
CHANGED
|
@@ -10,7 +10,7 @@ import { tidyUpSequenceData } from "@teselagen/sequence-utils";
|
|
|
10
10
|
import geneiousXmlToJson from "./geneiousXmlToJson";
|
|
11
11
|
import jbeiXmlToJson from "./jbeiXmlToJson";
|
|
12
12
|
import { unzipSync } from "fflate";
|
|
13
|
-
|
|
13
|
+
import fastqToJson from "./fastqToJson";
|
|
14
14
|
/**
|
|
15
15
|
* takes in file content string and its file name and determines what parser it needs to be sent to.
|
|
16
16
|
* The file is parsed to our old JSON schema and after it goes through an intermediate step where we convert that json to our new schema
|
|
@@ -74,6 +74,9 @@ async function anyToJson(fileContentStringOrFileObj, options) {
|
|
|
74
74
|
if (/^(fasta|fas|fa|fna|ffn|faa)$/.test(ext)) {
|
|
75
75
|
// FASTA
|
|
76
76
|
return fastaToJson(fileContentString, options);
|
|
77
|
+
} else if (/^(fastq)$/.test(ext)) {
|
|
78
|
+
// FASTQ
|
|
79
|
+
return fastqToJson(fileContentString, options);
|
|
77
80
|
} else if (/^(gb|gbk)$/.test(ext)) {
|
|
78
81
|
// GENBANK
|
|
79
82
|
return genbankToJson(fileContentString, options);
|
|
@@ -182,8 +185,8 @@ function getUtf8StringFromFile(file, { emulateBrowser } = {}) {
|
|
|
182
185
|
return Buffer.isBuffer(file)
|
|
183
186
|
? file.toString("utf-8")
|
|
184
187
|
: Buffer.isBuffer(file.buffer)
|
|
185
|
-
|
|
186
|
-
|
|
188
|
+
? file.buffer.toString("utf-8")
|
|
189
|
+
: file;
|
|
187
190
|
}
|
|
188
191
|
const reader = new window.FileReader();
|
|
189
192
|
reader.readAsText(file, "UTF-8");
|
|
@@ -204,8 +207,8 @@ function getUint8ArrayFromFile(file, { emulateBrowser } = {}) {
|
|
|
204
207
|
return Buffer.isBuffer(file)
|
|
205
208
|
? new Uint8Array(file)
|
|
206
209
|
: Buffer.isBuffer(file.buffer)
|
|
207
|
-
|
|
208
|
-
|
|
210
|
+
? new Uint8Array(file.buffer)
|
|
211
|
+
: file;
|
|
209
212
|
}
|
|
210
213
|
const reader = new window.FileReader();
|
|
211
214
|
// reader.readAsText(file, "UTF-8");
|
package/src/fastaToJson.js
CHANGED
|
@@ -62,14 +62,7 @@ function fastaToJson(fileString, options = {}) {
|
|
|
62
62
|
if (!result) {
|
|
63
63
|
result = createInitialSequence(options);
|
|
64
64
|
}
|
|
65
|
-
|
|
66
|
-
//some resultArray are ended with an asterisk
|
|
67
|
-
parseSequenceLine(line.substring(0, line.length - 1));
|
|
68
|
-
resultArray.push(result);
|
|
69
|
-
result = null;
|
|
70
|
-
} else {
|
|
71
|
-
parseSequenceLine(line);
|
|
72
|
-
}
|
|
65
|
+
parseSequenceLine(line);
|
|
73
66
|
}
|
|
74
67
|
if (options && options.parseFastaAsCircular) {
|
|
75
68
|
result.parsedSequence.circular = true;
|
|
package/src/fastqToJson.js
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import { convertBasePosTraceToPerBpTrace } from "./ab1ToJson.js";
|
|
2
|
+
import splitStringIntoLines from "./utils/splitStringIntoLines.js";
|
|
3
|
+
import validateSequenceArray from "./utils/validateSequenceArray";
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* parses a fasta file that may or may not contain multiple resultArray
|
|
7
|
+
* @param {[string]} fileString [string respresentation of file contents]
|
|
8
|
+
* @param {[function]} onFileParsed [callback for a parsed sequence]
|
|
9
|
+
* @author Joshua P Nixon
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
function validateFastqSet(header, sequence, plusSign, quality) {
|
|
13
|
+
if (header[0] !== "@") {
|
|
14
|
+
throw new Error("Invalid FASTQ format: header must start with @");
|
|
15
|
+
}
|
|
16
|
+
if (plusSign !== "+") {
|
|
17
|
+
throw new Error("Invalid FASTQ format: plus sign must be +");
|
|
18
|
+
}
|
|
19
|
+
if (quality.length !== sequence.length) {
|
|
20
|
+
throw new Error(
|
|
21
|
+
"Invalid FASTQ format: quality and sequence must be the same length"
|
|
22
|
+
);
|
|
23
|
+
}
|
|
24
|
+
if (quality.split("").some(char => char < "!")) {
|
|
25
|
+
throw new Error("Invalid FASTQ format: quality must be at least !");
|
|
26
|
+
}
|
|
27
|
+
if (!/^[acgt]+$/i.test(sequence)) {
|
|
28
|
+
throw new Error("Invalid FASTQ format: sequence must only contain ACGT");
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
function fastqToJson(fileString, options = {}) {
|
|
33
|
+
options.isProtein = false;
|
|
34
|
+
|
|
35
|
+
const lines = splitStringIntoLines(fileString);
|
|
36
|
+
const resultArray = [];
|
|
37
|
+
// We could check if the number of lines is divisible by 4,
|
|
38
|
+
// but maybe the file is not properly terminated.
|
|
39
|
+
for (let i = 0; i + 3 < lines.length; i += 4) {
|
|
40
|
+
const header = lines[i];
|
|
41
|
+
const sequence = lines[i + 1];
|
|
42
|
+
const plusSign = lines[i + 2];
|
|
43
|
+
const quality = lines[i + 3];
|
|
44
|
+
|
|
45
|
+
validateFastqSet(header, sequence, plusSign, quality);
|
|
46
|
+
|
|
47
|
+
const newChromatogramData = convertBasePosTraceToPerBpTrace({
|
|
48
|
+
aTrace: [],
|
|
49
|
+
tTrace: [],
|
|
50
|
+
gTrace: [],
|
|
51
|
+
cTrace: [],
|
|
52
|
+
basePos: [],
|
|
53
|
+
baseCalls: sequence.split(""),
|
|
54
|
+
baseTraces: sequence.split("").map(() => ({
|
|
55
|
+
aTrace: [],
|
|
56
|
+
tTrace: [],
|
|
57
|
+
gTrace: [],
|
|
58
|
+
cTrace: []
|
|
59
|
+
})),
|
|
60
|
+
qualNums: quality.split("").map(char => char.charCodeAt(0) - 33)
|
|
61
|
+
});
|
|
62
|
+
|
|
63
|
+
const result = {
|
|
64
|
+
success: true,
|
|
65
|
+
messages: [],
|
|
66
|
+
parsedSequence: {
|
|
67
|
+
name: header.slice(1),
|
|
68
|
+
sequence: sequence,
|
|
69
|
+
circular: false,
|
|
70
|
+
description: "",
|
|
71
|
+
chromatogramData: newChromatogramData
|
|
72
|
+
}
|
|
73
|
+
};
|
|
74
|
+
resultArray.push(result);
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
return validateSequenceArray(resultArray, options);
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
export default fastqToJson;
|
package/utils/unmangleUrls.d.ts
CHANGED
|
@@ -1,5 +1,2 @@
|
|
|
1
1
|
export function unmangleUrls(str: any): any;
|
|
2
|
-
export function mangleOrStripUrls(str: any, { mangleUrls, doNotMangleOrStripUrls }?: {
|
|
3
|
-
mangleUrls: any;
|
|
4
|
-
doNotMangleOrStripUrls: any;
|
|
5
|
-
}): any;
|
|
2
|
+
export function mangleOrStripUrls(str: any, { mangleUrls, doNotMangleOrStripUrls }?: {}): any;
|