@teselagen/bio-parsers 0.4.27 → 0.4.29-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,19 +1,17 @@
1
1
  {
2
2
  "name": "@teselagen/bio-parsers",
3
- "version": "0.4.27",
3
+ "version": "0.4.29-beta.1",
4
4
  "type": "module",
5
5
  "dependencies": {
6
6
  "@gmod/gff": "^1.2.1",
7
- "buffer": "5.7.1",
8
7
  "bufferpack": "^0.0.6",
9
8
  "color": "3.2.1",
10
9
  "fast-xml-parser": "^4.2.5",
11
10
  "fflate": "^0.8.0",
12
11
  "lodash-es": "^4.17.21",
13
- "string_decoder": "^1.3.0",
14
12
  "validate.io-nonnegative-integer-array": "^1.0.1",
15
- "@teselagen/sequence-utils": "0.3.30",
16
- "@teselagen/range-utils": "0.3.13"
13
+ "@teselagen/sequence-utils": "0.3.32-beta.1",
14
+ "@teselagen/range-utils": "0.3.14-beta.1"
17
15
  },
18
16
  "exports": {
19
17
  ".": {
package/src/ab1ToJson.js CHANGED
@@ -121,50 +121,45 @@ const tagDict = {
121
121
  colorDataC: { tagName: "DATA", tagNum: 12, typeToReturn: "getShort" }
122
122
  };
123
123
 
124
- const correctionAmount = 3;
125
124
  // tnr: this function takes in chromData which has 4 traces and a basePos (which describes where in the trace the base call lands)
126
125
  // It "normalizes" that data into a baseTraces array so that each base has its own set of that data (having a per-base trace makes insertion/deletion/copy/paste actions all easier)
127
126
  function convertBasePosTraceToPerBpTrace(chromData) {
128
- const { basePos, aTrace } = chromData;
129
- const traceLength = aTrace.length;
130
- let startPos = 0;
131
- let nextBasePos = basePos[1];
132
- let endPos;
133
- function setEndPos() {
134
- if (nextBasePos) {
135
- endPos = startPos + Math.ceil((nextBasePos - startPos) / 2);
136
- } else {
137
- endPos = traceLength;
138
- }
127
+ const { basePos } = chromData;
128
+
129
+ const peakEdges = [0];
130
+ for (let i = 0; i < basePos.length - 1; i++) {
131
+ peakEdges.push(Math.ceil((basePos[i] + basePos[i + 1]) / 2));
139
132
  }
140
- setEndPos();
133
+ peakEdges.push(chromData.aTrace.length);
134
+
135
+ // Trim edges of trace so that the first and last peak traces are roughly symmetric
136
+ // around the peak
137
+ const firstBinWidth = peakEdges[1] - peakEdges[0];
138
+ const secondBinWidth = peakEdges[2] - peakEdges[1];
139
+ if (firstBinWidth > secondBinWidth) {
140
+ peakEdges[0] = peakEdges[1] - secondBinWidth;
141
+ }
142
+
143
+ const lastBinWidth =
144
+ peakEdges[peakEdges.length - 1] - peakEdges[peakEdges.length - 2];
145
+ const secondLastBinWidth =
146
+ peakEdges[peakEdges.length - 2] - peakEdges[peakEdges.length - 3];
147
+ if (lastBinWidth > secondLastBinWidth) {
148
+ peakEdges[peakEdges.length - 1] =
149
+ peakEdges[peakEdges.length - 2] + secondLastBinWidth + 1;
150
+ }
151
+
141
152
  const baseTraces = [];
142
- for (let i = 0; i < basePos.length; i++) {
153
+ for (let i = 0; i < peakEdges.length - 1; i++) {
154
+ const start = peakEdges[i];
155
+ const end = peakEdges[i + 1];
143
156
  const tracesForType = {
144
- aTrace: [],
145
- tTrace: [],
146
- gTrace: [],
147
- cTrace: []
157
+ aTrace: chromData.aTrace.slice(start, end),
158
+ tTrace: chromData.tTrace.slice(start, end),
159
+ gTrace: chromData.gTrace.slice(start, end),
160
+ cTrace: chromData.cTrace.slice(start, end)
148
161
  };
149
- baseTraces[i] = tracesForType;
150
- [
151
- "aTrace",
152
- "tTrace",
153
- "gTrace",
154
- "cTrace"
155
- // eslint-disable-next-line no-loop-func
156
- ].forEach(type => {
157
- const traceForType = tracesForType[type];
158
- const traceData = chromData[type];
159
- for (let j = startPos; j < endPos + correctionAmount; j++) {
160
- traceForType.push(traceData[j] || 0);
161
- }
162
- });
163
- if (i !== basePos.length - 1) {
164
- startPos = endPos + correctionAmount;
165
- nextBasePos = basePos[i + 2];
166
- setEndPos();
167
- }
162
+ baseTraces.push(tracesForType);
168
163
  }
169
164
 
170
165
  return {
package/src/anyToJson.js CHANGED
@@ -10,7 +10,7 @@ import { tidyUpSequenceData } from "@teselagen/sequence-utils";
10
10
  import geneiousXmlToJson from "./geneiousXmlToJson";
11
11
  import jbeiXmlToJson from "./jbeiXmlToJson";
12
12
  import { unzipSync } from "fflate";
13
-
13
+ import fastqToJson from "./fastqToJson";
14
14
  /**
15
15
  * takes in file content string and its file name and determines what parser it needs to be sent to.
16
16
  * The file is parsed to our old JSON schema and after it goes through an intermediate step where we convert that json to our new schema
@@ -74,6 +74,9 @@ async function anyToJson(fileContentStringOrFileObj, options) {
74
74
  if (/^(fasta|fas|fa|fna|ffn|faa)$/.test(ext)) {
75
75
  // FASTA
76
76
  return fastaToJson(fileContentString, options);
77
+ } else if (/^(fastq)$/.test(ext)) {
78
+ // FASTQ
79
+ return fastqToJson(fileContentString, options);
77
80
  } else if (/^(gb|gbk)$/.test(ext)) {
78
81
  // GENBANK
79
82
  return genbankToJson(fileContentString, options);
@@ -182,8 +185,8 @@ function getUtf8StringFromFile(file, { emulateBrowser } = {}) {
182
185
  return Buffer.isBuffer(file)
183
186
  ? file.toString("utf-8")
184
187
  : Buffer.isBuffer(file.buffer)
185
- ? file.buffer.toString("utf-8")
186
- : file;
188
+ ? file.buffer.toString("utf-8")
189
+ : file;
187
190
  }
188
191
  const reader = new window.FileReader();
189
192
  reader.readAsText(file, "UTF-8");
@@ -204,8 +207,8 @@ function getUint8ArrayFromFile(file, { emulateBrowser } = {}) {
204
207
  return Buffer.isBuffer(file)
205
208
  ? new Uint8Array(file)
206
209
  : Buffer.isBuffer(file.buffer)
207
- ? new Uint8Array(file.buffer)
208
- : file;
210
+ ? new Uint8Array(file.buffer)
211
+ : file;
209
212
  }
210
213
  const reader = new window.FileReader();
211
214
  // reader.readAsText(file, "UTF-8");
@@ -62,14 +62,7 @@ function fastaToJson(fileString, options = {}) {
62
62
  if (!result) {
63
63
  result = createInitialSequence(options);
64
64
  }
65
- if ("*" === line[line.length - 1]) {
66
- //some resultArray are ended with an asterisk
67
- parseSequenceLine(line.substring(0, line.length - 1));
68
- resultArray.push(result);
69
- result = null;
70
- } else {
71
- parseSequenceLine(line);
72
- }
65
+ parseSequenceLine(line);
73
66
  }
74
67
  if (options && options.parseFastaAsCircular) {
75
68
  result.parsedSequence.circular = true;
@@ -0,0 +1,80 @@
1
+ import { convertBasePosTraceToPerBpTrace } from "./ab1ToJson.js";
2
+ import splitStringIntoLines from "./utils/splitStringIntoLines.js";
3
+ import validateSequenceArray from "./utils/validateSequenceArray";
4
+
5
+ /**
6
 + * parses a FASTQ file that may or may not contain multiple sequence records
7
 + * @param {[string]} fileString [string representation of file contents]
8
 + * @param {[object]} options [parser options]
9
+ * @author Joshua P Nixon
10
+ */
11
+
12
+ function validateFastqSet(header, sequence, plusSign, quality) {
13
+ if (header[0] !== "@") {
14
+ throw new Error("Invalid FASTQ format: header must start with @");
15
+ }
16
+ if (plusSign !== "+") {
17
+ throw new Error("Invalid FASTQ format: plus sign must be +");
18
+ }
19
+ if (quality.length !== sequence.length) {
20
+ throw new Error(
21
+ "Invalid FASTQ format: quality and sequence must be the same length"
22
+ );
23
+ }
24
+ if (quality.split("").some(char => char < "!")) {
25
+ throw new Error("Invalid FASTQ format: quality must be at least !");
26
+ }
27
+ if (!/^[acgt]+$/i.test(sequence)) {
28
+ throw new Error("Invalid FASTQ format: sequence must only contain ACGT");
29
+ }
30
+ }
31
+
32
+ function fastqToJson(fileString, options = {}) {
33
+ options.isProtein = false;
34
+
35
+ const lines = splitStringIntoLines(fileString);
36
+ const resultArray = [];
37
+ // We could check if the number of lines is divisible by 4,
38
+ // but maybe the file is not properly terminated.
39
+ for (let i = 0; i + 3 < lines.length; i += 4) {
40
+ const header = lines[i];
41
+ const sequence = lines[i + 1];
42
+ const plusSign = lines[i + 2];
43
+ const quality = lines[i + 3];
44
+
45
+ validateFastqSet(header, sequence, plusSign, quality);
46
+
47
+ const newChromatogramData = convertBasePosTraceToPerBpTrace({
48
+ aTrace: [],
49
+ tTrace: [],
50
+ gTrace: [],
51
+ cTrace: [],
52
+ basePos: [],
53
+ baseCalls: sequence.split(""),
54
+ baseTraces: sequence.split("").map(() => ({
55
+ aTrace: [],
56
+ tTrace: [],
57
+ gTrace: [],
58
+ cTrace: []
59
+ })),
60
+ qualNums: quality.split("").map(char => char.charCodeAt(0) - 33)
61
+ });
62
+
63
+ const result = {
64
+ success: true,
65
+ messages: [],
66
+ parsedSequence: {
67
+ name: header.slice(1),
68
+ sequence: sequence,
69
+ circular: false,
70
+ description: "",
71
+ chromatogramData: newChromatogramData
72
+ }
73
+ };
74
+ resultArray.push(result);
75
+ }
76
+
77
+ return validateSequenceArray(resultArray, options);
78
+ }
79
+
80
+ export default fastqToJson;
@@ -3,7 +3,7 @@
3
3
 
4
4
  import bufferpack from "bufferpack";
5
5
  import { StringDecoder } from "string_decoder";
6
- import buffer from "buffer";
6
+ import { Buffer } from "buffer";
7
7
 
8
8
  import getArrayBufferFromFile from "./utils/getArrayBufferFromFile";
9
9
  import createInitialSequence from "./utils/createInitialSequence";
@@ -13,8 +13,6 @@ import { get } from "lodash-es";
13
13
  import { XMLParser } from "fast-xml-parser";
14
14
  import extractFileExtension from "./utils/extractFileExtension";
15
15
 
16
- const Buffer = buffer.Buffer;
17
-
18
16
  async function snapgeneToJson(fileObj, options = {}) {
19
17
  try {
20
18
  const returnVal = createInitialSequence(options);
@@ -1,5 +1,2 @@
1
1
  export function unmangleUrls(str: any): any;
2
- export function mangleOrStripUrls(str: any, { mangleUrls, doNotMangleOrStripUrls }?: {
3
- mangleUrls: any;
4
- doNotMangleOrStripUrls: any;
5
- }): any;
2
+ export function mangleOrStripUrls(str: any, { mangleUrls, doNotMangleOrStripUrls }?: {}): any;