@teselagen/bio-parsers 0.1.27 → 0.1.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,688 @@
1
+ /* eslint-disable no-var*/
2
+ import { convertAACaretPositionOrRangeToDna } from "@teselagen/sequence-utils";
3
+
4
+ import { gbDivisions, untitledSequenceName } from "./utils/constants";
5
+ import flattenSequenceArray from "./utils/flattenSequenceArray";
6
+ import validateSequenceArray from "./utils/validateSequenceArray";
7
+ import splitStringIntoLines from "./utils/splitStringIntoLines.js";
8
+
9
+ import createInitialSequence from "./utils/createInitialSequence";
10
+
11
/**
 * Parse the text of a GenBank flat file (one or more records) into sequence JSON.
 *
 * The text is split into lines and processed one line at a time. Each line is
 * routed by the most recent keyword seen (LOCUS, DEFINITION, FEATURES, ORIGIN,
 * "//", ...). A record is started by a LOCUS line and finished by "//"; if the
 * last record has no "//" it is finished after the loop.
 *
 * NOTE: values detected on the LOCUS line (`isProtein`, `sequenceTypeFromLocus`,
 * `isSingleStrandedDNA`, `isDoubleStrandedRNA`) are written onto the `options`
 * object that was passed in, and `options` is then handed to
 * `flattenSequenceArray` / `validateSequenceArray`.
 *
 * @param {string} string - Raw contents of the GenBank file.
 * @param {Object} [options]
 * @param {boolean} [options.inclusive1BasedStart] - When truthy, feature starts are
 *   kept as written in the file; otherwise 1 is subtracted.
 * @param {boolean} [options.inclusive1BasedEnd] - When truthy, feature ends are kept
 *   as written in the file; otherwise 1 is subtracted.
 * @param {boolean} [options.primersAsFeatures] - When truthy, `primer_bind`
 *   annotations stay in `features`; otherwise they are moved to `primers`.
 * @param {boolean} [options.isProtein] - When truthy, feature locations are passed
 *   through `convertAACaretPositionOrRangeToDna`.
 * @returns {Array<Object>} The value returned by `validateSequenceArray`; every
 *   entry with `success` set has had its primers normalised as described above.
 */
function genbankToJson(string, options = {}) {
  const { inclusive1BasedStart, inclusive1BasedEnd } = options;

  const resultsArray = [];
  let result;
  let currentFeatureNote;

  const genbankAnnotationKey = {
    LOCUS_TAG: "LOCUS",
    DEFINITION_TAG: "DEFINITION",
    ACCESSION_TAG: "ACCESSION",
    VERSION_TAG: "VERSION",
    KEYWORDS_TAG: "KEYWORDS",
    //SEGMENT_TAG:"SEGMENT"
    SOURCE_TAG: "SOURCE",
    ORGANISM_TAG: "ORGANISM",
    REFERENCE_TAG: "REFERENCE",
    AUTHORS_TAG: "AUTHORS",
    CONSORTIUM_TAG: "CONSRTM",
    TITLE_TAG: "TITLE",
    JOURNAL_TAG: "JOURNAL",
    PUBMED_TAG: "PUBMED",
    REMARK_TAG: "REMARK",
    COMMENT_TAG: "COMMENT",
    FEATURES_TAG: "FEATURES",
    BASE_COUNT_TAG: "BASE COUNT",
    //CONTIG_TAG: "CONTIG"
    ORIGIN_TAG: "ORIGIN",
    END_SEQUENCE_TAG: "//",
  };

  // Keys that switch the current section even when the line is indented.
  const sectionKeys = new Set([
    genbankAnnotationKey.LOCUS_TAG,
    genbankAnnotationKey.REFERENCE_TAG,
    genbankAnnotationKey.FEATURES_TAG,
    genbankAnnotationKey.ORIGIN_TAG,
    genbankAnnotationKey.END_SEQUENCE_TAG,
  ]);

  // false until a LOCUS line is seen; afterwards it holds the LOCUS text read so
  // far, so that a wrapped LOCUS line can be re-parsed as one string.
  let hasFoundLocus = false;
  let featureLocationIndentation;
  // State shared between successive parseFeatures calls.
  let lastLineWasFeaturesTag;
  let lastLineWasLocation;

  try {
    const lines = splitStringIntoLines(string);
    let LINETYPE = false;

    if (lines === null) {
      // No record exists yet to attach a message to; fail the parse explicitly
      // (handled by the catch below).
      throw new Error("Import Error: Sequence file is empty");
    }

    lines.some((line) => {
      if (line === null) {
        return true; //break the some loop
      }
      const key = getLineKey(line);
      const val = getLineVal(line);
      const isKeyRunon = isKeywordRunon(line);
      const isSubKey = isSubKeyword(line);
      const isKey = isKeyword(line);

      //only set a new LINETYPE in the case that we've encountered a key that warrants it.
      if (sectionKeys.has(key) || isKey) {
        LINETYPE = key;
      }

      // IGNORE LINES: DO NOT EVEN PROCESS
      if (line.trim() === "" || key === ";") {
        return false; // go to next line
      }

      if (!hasFoundLocus && LINETYPE !== genbankAnnotationKey.LOCUS_TAG) {
        // Genbank files must start with a LOCUS tag so this must not be a genbank
        return true; //break the some loop
      }

      switch (LINETYPE) {
        case genbankAnnotationKey.LOCUS_TAG:
          if (hasFoundLocus) {
            //here we concatenate the locus lines together
            line = hasFoundLocus + line;
          }
          parseLocus(line);
          hasFoundLocus = line;
          break;
        case genbankAnnotationKey.FEATURES_TAG:
          //If no location is specified, exclude feature and return messages
          if (val === "") {
            addMessage(
              `Warning: The feature '${key}' has no location specified. This line has been ignored: ${line}`
            );
            break;
          }
          parseFeatures(line, key, val);
          break;
        case genbankAnnotationKey.ORIGIN_TAG:
          parseOrigin(line, key);
          break;
        case genbankAnnotationKey.END_SEQUENCE_TAG:
          endSeq();
          break;
        case genbankAnnotationKey.DEFINITION_TAG: {
          const text = line.replace(/DEFINITION/, "").trim();
          if (!result.parsedSequence) {
            throw new Error(
              "no sequence yet created upon which to extract an extra line!"
            );
          }
          // wrapped DEFINITION lines are joined with a single space
          if (result.parsedSequence.definition) {
            result.parsedSequence.definition += " " + text;
          } else {
            result.parsedSequence.definition = text;
          }
          if (result.parsedSequence.description) {
            result.parsedSequence.description += " " + text;
          } else {
            result.parsedSequence.description = text;
          }
          break;
        }
        case genbankAnnotationKey.ACCESSION_TAG:
          if (result.parsedSequence) {
            result.parsedSequence.accession = line
              .replace(/ACCESSION/, "")
              .trim();
          }
          break;
        case genbankAnnotationKey.VERSION_TAG:
          if (result.parsedSequence) {
            result.parsedSequence.version = line.replace(/VERSION/, "").trim();
          }
          break;
        case genbankAnnotationKey.COMMENT_TAG: {
          const text = line.replace(/COMMENT/, "").trim();
          if (!result.parsedSequence) {
            throw new Error(
              "no sequence yet created upon which to extract an extra line!"
            );
          }
          if (!result.parsedSequence.comments) {
            result.parsedSequence.comments = [];
          }
          if (text.indexOf("teselagen_unique_id:") > -1) {
            //capture the special comment
            result.parsedSequence.teselagen_unique_id = text
              .replace(/ /g, "")
              .replace("teselagen_unique_id:", "");
          } else if (text.indexOf("library:") > -1) {
            result.parsedSequence.library = text
              .replace(/ /g, "")
              .replace("library:", "");
          } else {
            result.parsedSequence.comments.push(text);
          }
          break;
        }
        default:
          // Keywords without a dedicated case are only kept verbatim.
          extractExtraLine(line);
          if (key === "BASE") {
            // "BASE COUNT" line
            addMessage(
              "Warning: This BaseCount line has been ignored: " + line
            );
          } else if (!isKey && !isSubKey && !isKeyRunon) {
            // not a keyword, sub-keyword or keyword run-on line
            addMessage("Warning: This line has been ignored: " + line);
          }
      }
      return false;
    });
  } catch (e) {
    //catch any errors and set the result
    console.error("Error trying to parse file as .gb:", e);
    result = {
      success: false,
      messages: ["Import Error: Invalid File"],
    };
  }

  //catch the case where we've successfully started a sequence and parsed it, but endSeq isn't called correctly
  if (
    !result ||
    (result.success && resultsArray[resultsArray.length - 1] !== result)
  ) {
    //current result isn't in resultsArray yet, so we call endSeq here
    endSeq();
  }

  const results = validateSequenceArray(
    flattenSequenceArray(resultsArray, options),
    options
  );

  // default sequence json has primers at the top level separate from features, e.g. parsedSequence: { primers: [ {}, {} ], features: [ {}, {} ] }
  // if options.primersAsFeatures is set to true, primers are included in features with type set to primer_bind
  results.forEach((parsed) => {
    if (!parsed.success) {
      return;
    }
    const sequence = parsed.parsedSequence;
    sequence.features.forEach((feat) => {
      if (feat.type === "primer") {
        feat.type = "primer_bind";
      }
    });

    if (!options.primersAsFeatures) {
      sequence.primers = sequence.features.filter(
        (feat) => feat.type === "primer_bind"
      );
      sequence.features = sequence.features.filter(
        (feat) => feat.type !== "primer_bind"
      );
    }
  });

  return results;

  /** Finish the current record: post-process its features and store it. */
  function endSeq() {
    hasFoundLocus = false;
    postProcessCurSeq();
    resultsArray.push(result || { success: false });
  }

  /** @returns {Object|undefined} The most recently added feature of the current record. */
  function getCurrentFeature() {
    return result.parsedSequence.features[
      result.parsedSequence.features.length - 1
    ];
  }

  /**
   * Attach a message to the current record, skipping exact duplicates.
   * Does nothing when no record has been started yet.
   */
  function addMessage(msg) {
    if (!result) {
      return;
    }
    if (!result.messages) {
      result.messages = [];
    }
    if (result.messages.indexOf(msg) === -1) {
      result.messages.push(msg);
    }
  }

  /** Run postProcessGenbankFeature over every feature of the current record. */
  function postProcessCurSeq() {
    if (result && result.parsedSequence && result.parsedSequence.features) {
      const { features } = result.parsedSequence;
      for (let i = 0; i < features.length; i++) {
        features[i] = postProcessGenbankFeature(features[i]);
      }
    }
  }

  /** Append the bases of an ORIGIN data line (whitespace and digits removed). */
  function parseOrigin(line, key) {
    if (key !== genbankAnnotationKey.ORIGIN_TAG) {
      result.parsedSequence.sequence += line.replace(/[\s]*[0-9]*/g, "");
    }
  }

  /**
   * Start a new record from a LOCUS line and read name, topology, date,
   * molecule type and division from its whitespace-separated tokens.
   */
  function parseLocus(line) {
    result = createInitialSequence(options);
    let circular;
    let gbDivision;
    let date;
    const lineArr = line.split(/[\s]+/g);

    if (lineArr.length <= 1) {
      console.warn(
        "Parsing GenBank File: WARNING! Locus line contains no values!"
      );
      addMessage("Import Warning: Locus line contains no values: " + line);
    }
    const locusName = lineArr[1];

    for (let j = 1; j < lineArr.length; j++) {
      const item = lineArr[j];

      // Linear vs Circular? (the last matching token wins)
      if (/circular/i.test(item)) {
        circular = true;
      } else if (/linear/i.test(item)) {
        circular = false;
      }

      // Date is in format: 1-APR-2012
      if (/-[A-Z]{3}-/.test(item)) {
        date = item;
      }

      // isProtein: the unit token after the length is "aa"
      if (j === 3 && /aa/i.test(item)) {
        options.sequenceTypeFromLocus = item;
        options.isProtein = true;
      }

      // molecule type token (ds-DNA, ss-DNA, DNA, RNA, ...)
      if (j === 4 && /dna|rna/i.test(item)) {
        if (options.isProtein === undefined) {
          options.isProtein = false;
        }
        options.sequenceTypeFromLocus = item;
        if (/ss-dna/i.test(item)) {
          options.isSingleStrandedDNA = true;
        }
        if (/rna/i.test(item) && !/ss-rna/i.test(item)) {
          options.isDoubleStrandedRNA = true;
        }
      }

      // Division
      if (gbDivisions[item.toUpperCase()]) {
        gbDivision = item.toUpperCase();
      }
    }

    //don't use "Exported" as a name unless it is our last option
    if (
      locusName !== "Exported" ||
      result.parsedSequence.name === untitledSequenceName
    ) {
      result.parsedSequence.name = locusName;
    }
    // NOTE(review): the flags below live on `options`, so once set by one record
    // they are also reported for later records in the same file — confirm intended.
    result.parsedSequence.gbDivision = gbDivision;
    result.parsedSequence.sequenceTypeFromLocus = options.sequenceTypeFromLocus;
    result.parsedSequence.isSingleStrandedDNA = options.isSingleStrandedDNA;
    result.parsedSequence.isDoubleStrandedRNA = options.isDoubleStrandedRNA;
    result.parsedSequence.date = date;
    result.parsedSequence.circular = circular;
  }

  /**
   * Keep a line verbatim in `parsedSequence.extraLines`.
   * @throws {Error} When the current record has no parsedSequence.
   */
  function extractExtraLine(line) {
    if (!result.parsedSequence) {
      throw new Error(
        "no sequence yet created upon which to extract an extra line!"
      );
    }
    if (!result.parsedSequence.extraLines) {
      result.parsedSequence.extraLines = [];
    }
    result.parsedSequence.extraLines.push(line);
  }

  /**
   * Handle one line of the FEATURES section: the FEATURES header itself, a new
   * feature (key + location), a qualifier (/key=value) or a run-on of either.
   */
  function parseFeatures(line, key, val) {
    if (key === genbankAnnotationKey.FEATURES_TAG) {
      lastLineWasFeaturesTag = true;
      return;
    }

    if (lastLineWasFeaturesTag) {
      //the first line after the FEATURES header fixes the indentation of feature keys
      featureLocationIndentation =
        getLengthOfWhiteSpaceBeforeStartOfLetters(line);
      lastLineWasFeaturesTag = false;
    }

    if (isFeatureLineRunon(line, featureLocationIndentation)) {
      //the line is a continuation of the above line
      if (lastLineWasLocation) {
        //the last line was a location, so the run-on line is expected to be a feature location as well
        parseFeatureLocation(line.trim(), options);
        lastLineWasLocation = true;
      } else {
        //the last line was a note
        if (currentFeatureNote) {
          // only trim file formatting spaces (i.e. the left ones)
          // spaces on the right are necessary (e.g. spacing between words, etc.)
          currentFeatureNote[currentFeatureNote.length - 1] += line
            .trimStart()
            .replace(/"/g, "");
        }
        lastLineWasLocation = false;
      }
      return;
    }

    // New Element/Qualifier lines. Not runon lines.
    if (isNote(line)) {
      // a qualifier in the /KEY="BLAH" format; could be multiple per feature
      // skip it when its feature was dropped for a missing location
      if (getCurrentFeature()) {
        parseFeatureNote(line);
        lastLineWasLocation = false;
      }
      return;
    }

    //the line is a location, so we make a new feature from it
    const strand = val.match(/complement/g) ? -1 : 1;
    newFeature();
    const feat = getCurrentFeature();
    feat.type = key;
    feat.strand = strand;

    parseFeatureLocation(val, options);
    lastLineWasLocation = true;
  }

  /** Append an empty feature to the current record. */
  function newFeature() {
    result.parsedSequence.features.push({
      locations: [],
      notes: {},
    });
  }

  /** @returns {boolean} True when the first non-whitespace character of the line is "/". */
  function isNote(line) {
    return line.trim().charAt(0) === "/";
  }

  /**
   * Read every number in a location string as consecutive start/end pairs and
   * append them to the current feature.
   */
  function parseFeatureLocation(locStr, options) {
    const positions = locStr.trim().match(/\d+/g) || [];
    for (let i = 0; i < positions.length; i += 2) {
      const rawStart = positions[i];
      // a lone position such as "147" is treated like "147..147"
      const rawEnd = positions[i + 1] === undefined ? rawStart : positions[i + 1];
      const location = {
        start: parseInt(rawStart, 10) - (inclusive1BasedStart ? 0 : 1),
        end: parseInt(rawEnd, 10) - (inclusive1BasedEnd ? 0 : 1),
      };
      const feat = getCurrentFeature();
      feat.locations.push(
        options.isProtein
          ? convertAACaretPositionOrRangeToDna(location)
          : location
      );
    }
  }

  /**
   * Parse a qualifier line (/key=value or /key="value") and store the value in
   * the current feature's notes. Unquoted all-digit values are stored as numbers.
   */
  function parseFeatureNote(line) {
    // only trim file formatting spaces (i.e. the left ones)
    // spaces on the right are necessary (e.g. spacing between words, etc.)
    const stripped = line.trimStart().replace(/^\/|"$/g, "");
    const lineArr = stripped.split(/="|=/);

    let val = lineArr.slice(1).join("=");

    if (val) {
      val = val.replace(/\\/g, " ");

      if (line.match(/="/g)) {
        val = val.replace(/".*/g, "");
      } else if (val.match(/^\d+$/g)) {
        val = parseInt(val, 10);
      }
    }
    const key = lineArr[0];
    const currentNotes = getCurrentFeature().notes;
    if (currentNotes[key]) {
      //array already exists, so push value into it
      currentNotes[key].push(val);
    } else {
      //array doesn't exist yet, so create it and populate it with the value
      currentNotes[key] = [val];
    }
    // remembered so that run-on lines can be appended to the last value
    currentFeatureNote = currentNotes[key];
  }

  /** @returns {string} The first token of the line (split on "=" if present, else on whitespace). */
  function getLineKey(line) {
    const trimmed = line.replace(/^[\s]*/, "");
    const parts =
      trimmed.indexOf("=") < 0 ? trimmed.split(/[\s]+/) : trimmed.split(/=/);
    return parts[0];
  }

  /** @returns {string} The rest of the line after its key. */
  function getLineVal(line) {
    if (line.indexOf("=") < 0) {
      return line.replace(/^[\s]*[\S]+[\s]+|[\s]+$/, "").trim();
    }
    return line.split(/=/).slice(1).join("");
  }

  /** @returns {boolean} True when the line starts with a non-whitespace character. */
  function isKeyword(line) {
    return /^[\S]+/.test(line.slice(0, 10));
  }

  /** @returns {boolean} True when text starts, after leading whitespace, within the first 10 characters. */
  function isSubKeyword(line) {
    return /^[\s]+[\S]+/.test(line.slice(0, 10));
  }

  /** @returns {boolean} True when the first 10 characters are all whitespace. */
  function isKeywordRunon(line) {
    return /[\s]{10}/.test(line.slice(0, 10));
  }

  /**
   * Give a feature its `name` (taken from the first available qualifier, capped
   * at 100 characters) and translate a `direction` qualifier into `arrowheadType`.
   */
  function postProcessGenbankFeature(feat) {
    const { notes } = feat;
    const nameSources = [
      "label",
      "gene",
      "ApEinfo_label",
      "name",
      "organism",
      "locus_tag",
    ];
    const source = nameSources.find((noteKey) => notes[noteKey]);

    let name;
    if (source) {
      name = notes[source][0];
    } else if (notes.note) {
      //if the name is coming from a note, shorten the name to 100 chars long
      // (qualifier values can be numbers, so coerce before slicing)
      name = String(notes.note[0]).slice(0, 100);
    } else {
      name = "Untitled Feature";
    }

    name = String(name);
    if (!name) {
      name = "Untitled Feature";
    }
    if (name.length > 100) {
      //shorten the name to a reasonable length if necessary and warn the user about it
      addMessage(`Warning: Shortening name of feature ${name} (max 100 chars)`);
      name = name.slice(0, 100);
    }
    feat.name = name;

    if (notes.direction) {
      const direction = String(notes.direction[0]).toUpperCase();
      feat.arrowheadType =
        direction === "BOTH" || direction === "NONE" ? direction : undefined;
      delete notes.direction;
    }
    return feat;
  }
}
641
+
642
/**
 * Decide whether a line inside the FEATURES section continues the previous line.
 *
 * A line is NOT a run-on when either
 *  - its leading whitespace is exactly as long as `featureLocationIndentation`
 *    (the indentation measured on the first line after the FEATURES header,
 *    i.e. the column where feature keys such as `rep_origin` start), or
 *  - its first non-whitespace character is "/" (it starts a new qualifier).
 * Every other line is a run-on.
 *
 * Example:
 *   FEATURES             Location/Qualifiers
 *        rep_origin      complement(1074..3302)
 *                        /label=pSC101**
 *                        /note="REP_ORIGIN REP_ORIGIN pSC101* aka pMPP6, gives plasm
 *                        id number 3 -4 copies per cell, BglII site in pSC101* ori h   <-- run-on line
 *                        * [pBbS0a-RFP]"                                               <-- run-on line
 *                        /gene="SC101** Ori"
 *
 * @param {string} line - The raw line, including its leading whitespace.
 * @param {number} featureLocationIndentation - Indentation of feature key lines.
 * @returns {boolean} True when the line is a run-on.
 */
function isFeatureLineRunon(line, featureLocationIndentation) {
  const lineIndentation = getLengthOfWhiteSpaceBeforeStartOfLetters(line);
  return (
    featureLocationIndentation !== lineIndentation &&
    !line.trim().charAt(0).match(/\//)
  );
}
678
+
679
/**
 * Count the whitespace characters at the very start of a string.
 *
 * @param {string} string - Text to inspect.
 * @returns {number} Length of the leading whitespace run (0 when there is none).
 */
function getLengthOfWhiteSpaceBeforeStartOfLetters(string) {
  const leadingWhitespace = /^\s*/.exec(string);
  return leadingWhitespace === null ? 0 : leadingWhitespace[0].length;
}
687
+
688
+ export default genbankToJson;