npsearch 2.1.2 → 2.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 036557512365e70a5fcc7cb9ca30134db25bfc70
4
- data.tar.gz: af59a6031d843d7244820d1a7be721db542e6617
3
+ metadata.gz: 6357adc8080129fa2b76256669596bd7c0b956ba
4
+ data.tar.gz: a78e79d1462978dcb599d62d848a604aa86d883f
5
5
  SHA512:
6
- metadata.gz: 372005332db090e60d2012a7ffda5c0fea85143185a0fcdc47a1f7a52bc7dafdde7dc3555fc309a48dbdb7e969d3af95f99ba3c9506f565ad760c08418c40b16
7
- data.tar.gz: 13b0ff78295b3b12400883f84e9119ce867474b76dabd8b7d57820a2d594e5ece81c6555c23c568aebb418021bcd1e1317f9ef449af3e8b5e74193d53ad89857
6
+ metadata.gz: 6557749bff5b29c99dc26edcf577df767f64382f6eb6cf8ab3f62c73b9ef5916fba201788efcb38914c9412971d42399825fb1b35c2be54e0b5e5e54c6891b3d
7
+ data.tar.gz: 96129c33bb5f7c21628d2578959283c72b4201d157992c0f2bf54f4881af6b8c6b85b309f86cab4a4307c249db4b3653973039c8273264540787e60740ec5a80
@@ -23,7 +23,7 @@ Banner
23
23
 
24
24
  opt[:temp_dir] = File.join(Dir.pwd, '.temp',
25
25
  Dir::Tmpname.make_tmpname('', nil))
26
- opts.on('-d', '--temp_dir path_to_temp_dir',
26
+ opts.on('-t', '--temp_dir path_to_temp_dir',
27
27
  'The full path to the temp dir. NpSearch will create the folder and',
28
28
  ' then delete the folder once it has finished using them.',
29
29
  ' Default: Hidden folder in the current working directory') do |p|
@@ -36,6 +36,11 @@ Banner
36
36
  opt[:num_threads] = n
37
37
  end
38
38
 
39
+ opt[:debug] = false
40
+ opts.on('-d', '--debug', 'Run in debug mode') do
41
+ opt[:debug] = true
42
+ end
43
+
39
44
  opt[:min_orf_length] = 30
40
45
  opts.on('-l', '--min_orf_length N', Integer,
41
46
  'The minimum length of a potential neuropeptide precursor.',
@@ -0,0 +1,112 @@
1
+ >1108382 length 1440 cvg_15.4_tip_1
2
+ CTGGCGTAATTCACGAGCCTCGATGTGAGACGTCAGATGTTCAGGCTATGACAGGATGTGCATGTTATCATGCGGCCCAAATCTTACCTTTTCCTCTTCT
3
+ TCTTAGGCCTTTTTGCTGTGCTGGATTGATCGGTGCATTCTTTTCTCTTGTGAGTGAGTGAGCCACAGATGTGGCACCCCATAGACTGCCTATTGCATTG
4
+ ATGATCTGGTAACTCACATCGCTGGCAAACTGTACAGTGAACGTAACCTGGTTTGACGCATCTCTGACAATCCTTGCAATGCTTATACGTGCTGCCATCT
5
+ TTGGACGTGCAGCTGTTGCATTGAGTACAGTGTATATTCTCTTCCGCAACGTAGCGTTTACACGCTTTACAAAACCTGTAGCCTTCATCGAGAGGAAGAT
6
+ GGATTTCGGAGGGTTGAAGATTGGTGAAGACTCTGACAGGGGAGCCTTTCTTACGAGCCTTCTTCTGATTGCTCTGGAACAACGGGTGATTATCATAGTC
7
+ AACCTTATAGTCAAGCATTGTAAAAGAAGGAAAACTATCCAGAATTCTTTGTTCTAAGAAGTATGGGAAGATCCAAAGTAGTGGAAGCTTAGCTGTTGAG
8
+ CCAAGGTCTTGTAACCCCATTGTCCATCTTCCCATCATCCTCTGGATGACCTCAGCCAGGATCTCCACCCTGCCTCCAAAAGGAGGGTCAGTAACCATAA
9
+ CGATACCATCGCCATGGTTACGATGCAAAAAGTCCTCGCAGATAGTCCTGCTCTTCTCGCCGTCTAAAAAGTGATCGTTGAACATGTTGTAACGACAGAA
10
+ GAGGCTCGGAGGGTAGAAATGACCATAACTATGGTCTATATCCAACAACAGACTGCTGAGGTCTGATGATGCCTCAGACTGTATCATCTCATGTAATCTG
11
+ GGAGACCCAACACACAGCACTCTGTCATAACCGAGCCTCTTAATGGTGGTGATCAGAAACTCAGTGGCTGTTTTAGAGAACAGATATTGAGCGTTTGTTT
12
+ TGTTGTCCTCTAGCGGGAGGAGGAGATAGGATGGTTGACTCAGGATAGCGTCGGTGAGGTCAGCCTTTATCTTATGCTTCGAGTGAGTCTGGACTTCTTG
13
+ TGGACGCAGTAACAGGCTGCAGGATTGACAAAATTTCCTCTCAGATGGACTCAAATCCTTAGTGTCCTGCATTCTTTGGAAAAAATGTTCATGGCTGGTC
14
+ CACGGCTGCTGTGATCTGTTGAGTTCCTCTCTGCTCATCTTCTTAGCCTCAGAGACTTTCTCATCAGCGAGCTGGTAAAAGCTGCAATCTTTACGATCTC
15
+ TGCATGCCGAGCAGGCATAGTAGCGCTGTTTCTTGCGCTTCCCATTGAACCGCTCAAACAGTAACGTCGGACCATGTGGACAACAAGGTGAATTCTGCAC
16
+ CAGAGACACCTCTACGCCTAGAGTATTTGCCACACTAGCA
17
+ >1108384 length 1440 cvg_51.3_tip_1
18
+ AAAAAAAAGACAATTTGTGTTCTTAGTAATTTTTATGTTTTTTTTCTTCGGGTAATCATGGAATCAAACAAACTTTTATACCTCACCGTTGGAGGCTTCA
19
+ TGATGGTAGGGGGCACCACACATACCAGGTTATTTTCCAATCTTTAAAGAGTATCCACACTTTCCATCTTACATAAACTTTCCAATGGAACGTACCGGCA
20
+ GTCCAACAGAACAGCCCTTGTGCACGCGCTGGATACTTTAAAATCCAATCAGAACATTCCTACCTTTCTTACTTTTGAGTCGCCTCACTGGTAATAGCTA
21
+ CACAAAATTCAAATTTTACACAAGTGAATATTATGCGCATCAGTATTTGAAACTTTCCAGTGGAACGATTTGGCACTCCAACGAAATAGCTTGTGTGCAC
22
+ GAACTGGGATTCTTATCCAATCAAATGAGAACAATCCTACTATTCCTTCTTTTGAGGCGCTCCCCAATGTTAATAGTTACTAAAAATTCCACAAATGAAT
23
+ CGATCCATCGTGTCATCATTTCTGCCTCTCCTGACTGATGTCACTCTCGAGTTTTTCCACCTCTCGTTCGATCCCAGTCCGTATATTCTCTGGCAGTTTC
24
+ CACTGCTGTAGGAGGTCCATGCGTTTGTAGTCTGGTAGCCACCATGTTTTACCTTGTTCTACGAGTTCTTCTTTGACGTTGGAGAAATCTACACCCTGTA
25
+ GCTCCGCCCTCTCCCAGCGTTTGTACCACGGCCTCGGCTTCAGTTTGACCTTGATATCATTGATGGGTACTGGTTTGCCAGGGGGGTGTTTGATAGGCAC
26
+ CATGTTAGGATCGATGGTGCTGTATTCCGGCAGAGCATCCTGTAGATATTTCAAGTCGCTGTCTAACCTCTTCTCTAACTTCAACATCTCAACCTTCTGG
27
+ ATGAGTGGGTTGTACAACTCAAAACAAATTTCAACTCCTAAATCGTTAATGACGTTGCGAAGGATGAATGTAGCTCCCAGCCCATAGCCACCTCTTCTTG
28
+ TACAAATACCAACAAATCGGTTGGATTTACCCTTTGCATATATGTCCGCTGTTGTCACTGCCATTATGCTGCCAACATAGAATTCAGGGATATCGATGAC
29
+ TTTCCTTCTCTTGTAGCAATCCTGTCTCTCTAAGTAATCAAGGAGTTTTGACCGTGTTCCCATCGGTGGCAATAACTCTGGACTTGTAAAGCGCTCTCTT
30
+ ATCTCACCGTACCCACTGCCATCGTCGTCATGTGTAAACTCGTGCTTCTTATCGGCTGTAGTTTCTGCAATGGTGATGTCATCTACCGATGTGGCGAAAT
31
+ TGCATTTACCAGCACCTGCAACCGAGACATGTTTATGTAGAGAACCAGCTCTACCGGCCAATCTTAAACTCGATCCAGCCCGCAATAAACACGAAGTTCG
32
+ CACCAGAACACATGCAGACGTCGTGGCCGCCATGATGAAT
33
+ >1108386 length 1440 cvg_65.7_tip_0
34
+ TTTTTTGTTGCACTTTTGAATCAAGCCCCTTGCTTTCTTTGTCTCTTACTGCTGTTTTTATGTCCTCATCAAAAAATTAGCATTTTAAAATAAACCCACC
35
+ ACCAGTAAATACTCTACCTGCTGAAAATTACACTCATAACAACAAACACAAACACAAGTTTTGGTCAGTTAAAGATGAGATTTAAGTATCATGCAGTAAA
36
+ GGATATGTTGTCCAGTCTGTTCAATCTTGTTTCCTCAGCTTTTTTTTTTAAAGAAGAAATTTAGAACTTTCACCGATACTTGATTCTTAAAGATATATAA
37
+ ACACACTCATGATCATCATGTGCGATTACTATAGAGTGACACAAGTAGGATACCACTACACACTTTACTTTATCAAGTAATAAACCACAAGGCCGGCTGG
38
+ GTAACTTTTGATTCATTTTGAGTTGAACGTTTCATTTTAAGCTGACATTAGATAAACGTGTTGTTGCATTTAACTTCTCGGCTAAAATGAACTTGTTCCC
39
+ TTTATTCCCTCAAATTCTTTTCAAACCGTAAATTTTAATTATATTCATTGTAAATATCTTGAGTATTTATTTCTATGGTTTATATCTTACTCTTCTATTA
40
+ ATAAGCGCTTTTAGTTGTGAGTATTAGAGTTATTAATAAAATGTTTTGCTCGTTATGATCCCCCAAAGGGGCTGTTCCACACATAACTGTTTTTGAATCT
41
+ TCAACAAATCTCATCAAATATGCTGGGATGAAACTAAAATAGACAATGAGAAGAGGGTGTGGAACAGTTTTAACTTGCGAGGGCCACGCTGATCTATTGC
42
+ TCATATGGTGGTGGCTTGCTATCAGGAGACTGGATAGCATCACTGTAGGTTGGCAGCTTGGGGAGCAGAGCCTCAGCATCCATGGCATAATTGCCAGATT
43
+ GGGCATTGTCTCCCATCTCACGGGCTGCGATGTACTTGTAGCATGACCACACACAGGCAATGCAGTAGGCCTTGACTACCATGATCATCATGAAGATGAC
44
+ CAGGGCAACGATGACAAACCACTGGTTGTCCAGATCCATCTTGTTGATGGGCAGCTTCTCCTTCCAGTGCATGAACTCCTGGTTGTACGGGAACTTGCCA
45
+ CTGTCTTCGTAGTCAAATTCTGGGTAGTATGACATTAGGCCGATAGCTGTCAGGCATGTGATGAAGAAATCAAACAGCTGCAGGCAGAAGAACGGCAGCA
46
+ GGTAACTTGACTGGTGCTGTGCAACTCCCTTAATCATCATGCTTGTGATCAGAAAGTAGCAACAGATGATGACCAGTCCAACACAGTAATCAGATGCATC
47
+ ATGCTGGTCCTCAATTGACAAACTGGTGGACGTGCTGTCCTCAGTCTTGACAGTGCCCTTGATGATCATAGATGCAATAAGCAGCATAGCACATACTTGC
48
+ CCAACCATGTGCCATAACCCGATCAAAACCGAGCCAGTTC
49
+ >1108388 length 1440 cvg_25.5_tip_1
50
+ CAGCTTATGTAGTTATCCATTGTATCCCACTGGCAGCATTGGAAACACAAAGACCATTAGAGCAGAAAGCAATGGTAATGTGTTCTCATTGGGTACTTTC
51
+ GCTCAGTAACTGGTGATTAGTGGTCAACCATAGCATGATCAAGGCCAACATTGTCATTTTGTGGATTGCTGTTTCGTTTACTGAATACTTCTAGGGTCTT
52
+ ATTAACTGAAGATACACCGCACGCCAGGTCAGCAGCCTGAGAGCTTAGACCGAAAGCAAGAGTGCTGGCTTCGCACACGCTCGTTTCTTTGATAGCTAAG
53
+ GAGTAAACACTGTCATCATCTGAGACCATGGAAGTTGCAGCACTGGGTCCTTTGGAATACTACATCATGGGATTGATACTCACTGTGGAAGCTATACTTG
54
+ GAACAATATGCTGTGTTCGTCTTCTCCTGGTGTATTTGAAAAATCCAACCCTCCACCAACCGCAGAGTCTTCTAGGCATCACACTGTGCATTGGAGACTT
55
+ GGGAATAGCTCTGATGTGCCCATTTGCCGCCTTTGCAAGCTTCAGTGAAACCTGGCCATTTGGAGATGAGTACTGCCAGCTGTATGCCTTTGCTGGGATG
56
+ CTGTTTGGTACACTCAGCATATCAGCTATGGCATGCTTGGCTTTGGACAAATATTACTCAAGCTCAAACGATGCTAAAGGGGGTTCTAGTCAGCCTTACA
57
+ TCTTGATTACATCAATCATCTGGCTAAACGCCCTCTTCTGGTCACTAACCCCACTGAGTCCCATCGGTTGGGGGCGCTATGCCATCGAACCGCCTAAATC
58
+ GACGTGCATGTTGGACTTTGCAAACCGTGAGCCATCATACATGATGTACTTGTTCTTAATGACAAGCACGGTCTATGCGTTGCCAGTAGGCGCCATCTTA
59
+ TGGTGCTTAGTGAAGCTCAGAAAGGGAAAAGATCCAAACAACGGAAAGAGCAAGGTATGTCTTTTGGTGTTGTTCTCATTGATTGTGTACTGGGGAGCCT
60
+ ATGGAATCGTAGCACTATGGGCAGCACTAGATGACATACATAATGTCCCGCTCCGATTGGTTGCTGCTGCTCCAATCCTGGCCAAGATCTGTCCAATCGG
61
+ AAACACTGTAATGCAGGTGCTGACCAATAGGAACATCCGTTGTCTGATGTACAGAAAGGAAACAGTTGCATCCAACAAGAGGGAATGAGGAGCATAATGC
62
+ AGAAAATAATTTCGTGTTATGATAGGTCTAATAATGCCGGTGTTCTCCTTTTCTTTGCACACGCTAAAGGCCATATTTTGTTACACAGCATTGTTTTTAT
63
+ TTCATATCGTTATTGTAGACTTGTATTGCCATTTAAGTTGCAATGTCAGATTTCCTTAGATTTTTTTTTTATCTGAACTAAGTTTGACAGATGGTGCTTT
64
+ TTTAAAACGAAAAAAAACCACAAATTTCCTTAAACCATGG
65
+ >1108390 length 1440 cvg_34.1_tip_1
66
+ GCAACCTTTTGGGTACAGAATGCGAGAAAATCTGGGCAGATCGGTCACAGTATTTTAAGGATTTCTAGGTTAACTTTGAATCAAATAAATTCAAACTCTA
67
+ GGAGGAAGATTATCACATCATGCTGCTGTCGTGGGATTCATCACAAACCGGTATTTGTTTCATTACTGAAAGACAAACTTAAAGGAAGTCTTAATAGGAG
68
+ CCTGGGTCCATGTTACTCCACAAATTCACATTTGTTTCCTAAAGAAGTGACTAGTCCGGAGAAACTCACGCAAGATGAAATAACGGCCCTGCACCAAAAT
69
+ ATTTGTAAGGAATTAATTACAGATGAGAGATCTTTACAAGAGGTTGCACAGTACTATTTTGATGGAAAAGGCAAAGCGTTTAGACCCATGTTGGTTTTAC
70
+ TCACGGCTGGTGCATGCAATACTCATACTCAGGGTTCTAATAGTAAACTGGTGGATACTCAGAGACGGATTGCAATGATAGCAGAGATGATCCACACAGC
71
+ GAGTCTCATGCATGATGATGTCATCGATAATGCAGATACAAGGAGGAACAAAACAGCAATCAATGAAATGTGGGGACAGAGAAAGGCAATTTTAGCTGGA
72
+ GACTTTGTTCTATCAGTTTCATCTCAGGTTTTAGCAAGAATAGGAAACGAGGAAGTTGTTTTAATTCTTTCCCAAGTCATAGAAGATCTTGTTAGAGGGG
73
+ AATTTATGCAACTTGGTTCTAAGGAGGATGAAAATGAGCGCTTTGCCCACTACCTCAAGAAGACGTTCAAGAAAACAGCAAGTCTCATGGCTTACAGCTG
74
+ TCAAGCAGTAGCAATTCTGGGTGGTTGCAGTGCTGAGGTGTGTCAGATAGCCTACGAGTACGGCCGGAATACAGGCATGGCATTCCAGCTGATAGATGAT
75
+ GTATTAGACTTTGTATCCAGTGACGCCGCTATGGGGAAACCGACGGCTGCTGACTTGAAGCTGGGCCTTGCAACCGGGCCAGTGCTGTTTGCTGCCGAGA
76
+ AGTTTCCAGAGTTGGATGCCATGATCATGAGACGGTTTAGCGAGACTGGTGATGTAATGGCTGCAAGGGAAGCTGTTGCCAAGACTGAAGGCATCGAGCA
77
+ GACAAGACACATCGCTAACCAGCACAGCTTAGAAGCTCAGAAACAGATCAGTAAACTACATCCCAGTCCAGAGAGGCAAGCACTTATAGAACTTGCACAG
78
+ CGAGTTGTTACAAGAATAAAGTAAATCCGTAGTTAAAATCCTTGTGTCAGTGCAAGAATGTTGGATCTCTAAAAACAGCCTTTATGCAATACAGGTTGTG
79
+ AAAAATGCACAATGTTGTCTTTCACCATAACTACCCCTGATGTGCACTAGTGTACACAGAAACCCGGTTGCGTCAAAATGAAATGGTGTGAGACAAGACG
80
+ TCCTAATGAAATAGAGGAAACACGTTTTGCATAGATTTCT
81
+ >1108392 length 1440 cvg_27.6_tip_1
82
+ ATTTGATTTGATTTGAAATGGTTAATTAAAGTAAAAAACTGTTTCACAGCCAGTAGGCTGAATTACATGATTTGTATTAGTTAAAAAATATTCAAATGTA
83
+ TTATAGAAATTACAATAAAAACAAACAAATAATGCATAGGCAGTGGAACAGAAAAATGAACGTGGGGATCCCCAAGGAAAACAAGATTTTTACAAGAGCA
84
+ CTTAAAATAAAACAAGGTAAAATAAGGGCACACCGCTGTGAACGACCCAAAACAATGCAATTAAGTAGATTTAATAATAATAAAAAAAAAACAATAAAAA
85
+ TTTTAATAATACGCGTGAGATAAGGAACTTGACTGTTTTAAACCATTCAGCTCTACAGCGATGTATAGTATGCTTTGTTGAACTCCGAGTTTGGTGAATT
86
+ CTTACGTTTCTTCGGTGTAGCAAATGACGGAGGCTTGTTAGTGAATTTTGCATGGATTATGCAAAAGAAAGGCACGATTGAAAGCACCTGCTCTCCCAGG
87
+ TAGGGAGCTGACAAGACAGTGCTTGAGCGCTTGTTCATATTGACTATAGTTGGTATCAAGTATAATCCTGCCTGCTCGCTTCTGTGTCCACTCATGGCTT
88
+ ATACACTGCTCCTTTGTTAGACCTCAAGATCAGACAACAGTCAGCATACTCCAGGACTAGCCTGACAATCGTCATGTAGTAAGTCAGACGGTCGCCTCGT
89
+ GGAAAAGTTGTCTAAATTGAGAGAATGAAGACATATTGATGACTTTTGGAAGGTTTAATGATGGACGCAATATGCGCATTCAGTTCCTAGCAATTTAACG
90
+ GTGTTATGTTGAATGATGACCTCTTTTTTCCCAAAGGAAACGTTCATTGTTGCGCACTTGTTCAGGTTTAGTTGTATTTGTTCCTGTCTGACCAATCATA
91
+ GGGTAACCAGATAGTATCAGGCAACATTGAATGAGCTGATGCATGGCGAGTCTCTACCACTGTTGGGTTGTTCACGTGTCTGAGAACGTGAAGAGCATTA
92
+ GCTGCATCACCAGTGGGCAGCATCATGTTGGTGTCTTTACCACATAGAGCATCTTTGATGAGCAACAGAAAAAGTATAGGTCCCAATTTAGTTCCTTGGG
93
+ GAACACCGGCTGTTGTTGACTGCCACACAGACTTCACACTTTTTGTACTTCACCCATTGTCTGCAATCTGTTAAAAAGTTTGCGAATTATGTTACCAGAC
94
+ TGACGTCAAGTTTGACCAACTTGGATATTGCGATTTCGTGGTCAATGCCATTAAATGCTTTGATGAAGTCAGTAAGGAGGACCGTCGACTCGTATGACAT
95
+ GAAAGAAACTCGAGGGGTCTCGCGAAACATTTACAATTTAAAACAGGATCATTAAGAATTAATTAAAGGAAGATCAGCGTATCGACTCTGACAAATACCA
96
+ GCAGGTTCAGGTTAAGGTTCAGGTTTATTTTCCATACCAT
97
+ >1108394 length 1440 cvg_66.6_tip_0
98
+ AGATAACGACGGGAAAAAGTCAAGATAATTTCACGTACACAACTCTTCAGAAGAAGAACGGAAGCAAGAAAGGATAAGATGATGGTGCGATTCGTAGCCT
99
+ TACTCGGGGCAGTCAGCCTACTGGTATGTCAATCTGCAGGACTTGATGCCGCAGACGTCGAGGAACAAGACGAGTTCAACAAACCCTATGCTCCTGACAG
100
+ TTCGTATGCGGATTTAAATGCACTTTTGGGCAACAATGTGCCAAGTCTACACAGCGCCTCCAAGCGTCAACAAAGTGACAGGGAGCGTGAGGTTGAAGCA
101
+ GCCCAGACGCAATTTTACCCGTATGGAAGAAGAACTGATCCCAGGAAAGCGTCTGGTGGATTCACCTTTGGCAAGAGAGGGCAGTATTTTATCCCCATTC
102
+ CGTACGAGAAACGAGAGATGGATGAGGTGAACCCGTACAGCGTAGCTAAGCGCGACGACGAGCTGACCGGACTAGAGGAGTACCAAGCTAGCAAGAGGTC
103
+ AGGTCCTTATTCCTTTAACAGCGGGCTGACCTTTGGCAAGAGGGAACCCGAGAAGAGGAACATATTCGGATCTTATGACTTCGGGAAGCGGGCTTACGGC
104
+ AACAATTTCAGCTTCGGCAAGCGAGGCATGGGAGTGTCCAGTTTTAGCTTTGGCAAACGATCCGGACTTGAGGGTGAACAAATGATGCCGGAAGACAAAC
105
+ GGGCGTTCGGAGACTTTTCCTTCGGCAAGCGCAATAATGGTCTGTCCAGCTTCACATTCGGCAAGCGAGAGGGTGAACGATAGAACACGAGAGGGCGCCA
106
+ TACTGTCTACAATGTGATAATTATAGTATCTTAATTATTTCAAAACCATACTTGATAAGAAATAACTGCTTGCGTTTTGAGTTAAAACATCAGTTCCAAG
107
+ TATACAAACAAATTTTAAATAACATTGTTTAAAGCACGGATTTTGAATTAAAATGAAACGATAACGCAAAAGTGTGTAAATATAAGCATAGTAAGCTATC
108
+ AACATCTAGAATTATGGAAGATATTTCAAGATACTAAAACTTGGAAGCTTTAGTTAATTTTAGATCTGGTTGAGATTCAAGTCTTCTTTTCTAGATTTTG
109
+ TTTCCAGCCAGGGCTTTTACTATCCTTAACGTTTGGGGAAATTGCCAGGGTTGTGGTCACCTTGTTAAGGTCCTAAACAACACATTTTTTGTTAGTAAAA
110
+ AATTACCCAATTAAGACAATTCATGAGTAAGCGACAAAAACTTAATAATTTAGCATATTTGCTTACGTTAGTTACTACAACAATAATTGAAACTTTCTTA
111
+ AACACGGTTTTTCTAGTATGCTTTCCTGGTCAGCAATTAACTTGTAGATGTTTTTCAAATACTATGAATTCATATCTACGTAGAAATCTTAAAGATGTTT
112
+ TCTGTGATTAAGTTTAGAACAACCTTAGCCAACAATCTCC
@@ -1,7 +1,9 @@
1
1
  require 'bio'
2
+ require 'english'
2
3
  require 'fileutils'
3
4
 
4
5
  require 'npsearch/arg_validator'
6
+ require 'npsearch/logger'
5
7
  require 'npsearch/output'
6
8
  require 'npsearch/pool'
7
9
  require 'npsearch/scoresequence'
@@ -11,21 +13,23 @@ require 'npsearch/signalp'
11
13
  # Top level module / namespace.
12
14
  module NpSearch
13
15
  class <<self
16
+ attr_accessor :logger
14
17
  attr_accessor :opt
15
18
  attr_accessor :sequences
16
19
  attr_reader :sorted_sequences
17
20
 
18
21
  def init(opt)
19
- @opt = ArgumentsValidators.run(opt)
22
+ @opt = opt
23
+ ArgumentsValidators.run(opt)
20
24
  @sequences = []
21
25
  @sorted_sequences = nil
22
- @pool = Pool.new(@opt[:num_threads]) if @opt[:num_threads] > 1
23
- FileUtils.mkdir_p(@opt[:temp_dir])
24
- extract_orf if @opt[:type] == :genetic
26
+ @pool = initialise_thread_pool
27
+ create_temp_directory
28
+ extract_orf
25
29
  end
26
30
 
27
31
  def run
28
- input_file = @opt[:type] == :protein ? @opt[:input_file] : @opt[:orf]
32
+ input_file = @opt[:type] == :genetic ? @opt[:orf] : @opt[:input_file]
29
33
  iterate_input_file(input_file)
30
34
  @sorted_sequences = @sequences.sort_by(&:score).reverse
31
35
  Output.to_fasta(@opt[:input_file], @sorted_sequences, @opt[:type])
@@ -33,16 +37,37 @@ module NpSearch
33
37
  remove_temp_dir
34
38
  end
35
39
 
40
+ def logger
41
+ @logger ||= Logger.new(STDOUT, @opt[:debug])
42
+ end
43
+
36
44
  private
37
45
 
46
+ def initialise_thread_pool
47
+ return if @opt[:num_threads] == 1
48
+ logger.debug "Creating a thread pool of size #{@opt[:num_threads]}"
49
+ Pool.new(@opt[:num_threads])
50
+ end
51
+
52
+ def create_temp_directory
53
+ FileUtils.mkdir_p(@opt[:temp_dir])
54
+ logger.debug "Successfully creating temp directory at: #{@opt[:temp_dir]}"
55
+ end
56
+
38
57
  # Uses getorf from EMBOSS package to extract all ORF
39
58
  def extract_orf(input = @opt[:input_file], minsize = 90)
59
+ return if @opt[:type] == :protein
60
+ logger.debug 'Attempting to extract ORF.'
40
61
  @opt[:orf] = File.join(@opt[:temp_dir], 'input.orf.fa')
41
- system "getorf -sequence #{input} -outseq #{@opt[:orf]}" \
42
- " -minsize #{minsize} >/dev/null 2>&1"
62
+ cmd = "getorf -sequence #{input} -outseq #{@opt[:orf]}" \
63
+ " -minsize #{minsize} >/dev/null 2>&1"
64
+ logger.debug "Running: #{cmd}"
65
+ system(cmd)
66
+ logger.debug("EGexit Code: #{$CHILD_STATUS.exitstatus}")
43
67
  end
44
68
 
45
69
  def iterate_input_file(input_file)
70
+ logger.debug "Iterating the Input File: #{input_file}"
46
71
  Bio::FlatFile.open(Bio::FastaFormat, input_file).each_entry do |entry|
47
72
  if @opt[:num_threads] > 1
48
73
  @pool.schedule(entry) { |e| initialise_seqs(e) }
@@ -54,10 +79,10 @@ module NpSearch
54
79
  end
55
80
 
56
81
  def initialise_seqs(entry)
82
+ logger.debug "-- Analysing: '#{entry.definition}' (#{entry.aaseq.length})"
57
83
  return if entry.aaseq.length > @opt[:max_orf_length]
58
84
  sp = Signalp.analyse_sequence(entry.aaseq.to_s)
59
85
  return if sp[:sp] == 'N'
60
- # seq = Sequence.new(entry.entry_id, entry.definition, entry.aaseq, sp)
61
86
  seq = Sequence.new(entry, sp)
62
87
  ScoreSequence.run(seq, @opt)
63
88
  @sequences << seq
@@ -65,6 +90,7 @@ module NpSearch
65
90
 
66
91
  def remove_temp_dir
67
92
  return unless File.directory?(@opt[:temp_dir])
93
+ logger.debug "Deleting Temporary directory: #{@opt[:temp_dir]}"
68
94
  FileUtils.rm_rf(@opt[:temp_dir])
69
95
  end
70
96
  end
@@ -1,69 +1,86 @@
1
1
  require 'bio'
2
+ require 'forwardable'
2
3
 
3
4
  # Top level module / namespace.
4
5
  module NpSearch
5
6
  # A class that validates the command line opts
6
7
  class ArgumentsValidators
7
8
  class << self
9
+ extend Forwardable
10
+ def_delegators NpSearch, :logger
11
+
8
12
  def run(opt)
9
13
  assert_file_present('input fasta file', opt[:input_file])
10
14
  opt[:input_file] = File.expand_path(opt[:input_file])
11
15
  assert_input_file_not_empty(opt[:input_file])
12
16
  assert_input_file_probably_fasta(opt[:input_file])
13
- opt[:type] = assert_input_sequence(opt[:input_file])
17
+ opt[:type] = assert_input_sequence(opt[:input_file])
14
18
  opt[:num_threads] = check_num_threads(opt[:num_threads])
15
19
  assert_binaries('SignalP 4.1 Script', opt[:signalp_path])
20
+ logger.debug "The validated OPT hash contains: #{opt}"
16
21
  opt
17
22
  end
18
23
 
19
24
  private
20
25
 
21
26
  def assert_file_present(desc, file, exit_code = 1)
27
+ logger.debug "Testing if the #{desc} exists: '#{file}'."
22
28
  return if file && File.exist?(File.expand_path(file))
23
- $stderr.puts "*** Error: Couldn't find the #{desc}: #{file}."
29
+ error_msg = "*** Error: Couldn't find the #{desc}: '#{file}'."
30
+ logger.fatal error_msg
31
+ $stderr.puts error_msg
24
32
  exit exit_code
25
33
  end
26
34
 
27
35
  def assert_input_file_not_empty(file)
36
+ logger.debug "Testing if the input file ('#{file}') is empty."
28
37
  return unless File.zero?(File.expand_path(file))
29
- $stderr.puts "*** Error: The input_file (#{file})" \
30
- ' seems to be empty.'
38
+ error_msg = "*** Error: The input_file ('#{file}') seems to be empty."
39
+ logger.fatal error_msg
40
+ $stderr.puts error_msg
31
41
  exit 1
32
42
  end
33
43
 
34
44
  def assert_input_file_probably_fasta(file)
45
+ logger.debug("Testing whether the input, ('#{file}') is a fasta file.")
35
46
  File.open(file, 'r') do |f|
36
- fasta = (f.readline[0] == '>') ? true : false
47
+ fasta = f.readline[0] == '>' ? true : false
37
48
  return fasta if fasta
38
49
  end
39
- $stderr.puts "*** Error: The input_file (#{file})" \
40
- ' does not seems to be a fasta file.'
50
+ error_msg = "*** Error: The input file (#{file}) does not seems to be" \
51
+ ' to be a fasta file.'
52
+ logger.fatal error_msg
53
+ $stderr.puts error_msg
41
54
  exit 1
42
55
  end
43
56
 
44
57
  def assert_input_sequence(file)
45
58
  type = type_of_sequences(file)
46
59
  return type unless type.nil?
47
- $stderr.puts '*** Error: The input files seems to contain a mixture of'
48
- $stderr.puts ' both protein and nucleotide data.'
49
- $stderr.puts ' Please correct this and try again.'
60
+ error_msg = '*** Error: The input files seems to contain a mixture of' \
61
+ ' both protein and nucleotide data.' \
62
+ ' Please correct this and try again.'
63
+ logger.fatal error_msg
64
+ $stderr.puts error_msg
50
65
  exit 1
51
66
  end
52
67
 
53
68
  # determine file sequence type based on first 500 lines
54
69
  def type_of_sequences(file)
70
+ logger.debug 'Checking the type of sequence in the input file based' \
71
+ ' on the first 500 lines.'
55
72
  fasta_content = File.foreach(file).first(500).join("\n")
56
73
  # the first sequence does not need to have a fasta definition line
57
74
  sequences = fasta_content.split(/^>.*$/).delete_if(&:empty?)
58
75
  # get all sequence types
59
76
  sequence_types = sequences.collect { |seq| guess_sequence_type(seq) }
60
77
  .uniq.compact
78
+ logger.debug " The guessed typed of Sequences are: #{sequence_types}"
61
79
  return nil if sequence_types.empty?
62
80
  sequence_types.first if sequence_types.length == 1
63
81
  end
64
82
 
65
83
  def guess_sequence_type(seq)
66
- # removing non-letter and ambiguous characters
67
84
  cleaned_sequence = seq.gsub(/[^A-Z]|[NX]/i, '')
68
85
  return nil if cleaned_sequence.length < 10 # conservative
69
86
  type = Bio::Sequence.new(cleaned_sequence).guess(0.9)
@@ -71,20 +88,27 @@ module NpSearch
71
88
  end
72
89
 
73
90
  def check_num_threads(num_threads)
91
+ logger.debug "Checking the number of threads: #{num_threads}"
74
92
  num_threads = Integer(num_threads)
75
93
  unless num_threads > 0
76
- $stderr.puts 'Number of threads can not be lower than 0'
77
- $stderr.puts 'Changing number of threads to 1'
94
+ warn_msg = 'Number of threads can not be lower than 0. Changing' \
95
+ ' number of threads to 1'
96
+ logger.warn warn_msg
97
+ $stderr.puts warn_msg
78
98
  num_threads = 1
79
99
  end
80
100
  return num_threads unless num_threads > 256
81
- $stderr.puts "Number of threads set at #{num_threads} is" \
82
- ' unusually high.'
101
+ warn_msg = "Number of threads set at #{num_threads} is unusually high."
102
+ logger.warn warn_msg
103
+ $stderr.puts warn_msg
83
104
  end
84
105
 
85
106
  def assert_binaries(desc, bin)
107
+ logger.debug "Checking #{desc} binary at: #{bin}."
86
108
  return if command?(bin.to_s)
87
- $stderr.puts "NpSearch is unable to use the #{desc} at #{bin}"
109
+ warn_msg = "NpSearch is unable to use the #{desc} at #{bin}"
110
+ logger.warn warn_msg
111
+ $stderr.puts warn_msg
88
112
  end
89
113
 
90
114
  # Return `true` if the given command exists and is executable.
@@ -0,0 +1,11 @@
1
+ require 'logger'
2
+
3
+ module NpSearch
4
+ # Extend stdlib's Logger class for custom initialization
5
+ class Logger < Logger
6
+ def initialize(dev, verbose = false)
7
+ super dev
8
+ self.level = verbose ? DEBUG : INFO
9
+ end
10
+ end
11
+ end
@@ -8,15 +8,15 @@ module NpSearch
8
8
  class Signalp
9
9
  class << self
10
10
  extend Forwardable
11
- def_delegators NpSearch, :opt
11
+ def_delegators NpSearch, :opt, :logger
12
12
 
13
13
  def analyse_sequence(seq)
14
14
  sp_headers = %w(name cmax cmax_pos ymax ymax_pos smax smax_pos smean d
15
15
  sp dmaxcut networks orf)
16
16
  seqs = setup_analysis(seq)
17
17
  sp_results = []
18
- seqs.each do |seq|
19
- sp_results << run_signalp(seq, sp_headers)
18
+ seqs.each do |sequence|
19
+ sp_results << run_signalp(sequence, sp_headers)
20
20
  end
21
21
  sp_results.sort_by { |h| h[:d] }.reverse[0]
22
22
  end
@@ -24,10 +24,10 @@ module NpSearch
24
24
  private
25
25
 
26
26
  def run_signalp(seq, sp_headers)
27
- Timeout::timeout(300) do
27
+ timeout(300) do
28
28
  cmd = "echo '>seq\n#{seq}\n' | #{opt[:signalp_path]} -t euk" \
29
- " -f short -U 0.34 -u 0.34"
30
- stdin, stdout, stderr, wait_thr = Open3.popen3(cmd)
29
+ ' -f short -U 0.34 -u 0.34'
30
+ stdin, stdout, stderr = Open3.popen3(cmd)
31
31
  out = stdout.gets(nil).split("\n").delete_if { |l| l[0] == '#' }
32
32
  if out.nil? || out.empty?
33
33
  print stdout
@@ -40,13 +40,13 @@ module NpSearch
40
40
  stdin.close; stdout.close; stderr.close
41
41
  end
42
42
  rescue Timeout::Error
43
- no_results = [0,0,1,1,1,1,1,1,1,'N',1,1, seq]
43
+ no_results = [0, 0, 1, 1, 1, 1, 1, 1, 1, 'N', 1, 1, seq]
44
44
  return Hash[sp_headers.map(&:to_sym).zip(no_results)]
45
45
  end
46
46
 
47
47
  def setup_analysis(seq)
48
48
  orfs = seq.scan(/(?=(M\w{#{opt[:min_orf_length]},}))./).flatten
49
- (opt[:type] == :protein || orfs.empty? || orfs.nil?) ? [seq] : orfs
49
+ opt[:type] == :protein || orfs.empty? || orfs.nil? ? [seq] : orfs
50
50
  end
51
51
  end
52
52
  end
@@ -1,4 +1,4 @@
1
1
  # Top level module / namespace.
2
2
  module NpSearch
3
- VERSION = '2.1.2'.freeze
3
+ VERSION = '2.1.3'.freeze
4
4
  end
@@ -1,12 +1,13 @@
1
1
  require_relative 'test_helper'
2
-
2
+ require 'npsearch'
3
3
  require 'npsearch/arg_validator'
4
4
 
5
5
  # Class to test the how well the CLI arguments are validated.
6
6
  class TestInputArgumentValidator < Minitest::Test
7
7
  def setup
8
+ NpSearch.logger = Logger.new(STDOUT, true)
8
9
  @c = NpSearch::ArgumentsValidators
9
- @opt = { num_threads: 1, min_orf_length: 30 }
10
+ @opt = { num_threads: 1, min_orf_length: 30, debug: true }
10
11
  end
11
12
 
12
13
  def test_assert_file_present
@@ -17,7 +17,7 @@ class TestSequences < Minitest::Test
17
17
  sp = { name: 'test_sequences', cmax: '0.492', cmax_pos: '31', ymax: '0.612',
18
18
  ymax_pos: '31', smax: '0.950', smax_pos: '17', smean: '0.786',
19
19
  d: '0.706', sp: 'Y', dmaxcut: '0.300', networks: 'SignalP-noTM',
20
- orf: sequence.seq}
20
+ orf: sequence.seq }
21
21
  @seq = NpSearch::Sequence.new(sequence, sp, 2)
22
22
  end
23
23
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: npsearch
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.2
4
+ version: 2.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Moghul et al.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-11-13 00:00:00.000000000 Z
11
+ date: 2016-11-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -115,8 +115,10 @@ files:
115
115
  - bin/npsearch
116
116
  - exemplar_data/README.md
117
117
  - exemplar_data/genetic_data.fa
118
+ - exemplar_data/protein.fa
118
119
  - lib/npsearch.rb
119
120
  - lib/npsearch/arg_validator.rb
121
+ - lib/npsearch/logger.rb
120
122
  - lib/npsearch/output.rb
121
123
  - lib/npsearch/pool.rb
122
124
  - lib/npsearch/scoresequence.rb