neurohmmer 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (133) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +35 -0
  3. data/CODE_OF_CONDUCT.md +13 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE +662 -0
  6. data/README.md +136 -0
  7. data/Rakefile +17 -0
  8. data/bin/neurohmmer +69 -0
  9. data/data/hmm/CRH_DH44.fa.hmm +427 -0
  10. data/data/hmm/Chordate_Glucagon_PACAP.fa.hmm +484 -0
  11. data/data/hmm/Chordate_Neurotensin_precursors_neurotensin_like_Ciona.fa.hmm +526 -0
  12. data/data/hmm/Deutrostomian_tachykinin.fa.hmm +367 -0
  13. data/data/hmm/Gastrin_releasing_peptide.fa.hmm +448 -0
  14. data/data/hmm/GnRH.fa.hmm +370 -0
  15. data/data/hmm/Hypocretin_orexin.fa.hmm +451 -0
  16. data/data/hmm/Lophotrochozoan_luqin_arthropod_RYamide.fa.hmm +385 -0
  17. data/data/hmm/NPFF.fa.hmm +430 -0
  18. data/data/hmm/NPS.fa.hmm +790 -0
  19. data/data/hmm/PTH.fa.hmm +502 -0
  20. data/data/hmm/Protostomian_Allatostatin_B.fa.hmm +835 -0
  21. data/data/hmm/Protostomian_CCH_amide.fa.hmm +415 -0
  22. data/data/hmm/Protostomian_Ecdysis_Triggering_Hormone.fa.hmm +433 -0
  23. data/data/hmm/Protostomian_PDF.fa.hmm +265 -0
  24. data/data/hmm/Protostomian_allatostatin_C.fa.hmm +352 -0
  25. data/data/hmm/Protostomian_proctolin.fa.hmm +346 -0
  26. data/data/hmm/Putative_chordate_Opioid.fa.hmm +802 -0
  27. data/data/hmm/SIF.fa.hmm +286 -0
  28. data/data/hmm/TRH.fa.hmm +910 -0
  29. data/data/hmm/allatotropin.fa.hmm +433 -0
  30. data/data/hmm/ambulacrarian_amphioxus_NPS-like.fa.hmm +847 -0
  31. data/data/hmm/avp.fa.hmm +475 -0
  32. data/data/hmm/calcitonin_DH31.fa.hmm +418 -0
  33. data/data/hmm/cholecystokinin_sulfakinin.fa.hmm +433 -0
  34. data/data/hmm/chordate_OX26_QRFP.fa.hmm +475 -0
  35. data/data/hmm/chordate_somatostatin_cortistatin_like.fa.hmm +361 -0
  36. data/data/hmm/f_type_SALMF.fa.hmm +811 -0
  37. data/data/hmm/galanin.fa.hmm +391 -0
  38. data/data/hmm/l_type_SALMF.fa.hmm +523 -0
  39. data/data/hmm/nmu_pyrokinin.fa.hmm +424 -0
  40. data/data/hmm/npy_npf.fa.hmm +301 -0
  41. data/data/hmm/protostomian_CCAP.fa.hmm +415 -0
  42. data/data/hmm/protostomian_allatostatinA_buccalin.fa.hmm +706 -0
  43. data/data/hmm/protostomian_leucokinin.fa.hmm +511 -0
  44. data/data/hmm/protostomian_tackykinin.fa.hmm +760 -0
  45. data/data/hmm/tachykinin.fa.hmm +541 -0
  46. data/data/hmm/vertebrate_ghrelin_motilin.fa.hmm +382 -0
  47. data/data/raw_data/CRH_DH44.fa +44 -0
  48. data/data/raw_data/CRH_DH44.fa.aligned.fa +242 -0
  49. data/data/raw_data/Chordate_Glucagon_PACAP.fa +18 -0
  50. data/data/raw_data/Chordate_Glucagon_PACAP.fa.aligned.fa +72 -0
  51. data/data/raw_data/Chordate_Neurotensin_precursors_neurotensin_like_Ciona.fa +10 -0
  52. data/data/raw_data/Chordate_Neurotensin_precursors_neurotensin_like_Ciona.fa.aligned.fa +25 -0
  53. data/data/raw_data/Deutrostomian_tachykinin.fa +1283 -0
  54. data/data/raw_data/Deutrostomian_tachykinin.fa.aligned.fa +5707 -0
  55. data/data/raw_data/Gastrin_releasing_peptide.fa +12 -0
  56. data/data/raw_data/Gastrin_releasing_peptide.fa.aligned.fa +25 -0
  57. data/data/raw_data/GnRH.fa +22 -0
  58. data/data/raw_data/GnRH.fa.aligned.fa +66 -0
  59. data/data/raw_data/Hypocretin_orexin.fa +14 -0
  60. data/data/raw_data/Hypocretin_orexin.fa.aligned.fa +49 -0
  61. data/data/raw_data/Lophotrochozoan_luqin_arthropod_RYamide.fa +26 -0
  62. data/data/raw_data/Lophotrochozoan_luqin_arthropod_RYamide.fa.aligned.fa +91 -0
  63. data/data/raw_data/NPFF.fa +16 -0
  64. data/data/raw_data/NPFF.fa.aligned.fa +48 -0
  65. data/data/raw_data/NPS.fa +8 -0
  66. data/data/raw_data/NPS.fa.aligned.fa +24 -0
  67. data/data/raw_data/PTH.fa +24 -0
  68. data/data/raw_data/PTH.fa.aligned.fa +144 -0
  69. data/data/raw_data/Protostomian_Allatostatin_B.fa +17 -0
  70. data/data/raw_data/Protostomian_Allatostatin_B.fa.aligned.fa +120 -0
  71. data/data/raw_data/Protostomian_CCH_amide.fa +24 -0
  72. data/data/raw_data/Protostomian_CCH_amide.fa.aligned.fa +72 -0
  73. data/data/raw_data/Protostomian_Ecdysis_Triggering_Hormone.fa +24 -0
  74. data/data/raw_data/Protostomian_Ecdysis_Triggering_Hormone.fa.aligned.fa +84 -0
  75. data/data/raw_data/Protostomian_PDF.fa +16 -0
  76. data/data/raw_data/Protostomian_PDF.fa.aligned.fa +32 -0
  77. data/data/raw_data/Protostomian_allatostatin_C.fa +28 -0
  78. data/data/raw_data/Protostomian_allatostatin_C.fa.aligned.fa +56 -0
  79. data/data/raw_data/Protostomian_proctolin.fa +16 -0
  80. data/data/raw_data/Protostomian_proctolin.fa.aligned.fa +32 -0
  81. data/data/raw_data/Putative_chordate_Opioid.fa +8 -0
  82. data/data/raw_data/Putative_chordate_Opioid.fa.aligned.fa +32 -0
  83. data/data/raw_data/SIF.fa +28 -0
  84. data/data/raw_data/SIF.fa.aligned.fa +70 -0
  85. data/data/raw_data/TRH.fa +10 -0
  86. data/data/raw_data/TRH.fa.aligned.fa +50 -0
  87. data/data/raw_data/allatotropin.fa +31 -0
  88. data/data/raw_data/allatotropin.fa.aligned.fa +90 -0
  89. data/data/raw_data/ambulacrarian_amphioxus_NPS-like.fa +8 -0
  90. data/data/raw_data/ambulacrarian_amphioxus_NPS-like.fa.aligned.fa +24 -0
  91. data/data/raw_data/avp.fa +28 -0
  92. data/data/raw_data/avp.fa.aligned.fa +70 -0
  93. data/data/raw_data/calcitonin_DH31.fa +36 -0
  94. data/data/raw_data/calcitonin_DH31.fa.aligned.fa +126 -0
  95. data/data/raw_data/cholecystokinin_sulfakinin.fa +28 -0
  96. data/data/raw_data/cholecystokinin_sulfakinin.fa.aligned.fa +84 -0
  97. data/data/raw_data/chordate_OX26_QRFP.fa +10 -0
  98. data/data/raw_data/chordate_OX26_QRFP.fa.aligned.fa +25 -0
  99. data/data/raw_data/chordate_somatostatin_cortistatin_like.fa +10 -0
  100. data/data/raw_data/chordate_somatostatin_cortistatin_like.fa.aligned.fa +20 -0
  101. data/data/raw_data/f_type_SALMF.fa +118 -0
  102. data/data/raw_data/f_type_SALMF.fa.aligned.fa +590 -0
  103. data/data/raw_data/galanin.fa +93 -0
  104. data/data/raw_data/galanin.fa.aligned.fa +552 -0
  105. data/data/raw_data/kisspeptin.fa +16 -0
  106. data/data/raw_data/kisspeptin.fa.aligned.fa +0 -0
  107. data/data/raw_data/l_type_SALMF.fa +68 -0
  108. data/data/raw_data/l_type_SALMF.fa.aligned.fa +170 -0
  109. data/data/raw_data/nmu_pyrokinin.fa +18 -0
  110. data/data/raw_data/nmu_pyrokinin.fa.aligned.fa +63 -0
  111. data/data/raw_data/npy_npf.fa +20 -0
  112. data/data/raw_data/npy_npf.fa.aligned.fa +40 -0
  113. data/data/raw_data/protostomian_CCAP.fa +14 -0
  114. data/data/raw_data/protostomian_CCAP.fa.aligned.fa +42 -0
  115. data/data/raw_data/protostomian_allatostatinA_buccalin.fa +100 -0
  116. data/data/raw_data/protostomian_allatostatinA_buccalin.fa.aligned.fa +1350 -0
  117. data/data/raw_data/protostomian_leucokinin.fa +18 -0
  118. data/data/raw_data/protostomian_leucokinin.fa.aligned.fa +63 -0
  119. data/data/raw_data/protostomian_tackykinin.fa +504 -0
  120. data/data/raw_data/protostomian_tackykinin.fa.aligned.fa +2002 -0
  121. data/data/raw_data/tachykinin.fa.aligned.fa +13546 -0
  122. data/data/raw_data/vertebrate_ghrelin_motilin.fa +16 -0
  123. data/data/raw_data/vertebrate_ghrelin_motilin.fa.aligned.fa +32 -0
  124. data/lib/neurohmmer.rb +89 -0
  125. data/lib/neurohmmer/arg_validators.rb +71 -0
  126. data/lib/neurohmmer/hmmer.rb +71 -0
  127. data/lib/neurohmmer/output.rb +50 -0
  128. data/lib/neurohmmer/version.rb +4 -0
  129. data/neurohmmer.gemspec +34 -0
  130. data/spec/neurohmmer_spec.rb +11 -0
  131. data/spec/spec_helper.rb +2 -0
  132. data/template/contents.slim +55 -0
  133. metadata +269 -0
@@ -0,0 +1,16 @@
1
+ >Ensembl_ENSDART00000076405_Drer
2
+ MPLRCRASSMFLLLCVSLSLCLESVSGGTSFLSPTQKPQGRRPPRVGRREAADPEIPVIKEDDRFMMSAPFELSMSLSEAEYEKYGPVLQNLLENLLRDSSFEF
3
+ >gi_55792853_Asch
4
+ MFLKRNTYLLVFLFCSLTLWCKSTSAGSSFLSPSQKPQNRGKSSRVGRQVMQEPQQPTDDKHITISAPFEIGISMTEEDYDEYGVVLQEIIQRLLGGTEAAEGPPQL
5
+ >Uniprot_GHRL_Hsap
6
+ MPSPGTVCSLLLLGMLWLDLAMAGSSFLSPEHQRVQQRKESKKPPAKLQPRALAGWLRPEDGGQAEGAEDELEVRFNAPFDVGIKLSGVQYQQHSQALGKFLQDILWEEAKEAPADK
7
+ >gi_21238918_Ajap
8
+ MRQMKRTAYIILLVCVLALWMDSVQAGSSFLSPSQRPQGKDKKPPRVGRRDSDGILDLFMRPPLQDEDIRHITFNTPFEIGITMTEELFQQYGEVMQKIMQD
9
+ >gi_68445391_Ipun
10
+ MLGHGRVGHMMLLLCAFSLWAETVMCGSSFLSPTQKPQNRGDRKPPRVGRRTAAELEAPLPSEEKIMVSAPFQLAVSLSDAEYEDYGPVLQRMLLDVLGDPPTLDGAN
11
+ >Uniprot_MOTI_Hsap
12
+ MVSRKAVAALLVVHVAAMLASQTEAFVPIFTYGELQRMQEKERNKGQKKSLSVWQRSGEEGPVDPAEPIREEENEMIKLTAPLEIGMRMNSRQLEKYPATLEGLLSEMLPQHAAK
13
+ >Ensembl_ENSOANT00000031359_Oana
14
+ MVSRKAVAFLLVVSVAAMMAEGFIPIFTHSDVQRMQERERNKGQKKSLTVQQRSEQGGLRTLAEPNGEEEGEIIQLAAPVEIGLRMNSRQLAKYRGILEELIMEALLSTQNGESNPDRGRGRCS
15
+ >Ensembl_ENSOCUT00000005807_Ocun
16
+ MVSRKAVAALLLVHATAMLASQTEAFVPIFTYSELQRMQERERNRGHKKSLSVQQRSEPAAAPPAEPTLEEENGRTQLTAPVEIGMRMNSRQLEKYRAALEAERAVHPDAPSRPCPAGGESGWSGEPSPT
@@ -0,0 +1,32 @@
1
+ >Ensembl_ENSDART00000076405_Drer
2
+ MPLRCRASSMFLLLCVSLSLCLESVSGGTSFLSPT-QKPQG-------RRPPRVGRREAA
3
+ ------DPEIPVIKEDDR-FMMSAPFELSMSLSEAEYEKYGPVLQNLLENLLRD-SSFEF
4
+ ------------------------
5
+ >gi_55792853_Asch
6
+ MFLKRNTYLLVFLFC-SLTLWCKSTSAGSSFLSPS-QKPQNR------GKSSRVGRQ---
7
+ ------VMQEPQQPTDDKHITISAPFEIGISMTEEDYDEYGVVLQEIIQRLLGGTEAAEG
8
+ PPQL--------------------
9
+ >Uniprot_GHRL_Hsap
10
+ MPSPGTVCSLLLLGM----LWLDLAMAGSSFLSPEHQRVQQRK--ESKKPPAKLQPRALA
11
+ GWLRPEDGGQAEGAEDELEVRFNAPFDVGIKLSGVQYQQHSQALGKFLQDILWE-EAKEA
12
+ PADK--------------------
13
+ >gi_21238918_Ajap
14
+ MRQMKRTAYIILLVC-VLALWMDSVQAGSSFLSPS-QRPQGK-----DKKPPRVGRRDSD
15
+ GIL-DLFMRPPLQDEDIRHITFNTPFEIGITMTEELFQQYGEVMQKIMQD----------
16
+ ------------------------
17
+ >gi_68445391_Ipun
18
+ MLGHGRVGHMMLLLC-AFSLWAETVMCGSSFLSPT-QKPQNR----GDRKPPRVGRRTAA
19
+ ------ELEAPLPSEEK--IMVSAPFQLAVSLSDAEYEDYGPVLQRMLLDVLGD------
20
+ PPTLDGA----------------N
21
+ >Uniprot_MOTI_Hsap
22
+ MVSRKAVAALLVVHV-AAMLASQTEAFVPIFTYGELQRMQEKERNKGQKKSLSVWQRSGE
23
+ EGPV--DPAEPIREEENEMIKLTAPLEIGMRMNSRQLEKYPATLEGLLSEML--------
24
+ -PQHAAK-----------------
25
+ >Ensembl_ENSOANT00000031359_Oana
26
+ MVSRKAVAFLLVVSV-AAMMA---EGFIPIFTHSDVQRMQERERNKGQKKSLTVQQRSEQ
27
+ GGLR--TLAEPNGEEEGEIIQLAAPVEIGLRMNSRQLAKYRGILEELIMEALLSTQNGES
28
+ NPDRGRGRC--------------S
29
+ >Ensembl_ENSOCUT00000005807_Ocun
30
+ MVSRKAVAALLLVHA-TAMLASQTEAFVPIFTYSELQRMQERERNRGHKKSLSVQQRSEP
31
+ AAA---PPAEPTLEEENGRTQLTAPVEIGMRMNSRQLEKYRAALE----------AERAV
32
+ HPDAPSRPCPAGGESGWSGEPSPT
data/lib/neurohmmer.rb ADDED
@@ -0,0 +1,89 @@
1
+ require 'bio'
2
+ require 'fileutils'
3
+
4
+ require 'neurohmmer/arg_validators'
5
+ require 'neurohmmer/hmmer'
6
+ require 'neurohmmer/output'
7
+
# Top level module / namespace.
#
# Keeps the validated command line options (+opt+), the paths derived from
# them (+conf+) and an offset index of the input FASTA file that is used to
# pull complete records back out of the file once hits have been found.
module Neurohmmer
  class << self
    attr_accessor :opt
    attr_accessor :conf

    # Validates +opt+, derives the working paths and prepares the input file.
    #
    # The HTML output path is built from the input path as supplied by the
    # user, i.e. before nucleotide input is swapped for its translation.
    def init(opt)
      @opt = ArgumentsValidators.run(opt)
      @conf = {
        hmm_dir: File.expand_path('../../data/hmm', __FILE__),
        raw_data: File.expand_path('../../data/raw_data', __FILE__),
        raw_alignments: File.expand_path('../../data/raw_data/alignments',
                                         __FILE__),
        hmm_output: File.join(@opt[:temp_dir], 'input.hmm_search.out'),
        html_output: "#{@opt[:input_file]}.neurohmmer.html"
      }
      init_input
    end

    # Runs the search, parses the search output, writes the HTML report and
    # finally removes the temporary directory.
    def run
      Hmmer.search
      hmm_analysis = Hmmer.analyse_output
      Output.to_html(hmm_analysis)
      remove_temp_dir
    end

    # Reads a single FASTA record back from the input file.
    #
    # id - sequence identifier; whitespace is removed before the lookup.
    #
    # Returns a two element array: [definition line, sequence]. The sequence
    # may still contain newlines. Raises NoMethodError when +id+ is not part
    # of the index (the lookup returns nil).
    def extract_sequence(id)
      id = id.gsub(/\s+/, '')
      idx = @input_index[id]
      # idx holds [start offset, exclusive end offset] of the record.
      seq = IO.binread(@opt[:input_file], idx[1] - idx[0], idx[0])
      seq.scan(/>([^\n]*)\n([A-Za-z\n\*]*)/)[0]
    end

    private

    # Creates the temp dir, replaces nucleotide input with its six frame
    # translation and indexes whichever file is searched in the end.
    def init_input
      FileUtils.mkdir_p(@opt[:temp_dir])
      @opt[:input_file] = translate_input if @opt[:type] == :genetic
      @input_index = index_input_file
    end

    # Translates the input data in all 6 frames and writes the result to
    # 'input.translated.fa' inside the temp dir. Returns the path of that
    # file.
    def translate_input(input = @opt[:input_file])
      translated_file = File.join(@opt[:temp_dir], 'input.translated.fa')
      File.open(translated_file, 'w') do |file|
        # Block form so that the reader on the input file is closed again.
        Bio::FlatFile.open(Bio::FastaFormat, input) do |fasta|
          fasta.each_entry do |entry|
            (1..6).each do |f|
              file.puts ">#{entry.definition}-frame:#{f}"
              file.puts entry.naseq.translate(f)
            end
          end
        end
      end
      translated_file
    end

    # Indexes the input file - returns a hash in the following format:
    # {seq id: [start byte in file, end byte in file] }
    def index_input_file
      c = IO.binread(@opt[:input_file])
      # Definition lines (without the leading '>') ...
      keys = c.scan(/>(.*)\n/).flatten
      # ... and the offset at which each '>' delimited record starts.
      values = c.enum_for(:scan, /(>[^>]+)/).map { Regexp.last_match.begin(0) }
      index(c, keys, values)
    end

    # A method run from index_input_file that creates a simple hash in the
    # format {seq id: [start byte in file, end byte in file] }
    #
    # The end offset is exclusive: it is the start of the next record, or the
    # length of the content for the last record. (Using length - 1 here would
    # cut off the final residue of files without a trailing newline.)
    def index(content, keys, values)
      fasta_index = {}
      keys.each_with_index do |k, i|
        # Ids are the first 116 characters of the definition line with all
        # whitespace removed.
        id = k[0..115].gsub(/\s+/, '')
        endf = (i == values.length - 1) ? content.length : values[i + 1]
        fasta_index[id] = [values[i], endf]
      end
      fasta_index
    end

    # Deletes the temporary directory if it exists.
    def remove_temp_dir
      return unless File.directory?(@opt[:temp_dir])
      FileUtils.rm_rf(@opt[:temp_dir])
    end
  end
end
@@ -0,0 +1,71 @@
1
+ require 'bio'
2
+
# Top level module / namespace.
module Neurohmmer
  # A class that validates the command line opts
  #
  # Every failed check prints a message to $stderr and terminates the process
  # via +exit+ (status 1 unless stated otherwise).
  class ArgumentsValidators
    class << self
      # Runs all checks on the input file.
      #
      # opt - options hash; reads :input_file, stores the absolute input path
      #       back into :input_file and the detected sequence type
      #       (:genetic or :protein) into :type.
      #
      # Returns the (modified) options hash.
      def run(opt)
        assert_file_present('input fasta file', opt[:input_file])
        # All checks look at the expanded path, so hand that same path on to
        # the rest of the program (e.g. '~/seqs.fa' keeps working later on).
        opt[:input_file] = File.expand_path(opt[:input_file])
        assert_input_file_not_empty(opt[:input_file])
        assert_input_file_probably_fasta(opt[:input_file])
        opt[:type] = assert_input_sequence(opt[:input_file])
        # TODO: Assert hmm & mafft binaries
        opt
      end

      private

      # Exits with +exit_code+ unless +file+ is given and exists.
      def assert_file_present(desc, file, exit_code = 1)
        return if file && File.exist?(File.expand_path(file))
        $stderr.puts "*** Error: Couldn't find the #{desc}: #{file}."
        exit exit_code
      end

      # Exits if the file has a size of zero bytes.
      def assert_input_file_not_empty(file)
        return unless File.zero?(File.expand_path(file))
        $stderr.puts "*** Error: The input_file (#{file})" \
                     ' seems to be empty.'
        exit 1
      end

      # Returns true if the very first character of the file is '>';
      # exits otherwise.
      def assert_input_file_probably_fasta(file)
        first_line = File.open(File.expand_path(file), 'r', &:readline)
        return true if first_line[0] == '>'
        $stderr.puts "*** Error: The input_file (#{file})" \
                     ' does not seems to be a fasta file.'
        exit 1
      end

      # Returns the sequence type of the file (:genetic or :protein).
      #
      # Exits when no single type could be determined. Note that this is
      # also the case when no sequence was long enough to be typed at all,
      # although the message only mentions mixed input.
      def assert_input_sequence(file)
        type = type_of_sequences(file)
        return type unless type.nil?
        $stderr.puts '*** Error: The input files seems to contain a mixture of'
        $stderr.puts '    both protein and nucleotide data.'
        $stderr.puts '    Please correct this and try again.'
        exit 1
      end

      # Returns the one sequence type shared by all typed sequences in the
      # file, or nil if there is none or more than one.
      def type_of_sequences(file)
        fasta_content = IO.binread(File.expand_path(file))
        # the first sequence does not need to have a fasta definition line
        sequences = fasta_content.split(/^>.*$/).delete_if(&:empty?)
        # get all sequence types
        sequence_types = sequences.collect { |seq| guess_sequence_type(seq) }
                         .uniq.compact
        return nil if sequence_types.empty?
        sequence_types.first if sequence_types.length == 1
      end

      # Returns :genetic or :protein for a single sequence, or nil if fewer
      # than 10 usable characters are left after cleaning.
      def guess_sequence_type(seq)
        # removing non-letter and ambiguous characters
        cleaned_sequence = seq.gsub(/[^A-Z]|[NX]/i, '')
        return nil if cleaned_sequence.length < 10 # conservative
        type = Bio::Sequence.new(cleaned_sequence).guess(0.9)
        (type == Bio::Sequence::NA) ? :genetic : :protein
      end
    end
  end
end
@@ -0,0 +1,71 @@
1
+ require 'bio-hmmer3_report'
2
+ require 'forwardable'
3
+
require 'shellwords'

# Top level module / namespace.
module Neurohmmer
  # A class that holds methods related to Hmmer
  #
  # All external programs (hmmsearch, hmmbuild, mafft) are run through the
  # shell; every path is shell-escaped so that names containing quotes or
  # spaces do not break the command line.
  class Hmmer
    class << self
      extend Forwardable
      def_delegators Neurohmmer, :opt, :conf

      # Runs hmmsearch for every file in conf[:hmm_dir] whose name ends in
      # 'hmm'. All results are appended to conf[:hmm_output].
      def search
        Dir.foreach(conf[:hmm_dir]) do |h|
          hmm_file = File.join(conf[:hmm_dir], h)
          next if hmm_file !~ /hmm$/
          hmm_search(opt[:input_file], hmm_file, conf[:hmm_output])
        end
      end

      # Parses conf[:hmm_output].
      #
      # Returns a hash that maps each report's query to an array of hits
      # (see analyse_hmm_search_report).
      def analyse_output
        hmm_results = {}
        # Block form: the handle is closed once all reports are processed.
        File.open(conf[:hmm_output]) do |hmm_search_output|
          hmm_reports = Bio::HMMER::HMMER3.reports(hmm_search_output)
          hmm_reports.each_with_index do |report, idx|
            # NOTE(review): the final report is always skipped - presumably
            # the parser yields a trailing empty report; confirm.
            next if idx + 1 == hmm_reports.length
            hmm_results[report.query] = analyse_hmm_search_report(report)
          end
        end
        hmm_results
      end

      # Aligns every file in conf[:raw_data] whose name ends in 'fa' or
      # 'fasta' with mafft and builds a HMM model from the alignment.
      #
      # NOTE(review): only the trailing 'fa'/'fasta' is stripped from the
      # file name, not the dot in front of it, so 'x.fa' yields 'x..aligned'
      # and 'x..hmm' - confirm the intended naming.
      def generate_hmm_models
        # conf[:num_threads] wins if set; opt[:num_threads] is assumed to be
        # where a command line value would end up (TODO confirm); otherwise
        # a single thread is used so that '--thread' never lacks its value.
        threads = conf[:num_threads] || opt[:num_threads] || 1
        Dir.foreach(conf[:raw_data]) do |file|
          next if file !~ /fa(sta)?$/
          np_fasta_file  = File.join(conf[:raw_data], file)
          aligned_file   = File.join(conf[:raw_alignments],
                                     "#{file.gsub(/fa(sta)?$/, '')}.aligned")
          hmm_model_file = File.join(conf[:hmm_dir],
                                     "#{file.gsub(/fa(sta)?$/, '')}.hmm")
          mafft(np_fasta_file, aligned_file, threads)
          hmm_build(aligned_file, hmm_model_file)
        end
      end

      private

      # Turns the hits of one report into an array of hashes with the keys
      # :id and :seq (definition line and sequence as read back from the
      # input file) and :flatseq (the flatseq of every hsp of the hit).
      def analyse_hmm_search_report(report)
        report_result = []
        report.hits.each do |hit|
          seq = Neurohmmer.extract_sequence(hit.sequence_name.strip)
          hsps = []
          hit.hsps.each { |hsp| hsps << hsp.flatseq }
          report_result << { id: seq[0], seq: seq[1], flatseq: hsps }
        end
        report_result
      end

      # Aligns +input+ with mafft and writes the alignment to +aligned_file+.
      def mafft(input, aligned_file, num_threads)
        cmd = Shellwords.join(['mafft', '--maxiterate', '1000', '--thread',
                               num_threads.to_s, input])
        `#{cmd} > #{Shellwords.escape(aligned_file)}`
      end

      # Builds +hmm_model_file+ from +aligned_file+ with hmmbuild.
      def hmm_build(aligned_file, hmm_model_file)
        cmd = Shellwords.join(['hmmbuild', hmm_model_file, aligned_file])
        `#{cmd}`
      end

      # Searches +input_file+ with +hmm_file+ and appends the hmmsearch
      # output to +hmm_output+.
      def hmm_search(input_file, hmm_file, hmm_output)
        cmd = Shellwords.join(['hmmsearch', hmm_file, input_file])
        `#{cmd} >> #{Shellwords.escape(hmm_output)}`
      end
    end
  end
end
@@ -0,0 +1,50 @@
1
+ require 'slim'
2
+ require 'forwardable'
3
+
# Top level module / namespace.
module Neurohmmer
  # A class that holds methods related to Output
  class Output
    class << self
      extend Forwardable
      def_delegators Neurohmmer, :conf

      # Renders template/contents.slim with the formatted results and writes
      # the page to conf[:html_output].
      #
      # hmm_results - hash of query => array of hits, each hit being a hash
      #               with the keys :id, :seq and :flatseq.
      def to_html(hmm_results)
        # The template is rendered with this object as its scope and reads
        # @html_results from it.
        @html_results = format_seqs_for_html(hmm_results)
        template_path = File.expand_path(File.join(__FILE__, '../../../',
                                                   'template/contents.slim'))
        contents_temp = File.read(template_path)
        html_content = Slim::Template.new { contents_temp }.render(self)
        File.open(conf[:html_output], 'w') { |f| f.puts html_content }
      end

      # Converts the raw results into a hash of
      # query => [{ id: ..., seq: <sequence with html markup> }, ...].
      # Queries without any hits are left out. The input is not modified.
      def format_seqs_for_html(hmm_results)
        results = {}
        hmm_results.each do |query, hits|
          next if hits.length == 0
          results[query] = []
          hits.each do |hit|
            html_seq = format_html_seq(hit[:seq], hit[:flatseq])
            results[query] << { id: hit[:id], seq: html_seq }
          end
        end
        results
      end

      private

      # Adds the html markup to a single sequence.
      #
      # seq     - sequence, may contain newlines; left untouched.
      # flatseq - array of hsp sequences, '-' marks alignment gaps.
      #
      # Returns a new string in which
      #   * every (case insensitive) occurrence of an hsp is wrapped in
      #     span.hsp,
      #   * KR, KK and RR are wrapped in span.clv,
      #   * such a pair that is split by the start of an hsp is marked with
      #     span.clv and span.clv_i (R followed by K is not marked),
      #   * a G directly in front of a span.clv is wrapped in span.gly.
      def format_html_seq(seq, flatseq)
        # Work on a copy so the caller's string is not changed.
        html = seq.delete("\n")
        flatseq.each do |hsp|
          residues = hsp.delete('-')
          # An hsp made of gaps only would match at every position.
          next if residues.empty?
          # The hsp is data, not a pattern: characters such as '*' must be
          # matched literally.
          pattern = /#{Regexp.escape(residues)}/i
          html = html.gsub(pattern, '<span class=hsp>\0</span>')
        end
        html.gsub(/KR|KK|RR/i, '<span class=clv>\0</span>')
          .gsub(/(K|R)<span class=hsp>(K|R)/i, '<span class=clv>\1</span>' \
            '<span class=clv_i>\2</span><span class=hsp>')
          .gsub('<span class=clv>R</span><span class=clv_i>K</span><span' \
            ' class=hsp>', 'R<span class=hsp>K')
          .gsub(/G<span class=clv>/, '<span class=gly>G</span><span class=clv>')
      end
    end
  end
end
@@ -0,0 +1,4 @@
# Top level module / namespace.
module Neurohmmer
  # Version of the gem as a 'MAJOR.MINOR.PATCH' string.
  VERSION = '0.1.0'
end
@@ -0,0 +1,34 @@
# coding: utf-8
# Gem specification for neurohmmer.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'neurohmmer/version'

Gem::Specification.new do |s|
  s.name          = 'neurohmmer'
  s.version       = Neurohmmer::VERSION
  s.authors       = ['Ismail Moghul', 'Maurice Elphick', 'Yannick Wurm']
  s.email         = ['ismail.moghul@gmail.com']

  # Short one line summary; the longer text lives in the description.
  s.summary       = 'Identify Neuropeptides using powerful Hidden Markov' \
                    ' Models.'
  s.description   = 'Identify Neuropeptides using powerful Hidden Markov' \
                    " Models.\n\n For further information please refer to:" \
                    ' https://github.com/wurmlab/neurohmmer'
  s.homepage      = 'https://github.com/wurmlab/neurohmmer'
  # SPDX identifier of the license.
  s.license       = 'AGPL-3.0'

  # Everything tracked by git is packaged; files below bin/ become
  # executables and files below test/, spec/ or features/ are test files.
  s.files         = `git ls-files -z`.split("\x0")
  s.executables   = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
  s.test_files    = s.files.grep(%r{^(test|spec|features)/})
  s.require_paths = ['lib']

  s.required_ruby_version = '>= 2.0.0'
  s.add_development_dependency 'bundler', '~> 1.10'
  s.add_development_dependency 'rake', '~> 10.0'
  s.add_development_dependency 'rspec', '~> 3.4'

  s.add_dependency 'bio', '~> 1.5'
  s.add_dependency 'bio-hmmer3_report', '~> 0.1'
  s.add_dependency 'slim', '~> 3.0'
end
@@ -0,0 +1,11 @@
1
+ require 'spec_helper'
2
+
# Basic sanity checks for the gem's namespace.
describe Neurohmmer do
  it 'has a version number' do
    expect(Neurohmmer::VERSION).not_to be nil
  end

  # Replaces the generated placeholder example, which could never pass.
  it 'uses a MAJOR.MINOR.PATCH version string' do
    expect(Neurohmmer::VERSION).to match(/\A\d+\.\d+\.\d+\z/)
  end
end
@@ -0,0 +1,2 @@
1
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
+ require 'neurohmmer'
@@ -0,0 +1,55 @@
/ Report page. Rendered with an object that provides @html_results, a hash
/ of query => array of hits; each hit has an :id and a :seq, the latter
/ already containing html markup.
doctype html
html lang="en"
  head
    meta charset="utf-8"
    meta content="IE=edge" http-equiv="X-UA-Compatible"
    meta content="width=device-width, initial-scale=1" name="viewport"
    meta content="NeuroHMMer | Identify Neuropeptide Homologs" name="description"
    meta content="Wurmlab" name="author"
    title NeuroHMMer | Identify Neuropeptide Homologs
    css:
      html { position: relative; min-height: 100%; }
      body {margin:0 0 100px; background-color:#F5F5F5; font-family:"Helvetica Neue", Helvetica, Arial, sans-serif; font-size:15px; line-height:1.42857143; color:#2c3e50;}
      .container {margin-right:auto; margin-left:auto; padding-left:15px; padding-right:15px; max-width:1170px; width:95%;}
      footer {bottom:0; width:100%; margin:0 auto; position:absolute; height:100px; overflow:hidden; border-top:2px solid #DBDBDB; color:#b4bcc2;text-align:center;}
      a{color:#18bc9c; text-decoration:none}a:hover,a:focus{color:#18bc9c; text-decoration:underline}
      p {margin:0 0 10.5px}
      .id {font-weight:bold;}
      .sequence {word-break:break-all; font-family:Courier New, Courier, Mono;}
      .clv {color:#00B050; font-weight:bold;}
      .clv_i {color:#00B050; font-weight: bold; background-color:#FFE4B5;}
      .gly {color:#FF0000; font-weight:bold;}
      .hsp {background-color:#FFE4B5;}
  body
    .container
      h1 NeuroHMMer Results
      .results
        - @html_results.each do |query, hits|
          section
            / The heading is the query without its '.fa.aligned' or
            / '.fasta.aligned' part.
            h2 = query.gsub(/\.fa(sta)?\.aligned/, '')
            - hits.each do |hit|
              p.sequence
                span.id
                  = hit[:id]
                br
                span.seq
                  / Not escaped: the sequence carries its own span markup.
                  == hit[:seq]
            hr
      br
      br
      br
    footer
      p
        | Please cite "Moghul I, Elphick M &amp; Wurm Y
        em
          |  (in prep)
        |  NeuroHMMer: A tool to identify neuropeptide homologs"
        br
        | Developed at
        a href="https://wurmlab.github.io" target="_blank" Wurm Lab
        | ,
        a href="http://www.sbcs.qmul.ac.uk" target="_blank" QMUL
        br
        | This page was created by
        a href="https://github.com/wurmlab/neurohmmer" target="_blank" NeuroHMMer
        |  v#{Neurohmmer::VERSION}