neurohmmer 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +35 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +4 -0
- data/LICENSE +662 -0
- data/README.md +136 -0
- data/Rakefile +17 -0
- data/bin/neurohmmer +69 -0
- data/data/hmm/CRH_DH44.fa.hmm +427 -0
- data/data/hmm/Chordate_Glucagon_PACAP.fa.hmm +484 -0
- data/data/hmm/Chordate_Neurotensin_precursors_neurotensin_like_Ciona.fa.hmm +526 -0
- data/data/hmm/Deutrostomian_tachykinin.fa.hmm +367 -0
- data/data/hmm/Gastrin_releasing_peptide.fa.hmm +448 -0
- data/data/hmm/GnRH.fa.hmm +370 -0
- data/data/hmm/Hypocretin_orexin.fa.hmm +451 -0
- data/data/hmm/Lophotrochozoan_luqin_arthropod_RYamide.fa.hmm +385 -0
- data/data/hmm/NPFF.fa.hmm +430 -0
- data/data/hmm/NPS.fa.hmm +790 -0
- data/data/hmm/PTH.fa.hmm +502 -0
- data/data/hmm/Protostomian_Allatostatin_B.fa.hmm +835 -0
- data/data/hmm/Protostomian_CCH_amide.fa.hmm +415 -0
- data/data/hmm/Protostomian_Ecdysis_Triggering_Hormone.fa.hmm +433 -0
- data/data/hmm/Protostomian_PDF.fa.hmm +265 -0
- data/data/hmm/Protostomian_allatostatin_C.fa.hmm +352 -0
- data/data/hmm/Protostomian_proctolin.fa.hmm +346 -0
- data/data/hmm/Putative_chordate_Opioid.fa.hmm +802 -0
- data/data/hmm/SIF.fa.hmm +286 -0
- data/data/hmm/TRH.fa.hmm +910 -0
- data/data/hmm/allatotropin.fa.hmm +433 -0
- data/data/hmm/ambulacrarian_amphioxus_NPS-like.fa.hmm +847 -0
- data/data/hmm/avp.fa.hmm +475 -0
- data/data/hmm/calcitonin_DH31.fa.hmm +418 -0
- data/data/hmm/cholecystokinin_sulfakinin.fa.hmm +433 -0
- data/data/hmm/chordate_OX26_QRFP.fa.hmm +475 -0
- data/data/hmm/chordate_somatostatin_cortistatin_like.fa.hmm +361 -0
- data/data/hmm/f_type_SALMF.fa.hmm +811 -0
- data/data/hmm/galanin.fa.hmm +391 -0
- data/data/hmm/l_type_SALMF.fa.hmm +523 -0
- data/data/hmm/nmu_pyrokinin.fa.hmm +424 -0
- data/data/hmm/npy_npf.fa.hmm +301 -0
- data/data/hmm/protostomian_CCAP.fa.hmm +415 -0
- data/data/hmm/protostomian_allatostatinA_buccalin.fa.hmm +706 -0
- data/data/hmm/protostomian_leucokinin.fa.hmm +511 -0
- data/data/hmm/protostomian_tackykinin.fa.hmm +760 -0
- data/data/hmm/tachykinin.fa.hmm +541 -0
- data/data/hmm/vertebrate_ghrelin_motilin.fa.hmm +382 -0
- data/data/raw_data/CRH_DH44.fa +44 -0
- data/data/raw_data/CRH_DH44.fa.aligned.fa +242 -0
- data/data/raw_data/Chordate_Glucagon_PACAP.fa +18 -0
- data/data/raw_data/Chordate_Glucagon_PACAP.fa.aligned.fa +72 -0
- data/data/raw_data/Chordate_Neurotensin_precursors_neurotensin_like_Ciona.fa +10 -0
- data/data/raw_data/Chordate_Neurotensin_precursors_neurotensin_like_Ciona.fa.aligned.fa +25 -0
- data/data/raw_data/Deutrostomian_tachykinin.fa +1283 -0
- data/data/raw_data/Deutrostomian_tachykinin.fa.aligned.fa +5707 -0
- data/data/raw_data/Gastrin_releasing_peptide.fa +12 -0
- data/data/raw_data/Gastrin_releasing_peptide.fa.aligned.fa +25 -0
- data/data/raw_data/GnRH.fa +22 -0
- data/data/raw_data/GnRH.fa.aligned.fa +66 -0
- data/data/raw_data/Hypocretin_orexin.fa +14 -0
- data/data/raw_data/Hypocretin_orexin.fa.aligned.fa +49 -0
- data/data/raw_data/Lophotrochozoan_luqin_arthropod_RYamide.fa +26 -0
- data/data/raw_data/Lophotrochozoan_luqin_arthropod_RYamide.fa.aligned.fa +91 -0
- data/data/raw_data/NPFF.fa +16 -0
- data/data/raw_data/NPFF.fa.aligned.fa +48 -0
- data/data/raw_data/NPS.fa +8 -0
- data/data/raw_data/NPS.fa.aligned.fa +24 -0
- data/data/raw_data/PTH.fa +24 -0
- data/data/raw_data/PTH.fa.aligned.fa +144 -0
- data/data/raw_data/Protostomian_Allatostatin_B.fa +17 -0
- data/data/raw_data/Protostomian_Allatostatin_B.fa.aligned.fa +120 -0
- data/data/raw_data/Protostomian_CCH_amide.fa +24 -0
- data/data/raw_data/Protostomian_CCH_amide.fa.aligned.fa +72 -0
- data/data/raw_data/Protostomian_Ecdysis_Triggering_Hormone.fa +24 -0
- data/data/raw_data/Protostomian_Ecdysis_Triggering_Hormone.fa.aligned.fa +84 -0
- data/data/raw_data/Protostomian_PDF.fa +16 -0
- data/data/raw_data/Protostomian_PDF.fa.aligned.fa +32 -0
- data/data/raw_data/Protostomian_allatostatin_C.fa +28 -0
- data/data/raw_data/Protostomian_allatostatin_C.fa.aligned.fa +56 -0
- data/data/raw_data/Protostomian_proctolin.fa +16 -0
- data/data/raw_data/Protostomian_proctolin.fa.aligned.fa +32 -0
- data/data/raw_data/Putative_chordate_Opioid.fa +8 -0
- data/data/raw_data/Putative_chordate_Opioid.fa.aligned.fa +32 -0
- data/data/raw_data/SIF.fa +28 -0
- data/data/raw_data/SIF.fa.aligned.fa +70 -0
- data/data/raw_data/TRH.fa +10 -0
- data/data/raw_data/TRH.fa.aligned.fa +50 -0
- data/data/raw_data/allatotropin.fa +31 -0
- data/data/raw_data/allatotropin.fa.aligned.fa +90 -0
- data/data/raw_data/ambulacrarian_amphioxus_NPS-like.fa +8 -0
- data/data/raw_data/ambulacrarian_amphioxus_NPS-like.fa.aligned.fa +24 -0
- data/data/raw_data/avp.fa +28 -0
- data/data/raw_data/avp.fa.aligned.fa +70 -0
- data/data/raw_data/calcitonin_DH31.fa +36 -0
- data/data/raw_data/calcitonin_DH31.fa.aligned.fa +126 -0
- data/data/raw_data/cholecystokinin_sulfakinin.fa +28 -0
- data/data/raw_data/cholecystokinin_sulfakinin.fa.aligned.fa +84 -0
- data/data/raw_data/chordate_OX26_QRFP.fa +10 -0
- data/data/raw_data/chordate_OX26_QRFP.fa.aligned.fa +25 -0
- data/data/raw_data/chordate_somatostatin_cortistatin_like.fa +10 -0
- data/data/raw_data/chordate_somatostatin_cortistatin_like.fa.aligned.fa +20 -0
- data/data/raw_data/f_type_SALMF.fa +118 -0
- data/data/raw_data/f_type_SALMF.fa.aligned.fa +590 -0
- data/data/raw_data/galanin.fa +93 -0
- data/data/raw_data/galanin.fa.aligned.fa +552 -0
- data/data/raw_data/kisspeptin.fa +16 -0
- data/data/raw_data/kisspeptin.fa.aligned.fa +0 -0
- data/data/raw_data/l_type_SALMF.fa +68 -0
- data/data/raw_data/l_type_SALMF.fa.aligned.fa +170 -0
- data/data/raw_data/nmu_pyrokinin.fa +18 -0
- data/data/raw_data/nmu_pyrokinin.fa.aligned.fa +63 -0
- data/data/raw_data/npy_npf.fa +20 -0
- data/data/raw_data/npy_npf.fa.aligned.fa +40 -0
- data/data/raw_data/protostomian_CCAP.fa +14 -0
- data/data/raw_data/protostomian_CCAP.fa.aligned.fa +42 -0
- data/data/raw_data/protostomian_allatostatinA_buccalin.fa +100 -0
- data/data/raw_data/protostomian_allatostatinA_buccalin.fa.aligned.fa +1350 -0
- data/data/raw_data/protostomian_leucokinin.fa +18 -0
- data/data/raw_data/protostomian_leucokinin.fa.aligned.fa +63 -0
- data/data/raw_data/protostomian_tackykinin.fa +504 -0
- data/data/raw_data/protostomian_tackykinin.fa.aligned.fa +2002 -0
- data/data/raw_data/tachykinin.fa.aligned.fa +13546 -0
- data/data/raw_data/vertebrate_ghrelin_motilin.fa +16 -0
- data/data/raw_data/vertebrate_ghrelin_motilin.fa.aligned.fa +32 -0
- data/lib/neurohmmer.rb +89 -0
- data/lib/neurohmmer/arg_validators.rb +71 -0
- data/lib/neurohmmer/hmmer.rb +71 -0
- data/lib/neurohmmer/output.rb +50 -0
- data/lib/neurohmmer/version.rb +4 -0
- data/neurohmmer.gemspec +34 -0
- data/spec/neurohmmer_spec.rb +11 -0
- data/spec/spec_helper.rb +2 -0
- data/template/contents.slim +55 -0
- metadata +269 -0
@@ -0,0 +1,16 @@
|
|
1
|
+
>Ensembl_ENSDART00000076405_Drer
|
2
|
+
MPLRCRASSMFLLLCVSLSLCLESVSGGTSFLSPTQKPQGRRPPRVGRREAADPEIPVIKEDDRFMMSAPFELSMSLSEAEYEKYGPVLQNLLENLLRDSSFEF
|
3
|
+
>gi_55792853_Asch
|
4
|
+
MFLKRNTYLLVFLFCSLTLWCKSTSAGSSFLSPSQKPQNRGKSSRVGRQVMQEPQQPTDDKHITISAPFEIGISMTEEDYDEYGVVLQEIIQRLLGGTEAAEGPPQL
|
5
|
+
>Uniprot_GHRL_Hsap
|
6
|
+
MPSPGTVCSLLLLGMLWLDLAMAGSSFLSPEHQRVQQRKESKKPPAKLQPRALAGWLRPEDGGQAEGAEDELEVRFNAPFDVGIKLSGVQYQQHSQALGKFLQDILWEEAKEAPADK
|
7
|
+
>gi_21238918_Ajap
|
8
|
+
MRQMKRTAYIILLVCVLALWMDSVQAGSSFLSPSQRPQGKDKKPPRVGRRDSDGILDLFMRPPLQDEDIRHITFNTPFEIGITMTEELFQQYGEVMQKIMQD
|
9
|
+
>gi_68445391_Ipun
|
10
|
+
MLGHGRVGHMMLLLCAFSLWAETVMCGSSFLSPTQKPQNRGDRKPPRVGRRTAAELEAPLPSEEKIMVSAPFQLAVSLSDAEYEDYGPVLQRMLLDVLGDPPTLDGAN
|
11
|
+
>Uniprot_MOTI_Hsap
|
12
|
+
MVSRKAVAALLVVHVAAMLASQTEAFVPIFTYGELQRMQEKERNKGQKKSLSVWQRSGEEGPVDPAEPIREEENEMIKLTAPLEIGMRMNSRQLEKYPATLEGLLSEMLPQHAAK
|
13
|
+
>Ensembl_ENSOANT00000031359_Oana
|
14
|
+
MVSRKAVAFLLVVSVAAMMAEGFIPIFTHSDVQRMQERERNKGQKKSLTVQQRSEQGGLRTLAEPNGEEEGEIIQLAAPVEIGLRMNSRQLAKYRGILEELIMEALLSTQNGESNPDRGRGRCS
|
15
|
+
>Ensembl_ENSOCUT00000005807_Ocun
|
16
|
+
MVSRKAVAALLLVHATAMLASQTEAFVPIFTYSELQRMQERERNRGHKKSLSVQQRSEPAAAPPAEPTLEEENGRTQLTAPVEIGMRMNSRQLEKYRAALEAERAVHPDAPSRPCPAGGESGWSGEPSPT
|
@@ -0,0 +1,32 @@
|
|
1
|
+
>Ensembl_ENSDART00000076405_Drer
|
2
|
+
MPLRCRASSMFLLLCVSLSLCLESVSGGTSFLSPT-QKPQG-------RRPPRVGRREAA
|
3
|
+
------DPEIPVIKEDDR-FMMSAPFELSMSLSEAEYEKYGPVLQNLLENLLRD-SSFEF
|
4
|
+
------------------------
|
5
|
+
>gi_55792853_Asch
|
6
|
+
MFLKRNTYLLVFLFC-SLTLWCKSTSAGSSFLSPS-QKPQNR------GKSSRVGRQ---
|
7
|
+
------VMQEPQQPTDDKHITISAPFEIGISMTEEDYDEYGVVLQEIIQRLLGGTEAAEG
|
8
|
+
PPQL--------------------
|
9
|
+
>Uniprot_GHRL_Hsap
|
10
|
+
MPSPGTVCSLLLLGM----LWLDLAMAGSSFLSPEHQRVQQRK--ESKKPPAKLQPRALA
|
11
|
+
GWLRPEDGGQAEGAEDELEVRFNAPFDVGIKLSGVQYQQHSQALGKFLQDILWE-EAKEA
|
12
|
+
PADK--------------------
|
13
|
+
>gi_21238918_Ajap
|
14
|
+
MRQMKRTAYIILLVC-VLALWMDSVQAGSSFLSPS-QRPQGK-----DKKPPRVGRRDSD
|
15
|
+
GIL-DLFMRPPLQDEDIRHITFNTPFEIGITMTEELFQQYGEVMQKIMQD----------
|
16
|
+
------------------------
|
17
|
+
>gi_68445391_Ipun
|
18
|
+
MLGHGRVGHMMLLLC-AFSLWAETVMCGSSFLSPT-QKPQNR----GDRKPPRVGRRTAA
|
19
|
+
------ELEAPLPSEEK--IMVSAPFQLAVSLSDAEYEDYGPVLQRMLLDVLGD------
|
20
|
+
PPTLDGA----------------N
|
21
|
+
>Uniprot_MOTI_Hsap
|
22
|
+
MVSRKAVAALLVVHV-AAMLASQTEAFVPIFTYGELQRMQEKERNKGQKKSLSVWQRSGE
|
23
|
+
EGPV--DPAEPIREEENEMIKLTAPLEIGMRMNSRQLEKYPATLEGLLSEML--------
|
24
|
+
-PQHAAK-----------------
|
25
|
+
>Ensembl_ENSOANT00000031359_Oana
|
26
|
+
MVSRKAVAFLLVVSV-AAMMA---EGFIPIFTHSDVQRMQERERNKGQKKSLTVQQRSEQ
|
27
|
+
GGLR--TLAEPNGEEEGEIIQLAAPVEIGLRMNSRQLAKYRGILEELIMEALLSTQNGES
|
28
|
+
NPDRGRGRC--------------S
|
29
|
+
>Ensembl_ENSOCUT00000005807_Ocun
|
30
|
+
MVSRKAVAALLLVHA-TAMLASQTEAFVPIFTYSELQRMQERERNRGHKKSLSVQQRSEP
|
31
|
+
AAA---PPAEPTLEEENGRTQLTAPVEIGMRMNSRQLEKYRAALE----------AERAV
|
32
|
+
HPDAPSRPCPAGGESGWSGEPSPT
|
data/lib/neurohmmer.rb
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
require 'bio'
|
2
|
+
require 'fileutils'
|
3
|
+
|
4
|
+
require 'neurohmmer/arg_validators'
|
5
|
+
require 'neurohmmer/hmmer'
|
6
|
+
require 'neurohmmer/output'
|
7
|
+
|
8
|
+
# Top level module / namespace.
#
# Holds the validated options (+opt+), the run configuration derived from them
# (+conf+) and the entry points that drive one complete analysis.
module Neurohmmer
  class << self
    # opt  - the options hash returned by ArgumentsValidators.run
    # conf - hash of paths assembled in init
    attr_accessor :opt
    attr_accessor :conf

    # Validates the options, assembles the configuration hash and prepares
    # the input file (see init_input).
    #
    # opt - options hash; :input_file and :temp_dir are read here.
    def init(opt)
      @opt  = ArgumentsValidators.run(opt)
      @conf = {
        hmm_dir: File.expand_path('../../data/hmm', __FILE__),
        raw_data: File.expand_path('../../data/raw_data', __FILE__),
        raw_alignments: File.expand_path('../../data/raw_data/alignments',
                                         __FILE__),
        hmm_output: File.join(@opt[:temp_dir], 'input.hmm_search.out'),
        # derived before init_input, i.e. from the file the user supplied and
        # not from the translated copy
        html_output: "#{@opt[:input_file]}.neurohmmer.html"
      }
      init_input
    end

    # Runs the search, analyses its output and writes the HTML report.
    # The temporary directory is removed even when one of the steps raises.
    def run
      Hmmer.search
      hmm_analysis = Hmmer.analyse_output
      Output.to_html(hmm_analysis)
    ensure
      remove_temp_dir
    end

    # Reads a single record back from the (possibly translated) input file.
    #
    # id - sequence identifier; whitespace is ignored.
    #
    # Returns a two element array: the definition line (without '>') and the
    # sequence text as stored in the file (line breaks included).
    # Raises KeyError when the identifier is not part of the index.
    def extract_sequence(id)
      start, stop = @input_index.fetch(id.gsub(/\s+/, ''))
      record = IO.binread(@opt[:input_file], stop - start, start)
      record.scan(/>([^\n]*)\n([A-Za-z\n\*]*)/)[0]
    end

    private

    # Creates the temp dir, translates nucleotide input and builds the index
    # used by extract_sequence.
    def init_input
      FileUtils.mkdir_p(@opt[:temp_dir])
      @opt[:input_file] = translate_input if @opt[:type] == :genetic
      @input_index = index_input_file
    end

    # Translates the input data in all 6 frames
    #
    # Writes one record per frame ("<definition>-frame:<n>") into
    # 'input.translated.fa' inside the temp dir and returns that path.
    def translate_input(input = @opt[:input_file])
      translated_file = File.join(@opt[:temp_dir], 'input.translated.fa')
      File.open(translated_file, 'w') do |file|
        # block form so that the flat file handle is closed again
        Bio::FlatFile.open(Bio::FastaFormat, input) do |fasta|
          fasta.each_entry do |entry|
            (1..6).each do |frame|
              file.puts ">#{entry.definition}-frame:#{frame}"
              file.puts entry.naseq.translate(frame)
            end
          end
        end
      end
      translated_file
    end

    # Indexes the input file - returns a hash in the following format:
    # {seq id: [start byte in file, end byte in file] }
    #
    # Headers and their offsets are collected in one pass over lines that
    # start with '>', so a '>' inside a definition line cannot shift the
    # offsets relative to the headers.
    def index_input_file
      content = IO.binread(@opt[:input_file])
      headers = []
      offsets = []
      content.scan(/^>([^\n]*)/) do
        match = Regexp.last_match
        headers << match[1]
        offsets << match.begin(0)
      end
      index(content, headers, offsets)
    end

    # A method run from index_input_file that creates a simple hash with the
    # {seq id: [start byte in file, end byte in file] }
    #
    # content - the whole file as a binary string
    # keys    - definition lines (without '>')
    # values  - byte offset of each record, same order as keys
    #
    # The end offset is exclusive: it is the start of the next record, or the
    # size of the file for the last record (so a file without a trailing
    # newline keeps its final residue).
    def index(content, keys, values)
      fasta_index = {}
      keys.each_with_index do |key, i|
        # only the first 116 characters of the header take part in the id
        # NOTE(review): presumably mirrors a length limit of the search
        # output - confirm
        id = key[0..115].gsub(/\s+/, '')
        fasta_index[id] = [values[i], values[i + 1] || content.bytesize]
      end
      fasta_index
    end

    # Deletes the temp dir (if it was configured and exists).
    def remove_temp_dir
      temp_dir = @opt && @opt[:temp_dir]
      return unless temp_dir && File.directory?(temp_dir)
      FileUtils.rm_rf(temp_dir)
    end
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require 'bio'
|
2
|
+
|
3
|
+
# Top level module / namespace.
module Neurohmmer
  # A class that validates the command line opts
  #
  # Every failed check writes a message to STDERR and terminates the process
  # with a non-zero exit status.
  class ArgumentsValidators
    class << self
      # Runs all checks on the input file.
      #
      # opt - options hash; :input_file is read.
      #
      # Returns the same hash with :input_file replaced by its expanded path
      # and :type set to :protein or :genetic.
      def run(opt)
        assert_file_present('input fasta file', opt[:input_file])
        # Normalise once so that every later reader opens exactly the path
        # that was checked for existence above.
        opt[:input_file] = File.expand_path(opt[:input_file])
        assert_input_file_not_empty(opt[:input_file])
        assert_input_file_probably_fasta(opt[:input_file])
        opt[:type] = assert_input_sequence(opt[:input_file])
        # TODO: Assert hmm & mafft binaries
        opt
      end

      private

      # Exits with exit_code unless file is given and exists.
      def assert_file_present(desc, file, exit_code = 1)
        return if file && File.exist?(File.expand_path(file))
        $stderr.puts "*** Error: Couldn't find the #{desc}: #{file}."
        exit exit_code
      end

      # Exits when the file has a size of zero bytes.
      def assert_input_file_not_empty(file)
        return unless File.zero?(File.expand_path(file))
        $stderr.puts "*** Error: The input_file (#{file})" \
                     ' seems to be empty.'
        exit 1
      end

      # Exits unless the first line of the file starts with '>'.
      # Returns true otherwise.
      def assert_input_file_probably_fasta(file)
        first_line = File.open(File.expand_path(file), 'r', &:gets)
        return true if first_line.to_s.start_with?('>')
        $stderr.puts "*** Error: The input_file (#{file})" \
                     ' does not seems to be a fasta file.'
        exit 1
      end

      # Returns the single sequence type found in the file (:protein or
      # :genetic). Exits when no type could be determined or when both types
      # are present; the two situations get their own message.
      def assert_input_sequence(file)
        types = sequence_types(file)
        return types.first if types.length == 1
        if types.empty?
          $stderr.puts '*** Error: Unable to determine whether the input' \
                       ' file contains'
          $stderr.puts ' protein or nucleotide data (every sequence is' \
                       ' too short to classify).'
        else
          $stderr.puts '*** Error: The input files seems to contain a mixture of'
          $stderr.puts ' both protein and nucleotide data.'
          $stderr.puts ' Please correct this and try again.'
        end
        exit 1
      end

      # Returns the sequence type when all classifiable sequences share one
      # type, nil otherwise (no classifiable sequence, or a mixture).
      def type_of_sequences(file)
        types = sequence_types(file)
        types.first if types.length == 1
      end

      # Returns the distinct types (:protein / :genetic) of all sequences in
      # the file that are long enough to be classified.
      def sequence_types(file)
        fasta_content = IO.binread(file)
        # the first sequence does not need to have a fasta definition line
        sequences = fasta_content.split(/^>.*$/).delete_if(&:empty?)
        sequences.map { |seq| guess_sequence_type(seq) }.uniq.compact
      end

      # Classifies one sequence; returns nil when fewer than 10 usable
      # characters remain.
      def guess_sequence_type(seq)
        # removing non-letter and ambiguous characters
        cleaned_sequence = seq.gsub(/[^A-Z]|[NX]/i, '')
        return nil if cleaned_sequence.length < 10 # conservative
        type = Bio::Sequence.new(cleaned_sequence).guess(0.9)
        (type == Bio::Sequence::NA) ? :genetic : :protein
      end
    end
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require 'bio-hmmer3_report'
|
2
|
+
require 'forwardable'
|
3
|
+
|
4
|
+
# Top level module / namespace.
module Neurohmmer
  # A class that holds methods related to Hmmer
  #
  # All paths come from Neurohmmer.conf / Neurohmmer.opt (delegated below).
  class Hmmer
    class <<self
      extend Forwardable
      def_delegators Neurohmmer, :opt, :conf

      # Runs hmmsearch with every file in conf[:hmm_dir] whose name ends in
      # "hmm" against opt[:input_file]; all results are appended to
      # conf[:hmm_output].
      def search
        output = conf[:hmm_output]
        # results are appended, so leftovers of an earlier run would show up
        # as duplicated reports
        File.delete(output) if File.exist?(output)
        # sorted, so that the order of the reports does not depend on the
        # order in which the file system lists the directory
        Dir.entries(conf[:hmm_dir]).sort.each do |entry|
          next if entry !~ /hmm$/
          hmm_search(opt[:input_file], File.join(conf[:hmm_dir], entry), output)
        end
      end

      # Parses conf[:hmm_output].
      #
      # Returns a hash { report.query => array of hit hashes } (see
      # analyse_hmm_search_report).
      def analyse_output
        hmm_results = {}
        # block form: the handle stays open while the reports are consumed
        # and is closed afterwards
        File.open(conf[:hmm_output]) do |hmm_search_output|
          hmm_reports = Bio::HMMER::HMMER3.reports(hmm_search_output)
          hmm_reports.each_with_index do |report, idx|
            # the last element is not a result and is skipped
            # NOTE(review): presumably the trailer that follows the final
            # record separator - confirm against the parser
            next if idx + 1 == hmm_reports.length
            hmm_results[report.query] = analyse_hmm_search_report(report)
          end
        end
        hmm_results
      end

      # Aligns every fasta file (name ending in "fa" / "fasta") found in
      # conf[:raw_data] with mafft and builds a HMM from the alignment.
      #
      # "<name>.fa" becomes "<name>.aligned" in conf[:raw_alignments] and
      # "<name>.hmm" in conf[:hmm_dir].
      def generate_hmm_models
        # conf[:num_threads] is honoured when present; fall back to the
        # options hash, then to a single thread
        # NOTE(review): assumes the CLI stores the value under
        # opt[:num_threads] - confirm
        threads = conf[:num_threads] || (opt || {})[:num_threads] || 1
        alignments_dir = conf[:raw_alignments]
        Dir.mkdir(alignments_dir) unless File.directory?(alignments_dir)
        Dir.foreach(conf[:raw_data]) do |file|
          next if file !~ /fa(sta)?$/
          # strip the extension together with its dot ("x.fa" -> "x")
          basename = file.sub(/\.?fa(sta)?$/, '')
          np_fasta_file  = File.join(conf[:raw_data], file)
          aligned_file   = File.join(alignments_dir, "#{basename}.aligned")
          hmm_model_file = File.join(conf[:hmm_dir], "#{basename}.hmm")
          mafft(np_fasta_file, aligned_file, threads)
          hmm_build(aligned_file, hmm_model_file)
        end
      end

      private

      # Turns the hits of one report into an array of hashes with the keys
      # :id and :seq (both from Neurohmmer.extract_sequence) and :flatseq
      # (the flatseq of every hsp of the hit).
      def analyse_hmm_search_report(report)
        report.hits.map do |hit|
          id, sequence = Neurohmmer.extract_sequence(hit.sequence_name.strip)
          { id: id, seq: sequence, flatseq: hit.hsps.map(&:flatseq) }
        end
      end

      # Aligns input with mafft; the alignment (stdout) goes to aligned_file.
      def mafft(input, aligned_file, num_threads)
        run_command(['mafft', '--maxiterate', '1000', '--thread',
                     (num_threads || 1).to_s, input], out: aligned_file)
      end

      # Builds hmm_model_file from aligned_file; hmmbuild's stdout is
      # discarded.
      def hmm_build(aligned_file, hmm_model_file)
        run_command(['hmmbuild', hmm_model_file, aligned_file],
                    out: File::NULL)
      end

      # Searches input_file with hmm_file and appends stdout to hmm_output.
      def hmm_search(input_file, hmm_file, hmm_output)
        run_command(['hmmsearch', hmm_file, input_file],
                    out: [hmm_output, 'a'])
      end

      # Executes cmd (array: program followed by its arguments) without a
      # shell, so paths need no quoting.
      #
      # redirects - redirection options as understood by Kernel#system.
      #
      # Returns true on success. Raises RuntimeError when the program could
      # not be started or exits with a non-zero status.
      def run_command(cmd, redirects = {})
        return true if system(*cmd, redirects)
        raise "*** Error: command failed (#{$?.inspect}): #{cmd.join(' ')}"
      end
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'slim'
|
2
|
+
require 'forwardable'
|
3
|
+
|
4
|
+
# Top level module / namespace.
module Neurohmmer
  # A class that holds methods related to Output
  #
  # Turns the analysed search results into the HTML report.
  class Output
    class <<self
      extend Forwardable
      def_delegators Neurohmmer, :conf

      # Renders 'template/contents.slim' with the formatted results and
      # writes the page to conf[:html_output].
      #
      # hmm_results - hash { query => array of hit hashes } (each hit has
      #               the keys :id, :seq and :flatseq).
      def to_html(hmm_results)
        # the template reads @html_results from this object (see render(self))
        @html_results = format_seqs_for_html(hmm_results)
        template_path = File.expand_path(File.join(__FILE__, '../../../',
                                                   'template/contents.slim'))
        contents_temp = File.read(template_path)
        html_content = Slim::Template.new { contents_temp }.render(self)
        File.open(conf[:html_output], 'w') { |f| f.puts html_content }
      end

      # Converts every hit into { id:, seq: } where seq is the HTML marked up
      # sequence. Queries without hits are left out.
      def format_seqs_for_html(hmm_results)
        results = {}
        hmm_results.each do |query, hits|
          next if hits.empty?
          results[query] = hits.map do |hit|
            { id: hit[:id], seq: format_html_seq(hit[:seq], hit[:flatseq]) }
          end
        end
        results
      end

      private

      # Marks up one sequence for the report.
      #
      # seq     - the sequence text (may contain line breaks); not modified.
      # flatseq - array of aligned hit segments (gaps written as '-').
      #
      # Returns a new string in which every occurrence of a segment is
      # wrapped in a "hsp" span, KR/KK/RR pairs in "clv" spans (with special
      # handling of a pair that straddles the start of a hsp span) and a G
      # directly in front of a "clv" span in a "gly" span.
      def format_html_seq(seq, flatseq)
        html = seq.delete("\n")
        flatseq.each do |hsp|
          ungapped = hsp.delete('-')
          # an empty pattern would match between every pair of characters
          next if ungapped.empty?
          # escaped: the segment is literal text ('*' or '.' must not act
          # as regex operators)
          html.gsub!(/#{Regexp.escape(ungapped)}/i, '<span class=hsp>\0</span>')
        end
        html.gsub(/KR|KK|RR/i, '<span class=clv>\0</span>')
            .gsub(/(K|R)<span class=hsp>(K|R)/i, '<span class=clv>\1</span>' \
              '<span class=clv_i>\2</span><span class=hsp>')
            .gsub('<span class=clv>R</span><span class=clv_i>K</span><span' \
              ' class=hsp>', 'R<span class=hsp>K')
            .gsub(/G<span class=clv>/, '<span class=gly>G</span><span class=clv>')
      end
    end
  end
end
|
data/neurohmmer.gemspec
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'neurohmmer/version'
|
5
|
+
|
6
|
+
# Gem specification of neurohmmer.
Gem::Specification.new do |s|
  s.name          = 'neurohmmer'
  s.version       = Neurohmmer::VERSION
  s.authors       = ['Ismail Moghul', 'Maurice Elphick', 'Yannick Wurm']
  s.email         = ['ismail.moghul@gmail.com']

  # one line summary; the longer text lives in the description (identical
  # summary and description trigger a warning when the gem is built)
  s.summary       = 'Identify Neuropeptides using powerful Hidden Markov' \
                    ' Models.'
  s.description   = 'Identify Neuropeptides using powerful Hidden Markov' \
                    " Models.\n\n For further information please refer to:" \
                    ' https://github.com/wurmlab/neurohmmer'
  s.homepage      = 'https://github.com/wurmlab/neurohmmer'
  # SPDX identifier instead of the free form 'AGPL'
  # NOTE(review): confirm whether "-only" or "-or-later" is intended
  s.license       = 'AGPL-3.0'

  # everything tracked by git is packaged
  s.files         = `git ls-files -z`.split("\x0")
  s.executables   = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
  s.test_files    = s.files.grep(%r{^(test|spec|features)/})
  s.require_paths = ['lib']

  s.required_ruby_version = '>= 2.0.0'
  s.add_development_dependency 'bundler', '~> 1.10'
  s.add_development_dependency 'rake', '~> 10.0'
  s.add_development_dependency 'rspec', '~> 3.4'

  s.add_dependency 'bio', '~> 1.5'
  s.add_dependency 'bio-hmmer3_report', '~> 0.1'
  s.add_dependency 'slim', '~> 3.0'
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
doctype html
|
2
|
+
html lang="en"
|
3
|
+
head
|
4
|
+
meta charset="utf-8"
|
5
|
+
meta content="IE=edge" http-equiv="X-UA-Compatible"
|
6
|
+
meta content="width=device-width, initial-scale=1" name="viewport"
|
7
|
+
meta content="NeuroHMMer | Identify Neuropeptide Homologs" name="description"
|
8
|
+
meta content="Wurmlab" name="author"
|
9
|
+
title NeuroHMMer | Identify Neuropeptide Homologs
|
10
|
+
css:
|
11
|
+
html { position: relative; min-height: 100%; }
|
12
|
+
body {margin:0 0 100px; background-color:#F5F5F5; font-family:"Helvetica Neue", Helvetica, Arial, sans-serif; font-size:15px; line-height:1.42857143; color:#2c3e50;}
|
13
|
+
.container {margin-right:auto; margin-left:auto; padding-left:15px; padding-right:15px; max-width:1170px; width:95%;}
|
14
|
+
footer {bottom:0; width:100%; margin:0 auto; position:absolute; height:100px; overflow:hidden; border-top:2px solid #DBDBDB; color:#b4bcc2;text-align:center;}
|
15
|
+
a{color:#18bc9c; text-decoration:none}a:hover,a:focus{color:#18bc9c; text-decoration:underline}
|
16
|
+
p {margin:0 0 10.5px}
|
17
|
+
.id {font-weight:bold;}
|
18
|
+
.sequence {word-break:break-all; font-family:Courier New, Courier, Mono;}
|
19
|
+
.clv {color:#00B050; font-weight:bold;}
|
20
|
+
.clv_i {color:#00B050; font-weight: bold; background-color:#FFE4B5;}
|
21
|
+
.gly {color:#FF0000; font-weight:bold;}
|
22
|
+
.hsp {background-color:#FFE4B5;}
|
23
|
+
body
|
24
|
+
.container
|
25
|
+
h1 NeuroHMMer Results
|
26
|
+
.results
|
27
|
+
- @html_results.each do |query, hits|
|
28
|
+
section
|
29
|
+
h2 #{query.gsub(/.fa(sta)?.aligned/, '')}
|
30
|
+
- hits.each do |hit|
|
31
|
+
p.sequence
|
32
|
+
span.id
|
33
|
+
= hit[:id]
|
34
|
+
br
|
35
|
+
span.seq
|
36
|
+
== hit[:seq]
|
37
|
+
hr
|
38
|
+
br
|
39
|
+
br
|
40
|
+
br
|
41
|
+
footer
|
42
|
+
p
|
43
|
+
| Please cite "Moghul I, Elphick M & Wurm Y
|
44
|
+
em
|
45
|
+
| (in prep)
|
46
|
+
| NeuroHMMer: A tool to identify neuropeptide homologs"
|
47
|
+
br
|
48
|
+
| Developed at
|
49
|
+
a href="https://wurmlab.github.io" target="_blank" Wurm Lab
|
50
|
+
| ,
|
51
|
+
a href="http://www.sbcs.qmul.ac.uk" target="_blank" QMUL
|
52
|
+
br
|
53
|
+
| This page was created by
|
54
|
+
a href="https://github.com/wurmlab/NpSearch" target="_blank" NpSearch
|
55
|
+
| v#{Neurohmmer::VERSION}
|