scbi_distributed_blast 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/README.rdoc +4 -4
- data/bin/scbi_distributed_blast +11 -10
- data/lib/scbi_distributed_blast/scbi_dblast_manager.rb +8 -1
- data/lib/scbi_distributed_blast.rb +1 -1
- metadata +3 -3
data/History.txt
CHANGED
data/README.rdoc
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
== DESCRIPTION:
|
6
6
|
|
7
|
-
scbi_distributed_blast is a simple distribution mechanism for blast+ made on top of scbi_mapreduce. With scbi_distributed_blast you can perform distributed blasts using a cluster
|
7
|
+
scbi_distributed_blast is a simple distribution mechanism for blast+ made on top of scbi_mapreduce. With scbi_distributed_blast you can perform distributed blasts using a cluster, a set of machines on your network, or your own multi-core personal computer. It uses the same blast+ that you have already installed.
|
8
8
|
|
9
9
|
== FEATURES:
|
10
10
|
|
@@ -16,15 +16,15 @@ scbi_distributed_blast is a simple distribution mechanism for blast+ made on top
|
|
16
16
|
|
17
17
|
Once installed, scbi_distributed_blast is very easy to use. To launch it locally on your own personal computer using 8 cores, you can do:
|
18
18
|
|
19
|
-
$> scbi_distributed_blast -
|
19
|
+
$> scbi_distributed_blast -w 8 'full_blast_cmd'
|
20
20
|
|
21
21
|
Where full_blast_cmd is the blast+ cmd that you would write to execute your desired blast search. Eg.:
|
22
22
|
|
23
|
-
$> scbi_distributed_blast -
|
23
|
+
$> scbi_distributed_blast -w 8 'blastn -task blastn-short -db my_db.fasta -query ~/seqs/sample.fasta -outfmt 6 -out output_file'
|
24
24
|
|
25
25
|
Sequences are sent in chunks of 100, but you can change this value by using the -g parameter:
|
26
26
|
|
27
|
-
$> scbi_distributed_blast -
|
27
|
+
$> scbi_distributed_blast -w 8 -g 200 'blastn -task blastn-short -db my_db.fasta -query ~/seqs/sample.fasta -outfmt 6 -out output_file'
|
28
28
|
|
29
29
|
To get additional help:
|
30
30
|
|
data/bin/scbi_distributed_blast
CHANGED
@@ -8,6 +8,7 @@ require 'scbi_distributed_blast'
|
|
8
8
|
require 'logger'
|
9
9
|
require 'optparse'
|
10
10
|
|
11
|
+
# retrieve environment variables
|
11
12
|
if ENV['SCBI_DISTRIBUTED_BLAST_INIT'] && File.exists?(ENV['SCBI_DISTRIBUTED_BLAST_INIT'])
|
12
13
|
$INIT_FILE=File.expand_path(ENV['SCBI_DISTRIBUTED_BLAST_INIT'])
|
13
14
|
elsif File.exists?(File.join('~','scbi_distributed_blast_init_env'))
|
@@ -17,8 +18,8 @@ else
|
|
17
18
|
end
|
18
19
|
|
19
20
|
|
21
|
+
# parse input options
|
20
22
|
options = {}
|
21
|
-
|
22
23
|
optparse = OptionParser.new do |opts|
|
23
24
|
|
24
25
|
# Set a banner, displayed at the top
|
@@ -27,11 +28,13 @@ optparse = OptionParser.new do |opts|
|
|
27
28
|
|
28
29
|
# Define the options, and what they do
|
29
30
|
|
31
|
+
# server ip
|
30
32
|
options[:server_ip] = '0.0.0.0'
|
31
33
|
opts.on( '-s', '--server IP', 'Server ip. You can use a partial ip to select the apropriate interface' ) do |server_ip|
|
32
34
|
options[:server_ip] = server_ip
|
33
35
|
end
|
34
36
|
|
37
|
+
# server port
|
35
38
|
options[:port] = 0 # any free port
|
36
39
|
opts.on( '-p', '--port PORT', 'Server port. If set to 0, an arbitrary empty port will be used') do |port|
|
37
40
|
options[:port] = port.to_i
|
@@ -55,17 +58,18 @@ optparse = OptionParser.new do |opts|
|
|
55
58
|
end
|
56
59
|
end
|
57
60
|
|
61
|
+
# chunk size
|
58
62
|
options[:chunk_size] = 100
|
59
63
|
opts.on( '-g', '--group_size chunk_size', 'Group sequences in chunks of size <chunk_size>' ) do |cs|
|
60
64
|
options[:chunk_size] = cs.to_i
|
61
65
|
end
|
62
66
|
|
67
|
+
# log_file
|
63
68
|
options[:log_file] = STDOUT
|
64
69
|
opts.on( '-l', '--log_file file', 'Define a log file. STDOUT by default' ) do |cs|
|
65
70
|
options[:log_file] = cs
|
66
71
|
end
|
67
72
|
|
68
|
-
|
69
73
|
# This displays the help screen, all programs are
|
70
74
|
# assumed to have this option.
|
71
75
|
opts.on_tail( '-h', '--help', 'Display this screen' ) do
|
@@ -78,8 +82,10 @@ end
|
|
78
82
|
# parse options and remove from ARGV
|
79
83
|
optparse.parse!
|
80
84
|
|
85
|
+
# use remaining command line arguments as blast_cmd
|
81
86
|
blast_cmd = ARGV.join(' ')
|
82
87
|
|
88
|
+
# enable logger
|
83
89
|
$LOG = Logger.new(options[:log_file])
|
84
90
|
$LOG.datetime_format = "%Y-%m-%d %H:%M:%S"
|
85
91
|
|
@@ -89,6 +95,7 @@ $LOG.info "Original Blast+ CMD: #{blast_cmd}"
|
|
89
95
|
input_file = nil
|
90
96
|
output_file = nil
|
91
97
|
|
98
|
+
# extract query and output files from blast_cmd
|
92
99
|
if blast_cmd.upcase.index('-QUERY')
|
93
100
|
params=blast_cmd.split(' -')
|
94
101
|
params.reverse_each do |param|
|
@@ -109,19 +116,13 @@ if blast_cmd.upcase.index('-QUERY')
|
|
109
116
|
params.delete(param)
|
110
117
|
# break
|
111
118
|
end
|
112
|
-
|
113
|
-
|
114
119
|
end
|
115
120
|
|
116
121
|
blast_cmd=params.join(' -')
|
117
122
|
end
|
118
123
|
|
119
|
-
#
|
120
|
-
|
121
|
-
# puts "Output file: #{output_file}"
|
122
|
-
|
123
|
-
if !input_file.nil? and File.exists?(File.expand_path(input_file))
|
124
|
-
|
124
|
+
# check if query is defined
|
125
|
+
if !input_file.nil? and File.exists?(File.expand_path(input_file))
|
125
126
|
$LOG.info "Query input file: #{input_file}"
|
126
127
|
else
|
127
128
|
$LOG.error "No input file specified in blast command (-query parameter)"
|
@@ -12,13 +12,17 @@ class ScbiDblastManager < ScbiMapreduce::WorkManager
|
|
12
12
|
# worker connection, and thus, all global variables here should be
|
13
13
|
# class variables (starting with @@)
|
14
14
|
def self.init_work_manager(input_file, blast_cmd, output_file)
|
15
|
+
# save blast_cmd
|
15
16
|
@@blast_cmd=blast_cmd
|
17
|
+
|
18
|
+
# define output file
|
16
19
|
if output_file.nil?
|
17
20
|
@@output_file=STDOUT
|
18
21
|
else
|
19
22
|
@@output_file=File.open(output_file,'w')
|
20
23
|
end
|
21
24
|
|
25
|
+
# open input file in fasta
|
22
26
|
@@fqr = FastaQualFile.new(input_file)
|
23
27
|
|
24
28
|
end
|
@@ -26,6 +30,7 @@ class ScbiDblastManager < ScbiMapreduce::WorkManager
|
|
26
30
|
# end_work_manager is executed at the end, when all the process is done.
|
27
31
|
# You can use it to close files opened in init_work_manager
|
28
32
|
def self.end_work_manager
|
33
|
+
# close opened files
|
29
34
|
@@fqr.close
|
30
35
|
@@output_file.close if @@output_file!=STDOUT
|
31
36
|
end
|
@@ -33,6 +38,7 @@ class ScbiDblastManager < ScbiMapreduce::WorkManager
|
|
33
38
|
# worker_initial_config is used to send initial parameters to workers.
|
34
39
|
# The method is executed once per each worker
|
35
40
|
def worker_initial_config
|
41
|
+
# send blast_cmd to workers
|
36
42
|
{:blast_cmd=>@@blast_cmd}
|
37
43
|
end
|
38
44
|
|
@@ -41,6 +47,7 @@ class ScbiDblastManager < ScbiMapreduce::WorkManager
|
|
41
47
|
# This method must return the work data or nil if no more data is available
|
42
48
|
def next_work
|
43
49
|
|
50
|
+
# read next sequence from inputfile
|
44
51
|
n,f = @@fqr.next_seq
|
45
52
|
|
46
53
|
if n.nil?
|
@@ -55,8 +62,8 @@ class ScbiDblastManager < ScbiMapreduce::WorkManager
|
|
55
62
|
# work_received is executed each time a worker has finished a job.
|
56
63
|
# Here you can write results down to disk, perform some aggregated statistics, etc...
|
57
64
|
def work_received(results)
|
65
|
+
# write results to disk
|
58
66
|
@@output_file.puts results
|
59
|
-
# write_data_to_disk(results)
|
60
67
|
end
|
61
68
|
|
62
69
|
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: scbi_distributed_blast
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.0.3
|
5
|
+
version: 0.0.4
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Dario Guerrero
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-07-
|
13
|
+
date: 2011-07-12 00:00:00 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: scbi_mapreduce
|
@@ -56,7 +56,7 @@ dependencies:
|
|
56
56
|
version: 2.8.0
|
57
57
|
type: :development
|
58
58
|
version_requirements: *id004
|
59
|
-
description: scbi_distributed_blast is a simple distribution mechanism for blast+ made on top of scbi_mapreduce. With scbi_distributed_blast you can perform distributed blasts using a cluster
|
59
|
+
description: scbi_distributed_blast is a simple distribution mechanism for blast+ made on top of scbi_mapreduce. With scbi_distributed_blast you can perform distributed blasts using a cluster, a set of machines on your network, or your own multi-core personal computer. It uses the same blast+ that you have already installed.
|
60
60
|
email:
|
61
61
|
- dariogf@gmail.com
|
62
62
|
executables:
|