scbi_distributed_blast 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/README.rdoc +4 -4
- data/bin/scbi_distributed_blast +11 -10
- data/lib/scbi_distributed_blast/scbi_dblast_manager.rb +8 -1
- data/lib/scbi_distributed_blast.rb +1 -1
- metadata +3 -3
data/History.txt
CHANGED
data/README.rdoc
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
== DESCRIPTION:
|
6
6
|
|
7
|
-
scbi_distributed_blast is a simple distribution mechanism for blast+ made on top of scbi_mapreduce. With scbi_distributed_blast you can perform distributed blasts using a cluster
|
7
|
+
scbi_distributed_blast is a simple distribution mechanism for blast+ made on top of scbi_mapreduce. With scbi_distributed_blast you can perform distributed blasts using a cluster, a set of machines of your network or your own multi-core personal computer. It uses the same blast+ that you have already installed.
|
8
8
|
|
9
9
|
== FEATURES:
|
10
10
|
|
@@ -16,15 +16,15 @@ scbi_distributed_blast is a simple distribution mechanism for blast+ made on top
|
|
16
16
|
|
17
17
|
Once installed, scbi_distributed_blast is very easy to use. To launch it locally in your own personal computer using 8 cores, you can do:
|
18
18
|
|
19
|
-
$> scbi_distributed_blast -
|
19
|
+
$> scbi_distributed_blast -w 8 'full_blast_cmd'
|
20
20
|
|
21
21
|
Where full_blast_cmd is the blast+ cmd that you would write to execute your desired blast search. Eg.:
|
22
22
|
|
23
|
-
$> scbi_distributed_blast -
|
23
|
+
$> scbi_distributed_blast -w 8 'blastn -task blastn-short -db my_db.fasta -query ~/seqs/sample.fasta -outfmt 6 -out output_file'
|
24
24
|
|
25
25
|
Sequences are sent in chunks of 100, but you can change this value by using the -g parameter:
|
26
26
|
|
27
|
-
$> scbi_distributed_blast -
|
27
|
+
$> scbi_distributed_blast -w 8 -g 200 'blastn -task blastn-short -db my_db.fasta -query ~/seqs/sample.fasta -outfmt 6 -out output_file'
|
28
28
|
|
29
29
|
To get additional help:
|
30
30
|
|
data/bin/scbi_distributed_blast
CHANGED
@@ -8,6 +8,7 @@ require 'scbi_distributed_blast'
|
|
8
8
|
require 'logger'
|
9
9
|
require 'optparse'
|
10
10
|
|
11
|
+
# retrieve environment variables
|
11
12
|
if ENV['SCBI_DISTRIBUTED_BLAST_INIT'] && File.exists?(ENV['SCBI_DISTRIBUTED_BLAST_INIT'])
|
12
13
|
$INIT_FILE=File.expand_path(ENV['SCBI_DISTRIBUTED_BLAST_INIT'])
|
13
14
|
elsif File.exists?(File.join('~','scbi_distributed_blast_init_env'))
|
@@ -17,8 +18,8 @@ else
|
|
17
18
|
end
|
18
19
|
|
19
20
|
|
21
|
+
# parse input options
|
20
22
|
options = {}
|
21
|
-
|
22
23
|
optparse = OptionParser.new do |opts|
|
23
24
|
|
24
25
|
# Set a banner, displayed at the top
|
@@ -27,11 +28,13 @@ optparse = OptionParser.new do |opts|
|
|
27
28
|
|
28
29
|
# Define the options, and what they do
|
29
30
|
|
31
|
+
# server ip
|
30
32
|
options[:server_ip] = '0.0.0.0'
|
31
33
|
opts.on( '-s', '--server IP', 'Server ip. You can use a partial ip to select the apropriate interface' ) do |server_ip|
|
32
34
|
options[:server_ip] = server_ip
|
33
35
|
end
|
34
36
|
|
37
|
+
# server port
|
35
38
|
options[:port] = 0 # any free port
|
36
39
|
opts.on( '-p', '--port PORT', 'Server port. If set to 0, an arbitrary empty port will be used') do |port|
|
37
40
|
options[:port] = port.to_i
|
@@ -55,17 +58,18 @@ optparse = OptionParser.new do |opts|
|
|
55
58
|
end
|
56
59
|
end
|
57
60
|
|
61
|
+
# chunk size
|
58
62
|
options[:chunk_size] = 100
|
59
63
|
opts.on( '-g', '--group_size chunk_size', 'Group sequences in chunks of size <chunk_size>' ) do |cs|
|
60
64
|
options[:chunk_size] = cs.to_i
|
61
65
|
end
|
62
66
|
|
67
|
+
# log_file
|
63
68
|
options[:log_file] = STDOUT
|
64
69
|
opts.on( '-l', '--log_file file', 'Define a log file. STDOUT by default' ) do |cs|
|
65
70
|
options[:log_file] = cs
|
66
71
|
end
|
67
72
|
|
68
|
-
|
69
73
|
# This displays the help screen, all programs are
|
70
74
|
# assumed to have this option.
|
71
75
|
opts.on_tail( '-h', '--help', 'Display this screen' ) do
|
@@ -78,8 +82,10 @@ end
|
|
78
82
|
# parse options and remove from ARGV
|
79
83
|
optparse.parse!
|
80
84
|
|
85
|
+
# use remaining command line arguments as blast_cmd
|
81
86
|
blast_cmd = ARGV.join(' ')
|
82
87
|
|
88
|
+
# enable logger
|
83
89
|
$LOG = Logger.new(options[:log_file])
|
84
90
|
$LOG.datetime_format = "%Y-%m-%d %H:%M:%S"
|
85
91
|
|
@@ -89,6 +95,7 @@ $LOG.info "Original Blast+ CMD: #{blast_cmd}"
|
|
89
95
|
input_file = nil
|
90
96
|
output_file = nil
|
91
97
|
|
98
|
+
# extract query and output files from blast_cmd
|
92
99
|
if blast_cmd.upcase.index('-QUERY')
|
93
100
|
params=blast_cmd.split(' -')
|
94
101
|
params.reverse_each do |param|
|
@@ -109,19 +116,13 @@ if blast_cmd.upcase.index('-QUERY')
|
|
109
116
|
params.delete(param)
|
110
117
|
# break
|
111
118
|
end
|
112
|
-
|
113
|
-
|
114
119
|
end
|
115
120
|
|
116
121
|
blast_cmd=params.join(' -')
|
117
122
|
end
|
118
123
|
|
119
|
-
#
|
120
|
-
|
121
|
-
# puts "Output file: #{output_file}"
|
122
|
-
|
123
|
-
if !input_file.nil? and File.exists?(File.expand_path(input_file))
|
124
|
-
|
124
|
+
# check if query is defined
|
125
|
+
if !input_file.nil? and File.exists?(File.expand_path(input_file))
|
125
126
|
$LOG.info "Query input file: #{input_file}"
|
126
127
|
else
|
127
128
|
$LOG.error "No input file specified in blast command (-query parameter)"
|
@@ -12,13 +12,17 @@ class ScbiDblastManager < ScbiMapreduce::WorkManager
|
|
12
12
|
# worker connection, and thus, all global variables here should be
|
13
13
|
# class variables (starting with @@)
|
14
14
|
def self.init_work_manager(input_file, blast_cmd, output_file)
|
15
|
+
# save blast_cmd
|
15
16
|
@@blast_cmd=blast_cmd
|
17
|
+
|
18
|
+
# define output file
|
16
19
|
if output_file.nil?
|
17
20
|
@@output_file=STDOUT
|
18
21
|
else
|
19
22
|
@@output_file=File.open(output_file,'w')
|
20
23
|
end
|
21
24
|
|
25
|
+
# open input file in fasta
|
22
26
|
@@fqr = FastaQualFile.new(input_file)
|
23
27
|
|
24
28
|
end
|
@@ -26,6 +30,7 @@ class ScbiDblastManager < ScbiMapreduce::WorkManager
|
|
26
30
|
# end_work_manager is executed at the end, when all the process is done.
|
27
31
|
# You can use it to close files opened in init_work_manager
|
28
32
|
def self.end_work_manager
|
33
|
+
# close opened files
|
29
34
|
@@fqr.close
|
30
35
|
@@output_file.close if @@output_file!=STDOUT
|
31
36
|
end
|
@@ -33,6 +38,7 @@ class ScbiDblastManager < ScbiMapreduce::WorkManager
|
|
33
38
|
# worker_initial_config is used to send initial parameters to workers.
|
34
39
|
# The method is executed once per each worker
|
35
40
|
def worker_initial_config
|
41
|
+
# send blast_cmd to workers
|
36
42
|
{:blast_cmd=>@@blast_cmd}
|
37
43
|
end
|
38
44
|
|
@@ -41,6 +47,7 @@ class ScbiDblastManager < ScbiMapreduce::WorkManager
|
|
41
47
|
# This method must return the work data or nil if no more data is available
|
42
48
|
def next_work
|
43
49
|
|
50
|
+
# read next sequence from inputfile
|
44
51
|
n,f = @@fqr.next_seq
|
45
52
|
|
46
53
|
if n.nil?
|
@@ -55,8 +62,8 @@ class ScbiDblastManager < ScbiMapreduce::WorkManager
|
|
55
62
|
# work_received is executed each time a worker has finished a job.
|
56
63
|
# Here you can write results down to disk, perform some aggregated statistics, etc...
|
57
64
|
def work_received(results)
|
65
|
+
# write results to disk
|
58
66
|
@@output_file.puts results
|
59
|
-
# write_data_to_disk(results)
|
60
67
|
end
|
61
68
|
|
62
69
|
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: scbi_distributed_blast
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.0.3
|
5
|
+
version: 0.0.4
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Dario Guerrero
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-07-
|
13
|
+
date: 2011-07-12 00:00:00 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: scbi_mapreduce
|
@@ -56,7 +56,7 @@ dependencies:
|
|
56
56
|
version: 2.8.0
|
57
57
|
type: :development
|
58
58
|
version_requirements: *id004
|
59
|
-
description: scbi_distributed_blast is a simple distribution mechanism for blast+ made on top of scbi_mapreduce. With scbi_distributed_blast you can perform distributed blasts using a cluster
|
59
|
+
description: scbi_distributed_blast is a simple distribution mechanism for blast+ made on top of scbi_mapreduce. With scbi_distributed_blast you can perform distributed blasts using a cluster, a set of machines of your network or your own multi-core personal computer. It uses the same blast+ that you have already installed.
|
60
60
|
email:
|
61
61
|
- dariogf@gmail.com
|
62
62
|
executables:
|