scbi_distributed_blast 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,3 +1,7 @@
1
+ === 0.0.4 2011-07-12
2
+
3
+ updated documentation
4
+
1
5
  === 0.0.3 2011-07-07
2
6
 
3
7
  prerelease
data/README.rdoc CHANGED
@@ -4,7 +4,7 @@
4
4
 
5
5
  == DESCRIPTION:
6
6
 
7
- scbi_distributed_blast is a simple distribution mechanism for blast+ made on top of scbi_mapreduce. With scbi_distributed_blast you can perform distributed blasts using a cluster or a set of machines of your network. It uses the version of blast+ that you have installed.
7
+ scbi_distributed_blast is a simple distribution mechanism for blast+ built on top of scbi_mapreduce. With scbi_distributed_blast you can perform distributed blasts using a cluster, a set of machines on your network, or your own multi-core personal computer. It uses the blast+ installation that you already have.
8
8
 
9
9
  == FEATURES:
10
10
 
@@ -16,15 +16,15 @@ scbi_distributed_blast is a simple distribution mechanism for blast+ made on top
16
16
 
17
17
  Once installed, scbi_distributed_blast is very easy to use. To launch it locally on your own personal computer using 8 cores, run:
18
18
 
19
- $> scbi_distributed_blast -s 10.0.0 -w 8 'full_blast_cmd'
19
+ $> scbi_distributed_blast -w 8 'full_blast_cmd'
20
20
 
21
21
  Where full_blast_cmd is the blast+ command that you would write to execute your desired blast search, e.g.:
22
22
 
23
- $> scbi_distributed_blast -s 10.0.0 -w 8 'blastn -task blastn-short -db my_db.fasta -query ~/seqs/sample.fasta -outfmt 6 -out output_file'
23
+ $> scbi_distributed_blast -w 8 'blastn -task blastn-short -db my_db.fasta -query ~/seqs/sample.fasta -outfmt 6 -out output_file'
24
24
 
25
25
  Sequences are sent in chunks of 100, but you can change this value by using the -g parameter:
26
26
 
27
- $> scbi_distributed_blast -s 10.0.0 -w 8 -g 200 'blastn -task blastn-short -db my_db.fasta -query ~/seqs/sample.fasta -outfmt 6 -out output_file'
27
+ $> scbi_distributed_blast -w 8 -g 200 'blastn -task blastn-short -db my_db.fasta -query ~/seqs/sample.fasta -outfmt 6 -out output_file'
28
28
 
29
29
  To get additional help:
30
30
 
@@ -8,6 +8,7 @@ require 'scbi_distributed_blast'
8
8
  require 'logger'
9
9
  require 'optparse'
10
10
 
11
+ # retrieve environment variables
11
12
  if ENV['SCBI_DISTRIBUTED_BLAST_INIT'] && File.exists?(ENV['SCBI_DISTRIBUTED_BLAST_INIT'])
12
13
  $INIT_FILE=File.expand_path(ENV['SCBI_DISTRIBUTED_BLAST_INIT'])
13
14
  elsif File.exists?(File.join('~','scbi_distributed_blast_init_env'))
@@ -17,8 +18,8 @@ else
17
18
  end
18
19
 
19
20
 
21
+ # parse input options
20
22
  options = {}
21
-
22
23
  optparse = OptionParser.new do |opts|
23
24
 
24
25
  # Set a banner, displayed at the top
@@ -27,11 +28,13 @@ optparse = OptionParser.new do |opts|
27
28
 
28
29
  # Define the options, and what they do
29
30
 
31
+ # server ip
30
32
  options[:server_ip] = '0.0.0.0'
31
33
  opts.on( '-s', '--server IP', 'Server ip. You can use a partial ip to select the apropriate interface' ) do |server_ip|
32
34
  options[:server_ip] = server_ip
33
35
  end
34
36
 
37
+ # server port
35
38
  options[:port] = 0 # any free port
36
39
  opts.on( '-p', '--port PORT', 'Server port. If set to 0, an arbitrary empty port will be used') do |port|
37
40
  options[:port] = port.to_i
@@ -55,17 +58,18 @@ optparse = OptionParser.new do |opts|
55
58
  end
56
59
  end
57
60
 
61
+ # chunk size
58
62
  options[:chunk_size] = 100
59
63
  opts.on( '-g', '--group_size chunk_size', 'Group sequences in chunks of size <chunk_size>' ) do |cs|
60
64
  options[:chunk_size] = cs.to_i
61
65
  end
62
66
 
67
+ # log_file
63
68
  options[:log_file] = STDOUT
64
69
  opts.on( '-l', '--log_file file', 'Define a log file. STDOUT by default' ) do |cs|
65
70
  options[:log_file] = cs
66
71
  end
67
72
 
68
-
69
73
  # This displays the help screen, all programs are
70
74
  # assumed to have this option.
71
75
  opts.on_tail( '-h', '--help', 'Display this screen' ) do
@@ -78,8 +82,10 @@ end
78
82
  # parse options and remove from ARGV
79
83
  optparse.parse!
80
84
 
85
+ # use remaining command line arguments as blast_cmd
81
86
  blast_cmd = ARGV.join(' ')
82
87
 
88
+ # enable logger
83
89
  $LOG = Logger.new(options[:log_file])
84
90
  $LOG.datetime_format = "%Y-%m-%d %H:%M:%S"
85
91
 
@@ -89,6 +95,7 @@ $LOG.info "Original Blast+ CMD: #{blast_cmd}"
89
95
  input_file = nil
90
96
  output_file = nil
91
97
 
98
+ # extract query and output files from blast_cmd
92
99
  if blast_cmd.upcase.index('-QUERY')
93
100
  params=blast_cmd.split(' -')
94
101
  params.reverse_each do |param|
@@ -109,19 +116,13 @@ if blast_cmd.upcase.index('-QUERY')
109
116
  params.delete(param)
110
117
  # break
111
118
  end
112
-
113
-
114
119
  end
115
120
 
116
121
  blast_cmd=params.join(' -')
117
122
  end
118
123
 
119
- # puts "BLASTCMD: #{blast_cmd}"
120
- # puts "Input file: #{input_file}"
121
- # puts "Output file: #{output_file}"
122
-
123
- if !input_file.nil? and File.exists?(File.expand_path(input_file))
124
-
124
+ # check if query is defined
125
+ if !input_file.nil? and File.exists?(File.expand_path(input_file))
125
126
  $LOG.info "Query input file: #{input_file}"
126
127
  else
127
128
  $LOG.error "No input file specified in blast command (-query parameter)"
@@ -12,13 +12,17 @@ class ScbiDblastManager < ScbiMapreduce::WorkManager
12
12
  # worker connection, and thus, all global variables here should be
13
13
  # class variables (starting with @@)
14
14
  def self.init_work_manager(input_file, blast_cmd, output_file)
15
+ # save blast_cmd
15
16
  @@blast_cmd=blast_cmd
17
+
18
+ # define output file
16
19
  if output_file.nil?
17
20
  @@output_file=STDOUT
18
21
  else
19
22
  @@output_file=File.open(output_file,'w')
20
23
  end
21
24
 
25
+ # open input file in fasta
22
26
  @@fqr = FastaQualFile.new(input_file)
23
27
 
24
28
  end
@@ -26,6 +30,7 @@ class ScbiDblastManager < ScbiMapreduce::WorkManager
26
30
  # end_work_manager is executed at the end, when the whole process is done.
27
31
  # You can use it to close files opened in init_work_manager
28
32
  def self.end_work_manager
33
+ # close opened files
29
34
  @@fqr.close
30
35
  @@output_file.close if @@output_file!=STDOUT
31
36
  end
@@ -33,6 +38,7 @@ class ScbiDblastManager < ScbiMapreduce::WorkManager
33
38
  # worker_initial_config is used to send initial parameters to workers.
34
39
  # The method is executed once per each worker
35
40
  def worker_initial_config
41
+ # send blast_cmd to workers
36
42
  {:blast_cmd=>@@blast_cmd}
37
43
  end
38
44
 
@@ -41,6 +47,7 @@ class ScbiDblastManager < ScbiMapreduce::WorkManager
41
47
  # This method must return the work data or nil if no more data is available
42
48
  def next_work
43
49
 
50
+ # read next sequence from inputfile
44
51
  n,f = @@fqr.next_seq
45
52
 
46
53
  if n.nil?
@@ -55,8 +62,8 @@ class ScbiDblastManager < ScbiMapreduce::WorkManager
55
62
  # work_received is executed each time a worker has finished a job.
56
63
  # Here you can write results down to disk, perform some aggregated statistics, etc...
57
64
  def work_received(results)
65
+ # write results to disk
58
66
  @@output_file.puts results
59
- # write_data_to_disk(results)
60
67
  end
61
68
 
62
69
  end
@@ -7,5 +7,5 @@ require 'scbi_distributed_blast/scbi_dblast_manager.rb'
7
7
  ROOT_PATH=File.join(File.dirname(__FILE__),'scbi_distributed_blast')
8
8
 
9
9
  module ScbiDistributedBlast
10
- VERSION = '0.0.3'
10
+ VERSION = '0.0.4'
11
11
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: scbi_distributed_blast
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.0.3
5
+ version: 0.0.4
6
6
  platform: ruby
7
7
  authors:
8
8
  - Dario Guerrero
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-07-08 00:00:00 Z
13
+ date: 2011-07-12 00:00:00 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: scbi_mapreduce
@@ -56,7 +56,7 @@ dependencies:
56
56
  version: 2.8.0
57
57
  type: :development
58
58
  version_requirements: *id004
59
- description: scbi_distributed_blast is a simple distribution mechanism for blast+ made on top of scbi_mapreduce. With scbi_distributed_blast you can perform distributed blasts using a cluster or a set of machines of your network. It uses the version of blast+ that you have installed.
59
+ description: scbi_distributed_blast is a simple distribution mechanism for blast+ built on top of scbi_mapreduce. With scbi_distributed_blast you can perform distributed blasts using a cluster, a set of machines on your network, or your own multi-core personal computer. It uses the blast+ installation that you already have.
60
60
  email:
61
61
  - dariogf@gmail.com
62
62
  executables: