unipept 0.8.0 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2079ea0c1b40f957d73ba24d167f5ec468f71c5e
4
- data.tar.gz: c9a508bdc4a5293d5801e8bfba74dc8c52d641a8
3
+ metadata.gz: 4b610ddbb209024a30cd8d1c8a6846a26953869f
4
+ data.tar.gz: d0d6b1577ca3e5ddc30d334ef0f5cc1500a1b482
5
5
  SHA512:
6
- metadata.gz: eff7e936edc50c79d0f9d33486b5e9666da22ba937d3989ad19e70a4a3b1b36aee106ef15e7441c6cd422fa8779980dbb7be4f5695bfbf4511566004c425c6a1
7
- data.tar.gz: c411d49dc862f28bbde1edafe74f7b2275c0b9735cea484d3fb75f43ecaf4f3d2e51141a409db7ea35b8d9864178f233986731fa1b2e64817be104aefee1afb2
6
+ metadata.gz: 5c364689109c0f29928078371edc4fd4ba7a9b60999b14537563d63784b7289322af0b2b94d8d6710bfd19508cad49bedde989a1f662ddf94ebc62b81cdc86f6
7
+ data.tar.gz: 244d132a18e970ba7d13811c0429ece4cc97bc39719336d460bac3664fe7c59dc0d361d8fadbd5cd8b5ed57cad41b62492b019b7250ec070c77e3bad4111bdd3
data/.travis.yml CHANGED
@@ -5,3 +5,5 @@ rvm:
5
5
  - 2.0.0
6
6
  - 1.9.3
7
7
  - jruby-19mode
8
+ notifications:
9
+ slack: zeuswpi:yBidVPnaP2JuP2zfa36Ew0E1
data/README.md CHANGED
@@ -14,7 +14,7 @@ To use the Unipept CLI, Ruby version 1.9.3 or higher needs to be installed. You
14
14
 
15
15
  ```
16
16
  $ ruby -v
17
- ruby 2.1.1p76 (2014-02-24 revision 45161) [x86_64-darwin12.0]
17
+ ruby 2.1.6p336 (2015-04-13 revision 50298) [x86_64-darwin14.0]
18
18
  ```
19
19
 
20
20
  More information on installing Ruby can be found at https://www.ruby-lang.org/en/installation/
@@ -23,8 +23,8 @@ The Unipept CLI is available as a *gem*. This means it can easily be installed w
23
23
 
24
24
  ```bash
25
25
  $ gem install unipept
26
- Successfully installed unipept-0.8.0
27
- Parsing documentation for unipept-0.8.0
26
+ Successfully installed unipept-0.9.0
27
+ Parsing documentation for unipept-0.9.0
28
28
  Done installing documentation for unipept after 0 seconds
29
29
  1 gem installed
30
30
  ```
@@ -33,7 +33,7 @@ After successful installation, the unipept command should be available:
33
33
 
34
34
  ```bash
35
35
  $ unipept -v
36
- 0.8.0
36
+ 0.9.0
37
37
  ```
38
38
 
39
39
  The help can be accessed by running `unipept -h`.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.8.0
1
+ 0.9.0
@@ -4,25 +4,15 @@ module Unipept
4
4
 
5
5
  attr_reader :url
6
6
 
7
- attr_reader :message_url
8
-
9
7
  attr_reader :user_agent
10
8
 
11
9
  def initialize(args, opts, cmd)
12
10
  super
13
11
  @configuration = Unipept::Configuration.new
14
- set_configuration
15
-
16
- @url = "#{@host}/api/v1/#{cmd.name}.json"
17
- @message_url = "#{@host}/api/v1/messages.json"
18
- end
19
12
 
20
- # Sets the configurable options of the command line app:
21
- # - the host
22
- # - the user agent
23
- def set_configuration
24
13
  @host = host
25
14
  @user_agent = 'Unipept CLI - unipept ' + Unipept::VERSION
15
+ @url = "#{@host}/api/v1/#{cmd.name}.json"
26
16
  end
27
17
 
28
18
  # Returns the host. If a value is defined by both an option and the config
@@ -54,20 +44,39 @@ module Unipept
54
44
  $stdin.each_line
55
45
  end
56
46
 
57
- # Returns the default batch_size of a command.
47
+ def output_writer
48
+ @output_writer ||= OutputWriter.new(options[:output])
49
+ end
50
+
51
+ # Returns the default batch size of a command.
52
+ def default_batch_size
53
+ fail NotImplementedError, 'This must be implemented in a subclass.'
54
+ end
55
+
56
+ # Returns the effective batch_size of a command
58
57
  def batch_size
59
- 100
58
+ if options[:batch]
59
+ options[:batch].to_i
60
+ else
61
+ default_batch_size
62
+ end
60
63
  end
61
64
 
62
- # Constructs a request body (a Hash) for set of input strings, using the
63
- # options supplied by the user.
64
- def construct_request_body(input)
65
- names = selected_fields.empty? || selected_fields.any? { |f| f.to_s.include? 'name' }
66
- { input: input,
67
- equate_il: options[:equate] == true,
68
- extra: options[:all] == true,
69
- names: options[:all] == true && names
70
- }
65
+ # Returns a new batch_iterator based on the batch_size
66
+ def batch_iterator
67
+ Unipept::BatchIterator.new(batch_size)
68
+ end
69
+
70
+ def concurrent_requests
71
+ if options[:parallel]
72
+ options[:parallel].to_i
73
+ else
74
+ 10
75
+ end
76
+ end
77
+
78
+ def queue_size
79
+ concurrent_requests * 20
71
80
  end
72
81
 
73
82
  # Returns an array of regular expressions containing all the selected fields
@@ -80,43 +89,27 @@ module Unipept
80
89
  @formatter ||= Unipept::Formatter.new_for_format(options[:format])
81
90
  end
82
91
 
83
- # Checks if the server has a message and prints it if not empty.
84
- # We will only check this once a day and won't print anything if the quiet
85
- # option is set or if we output to a file.
86
- def print_server_message
87
- return if options[:quiet]
88
- return unless $stdout.tty?
89
- return if recently_fetched?
90
- @configuration['last_fetch_date'] = Time.now
91
- @configuration.save
92
- resp = fetch_server_message
93
- puts resp unless resp.empty?
94
- end
95
-
96
- # Fetches a message from the server and returns it
97
- def fetch_server_message
98
- Typhoeus.get(@message_url, params: { version: Unipept::VERSION }).body.chomp
99
- end
100
-
101
- # Returns true if the last check for a server message was less than a day
102
- # ago.
103
- def recently_fetched?
104
- last_fetched = @configuration['last_fetch_date']
105
- !last_fetched.nil? && (last_fetched + 60 * 60 * 24) > Time.now
106
- end
107
-
108
- # Returns a new batch_iterator based on the batch_size
109
- def batch_iterator
110
- Unipept::BatchIterator.new(batch_size)
92
+ # Constructs a request body (a Hash) for a set of input strings, using the
93
+ # options supplied by the user.
94
+ def construct_request_body(input)
95
+ names = selected_fields.empty? || selected_fields.any? { |f| f.to_s.include? 'name' }
96
+ { input: input,
97
+ equate_il: options[:equate] == true,
98
+ extra: options[:all] == true,
99
+ names: options[:all] == true && names
100
+ }
111
101
  end
112
102
 
113
103
  # Runs the command
114
104
  def run
115
- print_server_message
116
- hydra = Typhoeus::Hydra.new(max_concurrency: 10)
105
+ ServerMessage.new(@host).print unless options[:quiet]
106
+ hydra = Typhoeus::Hydra.new(max_concurrency: concurrent_requests)
117
107
  batch_order = Unipept::BatchOrder.new
108
+ last_id = 0
118
109
 
119
110
  batch_iterator.iterate(input_iterator) do |input_slice, batch_id, fasta_mapper|
111
+ last_id = batch_id
112
+
120
113
  request = Typhoeus::Request.new(
121
114
  @url,
122
115
  method: :post,
@@ -131,10 +124,11 @@ module Unipept
131
124
  end
132
125
 
133
126
  hydra.queue request
134
- hydra.run if batch_id % 200 == 0
127
+ hydra.run if batch_id % queue_size == 0
135
128
  end
136
129
 
137
130
  hydra.run
131
+ batch_order.wait(last_id + 1) { output_writer.write_line formatter.footer }
138
132
  end
139
133
 
140
134
  # Saves an error to a new file in the .unipept directory in the users home
@@ -146,16 +140,6 @@ module Unipept
146
140
  $stderr.puts "API request failed! log can be found in #{path}"
147
141
  end
148
142
 
149
- # Write a string to the output defined by the command. If a file is given,
150
- # write it to the file. If not, write to stdout
151
- def write_to_output(string)
152
- if options[:output]
153
- File.open(options[:output], 'a') { |f| f.write string }
154
- else
155
- puts string
156
- end
157
- end
158
-
159
143
  private
160
144
 
161
145
  def error_file_path
@@ -166,15 +150,25 @@ module Unipept
166
150
  # Returns a block to execute.
167
151
  def handle_response(response, batch_id, fasta_mapper)
168
152
  if response.success?
169
- result = filter_result(response.response_body)
153
+ handle_success_response(response, batch_id, fasta_mapper)
154
+ else
155
+ handle_failed_response(response)
156
+ end
157
+ end
158
+
159
+ def handle_success_response(response, batch_id, fasta_mapper)
160
+ result = filter_result(response.response_body)
170
161
 
171
- lambda do
172
- unless result.empty?
173
- write_to_output formatter.header(result, fasta_mapper) if batch_id == 0
174
- write_to_output formatter.format(result, fasta_mapper)
175
- end
162
+ lambda do
163
+ unless result.empty?
164
+ output_writer.write_line formatter.header(result, fasta_mapper) if batch_id == 0
165
+ output_writer.write_line formatter.format(result, fasta_mapper, batch_id == 0)
176
166
  end
177
- elsif response.timed_out?
167
+ end
168
+ end
169
+
170
+ def handle_failed_response(response)
171
+ if response.timed_out?
178
172
  -> { save_error('request timed out, continuing anyway, but results might be incomplete') }
179
173
  elsif response.code == 0
180
174
  -> { save_error('could not get an http response, continuing anyway, but results might be incomplete' + response.return_message) }
@@ -1,7 +1,7 @@
1
1
  require_relative 'api_runner'
2
2
  module Unipept::Commands
3
3
  class Pept2lca < ApiRunner
4
- def batch_size
4
+ def default_batch_size
5
5
  if options[:all]
6
6
  100
7
7
  else
@@ -2,7 +2,7 @@ require_relative 'api_runner'
2
2
 
3
3
  module Unipept::Commands
4
4
  class Pept2prot < ApiRunner
5
- def batch_size
5
+ def default_batch_size
6
6
  if options[:all]
7
7
  5
8
8
  else
@@ -1,7 +1,7 @@
1
1
  require_relative 'api_runner'
2
2
  module Unipept::Commands
3
3
  class Pept2taxa < ApiRunner
4
- def batch_size
4
+ def default_batch_size
5
5
  if options[:all]
6
6
  5
7
7
  else
@@ -5,7 +5,7 @@ module Unipept::Commands
5
5
  SimpleBatchIterator.new
6
6
  end
7
7
 
8
- def batch_size
8
+ def default_batch_size
9
9
  fail 'NOT NEEDED FOR TAXA2LCA'
10
10
  end
11
11
  end
@@ -1,7 +1,7 @@
1
1
  require_relative 'api_runner'
2
2
  module Unipept::Commands
3
3
  class Taxonomy < ApiRunner
4
- def batch_size
4
+ def default_batch_size
5
5
  100
6
6
  end
7
7
  end
@@ -1,9 +1,11 @@
1
1
  require 'typhoeus'
2
2
 
3
- require_relative '../formatters'
4
- require_relative '../configuration'
5
3
  require_relative '../batch_order'
6
4
  require_relative '../batch_iterator'
5
+ require_relative '../configuration'
6
+ require_relative '../formatters'
7
+ require_relative '../output_writer'
8
+ require_relative '../server_message'
7
9
  require_relative '../version'
8
10
 
9
11
  require_relative 'unipept/config'
@@ -50,6 +52,8 @@ module Unipept
50
52
  flag :v, :version, 'displays the version'
51
53
  flag :q, :quiet, 'disable service messages'
52
54
  option :i, :input, 'read input from file', argument: :required
55
+ option nil, :batch, 'specify the batch size', argument: :required, hidden: true
56
+ option nil, :parallel, 'specify the number of parallel requests', argument: :required, hidden: true
53
57
  option :o, :output, 'write output to file', argument: :required
54
58
  option :f, :format, "define the output format (available: #{Unipept::Formatter.available.join ', ' }) (default: #{Unipept::Formatter.default})", argument: :required
55
59
 
data/lib/formatters.rb CHANGED
@@ -42,7 +42,7 @@ module Unipept
42
42
 
43
43
  # @return [String] The type of the current formatter
44
44
  def type
45
- ''
45
+ fail NotImplementedError, 'This must be implemented in a subclass.'
46
46
  end
47
47
 
48
48
  # Returns the header row for the given sample_data and fasta_mapper. This
@@ -58,7 +58,15 @@ module Unipept
58
58
  #
59
59
  # @return [String] The header row
60
60
  def header(_sample_data, _fasta_mapper = nil)
61
- ''
61
+ fail NotImplementedError, 'This must be implemented in a subclass.'
62
+ end
63
+
64
+ # Returns the footer row. This row is output only once at the end of the
65
+ # output
66
+ #
67
+ # @return [String] The footer row
68
+ def footer
69
+ fail NotImplementedError, 'This must be implemented in a subclass.'
62
70
  end
63
71
 
64
72
  # Converts the given input data and corresponding fasta headers to another
@@ -66,14 +74,55 @@ module Unipept
66
74
  #
67
75
  # @param [Array] data The data we wish to convert
68
76
  #
69
- # @param [Array<Array<String>>] _fasta_mapper Optional mapping between input
77
+ # @param [Array<Array<String>>] fasta_mapper Optional mapping between input
70
78
  # data and corresponding fasta header. The data is represented as a list
71
79
  # containing tuples where the first element is the fasta header and second
72
80
  # element is the input data
73
81
  #
82
+ # @param [Boolean] first Is this the first output batch?
83
+ #
74
84
  # @return [String] The converted input data
75
- def format(data, _fasta_mapper = nil)
76
- data
85
+ def format(data, fasta_mapper = nil, first)
86
+ data = integrate_fasta_headers(data, fasta_mapper) if fasta_mapper
87
+ convert(data, first)
88
+ end
89
+
90
+ # Converts the given input data to another format.
91
+ #
92
+ # @param [Array] data The data we wish to convert
93
+ #
94
+ # @param [Boolean] _first Is this the first output batch?
95
+ #
96
+ # @return [String] The converted input data
97
+ def convert(_data, _first)
98
+ fail NotImplementedError, 'This must be implemented in a subclass.'
99
+ end
100
+
101
+ # Integrates the fasta headers into the data object
102
+ def integrate_fasta_headers(data, fasta_mapper)
103
+ data_dict = group_by_first_key(data)
104
+ data = fasta_mapper.map do |header, key|
105
+ result = data_dict[key]
106
+ unless result.nil?
107
+ result = result.map do |row|
108
+ copy = { fasta_header: header }
109
+ copy.merge(row)
110
+ end
111
+ end
112
+ result
113
+ end
114
+ data.compact.flatten(1)
115
+ end
116
+
117
+ # Groups the data by the first key of each element, for example
118
+ # [{key1: v1, key2: v2},{key1: v1, key2: v3},{key1: v4, key2: v2}]
119
+ # to {v1 => [{key1: v1, key2: v2},{key1: v1, key2: v3}], v4 => [{key1: v4, key2: v2}]}
120
+ #
121
+ # @param [Array<Hash>] data The data we wish to group
122
+ #
123
+ # @return [Hash] The input data grouped by the first key
124
+ def group_by_first_key(data)
125
+ data.group_by { |el| el.values.first.to_s }
77
126
  end
78
127
  end
79
128
 
@@ -86,20 +135,24 @@ module Unipept
86
135
  'json'
87
136
  end
88
137
 
89
- # Converts the given input data and corresponding fasta headers to JSON.
90
- # Currently ignores the fasta_mapper.
138
+ def header(_data, _fasta_mapper = nil)
139
+ '['
140
+ end
141
+
142
+ def footer
143
+ "]\n"
144
+ end
145
+
146
+ # Converts the given input data to the JSON format.
91
147
  #
92
148
  # @param [Array] data The data we wish to convert
93
149
  #
94
- # @param [Array<Array<String>>] _fasta_mapper Optional mapping between input
95
- # data and corresponding fasta header. The data is represented as a list
96
- # containing tuples where the first element is the fasta header and second
97
- # element is the input data
150
+ # @param [Boolean] first Is this the first output batch?
98
151
  #
99
- # @return [String] The input data converted to the JSON format.
100
- def format(data, _fasta_mapper = nil)
101
- # TODO: add fasta header based on fasta_mapper information
102
- data.to_json
152
+ # @return [String] The converted input data in the JSON format
153
+ def convert(data, first)
154
+ output = data.map(&:to_json).join(',')
155
+ first ? output : ',' + output
103
156
  end
104
157
  end
105
158
 
@@ -133,75 +186,24 @@ module Unipept
133
186
  end
134
187
  end
135
188
 
136
- # Converts the given input data and corresponding fasta headers to the csv
137
- # format
138
- #
139
- # @param [Array] data The data we wish to convert
140
- #
141
- # @param [Array<Array<String>>] fasta_mapper Optional mapping between input
142
- # data and corresponding fasta header. The data is represented as a list
143
- # containing tuples where the first element is the fasta header and second
144
- # element is the input data
145
- #
146
- # @return [String] The converted input data into the csv format
147
- def format(data, fasta_mapper = nil)
148
- CSV.generate do |csv|
149
- if fasta_mapper
150
- format_fasta(csv, data, fasta_mapper)
151
- else
152
- format_normal(csv, data)
153
- end
154
- end
155
- end
156
-
157
- # Converts the given input data and corresponding fasta headers to the csv
158
- # format
159
- #
160
- # @param [CSV] csv object we write the csv output to
161
- #
162
- # @param [Array] data The data we wish to convert
163
- #
164
- # @return [String] The converted input data into the csv format
165
- def format_normal(csv, data)
166
- data.each do |o|
167
- csv << o.values.map { |v| v == '' ? nil : v }
168
- end
189
+ def footer
190
+ ''
169
191
  end
170
192
 
171
- # Converts the given input data and corresponding fasta headers to the csv
172
- # format
173
- #
174
- # @param [CSV] csv object we write the csv output to
193
+ # Converts the given input data to the CSV format.
175
194
  #
176
195
  # @param [Array] data The data we wish to convert
177
196
  #
178
- # @param [Array<Array<String>>] fasta_mapper Optional mapping between input
179
- # data and corresponding fasta header. The data is represented as a list
180
- # containing tuples where the first element is the fasta header and second
181
- # element is the input data
197
+ # @param [Boolean] _first Is this the first output batch?
182
198
  #
183
- # @return [String] The converted input data into the csv format
184
- def format_fasta(csv, data, fasta_mapper)
185
- data_dict = group_by_first_key(data)
186
- fasta_mapper.each do |fasta_header, key|
187
- next if data_dict[key].nil?
188
-
189
- data_dict[key].each do |r|
190
- csv << ([fasta_header] + r.values).map { |v| v == '' ? nil : v }
199
+ # @return [String] The converted input data in the CSV format
200
+ def convert(data, _first)
201
+ CSV.generate do |csv|
202
+ data.each do |o|
203
+ csv << o.values.map { |v| v == '' ? nil : v }
191
204
  end
192
205
  end
193
206
  end
194
-
195
- # Groups the data by the first key of each element, for example
196
- # [{key1: v1, key2: v2},{key1: v1, key2: v3},{key1: v4, key2: v2}]
197
- # to {v1 => [{key1: v1, key2: v2},{key1: v1, key2: v3}], v4 => [{key1: v4, key2: v2}]]
198
- #
199
- # @param [Array<Hash>] data The data we wish to Groups
200
- #
201
- # @return [Hash] The input data grouped by the first key
202
- def group_by_first_key(data)
203
- data.group_by { |el| el.values.first.to_s }
204
- end
205
207
  end
206
208
 
207
209
  class XMLFormatter < Formatter
@@ -215,7 +217,7 @@ module Unipept
215
217
 
216
218
  class ::Array
217
219
  def to_xml(array_name = :array, _item_name = :item)
218
- %(<#{array_name} size="#{size}">) + map { |n|n.to_xml(:item) }.join + "</#{array_name}>"
220
+ %(<#{array_name}>) + map { |n|n.to_xml(:item) }.join + "</#{array_name}>"
219
221
  end
220
222
  end
221
223
 
@@ -233,20 +235,23 @@ module Unipept
233
235
  'xml'
234
236
  end
235
237
 
236
- # Converts the given input data and corresponding fasta headers to XML.
237
- # Currently ignores the fasta_mapper.
238
+ def header(_data, _fasta_mapper = nil)
239
+ '<results>'
240
+ end
241
+
242
+ def footer
243
+ "</results>\n"
244
+ end
245
+
246
+ # Converts the given input data to the XML format.
238
247
  #
239
248
  # @param [Array] data The data we wish to convert
240
249
  #
241
- # @param [Array<Array<String>>] _fasta_mapper Optional mapping between input
242
- # data and corresponding fasta header. The data is represented as a list
243
- # containing tuples where the first element is the fasta header and second
244
- # element is the input data
250
+ # @param [Boolean] _first Is this the first output batch?
245
251
  #
246
- # @return [String] The input data converted to the XML format.
247
- def format(data, _fasta_mapper = nil)
248
- # TODO: add fasta header based on fasta_mapper information
249
- data.to_xml
252
+ # @return [String] The converted input data in the XML format
253
+ def convert(data, _first)
254
+ data.map { |row| '<result>' + row.to_xml + '</result>' }.join('')
250
255
  end
251
256
  end
252
257
  end
@@ -0,0 +1,13 @@
1
+ module Unipept
2
+ class OutputWriter
3
+ attr_reader :output
4
+
5
+ def initialize(file)
6
+ @output = file ? File.open(file, 'a') : $stdout
7
+ end
8
+
9
+ def write_line(line)
10
+ @output.write line
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,45 @@
1
+ require 'typhoeus'
2
+
3
+ require_relative 'configuration'
4
+
5
+ module Unipept
6
+ class ServerMessage
7
+ attr_reader :message_url
8
+
9
+ attr_reader :configuration
10
+
11
+ def initialize(host)
12
+ @message_url = "#{host}/api/v1/messages.json"
13
+ @configuration = Unipept::Configuration.new
14
+ end
15
+
16
+ # Checks if the server has a message and prints it if not empty.
17
+ # We will only check this once a day and won't print anything if stdout is
18
+ # not a terminal. Honouring the quiet option is left to the caller.
19
+ def print
20
+ return unless $stdout.tty?
21
+ return if recently_fetched?
22
+ resp = fetch_server_message
23
+ update_fetched
24
+ puts resp unless resp.empty?
25
+ end
26
+
27
+ # Fetches a message from the server and returns it
28
+ def fetch_server_message
29
+ Typhoeus.get(@message_url, params: { version: Unipept::VERSION }).body.chomp
30
+ end
31
+
32
+ # Returns true if the last check for a server message was less than a day
33
+ # ago.
34
+ def recently_fetched?
35
+ last_fetched = @configuration['last_fetch_date']
36
+ !last_fetched.nil? && (last_fetched + 60 * 60 * 24) > Time.now
37
+ end
38
+
39
+ # Updates the last checked timestamp
40
+ def update_fetched
41
+ @configuration['last_fetch_date'] = Time.now
42
+ @configuration.save
43
+ end
44
+ end
45
+ end