unipept 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2079ea0c1b40f957d73ba24d167f5ec468f71c5e
4
- data.tar.gz: c9a508bdc4a5293d5801e8bfba74dc8c52d641a8
3
+ metadata.gz: 4b610ddbb209024a30cd8d1c8a6846a26953869f
4
+ data.tar.gz: d0d6b1577ca3e5ddc30d334ef0f5cc1500a1b482
5
5
  SHA512:
6
- metadata.gz: eff7e936edc50c79d0f9d33486b5e9666da22ba937d3989ad19e70a4a3b1b36aee106ef15e7441c6cd422fa8779980dbb7be4f5695bfbf4511566004c425c6a1
7
- data.tar.gz: c411d49dc862f28bbde1edafe74f7b2275c0b9735cea484d3fb75f43ecaf4f3d2e51141a409db7ea35b8d9864178f233986731fa1b2e64817be104aefee1afb2
6
+ metadata.gz: 5c364689109c0f29928078371edc4fd4ba7a9b60999b14537563d63784b7289322af0b2b94d8d6710bfd19508cad49bedde989a1f662ddf94ebc62b81cdc86f6
7
+ data.tar.gz: 244d132a18e970ba7d13811c0429ece4cc97bc39719336d460bac3664fe7c59dc0d361d8fadbd5cd8b5ed57cad41b62492b019b7250ec070c77e3bad4111bdd3
data/.travis.yml CHANGED
@@ -5,3 +5,5 @@ rvm:
5
5
  - 2.0.0
6
6
  - 1.9.3
7
7
  - jruby-19mode
8
+ notifications:
9
+ slack: zeuswpi:yBidVPnaP2JuP2zfa36Ew0E1
data/README.md CHANGED
@@ -14,7 +14,7 @@ To use the Unipept CLI, Ruby version 1.9.3 or higher needs to be installed. You
14
14
 
15
15
  ```
16
16
  $ ruby -v
17
- ruby 2.1.1p76 (2014-02-24 revision 45161) [x86_64-darwin12.0]
17
+ ruby 2.1.6p336 (2015-04-13 revision 50298) [x86_64-darwin14.0]
18
18
  ```
19
19
 
20
20
  More information on installing Ruby can be found at https://www.ruby-lang.org/en/installation/
@@ -23,8 +23,8 @@ The Unipept CLI is available as a *gem*. This means it can easily be installed w
23
23
 
24
24
  ```bash
25
25
  $ gem install unipept
26
- Successfully installed unipept-0.8.0
27
- Parsing documentation for unipept-0.8.0
26
+ Successfully installed unipept-0.9.0
27
+ Parsing documentation for unipept-0.9.0
28
28
  Done installing documentation for unipept after 0 seconds
29
29
  1 gem installed
30
30
  ```
@@ -33,7 +33,7 @@ After successful installation, the unipept command should be available:
33
33
 
34
34
  ```bash
35
35
  $ unipept -v
36
- 0.8.0
36
+ 0.9.0
37
37
  ```
38
38
 
39
39
  The help can be accessed by running `unipept -h`.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.8.0
1
+ 0.9.0
@@ -4,25 +4,15 @@ module Unipept
4
4
 
5
5
  attr_reader :url
6
6
 
7
- attr_reader :message_url
8
-
9
7
  attr_reader :user_agent
10
8
 
11
9
  def initialize(args, opts, cmd)
12
10
  super
13
11
  @configuration = Unipept::Configuration.new
14
- set_configuration
15
-
16
- @url = "#{@host}/api/v1/#{cmd.name}.json"
17
- @message_url = "#{@host}/api/v1/messages.json"
18
- end
19
12
 
20
- # Sets the configurable options of the command line app:
21
- # - the host
22
- # - the user agent
23
- def set_configuration
24
13
  @host = host
25
14
  @user_agent = 'Unipept CLI - unipept ' + Unipept::VERSION
15
+ @url = "#{@host}/api/v1/#{cmd.name}.json"
26
16
  end
27
17
 
28
18
  # Returns the host. If a value is defined by both an option and the config
@@ -54,20 +44,39 @@ module Unipept
54
44
  $stdin.each_line
55
45
  end
56
46
 
57
- # Returns the default batch_size of a command.
47
+ def output_writer
48
+ @output_writer ||= OutputWriter.new(options[:output])
49
+ end
50
+
51
+ # Returns the default batch size of a command.
52
+ def default_batch_size
53
+ fail NotImplementedError, 'This must be implemented in a subclass.'
54
+ end
55
+
56
+ # Returns the effective batch size of a command: the value of the batch option when given, otherwise default_batch_size.
58
57
  def batch_size
59
- 100
58
+ if options[:batch]
59
+ options[:batch].to_i
60
+ else
61
+ default_batch_size
62
+ end
60
63
  end
61
64
 
62
- # Constructs a request body (a Hash) for set of input strings, using the
63
- # options supplied by the user.
64
- def construct_request_body(input)
65
- names = selected_fields.empty? || selected_fields.any? { |f| f.to_s.include? 'name' }
66
- { input: input,
67
- equate_il: options[:equate] == true,
68
- extra: options[:all] == true,
69
- names: options[:all] == true && names
70
- }
65
+ # Returns a new batch_iterator based on the batch_size
66
+ def batch_iterator
67
+ Unipept::BatchIterator.new(batch_size)
68
+ end
69
+
70
+ def concurrent_requests
71
+ if options[:parallel]
72
+ options[:parallel].to_i
73
+ else
74
+ 10
75
+ end
76
+ end
77
+
78
+ def queue_size
79
+ concurrent_requests * 20
71
80
  end
72
81
 
73
82
  # Returns an array of regular expressions containing all the selected fields
@@ -80,43 +89,27 @@ module Unipept
80
89
  @formatter ||= Unipept::Formatter.new_for_format(options[:format])
81
90
  end
82
91
 
83
- # Checks if the server has a message and prints it if not empty.
84
- # We will only check this once a day and won't print anything if the quiet
85
- # option is set or if we output to a file.
86
- def print_server_message
87
- return if options[:quiet]
88
- return unless $stdout.tty?
89
- return if recently_fetched?
90
- @configuration['last_fetch_date'] = Time.now
91
- @configuration.save
92
- resp = fetch_server_message
93
- puts resp unless resp.empty?
94
- end
95
-
96
- # Fetches a message from the server and returns it
97
- def fetch_server_message
98
- Typhoeus.get(@message_url, params: { version: Unipept::VERSION }).body.chomp
99
- end
100
-
101
- # Returns true if the last check for a server message was less than a day
102
- # ago.
103
- def recently_fetched?
104
- last_fetched = @configuration['last_fetch_date']
105
- !last_fetched.nil? && (last_fetched + 60 * 60 * 24) > Time.now
106
- end
107
-
108
- # Returns a new batch_iterator based on the batch_size
109
- def batch_iterator
110
- Unipept::BatchIterator.new(batch_size)
92
+ # Constructs a request body (a Hash) for a set of input strings, using the
93
+ # options supplied by the user.
94
+ def construct_request_body(input)
95
+ names = selected_fields.empty? || selected_fields.any? { |f| f.to_s.include? 'name' }
96
+ { input: input,
97
+ equate_il: options[:equate] == true,
98
+ extra: options[:all] == true,
99
+ names: options[:all] == true && names
100
+ }
111
101
  end
112
102
 
113
103
  # Runs the command
114
104
  def run
115
- print_server_message
116
- hydra = Typhoeus::Hydra.new(max_concurrency: 10)
105
+ ServerMessage.new(@host).print unless options[:quiet]
106
+ hydra = Typhoeus::Hydra.new(max_concurrency: concurrent_requests)
117
107
  batch_order = Unipept::BatchOrder.new
108
+ last_id = 0
118
109
 
119
110
  batch_iterator.iterate(input_iterator) do |input_slice, batch_id, fasta_mapper|
111
+ last_id = batch_id
112
+
120
113
  request = Typhoeus::Request.new(
121
114
  @url,
122
115
  method: :post,
@@ -131,10 +124,11 @@ module Unipept
131
124
  end
132
125
 
133
126
  hydra.queue request
134
- hydra.run if batch_id % 200 == 0
127
+ hydra.run if batch_id % queue_size == 0
135
128
  end
136
129
 
137
130
  hydra.run
131
+ batch_order.wait(last_id + 1) { output_writer.write_line formatter.footer }
138
132
  end
139
133
 
140
134
  # Saves an error to a new file in the .unipept directory in the user's home
@@ -146,16 +140,6 @@ module Unipept
146
140
  $stderr.puts "API request failed! log can be found in #{path}"
147
141
  end
148
142
 
149
- # Write a string to the output defined by the command. If a file is given,
150
- # write it to the file. If not, write to stdout
151
- def write_to_output(string)
152
- if options[:output]
153
- File.open(options[:output], 'a') { |f| f.write string }
154
- else
155
- puts string
156
- end
157
- end
158
-
159
143
  private
160
144
 
161
145
  def error_file_path
@@ -166,15 +150,25 @@ module Unipept
166
150
  # Returns a block to execute.
167
151
  def handle_response(response, batch_id, fasta_mapper)
168
152
  if response.success?
169
- result = filter_result(response.response_body)
153
+ handle_success_response(response, batch_id, fasta_mapper)
154
+ else
155
+ handle_failed_response(response)
156
+ end
157
+ end
158
+
159
+ def handle_success_response(response, batch_id, fasta_mapper)
160
+ result = filter_result(response.response_body)
170
161
 
171
- lambda do
172
- unless result.empty?
173
- write_to_output formatter.header(result, fasta_mapper) if batch_id == 0
174
- write_to_output formatter.format(result, fasta_mapper)
175
- end
162
+ lambda do
163
+ unless result.empty?
164
+ output_writer.write_line formatter.header(result, fasta_mapper) if batch_id == 0
165
+ output_writer.write_line formatter.format(result, fasta_mapper, batch_id == 0)
176
166
  end
177
- elsif response.timed_out?
167
+ end
168
+ end
169
+
170
+ def handle_failed_response(response)
171
+ if response.timed_out?
178
172
  -> { save_error('request timed out, continuing anyway, but results might be incomplete') }
179
173
  elsif response.code == 0
180
174
  -> { save_error('could not get an http response, continuing anyway, but results might be incomplete' + response.return_message) }
@@ -1,7 +1,7 @@
1
1
  require_relative 'api_runner'
2
2
  module Unipept::Commands
3
3
  class Pept2lca < ApiRunner
4
- def batch_size
4
+ def default_batch_size
5
5
  if options[:all]
6
6
  100
7
7
  else
@@ -2,7 +2,7 @@ require_relative 'api_runner'
2
2
 
3
3
  module Unipept::Commands
4
4
  class Pept2prot < ApiRunner
5
- def batch_size
5
+ def default_batch_size
6
6
  if options[:all]
7
7
  5
8
8
  else
@@ -1,7 +1,7 @@
1
1
  require_relative 'api_runner'
2
2
  module Unipept::Commands
3
3
  class Pept2taxa < ApiRunner
4
- def batch_size
4
+ def default_batch_size
5
5
  if options[:all]
6
6
  5
7
7
  else
@@ -5,7 +5,7 @@ module Unipept::Commands
5
5
  SimpleBatchIterator.new
6
6
  end
7
7
 
8
- def batch_size
8
+ def default_batch_size
9
9
  fail 'NOT NEEDED FOR TAXA2LCA'
10
10
  end
11
11
  end
@@ -1,7 +1,7 @@
1
1
  require_relative 'api_runner'
2
2
  module Unipept::Commands
3
3
  class Taxonomy < ApiRunner
4
- def batch_size
4
+ def default_batch_size
5
5
  100
6
6
  end
7
7
  end
@@ -1,9 +1,11 @@
1
1
  require 'typhoeus'
2
2
 
3
- require_relative '../formatters'
4
- require_relative '../configuration'
5
3
  require_relative '../batch_order'
6
4
  require_relative '../batch_iterator'
5
+ require_relative '../configuration'
6
+ require_relative '../formatters'
7
+ require_relative '../output_writer'
8
+ require_relative '../server_message'
7
9
  require_relative '../version'
8
10
 
9
11
  require_relative 'unipept/config'
@@ -50,6 +52,8 @@ module Unipept
50
52
  flag :v, :version, 'displays the version'
51
53
  flag :q, :quiet, 'disable service messages'
52
54
  option :i, :input, 'read input from file', argument: :required
55
+ option nil, :batch, 'specify the batch size', argument: :required, hidden: true
56
+ option nil, :parallel, 'specify the number of parallel requests', argument: :required, hidden: true
53
57
  option :o, :output, 'write output to file', argument: :required
54
58
  option :f, :format, "define the output format (available: #{Unipept::Formatter.available.join ', ' }) (default: #{Unipept::Formatter.default})", argument: :required
55
59
 
data/lib/formatters.rb CHANGED
@@ -42,7 +42,7 @@ module Unipept
42
42
 
43
43
  # @return [String] The type of the current formatter
44
44
  def type
45
- ''
45
+ fail NotImplementedError, 'This must be implemented in a subclass.'
46
46
  end
47
47
 
48
48
  # Returns the header row for the given sample_data and fasta_mapper. This
@@ -58,7 +58,15 @@ module Unipept
58
58
  #
59
59
  # @return [String] The header row
60
60
  def header(_sample_data, _fasta_mapper = nil)
61
- ''
61
+ fail NotImplementedError, 'This must be implemented in a subclass.'
62
+ end
63
+
64
+ # Returns the footer row. This row is output only once at the end of the
65
+ # output
66
+ #
67
+ # @return [String] The footer row
68
+ def footer
69
+ fail NotImplementedError, 'This must be implemented in a subclass.'
62
70
  end
63
71
 
64
72
  # Converts the given input data and corresponding fasta headers to another
@@ -66,14 +74,55 @@ module Unipept
66
74
  #
67
75
  # @param [Array] data The data we wish to convert
68
76
  #
69
- # @param [Array<Array<String>>] _fasta_mapper Optional mapping between input
77
+ # @param [Array<Array<String>>] fasta_mapper Optional mapping between input
70
78
  # data and corresponding fasta header. The data is represented as a list
71
79
  # containing tuples where the first element is the fasta header and second
72
80
  # element is the input data
73
81
  #
82
+ # @param [Boolean] first Is this the first output batch?
83
+ #
74
84
  # @return [String] The converted input data
75
- def format(data, _fasta_mapper = nil)
76
- data
85
+ def format(data, fasta_mapper = nil, first)
86
+ data = integrate_fasta_headers(data, fasta_mapper) if fasta_mapper
87
+ convert(data, first)
88
+ end
89
+
90
+ # Converts the given input data to another format.
91
+ #
92
+ # @param [Array] data The data we wish to convert
93
+ #
94
+ # @param [Boolean] _first Is this the first output batch?
95
+ #
96
+ # @return [String] The converted input data
97
+ def convert(_data, _first)
98
+ fail NotImplementedError, 'This must be implemented in a subclass.'
99
+ end
100
+
101
+ # Integrates the fasta headers into the data object
102
+ def integrate_fasta_headers(data, fasta_mapper)
103
+ data_dict = group_by_first_key(data)
104
+ data = fasta_mapper.map do |header, key|
105
+ result = data_dict[key]
106
+ unless result.nil?
107
+ result = result.map do |row|
108
+ copy = { fasta_header: header }
109
+ copy.merge(row)
110
+ end
111
+ end
112
+ result
113
+ end
114
+ data.compact.flatten(1)
115
+ end
116
+
117
+ # Groups the data by the first key of each element, for example
118
+ # [{key1: v1, key2: v2},{key1: v1, key2: v3},{key1: v4, key2: v2}]
119
+ # to {v1 => [{key1: v1, key2: v2},{key1: v1, key2: v3}], v4 => [{key1: v4, key2: v2}]}
120
+ #
121
+ # @param [Array<Hash>] data The data we wish to group
122
+ #
123
+ # @return [Hash] The input data grouped by the first key
124
+ def group_by_first_key(data)
125
+ data.group_by { |el| el.values.first.to_s }
77
126
  end
78
127
  end
79
128
 
@@ -86,20 +135,24 @@ module Unipept
86
135
  'json'
87
136
  end
88
137
 
89
- # Converts the given input data and corresponding fasta headers to JSON.
90
- # Currently ignores the fasta_mapper.
138
+ def header(_data, _fasta_mapper = nil)
139
+ '['
140
+ end
141
+
142
+ def footer
143
+ "]\n"
144
+ end
145
+
146
+ # Converts the given input data to the JSON format.
91
147
  #
92
148
  # @param [Array] data The data we wish to convert
93
149
  #
94
- # @param [Array<Array<String>>] _fasta_mapper Optional mapping between input
95
- # data and corresponding fasta header. The data is represented as a list
96
- # containing tuples where the first element is the fasta header and second
97
- # element is the input data
150
+ # @param [Boolean] first Is this the first output batch?
98
151
  #
99
- # @return [String] The input data converted to the JSON format.
100
- def format(data, _fasta_mapper = nil)
101
- # TODO: add fasta header based on fasta_mapper information
102
- data.to_json
152
+ # @return [String] The converted input data in the JSON format
153
+ def convert(data, first)
154
+ output = data.map(&:to_json).join(',')
155
+ first ? output : ',' + output
103
156
  end
104
157
  end
105
158
 
@@ -133,75 +186,24 @@ module Unipept
133
186
  end
134
187
  end
135
188
 
136
- # Converts the given input data and corresponding fasta headers to the csv
137
- # format
138
- #
139
- # @param [Array] data The data we wish to convert
140
- #
141
- # @param [Array<Array<String>>] fasta_mapper Optional mapping between input
142
- # data and corresponding fasta header. The data is represented as a list
143
- # containing tuples where the first element is the fasta header and second
144
- # element is the input data
145
- #
146
- # @return [String] The converted input data into the csv format
147
- def format(data, fasta_mapper = nil)
148
- CSV.generate do |csv|
149
- if fasta_mapper
150
- format_fasta(csv, data, fasta_mapper)
151
- else
152
- format_normal(csv, data)
153
- end
154
- end
155
- end
156
-
157
- # Converts the given input data and corresponding fasta headers to the csv
158
- # format
159
- #
160
- # @param [CSV] csv object we write the csv output to
161
- #
162
- # @param [Array] data The data we wish to convert
163
- #
164
- # @return [String] The converted input data into the csv format
165
- def format_normal(csv, data)
166
- data.each do |o|
167
- csv << o.values.map { |v| v == '' ? nil : v }
168
- end
189
+ def footer
190
+ ''
169
191
  end
170
192
 
171
- # Converts the given input data and corresponding fasta headers to the csv
172
- # format
173
- #
174
- # @param [CSV] csv object we write the csv output to
193
+ # Converts the given input data to the CSV format.
175
194
  #
176
195
  # @param [Array] data The data we wish to convert
177
196
  #
178
- # @param [Array<Array<String>>] fasta_mapper Optional mapping between input
179
- # data and corresponding fasta header. The data is represented as a list
180
- # containing tuples where the first element is the fasta header and second
181
- # element is the input data
197
+ # @param [Boolean] _first Is this the first output batch? (unused for CSV)
182
198
  #
183
- # @return [String] The converted input data into the csv format
184
- def format_fasta(csv, data, fasta_mapper)
185
- data_dict = group_by_first_key(data)
186
- fasta_mapper.each do |fasta_header, key|
187
- next if data_dict[key].nil?
188
-
189
- data_dict[key].each do |r|
190
- csv << ([fasta_header] + r.values).map { |v| v == '' ? nil : v }
199
+ # @return [String] The converted input data in the CSV format
200
+ def convert(data, _first)
201
+ CSV.generate do |csv|
202
+ data.each do |o|
203
+ csv << o.values.map { |v| v == '' ? nil : v }
191
204
  end
192
205
  end
193
206
  end
194
-
195
- # Groups the data by the first key of each element, for example
196
- # [{key1: v1, key2: v2},{key1: v1, key2: v3},{key1: v4, key2: v2}]
197
- # to {v1 => [{key1: v1, key2: v2},{key1: v1, key2: v3}], v4 => [{key1: v4, key2: v2}]]
198
- #
199
- # @param [Array<Hash>] data The data we wish to Groups
200
- #
201
- # @return [Hash] The input data grouped by the first key
202
- def group_by_first_key(data)
203
- data.group_by { |el| el.values.first.to_s }
204
- end
205
207
  end
206
208
 
207
209
  class XMLFormatter < Formatter
@@ -215,7 +217,7 @@ module Unipept
215
217
 
216
218
  class ::Array
217
219
  def to_xml(array_name = :array, _item_name = :item)
218
- %(<#{array_name} size="#{size}">) + map { |n|n.to_xml(:item) }.join + "</#{array_name}>"
220
+ %(<#{array_name}>) + map { |n|n.to_xml(:item) }.join + "</#{array_name}>"
219
221
  end
220
222
  end
221
223
 
@@ -233,20 +235,23 @@ module Unipept
233
235
  'xml'
234
236
  end
235
237
 
236
- # Converts the given input data and corresponding fasta headers to XML.
237
- # Currently ignores the fasta_mapper.
238
+ def header(_data, _fasta_mapper = nil)
239
+ '<results>'
240
+ end
241
+
242
+ def footer
243
+ "</results>\n"
244
+ end
245
+
246
+ # Converts the given input data to the XML format.
238
247
  #
239
248
  # @param [Array] data The data we wish to convert
240
249
  #
241
- # @param [Array<Array<String>>] _fasta_mapper Optional mapping between input
242
- # data and corresponding fasta header. The data is represented as a list
243
- # containing tuples where the first element is the fasta header and second
244
- # element is the input data
250
+ # @param [Boolean] _first Is this the first output batch? (unused for XML)
245
251
  #
246
- # @return [String] The input data converted to the XML format.
247
- def format(data, _fasta_mapper = nil)
248
- # TODO: add fasta header based on fasta_mapper information
249
- data.to_xml
252
+ # @return [String] The converted input data in the XML format
253
+ def convert(data, _first)
254
+ data.map { |row| '<result>' + row.to_xml + '</result>' }.join('')
250
255
  end
251
256
  end
252
257
  end
@@ -0,0 +1,13 @@
1
+ module Unipept
2
+ class OutputWriter
3
+ attr_reader :output
4
+
5
+ def initialize(file)
6
+ @output = file ? File.open(file, 'a') : $stdout
7
+ end
8
+
9
+ def write_line(line)
10
+ @output.write line
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,45 @@
1
+ require 'typhoeus'
2
+
3
+ require_relative 'configuration'
4
+
5
+ module Unipept
6
+ class ServerMessage
7
+ attr_reader :message_url
8
+
9
+ attr_reader :configuration
10
+
11
+ def initialize(host)
12
+ @message_url = "#{host}/api/v1/messages.json"
13
+ @configuration = Unipept::Configuration.new
14
+ end
15
+
16
+ # Checks if the server has a message and prints it if not empty.
17
+ # We only check this once a day and won't print anything unless stdout is a
18
+ # terminal. Honoring the quiet option is left to the caller.
19
+ def print
20
+ return unless $stdout.tty?
21
+ return if recently_fetched?
22
+ resp = fetch_server_message
23
+ update_fetched
24
+ puts resp unless resp.empty?
25
+ end
26
+
27
+ # Fetches a message from the server and returns it
28
+ def fetch_server_message
29
+ Typhoeus.get(@message_url, params: { version: Unipept::VERSION }).body.chomp
30
+ end
31
+
32
+ # Returns true if the last check for a server message was less than a day
33
+ # ago.
34
+ def recently_fetched?
35
+ last_fetched = @configuration['last_fetch_date']
36
+ !last_fetched.nil? && (last_fetched + 60 * 60 * 24) > Time.now
37
+ end
38
+
39
+ # Updates the last checked timestamp
40
+ def update_fetched
41
+ @configuration['last_fetch_date'] = Time.now
42
+ @configuration.save
43
+ end
44
+ end
45
+ end