unipept 0.7.1 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +26 -0
- data/.travis.yml +7 -0
- data/Gemfile +8 -10
- data/Gemfile.lock +35 -21
- data/README.md +6 -4
- data/Rakefile +11 -10
- data/VERSION +1 -1
- data/bin/peptfilter +2 -44
- data/bin/prot2pept +4 -49
- data/bin/unipept +2 -197
- data/bin/uniprot +4 -53
- data/lib/batch_iterator.rb +73 -0
- data/lib/batch_order.rb +20 -0
- data/lib/commands/peptfilter.rb +118 -0
- data/lib/commands/prot2pept.rb +61 -0
- data/lib/commands/unipept/api_runner.rb +199 -0
- data/lib/commands/unipept/config.rb +29 -0
- data/lib/commands/unipept/pept2lca.rb +12 -0
- data/lib/commands/unipept/pept2prot.rb +13 -0
- data/lib/{unipept/commands → commands/unipept}/pept2taxa.rb +7 -0
- data/lib/commands/unipept/taxa2lca.rb +18 -0
- data/lib/{unipept/commands → commands/unipept}/taxonomy.rb +3 -0
- data/lib/commands/unipept.rb +226 -0
- data/lib/commands/uniprot.rb +69 -0
- data/lib/commands.rb +10 -0
- data/lib/configuration.rb +45 -0
- data/lib/formatters.rb +252 -0
- data/lib/version.rb +3 -0
- data/test/commands/test_peptfilter.rb +170 -0
- data/test/commands/test_prot2pept.rb +82 -0
- data/test/commands/test_unipept.rb +37 -0
- data/test/commands/test_uniprot.rb +136 -0
- data/test/commands/unipept/test_api_runner.rb +486 -0
- data/test/commands/unipept/test_config.rb +64 -0
- data/test/commands/unipept/test_pept2lca.rb +40 -0
- data/test/commands/unipept/test_pept2prot.rb +39 -0
- data/test/commands/unipept/test_pept2taxa.rb +39 -0
- data/test/commands/unipept/test_taxa2lca.rb +39 -0
- data/test/commands/unipept/test_taxonomy.rb +37 -0
- data/test/helper.rb +69 -23
- data/test/test_bach_order.rb +57 -0
- data/test/test_base.rb +6 -0
- data/test/test_batch_iterator.rb +87 -0
- data/test/test_configuration.rb +43 -0
- data/test/test_formatters.rb +140 -0
- data/unipept.gemspec +55 -33
- metadata +62 -40
- data/lib/unipept/batch_order.rb +0 -28
- data/lib/unipept/commands/api_runner.rb +0 -239
- data/lib/unipept/commands/pept2lca.rb +0 -6
- data/lib/unipept/commands/pept2prot.rb +0 -20
- data/lib/unipept/commands/taxa2lca.rb +0 -12
- data/lib/unipept/commands.rb +0 -7
- data/lib/unipept/configuration.rb +0 -29
- data/lib/unipept/formatters.rb +0 -135
- data/lib/unipept/version.rb +0 -3
- data/lib/unipept.rb +0 -8
- data/test/test_unipept.rb +0 -7
@@ -0,0 +1,226 @@
|
|
1
|
+
require 'typhoeus'
|
2
|
+
|
3
|
+
require_relative '../formatters'
|
4
|
+
require_relative '../configuration'
|
5
|
+
require_relative '../batch_order'
|
6
|
+
require_relative '../batch_iterator'
|
7
|
+
require_relative '../version'
|
8
|
+
|
9
|
+
require_relative 'unipept/config'
|
10
|
+
require_relative 'unipept/pept2lca'
|
11
|
+
require_relative 'unipept/pept2prot'
|
12
|
+
require_relative 'unipept/pept2taxa'
|
13
|
+
require_relative 'unipept/taxa2lca'
|
14
|
+
require_relative 'unipept/taxonomy'
|
15
|
+
|
16
|
+
module Unipept
  # Builds the `unipept` command line interface: a Cri root command with the
  # config, pept2taxa, pept2lca, taxa2lca, pept2prot and taxonomy subcommands
  # attached to it.
  class Commands::Unipept
    # Help text of the -e/--equate flag (pept2taxa, pept2lca, pept2prot).
    EQUATE_HELP = 'equate isoleucine (I) and leucine (L) when matching peptides'.freeze

    # Help text of the -a/--all flag for subcommands yielding NCBI Taxonomy records.
    ALL_TAXONOMY_HELP = 'report all information fields of NCBI Taxonomy records available in Unipept. Note that this may have a performance penalty.'.freeze

    # Help text of the -a/--all flag for the subcommand yielding Uniprot records.
    ALL_UNIPROT_HELP = 'report all information fields of Uniprot records available in Unipept. Note that this may have a performance penalty.'.freeze

    # Help text of the -s/--select option shared by all data subcommands.
    SELECT_HELP = 'select the information fields to return. Selected fields are passed as a comma separated list of field names. Multiple -s (or --select) options may be used.'.freeze

    # Creates the root command and registers every subcommand on it.
    def initialize
      @root_command = create_root_command
      add_config_command
      add_pept2taxa_command
      add_pept2lca_command
      add_taxa2lca_command
      add_pept2prot_command
      add_taxonomy_command
    end

    # Runs the root command with the given arguments.
    #
    # @param [Array<String>] args An array of command-line arguments
    def run(args)
      @root_command.run(args)
    end

    # Builds the root `unipept` command with the options shared by all
    # subcommands. When run without a subcommand it prints the version if
    # --version was given, and otherwise aborts with the help text.
    #
    # @return [Cri::Command] The root command
    def create_root_command
      Cri::Command.new_basic_root.modify do
        name 'unipept'
        summary 'Command line interface to Unipept web services.'
        usage 'unipept subcommand [options]'
        description <<-EOS
        The unipept subcommands are command line wrappers around the Unipept web services.

        Subcommands that start with pept expect a list of tryptic peptides as input. Subcommands that start with tax expect a list of NCBI Taxonomy Identifiers as input. Input is passed

        - as separate command line arguments

        - in a text file that is passed as an argument to the -i option

        - to standard input

        The command will give priority to the first way the input is passed, in the order as listed above. Text files and standard input should have one tryptic peptide or one NCBI Taxonomy Identifier per line.
        EOS
        flag :v, :version, 'displays the version'
        flag :q, :quiet, 'disable service messages'
        option :i, :input, 'read input from file', argument: :required
        option :o, :output, 'write output to file', argument: :required
        option :f, :format, "define the output format (available: #{Unipept::Formatter.available.join ', ' }) (default: #{Unipept::Formatter.default})", argument: :required

        # Configuration options
        option nil, 'host', 'specify the server running the Unipept web service', argument: :required

        run do |opts, _args, cmd|
          if opts[:version]
            puts Unipept::VERSION
          else
            abort cmd.help
          end
        end
      end
    end

    # Registers the `config` subcommand, handled by Commands::Config.
    def add_config_command
      @root_command.define_command('config') do
        summary 'Set configuration options.'
        usage 'config option [value]'
        description <<-EOS
        Sets or shows the value for configuration options. All settings are stored in the .unipeptrc file in the home directory of the user.

        Running the command with a value will set that value for the given option, running it without will show the current value.

        These options are currently supported:

        - host: Set the default host for api calls.

        Example: "unipept config host http://api.unipept.ugent.be" will set the default host to the public unipept server.
        EOS

        runner Commands::Config
      end
    end

    # Registers the `pept2taxa` subcommand, handled by Commands::Pept2taxa.
    def add_pept2taxa_command
      @root_command.define_command('pept2taxa') do
        usage 'pept2taxa [options]'
        summary 'Fetch taxa of Uniprot records that match tryptic peptides.'
        description <<-EOS
        For each tryptic peptide the unipept pept2taxa command retrieves from Unipept the set of taxa from all Uniprot records whose protein sequence contains an exact match to the tryptic peptide. The command expects a list of tryptic peptides that are passed

        - as separate command line arguments

        - in a text file that is passed as an argument to the -i option

        - to standard input

        The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.

        The unipept pept2taxa subcommand yields NCBI Taxonomy records as output.
        EOS

        flag :e, :equate, EQUATE_HELP
        flag :a, :all, ALL_TAXONOMY_HELP
        option :s, :select, SELECT_HELP, argument: :required, multiple: true

        runner Commands::Pept2taxa
      end
    end

    # Registers the `pept2lca` subcommand, handled by Commands::Pept2lca.
    def add_pept2lca_command
      @root_command.define_command('pept2lca') do
        usage 'pept2lca [options]'
        summary 'Fetch taxonomic lowest common ancestor of Uniprot records that match tryptic peptides.'
        description <<-EOS
        For each tryptic peptide the unipept pept2lca command retrieves from Unipept the lowest common ancestor of the set of taxa from all Uniprot records whose protein sequence contains an exact match to the tryptic peptide. The lowest common ancestor is based on the topology of the Unipept Taxonomy -- a cleaned up version of the NCBI Taxonomy -- and is itself a record from the NCBI Taxonomy. The command expects a list of tryptic peptides that are passed

        - as separate command line arguments

        - in a text file that is passed as an argument to the -i option

        - to standard input

        The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.

        The unipept pept2lca subcommand yields an NCBI Taxonomy record as output.
        EOS

        flag :e, :equate, EQUATE_HELP
        flag :a, :all, ALL_TAXONOMY_HELP
        option :s, :select, SELECT_HELP, argument: :required, multiple: true

        runner Commands::Pept2lca
      end
    end

    # Registers the `taxa2lca` subcommand, handled by Commands::Taxa2lca.
    def add_taxa2lca_command
      @root_command.define_command('taxa2lca') do
        usage 'taxa2lca [options]'
        summary 'Compute taxonomic lowest common ancestor for given list of taxa.'
        description <<-EOS
        The unipept taxa2lca command computes the lowest common ancestor of a given list of NCBI Taxonomy Identifiers. The lowest common ancestor is based on the topology of the Unipept Taxonomy -- a cleaned up version of the NCBI Taxonomy -- and is itself a record from the NCBI Taxonomy. The command expects a list of NCBI Taxonomy Identifiers that are passed

        - as separate command line arguments

        - in a text file that is passed as an argument to the -i option

        - to standard input

        The command will give priority to the first way NCBI Taxonomy Identifiers are passed, in the order as listed above. Text files and standard input should have one NCBI Taxonomy Identifier per line.

        The unipept taxa2lca subcommand yields NCBI Taxonomy records as output.
        EOS

        flag :a, :all, ALL_TAXONOMY_HELP
        option :s, :select, SELECT_HELP, argument: :required, multiple: true

        runner Commands::Taxa2lca
      end
    end

    # Registers the `pept2prot` subcommand, handled by Commands::Pept2prot.
    def add_pept2prot_command
      @root_command.define_command('pept2prot') do
        usage 'pept2prot [options]'
        summary 'Fetch Uniprot records that match tryptic peptides.'
        description <<-EOS
        For each tryptic peptide the unipept pept2prot command retrieves from Unipept all Uniprot records whose protein sequence contains an exact match to the tryptic peptide. The command expects a list of tryptic peptides that are passed

        - as separate command line arguments

        - in a text file that is passed as an argument to the -i option

        - to standard input

        The command will give priority to the first way tryptic peptides are passed, in the order as listed above. Text files and standard input should have one tryptic peptide per line.

        The unipept pept2prot subcommand yields Uniprot records as output.
        EOS

        flag :e, :equate, EQUATE_HELP
        flag :a, :all, ALL_UNIPROT_HELP
        option :s, :select, SELECT_HELP, argument: :required, multiple: true

        runner Commands::Pept2prot
      end
    end

    # Registers the `taxonomy` subcommand, handled by Commands::Taxonomy.
    def add_taxonomy_command
      @root_command.define_command('taxonomy') do
        usage 'taxonomy [options]'
        summary 'Fetch taxonomic information from Unipept Taxonomy.'
        description <<-EOS
        The unipept taxonomy command yields information from the Unipept Taxonomy records for a given list of NCBI Taxonomy Identifiers. The Unipept Taxonomy is a cleaned up version of the NCBI Taxonomy, and its records are also records of the NCBI Taxonomy. The command expects a list of NCBI Taxonomy Identifiers that are passed

        - as separate command line arguments

        - in a text file that is passed as an argument to the -i option

        - to standard input

        The command will give priority to the first way NCBI Taxonomy Identifiers are passed, in the order as listed above. Text files and standard input should have one NCBI Taxonomy Identifier per line.

        The unipept taxonomy subcommand yields NCBI Taxonomy records as output.
        EOS

        flag :a, :all, ALL_TAXONOMY_HELP
        option :s, :select, SELECT_HELP, argument: :required, multiple: true

        runner Commands::Taxonomy
      end
    end

    # Invokes the unipept command-line tool with the given arguments.
    #
    # @param [Array<String>] args An array of command-line arguments
    #
    # @return [void]
    def self.run(args)
      new.run(args)
    end
  end
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
require 'typhoeus'
|
2
|
+
|
3
|
+
module Unipept::Commands
  # Command line interface of the `uniprot` tool: downloads Uniprot records
  # for a list of accession numbers and prints them in the requested format.
  class Uniprot
    # NOTE(review): these are instance-level readers, while the command below
    # is stored in a class-level instance variable and the format list in a
    # local variable; instances are never created in this file. They are kept
    # so the class interface stays unchanged.
    attr_reader :root_command
    attr_reader :valid_formats

    # Local variable on purpose: the blocks below capture it as a closure.
    valid_formats = Set.new %w(fasta txt xml rdf gff sequence)
    @root_command = Cri::Command.define do
      name 'uniprot'
      summary 'Command line interface to Uniprot web services.'
      usage 'uniprot [options]'
      description <<-EOS
      The uniprot command is a command line wrapper around the Uniprot web services. The command expects a list of Uniprot Accession Numbers that are passed

      - as separate command line arguments

      - to standard input

      The command will give priority to the first way Uniprot Accession Numbers are passed, in the order as listed above. The standard input should have one Uniprot Accession Number per line.

      The uniprot command yields just the protein sequences as a default, but can return several formats.
      EOS
      required :f, :format, 'specify output format (available: ' + valid_formats.to_a.join(', ') + ') (default: sequence)'
      flag :h, :help, 'show help for this command' do |_value, cmd|
        puts cmd.help
        exit 0
      end
      run do |opts, args, _cmd|
        format = opts.fetch(:format, 'sequence')
        unless valid_formats.include? format
          $stderr.puts format + ' is not a valid output format. Available formats are: ' + valid_formats.to_a.join(', ')
          exit 1
        end
        # Command line arguments take priority over standard input.
        iterator = args.empty? ? $stdin.each_line : args
        iterator.each do |accession|
          puts Uniprot.get_uniprot_entry(accession.chomp, format)
        end
      end
    end

    # Invokes the uniprot command-line tool with the given arguments.
    #
    # @param [Array<String>] args An array of command-line arguments
    #
    # @return [void]
    def self.run(args)
      @root_command.run(args)
    end

    # Fetches a Uniprot record from the uniprot website with the given accession
    # number in the requested format.
    #
    # @param [String] accession The accession number of the record to fetch
    #
    # @param [String] format The format of the record. If the format is
    # 'sequence', the fasta record is downloaded and its sequence is returned
    # as a single line (the fasta header line is dropped)
    #
    # @return [String, nil] The requested Uniprot record in the requested
    # format, or nil if the request was not successful
    def self.get_uniprot_entry(accession, format)
      if format == 'sequence'
        fasta = get_uniprot_entry(accession, 'fasta')
        # A failed download yields nil; report it as nil instead of crashing.
        # drop(1) also copes with an empty body, where [1..-1] would be nil.
        fasta && fasta.lines.drop(1).map(&:chomp).join('')
      else
        # other format has been specified, just download and output
        resp = Typhoeus.get("http://www.uniprot.org/uniprot/#{accession}.#{format}")
        resp.response_body if resp.success?
      end
    end
  end
end
|
data/lib/commands.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module Unipept
  # Hash-like wrapper around a YAML configuration file.
  class Configuration
    attr_reader :config
    attr_reader :file_name

    # Creates a new config object, based on a given YAML file. If no filename
    # is given, '.unipeptrc' in the home dir of the user will be used.
    #
    # If the file doesn't exist, or exists but contains no YAML document (for
    # example an empty file), an empty config will be loaded.
    #
    # @param [String] file An optional file name of the YAML file to create the
    # config from
    def initialize(file = nil)
      @file_name = file || File.join(Dir.home, '.unipeptrc')
      # YAML.load_file returns a falsy value for an empty document, which
      # would break [], []= and delete below; fall back to an empty hash.
      @config = File.exist?(file_name) ? (YAML.load_file(file_name) || {}) : {}
    end

    # Saves the config to disk. If the file doesn't exist yet, a new one will be
    # created.
    def save
      File.open(file_name, 'w') { |f| f.write config.to_yaml }
    end

    # Deletes a key from the config.
    #
    # @param [Object] key The key to delete
    #
    # @return [Object] Whatever the internal hash's delete returns
    def delete(key)
      config.delete(key)
    end

    # Forwards [] to the internal config hash.
    def [](*args)
      config.[](*args)
    end

    # Forwards []= to the internal config hash.
    def []=(*args)
      config.[]=(*args)
    end
  end
end
|
data/lib/formatters.rb
ADDED
@@ -0,0 +1,252 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
module Unipept
  # Base class of the output formatters. Subclasses call `register` in their
  # class body to make themselves available under a format name.
  class Formatter
    # The Hash of available formatters, keyed by format name.
    #
    # A class variable is used on purpose: `register` is called on the
    # subclasses and all of them must write to the same registry.
    #
    # @return [Hash] A hash of the available formatters
    def self.formatters
      @@formatters ||= {}
    end

    # Returns a new formatter of the given format. If the given format is not
    # available, the default formatter is returned.
    #
    # @param [String] format The type of the formatter we want
    #
    # @return [Formatter] The requested formatter
    def self.new_for_format(format)
      # Explicit fallback instead of rescuing every error raised on the way.
      formatters.fetch(format) { formatters[default] }.new
    end

    # Adds the receiving class to the list of available formats.
    #
    # @param [Symbol] format The type of the format we want to register
    def self.register(format)
      formatters[format.to_s] = self
    end

    # Returns a list of the available formatters.
    #
    # @return [Array<String>] The list of available formatters
    def self.available
      formatters.keys
    end

    # @return [String] The type of the default formatter: csv
    def self.default
      'csv'
    end

    # @return [String] The type of the current formatter
    def type
      ''
    end

    # Returns the header row for the given sample_data and fasta_mapper. This
    # row is output only once at the beginning of the output.
    #
    # @param [Object] _sample_data The data that we will output after this
    # header. Can be used to extract the keys.
    #
    # @param [Array<Array<String>>] _fasta_mapper Optional mapping between input
    # data and corresponding fasta header. The data is represented as a list
    # containing tuples where the first element is the fasta header and second
    # element is the input data
    #
    # @return [String] The header row (empty for the base formatter)
    def header(_sample_data, _fasta_mapper = nil)
      ''
    end

    # Converts the given input data and corresponding fasta headers to another
    # format. The base formatter returns the data unchanged.
    #
    # @param [Array] data The data we wish to convert
    #
    # @param [Array<Array<String>>] _fasta_mapper Optional mapping between input
    # data and corresponding fasta header. The data is represented as a list
    # containing tuples where the first element is the fasta header and second
    # element is the input data
    #
    # @return [Object] The given data, unchanged
    def format(data, _fasta_mapper = nil)
      data
    end
  end

  # Formatter that outputs the data as JSON.
  class JSONFormatter < Formatter
    require 'json'
    register :json

    # @return [String] The type of the current formatter: json
    def type
      'json'
    end

    # Converts the given input data and corresponding fasta headers to JSON.
    # Currently ignores the fasta_mapper.
    #
    # @param [Array] data The data we wish to convert
    #
    # @param [Array<Array<String>>] _fasta_mapper Optional mapping between input
    # data and corresponding fasta header. The data is represented as a list
    # containing tuples where the first element is the fasta header and second
    # element is the input data
    #
    # @return [String] The input data converted to the JSON format.
    def format(data, _fasta_mapper = nil)
      # TODO: add fasta header based on fasta_mapper information
      data.to_json
    end
  end

  # Formatter that outputs the data as CSV, one row per record.
  class CSVFormatter < Formatter
    require 'csv'
    register :csv

    # @return [String] The type of the current formatter: csv
    def type
      'csv'
    end

    # Returns the header row for the given data and fasta_mapper. This row
    # contains all the keys of the first element of the data, preceded by
    # 'fasta_header' if a fasta_mapper is given. Empty data gives an empty
    # string.
    #
    # @param [Array] data The data that we will use to extract the keys from.
    #
    # @param [Array<Array<String>>] fasta_mapper Optional mapping between input
    # data and corresponding fasta header. The data is represented as a list
    # containing tuples where the first element is the fasta header and second
    # element is the input data. If a fasta_mapper is given, the output will be
    # preceded with 'fasta_header'.
    #
    # @return [String] The header row
    def header(data, fasta_mapper = nil)
      CSV.generate do |csv|
        first = data.first
        keys = fasta_mapper ? ['fasta_header'] : []
        csv << (keys + first.keys).map(&:to_s) if first
      end
    end

    # Converts the given input data and corresponding fasta headers to the csv
    # format.
    #
    # @param [Array] data The data we wish to convert
    #
    # @param [Array<Array<String>>] fasta_mapper Optional mapping between input
    # data and corresponding fasta header. The data is represented as a list
    # containing tuples where the first element is the fasta header and second
    # element is the input data
    #
    # @return [String] The converted input data into the csv format
    def format(data, fasta_mapper = nil)
      CSV.generate do |csv|
        if fasta_mapper
          format_fasta(csv, data, fasta_mapper)
        else
          format_normal(csv, data)
        end
      end
    end

    # Appends one csv row per element of the data. Empty strings are written
    # as nil.
    #
    # @param [CSV] csv object we write the csv output to
    #
    # @param [Array] data The data we wish to convert
    def format_normal(csv, data)
      data.each do |o|
        csv << o.values.map { |v| v == '' ? nil : v }
      end
    end

    # Appends the data to the csv output in the order of the fasta_mapper,
    # with every row preceded by its fasta header. Mapper entries without
    # matching data are skipped. Empty strings are written as nil.
    #
    # @param [CSV] csv object we write the csv output to
    #
    # @param [Array] data The data we wish to convert
    #
    # @param [Array<Array<String>>] fasta_mapper Mapping between input
    # data and corresponding fasta header. The data is represented as a list
    # containing tuples where the first element is the fasta header and second
    # element is the input data
    def format_fasta(csv, data, fasta_mapper)
      data_dict = group_by_first_key(data)
      fasta_mapper.each do |fasta_header, key|
        next if data_dict[key].nil?

        data_dict[key].each do |r|
          csv << ([fasta_header] + r.values).map { |v| v == '' ? nil : v }
        end
      end
    end

    # Groups the data by the (stringified) first value of each element, for
    # example
    # [{key1: v1, key2: v2},{key1: v1, key2: v3},{key1: v4, key2: v2}]
    # to {v1 => [{key1: v1, key2: v2},{key1: v1, key2: v3}], v4 => [{key1: v4, key2: v2}]}
    #
    # @param [Array<Hash>] data The data we wish to group
    #
    # @return [Hash] The input data grouped by the first key
    def group_by_first_key(data)
      data.group_by { |el| el.values.first.to_s }
    end
  end

  # Formatter that outputs the data as XML.
  class XMLFormatter < Formatter
    # Monkey patch (do as to_xml, but saner)

    class ::Object
      # Serializes a leaf value. When wrapped in an element, the characters
      # &, < and > are escaped so the element content cannot break the markup.
      def to_xml(name = nil)
        return to_s unless name

        text = to_s.gsub(/[&<>]/, '&' => '&amp;', '<' => '&lt;', '>' => '&gt;')
        %(<#{name}>#{text}</#{name}>)
      end
    end

    class ::Array
      # Serializes the array as an element with a size attribute and one
      # child element (named item_name) per entry.
      def to_xml(array_name = :array, item_name = :item)
        %(<#{array_name} size="#{size}">) + map { |n| n.to_xml(item_name) }.join + "</#{array_name}>"
      end
    end

    class ::Hash
      # Serializes every value as an element named after its key, optionally
      # wrapped in an element with the given name.
      def to_xml(name = nil)
        data = to_a.map { |k, v| v.to_xml(k) }.join
        name ? "<#{name}>#{data}</#{name}>" : data
      end
    end

    register :xml

    # @return [String] The type of the current formatter: xml
    def type
      'xml'
    end

    # Converts the given input data and corresponding fasta headers to XML.
    # Currently ignores the fasta_mapper.
    #
    # @param [Array] data The data we wish to convert
    #
    # @param [Array<Array<String>>] _fasta_mapper Optional mapping between input
    # data and corresponding fasta header. The data is represented as a list
    # containing tuples where the first element is the fasta header and second
    # element is the input data
    #
    # @return [String] The input data converted to the XML format.
    def format(data, _fasta_mapper = nil)
      # TODO: add fasta header based on fasta_mapper information
      data.to_xml
    end
  end
end
|
data/lib/version.rb
ADDED