img_scripts 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/Gemfile +15 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +45 -0
- data/VERSION +1 -0
- data/bin/img_metadata_scanner.rb +142 -0
- data/lib/img_scripts.rb +0 -0
- data/test/helper.rb +18 -0
- data/test/test_img_scripts.rb +7 -0
- metadata +116 -0
data/.document
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# Fetch gems over HTTPS so downloads cannot be tampered with in transit.
source "https://rubygems.org"
|
2
|
+
# Add dependencies required to use your gem here.
|
3
|
+
# Example:
|
4
|
+
# gem "activesupport", ">= 2.3.5"
|
5
|
+
|
6
|
+
gem 'bio-img_metadata'
|
7
|
+
|
8
|
+
# Add dependencies to develop your gem here.
|
9
|
+
# Include everything needed to run rake, tests, features, etc.
|
10
|
+
group :development do
|
11
|
+
gem "shoulda", ">= 0"
|
12
|
+
gem "rdoc", "~> 3.12"
|
13
|
+
gem "bundler", ">= 1.0.0"
|
14
|
+
gem "jeweler", ">= 1.8.4"
|
15
|
+
end
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2013 Ben J. Woodcroft
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
= img_scripts
|
2
|
+
|
3
|
+
Scripts related to the IMG (Integrated Microbial Genomes) database.
|
4
|
+
|
5
|
+
== Contributing to img_scripts
|
6
|
+
|
7
|
+
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
|
8
|
+
* Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it.
|
9
|
+
* Fork the project.
|
10
|
+
* Start a feature/bugfix branch.
|
11
|
+
* Commit and push until you are happy with your contribution.
|
12
|
+
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
13
|
+
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or if it is otherwise necessary, that is fine, but please isolate the change in its own commit so I can cherry-pick around it.
|
14
|
+
|
15
|
+
== Copyright
|
16
|
+
|
17
|
+
Copyright (c) 2013 Ben J. Woodcroft. See LICENSE.txt for
|
18
|
+
further details.
|
19
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
begin
|
6
|
+
Bundler.setup(:default, :development)
|
7
|
+
rescue Bundler::BundlerError => e
|
8
|
+
$stderr.puts e.message
|
9
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
10
|
+
exit e.status_code
|
11
|
+
end
|
12
|
+
require 'rake'
|
13
|
+
|
14
|
+
require 'jeweler'
|
15
|
+
Jeweler::Tasks.new do |gem|
|
16
|
+
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
17
|
+
gem.name = "img_scripts"
|
18
|
+
gem.homepage = "http://github.com/wwood/img_scripts"
|
19
|
+
gem.license = "MIT"
|
20
|
+
gem.summary = %Q{Scripts related to the IMG (Integrated Microbial Genomes) database}
|
21
|
+
gem.description = %Q{Scripts related to the IMG (Integrated Microbial Genomes) database}
|
22
|
+
gem.email = "donttrustben near gmail.com"
|
23
|
+
gem.authors = ["Ben J. Woodcroft"]
|
24
|
+
# dependencies defined in Gemfile
|
25
|
+
end
|
26
|
+
Jeweler::RubygemsDotOrgTasks.new
|
27
|
+
|
28
|
+
require 'rake/testtask'
|
29
|
+
Rake::TestTask.new(:test) do |test|
|
30
|
+
test.libs << 'lib' << 'test'
|
31
|
+
test.pattern = 'test/**/test_*.rb'
|
32
|
+
test.verbose = true
|
33
|
+
end
|
34
|
+
|
35
|
+
task :default => :test
|
36
|
+
|
37
|
+
require 'rdoc/task'
|
38
|
+
Rake::RDocTask.new do |rdoc|
|
39
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
40
|
+
|
41
|
+
rdoc.rdoc_dir = 'rdoc'
|
42
|
+
rdoc.title = "img_scripts #{version}"
|
43
|
+
rdoc.rdoc_files.include('README*')
|
44
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
45
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.1
|
@@ -0,0 +1,142 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'bio-logger'
|
5
|
+
require 'bio-img_metadata'
|
6
|
+
|
7
|
+
if __FILE__ == $0 #needs to be removed if this script is distributed as part of a rubygem
|
8
|
+
SCRIPT_NAME = File.basename(__FILE__); LOG_NAME = SCRIPT_NAME.gsub('.rb','')
|
9
|
+
IMG_METADATA_FILE_ENV_VARIABLE_NAME='IMG_METADATA_FILE'
|
10
|
+
|
11
|
+
# Parse command line options into the options hash
|
12
|
+
options = {
|
13
|
+
:logger => 'stderr',
|
14
|
+
:output_fields => ['taxon_oid'],
|
15
|
+
:listing_mode => false,
|
16
|
+
}
|
17
|
+
o = OptionParser.new do |opts|
|
18
|
+
opts.banner = "
|
19
|
+
Usage: #{SCRIPT_NAME} [-i <img_metadata_file>] <key=value>
|
20
|
+
|
21
|
+
Runs through an IMG metadata file, and prints out the IMG identifiers of those genomes that match the key=value criteria. E.g. Kingdom=Archaea to grep for all archaeons.
|
22
|
+
|
23
|
+
Example:
|
24
|
+
|
25
|
+
$ img_metadata_scanner.rb Domain=Archaea --output-fields \"Genus,Species\" |head
|
26
|
+
DEBUG img_metadata_scanner: Using environment variable IMG_METADATA_FILE to define path to IMG metadata file /srv/whitlam/bio/db/img/4.0/metadata/img_metadata_4_0_FIXED.csv
|
27
|
+
INFO img_metadata_scanner: Using 1 filters.
|
28
|
+
INFO img_metadata_scanner: Found 4700 taxons in the IMG metadata file
|
29
|
+
Thermococcus gammatolerans
|
30
|
+
Methanolobus
|
31
|
+
Pyrobaculum
|
32
|
+
Methanobacterium sp.
|
33
|
+
Sulfolobus islandicus
|
34
|
+
Desulfurococcus mucosus
|
35
|
+
Haladaptatus paucihalophilus
|
36
|
+
Methanothermobacter thermautotrophicus
|
37
|
+
Methanosarcina barkeri
|
38
|
+
Methanobrevibacter smithii
|
39
|
+
|
40
|
+
\n\n"
|
41
|
+
|
42
|
+
opts.separator "\nOptions:\n\n"
|
43
|
+
opts.on("-i", "--img-metadata-file PATH", "Path to IMG metadata file [required]. This is not necessary if there is a valid environment variable #{IMG_METADATA_FILE_ENV_VARIABLE_NAME} available.") do |arg|
|
44
|
+
options[:img_metadata_file] = arg
|
45
|
+
end
|
46
|
+
opts.on("-o", "--output-fields FIELDS", "List of output fields, comma separated [default: #{options[:output_fields].join(',')}]") do |arg|
|
47
|
+
options[:output_fields] = arg.split ','
|
48
|
+
end
|
49
|
+
opts.on("-l", "--list", "Instead of filtering print a list of the fields in the output file, newline separated [default: #{options[:listing_mode]}]") do |arg|
|
50
|
+
options[:listing_mode] = true
|
51
|
+
end
|
52
|
+
|
53
|
+
# logger options
|
54
|
+
opts.separator "\nVerbosity:\n\n"
|
55
|
+
opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") {Bio::Log::CLI.trace('error')}
|
56
|
+
opts.on("--logger filename",String,"Log to file [default #{options[:logger]}]") { |name| options[:logger] = name}
|
57
|
+
opts.on("--trace options",String,"Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG"){|s| Bio::Log::CLI.trace(s)}
|
58
|
+
end; o.parse!
|
59
|
+
|
60
|
+
# Setup logging. bio-logger defaults to STDERR not STDOUT, I disagree
|
61
|
+
Bio::Log::CLI.logger(options[:logger]); log = Bio::Log::LoggerPlus.new(LOG_NAME); Bio::Log::CLI.configure(LOG_NAME)
|
62
|
+
|
63
|
+
# Read in the system-specific ENV variable for the path to the img metadata file unless a path has already been specified
|
64
|
+
if options[:img_metadata_file].nil?
|
65
|
+
if ENV[IMG_METADATA_FILE_ENV_VARIABLE_NAME].nil?
|
66
|
+
$stderr.puts "img-metadata-file not found as either a command line option or environment variable, failing."
|
67
|
+
$stderr.puts o
|
68
|
+
exit 1
|
69
|
+
else
|
70
|
+
options[:img_metadata_file] = ENV[IMG_METADATA_FILE_ENV_VARIABLE_NAME]
|
71
|
+
log.debug "Using environment variable #{IMG_METADATA_FILE_ENV_VARIABLE_NAME} to define path to IMG metadata file #{options[:img_metadata_file]}"
|
72
|
+
end
|
73
|
+
end
|
74
|
+
unless File.exist?(options[:img_metadata_file])
|
75
|
+
$stderr.puts "IMG metadata file #{options[:img_metadata_file]} not found - was it specified correctly?"
|
76
|
+
exit 2
|
77
|
+
end
|
78
|
+
|
79
|
+
|
80
|
+
|
81
|
+
# If listing mode is set, don't do any filtering, just list the variables
|
82
|
+
if options[:listing_mode]
|
83
|
+
# Read the IMG metadata
|
84
|
+
img = Bio::IMG::TaxonomyDefinitionFile.read(options[:img_metadata_file])
|
85
|
+
log.info "Found #{img.length} taxons in the IMG metadata file"
|
86
|
+
|
87
|
+
img[0].attributes.keys.each do |key|
|
88
|
+
puts key
|
89
|
+
end
|
90
|
+
|
91
|
+
exit 0
|
92
|
+
end
|
93
|
+
|
94
|
+
# Parse the key/value pairs
|
95
|
+
filter_hash = {}
|
96
|
+
unless ARGV[0].nil?
|
97
|
+
ARGV[0].split(',').each do |split|
|
98
|
+
splits2 = split.split('=')
|
99
|
+
unless splits2.length == 2
|
100
|
+
# Report the number of '=' characters actually present: the part count from
# String#split is off by one and silently drops a trailing empty value.
log.error "Badly parsed key/value pair '#{split}', expected key=value with exactly 1 = sign and a non-empty value, found #{split.count('=')} = sign(s)"
|
101
|
+
exit 1
|
102
|
+
end
|
103
|
+
|
104
|
+
key = splits2[0]
|
105
|
+
unless filter_hash[key].nil?
|
106
|
+
log.error "Duplicate filter key found: #{key}, failing"
|
107
|
+
exit 1
|
108
|
+
end
|
109
|
+
filter_hash[key] = splits2[1]
|
110
|
+
end
|
111
|
+
end
|
112
|
+
log.info "Using #{filter_hash.length} filters."
|
113
|
+
|
114
|
+
# Read the IMG metadata
|
115
|
+
img = Bio::IMG::TaxonomyDefinitionFile.read(options[:img_metadata_file])
|
116
|
+
log.info "Found #{img.length} taxons in the IMG metadata file"
|
117
|
+
|
118
|
+
# Expect that each of the filters are found in the list of headers available, otherwise filtering will be ineffectual
|
119
|
+
# Ditto for the output names
|
120
|
+
[filter_hash.keys, options[:output_fields]].flatten.each do |key|
|
121
|
+
unless img[0].attributes.keys.include?(key)
|
122
|
+
# This is fatal for the run (the script exits non-zero right after), so log at
# ERROR like the other failure paths; this also keeps it visible under --quiet.
log.error "Unable to find column named #{key} in the IMG metadata file - typo perhaps?"
|
123
|
+
exit 1
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
# Go through each row, printing the outputs if they pass the filter
|
128
|
+
img.each do |taxon|
|
129
|
+
passed = true
|
130
|
+
filter_hash.each do |key, value|
|
131
|
+
if taxon.attributes[key] != value
|
132
|
+
passed = false
|
133
|
+
break
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
if passed
|
138
|
+
puts options[:output_fields].collect{|field| taxon.attributes[field]}.join("\t")
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
end #end if running as a script
|
data/lib/img_scripts.rb
ADDED
File without changes
|
data/test/helper.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
begin
|
4
|
+
Bundler.setup(:default, :development)
|
5
|
+
rescue Bundler::BundlerError => e
|
6
|
+
$stderr.puts e.message
|
7
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
8
|
+
exit e.status_code
|
9
|
+
end
|
10
|
+
require 'test/unit'
|
11
|
+
require 'shoulda'
|
12
|
+
|
13
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
14
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
15
|
+
require 'img_scripts'
|
16
|
+
|
17
|
+
class Test::Unit::TestCase
|
18
|
+
end
|
metadata
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: img_scripts
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Ben J. Woodcroft
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-01-11 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bio-img_metadata
|
16
|
+
requirement: &77007880 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *77007880
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: shoulda
|
27
|
+
requirement: &77007610 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *77007610
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: rdoc
|
38
|
+
requirement: &77007320 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ~>
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '3.12'
|
44
|
+
type: :development
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *77007320
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: bundler
|
49
|
+
requirement: &77006960 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 1.0.0
|
55
|
+
type: :development
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *77006960
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: jeweler
|
60
|
+
requirement: &77006640 !ruby/object:Gem::Requirement
|
61
|
+
none: false
|
62
|
+
requirements:
|
63
|
+
- - ! '>='
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: 1.8.4
|
66
|
+
type: :development
|
67
|
+
prerelease: false
|
68
|
+
version_requirements: *77006640
|
69
|
+
description: Scripts related to the IMG (Integrated Microbial Genomes) database
|
70
|
+
email: donttrustben near gmail.com
|
71
|
+
executables:
|
72
|
+
- img_metadata_scanner.rb
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files:
|
75
|
+
- LICENSE.txt
|
76
|
+
- README.rdoc
|
77
|
+
files:
|
78
|
+
- .document
|
79
|
+
- Gemfile
|
80
|
+
- LICENSE.txt
|
81
|
+
- README.rdoc
|
82
|
+
- Rakefile
|
83
|
+
- VERSION
|
84
|
+
- bin/img_metadata_scanner.rb
|
85
|
+
- lib/img_scripts.rb
|
86
|
+
- test/helper.rb
|
87
|
+
- test/test_img_scripts.rb
|
88
|
+
homepage: http://github.com/wwood/img_scripts
|
89
|
+
licenses:
|
90
|
+
- MIT
|
91
|
+
post_install_message:
|
92
|
+
rdoc_options: []
|
93
|
+
require_paths:
|
94
|
+
- lib
|
95
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
96
|
+
none: false
|
97
|
+
requirements:
|
98
|
+
- - ! '>='
|
99
|
+
- !ruby/object:Gem::Version
|
100
|
+
version: '0'
|
101
|
+
segments:
|
102
|
+
- 0
|
103
|
+
hash: -1064213905
|
104
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ! '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
110
|
+
requirements: []
|
111
|
+
rubyforge_project:
|
112
|
+
rubygems_version: 1.8.17
|
113
|
+
signing_key:
|
114
|
+
specification_version: 3
|
115
|
+
summary: Scripts related to the IMG (Integrated Microbial Genomes) database
|
116
|
+
test_files: []
|