img_scripts 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/Gemfile ADDED
@@ -0,0 +1,15 @@
1
+ source "http://rubygems.org"
2
+ # Add dependencies required to use your gem here.
3
+ # Example:
4
+ # gem "activesupport", ">= 2.3.5"
5
+
6
+ gem 'bio-img_metadata'
7
+
8
+ # Add dependencies to develop your gem here.
9
+ # Include everything needed to run rake, tests, features, etc.
10
+ group :development do
11
+ gem "shoulda", ">= 0"
12
+ gem "rdoc", "~> 3.12"
13
+ gem "bundler", ">= 1.0.0"
14
+ gem "jeweler", ">= 1.8.4"
15
+ end
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2013 Ben J. Woodcroft
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,19 @@
1
+ = img_scripts
2
+
3
+ Scripts related to the IMG (Integrated Microbial Genomes) database.
4
+
5
+ == Contributing to img_scripts
6
+
7
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
8
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it.
9
+ * Fork the project.
10
+ * Start a feature/bugfix branch.
11
+ * Commit and push until you are happy with your contribution.
12
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
13
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or if changing them is otherwise necessary, that is fine, but please isolate the change to its own commit so I can cherry-pick around it.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2013 Ben J. Woodcroft. See LICENSE.txt for
18
+ further details.
19
+
data/Rakefile ADDED
@@ -0,0 +1,45 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ begin
6
+ Bundler.setup(:default, :development)
7
+ rescue Bundler::BundlerError => e
8
+ $stderr.puts e.message
9
+ $stderr.puts "Run `bundle install` to install missing gems"
10
+ exit e.status_code
11
+ end
12
+ require 'rake'
13
+
14
+ require 'jeweler'
15
+ Jeweler::Tasks.new do |gem|
16
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
17
+ gem.name = "img_scripts"
18
+ gem.homepage = "http://github.com/wwood/img_scripts"
19
+ gem.license = "MIT"
20
+ gem.summary = %Q{Scripts related to the IMG (Integrated Microbial Genomes) database}
21
+ gem.description = %Q{Scripts related to the IMG (Integrated Microbial Genomes) database}
22
+ gem.email = "donttrustben near gmail.com"
23
+ gem.authors = ["Ben J. Woodcroft"]
24
+ # dependencies defined in Gemfile
25
+ end
26
+ Jeweler::RubygemsDotOrgTasks.new
27
+
28
+ require 'rake/testtask'
29
+ Rake::TestTask.new(:test) do |test|
30
+ test.libs << 'lib' << 'test'
31
+ test.pattern = 'test/**/test_*.rb'
32
+ test.verbose = true
33
+ end
34
+
35
+ task :default => :test
36
+
37
+ require 'rdoc/task'
38
+ Rake::RDocTask.new do |rdoc|
39
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
40
+
41
+ rdoc.rdoc_dir = 'rdoc'
42
+ rdoc.title = "img_scripts #{version}"
43
+ rdoc.rdoc_files.include('README*')
44
+ rdoc.rdoc_files.include('lib/**/*.rb')
45
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
@@ -0,0 +1,142 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+ require 'bio-logger'
5
+ require 'bio-img_metadata'
6
+
7
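+ if __FILE__ == $0 #needs to be removed if this script is distributed as part of a rubygem, because the executable wrapper that RubyGems generates loads this file, so __FILE__ no longer equals $0 and the body below would be skipped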
8
+ SCRIPT_NAME = File.basename(__FILE__); LOG_NAME = SCRIPT_NAME.gsub('.rb','')
9
+ IMG_METADATA_FILE_ENV_VARIABLE_NAME='IMG_METADATA_FILE'
10
+
11
+ # Parse command line options into the options hash
12
+ options = {
13
+ :logger => 'stderr',
14
+ :output_fields => ['taxon_oid'],
15
+ :listing_mode => false,
16
+ }
17
+ o = OptionParser.new do |opts|
18
+ opts.banner = "
19
+ Usage: #{SCRIPT_NAME} [-i <img_metadata_file>] <key=value>
20
+
21
+ Runs through an IMG metadata file, and prints out the IMG identifiers of those genomes that match the key=value criteria. E.g. Domain=Archaea to grep for all archaeons.
22
+
23
+ Example:
24
+
25
+ $ img_metadata_scanner.rb Domain=Archaea --output-fields \"Genus,Species\" |head
26
+ DEBUG img_metadata_scanner: Using environment variable IMG_METADATA_FILE to define path to IMG metadata file /srv/whitlam/bio/db/img/4.0/metadata/img_metadata_4_0_FIXED.csv
27
+ INFO img_metadata_scanner: Using 1 filters.
28
+ INFO img_metadata_scanner: Found 4700 taxons in the IMG metadata file
29
+ Thermococcus gammatolerans
30
+ Methanolobus
31
+ Pyrobaculum
32
+ Methanobacterium sp.
33
+ Sulfolobus islandicus
34
+ Desulfurococcus mucosus
35
+ Haladaptatus paucihalophilus
36
+ Methanothermobacter thermautotrophicus
37
+ Methanosarcina barkeri
38
+ Methanobrevibacter smithii
39
+
40
+ \n\n"
41
+
42
+ opts.separator "\nOptions:\n\n"
43
+ opts.on("-i", "--img-metadata-file PATH", "Path to IMG metadata file [required]. This is not necessary if there is a valid environment variable #{IMG_METADATA_FILE_ENV_VARIABLE_NAME} available.") do |arg|
44
+ options[:img_metadata_file] = arg
45
+ end
46
+ opts.on("-o", "--output-fields FIELDS", "List of output fields, comma separated [default: #{options[:output_fields].join(',')}]") do |arg|
47
+ options[:output_fields] = arg.split ','
48
+ end
49
+ opts.on("-l", "--list", "Instead of filtering print a list of the fields in the output file, newline separated [default: #{options[:listing_mode]}]") do |arg|
50
+ options[:listing_mode] = true
51
+ end
52
+
53
+ # logger options
54
+ opts.separator "\nVerbosity:\n\n"
55
+ opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") {Bio::Log::CLI.trace('error')}
56
+ opts.on("--logger filename",String,"Log to file [default #{options[:logger]}]") { |name| options[:logger] = name}
57
+ opts.on("--trace options",String,"Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG"){|s| Bio::Log::CLI.trace(s)}
58
+ end; o.parse!
59
+
60
+ # Set up logging. bio-logger defaults to STDERR not STDOUT, I disagree
61
+ Bio::Log::CLI.logger(options[:logger]); log = Bio::Log::LoggerPlus.new(LOG_NAME); Bio::Log::CLI.configure(LOG_NAME)
62
+
63
+ # Read in the system-specific ENV variable for the path to the img metadata file unless a path has already been specified
64
+ if options[:img_metadata_file].nil?
65
+ if ENV[IMG_METADATA_FILE_ENV_VARIABLE_NAME].nil?
66
+ $stderr.puts "img-metadata-file not found as either a command line option or environment variable, failing."
67
+ $stderr.puts o
68
+ exit 1
69
+ else
70
+ options[:img_metadata_file] = ENV[IMG_METADATA_FILE_ENV_VARIABLE_NAME]
71
+ log.debug "Using environment variable #{IMG_METADATA_FILE_ENV_VARIABLE_NAME} to define path to IMG metadata file #{options[:img_metadata_file]}"
72
+ end
73
+ end
74
+ unless File.exist?(options[:img_metadata_file])
75
+ $stderr.puts "IMG metadata file #{options[:img_metadata_file]} not found - was it specified correctly?"
76
+ exit 2
77
+ end
78
+
79
+
80
+
81
+ # If listing mode is set, don't do any filtering, just list the variables
82
+ if options[:listing_mode]
83
+ # Read the IMG metadata
84
+ img = Bio::IMG::TaxonomyDefinitionFile.read(options[:img_metadata_file])
85
+ log.info "Found #{img.length} taxons in the IMG metadata file"
86
+
87
+ img[0].attributes.keys.each do |key|
88
+ puts key
89
+ end
90
+
91
+ exit 0
92
+ end
93
+
94
+ # Parse the key/value pairs
95
+ filter_hash = {}
96
+ unless ARGV[0].nil?
97
+ ARGV[0].split(',').each do |split|
98
+ splits2 = split.split('=')
99
+ unless splits2.length == 2
100
+ log.error "Badly parsed key/value pair '#{split}', expected exactly 1 = sign, found #{splits2.length}"
101
+ exit 1
102
+ end
103
+
104
+ key = splits2[0]
105
+ unless filter_hash[key].nil?
106
+ log.error "Duplicate filter key found: #{key}, failing"
107
+ exit 1
108
+ end
109
+ filter_hash[key] = splits2[1]
110
+ end
111
+ end
112
+ log.info "Using #{filter_hash.length} filters."
113
+
114
+ # Read the IMG metadata
115
+ img = Bio::IMG::TaxonomyDefinitionFile.read(options[:img_metadata_file])
116
+ log.info "Found #{img.length} taxons in the IMG metadata file"
117
+
118
+ # Expect that each of the filters is found in the list of headers available, otherwise filtering will be ineffectual
119
+ # Ditto for the output names
120
+ [filter_hash.keys, options[:output_fields]].flatten.each do |key|
121
+ unless img[0].attributes.keys.include?(key)
122
+ log.warn "Unable to find column named #{key} in the IMG metadata file - typo perhaps?"
123
+ exit 1
124
+ end
125
+ end
126
+
127
+ # Go through each row, printing the outputs if they pass the filter
128
+ img.each do |taxon|
129
+ passed = true
130
+ filter_hash.each do |key, value|
131
+ if taxon.attributes[key] != value
132
+ passed = false
133
+ break
134
+ end
135
+ end
136
+
137
+ if passed
138
+ puts options[:output_fields].collect{|field| taxon.attributes[field]}.join("\t")
139
+ end
140
+ end
141
+
142
+ end #end if running as a script
File without changes
data/test/helper.rb ADDED
@@ -0,0 +1,18 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'test/unit'
11
+ require 'shoulda'
12
+
13
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
15
+ require 'img_scripts'
16
+
17
+ class Test::Unit::TestCase
18
+ end
@@ -0,0 +1,7 @@
1
+ require 'helper'
2
+
3
+ class TestImgScripts < Test::Unit::TestCase
4
+ should "probably rename this file and start testing for real" do
5
+ flunk "hey buddy, you should probably rename this file and start testing for real"
6
+ end
7
+ end
metadata ADDED
@@ -0,0 +1,116 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: img_scripts
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ben J. Woodcroft
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-01-11 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bio-img_metadata
16
+ requirement: &77007880 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *77007880
25
+ - !ruby/object:Gem::Dependency
26
+ name: shoulda
27
+ requirement: &77007610 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *77007610
36
+ - !ruby/object:Gem::Dependency
37
+ name: rdoc
38
+ requirement: &77007320 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ~>
42
+ - !ruby/object:Gem::Version
43
+ version: '3.12'
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *77007320
47
+ - !ruby/object:Gem::Dependency
48
+ name: bundler
49
+ requirement: &77006960 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: 1.0.0
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *77006960
58
+ - !ruby/object:Gem::Dependency
59
+ name: jeweler
60
+ requirement: &77006640 !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: 1.8.4
66
+ type: :development
67
+ prerelease: false
68
+ version_requirements: *77006640
69
+ description: Scripts related to the IMG (Integrated Microbial Genomes) database
70
+ email: donttrustben near gmail.com
71
+ executables:
72
+ - img_metadata_scanner.rb
73
+ extensions: []
74
+ extra_rdoc_files:
75
+ - LICENSE.txt
76
+ - README.rdoc
77
+ files:
78
+ - .document
79
+ - Gemfile
80
+ - LICENSE.txt
81
+ - README.rdoc
82
+ - Rakefile
83
+ - VERSION
84
+ - bin/img_metadata_scanner.rb
85
+ - lib/img_scripts.rb
86
+ - test/helper.rb
87
+ - test/test_img_scripts.rb
88
+ homepage: http://github.com/wwood/img_scripts
89
+ licenses:
90
+ - MIT
91
+ post_install_message:
92
+ rdoc_options: []
93
+ require_paths:
94
+ - lib
95
+ required_ruby_version: !ruby/object:Gem::Requirement
96
+ none: false
97
+ requirements:
98
+ - - ! '>='
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ segments:
102
+ - 0
103
+ hash: -1064213905
104
+ required_rubygems_version: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ requirements: []
111
+ rubyforge_project:
112
+ rubygems_version: 1.8.17
113
+ signing_key:
114
+ specification_version: 3
115
+ summary: Scripts related to the IMG (Integrated Microbial Genomes) database
116
+ test_files: []