bio-img_metadata 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.rspec +1 -0
- data/.travis.yml +12 -0
- data/Gemfile +15 -0
- data/LICENSE.txt +20 -0
- data/README.md +52 -0
- data/Rakefile +49 -0
- data/VERSION +1 -0
- data/lib/bio-img_metadata.rb +107 -0
- data/spec/bio-img_metadata_spec.rb +20 -0
- data/spec/data/head.metadata.csv +10 -0
- data/spec/spec_helper.rb +12 -0
- metadata +130 -0
data/.document
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/.travis.yml
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
language: ruby
|
2
|
+
rvm:
|
3
|
+
- 1.9.2
|
4
|
+
- 1.9.3
|
5
|
+
- jruby-19mode # JRuby in 1.9 mode
|
6
|
+
- rbx-19mode
|
7
|
+
# - 1.8.7
|
8
|
+
# - jruby-18mode # JRuby in 1.8 mode
|
9
|
+
# - rbx-18mode
|
10
|
+
|
11
|
+
# uncomment this line if your project needs to run something other than `rake`:
|
12
|
+
# script: bundle exec rspec spec
|
data/Gemfile
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# Fetch gems over HTTPS so downloads cannot be tampered with in transit.
source "https://rubygems.org"
# Add dependencies required to use your gem here.
# Example:
#   gem "activesupport", ">= 2.3.5"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "rspec", "~> 2.8.0"
  # rdoc is declared once only; a second identical declaration makes Bundler
  # warn that the Gemfile lists the gem more than once.
  gem "rdoc", "~> 3.12"
  gem "jeweler", "~> 1.8.4"
  gem "bundler", ">= 1.0.21"
  gem "bio", ">= 1.4.2"
end
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2013 Ben J. Woodcroft
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
# bio-img_metadata
|
2
|
+
|
3
|
+
[![Build Status](https://secure.travis-ci.org/wwood/bioruby-img_metadata.png)](http://travis-ci.org/wwood/bioruby-img_metadata)
|
4
|
+
|
5
|
+
Reads metadata from Integrated Microbial Genomes (IMG) metadata files. Metadata files are generated by searching for one or more taxa and then exporting some or all of the genome-specific characteristics, e.g. kingdom, genus, temperature range, taxon identifier, etc.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
```sh
|
10
|
+
gem install bio-img_metadata
|
11
|
+
```
|
12
|
+
|
13
|
+
## Usage
|
14
|
+
|
15
|
+
```ruby
|
16
|
+
require 'bio-img_metadata'
|
17
|
+
|
18
|
+
d = Bio::IMG::Metadata.read(File.join DATA_DIR, 'head.metadata.csv') #=> a Bio::IMG::Metadata, which acts like an Array of Bio::IMG::Lineage objects
|
19
|
+
|
20
|
+
d.length.should == 9 #=> The array has 9 members, one for each data line in the metadata file (the header line is not counted)
|
21
|
+
d[0].kind_of?(Bio::IMG::Lineage).should == true #=> each member is a Bio::IMG::Lineage object
|
22
|
+
|
23
|
+
d[0].domain.should == 'Archaea' #=> some attributes are now methods (mostly the taxonomy-related ones)
|
24
|
+
d[1].taxon_id.should == 2515075008
|
25
|
+
|
26
|
+
d[0].attributes['Status'].should == 'Finished' #=> every column, including the rest, is in the attributes hash, keyed by column header
|
27
|
+
```
|
28
|
+
|
29
|
+
## Project home page
|
30
|
+
|
31
|
+
For information on the source tree, documentation, examples, issues and
|
32
|
+
how to contribute, see
|
33
|
+
|
34
|
+
http://github.com/wwood/bioruby-img_metadata
|
35
|
+
|
36
|
+
The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.
|
37
|
+
|
38
|
+
## Cite
|
39
|
+
|
40
|
+
If you use this software, please cite one of
|
41
|
+
|
42
|
+
* [BioRuby: bioinformatics software for the Ruby programming language](http://dx.doi.org/10.1093/bioinformatics/btq475)
|
43
|
+
* [Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics](http://dx.doi.org/10.1093/bioinformatics/bts080)
|
44
|
+
|
45
|
+
## Biogems.info
|
46
|
+
|
47
|
+
This Biogem is published at (http://biogems.info/index.html#bio-img_metadata)
|
48
|
+
|
49
|
+
## Copyright
|
50
|
+
|
51
|
+
Copyright (c) 2013 Ben J. Woodcroft. See LICENSE.txt for further details.
|
52
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
# Activate the gems from the Gemfile's default and development groups. If
# Bundler cannot resolve them, report why and exit with Bundler's status code.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end
require 'rake'

# Gem packaging tasks. The block receives a Gem::Specification;
# see http://docs.rubygems.org/read/chapter/20 for more options.
require 'jeweler'
Jeweler::Tasks.new do |gemspec|
  gemspec.name        = "bio-img_metadata"
  gemspec.homepage    = "http://github.com/wwood/bioruby-img_metadata"
  gemspec.license     = "MIT"
  gemspec.summary     = "Reads metadata from Integrated Microbial Genomes (IMG) metadata files into a programmaticly useful state"
  gemspec.description = "Reads metadata from Integrated Microbial Genomes (IMG) metadata files into a programmaticly useful state."
  gemspec.email       = "donttrustben near gmail.com"
  gemspec.authors     = ["Ben J. Woodcroft"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

# Spec tasks: `rake spec` runs the specs, `rake rcov` runs them with rcov enabled.
require 'rspec/core'
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec) { |t| t.pattern = FileList['spec/**/*_spec.rb'] }

RSpec::Core::RakeTask.new(:rcov) do |t|
  t.pattern = 'spec/**/*_spec.rb'
  t.rcov = true
end

# Running plain `rake` runs the specs.
task :default => :spec

# RDoc generation; the title carries the contents of the VERSION file when present.
require 'rdoc/task'
Rake::RDocTask.new do |doc|
  version =
    if File.exist?('VERSION')
      File.read('VERSION')
    else
      ""
    end

  doc.rdoc_dir = 'rdoc'
  doc.title = "bio-img_metadata #{version}"
  doc.rdoc_files.include('README*')
  doc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.1
|
@@ -0,0 +1,107 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
module Bio
  # A taxonomic lineage: a plain object with one accessor per taxonomic rank,
  # plus the taxon identifier.
  class Lineage
    attr_accessor :taxon_id, :domain, :kingdom, :phylum, :class_name, :order, :family, :genus, :species

    # Returns the genus and species joined by a single space, e.g.
    # "Acidianus hospitalis".
    #
    # A part that has not been set (nil) is left out instead of raising, so a
    # lineage with only a genus returns just the genus, and a lineage with
    # neither returns an empty string.
    def genus_species
      [@genus, @species].compact.join(' ')
    end
  end
end

module Bio
  module IMG
    # One data row of an IMG metadata export.
    class Lineage < Bio::Lineage
      # The line this lineage was parsed from, without its line terminator
      attr_accessor :definition_line

      # Hash of key => value for each column (column header => the string
      # found in that column of this row)
      attr_accessor :attributes
    end

    # Acts like an array of Bio::IMG::Lineage objects
    #
    # Use Metadata.read to read in a file downloaded through the IMG export system
    class Metadata < Array
      # Column header in the IMG file => Lineage attribute it populates.
      FIELD_NAMES_TO_CLASSIFICATIONS = {
        'taxon_oid' => :taxon_id,
        'Domain' => :domain,
        'Phylum' => :phylum,
        'Class' => :class_name,
        'Order' => :order,
        'Family' => :family,
        'Genus' => :genus,
        'Species' => :species,
      }

      # Reads a tab-separated IMG metadata file into a new instance of this
      # class, which is then an array of Bio::IMG::Lineage objects, one per
      # data line of the file.
      #
      # The first non-blank line is taken as the header row, e.g.
      #
      #   taxon_oid  Domain   Status    Genome Name              Phylum         Class  ...
      #   650716001  Archaea  Finished  Acidianus hospitalis W1  Crenarchaeota  Thermoprotei  ...
      #
      # Columns named in FIELD_NAMES_TO_CLASSIFICATIONS are copied onto the
      # matching Lineage attribute (taxon_oid is converted with to_i); every
      # column is also stored, as a string, in Lineage#attributes keyed by its
      # header. A mapped column that is absent from the header row leaves the
      # corresponding attribute nil. Blank lines are skipped.
      #
      # img_taxonomy_filename_path:: path of the file to read
      #
      # Raises the usual SystemCallError (e.g. Errno::ENOENT) if the file
      # cannot be opened.
      def self.read(img_taxonomy_filename_path)
        all_lineages = new

        headers = nil
        header_indices = {}

        # Have to use a simple line#split because the regular CSV class is narky
        # and IMG metadata files aren't perfectly respectable.
        #
        # File.foreach closes the file once the block finishes or raises.
        File.foreach(img_taxonomy_filename_path) do |line|
          definition_line = line.chomp
          # e.g. a trailing empty line; it carries no record
          next if definition_line.empty?

          row = definition_line.split("\t")

          # The first row holds the headers: remember where each mapped column is
          if headers.nil?
            headers = row
            FIELD_NAMES_TO_CLASSIFICATIONS.each_key do |header|
              header_indices[header] = headers.index(header)
            end
            next
          end

          lineage = Bio::IMG::Lineage.new
          lineage.definition_line = definition_line

          FIELD_NAMES_TO_CLASSIFICATIONS.each do |header, attribute|
            index = header_indices[header]
            # This export does not include the column; leave the attribute nil
            next if index.nil?

            value = row[index]
            value = value.to_i if attribute == :taxon_id
            lineage.public_send("#{attribute}=", value)
          end

          lineage.attributes = {}
          row.each_with_index do |col, i|
            lineage.attributes[headers[i]] = col
          end

          all_lineages.push lineage
        end

        all_lineages
      end

      # Return a hash of the lineages, indexed by taxon_oid (Lineage#taxon_id).
      # If several lineages share an identifier the last one wins.
      def to_hash
        each_with_object({}) do |taxon, hash|
          hash[taxon.taxon_id] = taxon
        end
      end
    end

    TaxonomyDefinitionFile = Metadata #Some backwards compatibility
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
# Specs for reading IMG metadata exports via Bio::IMG::Metadata and its
# backwards-compatible alias Bio::IMG::TaxonomyDefinitionFile.
describe "BioImgMetadata" do
  # Directory holding the fixture files used by these specs
  DATA_DIR = File.join(File.dirname(__FILE__), 'data')

  it "simple" do
    fixture_path = File.join(DATA_DIR, 'head.metadata.csv')
    lineages = Bio::IMG::TaxonomyDefinitionFile.read(fixture_path)

    # The result behaves as an Array with one Lineage per data line
    lineages.should be_a_kind_of(Array)
    lineages.length.should eq(9)
    lineages[0].should be_a_kind_of(Bio::Lineage)

    # Taxonomy columns are exposed as methods, the others through #attributes
    lineages[0].domain.should eq('Archaea')
    lineages[0].attributes['Status'].should eq('Finished')
    lineages[1].taxon_id.should eq(2_515_075_008)
  end

  it 'should have class equivalency TaxonomyDefinitionFile, MetadataFile' do
    Bio::IMG::TaxonomyDefinitionFile.should eq(Bio::IMG::Metadata)
  end
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
taxon_oid Domain Status Proposal Name Genome Name / Sample Name Sequencing Center Phylum Class Order Family Genus Species Genome ID NCBI Taxon ID RefSeq Project ID GenBank Project ID Strain Funding Agency Add Date Is Public Release Date IMG Release IMG Product Assignment IMG Submission ID Proposal GOLD ID Altitude Biotic Relationships Body Site Body Subsite Cell Arrangement Cell Shape Contact Email Contact Name Diseases Ecosystem Ecosystem Category Ecosystem Subtype Ecosystem Type Energy Source Funding Program Geographic Location Gram Staining Host Gender Host Name IMG Project ID Isolation Isolation Country Latitude Longitude Metabolism Motility Oxygen Requirement Phenotype Relevance Salinity Specific Ecosystem Sporulation Temperature Range Uncultured Type Genome Size (Number of total bases) Gene Count (Number of total Genes) Scaffold Count (Number of scaffolds) CRISPR Count (Number of CRISPRs) GC Count (Number of GC) GC % (Percentage of GC) Coding Base Count (Total number of coding bases) CDS Count (Number of CDS genes) CDS % (Percentage of CDS genes) RNA Count (Number of RNA genes) RNA % rRNA Count (Number of rRNA genes) 5S rRNA Count (Number of 5S rRNAs) 16S rRNA Count (Number of 16S rRNAs) 18S rRNA Count (Number of 18S rRNAs) 23S rRNA Count (Number of 23S rRNAs) 28S rRNA Count (Number of 28S rRNAs) tRNA Count (Number of tRNA genes) Other RNA Count (Number of other unclassified RNA genes) Pseudo Genes Count (Number of pseudo genes) Pseudo Genes % (Percentage of pseudo genes) Unchar Count (Number of uncharacerized genes) Unchar % (Percentage of uncharacterized genes) Dubious Count Dubious % w/ Func Pred Count (Number of genes with predicted protein product) w/ Func Pred % (Percentage of genes with predicted protein product) w/o Func Pred Sim Count (Number of genes without function prediction with similarity) w/o Func Pred Sim % (Percentage of genes without predicted protein product with similarity) w/o Func Pred No Sim Count (Number of genes without function 
prediction without similarity) w/o Func Pred No Sim % (Percentage of genes without function prediction without similarity) Orthologs Count Orthologs %` Paralogs Count Paralogs % Obsolete Count (Number of obsolete genes) Obsolete % (Percentage of obsolete genes) Revised Count (Number of revised genes) Revised % (Percentage of revised genes) Fused Count (Number of fused genes) Fused % (Percentage of fused genes) Fusion Component Count (Number of genes involved as fusion components) Fusion component % (Genes involved as fusion components percentage) SwissProt Count (Number of genes in SwissProt protein product) SwissProt % (Percentage of genes in SwissProt protein product) Not SwissProt Count (Number of genes not in SwissProt protein product) Not SwissProt % (Percentage of genes not in SwissProt protein product) SEED Count (Number of genes in SEED) SEED % (Percentage of genes in SEED) Not SEED Count (Number of genes not in SEED) Not SEED % (Percentage of genes not in SEED) COG Count (Number of genes in COG) COG % (Percentage of genes in COG) KOG Count (Number of genes in KOG) KOG % (Percentage of genes in KOG) Pfam Count (Number of genes in Pfam) Pfam % (Percentage of genes in Pfam) TIGRfam Count (Number of genes in TIGRfam) TIGRfam % (Percentage of genes in TIGRfam) InterPro Count (Number of genes in InterPro) InterPro % (Percentage of genes in InterPro) Enzyme Count (Number of genes assigned to enzymes) Enzyme % (Percentage of genes assigned to enzymes) TC Count (Number of genes assigned to Transporter Classification) TC % (Percentage of genes assigned to Transporter Classification) KEGG Count (Number of genes in KEGG) KEGG % (Percentage of genes in KEGG) Not KEGG Count (Number of genes not in KEGG) Not KEGG % (Percentage of genes not in KEGG) KO Count (Number of genes in KEGG Orthology (KO)) KO % (Percentage of genes in KEGG Orthology (KO)) Not KO Count (Number of genes not in KEGG Orthology (KO)) Not KO % (Percentage of genes not in KEGG Orthology (KO)) MetaCyc 
Count (Number of genes in MetaCyc) MetaCyc % (Percentage of genes in MetaCyc) Not MetaCyc Count (Number of genes not in MetaCyc) Not MetaCyc % (Percentage of genes not in MetaCyc IMG Term Count (Number of genes with IMG terms) IMG Term % (Percentage of genes with IMG terms) IMG Pathwawy Count (Number of genes in IMG pathwawys) IMG Pathway % (Percentage of genes in IMG pathways) IMG Parts List Count (Number of genes in IMG parts list) IMG Parts List % (Percentage of genes in IMG parts list) MyIMG Annotation Count MyIMG Annotation % Signal Peptide Count (Number of genes coding signal peptides) Signal Peptide % (Percentage of genes coding signal peptides) Transmembrane Count (Number of genes coding transmembrane proteins) Transmembrane % (Percentage of genes coding transmembrane proteins) Horizontally Transferred Count Horizontally Transferred % Genome Property Count (Number of genes in Genome Properties) Genome Property % (Percentage of genes in Genome Properties) Ortholog Group Count Paralog Group Count COG Cluster Count (Number of COG clusters) KOG Cluster Count (Number of KOG clusters) Pfam Cluster Count (Number of Pfam clusters) TIGRfam Cluster Count (Number of TIGRfam clusters) IMG Cluster Count IMG Cluster % Chromosomal Cassette Gene Count Chromosomal Cassette Gene % Chromosomal Cassette Count
|
2
|
+
644736411 Archaea Finished Thermococcus gammatolerans EJ3 Thermococcus gammatolerans EJ3 DOE Joint Genome Institute Euryarchaeota Thermococci Thermococcales Thermococcaceae Thermococcus gammatolerans 644736411 593117 59389 33671 EJ3 2009-10-13 Yes 2009-12-01 IMG/W 3.0 Gc01034 Free living Coccus-shaped NCKyrpides@lbl.gov NIKOS None Organotroph oil wells of the Samotlor oil reservoir Gram- 14544 hydrothermal vent chimneys located in the mid-Atlantic Ridge and Guyamas basin. Motile Anaerobe Radiation resistant Biotechnological Nonsporulating Hyperthermophile 2045438 2206 1 1095567 0.54 1910049 2156 97.73 50 2.27 4 2 1 0 1 0 46 0 0 0 0 0 0 0 1427 64.69 1030 46.69 0 0 0 0 99 4.49 708 32.09 178 8.07 1978 89.66 2 0.09 2154 97.64 1524 69.08 757 34.32 1655 75.02 554 25.11 1681 76.2 472 21.4 199 9.02 529 23.98 1627 73.75 1047 47.46 1109 50.27 450 20.4 1706 77.33 554 25.11 161 7.3 193 8.75 0 0 285 12.92 561 25.43 70 3.17 0 0 1071 525 1156 505 2206 100 66
|
3
|
+
2515075008 Archaea Permanent Draft Continuation of the Genomic Encyclopedia of Bacteria and Archaea pilot project Methanolobus tindarius DSM 2278 DOE Joint Genome Institute Euryarchaeota Methanomicrobia Methanosarcinales Methanosarcinaceae Methanolobus 2515075008 1090322 DOE; DSMZ 2012-06-29 Yes 2012-06-29 Yes 9393 Gi02920 Free living Coccus-shaped NCKyrpides@lbl.gov NIKOS None DOE-CSP 2011 Sediment from Lakes of Marinello, Italy Gram- 13501 Sediment from Lakes of Marinello, Italy Italy 38.132591 15.05422 Methanogen Anaerobe Tree of Life, GEBA Mesophile 3151883 3022 1 1254128 0.4 2767116 2957 97.85 65 2.15 9 3 3 0 3 0 50 6 34 1.13 0 0 0 0 2184 72.27 1124 37.19 0 0 0 0 5 0.17 0 0 0 0 2957 97.85 0 0 2957 97.85 2204 72.93 974 32.23 2219 73.43 889 29.42 2282 75.51 643 21.28 303 10.03 776 25.68 2181 72.17 1354 44.8 1603 53.04 624 20.65 2333 77.2 641 21.21 226 7.48 202 6.68 0 0 441 14.59 782 25.88 0 0 0 0 1352 589 1459 711 3022 100 256
|
4
|
+
2512047039 Archaea Finished Pyrobaculum oguniense TE7 Pyrobaculum oguniense TE7, DSM 13380 UCSC Crenarchaeota Thermoprotei Thermoproteales Thermoproteaceae Pyrobaculum 2512047039 698757 84411 42375 2012-03-23 Yes 2012-03-23 No 7574 Gc02118 Rod-shaped NCKyrpides@lbl.gov NIKOS Heterotroph Oguni-cho, Kumamoto, Japan 559 Terrestrial hot spring at Oguni-cho, Kumamoto, Japan Japan 32.806322 130.718651 Facultative Hyperthermophile 2452920 3014 2 1350357 0.55 2154879 2869 95.19 145 4.81 3 1 1 0 1 0 48 94 34 1.13 0 0 0 0 1709 56.7 931 30.89 0 0 0 0 3 0.1 0 0 0 0 2869 95.19 0 0 2869 95.19 1757 58.29 790 26.21 1743 57.83 494 16.39 1804 59.85 559 18.55 210 6.97 670 22.23 2199 72.96 1107 36.73 1762 58.46 539 17.88 2330 77.31 485 16.09 168 5.57 168 5.57 0 0 775 25.71 630 20.9 0 0 0 0 1105 549 1100 460 3014 100 206
|
5
|
+
650716053 Archaea Finished Genome sequencing and comparison of novel methanogens from peatlands and bioreactors Methanobacterium sp. SWAN-1 DOE Joint Genome Institute Euryarchaeota Methanobacteria Methanobacteriales Methanobacteriaceae Methanobacterium sp. 650716053 868131 67359 53561 SWAN-1 2011-08-22 Yes 2011-12-01 IMG/W 3.5 Gc01780 Free living ipagani@lbl.gov Ioanna Pagani None Lithotroph DOE-CSP 2010 Gram- 17802 Methanogen Bioenergy, Biotechnological, Comparative analysis Mesophile 2546541 2500 1 909924 0.36 2010186 2442 97.68 58 2.32 9 3 3 0 3 0 47 2 45 1.8 0 0 0 0 1536 61.44 1151 46.04 0 0 0 0 43 1.72 0 0 0 0 2442 97.68 0 0 2442 97.68 1838 73.52 787 31.48 1879 75.16 704 28.16 1869 74.76 576 23.04 205 8.2 707 28.28 1735 69.4 1149 45.96 1293 51.72 553 22.12 1889 75.56 490 19.6 176 7.04 162 6.48 0 0 490 19.6 589 23.56 240 9.6 0 0 1218 509 1288 625 2500 100 456
|
6
|
+
650377982 Archaea Finished Sulfolobus islandicus REY15A Sulfolobus islandicus REY15A Los Alamos National Lab Crenarchaeota Thermoprotei Sulfolobales Sulfolobaceae Sulfolobus islandicus 650377982 930945 162071 60485 REY15A 2011-03-26 Yes 2011-07-01 IMG/W 3.4 Gc01624 Free living Singles Coccus-shaped ipagani@lbl.gov Ioanna Pagani None Lithotroph Gram- 18556 Nonmotile Facultative Acidophile Biotechnological Nonsporulating Hyperthermophile 2522992 2753 1 890812 0.35 2177858 2644 96.04 109 3.96 4 1 1 0 1 0 45 60 0 0 0 0 0 0 1812 65.82 1184 43.01 0 0 0 0 97 3.52 0 0 0 0 2644 96.04 55 2 2589 94.04 1914 69.52 935 33.96 1922 69.81 588 21.36 1983 72.03 577 20.96 216 7.85 643 23.36 2001 72.68 1056 38.36 1588 57.68 558 20.27 2086 75.77 191 6.94 104 3.78 57 2.07 0 0 474 17.22 577 20.96 5 0.18 0 0 1122 579 1165 496 2753 100 252
|
7
|
+
649633040 Archaea Finished A Genomic Encyclopedia of Bacteria and Archaea (GEBA) Desulfurococcus mucosus 07/1, DSM 2162 DOE Joint Genome Institute Crenarchaeota Thermoprotei Desulfurococcales Desulfurococcaceae Desulfurococcus mucosus 649633040 765177 62227 48641 DSM 2162 2011-03-26 Yes 2011-07-01 IMG/W 3.4 Gc01595 1,053 m Free living Sphere-shaped NCKyrpides@lbl.gov NIKOS None Organotroph DOE-GEBA 2007 Askja, Iceland Gram- 13494 Hot solfataric spring; Iceland Iceland 65.05 -16.8 Sulfur respiration Nonmotile Anaerobe Tree of Life, Biotechnological, GEBA Nonsporulating Hyperthermophile 1314639 1421 1 698621 0.53 1186786 1371 96.48 50 3.52 3 1 1 0 1 0 46 1 26 1.83 0 0 0 0 934 65.73 438 30.82 0 0 0 0 48 3.38 0 0 0 0 1371 96.48 0 0 1371 96.48 1075 75.65 541 38.07 1042 73.33 346 24.35 1057 74.38 329 23.15 138 9.71 406 28.57 965 67.91 754 53.06 617 43.42 320 22.52 1051 73.96 234 16.47 69 4.86 90 6.33 0 0 323 22.73 294 20.69 63 4.43 0 0 799 418 838 329 1421 100 61
|
8
|
+
649989953 Archaea Draft Haladaptatus paucihalophilus DX253 Haladaptatus paucihalophilus DX253 Oklahoma State University Euryarchaeota Halobacteria Halobacteriales Halobacteriaceae Haladaptatus paucihalophilus 649989953 797209 62523 50445 DX253 2011-03-26 Yes 2011-07-01 IMG/W 3.4 Gi07306 Clusters Coccus-shaped ipagani@lbl.gov Ioanna Pagani None Chemoorganotroph Zodletone Spring, Oklahoma Gram- 17533 Low-salt, sulfide- and sulfur-rich spring (Zodletone Spring) in south-western Oklahoma, USA USA 34.996 -98.688 Aerobe Biotechnological Halophile Mesophile 4284805 4496 32 2648659 0.62 3649979 4443 98.82 53 1.18 6 2 3 0 1 0 47 0 0 0 0 0 1 0.02 2528 56.23 2436 54.18 0 0 1 0 246 5.47 0 0 0 0 4443 98.82 0 0 4443 98.82 2986 66.41 1391 30.94 3008 66.9 836 18.59 3097 68.88 831 18.48 493 10.97 950 21.13 3493 77.69 1690 37.59 2753 61.23 809 17.99 3634 80.83 780 17.35 276 6.14 260 5.78 0 0 1106 24.6 1111 24.71 152 3.38 0 0 1401 708 1521 634 4496 100 445
|
9
|
+
638154510 Archaea Finished Methanothermobacter thermoautotrophicus Delta H Methanothermobacter thermoautotrophicus Delta H J. Craig Venter Institute Euryarchaeota Methanobacteria Methanobacteriales Methanobacteriaceae Methanothermobacter thermautotrophicus 638154510 187420 57877 289 str. Delta H DOE 2006-10-02 Yes 2006-12-01 IMG/W 2.0 Gc00009 Free living NCKyrpides@lbl.gov NIKOS None Lithoautotroph, Autotroph, Lithotroph Sewage sludge in 1971 in Urbana Illinois Gram- 10677 Sewage sludge in 1971 in Urbana Illinois USA 40.106649 -88.195627 Methanogen Nonmotile Obligate anaerobe Biotechnological, Energy production Nonsporulating Thermophile 1751377 1893 1 867701 0.5 1570117 1845 97.46 48 2.54 7 3 2 0 2 0 39 2 23 1.22 0 0 35 1.85 1362 71.95 929 49.08 87 5 155 8 108 5.71 726 38.35 191 10.09 1654 87.37 1106 58.43 739 39.04 1458 77.02 637 33.65 1529 80.77 631 33.33 1544 81.56 533 28.16 147 7.77 637 33.65 1208 63.81 1047 55.31 798 42.16 514 27.15 1331 70.31 630 33.28 224 11.83 192 10.14 2 0.11 157 8.29 188 9.93 90 4.75 0 0 1125 477 1193 608 1980 104.6 63
|
10
|
+
637000162 Archaea Finished Methanosarcina barkeri fusaro Methanosarcina barkeri Fusaro, DSM 804 DOE Joint Genome Institute Euryarchaeota Methanomicrobia Methanosarcinales Methanosarcinaceae Methanosarcina barkeri 637000162 269797 57715 103 str. fusaro DOE 2006-10-02 Yes 2006-12-01 IMG/W 2.0 Gc00295 Free living Singles Coccus-shaped NCKyrpides@lbl.gov NIKOS None Lithotroph DOEM 2001 Naples Italy Gram- 10390 Mud samples from Lago del Fusaro Lake in Naples Italy Italy 40.844 14.251 Methanogen Nonmotile Anaerobe Energy production, Environmental, Biotechnological, Carbon cycle Nonsporulating Mesophile 4873766 3834 2 1912156 0.39 3406841 3758 98.02 76 1.98 9 3 3 0 3 0 62 5 134 3.5 0 0 3 0.08 1820 47.47 2631 68.62 0 0 3 0 256 6.68 1567 40.87 329 8.58 3429 89.44 3631 94.71 127 3.31 2703 70.5 1153 30.07 2927 76.34 1026 26.76 2843 74.15 756 19.72 436 11.37 900 23.47 2858 74.54 1619 42.23 2139 55.79 733 19.12 3025 78.9 811 21.15 271 7.07 255 6.65 1 0.03 543 14.16 957 24.96 305 7.96 0 0 1409 629 1524 749 3834 100 887
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
2
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
3
|
+
require 'rspec'
|
4
|
+
require 'bio-img_metadata'
|
5
|
+
|
6
|
+
# Requires supporting files with custom matchers and macros, etc,
|
7
|
+
# in ./support/ and its subdirectories.
|
8
|
+
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
|
9
|
+
|
10
|
+
RSpec.configure do |config|
|
11
|
+
|
12
|
+
end
|
metadata
ADDED
@@ -0,0 +1,130 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bio-img_metadata
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Ben J. Woodcroft
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-01-11 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rspec
|
16
|
+
requirement: &80862770 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 2.8.0
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *80862770
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: rdoc
|
27
|
+
requirement: &80862450 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ~>
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '3.12'
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *80862450
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: jeweler
|
38
|
+
requirement: &80861630 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ~>
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 1.8.4
|
44
|
+
type: :development
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *80861630
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: bundler
|
49
|
+
requirement: &80861100 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 1.0.21
|
55
|
+
type: :development
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *80861100
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: bio
|
60
|
+
requirement: &80860260 !ruby/object:Gem::Requirement
|
61
|
+
none: false
|
62
|
+
requirements:
|
63
|
+
- - ! '>='
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: 1.4.2
|
66
|
+
type: :development
|
67
|
+
prerelease: false
|
68
|
+
version_requirements: *80860260
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rdoc
|
71
|
+
requirement: &80859920 !ruby/object:Gem::Requirement
|
72
|
+
none: false
|
73
|
+
requirements:
|
74
|
+
- - ~>
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '3.12'
|
77
|
+
type: :development
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: *80859920
|
80
|
+
description: Reads metadata from Integrated Microbial Genomes (IMG) metadata files
|
81
|
+
into a programmaticly useful state.
|
82
|
+
email: donttrustben near gmail.com
|
83
|
+
executables: []
|
84
|
+
extensions: []
|
85
|
+
extra_rdoc_files:
|
86
|
+
- LICENSE.txt
|
87
|
+
- README.md
|
88
|
+
files:
|
89
|
+
- .document
|
90
|
+
- .rspec
|
91
|
+
- .travis.yml
|
92
|
+
- Gemfile
|
93
|
+
- LICENSE.txt
|
94
|
+
- README.md
|
95
|
+
- Rakefile
|
96
|
+
- VERSION
|
97
|
+
- lib/bio-img_metadata.rb
|
98
|
+
- spec/bio-img_metadata_spec.rb
|
99
|
+
- spec/data/head.metadata.csv
|
100
|
+
- spec/spec_helper.rb
|
101
|
+
homepage: http://github.com/wwood/bioruby-img_metadata
|
102
|
+
licenses:
|
103
|
+
- MIT
|
104
|
+
post_install_message:
|
105
|
+
rdoc_options: []
|
106
|
+
require_paths:
|
107
|
+
- lib
|
108
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
109
|
+
none: false
|
110
|
+
requirements:
|
111
|
+
- - ! '>='
|
112
|
+
- !ruby/object:Gem::Version
|
113
|
+
version: '0'
|
114
|
+
segments:
|
115
|
+
- 0
|
116
|
+
hash: 591437005
|
117
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
118
|
+
none: false
|
119
|
+
requirements:
|
120
|
+
- - ! '>='
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
version: '0'
|
123
|
+
requirements: []
|
124
|
+
rubyforge_project:
|
125
|
+
rubygems_version: 1.8.17
|
126
|
+
signing_key:
|
127
|
+
specification_version: 3
|
128
|
+
summary: Reads metadata from Integrated Microbial Genomes (IMG) metadata files into
|
129
|
+
a programmaticly useful state
|
130
|
+
test_files: []
|