bio-img_metadata 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,12 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.2
4
+ - 1.9.3
5
+ - jruby-19mode # JRuby in 1.9 mode
6
+ - rbx-19mode
7
+ # - 1.8.7
8
+ # - jruby-18mode # JRuby in 1.8 mode
9
+ # - rbx-18mode
10
+
11
+ # uncomment this line if your project needs to run something other than `rake`:
12
+ # script: bundle exec rspec spec
data/Gemfile ADDED
@@ -0,0 +1,15 @@
1
# Fetch gems over HTTPS so downloads cannot be tampered with in transit.
source "https://rubygems.org"
# Add dependencies required to use your gem here.
# Example:
#   gem "activesupport", ">= 2.3.5"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "rspec", "~> 2.8.0"
  # rdoc is declared once only; a repeated declaration makes Bundler warn.
  gem "rdoc", "~> 3.12"
  gem "jeweler", "~> 1.8.4"
  gem "bundler", ">= 1.0.21"
  gem "bio", ">= 1.4.2"
end
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2013 Ben J. Woodcroft
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,52 @@
1
+ # bio-img_metadata
2
+
3
+ [![Build Status](https://secure.travis-ci.org/wwood/bioruby-img_metadata.png)](http://travis-ci.org/wwood/bioruby-img_metadata)
4
+
5
+ Reads metadata from Integrated Microbial Genomes (IMG) metadata files. Metadata files are generated by searching for one or more taxa and then exporting some or all of the genome-specific characteristics, e.g. kingdom, genus, temperature range, taxon identifier, etc.
6
+
7
+ ## Installation
8
+
9
+ ```sh
10
+ gem install bio-img_metadata
11
+ ```
12
+
13
+ ## Usage
14
+
15
+ ```ruby
16
+ require 'bio-img_metadata'
17
+
18
+ d = Bio::IMG::Metadata.read(File.join DATA_DIR, 'head.metadata.csv') #=> a Bio::IMG::Metadata, which is an Array of Bio::IMG::Lineage objects
19
+
20
+ d.length.should == 9 #=> The array has 9 members, one for each line in the metadata file
21
+ d[0].kind_of?(Bio::IMG::Lineage).should == true #=> Each member is a Bio::IMG::Lineage object
22
+
23
+ d[0].domain.should == 'Archaea' #=> some attributes are now methods (mostly the taxonomy-related ones)
24
+ d[1].taxon_id.should == 2515075008
25
+
26
+ d[0].attributes['Status'].should == 'Finished' #=> every column is also available, keyed by its header, in the attributes hash
27
+ ```
28
+
29
+ ## Project home page
30
+
31
+ For information on the source tree, documentation, examples, issues and
32
+ how to contribute, see
33
+
34
+ http://github.com/wwood/bioruby-img_metadata
35
+
36
+ The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.
37
+
38
+ ## Cite
39
+
40
+ If you use this software, please cite one of
41
+
42
+ * [BioRuby: bioinformatics software for the Ruby programming language](http://dx.doi.org/10.1093/bioinformatics/btq475)
43
+ * [Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics](http://dx.doi.org/10.1093/bioinformatics/bts080)
44
+
45
+ ## Biogems.info
46
+
47
+ This Biogem is published at (http://biogems.info/index.html#bio-img_metadata)
48
+
49
+ ## Copyright
50
+
51
+ Copyright (c) 2013 Ben J. Woodcroft. See LICENSE.txt for further details.
52
+
@@ -0,0 +1,49 @@
1
# encoding: utf-8

require 'rubygems'
require 'bundler'

# Activate the default and development gem groups. If the bundle cannot be
# set up, print Bundler's message plus a hint and exit with its status code.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end
require 'rake'

# Gem packaging and release tasks.
require 'jeweler'
Jeweler::Tasks.new do |gemspec|
  # gemspec is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gemspec.name = "bio-img_metadata"
  gemspec.homepage = "http://github.com/wwood/bioruby-img_metadata"
  gemspec.license = "MIT"
  gemspec.summary = "Reads metadata from Integrated Microbial Genomes (IMG) metadata files into a programmaticly useful state"
  gemspec.description = "Reads metadata from Integrated Microbial Genomes (IMG) metadata files into a programmaticly useful state."
  gemspec.email = "donttrustben near gmail.com"
  gemspec.authors = ["Ben J. Woodcroft"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

# Spec tasks: a plain run (:spec) and a run with rcov enabled (:rcov).
require 'rspec/core'
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec) do |task|
  task.pattern = FileList['spec/**/*_spec.rb']
end

RSpec::Core::RakeTask.new(:rcov) do |task|
  task.pattern = 'spec/**/*_spec.rb'
  task.rcov = true
end

# Running bare `rake` runs the specs.
task :default => :spec

# RDoc generation; the title carries the contents of VERSION when that file exists.
require 'rdoc/task'
Rake::RDocTask.new do |rdoc|
  version =
    if File.exist?('VERSION')
      File.read('VERSION')
    else
      ""
    end

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "bio-img_metadata #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
@@ -0,0 +1,107 @@
1
+ require 'csv'
2
+
3
module Bio
  # A taxonomic lineage: the taxon identifier plus one accessor per rank.
  class Lineage
    attr_accessor :taxon_id, :domain, :kingdom, :phylum, :class_name, :order, :family, :genus, :species

    # Returns the genus and species joined by a single space,
    # e.g. "Acidianus hospitalis". A rank that is nil is left out
    # rather than raising.
    def genus_species
      [@genus, @species].compact.join(' ')
    end
  end
end

module Bio
  module IMG
    # One data row of an IMG metadata file.
    class Lineage < Bio::Lineage
      # The raw line this lineage was parsed from, without its line ending
      attr_accessor :definition_line

      # Hash of column header => value for each column present in the row
      attr_accessor :attributes
    end

    # Acts like an array of Bio::IMG::Lineage objects
    #
    # Use Metadata.read to read in a file downloaded through the IMG export system
    class Metadata < Array
      # Maps IMG column headers onto the Lineage attribute each one populates.
      FIELD_NAMES_TO_CLASSIFICATIONS = {
        'taxon_oid' => :taxon_id,
        'Domain' => :domain,
        'Phylum' => :phylum,
        'Class' => :class_name,
        'Order' => :order,
        'Family' => :family,
        'Genus' => :genus,
        'Species' => :species,
      }.freeze

      # Reads an IMG metadata file into a new instance of this class, which
      # is then an array of Bio::IMG::Lineage objects, one per data row.
      #
      # The file is tab-separated and its first non-blank line is the header:
      #
      #   taxon_oid  Domain   Status    Genome Name              Phylum         Class ...
      #   650716001  Archaea  Finished  Acidianus hospitalis W1  Crenarchaeota  Thermoprotei ...
      #
      # img_taxonomy_filename_path:: path of the file to read
      #
      # Blank lines are skipped. A classification column that is absent from
      # the header, or missing from a short row, leaves the corresponding
      # attribute nil. taxon_id is converted to an Integer when present.
      #
      # Raises the usual SystemCallError (e.g. Errno::ENOENT) if the file
      # cannot be opened.
      def self.read(img_taxonomy_filename_path)
        all_lineages = new

        # Have to use a simple line#split because the regular CSV class is
        # strict and IMG metadata files aren't perfectly respectable
        headers = nil
        header_indices = {}

        # File.foreach closes the file once iteration finishes
        File.foreach(img_taxonomy_filename_path) do |line|
          definition_line = line.chomp
          row = definition_line.split("\t")
          next if row.empty?

          if headers.nil?
            # First non-blank line: remember where each known column lives
            headers = row
            FIELD_NAMES_TO_CLASSIFICATIONS.each_key do |header|
              header_indices[header] = headers.index(header)
            end
          else
            all_lineages.push build_lineage(row, headers, header_indices, definition_line)
          end
        end

        all_lineages
      end

      # Builds one Bio::IMG::Lineage from a split data row.
      def self.build_lineage(row, headers, header_indices, definition_line)
        lineage = Bio::IMG::Lineage.new
        lineage.definition_line = definition_line

        FIELD_NAMES_TO_CLASSIFICATIONS.each do |header, attribute|
          index = header_indices[header]
          value = index.nil? ? nil : row[index]
          value = value.to_i if attribute == :taxon_id && !value.nil?
          lineage.public_send("#{attribute}=", value)
        end

        # Every column present in the row is kept, keyed by its header
        lineage.attributes = {}
        row.each_with_index do |col, i|
          lineage.attributes[headers[i]] = col
        end

        lineage
      end
      private_class_method :build_lineage

      # Return a hash, indexed by taxon_oid
      def to_hash
        each_with_object({}) do |taxon, hash|
          hash[taxon.taxon_id] = taxon
        end
      end
    end

    TaxonomyDefinitionFile = Metadata #Some backwards compatibility
  end
end
@@ -0,0 +1,20 @@
1
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')

# Exercises Bio::IMG::Metadata against the head.metadata.csv fixture.
describe "BioImgMetadata" do
  # Directory holding the fixture files read by these examples
  DATA_DIR = File.join File.dirname(__FILE__), 'data'

  it "simple" do
    d = Bio::IMG::TaxonomyDefinitionFile.read(File.join DATA_DIR, 'head.metadata.csv')
    d.kind_of?(Array).should == true
    d.length.should == 9
    d[0].kind_of?(Bio::Lineage).should == true

    d[0].domain.should == 'Archaea'
    d[0].attributes['Status'].should == 'Finished'
    d[1].taxon_id.should == 2515075008
  end

  it 'should index lineages by taxon id in to_hash' do
    d = Bio::IMG::Metadata.read(File.join DATA_DIR, 'head.metadata.csv')
    hash = d.to_hash
    hash.length.should == 9
    hash[2515075008].equal?(d[1]).should == true
  end

  # TaxonomyDefinitionFile is kept as a backwards-compatible alias
  it 'should have class equivalency TaxonomyDefinitionFile, Metadata' do
    Bio::IMG::TaxonomyDefinitionFile.should == Bio::IMG::Metadata
  end
end
@@ -0,0 +1,10 @@
1
+ taxon_oid Domain Status Proposal Name Genome Name / Sample Name Sequencing Center Phylum Class Order Family Genus Species Genome ID NCBI Taxon ID RefSeq Project ID GenBank Project ID Strain Funding Agency Add Date Is Public Release Date IMG Release IMG Product Assignment IMG Submission ID Proposal GOLD ID Altitude Biotic Relationships Body Site Body Subsite Cell Arrangement Cell Shape Contact Email Contact Name Diseases Ecosystem Ecosystem Category Ecosystem Subtype Ecosystem Type Energy Source Funding Program Geographic Location Gram Staining Host Gender Host Name IMG Project ID Isolation Isolation Country Latitude Longitude Metabolism Motility Oxygen Requirement Phenotype Relevance Salinity Specific Ecosystem Sporulation Temperature Range Uncultured Type Genome Size (Number of total bases) Gene Count (Number of total Genes) Scaffold Count (Number of scaffolds) CRISPR Count (Number of CRISPRs) GC Count (Number of GC) GC % (Percentage of GC) Coding Base Count (Total number of coding bases) CDS Count (Number of CDS genes) CDS % (Percentage of CDS genes) RNA Count (Number of RNA genes) RNA % rRNA Count (Number of rRNA genes) 5S rRNA Count (Number of 5S rRNAs) 16S rRNA Count (Number of 16S rRNAs) 18S rRNA Count (Number of 18S rRNAs) 23S rRNA Count (Number of 23S rRNAs) 28S rRNA Count (Number of 28S rRNAs) tRNA Count (Number of tRNA genes) Other RNA Count (Number of other unclassified RNA genes) Pseudo Genes Count (Number of pseudo genes) Pseudo Genes % (Percentage of pseudo genes) Unchar Count (Number of uncharacerized genes) Unchar % (Percentage of uncharacterized genes) Dubious Count Dubious % w/ Func Pred Count (Number of genes with predicted protein product) w/ Func Pred % (Percentage of genes with predicted protein product) w/o Func Pred Sim Count (Number of genes without function prediction with similarity) w/o Func Pred Sim % (Percentage of genes without predicted protein product with similarity) w/o Func Pred No Sim Count (Number of genes without function 
prediction without similarity) w/o Func Pred No Sim % (Percentage of genes without function prediction without similarity) Orthologs Count Orthologs %` Paralogs Count Paralogs % Obsolete Count (Number of obsolete genes) Obsolete % (Percentage of obsolete genes) Revised Count (Number of revised genes) Revised % (Percentage of revised genes) Fused Count (Number of fused genes) Fused % (Percentage of fused genes) Fusion Component Count (Number of genes involved as fusion components) Fusion component % (Genes involved as fusion components percentage) SwissProt Count (Number of genes in SwissProt protein product) SwissProt % (Percentage of genes in SwissProt protein product) Not SwissProt Count (Number of genes not in SwissProt protein product) Not SwissProt % (Percentage of genes not in SwissProt protein product) SEED Count (Number of genes in SEED) SEED % (Percentage of genes in SEED) Not SEED Count (Number of genes not in SEED) Not SEED % (Percentage of genes not in SEED) COG Count (Number of genes in COG) COG % (Percentage of genes in COG) KOG Count (Number of genes in KOG) KOG % (Percentage of genes in KOG) Pfam Count (Number of genes in Pfam) Pfam % (Percentage of genes in Pfam) TIGRfam Count (Number of genes in TIGRfam) TIGRfam % (Percentage of genes in TIGRfam) InterPro Count (Number of genes in InterPro) InterPro % (Percentage of genes in InterPro) Enzyme Count (Number of genes assigned to enzymes) Enzyme % (Percentage of genes assigned to enzymes) TC Count (Number of genes assigned to Transporter Classification) TC % (Percentage of genes assigned to Transporter Classification) KEGG Count (Number of genes in KEGG) KEGG % (Percentage of genes in KEGG) Not KEGG Count (Number of genes not in KEGG) Not KEGG % (Percentage of genes not in KEGG) KO Count (Number of genes in KEGG Orthology (KO)) KO % (Percentage of genes in KEGG Orthology (KO)) Not KO Count (Number of genes not in KEGG Orthology (KO)) Not KO % (Percentage of genes not in KEGG Orthology (KO)) MetaCyc 
Count (Number of genes in MetaCyc) MetaCyc % (Percentage of genes in MetaCyc) Not MetaCyc Count (Number of genes not in MetaCyc) Not MetaCyc % (Percentage of genes not in MetaCyc IMG Term Count (Number of genes with IMG terms) IMG Term % (Percentage of genes with IMG terms) IMG Pathwawy Count (Number of genes in IMG pathwawys) IMG Pathway % (Percentage of genes in IMG pathways) IMG Parts List Count (Number of genes in IMG parts list) IMG Parts List % (Percentage of genes in IMG parts list) MyIMG Annotation Count MyIMG Annotation % Signal Peptide Count (Number of genes coding signal peptides) Signal Peptide % (Percentage of genes coding signal peptides) Transmembrane Count (Number of genes coding transmembrane proteins) Transmembrane % (Percentage of genes coding transmembrane proteins) Horizontally Transferred Count Horizontally Transferred % Genome Property Count (Number of genes in Genome Properties) Genome Property % (Percentage of genes in Genome Properties) Ortholog Group Count Paralog Group Count COG Cluster Count (Number of COG clusters) KOG Cluster Count (Number of KOG clusters) Pfam Cluster Count (Number of Pfam clusters) TIGRfam Cluster Count (Number of TIGRfam clusters) IMG Cluster Count IMG Cluster % Chromosomal Cassette Gene Count Chromosomal Cassette Gene % Chromosomal Cassette Count
2
+ 644736411 Archaea Finished Thermococcus gammatolerans EJ3 Thermococcus gammatolerans EJ3 DOE Joint Genome Institute Euryarchaeota Thermococci Thermococcales Thermococcaceae Thermococcus gammatolerans 644736411 593117 59389 33671 EJ3 2009-10-13 Yes 2009-12-01 IMG/W 3.0 Gc01034 Free living Coccus-shaped NCKyrpides@lbl.gov NIKOS None Organotroph oil wells of the Samotlor oil reservoir Gram- 14544 hydrothermal vent chimneys located in the mid-Atlantic Ridge and Guyamas basin. Motile Anaerobe Radiation resistant Biotechnological Nonsporulating Hyperthermophile 2045438 2206 1 1095567 0.54 1910049 2156 97.73 50 2.27 4 2 1 0 1 0 46 0 0 0 0 0 0 0 1427 64.69 1030 46.69 0 0 0 0 99 4.49 708 32.09 178 8.07 1978 89.66 2 0.09 2154 97.64 1524 69.08 757 34.32 1655 75.02 554 25.11 1681 76.2 472 21.4 199 9.02 529 23.98 1627 73.75 1047 47.46 1109 50.27 450 20.4 1706 77.33 554 25.11 161 7.3 193 8.75 0 0 285 12.92 561 25.43 70 3.17 0 0 1071 525 1156 505 2206 100 66
3
+ 2515075008 Archaea Permanent Draft Continuation of the Genomic Encyclopedia of Bacteria and Archaea pilot project Methanolobus tindarius DSM 2278 DOE Joint Genome Institute Euryarchaeota Methanomicrobia Methanosarcinales Methanosarcinaceae Methanolobus 2515075008 1090322 DOE; DSMZ 2012-06-29 Yes 2012-06-29 Yes 9393 Gi02920 Free living Coccus-shaped NCKyrpides@lbl.gov NIKOS None DOE-CSP 2011 Sediment from Lakes of Marinello, Italy Gram- 13501 Sediment from Lakes of Marinello, Italy Italy 38.132591 15.05422 Methanogen Anaerobe Tree of Life, GEBA Mesophile 3151883 3022 1 1254128 0.4 2767116 2957 97.85 65 2.15 9 3 3 0 3 0 50 6 34 1.13 0 0 0 0 2184 72.27 1124 37.19 0 0 0 0 5 0.17 0 0 0 0 2957 97.85 0 0 2957 97.85 2204 72.93 974 32.23 2219 73.43 889 29.42 2282 75.51 643 21.28 303 10.03 776 25.68 2181 72.17 1354 44.8 1603 53.04 624 20.65 2333 77.2 641 21.21 226 7.48 202 6.68 0 0 441 14.59 782 25.88 0 0 0 0 1352 589 1459 711 3022 100 256
4
+ 2512047039 Archaea Finished Pyrobaculum oguniense TE7 Pyrobaculum oguniense TE7, DSM 13380 UCSC Crenarchaeota Thermoprotei Thermoproteales Thermoproteaceae Pyrobaculum 2512047039 698757 84411 42375 2012-03-23 Yes 2012-03-23 No 7574 Gc02118 Rod-shaped NCKyrpides@lbl.gov NIKOS Heterotroph Oguni-cho, Kumamoto, Japan 559 Terrestrial hot spring at Oguni-cho, Kumamoto, Japan Japan 32.806322 130.718651 Facultative Hyperthermophile 2452920 3014 2 1350357 0.55 2154879 2869 95.19 145 4.81 3 1 1 0 1 0 48 94 34 1.13 0 0 0 0 1709 56.7 931 30.89 0 0 0 0 3 0.1 0 0 0 0 2869 95.19 0 0 2869 95.19 1757 58.29 790 26.21 1743 57.83 494 16.39 1804 59.85 559 18.55 210 6.97 670 22.23 2199 72.96 1107 36.73 1762 58.46 539 17.88 2330 77.31 485 16.09 168 5.57 168 5.57 0 0 775 25.71 630 20.9 0 0 0 0 1105 549 1100 460 3014 100 206
5
+ 650716053 Archaea Finished Genome sequencing and comparison of novel methanogens from peatlands and bioreactors Methanobacterium sp. SWAN-1 DOE Joint Genome Institute Euryarchaeota Methanobacteria Methanobacteriales Methanobacteriaceae Methanobacterium sp. 650716053 868131 67359 53561 SWAN-1 2011-08-22 Yes 2011-12-01 IMG/W 3.5 Gc01780 Free living ipagani@lbl.gov Ioanna Pagani None Lithotroph DOE-CSP 2010 Gram- 17802 Methanogen Bioenergy, Biotechnological, Comparative analysis Mesophile 2546541 2500 1 909924 0.36 2010186 2442 97.68 58 2.32 9 3 3 0 3 0 47 2 45 1.8 0 0 0 0 1536 61.44 1151 46.04 0 0 0 0 43 1.72 0 0 0 0 2442 97.68 0 0 2442 97.68 1838 73.52 787 31.48 1879 75.16 704 28.16 1869 74.76 576 23.04 205 8.2 707 28.28 1735 69.4 1149 45.96 1293 51.72 553 22.12 1889 75.56 490 19.6 176 7.04 162 6.48 0 0 490 19.6 589 23.56 240 9.6 0 0 1218 509 1288 625 2500 100 456
6
+ 650377982 Archaea Finished Sulfolobus islandicus REY15A Sulfolobus islandicus REY15A Los Alamos National Lab Crenarchaeota Thermoprotei Sulfolobales Sulfolobaceae Sulfolobus islandicus 650377982 930945 162071 60485 REY15A 2011-03-26 Yes 2011-07-01 IMG/W 3.4 Gc01624 Free living Singles Coccus-shaped ipagani@lbl.gov Ioanna Pagani None Lithotroph Gram- 18556 Nonmotile Facultative Acidophile Biotechnological Nonsporulating Hyperthermophile 2522992 2753 1 890812 0.35 2177858 2644 96.04 109 3.96 4 1 1 0 1 0 45 60 0 0 0 0 0 0 1812 65.82 1184 43.01 0 0 0 0 97 3.52 0 0 0 0 2644 96.04 55 2 2589 94.04 1914 69.52 935 33.96 1922 69.81 588 21.36 1983 72.03 577 20.96 216 7.85 643 23.36 2001 72.68 1056 38.36 1588 57.68 558 20.27 2086 75.77 191 6.94 104 3.78 57 2.07 0 0 474 17.22 577 20.96 5 0.18 0 0 1122 579 1165 496 2753 100 252
7
+ 649633040 Archaea Finished A Genomic Encyclopedia of Bacteria and Archaea (GEBA) Desulfurococcus mucosus 07/1, DSM 2162 DOE Joint Genome Institute Crenarchaeota Thermoprotei Desulfurococcales Desulfurococcaceae Desulfurococcus mucosus 649633040 765177 62227 48641 DSM 2162 2011-03-26 Yes 2011-07-01 IMG/W 3.4 Gc01595 1,053 m Free living Sphere-shaped NCKyrpides@lbl.gov NIKOS None Organotroph DOE-GEBA 2007 Askja, Iceland Gram- 13494 Hot solfataric spring; Iceland Iceland 65.05 -16.8 Sulfur respiration Nonmotile Anaerobe Tree of Life, Biotechnological, GEBA Nonsporulating Hyperthermophile 1314639 1421 1 698621 0.53 1186786 1371 96.48 50 3.52 3 1 1 0 1 0 46 1 26 1.83 0 0 0 0 934 65.73 438 30.82 0 0 0 0 48 3.38 0 0 0 0 1371 96.48 0 0 1371 96.48 1075 75.65 541 38.07 1042 73.33 346 24.35 1057 74.38 329 23.15 138 9.71 406 28.57 965 67.91 754 53.06 617 43.42 320 22.52 1051 73.96 234 16.47 69 4.86 90 6.33 0 0 323 22.73 294 20.69 63 4.43 0 0 799 418 838 329 1421 100 61
8
+ 649989953 Archaea Draft Haladaptatus paucihalophilus DX253 Haladaptatus paucihalophilus DX253 Oklahoma State University Euryarchaeota Halobacteria Halobacteriales Halobacteriaceae Haladaptatus paucihalophilus 649989953 797209 62523 50445 DX253 2011-03-26 Yes 2011-07-01 IMG/W 3.4 Gi07306 Clusters Coccus-shaped ipagani@lbl.gov Ioanna Pagani None Chemoorganotroph Zodletone Spring, Oklahoma Gram- 17533 Low-salt, sulfide- and sulfur-rich spring (Zodletone Spring) in south-western Oklahoma, USA USA 34.996 -98.688 Aerobe Biotechnological Halophile Mesophile 4284805 4496 32 2648659 0.62 3649979 4443 98.82 53 1.18 6 2 3 0 1 0 47 0 0 0 0 0 1 0.02 2528 56.23 2436 54.18 0 0 1 0 246 5.47 0 0 0 0 4443 98.82 0 0 4443 98.82 2986 66.41 1391 30.94 3008 66.9 836 18.59 3097 68.88 831 18.48 493 10.97 950 21.13 3493 77.69 1690 37.59 2753 61.23 809 17.99 3634 80.83 780 17.35 276 6.14 260 5.78 0 0 1106 24.6 1111 24.71 152 3.38 0 0 1401 708 1521 634 4496 100 445
9
+ 638154510 Archaea Finished Methanothermobacter thermoautotrophicus Delta H Methanothermobacter thermoautotrophicus Delta H J. Craig Venter Institute Euryarchaeota Methanobacteria Methanobacteriales Methanobacteriaceae Methanothermobacter thermautotrophicus 638154510 187420 57877 289 str. Delta H DOE 2006-10-02 Yes 2006-12-01 IMG/W 2.0 Gc00009 Free living NCKyrpides@lbl.gov NIKOS None Lithoautotroph, Autotroph, Lithotroph Sewage sludge in 1971 in Urbana Illinois Gram- 10677 Sewage sludge in 1971 in Urbana Illinois USA 40.106649 -88.195627 Methanogen Nonmotile Obligate anaerobe Biotechnological, Energy production Nonsporulating Thermophile 1751377 1893 1 867701 0.5 1570117 1845 97.46 48 2.54 7 3 2 0 2 0 39 2 23 1.22 0 0 35 1.85 1362 71.95 929 49.08 87 5 155 8 108 5.71 726 38.35 191 10.09 1654 87.37 1106 58.43 739 39.04 1458 77.02 637 33.65 1529 80.77 631 33.33 1544 81.56 533 28.16 147 7.77 637 33.65 1208 63.81 1047 55.31 798 42.16 514 27.15 1331 70.31 630 33.28 224 11.83 192 10.14 2 0.11 157 8.29 188 9.93 90 4.75 0 0 1125 477 1193 608 1980 104.6 63
10
+ 637000162 Archaea Finished Methanosarcina barkeri fusaro Methanosarcina barkeri Fusaro, DSM 804 DOE Joint Genome Institute Euryarchaeota Methanomicrobia Methanosarcinales Methanosarcinaceae Methanosarcina barkeri 637000162 269797 57715 103 str. fusaro DOE 2006-10-02 Yes 2006-12-01 IMG/W 2.0 Gc00295 Free living Singles Coccus-shaped NCKyrpides@lbl.gov NIKOS None Lithotroph DOEM 2001 Naples Italy Gram- 10390 Mud samples from Lago del Fusaro Lake in Naples Italy Italy 40.844 14.251 Methanogen Nonmotile Anaerobe Energy production, Environmental, Biotechnological, Carbon cycle Nonsporulating Mesophile 4873766 3834 2 1912156 0.39 3406841 3758 98.02 76 1.98 9 3 3 0 3 0 62 5 134 3.5 0 0 3 0.08 1820 47.47 2631 68.62 0 0 3 0 256 6.68 1567 40.87 329 8.58 3429 89.44 3631 94.71 127 3.31 2703 70.5 1153 30.07 2927 76.34 1026 26.76 2843 74.15 756 19.72 436 11.37 900 23.47 2858 74.54 1619 42.23 2139 55.79 733 19.12 3025 78.9 811 21.15 271 7.07 255 6.65 1 0.03 543 14.16 957 24.96 305 7.96 0 0 1409 629 1524 749 3834 100 887
@@ -0,0 +1,12 @@
1
# Put the gem's lib directory and this spec directory on the load path,
# with the spec directory ending up first.
spec_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(spec_dir, '..', 'lib'))
$LOAD_PATH.unshift(spec_dir)
require 'rspec'
require 'bio-img_metadata'

# Load every Ruby file under ./support/ (custom matchers, macros, etc.),
# including its subdirectories.
Dir["#{spec_dir}/support/**/*.rb"].each do |support_file|
  require support_file
end

# No custom RSpec configuration is applied.
RSpec.configure do |config|

end
metadata ADDED
@@ -0,0 +1,130 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bio-img_metadata
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ben J. Woodcroft
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-01-11 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: &80862770 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 2.8.0
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *80862770
25
+ - !ruby/object:Gem::Dependency
26
+ name: rdoc
27
+ requirement: &80862450 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ~>
31
+ - !ruby/object:Gem::Version
32
+ version: '3.12'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *80862450
36
+ - !ruby/object:Gem::Dependency
37
+ name: jeweler
38
+ requirement: &80861630 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ~>
42
+ - !ruby/object:Gem::Version
43
+ version: 1.8.4
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *80861630
47
+ - !ruby/object:Gem::Dependency
48
+ name: bundler
49
+ requirement: &80861100 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: 1.0.21
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *80861100
58
+ - !ruby/object:Gem::Dependency
59
+ name: bio
60
+ requirement: &80860260 !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: 1.4.2
66
+ type: :development
67
+ prerelease: false
68
+ version_requirements: *80860260
69
+ - !ruby/object:Gem::Dependency
70
+ name: rdoc
71
+ requirement: &80859920 !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ~>
75
+ - !ruby/object:Gem::Version
76
+ version: '3.12'
77
+ type: :development
78
+ prerelease: false
79
+ version_requirements: *80859920
80
+ description: Reads metadata from Integrated Microbial Genomes (IMG) metadata files
81
+ into a programmaticly useful state.
82
+ email: donttrustben near gmail.com
83
+ executables: []
84
+ extensions: []
85
+ extra_rdoc_files:
86
+ - LICENSE.txt
87
+ - README.md
88
+ files:
89
+ - .document
90
+ - .rspec
91
+ - .travis.yml
92
+ - Gemfile
93
+ - LICENSE.txt
94
+ - README.md
95
+ - Rakefile
96
+ - VERSION
97
+ - lib/bio-img_metadata.rb
98
+ - spec/bio-img_metadata_spec.rb
99
+ - spec/data/head.metadata.csv
100
+ - spec/spec_helper.rb
101
+ homepage: http://github.com/wwood/bioruby-img_metadata
102
+ licenses:
103
+ - MIT
104
+ post_install_message:
105
+ rdoc_options: []
106
+ require_paths:
107
+ - lib
108
+ required_ruby_version: !ruby/object:Gem::Requirement
109
+ none: false
110
+ requirements:
111
+ - - ! '>='
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
114
+ segments:
115
+ - 0
116
+ hash: 591437005
117
+ required_rubygems_version: !ruby/object:Gem::Requirement
118
+ none: false
119
+ requirements:
120
+ - - ! '>='
121
+ - !ruby/object:Gem::Version
122
+ version: '0'
123
+ requirements: []
124
+ rubyforge_project:
125
+ rubygems_version: 1.8.17
126
+ signing_key:
127
+ specification_version: 3
128
+ summary: Reads metadata from Integrated Microbial Genomes (IMG) metadata files into
129
+ a programmaticly useful state
130
+ test_files: []