publisci 0.1.3 → 0.1.4
- checksums.yaml +4 -4
- data/Gemfile +2 -2
- data/LICENSE.txt +19 -17
- data/README.md +41 -8
- data/README.rdoc +3 -5
- data/Rakefile +2 -2
- data/bin/publisci +9 -7
- data/examples/visualization/prov_viz.rb +1 -1
- data/lib/publisci.rb +19 -11
- data/lib/publisci/datacube_model.rb +2 -2
- data/lib/publisci/dataset/ORM/data_cube_orm.rb +2 -2
- data/lib/publisci/dataset/data_cube.rb +1 -1
- data/lib/publisci/dataset/dataset_for.rb +6 -1
- data/lib/publisci/dataset/interactive.rb +1 -46
- data/lib/publisci/generators/base.rb +22 -0
- data/lib/publisci/generators/maf.rb +172 -0
- data/lib/publisci/metadata/generator.rb +1 -1
- data/lib/publisci/parser.rb +62 -62
- data/lib/publisci/parsers/base.rb +29 -0
- data/lib/publisci/parsers/maf.rb +20 -0
- data/lib/publisci/readers/arff.rb +43 -43
- data/lib/publisci/readers/base.rb +2 -2
- data/lib/publisci/readers/csv.rb +2 -1
- data/lib/publisci/readers/maf.rb +15 -181
- data/lib/publisci/readers/r_matrix.rb +143 -143
- data/lib/publisci/writers/arff.rb +1 -1
- data/lib/publisci/writers/base.rb +1 -1
- data/resources/maf_rdf.ttl +98 -22
- data/spec/ORM/data_cube_orm_spec.rb +1 -1
- data/spec/ORM/prov_model_spec.rb +3 -3
- data/spec/dataset_for_spec.rb +1 -1
- data/spec/generators/maf_spec.rb +2 -1
- data/spec/maf_query_spec.rb +1 -1
- metadata +25 -23
- data/lib/r2rdf.rb +0 -226
- data/lib/template_bak.rb +0 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b2b51e26fd60c38dfa2f7b4d24b7d49f7d874d7c
+  data.tar.gz: 930163fafca5f08a3a0bef75a1defc325fcae8d6
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 77c57742c7300988650337353c4401d652cd2c4e3a2e34daffc0e00216d501b9624b0696a74c4ce79d1cb13a06bf62dfc570ec6580fb5dd6faea4afb33ff32aa
+  data.tar.gz: af000fd327fcd04f422fccd40c3fcf3a0a1478fff2aa5c7b447c207d130d62893bdd1963123e80b3acb070526b4842e8524be97eaf51e531ddb4c8abac1f33da
data/Gemfile
CHANGED
@@ -7,12 +7,12 @@ source "http://rubygems.org"
 # Include everything needed to run rake, tests, features, etc.
 group :development do
   gem "rspec", "~> 2.8.0"
-  gem "rdoc", "~> 3.12"
   gem "cucumber", ">= 0"
   gem "jeweler", "~> 1.8.4", :git => "https://github.com/technicalpickles/jeweler.git"
   gem "bundler", ">= 1.0.21"
   gem "bio", ">= 1.4.2"
-  gem "rdoc"
+  gem "rdoc"
+  gem "pry"
   gem "spoon"
 end

data/LICENSE.txt
CHANGED
@@ -1,20 +1,22 @@
-Copyright (c) 2013
+Copyright (c) 2013, Will Strinz
+All rights reserved.

-
-
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:

-
-
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.

-
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
-MERCHANTABILITY
-
-
-
-
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README.md
CHANGED
@@ -2,9 +2,7 @@

 [![Build Status](https://travis-ci.org/wstrinz/publisci.png?branch=master)](https://travis-ci.org/wstrinz/publisci)

-
-
-Note: this software is under active development!
+Note: this software is under active development! Until it hits v1.0.0, the overall API and usage pattern is subject to change.

 ## Installation

@@ -14,6 +12,10 @@ gem install publisci

 ## Usage

+#### DSL
+
+Most of the gem's functions can be accessed through its DSL
+
 ```ruby
 require 'publisci'
 include PubliSci::DSL
@@ -21,18 +23,18 @@ include PubliSci::DSL
 # Specify input data
 data do
   # use local or remote paths
-  source 'https://github.com/wstrinz/publisci/raw/master/spec/csv/bacon.csv'
+  source 'https://github.com/wstrinz/publisci/raw/master/spec/csv/bacon.csv'

   # specify datacube properties
-  dimension 'producer', 'pricerange'
+  dimension 'producer', 'pricerange'
   measure 'chunkiness'

   # set parser specific options
-  option 'label_column', 'producer'
+  option 'label_column', 'producer'
 end

 # Describe dataset
-metadata do
+metadata do
   dataset 'bacon'
   title 'Bacon dataset'
   creator 'Will Strinz'
@@ -48,14 +50,45 @@ repo = to_repository
 PubliSci::QueryHelper.execute('select * where {?s ?p ?o} limit 5', repo)

 # export in other formats
-PubliSci::Writers::ARFF.new.from_store(repo)
+PubliSci::Writers::ARFF.new.from_store(repo)
 ```


+#### Gem executable
+
+Running the gem using the `publisci` executable will attempt to find and run
+a triplifier for your input.
+
+For example, the following
+
+```sh
+publisci https://github.com/wstrinz/publisci/raw/master/spec/csv/bacon.csv
+```
+
+is equivalent to the DSL code
+
+```ruby
+require 'publisci'
+include PubliSci::DSL
+
+data do
+  source 'https://github.com/wstrinz/publisci/raw/master/spec/csv/bacon.csv'
+end
+
+generate_n3
+```

 The API doc is online. For more code examples see the test files in
 the source tree.

+### Custom Parsers
+
+Building a parser simply requires you to implement a `generate_n3` method, either at the class or instance level. Then register it using `PubliSci::Dataset.register_reader(extension, class)` with your reader's preferred file extension and its class. This way, if you call the `Dataset.for` method on a file with the given extension it will use your reader class.
+
+Including or extending `PubliSci::Readers::Base` will give you access to many helpful methods for creating and triplifying your data. There is a post on the [project blog](http://gsocsemantic.wordpress.com/2013/08/31/parsing-with-publisci-how-to-get-your-data-into-the-semantic-web/) with further details about how to design and implement a parser.
+
+The interface is in the process of being more rigidly defined to separate parsing, generation, and output, and it is advisable that you make your parsing code as stateless as possible for better handling of large inputs. Pull requests with parsers for new formats are greatly appreciated, however!
+
 ## Project home page

 Information on the source tree, documentation, examples, issues and
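The Custom Parsers notes above are abstract, so here is a minimal sketch of what a registered reader could look like. Everything specific in it — the class name, the `.tiny` extension, and the single triple it emits — is invented for illustration; only the `generate_n3` method and the `PubliSci::Dataset.register_reader(extension, class)` call come from the README text, and the exact extension format `Dataset.for` expects is an assumption.

```ruby
require 'publisci'

# Hypothetical reader: only the generate_n3 / register_reader contract
# is taken from the README above; names and output are made up.
class TinyTripleReader
  # Return an N3/turtle string for the given input file.
  def generate_n3(input_file, options = {})
    subject = "<http://example.org/dataset/#{File.basename(input_file, '.tiny')}>"
    "#{subject} <http://purl.org/dc/terms/source> \"#{input_file}\" .\n"
  end
end

# Register the reader for its preferred extension so Dataset.for can find it
# (whether the key includes the leading dot is an assumption).
PubliSci::Dataset.register_reader('.tiny', TinyTripleReader)
```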
data/README.rdoc
CHANGED
@@ -4,8 +4,6 @@
 src="https://secure.travis-ci.org/wstrinz/publisci.png"
 />}[http://travis-ci.org/#!/wstrinz/publisci]

-Full description goes here
-
 Note: this software is under active development!

 == Installation
@@ -16,13 +14,13 @@ Note: this software is under active development!

 == Developers

-To use the library
+To use the library

   require 'publisci'

 The API doc is online. For more code examples see also the test files in
 the source tree.
-
+
 == Project home page

 Information on the source tree, documentation, issues and how to contribute, see
@@ -34,7 +32,7 @@ The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.
 == Cite

 If you use this software, please cite one of
-
+
 * [BioRuby: bioinformatics software for the Ruby programming language](http://dx.doi.org/10.1093/bioinformatics/btq475)
 * [Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics](http://dx.doi.org/10.1093/bioinformatics/bts080)

data/Rakefile
CHANGED
@@ -16,12 +16,12 @@ Jeweler::Tasks.new do |gem|
   # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
   gem.name = "publisci"
   gem.homepage = "http://github.com/wstrinz/publisci"
-  gem.license = "
+  gem.license = "BSD 2-Clause"
   gem.summary = %Q{Publish scientific results to the semantic web}
   gem.description = %Q{A toolkit for publishing scientific results and datasets using RDF, OWL, and related technologies }
   gem.email = "wstrinz@gmail.com"
   gem.authors = ["Will Strinz"]
-  gem.version = "0.1.
+  gem.version = "0.1.4"

   # dependencies defined in Gemfile
 end
data/bin/publisci
CHANGED
@@ -12,7 +12,7 @@ USAGE = <<-EOF
 Usage:

   publisci file
-    (
+    (triplify file using best available reader)
 EOF

 gempath = File.dirname(File.dirname(__FILE__))
@@ -27,12 +27,14 @@ if ARGV.size == 0
   print USAGE
 elsif ARGV.size == 1
   #assume file, run DSL (prov for now)
-  if File.exist? ARGV[0]
-    puts "#{PubliSci::Prov.prefixes}\n#{PubliSci::Prov.run(ARGV[0])}"
-  else
-
-
-  end
+  # if File.exist? ARGV[0]
+  #   puts "#{PubliSci::Prov.prefixes}\n#{PubliSci::Prov.run(ARGV[0])}"
+  # else
+  #   puts "(no file #{ARGV[0]})"
+  #   print USAGE
+  # end
+  puts PubliSci::Dataset.for(ARGV[0])
+  # PubliSci::Dataset.for(ARGV[0])
 else
   if ARGV.size % 2 == 0
     opts=Hash[*ARGV]
@@ -58,7 +58,7 @@ infile = ARGV[0] || 'primer.prov'
 runner = PubliSci::Prov::DSL::Instance.new
 runner.instance_eval(IO.read(infile),infile)
 repo = runner.to_repository
-Spira.
+Spira.repository = repo

 include PubliSci::Prov::Model

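The executable now delegates everything to `PubliSci::Dataset.for`, so a rough Ruby equivalent of running `publisci somefile` is the snippet below; the sample path and the assumption that the chosen reader returns a printable turtle/N3 string (as the `puts` in the script implies) are mine.

```ruby
require 'publisci'

# Roughly what `bin/publisci <file>` now does: pick the best available
# reader for the input and print whatever it generates.
input  = ARGV[0] || 'spec/csv/bacon.csv'   # example path
output = PubliSci::Dataset.for(input)
puts output
```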
data/lib/publisci.rb
CHANGED
@@ -4,6 +4,8 @@ require 'tempfile'
 require 'fileutils'
 require 'csv'

+require 'spira'
+
 require 'rdf'
 require 'sparql'
 require 'sparql/client'
@@ -14,11 +16,11 @@ require 'json/ld'
 require 'rserve'
 require 'rest-client'

-begin
-
-rescue LoadError
-
-end
+# begin
+#   require 'spira'
+# rescue LoadError
+#   puts "can't load spira; orm unavailable"
+# end

 def load_folder(folder)
   Dir.foreach(File.dirname(__FILE__) + "/#{folder}") do |file|
@@ -30,17 +32,23 @@ def load_folder(folder)
 end

 load_folder('publisci/mixins')
-
-load File.dirname(__FILE__) + '/publisci/dataset/interactive.rb'
-load File.dirname(__FILE__) + '/publisci/query/query_helper.rb'
 load File.dirname(__FILE__) + '/publisci/parser.rb'
-load File.dirname(__FILE__) + '/publisci/
-
-load File.dirname(__FILE__) + '/publisci/store.rb'
+load File.dirname(__FILE__) + '/publisci/dataset/interactive.rb'
+
 load File.dirname(__FILE__) + '/publisci/dataset/data_cube.rb'
 load File.dirname(__FILE__) + '/publisci/dataset/dataset_for.rb'
 load File.dirname(__FILE__) + '/publisci/dataset/configuration.rb'
 load File.dirname(__FILE__) + '/publisci/dataset/dataset.rb'
+
+load File.dirname(__FILE__) + '/publisci/generators/base.rb'
+load File.dirname(__FILE__) + '/publisci/parsers/base.rb'
+load_folder('publisci/parsers')
+load_folder('publisci/generators')
+
+load File.dirname(__FILE__) + '/publisci/query/query_helper.rb'
+load File.dirname(__FILE__) + '/publisci/post_processor.rb'
+load File.dirname(__FILE__) + '/publisci/analyzer.rb'
+load File.dirname(__FILE__) + '/publisci/store.rb'
 load File.dirname(__FILE__) + '/publisci/datacube_model.rb'
 load File.dirname(__FILE__) + '/publisci/output.rb'
 load File.dirname(__FILE__) + '/publisci/metadata/prov/element.rb'
data/lib/publisci/datacube_model.rb
CHANGED
@@ -37,7 +37,7 @@ begin

   def load_repo(repo)
     raise "Not an RDF::Repository - #{repo}" unless repo.is_a? RDF::Repository
-    Spira.
+    Spira.repository = repo
   end

   class Observation < Spira::Base
@@ -60,7 +60,7 @@ begin
       uri[-1] = '' if uri[-1] == '>'
       uri.to_s.split('/').last.split('#').last
     end
-
+
   end

   def reload_observation
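Both hunks above replace the old truncated `Spira.` call with `Spira.repository = repo`, which matches the newer Spira API where the single global repository is set by assignment. A minimal sketch of how that global repository ends up being used — the file name is an example, not something from this diff:

```ruby
require 'rdf'
require 'spira'

# Load previously generated triples and hand them to Spira so that
# Spira::Base models (such as the gem's Observation class) can query them.
repo = RDF::Repository.load('bacon.ttl')   # example file
Spira.repository = repo
```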
data/lib/publisci/dataset/ORM/data_cube_orm.rb
CHANGED
@@ -5,13 +5,13 @@ module PubliSci
     extend PubliSci::Dataset::DataCube
     extend PubliSci::Analyzer
     extend PubliSci::Query
-    extend PubliSci::
+    extend PubliSci::RDFParser

     include PubliSci::Dataset::DataCube
     include PubliSci::Analyzer
     include PubliSci::Metadata::Generator
     include PubliSci::Query
-    include PubliSci::
+    include PubliSci::RDFParser

     attr_accessor :labels
     attr_accessor :dimensions
data/lib/publisci/dataset/dataset_for.rb
CHANGED
@@ -28,7 +28,12 @@ module PubliSci
     end

     if reader_registry.keys.include? extension
-      reader_registry[extension]
+      k = reader_registry[extension]
+      if k.respond_to? "automatic"
+        reader_registry[extension].automatic(object,options,ask_on_ambiguous)
+      else
+        reader_registry[extension].new.automatic(object,options,ask_on_ambiguous)
+      end
     else
       case extension
       when ".RData"
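The new branch above lets `Dataset.for` work with both styles of reader: those that expose `automatic` at the class or module level, and those that must be instantiated first. A condensed restatement of that dispatch, with a hypothetical registry hash standing in for the gem's internal one:

```ruby
# Sketch of the dispatch added above; the registry lookup and method
# name are taken from the hunk, everything else is illustrative.
def run_reader(reader_registry, extension, object, options = {}, ask_on_ambiguous = true)
  k = reader_registry[extension]
  if k.respond_to?('automatic')
    k.automatic(object, options, ask_on_ambiguous)       # class-level reader
  else
    k.new.automatic(object, options, ask_on_ambiguous)   # instance-level reader
  end
end
```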
data/lib/publisci/dataset/interactive.rb
CHANGED
@@ -23,50 +23,5 @@
     default
   end
 end
-
-# def interactive(options={})
-#   options = defaults.merge(options)
-#   qb = {}
-
-#   puts "load config from file? [y/N]"
-#   if gets.chomp == "y"
-#     #use yaml or DSL file to configure
-#   else
-#     qb[:dimensions] = dimensions()
-#     qb[:measures] = measures()
-#   end
-
-#   puts "load data from file? [y/N]"
-#   if gets.chomp == "y"
-#     #attempt to load dataset from file, ask user to resolve problems or ambiguity
-#   else
-#   end
-#   qb
-# end
-
-# def dimensions
-#   puts "Enter a list of dimensions, separated by commas"
-#   arr = gets.chomp.split(",")
-#   dims = {}
-
-#   arr.map{|dim|
-#     puts "What is the range of #{dim.chomp.strip}? [:coded]"
-#     type = gets.chomp
-#     type = :coded if type == ":coded" || type == ""
-#     dims[dim.chomp.strip] = {type: type}
-#   }
-
-#   dims
-# end
-
-# def measures
-#   puts "Enter a list of measures, separated by commas"
-#   arr = gets.chomp.split(",")
-#   meas = []
-
-#   arr.map{|m| meas << m.chomp.strip}
-
-#   meas
-# end
 end
-end
+end
data/lib/publisci/generators/base.rb
ADDED
@@ -0,0 +1,22 @@
+module PubliSci
+  module Generators
+    module Base
+      include PubliSci::Dataset::DataCube
+
+      def write(*args)
+        raise "Should be overriden"
+      end
+      alias_method :generate_n3, :write
+
+      def write_to(out, string)
+        out.write string
+      end
+
+      def close_output(out)
+        if out.is_a? File
+          out.close
+        end
+      end
+    end
+  end
+end
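A generator built on the new `Generators::Base` module is expected to override `write` (aliased to `generate_n3`) and can lean on `write_to`/`close_output` for its output handling, as the MAF generator below does. A bare-bones sketch with an invented generator, assuming the gem is already loaded:

```ruby
module PubliSci
  module Generators
    # Hypothetical generator showing the Base contract; it just echoes
    # the record as a comment line rather than producing real turtle.
    class Echo
      extend Base

      def self.write(record, out, label, options = {})
        write_to(out, "# observation #{label}: #{record.inspect}\n")
      end
    end
  end
end

out = File.open('echo.ttl', 'w')                        # example output
PubliSci::Generators::Echo.write(%w{a b c}, out, 'obs1')
PubliSci::Generators::Echo.close_output(out)            # closes File handles
```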
data/lib/publisci/generators/maf.rb
ADDED
@@ -0,0 +1,172 @@
+module PubliSci
+  module Generators
+    class MAF
+      extend Base
+
+      COLUMN_NAMES = %w{ Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome Start_Position End_Position Strand Variant_Classification Variant_Type Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 dbSNP_RS dbSNP_Val_Status Tumor_Sample_Barcode Matched_Norm_Sample_Barcode Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2 Tumor_Validation_Allele1 Tumor_Validation_Allele2 Match_Norm_Validation_Allele1 Match_Norm_Validation_Allele2 Verification_Status Validation_Status Mutation_Status Sequencing_Phase Sequence_Source Validation_Method Score BAM_File Sequencer Tumor_Sample_UUID Matched_Norm_Sample_UUID patient_id sample_id}
+
+      COMPONENT_RANGES = { "Tumor_Sample_Barcode" => "xsd:string", "Start_position" => "xsd:int", "Center" => "xsd:string", "NCBI_Build" => "xsd:int", "Chromosome" => "xsd:int" }
+
+      TCGA_CODES =
+      {
+        "Variant_Classification" => %w{Frame_Shift_Del Frame_Shift_Ins In_Frame_Del In_Frame_Ins Missense_Mutation Nonsense_Mutation Silent Splice_Site Translation_Start_Site Nonstop_Mutation 3'UTR 3'Flank 5'UTR 5'Flank IGR1 Intron RNA Targeted_Region},
+        "Variant_Type" => %w{SNP DNP TNP ONP INS DEL Consolidated},
+        "dbSNP_Val_Status" => %w{by1000genomes by2Hit2Allele byCluster byFrequency byHapMap byOtherPop bySubmitter alternate_allele},
+        "Verification_Status" => %w{Verified, Unknown},
+        "Validation_Status" => %w{Untested Inconclusive Valid Invalid},
+        "Mutation_Status" => %w{None Germline Somatic LOH Post-transcriptional modification Unknown},
+        "Sequence_Source" => %w{WGS WGA WXS RNA-Seq miRNA-Seq Bisulfite-Seq VALIDATION Other ncRNA-Seq WCS CLONE POOLCLONE AMPLICON CLONEEND FINISHING ChIP-Seq MNase-Seq DNase-Hypersensitivity EST FL-cDNA CTS MRE-Seq MeDIP-Seq MBD-Seq Tn-Seq FAIRE-seq SELEX RIP-Seq ChIA-PET},
+        "Sequencer" => ["Illumina GAIIx", "Illumina HiSeq", "SOLID", "454", "ABI 3730xl", "Ion Torrent PGM", "Ion Torrent Proton", "PacBio RS", "Illumina MiSeq", "Illumina HiSeq 2500", "454 GS FLX Titanium", "AB SOLiD 4 System" ]
+      }
+
+      BARCODE_INDEX = COLUMN_NAMES.index('Tumor_Sample_Barcode')
+
+      class << self
+        def write(record, out, label, options={})
+
+          options = process_options(options)
+
+          options[:no_labels] ||= true
+          options[:lookup_hugo] ||= false
+          options[:complex_objects] ||= false
+          options[:ranges] ||= COMPONENT_RANGES
+
+          write_to(out, process_line(record, label, options))
+        end
+
+        def write_structure(input, output, options)
+          write_to(output, structure(options))
+        end
+
+        def process_options(options)
+          options[:dimensions] = dimensions = %w{Variant_Classification Variant_Type dbSNP_Val_Status Verification_Status Validation_Status Mutation_Status Sequence_Source Sequencer}
+          options[:codes] = codes = dimensions
+          options[:measures] = (COLUMN_NAMES - dimensions - codes)
+          options[:dataset_name] ||= "MAF_#{Time.now.nsec.to_s(32)}"
+
+          options
+        end
+
+        def process_line(entry,label,options)
+          entry = (entry.fill(nil,entry.length...COLUMN_NAMES.length-2) + parse_barcode(entry[BARCODE_INDEX])).flatten
+
+          entry[0] = "http://identifiers.org/hgnc.symbol/#{entry[0]}" if entry[0]
+
+          # A 0 in the entrez-id column appears to mean null
+          col=1
+          entry[col] = nil if entry[col] == '0'
+          entry[col] = "http://identifiers.org/ncbigene/#{entry[col]}" if entry[col]
+
+          # Only link non-novel dbSNP entries
+          col = COLUMN_NAMES.index('dbSNP_RS')
+          if entry[col] && entry[col][0..1] == "rs"
+            entry[col] = "http://identifiers.org/dbsnp/#{entry[col].gsub('rs','')}"
+          end
+
+          # optionally create typed objects using sio nodes
+          if options[:complex_objects]
+            entry = sio_values(entry)
+          end
+
+          data = {}
+          COLUMN_NAMES.each_with_index{|col,i|
+            data[col] = [entry[i]]
+          }
+
+          observations(options[:measures],options[:dimensions],options[:codes],data,[label],options[:dataset_name],options).first
+        end
+
+        def sio_values(entry)
+          entry[0] = sio_value('http://edamontology.org/data_1791',entry[0]) if entry[0]
+
+          # Link entrez genes
+          col=1
+          entry[col] = sio_value("http://identifiers.org/ncbigene",entry[col]) if entry[col]
+
+          col = COLUMN_NAMES.index('dbSNP_RS')
+          entry[col] = sio_value("http://identifiers.org/dbsnp", entry[col])
+
+          # test SIO attributes for chromosome
+          col = COLUMN_NAMES.index('Chromosome')
+          entry[col] = sio_value("http://purl.org/obo/owl/SO#SO_0000340",entry[col])
+
+          # More SIO attributes for alleles
+          %w{Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2}.each{|name|
+            col = COLUMN_NAMES.index(name)
+            entry[col] = sio_value("http://purl.org/obo/owl/SO#SO_0001023",entry[col])
+          }
+
+          col = COLUMN_NAMES.index("Strand")
+          entry[col] = sio_attribute("http://edamontology.org/data_0853",entry[col])
+
+          col = COLUMN_NAMES.index("Center")
+          entry[col] = sio_attribute("foaf:homepage",entry[col])
+
+          # Use faldo for locations End_Position
+          col = COLUMN_NAMES.index("Start_Position")
+          entry[col] = sio_attribute("http://biohackathon.org/resource/faldo#begin", entry[col],"http://biohackathon.org/resource/faldo#Position")
+
+          col = COLUMN_NAMES.index("End_Position")
+          entry[col] = sio_attribute("http://biohackathon.org/resource/faldo#end", entry[col],"http://biohackathon.org/resource/faldo#Position")
+
+          entry
+        end
+
+        def structure(options={})
+
+          options = process_options(options)
+
+          str = prefixes(options[:dataset_name],options)
+          str << data_structure_definition(options[:measures],options[:dimensions],options[:codes],options[:dataset_name],options)
+          str << dataset(options[:dataset_name],options)
+          component_specifications(options[:measures], options[:dimensions], options[:codes], options[:dataset_name], options).map{ |c| str << c }
+          measure_properties(options[:measures],options[:dataset_name],options).map{|m| str << m}
+          dimension_properties(options[:dimensions],options[:codes], options[:dataset_name],options).map{|d| str << d}
+          code_lists(options[:codes],TCGA_CODES,options[:dataset_name],options).map{|c| str << c}
+          concept_codes(options[:codes],TCGA_CODES,options[:dataset_name],options).map{|c| str << c}
+
+          str
+        end
+
+        def post_process(file)
+          reg = %r{http://identifiers.org/hgnc.symbol/(\w+)}
+          hugo_cache ||= {}
+          PubliSci::PostProcessor.process(file,file,reg){|g|
+            hugo_cache[g] ||= official_symbol(g)
+            'http://identifiers.org/hgnc.symbol/' + hugo_cache[g]
+          }
+        end
+
+        def column_replace(entry,column,prefix,value=nil)
+          if value
+            entry[COLUMN_NAMES.index(column)] = prefix + value
+          else
+            entry[COLUMN_NAMES.index(column)] += prefix
+          end
+        end
+
+        def official_symbol(hugo_symbol)
+          qry = <<-EOF
+
+          SELECT distinct ?official where {
+            {?hgnc <http://bio2rdf.org/hgnc_vocabulary:approved_symbol> "#{hugo_symbol}"}
+            UNION
+            {?hgnc <http://bio2rdf.org/hgnc_vocabulary:synonym> "#{hugo_symbol}"}
+
+            ?hgnc <http://bio2rdf.org/hgnc_vocabulary:approved_symbol> ?official
+          }
+
+          EOF
+
+          sparql = SPARQL::Client.new("http://cu.hgnc.bio2rdf.org/sparql")
+          sparql.query(qry).map(&:official).first.to_s
+        end
+
+        def parse_barcode(code)
+          #TCGA-E9-A22B-01A-11D-A159-09
+          [code[5..11], code[13..-1]]
+        end
+      end
+    end
+  end
+end
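The `parse_barcode` helper at the end of the new generator slices a TCGA sample barcode into the patient and sample parts that populate the extra `patient_id` and `sample_id` columns. For the barcode given in its comment, the fixed-index slices work out as follows (a worked example, not code from the diff):

```ruby
code = 'TCGA-E9-A22B-01A-11D-A159-09'

patient_id = code[5..11]    # => "E9-A22B"
sample_id  = code[13..-1]   # => "01A-11D-A159-09"

# parse_barcode(code) therefore returns ["E9-A22B", "01A-11D-A159-09"],
# which process_line appends to the row after padding it to the fixed
# COLUMN_NAMES width.
```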