RubyGems - bio-publisci - Versions diffs - 0.0.3 → 0.0.4 - Mend

bio-publisci 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46)

checksums.yaml +4 -4
data/Gemfile +1 -0
data/Rakefile +5 -5
data/bin/bio-publisci +34 -11
data/examples/bio-band_integration.rb +9 -0
data/examples/no_magic.prov +40 -0
data/examples/primer.prov +28 -0
data/examples/prov_dsl.prov +51 -0
data/features/create_generator.feature +5 -9
data/features/integration_steps.rb +8 -8
data/features/metadata.feature +15 -2
data/features/metadata_steps.rb +21 -0
data/features/orm_steps.rb +5 -5
data/features/prov_dsl.feature +14 -0
data/features/prov_dsl_steps.rb +11 -0
data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +234 -236
data/lib/bio-publisci/dataset/ORM/observation.rb +1 -3
data/lib/bio-publisci/dataset/data_cube.rb +30 -26
data/lib/bio-publisci/dataset/dataset_for.rb +14 -8
data/lib/bio-publisci/metadata/metadata.rb +180 -42
data/lib/bio-publisci/metadata/prov/activity.rb +106 -0
data/lib/bio-publisci/metadata/prov/agent.rb +94 -0
data/lib/bio-publisci/metadata/prov/association.rb +73 -0
data/lib/bio-publisci/metadata/prov/derivation.rb +53 -0
data/lib/bio-publisci/metadata/prov/dsl.rb +159 -0
data/lib/bio-publisci/metadata/prov/element.rb +52 -0
data/lib/bio-publisci/metadata/prov/entity.rb +101 -0
data/lib/bio-publisci/metadata/prov/plan.rb +32 -0
data/lib/bio-publisci/metadata/prov/prov.rb +76 -0
data/lib/bio-publisci/mixins/custom_predicate.rb +26 -0
data/lib/bio-publisci/mixins/vocabulary.rb +8 -0
data/lib/bio-publisci/output.rb +27 -0
data/lib/bio-publisci/parser.rb +17 -8
data/lib/bio-publisci/readers/csv.rb +9 -7
data/lib/bio-publisci/readers/dataframe.rb +9 -8
data/lib/bio-publisci/readers/{big_cross.rb → r_cross.rb} +6 -10
data/lib/bio-publisci/readers/r_matrix.rb +37 -13
data/lib/bio-publisci/spira.rb +82 -0
data/lib/bio-publisci/writers/dataframe.rb +65 -65
data/lib/bio-publisci.rb +9 -4
data/spec/ORM/data_cube_orm_spec.rb +3 -3
data/spec/dataset_for_spec.rb +29 -0
data/spec/generators/r_cross_spec.rb +51 -0
data/spec/generators/r_matrix_spec.rb +14 -5
metadata +42 -8
data/lib/bio-publisci/readers/cross.rb +0 -72

data/lib/bio-publisci/readers/{big_cross.rb → r_cross.rb} RENAMED Viewed

@@ -1,17 +1,17 @@
 module R2RDF
   module Reader
-    class BigCross
+    class RCross
       include R2RDF::Dataset::DataCube
+      include R2RDF::Reader::Output
       def generate_n3(client, var, outfile_base, options={})
         meas = measures(client,var,options)
         dim = dimensions(client,var,options)
         codes = codes(client,var,options)
         #write structure
         open(outfile_base+'_structure.ttl','w'){|f| f.write structure(client,var,options)}
         n_individuals = client.eval("length(#{var}$pheno[[1]])").payload.first
         chromosome_list = (1..19).to_a.map(&:to_s) + ["X"]
         chromosome_list.map{|chrom|
@@ -19,23 +19,19 @@ module R2RDF
           entries_per_individual = client.eval("length(#{var}$geno$'#{chrom}'$map)").to_ruby
           #get genotype data (currently only for chromosome 1)
-          # => puts "#{var}$geno$'#{chrom}'"
           geno_chr = client.eval("#{var}$geno$'#{chrom}'")
           #get number of markers per individual
           #write observations
           n_individuals.times{|indi|
-            #time ||= Time.now
             obs_data = observation_data(client,var,chrom.to_s,indi,geno_chr,entries_per_individual,options)
             labels = labels_for(obs_data,chrom.to_s,indi)
             open(outfile_base+"_#{chrom}.ttl",'a'){|f| observations(meas,dim,codes,obs_data,labels,var,options).map{|obs| f.write obs}}
-            puts "(#{chrom}) #{indi}/#{n_individuals}" #(#{Time.now - time})
-            #time = Time.now
+            puts "(#{chrom}) #{indi}/#{n_individuals}" unless options[:quiet]
           }
         }
-        #generate(measures, dimensions, codes, observation_data, observation_labels, var, options)
       end
       def structure(client,var,options={})
@@ -48,7 +44,7 @@ module R2RDF
         str << dataset(var,options)
         component_specifications(meas, dim, var, options).map{ |c| str << c }
         measure_properties(meas,var,options).map{|m| str << m}
         str
       end
@@ -68,7 +64,7 @@ module R2RDF
       end
       def codes(client, var, options={})
-        []
+        []
       end
       def labels_for(data,chr,individual,options={})

data/lib/bio-publisci/readers/r_matrix.rb CHANGED Viewed

@@ -13,11 +13,11 @@ module R2RDF
 				meas = measures(client,var,options)
 				dim = dimensions(client,var,options)
 				codes = codes(client,var,options)
 				outvar = sanitize([var]).first
 				probes_per_file = options[:probes_per_file] || 100
-				col_select = "colnames"
+				col_select = "colnames"
 				col_select = "names" if options[:type] == :dataframe
 				#write structure
@@ -30,18 +30,42 @@ module R2RDF
 				end
 				markers = rows(client,var,options)
+        if options[:print]
+            puts prefixes(var,options)
+        end
+        if options[:output] == :string
+            str = prefixes(var,options)
+        end
 				probes.each_with_index{|probe,i|
 					#write prefixes and erase old file on first run
-					open(outfile_base+"_#{i/probes_per_file}.ttl",'w'){|f| f.write prefixes(var,options)} if i % probes_per_file == 0
+          unless options[:print] || options[:output] == :string
+  					open(outfile_base+"_#{i/probes_per_file}.ttl",'w'){|f| f.write prefixes(var,options)} if i % probes_per_file == 0
+          end
 					i+=1
 					obs_data = observation_data(client,var,i,markers,options)
 					labels = labels_for(client,var,probe)
 					# labels = sanitize(labels)
 					# return obs_data
-					open(outfile_base+"_#{i/probes_per_file}.ttl",'a'){|f| observations(meas,dim,codes,obs_data,labels,outvar,options).map{|obs| f.write obs}}
-					puts "#{i}/#{probes.size}" unless options[:quiet]
+          if options[:print]
+            observations(meas,dim,codes,obs_data,labels,outvar,options).each{|obs| puts obs}
+          end
+          if options[:output] == :string
+            observations(meas,dim,codes,obs_data,labels,outvar,options).each{|obs| str << obs}
+          end
+          unless options[:print] || options[:output] == :string
+  					open(outfile_base+"_#{i/probes_per_file}.ttl",'a'){|f| observations(meas,dim,codes,obs_data,labels,outvar,options).map{|obs| f.write obs}}
+  					puts "#{i}/#{probes.size}" unless options[:quiet]
+          end
 				}
+        if options[:output] == :string
+          str
+        end
 			end
 			def structure(client,var,outvar,options={})
@@ -54,14 +78,14 @@ module R2RDF
 				str << dataset(outvar,options)
     		component_specifications(meas, dim, var, options).map{ |c| str << c }
 				measure_properties(meas,var,options).map{|m| str << m}
 				str
 			end
 			#for now just make everything a measure
 			def measures(client, var, options={})
 				if options[:measures]
-						options[:measures]
+						options[:measures]
 				else
 					["probe","marker","value"]
 				end
@@ -74,7 +98,7 @@ module R2RDF
 			end
 			def codes(client, var, options={})
-				[]
+				[]
 			end
 			def labels_for(connection,var,probe_id,options={})
@@ -124,11 +148,11 @@ module R2RDF
 				data["#{col_label}"] = []
 				data["#{row_label}"] = []
 				data["#{val_label}"] = []
 				# n_individuals.times{|row_individ|
 					# puts "#{row_individ}/#{n_individuals}"
-				col_select = "colnames"
+				col_select = "colnames"
 				col_select = "names" if options[:type] == :dataframe
 				if options[:type] == :dataframe
@@ -143,7 +167,7 @@ module R2RDF
 					data["#{row_label}"] << row_names[i]
 					data["#{val_label}"] << lod
 				}
 				data.map{|k,v| v.flatten!}
 				data
 			end

data/lib/bio-publisci/spira.rb ADDED Viewed

@@ -0,0 +1,82 @@
+require 'rdf/4store'
+require 'spira'
+module R2RDF
+  module ORM
+    # class Person < Spira::Base
+    # configure :base_uri => "http://example.org/example/people"
+    # property :name, :predicate => FOAF.name, :type => String
+    # end
+    # class Observation < Spira::Base
+    #   type RDF::URI.new('http://purl.org/linked-data/cube#Observation')
+    #   property :label, predicate: RDFS.label
+    # end
+    QB ||= RDF::Vocabulary.new(RDF::URI.new('http://purl.org/linked-data/cube#'))
+    class Component < Spira::Base
+      type QB.ComponentSpecification
+      property :dimension, predicate: QB.dimension # RDF::URI.new('http://purl.org/linked-data/cube#dimension')
+      property :measure, predicate: QB.measure # RDF::URI.new('http://purl.org/linked-data/cube#measure')
+    end
+    class DataStructureDefinition < Spira::Base
+      type QB.DataStructureDefinition
+      has_many :component, predicate: QB.component
+    end
+    class DataSet < Spira::Base
+      type QB.DataSet
+      property :label, predicate: RDFS.label
+    end
+    class Dimension < Spira::Base
+      type QB.DimensionProperty
+      property :range, predicate: RDFS.range
+      property :label, predicate: RDFS.label
+    end
+    class Measure < Spira::Base
+      type QB.MeasureProperty
+      property :label, predicate: RDFS.label
+    end
+    def load_repo(repo)
+      raise "Not an RDF::Repository - #{repo}" unless repo.is_a? RDF::Repository
+      Spira.add_repository :default, repo
+    end
+    def observation
+      unless R2RDF::ORM.const_defined?("Observation")
+        obs = Class.new(Spira::Base) do
+          type RDF::URI.new('http://purl.org/linked-data/cube#Observation')
+          property :structure, predicate: QB.dataSet
+          ((Dimension.each.to_a | Measure.each.to_a) || []).each{|component|
+            property strip_uri(component.subject.to_s), predicate: component.subject
+          }
+        end
+        R2RDF::ORM.const_set("Observation",obs)
+      end
+      Observation
+    end
+    def reload_observation
+      R2RDF::ORM.send(:remove_const, "Observation")
+      observation
+    end
+    def strip_uri(uri)
+      uri = uri.to_s.dup
+      uri[-1] = '' if uri[-1] == '>'
+      uri.to_s.split('/').last.split('#').last
+    end
+  end
+end

data/lib/bio-publisci/writers/dataframe.rb CHANGED Viewed

@@ -1,81 +1,81 @@
 module R2RDF
-		module Writer
-	  	module Dataframe
+  module Writer
+    module Dataframe
-	    def framestring(name,vectors)
-	      framestr = "#{name} = data.frame("
-	      vectors.map{ |k,v| framestr << k + '=' + k +','}
-	      framestr[-1] = ')'
-	      framestr
-	    end
-	    def get_vectors(variable_name, helper, repo)
-	      column_names = helper.get_ary(helper.execute(helper.property_names(variable_name), repo)).flatten.map{|n| n.gsub(' Component','')}
-	      vectors = {}
-	      column_names.map{|n|
-	        vectors[n] = helper.get_ary(helper.execute(helper.property_values(variable_name,n),repo),'to_f').flatten unless n == "refRow"
-	      }
-	      vectors
-	    end
+    def framestring(name,vectors)
+      framestr = "#{name} = data.frame("
+      vectors.map{ |k,v| framestr << k + '=' + k +','}
+      framestr[-1] = ')'
+      framestr
+    end
-	    def create_dataframe(name, connection, rows, vectors)
-	      connection.assign('rows', rows)
-	      vectors.map{ |k,v|
-	        connection.assign(k,v)
-	      }
-	      connection.eval(framestring(name,vectors))
-	      connection.eval("row.names(#{name}) <- rows")
-	      connection.eval(name)
-	    end
+    def get_vectors(variable_name, helper, repo)
+      column_names = helper.get_ary(helper.execute(helper.property_names(variable_name), repo)).flatten.map{|n| n.gsub(' Component','')}
+      vectors = {}
+      column_names.map{|n|
+        vectors[n] = helper.get_ary(helper.execute(helper.property_values(variable_name,n),repo),'to_f').flatten unless n == "refRow"
+      }
+      vectors
+    end
-	    def save_workspace(connection, loc)
-	    	connection.eval "save.image(#{loc})"
-	    end
+    def create_dataframe(name, connection, rows, vectors)
+      connection.assign('rows', rows)
+      vectors.map{ |k,v|
+        connection.assign(k,v)
+      }
+      connection.eval(framestring(name,vectors))
+      connection.eval("row.names(#{name}) <- rows")
+      connection.eval(name)
+    end
-	    def get_rownames(variable, helper, repo)
-	      rows = helper.get_ary(helper.execute(helper.row_names(variable), repo)).flatten
-	    end
+    def save_workspace(connection, loc)
+      connection.eval "save.image(#{loc})"
+    end
-	  end
+    def get_rownames(variable, helper, repo)
+      rows = helper.get_ary(helper.execute(helper.row_names(variable), repo)).flatten
+    end
-	  class Builder
-	    include R2RDF::Writer::Dataframe
+  end
+  class Builder
+    include R2RDF::Writer::Dataframe
-	    def from_turtle(turtle_file, connection, variable_in=nil, variable_out=nil, verbose=true, save=true)
-	      unless variable_in && variable_out
-	        puts "no variable specified. Simple inference coming soon" if verbose
-	        return
-	      end
-	      puts "loading #{turtle_file}" if verbose
-	      repo = RDF::Repository.load(turtle_file)
-	      puts "loaded #{repo.size} statements into temporary repo" if verbose
-	      # connection = Rserve::Connection.new
-	      query = R2RDF::QueryHelper.new
-	      rows = get_rownames(variable_in, query, repo)
-	      puts "frame has #{rows.size} rows" if verbose
-	      vectors = get_vectors(variable_in, query, repo)
-	      puts "got vectors of size #{vectors.first.last.size}" if verbose && vectors.first
+    def from_turtle(turtle_file, connection, variable_in=nil, variable_out=nil, verbose=true, save=true)
+      unless variable_in && variable_out
+        puts "no variable specified. Simple inference coming soon" if verbose
+        return
+      end
+      puts "loading #{turtle_file}" if verbose
+      repo = RDF::Repository.load(turtle_file)
+      puts "loaded #{repo.size} statements into temporary repo" if verbose
+      # connection = Rserve::Connection.new
+      query = R2RDF::QueryHelper.new
+      rows = get_rownames(variable_in, query, repo)
+      puts "frame has #{rows.size} rows" if verbose
-	      create_dataframe(variable_out, connection, rows, vectors)
-	      save_workspace(connection, connection.eval('getwd()').to_ruby) if save
-	    end
+      vectors = get_vectors(variable_in, query, repo)
+      puts "got vectors of size #{vectors.first.last.size}" if verbose && vectors.first
-	    def from_store(endpoint_url,connection,variable_in=nil, variable_out=nil, verbose=true, save=true)
-	    	unless variable_in && variable_out
-	    	  puts "no variable specified. Simple inference coming soon" if verbose
-	    	  return
-	    	end
-	    	puts "connecting to endpoint at #{endpoint_url}" if verbose
-	    	sparql = SPARQL::Client.new(endpoint_url)
-	    	# client = R2RDF::Client.new
-	      query = R2RDF::QueryHelper.new
+      create_dataframe(variable_out, connection, rows, vectors)
+      save_workspace(connection, connection.eval('getwd()').to_ruby) if save
+    end
-	      rows = query.get_ary(sparql.query(query.row_names(variable_in))).flatten
+    def from_store(endpoint_url,connection,variable_in=nil, variable_out=nil, verbose=true, save=true)
+      unless variable_in && variable_out
+        puts "no variable specified. Simple inference coming soon" if verbose
+        return
+      end
+      puts "connecting to endpoint at #{endpoint_url}" if verbose
+      sparql = SPARQL::Client.new(endpoint_url)
+      # client = R2RDF::Client.new
+      query = R2RDF::QueryHelper.new
-	    end
+      rows = query.get_ary(sparql.query(query.row_names(variable_in))).flatten
-	  end
-	end
+    end
+    end
+  end
 end

data/lib/bio-publisci.rb CHANGED Viewed

@@ -8,16 +8,17 @@ require 'sparql'
 require 'sparql/client'
 require 'rdf/turtle'
-# require 'bio-band'
 def load_folder(folder)
 	Dir.foreach(File.dirname(__FILE__) + "/#{folder}") do |file|
 		unless file == "." or file == ".."
-			load File.dirname(__FILE__) + "/#{folder}/" + file
+			f = File.dirname(__FILE__) + "/#{folder}/" + file
+      load f unless File.directory?(f)
 		end
-	end
+  end
 end
+load_folder('bio-publisci/mixins')
 load File.dirname(__FILE__) + '/bio-publisci/dataset/interactive.rb'
 load File.dirname(__FILE__) + '/bio-publisci/query/query_helper.rb'
 load File.dirname(__FILE__) + '/bio-publisci/parser.rb'
@@ -26,9 +27,13 @@ load File.dirname(__FILE__) + '/bio-publisci/analyzer.rb'
 load File.dirname(__FILE__) + '/bio-publisci/store.rb'
 load File.dirname(__FILE__) + '/bio-publisci/dataset/data_cube.rb'
 load File.dirname(__FILE__) + '/bio-publisci/dataset/dataset_for.rb'
+load File.dirname(__FILE__) + '/bio-publisci/output.rb'
+load File.dirname(__FILE__) + '/bio-publisci/metadata/prov/prov.rb'
+load File.dirname(__FILE__) + '/bio-publisci/metadata/prov/element.rb'
 load_folder('bio-publisci/metadata')
+load_folder('bio-publisci/metadata/prov')
 load_folder('bio-publisci/readers')
 load_folder('bio-publisci/writers')
 load_folder('bio-publisci/dataset/ORM')

data/spec/ORM/data_cube_orm_spec.rb CHANGED Viewed

@@ -1,12 +1,12 @@
 require_relative '../../lib/bio-publisci.rb'
-describe R2RDF::Dataset::ORM::DataCube do
+describe R2RDF::ORM::DataCube do
   it "should load and save a turtle file without loss of information" do
     ref = IO.read(File.dirname(__FILE__) + '/../turtle/bacon')
-    cube = R2RDF::Dataset::ORM::DataCube.load(ref, {skip_metadata: true, generator_options: {label_column: 0}})
+    cube = R2RDF::ORM::DataCube.load(ref, {skip_metadata: true, generator_options: {label_column: 0}})
     cube.abbreviate_known(cube.to_n3).should == ref
     # cube.to_n3.should == ref
   end
 end

data/spec/dataset_for_spec.rb ADDED Viewed

@@ -0,0 +1,29 @@
+require_relative '../lib/bio-publisci.rb'
+describe R2RDF::Dataset do
+  context 'with a csv file' do
+    before(:all) do
+      @file = File.dirname(__FILE__) + '/csv/bacon.csv'
+    end
+    it "should load with no prompts if all details are specified" do
+      turtle_string = R2RDF::Dataset.for(@file,{dimensions:["producer"],measures:["pricerange"]},false)
+      (turtle_string =~ /qb:Observation/).should_not be nil
+    end
+    it "will request user input if not provided" do
+      gen = R2RDF::Reader::CSV.new
+      gen.stub(:gets).and_return('pricerange,producer')
+      gen.stub(:puts)
+      turtle_string = gen.automatic(@file,nil,{measures:["chunkiness"]})
+      (turtle_string =~ /prop:pricerange/).should_not be nil
+      (turtle_string =~ /prop:producer/).should_not be nil
+    end
+    it "will try to guess if told not to be interactive" do
+      turtle_string = R2RDF::Dataset.for(@file,false)
+      (turtle_string =~ /prop:pricerange/).should_not be nil
+      (turtle_string =~ /prop:producer/).should_not be nil
+    end
+  end
+end

data/spec/generators/r_cross_spec.rb ADDED Viewed

@@ -0,0 +1,51 @@
+require_relative '../../lib/bio-publisci.rb'
+require 'tempfile'
+describe R2RDF::Reader::RCross do
+  def create_graph(turtle_string)
+    f = Tempfile.new('graph')
+    f.write(turtle_string)
+    f.close
+    graph = RDF::Graph.load(f.path, :format => :ttl)
+    f.unlink
+    graph
+  end
+  context "with reduced listeria cross", no_travis: true do
+    before(:all) do
+      @r = Rserve::Connection.new
+      @generator = R2RDF::Reader::RCross.new
+      @r.eval <<-EOF
+        library(qtl)
+        data(listeria)
+        liscopy = listeria
+        for(i in 1:20)
+          liscopy$geno[[i]]$data <- as.matrix(liscopy$geno[[i]]$data[1:2,])
+          liscopy$pheno <- liscopy$phen[1:2,]
+  EOF
+    end
+    it "generators output to file by default", no_travis: true do
+      f=Tempfile.new('cross')
+      @generator.generate_n3(@r,'liscopy',f.path,{quiet: true})
+      turtle_string = IO.read("#{f.path}_structure.ttl") + IO.read("#{f.path}_1.ttl")
+      graph = create_graph(turtle_string)
+      graph.size.should > 0
+    end
+    it "can generate string output", no_travis: true #do
+      # pending
+      # f=Tempfile.new('cross')
+      # turtle_string = @generator.generate_n3(@connection,'liscopy',f.path,{quiet: false, output: :string})
+      # graph = create_graph(turtle_string)
+      # graph.size.should > 0
+    # end
+  end
+end

data/spec/generators/r_matrix_spec.rb CHANGED Viewed

@@ -7,7 +7,7 @@ require_relative '../../lib/bio-publisci.rb'
 require 'tempfile'
 describe R2RDF::Reader::RMatrix do
 	def create_graph(turtle_string)
 		f = Tempfile.new('graph')
 		f.write(turtle_string)
@@ -17,19 +17,28 @@ describe R2RDF::Reader::RMatrix do
 		graph
 	end
-	before(:each) do
+	before(:each) do
 		@generator = R2RDF::Reader::RMatrix.new
-		@connection = Rserve::Connection.new
+		@connection = Rserve::Connection.new
 	end
 	it "generators a simple output automatically", no_travis: true do
 		f=Tempfile.new('matrix')
-		@connection.eval "mat = matrix(c(2, 4, 3, 1, 5, 7), nrow=3, ncol=2)"
+		@connection.eval "mat = matrix(c(2, 4, 3, 1, 5, 7), nrow=3, ncol=2)"
 		@generator.generate_n3(@connection,'mat',f.path,{quiet: true})
 		turtle_string = IO.read("#{f.path}_structure.ttl") + IO.read("#{f.path}_0.ttl")
-		graph = create_graph(turtle_string)
+		graph = create_graph(turtle_string)
 		graph.size.should > 0
 	end
+  it "can generate string output", no_travis: true do
+    f=Tempfile.new('matrix')
+    @connection.eval "mat = matrix(c(2, 4, 3, 1, 5, 7), nrow=3, ncol=2)"
+    turtle_string = @generator.generate_n3(@connection,'mat',f.path,{quiet: true, output: :string})
+    graph = create_graph(turtle_string)
+    graph.size.should > 0
+  end
 end