publisci 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.rspec +1 -0
- data/.travis.yml +13 -0
- data/Gemfile +36 -0
- data/LICENSE.txt +20 -0
- data/README.md +51 -0
- data/README.rdoc +48 -0
- data/Rakefile +68 -0
- data/bin/bio-publisci +106 -0
- data/bin/bio-publisci-server +50 -0
- data/examples/bio-band_integration.rb +9 -0
- data/examples/no_magic.prov +58 -0
- data/examples/no_magic.rb +58 -0
- data/examples/orm.prov +48 -0
- data/examples/primer-full.prov +120 -0
- data/examples/primer.prov +66 -0
- data/examples/prov_dsl.prov +85 -0
- data/examples/safe_gen.rb +7 -0
- data/examples/visualization/primer.prov +66 -0
- data/examples/visualization/prov_viz.rb +140 -0
- data/examples/visualization/viz.rb +35 -0
- data/features/create_generator.feature +21 -0
- data/features/integration.feature +12 -0
- data/features/integration_steps.rb +10 -0
- data/features/metadata.feature +37 -0
- data/features/metadata_steps.rb +40 -0
- data/features/orm.feature +60 -0
- data/features/orm_steps.rb +74 -0
- data/features/prov_dsl.feature +14 -0
- data/features/prov_dsl_steps.rb +11 -0
- data/features/reader.feature +25 -0
- data/features/reader_steps.rb +61 -0
- data/features/step_definitions/bio-publisci_steps.rb +0 -0
- data/features/store.feature +27 -0
- data/features/store_steps.rb +42 -0
- data/features/support/env.rb +13 -0
- data/features/writer.feature +14 -0
- data/features/writer_steps.rb +24 -0
- data/lib/bio-publisci.rb +64 -0
- data/lib/bio-publisci/analyzer.rb +57 -0
- data/lib/bio-publisci/datacube_model.rb +111 -0
- data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +240 -0
- data/lib/bio-publisci/dataset/ORM/observation.rb +20 -0
- data/lib/bio-publisci/dataset/configuration.rb +31 -0
- data/lib/bio-publisci/dataset/data_cube.rb +418 -0
- data/lib/bio-publisci/dataset/dataset.rb +11 -0
- data/lib/bio-publisci/dataset/dataset_for.rb +186 -0
- data/lib/bio-publisci/dataset/interactive.rb +72 -0
- data/lib/bio-publisci/dsl/config.rb +34 -0
- data/lib/bio-publisci/dsl/dataset_dsl.rb +93 -0
- data/lib/bio-publisci/dsl/dsl.rb +72 -0
- data/lib/bio-publisci/dsl/metadata_dsl.rb +85 -0
- data/lib/bio-publisci/dsl/prov_dsl.rb +143 -0
- data/lib/bio-publisci/metadata/generator.rb +323 -0
- data/lib/bio-publisci/metadata/metadata.rb +5 -0
- data/lib/bio-publisci/metadata/metadata_model.rb +25 -0
- data/lib/bio-publisci/metadata/prov/activity.rb +88 -0
- data/lib/bio-publisci/metadata/prov/agent.rb +100 -0
- data/lib/bio-publisci/metadata/prov/association.rb +107 -0
- data/lib/bio-publisci/metadata/prov/config.rb +34 -0
- data/lib/bio-publisci/metadata/prov/derivation.rb +60 -0
- data/lib/bio-publisci/metadata/prov/element.rb +120 -0
- data/lib/bio-publisci/metadata/prov/entity.rb +64 -0
- data/lib/bio-publisci/metadata/prov/model/prov_models.rb +109 -0
- data/lib/bio-publisci/metadata/prov/plan.rb +32 -0
- data/lib/bio-publisci/metadata/prov/prov.rb +78 -0
- data/lib/bio-publisci/metadata/prov/role.rb +40 -0
- data/lib/bio-publisci/metadata/prov/usage.rb +64 -0
- data/lib/bio-publisci/metadata/publisher.rb +25 -0
- data/lib/bio-publisci/mixins/custom_predicate.rb +38 -0
- data/lib/bio-publisci/mixins/dereferencable.rb +34 -0
- data/lib/bio-publisci/mixins/registry.rb +27 -0
- data/lib/bio-publisci/mixins/vocabulary.rb +8 -0
- data/lib/bio-publisci/output.rb +27 -0
- data/lib/bio-publisci/parser.rb +266 -0
- data/lib/bio-publisci/post_processor.rb +95 -0
- data/lib/bio-publisci/query/query_helper.rb +123 -0
- data/lib/bio-publisci/r_client.rb +54 -0
- data/lib/bio-publisci/readers/arff.rb +49 -0
- data/lib/bio-publisci/readers/base.rb +57 -0
- data/lib/bio-publisci/readers/csv.rb +88 -0
- data/lib/bio-publisci/readers/dataframe.rb +67 -0
- data/lib/bio-publisci/readers/maf.rb +199 -0
- data/lib/bio-publisci/readers/r_cross.rb +112 -0
- data/lib/bio-publisci/readers/r_matrix.rb +176 -0
- data/lib/bio-publisci/store.rb +56 -0
- data/lib/bio-publisci/writers/arff.rb +91 -0
- data/lib/bio-publisci/writers/base.rb +93 -0
- data/lib/bio-publisci/writers/csv.rb +31 -0
- data/lib/bio-publisci/writers/dataframe.rb +81 -0
- data/lib/bio-publisci/writers/json.rb +18 -0
- data/lib/r2rdf.rb +226 -0
- data/lib/template_bak.rb +12 -0
- data/lib/template_bak/publisci.rb +3 -0
- data/lib/vocabs/cc.rb +18 -0
- data/lib/vocabs/cert.rb +13 -0
- data/lib/vocabs/dc.rb +63 -0
- data/lib/vocabs/dc11.rb +23 -0
- data/lib/vocabs/doap.rb +45 -0
- data/lib/vocabs/exif.rb +168 -0
- data/lib/vocabs/foaf.rb +69 -0
- data/lib/vocabs/geo.rb +13 -0
- data/lib/vocabs/http.rb +26 -0
- data/lib/vocabs/ma.rb +78 -0
- data/lib/vocabs/owl.rb +59 -0
- data/lib/vocabs/rdfs.rb +17 -0
- data/lib/vocabs/rsa.rb +12 -0
- data/lib/vocabs/rss.rb +14 -0
- data/lib/vocabs/sioc.rb +93 -0
- data/lib/vocabs/skos.rb +36 -0
- data/lib/vocabs/wot.rb +21 -0
- data/lib/vocabs/xhtml.rb +9 -0
- data/lib/vocabs/xsd.rb +58 -0
- data/resources/maf_example.maf +10 -0
- data/resources/maf_rdf.ttl +1173 -0
- data/resources/primer.ttl +38 -0
- data/resources/queries/code_resources.rq +10 -0
- data/resources/queries/codes.rq +18 -0
- data/resources/queries/dataset.rq +7 -0
- data/resources/queries/dimension_ranges.rq +8 -0
- data/resources/queries/dimensions.rq +12 -0
- data/resources/queries/gene.rq +16 -0
- data/resources/queries/hugo_to_ensembl.rq +7 -0
- data/resources/queries/maf_column.rq +26 -0
- data/resources/queries/measures.rq +12 -0
- data/resources/queries/observation_labels.rq +8 -0
- data/resources/queries/observations.rq +13 -0
- data/resources/queries/patient.rq +11 -0
- data/resources/queries/patient_list.rq +11 -0
- data/resources/queries/patients_with_mutation.rq +18 -0
- data/resources/queries/properties.rq +8 -0
- data/resources/queries/test.rq +3 -0
- data/resources/weather.numeric.arff +28 -0
- data/scripts/get_gene_lengths.rb +50 -0
- data/scripts/islet_mlratio.rb +6 -0
- data/scripts/scan_islet.rb +6 -0
- data/scripts/update_reference.rb +25 -0
- data/server/helpers.rb +215 -0
- data/server/public/src-min-noconflict/LICENSE +24 -0
- data/server/public/src-min-noconflict/ace.js +11 -0
- data/server/public/src-min-noconflict/ext-chromevox.js +1 -0
- data/server/public/src-min-noconflict/ext-elastic_tabstops_lite.js +1 -0
- data/server/public/src-min-noconflict/ext-emmet.js +1 -0
- data/server/public/src-min-noconflict/ext-keybinding_menu.js +1 -0
- data/server/public/src-min-noconflict/ext-language_tools.js +1 -0
- data/server/public/src-min-noconflict/ext-modelist.js +1 -0
- data/server/public/src-min-noconflict/ext-old_ie.js +1 -0
- data/server/public/src-min-noconflict/ext-searchbox.js +1 -0
- data/server/public/src-min-noconflict/ext-settings_menu.js +1 -0
- data/server/public/src-min-noconflict/ext-spellcheck.js +1 -0
- data/server/public/src-min-noconflict/ext-split.js +1 -0
- data/server/public/src-min-noconflict/ext-static_highlight.js +1 -0
- data/server/public/src-min-noconflict/ext-statusbar.js +1 -0
- data/server/public/src-min-noconflict/ext-textarea.js +1 -0
- data/server/public/src-min-noconflict/ext-themelist.js +1 -0
- data/server/public/src-min-noconflict/ext-whitespace.js +1 -0
- data/server/public/src-min-noconflict/keybinding-emacs.js +1 -0
- data/server/public/src-min-noconflict/keybinding-vim.js +1 -0
- data/server/public/src-min-noconflict/mode-ruby.js +1 -0
- data/server/public/src-min-noconflict/snippets/ruby.js +1 -0
- data/server/public/src-min-noconflict/theme-twilight.js +1 -0
- data/server/public/src-min-noconflict/worker-coffee.js +1 -0
- data/server/public/src-min-noconflict/worker-css.js +1 -0
- data/server/public/src-min-noconflict/worker-javascript.js +1 -0
- data/server/public/src-min-noconflict/worker-json.js +1 -0
- data/server/public/src-min-noconflict/worker-lua.js +1 -0
- data/server/public/src-min-noconflict/worker-php.js +1 -0
- data/server/public/src-min-noconflict/worker-xquery.js +1 -0
- data/server/routes.rb +123 -0
- data/server/views/dsl.haml +65 -0
- data/server/views/dump.haml +3 -0
- data/server/views/import.haml +35 -0
- data/server/views/new_repository.haml +25 -0
- data/server/views/query.haml +28 -0
- data/server/views/repository.haml +25 -0
- data/spec/ORM/data_cube_orm_spec.rb +33 -0
- data/spec/ORM/prov_model_spec.rb +72 -0
- data/spec/analyzer_spec.rb +36 -0
- data/spec/bnode_spec.rb +66 -0
- data/spec/csv/bacon.csv +4 -0
- data/spec/csv/moar_bacon.csv +11 -0
- data/spec/data_cube_spec.rb +169 -0
- data/spec/dataset_for_spec.rb +77 -0
- data/spec/dsl_spec.rb +134 -0
- data/spec/generators/csv_spec.rb +44 -0
- data/spec/generators/dataframe_spec.rb +44 -0
- data/spec/generators/maf_spec.rb +40 -0
- data/spec/generators/r_cross_spec.rb +51 -0
- data/spec/generators/r_matrix_spec.rb +44 -0
- data/spec/length_lookup_spec.rb +0 -0
- data/spec/maf_query_spec.rb +343 -0
- data/spec/metadata/metadata_dsl_spec.rb +68 -0
- data/spec/prov/activity_spec.rb +74 -0
- data/spec/prov/agent_spec.rb +54 -0
- data/spec/prov/association_spec.rb +55 -0
- data/spec/prov/config_spec.rb +28 -0
- data/spec/prov/derivation_spec.rb +30 -0
- data/spec/prov/entity_spec.rb +52 -0
- data/spec/prov/role_spec.rb +94 -0
- data/spec/prov/usage_spec.rb +98 -0
- data/spec/queries/integrity/1.rq +21 -0
- data/spec/queries/integrity/11.rq +29 -0
- data/spec/queries/integrity/12.rq +37 -0
- data/spec/queries/integrity/14.rq +25 -0
- data/spec/queries/integrity/19_1.rq +21 -0
- data/spec/queries/integrity/19_2.rq +15 -0
- data/spec/queries/integrity/2.rq +22 -0
- data/spec/queries/integrity/3.rq +19 -0
- data/spec/queries/integrity/4.rq +13 -0
- data/spec/queries/integrity/5.rq +14 -0
- data/spec/r_builder_spec.rb +33 -0
- data/spec/resource/.RData +0 -0
- data/spec/resource/example.Rhistory +3 -0
- data/spec/spec_helper.rb +17 -0
- data/spec/turtle/bacon +147 -0
- data/spec/turtle/reference +2064 -0
- data/spec/turtle/weather +275 -0
- data/spec/writer_spec.rb +75 -0
- metadata +589 -0
@@ -0,0 +1,112 @@
|
|
1
|
+
module PubliSci
|
2
|
+
module Readers
|
3
|
+
class RCross
|
4
|
+
include PubliSci::Dataset::DataCube
|
5
|
+
include PubliSci::Readers::Output
|
6
|
+
|
7
|
+
# Writes the Turtle (n3) representation of an R cross object to disk.
#
# Produces "<outfile_base>_structure.ttl" with the output of #structure and
# one "<outfile_base>_<chromosome>.ttl" file per chromosome, each holding the
# prefixes followed by the observations of every individual.
#
# @param client [#eval] object used to evaluate R expressions
#   (presumably an Rserve connection -- confirm against callers)
# @param var [String] name of the R variable holding the cross object
# @param outfile_base [String] path prefix shared by all generated files
# @param options [Hash] forwarded to the generator helpers. Also read here:
#   :quiet       -- suppress the progress output
#   :chromosomes -- names of the chromosomes to export; defaults to
#                   "1".."19" plus "X", the list that used to be hard-coded
# @return [Array] one element per processed chromosome (incidental)
def generate_n3(client, var, outfile_base, options={})
  meas = measures(client, var, options)
  dim = dimensions(client, var, options)
  codes = codes(client, var, options)

  # File.open instead of Kernel#open: a path beginning with "|" must never
  # be interpreted as a command to run.
  File.open("#{outfile_base}_structure.ttl", 'w') { |f| f.write structure(client, var, options) }

  n_individuals = client.eval("length(#{var}$pheno[[1]])").payload.first
  chromosome_list = options[:chromosomes] || ((1..19).map(&:to_s) + ["X"])

  chromosome_list.map do |chrom|
    chrom_file = "#{outfile_base}_#{chrom}.ttl"

    # (re)create the chromosome file with just the prefix header
    File.open(chrom_file, 'w') { |f| f.write prefixes(var, options) }

    entries_per_individual = client.eval("length(#{var}$geno$'#{chrom}'$map)").to_ruby

    # genotype data for the chromosome currently being written
    geno_chr = client.eval("#{var}$geno$'#{chrom}'")

    # append the observations of one individual at a time
    n_individuals.times do |indi|
      obs_data = observation_data(client, var, chrom.to_s, indi, geno_chr, entries_per_individual, options)
      labels = labels_for(obs_data, chrom.to_s, indi)
      File.open(chrom_file, 'a') do |f|
        observations(meas, dim, codes, obs_data, labels, var, options).each { |obs| f.write obs }
      end
      puts "(#{chrom}) #{indi}/#{n_individuals}" unless options[:quiet]
    end
  end
end
|
36
|
+
|
37
|
+
# Builds the text describing the cube structure for +var+: the prefixes,
# followed by the data structure definition, the dataset description, every
# component specification and every measure property, in that order.
#
# @param client [#eval] forwarded to #measures, #dimensions and #codes
# @param var [String] name of the R variable being described
# @param options [Hash] forwarded unchanged to every helper
# @return [String] the string returned by #prefixes with all parts appended
def structure(client,var,options={})
  measure_names = measures(client, var, options)
  dimension_names = dimensions(client, var, options)
  code_names = codes(client, var, options)

  output = prefixes(var, options)
  output << data_structure_definition(measure_names, dimension_names, code_names, var, options)
  output << dataset(var, options)

  component_specifications(measure_names, dimension_names, code_names, var, options).each do |spec|
    output << spec
  end
  measure_properties(measure_names, var, options).each do |property|
    output << property
  end

  output
end
|
50
|
+
|
51
|
+
# Lists the measure names for the cross object: the phenotype column names
# (restricted to options[:measures] when given) plus the fixed genotype
# measures "genotype", "markerpos" and "marker".
#
# @param client [#eval] object used to evaluate R expressions
# @param var [String] name of the R variable holding the cross object
# @param options [Hash] :measures -- optional whitelist of phenotype names
# @return [Array<String>] measure names without duplicates
def measures(client, var, options={})
  # Array() guards against to_ruby handing back a bare value instead of a
  # list when there is only one phenotype column.
  # NOTE(review): assumed behaviour of the R client's to_ruby -- confirm.
  pheno_names = Array(client.eval("names(#{var}$pheno)").to_ruby)
  selected = options[:measures] ? pheno_names & options[:measures] : pheno_names
  selected | ["genotype", "markerpos", "marker"]
end
|
60
|
+
|
61
|
+
# This reader defines no dimensions; the arguments are accepted only so the
# signature matches the other generator helpers.
#
# @return [Array] always a new empty array
def dimensions(client, var, options={})
  Array.new
end
|
65
|
+
|
66
|
+
# This reader defines no coded properties; the arguments are accepted only
# so the signature matches the other generator helpers.
#
# @return [Array] always a new empty array
def codes(client, var, options={})
  Array.new
end
|
69
|
+
|
70
|
+
# Generates one label per entry of the first data column, of the form
# "<chr>_<n>". Numbering continues across individuals: individual 0 gets
# 1..size, individual 1 gets size+1..2*size, and so on.
#
# @param data [Hash] observation data; only the length of its first value is used
# @param chr [#to_s] chromosome name used as the label prefix
# @param individual [Integer] zero-based index of the individual
# @return [Array<String>] the generated labels
def labels_for(data,chr,individual,options={})
  per_individual = data.first.last.size
  first_number = (per_individual * individual) + 1
  last_number = per_individual * (individual + 1)

  (first_number..last_number).map { |number| "#{chr}_#{number}" }
end
|
75
|
+
|
76
|
+
# Collects the observation columns for one individual on one chromosome.
#
# The result maps column names to flat arrays: the fixed columns "chr",
# "genotype", "individual", "marker" and "markerpos", followed by one column
# per phenotype name reported by R for +var+.
#
# @param client [#eval] object used to evaluate R expressions
# @param var [String] name of the R variable holding the cross object
# @param chr [String] chromosome name
# @param row_individ [Integer] index of the individual (used to index Ruby-side data)
# @param geno_chr [#payload] R result holding the chromosome's genotype data
# @param entries_per_individual [Integer] how often per-individual values are repeated
# @return [Hash{String=>Array}] column name => values
def observation_data(client, var, chr, row_individ, geno_chr, entries_per_individual, options={})
  data = {}
  %w[chr genotype individual marker markerpos].each { |column| data[column] = [] }

  pheno_names = client.eval("names(#{var}$pheno)").to_ruby
  pheno_names.each { |name| data[name] = [] }

  # per-individual values are repeated once per entry
  data["individual"] << (1..entries_per_individual).map { row_individ }
  pheno_names.each do |name|
    pheno_value = client.eval("#{var}$pheno$#{name}").to_ruby[row_individ]
    data[name] << (1..entries_per_individual).map { pheno_value }
  end

  num_markers = geno_chr.payload.first.to_ruby.column_size
  data["chr"] << (1..num_markers).map { chr }
  data["genotype"] << geno_chr.payload["data"].to_ruby.row(row_individ).to_a
  data["marker"] << client.eval("names(#{var}$geno$'#{chr}'$map)").payload
  data["markerpos"] << geno_chr.payload["map"].to_a

  # every column was filled by appending whole arrays; flatten them in place
  data.each_value(&:flatten!)
  data
end
|
104
|
+
|
105
|
+
# Counts the individuals by taking the size of the first phenotype column.
#
# @param client [#eval] object used to evaluate R expressions
# @param var [String] name of the R variable holding the cross object
# @return [Integer] size of the first element of the "$pheno" payload
def num_individuals(client, var, options={})
  pheno = client.eval("#{var}$pheno")
  first_column = pheno.payload.first
  first_column.to_ruby.size
end
|
108
|
+
|
109
|
+
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
@@ -0,0 +1,176 @@
|
|
1
|
+
module PubliSci
|
2
|
+
module Readers
|
3
|
+
class RMatrix
|
4
|
+
include PubliSci::Dataset::DataCube
|
5
|
+
|
6
|
+
#NOTE; this is pretty much hard coded for Karl's application right now, and doesn't
|
7
|
+
# do any dimension or code generation. Since it's a set of LOD scores indexed by dimension
|
8
|
+
# and marker the usual datacube generator won't work (I think). In the future adding an option
|
9
|
+
# to specify this kind of a dataset would probably be useful
|
10
|
+
|
11
|
+
|
12
|
+
# Converts an R matrix (or data frame) into Turtle, one probe (column) at a
# time.
#
# "<outfile_base>_structure.ttl" is always written. The observations go to
# exactly one destination:
# * options[:print]             -- printed with puts
# * options[:output] == :string -- collected and returned as one string
# * otherwise                   -- written to "<outfile_base>_<k>.ttl",
#   where each file starts with the prefixes and holds the observations of
#   options[:probes_per_file] (default 100) consecutive probes.
#
# @param client [#eval] object used to evaluate R expressions
# @param var [String] name of the R variable to convert
# @param outfile_base [String] path prefix shared by all generated files
# @param options [Hash] forwarded to the helpers; also reads :type
#   (:dataframe selects names() instead of colnames()), :probes_per_file,
#   :print, :output and :quiet
# @return [String, nil] the generated text in :string mode, otherwise nil
def generate_n3(client, var, outfile_base, options={})
  meas = measures(client, var, options)
  dim = dimensions(client, var, options)
  codes = codes(client, var, options)

  outvar = sanitize([var]).first

  probes_per_file = options[:probes_per_file] || 100
  col_select = options[:type] == :dataframe ? "names" : "colnames"
  string_output = options[:output] == :string
  write_files = !(options[:print] || string_output)

  # File.open instead of Kernel#open: a path beginning with "|" must never
  # be interpreted as a command to run.
  File.open("#{outfile_base}_structure.ttl", 'w') { |f| f.write structure(client, var, outvar, options) }

  probes = client.eval("#{col_select}(#{var})").to_ruby
  if probes.nil?
    # unnamed columns: number them so every probe has an identifier
    client.eval("colnames(#{var})=1:ncol(#{var})")
    probes = client.eval("#{col_select}(#{var})").to_ruby
  end
  # a single column name may come back as a bare value rather than a list
  probes = Array(probes)
  markers = rows(client, var, options)

  puts prefixes(var, options) if options[:print]
  str = prefixes(var, options) if string_output

  probes.each_with_index do |probe, index|
    # The column number passed to R is 1-based. The chunk file is derived from
    # the 0-based index, so a probe is always appended to the file whose header
    # was written for its chunk. (Previously the incremented counter was used,
    # which put the last probe of each chunk into the next file, where it was
    # erased as soon as that file was created.)
    probe_number = index + 1
    chunk_file = "#{outfile_base}_#{index / probes_per_file}.ttl"

    if write_files && (index % probes_per_file).zero?
      # first probe of a chunk: create the file (dropping any old one)
      File.open(chunk_file, 'w') { |f| f.write prefixes(var, options) }
    end

    obs_data = observation_data(client, var, probe_number, markers, options)
    labels = labels_for(client, var, probe)
    obs_lines = observations(meas, dim, codes, obs_data, labels, outvar, options)

    obs_lines.each { |obs| puts obs } if options[:print]
    obs_lines.each { |obs| str << obs } if string_output

    next unless write_files

    File.open(chunk_file, 'a') { |f| obs_lines.each { |obs| f.write obs } }
    puts "#{probe_number}/#{probes.size}" unless options[:quiet]
  end

  str if string_output
end
|
70
|
+
|
71
|
+
# Builds the text describing the cube structure: the prefixes, followed by
# the data structure definition, the dataset description, every component
# specification and every measure property, in that order.
#
# The data structure definition and the dataset are emitted for +outvar+
# (the definition with an empty dimension list); the component
# specifications and measure properties are generated for +var+.
#
# @param client [#eval] forwarded to #measures, #dimensions and #codes
# @param var [String] name of the R variable
# @param outvar [String] name used for the definition and dataset
# @param options [Hash] forwarded unchanged to every helper
# @return [String] the string returned by #prefixes with all parts appended
def structure(client,var,outvar,options={})
  measure_names = measures(client, var, options)
  dimension_names = dimensions(client, var, options)
  code_names = codes(client, var, options)

  output = prefixes(var, options)
  output << data_structure_definition(measure_names, [], code_names, outvar, options)
  output << dataset(outvar, options)

  component_specifications(measure_names, dimension_names, code_names, var, options).each do |spec|
    output << spec
  end
  measure_properties(measure_names, var, options).each do |property|
    output << property
  end

  output
end
|
84
|
+
|
85
|
+
#for now just make everything a measure
|
86
|
+
# Names of the measures to generate: options[:measures] when it is set,
# otherwise the default triple "probe", "marker", "value".
#
# @param options [Hash] :measures -- optional replacement list
# @return [Array] the measure names
def measures(client, var, options={})
  options[:measures] || ["probe", "marker", "value"]
end
|
94
|
+
|
95
|
+
# This reader defines no dimensions; the arguments are accepted only so the
# signature matches the other generator helpers.
#
# @return [Array] always a new empty array
def dimensions(client, var, options={})
  Array.new
end
|
99
|
+
|
100
|
+
# This reader defines no coded properties; the arguments are accepted only
# so the signature matches the other generator helpers.
#
# @return [Array] always a new empty array
def codes(client, var, options={})
  Array.new
end
|
103
|
+
|
104
|
+
# Generates one observation label per row of +var+, of the form
# "<probe_id>_<n>" with n counting up from 1.
#
# The row count comes from row.names(var); when R reports NULL for it, the
# count is taken from nrow(var) instead.
#
# @param connection [#eval] object used to evaluate R expressions
# @param var [String] name of the R variable
# @param probe_id [#to_s] identifier used as the label prefix
# @return [Array<String>] the generated labels
def labels_for(connection,var,probe_id,options={})
  names = connection.eval("row.names(#{var})")

  row_count =
    if names == connection.eval('NULL')
      (1..connection.eval("nrow(#{var})").payload.first).to_a.size
    else
      names.payload.size
    end

  (1..row_count).map { |number| "#{probe_id}_#{number}" }
end
|
120
|
+
|
121
|
+
# Returns the row names of +var+. When R reports NULL for row.names(var),
# the rows are numbered 1..nrow(var) instead.
#
# The NULL check compares against a freshly evaluated R NULL because, per
# the original author, the client's to_ruby conversion is not reliable here.
#
# @param connection [#eval] object used to evaluate R expressions
# @param var [String] name of the R variable
# @return [Array] the payload of row.names(var), or 1..nrow as an array
def rows(connection,var,options={})
  names = connection.eval("row.names(#{var})")
  return names.payload unless names == connection.eval('NULL')

  row_total = connection.eval("nrow(#{var})").payload.first
  (1..row_total).to_a
end
|
131
|
+
|
132
|
+
# Collects the observation columns for a single probe (column) of +var+.
#
# The result has three columns. Their names default to "probe", "marker"
# and "value" and can be replaced positionally through options[:measures].
# The probe column repeats the probe's column name once per value, the
# marker column holds the matching entries of +row_names+, and the value
# column holds the values read from R.
#
# @param client [#eval] object used to evaluate R expressions
# @param var [String] name of the R variable
# @param probe_number [Integer] column number as used in the R expression
# @param row_names [Array] names matched to the values by position
# @param options [Hash] :measures -- custom column names;
#   :type -- :dataframe switches to var[[n]] / names(var)
# @return [Hash{String=>Array}] column name => values
def observation_data(client, var, probe_number, row_names, options={})
  custom = options[:measures]
  col_key = ((custom && custom[0]) || "probe").to_s
  row_key = ((custom && custom[1]) || "marker").to_s
  val_key = ((custom && custom[2]) || "value").to_s

  data = {}
  [col_key, row_key, val_key].each { |key| data[key] = [] }

  dataframe = options[:type] == :dataframe
  col_select = dataframe ? "names" : "colnames"
  column_expr = dataframe ? "#{var}[[#{probe_number}]]" : "#{var}[,#{probe_number}]"

  probe_obj = client.eval(column_expr).to_ruby
  probe_id = client.eval("#{col_select}(#{var})[[#{probe_number}]]").to_ruby

  data[col_key] = (1..probe_obj.size).map { probe_id }
  probe_obj.each_with_index do |lod, position|
    data[row_key] << row_names[position]
    data[val_key] << lod
  end

  data.each_value(&:flatten!)
  data
end
|
174
|
+
end
|
175
|
+
end
|
176
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module PubliSci
|
2
|
+
# handles connection and messaging to/from the triple store
|
3
|
+
class Store
|
4
|
+
include PubliSci::Query
|
5
|
+
|
6
|
+
# Settings used for every key the caller does not override.
#
# @return [Hash] a fresh hash with :type, :url and :replace
def defaults
  settings = {}
  settings[:type] = :fourstore
  # TODO port etc should eventually be extracted from URI if given
  settings[:url] = "http://localhost:8080"
  settings[:replace] = false
  settings
end
|
13
|
+
|
14
|
+
# Loads a Turtle file into the store.
#
# * type :graph     -- +graph+ must be an RDF::Repository; the file is
#   loaded into it and it becomes the store used by later queries.
# * type :fourstore -- the file is sent with curl to the endpoint at
#   @options[:url], into the graph "http://rqtl.org/<graph>". With
#   @options[:replace] the graph is replaced (curl -T), otherwise the data
#   is added (form post to /data/).
# Any other type does nothing and returns nil.
#
# @param file [String] path of the Turtle file
# @param graph [RDF::Repository, String] repository (type :graph) or graph
#   name appended to the rqtl.org graph URI (type :fourstore)
# @return [RDF::Repository, String, nil] the repository, or curl's output
# @raise [ArgumentError] for type :graph when +graph+ is not a repository
def add(file,graph)
  case @options[:type]
  when :graph
    # raise (not throw): this is an error, not a non-local jump. The uncaught
    # throw surfaced as an ArgumentError subclass, so callers rescuing
    # ArgumentError keep working.
    raise ArgumentError, "please provide an RDF::Repository" unless graph.is_a? RDF::Repository
    graph.load(file)
    @store = graph
  when :fourstore
    graph_uri = "http%3A%2F%2Frqtl.org%2F#{graph}"
    command =
      if @options[:replace]
        ["curl", "-T", file.to_s, "-H", "Content-Type: application/x-turtle",
         "#{@options[:url]}/data/#{graph_uri}"]
      else
        ["curl", "--data-urlencode", "data@#{file}", "-d", "graph=#{graph_uri}",
         "-d", "mime-type=application/x-turtle", "#{@options[:url]}/data/"]
      end
    # The argument list is handed to curl directly, without a shell, so
    # spaces or shell metacharacters in file, graph or url can neither break
    # the call nor inject commands.
    IO.popen(command, &:read)
  end
end
|
28
|
+
|
29
|
+
# Adds every regular file in +dir+ to the store, printing progress and the
# result of each #add call.
#
# @param dir [String] directory to read
# @param graph [Object] passed through to #add for every file
# @param pattern [Regexp, Symbol, nil] only file names matching the Regexp
#   are added; :turtle / :ttl select names ending in ".ttl"; anything else
#   adds all files
# @return [Array<String>] the names (relative to +dir+) of the added files
def add_all(dir, graph, pattern=nil)
  # anchored so that e.g. "data.ttl.bak" is not mistaken for a Turtle file
  pattern = /.+\.ttl\z/ if pattern == :turtle || pattern == :ttl

  # sorted for a reproducible order; sub-directories cannot be loaded
  files = (Dir.entries(dir) - %w(. ..)).sort
  files = files.select { |name| File.file?(File.join(dir, name)) }
  files = files.grep(pattern) if pattern.is_a? Regexp
  nfiles = files.size

  files.each_with_index do |file, index|
    puts "#{file} #{index + 1}/#{nfiles} files"
    # Dir.entries yields bare names; join with dir so this also works when
    # the current working directory is not +dir+.
    puts add(File.join(dir, file), graph)
  end
end
|
38
|
+
|
39
|
+
# Creates a store handle. Keys given in +options+ take precedence over the
# values returned by #defaults.
#
# @param options [Hash] settings overriding the defaults
def initialize(options={})
  base_settings = defaults
  @options = base_settings.merge(options)
end
|
42
|
+
|
43
|
+
# Runs a query against the configured store: against the repository kept by
# #add for type :graph, or against the configured URL for type :fourstore.
# Any other type yields nil.
#
# @param string [String] the query text
# @return [Object, nil] whatever #execute returns
def query(string)
  case @options[:type]
  when :graph
    execute(string, @store, :graph)
  when :fourstore
    execute(string, @options[:url], :fourstore)
  end
end
|
51
|
+
|
52
|
+
# The endpoint URL this store was configured with.
#
# @return [Object] the value stored under :url in the options
def url
  endpoint = @options[:url]
  endpoint
end
|
55
|
+
end
|
56
|
+
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
module PubliSci
|
2
|
+
module Writers
|
3
|
+
class ARFF < Base
|
4
|
+
# include PubliSci::Query
|
5
|
+
# include PubliSci::Parser
|
6
|
+
# include PubliSci::Analyzer
|
7
|
+
|
8
|
+
# Renders an ARFF document: a comment header naming the relation and its
# source, one @ATTRIBUTE line per attribute (sorted by name) and one
# comma-separated @DATA row per observation (values ordered by key).
#
# @param relation [String] relation name; capitalized for the title line
# @param attributes [Hash] attribute name => ARFF type
# @param data [Enumerable] pairs whose second element maps names to values
# @param source [Object] interpolated into the "Sources" comment
# @return [String] the ARFF text
def build_arff(relation, attributes, data, source)
  header = <<-EOS
% 1. Title: #{relation.capitalize} Database
%
% 2. Sources:
% (a) Generated from RDF source #{source}
%
@RELATION #{relation}

  EOS

  attribute_lines = Hash[attributes.sort].map do |attribute, type|
    "@ATTRIBUTE #{attribute} #{type}\n"
  end
  data_rows = data.map do |observation|
    Hash[observation[1].sort].values.join(',') + "\n"
  end

  header + attribute_lines.join + "\n@DATA\n" + data_rows.join
end
|
28
|
+
|
29
|
+
# Converts the data cube stored in a Turtle file into ARFF text.
#
# @param turtle_file [String] path of the Turtle file to load
# @param verbose [Boolean] print progress messages while loading
# @return [String] the ARFF document produced by #build_arff
def from_turtle(turtle_file, verbose=false)
  puts "loading #{turtle_file}" if verbose
  repo = RDF::Repository.load(turtle_file)
  puts "loaded #{repo.size} statements into temporary repo" if verbose

  dims = dimensions(repo)
  meas = measures(repo)
  data = observations(repo)

  relation = dataSet(repo)
  codes = codes(repo)

  attributes = arff_attribute_types(dims, meas, codes, data)
  build_arff(relation, attributes, data, turtle_file)
end

# Converts a data cube held in a repository into ARFF text.
#
# @param repo [Object] input accepted by the query helpers
# @param dataset [String, nil] restricts the queries to this dataset; when
#   nil the queries are unrestricted and the name is looked up afterwards
#   for the header
# @param title [String, nil] relation name; defaults to the dataset
# @param verbose [Boolean] accepted for symmetry with #from_turtle; unused
# @return [String] the ARFF document produced by #build_arff
def from_store(repo, dataset=nil, title=nil, verbose=false)
  dims = dimensions(repo, dataset)
  meas = measures(repo, dataset)
  data = observations(repo, dataset)
  codes = codes(repo, dataset)

  attributes = arff_attribute_types(dims, meas, codes, data)

  dataset ||= dataSet(repo)
  title ||= dataset
  build_arff(title, attributes, data, dataset)
end

# Maps every dimension and measure to its ARFF attribute type, based on the
# range recommended for the component's observed values. Shared by
# #from_turtle and #from_store, which used to carry identical copies.
#
# @return [Hash] component name => ARFF type string
def arff_attribute_types(dims, meas, codes, data)
  (dims | meas).each_with_object({}) do |component, attributes|
    values = data.map { |observation| observation[1][component] }
    attributes[component] =
      case recommend_range(values)
      when "xsd:int" then "integer"
      when "xsd:double" then "real"
      when :coded
        component_codes = codes[component]
        # A nominal type needs its code list. Without one (or for a coded
        # measure) fall back to a plain string instead of failing on nil.
        if dims.include?(component) && component_codes && !component_codes.empty?
          "{#{component_codes.join(', ')}}"
        else
          "string"
        end
      else
        # unrecognised range: "string" keeps the @ATTRIBUTE line well-formed
        "string"
      end
  end
end
private :arff_attribute_types
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
module PubliSci
|
2
|
+
module Writers
|
3
|
+
class Base
|
4
|
+
include PubliSci::Query
|
5
|
+
include PubliSci::Parser
|
6
|
+
include PubliSci::Analyzer
|
7
|
+
|
8
|
+
# Normalizes the input of the query helpers to a repository.
#
# @param input [String, RDF::Repository] path of an existing file to load,
#   or a repository, which is returned as is
# @return [RDF::Repository] result of RDF::Repository.load for a path,
#   otherwise +input+ itself
# @raise [RuntimeError] for a String that is not an existing file and for
#   any other kind of input
def handle_input(input)
  case input
  when String
    raise "UnkownStringInput: #{input}" unless File.exist? input

    RDF::Repository.load(input)
  when RDF::Repository
    input
  else
    raise "UnkownInput: #{input}, #{input.class}"
  end
end
|
21
|
+
|
22
|
+
# Runs the "dimensions.rq" query and returns one string per result row.
#
# @param input [String, RDF::Repository] anything #handle_input accepts
# @param data_set [String, nil] when given, "?dataSet" is substituted with
#   "<data_set>" in the query
# @param select [Symbol] which field of each result row to return
# @return [Array<String>] the selected field of every row, as strings
def dimensions(input, data_set=nil, select=:label)
  repo = handle_input(input)

  query_args = ["dimensions.rq", repo, :graph]
  query_args << {"?dataSet"=>"<#{data_set}>"} if data_set

  solutions = execute_from_file(*query_args)
  solutions.to_h.map { |row| row[select].to_s }
end
|
33
|
+
|
34
|
+
# Runs the "measures.rq" query and returns one string per result row.
#
# @param input [String, RDF::Repository] anything #handle_input accepts
# @param data_set [String, nil] when given, "?dataSet" is substituted with
#   "<data_set>" in the query
# @param select [Symbol] which field of each result row to return
# @return [Array<String>] the selected field of every row, as strings
def measures(input, data_set=nil, select=:label)
  repo = handle_input(input)

  query_args = ["measures.rq", repo, :graph]
  query_args << {"?dataSet"=>"<#{data_set}>"} if data_set

  solutions = execute_from_file(*query_args)
  solutions.to_h.map { |row| row[select].to_s }
end
|
45
|
+
|
46
|
+
# Runs the "observations.rq" query and converts the result with
# #observation_hash.
#
# @param input [String, RDF::Repository] anything #handle_input accepts
# @param data_set [String, nil] when given, "?dataSet" is substituted with
#   "<data_set>" in the query
# @param shorten_url [Boolean] passed through to #observation_hash
# @return [Object] whatever #observation_hash returns
def observations(input, data_set = nil, shorten_url = true)
  repo = handle_input(input)

  query_args = ["observations.rq", repo, :graph]
  query_args << {"?dataSet"=>"<#{data_set}>"} if data_set

  observation_hash(execute_from_file(*query_args), shorten_url)
end
|
57
|
+
|
58
|
+
# Runs the "dataset.rq" query and returns a field of its first result row.
#
# @param input [String, RDF::Repository] anything #handle_input accepts
# @param select [Symbol] which field of the first row to return
# @return [String] the selected field, converted with to_s
def dataSet(input, select = :label)
  repo = handle_input(input)

  solutions = execute_from_file("dataset.rq", repo, :graph)
  first_row = solutions.to_h.first
  first_row[select].to_s
end
|
63
|
+
|
64
|
+
# Runs the "codes.rq" query and groups the results: the first value of each
# row (as a string) becomes the key, and the last value of every row with
# that key is collected, in order, into the key's list.
#
# @param input [String, RDF::Repository] anything #handle_input accepts
# @param data_set [String, nil] when given, "?dataSet" is substituted with
#   "<data_set>" in the query
# @param select [Symbol] accepted for symmetry with the other helpers; unused
# @return [Hash{String=>Array<String>}] first value => list of last values
def codes(input, data_set = nil, select = :label)
  repo = handle_input(input)

  query_args = ["codes.rq", repo, :graph]
  query_args << {"?dataSet"=>"<#{data_set}>"} if data_set
  rows = execute_from_file(*query_args).to_h

  rows.map { |row| row.values.map(&:to_s) }.each_with_object({}) do |fields, grouped|
    (grouped[fields.first] ||= []) << fields.last
  end
end
|
76
|
+
|
77
|
+
# Loads a Turtle file into a repository and converts it with #repo_to_ruby.
#
# @param turtle_file [String] path handed to RDF::Repository.load
# @param select_dataset [String, nil] passed through to #repo_to_ruby
# @param shorten_url [Boolean] passed through to #repo_to_ruby
# @return [Hash] the result of #repo_to_ruby
def turtle_to_ruby(turtle_file, select_dataset=nil, shorten_url=true)
  loaded_repo = RDF::Repository.load(turtle_file)
  repo_to_ruby(loaded_repo, select_dataset, shorten_url)
end
|
82
|
+
|
83
|
+
# Reads a data cube out of a repository into plain Ruby structures.
#
# @param repo [Object] input accepted by the query helpers
# @param select_dataset [String, nil] dataset to read; when not given it is
#   looked up with dataSet(repo, :dataset)
# @param shorten_url [Boolean] passed through to #observations
# @return [Hash] with the keys :measures, :dimensions, :coded_dimensions
#   and :data
def repo_to_ruby(repo,select_dataset=nil, shorten_url=true)
  select_dataset ||= dataSet(repo, :dataset)

  dimension_list = dimensions(repo, select_dataset)
  measure_list = measures(repo, select_dataset)
  code_map = codes(repo, select_dataset)
  observation_data = observations(repo, select_dataset, shorten_url)

  {
    measures: measure_list,
    dimensions: dimension_list,
    coded_dimensions: code_map,
    data: observation_data
  }
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|