bio-publisci 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.rspec +1 -0
- data/.travis.yml +13 -0
- data/Gemfile +24 -0
- data/LICENSE.txt +20 -0
- data/README.md +47 -0
- data/README.rdoc +48 -0
- data/Rakefile +70 -0
- data/bin/bio-publisci +83 -0
- data/features/create_generator.feature +25 -0
- data/features/integration.feature +12 -0
- data/features/integration_steps.rb +10 -0
- data/features/orm.feature +60 -0
- data/features/orm_steps.rb +74 -0
- data/features/reader.feature +25 -0
- data/features/reader_steps.rb +60 -0
- data/features/step_definitions/bio-publisci_steps.rb +0 -0
- data/features/store.feature +27 -0
- data/features/store_steps.rb +42 -0
- data/features/support/env.rb +13 -0
- data/features/writer.feature +9 -0
- data/features/writer_steps.rb +17 -0
- data/lib/bio-publisci/analyzer.rb +57 -0
- data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +219 -0
- data/lib/bio-publisci/dataset/ORM/observation.rb +20 -0
- data/lib/bio-publisci/dataset/data_cube.rb +308 -0
- data/lib/bio-publisci/dataset/interactive.rb +57 -0
- data/lib/bio-publisci/loader.rb +36 -0
- data/lib/bio-publisci/metadata/metadata.rb +105 -0
- data/lib/bio-publisci/parser.rb +64 -0
- data/lib/bio-publisci/query/query_helper.rb +114 -0
- data/lib/bio-publisci/r_client.rb +54 -0
- data/lib/bio-publisci/readers/arff.rb +87 -0
- data/lib/bio-publisci/readers/big_cross.rb +119 -0
- data/lib/bio-publisci/readers/cross.rb +72 -0
- data/lib/bio-publisci/readers/csv.rb +54 -0
- data/lib/bio-publisci/readers/dataframe.rb +66 -0
- data/lib/bio-publisci/readers/r_matrix.rb +152 -0
- data/lib/bio-publisci/store.rb +56 -0
- data/lib/bio-publisci/writers/arff.rb +66 -0
- data/lib/bio-publisci/writers/dataframe.rb +81 -0
- data/lib/bio-publisci.rb +36 -0
- data/lib/r2rdf.rb +226 -0
- data/lib/template_bak/publisci.rb +3 -0
- data/lib/template_bak.rb +12 -0
- data/lib/vocabs/cc.rb +18 -0
- data/lib/vocabs/cert.rb +13 -0
- data/lib/vocabs/dc.rb +63 -0
- data/lib/vocabs/dc11.rb +23 -0
- data/lib/vocabs/doap.rb +45 -0
- data/lib/vocabs/exif.rb +168 -0
- data/lib/vocabs/foaf.rb +69 -0
- data/lib/vocabs/geo.rb +13 -0
- data/lib/vocabs/http.rb +26 -0
- data/lib/vocabs/ma.rb +78 -0
- data/lib/vocabs/owl.rb +59 -0
- data/lib/vocabs/rdfs.rb +17 -0
- data/lib/vocabs/rsa.rb +12 -0
- data/lib/vocabs/rss.rb +14 -0
- data/lib/vocabs/sioc.rb +93 -0
- data/lib/vocabs/skos.rb +36 -0
- data/lib/vocabs/wot.rb +21 -0
- data/lib/vocabs/xhtml.rb +9 -0
- data/lib/vocabs/xsd.rb +58 -0
- data/resources/queries/codes.rq +13 -0
- data/resources/queries/dataset.rq +7 -0
- data/resources/queries/dimension_ranges.rq +8 -0
- data/resources/queries/dimensions.rq +7 -0
- data/resources/queries/measures.rq +7 -0
- data/resources/queries/observations.rq +12 -0
- data/resources/queries/test.rq +3 -0
- data/resources/weather.numeric.arff +23 -0
- data/spec/analyzer_spec.rb +36 -0
- data/spec/bio-publisci_spec.rb +7 -0
- data/spec/csv/bacon.csv +4 -0
- data/spec/csv/moar_bacon.csv +11 -0
- data/spec/data_cube_spec.rb +166 -0
- data/spec/generators/csv_spec.rb +44 -0
- data/spec/generators/dataframe_spec.rb +44 -0
- data/spec/generators/r_matrix_spec.rb +35 -0
- data/spec/queries/integrity/1.rq +21 -0
- data/spec/queries/integrity/11.rq +29 -0
- data/spec/queries/integrity/12.rq +37 -0
- data/spec/queries/integrity/14.rq +25 -0
- data/spec/queries/integrity/19_1.rq +21 -0
- data/spec/queries/integrity/19_2.rq +15 -0
- data/spec/queries/integrity/2.rq +22 -0
- data/spec/queries/integrity/3.rq +19 -0
- data/spec/queries/integrity/4.rq +13 -0
- data/spec/queries/integrity/5.rq +14 -0
- data/spec/r_builder_spec.rb +33 -0
- data/spec/spec_helper.rb +17 -0
- data/spec/turtle/bacon +149 -0
- data/spec/turtle/reference +2066 -0
- metadata +259 -0
@@ -0,0 +1,54 @@
|
|
1
|
+
module R2RDF
  module Reader
    # Reads a CSV file and emits an RDF Data Cube serialization (N3/Turtle).
    # The first row of the CSV is treated as the header; by default the first
    # column is the single dimension and every other column is a measure.
    class CSV
      include R2RDF::Dataset::DataCube

      # Generate an N3 representation of +file+ as a Data Cube dataset.
      #
      # file         - path to the CSV file (first row must be a header)
      # dataset_name - name used for the generated dataset URIs
      # options      - :dimensions, :codes, :measures, :label_column, plus any
      #                options understood by DataCube#generate
      def generate_n3(file, dataset_name, options={})
        @data = ::CSV.read(file)
        @options = options
        generate(measures, dimensions, codes, observation_data, observation_labels, dataset_name, options)
      end

      # Dimension columns; defaults to the first header cell.
      def dimensions
        @options[:dimensions] || [@data[0][0]]
      end

      # Coded components; defaults to the dimensions.
      def codes
        @options[:codes] || dimensions
      end

      # Measure columns; defaults to every header cell that is not a dimension.
      def measures
        @options[:measures] || @data[0] - dimensions
      end

      # One label per observation row: the :label_column values if given,
      # otherwise 1-based row numbers.
      def observation_labels
        if @options[:label_column]
          # drop(1) skips the header without the old dup-then-shift full copy
          @data.drop(1).map { |row| row[@options[:label_column]] }
        else
          (1..@data.size - 1).to_a
        end
      end

      # Column-oriented data: header label => array of that column's values
      # (header row excluded).
      def observation_data
        obs = {}
        @data[0].each { |label| obs[label] = [] }
        @data.drop(1).each do |row|
          row.each_with_index { |entry, i| obs[@data[0][i]] << entry }
        end
        obs
      end
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
module R2RDF
  module Reader
    # Builds an RDF Data Cube from an Rserve REXP holding an R data frame.
    class Dataframe
      include R2RDF::Dataset::DataCube

      # Serialize the R data frame expression +rexp+ as dataset +var+.
      def generate_n3(rexp, var, options={})
        @rexp = rexp
        @options = options

        generate(measures, dimensions, codes, observation_data, observation_labels, var, options)
      end

      # Dimension components: explicit :dimensions, else the :row_label
      # column, else the synthetic "refRow" column.
      def dimensions
        @options[:dimensions] || [@options[:row_label] || "refRow"]
      end

      # Coded components follow the same fallback chain as #dimensions.
      def codes
        @options[:codes] || [@options[:row_label] || "refRow"]
      end

      # Measure components: explicit :measures or the data frame's column
      # names, minus any explicit dimensions.
      def measures
        candidates = @options[:measures] || @rexp.payload.names
        dims = @options[:dimensions]
        dims ? candidates - dims : candidates
      end

      # R row names, or 1-based indices when the frame has none.
      def observation_labels
        names = @rexp.attr.payload["row.names"].to_ruby
        return names if names.first
        (1..@rexp.payload.first.to_ruby.size).to_a
      end

      # Column name => values hash, with the row labels appended under the
      # row-label column.
      def observation_data
        data = {}
        @rexp.payload.names.each do |column|
          data[column] = @rexp.payload[column].to_ruby
        end
        data[@options[:row_label] || "refRow"] = observation_labels
        data
      end
    end
  end
end
|
@@ -0,0 +1,152 @@
|
|
1
|
+
module R2RDF
  module Reader
    # Converts an R matrix (or data frame) of values — e.g. LOD scores indexed
    # by probe and marker — into RDF Data Cube turtle files, written in chunks
    # of :probes_per_file probes per output file.
    #
    # NOTE: this is largely hard coded for a probe/marker/value layout and does
    # no dimension or code generation. Since it is a set of scores indexed by
    # probe and marker the usual datacube generator won't work; an option to
    # specify this kind of dataset may be added later.
    class RMatrix
      include R2RDF::Dataset::DataCube

      # Stream the matrix in R variable +var+ (via Rserve +client+) out to
      # turtle files named "#{outfile_base}_<n>.ttl", plus a
      # "#{outfile_base}_structure.ttl" file holding the cube structure.
      #
      # options: :probes_per_file (default 100), :type (:dataframe switches
      # column access), :measures, :quiet
      def generate_n3(client, var, outfile_base, options={})
        meas = measures(client,var,options)
        dim = dimensions(client,var,options)
        codes = codes(client,var,options)

        outvar = sanitize([var]).first

        probes_per_file = options[:probes_per_file] || 100
        col_select = options[:type] == :dataframe ? "names" : "colnames"

        # write structure
        open(outfile_base+'_structure.ttl','w'){|f| f.write structure(client,var,outvar,options)}

        probes = client.eval("#{col_select}(#{var})").to_ruby
        if probes == nil
          # unnamed columns: give them numeric names so they can be referenced
          client.eval("colnames(#{var})=1:ncol(#{var})")
          probes = client.eval("#{col_select}(#{var})").to_ruby
        end
        markers = rows(client,var,options)

        probes.each_with_index{|probe,i|
          file_index = i / probes_per_file
          # write prefixes and erase old file on the first probe of each chunk
          open(outfile_base+"_#{file_index}.ttl",'w'){|f| f.write prefixes(var,options)} if i % probes_per_file == 0

          probe_number = i + 1 # R columns are 1-based
          obs_data = observation_data(client,var,probe_number,markers,options)
          labels = labels_for(client,var,probe)

          # BUGFIX: the chunk index is derived from the pre-increment probe
          # index. Previously `i` was incremented before computing the append
          # target, so the last probe of each chunk landed in the *next* file,
          # which was then truncated when that file's prefixes were written.
          open(outfile_base+"_#{file_index}.ttl",'a'){|f| observations(meas,dim,codes,obs_data,labels,outvar,options).each{|obs| f.write obs}}
          puts "#{probe_number}/#{probes.size}" unless options[:quiet]
        }
      end

      # Cube structure (DSD, dataset, component specs, measure properties)
      # as a turtle string.
      def structure(client,var,outvar,options={})
        meas = measures(client,var,options)
        dim = dimensions(client,var,options)
        codes = codes(client,var,options)

        str = prefixes(var, options)
        str << data_structure_definition(meas,outvar,options)
        str << dataset(outvar,options)
        component_specifications(meas, dim, var, options).each{ |c| str << c }
        measure_properties(meas,var,options).each{|m| str << m}

        str
      end

      # For now everything is a measure: [probe, marker, value] unless overridden.
      def measures(client, var, options={})
        options[:measures] || ["probe","marker","value"]
      end

      # No dimensions are generated for this layout.
      def dimensions(client, var, options={})
        []
      end

      # No code lists are generated for this layout.
      def codes(client, var, options={})
        []
      end

      # Observation labels for one probe: "<probe_id>_<n>", one per row.
      def labels_for(connection,var,probe_id,options={})
        # reuse #rows instead of duplicating the NULL-row-names fallback logic
        (1..rows(connection,var,options).size).map{|n| "#{probe_id}_#{n}"}
      end

      # Row names of +var+, or 1-based indices when R reports NULL.
      # (Comparison against eval('NULL') is hacky because the Rserve client's
      # .to_ruby method doesn't fully work.)
      def rows(connection,var,options={})
        row_names = connection.eval("row.names(#{var})")
        if row_names == connection.eval('NULL')
          (1..connection.eval("nrow(#{var})").payload.first).to_a
        else
          row_names.payload
        end
      end

      # Column-oriented observation data for one probe:
      #   { col_label => [probe_id ... ], row_label => markers, val_label => values }
      # Labels default to probe/marker/value and may be overridden positionally
      # via options[:measures].
      def observation_data(client, var, probe_number, row_names, options={})
        col_label, row_label, val_label = "probe", "marker", "value"
        if options[:measures]
          col_label = options[:measures][0] || "probe"
          row_label = options[:measures][1] || "marker"
          val_label = options[:measures][2] || "value"
        end

        data = { col_label => [], row_label => [], val_label => [] }

        if options[:type] == :dataframe
          probe_obj = client.eval("#{var}[[#{probe_number}]]").to_ruby
          col_select = "names"
        else
          probe_obj = client.eval("#{var}[,#{probe_number}]").to_ruby
          col_select = "colnames"
        end
        probe_id = client.eval("#{col_select}(#{var})[[#{probe_number}]]").to_ruby

        data[col_label] = Array.new(probe_obj.size, probe_id)
        probe_obj.each_with_index{|lod,i|
          data[row_label] << row_names[i]
          data[val_label] << lod
        }

        data.each_value(&:flatten!)
        data
      end
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module R2RDF
  # Handles connection and messaging to/from the triple store — either an
  # in-memory RDF::Repository (:graph) or a 4store HTTP endpoint (:fourstore).
  class Store
    include R2RDF::Query

    # Default configuration. TODO: port etc should eventually be extracted
    # from the URI if given.
    def defaults
      {
        type: :fourstore,
        url: "http://localhost:8080",
        replace: false
      }
    end

    def initialize(options={})
      @options = defaults.merge(options)
    end

    # Load the turtle +file+ into +graph+.
    # For :graph, +graph+ must be an RDF::Repository, which becomes the store.
    # For :fourstore, the file is PUT (replace) or POSTed (append) via curl.
    def add(file,graph)
      if @options[:type] == :graph
        # BUGFIX: was `throw`, which raises UncaughtThrowError with no
        # matching catch; `raise` is what was intended.
        raise ArgumentError, "please provide an RDF::Repository" unless graph.is_a? RDF::Repository
        graph.load(file)
        @store = graph
        @store
      elsif @options[:type] == :fourstore
        # SECURITY NOTE: file/graph/url are interpolated into a shell command;
        # do not pass untrusted values here.
        if @options[:replace]
          `curl -T #{file} -H 'Content-Type: application/x-turtle' #{@options[:url]}/data/http%3A%2F%2Frqtl.org%2F#{graph}`
        else
          `curl --data-urlencode data@#{file} -d 'graph=http%3A%2F%2Frqtl.org%2F#{graph}' -d 'mime-type=application/x-turtle' #{@options[:url]}/data/`
        end
      end
    end

    # Load every file in +dir+ matching +pattern+ into +graph+, printing
    # progress. +pattern+ may be a Regexp, or :ttl/:turtle for "*.ttl".
    def add_all(dir, graph, pattern=nil)
      pattern = /.+\.ttl/ if pattern == :turtle || pattern == :ttl

      files = Dir.entries(dir) - %w(. ..)
      files = files.grep(pattern) if pattern.is_a? Regexp
      nfiles = files.size
      n = 0
      # BUGFIX: Dir.entries returns bare names; join with dir so this works
      # regardless of the current working directory.
      files.each{|file| puts file + " #{n+=1}/#{nfiles} files"; puts add(File.join(dir, file),graph)}
    end

    # Run a SPARQL query string against whichever backend this store wraps.
    def query(string)
      if @options[:type] == :graph
        execute(string, @store, :graph)
      elsif @options[:type] == :fourstore
        execute(string, @options[:url], :fourstore)
      end
    end

    # Endpoint URL (meaningful for the :fourstore backend).
    def url
      @options[:url]
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
module R2RDF
  module Writer
    # Converts an RDF Data Cube dataset into Weka's ARFF format.
    class ARFF
      include R2RDF::Query
      include R2RDF::Parser
      include R2RDF::Analyzer

      # Assemble an ARFF document string.
      #
      # relation   - relation (dataset) name
      # attributes - {attribute_name => arff_type_string}
      # data       - observations; each entry's [1] is a {component => value} hash
      # source     - provenance note written into the header comment
      def build_arff(relation, attributes, data, source)
        str = <<-EOS
% 1. Title: #{relation.capitalize} Database
%
% 2. Sources:
%      (a) Generated from RDF source #{source}
%
@RELATION #{relation}

        EOS

        # Attributes sorted by name so the column order is stable and matches
        # the per-row value order below.
        Hash[attributes.sort].each{|attribute,type|
          str << "@ATTRIBUTE #{attribute} #{type}\n"
        }

        str << "\n@DATA\n"
        data.each { |d| str << Hash[d[1].sort].values.join(',') + "\n" }

        str
      end

      # Build an ARFF document from a turtle file containing a Data Cube
      # dataset: load it into a temporary in-memory repo, query out the
      # dimensions, measures, code lists and observations, then render.
      def from_turtle(turtle_file, verbose=false)
        puts "loading #{turtle_file}" if verbose
        repo = RDF::Repository.load(turtle_file)
        puts "loaded #{repo.size} statements into temporary repo" if verbose

        dims = get_ary(execute_from_file("dimensions.rq",repo,:graph)).flatten
        meas = get_ary(execute_from_file("measures.rq",repo,:graph)).flatten
        relation = execute_from_file("dataset.rq",repo,:graph).to_h.first[:label].to_s
        # component => [code, ...]
        codes = execute_from_file("codes.rq",repo,:graph).to_h.map{|e| e.values.map(&:to_s)}.inject({}){|h,el|
          (h[el.first]||=[]) << el.last; h
        }

        data = observation_hash(execute_from_file("observations.rq",repo,:graph), true)
        attributes = {}
        (dims | meas).each{|component|
          # Map the inferred RDF range onto an ARFF attribute type; coded
          # dimensions become a nominal {a,b,...} specification.
          attributes[component] = case recommend_range(data.map{|o| o[1][component]})
          when "xsd:int"
            "integer"
          when "xsd:double"
            "real"
          when :coded
            if dims.include? component
              "{#{codes[component].join(',')}}"
            else
              "string"
            end
          end
        }

        build_arff(relation, attributes, data, turtle_file)
      end

      # Building ARFF directly from a SPARQL endpoint is not supported yet.
      def from_store(endpoint_url,variable_in=nil, variable_out=nil, verbose=false)
        raise "not implemented yet"
      end
    end
  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
module R2RDF
  module Writer
    # Helpers for reconstructing an R data frame from a Data Cube graph
    # via an Rserve connection.
    module Dataframe

      # R statement that builds data frame +name+ from already-assigned
      # vector variables, e.g. "df = data.frame(a=a,b=b)".
      def framestring(name,vectors)
        framestr = "#{name} = data.frame("
        vectors.each_key{ |k| framestr << k + '=' + k + ',' }
        if vectors.empty?
          # ROBUSTNESS: with no vectors there is no trailing comma to
          # overwrite; previously this clobbered the '(' instead, producing
          # "name = data.frame)".
          framestr << ')'
        else
          framestr[-1] = ')'
        end
        framestr
      end

      # {column name => value vector} for +variable_name+, excluding the
      # synthetic "refRow" column.
      def get_vectors(variable_name, helper, repo)
        column_names = helper.get_ary(helper.execute(helper.property_names(variable_name), repo)).flatten.map{|n| n.gsub(' Component','')}
        vectors = {}
        column_names.each{|n|
          vectors[n] = helper.get_ary(helper.execute(helper.property_values(variable_name,n),repo),'to_f').flatten unless n == "refRow"
        }
        vectors
      end

      # Assign rows + vectors into the R session, build the data frame, set
      # its row names, and return the evaluated frame.
      def create_dataframe(name, connection, rows, vectors)
        connection.assign('rows', rows)
        vectors.each{ |k,v| connection.assign(k,v) }
        connection.eval(framestring(name,vectors))
        connection.eval("row.names(#{name}) <- rows")
        connection.eval(name)
      end

      # Persist the current R workspace to +loc+.
      def save_workspace(connection, loc)
        connection.eval "save.image(#{loc})"
      end

      # Row names of +variable+ from the repository.
      def get_rownames(variable, helper, repo)
        helper.get_ary(helper.execute(helper.row_names(variable), repo)).flatten
      end

    end

    class Builder
      include R2RDF::Writer::Dataframe

      # Rebuild R data frame +variable_out+ from the turtle serialization of
      # +variable_in+, optionally saving the R workspace afterwards.
      def from_turtle(turtle_file, connection, variable_in=nil, variable_out=nil, verbose=true, save=true)
        unless variable_in && variable_out
          puts "no variable specified. Simple inference coming soon" if verbose
          return
        end
        puts "loading #{turtle_file}" if verbose
        repo = RDF::Repository.load(turtle_file)
        puts "loaded #{repo.size} statements into temporary repo" if verbose
        query = R2RDF::QueryHelper.new
        rows = get_rownames(variable_in, query, repo)
        puts "frame has #{rows.size} rows" if verbose

        vectors = get_vectors(variable_in, query, repo)
        puts "got vectors of size #{vectors.first.last.size}" if verbose && vectors.first

        create_dataframe(variable_out, connection, rows, vectors)
        save_workspace(connection, connection.eval('getwd()').to_ruby) if save
      end

      # SPARQL-endpoint-backed version; only fetches row names so far
      # (incomplete implementation).
      def from_store(endpoint_url,connection,variable_in=nil, variable_out=nil, verbose=true, save=true)
        unless variable_in && variable_out
          puts "no variable specified. Simple inference coming soon" if verbose
          return
        end
        puts "connecting to endpoint at #{endpoint_url}" if verbose
        sparql = SPARQL::Client.new(endpoint_url)
        query = R2RDF::QueryHelper.new

        rows = query.get_ary(sparql.query(query.row_names(variable_in))).flatten

      end

    end
  end
end
|
data/lib/bio-publisci.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# Temporary bootstrap: rather than rewriting r2rdf.rb into a standard gem
# layout yet, this uses `load` (not `require`) so files can be re-loaded
# easily during development.
require 'tempfile'
require 'rdf'
require 'csv'
require 'rserve'
require 'sparql'
require 'sparql/client'
require 'rdf/turtle'

# Load every file in +folder+, relative to this file's directory.
def load_folder(folder)
  base = File.dirname(__FILE__) + "/#{folder}"
  Dir.foreach(base) do |file|
    next if file == "." || file == ".."
    load "#{base}/#{file}"
  end
end

# Core files, in dependency order.
%w[
  bio-publisci/dataset/interactive.rb
  bio-publisci/query/query_helper.rb
  bio-publisci/parser.rb
  bio-publisci/r_client.rb
  bio-publisci/analyzer.rb
  bio-publisci/store.rb
  bio-publisci/dataset/data_cube.rb
].each { |rel| load File.dirname(__FILE__) + '/' + rel }

load_folder('bio-publisci/metadata')
load_folder('bio-publisci/readers')
load_folder('bio-publisci/writers')
load_folder('bio-publisci/dataset/ORM')