bio-publisci 0.0.1
- data/.document +5 -0
- data/.rspec +1 -0
- data/.travis.yml +13 -0
- data/Gemfile +24 -0
- data/LICENSE.txt +20 -0
- data/README.md +47 -0
- data/README.rdoc +48 -0
- data/Rakefile +70 -0
- data/bin/bio-publisci +83 -0
- data/features/create_generator.feature +25 -0
- data/features/integration.feature +12 -0
- data/features/integration_steps.rb +10 -0
- data/features/orm.feature +60 -0
- data/features/orm_steps.rb +74 -0
- data/features/reader.feature +25 -0
- data/features/reader_steps.rb +60 -0
- data/features/step_definitions/bio-publisci_steps.rb +0 -0
- data/features/store.feature +27 -0
- data/features/store_steps.rb +42 -0
- data/features/support/env.rb +13 -0
- data/features/writer.feature +9 -0
- data/features/writer_steps.rb +17 -0
- data/lib/bio-publisci/analyzer.rb +57 -0
- data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +219 -0
- data/lib/bio-publisci/dataset/ORM/observation.rb +20 -0
- data/lib/bio-publisci/dataset/data_cube.rb +308 -0
- data/lib/bio-publisci/dataset/interactive.rb +57 -0
- data/lib/bio-publisci/loader.rb +36 -0
- data/lib/bio-publisci/metadata/metadata.rb +105 -0
- data/lib/bio-publisci/parser.rb +64 -0
- data/lib/bio-publisci/query/query_helper.rb +114 -0
- data/lib/bio-publisci/r_client.rb +54 -0
- data/lib/bio-publisci/readers/arff.rb +87 -0
- data/lib/bio-publisci/readers/big_cross.rb +119 -0
- data/lib/bio-publisci/readers/cross.rb +72 -0
- data/lib/bio-publisci/readers/csv.rb +54 -0
- data/lib/bio-publisci/readers/dataframe.rb +66 -0
- data/lib/bio-publisci/readers/r_matrix.rb +152 -0
- data/lib/bio-publisci/store.rb +56 -0
- data/lib/bio-publisci/writers/arff.rb +66 -0
- data/lib/bio-publisci/writers/dataframe.rb +81 -0
- data/lib/bio-publisci.rb +36 -0
- data/lib/r2rdf.rb +226 -0
- data/lib/template_bak/publisci.rb +3 -0
- data/lib/template_bak.rb +12 -0
- data/lib/vocabs/cc.rb +18 -0
- data/lib/vocabs/cert.rb +13 -0
- data/lib/vocabs/dc.rb +63 -0
- data/lib/vocabs/dc11.rb +23 -0
- data/lib/vocabs/doap.rb +45 -0
- data/lib/vocabs/exif.rb +168 -0
- data/lib/vocabs/foaf.rb +69 -0
- data/lib/vocabs/geo.rb +13 -0
- data/lib/vocabs/http.rb +26 -0
- data/lib/vocabs/ma.rb +78 -0
- data/lib/vocabs/owl.rb +59 -0
- data/lib/vocabs/rdfs.rb +17 -0
- data/lib/vocabs/rsa.rb +12 -0
- data/lib/vocabs/rss.rb +14 -0
- data/lib/vocabs/sioc.rb +93 -0
- data/lib/vocabs/skos.rb +36 -0
- data/lib/vocabs/wot.rb +21 -0
- data/lib/vocabs/xhtml.rb +9 -0
- data/lib/vocabs/xsd.rb +58 -0
- data/resources/queries/codes.rq +13 -0
- data/resources/queries/dataset.rq +7 -0
- data/resources/queries/dimension_ranges.rq +8 -0
- data/resources/queries/dimensions.rq +7 -0
- data/resources/queries/measures.rq +7 -0
- data/resources/queries/observations.rq +12 -0
- data/resources/queries/test.rq +3 -0
- data/resources/weather.numeric.arff +23 -0
- data/spec/analyzer_spec.rb +36 -0
- data/spec/bio-publisci_spec.rb +7 -0
- data/spec/csv/bacon.csv +4 -0
- data/spec/csv/moar_bacon.csv +11 -0
- data/spec/data_cube_spec.rb +166 -0
- data/spec/generators/csv_spec.rb +44 -0
- data/spec/generators/dataframe_spec.rb +44 -0
- data/spec/generators/r_matrix_spec.rb +35 -0
- data/spec/queries/integrity/1.rq +21 -0
- data/spec/queries/integrity/11.rq +29 -0
- data/spec/queries/integrity/12.rq +37 -0
- data/spec/queries/integrity/14.rq +25 -0
- data/spec/queries/integrity/19_1.rq +21 -0
- data/spec/queries/integrity/19_2.rq +15 -0
- data/spec/queries/integrity/2.rq +22 -0
- data/spec/queries/integrity/3.rq +19 -0
- data/spec/queries/integrity/4.rq +13 -0
- data/spec/queries/integrity/5.rq +14 -0
- data/spec/r_builder_spec.rb +33 -0
- data/spec/spec_helper.rb +17 -0
- data/spec/turtle/bacon +149 -0
- data/spec/turtle/reference +2066 -0
- metadata +259 -0

data/features/writer.feature
@@ -0,0 +1,9 @@
+Feature: export to various formats using writers
+
+  In order to use RDF encoded data in other applications
+  I want to export domain objects using an R2RDF::Writer object
+
+  Scenario: write to ARFF format
+    Given an ARFF writer
+    When I call its from_turtle method on the file spec/turtle/bacon
+    Then I should receive a .arff file as a string

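Outside Cucumber, the same flow is a few lines of Ruby. The following is a minimal usage sketch, not shipped with the gem, assuming `require 'bio-publisci'` loads the R2RDF::Writer::ARFF class exercised by this feature:

    require 'bio-publisci'

    # Hypothetical driver for the scenario above: turn a Turtle file into ARFF text.
    writer = R2RDF::Writer::ARFF.new
    arff   = writer.from_turtle('spec/turtle/bacon')  # returns the .arff content as a String
    File.write('bacon.arff', arff)
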
data/features/writer_steps.rb
@@ -0,0 +1,17 @@
+Given(/^an? (.*) writer$/) do |type|
+  @writer = R2RDF::Writer.const_get(type).new
+end
+
+When(/^I call its from_turtle method on the file (.*)$/) do |file|
+  @result = @writer.from_turtle(file)
+end
+
+When(/^I call its from_turtle method on the turtle string$/) do
+  f = Tempfile.open('writerttl'); f.write @turtle_string; f.close
+  @result = @writer.from_turtle(f.path)
+  f.unlink
+end
+
+Then(/^I should receive a \.arff file as a string$/) do
+  @result.is_a?(String).should be true
+end

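The first step resolves writer classes by name with `const_get`, so any class defined under R2RDF::Writer can be requested directly from a feature file. A small sketch of that lookup (illustrative, not part of the step file):

    type   = 'ARFF'                             # captured from the step text
    writer = R2RDF::Writer.const_get(type).new  # => an R2RDF::Writer::ARFF instance
    # any other writer class defined under R2RDF::Writer resolves the same way

Note that the Tempfile-based step assumes 'tempfile' has been required, typically in features/support/env.rb.
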
data/lib/bio-publisci/analyzer.rb
@@ -0,0 +1,57 @@
+module R2RDF
+
+  # Handles analysis of R expressions to extract properties and recognize
+  # potential ambiguity.
+  module Analyzer
+    def dirty?(data)
+      if data.is_a? Hash
+        data.map{|k,v|
+          return true if dirty?(k) || dirty?(v)
+        }
+        false
+      elsif data.is_a? Array
+        data.map{|datum|
+          return true if dirty?(datum)
+        }
+        false
+      else
+        dirty_characters = ['.', ' ']
+        if (data.to_s.scan(/./) & dirty_characters).any?
+          true
+        else
+          false
+        end
+      end
+    end
+
+    def recommend_range(data)
+      classes = data.map{|d| d.class}
+      homogenous = classes.uniq.size == 1
+      if homogenous
+        if classes[0] == Fixnum
+          "xsd:int"
+        elsif classes[0] == Float
+          "xsd:double"
+        elsif classes[0] == String
+          recommend_range_strings(data)
+        else
+          :coded
+        end
+      else
+        :coded
+      end
+    end
+
+    def recommend_range_strings(data)
+      return "xsd:int"    if data.all?{|d| Integer(d) rescue nil}
+      return "xsd:double" if data.all?{|d| Float(d) rescue nil}
+      :coded
+    end
+
+    def check_integrity(obs, dimensions, measures)
+      obs.map{|o|
+        raise "MissingValues for #{(dimensions | measures) - o.keys}" unless ((dimensions | measures) - o.keys).empty?
+        raise "UnknownProperty #{o.keys - (dimensions | measures)}" unless (o.keys - (dimensions | measures)).empty?
+      }
+    end
+  end
+end

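For a sense of how these helpers behave, here is a short, hypothetical console session (values invented for illustration; the module is mixed into a throwaway class since its methods are instance methods):

    require 'bio-publisci'

    class Demo; include R2RDF::Analyzer; end
    a = Demo.new

    a.dirty?("flanking marker")      # => true, contains a space
    a.recommend_range([1.5, 2.0])    # => "xsd:double"
    a.recommend_range(%w[1 2 3])     # => "xsd:int"  (numeric strings)
    a.recommend_range(["a", 1])      # => :coded     (mixed types)

    # raises unless every observation covers exactly the declared components
    a.check_integrity([{"chromosome" => "1", "lod" => 3.2}], ["chromosome"], ["lod"])
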
data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb
@@ -0,0 +1,219 @@
+module R2RDF
+  module Dataset
+    module ORM
+      class DataCube
+        extend R2RDF::Dataset::DataCube
+        extend R2RDF::Analyzer
+        extend R2RDF::Metadata
+        extend R2RDF::Query
+        extend R2RDF::Parser
+
+        include R2RDF::Dataset::DataCube
+        include R2RDF::Analyzer
+        include R2RDF::Metadata
+        include R2RDF::Query
+        include R2RDF::Parser
+
+        attr_accessor :labels
+        attr_accessor :dimensions
+        attr_accessor :measures
+        attr_accessor :obs
+        attr_accessor :meta
+
+        def initialize(options={}, do_parse=true)
+          @dimensions = {}
+          @measures = []
+          @obs = []
+          @generator_options = {}
+          @options = {}
+
+          @meta = {}
+
+          parse_options options if do_parse
+        end
+
+        def self.load(graph, options={}, verbose=false)
+          graph = create_graph(graph) unless graph =~ /^http/
+
+          # puts get_hashes(execute_from_file('dimension_ranges.rq',graph))
+          dimensions = Hash[get_hashes(execute_from_file('dimension_ranges.rq',graph),"to_s").map{|solution|
+            # TODO coded properties should be found via SPARQL queries
+            if solution[:range].split('/')[-2] == "code"
+              type = :coded
+            else
+              type = strip_uri(solution[:range])
+            end
+            [strip_uri(solution[:dimension]), {type: type}]
+          }]
+          puts "dimensions: #{dimensions}" if verbose
+          measures = get_ary(execute_from_file('measures.rq',graph)).flatten
+          puts "measures: #{measures}" if verbose
+          name = execute_from_file('dataset.rq',graph).to_h.first[:label]
+          puts "dataset: #{name}" if verbose
+          obs = execute_from_file('observations.rq',graph)
+          puts "observations: #{obs}" if verbose
+          # observations = observation_hash(obs)
+          simple_observations = observation_hash(obs,true)
+
+          new_opts = {
+            measures: measures,
+            dimensions: dimensions,
+            observations: simple_observations.values,
+            name: name,
+          }
+
+          options = options.merge(new_opts)
+          puts "creating #{options}" if verbose
+          self.new(options)
+        end
+
+        def parse_options(options)
+          if options[:dimensions]
+            options[:dimensions].each{|name,details|
+              add_dimension(name, details[:type] || :coded)
+            }
+          end
+
+          if options[:measures]
+            options[:measures].each{|m| @measures << m}
+          end
+
+          if options[:observations]
+            options[:observations].each{|obs_data| add_observation obs_data}
+          end
+
+          @generator_options = options[:generator_options] if options[:generator_options]
+          @options[:skip_metadata] = options[:skip_metadata] if options[:skip_metadata]
+
+          if options[:name]
+            @name = options[:name]
+          else
+            raise "No dataset name specified!"
+          end
+
+          if options[:validate_each]
+            @options[:validate_each] = options[:validate_each]
+          end
+        end
+
+        def to_n3
+          # create labels if not specified
+          unless @labels.is_a?(Array) && @labels.size == @obs.size
+            if @labels.is_a? Symbol
+              # define some automatic labeling methods
+            else
+              @labels = (1..@obs.size).to_a.map(&:to_s)
+            end
+          end
+
+          data = {}
+
+          # collect observation data
+          check_integrity(@obs.map{|o| o.data}, @dimensions.keys, @measures)
+          @obs.map{|obs|
+            (@measures | @dimensions.keys).map{ |component|
+              (data[component] ||= []) << obs.data[component]
+            }
+          }
+
+          codes = @dimensions.map{|d,v| d if v[:type] == :coded}.compact
+
+          str = generate(@measures, @dimensions.keys, codes, data, @labels, @name, @generator_options)
+          unless @options[:skip_metadata]
+            fields = {
+              publishers: publishers(),
+              subject: subjects(),
+              author: author(),
+              description: description(),
+              date: date(),
+              var: @name,
+            }
+            # puts basic(fields,@generator_options)
+            str += "\n" + basic(fields,@generator_options)
+          end
+          str
+        end
+
+        def add_dimension(name, type=:coded)
+          @dimensions[name.to_s] = {type: type}
+        end
+
+        def add_measure(name)
+          @measures << name
+        end
+
+        def add_observation(data)
+          data = Hash[data.map{|k,v| [k.to_s, v]}]
+          obs = Observation.new(data)
+          check_integrity([obs.data], @dimensions.keys, @measures) if @options[:validate_each]
+          @obs << obs
+        end
+
+        def insert(observation)
+          @obs << observation
+        end
+
+        def publishers
+          @meta[:publishers] ||= []
+        end
+
+        def publishers=(publishers)
+          @meta[:publishers] = publishers
+        end
+
+        def subjects
+          @meta[:subject] ||= []
+        end
+
+        def subjects=(subjects)
+          @meta[:subject] = subjects
+        end
+
+        def add_publisher(label, uri)
+          publishers << {label: label, uri: uri}
+        end
+
+        def add_subject(id)
+          subjects << id
+        end
+
+        def author
+          @meta[:creator] ||= ""
+        end
+
+        def author=(author)
+          @meta[:creator] = author
+        end
+
+        def description
+          @meta[:description] ||= ""
+        end
+
+        def description=(description)
+          @meta[:description] = description
+        end
+
+        def date
+          @meta[:date] ||= "#{Time.now.day}-#{Time.now.month}-#{Time.now.year}"
+        end
+
+        def date=(date)
+          @meta[:date] = date
+        end
+
+        def to_h
+          {
+            measures: @measures,
+            dimensions: @dimensions,
+            observations: @obs.map{|o| o.data}
+          }
+        end
+      end
+    end
+  end
+end

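Putting the ORM class to work takes a name, some dimensions and measures, and a list of observation hashes. A minimal, hypothetical example (names and values are illustrative, not taken from the gem's specs):

    require 'bio-publisci'

    cube = R2RDF::Dataset::ORM::DataCube.new(
      name:       'bacon_survey',
      dimensions: { 'producer' => { type: :coded } },
      measures:   ['price'],
      observations: [
        { 'producer' => 'Alice', 'price' => 2.5 },
        { 'producer' => 'Bob',   'price' => 3.1 }
      ]
    )
    cube.author = 'Jane Doe'
    puts cube.to_n3   # Turtle: DSD, code lists, observations, plus basic metadata
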
data/lib/bio-publisci/dataset/ORM/observation.rb
@@ -0,0 +1,20 @@
+module R2RDF
+  module Dataset
+    module ORM
+      class Observation
+        attr_accessor :data
+
+        def initialize(data={})
+          @data = data
+        end
+
+        # Look up an entry of the data hash by name (keys are stored as
+        # strings by DataCube#add_observation).
+        def method_missing(name, *args)
+          key = name.to_s
+          @data.key?(key) ? @data[key] : super
+        end
+
+        def respond_to_missing?(method, *)
+          @data.key?(method.to_s) || super
+        end
+      end
+    end
+  end
+end

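With the accessor completed as above, an observation exposes its string-keyed values both through `data` and by name, while unknown names still raise NoMethodError. Illustrative values:

    obs = R2RDF::Dataset::ORM::Observation.new("producer" => "Alice", "price" => 2.5)
    obs.data       # => {"producer"=>"Alice", "price"=>2.5}
    obs.producer   # => "Alice"
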
data/lib/bio-publisci/dataset/data_cube.rb
@@ -0,0 +1,308 @@
+# Monkey patch to make RDF strings built with heredocs prettier ;)
+class String
+  def unindent
+    gsub /^#{self[/\A\s*/]}/, ''
+    # gsub(/^#{scan(/^\s*/).min_by{|l|l.length}}/, "")
+  end
+end
+
+module R2RDF
+  # Used to generate data cube observations, data structure definitions, etc.
+  module Dataset
+    module DataCube
+      def defaults
+        {
+          type: :dataframe,
+          encode_nulls: false,
+          base_url: "http://www.rqtl.org",
+        }
+      end
+
+      def generate(measures, dimensions, codes, data, observation_labels, var, options={})
+        dimensions = sanitize(dimensions)
+        codes = sanitize(codes)
+        measures = sanitize(measures)
+        var = sanitize([var]).first
+        data = sanitize_hash(data)
+
+        str = prefixes(var,options)
+        str << data_structure_definition((measures | dimensions), var, options)
+        str << dataset(var, options)
+        component_specifications(measures, dimensions, var, options).map{ |c| str << c }
+        dimension_properties(dimensions, codes, var, options).map{|p| str << p}
+        measure_properties(measures, var, options).map{|p| str << p}
+        code_lists(codes, data, var, options).map{|l| str << l}
+        concept_codes(codes, data, var, options).map{|c| str << c}
+        observations(measures, dimensions, codes, data, observation_labels, var, options).map{|o| str << o}
+        str
+      end
+
+      def sanitize(array)
+        # remove spaces and other special characters
+        processed = []
+        array.map{|entry|
+          if entry.is_a? String
+            processed << entry.gsub(/[\s\.]/,'_')
+          else
+            processed << entry
+          end
+        }
+        processed
+      end
+
+      def sanitize_hash(h)
+        mappings = {}
+        h.keys.map{|k|
+          if(k.is_a? String)
+            mappings[k] = k.gsub(' ','_')
+          end
+        }
+
+        h.keys.map{|k|
+          h[mappings[k]] = h.delete(k) if mappings[k]
+        }
+
+        h
+      end
+
+      def prefixes(var, options={})
+        var = sanitize([var]).first
+        options = defaults().merge(options)
+        base = options[:base_url]
+        <<-EOF.unindent
+        @base <#{base}/ns/dc/> .
+        @prefix ns: <#{base}/ns/dataset/#{var}#> .
+        @prefix qb: <http://purl.org/linked-data/cube#> .
+        @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+        @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+        @prefix prop: <#{base}/dc/properties/> .
+        @prefix dct: <http://purl.org/dc/terms/> .
+        @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+        @prefix cs: <#{base}/dc/dataset/#{var}/cs/> .
+        @prefix code: <#{base}/dc/dataset/#{var}/code/> .
+        @prefix class: <#{base}/dc/dataset/#{var}/class/> .
+        @prefix owl: <http://www.w3.org/2002/07/owl#> .
+        @prefix skos: <http://www.w3.org/2004/02/skos/core#> .
+        @prefix foaf: <http://xmlns.com/foaf/0.1/> .
+        @prefix org: <http://www.w3.org/ns/org#> .
+        @prefix prov: <http://www.w3.org/ns/prov#> .
+
+        EOF
+      end
+
+      def data_structure_definition(components, var, options={})
+        var = sanitize([var]).first
+        options = defaults().merge(options)
+        str = "ns:dsd-#{var} a qb:DataStructureDefinition;\n"
+        str << "  qb:component\n"
+        components.map{|n|
+          str << "    cs:#{n} ,\n"
+        }
+        str[-2] = '.'
+        str << "\n"
+        str
+      end
+
+      def dataset(var, options={})
+        var = sanitize([var]).first
+        options = defaults().merge(options)
+        <<-EOF.unindent
+        ns:dataset-#{var} a qb:DataSet ;
+          rdfs:label "#{var}"@en ;
+          qb:structure ns:dsd-#{var} .
+
+        EOF
+      end
+
+      def component_specifications(measure_names, dimension_names, var, options={})
+        options = defaults().merge(options)
+        specs = []
+
+        dimension_names.map{|d|
+          specs << <<-EOF.unindent
+          cs:#{d} a qb:ComponentSpecification ;
+            rdfs:label "#{d} Component" ;
+            qb:dimension prop:#{d} .
+
+          EOF
+        }
+
+        measure_names.map{|n|
+          specs << <<-EOF.unindent
+          cs:#{n} a qb:ComponentSpecification ;
+            rdfs:label "#{n} Component" ;
+            qb:measure prop:#{n} .
+
+          EOF
+        }
+
+        specs
+      end
+
+      def dimension_properties(dimensions, codes, var, options={})
+        options = defaults().merge(options)
+        props = []
+
+        dimensions.map{|d|
+          if codes.include?(d)
+            props << <<-EOF.unindent
+            prop:#{d} a rdf:Property, qb:DimensionProperty ;
+              rdfs:label "#{d}"@en ;
+              qb:codeList code:#{d.downcase} ;
+              rdfs:range code:#{d.downcase.capitalize} .
+
+            EOF
+          else
+            props << <<-EOF.unindent
+            prop:#{d} a rdf:Property, qb:DimensionProperty ;
+              rdfs:label "#{d}"@en .
+
+            EOF
+          end
+        }
+
+        props
+      end
+
+      def measure_properties(measures, var, options={})
+        options = defaults().merge(options)
+        props = []
+
+        measures.map{ |m|
+          props << <<-EOF.unindent
+          prop:#{m} a rdf:Property, qb:MeasureProperty ;
+            rdfs:label "#{m}"@en .
+
+          EOF
+        }
+
+        props
+      end
+
+      def observations(measures, dimensions, codes, data, observation_labels, var, options={})
+        var = sanitize([var]).first
+        options = defaults().merge(options)
+        obs = []
+        observation_labels.each_with_index.map{|r, i|
+          contains_nulls = false
+          str = <<-EOF.unindent
+          ns:obs#{r} a qb:Observation ;
+            qb:dataSet ns:dataset-#{var} ;
+          EOF
+
+          str << "  rdfs:label \"#{r}\" ;\n" unless options[:no_labels]
+
+          dimensions.map{|d|
+            contains_nulls = contains_nulls | (data[d][i] == nil)
+            if codes.include? d
+              str << "  prop:#{d} <code/#{d.downcase}/#{data[d][i]}> ;\n"
+            else
+              str << "  prop:#{d} ns:#{to_resource(data[d][i], options)} ;\n"
+            end
+          }
+
+          measures.map{|m|
+            contains_nulls = contains_nulls | (data[m][i] == nil)
+            str << "  prop:#{m} #{to_literal(data[m][i], options)} ;\n"
+          }
+
+          str << "  .\n\n"
+          obs << str unless contains_nulls && !options[:encode_nulls]
+        }
+        obs
+      end
+
+      def code_lists(codes, data, var, options={})
+        options = defaults().merge(options)
+        lists = []
+        codes.map{|code|
+          str = <<-EOF.unindent
+          code:#{code.downcase.capitalize} a rdfs:Class, owl:Class;
+            rdfs:subClassOf skos:Concept ;
+            rdfs:label "Code list for #{code} - codelist class"@en;
+            rdfs:comment "Specifies the #{code} for each observation";
+            rdfs:seeAlso code:#{code.downcase} .
+
+          code:#{code.downcase} a skos:ConceptScheme;
+            skos:prefLabel "Code list for #{code} - codelist scheme"@en;
+            rdfs:label "Code list for #{code} - codelist scheme"@en;
+            skos:notation "CL_#{code.upcase}";
+            skos:note "Specifies the #{code} for each observation";
+          EOF
+          data[code].uniq.map{|value|
+            unless value == nil && !options[:encode_nulls]
+              str << "  skos:hasTopConcept <code/#{code.downcase}/#{to_resource(value,options)}> ;\n"
+            end
+          }
+
+          str << "  .\n\n"
+          lists << str
+        }
+
+        lists
+      end
+
+      def concept_codes(codes, data, var, options={})
+        options = defaults().merge(options)
+        concepts = []
+        codes.map{|code|
+          data[code].uniq.map{|value|
+            unless value == nil && !options[:encode_nulls]
+              concepts << <<-EOF.unindent
+              <code/#{code.downcase}/#{to_resource(value,options)}> a skos:Concept, code:#{code.downcase.capitalize};
+                skos:topConceptOf code:#{code.downcase} ;
+                skos:prefLabel "#{to_resource(value,options)}" ;
+                skos:inScheme code:#{code.downcase} .
+
+              EOF
+            end
+          }
+        }
+
+        concepts
+      end
+
+      def to_resource(obj, options)
+        if obj.is_a? String
+          # TODO decide the right way to handle missing values, since RDF has no null;
+          # probably throw an error here, since a missing resource is a bigger problem
+          obj = "NA" if obj.empty?
+
+          # TODO remove special characters (faster) as well (eg '?')
+          obj.gsub(' ','_').gsub('?','')
+        elsif obj == nil && options[:encode_nulls]
+          '"NA"'
+        elsif obj.is_a? Numeric
+          # resources cannot be referred to purely by integer (?)
+          "n" + obj.to_s
+        else
+          obj
+        end
+      end
+
+      def to_literal(obj, options)
+        if obj.is_a? String
+          # Depressing that there's no more elegant way to check if a string is
+          # a number...
+          if val = Integer(obj) rescue nil
+            val
+          elsif val = Float(obj) rescue nil
+            val
+          else
+            '"' + obj + '"'
+          end
+        elsif obj == nil && options[:encode_nulls]
+          # TODO decide the right way to handle missing values, since RDF has no null
+          '"NA"'
+        else
+          obj
+        end
+      end
+    end
+  end
+end

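The ORM layer assembles these arguments automatically, but the generator can also be driven directly. A minimal sketch with hypothetical inputs:

    require 'bio-publisci'

    class Generator; include R2RDF::Dataset::DataCube; end

    turtle = Generator.new.generate(
      ['price'],                                 # measures
      ['producer'],                              # dimensions
      ['producer'],                              # coded dimensions
      { 'producer' => ['Alice', 'Bob'],          # column data, one array per component
        'price'    => [2.5, 3.1] },
      ['1', '2'],                                # observation labels
      'bacon_survey'                             # dataset name
    )
    puts turtle   # prefixes, DSD, component specs, code lists, concepts, observations
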
data/lib/bio-publisci/dataset/interactive.rb
@@ -0,0 +1,57 @@
+module R2RDF
+  module Dataset
+    module Interactive
+      # to be called by other classes when user input is required
+      def defaults
+        {
+          load_from_file: false
+        }
+      end
+
+      def interactive(options={})
+        options = defaults.merge(options)
+        qb = {}
+
+        puts "load config from file? [y/N]"
+        if gets.chomp == "y"
+          # use yaml or DSL file to configure
+        else
+          qb[:dimensions] = dimensions()
+          qb[:measures] = measures()
+        end
+
+        puts "load data from file? [y/N]"
+        if gets.chomp == "y"
+          # attempt to load dataset from file, ask user to resolve problems or ambiguity
+        end
+
+        qb
+      end
+
+      def dimensions
+        puts "Enter a list of dimensions, separated by commas"
+        arr = gets.chomp.split(",")
+        dims = {}
+
+        arr.map{|dim|
+          puts "What is the range of #{dim.chomp.strip}? [:coded]"
+          type = gets.chomp
+          type = :coded if type == ":coded" || type == ""
+          dims[dim.chomp.strip] = {type: type}
+        }
+
+        dims
+      end
+
+      def measures
+        puts "Enter a list of measures, separated by commas"
+        arr = gets.chomp.split(",")
+        meas = []
+
+        arr.map{|m| meas << m.chomp.strip}
+
+        meas
+      end
+    end
+  end
+end

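The Interactive mixin drives a plain gets/puts dialogue, so a console tool only needs to mix it in. A hypothetical wrapper (not shipped with the gem):

    require 'bio-publisci'

    class Console
      include R2RDF::Dataset::Interactive
    end

    config = Console.new.interactive
    # prompts for dimensions and measures, then returns something like
    # {:dimensions=>{"producer"=>{:type=>:coded}}, :measures=>["price"]}
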