bio-publisci 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.rspec +1 -0
- data/.travis.yml +13 -0
- data/Gemfile +24 -0
- data/LICENSE.txt +20 -0
- data/README.md +47 -0
- data/README.rdoc +48 -0
- data/Rakefile +70 -0
- data/bin/bio-publisci +83 -0
- data/features/create_generator.feature +25 -0
- data/features/integration.feature +12 -0
- data/features/integration_steps.rb +10 -0
- data/features/orm.feature +60 -0
- data/features/orm_steps.rb +74 -0
- data/features/reader.feature +25 -0
- data/features/reader_steps.rb +60 -0
- data/features/step_definitions/bio-publisci_steps.rb +0 -0
- data/features/store.feature +27 -0
- data/features/store_steps.rb +42 -0
- data/features/support/env.rb +13 -0
- data/features/writer.feature +9 -0
- data/features/writer_steps.rb +17 -0
- data/lib/bio-publisci/analyzer.rb +57 -0
- data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +219 -0
- data/lib/bio-publisci/dataset/ORM/observation.rb +20 -0
- data/lib/bio-publisci/dataset/data_cube.rb +308 -0
- data/lib/bio-publisci/dataset/interactive.rb +57 -0
- data/lib/bio-publisci/loader.rb +36 -0
- data/lib/bio-publisci/metadata/metadata.rb +105 -0
- data/lib/bio-publisci/parser.rb +64 -0
- data/lib/bio-publisci/query/query_helper.rb +114 -0
- data/lib/bio-publisci/r_client.rb +54 -0
- data/lib/bio-publisci/readers/arff.rb +87 -0
- data/lib/bio-publisci/readers/big_cross.rb +119 -0
- data/lib/bio-publisci/readers/cross.rb +72 -0
- data/lib/bio-publisci/readers/csv.rb +54 -0
- data/lib/bio-publisci/readers/dataframe.rb +66 -0
- data/lib/bio-publisci/readers/r_matrix.rb +152 -0
- data/lib/bio-publisci/store.rb +56 -0
- data/lib/bio-publisci/writers/arff.rb +66 -0
- data/lib/bio-publisci/writers/dataframe.rb +81 -0
- data/lib/bio-publisci.rb +36 -0
- data/lib/r2rdf.rb +226 -0
- data/lib/template_bak/publisci.rb +3 -0
- data/lib/template_bak.rb +12 -0
- data/lib/vocabs/cc.rb +18 -0
- data/lib/vocabs/cert.rb +13 -0
- data/lib/vocabs/dc.rb +63 -0
- data/lib/vocabs/dc11.rb +23 -0
- data/lib/vocabs/doap.rb +45 -0
- data/lib/vocabs/exif.rb +168 -0
- data/lib/vocabs/foaf.rb +69 -0
- data/lib/vocabs/geo.rb +13 -0
- data/lib/vocabs/http.rb +26 -0
- data/lib/vocabs/ma.rb +78 -0
- data/lib/vocabs/owl.rb +59 -0
- data/lib/vocabs/rdfs.rb +17 -0
- data/lib/vocabs/rsa.rb +12 -0
- data/lib/vocabs/rss.rb +14 -0
- data/lib/vocabs/sioc.rb +93 -0
- data/lib/vocabs/skos.rb +36 -0
- data/lib/vocabs/wot.rb +21 -0
- data/lib/vocabs/xhtml.rb +9 -0
- data/lib/vocabs/xsd.rb +58 -0
- data/resources/queries/codes.rq +13 -0
- data/resources/queries/dataset.rq +7 -0
- data/resources/queries/dimension_ranges.rq +8 -0
- data/resources/queries/dimensions.rq +7 -0
- data/resources/queries/measures.rq +7 -0
- data/resources/queries/observations.rq +12 -0
- data/resources/queries/test.rq +3 -0
- data/resources/weather.numeric.arff +23 -0
- data/spec/analyzer_spec.rb +36 -0
- data/spec/bio-publisci_spec.rb +7 -0
- data/spec/csv/bacon.csv +4 -0
- data/spec/csv/moar_bacon.csv +11 -0
- data/spec/data_cube_spec.rb +166 -0
- data/spec/generators/csv_spec.rb +44 -0
- data/spec/generators/dataframe_spec.rb +44 -0
- data/spec/generators/r_matrix_spec.rb +35 -0
- data/spec/queries/integrity/1.rq +21 -0
- data/spec/queries/integrity/11.rq +29 -0
- data/spec/queries/integrity/12.rq +37 -0
- data/spec/queries/integrity/14.rq +25 -0
- data/spec/queries/integrity/19_1.rq +21 -0
- data/spec/queries/integrity/19_2.rq +15 -0
- data/spec/queries/integrity/2.rq +22 -0
- data/spec/queries/integrity/3.rq +19 -0
- data/spec/queries/integrity/4.rq +13 -0
- data/spec/queries/integrity/5.rq +14 -0
- data/spec/r_builder_spec.rb +33 -0
- data/spec/spec_helper.rb +17 -0
- data/spec/turtle/bacon +149 -0
- data/spec/turtle/reference +2066 -0
- metadata +259 -0
@@ -0,0 +1,9 @@
|
|
1
|
+
Feature: export to various formats using writers
|
2
|
+
|
3
|
+
In order to use RDF encoded data in other applications
|
4
|
+
I want to export domain objects using an R2RDF::Writer object
|
5
|
+
|
6
|
+
Scenario: write to ARFF format
|
7
|
+
Given a ARFF writer
|
8
|
+
When I call its from_turtle method on the file spec/turtle/bacon
|
9
|
+
Then I should receive a .arff file as a string
|
# Step definitions for the writer feature: construct a writer by name,
# feed it turtle (from a file or an in-memory string), and check the
# exported result.
# NOTE(review): Tempfile is used without a require here — presumably
# features/support/env.rb loads it; confirm.

Given(/^a (.*) writer$/) do |type|
  # Look the writer class up by its captured name under R2RDF::Writer
  # (e.g. "ARFF" => R2RDF::Writer::ARFF).
  @writer = R2RDF::Writer.const_get(type).new
end

When(/^I call its from_turtle method on the file (.*)$/) do |file|
  @result = @writer.from_turtle(file)
end

When(/^I call its from_turtle method on the turtle string$/) do
  # Spill the in-memory turtle string to a temp file, convert it, then
  # delete the file again.
  tmp = Tempfile.open('writerttl')
  tmp.write @turtle_string
  tmp.close
  @result = @writer.from_turtle(tmp.path)
  tmp.unlink
end

Then(/^I should receive a \.arff file as a string$/) do
  @result.is_a?(String).should be true
end
module R2RDF

  #handles analysis of R expression to extract properties and recognize potential
  #ambiguity
  module Analyzer
    # True when +data+ — or, for a Hash/Array, any nested key, value or
    # element — contains a character that needs sanitizing before use in
    # an RDF identifier (currently '.' and space).
    def dirty?(data)
      if data.is_a? Hash
        data.each{|k,v|
          return true if dirty?(k) || dirty?(v)
        }
        false
      elsif data.is_a? Array
        data.each{|datum|
          return true if dirty?(datum)
        }
        # BUG FIX: the original fell through returning the (truthy) mapped
        # array, so a perfectly clean array still counted as dirty.
        false
      else
        dirty_characters = [".", ' ']
        # BUG FIX: the original used the intersection array itself as the
        # condition — in Ruby even an empty Array is truthy, so every
        # scalar was reported dirty. Test emptiness explicitly.
        !(data.to_s.scan(/./) & dirty_characters).empty?
      end
    end

    # Suggests an xsd range for a column of values: "xsd:int"/"xsd:double"
    # for homogeneous numeric data, string sniffing for String columns,
    # :coded for anything mixed or non-numeric.
    def recommend_range(data)
      classes = data.map{|d| d.class}
      homogenous = classes.uniq.size == 1
      if homogenous
        # Compare with <= so this works both where integers are Fixnum
        # (pre-2.4 rubies) and Integer (2.4+; Fixnum is removed in 3.2,
        # so the original `== Fixnum` raises NameError there).
        if classes[0] <= Integer
          "xsd:int"
        elsif classes[0] <= Float
          "xsd:double"
        elsif classes[0] <= String
          recommend_range_strings(data)
        else
          :coded
        end
      else
        :coded
      end
    end

    # Sniffs a String column: all-integer strings => "xsd:int",
    # all-float strings => "xsd:double", otherwise :coded.
    def recommend_range_strings(data)
      return "xsd:int" if data.all?{|d| Integer(d) rescue nil}
      # BUG FIX: values that only parse as floats are doubles, not ints
      # (the original returned "xsd:int" here too).
      return "xsd:double" if data.all?{|d| Float(d) rescue nil}
      :coded
    end

    # Raises unless every observation hash carries exactly the keys named
    # by dimensions|measures: no missing values, no unknown properties.
    def check_integrity(obs, dimensions, measures)
      obs.map{|o|
        raise "MissingValues for #{(dimensions | measures) - o.keys}" unless ((dimensions | measures) - o.keys).empty?
        raise "UnknownProperty #{o.keys - (dimensions | measures)}" unless (o.keys - (dimensions | measures)).empty?
      }
    end
  end
end
module R2RDF
  module Dataset
    module ORM
      # ORM-style wrapper around an RDF Data Cube: dimensions, measures
      # and observations are held as plain Ruby objects and serialized to
      # turtle on demand via #to_n3. Can also be rehydrated from an
      # existing graph with .load.
      class DataCube
        extend R2RDF::Dataset::DataCube
        extend R2RDF::Analyzer
        extend R2RDF::Metadata
        extend R2RDF::Query
        extend R2RDF::Parser

        include R2RDF::Dataset::DataCube
        include R2RDF::Analyzer
        include R2RDF::Metadata
        include R2RDF::Query
        include R2RDF::Parser

        attr_accessor :labels
        attr_accessor :dimensions
        attr_accessor :measures
        attr_accessor :obs
        attr_accessor :meta

        # options  - hash understood by #parse_options
        # do_parse - when false, skip option parsing (for callers that
        #            populate the object manually afterwards)
        def initialize(options={}, do_parse = true)
          @dimensions = {}
          @measures = []
          @obs = []
          @generator_options = {}
          @options = {}

          @meta = {}

          parse_options options if do_parse
        end

        # Rebuilds a DataCube from an existing graph — a turtle file path
        # or an http(s) endpoint URL — by querying it for dimensions,
        # measures, the dataset label and its observations.
        def self.load(graph, options={}, verbose=false)
          graph = create_graph(graph) unless graph =~ /^http/

          dimensions = Hash[get_hashes(execute_from_file('dimension_ranges.rq',graph),"to_s").map{|solution|
            #TODO coded properties should be found via SPARQL queries
            if solution[:range].split('/')[-2] == "code"
              type = :coded
            else
              type = strip_uri(solution[:range])
            end
            [strip_uri(solution[:dimension]), {type: type}]
          }]
          puts "dimensions: #{dimensions}" if verbose
          measures = get_ary(execute_from_file('measures.rq',graph)).flatten
          puts "measures: #{measures}" if verbose
          name = execute_from_file('dataset.rq',graph).to_h.first[:label]
          puts "dataset: #{name}" if verbose
          obs = execute_from_file('observations.rq',graph)
          puts "observations: #{obs}" if verbose
          simple_observations = observation_hash(obs,true)

          new_opts = {
            measures: measures,
            dimensions: dimensions,
            observations: simple_observations.values,
            name: name,
          }

          options = options.merge(new_opts)
          puts "creating #{options}" if verbose
          self.new(options)
        end

        # Populates the cube from an options hash. Recognized keys:
        #   :dimensions, :measures, :observations, :generator_options,
        #   :skip_metadata, :validate_each, :name (required — raises
        #   without it).
        def parse_options(options)
          if options[:dimensions]
            options[:dimensions].each{|name,details|
              add_dimension(name, details[:type] || :coded)
            }
          end

          if options[:measures]
            options[:measures].each{|m| @measures << m}
          end

          if options[:observations]
            options[:observations].each{|obs_data| add_observation obs_data}
          end

          @generator_options = options[:generator_options] if options[:generator_options]
          @options[:skip_metadata] = options[:skip_metadata] if options[:skip_metadata]

          if options[:name]
            @name = options[:name]
          else
            raise "No dataset name specified!"
          end

          if options[:validate_each]
            @options[:validate_each] = options[:validate_each]
          end
        end

        # Serializes the cube (plus basic metadata unless :skip_metadata
        # was set) to a turtle string.
        def to_n3
          #create labels if not specified
          unless @labels.is_a?(Array) && @labels.size == @obs.size
            if @labels.is_a? Symbol
              #define some automatic labeling methods
            else
              @labels = (1..@obs.size).to_a.map(&:to_s)
            end
          end
          data = {}

          #collect observation data, failing fast on malformed observations
          check_integrity(@obs.map{|o| o.data}, @dimensions.keys, @measures)
          @obs.map{|obs|
            (@measures | @dimensions.keys).map{ |component|
              (data[component] ||= []) << obs.data[component]
            }
          }

          codes = @dimensions.map{|d,v| d if v[:type] == :coded}.compact

          str = generate(@measures, @dimensions.keys, codes, data, @labels, @name, @generator_options)
          unless @options[:skip_metadata]
            fields = {
              publishers: publishers(),
              subject: subjects(),
              author: author(),
              description: description(),
              date: date(),
              var: @name,
            }
            str += "\n" + basic(fields,@generator_options)
          end
          str
        end

        def add_dimension(name, type=:coded)
          @dimensions[name.to_s] = {type: type}
        end

        def add_measure(name)
          @measures << name
        end

        # Adds an observation from a component=>value hash; keys are
        # stringified so they line up with dimension/measure names.
        def add_observation(data)
          data = Hash[data.map{|k,v| [k.to_s, v]}]
          obs = Observation.new(data)
          check_integrity([obs.data],@dimensions.keys,@measures) if @options[:validate_each]
          @obs << obs
        end

        # Inserts an already-built Observation object without validation.
        def insert(observation)
          @obs << observation
        end

        def publishers
          @meta[:publishers] ||= []
        end

        def publishers=(publishers)
          @meta[:publishers] = publishers
        end

        def subjects
          @meta[:subject] ||= []
        end

        def subjects=(subjects)
          @meta[:subject]=subjects
        end

        def add_publisher(label,uri)
          publishers << {label: label, uri: uri}
        end

        def add_subject(id)
          # BUG FIX: was `subject << id` — no such method exists, so this
          # always raised NameError; route through the subjects accessor.
          subjects << id
        end

        def author
          @meta[:creator] ||= ""
        end

        def author=(author)
          @meta[:creator] = author
        end

        def description
          @meta[:description] ||= ""
        end

        def description=(description)
          @meta[:description] = description
        end

        # Defaults to today's date, formatted day-month-year.
        def date
          @meta[:date] ||= "#{Time.now.day}-#{Time.now.month}-#{Time.now.year}"
        end

        def date=(date)
          @meta[:date] = date
        end

        # Plain-hash snapshot of the cube's structure and data.
        def to_h
          {
            measures: @measures,
            dimensions: @dimensions,
            observations: @obs.map{|o| o.data}
          }
        end
      end
    end
  end
end
module R2RDF
  module Dataset
    module ORM
      # Lightweight record for a single Data Cube observation. Component
      # values live in the +data+ hash and are exposed as reader methods
      # via method_missing (obs.weight => data["weight"]).
      class Observation
        attr_accessor :data

        def initialize(data={})
          @data = data
        end

        # Resolve unknown methods against the data hash — String key
        # first, Symbol key second — delegating everything else to the
        # default NoMethodError behavior.
        #
        # BUG FIX: the original signature was (name, args), which demands
        # exactly one extra argument — so the intended zero-arg reader
        # calls raised ArgumentError — and its body was an empty stub
        # that returned nil for everything.
        def method_missing(name, *args, &block)
          key = name.to_s
          if @data.key?(key)
            @data[key]
          elsif @data.key?(name)
            @data[name]
          else
            super
          end
        end

        # Keep respond_to? consistent with method_missing (the original
        # returned nil unconditionally).
        def respond_to_missing?(method, include_private = false)
          @data.key?(method.to_s) || @data.key?(method) || super
        end
      end
    end
  end
end
#monkey patch to make rdf string w/ heredocs prettier ;)
class String
  # Strips the indentation of the first line from every line, letting the
  # heredocs below be indented along with the surrounding code.
  def unindent
    gsub /^#{self[/\A\s*/]}/, ''
  end
end

module R2RDF
  # used to generate data cube observations, data structure definitions, etc
  module Dataset
    module DataCube
      # Generator defaults; merged UNDER caller-supplied options by every
      # public method in this module.
      def defaults
        {
          type: :dataframe,
          encode_nulls: false,
          base_url: "http://www.rqtl.org",
        }
      end

      # Produces the complete turtle serialization for a cube:
      # prefixes, DSD, dataset, component specs, dimension/measure
      # properties, code lists, concepts and observations, in that order.
      #
      # measures/dimensions/codes - arrays of component names
      # data                      - hash of component name => array of values
      # observation_labels        - one label per observation row
      # var                       - dataset name (used in URIs)
      def generate(measures, dimensions, codes, data, observation_labels, var, options={})
        dimensions = sanitize(dimensions)
        codes = sanitize(codes)
        measures = sanitize(measures)
        var = sanitize([var]).first
        data = sanitize_hash(data)

        str = prefixes(var,options)
        str << data_structure_definition((measures | dimensions), var, options)
        str << dataset(var, options)
        component_specifications(measures, dimensions, var, options).map{ |c| str << c }
        dimension_properties(dimensions, codes, var, options).map{|p| str << p}
        measure_properties(measures, var, options).map{|p| str << p}
        code_lists(codes, data, var, options).map{|l| str << l}
        concept_codes(codes, data, var, options).map{|c| str << c}
        observations(measures, dimensions, codes, data, observation_labels, var, options).map{|o| str << o}
        str
      end

      # Replaces spaces and dots in String entries with underscores so
      # they are safe inside URIs; non-String entries pass through.
      def sanitize(array)
        processed = []
        array.map{|entry|
          if entry.is_a? String
            processed << entry.gsub(/[\s\.]/,'_')
          else
            processed << entry
          end
        }
        processed
      end

      # Renames the String keys of +h+ (in place) with the same pattern
      # sanitize() applies to component names.
      #
      # BUG FIX: the original only replaced spaces (gsub(' ','_')) while
      # sanitize() also replaces dots — so for a dotted column name the
      # sanitized component never matched its data key and data[d]
      # lookups silently returned nil.
      def sanitize_hash(h)
        mappings = {}
        h.keys.map{|k|
          if k.is_a?(String)
            clean = k.gsub(/[\s\.]/,'_')
            mappings[k] = clean unless clean == k
          end
        }

        h.keys.map{|k|
          h[mappings[k]] = h.delete(k) if mappings[k]
        }

        h
      end

      # Turtle @base/@prefix preamble for dataset +var+.
      def prefixes(var, options={})
        var = sanitize([var]).first
        options = defaults().merge(options)
        base = options[:base_url]
        <<-EOF.unindent
        @base <#{base}/ns/dc/> .
        @prefix ns: <#{base}/ns/dataset/#{var}#> .
        @prefix qb: <http://purl.org/linked-data/cube#> .
        @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
        @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
        @prefix prop: <#{base}/dc/properties/> .
        @prefix dct: <http://purl.org/dc/terms/> .
        @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
        @prefix cs: <#{base}/dc/dataset/#{var}/cs/> .
        @prefix code: <#{base}/dc/dataset/#{var}/code/> .
        @prefix class: <#{base}/dc/dataset/#{var}/class/> .
        @prefix owl: <http://www.w3.org/2002/07/owl#> .
        @prefix skos: <http://www.w3.org/2004/02/skos/core#> .
        @prefix foaf: <http://xmlns.com/foaf/0.1/> .
        @prefix org: <http://www.w3.org/ns/org#> .
        @prefix prov: <http://www.w3.org/ns/prov#> .

        EOF
      end

      # qb:DataStructureDefinition listing every component specification.
      def data_structure_definition(components,var,options={})
        var = sanitize([var]).first
        options = defaults().merge(options)
        str = "ns:dsd-#{var} a qb:DataStructureDefinition;\n"
        str << "  qb:component\n"
        components.map{|n|
          str << "    cs:#{n} ,\n"
        }
        # swap the trailing comma of the last component for a period
        str[-2]='.'
        str<<"\n"
        str
      end

      # qb:DataSet declaration pointing at its DSD.
      def dataset(var,options={})
        var = sanitize([var]).first
        options = defaults().merge(options)
        <<-EOF.unindent
        ns:dataset-#{var} a qb:DataSet ;
          rdfs:label "#{var}"@en ;
          qb:structure ns:dsd-#{var} .

        EOF
      end

      # One qb:ComponentSpecification per dimension and per measure.
      def component_specifications(measure_names, dimension_names, var, options={})
        options = defaults().merge(options)
        specs = []

        dimension_names.map{|d|
          specs << <<-EOF.unindent
          cs:#{d} a qb:ComponentSpecification ;
            rdfs:label "#{d} Component" ;
            qb:dimension prop:#{d} .

          EOF
        }

        measure_names.map{|n|
          specs << <<-EOF.unindent
          cs:#{n} a qb:ComponentSpecification ;
            rdfs:label "#{n} Component" ;
            qb:measure prop:#{n} .

          EOF
        }

        specs
      end

      # qb:DimensionProperty declarations; coded dimensions also point at
      # their code list and range class.
      def dimension_properties(dimensions, codes, var, options={})
        options = defaults().merge(options)
        props = []

        dimensions.map{|d|
          if codes.include?(d)
            props << <<-EOF.unindent
            prop:#{d} a rdf:Property, qb:DimensionProperty ;
              rdfs:label "#{d}"@en ;
              qb:codeList code:#{d.downcase} ;
              rdfs:range code:#{d.downcase.capitalize} .

            EOF
          else
            props << <<-EOF.unindent
            prop:#{d} a rdf:Property, qb:DimensionProperty ;
              rdfs:label "#{d}"@en .

            EOF
          end
        }

        props
      end

      # qb:MeasureProperty declarations.
      def measure_properties(measures, var, options={})
        options = defaults().merge(options)
        props = []

        measures.map{ |m|
          props << <<-EOF.unindent
          prop:#{m} a rdf:Property, qb:MeasureProperty ;
            rdfs:label "#{m}"@en .

          EOF
        }

        props
      end

      # One qb:Observation per label; rows containing nil are skipped
      # unless :encode_nulls is set (RDF has no null).
      def observations(measures, dimensions, codes, data, observation_labels, var, options={})
        var = sanitize([var]).first
        options = defaults().merge(options)
        obs = []
        observation_labels.each_with_index.map{|r, i|
          contains_nulls = false
          str = <<-EOF.unindent
          ns:obs#{r} a qb:Observation ;
            qb:dataSet ns:dataset-#{var} ;
          EOF

          str << " rdfs:label \"#{r}\" ;\n" unless options[:no_labels]

          dimensions.map{|d|
            contains_nulls = contains_nulls | (data[d][i] == nil)
            if codes.include? d
              str << " prop:#{d} <code/#{d.downcase}/#{data[d][i]}> ;\n"
            else
              str << " prop:#{d} ns:#{to_resource(data[d][i], options)} ;\n"
            end
          }

          measures.map{|m|
            contains_nulls = contains_nulls | (data[m][i] == nil)
            str << " prop:#{m} #{to_literal(data[m][i], options)} ;\n"
          }

          str << " .\n\n"
          obs << str unless contains_nulls && !options[:encode_nulls]
        }
        obs
      end

      # skos code list (class + concept scheme) for each coded dimension,
      # enumerating the distinct values found in +data+.
      def code_lists(codes, data, var, options={})
        options = defaults().merge(options)
        lists = []
        codes.map{|code|
          str = <<-EOF.unindent
          code:#{code.downcase.capitalize} a rdfs:Class, owl:Class;
            rdfs:subClassOf skos:Concept ;
            rdfs:label "Code list for #{code} - codelist class"@en;
            rdfs:comment "Specifies the #{code} for each observation";
            rdfs:seeAlso code:#{code.downcase} .

          code:#{code.downcase} a skos:ConceptScheme;
            skos:prefLabel "Code list for #{code} - codelist scheme"@en;
            rdfs:label "Code list for #{code} - codelist scheme"@en;
            skos:notation "CL_#{code.upcase}";
            skos:note "Specifies the #{code} for each observation";
          EOF
          data[code].uniq.map{|value|
            unless value == nil && !options[:encode_nulls]
              str << " skos:hasTopConcept <code/#{code.downcase}/#{to_resource(value,options)}> ;\n"
            end
          }

          str << " .\n\n"
          lists << str
        }

        lists
      end

      # One skos:Concept per distinct coded value.
      def concept_codes(codes, data, var, options={})
        options = defaults().merge(options)
        concepts = []
        codes.map{|code|
          data[code].uniq.map{|value|
            unless value == nil && !options[:encode_nulls]
              concepts << <<-EOF.unindent
              <code/#{code.downcase}/#{to_resource(value,options)}> a skos:Concept, code:#{code.downcase.capitalize};
                skos:topConceptOf code:#{code.downcase} ;
                skos:prefLabel "#{to_resource(value,options)}" ;
                skos:inScheme code:#{code.downcase} .

              EOF
            end
          }
        }

        concepts
      end

      # Renders +obj+ as a URI-safe resource fragment.
      def to_resource(obj, options)
        if obj.is_a? String
          #TODO decide the right way to handle missing values, since RDF has no null
          #probably throw an error here since a missing resource is a bigger problem
          obj = "NA" if obj.empty?

          #TODO remove special characters (faster) as well (eg '?')
          obj.gsub(' ','_').gsub('?','')
        elsif obj == nil && options[:encode_nulls]
          '"NA"'
        elsif obj.is_a? Numeric
          #resources cannot be referred to purely by integer (?)
          "n"+obj.to_s
        else
          obj
        end
      end

      # Renders +obj+ as a turtle literal: numeric strings become bare
      # numbers, other strings are quoted, numbers pass through.
      def to_literal(obj, options)
        if obj.is_a? String
          # Depressing that there's no more elegant way to check if a string is
          # a number...
          if val = Integer(obj) rescue nil
            val
          elsif val = Float(obj) rescue nil
            val
          else
            '"'+obj+'"'
          end
        elsif obj == nil && options[:encode_nulls]
          #TODO decide the right way to handle missing values, since RDF has no null
          '"NA"'
        else
          obj
        end
      end
    end
  end
end
module R2RDF
  module Dataset
    # Console prompts used by other classes when user input is required
    # to assemble a dataset definition.
    module Interactive
      #to be called by other classes if user input is required
      def defaults
        {
          load_from_file: false
        }
      end

      # Walks the user through building a minimal cube description hash
      # (:dimensions + :measures). File-based configuration and data
      # loading are not implemented yet.
      def interactive(options={})
        options = defaults.merge(options)
        qb = {}

        puts "load config from file? [y/N]"
        answer = gets.chomp
        if answer == "y"
          #use yaml or DSL file to configure
        else
          qb[:dimensions] = dimensions()
          qb[:measures] = measures()
        end

        puts "load data from file? [y/N]"
        answer = gets.chomp
        if answer == "y"
          #attempt to load dataset from file, ask user to resolve problems or ambiguity
        end
        qb
      end

      # Prompts for a comma-separated dimension list, then asks for each
      # dimension's range (defaulting to :coded); returns
      # {name => {type: range}}.
      def dimensions
        puts "Enter a list of dimensions, separated by commas"
        entries = gets.chomp.split(",")

        entries.each_with_object({}) do |raw, dims|
          name = raw.chomp.strip
          puts "What is the range of #{name}? [:coded]"
          range = gets.chomp
          range = :coded if range == ":coded" || range == ""
          dims[name] = {type: range}
        end
      end

      # Prompts for a comma-separated measure list; returns an array of
      # trimmed names.
      def measures
        puts "Enter a list of measures, separated by commas"
        gets.chomp.split(",").map { |m| m.chomp.strip }
      end
    end
  end
end