bio-publisci 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.travis.yml +1 -1
- data/Gemfile +1 -1
- data/Rakefile +4 -6
- data/features/integration_steps.rb +1 -1
- data/features/metadata.feature +24 -0
- data/features/metadata_steps.rb +21 -0
- data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +36 -14
- data/lib/bio-publisci/dataset/ORM/observation.rb +1 -1
- data/lib/bio-publisci/dataset/data_cube.rb +192 -131
- data/lib/bio-publisci/dataset/dataset_for.rb +150 -0
- data/lib/bio-publisci/dataset/interactive.rb +70 -55
- data/lib/bio-publisci/metadata/metadata.rb +81 -8
- data/lib/bio-publisci/parser.rb +76 -1
- data/lib/bio-publisci/readers/big_cross.rb +118 -117
- data/lib/bio-publisci/readers/csv.rb +37 -2
- data/lib/bio-publisci/readers/r_matrix.rb +1 -1
- data/lib/bio-publisci/store.rb +31 -31
- data/lib/bio-publisci/writers/arff.rb +48 -49
- data/lib/bio-publisci.rb +3 -0
- data/resources/queries/code_resources.rq +10 -0
- data/resources/queries/dimension_ranges.rq +3 -3
- data/resources/queries/dimensions.rq +3 -3
- data/resources/queries/measures.rq +3 -3
- data/resources/queries/observation_labels.rq +8 -0
- data/resources/queries/properties.rq +8 -0
- data/scripts/islet_mlratio.rb +6 -0
- data/scripts/scan_islet.rb +6 -0
- data/scripts/update_reference.rb +20 -0
- data/spec/ORM/data_cube_orm_spec.rb +12 -0
- data/spec/data_cube_spec.rb +1 -1
- data/spec/generators/dataframe_spec.rb +1 -1
- data/spec/generators/r_matrix_spec.rb +1 -1
- data/spec/r_builder_spec.rb +6 -6
- data/spec/resource/.RData +0 -0
- data/spec/resource/example.Rhistory +3 -0
- data/spec/turtle/bacon +4 -22
- data/spec/turtle/reference +9 -27
- metadata +37 -56
- data/lib/bio-publisci/loader.rb +0 -36
- data/spec/bio-publisci_spec.rb +0 -7
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: f03476f5595b28e10a4cb3d950b9c454d621eb4d
|
4
|
+
data.tar.gz: da473d84d9c1e203de4ea1b2d7fef2579cfa8bf8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 33662112c4df2115f15d59f415329dfa11368e8bd1a47907d62c02b476bbeb4afe0a1c43599d2c215a093d1765b7ffb05320220e21888267811c86beacef8876
|
7
|
+
data.tar.gz: 683b5874d082ab155227ee92a1e9bfe15a66b0eaf4fe1e590db39a9391346ec31850b0a94d924c9d26b356fb6ed7f82a228c9d16afa1a67d9c6710086e849963
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/Rakefile
CHANGED
@@ -21,7 +21,8 @@ Jeweler::Tasks.new do |gem|
|
|
21
21
|
gem.description = %Q{A toolkit for publishing scientific results and datasets using RDF and related technologies }
|
22
22
|
gem.email = "wstrinz@gmail.com"
|
23
23
|
gem.authors = ["wstrinz"]
|
24
|
-
gem.version = "0.0.
|
24
|
+
gem.version = "0.0.3"
|
25
|
+
|
25
26
|
# dependencies defined in Gemfile
|
26
27
|
end
|
27
28
|
Jeweler::RubygemsDotOrgTasks.new
|
@@ -29,6 +30,7 @@ Jeweler::RubygemsDotOrgTasks.new
|
|
29
30
|
require 'rspec/core'
|
30
31
|
require 'rspec/core/rake_task'
|
31
32
|
RSpec::Core::RakeTask.new(:spec) do |spec|
|
33
|
+
spec.rspec_opts = "--tag ~no_travis"
|
32
34
|
spec.pattern = FileList['spec/**/*_spec.rb']
|
33
35
|
end
|
34
36
|
|
@@ -44,11 +46,7 @@ Cucumber::Rake::Task.new(:features)
|
|
44
46
|
# task :default => :spec
|
45
47
|
|
46
48
|
task :default => [] do
|
47
|
-
|
48
|
-
Rake::Task[:spec].invoke
|
49
|
-
rescue
|
50
|
-
end
|
51
|
-
Rake::Task[:features].invoke
|
49
|
+
Rake::Task[:spec].invoke
|
52
50
|
end
|
53
51
|
|
54
52
|
task :test => [] do
|
@@ -0,0 +1,24 @@
|
|
1
|
+
Feature: Receive metadata as user input or extract from data sources
|
2
|
+
|
3
|
+
In order to publish and share data about my datasets
|
4
|
+
I want to be able to attach metadata
|
5
|
+
|
6
|
+
Scenario: Attach basic DC Terms info
|
7
|
+
Given a class which includes the Metadata module
|
8
|
+
When I call its basic method with the hash {var: "example", title: "example dataset", creator: "Will Strinz", description: "an example dataset", date: "1-10-2010"}
|
9
|
+
Then I should receive a metadata string
|
10
|
+
|
11
|
+
Scenario: Auto Generate some fields
|
12
|
+
Given a class which includes the Metadata module
|
13
|
+
When I call its basic method with the hash {var: "example", title: "example dataset", description: "an example dataset"}
|
14
|
+
Then I should receive a metadata string
|
15
|
+
|
16
|
+
Scenario: Generate process information
|
17
|
+
Given a class which includes the Metadata module
|
18
|
+
When I call its provenance method with the hash {var: "example", software: {name: "R", process: 'spec/resource/example.Rhistory'}}
|
19
|
+
Then I should receive a metadata string
|
20
|
+
|
21
|
+
Scenario: Generate organizational provenance information
|
22
|
+
Given a class which includes the Metadata module
|
23
|
+
When I call its provenance method with the hash {var: "example", creator: "http://gsocsemantic.wordpress.com/me", organization: "http://sciruby.com/"}
|
24
|
+
Then I should receive a metadata string
|
@@ -0,0 +1,21 @@
|
|
1
|
+
Given(/^a class which includes the Metadata module$/) do
|
2
|
+
class Meta
|
3
|
+
include R2RDF::Metadata
|
4
|
+
end
|
5
|
+
@klass = Meta
|
6
|
+
end
|
7
|
+
|
8
|
+
When(/^I call its basic method with the hash (\{.+\})$/) do |fields|
|
9
|
+
fields = eval(fields)
|
10
|
+
@response = @klass.new.basic(fields)
|
11
|
+
end
|
12
|
+
|
13
|
+
When(/^I call its provenance method with the hash (\{.+\})$/) do |fields|
|
14
|
+
fields = eval(fields)
|
15
|
+
@response = @klass.new.provenance(fields)
|
16
|
+
end
|
17
|
+
|
18
|
+
Then(/^I should receive a metadata string$/) do
|
19
|
+
@response.is_a?(String).should be true
|
20
|
+
puts @response
|
21
|
+
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module R2RDF
|
2
|
-
|
2
|
+
class Dataset
|
3
3
|
module ORM
|
4
4
|
class DataCube
|
5
5
|
extend R2RDF::Dataset::DataCube
|
@@ -43,25 +43,41 @@ module R2RDF
|
|
43
43
|
if solution[:range].split('/')[-2] == "code"
|
44
44
|
type = :coded
|
45
45
|
else
|
46
|
-
type =
|
46
|
+
type = solution[:range].to_s
|
47
47
|
end
|
48
|
-
[
|
48
|
+
[solution[:dimension], {type: type}]
|
49
49
|
}]
|
50
50
|
puts "dimensions: #{dimensions}" if verbose
|
51
|
-
|
52
|
-
|
51
|
+
|
52
|
+
codes = execute_from_file('code_resources.rq',graph).to_h.map{|sol|
|
53
|
+
[sol[:dimension].to_s, sol[:codeList].to_s, sol[:class].to_s]
|
54
|
+
}
|
55
|
+
puts "codes: #{codes}" if verbose
|
56
|
+
|
57
|
+
measures = execute_from_file('measures.rq',graph).to_h.map{|m| m[:measure].to_s}
|
58
|
+
puts "measures: #{measures}" if verbose
|
59
|
+
|
53
60
|
name = execute_from_file('dataset.rq',graph).to_h.first[:label]
|
54
61
|
puts "dataset: #{name}" if verbose
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
62
|
+
|
63
|
+
obs = execute_from_file('observations.rq',graph)
|
64
|
+
observations = observation_hash(obs)
|
65
|
+
puts "observations: #{observations}" if verbose
|
66
|
+
|
67
|
+
# simple_observations = observation_hash(obs,true)
|
68
|
+
|
69
|
+
labels = execute_from_file('observation_labels.rq', graph)
|
70
|
+
labels = Hash[labels.map{|sol|
|
71
|
+
[sol[:observation].to_s, sol[:label].to_s]
|
72
|
+
}]
|
59
73
|
|
60
74
|
new_opts = {
|
61
75
|
measures: measures,
|
62
76
|
dimensions: dimensions,
|
63
|
-
observations:
|
77
|
+
observations: observations.values,
|
64
78
|
name: name,
|
79
|
+
labels: labels.values,
|
80
|
+
codes: codes
|
65
81
|
}
|
66
82
|
|
67
83
|
options = options.merge(new_opts)
|
@@ -96,6 +112,14 @@ module R2RDF
|
|
96
112
|
if options[:validate_each]
|
97
113
|
@options[:validate_each] = options[:validate_each]
|
98
114
|
end
|
115
|
+
|
116
|
+
if options[:labels]
|
117
|
+
@labels = options[:labels]
|
118
|
+
end
|
119
|
+
|
120
|
+
if options[:codes]
|
121
|
+
@codes = options[:codes]
|
122
|
+
end
|
99
123
|
end
|
100
124
|
|
101
125
|
def to_n3
|
@@ -120,10 +144,8 @@ module R2RDF
|
|
120
144
|
}
|
121
145
|
|
122
146
|
|
123
|
-
codes = @dimensions.map{|d,v| d if v[:type] == :coded}.compact
|
124
|
-
|
125
|
-
|
126
|
-
str = generate(@measures, @dimensions.keys, codes, data, @labels, @name, @generator_options)
|
147
|
+
@codes = @dimensions.map{|d,v| d if v[:type] == :coded}.compact unless @codes
|
148
|
+
str = generate(@measures, @dimensions.keys, @codes, data, @labels, @name, @generator_options)
|
127
149
|
unless @options[:skip_metadata]
|
128
150
|
fields = {
|
129
151
|
publishers: publishers(),
|
@@ -1,68 +1,112 @@
|
|
1
1
|
#monkey patch to make rdf string w/ heredocs prettier ;)
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
# gsub(/^#{scan(/^\s*/).min_by{|l|l.length}}/, "")
|
6
|
-
end
|
2
|
+
class String
|
3
|
+
def unindent
|
4
|
+
gsub /^#{self[/\A\s*/]}/, ''
|
7
5
|
end
|
6
|
+
end
|
8
7
|
|
9
8
|
module R2RDF
|
10
|
-
|
11
|
-
module Dataset
|
9
|
+
class Dataset
|
12
10
|
module DataCube
|
11
|
+
include R2RDF::Parser
|
13
12
|
def defaults
|
14
13
|
{
|
15
14
|
type: :dataframe,
|
16
15
|
encode_nulls: false,
|
17
16
|
base_url: "http://www.rqtl.org",
|
18
17
|
}
|
19
|
-
|
20
|
-
|
21
|
-
def generate(measures, dimensions, codes, data, observation_labels, var, options={})
|
22
|
-
dimensions = sanitize(dimensions)
|
23
|
-
codes = sanitize(codes)
|
24
|
-
measures = sanitize(measures)
|
25
|
-
var = sanitize([var]).first
|
26
|
-
data = sanitize_hash(data)
|
27
|
-
|
28
|
-
str = prefixes(var,options)
|
29
|
-
str << data_structure_definition((measures | dimensions), var, options)
|
30
|
-
str << dataset(var, options)
|
31
|
-
component_specifications(measures, dimensions, var, options).map{ |c| str << c }
|
32
|
-
dimension_properties(dimensions, codes, var, options).map{|p| str << p}
|
33
|
-
measure_properties(measures, var, options).map{|p| str << p}
|
34
|
-
code_lists(codes, data, var, options).map{|l| str << l}
|
35
|
-
concept_codes(codes, data, var, options).map{|c| str << c}
|
36
|
-
observations(measures, dimensions, codes, data, observation_labels, var, options).map{|o| str << o}
|
37
|
-
str
|
38
|
-
end
|
18
|
+
end
|
39
19
|
|
40
|
-
def
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
20
|
+
def generate_resources(measures, dimensions, codes, options={})
|
21
|
+
newm = measures.map {|m|
|
22
|
+
if m =~ /^http:\/\//
|
23
|
+
"<#{m}>"
|
24
|
+
elsif m =~ /^[a-zA-z]+:[a-zA-z]+$/
|
25
|
+
m
|
46
26
|
else
|
47
|
-
|
48
|
-
end
|
27
|
+
"prop:#{m}"
|
28
|
+
end
|
49
29
|
}
|
50
|
-
processed
|
51
|
-
end
|
52
30
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
31
|
+
newc = []
|
32
|
+
|
33
|
+
newd = dimensions.map{|d|
|
34
|
+
if d =~ /^http:\/\//
|
35
|
+
# newc << "<#{d}>" if codes.include? d
|
36
|
+
"<#{d}>"
|
37
|
+
elsif d =~ /^[a-zA-z]+:[a-zA-z]+$/
|
38
|
+
d
|
39
|
+
else
|
40
|
+
# newc << "prop:#{d}" if codes.include? d
|
41
|
+
"prop:#{d}"
|
42
|
+
end
|
59
43
|
}
|
60
44
|
|
61
|
-
|
62
|
-
|
45
|
+
if codes.first.is_a? Array
|
46
|
+
newc = codes.map{|c|
|
47
|
+
c.map{|el|
|
48
|
+
if el =~ /^http:\/\//
|
49
|
+
"<#{el}>"
|
50
|
+
else
|
51
|
+
el
|
52
|
+
end
|
53
|
+
}
|
54
|
+
}
|
55
|
+
else
|
56
|
+
newc = codes.map{|c|
|
57
|
+
["#{c}","code:#{c.downcase}","code:#{c.downcase.capitalize}"]
|
58
|
+
}
|
59
|
+
end
|
60
|
+
[newm, newd, newc]
|
61
|
+
end
|
62
|
+
|
63
|
+
def encode_data(codes,data,var,options={})
|
64
|
+
new_data = {}
|
65
|
+
data.map{|k,v|
|
66
|
+
if codes.include? k
|
67
|
+
new_data[k] = v.map{|val|
|
68
|
+
if val =~ /^http:\/\//
|
69
|
+
"<#{val}>"
|
70
|
+
elsif val =~ /^[a-zA-z]+:[a-zA-z]+$/
|
71
|
+
val
|
72
|
+
else
|
73
|
+
"<code/#{k.downcase}/#{val}>"
|
74
|
+
end
|
75
|
+
}
|
76
|
+
else
|
77
|
+
new_data[k] = v
|
78
|
+
end
|
63
79
|
}
|
80
|
+
new_data
|
81
|
+
end
|
64
82
|
|
65
|
-
|
83
|
+
def vocabulary(vocab,options={})
|
84
|
+
if vocab.is_a?(String) && vocab =~ /^http:\/\//
|
85
|
+
RDF::Vocabulary.new(vocab)
|
86
|
+
elsif RDF.const_defined? vocab.to_sym && RDF.const_get(vocab.to_sym).inspect =~ /^RDF::Vocabulary/
|
87
|
+
RDF.const_get(vocab)
|
88
|
+
else
|
89
|
+
nil
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def generate(measures, dimensions, codes, data, observation_labels, var, options={})
|
94
|
+
# dimensions = sanitize(dimensions)
|
95
|
+
# codes = sanitize(codes)
|
96
|
+
# measures = sanitize(measures)
|
97
|
+
var = sanitize([var]).first
|
98
|
+
data = sanitize_hash(data)
|
99
|
+
|
100
|
+
str = prefixes(var,options)
|
101
|
+
str << data_structure_definition(measures, dimensions, codes, var, options)
|
102
|
+
str << dataset(var, options)
|
103
|
+
# component_specifications(measures, dimensions, var, options).map{ |c| str << c }
|
104
|
+
dimension_properties(dimensions, codes, var, options).map{|p| str << p}
|
105
|
+
measure_properties(measures, var, options).map{|p| str << p}
|
106
|
+
code_lists(codes, data, var, options).map{|l| str << l}
|
107
|
+
concept_codes(codes, data, var, options).map{|c| str << c}
|
108
|
+
observations(measures, dimensions, codes, data, observation_labels, var, options).map{|o| str << o}
|
109
|
+
str
|
66
110
|
end
|
67
111
|
|
68
112
|
def prefixes(var, options={})
|
@@ -80,7 +124,6 @@ module R2RDF
|
|
80
124
|
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
|
81
125
|
@prefix cs: <#{base}/dc/dataset/#{var}/cs/> .
|
82
126
|
@prefix code: <#{base}/dc/dataset/#{var}/code/> .
|
83
|
-
@prefix class: <#{base}/dc/dataset/#{var}/class/> .
|
84
127
|
@prefix owl: <http://www.w3.org/2002/07/owl#> .
|
85
128
|
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
|
86
129
|
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
|
@@ -90,13 +133,18 @@ module R2RDF
|
|
90
133
|
EOF
|
91
134
|
end
|
92
135
|
|
93
|
-
def data_structure_definition(
|
136
|
+
def data_structure_definition(measures,dimensions,codes,var,options={})
|
94
137
|
var = sanitize([var]).first
|
95
138
|
options = defaults().merge(options)
|
139
|
+
rdf_measures, rdf_dimensions, rdf_codes = generate_resources(measures, dimensions, codes, options)
|
140
|
+
|
96
141
|
str = "ns:dsd-#{var} a qb:DataStructureDefinition;\n"
|
97
|
-
|
98
|
-
|
99
|
-
|
142
|
+
rdf_dimensions.map{|d|
|
143
|
+
str << " qb:component [ qb:dimension #{d} ] ;\n"
|
144
|
+
}
|
145
|
+
|
146
|
+
rdf_measures.map{|m|
|
147
|
+
str << " qb:component [ qb:measure #{m} ] ;\n"
|
100
148
|
}
|
101
149
|
str[-2]='.'
|
102
150
|
str<<"\n"
|
@@ -141,24 +189,34 @@ module R2RDF
|
|
141
189
|
|
142
190
|
def dimension_properties(dimensions, codes, var, options={})
|
143
191
|
options = defaults().merge(options)
|
192
|
+
rdf_measures, rdf_dimensions, rdf_codes = generate_resources([], dimensions, codes, options)
|
144
193
|
props = []
|
145
|
-
|
146
|
-
dimensions.map{|d|
|
147
|
-
if codes.include?(d)
|
148
|
-
props << <<-EOF.unindent
|
149
|
-
prop:#{d} a rdf:Property, qb:DimensionProperty ;
|
150
|
-
rdfs:label "#{d}"@en ;
|
151
|
-
qb:codeList code:#{d.downcase} ;
|
152
|
-
rdfs:range code:#{d.downcase.capitalize} .
|
153
194
|
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
195
|
+
dimension_codes = rdf_codes.map{|c|
|
196
|
+
if c[0]=~/^<http:/
|
197
|
+
c[0][1..-2]
|
198
|
+
else
|
199
|
+
c[0]
|
200
|
+
end
|
201
|
+
}
|
159
202
|
|
160
|
-
|
161
|
-
|
203
|
+
rdf_dimensions.each_with_index{|d,i|
|
204
|
+
if dimension_codes.include?(dimensions[i])
|
205
|
+
code = rdf_codes[dimension_codes.index(dimensions[i])]
|
206
|
+
props << <<-EOF.unindent
|
207
|
+
#{d} a rdf:Property, qb:DimensionProperty ;
|
208
|
+
rdfs:label "#{strip_prefixes(strip_uri(d))}"@en ;
|
209
|
+
qb:codeList #{code[1]} ;
|
210
|
+
rdfs:range #{code[2]} .
|
211
|
+
|
212
|
+
EOF
|
213
|
+
else
|
214
|
+
props << <<-EOF.unindent
|
215
|
+
#{d} a rdf:Property, qb:DimensionProperty ;
|
216
|
+
rdfs:label "#{strip_prefixes(strip_uri(d))}"@en .
|
217
|
+
|
218
|
+
EOF
|
219
|
+
end
|
162
220
|
}
|
163
221
|
|
164
222
|
props
|
@@ -166,13 +224,14 @@ module R2RDF
|
|
166
224
|
|
167
225
|
def measure_properties(measures, var, options={})
|
168
226
|
options = defaults().merge(options)
|
227
|
+
rdf_measures = generate_resources(measures, [], [], options)[0]
|
169
228
|
props = []
|
170
229
|
|
171
|
-
|
230
|
+
rdf_measures.map{ |m|
|
172
231
|
|
173
232
|
props << <<-EOF.unindent
|
174
|
-
|
175
|
-
rdfs:label "#{m}"@en .
|
233
|
+
#{m} a rdf:Property, qb:MeasureProperty ;
|
234
|
+
rdfs:label "#{strip_prefixes(strip_uri(m))}"@en .
|
176
235
|
|
177
236
|
EOF
|
178
237
|
}
|
@@ -183,7 +242,18 @@ module R2RDF
|
|
183
242
|
def observations(measures, dimensions, codes, data, observation_labels, var, options={})
|
184
243
|
var = sanitize([var]).first
|
185
244
|
options = defaults().merge(options)
|
245
|
+
rdf_measures, rdf_dimensions, rdf_codes = generate_resources(measures, dimensions, codes, options)
|
246
|
+
data = encode_data(codes, data, var, options)
|
186
247
|
obs = []
|
248
|
+
|
249
|
+
dimension_codes = rdf_codes.map{|c|
|
250
|
+
if c[0]=~/^<http:/
|
251
|
+
c[0][1..-2]
|
252
|
+
else
|
253
|
+
c[0]
|
254
|
+
end
|
255
|
+
}
|
256
|
+
|
187
257
|
observation_labels.each_with_index.map{|r, i|
|
188
258
|
contains_nulls = false
|
189
259
|
str = <<-EOF.unindent
|
@@ -193,48 +263,64 @@ module R2RDF
|
|
193
263
|
|
194
264
|
str << " rdfs:label \"#{r}\" ;\n" unless options[:no_labels]
|
195
265
|
|
196
|
-
dimensions.
|
266
|
+
dimensions.each_with_index{|d,j|
|
197
267
|
contains_nulls = contains_nulls | (data[d][i] == nil)
|
198
|
-
|
199
|
-
|
268
|
+
|
269
|
+
if dimension_codes.include? d
|
270
|
+
# str << " #{rdf_dimensions[j]} <code/#{d.downcase}/#{data[d][i]}> ;\n"
|
271
|
+
str << " #{rdf_dimensions[j]} #{to_resource(data[d][i], options)} ;\n"
|
200
272
|
else
|
201
|
-
str << "
|
273
|
+
str << " #{rdf_dimensions[j]} #{to_literal(data[d][i], options)} ;\n"
|
202
274
|
end
|
203
275
|
}
|
204
276
|
|
205
|
-
measures.
|
277
|
+
measures.each_with_index{|m,j|
|
206
278
|
contains_nulls = contains_nulls | (data[m][i] == nil)
|
207
|
-
str << "
|
279
|
+
str << " #{rdf_measures[j]} #{to_literal(data[m][i], options)} ;\n"
|
208
280
|
|
209
281
|
}
|
210
282
|
|
211
283
|
str << " .\n\n"
|
212
|
-
|
213
|
-
|
284
|
+
if contains_nulls && !options[:encode_nulls]
|
285
|
+
if options[:raise_nils]
|
286
|
+
raise "missing component for observation, skipping: #{str}, "
|
287
|
+
elsif options[:whiny_nils]
|
288
|
+
puts "missing component for observation, skipping: #{str}, "
|
289
|
+
end
|
290
|
+
else
|
291
|
+
obs << str
|
292
|
+
end
|
214
293
|
}
|
215
294
|
obs
|
216
295
|
end
|
217
296
|
|
218
297
|
def code_lists(codes, data, var, options={})
|
219
298
|
options = defaults().merge(options)
|
299
|
+
rdf_measures, rdf_dimensions, rdf_codes = generate_resources([], [], codes, options)
|
300
|
+
data = encode_data(codes, data, var, options)
|
220
301
|
lists = []
|
221
|
-
|
302
|
+
rdf_codes.map{|code|
|
303
|
+
if code[0] =~ /^<.+>$/
|
304
|
+
refcode = code[0][1..-2]
|
305
|
+
else
|
306
|
+
refcode = code[0]
|
307
|
+
end
|
222
308
|
str = <<-EOF.unindent
|
223
|
-
|
309
|
+
#{code[2]} a rdfs:Class, owl:Class;
|
224
310
|
rdfs:subClassOf skos:Concept ;
|
225
|
-
rdfs:label "Code list for #{code} - codelist class"@en;
|
226
|
-
rdfs:comment "Specifies the #{code} for each observation";
|
227
|
-
rdfs:seeAlso
|
228
|
-
|
229
|
-
|
230
|
-
skos:prefLabel "Code list for #{code} - codelist scheme"@en;
|
231
|
-
rdfs:label "Code list for #{code} - codelist scheme"@en;
|
232
|
-
skos:notation "CL_#{code.upcase}";
|
233
|
-
skos:note "Specifies the #{code} for each observation";
|
311
|
+
rdfs:label "Code list for #{strip_prefixes(strip_uri(code[1]))} - codelist class"@en;
|
312
|
+
rdfs:comment "Specifies the #{strip_prefixes(strip_uri(code[1]))} for each observation";
|
313
|
+
rdfs:seeAlso #{code[1]} .
|
314
|
+
|
315
|
+
#{code[1]} a skos:ConceptScheme;
|
316
|
+
skos:prefLabel "Code list for #{strip_prefixes(strip_uri(code[1]))} - codelist scheme"@en;
|
317
|
+
rdfs:label "Code list for #{strip_prefixes(strip_uri(code[1]))} - codelist scheme"@en;
|
318
|
+
skos:notation "CL_#{strip_prefixes(strip_uri(code[1])).upcase}";
|
319
|
+
skos:note "Specifies the #{strip_prefixes(strip_uri(code[1]))} for each observation";
|
234
320
|
EOF
|
235
|
-
data[
|
321
|
+
data[refcode].uniq.map{|value|
|
236
322
|
unless value == nil && !options[:encode_nulls]
|
237
|
-
str << " skos:hasTopConcept
|
323
|
+
str << " skos:hasTopConcept #{to_resource(value,options)} ;\n"
|
238
324
|
end
|
239
325
|
}
|
240
326
|
|
@@ -248,15 +334,22 @@ module R2RDF
|
|
248
334
|
|
249
335
|
def concept_codes(codes, data, var, options={})
|
250
336
|
options = defaults().merge(options)
|
337
|
+
rdf_measures, rdf_dimensions, rdf_codes = generate_resources([], [], codes, options)
|
251
338
|
concepts = []
|
252
|
-
codes
|
253
|
-
|
339
|
+
data = encode_data(codes, data, var, options)
|
340
|
+
rdf_codes.map{|code|
|
341
|
+
if code[0] =~ /^<.+>$/
|
342
|
+
refcode = code[0][1..-2]
|
343
|
+
else
|
344
|
+
refcode = code[0]
|
345
|
+
end
|
346
|
+
data[refcode].uniq.each_with_index{|value,i|
|
254
347
|
unless value == nil && !options[:encode_nulls]
|
255
348
|
concepts << <<-EOF.unindent
|
256
|
-
|
257
|
-
skos:topConceptOf
|
258
|
-
skos:prefLabel "#{
|
259
|
-
skos:inScheme
|
349
|
+
#{to_resource(value,options)} a skos:Concept, #{code[2]};
|
350
|
+
skos:topConceptOf #{code[1]} ;
|
351
|
+
skos:prefLabel "#{strip_uri(data[refcode][i])}" ;
|
352
|
+
skos:inScheme #{code[1]} .
|
260
353
|
|
261
354
|
EOF
|
262
355
|
end
|
@@ -267,41 +360,9 @@ module R2RDF
|
|
267
360
|
end
|
268
361
|
|
269
362
|
|
270
|
-
def
|
271
|
-
|
272
|
-
|
273
|
-
#probably throw an error here since a missing resource is a bigger problem
|
274
|
-
obj = "NA" if obj.empty?
|
275
|
-
|
276
|
-
#TODO remove special characters (faster) as well (eg '?')
|
277
|
-
obj.gsub(' ','_').gsub('?','')
|
278
|
-
elsif obj == nil && options[:encode_nulls]
|
279
|
-
'"NA"'
|
280
|
-
elsif obj.is_a? Numeric
|
281
|
-
#resources cannot be referred to purely by integer (?)
|
282
|
-
"n"+obj.to_s
|
283
|
-
else
|
284
|
-
obj
|
285
|
-
end
|
286
|
-
end
|
287
|
-
|
288
|
-
def to_literal(obj, options)
|
289
|
-
if obj.is_a? String
|
290
|
-
# Depressing that there's no more elegant way to check if a string is
|
291
|
-
# a number...
|
292
|
-
if val = Integer(obj) rescue nil
|
293
|
-
val
|
294
|
-
elsif val = Float(obj) rescue nil
|
295
|
-
val
|
296
|
-
else
|
297
|
-
'"'+obj+'"'
|
298
|
-
end
|
299
|
-
elsif obj == nil && options[:encode_nulls]
|
300
|
-
#TODO decide the right way to handle missing values, since RDF has no null
|
301
|
-
'"NA"'
|
302
|
-
else
|
303
|
-
obj
|
304
|
-
end
|
363
|
+
def abbreviate_known(turtle_string)
|
364
|
+
#debug method
|
365
|
+
turtle_string.gsub(/<http:\/\/www\.rqtl\.org\/dc\/properties\/(\S+)>/, 'prop:\1').gsub(/<http:\/\/www.rqtl.org\/ns\/dc\/code\/(\S+)\/(\S+)>/, '<code/\1/\2>').gsub(/<http:\/\/www.rqtl.org\/dc\/dataset\/(\S+)\/code\/(\S+)>/, 'code:\2')
|
305
366
|
end
|
306
367
|
end
|
307
368
|
end
|