bio-publisci 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.travis.yml +1 -1
- data/Gemfile +1 -1
- data/Rakefile +4 -6
- data/features/integration_steps.rb +1 -1
- data/features/metadata.feature +24 -0
- data/features/metadata_steps.rb +21 -0
- data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +36 -14
- data/lib/bio-publisci/dataset/ORM/observation.rb +1 -1
- data/lib/bio-publisci/dataset/data_cube.rb +192 -131
- data/lib/bio-publisci/dataset/dataset_for.rb +150 -0
- data/lib/bio-publisci/dataset/interactive.rb +70 -55
- data/lib/bio-publisci/metadata/metadata.rb +81 -8
- data/lib/bio-publisci/parser.rb +76 -1
- data/lib/bio-publisci/readers/big_cross.rb +118 -117
- data/lib/bio-publisci/readers/csv.rb +37 -2
- data/lib/bio-publisci/readers/r_matrix.rb +1 -1
- data/lib/bio-publisci/store.rb +31 -31
- data/lib/bio-publisci/writers/arff.rb +48 -49
- data/lib/bio-publisci.rb +3 -0
- data/resources/queries/code_resources.rq +10 -0
- data/resources/queries/dimension_ranges.rq +3 -3
- data/resources/queries/dimensions.rq +3 -3
- data/resources/queries/measures.rq +3 -3
- data/resources/queries/observation_labels.rq +8 -0
- data/resources/queries/properties.rq +8 -0
- data/scripts/islet_mlratio.rb +6 -0
- data/scripts/scan_islet.rb +6 -0
- data/scripts/update_reference.rb +20 -0
- data/spec/ORM/data_cube_orm_spec.rb +12 -0
- data/spec/data_cube_spec.rb +1 -1
- data/spec/generators/dataframe_spec.rb +1 -1
- data/spec/generators/r_matrix_spec.rb +1 -1
- data/spec/r_builder_spec.rb +6 -6
- data/spec/resource/.RData +0 -0
- data/spec/resource/example.Rhistory +3 -0
- data/spec/turtle/bacon +4 -22
- data/spec/turtle/reference +9 -27
- metadata +37 -56
- data/lib/bio-publisci/loader.rb +0 -36
- data/spec/bio-publisci_spec.rb +0 -7
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: f03476f5595b28e10a4cb3d950b9c454d621eb4d
|
4
|
+
data.tar.gz: da473d84d9c1e203de4ea1b2d7fef2579cfa8bf8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 33662112c4df2115f15d59f415329dfa11368e8bd1a47907d62c02b476bbeb4afe0a1c43599d2c215a093d1765b7ffb05320220e21888267811c86beacef8876
|
7
|
+
data.tar.gz: 683b5874d082ab155227ee92a1e9bfe15a66b0eaf4fe1e590db39a9391346ec31850b0a94d924c9d26b356fb6ed7f82a228c9d16afa1a67d9c6710086e849963
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/Rakefile
CHANGED
@@ -21,7 +21,8 @@ Jeweler::Tasks.new do |gem|
|
|
21
21
|
gem.description = %Q{A toolkit for publishing scientific results and datasets using RDF and related technologies }
|
22
22
|
gem.email = "wstrinz@gmail.com"
|
23
23
|
gem.authors = ["wstrinz"]
|
24
|
-
gem.version = "0.0.
|
24
|
+
gem.version = "0.0.3"
|
25
|
+
|
25
26
|
# dependencies defined in Gemfile
|
26
27
|
end
|
27
28
|
Jeweler::RubygemsDotOrgTasks.new
|
@@ -29,6 +30,7 @@ Jeweler::RubygemsDotOrgTasks.new
|
|
29
30
|
require 'rspec/core'
|
30
31
|
require 'rspec/core/rake_task'
|
31
32
|
RSpec::Core::RakeTask.new(:spec) do |spec|
|
33
|
+
spec.rspec_opts = "--tag ~no_travis"
|
32
34
|
spec.pattern = FileList['spec/**/*_spec.rb']
|
33
35
|
end
|
34
36
|
|
@@ -44,11 +46,7 @@ Cucumber::Rake::Task.new(:features)
|
|
44
46
|
# task :default => :spec
|
45
47
|
|
46
48
|
task :default => [] do
|
47
|
-
|
48
|
-
Rake::Task[:spec].invoke
|
49
|
-
rescue
|
50
|
-
end
|
51
|
-
Rake::Task[:features].invoke
|
49
|
+
Rake::Task[:spec].invoke
|
52
50
|
end
|
53
51
|
|
54
52
|
task :test => [] do
|
@@ -0,0 +1,24 @@
|
|
1
|
+
Feature: Receive metadata as user input or extract from data sources
|
2
|
+
|
3
|
+
In order to publish and share data about my datasets
|
4
|
+
I want to be able to attach metadata
|
5
|
+
|
6
|
+
Scenario: Attach basic DC Terms info
|
7
|
+
Given a class which includes the Metadata module
|
8
|
+
When I call its basic method with the hash {var: "example", title: "example dataset", creator: "Will Strinz", description: "an example dataset", date: "1-10-2010"}
|
9
|
+
Then I should receive a metadata string
|
10
|
+
|
11
|
+
Scenario: Auto Generate some fields
|
12
|
+
Given a class which includes the Metadata module
|
13
|
+
When I call its basic method with the hash {var: "example", title: "example dataset", description: "an example dataset"}
|
14
|
+
Then I should receive a metadata string
|
15
|
+
|
16
|
+
Scenario: Generate process information
|
17
|
+
Given a class which includes the Metadata module
|
18
|
+
When I call its provenance method with the hash {var: "example", software: {name: "R", process: 'spec/resource/example.Rhistory'}}
|
19
|
+
Then I should receive a metadata string
|
20
|
+
|
21
|
+
Scenario: Generate organizational provenance information
|
22
|
+
Given a class which includes the Metadata module
|
23
|
+
When I call its provenance method with the hash {var: "example", creator: "http://gsocsemantic.wordpress.com/me", organization: "http://sciruby.com/"}
|
24
|
+
Then I should receive a metadata string
|
@@ -0,0 +1,21 @@
|
|
1
|
+
Given(/^a class which includes the Metadata module$/) do
|
2
|
+
class Meta
|
3
|
+
include R2RDF::Metadata
|
4
|
+
end
|
5
|
+
@klass = Meta
|
6
|
+
end
|
7
|
+
|
8
|
+
When(/^I call its basic method with the hash (\{.+\})$/) do |fields|
|
9
|
+
fields = eval(fields)
|
10
|
+
@response = @klass.new.basic(fields)
|
11
|
+
end
|
12
|
+
|
13
|
+
When(/^I call its provenance method with the hash (\{.+\})$/) do |fields|
|
14
|
+
fields = eval(fields)
|
15
|
+
@response = @klass.new.provenance(fields)
|
16
|
+
end
|
17
|
+
|
18
|
+
Then(/^I should receive a metadata string$/) do
|
19
|
+
@response.is_a?(String).should be true
|
20
|
+
puts @response
|
21
|
+
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module R2RDF
|
2
|
-
|
2
|
+
class Dataset
|
3
3
|
module ORM
|
4
4
|
class DataCube
|
5
5
|
extend R2RDF::Dataset::DataCube
|
@@ -43,25 +43,41 @@ module R2RDF
|
|
43
43
|
if solution[:range].split('/')[-2] == "code"
|
44
44
|
type = :coded
|
45
45
|
else
|
46
|
-
type =
|
46
|
+
type = solution[:range].to_s
|
47
47
|
end
|
48
|
-
[
|
48
|
+
[solution[:dimension], {type: type}]
|
49
49
|
}]
|
50
50
|
puts "dimensions: #{dimensions}" if verbose
|
51
|
-
|
52
|
-
|
51
|
+
|
52
|
+
codes = execute_from_file('code_resources.rq',graph).to_h.map{|sol|
|
53
|
+
[sol[:dimension].to_s, sol[:codeList].to_s, sol[:class].to_s]
|
54
|
+
}
|
55
|
+
puts "codes: #{codes}" if verbose
|
56
|
+
|
57
|
+
measures = execute_from_file('measures.rq',graph).to_h.map{|m| m[:measure].to_s}
|
58
|
+
puts "measures: #{measures}" if verbose
|
59
|
+
|
53
60
|
name = execute_from_file('dataset.rq',graph).to_h.first[:label]
|
54
61
|
puts "dataset: #{name}" if verbose
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
62
|
+
|
63
|
+
obs = execute_from_file('observations.rq',graph)
|
64
|
+
observations = observation_hash(obs)
|
65
|
+
puts "observations: #{observations}" if verbose
|
66
|
+
|
67
|
+
# simple_observations = observation_hash(obs,true)
|
68
|
+
|
69
|
+
labels = execute_from_file('observation_labels.rq', graph)
|
70
|
+
labels = Hash[labels.map{|sol|
|
71
|
+
[sol[:observation].to_s, sol[:label].to_s]
|
72
|
+
}]
|
59
73
|
|
60
74
|
new_opts = {
|
61
75
|
measures: measures,
|
62
76
|
dimensions: dimensions,
|
63
|
-
observations:
|
77
|
+
observations: observations.values,
|
64
78
|
name: name,
|
79
|
+
labels: labels.values,
|
80
|
+
codes: codes
|
65
81
|
}
|
66
82
|
|
67
83
|
options = options.merge(new_opts)
|
@@ -96,6 +112,14 @@ module R2RDF
|
|
96
112
|
if options[:validate_each]
|
97
113
|
@options[:validate_each] = options[:validate_each]
|
98
114
|
end
|
115
|
+
|
116
|
+
if options[:labels]
|
117
|
+
@labels = options[:labels]
|
118
|
+
end
|
119
|
+
|
120
|
+
if options[:codes]
|
121
|
+
@codes = options[:codes]
|
122
|
+
end
|
99
123
|
end
|
100
124
|
|
101
125
|
def to_n3
|
@@ -120,10 +144,8 @@ module R2RDF
|
|
120
144
|
}
|
121
145
|
|
122
146
|
|
123
|
-
codes = @dimensions.map{|d,v| d if v[:type] == :coded}.compact
|
124
|
-
|
125
|
-
|
126
|
-
str = generate(@measures, @dimensions.keys, codes, data, @labels, @name, @generator_options)
|
147
|
+
@codes = @dimensions.map{|d,v| d if v[:type] == :coded}.compact unless @codes
|
148
|
+
str = generate(@measures, @dimensions.keys, @codes, data, @labels, @name, @generator_options)
|
127
149
|
unless @options[:skip_metadata]
|
128
150
|
fields = {
|
129
151
|
publishers: publishers(),
|
@@ -1,68 +1,112 @@
|
|
1
1
|
#monkey patch to make rdf string w/ heredocs prettier ;)
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
# gsub(/^#{scan(/^\s*/).min_by{|l|l.length}}/, "")
|
6
|
-
end
|
2
|
+
class String
|
3
|
+
def unindent
|
4
|
+
gsub /^#{self[/\A\s*/]}/, ''
|
7
5
|
end
|
6
|
+
end
|
8
7
|
|
9
8
|
module R2RDF
|
10
|
-
|
11
|
-
module Dataset
|
9
|
+
class Dataset
|
12
10
|
module DataCube
|
11
|
+
include R2RDF::Parser
|
13
12
|
def defaults
|
14
13
|
{
|
15
14
|
type: :dataframe,
|
16
15
|
encode_nulls: false,
|
17
16
|
base_url: "http://www.rqtl.org",
|
18
17
|
}
|
19
|
-
|
20
|
-
|
21
|
-
def generate(measures, dimensions, codes, data, observation_labels, var, options={})
|
22
|
-
dimensions = sanitize(dimensions)
|
23
|
-
codes = sanitize(codes)
|
24
|
-
measures = sanitize(measures)
|
25
|
-
var = sanitize([var]).first
|
26
|
-
data = sanitize_hash(data)
|
27
|
-
|
28
|
-
str = prefixes(var,options)
|
29
|
-
str << data_structure_definition((measures | dimensions), var, options)
|
30
|
-
str << dataset(var, options)
|
31
|
-
component_specifications(measures, dimensions, var, options).map{ |c| str << c }
|
32
|
-
dimension_properties(dimensions, codes, var, options).map{|p| str << p}
|
33
|
-
measure_properties(measures, var, options).map{|p| str << p}
|
34
|
-
code_lists(codes, data, var, options).map{|l| str << l}
|
35
|
-
concept_codes(codes, data, var, options).map{|c| str << c}
|
36
|
-
observations(measures, dimensions, codes, data, observation_labels, var, options).map{|o| str << o}
|
37
|
-
str
|
38
|
-
end
|
18
|
+
end
|
39
19
|
|
40
|
-
def
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
20
|
+
def generate_resources(measures, dimensions, codes, options={})
|
21
|
+
newm = measures.map {|m|
|
22
|
+
if m =~ /^http:\/\//
|
23
|
+
"<#{m}>"
|
24
|
+
elsif m =~ /^[a-zA-z]+:[a-zA-z]+$/
|
25
|
+
m
|
46
26
|
else
|
47
|
-
|
48
|
-
end
|
27
|
+
"prop:#{m}"
|
28
|
+
end
|
49
29
|
}
|
50
|
-
processed
|
51
|
-
end
|
52
30
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
31
|
+
newc = []
|
32
|
+
|
33
|
+
newd = dimensions.map{|d|
|
34
|
+
if d =~ /^http:\/\//
|
35
|
+
# newc << "<#{d}>" if codes.include? d
|
36
|
+
"<#{d}>"
|
37
|
+
elsif d =~ /^[a-zA-z]+:[a-zA-z]+$/
|
38
|
+
d
|
39
|
+
else
|
40
|
+
# newc << "prop:#{d}" if codes.include? d
|
41
|
+
"prop:#{d}"
|
42
|
+
end
|
59
43
|
}
|
60
44
|
|
61
|
-
|
62
|
-
|
45
|
+
if codes.first.is_a? Array
|
46
|
+
newc = codes.map{|c|
|
47
|
+
c.map{|el|
|
48
|
+
if el =~ /^http:\/\//
|
49
|
+
"<#{el}>"
|
50
|
+
else
|
51
|
+
el
|
52
|
+
end
|
53
|
+
}
|
54
|
+
}
|
55
|
+
else
|
56
|
+
newc = codes.map{|c|
|
57
|
+
["#{c}","code:#{c.downcase}","code:#{c.downcase.capitalize}"]
|
58
|
+
}
|
59
|
+
end
|
60
|
+
[newm, newd, newc]
|
61
|
+
end
|
62
|
+
|
63
|
+
def encode_data(codes,data,var,options={})
|
64
|
+
new_data = {}
|
65
|
+
data.map{|k,v|
|
66
|
+
if codes.include? k
|
67
|
+
new_data[k] = v.map{|val|
|
68
|
+
if val =~ /^http:\/\//
|
69
|
+
"<#{val}>"
|
70
|
+
elsif val =~ /^[a-zA-z]+:[a-zA-z]+$/
|
71
|
+
val
|
72
|
+
else
|
73
|
+
"<code/#{k.downcase}/#{val}>"
|
74
|
+
end
|
75
|
+
}
|
76
|
+
else
|
77
|
+
new_data[k] = v
|
78
|
+
end
|
63
79
|
}
|
80
|
+
new_data
|
81
|
+
end
|
64
82
|
|
65
|
-
|
83
|
+
def vocabulary(vocab,options={})
|
84
|
+
if vocab.is_a?(String) && vocab =~ /^http:\/\//
|
85
|
+
RDF::Vocabulary.new(vocab)
|
86
|
+
elsif RDF.const_defined? vocab.to_sym && RDF.const_get(vocab.to_sym).inspect =~ /^RDF::Vocabulary/
|
87
|
+
RDF.const_get(vocab)
|
88
|
+
else
|
89
|
+
nil
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def generate(measures, dimensions, codes, data, observation_labels, var, options={})
|
94
|
+
# dimensions = sanitize(dimensions)
|
95
|
+
# codes = sanitize(codes)
|
96
|
+
# measures = sanitize(measures)
|
97
|
+
var = sanitize([var]).first
|
98
|
+
data = sanitize_hash(data)
|
99
|
+
|
100
|
+
str = prefixes(var,options)
|
101
|
+
str << data_structure_definition(measures, dimensions, codes, var, options)
|
102
|
+
str << dataset(var, options)
|
103
|
+
# component_specifications(measures, dimensions, var, options).map{ |c| str << c }
|
104
|
+
dimension_properties(dimensions, codes, var, options).map{|p| str << p}
|
105
|
+
measure_properties(measures, var, options).map{|p| str << p}
|
106
|
+
code_lists(codes, data, var, options).map{|l| str << l}
|
107
|
+
concept_codes(codes, data, var, options).map{|c| str << c}
|
108
|
+
observations(measures, dimensions, codes, data, observation_labels, var, options).map{|o| str << o}
|
109
|
+
str
|
66
110
|
end
|
67
111
|
|
68
112
|
def prefixes(var, options={})
|
@@ -80,7 +124,6 @@ module R2RDF
|
|
80
124
|
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
|
81
125
|
@prefix cs: <#{base}/dc/dataset/#{var}/cs/> .
|
82
126
|
@prefix code: <#{base}/dc/dataset/#{var}/code/> .
|
83
|
-
@prefix class: <#{base}/dc/dataset/#{var}/class/> .
|
84
127
|
@prefix owl: <http://www.w3.org/2002/07/owl#> .
|
85
128
|
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
|
86
129
|
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
|
@@ -90,13 +133,18 @@ module R2RDF
|
|
90
133
|
EOF
|
91
134
|
end
|
92
135
|
|
93
|
-
def data_structure_definition(
|
136
|
+
def data_structure_definition(measures,dimensions,codes,var,options={})
|
94
137
|
var = sanitize([var]).first
|
95
138
|
options = defaults().merge(options)
|
139
|
+
rdf_measures, rdf_dimensions, rdf_codes = generate_resources(measures, dimensions, codes, options)
|
140
|
+
|
96
141
|
str = "ns:dsd-#{var} a qb:DataStructureDefinition;\n"
|
97
|
-
|
98
|
-
|
99
|
-
|
142
|
+
rdf_dimensions.map{|d|
|
143
|
+
str << " qb:component [ qb:dimension #{d} ] ;\n"
|
144
|
+
}
|
145
|
+
|
146
|
+
rdf_measures.map{|m|
|
147
|
+
str << " qb:component [ qb:measure #{m} ] ;\n"
|
100
148
|
}
|
101
149
|
str[-2]='.'
|
102
150
|
str<<"\n"
|
@@ -141,24 +189,34 @@ module R2RDF
|
|
141
189
|
|
142
190
|
def dimension_properties(dimensions, codes, var, options={})
|
143
191
|
options = defaults().merge(options)
|
192
|
+
rdf_measures, rdf_dimensions, rdf_codes = generate_resources([], dimensions, codes, options)
|
144
193
|
props = []
|
145
|
-
|
146
|
-
dimensions.map{|d|
|
147
|
-
if codes.include?(d)
|
148
|
-
props << <<-EOF.unindent
|
149
|
-
prop:#{d} a rdf:Property, qb:DimensionProperty ;
|
150
|
-
rdfs:label "#{d}"@en ;
|
151
|
-
qb:codeList code:#{d.downcase} ;
|
152
|
-
rdfs:range code:#{d.downcase.capitalize} .
|
153
194
|
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
195
|
+
dimension_codes = rdf_codes.map{|c|
|
196
|
+
if c[0]=~/^<http:/
|
197
|
+
c[0][1..-2]
|
198
|
+
else
|
199
|
+
c[0]
|
200
|
+
end
|
201
|
+
}
|
159
202
|
|
160
|
-
|
161
|
-
|
203
|
+
rdf_dimensions.each_with_index{|d,i|
|
204
|
+
if dimension_codes.include?(dimensions[i])
|
205
|
+
code = rdf_codes[dimension_codes.index(dimensions[i])]
|
206
|
+
props << <<-EOF.unindent
|
207
|
+
#{d} a rdf:Property, qb:DimensionProperty ;
|
208
|
+
rdfs:label "#{strip_prefixes(strip_uri(d))}"@en ;
|
209
|
+
qb:codeList #{code[1]} ;
|
210
|
+
rdfs:range #{code[2]} .
|
211
|
+
|
212
|
+
EOF
|
213
|
+
else
|
214
|
+
props << <<-EOF.unindent
|
215
|
+
#{d} a rdf:Property, qb:DimensionProperty ;
|
216
|
+
rdfs:label "#{strip_prefixes(strip_uri(d))}"@en .
|
217
|
+
|
218
|
+
EOF
|
219
|
+
end
|
162
220
|
}
|
163
221
|
|
164
222
|
props
|
@@ -166,13 +224,14 @@ module R2RDF
|
|
166
224
|
|
167
225
|
def measure_properties(measures, var, options={})
|
168
226
|
options = defaults().merge(options)
|
227
|
+
rdf_measures = generate_resources(measures, [], [], options)[0]
|
169
228
|
props = []
|
170
229
|
|
171
|
-
|
230
|
+
rdf_measures.map{ |m|
|
172
231
|
|
173
232
|
props << <<-EOF.unindent
|
174
|
-
|
175
|
-
rdfs:label "#{m}"@en .
|
233
|
+
#{m} a rdf:Property, qb:MeasureProperty ;
|
234
|
+
rdfs:label "#{strip_prefixes(strip_uri(m))}"@en .
|
176
235
|
|
177
236
|
EOF
|
178
237
|
}
|
@@ -183,7 +242,18 @@ module R2RDF
|
|
183
242
|
def observations(measures, dimensions, codes, data, observation_labels, var, options={})
|
184
243
|
var = sanitize([var]).first
|
185
244
|
options = defaults().merge(options)
|
245
|
+
rdf_measures, rdf_dimensions, rdf_codes = generate_resources(measures, dimensions, codes, options)
|
246
|
+
data = encode_data(codes, data, var, options)
|
186
247
|
obs = []
|
248
|
+
|
249
|
+
dimension_codes = rdf_codes.map{|c|
|
250
|
+
if c[0]=~/^<http:/
|
251
|
+
c[0][1..-2]
|
252
|
+
else
|
253
|
+
c[0]
|
254
|
+
end
|
255
|
+
}
|
256
|
+
|
187
257
|
observation_labels.each_with_index.map{|r, i|
|
188
258
|
contains_nulls = false
|
189
259
|
str = <<-EOF.unindent
|
@@ -193,48 +263,64 @@ module R2RDF
|
|
193
263
|
|
194
264
|
str << " rdfs:label \"#{r}\" ;\n" unless options[:no_labels]
|
195
265
|
|
196
|
-
dimensions.
|
266
|
+
dimensions.each_with_index{|d,j|
|
197
267
|
contains_nulls = contains_nulls | (data[d][i] == nil)
|
198
|
-
|
199
|
-
|
268
|
+
|
269
|
+
if dimension_codes.include? d
|
270
|
+
# str << " #{rdf_dimensions[j]} <code/#{d.downcase}/#{data[d][i]}> ;\n"
|
271
|
+
str << " #{rdf_dimensions[j]} #{to_resource(data[d][i], options)} ;\n"
|
200
272
|
else
|
201
|
-
str << "
|
273
|
+
str << " #{rdf_dimensions[j]} #{to_literal(data[d][i], options)} ;\n"
|
202
274
|
end
|
203
275
|
}
|
204
276
|
|
205
|
-
measures.
|
277
|
+
measures.each_with_index{|m,j|
|
206
278
|
contains_nulls = contains_nulls | (data[m][i] == nil)
|
207
|
-
str << "
|
279
|
+
str << " #{rdf_measures[j]} #{to_literal(data[m][i], options)} ;\n"
|
208
280
|
|
209
281
|
}
|
210
282
|
|
211
283
|
str << " .\n\n"
|
212
|
-
|
213
|
-
|
284
|
+
if contains_nulls && !options[:encode_nulls]
|
285
|
+
if options[:raise_nils]
|
286
|
+
raise "missing component for observation, skipping: #{str}, "
|
287
|
+
elsif options[:whiny_nils]
|
288
|
+
puts "missing component for observation, skipping: #{str}, "
|
289
|
+
end
|
290
|
+
else
|
291
|
+
obs << str
|
292
|
+
end
|
214
293
|
}
|
215
294
|
obs
|
216
295
|
end
|
217
296
|
|
218
297
|
def code_lists(codes, data, var, options={})
|
219
298
|
options = defaults().merge(options)
|
299
|
+
rdf_measures, rdf_dimensions, rdf_codes = generate_resources([], [], codes, options)
|
300
|
+
data = encode_data(codes, data, var, options)
|
220
301
|
lists = []
|
221
|
-
|
302
|
+
rdf_codes.map{|code|
|
303
|
+
if code[0] =~ /^<.+>$/
|
304
|
+
refcode = code[0][1..-2]
|
305
|
+
else
|
306
|
+
refcode = code[0]
|
307
|
+
end
|
222
308
|
str = <<-EOF.unindent
|
223
|
-
|
309
|
+
#{code[2]} a rdfs:Class, owl:Class;
|
224
310
|
rdfs:subClassOf skos:Concept ;
|
225
|
-
rdfs:label "Code list for #{code} - codelist class"@en;
|
226
|
-
rdfs:comment "Specifies the #{code} for each observation";
|
227
|
-
rdfs:seeAlso
|
228
|
-
|
229
|
-
|
230
|
-
skos:prefLabel "Code list for #{code} - codelist scheme"@en;
|
231
|
-
rdfs:label "Code list for #{code} - codelist scheme"@en;
|
232
|
-
skos:notation "CL_#{code.upcase}";
|
233
|
-
skos:note "Specifies the #{code} for each observation";
|
311
|
+
rdfs:label "Code list for #{strip_prefixes(strip_uri(code[1]))} - codelist class"@en;
|
312
|
+
rdfs:comment "Specifies the #{strip_prefixes(strip_uri(code[1]))} for each observation";
|
313
|
+
rdfs:seeAlso #{code[1]} .
|
314
|
+
|
315
|
+
#{code[1]} a skos:ConceptScheme;
|
316
|
+
skos:prefLabel "Code list for #{strip_prefixes(strip_uri(code[1]))} - codelist scheme"@en;
|
317
|
+
rdfs:label "Code list for #{strip_prefixes(strip_uri(code[1]))} - codelist scheme"@en;
|
318
|
+
skos:notation "CL_#{strip_prefixes(strip_uri(code[1])).upcase}";
|
319
|
+
skos:note "Specifies the #{strip_prefixes(strip_uri(code[1]))} for each observation";
|
234
320
|
EOF
|
235
|
-
data[
|
321
|
+
data[refcode].uniq.map{|value|
|
236
322
|
unless value == nil && !options[:encode_nulls]
|
237
|
-
str << " skos:hasTopConcept
|
323
|
+
str << " skos:hasTopConcept #{to_resource(value,options)} ;\n"
|
238
324
|
end
|
239
325
|
}
|
240
326
|
|
@@ -248,15 +334,22 @@ module R2RDF
|
|
248
334
|
|
249
335
|
def concept_codes(codes, data, var, options={})
|
250
336
|
options = defaults().merge(options)
|
337
|
+
rdf_measures, rdf_dimensions, rdf_codes = generate_resources([], [], codes, options)
|
251
338
|
concepts = []
|
252
|
-
codes
|
253
|
-
|
339
|
+
data = encode_data(codes, data, var, options)
|
340
|
+
rdf_codes.map{|code|
|
341
|
+
if code[0] =~ /^<.+>$/
|
342
|
+
refcode = code[0][1..-2]
|
343
|
+
else
|
344
|
+
refcode = code[0]
|
345
|
+
end
|
346
|
+
data[refcode].uniq.each_with_index{|value,i|
|
254
347
|
unless value == nil && !options[:encode_nulls]
|
255
348
|
concepts << <<-EOF.unindent
|
256
|
-
|
257
|
-
skos:topConceptOf
|
258
|
-
skos:prefLabel "#{
|
259
|
-
skos:inScheme
|
349
|
+
#{to_resource(value,options)} a skos:Concept, #{code[2]};
|
350
|
+
skos:topConceptOf #{code[1]} ;
|
351
|
+
skos:prefLabel "#{strip_uri(data[refcode][i])}" ;
|
352
|
+
skos:inScheme #{code[1]} .
|
260
353
|
|
261
354
|
EOF
|
262
355
|
end
|
@@ -267,41 +360,9 @@ module R2RDF
|
|
267
360
|
end
|
268
361
|
|
269
362
|
|
270
|
-
def
|
271
|
-
|
272
|
-
|
273
|
-
#probably throw an error here since a missing resource is a bigger problem
|
274
|
-
obj = "NA" if obj.empty?
|
275
|
-
|
276
|
-
#TODO remove special characters (faster) as well (eg '?')
|
277
|
-
obj.gsub(' ','_').gsub('?','')
|
278
|
-
elsif obj == nil && options[:encode_nulls]
|
279
|
-
'"NA"'
|
280
|
-
elsif obj.is_a? Numeric
|
281
|
-
#resources cannot be referred to purely by integer (?)
|
282
|
-
"n"+obj.to_s
|
283
|
-
else
|
284
|
-
obj
|
285
|
-
end
|
286
|
-
end
|
287
|
-
|
288
|
-
def to_literal(obj, options)
|
289
|
-
if obj.is_a? String
|
290
|
-
# Depressing that there's no more elegant way to check if a string is
|
291
|
-
# a number...
|
292
|
-
if val = Integer(obj) rescue nil
|
293
|
-
val
|
294
|
-
elsif val = Float(obj) rescue nil
|
295
|
-
val
|
296
|
-
else
|
297
|
-
'"'+obj+'"'
|
298
|
-
end
|
299
|
-
elsif obj == nil && options[:encode_nulls]
|
300
|
-
#TODO decide the right way to handle missing values, since RDF has no null
|
301
|
-
'"NA"'
|
302
|
-
else
|
303
|
-
obj
|
304
|
-
end
|
363
|
+
def abbreviate_known(turtle_string)
|
364
|
+
#debug method
|
365
|
+
turtle_string.gsub(/<http:\/\/www\.rqtl\.org\/dc\/properties\/(\S+)>/, 'prop:\1').gsub(/<http:\/\/www.rqtl.org\/ns\/dc\/code\/(\S+)\/(\S+)>/, '<code/\1/\2>').gsub(/<http:\/\/www.rqtl.org\/dc\/dataset\/(\S+)\/code\/(\S+)>/, 'code:\2')
|
305
366
|
end
|
306
367
|
end
|
307
368
|
end
|