publisci 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.rspec +1 -0
- data/.travis.yml +13 -0
- data/Gemfile +36 -0
- data/LICENSE.txt +20 -0
- data/README.md +51 -0
- data/README.rdoc +48 -0
- data/Rakefile +68 -0
- data/bin/bio-publisci +106 -0
- data/bin/bio-publisci-server +50 -0
- data/examples/bio-band_integration.rb +9 -0
- data/examples/no_magic.prov +58 -0
- data/examples/no_magic.rb +58 -0
- data/examples/orm.prov +48 -0
- data/examples/primer-full.prov +120 -0
- data/examples/primer.prov +66 -0
- data/examples/prov_dsl.prov +85 -0
- data/examples/safe_gen.rb +7 -0
- data/examples/visualization/primer.prov +66 -0
- data/examples/visualization/prov_viz.rb +140 -0
- data/examples/visualization/viz.rb +35 -0
- data/features/create_generator.feature +21 -0
- data/features/integration.feature +12 -0
- data/features/integration_steps.rb +10 -0
- data/features/metadata.feature +37 -0
- data/features/metadata_steps.rb +40 -0
- data/features/orm.feature +60 -0
- data/features/orm_steps.rb +74 -0
- data/features/prov_dsl.feature +14 -0
- data/features/prov_dsl_steps.rb +11 -0
- data/features/reader.feature +25 -0
- data/features/reader_steps.rb +61 -0
- data/features/step_definitions/bio-publisci_steps.rb +0 -0
- data/features/store.feature +27 -0
- data/features/store_steps.rb +42 -0
- data/features/support/env.rb +13 -0
- data/features/writer.feature +14 -0
- data/features/writer_steps.rb +24 -0
- data/lib/bio-publisci.rb +64 -0
- data/lib/bio-publisci/analyzer.rb +57 -0
- data/lib/bio-publisci/datacube_model.rb +111 -0
- data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +240 -0
- data/lib/bio-publisci/dataset/ORM/observation.rb +20 -0
- data/lib/bio-publisci/dataset/configuration.rb +31 -0
- data/lib/bio-publisci/dataset/data_cube.rb +418 -0
- data/lib/bio-publisci/dataset/dataset.rb +11 -0
- data/lib/bio-publisci/dataset/dataset_for.rb +186 -0
- data/lib/bio-publisci/dataset/interactive.rb +72 -0
- data/lib/bio-publisci/dsl/config.rb +34 -0
- data/lib/bio-publisci/dsl/dataset_dsl.rb +93 -0
- data/lib/bio-publisci/dsl/dsl.rb +72 -0
- data/lib/bio-publisci/dsl/metadata_dsl.rb +85 -0
- data/lib/bio-publisci/dsl/prov_dsl.rb +143 -0
- data/lib/bio-publisci/metadata/generator.rb +323 -0
- data/lib/bio-publisci/metadata/metadata.rb +5 -0
- data/lib/bio-publisci/metadata/metadata_model.rb +25 -0
- data/lib/bio-publisci/metadata/prov/activity.rb +88 -0
- data/lib/bio-publisci/metadata/prov/agent.rb +100 -0
- data/lib/bio-publisci/metadata/prov/association.rb +107 -0
- data/lib/bio-publisci/metadata/prov/config.rb +34 -0
- data/lib/bio-publisci/metadata/prov/derivation.rb +60 -0
- data/lib/bio-publisci/metadata/prov/element.rb +120 -0
- data/lib/bio-publisci/metadata/prov/entity.rb +64 -0
- data/lib/bio-publisci/metadata/prov/model/prov_models.rb +109 -0
- data/lib/bio-publisci/metadata/prov/plan.rb +32 -0
- data/lib/bio-publisci/metadata/prov/prov.rb +78 -0
- data/lib/bio-publisci/metadata/prov/role.rb +40 -0
- data/lib/bio-publisci/metadata/prov/usage.rb +64 -0
- data/lib/bio-publisci/metadata/publisher.rb +25 -0
- data/lib/bio-publisci/mixins/custom_predicate.rb +38 -0
- data/lib/bio-publisci/mixins/dereferencable.rb +34 -0
- data/lib/bio-publisci/mixins/registry.rb +27 -0
- data/lib/bio-publisci/mixins/vocabulary.rb +8 -0
- data/lib/bio-publisci/output.rb +27 -0
- data/lib/bio-publisci/parser.rb +266 -0
- data/lib/bio-publisci/post_processor.rb +95 -0
- data/lib/bio-publisci/query/query_helper.rb +123 -0
- data/lib/bio-publisci/r_client.rb +54 -0
- data/lib/bio-publisci/readers/arff.rb +49 -0
- data/lib/bio-publisci/readers/base.rb +57 -0
- data/lib/bio-publisci/readers/csv.rb +88 -0
- data/lib/bio-publisci/readers/dataframe.rb +67 -0
- data/lib/bio-publisci/readers/maf.rb +199 -0
- data/lib/bio-publisci/readers/r_cross.rb +112 -0
- data/lib/bio-publisci/readers/r_matrix.rb +176 -0
- data/lib/bio-publisci/store.rb +56 -0
- data/lib/bio-publisci/writers/arff.rb +91 -0
- data/lib/bio-publisci/writers/base.rb +93 -0
- data/lib/bio-publisci/writers/csv.rb +31 -0
- data/lib/bio-publisci/writers/dataframe.rb +81 -0
- data/lib/bio-publisci/writers/json.rb +18 -0
- data/lib/r2rdf.rb +226 -0
- data/lib/template_bak.rb +12 -0
- data/lib/template_bak/publisci.rb +3 -0
- data/lib/vocabs/cc.rb +18 -0
- data/lib/vocabs/cert.rb +13 -0
- data/lib/vocabs/dc.rb +63 -0
- data/lib/vocabs/dc11.rb +23 -0
- data/lib/vocabs/doap.rb +45 -0
- data/lib/vocabs/exif.rb +168 -0
- data/lib/vocabs/foaf.rb +69 -0
- data/lib/vocabs/geo.rb +13 -0
- data/lib/vocabs/http.rb +26 -0
- data/lib/vocabs/ma.rb +78 -0
- data/lib/vocabs/owl.rb +59 -0
- data/lib/vocabs/rdfs.rb +17 -0
- data/lib/vocabs/rsa.rb +12 -0
- data/lib/vocabs/rss.rb +14 -0
- data/lib/vocabs/sioc.rb +93 -0
- data/lib/vocabs/skos.rb +36 -0
- data/lib/vocabs/wot.rb +21 -0
- data/lib/vocabs/xhtml.rb +9 -0
- data/lib/vocabs/xsd.rb +58 -0
- data/resources/maf_example.maf +10 -0
- data/resources/maf_rdf.ttl +1173 -0
- data/resources/primer.ttl +38 -0
- data/resources/queries/code_resources.rq +10 -0
- data/resources/queries/codes.rq +18 -0
- data/resources/queries/dataset.rq +7 -0
- data/resources/queries/dimension_ranges.rq +8 -0
- data/resources/queries/dimensions.rq +12 -0
- data/resources/queries/gene.rq +16 -0
- data/resources/queries/hugo_to_ensembl.rq +7 -0
- data/resources/queries/maf_column.rq +26 -0
- data/resources/queries/measures.rq +12 -0
- data/resources/queries/observation_labels.rq +8 -0
- data/resources/queries/observations.rq +13 -0
- data/resources/queries/patient.rq +11 -0
- data/resources/queries/patient_list.rq +11 -0
- data/resources/queries/patients_with_mutation.rq +18 -0
- data/resources/queries/properties.rq +8 -0
- data/resources/queries/test.rq +3 -0
- data/resources/weather.numeric.arff +28 -0
- data/scripts/get_gene_lengths.rb +50 -0
- data/scripts/islet_mlratio.rb +6 -0
- data/scripts/scan_islet.rb +6 -0
- data/scripts/update_reference.rb +25 -0
- data/server/helpers.rb +215 -0
- data/server/public/src-min-noconflict/LICENSE +24 -0
- data/server/public/src-min-noconflict/ace.js +11 -0
- data/server/public/src-min-noconflict/ext-chromevox.js +1 -0
- data/server/public/src-min-noconflict/ext-elastic_tabstops_lite.js +1 -0
- data/server/public/src-min-noconflict/ext-emmet.js +1 -0
- data/server/public/src-min-noconflict/ext-keybinding_menu.js +1 -0
- data/server/public/src-min-noconflict/ext-language_tools.js +1 -0
- data/server/public/src-min-noconflict/ext-modelist.js +1 -0
- data/server/public/src-min-noconflict/ext-old_ie.js +1 -0
- data/server/public/src-min-noconflict/ext-searchbox.js +1 -0
- data/server/public/src-min-noconflict/ext-settings_menu.js +1 -0
- data/server/public/src-min-noconflict/ext-spellcheck.js +1 -0
- data/server/public/src-min-noconflict/ext-split.js +1 -0
- data/server/public/src-min-noconflict/ext-static_highlight.js +1 -0
- data/server/public/src-min-noconflict/ext-statusbar.js +1 -0
- data/server/public/src-min-noconflict/ext-textarea.js +1 -0
- data/server/public/src-min-noconflict/ext-themelist.js +1 -0
- data/server/public/src-min-noconflict/ext-whitespace.js +1 -0
- data/server/public/src-min-noconflict/keybinding-emacs.js +1 -0
- data/server/public/src-min-noconflict/keybinding-vim.js +1 -0
- data/server/public/src-min-noconflict/mode-ruby.js +1 -0
- data/server/public/src-min-noconflict/snippets/ruby.js +1 -0
- data/server/public/src-min-noconflict/theme-twilight.js +1 -0
- data/server/public/src-min-noconflict/worker-coffee.js +1 -0
- data/server/public/src-min-noconflict/worker-css.js +1 -0
- data/server/public/src-min-noconflict/worker-javascript.js +1 -0
- data/server/public/src-min-noconflict/worker-json.js +1 -0
- data/server/public/src-min-noconflict/worker-lua.js +1 -0
- data/server/public/src-min-noconflict/worker-php.js +1 -0
- data/server/public/src-min-noconflict/worker-xquery.js +1 -0
- data/server/routes.rb +123 -0
- data/server/views/dsl.haml +65 -0
- data/server/views/dump.haml +3 -0
- data/server/views/import.haml +35 -0
- data/server/views/new_repository.haml +25 -0
- data/server/views/query.haml +28 -0
- data/server/views/repository.haml +25 -0
- data/spec/ORM/data_cube_orm_spec.rb +33 -0
- data/spec/ORM/prov_model_spec.rb +72 -0
- data/spec/analyzer_spec.rb +36 -0
- data/spec/bnode_spec.rb +66 -0
- data/spec/csv/bacon.csv +4 -0
- data/spec/csv/moar_bacon.csv +11 -0
- data/spec/data_cube_spec.rb +169 -0
- data/spec/dataset_for_spec.rb +77 -0
- data/spec/dsl_spec.rb +134 -0
- data/spec/generators/csv_spec.rb +44 -0
- data/spec/generators/dataframe_spec.rb +44 -0
- data/spec/generators/maf_spec.rb +40 -0
- data/spec/generators/r_cross_spec.rb +51 -0
- data/spec/generators/r_matrix_spec.rb +44 -0
- data/spec/length_lookup_spec.rb +0 -0
- data/spec/maf_query_spec.rb +343 -0
- data/spec/metadata/metadata_dsl_spec.rb +68 -0
- data/spec/prov/activity_spec.rb +74 -0
- data/spec/prov/agent_spec.rb +54 -0
- data/spec/prov/association_spec.rb +55 -0
- data/spec/prov/config_spec.rb +28 -0
- data/spec/prov/derivation_spec.rb +30 -0
- data/spec/prov/entity_spec.rb +52 -0
- data/spec/prov/role_spec.rb +94 -0
- data/spec/prov/usage_spec.rb +98 -0
- data/spec/queries/integrity/1.rq +21 -0
- data/spec/queries/integrity/11.rq +29 -0
- data/spec/queries/integrity/12.rq +37 -0
- data/spec/queries/integrity/14.rq +25 -0
- data/spec/queries/integrity/19_1.rq +21 -0
- data/spec/queries/integrity/19_2.rq +15 -0
- data/spec/queries/integrity/2.rq +22 -0
- data/spec/queries/integrity/3.rq +19 -0
- data/spec/queries/integrity/4.rq +13 -0
- data/spec/queries/integrity/5.rq +14 -0
- data/spec/r_builder_spec.rb +33 -0
- data/spec/resource/.RData +0 -0
- data/spec/resource/example.Rhistory +3 -0
- data/spec/spec_helper.rb +17 -0
- data/spec/turtle/bacon +147 -0
- data/spec/turtle/reference +2064 -0
- data/spec/turtle/weather +275 -0
- data/spec/writer_spec.rb +75 -0
- metadata +589 -0
@@ -0,0 +1,240 @@
|
|
1
|
+
module PubliSci
|
2
|
+
module DataSet
|
3
|
+
module ORM
|
4
|
+
class DataCube
|
5
|
+
extend PubliSci::Dataset::DataCube
|
6
|
+
extend PubliSci::Analyzer
|
7
|
+
extend PubliSci::Query
|
8
|
+
extend PubliSci::Parser
|
9
|
+
|
10
|
+
include PubliSci::Dataset::DataCube
|
11
|
+
include PubliSci::Analyzer
|
12
|
+
include PubliSci::Metadata::Generator
|
13
|
+
include PubliSci::Query
|
14
|
+
include PubliSci::Parser
|
15
|
+
|
16
|
+
attr_accessor :labels
|
17
|
+
attr_accessor :dimensions
|
18
|
+
attr_accessor :measures
|
19
|
+
attr_accessor :obs
|
20
|
+
attr_accessor :meta
|
21
|
+
|
22
|
+
# Creates an empty cube: no dimensions, measures, observations, generator
# options, internal options or metadata.
#
# options  - Hash handed to parse_options when do_parse is truthy.
# do_parse - pass false to skip option parsing (and its name check).
def initialize(options={}, do_parse = true)
  @dimensions, @measures, @obs = {}, [], []
  @generator_options, @options, @meta = {}, {}, {}

  parse_options(options) if do_parse
end
|
33
|
+
|
34
|
+
# Builds a cube from an existing RDF representation by running the bundled
# SPARQL query files (dimension_ranges, code_resources, measures, dataset,
# observations, observation_labels) against it.
#
# graph   - run through load_string unless it matches /^http/.
# options - extra constructor options; values extracted from the graph
#           replace entries with the same key.
# verbose - when truthy, each extracted component is printed with puts.
#
# Returns a new instance built from the merged options.
def self.load(graph, options={}, verbose=false)
  graph = load_string(graph) unless graph =~ /^http/

  range_rows = get_hashes(execute_from_file('dimension_ranges.rq', graph), "to_s")
  dimensions = Hash[range_rows.map { |row|
    #TODO coded properties should be found via SPARQL queries
    coded = row[:range].split('/')[-2] == "code"
    [row[:dimension], {type: coded ? :coded : row[:range].to_s}]
  }]
  puts "dimensions: #{dimensions}" if verbose

  code_rows = execute_from_file('code_resources.rq', graph).to_h
  codes = code_rows.map { |row| [row[:dimension].to_s, row[:codeList].to_s, row[:class].to_s] }
  puts "codes: #{codes}" if verbose

  measure_rows = execute_from_file('measures.rq', graph).to_h
  measures = measure_rows.map { |row| row[:measure].to_s }
  puts "measures: #{measures}" if verbose

  name = execute_from_file('dataset.rq', graph).to_h.first[:label]
  puts "dataset: #{name}" if verbose

  observations = observation_hash(execute_from_file('observations.rq', graph))
  puts "observations: #{observations}" if verbose

  label_rows = execute_from_file('observation_labels.rq', graph)
  labels = Hash[label_rows.map { |row| [row[:observation].to_s, row[:label].to_s] }]

  options = options.merge(
    measures: measures,
    dimensions: dimensions,
    observations: observations.values,
    name: name,
    labels: labels.values,
    codes: codes
  )
  puts "creating #{options}" if verbose
  new(options)
end
|
86
|
+
|
87
|
+
# Applies a constructor options hash to this cube.
#
# Recognised keys: :dimensions (name => details hash, :type defaults to
# :coded), :measures, :observations, :generator_options, :skip_metadata,
# :name, :validate_each, :labels and :codes.
#
# Raises RuntimeError when :name is missing; dimensions, measures and
# observations have already been applied by then.
def parse_options(options)
  dims = options[:dimensions]
  dims.each { |dim_name, details| add_dimension(dim_name, details[:type] || :coded) } if dims

  measure_names = options[:measures]
  measure_names.each { |measure| @measures << measure } if measure_names

  rows = options[:observations]
  rows.each { |row| add_observation row } if rows

  @generator_options = options[:generator_options] if options[:generator_options]
  @options[:skip_metadata] = options[:skip_metadata] if options[:skip_metadata]

  raise "No dataset name specified!" unless options[:name]
  @name = options[:name]

  @options[:validate_each] = options[:validate_each] if options[:validate_each]
  @labels = options[:labels] if options[:labels]
  @codes = options[:codes] if options[:codes]
end
|
123
|
+
|
124
|
+
# Serialises the cube through `generate`, appending the output of `basic`
# (built from the metadata accessors) unless @options[:skip_metadata] is set.
#
# Labels default to "1".."n" when @labels is not an Array with one entry per
# observation; a Symbol label is left untouched (automatic labelling methods
# are not defined yet). @codes defaults to the dimensions typed :coded.
def to_n3
  labels_match = @labels.is_a?(Array) && @labels.size == @obs.size
  @labels = (1..@obs.size).map(&:to_s) unless labels_match || @labels.is_a?(Symbol)

  # collect observation data, one column per component
  check_integrity(@obs.map(&:data), @dimensions.keys, @measures)
  data = {}
  @obs.each do |observation|
    (@measures | @dimensions.keys).each do |component|
      (data[component] ||= []) << observation.data[component]
    end
  end

  @codes ||= @dimensions.map { |dim, details| dim if details[:type] == :coded }.compact
  turtle = generate(@measures, @dimensions.keys, @codes, data, @labels, @name, @generator_options)
  return turtle if @options[:skip_metadata]

  fields = {
    publishers: publishers,
    subject: subjects,
    author: author,
    description: description,
    date: date,
    var: @name
  }
  turtle + "\n" + basic(fields)
end
|
162
|
+
|
163
|
+
# Registers a dimension under its stringified name with the given type
# (:coded unless specified). Returns the stored {type: ...} hash.
def add_dimension(name, type=:coded)
  @dimensions.store(name.to_s, {type: type})
end
|
166
|
+
|
167
|
+
# Appends a measure name to the cube; returns the measures array.
def add_measure(name)
  @measures.push(name)
end
|
170
|
+
|
171
|
+
# Wraps one row of data in an Observation (keys converted to strings) and
# appends it to the cube. When @options[:validate_each] is set the row is
# first passed to check_integrity together with the known dimensions and
# measures. Returns the observations array.
def add_observation(data)
  stringified = data.each_with_object({}) { |(key, value), row| row[key.to_s] = value }
  observation = Observation.new(stringified)
  check_integrity([observation.data], @dimensions.keys, @measures) if @options[:validate_each]
  @obs.push(observation)
end
|
177
|
+
|
178
|
+
# Appends an already-built observation object as is (no key conversion and
# no integrity check happen here). Returns the observations array.
def insert(observation)
  @obs.push(observation)
end
|
181
|
+
|
182
|
+
# Publisher entries stored in the metadata hash; an empty array is created
# and stored on first access.
def publishers
  @meta[:publishers] = [] unless @meta[:publishers]
  @meta[:publishers]
end
|
185
|
+
|
186
|
+
# Replaces the stored publisher entries.
def publishers=(publishers)
  @meta.store(:publishers, publishers)
end
|
189
|
+
|
190
|
+
# Subject identifiers, kept under the :subject metadata key; an empty array
# is created and stored on first access.
def subjects
  @meta[:subject] = [] unless @meta[:subject]
  @meta[:subject]
end
|
193
|
+
|
194
|
+
# Replaces the stored subject identifiers (:subject metadata key).
def subjects=(subjects)
  @meta.store(:subject, subjects)
end
|
197
|
+
|
198
|
+
# Appends a {label:, uri:} entry to the publishers list and returns the list.
def add_publisher(label, uri)
  entry = {label: label, uri: uri}
  publishers.push(entry)
end
|
201
|
+
|
202
|
+
# Appends a subject identifier to the subjects list and returns the list.
#
# The previous body called `subject`, which is not defined on this class
# (the accessor is `subjects`), so every call raised NameError.
def add_subject(id)
  subjects << id
end
|
205
|
+
|
206
|
+
# Author, kept under the :creator metadata key; an empty string is stored
# on first access.
def author
  @meta[:creator] = "" unless @meta[:creator]
  @meta[:creator]
end
|
209
|
+
|
210
|
+
# Sets the author (:creator metadata key).
def author=(author)
  @meta.store(:creator, author)
end
|
213
|
+
|
214
|
+
# Free-text description from the metadata hash; an empty string is stored
# on first access.
def description
  @meta[:description] = "" unless @meta[:description]
  @meta[:description]
end
|
217
|
+
|
218
|
+
# Sets the description metadata entry.
def description=(description)
  @meta.store(:description, description)
end
|
221
|
+
|
222
|
+
# Date metadata entry. On first access it is set to today's local date as
# "day-month-year" without zero padding.
#
# The clock is read once so that day, month and year always come from the
# same instant; three separate Time.now calls could straddle midnight.
def date
  @meta[:date] ||= begin
    now = Time.now
    "#{now.day}-#{now.month}-#{now.year}"
  end
end
|
225
|
+
|
226
|
+
# Sets the date metadata entry.
def date=(date)
  @meta.store(:date, date)
end
|
229
|
+
|
230
|
+
# Plain-hash view of the cube: the measures array, the dimensions hash and
# the data hash of every observation.
def to_h
  summary = {}
  summary[:measures] = @measures
  summary[:dimensions] = @dimensions
  summary[:observations] = @obs.map(&:data)
  summary
end
|
237
|
+
end
|
238
|
+
end
|
239
|
+
end
|
240
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module PubliSci
  module DataSet
    module ORM
      # One row of a data cube: a thin wrapper around a hash of component
      # values, exposed through #data.
      #
      # Entries of the data hash can also be read as methods
      # (`obs.weight` for `obs.data["weight"]`); string keys are tried
      # first, then symbol keys.
      class Observation
        attr_accessor :data

        def initialize(data={})
          @data = data
        end

        # Reads an entry of the data hash when called without arguments and
        # a matching key exists; anything else goes to the default handler
        # and raises NoMethodError. (The previous version required exactly
        # one argument and returned nil for every unknown method.)
        def method_missing(name, *args, &block)
          return super unless args.empty? && data_key?(name)

          @data.key?(name.to_s) ? @data[name.to_s] : @data[name]
        end

        # Mirrors method_missing so respond_to? reports data-backed readers.
        def respond_to_missing?(method, include_private = false)
          data_key?(method) || super
        end

        private

        # True when the wrapped object is hash-like and holds the name as
        # either a string or a symbol key.
        def data_key?(name)
          @data.respond_to?(:key?) && (@data.key?(name.to_s) || @data.key?(name))
        end
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module PubliSci
  class Dataset
    # Holds dataset-level settings. For every key in Configuration.defaults
    # an instance gets a combined reader/setter (`config.interactive`,
    # `config.interactive(true)`) and a plain writer (`config.interactive = x`).
    class Configuration
      def self.defaults
        {
          interactive: false,
        }
      end

      defaults.each do |key, default|
        ivar = :"@#{key}"

        # With an argument: stores it. Without: returns the current value,
        # falling back to the default while the setting is still nil.
        #
        # nil (not falsiness) is the "nothing given / nothing stored" marker,
        # so an explicit false can be passed in and is kept once stored.
        define_method(key) do |input=nil|
          instance_variable_set(ivar, default) if instance_variable_get(ivar).nil?
          instance_variable_set(ivar, input) unless input.nil?
          instance_variable_get(ivar)
        end

        attr_writer key
      end
    end
  end
end
|
@@ -0,0 +1,418 @@
|
|
1
|
+
#monkey patch to make rdf string w/ heredocs prettier ;)
|
2
|
+
class String
  # Removes the whitespace that opens the string from the start of every
  # line, so heredocs can be indented along with the surrounding code.
  def unindent
    leading = self[/\A\s*/]
    gsub(/^#{leading}/, '')
  end
end
|
7
|
+
|
8
|
+
module PubliSci
|
9
|
+
class Dataset
|
10
|
+
module DataCube
|
11
|
+
include PubliSci::Parser
|
12
|
+
# Generator options used when the caller supplies none.
def defaults
  settings = {}
  settings[:type] = :dataframe
  settings[:encode_nulls] = false
  settings[:base_url] = "http://onto.strinz.me"
  settings
end
|
19
|
+
|
20
|
+
# Turns component names into Turtle terms.
#
# Measures and dimensions: "http://..." names become <...>, prefixed names
# such as "rdf:type" pass through, anything else becomes "prop:NAME".
# Codes: an array of arrays is kept, with its "http://..." elements wrapped
# in <...>; a flat list is expanded to [name, "code:name", "code:Name"]
# triples, based on the first element of `sanitize(code)`.
#
# Returns [measure_terms, dimension_terms, code_triples].
# `options` is accepted for signature parity and not used here.
def generate_resources(measures, dimensions, codes, options={})
  # The original character class was [a-zA-z], which also matches
  # [ \ ] ^ and backtick. Underscore stays accepted so names such as
  # "ns:some_prop" keep passing through.
  prefixed_name = /^[a-zA-Z_]+:[a-zA-Z_]+$/

  to_term = lambda do |name|
    if name =~ /^http:\/\//
      "<#{name}>"
    elsif name =~ prefixed_name
      name
    else
      "prop:#{name}"
    end
  end

  rdf_measures = measures.map { |m| to_term.call(m) }
  rdf_dimensions = dimensions.map { |d| to_term.call(d) }

  rdf_codes =
    if codes.first.is_a? Array
      codes.map { |triple| triple.map { |el| el =~ /^http:\/\// ? "<#{el}>" : el } }
    else
      codes.map do |code|
        label = sanitize(code).first
        ["#{label}", "code:#{label.downcase}", "code:#{label.downcase.capitalize}"]
      end
    end

  [rdf_measures, rdf_dimensions, rdf_codes]
end
|
60
|
+
|
61
|
+
# Maps property terms to their component-specification terms:
#   "prop:x"                 -> "cs:x"
#   "<BASE/.../name>"        -> "cs:name"
#   any other "<http://...>" -> "<BASE/dc/dataset/VAR/cs/...>"
# where BASE is options[:base_url]. Accepts one term or a (nested) list and
# always returns a flat array.
def component_gen(args, var, options={})
  base = options[:base_url]
  own_uri = %r{<#{base}/.+/(\w.+)>$}
  cs_prefix = "<#{base}/dc/dataset/#{var}/cs/"

  [args].flatten.map do |term|
    local = term.gsub("prop:", "cs:").gsub(own_uri, 'cs:\1')
    local.gsub(%r{<http://(.+)>}, cs_prefix + '\1>')
  end
end
|
66
|
+
|
67
|
+
# Rewrites the values of coded components as Turtle terms and leaves every
# other column untouched.
#
# For each key of `data` listed in `sanitize(codes)`:
#   "http://..." values -> <...>
#   prefixed names      -> unchanged
#   anything else       -> <code/KEY/VALUE>, with KEY downcased and VALUE
#                          taken from the first element of sanitize(value)
#
# Returns a new hash; `var` and `options` are accepted but not used here.
def encode_data(codes, data, var, options={})
  coded = sanitize(codes)
  # The original class [a-zA-z] also matched [ \ ] ^ and backtick;
  # underscore stays accepted.
  prefixed_name = /^[a-zA-Z_]+:[a-zA-Z_]+$/

  data.each_with_object({}) do |(component, values), encoded|
    encoded[component] =
      if coded.include?(component)
        values.map do |val|
          if val =~ /^http:\/\//
            "<#{val}>"
          elsif val =~ prefixed_name
            val
          else
            "<code/#{component.downcase}/#{sanitize(val).first}>"
          end
        end
      else
        values
      end
  end
end
|
87
|
+
|
88
|
+
# Resolves a vocabulary reference.
#
# vocab - either an "http://..." string (wrapped in a new RDF::Vocabulary)
#         or the name of a constant under RDF whose #inspect starts with
#         "RDF::Vocabulary".
#
# Returns the vocabulary, or nil when nothing matches.
#
# The original condition lacked parentheses, so `&&` was parsed as part of
# the argument to const_defined?. Unknown names then raised NameError from
# const_get and known ones raised TypeError, instead of reaching the
# intended branches.
def vocabulary(vocab, options={})
  return RDF::Vocabulary.new(vocab) if vocab.is_a?(String) && vocab =~ /^http:\/\//
  return nil unless vocab.respond_to?(:to_sym)

  name = vocab.to_sym
  return nil unless RDF.const_defined?(name)

  constant = RDF.const_get(name)
  constant.inspect =~ /^RDF::Vocabulary/ ? constant : nil
rescue NameError
  # const_defined? rejects names that are not valid constant names
  nil
end
|
97
|
+
|
98
|
+
# Assembles the complete Turtle document for one dataset, in this order:
# prefixes, data structure definition, dataset, component specifications,
# dimension properties, measure properties, code lists, concept codes and
# observations.
#
# Only `var` (via sanitize) and `data` (via sanitize_hash) are sanitised
# here; measures, dimensions and codes are passed on unchanged.
def generate(measures, dimensions, codes, data, observation_labels, var, options={})
  var = sanitize([var]).first
  data = sanitize_hash(data)

  turtle = prefixes(var, options)
  turtle << data_structure_definition(measures, dimensions, codes, var, options)
  turtle << dataset(var, options)

  component_specifications(measures, dimensions, codes, var, options).each { |chunk| turtle << chunk }
  dimension_properties(dimensions, codes, var, options).each { |chunk| turtle << chunk }
  measure_properties(measures, var, options).each { |chunk| turtle << chunk }
  code_lists(codes, data, var, options).each { |chunk| turtle << chunk }
  concept_codes(codes, data, var, options).each { |chunk| turtle << chunk }
  observations(measures, dimensions, codes, data, observation_labels, var, options).each { |chunk| turtle << chunk }

  turtle
end
|
116
|
+
|
117
|
+
# Turtle header for a dataset: an @base line followed by the @prefix
# declarations used by the generated triples, ending with a blank line.
# Dataset-specific URIs are built from options[:base_url] (falling back to
# defaults) and the sanitised variable name.
def prefixes(var, options={})
  var = sanitize([var]).first
  options = defaults().merge(options)
  base = options[:base_url]
  dataset_uri = "#{base}/dc/dataset/#{var}/"

  namespaces = [
    ["ns", dataset_uri],
    ["qb", "http://purl.org/linked-data/cube#"],
    ["rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"],
    ["rdfs", "http://www.w3.org/2000/01/rdf-schema#"],
    ["prop", "#{base}/properties/"],
    ["dct", "http://purl.org/dc/terms/"],
    ["xsd", "http://www.w3.org/2001/XMLSchema#"],
    ["cs", "#{dataset_uri}cs/"],
    ["code", "#{dataset_uri}code/"],
    ["owl", "http://www.w3.org/2002/07/owl#"],
    ["skos", "http://www.w3.org/2004/02/skos/core#"],
    ["foaf", "http://xmlns.com/foaf/0.1/"],
    ["org", "http://www.w3.org/ns/org#"],
    ["prov", "http://www.w3.org/ns/prov#"]
  ]

  header = "@base <#{dataset_uri}> .\n"
  namespaces.each { |prefix, uri| header << "@prefix #{prefix}: <#{uri}> .\n" }
  header << "\n"
end
|
140
|
+
|
141
|
+
# Emits the qb:DataStructureDefinition for the dataset: one qb:component
# line per dimension, then one per measure. The final ';' is turned into
# '.' and a blank line follows.
def data_structure_definition(measures, dimensions, codes, var, options={})
  var = sanitize([var]).first
  options = defaults().merge(options)
  rdf_measures, rdf_dimensions, _rdf_codes = generate_resources(measures, dimensions, codes, options)
  cs_dims = component_gen(rdf_dimensions, var, options)
  cs_meas = component_gen(rdf_measures, var, options)

  definition = "ns:dsd-#{var} a qb:DataStructureDefinition;\n"
  (cs_dims + cs_meas).each { |component| definition << " qb:component #{component} ;\n" }

  # second-to-last character is the ';' that closes the last statement line
  definition[-2] = '.'
  definition << "\n"
end
|
159
|
+
|
160
|
+
# Emits the qb:DataSet resource for the sanitised variable name, labelled
# with that name and linked to its structure definition, followed by a
# blank line.
def dataset(var, options={})
  var = sanitize([var]).first
  options = defaults().merge(options)

  "ns:dataset-#{var} a qb:DataSet ;\n" \
    "rdfs:label \"#{var}\"@en ;\n" \
    "qb:structure ns:dsd-#{var} .\n\n"
end
|
170
|
+
|
171
|
+
# Emits one qb:ComponentSpecification per dimension, then one per measure.
# Each is labelled with the original name run through strip_uri and
# strip_prefixes, and points at its property via qb:dimension / qb:measure.
#
# Returns an array of Turtle snippets.
def component_specifications(measure_names, dimension_names, codes, var, options={})
  options = defaults().merge(options)
  rdf_measures, rdf_dimensions, _rdf_codes = generate_resources(measure_names, dimension_names, codes, options)
  cs_dims = component_gen(rdf_dimensions, var, options)
  cs_meas = component_gen(rdf_measures, var, options)

  build_spec = lambda do |component, plain_name, role, property|
    <<-EOF.unindent
    #{component} a qb:ComponentSpecification ;
    rdfs:label "#{strip_prefixes(strip_uri(plain_name))}" ;
    qb:#{role} #{property} .

    EOF
  end

  dimension_specs = rdf_dimensions.each_with_index.map do |property, i|
    build_spec.call(cs_dims[i], dimension_names[i], "dimension", property)
  end
  measure_specs = rdf_measures.each_with_index.map do |property, i|
    build_spec.call(cs_meas[i], measure_names[i], "measure", property)
  end

  dimension_specs + measure_specs
end
|
198
|
+
|
199
|
+
# Emits one qb:DimensionProperty per dimension.
#
# A dimension whose name appears as the first element of a code triple gets
# qb:codeList and rdfs:range from that triple. Otherwise rdfs:range comes
# from options[:ranges][name] when present, and when it is absent the
# statement is simply closed.
#
# Returns an array of Turtle snippets. `var` is not used here.
def dimension_properties(dimensions, codes, var, options={})
  options = defaults().merge(options)
  _rdf_measures, rdf_dimensions, rdf_codes = generate_resources([], dimensions, codes, options)

  # code triples are matched on the plain name, without any <...> wrapping
  dimension_codes = rdf_codes.map { |c| c[0] =~ /^<http:/ ? c[0][1..-2] : c[0] }
  ranges = options[:ranges]

  rdf_dimensions.each_with_index.map do |d, i|
    prop = "#{d} a rdf:Property, qb:DimensionProperty ;\n" \
           "rdfs:label \"#{strip_prefixes(strip_uri(d))}\"@en ;\n"

    code_index = dimension_codes.index(dimensions[i])
    if code_index
      code = rdf_codes[code_index]
      prop << "qb:codeList #{code[1]} ;\n"
      prop << "rdfs:range #{code[2]} .\n\n"
    elsif ranges && ranges[dimensions[i]]
      # The original looked up `dimension[i]` (an undefined local) here and
      # raised NameError whenever :ranges was supplied.
      prop << "\n rdfs:range #{ranges[dimensions[i]]} .\n\n"
    else
      # close the statement: swap the trailing ';' for '.' plus a blank line
      prop[-2] = ".\n"
    end
    prop
  end
end
|
238
|
+
|
239
|
+
# Emits one qb:MeasureProperty per measure, with an rdfs:range taken from
# options[:ranges][name] when one is given; otherwise the statement is
# closed right after the label.
#
# Returns an array of Turtle snippets. `var` is not used here.
def measure_properties(measures, var, options={})
  options = defaults().merge(options)
  rdf_measures = generate_resources(measures, [], [], options).first
  ranges = options[:ranges]

  rdf_measures.each_with_index.map do |m, i|
    prop = "#{m} a rdf:Property, qb:MeasureProperty ;\n" \
           "rdfs:label \"#{strip_prefixes(strip_uri(m))}\"@en ;\n"

    if ranges && ranges[measures[i]]
      prop << " rdfs:range #{ranges[measures[i]]} .\n\n"
    else
      # swap the trailing ';' for '.' and add a blank line
      prop[-2] = ".\n"
    end
    prop
  end
end
|
260
|
+
|
261
|
+
# Emits one qb:Observation per entry of observation_labels; row i takes the
# i-th value of every dimension and measure column in `data`.
#
# nil values are skipped unless options[:encode_nulls] is set. Values for
# which is_complex? is true are written as a node reference (add_node), and
# their encode_value output is appended after the observation through
# turtle_indent. rdfs:label is omitted when options[:no_labels] is set.
#
# Note: the columns of the `data` hash passed in are coerced to arrays in
# place before encoding.
#
# Returns an array of Turtle snippets.
def observations(measures, dimensions, codes, data, observation_labels, var, options={})
  var = sanitize([var]).first
  measures = sanitize(measures)
  dimensions = sanitize(dimensions)

  data.each_key { |key| data[key] = Array(data[key]) }
  observation_labels = Array(observation_labels)
  options = defaults().merge(options)

  rdf_measures, rdf_dimensions, rdf_codes = generate_resources(measures, dimensions, codes, options)
  data = encode_data(codes, data, var, options)

  # NOTE(review): computed as in the original but never read below.
  dimension_codes = rdf_codes.map { |c| c[0] =~ /^<http:/ ? c[0][1..-2] : c[0] }

  observation_labels.each_with_index.map do |label, row|
    # contains_nulls = false
    str = "ns:obs#{label} a qb:Observation ;\n" \
          "qb:dataSet ns:dataset-#{var} ;\n"
    str << " rdfs:label \"#{label}\" ;\n" unless options[:no_labels]

    nested_nodes = []

    # Node indices run over dimensions first, then measures, and advance
    # even for skipped (null) values.
    dimensions.each_with_index do |d, j|
      value = data[d][row]
      next if value == nil && !options[:encode_nulls]

      if is_complex?(value)
        str << " #{rdf_dimensions[j]} #{add_node(j, add_node(label))} ;\n"
        nested_nodes << encode_value(value, options, j, add_node(label))
      else
        str << " #{rdf_dimensions[j]} #{encode_value(value, options)} ;\n"
      end
    end

    measures.each_with_index do |m, j|
      node_index = dimensions.size + j
      value = data[m][row]
      next if value == nil && !options[:encode_nulls]

      if is_complex?(value)
        str << " #{rdf_measures[j]} #{add_node(node_index, add_node(label))} ;\n"
        val = encode_value(value, options, node_index, add_node(label))

        # make sure the last nested statement is terminated
        if val.last.is_a?(Array) && val.last.last[-2] != "."
          val.last.last << ".\n"
        end

        nested_nodes << val
      else
        str << " #{rdf_measures[j]} #{encode_value(value, options)} ;\n"
      end
    end

    str << " .\n\n"

    unless nested_nodes.empty?
      str << turtle_indent(nested_nodes.flatten.join("\n"))
      str << " \n\n"
    end

    str
  end
end
|
345
|
+
|
346
|
+
# Emits, for every code triple, the code-list class and its
# skos:ConceptScheme, followed by one skos:hasTopConcept line per distinct
# value found in that component's (encoded) data column. nil values are
# skipped unless options[:encode_nulls] is set.
#
# Returns an array of Turtle snippets.
def code_lists(codes, data, var, options={})
  options = defaults().merge(options)
  _rdf_measures, _rdf_dimensions, rdf_codes = generate_resources([], [], codes, options)
  data = encode_data(codes, data, var, options)

  rdf_codes.map do |code|
    # data columns are keyed by the plain name, without <...>
    refcode = code[0] =~ /^<.+>$/ ? code[0][1..-2] : code[0]
    list_name = strip_prefixes(strip_uri(code[1]))

    str = <<-EOF.unindent
    #{code[2]} a rdfs:Class, owl:Class;
    rdfs:subClassOf skos:Concept ;
    rdfs:label "Code list for #{list_name} - codelist class"@en;
    rdfs:comment "Specifies the #{list_name} for each observation";
    rdfs:seeAlso #{code[1]} .

    #{code[1]} a skos:ConceptScheme;
    skos:prefLabel "Code list for #{list_name} - codelist scheme"@en;
    rdfs:label "Code list for #{list_name} - codelist scheme"@en;
    skos:notation "CL_#{list_name.upcase}";
    skos:note "Specifies the #{list_name} for each observation";
    EOF

    data[refcode].uniq.each do |value|
      next if value == nil && !options[:encode_nulls]
      str << " skos:hasTopConcept #{to_resource(value,options)} ;\n"
    end

    str << " .\n\n"
  end
end
|
382
|
+
|
383
|
+
# Emits one skos:Concept per distinct value of every coded component,
# attached to that component's concept scheme (code[1]) and class (code[2]).
# nil values are skipped unless options[:encode_nulls] is set.
#
# Returns a flat array of Turtle snippets.
def concept_codes(codes, data, var, options={})
  options = defaults().merge(options)
  _rdf_measures, _rdf_dimensions, rdf_codes = generate_resources([], [], codes, options)
  data = encode_data(codes, data, var, options)

  rdf_codes.flat_map do |code|
    # data columns are keyed by the plain name, without <...>
    refcode = code[0] =~ /^<.+>$/ ? code[0][1..-2] : code[0]
    values = data[refcode].uniq.reject { |value| value == nil && !options[:encode_nulls] }

    values.map do |value|
      <<-EOF.unindent
      #{to_resource(value,options)} a skos:Concept, #{code[2]};
      skos:topConceptOf #{code[1]} ;
      skos:prefLabel "#{strip_uri(value)}" ;
      skos:inScheme #{code[1]} .

      EOF
    end
  end
end
|
409
|
+
|
410
|
+
|
411
|
+
# Debug helper: shortens full www.rqtl.org URIs in a Turtle string to their
# prefixed / relative forms. The substitutions run in the order listed, so
# word-only dataset codes become "code:x" before the more general
# "<code/x>" rule is tried.
def abbreviate_known(turtle_string)
  rewrites = [
    [/<http:\/\/www\.rqtl\.org\/dc\/properties\/(\S+)>/, 'prop:\1'],
    [/<http:\/\/www.rqtl.org\/ns\/dc\/code\/(\S+)\/(\S+)>/, '<code/\1/\2>'],
    [/<http:\/\/www.rqtl.org\/dc\/dataset\/(\S+)\/code\/(\w+)>/, 'code:\2'],
    [/<http:\/\/www.rqtl.org\/dc\/dataset\/(\S+)\/code\/(\S+)>/, '<code/\2>']
  ]

  rewrites.inject(turtle_string) do |text, (pattern, replacement)|
    text.gsub(pattern, replacement)
  end
end
|
416
|
+
end
|
417
|
+
end
|
418
|
+
end
|