bio-publisci 0.0.1

Files changed (95)
  1. data/.document +5 -0
  2. data/.rspec +1 -0
  3. data/.travis.yml +13 -0
  4. data/Gemfile +24 -0
  5. data/LICENSE.txt +20 -0
  6. data/README.md +47 -0
  7. data/README.rdoc +48 -0
  8. data/Rakefile +70 -0
  9. data/bin/bio-publisci +83 -0
  10. data/features/create_generator.feature +25 -0
  11. data/features/integration.feature +12 -0
  12. data/features/integration_steps.rb +10 -0
  13. data/features/orm.feature +60 -0
  14. data/features/orm_steps.rb +74 -0
  15. data/features/reader.feature +25 -0
  16. data/features/reader_steps.rb +60 -0
  17. data/features/step_definitions/bio-publisci_steps.rb +0 -0
  18. data/features/store.feature +27 -0
  19. data/features/store_steps.rb +42 -0
  20. data/features/support/env.rb +13 -0
  21. data/features/writer.feature +9 -0
  22. data/features/writer_steps.rb +17 -0
  23. data/lib/bio-publisci/analyzer.rb +57 -0
  24. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +219 -0
  25. data/lib/bio-publisci/dataset/ORM/observation.rb +20 -0
  26. data/lib/bio-publisci/dataset/data_cube.rb +308 -0
  27. data/lib/bio-publisci/dataset/interactive.rb +57 -0
  28. data/lib/bio-publisci/loader.rb +36 -0
  29. data/lib/bio-publisci/metadata/metadata.rb +105 -0
  30. data/lib/bio-publisci/parser.rb +64 -0
  31. data/lib/bio-publisci/query/query_helper.rb +114 -0
  32. data/lib/bio-publisci/r_client.rb +54 -0
  33. data/lib/bio-publisci/readers/arff.rb +87 -0
  34. data/lib/bio-publisci/readers/big_cross.rb +119 -0
  35. data/lib/bio-publisci/readers/cross.rb +72 -0
  36. data/lib/bio-publisci/readers/csv.rb +54 -0
  37. data/lib/bio-publisci/readers/dataframe.rb +66 -0
  38. data/lib/bio-publisci/readers/r_matrix.rb +152 -0
  39. data/lib/bio-publisci/store.rb +56 -0
  40. data/lib/bio-publisci/writers/arff.rb +66 -0
  41. data/lib/bio-publisci/writers/dataframe.rb +81 -0
  42. data/lib/bio-publisci.rb +36 -0
  43. data/lib/r2rdf.rb +226 -0
  44. data/lib/template_bak/publisci.rb +3 -0
  45. data/lib/template_bak.rb +12 -0
  46. data/lib/vocabs/cc.rb +18 -0
  47. data/lib/vocabs/cert.rb +13 -0
  48. data/lib/vocabs/dc.rb +63 -0
  49. data/lib/vocabs/dc11.rb +23 -0
  50. data/lib/vocabs/doap.rb +45 -0
  51. data/lib/vocabs/exif.rb +168 -0
  52. data/lib/vocabs/foaf.rb +69 -0
  53. data/lib/vocabs/geo.rb +13 -0
  54. data/lib/vocabs/http.rb +26 -0
  55. data/lib/vocabs/ma.rb +78 -0
  56. data/lib/vocabs/owl.rb +59 -0
  57. data/lib/vocabs/rdfs.rb +17 -0
  58. data/lib/vocabs/rsa.rb +12 -0
  59. data/lib/vocabs/rss.rb +14 -0
  60. data/lib/vocabs/sioc.rb +93 -0
  61. data/lib/vocabs/skos.rb +36 -0
  62. data/lib/vocabs/wot.rb +21 -0
  63. data/lib/vocabs/xhtml.rb +9 -0
  64. data/lib/vocabs/xsd.rb +58 -0
  65. data/resources/queries/codes.rq +13 -0
  66. data/resources/queries/dataset.rq +7 -0
  67. data/resources/queries/dimension_ranges.rq +8 -0
  68. data/resources/queries/dimensions.rq +7 -0
  69. data/resources/queries/measures.rq +7 -0
  70. data/resources/queries/observations.rq +12 -0
  71. data/resources/queries/test.rq +3 -0
  72. data/resources/weather.numeric.arff +23 -0
  73. data/spec/analyzer_spec.rb +36 -0
  74. data/spec/bio-publisci_spec.rb +7 -0
  75. data/spec/csv/bacon.csv +4 -0
  76. data/spec/csv/moar_bacon.csv +11 -0
  77. data/spec/data_cube_spec.rb +166 -0
  78. data/spec/generators/csv_spec.rb +44 -0
  79. data/spec/generators/dataframe_spec.rb +44 -0
  80. data/spec/generators/r_matrix_spec.rb +35 -0
  81. data/spec/queries/integrity/1.rq +21 -0
  82. data/spec/queries/integrity/11.rq +29 -0
  83. data/spec/queries/integrity/12.rq +37 -0
  84. data/spec/queries/integrity/14.rq +25 -0
  85. data/spec/queries/integrity/19_1.rq +21 -0
  86. data/spec/queries/integrity/19_2.rq +15 -0
  87. data/spec/queries/integrity/2.rq +22 -0
  88. data/spec/queries/integrity/3.rq +19 -0
  89. data/spec/queries/integrity/4.rq +13 -0
  90. data/spec/queries/integrity/5.rq +14 -0
  91. data/spec/r_builder_spec.rb +33 -0
  92. data/spec/spec_helper.rb +17 -0
  93. data/spec/turtle/bacon +149 -0
  94. data/spec/turtle/reference +2066 -0
  95. metadata +259 -0
data/features/writer.feature
@@ -0,0 +1,9 @@
+ Feature: export to various formats using writers
+
+   In order to use RDF encoded data in other applications
+   I want to export domain objects using an R2RDF::Writer object
+
+   Scenario: write to ARFF format
+     Given a ARFF writer
+     When I call its from_turtle method on the file spec/turtle/bacon
+     Then I should receive a .arff file as a string
data/features/writer_steps.rb
@@ -0,0 +1,17 @@
+ Given(/^a (.*) writer$/) do |type|
+   @writer = R2RDF::Writer.const_get(type).new
+ end
+
+ When(/^I call its from_turtle method on the file (.*)$/) do |file|
+   @result = @writer.from_turtle(file)
+ end
+
+ When(/^I call its from_turtle method on the turtle string$/) do
+   f = Tempfile.open('writerttl'); f.write @turtle_string; f.close
+   @result = @writer.from_turtle(f.path)
+   f.unlink
+ end
+
+ Then(/^I should receive a \.arff file as a string$/) do
+   @result.is_a?(String).should be true
+ end
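
Outside of Cucumber, the same writer API exercised by these steps can be driven directly. A minimal sketch, assuming the gem is loaded via require 'bio-publisci' and using the bundled spec/turtle/bacon fixture (the output filename is only an example):

    require 'bio-publisci'

    # Pick the ARFF writer, exactly as the "Given a ARFF writer" step does
    writer = R2RDF::Writer::ARFF.new

    # Convert a Turtle file describing a data cube into an ARFF string
    arff = writer.from_turtle('spec/turtle/bacon')

    # Save the string wherever convenient (illustrative path)
    File.write('bacon.arff', arff)
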
data/lib/bio-publisci/analyzer.rb
@@ -0,0 +1,57 @@
+ module R2RDF
+
+   # Handles analysis of R expressions to extract properties and recognize
+   # potential ambiguity.
+   module Analyzer
+     def dirty?(data)
+       if data.is_a? Hash
+         data.map{|k,v|
+           return true if dirty?(k) || dirty?(v)
+         }
+         false
+       elsif data.is_a? Array
+         data.map{|datum|
+           return true if dirty?(datum)
+         }
+       else
+         dirty_characters = ['.', ' ']
+         if (data.to_s.scan(/./) & dirty_characters).any?
+           true
+         else
+           false
+         end
+       end
+     end
+
+     def recommend_range(data)
+       classes = data.map{|d| d.class}
+       homogenous = classes.uniq.size == 1
+       if homogenous
+         if classes[0] == Fixnum
+           "xsd:int"
+         elsif classes[0] == Float
+           "xsd:double"
+         elsif classes[0] == String
+           recommend_range_strings(data)
+         else
+           :coded
+         end
+       else
+         :coded
+       end
+     end
+
+     def recommend_range_strings(data)
+       return "xsd:int" if data.all?{|d| Integer(d) rescue nil}
+       return "xsd:double" if data.all?{|d| Float(d) rescue nil}
+       :coded
+     end
+
+     def check_integrity(obs, dimensions, measures)
+       obs.map{|o|
+         raise "MissingValues for #{(dimensions | measures) - o.keys}" unless ((dimensions | measures) - o.keys).empty?
+         raise "UnknownProperty #{o.keys - (dimensions | measures)}" unless (o.keys - (dimensions | measures)).empty?
+       }
+     end
+   end
+ end
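
For reference, a small sketch of how these helpers behave once the module is mixed into a class; the Demo class is invented for illustration, and the return values follow the code above (Fixnum reflects the Ruby versions this gem targeted):

    class Demo
      include R2RDF::Analyzer
    end

    demo = Demo.new
    demo.recommend_range([1, 2, 3])     # "xsd:int"   (homogeneous integers)
    demo.recommend_range([1.5, 2.25])   # "xsd:double"
    demo.recommend_range(["a", "b"])    # :coded      (non-numeric strings)
    demo.dirty?("has space")            # true        (contains a dirty character)

    # check_integrity raises when an observation misses a declared component
    obs = [{ "size" => 1 }]
    demo.check_integrity(obs, ["producer"], ["size"])   # raises "MissingValues for [\"producer\"]"
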
data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb
@@ -0,0 +1,219 @@
+ module R2RDF
+   module Dataset
+     module ORM
+       class DataCube
+         extend R2RDF::Dataset::DataCube
+         extend R2RDF::Analyzer
+         extend R2RDF::Metadata
+         extend R2RDF::Query
+         extend R2RDF::Parser
+
+         include R2RDF::Dataset::DataCube
+         include R2RDF::Analyzer
+         include R2RDF::Metadata
+         include R2RDF::Query
+         include R2RDF::Parser
+
+         attr_accessor :labels
+         attr_accessor :dimensions
+         attr_accessor :measures
+         attr_accessor :obs
+         attr_accessor :meta
+
+         def initialize(options={}, do_parse=true)
+           @dimensions = {}
+           @measures = []
+           @obs = []
+           @generator_options = {}
+           @options = {}
+
+           @meta = {}
+
+           parse_options options if do_parse
+         end
+
+         def self.load(graph, options={}, verbose=false)
+
+
+           graph = create_graph(graph) unless graph =~ /^http/
+
+           # puts get_hashes(execute_from_file('dimension_ranges.rq',graph))
+           dimensions = Hash[get_hashes(execute_from_file('dimension_ranges.rq',graph),"to_s").map{|solution|
+             # TODO coded properties should be found via SPARQL queries
+             if solution[:range].split('/')[-2] == "code"
+               type = :coded
+             else
+               type = strip_uri(solution[:range])
+             end
+             [strip_uri(solution[:dimension]), {type: type}]
+           }]
+           puts "dimensions: #{dimensions}" if verbose
+           measures = get_ary(execute_from_file('measures.rq',graph)).flatten
+           puts "measures: #{measures}" if verbose
+           name = execute_from_file('dataset.rq',graph).to_h.first[:label]
+           puts "dataset: #{name}" if verbose
+           obs = execute_from_file('observations.rq',graph)
+           puts "observations: #{obs}" if verbose
+           # observations = observation_hash(obs)
+           simple_observations = observation_hash(obs,true)
+
+           new_opts = {
+             measures: measures,
+             dimensions: dimensions,
+             observations: simple_observations.values,
+             name: name,
+           }
+
+           options = options.merge(new_opts)
+           puts "creating #{options}" if verbose
+           self.new(options)
+         end
+
+         def parse_options(options)
+           if options[:dimensions]
+             options[:dimensions].each{|name,details|
+               add_dimension(name, details[:type] || :coded)
+             }
+           end
+
+           if options[:measures]
+             options[:measures].each{|m| @measures << m}
+           end
+
+           if options[:observations]
+             options[:observations].each{|obs_data| add_observation obs_data}
+           end
+
+           @generator_options = options[:generator_options] if options[:generator_options]
+           @options[:skip_metadata] = options[:skip_metadata] if options[:skip_metadata]
+
+           if options[:name]
+             @name = options[:name]
+           else
+             raise "No dataset name specified!"
+           end
+
+           if options[:validate_each]
+             @options[:validate_each] = options[:validate_each]
+           end
+         end
+
+         def to_n3
+
+           # create labels if not specified
+           unless @labels.is_a?(Array) && @labels.size == @obs.size
+             if @labels.is_a? Symbol
+               # define some automatic labeling methods
+             else
+               @labels = (1..@obs.size).to_a.map(&:to_s)
+             end
+           end
+           data = {}
+
+
+           # collect observation data
+           check_integrity(@obs.map{|o| o.data}, @dimensions.keys, @measures)
+           @obs.map{|obs|
+             (@measures | @dimensions.keys).map{ |component|
+               (data[component] ||= []) << obs.data[component]
+             }
+           }
+
+
+           codes = @dimensions.map{|d,v| d if v[:type] == :coded}.compact
+
+
+           str = generate(@measures, @dimensions.keys, codes, data, @labels, @name, @generator_options)
+           unless @options[:skip_metadata]
+             fields = {
+               publishers: publishers(),
+               subject: subjects(),
+               author: author(),
+               description: description(),
+               date: date(),
+               var: @name,
+             }
+             # puts basic(fields,@generator_options)
+             str += "\n" + basic(fields,@generator_options)
+           end
+           str
+         end
+
+         def add_dimension(name, type=:coded)
+           @dimensions[name.to_s] = {type: type}
+         end
+
+         def add_measure(name)
+           @measures << name
+         end
+
+         def add_observation(data)
+           data = Hash[data.map{|k,v| [k.to_s, v]}]
+           obs = Observation.new(data)
+           check_integrity([obs.data], @dimensions.keys, @measures) if @options[:validate_each]
+           @obs << obs
+         end
+
+         def insert(observation)
+           @obs << observation
+         end
+
+         def publishers
+           @meta[:publishers] ||= []
+         end
+
+         def publishers=(publishers)
+           @meta[:publishers] = publishers
+         end
+
+         def subjects
+           @meta[:subject] ||= []
+         end
+
+         def subjects=(subjects)
+           @meta[:subject] = subjects
+         end
+
+         def add_publisher(label,uri)
+           publishers << {label: label, uri: uri}
+         end
+
+         def add_subject(id)
+           subjects << id
+         end
+
+         def author
+           @meta[:creator] ||= ""
+         end
+
+         def author=(author)
+           @meta[:creator] = author
+         end
+
+         def description
+           @meta[:description] ||= ""
+         end
+
+         def description=(description)
+           @meta[:description] = description
+         end
+
+         def date
+           @meta[:date] ||= "#{Time.now.day}-#{Time.now.month}-#{Time.now.year}"
+         end
+
+         def date=(date)
+           @meta[:date] = date
+         end
+
+         def to_h
+           {
+             measures: @measures,
+             dimensions: @dimensions,
+             observations: @obs.map{|o| o.data}
+           }
+         end
+       end
+     end
+   end
+ end
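
Taken together, the ORM class above supports building a cube programmatically and serializing it with to_n3. A hedged sketch with invented dimension and measure names (skip_metadata avoids the Dublin Core block that would otherwise be appended from the Metadata module):

    cube = R2RDF::Dataset::ORM::DataCube.new(
      name:          'bacon_example',
      dimensions:    { 'producer' => { type: :coded } },
      measures:      ['chunkiness'],
      skip_metadata: true
    )

    # Observations are plain hashes keyed by dimension and measure names
    cube.add_observation 'producer' => 'Alpha', 'chunkiness' => 1.5
    cube.add_observation 'producer' => 'Beta',  'chunkiness' => 2.25

    puts cube.to_n3   # Turtle containing the DSD, code list, and two qb:Observations
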
data/lib/bio-publisci/dataset/ORM/observation.rb
@@ -0,0 +1,20 @@
+ module R2RDF
+   module Dataset
+     module ORM
+       class Observation
+         attr_accessor :data
+         def initialize(data={})
+           @data = data
+         end
+
+         def method_missing(name, args)
+           # get entry of data hash
+         end
+
+         def respond_to_missing?(method, *)
+
+         end
+       end
+     end
+   end
+ end
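
method_missing and respond_to_missing? are committed here as stubs. One possible completion, stated purely as an assumption about the intent in the inline comment (it is not part of this release), would look up the observation's data hash by method name:

    # Hypothetical completion of the stubs above
    def method_missing(name, *args)
      key = name.to_s
      @data.key?(key) ? @data[key] : super
    end

    def respond_to_missing?(method, include_private = false)
      @data.key?(method.to_s) || super
    end
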
data/lib/bio-publisci/dataset/data_cube.rb
@@ -0,0 +1,308 @@
+ # monkey patch to make rdf string w/ heredocs prettier ;)
+ class String
+   def unindent
+     gsub /^#{self[/\A\s*/]}/, ''
+     # gsub(/^#{scan(/^\s*/).min_by{|l|l.length}}/, "")
+   end
+ end
+
+ module R2RDF
+   # used to generate data cube observations, data structure definitions, etc
+   module Dataset
+     module DataCube
+       def defaults
+         {
+           type: :dataframe,
+           encode_nulls: false,
+           base_url: "http://www.rqtl.org",
+         }
+       end
+
+       def generate(measures, dimensions, codes, data, observation_labels, var, options={})
+         dimensions = sanitize(dimensions)
+         codes = sanitize(codes)
+         measures = sanitize(measures)
+         var = sanitize([var]).first
+         data = sanitize_hash(data)
+
+         str = prefixes(var,options)
+         str << data_structure_definition((measures | dimensions), var, options)
+         str << dataset(var, options)
+         component_specifications(measures, dimensions, var, options).map{ |c| str << c }
+         dimension_properties(dimensions, codes, var, options).map{|p| str << p}
+         measure_properties(measures, var, options).map{|p| str << p}
+         code_lists(codes, data, var, options).map{|l| str << l}
+         concept_codes(codes, data, var, options).map{|c| str << c}
+         observations(measures, dimensions, codes, data, observation_labels, var, options).map{|o| str << o}
+         str
+       end
+
+       def sanitize(array)
+         # remove spaces and other special characters
+         processed = []
+         array.map{|entry|
+           if entry.is_a? String
+             processed << entry.gsub(/[\s\.]/,'_')
+           else
+             processed << entry
+           end
+         }
+         processed
+       end
+
+       def sanitize_hash(h)
+         mappings = {}
+         h.keys.map{|k|
+           if(k.is_a? String)
+             mappings[k] = k.gsub(' ','_')
+           end
+         }
+
+         h.keys.map{|k|
+           h[mappings[k]] = h.delete(k) if mappings[k]
+         }
+
+         h
+       end
+
+       def prefixes(var, options={})
+         var = sanitize([var]).first
+         options = defaults().merge(options)
+         base = options[:base_url]
+         <<-EOF.unindent
+         @base <#{base}/ns/dc/> .
+         @prefix ns: <#{base}/ns/dataset/#{var}#> .
+         @prefix qb: <http://purl.org/linked-data/cube#> .
+         @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+         @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+         @prefix prop: <#{base}/dc/properties/> .
+         @prefix dct: <http://purl.org/dc/terms/> .
+         @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+         @prefix cs: <#{base}/dc/dataset/#{var}/cs/> .
+         @prefix code: <#{base}/dc/dataset/#{var}/code/> .
+         @prefix class: <#{base}/dc/dataset/#{var}/class/> .
+         @prefix owl: <http://www.w3.org/2002/07/owl#> .
+         @prefix skos: <http://www.w3.org/2004/02/skos/core#> .
+         @prefix foaf: <http://xmlns.com/foaf/0.1/> .
+         @prefix org: <http://www.w3.org/ns/org#> .
+         @prefix prov: <http://www.w3.org/ns/prov#> .
+
+         EOF
+       end
+
+       def data_structure_definition(components, var, options={})
+         var = sanitize([var]).first
+         options = defaults().merge(options)
+         str = "ns:dsd-#{var} a qb:DataStructureDefinition;\n"
+         str << " qb:component\n"
+         components.map{|n|
+           str << " cs:#{n} ,\n"
+         }
+         str[-2] = '.'
+         str << "\n"
+         str
+       end
+
+       def dataset(var, options={})
+         var = sanitize([var]).first
+         options = defaults().merge(options)
+         <<-EOF.unindent
+         ns:dataset-#{var} a qb:DataSet ;
+         rdfs:label "#{var}"@en ;
+         qb:structure ns:dsd-#{var} .
+
+         EOF
+       end
+
+       def component_specifications(measure_names, dimension_names, var, options={})
+         options = defaults().merge(options)
+         specs = []
+
+         dimension_names.map{|d|
+           specs << <<-EOF.unindent
+           cs:#{d} a qb:ComponentSpecification ;
+           rdfs:label "#{d} Component" ;
+           qb:dimension prop:#{d} .
+
+           EOF
+         }
+
+         measure_names.map{|n|
+           specs << <<-EOF.unindent
+           cs:#{n} a qb:ComponentSpecification ;
+           rdfs:label "#{n} Component" ;
+           qb:measure prop:#{n} .
+
+           EOF
+         }
+
+         specs
+       end
+
+       def dimension_properties(dimensions, codes, var, options={})
+         options = defaults().merge(options)
+         props = []
+
+         dimensions.map{|d|
+           if codes.include?(d)
+             props << <<-EOF.unindent
+             prop:#{d} a rdf:Property, qb:DimensionProperty ;
+             rdfs:label "#{d}"@en ;
+             qb:codeList code:#{d.downcase} ;
+             rdfs:range code:#{d.downcase.capitalize} .
+
+             EOF
+           else
+             props << <<-EOF.unindent
+             prop:#{d} a rdf:Property, qb:DimensionProperty ;
+             rdfs:label "#{d}"@en .
+
+             EOF
+           end
+         }
+
+         props
+       end
+
+       def measure_properties(measures, var, options={})
+         options = defaults().merge(options)
+         props = []
+
+         measures.map{ |m|
+
+           props << <<-EOF.unindent
+           prop:#{m} a rdf:Property, qb:MeasureProperty ;
+           rdfs:label "#{m}"@en .
+
+           EOF
+         }
+
+         props
+       end
+
+       def observations(measures, dimensions, codes, data, observation_labels, var, options={})
+         var = sanitize([var]).first
+         options = defaults().merge(options)
+         obs = []
+         observation_labels.each_with_index.map{|r, i|
+           contains_nulls = false
+           str = <<-EOF.unindent
+           ns:obs#{r} a qb:Observation ;
+           qb:dataSet ns:dataset-#{var} ;
+           EOF
+
+           str << " rdfs:label \"#{r}\" ;\n" unless options[:no_labels]
+
+           dimensions.map{|d|
+             contains_nulls = contains_nulls | (data[d][i] == nil)
+             if codes.include? d
+               str << " prop:#{d} <code/#{d.downcase}/#{data[d][i]}> ;\n"
+             else
+               str << " prop:#{d} ns:#{to_resource(data[d][i], options)} ;\n"
+             end
+           }
+
+           measures.map{|m|
+             contains_nulls = contains_nulls | (data[m][i] == nil)
+             str << " prop:#{m} #{to_literal(data[m][i], options)} ;\n"
+
+           }
+
+           str << " .\n\n"
+           obs << str unless contains_nulls && !options[:encode_nulls]
+
+         }
+         obs
+       end
+
+       def code_lists(codes, data, var, options={})
+         options = defaults().merge(options)
+         lists = []
+         codes.map{|code|
+           str = <<-EOF.unindent
+           code:#{code.downcase.capitalize} a rdfs:Class, owl:Class;
+           rdfs:subClassOf skos:Concept ;
+           rdfs:label "Code list for #{code} - codelist class"@en;
+           rdfs:comment "Specifies the #{code} for each observation";
+           rdfs:seeAlso code:#{code.downcase} .
+
+           code:#{code.downcase} a skos:ConceptScheme;
+           skos:prefLabel "Code list for #{code} - codelist scheme"@en;
+           rdfs:label "Code list for #{code} - codelist scheme"@en;
+           skos:notation "CL_#{code.upcase}";
+           skos:note "Specifies the #{code} for each observation";
+           EOF
+           data[code].uniq.map{|value|
+             unless value == nil && !options[:encode_nulls]
+               str << " skos:hasTopConcept <code/#{code.downcase}/#{to_resource(value,options)}> ;\n"
+             end
+           }
+
+           str << " .\n\n"
+           lists << str
+         }
+
+
+         lists
+       end
+
+       def concept_codes(codes, data, var, options={})
+         options = defaults().merge(options)
+         concepts = []
+         codes.map{|code|
+           data[code].uniq.map{|value|
+             unless value == nil && !options[:encode_nulls]
+               concepts << <<-EOF.unindent
+               <code/#{code.downcase}/#{to_resource(value,options)}> a skos:Concept, code:#{code.downcase.capitalize};
+               skos:topConceptOf code:#{code.downcase} ;
+               skos:prefLabel "#{to_resource(value,options)}" ;
+               skos:inScheme code:#{code.downcase} .
+
+               EOF
+             end
+           }
+         }
+
+         concepts
+       end
+
+
+       def to_resource(obj, options)
+         if obj.is_a? String
+           # TODO decide the right way to handle missing values, since RDF has no null
+           # probably throw an error here since a missing resource is a bigger problem
+           obj = "NA" if obj.empty?
+
+           # TODO remove special characters (faster) as well (eg '?')
+           obj.gsub(' ','_').gsub('?','')
+         elsif obj == nil && options[:encode_nulls]
+           '"NA"'
+         elsif obj.is_a? Numeric
+           # resources cannot be referred to purely by integer (?)
+           "n"+obj.to_s
+         else
+           obj
+         end
+       end
+
+       def to_literal(obj, options)
+         if obj.is_a? String
+           # Depressing that there's no more elegant way to check if a string is
+           # a number...
+           if val = Integer(obj) rescue nil
+             val
+           elsif val = Float(obj) rescue nil
+             val
+           else
+             '"'+obj+'"'
+           end
+         elsif obj == nil && options[:encode_nulls]
+           # TODO decide the right way to handle missing values, since RDF has no null
+           '"NA"'
+         else
+           obj
+         end
+       end
+     end
+   end
+ end
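
The generator can also be exercised directly, without the ORM layer. A minimal sketch using toy component names and values; the keys of data must match the sanitized dimension and measure names, and labels supplies one identifier per observation:

    include R2RDF::Dataset::DataCube

    measures   = ['chunkiness']
    dimensions = ['producer']
    codes      = ['producer']   # coded dimensions receive SKOS code lists
    data       = {
      'chunkiness' => [1.5, 2.25],
      'producer'   => ['Alpha', 'Beta']
    }
    labels     = ['1', '2']

    turtle = generate(measures, dimensions, codes, data, labels, 'bacon_example')
    puts turtle   # prefixes, DSD, component specs, code list, and observations
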
data/lib/bio-publisci/dataset/interactive.rb
@@ -0,0 +1,57 @@
+ module R2RDF
+   module Dataset
+     module Interactive
+       # to be called by other classes if user input is required
+       def defaults
+         {
+           load_from_file: false
+         }
+       end
+
+       def interactive(options={})
+         options = defaults.merge(options)
+         qb = {}
+
+         puts "load config from file? [y/N]"
+         if gets.chomp == "y"
+           # use yaml or DSL file to configure
+         else
+           qb[:dimensions] = dimensions()
+           qb[:measures] = measures()
+         end
+
+         puts "load data from file? [y/N]"
+         if gets.chomp == "y"
+           # attempt to load dataset from file, ask user to resolve problems or ambiguity
+         else
+         end
+         qb
+       end
+
+       def dimensions
+         puts "Enter a list of dimensions, separated by commas"
+         arr = gets.chomp.split(",")
+         dims = {}
+
+         arr.map{|dim|
+           puts "What is the range of #{dim.chomp.strip}? [:coded]"
+           type = gets.chomp
+           type = :coded if type == ":coded" || type == ""
+           dims[dim.chomp.strip] = {type: type}
+         }
+
+         dims
+       end
+
+       def measures
+         puts "Enter a list of measures, separated by commas"
+         arr = gets.chomp.split(",")
+         meas = []
+
+         arr.map{|m| meas << m.chomp.strip}
+
+         meas
+       end
+     end
+   end
+ end
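
A brief sketch of how a caller might drive these prompts; the Builder wrapper is invented for illustration, and the resulting hash depends entirely on what is typed at the console:

    class Builder
      include R2RDF::Dataset::Interactive
    end

    # Answer 'n' to both file questions, then enter e.g. "producer" as a
    # dimension and "chunkiness" as a measure when prompted.
    spec = Builder.new.interactive
    # spec => { dimensions: { "producer" => { type: :coded } }, measures: ["chunkiness"] }
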