bio-publisci 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. data/.document +5 -0
  2. data/.rspec +1 -0
  3. data/.travis.yml +13 -0
  4. data/Gemfile +24 -0
  5. data/LICENSE.txt +20 -0
  6. data/README.md +47 -0
  7. data/README.rdoc +48 -0
  8. data/Rakefile +70 -0
  9. data/bin/bio-publisci +83 -0
  10. data/features/create_generator.feature +25 -0
  11. data/features/integration.feature +12 -0
  12. data/features/integration_steps.rb +10 -0
  13. data/features/orm.feature +60 -0
  14. data/features/orm_steps.rb +74 -0
  15. data/features/reader.feature +25 -0
  16. data/features/reader_steps.rb +60 -0
  17. data/features/step_definitions/bio-publisci_steps.rb +0 -0
  18. data/features/store.feature +27 -0
  19. data/features/store_steps.rb +42 -0
  20. data/features/support/env.rb +13 -0
  21. data/features/writer.feature +9 -0
  22. data/features/writer_steps.rb +17 -0
  23. data/lib/bio-publisci/analyzer.rb +57 -0
  24. data/lib/bio-publisci/dataset/ORM/data_cube_orm.rb +219 -0
  25. data/lib/bio-publisci/dataset/ORM/observation.rb +20 -0
  26. data/lib/bio-publisci/dataset/data_cube.rb +308 -0
  27. data/lib/bio-publisci/dataset/interactive.rb +57 -0
  28. data/lib/bio-publisci/loader.rb +36 -0
  29. data/lib/bio-publisci/metadata/metadata.rb +105 -0
  30. data/lib/bio-publisci/parser.rb +64 -0
  31. data/lib/bio-publisci/query/query_helper.rb +114 -0
  32. data/lib/bio-publisci/r_client.rb +54 -0
  33. data/lib/bio-publisci/readers/arff.rb +87 -0
  34. data/lib/bio-publisci/readers/big_cross.rb +119 -0
  35. data/lib/bio-publisci/readers/cross.rb +72 -0
  36. data/lib/bio-publisci/readers/csv.rb +54 -0
  37. data/lib/bio-publisci/readers/dataframe.rb +66 -0
  38. data/lib/bio-publisci/readers/r_matrix.rb +152 -0
  39. data/lib/bio-publisci/store.rb +56 -0
  40. data/lib/bio-publisci/writers/arff.rb +66 -0
  41. data/lib/bio-publisci/writers/dataframe.rb +81 -0
  42. data/lib/bio-publisci.rb +36 -0
  43. data/lib/r2rdf.rb +226 -0
  44. data/lib/template_bak/publisci.rb +3 -0
  45. data/lib/template_bak.rb +12 -0
  46. data/lib/vocabs/cc.rb +18 -0
  47. data/lib/vocabs/cert.rb +13 -0
  48. data/lib/vocabs/dc.rb +63 -0
  49. data/lib/vocabs/dc11.rb +23 -0
  50. data/lib/vocabs/doap.rb +45 -0
  51. data/lib/vocabs/exif.rb +168 -0
  52. data/lib/vocabs/foaf.rb +69 -0
  53. data/lib/vocabs/geo.rb +13 -0
  54. data/lib/vocabs/http.rb +26 -0
  55. data/lib/vocabs/ma.rb +78 -0
  56. data/lib/vocabs/owl.rb +59 -0
  57. data/lib/vocabs/rdfs.rb +17 -0
  58. data/lib/vocabs/rsa.rb +12 -0
  59. data/lib/vocabs/rss.rb +14 -0
  60. data/lib/vocabs/sioc.rb +93 -0
  61. data/lib/vocabs/skos.rb +36 -0
  62. data/lib/vocabs/wot.rb +21 -0
  63. data/lib/vocabs/xhtml.rb +9 -0
  64. data/lib/vocabs/xsd.rb +58 -0
  65. data/resources/queries/codes.rq +13 -0
  66. data/resources/queries/dataset.rq +7 -0
  67. data/resources/queries/dimension_ranges.rq +8 -0
  68. data/resources/queries/dimensions.rq +7 -0
  69. data/resources/queries/measures.rq +7 -0
  70. data/resources/queries/observations.rq +12 -0
  71. data/resources/queries/test.rq +3 -0
  72. data/resources/weather.numeric.arff +23 -0
  73. data/spec/analyzer_spec.rb +36 -0
  74. data/spec/bio-publisci_spec.rb +7 -0
  75. data/spec/csv/bacon.csv +4 -0
  76. data/spec/csv/moar_bacon.csv +11 -0
  77. data/spec/data_cube_spec.rb +166 -0
  78. data/spec/generators/csv_spec.rb +44 -0
  79. data/spec/generators/dataframe_spec.rb +44 -0
  80. data/spec/generators/r_matrix_spec.rb +35 -0
  81. data/spec/queries/integrity/1.rq +21 -0
  82. data/spec/queries/integrity/11.rq +29 -0
  83. data/spec/queries/integrity/12.rq +37 -0
  84. data/spec/queries/integrity/14.rq +25 -0
  85. data/spec/queries/integrity/19_1.rq +21 -0
  86. data/spec/queries/integrity/19_2.rq +15 -0
  87. data/spec/queries/integrity/2.rq +22 -0
  88. data/spec/queries/integrity/3.rq +19 -0
  89. data/spec/queries/integrity/4.rq +13 -0
  90. data/spec/queries/integrity/5.rq +14 -0
  91. data/spec/r_builder_spec.rb +33 -0
  92. data/spec/spec_helper.rb +17 -0
  93. data/spec/turtle/bacon +149 -0
  94. data/spec/turtle/reference +2066 -0
  95. metadata +259 -0
@@ -0,0 +1,9 @@
1
+ Feature: export to various formats using writers
2
+
3
+ In order to use RDF encoded data in other applications
4
+ I want to export domain objects using an R2RDF::Writer object
5
+
6
+ Scenario: write to ARFF format
7
+ Given a ARFF writer
8
+ When I call its from_turtle method on the file spec/turtle/bacon
9
+ Then I should receive a .arff file as a string
@@ -0,0 +1,17 @@
1
# Cucumber step definitions for features/writer.feature.
# Steps share state through instance variables: @writer (the writer under
# test), @result (its output), and @turtle_string (set by earlier steps).

# Instantiates a writer by constant name, e.g. "ARFF" -> R2RDF::Writer::ARFF.new
Given(/^a (.*) writer$/) do |type|
  @writer = R2RDF::Writer.const_get(type).new
end

# Runs the writer on an on-disk turtle file and stores the output.
When(/^I call its from_turtle method on the file (.*)$/) do |file|
  @result = @writer.from_turtle(file)
end

# Writes @turtle_string to a tempfile (from_turtle takes a path, not a
# string), converts it, then removes the tempfile.
When(/^I call its from_turtle method on the turtle string$/) do
  f=Tempfile.open('writerttl'); f.write @turtle_string; f.close
  @result = @writer.from_turtle(f.path)
  f.unlink
end

# Writers return the converted document as a plain String.
Then(/^I should receive a \.arff file as a string$/) do
  @result.is_a?(String).should be true
end
@@ -0,0 +1,57 @@
1
module R2RDF

  # Handles analysis of R expressions to extract properties and recognize
  # potential ambiguity before RDF generation.
  module Analyzer
    # Characters that must be sanitized out of identifiers before they can be
    # used in generated URIs.
    DIRTY_CHARACTERS = ['.', ' '].freeze

    # Recursively checks whether +data+ (scalar, Array, or Hash) contains any
    # characters that would need sanitizing.
    #
    # Returns true or false.
    #
    # Fixes two bugs in the previous implementation:
    # * the scalar branch used `scan(/./) & dirty_characters`, which is always
    #   truthy (an empty Array is truthy in Ruby), so every scalar was dirty;
    # * the Array branch returned the (truthy) result of `map` when no dirty
    #   element was found, instead of false.
    def dirty?(data)
      if data.is_a? Hash
        # dirty if any key or any value is dirty
        data.any? { |k, v| dirty?(k) || dirty?(v) }
      elsif data.is_a? Array
        data.any? { |datum| dirty?(datum) }
      else
        DIRTY_CHARACTERS.any? { |c| data.to_s.include?(c) }
      end
    end

    # Suggests an RDF range for a column of values: a specific xsd type for
    # homogeneous Integer/Float/String data, :coded otherwise.
    def recommend_range(data)
      classes = data.map { |d| d.class }
      homogenous = classes.uniq.size == 1
      return :coded unless homogenous

      # `<=` (subclass test) works both before Ruby 2.4 (Fixnum/Bignum) and
      # after (unified Integer); a bare `== Fixnum` raises NameError on Ruby 3.
      if classes[0] <= Integer
        "xsd:int"
      elsif classes[0] == Float
        "xsd:double"
      elsif classes[0] == String
        recommend_range_strings(data)
      else
        :coded
      end
    end

    # For string data, recommends a numeric xsd type if every entry parses as
    # a number; otherwise :coded. All-integer strings are "xsd:int";
    # all-float strings are "xsd:double" (the previous code returned
    # "xsd:int" for floats as well, mislabeling float-valued columns).
    def recommend_range_strings(data)
      return "xsd:int" if data.all? { |d| Integer(d) rescue nil }
      return "xsd:double" if data.all? { |d| Float(d) rescue nil }
      :coded
    end

    # Validates that every observation hash in +obs+ has exactly the keys
    # listed in +dimensions+ and +measures+.
    #
    # Raises RuntimeError naming the missing or unknown keys.
    def check_integrity(obs, dimensions, measures)
      expected = dimensions | measures
      obs.each do |o|
        missing = expected - o.keys
        extra = o.keys - expected
        raise "MissingValues for #{missing}" unless missing.empty?
        raise "UnknownProperty #{extra}" unless extra.empty?
      end
    end
  end
end
@@ -0,0 +1,219 @@
1
module R2RDF
  module Dataset
    module ORM
      # Object/RDF mapper for qb:DataSet structures. Build a dataset in
      # memory (dimensions, measures, observations, Dublin Core metadata) and
      # serialize it to Turtle with #to_n3, or reconstruct one from an
      # existing graph with .load.
      class DataCube
        extend R2RDF::Dataset::DataCube
        extend R2RDF::Analyzer
        extend R2RDF::Metadata
        extend R2RDF::Query
        extend R2RDF::Parser

        include R2RDF::Dataset::DataCube
        include R2RDF::Analyzer
        include R2RDF::Metadata
        include R2RDF::Query
        include R2RDF::Parser

        attr_accessor :labels
        attr_accessor :dimensions
        attr_accessor :measures
        attr_accessor :obs
        attr_accessor :meta

        # @param options [Hash] see #parse_options for recognized keys
        # @param do_parse [Boolean] skip option parsing when false (used by
        #   callers that populate the object manually)
        def initialize(options={},do_parse = true)
          @dimensions = {}
          @measures = []
          @obs = []
          @generator_options = {}
          @options = {}

          @meta = {}

          parse_options options if do_parse
        end

        # Builds a DataCube from an RDF graph (a serialized file/string, or an
        # http(s) endpoint URL, which is passed through untouched) by running
        # the bundled SPARQL queries for dimensions, measures, dataset name,
        # and observations.
        def self.load(graph,options={},verbose=false)
          graph = create_graph(graph) unless graph =~ /^http/

          dimensions = Hash[get_hashes(execute_from_file('dimension_ranges.rq',graph),"to_s").map{|solution|
            #TODO coded properties should be found via SPARQL queries
            # a range URI whose second-to-last path segment is "code" marks a
            # coded dimension; otherwise the range's local name is the type
            if solution[:range].split('/')[-2] == "code"
              type = :coded
            else
              type = strip_uri(solution[:range])
            end
            [strip_uri(solution[:dimension]), {type: type}]
          }]
          puts "dimensions: #{dimensions}" if verbose
          measures = get_ary(execute_from_file('measures.rq',graph)).flatten
          puts "measures: #{measures}" if verbose
          name = execute_from_file('dataset.rq',graph).to_h.first[:label]
          puts "dataset: #{name}" if verbose
          obs = execute_from_file('observations.rq',graph)
          puts "observations: #{obs}" if verbose
          simple_observations = observation_hash(obs,true)

          new_opts = {
            measures: measures,
            dimensions: dimensions,
            observations: simple_observations.values,
            name: name,
          }

          options = options.merge(new_opts)
          puts "creating #{options}" if verbose
          self.new(options)
        end

        # Recognized keys: :dimensions, :measures, :observations, :name
        # (required), :generator_options, :skip_metadata, :validate_each.
        # Raises when no :name is given.
        def parse_options(options)
          if options[:dimensions]
            options[:dimensions].each{|name,details|
              add_dimension(name, details[:type] || :coded)
            }
          end

          if options[:measures]
            options[:measures].each{|m| @measures << m}
          end

          if options[:observations]
            options[:observations].each{|obs_data| add_observation obs_data}
          end

          @generator_options = options[:generator_options] if options[:generator_options]
          @options[:skip_metadata] = options[:skip_metadata] if options[:skip_metadata]

          if options[:name]
            @name = options[:name]
          else
            raise "No dataset name specified!"
          end

          if options[:validate_each]
            @options[:validate_each] = options[:validate_each]
          end
        end

        # Serializes the dataset to a Turtle (N3) string, appending basic
        # Dublin Core metadata unless :skip_metadata was set.
        def to_n3
          # create labels if not specified
          unless @labels.is_a?(Array) && @labels.size == @obs.size
            if @labels.is_a? Symbol
              # define some automatic labeling methods
            else
              # default labels are the 1-based observation index as strings
              @labels = (1..@obs.size).to_a.map(&:to_s)
            end
          end
          data = {}

          # collect observation data, column-wise, after validating that every
          # observation has exactly the declared components
          check_integrity(@obs.map{|o| o.data}, @dimensions.keys, @measures)
          @obs.map{|obs|
            (@measures | @dimensions.keys).map{ |component|
              (data[component] ||= []) << obs.data[component]
            }
          }

          codes = @dimensions.map{|d,v| d if v[:type] == :coded}.compact

          str = generate(@measures, @dimensions.keys, codes, data, @labels, @name, @generator_options)
          unless @options[:skip_metadata]
            fields = {
              publishers: publishers(),
              subject: subjects(),
              author: author(),
              description: description(),
              date: date(),
              var: @name,
            }
            str += "\n" + basic(fields,@generator_options)
          end
          str
        end

        def add_dimension(name, type=:coded)
          @dimensions[name.to_s] = {type: type}
        end

        def add_measure(name)
          @measures << name
        end

        # Wraps +data+ in an Observation (keys stringified); validates it
        # immediately when :validate_each is enabled.
        def add_observation(data)
          data = Hash[data.map{|k,v| [k.to_s, v]}]
          obs = Observation.new(data)
          check_integrity([obs.data],@dimensions.keys,@measures) if @options[:validate_each]
          @obs << obs
        end

        # Appends an already-constructed Observation without validation.
        def insert(observation)
          @obs << observation
        end

        def publishers
          @meta[:publishers] ||= []
        end

        def publishers=(publishers)
          @meta[:publishers] = publishers
        end

        def subjects
          @meta[:subject] ||= []
        end

        def subjects=(subjects)
          @meta[:subject]=subjects
        end

        def add_publisher(label,uri)
          publishers << {label: label, uri: uri}
        end

        def add_subject(id)
          # fixed: previously called the undefined method `subject`,
          # raising NameError; the accessor is `subjects`
          subjects << id
        end

        def author
          @meta[:creator] ||= ""
        end

        def author=(author)
          @meta[:creator] = author
        end

        def description
          @meta[:description] ||= ""
        end

        def description=(description)
          @meta[:description] = description
        end

        # Defaults to today's date as "d-m-Y".
        def date
          @meta[:date] ||= "#{Time.now.day}-#{Time.now.month}-#{Time.now.year}"
        end

        def date=(date)
          @meta[:date] = date
        end

        # Plain-hash summary of the dataset structure and contents.
        def to_h
          {
            measures: @measures,
            dimensions: @dimensions,
            observations: @obs.map{|o| o.data}
          }
        end
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
module R2RDF
  module Dataset
    module ORM
      # A single data-cube observation: a thin wrapper around a Hash of
      # component name => value. Entries are readable as methods
      # (obs.weight == obs.data["weight"]).
      class Observation
        attr_accessor :data

        # @param data [Hash] component name (String or Symbol) => value
        def initialize(data={})
          @data = data
        end

        # Exposes entries of the data hash as reader methods. Tries the
        # String form of the name first (DataCube#add_observation stringifies
        # keys), then the Symbol. Falls through to NoMethodError for unknown
        # names.
        #
        # Fixed: the previous stub took a mandatory second argument, so any
        # zero-arg dynamic call raised ArgumentError instead of NoMethodError,
        # and it never returned a value.
        def method_missing(name, *args, &block)
          key = name.to_s
          if @data.respond_to?(:key?) && @data.key?(key)
            @data[key]
          elsif @data.respond_to?(:key?) && @data.key?(name)
            @data[name]
          else
            super
          end
        end

        # Mirrors method_missing so respond_to? reports dynamic readers.
        def respond_to_missing?(method, include_private = false)
          (@data.respond_to?(:key?) && (@data.key?(method.to_s) || @data.key?(method))) || super
        end
      end
    end
  end
end
@@ -0,0 +1,308 @@
1
# Monkey patch so heredoc-built RDF strings can be written indented in the
# source and stripped back to column zero at runtime.
class String
  # Removes the leading whitespace of the FIRST line from the start of every
  # line. Returns a new string; the receiver is untouched.
  def unindent
    margin = self[/\A\s*/]
    gsub(/^#{margin}/, '')
  end
end
8
+
9
module R2RDF
  # used to generate data cube observations, data structure definitions, etc
  module Dataset
    # Emits the pieces of a W3C RDF Data Cube (qb:) serialization as Turtle
    # strings. Each public method returns a string (or array of strings);
    # #generate stitches them all together into one document.
    module DataCube
      # Default generator options, merged under caller-supplied options.
      def defaults
        {
          type: :dataframe,
          encode_nulls: false,
          base_url: "http://www.rqtl.org",
        }
      end

      # Produces the full Turtle document for one dataset.
      #
      # measures/dimensions/codes - component name arrays (codes is the
      #   subset of dimensions that use a skos code list)
      # data - hash of component name => array of values, column-wise
      # observation_labels - one label per observation row
      # var - the dataset name/identifier
      def generate(measures, dimensions, codes, data, observation_labels, var, options={})
        dimensions = sanitize(dimensions)
        codes = sanitize(codes)
        measures = sanitize(measures)
        var = sanitize([var]).first
        data = sanitize_hash(data)

        str = prefixes(var,options)
        str << data_structure_definition((measures | dimensions), var, options)
        str << dataset(var, options)
        component_specifications(measures, dimensions, var, options).map{ |c| str << c }
        dimension_properties(dimensions, codes, var, options).map{|p| str << p}
        measure_properties(measures, var, options).map{|p| str << p}
        code_lists(codes, data, var, options).map{|l| str << l}
        concept_codes(codes, data, var, options).map{|c| str << c}
        observations(measures, dimensions, codes, data, observation_labels, var, options).map{|o| str << o}
        str
      end

      # Replaces whitespace and '.' with '_' in String entries so they are
      # safe to embed in URIs; non-strings pass through unchanged.
      def sanitize(array)
        #remove spaces and other special characters
        processed = []
        array.map{|entry|
          if entry.is_a? String
            processed << entry.gsub(/[\s\.]/,'_')
          else
            processed << entry
          end
        }
        processed
      end

      # Rewrites String keys of +h+ in place, replacing spaces with '_'.
      # NOTE(review): unlike #sanitize this does not touch '.' in keys —
      # confirm whether that asymmetry is intentional.
      def sanitize_hash(h)
        mappings = {}
        h.keys.map{|k|
          if(k.is_a? String)
            mappings[k] = k.gsub(' ','_')
          end
        }

        h.keys.map{|k|
          h[mappings[k]] = h.delete(k) if mappings[k]
        }

        h
      end

      # @prefix/@base header for the document; namespace URIs are derived
      # from options[:base_url] and the dataset name.
      def prefixes(var, options={})
        var = sanitize([var]).first
        options = defaults().merge(options)
        base = options[:base_url]
        <<-EOF.unindent
        @base <#{base}/ns/dc/> .
        @prefix ns: <#{base}/ns/dataset/#{var}#> .
        @prefix qb: <http://purl.org/linked-data/cube#> .
        @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
        @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
        @prefix prop: <#{base}/dc/properties/> .
        @prefix dct: <http://purl.org/dc/terms/> .
        @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
        @prefix cs: <#{base}/dc/dataset/#{var}/cs/> .
        @prefix code: <#{base}/dc/dataset/#{var}/code/> .
        @prefix class: <#{base}/dc/dataset/#{var}/class/> .
        @prefix owl: <http://www.w3.org/2002/07/owl#> .
        @prefix skos: <http://www.w3.org/2004/02/skos/core#> .
        @prefix foaf: <http://xmlns.com/foaf/0.1/> .
        @prefix org: <http://www.w3.org/ns/org#> .
        @prefix prov: <http://www.w3.org/ns/prov#> .

        EOF
      end

      # qb:DataStructureDefinition listing every component specification.
      def data_structure_definition(components,var,options={})
        var = sanitize([var]).first
        options = defaults().merge(options)
        str = "ns:dsd-#{var} a qb:DataStructureDefinition;\n"
        str << " qb:component\n"
        components.map{|n|
          str << " cs:#{n} ,\n"
        }
        # replace the trailing comma of the last component with the
        # statement-terminating '.'
        str[-2]='.'
        str<<"\n"
        str
      end

      # The qb:DataSet node itself, labeled with the dataset name.
      def dataset(var,options={})
        var = sanitize([var]).first
        options = defaults().merge(options)
        <<-EOF.unindent
        ns:dataset-#{var} a qb:DataSet ;
          rdfs:label "#{var}"@en ;
          qb:structure ns:dsd-#{var} .

        EOF
      end

      # One qb:ComponentSpecification per dimension and per measure.
      def component_specifications(measure_names, dimension_names, var, options={})
        options = defaults().merge(options)
        specs = []

        dimension_names.map{|d|
          specs << <<-EOF.unindent
          cs:#{d} a qb:ComponentSpecification ;
            rdfs:label "#{d} Component" ;
            qb:dimension prop:#{d} .

          EOF
        }

        measure_names.map{|n|
          specs << <<-EOF.unindent
          cs:#{n} a qb:ComponentSpecification ;
            rdfs:label "#{n} Component" ;
            qb:measure prop:#{n} .

          EOF
        }

        specs
      end

      # qb:DimensionProperty declarations; coded dimensions additionally get
      # a qb:codeList and an rdfs:range pointing at the code-list class.
      def dimension_properties(dimensions, codes, var, options={})
        options = defaults().merge(options)
        props = []

        dimensions.map{|d|
          if codes.include?(d)
            props << <<-EOF.unindent
            prop:#{d} a rdf:Property, qb:DimensionProperty ;
              rdfs:label "#{d}"@en ;
              qb:codeList code:#{d.downcase} ;
              rdfs:range code:#{d.downcase.capitalize} .

            EOF
          else
            props << <<-EOF.unindent
            prop:#{d} a rdf:Property, qb:DimensionProperty ;
              rdfs:label "#{d}"@en .

            EOF
          end
        }

        props
      end

      # qb:MeasureProperty declarations, one per measure.
      def measure_properties(measures, var, options={})
        options = defaults().merge(options)
        props = []

        measures.map{ |m|

          props << <<-EOF.unindent
          prop:#{m} a rdf:Property, qb:MeasureProperty ;
            rdfs:label "#{m}"@en .

          EOF
        }

        props
      end

      # One qb:Observation per label/row. Coded dimension values are emitted
      # as <code/...> resources, other dimensions via #to_resource, measures
      # via #to_literal. Rows containing nil are dropped entirely unless
      # options[:encode_nulls] is set.
      def observations(measures, dimensions, codes, data, observation_labels, var, options={})
        var = sanitize([var]).first
        options = defaults().merge(options)
        obs = []
        observation_labels.each_with_index.map{|r, i|
          contains_nulls = false
          str = <<-EOF.unindent
          ns:obs#{r} a qb:Observation ;
            qb:dataSet ns:dataset-#{var} ;
          EOF

          str << " rdfs:label \"#{r}\" ;\n" unless options[:no_labels]

          dimensions.map{|d|
            contains_nulls = contains_nulls | (data[d][i] == nil)
            if codes.include? d
              str << " prop:#{d} <code/#{d.downcase}/#{data[d][i]}> ;\n"
            else
              str << " prop:#{d} ns:#{to_resource(data[d][i], options)} ;\n"
            end
          }

          measures.map{|m|
            contains_nulls = contains_nulls | (data[m][i] == nil)
            str << " prop:#{m} #{to_literal(data[m][i], options)} ;\n"

          }

          str << " .\n\n"
          obs << str unless contains_nulls && !options[:encode_nulls]

        }
        obs
      end

      # skos:ConceptScheme + owl:Class pair for each coded dimension, with a
      # skos:hasTopConcept entry for every distinct observed value.
      def code_lists(codes, data, var, options={})
        options = defaults().merge(options)
        lists = []
        codes.map{|code|
          str = <<-EOF.unindent
          code:#{code.downcase.capitalize} a rdfs:Class, owl:Class;
            rdfs:subClassOf skos:Concept ;
            rdfs:label "Code list for #{code} - codelist class"@en;
            rdfs:comment "Specifies the #{code} for each observation";
            rdfs:seeAlso code:#{code.downcase} .

          code:#{code.downcase} a skos:ConceptScheme;
            skos:prefLabel "Code list for #{code} - codelist scheme"@en;
            rdfs:label "Code list for #{code} - codelist scheme"@en;
            skos:notation "CL_#{code.upcase}";
            skos:note "Specifies the #{code} for each observation";
          EOF
          data[code].uniq.map{|value|
            # nils are skipped unless the caller opted into encoding them
            unless value == nil && !options[:encode_nulls]
              str << " skos:hasTopConcept <code/#{code.downcase}/#{to_resource(value,options)}> ;\n"
            end
          }

          str << " .\n\n"
          lists << str
        }


        lists
      end

      # One skos:Concept per distinct value of each coded dimension.
      def concept_codes(codes, data, var, options={})
        options = defaults().merge(options)
        concepts = []
        codes.map{|code|
          data[code].uniq.map{|value|
            unless value == nil && !options[:encode_nulls]
              concepts << <<-EOF.unindent
              <code/#{code.downcase}/#{to_resource(value,options)}> a skos:Concept, code:#{code.downcase.capitalize};
                skos:topConceptOf code:#{code.downcase} ;
                skos:prefLabel "#{to_resource(value,options)}" ;
                skos:inScheme code:#{code.downcase} .

              EOF
            end
          }
        }

        concepts
      end


      # Converts a value into text usable as a resource local name:
      # strings are underscore-joined and stripped of '?', numbers get an
      # "n" prefix (bare integers can't name a resource), and nil becomes
      # "NA" when encode_nulls is on.
      def to_resource(obj, options)
        if obj.is_a? String
          #TODO decide the right way to handle missing values, since RDF has no null
          #probably throw an error here since a missing resource is a bigger problem
          obj = "NA" if obj.empty?

          #TODO remove special characters (faster) as well (eg '?')
          obj.gsub(' ','_').gsub('?','')
        elsif obj == nil && options[:encode_nulls]
          '"NA"'
        elsif obj.is_a? Numeric
          #resources cannot be referred to purely by integer (?)
          "n"+obj.to_s
        else
          obj
        end
      end

      # Converts a value into a Turtle literal: numeric-looking strings
      # become bare numbers, other strings are double-quoted, nil becomes
      # "NA" when encode_nulls is on, anything else passes through.
      def to_literal(obj, options)
        if obj.is_a? String
          # Depressing that there's no more elegant way to check if a string is
          # a number...
          if val = Integer(obj) rescue nil
            val
          elsif val = Float(obj) rescue nil
            val
          else
            '"'+obj+'"'
          end
        elsif obj == nil && options[:encode_nulls]
          #TODO decide the right way to handle missing values, since RDF has no null
          '"NA"'
        else
          obj
        end
      end
    end
  end
end
@@ -0,0 +1,57 @@
1
module R2RDF
  module Dataset
    # Console (stdin/stdout) prompts for assembling a data-cube
    # configuration interactively.
    module Interactive
      #to be called by other classes if user input is required

      # Default options for #interactive.
      def defaults
        {
          load_from_file: false
        }
      end

      # Walks the user through specifying dimensions and measures, returning
      # a hash with :dimensions and :measures keys.
      # NOTE(review): both "load from file" branches are unimplemented stubs —
      # answering "y" currently does nothing.
      def interactive(options={})
        options = defaults.merge(options)
        qb = {}

        puts "load config from file? [y/N]"
        if gets.chomp == "y"
          #use yaml or DSL file to configure
        else
          qb[:dimensions] = dimensions()
          qb[:measures] = measures()
        end

        puts "load data from file? [y/N]"
        if gets.chomp == "y"
          #attempt to load dataset from file, ask user to resolve problems or ambiguity
        else
        end
        qb
      end

      # Prompts for a comma-separated dimension list and, for each dimension,
      # its range (defaulting to :coded). Returns {name => {type: ...}}.
      def dimensions
        puts "Enter a list of dimensions, separated by commas"
        arr = gets.chomp.split(",")
        dims = {}

        arr.map{|dim|
          puts "What is the range of #{dim.chomp.strip}? [:coded]"
          type = gets.chomp
          # an empty answer or the literal ":coded" selects the default
          type = :coded if type == ":coded" || type == ""
          dims[dim.chomp.strip] = {type: type}
        }

        dims
      end

      # Prompts for a comma-separated list of measure names; returns them as
      # a stripped Array of strings.
      def measures
        puts "Enter a list of measures, separated by commas"
        arr = gets.chomp.split(",")
        meas = []

        arr.map{|m| meas << m.chomp.strip}

        meas
      end
    end
  end
end